1
0

refresh patches

This commit is contained in:
2025-03-27 01:51:30 +03:00
parent 3d597650a9
commit b65c570ac2
239 changed files with 14214 additions and 9267 deletions

View File

@@ -1,266 +0,0 @@
From d2589889bf6001daef33479d29680fa3f991fae9 Mon Sep 17 00:00:00 2001
From: Basavaraj Natikar <Basavaraj.Natikar@amd.com>
Date: Tue, 12 Nov 2024 22:33:06 +0530
Subject: [PATCH 1/2] platform/x86/amd: amd_3d_vcache: Add AMD 3D V-Cache
optimizer driver
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
AMD X3D processors, also known as AMD 3D V-Cache, feature dual Core
Complex Dies (CCDs) and enlarged L3 cache, enabling dynamic mode
switching between Frequency and Cache modes. To optimize performance,
implement the AMD 3D V-Cache Optimizer, which allows selecting either:
Frequency mode: cores within the faster CCD are prioritized before
those in the slower CCD.
Cache mode: cores within the larger L3 CCD are prioritized before
those in the smaller L3 CCD.
Co-developed-by: Perry Yuan <perry.yuan@amd.com>
Signed-off-by: Perry Yuan <perry.yuan@amd.com>
Co-developed-by: Mario Limonciello <mario.limonciello@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
Reviewed-by: Shyam Sundar S K <Shyam-sundar.S-k@amd.com>
Reviewed-by: Armin Wolf <W_Armin@gmx.de>
Signed-off-by: Basavaraj Natikar <Basavaraj.Natikar@amd.com>
Link: https://lore.kernel.org/r/20241112170307.3745777-2-Basavaraj.Natikar@amd.com
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
---
MAINTAINERS | 7 +
drivers/platform/x86/amd/Kconfig | 12 ++
drivers/platform/x86/amd/Makefile | 2 +
drivers/platform/x86/amd/x3d_vcache.c | 176 ++++++++++++++++++++++++++
4 files changed, 197 insertions(+)
create mode 100644 drivers/platform/x86/amd/x3d_vcache.c
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -965,6 +965,13 @@ Q: https://patchwork.kernel.org/project/
F: drivers/infiniband/hw/efa/
F: include/uapi/rdma/efa-abi.h
+AMD 3D V-CACHE PERFORMANCE OPTIMIZER DRIVER
+M: Basavaraj Natikar <Basavaraj.Natikar@amd.com>
+R: Mario Limonciello <mario.limonciello@amd.com>
+L: platform-driver-x86@vger.kernel.org
+S: Supported
+F: drivers/platform/x86/amd/x3d_vcache.c
+
AMD ADDRESS TRANSLATION LIBRARY (ATL)
M: Yazen Ghannam <Yazen.Ghannam@amd.com>
L: linux-edac@vger.kernel.org
--- a/drivers/platform/x86/amd/Kconfig
+++ b/drivers/platform/x86/amd/Kconfig
@@ -19,6 +19,18 @@ config AMD_HSMP
If you choose to compile this driver as a module the module will be
called amd_hsmp.
+config AMD_3D_VCACHE
+ tristate "AMD 3D V-Cache Performance Optimizer Driver"
+ depends on X86_64 && ACPI
+ help
+ The driver provides a sysfs interface, enabling the setting of a bias
+ that alters CPU core reordering. This bias prefers cores with higher
+ frequencies or larger L3 caches on processors supporting AMD 3D V-Cache
+ technology.
+
+ If you choose to compile this driver as a module the module will be
+ called amd_3d_vcache.
+
config AMD_WBRF
bool "AMD Wifi RF Band mitigations (WBRF)"
depends on ACPI
--- a/drivers/platform/x86/amd/Makefile
+++ b/drivers/platform/x86/amd/Makefile
@@ -4,6 +4,8 @@
# AMD x86 Platform-Specific Drivers
#
+obj-$(CONFIG_AMD_3D_VCACHE) += amd_3d_vcache.o
+amd_3d_vcache-objs := x3d_vcache.o
obj-$(CONFIG_AMD_PMC) += pmc/
amd_hsmp-y := hsmp.o
obj-$(CONFIG_AMD_HSMP) += amd_hsmp.o
--- /dev/null
+++ b/drivers/platform/x86/amd/x3d_vcache.c
@@ -0,0 +1,176 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * AMD 3D V-Cache Performance Optimizer Driver
+ *
+ * Copyright (c) 2024, Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Authors: Basavaraj Natikar <Basavaraj.Natikar@amd.com>
+ * Perry Yuan <perry.yuan@amd.com>
+ * Mario Limonciello <mario.limonciello@amd.com>
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/acpi.h>
+#include <linux/array_size.h>
+#include <linux/device.h>
+#include <linux/errno.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/platform_device.h>
+#include <linux/pm.h>
+#include <linux/sysfs.h>
+#include <linux/uuid.h>
+
+static char *x3d_mode = "frequency";
+module_param(x3d_mode, charp, 0);
+MODULE_PARM_DESC(x3d_mode, "Initial 3D-VCache mode; 'frequency' (default) or 'cache'");
+
+#define DSM_REVISION_ID 0
+#define DSM_SET_X3D_MODE 1
+
+static guid_t x3d_guid = GUID_INIT(0xdff8e55f, 0xbcfd, 0x46fb, 0xba, 0x0a,
+ 0xef, 0xd0, 0x45, 0x0f, 0x34, 0xee);
+
+enum amd_x3d_mode_type {
+ MODE_INDEX_FREQ,
+ MODE_INDEX_CACHE,
+};
+
+static const char * const amd_x3d_mode_strings[] = {
+ [MODE_INDEX_FREQ] = "frequency",
+ [MODE_INDEX_CACHE] = "cache",
+};
+
+struct amd_x3d_dev {
+ struct device *dev;
+ acpi_handle ahandle;
+ /* To protect x3d mode setting */
+ struct mutex lock;
+ enum amd_x3d_mode_type curr_mode;
+};
+
+static int amd_x3d_get_mode(struct amd_x3d_dev *data)
+{
+ guard(mutex)(&data->lock);
+
+ return data->curr_mode;
+}
+
+static int amd_x3d_mode_switch(struct amd_x3d_dev *data, int new_state)
+{
+ union acpi_object *out, argv;
+
+ guard(mutex)(&data->lock);
+ argv.type = ACPI_TYPE_INTEGER;
+ argv.integer.value = new_state;
+
+ out = acpi_evaluate_dsm(data->ahandle, &x3d_guid, DSM_REVISION_ID,
+ DSM_SET_X3D_MODE, &argv);
+ if (!out) {
+ dev_err(data->dev, "failed to evaluate _DSM\n");
+ return -EINVAL;
+ }
+
+ data->curr_mode = new_state;
+
+ kfree(out);
+
+ return 0;
+}
+
+static ssize_t amd_x3d_mode_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct amd_x3d_dev *data = dev_get_drvdata(dev);
+ int ret;
+
+ ret = sysfs_match_string(amd_x3d_mode_strings, buf);
+ if (ret < 0)
+ return ret;
+
+ ret = amd_x3d_mode_switch(data, ret);
+ if (ret < 0)
+ return ret;
+
+ return count;
+}
+
+static ssize_t amd_x3d_mode_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct amd_x3d_dev *data = dev_get_drvdata(dev);
+ int mode = amd_x3d_get_mode(data);
+
+ return sysfs_emit(buf, "%s\n", amd_x3d_mode_strings[mode]);
+}
+static DEVICE_ATTR_RW(amd_x3d_mode);
+
+static struct attribute *amd_x3d_attrs[] = {
+ &dev_attr_amd_x3d_mode.attr,
+ NULL
+};
+ATTRIBUTE_GROUPS(amd_x3d);
+
+static int amd_x3d_resume_handler(struct device *dev)
+{
+ struct amd_x3d_dev *data = dev_get_drvdata(dev);
+ int ret = amd_x3d_get_mode(data);
+
+ return amd_x3d_mode_switch(data, ret);
+}
+
+static DEFINE_SIMPLE_DEV_PM_OPS(amd_x3d_pm, NULL, amd_x3d_resume_handler);
+
+static const struct acpi_device_id amd_x3d_acpi_ids[] = {
+ {"AMDI0101"},
+ { },
+};
+MODULE_DEVICE_TABLE(acpi, amd_x3d_acpi_ids);
+
+static int amd_x3d_probe(struct platform_device *pdev)
+{
+ struct amd_x3d_dev *data;
+ acpi_handle handle;
+ int ret;
+
+ handle = ACPI_HANDLE(&pdev->dev);
+ if (!handle)
+ return -ENODEV;
+
+ if (!acpi_check_dsm(handle, &x3d_guid, DSM_REVISION_ID, BIT(DSM_SET_X3D_MODE)))
+ return -ENODEV;
+
+ data = devm_kzalloc(&pdev->dev, sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ data->dev = &pdev->dev;
+
+ ret = devm_mutex_init(data->dev, &data->lock);
+ if (ret)
+ return ret;
+
+ data->ahandle = handle;
+ platform_set_drvdata(pdev, data);
+
+ ret = match_string(amd_x3d_mode_strings, ARRAY_SIZE(amd_x3d_mode_strings), x3d_mode);
+ if (ret < 0)
+ return dev_err_probe(&pdev->dev, -EINVAL, "invalid mode %s\n", x3d_mode);
+
+ return amd_x3d_mode_switch(data, ret);
+}
+
+static struct platform_driver amd_3d_vcache_driver = {
+ .driver = {
+ .name = "amd_x3d_vcache",
+ .dev_groups = amd_x3d_groups,
+ .acpi_match_table = amd_x3d_acpi_ids,
+ .pm = pm_sleep_ptr(&amd_x3d_pm),
+ },
+ .probe = amd_x3d_probe,
+};
+module_platform_driver(amd_3d_vcache_driver);
+
+MODULE_DESCRIPTION("AMD 3D V-Cache Performance Optimizer Driver");
+MODULE_LICENSE("GPL");

View File

@@ -1,55 +0,0 @@
From edf899b17950e1b926889b501e06c86dd867bac0 Mon Sep 17 00:00:00 2001
From: Basavaraj Natikar <Basavaraj.Natikar@amd.com>
Date: Tue, 12 Nov 2024 22:33:07 +0530
Subject: [PATCH 2/2] platform/x86/amd: amd_3d_vcache: Add sysfs ABI
documentation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Add documentation for the amd_3d_vcache sysfs bus platform driver
interface so that userspace applications can use it to change mode
preferences, either frequency or cache.
Co-developed-by: Perry Yuan <perry.yuan@amd.com>
Signed-off-by: Perry Yuan <perry.yuan@amd.com>
Co-developed-by: Mario Limonciello <mario.limonciello@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
Reviewed-by: Shyam Sundar S K <Shyam-sundar.S-k@amd.com>
Reviewed-by: Armin Wolf <W_Armin@gmx.de>
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Basavaraj Natikar <Basavaraj.Natikar@amd.com>
Link: https://lore.kernel.org/r/20241112170307.3745777-3-Basavaraj.Natikar@amd.com
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
---
.../sysfs-bus-platform-drivers-amd_x3d_vcache | 12 ++++++++++++
MAINTAINERS | 1 +
2 files changed, 13 insertions(+)
create mode 100644 Documentation/ABI/testing/sysfs-bus-platform-drivers-amd_x3d_vcache
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-platform-drivers-amd_x3d_vcache
@@ -0,0 +1,12 @@
+What: /sys/bus/platform/drivers/amd_x3d_vcache/AMDI0101:00/amd_x3d_mode
+Date: November 2024
+KernelVersion: 6.13
+Contact: Basavaraj Natikar <Basavaraj.Natikar@amd.com>
+Description: (RW) AMD 3D V-Cache optimizer allows users to switch CPU core
+ rankings dynamically.
+
+ This file switches between these two modes:
+ - "frequency" cores within the faster CCD are prioritized before
+ those in the slower CCD.
+ - "cache" cores within the larger L3 CCD are prioritized before
+ those in the smaller L3 CCD.
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -970,6 +970,7 @@ M: Basavaraj Natikar <Basavaraj.Natikar@
R: Mario Limonciello <mario.limonciello@amd.com>
L: platform-driver-x86@vger.kernel.org
S: Supported
+F: Documentation/ABI/testing/sysfs-bus-platform-drivers-amd_x3d_vcache
F: drivers/platform/x86/amd/x3d_vcache.c
AMD ADDRESS TRANSLATION LIBRARY (ATL)

View File

@@ -1,7 +1,7 @@
From b492213c96ded86e7800b320706ad15bd31c7c1b Mon Sep 17 00:00:00 2001
From ae8cebfd2446a0564c849adcd771ce538855b6b2 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian@brauner.io>
Date: Wed, 16 Jan 2019 23:13:25 +0100
Subject: [PATCH 1/4] binder: turn into module
Subject: binder: turn into module
The Android binder driver needs to become a module for the sake of shipping
Anbox. To do this we need to export the following functions since binder is
@@ -29,6 +29,7 @@ Signed-off-by: Seth Forshee <seth.forshee@canonical.com>
[ arighi: zap_page_range() has been dropped, export zap_page_range_single() in 6.3 ]
Signed-off-by: Andrea Righi <andrea.righi@canonical.com>
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
---
--- a/drivers/android/Kconfig
+++ b/drivers/android/Kconfig
@@ -45,7 +46,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
Binderfs is a pseudo-filesystem for the Android Binder IPC driver
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -7027,9 +7027,20 @@ err_alloc_device_names_failed:
@@ -7031,9 +7031,20 @@ err_alloc_device_names_failed:
return ret;
}
@@ -79,7 +80,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
#include <linux/rbtree.h>
#include <linux/list.h>
#include <linux/mm.h>
@@ -111,7 +112,7 @@ struct binder_alloc {
@@ -120,7 +121,7 @@ struct binder_alloc {
bool oneway_spam_detected;
};
@@ -98,7 +99,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
#include <linux/list.h>
#include <linux/miscdevice.h>
#include <linux/mutex.h>
@@ -78,7 +79,7 @@ extern const struct file_operations bind
@@ -77,7 +78,7 @@ extern const struct file_operations bind
extern char *binder_devices_param;
@@ -107,7 +108,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
extern bool is_binderfs_device(const struct inode *inode);
extern struct dentry *binderfs_create_file(struct dentry *dir, const char *name,
const struct file_operations *fops,
@@ -99,7 +100,7 @@ static inline struct dentry *binderfs_cr
@@ -98,7 +99,7 @@ static inline struct dentry *binderfs_cr
static inline void binderfs_remove_file(struct dentry *dentry) {}
#endif
@@ -127,7 +128,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
#else
bool use_reserve = true;
#endif
@@ -399,7 +399,7 @@ static int binderfs_binder_ctl_create(st
@@ -402,7 +402,7 @@ static int binderfs_binder_ctl_create(st
struct dentry *root = sb->s_root;
struct binderfs_info *info = sb->s_fs_info;
#if defined(CONFIG_IPC_NS)
@@ -136,7 +137,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
#else
bool use_reserve = true;
#endif
@@ -691,7 +691,7 @@ static int binderfs_fill_super(struct su
@@ -694,7 +694,7 @@ static int binderfs_fill_super(struct su
return -ENOMEM;
info = sb->s_fs_info;
@@ -184,7 +185,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
return container_of(ns, struct ipc_namespace, ns);
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -3176,6 +3176,7 @@ struct vm_struct *get_vm_area(unsigned l
@@ -3181,6 +3181,7 @@ struct vm_struct *get_vm_area(unsigned l
NUMA_NO_NODE, GFP_KERNEL,
__builtin_return_address(0));
}

View File

@@ -0,0 +1,29 @@
From 0156792aef65a27c5938dc821630f5546dc6a3c9 Mon Sep 17 00:00:00 2001
From: Paolo Pisati <paolo.pisati@canonical.com>
Date: Thu, 6 Feb 2025 15:38:05 +0100
Subject: binder: turn into module - list_lru_add()/list_lru_del()
Signed-off-by: Paolo Pisati <paolo.pisati@canonical.com>
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
---
mm/list_lru.c | 2 ++
1 file changed, 2 insertions(+)
--- a/mm/list_lru.c
+++ b/mm/list_lru.c
@@ -175,6 +175,7 @@ bool list_lru_add(struct list_lru *lru,
unlock_list_lru(l, false);
return false;
}
+EXPORT_SYMBOL_GPL(list_lru_add);
bool list_lru_add_obj(struct list_lru *lru, struct list_head *item)
{
@@ -212,6 +213,7 @@ bool list_lru_del(struct list_lru *lru,
unlock_list_lru(l, false);
return false;
}
+EXPORT_SYMBOL_GPL(list_lru_del);
bool list_lru_del_obj(struct list_lru *lru, struct list_head *item)
{

View File

@@ -0,0 +1,21 @@
From 51d6dcc335e157df9ce5b9605841b879db64894a Mon Sep 17 00:00:00 2001
From: Paolo Pisati <paolo.pisati@canonical.com>
Date: Thu, 6 Feb 2025 15:40:09 +0100
Subject: binder: turn into module - lock_vma_under_rcu()
Signed-off-by: Paolo Pisati <paolo.pisati@canonical.com>
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
---
mm/memory.c | 1 +
1 file changed, 1 insertion(+)
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -6395,6 +6395,7 @@ inval:
count_vm_vma_lock_event(VMA_LOCK_ABORT);
return NULL;
}
+EXPORT_SYMBOL_GPL(lock_vma_under_rcu);
#endif /* CONFIG_PER_VMA_LOCK */
#ifndef __PAGETABLE_P4D_FOLDED

View File

@@ -1,7 +1,7 @@
From cdcc9fde68f01d86d8f9ff0baaf0e9fbd15fa8ba Mon Sep 17 00:00:00 2001
From fa6cddbfd7915ed81dcbed99f9e5b5a9267d80a3 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Thu, 13 Dec 2018 01:00:49 +0000
Subject: [PATCH 1/4] sched/wait: Do accept() in LIFO order for cache
Subject: sched/wait: Do accept() in LIFO order for cache
efficiency
Signed-off-by: Alexandre Frade <kernel@xanmod.org>

View File

@@ -1,7 +1,7 @@
From c6f8d4723c8185d7096cdea7f5e499184f22426e Mon Sep 17 00:00:00 2001
From b837910f5e9f1928872e600a6835be6d422b761b Mon Sep 17 00:00:00 2001
From: William Douglas <william.douglas@intel.com>
Date: Wed, 20 Jun 2018 17:23:21 +0000
Subject: [PATCH 2/4] firmware: Enable stateless firmware loading
Subject: firmware: Enable stateless firmware loading
Prefer the order of specific version before generic and /etc before
/lib to enable the user to give specific overrides for generic

View File

@@ -1,7 +1,7 @@
From 78a04a7536d68fa0d8e7dc2955d37aa7f592fca5 Mon Sep 17 00:00:00 2001
From 274ba9c23b6fe3212c7f02f3e833086427034705 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Sun, 18 Feb 2018 23:35:41 +0000
Subject: [PATCH 3/4] locking: rwsem: spin faster
Subject: locking: rwsem: spin faster
tweak rwsem owner spinning a bit

View File

@@ -0,0 +1,45 @@
From 0234467781c5b1c50f71f3936571e4ea3e77c279 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Thu, 2 Jun 2016 23:36:32 -0500
Subject: drivers: initialize ata before graphics
ATA init is the long pole in the boot process, and it's asynchronous.
Move the graphics init after it so that ata and graphics initialize
in parallel.
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
---
drivers/Makefile | 13 +++++++------
1 file changed, 7 insertions(+), 6 deletions(-)
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -64,14 +64,8 @@ obj-y += char/
# iommu/ comes before gpu as gpu are using iommu controllers
obj-y += iommu/
-# gpu/ comes after char for AGP vs DRM startup and after iommu
-obj-y += gpu/
-
obj-$(CONFIG_CONNECTOR) += connector/
-# i810fb depends on char/agp/
-obj-$(CONFIG_FB_I810) += video/fbdev/i810/
-
obj-$(CONFIG_PARPORT) += parport/
obj-y += base/ block/ misc/ mfd/ nfc/
obj-$(CONFIG_LIBNVDIMM) += nvdimm/
@@ -83,6 +77,13 @@ obj-y += macintosh/
obj-y += scsi/
obj-y += nvme/
obj-$(CONFIG_ATA) += ata/
+
+# gpu/ comes after char for AGP vs DRM startup and after iommu
+obj-y += gpu/
+
+# i810fb depends on char/agp/
+obj-$(CONFIG_FB_I810) += video/fbdev/i810/
+
obj-$(CONFIG_TARGET_CORE) += target/
obj-$(CONFIG_MTD) += mtd/
obj-$(CONFIG_SPI) += spi/

View File

@@ -1,7 +1,7 @@
From 2099f9c57216c836e445d2f6ba65f04131267f47 Mon Sep 17 00:00:00 2001
From: Alexandre Frade <kernel@xanmod.org>
Date: Mon, 27 Feb 2023 01:38:18 +0000
Subject: [PATCH 1/2] netfilter: Add netfilter nf_tables fullcone support
Subject: netfilter: Add netfilter nf_tables fullcone support
Signed-off-by: Syrone Wong <wong.syrone@gmail.com>
Signed-off-by: Alexandre Frade <kernel@xanmod.org>

View File

@@ -1,7 +1,7 @@
From 6fbfabdc4e5ef8a186c27e4ed2db28ee1ddf4b4e Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Tue, 20 Feb 2018 15:56:02 +0100
Subject: [PATCH 2/2] netfilter: add xt_FLOWOFFLOAD target
Subject: netfilter: add xt_FLOWOFFLOAD target
Signed-off-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
@@ -18,7 +18,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -294,6 +294,11 @@ void nf_flow_table_free(struct nf_flowta
@@ -295,6 +295,11 @@ void nf_flow_table_free(struct nf_flowta
void flow_offload_teardown(struct flow_offload *flow);
@@ -88,7 +88,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
#include <net/netfilter/nf_flow_table.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
@@ -373,8 +372,7 @@ flow_offload_lookup(struct nf_flowtable
@@ -413,8 +412,7 @@ flow_offload_lookup(struct nf_flowtable
}
EXPORT_SYMBOL_GPL(flow_offload_lookup);
@@ -98,7 +98,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
void (*iter)(struct nf_flowtable *flowtable,
struct flow_offload *flow, void *data),
void *data)
@@ -435,6 +433,7 @@ static void nf_flow_offload_gc_step(stru
@@ -580,6 +578,7 @@ static void nf_flow_offload_gc_step(stru
nf_flow_offload_stats(flow_table, flow);
}
}

View File

@@ -0,0 +1,52 @@
From 5435b92688a57d175607374d5bbff357e4ba3e71 Mon Sep 17 00:00:00 2001
From: Neal Cardwell <ncardwell@google.com>
Date: Tue, 11 Jun 2019 12:26:55 -0400
Subject: net-tcp_bbr: broaden app-limited rate sample detection
This commit is a bug fix for the Linux TCP app-limited
(application-limited) logic that is used for collecting rate
(bandwidth) samples.
Previously the app-limited logic only looked for "bubbles" of
silence in between application writes, by checking at the start
of each sendmsg. But "bubbles" of silence can also happen before
retransmits: e.g. bubbles can happen between an application write
and a retransmit, or between two retransmits.
Retransmits are triggered by ACKs or timers. So this commit checks
for bubbles of app-limited silence upon ACKs or timers.
Why does this commit check for app-limited state at the start of
ACKs and timer handling? Because at that point we know whether
inflight was fully using the cwnd. During processing the ACK or
timer event we often change the cwnd; after changing the cwnd we
can't know whether inflight was fully using the old cwnd.
Origin-9xx-SHA1: 3fe9b53291e018407780fb8c356adb5666722cbc
Change-Id: I37221506f5166877c2b110753d39bb0757985e68
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
---
net/ipv4/tcp_input.c | 1 +
net/ipv4/tcp_timer.c | 1 +
2 files changed, 2 insertions(+)
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3967,6 +3967,7 @@ static int tcp_ack(struct sock *sk, cons
prior_fack = tcp_is_sack(tp) ? tcp_highest_sack_seq(tp) : tp->snd_una;
rs.prior_in_flight = tcp_packets_in_flight(tp);
+ tcp_rate_check_app_limited(sk);
/* ts_recent update must be made after we are sure that the packet
* is in window.
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -699,6 +699,7 @@ void tcp_write_timer_handler(struct sock
return;
}
+ tcp_rate_check_app_limited(sk);
tcp_mstamp_refresh(tcp_sk(sk));
event = icsk->icsk_pending;

View File

@@ -0,0 +1,74 @@
From 9aa33a35b5b9cbe65c87e6f9438e69ede143d11a Mon Sep 17 00:00:00 2001
From: Neal Cardwell <ncardwell@google.com>
Date: Sun, 24 Jun 2018 21:55:59 -0400
Subject: net-tcp_bbr: v2: shrink delivered_mstamp,
first_tx_mstamp to u32 to free up 8 bytes
Free up some space for tracking inflight and losses for each
bw sample, in upcoming commits.
These timestamps are in microseconds, and are now stored in 32
bits. So they can only hold time intervals up to roughly 2^12 = 4096
seconds. But Linux TCP RTT and RTO tracking has the same 32-bit
microsecond implementation approach and resulting deployment
limitations. So this is not introducing a new limit. And these should
not be a limitation for the foreseeable future.
Effort: net-tcp_bbr
Origin-9xx-SHA1: 238a7e6b5d51625fef1ce7769826a7b21b02ae55
Change-Id: I3b779603797263b52a61ad57c565eb91fe42680c
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
---
include/net/tcp.h | 9 +++++++--
net/ipv4/tcp_rate.c | 7 ++++---
2 files changed, 11 insertions(+), 5 deletions(-)
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -901,6 +901,11 @@ static inline u32 tcp_stamp_us_delta(u64
return max_t(s64, t1 - t0, 0);
}
+static inline u32 tcp_stamp32_us_delta(u32 t1, u32 t0)
+{
+ return max_t(s32, t1 - t0, 0);
+}
+
/* provide the departure time in us unit */
static inline u64 tcp_skb_timestamp_us(const struct sk_buff *skb)
{
@@ -990,9 +995,9 @@ struct tcp_skb_cb {
/* pkts S/ACKed so far upon tx of skb, incl retrans: */
__u32 delivered;
/* start of send pipeline phase */
- u64 first_tx_mstamp;
+ u32 first_tx_mstamp;
/* when we reached the "delivered" count */
- u64 delivered_mstamp;
+ u32 delivered_mstamp;
} tx; /* only used for outgoing skbs */
union {
struct inet_skb_parm h4;
--- a/net/ipv4/tcp_rate.c
+++ b/net/ipv4/tcp_rate.c
@@ -101,8 +101,9 @@ void tcp_rate_skb_delivered(struct sock
/* Record send time of most recently ACKed packet: */
tp->first_tx_mstamp = tx_tstamp;
/* Find the duration of the "send phase" of this window: */
- rs->interval_us = tcp_stamp_us_delta(tp->first_tx_mstamp,
- scb->tx.first_tx_mstamp);
+ rs->interval_us = tcp_stamp32_us_delta(
+ tp->first_tx_mstamp,
+ scb->tx.first_tx_mstamp);
}
/* Mark off the skb delivered once it's sacked to avoid being
@@ -155,7 +156,7 @@ void tcp_rate_gen(struct sock *sk, u32 d
* longer phase.
*/
snd_us = rs->interval_us; /* send phase */
- ack_us = tcp_stamp_us_delta(tp->tcp_mstamp,
+ ack_us = tcp_stamp32_us_delta(tp->tcp_mstamp,
rs->prior_mstamp); /* ack phase */
rs->interval_us = max(snd_us, ack_us);

View File

@@ -0,0 +1,109 @@
From 63e1d064c4e4355293b9ee7014f4559cdeba4b8b Mon Sep 17 00:00:00 2001
From: Neal Cardwell <ncardwell@google.com>
Date: Sat, 5 Aug 2017 11:49:50 -0400
Subject: net-tcp_bbr: v2: snapshot packets in flight at transmit
time and pass in rate_sample
CC algorithms may want to snapshot the number of packets in flight at
transmit time and pass in rate_sample, to understand the relationship
between inflight and losses or ECN signals, to try to find the highest
inflight value that has acceptable levels of loss/ECN marking.
We split out the code to set an skb's tx.in_flight field into its own
function, so that this code can be used for the TCP_REPAIR "fake send"
code path that inserts skbs into the rtx queue without sending them.
Effort: net-tcp_bbr
Origin-9xx-SHA1: b3eb4f2d20efab4ca001f32c9294739036c493ea
Origin-9xx-SHA1: e880fc907d06ea7354333f60f712748ebce9497b
Origin-9xx-SHA1: 330f825a08a6fe92cef74d799cc468864c479f63
Change-Id: I7314047d0ff14dd261a04b1969a46dc658c8836a
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
---
include/net/tcp.h | 6 ++++++
net/ipv4/tcp_output.c | 1 +
net/ipv4/tcp_rate.c | 20 ++++++++++++++++++++
3 files changed, 27 insertions(+)
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -998,6 +998,10 @@ struct tcp_skb_cb {
u32 first_tx_mstamp;
/* when we reached the "delivered" count */
u32 delivered_mstamp;
+#define TCPCB_IN_FLIGHT_BITS 20
+#define TCPCB_IN_FLIGHT_MAX ((1U << TCPCB_IN_FLIGHT_BITS) - 1)
+ u32 in_flight:20, /* packets in flight at transmit */
+ unused2:12;
} tx; /* only used for outgoing skbs */
union {
struct inet_skb_parm h4;
@@ -1154,6 +1158,7 @@ struct rate_sample {
u64 prior_mstamp; /* starting timestamp for interval */
u32 prior_delivered; /* tp->delivered at "prior_mstamp" */
u32 prior_delivered_ce;/* tp->delivered_ce at "prior_mstamp" */
+ u32 tx_in_flight; /* packets in flight at starting timestamp */
s32 delivered; /* number of packets delivered over interval */
s32 delivered_ce; /* number of packets delivered w/ CE marks*/
long interval_us; /* time for tp->delivered to incr "delivered" */
@@ -1276,6 +1281,7 @@ static inline void tcp_ca_event(struct s
void tcp_set_ca_state(struct sock *sk, const u8 ca_state);
/* From tcp_rate.c */
+void tcp_set_tx_in_flight(struct sock *sk, struct sk_buff *skb);
void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb);
void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb,
struct rate_sample *rs);
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2770,6 +2770,7 @@ static bool tcp_write_xmit(struct sock *
skb_set_delivery_time(skb, tp->tcp_wstamp_ns, SKB_CLOCK_MONOTONIC);
list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue);
tcp_init_tso_segs(skb, mss_now);
+ tcp_set_tx_in_flight(sk, skb);
goto repair; /* Skip network transmission */
}
--- a/net/ipv4/tcp_rate.c
+++ b/net/ipv4/tcp_rate.c
@@ -34,6 +34,24 @@
* ready to send in the write queue.
*/
+void tcp_set_tx_in_flight(struct sock *sk, struct sk_buff *skb)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ u32 in_flight;
+
+ /* Check, sanitize, and record packets in flight after skb was sent. */
+ in_flight = tcp_packets_in_flight(tp) + tcp_skb_pcount(skb);
+ if (WARN_ONCE(in_flight > TCPCB_IN_FLIGHT_MAX,
+ "insane in_flight %u cc %s mss %u "
+ "cwnd %u pif %u %u %u %u\n",
+ in_flight, inet_csk(sk)->icsk_ca_ops->name,
+ tp->mss_cache, tp->snd_cwnd,
+ tp->packets_out, tp->retrans_out,
+ tp->sacked_out, tp->lost_out))
+ in_flight = TCPCB_IN_FLIGHT_MAX;
+ TCP_SKB_CB(skb)->tx.in_flight = in_flight;
+}
+
/* Snapshot the current delivery information in the skb, to generate
* a rate sample later when the skb is (s)acked in tcp_rate_skb_delivered().
*/
@@ -67,6 +85,7 @@ void tcp_rate_skb_sent(struct sock *sk,
TCP_SKB_CB(skb)->tx.delivered = tp->delivered;
TCP_SKB_CB(skb)->tx.delivered_ce = tp->delivered_ce;
TCP_SKB_CB(skb)->tx.is_app_limited = tp->app_limited ? 1 : 0;
+ tcp_set_tx_in_flight(sk, skb);
}
/* When an skb is sacked or acked, we fill in the rate sample with the (prior)
@@ -96,6 +115,7 @@ void tcp_rate_skb_delivered(struct sock
rs->prior_mstamp = scb->tx.delivered_mstamp;
rs->is_app_limited = scb->tx.is_app_limited;
rs->is_retrans = scb->sacked & TCPCB_RETRANS;
+ rs->tx_in_flight = scb->tx.in_flight;
rs->last_end_seq = scb->end_seq;
/* Record send time of most recently ACKed packet: */

View File

@@ -0,0 +1,70 @@
From 4022fb6da58dd67760dc8f3351067945a377df91 Mon Sep 17 00:00:00 2001
From: Neal Cardwell <ncardwell@google.com>
Date: Thu, 12 Oct 2017 23:44:27 -0400
Subject: net-tcp_bbr: v2: count packets lost over TCP rate
sampling interval
For understanding the relationship between inflight and packet loss
signals, to try to find the highest inflight value that has acceptable
levels of packet losses.
Effort: net-tcp_bbr
Origin-9xx-SHA1: 4527e26b2bd7756a88b5b9ef1ada3da33dd609ab
Change-Id: I594c2500868d9c530770e7ddd68ffc87c57f4fd5
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
---
include/net/tcp.h | 5 ++++-
net/ipv4/tcp_rate.c | 3 +++
2 files changed, 7 insertions(+), 1 deletion(-)
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1002,6 +1002,7 @@ struct tcp_skb_cb {
#define TCPCB_IN_FLIGHT_MAX ((1U << TCPCB_IN_FLIGHT_BITS) - 1)
u32 in_flight:20, /* packets in flight at transmit */
unused2:12;
+ u32 lost; /* packets lost so far upon tx of skb */
} tx; /* only used for outgoing skbs */
union {
struct inet_skb_parm h4;
@@ -1156,11 +1157,13 @@ struct ack_sample {
*/
struct rate_sample {
u64 prior_mstamp; /* starting timestamp for interval */
+ u32 prior_lost; /* tp->lost at "prior_mstamp" */
u32 prior_delivered; /* tp->delivered at "prior_mstamp" */
u32 prior_delivered_ce;/* tp->delivered_ce at "prior_mstamp" */
u32 tx_in_flight; /* packets in flight at starting timestamp */
+ s32 lost; /* number of packets lost over interval */
s32 delivered; /* number of packets delivered over interval */
- s32 delivered_ce; /* number of packets delivered w/ CE marks*/
+ s32 delivered_ce; /* packets delivered w/ CE mark over interval */
long interval_us; /* time for tp->delivered to incr "delivered" */
u32 snd_interval_us; /* snd interval for delivered packets */
u32 rcv_interval_us; /* rcv interval for delivered packets */
--- a/net/ipv4/tcp_rate.c
+++ b/net/ipv4/tcp_rate.c
@@ -84,6 +84,7 @@ void tcp_rate_skb_sent(struct sock *sk,
TCP_SKB_CB(skb)->tx.delivered_mstamp = tp->delivered_mstamp;
TCP_SKB_CB(skb)->tx.delivered = tp->delivered;
TCP_SKB_CB(skb)->tx.delivered_ce = tp->delivered_ce;
+ TCP_SKB_CB(skb)->tx.lost = tp->lost;
TCP_SKB_CB(skb)->tx.is_app_limited = tp->app_limited ? 1 : 0;
tcp_set_tx_in_flight(sk, skb);
}
@@ -110,6 +111,7 @@ void tcp_rate_skb_delivered(struct sock
if (!rs->prior_delivered ||
tcp_skb_sent_after(tx_tstamp, tp->first_tx_mstamp,
scb->end_seq, rs->last_end_seq)) {
+ rs->prior_lost = scb->tx.lost;
rs->prior_delivered_ce = scb->tx.delivered_ce;
rs->prior_delivered = scb->tx.delivered;
rs->prior_mstamp = scb->tx.delivered_mstamp;
@@ -165,6 +167,7 @@ void tcp_rate_gen(struct sock *sk, u32 d
return;
}
rs->delivered = tp->delivered - rs->prior_delivered;
+ rs->lost = tp->lost - rs->prior_lost;
rs->delivered_ce = tp->delivered_ce - rs->prior_delivered_ce;
/* delivered_ce occupies less than 32 bits in the skb control block */

View File

@@ -0,0 +1,38 @@
From 3ff71ca0a15ebe4e5db9c0089121eafd2efc02ba Mon Sep 17 00:00:00 2001
From: Neal Cardwell <ncardwell@google.com>
Date: Mon, 19 Nov 2018 13:48:36 -0500
Subject: net-tcp_bbr: v2: export FLAG_ECE in rate_sample.is_ece
For understanding the relationship between inflight and ECN signals,
to try to find the highest inflight value that has acceptable levels
of ECN marking.
Effort: net-tcp_bbr
Origin-9xx-SHA1: 3eba998f2898541406c2666781182200934965a8
Change-Id: I3a964e04cee83e11649a54507043d2dfe769a3b3
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
---
include/net/tcp.h | 1 +
net/ipv4/tcp_input.c | 1 +
2 files changed, 2 insertions(+)
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1175,6 +1175,7 @@ struct rate_sample {
bool is_app_limited; /* is sample from packet with bubble in pipe? */
bool is_retrans; /* is sample from retransmission? */
bool is_ack_delayed; /* is this (likely) a delayed ACK? */
+ bool is_ece; /* did this ACK have ECN marked? */
};
struct tcp_congestion_ops {
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4066,6 +4066,7 @@ static int tcp_ack(struct sock *sk, cons
delivered = tcp_newly_delivered(sk, delivered, flag);
lost = tp->lost - lost; /* freshly marked lost */
rs.is_ack_delayed = !!(flag & FLAG_ACK_MAYBE_DELAYED);
+ rs.is_ece = !!(flag & FLAG_ECE);
tcp_rate_gen(sk, delivered, lost, is_sack_reneg, sack_state.rate);
tcp_cong_control(sk, ack, delivered, flag, sack_state.rate);
tcp_xmit_recovery(sk, rexmit);

View File

@@ -0,0 +1,57 @@
From fa9348cbc2b5a0f1f3fc82e51ae6ce956f8cfb1f Mon Sep 17 00:00:00 2001
From: Neal Cardwell <ncardwell@google.com>
Date: Tue, 7 Aug 2018 21:52:06 -0400
Subject: net-tcp_bbr: v2: introduce ca_ops->skb_marked_lost() CC
module callback API
For connections experiencing reordering, RACK can mark packets lost
long after we receive the SACKs/ACKs hinting that the packets were
actually lost.
This means that CC modules cannot easily learn the volume of inflight
data at which packet loss happens by looking at the current inflight
or even the packets in flight when the most recently SACKed packet was
sent. To learn this, CC modules need to know how many packets were in
flight at the time lost packets were sent. This new callback, combined
with TCP_SKB_CB(skb)->tx.in_flight, allows them to learn this.
This also provides a consistent callback that is invoked whether
packets are marked lost upon ACK processing, using the RACK reordering
timer, or at RTO time.
Effort: net-tcp_bbr
Origin-9xx-SHA1: afcbebe3374e4632ac6714d39e4dc8a8455956f4
Change-Id: I54826ab53df636be537e5d3c618a46145d12d51a
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
---
include/net/tcp.h | 3 +++
net/ipv4/tcp_input.c | 5 +++++
2 files changed, 8 insertions(+)
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1202,6 +1202,9 @@ struct tcp_congestion_ops {
/* override sysctl_tcp_min_tso_segs */
u32 (*min_tso_segs)(struct sock *sk);
+ /* react to a specific lost skb (optional) */
+ void (*skb_marked_lost)(struct sock *sk, const struct sk_buff *skb);
+
/* call when packets are delivered to update cwnd and pacing rate,
* after all the ca_state processing. (optional)
*/
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1126,7 +1126,12 @@ static void tcp_verify_retransmit_hint(s
*/
static void tcp_notify_skb_loss_event(struct tcp_sock *tp, const struct sk_buff *skb)
{
+ struct sock *sk = (struct sock *)tp;
+ const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
+
tp->lost += tcp_skb_pcount(skb);
+ if (ca_ops->skb_marked_lost)
+ ca_ops->skb_marked_lost(sk, skb);
}
void tcp_mark_skb_lost(struct sock *sk, struct sk_buff *skb)

View File

@@ -0,0 +1,59 @@
From 3add8086d7d76fe240fb341a4e49149ac4332990 Mon Sep 17 00:00:00 2001
From: Neal Cardwell <ncardwell@google.com>
Date: Wed, 1 May 2019 20:16:33 -0400
Subject: net-tcp_bbr: v2: adjust skb tx.in_flight upon merge in
tcp_shifted_skb()
When tcp_shifted_skb() updates state as adjacent SACKed skbs are
coalesced, previously the tx.in_flight was not adjusted, so we could
get contradictory state where the skb's recorded pcount was bigger
than the tx.in_flight (the number of segments that were in_flight
after sending the skb).
Normally, having a SACKed skb with contradictory pcount/tx.in_flight
would not matter. However, with SACK reneging, the SACKed bit is
removed, and an skb once again becomes eligible for retransmitting,
fragmenting, SACKing, etc. Packetdrill testing verified the following
sequence is possible in a kernel that does not have this commit:
- skb N is SACKed
- skb N+1 is SACKed and combined with skb N using tcp_shifted_skb()
- tcp_shifted_skb() will increase the pcount of prev,
but leave tx.in_flight as-is
- so prev skb can have pcount > tx.in_flight
- RTO, tcp_timeout_mark_lost(), detect reneg,
remove "SACKed" bit, mark skb N as lost
- find pcount of skb N is greater than its tx.in_flight
I suspect this issue is what caused the bbr2_inflight_hi_from_lost_skb():
WARN_ON_ONCE(inflight_prev < 0)
to fire in production machines using bbr2.
Effort: net-tcp_bbr
Origin-9xx-SHA1: 1a3e997e613d2dcf32b947992882854ebe873715
Change-Id: I1b0b75c27519953430c7db51c6f358f104c7af55
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
---
net/ipv4/tcp_input.c | 11 +++++++++++
1 file changed, 11 insertions(+)
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1512,6 +1512,17 @@ static bool tcp_shifted_skb(struct sock
WARN_ON_ONCE(tcp_skb_pcount(skb) < pcount);
tcp_skb_pcount_add(skb, -pcount);
+ /* Adjust tx.in_flight as pcount is shifted from skb to prev. */
+ if (WARN_ONCE(TCP_SKB_CB(skb)->tx.in_flight < pcount,
+ "prev in_flight: %u skb in_flight: %u pcount: %u",
+ TCP_SKB_CB(prev)->tx.in_flight,
+ TCP_SKB_CB(skb)->tx.in_flight,
+ pcount))
+ TCP_SKB_CB(skb)->tx.in_flight = 0;
+ else
+ TCP_SKB_CB(skb)->tx.in_flight -= pcount;
+ TCP_SKB_CB(prev)->tx.in_flight += pcount;
+
/* When we're adding to gso_segs == 1, gso_size will be zero,
* in theory this shouldn't be necessary but as long as DSACK
* code can come after this skb later on it's better to keep

View File

@@ -0,0 +1,97 @@
From 6363d43645b3383ba590d0574dc37a215386aacf Mon Sep 17 00:00:00 2001
From: Neal Cardwell <ncardwell@google.com>
Date: Wed, 1 May 2019 20:16:25 -0400
Subject: net-tcp_bbr: v2: adjust skb tx.in_flight upon split in
tcp_fragment()
When we fragment an skb that has already been sent, we need to update
the tx.in_flight for the first skb in the resulting pair ("skb").
Because we were not updating the tx.in_flight, the tx.in_flight value
was inconsistent with the pcount of the first skb (tx.in_flight would
be too high). That meant that if the first skb was lost, then
bbr2_inflight_hi_from_lost_skb() would calculate an inflight_hi value
that is too high. This could result in longer queues and higher packet
loss.
Packetdrill testing verified that without this commit, when the second
half of an skb is SACKed and then later the first half of that skb is
marked lost, the calculated inflight_hi was incorrect.
Effort: net-tcp_bbr
Origin-9xx-SHA1: 385f1ddc610798fab2837f9f372857438b25f874
Origin-9xx-SHA1: a0eb099690af net-tcp_bbr: v2: fix tcp_fragment() tx.in_flight recomputation [prod feb 8 2021; use as a fixup]
Origin-9xx-SHA1: 885503228153ff0c9114e net-tcp_bbr: v2: introduce tcp_skb_tx_in_flight_is_suspicious() helper for warnings
Change-Id: I617f8cab4e9be7a0b8e8d30b047bf8645393354d
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
---
include/net/tcp.h | 15 +++++++++++++++
net/ipv4/tcp_output.c | 26 +++++++++++++++++++++++++-
2 files changed, 40 insertions(+), 1 deletion(-)
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1301,6 +1301,21 @@ static inline bool tcp_skb_sent_after(u6
return t1 > t2 || (t1 == t2 && after(seq1, seq2));
}
+/* If a retransmit failed due to local qdisc congestion or other local issues,
+ * then we may have called tcp_set_skb_tso_segs() to increase the number of
+ * segments in the skb without increasing the tx.in_flight. In all other cases,
+ * the tx.in_flight should be at least as big as the pcount of the sk_buff. We
+ * do not have the state to know whether a retransmit failed due to local qdisc
+ * congestion or other local issues, so to avoid spurious warnings we consider
+ * that any skb marked lost may have suffered that fate.
+ */
+static inline bool tcp_skb_tx_in_flight_is_suspicious(u32 skb_pcount,
+ u32 skb_sacked_flags,
+ u32 tx_in_flight)
+{
+ return (skb_pcount > tx_in_flight) && !(skb_sacked_flags & TCPCB_LOST);
+}
+
/* These functions determine how the current flow behaves in respect of SACK
* handling. SACK is negotiated with the peer, and therefore it can vary
* between different flows.
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1606,7 +1606,7 @@ int tcp_fragment(struct sock *sk, enum t
{
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *buff;
- int old_factor;
+ int old_factor, inflight_prev;
long limit;
int nlen;
u8 flags;
@@ -1681,6 +1681,30 @@ int tcp_fragment(struct sock *sk, enum t
if (diff)
tcp_adjust_pcount(sk, skb, diff);
+
+ inflight_prev = TCP_SKB_CB(skb)->tx.in_flight - old_factor;
+ if (inflight_prev < 0) {
+ WARN_ONCE(tcp_skb_tx_in_flight_is_suspicious(
+ old_factor,
+ TCP_SKB_CB(skb)->sacked,
+ TCP_SKB_CB(skb)->tx.in_flight),
+ "inconsistent: tx.in_flight: %u "
+ "old_factor: %d mss: %u sacked: %u "
+ "1st pcount: %d 2nd pcount: %d "
+ "1st len: %u 2nd len: %u ",
+ TCP_SKB_CB(skb)->tx.in_flight, old_factor,
+ mss_now, TCP_SKB_CB(skb)->sacked,
+ tcp_skb_pcount(skb), tcp_skb_pcount(buff),
+ skb->len, buff->len);
+ inflight_prev = 0;
+ }
+ /* Set 1st tx.in_flight as if 1st were sent by itself: */
+ TCP_SKB_CB(skb)->tx.in_flight = inflight_prev +
+ tcp_skb_pcount(skb);
+ /* Set 2nd tx.in_flight with new 1st and 2nd pcounts: */
+ TCP_SKB_CB(buff)->tx.in_flight = inflight_prev +
+ tcp_skb_pcount(skb) +
+ tcp_skb_pcount(buff);
}
/* Link BUFF into the send queue. */

View File

@@ -0,0 +1,73 @@
From 8c1b5bf6012099cba8911e255487bca5d0a2bd02 Mon Sep 17 00:00:00 2001
From: Yousuk Seung <ysseung@google.com>
Date: Wed, 23 May 2018 17:55:54 -0700
Subject: net-tcp: add new ca opts flag TCP_CONG_WANTS_CE_EVENTS
Add a new ca opts flag TCP_CONG_WANTS_CE_EVENTS that allows a
congestion control module to receive CE events.
Currently congestion control modules have to set the TCP_CONG_NEEDS_ECN
bit in opts flag to receive CE events but this may incur changes in ECN
behavior elsewhere. This patch adds a new bit TCP_CONG_WANTS_CE_EVENTS
that allows congestion control modules to receive CE events
independently of TCP_CONG_NEEDS_ECN.
Effort: net-tcp
Origin-9xx-SHA1: 9f7e14716cde760bc6c67ef8ef7e1ee48501d95b
Change-Id: I2255506985242f376d910c6fd37daabaf4744f24
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
---
include/net/tcp.h | 14 +++++++++++++-
net/ipv4/tcp_input.c | 4 ++--
2 files changed, 15 insertions(+), 3 deletions(-)
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1137,7 +1137,11 @@ enum tcp_ca_ack_event_flags {
#define TCP_CONG_NON_RESTRICTED 0x1
/* Requires ECN/ECT set on all packets */
#define TCP_CONG_NEEDS_ECN 0x2
-#define TCP_CONG_MASK (TCP_CONG_NON_RESTRICTED | TCP_CONG_NEEDS_ECN)
+/* Wants notification of CE events (CA_EVENT_ECN_IS_CE, CA_EVENT_ECN_NO_CE). */
+#define TCP_CONG_WANTS_CE_EVENTS 0x4
+#define TCP_CONG_MASK (TCP_CONG_NON_RESTRICTED | \
+ TCP_CONG_NEEDS_ECN | \
+ TCP_CONG_WANTS_CE_EVENTS)
union tcp_cc_info;
@@ -1269,6 +1273,14 @@ static inline char *tcp_ca_get_name_by_k
}
#endif
+static inline bool tcp_ca_wants_ce_events(const struct sock *sk)
+{
+ const struct inet_connection_sock *icsk = inet_csk(sk);
+
+ return icsk->icsk_ca_ops->flags & (TCP_CONG_NEEDS_ECN |
+ TCP_CONG_WANTS_CE_EVENTS);
+}
+
static inline bool tcp_ca_needs_ecn(const struct sock *sk)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -376,7 +376,7 @@ static void __tcp_ecn_check_ce(struct so
tcp_enter_quickack_mode(sk, 2);
break;
case INET_ECN_CE:
- if (tcp_ca_needs_ecn(sk))
+ if (tcp_ca_wants_ce_events(sk))
tcp_ca_event(sk, CA_EVENT_ECN_IS_CE);
if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) {
@@ -387,7 +387,7 @@ static void __tcp_ecn_check_ce(struct so
tp->ecn_flags |= TCP_ECN_SEEN;
break;
default:
- if (tcp_ca_needs_ecn(sk))
+ if (tcp_ca_wants_ce_events(sk))
tcp_ca_event(sk, CA_EVENT_ECN_NO_CE);
tp->ecn_flags |= TCP_ECN_SEEN;
break;

View File

@@ -0,0 +1,139 @@
From 15fd38de916127d286bd373903fdfa5215b05aa4 Mon Sep 17 00:00:00 2001
From: Neal Cardwell <ncardwell@google.com>
Date: Fri, 27 Sep 2019 17:10:26 -0400
Subject: net-tcp: re-generalize TSO sizing in TCP CC module API
Reorganize the API for CC modules so that the CC module once again
gets complete control of the TSO sizing decision. This is how the API
was set up around 2016 and the initial BBRv1 upstreaming. Later Eric
Dumazet simplified it. But with wider testing it now seems that to
avoid CPU regressions BBR needs to have a different TSO sizing
function.
This is necessary to handle cases where there are many flows
bottlenecked on the sender host's NIC, in which case BBR's pacing rate
is much lower than CUBIC/Reno/DCTCP's. Why does this happen? Because
BBR's pacing rate adapts to the low bandwidth share each flow sees. By
contrast, CUBIC/Reno/DCTCP see no loss or ECN, so they grow a very
large cwnd, and thus large pacing rate and large TSO burst size.
Change-Id: Ic8ccfdbe4010ee8d4bf6a6334c48a2fceb2171ea
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
---
include/net/tcp.h | 4 ++--
net/ipv4/bpf_tcp_ca.c | 4 ++--
net/ipv4/tcp_bbr.c | 37 ++++++++++++++++++++++++++-----------
net/ipv4/tcp_output.c | 11 +++++------
4 files changed, 35 insertions(+), 21 deletions(-)
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1203,8 +1203,8 @@ struct tcp_congestion_ops {
/* hook for packet ack accounting (optional) */
void (*pkts_acked)(struct sock *sk, const struct ack_sample *sample);
- /* override sysctl_tcp_min_tso_segs */
- u32 (*min_tso_segs)(struct sock *sk);
+ /* pick target number of segments per TSO/GSO skb (optional): */
+ u32 (*tso_segs)(struct sock *sk, unsigned int mss_now);
/* react to a specific lost skb (optional) */
void (*skb_marked_lost)(struct sock *sk, const struct sk_buff *skb);
--- a/net/ipv4/bpf_tcp_ca.c
+++ b/net/ipv4/bpf_tcp_ca.c
@@ -280,7 +280,7 @@ static void bpf_tcp_ca_pkts_acked(struct
{
}
-static u32 bpf_tcp_ca_min_tso_segs(struct sock *sk)
+static u32 bpf_tcp_ca_tso_segs(struct sock *sk, unsigned int mss_now)
{
return 0;
}
@@ -315,7 +315,7 @@ static struct tcp_congestion_ops __bpf_o
.cwnd_event = bpf_tcp_ca_cwnd_event,
.in_ack_event = bpf_tcp_ca_in_ack_event,
.pkts_acked = bpf_tcp_ca_pkts_acked,
- .min_tso_segs = bpf_tcp_ca_min_tso_segs,
+ .tso_segs = bpf_tcp_ca_tso_segs,
.cong_control = bpf_tcp_ca_cong_control,
.undo_cwnd = bpf_tcp_ca_undo_cwnd,
.sndbuf_expand = bpf_tcp_ca_sndbuf_expand,
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -301,20 +301,35 @@ __bpf_kfunc static u32 bbr_min_tso_segs(
return READ_ONCE(sk->sk_pacing_rate) < (bbr_min_tso_rate >> 3) ? 1 : 2;
}
+/* Return the number of segments BBR would like in a TSO/GSO skb, given
+ * a particular max gso size as a constraint.
+ */
+static u32 bbr_tso_segs_generic(struct sock *sk, unsigned int mss_now,
+ u32 gso_max_size)
+{
+ u32 segs;
+ u64 bytes;
+
+ /* Budget a TSO/GSO burst size allowance based on bw (pacing_rate). */
+ bytes = READ_ONCE(sk->sk_pacing_rate) >> READ_ONCE(sk->sk_pacing_shift);
+
+ bytes = min_t(u32, bytes, gso_max_size - 1 - MAX_TCP_HEADER);
+ segs = max_t(u32, bytes / mss_now, bbr_min_tso_segs(sk));
+ return segs;
+}
+
+/* Custom tcp_tso_autosize() for BBR, used at transmit time to cap skb size. */
+static u32 bbr_tso_segs(struct sock *sk, unsigned int mss_now)
+{
+ return bbr_tso_segs_generic(sk, mss_now, sk->sk_gso_max_size);
+}
+
+/* Like bbr_tso_segs(), using mss_cache, ignoring driver's sk_gso_max_size. */
static u32 bbr_tso_segs_goal(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
- u32 segs, bytes;
-
- /* Sort of tcp_tso_autosize() but ignoring
- * driver provided sk_gso_max_size.
- */
- bytes = min_t(unsigned long,
- READ_ONCE(sk->sk_pacing_rate) >> READ_ONCE(sk->sk_pacing_shift),
- GSO_LEGACY_MAX_SIZE - 1 - MAX_TCP_HEADER);
- segs = max_t(u32, bytes / tp->mss_cache, bbr_min_tso_segs(sk));
- return min(segs, 0x7FU);
+ return bbr_tso_segs_generic(sk, tp->mss_cache, GSO_MAX_SIZE);
}
/* Save "last known good" cwnd so we can restore it after losses or PROBE_RTT */
@@ -1150,7 +1165,7 @@ static struct tcp_congestion_ops tcp_bbr
.undo_cwnd = bbr_undo_cwnd,
.cwnd_event = bbr_cwnd_event,
.ssthresh = bbr_ssthresh,
- .min_tso_segs = bbr_min_tso_segs,
+ .tso_segs = bbr_tso_segs,
.get_info = bbr_get_info,
.set_state = bbr_set_state,
};
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2062,13 +2062,12 @@ static u32 tcp_tso_autosize(const struct
static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now)
{
const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
- u32 min_tso, tso_segs;
+ u32 tso_segs;
- min_tso = ca_ops->min_tso_segs ?
- ca_ops->min_tso_segs(sk) :
- READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs);
-
- tso_segs = tcp_tso_autosize(sk, mss_now, min_tso);
+ tso_segs = ca_ops->tso_segs ?
+ ca_ops->tso_segs(sk, mss_now) :
+ tcp_tso_autosize(sk, mss_now,
+ sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs);
return min_t(u32, tso_segs, sk->sk_gso_max_segs);
}

View File

@@ -0,0 +1,73 @@
From 344af0ac329b2b1ce5f1ce920166e4aeb5e83037 Mon Sep 17 00:00:00 2001
From: Neal Cardwell <ncardwell@google.com>
Date: Sat, 16 Nov 2019 13:16:25 -0500
Subject: net-tcp: add fast_ack_mode=1: skip rwin check in
tcp_fast_ack_mode__tcp_ack_snd_check()
Add logic for an optional TCP connection behavior, enabled with
tp->fast_ack_mode = 1, which disables checking the receive window
before sending an ack in __tcp_ack_snd_check(). If this behavior is
enabled, the data receiver sends an ACK if the amount of data is >
RCV.MSS. TCP congestion control modules can enable this bit if
they want to generate ACKs quickly.
Change-Id: Iaa0a0fd7108221f883137a79d5bfa724f1b096d4
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
---
include/linux/tcp.h | 3 ++-
net/ipv4/tcp.c | 1 +
net/ipv4/tcp_cong.c | 1 +
net/ipv4/tcp_input.c | 5 +++--
4 files changed, 7 insertions(+), 3 deletions(-)
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -243,7 +243,8 @@ struct tcp_sock {
/* OOO segments go in this rbtree. Socket lock must be held. */
struct rb_root out_of_order_queue;
u32 snd_ssthresh; /* Slow start size threshold */
- u8 recvmsg_inq : 1;/* Indicate # of bytes in queue upon recvmsg */
+ u32 recvmsg_inq : 1,/* Indicate # of bytes in queue upon recvmsg */
+ fast_ack_mode:1;/* ack ASAP if >1 rcv_mss received? */
__cacheline_group_end(tcp_sock_read_rx);
/* TX read-write hotpath cache lines */
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3398,6 +3398,7 @@ int tcp_disconnect(struct sock *sk, int
tp->rx_opt.dsack = 0;
tp->rx_opt.num_sacks = 0;
tp->rcv_ooopack = 0;
+ tp->fast_ack_mode = 0;
/* Clean up fastopen related fields */
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -237,6 +237,7 @@ void tcp_init_congestion_control(struct
struct inet_connection_sock *icsk = inet_csk(sk);
tcp_sk(sk)->prior_ssthresh = 0;
+ tcp_sk(sk)->fast_ack_mode = 0;
if (icsk->icsk_ca_ops->init)
icsk->icsk_ca_ops->init(sk);
if (tcp_ca_needs_ecn(sk))
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5782,13 +5782,14 @@ static void __tcp_ack_snd_check(struct s
/* More than one full frame received... */
if (((tp->rcv_nxt - tp->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss &&
+ (tp->fast_ack_mode == 1 ||
/* ... and right edge of window advances far enough.
* (tcp_recvmsg() will send ACK otherwise).
* If application uses SO_RCVLOWAT, we want send ack now if
* we have not received enough bytes to satisfy the condition.
*/
- (tp->rcv_nxt - tp->copied_seq < sk->sk_rcvlowat ||
- __tcp_select_window(sk) >= tp->rcv_wnd)) ||
+ (tp->rcv_nxt - tp->copied_seq < sk->sk_rcvlowat ||
+ __tcp_select_window(sk) >= tp->rcv_wnd))) ||
/* We ACK each frame or... */
tcp_in_quickack_mode(sk) ||
/* Protocol state mandates a one-time immediate ACK */

View File

@@ -0,0 +1,45 @@
From 18f564dbe586ab02c48563a9e05e71aa1a421607 Mon Sep 17 00:00:00 2001
From: Jianfeng Wang <jfwang@google.com>
Date: Fri, 19 Jun 2020 17:33:45 +0000
Subject: net-tcp_bbr: v2: record app-limited status of
TLP-repaired flight
When sending a TLP retransmit, record whether the outstanding flight
of data is application limited. This is important for congestion
control modules that want to respond to losses repaired by TLP
retransmits. This is important because the following scenarios convey
very different information:
(1) a packet loss with a small number of packets in flight;
(2) a packet loss with the maximum amount of data in flight allowed
by the CC module;
Effort: net-tcp_bbr
Change-Id: Ic8ae567caa4e4bfd5fd82c3d4be12a5d9171655e
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
---
include/linux/tcp.h | 3 ++-
net/ipv4/tcp_output.c | 1 +
2 files changed, 3 insertions(+), 1 deletion(-)
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -301,7 +301,8 @@ struct tcp_sock {
*/
struct tcp_options_received rx_opt;
u8 nonagle : 4,/* Disable Nagle algorithm? */
- rate_app_limited:1; /* rate_{delivered,interval_us} limited? */
+ rate_app_limited:1, /* rate_{delivered,interval_us} limited? */
+ tlp_orig_data_app_limited:1; /* app-limited before TLP rtx? */
__cacheline_group_end(tcp_sock_write_txrx);
/* RX read-write hotpath cache lines */
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -3006,6 +3006,7 @@ void tcp_send_loss_probe(struct sock *sk
if (WARN_ON(!skb || !tcp_skb_pcount(skb)))
goto rearm_timer;
+ tp->tlp_orig_data_app_limited = TCP_SKB_CB(skb)->tx.is_app_limited;
if (__tcp_retransmit_skb(sk, skb, 1))
goto rearm_timer;

View File

@@ -0,0 +1,45 @@
From 8da6e7d31a73453ce8495f004951069f5ef67c86 Mon Sep 17 00:00:00 2001
From: Jianfeng Wang <jfwang@google.com>
Date: Tue, 16 Jun 2020 17:41:19 +0000
Subject: net-tcp_bbr: v2: inform CC module of losses repaired by
TLP probe
Before this commit, when there is a packet loss that creates a sequence
hole that is filled by a TLP loss probe, then tcp_process_tlp_ack()
only informs the congestion control (CC) module via a back-to-back entry
and exit of CWR. But some congestion control modules (e.g. BBR) do not
respond to CWR events.
This commit adds a new CA event with which the core TCP stack notifies
the CC module when a loss is repaired by a TLP. This will allow CC
modules that do not use the CWR mechanism to have a custom handler for
such TLP recoveries.
Effort: net-tcp_bbr
Change-Id: Ieba72332b401b329bff5a641d2b2043a3fb8f632
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
---
include/net/tcp.h | 1 +
net/ipv4/tcp_input.c | 1 +
2 files changed, 2 insertions(+)
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1115,6 +1115,7 @@ enum tcp_ca_event {
CA_EVENT_LOSS, /* loss timeout */
CA_EVENT_ECN_NO_CE, /* ECT set, but not CE marked */
CA_EVENT_ECN_IS_CE, /* received CE marked IP packet */
+ CA_EVENT_TLP_RECOVERY, /* a lost segment was repaired by TLP probe */
};
/* Information about inbound ACK, passed to cong_ops->in_ack_event() */
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3865,6 +3865,7 @@ static void tcp_process_tlp_ack(struct s
/* ACK advances: there was a loss, so reduce cwnd. Reset
* tlp_high_seq in tcp_init_cwnd_reduction()
*/
+ tcp_ca_event(sk, CA_EVENT_TLP_RECOVERY);
tcp_init_cwnd_reduction(sk);
tcp_set_ca_state(sk, TCP_CA_CWR);
tcp_end_cwnd_reduction(sk);

View File

@@ -0,0 +1,73 @@
From 528d5f9d97954b32db6ae1fe1729c4965886b6df Mon Sep 17 00:00:00 2001
From: Neal Cardwell <ncardwell@google.com>
Date: Mon, 21 Sep 2020 14:46:26 -0400
Subject: net-tcp_bbr: v2: introduce is_acking_tlp_retrans_seq
into rate_sample
Introduce is_acking_tlp_retrans_seq into rate_sample. This bool will
export to the CC module the knowledge of whether the current ACK
matched a TLP retransmit.
Note that when this bool is true, we cannot yet tell (in general) whether
this ACK is for the original or the TLP retransmit.
Effort: net-tcp_bbr
Change-Id: I2e6494332167e75efcbdc99bd5c119034e9c39b4
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
---
include/net/tcp.h | 1 +
net/ipv4/tcp_input.c | 12 +++++++++---
2 files changed, 10 insertions(+), 3 deletions(-)
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1179,6 +1179,7 @@ struct rate_sample {
u32 last_end_seq; /* end_seq of most recently ACKed packet */
bool is_app_limited; /* is sample from packet with bubble in pipe? */
bool is_retrans; /* is sample from retransmission? */
+ bool is_acking_tlp_retrans_seq; /* ACKed a TLP retransmit sequence? */
bool is_ack_delayed; /* is this (likely) a delayed ACK? */
bool is_ece; /* did this ACK have ECN marked? */
};
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3848,7 +3848,8 @@ static void tcp_replace_ts_recent(struct
/* This routine deals with acks during a TLP episode and ends an episode by
* resetting tlp_high_seq. Ref: TLP algorithm in draft-ietf-tcpm-rack
*/
-static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag)
+static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag,
+ struct rate_sample *rs)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -3876,6 +3877,11 @@ static void tcp_process_tlp_ack(struct s
FLAG_NOT_DUP | FLAG_DATA_SACKED))) {
/* Pure dupack: original and TLP probe arrived; no loss */
tp->tlp_high_seq = 0;
+ } else {
+ /* This ACK matches a TLP retransmit. We cannot yet tell if
+ * this ACK is for the original or the TLP retransmit.
+ */
+ rs->is_acking_tlp_retrans_seq = 1;
}
}
@@ -4059,7 +4065,7 @@ static int tcp_ack(struct sock *sk, cons
tcp_rack_update_reo_wnd(sk, &rs);
if (tp->tlp_high_seq)
- tcp_process_tlp_ack(sk, ack, flag);
+ tcp_process_tlp_ack(sk, ack, flag, &rs);
if (tcp_ack_is_dubious(sk, flag)) {
if (!(flag & (FLAG_SND_UNA_ADVANCED |
@@ -4103,7 +4109,7 @@ no_queue:
tcp_ack_probe(sk);
if (tp->tlp_high_seq)
- tcp_process_tlp_ack(sk, ack, flag);
+ tcp_process_tlp_ack(sk, ack, flag, &rs);
return 1;
old_ack:

View File

@@ -0,0 +1,112 @@
From a086cf589b0ab974965d88d338c0a373eff5d67c Mon Sep 17 00:00:00 2001
From: David Morley <morleyd@google.com>
Date: Fri, 14 Jul 2023 11:07:56 -0400
Subject: tcp: introduce per-route feature RTAX_FEATURE_ECN_LOW
Define and implement a new per-route feature, RTAX_FEATURE_ECN_LOW.
This feature indicates that the given destination network is a
low-latency ECN environment, meaning both that ECN CE marks are
applied by the network using a low-latency marking threshold and also
that TCP endpoints provide precise per-data-segment ECN feedback in
ACKs (where the ACK ECE flag echoes the received CE status of all
newly-acknowledged data segments). This feature indication can be used
by congestion control algorithms to decide how to interpret ECN
signals over the given destination network.
This feature is appropriate for datacenter-style ECN marking, such as
the ECN marking approach expected by DCTCP or BBR congestion control
modules.
Signed-off-by: David Morley <morleyd@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Tested-by: David Morley <morleyd@google.com>
Change-Id: I6bc06e9c6cb426fbae7243fc71c9a8c18175f5d3
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
---
include/net/tcp.h | 10 ++++++++++
include/uapi/linux/rtnetlink.h | 4 +++-
net/ipv4/tcp_minisocks.c | 2 ++
net/ipv4/tcp_output.c | 6 ++++--
4 files changed, 19 insertions(+), 3 deletions(-)
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -376,6 +376,7 @@ static inline void tcp_dec_quickack_mode
#define TCP_ECN_QUEUE_CWR 2
#define TCP_ECN_DEMAND_CWR 4
#define TCP_ECN_SEEN 8
+#define TCP_ECN_LOW 16
enum tcp_tw_status {
TCP_TW_SUCCESS = 0,
@@ -796,6 +797,15 @@ static inline void tcp_fast_path_check(s
u32 tcp_delack_max(const struct sock *sk);
+static inline void tcp_set_ecn_low_from_dst(struct sock *sk,
+ const struct dst_entry *dst)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ if (dst_feature(dst, RTAX_FEATURE_ECN_LOW))
+ tp->ecn_flags |= TCP_ECN_LOW;
+}
+
/* Compute the actual rto_min value */
static inline u32 tcp_rto_min(const struct sock *sk)
{
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -516,12 +516,14 @@ enum {
#define RTAX_FEATURE_TIMESTAMP (1 << 2) /* unused */
#define RTAX_FEATURE_ALLFRAG (1 << 3) /* unused */
#define RTAX_FEATURE_TCP_USEC_TS (1 << 4)
+#define RTAX_FEATURE_ECN_LOW (1 << 5)
#define RTAX_FEATURE_MASK (RTAX_FEATURE_ECN | \
RTAX_FEATURE_SACK | \
RTAX_FEATURE_TIMESTAMP | \
RTAX_FEATURE_ALLFRAG | \
- RTAX_FEATURE_TCP_USEC_TS)
+ RTAX_FEATURE_TCP_USEC_TS | \
+ RTAX_FEATURE_ECN_LOW)
struct rta_session {
__u8 proto;
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -471,6 +471,8 @@ void tcp_ca_openreq_child(struct sock *s
u32 ca_key = dst_metric(dst, RTAX_CC_ALGO);
bool ca_got_dst = false;
+ tcp_set_ecn_low_from_dst(sk, dst);
+
if (ca_key != TCP_CA_UNSPEC) {
const struct tcp_congestion_ops *ca;
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -339,10 +339,9 @@ static void tcp_ecn_send_syn(struct sock
bool bpf_needs_ecn = tcp_bpf_ca_needs_ecn(sk);
bool use_ecn = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn) == 1 ||
tcp_ca_needs_ecn(sk) || bpf_needs_ecn;
+ const struct dst_entry *dst = __sk_dst_get(sk);
if (!use_ecn) {
- const struct dst_entry *dst = __sk_dst_get(sk);
-
if (dst && dst_feature(dst, RTAX_FEATURE_ECN))
use_ecn = true;
}
@@ -354,6 +353,9 @@ static void tcp_ecn_send_syn(struct sock
tp->ecn_flags = TCP_ECN_OK;
if (tcp_ca_needs_ecn(sk) || bpf_needs_ecn)
INET_ECN_xmit(sk);
+
+ if (dst)
+ tcp_set_ecn_low_from_dst(sk, dst);
}
}

View File

@@ -0,0 +1,59 @@
From 79dbc43c63d17b05e0b04c6ed68b5e24515cfe2f Mon Sep 17 00:00:00 2001
From: Adithya Abraham Philip <abrahamphilip@google.com>
Date: Fri, 11 Jun 2021 21:56:10 +0000
Subject: net-tcp_bbr: v3: ensure ECN-enabled BBR flows set ECT
on retransmits
Adds a new flag TCP_ECN_ECT_PERMANENT that is used by CCAs to
indicate that retransmitted packets and pure ACKs must have the
ECT bit set. This is necessary for BBR, which when using
ECN expects ECT to be set even on retransmitted packets and ACKs.
Previous to this addition of TCP_ECN_ECT_PERMANENT, CCAs which can use
ECN but don't "need" it did not have a way to indicate that ECT should
be set on retransmissions/ACKs.
Signed-off-by: Adithya Abraham Philip <abrahamphilip@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Change-Id: I8b048eaab35e136fe6501ef6cd89fd9faa15e6d2
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
---
include/net/tcp.h | 1 +
net/ipv4/tcp_bbr.c | 3 +++
net/ipv4/tcp_output.c | 3 ++-
3 files changed, 6 insertions(+), 1 deletion(-)
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -377,6 +377,7 @@ static inline void tcp_dec_quickack_mode
#define TCP_ECN_DEMAND_CWR 4
#define TCP_ECN_SEEN 8
#define TCP_ECN_LOW 16
+#define TCP_ECN_ECT_PERMANENT 32
enum tcp_tw_status {
TCP_TW_SUCCESS = 0,
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -2152,6 +2152,9 @@ __bpf_kfunc static void bbr_init(struct
bbr->plb.pause_until = 0;
tp->fast_ack_mode = bbr_fast_ack_mode ? 1 : 0;
+
+ if (bbr_can_use_ecn(sk))
+ tp->ecn_flags |= TCP_ECN_ECT_PERMANENT;
}
/* BBR marks the current round trip as a loss round. */
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -393,7 +393,8 @@ static void tcp_ecn_send(struct sock *sk
th->cwr = 1;
skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
}
- } else if (!tcp_ca_needs_ecn(sk)) {
+ } else if (!(tp->ecn_flags & TCP_ECN_ECT_PERMANENT) &&
+ !tcp_ca_needs_ecn(sk)) {
/* ACK or retransmitted segment: clear ECT|CE */
INET_ECN_dontxmit(sk);
}

View File

@@ -0,0 +1,38 @@
From 74f5a9e717fb41742cf30802e9f9c55c001d2576 Mon Sep 17 00:00:00 2001
From: Neal Cardwell <ncardwell@google.com>
Date: Sun, 23 Jul 2023 23:25:34 -0400
Subject: tcp: export TCPI_OPT_ECN_LOW in tcp_info tcpi_options
field
Analogous to other important ECN information, export TCPI_OPT_ECN_LOW
in tcp_info tcpi_options field.
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Change-Id: I08d8d8c7e8780e6e37df54038ee50301ac5a0320
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
---
include/uapi/linux/tcp.h | 1 +
net/ipv4/tcp.c | 2 ++
2 files changed, 3 insertions(+)
--- a/include/uapi/linux/tcp.h
+++ b/include/uapi/linux/tcp.h
@@ -178,6 +178,7 @@ enum tcp_fastopen_client_fail {
#define TCPI_OPT_ECN_SEEN 16 /* we received at least one packet with ECT */
#define TCPI_OPT_SYN_DATA 32 /* SYN-ACK acked data in SYN sent or rcvd */
#define TCPI_OPT_USEC_TS 64 /* usec timestamps */
+#define TCPI_OPT_ECN_LOW 128 /* Low-latency ECN enabled at conn init */
/*
* Sender's congestion state indicating normal or abnormal situations
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -4125,6 +4125,8 @@ void tcp_get_info(struct sock *sk, struc
info->tcpi_options |= TCPI_OPT_ECN;
if (tp->ecn_flags & TCP_ECN_SEEN)
info->tcpi_options |= TCPI_OPT_ECN_SEEN;
+ if (tp->ecn_flags & TCP_ECN_LOW)
+ info->tcpi_options |= TCPI_OPT_ECN_LOW;
if (tp->syn_data_acked)
info->tcpi_options |= TCPI_OPT_SYN_DATA;
if (tp->tcp_usec_ts)

View File

@@ -67,7 +67,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
TP_PROTO(const struct sock *sk, const struct request_sock *req),
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -1558,6 +1558,13 @@ static struct ctl_table ipv4_net_table[]
@@ -1568,6 +1568,13 @@ static struct ctl_table ipv4_net_table[]
.extra2 = SYSCTL_ONE,
},
{
@@ -83,7 +83,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
.maxlen = sizeof(u8),
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5658,6 +5658,7 @@ static bool tcp_prune_ofo_queue(struct s
@@ -5664,6 +5664,7 @@ static bool tcp_prune_ofo_queue(struct s
static int tcp_prune_queue(struct sock *sk, const struct sk_buff *in_skb)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -91,7 +91,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
NET_INC_STATS(sock_net(sk), LINUX_MIB_PRUNECALLED);
@@ -5669,6 +5670,39 @@ static int tcp_prune_queue(struct sock *
@@ -5675,6 +5676,39 @@ static int tcp_prune_queue(struct sock *
if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
return 0;
@@ -131,7 +131,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
tcp_collapse_ofo_queue(sk);
if (!skb_queue_empty(&sk->sk_receive_queue))
tcp_collapse(sk, &sk->sk_receive_queue, NULL,
@@ -5687,6 +5721,8 @@ static int tcp_prune_queue(struct sock *
@@ -5693,6 +5727,8 @@ static int tcp_prune_queue(struct sock *
if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
return 0;
@@ -142,7 +142,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
* and hopefully then we'll have sufficient space.
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -3525,6 +3525,7 @@ static int __net_init tcp_sk_init(struct
@@ -3530,6 +3530,7 @@ static int __net_init tcp_sk_init(struct
net->ipv4.sysctl_tcp_syn_linear_timeouts = 4;
net->ipv4.sysctl_tcp_shrink_window = 0;

View File

@@ -1,4 +1,4 @@
From d79c32bd2a17e206d1c198570ef705549d0f644b Mon Sep 17 00:00:00 2001
From 2eb935c59e24cc1303dcb7153261be0a1b61b38b Mon Sep 17 00:00:00 2001
From: Mark Weiman <mark.weiman@markzz.com>
Date: Sun, 12 Aug 2018 11:36:21 -0400
Subject: [PATCH] PCI: Enable overrides for missing ACS capabilities
@@ -55,7 +55,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -4475,6 +4475,15 @@
@@ -4644,6 +4644,15 @@
nomsi [MSI] If the PCI_MSI kernel config parameter is
enabled, this kernel boot option can be used to
disable the use of MSI interrupts system-wide.

View File

@@ -1,7 +1,7 @@
From e914b6a0d571a92db04869a02e06dc83ec7c0700 Mon Sep 17 00:00:00 2001
From cd6bf6bb5fd26e58638aa441dacd9104eb990fe5 Mon Sep 17 00:00:00 2001
From: Andrey Smirnov <andrew.smirnov@gmail.com>
Date: Sun, 27 Feb 2022 14:46:08 -0800
Subject: [PATCH 1/6] extcon: Add driver for Steam Deck
Subject: extcon: Add driver for Steam Deck
(cherry picked from commit f9f2eddae582ae39d5f89c1218448fc259b90aa8)
Signed-off-by: Cristian Ciocaltea <cristian.ciocaltea@collabora.com>

View File

@@ -1,7 +1,7 @@
From 8fe7bb2680d3e1201fdf3329e51078831f32fe12 Mon Sep 17 00:00:00 2001
From c4da1a4d0efa203d10fdceda267816f7838c8a85 Mon Sep 17 00:00:00 2001
From: Andrey Smirnov <andrew.smirnov@gmail.com>
Date: Sat, 19 Feb 2022 16:09:45 -0800
Subject: [PATCH 2/6] hwmon: Add driver for Steam Deck's EC sensors
Subject: hwmon: Add driver for Steam Deck's EC sensors
Add driver for sensors exposed by EC firmware on Steam Deck hardware.
@@ -17,7 +17,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
--- a/drivers/hwmon/Kconfig
+++ b/drivers/hwmon/Kconfig
@@ -2053,6 +2053,17 @@ config SENSORS_SCH5636
@@ -2089,6 +2089,17 @@ config SENSORS_SCH5636
This driver can also be built as a module. If so, the module
will be called sch5636.
@@ -37,7 +37,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
depends on I2C
--- a/drivers/hwmon/Makefile
+++ b/drivers/hwmon/Makefile
@@ -208,6 +208,7 @@ obj-$(CONFIG_SENSORS_SMSC47M1) += smsc47
@@ -211,6 +211,7 @@ obj-$(CONFIG_SENSORS_SMSC47M1) += smsc47
obj-$(CONFIG_SENSORS_SMSC47M192)+= smsc47m192.o
obj-$(CONFIG_SENSORS_SPARX5) += sparx5-temp.o
obj-$(CONFIG_SENSORS_SPD5118) += spd5118.o

View File

@@ -1,7 +1,7 @@
From 8181870b30687aa9351d919d082bc2b671a9c4cb Mon Sep 17 00:00:00 2001
From 9f7d5453fd576ddf2c810146c5f61863b52d777d Mon Sep 17 00:00:00 2001
From: Andrey Smirnov <andrew.smirnov@gmail.com>
Date: Sat, 15 Jul 2023 12:58:54 -0700
Subject: [PATCH 3/6] hwmon: steamdeck-hwmon: Add support for max battery
Subject: hwmon: steamdeck-hwmon: Add support for max battery
level/rate
Add support for max battery level/charge rate attributes.

View File

@@ -1,7 +1,7 @@
From 4df11ab1bd9ad50e6ed928d1c2f3a8404775837b Mon Sep 17 00:00:00 2001
From 93fc97eeb7fd11b7da124eab29c8d455331d364c Mon Sep 17 00:00:00 2001
From: Andrey Smirnov <andrew.smirnov@gmail.com>
Date: Sun, 27 Feb 2022 12:58:05 -0800
Subject: [PATCH 4/6] leds: steamdeck: Add support for Steam Deck LED
Subject: leds: steamdeck: Add support for Steam Deck LED
(cherry picked from commit 85a86d19aa7022ff0555023d53aef78323a42d0c)
Signed-off-by: Cristian Ciocaltea <cristian.ciocaltea@collabora.com>
@@ -15,7 +15,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
--- a/drivers/leds/Kconfig
+++ b/drivers/leds/Kconfig
@@ -959,6 +959,13 @@ config LEDS_ACER_A500
@@ -1003,6 +1003,13 @@ config LEDS_ACER_A500
This option enables support for the Power Button LED of
Acer Iconia Tab A500.
@@ -31,10 +31,10 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
comment "Flash and Torch LED drivers"
--- a/drivers/leds/Makefile
+++ b/drivers/leds/Makefile
@@ -81,6 +81,7 @@ obj-$(CONFIG_LEDS_POWERNV) += leds-powe
obj-$(CONFIG_LEDS_PWM) += leds-pwm.o
@@ -84,6 +84,7 @@ obj-$(CONFIG_LEDS_QNAP_MCU) += leds-qna
obj-$(CONFIG_LEDS_REGULATOR) += leds-regulator.o
obj-$(CONFIG_LEDS_SC27XX_BLTC) += leds-sc27xx-bltc.o
obj-$(CONFIG_LEDS_ST1202) += leds-st1202.o
+obj-$(CONFIG_LEDS_STEAMDECK) += leds-steamdeck.o
obj-$(CONFIG_LEDS_SUN50I_A100) += leds-sun50i-a100.o
obj-$(CONFIG_LEDS_SUNFIRE) += leds-sunfire.o

View File

@@ -1,7 +1,7 @@
From 947c953bf24af62c58e9eb0bab533816882b83a3 Mon Sep 17 00:00:00 2001
From 544af2c7ba194f959e8b317efb6e82b229b8ceff Mon Sep 17 00:00:00 2001
From: Andrey Smirnov <andrew.smirnov@gmail.com>
Date: Sat, 19 Feb 2022 16:08:36 -0800
Subject: [PATCH 5/6] mfd: Add MFD core driver for Steam Deck
Subject: mfd: Add MFD core driver for Steam Deck
Add MFD core driver for Steam Deck. Doesn't really do much so far
besides instantiating a number of MFD cells that implement all the
@@ -19,9 +19,9 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -2402,5 +2402,16 @@ config MFD_RSMU_SPI
Additional drivers must be enabled in order to use the functionality
of the device.
@@ -2439,5 +2439,16 @@ config MFD_UPBOARD_FPGA
To compile this driver as a module, choose M here: the module will be
called upboard-fpga.
+config MFD_STEAMDECK
+ tristate "Valve Steam Deck"
@@ -38,10 +38,10 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
endif
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -289,3 +289,5 @@ obj-$(CONFIG_MFD_ATC260X_I2C) += atc260x
obj-$(CONFIG_MFD_RSMU_I2C) += rsmu_i2c.o rsmu_core.o
@@ -294,3 +294,5 @@ obj-$(CONFIG_MFD_RSMU_I2C) += rsmu_i2c.o
obj-$(CONFIG_MFD_RSMU_SPI) += rsmu_spi.o rsmu_core.o
obj-$(CONFIG_MFD_UPBOARD_FPGA) += upboard-fpga.o
+
+obj-$(CONFIG_MFD_STEAMDECK) += steamdeck.o
--- /dev/null

View File

@@ -1,7 +1,7 @@
From 2f8a2543aa33103cf237853d5f2ca8999261dd0d Mon Sep 17 00:00:00 2001
From cf5a7be3ab145c5743b673722ce01002dcdac3e6 Mon Sep 17 00:00:00 2001
From: Andrey Smirnov <andrew.smirnov@gmail.com>
Date: Sun, 24 Sep 2023 15:02:33 -0700
Subject: [PATCH 6/6] mfd: steamdeck: Expose controller board power in sysfs
Subject: mfd: steamdeck: Expose controller board power in sysfs
As of version 118 Deck's BIOS implements "SCBP" method that allows
gating power of the controller board (VBUS). Add a basic WO method to

View File

@@ -0,0 +1,23 @@
From 878cd0d9982ee6810036adce9e9c96cdb3714be1 Mon Sep 17 00:00:00 2001
From: Alexandre Frade <kernel@xanmod.org>
Date: Thu, 28 Nov 2024 22:55:27 +0000
Subject: kbuild: Re-add .config file required to sign external
modules
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
---
scripts/package/install-extmod-build | 3 +++
1 file changed, 3 insertions(+)
--- a/scripts/package/install-extmod-build
+++ b/scripts/package/install-extmod-build
@@ -44,6 +44,9 @@ mkdir -p "${destdir}"
fi
} | tar -c -f - -T - | tar -xf - -C "${destdir}"
+# copy .config manually to be where it's expected to be
+cp "${KCONFIG_CONFIG}" "${destdir}/.config"
+
# When ${CC} and ${HOSTCC} differ, rebuild host programs using ${CC}.
#
# This caters to host programs that participate in Kbuild. objtool and

View File

@@ -1,7 +1,7 @@
From 67e174927705e71b0d254ab6fab5af40193376a4 Mon Sep 17 00:00:00 2001
From 6e1157f40aa2de736b79766c53f87dfe7de36bb5 Mon Sep 17 00:00:00 2001
From: Alexandre Frade <kernel@xanmod.org>
Date: Sat, 31 Aug 2024 16:57:41 +0000
Subject: [PATCH 03/18] kbuild: Remove GCC minimal function alignment
Subject: kbuild: Remove GCC minimal function alignment
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
---
@@ -12,7 +12,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
--- a/Makefile
+++ b/Makefile
@@ -1004,15 +1004,8 @@ export CC_FLAGS_FPU
@@ -1056,15 +1056,8 @@ export CC_FLAGS_FPU
export CC_FLAGS_NO_FPU
ifneq ($(CONFIG_FUNCTION_ALIGNMENT),0)
@@ -30,7 +30,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
NOSTDINC_FLAGS += -nostdinc
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -1667,18 +1667,6 @@ config FUNCTION_ALIGNMENT
@@ -1723,18 +1723,6 @@ config FUNCTION_ALIGNMENT
default 4 if FUNCTION_ALIGNMENT_4B
default 0

View File

@@ -1,7 +1,7 @@
From 43c0eb6ded02d18daa26e0186ae2f92bec5bfb8f Mon Sep 17 00:00:00 2001
From 91f0f89ac5315be99ea1aea5d732c68311f68bda Mon Sep 17 00:00:00 2001
From: Alexandre Frade <kernel@xanmod.org>
Date: Thu, 11 May 2023 19:41:41 +0000
Subject: [PATCH 04/18] XANMOD: fair: Set scheduler tunable latencies to
Subject: XANMOD: fair: Set scheduler tunable latencies to
unscaled
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
@@ -11,7 +11,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -66,7 +66,7 @@
@@ -69,7 +69,7 @@
*
* (default SCHED_TUNABLESCALING_LOG = *(1+ilog(ncpus))
*/

View File

@@ -1,7 +1,7 @@
From b20c46d59b4102165248167bd5911c2d695679cc Mon Sep 17 00:00:00 2001
From 5a126e141df4850073a8f057cc5eeb22e8f6ea57 Mon Sep 17 00:00:00 2001
From: Alexandre Frade <kernel@xanmod.org>
Date: Sun, 15 Sep 2024 23:03:38 +0000
Subject: [PATCH 05/18] XANMOD: sched: Add yield_type sysctl to reduce or
Subject: XANMOD: sched: Add yield_type sysctl to reduce or
disable sched_yield
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
@@ -12,7 +12,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
--- a/kernel/sched/syscalls.c
+++ b/kernel/sched/syscalls.c
@@ -1391,15 +1391,29 @@ SYSCALL_DEFINE3(sched_getaffinity, pid_t
@@ -1350,15 +1350,29 @@ SYSCALL_DEFINE3(sched_getaffinity, pid_t
return ret;
}
@@ -53,7 +53,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
static const int ngroups_max = NGROUPS_MAX;
static const int cap_last_cap = CAP_LAST_CAP;
@@ -1631,6 +1632,15 @@ static struct ctl_table kern_table[] = {
@@ -1630,6 +1631,15 @@ static const struct ctl_table kern_table
.proc_handler = proc_dointvec,
},
#endif

View File

@@ -1,7 +1,7 @@
From d23f0554f1b381f082dc81a6f3c523b90043b941 Mon Sep 17 00:00:00 2001
From f91c466320368433d644a1bbaeb303b682c6b7d1 Mon Sep 17 00:00:00 2001
From: Alexandre Frade <kernel@xanmod.org>
Date: Wed, 11 May 2022 18:56:51 +0000
Subject: [PATCH 06/18] XANMOD: block/mq-deadline: Increase write priority to
Subject: XANMOD: block/mq-deadline: Increase write priority to
improve responsiveness
Signed-off-by: Alexandre Frade <kernel@xanmod.org>

View File

@@ -1,7 +1,7 @@
From 8c3035b22be106d8659d85c2651e589f53e89cc5 Mon Sep 17 00:00:00 2001
From 99aceb32885686182f2e38ed6c19a380828249b7 Mon Sep 17 00:00:00 2001
From: Alexandre Frade <kernel@xanmod.org>
Date: Thu, 6 Jan 2022 16:59:01 +0000
Subject: [PATCH 07/18] XANMOD: block/mq-deadline: Disable front_merges by
Subject: XANMOD: block/mq-deadline: Disable front_merges by
default
Signed-off-by: Alexandre Frade <kernel@xanmod.org>

View File

@@ -1,7 +1,7 @@
From 3d1e3f450e9ca926a899a0502fd34df6d483efae Mon Sep 17 00:00:00 2001
From e664c30c44caccc43b50a7cde90d4ad2a57faef2 Mon Sep 17 00:00:00 2001
From: Alexandre Frade <kernel@xanmod.org>
Date: Mon, 16 Sep 2024 15:36:01 +0000
Subject: [PATCH 08/18] XANMOD: block: Set rq_affinity to force complete I/O
Subject: XANMOD: block: Set rq_affinity to force complete I/O
requests on same CPU
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
@@ -11,7 +11,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -614,7 +614,8 @@ enum {
@@ -626,7 +626,8 @@ enum {
QUEUE_FLAG_MAX
};

View File

@@ -1,7 +1,7 @@
From 80e2bd58a4f13d1a946c6616e18d124b1291f2a7 Mon Sep 17 00:00:00 2001
From 34db71a0c7669de56fb221bacb4955012f52efa8 Mon Sep 17 00:00:00 2001
From: Alexandre Frade <kernel@xanmod.org>
Date: Mon, 15 Jul 2024 04:50:34 +0000
Subject: [PATCH 09/18] XANMOD: blk-wbt: Set wbt_default_latency_nsec() to
Subject: XANMOD: blk-wbt: Set wbt_default_latency_nsec() to
2msec
Signed-off-by: Alexandre Frade <kernel@xanmod.org>

View File

@@ -1,7 +1,7 @@
From 74767b639b4e9141b1961764655111a4fd62a5ab Mon Sep 17 00:00:00 2001
From 6f6902c8942b881988088c7f7d61053b41f00f0a Mon Sep 17 00:00:00 2001
From: Alexandre Frade <kernel@xanmod.org>
Date: Mon, 29 Jan 2018 17:26:15 +0000
Subject: [PATCH 10/18] XANMOD: kconfig: add 500Hz timer interrupt kernel
Subject: XANMOD: kconfig: add 500Hz timer interrupt kernel
config option
Signed-off-by: Alexandre Frade <kernel@xanmod.org>

View File

@@ -1,7 +1,7 @@
From a047058c64e9d75db8e714a8c1202057920e21c7 Mon Sep 17 00:00:00 2001
From 269ed90bb0c714fc237be05611c82804f81b7038 Mon Sep 17 00:00:00 2001
From: Alexandre Frade <kernel@xanmod.org>
Date: Mon, 29 Jan 2018 16:59:22 +0000
Subject: [PATCH 11/18] XANMOD: dcache: cache_pressure = 50 decreases the rate
Subject: XANMOD: dcache: cache_pressure = 50 decreases the rate
at which VFS caches are reclaimed
Signed-off-by: Alexandre Frade <kernel@xanmod.org>

View File

@@ -1,7 +1,7 @@
From 910bd8c627ea16ea9bcf70c153197aaba473b6b9 Mon Sep 17 00:00:00 2001
From ba310efa15e3c9677121c31e79b72695bcca87df Mon Sep 17 00:00:00 2001
From: Alexandre Frade <kernel@xanmod.org>
Date: Sun, 28 Apr 2024 09:06:54 +0000
Subject: [PATCH 12/18] XANMOD: mm: Raise max_map_count default value
Subject: XANMOD: mm: Raise max_map_count default value
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
---
@@ -22,7 +22,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
mem_profiling
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -191,17 +191,18 @@ static inline void __mm_zero_struct_page
@@ -192,17 +192,18 @@ static inline void __mm_zero_struct_page
*
* When a program's coredump is generated as ELF format, a section is created
* per a vma. In ELF, the number of sections is represented in unsigned short.

View File

@@ -1,7 +1,7 @@
From 1ad86d993666c2d74ed6fd97e143b073e4b2c4c9 Mon Sep 17 00:00:00 2001
From 14ff7a682d0936937d6813105484da7b6245aabb Mon Sep 17 00:00:00 2001
From: Alexandre Frade <kernel@xanmod.org>
Date: Wed, 14 Aug 2024 18:54:53 +0000
Subject: [PATCH 13/18] XANMOD: mm/vmscan: Set minimum amount of swapping
Subject: XANMOD: mm/vmscan: Set minimum amount of swapping
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
---

View File

@@ -1,7 +1,7 @@
From a24ca4c968092cf419821aaaa57b070c088e74e7 Mon Sep 17 00:00:00 2001
From 2354e3f9a9b181ca2e150c27c57a01049b52b6f0 Mon Sep 17 00:00:00 2001
From: Alexandre Frade <kernel@xanmod.org>
Date: Wed, 15 Jun 2022 17:07:29 +0000
Subject: [PATCH 14/18] XANMOD: sched/autogroup: Add kernel parameter and
Subject: XANMOD: sched/autogroup: Add kernel parameter and
config option to enable/disable autogroup feature by default
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
@@ -13,7 +13,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -498,6 +498,10 @@
@@ -511,6 +511,10 @@
Format: <int> (must be >=0)
Default: 64
@@ -24,9 +24,9 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
bau= [X86_UV] Enable the BAU on SGI UV. The default
behavior is to disable the BAU (i.e. bau=0).
Format: { "0" | "1" }
@@ -3881,8 +3885,6 @@
noapic [SMP,APIC,EARLY] Tells the kernel to not make use of any
IOAPICs that may be present in the system.
@@ -4039,8 +4043,6 @@
noapictimer [APIC,X86] Don't set up the APIC timer
- noautogroup Disable scheduler automatic task group creation.
-
@@ -35,7 +35,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
no_console_suspend
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1353,6 +1353,18 @@ config SCHED_AUTOGROUP
@@ -1367,6 +1367,18 @@ config SCHED_AUTOGROUP
desktop applications. Task group autogeneration is currently based
upon task session.

View File

@@ -1,7 +1,7 @@
From 4664b97efde786ff28f2eb234c1d59c9da30c3b4 Mon Sep 17 00:00:00 2001
From fe02f80f7e47a5ae805393bcba3dbe8c2bd74b0e Mon Sep 17 00:00:00 2001
From: Alexandre Frade <kernel@xanmod.org>
Date: Tue, 31 Mar 2020 13:32:08 -0300
Subject: [PATCH 15/18] XANMOD: cpufreq: tunes ondemand and conservative
Subject: XANMOD: cpufreq: tunes ondemand and conservative
governor for performance
Signed-off-by: Alexandre Frade <kernel@xanmod.org>

View File

@@ -1,7 +1,7 @@
From 444f831f229a418b4865d11940b3987f55ab151f Mon Sep 17 00:00:00 2001
From f2c2f7ec98ca5bfda92d4691af46403348ae0d77 Mon Sep 17 00:00:00 2001
From: Alexandre Frade <kernel@xanmod.org>
Date: Mon, 16 Sep 2024 08:09:56 +0000
Subject: [PATCH 16/18] XANMOD: lib/kconfig.debug: disable default
Subject: XANMOD: lib/kconfig.debug: disable default
SYMBOLIC_ERRNAME and DEBUG_BUGVERBOSE
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
@@ -12,14 +12,14 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
--- a/fs/bcachefs/Kconfig
+++ b/fs/bcachefs/Kconfig
@@ -23,7 +23,6 @@ config BCACHEFS_FS
@@ -24,7 +24,6 @@ config BCACHEFS_FS
select XOR_BLOCKS
select XXHASH
select SRCU
- select SYMBOLIC_ERRNAME
select MIN_HEAP
help
The bcachefs filesystem - a modern, copy on write filesystem, with
support for multiple devices, compression, checksumming, etc.
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -190,7 +190,7 @@ config DYNAMIC_DEBUG_CORE

View File

@@ -1,7 +1,7 @@
From 3536b212b829712a928b03cf513f3da87e15b3ef Mon Sep 17 00:00:00 2001
From c706cd7134b55e1f188de6ea23e4b25b0497f18e Mon Sep 17 00:00:00 2001
From: Alexandre Frade <kernel@xanmod.org>
Date: Sun, 29 May 2022 00:57:40 +0000
Subject: [PATCH 17/18] XANMOD: scripts/setlocalversion: remove "+" tag for git
Subject: XANMOD: scripts/setlocalversion: remove "+" tag for git
repo short version
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
@@ -11,7 +11,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
--- a/scripts/setlocalversion
+++ b/scripts/setlocalversion
@@ -113,7 +113,6 @@ scm_version()
@@ -117,7 +117,6 @@ scm_version()
# If only the short version is requested, don't bother
# running further git commands
if $short; then

View File

@@ -1,7 +1,7 @@
From 857de795e16a927cf251e5ede247b6e96938916e Mon Sep 17 00:00:00 2001
From 4c8da54c3f59b0e71408b0c980ffb162fc4bb022 Mon Sep 17 00:00:00 2001
From: Alexandre Frade <kernel@xanmod.org>
Date: Mon, 24 Apr 2023 04:50:34 +0000
Subject: [PATCH 18/18] XANMOD: scripts/setlocalversion: Move localversion*
Subject: XANMOD: scripts/setlocalversion: Move localversion*
files to the end
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
@@ -11,7 +11,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
--- a/scripts/setlocalversion
+++ b/scripts/setlocalversion
@@ -204,4 +204,4 @@ elif [ "${LOCALVERSION+set}" != "set" ];
@@ -208,4 +208,4 @@ elif [ "${LOCALVERSION+set}" != "set" ];
scm_version="$(scm_version --short)"
fi