add 3rd party/custom patches

3rd patchs (in alphabetical order): - bbr3 - ntsync5 - openwrt - pf-kernel - xanmod - zen no configuration changes for now
2024-10-29 05:12:06 +03:00
parent 8082dfeaca
commit 8cbaf1dea2
186 changed files with 43626 additions and 0 deletions
--- a/debian/patches/patchset-pf/cpuidle/0001-cpuidle-menu-Remove-iowait-influence.patch
+++ b/debian/patches/patchset-pf/cpuidle/0001-cpuidle-menu-Remove-iowait-influence.patch
@@ -0,0 +1,180 @@
+From d31e903a364802c068ff23bdd448cc70eda71a7c Mon Sep 17 00:00:00 2001
+From: Christian Loehle <christian.loehle@arm.com>
+Date: Thu, 5 Sep 2024 10:26:38 +0100
+Subject: cpuidle: menu: Remove iowait influence
+
+Remove CPU iowaiters influence on idle state selection.
+Remove the menu notion of performance multiplier which increased with
+the number of tasks that went to iowait sleep on this CPU and haven't
+woken up yet.
+
+Relying on iowait for cpuidle is problematic for a few reasons:
+1. There is no guarantee that an iowaiting task will wake up on the
+same CPU.
+2. The task being in iowait says nothing about the idle duration, we
+could be selecting shallower states for a long time.
+3. The task being in iowait doesn't always imply a performance hit
+with increased latency.
+4. If there is such a performance hit, the number of iowaiting tasks
+doesn't directly correlate.
+5. The definition of iowait altogether is vague at best, it is
+sprinkled across kernel code.
+
+Signed-off-by: Christian Loehle <christian.loehle@arm.com>
+---
+ drivers/cpuidle/governors/menu.c | 76 ++++----------------------------
+ 1 file changed, 9 insertions(+), 67 deletions(-)
+
+--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
+@@ -19,7 +19,7 @@
+ 
+ #include "gov.h"
+ 
+-#define BUCKETS 12
+#define BUCKETS 6
+ #define INTERVAL_SHIFT 3
+ #define INTERVALS (1UL << INTERVAL_SHIFT)
+ #define RESOLUTION 1024
+@@ -29,12 +29,11 @@
+ /*
+  * Concepts and ideas behind the menu governor
+  *
+- * For the menu governor, there are 3 decision factors for picking a C
+ * For the menu governor, there are 2 decision factors for picking a C
+  * state:
+  * 1) Energy break even point
+- * 2) Performance impact
+- * 3) Latency tolerance (from pmqos infrastructure)
+- * These three factors are treated independently.
+ * 2) Latency tolerance (from pmqos infrastructure)
+ * These two factors are treated independently.
+  *
+  * Energy break even point
+  * -----------------------
+@@ -75,30 +74,6 @@
+  * intervals and if the stand deviation of these 8 intervals is below a
+  * threshold value, we use the average of these intervals as prediction.
+  *
+- * Limiting Performance Impact
+- * ---------------------------
+- * C states, especially those with large exit latencies, can have a real
+- * noticeable impact on workloads, which is not acceptable for most sysadmins,
+- * and in addition, less performance has a power price of its own.
+- *
+- * As a general rule of thumb, menu assumes that the following heuristic
+- * holds:
+- *     The busier the system, the less impact of C states is acceptable
+- *
+- * This rule-of-thumb is implemented using a performance-multiplier:
+- * If the exit latency times the performance multiplier is longer than
+- * the predicted duration, the C state is not considered a candidate
+- * for selection due to a too high performance impact. So the higher
+- * this multiplier is, the longer we need to be idle to pick a deep C
+- * state, and thus the less likely a busy CPU will hit such a deep
+- * C state.
+- *
+- * Currently there is only one value determining the factor:
+- * 10 points are added for each process that is waiting for IO on this CPU.
+- * (This value was experimentally determined.)
+- * Utilization is no longer a factor as it was shown that it never contributed
+- * significantly to the performance multiplier in the first place.
+- *
+  */
+ 
+ struct menu_device {
+@@ -112,19 +87,10 @@ struct menu_device {
+ 	int		interval_ptr;
+ };
+ 
+-static inline int which_bucket(u64 duration_ns, unsigned int nr_iowaiters)
+static inline int which_bucket(u64 duration_ns)
+ {
+ 	int bucket = 0;
+ 
+-	/*
+-	 * We keep two groups of stats; one with no
+-	 * IO pending, one without.
+-	 * This allows us to calculate
+-	 * E(duration)|iowait
+-	 */
+-	if (nr_iowaiters)
+-		bucket = BUCKETS/2;
+-
+ 	if (duration_ns < 10ULL * NSEC_PER_USEC)
+ 		return bucket;
+ 	if (duration_ns < 100ULL * NSEC_PER_USEC)
+@@ -138,19 +104,6 @@ static inline int which_bucket(u64 durat
+ 	return bucket + 5;
+ }
+ 
+-/*
+- * Return a multiplier for the exit latency that is intended
+- * to take performance requirements into account.
+- * The more performance critical we estimate the system
+- * to be, the higher this multiplier, and thus the higher
+- * the barrier to go to an expensive C state.
+- */
+-static inline int performance_multiplier(unsigned int nr_iowaiters)
+-{
+-	/* for IO wait tasks (per cpu!) we add 10x each */
+-	return 1 + 10 * nr_iowaiters;
+-}
+-
+ static DEFINE_PER_CPU(struct menu_device, menu_devices);
+ 
+ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev);
+@@ -258,8 +211,6 @@ static int menu_select(struct cpuidle_dr
+ 	struct menu_device *data = this_cpu_ptr(&menu_devices);
+ 	s64 latency_req = cpuidle_governor_latency_req(dev->cpu);
+ 	u64 predicted_ns;
+-	u64 interactivity_req;
+-	unsigned int nr_iowaiters;
+ 	ktime_t delta, delta_tick;
+ 	int i, idx;
+ 
+@@ -268,8 +219,6 @@ static int menu_select(struct cpuidle_dr
+ 		data->needs_update = 0;
+ 	}
+ 
+-	nr_iowaiters = nr_iowait_cpu(dev->cpu);
+-
+ 	/* Find the shortest expected idle interval. */
+ 	predicted_ns = get_typical_interval(data) * NSEC_PER_USEC;
+ 	if (predicted_ns > RESIDENCY_THRESHOLD_NS) {
+@@ -283,7 +232,7 @@ static int menu_select(struct cpuidle_dr
+ 		}
+ 
+ 		data->next_timer_ns = delta;
+-		data->bucket = which_bucket(data->next_timer_ns, nr_iowaiters);
+		data->bucket = which_bucket(data->next_timer_ns);
+ 
+ 		/* Round up the result for half microseconds. */
+ 		timer_us = div_u64((RESOLUTION * DECAY * NSEC_PER_USEC) / 2 +
+@@ -301,7 +250,7 @@ static int menu_select(struct cpuidle_dr
+ 		 */
+ 		data->next_timer_ns = KTIME_MAX;
+ 		delta_tick = TICK_NSEC / 2;
+-		data->bucket = which_bucket(KTIME_MAX, nr_iowaiters);
+		data->bucket = which_bucket(KTIME_MAX);
+ 	}
+ 
+ 	if (unlikely(drv->state_count <= 1 || latency_req == 0) ||
+@@ -328,15 +277,8 @@ static int menu_select(struct cpuidle_dr
+ 		 */
+ 		if (predicted_ns < TICK_NSEC)
+ 			predicted_ns = data->next_timer_ns;
+-	} else {
+-		/*
+-		 * Use the performance multiplier and the user-configurable
+-		 * latency_req to determine the maximum exit latency.
+-		 */
+-		interactivity_req = div64_u64(predicted_ns,
+-					      performance_multiplier(nr_iowaiters));
+-		if (latency_req > interactivity_req)
+-			latency_req = interactivity_req;
+	} else if (latency_req > predicted_ns) {
+		latency_req = predicted_ns;
+ 	}
+ 
+ 	/*
--- a/debian/patches/patchset-pf/cpuidle/0002-cpuidle-Prefer-teo-over-menu-governor.patch
+++ b/debian/patches/patchset-pf/cpuidle/0002-cpuidle-Prefer-teo-over-menu-governor.patch
@@ -0,0 +1,58 @@
+From 3f840a42780323a4437dd1a417488d141c33af15 Mon Sep 17 00:00:00 2001
+From: Christian Loehle <christian.loehle@arm.com>
+Date: Thu, 5 Sep 2024 10:26:39 +0100
+Subject: cpuidle: Prefer teo over menu governor
+
+Since menu no longer has the interactivity boost teo works better
+overall, so make it the default.
+
+Signed-off-by: Christian Loehle <christian.loehle@arm.com>
+---
+ drivers/cpuidle/Kconfig          | 5 +----
+ drivers/cpuidle/governors/menu.c | 2 +-
+ drivers/cpuidle/governors/teo.c  | 2 +-
+ 3 files changed, 3 insertions(+), 6 deletions(-)
+
+--- a/drivers/cpuidle/Kconfig
+++ b/drivers/cpuidle/Kconfig
+@@ -5,7 +5,7 @@ config CPU_IDLE
+ 	bool "CPU idle PM support"
+ 	default y if ACPI || PPC_PSERIES
+ 	select CPU_IDLE_GOV_LADDER if (!NO_HZ && !NO_HZ_IDLE)
+-	select CPU_IDLE_GOV_MENU if (NO_HZ || NO_HZ_IDLE) && !CPU_IDLE_GOV_TEO
+	select CPU_IDLE_GOV_TEO if (NO_HZ || NO_HZ_IDLE) && !CPU_IDLE_GOV_MENU
+ 	help
+ 	  CPU idle is a generic framework for supporting software-controlled
+ 	  idle processor power management.  It includes modular cross-platform
+@@ -30,9 +30,6 @@ config CPU_IDLE_GOV_TEO
+ 	  This governor implements a simplified idle state selection method
+ 	  focused on timer events and does not do any interactivity boosting.
+ 
+-	  Some workloads benefit from using it and it generally should be safe
+-	  to use.  Say Y here if you are not happy with the alternatives.
+-
+ config CPU_IDLE_GOV_HALTPOLL
+ 	bool "Haltpoll governor (for virtualized systems)"
+ 	depends on KVM_GUEST
+--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
+@@ -508,7 +508,7 @@ static int menu_enable_device(struct cpu
+ 
+ static struct cpuidle_governor menu_governor = {
+ 	.name =		"menu",
+-	.rating =	20,
+	.rating =	19,
+ 	.enable =	menu_enable_device,
+ 	.select =	menu_select,
+ 	.reflect =	menu_reflect,
+--- a/drivers/cpuidle/governors/teo.c
+++ b/drivers/cpuidle/governors/teo.c
+@@ -537,7 +537,7 @@ static int teo_enable_device(struct cpui
+ 
+ static struct cpuidle_governor teo_governor = {
+ 	.name =		"teo",
+-	.rating =	19,
+	.rating =	20,
+ 	.enable =	teo_enable_device,
+ 	.select =	teo_select,
+ 	.reflect =	teo_reflect,
--- a/debian/patches/patchset-pf/cpuidle/0003-TEST-cpufreq-schedutil-Linear-iowait-boost-step.patch
+++ b/debian/patches/patchset-pf/cpuidle/0003-TEST-cpufreq-schedutil-Linear-iowait-boost-step.patch
@@ -0,0 +1,39 @@
+From ca8c9368b6f28ef625716b03aa930acfb8afe158 Mon Sep 17 00:00:00 2001
+From: Christian Loehle <christian.loehle@arm.com>
+Date: Thu, 5 Sep 2024 10:26:40 +0100
+Subject: TEST: cpufreq/schedutil: Linear iowait boost step
+
+In preparation for capping iowait boost make the steps linear as
+opposed to doubling.
+
+Signed-off-by: Christian Loehle <christian.loehle@arm.com>
+---
+ kernel/sched/cpufreq_schedutil.c | 9 ++++-----
+ 1 file changed, 4 insertions(+), 5 deletions(-)
+
+--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
+@@ -267,7 +267,8 @@ static void sugov_iowait_boost(struct su
+ 	/* Double the boost at each request */
+ 	if (sg_cpu->iowait_boost) {
+ 		sg_cpu->iowait_boost =
+-			min_t(unsigned int, sg_cpu->iowait_boost << 1, SCHED_CAPACITY_SCALE);
+			min_t(unsigned int,
+			      sg_cpu->iowait_boost + IOWAIT_BOOST_MIN, SCHED_CAPACITY_SCALE);
+ 		return;
+ 	}
+ 
+@@ -308,11 +309,9 @@ static unsigned long sugov_iowait_apply(
+ 		/*
+ 		 * No boost pending; reduce the boost value.
+ 		 */
+-		sg_cpu->iowait_boost >>= 1;
+-		if (sg_cpu->iowait_boost < IOWAIT_BOOST_MIN) {
+-			sg_cpu->iowait_boost = 0;
+		sg_cpu->iowait_boost -= IOWAIT_BOOST_MIN;
+		if (!sg_cpu->iowait_boost)
+ 			return 0;
+-		}
+ 	}
+ 
+ 	sg_cpu->iowait_boost_pending = false;
--- a/debian/patches/patchset-pf/cpuidle/0004-TEST-cpufreq-schedutil-iowait-boost-cap-sysfs.patch
+++ b/debian/patches/patchset-pf/cpuidle/0004-TEST-cpufreq-schedutil-iowait-boost-cap-sysfs.patch
@@ -0,0 +1,106 @@
+From 33f05bd16a4ac2f6f36c9eb88016e2375dcb597c Mon Sep 17 00:00:00 2001
+From: Christian Loehle <christian.loehle@arm.com>
+Date: Thu, 5 Sep 2024 10:26:41 +0100
+Subject: TEST: cpufreq/schedutil: iowait boost cap sysfs
+
+Add a knob to cap applied iowait_boost per sysfs.
+This is to test for potential regressions.
+
+Signed-off-by: Christian Loehle <christian.loehle@arm.com>
+---
+ kernel/sched/cpufreq_schedutil.c | 38 ++++++++++++++++++++++++++++++++
+ 1 file changed, 38 insertions(+)
+
+--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
+@@ -11,6 +11,7 @@
+ struct sugov_tunables {
+ 	struct gov_attr_set	attr_set;
+ 	unsigned int		rate_limit_us;
+	unsigned int		iowait_boost_cap;
+ };
+ 
+ struct sugov_policy {
+@@ -35,6 +36,8 @@ struct sugov_policy {
+ 
+ 	bool			limits_changed;
+ 	bool			need_freq_update;
+
+	unsigned int		iowait_boost_cap;
+ };
+ 
+ struct sugov_cpu {
+@@ -316,6 +319,9 @@ static unsigned long sugov_iowait_apply(
+ 
+ 	sg_cpu->iowait_boost_pending = false;
+ 
+	if (sg_cpu->iowait_boost > sg_cpu->sg_policy->iowait_boost_cap)
+		sg_cpu->iowait_boost = sg_cpu->sg_policy->iowait_boost_cap;
+
+ 	/*
+ 	 * sg_cpu->util is already in capacity scale; convert iowait_boost
+ 	 * into the same scale so we can compare.
+@@ -554,6 +560,14 @@ static ssize_t rate_limit_us_show(struct
+ 	return sprintf(buf, "%u\n", tunables->rate_limit_us);
+ }
+ 
+
+static ssize_t iowait_boost_cap_show(struct gov_attr_set *attr_set, char *buf)
+{
+	struct sugov_tunables *tunables = to_sugov_tunables(attr_set);
+
+	return sprintf(buf, "%u\n", tunables->iowait_boost_cap);
+}
+
+ static ssize_t
+ rate_limit_us_store(struct gov_attr_set *attr_set, const char *buf, size_t count)
+ {
+@@ -572,10 +586,30 @@ rate_limit_us_store(struct gov_attr_set
+ 	return count;
+ }
+ 
+static ssize_t
+iowait_boost_cap_store(struct gov_attr_set *attr_set, const char *buf, size_t count)
+{
+	struct sugov_tunables *tunables = to_sugov_tunables(attr_set);
+	struct sugov_policy *sg_policy;
+	unsigned int iowait_boost_cap;
+
+	if (kstrtouint(buf, 10, &iowait_boost_cap))
+		return -EINVAL;
+
+	tunables->iowait_boost_cap = iowait_boost_cap;
+
+	list_for_each_entry(sg_policy, &attr_set->policy_list, tunables_hook)
+		sg_policy->iowait_boost_cap = iowait_boost_cap;
+
+	return count;
+}
+
+ static struct governor_attr rate_limit_us = __ATTR_RW(rate_limit_us);
+static struct governor_attr iowait_boost_cap = __ATTR_RW(iowait_boost_cap);
+ 
+ static struct attribute *sugov_attrs[] = {
+ 	&rate_limit_us.attr,
+	&iowait_boost_cap.attr,
+ 	NULL
+ };
+ ATTRIBUTE_GROUPS(sugov);
+@@ -765,6 +799,8 @@ static int sugov_init(struct cpufreq_pol
+ 
+ 	tunables->rate_limit_us = cpufreq_policy_transition_delay_us(policy);
+ 
+	tunables->iowait_boost_cap = SCHED_CAPACITY_SCALE;
+
+ 	policy->governor_data = sg_policy;
+ 	sg_policy->tunables = tunables;
+ 
+@@ -834,6 +870,8 @@ static int sugov_start(struct cpufreq_po
+ 	sg_policy->limits_changed		= false;
+ 	sg_policy->cached_raw_freq		= 0;
+ 
+	sg_policy->iowait_boost_cap		= SCHED_CAPACITY_SCALE;
+
+ 	sg_policy->need_freq_update = cpufreq_driver_test_flags(CPUFREQ_NEED_UPDATE_LIMITS);
+ 
+ 	if (policy_is_shared(policy))
--- a/debian/patches/patchset-pf/cpuidle/0005-cpufreq-schedutil-Remove-iowait-boost.patch
+++ b/debian/patches/patchset-pf/cpuidle/0005-cpufreq-schedutil-Remove-iowait-boost.patch
@@ -0,0 +1,325 @@
+From 33eb6c08d7c615fad308001921c7b1148cbccfde Mon Sep 17 00:00:00 2001
+From: Christian Loehle <christian.loehle@arm.com>
+Date: Thu, 5 Sep 2024 10:26:42 +0100
+Subject: cpufreq/schedutil: Remove iowait boost
+
+iowait boost in schedutil was introduced by
+commit ("21ca6d2c52f8 cpufreq: schedutil: Add iowait boosting").
+with it more or less following intel_pstate's approach to increase
+frequency after an iowait wakeup.
+Behaviour that is piggy-backed onto iowait boost is problematic
+due to a lot of reasons, so remove it.
+
+For schedutil specifically these are some of the reasons:
+1. Boosting is applied even in scenarios where it doesn't improve
+throughput.
+2. The boost is not accounted for in EAS: a) feec() will only consider
+ the actual task utilization for task placement, but another CPU might
+ be more energy-efficient at that capacity than the boosted one.)
+ b) When placing a non-IO task while a CPU is boosted compute_energy()
+ assumes a lower OPP than what is actually applied. This leads to
+ wrong EAS decisions.
+3. Actual IO heavy workloads are hardly distinguished from infrequent
+in_iowait wakeups.
+4. The boost isn't accounted for in task placement.
+5. The boost isn't associated with a task, it therefore lingers on the
+rq even after the responsible task has migrated / stopped.
+6. The boost isn't associated with a task, it therefore needs to ramp
+up again when migrated.
+7. Since schedutil doesn't know which task is getting woken up,
+multiple unrelated in_iowait tasks lead to boosting.
+8. Boosting is hard to control with UCLAMP_MAX (which is only active
+when the task is on the rq, which for boosted tasks is usually not
+the case for most of the time).
+
+One benefit of schedutil specifically is the reliance on the
+scheduler's utilization signals, which have evolved a lot since it's
+original introduction. Some cases that benefitted from iowait boosting
+in the past can now be covered by e.g. util_est.
+
+Signed-off-by: Christian Loehle <christian.loehle@arm.com>
+---
+ kernel/sched/cpufreq_schedutil.c | 181 +------------------------------
+ 1 file changed, 3 insertions(+), 178 deletions(-)
+
+--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
+@@ -6,12 +6,9 @@
+  * Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+  */
+ 
+-#define IOWAIT_BOOST_MIN	(SCHED_CAPACITY_SCALE / 8)
+-
+ struct sugov_tunables {
+ 	struct gov_attr_set	attr_set;
+ 	unsigned int		rate_limit_us;
+-	unsigned int		iowait_boost_cap;
+ };
+ 
+ struct sugov_policy {
+@@ -36,8 +33,6 @@ struct sugov_policy {
+ 
+ 	bool			limits_changed;
+ 	bool			need_freq_update;
+-
+-	unsigned int		iowait_boost_cap;
+ };
+ 
+ struct sugov_cpu {
+@@ -45,10 +40,6 @@ struct sugov_cpu {
+ 	struct sugov_policy	*sg_policy;
+ 	unsigned int		cpu;
+ 
+-	bool			iowait_boost_pending;
+-	unsigned int		iowait_boost;
+-	u64			last_update;
+-
+ 	unsigned long		util;
+ 	unsigned long		bw_min;
+ 
+@@ -198,137 +189,15 @@ unsigned long sugov_effective_cpu_perf(i
+ 	return max(min, max);
+ }
+ 
+-static void sugov_get_util(struct sugov_cpu *sg_cpu, unsigned long boost)
+static void sugov_get_util(struct sugov_cpu *sg_cpu)
+ {
+ 	unsigned long min, max, util = cpu_util_cfs_boost(sg_cpu->cpu);
+ 
+ 	util = effective_cpu_util(sg_cpu->cpu, util, &min, &max);
+-	util = max(util, boost);
+ 	sg_cpu->bw_min = min;
+ 	sg_cpu->util = sugov_effective_cpu_perf(sg_cpu->cpu, util, min, max);
+ }
+ 
+-/**
+- * sugov_iowait_reset() - Reset the IO boost status of a CPU.
+- * @sg_cpu: the sugov data for the CPU to boost
+- * @time: the update time from the caller
+- * @set_iowait_boost: true if an IO boost has been requested
+- *
+- * The IO wait boost of a task is disabled after a tick since the last update
+- * of a CPU. If a new IO wait boost is requested after more then a tick, then
+- * we enable the boost starting from IOWAIT_BOOST_MIN, which improves energy
+- * efficiency by ignoring sporadic wakeups from IO.
+- */
+-static bool sugov_iowait_reset(struct sugov_cpu *sg_cpu, u64 time,
+-			       bool set_iowait_boost)
+-{
+-	s64 delta_ns = time - sg_cpu->last_update;
+-
+-	/* Reset boost only if a tick has elapsed since last request */
+-	if (delta_ns <= TICK_NSEC)
+-		return false;
+-
+-	sg_cpu->iowait_boost = set_iowait_boost ? IOWAIT_BOOST_MIN : 0;
+-	sg_cpu->iowait_boost_pending = set_iowait_boost;
+-
+-	return true;
+-}
+-
+-/**
+- * sugov_iowait_boost() - Updates the IO boost status of a CPU.
+- * @sg_cpu: the sugov data for the CPU to boost
+- * @time: the update time from the caller
+- * @flags: SCHED_CPUFREQ_IOWAIT if the task is waking up after an IO wait
+- *
+- * Each time a task wakes up after an IO operation, the CPU utilization can be
+- * boosted to a certain utilization which doubles at each "frequent and
+- * successive" wakeup from IO, ranging from IOWAIT_BOOST_MIN to the utilization
+- * of the maximum OPP.
+- *
+- * To keep doubling, an IO boost has to be requested at least once per tick,
+- * otherwise we restart from the utilization of the minimum OPP.
+- */
+-static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, u64 time,
+-			       unsigned int flags)
+-{
+-	bool set_iowait_boost = flags & SCHED_CPUFREQ_IOWAIT;
+-
+-	/* Reset boost if the CPU appears to have been idle enough */
+-	if (sg_cpu->iowait_boost &&
+-	    sugov_iowait_reset(sg_cpu, time, set_iowait_boost))
+-		return;
+-
+-	/* Boost only tasks waking up after IO */
+-	if (!set_iowait_boost)
+-		return;
+-
+-	/* Ensure boost doubles only one time at each request */
+-	if (sg_cpu->iowait_boost_pending)
+-		return;
+-	sg_cpu->iowait_boost_pending = true;
+-
+-	/* Double the boost at each request */
+-	if (sg_cpu->iowait_boost) {
+-		sg_cpu->iowait_boost =
+-			min_t(unsigned int,
+-			      sg_cpu->iowait_boost + IOWAIT_BOOST_MIN, SCHED_CAPACITY_SCALE);
+-		return;
+-	}
+-
+-	/* First wakeup after IO: start with minimum boost */
+-	sg_cpu->iowait_boost = IOWAIT_BOOST_MIN;
+-}
+-
+-/**
+- * sugov_iowait_apply() - Apply the IO boost to a CPU.
+- * @sg_cpu: the sugov data for the cpu to boost
+- * @time: the update time from the caller
+- * @max_cap: the max CPU capacity
+- *
+- * A CPU running a task which woken up after an IO operation can have its
+- * utilization boosted to speed up the completion of those IO operations.
+- * The IO boost value is increased each time a task wakes up from IO, in
+- * sugov_iowait_apply(), and it's instead decreased by this function,
+- * each time an increase has not been requested (!iowait_boost_pending).
+- *
+- * A CPU which also appears to have been idle for at least one tick has also
+- * its IO boost utilization reset.
+- *
+- * This mechanism is designed to boost high frequently IO waiting tasks, while
+- * being more conservative on tasks which does sporadic IO operations.
+- */
+-static unsigned long sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time,
+-			       unsigned long max_cap)
+-{
+-	/* No boost currently required */
+-	if (!sg_cpu->iowait_boost)
+-		return 0;
+-
+-	/* Reset boost if the CPU appears to have been idle enough */
+-	if (sugov_iowait_reset(sg_cpu, time, false))
+-		return 0;
+-
+-	if (!sg_cpu->iowait_boost_pending) {
+-		/*
+-		 * No boost pending; reduce the boost value.
+-		 */
+-		sg_cpu->iowait_boost -= IOWAIT_BOOST_MIN;
+-		if (!sg_cpu->iowait_boost)
+-			return 0;
+-	}
+-
+-	sg_cpu->iowait_boost_pending = false;
+-
+-	if (sg_cpu->iowait_boost > sg_cpu->sg_policy->iowait_boost_cap)
+-		sg_cpu->iowait_boost = sg_cpu->sg_policy->iowait_boost_cap;
+-
+-	/*
+-	 * sg_cpu->util is already in capacity scale; convert iowait_boost
+-	 * into the same scale so we can compare.
+-	 */
+-	return (sg_cpu->iowait_boost * max_cap) >> SCHED_CAPACITY_SHIFT;
+-}
+-
+ #ifdef CONFIG_NO_HZ_COMMON
+ static bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu)
+ {
+@@ -356,18 +225,12 @@ static inline bool sugov_update_single_c
+ 					      u64 time, unsigned long max_cap,
+ 					      unsigned int flags)
+ {
+-	unsigned long boost;
+-
+-	sugov_iowait_boost(sg_cpu, time, flags);
+-	sg_cpu->last_update = time;
+-
+ 	ignore_dl_rate_limit(sg_cpu);
+ 
+ 	if (!sugov_should_update_freq(sg_cpu->sg_policy, time))
+ 		return false;
+ 
+-	boost = sugov_iowait_apply(sg_cpu, time, max_cap);
+-	sugov_get_util(sg_cpu, boost);
+	sugov_get_util(sg_cpu);
+ 
+ 	return true;
+ }
+@@ -468,11 +331,8 @@ static unsigned int sugov_next_freq_shar
+ 
+ 	for_each_cpu(j, policy->cpus) {
+ 		struct sugov_cpu *j_sg_cpu = &per_cpu(sugov_cpu, j);
+-		unsigned long boost;
+-
+-		boost = sugov_iowait_apply(j_sg_cpu, time, max_cap);
+-		sugov_get_util(j_sg_cpu, boost);
+ 
+		sugov_get_util(j_sg_cpu);
+ 		util = max(j_sg_cpu->util, util);
+ 	}
+ 
+@@ -488,9 +348,6 @@ sugov_update_shared(struct update_util_d
+ 
+ 	raw_spin_lock(&sg_policy->update_lock);
+ 
+-	sugov_iowait_boost(sg_cpu, time, flags);
+-	sg_cpu->last_update = time;
+-
+ 	ignore_dl_rate_limit(sg_cpu);
+ 
+ 	if (sugov_should_update_freq(sg_policy, time)) {
+@@ -560,14 +417,6 @@ static ssize_t rate_limit_us_show(struct
+ 	return sprintf(buf, "%u\n", tunables->rate_limit_us);
+ }
+ 
+-
+-static ssize_t iowait_boost_cap_show(struct gov_attr_set *attr_set, char *buf)
+-{
+-	struct sugov_tunables *tunables = to_sugov_tunables(attr_set);
+-
+-	return sprintf(buf, "%u\n", tunables->iowait_boost_cap);
+-}
+-
+ static ssize_t
+ rate_limit_us_store(struct gov_attr_set *attr_set, const char *buf, size_t count)
+ {
+@@ -586,30 +435,10 @@ rate_limit_us_store(struct gov_attr_set
+ 	return count;
+ }
+ 
+-static ssize_t
+-iowait_boost_cap_store(struct gov_attr_set *attr_set, const char *buf, size_t count)
+-{
+-	struct sugov_tunables *tunables = to_sugov_tunables(attr_set);
+-	struct sugov_policy *sg_policy;
+-	unsigned int iowait_boost_cap;
+-
+-	if (kstrtouint(buf, 10, &iowait_boost_cap))
+-		return -EINVAL;
+-
+-	tunables->iowait_boost_cap = iowait_boost_cap;
+-
+-	list_for_each_entry(sg_policy, &attr_set->policy_list, tunables_hook)
+-		sg_policy->iowait_boost_cap = iowait_boost_cap;
+-
+-	return count;
+-}
+-
+ static struct governor_attr rate_limit_us = __ATTR_RW(rate_limit_us);
+-static struct governor_attr iowait_boost_cap = __ATTR_RW(iowait_boost_cap);
+ 
+ static struct attribute *sugov_attrs[] = {
+ 	&rate_limit_us.attr,
+-	&iowait_boost_cap.attr,
+ 	NULL
+ };
+ ATTRIBUTE_GROUPS(sugov);
+@@ -799,8 +628,6 @@ static int sugov_init(struct cpufreq_pol
+ 
+ 	tunables->rate_limit_us = cpufreq_policy_transition_delay_us(policy);
+ 
+-	tunables->iowait_boost_cap = SCHED_CAPACITY_SCALE;
+-
+ 	policy->governor_data = sg_policy;
+ 	sg_policy->tunables = tunables;
+ 
+@@ -870,8 +697,6 @@ static int sugov_start(struct cpufreq_po
+ 	sg_policy->limits_changed		= false;
+ 	sg_policy->cached_raw_freq		= 0;
+ 
+-	sg_policy->iowait_boost_cap		= SCHED_CAPACITY_SCALE;
+-
+ 	sg_policy->need_freq_update = cpufreq_driver_test_flags(CPUFREQ_NEED_UPDATE_LIMITS);
+ 
+ 	if (policy_is_shared(policy))
--- a/debian/patches/patchset-pf/cpuidle/0006-cpufreq-intel_pstate-Remove-iowait-boost.patch
+++ b/debian/patches/patchset-pf/cpuidle/0006-cpufreq-intel_pstate-Remove-iowait-boost.patch
@@ -0,0 +1,113 @@
+From af7bbb59c2411e985a5d79173af5686337b4af9b Mon Sep 17 00:00:00 2001
+From: Christian Loehle <christian.loehle@arm.com>
+Date: Thu, 5 Sep 2024 10:26:43 +0100
+Subject: cpufreq: intel_pstate: Remove iowait boost
+
+Analogous to schedutil, remove iowait boost for the same reasons.
+
+Signed-off-by: Christian Loehle <christian.loehle@arm.com>
+---
+ drivers/cpufreq/intel_pstate.c | 50 ++--------------------------------
+ 1 file changed, 3 insertions(+), 47 deletions(-)
+
+--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
+@@ -191,7 +191,6 @@ struct global_params {
+  * @policy:		CPUFreq policy value
+  * @update_util:	CPUFreq utility callback information
+  * @update_util_set:	CPUFreq utility callback is set
+- * @iowait_boost:	iowait-related boost fraction
+  * @last_update:	Time of the last update.
+  * @pstate:		Stores P state limits for this CPU
+  * @vid:		Stores VID limits for this CPU
+@@ -245,7 +244,6 @@ struct cpudata {
+ 	struct acpi_processor_performance acpi_perf_data;
+ 	bool valid_pss_table;
+ #endif
+-	unsigned int iowait_boost;
+ 	s16 epp_powersave;
+ 	s16 epp_policy;
+ 	s16 epp_default;
+@@ -2136,28 +2134,7 @@ static inline void intel_pstate_update_u
+ {
+ 	cpu->sample.time = time;
+ 
+-	if (cpu->sched_flags & SCHED_CPUFREQ_IOWAIT) {
+-		bool do_io = false;
+-
+-		cpu->sched_flags = 0;
+-		/*
+-		 * Set iowait_boost flag and update time. Since IO WAIT flag
+-		 * is set all the time, we can't just conclude that there is
+-		 * some IO bound activity is scheduled on this CPU with just
+-		 * one occurrence. If we receive at least two in two
+-		 * consecutive ticks, then we treat as boost candidate.
+-		 */
+-		if (time_before64(time, cpu->last_io_update + 2 * TICK_NSEC))
+-			do_io = true;
+-
+-		cpu->last_io_update = time;
+-
+-		if (do_io)
+-			intel_pstate_hwp_boost_up(cpu);
+-
+-	} else {
+-		intel_pstate_hwp_boost_down(cpu);
+-	}
+	intel_pstate_hwp_boost_down(cpu);
+ }
+ 
+ static inline void intel_pstate_update_util_hwp(struct update_util_data *data,
+@@ -2240,9 +2217,6 @@ static inline int32_t get_target_pstate(
+ 	busy_frac = div_fp(sample->mperf << cpu->aperf_mperf_shift,
+ 			   sample->tsc);
+ 
+-	if (busy_frac < cpu->iowait_boost)
+-		busy_frac = cpu->iowait_boost;
+-
+ 	sample->busy_scaled = busy_frac * 100;
+ 
+ 	target = READ_ONCE(global.no_turbo) ?
+@@ -2303,7 +2277,7 @@ static void intel_pstate_adjust_pstate(s
+ 		sample->aperf,
+ 		sample->tsc,
+ 		get_avg_frequency(cpu),
+-		fp_toint(cpu->iowait_boost * 100));
+		0);
+ }
+ 
+ static void intel_pstate_update_util(struct update_util_data *data, u64 time,
+@@ -2317,24 +2291,6 @@ static void intel_pstate_update_util(str
+ 		return;
+ 
+ 	delta_ns = time - cpu->last_update;
+-	if (flags & SCHED_CPUFREQ_IOWAIT) {
+-		/* Start over if the CPU may have been idle. */
+-		if (delta_ns > TICK_NSEC) {
+-			cpu->iowait_boost = ONE_EIGHTH_FP;
+-		} else if (cpu->iowait_boost >= ONE_EIGHTH_FP) {
+-			cpu->iowait_boost <<= 1;
+-			if (cpu->iowait_boost > int_tofp(1))
+-				cpu->iowait_boost = int_tofp(1);
+-		} else {
+-			cpu->iowait_boost = ONE_EIGHTH_FP;
+-		}
+-	} else if (cpu->iowait_boost) {
+-		/* Clear iowait_boost if the CPU may have been idle. */
+-		if (delta_ns > TICK_NSEC)
+-			cpu->iowait_boost = 0;
+-		else
+-			cpu->iowait_boost >>= 1;
+-	}
+ 	cpu->last_update = time;
+ 	delta_ns = time - cpu->sample.time;
+ 	if ((s64)delta_ns < INTEL_PSTATE_SAMPLING_INTERVAL)
+@@ -2832,7 +2788,7 @@ static void intel_cpufreq_trace(struct c
+ 		sample->aperf,
+ 		sample->tsc,
+ 		get_avg_frequency(cpu),
+-		fp_toint(cpu->iowait_boost * 100));
+		0);
+ }
+ 
+ static void intel_cpufreq_hwp_update(struct cpudata *cpu, u32 min, u32 max,
--- a/debian/patches/patchset-pf/cpuidle/0007-cpufreq-Remove-SCHED_CPUFREQ_IOWAIT-update.patch
+++ b/debian/patches/patchset-pf/cpuidle/0007-cpufreq-Remove-SCHED_CPUFREQ_IOWAIT-update.patch
@@ -0,0 +1,42 @@
+From fd1e0723b0a7ad140d2bf7cd9154997d5ece2b37 Mon Sep 17 00:00:00 2001
+From: Christian Loehle <christian.loehle@arm.com>
+Date: Thu, 5 Sep 2024 10:26:44 +0100
+Subject: cpufreq: Remove SCHED_CPUFREQ_IOWAIT update
+
+Neither intel_pstate nor schedutil care for the flag anymore, so
+remove the update and flag definition.
+
+Signed-off-by: Christian Loehle <christian.loehle@arm.com>
+---
+ include/linux/sched/cpufreq.h | 2 --
+ kernel/sched/fair.c           | 8 --------
+ 2 files changed, 10 deletions(-)
+
+--- a/include/linux/sched/cpufreq.h
+++ b/include/linux/sched/cpufreq.h
+@@ -8,8 +8,6 @@
+  * Interface between cpufreq drivers and the scheduler:
+  */
+ 
+-#define SCHED_CPUFREQ_IOWAIT	(1U << 0)
+-
+ #ifdef CONFIG_CPU_FREQ
+ struct cpufreq_policy;
+ 
+--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
+@@ -6768,14 +6768,6 @@ enqueue_task_fair(struct rq *rq, struct
+ 	 */
+ 	util_est_enqueue(&rq->cfs, p);
+ 
+-	/*
+-	 * If in_iowait is set, the code below may not trigger any cpufreq
+-	 * utilization updates, so do it here explicitly with the IOWAIT flag
+-	 * passed.
+-	 */
+-	if (p->in_iowait)
+-		cpufreq_update_util(rq, SCHED_CPUFREQ_IOWAIT);
+-
+ 	for_each_sched_entity(se) {
+ 		if (se->on_rq)
+ 			break;
--- a/debian/patches/patchset-pf/cpuidle/0008-io_uring-Do-not-set-iowait-before-sleeping.patch
+++ b/debian/patches/patchset-pf/cpuidle/0008-io_uring-Do-not-set-iowait-before-sleeping.patch
@@ -0,0 +1,55 @@
+From 30cdb8d7d06f51bb86142c537ea05bd01c31bb40 Mon Sep 17 00:00:00 2001
+From: Christian Loehle <christian.loehle@arm.com>
+Date: Thu, 5 Sep 2024 10:26:45 +0100
+Subject: io_uring: Do not set iowait before sleeping
+
+Setting in_iowait was introduced in commit
+8a796565cec3 ("io_uring: Use io_schedule* in cqring wait")
+to tackle a perf regression that was caused by menu taking iowait into
+account for synchronous IO and thus not selecting deeper states like in
+the io_uring counterpart.
+That behaviour is gone, so the workaround can be removed.
+
+Signed-off-by: Christian Loehle <christian.loehle@arm.com>
+---
+ io_uring/io_uring.c | 17 -----------------
+ 1 file changed, 17 deletions(-)
+
+--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
+@@ -2359,15 +2359,6 @@ int io_run_task_work_sig(struct io_ring_
+ 	return 0;
+ }
+ 
+-static bool current_pending_io(void)
+-{
+-	struct io_uring_task *tctx = current->io_uring;
+-
+-	if (!tctx)
+-		return false;
+-	return percpu_counter_read_positive(&tctx->inflight);
+-}
+-
+ /* when returns >0, the caller should retry */
+ static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx,
+ 					  struct io_wait_queue *iowq)
+@@ -2385,19 +2376,11 @@ static inline int io_cqring_wait_schedul
+ 	if (unlikely(io_should_wake(iowq)))
+ 		return 0;
+ 
+-	/*
+-	 * Mark us as being in io_wait if we have pending requests, so cpufreq
+-	 * can take into account that the task is waiting for IO - turns out
+-	 * to be important for low QD IO.
+-	 */
+-	if (current_pending_io())
+-		current->in_iowait = 1;
+ 	ret = 0;
+ 	if (iowq->timeout == KTIME_MAX)
+ 		schedule();
+ 	else if (!schedule_hrtimeout(&iowq->timeout, HRTIMER_MODE_ABS))
+ 		ret = -ETIME;
+-	current->in_iowait = 0;
+ 	return ret;
+ }
+