add 3rd-party/custom patches

3rd-party patch sets (in alphabetical order):

- bbr3
- ntsync5
- openwrt
- pf-kernel
- xanmod
- zen

No configuration changes for now.
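The files land under debian/patches/; to actually be applied at build time, each one would presumably also need an entry, one path per line, in the packaging's quilt-style series file (an assumption; consistent with "no configuration changes for now", this commit only adds the files):

```
patchset-pf/cpuidle/0001-cpuidle-menu-Remove-iowait-influence.patch
patchset-pf/cpuidle/0002-cpuidle-Prefer-teo-over-menu-governor.patch
```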
debian/patches/patchset-pf/cpuidle/0001-cpuidle-menu-Remove-iowait-influence.patch (new file, vendored, 180 lines)
@@ -0,0 +1,180 @@
From d31e903a364802c068ff23bdd448cc70eda71a7c Mon Sep 17 00:00:00 2001
From: Christian Loehle <christian.loehle@arm.com>
Date: Thu, 5 Sep 2024 10:26:38 +0100
Subject: cpuidle: menu: Remove iowait influence

Remove CPU iowaiters influence on idle state selection.
Remove the menu notion of performance multiplier which increased with
the number of tasks that went to iowait sleep on this CPU and haven't
woken up yet.

Relying on iowait for cpuidle is problematic for a few reasons:
1. There is no guarantee that an iowaiting task will wake up on the
same CPU.
2. The task being in iowait says nothing about the idle duration, we
could be selecting shallower states for a long time.
3. The task being in iowait doesn't always imply a performance hit
with increased latency.
4. If there is such a performance hit, the number of iowaiting tasks
doesn't directly correlate.
5. The definition of iowait altogether is vague at best, it is
sprinkled across kernel code.

Signed-off-by: Christian Loehle <christian.loehle@arm.com>
---
 drivers/cpuidle/governors/menu.c | 76 ++++----------------------------
 1 file changed, 9 insertions(+), 67 deletions(-)

--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -19,7 +19,7 @@
 
 #include "gov.h"
 
-#define BUCKETS 12
+#define BUCKETS 6
 #define INTERVAL_SHIFT 3
 #define INTERVALS (1UL << INTERVAL_SHIFT)
 #define RESOLUTION 1024
@@ -29,12 +29,11 @@
 /*
  * Concepts and ideas behind the menu governor
  *
- * For the menu governor, there are 3 decision factors for picking a C
+ * For the menu governor, there are 2 decision factors for picking a C
  * state:
  * 1) Energy break even point
- * 2) Performance impact
- * 3) Latency tolerance (from pmqos infrastructure)
- * These three factors are treated independently.
+ * 2) Latency tolerance (from pmqos infrastructure)
+ * These two factors are treated independently.
  *
  * Energy break even point
  * -----------------------
@@ -75,30 +74,6 @@
  * intervals and if the stand deviation of these 8 intervals is below a
  * threshold value, we use the average of these intervals as prediction.
  *
- * Limiting Performance Impact
- * ---------------------------
- * C states, especially those with large exit latencies, can have a real
- * noticeable impact on workloads, which is not acceptable for most sysadmins,
- * and in addition, less performance has a power price of its own.
- *
- * As a general rule of thumb, menu assumes that the following heuristic
- * holds:
- *     The busier the system, the less impact of C states is acceptable
- *
- * This rule-of-thumb is implemented using a performance-multiplier:
- * If the exit latency times the performance multiplier is longer than
- * the predicted duration, the C state is not considered a candidate
- * for selection due to a too high performance impact. So the higher
- * this multiplier is, the longer we need to be idle to pick a deep C
- * state, and thus the less likely a busy CPU will hit such a deep
- * C state.
- *
- * Currently there is only one value determining the factor:
- * 10 points are added for each process that is waiting for IO on this CPU.
- * (This value was experimentally determined.)
- * Utilization is no longer a factor as it was shown that it never contributed
- * significantly to the performance multiplier in the first place.
- *
  */
 
 struct menu_device {
@@ -112,19 +87,10 @@ struct menu_device {
 	int interval_ptr;
 };
 
-static inline int which_bucket(u64 duration_ns, unsigned int nr_iowaiters)
+static inline int which_bucket(u64 duration_ns)
 {
 	int bucket = 0;
 
-	/*
-	 * We keep two groups of stats; one with no
-	 * IO pending, one without.
-	 * This allows us to calculate
-	 * E(duration)|iowait
-	 */
-	if (nr_iowaiters)
-		bucket = BUCKETS/2;
-
 	if (duration_ns < 10ULL * NSEC_PER_USEC)
 		return bucket;
 	if (duration_ns < 100ULL * NSEC_PER_USEC)
@@ -138,19 +104,6 @@ static inline int which_bucket(u64 durat
 	return bucket + 5;
 }
 
-/*
- * Return a multiplier for the exit latency that is intended
- * to take performance requirements into account.
- * The more performance critical we estimate the system
- * to be, the higher this multiplier, and thus the higher
- * the barrier to go to an expensive C state.
- */
-static inline int performance_multiplier(unsigned int nr_iowaiters)
-{
-	/* for IO wait tasks (per cpu!) we add 10x each */
-	return 1 + 10 * nr_iowaiters;
-}
-
 static DEFINE_PER_CPU(struct menu_device, menu_devices);
 
 static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev);
@@ -258,8 +211,6 @@ static int menu_select(struct cpuidle_dr
 	struct menu_device *data = this_cpu_ptr(&menu_devices);
 	s64 latency_req = cpuidle_governor_latency_req(dev->cpu);
 	u64 predicted_ns;
-	u64 interactivity_req;
-	unsigned int nr_iowaiters;
 	ktime_t delta, delta_tick;
 	int i, idx;
 
@@ -268,8 +219,6 @@ static int menu_select(struct cpuidle_dr
 		data->needs_update = 0;
 	}
 
-	nr_iowaiters = nr_iowait_cpu(dev->cpu);
-
 	/* Find the shortest expected idle interval. */
 	predicted_ns = get_typical_interval(data) * NSEC_PER_USEC;
 	if (predicted_ns > RESIDENCY_THRESHOLD_NS) {
@@ -283,7 +232,7 @@ static int menu_select(struct cpuidle_dr
 		}
 
 		data->next_timer_ns = delta;
-		data->bucket = which_bucket(data->next_timer_ns, nr_iowaiters);
+		data->bucket = which_bucket(data->next_timer_ns);
 
 		/* Round up the result for half microseconds. */
 		timer_us = div_u64((RESOLUTION * DECAY * NSEC_PER_USEC) / 2 +
@@ -301,7 +250,7 @@ static int menu_select(struct cpuidle_dr
 		 */
 		data->next_timer_ns = KTIME_MAX;
 		delta_tick = TICK_NSEC / 2;
-		data->bucket = which_bucket(KTIME_MAX, nr_iowaiters);
+		data->bucket = which_bucket(KTIME_MAX);
 	}
 
 	if (unlikely(drv->state_count <= 1 || latency_req == 0) ||
@@ -328,15 +277,8 @@ static int menu_select(struct cpuidle_dr
 		 */
 		if (predicted_ns < TICK_NSEC)
 			predicted_ns = data->next_timer_ns;
-	} else {
-		/*
-		 * Use the performance multiplier and the user-configurable
-		 * latency_req to determine the maximum exit latency.
-		 */
-		interactivity_req = div64_u64(predicted_ns,
-					      performance_multiplier(nr_iowaiters));
-		if (latency_req > interactivity_req)
-			latency_req = interactivity_req;
+	} else if (latency_req > predicted_ns) {
+		latency_req = predicted_ns;
 	}
 
 	/*
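The functional core of this patch is the clamp computed in menu_select(): previously the PM QoS latency limit was tightened by dividing the predicted idle duration by the iowait-derived performance multiplier; now it is simply capped by the predicted duration itself. A minimal standalone sketch of the before and after logic (helper names here are illustrative, not kernel identifiers):

```c
#include <stdint.h>

static int64_t min64(int64_t a, int64_t b) { return a < b ? a : b; }

/* Old behaviour: each iowaiting task tightened the exit-latency cap,
 * biasing selection toward shallow C-states while IO was outstanding. */
static int64_t old_latency_cap(int64_t latency_req, int64_t predicted_ns,
                               unsigned int nr_iowaiters)
{
	int64_t perf_mult = 1 + 10 * nr_iowaiters; /* performance_multiplier() */

	return min64(latency_req, predicted_ns / perf_mult);
}

/* New behaviour: the cap depends only on the predicted idle duration
 * and the PM QoS limit, regardless of iowait. */
static int64_t new_latency_cap(int64_t latency_req, int64_t predicted_ns)
{
	return min64(latency_req, predicted_ns);
}
```

With, say, predicted_ns = 500000 and four iowaiters, the old cap would drop to roughly 12 us, ruling out most deep idle states; the new cap stays at the full 500 us.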
debian/patches/patchset-pf/cpuidle/0002-cpuidle-Prefer-teo-over-menu-governor.patch (new file, vendored, 58 lines)
@@ -0,0 +1,58 @@
From 3f840a42780323a4437dd1a417488d141c33af15 Mon Sep 17 00:00:00 2001
From: Christian Loehle <christian.loehle@arm.com>
Date: Thu, 5 Sep 2024 10:26:39 +0100
Subject: cpuidle: Prefer teo over menu governor

Since menu no longer has the interactivity boost, teo works better
overall, so make it the default.

Signed-off-by: Christian Loehle <christian.loehle@arm.com>
---
 drivers/cpuidle/Kconfig          | 5 +----
 drivers/cpuidle/governors/menu.c | 2 +-
 drivers/cpuidle/governors/teo.c  | 2 +-
 3 files changed, 3 insertions(+), 6 deletions(-)

--- a/drivers/cpuidle/Kconfig
+++ b/drivers/cpuidle/Kconfig
@@ -5,7 +5,7 @@ config CPU_IDLE
 	bool "CPU idle PM support"
 	default y if ACPI || PPC_PSERIES
 	select CPU_IDLE_GOV_LADDER if (!NO_HZ && !NO_HZ_IDLE)
-	select CPU_IDLE_GOV_MENU if (NO_HZ || NO_HZ_IDLE) && !CPU_IDLE_GOV_TEO
+	select CPU_IDLE_GOV_TEO if (NO_HZ || NO_HZ_IDLE) && !CPU_IDLE_GOV_MENU
 	help
 	  CPU idle is a generic framework for supporting software-controlled
 	  idle processor power management. It includes modular cross-platform
@@ -30,9 +30,6 @@ config CPU_IDLE_GOV_TEO
 	  This governor implements a simplified idle state selection method
 	  focused on timer events and does not do any interactivity boosting.
 
-	  Some workloads benefit from using it and it generally should be safe
-	  to use. Say Y here if you are not happy with the alternatives.
-
 config CPU_IDLE_GOV_HALTPOLL
 	bool "Haltpoll governor (for virtualized systems)"
 	depends on KVM_GUEST
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -508,7 +508,7 @@ static int menu_enable_device(struct cpu
 
 static struct cpuidle_governor menu_governor = {
 	.name = "menu",
-	.rating = 20,
+	.rating = 19,
 	.enable = menu_enable_device,
 	.select = menu_select,
 	.reflect = menu_reflect,
--- a/drivers/cpuidle/governors/teo.c
+++ b/drivers/cpuidle/governors/teo.c
@@ -537,7 +537,7 @@ static int teo_enable_device(struct cpui
 
 static struct cpuidle_governor teo_governor = {
 	.name = "teo",
-	.rating = 19,
+	.rating = 20,
 	.enable = teo_enable_device,
 	.select = teo_select,
 	.reflect = teo_reflect,
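The rating swap is what actually flips the default: cpuidle keeps the highest-rated registered governor as the current one unless the user overrides it (e.g. via cpuidle.governor= on the kernel command line). A simplified model of that selection rule, not the verbatim kernel code:

```c
/* Sketch of the rule applied in cpuidle_register_governor(): the
 * governor with the highest .rating becomes the default. After this
 * patch teo (20) outranks menu (19), so teo wins at boot. */
struct governor {
	const char *name;
	unsigned int rating;
};

static struct governor *curr_governor;

static void register_governor(struct governor *gov)
{
	if (!curr_governor || gov->rating > curr_governor->rating)
		curr_governor = gov;
}
```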
debian/patches/patchset-pf/cpuidle/0003-TEST-cpufreq-schedutil-Linear-iowait-boost-step.patch (new file, vendored, 39 lines)
@@ -0,0 +1,39 @@
From ca8c9368b6f28ef625716b03aa930acfb8afe158 Mon Sep 17 00:00:00 2001
From: Christian Loehle <christian.loehle@arm.com>
Date: Thu, 5 Sep 2024 10:26:40 +0100
Subject: TEST: cpufreq/schedutil: Linear iowait boost step

In preparation for capping iowait boost make the steps linear as
opposed to doubling.

Signed-off-by: Christian Loehle <christian.loehle@arm.com>
---
 kernel/sched/cpufreq_schedutil.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -267,7 +267,8 @@ static void sugov_iowait_boost(struct su
 	/* Double the boost at each request */
 	if (sg_cpu->iowait_boost) {
 		sg_cpu->iowait_boost =
-			min_t(unsigned int, sg_cpu->iowait_boost << 1, SCHED_CAPACITY_SCALE);
+			min_t(unsigned int,
+			      sg_cpu->iowait_boost + IOWAIT_BOOST_MIN, SCHED_CAPACITY_SCALE);
 		return;
 	}
 
@@ -308,11 +309,9 @@ static unsigned long sugov_iowait_apply(
 		/*
 		 * No boost pending; reduce the boost value.
 		 */
-		sg_cpu->iowait_boost >>= 1;
-		if (sg_cpu->iowait_boost < IOWAIT_BOOST_MIN) {
-			sg_cpu->iowait_boost = 0;
+		sg_cpu->iowait_boost -= IOWAIT_BOOST_MIN;
+		if (!sg_cpu->iowait_boost)
 			return 0;
-		}
 	}
 
 	sg_cpu->iowait_boost_pending = false;
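To see what changes: IOWAIT_BOOST_MIN is SCHED_CAPACITY_SCALE / 8 (the definition is visible in patch 0005, where it is finally removed), so the doubling ramp jumps 128, 256, 512, 1024, while the linear ramp walks the same range in eight even increments that a later cap can intersect at any level. A small self-contained comparison of the two ramps (constants assumed to match the kernel's):

```c
#include <stdio.h>

#define SCHED_CAPACITY_SCALE	1024U
#define IOWAIT_BOOST_MIN	(SCHED_CAPACITY_SCALE / 8)	/* 128 */

static unsigned int min_u(unsigned int a, unsigned int b)
{
	return a < b ? a : b;
}

int main(void)
{
	unsigned int doubling = IOWAIT_BOOST_MIN;
	unsigned int linear = IOWAIT_BOOST_MIN;

	/* Each iteration models one back-to-back iowait wakeup. */
	for (int req = 1; req <= 8; req++) {
		printf("request %d: doubling=%4u linear=%4u\n",
		       req, doubling, linear);
		doubling = min_u(doubling << 1, SCHED_CAPACITY_SCALE);
		linear = min_u(linear + IOWAIT_BOOST_MIN, SCHED_CAPACITY_SCALE);
	}
	return 0;
}
```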
debian/patches/patchset-pf/cpuidle/0004-TEST-cpufreq-schedutil-iowait-boost-cap-sysfs.patch (new file, vendored, 106 lines)
@@ -0,0 +1,106 @@
From 33f05bd16a4ac2f6f36c9eb88016e2375dcb597c Mon Sep 17 00:00:00 2001
From: Christian Loehle <christian.loehle@arm.com>
Date: Thu, 5 Sep 2024 10:26:41 +0100
Subject: TEST: cpufreq/schedutil: iowait boost cap sysfs

Add a knob to cap applied iowait_boost per sysfs.
This is to test for potential regressions.

Signed-off-by: Christian Loehle <christian.loehle@arm.com>
---
 kernel/sched/cpufreq_schedutil.c | 38 ++++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)

--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -11,6 +11,7 @@
 struct sugov_tunables {
 	struct gov_attr_set attr_set;
 	unsigned int rate_limit_us;
+	unsigned int iowait_boost_cap;
 };
 
 struct sugov_policy {
@@ -35,6 +36,8 @@ struct sugov_policy {
 
 	bool limits_changed;
 	bool need_freq_update;
+
+	unsigned int iowait_boost_cap;
 };
 
 struct sugov_cpu {
@@ -316,6 +319,9 @@ static unsigned long sugov_iowait_apply(
 
 	sg_cpu->iowait_boost_pending = false;
 
+	if (sg_cpu->iowait_boost > sg_cpu->sg_policy->iowait_boost_cap)
+		sg_cpu->iowait_boost = sg_cpu->sg_policy->iowait_boost_cap;
+
 	/*
 	 * sg_cpu->util is already in capacity scale; convert iowait_boost
 	 * into the same scale so we can compare.
@@ -554,6 +560,14 @@ static ssize_t rate_limit_us_show(struct
 	return sprintf(buf, "%u\n", tunables->rate_limit_us);
 }
 
+
+static ssize_t iowait_boost_cap_show(struct gov_attr_set *attr_set, char *buf)
+{
+	struct sugov_tunables *tunables = to_sugov_tunables(attr_set);
+
+	return sprintf(buf, "%u\n", tunables->iowait_boost_cap);
+}
+
 static ssize_t
 rate_limit_us_store(struct gov_attr_set *attr_set, const char *buf, size_t count)
 {
@@ -572,10 +586,30 @@ rate_limit_us_store(struct gov_attr_set
 	return count;
 }
 
+static ssize_t
+iowait_boost_cap_store(struct gov_attr_set *attr_set, const char *buf, size_t count)
+{
+	struct sugov_tunables *tunables = to_sugov_tunables(attr_set);
+	struct sugov_policy *sg_policy;
+	unsigned int iowait_boost_cap;
+
+	if (kstrtouint(buf, 10, &iowait_boost_cap))
+		return -EINVAL;
+
+	tunables->iowait_boost_cap = iowait_boost_cap;
+
+	list_for_each_entry(sg_policy, &attr_set->policy_list, tunables_hook)
+		sg_policy->iowait_boost_cap = iowait_boost_cap;
+
+	return count;
+}
+
 static struct governor_attr rate_limit_us = __ATTR_RW(rate_limit_us);
+static struct governor_attr iowait_boost_cap = __ATTR_RW(iowait_boost_cap);
 
 static struct attribute *sugov_attrs[] = {
 	&rate_limit_us.attr,
+	&iowait_boost_cap.attr,
 	NULL
 };
 ATTRIBUTE_GROUPS(sugov);
@@ -765,6 +799,8 @@ static int sugov_init(struct cpufreq_pol
 
 	tunables->rate_limit_us = cpufreq_policy_transition_delay_us(policy);
 
+	tunables->iowait_boost_cap = SCHED_CAPACITY_SCALE;
+
 	policy->governor_data = sg_policy;
 	sg_policy->tunables = tunables;
 
@@ -834,6 +870,8 @@ static int sugov_start(struct cpufreq_po
 	sg_policy->limits_changed = false;
 	sg_policy->cached_raw_freq = 0;
 
+	sg_policy->iowait_boost_cap = SCHED_CAPACITY_SCALE;
+
 	sg_policy->need_freq_update = cpufreq_driver_test_flags(CPUFREQ_NEED_UPDATE_LIMITS);
 
 	if (policy_is_shared(policy))
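Once applied, the cap should be readable and writable per policy next to the existing schedutil attribute, presumably at `/sys/devices/system/cpu/cpufreq/policyN/schedutil/iowait_boost_cap` (the directory where rate_limit_us is exposed; the exact path is an assumption, not verified here). It defaults to SCHED_CAPACITY_SCALE (1024), i.e. no capping; writing e.g. 128 would limit any applied boost to IOWAIT_BOOST_MIN.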
debian/patches/patchset-pf/cpuidle/0005-cpufreq-schedutil-Remove-iowait-boost.patch (new file, vendored, 325 lines)
@@ -0,0 +1,325 @@
From 33eb6c08d7c615fad308001921c7b1148cbccfde Mon Sep 17 00:00:00 2001
From: Christian Loehle <christian.loehle@arm.com>
Date: Thu, 5 Sep 2024 10:26:42 +0100
Subject: cpufreq/schedutil: Remove iowait boost

iowait boost in schedutil was introduced by
commit 21ca6d2c52f8 ("cpufreq: schedutil: Add iowait boosting"),
with it more or less following intel_pstate's approach to increase
frequency after an iowait wakeup.
Behaviour that is piggy-backed onto iowait boost is problematic
due to a lot of reasons, so remove it.

For schedutil specifically these are some of the reasons:
1. Boosting is applied even in scenarios where it doesn't improve
throughput.
2. The boost is not accounted for in EAS: a) feec() will only consider
the actual task utilization for task placement, but another CPU might
be more energy-efficient at that capacity than the boosted one.
b) When placing a non-IO task while a CPU is boosted compute_energy()
assumes a lower OPP than what is actually applied. This leads to
wrong EAS decisions.
3. Actual IO heavy workloads are hardly distinguished from infrequent
in_iowait wakeups.
4. The boost isn't accounted for in task placement.
5. The boost isn't associated with a task, it therefore lingers on the
rq even after the responsible task has migrated / stopped.
6. The boost isn't associated with a task, it therefore needs to ramp
up again when migrated.
7. Since schedutil doesn't know which task is getting woken up,
multiple unrelated in_iowait tasks lead to boosting.
8. Boosting is hard to control with UCLAMP_MAX (which is only active
when the task is on the rq, which for boosted tasks is usually not
the case for most of the time).

One benefit of schedutil specifically is the reliance on the
scheduler's utilization signals, which have evolved a lot since its
original introduction. Some cases that benefitted from iowait boosting
in the past can now be covered by e.g. util_est.

Signed-off-by: Christian Loehle <christian.loehle@arm.com>
---
 kernel/sched/cpufreq_schedutil.c | 181 +------------------------------
 1 file changed, 3 insertions(+), 178 deletions(-)

--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -6,12 +6,9 @@
  * Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
  */
 
-#define IOWAIT_BOOST_MIN (SCHED_CAPACITY_SCALE / 8)
-
 struct sugov_tunables {
 	struct gov_attr_set attr_set;
 	unsigned int rate_limit_us;
-	unsigned int iowait_boost_cap;
 };
 
 struct sugov_policy {
@@ -36,8 +33,6 @@ struct sugov_policy {
 
 	bool limits_changed;
 	bool need_freq_update;
-
-	unsigned int iowait_boost_cap;
 };
 
 struct sugov_cpu {
@@ -45,10 +40,6 @@ struct sugov_cpu {
 	struct sugov_policy *sg_policy;
 	unsigned int cpu;
 
-	bool iowait_boost_pending;
-	unsigned int iowait_boost;
-	u64 last_update;
-
 	unsigned long util;
 	unsigned long bw_min;
 
@@ -198,137 +189,15 @@ unsigned long sugov_effective_cpu_perf(i
 	return max(min, max);
 }
 
-static void sugov_get_util(struct sugov_cpu *sg_cpu, unsigned long boost)
+static void sugov_get_util(struct sugov_cpu *sg_cpu)
 {
 	unsigned long min, max, util = cpu_util_cfs_boost(sg_cpu->cpu);
 
 	util = effective_cpu_util(sg_cpu->cpu, util, &min, &max);
-	util = max(util, boost);
 	sg_cpu->bw_min = min;
 	sg_cpu->util = sugov_effective_cpu_perf(sg_cpu->cpu, util, min, max);
 }
 
-/**
- * sugov_iowait_reset() - Reset the IO boost status of a CPU.
- * @sg_cpu: the sugov data for the CPU to boost
- * @time: the update time from the caller
- * @set_iowait_boost: true if an IO boost has been requested
- *
- * The IO wait boost of a task is disabled after a tick since the last update
- * of a CPU. If a new IO wait boost is requested after more then a tick, then
- * we enable the boost starting from IOWAIT_BOOST_MIN, which improves energy
- * efficiency by ignoring sporadic wakeups from IO.
- */
-static bool sugov_iowait_reset(struct sugov_cpu *sg_cpu, u64 time,
-			       bool set_iowait_boost)
-{
-	s64 delta_ns = time - sg_cpu->last_update;
-
-	/* Reset boost only if a tick has elapsed since last request */
-	if (delta_ns <= TICK_NSEC)
-		return false;
-
-	sg_cpu->iowait_boost = set_iowait_boost ? IOWAIT_BOOST_MIN : 0;
-	sg_cpu->iowait_boost_pending = set_iowait_boost;
-
-	return true;
-}
-
-/**
- * sugov_iowait_boost() - Updates the IO boost status of a CPU.
- * @sg_cpu: the sugov data for the CPU to boost
- * @time: the update time from the caller
- * @flags: SCHED_CPUFREQ_IOWAIT if the task is waking up after an IO wait
- *
- * Each time a task wakes up after an IO operation, the CPU utilization can be
- * boosted to a certain utilization which doubles at each "frequent and
- * successive" wakeup from IO, ranging from IOWAIT_BOOST_MIN to the utilization
- * of the maximum OPP.
- *
- * To keep doubling, an IO boost has to be requested at least once per tick,
- * otherwise we restart from the utilization of the minimum OPP.
- */
-static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, u64 time,
-			       unsigned int flags)
-{
-	bool set_iowait_boost = flags & SCHED_CPUFREQ_IOWAIT;
-
-	/* Reset boost if the CPU appears to have been idle enough */
-	if (sg_cpu->iowait_boost &&
-	    sugov_iowait_reset(sg_cpu, time, set_iowait_boost))
-		return;
-
-	/* Boost only tasks waking up after IO */
-	if (!set_iowait_boost)
-		return;
-
-	/* Ensure boost doubles only one time at each request */
-	if (sg_cpu->iowait_boost_pending)
-		return;
-	sg_cpu->iowait_boost_pending = true;
-
-	/* Double the boost at each request */
-	if (sg_cpu->iowait_boost) {
-		sg_cpu->iowait_boost =
-			min_t(unsigned int,
-			      sg_cpu->iowait_boost + IOWAIT_BOOST_MIN, SCHED_CAPACITY_SCALE);
-		return;
-	}
-
-	/* First wakeup after IO: start with minimum boost */
-	sg_cpu->iowait_boost = IOWAIT_BOOST_MIN;
-}
-
-/**
- * sugov_iowait_apply() - Apply the IO boost to a CPU.
- * @sg_cpu: the sugov data for the cpu to boost
- * @time: the update time from the caller
- * @max_cap: the max CPU capacity
- *
- * A CPU running a task which woken up after an IO operation can have its
- * utilization boosted to speed up the completion of those IO operations.
- * The IO boost value is increased each time a task wakes up from IO, in
- * sugov_iowait_apply(), and it's instead decreased by this function,
- * each time an increase has not been requested (!iowait_boost_pending).
- *
- * A CPU which also appears to have been idle for at least one tick has also
- * its IO boost utilization reset.
- *
- * This mechanism is designed to boost high frequently IO waiting tasks, while
- * being more conservative on tasks which does sporadic IO operations.
- */
-static unsigned long sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time,
-					unsigned long max_cap)
-{
-	/* No boost currently required */
-	if (!sg_cpu->iowait_boost)
-		return 0;
-
-	/* Reset boost if the CPU appears to have been idle enough */
-	if (sugov_iowait_reset(sg_cpu, time, false))
-		return 0;
-
-	if (!sg_cpu->iowait_boost_pending) {
-		/*
-		 * No boost pending; reduce the boost value.
-		 */
-		sg_cpu->iowait_boost -= IOWAIT_BOOST_MIN;
-		if (!sg_cpu->iowait_boost)
-			return 0;
-	}
-
-	sg_cpu->iowait_boost_pending = false;
-
-	if (sg_cpu->iowait_boost > sg_cpu->sg_policy->iowait_boost_cap)
-		sg_cpu->iowait_boost = sg_cpu->sg_policy->iowait_boost_cap;
-
-	/*
-	 * sg_cpu->util is already in capacity scale; convert iowait_boost
-	 * into the same scale so we can compare.
-	 */
-	return (sg_cpu->iowait_boost * max_cap) >> SCHED_CAPACITY_SHIFT;
-}
-
 #ifdef CONFIG_NO_HZ_COMMON
 static bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu)
 {
@@ -356,18 +225,12 @@ static inline bool sugov_update_single_c
 					      u64 time, unsigned long max_cap,
 					      unsigned int flags)
 {
-	unsigned long boost;
-
-	sugov_iowait_boost(sg_cpu, time, flags);
-	sg_cpu->last_update = time;
-
 	ignore_dl_rate_limit(sg_cpu);
 
 	if (!sugov_should_update_freq(sg_cpu->sg_policy, time))
 		return false;
 
-	boost = sugov_iowait_apply(sg_cpu, time, max_cap);
-	sugov_get_util(sg_cpu, boost);
+	sugov_get_util(sg_cpu);
 
 	return true;
 }
@@ -468,11 +331,8 @@ static unsigned int sugov_next_freq_shar
 
 	for_each_cpu(j, policy->cpus) {
 		struct sugov_cpu *j_sg_cpu = &per_cpu(sugov_cpu, j);
-		unsigned long boost;
-
-		boost = sugov_iowait_apply(j_sg_cpu, time, max_cap);
-		sugov_get_util(j_sg_cpu, boost);
 
+		sugov_get_util(j_sg_cpu);
 		util = max(j_sg_cpu->util, util);
 	}
 
@@ -488,9 +348,6 @@ sugov_update_shared(struct update_util_d
 
 	raw_spin_lock(&sg_policy->update_lock);
 
-	sugov_iowait_boost(sg_cpu, time, flags);
-	sg_cpu->last_update = time;
-
 	ignore_dl_rate_limit(sg_cpu);
 
 	if (sugov_should_update_freq(sg_policy, time)) {
@@ -560,14 +417,6 @@ static ssize_t rate_limit_us_show(struct
 	return sprintf(buf, "%u\n", tunables->rate_limit_us);
 }
 
-
-static ssize_t iowait_boost_cap_show(struct gov_attr_set *attr_set, char *buf)
-{
-	struct sugov_tunables *tunables = to_sugov_tunables(attr_set);
-
-	return sprintf(buf, "%u\n", tunables->iowait_boost_cap);
-}
-
 static ssize_t
 rate_limit_us_store(struct gov_attr_set *attr_set, const char *buf, size_t count)
 {
@@ -586,30 +435,10 @@ rate_limit_us_store(struct gov_attr_set
 	return count;
 }
 
-static ssize_t
-iowait_boost_cap_store(struct gov_attr_set *attr_set, const char *buf, size_t count)
-{
-	struct sugov_tunables *tunables = to_sugov_tunables(attr_set);
-	struct sugov_policy *sg_policy;
-	unsigned int iowait_boost_cap;
-
-	if (kstrtouint(buf, 10, &iowait_boost_cap))
-		return -EINVAL;
-
-	tunables->iowait_boost_cap = iowait_boost_cap;
-
-	list_for_each_entry(sg_policy, &attr_set->policy_list, tunables_hook)
-		sg_policy->iowait_boost_cap = iowait_boost_cap;
-
-	return count;
-}
-
 static struct governor_attr rate_limit_us = __ATTR_RW(rate_limit_us);
-static struct governor_attr iowait_boost_cap = __ATTR_RW(iowait_boost_cap);
 
 static struct attribute *sugov_attrs[] = {
 	&rate_limit_us.attr,
-	&iowait_boost_cap.attr,
 	NULL
 };
 ATTRIBUTE_GROUPS(sugov);
@@ -799,8 +628,6 @@ static int sugov_init(struct cpufreq_pol
 
 	tunables->rate_limit_us = cpufreq_policy_transition_delay_us(policy);
 
-	tunables->iowait_boost_cap = SCHED_CAPACITY_SCALE;
-
 	policy->governor_data = sg_policy;
 	sg_policy->tunables = tunables;
 
@@ -870,8 +697,6 @@ static int sugov_start(struct cpufreq_po
 	sg_policy->limits_changed = false;
 	sg_policy->cached_raw_freq = 0;
 
-	sg_policy->iowait_boost_cap = SCHED_CAPACITY_SCALE;
-
 	sg_policy->need_freq_update = cpufreq_driver_test_flags(CPUFREQ_NEED_UPDATE_LIMITS);
 
 	if (policy_is_shared(policy))
debian/patches/patchset-pf/cpuidle/0006-cpufreq-intel_pstate-Remove-iowait-boost.patch (new file, vendored, 113 lines)
@@ -0,0 +1,113 @@
From af7bbb59c2411e985a5d79173af5686337b4af9b Mon Sep 17 00:00:00 2001
From: Christian Loehle <christian.loehle@arm.com>
Date: Thu, 5 Sep 2024 10:26:43 +0100
Subject: cpufreq: intel_pstate: Remove iowait boost

Analogous to schedutil, remove iowait boost for the same reasons.

Signed-off-by: Christian Loehle <christian.loehle@arm.com>
---
 drivers/cpufreq/intel_pstate.c | 50 ++--------------------------------
 1 file changed, 3 insertions(+), 47 deletions(-)

--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -191,7 +191,6 @@ struct global_params {
  * @policy: CPUFreq policy value
  * @update_util: CPUFreq utility callback information
  * @update_util_set: CPUFreq utility callback is set
- * @iowait_boost: iowait-related boost fraction
  * @last_update: Time of the last update.
  * @pstate: Stores P state limits for this CPU
  * @vid: Stores VID limits for this CPU
@@ -245,7 +244,6 @@ struct cpudata {
 	struct acpi_processor_performance acpi_perf_data;
 	bool valid_pss_table;
 #endif
-	unsigned int iowait_boost;
 	s16 epp_powersave;
 	s16 epp_policy;
 	s16 epp_default;
@@ -2136,28 +2134,7 @@ static inline void intel_pstate_update_u
 {
 	cpu->sample.time = time;
 
-	if (cpu->sched_flags & SCHED_CPUFREQ_IOWAIT) {
-		bool do_io = false;
-
-		cpu->sched_flags = 0;
-		/*
-		 * Set iowait_boost flag and update time. Since IO WAIT flag
-		 * is set all the time, we can't just conclude that there is
-		 * some IO bound activity is scheduled on this CPU with just
-		 * one occurrence. If we receive at least two in two
-		 * consecutive ticks, then we treat as boost candidate.
-		 */
-		if (time_before64(time, cpu->last_io_update + 2 * TICK_NSEC))
-			do_io = true;
-
-		cpu->last_io_update = time;
-
-		if (do_io)
-			intel_pstate_hwp_boost_up(cpu);
-
-	} else {
-		intel_pstate_hwp_boost_down(cpu);
-	}
+	intel_pstate_hwp_boost_down(cpu);
 }
 
 static inline void intel_pstate_update_util_hwp(struct update_util_data *data,
@@ -2240,9 +2217,6 @@ static inline int32_t get_target_pstate(
 	busy_frac = div_fp(sample->mperf << cpu->aperf_mperf_shift,
 			   sample->tsc);
 
-	if (busy_frac < cpu->iowait_boost)
-		busy_frac = cpu->iowait_boost;
-
 	sample->busy_scaled = busy_frac * 100;
 
 	target = READ_ONCE(global.no_turbo) ?
@@ -2303,7 +2277,7 @@ static void intel_pstate_adjust_pstate(s
 		sample->aperf,
 		sample->tsc,
 		get_avg_frequency(cpu),
-		fp_toint(cpu->iowait_boost * 100));
+		0);
 }
 
 static void intel_pstate_update_util(struct update_util_data *data, u64 time,
@@ -2317,24 +2291,6 @@ static void intel_pstate_update_util(str
 		return;
 
 	delta_ns = time - cpu->last_update;
-	if (flags & SCHED_CPUFREQ_IOWAIT) {
-		/* Start over if the CPU may have been idle. */
-		if (delta_ns > TICK_NSEC) {
-			cpu->iowait_boost = ONE_EIGHTH_FP;
-		} else if (cpu->iowait_boost >= ONE_EIGHTH_FP) {
-			cpu->iowait_boost <<= 1;
-			if (cpu->iowait_boost > int_tofp(1))
-				cpu->iowait_boost = int_tofp(1);
-		} else {
-			cpu->iowait_boost = ONE_EIGHTH_FP;
-		}
-	} else if (cpu->iowait_boost) {
-		/* Clear iowait_boost if the CPU may have been idle. */
-		if (delta_ns > TICK_NSEC)
-			cpu->iowait_boost = 0;
-		else
-			cpu->iowait_boost >>= 1;
-	}
 	cpu->last_update = time;
 	delta_ns = time - cpu->sample.time;
 	if ((s64)delta_ns < INTEL_PSTATE_SAMPLING_INTERVAL)
@@ -2832,7 +2788,7 @@ static void intel_cpufreq_trace(struct c
 		sample->aperf,
 		sample->tsc,
 		get_avg_frequency(cpu),
-		fp_toint(cpu->iowait_boost * 100));
+		0);
 }
 
 static void intel_cpufreq_hwp_update(struct cpudata *cpu, u32 min, u32 max,
debian/patches/patchset-pf/cpuidle/0007-cpufreq-Remove-SCHED_CPUFREQ_IOWAIT-update.patch (new file, vendored, 42 lines)
@@ -0,0 +1,42 @@
From fd1e0723b0a7ad140d2bf7cd9154997d5ece2b37 Mon Sep 17 00:00:00 2001
From: Christian Loehle <christian.loehle@arm.com>
Date: Thu, 5 Sep 2024 10:26:44 +0100
Subject: cpufreq: Remove SCHED_CPUFREQ_IOWAIT update

Neither intel_pstate nor schedutil care for the flag anymore, so
remove the update and flag definition.

Signed-off-by: Christian Loehle <christian.loehle@arm.com>
---
 include/linux/sched/cpufreq.h | 2 --
 kernel/sched/fair.c           | 8 --------
 2 files changed, 10 deletions(-)

--- a/include/linux/sched/cpufreq.h
+++ b/include/linux/sched/cpufreq.h
@@ -8,8 +8,6 @@
  * Interface between cpufreq drivers and the scheduler:
  */
 
-#define SCHED_CPUFREQ_IOWAIT	(1U << 0)
-
 #ifdef CONFIG_CPU_FREQ
 struct cpufreq_policy;
 
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6768,14 +6768,6 @@ enqueue_task_fair(struct rq *rq, struct
 	 */
 	util_est_enqueue(&rq->cfs, p);
 
-	/*
-	 * If in_iowait is set, the code below may not trigger any cpufreq
-	 * utilization updates, so do it here explicitly with the IOWAIT flag
-	 * passed.
-	 */
-	if (p->in_iowait)
-		cpufreq_update_util(rq, SCHED_CPUFREQ_IOWAIT);
-
 	for_each_sched_entity(se) {
 		if (se->on_rq)
 			break;
debian/patches/patchset-pf/cpuidle/0008-io_uring-Do-not-set-iowait-before-sleeping.patch (new file, vendored, 55 lines)
@@ -0,0 +1,55 @@
From 30cdb8d7d06f51bb86142c537ea05bd01c31bb40 Mon Sep 17 00:00:00 2001
From: Christian Loehle <christian.loehle@arm.com>
Date: Thu, 5 Sep 2024 10:26:45 +0100
Subject: io_uring: Do not set iowait before sleeping

Setting in_iowait was introduced in commit
8a796565cec3 ("io_uring: Use io_schedule* in cqring wait")
to tackle a perf regression that was caused by menu taking iowait into
account for synchronous IO and thus not selecting deeper states like in
the io_uring counterpart.
That behaviour is gone, so the workaround can be removed.

Signed-off-by: Christian Loehle <christian.loehle@arm.com>
---
 io_uring/io_uring.c | 17 -----------------
 1 file changed, 17 deletions(-)

--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -2359,15 +2359,6 @@ int io_run_task_work_sig(struct io_ring_
 	return 0;
 }
 
-static bool current_pending_io(void)
-{
-	struct io_uring_task *tctx = current->io_uring;
-
-	if (!tctx)
-		return false;
-	return percpu_counter_read_positive(&tctx->inflight);
-}
-
 /* when returns >0, the caller should retry */
 static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx,
 					  struct io_wait_queue *iowq)
@@ -2385,19 +2376,11 @@ static inline int io_cqring_wait_schedul
 	if (unlikely(io_should_wake(iowq)))
 		return 0;
 
-	/*
-	 * Mark us as being in io_wait if we have pending requests, so cpufreq
-	 * can take into account that the task is waiting for IO - turns out
-	 * to be important for low QD IO.
-	 */
-	if (current_pending_io())
-		current->in_iowait = 1;
 	ret = 0;
 	if (iowq->timeout == KTIME_MAX)
 		schedule();
 	else if (!schedule_hrtimeout(&iowq->timeout, HRTIMER_MODE_ABS))
 		ret = -ETIME;
-	current->in_iowait = 0;
 	return ret;
 }