1
0

refresh patches

This commit is contained in:
2025-03-27 01:51:30 +03:00
parent 3d597650a9
commit b65c570ac2
239 changed files with 14214 additions and 9267 deletions

View File

@@ -0,0 +1,59 @@
From b6c0305214154bc26d20b130266fc1ba8341b58c Mon Sep 17 00:00:00 2001
From: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Date: Wed, 5 Feb 2025 11:25:14 +0000
Subject: cpufreq/amd-pstate: Modify the min_perf calculation in adjust_perf
callback
Instead of setting a fixed floor at lowest_nonlinear_perf, use the
min_limit_perf value, so that it gives the user the freedom to lower the
floor further.
There are two minimum frequency/perf limits that we need to consider in
the adjust_perf callback. One provided by schedutil i.e. the sg_cpu->bw_min
value passed in _min_perf arg, another is the effective value of
min_freq_qos request that is updated in cpudata->min_limit_perf. Modify the
code to use the bigger of these two values.
Signed-off-by: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
---
drivers/cpufreq/amd-pstate.c | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -672,7 +672,7 @@ static void amd_pstate_adjust_perf(unsig
unsigned long capacity)
{
unsigned long max_perf, min_perf, des_perf,
- cap_perf, lowest_nonlinear_perf;
+ cap_perf, min_limit_perf;
struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
struct amd_cpudata *cpudata;
@@ -684,20 +684,20 @@ static void amd_pstate_adjust_perf(unsig
if (policy->min != cpudata->min_limit_freq || policy->max != cpudata->max_limit_freq)
amd_pstate_update_min_max_limit(policy);
-
cap_perf = READ_ONCE(cpudata->highest_perf);
- lowest_nonlinear_perf = READ_ONCE(cpudata->lowest_nonlinear_perf);
+ min_limit_perf = READ_ONCE(cpudata->min_limit_perf);
des_perf = cap_perf;
if (target_perf < capacity)
des_perf = DIV_ROUND_UP(cap_perf * target_perf, capacity);
- min_perf = READ_ONCE(cpudata->lowest_perf);
if (_min_perf < capacity)
min_perf = DIV_ROUND_UP(cap_perf * _min_perf, capacity);
+ else
+ min_perf = cap_perf;
- if (min_perf < lowest_nonlinear_perf)
- min_perf = lowest_nonlinear_perf;
+ if (min_perf < min_limit_perf)
+ min_perf = min_limit_perf;
max_perf = cpudata->max_limit_perf;
if (max_perf < min_perf)

View File

@@ -0,0 +1,27 @@
From 6e51c53b5e940312c71ce5ea68cf94a000beab01 Mon Sep 17 00:00:00 2001
From: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Date: Wed, 5 Feb 2025 11:25:15 +0000
Subject: cpufreq/amd-pstate: Remove the redundant des_perf clamping in
adjust_perf
des_perf is later on clamped between min_perf and max_perf in
amd_pstate_update. So, remove the redundant clamping from
amd_pstate_adjust_perf.
Signed-off-by: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
---
drivers/cpufreq/amd-pstate.c | 2 --
1 file changed, 2 deletions(-)
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -703,8 +703,6 @@ static void amd_pstate_adjust_perf(unsig
if (max_perf < min_perf)
max_perf = min_perf;
- des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf);
-
amd_pstate_update(cpudata, min_perf, des_perf, max_perf, true,
policy->governor->flags);
cpufreq_cpu_put(policy);

View File

@@ -0,0 +1,51 @@
From ad3fffe8ff1f18ad437d8b0d0bb602ba3c24adf7 Mon Sep 17 00:00:00 2001
From: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Date: Wed, 5 Feb 2025 11:25:16 +0000
Subject: cpufreq/amd-pstate: Pass min/max_limit_perf as min/max_perf to
amd_pstate_update
Currently, amd_pstate_update_freq passes the hardware perf limits as
min/max_perf to amd_pstate_update, which eventually gets programmed into
the min/max_perf fields of the CPPC_REQ register.
Instead pass the effective perf limits i.e. min/max_limit_perf values to
amd_pstate_update as min/max_perf.
Signed-off-by: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
---
drivers/cpufreq/amd-pstate.c | 9 ++++-----
1 file changed, 4 insertions(+), 5 deletions(-)
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -615,7 +615,7 @@ static int amd_pstate_update_freq(struct
{
struct cpufreq_freqs freqs;
struct amd_cpudata *cpudata = policy->driver_data;
- unsigned long max_perf, min_perf, des_perf, cap_perf;
+ unsigned long des_perf, cap_perf;
if (!cpudata->max_freq)
return -ENODEV;
@@ -624,8 +624,6 @@ static int amd_pstate_update_freq(struct
amd_pstate_update_min_max_limit(policy);
cap_perf = READ_ONCE(cpudata->highest_perf);
- min_perf = READ_ONCE(cpudata->lowest_perf);
- max_perf = cap_perf;
freqs.old = policy->cur;
freqs.new = target_freq;
@@ -642,8 +640,9 @@ static int amd_pstate_update_freq(struct
if (!fast_switch)
cpufreq_freq_transition_begin(policy, &freqs);
- amd_pstate_update(cpudata, min_perf, des_perf,
- max_perf, fast_switch, policy->governor->flags);
+ amd_pstate_update(cpudata, cpudata->min_limit_perf, des_perf,
+ cpudata->max_limit_perf, fast_switch,
+ policy->governor->flags);
if (!fast_switch)
cpufreq_freq_transition_end(policy, &freqs, false);

View File

@@ -0,0 +1,355 @@
From 300686c32b77583f45c6763535da85f2242bf820 Mon Sep 17 00:00:00 2001
From: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Date: Wed, 5 Feb 2025 11:25:17 +0000
Subject: cpufreq/amd-pstate: Convert all perf values to u8
All perf values are always within 0-255 range, hence convert their
datatype to u8 everywhere.
Signed-off-by: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
---
drivers/cpufreq/amd-pstate-trace.h | 46 +++++++++++------------
drivers/cpufreq/amd-pstate.c | 60 +++++++++++++++---------------
drivers/cpufreq/amd-pstate.h | 18 ++++-----
3 files changed, 62 insertions(+), 62 deletions(-)
--- a/drivers/cpufreq/amd-pstate-trace.h
+++ b/drivers/cpufreq/amd-pstate-trace.h
@@ -24,9 +24,9 @@
TRACE_EVENT(amd_pstate_perf,
- TP_PROTO(unsigned long min_perf,
- unsigned long target_perf,
- unsigned long capacity,
+ TP_PROTO(u8 min_perf,
+ u8 target_perf,
+ u8 capacity,
u64 freq,
u64 mperf,
u64 aperf,
@@ -47,9 +47,9 @@ TRACE_EVENT(amd_pstate_perf,
),
TP_STRUCT__entry(
- __field(unsigned long, min_perf)
- __field(unsigned long, target_perf)
- __field(unsigned long, capacity)
+ __field(u8, min_perf)
+ __field(u8, target_perf)
+ __field(u8, capacity)
__field(unsigned long long, freq)
__field(unsigned long long, mperf)
__field(unsigned long long, aperf)
@@ -70,10 +70,10 @@ TRACE_EVENT(amd_pstate_perf,
__entry->fast_switch = fast_switch;
),
- TP_printk("amd_min_perf=%lu amd_des_perf=%lu amd_max_perf=%lu freq=%llu mperf=%llu aperf=%llu tsc=%llu cpu_id=%u fast_switch=%s",
- (unsigned long)__entry->min_perf,
- (unsigned long)__entry->target_perf,
- (unsigned long)__entry->capacity,
+ TP_printk("amd_min_perf=%hhu amd_des_perf=%hhu amd_max_perf=%hhu freq=%llu mperf=%llu aperf=%llu tsc=%llu cpu_id=%u fast_switch=%s",
+ (u8)__entry->min_perf,
+ (u8)__entry->target_perf,
+ (u8)__entry->capacity,
(unsigned long long)__entry->freq,
(unsigned long long)__entry->mperf,
(unsigned long long)__entry->aperf,
@@ -86,10 +86,10 @@ TRACE_EVENT(amd_pstate_perf,
TRACE_EVENT(amd_pstate_epp_perf,
TP_PROTO(unsigned int cpu_id,
- unsigned int highest_perf,
- unsigned int epp,
- unsigned int min_perf,
- unsigned int max_perf,
+ u8 highest_perf,
+ u8 epp,
+ u8 min_perf,
+ u8 max_perf,
bool boost
),
@@ -102,10 +102,10 @@ TRACE_EVENT(amd_pstate_epp_perf,
TP_STRUCT__entry(
__field(unsigned int, cpu_id)
- __field(unsigned int, highest_perf)
- __field(unsigned int, epp)
- __field(unsigned int, min_perf)
- __field(unsigned int, max_perf)
+ __field(u8, highest_perf)
+ __field(u8, epp)
+ __field(u8, min_perf)
+ __field(u8, max_perf)
__field(bool, boost)
),
@@ -118,12 +118,12 @@ TRACE_EVENT(amd_pstate_epp_perf,
__entry->boost = boost;
),
- TP_printk("cpu%u: [%u<->%u]/%u, epp=%u, boost=%u",
+ TP_printk("cpu%u: [%hhu<->%hhu]/%hhu, epp=%hhu, boost=%u",
(unsigned int)__entry->cpu_id,
- (unsigned int)__entry->min_perf,
- (unsigned int)__entry->max_perf,
- (unsigned int)__entry->highest_perf,
- (unsigned int)__entry->epp,
+ (u8)__entry->min_perf,
+ (u8)__entry->max_perf,
+ (u8)__entry->highest_perf,
+ (u8)__entry->epp,
(bool)__entry->boost
)
);
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -186,7 +186,7 @@ static inline int get_mode_idx_from_str(
static DEFINE_MUTEX(amd_pstate_limits_lock);
static DEFINE_MUTEX(amd_pstate_driver_lock);
-static s16 msr_get_epp(struct amd_cpudata *cpudata)
+static u8 msr_get_epp(struct amd_cpudata *cpudata)
{
u64 value;
int ret;
@@ -207,7 +207,7 @@ static inline s16 amd_pstate_get_epp(str
return static_call(amd_pstate_get_epp)(cpudata);
}
-static s16 shmem_get_epp(struct amd_cpudata *cpudata)
+static u8 shmem_get_epp(struct amd_cpudata *cpudata)
{
u64 epp;
int ret;
@@ -218,11 +218,11 @@ static s16 shmem_get_epp(struct amd_cpud
return ret;
}
- return (s16)(epp & 0xff);
+ return FIELD_GET(AMD_CPPC_EPP_PERF_MASK, epp);
}
-static int msr_update_perf(struct amd_cpudata *cpudata, u32 min_perf,
- u32 des_perf, u32 max_perf, u32 epp, bool fast_switch)
+static int msr_update_perf(struct amd_cpudata *cpudata, u8 min_perf,
+ u8 des_perf, u8 max_perf, u8 epp, bool fast_switch)
{
u64 value, prev;
@@ -257,15 +257,15 @@ static int msr_update_perf(struct amd_cp
DEFINE_STATIC_CALL(amd_pstate_update_perf, msr_update_perf);
static inline int amd_pstate_update_perf(struct amd_cpudata *cpudata,
- u32 min_perf, u32 des_perf,
- u32 max_perf, u32 epp,
+ u8 min_perf, u8 des_perf,
+ u8 max_perf, u8 epp,
bool fast_switch)
{
return static_call(amd_pstate_update_perf)(cpudata, min_perf, des_perf,
max_perf, epp, fast_switch);
}
-static int msr_set_epp(struct amd_cpudata *cpudata, u32 epp)
+static int msr_set_epp(struct amd_cpudata *cpudata, u8 epp)
{
u64 value, prev;
int ret;
@@ -292,12 +292,12 @@ static int msr_set_epp(struct amd_cpudat
DEFINE_STATIC_CALL(amd_pstate_set_epp, msr_set_epp);
-static inline int amd_pstate_set_epp(struct amd_cpudata *cpudata, u32 epp)
+static inline int amd_pstate_set_epp(struct amd_cpudata *cpudata, u8 epp)
{
return static_call(amd_pstate_set_epp)(cpudata, epp);
}
-static int shmem_set_epp(struct amd_cpudata *cpudata, u32 epp)
+static int shmem_set_epp(struct amd_cpudata *cpudata, u8 epp)
{
int ret;
struct cppc_perf_ctrls perf_ctrls;
@@ -320,7 +320,7 @@ static int amd_pstate_set_energy_pref_in
int pref_index)
{
struct amd_cpudata *cpudata = policy->driver_data;
- int epp;
+ u8 epp;
if (!pref_index)
epp = cpudata->epp_default;
@@ -479,8 +479,8 @@ static inline int amd_pstate_init_perf(s
return static_call(amd_pstate_init_perf)(cpudata);
}
-static int shmem_update_perf(struct amd_cpudata *cpudata, u32 min_perf,
- u32 des_perf, u32 max_perf, u32 epp, bool fast_switch)
+static int shmem_update_perf(struct amd_cpudata *cpudata, u8 min_perf,
+ u8 des_perf, u8 max_perf, u8 epp, bool fast_switch)
{
struct cppc_perf_ctrls perf_ctrls;
@@ -531,14 +531,14 @@ static inline bool amd_pstate_sample(str
return true;
}
-static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf,
- u32 des_perf, u32 max_perf, bool fast_switch, int gov_flags)
+static void amd_pstate_update(struct amd_cpudata *cpudata, u8 min_perf,
+ u8 des_perf, u8 max_perf, bool fast_switch, int gov_flags)
{
unsigned long max_freq;
struct cpufreq_policy *policy = cpufreq_cpu_get(cpudata->cpu);
- u32 nominal_perf = READ_ONCE(cpudata->nominal_perf);
+ u8 nominal_perf = READ_ONCE(cpudata->nominal_perf);
- des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf);
+ des_perf = clamp_t(u8, des_perf, min_perf, max_perf);
max_freq = READ_ONCE(cpudata->max_limit_freq);
policy->cur = div_u64(des_perf * max_freq, max_perf);
@@ -550,7 +550,7 @@ static void amd_pstate_update(struct amd
/* limit the max perf when core performance boost feature is disabled */
if (!cpudata->boost_supported)
- max_perf = min_t(unsigned long, nominal_perf, max_perf);
+ max_perf = min_t(u8, nominal_perf, max_perf);
if (trace_amd_pstate_perf_enabled() && amd_pstate_sample(cpudata)) {
trace_amd_pstate_perf(min_perf, des_perf, max_perf, cpudata->freq,
@@ -591,7 +591,8 @@ static int amd_pstate_verify(struct cpuf
static int amd_pstate_update_min_max_limit(struct cpufreq_policy *policy)
{
- u32 max_limit_perf, min_limit_perf, max_perf, max_freq;
+ u8 max_limit_perf, min_limit_perf, max_perf;
+ u32 max_freq;
struct amd_cpudata *cpudata = policy->driver_data;
max_perf = READ_ONCE(cpudata->highest_perf);
@@ -615,7 +616,7 @@ static int amd_pstate_update_freq(struct
{
struct cpufreq_freqs freqs;
struct amd_cpudata *cpudata = policy->driver_data;
- unsigned long des_perf, cap_perf;
+ u8 des_perf, cap_perf;
if (!cpudata->max_freq)
return -ENODEV;
@@ -670,8 +671,7 @@ static void amd_pstate_adjust_perf(unsig
unsigned long target_perf,
unsigned long capacity)
{
- unsigned long max_perf, min_perf, des_perf,
- cap_perf, min_limit_perf;
+ u8 max_perf, min_perf, des_perf, cap_perf, min_limit_perf;
struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
struct amd_cpudata *cpudata;
@@ -905,8 +905,8 @@ static int amd_pstate_init_freq(struct a
{
int ret;
u32 min_freq, max_freq;
- u32 highest_perf, nominal_perf, nominal_freq;
- u32 lowest_nonlinear_perf, lowest_nonlinear_freq;
+ u8 highest_perf, nominal_perf, lowest_nonlinear_perf;
+ u32 nominal_freq, lowest_nonlinear_freq;
struct cppc_perf_caps cppc_perf;
ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
@@ -1113,7 +1113,7 @@ static ssize_t show_amd_pstate_lowest_no
static ssize_t show_amd_pstate_highest_perf(struct cpufreq_policy *policy,
char *buf)
{
- u32 perf;
+ u8 perf;
struct amd_cpudata *cpudata = policy->driver_data;
perf = READ_ONCE(cpudata->highest_perf);
@@ -1124,7 +1124,7 @@ static ssize_t show_amd_pstate_highest_p
static ssize_t show_amd_pstate_prefcore_ranking(struct cpufreq_policy *policy,
char *buf)
{
- u32 perf;
+ u8 perf;
struct amd_cpudata *cpudata = policy->driver_data;
perf = READ_ONCE(cpudata->prefcore_ranking);
@@ -1187,7 +1187,7 @@ static ssize_t show_energy_performance_p
struct cpufreq_policy *policy, char *buf)
{
struct amd_cpudata *cpudata = policy->driver_data;
- int preference;
+ u8 preference;
switch (cpudata->epp_cached) {
case AMD_CPPC_EPP_PERFORMANCE:
@@ -1549,7 +1549,7 @@ static void amd_pstate_epp_cpu_exit(stru
static int amd_pstate_epp_update_limit(struct cpufreq_policy *policy)
{
struct amd_cpudata *cpudata = policy->driver_data;
- u32 epp;
+ u8 epp;
amd_pstate_update_min_max_limit(policy);
@@ -1598,7 +1598,7 @@ static int amd_pstate_epp_set_policy(str
static int amd_pstate_epp_reenable(struct cpufreq_policy *policy)
{
struct amd_cpudata *cpudata = policy->driver_data;
- u64 max_perf;
+ u8 max_perf;
int ret;
ret = amd_pstate_cppc_enable(true);
@@ -1635,7 +1635,7 @@ static int amd_pstate_epp_cpu_online(str
static int amd_pstate_epp_cpu_offline(struct cpufreq_policy *policy)
{
struct amd_cpudata *cpudata = policy->driver_data;
- int min_perf;
+ u8 min_perf;
if (cpudata->suspended)
return 0;
--- a/drivers/cpufreq/amd-pstate.h
+++ b/drivers/cpufreq/amd-pstate.h
@@ -70,13 +70,13 @@ struct amd_cpudata {
struct freq_qos_request req[2];
u64 cppc_req_cached;
- u32 highest_perf;
- u32 nominal_perf;
- u32 lowest_nonlinear_perf;
- u32 lowest_perf;
- u32 prefcore_ranking;
- u32 min_limit_perf;
- u32 max_limit_perf;
+ u8 highest_perf;
+ u8 nominal_perf;
+ u8 lowest_nonlinear_perf;
+ u8 lowest_perf;
+ u8 prefcore_ranking;
+ u8 min_limit_perf;
+ u8 max_limit_perf;
u32 min_limit_freq;
u32 max_limit_freq;
@@ -93,11 +93,11 @@ struct amd_cpudata {
bool hw_prefcore;
/* EPP feature related attributes*/
- s16 epp_cached;
+ u8 epp_cached;
u32 policy;
u64 cppc_cap1_cached;
bool suspended;
- s16 epp_default;
+ u8 epp_default;
};
/*

View File

@@ -0,0 +1,131 @@
From 8b87350a2e336e54b4d2638ac042bb2f7416312a Mon Sep 17 00:00:00 2001
From: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Date: Wed, 5 Feb 2025 11:25:18 +0000
Subject: cpufreq/amd-pstate: Modularize perf<->freq conversion
Delegate the perf<->frequency conversion to helper functions to reduce
code duplication, and improve readability.
Signed-off-by: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
---
drivers/cpufreq/amd-pstate.c | 57 +++++++++++++++++++-----------------
1 file changed, 30 insertions(+), 27 deletions(-)
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -142,6 +142,20 @@ static struct quirk_entry quirk_amd_7k62
.lowest_freq = 550,
};
+static inline u8 freq_to_perf(struct amd_cpudata *cpudata, unsigned int freq_val)
+{
+ u8 perf_val = DIV_ROUND_UP_ULL((u64)freq_val * cpudata->nominal_perf,
+ cpudata->nominal_freq);
+
+ return clamp_t(u8, perf_val, cpudata->lowest_perf, cpudata->highest_perf);
+}
+
+static inline u32 perf_to_freq(struct amd_cpudata *cpudata, u8 perf_val)
+{
+ return DIV_ROUND_UP_ULL((u64)cpudata->nominal_freq * perf_val,
+ cpudata->nominal_perf);
+}
+
static int __init dmi_matched_7k62_bios_bug(const struct dmi_system_id *dmi)
{
/**
@@ -534,14 +548,12 @@ static inline bool amd_pstate_sample(str
static void amd_pstate_update(struct amd_cpudata *cpudata, u8 min_perf,
u8 des_perf, u8 max_perf, bool fast_switch, int gov_flags)
{
- unsigned long max_freq;
struct cpufreq_policy *policy = cpufreq_cpu_get(cpudata->cpu);
u8 nominal_perf = READ_ONCE(cpudata->nominal_perf);
des_perf = clamp_t(u8, des_perf, min_perf, max_perf);
- max_freq = READ_ONCE(cpudata->max_limit_freq);
- policy->cur = div_u64(des_perf * max_freq, max_perf);
+ policy->cur = perf_to_freq(cpudata, des_perf);
if ((cppc_state == AMD_PSTATE_GUIDED) && (gov_flags & CPUFREQ_GOV_DYNAMIC_SWITCHING)) {
min_perf = des_perf;
@@ -591,14 +603,11 @@ static int amd_pstate_verify(struct cpuf
static int amd_pstate_update_min_max_limit(struct cpufreq_policy *policy)
{
- u8 max_limit_perf, min_limit_perf, max_perf;
- u32 max_freq;
+ u8 max_limit_perf, min_limit_perf;
struct amd_cpudata *cpudata = policy->driver_data;
- max_perf = READ_ONCE(cpudata->highest_perf);
- max_freq = READ_ONCE(cpudata->max_freq);
- max_limit_perf = div_u64(policy->max * max_perf, max_freq);
- min_limit_perf = div_u64(policy->min * max_perf, max_freq);
+ max_limit_perf = freq_to_perf(cpudata, policy->max);
+ min_limit_perf = freq_to_perf(cpudata, policy->min);
if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
min_limit_perf = min(cpudata->nominal_perf, max_limit_perf);
@@ -616,21 +625,15 @@ static int amd_pstate_update_freq(struct
{
struct cpufreq_freqs freqs;
struct amd_cpudata *cpudata = policy->driver_data;
- u8 des_perf, cap_perf;
-
- if (!cpudata->max_freq)
- return -ENODEV;
+ u8 des_perf;
if (policy->min != cpudata->min_limit_freq || policy->max != cpudata->max_limit_freq)
amd_pstate_update_min_max_limit(policy);
- cap_perf = READ_ONCE(cpudata->highest_perf);
-
freqs.old = policy->cur;
freqs.new = target_freq;
- des_perf = DIV_ROUND_CLOSEST(target_freq * cap_perf,
- cpudata->max_freq);
+ des_perf = freq_to_perf(cpudata, target_freq);
WARN_ON(fast_switch && !policy->fast_switch_enabled);
/*
@@ -905,7 +908,6 @@ static int amd_pstate_init_freq(struct a
{
int ret;
u32 min_freq, max_freq;
- u8 highest_perf, nominal_perf, lowest_nonlinear_perf;
u32 nominal_freq, lowest_nonlinear_freq;
struct cppc_perf_caps cppc_perf;
@@ -923,16 +925,17 @@ static int amd_pstate_init_freq(struct a
else
nominal_freq = cppc_perf.nominal_freq;
- highest_perf = READ_ONCE(cpudata->highest_perf);
- nominal_perf = READ_ONCE(cpudata->nominal_perf);
- max_freq = div_u64((u64)highest_perf * nominal_freq, nominal_perf);
-
- lowest_nonlinear_perf = READ_ONCE(cpudata->lowest_nonlinear_perf);
- lowest_nonlinear_freq = div_u64((u64)nominal_freq * lowest_nonlinear_perf, nominal_perf);
- WRITE_ONCE(cpudata->min_freq, min_freq * 1000);
- WRITE_ONCE(cpudata->lowest_nonlinear_freq, lowest_nonlinear_freq * 1000);
- WRITE_ONCE(cpudata->nominal_freq, nominal_freq * 1000);
- WRITE_ONCE(cpudata->max_freq, max_freq * 1000);
+ min_freq *= 1000;
+ nominal_freq *= 1000;
+
+ WRITE_ONCE(cpudata->nominal_freq, nominal_freq);
+ WRITE_ONCE(cpudata->min_freq, min_freq);
+
+ max_freq = perf_to_freq(cpudata, cpudata->highest_perf);
+ lowest_nonlinear_freq = perf_to_freq(cpudata, cpudata->lowest_nonlinear_perf);
+
+ WRITE_ONCE(cpudata->lowest_nonlinear_freq, lowest_nonlinear_freq);
+ WRITE_ONCE(cpudata->max_freq, max_freq);
/**
* Below values need to be initialized correctly, otherwise driver will fail to load

View File

@@ -0,0 +1,37 @@
From b638a74c3b16e0781bb25478c135726862c9271d Mon Sep 17 00:00:00 2001
From: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Date: Wed, 5 Feb 2025 11:25:19 +0000
Subject: cpufreq/amd-pstate: Remove the unnecessary cpufreq_update_policy call
The update_limits callback is only called in two conditions.
* When the preferred core rankings change. In which case, we just need to
change the prefcore ranking in the cpudata struct. As there are no changes
to any of the perf values, there is no need to call cpufreq_update_policy()
* When the _PPC ACPI object changes, i.e. the highest allowed Pstate
changes. The _PPC object is only used for a table based cpufreq driver
like acpi-cpufreq, hence is irrelevant for CPPC based amd-pstate.
Hence, the cpufreq_update_policy() call becomes unnecessary and can be
removed.
Signed-off-by: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
---
drivers/cpufreq/amd-pstate.c | 4 ----
1 file changed, 4 deletions(-)
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -853,10 +853,6 @@ static void amd_pstate_update_limits(uns
sched_set_itmt_core_prio((int)cur_high, cpu);
}
cpufreq_cpu_put(policy);
-
- if (!highest_perf_changed)
- cpufreq_update_policy(cpu);
-
}
/*

View File

@@ -0,0 +1,26 @@
From 156278367fd2c0863dc06f9a7df0a654ae336726 Mon Sep 17 00:00:00 2001
From: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Date: Wed, 5 Feb 2025 11:25:21 +0000
Subject: cpufreq/amd-pstate: Add missing NULL ptr check in amd_pstate_update
Check if policy is NULL before dereferencing it in amd_pstate_update.
Fixes: e8f555daacd3 ("cpufreq/amd-pstate: fix setting policy current frequency value")
Signed-off-by: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
---
drivers/cpufreq/amd-pstate.c | 3 +++
1 file changed, 3 insertions(+)
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -551,6 +551,9 @@ static void amd_pstate_update(struct amd
struct cpufreq_policy *policy = cpufreq_cpu_get(cpudata->cpu);
u8 nominal_perf = READ_ONCE(cpudata->nominal_perf);
+ if (!policy)
+ return;
+
des_perf = clamp_t(u8, des_perf, min_perf, max_perf);
policy->cur = perf_to_freq(cpudata, des_perf);

View File

@@ -0,0 +1,124 @@
From e36868a11daa43eff94abd32f19b1783e89298d4 Mon Sep 17 00:00:00 2001
From: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Date: Wed, 5 Feb 2025 11:25:22 +0000
Subject: cpufreq/amd-pstate: Use scope based cleanup for cpufreq_policy refs
There have been instances in past where refcount decrementing is missed
while exiting a function. Use automatic scope based cleanup to avoid
such errors.
Signed-off-by: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
---
drivers/cpufreq/amd-pstate.c | 25 ++++++++-----------------
include/linux/cpufreq.h | 3 +++
2 files changed, 11 insertions(+), 17 deletions(-)
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -548,7 +548,7 @@ static inline bool amd_pstate_sample(str
static void amd_pstate_update(struct amd_cpudata *cpudata, u8 min_perf,
u8 des_perf, u8 max_perf, bool fast_switch, int gov_flags)
{
- struct cpufreq_policy *policy = cpufreq_cpu_get(cpudata->cpu);
+ struct cpufreq_policy *policy __free(put_cpufreq_policy) = cpufreq_cpu_get(cpudata->cpu);
u8 nominal_perf = READ_ONCE(cpudata->nominal_perf);
if (!policy)
@@ -574,8 +574,6 @@ static void amd_pstate_update(struct amd
}
amd_pstate_update_perf(cpudata, min_perf, des_perf, max_perf, 0, fast_switch);
-
- cpufreq_cpu_put(policy);
}
static int amd_pstate_verify(struct cpufreq_policy_data *policy_data)
@@ -587,7 +585,8 @@ static int amd_pstate_verify(struct cpuf
* amd-pstate qos_requests.
*/
if (policy_data->min == FREQ_QOS_MIN_DEFAULT_VALUE) {
- struct cpufreq_policy *policy = cpufreq_cpu_get(policy_data->cpu);
+ struct cpufreq_policy *policy __free(put_cpufreq_policy) =
+ cpufreq_cpu_get(policy_data->cpu);
struct amd_cpudata *cpudata;
if (!policy)
@@ -595,7 +594,6 @@ static int amd_pstate_verify(struct cpuf
cpudata = policy->driver_data;
policy_data->min = cpudata->lowest_nonlinear_freq;
- cpufreq_cpu_put(policy);
}
cpufreq_verify_within_cpu_limits(policy_data);
@@ -678,7 +676,7 @@ static void amd_pstate_adjust_perf(unsig
unsigned long capacity)
{
u8 max_perf, min_perf, des_perf, cap_perf, min_limit_perf;
- struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
+ struct cpufreq_policy *policy __free(put_cpufreq_policy) = cpufreq_cpu_get(cpu);
struct amd_cpudata *cpudata;
if (!policy)
@@ -710,7 +708,6 @@ static void amd_pstate_adjust_perf(unsig
amd_pstate_update(cpudata, min_perf, des_perf, max_perf, true,
policy->governor->flags);
- cpufreq_cpu_put(policy);
}
static int amd_pstate_cpu_boost_update(struct cpufreq_policy *policy, bool on)
@@ -824,28 +821,23 @@ static void amd_pstate_init_prefcore(str
static void amd_pstate_update_limits(unsigned int cpu)
{
- struct cpufreq_policy *policy = NULL;
+ struct cpufreq_policy *policy __free(put_cpufreq_policy) = cpufreq_cpu_get(cpu);
struct amd_cpudata *cpudata;
u32 prev_high = 0, cur_high = 0;
- int ret;
bool highest_perf_changed = false;
if (!amd_pstate_prefcore)
return;
- policy = cpufreq_cpu_get(cpu);
if (!policy)
return;
- cpudata = policy->driver_data;
-
guard(mutex)(&amd_pstate_driver_lock);
- ret = amd_get_highest_perf(cpu, &cur_high);
- if (ret) {
- cpufreq_cpu_put(policy);
+ if (amd_get_highest_perf(cpu, &cur_high))
return;
- }
+
+ cpudata = policy->driver_data;
prev_high = READ_ONCE(cpudata->prefcore_ranking);
highest_perf_changed = (prev_high != cur_high);
@@ -855,7 +847,6 @@ static void amd_pstate_update_limits(uns
if (cur_high < CPPC_MAX_PERF)
sched_set_itmt_core_prio((int)cur_high, cpu);
}
- cpufreq_cpu_put(policy);
}
/*
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -210,6 +210,9 @@ static inline struct cpufreq_policy *cpu
static inline void cpufreq_cpu_put(struct cpufreq_policy *policy) { }
#endif
+/* Scope based cleanup macro for cpufreq_policy kobject reference counting */
+DEFINE_FREE(put_cpufreq_policy, struct cpufreq_policy *, if (_T) cpufreq_cpu_put(_T))
+
static inline bool policy_is_inactive(struct cpufreq_policy *policy)
{
return cpumask_empty(policy->cpus);

View File

@@ -0,0 +1,26 @@
From 9b7b7d59c5425246ffda281e761ef3ec3b0e4fbc Mon Sep 17 00:00:00 2001
From: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Date: Wed, 5 Feb 2025 11:25:23 +0000
Subject: cpufreq/amd-pstate: Remove the unncecessary driver_lock in
amd_pstate_update_limits
There is no need to take a driver wide lock while updating the
highest_perf value in the percpu cpudata struct. Hence remove it.
Signed-off-by: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
---
drivers/cpufreq/amd-pstate.c | 2 --
1 file changed, 2 deletions(-)
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -832,8 +832,6 @@ static void amd_pstate_update_limits(uns
if (!policy)
return;
- guard(mutex)(&amd_pstate_driver_lock);
-
if (amd_get_highest_perf(cpu, &cur_high))
return;

View File

@@ -0,0 +1,35 @@
From f09ef5b8aacd5b16ac1ea93103b41a7e88b174ed Mon Sep 17 00:00:00 2001
From: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Date: Sat, 22 Feb 2025 03:32:22 +0000
Subject: cpufreq/amd-pstate: Fix the clamping of perf values
The clamping in freq_to_perf() is broken right now, as we first typecast
(read wraparound) the overflowing value into a u8 and then clamp it down.
So, use a u32 to store the >255 value in certain edge cases and then clamp
it down into a u8.
Also, use a "explicit typecast + clamp" instead of just a "clamp_t" as the
latter typecasts first and then clamps between the limits, which defeats
our purpose.
Fixes: 305621eb6a8b ("cpufreq/amd-pstate: Modularize perf<->freq conversion")
Signed-off-by: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
---
drivers/cpufreq/amd-pstate.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -144,10 +144,10 @@ static struct quirk_entry quirk_amd_7k62
static inline u8 freq_to_perf(struct amd_cpudata *cpudata, unsigned int freq_val)
{
- u8 perf_val = DIV_ROUND_UP_ULL((u64)freq_val * cpudata->nominal_perf,
+ u32 perf_val = DIV_ROUND_UP_ULL((u64)freq_val * cpudata->nominal_perf,
cpudata->nominal_freq);
- return clamp_t(u8, perf_val, cpudata->lowest_perf, cpudata->highest_perf);
+ return (u8)clamp(perf_val, cpudata->lowest_perf, cpudata->highest_perf);
}
static inline u32 perf_to_freq(struct amd_cpudata *cpudata, u8 perf_val)

View File

@@ -0,0 +1,42 @@
From 210d043d7b244588c911e355f2d5339bda9c8209 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Wed, 26 Feb 2025 01:49:16 -0600
Subject: cpufreq/amd-pstate: Invalidate cppc_req_cached during suspend
During resume it's possible the firmware didn't restore the CPPC request
MSR but the kernel thinks the values line up. This leads to incorrect
performance after resume from suspend.
To fix the issue invalidate the cached value at suspend. During resume use
the saved values programmed as cached limits.
Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
Reviewed-by: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Reported-by: Miroslav Pavleski <miroslav@pavleski.net>
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=217931
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
---
drivers/cpufreq/amd-pstate.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -1605,7 +1605,7 @@ static int amd_pstate_epp_reenable(struc
max_perf, policy->boost_enabled);
}
- return amd_pstate_update_perf(cpudata, 0, 0, max_perf, cpudata->epp_cached, false);
+ return amd_pstate_epp_update_limit(policy);
}
static int amd_pstate_epp_cpu_online(struct cpufreq_policy *policy)
@@ -1654,6 +1654,9 @@ static int amd_pstate_epp_suspend(struct
if (cppc_state != AMD_PSTATE_ACTIVE)
return 0;
+ /* invalidate to ensure it's rewritten during resume */
+ cpudata->cppc_req_cached = 0;
+
/* set this flag to avoid setting core offline*/
cpudata->suspended = true;

View File

@@ -0,0 +1,35 @@
From a0233b8c2c01e98ddeb2e80768d4c7172311b200 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Wed, 26 Feb 2025 01:49:17 -0600
Subject: cpufreq/amd-pstate: Show a warning when a CPU fails to setup
I came across a system that MSR_AMD_CPPC_CAP1 for some CPUs isn't
populated. This is an unexpected behavior that is most likely a
BIOS bug. In the event it happens I'd like users to report bugs
to properly root cause and get this fixed.
Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
Reviewed-by: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
---
drivers/cpufreq/amd-pstate.c | 2 ++
1 file changed, 2 insertions(+)
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -1028,6 +1028,7 @@ static int amd_pstate_cpu_init(struct cp
free_cpudata2:
freq_qos_remove_request(&cpudata->req[0]);
free_cpudata1:
+ pr_warn("Failed to initialize CPU %d: %d\n", policy->cpu, ret);
kfree(cpudata);
return ret;
}
@@ -1521,6 +1522,7 @@ static int amd_pstate_epp_cpu_init(struc
return 0;
free_cpudata1:
+ pr_warn("Failed to initialize CPU %d: %d\n", policy->cpu, ret);
kfree(cpudata);
return ret;
}

View File

@@ -0,0 +1,209 @@
From ad672c3336331cab028c27e4a73153f517bb1844 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Wed, 26 Feb 2025 01:49:18 -0600
Subject: cpufreq/amd-pstate: Drop min and max cached frequencies
Use the perf_to_freq helpers to calculate this on the fly.
As the members are no longer cached add an extra check into
amd_pstate_epp_update_limit() to avoid unnecessary calls in
amd_pstate_update_min_max_limit().
Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
Reviewed-by: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
---
drivers/cpufreq/amd-pstate-ut.c | 14 +++++------
drivers/cpufreq/amd-pstate.c | 43 +++++++++------------------------
drivers/cpufreq/amd-pstate.h | 9 ++-----
3 files changed, 20 insertions(+), 46 deletions(-)
--- a/drivers/cpufreq/amd-pstate-ut.c
+++ b/drivers/cpufreq/amd-pstate-ut.c
@@ -214,14 +214,14 @@ static void amd_pstate_ut_check_freq(u32
break;
cpudata = policy->driver_data;
- if (!((cpudata->max_freq >= cpudata->nominal_freq) &&
+ if (!((policy->cpuinfo.max_freq >= cpudata->nominal_freq) &&
(cpudata->nominal_freq > cpudata->lowest_nonlinear_freq) &&
- (cpudata->lowest_nonlinear_freq > cpudata->min_freq) &&
- (cpudata->min_freq > 0))) {
+ (cpudata->lowest_nonlinear_freq > policy->cpuinfo.min_freq) &&
+ (policy->cpuinfo.min_freq > 0))) {
amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
pr_err("%s cpu%d max=%d >= nominal=%d > lowest_nonlinear=%d > min=%d > 0, the formula is incorrect!\n",
- __func__, cpu, cpudata->max_freq, cpudata->nominal_freq,
- cpudata->lowest_nonlinear_freq, cpudata->min_freq);
+ __func__, cpu, policy->cpuinfo.max_freq, cpudata->nominal_freq,
+ cpudata->lowest_nonlinear_freq, policy->cpuinfo.min_freq);
goto skip_test;
}
@@ -233,13 +233,13 @@ static void amd_pstate_ut_check_freq(u32
}
if (cpudata->boost_supported) {
- if ((policy->max == cpudata->max_freq) ||
+ if ((policy->max == policy->cpuinfo.max_freq) ||
(policy->max == cpudata->nominal_freq))
amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_PASS;
else {
amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
pr_err("%s cpu%d policy_max=%d should be equal cpu_max=%d or cpu_nominal=%d !\n",
- __func__, cpu, policy->max, cpudata->max_freq,
+ __func__, cpu, policy->max, policy->cpuinfo.max_freq,
cpudata->nominal_freq);
goto skip_test;
}
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -717,7 +717,7 @@ static int amd_pstate_cpu_boost_update(s
int ret = 0;
nominal_freq = READ_ONCE(cpudata->nominal_freq);
- max_freq = READ_ONCE(cpudata->max_freq);
+ max_freq = perf_to_freq(cpudata, READ_ONCE(cpudata->highest_perf));
if (on)
policy->cpuinfo.max_freq = max_freq;
@@ -917,13 +917,10 @@ static int amd_pstate_init_freq(struct a
nominal_freq *= 1000;
WRITE_ONCE(cpudata->nominal_freq, nominal_freq);
- WRITE_ONCE(cpudata->min_freq, min_freq);
max_freq = perf_to_freq(cpudata, cpudata->highest_perf);
lowest_nonlinear_freq = perf_to_freq(cpudata, cpudata->lowest_nonlinear_perf);
-
WRITE_ONCE(cpudata->lowest_nonlinear_freq, lowest_nonlinear_freq);
- WRITE_ONCE(cpudata->max_freq, max_freq);
/**
* Below values need to be initialized correctly, otherwise driver will fail to load
@@ -948,9 +945,9 @@ static int amd_pstate_init_freq(struct a
static int amd_pstate_cpu_init(struct cpufreq_policy *policy)
{
- int min_freq, max_freq, ret;
- struct device *dev;
struct amd_cpudata *cpudata;
+ struct device *dev;
+ int ret;
/*
* Resetting PERF_CTL_MSR will put the CPU in P0 frequency,
@@ -981,17 +978,11 @@ static int amd_pstate_cpu_init(struct cp
if (ret)
goto free_cpudata1;
- min_freq = READ_ONCE(cpudata->min_freq);
- max_freq = READ_ONCE(cpudata->max_freq);
-
policy->cpuinfo.transition_latency = amd_pstate_get_transition_latency(policy->cpu);
policy->transition_delay_us = amd_pstate_get_transition_delay_us(policy->cpu);
- policy->min = min_freq;
- policy->max = max_freq;
-
- policy->cpuinfo.min_freq = min_freq;
- policy->cpuinfo.max_freq = max_freq;
+ policy->cpuinfo.min_freq = policy->min = perf_to_freq(cpudata, cpudata->lowest_perf);
+ policy->cpuinfo.max_freq = policy->max = perf_to_freq(cpudata, cpudata->highest_perf);
policy->boost_enabled = READ_ONCE(cpudata->boost_supported);
@@ -1015,9 +1006,6 @@ static int amd_pstate_cpu_init(struct cp
goto free_cpudata2;
}
- cpudata->max_limit_freq = max_freq;
- cpudata->min_limit_freq = min_freq;
-
policy->driver_data = cpudata;
if (!current_pstate_driver->adjust_perf)
@@ -1075,14 +1063,10 @@ static int amd_pstate_cpu_suspend(struct
static ssize_t show_amd_pstate_max_freq(struct cpufreq_policy *policy,
char *buf)
{
- int max_freq;
struct amd_cpudata *cpudata = policy->driver_data;
- max_freq = READ_ONCE(cpudata->max_freq);
- if (max_freq < 0)
- return max_freq;
- return sysfs_emit(buf, "%u\n", max_freq);
+ return sysfs_emit(buf, "%u\n", perf_to_freq(cpudata, READ_ONCE(cpudata->highest_perf)));
}
static ssize_t show_amd_pstate_lowest_nonlinear_freq(struct cpufreq_policy *policy,
@@ -1440,10 +1424,10 @@ static bool amd_pstate_acpi_pm_profile_u
static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
{
- int min_freq, max_freq, ret;
struct amd_cpudata *cpudata;
struct device *dev;
u64 value;
+ int ret;
/*
* Resetting PERF_CTL_MSR will put the CPU in P0 frequency,
@@ -1474,19 +1458,13 @@ static int amd_pstate_epp_cpu_init(struc
if (ret)
goto free_cpudata1;
- min_freq = READ_ONCE(cpudata->min_freq);
- max_freq = READ_ONCE(cpudata->max_freq);
-
- policy->cpuinfo.min_freq = min_freq;
- policy->cpuinfo.max_freq = max_freq;
+ policy->cpuinfo.min_freq = policy->min = perf_to_freq(cpudata, cpudata->lowest_perf);
+ policy->cpuinfo.max_freq = policy->max = perf_to_freq(cpudata, cpudata->highest_perf);
/* It will be updated by governor */
policy->cur = policy->cpuinfo.min_freq;
policy->driver_data = cpudata;
- policy->min = policy->cpuinfo.min_freq;
- policy->max = policy->cpuinfo.max_freq;
-
policy->boost_enabled = READ_ONCE(cpudata->boost_supported);
/*
@@ -1544,7 +1522,8 @@ static int amd_pstate_epp_update_limit(s
struct amd_cpudata *cpudata = policy->driver_data;
u8 epp;
- amd_pstate_update_min_max_limit(policy);
+ if (policy->min != cpudata->min_limit_freq || policy->max != cpudata->max_limit_freq)
+ amd_pstate_update_min_max_limit(policy);
if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
epp = 0;
--- a/drivers/cpufreq/amd-pstate.h
+++ b/drivers/cpufreq/amd-pstate.h
@@ -46,8 +46,6 @@ struct amd_aperf_mperf {
* @max_limit_perf: Cached value of the performance corresponding to policy->max
* @min_limit_freq: Cached value of policy->min (in khz)
* @max_limit_freq: Cached value of policy->max (in khz)
- * @max_freq: the frequency (in khz) that mapped to highest_perf
- * @min_freq: the frequency (in khz) that mapped to lowest_perf
* @nominal_freq: the frequency (in khz) that mapped to nominal_perf
* @lowest_nonlinear_freq: the frequency (in khz) that mapped to lowest_nonlinear_perf
* @cur: Difference of Aperf/Mperf/tsc count between last and current sample
@@ -77,11 +75,8 @@ struct amd_cpudata {
u8 prefcore_ranking;
u8 min_limit_perf;
u8 max_limit_perf;
- u32 min_limit_freq;
- u32 max_limit_freq;
-
- u32 max_freq;
- u32 min_freq;
+ u32 min_limit_freq;
+ u32 max_limit_freq;
u32 nominal_freq;
u32 lowest_nonlinear_freq;

View File

@@ -0,0 +1,611 @@
From b96076ada115f25a4944f6f111b22c44a5d1a3cf Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Wed, 26 Feb 2025 01:49:19 -0600
Subject: cpufreq/amd-pstate: Move perf values into a union
By storing perf values in a union all the writes and reads can
be done atomically, removing the need for some concurrency protections.
While making this change, also drop the cached frequency values,
using inline helpers to calculate them on demand from perf value.
Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
---
drivers/cpufreq/amd-pstate-ut.c | 18 +--
drivers/cpufreq/amd-pstate.c | 205 ++++++++++++++++++--------------
drivers/cpufreq/amd-pstate.h | 51 +++++---
3 files changed, 158 insertions(+), 116 deletions(-)
--- a/drivers/cpufreq/amd-pstate-ut.c
+++ b/drivers/cpufreq/amd-pstate-ut.c
@@ -129,6 +129,7 @@ static void amd_pstate_ut_check_perf(u32
struct cppc_perf_caps cppc_perf;
struct cpufreq_policy *policy = NULL;
struct amd_cpudata *cpudata = NULL;
+ union perf_cached cur_perf;
for_each_possible_cpu(cpu) {
policy = cpufreq_cpu_get(cpu);
@@ -162,19 +163,20 @@ static void amd_pstate_ut_check_perf(u32
lowest_perf = AMD_CPPC_LOWEST_PERF(cap1);
}
- if (highest_perf != READ_ONCE(cpudata->highest_perf) && !cpudata->hw_prefcore) {
+ cur_perf = READ_ONCE(cpudata->perf);
+ if (highest_perf != cur_perf.highest_perf && !cpudata->hw_prefcore) {
pr_err("%s cpu%d highest=%d %d highest perf doesn't match\n",
- __func__, cpu, highest_perf, cpudata->highest_perf);
+ __func__, cpu, highest_perf, cur_perf.highest_perf);
goto skip_test;
}
- if ((nominal_perf != READ_ONCE(cpudata->nominal_perf)) ||
- (lowest_nonlinear_perf != READ_ONCE(cpudata->lowest_nonlinear_perf)) ||
- (lowest_perf != READ_ONCE(cpudata->lowest_perf))) {
+ if (nominal_perf != cur_perf.nominal_perf ||
+ (lowest_nonlinear_perf != cur_perf.lowest_nonlinear_perf) ||
+ (lowest_perf != cur_perf.lowest_perf)) {
amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
pr_err("%s cpu%d nominal=%d %d lowest_nonlinear=%d %d lowest=%d %d, they should be equal!\n",
- __func__, cpu, nominal_perf, cpudata->nominal_perf,
- lowest_nonlinear_perf, cpudata->lowest_nonlinear_perf,
- lowest_perf, cpudata->lowest_perf);
+ __func__, cpu, nominal_perf, cur_perf.nominal_perf,
+ lowest_nonlinear_perf, cur_perf.lowest_nonlinear_perf,
+ lowest_perf, cur_perf.lowest_perf);
goto skip_test;
}
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -142,18 +142,17 @@ static struct quirk_entry quirk_amd_7k62
.lowest_freq = 550,
};
-static inline u8 freq_to_perf(struct amd_cpudata *cpudata, unsigned int freq_val)
+static inline u8 freq_to_perf(union perf_cached perf, u32 nominal_freq, unsigned int freq_val)
{
- u32 perf_val = DIV_ROUND_UP_ULL((u64)freq_val * cpudata->nominal_perf,
- cpudata->nominal_freq);
+ u32 perf_val = DIV_ROUND_UP_ULL((u64)freq_val * perf.nominal_perf, nominal_freq);
- return (u8)clamp(perf_val, cpudata->lowest_perf, cpudata->highest_perf);
+ return (u8)clamp(perf_val, perf.lowest_perf, perf.highest_perf);
}
-static inline u32 perf_to_freq(struct amd_cpudata *cpudata, u8 perf_val)
+static inline u32 perf_to_freq(union perf_cached perf, u32 nominal_freq, u8 perf_val)
{
- return DIV_ROUND_UP_ULL((u64)cpudata->nominal_freq * perf_val,
- cpudata->nominal_perf);
+ return DIV_ROUND_UP_ULL((u64)nominal_freq * perf_val,
+ perf.nominal_perf);
}
static int __init dmi_matched_7k62_bios_bug(const struct dmi_system_id *dmi)
@@ -347,7 +346,9 @@ static int amd_pstate_set_energy_pref_in
}
if (trace_amd_pstate_epp_perf_enabled()) {
- trace_amd_pstate_epp_perf(cpudata->cpu, cpudata->highest_perf,
+ union perf_cached perf = READ_ONCE(cpudata->perf);
+
+ trace_amd_pstate_epp_perf(cpudata->cpu, perf.highest_perf,
epp,
FIELD_GET(AMD_CPPC_MIN_PERF_MASK, cpudata->cppc_req_cached),
FIELD_GET(AMD_CPPC_MAX_PERF_MASK, cpudata->cppc_req_cached),
@@ -425,6 +426,7 @@ static inline int amd_pstate_cppc_enable
static int msr_init_perf(struct amd_cpudata *cpudata)
{
+ union perf_cached perf = READ_ONCE(cpudata->perf);
u64 cap1, numerator;
int ret = rdmsrl_safe_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1,
@@ -436,19 +438,21 @@ static int msr_init_perf(struct amd_cpud
if (ret)
return ret;
- WRITE_ONCE(cpudata->highest_perf, numerator);
- WRITE_ONCE(cpudata->max_limit_perf, numerator);
- WRITE_ONCE(cpudata->nominal_perf, AMD_CPPC_NOMINAL_PERF(cap1));
- WRITE_ONCE(cpudata->lowest_nonlinear_perf, AMD_CPPC_LOWNONLIN_PERF(cap1));
- WRITE_ONCE(cpudata->lowest_perf, AMD_CPPC_LOWEST_PERF(cap1));
+ perf.highest_perf = numerator;
+ perf.max_limit_perf = numerator;
+ perf.min_limit_perf = AMD_CPPC_LOWEST_PERF(cap1);
+ perf.nominal_perf = AMD_CPPC_NOMINAL_PERF(cap1);
+ perf.lowest_nonlinear_perf = AMD_CPPC_LOWNONLIN_PERF(cap1);
+ perf.lowest_perf = AMD_CPPC_LOWEST_PERF(cap1);
+ WRITE_ONCE(cpudata->perf, perf);
WRITE_ONCE(cpudata->prefcore_ranking, AMD_CPPC_HIGHEST_PERF(cap1));
- WRITE_ONCE(cpudata->min_limit_perf, AMD_CPPC_LOWEST_PERF(cap1));
return 0;
}
static int shmem_init_perf(struct amd_cpudata *cpudata)
{
struct cppc_perf_caps cppc_perf;
+ union perf_cached perf = READ_ONCE(cpudata->perf);
u64 numerator;
int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
@@ -459,14 +463,14 @@ static int shmem_init_perf(struct amd_cp
if (ret)
return ret;
- WRITE_ONCE(cpudata->highest_perf, numerator);
- WRITE_ONCE(cpudata->max_limit_perf, numerator);
- WRITE_ONCE(cpudata->nominal_perf, cppc_perf.nominal_perf);
- WRITE_ONCE(cpudata->lowest_nonlinear_perf,
- cppc_perf.lowest_nonlinear_perf);
- WRITE_ONCE(cpudata->lowest_perf, cppc_perf.lowest_perf);
+ perf.highest_perf = numerator;
+ perf.max_limit_perf = numerator;
+ perf.min_limit_perf = cppc_perf.lowest_perf;
+ perf.nominal_perf = cppc_perf.nominal_perf;
+ perf.lowest_nonlinear_perf = cppc_perf.lowest_nonlinear_perf;
+ perf.lowest_perf = cppc_perf.lowest_perf;
+ WRITE_ONCE(cpudata->perf, perf);
WRITE_ONCE(cpudata->prefcore_ranking, cppc_perf.highest_perf);
- WRITE_ONCE(cpudata->min_limit_perf, cppc_perf.lowest_perf);
if (cppc_state == AMD_PSTATE_ACTIVE)
return 0;
@@ -549,14 +553,14 @@ static void amd_pstate_update(struct amd
u8 des_perf, u8 max_perf, bool fast_switch, int gov_flags)
{
struct cpufreq_policy *policy __free(put_cpufreq_policy) = cpufreq_cpu_get(cpudata->cpu);
- u8 nominal_perf = READ_ONCE(cpudata->nominal_perf);
+ union perf_cached perf = READ_ONCE(cpudata->perf);
if (!policy)
return;
des_perf = clamp_t(u8, des_perf, min_perf, max_perf);
- policy->cur = perf_to_freq(cpudata, des_perf);
+ policy->cur = perf_to_freq(perf, cpudata->nominal_freq, des_perf);
if ((cppc_state == AMD_PSTATE_GUIDED) && (gov_flags & CPUFREQ_GOV_DYNAMIC_SWITCHING)) {
min_perf = des_perf;
@@ -565,7 +569,7 @@ static void amd_pstate_update(struct amd
/* limit the max perf when core performance boost feature is disabled */
if (!cpudata->boost_supported)
- max_perf = min_t(u8, nominal_perf, max_perf);
+ max_perf = min_t(u8, perf.nominal_perf, max_perf);
if (trace_amd_pstate_perf_enabled() && amd_pstate_sample(cpudata)) {
trace_amd_pstate_perf(min_perf, des_perf, max_perf, cpudata->freq,
@@ -602,39 +606,41 @@ static int amd_pstate_verify(struct cpuf
return 0;
}
-static int amd_pstate_update_min_max_limit(struct cpufreq_policy *policy)
+static void amd_pstate_update_min_max_limit(struct cpufreq_policy *policy)
{
- u8 max_limit_perf, min_limit_perf;
struct amd_cpudata *cpudata = policy->driver_data;
+ union perf_cached perf = READ_ONCE(cpudata->perf);
- max_limit_perf = freq_to_perf(cpudata, policy->max);
- min_limit_perf = freq_to_perf(cpudata, policy->min);
+ perf.max_limit_perf = freq_to_perf(perf, cpudata->nominal_freq, policy->max);
+ perf.min_limit_perf = freq_to_perf(perf, cpudata->nominal_freq, policy->min);
if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
- min_limit_perf = min(cpudata->nominal_perf, max_limit_perf);
+ perf.min_limit_perf = min(perf.nominal_perf, perf.max_limit_perf);
- WRITE_ONCE(cpudata->max_limit_perf, max_limit_perf);
- WRITE_ONCE(cpudata->min_limit_perf, min_limit_perf);
WRITE_ONCE(cpudata->max_limit_freq, policy->max);
WRITE_ONCE(cpudata->min_limit_freq, policy->min);
-
- return 0;
+ WRITE_ONCE(cpudata->perf, perf);
}
static int amd_pstate_update_freq(struct cpufreq_policy *policy,
unsigned int target_freq, bool fast_switch)
{
struct cpufreq_freqs freqs;
- struct amd_cpudata *cpudata = policy->driver_data;
+ struct amd_cpudata *cpudata;
+ union perf_cached perf;
u8 des_perf;
+ cpudata = policy->driver_data;
+
if (policy->min != cpudata->min_limit_freq || policy->max != cpudata->max_limit_freq)
amd_pstate_update_min_max_limit(policy);
+ perf = READ_ONCE(cpudata->perf);
+
freqs.old = policy->cur;
freqs.new = target_freq;
- des_perf = freq_to_perf(cpudata, target_freq);
+ des_perf = freq_to_perf(perf, cpudata->nominal_freq, target_freq);
WARN_ON(fast_switch && !policy->fast_switch_enabled);
/*
@@ -645,8 +651,8 @@ static int amd_pstate_update_freq(struct
if (!fast_switch)
cpufreq_freq_transition_begin(policy, &freqs);
- amd_pstate_update(cpudata, cpudata->min_limit_perf, des_perf,
- cpudata->max_limit_perf, fast_switch,
+ amd_pstate_update(cpudata, perf.min_limit_perf, des_perf,
+ perf.max_limit_perf, fast_switch,
policy->governor->flags);
if (!fast_switch)
@@ -675,9 +681,10 @@ static void amd_pstate_adjust_perf(unsig
unsigned long target_perf,
unsigned long capacity)
{
- u8 max_perf, min_perf, des_perf, cap_perf, min_limit_perf;
+ u8 max_perf, min_perf, des_perf, cap_perf;
struct cpufreq_policy *policy __free(put_cpufreq_policy) = cpufreq_cpu_get(cpu);
struct amd_cpudata *cpudata;
+ union perf_cached perf;
if (!policy)
return;
@@ -687,8 +694,8 @@ static void amd_pstate_adjust_perf(unsig
if (policy->min != cpudata->min_limit_freq || policy->max != cpudata->max_limit_freq)
amd_pstate_update_min_max_limit(policy);
- cap_perf = READ_ONCE(cpudata->highest_perf);
- min_limit_perf = READ_ONCE(cpudata->min_limit_perf);
+ perf = READ_ONCE(cpudata->perf);
+ cap_perf = perf.highest_perf;
des_perf = cap_perf;
if (target_perf < capacity)
@@ -699,10 +706,10 @@ static void amd_pstate_adjust_perf(unsig
else
min_perf = cap_perf;
- if (min_perf < min_limit_perf)
- min_perf = min_limit_perf;
+ if (min_perf < perf.min_limit_perf)
+ min_perf = perf.min_limit_perf;
- max_perf = cpudata->max_limit_perf;
+ max_perf = perf.max_limit_perf;
if (max_perf < min_perf)
max_perf = min_perf;
@@ -713,11 +720,12 @@ static void amd_pstate_adjust_perf(unsig
static int amd_pstate_cpu_boost_update(struct cpufreq_policy *policy, bool on)
{
struct amd_cpudata *cpudata = policy->driver_data;
+ union perf_cached perf = READ_ONCE(cpudata->perf);
u32 nominal_freq, max_freq;
int ret = 0;
nominal_freq = READ_ONCE(cpudata->nominal_freq);
- max_freq = perf_to_freq(cpudata, READ_ONCE(cpudata->highest_perf));
+ max_freq = perf_to_freq(perf, cpudata->nominal_freq, perf.highest_perf);
if (on)
policy->cpuinfo.max_freq = max_freq;
@@ -882,30 +890,30 @@ static u32 amd_pstate_get_transition_lat
}
/*
- * amd_pstate_init_freq: Initialize the max_freq, min_freq,
- * nominal_freq and lowest_nonlinear_freq for
- * the @cpudata object.
+ * amd_pstate_init_freq: Initialize the nominal_freq and lowest_nonlinear_freq
+ * for the @cpudata object.
*
- * Requires: highest_perf, lowest_perf, nominal_perf and
- * lowest_nonlinear_perf members of @cpudata to be
- * initialized.
+ * Requires: all perf members of @cpudata to be initialized.
*
- * Returns 0 on success, non-zero value on failure.
+ * Returns 0 on success, non-zero value on failure.
*/
static int amd_pstate_init_freq(struct amd_cpudata *cpudata)
{
- int ret;
- u32 min_freq, max_freq;
- u32 nominal_freq, lowest_nonlinear_freq;
+ u32 min_freq, max_freq, nominal_freq, lowest_nonlinear_freq;
struct cppc_perf_caps cppc_perf;
+ union perf_cached perf;
+ int ret;
ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
if (ret)
return ret;
+ perf = READ_ONCE(cpudata->perf);
- if (quirks && quirks->lowest_freq)
+ if (quirks && quirks->lowest_freq) {
min_freq = quirks->lowest_freq;
- else
+ perf.lowest_perf = freq_to_perf(perf, nominal_freq, min_freq);
+ WRITE_ONCE(cpudata->perf, perf);
+ } else
min_freq = cppc_perf.lowest_freq;
if (quirks && quirks->nominal_freq)
@@ -918,8 +926,8 @@ static int amd_pstate_init_freq(struct a
WRITE_ONCE(cpudata->nominal_freq, nominal_freq);
- max_freq = perf_to_freq(cpudata, cpudata->highest_perf);
- lowest_nonlinear_freq = perf_to_freq(cpudata, cpudata->lowest_nonlinear_perf);
+ max_freq = perf_to_freq(perf, nominal_freq, perf.highest_perf);
+ lowest_nonlinear_freq = perf_to_freq(perf, nominal_freq, perf.lowest_nonlinear_perf);
WRITE_ONCE(cpudata->lowest_nonlinear_freq, lowest_nonlinear_freq);
/**
@@ -946,6 +954,7 @@ static int amd_pstate_init_freq(struct a
static int amd_pstate_cpu_init(struct cpufreq_policy *policy)
{
struct amd_cpudata *cpudata;
+ union perf_cached perf;
struct device *dev;
int ret;
@@ -981,8 +990,14 @@ static int amd_pstate_cpu_init(struct cp
policy->cpuinfo.transition_latency = amd_pstate_get_transition_latency(policy->cpu);
policy->transition_delay_us = amd_pstate_get_transition_delay_us(policy->cpu);
- policy->cpuinfo.min_freq = policy->min = perf_to_freq(cpudata, cpudata->lowest_perf);
- policy->cpuinfo.max_freq = policy->max = perf_to_freq(cpudata, cpudata->highest_perf);
+ perf = READ_ONCE(cpudata->perf);
+
+ policy->cpuinfo.min_freq = policy->min = perf_to_freq(perf,
+ cpudata->nominal_freq,
+ perf.lowest_perf);
+ policy->cpuinfo.max_freq = policy->max = perf_to_freq(perf,
+ cpudata->nominal_freq,
+ perf.highest_perf);
policy->boost_enabled = READ_ONCE(cpudata->boost_supported);
@@ -1063,23 +1078,27 @@ static int amd_pstate_cpu_suspend(struct
static ssize_t show_amd_pstate_max_freq(struct cpufreq_policy *policy,
char *buf)
{
- struct amd_cpudata *cpudata = policy->driver_data;
+ struct amd_cpudata *cpudata;
+ union perf_cached perf;
+ cpudata = policy->driver_data;
+ perf = READ_ONCE(cpudata->perf);
- return sysfs_emit(buf, "%u\n", perf_to_freq(cpudata, READ_ONCE(cpudata->highest_perf)));
+ return sysfs_emit(buf, "%u\n",
+ perf_to_freq(perf, cpudata->nominal_freq, perf.highest_perf));
}
static ssize_t show_amd_pstate_lowest_nonlinear_freq(struct cpufreq_policy *policy,
char *buf)
{
- int freq;
- struct amd_cpudata *cpudata = policy->driver_data;
+ struct amd_cpudata *cpudata;
+ union perf_cached perf;
- freq = READ_ONCE(cpudata->lowest_nonlinear_freq);
- if (freq < 0)
- return freq;
+ cpudata = policy->driver_data;
+ perf = READ_ONCE(cpudata->perf);
- return sysfs_emit(buf, "%u\n", freq);
+ return sysfs_emit(buf, "%u\n",
+ perf_to_freq(perf, cpudata->nominal_freq, perf.lowest_nonlinear_perf));
}
/*
@@ -1089,12 +1108,11 @@ static ssize_t show_amd_pstate_lowest_no
static ssize_t show_amd_pstate_highest_perf(struct cpufreq_policy *policy,
char *buf)
{
- u8 perf;
- struct amd_cpudata *cpudata = policy->driver_data;
+ struct amd_cpudata *cpudata;
- perf = READ_ONCE(cpudata->highest_perf);
+ cpudata = policy->driver_data;
- return sysfs_emit(buf, "%u\n", perf);
+ return sysfs_emit(buf, "%u\n", cpudata->perf.highest_perf);
}
static ssize_t show_amd_pstate_prefcore_ranking(struct cpufreq_policy *policy,
@@ -1425,6 +1443,7 @@ static bool amd_pstate_acpi_pm_profile_u
static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
{
struct amd_cpudata *cpudata;
+ union perf_cached perf;
struct device *dev;
u64 value;
int ret;
@@ -1458,8 +1477,15 @@ static int amd_pstate_epp_cpu_init(struc
if (ret)
goto free_cpudata1;
- policy->cpuinfo.min_freq = policy->min = perf_to_freq(cpudata, cpudata->lowest_perf);
- policy->cpuinfo.max_freq = policy->max = perf_to_freq(cpudata, cpudata->highest_perf);
+ perf = READ_ONCE(cpudata->perf);
+
+ policy->cpuinfo.min_freq = policy->min = perf_to_freq(perf,
+ cpudata->nominal_freq,
+ perf.lowest_perf);
+ policy->cpuinfo.max_freq = policy->max = perf_to_freq(perf,
+ cpudata->nominal_freq,
+ perf.highest_perf);
+
/* It will be updated by governor */
policy->cur = policy->cpuinfo.min_freq;
@@ -1520,6 +1546,7 @@ static void amd_pstate_epp_cpu_exit(stru
static int amd_pstate_epp_update_limit(struct cpufreq_policy *policy)
{
struct amd_cpudata *cpudata = policy->driver_data;
+ union perf_cached perf;
u8 epp;
if (policy->min != cpudata->min_limit_freq || policy->max != cpudata->max_limit_freq)
@@ -1530,15 +1557,16 @@ static int amd_pstate_epp_update_limit(s
else
epp = READ_ONCE(cpudata->epp_cached);
+ perf = READ_ONCE(cpudata->perf);
if (trace_amd_pstate_epp_perf_enabled()) {
- trace_amd_pstate_epp_perf(cpudata->cpu, cpudata->highest_perf, epp,
- cpudata->min_limit_perf,
- cpudata->max_limit_perf,
+ trace_amd_pstate_epp_perf(cpudata->cpu, perf.highest_perf, epp,
+ perf.min_limit_perf,
+ perf.max_limit_perf,
policy->boost_enabled);
}
- return amd_pstate_update_perf(cpudata, cpudata->min_limit_perf, 0U,
- cpudata->max_limit_perf, epp, false);
+ return amd_pstate_update_perf(cpudata, perf.min_limit_perf, 0U,
+ perf.max_limit_perf, epp, false);
}
static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy)
@@ -1570,20 +1598,18 @@ static int amd_pstate_epp_set_policy(str
static int amd_pstate_epp_reenable(struct cpufreq_policy *policy)
{
struct amd_cpudata *cpudata = policy->driver_data;
- u8 max_perf;
+ union perf_cached perf = READ_ONCE(cpudata->perf);
int ret;
ret = amd_pstate_cppc_enable(true);
if (ret)
pr_err("failed to enable amd pstate during resume, return %d\n", ret);
- max_perf = READ_ONCE(cpudata->highest_perf);
-
if (trace_amd_pstate_epp_perf_enabled()) {
- trace_amd_pstate_epp_perf(cpudata->cpu, cpudata->highest_perf,
+ trace_amd_pstate_epp_perf(cpudata->cpu, perf.highest_perf,
cpudata->epp_cached,
FIELD_GET(AMD_CPPC_MIN_PERF_MASK, cpudata->cppc_req_cached),
- max_perf, policy->boost_enabled);
+ perf.highest_perf, policy->boost_enabled);
}
return amd_pstate_epp_update_limit(policy);
@@ -1607,22 +1633,21 @@ static int amd_pstate_epp_cpu_online(str
static int amd_pstate_epp_cpu_offline(struct cpufreq_policy *policy)
{
struct amd_cpudata *cpudata = policy->driver_data;
- u8 min_perf;
+ union perf_cached perf = READ_ONCE(cpudata->perf);
if (cpudata->suspended)
return 0;
- min_perf = READ_ONCE(cpudata->lowest_perf);
-
guard(mutex)(&amd_pstate_limits_lock);
if (trace_amd_pstate_epp_perf_enabled()) {
- trace_amd_pstate_epp_perf(cpudata->cpu, cpudata->highest_perf,
+ trace_amd_pstate_epp_perf(cpudata->cpu, perf.highest_perf,
AMD_CPPC_EPP_BALANCE_POWERSAVE,
- min_perf, min_perf, policy->boost_enabled);
+ perf.lowest_perf, perf.lowest_perf,
+ policy->boost_enabled);
}
- return amd_pstate_update_perf(cpudata, min_perf, 0, min_perf,
+ return amd_pstate_update_perf(cpudata, perf.lowest_perf, 0, perf.lowest_perf,
AMD_CPPC_EPP_BALANCE_POWERSAVE, false);
}
--- a/drivers/cpufreq/amd-pstate.h
+++ b/drivers/cpufreq/amd-pstate.h
@@ -13,6 +13,36 @@
/*********************************************************************
* AMD P-state INTERFACE *
*********************************************************************/
+
+/**
+ * union perf_cached - A union to cache performance-related data.
+ * @highest_perf: the maximum performance an individual processor may reach,
+ * assuming ideal conditions
+ * For platforms that support the preferred core feature, the highest_perf value maybe
+ * configured to any value in the range 166-255 by the firmware (because the preferred
+ * core ranking is encoded in the highest_perf value). To maintain consistency across
+ * all platforms, we split the highest_perf and preferred core ranking values into
+ * cpudata->perf.highest_perf and cpudata->prefcore_ranking.
+ * @nominal_perf: the maximum sustained performance level of the processor,
+ * assuming ideal operating conditions
+ * @lowest_nonlinear_perf: the lowest performance level at which nonlinear power
+ * savings are achieved
+ * @lowest_perf: the absolute lowest performance level of the processor
+ * @min_limit_perf: Cached value of the performance corresponding to policy->min
+ * @max_limit_perf: Cached value of the performance corresponding to policy->max
+ */
+union perf_cached {
+ struct {
+ u8 highest_perf;
+ u8 nominal_perf;
+ u8 lowest_nonlinear_perf;
+ u8 lowest_perf;
+ u8 min_limit_perf;
+ u8 max_limit_perf;
+ };
+ u64 val;
+};
+
/**
* struct amd_aperf_mperf
* @aperf: actual performance frequency clock count
@@ -30,20 +60,9 @@ struct amd_aperf_mperf {
* @cpu: CPU number
* @req: constraint request to apply
* @cppc_req_cached: cached performance request hints
- * @highest_perf: the maximum performance an individual processor may reach,
- * assuming ideal conditions
- * For platforms that do not support the preferred core feature, the
- * highest_pef may be configured with 166 or 255, to avoid max frequency
- * calculated wrongly. we take the fixed value as the highest_perf.
- * @nominal_perf: the maximum sustained performance level of the processor,
- * assuming ideal operating conditions
- * @lowest_nonlinear_perf: the lowest performance level at which nonlinear power
- * savings are achieved
- * @lowest_perf: the absolute lowest performance level of the processor
+ * @perf: cached performance-related data
* @prefcore_ranking: the preferred core ranking, the higher value indicates a higher
* priority.
- * @min_limit_perf: Cached value of the performance corresponding to policy->min
- * @max_limit_perf: Cached value of the performance corresponding to policy->max
* @min_limit_freq: Cached value of policy->min (in khz)
* @max_limit_freq: Cached value of policy->max (in khz)
* @nominal_freq: the frequency (in khz) that mapped to nominal_perf
@@ -68,13 +87,9 @@ struct amd_cpudata {
struct freq_qos_request req[2];
u64 cppc_req_cached;
- u8 highest_perf;
- u8 nominal_perf;
- u8 lowest_nonlinear_perf;
- u8 lowest_perf;
+ union perf_cached perf;
+
u8 prefcore_ranking;
- u8 min_limit_perf;
- u8 max_limit_perf;
u32 min_limit_freq;
u32 max_limit_freq;
u32 nominal_freq;

View File

@@ -0,0 +1,89 @@
From 6c0b59640cce68d7574078d7d1e549bdb8f0128d Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Wed, 26 Feb 2025 01:49:20 -0600
Subject: cpufreq/amd-pstate: Overhaul locking
amd_pstate_cpu_boost_update() and refresh_frequency_limits() both
update the policy state and have nothing to do with the amd-pstate
driver itself.
A global "limits" lock doesn't make sense because each CPU can have
policies changed independently. Each time a CPU changes values they
will atomically be written to the per-CPU perf member. Drop per CPU
locking cases.
The remaining "global" driver lock is used to ensure that only one
entity can change driver modes at a given time.
Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
Reviewed-by: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
---
drivers/cpufreq/amd-pstate.c | 13 +++----------
1 file changed, 3 insertions(+), 10 deletions(-)
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -196,7 +196,6 @@ static inline int get_mode_idx_from_str(
return -EINVAL;
}
-static DEFINE_MUTEX(amd_pstate_limits_lock);
static DEFINE_MUTEX(amd_pstate_driver_lock);
static u8 msr_get_epp(struct amd_cpudata *cpudata)
@@ -752,7 +751,6 @@ static int amd_pstate_set_boost(struct c
pr_err("Boost mode is not supported by this processor or SBIOS\n");
return -EOPNOTSUPP;
}
- guard(mutex)(&amd_pstate_driver_lock);
ret = amd_pstate_cpu_boost_update(policy, state);
refresh_frequency_limits(policy);
@@ -1170,8 +1168,6 @@ static ssize_t store_energy_performance_
if (ret < 0)
return -EINVAL;
- guard(mutex)(&amd_pstate_limits_lock);
-
ret = amd_pstate_set_energy_pref_index(policy, ret);
return ret ? ret : count;
@@ -1344,8 +1340,10 @@ int amd_pstate_update_status(const char
if (mode_idx < 0 || mode_idx >= AMD_PSTATE_MAX)
return -EINVAL;
- if (mode_state_machine[cppc_state][mode_idx])
+ if (mode_state_machine[cppc_state][mode_idx]) {
+ guard(mutex)(&amd_pstate_driver_lock);
return mode_state_machine[cppc_state][mode_idx](mode_idx);
+ }
return 0;
}
@@ -1366,7 +1364,6 @@ static ssize_t status_store(struct devic
char *p = memchr(buf, '\n', count);
int ret;
- guard(mutex)(&amd_pstate_driver_lock);
ret = amd_pstate_update_status(buf, p ? p - buf : count);
return ret < 0 ? ret : count;
@@ -1638,8 +1635,6 @@ static int amd_pstate_epp_cpu_offline(st
if (cpudata->suspended)
return 0;
- guard(mutex)(&amd_pstate_limits_lock);
-
if (trace_amd_pstate_epp_perf_enabled()) {
trace_amd_pstate_epp_perf(cpudata->cpu, perf.highest_perf,
AMD_CPPC_EPP_BALANCE_POWERSAVE,
@@ -1679,8 +1674,6 @@ static int amd_pstate_epp_resume(struct
struct amd_cpudata *cpudata = policy->driver_data;
if (cpudata->suspended) {
- guard(mutex)(&amd_pstate_limits_lock);
-
/* enable amd pstate from suspend state*/
amd_pstate_epp_reenable(policy);

View File

@@ -0,0 +1,48 @@
From 7c9409faeb921c76988b4cd2294ca0a959775f35 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Wed, 26 Feb 2025 01:49:21 -0600
Subject: cpufreq/amd-pstate: Drop `cppc_cap1_cached`
The `cppc_cap1_cached` variable isn't used at all, there is no
need to read it at initialization for each CPU.
Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
Reviewed-by: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
---
drivers/cpufreq/amd-pstate.c | 5 -----
drivers/cpufreq/amd-pstate.h | 2 --
2 files changed, 7 deletions(-)
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -1508,11 +1508,6 @@ static int amd_pstate_epp_cpu_init(struc
if (ret)
return ret;
WRITE_ONCE(cpudata->cppc_req_cached, value);
-
- ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1, &value);
- if (ret)
- return ret;
- WRITE_ONCE(cpudata->cppc_cap1_cached, value);
}
ret = amd_pstate_set_epp(cpudata, cpudata->epp_default);
if (ret)
--- a/drivers/cpufreq/amd-pstate.h
+++ b/drivers/cpufreq/amd-pstate.h
@@ -76,7 +76,6 @@ struct amd_aperf_mperf {
* AMD P-State driver supports preferred core featue.
* @epp_cached: Cached CPPC energy-performance preference value
* @policy: Cpufreq policy value
- * @cppc_cap1_cached Cached MSR_AMD_CPPC_CAP1 register value
*
* The amd_cpudata is key private data for each CPU thread in AMD P-State, and
* represents all the attributes and goals that AMD P-State requests at runtime.
@@ -105,7 +104,6 @@ struct amd_cpudata {
/* EPP feature related attributes*/
u8 epp_cached;
u32 policy;
- u64 cppc_cap1_cached;
bool suspended;
u8 epp_default;
};

View File

@@ -0,0 +1,144 @@
From 346b2824b742a8f5943db8c8200ba4a7492bb3cf Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Wed, 26 Feb 2025 01:49:22 -0600
Subject: cpufreq/amd-pstate-ut: Use _free macro to free put policy
Using a scoped cleanup macro simplifies cleanup code.
Reviewed-by: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
---
drivers/cpufreq/amd-pstate-ut.c | 33 ++++++++++++++-------------------
1 file changed, 14 insertions(+), 19 deletions(-)
--- a/drivers/cpufreq/amd-pstate-ut.c
+++ b/drivers/cpufreq/amd-pstate-ut.c
@@ -26,6 +26,7 @@
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/fs.h>
+#include <linux/cleanup.h>
#include <acpi/cppc_acpi.h>
@@ -127,11 +128,12 @@ static void amd_pstate_ut_check_perf(u32
u32 highest_perf = 0, nominal_perf = 0, lowest_nonlinear_perf = 0, lowest_perf = 0;
u64 cap1 = 0;
struct cppc_perf_caps cppc_perf;
- struct cpufreq_policy *policy = NULL;
struct amd_cpudata *cpudata = NULL;
union perf_cached cur_perf;
for_each_possible_cpu(cpu) {
+ struct cpufreq_policy *policy __free(put_cpufreq_policy) = NULL;
+
policy = cpufreq_cpu_get(cpu);
if (!policy)
break;
@@ -142,7 +144,7 @@ static void amd_pstate_ut_check_perf(u32
if (ret) {
amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
pr_err("%s cppc_get_perf_caps ret=%d error!\n", __func__, ret);
- goto skip_test;
+ return;
}
highest_perf = cppc_perf.highest_perf;
@@ -154,7 +156,7 @@ static void amd_pstate_ut_check_perf(u32
if (ret) {
amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
pr_err("%s read CPPC_CAP1 ret=%d error!\n", __func__, ret);
- goto skip_test;
+ return;
}
highest_perf = AMD_CPPC_HIGHEST_PERF(cap1);
@@ -167,7 +169,7 @@ static void amd_pstate_ut_check_perf(u32
if (highest_perf != cur_perf.highest_perf && !cpudata->hw_prefcore) {
pr_err("%s cpu%d highest=%d %d highest perf doesn't match\n",
__func__, cpu, highest_perf, cur_perf.highest_perf);
- goto skip_test;
+ return;
}
if (nominal_perf != cur_perf.nominal_perf ||
(lowest_nonlinear_perf != cur_perf.lowest_nonlinear_perf) ||
@@ -177,7 +179,7 @@ static void amd_pstate_ut_check_perf(u32
__func__, cpu, nominal_perf, cur_perf.nominal_perf,
lowest_nonlinear_perf, cur_perf.lowest_nonlinear_perf,
lowest_perf, cur_perf.lowest_perf);
- goto skip_test;
+ return;
}
if (!((highest_perf >= nominal_perf) &&
@@ -188,15 +190,11 @@ static void amd_pstate_ut_check_perf(u32
pr_err("%s cpu%d highest=%d >= nominal=%d > lowest_nonlinear=%d > lowest=%d > 0, the formula is incorrect!\n",
__func__, cpu, highest_perf, nominal_perf,
lowest_nonlinear_perf, lowest_perf);
- goto skip_test;
+ return;
}
- cpufreq_cpu_put(policy);
}
amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_PASS;
- return;
-skip_test:
- cpufreq_cpu_put(policy);
}
/*
@@ -207,10 +205,11 @@ skip_test:
static void amd_pstate_ut_check_freq(u32 index)
{
int cpu = 0;
- struct cpufreq_policy *policy = NULL;
struct amd_cpudata *cpudata = NULL;
for_each_possible_cpu(cpu) {
+ struct cpufreq_policy *policy __free(put_cpufreq_policy) = NULL;
+
policy = cpufreq_cpu_get(cpu);
if (!policy)
break;
@@ -224,14 +223,14 @@ static void amd_pstate_ut_check_freq(u32
pr_err("%s cpu%d max=%d >= nominal=%d > lowest_nonlinear=%d > min=%d > 0, the formula is incorrect!\n",
__func__, cpu, policy->cpuinfo.max_freq, cpudata->nominal_freq,
cpudata->lowest_nonlinear_freq, policy->cpuinfo.min_freq);
- goto skip_test;
+ return;
}
if (cpudata->lowest_nonlinear_freq != policy->min) {
amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
pr_err("%s cpu%d cpudata_lowest_nonlinear_freq=%d policy_min=%d, they should be equal!\n",
__func__, cpu, cpudata->lowest_nonlinear_freq, policy->min);
- goto skip_test;
+ return;
}
if (cpudata->boost_supported) {
@@ -243,20 +242,16 @@ static void amd_pstate_ut_check_freq(u32
pr_err("%s cpu%d policy_max=%d should be equal cpu_max=%d or cpu_nominal=%d !\n",
__func__, cpu, policy->max, policy->cpuinfo.max_freq,
cpudata->nominal_freq);
- goto skip_test;
+ return;
}
} else {
amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
pr_err("%s cpu%d must support boost!\n", __func__, cpu);
- goto skip_test;
+ return;
}
- cpufreq_cpu_put(policy);
}
amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_PASS;
- return;
-skip_test:
- cpufreq_cpu_put(policy);
}
static int amd_pstate_set_mode(enum amd_pstate_mode mode)

View File

@@ -0,0 +1,37 @@
From 310f8a994f55561902e5a75ff8623988921e3908 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Wed, 26 Feb 2025 01:49:23 -0600
Subject: cpufreq/amd-pstate-ut: Allow lowest nonlinear and lowest to be the
same
Several Ryzen AI processors support the exact same value for lowest
nonlinear perf and lowest perf. Loosen up the unit tests to allow this
scenario.
Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
Reviewed-by: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
---
drivers/cpufreq/amd-pstate-ut.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
--- a/drivers/cpufreq/amd-pstate-ut.c
+++ b/drivers/cpufreq/amd-pstate-ut.c
@@ -184,7 +184,7 @@ static void amd_pstate_ut_check_perf(u32
if (!((highest_perf >= nominal_perf) &&
(nominal_perf > lowest_nonlinear_perf) &&
- (lowest_nonlinear_perf > lowest_perf) &&
+ (lowest_nonlinear_perf >= lowest_perf) &&
(lowest_perf > 0))) {
amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
pr_err("%s cpu%d highest=%d >= nominal=%d > lowest_nonlinear=%d > lowest=%d > 0, the formula is incorrect!\n",
@@ -217,7 +217,7 @@ static void amd_pstate_ut_check_freq(u32
if (!((policy->cpuinfo.max_freq >= cpudata->nominal_freq) &&
(cpudata->nominal_freq > cpudata->lowest_nonlinear_freq) &&
- (cpudata->lowest_nonlinear_freq > policy->cpuinfo.min_freq) &&
+ (cpudata->lowest_nonlinear_freq >= policy->cpuinfo.min_freq) &&
(policy->cpuinfo.min_freq > 0))) {
amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
pr_err("%s cpu%d max=%d >= nominal=%d > lowest_nonlinear=%d > min=%d > 0, the formula is incorrect!\n",

View File

@@ -0,0 +1,309 @@
From bc4a683dbfcc306851bbfec33f9c857c523d4848 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Wed, 26 Feb 2025 01:49:24 -0600
Subject: cpufreq/amd-pstate-ut: Drop SUCCESS and FAIL enums
Enums are effectively used as a boolean and don't show
the return value of the failing call.
Instead of using enums switch to returning the actual return
code from the unit test.
Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
Reviewed-by: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
---
drivers/cpufreq/amd-pstate-ut.c | 143 ++++++++++++--------------------
1 file changed, 55 insertions(+), 88 deletions(-)
--- a/drivers/cpufreq/amd-pstate-ut.c
+++ b/drivers/cpufreq/amd-pstate-ut.c
@@ -32,30 +32,20 @@
#include "amd-pstate.h"
-/*
- * Abbreviations:
- * amd_pstate_ut: used as a shortform for AMD P-State unit test.
- * It helps to keep variable names smaller, simpler
- */
-enum amd_pstate_ut_result {
- AMD_PSTATE_UT_RESULT_PASS,
- AMD_PSTATE_UT_RESULT_FAIL,
-};
struct amd_pstate_ut_struct {
const char *name;
- void (*func)(u32 index);
- enum amd_pstate_ut_result result;
+ int (*func)(u32 index);
};
/*
* Kernel module for testing the AMD P-State unit test
*/
-static void amd_pstate_ut_acpi_cpc_valid(u32 index);
-static void amd_pstate_ut_check_enabled(u32 index);
-static void amd_pstate_ut_check_perf(u32 index);
-static void amd_pstate_ut_check_freq(u32 index);
-static void amd_pstate_ut_check_driver(u32 index);
+static int amd_pstate_ut_acpi_cpc_valid(u32 index);
+static int amd_pstate_ut_check_enabled(u32 index);
+static int amd_pstate_ut_check_perf(u32 index);
+static int amd_pstate_ut_check_freq(u32 index);
+static int amd_pstate_ut_check_driver(u32 index);
static struct amd_pstate_ut_struct amd_pstate_ut_cases[] = {
{"amd_pstate_ut_acpi_cpc_valid", amd_pstate_ut_acpi_cpc_valid },
@@ -78,51 +68,46 @@ static bool get_shared_mem(void)
/*
* check the _CPC object is present in SBIOS.
*/
-static void amd_pstate_ut_acpi_cpc_valid(u32 index)
+static int amd_pstate_ut_acpi_cpc_valid(u32 index)
{
- if (acpi_cpc_valid())
- amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_PASS;
- else {
- amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
+ if (!acpi_cpc_valid()) {
pr_err("%s the _CPC object is not present in SBIOS!\n", __func__);
+ return -EINVAL;
}
+
+ return 0;
}
-static void amd_pstate_ut_pstate_enable(u32 index)
+/*
+ * check if amd pstate is enabled
+ */
+static int amd_pstate_ut_check_enabled(u32 index)
{
- int ret = 0;
u64 cppc_enable = 0;
+ int ret;
+
+ if (get_shared_mem())
+ return 0;
ret = rdmsrl_safe(MSR_AMD_CPPC_ENABLE, &cppc_enable);
if (ret) {
- amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
pr_err("%s rdmsrl_safe MSR_AMD_CPPC_ENABLE ret=%d error!\n", __func__, ret);
- return;
+ return ret;
}
- if (cppc_enable)
- amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_PASS;
- else {
- amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
+
+ if (!cppc_enable) {
pr_err("%s amd pstate must be enabled!\n", __func__);
+ return -EINVAL;
}
-}
-/*
- * check if amd pstate is enabled
- */
-static void amd_pstate_ut_check_enabled(u32 index)
-{
- if (get_shared_mem())
- amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_PASS;
- else
- amd_pstate_ut_pstate_enable(index);
+ return 0;
}
/*
* check if performance values are reasonable.
* highest_perf >= nominal_perf > lowest_nonlinear_perf > lowest_perf > 0
*/
-static void amd_pstate_ut_check_perf(u32 index)
+static int amd_pstate_ut_check_perf(u32 index)
{
int cpu = 0, ret = 0;
u32 highest_perf = 0, nominal_perf = 0, lowest_nonlinear_perf = 0, lowest_perf = 0;
@@ -142,9 +127,8 @@ static void amd_pstate_ut_check_perf(u32
if (get_shared_mem()) {
ret = cppc_get_perf_caps(cpu, &cppc_perf);
if (ret) {
- amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
pr_err("%s cppc_get_perf_caps ret=%d error!\n", __func__, ret);
- return;
+ return ret;
}
highest_perf = cppc_perf.highest_perf;
@@ -154,9 +138,8 @@ static void amd_pstate_ut_check_perf(u32
} else {
ret = rdmsrl_safe_on_cpu(cpu, MSR_AMD_CPPC_CAP1, &cap1);
if (ret) {
- amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
pr_err("%s read CPPC_CAP1 ret=%d error!\n", __func__, ret);
- return;
+ return ret;
}
highest_perf = AMD_CPPC_HIGHEST_PERF(cap1);
@@ -169,32 +152,30 @@ static void amd_pstate_ut_check_perf(u32
if (highest_perf != cur_perf.highest_perf && !cpudata->hw_prefcore) {
pr_err("%s cpu%d highest=%d %d highest perf doesn't match\n",
__func__, cpu, highest_perf, cur_perf.highest_perf);
- return;
+ return -EINVAL;
}
if (nominal_perf != cur_perf.nominal_perf ||
(lowest_nonlinear_perf != cur_perf.lowest_nonlinear_perf) ||
(lowest_perf != cur_perf.lowest_perf)) {
- amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
pr_err("%s cpu%d nominal=%d %d lowest_nonlinear=%d %d lowest=%d %d, they should be equal!\n",
__func__, cpu, nominal_perf, cur_perf.nominal_perf,
lowest_nonlinear_perf, cur_perf.lowest_nonlinear_perf,
lowest_perf, cur_perf.lowest_perf);
- return;
+ return -EINVAL;
}
if (!((highest_perf >= nominal_perf) &&
(nominal_perf > lowest_nonlinear_perf) &&
(lowest_nonlinear_perf >= lowest_perf) &&
(lowest_perf > 0))) {
- amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
pr_err("%s cpu%d highest=%d >= nominal=%d > lowest_nonlinear=%d > lowest=%d > 0, the formula is incorrect!\n",
__func__, cpu, highest_perf, nominal_perf,
lowest_nonlinear_perf, lowest_perf);
- return;
+ return -EINVAL;
}
}
- amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_PASS;
+ return 0;
}
/*
@@ -202,7 +183,7 @@ static void amd_pstate_ut_check_perf(u32
* max_freq >= nominal_freq > lowest_nonlinear_freq > min_freq > 0
* check max freq when set support boost mode.
*/
-static void amd_pstate_ut_check_freq(u32 index)
+static int amd_pstate_ut_check_freq(u32 index)
{
int cpu = 0;
struct amd_cpudata *cpudata = NULL;
@@ -219,39 +200,33 @@ static void amd_pstate_ut_check_freq(u32
(cpudata->nominal_freq > cpudata->lowest_nonlinear_freq) &&
(cpudata->lowest_nonlinear_freq >= policy->cpuinfo.min_freq) &&
(policy->cpuinfo.min_freq > 0))) {
- amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
pr_err("%s cpu%d max=%d >= nominal=%d > lowest_nonlinear=%d > min=%d > 0, the formula is incorrect!\n",
__func__, cpu, policy->cpuinfo.max_freq, cpudata->nominal_freq,
cpudata->lowest_nonlinear_freq, policy->cpuinfo.min_freq);
- return;
+ return -EINVAL;
}
if (cpudata->lowest_nonlinear_freq != policy->min) {
- amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
pr_err("%s cpu%d cpudata_lowest_nonlinear_freq=%d policy_min=%d, they should be equal!\n",
__func__, cpu, cpudata->lowest_nonlinear_freq, policy->min);
- return;
+ return -EINVAL;
}
if (cpudata->boost_supported) {
- if ((policy->max == policy->cpuinfo.max_freq) ||
- (policy->max == cpudata->nominal_freq))
- amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_PASS;
- else {
- amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
+ if ((policy->max != policy->cpuinfo.max_freq) &&
+ (policy->max != cpudata->nominal_freq)) {
pr_err("%s cpu%d policy_max=%d should be equal cpu_max=%d or cpu_nominal=%d !\n",
__func__, cpu, policy->max, policy->cpuinfo.max_freq,
cpudata->nominal_freq);
- return;
+ return -EINVAL;
}
} else {
- amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
pr_err("%s cpu%d must support boost!\n", __func__, cpu);
- return;
+ return -EINVAL;
}
}
- amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_PASS;
+ return 0;
}
static int amd_pstate_set_mode(enum amd_pstate_mode mode)
@@ -263,32 +238,28 @@ static int amd_pstate_set_mode(enum amd_
return amd_pstate_update_status(mode_str, strlen(mode_str));
}
-static void amd_pstate_ut_check_driver(u32 index)
+static int amd_pstate_ut_check_driver(u32 index)
{
enum amd_pstate_mode mode1, mode2 = AMD_PSTATE_DISABLE;
- int ret;
for (mode1 = AMD_PSTATE_DISABLE; mode1 < AMD_PSTATE_MAX; mode1++) {
- ret = amd_pstate_set_mode(mode1);
+ int ret = amd_pstate_set_mode(mode1);
if (ret)
- goto out;
+ return ret;
for (mode2 = AMD_PSTATE_DISABLE; mode2 < AMD_PSTATE_MAX; mode2++) {
if (mode1 == mode2)
continue;
ret = amd_pstate_set_mode(mode2);
- if (ret)
- goto out;
+ if (ret) {
+ pr_err("%s: failed to update status for %s->%s\n", __func__,
+ amd_pstate_get_mode_string(mode1),
+ amd_pstate_get_mode_string(mode2));
+ return ret;
+ }
}
}
-out:
- if (ret)
- pr_warn("%s: failed to update status for %s->%s: %d\n", __func__,
- amd_pstate_get_mode_string(mode1),
- amd_pstate_get_mode_string(mode2), ret);
-
- amd_pstate_ut_cases[index].result = ret ?
- AMD_PSTATE_UT_RESULT_FAIL :
- AMD_PSTATE_UT_RESULT_PASS;
+
+ return 0;
}
static int __init amd_pstate_ut_init(void)
@@ -296,16 +267,12 @@ static int __init amd_pstate_ut_init(voi
u32 i = 0, arr_size = ARRAY_SIZE(amd_pstate_ut_cases);
for (i = 0; i < arr_size; i++) {
- amd_pstate_ut_cases[i].func(i);
- switch (amd_pstate_ut_cases[i].result) {
- case AMD_PSTATE_UT_RESULT_PASS:
+ int ret = amd_pstate_ut_cases[i].func(i);
+
+ if (ret)
+ pr_err("%-4d %-20s\t fail: %d!\n", i+1, amd_pstate_ut_cases[i].name, ret);
+ else
pr_info("%-4d %-20s\t success!\n", i+1, amd_pstate_ut_cases[i].name);
- break;
- case AMD_PSTATE_UT_RESULT_FAIL:
- default:
- pr_info("%-4d %-20s\t fail!\n", i+1, amd_pstate_ut_cases[i].name);
- break;
- }
}
return 0;

View File

@@ -0,0 +1,50 @@
From 3651a3bd2d07f627d5382ec9e9b980c689d0eb98 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Wed, 26 Feb 2025 01:49:25 -0600
Subject: cpufreq/amd-pstate-ut: Run on all of the correct CPUs
If a CPU is missing a policy or one has been offlined then the unit test
is skipped for the rest of the CPUs on the system.
Instead; iterate online CPUs and skip any missing policies to allow
continuing to test the rest of them.
Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
Reviewed-by: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
---
drivers/cpufreq/amd-pstate-ut.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
--- a/drivers/cpufreq/amd-pstate-ut.c
+++ b/drivers/cpufreq/amd-pstate-ut.c
@@ -116,12 +116,12 @@ static int amd_pstate_ut_check_perf(u32
struct amd_cpudata *cpudata = NULL;
union perf_cached cur_perf;
- for_each_possible_cpu(cpu) {
+ for_each_online_cpu(cpu) {
struct cpufreq_policy *policy __free(put_cpufreq_policy) = NULL;
policy = cpufreq_cpu_get(cpu);
if (!policy)
- break;
+ continue;
cpudata = policy->driver_data;
if (get_shared_mem()) {
@@ -188,12 +188,12 @@ static int amd_pstate_ut_check_freq(u32
int cpu = 0;
struct amd_cpudata *cpudata = NULL;
- for_each_possible_cpu(cpu) {
+ for_each_online_cpu(cpu) {
struct cpufreq_policy *policy __free(put_cpufreq_policy) = NULL;
policy = cpufreq_cpu_get(cpu);
if (!policy)
- break;
+ continue;
cpudata = policy->driver_data;
if (!((policy->cpuinfo.max_freq >= cpudata->nominal_freq) &&

View File

@@ -0,0 +1,42 @@
From 4ec612c9d5de9620b8f0ad4463db5d08c2d68222 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Wed, 26 Feb 2025 01:49:26 -0600
Subject: cpufreq/amd-pstate-ut: Adjust variable scope
In amd_pstate_ut_check_freq() and amd_pstate_ut_check_perf() the cpudata
variable is only needed in the scope of the for loop. Move it there.
Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
Reviewed-by: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
---
drivers/cpufreq/amd-pstate-ut.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
--- a/drivers/cpufreq/amd-pstate-ut.c
+++ b/drivers/cpufreq/amd-pstate-ut.c
@@ -113,11 +113,11 @@ static int amd_pstate_ut_check_perf(u32
u32 highest_perf = 0, nominal_perf = 0, lowest_nonlinear_perf = 0, lowest_perf = 0;
u64 cap1 = 0;
struct cppc_perf_caps cppc_perf;
- struct amd_cpudata *cpudata = NULL;
union perf_cached cur_perf;
for_each_online_cpu(cpu) {
struct cpufreq_policy *policy __free(put_cpufreq_policy) = NULL;
+ struct amd_cpudata *cpudata;
policy = cpufreq_cpu_get(cpu);
if (!policy)
@@ -186,10 +186,10 @@ static int amd_pstate_ut_check_perf(u32
static int amd_pstate_ut_check_freq(u32 index)
{
int cpu = 0;
- struct amd_cpudata *cpudata = NULL;
for_each_online_cpu(cpu) {
struct cpufreq_policy *policy __free(put_cpufreq_policy) = NULL;
+ struct amd_cpudata *cpudata;
policy = cpufreq_cpu_get(cpu);
if (!policy)

View File

@@ -0,0 +1,123 @@
From 1512ed2a741a0df98972679da6177df4998fd8ce Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Wed, 26 Feb 2025 01:49:27 -0600
Subject: cpufreq/amd-pstate: Replace all AMD_CPPC_* macros with masks
Bitfield masks are easier to follow and less error prone.
Reviewed-by: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
---
arch/x86/include/asm/msr-index.h | 20 +++++++++++---------
arch/x86/kernel/acpi/cppc.c | 4 +++-
drivers/cpufreq/amd-pstate-ut.c | 9 +++++----
drivers/cpufreq/amd-pstate.c | 16 ++++++----------
4 files changed, 25 insertions(+), 24 deletions(-)
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -701,15 +701,17 @@
#define MSR_AMD_CPPC_REQ 0xc00102b3
#define MSR_AMD_CPPC_STATUS 0xc00102b4
-#define AMD_CPPC_LOWEST_PERF(x) (((x) >> 0) & 0xff)
-#define AMD_CPPC_LOWNONLIN_PERF(x) (((x) >> 8) & 0xff)
-#define AMD_CPPC_NOMINAL_PERF(x) (((x) >> 16) & 0xff)
-#define AMD_CPPC_HIGHEST_PERF(x) (((x) >> 24) & 0xff)
+/* Masks for use with MSR_AMD_CPPC_CAP1 */
+#define AMD_CPPC_LOWEST_PERF_MASK GENMASK(7, 0)
+#define AMD_CPPC_LOWNONLIN_PERF_MASK GENMASK(15, 8)
+#define AMD_CPPC_NOMINAL_PERF_MASK GENMASK(23, 16)
+#define AMD_CPPC_HIGHEST_PERF_MASK GENMASK(31, 24)
-#define AMD_CPPC_MAX_PERF(x) (((x) & 0xff) << 0)
-#define AMD_CPPC_MIN_PERF(x) (((x) & 0xff) << 8)
-#define AMD_CPPC_DES_PERF(x) (((x) & 0xff) << 16)
-#define AMD_CPPC_ENERGY_PERF_PREF(x) (((x) & 0xff) << 24)
+/* Masks for use with MSR_AMD_CPPC_REQ */
+#define AMD_CPPC_MAX_PERF_MASK GENMASK(7, 0)
+#define AMD_CPPC_MIN_PERF_MASK GENMASK(15, 8)
+#define AMD_CPPC_DES_PERF_MASK GENMASK(23, 16)
+#define AMD_CPPC_EPP_PERF_MASK GENMASK(31, 24)
/* AMD Performance Counter Global Status and Control MSRs */
#define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS 0xc0000300
--- a/arch/x86/kernel/acpi/cppc.c
+++ b/arch/x86/kernel/acpi/cppc.c
@@ -4,6 +4,8 @@
* Copyright (c) 2016, Intel Corporation.
*/
+#include <linux/bitfield.h>
+
#include <acpi/cppc_acpi.h>
#include <asm/msr.h>
#include <asm/processor.h>
@@ -149,7 +151,7 @@ int amd_get_highest_perf(unsigned int cp
if (ret)
goto out;
- val = AMD_CPPC_HIGHEST_PERF(val);
+ val = FIELD_GET(AMD_CPPC_HIGHEST_PERF_MASK, val);
} else {
ret = cppc_get_highest_perf(cpu, &val);
if (ret)
--- a/drivers/cpufreq/amd-pstate-ut.c
+++ b/drivers/cpufreq/amd-pstate-ut.c
@@ -22,6 +22,7 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/bitfield.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
@@ -142,10 +143,10 @@ static int amd_pstate_ut_check_perf(u32
return ret;
}
- highest_perf = AMD_CPPC_HIGHEST_PERF(cap1);
- nominal_perf = AMD_CPPC_NOMINAL_PERF(cap1);
- lowest_nonlinear_perf = AMD_CPPC_LOWNONLIN_PERF(cap1);
- lowest_perf = AMD_CPPC_LOWEST_PERF(cap1);
+ highest_perf = FIELD_GET(AMD_CPPC_HIGHEST_PERF_MASK, cap1);
+ nominal_perf = FIELD_GET(AMD_CPPC_NOMINAL_PERF_MASK, cap1);
+ lowest_nonlinear_perf = FIELD_GET(AMD_CPPC_LOWNONLIN_PERF_MASK, cap1);
+ lowest_perf = FIELD_GET(AMD_CPPC_LOWEST_PERF_MASK, cap1);
}
cur_perf = READ_ONCE(cpudata->perf);
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -89,11 +89,6 @@ static bool cppc_enabled;
static bool amd_pstate_prefcore = true;
static struct quirk_entry *quirks;
-#define AMD_CPPC_MAX_PERF_MASK GENMASK(7, 0)
-#define AMD_CPPC_MIN_PERF_MASK GENMASK(15, 8)
-#define AMD_CPPC_DES_PERF_MASK GENMASK(23, 16)
-#define AMD_CPPC_EPP_PERF_MASK GENMASK(31, 24)
-
/*
* AMD Energy Preference Performance (EPP)
* The EPP is used in the CCLK DPM controller to drive
@@ -439,12 +434,13 @@ static int msr_init_perf(struct amd_cpud
perf.highest_perf = numerator;
perf.max_limit_perf = numerator;
- perf.min_limit_perf = AMD_CPPC_LOWEST_PERF(cap1);
- perf.nominal_perf = AMD_CPPC_NOMINAL_PERF(cap1);
- perf.lowest_nonlinear_perf = AMD_CPPC_LOWNONLIN_PERF(cap1);
- perf.lowest_perf = AMD_CPPC_LOWEST_PERF(cap1);
+ perf.min_limit_perf = FIELD_GET(AMD_CPPC_LOWEST_PERF_MASK, cap1);
+ perf.nominal_perf = FIELD_GET(AMD_CPPC_NOMINAL_PERF_MASK, cap1);
+ perf.lowest_nonlinear_perf = FIELD_GET(AMD_CPPC_LOWNONLIN_PERF_MASK, cap1);
+ perf.lowest_perf = FIELD_GET(AMD_CPPC_LOWEST_PERF_MASK, cap1);
WRITE_ONCE(cpudata->perf, perf);
- WRITE_ONCE(cpudata->prefcore_ranking, AMD_CPPC_HIGHEST_PERF(cap1));
+ WRITE_ONCE(cpudata->prefcore_ranking, FIELD_GET(AMD_CPPC_HIGHEST_PERF_MASK, cap1));
+
return 0;
}

View File

@@ -0,0 +1,60 @@
From bf6e8073cc7f17d6be40e16a04b5a277d7217f39 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Wed, 26 Feb 2025 01:49:28 -0600
Subject: cpufreq/amd-pstate: Cache CPPC request in shared mem case too
In order to prevent a potential write for shmem_update_perf()
cache the request into the cppc_req_cached variable normally only
used for the MSR case.
This adds symmetry into the code and potentially avoids extra writes.
Reviewed-by: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
---
drivers/cpufreq/amd-pstate.c | 22 +++++++++++++++++++++-
1 file changed, 21 insertions(+), 1 deletion(-)
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -496,6 +496,8 @@ static int shmem_update_perf(struct amd_
u8 des_perf, u8 max_perf, u8 epp, bool fast_switch)
{
struct cppc_perf_ctrls perf_ctrls;
+ u64 value, prev;
+ int ret;
if (cppc_state == AMD_PSTATE_ACTIVE) {
int ret = shmem_set_epp(cpudata, epp);
@@ -504,11 +506,29 @@ static int shmem_update_perf(struct amd_
return ret;
}
+ value = prev = READ_ONCE(cpudata->cppc_req_cached);
+
+ value &= ~(AMD_CPPC_MAX_PERF_MASK | AMD_CPPC_MIN_PERF_MASK |
+ AMD_CPPC_DES_PERF_MASK | AMD_CPPC_EPP_PERF_MASK);
+ value |= FIELD_PREP(AMD_CPPC_MAX_PERF_MASK, max_perf);
+ value |= FIELD_PREP(AMD_CPPC_DES_PERF_MASK, des_perf);
+ value |= FIELD_PREP(AMD_CPPC_MIN_PERF_MASK, min_perf);
+ value |= FIELD_PREP(AMD_CPPC_EPP_PERF_MASK, epp);
+
+ if (value == prev)
+ return 0;
+
perf_ctrls.max_perf = max_perf;
perf_ctrls.min_perf = min_perf;
perf_ctrls.desired_perf = des_perf;
- return cppc_set_perf(cpudata->cpu, &perf_ctrls);
+ ret = cppc_set_perf(cpudata->cpu, &perf_ctrls);
+ if (ret)
+ return ret;
+
+ WRITE_ONCE(cpudata->cppc_req_cached, value);
+
+ return 0;
}
static inline bool amd_pstate_sample(struct amd_cpudata *cpudata)

View File

@@ -0,0 +1,318 @@
From 1a3ff33ff2fbe3ecc2d86addd115329fddb28ea1 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Wed, 26 Feb 2025 01:49:29 -0600
Subject: cpufreq/amd-pstate: Move all EPP tracing into *_update_perf and
*_set_epp functions
The EPP tracing is done by the caller today, but this precludes the
information about whether the CPPC request has changed.
Move it into the update_perf and set_epp functions and include information
about whether the request has changed from the last one.
amd_pstate_update_perf() and amd_pstate_set_epp() now require the policy
as an argument instead of the cpudata.
Reviewed-by: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
---
drivers/cpufreq/amd-pstate-trace.h | 13 +++-
drivers/cpufreq/amd-pstate.c | 118 +++++++++++++++++------------
2 files changed, 80 insertions(+), 51 deletions(-)
--- a/drivers/cpufreq/amd-pstate-trace.h
+++ b/drivers/cpufreq/amd-pstate-trace.h
@@ -90,7 +90,8 @@ TRACE_EVENT(amd_pstate_epp_perf,
u8 epp,
u8 min_perf,
u8 max_perf,
- bool boost
+ bool boost,
+ bool changed
),
TP_ARGS(cpu_id,
@@ -98,7 +99,8 @@ TRACE_EVENT(amd_pstate_epp_perf,
epp,
min_perf,
max_perf,
- boost),
+ boost,
+ changed),
TP_STRUCT__entry(
__field(unsigned int, cpu_id)
@@ -107,6 +109,7 @@ TRACE_EVENT(amd_pstate_epp_perf,
__field(u8, min_perf)
__field(u8, max_perf)
__field(bool, boost)
+ __field(bool, changed)
),
TP_fast_assign(
@@ -116,15 +119,17 @@ TRACE_EVENT(amd_pstate_epp_perf,
__entry->min_perf = min_perf;
__entry->max_perf = max_perf;
__entry->boost = boost;
+ __entry->changed = changed;
),
- TP_printk("cpu%u: [%hhu<->%hhu]/%hhu, epp=%hhu, boost=%u",
+ TP_printk("cpu%u: [%hhu<->%hhu]/%hhu, epp=%hhu, boost=%u, changed=%u",
(unsigned int)__entry->cpu_id,
(u8)__entry->min_perf,
(u8)__entry->max_perf,
(u8)__entry->highest_perf,
(u8)__entry->epp,
- (bool)__entry->boost
+ (bool)__entry->boost,
+ (bool)__entry->changed
)
);
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -228,9 +228,10 @@ static u8 shmem_get_epp(struct amd_cpuda
return FIELD_GET(AMD_CPPC_EPP_PERF_MASK, epp);
}
-static int msr_update_perf(struct amd_cpudata *cpudata, u8 min_perf,
+static int msr_update_perf(struct cpufreq_policy *policy, u8 min_perf,
u8 des_perf, u8 max_perf, u8 epp, bool fast_switch)
{
+ struct amd_cpudata *cpudata = policy->driver_data;
u64 value, prev;
value = prev = READ_ONCE(cpudata->cppc_req_cached);
@@ -242,6 +243,18 @@ static int msr_update_perf(struct amd_cp
value |= FIELD_PREP(AMD_CPPC_MIN_PERF_MASK, min_perf);
value |= FIELD_PREP(AMD_CPPC_EPP_PERF_MASK, epp);
+ if (trace_amd_pstate_epp_perf_enabled()) {
+ union perf_cached perf = READ_ONCE(cpudata->perf);
+
+ trace_amd_pstate_epp_perf(cpudata->cpu,
+ perf.highest_perf,
+ epp,
+ min_perf,
+ max_perf,
+ policy->boost_enabled,
+ value != prev);
+ }
+
if (value == prev)
return 0;
@@ -256,24 +269,26 @@ static int msr_update_perf(struct amd_cp
}
WRITE_ONCE(cpudata->cppc_req_cached, value);
- WRITE_ONCE(cpudata->epp_cached, epp);
+ if (epp != cpudata->epp_cached)
+ WRITE_ONCE(cpudata->epp_cached, epp);
return 0;
}
DEFINE_STATIC_CALL(amd_pstate_update_perf, msr_update_perf);
-static inline int amd_pstate_update_perf(struct amd_cpudata *cpudata,
+static inline int amd_pstate_update_perf(struct cpufreq_policy *policy,
u8 min_perf, u8 des_perf,
u8 max_perf, u8 epp,
bool fast_switch)
{
- return static_call(amd_pstate_update_perf)(cpudata, min_perf, des_perf,
+ return static_call(amd_pstate_update_perf)(policy, min_perf, des_perf,
max_perf, epp, fast_switch);
}
-static int msr_set_epp(struct amd_cpudata *cpudata, u8 epp)
+static int msr_set_epp(struct cpufreq_policy *policy, u8 epp)
{
+ struct amd_cpudata *cpudata = policy->driver_data;
u64 value, prev;
int ret;
@@ -281,6 +296,19 @@ static int msr_set_epp(struct amd_cpudat
value &= ~AMD_CPPC_EPP_PERF_MASK;
value |= FIELD_PREP(AMD_CPPC_EPP_PERF_MASK, epp);
+ if (trace_amd_pstate_epp_perf_enabled()) {
+ union perf_cached perf = cpudata->perf;
+
+ trace_amd_pstate_epp_perf(cpudata->cpu, perf.highest_perf,
+ epp,
+ FIELD_GET(AMD_CPPC_MIN_PERF_MASK,
+ cpudata->cppc_req_cached),
+ FIELD_GET(AMD_CPPC_MAX_PERF_MASK,
+ cpudata->cppc_req_cached),
+ policy->boost_enabled,
+ value != prev);
+ }
+
if (value == prev)
return 0;
@@ -299,15 +327,29 @@ static int msr_set_epp(struct amd_cpudat
DEFINE_STATIC_CALL(amd_pstate_set_epp, msr_set_epp);
-static inline int amd_pstate_set_epp(struct amd_cpudata *cpudata, u8 epp)
+static inline int amd_pstate_set_epp(struct cpufreq_policy *policy, u8 epp)
{
- return static_call(amd_pstate_set_epp)(cpudata, epp);
+ return static_call(amd_pstate_set_epp)(policy, epp);
}
-static int shmem_set_epp(struct amd_cpudata *cpudata, u8 epp)
+static int shmem_set_epp(struct cpufreq_policy *policy, u8 epp)
{
- int ret;
+ struct amd_cpudata *cpudata = policy->driver_data;
struct cppc_perf_ctrls perf_ctrls;
+ int ret;
+
+ if (trace_amd_pstate_epp_perf_enabled()) {
+ union perf_cached perf = cpudata->perf;
+
+ trace_amd_pstate_epp_perf(cpudata->cpu, perf.highest_perf,
+ epp,
+ FIELD_GET(AMD_CPPC_MIN_PERF_MASK,
+ cpudata->cppc_req_cached),
+ FIELD_GET(AMD_CPPC_MAX_PERF_MASK,
+ cpudata->cppc_req_cached),
+ policy->boost_enabled,
+ epp != cpudata->epp_cached);
+ }
if (epp == cpudata->epp_cached)
return 0;
@@ -339,17 +381,7 @@ static int amd_pstate_set_energy_pref_in
return -EBUSY;
}
- if (trace_amd_pstate_epp_perf_enabled()) {
- union perf_cached perf = READ_ONCE(cpudata->perf);
-
- trace_amd_pstate_epp_perf(cpudata->cpu, perf.highest_perf,
- epp,
- FIELD_GET(AMD_CPPC_MIN_PERF_MASK, cpudata->cppc_req_cached),
- FIELD_GET(AMD_CPPC_MAX_PERF_MASK, cpudata->cppc_req_cached),
- policy->boost_enabled);
- }
-
- return amd_pstate_set_epp(cpudata, epp);
+ return amd_pstate_set_epp(policy, epp);
}
static inline int msr_cppc_enable(bool enable)
@@ -492,15 +524,16 @@ static inline int amd_pstate_init_perf(s
return static_call(amd_pstate_init_perf)(cpudata);
}
-static int shmem_update_perf(struct amd_cpudata *cpudata, u8 min_perf,
+static int shmem_update_perf(struct cpufreq_policy *policy, u8 min_perf,
u8 des_perf, u8 max_perf, u8 epp, bool fast_switch)
{
+ struct amd_cpudata *cpudata = policy->driver_data;
struct cppc_perf_ctrls perf_ctrls;
u64 value, prev;
int ret;
if (cppc_state == AMD_PSTATE_ACTIVE) {
- int ret = shmem_set_epp(cpudata, epp);
+ int ret = shmem_set_epp(policy, epp);
if (ret)
return ret;
@@ -515,6 +548,18 @@ static int shmem_update_perf(struct amd_
value |= FIELD_PREP(AMD_CPPC_MIN_PERF_MASK, min_perf);
value |= FIELD_PREP(AMD_CPPC_EPP_PERF_MASK, epp);
+ if (trace_amd_pstate_epp_perf_enabled()) {
+ union perf_cached perf = READ_ONCE(cpudata->perf);
+
+ trace_amd_pstate_epp_perf(cpudata->cpu,
+ perf.highest_perf,
+ epp,
+ min_perf,
+ max_perf,
+ policy->boost_enabled,
+ value != prev);
+ }
+
if (value == prev)
return 0;
@@ -592,7 +637,7 @@ static void amd_pstate_update(struct amd
cpudata->cpu, fast_switch);
}
- amd_pstate_update_perf(cpudata, min_perf, des_perf, max_perf, 0, fast_switch);
+ amd_pstate_update_perf(policy, min_perf, des_perf, max_perf, 0, fast_switch);
}
static int amd_pstate_verify(struct cpufreq_policy_data *policy_data)
@@ -1525,7 +1570,7 @@ static int amd_pstate_epp_cpu_init(struc
return ret;
WRITE_ONCE(cpudata->cppc_req_cached, value);
}
- ret = amd_pstate_set_epp(cpudata, cpudata->epp_default);
+ ret = amd_pstate_set_epp(policy, cpudata->epp_default);
if (ret)
return ret;
@@ -1566,14 +1611,8 @@ static int amd_pstate_epp_update_limit(s
epp = READ_ONCE(cpudata->epp_cached);
perf = READ_ONCE(cpudata->perf);
- if (trace_amd_pstate_epp_perf_enabled()) {
- trace_amd_pstate_epp_perf(cpudata->cpu, perf.highest_perf, epp,
- perf.min_limit_perf,
- perf.max_limit_perf,
- policy->boost_enabled);
- }
- return amd_pstate_update_perf(cpudata, perf.min_limit_perf, 0U,
+ return amd_pstate_update_perf(policy, perf.min_limit_perf, 0U,
perf.max_limit_perf, epp, false);
}
@@ -1605,20 +1644,12 @@ static int amd_pstate_epp_set_policy(str
static int amd_pstate_epp_reenable(struct cpufreq_policy *policy)
{
- struct amd_cpudata *cpudata = policy->driver_data;
- union perf_cached perf = READ_ONCE(cpudata->perf);
int ret;
ret = amd_pstate_cppc_enable(true);
if (ret)
pr_err("failed to enable amd pstate during resume, return %d\n", ret);
- if (trace_amd_pstate_epp_perf_enabled()) {
- trace_amd_pstate_epp_perf(cpudata->cpu, perf.highest_perf,
- cpudata->epp_cached,
- FIELD_GET(AMD_CPPC_MIN_PERF_MASK, cpudata->cppc_req_cached),
- perf.highest_perf, policy->boost_enabled);
- }
return amd_pstate_epp_update_limit(policy);
}
@@ -1646,14 +1677,7 @@ static int amd_pstate_epp_cpu_offline(st
if (cpudata->suspended)
return 0;
- if (trace_amd_pstate_epp_perf_enabled()) {
- trace_amd_pstate_epp_perf(cpudata->cpu, perf.highest_perf,
- AMD_CPPC_EPP_BALANCE_POWERSAVE,
- perf.lowest_perf, perf.lowest_perf,
- policy->boost_enabled);
- }
-
- return amd_pstate_update_perf(cpudata, perf.lowest_perf, 0, perf.lowest_perf,
+ return amd_pstate_update_perf(policy, perf.lowest_perf, 0, perf.lowest_perf,
AMD_CPPC_EPP_BALANCE_POWERSAVE, false);
}

View File

@@ -0,0 +1,37 @@
From eaf7b28995ee0346be8ac59869645e975eb6a91c Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Wed, 26 Feb 2025 01:49:30 -0600
Subject: cpufreq/amd-pstate: Update cppc_req_cached for shared mem EPP writes
On EPP only writes update the cached variable so that the min/max
performance controls don't need to be updated again.
Reviewed-by: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
---
drivers/cpufreq/amd-pstate.c | 6 ++++++
1 file changed, 6 insertions(+)
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -336,6 +336,7 @@ static int shmem_set_epp(struct cpufreq_
{
struct amd_cpudata *cpudata = policy->driver_data;
struct cppc_perf_ctrls perf_ctrls;
+ u64 value;
int ret;
if (trace_amd_pstate_epp_perf_enabled()) {
@@ -362,6 +363,11 @@ static int shmem_set_epp(struct cpufreq_
}
WRITE_ONCE(cpudata->epp_cached, epp);
+ value = READ_ONCE(cpudata->cppc_req_cached);
+ value &= ~AMD_CPPC_EPP_PERF_MASK;
+ value |= FIELD_PREP(AMD_CPPC_EPP_PERF_MASK, epp);
+ WRITE_ONCE(cpudata->cppc_req_cached, value);
+
return ret;
}

View File

@@ -0,0 +1,38 @@
From a2ec1d51a050afc3a6d3ce35412d082e916e7eef Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Wed, 26 Feb 2025 01:49:31 -0600
Subject: cpufreq/amd-pstate: Drop debug statements for policy setting
There are trace events that exist now for all amd-pstate modes that
will output information right before programming to the hardware.
This makes the existing debug statements unnecessary remaining
overhead. Drop them.
Reviewed-by: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
---
drivers/cpufreq/amd-pstate.c | 4 ----
1 file changed, 4 deletions(-)
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -667,7 +667,6 @@ static int amd_pstate_verify(struct cpuf
}
cpufreq_verify_within_cpu_limits(policy_data);
- pr_debug("policy_max =%d, policy_min=%d\n", policy_data->max, policy_data->min);
return 0;
}
@@ -1630,9 +1629,6 @@ static int amd_pstate_epp_set_policy(str
if (!policy->cpuinfo.max_freq)
return -ENODEV;
- pr_debug("set_policy: cpuinfo.max %u policy->max %u\n",
- policy->cpuinfo.max_freq, policy->max);
-
cpudata->policy = policy->policy;
ret = amd_pstate_epp_update_limit(policy);

View File

@@ -0,0 +1,327 @@
From 3a840f6d42aba96e1974857c157cab2f9c220045 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Wed, 26 Feb 2025 01:49:32 -0600
Subject: cpufreq/amd-pstate: Rework CPPC enabling
The CPPC enable register is configured as "write once". That is
any future writes don't actually do anything.
Because of this, all the cleanup paths that currently exist for
CPPC disable are non-effective.
Rework CPPC enable to only enable after all the CAP registers have
been read to avoid enabling CPPC on CPUs with invalid _CPC or
unpopulated MSRs.
As the register is write once, remove all cleanup paths as well.
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
---
drivers/cpufreq/amd-pstate.c | 179 +++++++----------------------------
1 file changed, 35 insertions(+), 144 deletions(-)
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -85,7 +85,6 @@ static struct cpufreq_driver *current_ps
static struct cpufreq_driver amd_pstate_driver;
static struct cpufreq_driver amd_pstate_epp_driver;
static int cppc_state = AMD_PSTATE_UNDEFINED;
-static bool cppc_enabled;
static bool amd_pstate_prefcore = true;
static struct quirk_entry *quirks;
@@ -371,89 +370,21 @@ static int shmem_set_epp(struct cpufreq_
return ret;
}
-static int amd_pstate_set_energy_pref_index(struct cpufreq_policy *policy,
- int pref_index)
+static inline int msr_cppc_enable(struct cpufreq_policy *policy)
{
- struct amd_cpudata *cpudata = policy->driver_data;
- u8 epp;
-
- if (!pref_index)
- epp = cpudata->epp_default;
- else
- epp = epp_values[pref_index];
-
- if (epp > 0 && cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) {
- pr_debug("EPP cannot be set under performance policy\n");
- return -EBUSY;
- }
-
- return amd_pstate_set_epp(policy, epp);
-}
-
-static inline int msr_cppc_enable(bool enable)
-{
- int ret, cpu;
- unsigned long logical_proc_id_mask = 0;
-
- /*
- * MSR_AMD_CPPC_ENABLE is write-once, once set it cannot be cleared.
- */
- if (!enable)
- return 0;
-
- if (enable == cppc_enabled)
- return 0;
-
- for_each_present_cpu(cpu) {
- unsigned long logical_id = topology_logical_package_id(cpu);
-
- if (test_bit(logical_id, &logical_proc_id_mask))
- continue;
-
- set_bit(logical_id, &logical_proc_id_mask);
-
- ret = wrmsrl_safe_on_cpu(cpu, MSR_AMD_CPPC_ENABLE,
- enable);
- if (ret)
- return ret;
- }
-
- cppc_enabled = enable;
- return 0;
+ return wrmsrl_safe_on_cpu(policy->cpu, MSR_AMD_CPPC_ENABLE, 1);
}
-static int shmem_cppc_enable(bool enable)
+static int shmem_cppc_enable(struct cpufreq_policy *policy)
{
- int cpu, ret = 0;
- struct cppc_perf_ctrls perf_ctrls;
-
- if (enable == cppc_enabled)
- return 0;
-
- for_each_present_cpu(cpu) {
- ret = cppc_set_enable(cpu, enable);
- if (ret)
- return ret;
-
- /* Enable autonomous mode for EPP */
- if (cppc_state == AMD_PSTATE_ACTIVE) {
- /* Set desired perf as zero to allow EPP firmware control */
- perf_ctrls.desired_perf = 0;
- ret = cppc_set_perf(cpu, &perf_ctrls);
- if (ret)
- return ret;
- }
- }
-
- cppc_enabled = enable;
- return ret;
+ return cppc_set_enable(policy->cpu, 1);
}
DEFINE_STATIC_CALL(amd_pstate_cppc_enable, msr_cppc_enable);
-static inline int amd_pstate_cppc_enable(bool enable)
+static inline int amd_pstate_cppc_enable(struct cpufreq_policy *policy)
{
- return static_call(amd_pstate_cppc_enable)(enable);
+ return static_call(amd_pstate_cppc_enable)(policy);
}
static int msr_init_perf(struct amd_cpudata *cpudata)
@@ -1063,6 +994,10 @@ static int amd_pstate_cpu_init(struct cp
cpudata->nominal_freq,
perf.highest_perf);
+ ret = amd_pstate_cppc_enable(policy);
+ if (ret)
+ goto free_cpudata1;
+
policy->boost_enabled = READ_ONCE(cpudata->boost_supported);
/* It will be updated by governor */
@@ -1110,28 +1045,6 @@ static void amd_pstate_cpu_exit(struct c
kfree(cpudata);
}
-static int amd_pstate_cpu_resume(struct cpufreq_policy *policy)
-{
- int ret;
-
- ret = amd_pstate_cppc_enable(true);
- if (ret)
- pr_err("failed to enable amd-pstate during resume, return %d\n", ret);
-
- return ret;
-}
-
-static int amd_pstate_cpu_suspend(struct cpufreq_policy *policy)
-{
- int ret;
-
- ret = amd_pstate_cppc_enable(false);
- if (ret)
- pr_err("failed to disable amd-pstate during suspend, return %d\n", ret);
-
- return ret;
-}
-
/* Sysfs attributes */
/*
@@ -1223,8 +1136,10 @@ static ssize_t show_energy_performance_a
static ssize_t store_energy_performance_preference(
struct cpufreq_policy *policy, const char *buf, size_t count)
{
+ struct amd_cpudata *cpudata = policy->driver_data;
char str_preference[21];
ssize_t ret;
+ u8 epp;
ret = sscanf(buf, "%20s", str_preference);
if (ret != 1)
@@ -1234,7 +1149,17 @@ static ssize_t store_energy_performance_
if (ret < 0)
return -EINVAL;
- ret = amd_pstate_set_energy_pref_index(policy, ret);
+ if (!ret)
+ epp = cpudata->epp_default;
+ else
+ epp = epp_values[ret];
+
+ if (epp > 0 && policy->policy == CPUFREQ_POLICY_PERFORMANCE) {
+ pr_debug("EPP cannot be set under performance policy\n");
+ return -EBUSY;
+ }
+
+ ret = amd_pstate_set_epp(policy, epp);
return ret ? ret : count;
}
@@ -1267,7 +1192,6 @@ static ssize_t show_energy_performance_p
static void amd_pstate_driver_cleanup(void)
{
- amd_pstate_cppc_enable(false);
cppc_state = AMD_PSTATE_DISABLE;
current_pstate_driver = NULL;
}
@@ -1301,14 +1225,6 @@ static int amd_pstate_register_driver(in
cppc_state = mode;
- ret = amd_pstate_cppc_enable(true);
- if (ret) {
- pr_err("failed to enable cppc during amd-pstate driver registration, return %d\n",
- ret);
- amd_pstate_driver_cleanup();
- return ret;
- }
-
/* at least one CPU supports CPB */
current_pstate_driver->boost_enabled = cpu_feature_enabled(X86_FEATURE_CPB);
@@ -1548,11 +1464,15 @@ static int amd_pstate_epp_cpu_init(struc
policy->cpuinfo.max_freq = policy->max = perf_to_freq(perf,
cpudata->nominal_freq,
perf.highest_perf);
+ policy->driver_data = cpudata;
+
+ ret = amd_pstate_cppc_enable(policy);
+ if (ret)
+ goto free_cpudata1;
/* It will be updated by governor */
policy->cur = policy->cpuinfo.min_freq;
- policy->driver_data = cpudata;
policy->boost_enabled = READ_ONCE(cpudata->boost_supported);
@@ -1644,31 +1564,11 @@ static int amd_pstate_epp_set_policy(str
return 0;
}
-static int amd_pstate_epp_reenable(struct cpufreq_policy *policy)
-{
- int ret;
-
- ret = amd_pstate_cppc_enable(true);
- if (ret)
- pr_err("failed to enable amd pstate during resume, return %d\n", ret);
-
-
- return amd_pstate_epp_update_limit(policy);
-}
-
static int amd_pstate_epp_cpu_online(struct cpufreq_policy *policy)
{
- struct amd_cpudata *cpudata = policy->driver_data;
- int ret;
-
- pr_debug("AMD CPU Core %d going online\n", cpudata->cpu);
+ pr_debug("AMD CPU Core %d going online\n", policy->cpu);
- ret = amd_pstate_epp_reenable(policy);
- if (ret)
- return ret;
- cpudata->suspended = false;
-
- return 0;
+ return amd_pstate_cppc_enable(policy);
}
static int amd_pstate_epp_cpu_offline(struct cpufreq_policy *policy)
@@ -1686,11 +1586,6 @@ static int amd_pstate_epp_cpu_offline(st
static int amd_pstate_epp_suspend(struct cpufreq_policy *policy)
{
struct amd_cpudata *cpudata = policy->driver_data;
- int ret;
-
- /* avoid suspending when EPP is not enabled */
- if (cppc_state != AMD_PSTATE_ACTIVE)
- return 0;
/* invalidate to ensure it's rewritten during resume */
cpudata->cppc_req_cached = 0;
@@ -1698,11 +1593,6 @@ static int amd_pstate_epp_suspend(struct
/* set this flag to avoid setting core offline*/
cpudata->suspended = true;
- /* disable CPPC in lowlevel firmware */
- ret = amd_pstate_cppc_enable(false);
- if (ret)
- pr_err("failed to suspend, return %d\n", ret);
-
return 0;
}
@@ -1711,8 +1601,12 @@ static int amd_pstate_epp_resume(struct
struct amd_cpudata *cpudata = policy->driver_data;
if (cpudata->suspended) {
+ int ret;
+
/* enable amd pstate from suspend state*/
- amd_pstate_epp_reenable(policy);
+ ret = amd_pstate_epp_update_limit(policy);
+ if (ret)
+ return ret;
cpudata->suspended = false;
}
@@ -1727,8 +1621,6 @@ static struct cpufreq_driver amd_pstate_
.fast_switch = amd_pstate_fast_switch,
.init = amd_pstate_cpu_init,
.exit = amd_pstate_cpu_exit,
- .suspend = amd_pstate_cpu_suspend,
- .resume = amd_pstate_cpu_resume,
.set_boost = amd_pstate_set_boost,
.update_limits = amd_pstate_update_limits,
.name = "amd-pstate",
@@ -1895,7 +1787,6 @@ static int __init amd_pstate_init(void)
global_attr_free:
cpufreq_unregister_driver(current_pstate_driver);
- amd_pstate_cppc_enable(false);
return ret;
}
device_initcall(amd_pstate_init);

View File

@@ -0,0 +1,105 @@
From 5fda2a5a547244c99bce9327e77e2ff253f77add Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Wed, 26 Feb 2025 01:49:33 -0600
Subject: cpufreq/amd-pstate: Stop caching EPP
EPP values are cached in the cpudata structure per CPU. This is needless
though because they are also cached in the CPPC request variable.
Drop the separate cache for EPP values and always reference the CPPC
request variable when needed.
Reviewed-by: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
---
drivers/cpufreq/amd-pstate.c | 19 ++++++++++---------
drivers/cpufreq/amd-pstate.h | 1 -
2 files changed, 10 insertions(+), 10 deletions(-)
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -268,8 +268,6 @@ static int msr_update_perf(struct cpufre
}
WRITE_ONCE(cpudata->cppc_req_cached, value);
- if (epp != cpudata->epp_cached)
- WRITE_ONCE(cpudata->epp_cached, epp);
return 0;
}
@@ -318,7 +316,6 @@ static int msr_set_epp(struct cpufreq_po
}
/* update both so that msr_update_perf() can effectively check */
- WRITE_ONCE(cpudata->epp_cached, epp);
WRITE_ONCE(cpudata->cppc_req_cached, value);
return ret;
@@ -335,9 +332,12 @@ static int shmem_set_epp(struct cpufreq_
{
struct amd_cpudata *cpudata = policy->driver_data;
struct cppc_perf_ctrls perf_ctrls;
+ u8 epp_cached;
u64 value;
int ret;
+
+ epp_cached = FIELD_GET(AMD_CPPC_EPP_PERF_MASK, cpudata->cppc_req_cached);
if (trace_amd_pstate_epp_perf_enabled()) {
union perf_cached perf = cpudata->perf;
@@ -348,10 +348,10 @@ static int shmem_set_epp(struct cpufreq_
FIELD_GET(AMD_CPPC_MAX_PERF_MASK,
cpudata->cppc_req_cached),
policy->boost_enabled,
- epp != cpudata->epp_cached);
+ epp != epp_cached);
}
- if (epp == cpudata->epp_cached)
+ if (epp == epp_cached)
return 0;
perf_ctrls.energy_perf = epp;
@@ -360,7 +360,6 @@ static int shmem_set_epp(struct cpufreq_
pr_debug("failed to set energy perf value (%d)\n", ret);
return ret;
}
- WRITE_ONCE(cpudata->epp_cached, epp);
value = READ_ONCE(cpudata->cppc_req_cached);
value &= ~AMD_CPPC_EPP_PERF_MASK;
@@ -1168,9 +1167,11 @@ static ssize_t show_energy_performance_p
struct cpufreq_policy *policy, char *buf)
{
struct amd_cpudata *cpudata = policy->driver_data;
- u8 preference;
+ u8 preference, epp;
+
+ epp = FIELD_GET(AMD_CPPC_EPP_PERF_MASK, cpudata->cppc_req_cached);
- switch (cpudata->epp_cached) {
+ switch (epp) {
case AMD_CPPC_EPP_PERFORMANCE:
preference = EPP_INDEX_PERFORMANCE;
break;
@@ -1533,7 +1534,7 @@ static int amd_pstate_epp_update_limit(s
if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
epp = 0;
else
- epp = READ_ONCE(cpudata->epp_cached);
+ epp = FIELD_GET(AMD_CPPC_EPP_PERF_MASK, cpudata->cppc_req_cached);
perf = READ_ONCE(cpudata->perf);
--- a/drivers/cpufreq/amd-pstate.h
+++ b/drivers/cpufreq/amd-pstate.h
@@ -102,7 +102,6 @@ struct amd_cpudata {
bool hw_prefcore;
/* EPP feature related attributes*/
- u8 epp_cached;
u32 policy;
bool suspended;
u8 epp_default;

View File

@@ -0,0 +1,39 @@
From 7757237a6ee08403e9a0e58eebf53ae2203f65ae Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Wed, 26 Feb 2025 01:49:34 -0600
Subject: cpufreq/amd-pstate: Drop actions in amd_pstate_epp_cpu_offline()
When the CPU goes offline there is no need to change the CPPC request
because the CPU will go into the deepest C-state it supports already.
Actually changing the CPPC request when it goes offline messes up the
cached values and can lead to the wrong values being restored when
it comes back.
Instead drop the actions and if the CPU comes back online let
amd_pstate_epp_set_policy() restore it to expected values.
Reviewed-by: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
---
drivers/cpufreq/amd-pstate.c | 9 +--------
1 file changed, 1 insertion(+), 8 deletions(-)
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -1574,14 +1574,7 @@ static int amd_pstate_epp_cpu_online(str
static int amd_pstate_epp_cpu_offline(struct cpufreq_policy *policy)
{
- struct amd_cpudata *cpudata = policy->driver_data;
- union perf_cached perf = READ_ONCE(cpudata->perf);
-
- if (cpudata->suspended)
- return 0;
-
- return amd_pstate_update_perf(policy, perf.lowest_perf, 0, perf.lowest_perf,
- AMD_CPPC_EPP_BALANCE_POWERSAVE, false);
+ return 0;
}
static int amd_pstate_epp_suspend(struct cpufreq_policy *policy)

View File

@@ -0,0 +1,41 @@
From f25d506d1e54b7d0a5fe42284cd5f2ca5c21cef7 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <superm1@kernel.org>
Date: Thu, 27 Feb 2025 14:09:08 -0600
Subject: cpufreq/amd-pstate: fix warning noticed by kernel test robot
Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202502272001.nafS0qXq-lkp@intel.com/
Signed-off-by: Oleksandr Natalenko <oleksandr@natalenko.name>
---
drivers/cpufreq/amd-pstate.c | 13 ++++++-------
1 file changed, 6 insertions(+), 7 deletions(-)
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -903,20 +903,19 @@ static int amd_pstate_init_freq(struct a
return ret;
perf = READ_ONCE(cpudata->perf);
+ if (quirks && quirks->nominal_freq)
+ nominal_freq = quirks->nominal_freq;
+ else
+ nominal_freq = cppc_perf.nominal_freq;
+ nominal_freq *= 1000;
+
if (quirks && quirks->lowest_freq) {
min_freq = quirks->lowest_freq;
perf.lowest_perf = freq_to_perf(perf, nominal_freq, min_freq);
WRITE_ONCE(cpudata->perf, perf);
} else
min_freq = cppc_perf.lowest_freq;
-
- if (quirks && quirks->nominal_freq)
- nominal_freq = quirks->nominal_freq;
- else
- nominal_freq = cppc_perf.nominal_freq;
-
min_freq *= 1000;
- nominal_freq *= 1000;
WRITE_ONCE(cpudata->nominal_freq, nominal_freq);

View File

@@ -1,4 +1,4 @@
From 4c13cc86a7d9f1e88e8090a94c792eb45d7ef1ef Mon Sep 17 00:00:00 2001
From 7a0fbf076914b2b0e55feddd839212af92bdffb3 Mon Sep 17 00:00:00 2001
From: Christian Loehle <christian.loehle@arm.com>
Date: Thu, 5 Sep 2024 10:26:39 +0100
Subject: cpuidle: Prefer teo over menu governor
@@ -47,7 +47,7 @@ Signed-off-by: Christian Loehle <christian.loehle@arm.com>
.reflect = menu_reflect,
--- a/drivers/cpuidle/governors/teo.c
+++ b/drivers/cpuidle/governors/teo.c
@@ -542,7 +542,7 @@ static int teo_enable_device(struct cpui
@@ -537,7 +537,7 @@ static int teo_enable_device(struct cpui
static struct cpuidle_governor teo_governor = {
.name = "teo",

View File

@@ -1,189 +0,0 @@
From 3d722d5259babc1650ab6cb1a8bbf27863af75f2 Mon Sep 17 00:00:00 2001
From: Christian Loehle <christian.loehle@arm.com>
Date: Thu, 5 Sep 2024 10:26:38 +0100
Subject: cpuidle: menu: Remove iowait influence
Remove CPU iowaiters influence on idle state selection.
Remove the menu notion of performance multiplier which increased with
the number of tasks that went to iowait sleep on this CPU and haven't
woken up yet.
Relying on iowait for cpuidle is problematic for a few reasons:
1. There is no guarantee that an iowaiting task will wake up on the
same CPU.
2. The task being in iowait says nothing about the idle duration, we
could be selecting shallower states for a long time.
3. The task being in iowait doesn't always imply a performance hit
with increased latency.
4. If there is such a performance hit, the number of iowaiting tasks
doesn't directly correlate.
5. The definition of iowait altogether is vague at best, it is
sprinkled across kernel code.
Signed-off-by: Christian Loehle <christian.loehle@arm.com>
Link: https://patch.msgid.link/20240905092645.2885200-2-christian.loehle@arm.com
[ rjw: Minor edits in the changelog ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
drivers/cpuidle/governors/menu.c | 76 ++++----------------------------
1 file changed, 9 insertions(+), 67 deletions(-)
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -19,7 +19,7 @@
#include "gov.h"
-#define BUCKETS 12
+#define BUCKETS 6
#define INTERVAL_SHIFT 3
#define INTERVALS (1UL << INTERVAL_SHIFT)
#define RESOLUTION 1024
@@ -29,12 +29,11 @@
/*
* Concepts and ideas behind the menu governor
*
- * For the menu governor, there are 3 decision factors for picking a C
+ * For the menu governor, there are 2 decision factors for picking a C
* state:
* 1) Energy break even point
- * 2) Performance impact
- * 3) Latency tolerance (from pmqos infrastructure)
- * These three factors are treated independently.
+ * 2) Latency tolerance (from pmqos infrastructure)
+ * These two factors are treated independently.
*
* Energy break even point
* -----------------------
@@ -75,30 +74,6 @@
* intervals and if the stand deviation of these 8 intervals is below a
* threshold value, we use the average of these intervals as prediction.
*
- * Limiting Performance Impact
- * ---------------------------
- * C states, especially those with large exit latencies, can have a real
- * noticeable impact on workloads, which is not acceptable for most sysadmins,
- * and in addition, less performance has a power price of its own.
- *
- * As a general rule of thumb, menu assumes that the following heuristic
- * holds:
- * The busier the system, the less impact of C states is acceptable
- *
- * This rule-of-thumb is implemented using a performance-multiplier:
- * If the exit latency times the performance multiplier is longer than
- * the predicted duration, the C state is not considered a candidate
- * for selection due to a too high performance impact. So the higher
- * this multiplier is, the longer we need to be idle to pick a deep C
- * state, and thus the less likely a busy CPU will hit such a deep
- * C state.
- *
- * Currently there is only one value determining the factor:
- * 10 points are added for each process that is waiting for IO on this CPU.
- * (This value was experimentally determined.)
- * Utilization is no longer a factor as it was shown that it never contributed
- * significantly to the performance multiplier in the first place.
- *
*/
struct menu_device {
@@ -112,19 +87,10 @@ struct menu_device {
int interval_ptr;
};
-static inline int which_bucket(u64 duration_ns, unsigned int nr_iowaiters)
+static inline int which_bucket(u64 duration_ns)
{
int bucket = 0;
- /*
- * We keep two groups of stats; one with no
- * IO pending, one without.
- * This allows us to calculate
- * E(duration)|iowait
- */
- if (nr_iowaiters)
- bucket = BUCKETS/2;
-
if (duration_ns < 10ULL * NSEC_PER_USEC)
return bucket;
if (duration_ns < 100ULL * NSEC_PER_USEC)
@@ -138,19 +104,6 @@ static inline int which_bucket(u64 durat
return bucket + 5;
}
-/*
- * Return a multiplier for the exit latency that is intended
- * to take performance requirements into account.
- * The more performance critical we estimate the system
- * to be, the higher this multiplier, and thus the higher
- * the barrier to go to an expensive C state.
- */
-static inline int performance_multiplier(unsigned int nr_iowaiters)
-{
- /* for IO wait tasks (per cpu!) we add 10x each */
- return 1 + 10 * nr_iowaiters;
-}
-
static DEFINE_PER_CPU(struct menu_device, menu_devices);
static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev);
@@ -258,8 +211,6 @@ static int menu_select(struct cpuidle_dr
struct menu_device *data = this_cpu_ptr(&menu_devices);
s64 latency_req = cpuidle_governor_latency_req(dev->cpu);
u64 predicted_ns;
- u64 interactivity_req;
- unsigned int nr_iowaiters;
ktime_t delta, delta_tick;
int i, idx;
@@ -268,8 +219,6 @@ static int menu_select(struct cpuidle_dr
data->needs_update = 0;
}
- nr_iowaiters = nr_iowait_cpu(dev->cpu);
-
/* Find the shortest expected idle interval. */
predicted_ns = get_typical_interval(data) * NSEC_PER_USEC;
if (predicted_ns > RESIDENCY_THRESHOLD_NS) {
@@ -283,7 +232,7 @@ static int menu_select(struct cpuidle_dr
}
data->next_timer_ns = delta;
- data->bucket = which_bucket(data->next_timer_ns, nr_iowaiters);
+ data->bucket = which_bucket(data->next_timer_ns);
/* Round up the result for half microseconds. */
timer_us = div_u64((RESOLUTION * DECAY * NSEC_PER_USEC) / 2 +
@@ -301,7 +250,7 @@ static int menu_select(struct cpuidle_dr
*/
data->next_timer_ns = KTIME_MAX;
delta_tick = TICK_NSEC / 2;
- data->bucket = which_bucket(KTIME_MAX, nr_iowaiters);
+ data->bucket = which_bucket(KTIME_MAX);
}
if (unlikely(drv->state_count <= 1 || latency_req == 0) ||
@@ -328,15 +277,8 @@ static int menu_select(struct cpuidle_dr
*/
if (predicted_ns < TICK_NSEC)
predicted_ns = data->next_timer_ns;
- } else {
- /*
- * Use the performance multiplier and the user-configurable
- * latency_req to determine the maximum exit latency.
- */
- interactivity_req = div64_u64(predicted_ns,
- performance_multiplier(nr_iowaiters));
- if (latency_req > interactivity_req)
- latency_req = interactivity_req;
+ } else if (latency_req > predicted_ns) {
+ latency_req = predicted_ns;
}
/*

View File

@@ -0,0 +1,65 @@
From 594316efc465f1408482e0d1dd379f4e3a6a5c7c Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Mon, 27 Jan 2025 13:16:09 -0800
Subject: crypto: x86/aes-xts - make the fast path 64-bit specific
Remove 32-bit support from the fast path in xts_crypt(). Then optimize
it for 64-bit, and simplify the code, by switching to sg_virt() and
removing the now-unnecessary checks for crossing a page boundary.
The result is simpler code that is slightly smaller and faster in the
case that actually matters (64-bit).
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
arch/x86/crypto/aesni-intel_glue.c | 30 ++++++++++--------------------
1 file changed, 10 insertions(+), 20 deletions(-)
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -581,11 +581,8 @@ xts_crypt(struct skcipher_request *req,
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
const struct aesni_xts_ctx *ctx = aes_xts_ctx(tfm);
- const unsigned int cryptlen = req->cryptlen;
- struct scatterlist *src = req->src;
- struct scatterlist *dst = req->dst;
- if (unlikely(cryptlen < AES_BLOCK_SIZE))
+ if (unlikely(req->cryptlen < AES_BLOCK_SIZE))
return -EINVAL;
kernel_fpu_begin();
@@ -593,23 +590,16 @@ xts_crypt(struct skcipher_request *req,
/*
* In practice, virtually all XTS plaintexts and ciphertexts are either
- * 512 or 4096 bytes, aligned such that they don't span page boundaries.
- * To optimize the performance of these cases, and also any other case
- * where no page boundary is spanned, the below fast-path handles
- * single-page sources and destinations as efficiently as possible.
+ * 512 or 4096 bytes and do not use multiple scatterlist elements. To
+ * optimize the performance of these cases, the below fast-path handles
+ * single-scatterlist-element messages as efficiently as possible. The
+ * code is 64-bit specific, as it assumes no page mapping is needed.
*/
- if (likely(src->length >= cryptlen && dst->length >= cryptlen &&
- src->offset + cryptlen <= PAGE_SIZE &&
- dst->offset + cryptlen <= PAGE_SIZE)) {
- struct page *src_page = sg_page(src);
- struct page *dst_page = sg_page(dst);
- void *src_virt = kmap_local_page(src_page) + src->offset;
- void *dst_virt = kmap_local_page(dst_page) + dst->offset;
-
- (*crypt_func)(&ctx->crypt_ctx, src_virt, dst_virt, cryptlen,
- req->iv);
- kunmap_local(dst_virt);
- kunmap_local(src_virt);
+ if (IS_ENABLED(CONFIG_X86_64) &&
+ likely(req->src->length >= req->cryptlen &&
+ req->dst->length >= req->cryptlen)) {
+ (*crypt_func)(&ctx->crypt_ctx, sg_virt(req->src),
+ sg_virt(req->dst), req->cryptlen, req->iv);
kernel_fpu_end();
return 0;
}

View File

@@ -1,181 +0,0 @@
From 0a957679a29a06fb2e3971615ff9f05f6becb941 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Sun, 13 Oct 2024 21:06:49 -0700
Subject: crypto: x86/crc32c - simplify code for handling fewer than 200 bytes
The assembly code in crc32c-pcl-intel-asm_64.S is invoked only for
lengths >= 512, due to the overhead of saving and restoring FPU state.
Therefore, it is unnecessary for this code to be excessively "optimized"
for lengths < 200. Eliminate the excessive unrolling of this part of
the code and use a more straightforward qword-at-a-time loop.
Note: the part of the code in question is not entirely redundant, as it
is still used to process any remainder mod 24, as well as any remaining
data when fewer than 200 bytes remain after least one 3072-byte chunk.
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
arch/x86/crypto/crc32c-pcl-intel-asm_64.S | 116 ++++++----------------
1 file changed, 33 insertions(+), 83 deletions(-)
--- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
+++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
@@ -56,20 +56,10 @@
.quad .Lcrc_\i
.endm
-.macro JNC_LESS_THAN j
- jnc .Lless_than_\j
-.endm
-
-# Define threshold where buffers are considered "small" and routed to more
-# efficient "by-1" code. This "by-1" code only handles up to 255 bytes, so
-# SMALL_SIZE can be no larger than 255.
-
+# Define threshold below which buffers are considered "small" and routed to
+# regular CRC code that does not interleave the CRC instructions.
#define SMALL_SIZE 200
-.if (SMALL_SIZE > 255)
-.error "SMALL_ SIZE must be < 256"
-.endif
-
# unsigned int crc_pcl(u8 *buffer, int len, unsigned int crc_init);
.text
@@ -100,25 +90,18 @@ SYM_FUNC_START(crc_pcl)
## Move crc_init for Linux to a different
mov crc_init_arg, crc_init
+ mov %bufp, bufptmp # rdi = *buf
+ cmp $SMALL_SIZE, len
+ jb .Lsmall
+
################################################################
## 1) ALIGN:
################################################################
-
- mov %bufp, bufptmp # rdi = *buf
neg %bufp
and $7, %bufp # calculate the unalignment amount of
# the address
je .Lproc_block # Skip if aligned
- ## If len is less than 8 and we're unaligned, we need to jump
- ## to special code to avoid reading beyond the end of the buffer
- cmp $8, len
- jae .Ldo_align
- # less_than_8 expects length in upper 3 bits of len_dw
- # less_than_8_post_shl1 expects length = carryflag * 8 + len_dw[31:30]
- shl $32-3+1, len_dw
- jmp .Lless_than_8_post_shl1
-
.Ldo_align:
#### Calculate CRC of unaligned bytes of the buffer (if any)
movq (bufptmp), tmp # load a quadward from the buffer
@@ -144,9 +127,6 @@ SYM_FUNC_START(crc_pcl)
jae .Lfull_block
.Lcontinue_block:
- cmpq $SMALL_SIZE, len
- jb .Lsmall
-
## len < 128*24
movq $2731, %rax # 2731 = ceil(2^16 / 24)
mul len_dw
@@ -243,68 +223,38 @@ LABEL crc_ 0
mov tmp, len
cmp $128*24, tmp
jae .Lfull_block
- cmp $24, tmp
+ cmp $SMALL_SIZE, tmp
jae .Lcontinue_block
-.Lless_than_24:
- shl $32-4, len_dw # less_than_16 expects length
- # in upper 4 bits of len_dw
- jnc .Lless_than_16
- crc32q (bufptmp), crc_init
- crc32q 8(bufptmp), crc_init
- jz .Ldo_return
- add $16, bufptmp
- # len is less than 8 if we got here
- # less_than_8 expects length in upper 3 bits of len_dw
- # less_than_8_post_shl1 expects length = carryflag * 8 + len_dw[31:30]
- shl $2, len_dw
- jmp .Lless_than_8_post_shl1
-
#######################################################################
- ## 6) LESS THAN 256-bytes REMAIN AT THIS POINT (8-bits of len are full)
+ ## 6) Process any remainder without interleaving:
#######################################################################
.Lsmall:
- shl $32-8, len_dw # Prepare len_dw for less_than_256
- j=256
-.rept 5 # j = {256, 128, 64, 32, 16}
-.altmacro
-LABEL less_than_ %j # less_than_j: Length should be in
- # upper lg(j) bits of len_dw
- j=(j/2)
- shl $1, len_dw # Get next MSB
- JNC_LESS_THAN %j
-.noaltmacro
- i=0
-.rept (j/8)
- crc32q i(bufptmp), crc_init # Compute crc32 of 8-byte data
- i=i+8
-.endr
- jz .Ldo_return # Return if remaining length is zero
- add $j, bufptmp # Advance buf
-.endr
-
-.Lless_than_8: # Length should be stored in
- # upper 3 bits of len_dw
- shl $1, len_dw
-.Lless_than_8_post_shl1:
- jnc .Lless_than_4
- crc32l (bufptmp), crc_init_dw # CRC of 4 bytes
- jz .Ldo_return # return if remaining data is zero
- add $4, bufptmp
-.Lless_than_4: # Length should be stored in
- # upper 2 bits of len_dw
- shl $1, len_dw
- jnc .Lless_than_2
- crc32w (bufptmp), crc_init_dw # CRC of 2 bytes
- jz .Ldo_return # return if remaining data is zero
- add $2, bufptmp
-.Lless_than_2: # Length should be stored in the MSB
- # of len_dw
- shl $1, len_dw
- jnc .Lless_than_1
- crc32b (bufptmp), crc_init_dw # CRC of 1 byte
-.Lless_than_1: # Length should be zero
-.Ldo_return:
+ test len, len
+ jz .Ldone
+ mov len_dw, %eax
+ shr $3, %eax
+ jz .Ldo_dword
+.Ldo_qwords:
+ crc32q (bufptmp), crc_init
+ add $8, bufptmp
+ dec %eax
+ jnz .Ldo_qwords
+.Ldo_dword:
+ test $4, len_dw
+ jz .Ldo_word
+ crc32l (bufptmp), crc_init_dw
+ add $4, bufptmp
+.Ldo_word:
+ test $2, len_dw
+ jz .Ldo_byte
+ crc32w (bufptmp), crc_init_dw
+ add $2, bufptmp
+.Ldo_byte:
+ test $1, len_dw
+ jz .Ldone
+ crc32b (bufptmp), crc_init_dw
+.Ldone:
movq crc_init, %rax
popq %rsi
popq %rdi

View File

@@ -1,187 +0,0 @@
From 3ed4205afe9305d71d055554ba27e7b8923865dc Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Sun, 13 Oct 2024 21:06:49 -0700
Subject: crypto: x86/crc32c - access 32-bit arguments as 32-bit
Fix crc32c-pcl-intel-asm_64.S to access 32-bit arguments as 32-bit
values instead of 64-bit, since the upper bits of the corresponding
64-bit registers are not guaranteed to be zero. Also update the type of
the length argument to be unsigned int rather than int, as the assembly
code treats it as unsigned.
Note: there haven't been any reports of this bug actually causing
incorrect behavior. Neither gcc nor clang guarantee zero-extension to
64 bits, but zero-extension is likely to happen in practice because most
instructions that operate on 32-bit registers zero-extend to 64 bits.
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
arch/x86/crypto/crc32c-intel_glue.c | 2 +-
arch/x86/crypto/crc32c-pcl-intel-asm_64.S | 57 +++++++++++------------
2 files changed, 27 insertions(+), 32 deletions(-)
--- a/arch/x86/crypto/crc32c-intel_glue.c
+++ b/arch/x86/crypto/crc32c-intel_glue.c
@@ -41,7 +41,7 @@
*/
#define CRC32C_PCL_BREAKEVEN 512
-asmlinkage unsigned int crc_pcl(const u8 *buffer, int len,
+asmlinkage unsigned int crc_pcl(const u8 *buffer, unsigned int len,
unsigned int crc_init);
#endif /* CONFIG_X86_64 */
--- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
+++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
@@ -60,7 +60,7 @@
# regular CRC code that does not interleave the CRC instructions.
#define SMALL_SIZE 200
-# unsigned int crc_pcl(u8 *buffer, int len, unsigned int crc_init);
+# unsigned int crc_pcl(const u8 *buffer, unsigned int len, unsigned int crc_init);
.text
SYM_FUNC_START(crc_pcl)
@@ -72,14 +72,11 @@ SYM_FUNC_START(crc_pcl)
#define block_0 %rcx
#define block_1 %rdx
#define block_2 %r11
-#define len %rsi
-#define len_dw %esi
-#define len_w %si
-#define len_b %sil
-#define crc_init_arg %rdx
+#define len %esi
+#define crc_init_arg %edx
#define tmp %rbx
-#define crc_init %r8
-#define crc_init_dw %r8d
+#define crc_init %r8d
+#define crc_init_q %r8
#define crc1 %r9
#define crc2 %r10
@@ -107,9 +104,9 @@ SYM_FUNC_START(crc_pcl)
movq (bufptmp), tmp # load a quadward from the buffer
add %bufp, bufptmp # align buffer pointer for quadword
# processing
- sub %bufp, len # update buffer length
+ sub bufp_dw, len # update buffer length
.Lalign_loop:
- crc32b %bl, crc_init_dw # compute crc32 of 1-byte
+ crc32b %bl, crc_init # compute crc32 of 1-byte
shr $8, tmp # get next byte
dec %bufp
jne .Lalign_loop
@@ -121,15 +118,14 @@ SYM_FUNC_START(crc_pcl)
################################################################
## compute num of bytes to be processed
- movq len, tmp # save num bytes in tmp
- cmpq $128*24, len
+ cmp $128*24, len
jae .Lfull_block
.Lcontinue_block:
## len < 128*24
movq $2731, %rax # 2731 = ceil(2^16 / 24)
- mul len_dw
+ mul len
shrq $16, %rax
## eax contains floor(bytes / 24) = num 24-byte chunks to do
@@ -176,7 +172,7 @@ SYM_FUNC_START(crc_pcl)
LABEL crc_ %i
.noaltmacro
ENDBR
- crc32q -i*8(block_0), crc_init
+ crc32q -i*8(block_0), crc_init_q
crc32q -i*8(block_1), crc1
crc32q -i*8(block_2), crc2
i=(i-1)
@@ -186,7 +182,7 @@ LABEL crc_ %i
LABEL crc_ %i
.noaltmacro
ENDBR
- crc32q -i*8(block_0), crc_init
+ crc32q -i*8(block_0), crc_init_q
crc32q -i*8(block_1), crc1
# SKIP crc32 -i*8(block_2), crc2 ; Don't do this one yet
@@ -200,9 +196,9 @@ LABEL crc_ %i
shlq $3, %rax # rax *= 8
pmovzxdq (%bufp,%rax), %xmm0 # 2 consts: K1:K2
leal (%eax,%eax,2), %eax # rax *= 3 (total *24)
- subq %rax, tmp # tmp -= rax*24
+ sub %eax, len # len -= rax*24
- movq crc_init, %xmm1 # CRC for block 1
+ movq crc_init_q, %xmm1 # CRC for block 1
pclmulqdq $0x00, %xmm0, %xmm1 # Multiply by K2
movq crc1, %xmm2 # CRC for block 2
@@ -211,8 +207,8 @@ LABEL crc_ %i
pxor %xmm2,%xmm1
movq %xmm1, %rax
xor -i*8(block_2), %rax
- mov crc2, crc_init
- crc32 %rax, crc_init
+ mov crc2, crc_init_q
+ crc32 %rax, crc_init_q
################################################################
## 5) Check for end:
@@ -220,10 +216,9 @@ LABEL crc_ %i
LABEL crc_ 0
ENDBR
- mov tmp, len
- cmp $128*24, tmp
+ cmp $128*24, len
jae .Lfull_block
- cmp $SMALL_SIZE, tmp
+ cmp $SMALL_SIZE, len
jae .Lcontinue_block
#######################################################################
@@ -232,30 +227,30 @@ LABEL crc_ 0
.Lsmall:
test len, len
jz .Ldone
- mov len_dw, %eax
+ mov len, %eax
shr $3, %eax
jz .Ldo_dword
.Ldo_qwords:
- crc32q (bufptmp), crc_init
+ crc32q (bufptmp), crc_init_q
add $8, bufptmp
dec %eax
jnz .Ldo_qwords
.Ldo_dword:
- test $4, len_dw
+ test $4, len
jz .Ldo_word
- crc32l (bufptmp), crc_init_dw
+ crc32l (bufptmp), crc_init
add $4, bufptmp
.Ldo_word:
- test $2, len_dw
+ test $2, len
jz .Ldo_byte
- crc32w (bufptmp), crc_init_dw
+ crc32w (bufptmp), crc_init
add $2, bufptmp
.Ldo_byte:
- test $1, len_dw
+ test $1, len
jz .Ldone
- crc32b (bufptmp), crc_init_dw
+ crc32b (bufptmp), crc_init
.Ldone:
- movq crc_init, %rax
+ mov crc_init, %eax
popq %rsi
popq %rdi
popq %rbx

View File

@@ -1,374 +0,0 @@
From 5ffad9b234995f73548763a8487ecd256bba8d8d Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Sun, 13 Oct 2024 21:06:49 -0700
Subject: crypto: x86/crc32c - eliminate jump table and excessive unrolling
crc32c-pcl-intel-asm_64.S has a loop with 1 to 127 iterations fully
unrolled and uses a jump table to jump into the correct location. This
optimization is misguided, as it bloats the binary code size and
introduces an indirect call. x86_64 CPUs can predict loops well, so it
is fine to just use a loop instead. Loop bookkeeping instructions can
compete with the crc instructions for the ALUs, but this is easily
mitigated by unrolling the loop by a smaller amount, such as 4 times.
Therefore, re-roll the loop and make related tweaks to the code.
This reduces the binary code size of crc_pclmul() from 4546 bytes to 418
bytes, a 91% reduction. In general it also makes the code faster, with
some large improvements seen when retpoline is enabled.
More detailed performance results are shown below. They are given as
percent improvement in throughput (negative means regressed) for CPU
microarchitecture vs. input length in bytes. E.g. an improvement from
40 GB/s to 50 GB/s would be listed as 25%.
Table 1: Results with retpoline enabled (the default):
| 512 | 833 | 1024 | 2000 | 3173 | 4096 |
---------------------+-------+-------+-------+------ +-------+-------+
Intel Haswell | 35.0% | 20.7% | 17.8% | 9.7% | -0.2% | 4.4% |
Intel Emerald Rapids | 66.8% | 45.2% | 36.3% | 19.3% | 0.0% | 5.4% |
AMD Zen 2 | 29.5% | 17.2% | 13.5% | 8.6% | -0.5% | 2.8% |
Table 2: Results with retpoline disabled:
| 512 | 833 | 1024 | 2000 | 3173 | 4096 |
---------------------+-------+-------+-------+------ +-------+-------+
Intel Haswell | 3.3% | 4.8% | 4.5% | 0.9% | -2.9% | 0.3% |
Intel Emerald Rapids | 7.5% | 6.4% | 5.2% | 2.3% | -0.0% | 0.6% |
AMD Zen 2 | 11.8% | 1.4% | 0.2% | 1.3% | -0.9% | -0.2% |
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
arch/x86/crypto/crc32c-pcl-intel-asm_64.S | 233 +++++++++-------------
1 file changed, 92 insertions(+), 141 deletions(-)
--- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
+++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
@@ -7,6 +7,7 @@
* http://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fast-crc-computation-paper.pdf
*
* Copyright (C) 2012 Intel Corporation.
+ * Copyright 2024 Google LLC
*
* Authors:
* Wajdi Feghali <wajdi.k.feghali@intel.com>
@@ -44,18 +45,9 @@
*/
#include <linux/linkage.h>
-#include <asm/nospec-branch.h>
## ISCSI CRC 32 Implementation with crc32 and pclmulqdq Instruction
-.macro LABEL prefix n
-.L\prefix\n\():
-.endm
-
-.macro JMPTBL_ENTRY i
-.quad .Lcrc_\i
-.endm
-
# Define threshold below which buffers are considered "small" and routed to
# regular CRC code that does not interleave the CRC instructions.
#define SMALL_SIZE 200
@@ -64,139 +56,116 @@
.text
SYM_FUNC_START(crc_pcl)
-#define bufp rdi
-#define bufp_dw %edi
-#define bufp_w %di
-#define bufp_b %dil
-#define bufptmp %rcx
-#define block_0 %rcx
-#define block_1 %rdx
-#define block_2 %r11
-#define len %esi
-#define crc_init_arg %edx
-#define tmp %rbx
-#define crc_init %r8d
-#define crc_init_q %r8
-#define crc1 %r9
-#define crc2 %r10
-
- pushq %rbx
- pushq %rdi
- pushq %rsi
-
- ## Move crc_init for Linux to a different
- mov crc_init_arg, crc_init
+#define bufp %rdi
+#define bufp_d %edi
+#define len %esi
+#define crc_init %edx
+#define crc_init_q %rdx
+#define n_misaligned %ecx /* overlaps chunk_bytes! */
+#define n_misaligned_q %rcx
+#define chunk_bytes %ecx /* overlaps n_misaligned! */
+#define chunk_bytes_q %rcx
+#define crc1 %r8
+#define crc2 %r9
- mov %bufp, bufptmp # rdi = *buf
cmp $SMALL_SIZE, len
jb .Lsmall
################################################################
## 1) ALIGN:
################################################################
- neg %bufp
- and $7, %bufp # calculate the unalignment amount of
+ mov bufp_d, n_misaligned
+ neg n_misaligned
+ and $7, n_misaligned # calculate the misalignment amount of
# the address
- je .Lproc_block # Skip if aligned
+ je .Laligned # Skip if aligned
+ # Process 1 <= n_misaligned <= 7 bytes individually in order to align
+ # the remaining data to an 8-byte boundary.
.Ldo_align:
- #### Calculate CRC of unaligned bytes of the buffer (if any)
- movq (bufptmp), tmp # load a quadward from the buffer
- add %bufp, bufptmp # align buffer pointer for quadword
- # processing
- sub bufp_dw, len # update buffer length
+ movq (bufp), %rax
+ add n_misaligned_q, bufp
+ sub n_misaligned, len
.Lalign_loop:
- crc32b %bl, crc_init # compute crc32 of 1-byte
- shr $8, tmp # get next byte
- dec %bufp
+ crc32b %al, crc_init # compute crc32 of 1-byte
+ shr $8, %rax # get next byte
+ dec n_misaligned
jne .Lalign_loop
-
-.Lproc_block:
+.Laligned:
################################################################
- ## 2) PROCESS BLOCKS:
+ ## 2) PROCESS BLOCK:
################################################################
- ## compute num of bytes to be processed
-
cmp $128*24, len
jae .Lfull_block
-.Lcontinue_block:
- ## len < 128*24
- movq $2731, %rax # 2731 = ceil(2^16 / 24)
- mul len
- shrq $16, %rax
-
- ## eax contains floor(bytes / 24) = num 24-byte chunks to do
-
- ## process rax 24-byte chunks (128 >= rax >= 0)
-
- ## compute end address of each block
- ## block 0 (base addr + RAX * 8)
- ## block 1 (base addr + RAX * 16)
- ## block 2 (base addr + RAX * 24)
- lea (bufptmp, %rax, 8), block_0
- lea (block_0, %rax, 8), block_1
- lea (block_1, %rax, 8), block_2
-
- xor crc1, crc1
- xor crc2, crc2
-
- ## branch into array
- leaq jump_table(%rip), %bufp
- mov (%bufp,%rax,8), %bufp
- JMP_NOSPEC bufp
+.Lpartial_block:
+ # Compute floor(len / 24) to get num qwords to process from each lane.
+ imul $2731, len, %eax # 2731 = ceil(2^16 / 24)
+ shr $16, %eax
+ jmp .Lcrc_3lanes
- ################################################################
- ## 2a) PROCESS FULL BLOCKS:
- ################################################################
.Lfull_block:
- movl $128,%eax
- lea 128*8*2(block_0), block_1
- lea 128*8*3(block_0), block_2
- add $128*8*1, block_0
-
- xor crc1,crc1
- xor crc2,crc2
-
- # Fall through into top of crc array (crc_128)
+ # Processing 128 qwords from each lane.
+ mov $128, %eax
################################################################
- ## 3) CRC Array:
+ ## 3) CRC each of three lanes:
################################################################
- i=128
-.rept 128-1
-.altmacro
-LABEL crc_ %i
-.noaltmacro
- ENDBR
- crc32q -i*8(block_0), crc_init_q
- crc32q -i*8(block_1), crc1
- crc32q -i*8(block_2), crc2
- i=(i-1)
-.endr
-
-.altmacro
-LABEL crc_ %i
-.noaltmacro
- ENDBR
- crc32q -i*8(block_0), crc_init_q
- crc32q -i*8(block_1), crc1
-# SKIP crc32 -i*8(block_2), crc2 ; Don't do this one yet
+.Lcrc_3lanes:
+ xor crc1,crc1
+ xor crc2,crc2
+ mov %eax, chunk_bytes
+ shl $3, chunk_bytes # num bytes to process from each lane
+ sub $5, %eax # 4 for 4x_loop, 1 for special last iter
+ jl .Lcrc_3lanes_4x_done
+
+ # Unroll the loop by a factor of 4 to reduce the overhead of the loop
+ # bookkeeping instructions, which can compete with crc32q for the ALUs.
+.Lcrc_3lanes_4x_loop:
+ crc32q (bufp), crc_init_q
+ crc32q (bufp,chunk_bytes_q), crc1
+ crc32q (bufp,chunk_bytes_q,2), crc2
+ crc32q 8(bufp), crc_init_q
+ crc32q 8(bufp,chunk_bytes_q), crc1
+ crc32q 8(bufp,chunk_bytes_q,2), crc2
+ crc32q 16(bufp), crc_init_q
+ crc32q 16(bufp,chunk_bytes_q), crc1
+ crc32q 16(bufp,chunk_bytes_q,2), crc2
+ crc32q 24(bufp), crc_init_q
+ crc32q 24(bufp,chunk_bytes_q), crc1
+ crc32q 24(bufp,chunk_bytes_q,2), crc2
+ add $32, bufp
+ sub $4, %eax
+ jge .Lcrc_3lanes_4x_loop
+
+.Lcrc_3lanes_4x_done:
+ add $4, %eax
+ jz .Lcrc_3lanes_last_qword
+
+.Lcrc_3lanes_1x_loop:
+ crc32q (bufp), crc_init_q
+ crc32q (bufp,chunk_bytes_q), crc1
+ crc32q (bufp,chunk_bytes_q,2), crc2
+ add $8, bufp
+ dec %eax
+ jnz .Lcrc_3lanes_1x_loop
- mov block_2, block_0
+.Lcrc_3lanes_last_qword:
+ crc32q (bufp), crc_init_q
+ crc32q (bufp,chunk_bytes_q), crc1
+# SKIP crc32q (bufp,chunk_bytes_q,2), crc2 ; Don't do this one yet
################################################################
## 4) Combine three results:
################################################################
- lea (K_table-8)(%rip), %bufp # first entry is for idx 1
- shlq $3, %rax # rax *= 8
- pmovzxdq (%bufp,%rax), %xmm0 # 2 consts: K1:K2
- leal (%eax,%eax,2), %eax # rax *= 3 (total *24)
- sub %eax, len # len -= rax*24
+ lea (K_table-8)(%rip), %rax # first entry is for idx 1
+ pmovzxdq (%rax,chunk_bytes_q), %xmm0 # 2 consts: K1:K2
+ lea (chunk_bytes,chunk_bytes,2), %eax # chunk_bytes * 3
+ sub %eax, len # len -= chunk_bytes * 3
movq crc_init_q, %xmm1 # CRC for block 1
pclmulqdq $0x00, %xmm0, %xmm1 # Multiply by K2
@@ -206,20 +175,19 @@ LABEL crc_ %i
pxor %xmm2,%xmm1
movq %xmm1, %rax
- xor -i*8(block_2), %rax
+ xor (bufp,chunk_bytes_q,2), %rax
mov crc2, crc_init_q
crc32 %rax, crc_init_q
+ lea 8(bufp,chunk_bytes_q,2), bufp
################################################################
- ## 5) Check for end:
+ ## 5) If more blocks remain, goto (2):
################################################################
-LABEL crc_ 0
- ENDBR
cmp $128*24, len
- jae .Lfull_block
+ jae .Lfull_block
cmp $SMALL_SIZE, len
- jae .Lcontinue_block
+ jae .Lpartial_block
#######################################################################
## 6) Process any remainder without interleaving:
@@ -231,47 +199,30 @@ LABEL crc_ 0
shr $3, %eax
jz .Ldo_dword
.Ldo_qwords:
- crc32q (bufptmp), crc_init_q
- add $8, bufptmp
+ crc32q (bufp), crc_init_q
+ add $8, bufp
dec %eax
jnz .Ldo_qwords
.Ldo_dword:
test $4, len
jz .Ldo_word
- crc32l (bufptmp), crc_init
- add $4, bufptmp
+ crc32l (bufp), crc_init
+ add $4, bufp
.Ldo_word:
test $2, len
jz .Ldo_byte
- crc32w (bufptmp), crc_init
- add $2, bufptmp
+ crc32w (bufp), crc_init
+ add $2, bufp
.Ldo_byte:
test $1, len
jz .Ldone
- crc32b (bufptmp), crc_init
+ crc32b (bufp), crc_init
.Ldone:
mov crc_init, %eax
- popq %rsi
- popq %rdi
- popq %rbx
RET
SYM_FUNC_END(crc_pcl)
.section .rodata, "a", @progbits
- ################################################################
- ## jump table Table is 129 entries x 2 bytes each
- ################################################################
-.align 4
-jump_table:
- i=0
-.rept 129
-.altmacro
-JMPTBL_ENTRY %i
-.noaltmacro
- i=i+1
-.endr
-
-
################################################################
## PCLMULQDQ tables
## Table is 128 entries x 2 words (8 bytes) each

View File

@@ -1,31 +0,0 @@
From cda0e050fec85635986e9cfe991e26339bf305dc Mon Sep 17 00:00:00 2001
From: "Jan Alexander Steffens (heftig)" <heftig@archlinux.org>
Date: Sat, 13 Jan 2024 15:29:25 +0100
Subject: arch/Kconfig: Default to maximum amount of ASLR bits
To mitigate https://zolutal.github.io/aslrnt/; do this with a patch to
avoid having to enable `CONFIG_EXPERT`.
---
arch/Kconfig | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -1089,7 +1089,7 @@ config ARCH_MMAP_RND_BITS
int "Number of bits to use for ASLR of mmap base address" if EXPERT
range ARCH_MMAP_RND_BITS_MIN ARCH_MMAP_RND_BITS_MAX
default ARCH_MMAP_RND_BITS_DEFAULT if ARCH_MMAP_RND_BITS_DEFAULT
- default ARCH_MMAP_RND_BITS_MIN
+ default ARCH_MMAP_RND_BITS_MAX
depends on HAVE_ARCH_MMAP_RND_BITS
help
This value can be used to select the number of bits to use to
@@ -1123,7 +1123,7 @@ config ARCH_MMAP_RND_COMPAT_BITS
int "Number of bits to use for ASLR of mmap base address for compatible applications" if EXPERT
range ARCH_MMAP_RND_COMPAT_BITS_MIN ARCH_MMAP_RND_COMPAT_BITS_MAX
default ARCH_MMAP_RND_COMPAT_BITS_DEFAULT if ARCH_MMAP_RND_COMPAT_BITS_DEFAULT
- default ARCH_MMAP_RND_COMPAT_BITS_MIN
+ default ARCH_MMAP_RND_COMPAT_BITS_MAX
depends on HAVE_ARCH_MMAP_RND_COMPAT_BITS
help
This value can be used to select the number of bits to use to

View File

@@ -0,0 +1,94 @@
From 52af8f543922b47a31ddbb6ffb81f40ad9993309 Mon Sep 17 00:00:00 2001
From: Thadeu Lima de Souza Cascardo <cascardo@igalia.com>
Date: Fri, 7 Feb 2025 15:07:46 -0300
Subject: tpm: do not start chip while suspended
Checking TPM_CHIP_FLAG_SUSPENDED after the call to tpm_find_get_ops() can
lead to a spurious tpm_chip_start() call:
[35985.503771] i2c i2c-1: Transfer while suspended
[35985.503796] WARNING: CPU: 0 PID: 74 at drivers/i2c/i2c-core.h:56 __i2c_transfer+0xbe/0x810
[35985.503802] Modules linked in:
[35985.503808] CPU: 0 UID: 0 PID: 74 Comm: hwrng Tainted: G W 6.13.0-next-20250203-00005-gfa0cb5642941 #19 9c3d7f78192f2d38e32010ac9c90fdc71109ef6f
[35985.503814] Tainted: [W]=WARN
[35985.503817] Hardware name: Google Morphius/Morphius, BIOS Google_Morphius.13434.858.0 10/26/2023
[35985.503819] RIP: 0010:__i2c_transfer+0xbe/0x810
[35985.503825] Code: 30 01 00 00 4c 89 f7 e8 40 fe d8 ff 48 8b 93 80 01 00 00 48 85 d2 75 03 49 8b 16 48 c7 c7 0a fb 7c a7 48 89 c6 e8 32 ad b0 fe <0f> 0b b8 94 ff ff ff e9 33 04 00 00 be 02 00 00 00 83 fd 02 0f 5
[35985.503828] RSP: 0018:ffffa106c0333d30 EFLAGS: 00010246
[35985.503833] RAX: 074ba64aa20f7000 RBX: ffff8aa4c1167120 RCX: 0000000000000000
[35985.503836] RDX: 0000000000000000 RSI: ffffffffa77ab0e4 RDI: 0000000000000001
[35985.503838] RBP: 0000000000000001 R08: 0000000000000001 R09: 0000000000000000
[35985.503841] R10: 0000000000000004 R11: 00000001000313d5 R12: ffff8aa4c10f1820
[35985.503843] R13: ffff8aa4c0e243c0 R14: ffff8aa4c1167250 R15: ffff8aa4c1167120
[35985.503846] FS: 0000000000000000(0000) GS:ffff8aa4eae00000(0000) knlGS:0000000000000000
[35985.503849] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[35985.503852] CR2: 00007fab0aaf1000 CR3: 0000000105328000 CR4: 00000000003506f0
[35985.503855] Call Trace:
[35985.503859] <TASK>
[35985.503863] ? __warn+0xd4/0x260
[35985.503868] ? __i2c_transfer+0xbe/0x810
[35985.503874] ? report_bug+0xf3/0x210
[35985.503882] ? handle_bug+0x63/0xb0
[35985.503887] ? exc_invalid_op+0x16/0x50
[35985.503892] ? asm_exc_invalid_op+0x16/0x20
[35985.503904] ? __i2c_transfer+0xbe/0x810
[35985.503913] tpm_cr50_i2c_transfer_message+0x24/0xf0
[35985.503920] tpm_cr50_i2c_read+0x8e/0x120
[35985.503928] tpm_cr50_request_locality+0x75/0x170
[35985.503935] tpm_chip_start+0x116/0x160
[35985.503942] tpm_try_get_ops+0x57/0x90
[35985.503948] tpm_find_get_ops+0x26/0xd0
[35985.503955] tpm_get_random+0x2d/0x80
Don't move forward with tpm_chip_start() inside tpm_try_get_ops(), unless
TPM_CHIP_FLAG_SUSPENDED is not set. tpm_find_get_ops() will return NULL in
such a failure case.
Fixes: 9265fed6db60 ("tpm: Lock TPM chip in tpm_pm_suspend() first")
Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@igalia.com>
Cc: stable@vger.kernel.org
Cc: Jerry Snitselaar <jsnitsel@redhat.com>
Cc: Mike Seo <mikeseohyungjin@gmail.com>
Cc: Jarkko Sakkinen <jarkko@kernel.org>
Reviewed-by: Jerry Snitselaar <jsnitsel@redhat.com>
Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
---
drivers/char/tpm/tpm-chip.c | 5 +++++
drivers/char/tpm/tpm-interface.c | 7 -------
2 files changed, 5 insertions(+), 7 deletions(-)
--- a/drivers/char/tpm/tpm-chip.c
+++ b/drivers/char/tpm/tpm-chip.c
@@ -168,6 +168,11 @@ int tpm_try_get_ops(struct tpm_chip *chi
goto out_ops;
mutex_lock(&chip->tpm_mutex);
+
+ /* tmp_chip_start may issue IO that is denied while suspended */
+ if (chip->flags & TPM_CHIP_FLAG_SUSPENDED)
+ goto out_lock;
+
rc = tpm_chip_start(chip);
if (rc)
goto out_lock;
--- a/drivers/char/tpm/tpm-interface.c
+++ b/drivers/char/tpm/tpm-interface.c
@@ -445,18 +445,11 @@ int tpm_get_random(struct tpm_chip *chip
if (!chip)
return -ENODEV;
- /* Give back zero bytes, as TPM chip has not yet fully resumed: */
- if (chip->flags & TPM_CHIP_FLAG_SUSPENDED) {
- rc = 0;
- goto out;
- }
-
if (chip->flags & TPM_CHIP_FLAG_TPM2)
rc = tpm2_get_random(chip, out, max);
else
rc = tpm1_get_random(chip, out, max);
-out:
tpm_put_ops(chip);
return rc;
}

View File

@@ -1,83 +0,0 @@
From 218e958524c673d6e68737e7f82d80ba2b6ef59a Mon Sep 17 00:00:00 2001
From: Javier Martinez Canillas <javierm@redhat.com>
Date: Thu, 19 May 2022 14:40:07 +0200
Subject: drivers/firmware: skip simpledrm if nvidia-drm.modeset=1 is set
The Nvidia proprietary driver has some bugs that leads to issues if used
with the simpledrm driver. The most noticeable is that does not register
an emulated fbdev device.
It just relies on a fbdev to be registered by another driver, that could
be that could be attached to the framebuffer console. On UEFI machines,
this is the efifb driver.
This means that disabling the efifb driver will cause virtual consoles to
not be present in the system when using the Nvidia driver. Legacy BIOS is
not affected just because fbcon is not used there, but instead vgacon.
Unless a VGA mode is specified using the vga= kernel command line option,
in that case the vesafb driver is used instead and its fbdev attached to
the fbcon.
This is a problem because with CONFIG_SYSFB_SIMPLEFB=y, the sysfb platform
code attempts to register a "simple-framebuffer" platform device (that is
matched against simpledrm) and only registers either an "efi-framebuffer"
or "vesa-framebuffer" if this fails to be registered due the video modes
not being compatible.
The Nvidia driver relying on another driver to register the fbdev is quite
fragile, since it can't really assume those will stick around. For example
there are patches posted to remove the EFI and VESA platform devices once
a real DRM or fbdev driver probes.
But in any case, moving to a simpledrm + emulated fbdev only breaks this
assumption and causes users to not have VT if the Nvidia driver is used.
So to prevent this, let's add a workaround and make the sysfb to skip the
"simple-framebuffer" registration when nvidia-drm.modeset=1 option is set.
This is quite horrible, but honestly I can't think of any other approach.
For this to work, the CONFIG_FB_EFI and CONFIG_FB_VESA config options must
be enabled besides CONFIG_DRM_SIMPLEDRM.
Signed-off-by: Javier Martinez Canillas <javierm@redhat.com>
Cherry-picked-for: https://bugs.archlinux.org/task/73720
---
drivers/firmware/sysfb.c | 18 +++++++++++++++++-
1 file changed, 17 insertions(+), 1 deletion(-)
--- a/drivers/firmware/sysfb.c
+++ b/drivers/firmware/sysfb.c
@@ -35,6 +35,22 @@
#include <linux/screen_info.h>
#include <linux/sysfb.h>
+static int skip_simpledrm;
+
+static int __init simpledrm_disable(char *opt)
+{
+ if (!opt)
+ return -EINVAL;
+
+ get_option(&opt, &skip_simpledrm);
+
+ if (skip_simpledrm)
+ pr_info("The simpledrm driver will not be probed\n");
+
+ return 0;
+}
+early_param("nvidia-drm.modeset", simpledrm_disable);
+
static struct platform_device *pd;
static DEFINE_MUTEX(disable_lock);
static bool disabled;
@@ -145,7 +161,7 @@ static __init int sysfb_init(void)
/* try to create a simple-framebuffer device */
compatible = sysfb_parse_mode(si, &mode);
- if (compatible) {
+ if (compatible && !skip_simpledrm) {
pd = sysfb_create_simplefb(si, &mode, parent);
if (!IS_ERR(pd))
goto put_device;

View File

@@ -0,0 +1,45 @@
From 2c26fd36ffb4bed4d55f9c7ba8d4f22db093eba2 Mon Sep 17 00:00:00 2001
From: David Rheinsberg <david@readahead.eu>
Date: Tue, 24 Jan 2023 12:04:59 +0100
Subject: x86/insn_decoder_test: allow longer symbol-names
Increase the allowed line-length of the insn-decoder-test to 4k to allow
for symbol-names longer than 256 characters.
The insn-decoder-test takes objdump output as input, which may contain
symbol-names as instruction arguments. With rust-code entering the
kernel, those symbol-names will include mangled-symbols which might
exceed the current line-length-limit of the tool.
By bumping the line-length-limit of the tool to 4k, we get a reasonable
buffer for all objdump outputs I have seen so far. Unfortunately, ELF
symbol-names are not restricted in length, so technically this might
still end up failing if we encounter longer names in the future.
My compile-failure looks like this:
arch/x86/tools/insn_decoder_test: error: malformed line 1152000:
tBb_+0xf2>
..which overflowed by 10 characters reading this line:
ffffffff81458193: 74 3d je ffffffff814581d2 <_RNvXse_NtNtNtCshGpAVYOtgW1_4core4iter8adapters7flattenINtB5_13FlattenCompatINtNtB7_3map3MapNtNtNtBb_3str4iter5CharsNtB1v_17CharEscapeDefaultENtNtBb_4char13EscapeDefaultENtNtBb_3fmt5Debug3fmtBb_+0xf2>
Signed-off-by: David Rheinsberg <david@readahead.eu>
Signed-off-by: Scott Weaver <scweaver@redhat.com>
Cherry-picked-for: https://gitlab.archlinux.org/archlinux/packaging/packages/linux/-/issues/63
---
arch/x86/tools/insn_decoder_test.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/arch/x86/tools/insn_decoder_test.c
+++ b/arch/x86/tools/insn_decoder_test.c
@@ -106,7 +106,7 @@ static void parse_args(int argc, char **
}
}
-#define BUFSIZE 256
+#define BUFSIZE 4096
int main(int argc, char **argv)
{

View File

@@ -1,398 +0,0 @@
From 4eb6615c1498cb8bff76c31e9596b585410f507d Mon Sep 17 00:00:00 2001
From: Oleksandr Natalenko <oleksandr@natalenko.name>
Date: Mon, 30 Sep 2024 08:58:38 +0200
Subject: mm: expose per-process KSM control via syscalls
d7597f59d1d3 added a new API to enable per-process KSM control. It
however uses prctl, which doesn't allow controlling KSM from outside of
the current process.
Hence, expose this API via 3 syscalls: process_ksm_enable,
process_ksm_disable and process_ksm_status. Given sufficient privileges,
auto-KSM can be enable by another process.
Since these syscalls are not in the upstream kernel, also expose their
numbers under /sys/kernel/process_ksm so that userspace tooling like
uksmd knows how to use them.
Signed-off-by: Oleksandr Natalenko <oleksandr@natalenko.name>
---
arch/alpha/kernel/syscalls/syscall.tbl | 3 +
arch/arm/tools/syscall.tbl | 3 +
arch/m68k/kernel/syscalls/syscall.tbl | 3 +
arch/microblaze/kernel/syscalls/syscall.tbl | 3 +
arch/mips/kernel/syscalls/syscall_n32.tbl | 3 +
arch/mips/kernel/syscalls/syscall_n64.tbl | 3 +
arch/mips/kernel/syscalls/syscall_o32.tbl | 3 +
arch/parisc/kernel/syscalls/syscall.tbl | 3 +
arch/powerpc/kernel/syscalls/syscall.tbl | 3 +
arch/s390/kernel/syscalls/syscall.tbl | 3 +
arch/sh/kernel/syscalls/syscall.tbl | 3 +
arch/sparc/kernel/syscalls/syscall.tbl | 3 +
arch/x86/entry/syscalls/syscall_32.tbl | 3 +
arch/x86/entry/syscalls/syscall_64.tbl | 3 +
arch/xtensa/kernel/syscalls/syscall.tbl | 3 +
include/linux/syscalls.h | 3 +
include/uapi/asm-generic/unistd.h | 9 +-
kernel/sys.c | 138 ++++++++++++++++++
kernel/sys_ni.c | 3 +
scripts/syscall.tbl | 3 +
.../arch/powerpc/entry/syscalls/syscall.tbl | 3 +
.../perf/arch/s390/entry/syscalls/syscall.tbl | 3 +
22 files changed, 206 insertions(+), 1 deletion(-)
--- a/arch/alpha/kernel/syscalls/syscall.tbl
+++ b/arch/alpha/kernel/syscalls/syscall.tbl
@@ -502,3 +502,6 @@
570 common lsm_set_self_attr sys_lsm_set_self_attr
571 common lsm_list_modules sys_lsm_list_modules
572 common mseal sys_mseal
+573 common process_ksm_enable sys_process_ksm_enable
+574 common process_ksm_disable sys_process_ksm_disable
+575 common process_ksm_status sys_process_ksm_status
--- a/arch/arm/tools/syscall.tbl
+++ b/arch/arm/tools/syscall.tbl
@@ -477,3 +477,6 @@
460 common lsm_set_self_attr sys_lsm_set_self_attr
461 common lsm_list_modules sys_lsm_list_modules
462 common mseal sys_mseal
+463 common process_ksm_enable sys_process_ksm_enable
+464 common process_ksm_disable sys_process_ksm_disable
+465 common process_ksm_status sys_process_ksm_status
--- a/arch/m68k/kernel/syscalls/syscall.tbl
+++ b/arch/m68k/kernel/syscalls/syscall.tbl
@@ -462,3 +462,6 @@
460 common lsm_set_self_attr sys_lsm_set_self_attr
461 common lsm_list_modules sys_lsm_list_modules
462 common mseal sys_mseal
+463 common process_ksm_enable sys_process_ksm_enable
+464 common process_ksm_disable sys_process_ksm_disable
+465 common process_ksm_status sys_process_ksm_status
--- a/arch/microblaze/kernel/syscalls/syscall.tbl
+++ b/arch/microblaze/kernel/syscalls/syscall.tbl
@@ -468,3 +468,6 @@
460 common lsm_set_self_attr sys_lsm_set_self_attr
461 common lsm_list_modules sys_lsm_list_modules
462 common mseal sys_mseal
+463 common process_ksm_enable sys_process_ksm_enable
+464 common process_ksm_disable sys_process_ksm_disable
+465 common process_ksm_status sys_process_ksm_status
--- a/arch/mips/kernel/syscalls/syscall_n32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n32.tbl
@@ -401,3 +401,6 @@
460 n32 lsm_set_self_attr sys_lsm_set_self_attr
461 n32 lsm_list_modules sys_lsm_list_modules
462 n32 mseal sys_mseal
+463 n32 process_ksm_enable sys_process_ksm_enable
+464 n32 process_ksm_disable sys_process_ksm_disable
+465 n32 process_ksm_status sys_process_ksm_status
--- a/arch/mips/kernel/syscalls/syscall_n64.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n64.tbl
@@ -377,3 +377,6 @@
460 n64 lsm_set_self_attr sys_lsm_set_self_attr
461 n64 lsm_list_modules sys_lsm_list_modules
462 n64 mseal sys_mseal
+463 n64 process_ksm_enable sys_process_ksm_enable
+464 n64 process_ksm_disable sys_process_ksm_disable
+465 n64 process_ksm_status sys_process_ksm_status
--- a/arch/mips/kernel/syscalls/syscall_o32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_o32.tbl
@@ -450,3 +450,6 @@
460 o32 lsm_set_self_attr sys_lsm_set_self_attr
461 o32 lsm_list_modules sys_lsm_list_modules
462 o32 mseal sys_mseal
+463 o32 process_ksm_enable sys_process_ksm_enable
+464 o32 process_ksm_disable sys_process_ksm_disable
+465 o32 process_ksm_status sys_process_ksm_status
--- a/arch/parisc/kernel/syscalls/syscall.tbl
+++ b/arch/parisc/kernel/syscalls/syscall.tbl
@@ -461,3 +461,6 @@
460 common lsm_set_self_attr sys_lsm_set_self_attr
461 common lsm_list_modules sys_lsm_list_modules
462 common mseal sys_mseal
+463 common process_ksm_enable sys_process_ksm_enable
+464 common process_ksm_disable sys_process_ksm_disable
+465 common process_ksm_status sys_process_ksm_status
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -553,3 +553,6 @@
460 common lsm_set_self_attr sys_lsm_set_self_attr
461 common lsm_list_modules sys_lsm_list_modules
462 common mseal sys_mseal
+463 common process_ksm_enable sys_process_ksm_enable
+464 common process_ksm_disable sys_process_ksm_disable
+465 common process_ksm_status sys_process_ksm_status
--- a/arch/s390/kernel/syscalls/syscall.tbl
+++ b/arch/s390/kernel/syscalls/syscall.tbl
@@ -465,3 +465,6 @@
460 common lsm_set_self_attr sys_lsm_set_self_attr sys_lsm_set_self_attr
461 common lsm_list_modules sys_lsm_list_modules sys_lsm_list_modules
462 common mseal sys_mseal sys_mseal
+463 common process_ksm_enable sys_process_ksm_enable sys_process_ksm_enable
+464 common process_ksm_disable sys_process_ksm_disable sys_process_ksm_disable
+465 common process_ksm_status sys_process_ksm_status sys_process_ksm_status
--- a/arch/sh/kernel/syscalls/syscall.tbl
+++ b/arch/sh/kernel/syscalls/syscall.tbl
@@ -466,3 +466,6 @@
460 common lsm_set_self_attr sys_lsm_set_self_attr
461 common lsm_list_modules sys_lsm_list_modules
462 common mseal sys_mseal
+463 common process_ksm_enable sys_process_ksm_enable
+464 common process_ksm_disable sys_process_ksm_disable
+465 common process_ksm_status sys_process_ksm_status
--- a/arch/sparc/kernel/syscalls/syscall.tbl
+++ b/arch/sparc/kernel/syscalls/syscall.tbl
@@ -508,3 +508,6 @@
460 common lsm_set_self_attr sys_lsm_set_self_attr
461 common lsm_list_modules sys_lsm_list_modules
462 common mseal sys_mseal
+463 common process_ksm_enable sys_process_ksm_enable
+464 common process_ksm_disable sys_process_ksm_disable
+465 common process_ksm_status sys_process_ksm_status
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -468,3 +468,6 @@
460 i386 lsm_set_self_attr sys_lsm_set_self_attr
461 i386 lsm_list_modules sys_lsm_list_modules
462 i386 mseal sys_mseal
+463 i386 process_ksm_enable sys_process_ksm_enable
+464 i386 process_ksm_disable sys_process_ksm_disable
+465 i386 process_ksm_status sys_process_ksm_status
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -386,6 +386,9 @@
460 common lsm_set_self_attr sys_lsm_set_self_attr
461 common lsm_list_modules sys_lsm_list_modules
462 common mseal sys_mseal
+463 common process_ksm_enable sys_process_ksm_enable
+464 common process_ksm_disable sys_process_ksm_disable
+465 common process_ksm_status sys_process_ksm_status
#
# Due to a historical design error, certain syscalls are numbered differently
--- a/arch/xtensa/kernel/syscalls/syscall.tbl
+++ b/arch/xtensa/kernel/syscalls/syscall.tbl
@@ -433,3 +433,6 @@
460 common lsm_set_self_attr sys_lsm_set_self_attr
461 common lsm_list_modules sys_lsm_list_modules
462 common mseal sys_mseal
+463 common process_ksm_enable sys_process_ksm_enable
+464 common process_ksm_disable sys_process_ksm_disable
+465 common process_ksm_status sys_process_ksm_status
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -818,6 +818,9 @@ asmlinkage long sys_madvise(unsigned lon
asmlinkage long sys_process_madvise(int pidfd, const struct iovec __user *vec,
size_t vlen, int behavior, unsigned int flags);
asmlinkage long sys_process_mrelease(int pidfd, unsigned int flags);
+asmlinkage long sys_process_ksm_enable(int pidfd, unsigned int flags);
+asmlinkage long sys_process_ksm_disable(int pidfd, unsigned int flags);
+asmlinkage long sys_process_ksm_status(int pidfd, unsigned int flags);
asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size,
unsigned long prot, unsigned long pgoff,
unsigned long flags);
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -841,8 +841,15 @@ __SYSCALL(__NR_lsm_list_modules, sys_lsm
#define __NR_mseal 462
__SYSCALL(__NR_mseal, sys_mseal)
+#define __NR_process_ksm_enable 463
+__SYSCALL(__NR_process_ksm_enable, sys_process_ksm_enable)
+#define __NR_process_ksm_disable 464
+__SYSCALL(__NR_process_ksm_disable, sys_process_ksm_disable)
+#define __NR_process_ksm_status 465
+__SYSCALL(__NR_process_ksm_status, sys_process_ksm_status)
+
#undef __NR_syscalls
-#define __NR_syscalls 463
+#define __NR_syscalls 466
/*
* 32 bit systems traditionally used different
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -2791,6 +2791,144 @@ SYSCALL_DEFINE5(prctl, int, option, unsi
return error;
}
+#ifdef CONFIG_KSM
+enum pkc_action {
+ PKSM_ENABLE = 0,
+ PKSM_DISABLE,
+ PKSM_STATUS,
+};
+
+static long do_process_ksm_control(int pidfd, enum pkc_action action)
+{
+ long ret;
+ struct task_struct *task;
+ struct mm_struct *mm;
+ unsigned int f_flags;
+
+ task = pidfd_get_task(pidfd, &f_flags);
+ if (IS_ERR(task)) {
+ ret = PTR_ERR(task);
+ goto out;
+ }
+
+ /* Require PTRACE_MODE_READ to avoid leaking ASLR metadata. */
+ mm = mm_access(task, PTRACE_MODE_READ_FSCREDS);
+ if (IS_ERR_OR_NULL(mm)) {
+ ret = IS_ERR(mm) ? PTR_ERR(mm) : -ESRCH;
+ goto release_task;
+ }
+
+ /* Require CAP_SYS_NICE for influencing process performance. */
+ if (!capable(CAP_SYS_NICE)) {
+ ret = -EPERM;
+ goto release_mm;
+ }
+
+ if (mmap_write_lock_killable(mm)) {
+ ret = -EINTR;
+ goto release_mm;
+ }
+
+ switch (action) {
+ case PKSM_ENABLE:
+ ret = ksm_enable_merge_any(mm);
+ break;
+ case PKSM_DISABLE:
+ ret = ksm_disable_merge_any(mm);
+ break;
+ case PKSM_STATUS:
+ ret = !!test_bit(MMF_VM_MERGE_ANY, &mm->flags);
+ break;
+ }
+
+ mmap_write_unlock(mm);
+
+release_mm:
+ mmput(mm);
+release_task:
+ put_task_struct(task);
+out:
+ return ret;
+}
+#endif /* CONFIG_KSM */
+
+SYSCALL_DEFINE2(process_ksm_enable, int, pidfd, unsigned int, flags)
+{
+#ifdef CONFIG_KSM
+ if (flags != 0)
+ return -EINVAL;
+
+ return do_process_ksm_control(pidfd, PKSM_ENABLE);
+#else /* CONFIG_KSM */
+ return -ENOSYS;
+#endif /* CONFIG_KSM */
+}
+
+SYSCALL_DEFINE2(process_ksm_disable, int, pidfd, unsigned int, flags)
+{
+#ifdef CONFIG_KSM
+ if (flags != 0)
+ return -EINVAL;
+
+ return do_process_ksm_control(pidfd, PKSM_DISABLE);
+#else /* CONFIG_KSM */
+ return -ENOSYS;
+#endif /* CONFIG_KSM */
+}
+
+SYSCALL_DEFINE2(process_ksm_status, int, pidfd, unsigned int, flags)
+{
+#ifdef CONFIG_KSM
+ if (flags != 0)
+ return -EINVAL;
+
+ return do_process_ksm_control(pidfd, PKSM_STATUS);
+#else /* CONFIG_KSM */
+ return -ENOSYS;
+#endif /* CONFIG_KSM */
+}
+
+#ifdef CONFIG_KSM
+static ssize_t process_ksm_enable_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%u\n", __NR_process_ksm_enable);
+}
+static struct kobj_attribute process_ksm_enable_attr = __ATTR_RO(process_ksm_enable);
+
+static ssize_t process_ksm_disable_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%u\n", __NR_process_ksm_disable);
+}
+static struct kobj_attribute process_ksm_disable_attr = __ATTR_RO(process_ksm_disable);
+
+static ssize_t process_ksm_status_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%u\n", __NR_process_ksm_status);
+}
+static struct kobj_attribute process_ksm_status_attr = __ATTR_RO(process_ksm_status);
+
+static struct attribute *process_ksm_sysfs_attrs[] = {
+ &process_ksm_enable_attr.attr,
+ &process_ksm_disable_attr.attr,
+ &process_ksm_status_attr.attr,
+ NULL,
+};
+
+static const struct attribute_group process_ksm_sysfs_attr_group = {
+ .attrs = process_ksm_sysfs_attrs,
+ .name = "process_ksm",
+};
+
+static int __init process_ksm_sysfs_init(void)
+{
+ return sysfs_create_group(kernel_kobj, &process_ksm_sysfs_attr_group);
+}
+subsys_initcall(process_ksm_sysfs_init);
+#endif /* CONFIG_KSM */
+
SYSCALL_DEFINE3(getcpu, unsigned __user *, cpup, unsigned __user *, nodep,
struct getcpu_cache __user *, unused)
{
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -186,6 +186,9 @@ COND_SYSCALL(mincore);
COND_SYSCALL(madvise);
COND_SYSCALL(process_madvise);
COND_SYSCALL(process_mrelease);
+COND_SYSCALL(process_ksm_enable);
+COND_SYSCALL(process_ksm_disable);
+COND_SYSCALL(process_ksm_status);
COND_SYSCALL(remap_file_pages);
COND_SYSCALL(mbind);
COND_SYSCALL(get_mempolicy);
--- a/scripts/syscall.tbl
+++ b/scripts/syscall.tbl
@@ -403,3 +403,6 @@
460 common lsm_set_self_attr sys_lsm_set_self_attr
461 common lsm_list_modules sys_lsm_list_modules
462 common mseal sys_mseal
+463 common process_ksm_enable sys_process_ksm_enable
+464 common process_ksm_disable sys_process_ksm_disable
+465 common process_ksm_status sys_process_ksm_status
--- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
@@ -553,3 +553,6 @@
460 common lsm_set_self_attr sys_lsm_set_self_attr
461 common lsm_list_modules sys_lsm_list_modules
462 common mseal sys_mseal
+463 common process_ksm_enable sys_process_ksm_enable
+464 common process_ksm_disable sys_process_ksm_disable
+465 common process_ksm_status sys_process_ksm_status
--- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
@@ -465,3 +465,6 @@
460 common lsm_set_self_attr sys_lsm_set_self_attr sys_lsm_set_self_attr
461 common lsm_list_modules sys_lsm_list_modules sys_lsm_list_modules
462 common mseal sys_mseal sys_mseal
+463 common process_ksm_enable sys_process_ksm_enable sys_process_ksm_enable
+464 common process_ksm_disable sys_process_ksm_disable sys_process_ksm_disable
+465 common process_ksm_status sys_process_ksm_status sys_process_ksm_status

View File

@@ -1,178 +0,0 @@
From 6fe0d820b76da3a4f1f8d1fd605b2afc9edcb3f8 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@kernel.org>
Date: Sun, 3 Nov 2024 20:19:39 -0800
Subject: xfs: fix chown with rt quota
Make chown's quota adjustments work with realtime files. This is mostly
a matter of calling xfs_inode_count_blocks on a given file to figure out
the number of blocks allocated to the data device and to the realtime
device, and using those quantities to update the quota accounting when
the id changes. Delayed allocation reservations are moved from the old
dquot's incore reservation to the new dquot's incore reservation.
Note that there was a missing ILOCK bug in xfs_qm_dqusage_adjust that we
must fix before calling xfs_iread_extents. Prior to 2.6.37 the locking
was correct, but then someone removed the ILOCK as part of a cleanup.
Nobody noticed because nowhere in the git history have we ever supported
rt+quota so nobody can use this.
I'm leaving git breadcrumbs in case anyone is desperate enough to try to
backport the rtquota code to old kernels.
Not-Cc: <stable@vger.kernel.org> # v2.6.37
Fixes: 52fda114249578 ("xfs: simplify xfs_qm_dqusage_adjust")
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
fs/xfs/xfs_qm.c | 44 +++++++++++++++++++++++++++-----------------
fs/xfs/xfs_trans.c | 31 +++++++++++++++++++++++++++++--
2 files changed, 56 insertions(+), 19 deletions(-)
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -1181,8 +1181,8 @@ xfs_qm_dqusage_adjust(
void *data)
{
struct xfs_inode *ip;
- xfs_qcnt_t nblks;
- xfs_filblks_t rtblks = 0; /* total rt blks */
+ xfs_filblks_t nblks, rtblks;
+ unsigned int lock_mode;
int error;
ASSERT(XFS_IS_QUOTA_ON(mp));
@@ -1219,18 +1219,17 @@ xfs_qm_dqusage_adjust(
ASSERT(ip->i_delayed_blks == 0);
+ lock_mode = xfs_ilock_data_map_shared(ip);
if (XFS_IS_REALTIME_INODE(ip)) {
- struct xfs_ifork *ifp = xfs_ifork_ptr(ip, XFS_DATA_FORK);
-
error = xfs_iread_extents(tp, ip, XFS_DATA_FORK);
- if (error)
+ if (error) {
+ xfs_iunlock(ip, lock_mode);
goto error0;
-
- xfs_bmap_count_leaves(ifp, &rtblks);
+ }
}
-
- nblks = (xfs_qcnt_t)ip->i_nblocks - rtblks;
+ xfs_inode_count_blocks(tp, ip, &nblks, &rtblks);
xfs_iflags_clear(ip, XFS_IQUOTAUNCHECKED);
+ xfs_iunlock(ip, lock_mode);
/*
* Add the (disk blocks and inode) resources occupied by this
@@ -1892,9 +1891,8 @@ xfs_qm_vop_chown(
struct xfs_dquot *newdq)
{
struct xfs_dquot *prevdq;
- uint bfield = XFS_IS_REALTIME_INODE(ip) ?
- XFS_TRANS_DQ_RTBCOUNT : XFS_TRANS_DQ_BCOUNT;
-
+ xfs_filblks_t dblocks, rblocks;
+ bool isrt = XFS_IS_REALTIME_INODE(ip);
xfs_assert_ilocked(ip, XFS_ILOCK_EXCL);
ASSERT(XFS_IS_QUOTA_ON(ip->i_mount));
@@ -1904,11 +1902,17 @@ xfs_qm_vop_chown(
ASSERT(prevdq);
ASSERT(prevdq != newdq);
- xfs_trans_mod_ino_dquot(tp, ip, prevdq, bfield, -(ip->i_nblocks));
+ xfs_inode_count_blocks(tp, ip, &dblocks, &rblocks);
+
+ xfs_trans_mod_ino_dquot(tp, ip, prevdq, XFS_TRANS_DQ_BCOUNT,
+ -(xfs_qcnt_t)dblocks);
+ xfs_trans_mod_ino_dquot(tp, ip, prevdq, XFS_TRANS_DQ_RTBCOUNT,
+ -(xfs_qcnt_t)rblocks);
xfs_trans_mod_ino_dquot(tp, ip, prevdq, XFS_TRANS_DQ_ICOUNT, -1);
/* the sparkling new dquot */
- xfs_trans_mod_ino_dquot(tp, ip, newdq, bfield, ip->i_nblocks);
+ xfs_trans_mod_ino_dquot(tp, ip, newdq, XFS_TRANS_DQ_BCOUNT, dblocks);
+ xfs_trans_mod_ino_dquot(tp, ip, newdq, XFS_TRANS_DQ_RTBCOUNT, rblocks);
xfs_trans_mod_ino_dquot(tp, ip, newdq, XFS_TRANS_DQ_ICOUNT, 1);
/*
@@ -1918,7 +1922,8 @@ xfs_qm_vop_chown(
* (having already bumped up the real counter) so that we don't have
* any reservation to give back when we commit.
*/
- xfs_trans_mod_dquot(tp, newdq, XFS_TRANS_DQ_RES_BLKS,
+ xfs_trans_mod_dquot(tp, newdq,
+ isrt ? XFS_TRANS_DQ_RES_RTBLKS : XFS_TRANS_DQ_RES_BLKS,
-ip->i_delayed_blks);
/*
@@ -1930,8 +1935,13 @@ xfs_qm_vop_chown(
*/
tp->t_flags |= XFS_TRANS_DIRTY;
xfs_dqlock(prevdq);
- ASSERT(prevdq->q_blk.reserved >= ip->i_delayed_blks);
- prevdq->q_blk.reserved -= ip->i_delayed_blks;
+ if (isrt) {
+ ASSERT(prevdq->q_rtb.reserved >= ip->i_delayed_blks);
+ prevdq->q_rtb.reserved -= ip->i_delayed_blks;
+ } else {
+ ASSERT(prevdq->q_blk.reserved >= ip->i_delayed_blks);
+ prevdq->q_blk.reserved -= ip->i_delayed_blks;
+ }
xfs_dqunlock(prevdq);
/*
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -1257,11 +1257,26 @@ retry:
gdqp = (new_gdqp != ip->i_gdquot) ? new_gdqp : NULL;
pdqp = (new_pdqp != ip->i_pdquot) ? new_pdqp : NULL;
if (udqp || gdqp || pdqp) {
+ xfs_filblks_t dblocks, rblocks;
unsigned int qflags = XFS_QMOPT_RES_REGBLKS;
+ bool isrt = XFS_IS_REALTIME_INODE(ip);
if (force)
qflags |= XFS_QMOPT_FORCE_RES;
+ if (isrt) {
+ error = xfs_iread_extents(tp, ip, XFS_DATA_FORK);
+ if (error)
+ goto out_cancel;
+ }
+
+ xfs_inode_count_blocks(tp, ip, &dblocks, &rblocks);
+
+ if (isrt)
+ rblocks += ip->i_delayed_blks;
+ else
+ dblocks += ip->i_delayed_blks;
+
/*
* Reserve enough quota to handle blocks on disk and reserved
* for a delayed allocation. We'll actually transfer the
@@ -1269,8 +1284,20 @@ retry:
* though that part is only semi-transactional.
*/
error = xfs_trans_reserve_quota_bydquots(tp, mp, udqp, gdqp,
- pdqp, ip->i_nblocks + ip->i_delayed_blks,
- 1, qflags);
+ pdqp, dblocks, 1, qflags);
+ if ((error == -EDQUOT || error == -ENOSPC) && !retried) {
+ xfs_trans_cancel(tp);
+ xfs_blockgc_free_dquots(mp, udqp, gdqp, pdqp, 0);
+ retried = true;
+ goto retry;
+ }
+ if (error)
+ goto out_cancel;
+
+ /* Do the same for realtime. */
+ qflags = XFS_QMOPT_RES_RTBLKS | (qflags & XFS_QMOPT_FORCE_RES);
+ error = xfs_trans_reserve_quota_bydquots(tp, mp, udqp, gdqp,
+ pdqp, rblocks, 0, qflags);
if ((error == -EDQUOT || error == -ENOSPC) && !retried) {
xfs_trans_cancel(tp);
xfs_blockgc_free_dquots(mp, udqp, gdqp, pdqp, 0);

View File

@@ -1,4 +1,4 @@
From 3dacd15edbed579d6966a884ce04aab95f1dfdeb Mon Sep 17 00:00:00 2001
From 0df7cc91ac0a3e84f2e0aeec1a71cd737de41b8a Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Mon, 22 Jan 2024 16:27:56 -0800
Subject: lib: zstd: Refactor intentional wrap-around test
@@ -38,7 +38,7 @@ Signed-off-by: Kees Cook <keescook@chromium.org>
--- a/lib/zstd/decompress/zstd_decompress.c
+++ b/lib/zstd/decompress/zstd_decompress.c
@@ -618,7 +618,7 @@ size_t ZSTD_readSkippableFrame(void* dst
@@ -620,7 +620,7 @@ size_t ZSTD_readSkippableFrame(void* dst
* @return : decompressed size of the frames contained */
unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize)
{
@@ -47,7 +47,7 @@ Signed-off-by: Kees Cook <keescook@chromium.org>
while (srcSize >= ZSTD_startingInputLength(ZSTD_f_zstd1)) {
U32 const magicNumber = MEM_readLE32(src);
@@ -636,7 +636,7 @@ unsigned long long ZSTD_findDecompressed
@@ -638,7 +638,7 @@ unsigned long long ZSTD_findDecompressed
{ unsigned long long const fcs = ZSTD_getFrameContentSize(src, srcSize);
if (fcs >= ZSTD_CONTENTSIZE_ERROR) return fcs;