
add 3rd party/custom patches

3rd party patches (in alphabetical order):
- bbr3
- ntsync5
- openwrt
- pf-kernel
- xanmod
- zen

no configuration changes for now
2024-10-29 05:12:06 +03:00
parent 8082dfeaca
commit 8cbaf1dea2
186 changed files with 43626 additions and 0 deletions

View File

@@ -0,0 +1,70 @@
From 3427331872c37b2edb42406c65764e1565b0591b Mon Sep 17 00:00:00 2001
From: Perry Yuan <perry.yuan@amd.com>
Date: Fri, 9 Aug 2024 14:09:05 +0800
Subject: cpufreq: amd-pstate: add quirk for Ryzen 3000 series processor
The Ryzen 3000 series processors have been observed lacking the
nominal_freq and lowest_freq parameters in their ACPI tables. This
absence causes issues with loading the amd-pstate driver on these
systems. Introduce a fix for these missing parameters by adding a
quirk specifically for the Ryzen 3000 series.
Reported-by: David Wang <00107082@163.com>
Signed-off-by: Perry Yuan <perry.yuan@amd.com>
---
drivers/cpufreq/amd-pstate.c | 30 ++++++++++++++++++++++++++++++
1 file changed, 30 insertions(+)
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -142,6 +142,11 @@ static struct quirk_entry quirk_amd_7k62
.lowest_freq = 550,
};
+static struct quirk_entry quirk_amd_mts = {
+ .nominal_freq = 3600,
+ .lowest_freq = 550,
+};
+
static int __init dmi_matched_7k62_bios_bug(const struct dmi_system_id *dmi)
{
/**
@@ -158,6 +163,21 @@ static int __init dmi_matched_7k62_bios_
return 0;
}
+static int __init dmi_matched_mts_bios_bug(const struct dmi_system_id *dmi)
+{
+ /**
+ * match the broken BIOS for Ryzen 3000 series processors supporting CPPC V2;
+ * these BIOSes lack the nominal_freq and lowest_freq capability
+ * definitions in their ACPI tables
+ */
+ if (cpu_feature_enabled(X86_FEATURE_ZEN2)) {
+ quirks = dmi->driver_data;
+ pr_info("Overriding nominal and lowest frequencies for %s\n", dmi->ident);
+ return 1;
+ }
+
+ return 0;
+}
static const struct dmi_system_id amd_pstate_quirks_table[] __initconst = {
{
.callback = dmi_matched_7k62_bios_bug,
@@ -168,6 +188,16 @@ static const struct dmi_system_id amd_ps
},
.driver_data = &quirk_amd_7k62,
},
+ {
+ .callback = dmi_matched_mts_bios_bug,
+ .ident = "AMD Ryzen 3000",
+ .matches = {
+ DMI_MATCH(DMI_PRODUCT_NAME, "B450M MORTAR MAX (MS-7B89)"),
+ DMI_MATCH(DMI_BIOS_RELEASE, "06/10/2020"),
+ DMI_MATCH(DMI_BIOS_VERSION, "5.14"),
+ },
+ .driver_data = &quirk_amd_mts,
+ },
{}
};
MODULE_DEVICE_TABLE(dmi, amd_pstate_quirks_table);
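A quick way to see whether a given board would hit this quirk is to compare the quirk's match strings against the firmware DMI strings exported in sysfs. Below is a minimal user-space sketch, not part of the patch, assuming the standard /sys/class/dmi/id attributes (product_name, bios_version, bios_release) are available; since the kernel's DMI_MATCH() is a substring match, strstr() is used here:

```c
/* Sketch only: mimic the DMI_MATCH() substring checks of the quirk entry
 * above using the sysfs dmi-id attributes. Not part of the patch. */
#include <stdio.h>
#include <string.h>

static int read_dmi(const char *attr, char *buf, size_t len)
{
	char path[128];
	FILE *f;

	snprintf(path, sizeof(path), "/sys/class/dmi/id/%s", attr);
	f = fopen(path, "r");
	if (!f)
		return -1;
	if (!fgets(buf, len, f)) {
		fclose(f);
		return -1;
	}
	fclose(f);
	buf[strcspn(buf, "\n")] = '\0';
	return 0;
}

int main(void)
{
	char product[64], bios_ver[64], bios_rel[64];

	if (read_dmi("product_name", product, sizeof(product)) ||
	    read_dmi("bios_version", bios_ver, sizeof(bios_ver)) ||
	    read_dmi("bios_release", bios_rel, sizeof(bios_rel)))
		return 1;

	/* DMI_MATCH() is a substring match, hence strstr() */
	if (strstr(product, "B450M MORTAR MAX (MS-7B89)") &&
	    strstr(bios_ver, "5.14") &&
	    strstr(bios_rel, "06/10/2020"))
		printf("quirk_amd_mts would match this board\n");
	else
		printf("quirk_amd_mts would not match this board\n");

	return 0;
}
```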

View File

@@ -0,0 +1,88 @@
From 44f21855901b1fd618ac16b07dbd14e8fea4ee13 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Sat, 31 Aug 2024 21:49:11 -0500
Subject: cpufreq/amd-pstate: Export symbols for changing modes
In order to effectively test all mode switch combinations, export
everything necessary for amd-pstate-ut to trigger a mode switch.
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
---
drivers/cpufreq/amd-pstate.c | 23 ++++++++++-------------
drivers/cpufreq/amd-pstate.h | 14 ++++++++++++++
2 files changed, 24 insertions(+), 13 deletions(-)
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -60,18 +60,6 @@
#define AMD_CPPC_EPP_BALANCE_POWERSAVE 0xBF
#define AMD_CPPC_EPP_POWERSAVE 0xFF
-/*
- * enum amd_pstate_mode - driver working mode of amd pstate
- */
-enum amd_pstate_mode {
- AMD_PSTATE_UNDEFINED = 0,
- AMD_PSTATE_DISABLE,
- AMD_PSTATE_PASSIVE,
- AMD_PSTATE_ACTIVE,
- AMD_PSTATE_GUIDED,
- AMD_PSTATE_MAX,
-};
-
static const char * const amd_pstate_mode_string[] = {
[AMD_PSTATE_UNDEFINED] = "undefined",
[AMD_PSTATE_DISABLE] = "disable",
@@ -81,6 +69,14 @@ static const char * const amd_pstate_mod
NULL,
};
+const char *amd_pstate_get_mode_string(enum amd_pstate_mode mode)
+{
+ if (mode < 0 || mode >= AMD_PSTATE_MAX)
+ return NULL;
+ return amd_pstate_mode_string[mode];
+}
+EXPORT_SYMBOL_GPL(amd_pstate_get_mode_string);
+
struct quirk_entry {
u32 nominal_freq;
u32 lowest_freq;
@@ -1392,7 +1388,7 @@ static ssize_t amd_pstate_show_status(ch
return sysfs_emit(buf, "%s\n", amd_pstate_mode_string[cppc_state]);
}
-static int amd_pstate_update_status(const char *buf, size_t size)
+int amd_pstate_update_status(const char *buf, size_t size)
{
int mode_idx;
@@ -1409,6 +1405,7 @@ static int amd_pstate_update_status(cons
return 0;
}
+EXPORT_SYMBOL_GPL(amd_pstate_update_status);
static ssize_t status_show(struct device *dev,
struct device_attribute *attr, char *buf)
--- a/drivers/cpufreq/amd-pstate.h
+++ b/drivers/cpufreq/amd-pstate.h
@@ -103,4 +103,18 @@ struct amd_cpudata {
bool boost_state;
};
+/*
+ * enum amd_pstate_mode - driver working mode of amd pstate
+ */
+enum amd_pstate_mode {
+ AMD_PSTATE_UNDEFINED = 0,
+ AMD_PSTATE_DISABLE,
+ AMD_PSTATE_PASSIVE,
+ AMD_PSTATE_ACTIVE,
+ AMD_PSTATE_GUIDED,
+ AMD_PSTATE_MAX,
+};
+const char *amd_pstate_get_mode_string(enum amd_pstate_mode mode);
+int amd_pstate_update_status(const char *buf, size_t size);
+
#endif /* _LINUX_AMD_PSTATE_H */

View File

@@ -0,0 +1,77 @@
From aabfc7370a7da9c52be97c79ba70a20201e6864a Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Sat, 31 Aug 2024 21:49:12 -0500
Subject: cpufreq/amd-pstate-ut: Add test case for mode switches
There is a state machine in the amd-pstate driver used for switching
between all modes. To make sure that cleanup and setup work properly
for each mode, add a unit test case that tries all combinations.
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
---
drivers/cpufreq/amd-pstate-ut.c | 41 ++++++++++++++++++++++++++++++++-
1 file changed, 40 insertions(+), 1 deletion(-)
--- a/drivers/cpufreq/amd-pstate-ut.c
+++ b/drivers/cpufreq/amd-pstate-ut.c
@@ -54,12 +54,14 @@ static void amd_pstate_ut_acpi_cpc_valid
static void amd_pstate_ut_check_enabled(u32 index);
static void amd_pstate_ut_check_perf(u32 index);
static void amd_pstate_ut_check_freq(u32 index);
+static void amd_pstate_ut_check_driver(u32 index);
static struct amd_pstate_ut_struct amd_pstate_ut_cases[] = {
{"amd_pstate_ut_acpi_cpc_valid", amd_pstate_ut_acpi_cpc_valid },
{"amd_pstate_ut_check_enabled", amd_pstate_ut_check_enabled },
{"amd_pstate_ut_check_perf", amd_pstate_ut_check_perf },
- {"amd_pstate_ut_check_freq", amd_pstate_ut_check_freq }
+ {"amd_pstate_ut_check_freq", amd_pstate_ut_check_freq },
+ {"amd_pstate_ut_check_driver", amd_pstate_ut_check_driver }
};
static bool get_shared_mem(void)
@@ -257,6 +259,43 @@ skip_test:
cpufreq_cpu_put(policy);
}
+static int amd_pstate_set_mode(enum amd_pstate_mode mode)
+{
+ const char *mode_str = amd_pstate_get_mode_string(mode);
+
+ pr_debug("->setting mode to %s\n", mode_str);
+
+ return amd_pstate_update_status(mode_str, strlen(mode_str));
+}
+
+static void amd_pstate_ut_check_driver(u32 index)
+{
+ enum amd_pstate_mode mode1, mode2;
+ int ret;
+
+ for (mode1 = AMD_PSTATE_DISABLE; mode1 < AMD_PSTATE_MAX; mode1++) {
+ ret = amd_pstate_set_mode(mode1);
+ if (ret)
+ goto out;
+ for (mode2 = AMD_PSTATE_DISABLE; mode2 < AMD_PSTATE_MAX; mode2++) {
+ if (mode1 == mode2)
+ continue;
+ ret = amd_pstate_set_mode(mode2);
+ if (ret)
+ goto out;
+ }
+ }
+out:
+ if (ret)
+ pr_warn("%s: failed to update status for %s->%s: %d\n", __func__,
+ amd_pstate_get_mode_string(mode1),
+ amd_pstate_get_mode_string(mode2), ret);
+
+ amd_pstate_ut_cases[index].result = ret ?
+ AMD_PSTATE_UT_RESULT_FAIL :
+ AMD_PSTATE_UT_RESULT_PASS;
+}
+
static int __init amd_pstate_ut_init(void)
{
u32 i = 0, arr_size = ARRAY_SIZE(amd_pstate_ut_cases);

View File

@@ -0,0 +1,60 @@
From 24e62fbc101d079d398ac6fc76f458676d3d9491 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Sun, 1 Sep 2024 00:00:35 -0500
Subject: cpufreq/amd-pstate: Catch failures for amd_pstate_epp_update_limit()
amd_pstate_set_epp() calls cppc_set_epp_perf(), which can fail for
a variety of reasons, but the result is ignored. Change the return flow
so that failures are propagated to the caller.
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
---
drivers/cpufreq/amd-pstate.c | 11 +++++++----
1 file changed, 7 insertions(+), 4 deletions(-)
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -1595,7 +1595,7 @@ static void amd_pstate_epp_cpu_exit(stru
pr_debug("CPU %d exiting\n", policy->cpu);
}
-static void amd_pstate_epp_update_limit(struct cpufreq_policy *policy)
+static int amd_pstate_epp_update_limit(struct cpufreq_policy *policy)
{
struct amd_cpudata *cpudata = policy->driver_data;
u32 max_perf, min_perf, min_limit_perf, max_limit_perf;
@@ -1645,7 +1645,7 @@ static void amd_pstate_epp_update_limit(
* This return value can only be negative for shared_memory
* systems where EPP register read/write not supported.
*/
- return;
+ return epp;
}
if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
@@ -1658,12 +1658,13 @@ static void amd_pstate_epp_update_limit(
}
WRITE_ONCE(cpudata->cppc_req_cached, value);
- amd_pstate_set_epp(cpudata, epp);
+ return amd_pstate_set_epp(cpudata, epp);
}
static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy)
{
struct amd_cpudata *cpudata = policy->driver_data;
+ int ret;
if (!policy->cpuinfo.max_freq)
return -ENODEV;
@@ -1673,7 +1674,9 @@ static int amd_pstate_epp_set_policy(str
cpudata->policy = policy->policy;
- amd_pstate_epp_update_limit(policy);
+ ret = amd_pstate_epp_update_limit(policy);
+ if (ret)
+ return ret;
/*
* policy->cur is never updated with the amd_pstate_epp driver, but it

View File

@@ -0,0 +1,67 @@
From 29c0347dd542e091e2f7e5980dd885f918f5f676 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Thu, 5 Sep 2024 11:29:57 -0500
Subject: x86/amd: Move amd_get_highest_perf() from amd.c to cppc.c
To prepare to let amd_get_highest_perf() detect preferred cores
it will require CPPC functions. Move amd_get_highest_perf() to
cppc.c to prepare for 'preferred core detection' rework.
No functional changes intended.
Reviewed-by: Perry Yuan <perry.yuan@amd.com>
Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
---
arch/x86/kernel/acpi/cppc.c | 16 ++++++++++++++++
arch/x86/kernel/cpu/amd.c | 16 ----------------
2 files changed, 16 insertions(+), 16 deletions(-)
--- a/arch/x86/kernel/acpi/cppc.c
+++ b/arch/x86/kernel/acpi/cppc.c
@@ -116,3 +116,19 @@ void init_freq_invariance_cppc(void)
init_done = true;
mutex_unlock(&freq_invariance_lock);
}
+
+u32 amd_get_highest_perf(void)
+{
+ struct cpuinfo_x86 *c = &boot_cpu_data;
+
+ if (c->x86 == 0x17 && ((c->x86_model >= 0x30 && c->x86_model < 0x40) ||
+ (c->x86_model >= 0x70 && c->x86_model < 0x80)))
+ return 166;
+
+ if (c->x86 == 0x19 && ((c->x86_model >= 0x20 && c->x86_model < 0x30) ||
+ (c->x86_model >= 0x40 && c->x86_model < 0x70)))
+ return 166;
+
+ return 255;
+}
+EXPORT_SYMBOL_GPL(amd_get_highest_perf);
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -1190,22 +1190,6 @@ unsigned long amd_get_dr_addr_mask(unsig
}
EXPORT_SYMBOL_GPL(amd_get_dr_addr_mask);
-u32 amd_get_highest_perf(void)
-{
- struct cpuinfo_x86 *c = &boot_cpu_data;
-
- if (c->x86 == 0x17 && ((c->x86_model >= 0x30 && c->x86_model < 0x40) ||
- (c->x86_model >= 0x70 && c->x86_model < 0x80)))
- return 166;
-
- if (c->x86 == 0x19 && ((c->x86_model >= 0x20 && c->x86_model < 0x30) ||
- (c->x86_model >= 0x40 && c->x86_model < 0x70)))
- return 166;
-
- return 255;
-}
-EXPORT_SYMBOL_GPL(amd_get_highest_perf);
-
static void zenbleed_check_cpu(void *unused)
{
struct cpuinfo_x86 *c = &cpu_data(smp_processor_id());

View File

@@ -0,0 +1,95 @@
From 072efeb45349edd8ba9def11b6a450eaf56690a8 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Thu, 5 Sep 2024 11:29:58 -0500
Subject: ACPI: CPPC: Adjust return code for inline functions in
!CONFIG_ACPI_CPPC_LIB
checkpatch emits the following warning:
```
WARNING: ENOTSUPP is not a SUSV4 error code, prefer EOPNOTSUPP
```
Adjust the code accordingly.
Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
---
include/acpi/cppc_acpi.h | 26 +++++++++++++-------------
1 file changed, 13 insertions(+), 13 deletions(-)
--- a/include/acpi/cppc_acpi.h
+++ b/include/acpi/cppc_acpi.h
@@ -164,31 +164,31 @@ extern int cppc_set_auto_sel(int cpu, bo
#else /* !CONFIG_ACPI_CPPC_LIB */
static inline int cppc_get_desired_perf(int cpunum, u64 *desired_perf)
{
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
}
static inline int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf)
{
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
}
static inline int cppc_get_highest_perf(int cpunum, u64 *highest_perf)
{
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
}
static inline int cppc_get_perf_ctrs(int cpu, struct cppc_perf_fb_ctrs *perf_fb_ctrs)
{
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
}
static inline int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls)
{
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
}
static inline int cppc_set_enable(int cpu, bool enable)
{
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
}
static inline int cppc_get_perf_caps(int cpu, struct cppc_perf_caps *caps)
{
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
}
static inline bool cppc_perf_ctrs_in_pcc(void)
{
@@ -212,27 +212,27 @@ static inline bool cpc_ffh_supported(voi
}
static inline int cpc_read_ffh(int cpunum, struct cpc_reg *reg, u64 *val)
{
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
}
static inline int cpc_write_ffh(int cpunum, struct cpc_reg *reg, u64 val)
{
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
}
static inline int cppc_set_epp_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls, bool enable)
{
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
}
static inline int cppc_get_epp_perf(int cpunum, u64 *epp_perf)
{
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
}
static inline int cppc_set_auto_sel(int cpu, bool enable)
{
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
}
static inline int cppc_get_auto_sel_caps(int cpunum, struct cppc_perf_caps *perf_caps)
{
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
}
#endif /* !CONFIG_ACPI_CPPC_LIB */

View File

@@ -0,0 +1,162 @@
From 21492d91ffc7c3fdb6507f64a74abf8326c75141 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Thu, 5 Sep 2024 11:29:59 -0500
Subject: x86/amd: Rename amd_get_highest_perf() to
amd_get_boost_ratio_numerator()
The function name is ambiguous because it returns an intermediate value
for calculating maximum frequency rather than the CPPC 'Highest Perf'
register.
Rename the function to clarify its use and allow the function to return
errors. Adjust the consumer in acpi-cpufreq to catch errors.
Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
---
arch/x86/include/asm/processor.h | 3 ---
arch/x86/kernel/acpi/cppc.c | 44 +++++++++++++++++++++++---------
drivers/cpufreq/acpi-cpufreq.c | 12 ++++++---
include/acpi/cppc_acpi.h | 5 ++++
4 files changed, 46 insertions(+), 18 deletions(-)
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -691,8 +691,6 @@ static inline u32 per_cpu_l2c_id(unsigne
}
#ifdef CONFIG_CPU_SUP_AMD
-extern u32 amd_get_highest_perf(void);
-
/*
* Issue a DIV 0/1 insn to clear any division data from previous DIV
* operations.
@@ -705,7 +703,6 @@ static __always_inline void amd_clear_di
extern void amd_check_microcode(void);
#else
-static inline u32 amd_get_highest_perf(void) { return 0; }
static inline void amd_clear_divider(void) { }
static inline void amd_check_microcode(void) { }
#endif
--- a/arch/x86/kernel/acpi/cppc.c
+++ b/arch/x86/kernel/acpi/cppc.c
@@ -69,7 +69,7 @@ int cpc_write_ffh(int cpunum, struct cpc
static void amd_set_max_freq_ratio(void)
{
struct cppc_perf_caps perf_caps;
- u64 highest_perf, nominal_perf;
+ u64 numerator, nominal_perf;
u64 perf_ratio;
int rc;
@@ -79,15 +79,19 @@ static void amd_set_max_freq_ratio(void)
return;
}
- highest_perf = amd_get_highest_perf();
+ rc = amd_get_boost_ratio_numerator(0, &numerator);
+ if (rc) {
+ pr_debug("Could not retrieve highest performance (%d)\n", rc);
+ return;
+ }
nominal_perf = perf_caps.nominal_perf;
- if (!highest_perf || !nominal_perf) {
- pr_debug("Could not retrieve highest or nominal performance\n");
+ if (!nominal_perf) {
+ pr_debug("Could not retrieve nominal performance\n");
return;
}
- perf_ratio = div_u64(highest_perf * SCHED_CAPACITY_SCALE, nominal_perf);
+ perf_ratio = div_u64(numerator * SCHED_CAPACITY_SCALE, nominal_perf);
/* midpoint between max_boost and max_P */
perf_ratio = (perf_ratio + SCHED_CAPACITY_SCALE) >> 1;
if (!perf_ratio) {
@@ -117,18 +121,34 @@ void init_freq_invariance_cppc(void)
mutex_unlock(&freq_invariance_lock);
}
-u32 amd_get_highest_perf(void)
+/**
+ * amd_get_boost_ratio_numerator: Get the numerator to use for boost ratio calculation
+ * @cpu: CPU to get numerator for.
+ * @numerator: Output variable for numerator.
+ *
+ * Determine the numerator to use for calculating the boost ratio on
+ * a CPU. On systems that support preferred cores, this will be a hardcoded
+ * value. On other systems this will be the highest performance register value.
+ *
+ * Return: 0 for success, negative error code otherwise.
+ */
+int amd_get_boost_ratio_numerator(unsigned int cpu, u64 *numerator)
{
struct cpuinfo_x86 *c = &boot_cpu_data;
if (c->x86 == 0x17 && ((c->x86_model >= 0x30 && c->x86_model < 0x40) ||
- (c->x86_model >= 0x70 && c->x86_model < 0x80)))
- return 166;
+ (c->x86_model >= 0x70 && c->x86_model < 0x80))) {
+ *numerator = 166;
+ return 0;
+ }
if (c->x86 == 0x19 && ((c->x86_model >= 0x20 && c->x86_model < 0x30) ||
- (c->x86_model >= 0x40 && c->x86_model < 0x70)))
- return 166;
+ (c->x86_model >= 0x40 && c->x86_model < 0x70))) {
+ *numerator = 166;
+ return 0;
+ }
+ *numerator = 255;
- return 255;
+ return 0;
}
-EXPORT_SYMBOL_GPL(amd_get_highest_perf);
+EXPORT_SYMBOL_GPL(amd_get_boost_ratio_numerator);
--- a/drivers/cpufreq/acpi-cpufreq.c
+++ b/drivers/cpufreq/acpi-cpufreq.c
@@ -642,10 +642,16 @@ static u64 get_max_boost_ratio(unsigned
return 0;
}
- if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
- highest_perf = amd_get_highest_perf();
- else
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
+ ret = amd_get_boost_ratio_numerator(cpu, &highest_perf);
+ if (ret) {
+ pr_debug("CPU%d: Unable to get boost ratio numerator (%d)\n",
+ cpu, ret);
+ return 0;
+ }
+ } else {
highest_perf = perf_caps.highest_perf;
+ }
nominal_perf = perf_caps.nominal_perf;
--- a/include/acpi/cppc_acpi.h
+++ b/include/acpi/cppc_acpi.h
@@ -161,6 +161,7 @@ extern int cppc_get_epp_perf(int cpunum,
extern int cppc_set_epp_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls, bool enable);
extern int cppc_get_auto_sel_caps(int cpunum, struct cppc_perf_caps *perf_caps);
extern int cppc_set_auto_sel(int cpu, bool enable);
+extern int amd_get_boost_ratio_numerator(unsigned int cpu, u64 *numerator);
#else /* !CONFIG_ACPI_CPPC_LIB */
static inline int cppc_get_desired_perf(int cpunum, u64 *desired_perf)
{
@@ -234,6 +235,10 @@ static inline int cppc_get_auto_sel_caps
{
return -EOPNOTSUPP;
}
+static inline int amd_get_boost_ratio_numerator(unsigned int cpu, u64 *numerator)
+{
+ return -EOPNOTSUPP;
+}
#endif /* !CONFIG_ACPI_CPPC_LIB */
#endif /* _CPPC_ACPI_H*/

View File

@@ -0,0 +1,35 @@
From 6f10d066dce0f1781b514a0352f0b427a32b1bb2 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Thu, 5 Sep 2024 11:30:00 -0500
Subject: ACPI: CPPC: Drop check for non zero perf ratio
perf_ratio is a u64 and SCHED_CAPACITY_SCALE is a large number (1024).
Adding SCHED_CAPACITY_SCALE before shifting right by one means the result
can never be zero.
Drop the check.
Suggested-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
---
arch/x86/kernel/acpi/cppc.c | 7 +------
1 file changed, 1 insertion(+), 6 deletions(-)
--- a/arch/x86/kernel/acpi/cppc.c
+++ b/arch/x86/kernel/acpi/cppc.c
@@ -91,13 +91,8 @@ static void amd_set_max_freq_ratio(void)
return;
}
- perf_ratio = div_u64(numerator * SCHED_CAPACITY_SCALE, nominal_perf);
/* midpoint between max_boost and max_P */
- perf_ratio = (perf_ratio + SCHED_CAPACITY_SCALE) >> 1;
- if (!perf_ratio) {
- pr_debug("Non-zero highest/nominal perf values led to a 0 ratio\n");
- return;
- }
+ perf_ratio = (div_u64(numerator * SCHED_CAPACITY_SCALE, nominal_perf) + SCHED_CAPACITY_SCALE) >> 1;
freq_invariance_set_perf_ratio(perf_ratio, false);
}
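To make the folded expression above concrete, here is a stand-alone sketch with made-up numbers (nominal_perf = 120 is purely illustrative); it also shows why the midpoint can never be zero once SCHED_CAPACITY_SCALE (1024) is added before the shift:

```c
/* Illustrative numbers only: the folded perf_ratio computation from
 * amd_set_max_freq_ratio() above. SCHED_CAPACITY_SCALE is 1024, so the
 * midpoint "(... + SCHED_CAPACITY_SCALE) >> 1" can never be zero. */
#include <stdint.h>
#include <stdio.h>

#define SCHED_CAPACITY_SCALE 1024ULL

int main(void)
{
	uint64_t numerator = 166;    /* boost ratio numerator (preferred-core value) */
	uint64_t nominal_perf = 120; /* made-up CPPC nominal_perf */
	uint64_t perf_ratio;

	perf_ratio = (numerator * SCHED_CAPACITY_SCALE / nominal_perf +
		      SCHED_CAPACITY_SCALE) >> 1;

	/* (166 * 1024 / 120 + 1024) >> 1 = (1416 + 1024) >> 1 = 1220 */
	printf("perf_ratio = %llu\n", (unsigned long long)perf_ratio);
	return 0;
}
```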

View File

@@ -0,0 +1,44 @@
From 8c142a91a58f24119e99d4e66b11890f4a4ef984 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Thu, 5 Sep 2024 11:30:01 -0500
Subject: ACPI: CPPC: Adjust debug messages in amd_set_max_freq_ratio() to warn
If the boost ratio isn't calculated properly for the system for any
reason, this can cause other, non-obvious problems.
Raise all messages to the warn level instead.
Suggested-by: Perry Yuan <Perry.Yuan@amd.com>
Reviewed-by: Perry Yuan <perry.yuan@amd.com>
Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
---
arch/x86/kernel/acpi/cppc.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
--- a/arch/x86/kernel/acpi/cppc.c
+++ b/arch/x86/kernel/acpi/cppc.c
@@ -75,19 +75,19 @@ static void amd_set_max_freq_ratio(void)
rc = cppc_get_perf_caps(0, &perf_caps);
if (rc) {
- pr_debug("Could not retrieve perf counters (%d)\n", rc);
+ pr_warn("Could not retrieve perf counters (%d)\n", rc);
return;
}
rc = amd_get_boost_ratio_numerator(0, &numerator);
if (rc) {
- pr_debug("Could not retrieve highest performance (%d)\n", rc);
+ pr_warn("Could not retrieve highest performance (%d)\n", rc);
return;
}
nominal_perf = perf_caps.nominal_perf;
if (!nominal_perf) {
- pr_debug("Could not retrieve nominal performance\n");
+ pr_warn("Could not retrieve nominal performance\n");
return;
}

View File

@@ -0,0 +1,138 @@
From 952e7bdc4cf67603f230f8eb91818ad4676e5a83 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Thu, 5 Sep 2024 11:30:02 -0500
Subject: x86/amd: Move amd_get_highest_perf() out of amd-pstate
amd_pstate_get_highest_perf() is a helper used to get the highest perf
value on AMD systems. It's used in amd-pstate as part of preferred
core handling, but is applicable to acpi-cpufreq as well.
Move it out to cppc handling code as amd_get_highest_perf().
Reviewed-by: Perry Yuan <perry.yuan@amd.com>
Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
---
arch/x86/kernel/acpi/cppc.c | 30 ++++++++++++++++++++++++++++++
drivers/cpufreq/amd-pstate.c | 34 ++--------------------------------
include/acpi/cppc_acpi.h | 5 +++++
3 files changed, 37 insertions(+), 32 deletions(-)
--- a/arch/x86/kernel/acpi/cppc.c
+++ b/arch/x86/kernel/acpi/cppc.c
@@ -116,6 +116,36 @@ void init_freq_invariance_cppc(void)
mutex_unlock(&freq_invariance_lock);
}
+/*
+ * Get the highest performance register value.
+ * @cpu: CPU from which to get highest performance.
+ * @highest_perf: Return address for highest performance value.
+ *
+ * Return: 0 for success, negative error code otherwise.
+ */
+int amd_get_highest_perf(unsigned int cpu, u32 *highest_perf)
+{
+ u64 val;
+ int ret;
+
+ if (cpu_feature_enabled(X86_FEATURE_CPPC)) {
+ ret = rdmsrl_safe_on_cpu(cpu, MSR_AMD_CPPC_CAP1, &val);
+ if (ret)
+ goto out;
+
+ val = AMD_CPPC_HIGHEST_PERF(val);
+ } else {
+ ret = cppc_get_highest_perf(cpu, &val);
+ if (ret)
+ goto out;
+ }
+
+ WRITE_ONCE(*highest_perf, (u32)val);
+out:
+ return ret;
+}
+EXPORT_SYMBOL_GPL(amd_get_highest_perf);
+
/**
* amd_get_boost_ratio_numerator: Get the numerator to use for boost ratio calculation
* @cpu: CPU to get numerator for.
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -837,36 +837,6 @@ static void amd_pstste_sched_prefcore_wo
}
static DECLARE_WORK(sched_prefcore_work, amd_pstste_sched_prefcore_workfn);
-/*
- * Get the highest performance register value.
- * @cpu: CPU from which to get highest performance.
- * @highest_perf: Return address.
- *
- * Return: 0 for success, -EIO otherwise.
- */
-static int amd_pstate_get_highest_perf(int cpu, u32 *highest_perf)
-{
- int ret;
-
- if (cpu_feature_enabled(X86_FEATURE_CPPC)) {
- u64 cap1;
-
- ret = rdmsrl_safe_on_cpu(cpu, MSR_AMD_CPPC_CAP1, &cap1);
- if (ret)
- return ret;
- WRITE_ONCE(*highest_perf, AMD_CPPC_HIGHEST_PERF(cap1));
- } else {
- u64 cppc_highest_perf;
-
- ret = cppc_get_highest_perf(cpu, &cppc_highest_perf);
- if (ret)
- return ret;
- WRITE_ONCE(*highest_perf, cppc_highest_perf);
- }
-
- return (ret);
-}
-
#define CPPC_MAX_PERF U8_MAX
static void amd_pstate_init_prefcore(struct amd_cpudata *cpudata)
@@ -874,7 +844,7 @@ static void amd_pstate_init_prefcore(str
int ret, prio;
u32 highest_perf;
- ret = amd_pstate_get_highest_perf(cpudata->cpu, &highest_perf);
+ ret = amd_get_highest_perf(cpudata->cpu, &highest_perf);
if (ret)
return;
@@ -918,7 +888,7 @@ static void amd_pstate_update_limits(uns
if ((!amd_pstate_prefcore) || (!cpudata->hw_prefcore))
goto free_cpufreq_put;
- ret = amd_pstate_get_highest_perf(cpu, &cur_high);
+ ret = amd_get_highest_perf(cpu, &cur_high);
if (ret)
goto free_cpufreq_put;
--- a/include/acpi/cppc_acpi.h
+++ b/include/acpi/cppc_acpi.h
@@ -161,6 +161,7 @@ extern int cppc_get_epp_perf(int cpunum,
extern int cppc_set_epp_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls, bool enable);
extern int cppc_get_auto_sel_caps(int cpunum, struct cppc_perf_caps *perf_caps);
extern int cppc_set_auto_sel(int cpu, bool enable);
+extern int amd_get_highest_perf(unsigned int cpu, u32 *highest_perf);
extern int amd_get_boost_ratio_numerator(unsigned int cpu, u64 *numerator);
#else /* !CONFIG_ACPI_CPPC_LIB */
static inline int cppc_get_desired_perf(int cpunum, u64 *desired_perf)
@@ -235,6 +236,10 @@ static inline int cppc_get_auto_sel_caps
{
return -EOPNOTSUPP;
}
+static inline int amd_get_highest_perf(unsigned int cpu, u32 *highest_perf)
+{
+ return -ENODEV;
+}
static inline int amd_get_boost_ratio_numerator(unsigned int cpu, u64 *numerator)
{
return -EOPNOTSUPP;

View File

@@ -0,0 +1,251 @@
From 3ab7da5bbf2087982dbfe2b0f2937d0dddc3afb1 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Thu, 5 Sep 2024 11:30:03 -0500
Subject: x86/amd: Detect preferred cores in amd_get_boost_ratio_numerator()
AMD systems that support preferred cores will use "166" as their
numerator for max frequency calculations instead of "255".
Add a function for detecting preferred cores by looking at the
highest perf value on all cores.
If preferred cores are enabled, return 166; if disabled, return the
value in the highest perf register. As the function will be called
multiple times, cache the boost numerator and whether preferred cores
are enabled in global variables.
Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
---
arch/x86/kernel/acpi/cppc.c | 93 ++++++++++++++++++++++++++++++++----
drivers/cpufreq/amd-pstate.c | 34 +++++--------
include/acpi/cppc_acpi.h | 5 ++
3 files changed, 101 insertions(+), 31 deletions(-)
--- a/arch/x86/kernel/acpi/cppc.c
+++ b/arch/x86/kernel/acpi/cppc.c
@@ -9,6 +9,16 @@
#include <asm/processor.h>
#include <asm/topology.h>
+#define CPPC_HIGHEST_PERF_PREFCORE 166
+
+enum amd_pref_core {
+ AMD_PREF_CORE_UNKNOWN = 0,
+ AMD_PREF_CORE_SUPPORTED,
+ AMD_PREF_CORE_UNSUPPORTED,
+};
+static enum amd_pref_core amd_pref_core_detected;
+static u64 boost_numerator;
+
/* Refer to drivers/acpi/cppc_acpi.c for the description of functions */
bool cpc_supported_by_cpu(void)
@@ -147,6 +157,66 @@ out:
EXPORT_SYMBOL_GPL(amd_get_highest_perf);
/**
+ * amd_detect_prefcore: Detect if CPUs in the system support preferred cores
+ * @detected: Output variable for the result of the detection.
+ *
+ * Determine whether CPUs in the system support preferred cores. On systems
+ * that support preferred cores, different highest perf values will be found
+ * on different cores. On other systems, the highest perf value will be the
+ * same on all cores.
+ *
+ * The result of the detection will be stored in the 'detected' parameter.
+ *
+ * Return: 0 for success, negative error code otherwise
+ */
+int amd_detect_prefcore(bool *detected)
+{
+ int cpu, count = 0;
+ u64 highest_perf[2] = {0};
+
+ if (WARN_ON(!detected))
+ return -EINVAL;
+
+ switch (amd_pref_core_detected) {
+ case AMD_PREF_CORE_SUPPORTED:
+ *detected = true;
+ return 0;
+ case AMD_PREF_CORE_UNSUPPORTED:
+ *detected = false;
+ return 0;
+ default:
+ break;
+ }
+
+ for_each_present_cpu(cpu) {
+ u32 tmp;
+ int ret;
+
+ ret = amd_get_highest_perf(cpu, &tmp);
+ if (ret)
+ return ret;
+
+ if (!count || (count == 1 && tmp != highest_perf[0]))
+ highest_perf[count++] = tmp;
+
+ if (count == 2)
+ break;
+ }
+
+ *detected = (count == 2);
+ boost_numerator = highest_perf[0];
+
+ amd_pref_core_detected = *detected ? AMD_PREF_CORE_SUPPORTED :
+ AMD_PREF_CORE_UNSUPPORTED;
+
+ pr_debug("AMD CPPC preferred core is %ssupported (highest perf: 0x%llx)\n",
+ *detected ? "" : "un", highest_perf[0]);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(amd_detect_prefcore);
+
+/**
* amd_get_boost_ratio_numerator: Get the numerator to use for boost ratio calculation
* @cpu: CPU to get numerator for.
* @numerator: Output variable for numerator.
@@ -155,24 +225,27 @@ EXPORT_SYMBOL_GPL(amd_get_highest_perf);
* a CPU. On systems that support preferred cores, this will be a hardcoded
* value. On other systems this will be the highest performance register value.
*
+ * If booting the system with amd-pstate enabled but preferred cores disabled then
+ * the correct boost numerator will be returned to match hardware capabilities
+ * even if the preferred cores scheduling hints are not enabled.
+ *
* Return: 0 for success, negative error code otherwise.
*/
int amd_get_boost_ratio_numerator(unsigned int cpu, u64 *numerator)
{
- struct cpuinfo_x86 *c = &boot_cpu_data;
-
- if (c->x86 == 0x17 && ((c->x86_model >= 0x30 && c->x86_model < 0x40) ||
- (c->x86_model >= 0x70 && c->x86_model < 0x80))) {
- *numerator = 166;
- return 0;
- }
+ bool prefcore;
+ int ret;
- if (c->x86 == 0x19 && ((c->x86_model >= 0x20 && c->x86_model < 0x30) ||
- (c->x86_model >= 0x40 && c->x86_model < 0x70))) {
- *numerator = 166;
+ ret = amd_detect_prefcore(&prefcore);
+ if (ret)
+ return ret;
+
+ /* without preferred cores, return the highest perf register value */
+ if (!prefcore) {
+ *numerator = boost_numerator;
return 0;
}
- *numerator = 255;
+ *numerator = CPPC_HIGHEST_PERF_PREFCORE;
return 0;
}
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -841,32 +841,18 @@ static DECLARE_WORK(sched_prefcore_work,
static void amd_pstate_init_prefcore(struct amd_cpudata *cpudata)
{
- int ret, prio;
- u32 highest_perf;
-
- ret = amd_get_highest_perf(cpudata->cpu, &highest_perf);
- if (ret)
+ /* user disabled or not detected */
+ if (!amd_pstate_prefcore)
return;
cpudata->hw_prefcore = true;
- /* check if CPPC preferred core feature is enabled*/
- if (highest_perf < CPPC_MAX_PERF)
- prio = (int)highest_perf;
- else {
- pr_debug("AMD CPPC preferred core is unsupported!\n");
- cpudata->hw_prefcore = false;
- return;
- }
-
- if (!amd_pstate_prefcore)
- return;
/*
* The priorities can be set regardless of whether or not
* sched_set_itmt_support(true) has been called and it is valid to
* update them at any time after it has been called.
*/
- sched_set_itmt_core_prio(prio, cpudata->cpu);
+ sched_set_itmt_core_prio((int)READ_ONCE(cpudata->highest_perf), cpudata->cpu);
schedule_work(&sched_prefcore_work);
}
@@ -1037,12 +1023,12 @@ static int amd_pstate_cpu_init(struct cp
cpudata->cpu = policy->cpu;
- amd_pstate_init_prefcore(cpudata);
-
ret = amd_pstate_init_perf(cpudata);
if (ret)
goto free_cpudata1;
+ amd_pstate_init_prefcore(cpudata);
+
ret = amd_pstate_init_freq(cpudata);
if (ret)
goto free_cpudata1;
@@ -1493,12 +1479,12 @@ static int amd_pstate_epp_cpu_init(struc
cpudata->cpu = policy->cpu;
cpudata->epp_policy = 0;
- amd_pstate_init_prefcore(cpudata);
-
ret = amd_pstate_init_perf(cpudata);
if (ret)
goto free_cpudata1;
+ amd_pstate_init_prefcore(cpudata);
+
ret = amd_pstate_init_freq(cpudata);
if (ret)
goto free_cpudata1;
@@ -1960,6 +1946,12 @@ static int __init amd_pstate_init(void)
static_call_update(amd_pstate_update_perf, cppc_update_perf);
}
+ if (amd_pstate_prefcore) {
+ ret = amd_detect_prefcore(&amd_pstate_prefcore);
+ if (ret)
+ return ret;
+ }
+
/* enable amd pstate feature */
ret = amd_pstate_enable(true);
if (ret) {
--- a/include/acpi/cppc_acpi.h
+++ b/include/acpi/cppc_acpi.h
@@ -163,6 +163,7 @@ extern int cppc_get_auto_sel_caps(int cp
extern int cppc_set_auto_sel(int cpu, bool enable);
extern int amd_get_highest_perf(unsigned int cpu, u32 *highest_perf);
extern int amd_get_boost_ratio_numerator(unsigned int cpu, u64 *numerator);
+extern int amd_detect_prefcore(bool *detected);
#else /* !CONFIG_ACPI_CPPC_LIB */
static inline int cppc_get_desired_perf(int cpunum, u64 *desired_perf)
{
@@ -244,6 +245,10 @@ static inline int amd_get_boost_ratio_nu
{
return -EOPNOTSUPP;
}
+static inline int amd_detect_prefcore(bool *detected)
+{
+ return -ENODEV;
+}
#endif /* !CONFIG_ACPI_CPPC_LIB */
#endif /* _CPPC_ACPI_H*/
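The detection logic added above boils down to: if at least two distinct highest-perf values are seen across CPUs, preferred cores are supported. Below is a small stand-alone sketch of that idea with made-up per-core values (not kernel code, just an illustration of the loop in amd_detect_prefcore()):

```c
/* Stand-alone illustration of amd_detect_prefcore()'s idea: preferred
 * cores are assumed present when at least two different highest-perf
 * values are reported. The sample values below are made up. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool detect_prefcore(const uint32_t *highest_perf, int ncpus)
{
	uint64_t seen[2] = { 0 };
	int count = 0;

	for (int cpu = 0; cpu < ncpus; cpu++) {
		uint32_t tmp = highest_perf[cpu];

		/* record up to two distinct values, as the kernel loop does */
		if (!count || (count == 1 && tmp != seen[0]))
			seen[count++] = tmp;
		if (count == 2)
			break;
	}
	return count == 2;
}

int main(void)
{
	uint32_t ranked[]  = { 231, 226, 221, 216 }; /* per-core ranking differs */
	uint32_t uniform[] = { 166, 166, 166, 166 }; /* same value everywhere */

	printf("ranked:  prefcore %s\n",
	       detect_prefcore(ranked, 4) ? "supported" : "unsupported");
	printf("uniform: prefcore %s\n",
	       detect_prefcore(uniform, 4) ? "supported" : "unsupported");
	return 0;
}
```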

View File

@@ -0,0 +1,169 @@
From 68d89574b86625f4bd7a784fe9bcc221dc290e4f Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Thu, 5 Sep 2024 11:30:04 -0500
Subject: cpufreq: amd-pstate: Merge amd_pstate_highest_perf_set() into
amd_get_boost_ratio_numerator()
The special case in amd_pstate_highest_perf_set() is the value used
for calculating the boost numerator. Merge this into
amd_get_boost_ratio_numerator() and then use that to calculate boost
ratio.
This allows dropping more special casing of the highest perf value.
Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
---
Documentation/admin-guide/pm/amd-pstate.rst | 3 +-
arch/x86/kernel/acpi/cppc.c | 16 +++++++
drivers/cpufreq/amd-pstate.c | 52 ++++-----------------
3 files changed, 28 insertions(+), 43 deletions(-)
--- a/Documentation/admin-guide/pm/amd-pstate.rst
+++ b/Documentation/admin-guide/pm/amd-pstate.rst
@@ -251,7 +251,8 @@ performance supported in `AMD CPPC Perfo
In some ASICs, the highest CPPC performance is not the one in the ``_CPC``
table, so we need to expose it to sysfs. If boost is not active, but
still supported, this maximum frequency will be larger than the one in
-``cpuinfo``.
+``cpuinfo``. On systems that support preferred core, the driver will have
+different values for some cores than others.
This attribute is read-only.
``amd_pstate_lowest_nonlinear_freq``
--- a/arch/x86/kernel/acpi/cppc.c
+++ b/arch/x86/kernel/acpi/cppc.c
@@ -9,6 +9,7 @@
#include <asm/processor.h>
#include <asm/topology.h>
+#define CPPC_HIGHEST_PERF_PERFORMANCE 196
#define CPPC_HIGHEST_PERF_PREFCORE 166
enum amd_pref_core {
@@ -245,6 +246,21 @@ int amd_get_boost_ratio_numerator(unsign
*numerator = boost_numerator;
return 0;
}
+
+ /*
+ * For AMD CPUs with Family ID 19H and Model ID range 0x70 to 0x7f,
+ * the highest performance level is set to 196.
+ * https://bugzilla.kernel.org/show_bug.cgi?id=218759
+ */
+ if (cpu_feature_enabled(X86_FEATURE_ZEN4)) {
+ switch (boot_cpu_data.x86_model) {
+ case 0x70 ... 0x7f:
+ *numerator = CPPC_HIGHEST_PERF_PERFORMANCE;
+ return 0;
+ default:
+ break;
+ }
+ }
*numerator = CPPC_HIGHEST_PERF_PREFCORE;
return 0;
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -52,8 +52,6 @@
#define AMD_PSTATE_TRANSITION_LATENCY 20000
#define AMD_PSTATE_TRANSITION_DELAY 1000
#define AMD_PSTATE_FAST_CPPC_TRANSITION_DELAY 600
-#define CPPC_HIGHEST_PERF_PERFORMANCE 196
-#define CPPC_HIGHEST_PERF_DEFAULT 166
#define AMD_CPPC_EPP_PERFORMANCE 0x00
#define AMD_CPPC_EPP_BALANCE_PERFORMANCE 0x80
@@ -398,43 +396,17 @@ static inline int amd_pstate_enable(bool
return static_call(amd_pstate_enable)(enable);
}
-static u32 amd_pstate_highest_perf_set(struct amd_cpudata *cpudata)
-{
- struct cpuinfo_x86 *c = &cpu_data(0);
-
- /*
- * For AMD CPUs with Family ID 19H and Model ID range 0x70 to 0x7f,
- * the highest performance level is set to 196.
- * https://bugzilla.kernel.org/show_bug.cgi?id=218759
- */
- if (c->x86 == 0x19 && (c->x86_model >= 0x70 && c->x86_model <= 0x7f))
- return CPPC_HIGHEST_PERF_PERFORMANCE;
-
- return CPPC_HIGHEST_PERF_DEFAULT;
-}
-
static int pstate_init_perf(struct amd_cpudata *cpudata)
{
u64 cap1;
- u32 highest_perf;
int ret = rdmsrl_safe_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1,
&cap1);
if (ret)
return ret;
- /* For platforms that do not support the preferred core feature, the
- * highest_pef may be configured with 166 or 255, to avoid max frequency
- * calculated wrongly. we take the AMD_CPPC_HIGHEST_PERF(cap1) value as
- * the default max perf.
- */
- if (cpudata->hw_prefcore)
- highest_perf = amd_pstate_highest_perf_set(cpudata);
- else
- highest_perf = AMD_CPPC_HIGHEST_PERF(cap1);
-
- WRITE_ONCE(cpudata->highest_perf, highest_perf);
- WRITE_ONCE(cpudata->max_limit_perf, highest_perf);
+ WRITE_ONCE(cpudata->highest_perf, AMD_CPPC_HIGHEST_PERF(cap1));
+ WRITE_ONCE(cpudata->max_limit_perf, AMD_CPPC_HIGHEST_PERF(cap1));
WRITE_ONCE(cpudata->nominal_perf, AMD_CPPC_NOMINAL_PERF(cap1));
WRITE_ONCE(cpudata->lowest_nonlinear_perf, AMD_CPPC_LOWNONLIN_PERF(cap1));
WRITE_ONCE(cpudata->lowest_perf, AMD_CPPC_LOWEST_PERF(cap1));
@@ -446,19 +418,13 @@ static int pstate_init_perf(struct amd_c
static int cppc_init_perf(struct amd_cpudata *cpudata)
{
struct cppc_perf_caps cppc_perf;
- u32 highest_perf;
int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
if (ret)
return ret;
- if (cpudata->hw_prefcore)
- highest_perf = amd_pstate_highest_perf_set(cpudata);
- else
- highest_perf = cppc_perf.highest_perf;
-
- WRITE_ONCE(cpudata->highest_perf, highest_perf);
- WRITE_ONCE(cpudata->max_limit_perf, highest_perf);
+ WRITE_ONCE(cpudata->highest_perf, cppc_perf.highest_perf);
+ WRITE_ONCE(cpudata->max_limit_perf, cppc_perf.highest_perf);
WRITE_ONCE(cpudata->nominal_perf, cppc_perf.nominal_perf);
WRITE_ONCE(cpudata->lowest_nonlinear_perf,
cppc_perf.lowest_nonlinear_perf);
@@ -944,8 +910,8 @@ static u32 amd_pstate_get_transition_lat
static int amd_pstate_init_freq(struct amd_cpudata *cpudata)
{
int ret;
- u32 min_freq;
- u32 highest_perf, max_freq;
+ u32 min_freq, max_freq;
+ u64 numerator;
u32 nominal_perf, nominal_freq;
u32 lowest_nonlinear_perf, lowest_nonlinear_freq;
u32 boost_ratio, lowest_nonlinear_ratio;
@@ -967,8 +933,10 @@ static int amd_pstate_init_freq(struct a
nominal_perf = READ_ONCE(cpudata->nominal_perf);
- highest_perf = READ_ONCE(cpudata->highest_perf);
- boost_ratio = div_u64(highest_perf << SCHED_CAPACITY_SHIFT, nominal_perf);
+ ret = amd_get_boost_ratio_numerator(cpudata->cpu, &numerator);
+ if (ret)
+ return ret;
+ boost_ratio = div_u64(numerator << SCHED_CAPACITY_SHIFT, nominal_perf);
max_freq = (nominal_freq * boost_ratio >> SCHED_CAPACITY_SHIFT) * 1000;
lowest_nonlinear_perf = READ_ONCE(cpudata->lowest_nonlinear_perf);
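For a numeric sanity check of the new max_freq derivation in amd_pstate_init_freq(), here is a sketch with made-up values (numerator = 166 as on preferred-core systems; nominal_perf and nominal_freq are purely illustrative):

```c
/* Illustrative numbers only: the max_freq derivation used in
 * amd_pstate_init_freq() after this change. nominal_freq is in MHz,
 * the result in KHz. */
#include <stdint.h>
#include <stdio.h>

#define SCHED_CAPACITY_SHIFT 10

int main(void)
{
	uint64_t numerator = 166;     /* from amd_get_boost_ratio_numerator() */
	uint32_t nominal_perf = 120;  /* made-up CPPC nominal_perf */
	uint32_t nominal_freq = 3800; /* made-up nominal frequency, MHz */
	uint32_t boost_ratio, max_freq;

	boost_ratio = (uint32_t)((numerator << SCHED_CAPACITY_SHIFT) / nominal_perf);
	max_freq = (nominal_freq * boost_ratio >> SCHED_CAPACITY_SHIFT) * 1000;

	/* (166 << 10) / 120 = 1416; (3800 * 1416) >> 10 = 5254; * 1000 = 5254000 KHz */
	printf("boost_ratio = %u, max_freq = %u KHz\n", boost_ratio, max_freq);
	return 0;
}
```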

View File

@@ -0,0 +1,42 @@
From deed718125e73b6bf280dcebb80c39108226388c Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Thu, 5 Sep 2024 11:30:05 -0500
Subject: cpufreq: amd-pstate: Optimize amd_pstate_update_limits()
Don't take and release the mutex when prefcore isn't present, and
avoid initializing variables that are set later in the function anyway.
Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
Reviewed-by: Perry Yuan <perry.yuan@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
---
drivers/cpufreq/amd-pstate.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -836,17 +836,17 @@ static void amd_pstate_update_limits(uns
cpudata = policy->driver_data;
- mutex_lock(&amd_pstate_driver_lock);
- if ((!amd_pstate_prefcore) || (!cpudata->hw_prefcore))
- goto free_cpufreq_put;
+ if (!amd_pstate_prefcore)
+ return;
+ mutex_lock(&amd_pstate_driver_lock);
ret = amd_get_highest_perf(cpu, &cur_high);
if (ret)
goto free_cpufreq_put;
prev_high = READ_ONCE(cpudata->prefcore_ranking);
- if (prev_high != cur_high) {
- highest_perf_changed = true;
+ highest_perf_changed = (prev_high != cur_high);
+ if (highest_perf_changed) {
WRITE_ONCE(cpudata->prefcore_ranking, cur_high);
if (cur_high < CPPC_MAX_PERF)

View File

@@ -0,0 +1,29 @@
From 391075a34e392c7cacd338a6b034a21a10679855 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Thu, 5 Sep 2024 11:30:06 -0500
Subject: cpufreq: amd-pstate: Add documentation for `amd_pstate_hw_prefcore`
Explain that the sysfs file reflects both whether the preferred core feature
is supported by the hardware and whether it has been enabled by the user.
Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
---
Documentation/admin-guide/pm/amd-pstate.rst | 5 +++++
1 file changed, 5 insertions(+)
--- a/Documentation/admin-guide/pm/amd-pstate.rst
+++ b/Documentation/admin-guide/pm/amd-pstate.rst
@@ -263,6 +263,11 @@ lowest non-linear performance in `AMD CP
<perf_cap_>`_.)
This attribute is read-only.
+``amd_pstate_hw_prefcore``
+
+Whether the platform supports the preferred core feature and it has been
+enabled. This attribute is read-only.
+
``energy_performance_available_preferences``
A list of all the supported EPP preferences that could be used for

View File

@@ -0,0 +1,42 @@
From 2ed9874f6dcafcc2bee7a922af9e1d1c62dbeb18 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Thu, 5 Sep 2024 11:30:07 -0500
Subject: amd-pstate: Add missing documentation for
`amd_pstate_prefcore_ranking`
`amd_pstate_prefcore_ranking` reflects the dynamic rankings of a CPU
core based on platform conditions. Explicitly include it in the
documentation.
Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
---
Documentation/admin-guide/pm/amd-pstate.rst | 9 ++++++++-
1 file changed, 8 insertions(+), 1 deletion(-)
--- a/Documentation/admin-guide/pm/amd-pstate.rst
+++ b/Documentation/admin-guide/pm/amd-pstate.rst
@@ -252,7 +252,8 @@ In some ASICs, the highest CPPC performa
table, so we need to expose it to sysfs. If boost is not active, but
still supported, this maximum frequency will be larger than the one in
``cpuinfo``. On systems that support preferred core, the driver will have
-different values for some cores than others.
+different values for some cores than others and this will reflect the values
+advertised by the platform at bootup.
This attribute is read-only.
``amd_pstate_lowest_nonlinear_freq``
@@ -268,6 +269,12 @@ This attribute is read-only.
Whether the platform supports the preferred core feature and it has been
enabled. This attribute is read-only.
+``amd_pstate_prefcore_ranking``
+
+The performance ranking of the core. This number doesn't have any unit, but
+larger numbers are preferred at the time of reading. This can change at
+runtime based on platform conditions. This attribute is read-only.
+
``energy_performance_available_preferences``
A list of all the supported EPP preferences that could be used for

View File

@@ -0,0 +1,24 @@
From 2e2ba39aec71fb51e897c3275b255ef806800cf0 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Thu, 5 Sep 2024 11:23:51 -0500
Subject: cpufreq/amd-pstate: Fix non kerneldoc comment
The comment for amd_cppc_supported() isn't meant to be kernel doc.
Fixes: cb817ec6673b7 ("cpufreq: amd-pstate: show CPPC debug message if CPPC is not supported")
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
---
drivers/cpufreq/amd-pstate.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -1786,7 +1786,7 @@ static int __init amd_pstate_set_driver(
return -EINVAL;
}
-/**
+/*
* CPPC function is not supported for family ID 17H with model_ID ranging from 0x10 to 0x2F.
* show the debug message that helps to check if the CPU has CPPC support for loading issue.
*/

View File

@@ -0,0 +1,24 @@
From 185e64a7e1a749593f3d6dadc666da9dda82d48c Mon Sep 17 00:00:00 2001
From: Qianqiang Liu <qianqiang.liu@163.com>
Date: Wed, 11 Sep 2024 07:39:24 +0800
Subject: cpufreq/amd-pstate-ut: Fix an "Uninitialized variables" issue
The uninitialized variable "mode2" may be used when calling "amd_pstate_get_mode_string".
Set "mode2" to "AMD_PSTATE_DISABLE" by default.
Signed-off-by: Qianqiang Liu <qianqiang.liu@163.com>
---
drivers/cpufreq/amd-pstate-ut.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/drivers/cpufreq/amd-pstate-ut.c
+++ b/drivers/cpufreq/amd-pstate-ut.c
@@ -270,7 +270,7 @@ static int amd_pstate_set_mode(enum amd_
static void amd_pstate_ut_check_driver(u32 index)
{
- enum amd_pstate_mode mode1, mode2;
+ enum amd_pstate_mode mode1, mode2 = AMD_PSTATE_DISABLE;
int ret;
for (mode1 = AMD_PSTATE_DISABLE; mode1 < AMD_PSTATE_MAX; mode1++) {

View File

@@ -0,0 +1,108 @@
From d74ce254cc470da670d6b90c69bab553cdbde62b Mon Sep 17 00:00:00 2001
From: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
Date: Tue, 17 Sep 2024 09:14:35 +0000
Subject: cpufreq/amd-pstate: Rename MSR and shared memory specific functions
The existing function names "cppc_*" and "pstate_*" for shared memory and
MSR based systems are not intuitive enough; replace them with "shmem_*" and
"msr_*" respectively.
Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
---
drivers/cpufreq/amd-pstate.c | 24 ++++++++++++------------
1 file changed, 12 insertions(+), 12 deletions(-)
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -263,7 +263,7 @@ static int amd_pstate_get_energy_pref_in
return index;
}
-static void pstate_update_perf(struct amd_cpudata *cpudata, u32 min_perf,
+static void msr_update_perf(struct amd_cpudata *cpudata, u32 min_perf,
u32 des_perf, u32 max_perf, bool fast_switch)
{
if (fast_switch)
@@ -273,7 +273,7 @@ static void pstate_update_perf(struct am
READ_ONCE(cpudata->cppc_req_cached));
}
-DEFINE_STATIC_CALL(amd_pstate_update_perf, pstate_update_perf);
+DEFINE_STATIC_CALL(amd_pstate_update_perf, msr_update_perf);
static inline void amd_pstate_update_perf(struct amd_cpudata *cpudata,
u32 min_perf, u32 des_perf,
@@ -336,7 +336,7 @@ static int amd_pstate_set_energy_pref_in
return ret;
}
-static inline int pstate_enable(bool enable)
+static inline int msr_enable(bool enable)
{
int ret, cpu;
unsigned long logical_proc_id_mask = 0;
@@ -362,7 +362,7 @@ static inline int pstate_enable(bool ena
return 0;
}
-static int cppc_enable(bool enable)
+static int shmem_enable(bool enable)
{
int cpu, ret = 0;
struct cppc_perf_ctrls perf_ctrls;
@@ -389,14 +389,14 @@ static int cppc_enable(bool enable)
return ret;
}
-DEFINE_STATIC_CALL(amd_pstate_enable, pstate_enable);
+DEFINE_STATIC_CALL(amd_pstate_enable, msr_enable);
static inline int amd_pstate_enable(bool enable)
{
return static_call(amd_pstate_enable)(enable);
}
-static int pstate_init_perf(struct amd_cpudata *cpudata)
+static int msr_init_perf(struct amd_cpudata *cpudata)
{
u64 cap1;
@@ -415,7 +415,7 @@ static int pstate_init_perf(struct amd_c
return 0;
}
-static int cppc_init_perf(struct amd_cpudata *cpudata)
+static int shmem_init_perf(struct amd_cpudata *cpudata)
{
struct cppc_perf_caps cppc_perf;
@@ -450,14 +450,14 @@ static int cppc_init_perf(struct amd_cpu
return ret;
}
-DEFINE_STATIC_CALL(amd_pstate_init_perf, pstate_init_perf);
+DEFINE_STATIC_CALL(amd_pstate_init_perf, msr_init_perf);
static inline int amd_pstate_init_perf(struct amd_cpudata *cpudata)
{
return static_call(amd_pstate_init_perf)(cpudata);
}
-static void cppc_update_perf(struct amd_cpudata *cpudata,
+static void shmem_update_perf(struct amd_cpudata *cpudata,
u32 min_perf, u32 des_perf,
u32 max_perf, bool fast_switch)
{
@@ -1909,9 +1909,9 @@ static int __init amd_pstate_init(void)
current_pstate_driver->adjust_perf = amd_pstate_adjust_perf;
} else {
pr_debug("AMD CPPC shared memory based functionality is supported\n");
- static_call_update(amd_pstate_enable, cppc_enable);
- static_call_update(amd_pstate_init_perf, cppc_init_perf);
- static_call_update(amd_pstate_update_perf, cppc_update_perf);
+ static_call_update(amd_pstate_enable, shmem_enable);
+ static_call_update(amd_pstate_init_perf, shmem_init_perf);
+ static_call_update(amd_pstate_update_perf, shmem_update_perf);
}
if (amd_pstate_prefcore) {

View File

@@ -0,0 +1,115 @@
From 787175146e26a199c06be4e6bf8cf8da0f757271 Mon Sep 17 00:00:00 2001
From: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
Date: Thu, 3 Oct 2024 08:39:52 +0000
Subject: cpufreq: Add a callback to update the min_freq_req from drivers
Currently, there is no proper way to update the initial lower frequency
limit from cpufreq drivers. The only way is to add a new min_freq qos
request from the driver side, but that leads to the issue explained below.
The QoS infrastructure collates the constraints from multiple
subsystems and saves them in a plist. The "current value" is defined to
be the highest value in the plist for min_freq constraint.
The cpufreq core adds a qos_request for min_freq to be 0 and the amd-pstate
driver today adds qos request for min_freq to be lowest_freq, where
lowest_freq corresponds to CPPC.lowest_perf.
E.g., consider (WLOG) the amd-pstate driver, where lowest_freq is 400000 KHz
and lowest_non_linear_freq is 1200000 KHz.
At this point of time, the min_freq QoS plist looks like:
head--> 400000 KHz (registered by amd-pstate) --> 0 KHz (registered by
cpufreq core)
When a user updates /sys/devices/system/cpu/cpuX/cpufreq/scaling_min_freq,
it only results in updating the cpufreq-core's node in the plist, where
say 0 becomes the newly echoed value.
Now, if the user echoes a value 1000000 KHz, to scaling_min_freq, then the
new list would be
head--> 1000000 KHz (registered by cpufreq core) --> 400000 KHz (registered
by amd-pstate)
and the new "current value" of the min_freq QoS constraint will be 1000000
KHz, this is the scenario where it works as expected.
Suppose we change the amd-pstate driver code's min_freq qos constraint
to lowest_non_linear_freq instead of lowest_freq, then the user will
never be able to request a value below that, due to the following:
At boot time, the min_freq QoS plist would be
head--> 1200000 KHz (registered by amd-pstate) --> 0 KHz (registered by
cpufreq core)
When the user echoes a value of 1000000 KHz, to
/sys/devices/..../scaling_min_freq, then the new list would be
head--> 1200000 KHz (registered by amd-pstate) --> 1000000 KHz (registered
by cpufreq core)
with the new "current value" of the min_freq QoS remaining 1200000 KHz.
Since the current value has not changed, there won't be any notifications
sent to the subsystems which have added their QoS constraints. In
particular, the amd-pstate driver will not get the notification, and thus,
the user's request to lower the scaling_min_freq will be ineffective.
Hence, it is advisable to have a single source of truth for the min and
max freq QoS constraints between the cpufreq core and the cpufreq drivers.
So add a new callback get_init_min_freq() in struct cpufreq_driver,
which allows amd-pstate (or any other cpufreq driver) to override the
default min_freq value being set in the policy->min_freq_req. Now
scaling_min_freq can be modified by the user to any value (lower or
higher than the init value) later on if desired.
Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
---
drivers/cpufreq/cpufreq.c | 6 +++++-
include/linux/cpufreq.h | 6 ++++++
2 files changed, 11 insertions(+), 1 deletion(-)
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -1380,6 +1380,7 @@ static int cpufreq_online(unsigned int c
bool new_policy;
unsigned long flags;
unsigned int j;
+ u32 init_min_freq = FREQ_QOS_MIN_DEFAULT_VALUE;
int ret;
pr_debug("%s: bringing CPU%u online\n", __func__, cpu);
@@ -1464,9 +1465,12 @@ static int cpufreq_online(unsigned int c
goto out_destroy_policy;
}
+ if (cpufreq_driver->get_init_min_freq)
+ init_min_freq = cpufreq_driver->get_init_min_freq(policy);
+
ret = freq_qos_add_request(&policy->constraints,
policy->min_freq_req, FREQ_QOS_MIN,
- FREQ_QOS_MIN_DEFAULT_VALUE);
+ init_min_freq);
if (ret < 0) {
/*
* So we don't call freq_qos_remove_request() for an
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -414,6 +414,12 @@ struct cpufreq_driver {
* policy is properly initialized, but before the governor is started.
*/
void (*register_em)(struct cpufreq_policy *policy);
+
+ /*
+ * Set by drivers that want to initialize the policy->min_freq_req with
+ * a value different from the default value (0) in cpufreq core.
+ */
+ int (*get_init_min_freq)(struct cpufreq_policy *policy);
};
/* flags */
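The plist behaviour described in the commit message can be reduced to "the effective min_freq is the maximum of all registered requests". Below is a toy model, not the kernel's freq_qos/plist code, reproducing the two scenarios from the text:

```c
/* Toy model (not the kernel's plist code) of how the min_freq QoS
 * "current value" described above is aggregated: it is simply the
 * maximum of all registered requests, so a user request below the
 * driver's request has no effect. Frequencies are in KHz. */
#include <stdio.h>

static unsigned int effective_min(const unsigned int *reqs, int n)
{
	unsigned int cur = 0;

	for (int i = 0; i < n; i++)
		if (reqs[i] > cur)
			cur = reqs[i];
	return cur;
}

int main(void)
{
	/* scenario 1: driver registers lowest_freq (400000), user echoes 1000000 */
	unsigned int a[] = { 400000, 1000000 };
	/* scenario 2: driver registers lowest_nonlinear_freq (1200000), user echoes 1000000 */
	unsigned int b[] = { 1200000, 1000000 };

	printf("scenario 1: effective min_freq = %u KHz\n", effective_min(a, 2)); /* 1000000 */
	printf("scenario 2: effective min_freq = %u KHz\n", effective_min(b, 2)); /* 1200000 */
	return 0;
}
```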

View File

@@ -0,0 +1,79 @@
From f5b234be445a45b0bcacc37e0aad7a6bc7900eac Mon Sep 17 00:00:00 2001
From: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
Date: Thu, 3 Oct 2024 08:39:54 +0000
Subject: cpufreq/amd-pstate: Set the initial min_freq to lowest_nonlinear_freq
According to the AMD architectural programmer's manual volume 2 [1], in
section "17.6.4.1 CPPC_CAPABILITY_1" lowest_nonlinear_perf is described
as "Reports the most energy efficient performance level (in terms of
performance per watt). Above this threshold, lower performance levels
generally result in increased energy efficiency. Reducing performance
below this threshold does not result in total energy savings for a given
computation, although it reduces instantaneous power consumption". So
lowest_nonlinear_perf is the most power efficient performance level, and
going below that would lead to a worse performance/watt.
Also, setting the minimum frequency to lowest_nonlinear_freq (instead of
lowest_freq) allows the CPU to idle at a higher frequency which leads
to more time being spent in a deeper idle state (as trivial idle tasks
are completed sooner). This has shown a power benefit in some systems;
in other systems, power consumption has increased, but so has the
throughput/watt.
Use the get_init_min_freq() callback to set the initial lower limit for
amd-pstate driver to lowest_nonlinear_freq instead of lowest_freq.
Link: https://www.amd.com/content/dam/amd/en/documents/processor-tech-docs/programmer-references/24593.pdf [1]
Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
---
drivers/cpufreq/amd-pstate.c | 16 +++++++++-------
1 file changed, 9 insertions(+), 7 deletions(-)
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -1025,13 +1025,6 @@ static int amd_pstate_cpu_init(struct cp
if (cpu_feature_enabled(X86_FEATURE_CPPC))
policy->fast_switch_possible = true;
- ret = freq_qos_add_request(&policy->constraints, &cpudata->req[0],
- FREQ_QOS_MIN, policy->cpuinfo.min_freq);
- if (ret < 0) {
- dev_err(dev, "Failed to add min-freq constraint (%d)\n", ret);
- goto free_cpudata1;
- }
-
ret = freq_qos_add_request(&policy->constraints, &cpudata->req[1],
FREQ_QOS_MAX, policy->cpuinfo.max_freq);
if (ret < 0) {
@@ -1736,6 +1729,13 @@ static int amd_pstate_epp_resume(struct
return 0;
}
+static int amd_pstate_get_init_min_freq(struct cpufreq_policy *policy)
+{
+ struct amd_cpudata *cpudata = policy->driver_data;
+
+ return READ_ONCE(cpudata->lowest_nonlinear_freq);
+}
+
static struct cpufreq_driver amd_pstate_driver = {
.flags = CPUFREQ_CONST_LOOPS | CPUFREQ_NEED_UPDATE_LIMITS,
.verify = amd_pstate_verify,
@@ -1749,6 +1749,7 @@ static struct cpufreq_driver amd_pstate_
.update_limits = amd_pstate_update_limits,
.name = "amd-pstate",
.attr = amd_pstate_attr,
+ .get_init_min_freq = amd_pstate_get_init_min_freq,
};
static struct cpufreq_driver amd_pstate_epp_driver = {
@@ -1765,6 +1766,7 @@ static struct cpufreq_driver amd_pstate_
.set_boost = amd_pstate_set_boost,
.name = "amd-pstate-epp",
.attr = amd_pstate_epp_attr,
+ .get_init_min_freq = amd_pstate_get_init_min_freq,
};
static int __init amd_pstate_set_driver(int mode_idx)

View File

@@ -0,0 +1,103 @@
From f7b2b3a1c0d015c4272793bed89734c5cffb354c Mon Sep 17 00:00:00 2001
From: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
Date: Thu, 3 Oct 2024 08:39:56 +0000
Subject: cpufreq/amd-pstate: Cleanup the old min_freq qos request remnants
Convert the freq_qos_request array in struct amd_cpudata to a single
variable (used only for the max_freq request). Remove the references to the
cpudata->req array. Remove and rename the jump labels accordingly.
Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
---
drivers/cpufreq/amd-pstate.c | 19 ++++++++-----------
drivers/cpufreq/amd-pstate.h | 4 ++--
2 files changed, 10 insertions(+), 13 deletions(-)
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -726,7 +726,7 @@ static int amd_pstate_cpu_boost_update(s
policy->max = policy->cpuinfo.max_freq;
if (cppc_state == AMD_PSTATE_PASSIVE) {
- ret = freq_qos_update_request(&cpudata->req[1], policy->cpuinfo.max_freq);
+ ret = freq_qos_update_request(&cpudata->max_freq_req, policy->cpuinfo.max_freq);
if (ret < 0)
pr_debug("Failed to update freq constraint: CPU%d\n", cpudata->cpu);
}
@@ -993,17 +993,17 @@ static int amd_pstate_cpu_init(struct cp
ret = amd_pstate_init_perf(cpudata);
if (ret)
- goto free_cpudata1;
+ goto free_cpudata;
amd_pstate_init_prefcore(cpudata);
ret = amd_pstate_init_freq(cpudata);
if (ret)
- goto free_cpudata1;
+ goto free_cpudata;
ret = amd_pstate_init_boost_support(cpudata);
if (ret)
- goto free_cpudata1;
+ goto free_cpudata;
min_freq = READ_ONCE(cpudata->min_freq);
max_freq = READ_ONCE(cpudata->max_freq);
@@ -1025,11 +1025,11 @@ static int amd_pstate_cpu_init(struct cp
if (cpu_feature_enabled(X86_FEATURE_CPPC))
policy->fast_switch_possible = true;
- ret = freq_qos_add_request(&policy->constraints, &cpudata->req[1],
+ ret = freq_qos_add_request(&policy->constraints, &cpudata->max_freq_req,
FREQ_QOS_MAX, policy->cpuinfo.max_freq);
if (ret < 0) {
dev_err(dev, "Failed to add max-freq constraint (%d)\n", ret);
- goto free_cpudata2;
+ goto free_cpudata;
}
cpudata->max_limit_freq = max_freq;
@@ -1042,9 +1042,7 @@ static int amd_pstate_cpu_init(struct cp
return 0;
-free_cpudata2:
- freq_qos_remove_request(&cpudata->req[0]);
-free_cpudata1:
+free_cpudata:
kfree(cpudata);
return ret;
}
@@ -1053,8 +1051,7 @@ static void amd_pstate_cpu_exit(struct c
{
struct amd_cpudata *cpudata = policy->driver_data;
- freq_qos_remove_request(&cpudata->req[1]);
- freq_qos_remove_request(&cpudata->req[0]);
+ freq_qos_remove_request(&cpudata->max_freq_req);
policy->fast_switch_possible = false;
kfree(cpudata);
}
--- a/drivers/cpufreq/amd-pstate.h
+++ b/drivers/cpufreq/amd-pstate.h
@@ -28,7 +28,7 @@ struct amd_aperf_mperf {
/**
* struct amd_cpudata - private CPU data for AMD P-State
* @cpu: CPU number
- * @req: constraint request to apply
+ * @max_freq_req: maximum frequency constraint request to apply
* @cppc_req_cached: cached performance request hints
* @highest_perf: the maximum performance an individual processor may reach,
* assuming ideal conditions
@@ -68,7 +68,7 @@ struct amd_aperf_mperf {
struct amd_cpudata {
int cpu;
- struct freq_qos_request req[2];
+ struct freq_qos_request max_freq_req;
u64 cppc_req_cached;
u32 highest_perf;

View File

@@ -0,0 +1,42 @@
From d1216c052bedbf6d79e4b0261e2f09e17c66ffd3 Mon Sep 17 00:00:00 2001
From: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
Date: Fri, 4 Oct 2024 12:23:04 +0000
Subject: cpufreq/amd-pstate: Fix amd_pstate mode switch on shared memory
systems
While switching the driver mode between active and passive, Collaborative
Processor Performance Control (CPPC) is disabled in
amd_pstate_unregister_driver(). However, it is not re-enabled when the new
driver (passive or active) is registered. This leads to the new driver mode
not working correctly, so re-enable it in amd_pstate_register_driver().
Fixes: 3ca7bc818d8c ("cpufreq: amd-pstate: Add guided mode control support via sysfs")
Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
---
drivers/cpufreq/amd-pstate.c | 10 ++++++++++
1 file changed, 10 insertions(+)
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -1221,11 +1221,21 @@ static int amd_pstate_register_driver(in
return -EINVAL;
cppc_state = mode;
+
+ ret = amd_pstate_enable(true);
+ if (ret) {
+ pr_err("failed to enable cppc during amd-pstate driver registration, return %d\n",
+ ret);
+ amd_pstate_driver_cleanup();
+ return ret;
+ }
+
ret = cpufreq_register_driver(current_pstate_driver);
if (ret) {
amd_pstate_driver_cleanup();
return ret;
}
+
return 0;
}

View File

@@ -0,0 +1,57 @@
From c4fde0d177bdb33912f450914d84d6432391a8b5 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Sat, 12 Oct 2024 12:45:16 -0500
Subject: cpufreq/amd-pstate: Use nominal perf for limits when boost is
disabled
When boost has been disabled, the limit for perf should be nominal perf,
not the highest perf. Using the latter for the calculations will lead to
incorrect values that are still above nominal.
Fixes: ad4caad58d91 ("cpufreq: amd-pstate: Merge amd_pstate_highest_perf_set() into amd_get_boost_ratio_numerator()")
Reported-by: Peter Jung <ptr1337@cachyos.org>
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219348
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
---
drivers/cpufreq/amd-pstate.c | 20 ++++++++++++++------
1 file changed, 14 insertions(+), 6 deletions(-)
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -566,11 +566,16 @@ static int amd_pstate_verify(struct cpuf
static int amd_pstate_update_min_max_limit(struct cpufreq_policy *policy)
{
- u32 max_limit_perf, min_limit_perf, lowest_perf;
+ u32 max_limit_perf, min_limit_perf, lowest_perf, max_perf;
struct amd_cpudata *cpudata = policy->driver_data;
- max_limit_perf = div_u64(policy->max * cpudata->highest_perf, cpudata->max_freq);
- min_limit_perf = div_u64(policy->min * cpudata->highest_perf, cpudata->max_freq);
+ if (cpudata->boost_supported && !policy->boost_enabled)
+ max_perf = READ_ONCE(cpudata->nominal_perf);
+ else
+ max_perf = READ_ONCE(cpudata->highest_perf);
+
+ max_limit_perf = div_u64(policy->max * max_perf, policy->cpuinfo.max_freq);
+ min_limit_perf = div_u64(policy->min * max_perf, policy->cpuinfo.max_freq);
lowest_perf = READ_ONCE(cpudata->lowest_perf);
if (min_limit_perf < lowest_perf)
@@ -1526,10 +1531,13 @@ static int amd_pstate_epp_update_limit(s
u64 value;
s16 epp;
- max_perf = READ_ONCE(cpudata->highest_perf);
+ if (cpudata->boost_supported && !policy->boost_enabled)
+ max_perf = READ_ONCE(cpudata->nominal_perf);
+ else
+ max_perf = READ_ONCE(cpudata->highest_perf);
min_perf = READ_ONCE(cpudata->lowest_perf);
- max_limit_perf = div_u64(policy->max * cpudata->highest_perf, cpudata->max_freq);
- min_limit_perf = div_u64(policy->min * cpudata->highest_perf, cpudata->max_freq);
+ max_limit_perf = div_u64(policy->max * max_perf, policy->cpuinfo.max_freq);
+ min_limit_perf = div_u64(policy->min * max_perf, policy->cpuinfo.max_freq);
if (min_limit_perf < min_perf)
min_limit_perf = min_perf;
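
As a rough illustration of the corrected limit math, here is a minimal
userspace sketch; the limit_perf() helper and the perf/frequency numbers are
made up for illustration and are not taken from any real CPPC table. It only
shows that, with the division now done against policy->cpuinfo.max_freq and
max_perf dropping to nominal_perf while boost is disabled, a policy maximum
equal to cpuinfo.max_freq yields a limit of exactly nominal_perf.

/* illustrative only; mirrors div_u64(policy->max * max_perf,
 * policy->cpuinfo.max_freq) from the hunk above */
#include <stdint.h>
#include <stdio.h>

static uint32_t limit_perf(uint64_t policy_freq, uint32_t max_perf,
			   uint64_t cpuinfo_max_freq)
{
	return (uint32_t)(policy_freq * max_perf / cpuinfo_max_freq);
}

int main(void)
{
	uint32_t nominal_perf = 120;		/* hypothetical value */
	uint64_t cpuinfo_max_freq = 3500000;	/* kHz, boost disabled */

	/* policy->max == cpuinfo.max_freq -> limit is exactly nominal_perf */
	printf("%u\n", limit_perf(3500000, nominal_perf, cpuinfo_max_freq));
	/* a lower user-set policy->max scales the limit proportionally */
	printf("%u\n", limit_perf(1750000, nominal_perf, cpuinfo_max_freq));
	return 0;
}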

View File

@@ -0,0 +1,55 @@
From 01ad0fb3da95867947d923596a26b18d844afe3c Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Sat, 12 Oct 2024 12:45:17 -0500
Subject: cpufreq/amd-pstate: Don't update CPPC request in
amd_pstate_cpu_boost_update()
When boost is changed, the CPPC value is changed in amd_pstate_cpu_boost_update(),
but then changed again when refresh_frequency_limits() and all its callbacks
run. The first is a pointless write, so instead just update the limits for
the policy and let the policy refresh anchor everything properly.
Fixes: c8c68c38b56f ("cpufreq: amd-pstate: initialize core precision boost state")
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
---
drivers/cpufreq/amd-pstate.c | 24 +-----------------------
1 file changed, 1 insertion(+), 23 deletions(-)
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -695,34 +695,12 @@ static void amd_pstate_adjust_perf(unsig
static int amd_pstate_cpu_boost_update(struct cpufreq_policy *policy, bool on)
{
struct amd_cpudata *cpudata = policy->driver_data;
- struct cppc_perf_ctrls perf_ctrls;
- u32 highest_perf, nominal_perf, nominal_freq, max_freq;
+ u32 nominal_freq, max_freq;
int ret = 0;
- highest_perf = READ_ONCE(cpudata->highest_perf);
- nominal_perf = READ_ONCE(cpudata->nominal_perf);
nominal_freq = READ_ONCE(cpudata->nominal_freq);
max_freq = READ_ONCE(cpudata->max_freq);
- if (boot_cpu_has(X86_FEATURE_CPPC)) {
- u64 value = READ_ONCE(cpudata->cppc_req_cached);
-
- value &= ~GENMASK_ULL(7, 0);
- value |= on ? highest_perf : nominal_perf;
- WRITE_ONCE(cpudata->cppc_req_cached, value);
-
- wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value);
- } else {
- perf_ctrls.max_perf = on ? highest_perf : nominal_perf;
- ret = cppc_set_perf(cpudata->cpu, &perf_ctrls);
- if (ret) {
- cpufreq_cpu_release(policy);
- pr_debug("Failed to set max perf on CPU:%d. ret:%d\n",
- cpudata->cpu, ret);
- return ret;
- }
- }
-
if (on)
policy->cpuinfo.max_freq = max_freq;
else if (policy->cpuinfo.max_freq > nominal_freq * 1000)

View File

@@ -0,0 +1,49 @@
From 684d162c08ab86fff02861c907ecc92bf9c09af4 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Sat, 12 Oct 2024 12:45:18 -0500
Subject: cpufreq/amd-pstate: Use amd_pstate_update_min_max_limit() for EPP
limits
When the EPP limits are updated, the maximum capable frequency for the
CPU is used as the upper limit instead of the policy's limit.
Adjust amd_pstate_epp_update_limit() to reuse the policy calculation code
from amd_pstate_update_min_max_limit().
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
---
drivers/cpufreq/amd-pstate.c | 19 +++----------------
1 file changed, 3 insertions(+), 16 deletions(-)
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -1505,26 +1505,13 @@ static void amd_pstate_epp_cpu_exit(stru
static int amd_pstate_epp_update_limit(struct cpufreq_policy *policy)
{
struct amd_cpudata *cpudata = policy->driver_data;
- u32 max_perf, min_perf, min_limit_perf, max_limit_perf;
+ u32 max_perf, min_perf;
u64 value;
s16 epp;
- if (cpudata->boost_supported && !policy->boost_enabled)
- max_perf = READ_ONCE(cpudata->nominal_perf);
- else
- max_perf = READ_ONCE(cpudata->highest_perf);
+ max_perf = READ_ONCE(cpudata->highest_perf);
min_perf = READ_ONCE(cpudata->lowest_perf);
- max_limit_perf = div_u64(policy->max * max_perf, policy->cpuinfo.max_freq);
- min_limit_perf = div_u64(policy->min * max_perf, policy->cpuinfo.max_freq);
-
- if (min_limit_perf < min_perf)
- min_limit_perf = min_perf;
-
- if (max_limit_perf < min_limit_perf)
- max_limit_perf = min_limit_perf;
-
- WRITE_ONCE(cpudata->max_limit_perf, max_limit_perf);
- WRITE_ONCE(cpudata->min_limit_perf, min_limit_perf);
+ amd_pstate_update_min_max_limit(policy);
max_perf = clamp_t(unsigned long, max_perf, cpudata->min_limit_perf,
cpudata->max_limit_perf);

View File

@@ -0,0 +1,29 @@
From fa46d2873c9fa4060ce407e4bc5c7e29babce9d0 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Sat, 12 Oct 2024 12:45:19 -0500
Subject: cpufreq/amd-pstate: Drop needless EPP initialization
The EPP value doesn't need to be cached to the CPPC request in
amd_pstate_epp_update_limit() because it's passed as an argument
at the end to amd_pstate_set_epp() and stored at that time.
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
---
drivers/cpufreq/amd-pstate.c | 6 ------
1 file changed, 6 deletions(-)
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -1548,12 +1548,6 @@ static int amd_pstate_epp_update_limit(s
if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
epp = 0;
- /* Set initial EPP value */
- if (cpu_feature_enabled(X86_FEATURE_CPPC)) {
- value &= ~GENMASK_ULL(31, 24);
- value |= (u64)epp << 24;
- }
-
WRITE_ONCE(cpudata->cppc_req_cached, value);
return amd_pstate_set_epp(cpudata, epp);
}

View File

@@ -0,0 +1,228 @@
From 649d296be0c7f0df6e71b4fca25fdbe75cb3994e Mon Sep 17 00:00:00 2001
From: Oleksandr Natalenko <oleksandr@natalenko.name>
Date: Thu, 17 Oct 2024 17:03:11 +0200
Subject: amd-pstate-6.11: update setting the minimum frequency to
lowest_nonlinear_freq patchset to v3
Signed-off-by: Oleksandr Natalenko <oleksandr@natalenko.name>
---
drivers/cpufreq/amd-pstate.c | 67 +++++++++++++++++++++---------------
drivers/cpufreq/amd-pstate.h | 4 +--
drivers/cpufreq/cpufreq.c | 6 +---
include/linux/cpufreq.h | 6 ----
4 files changed, 43 insertions(+), 40 deletions(-)
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -557,9 +557,28 @@ cpufreq_policy_put:
cpufreq_cpu_put(policy);
}
-static int amd_pstate_verify(struct cpufreq_policy_data *policy)
+static int amd_pstate_verify(struct cpufreq_policy_data *policy_data)
{
- cpufreq_verify_within_cpu_limits(policy);
+ /*
+ * Initialize lower frequency limit (i.e.policy->min) with
+ * lowest_nonlinear_frequency which is the most energy efficient
+ * frequency. Override the initial value set by cpufreq core and
+ * amd-pstate qos_requests.
+ */
+ if (policy_data->min == FREQ_QOS_MIN_DEFAULT_VALUE) {
+ struct cpufreq_policy *policy = cpufreq_cpu_get(policy_data->cpu);
+ struct amd_cpudata *cpudata;
+
+ if (!policy)
+ return -EINVAL;
+
+ cpudata = policy->driver_data;
+ policy_data->min = cpudata->lowest_nonlinear_freq;
+ cpufreq_cpu_put(policy);
+ }
+
+ cpufreq_verify_within_cpu_limits(policy_data);
+ pr_debug("policy_max =%d, policy_min=%d\n", policy_data->max, policy_data->min);
return 0;
}
@@ -709,7 +728,7 @@ static int amd_pstate_cpu_boost_update(s
policy->max = policy->cpuinfo.max_freq;
if (cppc_state == AMD_PSTATE_PASSIVE) {
- ret = freq_qos_update_request(&cpudata->max_freq_req, policy->cpuinfo.max_freq);
+ ret = freq_qos_update_request(&cpudata->req[1], policy->cpuinfo.max_freq);
if (ret < 0)
pr_debug("Failed to update freq constraint: CPU%d\n", cpudata->cpu);
}
@@ -976,17 +995,17 @@ static int amd_pstate_cpu_init(struct cp
ret = amd_pstate_init_perf(cpudata);
if (ret)
- goto free_cpudata;
+ goto free_cpudata1;
amd_pstate_init_prefcore(cpudata);
ret = amd_pstate_init_freq(cpudata);
if (ret)
- goto free_cpudata;
+ goto free_cpudata1;
ret = amd_pstate_init_boost_support(cpudata);
if (ret)
- goto free_cpudata;
+ goto free_cpudata1;
min_freq = READ_ONCE(cpudata->min_freq);
max_freq = READ_ONCE(cpudata->max_freq);
@@ -1008,11 +1027,18 @@ static int amd_pstate_cpu_init(struct cp
if (cpu_feature_enabled(X86_FEATURE_CPPC))
policy->fast_switch_possible = true;
- ret = freq_qos_add_request(&policy->constraints, &cpudata->max_freq_req,
+ ret = freq_qos_add_request(&policy->constraints, &cpudata->req[0],
+ FREQ_QOS_MIN, FREQ_QOS_MIN_DEFAULT_VALUE);
+ if (ret < 0) {
+ dev_err(dev, "Failed to add min-freq constraint (%d)\n", ret);
+ goto free_cpudata1;
+ }
+
+ ret = freq_qos_add_request(&policy->constraints, &cpudata->req[1],
FREQ_QOS_MAX, policy->cpuinfo.max_freq);
if (ret < 0) {
dev_err(dev, "Failed to add max-freq constraint (%d)\n", ret);
- goto free_cpudata;
+ goto free_cpudata2;
}
cpudata->max_limit_freq = max_freq;
@@ -1025,7 +1051,9 @@ static int amd_pstate_cpu_init(struct cp
return 0;
-free_cpudata:
+free_cpudata2:
+ freq_qos_remove_request(&cpudata->req[0]);
+free_cpudata1:
kfree(cpudata);
return ret;
}
@@ -1034,7 +1062,8 @@ static void amd_pstate_cpu_exit(struct c
{
struct amd_cpudata *cpudata = policy->driver_data;
- freq_qos_remove_request(&cpudata->max_freq_req);
+ freq_qos_remove_request(&cpudata->req[1]);
+ freq_qos_remove_request(&cpudata->req[0]);
policy->fast_switch_possible = false;
kfree(cpudata);
}
@@ -1658,13 +1687,6 @@ static int amd_pstate_epp_cpu_offline(st
return 0;
}
-static int amd_pstate_epp_verify_policy(struct cpufreq_policy_data *policy)
-{
- cpufreq_verify_within_cpu_limits(policy);
- pr_debug("policy_max =%d, policy_min=%d\n", policy->max, policy->min);
- return 0;
-}
-
static int amd_pstate_epp_suspend(struct cpufreq_policy *policy)
{
struct amd_cpudata *cpudata = policy->driver_data;
@@ -1703,13 +1725,6 @@ static int amd_pstate_epp_resume(struct
return 0;
}
-static int amd_pstate_get_init_min_freq(struct cpufreq_policy *policy)
-{
- struct amd_cpudata *cpudata = policy->driver_data;
-
- return READ_ONCE(cpudata->lowest_nonlinear_freq);
-}
-
static struct cpufreq_driver amd_pstate_driver = {
.flags = CPUFREQ_CONST_LOOPS | CPUFREQ_NEED_UPDATE_LIMITS,
.verify = amd_pstate_verify,
@@ -1723,12 +1738,11 @@ static struct cpufreq_driver amd_pstate_
.update_limits = amd_pstate_update_limits,
.name = "amd-pstate",
.attr = amd_pstate_attr,
- .get_init_min_freq = amd_pstate_get_init_min_freq,
};
static struct cpufreq_driver amd_pstate_epp_driver = {
.flags = CPUFREQ_CONST_LOOPS,
- .verify = amd_pstate_epp_verify_policy,
+ .verify = amd_pstate_verify,
.setpolicy = amd_pstate_epp_set_policy,
.init = amd_pstate_epp_cpu_init,
.exit = amd_pstate_epp_cpu_exit,
@@ -1740,7 +1754,6 @@ static struct cpufreq_driver amd_pstate_
.set_boost = amd_pstate_set_boost,
.name = "amd-pstate-epp",
.attr = amd_pstate_epp_attr,
- .get_init_min_freq = amd_pstate_get_init_min_freq,
};
static int __init amd_pstate_set_driver(int mode_idx)
--- a/drivers/cpufreq/amd-pstate.h
+++ b/drivers/cpufreq/amd-pstate.h
@@ -28,7 +28,7 @@ struct amd_aperf_mperf {
/**
* struct amd_cpudata - private CPU data for AMD P-State
* @cpu: CPU number
- * @max_freq_req: maximum frequency constraint request to apply
+ * @req: constraint request to apply
* @cppc_req_cached: cached performance request hints
* @highest_perf: the maximum performance an individual processor may reach,
* assuming ideal conditions
@@ -68,7 +68,7 @@ struct amd_aperf_mperf {
struct amd_cpudata {
int cpu;
- struct freq_qos_request max_freq_req;
+ struct freq_qos_request req[2];
u64 cppc_req_cached;
u32 highest_perf;
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -1380,7 +1380,6 @@ static int cpufreq_online(unsigned int c
bool new_policy;
unsigned long flags;
unsigned int j;
- u32 init_min_freq = FREQ_QOS_MIN_DEFAULT_VALUE;
int ret;
pr_debug("%s: bringing CPU%u online\n", __func__, cpu);
@@ -1465,12 +1464,9 @@ static int cpufreq_online(unsigned int c
goto out_destroy_policy;
}
- if (cpufreq_driver->get_init_min_freq)
- init_min_freq = cpufreq_driver->get_init_min_freq(policy);
-
ret = freq_qos_add_request(&policy->constraints,
policy->min_freq_req, FREQ_QOS_MIN,
- init_min_freq);
+ FREQ_QOS_MIN_DEFAULT_VALUE);
if (ret < 0) {
/*
* So we don't call freq_qos_remove_request() for an
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -414,12 +414,6 @@ struct cpufreq_driver {
* policy is properly initialized, but before the governor is started.
*/
void (*register_em)(struct cpufreq_policy *policy);
-
- /*
- * Set by drivers that want to initialize the policy->min_freq_req with
- * a value different from the default value (0) in cpufreq core.
- */
- int (*get_init_min_freq)(struct cpufreq_policy *policy);
};
/* flags */

View File

@@ -0,0 +1,44 @@
From db147a0a6341822a15fd9c4cd51f8dc4a9a1747b Mon Sep 17 00:00:00 2001
From: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
Date: Thu, 17 Oct 2024 10:05:27 +0000
Subject: cpufreq/amd-pstate: Call amd_pstate_register_driver() in amd_pstate_init()
Replace a similar chunk of code in amd_pstate_init() with an
amd_pstate_register_driver() call.
Suggested-by: Mario Limonciello <mario.limonciello@amd.com>
Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
---
drivers/cpufreq/amd-pstate.c | 12 ++----------
1 file changed, 2 insertions(+), 10 deletions(-)
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -1909,17 +1909,10 @@ static int __init amd_pstate_init(void)
return ret;
}
- /* enable amd pstate feature */
- ret = amd_pstate_enable(true);
- if (ret) {
- pr_err("failed to enable driver mode(%d)\n", cppc_state);
- return ret;
- }
-
- ret = cpufreq_register_driver(current_pstate_driver);
+ ret = amd_pstate_register_driver(cppc_state);
if (ret) {
pr_err("failed to register with return %d\n", ret);
- goto disable_driver;
+ return ret;
}
dev_root = bus_get_dev_root(&cpu_subsys);
@@ -1936,7 +1929,6 @@ static int __init amd_pstate_init(void)
global_attr_free:
cpufreq_unregister_driver(current_pstate_driver);
-disable_driver:
amd_pstate_enable(false);
return ret;
}

View File

@@ -0,0 +1,81 @@
From 7c658490b05f6ab4dd59e1c25e75ba1037f6cfeb Mon Sep 17 00:00:00 2001
From: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
Date: Thu, 17 Oct 2024 10:05:29 +0000
Subject: cpufreq/amd-pstate: Call amd_pstate_set_driver() in
amd_pstate_register_driver()
Replace a similar chunk of code in amd_pstate_register_driver() with an
amd_pstate_set_driver() call.
Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
---
drivers/cpufreq/amd-pstate.c | 47 +++++++++++++++++-------------------
1 file changed, 22 insertions(+), 25 deletions(-)
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -1221,16 +1221,32 @@ static void amd_pstate_driver_cleanup(vo
current_pstate_driver = NULL;
}
+static int amd_pstate_set_driver(int mode_idx)
+{
+ if (mode_idx >= AMD_PSTATE_DISABLE && mode_idx < AMD_PSTATE_MAX) {
+ cppc_state = mode_idx;
+ if (cppc_state == AMD_PSTATE_DISABLE)
+ pr_info("driver is explicitly disabled\n");
+
+ if (cppc_state == AMD_PSTATE_ACTIVE)
+ current_pstate_driver = &amd_pstate_epp_driver;
+
+ if (cppc_state == AMD_PSTATE_PASSIVE || cppc_state == AMD_PSTATE_GUIDED)
+ current_pstate_driver = &amd_pstate_driver;
+
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
static int amd_pstate_register_driver(int mode)
{
int ret;
- if (mode == AMD_PSTATE_PASSIVE || mode == AMD_PSTATE_GUIDED)
- current_pstate_driver = &amd_pstate_driver;
- else if (mode == AMD_PSTATE_ACTIVE)
- current_pstate_driver = &amd_pstate_epp_driver;
- else
- return -EINVAL;
+ ret = amd_pstate_set_driver(mode);
+ if (ret)
+ return ret;
cppc_state = mode;
@@ -1756,25 +1772,6 @@ static struct cpufreq_driver amd_pstate_
.attr = amd_pstate_epp_attr,
};
-static int __init amd_pstate_set_driver(int mode_idx)
-{
- if (mode_idx >= AMD_PSTATE_DISABLE && mode_idx < AMD_PSTATE_MAX) {
- cppc_state = mode_idx;
- if (cppc_state == AMD_PSTATE_DISABLE)
- pr_info("driver is explicitly disabled\n");
-
- if (cppc_state == AMD_PSTATE_ACTIVE)
- current_pstate_driver = &amd_pstate_epp_driver;
-
- if (cppc_state == AMD_PSTATE_PASSIVE || cppc_state == AMD_PSTATE_GUIDED)
- current_pstate_driver = &amd_pstate_driver;
-
- return 0;
- }
-
- return -EINVAL;
-}
-
/*
* CPPC function is not supported for family ID 17H with model_ID ranging from 0x10 to 0x2F.
* show the debug message that helps to check if the CPU has CPPC support for loading issue.

View File

@@ -0,0 +1,41 @@
From 55be5db97f4f52badc958463ee8d9cbc2ae91615 Mon Sep 17 00:00:00 2001
From: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
Date: Thu, 17 Oct 2024 10:05:31 +0000
Subject: cpufreq/amd-pstate: Remove the switch case in amd_pstate_init()
Replace the switch case with a more readable if condition.
Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
---
drivers/cpufreq/amd-pstate.c | 16 +++++-----------
1 file changed, 5 insertions(+), 11 deletions(-)
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -1873,21 +1873,15 @@ static int __init amd_pstate_init(void)
cppc_state = CONFIG_X86_AMD_PSTATE_DEFAULT_MODE;
}
- switch (cppc_state) {
- case AMD_PSTATE_DISABLE:
+ if (cppc_state == AMD_PSTATE_DISABLE) {
pr_info("driver load is disabled, boot with specific mode to enable this\n");
return -ENODEV;
- case AMD_PSTATE_PASSIVE:
- case AMD_PSTATE_ACTIVE:
- case AMD_PSTATE_GUIDED:
- ret = amd_pstate_set_driver(cppc_state);
- if (ret)
- return ret;
- break;
- default:
- return -EINVAL;
}
+ ret = amd_pstate_set_driver(cppc_state);
+ if (ret)
+ return ret;
+
/* capability check */
if (cpu_feature_enabled(X86_FEATURE_CPPC)) {
pr_debug("AMD CPPC MSR based functionality is supported\n");

View File

@@ -0,0 +1,43 @@
From 7305364888151cb9e6b435c5f219ccfd18132b58 Mon Sep 17 00:00:00 2001
From: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
Date: Thu, 17 Oct 2024 10:05:33 +0000
Subject: cpufreq/amd-pstate: Remove the redundant amd_pstate_set_driver() call
amd_pstate_set_driver() is called twice, once in amd_pstate_init() and once
as part of amd_pstate_register_driver(). Move the code around and eliminate
the redundancy.
Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
---
drivers/cpufreq/amd-pstate.c | 12 ++++--------
1 file changed, 4 insertions(+), 8 deletions(-)
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -1878,9 +1878,11 @@ static int __init amd_pstate_init(void)
return -ENODEV;
}
- ret = amd_pstate_set_driver(cppc_state);
- if (ret)
+ ret = amd_pstate_register_driver(cppc_state);
+ if (ret) {
+ pr_err("failed to register with return %d\n", ret);
return ret;
+ }
/* capability check */
if (cpu_feature_enabled(X86_FEATURE_CPPC)) {
@@ -1900,12 +1902,6 @@ static int __init amd_pstate_init(void)
return ret;
}
- ret = amd_pstate_register_driver(cppc_state);
- if (ret) {
- pr_err("failed to register with return %d\n", ret);
- return ret;
- }
-
dev_root = bus_get_dev_root(&cpu_subsys);
if (dev_root) {
ret = sysfs_create_group(&dev_root->kobj, &amd_pstate_global_attr_group);

View File

@@ -0,0 +1,33 @@
From 5886ef269d069c72ea952cb00699e16221289e8c Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Thu, 17 Oct 2024 12:34:39 -0500
Subject: cpufreq/amd-pstate-ut: Add fix for min freq unit test
commit 642aff3964b0f ("cpufreq/amd-pstate: Set the initial min_freq to
lowest_nonlinear_freq") changed the initial minimum frequency to the lowest
nonlinear frequency, but the unit tests weren't updated and now fail.
Update them to match this change.
Fixes: 642aff3964b0f ("cpufreq/amd-pstate: Set the initial min_freq to lowest_nonlinear_freq")
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
---
drivers/cpufreq/amd-pstate-ut.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
--- a/drivers/cpufreq/amd-pstate-ut.c
+++ b/drivers/cpufreq/amd-pstate-ut.c
@@ -227,10 +227,10 @@ static void amd_pstate_ut_check_freq(u32
goto skip_test;
}
- if (cpudata->min_freq != policy->min) {
+ if (cpudata->lowest_nonlinear_freq != policy->min) {
amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
- pr_err("%s cpu%d cpudata_min_freq=%d policy_min=%d, they should be equal!\n",
- __func__, cpu, cpudata->min_freq, policy->min);
+ pr_err("%s cpu%d cpudata_lowest_nonlinear_freq=%d policy_min=%d, they should be equal!\n",
+ __func__, cpu, cpudata->lowest_nonlinear_freq, policy->min);
goto skip_test;
}

View File

@@ -0,0 +1,33 @@
From e82b9b5a56bcac18cae68878fe67263279805735 Mon Sep 17 00:00:00 2001
From: "Gautham R. Shenoy" <gautham.shenoy@amd.com>
Date: Mon, 21 Oct 2024 15:48:35 +0530
Subject: amd-pstate: Set min_perf to nominal_perf for active mode performance
gov
The amd-pstate driver sets CPPC_REQ.min_perf to CPPC_REQ.max_perf when
in active mode with the performance governor. Typically CPPC_REQ.max_perf
is set to CPPC.highest_perf. This causes frequency throttling on
power-limited platforms, which causes performance regressions on
certain classes of workloads.
Hence, set CPPC_REQ.min_perf to CPPC.nominal_perf or
CPPC_REQ.max_perf, whichever is lower.
Fixes: ffa5096a7c33 ("cpufreq: amd-pstate: implement Pstate EPP support for the AMD processors")
Signed-off-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
---
drivers/cpufreq/amd-pstate.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -1565,7 +1565,7 @@ static int amd_pstate_epp_update_limit(s
value = READ_ONCE(cpudata->cppc_req_cached);
if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
- min_perf = max_perf;
+ min_perf = min(cpudata->nominal_perf, max_perf);
/* Initial min/max values for CPPC Performance Controls Register */
value &= ~AMD_CPPC_MIN_PERF(~0L);
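
To make the one-line change concrete (the perf values here are hypothetical,
not taken from any real CPPC table): with highest_perf = 166 and
nominal_perf = 120, the performance governor previously programmed
CPPC_REQ.min_perf = max_perf = 166, pinning the floor at the boost ceiling;
with this change it programs min(120, 166) = 120, so a power-limited platform
is no longer forced to hold a floor it cannot sustain.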

View File

@@ -0,0 +1,44 @@
From 497447cf96a785a4edd0756da5d5718037f5687c Mon Sep 17 00:00:00 2001
From: Swapnil Sapkal <swapnil.sapkal@amd.com>
Date: Mon, 21 Oct 2024 15:48:36 +0530
Subject: amd-pstate: Switch to amd-pstate by default on some Server platforms
Currently the default cpufreq driver for all the AMD EPYC servers is
acpi-cpufreq. Going forward, switch to amd-pstate as the default
driver on the AMD EPYC server platforms with CPU family 0x1A or
higher. The default mode will be active mode.
Testing shows that amd-pstate with active mode and performance
governor provides comparable or better performance per-watt against
acpi-cpufreq + performance governor.
Likewise, amd-pstate with active mode and powersave governor with the
energy_performance_preference=power (EPP=255) provides comparable or
better performance per-watt against acpi-cpufreq + schedutil governor
for a wide range of workloads.
Users can still revert to using acpi-cpufreq driver on these platforms
with the "amd_pstate=disable" kernel commandline parameter.
Signed-off-by: Swapnil Sapkal <swapnil.sapkal@amd.com>
Signed-off-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
---
drivers/cpufreq/amd-pstate.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -1862,10 +1862,10 @@ static int __init amd_pstate_init(void)
if (cppc_state == AMD_PSTATE_UNDEFINED) {
/* Disable on the following configs by default:
* 1. Undefined platforms
- * 2. Server platforms
+ * 2. Server platforms with CPUs older than Family 0x1A.
*/
if (amd_pstate_acpi_pm_profile_undefined() ||
- amd_pstate_acpi_pm_profile_server()) {
+ (amd_pstate_acpi_pm_profile_server() && boot_cpu_data.x86 < 0x1A)) {
pr_info("driver load is disabled, boot with specific mode to enable this\n");
return -ENODEV;
}

View File

@@ -0,0 +1,38 @@
From a4d255935a1ea6e4b10167df942ec641079bcdf7 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Mon, 28 Oct 2024 09:55:41 -0500
Subject: cpufreq/amd-pstate: Push adjust_perf vfunc init into cpu_init
As the driver can be changed in and out of different modes, it's possible
that adjust_perf is assigned when it shouldn't be.
This could happen if an MSR design is started up in passive mode and then
switches to active mode.
To solve this, explicitly clear `adjust_perf` in amd_pstate_epp_cpu_init().
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
---
drivers/cpufreq/amd-pstate.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -1528,6 +1528,8 @@ static int amd_pstate_epp_cpu_init(struc
WRITE_ONCE(cpudata->cppc_cap1_cached, value);
}
+ current_pstate_driver->adjust_perf = NULL;
+
return 0;
free_cpudata1:
@@ -1887,8 +1889,6 @@ static int __init amd_pstate_init(void)
/* capability check */
if (cpu_feature_enabled(X86_FEATURE_CPPC)) {
pr_debug("AMD CPPC MSR based functionality is supported\n");
- if (cppc_state != AMD_PSTATE_ACTIVE)
- current_pstate_driver->adjust_perf = amd_pstate_adjust_perf;
} else {
pr_debug("AMD CPPC shared memory based functionality is supported\n");
static_call_update(amd_pstate_enable, shmem_enable);

View File

@@ -0,0 +1,47 @@
From c42a82a583646dcbba8500d47ed878616ab5c33a Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Mon, 28 Oct 2024 09:55:42 -0500
Subject: cpufreq/amd-pstate: Move registration after static function call
update
On shared memory designs, the static functions need to work before
registration is done, or the system can hang at bootup.
Move the registration later in amd_pstate_init() to solve this.
Fixes: e238968a2087 ("cpufreq/amd-pstate: Remove the redundant amd_pstate_set_driver() call")
Reported-by: Klara Modin <klarasmodin@gmail.com>
Closes: https://lore.kernel.org/linux-pm/cf9c146d-bacf-444e-92e2-15ebf513af96@gmail.com/#t
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
---
drivers/cpufreq/amd-pstate.c | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -1880,12 +1880,6 @@ static int __init amd_pstate_init(void)
return -ENODEV;
}
- ret = amd_pstate_register_driver(cppc_state);
- if (ret) {
- pr_err("failed to register with return %d\n", ret);
- return ret;
- }
-
/* capability check */
if (cpu_feature_enabled(X86_FEATURE_CPPC)) {
pr_debug("AMD CPPC MSR based functionality is supported\n");
@@ -1896,6 +1890,12 @@ static int __init amd_pstate_init(void)
static_call_update(amd_pstate_update_perf, shmem_update_perf);
}
+ ret = amd_pstate_register_driver(cppc_state);
+ if (ret) {
+ pr_err("failed to register with return %d\n", ret);
+ return ret;
+ }
+
if (amd_pstate_prefcore) {
ret = amd_detect_prefcore(&amd_pstate_prefcore);
if (ret)

View File

@@ -0,0 +1,321 @@
From 023d6b8aa8d8b346cfdcccf5ca4cb880c8d41d87 Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@linux.intel.com>
Date: Fri, 2 Aug 2024 08:16:37 -0700
Subject: perf: Generic hotplug support for a PMU with a scope
The perf subsystem assumes that the counters of a PMU are per-CPU, so
the user-space tool reads a counter from each CPU in system-wide mode.
However, many PMUs don't have a per-CPU counter; the counter is
effective for a scope, e.g., a die or a socket. To address this, the
kernel driver exposes a cpumask to restrict reads to one CPU that
stands for the specific scope. In case the given CPU is removed,
the hotplug support has to be implemented in each such driver.
The code to support the cpumask and hotplug is very similar:
- Expose a cpumask into sysfs
- Pick up another CPU in the same scope if the given CPU is removed.
- Invoke the perf_pmu_migrate_context() to migrate to a new CPU.
- In event init, always set the CPU in the cpumask to event->cpu
Similar duplicated code is implemented in each such PMU driver. It
would be good to introduce a generic infrastructure to avoid such
duplication.
Five common scopes are implemented here: core, die, cluster, pkg, and
system-wide. The scope can be set when a PMU is registered. If so, a
"cpumask" attribute is automatically exposed for the PMU.
The "cpumask" is from the perf_online_<scope>_mask, which is to track
the active CPU for each scope. They are set when the first CPU of the
scope is online via the generic perf hotplug support. When a
corresponding CPU is removed, the perf_online_<scope>_mask is updated
accordingly and the PMU will be moved to a new CPU from the same scope
if possible.
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
---
include/linux/perf_event.h | 18 ++++
kernel/events/core.c | 164 ++++++++++++++++++++++++++++++++++++-
2 files changed, 180 insertions(+), 2 deletions(-)
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -292,6 +292,19 @@ struct perf_event_pmu_context;
#define PERF_PMU_CAP_AUX_OUTPUT 0x0080
#define PERF_PMU_CAP_EXTENDED_HW_TYPE 0x0100
+/**
+ * pmu::scope
+ */
+enum perf_pmu_scope {
+ PERF_PMU_SCOPE_NONE = 0,
+ PERF_PMU_SCOPE_CORE,
+ PERF_PMU_SCOPE_DIE,
+ PERF_PMU_SCOPE_CLUSTER,
+ PERF_PMU_SCOPE_PKG,
+ PERF_PMU_SCOPE_SYS_WIDE,
+ PERF_PMU_MAX_SCOPE,
+};
+
struct perf_output_handle;
#define PMU_NULL_DEV ((void *)(~0UL))
@@ -315,6 +328,11 @@ struct pmu {
*/
int capabilities;
+ /*
+ * PMU scope
+ */
+ unsigned int scope;
+
int __percpu *pmu_disable_count;
struct perf_cpu_pmu_context __percpu *cpu_pmu_context;
atomic_t exclusive_cnt; /* < 0: cpu; > 0: tsk */
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -411,6 +411,11 @@ static LIST_HEAD(pmus);
static DEFINE_MUTEX(pmus_lock);
static struct srcu_struct pmus_srcu;
static cpumask_var_t perf_online_mask;
+static cpumask_var_t perf_online_core_mask;
+static cpumask_var_t perf_online_die_mask;
+static cpumask_var_t perf_online_cluster_mask;
+static cpumask_var_t perf_online_pkg_mask;
+static cpumask_var_t perf_online_sys_mask;
static struct kmem_cache *perf_event_cache;
/*
@@ -11497,10 +11502,60 @@ perf_event_mux_interval_ms_store(struct
}
static DEVICE_ATTR_RW(perf_event_mux_interval_ms);
+static inline const struct cpumask *perf_scope_cpu_topology_cpumask(unsigned int scope, int cpu)
+{
+ switch (scope) {
+ case PERF_PMU_SCOPE_CORE:
+ return topology_sibling_cpumask(cpu);
+ case PERF_PMU_SCOPE_DIE:
+ return topology_die_cpumask(cpu);
+ case PERF_PMU_SCOPE_CLUSTER:
+ return topology_cluster_cpumask(cpu);
+ case PERF_PMU_SCOPE_PKG:
+ return topology_core_cpumask(cpu);
+ case PERF_PMU_SCOPE_SYS_WIDE:
+ return cpu_online_mask;
+ }
+
+ return NULL;
+}
+
+static inline struct cpumask *perf_scope_cpumask(unsigned int scope)
+{
+ switch (scope) {
+ case PERF_PMU_SCOPE_CORE:
+ return perf_online_core_mask;
+ case PERF_PMU_SCOPE_DIE:
+ return perf_online_die_mask;
+ case PERF_PMU_SCOPE_CLUSTER:
+ return perf_online_cluster_mask;
+ case PERF_PMU_SCOPE_PKG:
+ return perf_online_pkg_mask;
+ case PERF_PMU_SCOPE_SYS_WIDE:
+ return perf_online_sys_mask;
+ }
+
+ return NULL;
+}
+
+static ssize_t cpumask_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct pmu *pmu = dev_get_drvdata(dev);
+ struct cpumask *mask = perf_scope_cpumask(pmu->scope);
+
+ if (mask)
+ return cpumap_print_to_pagebuf(true, buf, mask);
+ return 0;
+}
+
+static DEVICE_ATTR_RO(cpumask);
+
static struct attribute *pmu_dev_attrs[] = {
&dev_attr_type.attr,
&dev_attr_perf_event_mux_interval_ms.attr,
&dev_attr_nr_addr_filters.attr,
+ &dev_attr_cpumask.attr,
NULL,
};
@@ -11512,6 +11567,10 @@ static umode_t pmu_dev_is_visible(struct
if (n == 2 && !pmu->nr_addr_filters)
return 0;
+ /* cpumask */
+ if (n == 3 && pmu->scope == PERF_PMU_SCOPE_NONE)
+ return 0;
+
return a->mode;
}
@@ -11596,6 +11655,11 @@ int perf_pmu_register(struct pmu *pmu, c
goto free_pdc;
}
+ if (WARN_ONCE(pmu->scope >= PERF_PMU_MAX_SCOPE, "Can not register a pmu with an invalid scope.\n")) {
+ ret = -EINVAL;
+ goto free_pdc;
+ }
+
pmu->name = name;
if (type >= 0)
@@ -11750,6 +11814,22 @@ static int perf_try_init_event(struct pm
event_has_any_exclude_flag(event))
ret = -EINVAL;
+ if (pmu->scope != PERF_PMU_SCOPE_NONE && event->cpu >= 0) {
+ const struct cpumask *cpumask = perf_scope_cpu_topology_cpumask(pmu->scope, event->cpu);
+ struct cpumask *pmu_cpumask = perf_scope_cpumask(pmu->scope);
+ int cpu;
+
+ if (pmu_cpumask && cpumask) {
+ cpu = cpumask_any_and(pmu_cpumask, cpumask);
+ if (cpu >= nr_cpu_ids)
+ ret = -ENODEV;
+ else
+ event->cpu = cpu;
+ } else {
+ ret = -ENODEV;
+ }
+ }
+
if (ret && event->destroy)
event->destroy(event);
}
@@ -13713,6 +13793,12 @@ static void __init perf_event_init_all_c
int cpu;
zalloc_cpumask_var(&perf_online_mask, GFP_KERNEL);
+ zalloc_cpumask_var(&perf_online_core_mask, GFP_KERNEL);
+ zalloc_cpumask_var(&perf_online_die_mask, GFP_KERNEL);
+ zalloc_cpumask_var(&perf_online_cluster_mask, GFP_KERNEL);
+ zalloc_cpumask_var(&perf_online_pkg_mask, GFP_KERNEL);
+ zalloc_cpumask_var(&perf_online_sys_mask, GFP_KERNEL);
+
for_each_possible_cpu(cpu) {
swhash = &per_cpu(swevent_htable, cpu);
@@ -13762,6 +13848,40 @@ static void __perf_event_exit_context(vo
raw_spin_unlock(&ctx->lock);
}
+static void perf_event_clear_cpumask(unsigned int cpu)
+{
+ int target[PERF_PMU_MAX_SCOPE];
+ unsigned int scope;
+ struct pmu *pmu;
+
+ cpumask_clear_cpu(cpu, perf_online_mask);
+
+ for (scope = PERF_PMU_SCOPE_NONE + 1; scope < PERF_PMU_MAX_SCOPE; scope++) {
+ const struct cpumask *cpumask = perf_scope_cpu_topology_cpumask(scope, cpu);
+ struct cpumask *pmu_cpumask = perf_scope_cpumask(scope);
+
+ target[scope] = -1;
+ if (WARN_ON_ONCE(!pmu_cpumask || !cpumask))
+ continue;
+
+ if (!cpumask_test_and_clear_cpu(cpu, pmu_cpumask))
+ continue;
+ target[scope] = cpumask_any_but(cpumask, cpu);
+ if (target[scope] < nr_cpu_ids)
+ cpumask_set_cpu(target[scope], pmu_cpumask);
+ }
+
+ /* migrate */
+ list_for_each_entry_rcu(pmu, &pmus, entry, lockdep_is_held(&pmus_srcu)) {
+ if (pmu->scope == PERF_PMU_SCOPE_NONE ||
+ WARN_ON_ONCE(pmu->scope >= PERF_PMU_MAX_SCOPE))
+ continue;
+
+ if (target[pmu->scope] >= 0 && target[pmu->scope] < nr_cpu_ids)
+ perf_pmu_migrate_context(pmu, cpu, target[pmu->scope]);
+ }
+}
+
static void perf_event_exit_cpu_context(int cpu)
{
struct perf_cpu_context *cpuctx;
@@ -13769,6 +13889,11 @@ static void perf_event_exit_cpu_context(
// XXX simplify cpuctx->online
mutex_lock(&pmus_lock);
+ /*
+ * Clear the cpumasks, and migrate to other CPUs if possible.
+ * Must be invoked before the __perf_event_exit_context.
+ */
+ perf_event_clear_cpumask(cpu);
cpuctx = per_cpu_ptr(&perf_cpu_context, cpu);
ctx = &cpuctx->ctx;
@@ -13776,7 +13901,6 @@ static void perf_event_exit_cpu_context(
smp_call_function_single(cpu, __perf_event_exit_context, ctx, 1);
cpuctx->online = 0;
mutex_unlock(&ctx->mutex);
- cpumask_clear_cpu(cpu, perf_online_mask);
mutex_unlock(&pmus_lock);
}
#else
@@ -13785,6 +13909,42 @@ static void perf_event_exit_cpu_context(
#endif
+static void perf_event_setup_cpumask(unsigned int cpu)
+{
+ struct cpumask *pmu_cpumask;
+ unsigned int scope;
+
+ cpumask_set_cpu(cpu, perf_online_mask);
+
+ /*
+ * Early boot stage, the cpumask hasn't been set yet.
+ * The perf_online_<domain>_masks includes the first CPU of each domain.
+ * Always uncondifionally set the boot CPU for the perf_online_<domain>_masks.
+ */
+ if (!topology_sibling_cpumask(cpu)) {
+ for (scope = PERF_PMU_SCOPE_NONE + 1; scope < PERF_PMU_MAX_SCOPE; scope++) {
+ pmu_cpumask = perf_scope_cpumask(scope);
+ if (WARN_ON_ONCE(!pmu_cpumask))
+ continue;
+ cpumask_set_cpu(cpu, pmu_cpumask);
+ }
+ return;
+ }
+
+ for (scope = PERF_PMU_SCOPE_NONE + 1; scope < PERF_PMU_MAX_SCOPE; scope++) {
+ const struct cpumask *cpumask = perf_scope_cpu_topology_cpumask(scope, cpu);
+
+ pmu_cpumask = perf_scope_cpumask(scope);
+
+ if (WARN_ON_ONCE(!pmu_cpumask || !cpumask))
+ continue;
+
+ if (!cpumask_empty(cpumask) &&
+ cpumask_any_and(pmu_cpumask, cpumask) >= nr_cpu_ids)
+ cpumask_set_cpu(cpu, pmu_cpumask);
+ }
+}
+
int perf_event_init_cpu(unsigned int cpu)
{
struct perf_cpu_context *cpuctx;
@@ -13793,7 +13953,7 @@ int perf_event_init_cpu(unsigned int cpu
perf_swevent_init_cpu(cpu);
mutex_lock(&pmus_lock);
- cpumask_set_cpu(cpu, perf_online_mask);
+ perf_event_setup_cpumask(cpu);
cpuctx = per_cpu_ptr(&perf_cpu_context, cpu);
ctx = &cpuctx->ctx;
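
A minimal sketch of how a driver would opt into this infrastructure; the
example_* names and the event callbacks are hypothetical and not part of this
series, while the .scope field, the PERF_PMU_SCOPE_* values, the "cpumask"
attribute and perf_pmu_register()/perf_pmu_migrate_context() are the pieces
introduced or used above.

#include <linux/module.h>
#include <linux/perf_event.h>

/* Hypothetical per-package PMU; the event callbacks are assumed to be
 * implemented elsewhere in this (imaginary) driver. */
static int example_event_init(struct perf_event *event);
static int example_event_add(struct perf_event *event, int flags);
static void example_event_del(struct perf_event *event, int flags);
static void example_event_start(struct perf_event *event, int flags);
static void example_event_stop(struct perf_event *event, int flags);
static void example_event_read(struct perf_event *event);

static struct pmu example_pkg_pmu = {
	.task_ctx_nr	= perf_invalid_context,
	.event_init	= example_event_init,
	.add		= example_event_add,
	.del		= example_event_del,
	.start		= example_event_start,
	.stop		= example_event_stop,
	.read		= example_event_read,
	.capabilities	= PERF_PMU_CAP_NO_INTERRUPT | PERF_PMU_CAP_NO_EXCLUDE,
	/* Package scope: the core picks one online CPU per package, exposes
	 * it through the "cpumask" sysfs attribute, and migrates events via
	 * perf_pmu_migrate_context() when that CPU goes offline. */
	.scope		= PERF_PMU_SCOPE_PKG,
	.module		= THIS_MODULE,
};

static int __init example_pmu_init(void)
{
	return perf_pmu_register(&example_pkg_pmu, "example_pkg", -1);
}
module_init(example_pmu_init);
MODULE_LICENSE("GPL");

The cstate, iommu and idxd conversions later in this series follow exactly
this pattern, deleting their open-coded cpumask attributes and hotplug
callbacks in favour of setting pmu->scope.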

View File

@@ -0,0 +1,71 @@
From 8c7eb17e722a6a45c4436e5debb9336089b21d9b Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@linux.intel.com>
Date: Fri, 2 Aug 2024 08:16:38 -0700
Subject: perf: Add PERF_EV_CAP_READ_SCOPE
Usually, an event can be read from any CPU of the scope. It doesn't need
to be read from the advertised CPU.
Add a new event cap, PERF_EV_CAP_READ_SCOPE. An event of a PMU with a
scope can be read from any active CPU in the scope.
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
---
include/linux/perf_event.h | 3 +++
kernel/events/core.c | 14 +++++++++++---
2 files changed, 14 insertions(+), 3 deletions(-)
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -633,10 +633,13 @@ typedef void (*perf_overflow_handler_t)(
* PERF_EV_CAP_SIBLING: An event with this flag must be a group sibling and
* cannot be a group leader. If an event with this flag is detached from the
* group it is scheduled out and moved into an unrecoverable ERROR state.
+ * PERF_EV_CAP_READ_SCOPE: A CPU event that can be read from any CPU of the
+ * PMU scope where it is active.
*/
#define PERF_EV_CAP_SOFTWARE BIT(0)
#define PERF_EV_CAP_READ_ACTIVE_PKG BIT(1)
#define PERF_EV_CAP_SIBLING BIT(2)
+#define PERF_EV_CAP_READ_SCOPE BIT(3)
#define SWEVENT_HLIST_BITS 8
#define SWEVENT_HLIST_SIZE (1 << SWEVENT_HLIST_BITS)
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4477,16 +4477,24 @@ struct perf_read_data {
int ret;
};
+static inline const struct cpumask *perf_scope_cpu_topology_cpumask(unsigned int scope, int cpu);
+
static int __perf_event_read_cpu(struct perf_event *event, int event_cpu)
{
+ int local_cpu = smp_processor_id();
u16 local_pkg, event_pkg;
if ((unsigned)event_cpu >= nr_cpu_ids)
return event_cpu;
- if (event->group_caps & PERF_EV_CAP_READ_ACTIVE_PKG) {
- int local_cpu = smp_processor_id();
+ if (event->group_caps & PERF_EV_CAP_READ_SCOPE) {
+ const struct cpumask *cpumask = perf_scope_cpu_topology_cpumask(event->pmu->scope, event_cpu);
+
+ if (cpumask && cpumask_test_cpu(local_cpu, cpumask))
+ return local_cpu;
+ }
+ if (event->group_caps & PERF_EV_CAP_READ_ACTIVE_PKG) {
event_pkg = topology_physical_package_id(event_cpu);
local_pkg = topology_physical_package_id(local_cpu);
@@ -11824,7 +11832,7 @@ static int perf_try_init_event(struct pm
if (cpu >= nr_cpu_ids)
ret = -ENODEV;
else
- event->cpu = cpu;
+ event->event_caps |= PERF_EV_CAP_READ_SCOPE;
} else {
ret = -ENODEV;
}

View File

@@ -0,0 +1,286 @@
From 09c1529eb102b486220c35546f2663ca858a2943 Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@linux.intel.com>
Date: Fri, 2 Aug 2024 08:16:39 -0700
Subject: perf/x86/intel/cstate: Clean up cpumask and hotplug
There are three cstate PMUs with different scopes: core, die and module.
These scopes are now supported by the generic perf_event subsystem.
Set the scope for each PMU and remove all the cpumask and hotplug code.
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
---
arch/x86/events/intel/cstate.c | 142 ++-------------------------------
include/linux/cpuhotplug.h | 2 -
2 files changed, 5 insertions(+), 139 deletions(-)
--- a/arch/x86/events/intel/cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -128,10 +128,6 @@ static ssize_t __cstate_##_var##_show(st
static struct device_attribute format_attr_##_var = \
__ATTR(_name, 0444, __cstate_##_var##_show, NULL)
-static ssize_t cstate_get_attr_cpumask(struct device *dev,
- struct device_attribute *attr,
- char *buf);
-
/* Model -> events mapping */
struct cstate_model {
unsigned long core_events;
@@ -206,22 +202,9 @@ static struct attribute_group cstate_for
.attrs = cstate_format_attrs,
};
-static cpumask_t cstate_core_cpu_mask;
-static DEVICE_ATTR(cpumask, S_IRUGO, cstate_get_attr_cpumask, NULL);
-
-static struct attribute *cstate_cpumask_attrs[] = {
- &dev_attr_cpumask.attr,
- NULL,
-};
-
-static struct attribute_group cpumask_attr_group = {
- .attrs = cstate_cpumask_attrs,
-};
-
static const struct attribute_group *cstate_attr_groups[] = {
&cstate_events_attr_group,
&cstate_format_attr_group,
- &cpumask_attr_group,
NULL,
};
@@ -269,8 +252,6 @@ static struct perf_msr pkg_msr[] = {
[PERF_CSTATE_PKG_C10_RES] = { MSR_PKG_C10_RESIDENCY, &group_cstate_pkg_c10, test_msr },
};
-static cpumask_t cstate_pkg_cpu_mask;
-
/* cstate_module PMU */
static struct pmu cstate_module_pmu;
static bool has_cstate_module;
@@ -291,28 +272,9 @@ static struct perf_msr module_msr[] = {
[PERF_CSTATE_MODULE_C6_RES] = { MSR_MODULE_C6_RES_MS, &group_cstate_module_c6, test_msr },
};
-static cpumask_t cstate_module_cpu_mask;
-
-static ssize_t cstate_get_attr_cpumask(struct device *dev,
- struct device_attribute *attr,
- char *buf)
-{
- struct pmu *pmu = dev_get_drvdata(dev);
-
- if (pmu == &cstate_core_pmu)
- return cpumap_print_to_pagebuf(true, buf, &cstate_core_cpu_mask);
- else if (pmu == &cstate_pkg_pmu)
- return cpumap_print_to_pagebuf(true, buf, &cstate_pkg_cpu_mask);
- else if (pmu == &cstate_module_pmu)
- return cpumap_print_to_pagebuf(true, buf, &cstate_module_cpu_mask);
- else
- return 0;
-}
-
static int cstate_pmu_event_init(struct perf_event *event)
{
u64 cfg = event->attr.config;
- int cpu;
if (event->attr.type != event->pmu->type)
return -ENOENT;
@@ -331,20 +293,13 @@ static int cstate_pmu_event_init(struct
if (!(core_msr_mask & (1 << cfg)))
return -EINVAL;
event->hw.event_base = core_msr[cfg].msr;
- cpu = cpumask_any_and(&cstate_core_cpu_mask,
- topology_sibling_cpumask(event->cpu));
} else if (event->pmu == &cstate_pkg_pmu) {
if (cfg >= PERF_CSTATE_PKG_EVENT_MAX)
return -EINVAL;
cfg = array_index_nospec((unsigned long)cfg, PERF_CSTATE_PKG_EVENT_MAX);
if (!(pkg_msr_mask & (1 << cfg)))
return -EINVAL;
-
- event->event_caps |= PERF_EV_CAP_READ_ACTIVE_PKG;
-
event->hw.event_base = pkg_msr[cfg].msr;
- cpu = cpumask_any_and(&cstate_pkg_cpu_mask,
- topology_die_cpumask(event->cpu));
} else if (event->pmu == &cstate_module_pmu) {
if (cfg >= PERF_CSTATE_MODULE_EVENT_MAX)
return -EINVAL;
@@ -352,16 +307,10 @@ static int cstate_pmu_event_init(struct
if (!(module_msr_mask & (1 << cfg)))
return -EINVAL;
event->hw.event_base = module_msr[cfg].msr;
- cpu = cpumask_any_and(&cstate_module_cpu_mask,
- topology_cluster_cpumask(event->cpu));
} else {
return -ENOENT;
}
- if (cpu >= nr_cpu_ids)
- return -ENODEV;
-
- event->cpu = cpu;
event->hw.config = cfg;
event->hw.idx = -1;
return 0;
@@ -412,84 +361,6 @@ static int cstate_pmu_event_add(struct p
return 0;
}
-/*
- * Check if exiting cpu is the designated reader. If so migrate the
- * events when there is a valid target available
- */
-static int cstate_cpu_exit(unsigned int cpu)
-{
- unsigned int target;
-
- if (has_cstate_core &&
- cpumask_test_and_clear_cpu(cpu, &cstate_core_cpu_mask)) {
-
- target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu);
- /* Migrate events if there is a valid target */
- if (target < nr_cpu_ids) {
- cpumask_set_cpu(target, &cstate_core_cpu_mask);
- perf_pmu_migrate_context(&cstate_core_pmu, cpu, target);
- }
- }
-
- if (has_cstate_pkg &&
- cpumask_test_and_clear_cpu(cpu, &cstate_pkg_cpu_mask)) {
-
- target = cpumask_any_but(topology_die_cpumask(cpu), cpu);
- /* Migrate events if there is a valid target */
- if (target < nr_cpu_ids) {
- cpumask_set_cpu(target, &cstate_pkg_cpu_mask);
- perf_pmu_migrate_context(&cstate_pkg_pmu, cpu, target);
- }
- }
-
- if (has_cstate_module &&
- cpumask_test_and_clear_cpu(cpu, &cstate_module_cpu_mask)) {
-
- target = cpumask_any_but(topology_cluster_cpumask(cpu), cpu);
- /* Migrate events if there is a valid target */
- if (target < nr_cpu_ids) {
- cpumask_set_cpu(target, &cstate_module_cpu_mask);
- perf_pmu_migrate_context(&cstate_module_pmu, cpu, target);
- }
- }
- return 0;
-}
-
-static int cstate_cpu_init(unsigned int cpu)
-{
- unsigned int target;
-
- /*
- * If this is the first online thread of that core, set it in
- * the core cpu mask as the designated reader.
- */
- target = cpumask_any_and(&cstate_core_cpu_mask,
- topology_sibling_cpumask(cpu));
-
- if (has_cstate_core && target >= nr_cpu_ids)
- cpumask_set_cpu(cpu, &cstate_core_cpu_mask);
-
- /*
- * If this is the first online thread of that package, set it
- * in the package cpu mask as the designated reader.
- */
- target = cpumask_any_and(&cstate_pkg_cpu_mask,
- topology_die_cpumask(cpu));
- if (has_cstate_pkg && target >= nr_cpu_ids)
- cpumask_set_cpu(cpu, &cstate_pkg_cpu_mask);
-
- /*
- * If this is the first online thread of that cluster, set it
- * in the cluster cpu mask as the designated reader.
- */
- target = cpumask_any_and(&cstate_module_cpu_mask,
- topology_cluster_cpumask(cpu));
- if (has_cstate_module && target >= nr_cpu_ids)
- cpumask_set_cpu(cpu, &cstate_module_cpu_mask);
-
- return 0;
-}
-
static const struct attribute_group *core_attr_update[] = {
&group_cstate_core_c1,
&group_cstate_core_c3,
@@ -526,6 +397,7 @@ static struct pmu cstate_core_pmu = {
.stop = cstate_pmu_event_stop,
.read = cstate_pmu_event_update,
.capabilities = PERF_PMU_CAP_NO_INTERRUPT | PERF_PMU_CAP_NO_EXCLUDE,
+ .scope = PERF_PMU_SCOPE_CORE,
.module = THIS_MODULE,
};
@@ -541,6 +413,7 @@ static struct pmu cstate_pkg_pmu = {
.stop = cstate_pmu_event_stop,
.read = cstate_pmu_event_update,
.capabilities = PERF_PMU_CAP_NO_INTERRUPT | PERF_PMU_CAP_NO_EXCLUDE,
+ .scope = PERF_PMU_SCOPE_PKG,
.module = THIS_MODULE,
};
@@ -556,6 +429,7 @@ static struct pmu cstate_module_pmu = {
.stop = cstate_pmu_event_stop,
.read = cstate_pmu_event_update,
.capabilities = PERF_PMU_CAP_NO_INTERRUPT | PERF_PMU_CAP_NO_EXCLUDE,
+ .scope = PERF_PMU_SCOPE_CLUSTER,
.module = THIS_MODULE,
};
@@ -810,9 +684,6 @@ static int __init cstate_probe(const str
static inline void cstate_cleanup(void)
{
- cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_CSTATE_ONLINE);
- cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_CSTATE_STARTING);
-
if (has_cstate_core)
perf_pmu_unregister(&cstate_core_pmu);
@@ -827,11 +698,6 @@ static int __init cstate_init(void)
{
int err;
- cpuhp_setup_state(CPUHP_AP_PERF_X86_CSTATE_STARTING,
- "perf/x86/cstate:starting", cstate_cpu_init, NULL);
- cpuhp_setup_state(CPUHP_AP_PERF_X86_CSTATE_ONLINE,
- "perf/x86/cstate:online", NULL, cstate_cpu_exit);
-
if (has_cstate_core) {
err = perf_pmu_register(&cstate_core_pmu, cstate_core_pmu.name, -1);
if (err) {
@@ -844,6 +710,8 @@ static int __init cstate_init(void)
if (has_cstate_pkg) {
if (topology_max_dies_per_package() > 1) {
+ /* CLX-AP is multi-die and the cstate is die-scope */
+ cstate_pkg_pmu.scope = PERF_PMU_SCOPE_DIE;
err = perf_pmu_register(&cstate_pkg_pmu,
"cstate_die", -1);
} else {
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -152,7 +152,6 @@ enum cpuhp_state {
CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING,
CPUHP_AP_PERF_X86_STARTING,
CPUHP_AP_PERF_X86_AMD_IBS_STARTING,
- CPUHP_AP_PERF_X86_CSTATE_STARTING,
CPUHP_AP_PERF_XTENSA_STARTING,
CPUHP_AP_ARM_VFP_STARTING,
CPUHP_AP_ARM64_DEBUG_MONITORS_STARTING,
@@ -209,7 +208,6 @@ enum cpuhp_state {
CPUHP_AP_PERF_X86_AMD_UNCORE_ONLINE,
CPUHP_AP_PERF_X86_AMD_POWER_ONLINE,
CPUHP_AP_PERF_X86_RAPL_ONLINE,
- CPUHP_AP_PERF_X86_CSTATE_ONLINE,
CPUHP_AP_PERF_S390_CF_ONLINE,
CPUHP_AP_PERF_S390_SF_ONLINE,
CPUHP_AP_PERF_ARM_CCI_ONLINE,

View File

@@ -0,0 +1,188 @@
From f91da33af8295b4b3d73a2083225f69e1d5ff301 Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@linux.intel.com>
Date: Fri, 2 Aug 2024 08:16:40 -0700
Subject: iommu/vt-d: Clean up cpumask and hotplug for perfmon
The iommu PMU has system-wide scope, which is now supported by the
generic perf_event subsystem.
Set the scope for the iommu PMU and remove all the cpumask and hotplug
code.
Reviewed-by: Lu Baolu <baolu.lu@linux.intel.com>
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Joerg Roedel <joro@8bytes.org>
Cc: Will Deacon <will@kernel.org>
Cc: iommu@lists.linux.dev
---
drivers/iommu/intel/iommu.h | 2 -
drivers/iommu/intel/perfmon.c | 111 +---------------------------------
2 files changed, 2 insertions(+), 111 deletions(-)
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -687,8 +687,6 @@ struct iommu_pmu {
DECLARE_BITMAP(used_mask, IOMMU_PMU_IDX_MAX);
struct perf_event *event_list[IOMMU_PMU_IDX_MAX];
unsigned char irq_name[16];
- struct hlist_node cpuhp_node;
- int cpu;
};
#define IOMMU_IRQ_ID_OFFSET_PRQ (DMAR_UNITS_SUPPORTED)
--- a/drivers/iommu/intel/perfmon.c
+++ b/drivers/iommu/intel/perfmon.c
@@ -34,28 +34,9 @@ static struct attribute_group iommu_pmu_
.attrs = attrs_empty,
};
-static cpumask_t iommu_pmu_cpu_mask;
-
-static ssize_t
-cpumask_show(struct device *dev, struct device_attribute *attr, char *buf)
-{
- return cpumap_print_to_pagebuf(true, buf, &iommu_pmu_cpu_mask);
-}
-static DEVICE_ATTR_RO(cpumask);
-
-static struct attribute *iommu_pmu_cpumask_attrs[] = {
- &dev_attr_cpumask.attr,
- NULL
-};
-
-static struct attribute_group iommu_pmu_cpumask_attr_group = {
- .attrs = iommu_pmu_cpumask_attrs,
-};
-
static const struct attribute_group *iommu_pmu_attr_groups[] = {
&iommu_pmu_format_attr_group,
&iommu_pmu_events_attr_group,
- &iommu_pmu_cpumask_attr_group,
NULL
};
@@ -565,6 +546,7 @@ static int __iommu_pmu_register(struct i
iommu_pmu->pmu.attr_groups = iommu_pmu_attr_groups;
iommu_pmu->pmu.attr_update = iommu_pmu_attr_update;
iommu_pmu->pmu.capabilities = PERF_PMU_CAP_NO_EXCLUDE;
+ iommu_pmu->pmu.scope = PERF_PMU_SCOPE_SYS_WIDE;
iommu_pmu->pmu.module = THIS_MODULE;
return perf_pmu_register(&iommu_pmu->pmu, iommu_pmu->pmu.name, -1);
@@ -773,89 +755,6 @@ static void iommu_pmu_unset_interrupt(st
iommu->perf_irq = 0;
}
-static int iommu_pmu_cpu_online(unsigned int cpu, struct hlist_node *node)
-{
- struct iommu_pmu *iommu_pmu = hlist_entry_safe(node, typeof(*iommu_pmu), cpuhp_node);
-
- if (cpumask_empty(&iommu_pmu_cpu_mask))
- cpumask_set_cpu(cpu, &iommu_pmu_cpu_mask);
-
- if (cpumask_test_cpu(cpu, &iommu_pmu_cpu_mask))
- iommu_pmu->cpu = cpu;
-
- return 0;
-}
-
-static int iommu_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node)
-{
- struct iommu_pmu *iommu_pmu = hlist_entry_safe(node, typeof(*iommu_pmu), cpuhp_node);
- int target = cpumask_first(&iommu_pmu_cpu_mask);
-
- /*
- * The iommu_pmu_cpu_mask has been updated when offline the CPU
- * for the first iommu_pmu. Migrate the other iommu_pmu to the
- * new target.
- */
- if (target < nr_cpu_ids && target != iommu_pmu->cpu) {
- perf_pmu_migrate_context(&iommu_pmu->pmu, cpu, target);
- iommu_pmu->cpu = target;
- return 0;
- }
-
- if (!cpumask_test_and_clear_cpu(cpu, &iommu_pmu_cpu_mask))
- return 0;
-
- target = cpumask_any_but(cpu_online_mask, cpu);
-
- if (target < nr_cpu_ids)
- cpumask_set_cpu(target, &iommu_pmu_cpu_mask);
- else
- return 0;
-
- perf_pmu_migrate_context(&iommu_pmu->pmu, cpu, target);
- iommu_pmu->cpu = target;
-
- return 0;
-}
-
-static int nr_iommu_pmu;
-static enum cpuhp_state iommu_cpuhp_slot;
-
-static int iommu_pmu_cpuhp_setup(struct iommu_pmu *iommu_pmu)
-{
- int ret;
-
- if (!nr_iommu_pmu) {
- ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
- "driver/iommu/intel/perfmon:online",
- iommu_pmu_cpu_online,
- iommu_pmu_cpu_offline);
- if (ret < 0)
- return ret;
- iommu_cpuhp_slot = ret;
- }
-
- ret = cpuhp_state_add_instance(iommu_cpuhp_slot, &iommu_pmu->cpuhp_node);
- if (ret) {
- if (!nr_iommu_pmu)
- cpuhp_remove_multi_state(iommu_cpuhp_slot);
- return ret;
- }
- nr_iommu_pmu++;
-
- return 0;
-}
-
-static void iommu_pmu_cpuhp_free(struct iommu_pmu *iommu_pmu)
-{
- cpuhp_state_remove_instance(iommu_cpuhp_slot, &iommu_pmu->cpuhp_node);
-
- if (--nr_iommu_pmu)
- return;
-
- cpuhp_remove_multi_state(iommu_cpuhp_slot);
-}
-
void iommu_pmu_register(struct intel_iommu *iommu)
{
struct iommu_pmu *iommu_pmu = iommu->pmu;
@@ -866,17 +765,12 @@ void iommu_pmu_register(struct intel_iom
if (__iommu_pmu_register(iommu))
goto err;
- if (iommu_pmu_cpuhp_setup(iommu_pmu))
- goto unregister;
-
/* Set interrupt for overflow */
if (iommu_pmu_set_interrupt(iommu))
- goto cpuhp_free;
+ goto unregister;
return;
-cpuhp_free:
- iommu_pmu_cpuhp_free(iommu_pmu);
unregister:
perf_pmu_unregister(&iommu_pmu->pmu);
err:
@@ -892,6 +786,5 @@ void iommu_pmu_unregister(struct intel_i
return;
iommu_pmu_unset_interrupt(iommu);
- iommu_pmu_cpuhp_free(iommu_pmu);
perf_pmu_unregister(&iommu_pmu->pmu);
}
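The pattern this patch (and the idxd and rapl cleanups that follow) relies
on is compact enough to show on its own. The sketch below is not taken from
the patch: it is a minimal, hypothetical driver registration assuming the
struct pmu "scope" field from the generic perf_event support mentioned
above, with the "example" name invented here and the mandatory event
callbacks (event_init/add/del/start/stop/read) omitted.

#include <linux/module.h>
#include <linux/perf_event.h>

static struct pmu example_pmu;	/* hypothetical driver PMU */

static int __init example_pmu_register(void)
{
	/* Declare the PMU's scope and let the perf core pick an owning
	 * CPU and migrate events on hotplug; no driver cpumask sysfs
	 * attribute and no cpuhp callbacks are needed. */
	example_pmu.task_ctx_nr  = perf_invalid_context;
	example_pmu.capabilities = PERF_PMU_CAP_NO_EXCLUDE;
	example_pmu.scope        = PERF_PMU_SCOPE_SYS_WIDE;
	example_pmu.module       = THIS_MODULE;

	return perf_pmu_register(&example_pmu, "example", -1);
}

With the scope declared, the perf core manages the reader CPU, which is
exactly what allows the driver-local cpumask attribute and cpuhp handlers
above to be deleted.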

View File

@@ -0,0 +1,238 @@
From 76278bd3946d618ead2d9cc22612a75a4ab99ace Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@linux.intel.com>
Date: Fri, 2 Aug 2024 08:16:41 -0700
Subject: dmaengine: idxd: Clean up cpumask and hotplug for perfmon
The idxd PMU has system-wide scope, which the generic perf_event
subsystem now supports.
Set the scope for the idxd PMU and remove all of the cpumask and
hotplug code.
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Dave Jiang <dave.jiang@intel.com>
Cc: Vinod Koul <vkoul@kernel.org>
Cc: dmaengine@vger.kernel.org
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Reviewed-by: Fenghua Yu <fenghua.yu@intel.com>
---
drivers/dma/idxd/idxd.h | 7 ---
drivers/dma/idxd/init.c | 3 --
drivers/dma/idxd/perfmon.c | 98 +-------------------------------------
3 files changed, 1 insertion(+), 107 deletions(-)
--- a/drivers/dma/idxd/idxd.h
+++ b/drivers/dma/idxd/idxd.h
@@ -124,7 +124,6 @@ struct idxd_pmu {
struct pmu pmu;
char name[IDXD_NAME_SIZE];
- int cpu;
int n_counters;
int counter_width;
@@ -135,8 +134,6 @@ struct idxd_pmu {
unsigned long supported_filters;
int n_filters;
-
- struct hlist_node cpuhp_node;
};
#define IDXD_MAX_PRIORITY 0xf
@@ -803,14 +800,10 @@ void idxd_user_counter_increment(struct
int perfmon_pmu_init(struct idxd_device *idxd);
void perfmon_pmu_remove(struct idxd_device *idxd);
void perfmon_counter_overflow(struct idxd_device *idxd);
-void perfmon_init(void);
-void perfmon_exit(void);
#else
static inline int perfmon_pmu_init(struct idxd_device *idxd) { return 0; }
static inline void perfmon_pmu_remove(struct idxd_device *idxd) {}
static inline void perfmon_counter_overflow(struct idxd_device *idxd) {}
-static inline void perfmon_init(void) {}
-static inline void perfmon_exit(void) {}
#endif
/* debugfs */
--- a/drivers/dma/idxd/init.c
+++ b/drivers/dma/idxd/init.c
@@ -878,8 +878,6 @@ static int __init idxd_init_module(void)
else
support_enqcmd = true;
- perfmon_init();
-
err = idxd_driver_register(&idxd_drv);
if (err < 0)
goto err_idxd_driver_register;
@@ -928,7 +926,6 @@ static void __exit idxd_exit_module(void
idxd_driver_unregister(&idxd_drv);
pci_unregister_driver(&idxd_pci_driver);
idxd_cdev_remove();
- perfmon_exit();
idxd_remove_debugfs();
}
module_exit(idxd_exit_module);
--- a/drivers/dma/idxd/perfmon.c
+++ b/drivers/dma/idxd/perfmon.c
@@ -6,29 +6,6 @@
#include "idxd.h"
#include "perfmon.h"
-static ssize_t cpumask_show(struct device *dev, struct device_attribute *attr,
- char *buf);
-
-static cpumask_t perfmon_dsa_cpu_mask;
-static bool cpuhp_set_up;
-static enum cpuhp_state cpuhp_slot;
-
-/*
- * perf userspace reads this attribute to determine which cpus to open
- * counters on. It's connected to perfmon_dsa_cpu_mask, which is
- * maintained by the cpu hotplug handlers.
- */
-static DEVICE_ATTR_RO(cpumask);
-
-static struct attribute *perfmon_cpumask_attrs[] = {
- &dev_attr_cpumask.attr,
- NULL,
-};
-
-static struct attribute_group cpumask_attr_group = {
- .attrs = perfmon_cpumask_attrs,
-};
-
/*
* These attributes specify the bits in the config word that the perf
* syscall uses to pass the event ids and categories to perfmon.
@@ -67,16 +44,9 @@ static struct attribute_group perfmon_fo
static const struct attribute_group *perfmon_attr_groups[] = {
&perfmon_format_attr_group,
- &cpumask_attr_group,
NULL,
};
-static ssize_t cpumask_show(struct device *dev, struct device_attribute *attr,
- char *buf)
-{
- return cpumap_print_to_pagebuf(true, buf, &perfmon_dsa_cpu_mask);
-}
-
static bool is_idxd_event(struct idxd_pmu *idxd_pmu, struct perf_event *event)
{
return &idxd_pmu->pmu == event->pmu;
@@ -217,7 +187,6 @@ static int perfmon_pmu_event_init(struct
return -EINVAL;
event->hw.event_base = ioread64(PERFMON_TABLE_OFFSET(idxd));
- event->cpu = idxd->idxd_pmu->cpu;
event->hw.config = event->attr.config;
if (event->group_leader != event)
@@ -488,6 +457,7 @@ static void idxd_pmu_init(struct idxd_pm
idxd_pmu->pmu.stop = perfmon_pmu_event_stop;
idxd_pmu->pmu.read = perfmon_pmu_event_update;
idxd_pmu->pmu.capabilities = PERF_PMU_CAP_NO_EXCLUDE;
+ idxd_pmu->pmu.scope = PERF_PMU_SCOPE_SYS_WIDE;
idxd_pmu->pmu.module = THIS_MODULE;
}
@@ -496,47 +466,11 @@ void perfmon_pmu_remove(struct idxd_devi
if (!idxd->idxd_pmu)
return;
- cpuhp_state_remove_instance(cpuhp_slot, &idxd->idxd_pmu->cpuhp_node);
perf_pmu_unregister(&idxd->idxd_pmu->pmu);
kfree(idxd->idxd_pmu);
idxd->idxd_pmu = NULL;
}
-static int perf_event_cpu_online(unsigned int cpu, struct hlist_node *node)
-{
- struct idxd_pmu *idxd_pmu;
-
- idxd_pmu = hlist_entry_safe(node, typeof(*idxd_pmu), cpuhp_node);
-
- /* select the first online CPU as the designated reader */
- if (cpumask_empty(&perfmon_dsa_cpu_mask)) {
- cpumask_set_cpu(cpu, &perfmon_dsa_cpu_mask);
- idxd_pmu->cpu = cpu;
- }
-
- return 0;
-}
-
-static int perf_event_cpu_offline(unsigned int cpu, struct hlist_node *node)
-{
- struct idxd_pmu *idxd_pmu;
- unsigned int target;
-
- idxd_pmu = hlist_entry_safe(node, typeof(*idxd_pmu), cpuhp_node);
-
- if (!cpumask_test_and_clear_cpu(cpu, &perfmon_dsa_cpu_mask))
- return 0;
-
- target = cpumask_any_but(cpu_online_mask, cpu);
- /* migrate events if there is a valid target */
- if (target < nr_cpu_ids) {
- cpumask_set_cpu(target, &perfmon_dsa_cpu_mask);
- perf_pmu_migrate_context(&idxd_pmu->pmu, cpu, target);
- }
-
- return 0;
-}
-
int perfmon_pmu_init(struct idxd_device *idxd)
{
union idxd_perfcap perfcap;
@@ -544,12 +478,6 @@ int perfmon_pmu_init(struct idxd_device
int rc = -ENODEV;
/*
- * perfmon module initialization failed, nothing to do
- */
- if (!cpuhp_set_up)
- return -ENODEV;
-
- /*
* If perfmon_offset or num_counters is 0, it means perfmon is
* not supported on this hardware.
*/
@@ -624,11 +552,6 @@ int perfmon_pmu_init(struct idxd_device
if (rc)
goto free;
- rc = cpuhp_state_add_instance(cpuhp_slot, &idxd_pmu->cpuhp_node);
- if (rc) {
- perf_pmu_unregister(&idxd->idxd_pmu->pmu);
- goto free;
- }
out:
return rc;
free:
@@ -637,22 +560,3 @@ free:
goto out;
}
-
-void __init perfmon_init(void)
-{
- int rc = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
- "driver/dma/idxd/perf:online",
- perf_event_cpu_online,
- perf_event_cpu_offline);
- if (WARN_ON(rc < 0))
- return;
-
- cpuhp_slot = rc;
- cpuhp_set_up = true;
-}
-
-void __exit perfmon_exit(void)
-{
- if (cpuhp_set_up)
- cpuhp_remove_multi_state(cpuhp_slot);
-}
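For contrast, this is the shape of the per-driver boilerplate that this
patch and the iommu patch above both delete: a generic sketch with
hypothetical "example_*" names, reconstructed from the removed hunks rather
than copied verbatim from either driver.

#include <linux/cpuhotplug.h>

static enum cpuhp_state example_cpuhp_slot;	/* hypothetical */

static int example_cpu_online(unsigned int cpu, struct hlist_node *node)
{
	/* The real callbacks nominate the first online CPU as the
	 * counter-reading CPU. */
	return 0;
}

static int example_cpu_offline(unsigned int cpu, struct hlist_node *node)
{
	/* ... and migrate events with perf_pmu_migrate_context() when
	 * that CPU goes away. */
	return 0;
}

static int example_cpuhp_setup(struct hlist_node *node)
{
	int ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
					  "driver/example/perf:online",
					  example_cpu_online,
					  example_cpu_offline);
	if (ret < 0)
		return ret;
	example_cpuhp_slot = ret;

	return cpuhp_state_add_instance(example_cpuhp_slot, node);
}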

View File

@@ -0,0 +1,84 @@
From fb0a3b5932882f02ed42fcaa6db73aba3eafd6d7 Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@linux.intel.com>
Date: Fri, 2 Aug 2024 08:16:42 -0700
Subject: perf/x86/rapl: Move the pmu allocation out of CPU hotplug
The rapl pmu only needs to be allocated once; it makes no difference
whether that happens in each CPU hotplug callback or in the global
init_rapl_pmus().
Move the pmu allocation to init_rapl_pmus() so that the generic hotplug
support can be applied.
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Cc: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
---
arch/x86/events/rapl.c | 44 +++++++++++++++++++++++++++++-------------
1 file changed, 31 insertions(+), 13 deletions(-)
--- a/arch/x86/events/rapl.c
+++ b/arch/x86/events/rapl.c
@@ -568,19 +568,8 @@ static int rapl_cpu_online(unsigned int
struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu);
int target;
- if (!pmu) {
- pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu));
- if (!pmu)
- return -ENOMEM;
-
- raw_spin_lock_init(&pmu->lock);
- INIT_LIST_HEAD(&pmu->active_list);
- pmu->pmu = &rapl_pmus->pmu;
- pmu->timer_interval = ms_to_ktime(rapl_timer_ms);
- rapl_hrtimer_init(pmu);
-
- rapl_pmus->pmus[topology_logical_die_id(cpu)] = pmu;
- }
+ if (!pmu)
+ return -ENOMEM;
/*
* Check if there is an online cpu in the package which collects rapl
@@ -673,6 +662,32 @@ static const struct attribute_group *rap
NULL,
};
+static void __init init_rapl_pmu(void)
+{
+ struct rapl_pmu *pmu;
+ int cpu;
+
+ cpus_read_lock();
+
+ for_each_cpu(cpu, cpu_online_mask) {
+ pmu = cpu_to_rapl_pmu(cpu);
+ if (pmu)
+ continue;
+ pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu));
+ if (!pmu)
+ continue;
+ raw_spin_lock_init(&pmu->lock);
+ INIT_LIST_HEAD(&pmu->active_list);
+ pmu->pmu = &rapl_pmus->pmu;
+ pmu->timer_interval = ms_to_ktime(rapl_timer_ms);
+ rapl_hrtimer_init(pmu);
+
+ rapl_pmus->pmus[topology_logical_die_id(cpu)] = pmu;
+ }
+
+ cpus_read_unlock();
+}
+
static int __init init_rapl_pmus(void)
{
int nr_rapl_pmu = topology_max_packages() * topology_max_dies_per_package();
@@ -693,6 +708,9 @@ static int __init init_rapl_pmus(void)
rapl_pmus->pmu.read = rapl_pmu_event_read;
rapl_pmus->pmu.module = THIS_MODULE;
rapl_pmus->pmu.capabilities = PERF_PMU_CAP_NO_EXCLUDE;
+
+ init_rapl_pmu();
+
return 0;
}

View File

@@ -0,0 +1,179 @@
From 7b4f6ba1b1dc5f3120652bcb5921a697d5167bff Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@linux.intel.com>
Date: Fri, 2 Aug 2024 08:16:43 -0700
Subject: perf/x86/rapl: Clean up cpumask and hotplug
The rapl pmu has die scope, which the generic perf_event subsystem now
supports.
Set the scope for the rapl PMU and remove all of the cpumask and
hotplug code.
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Cc: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
---
arch/x86/events/rapl.c | 80 +-------------------------------------
include/linux/cpuhotplug.h | 1 -
2 files changed, 2 insertions(+), 79 deletions(-)
--- a/arch/x86/events/rapl.c
+++ b/arch/x86/events/rapl.c
@@ -135,7 +135,6 @@ struct rapl_model {
/* 1/2^hw_unit Joule */
static int rapl_hw_unit[NR_RAPL_DOMAINS] __read_mostly;
static struct rapl_pmus *rapl_pmus;
-static cpumask_t rapl_cpu_mask;
static unsigned int rapl_cntr_mask;
static u64 rapl_timer_ms;
static struct perf_msr *rapl_msrs;
@@ -340,8 +339,6 @@ static int rapl_pmu_event_init(struct pe
if (event->cpu < 0)
return -EINVAL;
- event->event_caps |= PERF_EV_CAP_READ_ACTIVE_PKG;
-
if (!cfg || cfg >= NR_RAPL_DOMAINS + 1)
return -EINVAL;
@@ -360,7 +357,6 @@ static int rapl_pmu_event_init(struct pe
pmu = cpu_to_rapl_pmu(event->cpu);
if (!pmu)
return -EINVAL;
- event->cpu = pmu->cpu;
event->pmu_private = pmu;
event->hw.event_base = rapl_msrs[bit].msr;
event->hw.config = cfg;
@@ -374,23 +370,6 @@ static void rapl_pmu_event_read(struct p
rapl_event_update(event);
}
-static ssize_t rapl_get_attr_cpumask(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- return cpumap_print_to_pagebuf(true, buf, &rapl_cpu_mask);
-}
-
-static DEVICE_ATTR(cpumask, S_IRUGO, rapl_get_attr_cpumask, NULL);
-
-static struct attribute *rapl_pmu_attrs[] = {
- &dev_attr_cpumask.attr,
- NULL,
-};
-
-static struct attribute_group rapl_pmu_attr_group = {
- .attrs = rapl_pmu_attrs,
-};
-
RAPL_EVENT_ATTR_STR(energy-cores, rapl_cores, "event=0x01");
RAPL_EVENT_ATTR_STR(energy-pkg , rapl_pkg, "event=0x02");
RAPL_EVENT_ATTR_STR(energy-ram , rapl_ram, "event=0x03");
@@ -438,7 +417,6 @@ static struct attribute_group rapl_pmu_f
};
static const struct attribute_group *rapl_attr_groups[] = {
- &rapl_pmu_attr_group,
&rapl_pmu_format_group,
&rapl_pmu_events_group,
NULL,
@@ -541,49 +519,6 @@ static struct perf_msr amd_rapl_msrs[] =
[PERF_RAPL_PSYS] = { 0, &rapl_events_psys_group, NULL, false, 0 },
};
-static int rapl_cpu_offline(unsigned int cpu)
-{
- struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu);
- int target;
-
- /* Check if exiting cpu is used for collecting rapl events */
- if (!cpumask_test_and_clear_cpu(cpu, &rapl_cpu_mask))
- return 0;
-
- pmu->cpu = -1;
- /* Find a new cpu to collect rapl events */
- target = cpumask_any_but(topology_die_cpumask(cpu), cpu);
-
- /* Migrate rapl events to the new target */
- if (target < nr_cpu_ids) {
- cpumask_set_cpu(target, &rapl_cpu_mask);
- pmu->cpu = target;
- perf_pmu_migrate_context(pmu->pmu, cpu, target);
- }
- return 0;
-}
-
-static int rapl_cpu_online(unsigned int cpu)
-{
- struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu);
- int target;
-
- if (!pmu)
- return -ENOMEM;
-
- /*
- * Check if there is an online cpu in the package which collects rapl
- * events already.
- */
- target = cpumask_any_and(&rapl_cpu_mask, topology_die_cpumask(cpu));
- if (target < nr_cpu_ids)
- return 0;
-
- cpumask_set_cpu(cpu, &rapl_cpu_mask);
- pmu->cpu = cpu;
- return 0;
-}
-
static int rapl_check_hw_unit(struct rapl_model *rm)
{
u64 msr_rapl_power_unit_bits;
@@ -707,6 +642,7 @@ static int __init init_rapl_pmus(void)
rapl_pmus->pmu.stop = rapl_pmu_event_stop;
rapl_pmus->pmu.read = rapl_pmu_event_read;
rapl_pmus->pmu.module = THIS_MODULE;
+ rapl_pmus->pmu.scope = PERF_PMU_SCOPE_DIE;
rapl_pmus->pmu.capabilities = PERF_PMU_CAP_NO_EXCLUDE;
init_rapl_pmu();
@@ -857,24 +793,13 @@ static int __init rapl_pmu_init(void)
if (ret)
return ret;
- /*
- * Install callbacks. Core will call them for each online cpu.
- */
- ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_RAPL_ONLINE,
- "perf/x86/rapl:online",
- rapl_cpu_online, rapl_cpu_offline);
- if (ret)
- goto out;
-
ret = perf_pmu_register(&rapl_pmus->pmu, "power", -1);
if (ret)
- goto out1;
+ goto out;
rapl_advertise();
return 0;
-out1:
- cpuhp_remove_state(CPUHP_AP_PERF_X86_RAPL_ONLINE);
out:
pr_warn("Initialization failed (%d), disabled\n", ret);
cleanup_rapl_pmus();
@@ -884,7 +809,6 @@ module_init(rapl_pmu_init);
static void __exit intel_rapl_exit(void)
{
- cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_RAPL_ONLINE);
perf_pmu_unregister(&rapl_pmus->pmu);
cleanup_rapl_pmus();
}
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -207,7 +207,6 @@ enum cpuhp_state {
CPUHP_AP_PERF_X86_UNCORE_ONLINE,
CPUHP_AP_PERF_X86_AMD_UNCORE_ONLINE,
CPUHP_AP_PERF_X86_AMD_POWER_ONLINE,
- CPUHP_AP_PERF_X86_RAPL_ONLINE,
CPUHP_AP_PERF_S390_CF_ONLINE,
CPUHP_AP_PERF_S390_SF_ONLINE,
CPUHP_AP_PERF_ARM_CCI_ONLINE,

View File

@@ -0,0 +1,101 @@
From f1525664ff9da3241b3556594dc0b67506ae1ddd Mon Sep 17 00:00:00 2001
From: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
Date: Tue, 10 Sep 2024 14:25:05 +0530
Subject: perf/x86/rapl: Fix the energy-pkg event for AMD CPUs
After commit ("x86/cpu/topology: Add support for the AMD 0x80000026 leaf"),
on AMD processors that support extended CPUID leaf 0x80000026, the
topology_die_cpumask() and topology_logical_die_id() macros, no longer
return the package cpumask and package id, instead they return the CCD
(Core Complex Die) mask and id respectively. This leads to the energy-pkg
event scope to be modified to CCD instead of package.
So, change the PMU scope for AMD and Hygon back to package.
On a 12 CCD 1 Package AMD Zen4 Genoa machine:
Before:
$ cat /sys/devices/power/cpumask
0,8,16,24,32,40,48,56,64,72,80,88.
The expected cpumask here is just "0": as this is a package-scope event,
only one CPU collects the event for all the CPUs in the package.
After:
$ cat /sys/devices/power/cpumask
0
Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
---
arch/x86/events/rapl.c | 35 ++++++++++++++++++++++++++++++++---
1 file changed, 32 insertions(+), 3 deletions(-)
--- a/arch/x86/events/rapl.c
+++ b/arch/x86/events/rapl.c
@@ -139,9 +139,32 @@ static unsigned int rapl_cntr_mask;
static u64 rapl_timer_ms;
static struct perf_msr *rapl_msrs;
+/*
+ * RAPL Package energy counter scope:
+ * 1. AMD/HYGON platforms have a per-PKG package energy counter
+ * 2. For Intel platforms
+ * 2.1. CLX-AP is multi-die and its RAPL MSRs are die-scope
+ * 2.2. Other Intel platforms are single die systems so the scope can be
+ * considered as either pkg-scope or die-scope, and we are considering
+ * them as die-scope.
+ */
+#define rapl_pmu_is_pkg_scope() \
+ (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || \
+ boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)
+
+/*
+ * Helper function to get the correct topology id according to the
+ * RAPL PMU scope.
+ */
+static inline unsigned int get_rapl_pmu_idx(int cpu)
+{
+ return rapl_pmu_is_pkg_scope() ? topology_logical_package_id(cpu) :
+ topology_logical_die_id(cpu);
+}
+
static inline struct rapl_pmu *cpu_to_rapl_pmu(unsigned int cpu)
{
- unsigned int rapl_pmu_idx = topology_logical_die_id(cpu);
+ unsigned int rapl_pmu_idx = get_rapl_pmu_idx(cpu);
/*
* The unsigned check also catches the '-1' return value for non
@@ -617,7 +640,7 @@ static void __init init_rapl_pmu(void)
pmu->timer_interval = ms_to_ktime(rapl_timer_ms);
rapl_hrtimer_init(pmu);
- rapl_pmus->pmus[topology_logical_die_id(cpu)] = pmu;
+ rapl_pmus->pmus[get_rapl_pmu_idx(cpu)] = pmu;
}
cpus_read_unlock();
@@ -626,6 +649,12 @@ static void __init init_rapl_pmu(void)
static int __init init_rapl_pmus(void)
{
int nr_rapl_pmu = topology_max_packages() * topology_max_dies_per_package();
+ int rapl_pmu_scope = PERF_PMU_SCOPE_DIE;
+
+ if (rapl_pmu_is_pkg_scope()) {
+ nr_rapl_pmu = topology_max_packages();
+ rapl_pmu_scope = PERF_PMU_SCOPE_PKG;
+ }
rapl_pmus = kzalloc(struct_size(rapl_pmus, pmus, nr_rapl_pmu), GFP_KERNEL);
if (!rapl_pmus)
@@ -641,8 +670,8 @@ static int __init init_rapl_pmus(void)
rapl_pmus->pmu.start = rapl_pmu_event_start;
rapl_pmus->pmu.stop = rapl_pmu_event_stop;
rapl_pmus->pmu.read = rapl_pmu_event_read;
+ rapl_pmus->pmu.scope = rapl_pmu_scope;
rapl_pmus->pmu.module = THIS_MODULE;
- rapl_pmus->pmu.scope = PERF_PMU_SCOPE_DIE;
rapl_pmus->pmu.capabilities = PERF_PMU_CAP_NO_EXCLUDE;
init_rapl_pmu();
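How much the scope choice matters on the machine described above can be
made concrete with a small sketch. It is not part of the patch; it only
reuses topology helpers that init_rapl_pmus() already calls, and the
function name is invented here.

#include <linux/perf_event.h>
#include <linux/topology.h>

/* How many rapl_pmu instances each scope implies. On the 1-package,
 * 12-CCD Zen4 Genoa example from the commit message, die scope would
 * size the array at 12 (one instance per CCD), while package scope
 * collapses that to a single instance backed by the shared package
 * energy counter. */
static unsigned int nr_rapl_pmus_for_scope(int scope)
{
	if (scope == PERF_PMU_SCOPE_PKG)
		return topology_max_packages();
	return topology_max_packages() * topology_max_dies_per_package();
}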

View File

@@ -0,0 +1,77 @@
From 9439067951f4d857272836b35812af26650d9c16 Mon Sep 17 00:00:00 2001
From: K Prateek Nayak <kprateek.nayak@amd.com>
Date: Fri, 13 Sep 2024 15:21:41 +0000
Subject: x86/topology: Introduce topology_logical_core_id()
On x86, topology_core_id() returns a core ID that is only unique within the
PKG domain. Looking at match_smt() suggests that a core ID just needs to be
unique within an LLC domain. For use cases such as the per-core RAPL PMU, a
core ID that is unique across the entire system, which may have multiple
PKG domains, is needed. Introduce topology_logical_core_id() to derive such
a system-wide unique core ID.
Signed-off-by: K Prateek Nayak <kprateek.nayak@amd.com>
Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
Reviewed-by: Zhang Rui <rui.zhang@intel.com>
Tested-by: K Prateek Nayak <kprateek.nayak@amd.com>
---
Documentation/arch/x86/topology.rst | 4 ++++
arch/x86/include/asm/processor.h | 1 +
arch/x86/include/asm/topology.h | 1 +
arch/x86/kernel/cpu/debugfs.c | 1 +
arch/x86/kernel/cpu/topology_common.c | 1 +
5 files changed, 8 insertions(+)
--- a/Documentation/arch/x86/topology.rst
+++ b/Documentation/arch/x86/topology.rst
@@ -135,6 +135,10 @@ Thread-related topology information in t
The ID of the core to which a thread belongs. It is also printed in /proc/cpuinfo
"core_id."
+ - topology_logical_core_id();
+
+ The logical core ID to which a thread belongs.
+
System topology examples
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -98,6 +98,7 @@ struct cpuinfo_topology {
// Logical ID mappings
u32 logical_pkg_id;
u32 logical_die_id;
+ u32 logical_core_id;
// AMD Node ID and Nodes per Package info
u32 amd_node_id;
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -137,6 +137,7 @@ extern const struct cpumask *cpu_cluster
#define topology_logical_package_id(cpu) (cpu_data(cpu).topo.logical_pkg_id)
#define topology_physical_package_id(cpu) (cpu_data(cpu).topo.pkg_id)
#define topology_logical_die_id(cpu) (cpu_data(cpu).topo.logical_die_id)
+#define topology_logical_core_id(cpu) (cpu_data(cpu).topo.logical_core_id)
#define topology_die_id(cpu) (cpu_data(cpu).topo.die_id)
#define topology_core_id(cpu) (cpu_data(cpu).topo.core_id)
#define topology_ppin(cpu) (cpu_data(cpu).ppin)
--- a/arch/x86/kernel/cpu/debugfs.c
+++ b/arch/x86/kernel/cpu/debugfs.c
@@ -24,6 +24,7 @@ static int cpu_debug_show(struct seq_fil
seq_printf(m, "core_id: %u\n", c->topo.core_id);
seq_printf(m, "logical_pkg_id: %u\n", c->topo.logical_pkg_id);
seq_printf(m, "logical_die_id: %u\n", c->topo.logical_die_id);
+ seq_printf(m, "logical_core_id: %u\n", c->topo.logical_core_id);
seq_printf(m, "llc_id: %u\n", c->topo.llc_id);
seq_printf(m, "l2c_id: %u\n", c->topo.l2c_id);
seq_printf(m, "amd_node_id: %u\n", c->topo.amd_node_id);
--- a/arch/x86/kernel/cpu/topology_common.c
+++ b/arch/x86/kernel/cpu/topology_common.c
@@ -151,6 +151,7 @@ static void topo_set_ids(struct topo_sca
if (!early) {
c->topo.logical_pkg_id = topology_get_logical_id(apicid, TOPO_PKG_DOMAIN);
c->topo.logical_die_id = topology_get_logical_id(apicid, TOPO_DIE_DOMAIN);
+ c->topo.logical_core_id = topology_get_logical_id(apicid, TOPO_CORE_DOMAIN);
}
/* Package relative core ID */
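A quick way to see what the new macro adds over the existing one is a debug
print; a hypothetical sketch, assuming kernel context and a valid CPU
number.

#include <linux/printk.h>
#include <linux/topology.h>

static void example_dump_core_ids(unsigned int cpu)
{
	/* topology_core_id() repeats across packages; the new
	 * topology_logical_core_id() is unique across the whole system. */
	pr_info("cpu%u: core_id=%u logical_core_id=%u\n",
		cpu, topology_core_id(cpu), topology_logical_core_id(cpu));
}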

View File

@@ -0,0 +1,87 @@
From b8e1231d5f78314de8f9066baba7b1fdd5e59218 Mon Sep 17 00:00:00 2001
From: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
Date: Fri, 13 Sep 2024 15:21:42 +0000
Subject: perf/x86/rapl: Remove the cpu_to_rapl_pmu() function
Preparation for the addition of per-core RAPL energy counter support for
AMD CPUs. After that change, one CPU may be mapped to more than one
rapl_pmu (the package/die one and the per-core one), so it also makes sense
to use the get_rapl_pmu_idx() helper, which is already used to index into
the rapl_pmus->pmus[] array.
Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
---
arch/x86/events/rapl.c | 29 +++++++++++++----------------
1 file changed, 13 insertions(+), 16 deletions(-)
--- a/arch/x86/events/rapl.c
+++ b/arch/x86/events/rapl.c
@@ -162,17 +162,6 @@ static inline unsigned int get_rapl_pmu_
topology_logical_die_id(cpu);
}
-static inline struct rapl_pmu *cpu_to_rapl_pmu(unsigned int cpu)
-{
- unsigned int rapl_pmu_idx = get_rapl_pmu_idx(cpu);
-
- /*
- * The unsigned check also catches the '-1' return value for non
- * existent mappings in the topology map.
- */
- return rapl_pmu_idx < rapl_pmus->nr_rapl_pmu ? rapl_pmus->pmus[rapl_pmu_idx] : NULL;
-}
-
static inline u64 rapl_read_counter(struct perf_event *event)
{
u64 raw;
@@ -348,7 +337,7 @@ static void rapl_pmu_event_del(struct pe
static int rapl_pmu_event_init(struct perf_event *event)
{
u64 cfg = event->attr.config & RAPL_EVENT_MASK;
- int bit, ret = 0;
+ int bit, rapl_pmu_idx, ret = 0;
struct rapl_pmu *pmu;
/* only look at RAPL events */
@@ -376,8 +365,12 @@ static int rapl_pmu_event_init(struct pe
if (event->attr.sample_period) /* no sampling */
return -EINVAL;
+ rapl_pmu_idx = get_rapl_pmu_idx(event->cpu);
+ if (rapl_pmu_idx >= rapl_pmus->nr_rapl_pmu)
+ return -EINVAL;
+
/* must be done before validate_group */
- pmu = cpu_to_rapl_pmu(event->cpu);
+ pmu = rapl_pmus->pmus[rapl_pmu_idx];
if (!pmu)
return -EINVAL;
event->pmu_private = pmu;
@@ -623,12 +616,16 @@ static const struct attribute_group *rap
static void __init init_rapl_pmu(void)
{
struct rapl_pmu *pmu;
- int cpu;
+ int cpu, rapl_pmu_idx;
cpus_read_lock();
for_each_cpu(cpu, cpu_online_mask) {
- pmu = cpu_to_rapl_pmu(cpu);
+ rapl_pmu_idx = get_rapl_pmu_idx(cpu);
+ if (rapl_pmu_idx >= rapl_pmus->nr_rapl_pmu)
+ continue;
+
+ pmu = rapl_pmus->pmus[rapl_pmu_idx];
if (pmu)
continue;
pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu));
@@ -640,7 +637,7 @@ static void __init init_rapl_pmu(void)
pmu->timer_interval = ms_to_ktime(rapl_timer_ms);
rapl_hrtimer_init(pmu);
- rapl_pmus->pmus[get_rapl_pmu_idx(cpu)] = pmu;
+ rapl_pmus->pmus[rapl_pmu_idx] = pmu;
}
cpus_read_unlock();
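As a forward-looking sketch of the end state this prep enables: the names
rapl_pmus_pkg, rapl_pmus_core and the two-argument get_rapl_pmu_idx() only
appear in the later patches of this series, and the helper itself is
invented here. Once the per-core PMU exists, the same CPU indexes two
different rapl_pmu arrays, one per package/die and one per core.

static struct rapl_pmu *rapl_pmu_for_cpu(unsigned int cpu, bool per_core)
{
	/* Pick the per-core or the package/die PMU group, then index it
	 * by the topology id that matches that group's scope. */
	struct rapl_pmus *pmus = per_core ? rapl_pmus_core : rapl_pmus_pkg;
	unsigned int idx = get_rapl_pmu_idx(cpu, pmus->pmu.scope);

	return idx < pmus->nr_rapl_pmu ? pmus->rapl_pmu[idx] : NULL;
}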

View File

@@ -0,0 +1,240 @@
From 07ec9f38cac6eb6e5b0b062ef99e9458ba567de8 Mon Sep 17 00:00:00 2001
From: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
Date: Fri, 13 Sep 2024 15:21:43 +0000
Subject: perf/x86/rapl: Rename rapl_pmu variables
Rename struct rapl_pmu variables from "pmu" to "rapl_pmu" to avoid
confusion between variables of the two different structs pmu and
rapl_pmu. Since rapl_pmu also contains a pointer to struct pmu, the old
naming leads to constructs like pmu->pmu, which are needlessly
confusing; with this change they become the much more readable
rapl_pmu->pmu.
Also rename the "pmus" member in the rapl_pmus struct for the same reason.
No functional change.
Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
---
arch/x86/events/rapl.c | 93 +++++++++++++++++++++---------------------
1 file changed, 47 insertions(+), 46 deletions(-)
--- a/arch/x86/events/rapl.c
+++ b/arch/x86/events/rapl.c
@@ -116,7 +116,7 @@ struct rapl_pmu {
struct rapl_pmus {
struct pmu pmu;
unsigned int nr_rapl_pmu;
- struct rapl_pmu *pmus[] __counted_by(nr_rapl_pmu);
+ struct rapl_pmu *rapl_pmu[] __counted_by(nr_rapl_pmu);
};
enum rapl_unit_quirk {
@@ -223,34 +223,34 @@ static void rapl_start_hrtimer(struct ra
static enum hrtimer_restart rapl_hrtimer_handle(struct hrtimer *hrtimer)
{
- struct rapl_pmu *pmu = container_of(hrtimer, struct rapl_pmu, hrtimer);
+ struct rapl_pmu *rapl_pmu = container_of(hrtimer, struct rapl_pmu, hrtimer);
struct perf_event *event;
unsigned long flags;
- if (!pmu->n_active)
+ if (!rapl_pmu->n_active)
return HRTIMER_NORESTART;
- raw_spin_lock_irqsave(&pmu->lock, flags);
+ raw_spin_lock_irqsave(&rapl_pmu->lock, flags);
- list_for_each_entry(event, &pmu->active_list, active_entry)
+ list_for_each_entry(event, &rapl_pmu->active_list, active_entry)
rapl_event_update(event);
- raw_spin_unlock_irqrestore(&pmu->lock, flags);
+ raw_spin_unlock_irqrestore(&rapl_pmu->lock, flags);
- hrtimer_forward_now(hrtimer, pmu->timer_interval);
+ hrtimer_forward_now(hrtimer, rapl_pmu->timer_interval);
return HRTIMER_RESTART;
}
-static void rapl_hrtimer_init(struct rapl_pmu *pmu)
+static void rapl_hrtimer_init(struct rapl_pmu *rapl_pmu)
{
- struct hrtimer *hr = &pmu->hrtimer;
+ struct hrtimer *hr = &rapl_pmu->hrtimer;
hrtimer_init(hr, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
hr->function = rapl_hrtimer_handle;
}
-static void __rapl_pmu_event_start(struct rapl_pmu *pmu,
+static void __rapl_pmu_event_start(struct rapl_pmu *rapl_pmu,
struct perf_event *event)
{
if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
@@ -258,39 +258,39 @@ static void __rapl_pmu_event_start(struc
event->hw.state = 0;
- list_add_tail(&event->active_entry, &pmu->active_list);
+ list_add_tail(&event->active_entry, &rapl_pmu->active_list);
local64_set(&event->hw.prev_count, rapl_read_counter(event));
- pmu->n_active++;
- if (pmu->n_active == 1)
- rapl_start_hrtimer(pmu);
+ rapl_pmu->n_active++;
+ if (rapl_pmu->n_active == 1)
+ rapl_start_hrtimer(rapl_pmu);
}
static void rapl_pmu_event_start(struct perf_event *event, int mode)
{
- struct rapl_pmu *pmu = event->pmu_private;
+ struct rapl_pmu *rapl_pmu = event->pmu_private;
unsigned long flags;
- raw_spin_lock_irqsave(&pmu->lock, flags);
- __rapl_pmu_event_start(pmu, event);
- raw_spin_unlock_irqrestore(&pmu->lock, flags);
+ raw_spin_lock_irqsave(&rapl_pmu->lock, flags);
+ __rapl_pmu_event_start(rapl_pmu, event);
+ raw_spin_unlock_irqrestore(&rapl_pmu->lock, flags);
}
static void rapl_pmu_event_stop(struct perf_event *event, int mode)
{
- struct rapl_pmu *pmu = event->pmu_private;
+ struct rapl_pmu *rapl_pmu = event->pmu_private;
struct hw_perf_event *hwc = &event->hw;
unsigned long flags;
- raw_spin_lock_irqsave(&pmu->lock, flags);
+ raw_spin_lock_irqsave(&rapl_pmu->lock, flags);
/* mark event as deactivated and stopped */
if (!(hwc->state & PERF_HES_STOPPED)) {
- WARN_ON_ONCE(pmu->n_active <= 0);
- pmu->n_active--;
- if (pmu->n_active == 0)
- hrtimer_cancel(&pmu->hrtimer);
+ WARN_ON_ONCE(rapl_pmu->n_active <= 0);
+ rapl_pmu->n_active--;
+ if (rapl_pmu->n_active == 0)
+ hrtimer_cancel(&rapl_pmu->hrtimer);
list_del(&event->active_entry);
@@ -308,23 +308,23 @@ static void rapl_pmu_event_stop(struct p
hwc->state |= PERF_HES_UPTODATE;
}
- raw_spin_unlock_irqrestore(&pmu->lock, flags);
+ raw_spin_unlock_irqrestore(&rapl_pmu->lock, flags);
}
static int rapl_pmu_event_add(struct perf_event *event, int mode)
{
- struct rapl_pmu *pmu = event->pmu_private;
+ struct rapl_pmu *rapl_pmu = event->pmu_private;
struct hw_perf_event *hwc = &event->hw;
unsigned long flags;
- raw_spin_lock_irqsave(&pmu->lock, flags);
+ raw_spin_lock_irqsave(&rapl_pmu->lock, flags);
hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
if (mode & PERF_EF_START)
- __rapl_pmu_event_start(pmu, event);
+ __rapl_pmu_event_start(rapl_pmu, event);
- raw_spin_unlock_irqrestore(&pmu->lock, flags);
+ raw_spin_unlock_irqrestore(&rapl_pmu->lock, flags);
return 0;
}
@@ -338,7 +338,7 @@ static int rapl_pmu_event_init(struct pe
{
u64 cfg = event->attr.config & RAPL_EVENT_MASK;
int bit, rapl_pmu_idx, ret = 0;
- struct rapl_pmu *pmu;
+ struct rapl_pmu *rapl_pmu;
/* only look at RAPL events */
if (event->attr.type != rapl_pmus->pmu.type)
@@ -370,10 +370,11 @@ static int rapl_pmu_event_init(struct pe
return -EINVAL;
/* must be done before validate_group */
- pmu = rapl_pmus->pmus[rapl_pmu_idx];
- if (!pmu)
+ rapl_pmu = rapl_pmus->rapl_pmu[rapl_pmu_idx];
+ if (!rapl_pmu)
return -EINVAL;
- event->pmu_private = pmu;
+
+ event->pmu_private = rapl_pmu;
event->hw.event_base = rapl_msrs[bit].msr;
event->hw.config = cfg;
event->hw.idx = bit;
@@ -600,7 +601,7 @@ static void cleanup_rapl_pmus(void)
int i;
for (i = 0; i < rapl_pmus->nr_rapl_pmu; i++)
- kfree(rapl_pmus->pmus[i]);
+ kfree(rapl_pmus->rapl_pmu[i]);
kfree(rapl_pmus);
}
@@ -615,7 +616,7 @@ static const struct attribute_group *rap
static void __init init_rapl_pmu(void)
{
- struct rapl_pmu *pmu;
+ struct rapl_pmu *rapl_pmu;
int cpu, rapl_pmu_idx;
cpus_read_lock();
@@ -625,19 +626,19 @@ static void __init init_rapl_pmu(void)
if (rapl_pmu_idx >= rapl_pmus->nr_rapl_pmu)
continue;
- pmu = rapl_pmus->pmus[rapl_pmu_idx];
- if (pmu)
+ rapl_pmu = rapl_pmus->rapl_pmu[rapl_pmu_idx];
+ if (rapl_pmu)
continue;
- pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu));
- if (!pmu)
+ rapl_pmu = kzalloc_node(sizeof(*rapl_pmu), GFP_KERNEL, cpu_to_node(cpu));
+ if (!rapl_pmu)
continue;
- raw_spin_lock_init(&pmu->lock);
- INIT_LIST_HEAD(&pmu->active_list);
- pmu->pmu = &rapl_pmus->pmu;
- pmu->timer_interval = ms_to_ktime(rapl_timer_ms);
- rapl_hrtimer_init(pmu);
+ raw_spin_lock_init(&rapl_pmu->lock);
+ INIT_LIST_HEAD(&rapl_pmu->active_list);
+ rapl_pmu->pmu = &rapl_pmus->pmu;
+ rapl_pmu->timer_interval = ms_to_ktime(rapl_timer_ms);
+ rapl_hrtimer_init(rapl_pmu);
- rapl_pmus->pmus[rapl_pmu_idx] = pmu;
+ rapl_pmus->rapl_pmu[rapl_pmu_idx] = rapl_pmu;
}
cpus_read_unlock();
@@ -653,7 +654,7 @@ static int __init init_rapl_pmus(void)
rapl_pmu_scope = PERF_PMU_SCOPE_PKG;
}
- rapl_pmus = kzalloc(struct_size(rapl_pmus, pmus, nr_rapl_pmu), GFP_KERNEL);
+ rapl_pmus = kzalloc(struct_size(rapl_pmus, rapl_pmu, nr_rapl_pmu), GFP_KERNEL);
if (!rapl_pmus)
return -ENOMEM;

View File

@@ -0,0 +1,75 @@
From 68614752b9fd6b6bae6f9ab7b02fc28350c5a541 Mon Sep 17 00:00:00 2001
From: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
Date: Fri, 13 Sep 2024 15:47:56 +0000
Subject: perf/x86/rapl: Make rapl_model struct global
Preparation for the addition of per-core energy counter support for AMD CPUs.
As there will always be just one rapl_model variable on a system, make it
global so that it is easier to access from any function.
No functional change.
Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
---
arch/x86/events/rapl.c | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
--- a/arch/x86/events/rapl.c
+++ b/arch/x86/events/rapl.c
@@ -138,6 +138,7 @@ static struct rapl_pmus *rapl_pmus;
static unsigned int rapl_cntr_mask;
static u64 rapl_timer_ms;
static struct perf_msr *rapl_msrs;
+static struct rapl_model *rapl_model;
/*
* RAPL Package energy counter scope:
@@ -536,18 +537,18 @@ static struct perf_msr amd_rapl_msrs[] =
[PERF_RAPL_PSYS] = { 0, &rapl_events_psys_group, NULL, false, 0 },
};
-static int rapl_check_hw_unit(struct rapl_model *rm)
+static int rapl_check_hw_unit(void)
{
u64 msr_rapl_power_unit_bits;
int i;
/* protect rdmsrl() to handle virtualization */
- if (rdmsrl_safe(rm->msr_power_unit, &msr_rapl_power_unit_bits))
+ if (rdmsrl_safe(rapl_model->msr_power_unit, &msr_rapl_power_unit_bits))
return -1;
for (i = 0; i < NR_RAPL_DOMAINS; i++)
rapl_hw_unit[i] = (msr_rapl_power_unit_bits >> 8) & 0x1FULL;
- switch (rm->unit_quirk) {
+ switch (rapl_model->unit_quirk) {
/*
* DRAM domain on HSW server and KNL has fixed energy unit which can be
* different than the unit from power unit MSR. See
@@ -798,21 +799,20 @@ MODULE_DEVICE_TABLE(x86cpu, rapl_model_m
static int __init rapl_pmu_init(void)
{
const struct x86_cpu_id *id;
- struct rapl_model *rm;
int ret;
id = x86_match_cpu(rapl_model_match);
if (!id)
return -ENODEV;
- rm = (struct rapl_model *) id->driver_data;
+ rapl_model = (struct rapl_model *) id->driver_data;
- rapl_msrs = rm->rapl_msrs;
+ rapl_msrs = rapl_model->rapl_msrs;
rapl_cntr_mask = perf_msr_probe(rapl_msrs, PERF_RAPL_MAX,
- false, (void *) &rm->events);
+ false, (void *) &rapl_model->events);
- ret = rapl_check_hw_unit(rm);
+ ret = rapl_check_hw_unit();
if (ret)
return ret;

View File

@@ -0,0 +1,112 @@
From b10b887510ccb0b6bc7294888982b862703c9c32 Mon Sep 17 00:00:00 2001
From: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
Date: Fri, 13 Sep 2024 15:47:57 +0000
Subject: perf/x86/rapl: Add arguments to the cleanup and init functions
Prep for per-core RAPL PMU addition.
No functional change.
Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
---
arch/x86/events/rapl.c | 32 +++++++++++++++++++-------------
1 file changed, 19 insertions(+), 13 deletions(-)
--- a/arch/x86/events/rapl.c
+++ b/arch/x86/events/rapl.c
@@ -597,7 +597,7 @@ static void __init rapl_advertise(void)
}
}
-static void cleanup_rapl_pmus(void)
+static void cleanup_rapl_pmus(struct rapl_pmus *rapl_pmus)
{
int i;
@@ -615,7 +615,7 @@ static const struct attribute_group *rap
NULL,
};
-static void __init init_rapl_pmu(void)
+static void __init init_rapl_pmu(struct rapl_pmus *rapl_pmus)
{
struct rapl_pmu *rapl_pmu;
int cpu, rapl_pmu_idx;
@@ -645,20 +645,22 @@ static void __init init_rapl_pmu(void)
cpus_read_unlock();
}
-static int __init init_rapl_pmus(void)
+static int __init init_rapl_pmus(struct rapl_pmus **rapl_pmus_ptr, int rapl_pmu_scope)
{
- int nr_rapl_pmu = topology_max_packages() * topology_max_dies_per_package();
- int rapl_pmu_scope = PERF_PMU_SCOPE_DIE;
+ int nr_rapl_pmu;
+ struct rapl_pmus *rapl_pmus;
- if (rapl_pmu_is_pkg_scope()) {
- nr_rapl_pmu = topology_max_packages();
- rapl_pmu_scope = PERF_PMU_SCOPE_PKG;
- }
+ if (rapl_pmu_scope == PERF_PMU_SCOPE_PKG)
+ nr_rapl_pmu = topology_max_packages();
+ else
+ nr_rapl_pmu = topology_max_packages() * topology_max_dies_per_package();
rapl_pmus = kzalloc(struct_size(rapl_pmus, rapl_pmu, nr_rapl_pmu), GFP_KERNEL);
if (!rapl_pmus)
return -ENOMEM;
+ *rapl_pmus_ptr = rapl_pmus;
+
rapl_pmus->nr_rapl_pmu = nr_rapl_pmu;
rapl_pmus->pmu.attr_groups = rapl_attr_groups;
rapl_pmus->pmu.attr_update = rapl_attr_update;
@@ -673,7 +675,7 @@ static int __init init_rapl_pmus(void)
rapl_pmus->pmu.module = THIS_MODULE;
rapl_pmus->pmu.capabilities = PERF_PMU_CAP_NO_EXCLUDE;
- init_rapl_pmu();
+ init_rapl_pmu(rapl_pmus);
return 0;
}
@@ -799,8 +801,12 @@ MODULE_DEVICE_TABLE(x86cpu, rapl_model_m
static int __init rapl_pmu_init(void)
{
const struct x86_cpu_id *id;
+ int rapl_pmu_scope = PERF_PMU_SCOPE_DIE;
int ret;
+ if (rapl_pmu_is_pkg_scope())
+ rapl_pmu_scope = PERF_PMU_SCOPE_PKG;
+
id = x86_match_cpu(rapl_model_match);
if (!id)
return -ENODEV;
@@ -816,7 +822,7 @@ static int __init rapl_pmu_init(void)
if (ret)
return ret;
- ret = init_rapl_pmus();
+ ret = init_rapl_pmus(&rapl_pmus, rapl_pmu_scope);
if (ret)
return ret;
@@ -829,7 +835,7 @@ static int __init rapl_pmu_init(void)
out:
pr_warn("Initialization failed (%d), disabled\n", ret);
- cleanup_rapl_pmus();
+ cleanup_rapl_pmus(rapl_pmus);
return ret;
}
module_init(rapl_pmu_init);
@@ -837,6 +843,6 @@ module_init(rapl_pmu_init);
static void __exit intel_rapl_exit(void)
{
perf_pmu_unregister(&rapl_pmus->pmu);
- cleanup_rapl_pmus();
+ cleanup_rapl_pmus(rapl_pmus);
}
module_exit(intel_rapl_exit);

View File

@@ -0,0 +1,358 @@
From b5c83c40540298a39f8314034b705f1236b17a9f Mon Sep 17 00:00:00 2001
From: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
Date: Fri, 13 Sep 2024 15:47:58 +0000
Subject: perf/x86/rapl: Modify the generic variable names to *_pkg*
Prep for the addition of a power_per_core PMU to handle core-scope energy
consumption on AMD CPUs.
Replace the generic names with *_pkg* to differentiate between the
scopes of the two different PMUs and their variables.
No functional change.
Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
---
arch/x86/events/rapl.c | 118 ++++++++++++++++++++---------------------
1 file changed, 59 insertions(+), 59 deletions(-)
--- a/arch/x86/events/rapl.c
+++ b/arch/x86/events/rapl.c
@@ -70,18 +70,18 @@ MODULE_LICENSE("GPL");
/*
* RAPL energy status counters
*/
-enum perf_rapl_events {
+enum perf_rapl_pkg_events {
PERF_RAPL_PP0 = 0, /* all cores */
PERF_RAPL_PKG, /* entire package */
PERF_RAPL_RAM, /* DRAM */
PERF_RAPL_PP1, /* gpu */
PERF_RAPL_PSYS, /* psys */
- PERF_RAPL_MAX,
- NR_RAPL_DOMAINS = PERF_RAPL_MAX,
+ PERF_RAPL_PKG_EVENTS_MAX,
+ NR_RAPL_PKG_DOMAINS = PERF_RAPL_PKG_EVENTS_MAX,
};
-static const char *const rapl_domain_names[NR_RAPL_DOMAINS] __initconst = {
+static const char *const rapl_pkg_domain_names[NR_RAPL_PKG_DOMAINS] __initconst = {
"pp0-core",
"package",
"dram",
@@ -126,16 +126,16 @@ enum rapl_unit_quirk {
};
struct rapl_model {
- struct perf_msr *rapl_msrs;
- unsigned long events;
+ struct perf_msr *rapl_pkg_msrs;
+ unsigned long pkg_events;
unsigned int msr_power_unit;
enum rapl_unit_quirk unit_quirk;
};
/* 1/2^hw_unit Joule */
-static int rapl_hw_unit[NR_RAPL_DOMAINS] __read_mostly;
-static struct rapl_pmus *rapl_pmus;
-static unsigned int rapl_cntr_mask;
+static int rapl_pkg_hw_unit[NR_RAPL_PKG_DOMAINS] __read_mostly;
+static struct rapl_pmus *rapl_pmus_pkg;
+static unsigned int rapl_pkg_cntr_mask;
static u64 rapl_timer_ms;
static struct perf_msr *rapl_msrs;
static struct rapl_model *rapl_model;
@@ -149,7 +149,7 @@ static struct rapl_model *rapl_model;
* considered as either pkg-scope or die-scope, and we are considering
* them as die-scope.
*/
-#define rapl_pmu_is_pkg_scope() \
+#define rapl_pkg_pmu_is_pkg_scope() \
(boot_cpu_data.x86_vendor == X86_VENDOR_AMD || \
boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)
@@ -159,7 +159,7 @@ static struct rapl_model *rapl_model;
*/
static inline unsigned int get_rapl_pmu_idx(int cpu)
{
- return rapl_pmu_is_pkg_scope() ? topology_logical_package_id(cpu) :
+ return rapl_pkg_pmu_is_pkg_scope() ? topology_logical_package_id(cpu) :
topology_logical_die_id(cpu);
}
@@ -172,7 +172,7 @@ static inline u64 rapl_read_counter(stru
static inline u64 rapl_scale(u64 v, int cfg)
{
- if (cfg > NR_RAPL_DOMAINS) {
+ if (cfg > NR_RAPL_PKG_DOMAINS) {
pr_warn("Invalid domain %d, failed to scale data\n", cfg);
return v;
}
@@ -182,7 +182,7 @@ static inline u64 rapl_scale(u64 v, int
* or use ldexp(count, -32).
* Watts = Joules/Time delta
*/
- return v << (32 - rapl_hw_unit[cfg - 1]);
+ return v << (32 - rapl_pkg_hw_unit[cfg - 1]);
}
static u64 rapl_event_update(struct perf_event *event)
@@ -342,7 +342,7 @@ static int rapl_pmu_event_init(struct pe
struct rapl_pmu *rapl_pmu;
/* only look at RAPL events */
- if (event->attr.type != rapl_pmus->pmu.type)
+ if (event->attr.type != rapl_pmus_pkg->pmu.type)
return -ENOENT;
/* check only supported bits are set */
@@ -352,14 +352,14 @@ static int rapl_pmu_event_init(struct pe
if (event->cpu < 0)
return -EINVAL;
- if (!cfg || cfg >= NR_RAPL_DOMAINS + 1)
+ if (!cfg || cfg >= NR_RAPL_PKG_DOMAINS + 1)
return -EINVAL;
- cfg = array_index_nospec((long)cfg, NR_RAPL_DOMAINS + 1);
+ cfg = array_index_nospec((long)cfg, NR_RAPL_PKG_DOMAINS + 1);
bit = cfg - 1;
/* check event supported */
- if (!(rapl_cntr_mask & (1 << bit)))
+ if (!(rapl_pkg_cntr_mask & (1 << bit)))
return -EINVAL;
/* unsupported modes and filters */
@@ -367,11 +367,11 @@ static int rapl_pmu_event_init(struct pe
return -EINVAL;
rapl_pmu_idx = get_rapl_pmu_idx(event->cpu);
- if (rapl_pmu_idx >= rapl_pmus->nr_rapl_pmu)
+ if (rapl_pmu_idx >= rapl_pmus_pkg->nr_rapl_pmu)
return -EINVAL;
/* must be done before validate_group */
- rapl_pmu = rapl_pmus->rapl_pmu[rapl_pmu_idx];
+ rapl_pmu = rapl_pmus_pkg->rapl_pmu[rapl_pmu_idx];
if (!rapl_pmu)
return -EINVAL;
@@ -525,11 +525,11 @@ static struct perf_msr intel_rapl_spr_ms
};
/*
- * Force to PERF_RAPL_MAX size due to:
- * - perf_msr_probe(PERF_RAPL_MAX)
+ * Force to PERF_RAPL_PKG_EVENTS_MAX size due to:
+ * - perf_msr_probe(PERF_RAPL_PKG_EVENTS_MAX)
* - want to use same event codes across both architectures
*/
-static struct perf_msr amd_rapl_msrs[] = {
+static struct perf_msr amd_rapl_pkg_msrs[] = {
[PERF_RAPL_PP0] = { 0, &rapl_events_cores_group, NULL, false, 0 },
[PERF_RAPL_PKG] = { MSR_AMD_PKG_ENERGY_STATUS, &rapl_events_pkg_group, test_msr, false, RAPL_MSR_MASK },
[PERF_RAPL_RAM] = { 0, &rapl_events_ram_group, NULL, false, 0 },
@@ -545,8 +545,8 @@ static int rapl_check_hw_unit(void)
/* protect rdmsrl() to handle virtualization */
if (rdmsrl_safe(rapl_model->msr_power_unit, &msr_rapl_power_unit_bits))
return -1;
- for (i = 0; i < NR_RAPL_DOMAINS; i++)
- rapl_hw_unit[i] = (msr_rapl_power_unit_bits >> 8) & 0x1FULL;
+ for (i = 0; i < NR_RAPL_PKG_DOMAINS; i++)
+ rapl_pkg_hw_unit[i] = (msr_rapl_power_unit_bits >> 8) & 0x1FULL;
switch (rapl_model->unit_quirk) {
/*
@@ -556,11 +556,11 @@ static int rapl_check_hw_unit(void)
* of 2. Datasheet, September 2014, Reference Number: 330784-001 "
*/
case RAPL_UNIT_QUIRK_INTEL_HSW:
- rapl_hw_unit[PERF_RAPL_RAM] = 16;
+ rapl_pkg_hw_unit[PERF_RAPL_RAM] = 16;
break;
/* SPR uses a fixed energy unit for Psys domain. */
case RAPL_UNIT_QUIRK_INTEL_SPR:
- rapl_hw_unit[PERF_RAPL_PSYS] = 0;
+ rapl_pkg_hw_unit[PERF_RAPL_PSYS] = 0;
break;
default:
break;
@@ -575,9 +575,9 @@ static int rapl_check_hw_unit(void)
* if hw unit is 32, then we use 2 ms 1/200/2
*/
rapl_timer_ms = 2;
- if (rapl_hw_unit[0] < 32) {
+ if (rapl_pkg_hw_unit[0] < 32) {
rapl_timer_ms = (1000 / (2 * 100));
- rapl_timer_ms *= (1ULL << (32 - rapl_hw_unit[0] - 1));
+ rapl_timer_ms *= (1ULL << (32 - rapl_pkg_hw_unit[0] - 1));
}
return 0;
}
@@ -587,12 +587,12 @@ static void __init rapl_advertise(void)
int i;
pr_info("API unit is 2^-32 Joules, %d fixed counters, %llu ms ovfl timer\n",
- hweight32(rapl_cntr_mask), rapl_timer_ms);
+ hweight32(rapl_pkg_cntr_mask), rapl_timer_ms);
- for (i = 0; i < NR_RAPL_DOMAINS; i++) {
- if (rapl_cntr_mask & (1 << i)) {
+ for (i = 0; i < NR_RAPL_PKG_DOMAINS; i++) {
+ if (rapl_pkg_cntr_mask & (1 << i)) {
pr_info("hw unit of domain %s 2^-%d Joules\n",
- rapl_domain_names[i], rapl_hw_unit[i]);
+ rapl_pkg_domain_names[i], rapl_pkg_hw_unit[i]);
}
}
}
@@ -681,71 +681,71 @@ static int __init init_rapl_pmus(struct
}
static struct rapl_model model_snb = {
- .events = BIT(PERF_RAPL_PP0) |
+ .pkg_events = BIT(PERF_RAPL_PP0) |
BIT(PERF_RAPL_PKG) |
BIT(PERF_RAPL_PP1),
.msr_power_unit = MSR_RAPL_POWER_UNIT,
- .rapl_msrs = intel_rapl_msrs,
+ .rapl_pkg_msrs = intel_rapl_msrs,
};
static struct rapl_model model_snbep = {
- .events = BIT(PERF_RAPL_PP0) |
+ .pkg_events = BIT(PERF_RAPL_PP0) |
BIT(PERF_RAPL_PKG) |
BIT(PERF_RAPL_RAM),
.msr_power_unit = MSR_RAPL_POWER_UNIT,
- .rapl_msrs = intel_rapl_msrs,
+ .rapl_pkg_msrs = intel_rapl_msrs,
};
static struct rapl_model model_hsw = {
- .events = BIT(PERF_RAPL_PP0) |
+ .pkg_events = BIT(PERF_RAPL_PP0) |
BIT(PERF_RAPL_PKG) |
BIT(PERF_RAPL_RAM) |
BIT(PERF_RAPL_PP1),
.msr_power_unit = MSR_RAPL_POWER_UNIT,
- .rapl_msrs = intel_rapl_msrs,
+ .rapl_pkg_msrs = intel_rapl_msrs,
};
static struct rapl_model model_hsx = {
- .events = BIT(PERF_RAPL_PP0) |
+ .pkg_events = BIT(PERF_RAPL_PP0) |
BIT(PERF_RAPL_PKG) |
BIT(PERF_RAPL_RAM),
.unit_quirk = RAPL_UNIT_QUIRK_INTEL_HSW,
.msr_power_unit = MSR_RAPL_POWER_UNIT,
- .rapl_msrs = intel_rapl_msrs,
+ .rapl_pkg_msrs = intel_rapl_msrs,
};
static struct rapl_model model_knl = {
- .events = BIT(PERF_RAPL_PKG) |
+ .pkg_events = BIT(PERF_RAPL_PKG) |
BIT(PERF_RAPL_RAM),
.unit_quirk = RAPL_UNIT_QUIRK_INTEL_HSW,
.msr_power_unit = MSR_RAPL_POWER_UNIT,
- .rapl_msrs = intel_rapl_msrs,
+ .rapl_pkg_msrs = intel_rapl_msrs,
};
static struct rapl_model model_skl = {
- .events = BIT(PERF_RAPL_PP0) |
+ .pkg_events = BIT(PERF_RAPL_PP0) |
BIT(PERF_RAPL_PKG) |
BIT(PERF_RAPL_RAM) |
BIT(PERF_RAPL_PP1) |
BIT(PERF_RAPL_PSYS),
.msr_power_unit = MSR_RAPL_POWER_UNIT,
- .rapl_msrs = intel_rapl_msrs,
+ .rapl_pkg_msrs = intel_rapl_msrs,
};
static struct rapl_model model_spr = {
- .events = BIT(PERF_RAPL_PP0) |
+ .pkg_events = BIT(PERF_RAPL_PP0) |
BIT(PERF_RAPL_PKG) |
BIT(PERF_RAPL_RAM) |
BIT(PERF_RAPL_PSYS),
.unit_quirk = RAPL_UNIT_QUIRK_INTEL_SPR,
.msr_power_unit = MSR_RAPL_POWER_UNIT,
- .rapl_msrs = intel_rapl_spr_msrs,
+ .rapl_pkg_msrs = intel_rapl_spr_msrs,
};
static struct rapl_model model_amd_hygon = {
- .events = BIT(PERF_RAPL_PKG),
+ .pkg_events = BIT(PERF_RAPL_PKG),
.msr_power_unit = MSR_AMD_RAPL_POWER_UNIT,
- .rapl_msrs = amd_rapl_msrs,
+ .rapl_pkg_msrs = amd_rapl_pkg_msrs,
};
static const struct x86_cpu_id rapl_model_match[] __initconst = {
@@ -801,11 +801,11 @@ MODULE_DEVICE_TABLE(x86cpu, rapl_model_m
static int __init rapl_pmu_init(void)
{
const struct x86_cpu_id *id;
- int rapl_pmu_scope = PERF_PMU_SCOPE_DIE;
+ int rapl_pkg_pmu_scope = PERF_PMU_SCOPE_DIE;
int ret;
- if (rapl_pmu_is_pkg_scope())
- rapl_pmu_scope = PERF_PMU_SCOPE_PKG;
+ if (rapl_pkg_pmu_is_pkg_scope())
+ rapl_pkg_pmu_scope = PERF_PMU_SCOPE_PKG;
id = x86_match_cpu(rapl_model_match);
if (!id)
@@ -813,20 +813,20 @@ static int __init rapl_pmu_init(void)
rapl_model = (struct rapl_model *) id->driver_data;
- rapl_msrs = rapl_model->rapl_msrs;
+ rapl_msrs = rapl_model->rapl_pkg_msrs;
- rapl_cntr_mask = perf_msr_probe(rapl_msrs, PERF_RAPL_MAX,
- false, (void *) &rapl_model->events);
+ rapl_pkg_cntr_mask = perf_msr_probe(rapl_msrs, PERF_RAPL_PKG_EVENTS_MAX,
+ false, (void *) &rapl_model->pkg_events);
ret = rapl_check_hw_unit();
if (ret)
return ret;
- ret = init_rapl_pmus(&rapl_pmus, rapl_pmu_scope);
+ ret = init_rapl_pmus(&rapl_pmus_pkg, rapl_pkg_pmu_scope);
if (ret)
return ret;
- ret = perf_pmu_register(&rapl_pmus->pmu, "power", -1);
+ ret = perf_pmu_register(&rapl_pmus_pkg->pmu, "power", -1);
if (ret)
goto out;
@@ -835,14 +835,14 @@ static int __init rapl_pmu_init(void)
out:
pr_warn("Initialization failed (%d), disabled\n", ret);
- cleanup_rapl_pmus(rapl_pmus);
+ cleanup_rapl_pmus(rapl_pmus_pkg);
return ret;
}
module_init(rapl_pmu_init);
static void __exit intel_rapl_exit(void)
{
- perf_pmu_unregister(&rapl_pmus->pmu);
- cleanup_rapl_pmus(rapl_pmus);
+ perf_pmu_unregister(&rapl_pmus_pkg->pmu);
+ cleanup_rapl_pmus(rapl_pmus_pkg);
}
module_exit(intel_rapl_exit);
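An aside on the rapl_scale() hunk above: the hardware counters tick in
units of 1/2^hw_unit Joules, and the driver rebases each delta to the fixed
1/2^32 J unit advertised by the events' .scale attribute
(2.3283064365386962890625e-10). A worked example follows; the hw_unit value
of 16 is only an assumption for illustration.

#include <linux/types.h>

/* Sketch only: the same shift rapl_scale() applies, with concrete numbers.
 * Assume hw_unit = 16, i.e. one counter tick = 1/65536 J (a commonly seen
 * AMD energy status unit; other parts report different values). */
static u64 example_scale(u64 delta, int hw_unit)
{
	/* delta = 3 ticks, hw_unit = 16:
	 *   3 << (32 - 16) = 196608 fixed-point units
	 *   196608 * 2^-32 J = 3/65536 J ~= 45.8 uJ
	 * which is what userspace recovers after multiplying the count
	 * by the .scale value. */
	return delta << (32 - hw_unit);
}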

View File

@@ -0,0 +1,47 @@
From dbc0343069c8f86fad0d8d9075f70f79114ef10a Mon Sep 17 00:00:00 2001
From: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
Date: Fri, 13 Sep 2024 15:47:59 +0000
Subject: perf/x86/rapl: Remove the global variable rapl_msrs
After making the rapl_model struct global, the rapl_msrs global
variable isn't needed, so remove it.
This is also cleaner once the new per-core scope PMU is added, as we will
then need to maintain two rapl_msrs arrays (one for the per-core scope PMU
and one for the package scope PMU) inside the rapl_model struct.
Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
---
arch/x86/events/rapl.c | 7 ++-----
1 file changed, 2 insertions(+), 5 deletions(-)
--- a/arch/x86/events/rapl.c
+++ b/arch/x86/events/rapl.c
@@ -137,7 +137,6 @@ static int rapl_pkg_hw_unit[NR_RAPL_PKG_
static struct rapl_pmus *rapl_pmus_pkg;
static unsigned int rapl_pkg_cntr_mask;
static u64 rapl_timer_ms;
-static struct perf_msr *rapl_msrs;
static struct rapl_model *rapl_model;
/*
@@ -376,7 +375,7 @@ static int rapl_pmu_event_init(struct pe
return -EINVAL;
event->pmu_private = rapl_pmu;
- event->hw.event_base = rapl_msrs[bit].msr;
+ event->hw.event_base = rapl_model->rapl_pkg_msrs[bit].msr;
event->hw.config = cfg;
event->hw.idx = bit;
@@ -813,9 +812,7 @@ static int __init rapl_pmu_init(void)
rapl_model = (struct rapl_model *) id->driver_data;
- rapl_msrs = rapl_model->rapl_pkg_msrs;
-
- rapl_pkg_cntr_mask = perf_msr_probe(rapl_msrs, PERF_RAPL_PKG_EVENTS_MAX,
+ rapl_pkg_cntr_mask = perf_msr_probe(rapl_model->rapl_pkg_msrs, PERF_RAPL_PKG_EVENTS_MAX,
false, (void *) &rapl_model->pkg_events);
ret = rapl_check_hw_unit();

View File

@@ -0,0 +1,79 @@
From d6a5a28382558b896767a78db795d421015831a7 Mon Sep 17 00:00:00 2001
From: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
Date: Fri, 13 Sep 2024 15:48:00 +0000
Subject: perf/x86/rapl: Move the cntr_mask to rapl_pmus struct
Preparation for the addition of a per-core RAPL energy counter for AMD
CPUs.
Moving cntr_mask into the rapl_pmus struct, instead of adding a new global
cntr_mask for the per-core RAPL energy counter, ensures that the
"per_core_cntr_mask" is only created when needed (i.e. on AMD CPUs).
Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
---
arch/x86/events/rapl.c | 15 ++++++++-------
1 file changed, 8 insertions(+), 7 deletions(-)
--- a/arch/x86/events/rapl.c
+++ b/arch/x86/events/rapl.c
@@ -116,6 +116,7 @@ struct rapl_pmu {
struct rapl_pmus {
struct pmu pmu;
unsigned int nr_rapl_pmu;
+ unsigned int cntr_mask;
struct rapl_pmu *rapl_pmu[] __counted_by(nr_rapl_pmu);
};
@@ -135,7 +136,6 @@ struct rapl_model {
/* 1/2^hw_unit Joule */
static int rapl_pkg_hw_unit[NR_RAPL_PKG_DOMAINS] __read_mostly;
static struct rapl_pmus *rapl_pmus_pkg;
-static unsigned int rapl_pkg_cntr_mask;
static u64 rapl_timer_ms;
static struct rapl_model *rapl_model;
@@ -358,7 +358,7 @@ static int rapl_pmu_event_init(struct pe
bit = cfg - 1;
/* check event supported */
- if (!(rapl_pkg_cntr_mask & (1 << bit)))
+ if (!(rapl_pmus_pkg->cntr_mask & (1 << bit)))
return -EINVAL;
/* unsupported modes and filters */
@@ -586,10 +586,10 @@ static void __init rapl_advertise(void)
int i;
pr_info("API unit is 2^-32 Joules, %d fixed counters, %llu ms ovfl timer\n",
- hweight32(rapl_pkg_cntr_mask), rapl_timer_ms);
+ hweight32(rapl_pmus_pkg->cntr_mask), rapl_timer_ms);
for (i = 0; i < NR_RAPL_PKG_DOMAINS; i++) {
- if (rapl_pkg_cntr_mask & (1 << i)) {
+ if (rapl_pmus_pkg->cntr_mask & (1 << i)) {
pr_info("hw unit of domain %s 2^-%d Joules\n",
rapl_pkg_domain_names[i], rapl_pkg_hw_unit[i]);
}
@@ -812,9 +812,6 @@ static int __init rapl_pmu_init(void)
rapl_model = (struct rapl_model *) id->driver_data;
- rapl_pkg_cntr_mask = perf_msr_probe(rapl_model->rapl_pkg_msrs, PERF_RAPL_PKG_EVENTS_MAX,
- false, (void *) &rapl_model->pkg_events);
-
ret = rapl_check_hw_unit();
if (ret)
return ret;
@@ -823,6 +820,10 @@ static int __init rapl_pmu_init(void)
if (ret)
return ret;
+ rapl_pmus_pkg->cntr_mask = perf_msr_probe(rapl_model->rapl_pkg_msrs,
+ PERF_RAPL_PKG_EVENTS_MAX, false,
+ (void *) &rapl_model->pkg_events);
+
ret = perf_pmu_register(&rapl_pmus_pkg->pmu, "power", -1);
if (ret)
goto out;

View File

@@ -0,0 +1,439 @@
From 3cb480ec2950f4c6351c602552fc4f9a8e524b89 Mon Sep 17 00:00:00 2001
From: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
Date: Fri, 13 Sep 2024 15:48:01 +0000
Subject: perf/x86/rapl: Add per-core energy counter support for AMD CPUs
Add a new "power_per_core" PMU and "energy-per-core" event for
monitoring energy consumption by each core. The existing energy-cores
event aggregates the energy consumption at the package level.
This new event aligns with the AMD's per_core energy counters.
Tested the package level and core level PMU counters with workloads
pinned to different CPUs.
Results with workload pinned to CPU 1 in core 1 on a AMD Zen4 Genoa
machine:
$ perf stat -a --per-core -e power_per_core/energy-per-core/ sleep 1
Performance counter stats for 'system wide':
S0-D0-C0 1 0.02 Joules power_per_core/energy-per-core/
S0-D0-C1 1 5.72 Joules power_per_core/energy-per-core/
S0-D0-C2 1 0.02 Joules power_per_core/energy-per-core/
S0-D0-C3 1 0.02 Joules power_per_core/energy-per-core/
S0-D0-C4 1 0.02 Joules power_per_core/energy-per-core/
S0-D0-C5 1 0.02 Joules power_per_core/energy-per-core/
S0-D0-C6 1 0.02 Joules power_per_core/energy-per-core/
S0-D0-C7 1 0.02 Joules power_per_core/energy-per-core/
S0-D0-C8 1 0.02 Joules power_per_core/energy-per-core/
S0-D0-C9 1 0.02 Joules power_per_core/energy-per-core/
S0-D0-C10 1 0.02 Joules power_per_core/energy-per-core/
Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
---
arch/x86/events/rapl.c | 178 +++++++++++++++++++++++++++++++++--------
1 file changed, 143 insertions(+), 35 deletions(-)
--- a/arch/x86/events/rapl.c
+++ b/arch/x86/events/rapl.c
@@ -39,6 +39,10 @@
* event: rapl_energy_psys
* perf code: 0x5
*
+ * per_core counter: consumption of a single physical core
+ * event: rapl_energy_per_core (power_per_core PMU)
+ * perf code: 0x1
+ *
* We manage those counters as free running (read-only). They may be
* use simultaneously by other tools, such as turbostat.
*
@@ -81,6 +85,10 @@ enum perf_rapl_pkg_events {
NR_RAPL_PKG_DOMAINS = PERF_RAPL_PKG_EVENTS_MAX,
};
+#define PERF_RAPL_PER_CORE 0 /* per-core */
+#define PERF_RAPL_CORE_EVENTS_MAX 1
+#define NR_RAPL_CORE_DOMAINS PERF_RAPL_CORE_EVENTS_MAX
+
static const char *const rapl_pkg_domain_names[NR_RAPL_PKG_DOMAINS] __initconst = {
"pp0-core",
"package",
@@ -89,6 +97,8 @@ static const char *const rapl_pkg_domain
"psys",
};
+static const char *const rapl_core_domain_name __initconst = "per-core";
+
/*
* event code: LSB 8 bits, passed in attr->config
* any other bit is reserved
@@ -128,14 +138,18 @@ enum rapl_unit_quirk {
struct rapl_model {
struct perf_msr *rapl_pkg_msrs;
+ struct perf_msr *rapl_core_msrs;
unsigned long pkg_events;
+ unsigned long core_events;
unsigned int msr_power_unit;
enum rapl_unit_quirk unit_quirk;
};
/* 1/2^hw_unit Joule */
static int rapl_pkg_hw_unit[NR_RAPL_PKG_DOMAINS] __read_mostly;
+static int rapl_core_hw_unit __read_mostly;
static struct rapl_pmus *rapl_pmus_pkg;
+static struct rapl_pmus *rapl_pmus_core;
static u64 rapl_timer_ms;
static struct rapl_model *rapl_model;
@@ -156,10 +170,14 @@ static struct rapl_model *rapl_model;
* Helper function to get the correct topology id according to the
* RAPL PMU scope.
*/
-static inline unsigned int get_rapl_pmu_idx(int cpu)
+static inline unsigned int get_rapl_pmu_idx(int cpu, int scope)
{
- return rapl_pkg_pmu_is_pkg_scope() ? topology_logical_package_id(cpu) :
- topology_logical_die_id(cpu);
+ if (scope == PERF_PMU_SCOPE_PKG)
+ return topology_logical_package_id(cpu);
+ else if (scope == PERF_PMU_SCOPE_DIE)
+ return topology_logical_die_id(cpu);
+ else
+ return topology_logical_core_id(cpu);
}
static inline u64 rapl_read_counter(struct perf_event *event)
@@ -169,19 +187,20 @@ static inline u64 rapl_read_counter(stru
return raw;
}
-static inline u64 rapl_scale(u64 v, int cfg)
+static inline u64 rapl_scale(u64 v, struct perf_event *event)
{
- if (cfg > NR_RAPL_PKG_DOMAINS) {
- pr_warn("Invalid domain %d, failed to scale data\n", cfg);
- return v;
- }
+ int hw_unit = rapl_pkg_hw_unit[event->hw.config - 1];
+
+ if (event->pmu->scope == PERF_PMU_SCOPE_CORE)
+ hw_unit = rapl_core_hw_unit;
+
/*
* scale delta to smallest unit (1/2^32)
* users must then scale back: count * 1/(1e9*2^32) to get Joules
* or use ldexp(count, -32).
* Watts = Joules/Time delta
*/
- return v << (32 - rapl_pkg_hw_unit[cfg - 1]);
+ return v << (32 - hw_unit);
}
static u64 rapl_event_update(struct perf_event *event)
@@ -208,7 +227,7 @@ static u64 rapl_event_update(struct perf
delta = (new_raw_count << shift) - (prev_raw_count << shift);
delta >>= shift;
- sdelta = rapl_scale(delta, event->hw.config);
+ sdelta = rapl_scale(delta, event);
local64_add(sdelta, &event->count);
@@ -337,12 +356,13 @@ static void rapl_pmu_event_del(struct pe
static int rapl_pmu_event_init(struct perf_event *event)
{
u64 cfg = event->attr.config & RAPL_EVENT_MASK;
- int bit, rapl_pmu_idx, ret = 0;
+ int bit, rapl_pmus_scope, rapl_pmu_idx, ret = 0;
struct rapl_pmu *rapl_pmu;
+ struct rapl_pmus *rapl_pmus;
- /* only look at RAPL events */
- if (event->attr.type != rapl_pmus_pkg->pmu.type)
- return -ENOENT;
+ /* unsupported modes and filters */
+ if (event->attr.sample_period) /* no sampling */
+ return -EINVAL;
/* check only supported bits are set */
if (event->attr.config & ~RAPL_EVENT_MASK)
@@ -351,31 +371,49 @@ static int rapl_pmu_event_init(struct pe
if (event->cpu < 0)
return -EINVAL;
- if (!cfg || cfg >= NR_RAPL_PKG_DOMAINS + 1)
+ rapl_pmus = container_of(event->pmu, struct rapl_pmus, pmu);
+ if (!rapl_pmus)
return -EINVAL;
+ rapl_pmus_scope = rapl_pmus->pmu.scope;
- cfg = array_index_nospec((long)cfg, NR_RAPL_PKG_DOMAINS + 1);
- bit = cfg - 1;
-
- /* check event supported */
- if (!(rapl_pmus_pkg->cntr_mask & (1 << bit)))
+ if (rapl_pmus_scope == PERF_PMU_SCOPE_PKG || rapl_pmus_scope == PERF_PMU_SCOPE_DIE) {
+ /* only look at RAPL package events */
+ if (event->attr.type != rapl_pmus_pkg->pmu.type)
+ return -ENOENT;
+
+ cfg = array_index_nospec((long)cfg, NR_RAPL_PKG_DOMAINS + 1);
+ if (!cfg || cfg >= NR_RAPL_PKG_DOMAINS + 1)
+ return -EINVAL;
+
+ bit = cfg - 1;
+ event->hw.event_base = rapl_model->rapl_pkg_msrs[bit].msr;
+ } else if (rapl_pmus_scope == PERF_PMU_SCOPE_CORE) {
+ /* only look at RAPL per-core events */
+ if (event->attr.type != rapl_pmus_core->pmu.type)
+ return -ENOENT;
+
+ cfg = array_index_nospec((long)cfg, NR_RAPL_CORE_DOMAINS + 1);
+ if (!cfg || cfg >= NR_RAPL_PKG_DOMAINS + 1)
+ return -EINVAL;
+
+ bit = cfg - 1;
+ event->hw.event_base = rapl_model->rapl_core_msrs[bit].msr;
+ } else
return -EINVAL;
- /* unsupported modes and filters */
- if (event->attr.sample_period) /* no sampling */
+ /* check event supported */
+ if (!(rapl_pmus->cntr_mask & (1 << bit)))
return -EINVAL;
- rapl_pmu_idx = get_rapl_pmu_idx(event->cpu);
- if (rapl_pmu_idx >= rapl_pmus_pkg->nr_rapl_pmu)
+ rapl_pmu_idx = get_rapl_pmu_idx(event->cpu, rapl_pmus_scope);
+ if (rapl_pmu_idx >= rapl_pmus->nr_rapl_pmu)
return -EINVAL;
-
/* must be done before validate_group */
- rapl_pmu = rapl_pmus_pkg->rapl_pmu[rapl_pmu_idx];
+ rapl_pmu = rapl_pmus->rapl_pmu[rapl_pmu_idx];
if (!rapl_pmu)
return -EINVAL;
event->pmu_private = rapl_pmu;
- event->hw.event_base = rapl_model->rapl_pkg_msrs[bit].msr;
event->hw.config = cfg;
event->hw.idx = bit;
@@ -392,12 +430,14 @@ RAPL_EVENT_ATTR_STR(energy-pkg , rapl
RAPL_EVENT_ATTR_STR(energy-ram , rapl_ram, "event=0x03");
RAPL_EVENT_ATTR_STR(energy-gpu , rapl_gpu, "event=0x04");
RAPL_EVENT_ATTR_STR(energy-psys, rapl_psys, "event=0x05");
+RAPL_EVENT_ATTR_STR(energy-per-core, rapl_per_core, "event=0x01");
RAPL_EVENT_ATTR_STR(energy-cores.unit, rapl_cores_unit, "Joules");
RAPL_EVENT_ATTR_STR(energy-pkg.unit , rapl_pkg_unit, "Joules");
RAPL_EVENT_ATTR_STR(energy-ram.unit , rapl_ram_unit, "Joules");
RAPL_EVENT_ATTR_STR(energy-gpu.unit , rapl_gpu_unit, "Joules");
RAPL_EVENT_ATTR_STR(energy-psys.unit, rapl_psys_unit, "Joules");
+RAPL_EVENT_ATTR_STR(energy-per-core.unit, rapl_per_core_unit, "Joules");
/*
* we compute in 0.23 nJ increments regardless of MSR
@@ -407,6 +447,7 @@ RAPL_EVENT_ATTR_STR(energy-pkg.scale,
RAPL_EVENT_ATTR_STR(energy-ram.scale, rapl_ram_scale, "2.3283064365386962890625e-10");
RAPL_EVENT_ATTR_STR(energy-gpu.scale, rapl_gpu_scale, "2.3283064365386962890625e-10");
RAPL_EVENT_ATTR_STR(energy-psys.scale, rapl_psys_scale, "2.3283064365386962890625e-10");
+RAPL_EVENT_ATTR_STR(energy-per-core.scale, rapl_per_core_scale, "2.3283064365386962890625e-10");
/*
* There are no default events, but we need to create
@@ -439,6 +480,12 @@ static const struct attribute_group *rap
NULL,
};
+static const struct attribute_group *rapl_per_core_attr_groups[] = {
+ &rapl_pmu_format_group,
+ &rapl_pmu_events_group,
+ NULL,
+};
+
static struct attribute *rapl_events_cores[] = {
EVENT_PTR(rapl_cores),
EVENT_PTR(rapl_cores_unit),
@@ -499,6 +546,18 @@ static struct attribute_group rapl_event
.attrs = rapl_events_psys,
};
+static struct attribute *rapl_events_per_core[] = {
+ EVENT_PTR(rapl_per_core),
+ EVENT_PTR(rapl_per_core_unit),
+ EVENT_PTR(rapl_per_core_scale),
+ NULL,
+};
+
+static struct attribute_group rapl_events_per_core_group = {
+ .name = "events",
+ .attrs = rapl_events_per_core,
+};
+
static bool test_msr(int idx, void *data)
{
return test_bit(idx, (unsigned long *) data);
@@ -536,6 +595,11 @@ static struct perf_msr amd_rapl_pkg_msrs
[PERF_RAPL_PSYS] = { 0, &rapl_events_psys_group, NULL, false, 0 },
};
+static struct perf_msr amd_rapl_core_msrs[] = {
+ [PERF_RAPL_PER_CORE] = { MSR_AMD_CORE_ENERGY_STATUS, &rapl_events_per_core_group,
+ test_msr, false, RAPL_MSR_MASK },
+};
+
static int rapl_check_hw_unit(void)
{
u64 msr_rapl_power_unit_bits;
@@ -547,6 +611,8 @@ static int rapl_check_hw_unit(void)
for (i = 0; i < NR_RAPL_PKG_DOMAINS; i++)
rapl_pkg_hw_unit[i] = (msr_rapl_power_unit_bits >> 8) & 0x1FULL;
+ rapl_core_hw_unit = (msr_rapl_power_unit_bits >> 8) & 0x1FULL;
+
switch (rapl_model->unit_quirk) {
/*
* DRAM domain on HSW server and KNL has fixed energy unit which can be
@@ -565,7 +631,6 @@ static int rapl_check_hw_unit(void)
break;
}
-
/*
* Calculate the timer rate:
* Use reference of 200W for scaling the timeout to avoid counter
@@ -584,9 +649,13 @@ static int rapl_check_hw_unit(void)
static void __init rapl_advertise(void)
{
int i;
+ int num_counters = hweight32(rapl_pmus_pkg->cntr_mask);
+
+ if (rapl_pmus_core)
+ num_counters += hweight32(rapl_pmus_core->cntr_mask);
pr_info("API unit is 2^-32 Joules, %d fixed counters, %llu ms ovfl timer\n",
- hweight32(rapl_pmus_pkg->cntr_mask), rapl_timer_ms);
+ num_counters, rapl_timer_ms);
for (i = 0; i < NR_RAPL_PKG_DOMAINS; i++) {
if (rapl_pmus_pkg->cntr_mask & (1 << i)) {
@@ -594,6 +663,10 @@ static void __init rapl_advertise(void)
rapl_pkg_domain_names[i], rapl_pkg_hw_unit[i]);
}
}
+
+ if (rapl_pmus_core && (rapl_pmus_core->cntr_mask & (1 << PERF_RAPL_PER_CORE)))
+ pr_info("hw unit of domain %s 2^-%d Joules\n",
+ rapl_core_domain_name, rapl_core_hw_unit);
}
static void cleanup_rapl_pmus(struct rapl_pmus *rapl_pmus)
@@ -614,6 +687,10 @@ static const struct attribute_group *rap
NULL,
};
+static const struct attribute_group *rapl_per_core_attr_update[] = {
+ &rapl_events_per_core_group,
+};
+
static void __init init_rapl_pmu(struct rapl_pmus *rapl_pmus)
{
struct rapl_pmu *rapl_pmu;
@@ -622,10 +699,9 @@ static void __init init_rapl_pmu(struct
cpus_read_lock();
for_each_cpu(cpu, cpu_online_mask) {
- rapl_pmu_idx = get_rapl_pmu_idx(cpu);
+ rapl_pmu_idx = get_rapl_pmu_idx(cpu, rapl_pmus->pmu.scope);
if (rapl_pmu_idx >= rapl_pmus->nr_rapl_pmu)
continue;
-
rapl_pmu = rapl_pmus->rapl_pmu[rapl_pmu_idx];
if (rapl_pmu)
continue;
@@ -644,15 +720,19 @@ static void __init init_rapl_pmu(struct
cpus_read_unlock();
}
-static int __init init_rapl_pmus(struct rapl_pmus **rapl_pmus_ptr, int rapl_pmu_scope)
+static int __init init_rapl_pmus(struct rapl_pmus **rapl_pmus_ptr, int rapl_pmu_scope,
+ const struct attribute_group **rapl_attr_groups,
+ const struct attribute_group **rapl_attr_update)
{
int nr_rapl_pmu;
struct rapl_pmus *rapl_pmus;
if (rapl_pmu_scope == PERF_PMU_SCOPE_PKG)
nr_rapl_pmu = topology_max_packages();
- else
+ else if (rapl_pmu_scope == PERF_PMU_SCOPE_DIE)
nr_rapl_pmu = topology_max_packages() * topology_max_dies_per_package();
+ else
+ nr_rapl_pmu = topology_max_packages() * topology_num_cores_per_package();
rapl_pmus = kzalloc(struct_size(rapl_pmus, rapl_pmu, nr_rapl_pmu), GFP_KERNEL);
if (!rapl_pmus)
@@ -743,8 +823,10 @@ static struct rapl_model model_spr = {
static struct rapl_model model_amd_hygon = {
.pkg_events = BIT(PERF_RAPL_PKG),
+ .core_events = BIT(PERF_RAPL_PER_CORE),
.msr_power_unit = MSR_AMD_RAPL_POWER_UNIT,
.rapl_pkg_msrs = amd_rapl_pkg_msrs,
+ .rapl_core_msrs = amd_rapl_core_msrs,
};
static const struct x86_cpu_id rapl_model_match[] __initconst = {
@@ -816,7 +898,8 @@ static int __init rapl_pmu_init(void)
if (ret)
return ret;
- ret = init_rapl_pmus(&rapl_pmus_pkg, rapl_pkg_pmu_scope);
+ ret = init_rapl_pmus(&rapl_pmus_pkg, rapl_pkg_pmu_scope, rapl_attr_groups,
+ rapl_attr_update);
if (ret)
return ret;
@@ -828,6 +911,27 @@ static int __init rapl_pmu_init(void)
if (ret)
goto out;
+ if (rapl_model->core_events) {
+ ret = init_rapl_pmus(&rapl_pmus_core, PERF_PMU_SCOPE_CORE,
+ rapl_per_core_attr_groups,
+ rapl_per_core_attr_update);
+ if (ret) {
+ pr_warn("Per-core PMU initialization failed (%d)\n", ret);
+ goto per_core_init_failed;
+ }
+
+ rapl_pmus_core->cntr_mask = perf_msr_probe(rapl_model->rapl_core_msrs,
+ PERF_RAPL_CORE_EVENTS_MAX, false,
+ (void *) &rapl_model->core_events);
+
+ ret = perf_pmu_register(&rapl_pmus_core->pmu, "power_per_core", -1);
+ if (ret) {
+ pr_warn("Per-core PMU registration failed (%d)\n", ret);
+ cleanup_rapl_pmus(rapl_pmus_core);
+ }
+ }
+
+per_core_init_failed:
rapl_advertise();
return 0;
@@ -840,6 +944,10 @@ module_init(rapl_pmu_init);
static void __exit intel_rapl_exit(void)
{
+ if (rapl_pmus_core) {
+ perf_pmu_unregister(&rapl_pmus_core->pmu);
+ cleanup_rapl_pmus(rapl_pmus_core);
+ }
perf_pmu_unregister(&rapl_pmus_pkg->pmu);
cleanup_rapl_pmus(rapl_pmus_pkg);
}
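
For context, a minimal user-space sketch (illustration only, not part of the patch) of how the new per-core counter could be read once the PMU is registered as "power_per_core" with event code 0x1, as in the hunks above. The sysfs path for the dynamic PMU type and the need for sufficient perf privileges are assumptions; counts are converted with the advertised 2^-32 Joule scale.

/* read_per_core_energy.c - hypothetical example; build with gcc, link -lm */
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static long perf_event_open(struct perf_event_attr *attr, pid_t pid,
			    int cpu, int group_fd, unsigned long flags)
{
	return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int main(void)
{
	struct perf_event_attr attr;
	uint64_t count;
	FILE *f;
	int type, fd;

	/* dynamic PMU type id as registered by perf_pmu_register("power_per_core") */
	f = fopen("/sys/bus/event_source/devices/power_per_core/type", "r");
	if (!f)
		return 1;
	if (fscanf(f, "%d", &type) != 1) {
		fclose(f);
		return 1;
	}
	fclose(f);

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = type;
	attr.config = 0x1;	/* rapl_energy_per_core, see "perf code: 0x1" above */

	fd = perf_event_open(&attr, -1, /* cpu */ 0, -1, 0);
	if (fd < 0)
		return 1;

	sleep(1);
	if (read(fd, &count, sizeof(count)) != sizeof(count))
		return 1;

	/* counts are pre-scaled to 2^-32 J, matching energy-per-core.scale */
	printf("core 0 consumed %.6f J over ~1 s\n", (double)count * ldexp(1.0, -32));
	return 0;
}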

View File

@@ -0,0 +1,180 @@
From d31e903a364802c068ff23bdd448cc70eda71a7c Mon Sep 17 00:00:00 2001
From: Christian Loehle <christian.loehle@arm.com>
Date: Thu, 5 Sep 2024 10:26:38 +0100
Subject: cpuidle: menu: Remove iowait influence
Remove CPU iowaiters influence on idle state selection.
Remove the menu notion of performance multiplier which increased with
the number of tasks that went to iowait sleep on this CPU and haven't
woken up yet.
Relying on iowait for cpuidle is problematic for a few reasons:
1. There is no guarantee that an iowaiting task will wake up on the
same CPU.
2. The task being in iowait says nothing about the idle duration; we
could be selecting shallower states for a long time.
3. The task being in iowait doesn't always imply a performance hit
with increased latency.
4. If there is such a performance hit, the number of iowaiting tasks
doesn't directly correlate.
5. The definition of iowait altogether is vague at best; it is
sprinkled across kernel code.
Signed-off-by: Christian Loehle <christian.loehle@arm.com>
---
drivers/cpuidle/governors/menu.c | 76 ++++----------------------------
1 file changed, 9 insertions(+), 67 deletions(-)
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -19,7 +19,7 @@
#include "gov.h"
-#define BUCKETS 12
+#define BUCKETS 6
#define INTERVAL_SHIFT 3
#define INTERVALS (1UL << INTERVAL_SHIFT)
#define RESOLUTION 1024
@@ -29,12 +29,11 @@
/*
* Concepts and ideas behind the menu governor
*
- * For the menu governor, there are 3 decision factors for picking a C
+ * For the menu governor, there are 2 decision factors for picking a C
* state:
* 1) Energy break even point
- * 2) Performance impact
- * 3) Latency tolerance (from pmqos infrastructure)
- * These three factors are treated independently.
+ * 2) Latency tolerance (from pmqos infrastructure)
+ * These two factors are treated independently.
*
* Energy break even point
* -----------------------
@@ -75,30 +74,6 @@
* intervals and if the stand deviation of these 8 intervals is below a
* threshold value, we use the average of these intervals as prediction.
*
- * Limiting Performance Impact
- * ---------------------------
- * C states, especially those with large exit latencies, can have a real
- * noticeable impact on workloads, which is not acceptable for most sysadmins,
- * and in addition, less performance has a power price of its own.
- *
- * As a general rule of thumb, menu assumes that the following heuristic
- * holds:
- * The busier the system, the less impact of C states is acceptable
- *
- * This rule-of-thumb is implemented using a performance-multiplier:
- * If the exit latency times the performance multiplier is longer than
- * the predicted duration, the C state is not considered a candidate
- * for selection due to a too high performance impact. So the higher
- * this multiplier is, the longer we need to be idle to pick a deep C
- * state, and thus the less likely a busy CPU will hit such a deep
- * C state.
- *
- * Currently there is only one value determining the factor:
- * 10 points are added for each process that is waiting for IO on this CPU.
- * (This value was experimentally determined.)
- * Utilization is no longer a factor as it was shown that it never contributed
- * significantly to the performance multiplier in the first place.
- *
*/
struct menu_device {
@@ -112,19 +87,10 @@ struct menu_device {
int interval_ptr;
};
-static inline int which_bucket(u64 duration_ns, unsigned int nr_iowaiters)
+static inline int which_bucket(u64 duration_ns)
{
int bucket = 0;
- /*
- * We keep two groups of stats; one with no
- * IO pending, one without.
- * This allows us to calculate
- * E(duration)|iowait
- */
- if (nr_iowaiters)
- bucket = BUCKETS/2;
-
if (duration_ns < 10ULL * NSEC_PER_USEC)
return bucket;
if (duration_ns < 100ULL * NSEC_PER_USEC)
@@ -138,19 +104,6 @@ static inline int which_bucket(u64 durat
return bucket + 5;
}
-/*
- * Return a multiplier for the exit latency that is intended
- * to take performance requirements into account.
- * The more performance critical we estimate the system
- * to be, the higher this multiplier, and thus the higher
- * the barrier to go to an expensive C state.
- */
-static inline int performance_multiplier(unsigned int nr_iowaiters)
-{
- /* for IO wait tasks (per cpu!) we add 10x each */
- return 1 + 10 * nr_iowaiters;
-}
-
static DEFINE_PER_CPU(struct menu_device, menu_devices);
static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev);
@@ -258,8 +211,6 @@ static int menu_select(struct cpuidle_dr
struct menu_device *data = this_cpu_ptr(&menu_devices);
s64 latency_req = cpuidle_governor_latency_req(dev->cpu);
u64 predicted_ns;
- u64 interactivity_req;
- unsigned int nr_iowaiters;
ktime_t delta, delta_tick;
int i, idx;
@@ -268,8 +219,6 @@ static int menu_select(struct cpuidle_dr
data->needs_update = 0;
}
- nr_iowaiters = nr_iowait_cpu(dev->cpu);
-
/* Find the shortest expected idle interval. */
predicted_ns = get_typical_interval(data) * NSEC_PER_USEC;
if (predicted_ns > RESIDENCY_THRESHOLD_NS) {
@@ -283,7 +232,7 @@ static int menu_select(struct cpuidle_dr
}
data->next_timer_ns = delta;
- data->bucket = which_bucket(data->next_timer_ns, nr_iowaiters);
+ data->bucket = which_bucket(data->next_timer_ns);
/* Round up the result for half microseconds. */
timer_us = div_u64((RESOLUTION * DECAY * NSEC_PER_USEC) / 2 +
@@ -301,7 +250,7 @@ static int menu_select(struct cpuidle_dr
*/
data->next_timer_ns = KTIME_MAX;
delta_tick = TICK_NSEC / 2;
- data->bucket = which_bucket(KTIME_MAX, nr_iowaiters);
+ data->bucket = which_bucket(KTIME_MAX);
}
if (unlikely(drv->state_count <= 1 || latency_req == 0) ||
@@ -328,15 +277,8 @@ static int menu_select(struct cpuidle_dr
*/
if (predicted_ns < TICK_NSEC)
predicted_ns = data->next_timer_ns;
- } else {
- /*
- * Use the performance multiplier and the user-configurable
- * latency_req to determine the maximum exit latency.
- */
- interactivity_req = div64_u64(predicted_ns,
- performance_multiplier(nr_iowaiters));
- if (latency_req > interactivity_req)
- latency_req = interactivity_req;
+ } else if (latency_req > predicted_ns) {
+ latency_req = predicted_ns;
}
/*

View File

@@ -0,0 +1,58 @@
From 3f840a42780323a4437dd1a417488d141c33af15 Mon Sep 17 00:00:00 2001
From: Christian Loehle <christian.loehle@arm.com>
Date: Thu, 5 Sep 2024 10:26:39 +0100
Subject: cpuidle: Prefer teo over menu governor
Since menu no longer has the interactivity boost teo works better
overall, so make it the default.
Signed-off-by: Christian Loehle <christian.loehle@arm.com>
---
drivers/cpuidle/Kconfig | 5 +----
drivers/cpuidle/governors/menu.c | 2 +-
drivers/cpuidle/governors/teo.c | 2 +-
3 files changed, 3 insertions(+), 6 deletions(-)
--- a/drivers/cpuidle/Kconfig
+++ b/drivers/cpuidle/Kconfig
@@ -5,7 +5,7 @@ config CPU_IDLE
bool "CPU idle PM support"
default y if ACPI || PPC_PSERIES
select CPU_IDLE_GOV_LADDER if (!NO_HZ && !NO_HZ_IDLE)
- select CPU_IDLE_GOV_MENU if (NO_HZ || NO_HZ_IDLE) && !CPU_IDLE_GOV_TEO
+ select CPU_IDLE_GOV_TEO if (NO_HZ || NO_HZ_IDLE) && !CPU_IDLE_GOV_MENU
help
CPU idle is a generic framework for supporting software-controlled
idle processor power management. It includes modular cross-platform
@@ -30,9 +30,6 @@ config CPU_IDLE_GOV_TEO
This governor implements a simplified idle state selection method
focused on timer events and does not do any interactivity boosting.
- Some workloads benefit from using it and it generally should be safe
- to use. Say Y here if you are not happy with the alternatives.
-
config CPU_IDLE_GOV_HALTPOLL
bool "Haltpoll governor (for virtualized systems)"
depends on KVM_GUEST
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -508,7 +508,7 @@ static int menu_enable_device(struct cpu
static struct cpuidle_governor menu_governor = {
.name = "menu",
- .rating = 20,
+ .rating = 19,
.enable = menu_enable_device,
.select = menu_select,
.reflect = menu_reflect,
--- a/drivers/cpuidle/governors/teo.c
+++ b/drivers/cpuidle/governors/teo.c
@@ -537,7 +537,7 @@ static int teo_enable_device(struct cpui
static struct cpuidle_governor teo_governor = {
.name = "teo",
- .rating = 19,
+ .rating = 20,
.enable = teo_enable_device,
.select = teo_select,
.reflect = teo_reflect,

View File

@@ -0,0 +1,39 @@
From ca8c9368b6f28ef625716b03aa930acfb8afe158 Mon Sep 17 00:00:00 2001
From: Christian Loehle <christian.loehle@arm.com>
Date: Thu, 5 Sep 2024 10:26:40 +0100
Subject: TEST: cpufreq/schedutil: Linear iowait boost step
In preparation for capping iowait boost make the steps linear as
opposed to doubling.
Signed-off-by: Christian Loehle <christian.loehle@arm.com>
---
kernel/sched/cpufreq_schedutil.c | 9 ++++-----
1 file changed, 4 insertions(+), 5 deletions(-)
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -267,7 +267,8 @@ static void sugov_iowait_boost(struct su
/* Double the boost at each request */
if (sg_cpu->iowait_boost) {
sg_cpu->iowait_boost =
- min_t(unsigned int, sg_cpu->iowait_boost << 1, SCHED_CAPACITY_SCALE);
+ min_t(unsigned int,
+ sg_cpu->iowait_boost + IOWAIT_BOOST_MIN, SCHED_CAPACITY_SCALE);
return;
}
@@ -308,11 +309,9 @@ static unsigned long sugov_iowait_apply(
/*
* No boost pending; reduce the boost value.
*/
- sg_cpu->iowait_boost >>= 1;
- if (sg_cpu->iowait_boost < IOWAIT_BOOST_MIN) {
- sg_cpu->iowait_boost = 0;
+ sg_cpu->iowait_boost -= IOWAIT_BOOST_MIN;
+ if (!sg_cpu->iowait_boost)
return 0;
- }
}
sg_cpu->iowait_boost_pending = false;
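
For illustration (not part of the patch), a tiny self-contained C program contrasting the previous doubling ramp with the linear ramp above, assuming IOWAIT_BOOST_MIN = SCHED_CAPACITY_SCALE / 8 = 128 as in mainline schedutil:

#include <stdio.h>

#define SCHED_CAPACITY_SCALE	1024u
#define IOWAIT_BOOST_MIN	(SCHED_CAPACITY_SCALE / 8)

static unsigned int min_u(unsigned int a, unsigned int b)
{
	return a < b ? a : b;
}

int main(void)
{
	unsigned int dbl = IOWAIT_BOOST_MIN, lin = IOWAIT_BOOST_MIN;
	int i;

	for (i = 0; i < 8; i++) {
		printf("boost request %d: doubling=%4u linear=%4u\n", i, dbl, lin);
		/* old behaviour: double on each request */
		dbl = min_u(dbl << 1, SCHED_CAPACITY_SCALE);
		/* this patch: add IOWAIT_BOOST_MIN on each request */
		lin = min_u(lin + IOWAIT_BOOST_MIN, SCHED_CAPACITY_SCALE);
	}
	return 0;
}

The doubling ramp saturates at SCHED_CAPACITY_SCALE after three further requests, while the linear ramp takes seven, i.e. a noticeably slower build-up.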

View File

@@ -0,0 +1,106 @@
From 33f05bd16a4ac2f6f36c9eb88016e2375dcb597c Mon Sep 17 00:00:00 2001
From: Christian Loehle <christian.loehle@arm.com>
Date: Thu, 5 Sep 2024 10:26:41 +0100
Subject: TEST: cpufreq/schedutil: iowait boost cap sysfs
Add a knob to cap applied iowait_boost per sysfs.
This is to test for potential regressions.
Signed-off-by: Christian Loehle <christian.loehle@arm.com>
---
kernel/sched/cpufreq_schedutil.c | 38 ++++++++++++++++++++++++++++++++
1 file changed, 38 insertions(+)
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -11,6 +11,7 @@
struct sugov_tunables {
struct gov_attr_set attr_set;
unsigned int rate_limit_us;
+ unsigned int iowait_boost_cap;
};
struct sugov_policy {
@@ -35,6 +36,8 @@ struct sugov_policy {
bool limits_changed;
bool need_freq_update;
+
+ unsigned int iowait_boost_cap;
};
struct sugov_cpu {
@@ -316,6 +319,9 @@ static unsigned long sugov_iowait_apply(
sg_cpu->iowait_boost_pending = false;
+ if (sg_cpu->iowait_boost > sg_cpu->sg_policy->iowait_boost_cap)
+ sg_cpu->iowait_boost = sg_cpu->sg_policy->iowait_boost_cap;
+
/*
* sg_cpu->util is already in capacity scale; convert iowait_boost
* into the same scale so we can compare.
@@ -554,6 +560,14 @@ static ssize_t rate_limit_us_show(struct
return sprintf(buf, "%u\n", tunables->rate_limit_us);
}
+
+static ssize_t iowait_boost_cap_show(struct gov_attr_set *attr_set, char *buf)
+{
+ struct sugov_tunables *tunables = to_sugov_tunables(attr_set);
+
+ return sprintf(buf, "%u\n", tunables->iowait_boost_cap);
+}
+
static ssize_t
rate_limit_us_store(struct gov_attr_set *attr_set, const char *buf, size_t count)
{
@@ -572,10 +586,30 @@ rate_limit_us_store(struct gov_attr_set
return count;
}
+static ssize_t
+iowait_boost_cap_store(struct gov_attr_set *attr_set, const char *buf, size_t count)
+{
+ struct sugov_tunables *tunables = to_sugov_tunables(attr_set);
+ struct sugov_policy *sg_policy;
+ unsigned int iowait_boost_cap;
+
+ if (kstrtouint(buf, 10, &iowait_boost_cap))
+ return -EINVAL;
+
+ tunables->iowait_boost_cap = iowait_boost_cap;
+
+ list_for_each_entry(sg_policy, &attr_set->policy_list, tunables_hook)
+ sg_policy->iowait_boost_cap = iowait_boost_cap;
+
+ return count;
+}
+
static struct governor_attr rate_limit_us = __ATTR_RW(rate_limit_us);
+static struct governor_attr iowait_boost_cap = __ATTR_RW(iowait_boost_cap);
static struct attribute *sugov_attrs[] = {
&rate_limit_us.attr,
+ &iowait_boost_cap.attr,
NULL
};
ATTRIBUTE_GROUPS(sugov);
@@ -765,6 +799,8 @@ static int sugov_init(struct cpufreq_pol
tunables->rate_limit_us = cpufreq_policy_transition_delay_us(policy);
+ tunables->iowait_boost_cap = SCHED_CAPACITY_SCALE;
+
policy->governor_data = sg_policy;
sg_policy->tunables = tunables;
@@ -834,6 +870,8 @@ static int sugov_start(struct cpufreq_po
sg_policy->limits_changed = false;
sg_policy->cached_raw_freq = 0;
+ sg_policy->iowait_boost_cap = SCHED_CAPACITY_SCALE;
+
sg_policy->need_freq_update = cpufreq_driver_test_flags(CPUFREQ_NEED_UPDATE_LIMITS);
if (policy_is_shared(policy))
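
As a usage illustration (not part of the patch), a hypothetical helper that halves the allowed boost via the new tunable. The sysfs location assumes the usual per-policy schedutil attribute directory for policy0; both the path and the chosen value of 512 (SCHED_CAPACITY_SCALE / 2) are assumptions for the example only.

#include <stdio.h>

int main(void)
{
	/* per-policy schedutil tunables normally sit next to rate_limit_us */
	const char *path =
		"/sys/devices/system/cpu/cpufreq/policy0/schedutil/iowait_boost_cap";
	FILE *f = fopen(path, "w");

	if (!f)
		return 1;
	/* kstrtouint() in iowait_boost_cap_store() accepts a trailing newline */
	fprintf(f, "512\n");
	return fclose(f) ? 1 : 0;
}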

View File

@@ -0,0 +1,325 @@
From 33eb6c08d7c615fad308001921c7b1148cbccfde Mon Sep 17 00:00:00 2001
From: Christian Loehle <christian.loehle@arm.com>
Date: Thu, 5 Sep 2024 10:26:42 +0100
Subject: cpufreq/schedutil: Remove iowait boost
iowait boost in schedutil was introduced by
commit 21ca6d2c52f8 ("cpufreq: schedutil: Add iowait boosting"),
more or less following intel_pstate's approach of increasing the
frequency after an iowait wakeup.
Behaviour that is piggy-backed onto iowait boost is problematic
for a number of reasons, so remove it.
For schedutil specifically these are some of the reasons:
1. Boosting is applied even in scenarios where it doesn't improve
throughput.
2. The boost is not accounted for in EAS: a) feec() will only consider
the actual task utilization for task placement, but another CPU might
be more energy-efficient at that capacity than the boosted one.
b) When placing a non-IO task while a CPU is boosted, compute_energy()
assumes a lower OPP than what is actually applied. This leads to
wrong EAS decisions.
3. Actual IO heavy workloads are hardly distinguished from infrequent
in_iowait wakeups.
4. The boost isn't accounted for in task placement.
5. The boost isn't associated with a task, it therefore lingers on the
rq even after the responsible task has migrated / stopped.
6. The boost isn't associated with a task, it therefore needs to ramp
up again when migrated.
7. Since schedutil doesn't know which task is getting woken up,
multiple unrelated in_iowait tasks lead to boosting.
8. Boosting is hard to control with UCLAMP_MAX (which is only active
when the task is on the rq, which for boosted tasks is usually not
the case for most of the time).
One benefit of schedutil specifically is the reliance on the
scheduler's utilization signals, which have evolved a lot since its
original introduction. Some cases that benefitted from iowait boosting
in the past can now be covered by e.g. util_est.
Signed-off-by: Christian Loehle <christian.loehle@arm.com>
---
kernel/sched/cpufreq_schedutil.c | 181 +------------------------------
1 file changed, 3 insertions(+), 178 deletions(-)
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -6,12 +6,9 @@
* Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
*/
-#define IOWAIT_BOOST_MIN (SCHED_CAPACITY_SCALE / 8)
-
struct sugov_tunables {
struct gov_attr_set attr_set;
unsigned int rate_limit_us;
- unsigned int iowait_boost_cap;
};
struct sugov_policy {
@@ -36,8 +33,6 @@ struct sugov_policy {
bool limits_changed;
bool need_freq_update;
-
- unsigned int iowait_boost_cap;
};
struct sugov_cpu {
@@ -45,10 +40,6 @@ struct sugov_cpu {
struct sugov_policy *sg_policy;
unsigned int cpu;
- bool iowait_boost_pending;
- unsigned int iowait_boost;
- u64 last_update;
-
unsigned long util;
unsigned long bw_min;
@@ -198,137 +189,15 @@ unsigned long sugov_effective_cpu_perf(i
return max(min, max);
}
-static void sugov_get_util(struct sugov_cpu *sg_cpu, unsigned long boost)
+static void sugov_get_util(struct sugov_cpu *sg_cpu)
{
unsigned long min, max, util = cpu_util_cfs_boost(sg_cpu->cpu);
util = effective_cpu_util(sg_cpu->cpu, util, &min, &max);
- util = max(util, boost);
sg_cpu->bw_min = min;
sg_cpu->util = sugov_effective_cpu_perf(sg_cpu->cpu, util, min, max);
}
-/**
- * sugov_iowait_reset() - Reset the IO boost status of a CPU.
- * @sg_cpu: the sugov data for the CPU to boost
- * @time: the update time from the caller
- * @set_iowait_boost: true if an IO boost has been requested
- *
- * The IO wait boost of a task is disabled after a tick since the last update
- * of a CPU. If a new IO wait boost is requested after more then a tick, then
- * we enable the boost starting from IOWAIT_BOOST_MIN, which improves energy
- * efficiency by ignoring sporadic wakeups from IO.
- */
-static bool sugov_iowait_reset(struct sugov_cpu *sg_cpu, u64 time,
- bool set_iowait_boost)
-{
- s64 delta_ns = time - sg_cpu->last_update;
-
- /* Reset boost only if a tick has elapsed since last request */
- if (delta_ns <= TICK_NSEC)
- return false;
-
- sg_cpu->iowait_boost = set_iowait_boost ? IOWAIT_BOOST_MIN : 0;
- sg_cpu->iowait_boost_pending = set_iowait_boost;
-
- return true;
-}
-
-/**
- * sugov_iowait_boost() - Updates the IO boost status of a CPU.
- * @sg_cpu: the sugov data for the CPU to boost
- * @time: the update time from the caller
- * @flags: SCHED_CPUFREQ_IOWAIT if the task is waking up after an IO wait
- *
- * Each time a task wakes up after an IO operation, the CPU utilization can be
- * boosted to a certain utilization which doubles at each "frequent and
- * successive" wakeup from IO, ranging from IOWAIT_BOOST_MIN to the utilization
- * of the maximum OPP.
- *
- * To keep doubling, an IO boost has to be requested at least once per tick,
- * otherwise we restart from the utilization of the minimum OPP.
- */
-static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, u64 time,
- unsigned int flags)
-{
- bool set_iowait_boost = flags & SCHED_CPUFREQ_IOWAIT;
-
- /* Reset boost if the CPU appears to have been idle enough */
- if (sg_cpu->iowait_boost &&
- sugov_iowait_reset(sg_cpu, time, set_iowait_boost))
- return;
-
- /* Boost only tasks waking up after IO */
- if (!set_iowait_boost)
- return;
-
- /* Ensure boost doubles only one time at each request */
- if (sg_cpu->iowait_boost_pending)
- return;
- sg_cpu->iowait_boost_pending = true;
-
- /* Double the boost at each request */
- if (sg_cpu->iowait_boost) {
- sg_cpu->iowait_boost =
- min_t(unsigned int,
- sg_cpu->iowait_boost + IOWAIT_BOOST_MIN, SCHED_CAPACITY_SCALE);
- return;
- }
-
- /* First wakeup after IO: start with minimum boost */
- sg_cpu->iowait_boost = IOWAIT_BOOST_MIN;
-}
-
-/**
- * sugov_iowait_apply() - Apply the IO boost to a CPU.
- * @sg_cpu: the sugov data for the cpu to boost
- * @time: the update time from the caller
- * @max_cap: the max CPU capacity
- *
- * A CPU running a task which woken up after an IO operation can have its
- * utilization boosted to speed up the completion of those IO operations.
- * The IO boost value is increased each time a task wakes up from IO, in
- * sugov_iowait_apply(), and it's instead decreased by this function,
- * each time an increase has not been requested (!iowait_boost_pending).
- *
- * A CPU which also appears to have been idle for at least one tick has also
- * its IO boost utilization reset.
- *
- * This mechanism is designed to boost high frequently IO waiting tasks, while
- * being more conservative on tasks which does sporadic IO operations.
- */
-static unsigned long sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time,
- unsigned long max_cap)
-{
- /* No boost currently required */
- if (!sg_cpu->iowait_boost)
- return 0;
-
- /* Reset boost if the CPU appears to have been idle enough */
- if (sugov_iowait_reset(sg_cpu, time, false))
- return 0;
-
- if (!sg_cpu->iowait_boost_pending) {
- /*
- * No boost pending; reduce the boost value.
- */
- sg_cpu->iowait_boost -= IOWAIT_BOOST_MIN;
- if (!sg_cpu->iowait_boost)
- return 0;
- }
-
- sg_cpu->iowait_boost_pending = false;
-
- if (sg_cpu->iowait_boost > sg_cpu->sg_policy->iowait_boost_cap)
- sg_cpu->iowait_boost = sg_cpu->sg_policy->iowait_boost_cap;
-
- /*
- * sg_cpu->util is already in capacity scale; convert iowait_boost
- * into the same scale so we can compare.
- */
- return (sg_cpu->iowait_boost * max_cap) >> SCHED_CAPACITY_SHIFT;
-}
-
#ifdef CONFIG_NO_HZ_COMMON
static bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu)
{
@@ -356,18 +225,12 @@ static inline bool sugov_update_single_c
u64 time, unsigned long max_cap,
unsigned int flags)
{
- unsigned long boost;
-
- sugov_iowait_boost(sg_cpu, time, flags);
- sg_cpu->last_update = time;
-
ignore_dl_rate_limit(sg_cpu);
if (!sugov_should_update_freq(sg_cpu->sg_policy, time))
return false;
- boost = sugov_iowait_apply(sg_cpu, time, max_cap);
- sugov_get_util(sg_cpu, boost);
+ sugov_get_util(sg_cpu);
return true;
}
@@ -468,11 +331,8 @@ static unsigned int sugov_next_freq_shar
for_each_cpu(j, policy->cpus) {
struct sugov_cpu *j_sg_cpu = &per_cpu(sugov_cpu, j);
- unsigned long boost;
-
- boost = sugov_iowait_apply(j_sg_cpu, time, max_cap);
- sugov_get_util(j_sg_cpu, boost);
+ sugov_get_util(j_sg_cpu);
util = max(j_sg_cpu->util, util);
}
@@ -488,9 +348,6 @@ sugov_update_shared(struct update_util_d
raw_spin_lock(&sg_policy->update_lock);
- sugov_iowait_boost(sg_cpu, time, flags);
- sg_cpu->last_update = time;
-
ignore_dl_rate_limit(sg_cpu);
if (sugov_should_update_freq(sg_policy, time)) {
@@ -560,14 +417,6 @@ static ssize_t rate_limit_us_show(struct
return sprintf(buf, "%u\n", tunables->rate_limit_us);
}
-
-static ssize_t iowait_boost_cap_show(struct gov_attr_set *attr_set, char *buf)
-{
- struct sugov_tunables *tunables = to_sugov_tunables(attr_set);
-
- return sprintf(buf, "%u\n", tunables->iowait_boost_cap);
-}
-
static ssize_t
rate_limit_us_store(struct gov_attr_set *attr_set, const char *buf, size_t count)
{
@@ -586,30 +435,10 @@ rate_limit_us_store(struct gov_attr_set
return count;
}
-static ssize_t
-iowait_boost_cap_store(struct gov_attr_set *attr_set, const char *buf, size_t count)
-{
- struct sugov_tunables *tunables = to_sugov_tunables(attr_set);
- struct sugov_policy *sg_policy;
- unsigned int iowait_boost_cap;
-
- if (kstrtouint(buf, 10, &iowait_boost_cap))
- return -EINVAL;
-
- tunables->iowait_boost_cap = iowait_boost_cap;
-
- list_for_each_entry(sg_policy, &attr_set->policy_list, tunables_hook)
- sg_policy->iowait_boost_cap = iowait_boost_cap;
-
- return count;
-}
-
static struct governor_attr rate_limit_us = __ATTR_RW(rate_limit_us);
-static struct governor_attr iowait_boost_cap = __ATTR_RW(iowait_boost_cap);
static struct attribute *sugov_attrs[] = {
&rate_limit_us.attr,
- &iowait_boost_cap.attr,
NULL
};
ATTRIBUTE_GROUPS(sugov);
@@ -799,8 +628,6 @@ static int sugov_init(struct cpufreq_pol
tunables->rate_limit_us = cpufreq_policy_transition_delay_us(policy);
- tunables->iowait_boost_cap = SCHED_CAPACITY_SCALE;
-
policy->governor_data = sg_policy;
sg_policy->tunables = tunables;
@@ -870,8 +697,6 @@ static int sugov_start(struct cpufreq_po
sg_policy->limits_changed = false;
sg_policy->cached_raw_freq = 0;
- sg_policy->iowait_boost_cap = SCHED_CAPACITY_SCALE;
-
sg_policy->need_freq_update = cpufreq_driver_test_flags(CPUFREQ_NEED_UPDATE_LIMITS);
if (policy_is_shared(policy))

View File

@@ -0,0 +1,113 @@
From af7bbb59c2411e985a5d79173af5686337b4af9b Mon Sep 17 00:00:00 2001
From: Christian Loehle <christian.loehle@arm.com>
Date: Thu, 5 Sep 2024 10:26:43 +0100
Subject: cpufreq: intel_pstate: Remove iowait boost
Analogous to schedutil, remove iowait boost for the same reasons.
Signed-off-by: Christian Loehle <christian.loehle@arm.com>
---
drivers/cpufreq/intel_pstate.c | 50 ++--------------------------------
1 file changed, 3 insertions(+), 47 deletions(-)
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -191,7 +191,6 @@ struct global_params {
* @policy: CPUFreq policy value
* @update_util: CPUFreq utility callback information
* @update_util_set: CPUFreq utility callback is set
- * @iowait_boost: iowait-related boost fraction
* @last_update: Time of the last update.
* @pstate: Stores P state limits for this CPU
* @vid: Stores VID limits for this CPU
@@ -245,7 +244,6 @@ struct cpudata {
struct acpi_processor_performance acpi_perf_data;
bool valid_pss_table;
#endif
- unsigned int iowait_boost;
s16 epp_powersave;
s16 epp_policy;
s16 epp_default;
@@ -2136,28 +2134,7 @@ static inline void intel_pstate_update_u
{
cpu->sample.time = time;
- if (cpu->sched_flags & SCHED_CPUFREQ_IOWAIT) {
- bool do_io = false;
-
- cpu->sched_flags = 0;
- /*
- * Set iowait_boost flag and update time. Since IO WAIT flag
- * is set all the time, we can't just conclude that there is
- * some IO bound activity is scheduled on this CPU with just
- * one occurrence. If we receive at least two in two
- * consecutive ticks, then we treat as boost candidate.
- */
- if (time_before64(time, cpu->last_io_update + 2 * TICK_NSEC))
- do_io = true;
-
- cpu->last_io_update = time;
-
- if (do_io)
- intel_pstate_hwp_boost_up(cpu);
-
- } else {
- intel_pstate_hwp_boost_down(cpu);
- }
+ intel_pstate_hwp_boost_down(cpu);
}
static inline void intel_pstate_update_util_hwp(struct update_util_data *data,
@@ -2240,9 +2217,6 @@ static inline int32_t get_target_pstate(
busy_frac = div_fp(sample->mperf << cpu->aperf_mperf_shift,
sample->tsc);
- if (busy_frac < cpu->iowait_boost)
- busy_frac = cpu->iowait_boost;
-
sample->busy_scaled = busy_frac * 100;
target = READ_ONCE(global.no_turbo) ?
@@ -2303,7 +2277,7 @@ static void intel_pstate_adjust_pstate(s
sample->aperf,
sample->tsc,
get_avg_frequency(cpu),
- fp_toint(cpu->iowait_boost * 100));
+ 0);
}
static void intel_pstate_update_util(struct update_util_data *data, u64 time,
@@ -2317,24 +2291,6 @@ static void intel_pstate_update_util(str
return;
delta_ns = time - cpu->last_update;
- if (flags & SCHED_CPUFREQ_IOWAIT) {
- /* Start over if the CPU may have been idle. */
- if (delta_ns > TICK_NSEC) {
- cpu->iowait_boost = ONE_EIGHTH_FP;
- } else if (cpu->iowait_boost >= ONE_EIGHTH_FP) {
- cpu->iowait_boost <<= 1;
- if (cpu->iowait_boost > int_tofp(1))
- cpu->iowait_boost = int_tofp(1);
- } else {
- cpu->iowait_boost = ONE_EIGHTH_FP;
- }
- } else if (cpu->iowait_boost) {
- /* Clear iowait_boost if the CPU may have been idle. */
- if (delta_ns > TICK_NSEC)
- cpu->iowait_boost = 0;
- else
- cpu->iowait_boost >>= 1;
- }
cpu->last_update = time;
delta_ns = time - cpu->sample.time;
if ((s64)delta_ns < INTEL_PSTATE_SAMPLING_INTERVAL)
@@ -2832,7 +2788,7 @@ static void intel_cpufreq_trace(struct c
sample->aperf,
sample->tsc,
get_avg_frequency(cpu),
- fp_toint(cpu->iowait_boost * 100));
+ 0);
}
static void intel_cpufreq_hwp_update(struct cpudata *cpu, u32 min, u32 max,

View File

@@ -0,0 +1,42 @@
From fd1e0723b0a7ad140d2bf7cd9154997d5ece2b37 Mon Sep 17 00:00:00 2001
From: Christian Loehle <christian.loehle@arm.com>
Date: Thu, 5 Sep 2024 10:26:44 +0100
Subject: cpufreq: Remove SCHED_CPUFREQ_IOWAIT update
Neither intel_pstate nor schedutil care for the flag anymore, so
remove the update and flag definition.
Signed-off-by: Christian Loehle <christian.loehle@arm.com>
---
include/linux/sched/cpufreq.h | 2 --
kernel/sched/fair.c | 8 --------
2 files changed, 10 deletions(-)
--- a/include/linux/sched/cpufreq.h
+++ b/include/linux/sched/cpufreq.h
@@ -8,8 +8,6 @@
* Interface between cpufreq drivers and the scheduler:
*/
-#define SCHED_CPUFREQ_IOWAIT (1U << 0)
-
#ifdef CONFIG_CPU_FREQ
struct cpufreq_policy;
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6768,14 +6768,6 @@ enqueue_task_fair(struct rq *rq, struct
*/
util_est_enqueue(&rq->cfs, p);
- /*
- * If in_iowait is set, the code below may not trigger any cpufreq
- * utilization updates, so do it here explicitly with the IOWAIT flag
- * passed.
- */
- if (p->in_iowait)
- cpufreq_update_util(rq, SCHED_CPUFREQ_IOWAIT);
-
for_each_sched_entity(se) {
if (se->on_rq)
break;

View File

@@ -0,0 +1,55 @@
From 30cdb8d7d06f51bb86142c537ea05bd01c31bb40 Mon Sep 17 00:00:00 2001
From: Christian Loehle <christian.loehle@arm.com>
Date: Thu, 5 Sep 2024 10:26:45 +0100
Subject: io_uring: Do not set iowait before sleeping
Setting in_iowait was introduced in commit
8a796565cec3 ("io_uring: Use io_schedule* in cqring wait")
to tackle a perf regression that was caused by menu taking iowait into
account for synchronous IO and thus not selecting deeper states like in
the io_uring counterpart.
That behaviour is gone, so the workaround can be removed.
Signed-off-by: Christian Loehle <christian.loehle@arm.com>
---
io_uring/io_uring.c | 17 -----------------
1 file changed, 17 deletions(-)
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -2359,15 +2359,6 @@ int io_run_task_work_sig(struct io_ring_
return 0;
}
-static bool current_pending_io(void)
-{
- struct io_uring_task *tctx = current->io_uring;
-
- if (!tctx)
- return false;
- return percpu_counter_read_positive(&tctx->inflight);
-}
-
/* when returns >0, the caller should retry */
static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx,
struct io_wait_queue *iowq)
@@ -2385,19 +2376,11 @@ static inline int io_cqring_wait_schedul
if (unlikely(io_should_wake(iowq)))
return 0;
- /*
- * Mark us as being in io_wait if we have pending requests, so cpufreq
- * can take into account that the task is waiting for IO - turns out
- * to be important for low QD IO.
- */
- if (current_pending_io())
- current->in_iowait = 1;
ret = 0;
if (iowq->timeout == KTIME_MAX)
schedule();
else if (!schedule_hrtimeout(&iowq->timeout, HRTIMER_MODE_ABS))
ret = -ETIME;
- current->in_iowait = 0;
return ret;
}

View File

@@ -0,0 +1,181 @@
From e6b67a8d14e86d63062e6f1f234c5afc235561d4 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Sun, 13 Oct 2024 21:06:49 -0700
Subject: crypto: x86/crc32c - simplify code for handling fewer than 200 bytes
The assembly code in crc32c-pcl-intel-asm_64.S is invoked only for
lengths >= 512, due to the overhead of saving and restoring FPU state.
Therefore, it is unnecessary for this code to be excessively "optimized"
for lengths < 200. Eliminate the excessive unrolling of this part of
the code and use a more straightforward qword-at-a-time loop.
Note: the part of the code in question is not entirely redundant, as it
is still used to process any remainder mod 24, as well as any remaining
data when fewer than 200 bytes remain after at least one 3072-byte chunk.
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
arch/x86/crypto/crc32c-pcl-intel-asm_64.S | 116 ++++++----------------
1 file changed, 33 insertions(+), 83 deletions(-)
--- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
+++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
@@ -56,20 +56,10 @@
.quad .Lcrc_\i
.endm
-.macro JNC_LESS_THAN j
- jnc .Lless_than_\j
-.endm
-
-# Define threshold where buffers are considered "small" and routed to more
-# efficient "by-1" code. This "by-1" code only handles up to 255 bytes, so
-# SMALL_SIZE can be no larger than 255.
-
+# Define threshold below which buffers are considered "small" and routed to
+# regular CRC code that does not interleave the CRC instructions.
#define SMALL_SIZE 200
-.if (SMALL_SIZE > 255)
-.error "SMALL_ SIZE must be < 256"
-.endif
-
# unsigned int crc_pcl(u8 *buffer, int len, unsigned int crc_init);
.text
@@ -100,25 +90,18 @@ SYM_FUNC_START(crc_pcl)
## Move crc_init for Linux to a different
mov crc_init_arg, crc_init
+ mov %bufp, bufptmp # rdi = *buf
+ cmp $SMALL_SIZE, len
+ jb .Lsmall
+
################################################################
## 1) ALIGN:
################################################################
-
- mov %bufp, bufptmp # rdi = *buf
neg %bufp
and $7, %bufp # calculate the unalignment amount of
# the address
je .Lproc_block # Skip if aligned
- ## If len is less than 8 and we're unaligned, we need to jump
- ## to special code to avoid reading beyond the end of the buffer
- cmp $8, len
- jae .Ldo_align
- # less_than_8 expects length in upper 3 bits of len_dw
- # less_than_8_post_shl1 expects length = carryflag * 8 + len_dw[31:30]
- shl $32-3+1, len_dw
- jmp .Lless_than_8_post_shl1
-
.Ldo_align:
#### Calculate CRC of unaligned bytes of the buffer (if any)
movq (bufptmp), tmp # load a quadward from the buffer
@@ -144,9 +127,6 @@ SYM_FUNC_START(crc_pcl)
jae .Lfull_block
.Lcontinue_block:
- cmpq $SMALL_SIZE, len
- jb .Lsmall
-
## len < 128*24
movq $2731, %rax # 2731 = ceil(2^16 / 24)
mul len_dw
@@ -243,68 +223,38 @@ LABEL crc_ 0
mov tmp, len
cmp $128*24, tmp
jae .Lfull_block
- cmp $24, tmp
+ cmp $SMALL_SIZE, tmp
jae .Lcontinue_block
-.Lless_than_24:
- shl $32-4, len_dw # less_than_16 expects length
- # in upper 4 bits of len_dw
- jnc .Lless_than_16
- crc32q (bufptmp), crc_init
- crc32q 8(bufptmp), crc_init
- jz .Ldo_return
- add $16, bufptmp
- # len is less than 8 if we got here
- # less_than_8 expects length in upper 3 bits of len_dw
- # less_than_8_post_shl1 expects length = carryflag * 8 + len_dw[31:30]
- shl $2, len_dw
- jmp .Lless_than_8_post_shl1
-
#######################################################################
- ## 6) LESS THAN 256-bytes REMAIN AT THIS POINT (8-bits of len are full)
+ ## 6) Process any remainder without interleaving:
#######################################################################
.Lsmall:
- shl $32-8, len_dw # Prepare len_dw for less_than_256
- j=256
-.rept 5 # j = {256, 128, 64, 32, 16}
-.altmacro
-LABEL less_than_ %j # less_than_j: Length should be in
- # upper lg(j) bits of len_dw
- j=(j/2)
- shl $1, len_dw # Get next MSB
- JNC_LESS_THAN %j
-.noaltmacro
- i=0
-.rept (j/8)
- crc32q i(bufptmp), crc_init # Compute crc32 of 8-byte data
- i=i+8
-.endr
- jz .Ldo_return # Return if remaining length is zero
- add $j, bufptmp # Advance buf
-.endr
-
-.Lless_than_8: # Length should be stored in
- # upper 3 bits of len_dw
- shl $1, len_dw
-.Lless_than_8_post_shl1:
- jnc .Lless_than_4
- crc32l (bufptmp), crc_init_dw # CRC of 4 bytes
- jz .Ldo_return # return if remaining data is zero
- add $4, bufptmp
-.Lless_than_4: # Length should be stored in
- # upper 2 bits of len_dw
- shl $1, len_dw
- jnc .Lless_than_2
- crc32w (bufptmp), crc_init_dw # CRC of 2 bytes
- jz .Ldo_return # return if remaining data is zero
- add $2, bufptmp
-.Lless_than_2: # Length should be stored in the MSB
- # of len_dw
- shl $1, len_dw
- jnc .Lless_than_1
- crc32b (bufptmp), crc_init_dw # CRC of 1 byte
-.Lless_than_1: # Length should be zero
-.Ldo_return:
+ test len, len
+ jz .Ldone
+ mov len_dw, %eax
+ shr $3, %eax
+ jz .Ldo_dword
+.Ldo_qwords:
+ crc32q (bufptmp), crc_init
+ add $8, bufptmp
+ dec %eax
+ jnz .Ldo_qwords
+.Ldo_dword:
+ test $4, len_dw
+ jz .Ldo_word
+ crc32l (bufptmp), crc_init_dw
+ add $4, bufptmp
+.Ldo_word:
+ test $2, len_dw
+ jz .Ldo_byte
+ crc32w (bufptmp), crc_init_dw
+ add $2, bufptmp
+.Ldo_byte:
+ test $1, len_dw
+ jz .Ldone
+ crc32b (bufptmp), crc_init_dw
+.Ldone:
movq crc_init, %rax
popq %rsi
popq %rdi
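
As a rough C sketch (illustration only, not part of the patch), the remainder handling above corresponds to the following loop built on the SSE4.2 CRC32C intrinsics; it assumes a CPU with SSE4.2 and compilation with -msse4.2, and is not how the kernel implements it (the kernel keeps this in assembly):

#include <nmmintrin.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>

static uint32_t crc32c_small(uint32_t crc, const uint8_t *p, size_t len)
{
	uint64_t crc64 = crc;

	/* qword-at-a-time main loop, mirroring .Ldo_qwords */
	while (len >= 8) {
		uint64_t q;

		memcpy(&q, p, 8);
		crc64 = _mm_crc32_u64(crc64, q);
		p += 8;
		len -= 8;
	}
	crc = (uint32_t)crc64;

	if (len & 4) {			/* .Ldo_dword */
		uint32_t d;

		memcpy(&d, p, 4);
		crc = _mm_crc32_u32(crc, d);
		p += 4;
	}
	if (len & 2) {			/* .Ldo_word */
		uint16_t w;

		memcpy(&w, p, 2);
		crc = _mm_crc32_u16(crc, w);
		p += 2;
	}
	if (len & 1)			/* .Ldo_byte */
		crc = _mm_crc32_u8(crc, *p);

	return crc;
}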

View File

@@ -0,0 +1,187 @@
From 430478d63b1403878f2fd4b12de2cd21ee502184 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Sun, 13 Oct 2024 21:06:49 -0700
Subject: crypto: x86/crc32c - access 32-bit arguments as 32-bit
Fix crc32c-pcl-intel-asm_64.S to access 32-bit arguments as 32-bit
values instead of 64-bit, since the upper bits of the corresponding
64-bit registers are not guaranteed to be zero. Also update the type of
the length argument to be unsigned int rather than int, as the assembly
code treats it as unsigned.
Note: there haven't been any reports of this bug actually causing
incorrect behavior. Neither gcc nor clang guarantee zero-extension to
64 bits, but zero-extension is likely to happen in practice because most
instructions that operate on 32-bit registers zero-extend to 64 bits.
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
arch/x86/crypto/crc32c-intel_glue.c | 2 +-
arch/x86/crypto/crc32c-pcl-intel-asm_64.S | 57 +++++++++++------------
2 files changed, 27 insertions(+), 32 deletions(-)
--- a/arch/x86/crypto/crc32c-intel_glue.c
+++ b/arch/x86/crypto/crc32c-intel_glue.c
@@ -41,7 +41,7 @@
*/
#define CRC32C_PCL_BREAKEVEN 512
-asmlinkage unsigned int crc_pcl(const u8 *buffer, int len,
+asmlinkage unsigned int crc_pcl(const u8 *buffer, unsigned int len,
unsigned int crc_init);
#endif /* CONFIG_X86_64 */
--- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
+++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
@@ -60,7 +60,7 @@
# regular CRC code that does not interleave the CRC instructions.
#define SMALL_SIZE 200
-# unsigned int crc_pcl(u8 *buffer, int len, unsigned int crc_init);
+# unsigned int crc_pcl(const u8 *buffer, unsigned int len, unsigned int crc_init);
.text
SYM_FUNC_START(crc_pcl)
@@ -72,14 +72,11 @@ SYM_FUNC_START(crc_pcl)
#define block_0 %rcx
#define block_1 %rdx
#define block_2 %r11
-#define len %rsi
-#define len_dw %esi
-#define len_w %si
-#define len_b %sil
-#define crc_init_arg %rdx
+#define len %esi
+#define crc_init_arg %edx
#define tmp %rbx
-#define crc_init %r8
-#define crc_init_dw %r8d
+#define crc_init %r8d
+#define crc_init_q %r8
#define crc1 %r9
#define crc2 %r10
@@ -107,9 +104,9 @@ SYM_FUNC_START(crc_pcl)
movq (bufptmp), tmp # load a quadward from the buffer
add %bufp, bufptmp # align buffer pointer for quadword
# processing
- sub %bufp, len # update buffer length
+ sub bufp_dw, len # update buffer length
.Lalign_loop:
- crc32b %bl, crc_init_dw # compute crc32 of 1-byte
+ crc32b %bl, crc_init # compute crc32 of 1-byte
shr $8, tmp # get next byte
dec %bufp
jne .Lalign_loop
@@ -121,15 +118,14 @@ SYM_FUNC_START(crc_pcl)
################################################################
## compute num of bytes to be processed
- movq len, tmp # save num bytes in tmp
- cmpq $128*24, len
+ cmp $128*24, len
jae .Lfull_block
.Lcontinue_block:
## len < 128*24
movq $2731, %rax # 2731 = ceil(2^16 / 24)
- mul len_dw
+ mul len
shrq $16, %rax
## eax contains floor(bytes / 24) = num 24-byte chunks to do
@@ -176,7 +172,7 @@ SYM_FUNC_START(crc_pcl)
LABEL crc_ %i
.noaltmacro
ENDBR
- crc32q -i*8(block_0), crc_init
+ crc32q -i*8(block_0), crc_init_q
crc32q -i*8(block_1), crc1
crc32q -i*8(block_2), crc2
i=(i-1)
@@ -186,7 +182,7 @@ LABEL crc_ %i
LABEL crc_ %i
.noaltmacro
ENDBR
- crc32q -i*8(block_0), crc_init
+ crc32q -i*8(block_0), crc_init_q
crc32q -i*8(block_1), crc1
# SKIP crc32 -i*8(block_2), crc2 ; Don't do this one yet
@@ -200,9 +196,9 @@ LABEL crc_ %i
shlq $3, %rax # rax *= 8
pmovzxdq (%bufp,%rax), %xmm0 # 2 consts: K1:K2
leal (%eax,%eax,2), %eax # rax *= 3 (total *24)
- subq %rax, tmp # tmp -= rax*24
+ sub %eax, len # len -= rax*24
- movq crc_init, %xmm1 # CRC for block 1
+ movq crc_init_q, %xmm1 # CRC for block 1
pclmulqdq $0x00, %xmm0, %xmm1 # Multiply by K2
movq crc1, %xmm2 # CRC for block 2
@@ -211,8 +207,8 @@ LABEL crc_ %i
pxor %xmm2,%xmm1
movq %xmm1, %rax
xor -i*8(block_2), %rax
- mov crc2, crc_init
- crc32 %rax, crc_init
+ mov crc2, crc_init_q
+ crc32 %rax, crc_init_q
################################################################
## 5) Check for end:
@@ -220,10 +216,9 @@ LABEL crc_ %i
LABEL crc_ 0
ENDBR
- mov tmp, len
- cmp $128*24, tmp
+ cmp $128*24, len
jae .Lfull_block
- cmp $SMALL_SIZE, tmp
+ cmp $SMALL_SIZE, len
jae .Lcontinue_block
#######################################################################
@@ -232,30 +227,30 @@ LABEL crc_ 0
.Lsmall:
test len, len
jz .Ldone
- mov len_dw, %eax
+ mov len, %eax
shr $3, %eax
jz .Ldo_dword
.Ldo_qwords:
- crc32q (bufptmp), crc_init
+ crc32q (bufptmp), crc_init_q
add $8, bufptmp
dec %eax
jnz .Ldo_qwords
.Ldo_dword:
- test $4, len_dw
+ test $4, len
jz .Ldo_word
- crc32l (bufptmp), crc_init_dw
+ crc32l (bufptmp), crc_init
add $4, bufptmp
.Ldo_word:
- test $2, len_dw
+ test $2, len
jz .Ldo_byte
- crc32w (bufptmp), crc_init_dw
+ crc32w (bufptmp), crc_init
add $2, bufptmp
.Ldo_byte:
- test $1, len_dw
+ test $1, len
jz .Ldone
- crc32b (bufptmp), crc_init_dw
+ crc32b (bufptmp), crc_init
.Ldone:
- movq crc_init, %rax
+ mov crc_init, %eax
popq %rsi
popq %rdi
popq %rbx

View File

@@ -0,0 +1,374 @@
From 8706bf3e3cba8c708f9933f0d1c6a23f9c2c8c33 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Sun, 13 Oct 2024 21:06:49 -0700
Subject: crypto: x86/crc32c - eliminate jump table and excessive unrolling
crc32c-pcl-intel-asm_64.S has a loop with 1 to 127 iterations fully
unrolled and uses a jump table to jump into the correct location. This
optimization is misguided, as it bloats the binary code size and
introduces an indirect call. x86_64 CPUs can predict loops well, so it
is fine to just use a loop instead. Loop bookkeeping instructions can
compete with the crc instructions for the ALUs, but this is easily
mitigated by unrolling the loop by a smaller amount, such as 4 times.
Therefore, re-roll the loop and make related tweaks to the code.
This reduces the binary code size of crc_pclmul() from 4546 bytes to 418
bytes, a 91% reduction. In general it also makes the code faster, with
some large improvements seen when retpoline is enabled.
More detailed performance results are shown below. They are given as
percent improvement in throughput (negative means regressed) for CPU
microarchitecture vs. input length in bytes. E.g. an improvement from
40 GB/s to 50 GB/s would be listed as 25%.
Table 1: Results with retpoline enabled (the default):
                     |  512  |  833  | 1024  | 2000  | 3173  | 4096  |
---------------------+-------+-------+-------+-------+-------+-------+
Intel Haswell        | 35.0% | 20.7% | 17.8% |  9.7% | -0.2% |  4.4% |
Intel Emerald Rapids | 66.8% | 45.2% | 36.3% | 19.3% |  0.0% |  5.4% |
AMD Zen 2            | 29.5% | 17.2% | 13.5% |  8.6% | -0.5% |  2.8% |
Table 2: Results with retpoline disabled:
                     |  512  |  833  | 1024  | 2000  | 3173  | 4096  |
---------------------+-------+-------+-------+-------+-------+-------+
Intel Haswell        |  3.3% |  4.8% |  4.5% |  0.9% | -2.9% |  0.3% |
Intel Emerald Rapids |  7.5% |  6.4% |  5.2% |  2.3% | -0.0% |  0.6% |
AMD Zen 2            | 11.8% |  1.4% |  0.2% |  1.3% | -0.9% | -0.2% |
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
arch/x86/crypto/crc32c-pcl-intel-asm_64.S | 233 +++++++++-------------
1 file changed, 92 insertions(+), 141 deletions(-)
--- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
+++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
@@ -7,6 +7,7 @@
* http://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fast-crc-computation-paper.pdf
*
* Copyright (C) 2012 Intel Corporation.
+ * Copyright 2024 Google LLC
*
* Authors:
* Wajdi Feghali <wajdi.k.feghali@intel.com>
@@ -44,18 +45,9 @@
*/
#include <linux/linkage.h>
-#include <asm/nospec-branch.h>
## ISCSI CRC 32 Implementation with crc32 and pclmulqdq Instruction
-.macro LABEL prefix n
-.L\prefix\n\():
-.endm
-
-.macro JMPTBL_ENTRY i
-.quad .Lcrc_\i
-.endm
-
# Define threshold below which buffers are considered "small" and routed to
# regular CRC code that does not interleave the CRC instructions.
#define SMALL_SIZE 200
@@ -64,139 +56,116 @@
.text
SYM_FUNC_START(crc_pcl)
-#define bufp rdi
-#define bufp_dw %edi
-#define bufp_w %di
-#define bufp_b %dil
-#define bufptmp %rcx
-#define block_0 %rcx
-#define block_1 %rdx
-#define block_2 %r11
-#define len %esi
-#define crc_init_arg %edx
-#define tmp %rbx
-#define crc_init %r8d
-#define crc_init_q %r8
-#define crc1 %r9
-#define crc2 %r10
-
- pushq %rbx
- pushq %rdi
- pushq %rsi
-
- ## Move crc_init for Linux to a different
- mov crc_init_arg, crc_init
+#define bufp %rdi
+#define bufp_d %edi
+#define len %esi
+#define crc_init %edx
+#define crc_init_q %rdx
+#define n_misaligned %ecx /* overlaps chunk_bytes! */
+#define n_misaligned_q %rcx
+#define chunk_bytes %ecx /* overlaps n_misaligned! */
+#define chunk_bytes_q %rcx
+#define crc1 %r8
+#define crc2 %r9
- mov %bufp, bufptmp # rdi = *buf
cmp $SMALL_SIZE, len
jb .Lsmall
################################################################
## 1) ALIGN:
################################################################
- neg %bufp
- and $7, %bufp # calculate the unalignment amount of
+ mov bufp_d, n_misaligned
+ neg n_misaligned
+ and $7, n_misaligned # calculate the misalignment amount of
# the address
- je .Lproc_block # Skip if aligned
+ je .Laligned # Skip if aligned
+ # Process 1 <= n_misaligned <= 7 bytes individually in order to align
+ # the remaining data to an 8-byte boundary.
.Ldo_align:
- #### Calculate CRC of unaligned bytes of the buffer (if any)
- movq (bufptmp), tmp # load a quadward from the buffer
- add %bufp, bufptmp # align buffer pointer for quadword
- # processing
- sub bufp_dw, len # update buffer length
+ movq (bufp), %rax
+ add n_misaligned_q, bufp
+ sub n_misaligned, len
.Lalign_loop:
- crc32b %bl, crc_init # compute crc32 of 1-byte
- shr $8, tmp # get next byte
- dec %bufp
+ crc32b %al, crc_init # compute crc32 of 1-byte
+ shr $8, %rax # get next byte
+ dec n_misaligned
jne .Lalign_loop
-
-.Lproc_block:
+.Laligned:
################################################################
- ## 2) PROCESS BLOCKS:
+ ## 2) PROCESS BLOCK:
################################################################
- ## compute num of bytes to be processed
-
cmp $128*24, len
jae .Lfull_block
-.Lcontinue_block:
- ## len < 128*24
- movq $2731, %rax # 2731 = ceil(2^16 / 24)
- mul len
- shrq $16, %rax
-
- ## eax contains floor(bytes / 24) = num 24-byte chunks to do
-
- ## process rax 24-byte chunks (128 >= rax >= 0)
-
- ## compute end address of each block
- ## block 0 (base addr + RAX * 8)
- ## block 1 (base addr + RAX * 16)
- ## block 2 (base addr + RAX * 24)
- lea (bufptmp, %rax, 8), block_0
- lea (block_0, %rax, 8), block_1
- lea (block_1, %rax, 8), block_2
-
- xor crc1, crc1
- xor crc2, crc2
-
- ## branch into array
- leaq jump_table(%rip), %bufp
- mov (%bufp,%rax,8), %bufp
- JMP_NOSPEC bufp
+.Lpartial_block:
+ # Compute floor(len / 24) to get num qwords to process from each lane.
+ imul $2731, len, %eax # 2731 = ceil(2^16 / 24)
+ shr $16, %eax
+ jmp .Lcrc_3lanes
- ################################################################
- ## 2a) PROCESS FULL BLOCKS:
- ################################################################
.Lfull_block:
- movl $128,%eax
- lea 128*8*2(block_0), block_1
- lea 128*8*3(block_0), block_2
- add $128*8*1, block_0
-
- xor crc1,crc1
- xor crc2,crc2
-
- # Fall through into top of crc array (crc_128)
+ # Processing 128 qwords from each lane.
+ mov $128, %eax
################################################################
- ## 3) CRC Array:
+ ## 3) CRC each of three lanes:
################################################################
- i=128
-.rept 128-1
-.altmacro
-LABEL crc_ %i
-.noaltmacro
- ENDBR
- crc32q -i*8(block_0), crc_init_q
- crc32q -i*8(block_1), crc1
- crc32q -i*8(block_2), crc2
- i=(i-1)
-.endr
-
-.altmacro
-LABEL crc_ %i
-.noaltmacro
- ENDBR
- crc32q -i*8(block_0), crc_init_q
- crc32q -i*8(block_1), crc1
-# SKIP crc32 -i*8(block_2), crc2 ; Don't do this one yet
+.Lcrc_3lanes:
+ xor crc1,crc1
+ xor crc2,crc2
+ mov %eax, chunk_bytes
+ shl $3, chunk_bytes # num bytes to process from each lane
+ sub $5, %eax # 4 for 4x_loop, 1 for special last iter
+ jl .Lcrc_3lanes_4x_done
+
+ # Unroll the loop by a factor of 4 to reduce the overhead of the loop
+ # bookkeeping instructions, which can compete with crc32q for the ALUs.
+.Lcrc_3lanes_4x_loop:
+ crc32q (bufp), crc_init_q
+ crc32q (bufp,chunk_bytes_q), crc1
+ crc32q (bufp,chunk_bytes_q,2), crc2
+ crc32q 8(bufp), crc_init_q
+ crc32q 8(bufp,chunk_bytes_q), crc1
+ crc32q 8(bufp,chunk_bytes_q,2), crc2
+ crc32q 16(bufp), crc_init_q
+ crc32q 16(bufp,chunk_bytes_q), crc1
+ crc32q 16(bufp,chunk_bytes_q,2), crc2
+ crc32q 24(bufp), crc_init_q
+ crc32q 24(bufp,chunk_bytes_q), crc1
+ crc32q 24(bufp,chunk_bytes_q,2), crc2
+ add $32, bufp
+ sub $4, %eax
+ jge .Lcrc_3lanes_4x_loop
+
+.Lcrc_3lanes_4x_done:
+ add $4, %eax
+ jz .Lcrc_3lanes_last_qword
+
+.Lcrc_3lanes_1x_loop:
+ crc32q (bufp), crc_init_q
+ crc32q (bufp,chunk_bytes_q), crc1
+ crc32q (bufp,chunk_bytes_q,2), crc2
+ add $8, bufp
+ dec %eax
+ jnz .Lcrc_3lanes_1x_loop
- mov block_2, block_0
+.Lcrc_3lanes_last_qword:
+ crc32q (bufp), crc_init_q
+ crc32q (bufp,chunk_bytes_q), crc1
+# SKIP crc32q (bufp,chunk_bytes_q,2), crc2 ; Don't do this one yet
################################################################
## 4) Combine three results:
################################################################
- lea (K_table-8)(%rip), %bufp # first entry is for idx 1
- shlq $3, %rax # rax *= 8
- pmovzxdq (%bufp,%rax), %xmm0 # 2 consts: K1:K2
- leal (%eax,%eax,2), %eax # rax *= 3 (total *24)
- sub %eax, len # len -= rax*24
+ lea (K_table-8)(%rip), %rax # first entry is for idx 1
+ pmovzxdq (%rax,chunk_bytes_q), %xmm0 # 2 consts: K1:K2
+ lea (chunk_bytes,chunk_bytes,2), %eax # chunk_bytes * 3
+ sub %eax, len # len -= chunk_bytes * 3
movq crc_init_q, %xmm1 # CRC for block 1
pclmulqdq $0x00, %xmm0, %xmm1 # Multiply by K2
@@ -206,20 +175,19 @@ LABEL crc_ %i
pxor %xmm2,%xmm1
movq %xmm1, %rax
- xor -i*8(block_2), %rax
+ xor (bufp,chunk_bytes_q,2), %rax
mov crc2, crc_init_q
crc32 %rax, crc_init_q
+ lea 8(bufp,chunk_bytes_q,2), bufp
################################################################
- ## 5) Check for end:
+ ## 5) If more blocks remain, goto (2):
################################################################
-LABEL crc_ 0
- ENDBR
cmp $128*24, len
- jae .Lfull_block
+ jae .Lfull_block
cmp $SMALL_SIZE, len
- jae .Lcontinue_block
+ jae .Lpartial_block
#######################################################################
## 6) Process any remainder without interleaving:
@@ -231,47 +199,30 @@ LABEL crc_ 0
shr $3, %eax
jz .Ldo_dword
.Ldo_qwords:
- crc32q (bufptmp), crc_init_q
- add $8, bufptmp
+ crc32q (bufp), crc_init_q
+ add $8, bufp
dec %eax
jnz .Ldo_qwords
.Ldo_dword:
test $4, len
jz .Ldo_word
- crc32l (bufptmp), crc_init
- add $4, bufptmp
+ crc32l (bufp), crc_init
+ add $4, bufp
.Ldo_word:
test $2, len
jz .Ldo_byte
- crc32w (bufptmp), crc_init
- add $2, bufptmp
+ crc32w (bufp), crc_init
+ add $2, bufp
.Ldo_byte:
test $1, len
jz .Ldone
- crc32b (bufptmp), crc_init
+ crc32b (bufp), crc_init
.Ldone:
mov crc_init, %eax
- popq %rsi
- popq %rdi
- popq %rbx
RET
SYM_FUNC_END(crc_pcl)
.section .rodata, "a", @progbits
- ################################################################
- ## jump table Table is 129 entries x 2 bytes each
- ################################################################
-.align 4
-jump_table:
- i=0
-.rept 129
-.altmacro
-JMPTBL_ENTRY %i
-.noaltmacro
- i=i+1
-.endr
-
-
################################################################
## PCLMULQDQ tables
## Table is 128 entries x 2 words (8 bytes) each


@@ -0,0 +1,31 @@
From cda0e050fec85635986e9cfe991e26339bf305dc Mon Sep 17 00:00:00 2001
From: "Jan Alexander Steffens (heftig)" <heftig@archlinux.org>
Date: Sat, 13 Jan 2024 15:29:25 +0100
Subject: arch/Kconfig: Default to maximum amount of ASLR bits
To mitigate https://zolutal.github.io/aslrnt/; do this with a patch to
avoid having to enable `CONFIG_EXPERT`.
---
arch/Kconfig | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -1050,7 +1050,7 @@ config ARCH_MMAP_RND_BITS
int "Number of bits to use for ASLR of mmap base address" if EXPERT
range ARCH_MMAP_RND_BITS_MIN ARCH_MMAP_RND_BITS_MAX
default ARCH_MMAP_RND_BITS_DEFAULT if ARCH_MMAP_RND_BITS_DEFAULT
- default ARCH_MMAP_RND_BITS_MIN
+ default ARCH_MMAP_RND_BITS_MAX
depends on HAVE_ARCH_MMAP_RND_BITS
help
This value can be used to select the number of bits to use to
@@ -1084,7 +1084,7 @@ config ARCH_MMAP_RND_COMPAT_BITS
int "Number of bits to use for ASLR of mmap base address for compatible applications" if EXPERT
range ARCH_MMAP_RND_COMPAT_BITS_MIN ARCH_MMAP_RND_COMPAT_BITS_MAX
default ARCH_MMAP_RND_COMPAT_BITS_DEFAULT if ARCH_MMAP_RND_COMPAT_BITS_DEFAULT
- default ARCH_MMAP_RND_COMPAT_BITS_MIN
+ default ARCH_MMAP_RND_COMPAT_BITS_MAX
depends on HAVE_ARCH_MMAP_RND_COMPAT_BITS
help
This value can be used to select the number of bits to use to
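
As a quick illustration of what wider mmap ASLR means in practice, the sketch below (not part of the patch; it only assumes a Linux system where /proc/sys/vm/mmap_rnd_bits is exposed) prints the address of an anonymous mapping. Running it several times with ARCH_MMAP_RND_BITS_MAX selected shows more of the high address bits varying between runs.

/* aslr_demo.c - observe mmap base randomization; build with: cc aslr_demo.c */
#include <stdio.h>
#include <sys/mman.h>

int main(void)
{
	void *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	/* With more ASLR bits, more of the high bits of this address vary. */
	printf("anonymous mapping at %p\n", p);
	munmap(p, 4096);
	return 0;
}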


@@ -0,0 +1,112 @@
From b7d96c1f19ef15ea431a8d5d7ab2cad22c35edba Mon Sep 17 00:00:00 2001
From: Qais Yousef <qyousef@layalina.io>
Date: Sun, 28 Jul 2024 20:26:59 +0100
Subject: cpufreq: Remove LATENCY_MULTIPLIER
The current LATENCY_MULTIPLIER which has been around for nearly 20 years
causes rate_limit_us to be always in ms range.
On M1 mac mini I get 50 and 56us transition latency, but due to the 1000
multiplier we end up setting rate_limit_us to 50 and 56ms, which gets
capped into 2ms and was 10ms before e13aa799c2a6 ("cpufreq: Change
default transition delay to 2ms")
On an Intel i5 system the transition latency is 20us, but due to the
multiplier we end up with 20ms, which again is capped to 2ms.
Given how good modern hardware is, how modern workloads require systems
to be more responsive to cater for sudden changes in workload (tasks
sleeping/waking up/migrating, uclamp causing a sudden boost or cap), and
that 2ms is a quarter of the frame time of a 120Hz system, drop the
old logic in favour of providing 50% headroom.
rate_limit_us = 1.5 * latency.
I considered not adding any headroom which could mean that we can end up
with infinite back-to-back requests.
I also considered providing a constant headroom (e.g: 100us) assuming
that any h/w or f/w dealing with the request shouldn't require a large
headroom when transition_latency is actually high.
But for both cases I wasn't sure if h/w or f/w can end up being
overwhelmed dealing with the freq requests in a potentially busy system.
So I opted for providing 50% breathing room.
This is expected to impact schedutil only as the other user,
dbs_governor, takes the max(2*tick, transition_delay_us) and the former
was at least 2ms on 1ms TICK, which is equivalent to the max_delay_us
before applying this patch. For systems with TICK of 4ms, this value
would have almost always ended up with 8ms sampling rate.
For systems that report 0 transition latency, we still default to
returning 1ms as transition delay.
This helps in eliminating a source of latency for applying requests as
mentioned in [1]. For example if we have a 1ms tick, most systems will
miss sending an update at tick when updating the util_avg for a task/CPU
(rate_limit_us will be 2ms for most systems).
Link: https://lore.kernel.org/lkml/20240724212255.mfr2ybiv2j2uqek7@airbuntu/ # [1]
Link: https://lore.kernel.org/lkml/20240205022500.2232124-1-qyousef@layalina.io/
Signed-off-by: Qais Yousef <qyousef@layalina.io>
Link: https://patch.msgid.link/20240728192659.58115-1-qyousef@layalina.io
[ rjw: Subject edits ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
drivers/cpufreq/cpufreq.c | 27 ++++-----------------------
include/linux/cpufreq.h | 6 ------
2 files changed, 4 insertions(+), 29 deletions(-)
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -575,30 +575,11 @@ unsigned int cpufreq_policy_transition_d
return policy->transition_delay_us;
latency = policy->cpuinfo.transition_latency / NSEC_PER_USEC;
- if (latency) {
- unsigned int max_delay_us = 2 * MSEC_PER_SEC;
+ if (latency)
+ /* Give a 50% breathing room between updates */
+ return latency + (latency >> 1);
- /*
- * If the platform already has high transition_latency, use it
- * as-is.
- */
- if (latency > max_delay_us)
- return latency;
-
- /*
- * For platforms that can change the frequency very fast (< 2
- * us), the above formula gives a decent transition delay. But
- * for platforms where transition_latency is in milliseconds, it
- * ends up giving unrealistic values.
- *
- * Cap the default transition delay to 2 ms, which seems to be
- * a reasonable amount of time after which we should reevaluate
- * the frequency.
- */
- return min(latency * LATENCY_MULTIPLIER, max_delay_us);
- }
-
- return LATENCY_MULTIPLIER;
+ return USEC_PER_MSEC;
}
EXPORT_SYMBOL_GPL(cpufreq_policy_transition_delay_us);
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -577,12 +577,6 @@ static inline unsigned long cpufreq_scal
#define CPUFREQ_POLICY_POWERSAVE (1)
#define CPUFREQ_POLICY_PERFORMANCE (2)
-/*
- * The polling frequency depends on the capability of the processor. Default
- * polling frequency is 1000 times the transition latency of the processor.
- */
-#define LATENCY_MULTIPLIER (1000)
-
struct cpufreq_governor {
char name[CPUFREQ_NAME_LEN];
int (*init)(struct cpufreq_policy *policy);
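
To make the before/after numbers from the commit message concrete, here is a small standalone C sketch (an illustration only, re-implementing both formulas in userspace; the 2 ms cap, the 1000 multiplier and the USEC_PER_MSEC fallback are taken from the diff above) that compares the old and new default transition delay for a platform reporting 50 us latency.

/* delay_compare.c - build with: cc delay_compare.c */
#include <stdio.h>

#define LATENCY_MULTIPLIER 1000		/* old definition, removed by the patch */

static unsigned int old_delay_us(unsigned int latency_us)
{
	unsigned int max_delay_us = 2 * 1000;	/* 2 ms cap */

	if (!latency_us)
		return LATENCY_MULTIPLIER;
	if (latency_us > max_delay_us)
		return latency_us;
	return latency_us * LATENCY_MULTIPLIER > max_delay_us ?
	       max_delay_us : latency_us * LATENCY_MULTIPLIER;
}

static unsigned int new_delay_us(unsigned int latency_us)
{
	if (!latency_us)
		return 1000;			/* USEC_PER_MSEC */
	return latency_us + (latency_us >> 1);	/* 50% headroom */
}

int main(void)
{
	/* Prints "old: 2000 us, new: 75 us" for a 50 us transition latency. */
	printf("old: %u us, new: %u us\n", old_delay_us(50), new_delay_us(50));
	return 0;
}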

View File

@@ -0,0 +1,83 @@
From 218e958524c673d6e68737e7f82d80ba2b6ef59a Mon Sep 17 00:00:00 2001
From: Javier Martinez Canillas <javierm@redhat.com>
Date: Thu, 19 May 2022 14:40:07 +0200
Subject: drivers/firmware: skip simpledrm if nvidia-drm.modeset=1 is set
The Nvidia proprietary driver has some bugs that lead to issues if used
with the simpledrm driver. The most noticeable is that it does not register
an emulated fbdev device.
It just relies on an fbdev being registered by another driver, one that
could be attached to the framebuffer console. On UEFI machines,
this is the efifb driver.
This means that disabling the efifb driver will cause virtual consoles to
not be present in the system when using the Nvidia driver. Legacy BIOS is
not affected just because fbcon is not used there, but instead vgacon.
Unless a VGA mode is specified using the vga= kernel command line option,
in that case the vesafb driver is used instead and its fbdev attached to
the fbcon.
This is a problem because with CONFIG_SYSFB_SIMPLEFB=y, the sysfb platform
code attempts to register a "simple-framebuffer" platform device (that is
matched against simpledrm) and only registers either an "efi-framebuffer"
or "vesa-framebuffer" if this fails to be registered due the video modes
not being compatible.
The Nvidia driver relying on another driver to register the fbdev is quite
fragile, since it can't really assume those will stick around. For example
there are patches posted to remove the EFI and VESA platform devices once
a real DRM or fbdev driver probes.
But in any case, moving to a simpledrm + emulated fbdev only breaks this
assumption and causes users to not have VT if the Nvidia driver is used.
So to prevent this, let's add a workaround and make sysfb skip the
"simple-framebuffer" registration when the nvidia-drm.modeset=1 option is set.
This is quite horrible, but honestly I can't think of any other approach.
For this to work, the CONFIG_FB_EFI and CONFIG_FB_VESA config options must
be enabled besides CONFIG_DRM_SIMPLEDRM.
Signed-off-by: Javier Martinez Canillas <javierm@redhat.com>
Cherry-picked-for: https://bugs.archlinux.org/task/73720
---
drivers/firmware/sysfb.c | 18 +++++++++++++++++-
1 file changed, 17 insertions(+), 1 deletion(-)
--- a/drivers/firmware/sysfb.c
+++ b/drivers/firmware/sysfb.c
@@ -35,6 +35,22 @@
#include <linux/screen_info.h>
#include <linux/sysfb.h>
+static int skip_simpledrm;
+
+static int __init simpledrm_disable(char *opt)
+{
+ if (!opt)
+ return -EINVAL;
+
+ get_option(&opt, &skip_simpledrm);
+
+ if (skip_simpledrm)
+ pr_info("The simpledrm driver will not be probed\n");
+
+ return 0;
+}
+early_param("nvidia-drm.modeset", simpledrm_disable);
+
static struct platform_device *pd;
static DEFINE_MUTEX(disable_lock);
static bool disabled;
@@ -145,7 +161,7 @@ static __init int sysfb_init(void)
/* try to create a simple-framebuffer device */
compatible = sysfb_parse_mode(si, &mode);
- if (compatible) {
+ if (compatible && !skip_simpledrm) {
pd = sysfb_create_simplefb(si, &mode, parent);
if (!IS_ERR(pd))
goto put_device;


@@ -0,0 +1,26 @@
From b97d21a0aa65a6f7a7bb17bbc696b136688c96ed Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Mon, 26 Aug 2024 08:50:11 -0400
Subject: nfsd: add more info to WARN_ON_ONCE on failed callbacks
Currently, you get the warning and stack trace, but nothing is printed
about the relevant error codes. Add that in.
Signed-off-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
fs/nfsd/nfs4callback.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -1333,7 +1333,8 @@ static void nfsd4_cb_done(struct rpc_tas
return;
if (cb->cb_status) {
- WARN_ON_ONCE(task->tk_status);
+ WARN_ONCE(task->tk_status, "cb_status=%d tk_status=%d",
+ cb->cb_status, task->tk_status);
task->tk_status = cb->cb_status;
}


@@ -0,0 +1,57 @@
From 1d120544580708eae6bd5981b308ca17735edaac Mon Sep 17 00:00:00 2001
From: Vitaly Lifshits <vitaly.lifshits@intel.com>
Date: Tue, 1 Oct 2024 20:08:48 +0300
Subject: e1000e: Remove Meteor Lake SMBUS workarounds
This is a partial revert to commit 76a0a3f9cc2f ("e1000e: fix force smbus
during suspend flow"). That commit fixed a sporadic PHY access issue but
introduced a regression in runtime suspend flows.
The original issue on Meteor Lake systems was rare in terms of the
reproduction rate and the number of systems affected.
After the integration of commit 0a6ad4d9e169 ("e1000e: avoid failing the
system during pm_suspend"), PHY access loss can no longer cause a
system-level suspend failure, as it only occurs when the LAN cable is
disconnected and is recovered during the system resume flow. Therefore, its
functional impact is low, and priority is given to stabilizing
runtime suspend.
Fixes: 76a0a3f9cc2f ("e1000e: fix force smbus during suspend flow")
Signed-off-by: Vitaly Lifshits <vitaly.lifshits@intel.com>
---
drivers/net/ethernet/intel/e1000e/ich8lan.c | 17 ++++-------------
1 file changed, 4 insertions(+), 13 deletions(-)
--- a/drivers/net/ethernet/intel/e1000e/ich8lan.c
+++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c
@@ -1205,12 +1205,10 @@ s32 e1000_enable_ulp_lpt_lp(struct e1000
if (ret_val)
goto out;
- if (hw->mac.type != e1000_pch_mtp) {
- ret_val = e1000e_force_smbus(hw);
- if (ret_val) {
- e_dbg("Failed to force SMBUS: %d\n", ret_val);
- goto release;
- }
+ ret_val = e1000e_force_smbus(hw);
+ if (ret_val) {
+ e_dbg("Failed to force SMBUS: %d\n", ret_val);
+ goto release;
}
/* Si workaround for ULP entry flow on i127/rev6 h/w. Enable
@@ -1273,13 +1271,6 @@ s32 e1000_enable_ulp_lpt_lp(struct e1000
}
release:
- if (hw->mac.type == e1000_pch_mtp) {
- ret_val = e1000e_force_smbus(hw);
- if (ret_val)
- e_dbg("Failed to force SMBUS over MTL system: %d\n",
- ret_val);
- }
-
hw->phy.ops.release(hw);
out:
if (ret_val)


@@ -0,0 +1,46 @@
From 4086c1a804741c9c8f418d6088e8c531f2a481f3 Mon Sep 17 00:00:00 2001
From: Naohiro Aota <naohiro.aota@wdc.com>
Date: Tue, 1 Oct 2024 17:03:32 +0900
Subject: btrfs: zoned: fix zone unusable accounting for freed reserved extent
When btrfs reserves an extent and does not use it (e.g, by an error), it
calls btrfs_free_reserved_extent() to free the reserved extent. In the
process, it calls btrfs_add_free_space() and then it accounts the region
bytes as block_group->zone_unusable.
However, it leaves the space_info->bytes_zone_unusable side not updated. As
a result, ENOSPC can happen while a space_info reservation succeeded. The
reservation is fine because the freed region is not added in
space_info->bytes_zone_unusable, leaving that space as "free". OTOH, the
corresponding block group counts it as zone_unusable and its allocation
pointer is not rewound, so we cannot allocate an extent from that block group.
That will also negate space_info's async/sync reclaim process, and cause an
ENOSPC error from the extent allocation process.
Fix that by returning the space to space_info->bytes_zone_unusable.
Ideally, since a bio is not submitted for this reserved region, we should
return the space to free space and rewind the allocation pointer. But, it
needs rework on extent allocation handling, so let it work in this way for
now.
Fixes: 169e0da91a21 ("btrfs: zoned: track unusable bytes for zones")
CC: stable@vger.kernel.org # 5.15+
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
fs/btrfs/block-group.c | 2 ++
1 file changed, 2 insertions(+)
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -3819,6 +3819,8 @@ void btrfs_free_reserved_bytes(struct bt
spin_lock(&cache->lock);
if (cache->ro)
space_info->bytes_readonly += num_bytes;
+ else if (btrfs_is_zoned(cache->fs_info))
+ space_info->bytes_zone_unusable += num_bytes;
cache->reserved -= num_bytes;
space_info->bytes_reserved -= num_bytes;
space_info->max_extent_size = 0;


@@ -0,0 +1,64 @@
From aa8155f0ba032729ec4f28c5cb9669fb14f6947b Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Mon, 14 Oct 2024 16:14:18 +0100
Subject: btrfs: clear force-compress on remount when compress mount option is
given
After the migration to use fs context for processing mount options we had
a slight change in the semantics for remounting a filesystem that was
mounted with compress-force. Before we could clear compress-force by
passing only "-o compress[=algo]" during a remount, but after that change
that does not work anymore, force-compress is still present and one needs
to pass "-o compress-force=no,compress[=algo]" to the mount command.
Example, when running on a kernel 6.8+:
$ mount -o compress-force=zlib:9 /dev/sdi /mnt/sdi
$ mount | grep sdi
/dev/sdi on /mnt/sdi type btrfs (rw,relatime,compress-force=zlib:9,discard=async,space_cache=v2,subvolid=5,subvol=/)
$ mount -o remount,compress=zlib:5 /mnt/sdi
$ mount | grep sdi
/dev/sdi on /mnt/sdi type btrfs (rw,relatime,compress-force=zlib:5,discard=async,space_cache=v2,subvolid=5,subvol=/)
On a 6.7 kernel (or older):
$ mount -o compress-force=zlib:9 /dev/sdi /mnt/sdi
$ mount | grep sdi
/dev/sdi on /mnt/sdi type btrfs (rw,relatime,compress-force=zlib:9,discard=async,space_cache=v2,subvolid=5,subvol=/)
$ mount -o remount,compress=zlib:5 /mnt/sdi
$ mount | grep sdi
/dev/sdi on /mnt/sdi type btrfs (rw,relatime,compress=zlib:5,discard=async,space_cache=v2,subvolid=5,subvol=/)
So update btrfs_parse_param() to clear "compress-force" when "compress" is
given, providing the same semantics as kernel 6.7 and older.
Reported-by: Roman Mamedov <rm@romanrm.net>
Link: https://lore.kernel.org/linux-btrfs/20241014182416.13d0f8b0@nvm/
CC: stable@vger.kernel.org # 6.8+
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
fs/btrfs/super.c | 9 +++++++++
1 file changed, 9 insertions(+)
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -340,6 +340,15 @@ static int btrfs_parse_param(struct fs_c
fallthrough;
case Opt_compress:
case Opt_compress_type:
+ /*
+ * Provide the same semantics as older kernels that don't use fs
+ * context, specifying the "compress" option clears
+ * "force-compress" without the need to pass
+ * "compress-force=[no|none]" before specifying "compress".
+ */
+ if (opt != Opt_compress_force && opt != Opt_compress_force_type)
+ btrfs_clear_opt(ctx->mount_opt, FORCE_COMPRESS);
+
if (opt == Opt_compress || opt == Opt_compress_force) {
ctx->compress_type = BTRFS_COMPRESS_ZLIB;
ctx->compress_level = BTRFS_ZLIB_DEFAULT_LEVEL;

View File

@@ -0,0 +1,68 @@
From 81baeb2a67d8245ac5b61299e54dd65defd4ac72 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Tue, 10 Sep 2024 15:21:04 +0930
Subject: btrfs: qgroup: set a more sane default value for subtree drop
threshold
Since commit 011b46c30476 ("btrfs: skip subtree scan if it's too high to
avoid low stall in btrfs_commit_transaction()"), btrfs qgroup can
automatically skip large subtree scan at the cost of marking qgroup
inconsistent.
It's designed to address the final performance problem of snapshot drop
with qgroup enabled, but to be safe the default value is
BTRFS_MAX_LEVEL, requiring a user space daemon to set a different value
to make it work.
I'd say it's not a good idea to rely on a user space tool to set this
default value, especially when some operations (snapshot dropping) can
be triggered immediately after mount, leaving a very small window to
set that sysfs interface.
So instead of disabling this new feature by default, enable it with a
low threshold (3), so that large subvolume tree drop at mount time won't
cause huge qgroup workload.
CC: stable@vger.kernel.org # 6.1
Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
fs/btrfs/disk-io.c | 2 +-
fs/btrfs/qgroup.c | 2 +-
fs/btrfs/qgroup.h | 2 ++
3 files changed, 4 insertions(+), 2 deletions(-)
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1960,7 +1960,7 @@ static void btrfs_init_qgroup(struct btr
fs_info->qgroup_seq = 1;
fs_info->qgroup_ulist = NULL;
fs_info->qgroup_rescan_running = false;
- fs_info->qgroup_drop_subtree_thres = BTRFS_MAX_LEVEL;
+ fs_info->qgroup_drop_subtree_thres = BTRFS_QGROUP_DROP_SUBTREE_THRES_DEFAULT;
mutex_init(&fs_info->qgroup_rescan_lock);
}
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1407,7 +1407,7 @@ int btrfs_quota_disable(struct btrfs_fs_
fs_info->quota_root = NULL;
fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON;
fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_SIMPLE_MODE;
- fs_info->qgroup_drop_subtree_thres = BTRFS_MAX_LEVEL;
+ fs_info->qgroup_drop_subtree_thres = BTRFS_QGROUP_DROP_SUBTREE_THRES_DEFAULT;
spin_unlock(&fs_info->qgroup_lock);
btrfs_free_qgroup_config(fs_info);
--- a/fs/btrfs/qgroup.h
+++ b/fs/btrfs/qgroup.h
@@ -121,6 +121,8 @@ struct btrfs_inode;
#define BTRFS_QGROUP_RUNTIME_FLAG_CANCEL_RESCAN (1ULL << 63)
#define BTRFS_QGROUP_RUNTIME_FLAG_NO_ACCOUNTING (1ULL << 62)
+#define BTRFS_QGROUP_DROP_SUBTREE_THRES_DEFAULT (3)
+
/*
* Record a dirty extent, and info qgroup to update quota on it
*/


@@ -0,0 +1,32 @@
From 8ea93b01558ea7a752e478ad25862e7441d6053a Mon Sep 17 00:00:00 2001
From: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Date: Thu, 19 Sep 2024 12:16:38 +0200
Subject: btrfs: also add stripe entries for NOCOW writes
NOCOW writes do not generate stripe_extent entries in the RAID stripe
tree, as the RAID stripe-tree feature initially was designed with a
zoned filesystem in mind and on a zoned filesystem, we do not allow NOCOW
writes. But the RAID stripe-tree feature is independent from the zoned
feature, so we must also do NOCOW writes for RAID stripe-tree filesystems.
Reviewed-by: Naohiro Aota <naohiro.aota@wdc.com>
Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
fs/btrfs/inode.c | 5 +++++
1 file changed, 5 insertions(+)
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3087,6 +3087,11 @@ int btrfs_finish_one_ordered(struct btrf
ret = btrfs_update_inode_fallback(trans, inode);
if (ret) /* -ENOMEM or corruption */
btrfs_abort_transaction(trans, ret);
+
+ ret = btrfs_insert_raid_extent(trans, ordered_extent);
+ if (ret)
+ btrfs_abort_transaction(trans, ret);
+
goto out;
}


@@ -0,0 +1,107 @@
From f6f5cd12972307324de5decd7fa41b0b3c98639c Mon Sep 17 00:00:00 2001
From: Boris Burkov <boris@bur.io>
Date: Fri, 18 Oct 2024 15:44:34 -0700
Subject: btrfs: fix read corruption due to race with extent map merging
In debugging some corrupt squashfs files, we observed symptoms of
corrupt page cache pages but correct on-disk contents. Further
investigation revealed that the exact symptom was a correct page
followed by an incorrect, duplicate, page. This got us thinking about
extent maps.
commit ac05ca913e9f ("Btrfs: fix race between using extent maps and merging them")
enforces a reference count on the primary `em` extent_map being merged,
as that one gets modified.
However, since commit 3d2ac9922465 ("btrfs: introduce new members for
extent_map"), both 'em' and 'merge' get modified, which started modifying
'merge' and thus introduced the same race.
We were able to reproduce this by looping the affected squashfs workload
in parallel on a bunch of separate btrfs-es while also dropping caches.
We are still working on a simple enough reproducer to make into an fstest.
The simplest fix is to stop modifying 'merge', which is not essential,
as it is dropped immediately after the merge. This behavior is simply
a consequence of the order of the two extent maps being important in
computing the new values. Modify merge_ondisk_extents to take prev and
next by const* and also take a third merged parameter that it puts the
results in. Note that this introduces the rather odd behavior of passing
'em' to merge_ondisk_extents as a const * and as a regular ptr.
Fixes: 3d2ac9922465 ("btrfs: introduce new members for extent_map")
CC: stable@vger.kernel.org # 6.11+
Reviewed-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Omar Sandoval <osandov@fb.com>
Signed-off-by: Boris Burkov <boris@bur.io>
Signed-off-by: David Sterba <dsterba@suse.com>
---
fs/btrfs/extent_map.c | 31 ++++++++++++++++---------------
1 file changed, 16 insertions(+), 15 deletions(-)
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -240,13 +240,19 @@ static bool mergeable_maps(const struct
/*
* Handle the on-disk data extents merge for @prev and @next.
*
+ * @prev: left extent to merge
+ * @next: right extent to merge
+ * @merged: the extent we will not discard after the merge; updated with new values
+ *
+ * After this, one of the two extents is the new merged extent and the other is
+ * removed from the tree and likely freed. Note that @merged is one of @prev/@next
+ * so there is const/non-const aliasing occurring here.
+ *
* Only touches disk_bytenr/disk_num_bytes/offset/ram_bytes.
* For now only uncompressed regular extent can be merged.
- *
- * @prev and @next will be both updated to point to the new merged range.
- * Thus one of them should be removed by the caller.
*/
-static void merge_ondisk_extents(struct extent_map *prev, struct extent_map *next)
+static void merge_ondisk_extents(const struct extent_map *prev, const struct extent_map *next,
+ struct extent_map *merged)
{
u64 new_disk_bytenr;
u64 new_disk_num_bytes;
@@ -281,15 +287,10 @@ static void merge_ondisk_extents(struct
new_disk_bytenr;
new_offset = prev->disk_bytenr + prev->offset - new_disk_bytenr;
- prev->disk_bytenr = new_disk_bytenr;
- prev->disk_num_bytes = new_disk_num_bytes;
- prev->ram_bytes = new_disk_num_bytes;
- prev->offset = new_offset;
-
- next->disk_bytenr = new_disk_bytenr;
- next->disk_num_bytes = new_disk_num_bytes;
- next->ram_bytes = new_disk_num_bytes;
- next->offset = new_offset;
+ merged->disk_bytenr = new_disk_bytenr;
+ merged->disk_num_bytes = new_disk_num_bytes;
+ merged->ram_bytes = new_disk_num_bytes;
+ merged->offset = new_offset;
}
static void dump_extent_map(struct btrfs_fs_info *fs_info, const char *prefix,
@@ -358,7 +359,7 @@ static void try_merge_map(struct btrfs_i
em->generation = max(em->generation, merge->generation);
if (em->disk_bytenr < EXTENT_MAP_LAST_BYTE)
- merge_ondisk_extents(merge, em);
+ merge_ondisk_extents(merge, em, em);
em->flags |= EXTENT_FLAG_MERGED;
validate_extent_map(fs_info, em);
@@ -375,7 +376,7 @@ static void try_merge_map(struct btrfs_i
if (rb && can_merge_extent_map(merge) && mergeable_maps(em, merge)) {
em->len += merge->len;
if (em->disk_bytenr < EXTENT_MAP_LAST_BYTE)
- merge_ondisk_extents(em, merge);
+ merge_ondisk_extents(em, merge, em);
validate_extent_map(fs_info, em);
rb_erase(&merge->rb_node, &tree->root);
RB_CLEAR_NODE(&merge->rb_node);


@@ -0,0 +1,101 @@
From 7f83049bda761f340991af8dce79a4e98c62b378 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Thu, 19 Sep 2024 20:18:11 +0930
Subject: btrfs: reject ro->rw reconfiguration if there are hard ro
requirements
[BUG]
Syzbot reports the following crash:
BTRFS info (device loop0 state MCS): disabling free space tree
BTRFS info (device loop0 state MCS): clearing compat-ro feature flag for FREE_SPACE_TREE (0x1)
BTRFS info (device loop0 state MCS): clearing compat-ro feature flag for FREE_SPACE_TREE_VALID (0x2)
Oops: general protection fault, probably for non-canonical address 0xdffffc0000000003: 0000 [#1] PREEMPT SMP KASAN NOPTI
KASAN: null-ptr-deref in range [0x0000000000000018-0x000000000000001f]
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2~bpo12+1 04/01/2014
RIP: 0010:backup_super_roots fs/btrfs/disk-io.c:1691 [inline]
RIP: 0010:write_all_supers+0x97a/0x40f0 fs/btrfs/disk-io.c:4041
Call Trace:
<TASK>
btrfs_commit_transaction+0x1eae/0x3740 fs/btrfs/transaction.c:2530
btrfs_delete_free_space_tree+0x383/0x730 fs/btrfs/free-space-tree.c:1312
btrfs_start_pre_rw_mount+0xf28/0x1300 fs/btrfs/disk-io.c:3012
btrfs_remount_rw fs/btrfs/super.c:1309 [inline]
btrfs_reconfigure+0xae6/0x2d40 fs/btrfs/super.c:1534
btrfs_reconfigure_for_mount fs/btrfs/super.c:2020 [inline]
btrfs_get_tree_subvol fs/btrfs/super.c:2079 [inline]
btrfs_get_tree+0x918/0x1920 fs/btrfs/super.c:2115
vfs_get_tree+0x90/0x2b0 fs/super.c:1800
do_new_mount+0x2be/0xb40 fs/namespace.c:3472
do_mount fs/namespace.c:3812 [inline]
__do_sys_mount fs/namespace.c:4020 [inline]
__se_sys_mount+0x2d6/0x3c0 fs/namespace.c:3997
do_syscall_x64 arch/x86/entry/common.c:52 [inline]
do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83
entry_SYSCALL_64_after_hwframe+0x77/0x7f
[CAUSE]
To support mounting different subvolumes with different RO/RW flags for
the new mount APIs, btrfs introduced two workarounds to support this feature:
- Skip mount option/feature checks if we are mounting a different
subvolume
- Reconfigure the fs to RW if the initial mount is RO
Combining these two, we can have the following sequence:
- Mount the fs ro,rescue=all,clear_cache,space_cache=v1
rescue=all will mark the fs as hard read-only, so no v2 cache clearing
will happen.
- Mount a subvolume rw of the same fs.
We go into btrfs_get_tree_subvol(), but fc_mount() returns EBUSY
because our new fc is RW, different from the original fs.
Now we enter btrfs_reconfigure_for_mount(), which switches the RO flag
first so that we can grab the existing fs_info.
Then we reconfigure the fs to RW.
- During reconfiguration, option/features check is skipped
This means we will restart the v2 cache clearing, and convert back to
v1 cache.
This will trigger fs writes, and since the original fs has "rescue=all"
option, it skips the csum tree read.
And eventually causing NULL pointer dereference in super block
writeback.
[FIX]
For reconfiguration caused by different subvolume RO/RW flags, ensure we
always run btrfs_check_options() to ensure we have proper hard RO
requirements met.
In fact the function btrfs_check_options() doesn't really do many
complex checks, but hard RO requirement and some feature dependency
checks, thus there is no special reason not to do the check for mount
reconfiguration.
Reported-by: syzbot+56360f93efa90ff15870@syzkaller.appspotmail.com
Link: https://lore.kernel.org/linux-btrfs/0000000000008c5d090621cb2770@google.com/
Fixes: f044b318675f ("btrfs: handle the ro->rw transition for mounting different subvolumes")
CC: stable@vger.kernel.org # 6.8+
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
fs/btrfs/super.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1519,8 +1519,7 @@ static int btrfs_reconfigure(struct fs_c
sync_filesystem(sb);
set_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state);
- if (!mount_reconfigure &&
- !btrfs_check_options(fs_info, &ctx->mount_opt, fc->sb_flags))
+ if (!btrfs_check_options(fs_info, &ctx->mount_opt, fc->sb_flags))
return -EINVAL;
ret = btrfs_check_features(fs_info, !(fc->sb_flags & SB_RDONLY));


@@ -0,0 +1,54 @@
From ed73b9279db9536a9672cba6506950c26cedb140 Mon Sep 17 00:00:00 2001
From: Yue Haibing <yuehaibing@huawei.com>
Date: Tue, 22 Oct 2024 17:52:08 +0800
Subject: btrfs: fix passing 0 to ERR_PTR in btrfs_search_dir_index_item()
The ret may be zero in btrfs_search_dir_index_item() and should not be
passed to ERR_PTR(). Since btrfs_unlink_subvol() is now the only caller,
rework the function to return ERR_PTR(-ENOENT) when ret >= 0.
This fixes smatch warnings:
fs/btrfs/dir-item.c:353
btrfs_search_dir_index_item() warn: passing zero to 'ERR_PTR'
Fixes: 9dcbe16fccbb ("btrfs: use btrfs_for_each_slot in btrfs_search_dir_index_item")
CC: stable@vger.kernel.org # 6.1+
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: Yue Haibing <yuehaibing@huawei.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
fs/btrfs/dir-item.c | 4 ++--
fs/btrfs/inode.c | 7 ++-----
2 files changed, 4 insertions(+), 7 deletions(-)
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -347,8 +347,8 @@ btrfs_search_dir_index_item(struct btrfs
return di;
}
/* Adjust return code if the key was not found in the next leaf. */
- if (ret > 0)
- ret = 0;
+ if (ret >= 0)
+ ret = -ENOENT;
return ERR_PTR(ret);
}
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4344,11 +4344,8 @@ static int btrfs_unlink_subvol(struct bt
*/
if (btrfs_ino(inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) {
di = btrfs_search_dir_index_item(root, path, dir_ino, &fname.disk_name);
- if (IS_ERR_OR_NULL(di)) {
- if (!di)
- ret = -ENOENT;
- else
- ret = PTR_ERR(di);
+ if (IS_ERR(di)) {
+ ret = PTR_ERR(di);
btrfs_abort_transaction(trans, ret);
goto out;
}
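
The pitfall being fixed is easy to demonstrate outside the kernel. The sketch below re-implements ERR_PTR()/IS_ERR() in userspace (an illustration of the standard kernel semantics, not kernel code): ERR_PTR(0) yields NULL, which IS_ERR() does not flag, so a caller that only checks IS_ERR() would go on to dereference it.

/* err_ptr_demo.c - build with: cc err_ptr_demo.c */
#include <stdio.h>

#define MAX_ERRNO	4095

static inline void *ERR_PTR(long error)
{
	return (void *)error;
}

static inline long IS_ERR(const void *ptr)
{
	return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

int main(void)
{
	void *p = ERR_PTR(0);	/* what the old code path could return */

	/* IS_ERR() is false here, so callers would dereference NULL. */
	printf("IS_ERR(ERR_PTR(0))  = %ld, ptr = %p\n", IS_ERR(p), p);
	printf("IS_ERR(ERR_PTR(-2)) = %ld\n", IS_ERR(ERR_PTR(-2)));	/* -ENOENT */
	return 0;
}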


@@ -0,0 +1,407 @@
From 88362669534c70bbc7036f45bb23e63a30d4adfb Mon Sep 17 00:00:00 2001
From: Oleksandr Natalenko <oleksandr@natalenko.name>
Date: Mon, 29 Jul 2024 00:38:24 +0200
Subject: mm: expose per-process KSM control via syscalls
d7597f59d1d3 added a new API to enable per-process KSM control. It
however uses prctl, which doesn't allow controlling KSM from outside of
the current process.
Hence, expose this API via 3 syscalls: process_ksm_enable,
process_ksm_disable and process_ksm_status. Given sufficient privileges,
auto-KSM can be enabled by another process.
Since these syscalls are not in the upstream kernel, also expose their
numbers under /sys/kernel/process_ksm so that userspace tooling like
uksmd knows how to use them.
Signed-off-by: Oleksandr Natalenko <oleksandr@natalenko.name>
---
arch/alpha/kernel/syscalls/syscall.tbl | 3 +
arch/arm/tools/syscall.tbl | 3 +
arch/m68k/kernel/syscalls/syscall.tbl | 3 +
arch/microblaze/kernel/syscalls/syscall.tbl | 3 +
arch/mips/kernel/syscalls/syscall_n32.tbl | 3 +
arch/mips/kernel/syscalls/syscall_n64.tbl | 3 +
arch/mips/kernel/syscalls/syscall_o32.tbl | 3 +
arch/parisc/kernel/syscalls/syscall.tbl | 3 +
arch/powerpc/kernel/syscalls/syscall.tbl | 3 +
arch/s390/kernel/syscalls/syscall.tbl | 3 +
arch/sh/kernel/syscalls/syscall.tbl | 3 +
arch/sparc/kernel/syscalls/syscall.tbl | 3 +
arch/x86/entry/syscalls/syscall_32.tbl | 3 +
arch/x86/entry/syscalls/syscall_64.tbl | 3 +
arch/xtensa/kernel/syscalls/syscall.tbl | 3 +
include/linux/syscalls.h | 3 +
include/uapi/asm-generic/unistd.h | 9 +-
kernel/sys.c | 147 ++++++++++++++++++
kernel/sys_ni.c | 3 +
scripts/syscall.tbl | 3 +
.../arch/powerpc/entry/syscalls/syscall.tbl | 3 +
.../perf/arch/s390/entry/syscalls/syscall.tbl | 3 +
22 files changed, 215 insertions(+), 1 deletion(-)
--- a/arch/alpha/kernel/syscalls/syscall.tbl
+++ b/arch/alpha/kernel/syscalls/syscall.tbl
@@ -502,3 +502,6 @@
570 common lsm_set_self_attr sys_lsm_set_self_attr
571 common lsm_list_modules sys_lsm_list_modules
572 common mseal sys_mseal
+573 common process_ksm_enable sys_process_ksm_enable
+574 common process_ksm_disable sys_process_ksm_disable
+575 common process_ksm_status sys_process_ksm_status
--- a/arch/arm/tools/syscall.tbl
+++ b/arch/arm/tools/syscall.tbl
@@ -477,3 +477,6 @@
460 common lsm_set_self_attr sys_lsm_set_self_attr
461 common lsm_list_modules sys_lsm_list_modules
462 common mseal sys_mseal
+463 common process_ksm_enable sys_process_ksm_enable
+464 common process_ksm_disable sys_process_ksm_disable
+465 common process_ksm_status sys_process_ksm_status
--- a/arch/m68k/kernel/syscalls/syscall.tbl
+++ b/arch/m68k/kernel/syscalls/syscall.tbl
@@ -462,3 +462,6 @@
460 common lsm_set_self_attr sys_lsm_set_self_attr
461 common lsm_list_modules sys_lsm_list_modules
462 common mseal sys_mseal
+463 common process_ksm_enable sys_process_ksm_enable
+464 common process_ksm_disable sys_process_ksm_disable
+465 common process_ksm_status sys_process_ksm_status
--- a/arch/microblaze/kernel/syscalls/syscall.tbl
+++ b/arch/microblaze/kernel/syscalls/syscall.tbl
@@ -468,3 +468,6 @@
460 common lsm_set_self_attr sys_lsm_set_self_attr
461 common lsm_list_modules sys_lsm_list_modules
462 common mseal sys_mseal
+463 common process_ksm_enable sys_process_ksm_enable
+464 common process_ksm_disable sys_process_ksm_disable
+465 common process_ksm_status sys_process_ksm_status
--- a/arch/mips/kernel/syscalls/syscall_n32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n32.tbl
@@ -401,3 +401,6 @@
460 n32 lsm_set_self_attr sys_lsm_set_self_attr
461 n32 lsm_list_modules sys_lsm_list_modules
462 n32 mseal sys_mseal
+463 n32 process_ksm_enable sys_process_ksm_enable
+464 n32 process_ksm_disable sys_process_ksm_disable
+465 n32 process_ksm_status sys_process_ksm_status
--- a/arch/mips/kernel/syscalls/syscall_n64.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n64.tbl
@@ -377,3 +377,6 @@
460 n64 lsm_set_self_attr sys_lsm_set_self_attr
461 n64 lsm_list_modules sys_lsm_list_modules
462 n64 mseal sys_mseal
+463 n64 process_ksm_enable sys_process_ksm_enable
+464 n64 process_ksm_disable sys_process_ksm_disable
+465 n64 process_ksm_status sys_process_ksm_status
--- a/arch/mips/kernel/syscalls/syscall_o32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_o32.tbl
@@ -450,3 +450,6 @@
460 o32 lsm_set_self_attr sys_lsm_set_self_attr
461 o32 lsm_list_modules sys_lsm_list_modules
462 o32 mseal sys_mseal
+463 o32 process_ksm_enable sys_process_ksm_enable
+464 o32 process_ksm_disable sys_process_ksm_disable
+465 o32 process_ksm_status sys_process_ksm_status
--- a/arch/parisc/kernel/syscalls/syscall.tbl
+++ b/arch/parisc/kernel/syscalls/syscall.tbl
@@ -461,3 +461,6 @@
460 common lsm_set_self_attr sys_lsm_set_self_attr
461 common lsm_list_modules sys_lsm_list_modules
462 common mseal sys_mseal
+463 common process_ksm_enable sys_process_ksm_enable
+464 common process_ksm_disable sys_process_ksm_disable
+465 common process_ksm_status sys_process_ksm_status
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -553,3 +553,6 @@
460 common lsm_set_self_attr sys_lsm_set_self_attr
461 common lsm_list_modules sys_lsm_list_modules
462 common mseal sys_mseal
+463 common process_ksm_enable sys_process_ksm_enable
+464 common process_ksm_disable sys_process_ksm_disable
+465 common process_ksm_status sys_process_ksm_status
--- a/arch/s390/kernel/syscalls/syscall.tbl
+++ b/arch/s390/kernel/syscalls/syscall.tbl
@@ -465,3 +465,6 @@
460 common lsm_set_self_attr sys_lsm_set_self_attr sys_lsm_set_self_attr
461 common lsm_list_modules sys_lsm_list_modules sys_lsm_list_modules
462 common mseal sys_mseal sys_mseal
+463 common process_ksm_enable sys_process_ksm_enable sys_process_ksm_enable
+464 common process_ksm_disable sys_process_ksm_disable sys_process_ksm_disable
+465 common process_ksm_status sys_process_ksm_status sys_process_ksm_status
--- a/arch/sh/kernel/syscalls/syscall.tbl
+++ b/arch/sh/kernel/syscalls/syscall.tbl
@@ -466,3 +466,6 @@
460 common lsm_set_self_attr sys_lsm_set_self_attr
461 common lsm_list_modules sys_lsm_list_modules
462 common mseal sys_mseal
+463 common process_ksm_enable sys_process_ksm_enable
+464 common process_ksm_disable sys_process_ksm_disable
+465 common process_ksm_status sys_process_ksm_status
--- a/arch/sparc/kernel/syscalls/syscall.tbl
+++ b/arch/sparc/kernel/syscalls/syscall.tbl
@@ -508,3 +508,6 @@
460 common lsm_set_self_attr sys_lsm_set_self_attr
461 common lsm_list_modules sys_lsm_list_modules
462 common mseal sys_mseal
+463 common process_ksm_enable sys_process_ksm_enable
+464 common process_ksm_disable sys_process_ksm_disable
+465 common process_ksm_status sys_process_ksm_status
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -468,3 +468,6 @@
460 i386 lsm_set_self_attr sys_lsm_set_self_attr
461 i386 lsm_list_modules sys_lsm_list_modules
462 i386 mseal sys_mseal
+463 i386 process_ksm_enable sys_process_ksm_enable
+464 i386 process_ksm_disable sys_process_ksm_disable
+465 i386 process_ksm_status sys_process_ksm_status
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -386,6 +386,9 @@
460 common lsm_set_self_attr sys_lsm_set_self_attr
461 common lsm_list_modules sys_lsm_list_modules
462 common mseal sys_mseal
+463 common process_ksm_enable sys_process_ksm_enable
+464 common process_ksm_disable sys_process_ksm_disable
+465 common process_ksm_status sys_process_ksm_status
#
# Due to a historical design error, certain syscalls are numbered differently
--- a/arch/xtensa/kernel/syscalls/syscall.tbl
+++ b/arch/xtensa/kernel/syscalls/syscall.tbl
@@ -433,3 +433,6 @@
460 common lsm_set_self_attr sys_lsm_set_self_attr
461 common lsm_list_modules sys_lsm_list_modules
462 common mseal sys_mseal
+463 common process_ksm_enable sys_process_ksm_enable
+464 common process_ksm_disable sys_process_ksm_disable
+465 common process_ksm_status sys_process_ksm_status
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -818,6 +818,9 @@ asmlinkage long sys_madvise(unsigned lon
asmlinkage long sys_process_madvise(int pidfd, const struct iovec __user *vec,
size_t vlen, int behavior, unsigned int flags);
asmlinkage long sys_process_mrelease(int pidfd, unsigned int flags);
+asmlinkage long sys_process_ksm_enable(int pidfd, unsigned int flags);
+asmlinkage long sys_process_ksm_disable(int pidfd, unsigned int flags);
+asmlinkage long sys_process_ksm_status(int pidfd, unsigned int flags);
asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size,
unsigned long prot, unsigned long pgoff,
unsigned long flags);
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -841,8 +841,15 @@ __SYSCALL(__NR_lsm_list_modules, sys_lsm
#define __NR_mseal 462
__SYSCALL(__NR_mseal, sys_mseal)
+#define __NR_process_ksm_enable 463
+__SYSCALL(__NR_process_ksm_enable, sys_process_ksm_enable)
+#define __NR_process_ksm_disable 464
+__SYSCALL(__NR_process_ksm_disable, sys_process_ksm_disable)
+#define __NR_process_ksm_status 465
+__SYSCALL(__NR_process_ksm_status, sys_process_ksm_status)
+
#undef __NR_syscalls
-#define __NR_syscalls 463
+#define __NR_syscalls 466
/*
* 32 bit systems traditionally used different
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -2789,6 +2789,153 @@ SYSCALL_DEFINE5(prctl, int, option, unsi
return error;
}
+#ifdef CONFIG_KSM
+enum pkc_action {
+ PKSM_ENABLE = 0,
+ PKSM_DISABLE,
+ PKSM_STATUS,
+};
+
+static long do_process_ksm_control(int pidfd, enum pkc_action action)
+{
+ long ret;
+ struct pid *pid;
+ struct task_struct *task;
+ struct mm_struct *mm;
+ unsigned int f_flags;
+
+ pid = pidfd_get_pid(pidfd, &f_flags);
+ if (IS_ERR(pid)) {
+ ret = PTR_ERR(pid);
+ goto out;
+ }
+
+ task = get_pid_task(pid, PIDTYPE_PID);
+ if (!task) {
+ ret = -ESRCH;
+ goto put_pid;
+ }
+
+ /* Require PTRACE_MODE_READ to avoid leaking ASLR metadata. */
+ mm = mm_access(task, PTRACE_MODE_READ_FSCREDS);
+ if (IS_ERR_OR_NULL(mm)) {
+ ret = IS_ERR(mm) ? PTR_ERR(mm) : -ESRCH;
+ goto release_task;
+ }
+
+ /* Require CAP_SYS_NICE for influencing process performance. */
+ if (!capable(CAP_SYS_NICE)) {
+ ret = -EPERM;
+ goto release_mm;
+ }
+
+ if (mmap_write_lock_killable(mm)) {
+ ret = -EINTR;
+ goto release_mm;
+ }
+
+ switch (action) {
+ case PKSM_ENABLE:
+ ret = ksm_enable_merge_any(mm);
+ break;
+ case PKSM_DISABLE:
+ ret = ksm_disable_merge_any(mm);
+ break;
+ case PKSM_STATUS:
+ ret = !!test_bit(MMF_VM_MERGE_ANY, &mm->flags);
+ break;
+ }
+
+ mmap_write_unlock(mm);
+
+release_mm:
+ mmput(mm);
+release_task:
+ put_task_struct(task);
+put_pid:
+ put_pid(pid);
+out:
+ return ret;
+}
+#endif /* CONFIG_KSM */
+
+SYSCALL_DEFINE2(process_ksm_enable, int, pidfd, unsigned int, flags)
+{
+#ifdef CONFIG_KSM
+ if (flags != 0)
+ return -EINVAL;
+
+ return do_process_ksm_control(pidfd, PKSM_ENABLE);
+#else /* CONFIG_KSM */
+ return -ENOSYS;
+#endif /* CONFIG_KSM */
+}
+
+SYSCALL_DEFINE2(process_ksm_disable, int, pidfd, unsigned int, flags)
+{
+#ifdef CONFIG_KSM
+ if (flags != 0)
+ return -EINVAL;
+
+ return do_process_ksm_control(pidfd, PKSM_DISABLE);
+#else /* CONFIG_KSM */
+ return -ENOSYS;
+#endif /* CONFIG_KSM */
+}
+
+SYSCALL_DEFINE2(process_ksm_status, int, pidfd, unsigned int, flags)
+{
+#ifdef CONFIG_KSM
+ if (flags != 0)
+ return -EINVAL;
+
+ return do_process_ksm_control(pidfd, PKSM_STATUS);
+#else /* CONFIG_KSM */
+ return -ENOSYS;
+#endif /* CONFIG_KSM */
+}
+
+#ifdef CONFIG_KSM
+static ssize_t process_ksm_enable_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%u\n", __NR_process_ksm_enable);
+}
+static struct kobj_attribute process_ksm_enable_attr = __ATTR_RO(process_ksm_enable);
+
+static ssize_t process_ksm_disable_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%u\n", __NR_process_ksm_disable);
+}
+static struct kobj_attribute process_ksm_disable_attr = __ATTR_RO(process_ksm_disable);
+
+static ssize_t process_ksm_status_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%u\n", __NR_process_ksm_status);
+}
+static struct kobj_attribute process_ksm_status_attr = __ATTR_RO(process_ksm_status);
+
+static struct attribute *process_ksm_sysfs_attrs[] = {
+ &process_ksm_enable_attr.attr,
+ &process_ksm_disable_attr.attr,
+ &process_ksm_status_attr.attr,
+ NULL,
+};
+
+static const struct attribute_group process_ksm_sysfs_attr_group = {
+ .attrs = process_ksm_sysfs_attrs,
+ .name = "process_ksm",
+};
+
+static int __init process_ksm_sysfs_init(void)
+{
+ return sysfs_create_group(kernel_kobj, &process_ksm_sysfs_attr_group);
+}
+subsys_initcall(process_ksm_sysfs_init);
+#endif /* CONFIG_KSM */
+
SYSCALL_DEFINE3(getcpu, unsigned __user *, cpup, unsigned __user *, nodep,
struct getcpu_cache __user *, unused)
{
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -186,6 +186,9 @@ COND_SYSCALL(mincore);
COND_SYSCALL(madvise);
COND_SYSCALL(process_madvise);
COND_SYSCALL(process_mrelease);
+COND_SYSCALL(process_ksm_enable);
+COND_SYSCALL(process_ksm_disable);
+COND_SYSCALL(process_ksm_status);
COND_SYSCALL(remap_file_pages);
COND_SYSCALL(mbind);
COND_SYSCALL(get_mempolicy);
--- a/scripts/syscall.tbl
+++ b/scripts/syscall.tbl
@@ -403,3 +403,6 @@
460 common lsm_set_self_attr sys_lsm_set_self_attr
461 common lsm_list_modules sys_lsm_list_modules
462 common mseal sys_mseal
+463 common process_ksm_enable sys_process_ksm_enable
+464 common process_ksm_disable sys_process_ksm_disable
+465 common process_ksm_status sys_process_ksm_status
--- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
@@ -553,3 +553,6 @@
460 common lsm_set_self_attr sys_lsm_set_self_attr
461 common lsm_list_modules sys_lsm_list_modules
462 common mseal sys_mseal
+463 common process_ksm_enable sys_process_ksm_enable
+464 common process_ksm_disable sys_process_ksm_disable
+465 common process_ksm_status sys_process_ksm_status
--- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
@@ -465,3 +465,6 @@
460 common lsm_set_self_attr sys_lsm_set_self_attr sys_lsm_set_self_attr
461 common lsm_list_modules sys_lsm_list_modules sys_lsm_list_modules
462 common mseal sys_mseal sys_mseal
+463 common process_ksm_enable sys_process_ksm_enable sys_process_ksm_enable
+464 common process_ksm_disable sys_process_ksm_disable sys_process_ksm_disable
+465 common process_ksm_status sys_process_ksm_status sys_process_ksm_status


@@ -0,0 +1,50 @@
From 9308d03bfeb941469da17e2903ca06254b110b25 Mon Sep 17 00:00:00 2001
From: Oleksandr Natalenko <oleksandr@natalenko.name>
Date: Tue, 24 Sep 2024 11:58:41 +0200
Subject: mm/process_ksm: use pidfd_get_task() instead of
pidfd_get_pid()+get_pid_task()
Link: https://git.kernel.org/linus/ee9955d61a0a
Signed-off-by: Oleksandr Natalenko <oleksandr@natalenko.name>
---
kernel/sys.c | 15 +++------------
1 file changed, 3 insertions(+), 12 deletions(-)
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -2799,23 +2799,16 @@ enum pkc_action {
static long do_process_ksm_control(int pidfd, enum pkc_action action)
{
long ret;
- struct pid *pid;
struct task_struct *task;
struct mm_struct *mm;
unsigned int f_flags;
- pid = pidfd_get_pid(pidfd, &f_flags);
- if (IS_ERR(pid)) {
- ret = PTR_ERR(pid);
+ task = pidfd_get_task(pidfd, &f_flags);
+ if (IS_ERR(task)) {
+ ret = PTR_ERR(task);
goto out;
}
- task = get_pid_task(pid, PIDTYPE_PID);
- if (!task) {
- ret = -ESRCH;
- goto put_pid;
- }
-
/* Require PTRACE_MODE_READ to avoid leaking ASLR metadata. */
mm = mm_access(task, PTRACE_MODE_READ_FSCREDS);
if (IS_ERR_OR_NULL(mm)) {
@@ -2852,8 +2845,6 @@ release_mm:
mmput(mm);
release_task:
put_task_struct(task);
-put_pid:
- put_pid(pid);
out:
return ret;
}

File diff suppressed because it is too large


@@ -0,0 +1,58 @@
From c09f361b41027ca073de5631c66dfe0e7275c3a4 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Mon, 22 Jan 2024 16:27:56 -0800
Subject: lib: zstd: Refactor intentional wrap-around test
In an effort to separate intentional arithmetic wrap-around from
unexpected wrap-around, we need to refactor places that depend on this
kind of math. One of the most common code patterns of this is:
VAR + value < VAR
Notably, this is considered "undefined behavior" for signed and pointer
types, which the kernel works around by using the -fno-strict-overflow
option in the build[1] (which used to just be -fwrapv). Regardless, we
want to get the kernel source to the position where we can meaningfully
instrument arithmetic wrap-around conditions and catch them when they
are unexpected, regardless of whether they are signed[2], unsigned[3],
or pointer[4] types.
Switch to a more regular type for a 64-bit value and refactor the
open-coded wrap-around addition test to use subtraction from the type max
(since add_would_overflow() may not be defined in early boot code). This
paves the way to enabling the wrap-around sanitizers in the future.
Link: https://git.kernel.org/linus/68df3755e383e6fecf2354a67b08f92f18536594 [1]
Link: https://github.com/KSPP/linux/issues/26 [2]
Link: https://github.com/KSPP/linux/issues/27 [3]
Link: https://github.com/KSPP/linux/issues/344 [4]
Cc: Nick Terrell <terrelln@fb.com>
Cc: Paul Jones <paul@pauljones.id.au>
Cc: Sedat Dilek <sedat.dilek@gmail.com>
Cc: Oleksandr Natalenko <oleksandr@natalenko.name>
Cc: Xin Gao <gaoxin@cdjrlc.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
lib/zstd/decompress/zstd_decompress.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
--- a/lib/zstd/decompress/zstd_decompress.c
+++ b/lib/zstd/decompress/zstd_decompress.c
@@ -618,7 +618,7 @@ size_t ZSTD_readSkippableFrame(void* dst
* @return : decompressed size of the frames contained */
unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize)
{
- unsigned long long totalDstSize = 0;
+ U64 totalDstSize = 0;
while (srcSize >= ZSTD_startingInputLength(ZSTD_f_zstd1)) {
U32 const magicNumber = MEM_readLE32(src);
@@ -636,7 +636,7 @@ unsigned long long ZSTD_findDecompressed
{ unsigned long long const fcs = ZSTD_getFrameContentSize(src, srcSize);
if (fcs >= ZSTD_CONTENTSIZE_ERROR) return fcs;
- if (totalDstSize + fcs < totalDstSize)
+ if (U64_MAX - totalDstSize < fcs)
return ZSTD_CONTENTSIZE_ERROR; /* check for overflow */
totalDstSize += fcs;
}
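
The same refactor pattern in isolation, as a standalone C sketch (nothing here is zstd code; it only mirrors the idiom swapped in by the patch): instead of relying on unsigned wrap-around via `total + add < total`, test the remaining headroom against the type maximum before adding.

/* overflow_check.c - build with: cc overflow_check.c */
#include <stdint.h>
#include <stdio.h>

static int accumulate(uint64_t *total, uint64_t add)
{
	/* Old style: if (*total + add < *total) return -1;  (relies on wrap) */
	if (UINT64_MAX - *total < add)
		return -1;	/* adding would overflow */
	*total += add;
	return 0;
}

int main(void)
{
	uint64_t total = UINT64_MAX - 10;

	printf("add 5:  %s\n", accumulate(&total, 5)  ? "overflow" : "ok");
	printf("add 10: %s\n", accumulate(&total, 10) ? "overflow" : "ok");
	return 0;
}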