release 6.15.2 (preliminary)
@@ -42,7 +42,7 @@ correctness.
 static int m88e1540_get_fld(struct phy_device *phydev, u8 *msecs)
 {
-@@ -3848,6 +3852,7 @@ static struct phy_driver marvell_drivers
+@@ -3828,6 +3832,7 @@ static struct phy_driver marvell_drivers
 	.led_hw_control_set = m88e1318_led_hw_control_set,
 	.led_hw_control_get = m88e1318_led_hw_control_get,
 },
@@ -50,7 +50,7 @@ correctness.
 {
 	.phy_id = MARVELL_PHY_ID_88E1145,
 	.phy_id_mask = MARVELL_PHY_ID_MASK,
-@@ -3871,6 +3876,8 @@ static struct phy_driver marvell_drivers
+@@ -3851,6 +3856,8 @@ static struct phy_driver marvell_drivers
 	.cable_test_start = m88e1111_vct_cable_test_start,
 	.cable_test_get_status = m88e1111_vct_cable_test_get_status,
 },
@@ -59,7 +59,7 @@ correctness.
 {
 	.phy_id = MARVELL_PHY_ID_88E1149R,
 	.phy_id_mask = MARVELL_PHY_ID_MASK,
-@@ -3889,6 +3896,8 @@ static struct phy_driver marvell_drivers
+@@ -3869,6 +3876,8 @@ static struct phy_driver marvell_drivers
 	.get_strings = marvell_get_strings,
 	.get_stats = marvell_get_stats,
 },
@@ -68,7 +68,7 @@ correctness.
 {
 	.phy_id = MARVELL_PHY_ID_88E1240,
 	.phy_id_mask = MARVELL_PHY_ID_MASK,
-@@ -3909,6 +3918,7 @@ static struct phy_driver marvell_drivers
+@@ -3889,6 +3898,7 @@ static struct phy_driver marvell_drivers
 	.get_tunable = m88e1011_get_tunable,
 	.set_tunable = m88e1011_set_tunable,
 },
@@ -76,7 +76,7 @@ correctness.
 {
 	.phy_id = MARVELL_PHY_ID_88E1116R,
 	.phy_id_mask = MARVELL_PHY_ID_MASK,
-@@ -4197,9 +4207,9 @@ static const struct mdio_device_id __may
+@@ -4177,9 +4187,9 @@ static const struct mdio_device_id __may
 	{ MARVELL_PHY_ID_88E1111_FINISAR, MARVELL_PHY_ID_MASK },
 	{ MARVELL_PHY_ID_88E1118, MARVELL_PHY_ID_MASK },
 	{ MARVELL_PHY_ID_88E1121R, MARVELL_PHY_ID_MASK },
@@ -1,32 +0,0 @@
From: Ben Hutchings <benh@debian.org>
Date: Mon, 16 Sep 2024 00:07:04 +0200
Subject: Documentation: Use relative source filenames in ABI documentation

Currently the ABI documentation files contain absolute source
filenames, which makes them unreproducible if the build directory can
vary.

Remove the source base directory ($srctree) from the source filenames
shown in the documentation.

Signed-off-by: Ben Hutchings <benh@debian.org>
---
--- a/Documentation/sphinx/kernel_abi.py
+++ b/Documentation/sphinx/kernel_abi.py
@@ -103,6 +103,7 @@ class KernelCmd(Directive):
         lines = code_block + "\n\n"

         line_regex = re.compile(r"^\.\. LINENO (\S+)\#([0-9]+)$")
+        srctree = os.path.abspath(os.environ["srctree"])
         ln = 0
         n = 0
         f = fname
@@ -127,7 +128,7 @@ class KernelCmd(Directive):
                 # sphinx counts lines from 0
                 ln = int(match.group(2)) - 1
             else:
-                content.append(line, f, ln)
+                content.append(line, os.path.relpath(f, srctree), ln)

     kernellog.info(self.state.document.settings.env.app, "%s: parsed %i lines" % (fname, n))
@@ -9,7 +9,7 @@ sources.

--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
-@@ -188,6 +188,11 @@ cmd_record_mcount = $(if $(findstring $(
+@@ -184,6 +184,11 @@ cmd_record_mcount = $(if $(findstring $(
	$(sub_cmd_record_mcount))
 endif # CONFIG_FTRACE_MCOUNT_USE_RECORDMCOUNT
@@ -16,7 +16,7 @@ Signed-off-by: Ben Hutchings <benh@debian.org>
---
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
-@@ -945,7 +945,7 @@ $(LIBAPI)-clean:
+@@ -963,7 +963,7 @@ $(LIBAPI)-clean:
 $(LIBBPF): FORCE | $(LIBBPF_OUTPUT)
	$(Q)$(MAKE) -C $(LIBBPF_DIR) FEATURES_DUMP=$(FEATURE_DUMP_EXPORT) \
		O= OUTPUT=$(LIBBPF_OUTPUT)/ DESTDIR=$(LIBBPF_DESTDIR) prefix= subdir= \
@@ -21,7 +21,7 @@ Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+	 * much older kernel. Do "use" the attr structure here to avoid
+	 * a "set but not used" warning.
	 */
--	return syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
+-	return syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr)) == 0;
+	(void)&attr;
+	return 0;
 }
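The `(void)&attr;` line added by this patch is a common idiom for silencing "set but not used" warnings without changing behaviour. A minimal standalone sketch of the same idiom (variable names are illustrative, not from the patch):

```c
#include <stdio.h>

/* Touch a variable so -Wunused-but-set-variable stays quiet even when
 * the code that would consume it is compiled out. */
int main(void)
{
	int attr = 42;	/* set, but the real consumer may be #ifdef'd away */

	(void)&attr;	/* tells the compiler the value is "used" */
	return 0;
}
```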
@@ -6,7 +6,7 @@ Signed-off-by: Ben Hutchings <benh@debian.org>
---
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
-@@ -919,7 +919,7 @@ $(OUTPUT)dlfilters/%.o: dlfilters/%.c in
+@@ -937,7 +937,7 @@ $(OUTPUT)dlfilters/%.o: dlfilters/%.c in
 .SECONDARY: $(DLFILTERS:.so=.o)

 $(OUTPUT)dlfilters/%.so: $(OUTPUT)dlfilters/%.o
@@ -1,7 +1,7 @@
 From: Serge Hallyn <serge.hallyn@canonical.com>
 Date: Fri, 31 May 2013 19:12:12 +0000 (+0100)
 Subject: add sysctl to disallow unprivileged CLONE_NEWUSER by default
-Origin: https://kernel.ubuntu.com/git?p=serge%2Fubuntu-saucy.git;a=commit;h=5c847404dcb2e3195ad0057877e1422ae90892b8
+Origin: http://kernel.ubuntu.com/git?p=serge%2Fubuntu-saucy.git;a=commit;h=5c847404dcb2e3195ad0057877e1422ae90892b8

 add sysctl to disallow unprivileged CLONE_NEWUSER by default

@@ -34,7 +34,7 @@ Signed-off-by: Serge Hallyn <serge.hallyn@ubuntu.com>
 /*
  * Minimum number of threads to boot the kernel
  */
-@@ -2172,6 +2178,10 @@ __latent_entropy struct task_struct *cop
+@@ -2194,6 +2200,10 @@ __latent_entropy struct task_struct *cop
	if ((clone_flags & (CLONE_NEWUSER|CLONE_FS)) == (CLONE_NEWUSER|CLONE_FS))
		return ERR_PTR(-EINVAL);

@@ -45,7 +45,7 @@ Signed-off-by: Serge Hallyn <serge.hallyn@ubuntu.com>
 /*
  * Thread groups must share signals as well, and detached threads
  * can only be started up within the thread group.
-@@ -3325,6 +3335,12 @@ int ksys_unshare(unsigned long unshare_f
+@@ -3354,6 +3364,12 @@ int ksys_unshare(unsigned long unshare_f
	if (unshare_flags & CLONE_NEWNS)
		unshare_flags |= CLONE_FS;

@@ -60,18 +60,18 @@ Signed-off-by: Serge Hallyn <serge.hallyn@ubuntu.com>
		goto bad_unshare_out;
 --- a/kernel/sysctl.c
 +++ b/kernel/sysctl.c
-@@ -135,6 +135,10 @@ static enum sysctl_writes_mode sysctl_wr
- int sysctl_legacy_va_layout;
- #endif
+@@ -84,6 +84,10 @@ EXPORT_SYMBOL_GPL(sysctl_long_vals);
+ static const int ngroups_max = NGROUPS_MAX;
+ static const int cap_last_cap = CAP_LAST_CAP;

 +#ifdef CONFIG_USER_NS
 +extern int unprivileged_userns_clone;
 +#endif
 +
 #endif /* CONFIG_SYSCTL */
 #ifdef CONFIG_PROC_SYSCTL

 /*
-@@ -1617,6 +1621,15 @@ static const struct ctl_table kern_table
- /**
+@@ -1595,6 +1599,15 @@ static const struct ctl_table kern_table
	.mode = 0644,
	.proc_handler = proc_dointvec,
 },
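For readers unfamiliar with this Debian/Ubuntu-specific knob: the patch exposes kernel.unprivileged_userns_clone, and unprivileged unshare(CLONE_NEWUSER) fails when it is 0. A minimal userspace probe of the resulting behaviour (an illustrative sketch using standard APIs, not code from the patch):

```c
#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <errno.h>
#include <string.h>

int main(void)
{
	/* With this patch, kernel.unprivileged_userns_clone = 0 makes the
	 * call fail for non-root callers; = 1 (or an unpatched kernel)
	 * lets it succeed. */
	if (unshare(CLONE_NEWUSER) == 0)
		printf("unprivileged user namespaces: allowed\n");
	else
		printf("unprivileged user namespaces: denied (%s)\n",
		       strerror(errno));
	return 0;
}
```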
@@ -80,7 +80,7 @@ Consequently, the ashmem part of this patch has been removed.
 {
 --- a/mm/memory.c
 +++ b/mm/memory.c
-@@ -6392,6 +6392,7 @@ inval:
+@@ -6589,6 +6589,7 @@ inval:
	count_vm_vma_lock_event(VMA_LOCK_ABORT);
	return NULL;
 }
@@ -15,7 +15,7 @@ Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
---
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
-@@ -1099,8 +1099,8 @@ module_exit(dccp_v4_exit);
+@@ -1094,8 +1094,8 @@ module_exit(dccp_v4_exit);
  * values directly, Also cover the case where the protocol is not specified,
  * i.e. net-pf-PF_INET-proto-0-type-SOCK_DCCP
  */
@@ -28,7 +28,7 @@ Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
 MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
-@@ -1174,8 +1174,8 @@ module_exit(dccp_v6_exit);
+@@ -1167,8 +1167,8 @@ module_exit(dccp_v6_exit);
  * values directly, Also cover the case where the protocol is not specified,
  * i.e. net-pf-PF_INET6-proto-0-type-SOCK_DCCP
  */
@@ -22,7 +22,7 @@ Export the currently un-exported symbols it depends on.

--- a/fs/file.c
+++ b/fs/file.c
-@@ -845,6 +845,7 @@ struct file *file_close_fd(unsigned int
+@@ -843,6 +843,7 @@ struct file *file_close_fd(unsigned int

	return file;
 }
@@ -82,7 +82,7 @@ Export the currently un-exported symbols it depends on.
 * task_work_cancel_match - cancel a pending work added by task_work_add()
--- a/mm/memory.c
+++ b/mm/memory.c
-@@ -2027,6 +2027,7 @@ void zap_page_range_single(struct vm_are
+@@ -2020,6 +2020,7 @@ void zap_page_range_single(struct vm_are
	tlb_finish_mmu(&tlb);
	hugetlb_zap_end(vma, details);
 }
@@ -12,7 +12,7 @@ actually used.
---
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
-@@ -1838,6 +1838,14 @@ static int do_fanotify_mark(int fanotify
+@@ -1881,6 +1881,14 @@ static int do_fanotify_mark(int fanotify
		umask = FANOTIFY_EVENT_FLAGS;
	}
37 debian/patches/debian/firmware_loader-log-direct-loading-failures-as-info-for-d-i.patch vendored Normal file
@@ -0,0 +1,37 @@
From: Ben Hutchings <benh@debian.org>
Subject: firmware_loader: Log direct loading failures as info for d-i
Date: Thu, 30 May 2024 13:14:32 +0100
Forwarded: not-needed

On an installed Debian system, firmware packages will normally be
installed automatically based on a mapping of device IDs to firmware.
Within the Debian installer this has not yet happened and we need a
way to detect missing firmware.

Although many/most drivers log firmware loading failures, they do so
using many different formats. This adds a single log message to the
firmware loader, which the installer's hw-detect package will look
for. The log level is set to "info" because some failures are
expected and we do not want to confuse users with bogus error messages
(like in bug #966218).

NOTE: The log message format must not be changed without coordinating
this with the check-missing-firmware.sh in hw-detect.
---
 drivers/base/firmware_loader/fallback.c |  2 +-
 drivers/base/firmware_loader/main.c     | 17 ++++++++---------
 2 files changed, 9 insertions(+), 10 deletions(-)

--- a/drivers/base/firmware_loader/main.c
+++ b/drivers/base/firmware_loader/main.c
@@ -590,6 +590,10 @@ fw_get_filesystem_firmware(struct device
	}
	__putname(path);

+	if (rc)
+		dev_info(device, "firmware: failed to load %s (%d)\n",
+			 fw_priv->fw_name, rc);
+
	return rc;
 }
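Since hw-detect matches on this exact text, it helps to see what the rendered message looks like. A sketch reconstructing it with the same format string (the "usb 1-4:" device prefix and firmware name are assumptions for illustration; -2 is -ENOENT):

```c
#include <stdio.h>

int main(void)
{
	/* mirrors the dev_info() format from the patch; the kernel
	 * prepends the device name, e.g. "usb 1-4: " */
	const char *fw_name = "example/fw.bin";	/* hypothetical name */
	int rc = -2;					/* -ENOENT */

	printf("usb 1-4: firmware: failed to load %s (%d)\n", fw_name, rc);
	return 0;
}
```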
@@ -9,7 +9,7 @@ This reverts commit 561ec64ae67ef25cac8d72bb9c4bfc955edfd415

--- a/fs/namei.c
+++ b/fs/namei.c
-@@ -1094,8 +1094,8 @@ static inline void put_link(struct namei
+@@ -1095,8 +1095,8 @@ static inline void put_link(struct namei
	path_put(&last->link);
 }
4 debian/patches/debian/kernelvariables.patch vendored
@@ -19,7 +19,7 @@ use of $(ARCH) needs to be moved after this.
---
--- a/Makefile
+++ b/Makefile
-@@ -405,36 +405,6 @@ include $(srctree)/scripts/subarch.inclu
+@@ -402,36 +402,6 @@ include $(srctree)/scripts/subarch.inclu
 # Note: Some architectures assign CROSS_COMPILE in their arch/*/Makefile
 ARCH ?= $(SUBARCH)

@@ -56,7 +56,7 @@ use of $(ARCH) needs to be moved after this.
 KCONFIG_CONFIG ?= .config
 export KCONFIG_CONFIG

-@@ -554,6 +524,35 @@ RUSTFLAGS_KERNEL =
+@@ -551,6 +521,35 @@ RUSTFLAGS_KERNEL =
 AFLAGS_KERNEL =
 LDFLAGS_vmlinux =
@@ -15,7 +15,7 @@ to the installed location.
---
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
-@@ -660,10 +660,12 @@ static int report__browse_hists(struct r
+@@ -666,10 +666,12 @@ static int report__browse_hists(struct r

	path = system_path(TIPDIR);
	if (perf_tip(&help, path) || help == NULL) {
@@ -20,7 +20,7 @@ is non-empty.
---
--- a/Makefile
+++ b/Makefile
-@@ -1871,7 +1871,7 @@ PHONY += prepare
+@@ -1882,7 +1882,7 @@ PHONY += prepare
 # now expand this into a simple variable to reduce the cost of shell evaluations
 prepare: CC_VERSION_TEXT := $(CC_VERSION_TEXT)
 prepare:
@@ -1,7 +1,7 @@
 From: Adriaan Schmidt <adriaan.schmidt@siemens.com>
 Date: Mon, 4 Apr 2022 13:38:33 +0200
 Subject: tools: install perf python bindings
-Bug-Debian: https://bugs.debian.org/860957
+Bug-Debian: http://bugs.debian.org/860957
 Forwarded: not-needed

---
@@ -10,7 +10,7 @@ Forwarded: not-needed

--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
-@@ -1139,7 +1139,7 @@ install-bin: install-tools install-tests
+@@ -1157,7 +1157,7 @@ install-bin: install-tools install-tests
 install: install-bin try-install-man

 install-python_ext:
@@ -4,7 +4,7 @@ Subject: linux-tools: Install perf-read-vdso{,x}32 in directory under /usr/lib

--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
-@@ -1067,21 +1067,21 @@ install-tools: all install-gtk
+@@ -1085,21 +1085,21 @@ install-tools: all install-gtk
	$(LN) '$(DESTDIR_SQ)$(bindir_SQ)/perf' '$(DESTDIR_SQ)$(bindir_SQ)/trace'; \
	$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(includedir_SQ)/perf'; \
	$(INSTALL) -m 644 include/perf/perf_dlfilter.h -t '$(DESTDIR_SQ)$(includedir_SQ)/perf'
@@ -13,7 +13,7 @@ $KBUILD_BUILD_TIMESTAMP.

--- a/init/Makefile
+++ b/init/Makefile
-@@ -29,7 +29,7 @@ preempt-flag-$(CONFIG_PREEMPT_DYNAMIC) :
+@@ -30,7 +30,7 @@ preempt-flag-$(CONFIG_PREEMPT_DYNAMIC) :
 preempt-flag-$(CONFIG_PREEMPT_RT) := PREEMPT_RT

 build-version = $(or $(KBUILD_BUILD_VERSION), $(build-version-auto))
@@ -19,7 +19,7 @@ Forwarded: not-needed

 /* describe a ptrace relationship for potential exception */
 struct ptrace_relation {
-@@ -474,7 +474,7 @@ static inline void yama_init_sysctl(void
+@@ -469,7 +469,7 @@ static inline void yama_init_sysctl(void

 static int __init yama_init(void)
 {
@@ -31,7 +31,7 @@ cc: linux-efi@vger.kernel.org

--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
-@@ -1073,19 +1073,7 @@ void __init setup_arch(char **cmdline_p)
+@@ -1127,19 +1127,7 @@ void __init setup_arch(char **cmdline_p)
	/* Allocate bigger log buffer */
	setup_log_buf(1);

@@ -26,7 +26,7 @@ Signed-off-by: Salvatore Bonaccorso <carnil@debian.org>

--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
-@@ -907,6 +907,8 @@ void __init setup_arch(char **cmdline_p)
+@@ -964,6 +964,8 @@ void __init setup_arch(char **cmdline_p)
	if (efi_enabled(EFI_BOOT))
		efi_init();

@@ -35,7 +35,7 @@ Signed-off-by: Salvatore Bonaccorso <carnil@debian.org>
	reserve_ibft_region();
	x86_init.resources.dmi_setup();

-@@ -1073,8 +1075,6 @@ void __init setup_arch(char **cmdline_p)
+@@ -1127,8 +1129,6 @@ void __init setup_arch(char **cmdline_p)
	/* Allocate bigger log buffer */
	setup_log_buf(1);
@@ -22,9 +22,9 @@ Signed-off-by: Ben Hutchings <ben@decadent.org.uk>

--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
-@@ -1701,6 +1701,11 @@ int perf_cpu_time_max_percent_handler(co
- int perf_event_max_stack_handler(const struct ctl_table *table, int write,
-		void *buffer, size_t *lenp, loff_t *ppos);
+@@ -1684,6 +1684,11 @@ extern int sysctl_perf_event_sample_rate
+
+ extern void perf_sample_event_took(u64 sample_len_ns);

 +static inline bool perf_paranoid_any(void)
 +{
@@ -36,7 +36,7 @@ Signed-off-by: Ben Hutchings <ben@decadent.org.uk>

--- a/kernel/events/core.c
+++ b/kernel/events/core.c
-@@ -449,8 +449,13 @@ static struct kmem_cache *perf_event_cac
+@@ -450,8 +450,13 @@ static struct kmem_cache *perf_event_cac
  * 0 - disallow raw tracepoint access for unpriv
  * 1 - disallow cpu events for unpriv
  * 2 - disallow kernel profiling for unpriv
@@ -48,9 +48,9 @@ Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
 int sysctl_perf_event_paranoid __read_mostly = 2;
 +#endif

-/* Minimum for 512 kiB + 1 user control page */
-int sysctl_perf_event_mlock __read_mostly = 512 + (PAGE_SIZE / 1024); /* 'free' kiB per user */
-@@ -12813,6 +12818,9 @@ SYSCALL_DEFINE5(perf_event_open,
+/* Minimum for 512 kiB + 1 user control page. 'free' kiB per user. */
+static int sysctl_perf_event_mlock __read_mostly = 512 + (PAGE_SIZE / 1024);
+@@ -13084,6 +13089,9 @@ SYSCALL_DEFINE5(perf_event_open,
	if (err)
		return err;

@@ -58,13 +58,13 @@ Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
 +		return -EACCES;
 +
	/* Do we allow access to perf_event_open(2) ? */
-	err = security_perf_event_open(&attr, PERF_SECURITY_OPEN);
+	err = security_perf_event_open(PERF_SECURITY_OPEN);
	if (err)
 --- a/security/Kconfig
 +++ b/security/Kconfig
-@@ -51,6 +51,15 @@ config PROC_MEM_NO_FORCE
-
- endchoice
+@@ -72,6 +72,15 @@ config MSEAL_SYSTEM_MAPPINGS
+	  For complete descriptions of memory sealing, please see
+	  Documentation/userspace-api/mseal.rst

 +config SECURITY_PERF_EVENTS_RESTRICT
 +	bool "Restrict unprivileged use of performance events"
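The effect of SECURITY_PERF_EVENTS_RESTRICT is easiest to observe from userspace: with the paranoid level this patch adds (3, per its perf_paranoid_any() check), perf_event_open() fails even for self-monitoring by unprivileged callers. A hedged illustration using only standard Linux APIs (not code from the patch):

```c
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

int main(void)
{
	struct perf_event_attr attr = {
		.type = PERF_TYPE_SOFTWARE,
		.size = sizeof(attr),
		.config = PERF_COUNT_SW_CPU_CLOCK,
	};
	/* Monitor ourselves (pid 0, any cpu); with perf_event_paranoid >= 3
	 * on a kernel carrying this patch, this returns -EACCES for
	 * unprivileged users. */
	long fd = syscall(SYS_perf_event_open, &attr, 0, -1, -1, 0);

	if (fd < 0)
		printf("perf_event_open: %s\n", strerror(errno));
	else
		printf("perf_event_open succeeded (fd %ld)\n", fd);
	return 0;
}
```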
@@ -22,7 +22,7 @@ Signed-off-by: Ben Hutchings <ben@decadent.org.uk>

--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
-@@ -2264,6 +2264,8 @@
+@@ -2288,6 +2288,8 @@
		bypassed by not enabling DMAR with this option. In
		this case, gfx device will use physical address for
		DMA.
@@ -68,7 +68,7 @@ Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
		} else if (!strncmp(str, "forcedac", 8)) {
			pr_warn("intel_iommu=forcedac deprecated; use iommu.forcedac instead\n");
			iommu_dma_forcedac = true;
-@@ -1902,6 +1910,9 @@ static int device_def_domain_type(struct
+@@ -1935,6 +1943,9 @@ static int device_def_domain_type(struct
	if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
		return IOMMU_DOMAIN_IDENTITY;
@@ -78,7 +78,7 @@ Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
	}

	return 0;
-@@ -2196,6 +2207,9 @@ static int __init init_dmars(void)
+@@ -2229,6 +2240,9 @@ static int __init init_dmars(void)
		iommu_set_root_entry(iommu);
	}
@@ -29,7 +29,7 @@ Signed-off-by: Ben Hutchings <ben@decadent.org.uk>

--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
-@@ -7004,6 +7004,10 @@
+@@ -7044,6 +7044,10 @@
		later by a loaded module cannot be set this way.
		Example: sysctl.vm.swappiness=40

@@ -42,7 +42,7 @@ Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
		Ignore sysrq setting - this boot parameter will
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
-@@ -3202,6 +3202,14 @@ config COMPAT_32
+@@ -3169,6 +3169,14 @@ config COMPAT_32
	select HAVE_UID16
	select OLD_SIGSUSPEND3

@@ -57,9 +57,70 @@ Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
 config COMPAT
	def_bool y
	depends on IA32_EMULATION || X86_X32_ABI
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -64,7 +64,7 @@ static __always_inline bool do_syscall_x
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -12,6 +12,9 @@
 #include <asm/user.h>
 #include <asm/auxvec.h>
 #include <asm/fsgsbase.h>
+#ifndef COMPILE_OFFSETS /* avoid a circular dependency on asm-offsets.h */
+#include <asm/syscall.h>
+#endif

 typedef unsigned long elf_greg_t;

@@ -152,7 +155,8 @@ do { \

 #define compat_elf_check_arch(x) \
	((elf_check_arch_ia32(x) && ia32_enabled_verbose()) || \
-	 (IS_ENABLED(CONFIG_X86_X32_ABI) && (x)->e_machine == EM_X86_64))
+	 (IS_ENABLED(CONFIG_X86_X32_ABI) && x32_enabled && \
+	  (x)->e_machine == EM_X86_64))

 static inline void elf_common_init(struct thread_struct *t,
				    struct pt_regs *regs, const u16 ds)
--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@ -13,6 +13,7 @@
 #include <uapi/linux/audit.h>
 #include <linux/sched.h>
 #include <linux/err.h>
+#include <linux/jump_label.h>
 #include <asm/thread_info.h>	/* for TS_COMPAT */
 #include <asm/unistd.h>

@@ -28,6 +29,18 @@ extern long ia32_sys_call(const struct p
 extern long x32_sys_call(const struct pt_regs *, unsigned int nr);
 extern long x64_sys_call(const struct pt_regs *, unsigned int nr);

+#if defined(CONFIG_X86_X32_ABI)
+#if defined(CONFIG_X86_X32_DISABLED)
+DECLARE_STATIC_KEY_FALSE(x32_enabled_skey);
+#define x32_enabled static_branch_unlikely(&x32_enabled_skey)
+#else
+DECLARE_STATIC_KEY_TRUE(x32_enabled_skey);
+#define x32_enabled static_branch_likely(&x32_enabled_skey)
+#endif
+#else
+#define x32_enabled 0
+#endif
+
 /*
  * Only the low 32 bits of orig_ax are meaningful, so we return int.
  * This importantly ignores the high bits on 64-bit, so comparisons
--- a/arch/x86/entry/syscall_64.c
+++ b/arch/x86/entry/syscall_64.c
@@ -7,6 +7,9 @@
 #include <linux/syscalls.h>
 #include <linux/entry-common.h>
 #include <linux/nospec.h>
+#include <linux/moduleparam.h>
+#undef MODULE_PARAM_PREFIX
+#define MODULE_PARAM_PREFIX "syscall."
 #include <asm/syscall.h>

 #define __SYSCALL(nr, sym) extern long __x64_##sym(const struct pt_regs *);
@@ -75,7 +78,7 @@ static __always_inline bool do_syscall_x
	 */
	unsigned int xnr = nr - __X32_SYSCALL_BIT;

@@ -68,23 +129,12 @@ Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
	xnr = array_index_nospec(xnr, X32_NR_syscalls);
	regs->ax = x32_sys_call(regs, xnr);
	return true;
--- a/arch/x86/entry/syscall_x32.c
+++ b/arch/x86/entry/syscall_x32.c
@@ -4,6 +4,9 @@
 #include <linux/linkage.h>
 #include <linux/sys.h>
 #include <linux/cache.h>
+#include <linux/moduleparam.h>
+#undef MODULE_PARAM_PREFIX
+#define MODULE_PARAM_PREFIX "syscall."
 #include <linux/syscalls.h>
 #include <asm/syscall.h>

@@ -23,3 +26,46 @@ long x32_sys_call(const struct pt_regs *
	default: return __x64_sys_ni_syscall(regs);
	}
 };
@@ -139,3 +142,48 @@ __visible noinstr bool do_syscall_64(str
	/* Use SYSRET to exit to userspace */
	return true;
 }
+
+#ifdef CONFIG_X86_X32_ABI
+/* Maybe enable x32 syscalls */
+
+#if defined(CONFIG_X86_X32_DISABLED)
@@ -127,54 +177,4 @@ Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+};
+
+arch_param_cb(x32, &x32_param_ops, NULL, 0444);
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -12,6 +12,9 @@
 #include <asm/user.h>
 #include <asm/auxvec.h>
 #include <asm/fsgsbase.h>
+#ifndef COMPILE_OFFSETS /* avoid a circular dependency on asm-offsets.h */
+#include <asm/syscall.h>
+#endif

 typedef unsigned long elf_greg_t;

@@ -151,7 +154,8 @@ do { \

 #define compat_elf_check_arch(x) \
	((elf_check_arch_ia32(x) && ia32_enabled_verbose()) || \
-	 (IS_ENABLED(CONFIG_X86_X32_ABI) && (x)->e_machine == EM_X86_64))
+	 (IS_ENABLED(CONFIG_X86_X32_ABI) && x32_enabled && \
+	  (x)->e_machine == EM_X86_64))

 static inline void elf_common_init(struct thread_struct *t,
				    struct pt_regs *regs, const u16 ds)
--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@ -13,6 +13,7 @@
 #include <uapi/linux/audit.h>
 #include <linux/sched.h>
 #include <linux/err.h>
+#include <linux/jump_label.h>
 #include <asm/thread_info.h>	/* for TS_COMPAT */
 #include <asm/unistd.h>

@@ -28,6 +29,18 @@ extern long ia32_sys_call(const struct p
 extern long x32_sys_call(const struct pt_regs *, unsigned int nr);
 extern long x64_sys_call(const struct pt_regs *, unsigned int nr);

+#if defined(CONFIG_X86_X32_ABI)
+#if defined(CONFIG_X86_X32_DISABLED)
+DECLARE_STATIC_KEY_FALSE(x32_enabled_skey);
+#define x32_enabled static_branch_unlikely(&x32_enabled_skey)
+#else
+DECLARE_STATIC_KEY_TRUE(x32_enabled_skey);
+#define x32_enabled static_branch_likely(&x32_enabled_skey)
+#endif
+#else
+#define x32_enabled 0
+#endif
+
 /*
  * Only the low 32 bits of orig_ax are meaningful, so we return int.
  * This importantly ignores the high bits on 64-bit, so comparisons
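The net effect of the `syscall.x32` boot parameter this patch introduces can be probed from userspace: x32 syscalls reuse the x86-64 numbers with __X32_SYSCALL_BIT (0x40000000) set, and return -ENOSYS when x32 dispatch is disabled. A small illustrative probe (getpid is syscall 39 on x86-64; this is a sketch, not part of the patch):

```c
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>
#include <sys/syscall.h>

#define X32_SYSCALL_BIT 0x40000000L	/* __X32_SYSCALL_BIT */

int main(void)
{
	/* getpid via the x32 syscall table; succeeds only when the
	 * kernel has CONFIG_X86_X32_ABI and x32 is enabled at boot */
	long pid = syscall(X32_SYSCALL_BIT | 39);

	if (pid >= 0)
		printf("x32 syscalls enabled, getpid() = %ld\n", pid);
	else
		printf("x32 syscalls unavailable: %s\n", strerror(errno));
	return 0;
}
```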
@@ -1,52 +0,0 @@
This reverts the following commit:

From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Thu, 14 Jan 2021 16:32:42 -0600
Subject: objtool: Don't fail the kernel build on fatal errors

[ Upstream commit 655cf86548a3938538642a6df27dd359e13c86bd ]

This is basically a revert of commit 644592d32837 ("objtool: Fail the
kernel build on fatal errors").

That change turned out to be more trouble than it's worth. Failing the
build is an extreme measure which sometimes gets too much attention and
blocks CI build testing.

These fatal-type warnings aren't yet as rare as we'd hope, due to the
ever-increasing matrix of supported toolchains/plugins and their
fast-changing nature as of late.

Also, there are more people (and bots) looking for objtool warnings than
ever before, so even non-fatal warnings aren't likely to be ignored for
long.

Suggested-by: Nick Desaulniers <ndesaulniers@google.com>
Reviewed-by: Miroslav Benes <mbenes@suse.cz>
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Reviewed-by: Kamalesh Babulal <kamalesh@linux.vnet.ibm.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>

--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -4783,10 +4783,14 @@ int check(struct objtool_file *file)
	}

 out:
-	/*
-	 * For now, don't fail the kernel build on fatal warnings.  These
-	 * errors are still fairly common due to the growing matrix of
-	 * supported toolchains and their recent pace of change.
-	 */
+	if (ret < 0) {
+		/*
+		 * Fatal error.  The binary is corrupt or otherwise broken in
+		 * some way, or objtool itself is broken.  Fail the kernel
+		 * build.
+		 */
+		return ret;
+	}
+
	return 0;
 }
34 debian/patches/misc-openwrt/0002-mac80211-avoid-crashing-missing-band.patch vendored Normal file
@@ -0,0 +1,34 @@
From: David Bauer <mail@david-bauer.net>
Date: Thu, 30 Nov 2023 07:32:52 +0100
Subject: [PATCH] mac80211: avoid crashing on invalid band info

Frequent crashes have been observed on MT7916 based platforms. While the
root cause of these crashes is currently unknown, they happen when decoding
rate information of connected STAs in AP mode. The rate-information is
associated with a band which is not available on the PHY.

Check for this condition in order to avoid crashing the whole system.
This patch should be removed once the root cause has been found and
fixed.

Link: https://github.com/freifunk-gluon/gluon/issues/2980

Signed-off-by: David Bauer <mail@david-bauer.net>
---

--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -2474,6 +2474,13 @@ static void sta_stats_decode_rate(struct

	sband = local->hw.wiphy->bands[band];

+	if (!sband) {
+		wiphy_warn(local->hw.wiphy,
+			   "Invalid band %d\n",
+			   band);
+		break;
+	}
+
	if (WARN_ON_ONCE(!sband->bitrates))
		break;
38 debian/patches/misc-openwrt/0003-mac80211-sta-randomize-BA-session-dialog-token-alloc.patch vendored Normal file
@@ -0,0 +1,38 @@
From b478e06a16a8baa00c5ecc87c1d636981f2206d5 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Tue, 29 Oct 2019 10:25:25 +0100
Subject: [PATCH] mac80211: sta: randomize BA session dialog token allocator

We currently always start the dialog token generator at zero,
so the first dialog token we use is always 1. This would be
OK if we had a perfect guarantee that we always do a proper
deauth/re-auth handshake, but in IBSS mode this doesn't always
happen properly.

To make problems with block ack (aggregation) sessions getting
stuck less likely, randomize the dialog token so if we start a
new session but the peer still has old state for us, it can
better detect this.

This is really just a workaround to make things a bit more
robust than they are now - a better fix would be to do a full
authentication handshake in IBSS mode upon having discovered a
new station, and on the receiver resetting the state (removing
and re-adding the station) on receiving the authentication
packet.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/sta_info.c | 1 +
 1 file changed, 1 insertion(+)

--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -583,6 +583,7 @@ __sta_info_alloc(struct ieee80211_sub_if
	spin_lock_init(&sta->ps_lock);
	INIT_WORK(&sta->drv_deliver_wk, sta_deliver_ps_frames);
	wiphy_work_init(&sta->ampdu_mlme.work, ieee80211_ba_session_work);
+	sta->ampdu_mlme.dialog_token_allocator = get_random_u32_below(U8_MAX);
 #ifdef CONFIG_MAC80211_MESH
	if (ieee80211_vif_is_mesh(&sdata->vif)) {
		sta->mesh = kzalloc(sizeof(*sta->mesh), gfp);
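get_random_u32_below(U8_MAX) picks a starting token in [0, 254]; the allocator then increments and wraps modulo 256 as before. A toy userspace model of the resulting token stream (rand() stands in for the kernel RNG; purely illustrative):

```c
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

int main(void)
{
	/* stand-in for get_random_u32_below(U8_MAX): 0..254 */
	srand((unsigned)time(NULL));
	unsigned char token = (unsigned char)(rand() % 255);

	/* the next three BA sessions get distinct tokens that wrap at 256 */
	for (int i = 0; i < 3; i++)
		printf("dialog token %u\n", (unsigned)++token);
	return 0;
}
```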
21 debian/patches/misc-openwrt/0004-mac80211-minstrel_ht-fix-MINSTREL_FRAC-macro.patch vendored Normal file
@@ -0,0 +1,21 @@
From: Felix Fietkau <nbd@nbd.name>
Date: Wed, 28 Apr 2021 21:03:13 +0200
Subject: [PATCH] mac80211: minstrel_ht: fix MINSTREL_FRAC macro

Add missing braces to avoid issues with e.g. using additions in the
div expression

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---

--- a/net/mac80211/rc80211_minstrel_ht.h
+++ b/net/mac80211/rc80211_minstrel_ht.h
@@ -14,7 +14,7 @@

 /* scaled fraction values */
 #define MINSTREL_SCALE  12
-#define MINSTREL_FRAC(val, div) (((val) << MINSTREL_SCALE) / div)
+#define MINSTREL_FRAC(val, div) (((val) << MINSTREL_SCALE) / (div))
 #define MINSTREL_TRUNC(val) ((val) >> MINSTREL_SCALE)

 #define EWMA_LEVEL	96	/* ewma weighting factor [/EWMA_DIV] */
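The missing parentheses matter because of operator precedence: with a compound argument, the division binds to only its first term. A standalone demonstration of the bug and the fix (values are hypothetical, not from the driver):

```c
#include <stdio.h>

#define MINSTREL_SCALE 12
/* buggy: division applies only to the first term of "div" */
#define FRAC_OLD(val, div) (((val) << MINSTREL_SCALE) / div)
/* fixed: parenthesized divisor */
#define FRAC_NEW(val, div) (((val) << MINSTREL_SCALE) / (div))

int main(void)
{
	int succ = 10, att = 20, last_att = 20;

	/* old macro expands to (10 << 12) / 20 + 20 = 2068 */
	printf("old: %d\n", FRAC_OLD(succ, att + last_att));
	/* new macro expands to (10 << 12) / (20 + 20) = 1024 */
	printf("new: %d\n", FRAC_NEW(succ, att + last_att));
	return 0;
}
```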
30 debian/patches/misc-openwrt/0005-mac80211-minstrel_ht-reduce-fluctuations-in-rate-pro.patch vendored Normal file
@@ -0,0 +1,30 @@
From: Felix Fietkau <nbd@nbd.name>
Date: Sat, 6 Feb 2021 16:08:01 +0100
Subject: [PATCH] mac80211: minstrel_ht: reduce fluctuations in rate
 probability stats

In some scenarios when there is a lot of fluctuation in packet error rates,
rate switching can be amplified when the statistics get skewed by time slots
with very few tries.
Make the input data to the moving average more smooth by adding the
success/attempts count from the last stats window as well. This has the
advantage of smoothing the data without introducing any extra lag to sampling
rates.
This significantly improves rate stability on a strong test link subjected to
periodic noise bursts generated with an SDR.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---

--- a/net/mac80211/rc80211_minstrel_ht.c
+++ b/net/mac80211/rc80211_minstrel_ht.c
@@ -769,7 +769,8 @@ minstrel_ht_calc_rate_stats(struct minst
	unsigned int cur_prob;

	if (unlikely(mrs->attempts > 0)) {
-		cur_prob = MINSTREL_FRAC(mrs->success, mrs->attempts);
+		cur_prob = MINSTREL_FRAC(mrs->success + mrs->last_success,
+					 mrs->attempts + mrs->last_attempts);
		minstrel_filter_avg_add(&mrs->prob_avg,
					&mrs->prob_avg_1, cur_prob);
		mrs->att_hist += mrs->attempts;
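To see the smoothing effect, consider a tiny stats window that happens to be all failures right after a healthy one (numbers below are illustrative, not measurements):

```c
#include <stdio.h>

#define MINSTREL_SCALE 12
#define MINSTREL_FRAC(val, div) (((val) << MINSTREL_SCALE) / (div))

int main(void)
{
	/* previous window: 30/40 delivered; current window: 0/2 delivered */
	unsigned int last_success = 30, last_attempts = 40;
	unsigned int success = 0, attempts = 2;

	/* old input to the moving average: 0/2 -> probability 0 */
	printf("single window: %u\n", MINSTREL_FRAC(success, attempts));
	/* new input: 30/42 -> about 71% of full scale (4096) */
	printf("two windows:   %u\n",
	       MINSTREL_FRAC(success + last_success,
			     attempts + last_attempts));
	return 0;
}
```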
151 debian/patches/misc-openwrt/0006-mac80211-minstrel_ht-rework-rate-downgrade-code-and-.patch vendored Normal file
@@ -0,0 +1,151 @@
From: Felix Fietkau <nbd@nbd.name>
Date: Sat, 6 Feb 2021 16:33:14 +0100
Subject: [PATCH] mac80211: minstrel_ht: rework rate downgrade code and
 max_prob rate selection

The current fallback code for fast rate switching on potentially failing rates
is triggering too often if there is some strong noise on the channel. This can
lead to wild fluctuations in the rate selection.
Additionally, switching down to max_prob_rate can create a significant gap down
in throughput, especially when using only 2 spatial streams, because max_prob_rate
is limited to using fewer streams than the max_tp rates.
In order to improve throughput without reducing reliability too much, use the
rate downgrade code for the max_prob_rate only, and allow the non-downgraded
max_prob_rate to use as many spatial streams as the max_tp rates.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---

--- a/net/mac80211/rc80211_minstrel_ht.c
+++ b/net/mac80211/rc80211_minstrel_ht.c
@@ -580,6 +580,14 @@ minstrel_ht_set_best_prob_rate(struct mi
	int cur_tp_avg, cur_group, cur_idx;
	int max_gpr_group, max_gpr_idx;
	int max_gpr_tp_avg, max_gpr_prob;
+	int min_dur;
+
+	min_dur = max(minstrel_get_duration(mi->max_tp_rate[0]),
+		      minstrel_get_duration(mi->max_tp_rate[1]));
+
+	/* make the rate at least 18% slower than max tp rates */
+	if (minstrel_get_duration(index) <= min_dur * 19 / 16)
+		return;

	cur_group = MI_RATE_GROUP(index);
	cur_idx = MI_RATE_IDX(index);
@@ -601,11 +609,6 @@ minstrel_ht_set_best_prob_rate(struct mi
	    !minstrel_ht_is_legacy_group(max_tp_group))
		return;

-	/* skip rates faster than max tp rate with lower prob */
-	if (minstrel_get_duration(mi->max_tp_rate[0]) > minstrel_get_duration(index) &&
-	    mrs->prob_avg < max_tp_prob)
-		return;
-
	max_gpr_group = MI_RATE_GROUP(mg->max_group_prob_rate);
	max_gpr_idx = MI_RATE_IDX(mg->max_group_prob_rate);
	max_gpr_prob = mi->groups[max_gpr_group].rates[max_gpr_idx].prob_avg;
@@ -663,40 +666,6 @@ minstrel_ht_assign_best_tp_rates(struct

 }

-/*
- * Try to increase robustness of max_prob rate by decrease number of
- * streams if possible.
- */
-static inline void
-minstrel_ht_prob_rate_reduce_streams(struct minstrel_ht_sta *mi)
-{
-	struct minstrel_mcs_group_data *mg;
-	int tmp_max_streams, group, tmp_idx, tmp_prob;
-	int tmp_tp = 0;
-
-	if (!mi->sta->deflink.ht_cap.ht_supported)
-		return;
-
-	group = MI_RATE_GROUP(mi->max_tp_rate[0]);
-	tmp_max_streams = minstrel_mcs_groups[group].streams;
-	for (group = 0; group < ARRAY_SIZE(minstrel_mcs_groups); group++) {
-		mg = &mi->groups[group];
-		if (!mi->supported[group] || group == MINSTREL_CCK_GROUP)
-			continue;
-
-		tmp_idx = MI_RATE_IDX(mg->max_group_prob_rate);
-		tmp_prob = mi->groups[group].rates[tmp_idx].prob_avg;
-
-		if (tmp_tp < minstrel_ht_get_tp_avg(mi, group, tmp_idx, tmp_prob) &&
-		    (minstrel_mcs_groups[group].streams < tmp_max_streams)) {
-			mi->max_prob_rate = mg->max_group_prob_rate;
-			tmp_tp = minstrel_ht_get_tp_avg(mi, group,
-							tmp_idx,
-							tmp_prob);
-		}
-	}
-}
-
 static u16
 __minstrel_ht_get_sample_rate(struct minstrel_ht_sta *mi,
			      enum minstrel_sample_type type)
@@ -1176,8 +1145,6 @@ minstrel_ht_update_stats(struct minstrel

	mi->max_prob_rate = tmp_max_prob_rate;

-	/* Try to increase robustness of max_prob_rate*/
-	minstrel_ht_prob_rate_reduce_streams(mi);
	minstrel_ht_refill_sample_rates(mi);

 #ifdef CONFIG_MAC80211_DEBUGFS
@@ -1256,7 +1223,7 @@ minstrel_ht_ri_txstat_valid(struct minst
 }

 static void
-minstrel_downgrade_rate(struct minstrel_ht_sta *mi, u16 *idx, bool primary)
+minstrel_downgrade_prob_rate(struct minstrel_ht_sta *mi, u16 *idx)
 {
	int group, orig_group;

@@ -1271,11 +1238,7 @@ minstrel_downgrade_rate(struct minstrel_
		    minstrel_mcs_groups[orig_group].streams)
			continue;

-		if (primary)
-			*idx = mi->groups[group].max_group_tp_rate[0];
-		else
-			*idx = mi->groups[group].max_group_tp_rate[1];
-		break;
+		*idx = mi->groups[group].max_group_prob_rate;
	}
 }

@@ -1286,7 +1249,7 @@ minstrel_ht_tx_status(void *priv, struct
	struct ieee80211_tx_info *info = st->info;
	struct minstrel_ht_sta *mi = priv_sta;
	struct ieee80211_tx_rate *ar = info->status.rates;
-	struct minstrel_rate_stats *rate, *rate2;
+	struct minstrel_rate_stats *rate;
	struct minstrel_priv *mp = priv;
	u32 update_interval = mp->update_interval;
	bool last, update = false;
@@ -1354,18 +1317,13 @@ minstrel_ht_tx_status(void *priv, struct
		/*
		 * check for sudden death of spatial multiplexing,
		 * downgrade to a lower number of streams if necessary.
+		 * only do this for the max_prob_rate to prevent spurious
+		 * rate fluctuations when the link changes suddenly
		 */
-		rate = minstrel_get_ratestats(mi, mi->max_tp_rate[0]);
+		rate = minstrel_get_ratestats(mi, mi->max_prob_rate);
		if (rate->attempts > 30 &&
		    rate->success < rate->attempts / 4) {
-			minstrel_downgrade_rate(mi, &mi->max_tp_rate[0], true);
-			update = true;
-		}
-
-		rate2 = minstrel_get_ratestats(mi, mi->max_tp_rate[1]);
-		if (rate2->attempts > 30 &&
-		    rate2->success < rate2->attempts / 4) {
-			minstrel_downgrade_rate(mi, &mi->max_tp_rate[1], false);
+			minstrel_downgrade_prob_rate(mi, &mi->max_prob_rate);
			update = true;
		}
	}
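The "at least 18% slower" guard is integer arithmetic: a candidate max_prob_rate is skipped unless its per-frame duration exceeds min_dur * 19/16, i.e. is at least 18.75% longer than the fastest max-throughput rate. A quick check with assumed values:

```c
#include <stdio.h>

int main(void)
{
	/* duration of the fastest max-throughput rate, arbitrary units */
	int min_dur = 160;
	/* threshold used by the patch: 19/16 = 1.1875x */
	int threshold = min_dur * 19 / 16;

	printf("candidates with duration <= %d are skipped\n", threshold);
	printf("so max_prob_rate must be >= %d%% slower\n",
	       (threshold - min_dur) * 100 / min_dur);	/* prints 18 */
	return 0;
}
```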
53 debian/patches/misc-openwrt/0007-mac80211-increase-quantum-for-airtime-scheduler.patch vendored Normal file
@@ -0,0 +1,53 @@
From: Felix Fietkau <nbd@nbd.name>
Date: Sun, 26 Jun 2022 11:43:25 +0200
Subject: [PATCH] mac80211: increase quantum for airtime scheduler

Given the typical AQL budget and queue length, a quantum of 256 with the
default station weight often requires iterating over all queues frequently,
until one of them becomes eligible.
Improve performance by using 8 times the station weight as the scheduler
quantum.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---

--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -103,6 +103,8 @@ ieee80211_sta_keep_active(struct sta_inf
	return time_before_eq(jiffies, sta->airtime[ac].last_active + HZ / 10);
 }

+#define AIRTIME_QUANTUM_SHIFT 3
+
 struct ieee80211_bss {
	u32 device_ts_beacon, device_ts_presp;

--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -4084,7 +4084,7 @@ struct ieee80211_txq *ieee80211_next_txq

	if (deficit < 0)
		sta->airtime[txqi->txq.ac].deficit +=
-			sta->airtime_weight;
+			sta->airtime_weight << AIRTIME_QUANTUM_SHIFT;

	if (deficit < 0 || !aql_check) {
		list_move_tail(&txqi->schedule_order,
@@ -4227,7 +4227,8 @@ bool ieee80211_txq_may_transmit(struct i
	}
	sta = container_of(iter->txq.sta, struct sta_info, sta);
	if (ieee80211_sta_deficit(sta, ac) < 0)
-		sta->airtime[ac].deficit += sta->airtime_weight;
+		sta->airtime[ac].deficit += sta->airtime_weight <<
+					    AIRTIME_QUANTUM_SHIFT;
	list_move_tail(&iter->schedule_order, &local->active_txqs[ac]);
 }

@@ -4235,7 +4236,7 @@ bool ieee80211_txq_may_transmit(struct i
	if (sta->airtime[ac].deficit >= 0)
		goto out;

-	sta->airtime[ac].deficit += sta->airtime_weight;
+	sta->airtime[ac].deficit += sta->airtime_weight << AIRTIME_QUANTUM_SHIFT;
	list_move_tail(&txqi->schedule_order, &local->active_txqs[ac]);
	spin_unlock_bh(&local->active_txq_lock[ac]);
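Assuming the default station weight of 256 µs, the shift raises the per-round refill from 256 µs to 2048 µs, so a station that was just charged ~1500 µs of airtime becomes eligible after one refill instead of six. A toy refill loop (numbers assumed, not measured):

```c
#include <stdio.h>

#define AIRTIME_QUANTUM_SHIFT 3

int main(void)
{
	int weight = 256;	/* default station weight, in us */
	int deficit = -1500;	/* airtime just charged for one aggregate */
	int rounds = 0;

	/* the scheduler keeps refilling until the deficit is non-negative */
	while (deficit < 0) {
		deficit += weight << AIRTIME_QUANTUM_SHIFT;	/* 2048 us */
		rounds++;
	}
	printf("eligible after %d round(s), deficit %d us\n", rounds, deficit);
	/* with the old quantum of 256 us this would take 6 rounds */
	return 0;
}
```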
293 debian/patches/misc-openwrt/0008-mac80211-add-AQL-support-for-broadcast-packets.patch vendored Normal file
@@ -0,0 +1,293 @@
From: Felix Fietkau <nbd@nbd.name>
Date: Fri, 9 Feb 2024 19:43:40 +0100
Subject: [PATCH] mac80211: add AQL support for broadcast packets

Excessive broadcast traffic with little competing unicast traffic can easily
flood hardware queues, leading to throughput issues. Additionally, filling
the hardware queues with too many packets breaks FQ for broadcast data.
Fix this by enabling AQL for broadcast packets.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---

--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -3464,6 +3464,7 @@ enum wiphy_params_flags {
 /* The per TXQ device queue limit in airtime */
 #define IEEE80211_DEFAULT_AQL_TXQ_LIMIT_L	5000
 #define IEEE80211_DEFAULT_AQL_TXQ_LIMIT_H	12000
+#define IEEE80211_DEFAULT_AQL_TXQ_LIMIT_BC	50000

 /* The per interface airtime threshold to switch to lower queue limit */
 #define IEEE80211_AQL_THRESHOLD			24000
--- a/net/mac80211/debugfs.c
+++ b/net/mac80211/debugfs.c
@@ -212,11 +212,13 @@ static ssize_t aql_pending_read(struct f
			"VI %u us\n"
			"BE %u us\n"
			"BK %u us\n"
+			"BC/MC %u us\n"
			"total %u us\n",
			atomic_read(&local->aql_ac_pending_airtime[IEEE80211_AC_VO]),
			atomic_read(&local->aql_ac_pending_airtime[IEEE80211_AC_VI]),
			atomic_read(&local->aql_ac_pending_airtime[IEEE80211_AC_BE]),
			atomic_read(&local->aql_ac_pending_airtime[IEEE80211_AC_BK]),
+			atomic_read(&local->aql_bc_pending_airtime),
			atomic_read(&local->aql_total_pending_airtime));
	return simple_read_from_buffer(user_buf, count, ppos,
				       buf, len);
@@ -241,7 +243,8 @@ static ssize_t aql_txq_limit_read(struct
			"VO %u %u\n"
			"VI %u %u\n"
			"BE %u %u\n"
-			"BK %u %u\n",
+			"BK %u %u\n"
+			"BC/MC %u\n",
			local->aql_txq_limit_low[IEEE80211_AC_VO],
			local->aql_txq_limit_high[IEEE80211_AC_VO],
			local->aql_txq_limit_low[IEEE80211_AC_VI],
@@ -249,7 +252,8 @@ static ssize_t aql_txq_limit_read(struct
			local->aql_txq_limit_low[IEEE80211_AC_BE],
			local->aql_txq_limit_high[IEEE80211_AC_BE],
			local->aql_txq_limit_low[IEEE80211_AC_BK],
-			local->aql_txq_limit_high[IEEE80211_AC_BK]);
+			local->aql_txq_limit_high[IEEE80211_AC_BK],
+			local->aql_txq_limit_bc);
	return simple_read_from_buffer(user_buf, count, ppos,
				       buf, len);
 }
@@ -275,6 +279,11 @@ static ssize_t aql_txq_limit_write(struc
	else
		buf[count] = '\0';

+	if (sscanf(buf, "mcast %u", &q_limit_low) == 1) {
+		local->aql_txq_limit_bc = q_limit_low;
+		return count;
+	}
+
	if (sscanf(buf, "%u %u %u", &ac, &q_limit_low, &q_limit_high) != 3)
		return -EINVAL;

--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -1368,10 +1368,12 @@ struct ieee80211_local {
	spinlock_t handle_wake_tx_queue_lock;

	u16 airtime_flags;
+	u32 aql_txq_limit_bc;
	u32 aql_txq_limit_low[IEEE80211_NUM_ACS];
	u32 aql_txq_limit_high[IEEE80211_NUM_ACS];
	u32 aql_threshold;
	atomic_t aql_total_pending_airtime;
+	atomic_t aql_bc_pending_airtime;
	atomic_t aql_ac_pending_airtime[IEEE80211_NUM_ACS];

	const struct ieee80211_ops *ops;
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -959,6 +959,7 @@ struct ieee80211_hw *ieee80211_alloc_hw_
	spin_lock_init(&local->rx_path_lock);
	spin_lock_init(&local->queue_stop_reason_lock);

+	local->aql_txq_limit_bc = IEEE80211_DEFAULT_AQL_TXQ_LIMIT_BC;
	for (i = 0; i < IEEE80211_NUM_ACS; i++) {
		INIT_LIST_HEAD(&local->active_txqs[i]);
		spin_lock_init(&local->active_txq_lock[i]);
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -2388,13 +2388,28 @@ EXPORT_SYMBOL(ieee80211_sta_recalc_aggre

 void ieee80211_sta_update_pending_airtime(struct ieee80211_local *local,
					   struct sta_info *sta, u8 ac,
-					   u16 tx_airtime, bool tx_completed)
+					   u16 tx_airtime, bool tx_completed,
+					   bool mcast)
 {
	int tx_pending;

	if (!wiphy_ext_feature_isset(local->hw.wiphy, NL80211_EXT_FEATURE_AQL))
		return;

+	if (mcast) {
+		if (!tx_completed) {
+			atomic_add(tx_airtime, &local->aql_bc_pending_airtime);
+			return;
+		}
+
+		tx_pending = atomic_sub_return(tx_airtime,
+					       &local->aql_bc_pending_airtime);
+		if (tx_pending < 0)
+			atomic_cmpxchg(&local->aql_bc_pending_airtime,
+				       tx_pending, 0);
+		return;
+	}
+
	if (!tx_completed) {
		if (sta)
			atomic_add(tx_airtime,
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -2556,7 +2556,7 @@ static u16 ieee80211_store_ack_skb(struc

	spin_lock_irqsave(&local->ack_status_lock, flags);
	id = idr_alloc(&local->ack_status_frames, ack_skb,
-		       1, 0x2000, GFP_ATOMIC);
+		       1, 0x1000, GFP_ATOMIC);
	spin_unlock_irqrestore(&local->ack_status_lock, flags);

	if (id >= 0) {
@@ -3985,20 +3985,20 @@ begin:
 encap_out:
	info->control.vif = vif;

-	if (tx.sta &&
-	    wiphy_ext_feature_isset(local->hw.wiphy, NL80211_EXT_FEATURE_AQL)) {
-		bool ampdu = txq->ac != IEEE80211_AC_VO;
+	if (wiphy_ext_feature_isset(local->hw.wiphy, NL80211_EXT_FEATURE_AQL)) {
+		bool ampdu = txq->sta && txq->ac != IEEE80211_AC_VO;
		u32 airtime;

		airtime = ieee80211_calc_expected_tx_airtime(hw, vif, txq->sta,
							     skb->len, ampdu);
-		if (airtime) {
-			airtime = ieee80211_info_set_tx_time_est(info, airtime);
-			ieee80211_sta_update_pending_airtime(local, tx.sta,
-							     txq->ac,
-							     airtime,
-							     false);
-		}
+		if (!airtime)
+			return skb;
+
+		airtime = ieee80211_info_set_tx_time_est(info, airtime);
+		info->tx_time_mc = !tx.sta;
+		ieee80211_sta_update_pending_airtime(local, tx.sta, txq->ac,
+						     airtime, false,
+						     info->tx_time_mc);
	}

	return skb;
@@ -4050,6 +4050,7 @@ struct ieee80211_txq *ieee80211_next_txq
	struct ieee80211_txq *ret = NULL;
	struct txq_info *txqi = NULL, *head = NULL;
	bool found_eligible_txq = false;
+	bool aql_check;

	spin_lock_bh(&local->active_txq_lock[ac]);

@@ -4073,26 +4074,26 @@ struct ieee80211_txq *ieee80211_next_txq
	if (!head)
		head = txqi;

+	aql_check = ieee80211_txq_airtime_check(hw, &txqi->txq);
+	if (aql_check)
+		found_eligible_txq = true;
+
	if (txqi->txq.sta) {
		struct sta_info *sta = container_of(txqi->txq.sta,
						    struct sta_info, sta);
-		bool aql_check = ieee80211_txq_airtime_check(hw, &txqi->txq);
-		s32 deficit = ieee80211_sta_deficit(sta, txqi->txq.ac);
-
-		if (aql_check)
-			found_eligible_txq = true;
-
-		if (deficit < 0)
+		if (ieee80211_sta_deficit(sta, txqi->txq.ac) < 0) {
			sta->airtime[txqi->txq.ac].deficit +=
				sta->airtime_weight << AIRTIME_QUANTUM_SHIFT;
-
-		if (deficit < 0 || !aql_check) {
-			list_move_tail(&txqi->schedule_order,
-				       &local->active_txqs[txqi->txq.ac]);
-			goto begin;
+			aql_check = false;
		}
	}

+	if (!aql_check) {
+		list_move_tail(&txqi->schedule_order,
+			       &local->active_txqs[txqi->txq.ac]);
+		goto begin;
+	}
+
	if (txqi->schedule_round == local->schedule_round[ac])
		goto out;

@@ -4157,7 +4158,8 @@ bool ieee80211_txq_airtime_check(struct
		return true;

	if (!txq->sta)
-		return true;
+		return atomic_read(&local->aql_bc_pending_airtime) <
+		       local->aql_txq_limit_bc;

	if (unlikely(txq->tid == IEEE80211_NUM_TIDS))
		return true;
@@ -4206,15 +4208,15 @@ bool ieee80211_txq_may_transmit(struct i

	spin_lock_bh(&local->active_txq_lock[ac]);

-	if (!txqi->txq.sta)
-		goto out;
-
	if (list_empty(&txqi->schedule_order))
		goto out;

	if (!ieee80211_txq_schedule_airtime_check(local, ac))
		goto out;

+	if (!txqi->txq.sta)
+		goto out;
+
	list_for_each_entry_safe(iter, tmp, &local->active_txqs[ac],
				 schedule_order) {
		if (iter == txqi)
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -1238,8 +1238,8 @@ struct ieee80211_tx_info {
			status_data_idr:1,
			status_data:13,
			hw_queue:4,
+			tx_time_mc:1,
			tx_time_est:10;
-	/* 1 free bit */

	union {
		struct {
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -147,7 +147,8 @@ struct airtime_info {

 void ieee80211_sta_update_pending_airtime(struct ieee80211_local *local,
					   struct sta_info *sta, u8 ac,
-					   u16 tx_airtime, bool tx_completed);
+					   u16 tx_airtime, bool tx_completed,
+					   bool mcast);

 struct sta_info;

--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -734,7 +734,7 @@ static void ieee80211_report_used_skb(st
		ieee80211_sta_update_pending_airtime(local, sta,
						     skb_get_queue_mapping(skb),
						     tx_time_est,
-						     true);
+						     true, info->tx_time_mc);
		rcu_read_unlock();
	}

@@ -1143,10 +1143,11 @@ void ieee80211_tx_status_ext(struct ieee
	/* Do this here to avoid the expensive lookup of the sta
	 * in ieee80211_report_used_skb().
	 */
+	bool mcast = IEEE80211_SKB_CB(skb)->tx_time_mc;
	ieee80211_sta_update_pending_airtime(local, sta,
					     skb_get_queue_mapping(skb),
					     tx_time_est,
-					     true, mcast);
+					     true, mcast);
	ieee80211_info_set_tx_time_est(IEEE80211_SKB_CB(skb), 0);
 }
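The mcast branch keeps a single global pending-airtime counter and clamps it back to zero when completions over-report. A userspace model of that clamping with C11 atomics (illustrative sketch; the kernel uses atomic_t, atomic_sub_return() and atomic_cmpxchg() as shown in the hunk above):

```c
#include <stdatomic.h>
#include <stdio.h>

static _Atomic int bc_pending;	/* models aql_bc_pending_airtime */

static void tx_enqueue(int airtime)
{
	atomic_fetch_add(&bc_pending, airtime);
}

static void tx_complete(int airtime)
{
	int pending = atomic_fetch_sub(&bc_pending, airtime) - airtime;

	/* a completion may report more airtime than was charged; clamp
	 * the counter to zero, as the patch does (tolerating a racing
	 * update, in which case the compare-exchange simply fails) */
	if (pending < 0)
		atomic_compare_exchange_strong(&bc_pending, &pending, 0);
}

int main(void)
{
	tx_enqueue(300);
	tx_complete(500);	/* over-reported completion */
	printf("pending = %d\n", atomic_load(&bc_pending));	/* 0 */
	return 0;
}
```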
221
debian/patches/misc-openwrt/0009-mac80211-revert-dynamically-set-codel-parameters-per-station.patch
vendored
Normal file
221
debian/patches/misc-openwrt/0009-mac80211-revert-dynamically-set-codel-parameters-per-station.patch
vendored
Normal file
@@ -0,0 +1,221 @@
This reverts commit 484a54c2e597dbc4ace79c1687022282905afba0. The CoDel
parameter change essentially disables CoDel on slow stations, with some
questionable assumptions, as Dave pointed out in [0]. Quoting from
there:

But here are my pithy comments as to why this part of mac80211 is so
wrong...

static void sta_update_codel_params(struct sta_info *sta, u32 thr)
{
-       if (thr && thr < STA_SLOW_THRESHOLD * sta->local->num_sta) {

1) sta->local->num_sta is the number of associated, rather than
active, stations. "Active" stations in the last 50ms or so, might have
been a better thing to use, but as most people have far more than that
associated, we end up with really lousy codel parameters, all the
time. Mistake numero uno!

2) The STA_SLOW_THRESHOLD was completely arbitrary in 2016.

-               sta->cparams.target = MS2TIME(50);

This, by itself, was probably not too bad. 30ms might have been
better, at the time, when we were battling powersave etc, but 20ms was
enough, really, to cover most scenarios, even where we had low rate
2Ghz multicast to cope with. Even then, codel has a hard time finding
any sane drop rate at all, with a target this high.

-               sta->cparams.interval = MS2TIME(300);

But this was horrible, a total mistake, that is leading to codel being
completely ineffective in almost any scenario on clients or APS.
100ms, even 80ms, here, would be vastly better than this insanity. I'm
seeing 5+seconds of delay accumulated in a bunch of otherwise happily
fq-ing APs....

100ms of observed jitter during a flow is enough. Certainly (in 2016)
there were interactions with powersave that I did not understand, and
still don't, but if you are transmitting in the first place, powersave
shouldn't be a problemmmm.....

-               sta->cparams.ecn = false;

At the time we were pretty nervous about ecn, I'm kind of sanguine
about it now, and reliably indicating ecn seems better than turning it
off for any reason.

[...]

In production, on p2p wireless, I've had 8ms and 80ms for target and
interval for years now, and it works great.

I think Dave's arguments above are basically sound on the face of it,
and various experimentation with tighter CoDel parameters in the OpenWrt
community has shown promising results[1]. So I don't think there's any
reason to keep this parameter fiddling; hence this revert.

[0] https://lore.kernel.org/linux-wireless/CAA93jw6NJ2cmLmMauz0xAgC2MGbBq6n0ZiZzAdkK0u4b+O2yXg@mail.gmail.com/
[1] https://forum.openwrt.org/t/reducing-multiplexing-latencies-still-further-in-wifi/133605/130

Suggested-By: Dave Taht <dave.taht@gmail.com>
In-memory-of: Dave Taht <dave.taht@gmail.com>
Signed-off-by: Toke Høiland-Jørgensen <toke@toke.dk>
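To put concrete numbers on Dave's point 1 before the diff itself: a minimal standalone sketch of the reverted check, with an assumed station count and expected throughput (illustrative values only, not from the patch):

```c
#include <stdio.h>

#define STA_SLOW_THRESHOLD 6000 /* kbps, i.e. 6 Mbps, as in the reverted code */

int main(void)
{
	unsigned int num_sta = 50;    /* assumed: associated (not active) stations */
	unsigned int thr = 20000;     /* assumed: 20 Mbps expected throughput, in kbps */

	/* The reverted check: the threshold scales with *associated*
	 * stations, so on this 50-station AP it becomes 300 Mbps and a
	 * 20 Mbps station is classed as "slow". */
	if (thr && thr < STA_SLOW_THRESHOLD * num_sta)
		printf("slow: target 50ms, interval 300ms, ecn off\n");
	else
		printf("fast: target 20ms, interval 100ms, ecn on\n");
	return 0;
}
```

With these numbers, any station below 300 Mbps gets the lenient 50 ms/300 ms parameters, which in practice is nearly every station.
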
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -5347,22 +5347,6 @@ void ieee80211_get_tx_rates(struct ieee8
 			    int max_rates);

 /**
- * ieee80211_sta_set_expected_throughput - set the expected tpt for a station
- *
- * Call this function to notify mac80211 about a change in expected throughput
- * to a station. A driver for a device that does rate control in firmware can
- * call this function when the expected throughput estimate towards a station
- * changes. The information is used to tune the CoDel AQM applied to traffic
- * going towards that station (which can otherwise be too aggressive and cause
- * slow stations to starve).
- *
- * @pubsta: the station to set throughput for.
- * @thr: the current expected throughput in kbps.
- */
-void ieee80211_sta_set_expected_throughput(struct ieee80211_sta *pubsta,
-					   u32 thr);
-
-/**
  * ieee80211_tx_rate_update - transmit rate update callback
  *
  * Drivers should call this functions with a non-NULL pub sta
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -152,12 +152,6 @@ static ssize_t sta_aqm_read(struct file

 	p += scnprintf(p,
 		       bufsz + buf - p,
-		       "target %uus interval %uus ecn %s\n",
-		       codel_time_to_us(sta->cparams.target),
-		       codel_time_to_us(sta->cparams.interval),
-		       sta->cparams.ecn ? "yes" : "no");
-	p += scnprintf(p,
-		       bufsz + buf - p,
 		       "tid ac backlog-bytes backlog-packets new-flows drops marks overlimit collisions tx-bytes tx-packets flags\n");

 	for (i = 0; i < ARRAY_SIZE(sta->sta.txq); i++) {
--- a/net/mac80211/rate.c
+++ b/net/mac80211/rate.c
@@ -990,8 +990,6 @@ int rate_control_set_rates(struct ieee80
 	if (sta->uploaded)
 		drv_sta_rate_tbl_update(hw_to_local(hw), sta->sdata, pubsta);

-	ieee80211_sta_set_expected_throughput(pubsta, sta_get_expected_throughput(sta));
-
 	return 0;
 }
 EXPORT_SYMBOL(rate_control_set_rates);
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -18,7 +18,6 @@
 #include <linux/timer.h>
 #include <linux/rtnetlink.h>

-#include <net/codel.h>
 #include <net/mac80211.h>
 #include "ieee80211_i.h"
 #include "driver-ops.h"
@@ -702,13 +701,6 @@ __sta_info_alloc(struct ieee80211_sub_if
 		}
 	}

-	sta->cparams.ce_threshold = CODEL_DISABLED_THRESHOLD;
-	sta->cparams.target = MS2TIME(20);
-	sta->cparams.interval = MS2TIME(100);
-	sta->cparams.ecn = true;
-	sta->cparams.ce_threshold_selector = 0;
-	sta->cparams.ce_threshold_mask = 0;
-
 	sta_dbg(sdata, "Allocated STA %pM\n", sta->sta.addr);

 	return sta;
@@ -2928,27 +2920,6 @@ unsigned long ieee80211_sta_last_active(
 	return sta->deflink.status_stats.last_ack;
 }

-static void sta_update_codel_params(struct sta_info *sta, u32 thr)
-{
-	if (thr && thr < STA_SLOW_THRESHOLD * sta->local->num_sta) {
-		sta->cparams.target = MS2TIME(50);
-		sta->cparams.interval = MS2TIME(300);
-		sta->cparams.ecn = false;
-	} else {
-		sta->cparams.target = MS2TIME(20);
-		sta->cparams.interval = MS2TIME(100);
-		sta->cparams.ecn = true;
-	}
-}
-
-void ieee80211_sta_set_expected_throughput(struct ieee80211_sta *pubsta,
-					   u32 thr)
-{
-	struct sta_info *sta = container_of(pubsta, struct sta_info, sta);
-
-	sta_update_codel_params(sta, thr);
-}
-
 int ieee80211_sta_allocate_link(struct sta_info *sta, unsigned int link_id)
 {
 	struct ieee80211_sub_if_data *sdata = sta->sdata;
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -467,14 +467,6 @@ struct ieee80211_fragment_cache {
 	unsigned int next;
 };

-/*
- * The bandwidth threshold below which the per-station CoDel parameters will be
- * scaled to be more lenient (to prevent starvation of slow stations). This
- * value will be scaled by the number of active stations when it is being
- * applied.
- */
-#define STA_SLOW_THRESHOLD 6000 /* 6 Mbps */
-
 /**
  * struct link_sta_info - Link STA information
  * All link specific sta info are stored here for reference. This can be
@@ -627,7 +619,6 @@ struct link_sta_info {
  * @sta: station information we share with the driver
  * @sta_state: duplicates information about station state (for debug)
  * @rcu_head: RCU head used for freeing this station struct
- * @cparams: CoDel parameters for this station.
  * @reserved_tid: reserved TID (if any, otherwise IEEE80211_TID_UNRESERVED)
  * @amsdu_mesh_control: track the mesh A-MSDU format used by the peer:
  *
@@ -718,8 +709,6 @@ struct sta_info {
 	struct dentry *debugfs_dir;
 #endif

-	struct codel_params cparams;
-
 	u8 reserved_tid;
 	s8 amsdu_mesh_control;

--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1402,16 +1402,9 @@ static struct sk_buff *fq_tin_dequeue_fu

 	local = container_of(fq, struct ieee80211_local, fq);
 	txqi = container_of(tin, struct txq_info, tin);
+	cparams = &local->cparams;
 	cstats = &txqi->cstats;

-	if (txqi->txq.sta) {
-		struct sta_info *sta = container_of(txqi->txq.sta,
-						    struct sta_info, sta);
-		cparams = &sta->cparams;
-	} else {
-		cparams = &local->cparams;
-	}
-
 	if (flow == &tin->default_flow)
 		cvars = &txqi->def_cvars;
 	else
debian/patches/misc-openwrt/0010-mac80211-txq-tune.patch (vendored, new file, 13 lines)
@@ -0,0 +1,13 @@
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1599,8 +1599,8 @@ int ieee80211_txq_setup_flows(struct iee
 	fq->memory_limit = 4 << 20; /* 4 Mbytes */

 	codel_params_init(&local->cparams);
-	local->cparams.interval = MS2TIME(100);
-	local->cparams.target = MS2TIME(20);
+	local->cparams.interval = MS2TIME(50);
+	local->cparams.target = MS2TIME(10);
 	local->cparams.ecn = true;

 	local->cvars = kvcalloc(fq->flows_cnt, sizeof(local->cvars[0]),
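The two constants above pass through MS2TIME() into CoDel's internal time units. A hedged sketch of that conversion, assuming the mainline codel.h definition (nanoseconds right-shifted by CODEL_SHIFT):

```c
#include <stdio.h>

#define NSEC_PER_MSEC 1000000ULL
#define CODEL_SHIFT   10   /* assumed, as in mainline include/net/codel.h */
#define MS2TIME(ms)   (((ms) * NSEC_PER_MSEC) >> CODEL_SHIFT)

int main(void)
{
	/* The tuned values from the patch: 10 ms target, 50 ms interval,
	 * expressed in CoDel's ~1.024 us units. */
	printf("target   10 ms -> %llu units\n", MS2TIME(10ULL));
	printf("interval 50 ms -> %llu units\n", MS2TIME(50ULL));
	return 0;
}
```
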
debian/patches/misc-openwrt/0011-cfg80211-aql-txq-limit.patch (vendored, new file, 18 lines)
@@ -0,0 +1,18 @@
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -3462,12 +3462,12 @@ enum wiphy_params_flags {
 #define IEEE80211_DEFAULT_AIRTIME_WEIGHT	256

 /* The per TXQ device queue limit in airtime */
-#define IEEE80211_DEFAULT_AQL_TXQ_LIMIT_L	5000
-#define IEEE80211_DEFAULT_AQL_TXQ_LIMIT_H	12000
+#define IEEE80211_DEFAULT_AQL_TXQ_LIMIT_L	1500
+#define IEEE80211_DEFAULT_AQL_TXQ_LIMIT_H	5000
 #define IEEE80211_DEFAULT_AQL_TXQ_LIMIT_BC	50000

 /* The per interface airtime threshold to switch to lower queue limit */
-#define IEEE80211_AQL_THRESHOLD			24000
+#define IEEE80211_AQL_THRESHOLD			12000

 /**
  * struct cfg80211_pmksa - PMK Security Association
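For a sense of scale of the tightened limits (illustrative arithmetic, not from the patch): AQL tracks estimated airtime per TXQ in microseconds, so the new 5000 µs high limit corresponds to only a few tens of full-size frames at typical rates. A minimal sketch under assumed frame size and link rate:

```c
#include <stdio.h>

int main(void)
{
	unsigned int frame_bytes = 1500;   /* assumed full-size frame */
	unsigned int rate_mbps   = 100;    /* assumed effective PHY rate */
	unsigned int limit_us    = 5000;   /* new IEEE80211_DEFAULT_AQL_TXQ_LIMIT_H */

	/* bits / (Mbit/s) gives microseconds of airtime per frame */
	unsigned int airtime_us = frame_bytes * 8 / rate_mbps;   /* ~120 us */

	printf("~%u us per frame, ~%u frames in flight per TXQ\n",
	       airtime_us, limit_us / airtime_us);
	return 0;
}
```
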
debian/patches/misc-openwrt/0101-sched-sch_cake-fix-bulk-flow-accounting-logic-for-host.patch (vendored, new file, 72 lines)
@@ -0,0 +1,72 @@
From 546ea84d07e3e324644025e2aae2d12ea4c5896e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= <toke@redhat.com>
Date: Tue, 3 Sep 2024 18:08:45 +0200
Subject: [PATCH] sched: sch_cake: fix bulk flow accounting logic for host
 fairness
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In sch_cake, we keep track of the count of active bulk flows per host,
when running in dst/src host fairness mode, which is used as the
round-robin weight when iterating through flows. The count of active
bulk flows is updated whenever a flow changes state.

This has a peculiar interaction with the hash collision handling: when a
hash collision occurs (after the set-associative hashing), the state of
the hash bucket is simply updated to match the new packet that collided,
and if host fairness is enabled, that also means assigning new per-host
state to the flow. For this reason, the bulk flow counters of the
host(s) assigned to the flow are decremented, before new state is
assigned (and the counters, which may not belong to the same host
anymore, are incremented again).

Back when this code was introduced, the host fairness mode was always
enabled, so the decrement was unconditional. When the configuration
flags were introduced the *increment* was made conditional, but
the *decrement* was not. Which of course can lead to a spurious
decrement (and associated wrap-around to U16_MAX).

AFAICT, when host fairness is disabled, the decrement and wrap-around
happens as soon as a hash collision occurs (which is not that common in
itself, due to the set-associative hashing). However, in most cases this
is harmless, as the value is only used when host fairness mode is
enabled. So in order to trigger an array overflow, sch_cake has to first
be configured with host fairness disabled, and while running in this
mode, a hash collision has to occur to cause the overflow. Then, the
qdisc has to be reconfigured to enable host fairness, which leads to the
array out-of-bounds because the wrapped-around value is retained and
used as an array index. It seems that syzbot managed to trigger this,
which is quite impressive in its own right.

This patch fixes the issue by introducing the same conditional check on
decrement as is used on increment.

The original bug predates the upstreaming of cake, but the commit listed
in the Fixes tag touched that code, meaning that this patch won't apply
before that.

Fixes: 712639929912 ("sch_cake: Make the dual modes fairer")
Reported-by: syzbot+7fe7b81d602cc1e6b94d@syzkaller.appspotmail.com
Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Link: https://patch.msgid.link/20240903160846.20909-1-toke@redhat.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 net/sched/sch_cake.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

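The wrap-around the message describes is easy to reproduce in isolation. A standalone sketch (the variable name mirrors the sch_cake field; everything else is illustrative):

```c
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint16_t srchost_bulk_flow_count = 0;

	/* The unconditional decrement on a counter that was never
	 * incremented (host fairness disabled) wraps to U16_MAX... */
	srchost_bulk_flow_count--;
	printf("count after spurious decrement: %u\n",
	       srchost_bulk_flow_count);   /* prints 65535 */

	/* ...and 65535, retained across a reconfiguration that enables
	 * host fairness, later lands as an out-of-bounds array index. */
	return 0;
}
```
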
--- a/net/sched/sch_cake.c
+++ b/net/sched/sch_cake.c
@@ -833,8 +833,10 @@ skip_hash:
 		allocate_dst = cake_ddst(flow_mode);

 		if (q->flows[outer_hash + k].set == CAKE_SET_BULK) {
-			cake_dec_srchost_bulk_flow_count(q, &q->flows[outer_hash + k], flow_mode);
-			cake_dec_dsthost_bulk_flow_count(q, &q->flows[outer_hash + k], flow_mode);
+			if (allocate_src)
+				q->hosts[q->flows[reduced_hash].srchost].srchost_bulk_flow_count--;
+			if (allocate_dst)
+				q->hosts[q->flows[reduced_hash].dsthost].dsthost_bulk_flow_count--;
 		}
 found:
 	/* reserve queue for future packets in same flow */
debian/patches/misc-openwrt/0201-fq-adjust-memory-size.patch (vendored, new file, 11 lines)
@@ -0,0 +1,11 @@
--- a/include/net/fq_impl.h
+++ b/include/net/fq_impl.h
@@ -356,7 +356,7 @@ static int fq_init(struct fq *fq, int fl
 	fq->flows_cnt = max_t(u32, flows_cnt, 1);
 	fq->quantum = 300;
 	fq->limit = 8192;
-	fq->memory_limit = 16 << 20; /* 16 MBytes */
+	fq->memory_limit = 32 << 20; /* 32 MBytes */

 	fq->flows = kvcalloc(fq->flows_cnt, sizeof(fq->flows[0]), GFP_KERNEL);
 	if (!fq->flows)
@@ -1,14 +1,6 @@
From 90b69178f6a866c7f3330c2006f6b5396146192c Mon Sep 17 00:00:00 2001
From 906ed24dfc7e1bbceacc087ba38aecfd22a9890b Mon Sep 17 00:00:00 2001
From: graysky <therealgraysky AT proton DOT me>
Date: Mon, 16 Sep 2024 05:55:58 -0400
Subject: ZEN: Add graysky's more-uarches
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

From https://github.com/graysky2/kernel_compiler_patch

more-ISA-levels-and-uarches-for-kernel-6.1.79+.patch
Date: Mon, 16 Sep 2024 14:47:03 -0400

FEATURES
This patch adds additional tunings via new x86-64 ISA levels and
@@ -121,46 +113,122 @@ REFERENCES
1. https://gcc.gnu.org/onlinedocs/gcc/x86-Options.html#index-x86-Options
2. https://bugzilla.kernel.org/show_bug.cgi?id=77461
3. https://github.com/graysky2/kernel_gcc_patch/issues/15
4. https://www.linuxforge.net/docs/linux/linux-gcc.php
4. http://www.linuxforge.net/docs/linux/linux-gcc.php

---
arch/x86/Kconfig.cpu | 367 ++++++++++++++++++++++++++++++--
arch/x86/Makefile | 89 +++++++-
arch/x86/include/asm/vermagic.h | 72 +++++++
3 files changed, 511 insertions(+), 17 deletions(-)
arch/x86/Kconfig.cpu | 462 ++++++++++++++++++++++++++++++++++++++++++-
arch/x86/Makefile | 222 +++++++++++++++++++++
2 files changed, 675 insertions(+), 9 deletions(-)

--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -155,9 +155,8 @@ config MPENTIUM4
-Paxville
-Dempsey
@@ -31,6 +31,7 @@ choice
- "Pentium-4" for the Intel Pentium 4 or P4-based Celeron.
- "K6" for the AMD K6, K6-II and K6-III (aka K6-3D).
- "Athlon" for the AMD K7 family (Athlon/Duron/Thunderbird).
+ - "Opteron/Athlon64/Hammer/K8" for all K8 and newer AMD CPUs.
- "Crusoe" for the Transmeta Crusoe series.
- "Efficeon" for the Transmeta Efficeon series.
- "Winchip-C6" for original IDT Winchip.
@@ -41,7 +42,10 @@ choice
- "CyrixIII/VIA C3" for VIA Cyrix III or VIA C3.
- "VIA C3-2" for VIA C3-2 "Nehemiah" (model 9 and above).
- "VIA C7" for VIA C7.
+ - "Intel P4" for the Pentium 4/Netburst microarchitecture.
+ - "Core 2/newer Xeon" for all core2 and newer Intel CPUs.
- "Intel Atom" for the Atom-microarchitecture CPUs.
+ - "Generic-x86-64" for a kernel which runs on any x86-64 CPU.

See each option's help text for additional details. If you don't know
what to do, choose "Pentium-Pro".
@@ -135,10 +139,21 @@ config MPENTIUM4
-Mobile Pentium 4
-Mobile Pentium 4 M
-Extreme Edition (Gallatin)
+ -Prescott
+ -Prescott 2M
+ -Cedar Mill
+ -Presler
+ -Smithfiled
Xeons (Intel Xeon, Xeon MP, Xeon LV, Xeon MV) corename:
-Foster
-Prestonia
-Gallatin
+ -Nocona
+ -Irwindale
+ -Cranford
+ -Potomac
+ -Paxville
+ -Dempsey

-
config MK6
- bool "K6/K6-II/K6-III"
+ bool "AMD K6/K6-II/K6-III"
depends on X86_32
help
Select this for an AMD K6-family processor. Enables use of
@@ -165,7 +164,7 @@ config MK6
flags to GCC.
bool "K6/K6-II/K6-III"
@@ -245,6 +260,435 @@ config MATOM

config MK7
- bool "Athlon/Duron/K7"
+ bool "AMD Athlon/Duron/K7"
depends on X86_32
help
Select this for an AMD Athlon K7-family processor. Enables use of
@@ -173,12 +172,114 @@ config MK7
flags to GCC.
endchoice

config MK8
- bool "Opteron/Athlon64/Hammer/K8"
+config CC_HAS_MARCH_NATIVE
+ # This flag might not be available in cross-compilers:
+ def_bool $(cc-option, -march=native)
+ # LLVM 18 has an easily triggered internal compiler error in core
+ # networking code with '-march=native' on certain systems:
+ # https://github.com/llvm/llvm-project/issues/72026
+ # LLVM 19 introduces an optimization that resolves some high stack
+ # usage warnings that only appear wth '-march=native'.
+ depends on CC_IS_GCC || CLANG_VERSION >= 190100
+
+choice
+ prompt "x86_64 Compiler Build Optimization"
+ default GENERIC_CPU
+
+config X86_NATIVE_CPU
+ bool "Build and optimize for local/native CPU"
+ depends on X86_64
+ depends on CC_HAS_MARCH_NATIVE
+ help
+ Optimize for the current CPU used to compile the kernel.
+ Use this option if you intend to build the kernel for your
+ local machine.
+
+ Note that such a kernel might not work optimally on a
+ different x86 machine.
+
+ If unsure, say N.
+
+config GENERIC_CPU
+ bool "Generic-x86-64"
+ depends on X86_64
+ help
+ Generic x86-64 CPU.
+ Runs equally well on all x86-64 CPUs.
+
+config MNATIVE_INTEL
+ bool "Intel-Native optimizations autodetected by the compiler"
+ help
+
+ Clang 3.8, GCC 4.2 and above support -march=native, which automatically detects
+ the optimum settings to use based on your processor. Do NOT use this
+ for AMD CPUs. Intel Only!
+
+ Enables -march=native
+
+config MNATIVE_AMD
+ bool "AMD-Native optimizations autodetected by the compiler"
+ help
+
+ Clang 3.8, GCC 4.2 and above support -march=native, which automatically detects
+ the optimum settings to use based on your processor. Do NOT use this
+ for Intel CPUs. AMD Only!
+
+ Enables -march=native
+
+config MK8
+ bool "AMD Opteron/Athlon64/Hammer/K8"
help
Select this for an AMD Opteron or Athlon64 Hammer-family processor.
Enables use of some extended instructions, and passes appropriate
optimization flags to GCC.

+ help
+ Select this for an AMD Opteron or Athlon64 Hammer-family processor.
+ Enables use of some extended instructions, and passes appropriate
+ optimization flags to GCC.
+
+config MK8SSE3
+ bool "AMD Opteron/Athlon64/Hammer/K8 with SSE3"
+ help
@@ -226,21 +294,21 @@ REFERENCES
+ Enables -march=bdver4
+
+config MZEN
+ bool "AMD Zen"
+ bool "AMD Ryzen"
+ help
+ Select this for AMD Family 17h Zen processors.
+
+ Enables -march=znver1
+
+config MZEN2
+ bool "AMD Zen 2"
+ bool "AMD Ryzen 2"
+ help
+ Select this for AMD Family 17h Zen 2 processors.
+
+ Enables -march=znver2
+
+config MZEN3
+ bool "AMD Zen 3"
+ bool "AMD Ryzen 3"
+ depends on (CC_IS_GCC && GCC_VERSION >= 100300) || (CC_IS_CLANG && CLANG_VERSION >= 120000)
+ help
+ Select this for AMD Family 19h Zen 3 processors.
@@ -248,7 +316,7 @@ REFERENCES
+ Enables -march=znver3
+
+config MZEN4
+ bool "AMD Zen 4"
+ bool "AMD Ryzen 4"
+ depends on (CC_IS_GCC && GCC_VERSION >= 130000) || (CC_IS_CLANG && CLANG_VERSION >= 160000)
+ help
+ Select this for AMD Family 19h Zen 4 processors.
@@ -256,57 +324,48 @@ REFERENCES
+ Enables -march=znver4
+
+config MZEN5
+ bool "AMD Zen 5"
+ bool "AMD Ryzen 5"
+ depends on (CC_IS_GCC && GCC_VERSION > 140000) || (CC_IS_CLANG && CLANG_VERSION >= 190100)
+ help
+ Select this for AMD Family 19h Zen 5 processors.
+
+ Enables -march=znver5
+
config MCRUSOE
bool "Crusoe"
depends on X86_32
@@ -269,8 +370,17 @@ config MPSC
using the cpu family field
in /proc/cpuinfo. Family 15 is an older Xeon, Family 6 a newer one.

+config MATOM
+ bool "Intel Atom"
+config MPSC
+ bool "Intel P4 / older Netburst based Xeon"
+ depends on X86_64
+ help
+ Optimize for Intel Pentium 4, Pentium D and older Nocona/Dempsey
+ Xeon CPUs with Intel 64bit which is compatible with x86-64.
+ Note that the latest Xeons (Xeon 51xx and 53xx) are not based on the
+ Netburst core and shouldn't use this option. You can distinguish them
+ using the cpu family field
+ in /proc/cpuinfo. Family 15 is an older Xeon, Family 6 a newer one.
+
+config MCORE2
+ bool "Intel Core 2"
+ depends on X86_64
+ help
+
+ Select this for the Intel Atom platform. Intel Atom CPUs have an
+ in-order pipelining architecture and thus can benefit from
+ accordingly optimized code. Use a recent GCC with specific Atom
+ support in order to fully benefit from selecting this option.
+ Select this for Intel Core 2 and newer Core 2 Xeons (Xeon 51xx and
+ 53xx) CPUs. You can distinguish newer from older Xeons by the CPU
+ family in /proc/cpuinfo. Newer ones have 6 and older ones 15
+ (not a typo)
+
config MCORE2
- bool "Core 2/newer Xeon"
+ bool "Intel Core 2"
help

Select this for Intel Core 2 and newer Core 2 Xeons (Xeon 51xx and
@@ -278,14 +388,199 @@ config MCORE2
family in /proc/cpuinfo. Newer ones have 6 and older ones 15
(not a typo)

-config MATOM
- bool "Intel Atom"
+ Enables -march=core2
+
+config MNEHALEM
+ bool "Intel Nehalem"
help

- Select this for the Intel Atom platform. Intel Atom CPUs have an
- in-order pipelining architecture and thus can benefit from
- accordingly optimized code. Use a recent GCC with specific Atom
- support in order to fully benefit from selecting this option.
+ depends on X86_64
+ help
+
+ Select this for 1st Gen Core processors in the Nehalem family.
+
+ Enables -march=nehalem
+
+config MWESTMERE
+ bool "Intel Westmere"
+ depends on X86_64
+ help
+
+ Select this for the Intel Westmere formerly Nehalem-C family.
@@ -315,6 +374,7 @@ REFERENCES
+
+config MSILVERMONT
+ bool "Intel Silvermont"
+ depends on X86_64
+ help
+
+ Select this for the Intel Silvermont platform.
@@ -323,6 +383,7 @@ REFERENCES
+
+config MGOLDMONT
+ bool "Intel Goldmont"
+ depends on X86_64
+ help
+
+ Select this for the Intel Goldmont platform including Apollo Lake and Denverton.
@@ -331,6 +392,7 @@ REFERENCES
+
+config MGOLDMONTPLUS
+ bool "Intel Goldmont Plus"
+ depends on X86_64
+ help
+
+ Select this for the Intel Goldmont Plus platform including Gemini Lake.
@@ -339,6 +401,7 @@ REFERENCES
+
+config MSANDYBRIDGE
+ bool "Intel Sandy Bridge"
+ depends on X86_64
+ help
+
+ Select this for 2nd Gen Core processors in the Sandy Bridge family.
@@ -347,6 +410,7 @@ REFERENCES
+
+config MIVYBRIDGE
+ bool "Intel Ivy Bridge"
+ depends on X86_64
+ help
+
+ Select this for 3rd Gen Core processors in the Ivy Bridge family.
@@ -355,6 +419,7 @@ REFERENCES
+
+config MHASWELL
+ bool "Intel Haswell"
+ depends on X86_64
+ help
+
+ Select this for 4th Gen Core processors in the Haswell family.
@@ -363,6 +428,7 @@ REFERENCES
+
+config MBROADWELL
+ bool "Intel Broadwell"
+ depends on X86_64
+ help
+
+ Select this for 5th Gen Core processors in the Broadwell family.
@@ -371,6 +437,7 @@ REFERENCES
+
+config MSKYLAKE
+ bool "Intel Skylake"
+ depends on X86_64
+ help
+
+ Select this for 6th Gen Core processors in the Skylake family.
@@ -379,6 +446,7 @@ REFERENCES
+
+config MSKYLAKEX
+ bool "Intel Skylake X"
+ depends on X86_64
+ help
+
+ Select this for 6th Gen Core processors in the Skylake X family.
@@ -387,6 +455,7 @@ REFERENCES
+
+config MCANNONLAKE
+ bool "Intel Cannon Lake"
+ depends on X86_64
+ help
+
+ Select this for 8th Gen Core processors
@@ -395,6 +464,7 @@ REFERENCES
+
+config MICELAKE_CLIENT
+ bool "Intel Ice Lake"
+ depends on X86_64
+ help
+
+ Select this for 10th Gen Core client processors in the Ice Lake family.
@@ -403,22 +473,16 @@ REFERENCES
+
+config MICELAKE_SERVER
+ bool "Intel Ice Lake Server"
+ depends on X86_64
+ help
+
+ Select this for 10th Gen Core server processors in the Ice Lake family.
+
+ Enables -march=icelake-server
+
+config MCASCADELAKE
+ bool "Intel Cascade Lake"
+ help
+
+ Select this for Xeon processors in the Cascade Lake family.
+
+ Enables -march=cascadelake
+
+config MCOOPERLAKE
+ bool "Intel Cooper Lake"
+ depends on X86_64
+ depends on (CC_IS_GCC && GCC_VERSION > 100100) || (CC_IS_CLANG && CLANG_VERSION >= 100000)
+ help
+
@@ -426,8 +490,19 @@ REFERENCES
+
+ Enables -march=cooperlake
+
+config MCASCADELAKE
+ bool "Intel Cascade Lake"
+ depends on X86_64
+ depends on (CC_IS_GCC && GCC_VERSION > 100100) || (CC_IS_CLANG && CLANG_VERSION >= 100000)
+ help
+
+ Select this for Xeon processors in the Cascade Lake family.
+
+ Enables -march=cascadelake
+
+config MTIGERLAKE
+ bool "Intel Tiger Lake"
+ depends on X86_64
+ depends on (CC_IS_GCC && GCC_VERSION > 100100) || (CC_IS_CLANG && CLANG_VERSION >= 100000)
+ help
+
@@ -437,6 +512,7 @@ REFERENCES
+
+config MSAPPHIRERAPIDS
+ bool "Intel Sapphire Rapids"
+ depends on X86_64
+ depends on (CC_IS_GCC && GCC_VERSION > 110000) || (CC_IS_CLANG && CLANG_VERSION >= 120000)
+ help
+
@@ -446,6 +522,7 @@ REFERENCES
+
+config MROCKETLAKE
+ bool "Intel Rocket Lake"
+ depends on X86_64
+ depends on (CC_IS_GCC && GCC_VERSION > 110000) || (CC_IS_CLANG && CLANG_VERSION >= 120000)
+ help
+
@@ -455,6 +532,7 @@ REFERENCES
+
+config MALDERLAKE
+ bool "Intel Alder Lake"
+ depends on X86_64
+ depends on (CC_IS_GCC && GCC_VERSION > 110000) || (CC_IS_CLANG && CLANG_VERSION >= 120000)
+ help
+
@@ -464,6 +542,7 @@ REFERENCES
+
+config MRAPTORLAKE
+ bool "Intel Raptor Lake"
+ depends on X86_64
+ depends on (CC_IS_GCC && GCC_VERSION >= 130000) || (CC_IS_CLANG && CLANG_VERSION >= 150500)
+ help
+
@@ -473,6 +552,7 @@ REFERENCES
+
+config MMETEORLAKE
+ bool "Intel Meteor Lake"
+ depends on X86_64
+ depends on (CC_IS_GCC && GCC_VERSION >= 130000) || (CC_IS_CLANG && CLANG_VERSION >= 150500)
+ help
+
@@ -482,46 +562,16 @@ REFERENCES
+
+config MEMERALDRAPIDS
+ bool "Intel Emerald Rapids"
+ depends on X86_64
+ depends on (CC_IS_GCC && GCC_VERSION > 130000) || (CC_IS_CLANG && CLANG_VERSION >= 150500)
+ help
+
+ Select this for fifth-generation 10 nm process processors in the Emerald Rapids family.
+
+ Enables -march=emeraldrapids

config GENERIC_CPU
bool "Generic-x86-64"
@@ -294,6 +589,26 @@ config GENERIC_CPU
Generic x86-64 CPU.
Run equally well on all x86-64 CPUs.

+config MNATIVE_INTEL
+ bool "Intel-Native optimizations autodetected by the compiler"
+ help
+
+ Clang 3.8, GCC 4.2 and above support -march=native, which automatically detects
+ the optimum settings to use based on your processor. Do NOT use this
+ for AMD CPUs. Intel Only!
+endchoice
+
+ Enables -march=native
+
+config MNATIVE_AMD
+ bool "AMD-Native optimizations autodetected by the compiler"
+ help
+
+ Clang 3.8, GCC 4.2 and above support -march=native, which automatically detects
+ the optimum settings to use based on your processor. Do NOT use this
+ for Intel CPUs. AMD Only!
+
+ Enables -march=native
+
endchoice

config X86_GENERIC
@@ -308,6 +623,30 @@ config X86_GENERIC
This is really intended for distributors who need more
generic optimizations.

+config X86_64_VERSION
+ int "x86-64 compiler ISA level"
+ range 1 3
@@ -531,7 +581,7 @@ REFERENCES
+ Specify a specific x86-64 compiler ISA level.
+
+ There are three x86-64 ISA levels that work on top of
+ the x86-64 baseline, namely: x86-64-v2, x86-64-v3, and x86-64-v4.
+ the x86-64 baseline, namely: x86-64-v2 and x86-64-v3.
+
+ x86-64-v2 brings support for vector instructions up to Streaming SIMD
+ Extensions 4.2 (SSE4.2) and Supplemental Streaming SIMD Extensions 3
@@ -546,221 +596,291 @@ REFERENCES
+ /lib/ld-linux-x86-64.so.2 --help | grep supported
+ /lib64/ld-linux-x86-64.so.2 --help | grep supported
+
#
# Define implied options from the CPU selection here
config X86_INTERNODE_CACHE_SHIFT
@@ -318,7 +657,7 @@ config X86_INTERNODE_CACHE_SHIFT
config X86_GENERIC
bool "Generic x86 support"
depends on X86_32
@@ -266,8 +710,8 @@ config X86_INTERNODE_CACHE_SHIFT

config X86_L1_CACHE_SHIFT
int
default "7" if MPENTIUM4 || MPSC
- default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU
- default "7" if MPENTIUM4
- default "6" if MK7 || MPENTIUMM || MATOM || MVIAC7 || X86_GENERIC || X86_64
+ default "7" if MPENTIUM4 || MPSC
+ default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU || MK8SSE3 || MK10 || MBARCELONA || MBOBCAT || MJAGUAR || MBULLDOZER || MPILEDRIVER || MSTEAMROLLER || MEXCAVATOR || MZEN || MZEN2 || MZEN3 || MZEN4 || MZEN5 || MNEHALEM || MWESTMERE || MSILVERMONT || MGOLDMONT || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE_CLIENT || MICELAKE_SERVER || MCASCADELAKE || MCOOPERLAKE || MTIGERLAKE || MSAPPHIRERAPIDS || MROCKETLAKE || MALDERLAKE || MRAPTORLAKE || MMETEORLAKE || MEMERALDRAPIDS || MNATIVE_INTEL || MNATIVE_AMD
default "4" if MELAN || M486SX || M486 || MGEODEGX1
default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX

@@ -336,11 +675,11 @@ config X86_ALIGNMENT_16
@@ -285,19 +729,19 @@ config X86_ALIGNMENT_16

config X86_INTEL_USERCOPY
def_bool y
- depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON || MCORE2
- depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK7 || MEFFICEON
+ depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON || MCORE2 || MNEHALEM || MWESTMERE || MSILVERMONT || MGOLDMONT || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE_CLIENT || MICELAKE_SERVER || MCASCADELAKE || MCOOPERLAKE || MTIGERLAKE || MSAPPHIRERAPIDS || MROCKETLAKE || MALDERLAKE || MRAPTORLAKE || MMETEORLAKE || MEMERALDRAPIDS || MNATIVE_INTEL

config X86_USE_PPRO_CHECKSUM
def_bool y
- depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MCORE2 || MATOM
- depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MATOM
+ depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MCORE2 || MATOM || MK8SSE3 || MK10 || MBARCELONA || MBOBCAT || MJAGUAR || MBULLDOZER || MPILEDRIVER || MSTEAMROLLER || MEXCAVATOR || MZEN || MZEN2 || MZEN3 || MZEN4 || MZEN5 || MNEHALEM || MWESTMERE || MSILVERMONT || MGOLDMONT || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE_CLIENT || MICELAKE_SERVER || MCASCADELAKE || MCOOPERLAKE || MTIGERLAKE || MSAPPHIRERAPIDS || MROCKETLAKE || MALDERLAKE || MRAPTORLAKE || MMETEORLAKE || MEMERALDRAPIDS || MNATIVE_INTEL || MNATIVE_AMD

#
# P6_NOPs are a relatively minor optimization that require a family >=
config X86_TSC
def_bool y
- depends on (MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MATOM) || X86_64
+ depends on (MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2 || MATOM) || X86_64

config X86_HAVE_PAE
def_bool y
- depends on MCRUSOE || MEFFICEON || MCYRIXIII || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC7 || MATOM || X86_64
+ depends on MCRUSOE || MEFFICEON || MCYRIXIII || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC7 || MCORE2 || MATOM || X86_64

config X86_CX8
def_bool y
@@ -307,13 +751,13 @@ config X86_CX8
# generates cmov.
config X86_CMOV
def_bool y
- depends on (MK7 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || MATOM || MGEODE_LX || X86_64)
+ depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MATOM || MGEODE_LX)

config X86_MINIMUM_CPU_FAMILY
int
default "64" if X86_64
- default "6" if X86_32 && (MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MEFFICEON || MATOM || MK7)
- default "5" if X86_32 && X86_CX8
+ default "6" if X86_32 && (MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MEFFICEON || MATOM || MCORE2 || MK7 || MK8)
+ default "5" if X86_32 && X86_CMPXCHG64
default "4"

config X86_DEBUGCTLMSR
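Before the Makefile half of the patch, a brief aside on the ISA levels named in the X86_64_VERSION help text above: besides the ld-linux one-liner quoted in the patch, the level a host supports can be probed from C. A hedged sketch, assuming a GCC 12+ (or compatible) compiler where __builtin_cpu_supports() accepts ISA-level names:

```c
#include <stdio.h>

int main(void)
{
	__builtin_cpu_init();   /* initialize CPU feature detection */

	if (__builtin_cpu_supports("x86-64-v3"))
		puts("x86-64-v3 (AVX, AVX2, BMI2, ...) supported");
	else if (__builtin_cpu_supports("x86-64-v2"))
		puts("x86-64-v2 (SSE4.2, SSSE3, POPCNT, ...) supported");
	else
		puts("baseline x86-64 only");
	return 0;
}
```
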
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -182,15 +182,98 @@ else
cflags-$(CONFIG_MK8) += -march=k8
cflags-$(CONFIG_MPSC) += -march=nocona
cflags-$(CONFIG_MCORE2) += -march=core2
- cflags-$(CONFIG_MATOM) += -march=atom
- cflags-$(CONFIG_GENERIC_CPU) += -mtune=generic
+ cflags-$(CONFIG_MATOM) += -march=bonnell
+ ifeq ($(CONFIG_X86_64_VERSION),1)
+ cflags-$(CONFIG_GENERIC_CPU) += -mtune=generic
+ rustflags-$(CONFIG_GENERIC_CPU) += -Ztune-cpu=generic
+ else
+ cflags-$(CONFIG_GENERIC_CPU) += -march=x86-64-v$(CONFIG_X86_64_VERSION)
+ rustflags-$(CONFIG_GENERIC_CPU) += -Ctarget-cpu=x86-64-v$(CONFIG_X86_64_VERSION)
+ endif
+ cflags-$(CONFIG_MK8SSE3) += -march=k8-sse3
+ cflags-$(CONFIG_MK10) += -march=amdfam10
+ cflags-$(CONFIG_MBARCELONA) += -march=barcelona
+ cflags-$(CONFIG_MBOBCAT) += -march=btver1
+ cflags-$(CONFIG_MJAGUAR) += -march=btver2
+ cflags-$(CONFIG_MBULLDOZER) += -march=bdver1
+ cflags-$(CONFIG_MPILEDRIVER) += -march=bdver2 -mno-tbm
+ cflags-$(CONFIG_MSTEAMROLLER) += -march=bdver3 -mno-tbm
+ cflags-$(CONFIG_MEXCAVATOR) += -march=bdver4 -mno-tbm
+ cflags-$(CONFIG_MZEN) += -march=znver1
+ cflags-$(CONFIG_MZEN2) += -march=znver2
+ cflags-$(CONFIG_MZEN3) += -march=znver3
+ cflags-$(CONFIG_MZEN4) += -march=znver4
+ cflags-$(CONFIG_MZEN5) += -march=znver5
+ cflags-$(CONFIG_MNATIVE_INTEL) += -march=native
+ cflags-$(CONFIG_MNATIVE_AMD) += -march=native -mno-tbm
+ cflags-$(CONFIG_MNEHALEM) += -march=nehalem
+ cflags-$(CONFIG_MWESTMERE) += -march=westmere
+ cflags-$(CONFIG_MSILVERMONT) += -march=silvermont
+ cflags-$(CONFIG_MGOLDMONT) += -march=goldmont
+ cflags-$(CONFIG_MGOLDMONTPLUS) += -march=goldmont-plus
+ cflags-$(CONFIG_MSANDYBRIDGE) += -march=sandybridge
+ cflags-$(CONFIG_MIVYBRIDGE) += -march=ivybridge
+ cflags-$(CONFIG_MHASWELL) += -march=haswell
+ cflags-$(CONFIG_MBROADWELL) += -march=broadwell
+ cflags-$(CONFIG_MSKYLAKE) += -march=skylake
+ cflags-$(CONFIG_MSKYLAKEX) += -march=skylake-avx512
+ cflags-$(CONFIG_MCANNONLAKE) += -march=cannonlake
+ cflags-$(CONFIG_MICELAKE_CLIENT) += -march=icelake-client
+ cflags-$(CONFIG_MICELAKE_SERVER) += -march=icelake-server
+ cflags-$(CONFIG_MCASCADELAKE) += -march=cascadelake
+ cflags-$(CONFIG_MCOOPERLAKE) += -march=cooperlake
+ cflags-$(CONFIG_MTIGERLAKE) += -march=tigerlake
+ cflags-$(CONFIG_MSAPPHIRERAPIDS) += -march=sapphirerapids
+ cflags-$(CONFIG_MROCKETLAKE) += -march=rocketlake
+ cflags-$(CONFIG_MALDERLAKE) += -march=alderlake
+ cflags-$(CONFIG_MRAPTORLAKE) += -march=raptorlake
+ cflags-$(CONFIG_MMETEORLAKE) += -march=meteorlake
+ cflags-$(CONFIG_MEMERALDRAPIDS) += -march=emeraldrapids
KBUILD_CFLAGS += $(cflags-y)
@@ -173,8 +173,230 @@ else
# Use -mskip-rax-setup if supported.
KBUILD_CFLAGS += $(call cc-option,-mskip-rax-setup)

rustflags-$(CONFIG_MK8) += -Ctarget-cpu=k8
rustflags-$(CONFIG_MPSC) += -Ctarget-cpu=nocona
rustflags-$(CONFIG_MCORE2) += -Ctarget-cpu=core2
rustflags-$(CONFIG_MATOM) += -Ctarget-cpu=atom
- rustflags-$(CONFIG_GENERIC_CPU) += -Ztune-cpu=generic
+ rustflags-$(CONFIG_MK8SSE3) += -Ctarget-cpu=k8-sse3
+ rustflags-$(CONFIG_MK10) += -Ctarget-cpu=amdfam10
+ rustflags-$(CONFIG_MBARCELONA) += -Ctarget-cpu=barcelona
+ rustflags-$(CONFIG_MBOBCAT) += -Ctarget-cpu=btver1
+ rustflags-$(CONFIG_MJAGUAR) += -Ctarget-cpu=btver2
+ rustflags-$(CONFIG_MBULLDOZER) += -Ctarget-cpu=bdver1
+ rustflags-$(CONFIG_MPILEDRIVER) += -Ctarget-cpu=bdver2
+ rustflags-$(CONFIG_MSTEAMROLLER) += -Ctarget-cpu=bdver3
+ rustflags-$(CONFIG_MEXCAVATOR) += -Ctarget-cpu=bdver4
+ rustflags-$(CONFIG_MZEN) += -Ctarget-cpu=znver1
+ rustflags-$(CONFIG_MZEN2) += -Ctarget-cpu=znver2
+ rustflags-$(CONFIG_MZEN3) += -Ctarget-cpu=znver3
+ rustflags-$(CONFIG_MZEN4) += -Ctarget-cpu=znver4
+ rustflags-$(CONFIG_MZEN5) += -Ctarget-cpu=znver5
+ rustflags-$(CONFIG_MNATIVE_INTEL) += -Ctarget-cpu=native
+ rustflags-$(CONFIG_MNATIVE_AMD) += -Ctarget-cpu=native
+ rustflags-$(CONFIG_MNEHALEM) += -Ctarget-cpu=nehalem
+ rustflags-$(CONFIG_MWESTMERE) += -Ctarget-cpu=westmere
+ rustflags-$(CONFIG_MSILVERMONT) += -Ctarget-cpu=silvermont
+ rustflags-$(CONFIG_MGOLDMONT) += -Ctarget-cpu=goldmont
+ rustflags-$(CONFIG_MGOLDMONTPLUS) += -Ctarget-cpu=goldmont-plus
+ rustflags-$(CONFIG_MSANDYBRIDGE) += -Ctarget-cpu=sandybridge
+ rustflags-$(CONFIG_MIVYBRIDGE) += -Ctarget-cpu=ivybridge
+ rustflags-$(CONFIG_MHASWELL) += -Ctarget-cpu=haswell
+ rustflags-$(CONFIG_MBROADWELL) += -Ctarget-cpu=broadwell
+ rustflags-$(CONFIG_MSKYLAKE) += -Ctarget-cpu=skylake
+ rustflags-$(CONFIG_MSKYLAKEX) += -Ctarget-cpu=skylake-avx512
+ rustflags-$(CONFIG_MCANNONLAKE) += -Ctarget-cpu=cannonlake
+ rustflags-$(CONFIG_MICELAKE_CLIENT) += -Ctarget-cpu=icelake-client
+ rustflags-$(CONFIG_MICELAKE_SERVER) += -Ctarget-cpu=icelake-server
+ rustflags-$(CONFIG_MCASCADELAKE) += -Ctarget-cpu=cascadelake
+ rustflags-$(CONFIG_MCOOPERLAKE) += -Ctarget-cpu=cooperlake
+ rustflags-$(CONFIG_MTIGERLAKE) += -Ctarget-cpu=tigerlake
+ rustflags-$(CONFIG_MSAPPHIRERAPIDS) += -Ctarget-cpu=sapphirerapids
+ rustflags-$(CONFIG_MROCKETLAKE) += -Ctarget-cpu=rocketlake
+ rustflags-$(CONFIG_MALDERLAKE) += -Ctarget-cpu=alderlake
+ rustflags-$(CONFIG_MRAPTORLAKE) += -Ctarget-cpu=raptorlake
+ rustflags-$(CONFIG_MMETEORLAKE) += -Ctarget-cpu=meteorlake
+ rustflags-$(CONFIG_MEMERALDRAPIDS) += -Ctarget-cpu=emeraldrapids
KBUILD_RUSTFLAGS += $(rustflags-y)
+ifdef CONFIG_X86_NATIVE_CPU
+ KBUILD_CFLAGS += -march=native
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=native
+endif
+
+ifdef CONFIG_MNATIVE_INTEL
+ KBUILD_CFLAGS += -march=native
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=native
+endif
+
+ifdef CONFIG_MNATIVE_AMD
+ KBUILD_CFLAGS += -march=native
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=native
+endif
+
+ifdef CONFIG_MK8
+ KBUILD_CFLAGS += -march=k8
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=k8
+endif
+
+ifdef CONFIG_MK8SSE3
+ KBUILD_CFLAGS += -march=k8-sse3
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=k8-sse3
+endif
+
+ifdef CONFIG_MK10
+ KBUILD_CFLAGS += -march=amdfam10
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=amdfam10
+endif
+
+ifdef CONFIG_MBARCELONA
+ KBUILD_CFLAGS += -march=barcelona
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=barcelona
+endif
+
+ifdef CONFIG_MBOBCAT
+ KBUILD_CFLAGS += -march=btver1
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=btver1
+endif
+
+ifdef CONFIG_MJAGUAR
+ KBUILD_CFLAGS += -march=btver2
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=btver2
+endif
+
+ifdef CONFIG_MBULLDOZER
+ KBUILD_CFLAGS += -march=bdver1
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=bdver1
+endif
+
+ifdef CONFIG_MPILEDRIVER
+ KBUILD_CFLAGS += -march=bdver2 -mno-tbm
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=bdver2 -mno-tbm
+endif
+
+ifdef CONFIG_MSTEAMROLLER
+ KBUILD_CFLAGS += -march=bdver3 -mno-tbm
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=bdver3 -mno-tbm
+endif
+
+ifdef CONFIG_MEXCAVATOR
+ KBUILD_CFLAGS += -march=bdver4 -mno-tbm
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=bdver4 -mno-tbm
+endif
+
+ifdef CONFIG_MZEN
+ KBUILD_CFLAGS += -march=znver1
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=znver1
+endif
+
+ifdef CONFIG_MZEN2
+ KBUILD_CFLAGS += -march=znver2
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=znver2
+endif
+
+ifdef CONFIG_MZEN3
+ KBUILD_CFLAGS += -march=znver3
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=znver3
+endif
+
+ifdef CONFIG_MZEN4
+ KBUILD_CFLAGS += -march=znver4
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=znver4
+endif
+
+ifdef CONFIG_MZEN5
+ KBUILD_CFLAGS += -march=znver5
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=znver5
+endif
+
+ifdef CONFIG_MPSC
+ KBUILD_CFLAGS += -march=nocona
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=nocona
+endif
+
+ifdef CONFIG_MCORE2
+ KBUILD_CFLAGS += -march=core2
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=core2
+endif
+
+ifdef CONFIG_MNEHALEM
+ KBUILD_CFLAGS += -march=nehalem
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=nehalem
+endif
+
+ifdef CONFIG_MWESTMERE
+ KBUILD_CFLAGS += -march=westmere
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=westmere
+endif
+
+ifdef CONFIG_MSILVERMONT
+ KBUILD_CFLAGS += -march=silvermont
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=silvermont
+endif
+
+ifdef CONFIG_MGOLDMONT
+ KBUILD_CFLAGS += -march=goldmont
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=goldmont
+endif
+
+ifdef CONFIG_MGOLDMONTPLUS
+ KBUILD_CFLAGS += -march=goldmont-plus
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=goldmont-plus
+endif
+
+ifdef CONFIG_MSANDYBRIDGE
+ KBUILD_CFLAGS += -march=sandybridge
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=sandybridge
+endif
+
+ifdef CONFIG_MIVYBRIDGE
+ KBUILD_CFLAGS += -march=ivybridge
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=ivybridge
+endif
+
+ifdef CONFIG_MHASWELL
+ KBUILD_CFLAGS += -march=haswell
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=haswell
+endif
+
+ifdef CONFIG_MBROADWELL
+ KBUILD_CFLAGS += -march=broadwell
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=broadwell
+endif
+
+ifdef CONFIG_MSKYLAKE
+ KBUILD_CFLAGS += -march=skylake
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=skylake
+endif
+
+ifdef CONFIG_MSKYLAKEX
+ KBUILD_CFLAGS += -march=skylake-avx512
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=skylake-avx512
+endif
+
+ifdef CONFIG_MCANNONLAKE
+ KBUILD_CFLAGS += -march=cannonlake
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=cannonlake
+endif
+
+ifdef CONFIG_MICELAKE_CLIENT
+ KBUILD_CFLAGS += -march=icelake-client
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=icelake-client
+endif
+
+ifdef CONFIG_MICELAKE_SERVER
+ KBUILD_CFLAGS += -march=icelake-server
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=icelake-server
+endif
+
+ifdef CONFIG_MCOOPERLAKE
+ KBUILD_CFLAGS += -march=cooperlake
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=cooperlake
+endif
+
+ifdef CONFIG_MCASCADELAKE
+ KBUILD_CFLAGS += -march=cascadelake
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=cascadelake
+endif
+
+ifdef CONFIG_MTIGERLAKE
+ KBUILD_CFLAGS += -march=tigerlake
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=tigerlake
+endif
+
+ifdef CONFIG_MSAPPHIRERAPIDS
+ KBUILD_CFLAGS += -march=sapphirerapids
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=sapphirerapids
+endif
+
+ifdef CONFIG_MROCKETLAKE
+ KBUILD_CFLAGS += -march=rocketlake
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=rocketlake
+endif
+
+ifdef CONFIG_MALDERLAKE
+ KBUILD_CFLAGS += -march=alderlake
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=alderlake
+endif
+
+ifdef CONFIG_MRAPTORLAKE
+ KBUILD_CFLAGS += -march=raptorlake
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=raptorlake
+endif
+
+ifdef CONFIG_MMETEORLAKE
+ KBUILD_CFLAGS += -march=meteorlake
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=meteorlake
+endif
+
+ifdef CONFIG_MEMERALDRAPIDS
+ KBUILD_CFLAGS += -march=emeraldrapids
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=emeraldrapids
+endif
+
+ifdef CONFIG_GENERIC_CPU
+ifeq ($(CONFIG_X86_64_VERSION),1)
KBUILD_CFLAGS += -march=x86-64 -mtune=generic
KBUILD_RUSTFLAGS += -Ctarget-cpu=x86-64 -Ztune-cpu=generic
+else
+ KBUILD_CFLAGS +=-march=x86-64-v$(CONFIG_X86_64_VERSION)
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=x86-64-v$(CONFIG_X86_64_VERSION)
+endif # CONFIG_X86_64_VERSION
+endif # CONFIG_GENERIC_CPU

KBUILD_CFLAGS += -mno-red-zone
--- a/arch/x86/include/asm/vermagic.h
+++ b/arch/x86/include/asm/vermagic.h
@@ -17,6 +17,56 @@
#define MODULE_PROC_FAMILY "586MMX "
#elif defined CONFIG_MCORE2
#define MODULE_PROC_FAMILY "CORE2 "
+#elif defined CONFIG_MNATIVE_INTEL
+#define MODULE_PROC_FAMILY "NATIVE_INTEL "
+#elif defined CONFIG_MNATIVE_AMD
+#define MODULE_PROC_FAMILY "NATIVE_AMD "
+#elif defined CONFIG_MNEHALEM
+#define MODULE_PROC_FAMILY "NEHALEM "
+#elif defined CONFIG_MWESTMERE
+#define MODULE_PROC_FAMILY "WESTMERE "
+#elif defined CONFIG_MSILVERMONT
+#define MODULE_PROC_FAMILY "SILVERMONT "
+#elif defined CONFIG_MGOLDMONT
+#define MODULE_PROC_FAMILY "GOLDMONT "
+#elif defined CONFIG_MGOLDMONTPLUS
+#define MODULE_PROC_FAMILY "GOLDMONTPLUS "
+#elif defined CONFIG_MSANDYBRIDGE
+#define MODULE_PROC_FAMILY "SANDYBRIDGE "
+#elif defined CONFIG_MIVYBRIDGE
+#define MODULE_PROC_FAMILY "IVYBRIDGE "
+#elif defined CONFIG_MHASWELL
+#define MODULE_PROC_FAMILY "HASWELL "
+#elif defined CONFIG_MBROADWELL
+#define MODULE_PROC_FAMILY "BROADWELL "
+#elif defined CONFIG_MSKYLAKE
+#define MODULE_PROC_FAMILY "SKYLAKE "
+#elif defined CONFIG_MSKYLAKEX
+#define MODULE_PROC_FAMILY "SKYLAKEX "
+#elif defined CONFIG_MCANNONLAKE
+#define MODULE_PROC_FAMILY "CANNONLAKE "
+#elif defined CONFIG_MICELAKE_CLIENT
+#define MODULE_PROC_FAMILY "ICELAKE_CLIENT "
+#elif defined CONFIG_MICELAKE_SERVER
+#define MODULE_PROC_FAMILY "ICELAKE_SERVER "
+#elif defined CONFIG_MCASCADELAKE
+#define MODULE_PROC_FAMILY "CASCADELAKE "
+#elif defined CONFIG_MCOOPERLAKE
+#define MODULE_PROC_FAMILY "COOPERLAKE "
+#elif defined CONFIG_MTIGERLAKE
+#define MODULE_PROC_FAMILY "TIGERLAKE "
+#elif defined CONFIG_MSAPPHIRERAPIDS
+#define MODULE_PROC_FAMILY "SAPPHIRERAPIDS "
+#elif defined CONFIG_ROCKETLAKE
+#define MODULE_PROC_FAMILY "ROCKETLAKE "
+#elif defined CONFIG_MALDERLAKE
+#define MODULE_PROC_FAMILY "ALDERLAKE "
+#elif defined CONFIG_MRAPTORLAKE
+#define MODULE_PROC_FAMILY "RAPTORLAKE "
+#elif defined CONFIG_MMETEORLAKE
+#define MODULE_PROC_FAMILY "METEORLAKE "
+#elif defined CONFIG_MEMERALDRAPIDS
+#define MODULE_PROC_FAMILY "EMERALDRAPIDS "
#elif defined CONFIG_MATOM
#define MODULE_PROC_FAMILY "ATOM "
#elif defined CONFIG_M686
@@ -35,6 +85,28 @@
#define MODULE_PROC_FAMILY "K7 "
#elif defined CONFIG_MK8
#define MODULE_PROC_FAMILY "K8 "
+#elif defined CONFIG_MK8SSE3
+#define MODULE_PROC_FAMILY "K8SSE3 "
+#elif defined CONFIG_MK10
+#define MODULE_PROC_FAMILY "K10 "
+#elif defined CONFIG_MBARCELONA
+#define MODULE_PROC_FAMILY "BARCELONA "
+#elif defined CONFIG_MBOBCAT
+#define MODULE_PROC_FAMILY "BOBCAT "
+#elif defined CONFIG_MBULLDOZER
+#define MODULE_PROC_FAMILY "BULLDOZER "
+#elif defined CONFIG_MPILEDRIVER
+#define MODULE_PROC_FAMILY "PILEDRIVER "
+#elif defined CONFIG_MSTEAMROLLER
+#define MODULE_PROC_FAMILY "STEAMROLLER "
+#elif defined CONFIG_MJAGUAR
+#define MODULE_PROC_FAMILY "JAGUAR "
+#elif defined CONFIG_MEXCAVATOR
+#define MODULE_PROC_FAMILY "EXCAVATOR "
+#elif defined CONFIG_MZEN
+#define MODULE_PROC_FAMILY "ZEN "
+#elif defined CONFIG_MZEN2
+#define MODULE_PROC_FAMILY "ZEN2 "
#elif defined CONFIG_MELAN
#define MODULE_PROC_FAMILY "ELAN "
#elif defined CONFIG_MCRUSOE
KBUILD_CFLAGS += -mcmodel=kernel
@@ -1,4 +1,4 @@
From f4f448a305e9d705b9a0da102ddfd58bfaac5cc0 Mon Sep 17 00:00:00 2001
From 15db9c3419fd147812151d95fb34bbd70f2f9715 Mon Sep 17 00:00:00 2001
From: "Jan Alexander Steffens (heftig)" <heftig@archlinux.org>
Date: Sun, 11 Dec 2022 23:51:16 +0100
Subject: ZEN: Restore CONFIG_OPTIMIZE_FOR_PERFORMANCE_O3
@@ -13,7 +13,7 @@ dependency on CONFIG_ARC and adds RUSTFLAGS.

--- a/Makefile
+++ b/Makefile
@@ -871,6 +871,9 @@ KBUILD_CFLAGS += -fno-delete-null-pointe
@@ -868,6 +868,9 @@ KBUILD_CFLAGS += -fno-delete-null-pointe
ifdef CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE
KBUILD_CFLAGS += -O2
KBUILD_RUSTFLAGS += -Copt-level=2
@@ -25,7 +25,7 @@ dependency on CONFIG_ARC and adds RUSTFLAGS.
KBUILD_RUSTFLAGS += -Copt-level=s
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1473,6 +1473,12 @@ config CC_OPTIMIZE_FOR_PERFORMANCE
@@ -1479,6 +1479,12 @@ config CC_OPTIMIZE_FOR_PERFORMANCE
with the "-O2" compiler flag for best performance and most
helpful compile-time warnings.

@@ -1,6 +1,6 @@
--- a/Makefile
+++ b/Makefile
@@ -879,6 +879,10 @@ KBUILD_CFLAGS += -Os
@@ -876,6 +876,10 @@ KBUILD_CFLAGS += -Os
KBUILD_RUSTFLAGS += -Copt-level=s
endif

@@ -1,24 +1,22 @@
From 3ebc1fdf3e0ee9bff1efe20eb5791eba5c84a810 Mon Sep 17 00:00:00 2001
From 40f9fa82bb21a5e3f17f539897128a69824ad8ef Mon Sep 17 00:00:00 2001
From: Alexandre Frade <kernel@xanmod.org>
Date: Thu, 3 Aug 2023 13:53:49 +0000
Subject: XANMOD: x86/build: Prevent generating avx2 and avx512 floating-point code
Date: Mon, 18 Nov 2024 20:17:44 +0000
Subject: [PATCH 1/4] XANMOD: x86/build: Prevent generating avx2 floating-point
 code

Signed-off-by: Alexandre Frade <kernel@xanmod.org>
---
 arch/x86/Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
 arch/x86/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -74,9 +74,9 @@ export BITS
@@ -74,7 +74,7 @@ export BITS
#
# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53383
#
-KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx
+KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx -mno-avx2 -mno-avx512f
+KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx -mno-avx2
KBUILD_RUSTFLAGS += --target=$(objtree)/scripts/target.json
-KBUILD_RUSTFLAGS += -Ctarget-feature=-sse,-sse2,-sse3,-ssse3,-sse4.1,-sse4.2,-avx,-avx2
+KBUILD_RUSTFLAGS += -Ctarget-feature=-sse,-sse2,-sse3,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,-avx512f
KBUILD_RUSTFLAGS += -Ctarget-feature=-sse,-sse2,-sse3,-ssse3,-sse4.1,-sse4.2,-avx,-avx2

#
# CFLAGS for compiling floating point code inside the kernel.

@@ -4,8 +4,8 @@
#
# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53383
#
-KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx -mno-avx2 -mno-avx512f
+KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx -mno-avx2 -mno-avx512f -fno-tree-vectorize
-KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx -mno-avx2
+KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx -mno-avx2 -fno-tree-vectorize
KBUILD_RUSTFLAGS += --target=$(objtree)/scripts/target.json
KBUILD_RUSTFLAGS += -Ctarget-feature=-sse,-sse2,-sse3,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,-avx512f
KBUILD_RUSTFLAGS += -Ctarget-feature=-sse,-sse2,-sse3,-ssse3,-sse4.1,-sse4.2,-avx,-avx2

@@ -1,7 +1,8 @@
From b1a99a2a9675f80b7c04a239a6b047373ccf3a17 Mon Sep 17 00:00:00 2001
From 7e45fca50a3151248266bca7058e1efa9b5233ca Mon Sep 17 00:00:00 2001
From: Alexandre Frade <kernel@xanmod.org>
Date: Mon, 16 Sep 2024 00:55:35 +0000
Subject: XANMOD: kbuild: Add GCC SMS-based modulo scheduling flags
Subject: [PATCH 02/19] XANMOD: kbuild: Add GCC SMS-based modulo scheduling
flags

Signed-off-by: Alexandre Frade <kernel@xanmod.org>
---
@@ -10,7 +11,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>

--- a/Makefile
+++ b/Makefile
@@ -883,6 +883,13 @@ ifdef CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE
@@ -880,6 +880,13 @@ ifdef CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE
KBUILD_CFLAGS += $(call cc-option,-fivopts)
endif

@@ -1,27 +0,0 @@
From cb40e98d75a75567cbd10f9fc69c2ec12c87a445 Mon Sep 17 00:00:00 2001
From: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Date: Wed, 5 Feb 2025 11:25:15 +0000
Subject: cpufreq/amd-pstate: Remove the redundant des_perf clamping in
adjust_perf

des_perf is later on clamped between min_perf and max_perf in
amd_pstate_update. So, remove the redundant clamping from
amd_pstate_adjust_perf.

Signed-off-by: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
---
drivers/cpufreq/amd-pstate.c | 2 --
1 file changed, 2 deletions(-)

--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -705,8 +705,6 @@ static void amd_pstate_adjust_perf(unsig
if (max_perf < min_perf)
max_perf = min_perf;

- des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf);
-
amd_pstate_update(cpudata, min_perf, des_perf, max_perf, true,
policy->governor->flags);
cpufreq_cpu_put(policy);
@@ -1,133 +0,0 @@
From f58e440e56a6c8a2c04894e5d169d1a98a8ce74f Mon Sep 17 00:00:00 2001
From: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Date: Wed, 5 Feb 2025 11:25:18 +0000
Subject: cpufreq/amd-pstate: Modularize perf<->freq conversion

Delegate the perf<->frequency conversion to helper functions to reduce
code duplication and improve readability.
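As a rough sketch of the round trip these helpers centralize (the kHz values
here are hypothetical; the helpers themselves are the ones added in the hunk
below):

    u8 perf = freq_to_perf(cpudata, 3500000);  /* kHz -> abstract perf units, clamped */
    u32 khz = perf_to_freq(cpudata, perf);     /* abstract perf units -> kHz */

Both directions scale through the nominal operating point, i.e.
perf/freq == nominal_perf/nominal_freq, rounding up.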

Signed-off-by: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
---
drivers/cpufreq/amd-pstate.c | 57 +++++++++++++++++++-----------------
1 file changed, 30 insertions(+), 27 deletions(-)

--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -142,6 +142,20 @@ static struct quirk_entry quirk_amd_7k62
.lowest_freq = 550,
};

+static inline u8 freq_to_perf(struct amd_cpudata *cpudata, unsigned int freq_val)
+{
+ u8 perf_val = DIV_ROUND_UP_ULL((u64)freq_val * cpudata->nominal_perf,
+ cpudata->nominal_freq);
+
+ return clamp_t(u8, perf_val, cpudata->lowest_perf, cpudata->highest_perf);
+}
+
+static inline u32 perf_to_freq(struct amd_cpudata *cpudata, u8 perf_val)
+{
+ return DIV_ROUND_UP_ULL((u64)cpudata->nominal_freq * perf_val,
+ cpudata->nominal_perf);
+}
+
static int __init dmi_matched_7k62_bios_bug(const struct dmi_system_id *dmi)
{
/**
@@ -534,7 +548,6 @@ static inline bool amd_pstate_sample(str
static void amd_pstate_update(struct amd_cpudata *cpudata, u8 min_perf,
u8 des_perf, u8 max_perf, bool fast_switch, int gov_flags)
{
- unsigned long max_freq;
struct cpufreq_policy *policy = cpufreq_cpu_get(cpudata->cpu);
u8 nominal_perf = READ_ONCE(cpudata->nominal_perf);

@@ -543,8 +556,7 @@ static void amd_pstate_update(struct amd

des_perf = clamp_t(u8, des_perf, min_perf, max_perf);

- max_freq = READ_ONCE(cpudata->max_limit_freq);
- policy->cur = div_u64(des_perf * max_freq, max_perf);
+ policy->cur = perf_to_freq(cpudata, des_perf);

if ((cppc_state == AMD_PSTATE_GUIDED) && (gov_flags & CPUFREQ_GOV_DYNAMIC_SWITCHING)) {
min_perf = des_perf;
@@ -594,14 +606,11 @@ static int amd_pstate_verify(struct cpuf

static int amd_pstate_update_min_max_limit(struct cpufreq_policy *policy)
{
- u8 max_limit_perf, min_limit_perf, max_perf;
- u32 max_freq;
+ u8 max_limit_perf, min_limit_perf;
struct amd_cpudata *cpudata = policy->driver_data;

- max_perf = READ_ONCE(cpudata->highest_perf);
- max_freq = READ_ONCE(cpudata->max_freq);
- max_limit_perf = div_u64(policy->max * max_perf, max_freq);
- min_limit_perf = div_u64(policy->min * max_perf, max_freq);
+ max_limit_perf = freq_to_perf(cpudata, policy->max);
+ min_limit_perf = freq_to_perf(cpudata, policy->min);

if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
min_limit_perf = min(cpudata->nominal_perf, max_limit_perf);
@@ -619,21 +628,15 @@ static int amd_pstate_update_freq(struct
{
struct cpufreq_freqs freqs;
struct amd_cpudata *cpudata = policy->driver_data;
- u8 des_perf, cap_perf;
-
- if (!cpudata->max_freq)
- return -ENODEV;
+ u8 des_perf;

if (policy->min != cpudata->min_limit_freq || policy->max != cpudata->max_limit_freq)
amd_pstate_update_min_max_limit(policy);

- cap_perf = READ_ONCE(cpudata->highest_perf);
-
freqs.old = policy->cur;
freqs.new = target_freq;

- des_perf = DIV_ROUND_CLOSEST(target_freq * cap_perf,
- cpudata->max_freq);
+ des_perf = freq_to_perf(cpudata, target_freq);

WARN_ON(fast_switch && !policy->fast_switch_enabled);
/*
@@ -907,7 +910,6 @@ static int amd_pstate_init_freq(struct a
{
int ret;
u32 min_freq, max_freq;
- u8 highest_perf, nominal_perf, lowest_nonlinear_perf;
u32 nominal_freq, lowest_nonlinear_freq;
struct cppc_perf_caps cppc_perf;

@@ -925,16 +927,17 @@ static int amd_pstate_init_freq(struct a
else
nominal_freq = cppc_perf.nominal_freq;

- highest_perf = READ_ONCE(cpudata->highest_perf);
- nominal_perf = READ_ONCE(cpudata->nominal_perf);
- max_freq = div_u64((u64)highest_perf * nominal_freq, nominal_perf);
-
- lowest_nonlinear_perf = READ_ONCE(cpudata->lowest_nonlinear_perf);
- lowest_nonlinear_freq = div_u64((u64)nominal_freq * lowest_nonlinear_perf, nominal_perf);
- WRITE_ONCE(cpudata->min_freq, min_freq * 1000);
- WRITE_ONCE(cpudata->lowest_nonlinear_freq, lowest_nonlinear_freq * 1000);
- WRITE_ONCE(cpudata->nominal_freq, nominal_freq * 1000);
- WRITE_ONCE(cpudata->max_freq, max_freq * 1000);
+ min_freq *= 1000;
+ nominal_freq *= 1000;
+
+ WRITE_ONCE(cpudata->nominal_freq, nominal_freq);
+ WRITE_ONCE(cpudata->min_freq, min_freq);
+
+ max_freq = perf_to_freq(cpudata, cpudata->highest_perf);
+ lowest_nonlinear_freq = perf_to_freq(cpudata, cpudata->lowest_nonlinear_perf);
+
+ WRITE_ONCE(cpudata->lowest_nonlinear_freq, lowest_nonlinear_freq);
+ WRITE_ONCE(cpudata->max_freq, max_freq);

/**
* Below values need to be initialized correctly, otherwise driver will fail to load
@@ -1,37 +0,0 @@
From 0a12d4a3ca1a996c1073d60c6775424972e8b7b9 Mon Sep 17 00:00:00 2001
From: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Date: Wed, 5 Feb 2025 11:25:19 +0000
Subject: cpufreq/amd-pstate: Remove the unnecessary cpufreq_update_policy call

The update_limits callback is only called under two conditions:

* When the preferred core rankings change. In that case, we just need to
change the prefcore ranking in the cpudata struct. As there are no changes
to any of the perf values, there is no need to call cpufreq_update_policy().

* When the _PPC ACPI object changes, i.e. the highest allowed Pstate
changes. The _PPC object is only used for a table based cpufreq driver
like acpi-cpufreq, hence is irrelevant for CPPC based amd-pstate.

Hence, the cpufreq_update_policy() call becomes unnecessary and can be
removed.

Signed-off-by: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
---
drivers/cpufreq/amd-pstate.c | 4 ----
1 file changed, 4 deletions(-)

--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -855,10 +855,6 @@ static void amd_pstate_update_limits(uns
sched_set_itmt_core_prio((int)cur_high, cpu);
}
cpufreq_cpu_put(policy);
-
- if (!highest_perf_changed)
- cpufreq_update_policy(cpu);
-
}

/*
@@ -1,124 +0,0 @@
From ab0520499c83ff44d468f1b2b604c85e2f78d694 Mon Sep 17 00:00:00 2001
From: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Date: Wed, 5 Feb 2025 11:25:22 +0000
Subject: cpufreq/amd-pstate: Use scope based cleanup for cpufreq_policy refs

There have been instances in the past where a refcount decrement was missed
when exiting a function. Use automatic scope based cleanup to avoid
such errors.
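A minimal sketch of the pattern (put_cpufreq_policy is the cleanup helper
defined by the DEFINE_FREE() hunk below; the surrounding code is illustrative
only):

    struct cpufreq_policy *policy __free(put_cpufreq_policy) = cpufreq_cpu_get(cpu);

    if (!policy)
        return -ENODEV;

    /* every exit from this scope now drops the reference automatically */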

Signed-off-by: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
---
drivers/cpufreq/amd-pstate.c | 25 ++++++++-----------------
include/linux/cpufreq.h | 3 +++
2 files changed, 11 insertions(+), 17 deletions(-)

--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -548,7 +548,7 @@ static inline bool amd_pstate_sample(str
static void amd_pstate_update(struct amd_cpudata *cpudata, u8 min_perf,
u8 des_perf, u8 max_perf, bool fast_switch, int gov_flags)
{
- struct cpufreq_policy *policy = cpufreq_cpu_get(cpudata->cpu);
+ struct cpufreq_policy *policy __free(put_cpufreq_policy) = cpufreq_cpu_get(cpudata->cpu);
u8 nominal_perf = READ_ONCE(cpudata->nominal_perf);

if (!policy)
@@ -574,8 +574,6 @@ static void amd_pstate_update(struct amd
}

amd_pstate_update_perf(cpudata, min_perf, des_perf, max_perf, 0, fast_switch);
-
- cpufreq_cpu_put(policy);
}

static int amd_pstate_verify(struct cpufreq_policy_data *policy_data)
@@ -587,7 +585,8 @@ static int amd_pstate_verify(struct cpuf
* amd-pstate qos_requests.
*/
if (policy_data->min == FREQ_QOS_MIN_DEFAULT_VALUE) {
- struct cpufreq_policy *policy = cpufreq_cpu_get(policy_data->cpu);
+ struct cpufreq_policy *policy __free(put_cpufreq_policy) =
+ cpufreq_cpu_get(policy_data->cpu);
struct amd_cpudata *cpudata;

if (!policy)
@@ -595,7 +594,6 @@ static int amd_pstate_verify(struct cpuf

cpudata = policy->driver_data;
policy_data->min = cpudata->lowest_nonlinear_freq;
- cpufreq_cpu_put(policy);
}

cpufreq_verify_within_cpu_limits(policy_data);
@@ -678,7 +676,7 @@ static void amd_pstate_adjust_perf(unsig
unsigned long capacity)
{
u8 max_perf, min_perf, des_perf, cap_perf, min_limit_perf;
- struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
+ struct cpufreq_policy *policy __free(put_cpufreq_policy) = cpufreq_cpu_get(cpu);
struct amd_cpudata *cpudata;

if (!policy)
@@ -710,7 +708,6 @@ static void amd_pstate_adjust_perf(unsig

amd_pstate_update(cpudata, min_perf, des_perf, max_perf, true,
policy->governor->flags);
- cpufreq_cpu_put(policy);
}

static int amd_pstate_cpu_boost_update(struct cpufreq_policy *policy, bool on)
@@ -823,28 +820,23 @@ static void amd_pstate_init_prefcore(str

static void amd_pstate_update_limits(unsigned int cpu)
{
- struct cpufreq_policy *policy = NULL;
+ struct cpufreq_policy *policy __free(put_cpufreq_policy) = cpufreq_cpu_get(cpu);
struct amd_cpudata *cpudata;
u32 prev_high = 0, cur_high = 0;
- int ret;
bool highest_perf_changed = false;

if (!amd_pstate_prefcore)
return;

- policy = cpufreq_cpu_get(cpu);
if (!policy)
return;

- cpudata = policy->driver_data;
-
guard(mutex)(&amd_pstate_driver_lock);

- ret = amd_get_highest_perf(cpu, &cur_high);
- if (ret) {
- cpufreq_cpu_put(policy);
+ if (amd_get_highest_perf(cpu, &cur_high))
return;
- }
+
+ cpudata = policy->driver_data;

prev_high = READ_ONCE(cpudata->prefcore_ranking);
highest_perf_changed = (prev_high != cur_high);
@@ -854,7 +846,6 @@ static void amd_pstate_update_limits(uns
if (cur_high < CPPC_MAX_PERF)
sched_set_itmt_core_prio((int)cur_high, cpu);
}
- cpufreq_cpu_put(policy);
}

/*
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -213,6 +213,9 @@ static inline struct cpufreq_policy *cpu
static inline void cpufreq_cpu_put(struct cpufreq_policy *policy) { }
#endif

+/* Scope based cleanup macro for cpufreq_policy kobject reference counting */
+DEFINE_FREE(put_cpufreq_policy, struct cpufreq_policy *, if (_T) cpufreq_cpu_put(_T))
+
static inline bool policy_is_inactive(struct cpufreq_policy *policy)
{
return cpumask_empty(policy->cpus);
@@ -1,26 +0,0 @@
From 658a4b7a41583e3b73477c0fbbee07aa6d6f7e0e Mon Sep 17 00:00:00 2001
From: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Date: Wed, 5 Feb 2025 11:25:23 +0000
Subject: cpufreq/amd-pstate: Remove the unnecessary driver_lock in
amd_pstate_update_limits

There is no need to take a driver wide lock while updating the
highest_perf value in the percpu cpudata struct. Hence remove it.

Signed-off-by: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
---
drivers/cpufreq/amd-pstate.c | 2 --
1 file changed, 2 deletions(-)

--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -831,8 +831,6 @@ static void amd_pstate_update_limits(uns
if (!policy)
return;

- guard(mutex)(&amd_pstate_driver_lock);
-
if (amd_get_highest_perf(cpu, &cur_high))
return;

@@ -1,35 +0,0 @@
From 20f8507de83bc844c6ff2329e61ffc37734364e9 Mon Sep 17 00:00:00 2001
From: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Date: Sat, 22 Feb 2025 03:32:22 +0000
Subject: cpufreq/amd-pstate: Fix the clamping of perf values

The clamping in freq_to_perf() is broken right now, as we first typecast
(read: wrap around) the overflowing value into a u8 and then clamp it down.
So, use a u32 to store the >255 value in certain edge cases and then clamp
it down into a u8.

Also, use an "explicit typecast + clamp" instead of just a "clamp_t", as the
latter typecasts first and then clamps between the limits, which defeats
our purpose.
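A hypothetical value makes the ordering concrete. With perf_val = 300,
lowest_perf = 10 and highest_perf = 255:

    clamp_t(u8, 300, 10, 255);   /* casts 300 to u8 first (wraps to 44), then clamps: 44 */
    (u8)clamp(300U, 10U, 255U);  /* clamps in u32 first (255), then casts: 255 */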

Fixes: 305621eb6a8b ("cpufreq/amd-pstate: Modularize perf<->freq conversion")
Signed-off-by: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
---
drivers/cpufreq/amd-pstate.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)

--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -144,10 +144,10 @@ static struct quirk_entry quirk_amd_7k62

static inline u8 freq_to_perf(struct amd_cpudata *cpudata, unsigned int freq_val)
{
- u8 perf_val = DIV_ROUND_UP_ULL((u64)freq_val * cpudata->nominal_perf,
+ u32 perf_val = DIV_ROUND_UP_ULL((u64)freq_val * cpudata->nominal_perf,
cpudata->nominal_freq);

- return clamp_t(u8, perf_val, cpudata->lowest_perf, cpudata->highest_perf);
+ return (u8)clamp(perf_val, cpudata->lowest_perf, cpudata->highest_perf);
}

static inline u32 perf_to_freq(struct amd_cpudata *cpudata, u8 perf_val)
@@ -1,35 +0,0 @@
From 240a074b7f92278755df715be1ea5ea5d3d2f5ac Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Wed, 26 Feb 2025 01:49:17 -0600
Subject: cpufreq/amd-pstate: Show a warning when a CPU fails to setup

I came across a system where MSR_AMD_CPPC_CAP1 isn't populated for some
CPUs. This is unexpected behavior that is most likely a BIOS bug. In the
event it happens, I'd like users to report bugs so we can properly root
cause the issue and get it fixed.

Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
Reviewed-by: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
---
drivers/cpufreq/amd-pstate.c | 2 ++
1 file changed, 2 insertions(+)

--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -1027,6 +1027,7 @@ static int amd_pstate_cpu_init(struct cp
free_cpudata2:
freq_qos_remove_request(&cpudata->req[0]);
free_cpudata1:
+ pr_warn("Failed to initialize CPU %d: %d\n", policy->cpu, ret);
kfree(cpudata);
return ret;
}
@@ -1520,6 +1521,7 @@ static int amd_pstate_epp_cpu_init(struc
return 0;

free_cpudata1:
+ pr_warn("Failed to initialize CPU %d: %d\n", policy->cpu, ret);
kfree(cpudata);
return ret;
}
@@ -1,209 +0,0 @@
From 82520910e91d62f19c944ff17ba8f966553e79d6 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Wed, 26 Feb 2025 01:49:18 -0600
Subject: cpufreq/amd-pstate: Drop min and max cached frequencies

Use the perf_to_freq helpers to calculate this on the fly.
As the members are no longer cached, add an extra check into
amd_pstate_epp_update_limit() to avoid unnecessary calls in
amd_pstate_update_min_max_limit().
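In sketch form, a cached read such as

    max_freq = READ_ONCE(cpudata->max_freq);

is replaced by an on-demand conversion from the cached perf level, as in the
boost-update hunk below:

    max_freq = perf_to_freq(cpudata, READ_ONCE(cpudata->highest_perf));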

Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
Reviewed-by: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
---
drivers/cpufreq/amd-pstate-ut.c | 14 +++++------
drivers/cpufreq/amd-pstate.c | 43 +++++++++------------------------
drivers/cpufreq/amd-pstate.h | 9 ++-----
3 files changed, 20 insertions(+), 46 deletions(-)

--- a/drivers/cpufreq/amd-pstate-ut.c
+++ b/drivers/cpufreq/amd-pstate-ut.c
@@ -214,14 +214,14 @@ static void amd_pstate_ut_check_freq(u32
break;
cpudata = policy->driver_data;

- if (!((cpudata->max_freq >= cpudata->nominal_freq) &&
+ if (!((policy->cpuinfo.max_freq >= cpudata->nominal_freq) &&
(cpudata->nominal_freq > cpudata->lowest_nonlinear_freq) &&
- (cpudata->lowest_nonlinear_freq > cpudata->min_freq) &&
- (cpudata->min_freq > 0))) {
+ (cpudata->lowest_nonlinear_freq > policy->cpuinfo.min_freq) &&
+ (policy->cpuinfo.min_freq > 0))) {
amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
pr_err("%s cpu%d max=%d >= nominal=%d > lowest_nonlinear=%d > min=%d > 0, the formula is incorrect!\n",
- __func__, cpu, cpudata->max_freq, cpudata->nominal_freq,
- cpudata->lowest_nonlinear_freq, cpudata->min_freq);
+ __func__, cpu, policy->cpuinfo.max_freq, cpudata->nominal_freq,
+ cpudata->lowest_nonlinear_freq, policy->cpuinfo.min_freq);
goto skip_test;
}

@@ -233,13 +233,13 @@ static void amd_pstate_ut_check_freq(u32
}

if (cpudata->boost_supported) {
- if ((policy->max == cpudata->max_freq) ||
+ if ((policy->max == policy->cpuinfo.max_freq) ||
(policy->max == cpudata->nominal_freq))
amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_PASS;
else {
amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
pr_err("%s cpu%d policy_max=%d should be equal cpu_max=%d or cpu_nominal=%d !\n",
- __func__, cpu, policy->max, cpudata->max_freq,
+ __func__, cpu, policy->max, policy->cpuinfo.max_freq,
cpudata->nominal_freq);
goto skip_test;
}
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -717,7 +717,7 @@ static int amd_pstate_cpu_boost_update(s
int ret = 0;

nominal_freq = READ_ONCE(cpudata->nominal_freq);
- max_freq = READ_ONCE(cpudata->max_freq);
+ max_freq = perf_to_freq(cpudata, READ_ONCE(cpudata->highest_perf));

if (on)
policy->cpuinfo.max_freq = max_freq;
@@ -916,13 +916,10 @@ static int amd_pstate_init_freq(struct a
nominal_freq *= 1000;

WRITE_ONCE(cpudata->nominal_freq, nominal_freq);
- WRITE_ONCE(cpudata->min_freq, min_freq);

max_freq = perf_to_freq(cpudata, cpudata->highest_perf);
lowest_nonlinear_freq = perf_to_freq(cpudata, cpudata->lowest_nonlinear_perf);
-
WRITE_ONCE(cpudata->lowest_nonlinear_freq, lowest_nonlinear_freq);
- WRITE_ONCE(cpudata->max_freq, max_freq);

/**
* Below values need to be initialized correctly, otherwise driver will fail to load
@@ -947,9 +944,9 @@ static int amd_pstate_init_freq(struct a

static int amd_pstate_cpu_init(struct cpufreq_policy *policy)
{
- int min_freq, max_freq, ret;
- struct device *dev;
struct amd_cpudata *cpudata;
+ struct device *dev;
+ int ret;

/*
* Resetting PERF_CTL_MSR will put the CPU in P0 frequency,
@@ -980,17 +977,11 @@ static int amd_pstate_cpu_init(struct cp
if (ret)
goto free_cpudata1;

- min_freq = READ_ONCE(cpudata->min_freq);
- max_freq = READ_ONCE(cpudata->max_freq);
-
policy->cpuinfo.transition_latency = amd_pstate_get_transition_latency(policy->cpu);
policy->transition_delay_us = amd_pstate_get_transition_delay_us(policy->cpu);

- policy->min = min_freq;
- policy->max = max_freq;
-
- policy->cpuinfo.min_freq = min_freq;
- policy->cpuinfo.max_freq = max_freq;
+ policy->cpuinfo.min_freq = policy->min = perf_to_freq(cpudata, cpudata->lowest_perf);
+ policy->cpuinfo.max_freq = policy->max = perf_to_freq(cpudata, cpudata->highest_perf);

policy->boost_enabled = READ_ONCE(cpudata->boost_supported);

@@ -1014,9 +1005,6 @@ static int amd_pstate_cpu_init(struct cp
goto free_cpudata2;
}

- cpudata->max_limit_freq = max_freq;
- cpudata->min_limit_freq = min_freq;
-
policy->driver_data = cpudata;

if (!current_pstate_driver->adjust_perf)
@@ -1074,14 +1062,10 @@ static int amd_pstate_cpu_suspend(struct
static ssize_t show_amd_pstate_max_freq(struct cpufreq_policy *policy,
char *buf)
{
- int max_freq;
struct amd_cpudata *cpudata = policy->driver_data;

- max_freq = READ_ONCE(cpudata->max_freq);
- if (max_freq < 0)
- return max_freq;

- return sysfs_emit(buf, "%u\n", max_freq);
+ return sysfs_emit(buf, "%u\n", perf_to_freq(cpudata, READ_ONCE(cpudata->highest_perf)));
}

static ssize_t show_amd_pstate_lowest_nonlinear_freq(struct cpufreq_policy *policy,
@@ -1439,10 +1423,10 @@ static bool amd_pstate_acpi_pm_profile_u

static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
{
- int min_freq, max_freq, ret;
struct amd_cpudata *cpudata;
struct device *dev;
u64 value;
+ int ret;

/*
* Resetting PERF_CTL_MSR will put the CPU in P0 frequency,
@@ -1473,19 +1457,13 @@ static int amd_pstate_epp_cpu_init(struc
if (ret)
goto free_cpudata1;

- min_freq = READ_ONCE(cpudata->min_freq);
- max_freq = READ_ONCE(cpudata->max_freq);
-
- policy->cpuinfo.min_freq = min_freq;
- policy->cpuinfo.max_freq = max_freq;
+ policy->cpuinfo.min_freq = policy->min = perf_to_freq(cpudata, cpudata->lowest_perf);
+ policy->cpuinfo.max_freq = policy->max = perf_to_freq(cpudata, cpudata->highest_perf);
/* It will be updated by governor */
policy->cur = policy->cpuinfo.min_freq;

policy->driver_data = cpudata;

- policy->min = policy->cpuinfo.min_freq;
- policy->max = policy->cpuinfo.max_freq;
-
policy->boost_enabled = READ_ONCE(cpudata->boost_supported);

/*
@@ -1543,7 +1521,8 @@ static int amd_pstate_epp_update_limit(s
struct amd_cpudata *cpudata = policy->driver_data;
u8 epp;

- amd_pstate_update_min_max_limit(policy);
+ if (policy->min != cpudata->min_limit_freq || policy->max != cpudata->max_limit_freq)
+ amd_pstate_update_min_max_limit(policy);

if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
epp = 0;
--- a/drivers/cpufreq/amd-pstate.h
+++ b/drivers/cpufreq/amd-pstate.h
@@ -46,8 +46,6 @@ struct amd_aperf_mperf {
* @max_limit_perf: Cached value of the performance corresponding to policy->max
* @min_limit_freq: Cached value of policy->min (in khz)
* @max_limit_freq: Cached value of policy->max (in khz)
- * @max_freq: the frequency (in khz) that mapped to highest_perf
- * @min_freq: the frequency (in khz) that mapped to lowest_perf
* @nominal_freq: the frequency (in khz) that mapped to nominal_perf
* @lowest_nonlinear_freq: the frequency (in khz) that mapped to lowest_nonlinear_perf
* @cur: Difference of Aperf/Mperf/tsc count between last and current sample
@@ -77,11 +75,8 @@ struct amd_cpudata {
u8 prefcore_ranking;
u8 min_limit_perf;
u8 max_limit_perf;
- u32 min_limit_freq;
- u32 max_limit_freq;
-
- u32 max_freq;
- u32 min_freq;
+ u32 min_limit_freq;
+ u32 max_limit_freq;
u32 nominal_freq;
u32 lowest_nonlinear_freq;

@@ -1,611 +0,0 @@
From 21109b42429e0d9f0ee1bfadddae38fb5b0b23c3 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Wed, 26 Feb 2025 01:49:19 -0600
Subject: cpufreq/amd-pstate: Move perf values into a union

By storing perf values in a union, all the writes and reads can
be done atomically, removing the need for some concurrency protections.

While making this change, also drop the cached frequency values,
using inline helpers to calculate them on demand from the perf values.
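In sketch form (field names as in the union added by this patch): all six u8
perf levels share one u64, so a single READ_ONCE()/WRITE_ONCE() pair moves
them together without a lock:

    union perf_cached perf = READ_ONCE(cpudata->perf);

    perf.max_limit_perf = freq_to_perf(perf, cpudata->nominal_freq, policy->max);
    perf.min_limit_perf = freq_to_perf(perf, cpudata->nominal_freq, policy->min);
    WRITE_ONCE(cpudata->perf, perf);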

Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
---
drivers/cpufreq/amd-pstate-ut.c | 18 +--
drivers/cpufreq/amd-pstate.c | 205 ++++++++++++++++++--------------
drivers/cpufreq/amd-pstate.h | 51 +++++---
3 files changed, 158 insertions(+), 116 deletions(-)

--- a/drivers/cpufreq/amd-pstate-ut.c
+++ b/drivers/cpufreq/amd-pstate-ut.c
@@ -129,6 +129,7 @@ static void amd_pstate_ut_check_perf(u32
struct cppc_perf_caps cppc_perf;
struct cpufreq_policy *policy = NULL;
struct amd_cpudata *cpudata = NULL;
+ union perf_cached cur_perf;

for_each_possible_cpu(cpu) {
policy = cpufreq_cpu_get(cpu);
@@ -162,19 +163,20 @@ static void amd_pstate_ut_check_perf(u32
lowest_perf = AMD_CPPC_LOWEST_PERF(cap1);
}

- if (highest_perf != READ_ONCE(cpudata->highest_perf) && !cpudata->hw_prefcore) {
+ cur_perf = READ_ONCE(cpudata->perf);
+ if (highest_perf != cur_perf.highest_perf && !cpudata->hw_prefcore) {
pr_err("%s cpu%d highest=%d %d highest perf doesn't match\n",
- __func__, cpu, highest_perf, cpudata->highest_perf);
+ __func__, cpu, highest_perf, cur_perf.highest_perf);
goto skip_test;
}
- if ((nominal_perf != READ_ONCE(cpudata->nominal_perf)) ||
- (lowest_nonlinear_perf != READ_ONCE(cpudata->lowest_nonlinear_perf)) ||
- (lowest_perf != READ_ONCE(cpudata->lowest_perf))) {
+ if (nominal_perf != cur_perf.nominal_perf ||
+ (lowest_nonlinear_perf != cur_perf.lowest_nonlinear_perf) ||
+ (lowest_perf != cur_perf.lowest_perf)) {
amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
pr_err("%s cpu%d nominal=%d %d lowest_nonlinear=%d %d lowest=%d %d, they should be equal!\n",
- __func__, cpu, nominal_perf, cpudata->nominal_perf,
- lowest_nonlinear_perf, cpudata->lowest_nonlinear_perf,
- lowest_perf, cpudata->lowest_perf);
+ __func__, cpu, nominal_perf, cur_perf.nominal_perf,
+ lowest_nonlinear_perf, cur_perf.lowest_nonlinear_perf,
+ lowest_perf, cur_perf.lowest_perf);
goto skip_test;
}

--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -142,18 +142,17 @@ static struct quirk_entry quirk_amd_7k62
.lowest_freq = 550,
};

-static inline u8 freq_to_perf(struct amd_cpudata *cpudata, unsigned int freq_val)
+static inline u8 freq_to_perf(union perf_cached perf, u32 nominal_freq, unsigned int freq_val)
{
- u32 perf_val = DIV_ROUND_UP_ULL((u64)freq_val * cpudata->nominal_perf,
- cpudata->nominal_freq);
+ u32 perf_val = DIV_ROUND_UP_ULL((u64)freq_val * perf.nominal_perf, nominal_freq);

- return (u8)clamp(perf_val, cpudata->lowest_perf, cpudata->highest_perf);
+ return (u8)clamp(perf_val, perf.lowest_perf, perf.highest_perf);
}

-static inline u32 perf_to_freq(struct amd_cpudata *cpudata, u8 perf_val)
+static inline u32 perf_to_freq(union perf_cached perf, u32 nominal_freq, u8 perf_val)
{
- return DIV_ROUND_UP_ULL((u64)cpudata->nominal_freq * perf_val,
- cpudata->nominal_perf);
+ return DIV_ROUND_UP_ULL((u64)nominal_freq * perf_val,
+ perf.nominal_perf);
}

static int __init dmi_matched_7k62_bios_bug(const struct dmi_system_id *dmi)
@@ -347,7 +346,9 @@ static int amd_pstate_set_energy_pref_in
}

if (trace_amd_pstate_epp_perf_enabled()) {
- trace_amd_pstate_epp_perf(cpudata->cpu, cpudata->highest_perf,
+ union perf_cached perf = READ_ONCE(cpudata->perf);
+
+ trace_amd_pstate_epp_perf(cpudata->cpu, perf.highest_perf,
epp,
FIELD_GET(AMD_CPPC_MIN_PERF_MASK, cpudata->cppc_req_cached),
FIELD_GET(AMD_CPPC_MAX_PERF_MASK, cpudata->cppc_req_cached),
@@ -425,6 +426,7 @@ static inline int amd_pstate_cppc_enable

static int msr_init_perf(struct amd_cpudata *cpudata)
{
+ union perf_cached perf = READ_ONCE(cpudata->perf);
u64 cap1, numerator;

int ret = rdmsrl_safe_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1,
@@ -436,19 +438,21 @@ static int msr_init_perf(struct amd_cpud
if (ret)
return ret;

- WRITE_ONCE(cpudata->highest_perf, numerator);
- WRITE_ONCE(cpudata->max_limit_perf, numerator);
- WRITE_ONCE(cpudata->nominal_perf, AMD_CPPC_NOMINAL_PERF(cap1));
- WRITE_ONCE(cpudata->lowest_nonlinear_perf, AMD_CPPC_LOWNONLIN_PERF(cap1));
- WRITE_ONCE(cpudata->lowest_perf, AMD_CPPC_LOWEST_PERF(cap1));
+ perf.highest_perf = numerator;
+ perf.max_limit_perf = numerator;
+ perf.min_limit_perf = AMD_CPPC_LOWEST_PERF(cap1);
+ perf.nominal_perf = AMD_CPPC_NOMINAL_PERF(cap1);
+ perf.lowest_nonlinear_perf = AMD_CPPC_LOWNONLIN_PERF(cap1);
+ perf.lowest_perf = AMD_CPPC_LOWEST_PERF(cap1);
+ WRITE_ONCE(cpudata->perf, perf);
WRITE_ONCE(cpudata->prefcore_ranking, AMD_CPPC_HIGHEST_PERF(cap1));
- WRITE_ONCE(cpudata->min_limit_perf, AMD_CPPC_LOWEST_PERF(cap1));
return 0;
}

static int shmem_init_perf(struct amd_cpudata *cpudata)
{
struct cppc_perf_caps cppc_perf;
+ union perf_cached perf = READ_ONCE(cpudata->perf);
u64 numerator;

int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
@@ -459,14 +463,14 @@ static int shmem_init_perf(struct amd_cp
if (ret)
return ret;

- WRITE_ONCE(cpudata->highest_perf, numerator);
- WRITE_ONCE(cpudata->max_limit_perf, numerator);
- WRITE_ONCE(cpudata->nominal_perf, cppc_perf.nominal_perf);
- WRITE_ONCE(cpudata->lowest_nonlinear_perf,
- cppc_perf.lowest_nonlinear_perf);
- WRITE_ONCE(cpudata->lowest_perf, cppc_perf.lowest_perf);
+ perf.highest_perf = numerator;
+ perf.max_limit_perf = numerator;
+ perf.min_limit_perf = cppc_perf.lowest_perf;
+ perf.nominal_perf = cppc_perf.nominal_perf;
+ perf.lowest_nonlinear_perf = cppc_perf.lowest_nonlinear_perf;
+ perf.lowest_perf = cppc_perf.lowest_perf;
+ WRITE_ONCE(cpudata->perf, perf);
WRITE_ONCE(cpudata->prefcore_ranking, cppc_perf.highest_perf);
- WRITE_ONCE(cpudata->min_limit_perf, cppc_perf.lowest_perf);

if (cppc_state == AMD_PSTATE_ACTIVE)
return 0;
@@ -549,14 +553,14 @@ static void amd_pstate_update(struct amd
u8 des_perf, u8 max_perf, bool fast_switch, int gov_flags)
{
struct cpufreq_policy *policy __free(put_cpufreq_policy) = cpufreq_cpu_get(cpudata->cpu);
- u8 nominal_perf = READ_ONCE(cpudata->nominal_perf);
+ union perf_cached perf = READ_ONCE(cpudata->perf);

if (!policy)
return;

des_perf = clamp_t(u8, des_perf, min_perf, max_perf);

- policy->cur = perf_to_freq(cpudata, des_perf);
+ policy->cur = perf_to_freq(perf, cpudata->nominal_freq, des_perf);

if ((cppc_state == AMD_PSTATE_GUIDED) && (gov_flags & CPUFREQ_GOV_DYNAMIC_SWITCHING)) {
min_perf = des_perf;
@@ -565,7 +569,7 @@ static void amd_pstate_update(struct amd

/* limit the max perf when core performance boost feature is disabled */
if (!cpudata->boost_supported)
- max_perf = min_t(u8, nominal_perf, max_perf);
+ max_perf = min_t(u8, perf.nominal_perf, max_perf);

if (trace_amd_pstate_perf_enabled() && amd_pstate_sample(cpudata)) {
trace_amd_pstate_perf(min_perf, des_perf, max_perf, cpudata->freq,
@@ -602,39 +606,41 @@ static int amd_pstate_verify(struct cpuf
return 0;
}

-static int amd_pstate_update_min_max_limit(struct cpufreq_policy *policy)
+static void amd_pstate_update_min_max_limit(struct cpufreq_policy *policy)
{
- u8 max_limit_perf, min_limit_perf;
struct amd_cpudata *cpudata = policy->driver_data;
+ union perf_cached perf = READ_ONCE(cpudata->perf);

- max_limit_perf = freq_to_perf(cpudata, policy->max);
- min_limit_perf = freq_to_perf(cpudata, policy->min);
+ perf.max_limit_perf = freq_to_perf(perf, cpudata->nominal_freq, policy->max);
+ perf.min_limit_perf = freq_to_perf(perf, cpudata->nominal_freq, policy->min);

if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
- min_limit_perf = min(cpudata->nominal_perf, max_limit_perf);
+ perf.min_limit_perf = min(perf.nominal_perf, perf.max_limit_perf);

- WRITE_ONCE(cpudata->max_limit_perf, max_limit_perf);
- WRITE_ONCE(cpudata->min_limit_perf, min_limit_perf);
WRITE_ONCE(cpudata->max_limit_freq, policy->max);
WRITE_ONCE(cpudata->min_limit_freq, policy->min);
-
- return 0;
+ WRITE_ONCE(cpudata->perf, perf);
}

static int amd_pstate_update_freq(struct cpufreq_policy *policy,
unsigned int target_freq, bool fast_switch)
{
struct cpufreq_freqs freqs;
- struct amd_cpudata *cpudata = policy->driver_data;
+ struct amd_cpudata *cpudata;
+ union perf_cached perf;
u8 des_perf;

+ cpudata = policy->driver_data;
+
if (policy->min != cpudata->min_limit_freq || policy->max != cpudata->max_limit_freq)
amd_pstate_update_min_max_limit(policy);

+ perf = READ_ONCE(cpudata->perf);
+
freqs.old = policy->cur;
freqs.new = target_freq;

- des_perf = freq_to_perf(cpudata, target_freq);
+ des_perf = freq_to_perf(perf, cpudata->nominal_freq, target_freq);

WARN_ON(fast_switch && !policy->fast_switch_enabled);
/*
@@ -645,8 +651,8 @@ static int amd_pstate_update_freq(struct
if (!fast_switch)
cpufreq_freq_transition_begin(policy, &freqs);

- amd_pstate_update(cpudata, cpudata->min_limit_perf, des_perf,
- cpudata->max_limit_perf, fast_switch,
+ amd_pstate_update(cpudata, perf.min_limit_perf, des_perf,
+ perf.max_limit_perf, fast_switch,
policy->governor->flags);

if (!fast_switch)
@@ -675,9 +681,10 @@ static void amd_pstate_adjust_perf(unsig
unsigned long target_perf,
unsigned long capacity)
{
- u8 max_perf, min_perf, des_perf, cap_perf, min_limit_perf;
+ u8 max_perf, min_perf, des_perf, cap_perf;
struct cpufreq_policy *policy __free(put_cpufreq_policy) = cpufreq_cpu_get(cpu);
struct amd_cpudata *cpudata;
+ union perf_cached perf;

if (!policy)
return;
@@ -687,8 +694,8 @@ static void amd_pstate_adjust_perf(unsig
if (policy->min != cpudata->min_limit_freq || policy->max != cpudata->max_limit_freq)
amd_pstate_update_min_max_limit(policy);

- cap_perf = READ_ONCE(cpudata->highest_perf);
- min_limit_perf = READ_ONCE(cpudata->min_limit_perf);
+ perf = READ_ONCE(cpudata->perf);
+ cap_perf = perf.highest_perf;

des_perf = cap_perf;
if (target_perf < capacity)
@@ -699,10 +706,10 @@ static void amd_pstate_adjust_perf(unsig
else
min_perf = cap_perf;

- if (min_perf < min_limit_perf)
- min_perf = min_limit_perf;
+ if (min_perf < perf.min_limit_perf)
+ min_perf = perf.min_limit_perf;

- max_perf = cpudata->max_limit_perf;
+ max_perf = perf.max_limit_perf;
if (max_perf < min_perf)
max_perf = min_perf;

@@ -713,11 +720,12 @@ static void amd_pstate_adjust_perf(unsig
static int amd_pstate_cpu_boost_update(struct cpufreq_policy *policy, bool on)
{
struct amd_cpudata *cpudata = policy->driver_data;
+ union perf_cached perf = READ_ONCE(cpudata->perf);
u32 nominal_freq, max_freq;
int ret = 0;

nominal_freq = READ_ONCE(cpudata->nominal_freq);
- max_freq = perf_to_freq(cpudata, READ_ONCE(cpudata->highest_perf));
+ max_freq = perf_to_freq(perf, cpudata->nominal_freq, perf.highest_perf);

if (on)
policy->cpuinfo.max_freq = max_freq;
@@ -881,30 +889,30 @@ static u32 amd_pstate_get_transition_lat
}

/*
- * amd_pstate_init_freq: Initialize the max_freq, min_freq,
- * nominal_freq and lowest_nonlinear_freq for
- * the @cpudata object.
+ * amd_pstate_init_freq: Initialize the nominal_freq and lowest_nonlinear_freq
+ * for the @cpudata object.
*
- * Requires: highest_perf, lowest_perf, nominal_perf and
- * lowest_nonlinear_perf members of @cpudata to be
- * initialized.
+ * Requires: all perf members of @cpudata to be initialized.
*
- * Returns 0 on success, non-zero value on failure.
+ * Returns 0 on success, non-zero value on failure.
*/
static int amd_pstate_init_freq(struct amd_cpudata *cpudata)
{
- int ret;
- u32 min_freq, max_freq;
- u32 nominal_freq, lowest_nonlinear_freq;
+ u32 min_freq, max_freq, nominal_freq, lowest_nonlinear_freq;
struct cppc_perf_caps cppc_perf;
+ union perf_cached perf;
+ int ret;

ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
if (ret)
return ret;
+ perf = READ_ONCE(cpudata->perf);

- if (quirks && quirks->lowest_freq)
+ if (quirks && quirks->lowest_freq) {
min_freq = quirks->lowest_freq;
- else
+ perf.lowest_perf = freq_to_perf(perf, nominal_freq, min_freq);
+ WRITE_ONCE(cpudata->perf, perf);
+ } else
min_freq = cppc_perf.lowest_freq;

if (quirks && quirks->nominal_freq)
@@ -917,8 +925,8 @@ static int amd_pstate_init_freq(struct a

WRITE_ONCE(cpudata->nominal_freq, nominal_freq);

- max_freq = perf_to_freq(cpudata, cpudata->highest_perf);
- lowest_nonlinear_freq = perf_to_freq(cpudata, cpudata->lowest_nonlinear_perf);
+ max_freq = perf_to_freq(perf, nominal_freq, perf.highest_perf);
+ lowest_nonlinear_freq = perf_to_freq(perf, nominal_freq, perf.lowest_nonlinear_perf);
WRITE_ONCE(cpudata->lowest_nonlinear_freq, lowest_nonlinear_freq);

/**
@@ -945,6 +953,7 @@ static int amd_pstate_init_freq(struct a
static int amd_pstate_cpu_init(struct cpufreq_policy *policy)
{
struct amd_cpudata *cpudata;
+ union perf_cached perf;
struct device *dev;
int ret;

@@ -980,8 +989,14 @@ static int amd_pstate_cpu_init(struct cp
policy->cpuinfo.transition_latency = amd_pstate_get_transition_latency(policy->cpu);
policy->transition_delay_us = amd_pstate_get_transition_delay_us(policy->cpu);

- policy->cpuinfo.min_freq = policy->min = perf_to_freq(cpudata, cpudata->lowest_perf);
- policy->cpuinfo.max_freq = policy->max = perf_to_freq(cpudata, cpudata->highest_perf);
+ perf = READ_ONCE(cpudata->perf);
+
+ policy->cpuinfo.min_freq = policy->min = perf_to_freq(perf,
+ cpudata->nominal_freq,
+ perf.lowest_perf);
+ policy->cpuinfo.max_freq = policy->max = perf_to_freq(perf,
+ cpudata->nominal_freq,
+ perf.highest_perf);

policy->boost_enabled = READ_ONCE(cpudata->boost_supported);

@@ -1062,23 +1077,27 @@ static int amd_pstate_cpu_suspend(struct
static ssize_t show_amd_pstate_max_freq(struct cpufreq_policy *policy,
char *buf)
{
- struct amd_cpudata *cpudata = policy->driver_data;
+ struct amd_cpudata *cpudata;
+ union perf_cached perf;

+ cpudata = policy->driver_data;
+ perf = READ_ONCE(cpudata->perf);

- return sysfs_emit(buf, "%u\n", perf_to_freq(cpudata, READ_ONCE(cpudata->highest_perf)));
+ return sysfs_emit(buf, "%u\n",
+ perf_to_freq(perf, cpudata->nominal_freq, perf.highest_perf));
}

static ssize_t show_amd_pstate_lowest_nonlinear_freq(struct cpufreq_policy *policy,
char *buf)
{
- int freq;
- struct amd_cpudata *cpudata = policy->driver_data;
+ struct amd_cpudata *cpudata;
+ union perf_cached perf;

- freq = READ_ONCE(cpudata->lowest_nonlinear_freq);
- if (freq < 0)
- return freq;
+ cpudata = policy->driver_data;
+ perf = READ_ONCE(cpudata->perf);

- return sysfs_emit(buf, "%u\n", freq);
+ return sysfs_emit(buf, "%u\n",
+ perf_to_freq(perf, cpudata->nominal_freq, perf.lowest_nonlinear_perf));
}

/*
@@ -1088,12 +1107,11 @@ static ssize_t show_amd_pstate_lowest_no
static ssize_t show_amd_pstate_highest_perf(struct cpufreq_policy *policy,
char *buf)
{
- u8 perf;
- struct amd_cpudata *cpudata = policy->driver_data;
+ struct amd_cpudata *cpudata;

- perf = READ_ONCE(cpudata->highest_perf);
+ cpudata = policy->driver_data;

- return sysfs_emit(buf, "%u\n", perf);
+ return sysfs_emit(buf, "%u\n", cpudata->perf.highest_perf);
}

static ssize_t show_amd_pstate_prefcore_ranking(struct cpufreq_policy *policy,
@@ -1424,6 +1442,7 @@ static bool amd_pstate_acpi_pm_profile_u
static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
{
struct amd_cpudata *cpudata;
+ union perf_cached perf;
struct device *dev;
u64 value;
int ret;
@@ -1457,8 +1476,15 @@ static int amd_pstate_epp_cpu_init(struc
if (ret)
goto free_cpudata1;

- policy->cpuinfo.min_freq = policy->min = perf_to_freq(cpudata, cpudata->lowest_perf);
- policy->cpuinfo.max_freq = policy->max = perf_to_freq(cpudata, cpudata->highest_perf);
+ perf = READ_ONCE(cpudata->perf);
+
+ policy->cpuinfo.min_freq = policy->min = perf_to_freq(perf,
+ cpudata->nominal_freq,
+ perf.lowest_perf);
+ policy->cpuinfo.max_freq = policy->max = perf_to_freq(perf,
+ cpudata->nominal_freq,
+ perf.highest_perf);
+
/* It will be updated by governor */
policy->cur = policy->cpuinfo.min_freq;

@@ -1519,6 +1545,7 @@ static void amd_pstate_epp_cpu_exit(stru
static int amd_pstate_epp_update_limit(struct cpufreq_policy *policy)
{
struct amd_cpudata *cpudata = policy->driver_data;
+ union perf_cached perf;
u8 epp;

if (policy->min != cpudata->min_limit_freq || policy->max != cpudata->max_limit_freq)
@@ -1529,15 +1556,16 @@ static int amd_pstate_epp_update_limit(s
else
epp = READ_ONCE(cpudata->epp_cached);

+ perf = READ_ONCE(cpudata->perf);
if (trace_amd_pstate_epp_perf_enabled()) {
- trace_amd_pstate_epp_perf(cpudata->cpu, cpudata->highest_perf, epp,
- cpudata->min_limit_perf,
- cpudata->max_limit_perf,
+ trace_amd_pstate_epp_perf(cpudata->cpu, perf.highest_perf, epp,
+ perf.min_limit_perf,
+ perf.max_limit_perf,
policy->boost_enabled);
}

- return amd_pstate_update_perf(cpudata, cpudata->min_limit_perf, 0U,
- cpudata->max_limit_perf, epp, false);
+ return amd_pstate_update_perf(cpudata, perf.min_limit_perf, 0U,
+ perf.max_limit_perf, epp, false);
}

static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy)
@@ -1569,20 +1597,18 @@ static int amd_pstate_epp_set_policy(str
static int amd_pstate_epp_reenable(struct cpufreq_policy *policy)
{
struct amd_cpudata *cpudata = policy->driver_data;
- u8 max_perf;
+ union perf_cached perf = READ_ONCE(cpudata->perf);
int ret;

ret = amd_pstate_cppc_enable(true);
if (ret)
pr_err("failed to enable amd pstate during resume, return %d\n", ret);

- max_perf = READ_ONCE(cpudata->highest_perf);
-
if (trace_amd_pstate_epp_perf_enabled()) {
- trace_amd_pstate_epp_perf(cpudata->cpu, cpudata->highest_perf,
+ trace_amd_pstate_epp_perf(cpudata->cpu, perf.highest_perf,
cpudata->epp_cached,
FIELD_GET(AMD_CPPC_MIN_PERF_MASK, cpudata->cppc_req_cached),
- max_perf, policy->boost_enabled);
+ perf.highest_perf, policy->boost_enabled);
}

return amd_pstate_epp_update_limit(policy);
@@ -1606,22 +1632,21 @@ static int amd_pstate_epp_cpu_online(str
static int amd_pstate_epp_cpu_offline(struct cpufreq_policy *policy)
{
struct amd_cpudata *cpudata = policy->driver_data;
- u8 min_perf;
+ union perf_cached perf = READ_ONCE(cpudata->perf);

if (cpudata->suspended)
return 0;

- min_perf = READ_ONCE(cpudata->lowest_perf);
-
guard(mutex)(&amd_pstate_limits_lock);

if (trace_amd_pstate_epp_perf_enabled()) {
- trace_amd_pstate_epp_perf(cpudata->cpu, cpudata->highest_perf,
+ trace_amd_pstate_epp_perf(cpudata->cpu, perf.highest_perf,
AMD_CPPC_EPP_BALANCE_POWERSAVE,
- min_perf, min_perf, policy->boost_enabled);
+ perf.lowest_perf, perf.lowest_perf,
+ policy->boost_enabled);
}

- return amd_pstate_update_perf(cpudata, min_perf, 0, min_perf,
+ return amd_pstate_update_perf(cpudata, perf.lowest_perf, 0, perf.lowest_perf,
AMD_CPPC_EPP_BALANCE_POWERSAVE, false);
}

--- a/drivers/cpufreq/amd-pstate.h
+++ b/drivers/cpufreq/amd-pstate.h
@@ -13,6 +13,36 @@
/*********************************************************************
* AMD P-state INTERFACE *
*********************************************************************/
+
+/**
+ * union perf_cached - A union to cache performance-related data.
+ * @highest_perf: the maximum performance an individual processor may reach,
+ * assuming ideal conditions
+ * For platforms that support the preferred core feature, the highest_perf value maybe
+ * configured to any value in the range 166-255 by the firmware (because the preferred
+ * core ranking is encoded in the highest_perf value). To maintain consistency across
+ * all platforms, we split the highest_perf and preferred core ranking values into
+ * cpudata->perf.highest_perf and cpudata->prefcore_ranking.
+ * @nominal_perf: the maximum sustained performance level of the processor,
+ * assuming ideal operating conditions
+ * @lowest_nonlinear_perf: the lowest performance level at which nonlinear power
+ * savings are achieved
+ * @lowest_perf: the absolute lowest performance level of the processor
+ * @min_limit_perf: Cached value of the performance corresponding to policy->min
+ * @max_limit_perf: Cached value of the performance corresponding to policy->max
+ */
+union perf_cached {
+ struct {
+ u8 highest_perf;
+ u8 nominal_perf;
+ u8 lowest_nonlinear_perf;
+ u8 lowest_perf;
+ u8 min_limit_perf;
+ u8 max_limit_perf;
+ };
+ u64 val;
+};
+
/**
* struct amd_aperf_mperf
* @aperf: actual performance frequency clock count
@@ -30,20 +60,9 @@ struct amd_aperf_mperf {
* @cpu: CPU number
* @req: constraint request to apply
* @cppc_req_cached: cached performance request hints
- * @highest_perf: the maximum performance an individual processor may reach,
- * assuming ideal conditions
- * For platforms that do not support the preferred core feature, the
- * highest_pef may be configured with 166 or 255, to avoid max frequency
- * calculated wrongly. we take the fixed value as the highest_perf.
- * @nominal_perf: the maximum sustained performance level of the processor,
- * assuming ideal operating conditions
- * @lowest_nonlinear_perf: the lowest performance level at which nonlinear power
- * savings are achieved
- * @lowest_perf: the absolute lowest performance level of the processor
+ * @perf: cached performance-related data
* @prefcore_ranking: the preferred core ranking, the higher value indicates a higher
* priority.
- * @min_limit_perf: Cached value of the performance corresponding to policy->min
- * @max_limit_perf: Cached value of the performance corresponding to policy->max
* @min_limit_freq: Cached value of policy->min (in khz)
* @max_limit_freq: Cached value of policy->max (in khz)
* @nominal_freq: the frequency (in khz) that mapped to nominal_perf
@@ -68,13 +87,9 @@ struct amd_cpudata {
struct freq_qos_request req[2];
u64 cppc_req_cached;

- u8 highest_perf;
- u8 nominal_perf;
- u8 lowest_nonlinear_perf;
- u8 lowest_perf;
+ union perf_cached perf;
+
u8 prefcore_ranking;
- u8 min_limit_perf;
- u8 max_limit_perf;
u32 min_limit_freq;
u32 max_limit_freq;
u32 nominal_freq;
@@ -1,81 +0,0 @@
From 0daee82069cfe4a322bed954a4a5f19226e49e95 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Wed, 26 Feb 2025 01:49:20 -0600
Subject: cpufreq/amd-pstate: Overhaul locking

amd_pstate_cpu_boost_update() and refresh_frequency_limits() both
update the policy state and have nothing to do with the amd-pstate
driver itself.

A global "limits" lock doesn't make sense because each CPU can have
policies changed independently. Each time a CPU's values change, they
are written atomically to the per-CPU perf member. Drop the per-CPU
locking cases.

The remaining "global" driver lock is used to ensure that only one
entity can change driver modes at a given time.
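That surviving lock is taken with a scope-based guard, as in the
amd_pstate_update_status() hunk below; the mutex is released automatically
when the block exits:

    if (mode_state_machine[cppc_state][mode_idx]) {
        guard(mutex)(&amd_pstate_driver_lock);
        return mode_state_machine[cppc_state][mode_idx](mode_idx);
    }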

Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
Reviewed-by: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
---
drivers/cpufreq/amd-pstate.c | 13 +++----------
1 file changed, 3 insertions(+), 10 deletions(-)

--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -196,7 +196,6 @@ static inline int get_mode_idx_from_str(
return -EINVAL;
}

-static DEFINE_MUTEX(amd_pstate_limits_lock);
static DEFINE_MUTEX(amd_pstate_driver_lock);

static u8 msr_get_epp(struct amd_cpudata *cpudata)
@@ -1169,8 +1168,6 @@ static ssize_t store_energy_performance_
if (ret < 0)
return -EINVAL;

- guard(mutex)(&amd_pstate_limits_lock);
-
ret = amd_pstate_set_energy_pref_index(policy, ret);

return ret ? ret : count;
@@ -1343,8 +1340,10 @@ int amd_pstate_update_status(const char
if (mode_idx < 0 || mode_idx >= AMD_PSTATE_MAX)
return -EINVAL;

- if (mode_state_machine[cppc_state][mode_idx])
+ if (mode_state_machine[cppc_state][mode_idx]) {
+ guard(mutex)(&amd_pstate_driver_lock);
return mode_state_machine[cppc_state][mode_idx](mode_idx);
+ }

return 0;
}
@@ -1365,7 +1364,6 @@ static ssize_t status_store(struct devic
char *p = memchr(buf, '\n', count);
int ret;

- guard(mutex)(&amd_pstate_driver_lock);
ret = amd_pstate_update_status(buf, p ? p - buf : count);

return ret < 0 ? ret : count;
@@ -1637,8 +1635,6 @@ static int amd_pstate_epp_cpu_offline(st
if (cpudata->suspended)
return 0;

- guard(mutex)(&amd_pstate_limits_lock);
-
if (trace_amd_pstate_epp_perf_enabled()) {
trace_amd_pstate_epp_perf(cpudata->cpu, perf.highest_perf,
AMD_CPPC_EPP_BALANCE_POWERSAVE,
@@ -1678,8 +1674,6 @@ static int amd_pstate_epp_resume(struct
struct amd_cpudata *cpudata = policy->driver_data;

if (cpudata->suspended) {
- guard(mutex)(&amd_pstate_limits_lock);
-
/* enable amd pstate from suspend state*/
amd_pstate_epp_reenable(policy);

@@ -1,48 +0,0 @@
From 7c820a91ffd02aa7e426e8801893575f218a7a80 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Wed, 26 Feb 2025 01:49:21 -0600
Subject: cpufreq/amd-pstate: Drop `cppc_cap1_cached`

The `cppc_cap1_cached` variable isn't used at all, so there is no
need to read it at initialization for each CPU.

Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
Reviewed-by: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
---
drivers/cpufreq/amd-pstate.c | 5 -----
drivers/cpufreq/amd-pstate.h | 2 --
2 files changed, 7 deletions(-)

--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -1508,11 +1508,6 @@ static int amd_pstate_epp_cpu_init(struc
if (ret)
return ret;
WRITE_ONCE(cpudata->cppc_req_cached, value);
-
- ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1, &value);
- if (ret)
- return ret;
- WRITE_ONCE(cpudata->cppc_cap1_cached, value);
}
ret = amd_pstate_set_epp(cpudata, cpudata->epp_default);
if (ret)
--- a/drivers/cpufreq/amd-pstate.h
+++ b/drivers/cpufreq/amd-pstate.h
@@ -76,7 +76,6 @@ struct amd_aperf_mperf {
* AMD P-State driver supports preferred core featue.
* @epp_cached: Cached CPPC energy-performance preference value
* @policy: Cpufreq policy value
- * @cppc_cap1_cached Cached MSR_AMD_CPPC_CAP1 register value
*
* The amd_cpudata is key private data for each CPU thread in AMD P-State, and
* represents all the attributes and goals that AMD P-State requests at runtime.
@@ -105,7 +104,6 @@ struct amd_cpudata {
/* EPP feature related attributes*/
u8 epp_cached;
u32 policy;
- u64 cppc_cap1_cached;
bool suspended;
u8 epp_default;
};
@@ -1,144 +0,0 @@
From 5d0c340db98de378a11abfbaf587b6e601e7291c Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Wed, 26 Feb 2025 01:49:22 -0600
Subject: cpufreq/amd-pstate-ut: Use _free macro to free put policy

Using a scoped cleanup macro simplifies cleanup code.

Reviewed-by: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
---
 drivers/cpufreq/amd-pstate-ut.c | 33 ++++++++++++++-------------------
 1 file changed, 14 insertions(+), 19 deletions(-)

--- a/drivers/cpufreq/amd-pstate-ut.c
+++ b/drivers/cpufreq/amd-pstate-ut.c
@@ -26,6 +26,7 @@
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include <linux/fs.h>
+#include <linux/cleanup.h>

 #include <acpi/cppc_acpi.h>

@@ -127,11 +128,12 @@ static void amd_pstate_ut_check_perf(u32
 	u32 highest_perf = 0, nominal_perf = 0, lowest_nonlinear_perf = 0, lowest_perf = 0;
 	u64 cap1 = 0;
 	struct cppc_perf_caps cppc_perf;
-	struct cpufreq_policy *policy = NULL;
 	struct amd_cpudata *cpudata = NULL;
 	union perf_cached cur_perf;

 	for_each_possible_cpu(cpu) {
+		struct cpufreq_policy *policy __free(put_cpufreq_policy) = NULL;
+
 		policy = cpufreq_cpu_get(cpu);
 		if (!policy)
 			break;
@@ -142,7 +144,7 @@ static void amd_pstate_ut_check_perf(u32
 		if (ret) {
 			amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
 			pr_err("%s cppc_get_perf_caps ret=%d error!\n", __func__, ret);
-			goto skip_test;
+			return;
 		}

 		highest_perf = cppc_perf.highest_perf;
@@ -154,7 +156,7 @@ static void amd_pstate_ut_check_perf(u32
 		if (ret) {
 			amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
 			pr_err("%s read CPPC_CAP1 ret=%d error!\n", __func__, ret);
-			goto skip_test;
+			return;
 		}

 		highest_perf = AMD_CPPC_HIGHEST_PERF(cap1);
@@ -167,7 +169,7 @@ static void amd_pstate_ut_check_perf(u32
 		if (highest_perf != cur_perf.highest_perf && !cpudata->hw_prefcore) {
 			pr_err("%s cpu%d highest=%d %d highest perf doesn't match\n",
 				__func__, cpu, highest_perf, cur_perf.highest_perf);
-			goto skip_test;
+			return;
 		}
 		if (nominal_perf != cur_perf.nominal_perf ||
 		    (lowest_nonlinear_perf != cur_perf.lowest_nonlinear_perf) ||
@@ -177,7 +179,7 @@ static void amd_pstate_ut_check_perf(u32
 				__func__, cpu, nominal_perf, cur_perf.nominal_perf,
 				lowest_nonlinear_perf, cur_perf.lowest_nonlinear_perf,
 				lowest_perf, cur_perf.lowest_perf);
-			goto skip_test;
+			return;
 		}

 		if (!((highest_perf >= nominal_perf) &&
@@ -188,15 +190,11 @@ static void amd_pstate_ut_check_perf(u32
 			pr_err("%s cpu%d highest=%d >= nominal=%d > lowest_nonlinear=%d > lowest=%d > 0, the formula is incorrect!\n",
 				__func__, cpu, highest_perf, nominal_perf,
 				lowest_nonlinear_perf, lowest_perf);
-			goto skip_test;
+			return;
 		}
-		cpufreq_cpu_put(policy);
 	}

 	amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_PASS;
-	return;
-skip_test:
-	cpufreq_cpu_put(policy);
 }

 /*
@@ -207,10 +205,11 @@ skip_test:
 static void amd_pstate_ut_check_freq(u32 index)
 {
 	int cpu = 0;
-	struct cpufreq_policy *policy = NULL;
 	struct amd_cpudata *cpudata = NULL;

 	for_each_possible_cpu(cpu) {
+		struct cpufreq_policy *policy __free(put_cpufreq_policy) = NULL;
+
 		policy = cpufreq_cpu_get(cpu);
 		if (!policy)
 			break;
@@ -224,14 +223,14 @@ static void amd_pstate_ut_check_freq(u32
 			pr_err("%s cpu%d max=%d >= nominal=%d > lowest_nonlinear=%d > min=%d > 0, the formula is incorrect!\n",
 				__func__, cpu, policy->cpuinfo.max_freq, cpudata->nominal_freq,
 				cpudata->lowest_nonlinear_freq, policy->cpuinfo.min_freq);
-			goto skip_test;
+			return;
 		}

 		if (cpudata->lowest_nonlinear_freq != policy->min) {
 			amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
 			pr_err("%s cpu%d cpudata_lowest_nonlinear_freq=%d policy_min=%d, they should be equal!\n",
 				__func__, cpu, cpudata->lowest_nonlinear_freq, policy->min);
-			goto skip_test;
+			return;
 		}

 		if (cpudata->boost_supported) {
@@ -243,20 +242,16 @@ static void amd_pstate_ut_check_freq(u32
 				pr_err("%s cpu%d policy_max=%d should be equal cpu_max=%d or cpu_nominal=%d !\n",
 					__func__, cpu, policy->max, policy->cpuinfo.max_freq,
 					cpudata->nominal_freq);
-				goto skip_test;
+				return;
 			}
 		} else {
 			amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
 			pr_err("%s cpu%d must support boost!\n", __func__, cpu);
-			goto skip_test;
+			return;
 		}
-		cpufreq_cpu_put(policy);
 	}

 	amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_PASS;
-	return;
-skip_test:
-	cpufreq_cpu_put(policy);
 }

 static int amd_pstate_set_mode(enum amd_pstate_mode mode)
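
The `__free(put_cpufreq_policy)` annotation adopted above releases the policy reference automatically when the variable goes out of scope, which is what lets the patch delete every `goto skip_test`/`cpufreq_cpu_put()` pair. A self-contained sketch of the underlying compiler mechanism, with hypothetical stand-in types rather than the kernel's implementation:

#include <stdio.h>
#include <stdlib.h>

#define __free(fn) __attribute__((cleanup(fn)))

struct policy {
	int cpu;
};

/* runs automatically when a __free(put_policy) variable leaves scope */
static void put_policy(struct policy **p)
{
	if (*p) {
		printf("released policy for cpu%d\n", (*p)->cpu);
		free(*p);
	}
}

static struct policy *get_policy(int cpu)
{
	struct policy *p = malloc(sizeof(*p));

	if (p)
		p->cpu = cpu;
	return p;
}

int main(void)
{
	for (int cpu = 0; cpu < 3; cpu++) {
		struct policy *p __free(put_policy) = get_policy(cpu);

		if (!p)
			continue;
		if (cpu == 1)
			continue;	/* reference is still dropped */
		printf("checking cpu%d\n", cpu);
	}
	return 0;
}
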
@@ -1,37 +0,0 @@
From 8937b7068ca30072c4c4cf4c22000112afbd6839 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Wed, 26 Feb 2025 01:49:23 -0600
Subject: cpufreq/amd-pstate-ut: Allow lowest nonlinear and lowest to be the
 same

Several Ryzen AI processors support the exact same value for lowest
nonlinear perf and lowest perf. Loosen up the unit tests to allow this
scenario.

Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
Reviewed-by: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
---
 drivers/cpufreq/amd-pstate-ut.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

--- a/drivers/cpufreq/amd-pstate-ut.c
+++ b/drivers/cpufreq/amd-pstate-ut.c
@@ -184,7 +184,7 @@ static void amd_pstate_ut_check_perf(u32

 		if (!((highest_perf >= nominal_perf) &&
 		    (nominal_perf > lowest_nonlinear_perf) &&
-		    (lowest_nonlinear_perf > lowest_perf) &&
+		    (lowest_nonlinear_perf >= lowest_perf) &&
 		    (lowest_perf > 0))) {
 			amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
 			pr_err("%s cpu%d highest=%d >= nominal=%d > lowest_nonlinear=%d > lowest=%d > 0, the formula is incorrect!\n",
@@ -217,7 +217,7 @@ static void amd_pstate_ut_check_freq(u32

 		if (!((policy->cpuinfo.max_freq >= cpudata->nominal_freq) &&
 		    (cpudata->nominal_freq > cpudata->lowest_nonlinear_freq) &&
-		    (cpudata->lowest_nonlinear_freq > policy->cpuinfo.min_freq) &&
+		    (cpudata->lowest_nonlinear_freq >= policy->cpuinfo.min_freq) &&
 		    (policy->cpuinfo.min_freq > 0))) {
 			amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
 			pr_err("%s cpu%d max=%d >= nominal=%d > lowest_nonlinear=%d > min=%d > 0, the formula is incorrect!\n",
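
The relaxed invariant the unit test now enforces can be written as a single predicate; only the lowest-nonlinear/lowest comparison becomes non-strict. A plain-C sketch with illustrative names:

#include <stdbool.h>
#include <stdio.h>

/* highest >= nominal > lowest_nonlinear >= lowest > 0 */
static bool perf_levels_valid(unsigned int highest, unsigned int nominal,
			      unsigned int lowest_nonlinear, unsigned int lowest)
{
	return highest >= nominal &&
	       nominal > lowest_nonlinear &&
	       lowest_nonlinear >= lowest &&	/* may legally be equal */
	       lowest > 0;
}

int main(void)
{
	/* a Ryzen AI style report: lowest nonlinear == lowest */
	printf("%d\n", perf_levels_valid(196, 154, 26, 26));	/* 1 */
	printf("%d\n", perf_levels_valid(196, 154, 20, 26));	/* 0 */
	return 0;
}
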
@@ -1,309 +0,0 @@
From 8cb701e059fa08dcb9ab74e3c84abc224ff72714 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Wed, 26 Feb 2025 01:49:24 -0600
Subject: cpufreq/amd-pstate-ut: Drop SUCCESS and FAIL enums

Enums are effectively used as a boolean and don't show
the return value of the failing call.

Instead of using enums, switch to returning the actual return
code from the unit test.

Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
Reviewed-by: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
---
 drivers/cpufreq/amd-pstate-ut.c | 143 ++++++++++++--------------------
 1 file changed, 55 insertions(+), 88 deletions(-)

--- a/drivers/cpufreq/amd-pstate-ut.c
+++ b/drivers/cpufreq/amd-pstate-ut.c
@@ -32,30 +32,20 @@

 #include "amd-pstate.h"

-/*
- * Abbreviations:
- * amd_pstate_ut: used as a shortform for AMD P-State unit test.
- * It helps to keep variable names smaller, simpler
- */
-enum amd_pstate_ut_result {
-	AMD_PSTATE_UT_RESULT_PASS,
-	AMD_PSTATE_UT_RESULT_FAIL,
-};

 struct amd_pstate_ut_struct {
 	const char *name;
-	void (*func)(u32 index);
-	enum amd_pstate_ut_result result;
+	int (*func)(u32 index);
 };

 /*
 * Kernel module for testing the AMD P-State unit test
 */
-static void amd_pstate_ut_acpi_cpc_valid(u32 index);
-static void amd_pstate_ut_check_enabled(u32 index);
-static void amd_pstate_ut_check_perf(u32 index);
-static void amd_pstate_ut_check_freq(u32 index);
-static void amd_pstate_ut_check_driver(u32 index);
+static int amd_pstate_ut_acpi_cpc_valid(u32 index);
+static int amd_pstate_ut_check_enabled(u32 index);
+static int amd_pstate_ut_check_perf(u32 index);
+static int amd_pstate_ut_check_freq(u32 index);
+static int amd_pstate_ut_check_driver(u32 index);

 static struct amd_pstate_ut_struct amd_pstate_ut_cases[] = {
 	{"amd_pstate_ut_acpi_cpc_valid", amd_pstate_ut_acpi_cpc_valid },
@@ -78,51 +68,46 @@ static bool get_shared_mem(void)
 /*
 * check the _CPC object is present in SBIOS.
 */
-static void amd_pstate_ut_acpi_cpc_valid(u32 index)
+static int amd_pstate_ut_acpi_cpc_valid(u32 index)
 {
-	if (acpi_cpc_valid())
-		amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_PASS;
-	else {
-		amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
+	if (!acpi_cpc_valid()) {
 		pr_err("%s the _CPC object is not present in SBIOS!\n", __func__);
+		return -EINVAL;
 	}
+
+	return 0;
 }

-static void amd_pstate_ut_pstate_enable(u32 index)
+/*
+ * check if amd pstate is enabled
+ */
+static int amd_pstate_ut_check_enabled(u32 index)
 {
-	int ret = 0;
 	u64 cppc_enable = 0;
+	int ret;
+
+	if (get_shared_mem())
+		return 0;

 	ret = rdmsrl_safe(MSR_AMD_CPPC_ENABLE, &cppc_enable);
 	if (ret) {
-		amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
 		pr_err("%s rdmsrl_safe MSR_AMD_CPPC_ENABLE ret=%d error!\n", __func__, ret);
-		return;
+		return ret;
 	}
-	if (cppc_enable)
-		amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_PASS;
-	else {
-		amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
+
+	if (!cppc_enable) {
 		pr_err("%s amd pstate must be enabled!\n", __func__);
+		return -EINVAL;
 	}
-}

-/*
- * check if amd pstate is enabled
- */
-static void amd_pstate_ut_check_enabled(u32 index)
-{
-	if (get_shared_mem())
-		amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_PASS;
-	else
-		amd_pstate_ut_pstate_enable(index);
+	return 0;
 }

 /*
 * check if performance values are reasonable.
 * highest_perf >= nominal_perf > lowest_nonlinear_perf > lowest_perf > 0
 */
-static void amd_pstate_ut_check_perf(u32 index)
+static int amd_pstate_ut_check_perf(u32 index)
 {
 	int cpu = 0, ret = 0;
 	u32 highest_perf = 0, nominal_perf = 0, lowest_nonlinear_perf = 0, lowest_perf = 0;
@@ -142,9 +127,8 @@ static void amd_pstate_ut_check_perf(u32
 		if (get_shared_mem()) {
 			ret = cppc_get_perf_caps(cpu, &cppc_perf);
 			if (ret) {
-				amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
 				pr_err("%s cppc_get_perf_caps ret=%d error!\n", __func__, ret);
-				return;
+				return ret;
 			}

 			highest_perf = cppc_perf.highest_perf;
@@ -154,9 +138,8 @@ static void amd_pstate_ut_check_perf(u32
 		} else {
 			ret = rdmsrl_safe_on_cpu(cpu, MSR_AMD_CPPC_CAP1, &cap1);
 			if (ret) {
-				amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
 				pr_err("%s read CPPC_CAP1 ret=%d error!\n", __func__, ret);
-				return;
+				return ret;
 			}

 			highest_perf = AMD_CPPC_HIGHEST_PERF(cap1);
@@ -169,32 +152,30 @@ static void amd_pstate_ut_check_perf(u32
 		if (highest_perf != cur_perf.highest_perf && !cpudata->hw_prefcore) {
 			pr_err("%s cpu%d highest=%d %d highest perf doesn't match\n",
 				__func__, cpu, highest_perf, cur_perf.highest_perf);
-			return;
+			return -EINVAL;
 		}
 		if (nominal_perf != cur_perf.nominal_perf ||
 		    (lowest_nonlinear_perf != cur_perf.lowest_nonlinear_perf) ||
 		    (lowest_perf != cur_perf.lowest_perf)) {
-			amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
 			pr_err("%s cpu%d nominal=%d %d lowest_nonlinear=%d %d lowest=%d %d, they should be equal!\n",
 				__func__, cpu, nominal_perf, cur_perf.nominal_perf,
 				lowest_nonlinear_perf, cur_perf.lowest_nonlinear_perf,
 				lowest_perf, cur_perf.lowest_perf);
-			return;
+			return -EINVAL;
 		}

 		if (!((highest_perf >= nominal_perf) &&
 		    (nominal_perf > lowest_nonlinear_perf) &&
 		    (lowest_nonlinear_perf >= lowest_perf) &&
 		    (lowest_perf > 0))) {
-			amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
 			pr_err("%s cpu%d highest=%d >= nominal=%d > lowest_nonlinear=%d > lowest=%d > 0, the formula is incorrect!\n",
 				__func__, cpu, highest_perf, nominal_perf,
 				lowest_nonlinear_perf, lowest_perf);
-			return;
+			return -EINVAL;
 		}
 	}

-	amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_PASS;
+	return 0;
 }

 /*
@@ -202,7 +183,7 @@ static void amd_pstate_ut_check_perf(u32
 * max_freq >= nominal_freq > lowest_nonlinear_freq > min_freq > 0
 * check max freq when set support boost mode.
 */
-static void amd_pstate_ut_check_freq(u32 index)
+static int amd_pstate_ut_check_freq(u32 index)
 {
 	int cpu = 0;
 	struct amd_cpudata *cpudata = NULL;
@@ -219,39 +200,33 @@ static void amd_pstate_ut_check_freq(u32
 		    (cpudata->nominal_freq > cpudata->lowest_nonlinear_freq) &&
 		    (cpudata->lowest_nonlinear_freq >= policy->cpuinfo.min_freq) &&
 		    (policy->cpuinfo.min_freq > 0))) {
-			amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
 			pr_err("%s cpu%d max=%d >= nominal=%d > lowest_nonlinear=%d > min=%d > 0, the formula is incorrect!\n",
 				__func__, cpu, policy->cpuinfo.max_freq, cpudata->nominal_freq,
 				cpudata->lowest_nonlinear_freq, policy->cpuinfo.min_freq);
-			return;
+			return -EINVAL;
 		}

 		if (cpudata->lowest_nonlinear_freq != policy->min) {
-			amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
 			pr_err("%s cpu%d cpudata_lowest_nonlinear_freq=%d policy_min=%d, they should be equal!\n",
 				__func__, cpu, cpudata->lowest_nonlinear_freq, policy->min);
-			return;
+			return -EINVAL;
 		}

 		if (cpudata->boost_supported) {
-			if ((policy->max == policy->cpuinfo.max_freq) ||
-			    (policy->max == cpudata->nominal_freq))
-				amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_PASS;
-			else {
-				amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
+			if ((policy->max != policy->cpuinfo.max_freq) &&
+			    (policy->max != cpudata->nominal_freq)) {
 				pr_err("%s cpu%d policy_max=%d should be equal cpu_max=%d or cpu_nominal=%d !\n",
 					__func__, cpu, policy->max, policy->cpuinfo.max_freq,
 					cpudata->nominal_freq);
-				return;
+				return -EINVAL;
 			}
 		} else {
-			amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
 			pr_err("%s cpu%d must support boost!\n", __func__, cpu);
-			return;
+			return -EINVAL;
 		}
 	}

-	amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_PASS;
+	return 0;
 }

 static int amd_pstate_set_mode(enum amd_pstate_mode mode)
@@ -263,32 +238,28 @@ static int amd_pstate_set_mode(enum amd_
 	return amd_pstate_update_status(mode_str, strlen(mode_str));
 }

-static void amd_pstate_ut_check_driver(u32 index)
+static int amd_pstate_ut_check_driver(u32 index)
 {
 	enum amd_pstate_mode mode1, mode2 = AMD_PSTATE_DISABLE;
-	int ret;

 	for (mode1 = AMD_PSTATE_DISABLE; mode1 < AMD_PSTATE_MAX; mode1++) {
-		ret = amd_pstate_set_mode(mode1);
+		int ret = amd_pstate_set_mode(mode1);
 		if (ret)
-			goto out;
+			return ret;
 		for (mode2 = AMD_PSTATE_DISABLE; mode2 < AMD_PSTATE_MAX; mode2++) {
 			if (mode1 == mode2)
 				continue;
 			ret = amd_pstate_set_mode(mode2);
-			if (ret)
-				goto out;
+			if (ret) {
+				pr_err("%s: failed to update status for %s->%s\n", __func__,
+					amd_pstate_get_mode_string(mode1),
+					amd_pstate_get_mode_string(mode2));
+				return ret;
+			}
 		}
 	}
-out:
-	if (ret)
-		pr_warn("%s: failed to update status for %s->%s: %d\n", __func__,
-			amd_pstate_get_mode_string(mode1),
-			amd_pstate_get_mode_string(mode2), ret);
-
-	amd_pstate_ut_cases[index].result = ret ?
-					    AMD_PSTATE_UT_RESULT_FAIL :
-					    AMD_PSTATE_UT_RESULT_PASS;
+
+	return 0;
 }

 static int __init amd_pstate_ut_init(void)
@@ -296,16 +267,12 @@ static int __init amd_pstate_ut_init(voi
 	u32 i = 0, arr_size = ARRAY_SIZE(amd_pstate_ut_cases);

 	for (i = 0; i < arr_size; i++) {
-		amd_pstate_ut_cases[i].func(i);
-		switch (amd_pstate_ut_cases[i].result) {
-		case AMD_PSTATE_UT_RESULT_PASS:
+		int ret = amd_pstate_ut_cases[i].func(i);
+
+		if (ret)
+			pr_err("%-4d %-20s\t fail: %d!\n", i+1, amd_pstate_ut_cases[i].name, ret);
+		else
 			pr_info("%-4d %-20s\t success!\n", i+1, amd_pstate_ut_cases[i].name);
-			break;
-		case AMD_PSTATE_UT_RESULT_FAIL:
-		default:
-			pr_info("%-4d %-20s\t fail!\n", i+1, amd_pstate_ut_cases[i].name);
-			break;
-		}
 	}

	return 0;
@@ -1,50 +0,0 @@
From c553e0165997349a3f831fa04bdd7f61913a3442 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Wed, 26 Feb 2025 01:49:25 -0600
Subject: cpufreq/amd-pstate-ut: Run on all of the correct CPUs

If a CPU is missing a policy or one has been offlined then the unit test
is skipped for the rest of the CPUs on the system.

Instead, iterate online CPUs and skip any missing policies to allow
continuing to test the rest of them.

Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
Reviewed-by: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
---
 drivers/cpufreq/amd-pstate-ut.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

--- a/drivers/cpufreq/amd-pstate-ut.c
+++ b/drivers/cpufreq/amd-pstate-ut.c
@@ -116,12 +116,12 @@ static int amd_pstate_ut_check_perf(u32
 	struct amd_cpudata *cpudata = NULL;
 	union perf_cached cur_perf;

-	for_each_possible_cpu(cpu) {
+	for_each_online_cpu(cpu) {
 		struct cpufreq_policy *policy __free(put_cpufreq_policy) = NULL;

 		policy = cpufreq_cpu_get(cpu);
 		if (!policy)
-			break;
+			continue;
 		cpudata = policy->driver_data;

 		if (get_shared_mem()) {
@@ -188,12 +188,12 @@ static int amd_pstate_ut_check_freq(u32
 	int cpu = 0;
 	struct amd_cpudata *cpudata = NULL;

-	for_each_possible_cpu(cpu) {
+	for_each_online_cpu(cpu) {
 		struct cpufreq_policy *policy __free(put_cpufreq_policy) = NULL;

 		policy = cpufreq_cpu_get(cpu);
 		if (!policy)
-			break;
+			continue;
 		cpudata = policy->driver_data;

 		if (!((policy->cpuinfo.max_freq >= cpudata->nominal_freq) &&
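
The control-flow change is small but easy to get wrong: `break` abandons every remaining CPU, while `continue` only skips the one without a policy. A toy model of the corrected loop, using hypothetical data rather than driver code:

#include <stdio.h>

#define NR_CPUS 4

/* NULL models an online CPU that has no cpufreq policy */
static const char *policy_of[NR_CPUS] = { "policy0", NULL, "policy2", "policy3" };

int main(void)
{
	for (int cpu = 0; cpu < NR_CPUS; cpu++) {
		if (!policy_of[cpu])
			continue;	/* skip this CPU, keep testing the rest */
		printf("cpu%d: checking %s\n", cpu, policy_of[cpu]);
	}
	return 0;
}
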
@@ -1,42 +0,0 @@
From c4197fd693cb98a8a71557187a7cf592d6b68b3c Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Wed, 26 Feb 2025 01:49:26 -0600
Subject: cpufreq/amd-pstate-ut: Adjust variable scope

In amd_pstate_ut_check_freq() and amd_pstate_ut_check_perf() the cpudata
variable is only needed in the scope of the for loop. Move it there.

Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
Reviewed-by: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
---
 drivers/cpufreq/amd-pstate-ut.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

--- a/drivers/cpufreq/amd-pstate-ut.c
+++ b/drivers/cpufreq/amd-pstate-ut.c
@@ -113,11 +113,11 @@ static int amd_pstate_ut_check_perf(u32
 	u32 highest_perf = 0, nominal_perf = 0, lowest_nonlinear_perf = 0, lowest_perf = 0;
 	u64 cap1 = 0;
 	struct cppc_perf_caps cppc_perf;
-	struct amd_cpudata *cpudata = NULL;
 	union perf_cached cur_perf;

 	for_each_online_cpu(cpu) {
 		struct cpufreq_policy *policy __free(put_cpufreq_policy) = NULL;
+		struct amd_cpudata *cpudata;

 		policy = cpufreq_cpu_get(cpu);
 		if (!policy)
@@ -186,10 +186,10 @@ static int amd_pstate_ut_check_perf(u32
 static int amd_pstate_ut_check_freq(u32 index)
 {
 	int cpu = 0;
-	struct amd_cpudata *cpudata = NULL;

 	for_each_online_cpu(cpu) {
 		struct cpufreq_policy *policy __free(put_cpufreq_policy) = NULL;
+		struct amd_cpudata *cpudata;

 		policy = cpufreq_cpu_get(cpu);
 		if (!policy)
@@ -1,123 +0,0 @@
From 19c375251767f49b62894d3b4782f0b8b01313b8 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Wed, 26 Feb 2025 01:49:27 -0600
Subject: cpufreq/amd-pstate: Replace all AMD_CPPC_* macros with masks

Bitfield masks are easier to follow and less error prone.

Reviewed-by: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
---
 arch/x86/include/asm/msr-index.h | 20 +++++++++++---------
 arch/x86/kernel/acpi/cppc.c      |  4 +++-
 drivers/cpufreq/amd-pstate-ut.c  |  9 +++++----
 drivers/cpufreq/amd-pstate.c     | 16 ++++++----------
 4 files changed, 25 insertions(+), 24 deletions(-)

--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -709,15 +709,17 @@
 #define MSR_AMD_CPPC_REQ		0xc00102b3
 #define MSR_AMD_CPPC_STATUS		0xc00102b4

-#define AMD_CPPC_LOWEST_PERF(x)		(((x) >> 0) & 0xff)
-#define AMD_CPPC_LOWNONLIN_PERF(x)	(((x) >> 8) & 0xff)
-#define AMD_CPPC_NOMINAL_PERF(x)	(((x) >> 16) & 0xff)
-#define AMD_CPPC_HIGHEST_PERF(x)	(((x) >> 24) & 0xff)
+/* Masks for use with MSR_AMD_CPPC_CAP1 */
+#define AMD_CPPC_LOWEST_PERF_MASK	GENMASK(7, 0)
+#define AMD_CPPC_LOWNONLIN_PERF_MASK	GENMASK(15, 8)
+#define AMD_CPPC_NOMINAL_PERF_MASK	GENMASK(23, 16)
+#define AMD_CPPC_HIGHEST_PERF_MASK	GENMASK(31, 24)

-#define AMD_CPPC_MAX_PERF(x)		(((x) & 0xff) << 0)
-#define AMD_CPPC_MIN_PERF(x)		(((x) & 0xff) << 8)
-#define AMD_CPPC_DES_PERF(x)		(((x) & 0xff) << 16)
-#define AMD_CPPC_ENERGY_PERF_PREF(x)	(((x) & 0xff) << 24)
+/* Masks for use with MSR_AMD_CPPC_REQ */
+#define AMD_CPPC_MAX_PERF_MASK		GENMASK(7, 0)
+#define AMD_CPPC_MIN_PERF_MASK		GENMASK(15, 8)
+#define AMD_CPPC_DES_PERF_MASK		GENMASK(23, 16)
+#define AMD_CPPC_EPP_PERF_MASK		GENMASK(31, 24)

 /* AMD Performance Counter Global Status and Control MSRs */
 #define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS	0xc0000300
--- a/arch/x86/kernel/acpi/cppc.c
+++ b/arch/x86/kernel/acpi/cppc.c
@@ -4,6 +4,8 @@
 * Copyright (c) 2016, Intel Corporation.
 */

+#include <linux/bitfield.h>
+
 #include <acpi/cppc_acpi.h>
 #include <asm/msr.h>
 #include <asm/processor.h>
@@ -149,7 +151,7 @@ int amd_get_highest_perf(unsigned int cp
 		if (ret)
 			goto out;

-		val = AMD_CPPC_HIGHEST_PERF(val);
+		val = FIELD_GET(AMD_CPPC_HIGHEST_PERF_MASK, val);
 	} else {
 		ret = cppc_get_highest_perf(cpu, &val);
 		if (ret)
--- a/drivers/cpufreq/amd-pstate-ut.c
+++ b/drivers/cpufreq/amd-pstate-ut.c
@@ -22,6 +22,7 @@

 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

+#include <linux/bitfield.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
@@ -142,10 +143,10 @@ static int amd_pstate_ut_check_perf(u32
 				return ret;
 			}

-			highest_perf = AMD_CPPC_HIGHEST_PERF(cap1);
-			nominal_perf = AMD_CPPC_NOMINAL_PERF(cap1);
-			lowest_nonlinear_perf = AMD_CPPC_LOWNONLIN_PERF(cap1);
-			lowest_perf = AMD_CPPC_LOWEST_PERF(cap1);
+			highest_perf = FIELD_GET(AMD_CPPC_HIGHEST_PERF_MASK, cap1);
+			nominal_perf = FIELD_GET(AMD_CPPC_NOMINAL_PERF_MASK, cap1);
+			lowest_nonlinear_perf = FIELD_GET(AMD_CPPC_LOWNONLIN_PERF_MASK, cap1);
+			lowest_perf = FIELD_GET(AMD_CPPC_LOWEST_PERF_MASK, cap1);
 		}

 		cur_perf = READ_ONCE(cpudata->perf);
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -89,11 +89,6 @@ static bool cppc_enabled;
 static bool amd_pstate_prefcore = true;
 static struct quirk_entry *quirks;

-#define AMD_CPPC_MAX_PERF_MASK		GENMASK(7, 0)
-#define AMD_CPPC_MIN_PERF_MASK		GENMASK(15, 8)
-#define AMD_CPPC_DES_PERF_MASK		GENMASK(23, 16)
-#define AMD_CPPC_EPP_PERF_MASK		GENMASK(31, 24)
-
 /*
 * AMD Energy Preference Performance (EPP)
 * The EPP is used in the CCLK DPM controller to drive
@@ -439,12 +434,13 @@ static int msr_init_perf(struct amd_cpud

 	perf.highest_perf = numerator;
 	perf.max_limit_perf = numerator;
-	perf.min_limit_perf = AMD_CPPC_LOWEST_PERF(cap1);
-	perf.nominal_perf = AMD_CPPC_NOMINAL_PERF(cap1);
-	perf.lowest_nonlinear_perf = AMD_CPPC_LOWNONLIN_PERF(cap1);
-	perf.lowest_perf = AMD_CPPC_LOWEST_PERF(cap1);
+	perf.min_limit_perf = FIELD_GET(AMD_CPPC_LOWEST_PERF_MASK, cap1);
+	perf.nominal_perf = FIELD_GET(AMD_CPPC_NOMINAL_PERF_MASK, cap1);
+	perf.lowest_nonlinear_perf = FIELD_GET(AMD_CPPC_LOWNONLIN_PERF_MASK, cap1);
+	perf.lowest_perf = FIELD_GET(AMD_CPPC_LOWEST_PERF_MASK, cap1);
 	WRITE_ONCE(cpudata->perf, perf);
-	WRITE_ONCE(cpudata->prefcore_ranking, AMD_CPPC_HIGHEST_PERF(cap1));
+	WRITE_ONCE(cpudata->prefcore_ranking, FIELD_GET(AMD_CPPC_HIGHEST_PERF_MASK, cap1));
+
 	return 0;
 }
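
The GENMASK()/FIELD_GET()/FIELD_PREP() helpers encode the shift implicitly in the mask, which is what makes the new definitions harder to misuse than open-coded shift-and-AND macros. A standalone demonstration using simplified stand-ins for the kernel macros (the real ones live in linux/bits.h and linux/bitfield.h):

#include <stdint.h>
#include <stdio.h>

/* simplified stand-ins for the kernel's GENMASK()/FIELD_PREP()/FIELD_GET() */
#define GENMASK(h, l)		((~0ULL << (l)) & (~0ULL >> (63 - (h))))
#define LOW_BIT(mask)		((mask) & ~((mask) << 1))
#define FIELD_PREP(mask, val)	(((uint64_t)(val) * LOW_BIT(mask)) & (mask))
#define FIELD_GET(mask, reg)	(((reg) & (mask)) / LOW_BIT(mask))

#define AMD_CPPC_LOWEST_PERF_MASK	GENMASK(7, 0)
#define AMD_CPPC_NOMINAL_PERF_MASK	GENMASK(23, 16)

int main(void)
{
	uint64_t cap1 = 0;

	/* the shift is implied by the mask, so it cannot drift out of sync */
	cap1 |= FIELD_PREP(AMD_CPPC_NOMINAL_PERF_MASK, 0x9a);
	cap1 |= FIELD_PREP(AMD_CPPC_LOWEST_PERF_MASK, 0x10);

	printf("cap1=%#llx nominal=%llu lowest=%llu\n",
	       (unsigned long long)cap1,
	       (unsigned long long)FIELD_GET(AMD_CPPC_NOMINAL_PERF_MASK, cap1),
	       (unsigned long long)FIELD_GET(AMD_CPPC_LOWEST_PERF_MASK, cap1));
	return 0;
}
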
@@ -1,60 +0,0 @@
From bb7fadf4a86e19b52cbe850c9274bfa643d3ce52 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Wed, 26 Feb 2025 01:49:28 -0600
Subject: cpufreq/amd-pstate: Cache CPPC request in shared mem case too

In order to prevent a potential write for shmem_update_perf(),
cache the request into the cppc_req_cached variable normally only
used for the MSR case.

This adds symmetry into the code and potentially avoids extra writes.

Reviewed-by: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
---
 drivers/cpufreq/amd-pstate.c | 22 +++++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -496,6 +496,8 @@ static int shmem_update_perf(struct amd_
 			     u8 des_perf, u8 max_perf, u8 epp, bool fast_switch)
 {
 	struct cppc_perf_ctrls perf_ctrls;
+	u64 value, prev;
+	int ret;

 	if (cppc_state == AMD_PSTATE_ACTIVE) {
 		int ret = shmem_set_epp(cpudata, epp);
@@ -504,11 +506,29 @@ static int shmem_update_perf(struct amd_
 			return ret;
 	}

+	value = prev = READ_ONCE(cpudata->cppc_req_cached);
+
+	value &= ~(AMD_CPPC_MAX_PERF_MASK | AMD_CPPC_MIN_PERF_MASK |
+		   AMD_CPPC_DES_PERF_MASK | AMD_CPPC_EPP_PERF_MASK);
+	value |= FIELD_PREP(AMD_CPPC_MAX_PERF_MASK, max_perf);
+	value |= FIELD_PREP(AMD_CPPC_DES_PERF_MASK, des_perf);
+	value |= FIELD_PREP(AMD_CPPC_MIN_PERF_MASK, min_perf);
+	value |= FIELD_PREP(AMD_CPPC_EPP_PERF_MASK, epp);
+
+	if (value == prev)
+		return 0;
+
 	perf_ctrls.max_perf = max_perf;
 	perf_ctrls.min_perf = min_perf;
 	perf_ctrls.desired_perf = des_perf;

-	return cppc_set_perf(cpudata->cpu, &perf_ctrls);
+	ret = cppc_set_perf(cpudata->cpu, &perf_ctrls);
+	if (ret)
+		return ret;
+
+	WRITE_ONCE(cpudata->cppc_req_cached, value);
+
+	return 0;
 }

 static inline bool amd_pstate_sample(struct amd_cpudata *cpudata)
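
The pattern the patch introduces is a plain read-modify-write against a cached copy: the expensive firmware call is skipped when the composed request is identical to the last one, and the cache is refreshed only after the write succeeds. A minimal model (the field layout mirrors the CPPC request word; the helpers are stand-ins):

#include <stdint.h>
#include <stdio.h>

static uint64_t cppc_req_cached;	/* per-CPU in the real driver */

static int firmware_write(uint64_t value)
{
	printf("slow shared-memory write: %#llx\n", (unsigned long long)value);
	return 0;
}

static int update_perf(uint8_t min, uint8_t des, uint8_t max, uint8_t epp)
{
	uint64_t prev = cppc_req_cached;
	uint64_t value = prev & ~0xffffffffULL;	/* clear max/min/des/epp */
	int ret;

	value |= (uint64_t)max << 0;
	value |= (uint64_t)min << 8;
	value |= (uint64_t)des << 16;
	value |= (uint64_t)epp << 24;

	if (value == prev)		/* nothing changed: skip the write */
		return 0;

	ret = firmware_write(value);
	if (ret)
		return ret;

	cppc_req_cached = value;	/* cache only after a successful write */
	return 0;
}

int main(void)
{
	update_perf(0x10, 0, 0xc4, 0x40);	/* performs the write */
	update_perf(0x10, 0, 0xc4, 0x40);	/* identical request: skipped */
	return 0;
}
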
@@ -1,318 +0,0 @@
From e02f8a14d44223160d348d5841cc3dd916a14401 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Wed, 26 Feb 2025 01:49:29 -0600
Subject: cpufreq/amd-pstate: Move all EPP tracing into *_update_perf and
 *_set_epp functions

The EPP tracing is done by the caller today, but this precludes
including information about whether the CPPC request has changed.

Move it into the update_perf and set_epp functions and include information
about whether the request has changed from the last one.
amd_pstate_update_perf() and amd_pstate_set_epp() now require the policy
as an argument instead of the cpudata.

Reviewed-by: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
---
 drivers/cpufreq/amd-pstate-trace.h |  13 +++-
 drivers/cpufreq/amd-pstate.c       | 118 +++++++++++++++++------------
 2 files changed, 80 insertions(+), 51 deletions(-)

--- a/drivers/cpufreq/amd-pstate-trace.h
+++ b/drivers/cpufreq/amd-pstate-trace.h
@@ -90,7 +90,8 @@ TRACE_EVENT(amd_pstate_epp_perf,
 		 u8 epp,
 		 u8 min_perf,
 		 u8 max_perf,
-		 bool boost
+		 bool boost,
+		 bool changed
 		 ),

 	TP_ARGS(cpu_id,
@@ -98,7 +99,8 @@ TRACE_EVENT(amd_pstate_epp_perf,
 		epp,
 		min_perf,
 		max_perf,
-		boost),
+		boost,
+		changed),

 	TP_STRUCT__entry(
 		__field(unsigned int, cpu_id)
@@ -107,6 +109,7 @@ TRACE_EVENT(amd_pstate_epp_perf,
 		__field(u8, min_perf)
 		__field(u8, max_perf)
 		__field(bool, boost)
+		__field(bool, changed)
 		),

 	TP_fast_assign(
@@ -116,15 +119,17 @@ TRACE_EVENT(amd_pstate_epp_perf,
 		__entry->min_perf = min_perf;
 		__entry->max_perf = max_perf;
 		__entry->boost = boost;
+		__entry->changed = changed;
 		),

-	TP_printk("cpu%u: [%hhu<->%hhu]/%hhu, epp=%hhu, boost=%u",
+	TP_printk("cpu%u: [%hhu<->%hhu]/%hhu, epp=%hhu, boost=%u, changed=%u",
 		  (unsigned int)__entry->cpu_id,
 		  (u8)__entry->min_perf,
 		  (u8)__entry->max_perf,
 		  (u8)__entry->highest_perf,
 		  (u8)__entry->epp,
-		  (bool)__entry->boost
+		  (bool)__entry->boost,
+		  (bool)__entry->changed
 		  )
 );

--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -228,9 +228,10 @@ static u8 shmem_get_epp(struct amd_cpuda
 	return FIELD_GET(AMD_CPPC_EPP_PERF_MASK, epp);
 }

-static int msr_update_perf(struct amd_cpudata *cpudata, u8 min_perf,
+static int msr_update_perf(struct cpufreq_policy *policy, u8 min_perf,
 			   u8 des_perf, u8 max_perf, u8 epp, bool fast_switch)
 {
+	struct amd_cpudata *cpudata = policy->driver_data;
 	u64 value, prev;

 	value = prev = READ_ONCE(cpudata->cppc_req_cached);
@@ -242,6 +243,18 @@ static int msr_update_perf(struct amd_cp
 	value |= FIELD_PREP(AMD_CPPC_MIN_PERF_MASK, min_perf);
 	value |= FIELD_PREP(AMD_CPPC_EPP_PERF_MASK, epp);

+	if (trace_amd_pstate_epp_perf_enabled()) {
+		union perf_cached perf = READ_ONCE(cpudata->perf);
+
+		trace_amd_pstate_epp_perf(cpudata->cpu,
+					  perf.highest_perf,
+					  epp,
+					  min_perf,
+					  max_perf,
+					  policy->boost_enabled,
+					  value != prev);
+	}
+
 	if (value == prev)
 		return 0;

@@ -256,24 +269,26 @@ static int msr_update_perf(struct amd_cp
 	}

 	WRITE_ONCE(cpudata->cppc_req_cached, value);
-	WRITE_ONCE(cpudata->epp_cached, epp);
+	if (epp != cpudata->epp_cached)
+		WRITE_ONCE(cpudata->epp_cached, epp);

 	return 0;
 }

 DEFINE_STATIC_CALL(amd_pstate_update_perf, msr_update_perf);

-static inline int amd_pstate_update_perf(struct amd_cpudata *cpudata,
+static inline int amd_pstate_update_perf(struct cpufreq_policy *policy,
 					  u8 min_perf, u8 des_perf,
 					  u8 max_perf, u8 epp,
 					  bool fast_switch)
 {
-	return static_call(amd_pstate_update_perf)(cpudata, min_perf, des_perf,
+	return static_call(amd_pstate_update_perf)(policy, min_perf, des_perf,
 						   max_perf, epp, fast_switch);
 }

-static int msr_set_epp(struct amd_cpudata *cpudata, u8 epp)
+static int msr_set_epp(struct cpufreq_policy *policy, u8 epp)
 {
+	struct amd_cpudata *cpudata = policy->driver_data;
 	u64 value, prev;
 	int ret;

@@ -281,6 +296,19 @@ static int msr_set_epp(struct amd_cpudat
 	value &= ~AMD_CPPC_EPP_PERF_MASK;
 	value |= FIELD_PREP(AMD_CPPC_EPP_PERF_MASK, epp);

+	if (trace_amd_pstate_epp_perf_enabled()) {
+		union perf_cached perf = cpudata->perf;
+
+		trace_amd_pstate_epp_perf(cpudata->cpu, perf.highest_perf,
+					  epp,
+					  FIELD_GET(AMD_CPPC_MIN_PERF_MASK,
+						    cpudata->cppc_req_cached),
+					  FIELD_GET(AMD_CPPC_MAX_PERF_MASK,
+						    cpudata->cppc_req_cached),
+					  policy->boost_enabled,
+					  value != prev);
+	}
+
 	if (value == prev)
 		return 0;

@@ -299,15 +327,29 @@ static int msr_set_epp(struct amd_cpudat

 DEFINE_STATIC_CALL(amd_pstate_set_epp, msr_set_epp);

-static inline int amd_pstate_set_epp(struct amd_cpudata *cpudata, u8 epp)
+static inline int amd_pstate_set_epp(struct cpufreq_policy *policy, u8 epp)
 {
-	return static_call(amd_pstate_set_epp)(cpudata, epp);
+	return static_call(amd_pstate_set_epp)(policy, epp);
 }

-static int shmem_set_epp(struct amd_cpudata *cpudata, u8 epp)
+static int shmem_set_epp(struct cpufreq_policy *policy, u8 epp)
 {
-	int ret;
+	struct amd_cpudata *cpudata = policy->driver_data;
 	struct cppc_perf_ctrls perf_ctrls;
+	int ret;
+
+	if (trace_amd_pstate_epp_perf_enabled()) {
+		union perf_cached perf = cpudata->perf;
+
+		trace_amd_pstate_epp_perf(cpudata->cpu, perf.highest_perf,
+					  epp,
+					  FIELD_GET(AMD_CPPC_MIN_PERF_MASK,
+						    cpudata->cppc_req_cached),
+					  FIELD_GET(AMD_CPPC_MAX_PERF_MASK,
+						    cpudata->cppc_req_cached),
+					  policy->boost_enabled,
+					  epp != cpudata->epp_cached);
+	}

 	if (epp == cpudata->epp_cached)
 		return 0;
@@ -339,17 +381,7 @@ static int amd_pstate_set_energy_pref_in
 		return -EBUSY;
 	}

-	if (trace_amd_pstate_epp_perf_enabled()) {
-		union perf_cached perf = READ_ONCE(cpudata->perf);
-
-		trace_amd_pstate_epp_perf(cpudata->cpu, perf.highest_perf,
-					  epp,
-					  FIELD_GET(AMD_CPPC_MIN_PERF_MASK, cpudata->cppc_req_cached),
-					  FIELD_GET(AMD_CPPC_MAX_PERF_MASK, cpudata->cppc_req_cached),
-					  policy->boost_enabled);
-	}
-
-	return amd_pstate_set_epp(cpudata, epp);
+	return amd_pstate_set_epp(policy, epp);
 }

 static inline int msr_cppc_enable(bool enable)
@@ -492,15 +524,16 @@ static inline int amd_pstate_init_perf(s
 	return static_call(amd_pstate_init_perf)(cpudata);
 }

-static int shmem_update_perf(struct amd_cpudata *cpudata, u8 min_perf,
+static int shmem_update_perf(struct cpufreq_policy *policy, u8 min_perf,
 			     u8 des_perf, u8 max_perf, u8 epp, bool fast_switch)
 {
+	struct amd_cpudata *cpudata = policy->driver_data;
 	struct cppc_perf_ctrls perf_ctrls;
 	u64 value, prev;
 	int ret;

 	if (cppc_state == AMD_PSTATE_ACTIVE) {
-		int ret = shmem_set_epp(cpudata, epp);
+		int ret = shmem_set_epp(policy, epp);

 		if (ret)
 			return ret;
@@ -515,6 +548,18 @@ static int shmem_update_perf(struct amd_
 	value |= FIELD_PREP(AMD_CPPC_MIN_PERF_MASK, min_perf);
 	value |= FIELD_PREP(AMD_CPPC_EPP_PERF_MASK, epp);

+	if (trace_amd_pstate_epp_perf_enabled()) {
+		union perf_cached perf = READ_ONCE(cpudata->perf);
+
+		trace_amd_pstate_epp_perf(cpudata->cpu,
+					  perf.highest_perf,
+					  epp,
+					  min_perf,
+					  max_perf,
+					  policy->boost_enabled,
+					  value != prev);
+	}
+
 	if (value == prev)
 		return 0;

@@ -592,7 +637,7 @@ static void amd_pstate_update(struct amd
 				cpudata->cpu, fast_switch);
 	}

-	amd_pstate_update_perf(cpudata, min_perf, des_perf, max_perf, 0, fast_switch);
+	amd_pstate_update_perf(policy, min_perf, des_perf, max_perf, 0, fast_switch);
 }

 static int amd_pstate_verify(struct cpufreq_policy_data *policy_data)
@@ -1525,7 +1570,7 @@ static int amd_pstate_epp_cpu_init(struc
 		return ret;
 	WRITE_ONCE(cpudata->cppc_req_cached, value);
 	}
-	ret = amd_pstate_set_epp(cpudata, cpudata->epp_default);
+	ret = amd_pstate_set_epp(policy, cpudata->epp_default);
 	if (ret)
 		return ret;

@@ -1566,14 +1611,8 @@ static int amd_pstate_epp_update_limit(s
 		epp = READ_ONCE(cpudata->epp_cached);

 	perf = READ_ONCE(cpudata->perf);
-	if (trace_amd_pstate_epp_perf_enabled()) {
-		trace_amd_pstate_epp_perf(cpudata->cpu, perf.highest_perf, epp,
-					  perf.min_limit_perf,
-					  perf.max_limit_perf,
-					  policy->boost_enabled);
-	}

-	return amd_pstate_update_perf(cpudata, perf.min_limit_perf, 0U,
+	return amd_pstate_update_perf(policy, perf.min_limit_perf, 0U,
 				      perf.max_limit_perf, epp, false);
 }

@@ -1605,20 +1644,12 @@ static int amd_pstate_epp_set_policy(str

 static int amd_pstate_epp_reenable(struct cpufreq_policy *policy)
 {
-	struct amd_cpudata *cpudata = policy->driver_data;
-	union perf_cached perf = READ_ONCE(cpudata->perf);
 	int ret;

 	ret = amd_pstate_cppc_enable(true);
 	if (ret)
 		pr_err("failed to enable amd pstate during resume, return %d\n", ret);

-	if (trace_amd_pstate_epp_perf_enabled()) {
-		trace_amd_pstate_epp_perf(cpudata->cpu, perf.highest_perf,
-					  cpudata->epp_cached,
-					  FIELD_GET(AMD_CPPC_MIN_PERF_MASK, cpudata->cppc_req_cached),
-					  perf.highest_perf, policy->boost_enabled);
-	}

 	return amd_pstate_epp_update_limit(policy);
 }
@@ -1646,14 +1677,7 @@ static int amd_pstate_epp_cpu_offline(st
 	if (cpudata->suspended)
 		return 0;

-	if (trace_amd_pstate_epp_perf_enabled()) {
-		trace_amd_pstate_epp_perf(cpudata->cpu, perf.highest_perf,
-					  AMD_CPPC_EPP_BALANCE_POWERSAVE,
-					  perf.lowest_perf, perf.lowest_perf,
-					  policy->boost_enabled);
-	}
-
-	return amd_pstate_update_perf(cpudata, perf.lowest_perf, 0, perf.lowest_perf,
+	return amd_pstate_update_perf(policy, perf.lowest_perf, 0, perf.lowest_perf,
 				      AMD_CPPC_EPP_BALANCE_POWERSAVE, false);
 }
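
Note the ordering this patch establishes: the tracepoint fires before the `value == prev` early return, so even requests that end up being skipped are visible, and the new `changed` field records whether a hardware write actually followed. A reduced model of that flow (the kernel's static-key check is modeled with a plain flag):

#include <stdbool.h>
#include <stdio.h>

static bool trace_enabled;	/* the kernel gates this with a static key */

static void trace_epp_perf(int cpu, unsigned int epp, bool changed)
{
	printf("cpu%d: epp=%u changed=%d\n", cpu, epp, changed);
}

static int set_epp(int cpu, unsigned int epp, unsigned int *cached)
{
	/* trace first, so skipped (unchanged) requests still show up */
	if (trace_enabled)
		trace_epp_perf(cpu, epp, epp != *cached);

	if (epp == *cached)
		return 0;	/* no hardware write needed */

	*cached = epp;		/* the MSR/shared-memory write would go here */
	return 0;
}

int main(void)
{
	unsigned int cached = 0;

	trace_enabled = true;
	set_epp(0, 3, &cached);	/* changed=1, write happens */
	set_epp(0, 3, &cached);	/* changed=0, write skipped */
	return 0;
}
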
@@ -1,37 +0,0 @@
From 5f0b3bf5497422293576a0783e47d203c52ed863 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Wed, 26 Feb 2025 01:49:30 -0600
Subject: cpufreq/amd-pstate: Update cppc_req_cached for shared mem EPP writes

On EPP-only writes, update the cached variable so that the min/max
performance controls don't need to be updated again.

Reviewed-by: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
---
 drivers/cpufreq/amd-pstate.c | 6 ++++++
 1 file changed, 6 insertions(+)

--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -336,6 +336,7 @@ static int shmem_set_epp(struct cpufreq_
 {
 	struct amd_cpudata *cpudata = policy->driver_data;
 	struct cppc_perf_ctrls perf_ctrls;
+	u64 value;
 	int ret;

 	if (trace_amd_pstate_epp_perf_enabled()) {
@@ -362,6 +363,11 @@ static int shmem_set_epp(struct cpufreq_
 	}
 	WRITE_ONCE(cpudata->epp_cached, epp);

+	value = READ_ONCE(cpudata->cppc_req_cached);
+	value &= ~AMD_CPPC_EPP_PERF_MASK;
+	value |= FIELD_PREP(AMD_CPPC_EPP_PERF_MASK, epp);
+	WRITE_ONCE(cpudata->cppc_req_cached, value);
+
 	return ret;
 }
@@ -1,38 +0,0 @@
From 6c2201fe880d7d35fbde67d74ec1989f053cc0bd Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Wed, 26 Feb 2025 01:49:31 -0600
Subject: cpufreq/amd-pstate: Drop debug statements for policy setting

There are trace events that exist now for all amd-pstate modes that
will output information right before programming to the hardware.

This makes the existing debug statements unnecessary overhead.
Drop them.

Reviewed-by: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
---
 drivers/cpufreq/amd-pstate.c | 4 ----
 1 file changed, 4 deletions(-)

--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -667,7 +667,6 @@ static int amd_pstate_verify(struct cpuf
 	}

 	cpufreq_verify_within_cpu_limits(policy_data);
-	pr_debug("policy_max =%d, policy_min=%d\n", policy_data->max, policy_data->min);

 	return 0;
 }
@@ -1630,9 +1629,6 @@ static int amd_pstate_epp_set_policy(str
 	if (!policy->cpuinfo.max_freq)
 		return -ENODEV;

-	pr_debug("set_policy: cpuinfo.max %u policy->max %u\n",
-		 policy->cpuinfo.max_freq, policy->max);
-
 	cpudata->policy = policy->policy;

 	ret = amd_pstate_epp_update_limit(policy);
@@ -1,327 +0,0 @@
From 3c5030a27361deff20bec5d43339109901f3198c Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Wed, 26 Feb 2025 01:49:32 -0600
Subject: cpufreq/amd-pstate: Rework CPPC enabling

The CPPC enable register is configured as "write once". That is,
any future writes don't actually do anything.

Because of this, all the cleanup paths that currently exist for
CPPC disable are non-effective.

Rework CPPC enable to only enable after all the CAP registers have
been read to avoid enabling CPPC on CPUs with invalid _CPC or
unpopulated MSRs.

As the register is write once, remove all cleanup paths as well.

Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
---
 drivers/cpufreq/amd-pstate.c | 179 +++++++----------------------------
 1 file changed, 35 insertions(+), 144 deletions(-)

--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -85,7 +85,6 @@ static struct cpufreq_driver *current_ps
 static struct cpufreq_driver amd_pstate_driver;
 static struct cpufreq_driver amd_pstate_epp_driver;
 static int cppc_state = AMD_PSTATE_UNDEFINED;
-static bool cppc_enabled;
 static bool amd_pstate_prefcore = true;
 static struct quirk_entry *quirks;

@@ -371,89 +370,21 @@ static int shmem_set_epp(struct cpufreq_
 	return ret;
 }

-static int amd_pstate_set_energy_pref_index(struct cpufreq_policy *policy,
-					    int pref_index)
+static inline int msr_cppc_enable(struct cpufreq_policy *policy)
 {
-	struct amd_cpudata *cpudata = policy->driver_data;
-	u8 epp;
-
-	if (!pref_index)
-		epp = cpudata->epp_default;
-	else
-		epp = epp_values[pref_index];
-
-	if (epp > 0 && cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) {
-		pr_debug("EPP cannot be set under performance policy\n");
-		return -EBUSY;
-	}
-
-	return amd_pstate_set_epp(policy, epp);
-}
-
-static inline int msr_cppc_enable(bool enable)
-{
-	int ret, cpu;
-	unsigned long logical_proc_id_mask = 0;
-
-	/*
-	 * MSR_AMD_CPPC_ENABLE is write-once, once set it cannot be cleared.
-	 */
-	if (!enable)
-		return 0;
-
-	if (enable == cppc_enabled)
-		return 0;
-
-	for_each_present_cpu(cpu) {
-		unsigned long logical_id = topology_logical_package_id(cpu);
-
-		if (test_bit(logical_id, &logical_proc_id_mask))
-			continue;
-
-		set_bit(logical_id, &logical_proc_id_mask);
-
-		ret = wrmsrl_safe_on_cpu(cpu, MSR_AMD_CPPC_ENABLE,
-				enable);
-		if (ret)
-			return ret;
-	}
-
-	cppc_enabled = enable;
-	return 0;
+	return wrmsrl_safe_on_cpu(policy->cpu, MSR_AMD_CPPC_ENABLE, 1);
 }

-static int shmem_cppc_enable(bool enable)
+static int shmem_cppc_enable(struct cpufreq_policy *policy)
 {
-	int cpu, ret = 0;
-	struct cppc_perf_ctrls perf_ctrls;
-
-	if (enable == cppc_enabled)
-		return 0;
-
-	for_each_present_cpu(cpu) {
-		ret = cppc_set_enable(cpu, enable);
-		if (ret)
-			return ret;
-
-		/* Enable autonomous mode for EPP */
-		if (cppc_state == AMD_PSTATE_ACTIVE) {
-			/* Set desired perf as zero to allow EPP firmware control */
-			perf_ctrls.desired_perf = 0;
-			ret = cppc_set_perf(cpu, &perf_ctrls);
-			if (ret)
-				return ret;
-		}
-	}
-
-	cppc_enabled = enable;
-	return ret;
+	return cppc_set_enable(policy->cpu, 1);
 }

 DEFINE_STATIC_CALL(amd_pstate_cppc_enable, msr_cppc_enable);

-static inline int amd_pstate_cppc_enable(bool enable)
+static inline int amd_pstate_cppc_enable(struct cpufreq_policy *policy)
 {
-	return static_call(amd_pstate_cppc_enable)(enable);
+	return static_call(amd_pstate_cppc_enable)(policy);
 }

 static int msr_init_perf(struct amd_cpudata *cpudata)
@@ -1063,6 +994,10 @@ static int amd_pstate_cpu_init(struct cp
 							  cpudata->nominal_freq,
 							  perf.highest_perf);

+	ret = amd_pstate_cppc_enable(policy);
+	if (ret)
+		goto free_cpudata1;
+
 	policy->boost_enabled = READ_ONCE(cpudata->boost_supported);

 	/* It will be updated by governor */
@@ -1110,28 +1045,6 @@ static void amd_pstate_cpu_exit(struct c
 	kfree(cpudata);
 }

-static int amd_pstate_cpu_resume(struct cpufreq_policy *policy)
-{
-	int ret;
-
-	ret = amd_pstate_cppc_enable(true);
-	if (ret)
-		pr_err("failed to enable amd-pstate during resume, return %d\n", ret);
-
-	return ret;
-}
-
-static int amd_pstate_cpu_suspend(struct cpufreq_policy *policy)
-{
-	int ret;
-
-	ret = amd_pstate_cppc_enable(false);
-	if (ret)
-		pr_err("failed to disable amd-pstate during suspend, return %d\n", ret);
-
-	return ret;
-}
-
 /* Sysfs attributes */

 /*
@@ -1223,8 +1136,10 @@ static ssize_t show_energy_performance_a
 static ssize_t store_energy_performance_preference(
 		struct cpufreq_policy *policy, const char *buf, size_t count)
 {
+	struct amd_cpudata *cpudata = policy->driver_data;
 	char str_preference[21];
 	ssize_t ret;
+	u8 epp;

 	ret = sscanf(buf, "%20s", str_preference);
 	if (ret != 1)
@@ -1234,7 +1149,17 @@ static ssize_t store_energy_performance_
 	if (ret < 0)
 		return -EINVAL;

-	ret = amd_pstate_set_energy_pref_index(policy, ret);
+	if (!ret)
+		epp = cpudata->epp_default;
+	else
+		epp = epp_values[ret];
+
+	if (epp > 0 && policy->policy == CPUFREQ_POLICY_PERFORMANCE) {
+		pr_debug("EPP cannot be set under performance policy\n");
+		return -EBUSY;
+	}
+
+	ret = amd_pstate_set_epp(policy, epp);

 	return ret ? ret : count;
 }
@@ -1267,7 +1192,6 @@ static ssize_t show_energy_performance_p

 static void amd_pstate_driver_cleanup(void)
 {
-	amd_pstate_cppc_enable(false);
 	cppc_state = AMD_PSTATE_DISABLE;
 	current_pstate_driver = NULL;
 }
@@ -1301,14 +1225,6 @@ static int amd_pstate_register_driver(in

 	cppc_state = mode;

-	ret = amd_pstate_cppc_enable(true);
-	if (ret) {
-		pr_err("failed to enable cppc during amd-pstate driver registration, return %d\n",
-		       ret);
-		amd_pstate_driver_cleanup();
-		return ret;
-	}
-
 	/* at least one CPU supports CPB */
 	current_pstate_driver->boost_enabled = cpu_feature_enabled(X86_FEATURE_CPB);

@@ -1548,11 +1464,15 @@ static int amd_pstate_epp_cpu_init(struc
 	policy->cpuinfo.max_freq = policy->max = perf_to_freq(perf,
 							      cpudata->nominal_freq,
 							      perf.highest_perf);
+	policy->driver_data = cpudata;
+
+	ret = amd_pstate_cppc_enable(policy);
+	if (ret)
+		goto free_cpudata1;

 	/* It will be updated by governor */
 	policy->cur = policy->cpuinfo.min_freq;

-	policy->driver_data = cpudata;

 	policy->boost_enabled = READ_ONCE(cpudata->boost_supported);

@@ -1644,31 +1564,11 @@ static int amd_pstate_epp_set_policy(str
 	return 0;
 }

-static int amd_pstate_epp_reenable(struct cpufreq_policy *policy)
-{
-	int ret;
-
-	ret = amd_pstate_cppc_enable(true);
-	if (ret)
-		pr_err("failed to enable amd pstate during resume, return %d\n", ret);
-
-
-	return amd_pstate_epp_update_limit(policy);
-}
-
 static int amd_pstate_epp_cpu_online(struct cpufreq_policy *policy)
 {
-	struct amd_cpudata *cpudata = policy->driver_data;
-	int ret;
-
-	pr_debug("AMD CPU Core %d going online\n", cpudata->cpu);
+	pr_debug("AMD CPU Core %d going online\n", policy->cpu);

-	ret = amd_pstate_epp_reenable(policy);
-	if (ret)
-		return ret;
-	cpudata->suspended = false;
-
-	return 0;
+	return amd_pstate_cppc_enable(policy);
 }

 static int amd_pstate_epp_cpu_offline(struct cpufreq_policy *policy)
@@ -1686,11 +1586,6 @@ static int amd_pstate_epp_cpu_offline(st
 static int amd_pstate_epp_suspend(struct cpufreq_policy *policy)
 {
 	struct amd_cpudata *cpudata = policy->driver_data;
-	int ret;
-
-	/* avoid suspending when EPP is not enabled */
-	if (cppc_state != AMD_PSTATE_ACTIVE)
-		return 0;

 	/* invalidate to ensure it's rewritten during resume */
 	cpudata->cppc_req_cached = 0;
@@ -1698,11 +1593,6 @@ static int amd_pstate_epp_suspend(struct
 	/* set this flag to avoid setting core offline*/
 	cpudata->suspended = true;

-	/* disable CPPC in lowlevel firmware */
-	ret = amd_pstate_cppc_enable(false);
-	if (ret)
-		pr_err("failed to suspend, return %d\n", ret);
-
 	return 0;
 }

@@ -1711,8 +1601,12 @@ static int amd_pstate_epp_resume(struct
 	struct amd_cpudata *cpudata = policy->driver_data;

 	if (cpudata->suspended) {
+		int ret;
+
 		/* enable amd pstate from suspend state*/
-		amd_pstate_epp_reenable(policy);
+		ret = amd_pstate_epp_update_limit(policy);
+		if (ret)
+			return ret;

 		cpudata->suspended = false;
 	}
@@ -1727,8 +1621,6 @@ static struct cpufreq_driver amd_pstate_
 	.fast_switch	= amd_pstate_fast_switch,
 	.init		= amd_pstate_cpu_init,
 	.exit		= amd_pstate_cpu_exit,
-	.suspend	= amd_pstate_cpu_suspend,
-	.resume		= amd_pstate_cpu_resume,
 	.set_boost	= amd_pstate_set_boost,
 	.update_limits	= amd_pstate_update_limits,
 	.name		= "amd-pstate",
@@ -1895,7 +1787,6 @@ static int __init amd_pstate_init(void)

 global_attr_free:
 	cpufreq_unregister_driver(current_pstate_driver);
-	amd_pstate_cppc_enable(false);
 	return ret;
 }
 device_initcall(amd_pstate_init);
@@ -1,105 +0,0 @@
|
||||
From c06cca99a6d74e7a6d6f020dbf982b0b9bf704e6 Mon Sep 17 00:00:00 2001
|
||||
From: Mario Limonciello <mario.limonciello@amd.com>
|
||||
Date: Wed, 26 Feb 2025 01:49:33 -0600
|
||||
Subject: cpufreq/amd-pstate: Stop caching EPP
|
||||
|
||||
EPP values are cached in the cpudata structure per CPU. This is needless,
|
||||
though, because they are also cached in the CPPC request variable.
|
||||
|
||||
Drop the separate cache for EPP values and always reference the CPPC
|
||||
request variable when needed.
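For reference, a minimal sketch of the access pattern this switches to; the helper is hypothetical, while FIELD_GET() and AMD_CPPC_EPP_PERF_MASK are the names used in the hunks below:

```
#include <linux/bitfield.h>
#include <linux/types.h>

/* Hypothetical helper illustrating the replacement pattern: the EPP
 * value is re-derived on demand from the cached CPPC request word
 * using the driver's existing AMD_CPPC_EPP_PERF_MASK, instead of
 * being mirrored in a second per-CPU field. */
static inline u8 epp_from_cppc_req(u64 cppc_req_cached)
{
	return FIELD_GET(AMD_CPPC_EPP_PERF_MASK, cppc_req_cached);
}
```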
|
||||
|
||||
Reviewed-by: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
|
||||
Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
|
||||
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
|
||||
---
|
||||
drivers/cpufreq/amd-pstate.c | 19 ++++++++++---------
|
||||
drivers/cpufreq/amd-pstate.h | 1 -
|
||||
2 files changed, 10 insertions(+), 10 deletions(-)
|
||||
|
||||
--- a/drivers/cpufreq/amd-pstate.c
|
||||
+++ b/drivers/cpufreq/amd-pstate.c
|
||||
@@ -268,8 +268,6 @@ static int msr_update_perf(struct cpufre
|
||||
}
|
||||
|
||||
WRITE_ONCE(cpudata->cppc_req_cached, value);
|
||||
- if (epp != cpudata->epp_cached)
|
||||
- WRITE_ONCE(cpudata->epp_cached, epp);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -318,7 +316,6 @@ static int msr_set_epp(struct cpufreq_po
|
||||
}
|
||||
|
||||
/* update both so that msr_update_perf() can effectively check */
|
||||
- WRITE_ONCE(cpudata->epp_cached, epp);
|
||||
WRITE_ONCE(cpudata->cppc_req_cached, value);
|
||||
|
||||
return ret;
|
||||
@@ -335,9 +332,12 @@ static int shmem_set_epp(struct cpufreq_
|
||||
{
|
||||
struct amd_cpudata *cpudata = policy->driver_data;
|
||||
struct cppc_perf_ctrls perf_ctrls;
|
||||
+ u8 epp_cached;
|
||||
u64 value;
|
||||
int ret;
|
||||
|
||||
+
|
||||
+ epp_cached = FIELD_GET(AMD_CPPC_EPP_PERF_MASK, cpudata->cppc_req_cached);
|
||||
if (trace_amd_pstate_epp_perf_enabled()) {
|
||||
union perf_cached perf = cpudata->perf;
|
||||
|
||||
@@ -348,10 +348,10 @@ static int shmem_set_epp(struct cpufreq_
|
||||
FIELD_GET(AMD_CPPC_MAX_PERF_MASK,
|
||||
cpudata->cppc_req_cached),
|
||||
policy->boost_enabled,
|
||||
- epp != cpudata->epp_cached);
|
||||
+ epp != epp_cached);
|
||||
}
|
||||
|
||||
- if (epp == cpudata->epp_cached)
|
||||
+ if (epp == epp_cached)
|
||||
return 0;
|
||||
|
||||
perf_ctrls.energy_perf = epp;
|
||||
@@ -360,7 +360,6 @@ static int shmem_set_epp(struct cpufreq_
|
||||
pr_debug("failed to set energy perf value (%d)\n", ret);
|
||||
return ret;
|
||||
}
|
||||
- WRITE_ONCE(cpudata->epp_cached, epp);
|
||||
|
||||
value = READ_ONCE(cpudata->cppc_req_cached);
|
||||
value &= ~AMD_CPPC_EPP_PERF_MASK;
|
||||
@@ -1168,9 +1167,11 @@ static ssize_t show_energy_performance_p
|
||||
struct cpufreq_policy *policy, char *buf)
|
||||
{
|
||||
struct amd_cpudata *cpudata = policy->driver_data;
|
||||
- u8 preference;
|
||||
+ u8 preference, epp;
|
||||
+
|
||||
+ epp = FIELD_GET(AMD_CPPC_EPP_PERF_MASK, cpudata->cppc_req_cached);
|
||||
|
||||
- switch (cpudata->epp_cached) {
|
||||
+ switch (epp) {
|
||||
case AMD_CPPC_EPP_PERFORMANCE:
|
||||
preference = EPP_INDEX_PERFORMANCE;
|
||||
break;
|
||||
@@ -1533,7 +1534,7 @@ static int amd_pstate_epp_update_limit(s
|
||||
if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
|
||||
epp = 0;
|
||||
else
|
||||
- epp = READ_ONCE(cpudata->epp_cached);
|
||||
+ epp = FIELD_GET(AMD_CPPC_EPP_PERF_MASK, cpudata->cppc_req_cached);
|
||||
|
||||
perf = READ_ONCE(cpudata->perf);
|
||||
|
||||
--- a/drivers/cpufreq/amd-pstate.h
|
||||
+++ b/drivers/cpufreq/amd-pstate.h
|
||||
@@ -102,7 +102,6 @@ struct amd_cpudata {
|
||||
bool hw_prefcore;
|
||||
|
||||
/* EPP feature related attributes*/
|
||||
- u8 epp_cached;
|
||||
u32 policy;
|
||||
bool suspended;
|
||||
u8 epp_default;
|
@@ -1,39 +0,0 @@
|
||||
From a82e4f4eb6e5e9806c66285cb3cefde644b8ea6b Mon Sep 17 00:00:00 2001
|
||||
From: Mario Limonciello <mario.limonciello@amd.com>
|
||||
Date: Wed, 26 Feb 2025 01:49:34 -0600
|
||||
Subject: cpufreq/amd-pstate: Drop actions in amd_pstate_epp_cpu_offline()
|
||||
|
||||
When the CPU goes offline there is no need to change the CPPC request
|
||||
because the CPU will go into the deepest C-state it supports already.
|
||||
|
||||
Actually changing the CPPC request when it goes offline messes up the
|
||||
cached values and can lead to the wrong values being restored when
|
||||
it comes back.
|
||||
|
||||
Instead drop the actions and if the CPU comes back online let
|
||||
amd_pstate_epp_set_policy() restore it to expected values.
|
||||
|
||||
Reviewed-by: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
|
||||
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
|
||||
---
|
||||
drivers/cpufreq/amd-pstate.c | 9 +--------
|
||||
1 file changed, 1 insertion(+), 8 deletions(-)
|
||||
|
||||
--- a/drivers/cpufreq/amd-pstate.c
|
||||
+++ b/drivers/cpufreq/amd-pstate.c
|
||||
@@ -1574,14 +1574,7 @@ static int amd_pstate_epp_cpu_online(str
|
||||
|
||||
static int amd_pstate_epp_cpu_offline(struct cpufreq_policy *policy)
|
||||
{
|
||||
- struct amd_cpudata *cpudata = policy->driver_data;
|
||||
- union perf_cached perf = READ_ONCE(cpudata->perf);
|
||||
-
|
||||
- if (cpudata->suspended)
|
||||
- return 0;
|
||||
-
|
||||
- return amd_pstate_update_perf(policy, perf.lowest_perf, 0, perf.lowest_perf,
|
||||
- AMD_CPPC_EPP_BALANCE_POWERSAVE, false);
|
||||
+ return 0;
|
||||
}
|
||||
|
||||
static int amd_pstate_epp_suspend(struct cpufreq_policy *policy)
|
@@ -1,41 +0,0 @@
|
||||
From de3dd387423b30565e846e0ff4424e2c99164030 Mon Sep 17 00:00:00 2001
|
||||
From: Mario Limonciello <superm1@kernel.org>
|
||||
Date: Thu, 27 Feb 2025 14:09:08 -0600
|
||||
Subject: cpufreq/amd-pstate: fix warning noticed by kernel test robot
|
||||
|
||||
Reported-by: kernel test robot <lkp@intel.com>
|
||||
Closes: https://lore.kernel.org/oe-kbuild-all/202502272001.nafS0qXq-lkp@intel.com/
|
||||
Signed-off-by: Oleksandr Natalenko <oleksandr@natalenko.name>
|
||||
---
|
||||
drivers/cpufreq/amd-pstate.c | 13 ++++++-------
|
||||
1 file changed, 6 insertions(+), 7 deletions(-)
|
||||
|
||||
--- a/drivers/cpufreq/amd-pstate.c
|
||||
+++ b/drivers/cpufreq/amd-pstate.c
|
||||
@@ -903,20 +903,19 @@ static int amd_pstate_init_freq(struct a
|
||||
return ret;
|
||||
perf = READ_ONCE(cpudata->perf);
|
||||
|
||||
+ if (quirks && quirks->nominal_freq)
|
||||
+ nominal_freq = quirks->nominal_freq;
|
||||
+ else
|
||||
+ nominal_freq = cppc_perf.nominal_freq;
|
||||
+ nominal_freq *= 1000;
|
||||
+
|
||||
if (quirks && quirks->lowest_freq) {
|
||||
min_freq = quirks->lowest_freq;
|
||||
perf.lowest_perf = freq_to_perf(perf, nominal_freq, min_freq);
|
||||
WRITE_ONCE(cpudata->perf, perf);
|
||||
} else
|
||||
min_freq = cppc_perf.lowest_freq;
|
||||
-
|
||||
- if (quirks && quirks->nominal_freq)
|
||||
- nominal_freq = quirks->nominal_freq;
|
||||
- else
|
||||
- nominal_freq = cppc_perf.nominal_freq;
|
||||
-
|
||||
min_freq *= 1000;
|
||||
- nominal_freq *= 1000;
|
||||
|
||||
WRITE_ONCE(cpudata->nominal_freq, nominal_freq);
|
||||
|
@@ -1,42 +0,0 @@
|
||||
From 7e68278a4a90d52966b923404a2d280e3a83b66f Mon Sep 17 00:00:00 2001
|
||||
From: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
|
||||
Date: Mon, 7 Apr 2025 08:19:26 +0000
|
||||
Subject: cpufreq/amd-pstate: Fix min_limit perf and freq updation for
|
||||
performance governor
|
||||
|
||||
The min_limit perf and freq values can get disconnected with the performance
|
||||
governor, as we only modify the perf value in the special case. Fix that
|
||||
by modifying the perf and freq values together.
|
||||
|
||||
Fixes: 009d1c29a451 ("cpufreq/amd-pstate: Move perf values into a union")
|
||||
Signed-off-by: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
|
||||
Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
|
||||
Link: https://lore.kernel.org/r/20250407081925.850473-1-dhananjay.ugwekar@amd.com
|
||||
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
|
||||
---
|
||||
drivers/cpufreq/amd-pstate.c | 11 +++++++----
|
||||
1 file changed, 7 insertions(+), 4 deletions(-)
|
||||
|
||||
--- a/drivers/cpufreq/amd-pstate.c
|
||||
+++ b/drivers/cpufreq/amd-pstate.c
|
||||
@@ -607,13 +607,16 @@ static void amd_pstate_update_min_max_li
|
||||
union perf_cached perf = READ_ONCE(cpudata->perf);
|
||||
|
||||
perf.max_limit_perf = freq_to_perf(perf, cpudata->nominal_freq, policy->max);
|
||||
- perf.min_limit_perf = freq_to_perf(perf, cpudata->nominal_freq, policy->min);
|
||||
+ WRITE_ONCE(cpudata->max_limit_freq, policy->max);
|
||||
|
||||
- if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
|
||||
+ if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) {
|
||||
perf.min_limit_perf = min(perf.nominal_perf, perf.max_limit_perf);
|
||||
+ WRITE_ONCE(cpudata->min_limit_freq, min(cpudata->nominal_freq, cpudata->max_limit_freq));
|
||||
+ } else {
|
||||
+ perf.min_limit_perf = freq_to_perf(perf, cpudata->nominal_freq, policy->min);
|
||||
+ WRITE_ONCE(cpudata->min_limit_freq, policy->min);
|
||||
+ }
|
||||
|
||||
- WRITE_ONCE(cpudata->max_limit_freq, policy->max);
|
||||
- WRITE_ONCE(cpudata->min_limit_freq, policy->min);
|
||||
WRITE_ONCE(cpudata->perf, perf);
|
||||
}
|
||||
|
@@ -1,4 +1,4 @@
|
||||
From 247749c27f92a789d4f1727aa870167c25ca3c5e Mon Sep 17 00:00:00 2001
|
||||
From 1cb9f09cead0ba384729bfdc74d6fa21d586530c Mon Sep 17 00:00:00 2001
|
||||
From: Christian Loehle <christian.loehle@arm.com>
|
||||
Date: Thu, 5 Sep 2024 10:26:39 +0100
|
||||
Subject: cpuidle: Prefer teo over menu governor
|
||||
@@ -36,7 +36,7 @@ Signed-off-by: Christian Loehle <christian.loehle@arm.com>
|
||||
depends on KVM_GUEST
|
||||
--- a/drivers/cpuidle/governors/menu.c
|
||||
+++ b/drivers/cpuidle/governors/menu.c
|
||||
@@ -519,7 +519,7 @@ static int menu_enable_device(struct cpu
|
||||
@@ -513,7 +513,7 @@ static int menu_enable_device(struct cpu
|
||||
|
||||
static struct cpuidle_governor menu_governor = {
|
||||
.name = "menu",
|
||||
|
@@ -1,65 +0,0 @@
|
||||
From 5e5a835c50afc3b9bb2b8b9175d0924abb5a7f3c Mon Sep 17 00:00:00 2001
|
||||
From: Eric Biggers <ebiggers@google.com>
|
||||
Date: Mon, 27 Jan 2025 13:16:09 -0800
|
||||
Subject: crypto: x86/aes-xts - make the fast path 64-bit specific
|
||||
|
||||
Remove 32-bit support from the fast path in xts_crypt(). Then optimize
|
||||
it for 64-bit, and simplify the code, by switching to sg_virt() and
|
||||
removing the now-unnecessary checks for crossing a page boundary.
|
||||
|
||||
The result is simpler code that is slightly smaller and faster in the
|
||||
case that actually matters (64-bit).
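A sketch of the new eligibility test, pulled out as a hypothetical helper to make the assumptions explicit (64-bit direct map, single scatterlist element on each side); the real check is inlined in the hunk below:

```
#include <linux/kconfig.h>
#include <linux/scatterlist.h>

/* Sketch: the fast path is taken when src and dst each fit inside
 * their first scatterlist element; on 64-bit the kernel direct map
 * makes sg_virt() sufficient, so no kmap_local_page()/kunmap_local()
 * pair is needed. */
static bool xts_single_sg_fastpath(struct scatterlist *src,
				   struct scatterlist *dst,
				   unsigned int cryptlen)
{
	return IS_ENABLED(CONFIG_X86_64) &&
	       src->length >= cryptlen && dst->length >= cryptlen;
}
```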
|
||||
|
||||
Signed-off-by: Eric Biggers <ebiggers@google.com>
|
||||
---
|
||||
arch/x86/crypto/aesni-intel_glue.c | 30 ++++++++++--------------------
|
||||
1 file changed, 10 insertions(+), 20 deletions(-)
|
||||
|
||||
--- a/arch/x86/crypto/aesni-intel_glue.c
|
||||
+++ b/arch/x86/crypto/aesni-intel_glue.c
|
||||
@@ -581,11 +581,8 @@ xts_crypt(struct skcipher_request *req,
|
||||
{
|
||||
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
||||
const struct aesni_xts_ctx *ctx = aes_xts_ctx(tfm);
|
||||
- const unsigned int cryptlen = req->cryptlen;
|
||||
- struct scatterlist *src = req->src;
|
||||
- struct scatterlist *dst = req->dst;
|
||||
|
||||
- if (unlikely(cryptlen < AES_BLOCK_SIZE))
|
||||
+ if (unlikely(req->cryptlen < AES_BLOCK_SIZE))
|
||||
return -EINVAL;
|
||||
|
||||
kernel_fpu_begin();
|
||||
@@ -593,23 +590,16 @@ xts_crypt(struct skcipher_request *req,
|
||||
|
||||
/*
|
||||
* In practice, virtually all XTS plaintexts and ciphertexts are either
|
||||
- * 512 or 4096 bytes, aligned such that they don't span page boundaries.
|
||||
- * To optimize the performance of these cases, and also any other case
|
||||
- * where no page boundary is spanned, the below fast-path handles
|
||||
- * single-page sources and destinations as efficiently as possible.
|
||||
+ * 512 or 4096 bytes and do not use multiple scatterlist elements. To
|
||||
+ * optimize the performance of these cases, the below fast-path handles
|
||||
+ * single-scatterlist-element messages as efficiently as possible. The
|
||||
+ * code is 64-bit specific, as it assumes no page mapping is needed.
|
||||
*/
|
||||
- if (likely(src->length >= cryptlen && dst->length >= cryptlen &&
|
||||
- src->offset + cryptlen <= PAGE_SIZE &&
|
||||
- dst->offset + cryptlen <= PAGE_SIZE)) {
|
||||
- struct page *src_page = sg_page(src);
|
||||
- struct page *dst_page = sg_page(dst);
|
||||
- void *src_virt = kmap_local_page(src_page) + src->offset;
|
||||
- void *dst_virt = kmap_local_page(dst_page) + dst->offset;
|
||||
-
|
||||
- (*crypt_func)(&ctx->crypt_ctx, src_virt, dst_virt, cryptlen,
|
||||
- req->iv);
|
||||
- kunmap_local(dst_virt);
|
||||
- kunmap_local(src_virt);
|
||||
+ if (IS_ENABLED(CONFIG_X86_64) &&
|
||||
+ likely(req->src->length >= req->cryptlen &&
|
||||
+ req->dst->length >= req->cryptlen)) {
|
||||
+ (*crypt_func)(&ctx->crypt_ctx, sg_virt(req->src),
|
||||
+ sg_virt(req->dst), req->cryptlen, req->iv);
|
||||
kernel_fpu_end();
|
||||
return 0;
|
||||
}
|
File diff suppressed because it is too large
@@ -1,176 +0,0 @@
|
||||
From 4506de20739ac4726a258faa98609a552184d2d2 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Sergio=20Gonz=C3=A1lez=20Collado?=
|
||||
<sergio.collado@gmail.com>
|
||||
Date: Sun, 2 Mar 2025 23:15:18 +0100
|
||||
Subject: Kunit to check the longest symbol length
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
The maximum length of a symbol name (KSYM_NAME_LEN) was increased to 512
|
||||
in the reference [1]. This patch adds a kunit test suite to check the longest
|
||||
symbol length. These tests verify that the longest symbol length defined
|
||||
is supported.
|
||||
|
||||
This test can also help other efforts for longer symbol length,
|
||||
like [2].
|
||||
|
||||
The test suite defines one symbol with the longest possible length.
|
||||
|
||||
The first test verifies that a function with the name of the created
|
||||
symbol can be called.
|
||||
|
||||
The second test verifies that the symbol is present in the
|
||||
kernel symbol table.
|
||||
|
||||
[1] https://lore.kernel.org/lkml/20220802015052.10452-6-ojeda@kernel.org/
|
||||
[2] https://lore.kernel.org/lkml/20240605032120.3179157-1-song@kernel.org/
|
||||
|
||||
Tested-by: Martin Rodriguez Reboredo <yakoyoku@gmail.com>
|
||||
Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
|
||||
Reviewed-by: Rae Moar <rmoar@google.com>
|
||||
Signed-off-by: Sergio González Collado <sergio.collado@gmail.com>
|
||||
Link: https://github.com/Rust-for-Linux/linux/issues/504
|
||||
Source: https://lore.kernel.org/rust-for-linux/20250302221518.76874-1-sergio.collado@gmail.com/
|
||||
Cherry-picked-for: https://gitlab.archlinux.org/archlinux/packaging/packages/linux/-/issues/63
|
||||
---
|
||||
arch/x86/tools/insn_decoder_test.c | 3 +-
|
||||
lib/Kconfig.debug | 9 ++++
|
||||
lib/Makefile | 2 +
|
||||
lib/longest_symbol_kunit.c | 82 ++++++++++++++++++++++++++++++
|
||||
4 files changed, 95 insertions(+), 1 deletion(-)
|
||||
create mode 100644 lib/longest_symbol_kunit.c
|
||||
|
||||
--- a/arch/x86/tools/insn_decoder_test.c
|
||||
+++ b/arch/x86/tools/insn_decoder_test.c
|
||||
@@ -10,6 +10,7 @@
|
||||
#include <assert.h>
|
||||
#include <unistd.h>
|
||||
#include <stdarg.h>
|
||||
+#include <linux/kallsyms.h>
|
||||
|
||||
#define unlikely(cond) (cond)
|
||||
|
||||
@@ -106,7 +107,7 @@ static void parse_args(int argc, char **
|
||||
}
|
||||
}
|
||||
|
||||
-#define BUFSIZE 256
|
||||
+#define BUFSIZE (256 + KSYM_NAME_LEN)
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
--- a/lib/Kconfig.debug
|
||||
+++ b/lib/Kconfig.debug
|
||||
@@ -2838,6 +2838,15 @@ config FORTIFY_KUNIT_TEST
|
||||
by the str*() and mem*() family of functions. For testing runtime
|
||||
traps of FORTIFY_SOURCE, see LKDTM's "FORTIFY_*" tests.
|
||||
|
||||
+config LONGEST_SYM_KUNIT_TEST
|
||||
+ tristate "Test the longest symbol possible" if !KUNIT_ALL_TESTS
|
||||
+ depends on KUNIT && KPROBES
|
||||
+ default KUNIT_ALL_TESTS
|
||||
+ help
|
||||
+ Tests the longest symbol possible
|
||||
+
|
||||
+ If unsure, say N.
|
||||
+
|
||||
config HW_BREAKPOINT_KUNIT_TEST
|
||||
bool "Test hw_breakpoint constraints accounting" if !KUNIT_ALL_TESTS
|
||||
depends on HAVE_HW_BREAKPOINT
|
||||
--- a/lib/Makefile
|
||||
+++ b/lib/Makefile
|
||||
@@ -398,6 +398,8 @@ obj-$(CONFIG_FORTIFY_KUNIT_TEST) += fort
|
||||
obj-$(CONFIG_CRC_KUNIT_TEST) += crc_kunit.o
|
||||
obj-$(CONFIG_SIPHASH_KUNIT_TEST) += siphash_kunit.o
|
||||
obj-$(CONFIG_USERCOPY_KUNIT_TEST) += usercopy_kunit.o
|
||||
+obj-$(CONFIG_LONGEST_SYM_KUNIT_TEST) += longest_symbol_kunit.o
|
||||
+CFLAGS_longest_symbol_kunit.o += $(call cc-disable-warning, missing-prototypes)
|
||||
|
||||
obj-$(CONFIG_GENERIC_LIB_DEVMEM_IS_ALLOWED) += devmem_is_allowed.o
|
||||
|
||||
--- /dev/null
|
||||
+++ b/lib/longest_symbol_kunit.c
|
||||
@@ -0,0 +1,82 @@
|
||||
+// SPDX-License-Identifier: GPL-2.0
|
||||
+/*
|
||||
+ * Test the longest symbol length. Execute with:
|
||||
+ * ./tools/testing/kunit/kunit.py run longest-symbol
|
||||
+ * --arch=x86_64 --kconfig_add CONFIG_KPROBES=y --kconfig_add CONFIG_MODULES=y
|
||||
+ * --kconfig_add CONFIG_RETPOLINE=n --kconfig_add CONFIG_CFI_CLANG=n
|
||||
+ * --kconfig_add CONFIG_MITIGATION_RETPOLINE=n
|
||||
+ */
|
||||
+
|
||||
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
|
||||
+
|
||||
+#include <kunit/test.h>
|
||||
+#include <linux/stringify.h>
|
||||
+#include <linux/kprobes.h>
|
||||
+#include <linux/kallsyms.h>
|
||||
+
|
||||
+#define DI(name) s##name##name
|
||||
+#define DDI(name) DI(n##name##name)
|
||||
+#define DDDI(name) DDI(n##name##name)
|
||||
+#define DDDDI(name) DDDI(n##name##name)
|
||||
+#define DDDDDI(name) DDDDI(n##name##name)
|
||||
+
|
||||
+/*Generate a symbol whose name length is 511 */
|
||||
+#define LONGEST_SYM_NAME DDDDDI(g1h2i3j4k5l6m7n)
|
||||
+
|
||||
+#define RETURN_LONGEST_SYM 0xAAAAA
|
||||
+
|
||||
+noinline int LONGEST_SYM_NAME(void);
|
||||
+noinline int LONGEST_SYM_NAME(void)
|
||||
+{
|
||||
+ return RETURN_LONGEST_SYM;
|
||||
+}
|
||||
+
|
||||
+_Static_assert(sizeof(__stringify(LONGEST_SYM_NAME)) == KSYM_NAME_LEN,
|
||||
+"Incorrect symbol length found. Expected KSYM_NAME_LEN: "
|
||||
+__stringify(KSYM_NAME_LEN) ", but found: "
|
||||
+__stringify(sizeof(LONGEST_SYM_NAME)));
|
||||
+
|
||||
+static void test_longest_symbol(struct kunit *test)
|
||||
+{
|
||||
+ KUNIT_EXPECT_EQ(test, RETURN_LONGEST_SYM, LONGEST_SYM_NAME());
|
||||
+};
|
||||
+
|
||||
+static void test_longest_symbol_kallsyms(struct kunit *test)
|
||||
+{
|
||||
+ unsigned long (*kallsyms_lookup_name)(const char *name);
|
||||
+ static int (*longest_sym)(void);
|
||||
+
|
||||
+ struct kprobe kp = {
|
||||
+ .symbol_name = "kallsyms_lookup_name",
|
||||
+ };
|
||||
+
|
||||
+ if (register_kprobe(&kp) < 0) {
|
||||
+ pr_info("%s: kprobe not registered", __func__);
|
||||
+ KUNIT_FAIL(test, "test_longest_symbol kallsyms: kprobe not registered\n");
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ kunit_warn(test, "test_longest_symbol kallsyms: kprobe registered\n");
|
||||
+ kallsyms_lookup_name = (unsigned long (*)(const char *name))kp.addr;
|
||||
+ unregister_kprobe(&kp);
|
||||
+
|
||||
+ longest_sym =
|
||||
+ (void *) kallsyms_lookup_name(__stringify(LONGEST_SYM_NAME));
|
||||
+ KUNIT_EXPECT_EQ(test, RETURN_LONGEST_SYM, longest_sym());
|
||||
+};
|
||||
+
|
||||
+static struct kunit_case longest_symbol_test_cases[] = {
|
||||
+ KUNIT_CASE(test_longest_symbol),
|
||||
+ KUNIT_CASE(test_longest_symbol_kallsyms),
|
||||
+ {}
|
||||
+};
|
||||
+
|
||||
+static struct kunit_suite longest_symbol_test_suite = {
|
||||
+ .name = "longest-symbol",
|
||||
+ .test_cases = longest_symbol_test_cases,
|
||||
+};
|
||||
+kunit_test_suite(longest_symbol_test_suite);
|
||||
+
|
||||
+MODULE_LICENSE("GPL");
|
||||
+MODULE_DESCRIPTION("Test the longest symbol length");
|
||||
+MODULE_AUTHOR("Sergio González Collado");
|
debian/patches/patchset-pf/fixes/0001-mm-fix-ratelimit_pages-update-error-in-dirty_ratio_h.patch (new file, 70 lines)
@@ -0,0 +1,70 @@
|
||||
From cda8b1022f32bb7a917148f75f4641e7a5b3e729 Mon Sep 17 00:00:00 2001
|
||||
From: Jinliang Zheng <alexjlzheng@tencent.com>
|
||||
Date: Tue, 15 Apr 2025 17:02:32 +0800
|
||||
Subject: mm: fix ratelimit_pages update error in dirty_ratio_handler()
|
||||
|
||||
In dirty_ratio_handler(), vm_dirty_bytes must be set to zero before
|
||||
calling writeback_set_ratelimit(), as global_dirty_limits() always
|
||||
prioritizes the value of vm_dirty_bytes.
|
||||
|
||||
It's domain_dirty_limits() that's relevant here, not node_dirty_ok:
|
||||
|
||||
dirty_ratio_handler
|
||||
writeback_set_ratelimit
|
||||
global_dirty_limits(&dirty_thresh) <- ratelimit_pages based on dirty_thresh
|
||||
domain_dirty_limits
|
||||
if (bytes) <- bytes = vm_dirty_bytes <--------+
|
||||
thresh = f1(bytes) <- prioritizes vm_dirty_bytes |
|
||||
else |
|
||||
thresh = f2(ratio) |
|
||||
ratelimit_pages = f3(dirty_thresh) |
|
||||
vm_dirty_bytes = 0 <- it's late! ---------------------+
|
||||
|
||||
This causes ratelimit_pages to still use the value calculated based on
|
||||
vm_dirty_bytes, which is wrong now.
|
||||
|
||||
|
||||
The impact visible to userspace is difficult to capture directly because
|
||||
there is no procfs/sysfs interface exported to user space. However, it
|
||||
will have a real impact on the balance of dirty pages.
|
||||
|
||||
For example:
|
||||
|
||||
1. On default, we have vm_dirty_ratio=40, vm_dirty_bytes=0
|
||||
|
||||
2. echo 8192 > dirty_bytes, then vm_dirty_bytes=8192,
|
||||
vm_dirty_ratio=0, and ratelimit_pages is calculated based on
|
||||
vm_dirty_bytes now.
|
||||
|
||||
3. echo 20 > dirty_ratio, then since vm_dirty_bytes is not reset to
|
||||
zero when writeback_set_ratelimit() -> global_dirty_limits() ->
|
||||
domain_dirty_limits() is called, reallimit_pages is still calculated
|
||||
based on vm_dirty_bytes instead of vm_dirty_ratio. This does not
|
||||
conform to the actual intent of the user.
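In short, the fix is just an ordering change; a sketch of the corrected tail of dirty_ratio_handler(), matching the hunk below:

```
/* Sketch: vm_dirty_bytes is cleared *before* writeback_set_ratelimit()
 * so that global_dirty_limits() -> domain_dirty_limits() falls back to
 * vm_dirty_ratio when recomputing ratelimit_pages. */
if (ret == 0 && write && vm_dirty_ratio != old_ratio) {
	vm_dirty_bytes = 0;
	writeback_set_ratelimit();
}
```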
|
||||
|
||||
Link: https://lkml.kernel.org/r/20250415090232.7544-1-alexjlzheng@tencent.com
|
||||
Fixes: 9d823e8f6b1b ("writeback: per task dirty rate limit")
|
||||
Signed-off-by: Jinliang Zheng <alexjlzheng@tencent.com>
|
||||
Reviewed-by: MengEn Sun <mengensun@tencent.com>
|
||||
Cc: Andrea Righi <andrea@betterlinux.com>
|
||||
Cc: Fenggaung Wu <fengguang.wu@intel.com>
|
||||
Cc: Jinliang Zheng <alexjlzheng@tencent.com>
|
||||
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
|
||||
Cc: <stable@vger.kernel.org>
|
||||
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
---
|
||||
mm/page-writeback.c | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
--- a/mm/page-writeback.c
|
||||
+++ b/mm/page-writeback.c
|
||||
@@ -520,8 +520,8 @@ static int dirty_ratio_handler(const str
|
||||
|
||||
ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
|
||||
if (ret == 0 && write && vm_dirty_ratio != old_ratio) {
|
||||
- writeback_set_ratelimit();
|
||||
vm_dirty_bytes = 0;
|
||||
+ writeback_set_ratelimit();
|
||||
}
|
||||
return ret;
|
||||
}
|
debian/patches/patchset-pf/fixes/0002-vgacon-Add-check-for-vc_origin-address-range-in-vgac.patch (new file, 179 lines)
@@ -0,0 +1,179 @@
|
||||
From 30a724581b5037176f6492359c189ebb180ccf1f Mon Sep 17 00:00:00 2001
|
||||
From: GONG Ruiqi <gongruiqi1@huawei.com>
|
||||
Date: Sun, 27 Apr 2025 10:53:03 +0800
|
||||
Subject: vgacon: Add check for vc_origin address range in vgacon_scroll()
|
||||
|
||||
Our in-house Syzkaller reported the following BUG (twice), which we
|
||||
believed was the same issue as [1]:
|
||||
|
||||
==================================================================
|
||||
BUG: KASAN: slab-out-of-bounds in vcs_scr_readw+0xc2/0xd0 drivers/tty/vt/vt.c:4740
|
||||
Read of size 2 at addr ffff88800f5bef60 by task syz.7.2620/12393
|
||||
...
|
||||
Call Trace:
|
||||
<TASK>
|
||||
__dump_stack lib/dump_stack.c:88 [inline]
|
||||
dump_stack_lvl+0x72/0xa0 lib/dump_stack.c:106
|
||||
print_address_description.constprop.0+0x6b/0x3d0 mm/kasan/report.c:364
|
||||
print_report+0xba/0x280 mm/kasan/report.c:475
|
||||
kasan_report+0xa9/0xe0 mm/kasan/report.c:588
|
||||
vcs_scr_readw+0xc2/0xd0 drivers/tty/vt/vt.c:4740
|
||||
vcs_write_buf_noattr drivers/tty/vt/vc_screen.c:493 [inline]
|
||||
vcs_write+0x586/0x840 drivers/tty/vt/vc_screen.c:690
|
||||
vfs_write+0x219/0x960 fs/read_write.c:584
|
||||
ksys_write+0x12e/0x260 fs/read_write.c:639
|
||||
do_syscall_x64 arch/x86/entry/common.c:51 [inline]
|
||||
do_syscall_64+0x59/0x110 arch/x86/entry/common.c:81
|
||||
entry_SYSCALL_64_after_hwframe+0x78/0xe2
|
||||
...
|
||||
</TASK>
|
||||
|
||||
Allocated by task 5614:
|
||||
kasan_save_stack+0x20/0x40 mm/kasan/common.c:45
|
||||
kasan_set_track+0x25/0x30 mm/kasan/common.c:52
|
||||
____kasan_kmalloc mm/kasan/common.c:374 [inline]
|
||||
__kasan_kmalloc+0x8f/0xa0 mm/kasan/common.c:383
|
||||
kasan_kmalloc include/linux/kasan.h:201 [inline]
|
||||
__do_kmalloc_node mm/slab_common.c:1007 [inline]
|
||||
__kmalloc+0x62/0x140 mm/slab_common.c:1020
|
||||
kmalloc include/linux/slab.h:604 [inline]
|
||||
kzalloc include/linux/slab.h:721 [inline]
|
||||
vc_do_resize+0x235/0xf40 drivers/tty/vt/vt.c:1193
|
||||
vgacon_adjust_height+0x2d4/0x350 drivers/video/console/vgacon.c:1007
|
||||
vgacon_font_set+0x1f7/0x240 drivers/video/console/vgacon.c:1031
|
||||
con_font_set drivers/tty/vt/vt.c:4628 [inline]
|
||||
con_font_op+0x4da/0xa20 drivers/tty/vt/vt.c:4675
|
||||
vt_k_ioctl+0xa10/0xb30 drivers/tty/vt/vt_ioctl.c:474
|
||||
vt_ioctl+0x14c/0x1870 drivers/tty/vt/vt_ioctl.c:752
|
||||
tty_ioctl+0x655/0x1510 drivers/tty/tty_io.c:2779
|
||||
vfs_ioctl fs/ioctl.c:51 [inline]
|
||||
__do_sys_ioctl fs/ioctl.c:871 [inline]
|
||||
__se_sys_ioctl+0x12d/0x190 fs/ioctl.c:857
|
||||
do_syscall_x64 arch/x86/entry/common.c:51 [inline]
|
||||
do_syscall_64+0x59/0x110 arch/x86/entry/common.c:81
|
||||
entry_SYSCALL_64_after_hwframe+0x78/0xe2
|
||||
|
||||
Last potentially related work creation:
|
||||
kasan_save_stack+0x20/0x40 mm/kasan/common.c:45
|
||||
__kasan_record_aux_stack+0x94/0xa0 mm/kasan/generic.c:492
|
||||
__call_rcu_common.constprop.0+0xc3/0xa10 kernel/rcu/tree.c:2713
|
||||
netlink_release+0x620/0xc20 net/netlink/af_netlink.c:802
|
||||
__sock_release+0xb5/0x270 net/socket.c:663
|
||||
sock_close+0x1e/0x30 net/socket.c:1425
|
||||
__fput+0x408/0xab0 fs/file_table.c:384
|
||||
__fput_sync+0x4c/0x60 fs/file_table.c:465
|
||||
__do_sys_close fs/open.c:1580 [inline]
|
||||
__se_sys_close+0x68/0xd0 fs/open.c:1565
|
||||
do_syscall_x64 arch/x86/entry/common.c:51 [inline]
|
||||
do_syscall_64+0x59/0x110 arch/x86/entry/common.c:81
|
||||
entry_SYSCALL_64_after_hwframe+0x78/0xe2
|
||||
|
||||
Second to last potentially related work creation:
|
||||
kasan_save_stack+0x20/0x40 mm/kasan/common.c:45
|
||||
__kasan_record_aux_stack+0x94/0xa0 mm/kasan/generic.c:492
|
||||
__call_rcu_common.constprop.0+0xc3/0xa10 kernel/rcu/tree.c:2713
|
||||
netlink_release+0x620/0xc20 net/netlink/af_netlink.c:802
|
||||
__sock_release+0xb5/0x270 net/socket.c:663
|
||||
sock_close+0x1e/0x30 net/socket.c:1425
|
||||
__fput+0x408/0xab0 fs/file_table.c:384
|
||||
task_work_run+0x154/0x240 kernel/task_work.c:239
|
||||
exit_task_work include/linux/task_work.h:45 [inline]
|
||||
do_exit+0x8e5/0x1320 kernel/exit.c:874
|
||||
do_group_exit+0xcd/0x280 kernel/exit.c:1023
|
||||
get_signal+0x1675/0x1850 kernel/signal.c:2905
|
||||
arch_do_signal_or_restart+0x80/0x3b0 arch/x86/kernel/signal.c:310
|
||||
exit_to_user_mode_loop kernel/entry/common.c:111 [inline]
|
||||
exit_to_user_mode_prepare include/linux/entry-common.h:328 [inline]
|
||||
__syscall_exit_to_user_mode_work kernel/entry/common.c:207 [inline]
|
||||
syscall_exit_to_user_mode+0x1b3/0x1e0 kernel/entry/common.c:218
|
||||
do_syscall_64+0x66/0x110 arch/x86/entry/common.c:87
|
||||
entry_SYSCALL_64_after_hwframe+0x78/0xe2
|
||||
|
||||
The buggy address belongs to the object at ffff88800f5be000
|
||||
which belongs to the cache kmalloc-2k of size 2048
|
||||
The buggy address is located 2656 bytes to the right of
|
||||
allocated 1280-byte region [ffff88800f5be000, ffff88800f5be500)
|
||||
|
||||
...
|
||||
|
||||
Memory state around the buggy address:
|
||||
ffff88800f5bee00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
|
||||
ffff88800f5bee80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
|
||||
>ffff88800f5bef00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
|
||||
^
|
||||
ffff88800f5bef80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
|
||||
ffff88800f5bf000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
|
||||
==================================================================
|
||||
|
||||
By analyzing the vmcore, we found that vc->vc_origin was somehow placed
|
||||
one line prior to vc->vc_screenbuf when vc was in KD_TEXT mode, and
|
||||
further writes to /dev/vcs caused out-of-bounds reads (and writes
|
||||
right after) in vcs_write_buf_noattr().
|
||||
|
||||
Our further experiments show that in most cases, vc->vc_origin equals
|
||||
vga_vram_base when the console is in KD_TEXT mode, and it's around
|
||||
vc->vc_screenbuf for the KD_GRAPHICS mode. But via triggering a
|
||||
TIOCL_SETVESABLANK ioctl beforehand, we can make vc->vc_origin be around
|
||||
vc->vc_screenbuf while the console is in KD_TEXT mode, and then by
|
||||
writing the special 'ESC M' control sequence to the tty a certain number of times
|
||||
(depending on the value of `vc->state.y - vc->vc_top`), we can eventually
|
||||
move vc->vc_origin prior to vc->vc_screenbuf. Here's the PoC, tested on
|
||||
QEMU:
|
||||
|
||||
```
|
||||
int main() {
|
||||
const int RI_NUM = 10; // should be greater than `vc->state.y - vc->vc_top`
|
||||
int tty_fd, vcs_fd;
|
||||
const char *tty_path = "/dev/tty0";
|
||||
const char *vcs_path = "/dev/vcs";
|
||||
const char escape_seq[] = "\x1bM"; // ESC + M
|
||||
const char trigger_seq[] = "Let's trigger an OOB write.";
|
||||
struct vt_sizes vt_size = { 70, 2 };
|
||||
int blank = TIOCL_BLANKSCREEN;
|
||||
|
||||
tty_fd = open(tty_path, O_RDWR);
|
||||
|
||||
char vesa_mode[] = { TIOCL_SETVESABLANK, 1 };
|
||||
ioctl(tty_fd, TIOCLINUX, vesa_mode);
|
||||
|
||||
ioctl(tty_fd, TIOCLINUX, &blank);
|
||||
ioctl(tty_fd, VT_RESIZE, &vt_size);
|
||||
|
||||
for (int i = 0; i < RI_NUM; ++i)
|
||||
write(tty_fd, escape_seq, sizeof(escape_seq) - 1);
|
||||
|
||||
vcs_fd = open(vcs_path, O_RDWR);
|
||||
write(vcs_fd, trigger_seq, sizeof(trigger_seq));
|
||||
|
||||
close(vcs_fd);
|
||||
close(tty_fd);
|
||||
return 0;
|
||||
}
|
||||
```
|
||||
|
||||
To solve this problem, add an address range validation check in
|
||||
vgacon_scroll(), ensuring vc->vc_origin never precedes vc_screenbuf.
|
||||
|
||||
Reported-by: syzbot+9c09fda97a1a65ea859b@syzkaller.appspotmail.com
|
||||
Closes: https://syzkaller.appspot.com/bug?extid=9c09fda97a1a65ea859b [1]
|
||||
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
|
||||
Cc: stable@vger.kernel.org
|
||||
Co-developed-by: Yi Yang <yiyang13@huawei.com>
|
||||
Signed-off-by: Yi Yang <yiyang13@huawei.com>
|
||||
Signed-off-by: GONG Ruiqi <gongruiqi1@huawei.com>
|
||||
Signed-off-by: Helge Deller <deller@gmx.de>
|
||||
---
|
||||
drivers/video/console/vgacon.c | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
--- a/drivers/video/console/vgacon.c
|
||||
+++ b/drivers/video/console/vgacon.c
|
||||
@@ -1168,7 +1168,7 @@ static bool vgacon_scroll(struct vc_data
|
||||
c->vc_screenbuf_size - delta);
|
||||
c->vc_origin = vga_vram_end - c->vc_screenbuf_size;
|
||||
vga_rolled_over = 0;
|
||||
- } else
|
||||
+ } else if (oldo - delta >= (unsigned long)c->vc_screenbuf)
|
||||
c->vc_origin -= delta;
|
||||
c->vc_scr_end = c->vc_origin + c->vc_screenbuf_size;
|
||||
scr_memsetw((u16 *) (c->vc_origin), c->vc_video_erase_char,
|
@@ -1,36 +0,0 @@
|
||||
From b5a4b82efd19d0687a5582a58f6830bf714e34fc Mon Sep 17 00:00:00 2001
|
||||
From: Nathan Chancellor <nathan@kernel.org>
|
||||
Date: Tue, 18 Mar 2025 15:32:30 -0700
|
||||
Subject: x86/tools: Drop duplicate unlikely() definition in
|
||||
insn_decoder_test.c
|
||||
|
||||
After commit c104c16073b7 ("Kunit to check the longest symbol length"),
|
||||
there is a warning when building with clang because there is now a
|
||||
definition of unlikely from compiler.h in tools/include/linux, which
|
||||
conflicts with the one in the instruction decoder selftest:
|
||||
|
||||
arch/x86/tools/insn_decoder_test.c:15:9: warning: 'unlikely' macro redefined [-Wmacro-redefined]
|
||||
|
||||
Remove the second unlikely() definition, as it is no longer necessary,
|
||||
clearing up the warning.
|
||||
|
||||
Fixes: c104c16073b7 ("Kunit to check the longest symbol length")
|
||||
Signed-off-by: Nathan Chancellor <nathan@kernel.org>
|
||||
Signed-off-by: Ingo Molnar <mingo@kernel.org>
|
||||
Acked-by: Shuah Khan <skhan@linuxfoundation.org>
|
||||
Link: https://lore.kernel.org/r/20250318-x86-decoder-test-fix-unlikely-redef-v1-1-74c84a7bf05b@kernel.org
|
||||
---
|
||||
arch/x86/tools/insn_decoder_test.c | 2 --
|
||||
1 file changed, 2 deletions(-)
|
||||
|
||||
--- a/arch/x86/tools/insn_decoder_test.c
|
||||
+++ b/arch/x86/tools/insn_decoder_test.c
|
||||
@@ -12,8 +12,6 @@
|
||||
#include <stdarg.h>
|
||||
#include <linux/kallsyms.h>
|
||||
|
||||
-#define unlikely(cond) (cond)
|
||||
-
|
||||
#include <asm/insn.h>
|
||||
#include <inat.c>
|
||||
#include <insn.c>
|
debian/patches/patchset-pf/fixes/0003-fbdev-Fix-do_register_framebuffer-to-prevent-null-pt.patch (new file, 102 lines)
@@ -0,0 +1,102 @@
|
||||
From 5cf26cf9fd9c11cb1543aac026f8928829895663 Mon Sep 17 00:00:00 2001
|
||||
From: Murad Masimov <m.masimov@mt-integration.ru>
|
||||
Date: Mon, 28 Apr 2025 18:34:06 +0300
|
||||
Subject: fbdev: Fix do_register_framebuffer to prevent null-ptr-deref in
|
||||
fb_videomode_to_var
|
||||
|
||||
If fb_add_videomode() in do_register_framebuffer() fails to allocate
|
||||
memory for fb_videomode, it will later lead to a null-ptr dereference in
|
||||
fb_videomode_to_var(), as the fb_info is registered while not having the
|
||||
mode in modelist that is expected to be there, i.e. the one that is
|
||||
described in fb_info->var.
|
||||
|
||||
================================================================
|
||||
general protection fault, probably for non-canonical address 0xdffffc0000000001: 0000 [#1] PREEMPT SMP KASAN NOPTI
|
||||
KASAN: null-ptr-deref in range [0x0000000000000008-0x000000000000000f]
|
||||
CPU: 1 PID: 30371 Comm: syz-executor.1 Not tainted 5.10.226-syzkaller #0
|
||||
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.12.0-1 04/01/2014
|
||||
RIP: 0010:fb_videomode_to_var+0x24/0x610 drivers/video/fbdev/core/modedb.c:901
|
||||
Call Trace:
|
||||
display_to_var+0x3a/0x7c0 drivers/video/fbdev/core/fbcon.c:929
|
||||
fbcon_resize+0x3e2/0x8f0 drivers/video/fbdev/core/fbcon.c:2071
|
||||
resize_screen drivers/tty/vt/vt.c:1176 [inline]
|
||||
vc_do_resize+0x53a/0x1170 drivers/tty/vt/vt.c:1263
|
||||
fbcon_modechanged+0x3ac/0x6e0 drivers/video/fbdev/core/fbcon.c:2720
|
||||
fbcon_update_vcs+0x43/0x60 drivers/video/fbdev/core/fbcon.c:2776
|
||||
do_fb_ioctl+0x6d2/0x740 drivers/video/fbdev/core/fbmem.c:1128
|
||||
fb_ioctl+0xe7/0x150 drivers/video/fbdev/core/fbmem.c:1203
|
||||
vfs_ioctl fs/ioctl.c:48 [inline]
|
||||
__do_sys_ioctl fs/ioctl.c:753 [inline]
|
||||
__se_sys_ioctl fs/ioctl.c:739 [inline]
|
||||
__x64_sys_ioctl+0x19a/0x210 fs/ioctl.c:739
|
||||
do_syscall_64+0x33/0x40 arch/x86/entry/common.c:46
|
||||
entry_SYSCALL_64_after_hwframe+0x67/0xd1
|
||||
================================================================
|
||||
|
||||
Even though fbcon_init() checks beforehand if fb_match_mode() in
|
||||
var_to_display() fails, it cannot prevent the panic because fbcon_init()
|
||||
does not return an error code. Considering this and the comment in the code
|
||||
about fb_match_mode() returning NULL - "This should not happen" - it is
|
||||
better to prevent registering the fb_info if its mode was not set
|
||||
successfully. Also move fb_add_videomode() closer to the beginning of
|
||||
do_register_framebuffer() to avoid having to do the cleanup on failure.
|
||||
|
||||
Found by Linux Verification Center (linuxtesting.org) with Syzkaller.
|
||||
|
||||
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
|
||||
Cc: stable@vger.kernel.org
|
||||
Signed-off-by: Murad Masimov <m.masimov@mt-integration.ru>
|
||||
Signed-off-by: Helge Deller <deller@gmx.de>
|
||||
---
|
||||
drivers/video/fbdev/core/fbmem.c | 18 +++++++++++-------
|
||||
1 file changed, 11 insertions(+), 7 deletions(-)
|
||||
|
||||
--- a/drivers/video/fbdev/core/fbmem.c
|
||||
+++ b/drivers/video/fbdev/core/fbmem.c
|
||||
@@ -388,7 +388,7 @@ static int fb_check_foreignness(struct f
|
||||
|
||||
static int do_register_framebuffer(struct fb_info *fb_info)
|
||||
{
|
||||
- int i;
|
||||
+ int i, err = 0;
|
||||
struct fb_videomode mode;
|
||||
|
||||
if (fb_check_foreignness(fb_info))
|
||||
@@ -397,10 +397,18 @@ static int do_register_framebuffer(struc
|
||||
if (num_registered_fb == FB_MAX)
|
||||
return -ENXIO;
|
||||
|
||||
- num_registered_fb++;
|
||||
for (i = 0 ; i < FB_MAX; i++)
|
||||
if (!registered_fb[i])
|
||||
break;
|
||||
+
|
||||
+ if (!fb_info->modelist.prev || !fb_info->modelist.next)
|
||||
+ INIT_LIST_HEAD(&fb_info->modelist);
|
||||
+
|
||||
+ fb_var_to_videomode(&mode, &fb_info->var);
|
||||
+ err = fb_add_videomode(&mode, &fb_info->modelist);
|
||||
+ if (err < 0)
|
||||
+ return err;
|
||||
+
|
||||
fb_info->node = i;
|
||||
refcount_set(&fb_info->count, 1);
|
||||
mutex_init(&fb_info->lock);
|
||||
@@ -426,16 +434,12 @@ static int do_register_framebuffer(struc
|
||||
if (bitmap_empty(fb_info->pixmap.blit_y, FB_MAX_BLIT_HEIGHT))
|
||||
bitmap_fill(fb_info->pixmap.blit_y, FB_MAX_BLIT_HEIGHT);
|
||||
|
||||
- if (!fb_info->modelist.prev || !fb_info->modelist.next)
|
||||
- INIT_LIST_HEAD(&fb_info->modelist);
|
||||
-
|
||||
if (fb_info->skip_vt_switch)
|
||||
pm_vt_switch_required(fb_info->device, false);
|
||||
else
|
||||
pm_vt_switch_required(fb_info->device, true);
|
||||
|
||||
- fb_var_to_videomode(&mode, &fb_info->var);
|
||||
- fb_add_videomode(&mode, &fb_info->modelist);
|
||||
+ num_registered_fb++;
|
||||
registered_fb[i] = fb_info;
|
||||
|
||||
#ifdef CONFIG_GUMSTIX_AM200EPD
|
debian/patches/patchset-pf/fixes/0004-fbdev-Fix-fb_set_var-to-prevent-null-ptr-deref-in-fb.patch (new file, 65 lines)
@@ -0,0 +1,65 @@
|
||||
From 54c7f478f1a9d58f5609a48d461c7d495bb8301a Mon Sep 17 00:00:00 2001
|
||||
From: Murad Masimov <m.masimov@mt-integration.ru>
|
||||
Date: Mon, 28 Apr 2025 18:34:07 +0300
|
||||
Subject: fbdev: Fix fb_set_var to prevent null-ptr-deref in
|
||||
fb_videomode_to_var
|
||||
|
||||
If fb_add_videomode() in fb_set_var() fails to allocate memory for
|
||||
fb_videomode, later it may lead to a null-ptr dereference in
|
||||
fb_videomode_to_var(), as the fb_info is registered while not having the
|
||||
mode in modelist that is expected to be there, i.e. the one that is
|
||||
described in fb_info->var.
|
||||
|
||||
================================================================
|
||||
general protection fault, probably for non-canonical address 0xdffffc0000000001: 0000 [#1] PREEMPT SMP KASAN NOPTI
|
||||
KASAN: null-ptr-deref in range [0x0000000000000008-0x000000000000000f]
|
||||
CPU: 1 PID: 30371 Comm: syz-executor.1 Not tainted 5.10.226-syzkaller #0
|
||||
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.12.0-1 04/01/2014
|
||||
RIP: 0010:fb_videomode_to_var+0x24/0x610 drivers/video/fbdev/core/modedb.c:901
|
||||
Call Trace:
|
||||
display_to_var+0x3a/0x7c0 drivers/video/fbdev/core/fbcon.c:929
|
||||
fbcon_resize+0x3e2/0x8f0 drivers/video/fbdev/core/fbcon.c:2071
|
||||
resize_screen drivers/tty/vt/vt.c:1176 [inline]
|
||||
vc_do_resize+0x53a/0x1170 drivers/tty/vt/vt.c:1263
|
||||
fbcon_modechanged+0x3ac/0x6e0 drivers/video/fbdev/core/fbcon.c:2720
|
||||
fbcon_update_vcs+0x43/0x60 drivers/video/fbdev/core/fbcon.c:2776
|
||||
do_fb_ioctl+0x6d2/0x740 drivers/video/fbdev/core/fbmem.c:1128
|
||||
fb_ioctl+0xe7/0x150 drivers/video/fbdev/core/fbmem.c:1203
|
||||
vfs_ioctl fs/ioctl.c:48 [inline]
|
||||
__do_sys_ioctl fs/ioctl.c:753 [inline]
|
||||
__se_sys_ioctl fs/ioctl.c:739 [inline]
|
||||
__x64_sys_ioctl+0x19a/0x210 fs/ioctl.c:739
|
||||
do_syscall_64+0x33/0x40 arch/x86/entry/common.c:46
|
||||
entry_SYSCALL_64_after_hwframe+0x67/0xd1
|
||||
================================================================
|
||||
|
||||
The reason is that fb_info->var is being modified in fb_set_var(), and
|
||||
then fb_videomode_to_var() is called. If it fails to add the mode to
|
||||
fb_info->modelist, fb_set_var() returns an error, but does not restore the
|
||||
old value of fb_info->var. Restore fb_info->var on failure the same way
|
||||
it is done earlier in the function.
|
||||
|
||||
Found by Linux Verification Center (linuxtesting.org) with Syzkaller.
|
||||
|
||||
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
|
||||
Cc: stable@vger.kernel.org
|
||||
Signed-off-by: Murad Masimov <m.masimov@mt-integration.ru>
|
||||
Signed-off-by: Helge Deller <deller@gmx.de>
|
||||
---
|
||||
drivers/video/fbdev/core/fbmem.c | 4 +++-
|
||||
1 file changed, 3 insertions(+), 1 deletion(-)
|
||||
|
||||
--- a/drivers/video/fbdev/core/fbmem.c
|
||||
+++ b/drivers/video/fbdev/core/fbmem.c
|
||||
@@ -328,8 +328,10 @@ fb_set_var(struct fb_info *info, struct
|
||||
!list_empty(&info->modelist))
|
||||
ret = fb_add_videomode(&mode, &info->modelist);
|
||||
|
||||
- if (ret)
|
||||
+ if (ret) {
|
||||
+ info->var = old_var;
|
||||
return ret;
|
||||
+ }
|
||||
|
||||
event.info = info;
|
||||
event.data = &mode;
|
@@ -1,40 +0,0 @@
|
||||
From e56acee381a8e07edf1920fb58f3166f911b6e5c Mon Sep 17 00:00:00 2001
|
||||
From: Lingbo Kong <quic_lingbok@quicinc.com>
|
||||
Date: Wed, 26 Feb 2025 19:31:18 +0800
|
||||
Subject: wifi: ath12k: Abort scan before removing link interface to prevent
|
||||
duplicate deletion
|
||||
|
||||
Currently, when ath12k performs the remove link interface operation, if
|
||||
there is an ongoing scan operation on the arvif, ath12k may execute the
|
||||
remove link interface operation multiple times on the same arvif. This
|
||||
occurs because, during the remove link operation, if a scan operation is
|
||||
present on the arvif, ath12k may receive a WMI_SCAN_EVENT_COMPLETED event
|
||||
from the firmware. Upon receiving this event, ath12k will continue to
|
||||
execute the ath12k_scan_vdev_clean_work() function, performing the remove
|
||||
link interface operation on the same arvif again.
|
||||
|
||||
To address this issue, before executing the remove link interface
|
||||
operation, ath12k needs to check if there is an ongoing scan operation on
|
||||
the current arvif. If such an operation exists, it should be aborted.
|
||||
|
||||
Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3
|
||||
|
||||
Signed-off-by: Lingbo Kong <quic_lingbok@quicinc.com>
|
||||
---
|
||||
drivers/net/wireless/ath/ath12k/mac.c | 5 +++++
|
||||
1 file changed, 5 insertions(+)
|
||||
|
||||
--- a/drivers/net/wireless/ath/ath12k/mac.c
|
||||
+++ b/drivers/net/wireless/ath/ath12k/mac.c
|
||||
@@ -9395,6 +9395,11 @@ ath12k_mac_op_unassign_vif_chanctx(struc
|
||||
ar->num_started_vdevs == 1 && ar->monitor_vdev_created)
|
||||
ath12k_mac_monitor_stop(ar);
|
||||
|
||||
+ if (ar->scan.arvif == arvif && ar->scan.state == ATH12K_SCAN_RUNNING) {
|
||||
+ ath12k_scan_abort(ar);
|
||||
+ ar->scan.arvif = NULL;
|
||||
+ }
|
||||
+
|
||||
ath12k_mac_remove_link_interface(hw, arvif);
|
||||
ath12k_mac_unassign_link_vif(arvif);
|
||||
}
|
@@ -1,49 +0,0 @@
|
||||
From 8d0e02f81d08c7b1e082028af0f55a22e7e1dfb2 Mon Sep 17 00:00:00 2001
|
||||
From: Christian Brauner <brauner@kernel.org>
|
||||
Date: Tue, 15 Apr 2025 10:22:04 +0200
|
||||
Subject: Kconfig: switch CONFIG_SYSFS_SYCALL default to n
|
||||
|
||||
This odd system call will be removed in the future. Let's decouple it
|
||||
from CONFIG_EXPERT and switch the default to n as a first step.
|
||||
|
||||
Signed-off-by: Christian Brauner <brauner@kernel.org>
|
||||
---
|
||||
init/Kconfig | 20 ++++++++++----------
|
||||
1 file changed, 10 insertions(+), 10 deletions(-)
|
||||
|
||||
--- a/init/Kconfig
|
||||
+++ b/init/Kconfig
|
||||
@@ -1603,6 +1603,16 @@ config SYSCTL_ARCH_UNALIGN_ALLOW
|
||||
the unaligned access emulation.
|
||||
see arch/parisc/kernel/unaligned.c for reference
|
||||
|
||||
+config SYSFS_SYSCALL
|
||||
+ bool "Sysfs syscall support"
|
||||
+ default n
|
||||
+ help
|
||||
+ sys_sysfs is an obsolete system call no longer supported in libc.
|
||||
+ Note that disabling this option is more secure but might break
|
||||
+ compatibility with some systems.
|
||||
+
|
||||
+ If unsure say N here.
|
||||
+
|
||||
config HAVE_PCSPKR_PLATFORM
|
||||
bool
|
||||
|
||||
@@ -1647,16 +1657,6 @@ config SGETMASK_SYSCALL
|
||||
|
||||
If unsure, leave the default option here.
|
||||
|
||||
-config SYSFS_SYSCALL
|
||||
- bool "Sysfs syscall support" if EXPERT
|
||||
- default y
|
||||
- help
|
||||
- sys_sysfs is an obsolete system call no longer supported in libc.
|
||||
- Note that disabling this option is more secure but might break
|
||||
- compatibility with some systems.
|
||||
-
|
||||
- If unsure say Y here.
|
||||
-
|
||||
config FHANDLE
|
||||
bool "open by fhandle syscalls" if EXPERT
|
||||
select EXPORTFS
|
debian/patches/patchset-pf/fixes/0005-anon_inode-use-a-proper-mode-internally.patch (new file, 113 lines)
@@ -0,0 +1,113 @@
|
||||
From 9cb2f9d210f915aabe54c5061d84f3fbe93c71ea Mon Sep 17 00:00:00 2001
|
||||
From: Christian Brauner <brauner@kernel.org>
|
||||
Date: Mon, 7 Apr 2025 11:54:15 +0200
|
||||
Subject: anon_inode: use a proper mode internally
|
||||
|
||||
This allows the VFS to not trip over anonymous inodes and we can add
|
||||
asserts based on the mode into the vfs. When we report it to userspace
|
||||
we can simply hide the mode to avoid regressions. I've audited all
|
||||
direct callers of alloc_anon_inode() and only secretmem overrides i_mode
|
||||
and i_op inode operations but it already uses a regular file.
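As an illustration from the userspace side (a sketch; signalfd() is chosen arbitrarily as one producer of anonymous inodes), the observable contract after this change is:

```
#include <assert.h>
#include <signal.h>
#include <sys/signalfd.h>
#include <sys/stat.h>

int main(void)
{
	sigset_t mask;
	struct stat st;
	int sfd;

	sigemptyset(&mask);
	sigaddset(&mask, SIGINT);
	sigprocmask(SIG_BLOCK, &mask, NULL);

	sfd = signalfd(-1, &mask, 0);	/* backed by an anonymous inode */
	assert(sfd >= 0);
	assert(fstat(sfd, &st) == 0);
	/* The inode is S_IFREG internally now, but anon_inode_getattr()
	 * masks the format bits, so legacy checks like lsof's still
	 * see a file type of 0. */
	assert((st.st_mode & S_IFMT) == 0);
	return 0;
}
```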
|
||||
|
||||
Link: https://lore.kernel.org/20250407-work-anon_inode-v1-1-53a44c20d44e@kernel.org
|
||||
Fixes: af153bb63a336 ("vfs: catch invalid modes in may_open()")
|
||||
Reviewed-by: Jeff Layton <jlayton@kernel.org>
|
||||
Cc: stable@vger.kernel.org # all LTS kernels
|
||||
Reported-by: syzbot+5d8e79d323a13aa0b248@syzkaller.appspotmail.com
|
||||
Closes: https://lore.kernel.org/all/67ed3fb3.050a0220.14623d.0009.GAE@google.com
|
||||
Signed-off-by: Christian Brauner <brauner@kernel.org>
|
||||
---
|
||||
fs/anon_inodes.c | 36 ++++++++++++++++++++++++++++++++++++
|
||||
fs/internal.h | 3 +++
|
||||
fs/libfs.c | 8 +++++++-
|
||||
3 files changed, 46 insertions(+), 1 deletion(-)
|
||||
|
||||
--- a/fs/anon_inodes.c
|
||||
+++ b/fs/anon_inodes.c
|
||||
@@ -24,10 +24,44 @@
|
||||
|
||||
#include <linux/uaccess.h>
|
||||
|
||||
+#include "internal.h"
|
||||
+
|
||||
static struct vfsmount *anon_inode_mnt __ro_after_init;
|
||||
static struct inode *anon_inode_inode __ro_after_init;
|
||||
|
||||
/*
|
||||
+ * User space expects anonymous inodes to have no file type in st_mode.
|
||||
+ *
|
||||
+ * In particular, 'lsof' has this legacy logic:
|
||||
+ *
|
||||
+ * type = s->st_mode & S_IFMT;
|
||||
+ * switch (type) {
|
||||
+ * ...
|
||||
+ * case 0:
|
||||
+ * if (!strcmp(p, "anon_inode"))
|
||||
+ * Lf->ntype = Ntype = N_ANON_INODE;
|
||||
+ *
|
||||
+ * to detect our old anon_inode logic.
|
||||
+ *
|
||||
+ * Rather than mess with our internal sane inode data, just fix it
|
||||
+ * up here in getattr() by masking off the format bits.
|
||||
+ */
|
||||
+int anon_inode_getattr(struct mnt_idmap *idmap, const struct path *path,
|
||||
+ struct kstat *stat, u32 request_mask,
|
||||
+ unsigned int query_flags)
|
||||
+{
|
||||
+ struct inode *inode = d_inode(path->dentry);
|
||||
+
|
||||
+ generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
|
||||
+ stat->mode &= ~S_IFMT;
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static const struct inode_operations anon_inode_operations = {
|
||||
+ .getattr = anon_inode_getattr,
|
||||
+};
|
||||
+
|
||||
+/*
|
||||
* anon_inodefs_dname() is called from d_path().
|
||||
*/
|
||||
static char *anon_inodefs_dname(struct dentry *dentry, char *buffer, int buflen)
|
||||
@@ -66,6 +100,7 @@ static struct inode *anon_inode_make_sec
|
||||
if (IS_ERR(inode))
|
||||
return inode;
|
||||
inode->i_flags &= ~S_PRIVATE;
|
||||
+ inode->i_op = &anon_inode_operations;
|
||||
error = security_inode_init_security_anon(inode, &QSTR(name),
|
||||
context_inode);
|
||||
if (error) {
|
||||
@@ -313,6 +348,7 @@ static int __init anon_inode_init(void)
|
||||
anon_inode_inode = alloc_anon_inode(anon_inode_mnt->mnt_sb);
|
||||
if (IS_ERR(anon_inode_inode))
|
||||
panic("anon_inode_init() inode allocation failed (%ld)\n", PTR_ERR(anon_inode_inode));
|
||||
+ anon_inode_inode->i_op = &anon_inode_operations;
|
||||
|
||||
return 0;
|
||||
}
|
||||
--- a/fs/internal.h
|
||||
+++ b/fs/internal.h
|
||||
@@ -343,3 +343,6 @@ static inline bool path_mounted(const st
|
||||
void file_f_owner_release(struct file *file);
|
||||
bool file_seek_cur_needs_f_lock(struct file *file);
|
||||
int statmount_mnt_idmap(struct mnt_idmap *idmap, struct seq_file *seq, bool uid_map);
|
||||
+int anon_inode_getattr(struct mnt_idmap *idmap, const struct path *path,
|
||||
+ struct kstat *stat, u32 request_mask,
|
||||
+ unsigned int query_flags);
|
||||
--- a/fs/libfs.c
|
||||
+++ b/fs/libfs.c
|
||||
@@ -1647,7 +1647,13 @@ struct inode *alloc_anon_inode(struct su
|
||||
* that it already _is_ on the dirty list.
|
||||
*/
|
||||
inode->i_state = I_DIRTY;
|
||||
- inode->i_mode = S_IRUSR | S_IWUSR;
|
||||
+ /*
|
||||
+ * Historically anonymous inodes didn't have a type at all and
|
||||
+ * userspace has come to rely on this. Internally they're just
|
||||
+ * regular files but S_IFREG is masked off when reporting
|
||||
+ * information to userspace.
|
||||
+ */
|
||||
+ inode->i_mode = S_IFREG | S_IRUSR | S_IWUSR;
|
||||
inode->i_uid = current_fsuid();
|
||||
inode->i_gid = current_fsgid();
|
||||
inode->i_flags |= S_PRIVATE;
|
debian/patches/patchset-pf/fixes/0006-anon_inode-explicitly-block-setattr.patch (new file, 80 lines)
@@ -0,0 +1,80 @@
|
||||
From ea4199112ae6d8da866417f50e035be01488c502 Mon Sep 17 00:00:00 2001
|
||||
From: Christian Brauner <brauner@kernel.org>
|
||||
Date: Mon, 7 Apr 2025 11:54:17 +0200
|
||||
Subject: anon_inode: explicitly block ->setattr()
|
||||
|
||||
It is currently possible to change the mode and owner of the single
|
||||
anonymous inode in the kernel:
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int ret, sfd;
|
||||
sigset_t mask;
|
||||
struct signalfd_siginfo fdsi;
|
||||
|
||||
sigemptyset(&mask);
|
||||
sigaddset(&mask, SIGINT);
|
||||
sigaddset(&mask, SIGQUIT);
|
||||
|
||||
ret = sigprocmask(SIG_BLOCK, &mask, NULL);
|
||||
if (ret < 0)
|
||||
_exit(1);
|
||||
|
||||
sfd = signalfd(-1, &mask, 0);
|
||||
if (sfd < 0)
|
||||
_exit(2);
|
||||
|
||||
ret = fchown(sfd, 5555, 5555);
|
||||
if (ret < 0)
|
||||
_exit(3);
|
||||
|
||||
ret = fchmod(sfd, 0777);
|
||||
if (ret < 0)
|
||||
_exit(3);
|
||||
|
||||
_exit(4);
|
||||
}
|
||||
|
||||
This is a bug. It's not really a meaningful one because anonymous inodes
|
||||
don't really figure into path lookup and they cannot be reopened via
|
||||
/proc/<pid>/fd/<nr> and can't be used for lookup itself. So they can
|
||||
only ever serve as direct references.
|
||||
|
||||
But it is still completely bogus to allow the mode and ownership or any
|
||||
of the properties of the anonymous inode to be changed. Block this!
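With ->setattr() returning -EOPNOTSUPP, the PoC above now fails at the fchown() step. A sketch of the expected post-patch behavior (run as root so the failure reflects the new EOPNOTSUPP rather than an ordinary EPERM):

```
#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <sys/signalfd.h>
#include <sys/stat.h>
#include <unistd.h>

int main(void)
{
	sigset_t mask;
	int sfd;

	sigemptyset(&mask);
	sigaddset(&mask, SIGINT);
	sigprocmask(SIG_BLOCK, &mask, NULL);

	sfd = signalfd(-1, &mask, 0);
	/* Expected after this patch: both calls are rejected with
	 * EOPNOTSUPP instead of mutating the shared anonymous inode. */
	if (fchown(sfd, 5555, 5555) < 0)
		perror("fchown");
	if (fchmod(sfd, 0777) < 0)
		perror("fchmod");
	return 0;
}
```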
|
||||
|
||||
Link: https://lore.kernel.org/20250407-work-anon_inode-v1-3-53a44c20d44e@kernel.org
|
||||
Reviewed-by: Jeff Layton <jlayton@kernel.org>
|
||||
Cc: stable@vger.kernel.org # all LTS kernels
|
||||
Signed-off-by: Christian Brauner <brauner@kernel.org>
|
||||
---
|
||||
fs/anon_inodes.c | 7 +++++++
|
||||
fs/internal.h | 2 ++
|
||||
2 files changed, 9 insertions(+)
|
||||
|
||||
--- a/fs/anon_inodes.c
|
||||
+++ b/fs/anon_inodes.c
|
||||
@@ -57,8 +57,15 @@ int anon_inode_getattr(struct mnt_idmap
|
||||
return 0;
|
||||
}
|
||||
|
||||
+int anon_inode_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
|
||||
+ struct iattr *attr)
|
||||
+{
|
||||
+ return -EOPNOTSUPP;
|
||||
+}
|
||||
+
|
||||
static const struct inode_operations anon_inode_operations = {
|
||||
.getattr = anon_inode_getattr,
|
||||
+ .setattr = anon_inode_setattr,
|
||||
};
|
||||
|
||||
/*
|
||||
--- a/fs/internal.h
|
||||
+++ b/fs/internal.h
|
||||
@@ -346,3 +346,5 @@ int statmount_mnt_idmap(struct mnt_idmap
|
||||
int anon_inode_getattr(struct mnt_idmap *idmap, const struct path *path,
|
||||
struct kstat *stat, u32 request_mask,
|
||||
unsigned int query_flags);
|
||||
+int anon_inode_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
|
||||
+ struct iattr *attr);
|
debian/patches/patchset-pf/fixes/0007-anon_inode-raise-SB_I_NODEV-and-SB_I_NOEXEC.patch (new file, 39 lines)
@@ -0,0 +1,39 @@
|
||||
From 79f54c5bc7c6097a379c83e9ed56bee27cf1218a Mon Sep 17 00:00:00 2001
|
||||
From: Christian Brauner <brauner@kernel.org>
|
||||
Date: Mon, 7 Apr 2025 11:54:19 +0200
|
||||
Subject: anon_inode: raise SB_I_NODEV and SB_I_NOEXEC
|
||||
|
||||
It isn't possible to execute anonymous inodes because they cannot be
|
||||
opened in any way after they have been created. This includes execution:
|
||||
|
||||
execveat(fd_anon_inode, "", NULL, NULL, AT_EMPTY_PATH)
|
||||
|
||||
Anonymous inodes have inode->f_op set to no_open_fops which sets
|
||||
no_open() which returns ENXIO. That means any call to do_dentry_open()
|
||||
which is the endpoint of the do_open_execat() will fail. There's no
|
||||
chance to execute an anonymous inode. Unless a given subsystem overrides
|
||||
it ofc.
|
||||
|
||||
However, we should still harden this and raise SB_I_NODEV and
|
||||
SB_I_NOEXEC on the superblock itself so that no one gets any creative
|
||||
ideas.
|
||||
|
||||
Link: https://lore.kernel.org/20250407-work-anon_inode-v1-5-53a44c20d44e@kernel.org
|
||||
Reviewed-by: Jeff Layton <jlayton@kernel.org>
|
||||
Cc: stable@vger.kernel.org # all LTS kernels
|
||||
Signed-off-by: Christian Brauner <brauner@kernel.org>
|
||||
---
|
||||
fs/anon_inodes.c | 2 ++
|
||||
1 file changed, 2 insertions(+)
|
||||
|
||||
--- a/fs/anon_inodes.c
|
||||
+++ b/fs/anon_inodes.c
|
||||
@@ -86,6 +86,8 @@ static int anon_inodefs_init_fs_context(
|
||||
struct pseudo_fs_context *ctx = init_pseudo(fc, ANON_INODE_FS_MAGIC);
|
||||
if (!ctx)
|
||||
return -ENOMEM;
|
||||
+ fc->s_iflags |= SB_I_NOEXEC;
|
||||
+ fc->s_iflags |= SB_I_NODEV;
|
||||
ctx->dops = &anon_inodefs_dentry_operations;
|
||||
return 0;
|
||||
}
|
debian/patches/patchset-pf/fixes/0008-fs-add-S_ANON_INODE.patch (new file, 136 lines)
@@ -0,0 +1,136 @@
From edaacbee0f33b7371ec460723d1042a6c5a4bb9d Mon Sep 17 00:00:00 2001
From: Christian Brauner <brauner@kernel.org>
Date: Mon, 21 Apr 2025 10:27:40 +0200
Subject: fs: add S_ANON_INODE

This makes it easy to detect proper anonymous inodes and to ensure that
we can detect them in codepaths such as readahead().

Readahead on anonymous inodes didn't work because they didn't have a
proper mode. Now that they have one, we need to retain EINVAL being
returned; otherwise LTP will fail.

We also need to ensure that ioctls aren't simply fired like they are for
regular files, so that things like inotify inodes continue to correctly
call their own ioctl handlers as in [1].

Reported-by: Xilin Wu <sophon@radxa.com>
Link: https://lore.kernel.org/3A9139D5CD543962+89831381-31b9-4392-87ec-a84a5b3507d8@radxa.com [1]
Link: https://lore.kernel.org/7a1a7076-ff6b-4cb0-94e7-7218a0a44028@sirena.org.uk
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/ioctl.c | 7 ++++---
 fs/libfs.c | 2 +-
 fs/pidfs.c | 2 +-
 include/linux/fs.h | 2 ++
 mm/readahead.c | 20 ++++++++++++++++----
 5 files changed, 24 insertions(+), 9 deletions(-)

--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -821,7 +821,8 @@ static int do_vfs_ioctl(struct file *fil
 		return ioctl_fioasync(fd, filp, argp);

 	case FIOQSIZE:
-		if (S_ISDIR(inode->i_mode) || S_ISREG(inode->i_mode) ||
+		if (S_ISDIR(inode->i_mode) ||
+		    (S_ISREG(inode->i_mode) && !IS_ANON_FILE(inode)) ||
 		    S_ISLNK(inode->i_mode)) {
 			loff_t res = inode_get_bytes(inode);
 			return copy_to_user(argp, &res, sizeof(res)) ?
@@ -856,7 +857,7 @@ static int do_vfs_ioctl(struct file *fil
 		return ioctl_file_dedupe_range(filp, argp);

 	case FIONREAD:
-		if (!S_ISREG(inode->i_mode))
+		if (!S_ISREG(inode->i_mode) || IS_ANON_FILE(inode))
 			return vfs_ioctl(filp, cmd, arg);

 		return put_user(i_size_read(inode) - filp->f_pos,
@@ -881,7 +882,7 @@ static int do_vfs_ioctl(struct file *fil
 		return ioctl_get_fs_sysfs_path(filp, argp);

 	default:
-		if (S_ISREG(inode->i_mode))
+		if (S_ISREG(inode->i_mode) && !IS_ANON_FILE(inode))
 			return file_ioctl(filp, cmd, argp);
 		break;
 	}
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -1656,7 +1656,7 @@ struct inode *alloc_anon_inode(struct su
 	inode->i_mode = S_IFREG | S_IRUSR | S_IWUSR;
 	inode->i_uid = current_fsuid();
 	inode->i_gid = current_fsgid();
-	inode->i_flags |= S_PRIVATE;
+	inode->i_flags |= S_PRIVATE | S_ANON_INODE;
 	simple_inode_init_ts(inode);
 	return inode;
 }
--- a/fs/pidfs.c
+++ b/fs/pidfs.c
@@ -826,7 +826,7 @@ static int pidfs_init_inode(struct inode
 	const struct pid *pid = data;

 	inode->i_private = data;
-	inode->i_flags |= S_PRIVATE;
+	inode->i_flags |= S_PRIVATE | S_ANON_INODE;
 	inode->i_mode |= S_IRWXU;
 	inode->i_op = &pidfs_inode_operations;
 	inode->i_fop = &pidfs_file_operations;
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2344,6 +2344,7 @@ struct super_operations {
 #define S_CASEFOLD	(1 << 15) /* Casefolded file */
 #define S_VERITY	(1 << 16) /* Verity file (using fs/verity/) */
 #define S_KERNEL_FILE	(1 << 17) /* File is in use by the kernel (eg. fs/cachefiles) */
+#define S_ANON_INODE	(1 << 19) /* Inode is an anonymous inode */

 /*
  * Note that nosuid etc flags are inode-specific: setting some file-system
@@ -2400,6 +2401,7 @@ static inline bool sb_rdonly(const struc

 #define IS_WHITEOUT(inode)	(S_ISCHR(inode->i_mode) && \
 				 (inode)->i_rdev == WHITEOUT_DEV)
+#define IS_ANON_FILE(inode)	((inode)->i_flags & S_ANON_INODE)

 static inline bool HAS_UNMAPPED_ID(struct mnt_idmap *idmap,
 				   struct inode *inode)
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -690,9 +690,15 @@ EXPORT_SYMBOL_GPL(page_cache_async_ra);

 ssize_t ksys_readahead(int fd, loff_t offset, size_t count)
 {
+	struct file *file;
+	const struct inode *inode;
+
 	CLASS(fd, f)(fd);
+	if (fd_empty(f))
+		return -EBADF;

-	if (fd_empty(f) || !(fd_file(f)->f_mode & FMODE_READ))
+	file = fd_file(f);
+	if (!(file->f_mode & FMODE_READ))
 		return -EBADF;

 	/*
@@ -700,9 +706,15 @@ ssize_t ksys_readahead(int fd, loff_t of
 	 * that can execute readahead. If readahead is not possible
 	 * on this file, then we must return -EINVAL.
 	 */
-	if (!fd_file(f)->f_mapping || !fd_file(f)->f_mapping->a_ops ||
-	    (!S_ISREG(file_inode(fd_file(f))->i_mode) &&
-	     !S_ISBLK(file_inode(fd_file(f))->i_mode)))
+	if (!file->f_mapping)
+		return -EINVAL;
+	if (!file->f_mapping->a_ops)
+		return -EINVAL;
+
+	inode = file_inode(file);
+	if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode))
+		return -EINVAL;
+	if (IS_ANON_FILE(inode))
 		return -EINVAL;

 	return vfs_fadvise(fd_file(f), offset, count, POSIX_FADV_WILLNEED);
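
The readahead() behaviour the message describes is easy to see directly. A minimal sketch (assuming a patched kernel and glibc's readahead() wrapper):

    #define _GNU_SOURCE
    #include <errno.h>
    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/eventfd.h>

    int main(void)
    {
            int efd = eventfd(0, 0);                      /* anonymous inode */
            int rfd = open("/etc/hostname", O_RDONLY);    /* regular file */

            /* IS_ANON_FILE() short-circuits ksys_readahead() with -EINVAL */
            printf("eventfd: %d (errno %d)\n", (int)readahead(efd, 0, 4096), errno);
            /* regular files still reach vfs_fadvise(POSIX_FADV_WILLNEED) */
            printf("regular: %d\n", (int)readahead(rfd, 0, 4096));
            return 0;
    }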
debian/patches/patchset-pf/fixes/0009-configfs-Do-not-override-creating-attribute-file-fai.patch
@@ -0,0 +1,35 @@
From ab287d709809b6dfe4d3c42016a543d976533d51 Mon Sep 17 00:00:00 2001
From: Zijun Hu <quic_zijuhu@quicinc.com>
Date: Wed, 7 May 2025 19:50:26 +0800
Subject: configfs: Do not override creating attribute file failure in
 populate_attrs()

populate_attrs() may override a failure to create attribute files with
the success of creating subsequent bin attribute files, and so return
the wrong value.

Fix this by only creating the bin attribute files when all attribute
files were created successfully.

Fixes: 03607ace807b ("configfs: implement binary attributes")
Cc: stable@vger.kernel.org
Reviewed-by: Joel Becker <jlbec@evilplan.org>
Reviewed-by: Breno Leitao <leitao@debian.org>
Signed-off-by: Zijun Hu <quic_zijuhu@quicinc.com>
Link: https://lore.kernel.org/r/20250507-fix_configfs-v3-2-fe2d96de8dc4@quicinc.com
Signed-off-by: Andreas Hindborg <a.hindborg@kernel.org>
---
 fs/configfs/dir.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -619,7 +619,7 @@ static int populate_attrs(struct config_
 			break;
 		}
 	}
-	if (t->ct_bin_attrs) {
+	if (!error && t->ct_bin_attrs) {
 		for (i = 0; (bin_attr = t->ct_bin_attrs[i]) != NULL; i++) {
 			if (ops && ops->is_bin_visible && !ops->is_bin_visible(item, bin_attr, i))
 				continue;
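
Distilled, the bug is an error-clobbering pattern: a second loop reuses the same error variable, so a late success masks an early failure. A standalone sketch with hypothetical helpers (not the configfs code itself):

    #include <stdio.h>

    /* Stand-ins for configfs_create_file() / configfs_create_bin_file() */
    static int create_attr(int i)     { return i == 1 ? -1 : 0; } /* one fails */
    static int create_bin_attr(int i) { return 0; }               /* all succeed */

    static int populate(void)
    {
            int error = 0, i;

            for (i = 0; i < 3; i++)
                    if ((error = create_attr(i)))
                            break;
            /* Without the !error guard, this loop resets error to 0 and
             * the caller never sees the attribute-file failure. */
            if (!error)
                    for (i = 0; i < 3; i++)
                            if ((error = create_bin_attr(i)))
                                    break;
            return error;
    }

    int main(void) { printf("populate() = %d\n", populate()); }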
debian/patches/patchset-pf/fixes/0010-Don-t-propagate-mounts-into-detached-trees.patch
@@ -0,0 +1,104 @@
From 896b7b0d6ed53a7fe159c4b76f25407c816aa619 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 23 May 2025 19:20:36 -0400
Subject: Don't propagate mounts into detached trees

All versions up to 6.14 did not propagate mount events into detached
trees. Shortly after 6.14, a merge of vfs-6.15-rc1.mount.namespace
(130e696aa68b) changed that.

Unfortunately, that has caused userland regressions (reported in
https://lore.kernel.org/all/CAOYeF9WQhFDe+BGW=Dp5fK8oRy5AgZ6zokVyTj1Wp4EUiYgt4w@mail.gmail.com/).

A straight revert wouldn't be an option - in particular, the variant in
6.14 had a bug that got fixed in d1ddc6f1d9f0 ("fix IS_MNT_PROPAGATING
uses"), and we don't want to bring the bug back.

This is a modification of the manual revert posted by Christian, with
the changes needed to avoid reintroducing the breakage in the scenario
described in d1ddc6f1d9f0.

Cc: stable@vger.kernel.org
Reported-by: Allison Karlitskaya <lis@redhat.com>
Tested-by: Allison Karlitskaya <lis@redhat.com>
Acked-by: Christian Brauner <brauner@kernel.org>
Co-developed-by: Christian Brauner <brauner@kernel.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/mount.h | 5 -----
 fs/namespace.c | 15 ++-------------
 fs/pnode.c | 4 ++--
 3 files changed, 4 insertions(+), 20 deletions(-)

--- a/fs/mount.h
+++ b/fs/mount.h
@@ -7,10 +7,6 @@

 extern struct list_head notify_list;

-typedef __u32 __bitwise mntns_flags_t;
-
-#define MNTNS_PROPAGATING	((__force mntns_flags_t)(1 << 0))
-
 struct mnt_namespace {
 	struct ns_common ns;
 	struct mount *root;
@@ -37,7 +33,6 @@ struct mnt_namespace {
 	struct rb_node mnt_ns_tree_node; /* node in the mnt_ns_tree */
 	struct list_head mnt_ns_list; /* entry in the sequential list of mounts namespace */
 	refcount_t passive; /* number references not pinning @mounts */
-	mntns_flags_t mntns_flags;
 } __randomize_layout;

 struct mnt_pcp {
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -3648,7 +3648,7 @@ static int do_move_mount(struct path *ol
 	if (!(attached ? check_mnt(old) : is_anon_ns(ns)))
 		goto out;

-	if (is_anon_ns(ns)) {
+	if (is_anon_ns(ns) && ns == p->mnt_ns) {
 		/*
 		 * Ending up with two files referring to the root of the
 		 * same anonymous mount namespace would cause an error
@@ -3656,16 +3656,7 @@ static int do_move_mount(struct path *ol
 		 * twice into the mount tree which would be rejected
 		 * later. But be explicit about it right here.
 		 */
-		if ((is_anon_ns(p->mnt_ns) && ns == p->mnt_ns))
-			goto out;
-
-		/*
-		 * If this is an anonymous mount tree ensure that mount
-		 * propagation can detect mounts that were just
-		 * propagated to the target mount tree so we don't
-		 * propagate onto them.
-		 */
-		ns->mntns_flags |= MNTNS_PROPAGATING;
+		goto out;
 	} else if (is_anon_ns(p->mnt_ns)) {
 		/*
 		 * Don't allow moving an attached mount tree to an
@@ -3722,8 +3713,6 @@ static int do_move_mount(struct path *ol
 	if (attached)
 		put_mountpoint(old_mp);
 out:
-	if (is_anon_ns(ns))
-		ns->mntns_flags &= ~MNTNS_PROPAGATING;
 	unlock_mount(mp);
 	if (!err) {
 		if (attached) {
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -231,8 +231,8 @@ static int propagate_one(struct mount *m
 	/* skip if mountpoint isn't visible in m */
 	if (!is_subdir(dest_mp->m_dentry, m->mnt.mnt_root))
 		return 0;
-	/* skip if m is in the anon_ns we are emptying */
-	if (m->mnt_ns->mntns_flags & MNTNS_PROPAGATING)
+	/* skip if m is in the anon_ns */
+	if (is_anon_ns(m->mnt_ns))
 		return 0;

 	if (peers(m, last_dest)) {
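
For orientation: a detached tree is what open_tree(2) with OPEN_TREE_CLONE yields - a mount subtree whose only reference is a file descriptor. A hedged sketch of the affected scenario (flag values normally come from <linux/mount.h>; the fallback defines are for illustration only):

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    #ifndef OPEN_TREE_CLONE
    #define OPEN_TREE_CLONE 1
    #endif
    #ifndef AT_RECURSIVE
    #define AT_RECURSIVE 0x8000
    #endif

    int main(void)
    {
            /* Clone /mnt as a detached tree held only by this fd. With the
             * fix, mounts propagated under /mnt afterwards no longer appear
             * inside the detached copy (matching <= 6.14 behaviour). */
            int tree = syscall(SYS_open_tree, AT_FDCWD, "/mnt",
                               OPEN_TREE_CLONE | AT_RECURSIVE);
            /* ...later attach it with move_mount(tree, "", dfd, "dst",
             *    MOVE_MOUNT_F_EMPTY_PATH)... */
            return tree < 0;
    }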
debian/patches/patchset-pf/fixes/0011-mm-filemap-gate-dropbehind-invalidate-on-folio-dirty.patch
@@ -0,0 +1,51 @@
From bc86aaf0e0256220ca787fdbb57a73429ade1129 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Tue, 27 May 2025 07:28:52 -0600
Subject: mm/filemap: gate dropbehind invalidate on folio !dirty && !writeback

It's possible for the folio to either get marked for writeback or
redirtied. Add a helper, filemap_end_dropbehind(), which guards the
folio_unmap_invalidate() call behind a check that the folio is both
non-dirty and not under writeback AFTER the folio lock has been
acquired. Use this helper in folio_end_dropbehind_write().

Cc: stable@vger.kernel.org
Reported-by: Al Viro <viro@zeniv.linux.org.uk>
Fixes: fb7d3bc41493 ("mm/filemap: drop streaming/uncached pages when writeback completes")
Link: https://lore.kernel.org/linux-fsdevel/20250525083209.GS2023217@ZenIV/
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Link: https://lore.kernel.org/20250527133255.452431-2-axboe@kernel.dk
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 mm/filemap.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1589,6 +1589,16 @@ int folio_wait_private_2_killable(struct
 }
 EXPORT_SYMBOL(folio_wait_private_2_killable);

+static void filemap_end_dropbehind(struct folio *folio)
+{
+	struct address_space *mapping = folio->mapping;
+
+	VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
+
+	if (mapping && !folio_test_writeback(folio) && !folio_test_dirty(folio))
+		folio_unmap_invalidate(mapping, folio, 0);
+}
+
 /*
  * If folio was marked as dropbehind, then pages should be dropped when writeback
  * completes. Do that now. If we fail, it's likely because of a big folio -
@@ -1604,8 +1614,7 @@ static void folio_end_dropbehind_write(s
 	 * invalidation in that case.
 	 */
 	if (in_task() && folio_trylock(folio)) {
-		if (folio->mapping)
-			folio_unmap_invalidate(folio->mapping, folio, 0);
+		filemap_end_dropbehind(folio);
 		folio_unlock(folio);
 	}
 }
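
The helper encodes the usual re-check-under-lock discipline: state peeked at before taking the lock may be stale by the time the lock is held. In miniature, outside the kernel (a generic pthread sketch, not kernel code):

    #include <pthread.h>
    #include <stdbool.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static bool dirty, under_writeback;

    static void try_invalidate(void)
    {
            /* An unlocked peek is fine as a cheap fast path... */
            if (dirty || under_writeback)
                    return;
            if (pthread_mutex_trylock(&lock) == 0) {
                    /* ...but the decision must be re-made under the lock,
                     * because the state may have changed in between. */
                    if (!dirty && !under_writeback)
                            /* invalidate(); */;
                    pthread_mutex_unlock(&lock);
            }
    }

    int main(void) { try_invalidate(); }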
debian/patches/patchset-pf/fixes/0012-mm-filemap-use-filemap_end_dropbehind-for-read-inval.patch
@@ -0,0 +1,51 @@
From fad76185ca91983990c660642151083eb05cbfc0 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Tue, 27 May 2025 07:28:53 -0600
Subject: mm/filemap: use filemap_end_dropbehind() for read invalidation

Use the filemap_end_dropbehind() helper rather than calling
folio_unmap_invalidate() directly, as we need to check if the folio has
been redirtied or marked for writeback once the folio lock has been
re-acquired.

Cc: stable@vger.kernel.org
Reported-by: Trond Myklebust <trondmy@hammerspace.com>
Fixes: 8026e49bff9b ("mm/filemap: add read support for RWF_DONTCACHE")
Link: https://lore.kernel.org/linux-fsdevel/ba8a9805331ce258a622feaca266b163db681a10.camel@hammerspace.com/
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Link: https://lore.kernel.org/20250527133255.452431-3-axboe@kernel.dk
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 mm/filemap.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2644,8 +2644,7 @@ static inline bool pos_same_folio(loff_t
 	return (pos1 >> shift == pos2 >> shift);
 }

-static void filemap_end_dropbehind_read(struct address_space *mapping,
-					struct folio *folio)
+static void filemap_end_dropbehind_read(struct folio *folio)
 {
 	if (!folio_test_dropbehind(folio))
 		return;
@@ -2653,7 +2652,7 @@ static void filemap_end_dropbehind_read(
 		return;
 	if (folio_trylock(folio)) {
 		if (folio_test_clear_dropbehind(folio))
-			folio_unmap_invalidate(mapping, folio, 0);
+			filemap_end_dropbehind(folio);
 		folio_unlock(folio);
 	}
 }
@@ -2774,7 +2773,7 @@ put_folios:
 	for (i = 0; i < folio_batch_count(&fbatch); i++) {
 		struct folio *folio = fbatch.folios[i];

-		filemap_end_dropbehind_read(mapping, folio);
+		filemap_end_dropbehind_read(folio);
 		folio_put(folio);
 	}
 	folio_batch_init(&fbatch);
debian/patches/patchset-pf/fixes/0013-Revert-Disable-FOP_DONTCACHE-for-now-due-to-bugs.patch
@@ -0,0 +1,29 @@
From f0579d45f2e03fa3ba0d9466e79a31ea37acb487 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Tue, 27 May 2025 07:28:54 -0600
Subject: Revert "Disable FOP_DONTCACHE for now due to bugs"

This reverts commit 478ad02d6844217cc7568619aeb0809d93ade43d.

Both the read and write side dirty && writeback races should be resolved
now; revert the commit that disabled FOP_DONTCACHE for filesystems.

Link: https://lore.kernel.org/linux-fsdevel/20250525083209.GS2023217@ZenIV/
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Link: https://lore.kernel.org/20250527133255.452431-4-axboe@kernel.dk
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/fs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2186,7 +2186,7 @@ struct file_operations {
 /* Supports asynchronous lock callbacks */
 #define FOP_ASYNC_LOCK		((__force fop_flags_t)(1 << 6))
 /* File system supports uncached read/write buffered IO */
-#define FOP_DONTCACHE		0 /* ((__force fop_flags_t)(1 << 7)) */
+#define FOP_DONTCACHE		((__force fop_flags_t)(1 << 7))

 /* Wrap a directory iterator that needs exclusive inode access */
 int wrap_directory_iterator(struct file *, struct dir_context *,
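
With the flag live again, filesystems that set FOP_DONTCACHE accept uncached buffered I/O requested per call via RWF_DONTCACHE. A userspace sketch (assuming a kernel with this revert and a filesystem that opts in; expect EOPNOTSUPP otherwise):

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/uio.h>

    #ifndef RWF_DONTCACHE
    #define RWF_DONTCACHE 0x80
    #endif

    int main(void)
    {
            char buf[4096];
            struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) };
            int fd = open("/etc/hostname", O_RDONLY);

            /* Buffered read whose pages are dropped once consumed instead
             * of lingering in the page cache. */
            ssize_t n = preadv2(fd, &iov, 1, -1, RWF_DONTCACHE);
            printf("read %zd bytes\n", n);
            return 0;
    }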
debian/patches/patchset-pf/fixes/0014-mm-filemap-unify-read-write-dropbehind-naming.patch
@@ -0,0 +1,36 @@
From 3b4614564770691cf3a6eb88127268ef6a84180c Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Tue, 27 May 2025 07:28:55 -0600
Subject: mm/filemap: unify read/write dropbehind naming

The read side is filemap_end_dropbehind_read(), while the write side
used folio_ as the prefix rather than filemap_. The read side makes more
sense, so unify the naming such that the write side follows it.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
Link: https://lore.kernel.org/20250527133255.452431-5-axboe@kernel.dk
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 mm/filemap.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1604,7 +1604,7 @@ static void filemap_end_dropbehind(struc
  * completes. Do that now. If we fail, it's likely because of a big folio -
  * just reset dropbehind for that case and latter completions should invalidate.
  */
-static void folio_end_dropbehind_write(struct folio *folio)
+static void filemap_end_dropbehind_write(struct folio *folio)
 {
 	/*
 	 * Hitting !in_task() should not happen off RWF_DONTCACHE writeback,
@@ -1659,7 +1659,7 @@ void folio_end_writeback(struct folio *f
 	acct_reclaim_writeback(folio);

 	if (folio_dropbehind)
-		folio_end_dropbehind_write(folio);
+		filemap_end_dropbehind_write(folio);
 	folio_put(folio);
 }
 EXPORT_SYMBOL(folio_end_writeback);
debian/patches/patchset-pf/fixes/0015-mm-filemap-unify-dropbehind-flag-testing-and-clearin.patch
@@ -0,0 +1,78 @@
From 6003153e1bc4ad4952773081d7b89aa1ab2274c3 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Tue, 27 May 2025 07:28:56 -0600
Subject: mm/filemap: unify dropbehind flag testing and clearing

The read and write sides do this a bit differently; unify it such that
the _{read,write} helpers check the bit before locking, and the generic
handler is in charge of clearing the bit and invalidating, once under
the folio lock.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
Link: https://lore.kernel.org/20250527133255.452431-6-axboe@kernel.dk
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 mm/filemap.c | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1595,7 +1595,11 @@ static void filemap_end_dropbehind(struc

 	VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);

-	if (mapping && !folio_test_writeback(folio) && !folio_test_dirty(folio))
+	if (folio_test_writeback(folio) || folio_test_dirty(folio))
+		return;
+	if (!folio_test_clear_dropbehind(folio))
+		return;
+	if (mapping)
 		folio_unmap_invalidate(mapping, folio, 0);
 }

@@ -1606,6 +1610,9 @@ static void filemap_end_dropbehind(struc
  */
 static void filemap_end_dropbehind_write(struct folio *folio)
 {
+	if (!folio_test_dropbehind(folio))
+		return;
+
 	/*
 	 * Hitting !in_task() should not happen off RWF_DONTCACHE writeback,
 	 * but can happen if normal writeback just happens to find dirty folios
@@ -1629,8 +1636,6 @@ static void filemap_end_dropbehind_write
  */
 void folio_end_writeback(struct folio *folio)
 {
-	bool folio_dropbehind = false;
-
 	VM_BUG_ON_FOLIO(!folio_test_writeback(folio), folio);

 	/*
@@ -1652,14 +1657,11 @@ void folio_end_writeback(struct folio *f
 	 * reused before the folio_wake_bit().
 	 */
 	folio_get(folio);
-	if (!folio_test_dirty(folio))
-		folio_dropbehind = folio_test_clear_dropbehind(folio);
 	if (__folio_end_writeback(folio))
 		folio_wake_bit(folio, PG_writeback);
-	acct_reclaim_writeback(folio);

-	if (folio_dropbehind)
-		filemap_end_dropbehind_write(folio);
+	filemap_end_dropbehind_write(folio);
+	acct_reclaim_writeback(folio);
 	folio_put(folio);
 }
 EXPORT_SYMBOL(folio_end_writeback);
@@ -2651,8 +2653,7 @@ static void filemap_end_dropbehind_read(
 	if (folio_test_writeback(folio) || folio_test_dirty(folio))
 		return;
 	if (folio_trylock(folio)) {
-		if (folio_test_clear_dropbehind(folio))
-			filemap_end_dropbehind(folio);
+		filemap_end_dropbehind(folio);
 		folio_unlock(folio);
 	}
 }
debian/patches/patchset-pf/fixes/0016-mm-khugepaged-fix-race-with-folio-split-free-using-t.patch
@@ -0,0 +1,98 @@
From 61c0b2450f2b85c5053fa4f71d9c619b34d3af6c Mon Sep 17 00:00:00 2001
From: Shivank Garg <shivankg@amd.com>
Date: Mon, 26 May 2025 18:28:18 +0000
Subject: mm/khugepaged: fix race with folio split/free using temporary
 reference

hpage_collapse_scan_file() calls is_refcount_suitable(), which in turn
calls folio_mapcount(). folio_mapcount() checks folio_test_large() before
proceeding to folio_large_mapcount(), but there is a race window where the
folio may get split/freed between these checks, triggering:

  VM_WARN_ON_FOLIO(!folio_test_large(folio), folio)

Take a temporary reference to the folio in hpage_collapse_scan_file().
This stabilizes the folio during refcount check and prevents incorrect
large folio detection due to concurrent split/free. Use helper
folio_expected_ref_count() + 1 to compare with folio_ref_count() instead
of using is_refcount_suitable().

Link: https://lkml.kernel.org/r/20250526182818.37978-1-shivankg@amd.com
Fixes: 05c5323b2a34 ("mm: track mapcount of large folios in single value")
Signed-off-by: Shivank Garg <shivankg@amd.com>
Reported-by: syzbot+2b99589e33edbe9475ca@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/all/6828470d.a70a0220.38f255.000c.GAE@google.com
Suggested-by: David Hildenbrand <david@redhat.com>
Acked-by: David Hildenbrand <david@redhat.com>
Acked-by: Dev Jain <dev.jain@arm.com>
Reviewed-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Bharata B Rao <bharata@amd.com>
Cc: Fengwei Yin <fengwei.yin@intel.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Mariano Pache <npache@redhat.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Zi Yan <ziy@nvidia.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/khugepaged.c | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -2295,6 +2295,17 @@ static int hpage_collapse_scan_file(stru
 			continue;
 		}

+		if (!folio_try_get(folio)) {
+			xas_reset(&xas);
+			continue;
+		}
+
+		if (unlikely(folio != xas_reload(&xas))) {
+			folio_put(folio);
+			xas_reset(&xas);
+			continue;
+		}
+
 		if (folio_order(folio) == HPAGE_PMD_ORDER &&
 		    folio->index == start) {
 			/* Maybe PMD-mapped */
@@ -2305,23 +2316,27 @@ static int hpage_collapse_scan_file(stru
 			 * it's safe to skip LRU and refcount checks before
 			 * returning.
 			 */
+			folio_put(folio);
 			break;
 		}

 		node = folio_nid(folio);
 		if (hpage_collapse_scan_abort(node, cc)) {
 			result = SCAN_SCAN_ABORT;
+			folio_put(folio);
 			break;
 		}
 		cc->node_load[node]++;

 		if (!folio_test_lru(folio)) {
 			result = SCAN_PAGE_LRU;
+			folio_put(folio);
 			break;
 		}

-		if (!is_refcount_suitable(folio)) {
+		if (folio_expected_ref_count(folio) + 1 != folio_ref_count(folio)) {
 			result = SCAN_PAGE_COUNT;
+			folio_put(folio);
 			break;
 		}

@@ -2333,6 +2348,7 @@ static int hpage_collapse_scan_file(stru
 		 */

 		present += folio_nr_pages(folio);
+		folio_put(folio);

 		if (need_resched()) {
 			xas_pause(&xas);
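
A short note on the new check, using the folio_expected_ref_count() semantics introduced by the next patch (the worked numbers are illustrative):

    /*
     * folio_expected_ref_count() counts only external references: pagecache
     * or swapcache entries, PG_private, and page table mappings. The scanner
     * above has just taken its own temporary reference via folio_try_get(),
     * hence the "+ 1":
     *
     *     folio_ref_count(folio) == folio_expected_ref_count(folio) + 1
     *
     * Example: an order-0 pagecache folio, mapped once, no private data:
     *     expected = 1 (pagecache) + 1 (mapping) = 2
     *     observed = 2 (external) + 1 (ours) = 3, which equals
     *     expected + 1, so the folio is considered suitable.
     */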
debian/patches/patchset-pf/fixes/0017-mm-add-folio_expected_ref_count-for-reference-count-.patch
@@ -0,0 +1,198 @@
From 214092002cbd9945b7cc6314e76ec42b3f588c01 Mon Sep 17 00:00:00 2001
From: Shivank Garg <shivankg@amd.com>
Date: Wed, 30 Apr 2025 10:01:51 +0000
Subject: mm: add folio_expected_ref_count() for reference count calculation

Patch series "JFS: Implement migrate_folio for jfs_metapage_aops", v5.

This patchset addresses a warning that occurs during memory compaction due
to JFS's missing migrate_folio operation. The warning was introduced by
commit 7ee3647243e5 ("migrate: Remove call to ->writepage"), which added
explicit warnings when a filesystem doesn't implement migrate_folio.

The syzbot report is as follows [1]:
  jfs_metapage_aops does not implement migrate_folio
  WARNING: CPU: 1 PID: 5861 at mm/migrate.c:955 fallback_migrate_folio mm/migrate.c:953 [inline]
  WARNING: CPU: 1 PID: 5861 at mm/migrate.c:955 move_to_new_folio+0x70e/0x840 mm/migrate.c:1007
  Modules linked in:
  CPU: 1 UID: 0 PID: 5861 Comm: syz-executor280 Not tainted 6.15.0-rc1-next-20250411-syzkaller #0 PREEMPT(full)
  Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 02/12/2025
  RIP: 0010:fallback_migrate_folio mm/migrate.c:953 [inline]
  RIP: 0010:move_to_new_folio+0x70e/0x840 mm/migrate.c:1007

To fix this issue, this series implements metapage_migrate_folio() for
JFS, which handles both single and multiple metapages per page
configurations.

While most filesystems leverage existing migration implementations like
filemap_migrate_folio(), buffer_migrate_folio_norefs() or
buffer_migrate_folio() (which internally use folio_expected_refs()),
JFS's metapage architecture requires special handling of its private data
during migration. To support this, this series introduces
folio_expected_ref_count(), which calculates external references to a
folio from page/swap cache, private data, and page table mappings.

This standardized implementation replaces the previous ad-hoc
folio_expected_refs() function and enables JFS to accurately determine
whether a folio has unexpected references before attempting migration.

Implement folio_expected_ref_count() to calculate expected folio reference
counts from:
- Page/swap cache (1 per page)
- Private data (1)
- Page table mappings (1 per map)

While originally needed for page migration operations, this improved
implementation standardizes reference counting by consolidating all
refcount contributors into a single, reusable function that can benefit
any subsystem needing to detect unexpected references to folios.

folio_expected_ref_count() returns the sum of these external references
without including any reference the caller itself might hold. Callers
comparing against the actual folio_ref_count() must account for their
own references separately.

Link: https://syzkaller.appspot.com/bug?extid=8bb6fd945af4e0ad9299 [1]
Link: https://lkml.kernel.org/r/20250430100150.279751-1-shivankg@amd.com
Link: https://lkml.kernel.org/r/20250430100150.279751-2-shivankg@amd.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Shivank Garg <shivankg@amd.com>
Suggested-by: Matthew Wilcox <willy@infradead.org>
Co-developed-by: David Hildenbrand <david@redhat.com>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: Dave Kleikamp <shaggy@kernel.org>
Cc: Donet Tom <donettom@linux.ibm.com>
Cc: Jane Chu <jane.chu@oracle.com>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Zi Yan <ziy@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm.h | 55 ++++++++++++++++++++++++++++++++++++++++++++++
 mm/migrate.c | 22 ++++---------------
 2 files changed, 59 insertions(+), 18 deletions(-)

--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2307,6 +2307,61 @@ static inline bool folio_maybe_mapped_sh
 	return folio_test_large_maybe_mapped_shared(folio);
 }

+/**
+ * folio_expected_ref_count - calculate the expected folio refcount
+ * @folio: the folio
+ *
+ * Calculate the expected folio refcount, taking references from the pagecache,
+ * swapcache, PG_private and page table mappings into account. Useful in
+ * combination with folio_ref_count() to detect unexpected references (e.g.,
+ * GUP or other temporary references).
+ *
+ * Does currently not consider references from the LRU cache. If the folio
+ * was isolated from the LRU (which is the case during migration or split),
+ * the LRU cache does not apply.
+ *
+ * Calling this function on an unmapped folio -- !folio_mapped() -- that is
+ * locked will return a stable result.
+ *
+ * Calling this function on a mapped folio will not result in a stable result,
+ * because nothing stops additional page table mappings from coming (e.g.,
+ * fork()) or going (e.g., munmap()).
+ *
+ * Calling this function without the folio lock will also not result in a
+ * stable result: for example, the folio might get dropped from the swapcache
+ * concurrently.
+ *
+ * However, even when called without the folio lock or on a mapped folio,
+ * this function can be used to detect unexpected references early (for example,
+ * if it makes sense to even lock the folio and unmap it).
+ *
+ * The caller must add any reference (e.g., from folio_try_get()) it might be
+ * holding itself to the result.
+ *
+ * Returns the expected folio refcount.
+ */
+static inline int folio_expected_ref_count(const struct folio *folio)
+{
+	const int order = folio_order(folio);
+	int ref_count = 0;
+
+	if (WARN_ON_ONCE(folio_test_slab(folio)))
+		return 0;
+
+	if (folio_test_anon(folio)) {
+		/* One reference per page from the swapcache. */
+		ref_count += folio_test_swapcache(folio) << order;
+	} else if (!((unsigned long)folio->mapping & PAGE_MAPPING_FLAGS)) {
+		/* One reference per page from the pagecache. */
+		ref_count += !!folio->mapping << order;
+		/* One reference from PG_private. */
+		ref_count += folio_test_private(folio);
+	}
+
+	/* One reference per page table mapping. */
+	return ref_count + folio_mapcount(folio);
+}
+
 #ifndef HAVE_ARCH_MAKE_FOLIO_ACCESSIBLE
 static inline int arch_make_folio_accessible(struct folio *folio)
 {
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -445,20 +445,6 @@ unlock:
 }
 #endif

-static int folio_expected_refs(struct address_space *mapping,
-		struct folio *folio)
-{
-	int refs = 1;
-	if (!mapping)
-		return refs;
-
-	refs += folio_nr_pages(folio);
-	if (folio_test_private(folio))
-		refs++;
-
-	return refs;
-}
-
 /*
  * Replace the folio in the mapping.
  *
@@ -601,7 +587,7 @@ static int __folio_migrate_mapping(struc
 int folio_migrate_mapping(struct address_space *mapping,
 		struct folio *newfolio, struct folio *folio, int extra_count)
 {
-	int expected_count = folio_expected_refs(mapping, folio) + extra_count;
+	int expected_count = folio_expected_ref_count(folio) + extra_count + 1;

 	if (folio_ref_count(folio) != expected_count)
 		return -EAGAIN;
@@ -618,7 +604,7 @@ int migrate_huge_page_move_mapping(struc
 		struct folio *dst, struct folio *src)
 {
 	XA_STATE(xas, &mapping->i_pages, folio_index(src));
-	int rc, expected_count = folio_expected_refs(mapping, src);
+	int rc, expected_count = folio_expected_ref_count(src) + 1;

 	if (folio_ref_count(src) != expected_count)
 		return -EAGAIN;
@@ -749,7 +735,7 @@ static int __migrate_folio(struct addres
 		struct folio *src, void *src_private,
 		enum migrate_mode mode)
 {
-	int rc, expected_count = folio_expected_refs(mapping, src);
+	int rc, expected_count = folio_expected_ref_count(src) + 1;

 	/* Check whether src does not have extra refs before we do more work */
 	if (folio_ref_count(src) != expected_count)
@@ -837,7 +823,7 @@ static int __buffer_migrate_folio(struct
 		return migrate_folio(mapping, dst, src, mode);

 	/* Check whether page does not have extra refs before we do more work */
-	expected_count = folio_expected_refs(mapping, src);
+	expected_count = folio_expected_ref_count(src) + 1;
 	if (folio_ref_count(src) != expected_count)
 		return -EAGAIN;
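
The calling convention - external references only, callers add their own - looks like this in a hypothetical migration-style check (a sketch, not part of the patch):

    /* Hypothetical caller: decide whether a folio is quiet enough to
     * migrate. The caller holds exactly one reference of its own. */
    static bool folio_is_quiet(struct folio *folio)
    {
            /* External refs (pagecache, PG_private, mappings) + ours. */
            int expected = folio_expected_ref_count(folio) + 1;

            /* Any surplus means GUP pins or other temporary references. */
            return folio_ref_count(folio) == expected;
    }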
debian/patches/patchset-pf/fixes/0018-mm-fix-uprobe-pte-be-overwritten-when-expanding-vma.patch
@@ -0,0 +1,129 @@
From 0f52f05148589fe4115322a9cc8ffab760091a0a Mon Sep 17 00:00:00 2001
From: Pu Lehui <pulehui@huawei.com>
Date: Thu, 29 May 2025 15:56:47 +0000
Subject: mm: fix uprobe pte be overwritten when expanding vma

Patch series "Fix uprobe pte be overwritten when expanding vma".

This patch (of 4):

We encountered a BUG alert triggered by syzkaller as follows:
  BUG: Bad rss-counter state mm:00000000b4a60fca type:MM_ANONPAGES val:1

And we can reproduce it with the following steps:
1. register a uprobe on the file at zero offset
2. mmap the file at zero offset:
   addr1 = mmap(NULL, 2 * 4096, PROT_NONE, MAP_PRIVATE, fd, 0);
3. mremap part of vma1 to a new vma2:
   addr2 = mremap(addr1, 4096, 2 * 4096, MREMAP_MAYMOVE);
4. mremap back to the original addr1:
   mremap(addr2, 4096, 4096, MREMAP_MAYMOVE | MREMAP_FIXED, addr1);

In step 3, the vma1 range [addr1, addr1 + 4096] is remapped to the new
vma2 with range [addr2, addr2 + 8192], the uprobe anon page is remapped
from vma1 to vma2, and the vma1 range [addr1, addr1 + 4096] is then
unmapped.

In step 4, the vma2 range [addr2, addr2 + 4096] is remapped back to the
addr range [addr1, addr1 + 4096]. Since the addr range [addr1 + 4096,
addr1 + 8192] still maps the file, it will take vma_merge_new_range to
expand the range, and then do uprobe_mmap in vma_complete. Since the
merged vma pgoff is also zero offset, a uprobe anon page is installed in
the merged vma. However, the upcoming move_page_tables step, which uses
set_pte_at to remap the vma2 uprobe pte into the merged vma, overwrites
the newly installed uprobe pte in the merged vma and leaves that pte
orphaned.

Since the uprobe pte will be remapped to the merged vma, we can remove
the unnecessary uprobe_mmap upon the merged vma.

This problem was first found in linux-6.6.y and also exists in the
community syzkaller:
https://lore.kernel.org/all/000000000000ada39605a5e71711@google.com/T/

Link: https://lkml.kernel.org/r/20250529155650.4017699-1-pulehui@huaweicloud.com
Link: https://lkml.kernel.org/r/20250529155650.4017699-2-pulehui@huaweicloud.com
Fixes: 2b1444983508 ("uprobes, mm, x86: Add the ability to install and remove uprobes breakpoints")
Signed-off-by: Pu Lehui <pulehui@huawei.com>
Suggested-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Acked-by: David Hildenbrand <david@redhat.com>
Cc: Jann Horn <jannh@google.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: "Masami Hiramatsu (Google)" <mhiramat@kernel.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/vma.c | 20 +++++++++++++++++---
 mm/vma.h | 7 +++++++
 2 files changed, 24 insertions(+), 3 deletions(-)

--- a/mm/vma.c
+++ b/mm/vma.c
@@ -144,6 +144,9 @@ static void init_multi_vma_prep(struct v
 	vp->file = vma->vm_file;
 	if (vp->file)
 		vp->mapping = vma->vm_file->f_mapping;
+
+	if (vmg && vmg->skip_vma_uprobe)
+		vp->skip_vma_uprobe = true;
 }

 /*
@@ -333,10 +336,13 @@ static void vma_complete(struct vma_prep

 	if (vp->file) {
 		i_mmap_unlock_write(vp->mapping);
-		uprobe_mmap(vp->vma);

-		if (vp->adj_next)
-			uprobe_mmap(vp->adj_next);
+		if (!vp->skip_vma_uprobe) {
+			uprobe_mmap(vp->vma);
+
+			if (vp->adj_next)
+				uprobe_mmap(vp->adj_next);
+		}
 	}

 	if (vp->remove) {
@@ -1783,6 +1789,14 @@ struct vm_area_struct *copy_vma(struct v
 		faulted_in_anon_vma = false;
 	}

+	/*
+	 * If the VMA we are copying might contain a uprobe PTE, ensure
+	 * that we do not establish one upon merge. Otherwise, when mremap()
+	 * moves page tables, it will orphan the newly created PTE.
+	 */
+	if (vma->vm_file)
+		vmg.skip_vma_uprobe = true;
+
 	new_vma = find_vma_prev(mm, addr, &vmg.prev);
 	if (new_vma && new_vma->vm_start < addr + len)
 		return NULL; /* should never get here */
--- a/mm/vma.h
+++ b/mm/vma.h
@@ -19,6 +19,8 @@ struct vma_prepare {
 	struct vm_area_struct *insert;
 	struct vm_area_struct *remove;
 	struct vm_area_struct *remove2;
+
+	bool skip_vma_uprobe :1;
 };

 struct unlink_vma_file_batch {
@@ -120,6 +122,11 @@ struct vma_merge_struct {
 	 */
 	bool give_up_on_oom :1;

+	/*
+	 * If set, skip uprobe_mmap upon merged vma.
+	 */
+	bool skip_vma_uprobe :1;
+
 	/* Internal flags set during merge process: */

 	/*
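
The four reproduction steps translate into a compact harness. Step 1 (uprobe registration) happens out of band, e.g. through tracefs uprobe_events; the rest is the mmap/mremap dance from the message (an illustrative sketch with a hypothetical target path, not a guaranteed reproducer):

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <sys/mman.h>
    #include <unistd.h>

    int main(void)
    {
            /* Step 1 (out of band): register a uprobe at offset 0 of this
             * file, e.g. via tracing/uprobe_events: 'p /tmp/target:0x0' */
            int fd = open("/tmp/target", O_RDONLY);

            /* Step 2: map two pages of the file at offset 0 */
            char *addr1 = mmap(NULL, 2 * 4096, PROT_NONE, MAP_PRIVATE, fd, 0);

            /* Step 3: move the first page into a new, larger vma2 */
            char *addr2 = mremap(addr1, 4096, 2 * 4096, MREMAP_MAYMOVE);

            /* Step 4: move one page of vma2 back to addr1; the resulting
             * merge used to install a second uprobe PTE, which
             * move_page_tables() then overwrote and orphaned */
            mremap(addr2, 4096, 4096, MREMAP_MAYMOVE | MREMAP_FIXED, addr1);
            return 0;
    }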
debian/patches/patchset-pf/fixes/0019-mm-hugetlb-unshare-page-tables-during-VMA-split-not-.patch
@@ -0,0 +1,217 @@
From 6f1e03b94f7777323aaefd9286d992a1cbd0adf7 Mon Sep 17 00:00:00 2001
From: Jann Horn <jannh@google.com>
Date: Tue, 27 May 2025 23:23:53 +0200
Subject: mm/hugetlb: unshare page tables during VMA split, not before

Currently, __split_vma() triggers hugetlb page table unsharing through
vm_ops->may_split(). This happens before the VMA lock and rmap locks are
taken - which is too early; it allows racing VMA-locked page faults in our
process and racing rmap walks from other processes to cause page tables to
be shared again before we actually perform the split.

Fix it by explicitly calling into the hugetlb unshare logic from
__split_vma() in the same place where THP splitting also happens. At that
point, both the VMA and the rmap(s) are write-locked.

An annoying detail is that we can now call into the helper
hugetlb_unshare_pmds() from two different locking contexts:

1. from hugetlb_split(), holding:
   - mmap lock (exclusively)
   - VMA lock
   - file rmap lock (exclusively)
2. hugetlb_unshare_all_pmds(), which I think is designed to be able to
   call us with only the mmap lock held (in shared mode), but currently
   only runs while holding mmap lock (exclusively) and VMA lock

Backporting note:
This commit fixes a racy protection that was introduced in commit
b30c14cd6102 ("hugetlb: unshare some PMDs when splitting VMAs"); that
commit claimed to fix an issue introduced in 5.13, but it should actually
also go all the way back.

[jannh@google.com: v2]
Link: https://lkml.kernel.org/r/20250528-hugetlb-fixes-splitrace-v2-1-1329349bad1a@google.com
Link: https://lkml.kernel.org/r/20250528-hugetlb-fixes-splitrace-v2-0-1329349bad1a@google.com
Link: https://lkml.kernel.org/r/20250527-hugetlb-fixes-splitrace-v1-1-f4136f5ec58a@google.com
Fixes: 39dde65c9940 ("[PATCH] shared page table for hugetlb page")
Signed-off-by: Jann Horn <jannh@google.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Reviewed-by: Oscar Salvador <osalvador@suse.de>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: <stable@vger.kernel.org> [b30c14cd6102: hugetlb: unshare some PMDs when splitting VMAs]
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/hugetlb.h | 3 ++
 mm/hugetlb.c | 60 +++++++++++++++++++++++---------
 mm/vma.c | 7 ++++
 tools/testing/vma/vma_internal.h | 2 ++
 4 files changed, 56 insertions(+), 16 deletions(-)

--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -276,6 +276,7 @@ bool is_hugetlb_entry_migration(pte_t pt
 bool is_hugetlb_entry_hwpoisoned(pte_t pte);
 void hugetlb_unshare_all_pmds(struct vm_area_struct *vma);
 void fixup_hugetlb_reservations(struct vm_area_struct *vma);
+void hugetlb_split(struct vm_area_struct *vma, unsigned long addr);

 #else /* !CONFIG_HUGETLB_PAGE */

@@ -473,6 +474,8 @@ static inline void fixup_hugetlb_reserva
 {
 }

+static inline void hugetlb_split(struct vm_area_struct *vma, unsigned long addr) {}
+
 #endif /* !CONFIG_HUGETLB_PAGE */

 #ifndef pgd_write
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -120,7 +120,7 @@ static void hugetlb_vma_lock_free(struct
 static void hugetlb_vma_lock_alloc(struct vm_area_struct *vma);
 static void __hugetlb_vma_unlock_write_free(struct vm_area_struct *vma);
 static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
-		unsigned long start, unsigned long end);
+		unsigned long start, unsigned long end, bool take_locks);
 static struct resv_map *vma_resv_map(struct vm_area_struct *vma);

 static void hugetlb_free_folio(struct folio *folio)
@@ -5426,26 +5426,40 @@ static int hugetlb_vm_op_split(struct vm
 {
 	if (addr & ~(huge_page_mask(hstate_vma(vma))))
 		return -EINVAL;
+	return 0;
+}

+void hugetlb_split(struct vm_area_struct *vma, unsigned long addr)
+{
 	/*
 	 * PMD sharing is only possible for PUD_SIZE-aligned address ranges
 	 * in HugeTLB VMAs. If we will lose PUD_SIZE alignment due to this
 	 * split, unshare PMDs in the PUD_SIZE interval surrounding addr now.
+	 * This function is called in the middle of a VMA split operation, with
+	 * MM, VMA and rmap all write-locked to prevent concurrent page table
+	 * walks (except hardware and gup_fast()).
 	 */
+	vma_assert_write_locked(vma);
+	i_mmap_assert_write_locked(vma->vm_file->f_mapping);
+
 	if (addr & ~PUD_MASK) {
-		/*
-		 * hugetlb_vm_op_split is called right before we attempt to
-		 * split the VMA. We will need to unshare PMDs in the old and
-		 * new VMAs, so let's unshare before we split.
-		 */
 		unsigned long floor = addr & PUD_MASK;
 		unsigned long ceil = floor + PUD_SIZE;

-		if (floor >= vma->vm_start && ceil <= vma->vm_end)
-			hugetlb_unshare_pmds(vma, floor, ceil);
+		if (floor >= vma->vm_start && ceil <= vma->vm_end) {
+			/*
+			 * Locking:
+			 * Use take_locks=false here.
+			 * The file rmap lock is already held.
+			 * The hugetlb VMA lock can't be taken when we already
+			 * hold the file rmap lock, and we don't need it because
+			 * its purpose is to synchronize against concurrent page
+			 * table walks, which are not possible thanks to the
+			 * locks held by our caller.
+			 */
+			hugetlb_unshare_pmds(vma, floor, ceil, /* take_locks = */ false);
+		}
 	}
-
-	return 0;
 }

 static unsigned long hugetlb_vm_op_pagesize(struct vm_area_struct *vma)
@@ -7884,9 +7898,16 @@ void move_hugetlb_state(struct folio *ol
 	spin_unlock_irq(&hugetlb_lock);
 }

+/*
+ * If @take_locks is false, the caller must ensure that no concurrent page table
+ * access can happen (except for gup_fast() and hardware page walks).
+ * If @take_locks is true, we take the hugetlb VMA lock (to lock out things like
+ * concurrent page fault handling) and the file rmap lock.
+ */
 static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
 		unsigned long start,
-		unsigned long end)
+		unsigned long end,
+		bool take_locks)
 {
 	struct hstate *h = hstate_vma(vma);
 	unsigned long sz = huge_page_size(h);
@@ -7910,8 +7931,12 @@ static void hugetlb_unshare_pmds(struct
 	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, mm,
 				start, end);
 	mmu_notifier_invalidate_range_start(&range);
-	hugetlb_vma_lock_write(vma);
-	i_mmap_lock_write(vma->vm_file->f_mapping);
+	if (take_locks) {
+		hugetlb_vma_lock_write(vma);
+		i_mmap_lock_write(vma->vm_file->f_mapping);
+	} else {
+		i_mmap_assert_write_locked(vma->vm_file->f_mapping);
+	}
 	for (address = start; address < end; address += PUD_SIZE) {
 		ptep = hugetlb_walk(vma, address, sz);
 		if (!ptep)
@@ -7921,8 +7946,10 @@ static void hugetlb_unshare_pmds(struct
 		spin_unlock(ptl);
 	}
 	flush_hugetlb_tlb_range(vma, start, end);
-	i_mmap_unlock_write(vma->vm_file->f_mapping);
-	hugetlb_vma_unlock_write(vma);
+	if (take_locks) {
+		i_mmap_unlock_write(vma->vm_file->f_mapping);
+		hugetlb_vma_unlock_write(vma);
+	}
 	/*
 	 * No need to call mmu_notifier_arch_invalidate_secondary_tlbs(), see
 	 * Documentation/mm/mmu_notifier.rst.
@@ -7937,7 +7964,8 @@ static void hugetlb_unshare_pmds(struct
 void hugetlb_unshare_all_pmds(struct vm_area_struct *vma)
 {
 	hugetlb_unshare_pmds(vma, ALIGN(vma->vm_start, PUD_SIZE),
-			ALIGN_DOWN(vma->vm_end, PUD_SIZE));
+			ALIGN_DOWN(vma->vm_end, PUD_SIZE),
+			/* take_locks = */ true);
 }

 /*
--- a/mm/vma.c
+++ b/mm/vma.c
@@ -516,7 +516,14 @@ __split_vma(struct vma_iterator *vmi, st
 	init_vma_prep(&vp, vma);
 	vp.insert = new;
 	vma_prepare(&vp);
+
+	/*
+	 * Get rid of huge pages and shared page tables straddling the split
+	 * boundary.
+	 */
 	vma_adjust_trans_huge(vma, vma->vm_start, addr, NULL);
+	if (is_vm_hugetlb_page(vma))
+		hugetlb_split(vma, addr);

 	if (new_below) {
 		vma->vm_start = addr;
--- a/tools/testing/vma/vma_internal.h
+++ b/tools/testing/vma/vma_internal.h
@@ -793,6 +793,8 @@ static inline void vma_adjust_trans_huge
 	(void)next;
 }

+static inline void hugetlb_split(struct vm_area_struct *, unsigned long) {}
+
 static inline void vma_iter_free(struct vma_iterator *vmi)
 {
 	mas_destroy(&vmi->mas);