diff --git a/debian/bin/genpatch-pfkernel b/debian/bin/genpatch-pfkernel index aae5cb5..76fb33e 100755 --- a/debian/bin/genpatch-pfkernel +++ b/debian/bin/genpatch-pfkernel @@ -7,7 +7,7 @@ w=$(git rev-parse --path-format=absolute --show-toplevel) ; : "${w:?}" ; cd "$w" dst='debian/patches/pf-tmp' src='../linux-extras' -branches='amd-pstate amd-rapl btrfs cpuidle crypto fixes invlpgb kbuild pksm xfs zstd' +branches='btrfs cpuidle crypto fixes kbuild pksm xfs zstd' if [ -d "${dst}" ] ; then rm -rf "${dst}" ; fi mkdir -p "${dst}" diff --git a/debian/changelog b/debian/changelog index 414b4a2..eb86be2 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,8 +1,15 @@ +linux (6.12.10-1) sid; urgency=medium + + * New upstream stable update: + https://www.kernel.org/pub/linux/kernel/v6.x/ChangeLog-6.12.10 + + -- Konstantin Demin Sat, 18 Jan 2025 01:39:50 +0300 + linux (6.12.9-1) sid; urgency=medium * New upstream stable update: https://www.kernel.org/pub/linux/kernel/v6.x/ChangeLog-6.12.9 - * Drop "pf-amd-pstate" and "amd-rapl" patchsets. + * Drop "pf/amd-pstate" and "pf/amd-rapl" patchsets. -- Konstantin Demin Thu, 09 Jan 2025 23:12:22 +0300 diff --git a/debian/config/amd64/config.cloud b/debian/config/amd64/config.cloud index 0a21e5d..135a2c1 100644 --- a/debian/config/amd64/config.cloud +++ b/debian/config/amd64/config.cloud @@ -2491,6 +2491,8 @@ CONFIG_KEXEC_CORE=y CONFIG_LZ4HC_COMPRESS=m CONFIG_LZ4_COMPRESS=m CONFIG_MFD_CORE=m +CONFIG_MMU_GATHER_RCU_TABLE_FREE=y +CONFIG_MMU_GATHER_TABLE_FREE=y CONFIG_ND_BTT=m CONFIG_ND_PFN=m CONFIG_NETFS_SUPPORT=m diff --git a/debian/config/amd64/config.vm b/debian/config/amd64/config.vm index f8baf9b..b880cfc 100644 --- a/debian/config/amd64/config.vm +++ b/debian/config/amd64/config.vm @@ -4064,6 +4064,8 @@ CONFIG_LZ4_COMPRESS=m CONFIG_MAPPING_DIRTY_HELPERS=y CONFIG_MCTP_FLOWS=y CONFIG_MFD_CORE=m +CONFIG_MMU_GATHER_RCU_TABLE_FREE=y +CONFIG_MMU_GATHER_TABLE_FREE=y CONFIG_MOUSE_PS2_SMBUS=y CONFIG_ND_BTT=m CONFIG_ND_PFN=m diff --git a/debian/config/config b/debian/config/config index 62d6b00..e169901 100644 --- a/debian/config/config +++ b/debian/config/config @@ -3945,8 +3945,6 @@ CONFIG_MLX4_CORE=m CONFIG_MMCONF_FAM10H=y CONFIG_MMU=y CONFIG_MMU_GATHER_MERGE_VMAS=y -CONFIG_MMU_GATHER_RCU_TABLE_FREE=y -CONFIG_MMU_GATHER_TABLE_FREE=y CONFIG_MMU_LAZY_TLB_REFCOUNT=y CONFIG_MMU_NOTIFIER=y CONFIG_MODULES_TREE_LOOKUP=y diff --git a/debian/patches/patchset-pf/fixes/0003-USB-core-Disable-LPM-only-for-non-suspended-ports.patch b/debian/patches/patchset-pf/fixes/0003-USB-core-Disable-LPM-only-for-non-suspended-ports.patch deleted file mode 100644 index f557cb9..0000000 --- a/debian/patches/patchset-pf/fixes/0003-USB-core-Disable-LPM-only-for-non-suspended-ports.patch +++ /dev/null @@ -1,45 +0,0 @@ -From 1c45e81769d174d02a26a61e3919313fa3b16120 Mon Sep 17 00:00:00 2001 -From: Kai-Heng Feng -Date: Fri, 6 Dec 2024 15:48:17 +0800 -Subject: USB: core: Disable LPM only for non-suspended ports - -There's USB error when tegra board is shutting down: -[ 180.919315] usb 2-3: Failed to set U1 timeout to 0x0,error code -113 -[ 180.919995] usb 2-3: Failed to set U1 timeout to 0xa,error code -113 -[ 180.920512] usb 2-3: Failed to set U2 timeout to 0x4,error code -113 -[ 186.157172] tegra-xusb 3610000.usb: xHCI host controller not responding, assume dead -[ 186.157858] tegra-xusb 3610000.usb: HC died; cleaning up -[ 186.317280] tegra-xusb 3610000.usb: Timeout while waiting for evaluate context command - -The issue is caused by disabling LPM on already suspended ports. - -For USB2 LPM, the LPM is already disabled during port suspend. For USB3 -LPM, port won't transit to U1/U2 when it's already suspended in U3, -hence disabling LPM is only needed for ports that are not suspended. - -Cc: Wayne Chang -Cc: stable@vger.kernel.org -Fixes: d920a2ed8620 ("usb: Disable USB3 LPM at shutdown") -Signed-off-by: Kai-Heng Feng -Acked-by: Alan Stern ---- - drivers/usb/core/port.c | 7 ++++--- - 1 file changed, 4 insertions(+), 3 deletions(-) - ---- a/drivers/usb/core/port.c -+++ b/drivers/usb/core/port.c -@@ -452,10 +452,11 @@ static int usb_port_runtime_suspend(stru - static void usb_port_shutdown(struct device *dev) - { - struct usb_port *port_dev = to_usb_port(dev); -+ struct usb_device *udev = port_dev->child; - -- if (port_dev->child) { -- usb_disable_usb2_hardware_lpm(port_dev->child); -- usb_unlocked_disable_lpm(port_dev->child); -+ if (udev && !udev->port_is_suspended) { -+ usb_disable_usb2_hardware_lpm(udev); -+ usb_unlocked_disable_lpm(udev); - } - } - diff --git a/debian/patches/patchset-pf/invlpgb/0001-x86-mm-make-MMU_GATHER_RCU_TABLE_FREE-unconditional.patch b/debian/patches/patchset-pf/invlpgb/0001-x86-mm-make-MMU_GATHER_RCU_TABLE_FREE-unconditional.patch deleted file mode 100644 index 1fba26d..0000000 --- a/debian/patches/patchset-pf/invlpgb/0001-x86-mm-make-MMU_GATHER_RCU_TABLE_FREE-unconditional.patch +++ /dev/null @@ -1,60 +0,0 @@ -From 60fbdd9e9dc7074d4cd30ada3ba9547d5c007702 Mon Sep 17 00:00:00 2001 -From: Rik van Riel -Date: Mon, 30 Dec 2024 12:53:02 -0500 -Subject: x86/mm: make MMU_GATHER_RCU_TABLE_FREE unconditional - -Currently x86 uses CONFIG_MMU_GATHER_TABLE_FREE when using -paravirt, and not when running on bare metal. - -There is no real good reason to do things differently for -each setup. Make them all the same. - -After this change, the synchronization between get_user_pages_fast -and page table freeing is handled by RCU, which prevents page tables -from being reused for other data while get_user_pages_fast is walking -them. - -This allows us to invalidate page tables while other CPUs have -interrupts disabled. - -Signed-off-by: Rik van Riel -Suggested-by: Peter Zijlstra ---- - arch/x86/Kconfig | 2 +- - arch/x86/kernel/paravirt.c | 7 +------ - 2 files changed, 2 insertions(+), 7 deletions(-) - ---- a/arch/x86/Kconfig -+++ b/arch/x86/Kconfig -@@ -270,7 +270,7 @@ config X86 - select HAVE_PCI - select HAVE_PERF_REGS - select HAVE_PERF_USER_STACK_DUMP -- select MMU_GATHER_RCU_TABLE_FREE if PARAVIRT -+ select MMU_GATHER_RCU_TABLE_FREE - select MMU_GATHER_MERGE_VMAS - select HAVE_POSIX_CPU_TIMERS_TASK_WORK - select HAVE_REGS_AND_STACK_ACCESS_API ---- a/arch/x86/kernel/paravirt.c -+++ b/arch/x86/kernel/paravirt.c -@@ -59,11 +59,6 @@ void __init native_pv_lock_init(void) - static_branch_enable(&virt_spin_lock_key); - } - --static void native_tlb_remove_table(struct mmu_gather *tlb, void *table) --{ -- tlb_remove_page(tlb, table); --} -- - struct static_key paravirt_steal_enabled; - struct static_key paravirt_steal_rq_enabled; - -@@ -191,7 +186,7 @@ struct paravirt_patch_template pv_ops = - .mmu.flush_tlb_kernel = native_flush_tlb_global, - .mmu.flush_tlb_one_user = native_flush_tlb_one_user, - .mmu.flush_tlb_multi = native_flush_tlb_multi, -- .mmu.tlb_remove_table = native_tlb_remove_table, -+ .mmu.tlb_remove_table = tlb_remove_table, - - .mmu.exit_mmap = paravirt_nop, - .mmu.notify_page_enc_status_changed = paravirt_nop, diff --git a/debian/patches/patchset-pf/invlpgb/0002-x86-mm-remove-pv_ops.mmu.tlb_remove_table-call.patch b/debian/patches/patchset-pf/invlpgb/0002-x86-mm-remove-pv_ops.mmu.tlb_remove_table-call.patch deleted file mode 100644 index 4178de2..0000000 --- a/debian/patches/patchset-pf/invlpgb/0002-x86-mm-remove-pv_ops.mmu.tlb_remove_table-call.patch +++ /dev/null @@ -1,137 +0,0 @@ -From 8966aff4928c0bc3aa79b8729d74da5ea782f73a Mon Sep 17 00:00:00 2001 -From: Rik van Riel -Date: Mon, 30 Dec 2024 12:53:03 -0500 -Subject: x86/mm: remove pv_ops.mmu.tlb_remove_table call - -Every pv_ops.mmu.tlb_remove_table call ends up calling tlb_remove_table. - -Get rid of the indirection by simply calling tlb_remove_table directly, -and not going through the paravirt function pointers. - -Signed-off-by: Rik van Riel -Suggested-by: Qi Zheng ---- - arch/x86/hyperv/mmu.c | 1 - - arch/x86/include/asm/paravirt.h | 5 ----- - arch/x86/include/asm/paravirt_types.h | 2 -- - arch/x86/kernel/kvm.c | 1 - - arch/x86/kernel/paravirt.c | 1 - - arch/x86/mm/pgtable.c | 16 ++++------------ - arch/x86/xen/mmu_pv.c | 1 - - 7 files changed, 4 insertions(+), 23 deletions(-) - ---- a/arch/x86/hyperv/mmu.c -+++ b/arch/x86/hyperv/mmu.c -@@ -240,5 +240,4 @@ void hyperv_setup_mmu_ops(void) - - pr_info("Using hypercall for remote TLB flush\n"); - pv_ops.mmu.flush_tlb_multi = hyperv_flush_tlb_multi; -- pv_ops.mmu.tlb_remove_table = tlb_remove_table; - } ---- a/arch/x86/include/asm/paravirt.h -+++ b/arch/x86/include/asm/paravirt.h -@@ -91,11 +91,6 @@ static inline void __flush_tlb_multi(con - PVOP_VCALL2(mmu.flush_tlb_multi, cpumask, info); - } - --static inline void paravirt_tlb_remove_table(struct mmu_gather *tlb, void *table) --{ -- PVOP_VCALL2(mmu.tlb_remove_table, tlb, table); --} -- - static inline void paravirt_arch_exit_mmap(struct mm_struct *mm) - { - PVOP_VCALL1(mmu.exit_mmap, mm); ---- a/arch/x86/include/asm/paravirt_types.h -+++ b/arch/x86/include/asm/paravirt_types.h -@@ -136,8 +136,6 @@ struct pv_mmu_ops { - void (*flush_tlb_multi)(const struct cpumask *cpus, - const struct flush_tlb_info *info); - -- void (*tlb_remove_table)(struct mmu_gather *tlb, void *table); -- - /* Hook for intercepting the destruction of an mm_struct. */ - void (*exit_mmap)(struct mm_struct *mm); - void (*notify_page_enc_status_changed)(unsigned long pfn, int npages, bool enc); ---- a/arch/x86/kernel/kvm.c -+++ b/arch/x86/kernel/kvm.c -@@ -838,7 +838,6 @@ static void __init kvm_guest_init(void) - #ifdef CONFIG_SMP - if (pv_tlb_flush_supported()) { - pv_ops.mmu.flush_tlb_multi = kvm_flush_tlb_multi; -- pv_ops.mmu.tlb_remove_table = tlb_remove_table; - pr_info("KVM setup pv remote TLB flush\n"); - } - ---- a/arch/x86/kernel/paravirt.c -+++ b/arch/x86/kernel/paravirt.c -@@ -186,7 +186,6 @@ struct paravirt_patch_template pv_ops = - .mmu.flush_tlb_kernel = native_flush_tlb_global, - .mmu.flush_tlb_one_user = native_flush_tlb_one_user, - .mmu.flush_tlb_multi = native_flush_tlb_multi, -- .mmu.tlb_remove_table = tlb_remove_table, - - .mmu.exit_mmap = paravirt_nop, - .mmu.notify_page_enc_status_changed = paravirt_nop, ---- a/arch/x86/mm/pgtable.c -+++ b/arch/x86/mm/pgtable.c -@@ -18,14 +18,6 @@ EXPORT_SYMBOL(physical_mask); - #define PGTABLE_HIGHMEM 0 - #endif - --#ifndef CONFIG_PARAVIRT --static inline --void paravirt_tlb_remove_table(struct mmu_gather *tlb, void *table) --{ -- tlb_remove_page(tlb, table); --} --#endif -- - gfp_t __userpte_alloc_gfp = GFP_PGTABLE_USER | PGTABLE_HIGHMEM; - - pgtable_t pte_alloc_one(struct mm_struct *mm) -@@ -54,7 +46,7 @@ void ___pte_free_tlb(struct mmu_gather * - { - pagetable_pte_dtor(page_ptdesc(pte)); - paravirt_release_pte(page_to_pfn(pte)); -- paravirt_tlb_remove_table(tlb, pte); -+ tlb_remove_table(tlb, pte); - } - - #if CONFIG_PGTABLE_LEVELS > 2 -@@ -70,7 +62,7 @@ void ___pmd_free_tlb(struct mmu_gather * - tlb->need_flush_all = 1; - #endif - pagetable_pmd_dtor(ptdesc); -- paravirt_tlb_remove_table(tlb, ptdesc_page(ptdesc)); -+ tlb_remove_table(tlb, ptdesc_page(ptdesc)); - } - - #if CONFIG_PGTABLE_LEVELS > 3 -@@ -80,14 +72,14 @@ void ___pud_free_tlb(struct mmu_gather * - - pagetable_pud_dtor(ptdesc); - paravirt_release_pud(__pa(pud) >> PAGE_SHIFT); -- paravirt_tlb_remove_table(tlb, virt_to_page(pud)); -+ tlb_remove_table(tlb, virt_to_page(pud)); - } - - #if CONFIG_PGTABLE_LEVELS > 4 - void ___p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d) - { - paravirt_release_p4d(__pa(p4d) >> PAGE_SHIFT); -- paravirt_tlb_remove_table(tlb, virt_to_page(p4d)); -+ tlb_remove_table(tlb, virt_to_page(p4d)); - } - #endif /* CONFIG_PGTABLE_LEVELS > 4 */ - #endif /* CONFIG_PGTABLE_LEVELS > 3 */ ---- a/arch/x86/xen/mmu_pv.c -+++ b/arch/x86/xen/mmu_pv.c -@@ -2137,7 +2137,6 @@ static const typeof(pv_ops) xen_mmu_ops - .flush_tlb_kernel = xen_flush_tlb, - .flush_tlb_one_user = xen_flush_tlb_one_user, - .flush_tlb_multi = xen_flush_tlb_multi, -- .tlb_remove_table = tlb_remove_table, - - .pgd_alloc = xen_pgd_alloc, - .pgd_free = xen_pgd_free, diff --git a/debian/patches/patchset-pf/invlpgb/0003-x86-mm-add-X86_FEATURE_INVLPGB-definition.patch b/debian/patches/patchset-pf/invlpgb/0003-x86-mm-add-X86_FEATURE_INVLPGB-definition.patch deleted file mode 100644 index cb5131a..0000000 --- a/debian/patches/patchset-pf/invlpgb/0003-x86-mm-add-X86_FEATURE_INVLPGB-definition.patch +++ /dev/null @@ -1,23 +0,0 @@ -From efde57842082e36ab2e2be5a11c7b06ff9e18b3d Mon Sep 17 00:00:00 2001 -From: Rik van Riel -Date: Mon, 30 Dec 2024 12:53:04 -0500 -Subject: x86/mm: add X86_FEATURE_INVLPGB definition. - -Add the INVPLGB CPUID definition, allowing the kernel to recognize -whether the CPU supports the INVLPGB instruction. - -Signed-off-by: Rik van Riel ---- - arch/x86/include/asm/cpufeatures.h | 1 + - 1 file changed, 1 insertion(+) - ---- a/arch/x86/include/asm/cpufeatures.h -+++ b/arch/x86/include/asm/cpufeatures.h -@@ -335,6 +335,7 @@ - #define X86_FEATURE_CLZERO (13*32+ 0) /* "clzero" CLZERO instruction */ - #define X86_FEATURE_IRPERF (13*32+ 1) /* "irperf" Instructions Retired Count */ - #define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* "xsaveerptr" Always save/restore FP error pointers */ -+#define X86_FEATURE_INVLPGB (13*32+ 3) /* "invlpgb" INVLPGB instruction */ - #define X86_FEATURE_RDPRU (13*32+ 4) /* "rdpru" Read processor register at user level */ - #define X86_FEATURE_WBNOINVD (13*32+ 9) /* "wbnoinvd" WBNOINVD instruction */ - #define X86_FEATURE_AMD_IBPB (13*32+12) /* Indirect Branch Prediction Barrier */ diff --git a/debian/patches/patchset-pf/invlpgb/0004-x86-mm-get-INVLPGB-count-max-from-CPUID.patch b/debian/patches/patchset-pf/invlpgb/0004-x86-mm-get-INVLPGB-count-max-from-CPUID.patch deleted file mode 100644 index fc2995c..0000000 --- a/debian/patches/patchset-pf/invlpgb/0004-x86-mm-get-INVLPGB-count-max-from-CPUID.patch +++ /dev/null @@ -1,57 +0,0 @@ -From 98953e10e342ceea1dc877cfb63318fa85879a59 Mon Sep 17 00:00:00 2001 -From: Rik van Riel -Date: Mon, 30 Dec 2024 12:53:05 -0500 -Subject: x86/mm: get INVLPGB count max from CPUID - -The CPU advertises the maximum number of pages that can be shot down -with one INVLPGB instruction in the CPUID data. - -Save that information for later use. - -Signed-off-by: Rik van Riel ---- - arch/x86/include/asm/tlbflush.h | 1 + - arch/x86/kernel/cpu/amd.c | 8 ++++++++ - arch/x86/kernel/setup.c | 4 ++++ - 3 files changed, 13 insertions(+) - ---- a/arch/x86/include/asm/tlbflush.h -+++ b/arch/x86/include/asm/tlbflush.h -@@ -182,6 +182,7 @@ static inline void cr4_init_shadow(void) - - extern unsigned long mmu_cr4_features; - extern u32 *trampoline_cr4_features; -+extern u16 invlpgb_count_max; - - extern void initialize_tlbstate_and_flush(void); - ---- a/arch/x86/kernel/cpu/amd.c -+++ b/arch/x86/kernel/cpu/amd.c -@@ -1135,6 +1135,14 @@ static void cpu_detect_tlb_amd(struct cp - tlb_lli_2m[ENTRIES] = eax & mask; - - tlb_lli_4m[ENTRIES] = tlb_lli_2m[ENTRIES] >> 1; -+ -+ if (c->extended_cpuid_level < 0x80000008) -+ return; -+ -+ cpuid(0x80000008, &eax, &ebx, &ecx, &edx); -+ -+ /* Max number of pages INVLPGB can invalidate in one shot */ -+ invlpgb_count_max = (edx & 0xffff) + 1; - } - - static const struct cpu_dev amd_cpu_dev = { ---- a/arch/x86/kernel/setup.c -+++ b/arch/x86/kernel/setup.c -@@ -138,6 +138,10 @@ __visible unsigned long mmu_cr4_features - __visible unsigned long mmu_cr4_features __ro_after_init = X86_CR4_PAE; - #endif - -+#ifdef CONFIG_CPU_SUP_AMD -+u16 invlpgb_count_max __ro_after_init; -+#endif -+ - #ifdef CONFIG_IMA - static phys_addr_t ima_kexec_buffer_phys; - static size_t ima_kexec_buffer_size; diff --git a/debian/patches/patchset-pf/invlpgb/0005-x86-mm-add-INVLPGB-support-code.patch b/debian/patches/patchset-pf/invlpgb/0005-x86-mm-add-INVLPGB-support-code.patch deleted file mode 100644 index f116ad6..0000000 --- a/debian/patches/patchset-pf/invlpgb/0005-x86-mm-add-INVLPGB-support-code.patch +++ /dev/null @@ -1,121 +0,0 @@ -From bc9d1fa1bd32dca78f38bd2a8557e7fc638308bd Mon Sep 17 00:00:00 2001 -From: Rik van Riel -Date: Mon, 30 Dec 2024 12:53:06 -0500 -Subject: x86/mm: add INVLPGB support code - -Add invlpgb.h with the helper functions and definitions needed to use -broadcast TLB invalidation on AMD EPYC 3 and newer CPUs. - -Signed-off-by: Rik van Riel ---- - arch/x86/include/asm/invlpgb.h | 93 +++++++++++++++++++++++++++++++++ - arch/x86/include/asm/tlbflush.h | 1 + - 2 files changed, 94 insertions(+) - create mode 100644 arch/x86/include/asm/invlpgb.h - ---- /dev/null -+++ b/arch/x86/include/asm/invlpgb.h -@@ -0,0 +1,93 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _ASM_X86_INVLPGB -+#define _ASM_X86_INVLPGB -+ -+#include -+ -+/* -+ * INVLPGB does broadcast TLB invalidation across all the CPUs in the system. -+ * -+ * The INVLPGB instruction is weakly ordered, and a batch of invalidations can -+ * be done in a parallel fashion. -+ * -+ * TLBSYNC is used to ensure that pending INVLPGB invalidations initiated from -+ * this CPU have completed. -+ */ -+static inline void __invlpgb(unsigned long asid, unsigned long pcid, unsigned long addr, -+ int extra_count, bool pmd_stride, unsigned long flags) -+{ -+ u64 rax = addr | flags; -+ u32 ecx = (pmd_stride << 31) | extra_count; -+ u32 edx = (pcid << 16) | asid; -+ -+ asm volatile("invlpgb" : : "a" (rax), "c" (ecx), "d" (edx)); -+} -+ -+/* -+ * INVLPGB can be targeted by virtual address, PCID, ASID, or any combination -+ * of the three. For example: -+ * - INVLPGB_VA | INVLPGB_INCLUDE_GLOBAL: invalidate all TLB entries at the address -+ * - INVLPGB_PCID: invalidate all TLB entries matching the PCID -+ * -+ * The first can be used to invalidate (kernel) mappings at a particular -+ * address across all processes. -+ * -+ * The latter invalidates all TLB entries matching a PCID. -+ */ -+#define INVLPGB_VA BIT(0) -+#define INVLPGB_PCID BIT(1) -+#define INVLPGB_ASID BIT(2) -+#define INVLPGB_INCLUDE_GLOBAL BIT(3) -+#define INVLPGB_FINAL_ONLY BIT(4) -+#define INVLPGB_INCLUDE_NESTED BIT(5) -+ -+/* Flush all mappings for a given pcid and addr, not including globals. */ -+static inline void invlpgb_flush_user(unsigned long pcid, -+ unsigned long addr) -+{ -+ __invlpgb(0, pcid, addr, 0, 0, INVLPGB_PCID | INVLPGB_VA); -+} -+ -+static inline void invlpgb_flush_user_nr(unsigned long pcid, unsigned long addr, -+ int nr, bool pmd_stride) -+{ -+ __invlpgb(0, pcid, addr, nr - 1, pmd_stride, INVLPGB_PCID | INVLPGB_VA); -+} -+ -+/* Flush all mappings for a given ASID, not including globals. */ -+static inline void invlpgb_flush_single_asid(unsigned long asid) -+{ -+ __invlpgb(asid, 0, 0, 0, 0, INVLPGB_ASID); -+} -+ -+/* Flush all mappings for a given PCID, not including globals. */ -+static inline void invlpgb_flush_single_pcid(unsigned long pcid) -+{ -+ __invlpgb(0, pcid, 0, 0, 0, INVLPGB_PCID); -+} -+ -+/* Flush all mappings, including globals, for all PCIDs. */ -+static inline void invlpgb_flush_all(void) -+{ -+ __invlpgb(0, 0, 0, 0, 0, INVLPGB_INCLUDE_GLOBAL); -+} -+ -+/* Flush addr, including globals, for all PCIDs. */ -+static inline void invlpgb_flush_addr(unsigned long addr, int nr) -+{ -+ __invlpgb(0, 0, addr, nr - 1, 0, INVLPGB_INCLUDE_GLOBAL); -+} -+ -+/* Flush all mappings for all PCIDs except globals. */ -+static inline void invlpgb_flush_all_nonglobals(void) -+{ -+ __invlpgb(0, 0, 0, 0, 0, 0); -+} -+ -+/* Wait for INVLPGB originated by this CPU to complete. */ -+static inline void tlbsync(void) -+{ -+ asm volatile("tlbsync"); -+} -+ -+#endif /* _ASM_X86_INVLPGB */ ---- a/arch/x86/include/asm/tlbflush.h -+++ b/arch/x86/include/asm/tlbflush.h -@@ -10,6 +10,7 @@ - #include - #include - #include -+#include - #include - #include - #include diff --git a/debian/patches/patchset-pf/invlpgb/0006-x86-mm-use-INVLPGB-for-kernel-TLB-flushes.patch b/debian/patches/patchset-pf/invlpgb/0006-x86-mm-use-INVLPGB-for-kernel-TLB-flushes.patch deleted file mode 100644 index 56a6df3..0000000 --- a/debian/patches/patchset-pf/invlpgb/0006-x86-mm-use-INVLPGB-for-kernel-TLB-flushes.patch +++ /dev/null @@ -1,61 +0,0 @@ -From ffd834c7140dc5fcaf96161c6d8c4601bb700afe Mon Sep 17 00:00:00 2001 -From: Rik van Riel -Date: Mon, 30 Dec 2024 12:53:07 -0500 -Subject: x86/mm: use INVLPGB for kernel TLB flushes - -Use broadcast TLB invalidation for kernel addresses when available. - -This stops us from having to send IPIs for kernel TLB flushes. - -Signed-off-by: Rik van Riel ---- - arch/x86/mm/tlb.c | 31 +++++++++++++++++++++++++++++++ - 1 file changed, 31 insertions(+) - ---- a/arch/x86/mm/tlb.c -+++ b/arch/x86/mm/tlb.c -@@ -1048,6 +1048,32 @@ void flush_tlb_all(void) - on_each_cpu(do_flush_tlb_all, NULL, 1); - } - -+static void broadcast_kernel_range_flush(unsigned long start, unsigned long end) -+{ -+ unsigned long addr; -+ unsigned long maxnr = invlpgb_count_max; -+ unsigned long threshold = tlb_single_page_flush_ceiling * maxnr; -+ -+ /* -+ * TLBSYNC only waits for flushes originating on the same CPU. -+ * Disabling migration allows us to wait on all flushes. -+ */ -+ guard(preempt)(); -+ -+ if (end == TLB_FLUSH_ALL || -+ (end - start) > threshold << PAGE_SHIFT) { -+ invlpgb_flush_all(); -+ } else { -+ unsigned long nr; -+ for (addr = start; addr < end; addr += nr << PAGE_SHIFT) { -+ nr = min((end - addr) >> PAGE_SHIFT, maxnr); -+ invlpgb_flush_addr(addr, nr); -+ } -+ } -+ -+ tlbsync(); -+} -+ - static void do_kernel_range_flush(void *info) - { - struct flush_tlb_info *f = info; -@@ -1060,6 +1086,11 @@ static void do_kernel_range_flush(void * - - void flush_tlb_kernel_range(unsigned long start, unsigned long end) - { -+ if (cpu_feature_enabled(X86_FEATURE_INVLPGB)) { -+ broadcast_kernel_range_flush(start, end); -+ return; -+ } -+ - /* Balance as user space task's flush, a bit conservative */ - if (end == TLB_FLUSH_ALL || - (end - start) > tlb_single_page_flush_ceiling << PAGE_SHIFT) { diff --git a/debian/patches/patchset-pf/invlpgb/0007-x86-tlb-use-INVLPGB-in-flush_tlb_all.patch b/debian/patches/patchset-pf/invlpgb/0007-x86-tlb-use-INVLPGB-in-flush_tlb_all.patch deleted file mode 100644 index fe9ed69..0000000 --- a/debian/patches/patchset-pf/invlpgb/0007-x86-tlb-use-INVLPGB-in-flush_tlb_all.patch +++ /dev/null @@ -1,28 +0,0 @@ -From 13fac8226036456c15c517c1dd77be5109a61da2 Mon Sep 17 00:00:00 2001 -From: Rik van Riel -Date: Mon, 30 Dec 2024 12:53:08 -0500 -Subject: x86/tlb: use INVLPGB in flush_tlb_all - -The flush_tlb_all() function is not used a whole lot, but we might -as well use broadcast TLB flushing there, too. - -Signed-off-by: Rik van Riel ---- - arch/x86/mm/tlb.c | 6 ++++++ - 1 file changed, 6 insertions(+) - ---- a/arch/x86/mm/tlb.c -+++ b/arch/x86/mm/tlb.c -@@ -1045,6 +1045,12 @@ static void do_flush_tlb_all(void *info) - void flush_tlb_all(void) - { - count_vm_tlb_event(NR_TLB_REMOTE_FLUSH); -+ if (cpu_feature_enabled(X86_FEATURE_INVLPGB)) { -+ guard(preempt)(); -+ invlpgb_flush_all(); -+ tlbsync(); -+ return; -+ } - on_each_cpu(do_flush_tlb_all, NULL, 1); - } - diff --git a/debian/patches/patchset-pf/invlpgb/0008-x86-mm-use-broadcast-TLB-flushing-for-page-reclaim-T.patch b/debian/patches/patchset-pf/invlpgb/0008-x86-mm-use-broadcast-TLB-flushing-for-page-reclaim-T.patch deleted file mode 100644 index 4f33505..0000000 --- a/debian/patches/patchset-pf/invlpgb/0008-x86-mm-use-broadcast-TLB-flushing-for-page-reclaim-T.patch +++ /dev/null @@ -1,36 +0,0 @@ -From 765d531296765e7fb2888c70cb56c0e25b459231 Mon Sep 17 00:00:00 2001 -From: Rik van Riel -Date: Mon, 30 Dec 2024 12:53:09 -0500 -Subject: x86/mm: use broadcast TLB flushing for page reclaim TLB flushing - -In the page reclaim code, we only track the CPU(s) where the TLB needs -to be flushed, rather than all the individual mappings that may be getting -invalidated. - -Use broadcast TLB flushing when that is available. - -Signed-off-by: Rik van Riel ---- - arch/x86/mm/tlb.c | 10 +++++++++- - 1 file changed, 9 insertions(+), 1 deletion(-) - ---- a/arch/x86/mm/tlb.c -+++ b/arch/x86/mm/tlb.c -@@ -1281,8 +1281,16 @@ EXPORT_SYMBOL_GPL(__flush_tlb_all); - void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch) - { - struct flush_tlb_info *info; -+ int cpu; - -- int cpu = get_cpu(); -+ if (cpu_feature_enabled(X86_FEATURE_INVLPGB)) { -+ guard(preempt)(); -+ invlpgb_flush_all_nonglobals(); -+ tlbsync(); -+ return; -+ } -+ -+ cpu = get_cpu(); - - info = get_flush_tlb_info(NULL, 0, TLB_FLUSH_ALL, 0, false, - TLB_GENERATION_INVALID); diff --git a/debian/patches/patchset-pf/invlpgb/0009-x86-mm-enable-broadcast-TLB-invalidation-for-multi-t.patch b/debian/patches/patchset-pf/invlpgb/0009-x86-mm-enable-broadcast-TLB-invalidation-for-multi-t.patch deleted file mode 100644 index 7786212..0000000 --- a/debian/patches/patchset-pf/invlpgb/0009-x86-mm-enable-broadcast-TLB-invalidation-for-multi-t.patch +++ /dev/null @@ -1,508 +0,0 @@ -From 8b23125a3200a330fb407133f33aeb9ad3232603 Mon Sep 17 00:00:00 2001 -From: Rik van Riel -Date: Mon, 30 Dec 2024 12:53:10 -0500 -Subject: x86/mm: enable broadcast TLB invalidation for multi-threaded - processes - -Use broadcast TLB invalidation, using the INVPLGB instruction, on AMD EPYC 3 -and newer CPUs. - -In order to not exhaust PCID space, and keep TLB flushes local for single -threaded processes, we only hand out broadcast ASIDs to processes active on -3 or more CPUs, and gradually increase the threshold as broadcast ASID space -is depleted. - -Signed-off-by: Rik van Riel ---- - arch/x86/include/asm/mmu.h | 6 + - arch/x86/include/asm/mmu_context.h | 12 ++ - arch/x86/include/asm/tlbflush.h | 17 ++ - arch/x86/mm/tlb.c | 310 ++++++++++++++++++++++++++++- - 4 files changed, 336 insertions(+), 9 deletions(-) - ---- a/arch/x86/include/asm/mmu.h -+++ b/arch/x86/include/asm/mmu.h -@@ -46,6 +46,12 @@ typedef struct { - unsigned long flags; - #endif - -+#ifdef CONFIG_CPU_SUP_AMD -+ struct list_head broadcast_asid_list; -+ u16 broadcast_asid; -+ bool asid_transition; -+#endif -+ - #ifdef CONFIG_ADDRESS_MASKING - /* Active LAM mode: X86_CR3_LAM_U48 or X86_CR3_LAM_U57 or 0 (disabled) */ - unsigned long lam_cr3_mask; ---- a/arch/x86/include/asm/mmu_context.h -+++ b/arch/x86/include/asm/mmu_context.h -@@ -139,6 +139,8 @@ static inline void mm_reset_untag_mask(s - #define enter_lazy_tlb enter_lazy_tlb - extern void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk); - -+extern void destroy_context_free_broadcast_asid(struct mm_struct *mm); -+ - /* - * Init a new mm. Used on mm copies, like at fork() - * and on mm's that are brand-new, like at execve(). -@@ -160,6 +162,13 @@ static inline int init_new_context(struc - mm->context.execute_only_pkey = -1; - } - #endif -+ -+#ifdef CONFIG_CPU_SUP_AMD -+ INIT_LIST_HEAD(&mm->context.broadcast_asid_list); -+ mm->context.broadcast_asid = 0; -+ mm->context.asid_transition = false; -+#endif -+ - mm_reset_untag_mask(mm); - init_new_context_ldt(mm); - return 0; -@@ -169,6 +178,9 @@ static inline int init_new_context(struc - static inline void destroy_context(struct mm_struct *mm) - { - destroy_context_ldt(mm); -+#ifdef CONFIG_CPU_SUP_AMD -+ destroy_context_free_broadcast_asid(mm); -+#endif - } - - extern void switch_mm(struct mm_struct *prev, struct mm_struct *next, ---- a/arch/x86/include/asm/tlbflush.h -+++ b/arch/x86/include/asm/tlbflush.h -@@ -65,6 +65,23 @@ static inline void cr4_clear_bits(unsign - */ - #define TLB_NR_DYN_ASIDS 6 - -+#ifdef CONFIG_CPU_SUP_AMD -+#define is_dyn_asid(asid) (asid) < TLB_NR_DYN_ASIDS -+#define is_broadcast_asid(asid) (asid) >= TLB_NR_DYN_ASIDS -+#define in_asid_transition(info) (info->mm && info->mm->context.asid_transition) -+#define mm_broadcast_asid(mm) (mm->context.broadcast_asid) -+#else -+#define is_dyn_asid(asid) true -+#define is_broadcast_asid(asid) false -+#define in_asid_transition(info) false -+#define mm_broadcast_asid(mm) 0 -+ -+inline bool needs_broadcast_asid_reload(struct mm_struct *next, u16 prev_asid) -+{ -+ return false; -+} -+#endif -+ - struct tlb_context { - u64 ctx_id; - u64 tlb_gen; ---- a/arch/x86/mm/tlb.c -+++ b/arch/x86/mm/tlb.c -@@ -74,13 +74,15 @@ - * use different names for each of them: - * - * ASID - [0, TLB_NR_DYN_ASIDS-1] -- * the canonical identifier for an mm -+ * the canonical identifier for an mm, dynamically allocated on each CPU -+ * [TLB_NR_DYN_ASIDS, MAX_ASID_AVAILABLE-1] -+ * the canonical, global identifier for an mm, identical across all CPUs - * -- * kPCID - [1, TLB_NR_DYN_ASIDS] -+ * kPCID - [1, MAX_ASID_AVAILABLE] - * the value we write into the PCID part of CR3; corresponds to the - * ASID+1, because PCID 0 is special. - * -- * uPCID - [2048 + 1, 2048 + TLB_NR_DYN_ASIDS] -+ * uPCID - [2048 + 1, 2048 + MAX_ASID_AVAILABLE] - * for KPTI each mm has two address spaces and thus needs two - * PCID values, but we can still do with a single ASID denomination - * for each mm. Corresponds to kPCID + 2048. -@@ -225,6 +227,18 @@ static void choose_new_asid(struct mm_st - return; - } - -+ /* -+ * TLB consistency for this ASID is maintained with INVLPGB; -+ * TLB flushes happen even while the process isn't running. -+ */ -+#ifdef CONFIG_CPU_SUP_AMD -+ if (static_cpu_has(X86_FEATURE_INVLPGB) && mm_broadcast_asid(next)) { -+ *new_asid = mm_broadcast_asid(next); -+ *need_flush = false; -+ return; -+ } -+#endif -+ - if (this_cpu_read(cpu_tlbstate.invalidate_other)) - clear_asid_other(); - -@@ -251,6 +265,245 @@ static void choose_new_asid(struct mm_st - *need_flush = true; - } - -+#ifdef CONFIG_CPU_SUP_AMD -+/* -+ * Logic for AMD INVLPGB support. -+ */ -+static DEFINE_RAW_SPINLOCK(broadcast_asid_lock); -+static u16 last_broadcast_asid = TLB_NR_DYN_ASIDS; -+static DECLARE_BITMAP(broadcast_asid_used, MAX_ASID_AVAILABLE) = { 0 }; -+static LIST_HEAD(broadcast_asid_list); -+static int broadcast_asid_available = MAX_ASID_AVAILABLE - TLB_NR_DYN_ASIDS - 1; -+ -+static void reset_broadcast_asid_space(void) -+{ -+ mm_context_t *context; -+ -+ lockdep_assert_held(&broadcast_asid_lock); -+ -+ /* -+ * Flush once when we wrap around the ASID space, so we won't need -+ * to flush every time we allocate an ASID for boradcast flushing. -+ */ -+ invlpgb_flush_all_nonglobals(); -+ tlbsync(); -+ -+ /* -+ * Leave the currently used broadcast ASIDs set in the bitmap, since -+ * those cannot be reused before the next wraparound and flush.. -+ */ -+ bitmap_clear(broadcast_asid_used, 0, MAX_ASID_AVAILABLE); -+ list_for_each_entry(context, &broadcast_asid_list, broadcast_asid_list) -+ __set_bit(context->broadcast_asid, broadcast_asid_used); -+ -+ last_broadcast_asid = TLB_NR_DYN_ASIDS; -+} -+ -+static u16 get_broadcast_asid(void) -+{ -+ lockdep_assert_held(&broadcast_asid_lock); -+ -+ do { -+ u16 start = last_broadcast_asid; -+ u16 asid = find_next_zero_bit(broadcast_asid_used, MAX_ASID_AVAILABLE, start); -+ -+ if (asid >= MAX_ASID_AVAILABLE) { -+ reset_broadcast_asid_space(); -+ continue; -+ } -+ -+ /* Try claiming this broadcast ASID. */ -+ if (!test_and_set_bit(asid, broadcast_asid_used)) { -+ last_broadcast_asid = asid; -+ return asid; -+ } -+ } while (1); -+} -+ -+/* -+ * Returns true if the mm is transitioning from a CPU-local ASID to a broadcast -+ * (INVLPGB) ASID, or the other way around. -+ */ -+static bool needs_broadcast_asid_reload(struct mm_struct *next, u16 prev_asid) -+{ -+ u16 broadcast_asid = mm_broadcast_asid(next); -+ -+ if (broadcast_asid && prev_asid != broadcast_asid) -+ return true; -+ -+ if (!broadcast_asid && is_broadcast_asid(prev_asid)) -+ return true; -+ -+ return false; -+} -+ -+void destroy_context_free_broadcast_asid(struct mm_struct *mm) -+{ -+ if (!mm->context.broadcast_asid) -+ return; -+ -+ guard(raw_spinlock_irqsave)(&broadcast_asid_lock); -+ mm->context.broadcast_asid = 0; -+ list_del(&mm->context.broadcast_asid_list); -+ broadcast_asid_available++; -+} -+ -+static bool mm_active_cpus_exceeds(struct mm_struct *mm, int threshold) -+{ -+ int count = 0; -+ int cpu; -+ -+ if (cpumask_weight(mm_cpumask(mm)) <= threshold) -+ return false; -+ -+ for_each_cpu(cpu, mm_cpumask(mm)) { -+ /* Skip the CPUs that aren't really running this process. */ -+ if (per_cpu(cpu_tlbstate.loaded_mm, cpu) != mm) -+ continue; -+ -+ if (per_cpu(cpu_tlbstate_shared.is_lazy, cpu)) -+ continue; -+ -+ if (++count > threshold) -+ return true; -+ } -+ return false; -+} -+ -+/* -+ * Assign a broadcast ASID to the current process, protecting against -+ * races between multiple threads in the process. -+ */ -+static void use_broadcast_asid(struct mm_struct *mm) -+{ -+ guard(raw_spinlock_irqsave)(&broadcast_asid_lock); -+ -+ /* This process is already using broadcast TLB invalidation. */ -+ if (mm->context.broadcast_asid) -+ return; -+ -+ mm->context.broadcast_asid = get_broadcast_asid(); -+ mm->context.asid_transition = true; -+ list_add(&mm->context.broadcast_asid_list, &broadcast_asid_list); -+ broadcast_asid_available--; -+} -+ -+/* -+ * Figure out whether to assign a broadcast (global) ASID to a process. -+ * We vary the threshold by how empty or full broadcast ASID space is. -+ * 1/4 full: >= 4 active threads -+ * 1/2 full: >= 8 active threads -+ * 3/4 full: >= 16 active threads -+ * 7/8 full: >= 32 active threads -+ * etc -+ * -+ * This way we should never exhaust the broadcast ASID space, even on very -+ * large systems, and the processes with the largest number of active -+ * threads should be able to use broadcast TLB invalidation. -+ */ -+#define HALFFULL_THRESHOLD 8 -+static bool meets_broadcast_asid_threshold(struct mm_struct *mm) -+{ -+ int avail = broadcast_asid_available; -+ int threshold = HALFFULL_THRESHOLD; -+ -+ if (!avail) -+ return false; -+ -+ if (avail > MAX_ASID_AVAILABLE * 3 / 4) { -+ threshold = HALFFULL_THRESHOLD / 4; -+ } else if (avail > MAX_ASID_AVAILABLE / 2) { -+ threshold = HALFFULL_THRESHOLD / 2; -+ } else if (avail < MAX_ASID_AVAILABLE / 3) { -+ do { -+ avail *= 2; -+ threshold *= 2; -+ } while ((avail + threshold) < MAX_ASID_AVAILABLE / 2); -+ } -+ -+ return mm_active_cpus_exceeds(mm, threshold); -+} -+ -+static void count_tlb_flush(struct mm_struct *mm) -+{ -+ if (!static_cpu_has(X86_FEATURE_INVLPGB)) -+ return; -+ -+ /* Check every once in a while. */ -+ if ((current->pid & 0x1f) != (jiffies & 0x1f)) -+ return; -+ -+ if (meets_broadcast_asid_threshold(mm)) -+ use_broadcast_asid(mm); -+} -+ -+static void finish_asid_transition(struct flush_tlb_info *info) -+{ -+ struct mm_struct *mm = info->mm; -+ int bc_asid = mm_broadcast_asid(mm); -+ int cpu; -+ -+ if (!mm->context.asid_transition) -+ return; -+ -+ for_each_cpu(cpu, mm_cpumask(mm)) { -+ if (READ_ONCE(per_cpu(cpu_tlbstate.loaded_mm, cpu)) != mm) -+ continue; -+ -+ /* -+ * If at least one CPU is not using the broadcast ASID yet, -+ * send a TLB flush IPI. The IPI should cause stragglers -+ * to transition soon. -+ */ -+ if (per_cpu(cpu_tlbstate.loaded_mm_asid, cpu) != bc_asid) { -+ flush_tlb_multi(mm_cpumask(info->mm), info); -+ return; -+ } -+ } -+ -+ /* All the CPUs running this process are using the broadcast ASID. */ -+ mm->context.asid_transition = 0; -+} -+ -+static void broadcast_tlb_flush(struct flush_tlb_info *info) -+{ -+ bool pmd = info->stride_shift == PMD_SHIFT; -+ unsigned long maxnr = invlpgb_count_max; -+ unsigned long asid = info->mm->context.broadcast_asid; -+ unsigned long addr = info->start; -+ unsigned long nr; -+ -+ /* Flushing multiple pages at once is not supported with 1GB pages. */ -+ if (info->stride_shift > PMD_SHIFT) -+ maxnr = 1; -+ -+ if (info->end == TLB_FLUSH_ALL) { -+ invlpgb_flush_single_pcid(kern_pcid(asid)); -+ /* Do any CPUs supporting INVLPGB need PTI? */ -+ if (static_cpu_has(X86_FEATURE_PTI)) -+ invlpgb_flush_single_pcid(user_pcid(asid)); -+ } else do { -+ /* -+ * Calculate how many pages can be flushed at once; if the -+ * remainder of the range is less than one page, flush one. -+ */ -+ nr = min(maxnr, (info->end - addr) >> info->stride_shift); -+ nr = max(nr, 1); -+ -+ invlpgb_flush_user_nr(kern_pcid(asid), addr, nr, pmd); -+ /* Do any CPUs supporting INVLPGB need PTI? */ -+ if (static_cpu_has(X86_FEATURE_PTI)) -+ invlpgb_flush_user_nr(user_pcid(asid), addr, nr, pmd); -+ addr += nr << info->stride_shift; -+ } while (addr < info->end); -+ -+ finish_asid_transition(info); -+ -+ /* Wait for the INVLPGBs kicked off above to finish. */ -+ tlbsync(); -+} -+#endif /* CONFIG_CPU_SUP_AMD */ -+ - /* - * Given an ASID, flush the corresponding user ASID. We can delay this - * until the next time we switch to it. -@@ -556,8 +809,9 @@ void switch_mm_irqs_off(struct mm_struct - */ - if (prev == next) { - /* Not actually switching mm's */ -- VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) != -- next->context.ctx_id); -+ if (is_dyn_asid(prev_asid)) -+ VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) != -+ next->context.ctx_id); - - /* - * If this races with another thread that enables lam, 'new_lam' -@@ -574,6 +828,23 @@ void switch_mm_irqs_off(struct mm_struct - cpumask_set_cpu(cpu, mm_cpumask(next)); - - /* -+ * Check if the current mm is transitioning to a new ASID. -+ */ -+ if (needs_broadcast_asid_reload(next, prev_asid)) { -+ next_tlb_gen = atomic64_read(&next->context.tlb_gen); -+ -+ choose_new_asid(next, next_tlb_gen, &new_asid, &need_flush); -+ goto reload_tlb; -+ } -+ -+ /* -+ * Broadcast TLB invalidation keeps this PCID up to date -+ * all the time. -+ */ -+ if (is_broadcast_asid(prev_asid)) -+ return; -+ -+ /* - * If the CPU is not in lazy TLB mode, we are just switching - * from one thread in a process to another thread in the same - * process. No TLB flush required. -@@ -629,8 +900,10 @@ void switch_mm_irqs_off(struct mm_struct - barrier(); - } - -+reload_tlb: - new_lam = mm_lam_cr3_mask(next); - if (need_flush) { -+ VM_BUG_ON(is_broadcast_asid(new_asid)); - this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id); - this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen); - load_new_mm_cr3(next->pgd, new_asid, new_lam, true); -@@ -749,7 +1022,7 @@ static void flush_tlb_func(void *info) - const struct flush_tlb_info *f = info; - struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm); - u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid); -- u64 local_tlb_gen = this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].tlb_gen); -+ u64 local_tlb_gen; - bool local = smp_processor_id() == f->initiating_cpu; - unsigned long nr_invalidate = 0; - u64 mm_tlb_gen; -@@ -769,6 +1042,16 @@ static void flush_tlb_func(void *info) - if (unlikely(loaded_mm == &init_mm)) - return; - -+ /* Reload the ASID if transitioning into or out of a broadcast ASID */ -+ if (needs_broadcast_asid_reload(loaded_mm, loaded_mm_asid)) { -+ switch_mm_irqs_off(NULL, loaded_mm, NULL); -+ loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid); -+ } -+ -+ /* Broadcast ASIDs are always kept up to date with INVLPGB. */ -+ if (is_broadcast_asid(loaded_mm_asid)) -+ return; -+ - VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].ctx_id) != - loaded_mm->context.ctx_id); - -@@ -786,6 +1069,8 @@ static void flush_tlb_func(void *info) - return; - } - -+ local_tlb_gen = this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].tlb_gen); -+ - if (unlikely(f->new_tlb_gen != TLB_GENERATION_INVALID && - f->new_tlb_gen <= local_tlb_gen)) { - /* -@@ -926,7 +1211,7 @@ STATIC_NOPV void native_flush_tlb_multi( - * up on the new contents of what used to be page tables, while - * doing a speculative memory access. - */ -- if (info->freed_tables) -+ if (info->freed_tables || in_asid_transition(info)) - on_each_cpu_mask(cpumask, flush_tlb_func, (void *)info, true); - else - on_each_cpu_cond_mask(tlb_is_not_lazy, flush_tlb_func, -@@ -998,14 +1283,18 @@ void flush_tlb_mm_range(struct mm_struct - bool freed_tables) - { - struct flush_tlb_info *info; -+ unsigned long threshold = tlb_single_page_flush_ceiling; - u64 new_tlb_gen; - int cpu; - -+ if (static_cpu_has(X86_FEATURE_INVLPGB)) -+ threshold *= invlpgb_count_max; -+ - cpu = get_cpu(); - - /* Should we flush just the requested range? */ - if ((end == TLB_FLUSH_ALL) || -- ((end - start) >> stride_shift) > tlb_single_page_flush_ceiling) { -+ ((end - start) >> stride_shift) > threshold) { - start = 0; - end = TLB_FLUSH_ALL; - } -@@ -1021,8 +1310,11 @@ void flush_tlb_mm_range(struct mm_struct - * a local TLB flush is needed. Optimize this use-case by calling - * flush_tlb_func_local() directly in this case. - */ -- if (cpumask_any_but(mm_cpumask(mm), cpu) < nr_cpu_ids) { -+ if (IS_ENABLED(CONFIG_CPU_SUP_AMD) && mm_broadcast_asid(mm)) { -+ broadcast_tlb_flush(info); -+ } else if (cpumask_any_but(mm_cpumask(mm), cpu) < nr_cpu_ids) { - flush_tlb_multi(mm_cpumask(mm), info); -+ count_tlb_flush(mm); - } else if (mm == this_cpu_read(cpu_tlbstate.loaded_mm)) { - lockdep_assert_irqs_enabled(); - local_irq_disable(); diff --git a/debian/patches/patchset-pf/invlpgb/0010-x86-tlb-do-targeted-broadcast-flushing-from-tlbbatch.patch b/debian/patches/patchset-pf/invlpgb/0010-x86-tlb-do-targeted-broadcast-flushing-from-tlbbatch.patch deleted file mode 100644 index 42e7f79..0000000 --- a/debian/patches/patchset-pf/invlpgb/0010-x86-tlb-do-targeted-broadcast-flushing-from-tlbbatch.patch +++ /dev/null @@ -1,126 +0,0 @@ -From 1767a2786ebbe3451f973df44485309c2a8fd8a5 Mon Sep 17 00:00:00 2001 -From: Rik van Riel -Date: Mon, 30 Dec 2024 12:53:11 -0500 -Subject: x86,tlb: do targeted broadcast flushing from tlbbatch code - -Instead of doing a system-wide TLB flush from arch_tlbbatch_flush, -queue up asynchronous, targeted flushes from arch_tlbbatch_add_pending. - -This also allows us to avoid adding the CPUs of processes using broadcast -flushing to the batch->cpumask, and will hopefully further reduce TLB -flushing from the reclaim and compaction paths. - -Signed-off-by: Rik van Riel ---- - arch/x86/include/asm/tlbbatch.h | 1 + - arch/x86/include/asm/tlbflush.h | 12 +++------ - arch/x86/mm/tlb.c | 48 ++++++++++++++++++++++++++------- - 3 files changed, 42 insertions(+), 19 deletions(-) - ---- a/arch/x86/include/asm/tlbbatch.h -+++ b/arch/x86/include/asm/tlbbatch.h -@@ -10,6 +10,7 @@ struct arch_tlbflush_unmap_batch { - * the PFNs being flushed.. - */ - struct cpumask cpumask; -+ bool used_invlpgb; - }; - - #endif /* _ARCH_X86_TLBBATCH_H */ ---- a/arch/x86/include/asm/tlbflush.h -+++ b/arch/x86/include/asm/tlbflush.h -@@ -296,21 +296,15 @@ static inline u64 inc_mm_tlb_gen(struct - return atomic64_inc_return(&mm->context.tlb_gen); - } - --static inline void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch, -- struct mm_struct *mm, -- unsigned long uaddr) --{ -- inc_mm_tlb_gen(mm); -- cpumask_or(&batch->cpumask, &batch->cpumask, mm_cpumask(mm)); -- mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL); --} -- - static inline void arch_flush_tlb_batched_pending(struct mm_struct *mm) - { - flush_tlb_mm(mm); - } - - extern void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch); -+extern void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch, -+ struct mm_struct *mm, -+ unsigned long uaddr); - - static inline bool pte_flags_need_flush(unsigned long oldflags, - unsigned long newflags, ---- a/arch/x86/mm/tlb.c -+++ b/arch/x86/mm/tlb.c -@@ -1573,16 +1573,7 @@ EXPORT_SYMBOL_GPL(__flush_tlb_all); - void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch) - { - struct flush_tlb_info *info; -- int cpu; -- -- if (cpu_feature_enabled(X86_FEATURE_INVLPGB)) { -- guard(preempt)(); -- invlpgb_flush_all_nonglobals(); -- tlbsync(); -- return; -- } -- -- cpu = get_cpu(); -+ int cpu = get_cpu(); - - info = get_flush_tlb_info(NULL, 0, TLB_FLUSH_ALL, 0, false, - TLB_GENERATION_INVALID); -@@ -1600,12 +1591,49 @@ void arch_tlbbatch_flush(struct arch_tlb - local_irq_enable(); - } - -+ /* -+ * If we issued (asynchronous) INVLPGB flushes, wait for them here. -+ * The cpumask above contains only CPUs that were running tasks -+ * not using broadcast TLB flushing. -+ */ -+ if (cpu_feature_enabled(X86_FEATURE_INVLPGB) && batch->used_invlpgb) { -+ tlbsync(); -+ migrate_enable(); -+ batch->used_invlpgb = false; -+ } -+ - cpumask_clear(&batch->cpumask); - - put_flush_tlb_info(); - put_cpu(); - } - -+void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch, -+ struct mm_struct *mm, -+ unsigned long uaddr) -+{ -+ if (static_cpu_has(X86_FEATURE_INVLPGB) && mm_broadcast_asid(mm)) { -+ u16 asid = mm_broadcast_asid(mm); -+ /* -+ * Queue up an asynchronous invalidation. The corresponding -+ * TLBSYNC is done in arch_tlbbatch_flush(), and must be done -+ * on the same CPU. -+ */ -+ if (!batch->used_invlpgb) { -+ batch->used_invlpgb = true; -+ migrate_disable(); -+ } -+ invlpgb_flush_user_nr(kern_pcid(asid), uaddr, 1, 0); -+ /* Do any CPUs supporting INVLPGB need PTI? */ -+ if (static_cpu_has(X86_FEATURE_PTI)) -+ invlpgb_flush_user_nr(user_pcid(asid), uaddr, 1, 0); -+ } else { -+ inc_mm_tlb_gen(mm); -+ cpumask_or(&batch->cpumask, &batch->cpumask, mm_cpumask(mm)); -+ } -+ mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL); -+} -+ - /* - * Blindly accessing user memory from NMI context can be dangerous - * if we're in the middle of switching the current user task or diff --git a/debian/patches/patchset-pf/invlpgb/0011-x86-mm-enable-AMD-translation-cache-extensions.patch b/debian/patches/patchset-pf/invlpgb/0011-x86-mm-enable-AMD-translation-cache-extensions.patch deleted file mode 100644 index b72f506..0000000 --- a/debian/patches/patchset-pf/invlpgb/0011-x86-mm-enable-AMD-translation-cache-extensions.patch +++ /dev/null @@ -1,82 +0,0 @@ -From 13faf551d1a146ed18c448babe1953def4ed3d56 Mon Sep 17 00:00:00 2001 -From: Rik van Riel -Date: Mon, 30 Dec 2024 12:53:12 -0500 -Subject: x86/mm: enable AMD translation cache extensions - -With AMD TCE (translation cache extensions) only the intermediate mappings -that cover the address range zapped by INVLPG / INVLPGB get invalidated, -rather than all intermediate mappings getting zapped at every TLB invalidation. - -This can help reduce the TLB miss rate, by keeping more intermediate -mappings in the cache. - ->From the AMD manual: - -Translation Cache Extension (TCE) Bit. Bit 15, read/write. Setting this bit -to 1 changes how the INVLPG, INVLPGB, and INVPCID instructions operate on -TLB entries. When this bit is 0, these instructions remove the target PTE -from the TLB as well as all upper-level table entries that are cached -in the TLB, whether or not they are associated with the target PTE. -When this bit is set, these instructions will remove the target PTE and -only those upper-level entries that lead to the target PTE in -the page table hierarchy, leaving unrelated upper-level entries intact. - -Signed-off-by: Rik van Riel ---- - arch/x86/kernel/cpu/amd.c | 8 ++++++++ - arch/x86/mm/tlb.c | 10 +++++++--- - 2 files changed, 15 insertions(+), 3 deletions(-) - ---- a/arch/x86/kernel/cpu/amd.c -+++ b/arch/x86/kernel/cpu/amd.c -@@ -1143,6 +1143,14 @@ static void cpu_detect_tlb_amd(struct cp - - /* Max number of pages INVLPGB can invalidate in one shot */ - invlpgb_count_max = (edx & 0xffff) + 1; -+ -+ /* If supported, enable translation cache extensions (TCE) */ -+ cpuid(0x80000001, &eax, &ebx, &ecx, &edx); -+ if (ecx & BIT(17)) { -+ u64 msr = native_read_msr(MSR_EFER);; -+ msr |= BIT(15); -+ wrmsrl(MSR_EFER, msr); -+ } - } - - static const struct cpu_dev amd_cpu_dev = { ---- a/arch/x86/mm/tlb.c -+++ b/arch/x86/mm/tlb.c -@@ -477,7 +477,7 @@ static void broadcast_tlb_flush(struct f - if (info->stride_shift > PMD_SHIFT) - maxnr = 1; - -- if (info->end == TLB_FLUSH_ALL) { -+ if (info->end == TLB_FLUSH_ALL || info->freed_tables) { - invlpgb_flush_single_pcid(kern_pcid(asid)); - /* Do any CPUs supporting INVLPGB need PTI? */ - if (static_cpu_has(X86_FEATURE_PTI)) -@@ -1110,7 +1110,7 @@ static void flush_tlb_func(void *info) - * - * The only question is whether to do a full or partial flush. - * -- * We do a partial flush if requested and two extra conditions -+ * We do a partial flush if requested and three extra conditions - * are met: - * - * 1. f->new_tlb_gen == local_tlb_gen + 1. We have an invariant that -@@ -1137,10 +1137,14 @@ static void flush_tlb_func(void *info) - * date. By doing a full flush instead, we can increase - * local_tlb_gen all the way to mm_tlb_gen and we can probably - * avoid another flush in the very near future. -+ * -+ * 3. No page tables were freed. If page tables were freed, a full -+ * flush ensures intermediate translations in the TLB get flushed. - */ - if (f->end != TLB_FLUSH_ALL && - f->new_tlb_gen == local_tlb_gen + 1 && -- f->new_tlb_gen == mm_tlb_gen) { -+ f->new_tlb_gen == mm_tlb_gen && -+ !f->freed_tables) { - /* Partial flush */ - unsigned long addr = f->start; - diff --git a/debian/patches/patchset-pf/invlpgb/0012-x86-mm-only-invalidate-final-translations-with-INVLP.patch b/debian/patches/patchset-pf/invlpgb/0012-x86-mm-only-invalidate-final-translations-with-INVLP.patch deleted file mode 100644 index 7feb629..0000000 --- a/debian/patches/patchset-pf/invlpgb/0012-x86-mm-only-invalidate-final-translations-with-INVLP.patch +++ /dev/null @@ -1,28 +0,0 @@ -From 2fc0be5fbcee1a62162b699451bb94f90ec64244 Mon Sep 17 00:00:00 2001 -From: Rik van Riel -Date: Mon, 30 Dec 2024 12:53:13 -0500 -Subject: x86/mm: only invalidate final translations with INVLPGB - -Use the INVLPGB_FINAL_ONLY flag when invalidating mappings with INVPLGB. -This way only leaf mappings get removed from the TLB, leaving intermediate -translations cached. - -On the (rare) occasions where we free page tables we do a full flush, -ensuring intermediate translations get flushed from the TLB. - -Signed-off-by: Rik van Riel ---- - arch/x86/include/asm/invlpgb.h | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - ---- a/arch/x86/include/asm/invlpgb.h -+++ b/arch/x86/include/asm/invlpgb.h -@@ -51,7 +51,7 @@ static inline void invlpgb_flush_user(un - static inline void invlpgb_flush_user_nr(unsigned long pcid, unsigned long addr, - int nr, bool pmd_stride) - { -- __invlpgb(0, pcid, addr, nr - 1, pmd_stride, INVLPGB_PCID | INVLPGB_VA); -+ __invlpgb(0, pcid, addr, nr - 1, pmd_stride, INVLPGB_PCID | INVLPGB_VA | INVLPGB_FINAL_ONLY); - } - - /* Flush all mappings for a given ASID, not including globals. */ diff --git a/debian/patches/patchset-pf/invlpgb/0013-mm-remove-unnecessary-calls-to-lru_add_drain.patch b/debian/patches/patchset-pf/invlpgb/0013-mm-remove-unnecessary-calls-to-lru_add_drain.patch deleted file mode 100644 index eb4f0da..0000000 --- a/debian/patches/patchset-pf/invlpgb/0013-mm-remove-unnecessary-calls-to-lru_add_drain.patch +++ /dev/null @@ -1,92 +0,0 @@ -From a3ff46a157cadb29349c5b388fc70804c351e561 Mon Sep 17 00:00:00 2001 -From: Rik van Riel -Date: Thu, 19 Dec 2024 15:32:53 -0500 -Subject: mm: remove unnecessary calls to lru_add_drain - -There seem to be several categories of calls to lru_add_drain -and lru_add_drain_all. - -The first are code paths that recently allocated, swapped in, -or otherwise processed a batch of pages, and want them all on -the LRU. These drain pages that were recently allocated, -probably on the local CPU. - -A second category are code paths that are actively trying to -reclaim, migrate, or offline memory. These often use lru_add_drain_all, -to drain the caches on all CPUs. - -However, there also seem to be some other callers where we -aren't really doing either. They are calling lru_add_drain(), -despite operating on pages that may have been allocated -long ago, and quite possibly on different CPUs. - -Those calls are not likely to be effective at anything but -creating lock contention on the LRU locks. - -Remove the lru_add_drain calls in the latter category. - -Signed-off-by: Rik van Riel -Suggested-by: David Hildenbrand ---- - mm/memory.c | 1 - - mm/mmap.c | 2 -- - mm/swap_state.c | 1 - - mm/vma.c | 2 -- - 4 files changed, 6 deletions(-) - ---- a/mm/memory.c -+++ b/mm/memory.c -@@ -1921,7 +1921,6 @@ void zap_page_range_single(struct vm_are - struct mmu_notifier_range range; - struct mmu_gather tlb; - -- lru_add_drain(); - mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma->vm_mm, - address, end); - hugetlb_zap_begin(vma, &range.start, &range.end); ---- a/mm/mmap.c -+++ b/mm/mmap.c -@@ -1931,7 +1931,6 @@ void exit_mmap(struct mm_struct *mm) - goto destroy; - } - -- lru_add_drain(); - flush_cache_mm(mm); - tlb_gather_mmu_fullmm(&tlb, mm); - /* update_hiwater_rss(mm) here? but nobody should be looking */ -@@ -2374,7 +2373,6 @@ int relocate_vma_down(struct vm_area_str - vma, new_start, length, false, true)) - return -ENOMEM; - -- lru_add_drain(); - tlb_gather_mmu(&tlb, mm); - next = vma_next(&vmi); - if (new_end > old_start) { ---- a/mm/swap_state.c -+++ b/mm/swap_state.c -@@ -317,7 +317,6 @@ void free_pages_and_swap_cache(struct en - struct folio_batch folios; - unsigned int refs[PAGEVEC_SIZE]; - -- lru_add_drain(); - folio_batch_init(&folios); - for (int i = 0; i < nr; i++) { - struct folio *folio = page_folio(encoded_page_ptr(pages[i])); ---- a/mm/vma.c -+++ b/mm/vma.c -@@ -347,7 +347,6 @@ void unmap_region(struct ma_state *mas, - struct mm_struct *mm = vma->vm_mm; - struct mmu_gather tlb; - -- lru_add_drain(); - tlb_gather_mmu(&tlb, mm); - update_hiwater_rss(mm); - unmap_vmas(&tlb, mas, vma, vma->vm_start, vma->vm_end, vma->vm_end, -@@ -1089,7 +1088,6 @@ static inline void vms_clear_ptes(struct - * were isolated before we downgraded mmap_lock. - */ - mas_set(mas_detach, 1); -- lru_add_drain(); - tlb_gather_mmu(&tlb, vms->vma->vm_mm); - update_hiwater_rss(vms->vma->vm_mm); - unmap_vmas(&tlb, mas_detach, vms->vma, vms->start, vms->end, diff --git a/debian/patches/series b/debian/patches/series index 0080e9f..0a51c7d 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -173,20 +173,6 @@ patchset-pf/crypto/0001-crypto-x86-crc32c-simplify-code-for-handling-fewer-t.pat patchset-pf/crypto/0002-crypto-x86-crc32c-access-32-bit-arguments-as-32-bit.patch patchset-pf/crypto/0003-crypto-x86-crc32c-eliminate-jump-table-and-excessive.patch -patchset-pf/invlpgb/0001-x86-mm-make-MMU_GATHER_RCU_TABLE_FREE-unconditional.patch -patchset-pf/invlpgb/0002-x86-mm-remove-pv_ops.mmu.tlb_remove_table-call.patch -patchset-pf/invlpgb/0003-x86-mm-add-X86_FEATURE_INVLPGB-definition.patch -patchset-pf/invlpgb/0004-x86-mm-get-INVLPGB-count-max-from-CPUID.patch -patchset-pf/invlpgb/0005-x86-mm-add-INVLPGB-support-code.patch -patchset-pf/invlpgb/0006-x86-mm-use-INVLPGB-for-kernel-TLB-flushes.patch -patchset-pf/invlpgb/0007-x86-tlb-use-INVLPGB-in-flush_tlb_all.patch -patchset-pf/invlpgb/0008-x86-mm-use-broadcast-TLB-flushing-for-page-reclaim-T.patch -patchset-pf/invlpgb/0009-x86-mm-enable-broadcast-TLB-invalidation-for-multi-t.patch -patchset-pf/invlpgb/0010-x86-tlb-do-targeted-broadcast-flushing-from-tlbbatch.patch -patchset-pf/invlpgb/0011-x86-mm-enable-AMD-translation-cache-extensions.patch -patchset-pf/invlpgb/0012-x86-mm-only-invalidate-final-translations-with-INVLP.patch -patchset-pf/invlpgb/0013-mm-remove-unnecessary-calls-to-lru_add_drain.patch - patchset-pf/pksm/0001-mm-expose-per-process-KSM-control-via-syscalls.patch patchset-pf/xfs/0001-xfs-fix-chown-with-rt-quota.patch @@ -261,6 +247,5 @@ patchset-zen/sauce/0024-ZEN-kernel-Kconfig.preempt-Remove-EXPERT-conditional.pat patchset-pf/fixes/0001-arch-Kconfig-Default-to-maximum-amount-of-ASLR-bits.patch patchset-pf/fixes/0002-drivers-firmware-skip-simpledrm-if-nvidia-drm.modese.patch -patchset-pf/fixes/0003-USB-core-Disable-LPM-only-for-non-suspended-ports.patch patchset-zen/fixes/0001-futex-improve-user-space-accesses.patch