release 6.12.8

Konstantin Demin 2025-01-02 21:18:06 +03:00
parent d0b6dece32
commit 3fb1083e44
23 changed files with 1455 additions and 22 deletions


@@ -7,7 +7,7 @@ w=$(git rev-parse --path-format=absolute --show-toplevel) ; : "${w:?}" ; cd "$w"
dst='debian/patches/pf-tmp'
src='../linux-extras'
branches='amd-pstate amd-rapl cpuidle crypto fixes kbuild pksm xfs zstd'
branches='amd-pstate amd-rapl cpuidle crypto fixes invlpgb kbuild pksm xfs zstd'
if [ -d "${dst}" ] ; then rm -rf "${dst}" ; fi
mkdir -p "${dst}"

debian/changelog

@@ -1,3 +1,10 @@
linux (6.12.8-1) sid; urgency=medium
* New upstream stable update:
https://www.kernel.org/pub/linux/kernel/v6.x/ChangeLog-6.12.8
-- Konstantin Demin <rockdrilla@gmail.com> Thu, 02 Jan 2025 19:34:34 +0300
linux (6.12.7-1) sid; urgency=medium
* New upstream stable update:


@@ -1384,9 +1384,6 @@ CONFIG_BLK_DEV_PMEM=m
# CONFIG_NVME_HWMON is not set
# CONFIG_NVME_RDMA is not set
CONFIG_NVME_FC=m
CONFIG_NVME_TCP=m
# CONFIG_NVME_TCP_TLS is not set
# CONFIG_NVME_HOST_AUTH is not set
##
## file: drivers/nvme/target/Kconfig
@@ -1394,8 +1391,6 @@ CONFIG_NVME_TCP=m
CONFIG_NVME_TARGET_RDMA=m
CONFIG_NVME_TARGET_FC=m
# CONFIG_NVME_TARGET_FCLOOP is not set
CONFIG_NVME_TARGET_TCP=m
# CONFIG_NVME_TARGET_TCP_TLS is not set
##
## file: drivers/of/Kconfig
@@ -2495,8 +2490,6 @@ CONFIG_KEXEC_CORE=y
CONFIG_LZ4HC_COMPRESS=m
CONFIG_LZ4_COMPRESS=m
CONFIG_MFD_CORE=m
CONFIG_MMU_GATHER_RCU_TABLE_FREE=y
CONFIG_MMU_GATHER_TABLE_FREE=y
CONFIG_ND_BTT=m
CONFIG_ND_PFN=m
CONFIG_NETFS_SUPPORT=m


@@ -4892,14 +4892,11 @@ CONFIG_OF_PMEM=y
##
CONFIG_NVME_HWMON=y
# CONFIG_NVME_FC is not set
# CONFIG_NVME_TCP is not set
CONFIG_NVME_HOST_AUTH=y
##
## file: drivers/nvme/target/Kconfig
##
# CONFIG_NVME_TARGET_FC is not set
# CONFIG_NVME_TARGET_TCP is not set
##
## file: drivers/nvmem/Kconfig
@@ -8756,7 +8753,6 @@ CONFIG_NFC_ST_NCI=m
CONFIG_NF_NAT_AMANDA=m
CONFIG_NLS_UCS2_UTILS=m
CONFIG_NVMEM_LAYOUTS=y
CONFIG_NVME_AUTH=m
CONFIG_OF_ADDRESS=y
CONFIG_OF_EARLY_FLATTREE=y
CONFIG_OF_FLATTREE=y


@@ -2355,9 +2355,6 @@ CONFIG_BLK_DEV_PMEM=m
CONFIG_NVME_HWMON=y
CONFIG_NVME_RDMA=m
CONFIG_NVME_FC=m
CONFIG_NVME_TCP=m
# CONFIG_NVME_TCP_TLS is not set
# CONFIG_NVME_HOST_AUTH is not set
##
## file: drivers/nvme/target/Kconfig
@@ -2365,8 +2362,6 @@ CONFIG_NVME_TCP=m
CONFIG_NVME_TARGET_RDMA=m
CONFIG_NVME_TARGET_FC=m
# CONFIG_NVME_TARGET_FCLOOP is not set
CONFIG_NVME_TARGET_TCP=m
# CONFIG_NVME_TARGET_TCP_TLS is not set
##
## file: drivers/of/Kconfig
@@ -4068,8 +4063,6 @@ CONFIG_LZ4_COMPRESS=m
CONFIG_MAPPING_DIRTY_HELPERS=y
CONFIG_MCTP_FLOWS=y
CONFIG_MFD_CORE=m
CONFIG_MMU_GATHER_RCU_TABLE_FREE=y
CONFIG_MMU_GATHER_TABLE_FREE=y
CONFIG_MOUSE_PS2_SMBUS=y
CONFIG_ND_BTT=m
CONFIG_ND_PFN=m

debian/config/config

@@ -1098,6 +1098,9 @@ CONFIG_NVDIMM_DAX=y
CONFIG_BLK_DEV_NVME=m
CONFIG_NVME_MULTIPATH=y
CONFIG_NVME_VERBOSE_ERRORS=y
CONFIG_NVME_TCP=m
CONFIG_NVME_TCP_TLS=y
CONFIG_NVME_HOST_AUTH=y
##
## file: drivers/nvme/target/Kconfig
@@ -1106,7 +1109,9 @@ CONFIG_NVME_TARGET=m
# CONFIG_NVME_TARGET_DEBUGFS is not set
CONFIG_NVME_TARGET_PASSTHRU=y
CONFIG_NVME_TARGET_LOOP=m
# CONFIG_NVME_TARGET_AUTH is not set
CONFIG_NVME_TARGET_TCP=m
CONFIG_NVME_TARGET_TCP_TLS=y
CONFIG_NVME_TARGET_AUTH=y
##
## file: drivers/nvmem/Kconfig
@@ -3941,6 +3946,8 @@ CONFIG_MLX4_CORE=m
CONFIG_MMCONF_FAM10H=y
CONFIG_MMU=y
CONFIG_MMU_GATHER_MERGE_VMAS=y
CONFIG_MMU_GATHER_RCU_TABLE_FREE=y
CONFIG_MMU_GATHER_TABLE_FREE=y
CONFIG_MMU_LAZY_TLB_REFCOUNT=y
CONFIG_MMU_NOTIFIER=y
CONFIG_MODULES_TREE_LOOKUP=y
@@ -4015,8 +4022,10 @@ CONFIG_NR_CPUS_RANGE_END=512
CONFIG_NUMA_KEEP_MEMINFO=y
CONFIG_NUMA_MEMBLKS=y
CONFIG_NVDIMM_KEYS=y
CONFIG_NVME_AUTH=m
CONFIG_NVME_CORE=m
CONFIG_NVME_FABRICS=m
CONFIG_NVME_KEYRING=m
CONFIG_OBJTOOL=y
CONFIG_OID_REGISTRY=y
CONFIG_OLD_SIGSUSPEND3=y


@@ -0,0 +1,33 @@
From: Scott Mayhew <smayhew@redhat.com>
Date: Tue, 10 Dec 2024 07:25:54 -0500
Subject: nfsd: fix legacy client tracking initialization
Origin: https://git.kernel.org/pub/scm/linux/kernel/git/cel/linux.git/commit/?h=nfsd-next&id=45cd8c0c13fe5c9f1b926bd307df431f8f1b8a16
Bug: https://bugzilla.kernel.org/show_bug.cgi?id=219580
Bug-Debian: https://bugs.debian.org/1087900
Get rid of the nfsd4_legacy_tracking_ops->init() call in
check_for_legacy_methods(). That will be handled in the caller
(nfsd4_client_tracking_init()). Otherwise, we'll wind up calling
nfsd4_legacy_tracking_ops->init() twice, and the second time we'll
trigger the BUG_ON() in nfsd4_init_recdir().
Fixes: 74fd48739d04 ("nfsd: new Kconfig option for legacy client tracking")
Reported-by: Jur van der Burg <jur@avtware.com>
Link: https://bugzilla.kernel.org/show_bug.cgi?id=219580
Signed-off-by: Scott Mayhew <smayhew@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
fs/nfsd/nfs4recover.c | 1 -
1 file changed, 1 deletion(-)
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -2052,7 +2052,6 @@ static inline int check_for_legacy_metho
path_put(&path);
if (status)
return -ENOTDIR;
- status = nn->client_tracking_ops->init(net);
}
return status;
}


@@ -34,7 +34,7 @@ Signed-off-by: Serge Hallyn <serge.hallyn@ubuntu.com>
/*
* Minimum number of threads to boot the kernel
*/
@@ -2158,6 +2164,10 @@ __latent_entropy struct task_struct *cop
@@ -2157,6 +2163,10 @@ __latent_entropy struct task_struct *cop
if ((clone_flags & (CLONE_NEWUSER|CLONE_FS)) == (CLONE_NEWUSER|CLONE_FS))
return ERR_PTR(-EINVAL);
@@ -45,7 +45,7 @@ Signed-off-by: Serge Hallyn <serge.hallyn@ubuntu.com>
/*
* Thread groups must share signals as well, and detached threads
* can only be started up within the thread group.
@@ -3311,6 +3321,12 @@ int ksys_unshare(unsigned long unshare_f
@@ -3310,6 +3320,12 @@ int ksys_unshare(unsigned long unshare_f
if (unshare_flags & CLONE_NEWNS)
unshare_flags |= CLONE_FS;


@@ -0,0 +1,60 @@
From 60fbdd9e9dc7074d4cd30ada3ba9547d5c007702 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
Date: Mon, 30 Dec 2024 12:53:02 -0500
Subject: x86/mm: make MMU_GATHER_RCU_TABLE_FREE unconditional
Currently x86 uses CONFIG_MMU_GATHER_TABLE_FREE when using
paravirt, and not when running on bare metal.
There is no real good reason to do things differently for
each setup. Make them all the same.
After this change, the synchronization between get_user_pages_fast
and page table freeing is handled by RCU, which prevents page tables
from being reused for other data while get_user_pages_fast is walking
them.
This allows us to invalidate page tables while other CPUs have
interrupts disabled.
Signed-off-by: Rik van Riel <riel@surriel.com>
Suggested-by: Peter Zijlstra <peterz@infradead.org>
---
arch/x86/Kconfig | 2 +-
arch/x86/kernel/paravirt.c | 7 +------
2 files changed, 2 insertions(+), 7 deletions(-)
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -270,7 +270,7 @@ config X86
select HAVE_PCI
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
- select MMU_GATHER_RCU_TABLE_FREE if PARAVIRT
+ select MMU_GATHER_RCU_TABLE_FREE
select MMU_GATHER_MERGE_VMAS
select HAVE_POSIX_CPU_TIMERS_TASK_WORK
select HAVE_REGS_AND_STACK_ACCESS_API
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -59,11 +59,6 @@ void __init native_pv_lock_init(void)
static_branch_enable(&virt_spin_lock_key);
}
-static void native_tlb_remove_table(struct mmu_gather *tlb, void *table)
-{
- tlb_remove_page(tlb, table);
-}
-
struct static_key paravirt_steal_enabled;
struct static_key paravirt_steal_rq_enabled;
@@ -191,7 +186,7 @@ struct paravirt_patch_template pv_ops =
.mmu.flush_tlb_kernel = native_flush_tlb_global,
.mmu.flush_tlb_one_user = native_flush_tlb_one_user,
.mmu.flush_tlb_multi = native_flush_tlb_multi,
- .mmu.tlb_remove_table = native_tlb_remove_table,
+ .mmu.tlb_remove_table = tlb_remove_table,
.mmu.exit_mmap = paravirt_nop,
.mmu.notify_page_enc_status_changed = paravirt_nop,


@@ -0,0 +1,137 @@
From 8966aff4928c0bc3aa79b8729d74da5ea782f73a Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
Date: Mon, 30 Dec 2024 12:53:03 -0500
Subject: x86/mm: remove pv_ops.mmu.tlb_remove_table call
Every pv_ops.mmu.tlb_remove_table call ends up calling tlb_remove_table.
Get rid of the indirection by simply calling tlb_remove_table directly,
and not going through the paravirt function pointers.
Signed-off-by: Rik van Riel <riel@surriel.com>
Suggested-by: Qi Zheng <zhengqi.arch@bytedance.com>
---
arch/x86/hyperv/mmu.c | 1 -
arch/x86/include/asm/paravirt.h | 5 -----
arch/x86/include/asm/paravirt_types.h | 2 --
arch/x86/kernel/kvm.c | 1 -
arch/x86/kernel/paravirt.c | 1 -
arch/x86/mm/pgtable.c | 16 ++++------------
arch/x86/xen/mmu_pv.c | 1 -
7 files changed, 4 insertions(+), 23 deletions(-)
--- a/arch/x86/hyperv/mmu.c
+++ b/arch/x86/hyperv/mmu.c
@@ -240,5 +240,4 @@ void hyperv_setup_mmu_ops(void)
pr_info("Using hypercall for remote TLB flush\n");
pv_ops.mmu.flush_tlb_multi = hyperv_flush_tlb_multi;
- pv_ops.mmu.tlb_remove_table = tlb_remove_table;
}
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -91,11 +91,6 @@ static inline void __flush_tlb_multi(con
PVOP_VCALL2(mmu.flush_tlb_multi, cpumask, info);
}
-static inline void paravirt_tlb_remove_table(struct mmu_gather *tlb, void *table)
-{
- PVOP_VCALL2(mmu.tlb_remove_table, tlb, table);
-}
-
static inline void paravirt_arch_exit_mmap(struct mm_struct *mm)
{
PVOP_VCALL1(mmu.exit_mmap, mm);
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -136,8 +136,6 @@ struct pv_mmu_ops {
void (*flush_tlb_multi)(const struct cpumask *cpus,
const struct flush_tlb_info *info);
- void (*tlb_remove_table)(struct mmu_gather *tlb, void *table);
-
/* Hook for intercepting the destruction of an mm_struct. */
void (*exit_mmap)(struct mm_struct *mm);
void (*notify_page_enc_status_changed)(unsigned long pfn, int npages, bool enc);
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -838,7 +838,6 @@ static void __init kvm_guest_init(void)
#ifdef CONFIG_SMP
if (pv_tlb_flush_supported()) {
pv_ops.mmu.flush_tlb_multi = kvm_flush_tlb_multi;
- pv_ops.mmu.tlb_remove_table = tlb_remove_table;
pr_info("KVM setup pv remote TLB flush\n");
}
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -186,7 +186,6 @@ struct paravirt_patch_template pv_ops =
.mmu.flush_tlb_kernel = native_flush_tlb_global,
.mmu.flush_tlb_one_user = native_flush_tlb_one_user,
.mmu.flush_tlb_multi = native_flush_tlb_multi,
- .mmu.tlb_remove_table = tlb_remove_table,
.mmu.exit_mmap = paravirt_nop,
.mmu.notify_page_enc_status_changed = paravirt_nop,
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -18,14 +18,6 @@ EXPORT_SYMBOL(physical_mask);
#define PGTABLE_HIGHMEM 0
#endif
-#ifndef CONFIG_PARAVIRT
-static inline
-void paravirt_tlb_remove_table(struct mmu_gather *tlb, void *table)
-{
- tlb_remove_page(tlb, table);
-}
-#endif
-
gfp_t __userpte_alloc_gfp = GFP_PGTABLE_USER | PGTABLE_HIGHMEM;
pgtable_t pte_alloc_one(struct mm_struct *mm)
@@ -54,7 +46,7 @@ void ___pte_free_tlb(struct mmu_gather *
{
pagetable_pte_dtor(page_ptdesc(pte));
paravirt_release_pte(page_to_pfn(pte));
- paravirt_tlb_remove_table(tlb, pte);
+ tlb_remove_table(tlb, pte);
}
#if CONFIG_PGTABLE_LEVELS > 2
@@ -70,7 +62,7 @@ void ___pmd_free_tlb(struct mmu_gather *
tlb->need_flush_all = 1;
#endif
pagetable_pmd_dtor(ptdesc);
- paravirt_tlb_remove_table(tlb, ptdesc_page(ptdesc));
+ tlb_remove_table(tlb, ptdesc_page(ptdesc));
}
#if CONFIG_PGTABLE_LEVELS > 3
@@ -80,14 +72,14 @@ void ___pud_free_tlb(struct mmu_gather *
pagetable_pud_dtor(ptdesc);
paravirt_release_pud(__pa(pud) >> PAGE_SHIFT);
- paravirt_tlb_remove_table(tlb, virt_to_page(pud));
+ tlb_remove_table(tlb, virt_to_page(pud));
}
#if CONFIG_PGTABLE_LEVELS > 4
void ___p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d)
{
paravirt_release_p4d(__pa(p4d) >> PAGE_SHIFT);
- paravirt_tlb_remove_table(tlb, virt_to_page(p4d));
+ tlb_remove_table(tlb, virt_to_page(p4d));
}
#endif /* CONFIG_PGTABLE_LEVELS > 4 */
#endif /* CONFIG_PGTABLE_LEVELS > 3 */
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -2137,7 +2137,6 @@ static const typeof(pv_ops) xen_mmu_ops
.flush_tlb_kernel = xen_flush_tlb,
.flush_tlb_one_user = xen_flush_tlb_one_user,
.flush_tlb_multi = xen_flush_tlb_multi,
- .tlb_remove_table = tlb_remove_table,
.pgd_alloc = xen_pgd_alloc,
.pgd_free = xen_pgd_free,


@@ -0,0 +1,23 @@
From efde57842082e36ab2e2be5a11c7b06ff9e18b3d Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
Date: Mon, 30 Dec 2024 12:53:04 -0500
Subject: x86/mm: add X86_FEATURE_INVLPGB definition.
Add the INVLPGB CPUID definition, allowing the kernel to recognize
whether the CPU supports the INVLPGB instruction.
Signed-off-by: Rik van Riel <riel@surriel.com>
---
arch/x86/include/asm/cpufeatures.h | 1 +
1 file changed, 1 insertion(+)
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -335,6 +335,7 @@
#define X86_FEATURE_CLZERO (13*32+ 0) /* "clzero" CLZERO instruction */
#define X86_FEATURE_IRPERF (13*32+ 1) /* "irperf" Instructions Retired Count */
#define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* "xsaveerptr" Always save/restore FP error pointers */
+#define X86_FEATURE_INVLPGB (13*32+ 3) /* "invlpgb" INVLPGB instruction */
#define X86_FEATURE_RDPRU (13*32+ 4) /* "rdpru" Read processor register at user level */
#define X86_FEATURE_WBNOINVD (13*32+ 9) /* "wbnoinvd" WBNOINVD instruction */
#define X86_FEATURE_AMD_IBPB (13*32+12) /* Indirect Branch Prediction Barrier */


@@ -0,0 +1,57 @@
From 98953e10e342ceea1dc877cfb63318fa85879a59 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
Date: Mon, 30 Dec 2024 12:53:05 -0500
Subject: x86/mm: get INVLPGB count max from CPUID
The CPU advertises the maximum number of pages that can be shot down
with one INVLPGB instruction in the CPUID data.
Save that information for later use.
Signed-off-by: Rik van Riel <riel@surriel.com>
---
arch/x86/include/asm/tlbflush.h | 1 +
arch/x86/kernel/cpu/amd.c | 8 ++++++++
arch/x86/kernel/setup.c | 4 ++++
3 files changed, 13 insertions(+)
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -182,6 +182,7 @@ static inline void cr4_init_shadow(void)
extern unsigned long mmu_cr4_features;
extern u32 *trampoline_cr4_features;
+extern u16 invlpgb_count_max;
extern void initialize_tlbstate_and_flush(void);
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -1135,6 +1135,14 @@ static void cpu_detect_tlb_amd(struct cp
tlb_lli_2m[ENTRIES] = eax & mask;
tlb_lli_4m[ENTRIES] = tlb_lli_2m[ENTRIES] >> 1;
+
+ if (c->extended_cpuid_level < 0x80000008)
+ return;
+
+ cpuid(0x80000008, &eax, &ebx, &ecx, &edx);
+
+ /* Max number of pages INVLPGB can invalidate in one shot */
+ invlpgb_count_max = (edx & 0xffff) + 1;
}
static const struct cpu_dev amd_cpu_dev = {
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -138,6 +138,10 @@ __visible unsigned long mmu_cr4_features
__visible unsigned long mmu_cr4_features __ro_after_init = X86_CR4_PAE;
#endif
+#ifdef CONFIG_CPU_SUP_AMD
+u16 invlpgb_count_max __ro_after_init;
+#endif
+
#ifdef CONFIG_IMA
static phys_addr_t ima_kexec_buffer_phys;
static size_t ima_kexec_buffer_size;
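The same CPUID leaf can be inspected from user space. Below is an illustrative sketch (not part of the patch; assumes GCC/Clang on x86 with <cpuid.h>): EBX bit 3 of leaf 0x80000008 is the INVLPGB feature bit defined in the previous patch, and the count is derived exactly as cpu_detect_tlb_amd() does above, (edx & 0xffff) + 1.

#include <cpuid.h>
#include <stdio.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	if (!__get_cpuid(0x80000008, &eax, &ebx, &ecx, &edx)) {
		puts("CPUID leaf 0x80000008 not available");
		return 1;
	}

	/* Fn8000_0008 EBX[3] is the INVLPGB/TLBSYNC feature bit (X86_FEATURE_INVLPGB). */
	printf("INVLPGB supported: %s\n", (ebx & (1u << 3)) ? "yes" : "no");

	/* EDX[15:0] is the count field; the patch stores (edx & 0xffff) + 1. */
	printf("invlpgb_count_max: %u\n", (edx & 0xffff) + 1);
	return 0;
}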


@@ -0,0 +1,121 @@
From bc9d1fa1bd32dca78f38bd2a8557e7fc638308bd Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
Date: Mon, 30 Dec 2024 12:53:06 -0500
Subject: x86/mm: add INVLPGB support code
Add invlpgb.h with the helper functions and definitions needed to use
broadcast TLB invalidation on AMD EPYC 3 and newer CPUs.
Signed-off-by: Rik van Riel <riel@surriel.com>
---
arch/x86/include/asm/invlpgb.h | 93 +++++++++++++++++++++++++++++++++
arch/x86/include/asm/tlbflush.h | 1 +
2 files changed, 94 insertions(+)
create mode 100644 arch/x86/include/asm/invlpgb.h
--- /dev/null
+++ b/arch/x86/include/asm/invlpgb.h
@@ -0,0 +1,93 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_INVLPGB
+#define _ASM_X86_INVLPGB
+
+#include <vdso/bits.h>
+
+/*
+ * INVLPGB does broadcast TLB invalidation across all the CPUs in the system.
+ *
+ * The INVLPGB instruction is weakly ordered, and a batch of invalidations can
+ * be done in a parallel fashion.
+ *
+ * TLBSYNC is used to ensure that pending INVLPGB invalidations initiated from
+ * this CPU have completed.
+ */
+static inline void __invlpgb(unsigned long asid, unsigned long pcid, unsigned long addr,
+ int extra_count, bool pmd_stride, unsigned long flags)
+{
+ u64 rax = addr | flags;
+ u32 ecx = (pmd_stride << 31) | extra_count;
+ u32 edx = (pcid << 16) | asid;
+
+ asm volatile("invlpgb" : : "a" (rax), "c" (ecx), "d" (edx));
+}
+
+/*
+ * INVLPGB can be targeted by virtual address, PCID, ASID, or any combination
+ * of the three. For example:
+ * - INVLPGB_VA | INVLPGB_INCLUDE_GLOBAL: invalidate all TLB entries at the address
+ * - INVLPGB_PCID: invalidate all TLB entries matching the PCID
+ *
+ * The first can be used to invalidate (kernel) mappings at a particular
+ * address across all processes.
+ *
+ * The latter invalidates all TLB entries matching a PCID.
+ */
+#define INVLPGB_VA BIT(0)
+#define INVLPGB_PCID BIT(1)
+#define INVLPGB_ASID BIT(2)
+#define INVLPGB_INCLUDE_GLOBAL BIT(3)
+#define INVLPGB_FINAL_ONLY BIT(4)
+#define INVLPGB_INCLUDE_NESTED BIT(5)
+
+/* Flush all mappings for a given pcid and addr, not including globals. */
+static inline void invlpgb_flush_user(unsigned long pcid,
+ unsigned long addr)
+{
+ __invlpgb(0, pcid, addr, 0, 0, INVLPGB_PCID | INVLPGB_VA);
+}
+
+static inline void invlpgb_flush_user_nr(unsigned long pcid, unsigned long addr,
+ int nr, bool pmd_stride)
+{
+ __invlpgb(0, pcid, addr, nr - 1, pmd_stride, INVLPGB_PCID | INVLPGB_VA);
+}
+
+/* Flush all mappings for a given ASID, not including globals. */
+static inline void invlpgb_flush_single_asid(unsigned long asid)
+{
+ __invlpgb(asid, 0, 0, 0, 0, INVLPGB_ASID);
+}
+
+/* Flush all mappings for a given PCID, not including globals. */
+static inline void invlpgb_flush_single_pcid(unsigned long pcid)
+{
+ __invlpgb(0, pcid, 0, 0, 0, INVLPGB_PCID);
+}
+
+/* Flush all mappings, including globals, for all PCIDs. */
+static inline void invlpgb_flush_all(void)
+{
+ __invlpgb(0, 0, 0, 0, 0, INVLPGB_INCLUDE_GLOBAL);
+}
+
+/* Flush addr, including globals, for all PCIDs. */
+static inline void invlpgb_flush_addr(unsigned long addr, int nr)
+{
+ __invlpgb(0, 0, addr, nr - 1, 0, INVLPGB_INCLUDE_GLOBAL);
+}
+
+/* Flush all mappings for all PCIDs except globals. */
+static inline void invlpgb_flush_all_nonglobals(void)
+{
+ __invlpgb(0, 0, 0, 0, 0, 0);
+}
+
+/* Wait for INVLPGB originated by this CPU to complete. */
+static inline void tlbsync(void)
+{
+ asm volatile("tlbsync");
+}
+
+#endif /* _ASM_X86_INVLPGB */
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -10,6 +10,7 @@
#include <asm/cpufeature.h>
#include <asm/special_insns.h>
#include <asm/smp.h>
+#include <asm/invlpgb.h>
#include <asm/invpcid.h>
#include <asm/pti.h>
#include <asm/processor-flags.h>
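For illustration only, the operand packing used by __invlpgb() above can be written as a stand-alone C sketch (hypothetical names, no inline assembly); it just shows how address/flags, stride/count and PCID/ASID land in RAX, ECX and EDX:

#include <stdint.h>
#include <stdio.h>

struct invlpgb_operands {
	uint64_t rax;	/* address | flag bits (INVLPGB_VA, INVLPGB_PCID, ...) */
	uint32_t ecx;	/* bit 31: PMD stride; low bits: extra page count (nr - 1 in the callers) */
	uint32_t edx;	/* bits 31:16: PCID; bits 15:0: ASID */
};

static struct invlpgb_operands pack_invlpgb(uint64_t asid, uint64_t pcid,
					    uint64_t addr, unsigned int extra_count,
					    unsigned int pmd_stride, uint64_t flags)
{
	struct invlpgb_operands op = {
		.rax = addr | flags,
		.ecx = (pmd_stride << 31) | extra_count,
		.edx = (uint32_t)(pcid << 16) | (uint32_t)asid,
	};
	return op;
}

int main(void)
{
	/* Flush 8 pages at 0x7f0000000000 for PCID 5: flags = INVLPGB_PCID | INVLPGB_VA = 0x3. */
	struct invlpgb_operands op = pack_invlpgb(0, 5, 0x7f0000000000ULL, 8 - 1, 0, 0x3);

	printf("rax=%#llx ecx=%#x edx=%#x\n",
	       (unsigned long long)op.rax, op.ecx, op.edx);
	return 0;
}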


@@ -0,0 +1,61 @@
From ffd834c7140dc5fcaf96161c6d8c4601bb700afe Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
Date: Mon, 30 Dec 2024 12:53:07 -0500
Subject: x86/mm: use INVLPGB for kernel TLB flushes
Use broadcast TLB invalidation for kernel addresses when available.
This stops us from having to send IPIs for kernel TLB flushes.
Signed-off-by: Rik van Riel <riel@surriel.com>
---
arch/x86/mm/tlb.c | 31 +++++++++++++++++++++++++++++++
1 file changed, 31 insertions(+)
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -1048,6 +1048,32 @@ void flush_tlb_all(void)
on_each_cpu(do_flush_tlb_all, NULL, 1);
}
+static void broadcast_kernel_range_flush(unsigned long start, unsigned long end)
+{
+ unsigned long addr;
+ unsigned long maxnr = invlpgb_count_max;
+ unsigned long threshold = tlb_single_page_flush_ceiling * maxnr;
+
+ /*
+ * TLBSYNC only waits for flushes originating on the same CPU.
+ * Disabling migration allows us to wait on all flushes.
+ */
+ guard(preempt)();
+
+ if (end == TLB_FLUSH_ALL ||
+ (end - start) > threshold << PAGE_SHIFT) {
+ invlpgb_flush_all();
+ } else {
+ unsigned long nr;
+ for (addr = start; addr < end; addr += nr << PAGE_SHIFT) {
+ nr = min((end - addr) >> PAGE_SHIFT, maxnr);
+ invlpgb_flush_addr(addr, nr);
+ }
+ }
+
+ tlbsync();
+}
+
static void do_kernel_range_flush(void *info)
{
struct flush_tlb_info *f = info;
@@ -1060,6 +1086,11 @@ static void do_kernel_range_flush(void *
void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
+ if (cpu_feature_enabled(X86_FEATURE_INVLPGB)) {
+ broadcast_kernel_range_flush(start, end);
+ return;
+ }
+
/* Balance as user space task's flush, a bit conservative */
if (end == TLB_FLUSH_ALL ||
(end - start) > tlb_single_page_flush_ceiling << PAGE_SHIFT) {
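A hedged stand-alone sketch of the policy implemented by broadcast_kernel_range_flush() above (hypothetical helper name, 4 KiB pages assumed): ranges larger than tlb_single_page_flush_ceiling * invlpgb_count_max pages fall back to a single global flush, everything else is chunked into invlpgb_count_max-sized INVLPGB operations.

#include <stdio.h>

#define EXAMPLE_PAGE_SHIFT 12	/* 4 KiB pages assumed */

/* Count how many INVLPGB operations the loop above would issue for a range,
 * or 1 if it falls back to invlpgb_flush_all(). */
static unsigned long invlpgb_ops_for_range(unsigned long start, unsigned long end,
					   unsigned long count_max,
					   unsigned long single_page_ceiling)
{
	unsigned long threshold = single_page_ceiling * count_max;
	unsigned long pages = (end - start) >> EXAMPLE_PAGE_SHIFT;
	unsigned long ops = 0;

	if (pages > threshold)
		return 1;	/* one invlpgb_flush_all() covers everything */

	for (unsigned long addr = start; addr < end; ops++) {
		unsigned long nr = (end - addr) >> EXAMPLE_PAGE_SHIFT;

		if (nr > count_max)
			nr = count_max;
		addr += nr << EXAMPLE_PAGE_SHIFT;	/* one invlpgb_flush_addr(addr, nr) */
	}
	return ops;
}

int main(void)
{
	/* 100 pages with invlpgb_count_max = 8 and the x86 default ceiling of 33: 13 flushes. */
	printf("%lu\n", invlpgb_ops_for_range(0, 100UL << EXAMPLE_PAGE_SHIFT, 8, 33));
	return 0;
}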


@@ -0,0 +1,28 @@
From 13fac8226036456c15c517c1dd77be5109a61da2 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
Date: Mon, 30 Dec 2024 12:53:08 -0500
Subject: x86/tlb: use INVLPGB in flush_tlb_all
The flush_tlb_all() function is not used a whole lot, but we might
as well use broadcast TLB flushing there, too.
Signed-off-by: Rik van Riel <riel@surriel.com>
---
arch/x86/mm/tlb.c | 6 ++++++
1 file changed, 6 insertions(+)
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -1045,6 +1045,12 @@ static void do_flush_tlb_all(void *info)
void flush_tlb_all(void)
{
count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
+ if (cpu_feature_enabled(X86_FEATURE_INVLPGB)) {
+ guard(preempt)();
+ invlpgb_flush_all();
+ tlbsync();
+ return;
+ }
on_each_cpu(do_flush_tlb_all, NULL, 1);
}


@@ -0,0 +1,36 @@
From 765d531296765e7fb2888c70cb56c0e25b459231 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
Date: Mon, 30 Dec 2024 12:53:09 -0500
Subject: x86/mm: use broadcast TLB flushing for page reclaim TLB flushing
In the page reclaim code, we only track the CPU(s) where the TLB needs
to be flushed, rather than all the individual mappings that may be getting
invalidated.
Use broadcast TLB flushing when that is available.
Signed-off-by: Rik van Riel <riel@surriel.com>
---
arch/x86/mm/tlb.c | 10 +++++++++-
1 file changed, 9 insertions(+), 1 deletion(-)
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -1281,8 +1281,16 @@ EXPORT_SYMBOL_GPL(__flush_tlb_all);
void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
{
struct flush_tlb_info *info;
+ int cpu;
- int cpu = get_cpu();
+ if (cpu_feature_enabled(X86_FEATURE_INVLPGB)) {
+ guard(preempt)();
+ invlpgb_flush_all_nonglobals();
+ tlbsync();
+ return;
+ }
+
+ cpu = get_cpu();
info = get_flush_tlb_info(NULL, 0, TLB_FLUSH_ALL, 0, false,
TLB_GENERATION_INVALID);


@@ -0,0 +1,508 @@
From 8b23125a3200a330fb407133f33aeb9ad3232603 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
Date: Mon, 30 Dec 2024 12:53:10 -0500
Subject: x86/mm: enable broadcast TLB invalidation for multi-threaded
processes
Use broadcast TLB invalidation, using the INVLPGB instruction, on AMD EPYC 3
and newer CPUs.
In order to not exhaust PCID space, and keep TLB flushes local for single
threaded processes, we only hand out broadcast ASIDs to processes active on
3 or more CPUs, and gradually increase the threshold as broadcast ASID space
is depleted.
Signed-off-by: Rik van Riel <riel@surriel.com>
---
arch/x86/include/asm/mmu.h | 6 +
arch/x86/include/asm/mmu_context.h | 12 ++
arch/x86/include/asm/tlbflush.h | 17 ++
arch/x86/mm/tlb.c | 310 ++++++++++++++++++++++++++++-
4 files changed, 336 insertions(+), 9 deletions(-)
--- a/arch/x86/include/asm/mmu.h
+++ b/arch/x86/include/asm/mmu.h
@@ -46,6 +46,12 @@ typedef struct {
unsigned long flags;
#endif
+#ifdef CONFIG_CPU_SUP_AMD
+ struct list_head broadcast_asid_list;
+ u16 broadcast_asid;
+ bool asid_transition;
+#endif
+
#ifdef CONFIG_ADDRESS_MASKING
/* Active LAM mode: X86_CR3_LAM_U48 or X86_CR3_LAM_U57 or 0 (disabled) */
unsigned long lam_cr3_mask;
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -139,6 +139,8 @@ static inline void mm_reset_untag_mask(s
#define enter_lazy_tlb enter_lazy_tlb
extern void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk);
+extern void destroy_context_free_broadcast_asid(struct mm_struct *mm);
+
/*
* Init a new mm. Used on mm copies, like at fork()
* and on mm's that are brand-new, like at execve().
@@ -160,6 +162,13 @@ static inline int init_new_context(struc
mm->context.execute_only_pkey = -1;
}
#endif
+
+#ifdef CONFIG_CPU_SUP_AMD
+ INIT_LIST_HEAD(&mm->context.broadcast_asid_list);
+ mm->context.broadcast_asid = 0;
+ mm->context.asid_transition = false;
+#endif
+
mm_reset_untag_mask(mm);
init_new_context_ldt(mm);
return 0;
@@ -169,6 +178,9 @@ static inline int init_new_context(struc
static inline void destroy_context(struct mm_struct *mm)
{
destroy_context_ldt(mm);
+#ifdef CONFIG_CPU_SUP_AMD
+ destroy_context_free_broadcast_asid(mm);
+#endif
}
extern void switch_mm(struct mm_struct *prev, struct mm_struct *next,
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -65,6 +65,23 @@ static inline void cr4_clear_bits(unsign
*/
#define TLB_NR_DYN_ASIDS 6
+#ifdef CONFIG_CPU_SUP_AMD
+#define is_dyn_asid(asid) (asid) < TLB_NR_DYN_ASIDS
+#define is_broadcast_asid(asid) (asid) >= TLB_NR_DYN_ASIDS
+#define in_asid_transition(info) (info->mm && info->mm->context.asid_transition)
+#define mm_broadcast_asid(mm) (mm->context.broadcast_asid)
+#else
+#define is_dyn_asid(asid) true
+#define is_broadcast_asid(asid) false
+#define in_asid_transition(info) false
+#define mm_broadcast_asid(mm) 0
+
+inline bool needs_broadcast_asid_reload(struct mm_struct *next, u16 prev_asid)
+{
+ return false;
+}
+#endif
+
struct tlb_context {
u64 ctx_id;
u64 tlb_gen;
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -74,13 +74,15 @@
* use different names for each of them:
*
* ASID - [0, TLB_NR_DYN_ASIDS-1]
- * the canonical identifier for an mm
+ * the canonical identifier for an mm, dynamically allocated on each CPU
+ * [TLB_NR_DYN_ASIDS, MAX_ASID_AVAILABLE-1]
+ * the canonical, global identifier for an mm, identical across all CPUs
*
- * kPCID - [1, TLB_NR_DYN_ASIDS]
+ * kPCID - [1, MAX_ASID_AVAILABLE]
* the value we write into the PCID part of CR3; corresponds to the
* ASID+1, because PCID 0 is special.
*
- * uPCID - [2048 + 1, 2048 + TLB_NR_DYN_ASIDS]
+ * uPCID - [2048 + 1, 2048 + MAX_ASID_AVAILABLE]
* for KPTI each mm has two address spaces and thus needs two
* PCID values, but we can still do with a single ASID denomination
* for each mm. Corresponds to kPCID + 2048.
@@ -225,6 +227,18 @@ static void choose_new_asid(struct mm_st
return;
}
+ /*
+ * TLB consistency for this ASID is maintained with INVLPGB;
+ * TLB flushes happen even while the process isn't running.
+ */
+#ifdef CONFIG_CPU_SUP_AMD
+ if (static_cpu_has(X86_FEATURE_INVLPGB) && mm_broadcast_asid(next)) {
+ *new_asid = mm_broadcast_asid(next);
+ *need_flush = false;
+ return;
+ }
+#endif
+
if (this_cpu_read(cpu_tlbstate.invalidate_other))
clear_asid_other();
@@ -251,6 +265,245 @@ static void choose_new_asid(struct mm_st
*need_flush = true;
}
+#ifdef CONFIG_CPU_SUP_AMD
+/*
+ * Logic for AMD INVLPGB support.
+ */
+static DEFINE_RAW_SPINLOCK(broadcast_asid_lock);
+static u16 last_broadcast_asid = TLB_NR_DYN_ASIDS;
+static DECLARE_BITMAP(broadcast_asid_used, MAX_ASID_AVAILABLE) = { 0 };
+static LIST_HEAD(broadcast_asid_list);
+static int broadcast_asid_available = MAX_ASID_AVAILABLE - TLB_NR_DYN_ASIDS - 1;
+
+static void reset_broadcast_asid_space(void)
+{
+ mm_context_t *context;
+
+ lockdep_assert_held(&broadcast_asid_lock);
+
+ /*
+ * Flush once when we wrap around the ASID space, so we won't need
+ * to flush every time we allocate an ASID for broadcast flushing.
+ */
+ invlpgb_flush_all_nonglobals();
+ tlbsync();
+
+ /*
+ * Leave the currently used broadcast ASIDs set in the bitmap, since
+ * those cannot be reused before the next wraparound and flush..
+ */
+ bitmap_clear(broadcast_asid_used, 0, MAX_ASID_AVAILABLE);
+ list_for_each_entry(context, &broadcast_asid_list, broadcast_asid_list)
+ __set_bit(context->broadcast_asid, broadcast_asid_used);
+
+ last_broadcast_asid = TLB_NR_DYN_ASIDS;
+}
+
+static u16 get_broadcast_asid(void)
+{
+ lockdep_assert_held(&broadcast_asid_lock);
+
+ do {
+ u16 start = last_broadcast_asid;
+ u16 asid = find_next_zero_bit(broadcast_asid_used, MAX_ASID_AVAILABLE, start);
+
+ if (asid >= MAX_ASID_AVAILABLE) {
+ reset_broadcast_asid_space();
+ continue;
+ }
+
+ /* Try claiming this broadcast ASID. */
+ if (!test_and_set_bit(asid, broadcast_asid_used)) {
+ last_broadcast_asid = asid;
+ return asid;
+ }
+ } while (1);
+}
+
+/*
+ * Returns true if the mm is transitioning from a CPU-local ASID to a broadcast
+ * (INVLPGB) ASID, or the other way around.
+ */
+static bool needs_broadcast_asid_reload(struct mm_struct *next, u16 prev_asid)
+{
+ u16 broadcast_asid = mm_broadcast_asid(next);
+
+ if (broadcast_asid && prev_asid != broadcast_asid)
+ return true;
+
+ if (!broadcast_asid && is_broadcast_asid(prev_asid))
+ return true;
+
+ return false;
+}
+
+void destroy_context_free_broadcast_asid(struct mm_struct *mm)
+{
+ if (!mm->context.broadcast_asid)
+ return;
+
+ guard(raw_spinlock_irqsave)(&broadcast_asid_lock);
+ mm->context.broadcast_asid = 0;
+ list_del(&mm->context.broadcast_asid_list);
+ broadcast_asid_available++;
+}
+
+static bool mm_active_cpus_exceeds(struct mm_struct *mm, int threshold)
+{
+ int count = 0;
+ int cpu;
+
+ if (cpumask_weight(mm_cpumask(mm)) <= threshold)
+ return false;
+
+ for_each_cpu(cpu, mm_cpumask(mm)) {
+ /* Skip the CPUs that aren't really running this process. */
+ if (per_cpu(cpu_tlbstate.loaded_mm, cpu) != mm)
+ continue;
+
+ if (per_cpu(cpu_tlbstate_shared.is_lazy, cpu))
+ continue;
+
+ if (++count > threshold)
+ return true;
+ }
+ return false;
+}
+
+/*
+ * Assign a broadcast ASID to the current process, protecting against
+ * races between multiple threads in the process.
+ */
+static void use_broadcast_asid(struct mm_struct *mm)
+{
+ guard(raw_spinlock_irqsave)(&broadcast_asid_lock);
+
+ /* This process is already using broadcast TLB invalidation. */
+ if (mm->context.broadcast_asid)
+ return;
+
+ mm->context.broadcast_asid = get_broadcast_asid();
+ mm->context.asid_transition = true;
+ list_add(&mm->context.broadcast_asid_list, &broadcast_asid_list);
+ broadcast_asid_available--;
+}
+
+/*
+ * Figure out whether to assign a broadcast (global) ASID to a process.
+ * We vary the threshold by how empty or full broadcast ASID space is.
+ * 1/4 full: >= 4 active threads
+ * 1/2 full: >= 8 active threads
+ * 3/4 full: >= 16 active threads
+ * 7/8 full: >= 32 active threads
+ * etc
+ *
+ * This way we should never exhaust the broadcast ASID space, even on very
+ * large systems, and the processes with the largest number of active
+ * threads should be able to use broadcast TLB invalidation.
+ */
+#define HALFFULL_THRESHOLD 8
+static bool meets_broadcast_asid_threshold(struct mm_struct *mm)
+{
+ int avail = broadcast_asid_available;
+ int threshold = HALFFULL_THRESHOLD;
+
+ if (!avail)
+ return false;
+
+ if (avail > MAX_ASID_AVAILABLE * 3 / 4) {
+ threshold = HALFFULL_THRESHOLD / 4;
+ } else if (avail > MAX_ASID_AVAILABLE / 2) {
+ threshold = HALFFULL_THRESHOLD / 2;
+ } else if (avail < MAX_ASID_AVAILABLE / 3) {
+ do {
+ avail *= 2;
+ threshold *= 2;
+ } while ((avail + threshold) < MAX_ASID_AVAILABLE / 2);
+ }
+
+ return mm_active_cpus_exceeds(mm, threshold);
+}
+
+static void count_tlb_flush(struct mm_struct *mm)
+{
+ if (!static_cpu_has(X86_FEATURE_INVLPGB))
+ return;
+
+ /* Check every once in a while. */
+ if ((current->pid & 0x1f) != (jiffies & 0x1f))
+ return;
+
+ if (meets_broadcast_asid_threshold(mm))
+ use_broadcast_asid(mm);
+}
+
+static void finish_asid_transition(struct flush_tlb_info *info)
+{
+ struct mm_struct *mm = info->mm;
+ int bc_asid = mm_broadcast_asid(mm);
+ int cpu;
+
+ if (!mm->context.asid_transition)
+ return;
+
+ for_each_cpu(cpu, mm_cpumask(mm)) {
+ if (READ_ONCE(per_cpu(cpu_tlbstate.loaded_mm, cpu)) != mm)
+ continue;
+
+ /*
+ * If at least one CPU is not using the broadcast ASID yet,
+ * send a TLB flush IPI. The IPI should cause stragglers
+ * to transition soon.
+ */
+ if (per_cpu(cpu_tlbstate.loaded_mm_asid, cpu) != bc_asid) {
+ flush_tlb_multi(mm_cpumask(info->mm), info);
+ return;
+ }
+ }
+
+ /* All the CPUs running this process are using the broadcast ASID. */
+ mm->context.asid_transition = 0;
+}
+
+static void broadcast_tlb_flush(struct flush_tlb_info *info)
+{
+ bool pmd = info->stride_shift == PMD_SHIFT;
+ unsigned long maxnr = invlpgb_count_max;
+ unsigned long asid = info->mm->context.broadcast_asid;
+ unsigned long addr = info->start;
+ unsigned long nr;
+
+ /* Flushing multiple pages at once is not supported with 1GB pages. */
+ if (info->stride_shift > PMD_SHIFT)
+ maxnr = 1;
+
+ if (info->end == TLB_FLUSH_ALL) {
+ invlpgb_flush_single_pcid(kern_pcid(asid));
+ /* Do any CPUs supporting INVLPGB need PTI? */
+ if (static_cpu_has(X86_FEATURE_PTI))
+ invlpgb_flush_single_pcid(user_pcid(asid));
+ } else do {
+ /*
+ * Calculate how many pages can be flushed at once; if the
+ * remainder of the range is less than one page, flush one.
+ */
+ nr = min(maxnr, (info->end - addr) >> info->stride_shift);
+ nr = max(nr, 1);
+
+ invlpgb_flush_user_nr(kern_pcid(asid), addr, nr, pmd);
+ /* Do any CPUs supporting INVLPGB need PTI? */
+ if (static_cpu_has(X86_FEATURE_PTI))
+ invlpgb_flush_user_nr(user_pcid(asid), addr, nr, pmd);
+ addr += nr << info->stride_shift;
+ } while (addr < info->end);
+
+ finish_asid_transition(info);
+
+ /* Wait for the INVLPGBs kicked off above to finish. */
+ tlbsync();
+}
+#endif /* CONFIG_CPU_SUP_AMD */
+
/*
* Given an ASID, flush the corresponding user ASID. We can delay this
* until the next time we switch to it.
@@ -556,8 +809,9 @@ void switch_mm_irqs_off(struct mm_struct
*/
if (prev == next) {
/* Not actually switching mm's */
- VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) !=
- next->context.ctx_id);
+ if (is_dyn_asid(prev_asid))
+ VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) !=
+ next->context.ctx_id);
/*
* If this races with another thread that enables lam, 'new_lam'
@@ -574,6 +828,23 @@ void switch_mm_irqs_off(struct mm_struct
cpumask_set_cpu(cpu, mm_cpumask(next));
/*
+ * Check if the current mm is transitioning to a new ASID.
+ */
+ if (needs_broadcast_asid_reload(next, prev_asid)) {
+ next_tlb_gen = atomic64_read(&next->context.tlb_gen);
+
+ choose_new_asid(next, next_tlb_gen, &new_asid, &need_flush);
+ goto reload_tlb;
+ }
+
+ /*
+ * Broadcast TLB invalidation keeps this PCID up to date
+ * all the time.
+ */
+ if (is_broadcast_asid(prev_asid))
+ return;
+
+ /*
* If the CPU is not in lazy TLB mode, we are just switching
* from one thread in a process to another thread in the same
* process. No TLB flush required.
@@ -629,8 +900,10 @@ void switch_mm_irqs_off(struct mm_struct
barrier();
}
+reload_tlb:
new_lam = mm_lam_cr3_mask(next);
if (need_flush) {
+ VM_BUG_ON(is_broadcast_asid(new_asid));
this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
load_new_mm_cr3(next->pgd, new_asid, new_lam, true);
@@ -749,7 +1022,7 @@ static void flush_tlb_func(void *info)
const struct flush_tlb_info *f = info;
struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
- u64 local_tlb_gen = this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].tlb_gen);
+ u64 local_tlb_gen;
bool local = smp_processor_id() == f->initiating_cpu;
unsigned long nr_invalidate = 0;
u64 mm_tlb_gen;
@@ -769,6 +1042,16 @@ static void flush_tlb_func(void *info)
if (unlikely(loaded_mm == &init_mm))
return;
+ /* Reload the ASID if transitioning into or out of a broadcast ASID */
+ if (needs_broadcast_asid_reload(loaded_mm, loaded_mm_asid)) {
+ switch_mm_irqs_off(NULL, loaded_mm, NULL);
+ loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
+ }
+
+ /* Broadcast ASIDs are always kept up to date with INVLPGB. */
+ if (is_broadcast_asid(loaded_mm_asid))
+ return;
+
VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].ctx_id) !=
loaded_mm->context.ctx_id);
@@ -786,6 +1069,8 @@ static void flush_tlb_func(void *info)
return;
}
+ local_tlb_gen = this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].tlb_gen);
+
if (unlikely(f->new_tlb_gen != TLB_GENERATION_INVALID &&
f->new_tlb_gen <= local_tlb_gen)) {
/*
@@ -926,7 +1211,7 @@ STATIC_NOPV void native_flush_tlb_multi(
* up on the new contents of what used to be page tables, while
* doing a speculative memory access.
*/
- if (info->freed_tables)
+ if (info->freed_tables || in_asid_transition(info))
on_each_cpu_mask(cpumask, flush_tlb_func, (void *)info, true);
else
on_each_cpu_cond_mask(tlb_is_not_lazy, flush_tlb_func,
@@ -998,14 +1283,18 @@ void flush_tlb_mm_range(struct mm_struct
bool freed_tables)
{
struct flush_tlb_info *info;
+ unsigned long threshold = tlb_single_page_flush_ceiling;
u64 new_tlb_gen;
int cpu;
+ if (static_cpu_has(X86_FEATURE_INVLPGB))
+ threshold *= invlpgb_count_max;
+
cpu = get_cpu();
/* Should we flush just the requested range? */
if ((end == TLB_FLUSH_ALL) ||
- ((end - start) >> stride_shift) > tlb_single_page_flush_ceiling) {
+ ((end - start) >> stride_shift) > threshold) {
start = 0;
end = TLB_FLUSH_ALL;
}
@@ -1021,8 +1310,11 @@ void flush_tlb_mm_range(struct mm_struct
* a local TLB flush is needed. Optimize this use-case by calling
* flush_tlb_func_local() directly in this case.
*/
- if (cpumask_any_but(mm_cpumask(mm), cpu) < nr_cpu_ids) {
+ if (IS_ENABLED(CONFIG_CPU_SUP_AMD) && mm_broadcast_asid(mm)) {
+ broadcast_tlb_flush(info);
+ } else if (cpumask_any_but(mm_cpumask(mm), cpu) < nr_cpu_ids) {
flush_tlb_multi(mm_cpumask(mm), info);
+ count_tlb_flush(mm);
} else if (mm == this_cpu_read(cpu_tlbstate.loaded_mm)) {
lockdep_assert_irqs_enabled();
local_irq_disable();
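The updated ASID/PCID comment near the top of this patch (kPCID = ASID + 1 because PCID 0 is special, uPCID = kPCID + 2048 for KPTI) also covers the new broadcast ASIDs, which start above TLB_NR_DYN_ASIDS. A tiny illustrative sketch of that mapping, with hypothetical helper names:

#include <stdbool.h>
#include <stdio.h>

#define TLB_NR_DYN_ASIDS 6

static unsigned int example_kern_pcid(unsigned int asid) { return asid + 1; }
static unsigned int example_user_pcid(unsigned int asid) { return asid + 1 + 2048; }
static bool example_is_broadcast_asid(unsigned int asid) { return asid >= TLB_NR_DYN_ASIDS; }

int main(void)
{
	unsigned int asid = 42;	/* a broadcast ASID, identical across all CPUs */

	printf("asid=%u broadcast=%d kPCID=%u uPCID=%u\n",
	       asid, example_is_broadcast_asid(asid),
	       example_kern_pcid(asid), example_user_pcid(asid));
	return 0;
}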


@@ -0,0 +1,126 @@
From 1767a2786ebbe3451f973df44485309c2a8fd8a5 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
Date: Mon, 30 Dec 2024 12:53:11 -0500
Subject: x86,tlb: do targeted broadcast flushing from tlbbatch code
Instead of doing a system-wide TLB flush from arch_tlbbatch_flush,
queue up asynchronous, targeted flushes from arch_tlbbatch_add_pending.
This also allows us to avoid adding the CPUs of processes using broadcast
flushing to the batch->cpumask, and will hopefully further reduce TLB
flushing from the reclaim and compaction paths.
Signed-off-by: Rik van Riel <riel@surriel.com>
---
arch/x86/include/asm/tlbbatch.h | 1 +
arch/x86/include/asm/tlbflush.h | 12 +++------
arch/x86/mm/tlb.c | 48 ++++++++++++++++++++++++++-------
3 files changed, 42 insertions(+), 19 deletions(-)
--- a/arch/x86/include/asm/tlbbatch.h
+++ b/arch/x86/include/asm/tlbbatch.h
@@ -10,6 +10,7 @@ struct arch_tlbflush_unmap_batch {
* the PFNs being flushed..
*/
struct cpumask cpumask;
+ bool used_invlpgb;
};
#endif /* _ARCH_X86_TLBBATCH_H */
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -296,21 +296,15 @@ static inline u64 inc_mm_tlb_gen(struct
return atomic64_inc_return(&mm->context.tlb_gen);
}
-static inline void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch,
- struct mm_struct *mm,
- unsigned long uaddr)
-{
- inc_mm_tlb_gen(mm);
- cpumask_or(&batch->cpumask, &batch->cpumask, mm_cpumask(mm));
- mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL);
-}
-
static inline void arch_flush_tlb_batched_pending(struct mm_struct *mm)
{
flush_tlb_mm(mm);
}
extern void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch);
+extern void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch,
+ struct mm_struct *mm,
+ unsigned long uaddr);
static inline bool pte_flags_need_flush(unsigned long oldflags,
unsigned long newflags,
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -1573,16 +1573,7 @@ EXPORT_SYMBOL_GPL(__flush_tlb_all);
void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
{
struct flush_tlb_info *info;
- int cpu;
-
- if (cpu_feature_enabled(X86_FEATURE_INVLPGB)) {
- guard(preempt)();
- invlpgb_flush_all_nonglobals();
- tlbsync();
- return;
- }
-
- cpu = get_cpu();
+ int cpu = get_cpu();
info = get_flush_tlb_info(NULL, 0, TLB_FLUSH_ALL, 0, false,
TLB_GENERATION_INVALID);
@@ -1600,12 +1591,49 @@ void arch_tlbbatch_flush(struct arch_tlb
local_irq_enable();
}
+ /*
+ * If we issued (asynchronous) INVLPGB flushes, wait for them here.
+ * The cpumask above contains only CPUs that were running tasks
+ * not using broadcast TLB flushing.
+ */
+ if (cpu_feature_enabled(X86_FEATURE_INVLPGB) && batch->used_invlpgb) {
+ tlbsync();
+ migrate_enable();
+ batch->used_invlpgb = false;
+ }
+
cpumask_clear(&batch->cpumask);
put_flush_tlb_info();
put_cpu();
}
+void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch,
+ struct mm_struct *mm,
+ unsigned long uaddr)
+{
+ if (static_cpu_has(X86_FEATURE_INVLPGB) && mm_broadcast_asid(mm)) {
+ u16 asid = mm_broadcast_asid(mm);
+ /*
+ * Queue up an asynchronous invalidation. The corresponding
+ * TLBSYNC is done in arch_tlbbatch_flush(), and must be done
+ * on the same CPU.
+ */
+ if (!batch->used_invlpgb) {
+ batch->used_invlpgb = true;
+ migrate_disable();
+ }
+ invlpgb_flush_user_nr(kern_pcid(asid), uaddr, 1, 0);
+ /* Do any CPUs supporting INVLPGB need PTI? */
+ if (static_cpu_has(X86_FEATURE_PTI))
+ invlpgb_flush_user_nr(user_pcid(asid), uaddr, 1, 0);
+ } else {
+ inc_mm_tlb_gen(mm);
+ cpumask_or(&batch->cpumask, &batch->cpumask, mm_cpumask(mm));
+ }
+ mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL);
+}
+
/*
* Blindly accessing user memory from NMI context can be dangerous
* if we're in the middle of switching the current user task or


@@ -0,0 +1,82 @@
From 13faf551d1a146ed18c448babe1953def4ed3d56 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
Date: Mon, 30 Dec 2024 12:53:12 -0500
Subject: x86/mm: enable AMD translation cache extensions
With AMD TCE (translation cache extensions) only the intermediate mappings
that cover the address range zapped by INVLPG / INVLPGB get invalidated,
rather than all intermediate mappings getting zapped at every TLB invalidation.
This can help reduce the TLB miss rate, by keeping more intermediate
mappings in the cache.
From the AMD manual:
Translation Cache Extension (TCE) Bit. Bit 15, read/write. Setting this bit
to 1 changes how the INVLPG, INVLPGB, and INVPCID instructions operate on
TLB entries. When this bit is 0, these instructions remove the target PTE
from the TLB as well as all upper-level table entries that are cached
in the TLB, whether or not they are associated with the target PTE.
When this bit is set, these instructions will remove the target PTE and
only those upper-level entries that lead to the target PTE in
the page table hierarchy, leaving unrelated upper-level entries intact.
Signed-off-by: Rik van Riel <riel@surriel.com>
---
arch/x86/kernel/cpu/amd.c | 8 ++++++++
arch/x86/mm/tlb.c | 10 +++++++---
2 files changed, 15 insertions(+), 3 deletions(-)
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -1143,6 +1143,14 @@ static void cpu_detect_tlb_amd(struct cp
/* Max number of pages INVLPGB can invalidate in one shot */
invlpgb_count_max = (edx & 0xffff) + 1;
+
+ /* If supported, enable translation cache extensions (TCE) */
+ cpuid(0x80000001, &eax, &ebx, &ecx, &edx);
+ if (ecx & BIT(17)) {
+ u64 msr = native_read_msr(MSR_EFER);
+ msr |= BIT(15);
+ wrmsrl(MSR_EFER, msr);
+ }
}
static const struct cpu_dev amd_cpu_dev = {
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -477,7 +477,7 @@ static void broadcast_tlb_flush(struct f
if (info->stride_shift > PMD_SHIFT)
maxnr = 1;
- if (info->end == TLB_FLUSH_ALL) {
+ if (info->end == TLB_FLUSH_ALL || info->freed_tables) {
invlpgb_flush_single_pcid(kern_pcid(asid));
/* Do any CPUs supporting INVLPGB need PTI? */
if (static_cpu_has(X86_FEATURE_PTI))
@@ -1110,7 +1110,7 @@ static void flush_tlb_func(void *info)
*
* The only question is whether to do a full or partial flush.
*
- * We do a partial flush if requested and two extra conditions
+ * We do a partial flush if requested and three extra conditions
* are met:
*
* 1. f->new_tlb_gen == local_tlb_gen + 1. We have an invariant that
@@ -1137,10 +1137,14 @@ static void flush_tlb_func(void *info)
* date. By doing a full flush instead, we can increase
* local_tlb_gen all the way to mm_tlb_gen and we can probably
* avoid another flush in the very near future.
+ *
+ * 3. No page tables were freed. If page tables were freed, a full
+ * flush ensures intermediate translations in the TLB get flushed.
*/
if (f->end != TLB_FLUSH_ALL &&
f->new_tlb_gen == local_tlb_gen + 1 &&
- f->new_tlb_gen == mm_tlb_gen) {
+ f->new_tlb_gen == mm_tlb_gen &&
+ !f->freed_tables) {
/* Partial flush */
unsigned long addr = f->start;
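For reference, the CPUID bit tested in the amd.c hunk above can also be read from user space; this is an illustrative sketch only (GCC/Clang <cpuid.h> assumed), and EFER bit 15 itself can of course only be set in kernel mode as cpu_detect_tlb_amd() does:

#include <cpuid.h>
#include <stdio.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	if (!__get_cpuid(0x80000001, &eax, &ebx, &ecx, &edx))
		return 1;

	/* Fn8000_0001 ECX[17] is the TCE bit the patch checks before setting EFER[15]. */
	printf("TCE (translation cache extension) supported: %s\n",
	       (ecx & (1u << 17)) ? "yes" : "no");
	return 0;
}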


@@ -0,0 +1,28 @@
From 2fc0be5fbcee1a62162b699451bb94f90ec64244 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
Date: Mon, 30 Dec 2024 12:53:13 -0500
Subject: x86/mm: only invalidate final translations with INVLPGB
Use the INVLPGB_FINAL_ONLY flag when invalidating mappings with INVLPGB.
This way only leaf mappings get removed from the TLB, leaving intermediate
translations cached.
On the (rare) occasions where we free page tables we do a full flush,
ensuring intermediate translations get flushed from the TLB.
Signed-off-by: Rik van Riel <riel@surriel.com>
---
arch/x86/include/asm/invlpgb.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/arch/x86/include/asm/invlpgb.h
+++ b/arch/x86/include/asm/invlpgb.h
@@ -51,7 +51,7 @@ static inline void invlpgb_flush_user(un
static inline void invlpgb_flush_user_nr(unsigned long pcid, unsigned long addr,
int nr, bool pmd_stride)
{
- __invlpgb(0, pcid, addr, nr - 1, pmd_stride, INVLPGB_PCID | INVLPGB_VA);
+ __invlpgb(0, pcid, addr, nr - 1, pmd_stride, INVLPGB_PCID | INVLPGB_VA | INVLPGB_FINAL_ONLY);
}
/* Flush all mappings for a given ASID, not including globals. */


@@ -0,0 +1,92 @@
From a3ff46a157cadb29349c5b388fc70804c351e561 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
Date: Thu, 19 Dec 2024 15:32:53 -0500
Subject: mm: remove unnecessary calls to lru_add_drain
There seem to be several categories of calls to lru_add_drain
and lru_add_drain_all.
The first are code paths that recently allocated, swapped in,
or otherwise processed a batch of pages, and want them all on
the LRU. These drain pages that were recently allocated,
probably on the local CPU.
A second category are code paths that are actively trying to
reclaim, migrate, or offline memory. These often use lru_add_drain_all,
to drain the caches on all CPUs.
However, there also seem to be some other callers where we
aren't really doing either. They are calling lru_add_drain(),
despite operating on pages that may have been allocated
long ago, and quite possibly on different CPUs.
Those calls are not likely to be effective at anything but
creating lock contention on the LRU locks.
Remove the lru_add_drain calls in the latter category.
Signed-off-by: Rik van Riel <riel@surriel.com>
Suggested-by: David Hildenbrand <david@redhat.com>
---
mm/memory.c | 1 -
mm/mmap.c | 2 --
mm/swap_state.c | 1 -
mm/vma.c | 2 --
4 files changed, 6 deletions(-)
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1921,7 +1921,6 @@ void zap_page_range_single(struct vm_are
struct mmu_notifier_range range;
struct mmu_gather tlb;
- lru_add_drain();
mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma->vm_mm,
address, end);
hugetlb_zap_begin(vma, &range.start, &range.end);
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1927,7 +1927,6 @@ void exit_mmap(struct mm_struct *mm)
goto destroy;
}
- lru_add_drain();
flush_cache_mm(mm);
tlb_gather_mmu_fullmm(&tlb, mm);
/* update_hiwater_rss(mm) here? but nobody should be looking */
@@ -2370,7 +2369,6 @@ int relocate_vma_down(struct vm_area_str
vma, new_start, length, false, true))
return -ENOMEM;
- lru_add_drain();
tlb_gather_mmu(&tlb, mm);
next = vma_next(&vmi);
if (new_end > old_start) {
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -317,7 +317,6 @@ void free_pages_and_swap_cache(struct en
struct folio_batch folios;
unsigned int refs[PAGEVEC_SIZE];
- lru_add_drain();
folio_batch_init(&folios);
for (int i = 0; i < nr; i++) {
struct folio *folio = page_folio(encoded_page_ptr(pages[i]));
--- a/mm/vma.c
+++ b/mm/vma.c
@@ -347,7 +347,6 @@ void unmap_region(struct ma_state *mas,
struct mm_struct *mm = vma->vm_mm;
struct mmu_gather tlb;
- lru_add_drain();
tlb_gather_mmu(&tlb, mm);
update_hiwater_rss(mm);
unmap_vmas(&tlb, mas, vma, vma->vm_start, vma->vm_end, vma->vm_end,
@@ -1089,7 +1088,6 @@ static inline void vms_clear_ptes(struct
* were isolated before we downgraded mmap_lock.
*/
mas_set(mas_detach, 1);
- lru_add_drain();
tlb_gather_mmu(&tlb, vms->vma->vm_mm);
update_hiwater_rss(vms->vma->vm_mm);
unmap_vmas(&tlb, mas_detach, vms->vma, vms->start, vms->end,


@@ -0,0 +1,27 @@
From 6f554b20207f69146c07be3743b115e42f443627 Mon Sep 17 00:00:00 2001
From: "Jan Alexander Steffens (heftig)" <heftig@archlinux.org>
Date: Fri, 27 Dec 2024 15:08:09 +0100
Subject: drm/amdgpu: Add missing statement in resume_phase3
Fixes: 73dae652dcac776296890da215ee7dec357a1032
See: https://gitlab.freedesktop.org/drm/amd/-/issues/3853#note_2714815
For: https://gitlab.archlinux.org/archlinux/packaging/packages/linux/-/issues/101
---
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 29b8346b..cbca5fa7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3723,6 +3723,7 @@ static int amdgpu_device_ip_resume_phase3(struct amdgpu_device *adev)
r = adev->ip_blocks[i].version->funcs->resume(adev);
if (r)
return r;
+ adev->ip_blocks[i].status.hw = true;
}
}
--
2.45.2

debian/patches/series

@@ -72,6 +72,7 @@ features/x86/x86-make-x32-syscall-support-conditional.patch
bugfix/all/disable-some-marvell-phys.patch
bugfix/all/fs-add-module_softdep-declarations-for-hard-coded-cr.patch
bugfix/all/documentation-use-relative-source-paths-in-abi-documentation.patch
bugfix/all/nfsd-fix-legacy-client-tracking-initialization.patch
# Miscellaneous features
@@ -231,6 +232,20 @@ patchset-pf/crypto/0001-crypto-x86-crc32c-simplify-code-for-handling-fewer-t.pat
patchset-pf/crypto/0002-crypto-x86-crc32c-access-32-bit-arguments-as-32-bit.patch
patchset-pf/crypto/0003-crypto-x86-crc32c-eliminate-jump-table-and-excessive.patch
patchset-pf/invlpgb/0001-x86-mm-make-MMU_GATHER_RCU_TABLE_FREE-unconditional.patch
patchset-pf/invlpgb/0002-x86-mm-remove-pv_ops.mmu.tlb_remove_table-call.patch
patchset-pf/invlpgb/0003-x86-mm-add-X86_FEATURE_INVLPGB-definition.patch
patchset-pf/invlpgb/0004-x86-mm-get-INVLPGB-count-max-from-CPUID.patch
patchset-pf/invlpgb/0005-x86-mm-add-INVLPGB-support-code.patch
patchset-pf/invlpgb/0006-x86-mm-use-INVLPGB-for-kernel-TLB-flushes.patch
patchset-pf/invlpgb/0007-x86-tlb-use-INVLPGB-in-flush_tlb_all.patch
patchset-pf/invlpgb/0008-x86-mm-use-broadcast-TLB-flushing-for-page-reclaim-T.patch
patchset-pf/invlpgb/0009-x86-mm-enable-broadcast-TLB-invalidation-for-multi-t.patch
patchset-pf/invlpgb/0010-x86-tlb-do-targeted-broadcast-flushing-from-tlbbatch.patch
patchset-pf/invlpgb/0011-x86-mm-enable-AMD-translation-cache-extensions.patch
patchset-pf/invlpgb/0012-x86-mm-only-invalidate-final-translations-with-INVLP.patch
patchset-pf/invlpgb/0013-mm-remove-unnecessary-calls-to-lru_add_drain.patch
patchset-pf/pksm/0001-mm-expose-per-process-KSM-control-via-syscalls.patch
patchset-pf/xfs/0001-xfs-fix-chown-with-rt-quota.patch
@@ -308,3 +323,4 @@ patchset-pf/fixes/0002-drivers-firmware-skip-simpledrm-if-nvidia-drm.modese.patc
patchset-pf/fixes/0003-USB-core-Disable-LPM-only-for-non-suspended-ports.patch
patchset-zen/fixes/0001-futex-improve-user-space-accesses.patch
patchset-zen/fixes/0002-drm-amdgpu-Add-missing-statement-in-resume_phase3.patch