
refresh patches

commit b65c570ac2
parent 3d597650a9
Date: 2025-03-27 01:51:30 +03:00
239 changed files with 14214 additions and 9267 deletions


@@ -0,0 +1,33 @@
From 6dada600ab3579296c9b2b57cf41b95792f021ed Mon Sep 17 00:00:00 2001
From: "Jan Alexander Steffens (heftig)" <heftig@archlinux.org>
Date: Sat, 13 Jan 2024 15:29:25 +0100
Subject: arch/Kconfig: Default to maximum amount of ASLR bits
To mitigate CVE-2024-26621 and improve randomization quality further. Do
this with a patch to avoid having to enable `CONFIG_EXPERT`.
Cherry-picked-for: https://zolutal.github.io/aslrnt/
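A minimal userspace sketch (not part of the patch) makes the effect
observable: run it a few times and compare how widely the addresses
scatter. With this change the default for vm.mmap_rnd_bits moves from
the architecture minimum to the maximum (28 to 32 bits on x86-64).

    /* aslr-peek.c: print the address of a fresh anonymous mapping.
     * Build: cc -O2 -o aslr-peek aslr-peek.c
     * With more mmap randomization bits, repeated runs spread the
     * printed addresses over a much wider range. */
    #include <stdio.h>
    #include <sys/mman.h>

    int main(void)
    {
        void *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
                       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

        if (p == MAP_FAILED) {
            perror("mmap");
            return 1;
        }
        printf("mmap base: %p\n", p);
        munmap(p, 4096);
        return 0;
    }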
---
arch/Kconfig | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -1137,7 +1137,7 @@ config ARCH_MMAP_RND_BITS
int "Number of bits to use for ASLR of mmap base address" if EXPERT
range ARCH_MMAP_RND_BITS_MIN ARCH_MMAP_RND_BITS_MAX
default ARCH_MMAP_RND_BITS_DEFAULT if ARCH_MMAP_RND_BITS_DEFAULT
- default ARCH_MMAP_RND_BITS_MIN
+ default ARCH_MMAP_RND_BITS_MAX
depends on HAVE_ARCH_MMAP_RND_BITS
help
This value can be used to select the number of bits to use to
@@ -1171,7 +1171,7 @@ config ARCH_MMAP_RND_COMPAT_BITS
int "Number of bits to use for ASLR of mmap base address for compatible applications" if EXPERT
range ARCH_MMAP_RND_COMPAT_BITS_MIN ARCH_MMAP_RND_COMPAT_BITS_MAX
default ARCH_MMAP_RND_COMPAT_BITS_DEFAULT if ARCH_MMAP_RND_COMPAT_BITS_DEFAULT
- default ARCH_MMAP_RND_COMPAT_BITS_MIN
+ default ARCH_MMAP_RND_COMPAT_BITS_MAX
depends on HAVE_ARCH_MMAP_RND_COMPAT_BITS
help
This value can be used to select the number of bits to use to


@@ -1,162 +0,0 @@
From 3c32c0d457a2c4b2817f57e1e2c9cbba4624639e Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Fri, 22 Nov 2024 11:33:05 -0800
Subject: futex: improve user space accesses
Josh Poimboeuf reports that he got a "will-it-scale.per_process_ops 1.9%
improvement" report for his patch that changed __get_user() to use
pointer masking instead of the explicit speculation barrier. However,
that patch doesn't actually work in the general case, because some (very
bad) architecture-specific code actually depends on __get_user() also
working on kernel addresses.
A profile showed that the offending __get_user() was the futex code,
which really should be fixed up to not use that horrid legacy case.
Rewrite futex_get_value_locked() to use the modern user access helpers,
and inline it so that the compiler not only avoids the function call for
a few instructions, but can do CSE on the address masking.
It also turns out the x86 futex functions have unnecessary barriers in
other places, so let's fix those up too.
Link: https://lore.kernel.org/all/20241115230653.hfvzyf3aqqntgp63@jpoimboe/
Reported-by: Josh Poimboeuf <jpoimboe@kernel.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
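For illustration, a simplified userspace sketch of the pointer-masking
idea; the 47-bit user address space limit is an assumption here, and
the kernel's masked_user_access_begin() does the clamp branchlessly
per architecture:

    #include <stdint.h>
    #include <stdio.h>

    /* Assumption: 47-bit user address space (typical x86-64). */
    #define USER_PTR_MAX ((uintptr_t)1 << 47)

    /*
     * Clamp out-of-range pointers to a guaranteed-faulting address
     * instead of executing a speculation barrier: even a
     * mis-speculated dereference can then only touch user space.
     */
    static const void *mask_user_ptr(const void *ptr)
    {
        uintptr_t addr = (uintptr_t)ptr;

        if (addr > USER_PTR_MAX)
            addr = USER_PTR_MAX;
        return (const void *)addr;
    }

    int main(void)
    {
        int x = 42;

        printf("valid:       %p -> %p\n", (void *)&x,
               (void *)mask_user_ptr(&x));
        printf("kernel-like: %p -> %p\n", (void *)-1L,
               (void *)mask_user_ptr((void *)-1L));
        return 0;
    }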
---
arch/x86/include/asm/futex.h | 8 +++--
kernel/futex/core.c | 22 --------------
kernel/futex/futex.h | 59 ++++++++++++++++++++++++++++++++++--
3 files changed, 63 insertions(+), 26 deletions(-)
--- a/arch/x86/include/asm/futex.h
+++ b/arch/x86/include/asm/futex.h
@@ -48,7 +48,9 @@ do { \
static __always_inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
u32 __user *uaddr)
{
- if (!user_access_begin(uaddr, sizeof(u32)))
+ if (can_do_masked_user_access())
+ uaddr = masked_user_access_begin(uaddr);
+ else if (!user_access_begin(uaddr, sizeof(u32)))
return -EFAULT;
switch (op) {
@@ -84,7 +86,9 @@ static inline int futex_atomic_cmpxchg_i
{
int ret = 0;
- if (!user_access_begin(uaddr, sizeof(u32)))
+ if (can_do_masked_user_access())
+ uaddr = masked_user_access_begin(uaddr);
+ else if (!user_access_begin(uaddr, sizeof(u32)))
return -EFAULT;
asm volatile("\n"
"1:\t" LOCK_PREFIX "cmpxchgl %3, %2\n"
--- a/kernel/futex/core.c
+++ b/kernel/futex/core.c
@@ -451,28 +451,6 @@ struct futex_q *futex_top_waiter(struct
return NULL;
}
-int futex_cmpxchg_value_locked(u32 *curval, u32 __user *uaddr, u32 uval, u32 newval)
-{
- int ret;
-
- pagefault_disable();
- ret = futex_atomic_cmpxchg_inatomic(curval, uaddr, uval, newval);
- pagefault_enable();
-
- return ret;
-}
-
-int futex_get_value_locked(u32 *dest, u32 __user *from)
-{
- int ret;
-
- pagefault_disable();
- ret = __get_user(*dest, from);
- pagefault_enable();
-
- return ret ? -EFAULT : 0;
-}
-
/**
* wait_for_owner_exiting - Block until the owner has exited
* @ret: owner's current futex lock status
--- a/kernel/futex/futex.h
+++ b/kernel/futex/futex.h
@@ -6,6 +6,7 @@
#include <linux/rtmutex.h>
#include <linux/sched/wake_q.h>
#include <linux/compat.h>
+#include <linux/uaccess.h>
#ifdef CONFIG_PREEMPT_RT
#include <linux/rcuwait.h>
@@ -225,10 +226,64 @@ extern bool __futex_wake_mark(struct fut
extern void futex_wake_mark(struct wake_q_head *wake_q, struct futex_q *q);
extern int fault_in_user_writeable(u32 __user *uaddr);
-extern int futex_cmpxchg_value_locked(u32 *curval, u32 __user *uaddr, u32 uval, u32 newval);
-extern int futex_get_value_locked(u32 *dest, u32 __user *from);
extern struct futex_q *futex_top_waiter(struct futex_hash_bucket *hb, union futex_key *key);
+static inline int futex_cmpxchg_value_locked(u32 *curval, u32 __user *uaddr, u32 uval, u32 newval)
+{
+ int ret;
+
+ pagefault_disable();
+ ret = futex_atomic_cmpxchg_inatomic(curval, uaddr, uval, newval);
+ pagefault_enable();
+
+ return ret;
+}
+
+/*
+ * This does a plain atomic user space read, and the user pointer has
+ * already been verified earlier by get_futex_key() to be both aligned
+ * and actually in user space, just like futex_atomic_cmpxchg_inatomic().
+ *
+ * We still want to avoid any speculation, and while __get_user() is
+ * the traditional model for this, it's actually slower than doing
+ * this manually these days.
+ *
+ * We could just have a per-architecture special function for it,
+ * the same way we do futex_atomic_cmpxchg_inatomic(), but rather
+ * than force everybody to do that, write it out long-hand using
+ * the low-level user-access infrastructure.
+ *
+ * This looks a bit overkill, but generally just results in a couple
+ * of instructions.
+ */
+static __always_inline int futex_read_inatomic(u32 *dest, u32 __user *from)
+{
+ u32 val;
+
+ if (can_do_masked_user_access())
+ from = masked_user_access_begin(from);
+ else if (!user_read_access_begin(from, sizeof(*from)))
+ return -EFAULT;
+ unsafe_get_user(val, from, Efault);
+ user_access_end();
+ *dest = val;
+ return 0;
+Efault:
+ user_access_end();
+ return -EFAULT;
+}
+
+static inline int futex_get_value_locked(u32 *dest, u32 __user *from)
+{
+ int ret;
+
+ pagefault_disable();
+ ret = futex_read_inatomic(dest, from);
+ pagefault_enable();
+
+ return ret;
+}
+
extern void __futex_unqueue(struct futex_q *q);
extern void __futex_queue(struct futex_q *q, struct futex_hash_bucket *hb,
struct task_struct *task);


@@ -0,0 +1,85 @@
From 5ac90c5aed97728c8f4f64c02d75334c84a801ef Mon Sep 17 00:00:00 2001
From: Javier Martinez Canillas <javierm@redhat.com>
Date: Thu, 19 May 2022 14:40:07 +0200
Subject: drivers/firmware: skip simpledrm if nvidia-drm.modeset=1 is set
The Nvidia proprietary driver has some bugs that lead to issues if used
with the simpledrm driver. The most noticeable is that it does not
register an emulated fbdev device.
It just relies on a fbdev being registered by another driver, one that
could be attached to the framebuffer console. On UEFI machines, this is
the efifb driver.
This means that disabling the efifb driver will cause virtual consoles to
not be present in the system when using the Nvidia driver. Legacy BIOS is
not affected, simply because fbcon is not used there but vgacon instead,
unless a VGA mode is specified using the vga= kernel command line option,
in which case the vesafb driver is used and its fbdev is attached to the
fbcon.
This is a problem because with CONFIG_SYSFB_SIMPLEFB=y, the sysfb platform
code attempts to register a "simple-framebuffer" platform device (that is
matched against simpledrm) and only registers either an "efi-framebuffer"
or "vesa-framebuffer" if this fails to be registered due the video modes
not being compatible.
The Nvidia driver relying on another driver to register the fbdev is quite
fragile, since it can't really assume those will stick around. For example
there are patches posted to remove the EFI and VESA platform devices once
a real DRM or fbdev driver probes.
But in any case, moving to a simpledrm + emulated fbdev only breaks this
assumption and causes users to not have VT if the Nvidia driver is used.
So to prevent this, let's add a workaround and make sysfb skip the
"simple-framebuffer" registration when the nvidia-drm.modeset=1 option is set.
This is quite horrible, but honestly I can't think of any other approach.
For this to work, the CONFIG_FB_EFI and CONFIG_FB_VESA config options must
be enabled besides CONFIG_DRM_SIMPLEDRM.
Signed-off-by: Javier Martinez Canillas <javierm@redhat.com>
Source: https://gitlab.com/cki-project/kernel-ark/-/merge_requests/1788
Cherry-picked-for: https://bugs.archlinux.org/task/73720
Cherry-picked-for: https://gitlab.archlinux.org/archlinux/packaging/packages/linux/-/issues/94
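For reference, the handler's parsing can be approximated in plain C;
get_option() behaves like strtol() for this simple integer case (a
userspace sketch, not the driver code):

    #include <stdio.h>
    #include <stdlib.h>

    int main(void)
    {
        const char *opt = "1";  /* value from nvidia-drm.modeset=1 */
        int skip_simpledrm = (int)strtol(opt, NULL, 0);

        if (skip_simpledrm)
            printf("The simpledrm driver will not be probed\n");
        return 0;
    }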
---
drivers/firmware/sysfb.c | 18 +++++++++++++++++-
1 file changed, 17 insertions(+), 1 deletion(-)
--- a/drivers/firmware/sysfb.c
+++ b/drivers/firmware/sysfb.c
@@ -35,6 +35,22 @@
#include <linux/screen_info.h>
#include <linux/sysfb.h>
+static int skip_simpledrm;
+
+static int __init simpledrm_disable(char *opt)
+{
+ if (!opt)
+ return -EINVAL;
+
+ get_option(&opt, &skip_simpledrm);
+
+ if (skip_simpledrm)
+ pr_info("The simpledrm driver will not be probed\n");
+
+ return 0;
+}
+early_param("nvidia-drm.modeset", simpledrm_disable);
+
static struct platform_device *pd;
static DEFINE_MUTEX(disable_lock);
static bool disabled;
@@ -164,7 +180,7 @@ static __init int sysfb_init(void)
/* try to create a simple-framebuffer device */
compatible = sysfb_parse_mode(si, &mode);
- if (compatible) {
+ if (compatible && !skip_simpledrm) {
pd = sysfb_create_simplefb(si, &mode, parent);
if (!IS_ERR(pd))
goto put_device;


@@ -0,0 +1,56 @@
From 69907adec3041a6a89d192441a61481d80ee5806 Mon Sep 17 00:00:00 2001
From: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
Date: Wed, 12 Feb 2025 16:33:54 +0800
Subject: EDAC/igen6: Fix the flood of invalid error reports
The ECC_ERROR_LOG register of certain SoCs may contain the invalid value
~0, which results in a flood of invalid error reports in polling mode.
Fix the flood of invalid error reports by skipping the invalid ECC error
log value ~0.
Fixes: e14232afa944 ("EDAC/igen6: Add polling support")
Reported-by: Ramses <ramses@well-founded.dev>
Closes: https://lore.kernel.org/all/OISL8Rv--F-9@well-founded.dev/
Tested-by: Ramses <ramses@well-founded.dev>
Reported-by: John <therealgraysky@proton.me>
Closes: https://lore.kernel.org/all/p5YcxOE6M3Ncxpn2-Ia_wCt61EM4LwIiN3LroQvT_-G2jMrFDSOW5k2A9D8UUzD2toGpQBN1eI0sL5dSKnkO8iteZegLoQEj-DwQaMhGx4A=@proton.me/
Tested-by: John <therealgraysky@proton.me>
Signed-off-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
Link: https://lore.kernel.org/r/20250212083354.31919-1-qiuxu.zhuo@intel.com
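The order of the checks matters: ~0 has both the CE and the UE bit
set, so the all-ones test must run before the CE/UE test. A standalone
sketch with stand-in bit positions (the real layout lives in
igen6_edac.c):

    #include <assert.h>
    #include <stdint.h>

    #define ECC_ERROR_LOG_CE (1ULL << 62)   /* stand-in position */
    #define ECC_ERROR_LOG_UE (1ULL << 63)   /* stand-in position */

    static uint64_t filter_ecclog(uint64_t ecclog)
    {
        if (ecclog == ~0ULL)    /* invalid value: skip it */
            return 0;
        if (!(ecclog & (ECC_ERROR_LOG_CE | ECC_ERROR_LOG_UE)))
            return 0;           /* neither a CE nor a UE */
        return ecclog;          /* caller clears and reports it */
    }

    int main(void)
    {
        assert(filter_ecclog(~0ULL) == 0);              /* quirk */
        assert(filter_ecclog(0) == 0);                  /* no error */
        assert(filter_ecclog(ECC_ERROR_LOG_CE | 0x42) != 0);
        return 0;
    }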
---
drivers/edac/igen6_edac.c | 21 +++++++++++++++------
1 file changed, 15 insertions(+), 6 deletions(-)
--- a/drivers/edac/igen6_edac.c
+++ b/drivers/edac/igen6_edac.c
@@ -785,13 +785,22 @@ static u64 ecclog_read_and_clear(struct
{
u64 ecclog = readq(imc->window + ECC_ERROR_LOG_OFFSET);
- if (ecclog & (ECC_ERROR_LOG_CE | ECC_ERROR_LOG_UE)) {
- /* Clear CE/UE bits by writing 1s */
- writeq(ecclog, imc->window + ECC_ERROR_LOG_OFFSET);
- return ecclog;
- }
+ /*
+ * Quirk: The ECC_ERROR_LOG register of certain SoCs may contain
+ * the invalid value ~0. This will result in a flood of invalid
+ * error reports in polling mode. Skip it.
+ */
+ if (ecclog == ~0)
+ return 0;
- return 0;
+ /* Neither a CE nor a UE. Skip it. */
+ if (!(ecclog & (ECC_ERROR_LOG_CE | ECC_ERROR_LOG_UE)))
+ return 0;
+
+ /* Clear CE/UE bits by writing 1s */
+ writeq(ecclog, imc->window + ECC_ERROR_LOG_OFFSET);
+
+ return ecclog;
}
static void errsts_clear(struct igen6_imc *imc)


@@ -1,113 +0,0 @@
From 7ac6508c4db81eced5f6e3d7c8913af1da6cf110 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
Date: Wed, 5 Feb 2025 23:43:22 -0500
Subject: x86/mm: consolidate full flush threshold decision
Reduce code duplication by consolidating the decision point
for whether to do individual invalidations or a full flush
inside get_flush_tlb_info.
Signed-off-by: Rik van Riel <riel@surriel.com>
Suggested-by: Dave Hansen <dave.hansen@intel.com>
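In isolation, the two decisions that now live in get_flush_tlb_info()
reduce to the sketch below; 33 is the kernel's default
tlb_single_page_flush_ceiling, which stays tunable at runtime:

    #include <stdio.h>

    #define TLB_FLUSH_ALL (~0UL)

    static unsigned long tlb_single_page_flush_ceiling = 33;

    static void decide(unsigned long *start, unsigned long *end,
                       unsigned int stride_shift)
    {
        unsigned long stride = 1UL << stride_shift;

        /* Round to the stride so partial pages get fully flushed. */
        *start &= ~(stride - 1);                      /* round_down */
        *end = (*end + stride - 1) & ~(stride - 1);   /* round_up */

        /* Too many single-page flushes: do a full flush instead. */
        if ((*end - *start) >> stride_shift > tlb_single_page_flush_ceiling) {
            *start = 0;
            *end = TLB_FLUSH_ALL;
        }
    }

    int main(void)
    {
        unsigned long s = 0x1234, e = 0x5678;

        decide(&s, &e, 12);
        printf("flush [%#lx, %#lx)\n", s, e);
        return 0;
    }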
---
arch/x86/mm/tlb.c | 56 ++++++++++++++++++++++++++---------------------
1 file changed, 31 insertions(+), 25 deletions(-)
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -1000,8 +1000,13 @@ static struct flush_tlb_info *get_flush_
BUG_ON(this_cpu_inc_return(flush_tlb_info_idx) != 1);
#endif
- info->start = start;
- info->end = end;
+ /*
+ * Round the start and end addresses to the page size specified
+ * by the stride shift. This ensures partial pages at the end of
+ * a range get fully invalidated.
+ */
+ info->start = round_down(start, 1 << stride_shift);
+ info->end = round_up(end, 1 << stride_shift);
info->mm = mm;
info->stride_shift = stride_shift;
info->freed_tables = freed_tables;
@@ -1009,6 +1014,19 @@ static struct flush_tlb_info *get_flush_
info->initiating_cpu = smp_processor_id();
info->trim_cpumask = 0;
+ WARN_ONCE(start != info->start || end != info->end,
+ "TLB flush not stride %x aligned. Start %lx, end %lx\n",
+ 1 << stride_shift, start, end);
+
+ /*
+ * If the number of flushes is so large that a full flush
+ * would be faster, do a full flush.
+ */
+ if ((end - start) >> stride_shift > tlb_single_page_flush_ceiling) {
+ info->start = 0;
+ info->end = TLB_FLUSH_ALL;
+ }
+
return info;
}
@@ -1026,17 +1044,8 @@ void flush_tlb_mm_range(struct mm_struct
bool freed_tables)
{
struct flush_tlb_info *info;
+ int cpu = get_cpu();
u64 new_tlb_gen;
- int cpu;
-
- cpu = get_cpu();
-
- /* Should we flush just the requested range? */
- if ((end == TLB_FLUSH_ALL) ||
- ((end - start) >> stride_shift) > tlb_single_page_flush_ceiling) {
- start = 0;
- end = TLB_FLUSH_ALL;
- }
/* This is also a barrier that synchronizes with switch_mm(). */
new_tlb_gen = inc_mm_tlb_gen(mm);
@@ -1089,22 +1098,19 @@ static void do_kernel_range_flush(void *
void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
- /* Balance as user space task's flush, a bit conservative */
- if (end == TLB_FLUSH_ALL ||
- (end - start) > tlb_single_page_flush_ceiling << PAGE_SHIFT) {
- on_each_cpu(do_flush_tlb_all, NULL, 1);
- } else {
- struct flush_tlb_info *info;
+ struct flush_tlb_info *info;
+
+ guard(preempt)();
- preempt_disable();
- info = get_flush_tlb_info(NULL, start, end, 0, false,
- TLB_GENERATION_INVALID);
+ info = get_flush_tlb_info(NULL, start, end, PAGE_SHIFT, false,
+ TLB_GENERATION_INVALID);
+ if (info->end == TLB_FLUSH_ALL)
+ on_each_cpu(do_flush_tlb_all, NULL, 1);
+ else
on_each_cpu(do_kernel_range_flush, info, 1);
- put_flush_tlb_info();
- preempt_enable();
- }
+ put_flush_tlb_info();
}
/*
@@ -1276,7 +1282,7 @@ void arch_tlbbatch_flush(struct arch_tlb
int cpu = get_cpu();
- info = get_flush_tlb_info(NULL, 0, TLB_FLUSH_ALL, 0, false,
+ info = get_flush_tlb_info(NULL, 0, TLB_FLUSH_ALL, PAGE_SHIFT, false,
TLB_GENERATION_INVALID);
/*
* flush_tlb_multi() is not optimized for the common case in which only


@@ -1,90 +0,0 @@
From e772b2eb66e5c3cf668feadab678f2a88d896189 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
Date: Wed, 5 Feb 2025 23:43:23 -0500
Subject: x86/mm: get INVLPGB count max from CPUID
The CPU advertises the maximum number of pages that can be shot down
with one INVLPGB instruction in the CPUID data.
Save that information for later use.
Signed-off-by: Rik van Riel <riel@surriel.com>
Tested-by: Manali Shukla <Manali.Shukla@amd.com>
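The same field can be read from userspace to check a machine (assumes
x86-64 with <cpuid.h>; on CPUs without INVLPGB the field reads as
zero, so the computed count is 1):

    #include <stdio.h>
    #include <cpuid.h>

    int main(void)
    {
        unsigned int eax, ebx, ecx, edx;

        if (!__get_cpuid(0x80000008, &eax, &ebx, &ecx, &edx))
            return 1;

        /* INVLPGB support is EBX bit 3; the count is EDX[15:0] + 1. */
        printf("INVLPGB supported: %s\n", (ebx & (1u << 3)) ? "yes" : "no");
        printf("invlpgb_count_max: %u\n", (edx & 0xffff) + 1);
        return 0;
    }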
---
arch/x86/Kconfig.cpu | 5 +++++
arch/x86/include/asm/cpufeatures.h | 1 +
arch/x86/include/asm/tlbflush.h | 7 +++++++
arch/x86/kernel/cpu/amd.c | 8 ++++++++
4 files changed, 21 insertions(+)
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -726,6 +726,10 @@ config X86_VMX_FEATURE_NAMES
def_bool y
depends on IA32_FEAT_CTL
+config X86_BROADCAST_TLB_FLUSH
+ def_bool y
+ depends on CPU_SUP_AMD && 64BIT
+
menuconfig PROCESSOR_SELECT
bool "Supported processor vendors" if EXPERT
help
@@ -762,6 +766,7 @@ config CPU_SUP_CYRIX_32
config CPU_SUP_AMD
default y
bool "Support AMD processors" if PROCESSOR_SELECT
+ select X86_BROADCAST_TLB_FLUSH
help
This enables detection, tunings and quirks for AMD processors
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -335,6 +335,7 @@
#define X86_FEATURE_CLZERO (13*32+ 0) /* "clzero" CLZERO instruction */
#define X86_FEATURE_IRPERF (13*32+ 1) /* "irperf" Instructions Retired Count */
#define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* "xsaveerptr" Always save/restore FP error pointers */
+#define X86_FEATURE_INVLPGB (13*32+ 3) /* INVLPGB and TLBSYNC instruction supported. */
#define X86_FEATURE_RDPRU (13*32+ 4) /* "rdpru" Read processor register at user level */
#define X86_FEATURE_WBNOINVD (13*32+ 9) /* "wbnoinvd" WBNOINVD instruction */
#define X86_FEATURE_AMD_IBPB (13*32+12) /* Indirect Branch Prediction Barrier */
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -183,6 +183,13 @@ static inline void cr4_init_shadow(void)
extern unsigned long mmu_cr4_features;
extern u32 *trampoline_cr4_features;
+/* How many pages can we invalidate with one INVLPGB. */
+#ifdef CONFIG_X86_BROADCAST_TLB_FLUSH
+extern u16 invlpgb_count_max;
+#else
+#define invlpgb_count_max 1
+#endif
+
extern void initialize_tlbstate_and_flush(void);
/*
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -29,6 +29,8 @@
#include "cpu.h"
+u16 invlpgb_count_max __ro_after_init;
+
static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p)
{
u32 gprs[8] = { 0 };
@@ -1135,6 +1137,12 @@ static void cpu_detect_tlb_amd(struct cp
tlb_lli_2m[ENTRIES] = eax & mask;
tlb_lli_4m[ENTRIES] = tlb_lli_2m[ENTRIES] >> 1;
+
+ /* Max number of pages INVLPGB can invalidate in one shot */
+ if (boot_cpu_has(X86_FEATURE_INVLPGB)) {
+ cpuid(0x80000008, &eax, &ebx, &ecx, &edx);
+ invlpgb_count_max = (edx & 0xffff) + 1;
+ }
}
static const struct cpu_dev amd_cpu_dev = {


@@ -1,130 +0,0 @@
From 7a896b12875e2b988acbf0229fb4bcf9157b83bd Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
Date: Wed, 5 Feb 2025 23:43:24 -0500
Subject: x86/mm: add INVLPGB support code
Add invlpgb.h with the helper functions and definitions needed to use
broadcast TLB invalidation on AMD EPYC 3 and newer CPUs.
Signed-off-by: Rik van Riel <riel@surriel.com>
Tested-by: Manali Shukla <Manali.Shukla@amd.com>
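A hypothetical in-kernel caller, showing how the nosync helpers pair
with tlbsync(); the function names are the ones introduced below:

    #include <asm/invlpgb.h>

    /* Flush three 4K pages for one PCID, then wait once. */
    static void example_flush_three_pages(unsigned long pcid,
                                          unsigned long addr)
    {
        /* nr - 1 = 2 is what __invlpgb() encodes into ECX. */
        invlpgb_flush_user_nr_nosync(pcid, addr, 3, false);

        /* INVLPGB is weakly ordered; TLBSYNC waits for completion. */
        tlbsync();
    }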
---
arch/x86/include/asm/invlpgb.h | 101 ++++++++++++++++++++++++++++++++
arch/x86/include/asm/tlbflush.h | 1 +
2 files changed, 102 insertions(+)
create mode 100644 arch/x86/include/asm/invlpgb.h
--- /dev/null
+++ b/arch/x86/include/asm/invlpgb.h
@@ -0,0 +1,101 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_INVLPGB
+#define _ASM_X86_INVLPGB
+
+#include <linux/kernel.h>
+#include <vdso/bits.h>
+#include <vdso/page.h>
+
+/*
+ * INVLPGB does broadcast TLB invalidation across all the CPUs in the system.
+ *
+ * The INVLPGB instruction is weakly ordered, and a batch of invalidations can
+ * be done in a parallel fashion.
+ *
+ * TLBSYNC is used to ensure that pending INVLPGB invalidations initiated from
+ * this CPU have completed.
+ */
+static inline void __invlpgb(unsigned long asid, unsigned long pcid,
+ unsigned long addr, u16 extra_count,
+ bool pmd_stride, u8 flags)
+{
+ u32 edx = (pcid << 16) | asid;
+ u32 ecx = (pmd_stride << 31) | extra_count;
+ u64 rax = addr | flags;
+
+ /* The low bits in rax are for flags. Verify addr is clean. */
+ VM_WARN_ON_ONCE(addr & ~PAGE_MASK);
+
+ /* INVLPGB; supported in binutils >= 2.36. */
+ asm volatile(".byte 0x0f, 0x01, 0xfe" : : "a" (rax), "c" (ecx), "d" (edx));
+}
+
+/* Wait for INVLPGB originated by this CPU to complete. */
+static inline void tlbsync(void)
+{
+ cant_migrate();
+ /* TLBSYNC: supported in binutils >= 2.36. */
+ asm volatile(".byte 0x0f, 0x01, 0xff" ::: "memory");
+}
+
+/*
+ * INVLPGB can be targeted by virtual address, PCID, ASID, or any combination
+ * of the three. For example:
+ * - INVLPGB_VA | INVLPGB_INCLUDE_GLOBAL: invalidate all TLB entries at the address
+ * - INVLPGB_PCID: invalidate all TLB entries matching the PCID
+ *
+ * The first can be used to invalidate (kernel) mappings at a particular
+ * address across all processes.
+ *
+ * The latter invalidates all TLB entries matching a PCID.
+ */
+#define INVLPGB_VA BIT(0)
+#define INVLPGB_PCID BIT(1)
+#define INVLPGB_ASID BIT(2)
+#define INVLPGB_INCLUDE_GLOBAL BIT(3)
+#define INVLPGB_FINAL_ONLY BIT(4)
+#define INVLPGB_INCLUDE_NESTED BIT(5)
+
+/* Flush all mappings for a given pcid and addr, not including globals. */
+static inline void invlpgb_flush_user(unsigned long pcid,
+ unsigned long addr)
+{
+ __invlpgb(0, pcid, addr, 0, 0, INVLPGB_PCID | INVLPGB_VA);
+ tlbsync();
+}
+
+static inline void invlpgb_flush_user_nr_nosync(unsigned long pcid,
+ unsigned long addr,
+ u16 nr,
+ bool pmd_stride)
+{
+ __invlpgb(0, pcid, addr, nr - 1, pmd_stride, INVLPGB_PCID | INVLPGB_VA);
+}
+
+/* Flush all mappings for a given PCID, not including globals. */
+static inline void invlpgb_flush_single_pcid_nosync(unsigned long pcid)
+{
+ __invlpgb(0, pcid, 0, 0, 0, INVLPGB_PCID);
+}
+
+/* Flush all mappings, including globals, for all PCIDs. */
+static inline void invlpgb_flush_all(void)
+{
+ __invlpgb(0, 0, 0, 0, 0, INVLPGB_INCLUDE_GLOBAL);
+ tlbsync();
+}
+
+/* Flush addr, including globals, for all PCIDs. */
+static inline void invlpgb_flush_addr_nosync(unsigned long addr, u16 nr)
+{
+ __invlpgb(0, 0, addr, nr - 1, 0, INVLPGB_INCLUDE_GLOBAL);
+}
+
+/* Flush all mappings for all PCIDs except globals. */
+static inline void invlpgb_flush_all_nonglobals(void)
+{
+ __invlpgb(0, 0, 0, 0, 0, 0);
+ tlbsync();
+}
+
+#endif /* _ASM_X86_INVLPGB */
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -10,6 +10,7 @@
#include <asm/cpufeature.h>
#include <asm/special_insns.h>
#include <asm/smp.h>
+#include <asm/invlpgb.h>
#include <asm/invpcid.h>
#include <asm/pti.h>
#include <asm/processor-flags.h>


@@ -1,59 +0,0 @@
From 99f2b0eda74d7ec76c9c48b78f9d30d251501c28 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
Date: Wed, 5 Feb 2025 23:43:25 -0500
Subject: x86/mm: use INVLPGB for kernel TLB flushes
Use broadcast TLB invalidation for kernel addresses when available.
Remove the need to send IPIs for kernel TLB flushes.
Signed-off-by: Rik van Riel <riel@surriel.com>
Tested-by: Manali Shukla <Manali.Shukla@amd.com>
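The batching in broadcast_kernel_range_flush() can be exercised on its
own; this sketch simulates the loop, with an assumed invlpgb_count_max
of 8 standing in for the CPUID-reported limit:

    #include <stdio.h>

    #define PAGE_SHIFT 12

    static unsigned long invlpgb_count_max = 8;   /* assumed limit */

    static void flush_range(unsigned long start, unsigned long end)
    {
        unsigned long addr, nr;

        for (addr = start; addr < end; addr += nr << PAGE_SHIFT) {
            nr = (end - addr) >> PAGE_SHIFT;
            if (nr > invlpgb_count_max)
                nr = invlpgb_count_max;
            printf("INVLPGB addr=%#lx nr=%lu (async)\n", addr, nr);
        }
        printf("TLBSYNC\n");    /* one wait for all batches */
    }

    int main(void)
    {
        /* 20 pages -> batches of 8, 8 and 4, then a single sync. */
        flush_range(0xffffc90000000000UL,
                    0xffffc90000000000UL + (20UL << PAGE_SHIFT));
        return 0;
    }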
---
arch/x86/mm/tlb.c | 28 +++++++++++++++++++++++++++-
1 file changed, 27 insertions(+), 1 deletion(-)
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -1086,6 +1086,30 @@ void flush_tlb_all(void)
on_each_cpu(do_flush_tlb_all, NULL, 1);
}
+static bool broadcast_kernel_range_flush(struct flush_tlb_info *info)
+{
+ unsigned long addr;
+ unsigned long nr;
+
+ if (!IS_ENABLED(CONFIG_X86_BROADCAST_TLB_FLUSH))
+ return false;
+
+ if (!cpu_feature_enabled(X86_FEATURE_INVLPGB))
+ return false;
+
+ if (info->end == TLB_FLUSH_ALL) {
+ invlpgb_flush_all();
+ return true;
+ }
+
+ for (addr = info->start; addr < info->end; addr += nr << PAGE_SHIFT) {
+ nr = min((info->end - addr) >> PAGE_SHIFT, invlpgb_count_max);
+ invlpgb_flush_addr_nosync(addr, nr);
+ }
+ tlbsync();
+ return true;
+}
+
static void do_kernel_range_flush(void *info)
{
struct flush_tlb_info *f = info;
@@ -1105,7 +1129,9 @@ void flush_tlb_kernel_range(unsigned lon
info = get_flush_tlb_info(NULL, start, end, PAGE_SHIFT, false,
TLB_GENERATION_INVALID);
- if (info->end == TLB_FLUSH_ALL)
+ if (broadcast_kernel_range_flush(info))
+ ; /* Fall through. */
+ else if (info->end == TLB_FLUSH_ALL)
on_each_cpu(do_flush_tlb_all, NULL, 1);
else
on_each_cpu(do_kernel_range_flush, info, 1);


@@ -1,45 +0,0 @@
From 1ef7edb5b2375d4010ed2ad0c7d87fcfa7ab4519 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
Date: Wed, 5 Feb 2025 23:43:26 -0500
Subject: x86/mm: use INVLPGB in flush_tlb_all
The flush_tlb_all() function is not used a whole lot, but we might
as well use broadcast TLB flushing there, too.
Signed-off-by: Rik van Riel <riel@surriel.com>
Tested-by: Manali Shukla <Manali.Shukla@amd.com>
---
arch/x86/mm/tlb.c | 15 +++++++++++++++
1 file changed, 15 insertions(+)
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -1074,6 +1074,19 @@ void flush_tlb_mm_range(struct mm_struct
}
+static bool broadcast_flush_tlb_all(void)
+{
+ if (!IS_ENABLED(CONFIG_X86_BROADCAST_TLB_FLUSH))
+ return false;
+
+ if (!cpu_feature_enabled(X86_FEATURE_INVLPGB))
+ return false;
+
+ guard(preempt)();
+ invlpgb_flush_all();
+ return true;
+}
+
static void do_flush_tlb_all(void *info)
{
count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
@@ -1082,6 +1095,8 @@ static void do_flush_tlb_all(void *info)
void flush_tlb_all(void)
{
+ if (broadcast_flush_tlb_all())
+ return;
count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
on_each_cpu(do_flush_tlb_all, NULL, 1);
}


@@ -1,603 +0,0 @@
From c7212dc64d8e9e4f12f1c6edea3b75c350a30381 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
Date: Wed, 5 Feb 2025 23:43:28 -0500
Subject: x86/mm: enable broadcast TLB invalidation for multi-threaded
processes
Use broadcast TLB invalidation, using the INVLPGB instruction, on AMD EPYC 3
and newer CPUs.
In order to not exhaust PCID space, and keep TLB flushes local for single
threaded processes, we only hand out broadcast ASIDs to processes active on
4 or more CPUs.
Signed-off-by: Rik van Riel <riel@surriel.com>
Tested-by: Manali Shukla <Manali.Shukla@amd.com>
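The "4 or more CPUs" policy reduces to the check sketched below, with
arrays standing in for the per-CPU cpu_tlbstate fields the kernel
consults:

    #include <stdbool.h>
    #include <stdio.h>

    #define NR_CPUS 8

    static bool exceeds_threshold(const int loaded_mm[],
                                  const bool is_lazy[],
                                  int mm, int threshold)
    {
        int count = 0;

        for (int cpu = 0; cpu < NR_CPUS; cpu++) {
            /* Skip CPUs not really running this process. */
            if (loaded_mm[cpu] != mm || is_lazy[cpu])
                continue;
            if (++count > threshold)
                return true;
        }
        return false;
    }

    int main(void)
    {
        int loaded_mm[NR_CPUS] = { 1, 1, 1, 1, 2, 2, 1, 0 };
        bool is_lazy[NR_CPUS]  = { false, false, false, false,
                                   false, false, true,  false };

        /* Four non-lazy CPUs run mm 1: it gets a global ASID. */
        printf("%s\n", exceeds_threshold(loaded_mm, is_lazy, 1, 3)
               ? "exceeds" : "local");
        return 0;
    }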
---
arch/x86/include/asm/mmu.h | 6 +
arch/x86/include/asm/mmu_context.h | 14 ++
arch/x86/include/asm/tlbflush.h | 73 ++++++
arch/x86/mm/tlb.c | 344 ++++++++++++++++++++++++++++-
4 files changed, 425 insertions(+), 12 deletions(-)
--- a/arch/x86/include/asm/mmu.h
+++ b/arch/x86/include/asm/mmu.h
@@ -69,6 +69,12 @@ typedef struct {
u16 pkey_allocation_map;
s16 execute_only_pkey;
#endif
+
+#ifdef CONFIG_X86_BROADCAST_TLB_FLUSH
+ u16 global_asid;
+ bool asid_transition;
+#endif
+
} mm_context_t;
#define INIT_MM_CONTEXT(mm) \
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -139,6 +139,8 @@ static inline void mm_reset_untag_mask(s
#define enter_lazy_tlb enter_lazy_tlb
extern void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk);
+extern void destroy_context_free_global_asid(struct mm_struct *mm);
+
/*
* Init a new mm. Used on mm copies, like at fork()
* and on mm's that are brand-new, like at execve().
@@ -161,6 +163,14 @@ static inline int init_new_context(struc
mm->context.execute_only_pkey = -1;
}
#endif
+
+#ifdef CONFIG_X86_BROADCAST_TLB_FLUSH
+ if (cpu_feature_enabled(X86_FEATURE_INVLPGB)) {
+ mm->context.global_asid = 0;
+ mm->context.asid_transition = false;
+ }
+#endif
+
mm_reset_untag_mask(mm);
init_new_context_ldt(mm);
return 0;
@@ -170,6 +180,10 @@ static inline int init_new_context(struc
static inline void destroy_context(struct mm_struct *mm)
{
destroy_context_ldt(mm);
+#ifdef CONFIG_X86_BROADCAST_TLB_FLUSH
+ if (cpu_feature_enabled(X86_FEATURE_INVLPGB))
+ destroy_context_free_global_asid(mm);
+#endif
}
extern void switch_mm(struct mm_struct *prev, struct mm_struct *next,
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -6,6 +6,7 @@
#include <linux/mmu_notifier.h>
#include <linux/sched.h>
+#include <asm/barrier.h>
#include <asm/processor.h>
#include <asm/cpufeature.h>
#include <asm/special_insns.h>
@@ -239,6 +240,78 @@ void flush_tlb_one_kernel(unsigned long
void flush_tlb_multi(const struct cpumask *cpumask,
const struct flush_tlb_info *info);
+#ifdef CONFIG_X86_BROADCAST_TLB_FLUSH
+static inline bool is_dyn_asid(u16 asid)
+{
+ if (!cpu_feature_enabled(X86_FEATURE_INVLPGB))
+ return true;
+
+ return asid < TLB_NR_DYN_ASIDS;
+}
+
+static inline bool is_global_asid(u16 asid)
+{
+ return !is_dyn_asid(asid);
+}
+
+static inline bool in_asid_transition(struct mm_struct *mm)
+{
+ if (!cpu_feature_enabled(X86_FEATURE_INVLPGB))
+ return false;
+
+ return mm && READ_ONCE(mm->context.asid_transition);
+}
+
+static inline u16 mm_global_asid(struct mm_struct *mm)
+{
+ u16 asid;
+
+ if (!cpu_feature_enabled(X86_FEATURE_INVLPGB))
+ return 0;
+
+ asid = smp_load_acquire(&mm->context.global_asid);
+
+ /* mm->context.global_asid is either 0, or a global ASID */
+ VM_WARN_ON_ONCE(asid && is_dyn_asid(asid));
+
+ return asid;
+}
+#else
+static inline bool is_dyn_asid(u16 asid)
+{
+ return true;
+}
+
+static inline bool is_global_asid(u16 asid)
+{
+ return false;
+}
+
+static inline bool in_asid_transition(struct mm_struct *mm)
+{
+ return false;
+}
+
+static inline u16 mm_global_asid(struct mm_struct *mm)
+{
+ return 0;
+}
+
+static inline bool needs_global_asid_reload(struct mm_struct *next, u16 prev_asid)
+{
+ return false;
+}
+
+static inline void broadcast_tlb_flush(struct flush_tlb_info *info)
+{
+ VM_WARN_ON_ONCE(1);
+}
+
+static inline void consider_global_asid(struct mm_struct *mm)
+{
+}
+#endif
+
#ifdef CONFIG_PARAVIRT
#include <asm/paravirt.h>
#endif
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -74,13 +74,15 @@
* use different names for each of them:
*
* ASID - [0, TLB_NR_DYN_ASIDS-1]
- * the canonical identifier for an mm
+ * the canonical identifier for an mm, dynamically allocated on each CPU
+ * [TLB_NR_DYN_ASIDS, MAX_ASID_AVAILABLE-1]
+ * the canonical, global identifier for an mm, identical across all CPUs
*
- * kPCID - [1, TLB_NR_DYN_ASIDS]
+ * kPCID - [1, MAX_ASID_AVAILABLE]
* the value we write into the PCID part of CR3; corresponds to the
* ASID+1, because PCID 0 is special.
*
- * uPCID - [2048 + 1, 2048 + TLB_NR_DYN_ASIDS]
+ * uPCID - [2048 + 1, 2048 + MAX_ASID_AVAILABLE]
* for KPTI each mm has two address spaces and thus needs two
* PCID values, but we can still do with a single ASID denomination
* for each mm. Corresponds to kPCID + 2048.
@@ -225,6 +227,20 @@ static void choose_new_asid(struct mm_st
return;
}
+ /*
+ * TLB consistency for global ASIDs is maintained with broadcast TLB
+ * flushing. The TLB is never outdated, and does not need flushing.
+ */
+ if (IS_ENABLED(CONFIG_X86_BROADCAST_TLB_FLUSH) && static_cpu_has(X86_FEATURE_INVLPGB)) {
+ u16 global_asid = mm_global_asid(next);
+
+ if (global_asid) {
+ *new_asid = global_asid;
+ *need_flush = false;
+ return;
+ }
+ }
+
if (this_cpu_read(cpu_tlbstate.invalidate_other))
clear_asid_other();
@@ -251,6 +267,272 @@ static void choose_new_asid(struct mm_st
*need_flush = true;
}
+#ifdef CONFIG_X86_BROADCAST_TLB_FLUSH
+/*
+ * Logic for broadcast TLB invalidation.
+ */
+static DEFINE_RAW_SPINLOCK(global_asid_lock);
+static u16 last_global_asid = MAX_ASID_AVAILABLE;
+static DECLARE_BITMAP(global_asid_used, MAX_ASID_AVAILABLE) = { 0 };
+static DECLARE_BITMAP(global_asid_freed, MAX_ASID_AVAILABLE) = { 0 };
+static int global_asid_available = MAX_ASID_AVAILABLE - TLB_NR_DYN_ASIDS - 1;
+
+static void reset_global_asid_space(void)
+{
+ lockdep_assert_held(&global_asid_lock);
+
+ /*
+ * A global TLB flush guarantees that any stale entries from
+ * previously freed global ASIDs get flushed from the TLB
+ * everywhere, making these global ASIDs safe to reuse.
+ */
+ invlpgb_flush_all_nonglobals();
+
+ /*
+ * Clear all the previously freed global ASIDs from the
+ * broadcast_asid_used bitmap, now that the global TLB flush
+ * has made them actually available for re-use.
+ */
+ bitmap_andnot(global_asid_used, global_asid_used,
+ global_asid_freed, MAX_ASID_AVAILABLE);
+ bitmap_clear(global_asid_freed, 0, MAX_ASID_AVAILABLE);
+
+ /*
+ * ASIDs 0-TLB_NR_DYN_ASIDS are used for CPU-local ASID
+ * assignments, for tasks doing IPI based TLB shootdowns.
+ * Restart the search from the start of the global ASID space.
+ */
+ last_global_asid = TLB_NR_DYN_ASIDS;
+}
+
+static u16 get_global_asid(void)
+{
+
+ u16 asid;
+
+ lockdep_assert_held(&global_asid_lock);
+
+ /* The previous allocated ASID is at the top of the address space. */
+ if (last_global_asid >= MAX_ASID_AVAILABLE - 1)
+ reset_global_asid_space();
+
+ asid = find_next_zero_bit(global_asid_used, MAX_ASID_AVAILABLE, last_global_asid);
+
+ if (asid >= MAX_ASID_AVAILABLE) {
+ /* This should never happen. */
+ VM_WARN_ONCE(1, "Unable to allocate global ASID despite %d available\n", global_asid_available);
+ return 0;
+ }
+
+ /* Claim this global ASID. */
+ __set_bit(asid, global_asid_used);
+ last_global_asid = asid;
+ global_asid_available--;
+ return asid;
+}
+
+/*
+ * Returns true if the mm is transitioning from a CPU-local ASID to a global
+ * (INVLPGB) ASID, or the other way around.
+ */
+static bool needs_global_asid_reload(struct mm_struct *next, u16 prev_asid)
+{
+ u16 global_asid = mm_global_asid(next);
+
+ if (global_asid && prev_asid != global_asid)
+ return true;
+
+ if (!global_asid && is_global_asid(prev_asid))
+ return true;
+
+ return false;
+}
+
+void destroy_context_free_global_asid(struct mm_struct *mm)
+{
+ if (!mm->context.global_asid)
+ return;
+
+ guard(raw_spinlock_irqsave)(&global_asid_lock);
+
+ /* The global ASID can be re-used only after flush at wrap-around. */
+ __set_bit(mm->context.global_asid, global_asid_freed);
+
+ mm->context.global_asid = 0;
+ global_asid_available++;
+}
+
+/*
+ * Check whether a process is currently active on more than "threshold" CPUs.
+ * This is a cheap estimation on whether or not it may make sense to assign
+ * a global ASID to this process, and use broadcast TLB invalidation.
+ */
+static bool mm_active_cpus_exceeds(struct mm_struct *mm, int threshold)
+{
+ int count = 0;
+ int cpu;
+
+ /* This quick check should eliminate most single threaded programs. */
+ if (cpumask_weight(mm_cpumask(mm)) <= threshold)
+ return false;
+
+ /* Slower check to make sure. */
+ for_each_cpu(cpu, mm_cpumask(mm)) {
+ /* Skip the CPUs that aren't really running this process. */
+ if (per_cpu(cpu_tlbstate.loaded_mm, cpu) != mm)
+ continue;
+
+ if (per_cpu(cpu_tlbstate_shared.is_lazy, cpu))
+ continue;
+
+ if (++count > threshold)
+ return true;
+ }
+ return false;
+}
+
+/*
+ * Assign a global ASID to the current process, protecting against
+ * races between multiple threads in the process.
+ */
+static void use_global_asid(struct mm_struct *mm)
+{
+ u16 asid;
+
+ guard(raw_spinlock_irqsave)(&global_asid_lock);
+
+ /* This process is already using broadcast TLB invalidation. */
+ if (mm->context.global_asid)
+ return;
+
+ /* The last global ASID was consumed while waiting for the lock. */
+ if (!global_asid_available) {
+ VM_WARN_ONCE(1, "Ran out of global ASIDs\n");
+ return;
+ }
+
+ asid = get_global_asid();
+ if (!asid)
+ return;
+
+ /*
+ * Notably flush_tlb_mm_range() -> broadcast_tlb_flush() ->
+ * finish_asid_transition() needs to observe asid_transition = true
+ * once it observes global_asid.
+ */
+ mm->context.asid_transition = true;
+ smp_store_release(&mm->context.global_asid, asid);
+}
+
+static bool meets_global_asid_threshold(struct mm_struct *mm)
+{
+ if (!global_asid_available)
+ return false;
+
+ /*
+ * Assign a global ASID if the process is active on
+ * 4 or more CPUs simultaneously.
+ */
+ return mm_active_cpus_exceeds(mm, 3);
+}
+
+static void consider_global_asid(struct mm_struct *mm)
+{
+ if (!static_cpu_has(X86_FEATURE_INVLPGB))
+ return;
+
+ /* Check every once in a while. */
+ if ((current->pid & 0x1f) != (jiffies & 0x1f))
+ return;
+
+ if (meets_global_asid_threshold(mm))
+ use_global_asid(mm);
+}
+
+static void finish_asid_transition(struct flush_tlb_info *info)
+{
+ struct mm_struct *mm = info->mm;
+ int bc_asid = mm_global_asid(mm);
+ int cpu;
+
+ if (!READ_ONCE(mm->context.asid_transition))
+ return;
+
+ for_each_cpu(cpu, mm_cpumask(mm)) {
+ /*
+ * The remote CPU is context switching. Wait for that to
+ * finish, to catch the unlikely case of it switching to
+ * the target mm with an out of date ASID.
+ */
+ while (READ_ONCE(per_cpu(cpu_tlbstate.loaded_mm, cpu)) == LOADED_MM_SWITCHING)
+ cpu_relax();
+
+ if (READ_ONCE(per_cpu(cpu_tlbstate.loaded_mm, cpu)) != mm)
+ continue;
+
+ /*
+ * If at least one CPU is not using the global ASID yet,
+ * send a TLB flush IPI. The IPI should cause stragglers
+ * to transition soon.
+ *
+ * This can race with the CPU switching to another task;
+ * that results in a (harmless) extra IPI.
+ */
+ if (READ_ONCE(per_cpu(cpu_tlbstate.loaded_mm_asid, cpu)) != bc_asid) {
+ flush_tlb_multi(mm_cpumask(info->mm), info);
+ return;
+ }
+ }
+
+ /* All the CPUs running this process are using the global ASID. */
+ WRITE_ONCE(mm->context.asid_transition, false);
+}
+
+static void broadcast_tlb_flush(struct flush_tlb_info *info)
+{
+ bool pmd = info->stride_shift == PMD_SHIFT;
+ unsigned long maxnr = invlpgb_count_max;
+ unsigned long asid = info->mm->context.global_asid;
+ unsigned long addr = info->start;
+ unsigned long nr;
+
+ /* Flushing multiple pages at once is not supported with 1GB pages. */
+ if (info->stride_shift > PMD_SHIFT)
+ maxnr = 1;
+
+ /*
+ * TLB flushes with INVLPGB are kicked off asynchronously.
+ * The inc_mm_tlb_gen() guarantees page table updates are done
+ * before these TLB flushes happen.
+ */
+ if (info->end == TLB_FLUSH_ALL) {
+ invlpgb_flush_single_pcid_nosync(kern_pcid(asid));
+ /* Do any CPUs supporting INVLPGB need PTI? */
+ if (static_cpu_has(X86_FEATURE_PTI))
+ invlpgb_flush_single_pcid_nosync(user_pcid(asid));
+ } else do {
+ /*
+ * Calculate how many pages can be flushed at once; if the
+ * remainder of the range is less than one page, flush one.
+ */
+ nr = min(maxnr, (info->end - addr) >> info->stride_shift);
+ nr = max(nr, 1);
+
+ invlpgb_flush_user_nr_nosync(kern_pcid(asid), addr, nr, pmd);
+ /* Do any CPUs supporting INVLPGB need PTI? */
+ if (static_cpu_has(X86_FEATURE_PTI))
+ invlpgb_flush_user_nr_nosync(user_pcid(asid), addr, nr, pmd);
+
+ addr += nr << info->stride_shift;
+ } while (addr < info->end);
+
+ finish_asid_transition(info);
+
+ /* Wait for the INVLPGBs kicked off above to finish. */
+ tlbsync();
+}
+#endif /* CONFIG_X86_BROADCAST_TLB_FLUSH */
+
/*
* Given an ASID, flush the corresponding user ASID. We can delay this
* until the next time we switch to it.
@@ -556,8 +838,9 @@ void switch_mm_irqs_off(struct mm_struct
*/
if (prev == next) {
/* Not actually switching mm's */
- VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) !=
- next->context.ctx_id);
+ VM_WARN_ON(is_dyn_asid(prev_asid) &&
+ this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) !=
+ next->context.ctx_id);
/*
* If this races with another thread that enables lam, 'new_lam'
@@ -574,6 +857,23 @@ void switch_mm_irqs_off(struct mm_struct
cpumask_set_cpu(cpu, mm_cpumask(next));
/*
+ * Check if the current mm is transitioning to a new ASID.
+ */
+ if (needs_global_asid_reload(next, prev_asid)) {
+ next_tlb_gen = atomic64_read(&next->context.tlb_gen);
+
+ choose_new_asid(next, next_tlb_gen, &new_asid, &need_flush);
+ goto reload_tlb;
+ }
+
+ /*
+ * Broadcast TLB invalidation keeps this PCID up to date
+ * all the time.
+ */
+ if (is_global_asid(prev_asid))
+ return;
+
+ /*
* If the CPU is not in lazy TLB mode, we are just switching
* from one thread in a process to another thread in the same
* process. No TLB flush required.
@@ -607,6 +907,13 @@ void switch_mm_irqs_off(struct mm_struct
cond_mitigation(tsk);
/*
+ * Let nmi_uaccess_okay() and finish_asid_transition()
+ * know that we're changing CR3.
+ */
+ this_cpu_write(cpu_tlbstate.loaded_mm, LOADED_MM_SWITCHING);
+ barrier();
+
+ /*
* Stop remote flushes for the previous mm.
* Skip kernel threads; we never send init_mm TLB flushing IPIs,
* but the bitmap manipulation can cause cache line contention.
@@ -623,14 +930,12 @@ void switch_mm_irqs_off(struct mm_struct
next_tlb_gen = atomic64_read(&next->context.tlb_gen);
choose_new_asid(next, next_tlb_gen, &new_asid, &need_flush);
-
- /* Let nmi_uaccess_okay() know that we're changing CR3. */
- this_cpu_write(cpu_tlbstate.loaded_mm, LOADED_MM_SWITCHING);
- barrier();
}
+reload_tlb:
new_lam = mm_lam_cr3_mask(next);
if (need_flush) {
+ VM_WARN_ON_ONCE(is_global_asid(new_asid));
this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
load_new_mm_cr3(next->pgd, new_asid, new_lam, true);
@@ -749,7 +1054,7 @@ static void flush_tlb_func(void *info)
const struct flush_tlb_info *f = info;
struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
- u64 local_tlb_gen = this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].tlb_gen);
+ u64 local_tlb_gen;
bool local = smp_processor_id() == f->initiating_cpu;
unsigned long nr_invalidate = 0;
u64 mm_tlb_gen;
@@ -769,6 +1074,16 @@ static void flush_tlb_func(void *info)
if (unlikely(loaded_mm == &init_mm))
return;
+ /* Reload the ASID if transitioning into or out of a global ASID */
+ if (needs_global_asid_reload(loaded_mm, loaded_mm_asid)) {
+ switch_mm_irqs_off(NULL, loaded_mm, NULL);
+ loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
+ }
+
+ /* Broadcast ASIDs are always kept up to date with INVLPGB. */
+ if (is_global_asid(loaded_mm_asid))
+ return;
+
VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].ctx_id) !=
loaded_mm->context.ctx_id);
@@ -786,6 +1101,8 @@ static void flush_tlb_func(void *info)
return;
}
+ local_tlb_gen = this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].tlb_gen);
+
if (unlikely(f->new_tlb_gen != TLB_GENERATION_INVALID &&
f->new_tlb_gen <= local_tlb_gen)) {
/*
@@ -953,7 +1270,7 @@ STATIC_NOPV void native_flush_tlb_multi(
* up on the new contents of what used to be page tables, while
* doing a speculative memory access.
*/
- if (info->freed_tables)
+ if (info->freed_tables || in_asid_transition(info->mm))
on_each_cpu_mask(cpumask, flush_tlb_func, (void *)info, true);
else
on_each_cpu_cond_mask(should_flush_tlb, flush_tlb_func,
@@ -1058,9 +1375,12 @@ void flush_tlb_mm_range(struct mm_struct
* a local TLB flush is needed. Optimize this use-case by calling
* flush_tlb_func_local() directly in this case.
*/
- if (cpumask_any_but(mm_cpumask(mm), cpu) < nr_cpu_ids) {
+ if (mm_global_asid(mm)) {
+ broadcast_tlb_flush(info);
+ } else if (cpumask_any_but(mm_cpumask(mm), cpu) < nr_cpu_ids) {
info->trim_cpumask = should_trim_cpumask(mm);
flush_tlb_multi(mm_cpumask(mm), info);
+ consider_global_asid(mm);
} else if (mm == this_cpu_read(cpu_tlbstate.loaded_mm)) {
lockdep_assert_irqs_enabled();
local_irq_disable();


@@ -1,251 +0,0 @@
From 6f601cdcd33be8fc0da98c6bab777575af3260b8 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
Date: Wed, 5 Feb 2025 23:43:29 -0500
Subject: x86/mm: do targeted broadcast flushing from tlbbatch code
Instead of doing a system-wide TLB flush from arch_tlbbatch_flush,
queue up asynchronous, targeted flushes from arch_tlbbatch_add_pending.
This also allows us to avoid adding the CPUs of processes using broadcast
flushing to the batch->cpumask, and will hopefully further reduce TLB
flushing from the reclaim and compaction paths.
Signed-off-by: Rik van Riel <riel@surriel.com>
Tested-by: Manali Shukla <Manali.Shukla@amd.com>
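The new need_tlbsync flag is a deferred-fence pattern; reduced to its
essentials (a standalone sketch, not the kernel code):

    #include <stdbool.h>
    #include <stdio.h>

    static bool need_tlbsync;   /* stands in for the per-CPU flag */

    static void queue_flush(unsigned long addr)
    {
        printf("INVLPGB %#lx (async)\n", addr);
        need_tlbsync = true;
    }

    static void tlbsync(void)
    {
        if (!need_tlbsync)
            return;             /* nothing outstanding: free no-op */
        printf("TLBSYNC\n");
        need_tlbsync = false;
    }

    int main(void)
    {
        queue_flush(0x1000);
        queue_flush(0x2000);
        tlbsync();              /* one wait covers both flushes */
        tlbsync();              /* no-op */
        return 0;
    }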
---
arch/x86/include/asm/invlpgb.h | 21 +++++----
arch/x86/include/asm/tlbflush.h | 17 ++++---
arch/x86/mm/tlb.c | 80 +++++++++++++++++++++++++++++++--
3 files changed, 95 insertions(+), 23 deletions(-)
--- a/arch/x86/include/asm/invlpgb.h
+++ b/arch/x86/include/asm/invlpgb.h
@@ -31,9 +31,8 @@ static inline void __invlpgb(unsigned lo
}
/* Wait for INVLPGB originated by this CPU to complete. */
-static inline void tlbsync(void)
+static inline void __tlbsync(void)
{
- cant_migrate();
/* TLBSYNC: supported in binutils >= 2.36. */
asm volatile(".byte 0x0f, 0x01, 0xff" ::: "memory");
}
@@ -61,19 +60,19 @@ static inline void invlpgb_flush_user(un
unsigned long addr)
{
__invlpgb(0, pcid, addr, 0, 0, INVLPGB_PCID | INVLPGB_VA);
- tlbsync();
+ __tlbsync();
}
-static inline void invlpgb_flush_user_nr_nosync(unsigned long pcid,
- unsigned long addr,
- u16 nr,
- bool pmd_stride)
+static inline void __invlpgb_flush_user_nr_nosync(unsigned long pcid,
+ unsigned long addr,
+ u16 nr,
+ bool pmd_stride)
{
__invlpgb(0, pcid, addr, nr - 1, pmd_stride, INVLPGB_PCID | INVLPGB_VA);
}
/* Flush all mappings for a given PCID, not including globals. */
-static inline void invlpgb_flush_single_pcid_nosync(unsigned long pcid)
+static inline void __invlpgb_flush_single_pcid_nosync(unsigned long pcid)
{
__invlpgb(0, pcid, 0, 0, 0, INVLPGB_PCID);
}
@@ -82,11 +81,11 @@ static inline void invlpgb_flush_single_
static inline void invlpgb_flush_all(void)
{
__invlpgb(0, 0, 0, 0, 0, INVLPGB_INCLUDE_GLOBAL);
- tlbsync();
+ __tlbsync();
}
/* Flush addr, including globals, for all PCIDs. */
-static inline void invlpgb_flush_addr_nosync(unsigned long addr, u16 nr)
+static inline void __invlpgb_flush_addr_nosync(unsigned long addr, u16 nr)
{
__invlpgb(0, 0, addr, nr - 1, 0, INVLPGB_INCLUDE_GLOBAL);
}
@@ -95,7 +94,7 @@ static inline void invlpgb_flush_addr_no
static inline void invlpgb_flush_all_nonglobals(void)
{
__invlpgb(0, 0, 0, 0, 0, 0);
- tlbsync();
+ __tlbsync();
}
#endif /* _ASM_X86_INVLPGB */
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -106,6 +106,7 @@ struct tlb_state {
* need to be invalidated.
*/
bool invalidate_other;
+ bool need_tlbsync;
#ifdef CONFIG_ADDRESS_MASKING
/*
@@ -310,6 +311,10 @@ static inline void broadcast_tlb_flush(s
static inline void consider_global_asid(struct mm_struct *mm)
{
}
+
+static inline void tlbsync(void)
+{
+}
#endif
#ifdef CONFIG_PARAVIRT
@@ -359,21 +364,15 @@ static inline u64 inc_mm_tlb_gen(struct
return atomic64_inc_return(&mm->context.tlb_gen);
}
-static inline void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch,
- struct mm_struct *mm,
- unsigned long uaddr)
-{
- inc_mm_tlb_gen(mm);
- cpumask_or(&batch->cpumask, &batch->cpumask, mm_cpumask(mm));
- mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL);
-}
-
static inline void arch_flush_tlb_batched_pending(struct mm_struct *mm)
{
flush_tlb_mm(mm);
}
extern void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch);
+extern void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch,
+ struct mm_struct *mm,
+ unsigned long uaddr);
static inline bool pte_flags_need_flush(unsigned long oldflags,
unsigned long newflags,
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -488,6 +488,37 @@ static void finish_asid_transition(struc
WRITE_ONCE(mm->context.asid_transition, false);
}
+static inline void tlbsync(void)
+{
+ if (!this_cpu_read(cpu_tlbstate.need_tlbsync))
+ return;
+ __tlbsync();
+ this_cpu_write(cpu_tlbstate.need_tlbsync, false);
+}
+
+static inline void invlpgb_flush_user_nr_nosync(unsigned long pcid,
+ unsigned long addr,
+ u16 nr, bool pmd_stride)
+{
+ __invlpgb_flush_user_nr_nosync(pcid, addr, nr, pmd_stride);
+ if (!this_cpu_read(cpu_tlbstate.need_tlbsync))
+ this_cpu_write(cpu_tlbstate.need_tlbsync, true);
+}
+
+static inline void invlpgb_flush_single_pcid_nosync(unsigned long pcid)
+{
+ __invlpgb_flush_single_pcid_nosync(pcid);
+ if (!this_cpu_read(cpu_tlbstate.need_tlbsync))
+ this_cpu_write(cpu_tlbstate.need_tlbsync, true);
+}
+
+static inline void invlpgb_flush_addr_nosync(unsigned long addr, u16 nr)
+{
+ __invlpgb_flush_addr_nosync(addr, nr);
+ if (!this_cpu_read(cpu_tlbstate.need_tlbsync))
+ this_cpu_write(cpu_tlbstate.need_tlbsync, true);
+}
+
static void broadcast_tlb_flush(struct flush_tlb_info *info)
{
bool pmd = info->stride_shift == PMD_SHIFT;
@@ -794,6 +825,8 @@ void switch_mm_irqs_off(struct mm_struct
if (IS_ENABLED(CONFIG_PROVE_LOCKING))
WARN_ON_ONCE(!irqs_disabled());
+ tlbsync();
+
/*
* Verify that CR3 is what we think it is. This will catch
* hypothetical buggy code that directly switches to swapper_pg_dir
@@ -976,6 +1009,8 @@ reload_tlb:
*/
void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
{
+ tlbsync();
+
if (this_cpu_read(cpu_tlbstate.loaded_mm) == &init_mm)
return;
@@ -1650,9 +1685,7 @@ void arch_tlbbatch_flush(struct arch_tlb
* a local TLB flush is needed. Optimize this use-case by calling
* flush_tlb_func_local() directly in this case.
*/
- if (cpu_feature_enabled(X86_FEATURE_INVLPGB)) {
- invlpgb_flush_all_nonglobals();
- } else if (cpumask_any_but(&batch->cpumask, cpu) < nr_cpu_ids) {
+ if (cpumask_any_but(&batch->cpumask, cpu) < nr_cpu_ids) {
flush_tlb_multi(&batch->cpumask, info);
} else if (cpumask_test_cpu(cpu, &batch->cpumask)) {
lockdep_assert_irqs_enabled();
@@ -1661,12 +1694,53 @@ void arch_tlbbatch_flush(struct arch_tlb
local_irq_enable();
}
+ /*
+ * If we issued (asynchronous) INVLPGB flushes, wait for them here.
+ * The cpumask above contains only CPUs that were running tasks
+ * not using broadcast TLB flushing.
+ */
+ if (cpu_feature_enabled(X86_FEATURE_INVLPGB))
+ tlbsync();
+
cpumask_clear(&batch->cpumask);
put_flush_tlb_info();
put_cpu();
}
+void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch,
+ struct mm_struct *mm,
+ unsigned long uaddr)
+{
+ u16 asid = mm_global_asid(mm);
+
+ if (asid) {
+ invlpgb_flush_user_nr_nosync(kern_pcid(asid), uaddr, 1, false);
+ /* Do any CPUs supporting INVLPGB need PTI? */
+ if (static_cpu_has(X86_FEATURE_PTI))
+ invlpgb_flush_user_nr_nosync(user_pcid(asid), uaddr, 1, false);
+
+ /*
+ * Some CPUs might still be using a local ASID for this
+ * process, and require IPIs, while others are using the
+ * global ASID.
+ *
+ * In this corner case we need to do both the broadcast
+ * TLB invalidation, and send IPIs. The IPIs will help
+ * stragglers transition to the broadcast ASID.
+ */
+ if (in_asid_transition(mm))
+ asid = 0;
+ }
+
+ if (!asid) {
+ inc_mm_tlb_gen(mm);
+ cpumask_or(&batch->cpumask, &batch->cpumask, mm_cpumask(mm));
+ }
+
+ mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL);
+}
+
/*
* Blindly accessing user memory from NMI context can be dangerous
* if we're in the middle of switching the current user task or

View File

@@ -1,80 +0,0 @@
From 7b8ef03b059bca98d2af696c3ec2adcaa673f7e4 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
Date: Wed, 5 Feb 2025 23:43:31 -0500
Subject: x86/mm: only invalidate final translations with INVLPGB
Use the INVLPGB_FINAL_ONLY flag when invalidating mappings with INVLPGB.
This way only leaf mappings get removed from the TLB, leaving intermediate
translations cached.
On the (rare) occasions where we free page tables we do a full flush,
ensuring intermediate translations get flushed from the TLB.
Signed-off-by: Rik van Riel <riel@surriel.com>
Tested-by: Manali Shukla <Manali.Shukla@amd.com>
---
arch/x86/include/asm/invlpgb.h | 10 ++++++++--
arch/x86/mm/tlb.c | 13 +++++++------
2 files changed, 15 insertions(+), 8 deletions(-)
--- a/arch/x86/include/asm/invlpgb.h
+++ b/arch/x86/include/asm/invlpgb.h
@@ -66,9 +66,15 @@ static inline void invlpgb_flush_user(un
static inline void __invlpgb_flush_user_nr_nosync(unsigned long pcid,
unsigned long addr,
u16 nr,
- bool pmd_stride)
+ bool pmd_stride,
+ bool freed_tables)
{
- __invlpgb(0, pcid, addr, nr - 1, pmd_stride, INVLPGB_PCID | INVLPGB_VA);
+ u8 flags = INVLPGB_PCID | INVLPGB_VA;
+
+ if (!freed_tables)
+ flags |= INVLPGB_FINAL_ONLY;
+
+ __invlpgb(0, pcid, addr, nr - 1, pmd_stride, flags);
}
/* Flush all mappings for a given PCID, not including globals. */
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -498,9 +498,10 @@ static inline void tlbsync(void)
static inline void invlpgb_flush_user_nr_nosync(unsigned long pcid,
unsigned long addr,
- u16 nr, bool pmd_stride)
+ u16 nr, bool pmd_stride,
+ bool freed_tables)
{
- __invlpgb_flush_user_nr_nosync(pcid, addr, nr, pmd_stride);
+ __invlpgb_flush_user_nr_nosync(pcid, addr, nr, pmd_stride, freed_tables);
if (!this_cpu_read(cpu_tlbstate.need_tlbsync))
this_cpu_write(cpu_tlbstate.need_tlbsync, true);
}
@@ -549,10 +550,10 @@ static void broadcast_tlb_flush(struct f
nr = min(maxnr, (info->end - addr) >> info->stride_shift);
nr = max(nr, 1);
- invlpgb_flush_user_nr_nosync(kern_pcid(asid), addr, nr, pmd);
+ invlpgb_flush_user_nr_nosync(kern_pcid(asid), addr, nr, pmd, info->freed_tables);
/* Do any CPUs supporting INVLPGB need PTI? */
if (static_cpu_has(X86_FEATURE_PTI))
- invlpgb_flush_user_nr_nosync(user_pcid(asid), addr, nr, pmd);
+ invlpgb_flush_user_nr_nosync(user_pcid(asid), addr, nr, pmd, info->freed_tables);
addr += nr << info->stride_shift;
} while (addr < info->end);
@@ -1715,10 +1716,10 @@ void arch_tlbbatch_add_pending(struct ar
u16 asid = mm_global_asid(mm);
if (asid) {
- invlpgb_flush_user_nr_nosync(kern_pcid(asid), uaddr, 1, false);
+ invlpgb_flush_user_nr_nosync(kern_pcid(asid), uaddr, 1, false, false);
/* Do any CPUs supporting INVLPGB need PTI? */
if (static_cpu_has(X86_FEATURE_PTI))
- invlpgb_flush_user_nr_nosync(user_pcid(asid), uaddr, 1, false);
+ invlpgb_flush_user_nr_nosync(user_pcid(asid), uaddr, 1, false, false);
/*
* Some CPUs might still be using a local ASID for this


@@ -1,94 +0,0 @@
From 7b0836fcad644d24d6318bf63013ec1b35d6a27b Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
Date: Thu, 19 Dec 2024 15:32:53 -0500
Subject: mm: remove unnecessary calls to lru_add_drain
There seem to be several categories of calls to lru_add_drain
and lru_add_drain_all.
The first are code paths that recently allocated, swapped in,
or otherwise processed a batch of pages, and want them all on
the LRU. These drain pages that were recently allocated,
probably on the local CPU.
A second category are code paths that are actively trying to
reclaim, migrate, or offline memory. These often use lru_add_drain_all,
to drain the caches on all CPUs.
However, there also seem to be some other callers where we
aren't really doing either. They are calling lru_add_drain(),
despite operating on pages that may have been allocated
long ago, and quite possibly on different CPUs.
Those calls are not likely to be effective at anything but
creating lock contention on the LRU locks.
Remove the lru_add_drain calls in the latter category.
Signed-off-by: Rik van Riel <riel@surriel.com>
Suggested-by: David Hildenbrand <david@redhat.com>
Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
Acked-by: David Hildenbrand <david@redhat.com>
---
mm/memory.c | 1 -
mm/mmap.c | 2 --
mm/swap_state.c | 1 -
mm/vma.c | 2 --
4 files changed, 6 deletions(-)
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1921,7 +1921,6 @@ void zap_page_range_single(struct vm_are
struct mmu_notifier_range range;
struct mmu_gather tlb;
- lru_add_drain();
mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma->vm_mm,
address, end);
hugetlb_zap_begin(vma, &range.start, &range.end);
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1931,7 +1931,6 @@ void exit_mmap(struct mm_struct *mm)
goto destroy;
}
- lru_add_drain();
flush_cache_mm(mm);
tlb_gather_mmu_fullmm(&tlb, mm);
/* update_hiwater_rss(mm) here? but nobody should be looking */
@@ -2374,7 +2373,6 @@ int relocate_vma_down(struct vm_area_str
vma, new_start, length, false, true))
return -ENOMEM;
- lru_add_drain();
tlb_gather_mmu(&tlb, mm);
next = vma_next(&vmi);
if (new_end > old_start) {
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -317,7 +317,6 @@ void free_pages_and_swap_cache(struct en
struct folio_batch folios;
unsigned int refs[PAGEVEC_SIZE];
- lru_add_drain();
folio_batch_init(&folios);
for (int i = 0; i < nr; i++) {
struct folio *folio = page_folio(encoded_page_ptr(pages[i]));
--- a/mm/vma.c
+++ b/mm/vma.c
@@ -347,7 +347,6 @@ void unmap_region(struct ma_state *mas,
struct mm_struct *mm = vma->vm_mm;
struct mmu_gather tlb;
- lru_add_drain();
tlb_gather_mmu(&tlb, mm);
update_hiwater_rss(mm);
unmap_vmas(&tlb, mas, vma, vma->vm_start, vma->vm_end, vma->vm_end,
@@ -1089,7 +1088,6 @@ static inline void vms_clear_ptes(struct
* were isolated before we downgraded mmap_lock.
*/
mas_set(mas_detach, 1);
- lru_add_drain();
tlb_gather_mmu(&tlb, vms->vma->vm_mm);
update_hiwater_rss(vms->vma->vm_mm);
unmap_vmas(&tlb, mas_detach, vms->vma, vms->start, vms->end,


@@ -1,429 +0,0 @@
From 7ecab5a83d3155baa009cd6bc6e18959fee8be62 Mon Sep 17 00:00:00 2001
From: Vincenzo Frascino <vincenzo.frascino@arm.com>
Date: Mon, 14 Oct 2024 16:13:39 +0100
Subject: vdso: Introduce vdso/page.h
The VDSO implementation includes headers from outside of the
vdso/ namespace.
Introduce vdso/page.h to make sure that the generic library
uses only the allowed namespace.
Signed-off-by: Vincenzo Frascino <vincenzo.frascino@arm.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Geert Uytterhoeven <geert@linux-m68k.org> # m68k
Link: https://lore.kernel.org/all/20241014151340.1639555-3-vincenzo.frascino@arm.com
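The new header itself is not shown in this view; reconstructed from
the per-arch definitions removed below, it presumably looks roughly
like this (details may differ):

    /* SPDX-License-Identifier: GPL-2.0 */
    #ifndef __VDSO_PAGE_H
    #define __VDSO_PAGE_H

    #include <vdso/const.h>

    /* PAGE_SHIFT determines the page size */
    #define PAGE_SHIFT      CONFIG_PAGE_SHIFT

    #define PAGE_SIZE       (_AC(1, UL) << CONFIG_PAGE_SHIFT)

    #define PAGE_MASK       (~(PAGE_SIZE - 1))

    #endif /* __VDSO_PAGE_H */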
---
arch/alpha/include/asm/page.h | 6 +-----
arch/arc/include/uapi/asm/page.h | 7 +++----
arch/arm/include/asm/page.h | 5 +----
arch/arm64/include/asm/page-def.h | 5 +----
arch/csky/include/asm/page.h | 8 ++------
arch/hexagon/include/asm/page.h | 4 +---
arch/loongarch/include/asm/page.h | 7 +------
arch/m68k/include/asm/page.h | 6 ++----
arch/microblaze/include/asm/page.h | 5 +----
arch/mips/include/asm/page.h | 7 +------
arch/nios2/include/asm/page.h | 7 +------
arch/openrisc/include/asm/page.h | 11 +----------
arch/parisc/include/asm/page.h | 4 +---
arch/powerpc/include/asm/page.h | 10 +---------
arch/riscv/include/asm/page.h | 4 +---
arch/s390/include/asm/page.h | 13 +++++--------
arch/sh/include/asm/page.h | 6 ++----
arch/sparc/include/asm/page_32.h | 4 +---
arch/sparc/include/asm/page_64.h | 4 +---
arch/um/include/asm/page.h | 5 +----
arch/x86/include/asm/page_types.h | 5 +----
arch/xtensa/include/asm/page.h | 8 +-------
include/vdso/page.h | 30 ++++++++++++++++++++++++++++++
23 files changed, 61 insertions(+), 110 deletions(-)
create mode 100644 include/vdso/page.h
--- a/arch/alpha/include/asm/page.h
+++ b/arch/alpha/include/asm/page.h
@@ -4,11 +4,7 @@
#include <linux/const.h>
#include <asm/pal.h>
-
-/* PAGE_SHIFT determines the page size */
-#define PAGE_SHIFT CONFIG_PAGE_SHIFT
-#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT)
-#define PAGE_MASK (~(PAGE_SIZE-1))
+#include <vdso/page.h>
#ifndef __ASSEMBLY__
--- a/arch/arc/include/uapi/asm/page.h
+++ b/arch/arc/include/uapi/asm/page.h
@@ -14,7 +14,7 @@
/* PAGE_SHIFT determines the page size */
#ifdef __KERNEL__
-#define PAGE_SHIFT CONFIG_PAGE_SHIFT
+#include <vdso/page.h>
#else
/*
* Default 8k
@@ -24,11 +24,10 @@
* not available
*/
#define PAGE_SHIFT 13
+#define PAGE_SIZE _BITUL(PAGE_SHIFT) /* Default 8K */
+#define PAGE_MASK (~(PAGE_SIZE-1))
#endif
-#define PAGE_SIZE _BITUL(PAGE_SHIFT) /* Default 8K */
#define PAGE_OFFSET _AC(0x80000000, UL) /* Kernel starts at 2G onwards */
-#define PAGE_MASK (~(PAGE_SIZE-1))
-
#endif /* _UAPI__ASM_ARC_PAGE_H */
--- a/arch/arm/include/asm/page.h
+++ b/arch/arm/include/asm/page.h
@@ -7,10 +7,7 @@
#ifndef _ASMARM_PAGE_H
#define _ASMARM_PAGE_H
-/* PAGE_SHIFT determines the page size */
-#define PAGE_SHIFT CONFIG_PAGE_SHIFT
-#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT)
-#define PAGE_MASK (~((1 << PAGE_SHIFT) - 1))
+#include <vdso/page.h>
#ifndef __ASSEMBLY__
--- a/arch/arm64/include/asm/page-def.h
+++ b/arch/arm64/include/asm/page-def.h
@@ -10,9 +10,6 @@
#include <linux/const.h>
-/* PAGE_SHIFT determines the page size */
-#define PAGE_SHIFT CONFIG_PAGE_SHIFT
-#define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT)
-#define PAGE_MASK (~(PAGE_SIZE-1))
+#include <vdso/page.h>
#endif /* __ASM_PAGE_DEF_H */
--- a/arch/csky/include/asm/page.h
+++ b/arch/csky/include/asm/page.h
@@ -7,12 +7,8 @@
#include <asm/cache.h>
#include <linux/const.h>
-/*
- * PAGE_SHIFT determines the page size: 4KB
- */
-#define PAGE_SHIFT CONFIG_PAGE_SHIFT
-#define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT)
-#define PAGE_MASK (~(PAGE_SIZE - 1))
+#include <vdso/page.h>
+
#define THREAD_SIZE (PAGE_SIZE * 2)
#define THREAD_MASK (~(THREAD_SIZE - 1))
#define THREAD_SHIFT (PAGE_SHIFT + 1)
--- a/arch/hexagon/include/asm/page.h
+++ b/arch/hexagon/include/asm/page.h
@@ -45,9 +45,7 @@
#define HVM_HUGEPAGE_SIZE 0x5
#endif
-#define PAGE_SHIFT CONFIG_PAGE_SHIFT
-#define PAGE_SIZE (1UL << PAGE_SHIFT)
-#define PAGE_MASK (~((1 << PAGE_SHIFT) - 1))
+#include <vdso/page.h>
#ifdef __KERNEL__
#ifndef __ASSEMBLY__
--- a/arch/loongarch/include/asm/page.h
+++ b/arch/loongarch/include/asm/page.h
@@ -8,12 +8,7 @@
#include <linux/const.h>
#include <asm/addrspace.h>
-/*
- * PAGE_SHIFT determines the page size
- */
-#define PAGE_SHIFT CONFIG_PAGE_SHIFT
-#define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT)
-#define PAGE_MASK (~(PAGE_SIZE - 1))
+#include <vdso/page.h>
#define HPAGE_SHIFT (PAGE_SHIFT + PAGE_SHIFT - 3)
#define HPAGE_SIZE (_AC(1, UL) << HPAGE_SHIFT)
--- a/arch/m68k/include/asm/page.h
+++ b/arch/m68k/include/asm/page.h
@@ -6,10 +6,8 @@
#include <asm/setup.h>
#include <asm/page_offset.h>
-/* PAGE_SHIFT determines the page size */
-#define PAGE_SHIFT CONFIG_PAGE_SHIFT
-#define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT)
-#define PAGE_MASK (~(PAGE_SIZE-1))
+#include <vdso/page.h>
+
#define PAGE_OFFSET (PAGE_OFFSET_RAW)
#ifndef __ASSEMBLY__
--- a/arch/microblaze/include/asm/page.h
+++ b/arch/microblaze/include/asm/page.h
@@ -19,10 +19,7 @@
#ifdef __KERNEL__
-/* PAGE_SHIFT determines the page size */
-#define PAGE_SHIFT CONFIG_PAGE_SHIFT
-#define PAGE_SIZE (ASM_CONST(1) << PAGE_SHIFT)
-#define PAGE_MASK (~(PAGE_SIZE-1))
+#include <vdso/page.h>
#define LOAD_OFFSET ASM_CONST((CONFIG_KERNEL_START-CONFIG_KERNEL_BASE_ADDR))
--- a/arch/mips/include/asm/page.h
+++ b/arch/mips/include/asm/page.h
@@ -14,12 +14,7 @@
#include <linux/kernel.h>
#include <asm/mipsregs.h>
-/*
- * PAGE_SHIFT determines the page size
- */
-#define PAGE_SHIFT CONFIG_PAGE_SHIFT
-#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT)
-#define PAGE_MASK (~((1 << PAGE_SHIFT) - 1))
+#include <vdso/page.h>
/*
* This is used for calculating the real page sizes
--- a/arch/nios2/include/asm/page.h
+++ b/arch/nios2/include/asm/page.h
@@ -18,12 +18,7 @@
#include <linux/pfn.h>
#include <linux/const.h>
-/*
- * PAGE_SHIFT determines the page size
- */
-#define PAGE_SHIFT CONFIG_PAGE_SHIFT
-#define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT)
-#define PAGE_MASK (~(PAGE_SIZE - 1))
+#include <vdso/page.h>
/*
* PAGE_OFFSET -- the first address of the first page of memory.
--- a/arch/openrisc/include/asm/page.h
+++ b/arch/openrisc/include/asm/page.h
@@ -15,16 +15,7 @@
#ifndef __ASM_OPENRISC_PAGE_H
#define __ASM_OPENRISC_PAGE_H
-
-/* PAGE_SHIFT determines the page size */
-
-#define PAGE_SHIFT CONFIG_PAGE_SHIFT
-#ifdef __ASSEMBLY__
-#define PAGE_SIZE (1 << PAGE_SHIFT)
-#else
-#define PAGE_SIZE (1UL << PAGE_SHIFT)
-#endif
-#define PAGE_MASK (~(PAGE_SIZE-1))
+#include <vdso/page.h>
#define PAGE_OFFSET 0xc0000000
#define KERNELBASE PAGE_OFFSET
--- a/arch/parisc/include/asm/page.h
+++ b/arch/parisc/include/asm/page.h
@@ -4,9 +4,7 @@
#include <linux/const.h>
-#define PAGE_SHIFT CONFIG_PAGE_SHIFT
-#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT)
-#define PAGE_MASK (~(PAGE_SIZE-1))
+#include <vdso/page.h>
#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -21,8 +21,7 @@
* page size. When using 64K pages however, whether we are really supporting
* 64K pages in HW or not is irrelevant to those definitions.
*/
-#define PAGE_SHIFT CONFIG_PAGE_SHIFT
-#define PAGE_SIZE (ASM_CONST(1) << PAGE_SHIFT)
+#include <vdso/page.h>
#ifndef __ASSEMBLY__
#ifndef CONFIG_HUGETLB_PAGE
@@ -42,13 +41,6 @@ extern unsigned int hpage_shift;
#endif
/*
- * Subtle: (1 << PAGE_SHIFT) is an int, not an unsigned long. So if we
- * assign PAGE_MASK to a larger type it gets extended the way we want
- * (i.e. with 1s in the high bits)
- */
-#define PAGE_MASK (~((1 << PAGE_SHIFT) - 1))
-
-/*
* KERNELBASE is the virtual address of the start of the kernel, it's often
* the same as PAGE_OFFSET, but _might not be_.
*
--- a/arch/riscv/include/asm/page.h
+++ b/arch/riscv/include/asm/page.h
@@ -12,9 +12,7 @@
#include <linux/pfn.h>
#include <linux/const.h>
-#define PAGE_SHIFT CONFIG_PAGE_SHIFT
-#define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT)
-#define PAGE_MASK (~(PAGE_SIZE - 1))
+#include <vdso/page.h>
#define HPAGE_SHIFT PMD_SHIFT
#define HPAGE_SIZE (_AC(1, UL) << HPAGE_SHIFT)
--- a/arch/s390/include/asm/page.h
+++ b/arch/s390/include/asm/page.h
@@ -11,14 +11,11 @@
#include <linux/const.h>
#include <asm/types.h>
-#define _PAGE_SHIFT CONFIG_PAGE_SHIFT
-#define _PAGE_SIZE (_AC(1, UL) << _PAGE_SHIFT)
-#define _PAGE_MASK (~(_PAGE_SIZE - 1))
+#include <vdso/page.h>
-/* PAGE_SHIFT determines the page size */
-#define PAGE_SHIFT _PAGE_SHIFT
-#define PAGE_SIZE _PAGE_SIZE
-#define PAGE_MASK _PAGE_MASK
+#define _PAGE_SHIFT PAGE_SHIFT
+#define _PAGE_SIZE PAGE_SIZE
+#define _PAGE_MASK PAGE_MASK
#define PAGE_DEFAULT_ACC _AC(0, UL)
/* storage-protection override */
#define PAGE_SPO_ACC 9
--- a/arch/sh/include/asm/page.h
+++ b/arch/sh/include/asm/page.h
@@ -8,10 +8,8 @@
#include <linux/const.h>
-/* PAGE_SHIFT determines the page size */
-#define PAGE_SHIFT CONFIG_PAGE_SHIFT
-#define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT)
-#define PAGE_MASK (~(PAGE_SIZE-1))
+#include <vdso/page.h>
+
#define PTE_MASK PAGE_MASK
#if defined(CONFIG_HUGETLB_PAGE_SIZE_64K)
--- a/arch/sparc/include/asm/page_32.h
+++ b/arch/sparc/include/asm/page_32.h
@@ -11,9 +11,7 @@
#include <linux/const.h>
-#define PAGE_SHIFT CONFIG_PAGE_SHIFT
-#define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT)
-#define PAGE_MASK (~(PAGE_SIZE-1))
+#include <vdso/page.h>
#ifndef __ASSEMBLY__
--- a/arch/sparc/include/asm/page_64.h
+++ b/arch/sparc/include/asm/page_64.h
@@ -4,9 +4,7 @@
#include <linux/const.h>
-#define PAGE_SHIFT CONFIG_PAGE_SHIFT
-#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT)
-#define PAGE_MASK (~(PAGE_SIZE-1))
+#include <vdso/page.h>
/* Flushing for D-cache alias handling is only needed if
* the page size is smaller than 16K.
--- a/arch/um/include/asm/page.h
+++ b/arch/um/include/asm/page.h
@@ -9,10 +9,7 @@
#include <linux/const.h>
-/* PAGE_SHIFT determines the page size */
-#define PAGE_SHIFT CONFIG_PAGE_SHIFT
-#define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT)
-#define PAGE_MASK (~(PAGE_SIZE-1))
+#include <vdso/page.h>
#ifndef __ASSEMBLY__
--- a/arch/x86/include/asm/page_types.h
+++ b/arch/x86/include/asm/page_types.h
@@ -6,10 +6,7 @@
#include <linux/types.h>
#include <linux/mem_encrypt.h>
-/* PAGE_SHIFT determines the page size */
-#define PAGE_SHIFT CONFIG_PAGE_SHIFT
-#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT)
-#define PAGE_MASK (~(PAGE_SIZE-1))
+#include <vdso/page.h>
#define __VIRTUAL_MASK ((1UL << __VIRTUAL_MASK_SHIFT) - 1)
--- a/arch/xtensa/include/asm/page.h
+++ b/arch/xtensa/include/asm/page.h
@@ -18,13 +18,7 @@
#include <asm/cache.h>
#include <asm/kmem_layout.h>
-/*
- * PAGE_SHIFT determines the page size
- */
-
-#define PAGE_SHIFT CONFIG_PAGE_SHIFT
-#define PAGE_SIZE (__XTENSA_UL_CONST(1) << PAGE_SHIFT)
-#define PAGE_MASK (~(PAGE_SIZE-1))
+#include <vdso/page.h>
#ifdef CONFIG_MMU
#define PAGE_OFFSET XCHAL_KSEG_CACHED_VADDR
--- /dev/null
+++ b/include/vdso/page.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __VDSO_PAGE_H
+#define __VDSO_PAGE_H
+
+#include <uapi/linux/const.h>
+
+/*
+ * PAGE_SHIFT determines the page size.
+ *
+ * Note: This definition is required because PAGE_SHIFT is used
+ * in several places throughout the codebase.
+ */
+#define PAGE_SHIFT CONFIG_PAGE_SHIFT
+
+#define PAGE_SIZE (_AC(1,UL) << CONFIG_PAGE_SHIFT)
+
+#if defined(CONFIG_PHYS_ADDR_T_64BIT) && !defined(CONFIG_64BIT)
+/*
+ * Applies only to 32-bit architectures with a 64-bit phys_addr_t.
+ *
+ * Subtle: (1 << CONFIG_PAGE_SHIFT) is an int, not an unsigned long.
+ * So if we assign PAGE_MASK to a larger type it gets extended the
+ * way we want (i.e. with 1s in the high bits)
+ */
+#define PAGE_MASK (~((1 << CONFIG_PAGE_SHIFT) - 1))
+#else
+#define PAGE_MASK (~(PAGE_SIZE - 1))
+#endif
+
+#endif /* __VDSO_PAGE_H */
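The sign-extension subtlety described in the PAGE_MASK comment above can be
demonstrated in user-space C; the fixed-width types stand in for a 32-bit
kernel's int and unsigned long, and a 4 KiB page size is assumed:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t phys  = 0x1234567000ULL;	/* 64-bit phys_addr_t on a 32-bit kernel */
	int32_t  smask = ~((1 << 12) - 1);	/* (1 << PAGE_SHIFT) is an int */
	uint32_t umask = ~((1U << 12) - 1);	/* a 32-bit unsigned long mask */

	/* The signed mask widens with 1s in the high bits: address preserved. */
	printf("%#llx\n", (unsigned long long)(phys & smask));	/* 0x1234567000 */

	/* The unsigned 32-bit mask zero-extends and truncates the address. */
	printf("%#llx\n", (unsigned long long)(phys & umask));	/* 0x34567000 */
	return 0;
}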


@@ -1,68 +0,0 @@
From d1bcf51400e790e65945a29078bd816bd61aa148 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 24 Oct 2024 13:34:26 +0000
Subject: vdso: Change PAGE_MASK to signed on all 32-bit architectures
With the introduction of an architecture-independent definition of
PAGE_MASK, we had to make a choice between defining it as 'unsigned long'
as on 64-bit architectures, or as signed 'long' as required for
architectures with a 64-bit phys_addr_t.
To reduce the risk for regressions and minimize the changes in behavior,
the result was using the signed value only when CONFIG_PHYS_ADDR_T_64BIT
is set, but that ended up causing a regression after all in the
early_init_dt_add_memory_arch() function that uses 64-bit integers for
address calculation.
Presumably the same regression also affects mips32 and powerpc32 when
dealing with large amounts of memory on DT platforms: like arm32, they were
using the signed version unconditionally.
The two most sensible options for addressing the regression are either to
go back to an architecture specific definition, using a signed constant on
arm/powerpc/mips and unsigned on the others, or to use the same definition
everywhere.
Use the simpler of those two and change them all to the signed version, in
the hope that this does not cause a different type of bug. Most of the
other 32-bit architectures have no large physical address support and are
rarely used, so it seems more likely that using the same definition helps
than hurts here.
In particular, x86-32 does have physical addressing extensions, so it
already changed to the signed version after the previous patch, so it makes
sense to use the same version on non-PAE as well.
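A minimal sketch of the arithmetic behind the regression, assuming a 4 KiB
page size and a memory bank starting above 4 GiB as on an arm32 LPAE
system (the exact values seen by early_init_dt_add_memory_arch() will
differ):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t base  = 0x100000000ULL;	/* RAM starting at 4 GiB */
	int32_t  smask = ~((1 << 12) - 1);	/* signed PAGE_MASK */
	uint32_t umask = ~((1U << 12) - 1);	/* unsigned 32-bit PAGE_MASK */

	printf("%#llx\n", (unsigned long long)(base & smask));	/* 0x100000000 */
	printf("%#llx\n", (unsigned long long)(base & umask));	/* 0x0: bank lost */
	return 0;
}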
Fixes: efe8419ae78d ("vdso: Introduce vdso/page.h")
Reported-by: Naresh Kamboju <naresh.kamboju@linaro.org>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Anders Roxell <anders.roxell@linaro.org>
Tested-by: Vincenzo Frascino <vincenzo.frascino@arm.com>
Reviewed-by: Vincenzo Frascino <vincenzo.frascino@arm.com>
Link: https://lore.kernel.org/all/20241024133447.3117273-1-arnd@kernel.org
Link: https://lore.kernel.org/lkml/CA+G9fYt86bUAu_v5dXPWnDUwQNVipj+Wq3Djir1KUSKdr9QLNg@mail.gmail.com/
---
include/vdso/page.h | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
--- a/include/vdso/page.h
+++ b/include/vdso/page.h
@@ -14,13 +14,14 @@
#define PAGE_SIZE (_AC(1,UL) << CONFIG_PAGE_SHIFT)
-#if defined(CONFIG_PHYS_ADDR_T_64BIT) && !defined(CONFIG_64BIT)
+#if !defined(CONFIG_64BIT)
/*
- * Applies only to 32-bit architectures with a 64-bit phys_addr_t.
+ * Applies only to 32-bit architectures.
*
* Subtle: (1 << CONFIG_PAGE_SHIFT) is an int, not an unsigned long.
* So if we assign PAGE_MASK to a larger type it gets extended the
- * way we want (i.e. with 1s in the high bits)
+ * way we want (i.e. with 1s in the high bits) while masking a
+ * 64-bit value such as phys_addr_t.
*/
#define PAGE_MASK (~((1 << CONFIG_PAGE_SHIFT) - 1))
#else


@@ -1,7 +1,7 @@
From e11153c4df0fee7caadec3714a60a4936d6a9ea2 Mon Sep 17 00:00:00 2001
From 1901291057a3f1bf2bf94c7a4ddf3253d3116acb Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
Date: Wed, 5 Feb 2025 23:43:20 -0500
Subject: x86/mm: make MMU_GATHER_RCU_TABLE_FREE unconditional
Date: Thu, 13 Feb 2025 11:13:52 -0500
Subject: x86/mm: Make MMU_GATHER_RCU_TABLE_FREE unconditional
Currently x86 uses CONFIG_MMU_GATHER_RCU_TABLE_FREE when using
paravirt, and not when running on bare metal.
@@ -11,8 +11,9 @@ each setup. Make them all the same.
Currently get_user_pages_fast synchronizes against page table
freeing in two different ways:
- on bare metal, by blocking IRQs, which block TLB flush IPIs
- on paravirt, with MMU_GATHER_RCU_TABLE_FREE
- on bare metal, by blocking IRQs, which block TLB flush IPIs
- on paravirt, with MMU_GATHER_RCU_TABLE_FREE
This is done because some paravirt TLB flush implementations
handle the TLB flush in the hypervisor, and will do the flush
@@ -27,18 +28,22 @@ as an implicit way to block RCU frees.
That makes it safe to use INVLPGB on AMD CPUs.
Signed-off-by: Rik van Riel <riel@surriel.com>
Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Rik van Riel <riel@surriel.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Tested-by: Manali Shukla <Manali.Shukla@amd.com>
Tested-by: Brendan Jackman <jackmanb@google.com>
Tested-by: Michael Kelley <mhklinux@outlook.com>
Link: https://lore.kernel.org/r/20250213161423.449435-2-riel@surriel.com
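As a rough illustration of why RCU-deferred freeing keeps lockless page
table walkers safe without IPIs, here is a simplified sketch assuming the
usual call_rcu() semantics; the real code batches tables in the mmu_gather
and falls back to an IPI when the allocation fails:

#include <linux/slab.h>
#include <linux/rcupdate.h>
#include <linux/gfp.h>

struct deferred_table {
	struct rcu_head rcu;
	unsigned long table;
};

static void deferred_table_free(struct rcu_head *head)
{
	struct deferred_table *dt = container_of(head, struct deferred_table, rcu);

	/* Every lockless walker that could still see the table has finished. */
	free_page(dt->table);
	kfree(dt);
}

static void remove_table_rcu(unsigned long table)
{
	struct deferred_table *dt = kmalloc(sizeof(*dt), GFP_ATOMIC);

	if (!dt)
		return;		/* the real code falls back to an IPI here */

	dt->table = table;
	call_rcu(&dt->rcu, deferred_table_free);
}

GUP-fast runs with interrupts disabled, which also blocks an RCU grace
period on that CPU, so the deferred free cannot run under a concurrent
lockless walk.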
---
arch/x86/Kconfig | 2 +-
arch/x86/kernel/paravirt.c | 7 +------
arch/x86/mm/pgtable.c | 16 ++++------------
3 files changed, 6 insertions(+), 19 deletions(-)
arch/x86/kernel/paravirt.c | 17 +----------------
arch/x86/mm/pgtable.c | 27 ++++-----------------------
3 files changed, 6 insertions(+), 40 deletions(-)
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -270,7 +270,7 @@ config X86
@@ -277,7 +277,7 @@ config X86
select HAVE_PCI
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
@@ -49,19 +54,29 @@ Tested-by: Manali Shukla <Manali.Shukla@amd.com>
select HAVE_REGS_AND_STACK_ACCESS_API
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -59,11 +59,6 @@ void __init native_pv_lock_init(void)
@@ -59,21 +59,6 @@ void __init native_pv_lock_init(void)
static_branch_enable(&virt_spin_lock_key);
}
-#ifndef CONFIG_PT_RECLAIM
-static void native_tlb_remove_table(struct mmu_gather *tlb, void *table)
-{
- tlb_remove_page(tlb, table);
- struct ptdesc *ptdesc = (struct ptdesc *)table;
-
- pagetable_dtor(ptdesc);
- tlb_remove_page(tlb, ptdesc_page(ptdesc));
-}
-#else
-static void native_tlb_remove_table(struct mmu_gather *tlb, void *table)
-{
- tlb_remove_table(tlb, table);
-}
-#endif
-
struct static_key paravirt_steal_enabled;
struct static_key paravirt_steal_rq_enabled;
@@ -191,7 +186,7 @@ struct paravirt_patch_template pv_ops =
@@ -195,7 +180,7 @@ struct paravirt_patch_template pv_ops =
.mmu.flush_tlb_kernel = native_flush_tlb_global,
.mmu.flush_tlb_one_user = native_flush_tlb_one_user,
.mmu.flush_tlb_multi = native_flush_tlb_multi,
@@ -72,53 +87,63 @@ Tested-by: Manali Shukla <Manali.Shukla@amd.com>
.mmu.notify_page_enc_status_changed = paravirt_nop,
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -18,14 +18,6 @@ EXPORT_SYMBOL(physical_mask);
@@ -18,25 +18,6 @@ EXPORT_SYMBOL(physical_mask);
#define PGTABLE_HIGHMEM 0
#endif
-#ifndef CONFIG_PARAVIRT
-#ifndef CONFIG_PT_RECLAIM
-static inline
-void paravirt_tlb_remove_table(struct mmu_gather *tlb, void *table)
-{
- tlb_remove_page(tlb, table);
- struct ptdesc *ptdesc = (struct ptdesc *)table;
-
- pagetable_dtor(ptdesc);
- tlb_remove_page(tlb, ptdesc_page(ptdesc));
-}
-#endif
-#else
-static inline
-void paravirt_tlb_remove_table(struct mmu_gather *tlb, void *table)
-{
- tlb_remove_table(tlb, table);
-}
-#endif /* !CONFIG_PT_RECLAIM */
-#endif /* !CONFIG_PARAVIRT */
-
gfp_t __userpte_alloc_gfp = GFP_PGTABLE_USER | PGTABLE_HIGHMEM;
pgtable_t pte_alloc_one(struct mm_struct *mm)
@@ -54,7 +46,7 @@ void ___pte_free_tlb(struct mmu_gather *
@@ -64,7 +45,7 @@ early_param("userpte", setup_userpte);
void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
{
pagetable_pte_dtor(page_ptdesc(pte));
paravirt_release_pte(page_to_pfn(pte));
- paravirt_tlb_remove_table(tlb, pte);
+ tlb_remove_table(tlb, pte);
- paravirt_tlb_remove_table(tlb, page_ptdesc(pte));
+ tlb_remove_table(tlb, page_ptdesc(pte));
}
#if CONFIG_PGTABLE_LEVELS > 2
@@ -70,7 +62,7 @@ void ___pmd_free_tlb(struct mmu_gather *
@@ -78,21 +59,21 @@ void ___pmd_free_tlb(struct mmu_gather *
#ifdef CONFIG_X86_PAE
tlb->need_flush_all = 1;
#endif
pagetable_pmd_dtor(ptdesc);
- paravirt_tlb_remove_table(tlb, ptdesc_page(ptdesc));
+ tlb_remove_table(tlb, ptdesc_page(ptdesc));
- paravirt_tlb_remove_table(tlb, virt_to_ptdesc(pmd));
+ tlb_remove_table(tlb, virt_to_ptdesc(pmd));
}
#if CONFIG_PGTABLE_LEVELS > 3
@@ -80,14 +72,14 @@ void ___pud_free_tlb(struct mmu_gather *
pagetable_pud_dtor(ptdesc);
void ___pud_free_tlb(struct mmu_gather *tlb, pud_t *pud)
{
paravirt_release_pud(__pa(pud) >> PAGE_SHIFT);
- paravirt_tlb_remove_table(tlb, virt_to_page(pud));
+ tlb_remove_table(tlb, virt_to_page(pud));
- paravirt_tlb_remove_table(tlb, virt_to_ptdesc(pud));
+ tlb_remove_table(tlb, virt_to_ptdesc(pud));
}
#if CONFIG_PGTABLE_LEVELS > 4
void ___p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d)
{
paravirt_release_p4d(__pa(p4d) >> PAGE_SHIFT);
- paravirt_tlb_remove_table(tlb, virt_to_page(p4d));
+ tlb_remove_table(tlb, virt_to_page(p4d));
- paravirt_tlb_remove_table(tlb, virt_to_ptdesc(p4d));
+ tlb_remove_table(tlb, virt_to_ptdesc(p4d));
}
#endif /* CONFIG_PGTABLE_LEVELS > 4 */
#endif /* CONFIG_PGTABLE_LEVELS > 3 */


@@ -1,16 +1,20 @@
From e8008cb69c5e4efbaedd70b0fb692343e4aa0e51 Mon Sep 17 00:00:00 2001
From 002a3e971d0d7987bdcdd564eccfa3dd63637226 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
Date: Wed, 5 Feb 2025 23:43:21 -0500
Subject: x86/mm: remove pv_ops.mmu.tlb_remove_table call
Date: Thu, 13 Feb 2025 11:13:53 -0500
Subject: x86/mm: Remove pv_ops.mmu.tlb_remove_table call
Every pv_ops.mmu.tlb_remove_table call ends up calling tlb_remove_table.
Get rid of the indirection by simply calling tlb_remove_table directly,
and not going through the paravirt function pointers.
Signed-off-by: Rik van Riel <riel@surriel.com>
Suggested-by: Qi Zheng <zhengqi.arch@bytedance.com>
Signed-off-by: Rik van Riel <riel@surriel.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Tested-by: Manali Shukla <Manali.Shukla@amd.com>
Tested-by: Brendan Jackman <jackmanb@google.com>
Tested-by: Michael Kelley <mhklinux@outlook.com>
Link: https://lore.kernel.org/r/20250213161423.449435-3-riel@surriel.com
---
arch/x86/hyperv/mmu.c | 1 -
arch/x86/include/asm/paravirt.h | 5 -----
@@ -22,7 +26,7 @@ Tested-by: Manali Shukla <Manali.Shukla@amd.com>
--- a/arch/x86/hyperv/mmu.c
+++ b/arch/x86/hyperv/mmu.c
@@ -240,5 +240,4 @@ void hyperv_setup_mmu_ops(void)
@@ -239,5 +239,4 @@ void hyperv_setup_mmu_ops(void)
pr_info("Using hypercall for remote TLB flush\n");
pv_ops.mmu.flush_tlb_multi = hyperv_flush_tlb_multi;
@@ -44,7 +48,7 @@ Tested-by: Manali Shukla <Manali.Shukla@amd.com>
PVOP_VCALL1(mmu.exit_mmap, mm);
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -136,8 +136,6 @@ struct pv_mmu_ops {
@@ -134,8 +134,6 @@ struct pv_mmu_ops {
void (*flush_tlb_multi)(const struct cpumask *cpus,
const struct flush_tlb_info *info);
@@ -65,7 +69,7 @@ Tested-by: Manali Shukla <Manali.Shukla@amd.com>
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -186,7 +186,6 @@ struct paravirt_patch_template pv_ops =
@@ -180,7 +180,6 @@ struct paravirt_patch_template pv_ops =
.mmu.flush_tlb_kernel = native_flush_tlb_global,
.mmu.flush_tlb_one_user = native_flush_tlb_one_user,
.mmu.flush_tlb_multi = native_flush_tlb_multi,


@@ -0,0 +1,87 @@
From d4784e28cc2e488fce80ded0ff086c50244593ca Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
Date: Tue, 25 Feb 2025 22:00:36 -0500
Subject: x86/mm: Consolidate full flush threshold decision
Reduce code duplication by consolidating the decision point for whether to do
individual invalidations or a full flush inside get_flush_tlb_info().
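The consolidated check amounts to the sketch below; 33 is the default
tlb_single_page_flush_ceiling at the time of writing, and TLB_FLUSH_ALL is
-1UL on x86:

static bool wants_full_flush(unsigned long start, unsigned long end,
			     unsigned int stride_shift)
{
	/* tlb_single_page_flush_ceiling defaults to 33 */
	return ((end - start) >> stride_shift) > 33;
}

Because end == TLB_FLUSH_ALL makes (end - start) enormous, the old
explicit TLB_FLUSH_ALL tests fall out of this single comparison; a 64-page
flush (256 KiB at a 4 KiB stride), for example, exceeds the ceiling and is
promoted to a full flush.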
Suggested-by: Dave Hansen <dave.hansen@intel.com>
Signed-off-by: Rik van Riel <riel@surriel.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Reviewed-by: Borislav Petkov (AMD) <bp@alien8.de>
Acked-by: Dave Hansen <dave.hansen@intel.com>
Link: https://lore.kernel.org/r/20250226030129.530345-2-riel@surriel.com
---
arch/x86/mm/tlb.c | 41 +++++++++++++++++++----------------------
1 file changed, 19 insertions(+), 22 deletions(-)
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -1000,6 +1000,15 @@ static struct flush_tlb_info *get_flush_
BUG_ON(this_cpu_inc_return(flush_tlb_info_idx) != 1);
#endif
+ /*
+ * If the number of flushes is so large that a full flush
+ * would be faster, do a full flush.
+ */
+ if ((end - start) >> stride_shift > tlb_single_page_flush_ceiling) {
+ start = 0;
+ end = TLB_FLUSH_ALL;
+ }
+
info->start = start;
info->end = end;
info->mm = mm;
@@ -1026,17 +1035,8 @@ void flush_tlb_mm_range(struct mm_struct
bool freed_tables)
{
struct flush_tlb_info *info;
+ int cpu = get_cpu();
u64 new_tlb_gen;
- int cpu;
-
- cpu = get_cpu();
-
- /* Should we flush just the requested range? */
- if ((end == TLB_FLUSH_ALL) ||
- ((end - start) >> stride_shift) > tlb_single_page_flush_ceiling) {
- start = 0;
- end = TLB_FLUSH_ALL;
- }
/* This is also a barrier that synchronizes with switch_mm(). */
new_tlb_gen = inc_mm_tlb_gen(mm);
@@ -1089,22 +1089,19 @@ static void do_kernel_range_flush(void *
void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
- /* Balance as user space task's flush, a bit conservative */
- if (end == TLB_FLUSH_ALL ||
- (end - start) > tlb_single_page_flush_ceiling << PAGE_SHIFT) {
- on_each_cpu(do_flush_tlb_all, NULL, 1);
- } else {
- struct flush_tlb_info *info;
+ struct flush_tlb_info *info;
- preempt_disable();
- info = get_flush_tlb_info(NULL, start, end, 0, false,
- TLB_GENERATION_INVALID);
+ guard(preempt)();
+ info = get_flush_tlb_info(NULL, start, end, PAGE_SHIFT, false,
+ TLB_GENERATION_INVALID);
+
+ if (info->end == TLB_FLUSH_ALL)
+ on_each_cpu(do_flush_tlb_all, NULL, 1);
+ else
on_each_cpu(do_kernel_range_flush, info, 1);
- put_flush_tlb_info();
- preempt_enable();
- }
+ put_flush_tlb_info();
}
/*


@@ -0,0 +1,103 @@
From e5d151337c384934c9b669967d72f9b29781b308 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
Date: Tue, 25 Feb 2025 22:00:37 -0500
Subject: x86/mm: Add INVLPGB feature and Kconfig entry
Add the INVLPGB feature bit and a Kconfig entry to guard its use.
In addition, the CPU advertises the maximum number of pages that can be
shot down with one INVLPGB instruction in CPUID. Save that information
for later use.
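The same decode can be reproduced from user space on an x86 machine; a
sketch using the GCC/clang <cpuid.h> helper, assuming leaf 0x80000008
carries the INVLPGB feature bit in EBX[3] and the page count minus one in
EDX[15:0], matching the cpu_detect_tlb_amd() hunk below:

#include <stdio.h>
#include <cpuid.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	if (!__get_cpuid(0x80000008, &eax, &ebx, &ecx, &edx))
		return 1;

	if (ebx & (1u << 3))	/* X86_FEATURE_INVLPGB (13*32+ 3) */
		printf("INVLPGB flushes up to %u pages per instruction\n",
		       (edx & 0xffff) + 1);
	else
		printf("INVLPGB not supported\n");
	return 0;
}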
[ bp: use cpu_has(), typos, massage. ]
Signed-off-by: Rik van Riel <riel@surriel.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Link: https://lore.kernel.org/r/20250226030129.530345-3-riel@surriel.com
---
arch/x86/Kconfig.cpu | 4 ++++
arch/x86/include/asm/cpufeatures.h | 1 +
arch/x86/include/asm/disabled-features.h | 8 +++++++-
arch/x86/include/asm/tlbflush.h | 3 +++
arch/x86/kernel/cpu/amd.c | 6 ++++++
5 files changed, 21 insertions(+), 1 deletion(-)
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -740,6 +740,10 @@ menuconfig PROCESSOR_SELECT
This lets you choose what x86 vendor support code your kernel
will include.
+config BROADCAST_TLB_FLUSH
+ def_bool y
+ depends on CPU_SUP_AMD && 64BIT
+
config CPU_SUP_INTEL
default y
bool "Support Intel processors" if PROCESSOR_SELECT
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -338,6 +338,7 @@
#define X86_FEATURE_CLZERO (13*32+ 0) /* "clzero" CLZERO instruction */
#define X86_FEATURE_IRPERF (13*32+ 1) /* "irperf" Instructions Retired Count */
#define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* "xsaveerptr" Always save/restore FP error pointers */
+#define X86_FEATURE_INVLPGB (13*32+ 3) /* INVLPGB and TLBSYNC instructions supported */
#define X86_FEATURE_RDPRU (13*32+ 4) /* "rdpru" Read processor register at user level */
#define X86_FEATURE_WBNOINVD (13*32+ 9) /* "wbnoinvd" WBNOINVD instruction */
#define X86_FEATURE_AMD_IBPB (13*32+12) /* Indirect Branch Prediction Barrier */
--- a/arch/x86/include/asm/disabled-features.h
+++ b/arch/x86/include/asm/disabled-features.h
@@ -129,6 +129,12 @@
#define DISABLE_SEV_SNP (1 << (X86_FEATURE_SEV_SNP & 31))
#endif
+#ifdef CONFIG_BROADCAST_TLB_FLUSH
+#define DISABLE_INVLPGB 0
+#else
+#define DISABLE_INVLPGB (1 << (X86_FEATURE_INVLPGB & 31))
+#endif
+
/*
* Make sure to add features to the correct mask
*/
@@ -146,7 +152,7 @@
#define DISABLED_MASK11 (DISABLE_RETPOLINE|DISABLE_RETHUNK|DISABLE_UNRET| \
DISABLE_CALL_DEPTH_TRACKING|DISABLE_USER_SHSTK)
#define DISABLED_MASK12 (DISABLE_FRED|DISABLE_LAM)
-#define DISABLED_MASK13 0
+#define DISABLED_MASK13 (DISABLE_INVLPGB)
#define DISABLED_MASK14 0
#define DISABLED_MASK15 0
#define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE|DISABLE_LA57|DISABLE_UMIP| \
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -183,6 +183,9 @@ static inline void cr4_init_shadow(void)
extern unsigned long mmu_cr4_features;
extern u32 *trampoline_cr4_features;
+/* How many pages can be invalidated with one INVLPGB. */
+extern u16 invlpgb_count_max;
+
extern void initialize_tlbstate_and_flush(void);
/*
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -29,6 +29,8 @@
#include "cpu.h"
+u16 invlpgb_count_max __ro_after_init;
+
static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p)
{
u32 gprs[8] = { 0 };
@@ -1139,6 +1141,10 @@ static void cpu_detect_tlb_amd(struct cp
tlb_lli_2m[ENTRIES] = eax & mask;
tlb_lli_4m[ENTRIES] = tlb_lli_2m[ENTRIES] >> 1;
+
+ /* Max number of pages INVLPGB can invalidate in one shot */
+ if (cpu_has(c, X86_FEATURE_INVLPGB))
+ invlpgb_count_max = (cpuid_edx(0x80000008) & 0xffff) + 1;
}
static const struct cpu_dev amd_cpu_dev = {


@@ -0,0 +1,170 @@
From 9bbface3289771c5990e97ca047a52faaeafdb83 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
Date: Fri, 28 Feb 2025 20:32:30 +0100
Subject: x86/mm: Add INVLPGB support code
Add helper functions and definitions needed to use broadcast TLB
invalidation on AMD CPUs.
[ bp:
- Cleanup commit message
- Improve and expand comments
- push the preemption guards inside the invlpgb* helpers
- merge improvements from dhansen
- add !CONFIG_BROADCAST_TLB_FLUSH function stubs because Clang
can't do DCE properly yet and looks at the inline asm and
complains about it getting a u64 argument on 32-bit code ]
Signed-off-by: Rik van Riel <riel@surriel.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Link: https://lore.kernel.org/r/20250226030129.530345-4-riel@surriel.com
---
arch/x86/include/asm/tlb.h | 132 +++++++++++++++++++++++++++++++++++++
1 file changed, 132 insertions(+)
--- a/arch/x86/include/asm/tlb.h
+++ b/arch/x86/include/asm/tlb.h
@@ -6,6 +6,9 @@
static inline void tlb_flush(struct mmu_gather *tlb);
#include <asm-generic/tlb.h>
+#include <linux/kernel.h>
+#include <vdso/bits.h>
+#include <vdso/page.h>
static inline void tlb_flush(struct mmu_gather *tlb)
{
@@ -25,4 +28,133 @@ static inline void invlpg(unsigned long
asm volatile("invlpg (%0)" ::"r" (addr) : "memory");
}
+enum addr_stride {
+ PTE_STRIDE = 0,
+ PMD_STRIDE = 1
+};
+
+#ifdef CONFIG_BROADCAST_TLB_FLUSH
+/*
+ * INVLPGB does broadcast TLB invalidation across all the CPUs in the system.
+ *
+ * The INVLPGB instruction is weakly ordered, and a batch of invalidations can
+ * be done in a parallel fashion.
+ *
+ * The instruction takes the number of extra pages to invalidate, beyond
+ * the first page, while __invlpgb gets the more human readable number of
+ * pages to invalidate.
+ *
+ * The bits in rax[0:2] determine respectively which components of the address
+ * (VA, PCID, ASID) get compared when flushing. If none of these bits are set, *any*
+ * address in the specified range matches.
+ *
+ * TLBSYNC is used to ensure that pending INVLPGB invalidations initiated from
+ * this CPU have completed.
+ */
+static inline void __invlpgb(unsigned long asid, unsigned long pcid,
+ unsigned long addr, u16 nr_pages,
+ enum addr_stride stride, u8 flags)
+{
+ u32 edx = (pcid << 16) | asid;
+ u32 ecx = (stride << 31) | (nr_pages - 1);
+ u64 rax = addr | flags;
+
+ /* The low bits in rax are for flags. Verify addr is clean. */
+ VM_WARN_ON_ONCE(addr & ~PAGE_MASK);
+
+ /* INVLPGB; supported in binutils >= 2.36. */
+ asm volatile(".byte 0x0f, 0x01, 0xfe" :: "a" (rax), "c" (ecx), "d" (edx));
+}
+
+static inline void __invlpgb_all(unsigned long asid, unsigned long pcid, u8 flags)
+{
+ __invlpgb(asid, pcid, 0, 1, 0, flags);
+}
+
+static inline void __tlbsync(void)
+{
+ /*
+ * TLBSYNC waits for INVLPGB instructions originating on the same CPU
+ * to have completed. Print a warning if the task has been migrated,
+ * and might not be waiting on all the INVLPGBs issued during this TLB
+ * invalidation sequence.
+ */
+ cant_migrate();
+
+ /* TLBSYNC: supported in binutils >= 2.36. */
+ asm volatile(".byte 0x0f, 0x01, 0xff" ::: "memory");
+}
+#else
+/* Some compilers (I'm looking at you clang!) simply can't do DCE */
+static inline void __invlpgb(unsigned long asid, unsigned long pcid,
+ unsigned long addr, u16 nr_pages,
+ enum addr_stride s, u8 flags) { }
+static inline void __invlpgb_all(unsigned long asid, unsigned long pcid, u8 flags) { }
+static inline void __tlbsync(void) { }
+#endif
+
+/*
+ * INVLPGB can be targeted by virtual address, PCID, ASID, or any combination
+ * of the three. For example:
+ * - FLAG_VA | FLAG_INCLUDE_GLOBAL: invalidate all TLB entries at the address
+ * - FLAG_PCID: invalidate all TLB entries matching the PCID
+ *
+ * The first is used to invalidate (kernel) mappings at a particular
+ * address across all processes.
+ *
+ * The second invalidates all TLB entries matching a PCID.
+ */
+#define INVLPGB_FLAG_VA BIT(0)
+#define INVLPGB_FLAG_PCID BIT(1)
+#define INVLPGB_FLAG_ASID BIT(2)
+#define INVLPGB_FLAG_INCLUDE_GLOBAL BIT(3)
+#define INVLPGB_FLAG_FINAL_ONLY BIT(4)
+#define INVLPGB_FLAG_INCLUDE_NESTED BIT(5)
+
+/* The implied mode when all bits are clear: */
+#define INVLPGB_MODE_ALL_NONGLOBALS 0UL
+
+static inline void invlpgb_flush_user_nr_nosync(unsigned long pcid,
+ unsigned long addr,
+ u16 nr, bool stride)
+{
+ enum addr_stride str = stride ? PMD_STRIDE : PTE_STRIDE;
+ u8 flags = INVLPGB_FLAG_PCID | INVLPGB_FLAG_VA;
+
+ __invlpgb(0, pcid, addr, nr, str, flags);
+}
+
+/* Flush all mappings for a given PCID, not including globals. */
+static inline void invlpgb_flush_single_pcid_nosync(unsigned long pcid)
+{
+ __invlpgb_all(0, pcid, INVLPGB_FLAG_PCID);
+}
+
+/* Flush all mappings, including globals, for all PCIDs. */
+static inline void invlpgb_flush_all(void)
+{
+ /*
+ * TLBSYNC at the end needs to make sure all flushes done on the
+ * current CPU have been executed system-wide. Therefore, make
+ * sure nothing gets migrated in-between but disable preemption
+ * as it is cheaper.
+ */
+ guard(preempt)();
+ __invlpgb_all(0, 0, INVLPGB_FLAG_INCLUDE_GLOBAL);
+ __tlbsync();
+}
+
+/* Flush addr, including globals, for all PCIDs. */
+static inline void invlpgb_flush_addr_nosync(unsigned long addr, u16 nr)
+{
+ __invlpgb(0, 0, addr, nr, PTE_STRIDE, INVLPGB_FLAG_INCLUDE_GLOBAL);
+}
+
+/* Flush all mappings for all PCIDs except globals. */
+static inline void invlpgb_flush_all_nonglobals(void)
+{
+ guard(preempt)();
+ __invlpgb_all(0, 0, INVLPGB_MODE_ALL_NONGLOBALS);
+ __tlbsync();
+}
#endif /* _ASM_X86_TLB_H */
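A hedged usage sketch of these helpers (kernel context assumed; the PCID
value, page count and stride are illustrative, not taken from the patch):

	preempt_disable();	/* TLBSYNC must run on the CPU that issued INVLPGB */
	invlpgb_flush_user_nr_nosync(5, addr, 16, false);	/* false: PTE_STRIDE */
	__tlbsync();		/* wait for our pending INVLPGBs to complete */
	preempt_enable();

For PCID 5, 16 pages and a 4 KiB stride, __invlpgb() encodes this as
rax = addr | INVLPGB_FLAG_PCID | INVLPGB_FLAG_VA, ecx = (PTE_STRIDE << 31) | 15
and edx = 5 << 16.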


@@ -0,0 +1,97 @@
From 293fdf15ead45cd235e12a4f62f81767f7bce528 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
Date: Tue, 25 Feb 2025 22:00:39 -0500
Subject: x86/mm: Use INVLPGB for kernel TLB flushes
Use broadcast TLB invalidation for kernel addresses when available.
Remove the need to send IPIs for kernel TLB flushes.
[ bp: Integrate dhansen's comments additions, merge the
flush_tlb_all() change into this one too. ]
Signed-off-by: Rik van Riel <riel@surriel.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Link: https://lore.kernel.org/r/20250226030129.530345-5-riel@surriel.com
---
arch/x86/mm/tlb.c | 48 +++++++++++++++++++++++++++++++++++++++++++----
1 file changed, 44 insertions(+), 4 deletions(-)
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -1064,7 +1064,6 @@ void flush_tlb_mm_range(struct mm_struct
mmu_notifier_arch_invalidate_secondary_tlbs(mm, start, end);
}
-
static void do_flush_tlb_all(void *info)
{
count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
@@ -1074,7 +1073,32 @@ static void do_flush_tlb_all(void *info)
void flush_tlb_all(void)
{
count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
- on_each_cpu(do_flush_tlb_all, NULL, 1);
+
+ /* First try (faster) hardware-assisted TLB invalidation. */
+ if (cpu_feature_enabled(X86_FEATURE_INVLPGB))
+ invlpgb_flush_all();
+ else
+ /* Fall back to the IPI-based invalidation. */
+ on_each_cpu(do_flush_tlb_all, NULL, 1);
+}
+
+/* Flush an arbitrarily large range of memory with INVLPGB. */
+static void invlpgb_kernel_range_flush(struct flush_tlb_info *info)
+{
+ unsigned long addr, nr;
+
+ for (addr = info->start; addr < info->end; addr += nr << PAGE_SHIFT) {
+ nr = (info->end - addr) >> PAGE_SHIFT;
+
+ /*
+ * INVLPGB has a limit on the size of ranges it can
+ * flush. Break up large flushes.
+ */
+ nr = clamp_val(nr, 1, invlpgb_count_max);
+
+ invlpgb_flush_addr_nosync(addr, nr);
+ }
+ __tlbsync();
}
static void do_kernel_range_flush(void *info)
@@ -1087,6 +1111,22 @@ static void do_kernel_range_flush(void *
flush_tlb_one_kernel(addr);
}
+static void kernel_tlb_flush_all(struct flush_tlb_info *info)
+{
+ if (cpu_feature_enabled(X86_FEATURE_INVLPGB))
+ invlpgb_flush_all();
+ else
+ on_each_cpu(do_flush_tlb_all, NULL, 1);
+}
+
+static void kernel_tlb_flush_range(struct flush_tlb_info *info)
+{
+ if (cpu_feature_enabled(X86_FEATURE_INVLPGB))
+ invlpgb_kernel_range_flush(info);
+ else
+ on_each_cpu(do_kernel_range_flush, info, 1);
+}
+
void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
struct flush_tlb_info *info;
@@ -1097,9 +1137,9 @@ void flush_tlb_kernel_range(unsigned lon
TLB_GENERATION_INVALID);
if (info->end == TLB_FLUSH_ALL)
- on_each_cpu(do_flush_tlb_all, NULL, 1);
+ kernel_tlb_flush_all(info);
else
- on_each_cpu(do_kernel_range_flush, info, 1);
+ kernel_tlb_flush_range(info);
put_flush_tlb_info();
}
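The chunking in invlpgb_kernel_range_flush() can be modeled in user space;
the 40-page range and the invlpgb_count_max value below are made up (the
real maximum comes from CPUID):

#include <stdio.h>

int main(void)
{
	unsigned long start = 0, end = 40UL << 12;	/* a 40-page range */
	unsigned long count_max = 8;			/* pretend CPUID said 8 */
	unsigned long addr, nr;

	for (addr = start; addr < end; addr += nr << 12) {
		nr = (end - addr) >> 12;
		if (nr > count_max)
			nr = count_max;			/* clamp_val() analogue */
		printf("INVLPGB %#lx + %lu pages\n", addr, nr);
	}
	/* a single TLBSYNC afterwards waits for all five flushes above */
	return 0;
}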


@@ -1,23 +1,25 @@
From 5e5219596683c3b8178e09f6ec1e75154537325f Mon Sep 17 00:00:00 2001
From a093136bdb306345cd686f47c8faae8a608cfb47 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
Date: Wed, 5 Feb 2025 23:43:27 -0500
Subject: x86/mm: use broadcast TLB flushing for page reclaim TLB flushing
Date: Tue, 25 Feb 2025 22:00:41 -0500
Subject: x86/mm: Use broadcast TLB flushing in page reclaim
In the page reclaim code, we only track the CPU(s) where the TLB needs
to be flushed, rather than all the individual mappings that may be getting
invalidated.
Page reclaim tracks only the CPU(s) where the TLB needs to be flushed, rather
than all the individual mappings that may be getting invalidated.
Use broadcast TLB flushing when that is available.
[ bp: Massage commit message. ]
Signed-off-by: Rik van Riel <riel@surriel.com>
Tested-by: Manali Shukla <Manali.Shukla@amd.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Link: https://lore.kernel.org/r/20250226030129.530345-7-riel@surriel.com
---
arch/x86/mm/tlb.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -1330,7 +1330,9 @@ void arch_tlbbatch_flush(struct arch_tlb
@@ -1320,7 +1320,9 @@ void arch_tlbbatch_flush(struct arch_tlb
* a local TLB flush is needed. Optimize this use-case by calling
* flush_tlb_func_local() directly in this case.
*/


@@ -0,0 +1,286 @@
From ef345ff96b47f21932c489edd2ebb44fbe3cb517 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
Date: Tue, 25 Feb 2025 22:00:42 -0500
Subject: x86/mm: Add global ASID allocation helper functions
Add functions to manage global ASID space. Multithreaded processes that are
simultaneously active on 4 or more CPUs can get a global ASID, resulting in the
same PCID being used for that process on every CPU.
This in turn will allow the kernel to use hardware-assisted TLB flushing
through AMD INVLPGB or Intel RAR for these processes.
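The allocator added below can be modeled in a few lines of user-space C;
the constants are illustrative stand-ins for MAX_ASID_AVAILABLE and
TLB_NR_DYN_ASIDS, and the rollover flush is reduced to a comment:

#include <stdio.h>
#include <string.h>

#define MAX_ASID  64	/* stand-in for MAX_ASID_AVAILABLE */
#define DYN_ASIDS  6	/* stand-in for TLB_NR_DYN_ASIDS */

static unsigned char used[MAX_ASID], freed[MAX_ASID];
static int last = MAX_ASID;	/* force a reset on the first allocation */

static void reset_space(void)	/* reset_global_asid_space() analogue */
{
	/* invlpgb_flush_all_nonglobals() would run here */
	for (int i = 0; i < MAX_ASID; i++)
		used[i] = used[i] && !freed[i];
	memset(freed, 0, sizeof(freed));
	last = DYN_ASIDS;
}

static int alloc_asid(void)
{
	if (last >= MAX_ASID - 1)
		reset_space();
	for (int a = last; a < MAX_ASID; a++) {
		if (!used[a]) {
			used[a] = 1;
			last = a;
			return a;
		}
	}
	return 0;		/* space exhausted */
}

static void free_asid(int a)
{
	freed[a] = 1;		/* reusable only after the next rollover flush */
}

int main(void)
{
	int a = alloc_asid(), b = alloc_asid();

	printf("a=%d b=%d\n", a, b);		/* 6 and 7: dynamic IDs skipped */
	free_asid(a);
	printf("next=%d\n", alloc_asid());	/* 8, not 6: no reuse before flush */
	return 0;
}

For context on the comment changes in the diff: an ASID n is written to
CR3 as kPCID n + 1 because PCID 0 is special, and with KPTI the matching
uPCID is kPCID + 2048, which is why all three ranges now extend to
MAX_ASID_AVAILABLE.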
[ bp:
- Extend use_global_asid() comment
- s/X86_BROADCAST_TLB_FLUSH/BROADCAST_TLB_FLUSH/g
- other touchups ]
Signed-off-by: Rik van Riel <riel@surriel.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Link: https://lore.kernel.org/r/20250226030129.530345-8-riel@surriel.com
---
arch/x86/include/asm/mmu.h | 12 +++
arch/x86/include/asm/mmu_context.h | 2 +
arch/x86/include/asm/tlbflush.h | 37 +++++++
arch/x86/mm/tlb.c | 154 ++++++++++++++++++++++++++++-
4 files changed, 202 insertions(+), 3 deletions(-)
--- a/arch/x86/include/asm/mmu.h
+++ b/arch/x86/include/asm/mmu.h
@@ -69,6 +69,18 @@ typedef struct {
u16 pkey_allocation_map;
s16 execute_only_pkey;
#endif
+
+#ifdef CONFIG_BROADCAST_TLB_FLUSH
+ /*
+ * The global ASID will be a non-zero value when the process has
+ * the same ASID across all CPUs, allowing it to make use of
+ * hardware-assisted remote TLB invalidation like AMD INVLPGB.
+ */
+ u16 global_asid;
+
+ /* The process is transitioning to a new global ASID number. */
+ bool asid_transition;
+#endif
} mm_context_t;
#define INIT_MM_CONTEXT(mm) \
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -139,6 +139,8 @@ static inline void mm_reset_untag_mask(s
#define enter_lazy_tlb enter_lazy_tlb
extern void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk);
+extern void mm_free_global_asid(struct mm_struct *mm);
+
/*
* Init a new mm. Used on mm copies, like at fork()
* and on mm's that are brand-new, like at execve().
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -6,6 +6,7 @@
#include <linux/mmu_notifier.h>
#include <linux/sched.h>
+#include <asm/barrier.h>
#include <asm/processor.h>
#include <asm/cpufeature.h>
#include <asm/special_insns.h>
@@ -234,6 +235,42 @@ void flush_tlb_one_kernel(unsigned long
void flush_tlb_multi(const struct cpumask *cpumask,
const struct flush_tlb_info *info);
+static inline bool is_dyn_asid(u16 asid)
+{
+ return asid < TLB_NR_DYN_ASIDS;
+}
+
+#ifdef CONFIG_BROADCAST_TLB_FLUSH
+static inline u16 mm_global_asid(struct mm_struct *mm)
+{
+ u16 asid;
+
+ if (!cpu_feature_enabled(X86_FEATURE_INVLPGB))
+ return 0;
+
+ asid = smp_load_acquire(&mm->context.global_asid);
+
+ /* mm->context.global_asid is either 0, or a global ASID */
+ VM_WARN_ON_ONCE(asid && is_dyn_asid(asid));
+
+ return asid;
+}
+
+static inline void mm_assign_global_asid(struct mm_struct *mm, u16 asid)
+{
+ /*
+ * Notably flush_tlb_mm_range() -> broadcast_tlb_flush() ->
+ * finish_asid_transition() needs to observe asid_transition = true
+ * once it observes global_asid.
+ */
+ mm->context.asid_transition = true;
+ smp_store_release(&mm->context.global_asid, asid);
+}
+#else
+static inline u16 mm_global_asid(struct mm_struct *mm) { return 0; }
+static inline void mm_assign_global_asid(struct mm_struct *mm, u16 asid) { }
+#endif /* CONFIG_BROADCAST_TLB_FLUSH */
+
#ifdef CONFIG_PARAVIRT
#include <asm/paravirt.h>
#endif
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -74,13 +74,15 @@
* use different names for each of them:
*
* ASID - [0, TLB_NR_DYN_ASIDS-1]
- * the canonical identifier for an mm
+ * the canonical identifier for an mm, dynamically allocated on each CPU
+ * [TLB_NR_DYN_ASIDS, MAX_ASID_AVAILABLE-1]
+ * the canonical, global identifier for an mm, identical across all CPUs
*
- * kPCID - [1, TLB_NR_DYN_ASIDS]
+ * kPCID - [1, MAX_ASID_AVAILABLE]
* the value we write into the PCID part of CR3; corresponds to the
* ASID+1, because PCID 0 is special.
*
- * uPCID - [2048 + 1, 2048 + TLB_NR_DYN_ASIDS]
+ * uPCID - [2048 + 1, 2048 + MAX_ASID_AVAILABLE]
* for KPTI each mm has two address spaces and thus needs two
* PCID values, but we can still do with a single ASID denomination
* for each mm. Corresponds to kPCID + 2048.
@@ -252,6 +254,152 @@ static void choose_new_asid(struct mm_st
}
/*
+ * Global ASIDs are allocated for multi-threaded processes that are
+ * active on multiple CPUs simultaneously, giving each of those
+ * processes the same PCID on every CPU, for use with hardware-assisted
+ * TLB shootdown on remote CPUs, like AMD INVLPGB or Intel RAR.
+ *
+ * These global ASIDs are held for the lifetime of the process.
+ */
+static DEFINE_RAW_SPINLOCK(global_asid_lock);
+static u16 last_global_asid = MAX_ASID_AVAILABLE;
+static DECLARE_BITMAP(global_asid_used, MAX_ASID_AVAILABLE);
+static DECLARE_BITMAP(global_asid_freed, MAX_ASID_AVAILABLE);
+static int global_asid_available = MAX_ASID_AVAILABLE - TLB_NR_DYN_ASIDS - 1;
+
+/*
+ * When the search for a free ASID in the global ASID space reaches
+ * MAX_ASID_AVAILABLE, a global TLB flush guarantees that previously
+ * freed global ASIDs are safe to re-use.
+ *
+ * This way the global flush only needs to happen at ASID rollover
+ * time, and not at ASID allocation time.
+ */
+static void reset_global_asid_space(void)
+{
+ lockdep_assert_held(&global_asid_lock);
+
+ invlpgb_flush_all_nonglobals();
+
+ /*
+ * The TLB flush above makes it safe to re-use the previously
+ * freed global ASIDs.
+ */
+ bitmap_andnot(global_asid_used, global_asid_used,
+ global_asid_freed, MAX_ASID_AVAILABLE);
+ bitmap_clear(global_asid_freed, 0, MAX_ASID_AVAILABLE);
+
+ /* Restart the search from the start of global ASID space. */
+ last_global_asid = TLB_NR_DYN_ASIDS;
+}
+
+static u16 allocate_global_asid(void)
+{
+ u16 asid;
+
+ lockdep_assert_held(&global_asid_lock);
+
+ /* The previous allocation hit the edge of available address space */
+ if (last_global_asid >= MAX_ASID_AVAILABLE - 1)
+ reset_global_asid_space();
+
+ asid = find_next_zero_bit(global_asid_used, MAX_ASID_AVAILABLE, last_global_asid);
+
+ if (asid >= MAX_ASID_AVAILABLE && !global_asid_available) {
+ /* This should never happen. */
+ VM_WARN_ONCE(1, "Unable to allocate global ASID despite %d available\n",
+ global_asid_available);
+ return 0;
+ }
+
+ /* Claim this global ASID. */
+ __set_bit(asid, global_asid_used);
+ last_global_asid = asid;
+ global_asid_available--;
+ return asid;
+}
+
+/*
+ * Check whether a process is currently active on more than @threshold CPUs.
+ * This is a cheap estimation on whether or not it may make sense to assign
+ * a global ASID to this process, and use broadcast TLB invalidation.
+ */
+static bool mm_active_cpus_exceeds(struct mm_struct *mm, int threshold)
+{
+ int count = 0;
+ int cpu;
+
+ /* This quick check should eliminate most single threaded programs. */
+ if (cpumask_weight(mm_cpumask(mm)) <= threshold)
+ return false;
+
+ /* Slower check to make sure. */
+ for_each_cpu(cpu, mm_cpumask(mm)) {
+ /* Skip the CPUs that aren't really running this process. */
+ if (per_cpu(cpu_tlbstate.loaded_mm, cpu) != mm)
+ continue;
+
+ if (per_cpu(cpu_tlbstate_shared.is_lazy, cpu))
+ continue;
+
+ if (++count > threshold)
+ return true;
+ }
+ return false;
+}
+
+/*
+ * Assign a global ASID to the current process, protecting against
+ * races between multiple threads in the process.
+ */
+static void use_global_asid(struct mm_struct *mm)
+{
+ u16 asid;
+
+ guard(raw_spinlock_irqsave)(&global_asid_lock);
+
+ /* This process is already using broadcast TLB invalidation. */
+ if (mm_global_asid(mm))
+ return;
+
+ /*
+ * The last global ASID was consumed while waiting for the lock.
+ *
+ * If this fires, a more aggressive ASID reuse scheme might be
+ * needed.
+ */
+ if (!global_asid_available) {
+ VM_WARN_ONCE(1, "Ran out of global ASIDs\n");
+ return;
+ }
+
+ asid = allocate_global_asid();
+ if (!asid)
+ return;
+
+ mm_assign_global_asid(mm, asid);
+}
+
+void mm_free_global_asid(struct mm_struct *mm)
+{
+ if (!cpu_feature_enabled(X86_FEATURE_INVLPGB))
+ return;
+
+ if (!mm_global_asid(mm))
+ return;
+
+ guard(raw_spinlock_irqsave)(&global_asid_lock);
+
+ /* The global ASID can be re-used only after flush at wrap-around. */
+#ifdef CONFIG_BROADCAST_TLB_FLUSH
+ __set_bit(mm->context.global_asid, global_asid_freed);
+
+ mm->context.global_asid = 0;
+ global_asid_available++;
+#endif
+}
+
+/*
* Given an ASID, flush the corresponding user ASID. We can delay this
* until the next time we switch to it.
*


@@ -0,0 +1,215 @@
From b3eb743c32515bf8fca7b619dd2a2c64b5812dd8 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
Date: Tue, 25 Feb 2025 22:00:43 -0500
Subject: x86/mm: Handle global ASID context switch and TLB flush
Add context switch and TLB flush support for processes that use a global
ASID and PCID across all CPUs.
At both context switch time and TLB flush time, check whether a task is
switching to a global ASID and, if so, reload the TLB with the new ASID as
appropriate.
In both code paths, the TLB flush is avoided if a global ASID is used, because
global ASIDs are always kept up to date across CPUs, even when the process
is not running on a CPU.
[ bp:
- Massage
- :%s/\<static_cpu_has\>/cpu_feature_enabled/cgi
]
Signed-off-by: Rik van Riel <riel@surriel.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Link: https://lore.kernel.org/r/20250226030129.530345-9-riel@surriel.com
---
arch/x86/include/asm/tlbflush.h | 14 ++++++
arch/x86/mm/tlb.c | 77 ++++++++++++++++++++++++++++++---
2 files changed, 84 insertions(+), 7 deletions(-)
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -240,6 +240,11 @@ static inline bool is_dyn_asid(u16 asid)
return asid < TLB_NR_DYN_ASIDS;
}
+static inline bool is_global_asid(u16 asid)
+{
+ return !is_dyn_asid(asid);
+}
+
#ifdef CONFIG_BROADCAST_TLB_FLUSH
static inline u16 mm_global_asid(struct mm_struct *mm)
{
@@ -266,9 +271,18 @@ static inline void mm_assign_global_asid
mm->context.asid_transition = true;
smp_store_release(&mm->context.global_asid, asid);
}
+
+static inline bool mm_in_asid_transition(struct mm_struct *mm)
+{
+ if (!cpu_feature_enabled(X86_FEATURE_INVLPGB))
+ return false;
+
+ return mm && READ_ONCE(mm->context.asid_transition);
+}
#else
static inline u16 mm_global_asid(struct mm_struct *mm) { return 0; }
static inline void mm_assign_global_asid(struct mm_struct *mm, u16 asid) { }
+static inline bool mm_in_asid_transition(struct mm_struct *mm) { return false; }
#endif /* CONFIG_BROADCAST_TLB_FLUSH */
#ifdef CONFIG_PARAVIRT
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -227,6 +227,20 @@ static void choose_new_asid(struct mm_st
return;
}
+ /*
+ * TLB consistency for global ASIDs is maintained with hardware assisted
+ * remote TLB flushing. Global ASIDs are always up to date.
+ */
+ if (cpu_feature_enabled(X86_FEATURE_INVLPGB)) {
+ u16 global_asid = mm_global_asid(next);
+
+ if (global_asid) {
+ *new_asid = global_asid;
+ *need_flush = false;
+ return;
+ }
+ }
+
if (this_cpu_read(cpu_tlbstate.invalidate_other))
clear_asid_other();
@@ -400,6 +414,23 @@ void mm_free_global_asid(struct mm_struc
}
/*
+ * Is the mm transitioning from a CPU-local ASID to a global ASID?
+ */
+static bool mm_needs_global_asid(struct mm_struct *mm, u16 asid)
+{
+ u16 global_asid = mm_global_asid(mm);
+
+ if (!cpu_feature_enabled(X86_FEATURE_INVLPGB))
+ return false;
+
+ /* Process is transitioning to a global ASID */
+ if (global_asid && asid != global_asid)
+ return true;
+
+ return false;
+}
+
+/*
* Given an ASID, flush the corresponding user ASID. We can delay this
* until the next time we switch to it.
*
@@ -704,7 +735,8 @@ void switch_mm_irqs_off(struct mm_struct
*/
if (prev == next) {
/* Not actually switching mm's */
- VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) !=
+ VM_WARN_ON(is_dyn_asid(prev_asid) &&
+ this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) !=
next->context.ctx_id);
/*
@@ -721,6 +753,20 @@ void switch_mm_irqs_off(struct mm_struct
!cpumask_test_cpu(cpu, mm_cpumask(next))))
cpumask_set_cpu(cpu, mm_cpumask(next));
+ /* Check if the current mm is transitioning to a global ASID */
+ if (mm_needs_global_asid(next, prev_asid)) {
+ next_tlb_gen = atomic64_read(&next->context.tlb_gen);
+ choose_new_asid(next, next_tlb_gen, &new_asid, &need_flush);
+ goto reload_tlb;
+ }
+
+ /*
+ * Broadcast TLB invalidation keeps this ASID up to date
+ * all the time.
+ */
+ if (is_global_asid(prev_asid))
+ return;
+
/*
* If the CPU is not in lazy TLB mode, we are just switching
* from one thread in a process to another thread in the same
@@ -755,6 +801,13 @@ void switch_mm_irqs_off(struct mm_struct
cond_mitigation(tsk);
/*
+ * Let nmi_uaccess_okay() and finish_asid_transition()
+ * know that CR3 is changing.
+ */
+ this_cpu_write(cpu_tlbstate.loaded_mm, LOADED_MM_SWITCHING);
+ barrier();
+
+ /*
* Leave this CPU in prev's mm_cpumask. Atomic writes to
* mm_cpumask can be expensive under contention. The CPU
* will be removed lazily at TLB flush time.
@@ -768,14 +821,12 @@ void switch_mm_irqs_off(struct mm_struct
next_tlb_gen = atomic64_read(&next->context.tlb_gen);
choose_new_asid(next, next_tlb_gen, &new_asid, &need_flush);
-
- /* Let nmi_uaccess_okay() know that we're changing CR3. */
- this_cpu_write(cpu_tlbstate.loaded_mm, LOADED_MM_SWITCHING);
- barrier();
}
+reload_tlb:
new_lam = mm_lam_cr3_mask(next);
if (need_flush) {
+ VM_WARN_ON_ONCE(is_global_asid(new_asid));
this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
load_new_mm_cr3(next->pgd, new_asid, new_lam, true);
@@ -894,7 +945,7 @@ static void flush_tlb_func(void *info)
const struct flush_tlb_info *f = info;
struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
- u64 local_tlb_gen = this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].tlb_gen);
+ u64 local_tlb_gen;
bool local = smp_processor_id() == f->initiating_cpu;
unsigned long nr_invalidate = 0;
u64 mm_tlb_gen;
@@ -917,6 +968,16 @@ static void flush_tlb_func(void *info)
if (unlikely(loaded_mm == &init_mm))
return;
+ /* Reload the ASID if transitioning into or out of a global ASID */
+ if (mm_needs_global_asid(loaded_mm, loaded_mm_asid)) {
+ switch_mm_irqs_off(NULL, loaded_mm, NULL);
+ loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
+ }
+
+ /* Broadcast ASIDs are always kept up to date with INVLPGB. */
+ if (is_global_asid(loaded_mm_asid))
+ return;
+
VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].ctx_id) !=
loaded_mm->context.ctx_id);
@@ -934,6 +995,8 @@ static void flush_tlb_func(void *info)
return;
}
+ local_tlb_gen = this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].tlb_gen);
+
if (unlikely(f->new_tlb_gen != TLB_GENERATION_INVALID &&
f->new_tlb_gen <= local_tlb_gen)) {
/*
@@ -1101,7 +1164,7 @@ STATIC_NOPV void native_flush_tlb_multi(
* up on the new contents of what used to be page tables, while
* doing a speculative memory access.
*/
- if (info->freed_tables)
+ if (info->freed_tables || mm_in_asid_transition(info->mm))
on_each_cpu_mask(cpumask, flush_tlb_func, (void *)info, true);
else
on_each_cpu_cond_mask(should_flush_tlb, flush_tlb_func,


@@ -0,0 +1,88 @@
From c63f1d0a496de7a926b92b52061905edfc8428a4 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
Date: Tue, 25 Feb 2025 22:00:44 -0500
Subject: x86/mm: Add global ASID process exit helpers
A global ASID is allocated for the lifetime of a process. Free the global ASID
at process exit time.
[ bp: Massage, create helpers, hide details inside them. ]
Signed-off-by: Rik van Riel <riel@surriel.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Link: https://lore.kernel.org/r/20250226030129.530345-10-riel@surriel.com
---
arch/x86/include/asm/mmu_context.h | 8 +++++++-
arch/x86/include/asm/tlbflush.h | 9 +++++++++
2 files changed, 16 insertions(+), 1 deletion(-)
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -2,7 +2,6 @@
#ifndef _ASM_X86_MMU_CONTEXT_H
#define _ASM_X86_MMU_CONTEXT_H
-#include <asm/desc.h>
#include <linux/atomic.h>
#include <linux/mm_types.h>
#include <linux/pkeys.h>
@@ -13,6 +12,7 @@
#include <asm/paravirt.h>
#include <asm/debugreg.h>
#include <asm/gsseg.h>
+#include <asm/desc.h>
extern atomic64_t last_mm_ctx_id;
@@ -139,6 +139,9 @@ static inline void mm_reset_untag_mask(s
#define enter_lazy_tlb enter_lazy_tlb
extern void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk);
+#define mm_init_global_asid mm_init_global_asid
+extern void mm_init_global_asid(struct mm_struct *mm);
+
extern void mm_free_global_asid(struct mm_struct *mm);
/*
@@ -163,6 +166,8 @@ static inline int init_new_context(struc
mm->context.execute_only_pkey = -1;
}
#endif
+
+ mm_init_global_asid(mm);
mm_reset_untag_mask(mm);
init_new_context_ldt(mm);
return 0;
@@ -172,6 +177,7 @@ static inline int init_new_context(struc
static inline void destroy_context(struct mm_struct *mm)
{
destroy_context_ldt(mm);
+ mm_free_global_asid(mm);
}
extern void switch_mm(struct mm_struct *prev, struct mm_struct *next,
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -261,6 +261,14 @@ static inline u16 mm_global_asid(struct
return asid;
}
+static inline void mm_init_global_asid(struct mm_struct *mm)
+{
+ if (cpu_feature_enabled(X86_FEATURE_INVLPGB)) {
+ mm->context.global_asid = 0;
+ mm->context.asid_transition = false;
+ }
+}
+
static inline void mm_assign_global_asid(struct mm_struct *mm, u16 asid)
{
/*
@@ -281,6 +289,7 @@ static inline bool mm_in_asid_transition
}
#else
static inline u16 mm_global_asid(struct mm_struct *mm) { return 0; }
+static inline void mm_init_global_asid(struct mm_struct *mm) { }
static inline void mm_assign_global_asid(struct mm_struct *mm, u16 asid) { }
static inline bool mm_in_asid_transition(struct mm_struct *mm) { return false; }
#endif /* CONFIG_BROADCAST_TLB_FLUSH */


@@ -0,0 +1,219 @@
From e16bb18388207841efa841b9b11e69c886817024 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
Date: Tue, 25 Feb 2025 22:00:45 -0500
Subject: x86/mm: Enable broadcast TLB invalidation for multi-threaded
processes
There is not enough room in the 12-bit ASID address space to hand out
broadcast ASIDs to every process. Only hand out broadcast ASIDs to processes
when they are observed to be simultaneously running on 4 or more CPUs.
This also allows single-threaded processes to continue using the cheaper,
local TLB invalidation instructions like INVLPG.
Due to the structure of flush_tlb_mm_range(), the INVLPGB flushing is done in
a generically named broadcast_tlb_flush() function which can later also be
used for Intel RAR.
Combined with the removal of unnecessary lru_add_drain() calls (see
https://lore.kernel.org/r/20241219153253.3da9e8aa@fangorn) this results in
a nice performance boost for the will-it-scale tlb_flush2_threads test on an
AMD Milan system with 36 cores:
- vanilla kernel: 527k loops/second
- lru_add_drain removal: 731k loops/second
- only INVLPGB: 527k loops/second
- lru_add_drain + INVLPGB: 1157k loops/second
Profiling with only the INVLPGB changes showed that, while TLB invalidation went
down from 40% of the total CPU time to only around 4% of CPU time, the
contention simply moved to the LRU lock.
Fixing both at the same time roughly doubles the number of iterations per second
in this case.
Comparing will-it-scale tlb_flush2_threads with several different numbers of
threads on a 72 CPU AMD Milan shows similar results. The number represents the
total number of loops per second across all the threads:
threads		tip		INVLPGB
1		315k		304k
2		423k		424k
4		644k		1032k
8		652k		1267k
16		737k		1368k
32		759k		1199k
64		636k		1094k
72		609k		993k
1 and 2 thread performance is similar with and without INVLPGB, because
INVLPGB is only used on processes using 4 or more CPUs simultaneously.
The number is the median across 5 runs.
Some numbers closer to real world performance can be found at Phoronix, thanks
to Michael:
https://www.phoronix.com/news/AMD-INVLPGB-Linux-Benefits
[ bp:
- Massage
- :%s/\<static_cpu_has\>/cpu_feature_enabled/cgi
- :%s/\<clear_asid_transition\>/mm_clear_asid_transition/cgi
- Fold in a 0day bot fix: https://lore.kernel.org/oe-kbuild-all/202503040000.GtiWUsBm-lkp@intel.com
]
Signed-off-by: Rik van Riel <riel@surriel.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Reviewed-by: Nadav Amit <nadav.amit@gmail.com>
Link: https://lore.kernel.org/r/20250226030129.530345-11-riel@surriel.com
---
arch/x86/include/asm/tlbflush.h | 6 ++
arch/x86/mm/tlb.c | 104 +++++++++++++++++++++++++++++++-
2 files changed, 109 insertions(+), 1 deletion(-)
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -280,6 +280,11 @@ static inline void mm_assign_global_asid
smp_store_release(&mm->context.global_asid, asid);
}
+static inline void mm_clear_asid_transition(struct mm_struct *mm)
+{
+ WRITE_ONCE(mm->context.asid_transition, false);
+}
+
static inline bool mm_in_asid_transition(struct mm_struct *mm)
{
if (!cpu_feature_enabled(X86_FEATURE_INVLPGB))
@@ -291,6 +296,7 @@ static inline bool mm_in_asid_transition
static inline u16 mm_global_asid(struct mm_struct *mm) { return 0; }
static inline void mm_init_global_asid(struct mm_struct *mm) { }
static inline void mm_assign_global_asid(struct mm_struct *mm, u16 asid) { }
+static inline void mm_clear_asid_transition(struct mm_struct *mm) { }
static inline bool mm_in_asid_transition(struct mm_struct *mm) { return false; }
#endif /* CONFIG_BROADCAST_TLB_FLUSH */
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -431,6 +431,105 @@ static bool mm_needs_global_asid(struct
}
/*
+ * x86 has 4k ASIDs (2k when compiled with KPTI), but the largest x86
+ * systems have over 8k CPUs. Because of this potential ASID shortage,
+ * global ASIDs are handed out to processes that have frequent TLB
+ * flushes and are active on 4 or more CPUs simultaneously.
+ */
+static void consider_global_asid(struct mm_struct *mm)
+{
+ if (!cpu_feature_enabled(X86_FEATURE_INVLPGB))
+ return;
+
+ /* Check every once in a while. */
+ if ((current->pid & 0x1f) != (jiffies & 0x1f))
+ return;
+
+ /*
+ * Assign a global ASID if the process is active on
+ * 4 or more CPUs simultaneously.
+ */
+ if (mm_active_cpus_exceeds(mm, 3))
+ use_global_asid(mm);
+}
+
+static void finish_asid_transition(struct flush_tlb_info *info)
+{
+ struct mm_struct *mm = info->mm;
+ int bc_asid = mm_global_asid(mm);
+ int cpu;
+
+ if (!mm_in_asid_transition(mm))
+ return;
+
+ for_each_cpu(cpu, mm_cpumask(mm)) {
+ /*
+ * The remote CPU is context switching. Wait for that to
+ * finish, to catch the unlikely case of it switching to
+ * the target mm with an out of date ASID.
+ */
+ while (READ_ONCE(per_cpu(cpu_tlbstate.loaded_mm, cpu)) == LOADED_MM_SWITCHING)
+ cpu_relax();
+
+ if (READ_ONCE(per_cpu(cpu_tlbstate.loaded_mm, cpu)) != mm)
+ continue;
+
+ /*
+ * If at least one CPU is not using the global ASID yet,
+ * send a TLB flush IPI. The IPI should cause stragglers
+ * to transition soon.
+ *
+ * This can race with the CPU switching to another task;
+ * that results in a (harmless) extra IPI.
+ */
+ if (READ_ONCE(per_cpu(cpu_tlbstate.loaded_mm_asid, cpu)) != bc_asid) {
+ flush_tlb_multi(mm_cpumask(info->mm), info);
+ return;
+ }
+ }
+
+ /* All the CPUs running this process are using the global ASID. */
+ mm_clear_asid_transition(mm);
+}
+
+static void broadcast_tlb_flush(struct flush_tlb_info *info)
+{
+ bool pmd = info->stride_shift == PMD_SHIFT;
+ unsigned long asid = mm_global_asid(info->mm);
+ unsigned long addr = info->start;
+
+ /*
+ * TLB flushes with INVLPGB are kicked off asynchronously.
+ * The inc_mm_tlb_gen() guarantees page table updates are done
+ * before these TLB flushes happen.
+ */
+ if (info->end == TLB_FLUSH_ALL) {
+ invlpgb_flush_single_pcid_nosync(kern_pcid(asid));
+ /* Do any CPUs supporting INVLPGB need PTI? */
+ if (cpu_feature_enabled(X86_FEATURE_PTI))
+ invlpgb_flush_single_pcid_nosync(user_pcid(asid));
+ } else do {
+ unsigned long nr = 1;
+
+ if (info->stride_shift <= PMD_SHIFT) {
+ nr = (info->end - addr) >> info->stride_shift;
+ nr = clamp_val(nr, 1, invlpgb_count_max);
+ }
+
+ invlpgb_flush_user_nr_nosync(kern_pcid(asid), addr, nr, pmd);
+ if (cpu_feature_enabled(X86_FEATURE_PTI))
+ invlpgb_flush_user_nr_nosync(user_pcid(asid), addr, nr, pmd);
+
+ addr += nr << info->stride_shift;
+ } while (addr < info->end);
+
+ finish_asid_transition(info);
+
+ /* Wait for the INVLPGBs kicked off above to finish. */
+ __tlbsync();
+}
+
+/*
* Given an ASID, flush the corresponding user ASID. We can delay this
* until the next time we switch to it.
*
@@ -1260,9 +1359,12 @@ void flush_tlb_mm_range(struct mm_struct
* a local TLB flush is needed. Optimize this use-case by calling
* flush_tlb_func_local() directly in this case.
*/
- if (cpumask_any_but(mm_cpumask(mm), cpu) < nr_cpu_ids) {
+ if (mm_global_asid(mm)) {
+ broadcast_tlb_flush(info);
+ } else if (cpumask_any_but(mm_cpumask(mm), cpu) < nr_cpu_ids) {
info->trim_cpumask = should_trim_cpumask(mm);
flush_tlb_multi(mm_cpumask(mm), info);
+ consider_global_asid(mm);
} else if (mm == this_cpu_read(cpu_tlbstate.loaded_mm)) {
lockdep_assert_irqs_enabled();
local_irq_disable();

View File

@@ -1,28 +1,31 @@
From 101ba03a6474bbc52971505abf1e3ee9613f255b Mon Sep 17 00:00:00 2001
From 9c88454149bd22cc3d8618b4445d32aaf48cadce Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
Date: Wed, 5 Feb 2025 23:43:30 -0500
Subject: x86/mm: enable AMD translation cache extensions
Date: Tue, 25 Feb 2025 22:00:47 -0500
Subject: x86/mm: Enable AMD translation cache extensions
With AMD TCE (translation cache extensions) only the intermediate mappings
that cover the address range zapped by INVLPG / INVLPGB get invalidated,
rather than all intermediate mappings getting zapped at every TLB invalidation.
This can help reduce the TLB miss rate, by keeping more intermediate
mappings in the cache.
This can help reduce the TLB miss rate, by keeping more intermediate mappings
in the cache.
From the AMD manual:
Translation Cache Extension (TCE) Bit. Bit 15, read/write. Setting this bit
to 1 changes how the INVLPG, INVLPGB, and INVPCID instructions operate on
TLB entries. When this bit is 0, these instructions remove the target PTE
from the TLB as well as all upper-level table entries that are cached
in the TLB, whether or not they are associated with the target PTE.
When this bit is set, these instructions will remove the target PTE and
only those upper-level entries that lead to the target PTE in
the page table hierarchy, leaving unrelated upper-level entries intact.
Translation Cache Extension (TCE) Bit. Bit 15, read/write. Setting this bit to
1 changes how the INVLPG, INVLPGB, and INVPCID instructions operate on TLB
entries. When this bit is 0, these instructions remove the target PTE from the
TLB as well as all upper-level table entries that are cached in the TLB,
whether or not they are associated with the target PTE. When this bit is set,
these instructions will remove the target PTE and only those upper-level
entries that lead to the target PTE in the page table hierarchy, leaving
unrelated upper-level entries intact.
[ bp: use cpu_has()... I know, it is a mess. ]
Signed-off-by: Rik van Riel <riel@surriel.com>
Tested-by: Manali Shukla <Manali.Shukla@amd.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Link: https://lore.kernel.org/r/20250226030129.530345-13-riel@surriel.com
---
arch/x86/include/asm/msr-index.h | 2 ++
arch/x86/kernel/cpu/amd.c | 4 ++++
@@ -49,13 +52,13 @@ Tested-by: Manali Shukla <Manali.Shukla@amd.com>
/*
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -1071,6 +1071,10 @@ static void init_amd(struct cpuinfo_x86
@@ -1075,6 +1075,10 @@ static void init_amd(struct cpuinfo_x86
/* AMD CPUs don't need fencing after x2APIC/TSC_DEADLINE MSR writes. */
clear_cpu_cap(c, X86_FEATURE_APIC_MSRS_FENCE);
+
+ /* Enable Translation Cache Extension */
+ if (cpu_feature_enabled(X86_FEATURE_TCE))
+ if (cpu_has(c, X86_FEATURE_TCE))
+ msr_set_bit(MSR_EFER, _EFER_TCE);
}
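
For reference, the effect of this change can be checked on a running system by reading the EFER MSR (0xC0000080) through the msr driver and testing bit 15, which the AMD manual text quoted above defines as TCE. A minimal sketch under those assumptions (requires root and `modprobe msr`; this tool is illustrative and not part of the patch):

/*
 * Illustrative only: read the EFER MSR (0xC0000080) via /dev/cpu/0/msr
 * and report whether the TCE bit (bit 15) is set after boot.
 */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

#define MSR_EFER	0xC0000080
#define EFER_TCE_BIT	15

int main(void)
{
	uint64_t efer;
	int fd = open("/dev/cpu/0/msr", O_RDONLY);

	if (fd < 0) {
		perror("open /dev/cpu/0/msr");
		return 1;
	}
	if (pread(fd, &efer, sizeof(efer), MSR_EFER) != sizeof(efer)) {
		perror("pread");
		close(fd);
		return 1;
	}
	close(fd);

	printf("EFER = %#llx, TCE %s\n", (unsigned long long)efer,
	       (efer >> EFER_TCE_BIT) & 1 ? "enabled" : "disabled");
	return 0;
}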

View File

@@ -0,0 +1,121 @@
From 20dfd0edb14a1d0aecd5eb227f2db64487201976 Mon Sep 17 00:00:00 2001
From: Tom Lendacky <thomas.lendacky@amd.com>
Date: Tue, 4 Mar 2025 12:59:56 +0100
Subject: x86/mm: Always set the ASID valid bit for the INVLPGB instruction
When executing the INVLPGB instruction on a bare-metal host or hypervisor, if
the ASID valid bit is not set, the instruction will flush the TLB entries that
match the specified criteria for any ASID, not just those of the host. If
virtual machines are running on the system, this may result in inadvertent
flushes of guest TLB entries.
When executing the INVLPGB instruction in a guest and the INVLPGB instruction is
not intercepted by the hypervisor, the hardware will replace the requested ASID
with the guest ASID and set the ASID valid bit before doing the broadcast
invalidation. Thus a guest is only able to flush its own TLB entries.
So to limit the host TLB flushing reach, always set the ASID valid bit using an
ASID value of 0 (which represents the host/hypervisor). This will result in
the desired effect in both host and guest.
Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Link: https://lore.kernel.org/r/20250304120449.GHZ8bsYYyEBOKQIxBm@fat_crate.local
---
arch/x86/include/asm/tlb.h | 58 +++++++++++++++++++++-----------------
1 file changed, 32 insertions(+), 26 deletions(-)
--- a/arch/x86/include/asm/tlb.h
+++ b/arch/x86/include/asm/tlb.h
@@ -33,6 +33,27 @@ enum addr_stride {
PMD_STRIDE = 1
};
+/*
+ * INVLPGB can be targeted by virtual address, PCID, ASID, or any combination
+ * of the three. For example:
+ * - FLAG_VA | FLAG_INCLUDE_GLOBAL: invalidate all TLB entries at the address
+ * - FLAG_PCID: invalidate all TLB entries matching the PCID
+ *
+ * The first is used to invalidate (kernel) mappings at a particular
+ * address across all processes.
+ *
+ * The latter invalidates all TLB entries matching a PCID.
+ */
+#define INVLPGB_FLAG_VA BIT(0)
+#define INVLPGB_FLAG_PCID BIT(1)
+#define INVLPGB_FLAG_ASID BIT(2)
+#define INVLPGB_FLAG_INCLUDE_GLOBAL BIT(3)
+#define INVLPGB_FLAG_FINAL_ONLY BIT(4)
+#define INVLPGB_FLAG_INCLUDE_NESTED BIT(5)
+
+/* The implied mode when all bits are clear: */
+#define INVLPGB_MODE_ALL_NONGLOBALS 0UL
+
#ifdef CONFIG_BROADCAST_TLB_FLUSH
/*
* INVLPGB does broadcast TLB invalidation across all the CPUs in the system.
@@ -40,14 +61,20 @@ enum addr_stride {
* The INVLPGB instruction is weakly ordered, and a batch of invalidations can
* be done in a parallel fashion.
*
- * The instruction takes the number of extra pages to invalidate, beyond
- * the first page, while __invlpgb gets the more human readable number of
- * pages to invalidate.
+ * The instruction takes the number of extra pages to invalidate, beyond the
+ * first page, while __invlpgb gets the more human readable number of pages to
+ * invalidate.
*
* The bits in rax[0:2] determine respectively which components of the address
* (VA, PCID, ASID) get compared when flushing. If neither bits are set, *any*
* address in the specified range matches.
*
+ * Since it is desired to only flush TLB entries for the ASID that is executing
+ * the instruction (a host/hypervisor or a guest), the ASID valid bit should
+ * always be set. On a host/hypervisor, the hardware will use the ASID value
+ * specified in EDX[15:0] (which should be 0). On a guest, the hardware will
+ * use the actual ASID value of the guest.
+ *
* TLBSYNC is used to ensure that pending INVLPGB invalidations initiated from
* this CPU have completed.
*/
@@ -55,9 +82,9 @@ static inline void __invlpgb(unsigned lo
unsigned long addr, u16 nr_pages,
enum addr_stride stride, u8 flags)
{
- u32 edx = (pcid << 16) | asid;
+ u64 rax = addr | flags | INVLPGB_FLAG_ASID;
u32 ecx = (stride << 31) | (nr_pages - 1);
- u64 rax = addr | flags;
+ u32 edx = (pcid << 16) | asid;
/* The low bits in rax are for flags. Verify addr is clean. */
VM_WARN_ON_ONCE(addr & ~PAGE_MASK);
@@ -93,27 +120,6 @@ static inline void __invlpgb_all(unsigne
static inline void __tlbsync(void) { }
#endif
-/*
- * INVLPGB can be targeted by virtual address, PCID, ASID, or any combination
- * of the three. For example:
- * - FLAG_VA | FLAG_INCLUDE_GLOBAL: invalidate all TLB entries at the address
- * - FLAG_PCID: invalidate all TLB entries matching the PCID
- *
- * The first is used to invalidate (kernel) mappings at a particular
- * address across all processes.
- *
- * The latter invalidates all TLB entries matching a PCID.
- */
-#define INVLPGB_FLAG_VA BIT(0)
-#define INVLPGB_FLAG_PCID BIT(1)
-#define INVLPGB_FLAG_ASID BIT(2)
-#define INVLPGB_FLAG_INCLUDE_GLOBAL BIT(3)
-#define INVLPGB_FLAG_FINAL_ONLY BIT(4)
-#define INVLPGB_FLAG_INCLUDE_NESTED BIT(5)
-
-/* The implied mode when all bits are clear: */
-#define INVLPGB_MODE_ALL_NONGLOBALS 0UL
-
static inline void invlpgb_flush_user_nr_nosync(unsigned long pcid,
unsigned long addr,
u16 nr, bool stride)

View File

@@ -0,0 +1,70 @@
From b5a210ad153e5448876c422f5c77d3dcd83abac6 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
Date: Wed, 19 Mar 2025 13:25:20 -0400
Subject: x86/mm: Only do broadcast flush from reclaim if pages were unmapped
Track whether pages were unmapped from any MM (even ones with a currently
empty mm_cpumask) by the reclaim code, to figure out whether or not
broadcast TLB flush should be done when reclaim finishes.
The reason any MM must be tracked, and not only ones contributing to the
tlbbatch cpumask, is that broadcast ASIDs are expected to be kept up to
date even on CPUs where the MM is not currently active.
This change allows reclaim to avoid doing TLB flushes when only clean page
cache pages and/or slab memory were reclaimed, which is fairly common.
( This is a simpler alternative to the code that was in my INVLPGB series
before, and it seems to capture most of the benefit due to how common
it is to reclaim only page cache. )
Signed-off-by: Rik van Riel <riel@surriel.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: https://lore.kernel.org/r/20250319132520.6b10ad90@fangorn
---
arch/x86/include/asm/tlbbatch.h | 5 +++++
arch/x86/include/asm/tlbflush.h | 1 +
arch/x86/mm/tlb.c | 3 ++-
3 files changed, 8 insertions(+), 1 deletion(-)
--- a/arch/x86/include/asm/tlbbatch.h
+++ b/arch/x86/include/asm/tlbbatch.h
@@ -10,6 +10,11 @@ struct arch_tlbflush_unmap_batch {
* the PFNs being flushed..
*/
struct cpumask cpumask;
+ /*
+ * Set if pages were unmapped from any MM, even one that does not
+ * have active CPUs in its cpumask.
+ */
+ bool unmapped_pages;
};
#endif /* _ARCH_X86_TLBBATCH_H */
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -353,6 +353,7 @@ static inline void arch_tlbbatch_add_pen
{
inc_mm_tlb_gen(mm);
cpumask_or(&batch->cpumask, &batch->cpumask, mm_cpumask(mm));
+ batch->unmapped_pages = true;
mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL);
}
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -1633,8 +1633,9 @@ void arch_tlbbatch_flush(struct arch_tlb
* a local TLB flush is needed. Optimize this use-case by calling
* flush_tlb_func_local() directly in this case.
*/
- if (cpu_feature_enabled(X86_FEATURE_INVLPGB)) {
+ if (cpu_feature_enabled(X86_FEATURE_INVLPGB) && batch->unmapped_pages) {
invlpgb_flush_all_nonglobals();
+ batch->unmapped_pages = false;
} else if (cpumask_any_but(&batch->cpumask, cpu) < nr_cpu_ids) {
flush_tlb_multi(&batch->cpumask, info);
} else if (cpumask_test_cpu(cpu, &batch->cpumask)) {

View File

@@ -0,0 +1,398 @@
From 4ad0ae3b81cd90c0729df9ac5f1ff21f4dad6130 Mon Sep 17 00:00:00 2001
From: Oleksandr Natalenko <oleksandr@natalenko.name>
Date: Mon, 30 Sep 2024 08:58:38 +0200
Subject: mm: expose per-process KSM control via syscalls
d7597f59d1d3 added a new API to enable per-process KSM control. It
however uses prctl, which doesn't allow controlling KSM from outside of
the current process.
Hence, expose this API via 3 syscalls: process_ksm_enable,
process_ksm_disable and process_ksm_status. Given sufficient privileges,
auto-KSM can be enabled by another process.
Since these syscalls are not in the upstream kernel, also expose their
numbers under /sys/kernel/process_ksm so that userspace tooling like
uksmd knows how to use them.
Signed-off-by: Oleksandr Natalenko <oleksandr@natalenko.name>
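
Since these syscall numbers are assigned out of tree, a userspace consumer is expected to discover them at runtime from the sysfs files added below and then invoke the syscall directly. The following is a minimal sketch of that flow, assuming a target PID on the command line; the helper name, error handling, and use of pidfd_open(2) are illustrative and not part of this patch:

/*
 * Illustrative only: enable KSM merging for another process via the
 * process_ksm_enable syscall exposed by this patch. The syscall number is
 * read from /sys/kernel/process_ksm/process_ksm_enable as described above.
 */
#include <stdio.h>
#include <stdlib.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>

static long read_syscall_nr(const char *path)
{
	FILE *f = fopen(path, "r");
	long nr = -1;

	if (f && fscanf(f, "%ld", &nr) != 1)
		nr = -1;
	if (f)
		fclose(f);
	return nr;
}

int main(int argc, char **argv)
{
	long nr = read_syscall_nr("/sys/kernel/process_ksm/process_ksm_enable");
	pid_t target;
	int pidfd;

	if (argc < 2 || nr < 0) {
		fprintf(stderr, "usage: %s <pid> (needs process_ksm support)\n", argv[0]);
		return 1;
	}
	target = (pid_t)atoi(argv[1]);

	/* pidfd_open(2) is available since Linux 5.3. */
	pidfd = syscall(SYS_pidfd_open, target, 0);
	if (pidfd < 0) {
		perror("pidfd_open");
		return 1;
	}

	/* flags must be 0; caller needs CAP_SYS_NICE and ptrace read access. */
	if (syscall(nr, pidfd, 0) < 0) {
		perror("process_ksm_enable");
		return 1;
	}

	printf("KSM merge-any enabled for pid %d\n", target);
	return 0;
}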
---
arch/alpha/kernel/syscalls/syscall.tbl | 3 +
arch/arm/tools/syscall.tbl | 3 +
arch/m68k/kernel/syscalls/syscall.tbl | 3 +
arch/microblaze/kernel/syscalls/syscall.tbl | 3 +
arch/mips/kernel/syscalls/syscall_n32.tbl | 3 +
arch/mips/kernel/syscalls/syscall_n64.tbl | 3 +
arch/mips/kernel/syscalls/syscall_o32.tbl | 3 +
arch/parisc/kernel/syscalls/syscall.tbl | 3 +
arch/powerpc/kernel/syscalls/syscall.tbl | 3 +
arch/s390/kernel/syscalls/syscall.tbl | 3 +
arch/sh/kernel/syscalls/syscall.tbl | 3 +
arch/sparc/kernel/syscalls/syscall.tbl | 3 +
arch/x86/entry/syscalls/syscall_32.tbl | 3 +
arch/x86/entry/syscalls/syscall_64.tbl | 3 +
arch/xtensa/kernel/syscalls/syscall.tbl | 3 +
include/linux/syscalls.h | 3 +
include/uapi/asm-generic/unistd.h | 9 +-
kernel/sys.c | 138 ++++++++++++++++++
kernel/sys_ni.c | 3 +
scripts/syscall.tbl | 3 +
.../arch/powerpc/entry/syscalls/syscall.tbl | 3 +
.../perf/arch/s390/entry/syscalls/syscall.tbl | 3 +
22 files changed, 206 insertions(+), 1 deletion(-)
--- a/arch/alpha/kernel/syscalls/syscall.tbl
+++ b/arch/alpha/kernel/syscalls/syscall.tbl
@@ -506,3 +506,6 @@
574 common getxattrat sys_getxattrat
575 common listxattrat sys_listxattrat
576 common removexattrat sys_removexattrat
+577 common process_ksm_enable sys_process_ksm_enable
+578 common process_ksm_disable sys_process_ksm_disable
+579 common process_ksm_status sys_process_ksm_status
--- a/arch/arm/tools/syscall.tbl
+++ b/arch/arm/tools/syscall.tbl
@@ -481,3 +481,6 @@
464 common getxattrat sys_getxattrat
465 common listxattrat sys_listxattrat
466 common removexattrat sys_removexattrat
+467 common process_ksm_enable sys_process_ksm_enable
+468 common process_ksm_disable sys_process_ksm_disable
+469 common process_ksm_status sys_process_ksm_status
--- a/arch/m68k/kernel/syscalls/syscall.tbl
+++ b/arch/m68k/kernel/syscalls/syscall.tbl
@@ -466,3 +466,6 @@
464 common getxattrat sys_getxattrat
465 common listxattrat sys_listxattrat
466 common removexattrat sys_removexattrat
+467 common process_ksm_enable sys_process_ksm_enable
+468 common process_ksm_disable sys_process_ksm_disable
+469 common process_ksm_status sys_process_ksm_status
--- a/arch/microblaze/kernel/syscalls/syscall.tbl
+++ b/arch/microblaze/kernel/syscalls/syscall.tbl
@@ -472,3 +472,6 @@
464 common getxattrat sys_getxattrat
465 common listxattrat sys_listxattrat
466 common removexattrat sys_removexattrat
+467 common process_ksm_enable sys_process_ksm_enable
+468 common process_ksm_disable sys_process_ksm_disable
+469 common process_ksm_status sys_process_ksm_status
--- a/arch/mips/kernel/syscalls/syscall_n32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n32.tbl
@@ -405,3 +405,6 @@
464 n32 getxattrat sys_getxattrat
465 n32 listxattrat sys_listxattrat
466 n32 removexattrat sys_removexattrat
+467 n32 process_ksm_enable sys_process_ksm_enable
+468 n32 process_ksm_disable sys_process_ksm_disable
+469 n32 process_ksm_status sys_process_ksm_status
--- a/arch/mips/kernel/syscalls/syscall_n64.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n64.tbl
@@ -381,3 +381,6 @@
464 n64 getxattrat sys_getxattrat
465 n64 listxattrat sys_listxattrat
466 n64 removexattrat sys_removexattrat
+467 n64 process_ksm_enable sys_process_ksm_enable
+468 n64 process_ksm_disable sys_process_ksm_disable
+469 n64 process_ksm_status sys_process_ksm_status
--- a/arch/mips/kernel/syscalls/syscall_o32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_o32.tbl
@@ -454,3 +454,6 @@
464 o32 getxattrat sys_getxattrat
465 o32 listxattrat sys_listxattrat
466 o32 removexattrat sys_removexattrat
+467 o32 process_ksm_enable sys_process_ksm_enable
+468 o32 process_ksm_disable sys_process_ksm_disable
+469 o32 process_ksm_status sys_process_ksm_status
--- a/arch/parisc/kernel/syscalls/syscall.tbl
+++ b/arch/parisc/kernel/syscalls/syscall.tbl
@@ -465,3 +465,6 @@
464 common getxattrat sys_getxattrat
465 common listxattrat sys_listxattrat
466 common removexattrat sys_removexattrat
+467 common process_ksm_enable sys_process_ksm_enable
+468 common process_ksm_disable sys_process_ksm_disable
+469 common process_ksm_status sys_process_ksm_status
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -557,3 +557,6 @@
464 common getxattrat sys_getxattrat
465 common listxattrat sys_listxattrat
466 common removexattrat sys_removexattrat
+467 common process_ksm_enable sys_process_ksm_enable
+468 common process_ksm_disable sys_process_ksm_disable
+469 common process_ksm_status sys_process_ksm_status
--- a/arch/s390/kernel/syscalls/syscall.tbl
+++ b/arch/s390/kernel/syscalls/syscall.tbl
@@ -469,3 +469,6 @@
464 common getxattrat sys_getxattrat sys_getxattrat
465 common listxattrat sys_listxattrat sys_listxattrat
466 common removexattrat sys_removexattrat sys_removexattrat
+467 common process_ksm_enable sys_process_ksm_enable sys_process_ksm_enable
+468 common process_ksm_disable sys_process_ksm_disable sys_process_ksm_disable
+469 common process_ksm_status sys_process_ksm_status sys_process_ksm_status
--- a/arch/sh/kernel/syscalls/syscall.tbl
+++ b/arch/sh/kernel/syscalls/syscall.tbl
@@ -470,3 +470,6 @@
464 common getxattrat sys_getxattrat
465 common listxattrat sys_listxattrat
466 common removexattrat sys_removexattrat
+467 common process_ksm_enable sys_process_ksm_enable
+468 common process_ksm_disable sys_process_ksm_disable
+469 common process_ksm_status sys_process_ksm_status
--- a/arch/sparc/kernel/syscalls/syscall.tbl
+++ b/arch/sparc/kernel/syscalls/syscall.tbl
@@ -512,3 +512,6 @@
464 common getxattrat sys_getxattrat
465 common listxattrat sys_listxattrat
466 common removexattrat sys_removexattrat
+467 common process_ksm_enable sys_process_ksm_enable
+468 common process_ksm_disable sys_process_ksm_disable
+469 common process_ksm_status sys_process_ksm_status
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -472,3 +472,6 @@
464 i386 getxattrat sys_getxattrat
465 i386 listxattrat sys_listxattrat
466 i386 removexattrat sys_removexattrat
+467 i386 process_ksm_enable sys_process_ksm_enable
+468 i386 process_ksm_disable sys_process_ksm_disable
+469 i386 process_ksm_status sys_process_ksm_status
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -390,6 +390,9 @@
464 common getxattrat sys_getxattrat
465 common listxattrat sys_listxattrat
466 common removexattrat sys_removexattrat
+467 common process_ksm_enable sys_process_ksm_enable
+468 common process_ksm_disable sys_process_ksm_disable
+469 common process_ksm_status sys_process_ksm_status
#
# Due to a historical design error, certain syscalls are numbered differently
--- a/arch/xtensa/kernel/syscalls/syscall.tbl
+++ b/arch/xtensa/kernel/syscalls/syscall.tbl
@@ -437,3 +437,6 @@
464 common getxattrat sys_getxattrat
465 common listxattrat sys_listxattrat
466 common removexattrat sys_removexattrat
+467 common process_ksm_enable sys_process_ksm_enable
+468 common process_ksm_disable sys_process_ksm_disable
+469 common process_ksm_status sys_process_ksm_status
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -831,6 +831,9 @@ asmlinkage long sys_madvise(unsigned lon
asmlinkage long sys_process_madvise(int pidfd, const struct iovec __user *vec,
size_t vlen, int behavior, unsigned int flags);
asmlinkage long sys_process_mrelease(int pidfd, unsigned int flags);
+asmlinkage long sys_process_ksm_enable(int pidfd, unsigned int flags);
+asmlinkage long sys_process_ksm_disable(int pidfd, unsigned int flags);
+asmlinkage long sys_process_ksm_status(int pidfd, unsigned int flags);
asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size,
unsigned long prot, unsigned long pgoff,
unsigned long flags);
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -850,8 +850,15 @@ __SYSCALL(__NR_listxattrat, sys_listxatt
#define __NR_removexattrat 466
__SYSCALL(__NR_removexattrat, sys_removexattrat)
+#define __NR_process_ksm_enable 467
+__SYSCALL(__NR_process_ksm_enable, sys_process_ksm_enable)
+#define __NR_process_ksm_disable 468
+__SYSCALL(__NR_process_ksm_disable, sys_process_ksm_disable)
+#define __NR_process_ksm_status 469
+__SYSCALL(__NR_process_ksm_status, sys_process_ksm_status)
+
#undef __NR_syscalls
-#define __NR_syscalls 467
+#define __NR_syscalls 470
/*
* 32 bit systems traditionally used different
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -2819,6 +2819,144 @@ SYSCALL_DEFINE5(prctl, int, option, unsi
return error;
}
+#ifdef CONFIG_KSM
+enum pkc_action {
+ PKSM_ENABLE = 0,
+ PKSM_DISABLE,
+ PKSM_STATUS,
+};
+
+static long do_process_ksm_control(int pidfd, enum pkc_action action)
+{
+ long ret;
+ struct task_struct *task;
+ struct mm_struct *mm;
+ unsigned int f_flags;
+
+ task = pidfd_get_task(pidfd, &f_flags);
+ if (IS_ERR(task)) {
+ ret = PTR_ERR(task);
+ goto out;
+ }
+
+ /* Require PTRACE_MODE_READ to avoid leaking ASLR metadata. */
+ mm = mm_access(task, PTRACE_MODE_READ_FSCREDS);
+ if (IS_ERR_OR_NULL(mm)) {
+ ret = IS_ERR(mm) ? PTR_ERR(mm) : -ESRCH;
+ goto release_task;
+ }
+
+ /* Require CAP_SYS_NICE for influencing process performance. */
+ if (!capable(CAP_SYS_NICE)) {
+ ret = -EPERM;
+ goto release_mm;
+ }
+
+ if (mmap_write_lock_killable(mm)) {
+ ret = -EINTR;
+ goto release_mm;
+ }
+
+ switch (action) {
+ case PKSM_ENABLE:
+ ret = ksm_enable_merge_any(mm);
+ break;
+ case PKSM_DISABLE:
+ ret = ksm_disable_merge_any(mm);
+ break;
+ case PKSM_STATUS:
+ ret = !!test_bit(MMF_VM_MERGE_ANY, &mm->flags);
+ break;
+ }
+
+ mmap_write_unlock(mm);
+
+release_mm:
+ mmput(mm);
+release_task:
+ put_task_struct(task);
+out:
+ return ret;
+}
+#endif /* CONFIG_KSM */
+
+SYSCALL_DEFINE2(process_ksm_enable, int, pidfd, unsigned int, flags)
+{
+#ifdef CONFIG_KSM
+ if (flags != 0)
+ return -EINVAL;
+
+ return do_process_ksm_control(pidfd, PKSM_ENABLE);
+#else /* CONFIG_KSM */
+ return -ENOSYS;
+#endif /* CONFIG_KSM */
+}
+
+SYSCALL_DEFINE2(process_ksm_disable, int, pidfd, unsigned int, flags)
+{
+#ifdef CONFIG_KSM
+ if (flags != 0)
+ return -EINVAL;
+
+ return do_process_ksm_control(pidfd, PKSM_DISABLE);
+#else /* CONFIG_KSM */
+ return -ENOSYS;
+#endif /* CONFIG_KSM */
+}
+
+SYSCALL_DEFINE2(process_ksm_status, int, pidfd, unsigned int, flags)
+{
+#ifdef CONFIG_KSM
+ if (flags != 0)
+ return -EINVAL;
+
+ return do_process_ksm_control(pidfd, PKSM_STATUS);
+#else /* CONFIG_KSM */
+ return -ENOSYS;
+#endif /* CONFIG_KSM */
+}
+
+#ifdef CONFIG_KSM
+static ssize_t process_ksm_enable_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%u\n", __NR_process_ksm_enable);
+}
+static struct kobj_attribute process_ksm_enable_attr = __ATTR_RO(process_ksm_enable);
+
+static ssize_t process_ksm_disable_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%u\n", __NR_process_ksm_disable);
+}
+static struct kobj_attribute process_ksm_disable_attr = __ATTR_RO(process_ksm_disable);
+
+static ssize_t process_ksm_status_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%u\n", __NR_process_ksm_status);
+}
+static struct kobj_attribute process_ksm_status_attr = __ATTR_RO(process_ksm_status);
+
+static struct attribute *process_ksm_sysfs_attrs[] = {
+ &process_ksm_enable_attr.attr,
+ &process_ksm_disable_attr.attr,
+ &process_ksm_status_attr.attr,
+ NULL,
+};
+
+static const struct attribute_group process_ksm_sysfs_attr_group = {
+ .attrs = process_ksm_sysfs_attrs,
+ .name = "process_ksm",
+};
+
+static int __init process_ksm_sysfs_init(void)
+{
+ return sysfs_create_group(kernel_kobj, &process_ksm_sysfs_attr_group);
+}
+subsys_initcall(process_ksm_sysfs_init);
+#endif /* CONFIG_KSM */
+
SYSCALL_DEFINE3(getcpu, unsigned __user *, cpup, unsigned __user *, nodep,
struct getcpu_cache __user *, unused)
{
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -186,6 +186,9 @@ COND_SYSCALL(mincore);
COND_SYSCALL(madvise);
COND_SYSCALL(process_madvise);
COND_SYSCALL(process_mrelease);
+COND_SYSCALL(process_ksm_enable);
+COND_SYSCALL(process_ksm_disable);
+COND_SYSCALL(process_ksm_status);
COND_SYSCALL(remap_file_pages);
COND_SYSCALL(mbind);
COND_SYSCALL(get_mempolicy);
--- a/scripts/syscall.tbl
+++ b/scripts/syscall.tbl
@@ -407,3 +407,6 @@
464 common getxattrat sys_getxattrat
465 common listxattrat sys_listxattrat
466 common removexattrat sys_removexattrat
+467 common process_ksm_enable sys_process_ksm_enable
+468 common process_ksm_disable sys_process_ksm_disable
+469 common process_ksm_status sys_process_ksm_status
--- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
@@ -557,3 +557,6 @@
464 common getxattrat sys_getxattrat
465 common listxattrat sys_listxattrat
466 common removexattrat sys_removexattrat
+467 common process_ksm_enable sys_process_ksm_enable
+468 common process_ksm_disable sys_process_ksm_disable
+469 common process_ksm_status sys_process_ksm_status
--- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
@@ -469,3 +469,6 @@
464 common getxattrat sys_getxattrat sys_getxattrat
465 common listxattrat sys_listxattrat sys_listxattrat
466 common removexattrat sys_removexattrat sys_removexattrat
+467 common process_ksm_enable sys_process_ksm_enable sys_process_ksm_enable
+468 common process_ksm_disable sys_process_ksm_disable sys_process_ksm_disable
+469 common process_ksm_status sys_process_ksm_status sys_process_ksm_status

View File

@@ -1,4 +1,4 @@
From 95490afcba944883e7f911214391a1a1e2fa3261 Mon Sep 17 00:00:00 2001
From 6d141e3121676e9ca50d6465a622b9a5d572219a Mon Sep 17 00:00:00 2001
From: "Jan Alexander Steffens (heftig)" <heftig@archlinux.org>
Date: Mon, 26 Apr 2021 22:12:46 +0200
Subject: ZEN: Add VHBA driver
@@ -10,8 +10,8 @@ tag vhba-module-20240917
drivers/scsi/Makefile | 1 +
drivers/scsi/vhba/Kconfig | 9 +
drivers/scsi/vhba/Makefile | 4 +
drivers/scsi/vhba/vhba.c | 1124 ++++++++++++++++++++++++++++++++++++
5 files changed, 1140 insertions(+)
drivers/scsi/vhba/vhba.c | 1130 ++++++++++++++++++++++++++++++++++++
5 files changed, 1146 insertions(+)
create mode 100644 drivers/scsi/vhba/Kconfig
create mode 100644 drivers/scsi/vhba/Makefile
create mode 100644 drivers/scsi/vhba/vhba.c
@@ -56,7 +56,7 @@ tag vhba-module-20240917
+ccflags-y := -DVHBA_VERSION=\"$(VHBA_VERSION)\" -Werror
--- /dev/null
+++ b/drivers/scsi/vhba/vhba.c
@@ -0,0 +1,1124 @@
@@ -0,0 +1,1130 @@
+/*
+ * vhba.c
+ *
@@ -1108,7 +1108,11 @@ tag vhba-module-20240917
+ return 0;
+}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(6, 11, 0)
+static int vhba_remove (struct platform_device *pdev)
+#else
+static void vhba_remove (struct platform_device *pdev)
+#endif
+{
+ struct vhba_host *vhost;
+ struct Scsi_Host *shost;
@@ -1121,7 +1125,9 @@ tag vhba-module-20240917
+
+ kfree(vhost->commands);
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(6, 11, 0)
+ return 0;
+#endif
+}
+
+static void vhba_release (struct device * dev)

View File

@@ -0,0 +1,28 @@
From 1f9910c9a54b424ad0cd415b981986937618c4ec Mon Sep 17 00:00:00 2001
From: Rok Mandeljc <rok.mandeljc@gmail.com>
Date: Mon, 3 Feb 2025 21:05:32 +0100
Subject: VHBA: fix building with kernel 6.14-rc1
Kernel 6.14-rc1 simplified the selection of tag allocation policy.
Instead of enum-based value, a boolean is used, and the corresponding
field in the `scsi_host_template` structure was renamed from
`tag_alloc_policy` to `tag_alloc_policy_rr`.
See: https://github.com/torvalds/linux/commit/ce32496
---
drivers/scsi/vhba/vhba.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
--- a/drivers/scsi/vhba/vhba.c
+++ b/drivers/scsi/vhba/vhba.c
@@ -537,7 +537,9 @@ static struct scsi_host_template vhba_te
#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 19, 0)
.slave_alloc = vhba_slave_alloc,
#endif
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 0, 0)
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(6, 14, 0)
+ .tag_alloc_policy_rr = true,
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 0, 0)
.tag_alloc_policy = BLK_TAG_ALLOC_RR,
#endif
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0) && LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0)

View File

@@ -1,35 +0,0 @@
From 8a6a60b5a71d7f85351a9350eb651c4ce15b8f00 Mon Sep 17 00:00:00 2001
From: "Jan Alexander Steffens (heftig)" <heftig@archlinux.org>
Date: Sun, 15 Sep 2024 19:05:46 +0000
Subject: vhba: Fix compat with kernel 6.11
Upstream commit 0edb555a65d1ef047a9805051c36922b52a38a9d changed the
return value of the `remove` callback from `int` to `void`.
---
drivers/scsi/vhba/vhba.c | 6 ++++++
1 file changed, 6 insertions(+)
--- a/drivers/scsi/vhba/vhba.c
+++ b/drivers/scsi/vhba/vhba.c
@@ -1049,7 +1049,11 @@ static int vhba_probe (struct platform_d
return 0;
}
+#if LINUX_VERSION_CODE < KERNEL_VERSION(6, 11, 0)
static int vhba_remove (struct platform_device *pdev)
+#else
+static void vhba_remove (struct platform_device *pdev)
+#endif
{
struct vhba_host *vhost;
struct Scsi_Host *shost;
@@ -1062,7 +1066,9 @@ static int vhba_remove (struct platform_
kfree(vhost->commands);
+#if LINUX_VERSION_CODE < KERNEL_VERSION(6, 11, 0)
return 0;
+#endif
}
static void vhba_release (struct device * dev)

View File

@@ -1,4 +1,4 @@
From 1cdff301de6db901bc2bfd7ce78016d9b824d667 Mon Sep 17 00:00:00 2001
From 02b4d790bb05e24e7408a147f33e4e9ca0b805fa Mon Sep 17 00:00:00 2001
From: Daniel Drake <drake@endlessm.com>
Date: Tue, 4 Jun 2019 14:51:21 +0800
Subject: ZEN: PCI: Add Intel remapped NVMe device support
@@ -135,8 +135,8 @@ Contains:
}
static int ahci_get_irq_vector(struct ata_host *host, int port)
@@ -1896,7 +1889,9 @@ static int ahci_init_one(struct pci_dev
hpriv->mmio = pcim_iomap_table(pdev)[ahci_pci_bar];
@@ -1898,7 +1891,9 @@ static int ahci_init_one(struct pci_dev
return -ENOMEM;
/* detect remapped nvme devices */
- ahci_remap_check(pdev, ahci_pci_bar, hpriv);

View File

@@ -1,4 +1,4 @@
From 87b0cab8d8701db7754e5778b93ff83ffc64c7ae Mon Sep 17 00:00:00 2001
From 17190525fdc9c9f73fe22832ab0631e9e1bbad6d Mon Sep 17 00:00:00 2001
From: Sultan Alsawaf <sultan@kerneltoast.com>
Date: Sun, 8 Mar 2020 00:31:35 -0800
Subject: ZEN: Disable stack conservation for GCC
@@ -15,7 +15,7 @@ Signed-off-by: Sultan Alsawaf <sultan@kerneltoast.com>
--- a/Makefile
+++ b/Makefile
@@ -1026,11 +1026,6 @@ KBUILD_CFLAGS += -fno-strict-overflow
@@ -1078,11 +1078,6 @@ KBUILD_CFLAGS += -fno-strict-overflow
# Make sure -fstack-check isn't enabled (like gentoo apparently did)
KBUILD_CFLAGS += -fno-stack-check
@@ -24,6 +24,6 @@ Signed-off-by: Sultan Alsawaf <sultan@kerneltoast.com>
-KBUILD_CFLAGS += -fconserve-stack
-endif
-
# change __FILE__ to the relative path from the srctree
KBUILD_CPPFLAGS += $(call cc-option,-fmacro-prefix-map=$(srctree)/=)
# change __FILE__ to the relative path to the source directory
ifdef building_out_of_srctree
KBUILD_CPPFLAGS += $(call cc-option,-fmacro-prefix-map=$(srcroot)/=)

View File

@@ -1,43 +0,0 @@
From 48d2ea8801ccf8bd9cd48c12fce79040bbcae363 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Thu, 2 Jun 2016 23:36:32 -0500
Subject: ZEN: Initialize ata before graphics
ATA init is the long pole in the boot process, and its asynchronous.
move the graphics init after it so that ata and graphics initialize
in parallel
---
drivers/Makefile | 13 +++++++------
1 file changed, 7 insertions(+), 6 deletions(-)
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -64,14 +64,8 @@ obj-y += char/
# iommu/ comes before gpu as gpu are using iommu controllers
obj-y += iommu/
-# gpu/ comes after char for AGP vs DRM startup and after iommu
-obj-y += gpu/
-
obj-$(CONFIG_CONNECTOR) += connector/
-# i810fb depends on char/agp/
-obj-$(CONFIG_FB_I810) += video/fbdev/i810/
-
obj-$(CONFIG_PARPORT) += parport/
obj-y += base/ block/ misc/ mfd/ nfc/
obj-$(CONFIG_LIBNVDIMM) += nvdimm/
@@ -83,6 +77,13 @@ obj-y += macintosh/
obj-y += scsi/
obj-y += nvme/
obj-$(CONFIG_ATA) += ata/
+
+# gpu/ comes after char for AGP vs DRM startup and after iommu
+obj-y += gpu/
+
+# i810fb depends on char/agp/
+obj-$(CONFIG_FB_I810) += video/fbdev/i810/
+
obj-$(CONFIG_TARGET_CORE) += target/
obj-$(CONFIG_MTD) += mtd/
obj-$(CONFIG_SPI) += spi/

View File

@@ -1,4 +1,4 @@
From 2f3e9fbc48151e4499f9cbd810d9467ac34b0a3b Mon Sep 17 00:00:00 2001
From 2b801ae725ae05be994d374efdce8fc2e828687f Mon Sep 17 00:00:00 2001
From: Kenny Levinsen <kl@kl.wtf>
Date: Sun, 27 Dec 2020 14:43:13 +0000
Subject: ZEN: Input: evdev - use call_rcu when detaching client

View File

@@ -1,4 +1,4 @@
From 51026b78d015797e216aadc4e80158181c2c2bb4 Mon Sep 17 00:00:00 2001
From 3777b5340ebf0460e6fb79205b294dd4333c9d8b Mon Sep 17 00:00:00 2001
From: Steven Barrett <steven@liquorix.net>
Date: Mon, 11 Jul 2022 19:10:30 -0500
Subject: ZEN: cpufreq: Remove schedutil dependency on Intel/AMD P-State

View File

@@ -1,4 +1,4 @@
From 48c8812a4cea0190a037757589443f3103c610ba Mon Sep 17 00:00:00 2001
From d00df0f150c9d04cd229d42e0af906db3dfb5190 Mon Sep 17 00:00:00 2001
From: Steven Barrett <steven@liquorix.net>
Date: Wed, 15 Jan 2020 20:43:56 -0600
Subject: ZEN: intel-pstate: Implement "enable" parameter
@@ -30,7 +30,7 @@ selection.
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2254,6 +2254,9 @@
@@ -2283,6 +2283,9 @@
disable
Do not enable intel_pstate as the default
scaling driver for the supported processors
@@ -42,7 +42,7 @@ selection.
governors layer of cpufreq and provides it own
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -3817,6 +3817,8 @@ static int __init intel_pstate_setup(cha
@@ -3827,6 +3827,8 @@ static int __init intel_pstate_setup(cha
if (!strcmp(str, "disable"))
no_load = 1;

View File

@@ -1,4 +1,4 @@
From bbc56fdeaa2017d0bbed05e1e832e6d7e4bdd6e0 Mon Sep 17 00:00:00 2001
From f03da22e562a7d65a97926a76f61daeef8a1eb0d Mon Sep 17 00:00:00 2001
From: Steven Barrett <steven@liquorix.net>
Date: Fri, 15 Mar 2024 12:36:51 -0500
Subject: ZEN: drm/amdgpu/pm: Allow override of min_power_limit with
@@ -13,7 +13,7 @@ Subject: ZEN: drm/amdgpu/pm: Allow override of min_power_limit with
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -164,6 +164,7 @@ struct amdgpu_watchdog_timer {
@@ -160,6 +160,7 @@ struct amdgpu_watchdog_timer {
*/
extern int amdgpu_modeset;
extern unsigned int amdgpu_vram_limit;
@@ -23,7 +23,7 @@ Subject: ZEN: drm/amdgpu/pm: Allow override of min_power_limit with
extern int amdgpu_gtt_size;
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -138,6 +138,7 @@ enum AMDGPU_DEBUG_MASK {
@@ -139,6 +139,7 @@ enum AMDGPU_DEBUG_MASK {
};
unsigned int amdgpu_vram_limit = UINT_MAX;
@@ -31,7 +31,7 @@ Subject: ZEN: drm/amdgpu/pm: Allow override of min_power_limit with
int amdgpu_vis_vram_limit;
int amdgpu_gart_size = -1; /* auto */
int amdgpu_gtt_size = -1; /* auto */
@@ -262,6 +263,15 @@ struct amdgpu_watchdog_timer amdgpu_watc
@@ -258,6 +259,15 @@ struct amdgpu_watchdog_timer amdgpu_watc
};
/**
@@ -49,7 +49,7 @@ Subject: ZEN: drm/amdgpu/pm: Allow override of min_power_limit with
*/
--- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
@@ -3276,6 +3276,9 @@ static ssize_t amdgpu_hwmon_show_power_c
@@ -3180,6 +3180,9 @@ static ssize_t amdgpu_hwmon_show_power_c
struct device_attribute *attr,
char *buf)
{
@@ -61,7 +61,7 @@ Subject: ZEN: drm/amdgpu/pm: Allow override of min_power_limit with
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -2793,7 +2793,10 @@ int smu_get_power_limit(void *handle,
@@ -2823,7 +2823,10 @@ int smu_get_power_limit(void *handle,
*limit = smu->max_power_limit;
break;
case SMU_PPT_LIMIT_MIN:
@@ -73,7 +73,7 @@ Subject: ZEN: drm/amdgpu/pm: Allow override of min_power_limit with
break;
default:
return -EINVAL;
@@ -2817,7 +2820,14 @@ static int smu_set_power_limit(void *han
@@ -2847,7 +2850,14 @@ static int smu_set_power_limit(void *han
if (smu->ppt_funcs->set_power_limit)
return smu->ppt_funcs->set_power_limit(smu, limit_type, limit);

View File

@@ -1,4 +1,4 @@
From 2cceda3c699f19f9c2f287614db2fe5dd009f73a Mon Sep 17 00:00:00 2001
From 5f93b67c4e2fa81be5cee3edd8ec056407d25f26 Mon Sep 17 00:00:00 2001
From: Sultan Alsawaf <sultan@kerneltoast.com>
Date: Sun, 19 Apr 2020 19:59:18 -0700
Subject: ZEN: mm: Stop kswapd early when nothing's waiting for it to free
@@ -43,14 +43,14 @@ Contains:
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -739,6 +739,7 @@ extern void post_alloc_hook(struct page
@@ -741,6 +741,7 @@ void post_alloc_hook(struct page *page,
extern bool free_pages_prepare(struct page *page, unsigned int order);
extern int user_min_free_kbytes;
+extern atomic_long_t kswapd_waiters;
void free_unref_page(struct page *page, unsigned int order);
void free_unref_folios(struct folio_batch *fbatch);
struct page *__alloc_frozen_pages_noprof(gfp_t, unsigned int order, int nid,
nodemask_t *);
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -88,6 +88,8 @@ typedef int __bitwise fpi_t;
@@ -102,7 +102,7 @@ Contains:
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -6385,7 +6385,7 @@ retry:
@@ -6382,7 +6382,7 @@ retry:
return 0;
}
@@ -111,7 +111,7 @@ Contains:
{
struct zone *zone;
unsigned long pfmemalloc_reserve = 0;
@@ -6414,6 +6414,10 @@ static bool allow_direct_reclaim(pg_data
@@ -6411,6 +6411,10 @@ static bool allow_direct_reclaim(pg_data
wmark_ok = free_pages > pfmemalloc_reserve / 2;
@@ -122,7 +122,7 @@ Contains:
/* kswapd must be awake if processes are being throttled */
if (!wmark_ok && waitqueue_active(&pgdat->kswapd_wait)) {
if (READ_ONCE(pgdat->kswapd_highest_zoneidx) > ZONE_NORMAL)
@@ -6479,7 +6483,7 @@ static bool throttle_direct_reclaim(gfp_
@@ -6476,7 +6480,7 @@ static bool throttle_direct_reclaim(gfp_
/* Throttle based on the first usable node */
pgdat = zone->zone_pgdat;
@@ -131,7 +131,7 @@ Contains:
goto out;
break;
}
@@ -6501,11 +6505,14 @@ static bool throttle_direct_reclaim(gfp_
@@ -6498,11 +6502,14 @@ static bool throttle_direct_reclaim(gfp_
*/
if (!(gfp_mask & __GFP_FS))
wait_event_interruptible_timeout(pgdat->pfmemalloc_wait,
@@ -148,7 +148,7 @@ Contains:
if (fatal_signal_pending(current))
return true;
@@ -7008,14 +7015,14 @@ restart:
@@ -7005,14 +7012,14 @@ restart:
* able to safely make forward progress. Wake them
*/
if (waitqueue_active(&pgdat->pfmemalloc_wait) &&

View File

@@ -1,4 +1,4 @@
From 1ec451a4bbac7cc00b59f8ca504d6a8898615880 Mon Sep 17 00:00:00 2001
From 80b06f0f0bba019632e40c11231987a7e996c340 Mon Sep 17 00:00:00 2001
From: EXtremeExploit <pedro.montes.alcalde@gmail.com>
Date: Fri, 29 Nov 2024 13:05:27 -0300
Subject: ZEN: ahci: Disable staggered spinup by default

View File

@@ -1,4 +1,4 @@
From a31b09c511dd58e5032a3c941638207281b20ce4 Mon Sep 17 00:00:00 2001
From ac35b7af0aac6a9eb996962130a99c9af75c8b08 Mon Sep 17 00:00:00 2001
From: Steven Barrett <steven@liquorix.net>
Date: Sat, 14 Dec 2024 11:23:18 -0600
Subject: ZEN: kernel/Kconfig.preempt: Remove EXPERT conditional on PREEMPT_RT
@@ -11,12 +11,12 @@ items hidden by enabling CONFIG_EXPERT.
--- a/kernel/Kconfig.preempt
+++ b/kernel/Kconfig.preempt
@@ -69,7 +69,7 @@ config PREEMPT
@@ -88,7 +88,7 @@ endchoice
config PREEMPT_RT
bool "Fully Preemptible Kernel (Real-Time)"
- depends on EXPERT && ARCH_SUPPORTS_RT
+ depends on ARCH_SUPPORTS_RT
- depends on EXPERT && ARCH_SUPPORTS_RT && !COMPILE_TEST
+ depends on ARCH_SUPPORTS_RT && !COMPILE_TEST
select PREEMPTION
help
This option turns the kernel into a real-time kernel by replacing

View File

@@ -1,4 +1,4 @@
From 530ee9b20cf436bcbb3a632cb19fb5e13a29dde7 Mon Sep 17 00:00:00 2001
From 8bf253ea1b48fe101dc0161824b9a7d85f420b84 Mon Sep 17 00:00:00 2001
From: "Jan Alexander Steffens (heftig)" <jan.steffens@gmail.com>
Date: Mon, 27 Jan 2020 18:10:06 +0100
Subject: ZEN: INTERACTIVE: Base config item
@@ -9,7 +9,7 @@ Subject: ZEN: INTERACTIVE: Base config item
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -154,6 +154,12 @@ config THREAD_INFO_IN_TASK
@@ -157,6 +157,12 @@ config THREAD_INFO_IN_TASK
menu "General setup"

View File

@@ -1,4 +1,4 @@
From d2f0a5801471b5f67344b2c92a2aa29f1aed626a Mon Sep 17 00:00:00 2001
From d3b2ab943a1de0838c4bd515dbed45f8f1c3c2cc Mon Sep 17 00:00:00 2001
From: "Jan Alexander Steffens (heftig)" <jan.steffens@gmail.com>
Date: Mon, 27 Jan 2020 18:11:05 +0100
Subject: ZEN: INTERACTIVE: Use BFQ as the elevator for SQ devices
@@ -10,7 +10,7 @@ Subject: ZEN: INTERACTIVE: Use BFQ as the elevator for SQ devices
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -568,7 +568,11 @@ static struct elevator_type *elevator_ge
@@ -560,7 +560,11 @@ static struct elevator_type *elevator_ge
!blk_mq_is_shared_tags(q->tag_set->flags))
return NULL;
@@ -24,7 +24,7 @@ Subject: ZEN: INTERACTIVE: Use BFQ as the elevator for SQ devices
/*
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -160,6 +160,10 @@ config ZEN_INTERACTIVE
@@ -163,6 +163,10 @@ config ZEN_INTERACTIVE
help
Tunes the kernel for responsiveness at the cost of throughput and power usage.

View File

@@ -1,4 +1,4 @@
From 346251fa257245b3a06e37de863a1dbafbf2bbc2 Mon Sep 17 00:00:00 2001
From d941bedf16b95646be26364f00cf46c6649608a6 Mon Sep 17 00:00:00 2001
From: "Jan Alexander Steffens (heftig)" <heftig@archlinux.org>
Date: Mon, 12 Dec 2022 00:03:03 +0100
Subject: ZEN: INTERACTIVE: Use Kyber as the elevator for MQ devices
@@ -10,7 +10,7 @@ Subject: ZEN: INTERACTIVE: Use Kyber as the elevator for MQ devices
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -566,7 +566,13 @@ static struct elevator_type *elevator_ge
@@ -558,7 +558,13 @@ static struct elevator_type *elevator_ge
if (q->nr_hw_queues != 1 &&
!blk_mq_is_shared_tags(q->tag_set->flags))
@@ -26,7 +26,7 @@ Subject: ZEN: INTERACTIVE: Use Kyber as the elevator for MQ devices
return elevator_find_get("bfq");
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -163,6 +163,7 @@ config ZEN_INTERACTIVE
@@ -166,6 +166,7 @@ config ZEN_INTERACTIVE
--- Block Layer ----------------------------------------
Default scheduler for SQ..: mq-deadline -> bfq

View File

@@ -1,4 +1,4 @@
From 26fcaf58616b8cb3ce042e31c640594ea2fb5987 Mon Sep 17 00:00:00 2001
From d0ce01e1def080e52770f9a899476bb840807b37 Mon Sep 17 00:00:00 2001
From: "Jan Alexander Steffens (heftig)" <jan.steffens@gmail.com>
Date: Mon, 27 Jan 2020 18:21:09 +0100
Subject: ZEN: INTERACTIVE: Enable background reclaim of hugepages
@@ -32,7 +32,7 @@ Reasoning and details in the original patch: https://lwn.net/Articles/711248/
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -165,6 +165,10 @@ config ZEN_INTERACTIVE
@@ -168,6 +168,10 @@ config ZEN_INTERACTIVE
Default scheduler for SQ..: mq-deadline -> bfq
Default scheduler for MQ..: none -> kyber
@@ -45,7 +45,7 @@ Reasoning and details in the original patch: https://lwn.net/Articles/711248/
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -65,7 +65,11 @@ unsigned long transparent_hugepage_flags
@@ -64,7 +64,11 @@ unsigned long transparent_hugepage_flags
#ifdef CONFIG_TRANSPARENT_HUGEPAGE_MADVISE
(1<<TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG)|
#endif

View File

@@ -1,4 +1,4 @@
From 9e5b04df7190ab4750ae3c67714fd537ef4d79f5 Mon Sep 17 00:00:00 2001
From f1fd33efd4b70519ff51b78c62d6fdf7d4f69620 Mon Sep 17 00:00:00 2001
From: "Jan Alexander Steffens (heftig)" <heftig@archlinux.org>
Date: Tue, 31 Oct 2023 19:03:10 +0100
Subject: ZEN: INTERACTIVE: Tune EEVDF for interactivity
@@ -42,7 +42,7 @@ caused by rebalancing too many tasks at once.
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -169,6 +169,13 @@ config ZEN_INTERACTIVE
@@ -172,6 +172,13 @@ config ZEN_INTERACTIVE
Background-reclaim hugepages...: no -> yes
@@ -58,7 +58,7 @@ caused by rebalancing too many tasks at once.
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -73,10 +73,19 @@ unsigned int sysctl_sched_tunable_scalin
@@ -76,10 +76,19 @@ unsigned int sysctl_sched_tunable_scalin
*
* (default: 0.75 msec * (1 + ilog(ncpus)), units: nanoseconds)
*/
@@ -78,7 +78,7 @@ caused by rebalancing too many tasks at once.
static int __init setup_sched_thermal_decay_shift(char *str)
{
@@ -121,8 +130,12 @@ int __weak arch_asym_cpu_priority(int cp
@@ -124,8 +133,12 @@ int __weak arch_asym_cpu_priority(int cp
*
* (default: 5 msec, units: microseconds)
*/
@@ -93,7 +93,7 @@ caused by rebalancing too many tasks at once.
/* Restrict the NUMA promotion throughput (MB/s) for each target node. */
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2797,7 +2797,7 @@ extern void deactivate_task(struct rq *r
@@ -2837,7 +2837,7 @@ extern void deactivate_task(struct rq *r
extern void wakeup_preempt(struct rq *rq, struct task_struct *p, int flags);

View File

@@ -1,4 +1,4 @@
From f654ea11471f81ac7dd68467f552db25722df25e Mon Sep 17 00:00:00 2001
From 75f2a8831bd24a35d9853b11dabc06a138c5e445 Mon Sep 17 00:00:00 2001
From: "Jan Alexander Steffens (heftig)" <jan.steffens@gmail.com>
Date: Mon, 27 Jan 2020 18:27:16 +0100
Subject: ZEN: INTERACTIVE: Tune ondemand governor for interactivity
@@ -75,7 +75,7 @@ Remove MuQSS cpufreq configuration.
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -176,6 +176,12 @@ config ZEN_INTERACTIVE
@@ -179,6 +179,12 @@ config ZEN_INTERACTIVE
Bandwidth slice size...........: 5 -> 3 ms
Task rebalancing threshold.....: 32 -> 8

View File

@@ -1,4 +1,4 @@
From f138e9762fd03612db5593f4c267c8f8b5799159 Mon Sep 17 00:00:00 2001
From b82d80a4195f179b9c0d0c80f662a7f42ed21ce8 Mon Sep 17 00:00:00 2001
From: Steven Barrett <steven@liquorix.net>
Date: Sat, 5 Mar 2022 11:37:14 -0600
Subject: ZEN: INTERACTIVE: mm: Disable unevictable compaction
@@ -12,7 +12,7 @@ turn it off when CONFIG_ZEN_INTERACTIVE is set as well.
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -168,6 +168,7 @@ config ZEN_INTERACTIVE
@@ -171,6 +171,7 @@ config ZEN_INTERACTIVE
--- Virtual Memory Subsystem ---------------------------
Background-reclaim hugepages...: no -> yes
@@ -22,7 +22,7 @@ turn it off when CONFIG_ZEN_INTERACTIVE is set as well.
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -648,7 +648,7 @@ config COMPACTION
@@ -691,7 +691,7 @@ config COMPACTION
config COMPACT_UNEVICTABLE_DEFAULT
int
depends on COMPACTION

View File

@@ -1,4 +1,4 @@
From 76960c3806e7dfb618f49677cc84dafbfe48e4c4 Mon Sep 17 00:00:00 2001
From 7227af3e01f9ae5a2bcdc9aa652c973438938eb3 Mon Sep 17 00:00:00 2001
From: Sultan Alsawaf <sultan@kerneltoast.com>
Date: Sat, 28 Mar 2020 13:06:28 -0700
Subject: ZEN: INTERACTIVE: mm: Disable watermark boosting by default
@@ -33,7 +33,7 @@ Signed-off-by: Sultan Alsawaf <sultan@kerneltoast.com>
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -169,6 +169,7 @@ config ZEN_INTERACTIVE
@@ -172,6 +172,7 @@ config ZEN_INTERACTIVE
Background-reclaim hugepages...: no -> yes
Compact unevictable............: yes -> no

View File

@@ -1,4 +1,4 @@
From fc3e794cecb686d4e05c6ed86fdf9b2dbd725ea9 Mon Sep 17 00:00:00 2001
From 91187cefc66b9c186a78d7bd996088fc74c66c99 Mon Sep 17 00:00:00 2001
From: Sultan Alsawaf <sultan@kerneltoast.com>
Date: Wed, 20 Oct 2021 20:50:11 -0700
Subject: ZEN: INTERACTIVE: mm: Lower the non-hugetlbpage pageblock size to
@@ -47,7 +47,7 @@ Signed-off-by: Sultan Alsawaf <sultan@kerneltoast.com>
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -170,6 +170,7 @@ config ZEN_INTERACTIVE
@@ -173,6 +173,7 @@ config ZEN_INTERACTIVE
Background-reclaim hugepages...: no -> yes
Compact unevictable............: yes -> no
Watermark boost factor.........: 1.5 -> 0

View File

@@ -1,4 +1,4 @@
From be57a2710aef65116767d26930dd1251ff6e060f Mon Sep 17 00:00:00 2001
From 779648709dc797dac595e3007b4c7c3fee254537 Mon Sep 17 00:00:00 2001
From: Steven Barrett <steven@liquorix.net>
Date: Sat, 21 May 2022 15:15:09 -0500
Subject: ZEN: INTERACTIVE: dm-crypt: Disable workqueues for crypto ops
@@ -20,7 +20,7 @@ Fixes: https://github.com/zen-kernel/zen-kernel/issues/282
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -3308,6 +3308,11 @@ static int crypt_ctr(struct dm_target *t
@@ -3305,6 +3305,11 @@ static int crypt_ctr(struct dm_target *t
goto bad;
}
@@ -34,7 +34,7 @@ Fixes: https://github.com/zen-kernel/zen-kernel/issues/282
goto bad;
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -164,6 +164,7 @@ config ZEN_INTERACTIVE
@@ -167,6 +167,7 @@ config ZEN_INTERACTIVE
Default scheduler for SQ..: mq-deadline -> bfq
Default scheduler for MQ..: none -> kyber

View File

@@ -1,4 +1,4 @@
From 41fe25c2e4e89c6afd35e3feb720e5a6797857d3 Mon Sep 17 00:00:00 2001
From ef87b1cb12134c34eed834315b03c4a6747b5716 Mon Sep 17 00:00:00 2001
From: Steven Barrett <steven@liquorix.net>
Date: Mon, 5 Sep 2022 11:35:20 -0500
Subject: ZEN: INTERACTIVE: mm/swap: Disable swap-in readahead
@@ -20,7 +20,7 @@ same change so Zen Kernel users benefit.
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -172,6 +172,7 @@ config ZEN_INTERACTIVE
@@ -175,6 +175,7 @@ config ZEN_INTERACTIVE
Compact unevictable............: yes -> no
Watermark boost factor.........: 1.5 -> 0
Pageblock order................: 10 -> 3
@@ -30,7 +30,7 @@ same change so Zen Kernel users benefit.
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -1080,6 +1080,10 @@ void folio_batch_remove_exceptionals(str
@@ -1081,6 +1081,10 @@ void folio_batch_remove_exceptionals(str
*/
void __init swap_setup(void)
{
@@ -41,7 +41,7 @@ same change so Zen Kernel users benefit.
unsigned long megs = totalram_pages() >> (20 - PAGE_SHIFT);
/* Use a smaller cluster for small-memory machines */
@@ -1091,4 +1095,5 @@ void __init swap_setup(void)
@@ -1092,4 +1096,5 @@ void __init swap_setup(void)
* Right now other parts of the system means that we
* _really_ don't want to cluster much more
*/

View File

@@ -1,4 +1,4 @@
From 40de9c08129e2d8e182a166df2f1e823f70fa31d Mon Sep 17 00:00:00 2001
From cb33a6dc022faa07ac1e1cd544567b28a7e9afeb Mon Sep 17 00:00:00 2001
From: Steven Barrett <steven@liquorix.net>
Date: Sun, 19 Sep 2021 16:03:36 -0500
Subject: ZEN: INTERACTIVE: Document PDS/BMQ configuration
@@ -9,7 +9,7 @@ Subject: ZEN: INTERACTIVE: Document PDS/BMQ configuration
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -181,6 +181,11 @@ config ZEN_INTERACTIVE
@@ -184,6 +184,11 @@ config ZEN_INTERACTIVE
Bandwidth slice size...........: 5 -> 3 ms
Task rebalancing threshold.....: 32 -> 8

View File

@@ -1,194 +0,0 @@
From eacae6d88bcc8a925124f97b7788bb2bfac8b267 Mon Sep 17 00:00:00 2001
From: Vinay Banakar <vny@google.com>
Date: Mon, 20 Jan 2025 16:47:29 -0600
Subject: mm: Optimize TLB flushes during page reclaim

The current implementation in shrink_folio_list() performs full TLB
flushes and issues IPIs for each individual page being reclaimed. This
causes unnecessary overhead during memory reclaim, whether triggered
by madvise(MADV_PAGEOUT) or kswapd, especially in scenarios where
applications are actively moving cold pages to swap while maintaining
high performance requirements for hot pages.

The current code:
1. Clears PTE and unmaps each page individually
2. Performs a full TLB flush on all cores using the VMA (via CR3 write) or
issues individual TLB shootdowns (invlpg+invlpcid) for single-core usage
3. Submits each page individually to BIO

This approach results in:
- Excessive full TLB flushes across all cores
- Unnecessary IPI storms when processing multiple pages
- Suboptimal I/O submission patterns

I initially tried using selective TLB shootdowns (invlpg) instead of
full TLB flushes per each page to avoid interference with other
threads. However, this approach still required sending IPIs to all
cores for each page, which did not significantly improve application
throughput.

This patch instead optimizes the process by batching operations,
issuing one IPI per PMD instead of per page. This reduces interrupts
by a factor of 512 and enables batching page submissions to BIO. The
new approach:
1. Collect dirty pages that need to be written back
2. Issue a single TLB flush for all dirty pages in the batch
3. Process the collected pages for writebacks (submit to BIO)

Testing shows significant reduction in application throughput impact
during page-out operations. Applications maintain better performance
during memory reclaim, when triggered by explicit
madvise(MADV_PAGEOUT) calls.

I'd appreciate your feedback on this approach, especially on the
correctness of batched BIO submissions. Looking forward to your
comments.

Signed-off-by: Vinay Banakar <vny@google.com>
---
mm/vmscan.c | 120 ++++++++++++++++++++++++++++++++--------------------
1 file changed, 74 insertions(+), 46 deletions(-)

--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1053,6 +1053,7 @@ static unsigned int shrink_folio_list(st
struct folio_batch free_folios;
LIST_HEAD(ret_folios);
LIST_HEAD(demote_folios);
+ LIST_HEAD(pageout_list);
unsigned int nr_reclaimed = 0, nr_demoted = 0;
unsigned int pgactivate = 0;
bool do_demote_pass;
@@ -1365,52 +1366,9 @@ retry:
if (!sc->may_writepage)
goto keep_locked;
- /*
- * Folio is dirty. Flush the TLB if a writable entry
- * potentially exists to avoid CPU writes after I/O
- * starts and then write it out here.
- */
- try_to_unmap_flush_dirty();
- switch (pageout(folio, mapping, &plug, folio_list)) {
- case PAGE_KEEP:
- goto keep_locked;
- case PAGE_ACTIVATE:
- /*
- * If shmem folio is split when writeback to swap,
- * the tail pages will make their own pass through
- * this function and be accounted then.
- */
- if (nr_pages > 1 && !folio_test_large(folio)) {
- sc->nr_scanned -= (nr_pages - 1);
- nr_pages = 1;
- }
- goto activate_locked;
- case PAGE_SUCCESS:
- if (nr_pages > 1 && !folio_test_large(folio)) {
- sc->nr_scanned -= (nr_pages - 1);
- nr_pages = 1;
- }
- stat->nr_pageout += nr_pages;
-
- if (folio_test_writeback(folio))
- goto keep;
- if (folio_test_dirty(folio))
- goto keep;
-
- /*
- * A synchronous write - probably a ramdisk. Go
- * ahead and try to reclaim the folio.
- */
- if (!folio_trylock(folio))
- goto keep;
- if (folio_test_dirty(folio) ||
- folio_test_writeback(folio))
- goto keep_locked;
- mapping = folio_mapping(folio);
- fallthrough;
- case PAGE_CLEAN:
- ; /* try to free the folio below */
- }
+ /* Add to pageout list for deferred bio submissions */
+ list_add(&folio->lru, &pageout_list);
+ continue;
}
/*
@@ -1521,6 +1479,76 @@ keep:
}
/* 'folio_list' is always empty here */
+ if (!list_empty(&pageout_list)) {
+ /*
+ * Batch TLB flushes by flushing once before processing all dirty pages.
+ * Since we operate on one PMD at a time, this batches TLB flushes at
+ * PMD granularity rather than per-page, reducing IPIs.
+ */
+ struct address_space *mapping;
+ try_to_unmap_flush_dirty();
+
+ while (!list_empty(&pageout_list)) {
+ struct folio *folio = lru_to_folio(&pageout_list);
+ list_del(&folio->lru);
+
+ /* Recheck if page got reactivated */
+ if (folio_test_active(folio) ||
+ (folio_mapped(folio) && folio_test_young(folio)))
+ goto skip_pageout_locked;
+
+ mapping = folio_mapping(folio);
+ pageout_t pageout_res = pageout(folio, mapping, &plug, &pageout_list);
+ switch (pageout_res) {
+ case PAGE_KEEP:
+ goto skip_pageout_locked;
+ case PAGE_ACTIVATE:
+ goto skip_pageout_locked;
+ case PAGE_SUCCESS:
+ stat->nr_pageout += folio_nr_pages(folio);
+
+ if (folio_test_writeback(folio) ||
+ folio_test_dirty(folio))
+ goto skip_pageout;
+
+ /*
+ * A synchronous write - probably a ramdisk. Go
+ * ahead and try to reclaim the folio.
+ */
+ if (!folio_trylock(folio))
+ goto skip_pageout;
+ if (folio_test_dirty(folio) ||
+ folio_test_writeback(folio))
+ goto skip_pageout_locked;
+
+ // Try to free the page
+ if (!mapping ||
+ !__remove_mapping(mapping, folio, true,
+ sc->target_mem_cgroup))
+ goto skip_pageout_locked;
+
+ nr_reclaimed += folio_nr_pages(folio);
+ folio_unlock(folio);
+ continue;
+
+ case PAGE_CLEAN:
+ if (!mapping ||
+ !__remove_mapping(mapping, folio, true,
+ sc->target_mem_cgroup))
+ goto skip_pageout_locked;
+
+ nr_reclaimed += folio_nr_pages(folio);
+ folio_unlock(folio);
+ continue;
+ }
+
+skip_pageout_locked:
+ folio_unlock(folio);
+skip_pageout:
+ list_add(&folio->lru, &ret_folios);
+ }
+ }
+
/* Migrate folios selected for demotion */
nr_demoted = demote_folio_list(&demote_folios, pgdat);
nr_reclaimed += nr_demoted;
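
As an aside for readers skimming the deleted patch above, the following is a
minimal userspace sketch of the batching idea it describes: one simulated TLB
flush per batch of dirty pages instead of one flush per page. It is an
illustration only; struct page, flush_tlb_all() and submit_writeback() are
stand-ins invented for the sketch and are not the kernel interfaces used in
mm/vmscan.c.

/* batching_sketch.c - hypothetical model of per-page vs batched flushing */
#include <stdio.h>

struct page { int id; int dirty; };          /* toy stand-in for a folio/page */

static int tlb_flushes;                      /* counts simulated flush broadcasts */

static void flush_tlb_all(void)              { tlb_flushes++; }
static void submit_writeback(struct page *p) { printf("writeback page %d\n", p->id); }

/* Old behaviour: flush once per dirty page before writing it out. */
static void reclaim_per_page(struct page *pages, int n)
{
    for (int i = 0; i < n; i++) {
        if (!pages[i].dirty)
            continue;
        flush_tlb_all();
        submit_writeback(&pages[i]);
        pages[i].dirty = 0;
    }
}

/* Batched behaviour: collect dirty pages, flush once, then write them all. */
static void reclaim_batched(struct page *pages, int n)
{
    struct page *batch[512];
    int batched = 0;

    for (int i = 0; i < n; i++)
        if (pages[i].dirty)
            batch[batched++] = &pages[i];

    if (batched) {
        flush_tlb_all();                     /* single flush covers the batch */
        for (int i = 0; i < batched; i++) {
            submit_writeback(batch[i]);
            batch[i]->dirty = 0;
        }
    }
}

int main(void)
{
    struct page a[8], b[8];
    for (int i = 0; i < 8; i++) {
        a[i].id = b[i].id = i;
        a[i].dirty = b[i].dirty = (i % 2);   /* every other page is dirty */
    }

    tlb_flushes = 0;
    reclaim_per_page(a, 8);
    printf("per-page reclaim: %d flushes\n", tlb_flushes);

    tlb_flushes = 0;
    reclaim_batched(b, 8);
    printf("batched reclaim:  %d flushes\n", tlb_flushes);
    return 0;
}

Run standalone, the per-page path reports one flush per dirty page while the
batched path reports a single flush; the real patch applies the same idea at
PMD granularity inside shrink_folio_list(), which is where the claimed
reduction in IPIs comes from.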