
release 6.12.13

Konstantin Demin 2025-02-09 19:40:34 +03:00
parent b8c80400f5
commit e0baaf49dd
26 changed files with 385 additions and 221 deletions

debian/changelog

@@ -1,3 +1,10 @@
+linux (6.12.13-1) sid; urgency=medium
+
+  * New upstream stable update:
+    https://www.kernel.org/pub/linux/kernel/v6.x/ChangeLog-6.12.13
+
+ -- Konstantin Demin <rockdrilla@gmail.com>  Sun, 09 Feb 2025 19:34:35 +0300
+
 linux (6.12.12-1) sid; urgency=medium
 
   * New upstream stable update:


@@ -22,7 +22,7 @@ Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
-@@ -1617,6 +1617,11 @@ int perf_cpu_time_max_percent_handler(co
+@@ -1623,6 +1623,11 @@ int perf_cpu_time_max_percent_handler(co
int perf_event_max_stack_handler(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos);
@@ -50,7 +50,7 @@ Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
/* Minimum for 512 kiB + 1 user control page */
int sysctl_perf_event_mlock __read_mostly = 512 + (PAGE_SIZE / 1024); /* 'free' kiB per user */
-@@ -12681,6 +12686,9 @@ SYSCALL_DEFINE5(perf_event_open,
+@@ -12682,6 +12687,9 @@ SYSCALL_DEFINE5(perf_event_open,
if (err)
return err;


@@ -56,7 +56,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
struct rate_sample *rs);
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
-@@ -2767,6 +2767,7 @@ static bool tcp_write_xmit(struct sock *
+@@ -2770,6 +2770,7 @@ static bool tcp_write_xmit(struct sock *
skb_set_delivery_time(skb, tp->tcp_wstamp_ns, SKB_CLOCK_MONOTONIC);
list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue);
tcp_init_tso_segs(skb, mss_now);


@@ -55,7 +55,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
* between different flows.
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
-@@ -1603,7 +1603,7 @@ int tcp_fragment(struct sock *sk, enum t
+@@ -1606,7 +1606,7 @@ int tcp_fragment(struct sock *sk, enum t
{
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *buff;
@@ -64,7 +64,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
long limit;
int nlen;
u8 flags;
-@@ -1678,6 +1678,30 @@ int tcp_fragment(struct sock *sk, enum t
+@@ -1681,6 +1681,30 @@ int tcp_fragment(struct sock *sk, enum t
if (diff)
tcp_adjust_pcount(sk, skb, diff);


@@ -97,7 +97,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
};
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
-@@ -2059,13 +2059,12 @@ static u32 tcp_tso_autosize(const struct
+@@ -2062,13 +2062,12 @@ static u32 tcp_tso_autosize(const struct
static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now)
{
const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;


@@ -35,7 +35,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
fastopen_no_cookie:1, /* Allow send/recv SYN+data without a cookie */
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
-@@ -3005,6 +3005,7 @@ void tcp_send_loss_probe(struct sock *sk
+@@ -3008,6 +3008,7 @@ void tcp_send_loss_probe(struct sock *sk
if (WARN_ON(!skb || !tcp_skb_pcount(skb)))
goto rearm_timer;


@@ -88,7 +88,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
-@@ -336,10 +336,9 @@ static void tcp_ecn_send_syn(struct sock
+@@ -339,10 +339,9 @@ static void tcp_ecn_send_syn(struct sock
bool bpf_needs_ecn = tcp_bpf_ca_needs_ecn(sk);
bool use_ecn = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn) == 1 ||
tcp_ca_needs_ecn(sk) || bpf_needs_ecn;
@@ -100,7 +100,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
if (dst && dst_feature(dst, RTAX_FEATURE_ECN))
use_ecn = true;
}
-@@ -351,6 +350,9 @@ static void tcp_ecn_send_syn(struct sock
+@@ -354,6 +353,9 @@ static void tcp_ecn_send_syn(struct sock
tp->ecn_flags = TCP_ECN_OK;
if (tcp_ca_needs_ecn(sk) || bpf_needs_ecn)
INET_ECN_xmit(sk);


@@ -47,7 +47,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
/* BBR marks the current round trip as a loss round. */
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
-@@ -390,7 +390,8 @@ static void tcp_ecn_send(struct sock *sk
+@@ -393,7 +393,8 @@ static void tcp_ecn_send(struct sock *sk
th->cwr = 1;
skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
}


@@ -1,6 +1,6 @@
-From 6cb30d7518301094b9c7397a24a22cf538a1d64c Mon Sep 17 00:00:00 2001
+From e11153c4df0fee7caadec3714a60a4936d6a9ea2 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
-Date: Wed, 22 Jan 2025 23:23:20 -0500
+Date: Wed, 5 Feb 2025 23:43:20 -0500
Subject: x86/mm: make MMU_GATHER_RCU_TABLE_FREE unconditional
Currently x86 uses CONFIG_MMU_GATHER_TABLE_FREE when using
@@ -29,6 +29,7 @@ That makes it safe to use INVLPGB on AMD CPUs.
Signed-off-by: Rik van Riel <riel@surriel.com>
Suggested-by: Peter Zijlstra <peterz@infradead.org>
+Tested-by: Manali Shukla <Manali.Shukla@amd.com>
---
arch/x86/Kconfig | 2 +-
arch/x86/kernel/paravirt.c | 7 +------
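
For context, a minimal sketch (not part of the patch) of what RCU-style table freeing buys: a page-table page queued through the generic mmu_gather path is only returned to the allocator after lockless walkers, such as GUP-fast, or a CPU that received no flush IPI because the invalidation went out via INVLPGB, can no longer be dereferencing it.

/*
 * Illustrative sketch only: with MMU_GATHER_RCU_TABLE_FREE, the
 * table page is queued and freed after a grace period instead of
 * being reused immediately under a concurrent lockless walker.
 */
static void free_pte_page_sketch(struct mmu_gather *tlb, pgtable_t pte)
{
        tlb_remove_table(tlb, pte);     /* deferred, walker-safe free */
}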


@@ -1,6 +1,6 @@
-From df8f812b62c450b98b972ad0a4d5a5ba400f5eae Mon Sep 17 00:00:00 2001
+From e8008cb69c5e4efbaedd70b0fb692343e4aa0e51 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
-Date: Wed, 22 Jan 2025 23:23:21 -0500
+Date: Wed, 5 Feb 2025 23:43:21 -0500
Subject: x86/mm: remove pv_ops.mmu.tlb_remove_table call
Every pv_ops.mmu.tlb_remove_table call ends up calling tlb_remove_table.
@@ -10,6 +10,7 @@ and not going through the paravirt function pointers.
Signed-off-by: Rik van Riel <riel@surriel.com>
Suggested-by: Qi Zheng <zhengqi.arch@bytedance.com>
+Tested-by: Manali Shukla <Manali.Shukla@amd.com>
---
arch/x86/hyperv/mmu.c | 1 -
arch/x86/include/asm/paravirt.h | 5 -----


@@ -1,6 +1,6 @@
-From 8b2bd3f69b50cfe59eee4506413715878bcbb901 Mon Sep 17 00:00:00 2001
+From 7ac6508c4db81eced5f6e3d7c8913af1da6cf110 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
-Date: Wed, 22 Jan 2025 23:23:22 -0500
+Date: Wed, 5 Feb 2025 23:43:22 -0500
Subject: x86/mm: consolidate full flush threshold decision
Reduce code duplication by consolidating the decision point
@@ -10,15 +10,34 @@ inside get_flush_tlb_info.
Signed-off-by: Rik van Riel <riel@surriel.com>
Suggested-by: Dave Hansen <dave.hansen@intel.com>
---
-arch/x86/mm/tlb.c | 43 ++++++++++++++++++++-----------------------
-1 file changed, 20 insertions(+), 23 deletions(-)
+arch/x86/mm/tlb.c | 56 ++++++++++++++++++++++++++---------------------
+1 file changed, 31 insertions(+), 25 deletions(-)
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
-@@ -981,6 +981,15 @@ static struct flush_tlb_info *get_flush_
+@@ -973,14 +973,32 @@ static struct flush_tlb_info *get_flush_
BUG_ON(this_cpu_inc_return(flush_tlb_info_idx) != 1);
#endif
- info->start = start;
- info->end = end;
+ /*
+ * Round the start and end addresses to the page size specified
+ * by the stride shift. This ensures partial pages at the end of
+ * a range get fully invalidated.
+ */
+ info->start = round_down(start, 1 << stride_shift);
+ info->end = round_up(end, 1 << stride_shift);
info->mm = mm;
info->stride_shift = stride_shift;
info->freed_tables = freed_tables;
info->new_tlb_gen = new_tlb_gen;
info->initiating_cpu = smp_processor_id();
+ WARN_ONCE(start != info->start || end != info->end,
+ "TLB flush not stride %x aligned. Start %lx, end %lx\n",
+ 1 << stride_shift, start, end);
+
+ /*
+ * If the number of flushes is so large that a full flush
+ * would be faster, do a full flush.
@@ -31,7 +50,7 @@ Suggested-by: Dave Hansen <dave.hansen@intel.com>
return info;
}
-@@ -998,17 +1007,8 @@ void flush_tlb_mm_range(struct mm_struct
+@@ -998,17 +1016,8 @@ void flush_tlb_mm_range(struct mm_struct
bool freed_tables)
{
struct flush_tlb_info *info;
@@ -50,7 +69,7 @@ Suggested-by: Dave Hansen <dave.hansen@intel.com>
/* This is also a barrier that synchronizes with switch_mm(). */
new_tlb_gen = inc_mm_tlb_gen(mm);
-@@ -1060,22 +1060,19 @@ static void do_kernel_range_flush(void *
+@@ -1060,22 +1069,19 @@ static void do_kernel_range_flush(void *
void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
@@ -61,12 +80,12 @@ Suggested-by: Dave Hansen <dave.hansen@intel.com>
- } else {
- struct flush_tlb_info *info;
+ struct flush_tlb_info *info;
+
+ guard(preempt)();
- preempt_disable();
- info = get_flush_tlb_info(NULL, start, end, 0, false,
- TLB_GENERATION_INVALID);
+ guard(preempt)();
+
+ info = get_flush_tlb_info(NULL, start, end, PAGE_SHIFT, false,
+ TLB_GENERATION_INVALID);
@@ -82,7 +101,7 @@ Suggested-by: Dave Hansen <dave.hansen@intel.com>
}
/*
-@@ -1247,7 +1244,7 @@ void arch_tlbbatch_flush(struct arch_tlb
+@@ -1247,7 +1253,7 @@ void arch_tlbbatch_flush(struct arch_tlb
int cpu = get_cpu();
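
As a quick check of the rounding logic added above, a user-space sketch (the macros stand in for the kernel's round_down()/round_up(); values are invented for illustration):

#include <stdio.h>

#define round_down(x, y) ((x) & ~((y) - 1))
#define round_up(x, y)   (((x) + (y) - 1) & ~((y) - 1))

int main(void)
{
        unsigned long stride = 1UL << 21;       /* stride_shift == PMD_SHIFT: 2 MiB */
        unsigned long start = round_down(0x200fffUL, stride);
        unsigned long end   = round_up(0x401000UL, stride);

        /* Partial strides at both ends are widened: [0x200000, 0x600000) */
        printf("flush [%#lx, %#lx)\n", start, end);
        return 0;
}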


@@ -1,6 +1,6 @@
-From a182b0471ba3c3329d93abfa07e3d452183a9137 Mon Sep 17 00:00:00 2001
+From e772b2eb66e5c3cf668feadab678f2a88d896189 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
-Date: Wed, 22 Jan 2025 23:23:23 -0500
+Date: Wed, 5 Feb 2025 23:43:23 -0500
Subject: x86/mm: get INVLPGB count max from CPUID
The CPU advertises the maximum number of pages that can be shot down
@@ -9,6 +9,7 @@ with one INVLPGB instruction in the CPUID data.
Save that information for later use.
Signed-off-by: Rik van Riel <riel@surriel.com>
+Tested-by: Manali Shukla <Manali.Shukla@amd.com>
---
arch/x86/Kconfig.cpu | 5 +++++
arch/x86/include/asm/cpufeatures.h | 1 +
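
A user-space sketch of where that maximum lives, assuming (per the AMD APM) it is reported in CPUID leaf 0x80000008, EDX bits 15:0:

#include <cpuid.h>
#include <stdio.h>

int main(void)
{
        unsigned int eax, ebx, ecx, edx;

        /* Leaf 0x80000008: EDX[15:0] = InvlpgbCountMax (assumption per APM). */
        if (__get_cpuid(0x80000008, &eax, &ebx, &ecx, &edx))
                printf("invlpgb_count_max = %u\n", edx & 0xffff);
        return 0;
}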


@@ -1,12 +1,13 @@
-From cc3f8dd3033c79abd9f37a94efed74a535a703c9 Mon Sep 17 00:00:00 2001
+From 7a896b12875e2b988acbf0229fb4bcf9157b83bd Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
-Date: Wed, 22 Jan 2025 23:23:24 -0500
+Date: Wed, 5 Feb 2025 23:43:24 -0500
Subject: x86/mm: add INVLPGB support code
Add invlpgb.h with the helper functions and definitions needed to use
broadcast TLB invalidation on AMD EPYC 3 and newer CPUs.
Signed-off-by: Rik van Riel <riel@surriel.com>
+Tested-by: Manali Shukla <Manali.Shukla@amd.com>
---
arch/x86/include/asm/invlpgb.h | 101 ++++++++++++++++++++++++++++++++
arch/x86/include/asm/tlbflush.h | 1 +
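
The helpers pair an asynchronous broadcast with an explicit wait; a sketch of the pattern using the __invlpgb()/tlbsync() names this patch introduces (the wrapper name here is hypothetical):

/*
 * Sketch: flush one user page for a PCID on every CPU in the system.
 * INVLPGB broadcasts the invalidation; TLBSYNC then waits until all
 * invalidations initiated by this CPU have completed.
 */
static void flush_user_page_sketch(unsigned long pcid, unsigned long addr)
{
        __invlpgb(0, pcid, addr, 0, 0, INVLPGB_PCID | INVLPGB_VA);
        tlbsync();
}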


@@ -1,6 +1,6 @@
-From 6b6686f0d7e228d0a2d8c166204adea5484c20d7 Mon Sep 17 00:00:00 2001
+From 99f2b0eda74d7ec76c9c48b78f9d30d251501c28 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
-Date: Wed, 22 Jan 2025 23:23:25 -0500
+Date: Wed, 5 Feb 2025 23:43:25 -0500
Subject: x86/mm: use INVLPGB for kernel TLB flushes
Use broadcast TLB invalidation for kernel addresses when available.
@@ -8,13 +8,14 @@ Use broadcast TLB invalidation for kernel addresses when available.
Remove the need to send IPIs for kernel TLB flushes.
Signed-off-by: Rik van Riel <riel@surriel.com>
+Tested-by: Manali Shukla <Manali.Shukla@amd.com>
---
arch/x86/mm/tlb.c | 28 +++++++++++++++++++++++++++-
1 file changed, 27 insertions(+), 1 deletion(-)
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
-@@ -1048,6 +1048,30 @@ void flush_tlb_all(void)
+@@ -1057,6 +1057,30 @@ void flush_tlb_all(void)
on_each_cpu(do_flush_tlb_all, NULL, 1);
}
@ -45,7 +46,7 @@ Signed-off-by: Rik van Riel <riel@surriel.com>
static void do_kernel_range_flush(void *info)
{
struct flush_tlb_info *f = info;
-@@ -1067,7 +1091,9 @@ void flush_tlb_kernel_range(unsigned lon
+@@ -1076,7 +1100,9 @@ void flush_tlb_kernel_range(unsigned lon
info = get_flush_tlb_info(NULL, start, end, PAGE_SHIFT, false,
TLB_GENERATION_INVALID);
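
The resulting control flow in flush_tlb_kernel_range() looks roughly like this (simplified sketch, not the patch itself; the real code also caps each INVLPGB at invlpgb_count_max pages by looping):

static void kernel_range_flush_sketch(struct flush_tlb_info *info)
{
        if (cpu_feature_enabled(X86_FEATURE_INVLPGB)) {
                /* One broadcast invalidation instead of one IPI per CPU. */
                unsigned long nr = (info->end - info->start) >> PAGE_SHIFT;

                invlpgb_flush_addr_nosync(info->start, nr);
                tlbsync();
        } else {
                on_each_cpu(do_kernel_range_flush, info, 1);
        }
}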


@@ -1,19 +1,20 @@
-From 6cffce503223f9076a5e16177905ba3ab6d9f7d8 Mon Sep 17 00:00:00 2001
+From 1ef7edb5b2375d4010ed2ad0c7d87fcfa7ab4519 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
-Date: Wed, 22 Jan 2025 23:23:26 -0500
+Date: Wed, 5 Feb 2025 23:43:26 -0500
Subject: x86/mm: use INVLPGB in flush_tlb_all
The flush_tlb_all() function is not used a whole lot, but we might
as well use broadcast TLB flushing there, too.
Signed-off-by: Rik van Riel <riel@surriel.com>
+Tested-by: Manali Shukla <Manali.Shukla@amd.com>
---
arch/x86/mm/tlb.c | 15 +++++++++++++++
1 file changed, 15 insertions(+)
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
-@@ -1036,6 +1036,19 @@ void flush_tlb_mm_range(struct mm_struct
+@@ -1045,6 +1045,19 @@ void flush_tlb_mm_range(struct mm_struct
}
@@ -33,7 +34,7 @@ Signed-off-by: Rik van Riel <riel@surriel.com>
static void do_flush_tlb_all(void *info)
{
count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
-@@ -1044,6 +1057,8 @@ static void do_flush_tlb_all(void *info)
+@@ -1053,6 +1066,8 @@ static void do_flush_tlb_all(void *info)
void flush_tlb_all(void)
{


@@ -1,6 +1,6 @@
-From 3d23d79d14cdd3c68dc5bffbaf34a60eaca7fa40 Mon Sep 17 00:00:00 2001
+From 5e5219596683c3b8178e09f6ec1e75154537325f Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
-Date: Wed, 22 Jan 2025 23:23:27 -0500
+Date: Wed, 5 Feb 2025 23:43:27 -0500
Subject: x86/mm: use broadcast TLB flushing for page reclaim TLB flushing
In the page reclaim code, we only track the CPU(s) where the TLB needs
@@ -10,13 +10,14 @@ invalidated.
Use broadcast TLB flushing when that is available.
Signed-off-by: Rik van Riel <riel@surriel.com>
+Tested-by: Manali Shukla <Manali.Shukla@amd.com>
---
arch/x86/mm/tlb.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
-@@ -1292,7 +1292,9 @@ void arch_tlbbatch_flush(struct arch_tlb
+@@ -1301,7 +1301,9 @@ void arch_tlbbatch_flush(struct arch_tlb
* a local TLB flush is needed. Optimize this use-case by calling
* flush_tlb_func_local() directly in this case.
*/


@@ -1,6 +1,6 @@
-From 79c9df0c7637c8ba8a1833889a2ace355d56c96e Mon Sep 17 00:00:00 2001
+From c7212dc64d8e9e4f12f1c6edea3b75c350a30381 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
-Date: Wed, 22 Jan 2025 23:23:28 -0500
+Date: Wed, 5 Feb 2025 23:43:28 -0500
Subject: x86/mm: enable broadcast TLB invalidation for multi-threaded
processes
@@ -9,10 +9,10 @@ and newer CPUs.
In order to not exhaust PCID space, and keep TLB flushes local for single
threaded processes, we only hand out broadcast ASIDs to processes active on
-3 or more CPUs, and gradually increase the threshold as broadcast ASID space
-is depleted.
+4 or more CPUs.
Signed-off-by: Rik van Riel <riel@surriel.com>
+Tested-by: Manali Shukla <Manali.Shukla@amd.com>
---
arch/x86/include/asm/mmu.h | 6 +
arch/x86/include/asm/mmu_context.h | 14 ++
@@ -100,12 +100,12 @@ Signed-off-by: Rik van Riel <riel@surriel.com>
+ return !is_dyn_asid(asid);
+}
+
-+static inline bool in_asid_transition(const struct flush_tlb_info *info)
++static inline bool in_asid_transition(struct mm_struct *mm)
+{
+ if (!cpu_feature_enabled(X86_FEATURE_INVLPGB))
+ return false;
+
-+ return info->mm && READ_ONCE(info->mm->context.asid_transition);
++ return mm && READ_ONCE(mm->context.asid_transition);
+}
+
+static inline u16 mm_global_asid(struct mm_struct *mm)
@@ -133,7 +133,7 @@ Signed-off-by: Rik van Riel <riel@surriel.com>
+ return false;
+}
+
-+static inline bool in_asid_transition(const struct flush_tlb_info *info)
++static inline bool in_asid_transition(struct mm_struct *mm)
+{
+ return false;
+}
@@ -583,11 +583,11 @@ Signed-off-by: Rik van Riel <riel@surriel.com>
* doing a speculative memory access.
*/
- if (info->freed_tables)
-+ if (info->freed_tables || in_asid_transition(info))
++ if (info->freed_tables || in_asid_transition(info->mm))
on_each_cpu_mask(cpumask, flush_tlb_func, (void *)info, true);
else
on_each_cpu_cond_mask(tlb_is_not_lazy, flush_tlb_func,
-@@ -1021,8 +1338,11 @@ void flush_tlb_mm_range(struct mm_struct
+@@ -1030,8 +1347,11 @@ void flush_tlb_mm_range(struct mm_struct
* a local TLB flush is needed. Optimize this use-case by calling
* flush_tlb_func_local() directly in this case.
*/
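
Condensed, the flush-side decision this patch introduces looks like the following sketch (names from the hunks above; the real logic sits inside flush_tlb_mm_range()):

static void flush_mm_sketch(struct mm_struct *mm, struct flush_tlb_info *info)
{
        if (mm_global_asid(mm)) {
                /* Multi-threaded mm with a global ASID: one broadcast. */
                broadcast_tlb_flush(info);
                return;
        }
        /*
         * Local ASIDs, or an mm still transitioning to its global ASID
         * (the in_asid_transition() check above): fall back to IPIs so
         * CPUs still running on a local ASID are not missed.
         */
        flush_tlb_multi(mm_cpumask(mm), info);
}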


@@ -0,0 +1,251 @@
From 6f601cdcd33be8fc0da98c6bab777575af3260b8 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
Date: Wed, 5 Feb 2025 23:43:29 -0500
Subject: x86/mm: do targeted broadcast flushing from tlbbatch code
Instead of doing a system-wide TLB flush from arch_tlbbatch_flush,
queue up asynchronous, targeted flushes from arch_tlbbatch_add_pending.
This also allows us to avoid adding the CPUs of processes using broadcast
flushing to the batch->cpumask, and will hopefully further reduce TLB
flushing from the reclaim and compaction paths.
Signed-off-by: Rik van Riel <riel@surriel.com>
Tested-by: Manali Shukla <Manali.Shukla@amd.com>
---
arch/x86/include/asm/invlpgb.h | 21 +++++----
arch/x86/include/asm/tlbflush.h | 17 ++++---
arch/x86/mm/tlb.c | 80 +++++++++++++++++++++++++++++++--
3 files changed, 95 insertions(+), 23 deletions(-)
--- a/arch/x86/include/asm/invlpgb.h
+++ b/arch/x86/include/asm/invlpgb.h
@@ -31,9 +31,8 @@ static inline void __invlpgb(unsigned lo
}
/* Wait for INVLPGB originated by this CPU to complete. */
-static inline void tlbsync(void)
+static inline void __tlbsync(void)
{
- cant_migrate();
/* TLBSYNC: supported in binutils >= 0.36. */
asm volatile(".byte 0x0f, 0x01, 0xff" ::: "memory");
}
@@ -61,19 +60,19 @@ static inline void invlpgb_flush_user(un
unsigned long addr)
{
__invlpgb(0, pcid, addr, 0, 0, INVLPGB_PCID | INVLPGB_VA);
- tlbsync();
+ __tlbsync();
}
-static inline void invlpgb_flush_user_nr_nosync(unsigned long pcid,
- unsigned long addr,
- u16 nr,
- bool pmd_stride)
+static inline void __invlpgb_flush_user_nr_nosync(unsigned long pcid,
+ unsigned long addr,
+ u16 nr,
+ bool pmd_stride)
{
__invlpgb(0, pcid, addr, nr - 1, pmd_stride, INVLPGB_PCID | INVLPGB_VA);
}
/* Flush all mappings for a given PCID, not including globals. */
-static inline void invlpgb_flush_single_pcid_nosync(unsigned long pcid)
+static inline void __invlpgb_flush_single_pcid_nosync(unsigned long pcid)
{
__invlpgb(0, pcid, 0, 0, 0, INVLPGB_PCID);
}
@@ -82,11 +81,11 @@ static inline void invlpgb_flush_single_
static inline void invlpgb_flush_all(void)
{
__invlpgb(0, 0, 0, 0, 0, INVLPGB_INCLUDE_GLOBAL);
- tlbsync();
+ __tlbsync();
}
/* Flush addr, including globals, for all PCIDs. */
-static inline void invlpgb_flush_addr_nosync(unsigned long addr, u16 nr)
+static inline void __invlpgb_flush_addr_nosync(unsigned long addr, u16 nr)
{
__invlpgb(0, 0, addr, nr - 1, 0, INVLPGB_INCLUDE_GLOBAL);
}
@@ -95,7 +94,7 @@ static inline void invlpgb_flush_addr_no
static inline void invlpgb_flush_all_nonglobals(void)
{
__invlpgb(0, 0, 0, 0, 0, 0);
- tlbsync();
+ __tlbsync();
}
#endif /* _ASM_X86_INVLPGB */
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -106,6 +106,7 @@ struct tlb_state {
* need to be invalidated.
*/
bool invalidate_other;
+ bool need_tlbsync;
#ifdef CONFIG_ADDRESS_MASKING
/*
@@ -309,6 +310,10 @@ static inline void broadcast_tlb_flush(s
static inline void consider_global_asid(struct mm_struct *mm)
{
}
+
+static inline void tlbsync(void)
+{
+}
#endif
#ifdef CONFIG_PARAVIRT
@@ -358,21 +363,15 @@ static inline u64 inc_mm_tlb_gen(struct
return atomic64_inc_return(&mm->context.tlb_gen);
}
-static inline void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch,
- struct mm_struct *mm,
- unsigned long uaddr)
-{
- inc_mm_tlb_gen(mm);
- cpumask_or(&batch->cpumask, &batch->cpumask, mm_cpumask(mm));
- mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL);
-}
-
static inline void arch_flush_tlb_batched_pending(struct mm_struct *mm)
{
flush_tlb_mm(mm);
}
extern void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch);
+extern void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch,
+ struct mm_struct *mm,
+ unsigned long uaddr);
static inline bool pte_flags_need_flush(unsigned long oldflags,
unsigned long newflags,
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -488,6 +488,37 @@ static void finish_asid_transition(struc
WRITE_ONCE(mm->context.asid_transition, false);
}
+static inline void tlbsync(void)
+{
+ if (!this_cpu_read(cpu_tlbstate.need_tlbsync))
+ return;
+ __tlbsync();
+ this_cpu_write(cpu_tlbstate.need_tlbsync, false);
+}
+
+static inline void invlpgb_flush_user_nr_nosync(unsigned long pcid,
+ unsigned long addr,
+ u16 nr, bool pmd_stride)
+{
+ __invlpgb_flush_user_nr_nosync(pcid, addr, nr, pmd_stride);
+ if (!this_cpu_read(cpu_tlbstate.need_tlbsync))
+ this_cpu_write(cpu_tlbstate.need_tlbsync, true);
+}
+
+static inline void invlpgb_flush_single_pcid_nosync(unsigned long pcid)
+{
+ __invlpgb_flush_single_pcid_nosync(pcid);
+ if (!this_cpu_read(cpu_tlbstate.need_tlbsync))
+ this_cpu_write(cpu_tlbstate.need_tlbsync, true);
+}
+
+static inline void invlpgb_flush_addr_nosync(unsigned long addr, u16 nr)
+{
+ __invlpgb_flush_addr_nosync(addr, nr);
+ if (!this_cpu_read(cpu_tlbstate.need_tlbsync))
+ this_cpu_write(cpu_tlbstate.need_tlbsync, true);
+}
+
static void broadcast_tlb_flush(struct flush_tlb_info *info)
{
bool pmd = info->stride_shift == PMD_SHIFT;
@@ -794,6 +825,8 @@ void switch_mm_irqs_off(struct mm_struct
if (IS_ENABLED(CONFIG_PROVE_LOCKING))
WARN_ON_ONCE(!irqs_disabled());
+ tlbsync();
+
/*
* Verify that CR3 is what we think it is. This will catch
* hypothetical buggy code that directly switches to swapper_pg_dir
@@ -976,6 +1009,8 @@ reload_tlb:
*/
void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
{
+ tlbsync();
+
if (this_cpu_read(cpu_tlbstate.loaded_mm) == &init_mm)
return;
@@ -1621,9 +1656,7 @@ void arch_tlbbatch_flush(struct arch_tlb
* a local TLB flush is needed. Optimize this use-case by calling
* flush_tlb_func_local() directly in this case.
*/
- if (cpu_feature_enabled(X86_FEATURE_INVLPGB)) {
- invlpgb_flush_all_nonglobals();
- } else if (cpumask_any_but(&batch->cpumask, cpu) < nr_cpu_ids) {
+ if (cpumask_any_but(&batch->cpumask, cpu) < nr_cpu_ids) {
flush_tlb_multi(&batch->cpumask, info);
} else if (cpumask_test_cpu(cpu, &batch->cpumask)) {
lockdep_assert_irqs_enabled();
@@ -1632,12 +1665,53 @@ void arch_tlbbatch_flush(struct arch_tlb
local_irq_enable();
}
+ /*
+ * If we issued (asynchronous) INVLPGB flushes, wait for them here.
+ * The cpumask above contains only CPUs that were running tasks
+ * not using broadcast TLB flushing.
+ */
+ if (cpu_feature_enabled(X86_FEATURE_INVLPGB))
+ tlbsync();
+
cpumask_clear(&batch->cpumask);
put_flush_tlb_info();
put_cpu();
}
+void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch,
+ struct mm_struct *mm,
+ unsigned long uaddr)
+{
+ u16 asid = mm_global_asid(mm);
+
+ if (asid) {
+ invlpgb_flush_user_nr_nosync(kern_pcid(asid), uaddr, 1, false);
+ /* Do any CPUs supporting INVLPGB need PTI? */
+ if (static_cpu_has(X86_FEATURE_PTI))
+ invlpgb_flush_user_nr_nosync(user_pcid(asid), uaddr, 1, false);
+
+ /*
+ * Some CPUs might still be using a local ASID for this
+ * process, and require IPIs, while others are using the
+ * global ASID.
+ *
+ * In this corner case we need to do both the broadcast
+ * TLB invalidation, and send IPIs. The IPIs will help
+ * stragglers transition to the broadcast ASID.
+ */
+ if (in_asid_transition(mm))
+ asid = 0;
+ }
+
+ if (!asid) {
+ inc_mm_tlb_gen(mm);
+ cpumask_or(&batch->cpumask, &batch->cpumask, mm_cpumask(mm));
+ }
+
+ mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL);
+}
+
/*
* Blindly accessing user memory from NMI context can be dangerous
* if we're in the middle of switching the current user task or
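
Distilled from the hunks above, the bookkeeping pattern is: issue INVLPGBs without waiting, mark that this CPU owes a TLBSYNC, and settle that debt at batch-flush time or before the CPU switches to another mm (a sketch using the names from this patch):

static inline void queue_flush_sketch(unsigned long pcid, unsigned long addr)
{
        __invlpgb_flush_user_nr_nosync(pcid, addr, 1, false);
        this_cpu_write(cpu_tlbstate.need_tlbsync, true);   /* sync owed */
}

static inline void settle_flushes_sketch(void)
{
        if (this_cpu_read(cpu_tlbstate.need_tlbsync)) {
                __tlbsync();            /* wait for our own INVLPGBs */
                this_cpu_write(cpu_tlbstate.need_tlbsync, false);
        }
}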


@@ -1,6 +1,6 @@
-From 0678da9f0870f0d211d49808a66e98abc0c58438 Mon Sep 17 00:00:00 2001
+From 101ba03a6474bbc52971505abf1e3ee9613f255b Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
-Date: Wed, 22 Jan 2025 23:23:30 -0500
+Date: Wed, 5 Feb 2025 23:43:30 -0500
Subject: x86/mm: enable AMD translation cache extensions
With AMD TCE (translation cache extensions) only the intermediate mappings
@@ -22,6 +22,7 @@ only those upper-level entries that lead to the target PTE in
the page table hierarchy, leaving unrelated upper-level entries intact.
Signed-off-by: Rik van Riel <riel@surriel.com>
+Tested-by: Manali Shukla <Manali.Shukla@amd.com>
---
arch/x86/include/asm/msr-index.h | 2 ++
arch/x86/kernel/cpu/amd.c | 4 ++++
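
The enable path amounts to setting one EFER bit when the CPU advertises TCE; a sketch, assuming (per the AMD APM) that EFER.TCE is bit 15, since the exact hunk is not shown above:

#include <asm/msr.h>

#define EFER_TCE_BIT   15      /* Translation Cache Extension (assumed) */

static void enable_tce_sketch(void)
{
        if (cpu_feature_enabled(X86_FEATURE_TCE))
                msr_set_bit(MSR_EFER, EFER_TCE_BIT);
}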


@@ -1,6 +1,6 @@
-From 02d1759eda082f9595f3232f5dffd5d49943924a Mon Sep 17 00:00:00 2001
+From 7b8ef03b059bca98d2af696c3ec2adcaa673f7e4 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
-Date: Wed, 22 Jan 2025 23:23:31 -0500
+Date: Wed, 5 Feb 2025 23:43:31 -0500
Subject: x86/mm: only invalidate final translations with INVLPGB
Use the INVLPGB_FINAL_ONLY flag when invalidating mappings with INVPLGB.
@@ -11,23 +11,24 @@ On the (rare) occasions where we free page tables we do a full flush,
ensuring intermediate translations get flushed from the TLB.
Signed-off-by: Rik van Riel <riel@surriel.com>
+Tested-by: Manali Shukla <Manali.Shukla@amd.com>
---
arch/x86/include/asm/invlpgb.h | 10 ++++++++--
-arch/x86/mm/tlb.c | 8 ++++----
-2 files changed, 12 insertions(+), 6 deletions(-)
+arch/x86/mm/tlb.c | 13 +++++++------
+2 files changed, 15 insertions(+), 8 deletions(-)
--- a/arch/x86/include/asm/invlpgb.h
+++ b/arch/x86/include/asm/invlpgb.h
-@@ -67,9 +67,15 @@ static inline void invlpgb_flush_user(un
-static inline void invlpgb_flush_user_nr_nosync(unsigned long pcid,
-unsigned long addr,
-u16 nr,
-- bool pmd_stride)
-+ bool pmd_stride,
-+ bool freed_tables)
+@@ -66,9 +66,15 @@ static inline void invlpgb_flush_user(un
+static inline void __invlpgb_flush_user_nr_nosync(unsigned long pcid,
+unsigned long addr,
+u16 nr,
+- bool pmd_stride)
++ bool pmd_stride,
++ bool freed_tables)
{
- __invlpgb(0, pcid, addr, nr - 1, pmd_stride, INVLPGB_PCID | INVLPGB_VA);
-+ unsigned long flags = INVLPGB_PCID | INVLPGB_VA;
++ u8 flags = INVLPGB_PCID | INVLPGB_VA;
+
+ if (!freed_tables)
+ flags |= INVLPGB_FINAL_ONLY;
@@ -38,7 +39,20 @@ Signed-off-by: Rik van Riel <riel@surriel.com>
/* Flush all mappings for a given PCID, not including globals. */
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
-@@ -518,10 +518,10 @@ static void broadcast_tlb_flush(struct f
+@@ -498,9 +498,10 @@ static inline void tlbsync(void)
+static inline void invlpgb_flush_user_nr_nosync(unsigned long pcid,
+unsigned long addr,
+- u16 nr, bool pmd_stride)
++ u16 nr, bool pmd_stride,
++ bool freed_tables)
+{
+- __invlpgb_flush_user_nr_nosync(pcid, addr, nr, pmd_stride);
++ __invlpgb_flush_user_nr_nosync(pcid, addr, nr, pmd_stride, freed_tables);
+if (!this_cpu_read(cpu_tlbstate.need_tlbsync))
+this_cpu_write(cpu_tlbstate.need_tlbsync, true);
+}
+@@ -549,10 +550,10 @@ static void broadcast_tlb_flush(struct f
nr = min(maxnr, (info->end - addr) >> info->stride_shift);
nr = max(nr, 1);
@@ -51,10 +65,10 @@ Signed-off-by: Rik van Riel <riel@surriel.com>
addr += nr << info->stride_shift;
} while (addr < info->end);
-@@ -1654,10 +1654,10 @@ void arch_tlbbatch_add_pending(struct ar
-batch->used_invlpgb = true;
-migrate_disable();
-}
+@@ -1686,10 +1687,10 @@ void arch_tlbbatch_add_pending(struct ar
+u16 asid = mm_global_asid(mm);
+
+if (asid) {
+ invlpgb_flush_user_nr_nosync(kern_pcid(asid), uaddr, 1, false, false);
/* Do any CPUs supporting INVLPGB need PTI? */
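
The flag selection above, restated as a sketch: only flushes that follow page-table freeing need to hit intermediate (non-final) translations, so every other flush can pass INVLPGB_FINAL_ONLY:

static inline u8 invlpgb_flags_sketch(bool freed_tables)
{
        u8 flags = INVLPGB_PCID | INVLPGB_VA;

        if (!freed_tables)
                flags |= INVLPGB_FINAL_ONLY;    /* keep upper levels cached */
        return flags;
}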


@@ -1,4 +1,4 @@
-From b61dfc43cfc7511795366dfd9260f0959ca2f2d2 Mon Sep 17 00:00:00 2001
+From 7b0836fcad644d24d6318bf63013ec1b35d6a27b Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
Date: Thu, 19 Dec 2024 15:32:53 -0500
Subject: mm: remove unnecessary calls to lru_add_drain


@@ -1,4 +1,4 @@
-From e2d1ffb13e3909dab142f0f8ec8f934b79930717 Mon Sep 17 00:00:00 2001
+From 7ecab5a83d3155baa009cd6bc6e18959fee8be62 Mon Sep 17 00:00:00 2001
From: Vincenzo Frascino <vincenzo.frascino@arm.com>
Date: Mon, 14 Oct 2024 16:13:39 +0100
Subject: vdso: Introduce vdso/page.h


@@ -1,4 +1,4 @@
-From 4478ee194402472199e05d3e27a87f0fc775cc18 Mon Sep 17 00:00:00 2001
+From d1bcf51400e790e65945a29078bd816bd61aa148 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 24 Oct 2024 13:34:26 +0000
Subject: vdso: Change PAGE_MASK to signed on all 32-bit architectures


@@ -1,135 +0,0 @@
From 647727eaa06fc61fbc55de4c09ab0c0fe7bc7263 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
Date: Wed, 22 Jan 2025 23:23:29 -0500
Subject: x86/mm: do targeted broadcast flushing from tlbbatch code
Instead of doing a system-wide TLB flush from arch_tlbbatch_flush,
queue up asynchronous, targeted flushes from arch_tlbbatch_add_pending.
This also allows us to avoid adding the CPUs of processes using broadcast
flushing to the batch->cpumask, and will hopefully further reduce TLB
flushing from the reclaim and compaction paths.
Signed-off-by: Rik van Riel <riel@surriel.com>
---
arch/x86/include/asm/tlbbatch.h | 1 +
arch/x86/include/asm/tlbflush.h | 12 ++-----
arch/x86/mm/tlb.c | 57 +++++++++++++++++++++++++++++++--
3 files changed, 58 insertions(+), 12 deletions(-)
--- a/arch/x86/include/asm/tlbbatch.h
+++ b/arch/x86/include/asm/tlbbatch.h
@@ -10,6 +10,7 @@ struct arch_tlbflush_unmap_batch {
* the PFNs being flushed..
*/
struct cpumask cpumask;
+ bool used_invlpgb;
};
#endif /* _ARCH_X86_TLBBATCH_H */
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -358,21 +358,15 @@ static inline u64 inc_mm_tlb_gen(struct
return atomic64_inc_return(&mm->context.tlb_gen);
}
-static inline void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch,
- struct mm_struct *mm,
- unsigned long uaddr)
-{
- inc_mm_tlb_gen(mm);
- cpumask_or(&batch->cpumask, &batch->cpumask, mm_cpumask(mm));
- mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL);
-}
-
static inline void arch_flush_tlb_batched_pending(struct mm_struct *mm)
{
flush_tlb_mm(mm);
}
extern void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch);
+extern void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch,
+ struct mm_struct *mm,
+ unsigned long uaddr);
static inline bool pte_flags_need_flush(unsigned long oldflags,
unsigned long newflags,
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -1612,9 +1612,7 @@ void arch_tlbbatch_flush(struct arch_tlb
* a local TLB flush is needed. Optimize this use-case by calling
* flush_tlb_func_local() directly in this case.
*/
- if (cpu_feature_enabled(X86_FEATURE_INVLPGB)) {
- invlpgb_flush_all_nonglobals();
- } else if (cpumask_any_but(&batch->cpumask, cpu) < nr_cpu_ids) {
+ if (cpumask_any_but(&batch->cpumask, cpu) < nr_cpu_ids) {
flush_tlb_multi(&batch->cpumask, info);
} else if (cpumask_test_cpu(cpu, &batch->cpumask)) {
lockdep_assert_irqs_enabled();
@@ -1623,12 +1621,65 @@ void arch_tlbbatch_flush(struct arch_tlb
local_irq_enable();
}
+ /*
+ * If we issued (asynchronous) INVLPGB flushes, wait for them here.
+ * The cpumask above contains only CPUs that were running tasks
+ * not using broadcast TLB flushing.
+ */
+ if (cpu_feature_enabled(X86_FEATURE_INVLPGB) && batch->used_invlpgb) {
+ tlbsync();
+ migrate_enable();
+ batch->used_invlpgb = false;
+ }
+
cpumask_clear(&batch->cpumask);
put_flush_tlb_info();
put_cpu();
}
+void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch,
+ struct mm_struct *mm,
+ unsigned long uaddr)
+{
+ u16 asid = mm_global_asid(mm);
+
+ if (asid) {
+ /*
+ * Queue up an asynchronous invalidation. The corresponding
+ * TLBSYNC is done in arch_tlbbatch_flush(), and must be done
+ * on the same CPU.
+ */
+ if (!batch->used_invlpgb) {
+ batch->used_invlpgb = true;
+ migrate_disable();
+ }
+ invlpgb_flush_user_nr_nosync(kern_pcid(asid), uaddr, 1, false);
+ /* Do any CPUs supporting INVLPGB need PTI? */
+ if (static_cpu_has(X86_FEATURE_PTI))
+ invlpgb_flush_user_nr_nosync(user_pcid(asid), uaddr, 1, false);
+
+ /*
+ * Some CPUs might still be using a local ASID for this
+ * process, and require IPIs, while others are using the
+ * global ASID.
+ *
+ * In this corner case we need to do both the broadcast
+ * TLB invalidation, and send IPIs. The IPIs will help
+ * stragglers transition to the broadcast ASID.
+ */
+ if (READ_ONCE(mm->context.asid_transition))
+ asid = 0;
+ }
+
+ if (!asid) {
+ inc_mm_tlb_gen(mm);
+ cpumask_or(&batch->cpumask, &batch->cpumask, mm_cpumask(mm));
+ }
+
+ mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL);
+}
+
/*
* Blindly accessing user memory from NMI context can be dangerous
* if we're in the middle of switching the current user task or


@@ -93,7 +93,7 @@ caused by rebalancing too many tasks at once.
/* Restrict the NUMA promotion throughput (MB/s) for each target node. */
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
-@@ -2825,7 +2825,7 @@ extern void deactivate_task(struct rq *r
+@@ -2797,7 +2797,7 @@ extern void deactivate_task(struct rq *r
extern void wakeup_preempt(struct rq *rq, struct task_struct *p, int flags);

debian/patches/series

@@ -203,21 +203,21 @@ patchset-xanmod/valve/0004-leds-steamdeck-Add-support-for-Steam-Deck-LED.patch
patchset-xanmod/valve/0005-mfd-Add-MFD-core-driver-for-Steam-Deck.patch
patchset-xanmod/valve/0006-mfd-steamdeck-Expose-controller-board-power-in-sysfs.patch
-patchset-zen/nvlpgb-v7/0001-x86-mm-make-MMU_GATHER_RCU_TABLE_FREE-unconditional.patch
-patchset-zen/nvlpgb-v7/0002-x86-mm-remove-pv_ops.mmu.tlb_remove_table-call.patch
-patchset-zen/nvlpgb-v7/0003-x86-mm-consolidate-full-flush-threshold-decision.patch
-patchset-zen/nvlpgb-v7/0004-x86-mm-get-INVLPGB-count-max-from-CPUID.patch
-patchset-zen/nvlpgb-v7/0005-x86-mm-add-INVLPGB-support-code.patch
-patchset-zen/nvlpgb-v7/0006-x86-mm-use-INVLPGB-for-kernel-TLB-flushes.patch
-patchset-zen/nvlpgb-v7/0007-x86-mm-use-INVLPGB-in-flush_tlb_all.patch
-patchset-zen/nvlpgb-v7/0008-x86-mm-use-broadcast-TLB-flushing-for-page-reclaim-T.patch
-patchset-zen/nvlpgb-v7/0009-x86-mm-enable-broadcast-TLB-invalidation-for-multi-t.patch
-patchset-zen/nvlpgb-v7/0010-x86-mm-do-targeted-broadcast-flushing-from-tlbbatch-.patch
-patchset-zen/nvlpgb-v7/0011-x86-mm-enable-AMD-translation-cache-extensions.patch
-patchset-zen/nvlpgb-v7/0012-x86-mm-only-invalidate-final-translations-with-INVLP.patch
-patchset-zen/nvlpgb-v7/0013-mm-remove-unnecessary-calls-to-lru_add_drain.patch
-patchset-zen/nvlpgb-v7/0014-vdso-Introduce-vdso-page.h.patch
-patchset-zen/nvlpgb-v7/0015-vdso-Change-PAGE_MASK-to-signed-on-all-32-bit-archit.patch
+patchset-zen/invlpgb-v9/0001-x86-mm-make-MMU_GATHER_RCU_TABLE_FREE-unconditional.patch
+patchset-zen/invlpgb-v9/0002-x86-mm-remove-pv_ops.mmu.tlb_remove_table-call.patch
+patchset-zen/invlpgb-v9/0003-x86-mm-consolidate-full-flush-threshold-decision.patch
+patchset-zen/invlpgb-v9/0004-x86-mm-get-INVLPGB-count-max-from-CPUID.patch
+patchset-zen/invlpgb-v9/0005-x86-mm-add-INVLPGB-support-code.patch
+patchset-zen/invlpgb-v9/0006-x86-mm-use-INVLPGB-for-kernel-TLB-flushes.patch
+patchset-zen/invlpgb-v9/0007-x86-mm-use-INVLPGB-in-flush_tlb_all.patch
+patchset-zen/invlpgb-v9/0008-x86-mm-use-broadcast-TLB-flushing-for-page-reclaim-T.patch
+patchset-zen/invlpgb-v9/0009-x86-mm-enable-broadcast-TLB-invalidation-for-multi-t.patch
+patchset-zen/invlpgb-v9/0010-x86-mm-do-targeted-broadcast-flushing-from-tlbbatch-.patch
+patchset-zen/invlpgb-v9/0011-x86-mm-enable-AMD-translation-cache-extensions.patch
+patchset-zen/invlpgb-v9/0012-x86-mm-only-invalidate-final-translations-with-INVLP.patch
+patchset-zen/invlpgb-v9/0013-mm-remove-unnecessary-calls-to-lru_add_drain.patch
+patchset-zen/invlpgb-v9/0014-vdso-Introduce-vdso-page.h.patch
+patchset-zen/invlpgb-v9/0015-vdso-Change-PAGE_MASK-to-signed-on-all-32-bit-archit.patch
patchset-zen/tlb/0001-mm-Optimize-TLB-flushes-during-page-reclaim.patch