
release 6.12.13

Konstantin Demin 2025-02-09 19:40:34 +03:00
parent b8c80400f5
commit e0baaf49dd
26 changed files with 385 additions and 221 deletions

debian/changelog

@@ -1,3 +1,10 @@
linux (6.12.13-1) sid; urgency=medium
* New upstream stable update:
https://www.kernel.org/pub/linux/kernel/v6.x/ChangeLog-6.12.13
-- Konstantin Demin <rockdrilla@gmail.com> Sun, 09 Feb 2025 19:34:35 +0300
linux (6.12.12-1) sid; urgency=medium
* New upstream stable update:


@@ -22,7 +22,7 @@ Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
-@@ -1617,6 +1617,11 @@ int perf_cpu_time_max_percent_handler(co
+@@ -1623,6 +1623,11 @@ int perf_cpu_time_max_percent_handler(co
int perf_event_max_stack_handler(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos);
@@ -50,7 +50,7 @@ Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
/* Minimum for 512 kiB + 1 user control page */
int sysctl_perf_event_mlock __read_mostly = 512 + (PAGE_SIZE / 1024); /* 'free' kiB per user */
-@@ -12681,6 +12686,9 @@ SYSCALL_DEFINE5(perf_event_open,
+@@ -12682,6 +12687,9 @@ SYSCALL_DEFINE5(perf_event_open,
if (err)
return err;


@@ -56,7 +56,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
struct rate_sample *rs);
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
-@@ -2767,6 +2767,7 @@ static bool tcp_write_xmit(struct sock *
+@@ -2770,6 +2770,7 @@ static bool tcp_write_xmit(struct sock *
skb_set_delivery_time(skb, tp->tcp_wstamp_ns, SKB_CLOCK_MONOTONIC);
list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue);
tcp_init_tso_segs(skb, mss_now);


@@ -55,7 +55,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
* between different flows.
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
-@@ -1603,7 +1603,7 @@ int tcp_fragment(struct sock *sk, enum t
+@@ -1606,7 +1606,7 @@ int tcp_fragment(struct sock *sk, enum t
{
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *buff;
@@ -64,7 +64,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
long limit;
int nlen;
u8 flags;
-@@ -1678,6 +1678,30 @@ int tcp_fragment(struct sock *sk, enum t
+@@ -1681,6 +1681,30 @@ int tcp_fragment(struct sock *sk, enum t
if (diff)
tcp_adjust_pcount(sk, skb, diff);


@@ -97,7 +97,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
};
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
-@@ -2059,13 +2059,12 @@ static u32 tcp_tso_autosize(const struct
+@@ -2062,13 +2062,12 @@ static u32 tcp_tso_autosize(const struct
static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now)
{
const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;


@@ -35,7 +35,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
fastopen_no_cookie:1, /* Allow send/recv SYN+data without a cookie */
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
-@@ -3005,6 +3005,7 @@ void tcp_send_loss_probe(struct sock *sk
+@@ -3008,6 +3008,7 @@ void tcp_send_loss_probe(struct sock *sk
if (WARN_ON(!skb || !tcp_skb_pcount(skb)))
goto rearm_timer;


@@ -88,7 +88,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
-@@ -336,10 +336,9 @@ static void tcp_ecn_send_syn(struct sock
+@@ -339,10 +339,9 @@ static void tcp_ecn_send_syn(struct sock
bool bpf_needs_ecn = tcp_bpf_ca_needs_ecn(sk);
bool use_ecn = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn) == 1 ||
tcp_ca_needs_ecn(sk) || bpf_needs_ecn;
@@ -100,7 +100,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
if (dst && dst_feature(dst, RTAX_FEATURE_ECN))
use_ecn = true;
}
-@@ -351,6 +350,9 @@ static void tcp_ecn_send_syn(struct sock
+@@ -354,6 +353,9 @@ static void tcp_ecn_send_syn(struct sock
tp->ecn_flags = TCP_ECN_OK;
if (tcp_ca_needs_ecn(sk) || bpf_needs_ecn)
INET_ECN_xmit(sk);


@@ -47,7 +47,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
/* BBR marks the current round trip as a loss round. */
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
-@@ -390,7 +390,8 @@ static void tcp_ecn_send(struct sock *sk
+@@ -393,7 +393,8 @@ static void tcp_ecn_send(struct sock *sk
th->cwr = 1;
skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
}


@@ -1,6 +1,6 @@
-From 6cb30d7518301094b9c7397a24a22cf538a1d64c Mon Sep 17 00:00:00 2001
+From e11153c4df0fee7caadec3714a60a4936d6a9ea2 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
-Date: Wed, 22 Jan 2025 23:23:20 -0500
+Date: Wed, 5 Feb 2025 23:43:20 -0500
Subject: x86/mm: make MMU_GATHER_RCU_TABLE_FREE unconditional
Currently x86 uses CONFIG_MMU_GATHER_TABLE_FREE when using
@@ -29,6 +29,7 @@ That makes it safe to use INVLPGB on AMD CPUs.
Signed-off-by: Rik van Riel <riel@surriel.com>
Suggested-by: Peter Zijlstra <peterz@infradead.org>
+Tested-by: Manali Shukla <Manali.Shukla@amd.com>
---
arch/x86/Kconfig | 2 +-
arch/x86/kernel/paravirt.c | 7 +------


@@ -1,6 +1,6 @@
-From df8f812b62c450b98b972ad0a4d5a5ba400f5eae Mon Sep 17 00:00:00 2001
+From e8008cb69c5e4efbaedd70b0fb692343e4aa0e51 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
-Date: Wed, 22 Jan 2025 23:23:21 -0500
+Date: Wed, 5 Feb 2025 23:43:21 -0500
Subject: x86/mm: remove pv_ops.mmu.tlb_remove_table call
Every pv_ops.mmu.tlb_remove_table call ends up calling tlb_remove_table.
@@ -10,6 +10,7 @@ and not going through the paravirt function pointers.
Signed-off-by: Rik van Riel <riel@surriel.com>
Suggested-by: Qi Zheng <zhengqi.arch@bytedance.com>
+Tested-by: Manali Shukla <Manali.Shukla@amd.com>
---
arch/x86/hyperv/mmu.c | 1 -
arch/x86/include/asm/paravirt.h | 5 -----


@@ -1,6 +1,6 @@
-From 8b2bd3f69b50cfe59eee4506413715878bcbb901 Mon Sep 17 00:00:00 2001
+From 7ac6508c4db81eced5f6e3d7c8913af1da6cf110 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
-Date: Wed, 22 Jan 2025 23:23:22 -0500
+Date: Wed, 5 Feb 2025 23:43:22 -0500
Subject: x86/mm: consolidate full flush threshold decision
Reduce code duplication by consolidating the decision point
@@ -10,15 +10,34 @@ inside get_flush_tlb_info.
Signed-off-by: Rik van Riel <riel@surriel.com>
Suggested-by: Dave Hansen <dave.hansen@intel.com>
---
-arch/x86/mm/tlb.c | 43 ++++++++++++++++++++-----------------------
+arch/x86/mm/tlb.c | 56 ++++++++++++++++++++++++++---------------------
-1 file changed, 20 insertions(+), 23 deletions(-)
+1 file changed, 31 insertions(+), 25 deletions(-)
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
-@@ -981,6 +981,15 @@ static struct flush_tlb_info *get_flush_
+@@ -973,14 +973,32 @@ static struct flush_tlb_info *get_flush_
BUG_ON(this_cpu_inc_return(flush_tlb_info_idx) != 1);
#endif
- info->start = start;
- info->end = end;
+ /*
+ * Round the start and end addresses to the page size specified
+ * by the stride shift. This ensures partial pages at the end of
+ * a range get fully invalidated.
+ */
+ info->start = round_down(start, 1 << stride_shift);
+ info->end = round_up(end, 1 << stride_shift);
info->mm = mm;
info->stride_shift = stride_shift;
info->freed_tables = freed_tables;
info->new_tlb_gen = new_tlb_gen;
info->initiating_cpu = smp_processor_id();
+ WARN_ONCE(start != info->start || end != info->end,
+ "TLB flush not stride %x aligned. Start %lx, end %lx\n",
+ 1 << stride_shift, start, end);
+
+ /*
+ * If the number of flushes is so large that a full flush
+ * would be faster, do a full flush.
@@ -31,7 +50,7 @@ Suggested-by: Dave Hansen <dave.hansen@intel.com>
return info;
}
-@@ -998,17 +1007,8 @@ void flush_tlb_mm_range(struct mm_struct
+@@ -998,17 +1016,8 @@ void flush_tlb_mm_range(struct mm_struct
bool freed_tables)
{
struct flush_tlb_info *info;
@@ -50,7 +69,7 @@ Suggested-by: Dave Hansen <dave.hansen@intel.com>
/* This is also a barrier that synchronizes with switch_mm(). */
new_tlb_gen = inc_mm_tlb_gen(mm);
-@@ -1060,22 +1060,19 @@ static void do_kernel_range_flush(void *
+@@ -1060,22 +1069,19 @@ static void do_kernel_range_flush(void *
void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
@@ -61,12 +80,12 @@ Suggested-by: Dave Hansen <dave.hansen@intel.com>
- } else {
- struct flush_tlb_info *info;
+ struct flush_tlb_info *info;
+
+ guard(preempt)();
- preempt_disable();
- info = get_flush_tlb_info(NULL, start, end, 0, false,
- TLB_GENERATION_INVALID);
+ guard(preempt)();
+
+ info = get_flush_tlb_info(NULL, start, end, PAGE_SHIFT, false,
+ TLB_GENERATION_INVALID);
@@ -82,7 +101,7 @@ Suggested-by: Dave Hansen <dave.hansen@intel.com>
}
/*
-@@ -1247,7 +1244,7 @@ void arch_tlbbatch_flush(struct arch_tlb
+@@ -1247,7 +1253,7 @@ void arch_tlbbatch_flush(struct arch_tlb
int cpu = get_cpu();


@@ -1,6 +1,6 @@
-From a182b0471ba3c3329d93abfa07e3d452183a9137 Mon Sep 17 00:00:00 2001
+From e772b2eb66e5c3cf668feadab678f2a88d896189 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
-Date: Wed, 22 Jan 2025 23:23:23 -0500
+Date: Wed, 5 Feb 2025 23:43:23 -0500
Subject: x86/mm: get INVLPGB count max from CPUID
The CPU advertises the maximum number of pages that can be shot down
@@ -9,6 +9,7 @@ with one INVLPGB instruction in the CPUID data.
Save that information for later use.
Signed-off-by: Rik van Riel <riel@surriel.com>
+Tested-by: Manali Shukla <Manali.Shukla@amd.com>
---
arch/x86/Kconfig.cpu | 5 +++++
arch/x86/include/asm/cpufeatures.h | 1 +
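
As background for this patch (not part of the diff above): AMD advertises the INVLPGB page-count limit in CPUID leaf 0x80000008, EDX bits 15:0 (InvlpgbCountMax). A minimal sketch of reading it, with a hypothetical helper name and assuming that bit layout:

	/* Sketch: read the maximum number of pages one INVLPGB may target.
	 * Assumes CPUID leaf 0x80000008 reports InvlpgbCountMax in EDX[15:0]. */
	static u16 read_invlpgb_count_max(void)
	{
		if (!boot_cpu_has(X86_FEATURE_INVLPGB))
			return 0;

		return cpuid_edx(0x80000008) & 0xffff;
	}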


@@ -1,12 +1,13 @@
-From cc3f8dd3033c79abd9f37a94efed74a535a703c9 Mon Sep 17 00:00:00 2001
+From 7a896b12875e2b988acbf0229fb4bcf9157b83bd Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
-Date: Wed, 22 Jan 2025 23:23:24 -0500
+Date: Wed, 5 Feb 2025 23:43:24 -0500
Subject: x86/mm: add INVLPGB support code
Add invlpgb.h with the helper functions and definitions needed to use
broadcast TLB invalidation on AMD EPYC 3 and newer CPUs.
Signed-off-by: Rik van Riel <riel@surriel.com>
+Tested-by: Manali Shukla <Manali.Shukla@amd.com>
---
arch/x86/include/asm/invlpgb.h | 101 ++++++++++++++++++++++++++++++++
arch/x86/include/asm/tlbflush.h | 1 +


@@ -1,6 +1,6 @@
-From 6b6686f0d7e228d0a2d8c166204adea5484c20d7 Mon Sep 17 00:00:00 2001
+From 99f2b0eda74d7ec76c9c48b78f9d30d251501c28 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
-Date: Wed, 22 Jan 2025 23:23:25 -0500
+Date: Wed, 5 Feb 2025 23:43:25 -0500
Subject: x86/mm: use INVLPGB for kernel TLB flushes
Use broadcast TLB invalidation for kernel addresses when available.
@@ -8,13 +8,14 @@ Use broadcast TLB invalidation for kernel addresses when available.
Remove the need to send IPIs for kernel TLB flushes.
Signed-off-by: Rik van Riel <riel@surriel.com>
+Tested-by: Manali Shukla <Manali.Shukla@amd.com>
---
arch/x86/mm/tlb.c | 28 +++++++++++++++++++++++++++-
1 file changed, 27 insertions(+), 1 deletion(-)
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
-@@ -1048,6 +1048,30 @@ void flush_tlb_all(void)
+@@ -1057,6 +1057,30 @@ void flush_tlb_all(void)
on_each_cpu(do_flush_tlb_all, NULL, 1);
}
@@ -45,7 +46,7 @@ Signed-off-by: Rik van Riel <riel@surriel.com>
static void do_kernel_range_flush(void *info)
{
struct flush_tlb_info *f = info;
-@@ -1067,7 +1091,9 @@ void flush_tlb_kernel_range(unsigned lon
+@@ -1076,7 +1100,9 @@ void flush_tlb_kernel_range(unsigned lon
info = get_flush_tlb_info(NULL, start, end, PAGE_SHIFT, false,
TLB_GENERATION_INVALID);


@@ -1,19 +1,20 @@
-From 6cffce503223f9076a5e16177905ba3ab6d9f7d8 Mon Sep 17 00:00:00 2001
+From 1ef7edb5b2375d4010ed2ad0c7d87fcfa7ab4519 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
-Date: Wed, 22 Jan 2025 23:23:26 -0500
+Date: Wed, 5 Feb 2025 23:43:26 -0500
Subject: x86/mm: use INVLPGB in flush_tlb_all
The flush_tlb_all() function is not used a whole lot, but we might
as well use broadcast TLB flushing there, too.
Signed-off-by: Rik van Riel <riel@surriel.com>
+Tested-by: Manali Shukla <Manali.Shukla@amd.com>
---
arch/x86/mm/tlb.c | 15 +++++++++++++++
1 file changed, 15 insertions(+)
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
-@@ -1036,6 +1036,19 @@ void flush_tlb_mm_range(struct mm_struct
+@@ -1045,6 +1045,19 @@ void flush_tlb_mm_range(struct mm_struct
}
@@ -33,7 +34,7 @@ Signed-off-by: Rik van Riel <riel@surriel.com>
static void do_flush_tlb_all(void *info)
{
count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
-@@ -1044,6 +1057,8 @@ static void do_flush_tlb_all(void *info)
+@@ -1053,6 +1066,8 @@ static void do_flush_tlb_all(void *info)
void flush_tlb_all(void)
{


@@ -1,6 +1,6 @@
-From 3d23d79d14cdd3c68dc5bffbaf34a60eaca7fa40 Mon Sep 17 00:00:00 2001
+From 5e5219596683c3b8178e09f6ec1e75154537325f Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
-Date: Wed, 22 Jan 2025 23:23:27 -0500
+Date: Wed, 5 Feb 2025 23:43:27 -0500
Subject: x86/mm: use broadcast TLB flushing for page reclaim TLB flushing
In the page reclaim code, we only track the CPU(s) where the TLB needs
@@ -10,13 +10,14 @@ invalidated.
Use broadcast TLB flushing when that is available.
Signed-off-by: Rik van Riel <riel@surriel.com>
+Tested-by: Manali Shukla <Manali.Shukla@amd.com>
---
arch/x86/mm/tlb.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
-@@ -1292,7 +1292,9 @@ void arch_tlbbatch_flush(struct arch_tlb
+@@ -1301,7 +1301,9 @@ void arch_tlbbatch_flush(struct arch_tlb
* a local TLB flush is needed. Optimize this use-case by calling
* flush_tlb_func_local() directly in this case.
*/


@@ -1,6 +1,6 @@
-From 79c9df0c7637c8ba8a1833889a2ace355d56c96e Mon Sep 17 00:00:00 2001
+From c7212dc64d8e9e4f12f1c6edea3b75c350a30381 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
-Date: Wed, 22 Jan 2025 23:23:28 -0500
+Date: Wed, 5 Feb 2025 23:43:28 -0500
Subject: x86/mm: enable broadcast TLB invalidation for multi-threaded
processes
@@ -9,10 +9,10 @@ and newer CPUs.
In order to not exhaust PCID space, and keep TLB flushes local for single
threaded processes, we only hand out broadcast ASIDs to processes active on
-3 or more CPUs, and gradually increase the threshold as broadcast ASID space
-is depleted.
+4 or more CPUs.
Signed-off-by: Rik van Riel <riel@surriel.com>
+Tested-by: Manali Shukla <Manali.Shukla@amd.com>
---
arch/x86/include/asm/mmu.h | 6 +
arch/x86/include/asm/mmu_context.h | 14 ++
@@ -100,12 +100,12 @@ Signed-off-by: Rik van Riel <riel@surriel.com>
+ return !is_dyn_asid(asid);
+}
+
-+static inline bool in_asid_transition(const struct flush_tlb_info *info)
++static inline bool in_asid_transition(struct mm_struct *mm)
+{
+ if (!cpu_feature_enabled(X86_FEATURE_INVLPGB))
+ return false;
+
-+ return info->mm && READ_ONCE(info->mm->context.asid_transition);
++ return mm && READ_ONCE(mm->context.asid_transition);
+}
+
+static inline u16 mm_global_asid(struct mm_struct *mm)
@@ -133,7 +133,7 @@ Signed-off-by: Rik van Riel <riel@surriel.com>
+ return false;
+}
+
-+static inline bool in_asid_transition(const struct flush_tlb_info *info)
++static inline bool in_asid_transition(struct mm_struct *mm)
+{
+ return false;
+}
@@ -583,11 +583,11 @@ Signed-off-by: Rik van Riel <riel@surriel.com>
* doing a speculative memory access.
*/
- if (info->freed_tables)
-+ if (info->freed_tables || in_asid_transition(info))
++ if (info->freed_tables || in_asid_transition(info->mm))
on_each_cpu_mask(cpumask, flush_tlb_func, (void *)info, true);
else
on_each_cpu_cond_mask(tlb_is_not_lazy, flush_tlb_func,
-@@ -1021,8 +1338,11 @@ void flush_tlb_mm_range(struct mm_struct
+@@ -1030,8 +1347,11 @@ void flush_tlb_mm_range(struct mm_struct
* a local TLB flush is needed. Optimize this use-case by calling
* flush_tlb_func_local() directly in this case.
*/


@@ -0,0 +1,251 @@
From 6f601cdcd33be8fc0da98c6bab777575af3260b8 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
Date: Wed, 5 Feb 2025 23:43:29 -0500
Subject: x86/mm: do targeted broadcast flushing from tlbbatch code
Instead of doing a system-wide TLB flush from arch_tlbbatch_flush,
queue up asynchronous, targeted flushes from arch_tlbbatch_add_pending.
This also allows us to avoid adding the CPUs of processes using broadcast
flushing to the batch->cpumask, and will hopefully further reduce TLB
flushing from the reclaim and compaction paths.
Signed-off-by: Rik van Riel <riel@surriel.com>
Tested-by: Manali Shukla <Manali.Shukla@amd.com>
---
arch/x86/include/asm/invlpgb.h | 21 +++++----
arch/x86/include/asm/tlbflush.h | 17 ++++---
arch/x86/mm/tlb.c | 80 +++++++++++++++++++++++++++++++--
3 files changed, 95 insertions(+), 23 deletions(-)
--- a/arch/x86/include/asm/invlpgb.h
+++ b/arch/x86/include/asm/invlpgb.h
@@ -31,9 +31,8 @@ static inline void __invlpgb(unsigned lo
}
/* Wait for INVLPGB originated by this CPU to complete. */
-static inline void tlbsync(void)
+static inline void __tlbsync(void)
{
- cant_migrate();
/* TLBSYNC: supported in binutils >= 0.36. */
asm volatile(".byte 0x0f, 0x01, 0xff" ::: "memory");
}
@@ -61,19 +60,19 @@ static inline void invlpgb_flush_user(un
unsigned long addr)
{
__invlpgb(0, pcid, addr, 0, 0, INVLPGB_PCID | INVLPGB_VA);
- tlbsync();
+ __tlbsync();
}
-static inline void invlpgb_flush_user_nr_nosync(unsigned long pcid,
- unsigned long addr,
- u16 nr,
- bool pmd_stride)
+static inline void __invlpgb_flush_user_nr_nosync(unsigned long pcid,
+ unsigned long addr,
+ u16 nr,
+ bool pmd_stride)
{
__invlpgb(0, pcid, addr, nr - 1, pmd_stride, INVLPGB_PCID | INVLPGB_VA);
}
/* Flush all mappings for a given PCID, not including globals. */
-static inline void invlpgb_flush_single_pcid_nosync(unsigned long pcid)
+static inline void __invlpgb_flush_single_pcid_nosync(unsigned long pcid)
{
__invlpgb(0, pcid, 0, 0, 0, INVLPGB_PCID);
}
@@ -82,11 +81,11 @@ static inline void invlpgb_flush_single_
static inline void invlpgb_flush_all(void)
{
__invlpgb(0, 0, 0, 0, 0, INVLPGB_INCLUDE_GLOBAL);
- tlbsync();
+ __tlbsync();
}
/* Flush addr, including globals, for all PCIDs. */
-static inline void invlpgb_flush_addr_nosync(unsigned long addr, u16 nr)
+static inline void __invlpgb_flush_addr_nosync(unsigned long addr, u16 nr)
{
__invlpgb(0, 0, addr, nr - 1, 0, INVLPGB_INCLUDE_GLOBAL);
}
@@ -95,7 +94,7 @@ static inline void invlpgb_flush_addr_no
static inline void invlpgb_flush_all_nonglobals(void)
{
__invlpgb(0, 0, 0, 0, 0, 0);
- tlbsync();
+ __tlbsync();
}
#endif /* _ASM_X86_INVLPGB */
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -106,6 +106,7 @@ struct tlb_state {
* need to be invalidated.
*/
bool invalidate_other;
+ bool need_tlbsync;
#ifdef CONFIG_ADDRESS_MASKING
/*
@@ -309,6 +310,10 @@ static inline void broadcast_tlb_flush(s
static inline void consider_global_asid(struct mm_struct *mm)
{
}
+
+static inline void tlbsync(void)
+{
+}
#endif
#ifdef CONFIG_PARAVIRT
@@ -358,21 +363,15 @@ static inline u64 inc_mm_tlb_gen(struct
return atomic64_inc_return(&mm->context.tlb_gen);
}
-static inline void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch,
- struct mm_struct *mm,
- unsigned long uaddr)
-{
- inc_mm_tlb_gen(mm);
- cpumask_or(&batch->cpumask, &batch->cpumask, mm_cpumask(mm));
- mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL);
-}
-
static inline void arch_flush_tlb_batched_pending(struct mm_struct *mm)
{
flush_tlb_mm(mm);
}
extern void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch);
+extern void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch,
+ struct mm_struct *mm,
+ unsigned long uaddr);
static inline bool pte_flags_need_flush(unsigned long oldflags,
unsigned long newflags,
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -488,6 +488,37 @@ static void finish_asid_transition(struc
WRITE_ONCE(mm->context.asid_transition, false);
}
+static inline void tlbsync(void)
+{
+ if (!this_cpu_read(cpu_tlbstate.need_tlbsync))
+ return;
+ __tlbsync();
+ this_cpu_write(cpu_tlbstate.need_tlbsync, false);
+}
+
+static inline void invlpgb_flush_user_nr_nosync(unsigned long pcid,
+ unsigned long addr,
+ u16 nr, bool pmd_stride)
+{
+ __invlpgb_flush_user_nr_nosync(pcid, addr, nr, pmd_stride);
+ if (!this_cpu_read(cpu_tlbstate.need_tlbsync))
+ this_cpu_write(cpu_tlbstate.need_tlbsync, true);
+}
+
+static inline void invlpgb_flush_single_pcid_nosync(unsigned long pcid)
+{
+ __invlpgb_flush_single_pcid_nosync(pcid);
+ if (!this_cpu_read(cpu_tlbstate.need_tlbsync))
+ this_cpu_write(cpu_tlbstate.need_tlbsync, true);
+}
+
+static inline void invlpgb_flush_addr_nosync(unsigned long addr, u16 nr)
+{
+ __invlpgb_flush_addr_nosync(addr, nr);
+ if (!this_cpu_read(cpu_tlbstate.need_tlbsync))
+ this_cpu_write(cpu_tlbstate.need_tlbsync, true);
+}
+
static void broadcast_tlb_flush(struct flush_tlb_info *info)
{
bool pmd = info->stride_shift == PMD_SHIFT;
@@ -794,6 +825,8 @@ void switch_mm_irqs_off(struct mm_struct
if (IS_ENABLED(CONFIG_PROVE_LOCKING))
WARN_ON_ONCE(!irqs_disabled());
+ tlbsync();
+
/*
* Verify that CR3 is what we think it is. This will catch
* hypothetical buggy code that directly switches to swapper_pg_dir
@@ -976,6 +1009,8 @@ reload_tlb:
*/
void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
{
+ tlbsync();
+
if (this_cpu_read(cpu_tlbstate.loaded_mm) == &init_mm)
return;
@@ -1621,9 +1656,7 @@ void arch_tlbbatch_flush(struct arch_tlb
* a local TLB flush is needed. Optimize this use-case by calling
* flush_tlb_func_local() directly in this case.
*/
- if (cpu_feature_enabled(X86_FEATURE_INVLPGB)) {
- invlpgb_flush_all_nonglobals();
- } else if (cpumask_any_but(&batch->cpumask, cpu) < nr_cpu_ids) {
+ if (cpumask_any_but(&batch->cpumask, cpu) < nr_cpu_ids) {
flush_tlb_multi(&batch->cpumask, info);
} else if (cpumask_test_cpu(cpu, &batch->cpumask)) {
lockdep_assert_irqs_enabled();
@@ -1632,12 +1665,53 @@ void arch_tlbbatch_flush(struct arch_tlb
local_irq_enable();
}
+ /*
+ * If we issued (asynchronous) INVLPGB flushes, wait for them here.
+ * The cpumask above contains only CPUs that were running tasks
+ * not using broadcast TLB flushing.
+ */
+ if (cpu_feature_enabled(X86_FEATURE_INVLPGB))
+ tlbsync();
+
cpumask_clear(&batch->cpumask);
put_flush_tlb_info();
put_cpu();
}
+void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch,
+ struct mm_struct *mm,
+ unsigned long uaddr)
+{
+ u16 asid = mm_global_asid(mm);
+
+ if (asid) {
+ invlpgb_flush_user_nr_nosync(kern_pcid(asid), uaddr, 1, false);
+ /* Do any CPUs supporting INVLPGB need PTI? */
+ if (static_cpu_has(X86_FEATURE_PTI))
+ invlpgb_flush_user_nr_nosync(user_pcid(asid), uaddr, 1, false);
+
+ /*
+ * Some CPUs might still be using a local ASID for this
+ * process, and require IPIs, while others are using the
+ * global ASID.
+ *
+ * In this corner case we need to do both the broadcast
+ * TLB invalidation, and send IPIs. The IPIs will help
+ * stragglers transition to the broadcast ASID.
+ */
+ if (in_asid_transition(mm))
+ asid = 0;
+ }
+
+ if (!asid) {
+ inc_mm_tlb_gen(mm);
+ cpumask_or(&batch->cpumask, &batch->cpumask, mm_cpumask(mm));
+ }
+
+ mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL);
+}
+
/*
* Blindly accessing user memory from NMI context can be dangerous
* if we're in the middle of switching the current user task or


@@ -1,6 +1,6 @@
-From 0678da9f0870f0d211d49808a66e98abc0c58438 Mon Sep 17 00:00:00 2001
+From 101ba03a6474bbc52971505abf1e3ee9613f255b Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
-Date: Wed, 22 Jan 2025 23:23:30 -0500
+Date: Wed, 5 Feb 2025 23:43:30 -0500
Subject: x86/mm: enable AMD translation cache extensions
With AMD TCE (translation cache extensions) only the intermediate mappings
@@ -22,6 +22,7 @@ only those upper-level entries that lead to the target PTE in
the page table hierarchy, leaving unrelated upper-level entries intact.
Signed-off-by: Rik van Riel <riel@surriel.com>
+Tested-by: Manali Shukla <Manali.Shukla@amd.com>
---
arch/x86/include/asm/msr-index.h | 2 ++
arch/x86/kernel/cpu/amd.c | 4 ++++
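
For context (not part of the diff above): the amd.c hunk presumably boils down to setting the TCE bit in the EFER MSR on CPUs that report the feature. A minimal sketch of that idea, assuming EFER.TCE is bit 15 as documented by AMD:

	/* Sketch: enable Translation Cache Extension on AMD CPUs.
	 * EFER is MSR 0xC0000080; msr_set_bit() read-modify-writes one bit. */
	#define _EFER_TCE 15	/* assumed bit position of EFER.TCE */

	static void amd_enable_tce(struct cpuinfo_x86 *c)
	{
		if (cpu_has(c, X86_FEATURE_TCE))
			msr_set_bit(MSR_EFER, _EFER_TCE);
	}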


@@ -1,6 +1,6 @@
-From 02d1759eda082f9595f3232f5dffd5d49943924a Mon Sep 17 00:00:00 2001
+From 7b8ef03b059bca98d2af696c3ec2adcaa673f7e4 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
-Date: Wed, 22 Jan 2025 23:23:31 -0500
+Date: Wed, 5 Feb 2025 23:43:31 -0500
Subject: x86/mm: only invalidate final translations with INVLPGB
Use the INVLPGB_FINAL_ONLY flag when invalidating mappings with INVPLGB.
@@ -11,15 +11,16 @@ On the (rare) occasions where we free page tables we do a full flush,
ensuring intermediate translations get flushed from the TLB.
Signed-off-by: Rik van Riel <riel@surriel.com>
+Tested-by: Manali Shukla <Manali.Shukla@amd.com>
---
arch/x86/include/asm/invlpgb.h | 10 ++++++++--
-arch/x86/mm/tlb.c | 8 ++++----
+arch/x86/mm/tlb.c | 13 +++++++------
-2 files changed, 12 insertions(+), 6 deletions(-)
+2 files changed, 15 insertions(+), 8 deletions(-)
--- a/arch/x86/include/asm/invlpgb.h
+++ b/arch/x86/include/asm/invlpgb.h
-@@ -67,9 +67,15 @@ static inline void invlpgb_flush_user(un
+@@ -66,9 +66,15 @@ static inline void invlpgb_flush_user(un
-static inline void invlpgb_flush_user_nr_nosync(unsigned long pcid,
+static inline void __invlpgb_flush_user_nr_nosync(unsigned long pcid,
unsigned long addr,
u16 nr,
- bool pmd_stride)
@@ -27,7 +28,7 @@ Signed-off-by: Rik van Riel <riel@surriel.com>
+ bool freed_tables)
{
- __invlpgb(0, pcid, addr, nr - 1, pmd_stride, INVLPGB_PCID | INVLPGB_VA);
-+ unsigned long flags = INVLPGB_PCID | INVLPGB_VA;
++ u8 flags = INVLPGB_PCID | INVLPGB_VA;
+
+ if (!freed_tables)
+ flags |= INVLPGB_FINAL_ONLY;
@@ -38,7 +39,20 @@ Signed-off-by: Rik van Riel <riel@surriel.com>
/* Flush all mappings for a given PCID, not including globals. */
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
-@@ -518,10 +518,10 @@ static void broadcast_tlb_flush(struct f
+@@ -498,9 +498,10 @@ static inline void tlbsync(void)
static inline void invlpgb_flush_user_nr_nosync(unsigned long pcid,
unsigned long addr,
- u16 nr, bool pmd_stride)
+ u16 nr, bool pmd_stride,
+ bool freed_tables)
{
- __invlpgb_flush_user_nr_nosync(pcid, addr, nr, pmd_stride);
+ __invlpgb_flush_user_nr_nosync(pcid, addr, nr, pmd_stride, freed_tables);
if (!this_cpu_read(cpu_tlbstate.need_tlbsync))
this_cpu_write(cpu_tlbstate.need_tlbsync, true);
}
@@ -549,10 +550,10 @@ static void broadcast_tlb_flush(struct f
nr = min(maxnr, (info->end - addr) >> info->stride_shift);
nr = max(nr, 1);
@@ -51,10 +65,10 @@ Signed-off-by: Rik van Riel <riel@surriel.com>
addr += nr << info->stride_shift;
} while (addr < info->end);
-@@ -1654,10 +1654,10 @@ void arch_tlbbatch_add_pending(struct ar
+@@ -1686,10 +1687,10 @@ void arch_tlbbatch_add_pending(struct ar
-batch->used_invlpgb = true;
-migrate_disable();
-}
+u16 asid = mm_global_asid(mm);
+if (asid) {
- invlpgb_flush_user_nr_nosync(kern_pcid(asid), uaddr, 1, false);
+ invlpgb_flush_user_nr_nosync(kern_pcid(asid), uaddr, 1, false, false);
/* Do any CPUs supporting INVLPGB need PTI? */


@@ -1,4 +1,4 @@
-From b61dfc43cfc7511795366dfd9260f0959ca2f2d2 Mon Sep 17 00:00:00 2001
+From 7b0836fcad644d24d6318bf63013ec1b35d6a27b Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
Date: Thu, 19 Dec 2024 15:32:53 -0500
Subject: mm: remove unnecessary calls to lru_add_drain


@@ -1,4 +1,4 @@
-From e2d1ffb13e3909dab142f0f8ec8f934b79930717 Mon Sep 17 00:00:00 2001
+From 7ecab5a83d3155baa009cd6bc6e18959fee8be62 Mon Sep 17 00:00:00 2001
From: Vincenzo Frascino <vincenzo.frascino@arm.com>
Date: Mon, 14 Oct 2024 16:13:39 +0100
Subject: vdso: Introduce vdso/page.h


@@ -1,4 +1,4 @@
-From 4478ee194402472199e05d3e27a87f0fc775cc18 Mon Sep 17 00:00:00 2001
+From d1bcf51400e790e65945a29078bd816bd61aa148 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 24 Oct 2024 13:34:26 +0000
Subject: vdso: Change PAGE_MASK to signed on all 32-bit architectures


@@ -1,135 +0,0 @@
From 647727eaa06fc61fbc55de4c09ab0c0fe7bc7263 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
Date: Wed, 22 Jan 2025 23:23:29 -0500
Subject: x86/mm: do targeted broadcast flushing from tlbbatch code
Instead of doing a system-wide TLB flush from arch_tlbbatch_flush,
queue up asynchronous, targeted flushes from arch_tlbbatch_add_pending.
This also allows us to avoid adding the CPUs of processes using broadcast
flushing to the batch->cpumask, and will hopefully further reduce TLB
flushing from the reclaim and compaction paths.
Signed-off-by: Rik van Riel <riel@surriel.com>
---
arch/x86/include/asm/tlbbatch.h | 1 +
arch/x86/include/asm/tlbflush.h | 12 ++-----
arch/x86/mm/tlb.c | 57 +++++++++++++++++++++++++++++++--
3 files changed, 58 insertions(+), 12 deletions(-)
--- a/arch/x86/include/asm/tlbbatch.h
+++ b/arch/x86/include/asm/tlbbatch.h
@@ -10,6 +10,7 @@ struct arch_tlbflush_unmap_batch {
* the PFNs being flushed..
*/
struct cpumask cpumask;
+ bool used_invlpgb;
};
#endif /* _ARCH_X86_TLBBATCH_H */
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -358,21 +358,15 @@ static inline u64 inc_mm_tlb_gen(struct
return atomic64_inc_return(&mm->context.tlb_gen);
}
-static inline void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch,
- struct mm_struct *mm,
- unsigned long uaddr)
-{
- inc_mm_tlb_gen(mm);
- cpumask_or(&batch->cpumask, &batch->cpumask, mm_cpumask(mm));
- mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL);
-}
-
static inline void arch_flush_tlb_batched_pending(struct mm_struct *mm)
{
flush_tlb_mm(mm);
}
extern void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch);
+extern void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch,
+ struct mm_struct *mm,
+ unsigned long uaddr);
static inline bool pte_flags_need_flush(unsigned long oldflags,
unsigned long newflags,
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -1612,9 +1612,7 @@ void arch_tlbbatch_flush(struct arch_tlb
* a local TLB flush is needed. Optimize this use-case by calling
* flush_tlb_func_local() directly in this case.
*/
- if (cpu_feature_enabled(X86_FEATURE_INVLPGB)) {
- invlpgb_flush_all_nonglobals();
- } else if (cpumask_any_but(&batch->cpumask, cpu) < nr_cpu_ids) {
+ if (cpumask_any_but(&batch->cpumask, cpu) < nr_cpu_ids) {
flush_tlb_multi(&batch->cpumask, info);
} else if (cpumask_test_cpu(cpu, &batch->cpumask)) {
lockdep_assert_irqs_enabled();
@@ -1623,12 +1621,65 @@ void arch_tlbbatch_flush(struct arch_tlb
local_irq_enable();
}
+ /*
+ * If we issued (asynchronous) INVLPGB flushes, wait for them here.
+ * The cpumask above contains only CPUs that were running tasks
+ * not using broadcast TLB flushing.
+ */
+ if (cpu_feature_enabled(X86_FEATURE_INVLPGB) && batch->used_invlpgb) {
+ tlbsync();
+ migrate_enable();
+ batch->used_invlpgb = false;
+ }
+
cpumask_clear(&batch->cpumask);
put_flush_tlb_info();
put_cpu();
}
+void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch,
+ struct mm_struct *mm,
+ unsigned long uaddr)
+{
+ u16 asid = mm_global_asid(mm);
+
+ if (asid) {
+ /*
+ * Queue up an asynchronous invalidation. The corresponding
+ * TLBSYNC is done in arch_tlbbatch_flush(), and must be done
+ * on the same CPU.
+ */
+ if (!batch->used_invlpgb) {
+ batch->used_invlpgb = true;
+ migrate_disable();
+ }
+ invlpgb_flush_user_nr_nosync(kern_pcid(asid), uaddr, 1, false);
+ /* Do any CPUs supporting INVLPGB need PTI? */
+ if (static_cpu_has(X86_FEATURE_PTI))
+ invlpgb_flush_user_nr_nosync(user_pcid(asid), uaddr, 1, false);
+
+ /*
+ * Some CPUs might still be using a local ASID for this
+ * process, and require IPIs, while others are using the
+ * global ASID.
+ *
+ * In this corner case we need to do both the broadcast
+ * TLB invalidation, and send IPIs. The IPIs will help
+ * stragglers transition to the broadcast ASID.
+ */
+ if (READ_ONCE(mm->context.asid_transition))
+ asid = 0;
+ }
+
+ if (!asid) {
+ inc_mm_tlb_gen(mm);
+ cpumask_or(&batch->cpumask, &batch->cpumask, mm_cpumask(mm));
+ }
+
+ mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL);
+}
+
/*
* Blindly accessing user memory from NMI context can be dangerous
* if we're in the middle of switching the current user task or


@@ -93,7 +93,7 @@ caused by rebalancing too many tasks at once.
/* Restrict the NUMA promotion throughput (MB/s) for each target node. */
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
-@@ -2825,7 +2825,7 @@ extern void deactivate_task(struct rq *r
+@@ -2797,7 +2797,7 @@ extern void deactivate_task(struct rq *r
extern void wakeup_preempt(struct rq *rq, struct task_struct *p, int flags);

debian/patches/series

@@ -203,21 +203,21 @@ patchset-xanmod/valve/0004-leds-steamdeck-Add-support-for-Steam-Deck-LED.patch
patchset-xanmod/valve/0005-mfd-Add-MFD-core-driver-for-Steam-Deck.patch
patchset-xanmod/valve/0006-mfd-steamdeck-Expose-controller-board-power-in-sysfs.patch
-patchset-zen/nvlpgb-v7/0001-x86-mm-make-MMU_GATHER_RCU_TABLE_FREE-unconditional.patch
+patchset-zen/invlpgb-v9/0001-x86-mm-make-MMU_GATHER_RCU_TABLE_FREE-unconditional.patch
-patchset-zen/nvlpgb-v7/0002-x86-mm-remove-pv_ops.mmu.tlb_remove_table-call.patch
+patchset-zen/invlpgb-v9/0002-x86-mm-remove-pv_ops.mmu.tlb_remove_table-call.patch
-patchset-zen/nvlpgb-v7/0003-x86-mm-consolidate-full-flush-threshold-decision.patch
+patchset-zen/invlpgb-v9/0003-x86-mm-consolidate-full-flush-threshold-decision.patch
-patchset-zen/nvlpgb-v7/0004-x86-mm-get-INVLPGB-count-max-from-CPUID.patch
+patchset-zen/invlpgb-v9/0004-x86-mm-get-INVLPGB-count-max-from-CPUID.patch
-patchset-zen/nvlpgb-v7/0005-x86-mm-add-INVLPGB-support-code.patch
+patchset-zen/invlpgb-v9/0005-x86-mm-add-INVLPGB-support-code.patch
-patchset-zen/nvlpgb-v7/0006-x86-mm-use-INVLPGB-for-kernel-TLB-flushes.patch
+patchset-zen/invlpgb-v9/0006-x86-mm-use-INVLPGB-for-kernel-TLB-flushes.patch
-patchset-zen/nvlpgb-v7/0007-x86-mm-use-INVLPGB-in-flush_tlb_all.patch
+patchset-zen/invlpgb-v9/0007-x86-mm-use-INVLPGB-in-flush_tlb_all.patch
-patchset-zen/nvlpgb-v7/0008-x86-mm-use-broadcast-TLB-flushing-for-page-reclaim-T.patch
+patchset-zen/invlpgb-v9/0008-x86-mm-use-broadcast-TLB-flushing-for-page-reclaim-T.patch
-patchset-zen/nvlpgb-v7/0009-x86-mm-enable-broadcast-TLB-invalidation-for-multi-t.patch
+patchset-zen/invlpgb-v9/0009-x86-mm-enable-broadcast-TLB-invalidation-for-multi-t.patch
-patchset-zen/nvlpgb-v7/0010-x86-mm-do-targeted-broadcast-flushing-from-tlbbatch-.patch
+patchset-zen/invlpgb-v9/0010-x86-mm-do-targeted-broadcast-flushing-from-tlbbatch-.patch
-patchset-zen/nvlpgb-v7/0011-x86-mm-enable-AMD-translation-cache-extensions.patch
+patchset-zen/invlpgb-v9/0011-x86-mm-enable-AMD-translation-cache-extensions.patch
-patchset-zen/nvlpgb-v7/0012-x86-mm-only-invalidate-final-translations-with-INVLP.patch
+patchset-zen/invlpgb-v9/0012-x86-mm-only-invalidate-final-translations-with-INVLP.patch
-patchset-zen/nvlpgb-v7/0013-mm-remove-unnecessary-calls-to-lru_add_drain.patch
+patchset-zen/invlpgb-v9/0013-mm-remove-unnecessary-calls-to-lru_add_drain.patch
-patchset-zen/nvlpgb-v7/0014-vdso-Introduce-vdso-page.h.patch
+patchset-zen/invlpgb-v9/0014-vdso-Introduce-vdso-page.h.patch
-patchset-zen/nvlpgb-v7/0015-vdso-Change-PAGE_MASK-to-signed-on-all-32-bit-archit.patch
+patchset-zen/invlpgb-v9/0015-vdso-Change-PAGE_MASK-to-signed-on-all-32-bit-archit.patch
patchset-zen/tlb/0001-mm-Optimize-TLB-flushes-during-page-reclaim.patch