
release 6.12.13

Konstantin Demin 2025-02-09 19:40:34 +03:00
parent b8c80400f5
commit e0baaf49dd
26 changed files with 385 additions and 221 deletions

debian/changelog

@@ -1,3 +1,10 @@
linux (6.12.13-1) sid; urgency=medium
* New upstream stable update:
https://www.kernel.org/pub/linux/kernel/v6.x/ChangeLog-6.12.13
-- Konstantin Demin <rockdrilla@gmail.com> Sun, 09 Feb 2025 19:34:35 +0300
linux (6.12.12-1) sid; urgency=medium
* New upstream stable update:


@@ -22,7 +22,7 @@ Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
-@@ -1617,6 +1617,11 @@ int perf_cpu_time_max_percent_handler(co
+@@ -1623,6 +1623,11 @@ int perf_cpu_time_max_percent_handler(co
int perf_event_max_stack_handler(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos);
@@ -50,7 +50,7 @@ Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
/* Minimum for 512 kiB + 1 user control page */
int sysctl_perf_event_mlock __read_mostly = 512 + (PAGE_SIZE / 1024); /* 'free' kiB per user */
-@@ -12681,6 +12686,9 @@ SYSCALL_DEFINE5(perf_event_open,
+@@ -12682,6 +12687,9 @@ SYSCALL_DEFINE5(perf_event_open,
if (err)
return err;


@@ -56,7 +56,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
struct rate_sample *rs);
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
-@@ -2767,6 +2767,7 @@ static bool tcp_write_xmit(struct sock *
+@@ -2770,6 +2770,7 @@ static bool tcp_write_xmit(struct sock *
skb_set_delivery_time(skb, tp->tcp_wstamp_ns, SKB_CLOCK_MONOTONIC);
list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue);
tcp_init_tso_segs(skb, mss_now);


@@ -55,7 +55,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
* between different flows.
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
-@@ -1603,7 +1603,7 @@ int tcp_fragment(struct sock *sk, enum t
+@@ -1606,7 +1606,7 @@ int tcp_fragment(struct sock *sk, enum t
{
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *buff;
@@ -64,7 +64,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
long limit;
int nlen;
u8 flags;
-@@ -1678,6 +1678,30 @@ int tcp_fragment(struct sock *sk, enum t
+@@ -1681,6 +1681,30 @@ int tcp_fragment(struct sock *sk, enum t
if (diff)
tcp_adjust_pcount(sk, skb, diff);


@@ -97,7 +97,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
};
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
-@@ -2059,13 +2059,12 @@ static u32 tcp_tso_autosize(const struct
+@@ -2062,13 +2062,12 @@ static u32 tcp_tso_autosize(const struct
static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now)
{
const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;


@@ -35,7 +35,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
fastopen_no_cookie:1, /* Allow send/recv SYN+data without a cookie */
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
-@@ -3005,6 +3005,7 @@ void tcp_send_loss_probe(struct sock *sk
+@@ -3008,6 +3008,7 @@ void tcp_send_loss_probe(struct sock *sk
if (WARN_ON(!skb || !tcp_skb_pcount(skb)))
goto rearm_timer;


@@ -88,7 +88,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
-@@ -336,10 +336,9 @@ static void tcp_ecn_send_syn(struct sock
+@@ -339,10 +339,9 @@ static void tcp_ecn_send_syn(struct sock
bool bpf_needs_ecn = tcp_bpf_ca_needs_ecn(sk);
bool use_ecn = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn) == 1 ||
tcp_ca_needs_ecn(sk) || bpf_needs_ecn;
@@ -100,7 +100,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
if (dst && dst_feature(dst, RTAX_FEATURE_ECN))
use_ecn = true;
}
-@@ -351,6 +350,9 @@ static void tcp_ecn_send_syn(struct sock
+@@ -354,6 +353,9 @@ static void tcp_ecn_send_syn(struct sock
tp->ecn_flags = TCP_ECN_OK;
if (tcp_ca_needs_ecn(sk) || bpf_needs_ecn)
INET_ECN_xmit(sk);


@@ -47,7 +47,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
/* BBR marks the current round trip as a loss round. */
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
-@@ -390,7 +390,8 @@ static void tcp_ecn_send(struct sock *sk
+@@ -393,7 +393,8 @@ static void tcp_ecn_send(struct sock *sk
th->cwr = 1;
skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
}


@@ -1,6 +1,6 @@
-From 6cb30d7518301094b9c7397a24a22cf538a1d64c Mon Sep 17 00:00:00 2001
+From e11153c4df0fee7caadec3714a60a4936d6a9ea2 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
-Date: Wed, 22 Jan 2025 23:23:20 -0500
+Date: Wed, 5 Feb 2025 23:43:20 -0500
Subject: x86/mm: make MMU_GATHER_RCU_TABLE_FREE unconditional
Currently x86 uses CONFIG_MMU_GATHER_TABLE_FREE when using
@@ -29,6 +29,7 @@ That makes it safe to use INVLPGB on AMD CPUs.
Signed-off-by: Rik van Riel <riel@surriel.com>
Suggested-by: Peter Zijlstra <peterz@infradead.org>
+Tested-by: Manali Shukla <Manali.Shukla@amd.com>
---
arch/x86/Kconfig | 2 +-
arch/x86/kernel/paravirt.c | 7 +------


@@ -1,6 +1,6 @@
-From df8f812b62c450b98b972ad0a4d5a5ba400f5eae Mon Sep 17 00:00:00 2001
+From e8008cb69c5e4efbaedd70b0fb692343e4aa0e51 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
-Date: Wed, 22 Jan 2025 23:23:21 -0500
+Date: Wed, 5 Feb 2025 23:43:21 -0500
Subject: x86/mm: remove pv_ops.mmu.tlb_remove_table call
Every pv_ops.mmu.tlb_remove_table call ends up calling tlb_remove_table.
@@ -10,6 +10,7 @@ and not going through the paravirt function pointers.
Signed-off-by: Rik van Riel <riel@surriel.com>
Suggested-by: Qi Zheng <zhengqi.arch@bytedance.com>
+Tested-by: Manali Shukla <Manali.Shukla@amd.com>
---
arch/x86/hyperv/mmu.c | 1 -
arch/x86/include/asm/paravirt.h | 5 -----


@@ -1,6 +1,6 @@
-From 8b2bd3f69b50cfe59eee4506413715878bcbb901 Mon Sep 17 00:00:00 2001
+From 7ac6508c4db81eced5f6e3d7c8913af1da6cf110 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
-Date: Wed, 22 Jan 2025 23:23:22 -0500
+Date: Wed, 5 Feb 2025 23:43:22 -0500
Subject: x86/mm: consolidate full flush threshold decision
Reduce code duplication by consolidating the decision point
@@ -10,15 +10,34 @@ inside get_flush_tlb_info.
Signed-off-by: Rik van Riel <riel@surriel.com>
Suggested-by: Dave Hansen <dave.hansen@intel.com>
---
-arch/x86/mm/tlb.c | 43 ++++++++++++++++++++-----------------------
+arch/x86/mm/tlb.c | 56 ++++++++++++++++++++++++++---------------------
-1 file changed, 20 insertions(+), 23 deletions(-)
+1 file changed, 31 insertions(+), 25 deletions(-)
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
-@@ -981,6 +981,15 @@ static struct flush_tlb_info *get_flush_
+@@ -973,14 +973,32 @@ static struct flush_tlb_info *get_flush_
BUG_ON(this_cpu_inc_return(flush_tlb_info_idx) != 1);
#endif
- info->start = start;
- info->end = end;
+ /*
+ * Round the start and end addresses to the page size specified
+ * by the stride shift. This ensures partial pages at the end of
+ * a range get fully invalidated.
+ */
+ info->start = round_down(start, 1 << stride_shift);
+ info->end = round_up(end, 1 << stride_shift);
info->mm = mm;
info->stride_shift = stride_shift;
info->freed_tables = freed_tables;
info->new_tlb_gen = new_tlb_gen;
info->initiating_cpu = smp_processor_id();
+ WARN_ONCE(start != info->start || end != info->end,
+ "TLB flush not stride %x aligned. Start %lx, end %lx\n",
+ 1 << stride_shift, start, end);
+
+ /*
+ * If the number of flushes is so large that a full flush
+ * would be faster, do a full flush.
@@ -31,7 +50,7 @@ Suggested-by: Dave Hansen <dave.hansen@intel.com>
return info;
}
-@@ -998,17 +1007,8 @@ void flush_tlb_mm_range(struct mm_struct
+@@ -998,17 +1016,8 @@ void flush_tlb_mm_range(struct mm_struct
bool freed_tables)
{
struct flush_tlb_info *info;
@@ -50,7 +69,7 @@ Suggested-by: Dave Hansen <dave.hansen@intel.com>
/* This is also a barrier that synchronizes with switch_mm(). */
new_tlb_gen = inc_mm_tlb_gen(mm);
-@@ -1060,22 +1060,19 @@ static void do_kernel_range_flush(void *
+@@ -1060,22 +1069,19 @@ static void do_kernel_range_flush(void *
void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
@@ -61,12 +80,12 @@ Suggested-by: Dave Hansen <dave.hansen@intel.com>
- } else {
- struct flush_tlb_info *info;
+ struct flush_tlb_info *info;
+
+ guard(preempt)();
- preempt_disable();
- info = get_flush_tlb_info(NULL, start, end, 0, false,
- TLB_GENERATION_INVALID);
+ guard(preempt)();
+
+ info = get_flush_tlb_info(NULL, start, end, PAGE_SHIFT, false,
+ TLB_GENERATION_INVALID);
@@ -82,7 +101,7 @@ Suggested-by: Dave Hansen <dave.hansen@intel.com>
}
/*
-@@ -1247,7 +1244,7 @@ void arch_tlbbatch_flush(struct arch_tlb
+@@ -1247,7 +1253,7 @@ void arch_tlbbatch_flush(struct arch_tlb
int cpu = get_cpu();


@@ -1,6 +1,6 @@
-From a182b0471ba3c3329d93abfa07e3d452183a9137 Mon Sep 17 00:00:00 2001
+From e772b2eb66e5c3cf668feadab678f2a88d896189 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
-Date: Wed, 22 Jan 2025 23:23:23 -0500
+Date: Wed, 5 Feb 2025 23:43:23 -0500
Subject: x86/mm: get INVLPGB count max from CPUID
The CPU advertises the maximum number of pages that can be shot down
@@ -9,6 +9,7 @@ with one INVLPGB instruction in the CPUID data.
Save that information for later use.
Signed-off-by: Rik van Riel <riel@surriel.com>
+Tested-by: Manali Shukla <Manali.Shukla@amd.com>
---
arch/x86/Kconfig.cpu | 5 +++++
arch/x86/include/asm/cpufeatures.h | 1 +
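
As background for this patch (not part of the diff above): AMD advertises the INVLPGB page-count limit in CPUID leaf 0x80000008, EDX bits 15:0 (InvlpgbCountMax). A minimal sketch of reading it, with a hypothetical helper name and assuming that bit layout:

	/* Sketch: read the maximum number of pages one INVLPGB may target.
	 * Assumes CPUID leaf 0x80000008 reports InvlpgbCountMax in EDX[15:0]. */
	static u16 read_invlpgb_count_max(void)
	{
		if (!boot_cpu_has(X86_FEATURE_INVLPGB))
			return 0;

		return cpuid_edx(0x80000008) & 0xffff;
	}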


@@ -1,12 +1,13 @@
-From cc3f8dd3033c79abd9f37a94efed74a535a703c9 Mon Sep 17 00:00:00 2001
+From 7a896b12875e2b988acbf0229fb4bcf9157b83bd Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
-Date: Wed, 22 Jan 2025 23:23:24 -0500
+Date: Wed, 5 Feb 2025 23:43:24 -0500
Subject: x86/mm: add INVLPGB support code
Add invlpgb.h with the helper functions and definitions needed to use
broadcast TLB invalidation on AMD EPYC 3 and newer CPUs.
Signed-off-by: Rik van Riel <riel@surriel.com>
+Tested-by: Manali Shukla <Manali.Shukla@amd.com>
---
arch/x86/include/asm/invlpgb.h | 101 ++++++++++++++++++++++++++++++++
arch/x86/include/asm/tlbflush.h | 1 +


@@ -1,6 +1,6 @@
-From 6b6686f0d7e228d0a2d8c166204adea5484c20d7 Mon Sep 17 00:00:00 2001
+From 99f2b0eda74d7ec76c9c48b78f9d30d251501c28 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
-Date: Wed, 22 Jan 2025 23:23:25 -0500
+Date: Wed, 5 Feb 2025 23:43:25 -0500
Subject: x86/mm: use INVLPGB for kernel TLB flushes
Use broadcast TLB invalidation for kernel addresses when available.
@@ -8,13 +8,14 @@ Use broadcast TLB invalidation for kernel addresses when available.
Remove the need to send IPIs for kernel TLB flushes.
Signed-off-by: Rik van Riel <riel@surriel.com>
+Tested-by: Manali Shukla <Manali.Shukla@amd.com>
---
arch/x86/mm/tlb.c | 28 +++++++++++++++++++++++++++-
1 file changed, 27 insertions(+), 1 deletion(-)
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
-@@ -1048,6 +1048,30 @@ void flush_tlb_all(void)
+@@ -1057,6 +1057,30 @@ void flush_tlb_all(void)
on_each_cpu(do_flush_tlb_all, NULL, 1);
}
@@ -45,7 +46,7 @@ Signed-off-by: Rik van Riel <riel@surriel.com>
static void do_kernel_range_flush(void *info)
{
struct flush_tlb_info *f = info;
-@@ -1067,7 +1091,9 @@ void flush_tlb_kernel_range(unsigned lon
+@@ -1076,7 +1100,9 @@ void flush_tlb_kernel_range(unsigned lon
info = get_flush_tlb_info(NULL, start, end, PAGE_SHIFT, false,
TLB_GENERATION_INVALID);


@@ -1,19 +1,20 @@
-From 6cffce503223f9076a5e16177905ba3ab6d9f7d8 Mon Sep 17 00:00:00 2001
+From 1ef7edb5b2375d4010ed2ad0c7d87fcfa7ab4519 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
-Date: Wed, 22 Jan 2025 23:23:26 -0500
+Date: Wed, 5 Feb 2025 23:43:26 -0500
Subject: x86/mm: use INVLPGB in flush_tlb_all
The flush_tlb_all() function is not used a whole lot, but we might
as well use broadcast TLB flushing there, too.
Signed-off-by: Rik van Riel <riel@surriel.com>
+Tested-by: Manali Shukla <Manali.Shukla@amd.com>
---
arch/x86/mm/tlb.c | 15 +++++++++++++++
1 file changed, 15 insertions(+)
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
-@@ -1036,6 +1036,19 @@ void flush_tlb_mm_range(struct mm_struct
+@@ -1045,6 +1045,19 @@ void flush_tlb_mm_range(struct mm_struct
}
@@ -33,7 +34,7 @@ Signed-off-by: Rik van Riel <riel@surriel.com>
static void do_flush_tlb_all(void *info)
{
count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
-@@ -1044,6 +1057,8 @@ static void do_flush_tlb_all(void *info)
+@@ -1053,6 +1066,8 @@ static void do_flush_tlb_all(void *info)
void flush_tlb_all(void)
{


@@ -1,6 +1,6 @@
-From 3d23d79d14cdd3c68dc5bffbaf34a60eaca7fa40 Mon Sep 17 00:00:00 2001
+From 5e5219596683c3b8178e09f6ec1e75154537325f Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
-Date: Wed, 22 Jan 2025 23:23:27 -0500
+Date: Wed, 5 Feb 2025 23:43:27 -0500
Subject: x86/mm: use broadcast TLB flushing for page reclaim TLB flushing
In the page reclaim code, we only track the CPU(s) where the TLB needs
@@ -10,13 +10,14 @@ invalidated.
Use broadcast TLB flushing when that is available.
Signed-off-by: Rik van Riel <riel@surriel.com>
+Tested-by: Manali Shukla <Manali.Shukla@amd.com>
---
arch/x86/mm/tlb.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
-@@ -1292,7 +1292,9 @@ void arch_tlbbatch_flush(struct arch_tlb
+@@ -1301,7 +1301,9 @@ void arch_tlbbatch_flush(struct arch_tlb
* a local TLB flush is needed. Optimize this use-case by calling
* flush_tlb_func_local() directly in this case.
*/


@@ -1,6 +1,6 @@
-From 79c9df0c7637c8ba8a1833889a2ace355d56c96e Mon Sep 17 00:00:00 2001
+From c7212dc64d8e9e4f12f1c6edea3b75c350a30381 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
-Date: Wed, 22 Jan 2025 23:23:28 -0500
+Date: Wed, 5 Feb 2025 23:43:28 -0500
Subject: x86/mm: enable broadcast TLB invalidation for multi-threaded
processes
@@ -9,10 +9,10 @@ and newer CPUs.
In order to not exhaust PCID space, and keep TLB flushes local for single
threaded processes, we only hand out broadcast ASIDs to processes active on
-3 or more CPUs, and gradually increase the threshold as broadcast ASID space
-is depleted.
+4 or more CPUs.
Signed-off-by: Rik van Riel <riel@surriel.com>
+Tested-by: Manali Shukla <Manali.Shukla@amd.com>
---
arch/x86/include/asm/mmu.h | 6 +
arch/x86/include/asm/mmu_context.h | 14 ++
@@ -100,12 +100,12 @@ Signed-off-by: Rik van Riel <riel@surriel.com>
+ return !is_dyn_asid(asid);
+}
+
-+static inline bool in_asid_transition(const struct flush_tlb_info *info)
++static inline bool in_asid_transition(struct mm_struct *mm)
+{
+ if (!cpu_feature_enabled(X86_FEATURE_INVLPGB))
+ return false;
+
-+ return info->mm && READ_ONCE(info->mm->context.asid_transition);
++ return mm && READ_ONCE(mm->context.asid_transition);
+}
+
+static inline u16 mm_global_asid(struct mm_struct *mm)
@@ -133,7 +133,7 @@ Signed-off-by: Rik van Riel <riel@surriel.com>
+ return false;
+}
+
-+static inline bool in_asid_transition(const struct flush_tlb_info *info)
++static inline bool in_asid_transition(struct mm_struct *mm)
+{
+ return false;
+}
@@ -583,11 +583,11 @@ Signed-off-by: Rik van Riel <riel@surriel.com>
* doing a speculative memory access.
*/
- if (info->freed_tables)
-+ if (info->freed_tables || in_asid_transition(info))
++ if (info->freed_tables || in_asid_transition(info->mm))
on_each_cpu_mask(cpumask, flush_tlb_func, (void *)info, true);
else
on_each_cpu_cond_mask(tlb_is_not_lazy, flush_tlb_func,
-@@ -1021,8 +1338,11 @@ void flush_tlb_mm_range(struct mm_struct
+@@ -1030,8 +1347,11 @@ void flush_tlb_mm_range(struct mm_struct
* a local TLB flush is needed. Optimize this use-case by calling
* flush_tlb_func_local() directly in this case.
*/


@@ -0,0 +1,251 @@
From 6f601cdcd33be8fc0da98c6bab777575af3260b8 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
Date: Wed, 5 Feb 2025 23:43:29 -0500
Subject: x86/mm: do targeted broadcast flushing from tlbbatch code
Instead of doing a system-wide TLB flush from arch_tlbbatch_flush,
queue up asynchronous, targeted flushes from arch_tlbbatch_add_pending.
This also allows us to avoid adding the CPUs of processes using broadcast
flushing to the batch->cpumask, and will hopefully further reduce TLB
flushing from the reclaim and compaction paths.
Signed-off-by: Rik van Riel <riel@surriel.com>
Tested-by: Manali Shukla <Manali.Shukla@amd.com>
---
arch/x86/include/asm/invlpgb.h | 21 +++++----
arch/x86/include/asm/tlbflush.h | 17 ++++---
arch/x86/mm/tlb.c | 80 +++++++++++++++++++++++++++++++--
3 files changed, 95 insertions(+), 23 deletions(-)
--- a/arch/x86/include/asm/invlpgb.h
+++ b/arch/x86/include/asm/invlpgb.h
@@ -31,9 +31,8 @@ static inline void __invlpgb(unsigned lo
}
/* Wait for INVLPGB originated by this CPU to complete. */
-static inline void tlbsync(void)
+static inline void __tlbsync(void)
{
- cant_migrate();
/* TLBSYNC: supported in binutils >= 0.36. */
asm volatile(".byte 0x0f, 0x01, 0xff" ::: "memory");
}
@@ -61,19 +60,19 @@ static inline void invlpgb_flush_user(un
unsigned long addr)
{
__invlpgb(0, pcid, addr, 0, 0, INVLPGB_PCID | INVLPGB_VA);
- tlbsync();
+ __tlbsync();
}
-static inline void invlpgb_flush_user_nr_nosync(unsigned long pcid,
- unsigned long addr,
- u16 nr,
- bool pmd_stride)
+static inline void __invlpgb_flush_user_nr_nosync(unsigned long pcid,
+ unsigned long addr,
+ u16 nr,
+ bool pmd_stride)
{
__invlpgb(0, pcid, addr, nr - 1, pmd_stride, INVLPGB_PCID | INVLPGB_VA);
}
/* Flush all mappings for a given PCID, not including globals. */
-static inline void invlpgb_flush_single_pcid_nosync(unsigned long pcid)
+static inline void __invlpgb_flush_single_pcid_nosync(unsigned long pcid)
{
__invlpgb(0, pcid, 0, 0, 0, INVLPGB_PCID);
}
@@ -82,11 +81,11 @@ static inline void invlpgb_flush_single_
static inline void invlpgb_flush_all(void)
{
__invlpgb(0, 0, 0, 0, 0, INVLPGB_INCLUDE_GLOBAL);
- tlbsync();
+ __tlbsync();
}
/* Flush addr, including globals, for all PCIDs. */
-static inline void invlpgb_flush_addr_nosync(unsigned long addr, u16 nr)
+static inline void __invlpgb_flush_addr_nosync(unsigned long addr, u16 nr)
{
__invlpgb(0, 0, addr, nr - 1, 0, INVLPGB_INCLUDE_GLOBAL);
}
@@ -95,7 +94,7 @@ static inline void invlpgb_flush_addr_no
static inline void invlpgb_flush_all_nonglobals(void)
{
__invlpgb(0, 0, 0, 0, 0, 0);
- tlbsync();
+ __tlbsync();
}
#endif /* _ASM_X86_INVLPGB */
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -106,6 +106,7 @@ struct tlb_state {
* need to be invalidated.
*/
bool invalidate_other;
+ bool need_tlbsync;
#ifdef CONFIG_ADDRESS_MASKING
/*
@@ -309,6 +310,10 @@ static inline void broadcast_tlb_flush(s
static inline void consider_global_asid(struct mm_struct *mm)
{
}
+
+static inline void tlbsync(void)
+{
+}
#endif
#ifdef CONFIG_PARAVIRT
@@ -358,21 +363,15 @@ static inline u64 inc_mm_tlb_gen(struct
return atomic64_inc_return(&mm->context.tlb_gen);
}
-static inline void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch,
- struct mm_struct *mm,
- unsigned long uaddr)
-{
- inc_mm_tlb_gen(mm);
- cpumask_or(&batch->cpumask, &batch->cpumask, mm_cpumask(mm));
- mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL);
-}
-
static inline void arch_flush_tlb_batched_pending(struct mm_struct *mm)
{
flush_tlb_mm(mm);
}
extern void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch);
+extern void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch,
+ struct mm_struct *mm,
+ unsigned long uaddr);
static inline bool pte_flags_need_flush(unsigned long oldflags,
unsigned long newflags,
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -488,6 +488,37 @@ static void finish_asid_transition(struc
WRITE_ONCE(mm->context.asid_transition, false);
}
+static inline void tlbsync(void)
+{
+ if (!this_cpu_read(cpu_tlbstate.need_tlbsync))
+ return;
+ __tlbsync();
+ this_cpu_write(cpu_tlbstate.need_tlbsync, false);
+}
+
+static inline void invlpgb_flush_user_nr_nosync(unsigned long pcid,
+ unsigned long addr,
+ u16 nr, bool pmd_stride)
+{
+ __invlpgb_flush_user_nr_nosync(pcid, addr, nr, pmd_stride);
+ if (!this_cpu_read(cpu_tlbstate.need_tlbsync))
+ this_cpu_write(cpu_tlbstate.need_tlbsync, true);
+}
+
+static inline void invlpgb_flush_single_pcid_nosync(unsigned long pcid)
+{
+ __invlpgb_flush_single_pcid_nosync(pcid);
+ if (!this_cpu_read(cpu_tlbstate.need_tlbsync))
+ this_cpu_write(cpu_tlbstate.need_tlbsync, true);
+}
+
+static inline void invlpgb_flush_addr_nosync(unsigned long addr, u16 nr)
+{
+ __invlpgb_flush_addr_nosync(addr, nr);
+ if (!this_cpu_read(cpu_tlbstate.need_tlbsync))
+ this_cpu_write(cpu_tlbstate.need_tlbsync, true);
+}
+
static void broadcast_tlb_flush(struct flush_tlb_info *info)
{
bool pmd = info->stride_shift == PMD_SHIFT;
@@ -794,6 +825,8 @@ void switch_mm_irqs_off(struct mm_struct
if (IS_ENABLED(CONFIG_PROVE_LOCKING))
WARN_ON_ONCE(!irqs_disabled());
+ tlbsync();
+
/*
* Verify that CR3 is what we think it is. This will catch
* hypothetical buggy code that directly switches to swapper_pg_dir
@@ -976,6 +1009,8 @@ reload_tlb:
*/
void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
{
+ tlbsync();
+
if (this_cpu_read(cpu_tlbstate.loaded_mm) == &init_mm)
return;
@@ -1621,9 +1656,7 @@ void arch_tlbbatch_flush(struct arch_tlb
* a local TLB flush is needed. Optimize this use-case by calling
* flush_tlb_func_local() directly in this case.
*/
- if (cpu_feature_enabled(X86_FEATURE_INVLPGB)) {
- invlpgb_flush_all_nonglobals();
- } else if (cpumask_any_but(&batch->cpumask, cpu) < nr_cpu_ids) {
+ if (cpumask_any_but(&batch->cpumask, cpu) < nr_cpu_ids) {
flush_tlb_multi(&batch->cpumask, info);
} else if (cpumask_test_cpu(cpu, &batch->cpumask)) {
lockdep_assert_irqs_enabled();
@@ -1632,12 +1665,53 @@ void arch_tlbbatch_flush(struct arch_tlb
local_irq_enable();
}
+ /*
+ * If we issued (asynchronous) INVLPGB flushes, wait for them here.
+ * The cpumask above contains only CPUs that were running tasks
+ * not using broadcast TLB flushing.
+ */
+ if (cpu_feature_enabled(X86_FEATURE_INVLPGB))
+ tlbsync();
+
cpumask_clear(&batch->cpumask);
put_flush_tlb_info();
put_cpu();
}
+void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch,
+ struct mm_struct *mm,
+ unsigned long uaddr)
+{
+ u16 asid = mm_global_asid(mm);
+
+ if (asid) {
+ invlpgb_flush_user_nr_nosync(kern_pcid(asid), uaddr, 1, false);
+ /* Do any CPUs supporting INVLPGB need PTI? */
+ if (static_cpu_has(X86_FEATURE_PTI))
+ invlpgb_flush_user_nr_nosync(user_pcid(asid), uaddr, 1, false);
+
+ /*
+ * Some CPUs might still be using a local ASID for this
+ * process, and require IPIs, while others are using the
+ * global ASID.
+ *
+ * In this corner case we need to do both the broadcast
+ * TLB invalidation, and send IPIs. The IPIs will help
+ * stragglers transition to the broadcast ASID.
+ */
+ if (in_asid_transition(mm))
+ asid = 0;
+ }
+
+ if (!asid) {
+ inc_mm_tlb_gen(mm);
+ cpumask_or(&batch->cpumask, &batch->cpumask, mm_cpumask(mm));
+ }
+
+ mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL);
+}
+
/*
* Blindly accessing user memory from NMI context can be dangerous
* if we're in the middle of switching the current user task or


@@ -1,6 +1,6 @@
-From 0678da9f0870f0d211d49808a66e98abc0c58438 Mon Sep 17 00:00:00 2001
+From 101ba03a6474bbc52971505abf1e3ee9613f255b Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
-Date: Wed, 22 Jan 2025 23:23:30 -0500
+Date: Wed, 5 Feb 2025 23:43:30 -0500
Subject: x86/mm: enable AMD translation cache extensions
With AMD TCE (translation cache extensions) only the intermediate mappings
@@ -22,6 +22,7 @@ only those upper-level entries that lead to the target PTE in
the page table hierarchy, leaving unrelated upper-level entries intact.
Signed-off-by: Rik van Riel <riel@surriel.com>
+Tested-by: Manali Shukla <Manali.Shukla@amd.com>
---
arch/x86/include/asm/msr-index.h | 2 ++
arch/x86/kernel/cpu/amd.c | 4 ++++
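
For context (not part of the diff above): the amd.c hunk presumably boils down to setting the TCE bit in the EFER MSR on CPUs that report the feature. A minimal sketch of that idea, assuming EFER.TCE is bit 15 as documented by AMD:

	/* Sketch: enable Translation Cache Extension on AMD CPUs.
	 * EFER is MSR 0xC0000080; msr_set_bit() read-modify-writes one bit. */
	#define _EFER_TCE 15	/* assumed bit position of EFER.TCE */

	static void amd_enable_tce(struct cpuinfo_x86 *c)
	{
		if (cpu_has(c, X86_FEATURE_TCE))
			msr_set_bit(MSR_EFER, _EFER_TCE);
	}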


@@ -1,6 +1,6 @@
-From 02d1759eda082f9595f3232f5dffd5d49943924a Mon Sep 17 00:00:00 2001
+From 7b8ef03b059bca98d2af696c3ec2adcaa673f7e4 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
-Date: Wed, 22 Jan 2025 23:23:31 -0500
+Date: Wed, 5 Feb 2025 23:43:31 -0500
Subject: x86/mm: only invalidate final translations with INVLPGB
Use the INVLPGB_FINAL_ONLY flag when invalidating mappings with INVPLGB.
@@ -11,15 +11,16 @@ On the (rare) occasions where we free page tables we do a full flush,
ensuring intermediate translations get flushed from the TLB.
Signed-off-by: Rik van Riel <riel@surriel.com>
+Tested-by: Manali Shukla <Manali.Shukla@amd.com>
---
arch/x86/include/asm/invlpgb.h | 10 ++++++++--
-arch/x86/mm/tlb.c | 8 ++++----
+arch/x86/mm/tlb.c | 13 +++++++------
-2 files changed, 12 insertions(+), 6 deletions(-)
+2 files changed, 15 insertions(+), 8 deletions(-)
--- a/arch/x86/include/asm/invlpgb.h
+++ b/arch/x86/include/asm/invlpgb.h
-@@ -67,9 +67,15 @@ static inline void invlpgb_flush_user(un
+@@ -66,9 +66,15 @@ static inline void invlpgb_flush_user(un
-static inline void invlpgb_flush_user_nr_nosync(unsigned long pcid,
+static inline void __invlpgb_flush_user_nr_nosync(unsigned long pcid,
unsigned long addr,
u16 nr,
- bool pmd_stride)
@@ -27,7 +28,7 @@ Signed-off-by: Rik van Riel <riel@surriel.com>
+ bool freed_tables)
{
- __invlpgb(0, pcid, addr, nr - 1, pmd_stride, INVLPGB_PCID | INVLPGB_VA);
-+ unsigned long flags = INVLPGB_PCID | INVLPGB_VA;
++ u8 flags = INVLPGB_PCID | INVLPGB_VA;
+
+ if (!freed_tables)
+ flags |= INVLPGB_FINAL_ONLY;
@@ -38,7 +39,20 @@ Signed-off-by: Rik van Riel <riel@surriel.com>
/* Flush all mappings for a given PCID, not including globals. */
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
-@@ -518,10 +518,10 @@ static void broadcast_tlb_flush(struct f
+@@ -498,9 +498,10 @@ static inline void tlbsync(void)
static inline void invlpgb_flush_user_nr_nosync(unsigned long pcid,
unsigned long addr,
- u16 nr, bool pmd_stride)
+ u16 nr, bool pmd_stride,
+ bool freed_tables)
{
- __invlpgb_flush_user_nr_nosync(pcid, addr, nr, pmd_stride);
+ __invlpgb_flush_user_nr_nosync(pcid, addr, nr, pmd_stride, freed_tables);
if (!this_cpu_read(cpu_tlbstate.need_tlbsync))
this_cpu_write(cpu_tlbstate.need_tlbsync, true);
}
@@ -549,10 +550,10 @@ static void broadcast_tlb_flush(struct f
nr = min(maxnr, (info->end - addr) >> info->stride_shift);
nr = max(nr, 1);
@@ -51,10 +65,10 @@ Signed-off-by: Rik van Riel <riel@surriel.com>
addr += nr << info->stride_shift;
} while (addr < info->end);
-@@ -1654,10 +1654,10 @@ void arch_tlbbatch_add_pending(struct ar
+@@ -1686,10 +1687,10 @@ void arch_tlbbatch_add_pending(struct ar
-batch->used_invlpgb = true;
-migrate_disable();
-}
+u16 asid = mm_global_asid(mm);
+if (asid) {
- invlpgb_flush_user_nr_nosync(kern_pcid(asid), uaddr, 1, false);
+ invlpgb_flush_user_nr_nosync(kern_pcid(asid), uaddr, 1, false, false);
/* Do any CPUs supporting INVLPGB need PTI? */


@@ -1,4 +1,4 @@
-From b61dfc43cfc7511795366dfd9260f0959ca2f2d2 Mon Sep 17 00:00:00 2001
+From 7b0836fcad644d24d6318bf63013ec1b35d6a27b Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
Date: Thu, 19 Dec 2024 15:32:53 -0500
Subject: mm: remove unnecessary calls to lru_add_drain


@@ -1,4 +1,4 @@
-From e2d1ffb13e3909dab142f0f8ec8f934b79930717 Mon Sep 17 00:00:00 2001
+From 7ecab5a83d3155baa009cd6bc6e18959fee8be62 Mon Sep 17 00:00:00 2001
From: Vincenzo Frascino <vincenzo.frascino@arm.com>
Date: Mon, 14 Oct 2024 16:13:39 +0100
Subject: vdso: Introduce vdso/page.h


@@ -1,4 +1,4 @@
-From 4478ee194402472199e05d3e27a87f0fc775cc18 Mon Sep 17 00:00:00 2001
+From d1bcf51400e790e65945a29078bd816bd61aa148 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 24 Oct 2024 13:34:26 +0000
Subject: vdso: Change PAGE_MASK to signed on all 32-bit architectures


@@ -1,135 +0,0 @@
From 647727eaa06fc61fbc55de4c09ab0c0fe7bc7263 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
Date: Wed, 22 Jan 2025 23:23:29 -0500
Subject: x86/mm: do targeted broadcast flushing from tlbbatch code
Instead of doing a system-wide TLB flush from arch_tlbbatch_flush,
queue up asynchronous, targeted flushes from arch_tlbbatch_add_pending.
This also allows us to avoid adding the CPUs of processes using broadcast
flushing to the batch->cpumask, and will hopefully further reduce TLB
flushing from the reclaim and compaction paths.
Signed-off-by: Rik van Riel <riel@surriel.com>
---
arch/x86/include/asm/tlbbatch.h | 1 +
arch/x86/include/asm/tlbflush.h | 12 ++-----
arch/x86/mm/tlb.c | 57 +++++++++++++++++++++++++++++++--
3 files changed, 58 insertions(+), 12 deletions(-)
--- a/arch/x86/include/asm/tlbbatch.h
+++ b/arch/x86/include/asm/tlbbatch.h
@@ -10,6 +10,7 @@ struct arch_tlbflush_unmap_batch {
* the PFNs being flushed..
*/
struct cpumask cpumask;
+ bool used_invlpgb;
};
#endif /* _ARCH_X86_TLBBATCH_H */
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -358,21 +358,15 @@ static inline u64 inc_mm_tlb_gen(struct
return atomic64_inc_return(&mm->context.tlb_gen);
}
-static inline void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch,
- struct mm_struct *mm,
- unsigned long uaddr)
-{
- inc_mm_tlb_gen(mm);
- cpumask_or(&batch->cpumask, &batch->cpumask, mm_cpumask(mm));
- mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL);
-}
-
static inline void arch_flush_tlb_batched_pending(struct mm_struct *mm)
{
flush_tlb_mm(mm);
}
extern void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch);
+extern void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch,
+ struct mm_struct *mm,
+ unsigned long uaddr);
static inline bool pte_flags_need_flush(unsigned long oldflags,
unsigned long newflags,
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -1612,9 +1612,7 @@ void arch_tlbbatch_flush(struct arch_tlb
* a local TLB flush is needed. Optimize this use-case by calling
* flush_tlb_func_local() directly in this case.
*/
- if (cpu_feature_enabled(X86_FEATURE_INVLPGB)) {
- invlpgb_flush_all_nonglobals();
- } else if (cpumask_any_but(&batch->cpumask, cpu) < nr_cpu_ids) {
+ if (cpumask_any_but(&batch->cpumask, cpu) < nr_cpu_ids) {
flush_tlb_multi(&batch->cpumask, info);
} else if (cpumask_test_cpu(cpu, &batch->cpumask)) {
lockdep_assert_irqs_enabled();
@@ -1623,12 +1621,65 @@ void arch_tlbbatch_flush(struct arch_tlb
local_irq_enable();
}
+ /*
+ * If we issued (asynchronous) INVLPGB flushes, wait for them here.
+ * The cpumask above contains only CPUs that were running tasks
+ * not using broadcast TLB flushing.
+ */
+ if (cpu_feature_enabled(X86_FEATURE_INVLPGB) && batch->used_invlpgb) {
+ tlbsync();
+ migrate_enable();
+ batch->used_invlpgb = false;
+ }
+
cpumask_clear(&batch->cpumask);
put_flush_tlb_info();
put_cpu();
}
+void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch,
+ struct mm_struct *mm,
+ unsigned long uaddr)
+{
+ u16 asid = mm_global_asid(mm);
+
+ if (asid) {
+ /*
+ * Queue up an asynchronous invalidation. The corresponding
+ * TLBSYNC is done in arch_tlbbatch_flush(), and must be done
+ * on the same CPU.
+ */
+ if (!batch->used_invlpgb) {
+ batch->used_invlpgb = true;
+ migrate_disable();
+ }
+ invlpgb_flush_user_nr_nosync(kern_pcid(asid), uaddr, 1, false);
+ /* Do any CPUs supporting INVLPGB need PTI? */
+ if (static_cpu_has(X86_FEATURE_PTI))
+ invlpgb_flush_user_nr_nosync(user_pcid(asid), uaddr, 1, false);
+
+ /*
+ * Some CPUs might still be using a local ASID for this
+ * process, and require IPIs, while others are using the
+ * global ASID.
+ *
+ * In this corner case we need to do both the broadcast
+ * TLB invalidation, and send IPIs. The IPIs will help
+ * stragglers transition to the broadcast ASID.
+ */
+ if (READ_ONCE(mm->context.asid_transition))
+ asid = 0;
+ }
+
+ if (!asid) {
+ inc_mm_tlb_gen(mm);
+ cpumask_or(&batch->cpumask, &batch->cpumask, mm_cpumask(mm));
+ }
+
+ mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL);
+}
+
/*
* Blindly accessing user memory from NMI context can be dangerous
* if we're in the middle of switching the current user task or


@@ -93,7 +93,7 @@ caused by rebalancing too many tasks at once.
/* Restrict the NUMA promotion throughput (MB/s) for each target node. */
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
-@@ -2825,7 +2825,7 @@ extern void deactivate_task(struct rq *r
+@@ -2797,7 +2797,7 @@ extern void deactivate_task(struct rq *r
extern void wakeup_preempt(struct rq *rq, struct task_struct *p, int flags);

debian/patches/series

@@ -203,21 +203,21 @@ patchset-xanmod/valve/0004-leds-steamdeck-Add-support-for-Steam-Deck-LED.patch
patchset-xanmod/valve/0005-mfd-Add-MFD-core-driver-for-Steam-Deck.patch
patchset-xanmod/valve/0006-mfd-steamdeck-Expose-controller-board-power-in-sysfs.patch
-patchset-zen/nvlpgb-v7/0001-x86-mm-make-MMU_GATHER_RCU_TABLE_FREE-unconditional.patch
+patchset-zen/invlpgb-v9/0001-x86-mm-make-MMU_GATHER_RCU_TABLE_FREE-unconditional.patch
-patchset-zen/nvlpgb-v7/0002-x86-mm-remove-pv_ops.mmu.tlb_remove_table-call.patch
+patchset-zen/invlpgb-v9/0002-x86-mm-remove-pv_ops.mmu.tlb_remove_table-call.patch
-patchset-zen/nvlpgb-v7/0003-x86-mm-consolidate-full-flush-threshold-decision.patch
+patchset-zen/invlpgb-v9/0003-x86-mm-consolidate-full-flush-threshold-decision.patch
-patchset-zen/nvlpgb-v7/0004-x86-mm-get-INVLPGB-count-max-from-CPUID.patch
+patchset-zen/invlpgb-v9/0004-x86-mm-get-INVLPGB-count-max-from-CPUID.patch
-patchset-zen/nvlpgb-v7/0005-x86-mm-add-INVLPGB-support-code.patch
+patchset-zen/invlpgb-v9/0005-x86-mm-add-INVLPGB-support-code.patch
-patchset-zen/nvlpgb-v7/0006-x86-mm-use-INVLPGB-for-kernel-TLB-flushes.patch
+patchset-zen/invlpgb-v9/0006-x86-mm-use-INVLPGB-for-kernel-TLB-flushes.patch
-patchset-zen/nvlpgb-v7/0007-x86-mm-use-INVLPGB-in-flush_tlb_all.patch
+patchset-zen/invlpgb-v9/0007-x86-mm-use-INVLPGB-in-flush_tlb_all.patch
-patchset-zen/nvlpgb-v7/0008-x86-mm-use-broadcast-TLB-flushing-for-page-reclaim-T.patch
+patchset-zen/invlpgb-v9/0008-x86-mm-use-broadcast-TLB-flushing-for-page-reclaim-T.patch
-patchset-zen/nvlpgb-v7/0009-x86-mm-enable-broadcast-TLB-invalidation-for-multi-t.patch
+patchset-zen/invlpgb-v9/0009-x86-mm-enable-broadcast-TLB-invalidation-for-multi-t.patch
-patchset-zen/nvlpgb-v7/0010-x86-mm-do-targeted-broadcast-flushing-from-tlbbatch-.patch
+patchset-zen/invlpgb-v9/0010-x86-mm-do-targeted-broadcast-flushing-from-tlbbatch-.patch
-patchset-zen/nvlpgb-v7/0011-x86-mm-enable-AMD-translation-cache-extensions.patch
+patchset-zen/invlpgb-v9/0011-x86-mm-enable-AMD-translation-cache-extensions.patch
-patchset-zen/nvlpgb-v7/0012-x86-mm-only-invalidate-final-translations-with-INVLP.patch
+patchset-zen/invlpgb-v9/0012-x86-mm-only-invalidate-final-translations-with-INVLP.patch
-patchset-zen/nvlpgb-v7/0013-mm-remove-unnecessary-calls-to-lru_add_drain.patch
+patchset-zen/invlpgb-v9/0013-mm-remove-unnecessary-calls-to-lru_add_drain.patch
-patchset-zen/nvlpgb-v7/0014-vdso-Introduce-vdso-page.h.patch
+patchset-zen/invlpgb-v9/0014-vdso-Introduce-vdso-page.h.patch
-patchset-zen/nvlpgb-v7/0015-vdso-Change-PAGE_MASK-to-signed-on-all-32-bit-archit.patch
+patchset-zen/invlpgb-v9/0015-vdso-Change-PAGE_MASK-to-signed-on-all-32-bit-archit.patch
patchset-zen/tlb/0001-mm-Optimize-TLB-flushes-during-page-reclaim.patch