
release 6.12.13

Konstantin Demin 2025-02-09 19:40:34 +03:00
parent b8c80400f5
commit e0baaf49dd
26 changed files with 385 additions and 221 deletions

debian/changelog

@@ -1,3 +1,10 @@
+linux (6.12.13-1) sid; urgency=medium
+
+  * New upstream stable update:
+    https://www.kernel.org/pub/linux/kernel/v6.x/ChangeLog-6.12.13
+
+ -- Konstantin Demin <rockdrilla@gmail.com>  Sun, 09 Feb 2025 19:34:35 +0300
+
 linux (6.12.12-1) sid; urgency=medium
 
   * New upstream stable update:


@@ -22,7 +22,7 @@ Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
-@@ -1617,6 +1617,11 @@ int perf_cpu_time_max_percent_handler(co
+@@ -1623,6 +1623,11 @@ int perf_cpu_time_max_percent_handler(co
int perf_event_max_stack_handler(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos);
@@ -50,7 +50,7 @@ Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
/* Minimum for 512 kiB + 1 user control page */
int sysctl_perf_event_mlock __read_mostly = 512 + (PAGE_SIZE / 1024); /* 'free' kiB per user */
-@@ -12681,6 +12686,9 @@ SYSCALL_DEFINE5(perf_event_open,
+@@ -12682,6 +12687,9 @@ SYSCALL_DEFINE5(perf_event_open,
if (err)
return err;


@@ -56,7 +56,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
struct rate_sample *rs);
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
-@@ -2767,6 +2767,7 @@ static bool tcp_write_xmit(struct sock *
+@@ -2770,6 +2770,7 @@ static bool tcp_write_xmit(struct sock *
skb_set_delivery_time(skb, tp->tcp_wstamp_ns, SKB_CLOCK_MONOTONIC);
list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue);
tcp_init_tso_segs(skb, mss_now);


@@ -55,7 +55,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
* between different flows.
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
-@@ -1603,7 +1603,7 @@ int tcp_fragment(struct sock *sk, enum t
+@@ -1606,7 +1606,7 @@ int tcp_fragment(struct sock *sk, enum t
{
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *buff;
@@ -64,7 +64,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
long limit;
int nlen;
u8 flags;
-@@ -1678,6 +1678,30 @@ int tcp_fragment(struct sock *sk, enum t
+@@ -1681,6 +1681,30 @@ int tcp_fragment(struct sock *sk, enum t
if (diff)
tcp_adjust_pcount(sk, skb, diff);


@@ -97,7 +97,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
};
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
-@@ -2059,13 +2059,12 @@ static u32 tcp_tso_autosize(const struct
+@@ -2062,13 +2062,12 @@ static u32 tcp_tso_autosize(const struct
static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now)
{
const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;


@@ -35,7 +35,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
fastopen_no_cookie:1, /* Allow send/recv SYN+data without a cookie */
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
-@@ -3005,6 +3005,7 @@ void tcp_send_loss_probe(struct sock *sk
+@@ -3008,6 +3008,7 @@ void tcp_send_loss_probe(struct sock *sk
if (WARN_ON(!skb || !tcp_skb_pcount(skb)))
goto rearm_timer;


@@ -88,7 +88,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
-@@ -336,10 +336,9 @@ static void tcp_ecn_send_syn(struct sock
+@@ -339,10 +339,9 @@ static void tcp_ecn_send_syn(struct sock
bool bpf_needs_ecn = tcp_bpf_ca_needs_ecn(sk);
bool use_ecn = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn) == 1 ||
tcp_ca_needs_ecn(sk) || bpf_needs_ecn;
@@ -100,7 +100,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
if (dst && dst_feature(dst, RTAX_FEATURE_ECN))
use_ecn = true;
}
-@@ -351,6 +350,9 @@ static void tcp_ecn_send_syn(struct sock
+@@ -354,6 +353,9 @@ static void tcp_ecn_send_syn(struct sock
tp->ecn_flags = TCP_ECN_OK;
if (tcp_ca_needs_ecn(sk) || bpf_needs_ecn)
INET_ECN_xmit(sk);


@@ -47,7 +47,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
/* BBR marks the current round trip as a loss round. */
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
-@@ -390,7 +390,8 @@ static void tcp_ecn_send(struct sock *sk
+@@ -393,7 +393,8 @@ static void tcp_ecn_send(struct sock *sk
th->cwr = 1;
skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
}


@@ -1,6 +1,6 @@
-From 6cb30d7518301094b9c7397a24a22cf538a1d64c Mon Sep 17 00:00:00 2001
+From e11153c4df0fee7caadec3714a60a4936d6a9ea2 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
-Date: Wed, 22 Jan 2025 23:23:20 -0500
+Date: Wed, 5 Feb 2025 23:43:20 -0500
Subject: x86/mm: make MMU_GATHER_RCU_TABLE_FREE unconditional
Currently x86 uses CONFIG_MMU_GATHER_TABLE_FREE when using
@@ -29,6 +29,7 @@ That makes it safe to use INVLPGB on AMD CPUs.
Signed-off-by: Rik van Riel <riel@surriel.com>
Suggested-by: Peter Zijlstra <peterz@infradead.org>
+Tested-by: Manali Shukla <Manali.Shukla@amd.com>
---
arch/x86/Kconfig | 2 +-
arch/x86/kernel/paravirt.c | 7 +------
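
For context, a minimal sketch (not part of the patch) of what RCU-style table freeing buys: a page-table page queued through the generic mmu_gather path is only returned to the allocator after lockless walkers, such as GUP-fast, or a CPU that received no flush IPI because the invalidation went out via INVLPGB, can no longer be dereferencing it.

/*
 * Illustrative sketch only: with MMU_GATHER_RCU_TABLE_FREE, the
 * table page is queued and freed after a grace period instead of
 * being reused immediately under a concurrent lockless walker.
 */
static void free_pte_page_sketch(struct mmu_gather *tlb, pgtable_t pte)
{
        tlb_remove_table(tlb, pte);     /* deferred, walker-safe free */
}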


@@ -1,6 +1,6 @@
-From df8f812b62c450b98b972ad0a4d5a5ba400f5eae Mon Sep 17 00:00:00 2001
+From e8008cb69c5e4efbaedd70b0fb692343e4aa0e51 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
-Date: Wed, 22 Jan 2025 23:23:21 -0500
+Date: Wed, 5 Feb 2025 23:43:21 -0500
Subject: x86/mm: remove pv_ops.mmu.tlb_remove_table call
Every pv_ops.mmu.tlb_remove_table call ends up calling tlb_remove_table.
@@ -10,6 +10,7 @@ and not going through the paravirt function pointers.
Signed-off-by: Rik van Riel <riel@surriel.com>
Suggested-by: Qi Zheng <zhengqi.arch@bytedance.com>
+Tested-by: Manali Shukla <Manali.Shukla@amd.com>
---
arch/x86/hyperv/mmu.c | 1 -
arch/x86/include/asm/paravirt.h | 5 -----


@@ -1,6 +1,6 @@
-From 8b2bd3f69b50cfe59eee4506413715878bcbb901 Mon Sep 17 00:00:00 2001
+From 7ac6508c4db81eced5f6e3d7c8913af1da6cf110 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
-Date: Wed, 22 Jan 2025 23:23:22 -0500
+Date: Wed, 5 Feb 2025 23:43:22 -0500
Subject: x86/mm: consolidate full flush threshold decision
Reduce code duplication by consolidating the decision point
@@ -10,15 +10,34 @@ inside get_flush_tlb_info.
Signed-off-by: Rik van Riel <riel@surriel.com>
Suggested-by: Dave Hansen <dave.hansen@intel.com>
---
-arch/x86/mm/tlb.c | 43 ++++++++++++++++++++-----------------------
-1 file changed, 20 insertions(+), 23 deletions(-)
+arch/x86/mm/tlb.c | 56 ++++++++++++++++++++++++++---------------------
+1 file changed, 31 insertions(+), 25 deletions(-)
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
-@@ -981,6 +981,15 @@ static struct flush_tlb_info *get_flush_
+@@ -973,14 +973,32 @@ static struct flush_tlb_info *get_flush_
BUG_ON(this_cpu_inc_return(flush_tlb_info_idx) != 1);
#endif
- info->start = start;
- info->end = end;
+ /*
+ * Round the start and end addresses to the page size specified
+ * by the stride shift. This ensures partial pages at the end of
+ * a range get fully invalidated.
+ */
+ info->start = round_down(start, 1 << stride_shift);
+ info->end = round_up(end, 1 << stride_shift);
info->mm = mm;
info->stride_shift = stride_shift;
info->freed_tables = freed_tables;
info->new_tlb_gen = new_tlb_gen;
info->initiating_cpu = smp_processor_id();
+ WARN_ONCE(start != info->start || end != info->end,
+ "TLB flush not stride %x aligned. Start %lx, end %lx\n",
+ 1 << stride_shift, start, end);
+
+ /*
+ * If the number of flushes is so large that a full flush
+ * would be faster, do a full flush.
@@ -31,7 +50,7 @@ Suggested-by: Dave Hansen <dave.hansen@intel.com>
return info;
}
-@@ -998,17 +1007,8 @@ void flush_tlb_mm_range(struct mm_struct
+@@ -998,17 +1016,8 @@ void flush_tlb_mm_range(struct mm_struct
bool freed_tables)
{
struct flush_tlb_info *info;
@@ -50,7 +69,7 @@ Suggested-by: Dave Hansen <dave.hansen@intel.com>
/* This is also a barrier that synchronizes with switch_mm(). */
new_tlb_gen = inc_mm_tlb_gen(mm);
-@@ -1060,22 +1060,19 @@ static void do_kernel_range_flush(void *
+@@ -1060,22 +1069,19 @@ static void do_kernel_range_flush(void *
void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
@@ -61,12 +80,12 @@ Suggested-by: Dave Hansen <dave.hansen@intel.com>
- } else {
- struct flush_tlb_info *info;
+ struct flush_tlb_info *info;
+
+ guard(preempt)();
- preempt_disable();
- info = get_flush_tlb_info(NULL, start, end, 0, false,
- TLB_GENERATION_INVALID);
+ guard(preempt)();
+
+ info = get_flush_tlb_info(NULL, start, end, PAGE_SHIFT, false,
+ TLB_GENERATION_INVALID);
@@ -82,7 +101,7 @@ Suggested-by: Dave Hansen <dave.hansen@intel.com>
}
/*
-@@ -1247,7 +1244,7 @@ void arch_tlbbatch_flush(struct arch_tlb
+@@ -1247,7 +1253,7 @@ void arch_tlbbatch_flush(struct arch_tlb
int cpu = get_cpu();
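
As a quick check of the rounding logic added above, a user-space sketch (the macros stand in for the kernel's round_down()/round_up(); values are invented for illustration):

#include <stdio.h>

#define round_down(x, y) ((x) & ~((y) - 1))
#define round_up(x, y)   (((x) + (y) - 1) & ~((y) - 1))

int main(void)
{
        unsigned long stride = 1UL << 21;       /* stride_shift == PMD_SHIFT: 2 MiB */
        unsigned long start = round_down(0x200fffUL, stride);
        unsigned long end   = round_up(0x401000UL, stride);

        /* Partial strides at both ends are widened: [0x200000, 0x600000) */
        printf("flush [%#lx, %#lx)\n", start, end);
        return 0;
}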


@@ -1,6 +1,6 @@
-From a182b0471ba3c3329d93abfa07e3d452183a9137 Mon Sep 17 00:00:00 2001
+From e772b2eb66e5c3cf668feadab678f2a88d896189 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
-Date: Wed, 22 Jan 2025 23:23:23 -0500
+Date: Wed, 5 Feb 2025 23:43:23 -0500
Subject: x86/mm: get INVLPGB count max from CPUID
The CPU advertises the maximum number of pages that can be shot down
@@ -9,6 +9,7 @@ with one INVLPGB instruction in the CPUID data.
Save that information for later use.
Signed-off-by: Rik van Riel <riel@surriel.com>
+Tested-by: Manali Shukla <Manali.Shukla@amd.com>
---
arch/x86/Kconfig.cpu | 5 +++++
arch/x86/include/asm/cpufeatures.h | 1 +
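
A user-space sketch of where that maximum lives, assuming (per the AMD APM) it is reported in CPUID leaf 0x80000008, EDX bits 15:0:

#include <cpuid.h>
#include <stdio.h>

int main(void)
{
        unsigned int eax, ebx, ecx, edx;

        /* Leaf 0x80000008: EDX[15:0] = InvlpgbCountMax (assumption per APM). */
        if (__get_cpuid(0x80000008, &eax, &ebx, &ecx, &edx))
                printf("invlpgb_count_max = %u\n", edx & 0xffff);
        return 0;
}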


@@ -1,12 +1,13 @@
-From cc3f8dd3033c79abd9f37a94efed74a535a703c9 Mon Sep 17 00:00:00 2001
+From 7a896b12875e2b988acbf0229fb4bcf9157b83bd Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
-Date: Wed, 22 Jan 2025 23:23:24 -0500
+Date: Wed, 5 Feb 2025 23:43:24 -0500
Subject: x86/mm: add INVLPGB support code
Add invlpgb.h with the helper functions and definitions needed to use
broadcast TLB invalidation on AMD EPYC 3 and newer CPUs.
Signed-off-by: Rik van Riel <riel@surriel.com>
+Tested-by: Manali Shukla <Manali.Shukla@amd.com>
---
arch/x86/include/asm/invlpgb.h | 101 ++++++++++++++++++++++++++++++++
arch/x86/include/asm/tlbflush.h | 1 +
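
The helpers pair an asynchronous broadcast with an explicit wait; a sketch of the pattern using the __invlpgb()/tlbsync() names this patch introduces (the wrapper name here is hypothetical):

/*
 * Sketch: flush one user page for a PCID on every CPU in the system.
 * INVLPGB broadcasts the invalidation; TLBSYNC then waits until all
 * invalidations initiated by this CPU have completed.
 */
static void flush_user_page_sketch(unsigned long pcid, unsigned long addr)
{
        __invlpgb(0, pcid, addr, 0, 0, INVLPGB_PCID | INVLPGB_VA);
        tlbsync();
}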


@@ -1,6 +1,6 @@
-From 6b6686f0d7e228d0a2d8c166204adea5484c20d7 Mon Sep 17 00:00:00 2001
+From 99f2b0eda74d7ec76c9c48b78f9d30d251501c28 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
-Date: Wed, 22 Jan 2025 23:23:25 -0500
+Date: Wed, 5 Feb 2025 23:43:25 -0500
Subject: x86/mm: use INVLPGB for kernel TLB flushes
Use broadcast TLB invalidation for kernel addresses when available.
@@ -8,13 +8,14 @@ Use broadcast TLB invalidation for kernel addresses when available.
Remove the need to send IPIs for kernel TLB flushes.
Signed-off-by: Rik van Riel <riel@surriel.com>
+Tested-by: Manali Shukla <Manali.Shukla@amd.com>
---
arch/x86/mm/tlb.c | 28 +++++++++++++++++++++++++++-
1 file changed, 27 insertions(+), 1 deletion(-)
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
-@@ -1048,6 +1048,30 @@ void flush_tlb_all(void)
+@@ -1057,6 +1057,30 @@ void flush_tlb_all(void)
on_each_cpu(do_flush_tlb_all, NULL, 1);
}
@ -45,7 +46,7 @@ Signed-off-by: Rik van Riel <riel@surriel.com>
static void do_kernel_range_flush(void *info)
{
struct flush_tlb_info *f = info;
-@@ -1067,7 +1091,9 @@ void flush_tlb_kernel_range(unsigned lon
+@@ -1076,7 +1100,9 @@ void flush_tlb_kernel_range(unsigned lon
info = get_flush_tlb_info(NULL, start, end, PAGE_SHIFT, false,
TLB_GENERATION_INVALID);
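
The resulting control flow in flush_tlb_kernel_range() looks roughly like this (simplified sketch, not the patch itself; the real code also caps each INVLPGB at invlpgb_count_max pages by looping):

static void kernel_range_flush_sketch(struct flush_tlb_info *info)
{
        if (cpu_feature_enabled(X86_FEATURE_INVLPGB)) {
                /* One broadcast invalidation instead of one IPI per CPU. */
                unsigned long nr = (info->end - info->start) >> PAGE_SHIFT;

                invlpgb_flush_addr_nosync(info->start, nr);
                tlbsync();
        } else {
                on_each_cpu(do_kernel_range_flush, info, 1);
        }
}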


@@ -1,19 +1,20 @@
-From 6cffce503223f9076a5e16177905ba3ab6d9f7d8 Mon Sep 17 00:00:00 2001
+From 1ef7edb5b2375d4010ed2ad0c7d87fcfa7ab4519 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
-Date: Wed, 22 Jan 2025 23:23:26 -0500
+Date: Wed, 5 Feb 2025 23:43:26 -0500
Subject: x86/mm: use INVLPGB in flush_tlb_all
The flush_tlb_all() function is not used a whole lot, but we might
as well use broadcast TLB flushing there, too.
Signed-off-by: Rik van Riel <riel@surriel.com>
+Tested-by: Manali Shukla <Manali.Shukla@amd.com>
---
arch/x86/mm/tlb.c | 15 +++++++++++++++
1 file changed, 15 insertions(+)
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
-@@ -1036,6 +1036,19 @@ void flush_tlb_mm_range(struct mm_struct
+@@ -1045,6 +1045,19 @@ void flush_tlb_mm_range(struct mm_struct
}
@@ -33,7 +34,7 @@ Signed-off-by: Rik van Riel <riel@surriel.com>
static void do_flush_tlb_all(void *info)
{
count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
-@@ -1044,6 +1057,8 @@ static void do_flush_tlb_all(void *info)
+@@ -1053,6 +1066,8 @@ static void do_flush_tlb_all(void *info)
void flush_tlb_all(void)
{


@@ -1,6 +1,6 @@
-From 3d23d79d14cdd3c68dc5bffbaf34a60eaca7fa40 Mon Sep 17 00:00:00 2001
+From 5e5219596683c3b8178e09f6ec1e75154537325f Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
-Date: Wed, 22 Jan 2025 23:23:27 -0500
+Date: Wed, 5 Feb 2025 23:43:27 -0500
Subject: x86/mm: use broadcast TLB flushing for page reclaim TLB flushing
In the page reclaim code, we only track the CPU(s) where the TLB needs
@@ -10,13 +10,14 @@ invalidated.
Use broadcast TLB flushing when that is available.
Signed-off-by: Rik van Riel <riel@surriel.com>
+Tested-by: Manali Shukla <Manali.Shukla@amd.com>
---
arch/x86/mm/tlb.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
-@@ -1292,7 +1292,9 @@ void arch_tlbbatch_flush(struct arch_tlb
+@@ -1301,7 +1301,9 @@ void arch_tlbbatch_flush(struct arch_tlb
* a local TLB flush is needed. Optimize this use-case by calling
* flush_tlb_func_local() directly in this case.
*/


@@ -1,6 +1,6 @@
-From 79c9df0c7637c8ba8a1833889a2ace355d56c96e Mon Sep 17 00:00:00 2001
+From c7212dc64d8e9e4f12f1c6edea3b75c350a30381 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
-Date: Wed, 22 Jan 2025 23:23:28 -0500
+Date: Wed, 5 Feb 2025 23:43:28 -0500
Subject: x86/mm: enable broadcast TLB invalidation for multi-threaded
processes
@@ -9,10 +9,10 @@ and newer CPUs.
In order to not exhaust PCID space, and keep TLB flushes local for single
threaded processes, we only hand out broadcast ASIDs to processes active on
-3 or more CPUs, and gradually increase the threshold as broadcast ASID space
-is depleted.
+4 or more CPUs.
Signed-off-by: Rik van Riel <riel@surriel.com>
+Tested-by: Manali Shukla <Manali.Shukla@amd.com>
---
arch/x86/include/asm/mmu.h | 6 +
arch/x86/include/asm/mmu_context.h | 14 ++
@@ -100,12 +100,12 @@ Signed-off-by: Rik van Riel <riel@surriel.com>
+ return !is_dyn_asid(asid);
+}
+
-+static inline bool in_asid_transition(const struct flush_tlb_info *info)
++static inline bool in_asid_transition(struct mm_struct *mm)
+{
+ if (!cpu_feature_enabled(X86_FEATURE_INVLPGB))
+ return false;
+
-+ return info->mm && READ_ONCE(info->mm->context.asid_transition);
++ return mm && READ_ONCE(mm->context.asid_transition);
+}
+
+static inline u16 mm_global_asid(struct mm_struct *mm)
@@ -133,7 +133,7 @@ Signed-off-by: Rik van Riel <riel@surriel.com>
+ return false;
+}
+
-+static inline bool in_asid_transition(const struct flush_tlb_info *info)
++static inline bool in_asid_transition(struct mm_struct *mm)
+{
+ return false;
+}
@@ -583,11 +583,11 @@ Signed-off-by: Rik van Riel <riel@surriel.com>
* doing a speculative memory access.
*/
- if (info->freed_tables)
-+ if (info->freed_tables || in_asid_transition(info))
++ if (info->freed_tables || in_asid_transition(info->mm))
on_each_cpu_mask(cpumask, flush_tlb_func, (void *)info, true);
else
on_each_cpu_cond_mask(tlb_is_not_lazy, flush_tlb_func,
-@@ -1021,8 +1338,11 @@ void flush_tlb_mm_range(struct mm_struct
+@@ -1030,8 +1347,11 @@ void flush_tlb_mm_range(struct mm_struct
* a local TLB flush is needed. Optimize this use-case by calling
* flush_tlb_func_local() directly in this case.
*/
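
Condensed, the flush-side decision this patch introduces looks like the following sketch (names from the hunks above; the real logic sits inside flush_tlb_mm_range()):

static void flush_mm_sketch(struct mm_struct *mm, struct flush_tlb_info *info)
{
        if (mm_global_asid(mm)) {
                /* Multi-threaded mm with a global ASID: one broadcast. */
                broadcast_tlb_flush(info);
                return;
        }
        /*
         * Local ASIDs, or an mm still transitioning to its global ASID
         * (the in_asid_transition() check above): fall back to IPIs so
         * CPUs still running on a local ASID are not missed.
         */
        flush_tlb_multi(mm_cpumask(mm), info);
}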


@@ -0,0 +1,251 @@
From 6f601cdcd33be8fc0da98c6bab777575af3260b8 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
Date: Wed, 5 Feb 2025 23:43:29 -0500
Subject: x86/mm: do targeted broadcast flushing from tlbbatch code
Instead of doing a system-wide TLB flush from arch_tlbbatch_flush,
queue up asynchronous, targeted flushes from arch_tlbbatch_add_pending.
This also allows us to avoid adding the CPUs of processes using broadcast
flushing to the batch->cpumask, and will hopefully further reduce TLB
flushing from the reclaim and compaction paths.
Signed-off-by: Rik van Riel <riel@surriel.com>
Tested-by: Manali Shukla <Manali.Shukla@amd.com>
---
arch/x86/include/asm/invlpgb.h | 21 +++++----
arch/x86/include/asm/tlbflush.h | 17 ++++---
arch/x86/mm/tlb.c | 80 +++++++++++++++++++++++++++++++--
3 files changed, 95 insertions(+), 23 deletions(-)
--- a/arch/x86/include/asm/invlpgb.h
+++ b/arch/x86/include/asm/invlpgb.h
@@ -31,9 +31,8 @@ static inline void __invlpgb(unsigned lo
}
/* Wait for INVLPGB originated by this CPU to complete. */
-static inline void tlbsync(void)
+static inline void __tlbsync(void)
{
- cant_migrate();
/* TLBSYNC: supported in binutils >= 0.36. */
asm volatile(".byte 0x0f, 0x01, 0xff" ::: "memory");
}
@@ -61,19 +60,19 @@ static inline void invlpgb_flush_user(un
unsigned long addr)
{
__invlpgb(0, pcid, addr, 0, 0, INVLPGB_PCID | INVLPGB_VA);
- tlbsync();
+ __tlbsync();
}
-static inline void invlpgb_flush_user_nr_nosync(unsigned long pcid,
- unsigned long addr,
- u16 nr,
- bool pmd_stride)
+static inline void __invlpgb_flush_user_nr_nosync(unsigned long pcid,
+ unsigned long addr,
+ u16 nr,
+ bool pmd_stride)
{
__invlpgb(0, pcid, addr, nr - 1, pmd_stride, INVLPGB_PCID | INVLPGB_VA);
}
/* Flush all mappings for a given PCID, not including globals. */
-static inline void invlpgb_flush_single_pcid_nosync(unsigned long pcid)
+static inline void __invlpgb_flush_single_pcid_nosync(unsigned long pcid)
{
__invlpgb(0, pcid, 0, 0, 0, INVLPGB_PCID);
}
@@ -82,11 +81,11 @@ static inline void invlpgb_flush_single_
static inline void invlpgb_flush_all(void)
{
__invlpgb(0, 0, 0, 0, 0, INVLPGB_INCLUDE_GLOBAL);
- tlbsync();
+ __tlbsync();
}
/* Flush addr, including globals, for all PCIDs. */
-static inline void invlpgb_flush_addr_nosync(unsigned long addr, u16 nr)
+static inline void __invlpgb_flush_addr_nosync(unsigned long addr, u16 nr)
{
__invlpgb(0, 0, addr, nr - 1, 0, INVLPGB_INCLUDE_GLOBAL);
}
@@ -95,7 +94,7 @@ static inline void invlpgb_flush_addr_no
static inline void invlpgb_flush_all_nonglobals(void)
{
__invlpgb(0, 0, 0, 0, 0, 0);
- tlbsync();
+ __tlbsync();
}
#endif /* _ASM_X86_INVLPGB */
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -106,6 +106,7 @@ struct tlb_state {
* need to be invalidated.
*/
bool invalidate_other;
+ bool need_tlbsync;
#ifdef CONFIG_ADDRESS_MASKING
/*
@@ -309,6 +310,10 @@ static inline void broadcast_tlb_flush(s
static inline void consider_global_asid(struct mm_struct *mm)
{
}
+
+static inline void tlbsync(void)
+{
+}
#endif
#ifdef CONFIG_PARAVIRT
@@ -358,21 +363,15 @@ static inline u64 inc_mm_tlb_gen(struct
return atomic64_inc_return(&mm->context.tlb_gen);
}
-static inline void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch,
- struct mm_struct *mm,
- unsigned long uaddr)
-{
- inc_mm_tlb_gen(mm);
- cpumask_or(&batch->cpumask, &batch->cpumask, mm_cpumask(mm));
- mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL);
-}
-
static inline void arch_flush_tlb_batched_pending(struct mm_struct *mm)
{
flush_tlb_mm(mm);
}
extern void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch);
+extern void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch,
+ struct mm_struct *mm,
+ unsigned long uaddr);
static inline bool pte_flags_need_flush(unsigned long oldflags,
unsigned long newflags,
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -488,6 +488,37 @@ static void finish_asid_transition(struc
WRITE_ONCE(mm->context.asid_transition, false);
}
+static inline void tlbsync(void)
+{
+ if (!this_cpu_read(cpu_tlbstate.need_tlbsync))
+ return;
+ __tlbsync();
+ this_cpu_write(cpu_tlbstate.need_tlbsync, false);
+}
+
+static inline void invlpgb_flush_user_nr_nosync(unsigned long pcid,
+ unsigned long addr,
+ u16 nr, bool pmd_stride)
+{
+ __invlpgb_flush_user_nr_nosync(pcid, addr, nr, pmd_stride);
+ if (!this_cpu_read(cpu_tlbstate.need_tlbsync))
+ this_cpu_write(cpu_tlbstate.need_tlbsync, true);
+}
+
+static inline void invlpgb_flush_single_pcid_nosync(unsigned long pcid)
+{
+ __invlpgb_flush_single_pcid_nosync(pcid);
+ if (!this_cpu_read(cpu_tlbstate.need_tlbsync))
+ this_cpu_write(cpu_tlbstate.need_tlbsync, true);
+}
+
+static inline void invlpgb_flush_addr_nosync(unsigned long addr, u16 nr)
+{
+ __invlpgb_flush_addr_nosync(addr, nr);
+ if (!this_cpu_read(cpu_tlbstate.need_tlbsync))
+ this_cpu_write(cpu_tlbstate.need_tlbsync, true);
+}
+
static void broadcast_tlb_flush(struct flush_tlb_info *info)
{
bool pmd = info->stride_shift == PMD_SHIFT;
@@ -794,6 +825,8 @@ void switch_mm_irqs_off(struct mm_struct
if (IS_ENABLED(CONFIG_PROVE_LOCKING))
WARN_ON_ONCE(!irqs_disabled());
+ tlbsync();
+
/*
* Verify that CR3 is what we think it is. This will catch
* hypothetical buggy code that directly switches to swapper_pg_dir
@@ -976,6 +1009,8 @@ reload_tlb:
*/
void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
{
+ tlbsync();
+
if (this_cpu_read(cpu_tlbstate.loaded_mm) == &init_mm)
return;
@@ -1621,9 +1656,7 @@ void arch_tlbbatch_flush(struct arch_tlb
* a local TLB flush is needed. Optimize this use-case by calling
* flush_tlb_func_local() directly in this case.
*/
- if (cpu_feature_enabled(X86_FEATURE_INVLPGB)) {
- invlpgb_flush_all_nonglobals();
- } else if (cpumask_any_but(&batch->cpumask, cpu) < nr_cpu_ids) {
+ if (cpumask_any_but(&batch->cpumask, cpu) < nr_cpu_ids) {
flush_tlb_multi(&batch->cpumask, info);
} else if (cpumask_test_cpu(cpu, &batch->cpumask)) {
lockdep_assert_irqs_enabled();
@@ -1632,12 +1665,53 @@ void arch_tlbbatch_flush(struct arch_tlb
local_irq_enable();
}
+ /*
+ * If we issued (asynchronous) INVLPGB flushes, wait for them here.
+ * The cpumask above contains only CPUs that were running tasks
+ * not using broadcast TLB flushing.
+ */
+ if (cpu_feature_enabled(X86_FEATURE_INVLPGB))
+ tlbsync();
+
cpumask_clear(&batch->cpumask);
put_flush_tlb_info();
put_cpu();
}
+void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch,
+ struct mm_struct *mm,
+ unsigned long uaddr)
+{
+ u16 asid = mm_global_asid(mm);
+
+ if (asid) {
+ invlpgb_flush_user_nr_nosync(kern_pcid(asid), uaddr, 1, false);
+ /* Do any CPUs supporting INVLPGB need PTI? */
+ if (static_cpu_has(X86_FEATURE_PTI))
+ invlpgb_flush_user_nr_nosync(user_pcid(asid), uaddr, 1, false);
+
+ /*
+ * Some CPUs might still be using a local ASID for this
+ * process, and require IPIs, while others are using the
+ * global ASID.
+ *
+ * In this corner case we need to do both the broadcast
+ * TLB invalidation, and send IPIs. The IPIs will help
+ * stragglers transition to the broadcast ASID.
+ */
+ if (in_asid_transition(mm))
+ asid = 0;
+ }
+
+ if (!asid) {
+ inc_mm_tlb_gen(mm);
+ cpumask_or(&batch->cpumask, &batch->cpumask, mm_cpumask(mm));
+ }
+
+ mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL);
+}
+
/*
* Blindly accessing user memory from NMI context can be dangerous
* if we're in the middle of switching the current user task or
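
Distilled from the hunks above, the bookkeeping pattern is: issue INVLPGBs without waiting, mark that this CPU owes a TLBSYNC, and settle that debt at batch-flush time or before the CPU switches to another mm (a sketch using the names from this patch):

static inline void queue_flush_sketch(unsigned long pcid, unsigned long addr)
{
        __invlpgb_flush_user_nr_nosync(pcid, addr, 1, false);
        this_cpu_write(cpu_tlbstate.need_tlbsync, true);   /* sync owed */
}

static inline void settle_flushes_sketch(void)
{
        if (this_cpu_read(cpu_tlbstate.need_tlbsync)) {
                __tlbsync();            /* wait for our own INVLPGBs */
                this_cpu_write(cpu_tlbstate.need_tlbsync, false);
        }
}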


@@ -1,6 +1,6 @@
-From 0678da9f0870f0d211d49808a66e98abc0c58438 Mon Sep 17 00:00:00 2001
+From 101ba03a6474bbc52971505abf1e3ee9613f255b Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
-Date: Wed, 22 Jan 2025 23:23:30 -0500
+Date: Wed, 5 Feb 2025 23:43:30 -0500
Subject: x86/mm: enable AMD translation cache extensions
With AMD TCE (translation cache extensions) only the intermediate mappings
@@ -22,6 +22,7 @@ only those upper-level entries that lead to the target PTE in
the page table hierarchy, leaving unrelated upper-level entries intact.
Signed-off-by: Rik van Riel <riel@surriel.com>
+Tested-by: Manali Shukla <Manali.Shukla@amd.com>
---
arch/x86/include/asm/msr-index.h | 2 ++
arch/x86/kernel/cpu/amd.c | 4 ++++
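
The enable path amounts to setting one EFER bit when the CPU advertises TCE; a sketch, assuming (per the AMD APM) that EFER.TCE is bit 15, since the exact hunk is not shown above:

#include <asm/msr.h>

#define EFER_TCE_BIT   15      /* Translation Cache Extension (assumed) */

static void enable_tce_sketch(void)
{
        if (cpu_feature_enabled(X86_FEATURE_TCE))
                msr_set_bit(MSR_EFER, EFER_TCE_BIT);
}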


@@ -1,6 +1,6 @@
-From 02d1759eda082f9595f3232f5dffd5d49943924a Mon Sep 17 00:00:00 2001
+From 7b8ef03b059bca98d2af696c3ec2adcaa673f7e4 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
-Date: Wed, 22 Jan 2025 23:23:31 -0500
+Date: Wed, 5 Feb 2025 23:43:31 -0500
Subject: x86/mm: only invalidate final translations with INVLPGB
Use the INVLPGB_FINAL_ONLY flag when invalidating mappings with INVPLGB.
@@ -11,23 +11,24 @@ On the (rare) occasions where we free page tables we do a full flush,
ensuring intermediate translations get flushed from the TLB.
Signed-off-by: Rik van Riel <riel@surriel.com>
+Tested-by: Manali Shukla <Manali.Shukla@amd.com>
---
arch/x86/include/asm/invlpgb.h | 10 ++++++++--
-arch/x86/mm/tlb.c | 8 ++++----
-2 files changed, 12 insertions(+), 6 deletions(-)
+arch/x86/mm/tlb.c | 13 +++++++------
+2 files changed, 15 insertions(+), 8 deletions(-)
--- a/arch/x86/include/asm/invlpgb.h
+++ b/arch/x86/include/asm/invlpgb.h
-@@ -67,9 +67,15 @@ static inline void invlpgb_flush_user(un
-static inline void invlpgb_flush_user_nr_nosync(unsigned long pcid,
-unsigned long addr,
-u16 nr,
-- bool pmd_stride)
-+ bool pmd_stride,
-+ bool freed_tables)
+@@ -66,9 +66,15 @@ static inline void invlpgb_flush_user(un
+static inline void __invlpgb_flush_user_nr_nosync(unsigned long pcid,
+unsigned long addr,
+u16 nr,
+- bool pmd_stride)
++ bool pmd_stride,
++ bool freed_tables)
{
- __invlpgb(0, pcid, addr, nr - 1, pmd_stride, INVLPGB_PCID | INVLPGB_VA);
-+ unsigned long flags = INVLPGB_PCID | INVLPGB_VA;
++ u8 flags = INVLPGB_PCID | INVLPGB_VA;
+
+ if (!freed_tables)
+ flags |= INVLPGB_FINAL_ONLY;
@@ -38,7 +39,20 @@ Signed-off-by: Rik van Riel <riel@surriel.com>
/* Flush all mappings for a given PCID, not including globals. */
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
-@@ -518,10 +518,10 @@ static void broadcast_tlb_flush(struct f
+@@ -498,9 +498,10 @@ static inline void tlbsync(void)
+static inline void invlpgb_flush_user_nr_nosync(unsigned long pcid,
+unsigned long addr,
+- u16 nr, bool pmd_stride)
++ u16 nr, bool pmd_stride,
++ bool freed_tables)
+{
+- __invlpgb_flush_user_nr_nosync(pcid, addr, nr, pmd_stride);
++ __invlpgb_flush_user_nr_nosync(pcid, addr, nr, pmd_stride, freed_tables);
+if (!this_cpu_read(cpu_tlbstate.need_tlbsync))
+this_cpu_write(cpu_tlbstate.need_tlbsync, true);
+}
+@@ -549,10 +550,10 @@ static void broadcast_tlb_flush(struct f
nr = min(maxnr, (info->end - addr) >> info->stride_shift);
nr = max(nr, 1);
@@ -51,10 +65,10 @@ Signed-off-by: Rik van Riel <riel@surriel.com>
addr += nr << info->stride_shift;
} while (addr < info->end);
-@@ -1654,10 +1654,10 @@ void arch_tlbbatch_add_pending(struct ar
-batch->used_invlpgb = true;
-migrate_disable();
-}
+@@ -1686,10 +1687,10 @@ void arch_tlbbatch_add_pending(struct ar
+u16 asid = mm_global_asid(mm);
+
+if (asid) {
+ invlpgb_flush_user_nr_nosync(kern_pcid(asid), uaddr, 1, false, false);
/* Do any CPUs supporting INVLPGB need PTI? */
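
The flag selection above, restated as a sketch: only flushes that follow page-table freeing need to hit intermediate (non-final) translations, so every other flush can pass INVLPGB_FINAL_ONLY:

static inline u8 invlpgb_flags_sketch(bool freed_tables)
{
        u8 flags = INVLPGB_PCID | INVLPGB_VA;

        if (!freed_tables)
                flags |= INVLPGB_FINAL_ONLY;    /* keep upper levels cached */
        return flags;
}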


@@ -1,4 +1,4 @@
-From b61dfc43cfc7511795366dfd9260f0959ca2f2d2 Mon Sep 17 00:00:00 2001
+From 7b0836fcad644d24d6318bf63013ec1b35d6a27b Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
Date: Thu, 19 Dec 2024 15:32:53 -0500
Subject: mm: remove unnecessary calls to lru_add_drain


@@ -1,4 +1,4 @@
-From e2d1ffb13e3909dab142f0f8ec8f934b79930717 Mon Sep 17 00:00:00 2001
+From 7ecab5a83d3155baa009cd6bc6e18959fee8be62 Mon Sep 17 00:00:00 2001
From: Vincenzo Frascino <vincenzo.frascino@arm.com>
Date: Mon, 14 Oct 2024 16:13:39 +0100
Subject: vdso: Introduce vdso/page.h


@@ -1,4 +1,4 @@
-From 4478ee194402472199e05d3e27a87f0fc775cc18 Mon Sep 17 00:00:00 2001
+From d1bcf51400e790e65945a29078bd816bd61aa148 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 24 Oct 2024 13:34:26 +0000
Subject: vdso: Change PAGE_MASK to signed on all 32-bit architectures


@@ -1,135 +0,0 @@
From 647727eaa06fc61fbc55de4c09ab0c0fe7bc7263 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
Date: Wed, 22 Jan 2025 23:23:29 -0500
Subject: x86/mm: do targeted broadcast flushing from tlbbatch code
Instead of doing a system-wide TLB flush from arch_tlbbatch_flush,
queue up asynchronous, targeted flushes from arch_tlbbatch_add_pending.
This also allows us to avoid adding the CPUs of processes using broadcast
flushing to the batch->cpumask, and will hopefully further reduce TLB
flushing from the reclaim and compaction paths.
Signed-off-by: Rik van Riel <riel@surriel.com>
---
arch/x86/include/asm/tlbbatch.h | 1 +
arch/x86/include/asm/tlbflush.h | 12 ++-----
arch/x86/mm/tlb.c | 57 +++++++++++++++++++++++++++++++--
3 files changed, 58 insertions(+), 12 deletions(-)
--- a/arch/x86/include/asm/tlbbatch.h
+++ b/arch/x86/include/asm/tlbbatch.h
@@ -10,6 +10,7 @@ struct arch_tlbflush_unmap_batch {
* the PFNs being flushed..
*/
struct cpumask cpumask;
+ bool used_invlpgb;
};
#endif /* _ARCH_X86_TLBBATCH_H */
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -358,21 +358,15 @@ static inline u64 inc_mm_tlb_gen(struct
return atomic64_inc_return(&mm->context.tlb_gen);
}
-static inline void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch,
- struct mm_struct *mm,
- unsigned long uaddr)
-{
- inc_mm_tlb_gen(mm);
- cpumask_or(&batch->cpumask, &batch->cpumask, mm_cpumask(mm));
- mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL);
-}
-
static inline void arch_flush_tlb_batched_pending(struct mm_struct *mm)
{
flush_tlb_mm(mm);
}
extern void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch);
+extern void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch,
+ struct mm_struct *mm,
+ unsigned long uaddr);
static inline bool pte_flags_need_flush(unsigned long oldflags,
unsigned long newflags,
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -1612,9 +1612,7 @@ void arch_tlbbatch_flush(struct arch_tlb
* a local TLB flush is needed. Optimize this use-case by calling
* flush_tlb_func_local() directly in this case.
*/
- if (cpu_feature_enabled(X86_FEATURE_INVLPGB)) {
- invlpgb_flush_all_nonglobals();
- } else if (cpumask_any_but(&batch->cpumask, cpu) < nr_cpu_ids) {
+ if (cpumask_any_but(&batch->cpumask, cpu) < nr_cpu_ids) {
flush_tlb_multi(&batch->cpumask, info);
} else if (cpumask_test_cpu(cpu, &batch->cpumask)) {
lockdep_assert_irqs_enabled();
@@ -1623,12 +1621,65 @@ void arch_tlbbatch_flush(struct arch_tlb
local_irq_enable();
}
+ /*
+ * If we issued (asynchronous) INVLPGB flushes, wait for them here.
+ * The cpumask above contains only CPUs that were running tasks
+ * not using broadcast TLB flushing.
+ */
+ if (cpu_feature_enabled(X86_FEATURE_INVLPGB) && batch->used_invlpgb) {
+ tlbsync();
+ migrate_enable();
+ batch->used_invlpgb = false;
+ }
+
cpumask_clear(&batch->cpumask);
put_flush_tlb_info();
put_cpu();
}
+void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch,
+ struct mm_struct *mm,
+ unsigned long uaddr)
+{
+ u16 asid = mm_global_asid(mm);
+
+ if (asid) {
+ /*
+ * Queue up an asynchronous invalidation. The corresponding
+ * TLBSYNC is done in arch_tlbbatch_flush(), and must be done
+ * on the same CPU.
+ */
+ if (!batch->used_invlpgb) {
+ batch->used_invlpgb = true;
+ migrate_disable();
+ }
+ invlpgb_flush_user_nr_nosync(kern_pcid(asid), uaddr, 1, false);
+ /* Do any CPUs supporting INVLPGB need PTI? */
+ if (static_cpu_has(X86_FEATURE_PTI))
+ invlpgb_flush_user_nr_nosync(user_pcid(asid), uaddr, 1, false);
+
+ /*
+ * Some CPUs might still be using a local ASID for this
+ * process, and require IPIs, while others are using the
+ * global ASID.
+ *
+ * In this corner case we need to do both the broadcast
+ * TLB invalidation, and send IPIs. The IPIs will help
+ * stragglers transition to the broadcast ASID.
+ */
+ if (READ_ONCE(mm->context.asid_transition))
+ asid = 0;
+ }
+
+ if (!asid) {
+ inc_mm_tlb_gen(mm);
+ cpumask_or(&batch->cpumask, &batch->cpumask, mm_cpumask(mm));
+ }
+
+ mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL);
+}
+
/*
* Blindly accessing user memory from NMI context can be dangerous
* if we're in the middle of switching the current user task or


@@ -93,7 +93,7 @@ caused by rebalancing too many tasks at once.
/* Restrict the NUMA promotion throughput (MB/s) for each target node. */
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
-@@ -2825,7 +2825,7 @@ extern void deactivate_task(struct rq *r
+@@ -2797,7 +2797,7 @@ extern void deactivate_task(struct rq *r
extern void wakeup_preempt(struct rq *rq, struct task_struct *p, int flags);

debian/patches/series

@@ -203,21 +203,21 @@ patchset-xanmod/valve/0004-leds-steamdeck-Add-support-for-Steam-Deck-LED.patch
patchset-xanmod/valve/0005-mfd-Add-MFD-core-driver-for-Steam-Deck.patch
patchset-xanmod/valve/0006-mfd-steamdeck-Expose-controller-board-power-in-sysfs.patch
-patchset-zen/nvlpgb-v7/0001-x86-mm-make-MMU_GATHER_RCU_TABLE_FREE-unconditional.patch
-patchset-zen/nvlpgb-v7/0002-x86-mm-remove-pv_ops.mmu.tlb_remove_table-call.patch
-patchset-zen/nvlpgb-v7/0003-x86-mm-consolidate-full-flush-threshold-decision.patch
-patchset-zen/nvlpgb-v7/0004-x86-mm-get-INVLPGB-count-max-from-CPUID.patch
-patchset-zen/nvlpgb-v7/0005-x86-mm-add-INVLPGB-support-code.patch
-patchset-zen/nvlpgb-v7/0006-x86-mm-use-INVLPGB-for-kernel-TLB-flushes.patch
-patchset-zen/nvlpgb-v7/0007-x86-mm-use-INVLPGB-in-flush_tlb_all.patch
-patchset-zen/nvlpgb-v7/0008-x86-mm-use-broadcast-TLB-flushing-for-page-reclaim-T.patch
-patchset-zen/nvlpgb-v7/0009-x86-mm-enable-broadcast-TLB-invalidation-for-multi-t.patch
-patchset-zen/nvlpgb-v7/0010-x86-mm-do-targeted-broadcast-flushing-from-tlbbatch-.patch
-patchset-zen/nvlpgb-v7/0011-x86-mm-enable-AMD-translation-cache-extensions.patch
-patchset-zen/nvlpgb-v7/0012-x86-mm-only-invalidate-final-translations-with-INVLP.patch
-patchset-zen/nvlpgb-v7/0013-mm-remove-unnecessary-calls-to-lru_add_drain.patch
-patchset-zen/nvlpgb-v7/0014-vdso-Introduce-vdso-page.h.patch
-patchset-zen/nvlpgb-v7/0015-vdso-Change-PAGE_MASK-to-signed-on-all-32-bit-archit.patch
+patchset-zen/invlpgb-v9/0001-x86-mm-make-MMU_GATHER_RCU_TABLE_FREE-unconditional.patch
+patchset-zen/invlpgb-v9/0002-x86-mm-remove-pv_ops.mmu.tlb_remove_table-call.patch
+patchset-zen/invlpgb-v9/0003-x86-mm-consolidate-full-flush-threshold-decision.patch
+patchset-zen/invlpgb-v9/0004-x86-mm-get-INVLPGB-count-max-from-CPUID.patch
+patchset-zen/invlpgb-v9/0005-x86-mm-add-INVLPGB-support-code.patch
+patchset-zen/invlpgb-v9/0006-x86-mm-use-INVLPGB-for-kernel-TLB-flushes.patch
+patchset-zen/invlpgb-v9/0007-x86-mm-use-INVLPGB-in-flush_tlb_all.patch
+patchset-zen/invlpgb-v9/0008-x86-mm-use-broadcast-TLB-flushing-for-page-reclaim-T.patch
+patchset-zen/invlpgb-v9/0009-x86-mm-enable-broadcast-TLB-invalidation-for-multi-t.patch
+patchset-zen/invlpgb-v9/0010-x86-mm-do-targeted-broadcast-flushing-from-tlbbatch-.patch
+patchset-zen/invlpgb-v9/0011-x86-mm-enable-AMD-translation-cache-extensions.patch
+patchset-zen/invlpgb-v9/0012-x86-mm-only-invalidate-final-translations-with-INVLP.patch
+patchset-zen/invlpgb-v9/0013-mm-remove-unnecessary-calls-to-lru_add_drain.patch
+patchset-zen/invlpgb-v9/0014-vdso-Introduce-vdso-page.h.patch
+patchset-zen/invlpgb-v9/0015-vdso-Change-PAGE_MASK-to-signed-on-all-32-bit-archit.patch
patchset-zen/tlb/0001-mm-Optimize-TLB-flushes-during-page-reclaim.patch