From 1901291057a3f1bf2bf94c7a4ddf3253d3116acb Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
Date: Thu, 13 Feb 2025 11:13:52 -0500
Subject: x86/mm: Make MMU_GATHER_RCU_TABLE_FREE unconditional

Currently x86 uses CONFIG_MMU_GATHER_RCU_TABLE_FREE when using
paravirt, and not when running on bare metal.

There is no good reason to do things differently for each setup.
Make them all the same.

Currently get_user_pages_fast() synchronizes against page table
freeing in two different ways:

 - on bare metal, by blocking IRQs, which block TLB flush IPIs
 - on paravirt, with MMU_GATHER_RCU_TABLE_FREE

This is done because some paravirt TLB flush implementations
handle the TLB flush in the hypervisor, and will do the flush
even when the target CPU has interrupts disabled.
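
For illustration, a simplified sketch of the bare metal side of this
(hypothetical code, loosely modelled on the mm/gup.c fast path; the
real walker is more involved):

	static int gup_fast_sketch(unsigned long start, unsigned long end,
				   struct page **pages)
	{
		unsigned long flags;
		int nr = 0;

		/*
		 * IRQs off blocks the TLB flush IPIs that are sent before
		 * a page table is freed, so the tables walked below cannot
		 * be freed under us.  With MMU_GATHER_RCU_TABLE_FREE this
		 * same region acts as an implicit RCU read-side critical
		 * section, and tables are only freed after a grace period.
		 */
		local_irq_save(flags);
		/* ... walk the page tables and take page references ... */
		local_irq_restore(flags);

		return nr;
	}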

Always handle page table freeing with MMU_GATHER_RCU_TABLE_FREE.
Using RCU synchronization between page table freeing and
get_user_pages_fast() allows bare metal to also do TLB flushing
while interrupts are disabled.
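
As a rough sketch of what RCU-deferred freeing looks like (hypothetical
helper names, not the actual mm/mmu_gather.c implementation):

	struct table_free_deferred {
		struct rcu_head rcu;
		void *table;
	};

	static void table_free_rcu(struct rcu_head *head)
	{
		struct table_free_deferred *d =
			container_of(head, struct table_free_deferred, rcu);

		free_page_table_sketch(d->table);	/* hypothetical */
		kfree(d);
	}

	static void remove_table_sketch(void *table)
	{
		/* error handling omitted for brevity */
		struct table_free_deferred *d = kmalloc(sizeof(*d), GFP_NOWAIT);

		d->table = table;
		/*
		 * The table is freed only after an RCU grace period, i.e.
		 * after every IRQs-off or preempt-off walker that could
		 * still see it has finished.
		 */
		call_rcu(&d->rcu, table_free_rcu);
	}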

Various places in the mm do still block IRQs or disable preemption
as an implicit way to block RCU frees.

That makes it safe to use INVLPGB on AMD CPUs.

Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Rik van Riel <riel@surriel.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Tested-by: Manali Shukla <Manali.Shukla@amd.com>
Tested-by: Brendan Jackman <jackmanb@google.com>
Tested-by: Michael Kelley <mhklinux@outlook.com>
Link: https://lore.kernel.org/r/20250213161423.449435-2-riel@surriel.com
---
 arch/x86/Kconfig           |  2 +-
 arch/x86/kernel/paravirt.c | 17 +----------------
 arch/x86/mm/pgtable.c      | 27 ++++-----------------------
 3 files changed, 6 insertions(+), 40 deletions(-)
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -277,7 +277,7 @@ config X86
 	select HAVE_PCI
 	select HAVE_PERF_REGS
 	select HAVE_PERF_USER_STACK_DUMP
-	select MMU_GATHER_RCU_TABLE_FREE	if PARAVIRT
+	select MMU_GATHER_RCU_TABLE_FREE
 	select MMU_GATHER_MERGE_VMAS
 	select HAVE_POSIX_CPU_TIMERS_TASK_WORK
 	select HAVE_REGS_AND_STACK_ACCESS_API
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -59,21 +59,6 @@ void __init native_pv_lock_init(void)
 		static_branch_enable(&virt_spin_lock_key);
 }
 
-#ifndef CONFIG_PT_RECLAIM
-static void native_tlb_remove_table(struct mmu_gather *tlb, void *table)
-{
-	struct ptdesc *ptdesc = (struct ptdesc *)table;
-
-	pagetable_dtor(ptdesc);
-	tlb_remove_page(tlb, ptdesc_page(ptdesc));
-}
-#else
-static void native_tlb_remove_table(struct mmu_gather *tlb, void *table)
-{
-	tlb_remove_table(tlb, table);
-}
-#endif
-
 struct static_key paravirt_steal_enabled;
 struct static_key paravirt_steal_rq_enabled;
 
@@ -197,7 +182,7 @@ struct paravirt_patch_template pv_ops =
 	.mmu.flush_tlb_kernel = native_flush_tlb_global,
 	.mmu.flush_tlb_one_user = native_flush_tlb_one_user,
 	.mmu.flush_tlb_multi = native_flush_tlb_multi,
-	.mmu.tlb_remove_table = native_tlb_remove_table,
+	.mmu.tlb_remove_table = tlb_remove_table,
 
 	.mmu.exit_mmap = paravirt_nop,
 	.mmu.notify_page_enc_status_changed = paravirt_nop,
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -18,25 +18,6 @@ EXPORT_SYMBOL(physical_mask);
 #define PGTABLE_HIGHMEM 0
 #endif
 
-#ifndef CONFIG_PARAVIRT
-#ifndef CONFIG_PT_RECLAIM
-static inline
-void paravirt_tlb_remove_table(struct mmu_gather *tlb, void *table)
-{
-	struct ptdesc *ptdesc = (struct ptdesc *)table;
-
-	pagetable_dtor(ptdesc);
-	tlb_remove_page(tlb, ptdesc_page(ptdesc));
-}
-#else
-static inline
-void paravirt_tlb_remove_table(struct mmu_gather *tlb, void *table)
-{
-	tlb_remove_table(tlb, table);
-}
-#endif /* !CONFIG_PT_RECLAIM */
-#endif /* !CONFIG_PARAVIRT */
-
 gfp_t __userpte_alloc_gfp = GFP_PGTABLE_USER | PGTABLE_HIGHMEM;
 
 pgtable_t pte_alloc_one(struct mm_struct *mm)
@@ -64,7 +45,7 @@ early_param("userpte", setup_userpte);
 void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
 {
 	paravirt_release_pte(page_to_pfn(pte));
-	paravirt_tlb_remove_table(tlb, page_ptdesc(pte));
+	tlb_remove_table(tlb, page_ptdesc(pte));
 }
 
 #if CONFIG_PGTABLE_LEVELS > 2
@@ -78,21 +59,21 @@ void ___pmd_free_tlb(struct mmu_gather *
 #ifdef CONFIG_X86_PAE
 	tlb->need_flush_all = 1;
 #endif
-	paravirt_tlb_remove_table(tlb, virt_to_ptdesc(pmd));
+	tlb_remove_table(tlb, virt_to_ptdesc(pmd));
 }
 
 #if CONFIG_PGTABLE_LEVELS > 3
 void ___pud_free_tlb(struct mmu_gather *tlb, pud_t *pud)
 {
 	paravirt_release_pud(__pa(pud) >> PAGE_SHIFT);
-	paravirt_tlb_remove_table(tlb, virt_to_ptdesc(pud));
+	tlb_remove_table(tlb, virt_to_ptdesc(pud));
 }
 
 #if CONFIG_PGTABLE_LEVELS > 4
 void ___p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d)
 {
 	paravirt_release_p4d(__pa(p4d) >> PAGE_SHIFT);
-	paravirt_tlb_remove_table(tlb, virt_to_ptdesc(p4d));
+	tlb_remove_table(tlb, virt_to_ptdesc(p4d));
 }
 #endif /* CONFIG_PGTABLE_LEVELS > 4 */
 #endif /* CONFIG_PGTABLE_LEVELS > 3 */