2025-02-09 19:40:34 +03:00
|
|
|
From 101ba03a6474bbc52971505abf1e3ee9613f255b Mon Sep 17 00:00:00 2001
|
2025-01-28 09:25:42 +03:00
|
|
|
From: Rik van Riel <riel@surriel.com>
|
2025-02-09 19:40:34 +03:00
|
|
|
Date: Wed, 5 Feb 2025 23:43:30 -0500
|
2025-01-28 09:25:42 +03:00
|
|
|
Subject: x86/mm: Enable AMD translation cache extensions
|
|
|
|
|
|
|
|
With AMD TCE (translation cache extensions) only the intermediate mappings
|
|
|
|
that cover the address range zapped by INVLPG / INVLPGB get invalidated,
|
|
|
|
rather than all intermediate mappings getting zapped at every TLB invalidation.
|
|
|
|
|
|
|
|
This can help reduce the TLB miss rate, by keeping more intermediate
|
|
|
|
mappings in the cache.
|
|
|
|
|
|
|
|
From the AMD manual:
|
|
|
|
|
|
|
|
Translation Cache Extension (TCE) Bit. Bit 15, read/write. Setting this bit
|
|
|
|
to 1 changes how the INVLPG, INVLPGB, and INVPCID instructions operate on
|
|
|
|
TLB entries. When this bit is 0, these instructions remove the target PTE
|
|
|
|
from the TLB as well as all upper-level table entries that are cached
|
|
|
|
in the TLB, whether or not they are associated with the target PTE.
|
|
|
|
When this bit is set, these instructions will remove the target PTE and
|
|
|
|
only those upper-level entries that lead to the target PTE in
|
|
|
|
the page table hierarchy, leaving unrelated upper-level entries intact.
|
|
|
|
|
|
|
|
Signed-off-by: Rik van Riel <riel@surriel.com>
|
2025-02-09 19:40:34 +03:00
|
|
|
Tested-by: Manali Shukla <Manali.Shukla@amd.com>
|
2025-01-28 09:25:42 +03:00
|
|
|
---
|
|
|
|
arch/x86/include/asm/msr-index.h | 2 ++
|
|
|
|
arch/x86/kernel/cpu/amd.c | 4 ++++
|
|
|
|
tools/arch/x86/include/asm/msr-index.h | 2 ++
|
|
|
|
3 files changed, 8 insertions(+)
|
|
|
|
|
|
|
|
--- a/arch/x86/include/asm/msr-index.h
|
|
|
|
+++ b/arch/x86/include/asm/msr-index.h
|
|
|
|
@@ -25,6 +25,7 @@
|
|
|
|
#define _EFER_SVME 12 /* Enable virtualization */
|
|
|
|
#define _EFER_LMSLE 13 /* Long Mode Segment Limit Enable */
|
|
|
|
#define _EFER_FFXSR 14 /* Enable Fast FXSAVE/FXRSTOR */
|
|
|
|
+#define _EFER_TCE 15 /* Enable Translation Cache Extensions */
|
|
|
|
#define _EFER_AUTOIBRS 21 /* Enable Automatic IBRS */
|
|
|
|
|
|
|
|
#define EFER_SCE (1<<_EFER_SCE)
|
|
|
|
@@ -34,6 +35,7 @@
|
|
|
|
#define EFER_SVME (1<<_EFER_SVME)
|
|
|
|
#define EFER_LMSLE (1<<_EFER_LMSLE)
|
|
|
|
#define EFER_FFXSR (1<<_EFER_FFXSR)
|
|
|
|
+#define EFER_TCE (1<<_EFER_TCE)
|
|
|
|
#define EFER_AUTOIBRS (1<<_EFER_AUTOIBRS)
|
|
|
|
|
|
|
|
/*
|
|
|
|
--- a/arch/x86/kernel/cpu/amd.c
|
|
|
|
+++ b/arch/x86/kernel/cpu/amd.c
|
|
|
|
@@ -1071,6 +1071,10 @@ static void init_amd(struct cpuinfo_x86
|
|
|
|
|
|
|
|
/* AMD CPUs don't need fencing after x2APIC/TSC_DEADLINE MSR writes. */
|
|
|
|
clear_cpu_cap(c, X86_FEATURE_APIC_MSRS_FENCE);
|
|
|
|
+
|
|
|
|
+ /* Enable Translation Cache Extension */
|
|
|
|
+ if (cpu_feature_enabled(X86_FEATURE_TCE))
|
|
|
|
+ msr_set_bit(MSR_EFER, _EFER_TCE);
|
|
|
|
}
|
|
|
|
|
|
|
|
#ifdef CONFIG_X86_32
|
|
|
|
--- a/tools/arch/x86/include/asm/msr-index.h
|
|
|
|
+++ b/tools/arch/x86/include/asm/msr-index.h
|
|
|
|
@@ -25,6 +25,7 @@
|
|
|
|
#define _EFER_SVME 12 /* Enable virtualization */
|
|
|
|
#define _EFER_LMSLE 13 /* Long Mode Segment Limit Enable */
|
|
|
|
#define _EFER_FFXSR 14 /* Enable Fast FXSAVE/FXRSTOR */
|
|
|
|
+#define _EFER_TCE 15 /* Enable Translation Cache Extensions */
|
|
|
|
#define _EFER_AUTOIBRS 21 /* Enable Automatic IBRS */
|
|
|
|
|
|
|
|
#define EFER_SCE (1<<_EFER_SCE)
|
|
|
|
@@ -34,6 +35,7 @@
|
|
|
|
#define EFER_SVME (1<<_EFER_SVME)
|
|
|
|
#define EFER_LMSLE (1<<_EFER_LMSLE)
|
|
|
|
#define EFER_FFXSR (1<<_EFER_FFXSR)
|
|
|
|
+#define EFER_TCE (1<<_EFER_TCE)
|
|
|
|
#define EFER_AUTOIBRS (1<<_EFER_AUTOIBRS)
|
|
|
|
|
|
|
|
/*
|