From 3c32c0d457a2c4b2817f57e1e2c9cbba4624639e Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Fri, 22 Nov 2024 11:33:05 -0800
Subject: futex: improve user space accesses

Josh Poimboeuf reports that he got a "will-it-scale.per_process_ops 1.9%
improvement" report for his patch that changed __get_user() to use
pointer masking instead of the explicit speculation barrier. However,
that patch doesn't actually work in the general case, because some (very
bad) architecture-specific code actually depends on __get_user() also
working on kernel addresses.

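For reference, the masking approach works roughly like this (a
conceptual C sketch only; the real x86-64 helper is a short asm
sequence, and USER_PTR_MAX is a runtime constant there):

	static inline void __user *mask_user_ptr_sketch(const void __user *ptr)
	{
		unsigned long addr = (unsigned long)ptr;

		/*
		 * Turn any kernel address into an all-ones pointer that is
		 * guaranteed to fault, so even a mis-speculated access can
		 * never read kernel memory. No speculation barrier needed
		 * before the load.
		 */
		if (addr > USER_PTR_MAX)
			addr = ~0UL;
		return (void __user *)addr;
	}
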
A profile showed that the offending __get_user() was the futex code,
which really should be fixed up to not use that horrid legacy case.
Rewrite futex_get_value_locked() to use the modern user access helpers,
and inline it so that the compiler not only avoids the function call for
a few instructions, but can do CSE on the address masking.

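As an aside on the CSE point: with the helper inlined, back-to-back
reads through the same user pointer let the compiler share one masked
address computation (a contrived illustration, not code from this
patch):

	u32 uval, uval2;

	/*
	 * Both reads inline down to masked_user_access_begin(uaddr); the
	 * compiler can CSE that single address-masking computation instead
	 * of emitting two opaque function calls.
	 */
	if (futex_get_value_locked(&uval, uaddr))
		return -EFAULT;
	if (futex_get_value_locked(&uval2, uaddr))
		return -EFAULT;
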
It also turns out the x86 futex functions have unnecessary barriers in
other places, so let's fix those up too.

Link: https://lore.kernel.org/all/20241115230653.hfvzyf3aqqntgp63@jpoimboe/
Reported-by: Josh Poimboeuf <jpoimboe@kernel.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/include/asm/futex.h |  8 +++--
 kernel/futex/core.c          | 22 --------------
 kernel/futex/futex.h         | 59 ++++++++++++++++++++++++++++++++++--
 3 files changed, 63 insertions(+), 26 deletions(-)

--- a/arch/x86/include/asm/futex.h
+++ b/arch/x86/include/asm/futex.h
@@ -48,7 +48,9 @@ do {								\
 static __always_inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
 		u32 __user *uaddr)
 {
-	if (!user_access_begin(uaddr, sizeof(u32)))
+	if (can_do_masked_user_access())
+		uaddr = masked_user_access_begin(uaddr);
+	else if (!user_access_begin(uaddr, sizeof(u32)))
 		return -EFAULT;
 
 	switch (op) {
@@ -84,7 +86,9 @@ static inline int futex_atomic_cmpxchg_i
 {
 	int ret = 0;
 
-	if (!user_access_begin(uaddr, sizeof(u32)))
+	if (can_do_masked_user_access())
+		uaddr = masked_user_access_begin(uaddr);
+	else if (!user_access_begin(uaddr, sizeof(u32)))
 		return -EFAULT;
 	asm volatile("\n"
 		"1:\t" LOCK_PREFIX "cmpxchgl %3, %2\n"
--- a/kernel/futex/core.c
+++ b/kernel/futex/core.c
@@ -451,28 +451,6 @@ struct futex_q *futex_top_waiter(struct
 	return NULL;
 }
 
-int futex_cmpxchg_value_locked(u32 *curval, u32 __user *uaddr, u32 uval, u32 newval)
-{
-	int ret;
-
-	pagefault_disable();
-	ret = futex_atomic_cmpxchg_inatomic(curval, uaddr, uval, newval);
-	pagefault_enable();
-
-	return ret;
-}
-
-int futex_get_value_locked(u32 *dest, u32 __user *from)
-{
-	int ret;
-
-	pagefault_disable();
-	ret = __get_user(*dest, from);
-	pagefault_enable();
-
-	return ret ? -EFAULT : 0;
-}
-
 /**
  * wait_for_owner_exiting - Block until the owner has exited
  * @ret: owner's current futex lock status
--- a/kernel/futex/futex.h
+++ b/kernel/futex/futex.h
@@ -6,6 +6,7 @@
 #include <linux/rtmutex.h>
 #include <linux/sched/wake_q.h>
 #include <linux/compat.h>
+#include <linux/uaccess.h>
 
 #ifdef CONFIG_PREEMPT_RT
 #include <linux/rcuwait.h>
@@ -225,10 +226,64 @@ extern bool __futex_wake_mark(struct fut
 extern void futex_wake_mark(struct wake_q_head *wake_q, struct futex_q *q);
 
 extern int fault_in_user_writeable(u32 __user *uaddr);
-extern int futex_cmpxchg_value_locked(u32 *curval, u32 __user *uaddr, u32 uval, u32 newval);
-extern int futex_get_value_locked(u32 *dest, u32 __user *from);
 extern struct futex_q *futex_top_waiter(struct futex_hash_bucket *hb, union futex_key *key);
 
+static inline int futex_cmpxchg_value_locked(u32 *curval, u32 __user *uaddr, u32 uval, u32 newval)
+{
+	int ret;
+
+	pagefault_disable();
+	ret = futex_atomic_cmpxchg_inatomic(curval, uaddr, uval, newval);
+	pagefault_enable();
+
+	return ret;
+}
+
+/*
+ * This does a plain atomic user space read, and the user pointer has
+ * already been verified earlier by get_futex_key() to be both aligned
+ * and actually in user space, just like futex_atomic_cmpxchg_inatomic().
+ *
+ * We still want to avoid any speculation, and while __get_user() is
+ * the traditional model for this, it's actually slower than doing
+ * this manually these days.
+ *
+ * We could just have a per-architecture special function for it,
+ * the same way we do futex_atomic_cmpxchg_inatomic(), but rather
+ * than force everybody to do that, write it out long-hand using
+ * the low-level user-access infrastructure.
+ *
+ * This looks a bit overkill, but generally just results in a couple
+ * of instructions.
+ */
+static __always_inline int futex_read_inatomic(u32 *dest, u32 __user *from)
+{
+	u32 val;
+
+	if (can_do_masked_user_access())
+		from = masked_user_access_begin(from);
+	else if (!user_read_access_begin(from, sizeof(*from)))
+		return -EFAULT;
+	unsafe_get_user(val, from, Efault);
+	user_access_end();
+	*dest = val;
+	return 0;
+Efault:
+	user_access_end();
+	return -EFAULT;
+}
+
+static inline int futex_get_value_locked(u32 *dest, u32 __user *from)
+{
+	int ret;
+
+	pagefault_disable();
+	ret = futex_read_inatomic(dest, from);
+	pagefault_enable();
+
+	return ret;
+}
+
 extern void __futex_unqueue(struct futex_q *q);
 extern void __futex_queue(struct futex_q *q, struct futex_hash_bucket *hb);
 extern int futex_unqueue(struct futex_q *q);