158 lines
6.5 KiB
Diff
158 lines
6.5 KiB
Diff
From 148516b367dd591d31a2b98ad913417bc1b9c865 Mon Sep 17 00:00:00 2001
|
|
From: David Hildenbrand <david@redhat.com>
|
|
Date: Mon, 18 Aug 2025 19:53:58 +0200
|
|
Subject: mm/mremap: fix WARN with uffd that has remap events disabled
|
|
|
|
Registering userfaultfd on a VMA that spans at least one PMD and then
|
|
mremap()'ing that VMA can trigger a WARN when recovering from a failed
|
|
page table move due to a page table allocation error.
|
|
|
|
The code ends up doing the right thing (recurse, avoiding moving actual
|
|
page tables), but triggering that WARN is unpleasant:
|
|
|
|
WARNING: CPU: 2 PID: 6133 at mm/mremap.c:357 move_normal_pmd mm/mremap.c:357 [inline]
|
|
WARNING: CPU: 2 PID: 6133 at mm/mremap.c:357 move_pgt_entry mm/mremap.c:595 [inline]
|
|
WARNING: CPU: 2 PID: 6133 at mm/mremap.c:357 move_page_tables+0x3832/0x44a0 mm/mremap.c:852
|
|
Modules linked in:
|
|
CPU: 2 UID: 0 PID: 6133 Comm: syz.0.19 Not tainted 6.17.0-rc1-syzkaller-00004-g53e760d89498 #0 PREEMPT(full)
|
|
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2~bpo12+1 04/01/2014
|
|
RIP: 0010:move_normal_pmd mm/mremap.c:357 [inline]
|
|
RIP: 0010:move_pgt_entry mm/mremap.c:595 [inline]
|
|
RIP: 0010:move_page_tables+0x3832/0x44a0 mm/mremap.c:852
|
|
Code: ...
|
|
RSP: 0018:ffffc900037a76d8 EFLAGS: 00010293
|
|
RAX: 0000000000000000 RBX: 0000000032930007 RCX: ffffffff820c6645
|
|
RDX: ffff88802e56a440 RSI: ffffffff820c7201 RDI: 0000000000000007
|
|
RBP: ffff888037728fc0 R08: 0000000000000007 R09: 0000000000000000
|
|
R10: 0000000032930007 R11: 0000000000000000 R12: 0000000000000000
|
|
R13: ffffc900037a79a8 R14: 0000000000000001 R15: dffffc0000000000
|
|
FS: 000055556316a500(0000) GS:ffff8880d68bc000(0000) knlGS:0000000000000000
|
|
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
|
|
CR2: 0000001b30863fff CR3: 0000000050171000 CR4: 0000000000352ef0
|
|
Call Trace:
|
|
<TASK>
|
|
copy_vma_and_data+0x468/0x790 mm/mremap.c:1215
|
|
move_vma+0x548/0x1780 mm/mremap.c:1282
|
|
mremap_to+0x1b7/0x450 mm/mremap.c:1406
|
|
do_mremap+0xfad/0x1f80 mm/mremap.c:1921
|
|
__do_sys_mremap+0x119/0x170 mm/mremap.c:1977
|
|
do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline]
|
|
do_syscall_64+0xcd/0x4c0 arch/x86/entry/syscall_64.c:94
|
|
entry_SYSCALL_64_after_hwframe+0x77/0x7f
|
|
RIP: 0033:0x7f00d0b8ebe9
|
|
Code: ...
|
|
RSP: 002b:00007ffe5ea5ee98 EFLAGS: 00000246 ORIG_RAX: 0000000000000019
|
|
RAX: ffffffffffffffda RBX: 00007f00d0db5fa0 RCX: 00007f00d0b8ebe9
|
|
RDX: 0000000000400000 RSI: 0000000000c00000 RDI: 0000200000000000
|
|
RBP: 00007ffe5ea5eef0 R08: 0000200000c00000 R09: 0000000000000000
|
|
R10: 0000000000000003 R11: 0000000000000246 R12: 0000000000000002
|
|
R13: 00007f00d0db5fa0 R14: 00007f00d0db5fa0 R15: 0000000000000005
|
|
</TASK>
|
|
|
|
The underlying issue is that we recurse during the original page table
|
|
move, but not during the recovery move.
|
|
|
|
Fix it by checking for both VMAs and performing the check before the
|
|
pmd_none() sanity check.
|
|
|
|
Add a new helper where we perform+document that check for the PMD and PUD
|
|
level.
|
|
|
|
Thanks to Harry for bisecting.
|
|
|
|
Link: https://lkml.kernel.org/r/20250818175358.1184757-1-david@redhat.com
|
|
Fixes: 0cef0bb836e3 ("mm: clear uffd-wp PTE/PMD state on mremap()")
|
|
Signed-off-by: David Hildenbrand <david@redhat.com>
|
|
Reported-by: syzbot+4d9a13f0797c46a29e42@syzkaller.appspotmail.com
|
|
Closes: https://lkml.kernel.org/r/689bb893.050a0220.7f033.013a.GAE@google.com
|
|
Tested-by: Harry Yoo <harry.yoo@oracle.com>
|
|
Cc: "Liam R. Howlett" <Liam.Howlett@oracle.com>
|
|
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
|
|
Cc: Vlastimil Babka <vbabka@suse.cz>
|
|
Cc: Jann Horn <jannh@google.com>
|
|
Cc: Pedro Falcato <pfalcato@suse.de>
|
|
Cc: <stable@vger.kernel.org>
|
|
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
|
---
|
|
mm/mremap.c | 41 +++++++++++++++++++++++------------------
|
|
1 file changed, 23 insertions(+), 18 deletions(-)
|
|
|
|
--- a/mm/mremap.c
|
|
+++ b/mm/mremap.c
|
|
@@ -294,6 +294,25 @@ static inline bool arch_supports_page_ta
|
|
}
|
|
#endif
|
|
|
|
+static inline bool uffd_supports_page_table_move(struct pagetable_move_control *pmc)
|
|
+{
|
|
+ /*
|
|
+ * If we are moving a VMA that has uffd-wp registered but with
|
|
+ * remap events disabled (new VMA will not be registered with uffd), we
|
|
+ * need to ensure that the uffd-wp state is cleared from all pgtables.
|
|
+ * This means recursing into lower page tables in move_page_tables().
|
|
+ *
|
|
+ * We might get called with VMAs reversed when recovering from a
|
|
+ * failed page table move. In that case, the
|
|
+ * "old"-but-actually-"originally new" VMA during recovery will not have
|
|
+ * a uffd context. Recursing into lower page tables during the original
|
|
+ * move but not during the recovery move will cause trouble, because we
|
|
+ * run into already-existing page tables. So check both VMAs.
|
|
+ */
|
|
+ return !vma_has_uffd_without_event_remap(pmc->old) &&
|
|
+ !vma_has_uffd_without_event_remap(pmc->new);
|
|
+}
|
|
+
|
|
#ifdef CONFIG_HAVE_MOVE_PMD
|
|
static bool move_normal_pmd(struct pagetable_move_control *pmc,
|
|
pmd_t *old_pmd, pmd_t *new_pmd)
|
|
@@ -306,6 +325,8 @@ static bool move_normal_pmd(struct paget
|
|
|
|
if (!arch_supports_page_table_move())
|
|
return false;
|
|
+ if (!uffd_supports_page_table_move(pmc))
|
|
+ return false;
|
|
/*
|
|
* The destination pmd shouldn't be established, free_pgtables()
|
|
* should have released it.
|
|
@@ -332,15 +353,6 @@ static bool move_normal_pmd(struct paget
|
|
if (WARN_ON_ONCE(!pmd_none(*new_pmd)))
|
|
return false;
|
|
|
|
- /* If this pmd belongs to a uffd vma with remap events disabled, we need
|
|
- * to ensure that the uffd-wp state is cleared from all pgtables. This
|
|
- * means recursing into lower page tables in move_page_tables(), and we
|
|
- * can reuse the existing code if we simply treat the entry as "not
|
|
- * moved".
|
|
- */
|
|
- if (vma_has_uffd_without_event_remap(vma))
|
|
- return false;
|
|
-
|
|
/*
|
|
* We don't have to worry about the ordering of src and dst
|
|
* ptlocks because exclusive mmap_lock prevents deadlock.
|
|
@@ -389,6 +401,8 @@ static bool move_normal_pud(struct paget
|
|
|
|
if (!arch_supports_page_table_move())
|
|
return false;
|
|
+ if (!uffd_supports_page_table_move(pmc))
|
|
+ return false;
|
|
/*
|
|
* The destination pud shouldn't be established, free_pgtables()
|
|
* should have released it.
|
|
@@ -396,15 +410,6 @@ static bool move_normal_pud(struct paget
|
|
if (WARN_ON_ONCE(!pud_none(*new_pud)))
|
|
return false;
|
|
|
|
- /* If this pud belongs to a uffd vma with remap events disabled, we need
|
|
- * to ensure that the uffd-wp state is cleared from all pgtables. This
|
|
- * means recursing into lower page tables in move_page_tables(), and we
|
|
- * can reuse the existing code if we simply treat the entry as "not
|
|
- * moved".
|
|
- */
|
|
- if (vma_has_uffd_without_event_remap(vma))
|
|
- return false;
|
|
-
|
|
/*
|
|
* We don't have to worry about the ordering of src and dst
|
|
* ptlocks because exclusive mmap_lock prevents deadlock.
|