release 6.15.3
This commit is contained in:
@@ -1,4 +1,4 @@
|
||||
From cda8b1022f32bb7a917148f75f4641e7a5b3e729 Mon Sep 17 00:00:00 2001
|
||||
From 1616d0edbdf3b36a8f4694d35bcf88fa1242c7e8 Mon Sep 17 00:00:00 2001
|
||||
From: Jinliang Zheng <alexjlzheng@tencent.com>
|
||||
Date: Tue, 15 Apr 2025 17:02:32 +0800
|
||||
Subject: mm: fix ratelimit_pages update error in dirty_ratio_handler()
|
||||
|
@@ -1,4 +1,4 @@
|
||||
From 30a724581b5037176f6492359c189ebb180ccf1f Mon Sep 17 00:00:00 2001
|
||||
From 87f7435508fde20e21c6b744723a3203e2045f46 Mon Sep 17 00:00:00 2001
|
||||
From: GONG Ruiqi <gongruiqi1@huawei.com>
|
||||
Date: Sun, 27 Apr 2025 10:53:03 +0800
|
||||
Subject: vgacon: Add check for vc_origin address range in vgacon_scroll()
|
||||
|
@@ -1,4 +1,4 @@
|
||||
From 5cf26cf9fd9c11cb1543aac026f8928829895663 Mon Sep 17 00:00:00 2001
|
||||
From 4aed4d2a911e165342a339c886101dbe3acad5e2 Mon Sep 17 00:00:00 2001
|
||||
From: Murad Masimov <m.masimov@mt-integration.ru>
|
||||
Date: Mon, 28 Apr 2025 18:34:06 +0300
|
||||
Subject: fbdev: Fix do_register_framebuffer to prevent null-ptr-deref in
|
||||
|
@@ -1,4 +1,4 @@
|
||||
From 54c7f478f1a9d58f5609a48d461c7d495bb8301a Mon Sep 17 00:00:00 2001
|
||||
From 10c7fce24a1ad9197a8eabbba454a9a872f03d5c Mon Sep 17 00:00:00 2001
|
||||
From: Murad Masimov <m.masimov@mt-integration.ru>
|
||||
Date: Mon, 28 Apr 2025 18:34:07 +0300
|
||||
Subject: fbdev: Fix fb_set_var to prevent null-ptr-deref in
|
||||
|
@@ -1,4 +1,4 @@
|
||||
From 9cb2f9d210f915aabe54c5061d84f3fbe93c71ea Mon Sep 17 00:00:00 2001
|
||||
From 13ccad7713b89e7693feb5346e7893dc8edce7a8 Mon Sep 17 00:00:00 2001
|
||||
From: Christian Brauner <brauner@kernel.org>
|
||||
Date: Mon, 7 Apr 2025 11:54:15 +0200
|
||||
Subject: anon_inode: use a proper mode internally
|
||||
|
@@ -1,4 +1,4 @@
|
||||
From ea4199112ae6d8da866417f50e035be01488c502 Mon Sep 17 00:00:00 2001
|
||||
From 5a3eea2c3e9675a8b713eef0d52b7c437f1f613b Mon Sep 17 00:00:00 2001
|
||||
From: Christian Brauner <brauner@kernel.org>
|
||||
Date: Mon, 7 Apr 2025 11:54:17 +0200
|
||||
Subject: anon_inode: explicitly block ->setattr()
|
||||
|
@@ -1,4 +1,4 @@
|
||||
From 79f54c5bc7c6097a379c83e9ed56bee27cf1218a Mon Sep 17 00:00:00 2001
|
||||
From 8c9775d285f9755477a8b1f8b215102dce014ed2 Mon Sep 17 00:00:00 2001
|
||||
From: Christian Brauner <brauner@kernel.org>
|
||||
Date: Mon, 7 Apr 2025 11:54:19 +0200
|
||||
Subject: anon_inode: raise SB_I_NODEV and SB_I_NOEXEC
|
||||
|
@@ -1,4 +1,4 @@
|
||||
From edaacbee0f33b7371ec460723d1042a6c5a4bb9d Mon Sep 17 00:00:00 2001
|
||||
From d90681a50098e204f2e111b9433f6fc73a939854 Mon Sep 17 00:00:00 2001
|
||||
From: Christian Brauner <brauner@kernel.org>
|
||||
Date: Mon, 21 Apr 2025 10:27:40 +0200
|
||||
Subject: fs: add S_ANON_INODE
|
||||
|
@@ -1,4 +1,4 @@
|
||||
From ab287d709809b6dfe4d3c42016a543d976533d51 Mon Sep 17 00:00:00 2001
|
||||
From c161e0ffb55a12b9b26819fa0ecf8217ab781e97 Mon Sep 17 00:00:00 2001
|
||||
From: Zijun Hu <quic_zijuhu@quicinc.com>
|
||||
Date: Wed, 7 May 2025 19:50:26 +0800
|
||||
Subject: configfs: Do not override creating attribute file failure in
|
||||
|
@@ -1,104 +0,0 @@
|
||||
From 896b7b0d6ed53a7fe159c4b76f25407c816aa619 Mon Sep 17 00:00:00 2001
|
||||
From: Al Viro <viro@zeniv.linux.org.uk>
|
||||
Date: Fri, 23 May 2025 19:20:36 -0400
|
||||
Subject: Don't propagate mounts into detached trees
|
||||
|
||||
All versions up to 6.14 did not propagate mount events into detached
|
||||
trees. Shortly after 6.14 a merge of vfs-6.15-rc1.mount.namespace
|
||||
(130e696aa68b) has changed that.
|
||||
|
||||
Unfortunately, that has caused userland regressions (reported in
|
||||
https://lore.kernel.org/all/CAOYeF9WQhFDe+BGW=Dp5fK8oRy5AgZ6zokVyTj1Wp4EUiYgt4w@mail.gmail.com/)
|
||||
|
||||
Straight revert wouldn't be an option - in particular, the variant in 6.14
|
||||
had a bug that got fixed in d1ddc6f1d9f0 ("fix IS_MNT_PROPAGATING uses")
|
||||
and we don't want to bring the bug back.
|
||||
|
||||
This is a modification of manual revert posted by Christian, with changes
|
||||
needed to avoid reintroducing the breakage in scenario described in
|
||||
d1ddc6f1d9f0.
|
||||
|
||||
Cc: stable@vger.kernel.org
|
||||
Reported-by: Allison Karlitskaya <lis@redhat.com>
|
||||
Tested-by: Allison Karlitskaya <lis@redhat.com>
|
||||
Acked-by: Christian Brauner <brauner@kernel.org>
|
||||
Co-developed-by: Christian Brauner <brauner@kernel.org>
|
||||
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
|
||||
---
|
||||
fs/mount.h | 5 -----
|
||||
fs/namespace.c | 15 ++-------------
|
||||
fs/pnode.c | 4 ++--
|
||||
3 files changed, 4 insertions(+), 20 deletions(-)
|
||||
|
||||
--- a/fs/mount.h
|
||||
+++ b/fs/mount.h
|
||||
@@ -7,10 +7,6 @@
|
||||
|
||||
extern struct list_head notify_list;
|
||||
|
||||
-typedef __u32 __bitwise mntns_flags_t;
|
||||
-
|
||||
-#define MNTNS_PROPAGATING ((__force mntns_flags_t)(1 << 0))
|
||||
-
|
||||
struct mnt_namespace {
|
||||
struct ns_common ns;
|
||||
struct mount * root;
|
||||
@@ -37,7 +33,6 @@ struct mnt_namespace {
|
||||
struct rb_node mnt_ns_tree_node; /* node in the mnt_ns_tree */
|
||||
struct list_head mnt_ns_list; /* entry in the sequential list of mounts namespace */
|
||||
refcount_t passive; /* number references not pinning @mounts */
|
||||
- mntns_flags_t mntns_flags;
|
||||
} __randomize_layout;
|
||||
|
||||
struct mnt_pcp {
|
||||
--- a/fs/namespace.c
|
||||
+++ b/fs/namespace.c
|
||||
@@ -3648,7 +3648,7 @@ static int do_move_mount(struct path *ol
|
||||
if (!(attached ? check_mnt(old) : is_anon_ns(ns)))
|
||||
goto out;
|
||||
|
||||
- if (is_anon_ns(ns)) {
|
||||
+ if (is_anon_ns(ns) && ns == p->mnt_ns) {
|
||||
/*
|
||||
* Ending up with two files referring to the root of the
|
||||
* same anonymous mount namespace would cause an error
|
||||
@@ -3656,16 +3656,7 @@ static int do_move_mount(struct path *ol
|
||||
* twice into the mount tree which would be rejected
|
||||
* later. But be explicit about it right here.
|
||||
*/
|
||||
- if ((is_anon_ns(p->mnt_ns) && ns == p->mnt_ns))
|
||||
- goto out;
|
||||
-
|
||||
- /*
|
||||
- * If this is an anonymous mount tree ensure that mount
|
||||
- * propagation can detect mounts that were just
|
||||
- * propagated to the target mount tree so we don't
|
||||
- * propagate onto them.
|
||||
- */
|
||||
- ns->mntns_flags |= MNTNS_PROPAGATING;
|
||||
+ goto out;
|
||||
} else if (is_anon_ns(p->mnt_ns)) {
|
||||
/*
|
||||
* Don't allow moving an attached mount tree to an
|
||||
@@ -3722,8 +3713,6 @@ static int do_move_mount(struct path *ol
|
||||
if (attached)
|
||||
put_mountpoint(old_mp);
|
||||
out:
|
||||
- if (is_anon_ns(ns))
|
||||
- ns->mntns_flags &= ~MNTNS_PROPAGATING;
|
||||
unlock_mount(mp);
|
||||
if (!err) {
|
||||
if (attached) {
|
||||
--- a/fs/pnode.c
|
||||
+++ b/fs/pnode.c
|
||||
@@ -231,8 +231,8 @@ static int propagate_one(struct mount *m
|
||||
/* skip if mountpoint isn't visible in m */
|
||||
if (!is_subdir(dest_mp->m_dentry, m->mnt.mnt_root))
|
||||
return 0;
|
||||
- /* skip if m is in the anon_ns we are emptying */
|
||||
- if (m->mnt_ns->mntns_flags & MNTNS_PROPAGATING)
|
||||
+ /* skip if m is in the anon_ns */
|
||||
+ if (is_anon_ns(m->mnt_ns))
|
||||
return 0;
|
||||
|
||||
if (peers(m, last_dest)) {
|
@@ -1,4 +1,4 @@
|
||||
From f0579d45f2e03fa3ba0d9466e79a31ea37acb487 Mon Sep 17 00:00:00 2001
|
||||
From 9c2fdcdf9d8963a6fa30005a859816639d0bbf95 Mon Sep 17 00:00:00 2001
|
||||
From: Jens Axboe <axboe@kernel.dk>
|
||||
Date: Tue, 27 May 2025 07:28:54 -0600
|
||||
Subject: Revert "Disable FOP_DONTCACHE for now due to bugs"
|
@@ -1,51 +0,0 @@
|
||||
From bc86aaf0e0256220ca787fdbb57a73429ade1129 Mon Sep 17 00:00:00 2001
|
||||
From: Jens Axboe <axboe@kernel.dk>
|
||||
Date: Tue, 27 May 2025 07:28:52 -0600
|
||||
Subject: mm/filemap: gate dropbehind invalidate on folio !dirty && !writeback
|
||||
|
||||
It's possible for the folio to either get marked for writeback or
|
||||
redirtied. Add a helper, filemap_end_dropbehind(), which guards the
|
||||
folio_unmap_invalidate() call behind check for the folio being both
|
||||
non-dirty and not under writeback AFTER the folio lock has been
|
||||
acquired. Use this helper in folio_end_dropbehind_write().
|
||||
|
||||
Cc: stable@vger.kernel.org
|
||||
Reported-by: Al Viro <viro@zeniv.linux.org.uk>
|
||||
Fixes: fb7d3bc41493 ("mm/filemap: drop streaming/uncached pages when writeback completes")
|
||||
Link: https://lore.kernel.org/linux-fsdevel/20250525083209.GS2023217@ZenIV/
|
||||
Signed-off-by: Jens Axboe <axboe@kernel.dk>
|
||||
Link: https://lore.kernel.org/20250527133255.452431-2-axboe@kernel.dk
|
||||
Signed-off-by: Christian Brauner <brauner@kernel.org>
|
||||
---
|
||||
mm/filemap.c | 13 +++++++++++--
|
||||
1 file changed, 11 insertions(+), 2 deletions(-)
|
||||
|
||||
--- a/mm/filemap.c
|
||||
+++ b/mm/filemap.c
|
||||
@@ -1589,6 +1589,16 @@ int folio_wait_private_2_killable(struct
|
||||
}
|
||||
EXPORT_SYMBOL(folio_wait_private_2_killable);
|
||||
|
||||
+static void filemap_end_dropbehind(struct folio *folio)
|
||||
+{
|
||||
+ struct address_space *mapping = folio->mapping;
|
||||
+
|
||||
+ VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
|
||||
+
|
||||
+ if (mapping && !folio_test_writeback(folio) && !folio_test_dirty(folio))
|
||||
+ folio_unmap_invalidate(mapping, folio, 0);
|
||||
+}
|
||||
+
|
||||
/*
|
||||
* If folio was marked as dropbehind, then pages should be dropped when writeback
|
||||
* completes. Do that now. If we fail, it's likely because of a big folio -
|
||||
@@ -1604,8 +1614,7 @@ static void folio_end_dropbehind_write(s
|
||||
* invalidation in that case.
|
||||
*/
|
||||
if (in_task() && folio_trylock(folio)) {
|
||||
- if (folio->mapping)
|
||||
- folio_unmap_invalidate(folio->mapping, folio, 0);
|
||||
+ filemap_end_dropbehind(folio);
|
||||
folio_unlock(folio);
|
||||
}
|
||||
}
|
@@ -1,4 +1,4 @@
|
||||
From 3b4614564770691cf3a6eb88127268ef6a84180c Mon Sep 17 00:00:00 2001
|
||||
From 0274339dc053815d099e9c336f11c1e9e5641792 Mon Sep 17 00:00:00 2001
|
||||
From: Jens Axboe <axboe@kernel.dk>
|
||||
Date: Tue, 27 May 2025 07:28:55 -0600
|
||||
Subject: mm/filemap: unify read/write dropbehind naming
|
@@ -1,4 +1,4 @@
|
||||
From 6003153e1bc4ad4952773081d7b89aa1ab2274c3 Mon Sep 17 00:00:00 2001
|
||||
From de09560d2e6fbb14ea586063217277e5ebc1bc71 Mon Sep 17 00:00:00 2001
|
||||
From: Jens Axboe <axboe@kernel.dk>
|
||||
Date: Tue, 27 May 2025 07:28:56 -0600
|
||||
Subject: mm/filemap: unify dropbehind flag testing and clearing
|
@@ -1,51 +0,0 @@
|
||||
From fad76185ca91983990c660642151083eb05cbfc0 Mon Sep 17 00:00:00 2001
|
||||
From: Jens Axboe <axboe@kernel.dk>
|
||||
Date: Tue, 27 May 2025 07:28:53 -0600
|
||||
Subject: mm/filemap: use filemap_end_dropbehind() for read invalidation
|
||||
|
||||
Use the filemap_end_dropbehind() helper rather than calling
|
||||
folio_unmap_invalidate() directly, as we need to check if the folio has
|
||||
been redirtied or marked for writeback once the folio lock has been
|
||||
re-acquired.
|
||||
|
||||
Cc: stable@vger.kernel.org
|
||||
Reported-by: Trond Myklebust <trondmy@hammerspace.com>
|
||||
Fixes: 8026e49bff9b ("mm/filemap: add read support for RWF_DONTCACHE")
|
||||
Link: https://lore.kernel.org/linux-fsdevel/ba8a9805331ce258a622feaca266b163db681a10.camel@hammerspace.com/
|
||||
Signed-off-by: Jens Axboe <axboe@kernel.dk>
|
||||
Link: https://lore.kernel.org/20250527133255.452431-3-axboe@kernel.dk
|
||||
Signed-off-by: Christian Brauner <brauner@kernel.org>
|
||||
---
|
||||
mm/filemap.c | 7 +++----
|
||||
1 file changed, 3 insertions(+), 4 deletions(-)
|
||||
|
||||
--- a/mm/filemap.c
|
||||
+++ b/mm/filemap.c
|
||||
@@ -2644,8 +2644,7 @@ static inline bool pos_same_folio(loff_t
|
||||
return (pos1 >> shift == pos2 >> shift);
|
||||
}
|
||||
|
||||
-static void filemap_end_dropbehind_read(struct address_space *mapping,
|
||||
- struct folio *folio)
|
||||
+static void filemap_end_dropbehind_read(struct folio *folio)
|
||||
{
|
||||
if (!folio_test_dropbehind(folio))
|
||||
return;
|
||||
@@ -2653,7 +2652,7 @@ static void filemap_end_dropbehind_read(
|
||||
return;
|
||||
if (folio_trylock(folio)) {
|
||||
if (folio_test_clear_dropbehind(folio))
|
||||
- folio_unmap_invalidate(mapping, folio, 0);
|
||||
+ filemap_end_dropbehind(folio);
|
||||
folio_unlock(folio);
|
||||
}
|
||||
}
|
||||
@@ -2774,7 +2773,7 @@ put_folios:
|
||||
for (i = 0; i < folio_batch_count(&fbatch); i++) {
|
||||
struct folio *folio = fbatch.folios[i];
|
||||
|
||||
- filemap_end_dropbehind_read(mapping, folio);
|
||||
+ filemap_end_dropbehind_read(folio);
|
||||
folio_put(folio);
|
||||
}
|
||||
folio_batch_init(&fbatch);
|
@@ -1,4 +1,4 @@
|
||||
From 61c0b2450f2b85c5053fa4f71d9c619b34d3af6c Mon Sep 17 00:00:00 2001
|
||||
From c041325f222c774573ad73d35939451a4e221e52 Mon Sep 17 00:00:00 2001
|
||||
From: Shivank Garg <shivankg@amd.com>
|
||||
Date: Mon, 26 May 2025 18:28:18 +0000
|
||||
Subject: mm/khugepaged: fix race with folio split/free using temporary
|
@@ -1,4 +1,4 @@
|
||||
From 214092002cbd9945b7cc6314e76ec42b3f588c01 Mon Sep 17 00:00:00 2001
|
||||
From 76653593bdf5fda03717991681b5d60e2af015e9 Mon Sep 17 00:00:00 2001
|
||||
From: Shivank Garg <shivankg@amd.com>
|
||||
Date: Wed, 30 Apr 2025 10:01:51 +0000
|
||||
Subject: mm: add folio_expected_ref_count() for reference count calculation
|
@@ -1,4 +1,4 @@
|
||||
From 0f52f05148589fe4115322a9cc8ffab760091a0a Mon Sep 17 00:00:00 2001
|
||||
From 1e9a258def978a9388a50ae43c85557b0598a7d3 Mon Sep 17 00:00:00 2001
|
||||
From: Pu Lehui <pulehui@huawei.com>
|
||||
Date: Thu, 29 May 2025 15:56:47 +0000
|
||||
Subject: mm: fix uprobe pte be overwritten when expanding vma
|
@@ -1,4 +1,4 @@
|
||||
From 6f1e03b94f7777323aaefd9286d992a1cbd0adf7 Mon Sep 17 00:00:00 2001
|
||||
From 2d8c79ec421253aab9560a47a7e73d678c84585c Mon Sep 17 00:00:00 2001
|
||||
From: Jann Horn <jannh@google.com>
|
||||
Date: Tue, 27 May 2025 23:23:53 +0200
|
||||
Subject: mm/hugetlb: unshare page tables during VMA split, not before
|
@@ -1,4 +1,4 @@
|
||||
From cbd0e47470ea4db11acf3612edf91b5047a90d24 Mon Sep 17 00:00:00 2001
|
||||
From e1280358284feaf844db5c6a76078b2c1738c5ae Mon Sep 17 00:00:00 2001
|
||||
From: Jann Horn <jannh@google.com>
|
||||
Date: Tue, 27 May 2025 23:23:54 +0200
|
||||
Subject: mm/hugetlb: fix huge_pmd_unshare() vs GUP-fast race
|
@@ -1,4 +1,4 @@
|
||||
From cb42e10062f07934d60ce2a9bc154ea7ac0bab5a Mon Sep 17 00:00:00 2001
|
||||
From b36611870ea72c82eb78d90a017658394bdb9690 Mon Sep 17 00:00:00 2001
|
||||
From: SeongJae Park <sj@kernel.org>
|
||||
Date: Mon, 2 Jun 2025 10:49:26 -0700
|
||||
Subject: mm/madvise: handle madvise_lock() failure during race unwinding
|
@@ -1,4 +1,4 @@
|
||||
From 0aeb6f83ff11709bb4b6fc9afa2f742681ca36e1 Mon Sep 17 00:00:00 2001
|
||||
From f0ab226d0eae3aa7e26524efc040026a65ead640 Mon Sep 17 00:00:00 2001
|
||||
From: Thomas Zimmermann <tzimmermann@suse.de>
|
||||
Date: Wed, 28 May 2025 10:02:08 +0200
|
||||
Subject: video: screen_info: Relocate framebuffers behind PCI bridges
|
@@ -1,4 +1,4 @@
|
||||
From 06ff725d11ea8713876187973c834fb595cb26f1 Mon Sep 17 00:00:00 2001
|
||||
From 717bcb42b8cd4119c88249fbfc26d08e25a2ca24 Mon Sep 17 00:00:00 2001
|
||||
From: Thomas Zimmermann <tzimmermann@suse.de>
|
||||
Date: Tue, 3 Jun 2025 17:48:20 +0200
|
||||
Subject: sysfb: Fix screen_info type check for VGA
|
@@ -1,4 +1,4 @@
|
||||
From 7856e6900a09ed537366a5e0c774be8926ee022e Mon Sep 17 00:00:00 2001
|
||||
From 08b1e02fc44abc04d813dbc827812db9ebca0dad Mon Sep 17 00:00:00 2001
|
||||
From: Luo Gengkun <luogengkun@huaweicloud.com>
|
||||
Date: Mon, 21 Apr 2025 03:50:21 +0000
|
||||
Subject: watchdog: fix watchdog may detect false positive of softlockup
|
@@ -1,4 +1,4 @@
|
||||
From 45c6602b7fa2a9dfd05a1f9289504c2437205ce4 Mon Sep 17 00:00:00 2001
|
||||
From ff8503c4997332bb5708c3b77f8a19f334e947a9 Mon Sep 17 00:00:00 2001
|
||||
From: Harshit Agarwal <harshit@nutanix.com>
|
||||
Date: Tue, 25 Feb 2025 18:05:53 +0000
|
||||
Subject: sched/rt: Fix race in push_rt_task
|
@@ -1,4 +1,4 @@
|
||||
From 14b4658d3fa78b169f36e62e722a076a7c50afd8 Mon Sep 17 00:00:00 2001
|
||||
From e02cbdc12bf63da363d7e3391376819241d67fbe Mon Sep 17 00:00:00 2001
|
||||
From: Peter Zijlstra <peterz@infradead.org>
|
||||
Date: Tue, 28 Jan 2025 15:39:49 +0100
|
||||
Subject: sched/fair: Adhere to place_entity() constraints
|
@@ -1,4 +1,4 @@
|
||||
From 65419a1e04de111460c4f38c47f1db39e71c3357 Mon Sep 17 00:00:00 2001
|
||||
From 7257e4f8df6b5783978ab06063fc8529ee2631d5 Mon Sep 17 00:00:00 2001
|
||||
From: Suren Baghdasaryan <surenb@google.com>
|
||||
Date: Wed, 21 May 2025 09:06:02 -0700
|
||||
Subject: alloc_tag: handle module codetag load errors as module load failures
|
@@ -1,113 +0,0 @@
|
||||
From ba4c83076943b477c90015581cc88e262a7d772f Mon Sep 17 00:00:00 2001
|
||||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Wed, 26 Feb 2025 16:01:57 +0100
|
||||
Subject: x86/iopl: Cure TIF_IO_BITMAP inconsistencies
|
||||
|
||||
io_bitmap_exit() is invoked from exit_thread() when a task exits or
|
||||
when a fork fails. In the latter case the exit_thread() cleans up
|
||||
resources which were allocated during fork().
|
||||
|
||||
io_bitmap_exit() invokes task_update_io_bitmap(), which in turn ends up
|
||||
in tss_update_io_bitmap(). tss_update_io_bitmap() operates on the
|
||||
current task. If current has TIF_IO_BITMAP set, but no bitmap installed,
|
||||
tss_update_io_bitmap() crashes with a NULL pointer dereference.
|
||||
|
||||
There are two issues, which lead to that problem:
|
||||
|
||||
1) io_bitmap_exit() should not invoke task_update_io_bitmap() when
|
||||
the task, which is cleaned up, is not the current task. That's a
|
||||
clear indicator for a cleanup after a failed fork().
|
||||
|
||||
2) A task should not have TIF_IO_BITMAP set and neither a bitmap
|
||||
installed nor IOPL emulation level 3 activated.
|
||||
|
||||
This happens when a kernel thread is created in the context of
|
||||
a user space thread, which has TIF_IO_BITMAP set as the thread
|
||||
flags are copied and the IO bitmap pointer is cleared.
|
||||
|
||||
Other than in the failed fork() case this has no impact because
|
||||
kernel threads including IO workers never return to user space and
|
||||
therefore never invoke tss_update_io_bitmap().
|
||||
|
||||
Cure this by adding the missing cleanups and checks:
|
||||
|
||||
1) Prevent io_bitmap_exit() from invoking task_update_io_bitmap() if
|
||||
the to be cleaned up task is not the current task.
|
||||
|
||||
2) Clear TIF_IO_BITMAP in copy_thread() unconditionally. For user
|
||||
space forks it is set later, when the IO bitmap is inherited in
|
||||
io_bitmap_share().
|
||||
|
||||
For paranoia's sake, add a warning into tss_update_io_bitmap() to catch
|
||||
the case, when that code is invoked with inconsistent state.
|
||||
|
||||
Fixes: ea5f1cd7ab49 ("x86/ioperm: Remove bitmap if all permissions dropped")
|
||||
Reported-by: syzbot+e2b1803445d236442e54@syzkaller.appspotmail.com
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
|
||||
Cc: stable@vger.kernel.org
|
||||
Link: https://lore.kernel.org/87wmdceom2.ffs@tglx
|
||||
---
|
||||
arch/x86/kernel/ioport.c | 13 +++++++++----
|
||||
arch/x86/kernel/process.c | 6 ++++++
|
||||
2 files changed, 15 insertions(+), 4 deletions(-)
|
||||
|
||||
--- a/arch/x86/kernel/ioport.c
|
||||
+++ b/arch/x86/kernel/ioport.c
|
||||
@@ -33,8 +33,9 @@ void io_bitmap_share(struct task_struct
|
||||
set_tsk_thread_flag(tsk, TIF_IO_BITMAP);
|
||||
}
|
||||
|
||||
-static void task_update_io_bitmap(struct task_struct *tsk)
|
||||
+static void task_update_io_bitmap(void)
|
||||
{
|
||||
+ struct task_struct *tsk = current;
|
||||
struct thread_struct *t = &tsk->thread;
|
||||
|
||||
if (t->iopl_emul == 3 || t->io_bitmap) {
|
||||
@@ -54,7 +55,12 @@ void io_bitmap_exit(struct task_struct *
|
||||
struct io_bitmap *iobm = tsk->thread.io_bitmap;
|
||||
|
||||
tsk->thread.io_bitmap = NULL;
|
||||
- task_update_io_bitmap(tsk);
|
||||
+ /*
|
||||
+ * Don't touch the TSS when invoked on a failed fork(). TSS
|
||||
+ * reflects the state of @current and not the state of @tsk.
|
||||
+ */
|
||||
+ if (tsk == current)
|
||||
+ task_update_io_bitmap();
|
||||
if (iobm && refcount_dec_and_test(&iobm->refcnt))
|
||||
kfree(iobm);
|
||||
}
|
||||
@@ -192,8 +198,7 @@ SYSCALL_DEFINE1(iopl, unsigned int, leve
|
||||
}
|
||||
|
||||
t->iopl_emul = level;
|
||||
- task_update_io_bitmap(current);
|
||||
-
|
||||
+ task_update_io_bitmap();
|
||||
return 0;
|
||||
}
|
||||
|
||||
--- a/arch/x86/kernel/process.c
|
||||
+++ b/arch/x86/kernel/process.c
|
||||
@@ -181,6 +181,7 @@ int copy_thread(struct task_struct *p, c
|
||||
frame->ret_addr = (unsigned long) ret_from_fork_asm;
|
||||
p->thread.sp = (unsigned long) fork_frame;
|
||||
p->thread.io_bitmap = NULL;
|
||||
+ clear_tsk_thread_flag(p, TIF_IO_BITMAP);
|
||||
p->thread.iopl_warn = 0;
|
||||
memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
|
||||
|
||||
@@ -469,6 +470,11 @@ void native_tss_update_io_bitmap(void)
|
||||
} else {
|
||||
struct io_bitmap *iobm = t->io_bitmap;
|
||||
|
||||
+ if (WARN_ON_ONCE(!iobm)) {
|
||||
+ clear_thread_flag(TIF_IO_BITMAP);
|
||||
+ native_tss_invalidate_io_bitmap();
|
||||
+ }
|
||||
+
|
||||
/*
|
||||
* Only copy bitmap data when the sequence number differs. The
|
||||
* update time is accounted to the incoming task.
|
@@ -1,4 +1,4 @@
|
||||
From 3848ddd6068c425b732da6e8c78b047ed28c6114 Mon Sep 17 00:00:00 2001
|
||||
From 57fdc30dcdad60e3b868682cc1e77083c091aef5 Mon Sep 17 00:00:00 2001
|
||||
From: Chuck Lever <chuck.lever@oracle.com>
|
||||
Date: Sun, 27 Apr 2025 12:39:59 -0400
|
||||
Subject: svcrdma: Unregister the device if svc_rdma_accept() fails
|
||||
@@ -19,7 +19,7 @@ Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
|
||||
|
||||
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
|
||||
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
|
||||
@@ -575,6 +575,7 @@ static struct svc_xprt *svc_rdma_accept(
|
||||
@@ -577,6 +577,7 @@ static struct svc_xprt *svc_rdma_accept(
|
||||
if (newxprt->sc_qp && !IS_ERR(newxprt->sc_qp))
|
||||
ib_destroy_qp(newxprt->sc_qp);
|
||||
rdma_destroy_id(newxprt->sc_cm_id);
|
@@ -1,4 +1,4 @@
|
||||
From 38b409dd5c2fd9496fde05db4fb538a7e3593922 Mon Sep 17 00:00:00 2001
|
||||
From 92e99ba55ff0ce68ea7567331beda21861da2028 Mon Sep 17 00:00:00 2001
|
||||
From: Chuck Lever <chuck.lever@oracle.com>
|
||||
Date: Wed, 21 May 2025 16:34:13 -0400
|
||||
Subject: SUNRPC: Prevent hang on NFS mount with xprtsec=[m]tls
|
@@ -1,4 +1,4 @@
|
||||
From c3e0e5bd29d97f8e5663026e8c2f25e08f1c4544 Mon Sep 17 00:00:00 2001
|
||||
From ac0c5ac5efecec7f731a1d80ec40ef3d34adc5ee Mon Sep 17 00:00:00 2001
|
||||
From: Saurabh Sengar <ssengar@linux.microsoft.com>
|
||||
Date: Thu, 29 May 2025 03:18:30 -0700
|
||||
Subject: hv_netvsc: fix potential deadlock in netvsc_vf_setxdp()
|
@@ -1,4 +1,4 @@
|
||||
From 0f48fca427618cecf6683fa8e46cb8d0b66bb93d Mon Sep 17 00:00:00 2001
|
||||
From 485c82a86fb97fb86cac303348c85b6cf71fd787 Mon Sep 17 00:00:00 2001
|
||||
From: Jakub Kicinski <kuba@kernel.org>
|
||||
Date: Mon, 9 Jun 2025 17:12:44 -0700
|
||||
Subject: net: clear the dst when changing skb protocol
|
||||
@@ -53,7 +53,7 @@ Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
||||
|
||||
--- a/net/core/filter.c
|
||||
+++ b/net/core/filter.c
|
||||
@@ -3232,6 +3232,13 @@ static const struct bpf_func_proto bpf_s
|
||||
@@ -3233,6 +3233,13 @@ static const struct bpf_func_proto bpf_s
|
||||
.arg1_type = ARG_PTR_TO_CTX,
|
||||
};
|
||||
|
||||
@@ -67,7 +67,7 @@ Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
||||
static int bpf_skb_generic_push(struct sk_buff *skb, u32 off, u32 len)
|
||||
{
|
||||
/* Caller already did skb_cow() with len as headroom,
|
||||
@@ -3328,7 +3335,7 @@ static int bpf_skb_proto_4_to_6(struct s
|
||||
@@ -3329,7 +3336,7 @@ static int bpf_skb_proto_4_to_6(struct s
|
||||
}
|
||||
}
|
||||
|
||||
@@ -76,7 +76,7 @@ Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
||||
skb_clear_hash(skb);
|
||||
|
||||
return 0;
|
||||
@@ -3358,7 +3365,7 @@ static int bpf_skb_proto_6_to_4(struct s
|
||||
@@ -3359,7 +3366,7 @@ static int bpf_skb_proto_6_to_4(struct s
|
||||
}
|
||||
}
|
||||
|
||||
@@ -85,7 +85,7 @@ Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
||||
skb_clear_hash(skb);
|
||||
|
||||
return 0;
|
||||
@@ -3549,10 +3556,10 @@ static int bpf_skb_net_grow(struct sk_bu
|
||||
@@ -3550,10 +3557,10 @@ static int bpf_skb_net_grow(struct sk_bu
|
||||
/* Match skb->protocol to new outer l3 protocol */
|
||||
if (skb->protocol == htons(ETH_P_IP) &&
|
||||
flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6)
|
||||
@@ -98,7 +98,7 @@ Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
||||
}
|
||||
|
||||
if (skb_is_gso(skb)) {
|
||||
@@ -3605,10 +3612,10 @@ static int bpf_skb_net_shrink(struct sk_
|
||||
@@ -3606,10 +3613,10 @@ static int bpf_skb_net_shrink(struct sk_
|
||||
/* Match skb->protocol to new outer l3 protocol */
|
||||
if (skb->protocol == htons(ETH_P_IP) &&
|
||||
flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV6)
|
@@ -1,4 +1,4 @@
|
||||
From 59765af017c206b162b2ceb8d56a171e40a17719 Mon Sep 17 00:00:00 2001
|
||||
From 2bf1f4a3adcecc53c1012e460d1412cece3747ce Mon Sep 17 00:00:00 2001
|
||||
From: Eric Dumazet <edumazet@google.com>
|
||||
Date: Wed, 11 Jun 2025 08:35:01 +0000
|
||||
Subject: net_sched: sch_sfq: reject invalid perturb period
|
||||
@@ -35,7 +35,7 @@ Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
||||
|
||||
--- a/net/sched/sch_sfq.c
|
||||
+++ b/net/sched/sch_sfq.c
|
||||
@@ -653,6 +653,14 @@ static int sfq_change(struct Qdisc *sch,
|
||||
@@ -656,6 +656,14 @@ static int sfq_change(struct Qdisc *sch,
|
||||
NL_SET_ERR_MSG_MOD(extack, "invalid quantum");
|
||||
return -EINVAL;
|
||||
}
|
||||
@@ -50,7 +50,7 @@ Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
||||
if (ctl_v1 && !red_check_params(ctl_v1->qth_min, ctl_v1->qth_max,
|
||||
ctl_v1->Wlog, ctl_v1->Scell_log, NULL))
|
||||
return -EINVAL;
|
||||
@@ -669,14 +677,12 @@ static int sfq_change(struct Qdisc *sch,
|
||||
@@ -672,14 +680,12 @@ static int sfq_change(struct Qdisc *sch,
|
||||
headdrop = q->headdrop;
|
||||
maxdepth = q->maxdepth;
|
||||
maxflows = q->maxflows;
|
@@ -1,4 +1,4 @@
|
||||
From d7b5f2aa34c56bd2a2d3cda2a7eb7aeb24df6179 Mon Sep 17 00:00:00 2001
|
||||
From 90a5248443f925040b46e32fcf6715615c73e396 Mon Sep 17 00:00:00 2001
|
||||
From: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
|
||||
Date: Fri, 6 Jun 2025 13:50:32 +0100
|
||||
Subject: mm/vma: reset VMA iterator on commit_merge() OOM failure
|
@@ -1,4 +1,4 @@
|
||||
From db96fe27668a3bb56fa5d745d1c2eed49a95a56f Mon Sep 17 00:00:00 2001
|
||||
From 7c9d5350d8acfe1b876a8acabdf247b44a803d58 Mon Sep 17 00:00:00 2001
|
||||
From: Ryan Roberts <ryan.roberts@arm.com>
|
||||
Date: Fri, 6 Jun 2025 10:28:07 +0100
|
||||
Subject: mm: close theoretical race where stale TLB entries could linger
|
@@ -1,4 +1,4 @@
|
||||
From f8c6b0801edd6f50057610c67120ffb42027f2c2 Mon Sep 17 00:00:00 2001
|
||||
From 862a81c79f0bea8ede0352b637b44716f02f71b9 Mon Sep 17 00:00:00 2001
|
||||
From: Jens Axboe <axboe@kernel.dk>
|
||||
Date: Fri, 13 Jun 2025 11:01:49 -0600
|
||||
Subject: io_uring/kbuf: don't truncate end buffer for multiple buffer peeks
|
@@ -1,4 +1,4 @@
|
||||
From a2ef8773db38d0c3a41761dbed6fc57afa440161 Mon Sep 17 00:00:00 2001
|
||||
From bb3d761325a1707c8064a3d7dd556ed6a501a2e7 Mon Sep 17 00:00:00 2001
|
||||
From: Jens Axboe <axboe@kernel.dk>
|
||||
Date: Fri, 13 Jun 2025 13:37:41 -0600
|
||||
Subject: nvme: always punt polled uring_cmd end_io work to task_work
|
@@ -1,4 +1,4 @@
|
||||
From bb51adf56b5adc7075252cd17136c2288c116602 Mon Sep 17 00:00:00 2001
|
||||
From a57621608b2cbcbd0c7da184e9012b9b111a8577 Mon Sep 17 00:00:00 2001
|
||||
From: Damien Le Moal <dlemoal@kernel.org>
|
||||
Date: Wed, 11 Jun 2025 09:59:15 +0900
|
||||
Subject: block: Clear BIO_EMULATES_ZONE_APPEND flag on BIO completion
|
@@ -1,51 +0,0 @@
|
||||
From b504e1cd491c55390370059280d5fbaa045d5543 Mon Sep 17 00:00:00 2001
|
||||
From: Oleg Nesterov <oleg@redhat.com>
|
||||
Date: Fri, 13 Jun 2025 19:26:50 +0200
|
||||
Subject: posix-cpu-timers: fix race between handle_posix_cpu_timers() and
|
||||
posix_cpu_timer_del()
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
If an exiting non-autoreaping task has already passed exit_notify() and
|
||||
calls handle_posix_cpu_timers() from IRQ, it can be reaped by its parent
|
||||
or debugger right after unlock_task_sighand().
|
||||
|
||||
If a concurrent posix_cpu_timer_del() runs at that moment, it won't be
|
||||
able to detect timer->it.cpu.firing != 0: cpu_timer_task_rcu() and/or
|
||||
lock_task_sighand() will fail.
|
||||
|
||||
Add the tsk->exit_state check into run_posix_cpu_timers() to fix this.
|
||||
|
||||
This fix is not needed if CONFIG_POSIX_CPU_TIMERS_TASK_WORK=y, because
|
||||
exit_task_work() is called before exit_notify(). But the check still
|
||||
makes sense, task_work_add(&tsk->posix_cputimers_work.work) will fail
|
||||
anyway in this case.
|
||||
|
||||
Cc: stable@vger.kernel.org
|
||||
Reported-by: Benoît Sevens <bsevens@google.com>
|
||||
Fixes: 0bdd2ed4138e ("sched: run_posix_cpu_timers: Don't check ->exit_state, use lock_task_sighand()")
|
||||
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
|
||||
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
|
||||
---
|
||||
kernel/time/posix-cpu-timers.c | 9 +++++++++
|
||||
1 file changed, 9 insertions(+)
|
||||
|
||||
--- a/kernel/time/posix-cpu-timers.c
|
||||
+++ b/kernel/time/posix-cpu-timers.c
|
||||
@@ -1406,6 +1406,15 @@ void run_posix_cpu_timers(void)
|
||||
lockdep_assert_irqs_disabled();
|
||||
|
||||
/*
|
||||
+ * Ensure that release_task(tsk) can't happen while
|
||||
+ * handle_posix_cpu_timers() is running. Otherwise, a concurrent
|
||||
+ * posix_cpu_timer_del() may fail to lock_task_sighand(tsk) and
|
||||
+ * miss timer->it.cpu.firing != 0.
|
||||
+ */
|
||||
+ if (tsk->exit_state)
|
||||
+ return;
|
||||
+
|
||||
+ /*
|
||||
* If the actual expiry is deferred to task work context and the
|
||||
* work is already scheduled there is no point to do anything here.
|
||||
*/
|
@@ -1,4 +1,4 @@
|
||||
From 56ae62470a95ac8249c43f5c0d50da2a83c350e0 Mon Sep 17 00:00:00 2001
|
||||
From 7fc5a2cbcc8459cab6ae8c5dd1220768027ccb70 Mon Sep 17 00:00:00 2001
|
||||
From: Jens Axboe <axboe@kernel.dk>
|
||||
Date: Wed, 11 Jun 2025 08:48:46 -0600
|
||||
Subject: block: use plug request list tail for one-shot backmerge attempt
|
149
debian/patches/patchset-pf/fixes/0036-Revert-mm-execmem-Unify-early-execmem_cache-behaviou.patch
vendored
Normal file
149
debian/patches/patchset-pf/fixes/0036-Revert-mm-execmem-Unify-early-execmem_cache-behaviou.patch
vendored
Normal file
@@ -0,0 +1,149 @@
|
||||
From 8ad4520fc849262ab23adbabebd366d4755035bc Mon Sep 17 00:00:00 2001
|
||||
From: "Mike Rapoport (Microsoft)" <rppt@kernel.org>
|
||||
Date: Tue, 3 Jun 2025 14:14:45 +0300
|
||||
Subject: Revert "mm/execmem: Unify early execmem_cache behaviour"
|
||||
|
||||
The commit d6d1e3e6580c ("mm/execmem: Unify early execmem_cache
|
||||
behaviour") changed early behaviour of execmem ROX cache to allow its
|
||||
usage in early x86 code that allocates text pages when
|
||||
CONFIG_MITIGATION_ITS is enabled.
|
||||
|
||||
The permission management of the pages allocated from execmem for ITS
|
||||
mitigation is now completely contained in arch/x86/kernel/alternatives.c
|
||||
and therefore there is no need to special case early allocations in
|
||||
execmem.
|
||||
|
||||
This reverts commit d6d1e3e6580ca35071ad474381f053cbf1fb6414.
|
||||
|
||||
Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
|
||||
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
|
||||
Cc: stable@vger.kernel.org
|
||||
Link: https://lkml.kernel.org/r/20250603111446.2609381-6-rppt@kernel.org
|
||||
---
|
||||
arch/x86/mm/init_32.c | 3 ---
|
||||
arch/x86/mm/init_64.c | 3 ---
|
||||
include/linux/execmem.h | 8 +-------
|
||||
mm/execmem.c | 40 +++-------------------------------------
|
||||
4 files changed, 4 insertions(+), 50 deletions(-)
|
||||
|
||||
--- a/arch/x86/mm/init_32.c
|
||||
+++ b/arch/x86/mm/init_32.c
|
||||
@@ -30,7 +30,6 @@
|
||||
#include <linux/initrd.h>
|
||||
#include <linux/cpumask.h>
|
||||
#include <linux/gfp.h>
|
||||
-#include <linux/execmem.h>
|
||||
|
||||
#include <asm/asm.h>
|
||||
#include <asm/bios_ebda.h>
|
||||
@@ -756,8 +755,6 @@ void mark_rodata_ro(void)
|
||||
pr_info("Write protecting kernel text and read-only data: %luk\n",
|
||||
size >> 10);
|
||||
|
||||
- execmem_cache_make_ro();
|
||||
-
|
||||
kernel_set_to_readonly = 1;
|
||||
|
||||
#ifdef CONFIG_CPA_DEBUG
|
||||
--- a/arch/x86/mm/init_64.c
|
||||
+++ b/arch/x86/mm/init_64.c
|
||||
@@ -34,7 +34,6 @@
|
||||
#include <linux/gfp.h>
|
||||
#include <linux/kcore.h>
|
||||
#include <linux/bootmem_info.h>
|
||||
-#include <linux/execmem.h>
|
||||
|
||||
#include <asm/processor.h>
|
||||
#include <asm/bios_ebda.h>
|
||||
@@ -1392,8 +1391,6 @@ void mark_rodata_ro(void)
|
||||
(end - start) >> 10);
|
||||
set_memory_ro(start, (end - start) >> PAGE_SHIFT);
|
||||
|
||||
- execmem_cache_make_ro();
|
||||
-
|
||||
kernel_set_to_readonly = 1;
|
||||
|
||||
/*
|
||||
--- a/include/linux/execmem.h
|
||||
+++ b/include/linux/execmem.h
|
||||
@@ -54,7 +54,7 @@ enum execmem_range_flags {
|
||||
EXECMEM_ROX_CACHE = (1 << 1),
|
||||
};
|
||||
|
||||
-#if defined(CONFIG_ARCH_HAS_EXECMEM_ROX) && defined(CONFIG_EXECMEM)
|
||||
+#ifdef CONFIG_ARCH_HAS_EXECMEM_ROX
|
||||
/**
|
||||
* execmem_fill_trapping_insns - set memory to contain instructions that
|
||||
* will trap
|
||||
@@ -94,15 +94,9 @@ int execmem_make_temp_rw(void *ptr, size
|
||||
* Return: 0 on success or negative error code on failure.
|
||||
*/
|
||||
int execmem_restore_rox(void *ptr, size_t size);
|
||||
-
|
||||
-/*
|
||||
- * Called from mark_readonly(), where the system transitions to ROX.
|
||||
- */
|
||||
-void execmem_cache_make_ro(void);
|
||||
#else
|
||||
static inline int execmem_make_temp_rw(void *ptr, size_t size) { return 0; }
|
||||
static inline int execmem_restore_rox(void *ptr, size_t size) { return 0; }
|
||||
-static inline void execmem_cache_make_ro(void) { }
|
||||
#endif
|
||||
|
||||
/**
|
||||
--- a/mm/execmem.c
|
||||
+++ b/mm/execmem.c
|
||||
@@ -254,34 +254,6 @@ out_unlock:
|
||||
return ptr;
|
||||
}
|
||||
|
||||
-static bool execmem_cache_rox = false;
|
||||
-
|
||||
-void execmem_cache_make_ro(void)
|
||||
-{
|
||||
- struct maple_tree *free_areas = &execmem_cache.free_areas;
|
||||
- struct maple_tree *busy_areas = &execmem_cache.busy_areas;
|
||||
- MA_STATE(mas_free, free_areas, 0, ULONG_MAX);
|
||||
- MA_STATE(mas_busy, busy_areas, 0, ULONG_MAX);
|
||||
- struct mutex *mutex = &execmem_cache.mutex;
|
||||
- void *area;
|
||||
-
|
||||
- execmem_cache_rox = true;
|
||||
-
|
||||
- mutex_lock(mutex);
|
||||
-
|
||||
- mas_for_each(&mas_free, area, ULONG_MAX) {
|
||||
- unsigned long pages = mas_range_len(&mas_free) >> PAGE_SHIFT;
|
||||
- set_memory_ro(mas_free.index, pages);
|
||||
- }
|
||||
-
|
||||
- mas_for_each(&mas_busy, area, ULONG_MAX) {
|
||||
- unsigned long pages = mas_range_len(&mas_busy) >> PAGE_SHIFT;
|
||||
- set_memory_ro(mas_busy.index, pages);
|
||||
- }
|
||||
-
|
||||
- mutex_unlock(mutex);
|
||||
-}
|
||||
-
|
||||
static int execmem_cache_populate(struct execmem_range *range, size_t size)
|
||||
{
|
||||
unsigned long vm_flags = VM_ALLOW_HUGE_VMAP;
|
||||
@@ -302,15 +274,9 @@ static int execmem_cache_populate(struct
|
||||
/* fill memory with instructions that will trap */
|
||||
execmem_fill_trapping_insns(p, alloc_size, /* writable = */ true);
|
||||
|
||||
- if (execmem_cache_rox) {
|
||||
- err = set_memory_rox((unsigned long)p, vm->nr_pages);
|
||||
- if (err)
|
||||
- goto err_free_mem;
|
||||
- } else {
|
||||
- err = set_memory_x((unsigned long)p, vm->nr_pages);
|
||||
- if (err)
|
||||
- goto err_free_mem;
|
||||
- }
|
||||
+ err = set_memory_rox((unsigned long)p, vm->nr_pages);
|
||||
+ if (err)
|
||||
+ goto err_free_mem;
|
||||
|
||||
err = execmem_cache_add(p, alloc_size);
|
||||
if (err)
|
63
debian/patches/patchset-pf/fixes/0037-x86-virt-tdx-Avoid-indirect-calls-to-TDX-assembly-fu.patch
vendored
Normal file
63
debian/patches/patchset-pf/fixes/0037-x86-virt-tdx-Avoid-indirect-calls-to-TDX-assembly-fu.patch
vendored
Normal file
@@ -0,0 +1,63 @@
|
||||
From 85bfdd784bd61df94fd42daca141ed173f647e8c Mon Sep 17 00:00:00 2001
|
||||
From: Kai Huang <kai.huang@intel.com>
|
||||
Date: Sat, 7 Jun 2025 01:07:37 +1200
|
||||
Subject: x86/virt/tdx: Avoid indirect calls to TDX assembly functions
|
||||
|
||||
Two 'static inline' TDX helper functions (sc_retry() and
|
||||
sc_retry_prerr()) take function pointer arguments which refer to
|
||||
assembly functions. Normally, the compiler inlines the TDX helper,
|
||||
realizes that the function pointer targets are completely static --
|
||||
thus can be resolved at compile time -- and generates direct call
|
||||
instructions.
|
||||
|
||||
But, other times (like when CONFIG_CC_OPTIMIZE_FOR_SIZE=y), the
|
||||
compiler declines to inline the helpers and will instead generate
|
||||
indirect call instructions.
|
||||
|
||||
Indirect calls to assembly functions require special annotation (for
|
||||
various Control Flow Integrity mechanisms). But TDX assembly
|
||||
functions lack the special annotations and can only be called
|
||||
directly.
|
||||
|
||||
Annotate both the helpers as '__always_inline' to prod the compiler
|
||||
into maintaining the direct calls. There is no guarantee here, but
|
||||
Peter has volunteered to report the compiler bug if this assumption
|
||||
ever breaks[1].
|
||||
|
||||
Fixes: 1e66a7e27539 ("x86/virt/tdx: Handle SEAMCALL no entropy error in common code")
|
||||
Fixes: df01f5ae07dd ("x86/virt/tdx: Add SEAMCALL error printing for module initialization")
|
||||
Signed-off-by: Kai Huang <kai.huang@intel.com>
|
||||
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
|
||||
Cc: stable@vger.kernel.org
|
||||
Link: https://lore.kernel.org/lkml/20250605145914.GW39944@noisy.programming.kicks-ass.net/ [1]
|
||||
Link: https://lore.kernel.org/all/20250606130737.30713-1-kai.huang%40intel.com
|
||||
---
|
||||
arch/x86/include/asm/tdx.h | 2 +-
|
||||
arch/x86/virt/vmx/tdx/tdx.c | 5 +++--
|
||||
2 files changed, 4 insertions(+), 3 deletions(-)
|
||||
|
||||
--- a/arch/x86/include/asm/tdx.h
|
||||
+++ b/arch/x86/include/asm/tdx.h
|
||||
@@ -100,7 +100,7 @@ void tdx_init(void);
|
||||
|
||||
typedef u64 (*sc_func_t)(u64 fn, struct tdx_module_args *args);
|
||||
|
||||
-static inline u64 sc_retry(sc_func_t func, u64 fn,
|
||||
+static __always_inline u64 sc_retry(sc_func_t func, u64 fn,
|
||||
struct tdx_module_args *args)
|
||||
{
|
||||
int retry = RDRAND_RETRY_LOOPS;
|
||||
--- a/arch/x86/virt/vmx/tdx/tdx.c
|
||||
+++ b/arch/x86/virt/vmx/tdx/tdx.c
|
||||
@@ -69,8 +69,9 @@ static inline void seamcall_err_ret(u64
|
||||
args->r9, args->r10, args->r11);
|
||||
}
|
||||
|
||||
-static inline int sc_retry_prerr(sc_func_t func, sc_err_func_t err_func,
|
||||
- u64 fn, struct tdx_module_args *args)
|
||||
+static __always_inline int sc_retry_prerr(sc_func_t func,
|
||||
+ sc_err_func_t err_func,
|
||||
+ u64 fn, struct tdx_module_args *args)
|
||||
{
|
||||
u64 sret = sc_retry(func, fn, args);
|
||||
|
31
debian/patches/patchset-pf/fixes/0038-x86-mm-pat-don-t-collapse-pages-without-PSE-set.patch
vendored
Normal file
31
debian/patches/patchset-pf/fixes/0038-x86-mm-pat-don-t-collapse-pages-without-PSE-set.patch
vendored
Normal file
@@ -0,0 +1,31 @@
|
||||
From a94cf5c6e7e31be9d4788916ce847adb15735d81 Mon Sep 17 00:00:00 2001
|
||||
From: Juergen Gross <jgross@suse.com>
|
||||
Date: Tue, 3 Jun 2025 14:14:41 +0300
|
||||
Subject: x86/mm/pat: don't collapse pages without PSE set
|
||||
|
||||
Collapsing pages to a leaf PMD or PUD should be done only if
|
||||
X86_FEATURE_PSE is available, which is not the case when running e.g.
|
||||
as a Xen PV guest.
|
||||
|
||||
Fixes: 41d88484c71c ("x86/mm/pat: restore large ROX pages after fragmentation")
|
||||
Signed-off-by: Juergen Gross <jgross@suse.com>
|
||||
Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
|
||||
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
|
||||
Cc: stable@vger.kernel.org
|
||||
Link: https://lore.kernel.org/r/20250528123557.12847-3-jgross@suse.com
|
||||
---
|
||||
arch/x86/mm/pat/set_memory.c | 3 +++
|
||||
1 file changed, 3 insertions(+)
|
||||
|
||||
--- a/arch/x86/mm/pat/set_memory.c
|
||||
+++ b/arch/x86/mm/pat/set_memory.c
|
||||
@@ -1257,6 +1257,9 @@ static int collapse_pmd_page(pmd_t *pmd,
|
||||
pgprot_t pgprot;
|
||||
int i = 0;
|
||||
|
||||
+ if (!cpu_feature_enabled(X86_FEATURE_PSE))
|
||||
+ return 0;
|
||||
+
|
||||
addr &= PMD_MASK;
|
||||
pte = pte_offset_kernel(pmd, addr);
|
||||
first = *pte;
|
34
debian/patches/patchset-pf/fixes/0039-x86-Kconfig-only-enable-ROX-cache-in-execmem-when-ST.patch
vendored
Normal file
34
debian/patches/patchset-pf/fixes/0039-x86-Kconfig-only-enable-ROX-cache-in-execmem-when-ST.patch
vendored
Normal file
@@ -0,0 +1,34 @@
|
||||
From 8f28d595d167316469bb33b701e27b4b79c1aab1 Mon Sep 17 00:00:00 2001
|
||||
From: "Mike Rapoport (Microsoft)" <rppt@kernel.org>
|
||||
Date: Tue, 3 Jun 2025 14:14:42 +0300
|
||||
Subject: x86/Kconfig: only enable ROX cache in execmem when STRICT_MODULE_RWX
|
||||
is set
|
||||
|
||||
Currently ROX cache in execmem is enabled regardless of
|
||||
STRICT_MODULE_RWX setting. This breaks an assumption that module memory
|
||||
is writable when STRICT_MODULE_RWX is disabled, for instance for kernel
|
||||
debugging.
|
||||
|
||||
Only enable ROX cache in execmem when STRICT_MODULE_RWX is set to
|
||||
restore the original behaviour of module text permissions.
|
||||
|
||||
Fixes: 64f6a4e10c05 ("x86: re-enable EXECMEM_ROX support")
|
||||
Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
|
||||
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
|
||||
Cc: stable@vger.kernel.org
|
||||
Link: https://lkml.kernel.org/r/20250603111446.2609381-3-rppt@kernel.org
|
||||
---
|
||||
arch/x86/Kconfig | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
--- a/arch/x86/Kconfig
|
||||
+++ b/arch/x86/Kconfig
|
||||
@@ -88,7 +88,7 @@ config X86
|
||||
select ARCH_HAS_DMA_OPS if GART_IOMMU || XEN
|
||||
select ARCH_HAS_EARLY_DEBUG if KGDB
|
||||
select ARCH_HAS_ELF_RANDOMIZE
|
||||
- select ARCH_HAS_EXECMEM_ROX if X86_64
|
||||
+ select ARCH_HAS_EXECMEM_ROX if X86_64 && STRICT_MODULE_RWX
|
||||
select ARCH_HAS_FAST_MULTIPLIER
|
||||
select ARCH_HAS_FORTIFY_SOURCE
|
||||
select ARCH_HAS_GCOV_PROFILE_ALL
|
110
debian/patches/patchset-pf/fixes/0040-x86-its-move-its_pages-array-to-struct-mod_arch_spec.patch
vendored
Normal file
110
debian/patches/patchset-pf/fixes/0040-x86-its-move-its_pages-array-to-struct-mod_arch_spec.patch
vendored
Normal file
@@ -0,0 +1,110 @@
|
||||
From 24fd2e3cef1b98f4417b8015ba24a8a4dcaae0c1 Mon Sep 17 00:00:00 2001
|
||||
From: "Mike Rapoport (Microsoft)" <rppt@kernel.org>
|
||||
Date: Tue, 3 Jun 2025 14:14:43 +0300
|
||||
Subject: x86/its: move its_pages array to struct mod_arch_specific
|
||||
|
||||
The pages with ITS thunks allocated for modules are tracked by an
|
||||
array in 'struct module'.
|
||||
|
||||
Since this is a very architecture-specific data structure, move it to
|
||||
'struct mod_arch_specific'.
|
||||
|
||||
No functional changes.
|
||||
|
||||
Fixes: 872df34d7c51 ("x86/its: Use dynamic thunks for indirect branches")
|
||||
Suggested-by: Peter Zijlstra (Intel) <peterz@infradead.org>
|
||||
Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
|
||||
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
|
||||
Cc: stable@vger.kernel.org
|
||||
Link: https://lkml.kernel.org/r/20250603111446.2609381-4-rppt@kernel.org
|
||||
---
|
||||
arch/x86/include/asm/module.h | 8 ++++++++
|
||||
arch/x86/kernel/alternative.c | 19 ++++++++++---------
|
||||
include/linux/module.h | 5 -----
|
||||
3 files changed, 18 insertions(+), 14 deletions(-)
|
||||
|
||||
--- a/arch/x86/include/asm/module.h
|
||||
+++ b/arch/x86/include/asm/module.h
|
||||
@@ -5,12 +5,20 @@
|
||||
#include <asm-generic/module.h>
|
||||
#include <asm/orc_types.h>
|
||||
|
||||
+struct its_array {
|
||||
+#ifdef CONFIG_MITIGATION_ITS
|
||||
+ void **pages;
|
||||
+ int num;
|
||||
+#endif
|
||||
+};
|
||||
+
|
||||
struct mod_arch_specific {
|
||||
#ifdef CONFIG_UNWINDER_ORC
|
||||
unsigned int num_orcs;
|
||||
int *orc_unwind_ip;
|
||||
struct orc_entry *orc_unwind;
|
||||
#endif
|
||||
+ struct its_array its_pages;
|
||||
};
|
||||
|
||||
#endif /* _ASM_X86_MODULE_H */
|
||||
--- a/arch/x86/kernel/alternative.c
|
||||
+++ b/arch/x86/kernel/alternative.c
|
||||
@@ -195,8 +195,8 @@ void its_fini_mod(struct module *mod)
|
||||
its_page = NULL;
|
||||
mutex_unlock(&text_mutex);
|
||||
|
||||
- for (int i = 0; i < mod->its_num_pages; i++) {
|
||||
- void *page = mod->its_page_array[i];
|
||||
+ for (int i = 0; i < mod->arch.its_pages.num; i++) {
|
||||
+ void *page = mod->arch.its_pages.pages[i];
|
||||
execmem_restore_rox(page, PAGE_SIZE);
|
||||
}
|
||||
}
|
||||
@@ -206,11 +206,11 @@ void its_free_mod(struct module *mod)
|
||||
if (!cpu_feature_enabled(X86_FEATURE_INDIRECT_THUNK_ITS))
|
||||
return;
|
||||
|
||||
- for (int i = 0; i < mod->its_num_pages; i++) {
|
||||
- void *page = mod->its_page_array[i];
|
||||
+ for (int i = 0; i < mod->arch.its_pages.num; i++) {
|
||||
+ void *page = mod->arch.its_pages.pages[i];
|
||||
execmem_free(page);
|
||||
}
|
||||
- kfree(mod->its_page_array);
|
||||
+ kfree(mod->arch.its_pages.pages);
|
||||
}
|
||||
#endif /* CONFIG_MODULES */
|
||||
|
||||
@@ -223,14 +223,15 @@ static void *its_alloc(void)
|
||||
|
||||
#ifdef CONFIG_MODULES
|
||||
if (its_mod) {
|
||||
- void *tmp = krealloc(its_mod->its_page_array,
|
||||
- (its_mod->its_num_pages+1) * sizeof(void *),
|
||||
+ struct its_array *pages = &its_mod->arch.its_pages;
|
||||
+ void *tmp = krealloc(pages->pages,
|
||||
+ (pages->num+1) * sizeof(void *),
|
||||
GFP_KERNEL);
|
||||
if (!tmp)
|
||||
return NULL;
|
||||
|
||||
- its_mod->its_page_array = tmp;
|
||||
- its_mod->its_page_array[its_mod->its_num_pages++] = page;
|
||||
+ pages->pages = tmp;
|
||||
+ pages->pages[pages->num++] = page;
|
||||
|
||||
execmem_make_temp_rw(page, PAGE_SIZE);
|
||||
}
|
||||
--- a/include/linux/module.h
|
||||
+++ b/include/linux/module.h
|
||||
@@ -586,11 +586,6 @@ struct module {
|
||||
atomic_t refcnt;
|
||||
#endif
|
||||
|
||||
-#ifdef CONFIG_MITIGATION_ITS
|
||||
- int its_num_pages;
|
||||
- void **its_page_array;
|
||||
-#endif
|
||||
-
|
||||
#ifdef CONFIG_CONSTRUCTORS
|
||||
/* Constructor functions. */
|
||||
ctor_fn_t *ctors;
|
148
debian/patches/patchset-pf/fixes/0041-x86-its-explicitly-manage-permissions-for-ITS-pages.patch
vendored
Normal file
148
debian/patches/patchset-pf/fixes/0041-x86-its-explicitly-manage-permissions-for-ITS-pages.patch
vendored
Normal file
@@ -0,0 +1,148 @@
|
||||
From 48d82c4dd03de376a6f673bda0f4f2b97138d855 Mon Sep 17 00:00:00 2001
|
||||
From: "Peter Zijlstra (Intel)" <peterz@infradead.org>
|
||||
Date: Tue, 3 Jun 2025 14:14:44 +0300
|
||||
Subject: x86/its: explicitly manage permissions for ITS pages
|
||||
|
||||
execmem_alloc() sets permissions differently depending on the kernel
|
||||
configuration, CPU support for PSE and whether a page is allocated
|
||||
before or after mark_rodata_ro().
|
||||
|
||||
Add tracking for pages allocated for ITS when patching the core kernel
|
||||
and make sure the permissions for ITS pages are explicitly managed for
|
||||
both kernel and module allocations.
|
||||
|
||||
Fixes: 872df34d7c51 ("x86/its: Use dynamic thunks for indirect branches")
|
||||
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
|
||||
Co-developed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
|
||||
Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
|
||||
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
|
||||
Reviewed-by: Nikolay Borisov <nik.borisov@suse.com>
|
||||
Cc: stable@vger.kernel.org
|
||||
Link: https://lkml.kernel.org/r/20250603111446.2609381-5-rppt@kernel.org
|
||||
---
|
||||
arch/x86/kernel/alternative.c | 74 ++++++++++++++++++++++++-----------
|
||||
1 file changed, 52 insertions(+), 22 deletions(-)
|
||||
|
||||
--- a/arch/x86/kernel/alternative.c
|
||||
+++ b/arch/x86/kernel/alternative.c
|
||||
@@ -138,6 +138,24 @@ static struct module *its_mod;
|
||||
#endif
|
||||
static void *its_page;
|
||||
static unsigned int its_offset;
|
||||
+struct its_array its_pages;
|
||||
+
|
||||
+static void *__its_alloc(struct its_array *pages)
|
||||
+{
|
||||
+ void *page __free(execmem) = execmem_alloc(EXECMEM_MODULE_TEXT, PAGE_SIZE);
|
||||
+ if (!page)
|
||||
+ return NULL;
|
||||
+
|
||||
+ void *tmp = krealloc(pages->pages, (pages->num+1) * sizeof(void *),
|
||||
+ GFP_KERNEL);
|
||||
+ if (!tmp)
|
||||
+ return NULL;
|
||||
+
|
||||
+ pages->pages = tmp;
|
||||
+ pages->pages[pages->num++] = page;
|
||||
+
|
||||
+ return no_free_ptr(page);
|
||||
+}
|
||||
|
||||
/* Initialize a thunk with the "jmp *reg; int3" instructions. */
|
||||
static void *its_init_thunk(void *thunk, int reg)
|
||||
@@ -173,6 +191,21 @@ static void *its_init_thunk(void *thunk,
|
||||
return thunk + offset;
|
||||
}
|
||||
|
||||
+static void its_pages_protect(struct its_array *pages)
|
||||
+{
|
||||
+ for (int i = 0; i < pages->num; i++) {
|
||||
+ void *page = pages->pages[i];
|
||||
+ execmem_restore_rox(page, PAGE_SIZE);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+static void its_fini_core(void)
|
||||
+{
|
||||
+ if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX))
|
||||
+ its_pages_protect(&its_pages);
|
||||
+ kfree(its_pages.pages);
|
||||
+}
|
||||
+
|
||||
#ifdef CONFIG_MODULES
|
||||
void its_init_mod(struct module *mod)
|
||||
{
|
||||
@@ -195,10 +228,8 @@ void its_fini_mod(struct module *mod)
|
||||
its_page = NULL;
|
||||
mutex_unlock(&text_mutex);
|
||||
|
||||
- for (int i = 0; i < mod->arch.its_pages.num; i++) {
|
||||
- void *page = mod->arch.its_pages.pages[i];
|
||||
- execmem_restore_rox(page, PAGE_SIZE);
|
||||
- }
|
||||
+ if (IS_ENABLED(CONFIG_STRICT_MODULE_RWX))
|
||||
+ its_pages_protect(&mod->arch.its_pages);
|
||||
}
|
||||
|
||||
void its_free_mod(struct module *mod)
|
||||
@@ -216,28 +247,23 @@ void its_free_mod(struct module *mod)
|
||||
|
||||
static void *its_alloc(void)
|
||||
{
|
||||
- void *page __free(execmem) = execmem_alloc(EXECMEM_MODULE_TEXT, PAGE_SIZE);
|
||||
+ struct its_array *pages = &its_pages;
|
||||
+ void *page;
|
||||
|
||||
+#ifdef CONFIG_MODULES
|
||||
+ if (its_mod)
|
||||
+ pages = &its_mod->arch.its_pages;
|
||||
+#endif
|
||||
+
|
||||
+ page = __its_alloc(pages);
|
||||
if (!page)
|
||||
return NULL;
|
||||
|
||||
-#ifdef CONFIG_MODULES
|
||||
- if (its_mod) {
|
||||
- struct its_array *pages = &its_mod->arch.its_pages;
|
||||
- void *tmp = krealloc(pages->pages,
|
||||
- (pages->num+1) * sizeof(void *),
|
||||
- GFP_KERNEL);
|
||||
- if (!tmp)
|
||||
- return NULL;
|
||||
-
|
||||
- pages->pages = tmp;
|
||||
- pages->pages[pages->num++] = page;
|
||||
+ execmem_make_temp_rw(page, PAGE_SIZE);
|
||||
+ if (pages == &its_pages)
|
||||
+ set_memory_x((unsigned long)page, 1);
|
||||
|
||||
- execmem_make_temp_rw(page, PAGE_SIZE);
|
||||
- }
|
||||
-#endif /* CONFIG_MODULES */
|
||||
-
|
||||
- return no_free_ptr(page);
|
||||
+ return page;
|
||||
}
|
||||
|
||||
static void *its_allocate_thunk(int reg)
|
||||
@@ -291,7 +317,9 @@ u8 *its_static_thunk(int reg)
|
||||
return thunk;
|
||||
}
|
||||
|
||||
-#endif
|
||||
+#else
|
||||
+static inline void its_fini_core(void) {}
|
||||
+#endif /* CONFIG_MITIGATION_ITS */
|
||||
|
||||
/*
|
||||
* Nomenclature for variable names to simplify and clarify this code and ease
|
||||
@@ -2368,6 +2396,8 @@ void __init alternative_instructions(voi
|
||||
apply_retpolines(__retpoline_sites, __retpoline_sites_end);
|
||||
apply_returns(__return_sites, __return_sites_end);
|
||||
|
||||
+ its_fini_core();
|
||||
+
|
||||
/*
|
||||
* Adjust all CALL instructions to point to func()-10, including
|
||||
* those in .altinstr_replacement.
|
32
debian/patches/patchset-pf/fixes/0042-KVM-SVM-Clear-current_vmcb-during-vCPU-free-for-all-.patch
vendored
Normal file
32
debian/patches/patchset-pf/fixes/0042-KVM-SVM-Clear-current_vmcb-during-vCPU-free-for-all-.patch
vendored
Normal file
@@ -0,0 +1,32 @@
|
||||
From 9bed8caa4c73f2d524d9600c74e6cbcff71c2456 Mon Sep 17 00:00:00 2001
|
||||
From: Yosry Ahmed <yosry.ahmed@linux.dev>
|
||||
Date: Tue, 29 Apr 2025 08:32:15 -0700
|
||||
Subject: KVM: SVM: Clear current_vmcb during vCPU free for all *possible* CPUs
|
||||
|
||||
When freeing a vCPU and thus its VMCB, clear current_vmcb for all possible
|
||||
CPUs, not just online CPUs, as it's theoretically possible a CPU could go
|
||||
offline and come back online in conjunction with KVM reusing the page for
|
||||
a new VMCB.
|
||||
|
||||
Link: https://lore.kernel.org/all/20250320013759.3965869-1-yosry.ahmed@linux.dev
|
||||
Fixes: fd65d3142f73 ("kvm: svm: Ensure an IBPB on all affected CPUs when freeing a vmcb")
|
||||
Cc: stable@vger.kernel.org
|
||||
Cc: Jim Mattson <jmattson@google.com>
|
||||
Signed-off-by: Yosry Ahmed <yosry.ahmed@linux.dev>
|
||||
[sean: split to separate patch, write changelog]
|
||||
Signed-off-by: Sean Christopherson <seanjc@google.com>
|
||||
---
|
||||
arch/x86/kvm/svm/svm.c | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
--- a/arch/x86/kvm/svm/svm.c
|
||||
+++ b/arch/x86/kvm/svm/svm.c
|
||||
@@ -1488,7 +1488,7 @@ static void svm_clear_current_vmcb(struc
|
||||
{
|
||||
int i;
|
||||
|
||||
- for_each_online_cpu(i)
|
||||
+ for_each_possible_cpu(i)
|
||||
cmpxchg(per_cpu_ptr(&svm_data.current_vmcb, i), vmcb, NULL);
|
||||
}
|
||||
|
43
debian/patches/patchset-pf/fixes/0043-KVM-VMX-Flush-shadow-VMCS-on-emergency-reboot.patch
vendored
Normal file
43
debian/patches/patchset-pf/fixes/0043-KVM-VMX-Flush-shadow-VMCS-on-emergency-reboot.patch
vendored
Normal file
@@ -0,0 +1,43 @@
|
||||
From d74cb6c8b70d9b5ad8482f4821679b83bad9de63 Mon Sep 17 00:00:00 2001
|
||||
From: Chao Gao <chao.gao@intel.com>
|
||||
Date: Mon, 24 Mar 2025 22:08:48 +0800
|
||||
Subject: KVM: VMX: Flush shadow VMCS on emergency reboot
|
||||
|
||||
Ensure the shadow VMCS cache is evicted during an emergency reboot to
|
||||
prevent potential memory corruption if the cache is evicted after reboot.
|
||||
|
||||
This issue was identified through code inspection, as __loaded_vmcs_clear()
|
||||
flushes both the normal VMCS and the shadow VMCS.
|
||||
|
||||
Avoid checking the "launched" state during an emergency reboot, unlike the
|
||||
behavior in __loaded_vmcs_clear(). This is important because reboot NMIs
|
||||
can interfere with operations like copy_shadow_to_vmcs12(), where shadow
|
||||
VMCSes are loaded directly using VMPTRLD. In such cases, if NMIs occur
|
||||
right after the VMCS load, the shadow VMCSes will be active but the
|
||||
"launched" state may not be set.
|
||||
|
||||
Fixes: 16f5b9034b69 ("KVM: nVMX: Copy processor-specific shadow-vmcs to VMCS12")
|
||||
Cc: stable@vger.kernel.org
|
||||
Signed-off-by: Chao Gao <chao.gao@intel.com>
|
||||
Reviewed-by: Kai Huang <kai.huang@intel.com>
|
||||
Link: https://lore.kernel.org/r/20250324140849.2099723-1-chao.gao@intel.com
|
||||
Signed-off-by: Sean Christopherson <seanjc@google.com>
|
||||
---
|
||||
arch/x86/kvm/vmx/vmx.c | 5 ++++-
|
||||
1 file changed, 4 insertions(+), 1 deletion(-)
|
||||
|
||||
--- a/arch/x86/kvm/vmx/vmx.c
|
||||
+++ b/arch/x86/kvm/vmx/vmx.c
|
||||
@@ -769,8 +769,11 @@ void vmx_emergency_disable_virtualizatio
|
||||
return;
|
||||
|
||||
list_for_each_entry(v, &per_cpu(loaded_vmcss_on_cpu, cpu),
|
||||
- loaded_vmcss_on_cpu_link)
|
||||
+ loaded_vmcss_on_cpu_link) {
|
||||
vmcs_clear(v->vmcs);
|
||||
+ if (v->shadow_vmcs)
|
||||
+ vmcs_clear(v->shadow_vmcs);
|
||||
+ }
|
||||
|
||||
kvm_cpu_vmxoff();
|
||||
}
|
64
debian/patches/patchset-pf/fixes/0044-cgroup-freezer-fix-incomplete-freezing-when-attachin.patch
vendored
Normal file
64
debian/patches/patchset-pf/fixes/0044-cgroup-freezer-fix-incomplete-freezing-when-attachin.patch
vendored
Normal file
@@ -0,0 +1,64 @@
|
||||
From 6e492900893c011cbe13fbb881cf1e11df08982b Mon Sep 17 00:00:00 2001
|
||||
From: Chen Ridong <chenridong@huawei.com>
|
||||
Date: Wed, 18 Jun 2025 07:32:17 +0000
|
||||
Subject: cgroup,freezer: fix incomplete freezing when attaching tasks
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
An issue was found:
|
||||
|
||||
# cd /sys/fs/cgroup/freezer/
|
||||
# mkdir test
|
||||
# echo FROZEN > test/freezer.state
|
||||
# cat test/freezer.state
|
||||
FROZEN
|
||||
# sleep 1000 &
|
||||
[1] 863
|
||||
# echo 863 > test/cgroup.procs
|
||||
# cat test/freezer.state
|
||||
FREEZING
|
||||
|
||||
When tasks are migrated to a frozen cgroup, the freezer fails to
|
||||
immediately freeze the tasks, causing the cgroup to remain in the
|
||||
"FREEZING" state.
|
||||
|
||||
The freeze_task() function is called before clearing the CGROUP_FROZEN
|
||||
flag. This causes the freezing() check to incorrectly return false,
|
||||
preventing __freeze_task() from being invoked for the migrated task.
|
||||
|
||||
To fix this issue, clear the CGROUP_FROZEN state before calling
|
||||
freeze_task().
|
||||
|
||||
Fixes: f5d39b020809 ("freezer,sched: Rewrite core freezer logic")
|
||||
Cc: stable@vger.kernel.org # v6.1+
|
||||
Reported-by: Zhong Jiawei <zhongjiawei1@huawei.com>
|
||||
Signed-off-by: Chen Ridong <chenridong@huawei.com>
|
||||
Acked-by: Michal Koutný <mkoutny@suse.com>
|
||||
Signed-off-by: Tejun Heo <tj@kernel.org>
|
||||
---
|
||||
kernel/cgroup/legacy_freezer.c | 3 +--
|
||||
1 file changed, 1 insertion(+), 2 deletions(-)
|
||||
|
||||
diff --git a/kernel/cgroup/legacy_freezer.c b/kernel/cgroup/legacy_freezer.c
|
||||
index 039d1eb2f215..507b8f19a262 100644
|
||||
--- a/kernel/cgroup/legacy_freezer.c
|
||||
+++ b/kernel/cgroup/legacy_freezer.c
|
||||
@@ -188,13 +188,12 @@ static void freezer_attach(struct cgroup_taskset *tset)
|
||||
if (!(freezer->state & CGROUP_FREEZING)) {
|
||||
__thaw_task(task);
|
||||
} else {
|
||||
- freeze_task(task);
|
||||
-
|
||||
/* clear FROZEN and propagate upwards */
|
||||
while (freezer && (freezer->state & CGROUP_FROZEN)) {
|
||||
freezer->state &= ~CGROUP_FROZEN;
|
||||
freezer = parent_freezer(freezer);
|
||||
}
|
||||
+ freeze_task(task);
|
||||
}
|
||||
}
|
||||
|
||||
--
|
||||
2.50.0
|
||||
|
@@ -1,4 +1,4 @@
|
||||
From c207229d3f7b851d246f1904bc4cab7ae9ada58b Mon Sep 17 00:00:00 2001
|
||||
From ef4d2ebb50f1bd0d5b2e3f1aa2280d7d31e4a3c9 Mon Sep 17 00:00:00 2001
|
||||
From: Maninder Singh <maninder1.s@samsung.com>
|
||||
Date: Thu, 6 Mar 2025 14:50:06 +0530
|
||||
Subject: NFSD: unregister filesystem in case genl_register_family() fails
|
||||
|
@@ -1,4 +1,4 @@
|
||||
From bda3cf19bcf44807c401b807dee83aadda959287 Mon Sep 17 00:00:00 2001
|
||||
From 6c2a6b3e27a3a02fd9f3f92458d4995014dfe69f Mon Sep 17 00:00:00 2001
|
||||
From: Maninder Singh <maninder1.s@samsung.com>
|
||||
Date: Thu, 6 Mar 2025 14:50:07 +0530
|
||||
Subject: NFSD: fix race between nfsd registration and exports_proc
|
||||
|
@@ -1,4 +1,4 @@
|
||||
From b9293b51ea6182618e474edfbeb5cd34f5e875e8 Mon Sep 17 00:00:00 2001
|
||||
From 0d4fc17cb5da09d14dbff91da7e28e50d3f54af2 Mon Sep 17 00:00:00 2001
|
||||
From: Olga Kornievskaia <okorniev@redhat.com>
|
||||
Date: Fri, 21 Mar 2025 20:13:04 -0400
|
||||
Subject: nfsd: fix access checking for NLM under XPRTSEC policies
|
||||
|
@@ -1,4 +1,4 @@
|
||||
From 778e820deed49a0dee6115c0aa903e626ab635f6 Mon Sep 17 00:00:00 2001
|
||||
From 2fa924062a9494772cd997cb8b1ec572cfe6490f Mon Sep 17 00:00:00 2001
|
||||
From: NeilBrown <neil@brown.name>
|
||||
Date: Fri, 28 Mar 2025 11:05:59 +1100
|
||||
Subject: nfsd: nfsd4_spo_must_allow() must check this is a v4 compound request
|
||||
|
@@ -1,4 +1,4 @@
|
||||
From 8a7faf80fbb9ecdea403cb4f882354e8a5201acb Mon Sep 17 00:00:00 2001
|
||||
From c860b8340bf921de66aa7871f40507dd5628926f Mon Sep 17 00:00:00 2001
|
||||
From: Li Lingfeng <lilingfeng3@huawei.com>
|
||||
Date: Mon, 14 Apr 2025 22:38:52 +0800
|
||||
Subject: nfsd: Initialize ssc before laundromat_work to prevent NULL
|
||||
|
@@ -1,4 +1,4 @@
|
||||
From 12e39177848d11c6ac5ad70ce530364fac7f36d3 Mon Sep 17 00:00:00 2001
|
||||
From 01089ae8fff5bcc6e9949d50d76b70f2a16abe89 Mon Sep 17 00:00:00 2001
|
||||
From: Chuck Lever <chuck.lever@oracle.com>
|
||||
Date: Wed, 7 May 2025 10:45:15 -0400
|
||||
Subject: NFSD: Implement FATTR4_CLONE_BLKSIZE attribute
|
||||
|
@@ -1,4 +1,4 @@
|
||||
From 2623f0468759aba585c7ae86adc1cf1cb11e1b63 Mon Sep 17 00:00:00 2001
|
||||
From e0246422dfc08dec0fc3c96f3201bab6ceec6774 Mon Sep 17 00:00:00 2001
|
||||
From: Max Kellermann <max.kellermann@ionos.com>
|
||||
Date: Wed, 23 Apr 2025 15:22:50 +0200
|
||||
Subject: fs/nfs/read: fix double-unlock bug in nfs_return_empty_folio()
|
||||
|
@@ -1,4 +1,4 @@
|
||||
From d87e5957afccde6cc0719ab0a554757dcafa85ce Mon Sep 17 00:00:00 2001
|
||||
From d9f4762296075cc67d9974d093a87064075853e1 Mon Sep 17 00:00:00 2001
|
||||
From: Scott Mayhew <smayhew@redhat.com>
|
||||
Date: Wed, 30 Apr 2025 07:12:29 -0400
|
||||
Subject: NFSv4: Don't check for OPEN feature support in v4.1
|
||||
|
@@ -1,4 +1,4 @@
|
||||
From 9e7464ef730cfe5bbab845ff12b295575d874216 Mon Sep 17 00:00:00 2001
|
||||
From 7147868788966e9032cdeb0cf33bd1ae47785088 Mon Sep 17 00:00:00 2001
|
||||
From: Mike Snitzer <snitzer@kernel.org>
|
||||
Date: Tue, 13 May 2025 12:08:31 -0400
|
||||
Subject: NFS: always probe for LOCALIO support asynchronously
|
||||
|
45
debian/patches/patchset-pf/smb/0009-ksmbd-fix-null-pointer-dereference-in-destroy_previo.patch
vendored
Normal file
45
debian/patches/patchset-pf/smb/0009-ksmbd-fix-null-pointer-dereference-in-destroy_previo.patch
vendored
Normal file
@@ -0,0 +1,45 @@
|
||||
From 9d330e139e9993f2489fcfe3048c8e737085646d Mon Sep 17 00:00:00 2001
|
||||
From: Namjae Jeon <linkinjeon@kernel.org>
|
||||
Date: Fri, 13 Jun 2025 10:12:43 +0900
|
||||
Subject: ksmbd: fix null pointer dereference in destroy_previous_session
|
||||
|
||||
If a client sets ->PreviousSessionId on kerberos session setup stage,
|
||||
NULL pointer dereference error will happen. Since sess->user is not
|
||||
set yet, it can pass the user argument as NULL to destroy_previous_session.
|
||||
sess->user will be set in ksmbd_krb5_authenticate(). So this patch moves
|
||||
calling destroy_previous_session() after ksmbd_krb5_authenticate().
|
||||
|
||||
Cc: stable@vger.kernel.org
|
||||
Reported-by: zdi-disclosures@trendmicro.com # ZDI-CAN-27391
|
||||
Signed-off-by: Namjae Jeon <linkinjeon@kernel.org>
|
||||
Signed-off-by: Steve French <stfrench@microsoft.com>
|
||||
---
|
||||
fs/smb/server/smb2pdu.c | 11 ++++++-----
|
||||
1 file changed, 6 insertions(+), 5 deletions(-)
|
||||
|
||||
--- a/fs/smb/server/smb2pdu.c
|
||||
+++ b/fs/smb/server/smb2pdu.c
|
||||
@@ -1607,17 +1607,18 @@ static int krb5_authenticate(struct ksmb
|
||||
out_len = work->response_sz -
|
||||
(le16_to_cpu(rsp->SecurityBufferOffset) + 4);
|
||||
|
||||
- /* Check previous session */
|
||||
- prev_sess_id = le64_to_cpu(req->PreviousSessionId);
|
||||
- if (prev_sess_id && prev_sess_id != sess->id)
|
||||
- destroy_previous_session(conn, sess->user, prev_sess_id);
|
||||
-
|
||||
retval = ksmbd_krb5_authenticate(sess, in_blob, in_len,
|
||||
out_blob, &out_len);
|
||||
if (retval) {
|
||||
ksmbd_debug(SMB, "krb5 authentication failed\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
+
|
||||
+ /* Check previous session */
|
||||
+ prev_sess_id = le64_to_cpu(req->PreviousSessionId);
|
||||
+ if (prev_sess_id && prev_sess_id != sess->id)
|
||||
+ destroy_previous_session(conn, sess->user, prev_sess_id);
|
||||
+
|
||||
rsp->SecurityBufferLength = cpu_to_le16(out_len);
|
||||
|
||||
if ((conn->sign || server_conf.enforced_signing) ||
|
@@ -1,81 +0,0 @@
|
||||
From c63d4a0865e8e7549e1305cc67b88a355a4a9a02 Mon Sep 17 00:00:00 2001
|
||||
From: Dave Chinner <dchinner@redhat.com>
|
||||
Date: Thu, 1 May 2025 09:27:24 +1000
|
||||
Subject: xfs: don't assume perags are initialised when trimming AGs
|
||||
|
||||
When running fstrim immediately after mounting a V4 filesystem,
|
||||
the fstrim fails to trim all the free space in the filesystem. It
|
||||
only trims the first extent in the by-size free space tree in each
|
||||
AG and then returns. If a second fstrim is then run, it runs
|
||||
correctly and the entire free space in the filesystem is iterated
|
||||
and discarded correctly.
|
||||
|
||||
The problem lies in the setup of the trim cursor - it assumes that
|
||||
pag->pagf_longest is valid without either reading the AGF first or
|
||||
checking if xfs_perag_initialised_agf(pag) is true or not.
|
||||
|
||||
As a result, when a filesystem is mounted without reading the AGF
|
||||
(e.g. a clean mount on a v4 filesystem) and the first operation is a
|
||||
fstrim call, pag->pagf_longest is zero and so the free extent search
|
||||
starts at the wrong end of the by-size btree and exits after
|
||||
discarding the first record in the tree.
|
||||
|
||||
Fix this by deferring the initialisation of tcur->count to after
|
||||
we have locked the AGF and guaranteed that the perag is properly
|
||||
initialised. We trigger this on tcur->count == 0 after locking the
|
||||
AGF, as this will only occur on the first call to
|
||||
xfs_trim_gather_extents() for each AG. If we need to iterate,
|
||||
tcur->count will be set to the length of the record we need to
|
||||
restart at, so we can use this to ensure we only sample a valid
|
||||
pag->pagf_longest value for the iteration.
|
||||
|
||||
Signed-off-by: Dave Chinner <dchinner@redhat.com>
|
||||
Reviewed-by: Bill O'Donnell <bodonnel@redhat.com>
|
||||
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
|
||||
Fixes: 89cfa899608f ("xfs: reduce AGF hold times during fstrim operations")
|
||||
Cc: <stable@vger.kernel.org> # v6.6
|
||||
Signed-off-by: Carlos Maiolino <cem@kernel.org>
|
||||
---
|
||||
fs/xfs/xfs_discard.c | 17 ++++++++++++++++-
|
||||
1 file changed, 16 insertions(+), 1 deletion(-)
|
||||
|
||||
--- a/fs/xfs/xfs_discard.c
|
||||
+++ b/fs/xfs/xfs_discard.c
|
||||
@@ -167,6 +167,14 @@ xfs_discard_extents(
|
||||
return error;
|
||||
}
|
||||
|
||||
+/*
|
||||
+ * Care must be taken setting up the trim cursor as the perags may not have been
|
||||
+ * initialised when the cursor is initialised. e.g. a clean mount which hasn't
|
||||
+ * read in AGFs and the first operation run on the mounted fs is a trim. This
|
||||
+ * can result in perag fields that aren't initialised until
|
||||
+ * xfs_trim_gather_extents() calls xfs_alloc_read_agf() to lock down the AG for
|
||||
+ * the free space search.
|
||||
+ */
|
||||
struct xfs_trim_cur {
|
||||
xfs_agblock_t start;
|
||||
xfs_extlen_t count;
|
||||
@@ -204,6 +212,14 @@ xfs_trim_gather_extents(
|
||||
if (error)
|
||||
goto out_trans_cancel;
|
||||
|
||||
+ /*
|
||||
+ * First time through tcur->count will not have been initialised as
|
||||
+ * pag->pagf_longest is not guaranteed to be valid before we read
|
||||
+ * the AGF buffer above.
|
||||
+ */
|
||||
+ if (!tcur->count)
|
||||
+ tcur->count = pag->pagf_longest;
|
||||
+
|
||||
if (tcur->by_bno) {
|
||||
/* sub-AG discard request always starts at tcur->start */
|
||||
cur = xfs_bnobt_init_cursor(mp, tp, agbp, pag);
|
||||
@@ -350,7 +366,6 @@ xfs_trim_perag_extents(
|
||||
{
|
||||
struct xfs_trim_cur tcur = {
|
||||
.start = start,
|
||||
- .count = pag->pagf_longest,
|
||||
.end = end,
|
||||
.minlen = minlen,
|
||||
};
|
Reference in New Issue
Block a user