release 6.14.1

2025-04-07 13:34:51 +03:00
parent cb529499fc
commit 12ad6316be
76 changed files with 2260 additions and 46 deletions
--- a/debian/patches/patchset-pf/fixes/0001-tpm-do-not-start-chip-while-suspended.patch
+++ b/debian/patches/patchset-pf/fixes/0001-tpm-do-not-start-chip-while-suspended.patch
@@ -1,4 +1,4 @@
-From 52af8f543922b47a31ddbb6ffb81f40ad9993309 Mon Sep 17 00:00:00 2001
+From 9efac88375330a6f29f091e9dd5fd6154670ba56 Mon Sep 17 00:00:00 2001
 From: Thadeu Lima de Souza Cascardo <cascardo@igalia.com>
 Date: Fri, 7 Feb 2025 15:07:46 -0300
 Subject: tpm: do not start chip while suspended
--- a/debian/patches/patchset-pf/fixes/0003-EDAC-igen6-Fix-the-flood-of-invalid-error-reports.patch
+++ b/debian/patches/patchset-pf/fixes/0003-EDAC-igen6-Fix-the-flood-of-invalid-error-reports.patch
@@ -0,0 +1,56 @@
+From 8886788eed16c79124bc530950f09c3f2fa881a8 Mon Sep 17 00:00:00 2001
+From: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
+Date: Wed, 12 Feb 2025 16:33:54 +0800
+Subject: EDAC/igen6: Fix the flood of invalid error reports
+
+The ECC_ERROR_LOG register of certain SoCs may contain the invalid value
+~0, which results in a flood of invalid error reports in polling mode.
+
+Fix the flood of invalid error reports by skipping the invalid ECC error
+log value ~0.
+
+Fixes: e14232afa944 ("EDAC/igen6: Add polling support")
+Reported-by: Ramses <ramses@well-founded.dev>
+Closes: https://lore.kernel.org/all/OISL8Rv--F-9@well-founded.dev/
+Tested-by: Ramses <ramses@well-founded.dev>
+Reported-by: John <therealgraysky@proton.me>
+Closes: https://lore.kernel.org/all/p5YcxOE6M3Ncxpn2-Ia_wCt61EM4LwIiN3LroQvT_-G2jMrFDSOW5k2A9D8UUzD2toGpQBN1eI0sL5dSKnkO8iteZegLoQEj-DwQaMhGx4A=@proton.me/
+Tested-by: John <therealgraysky@proton.me>
+Signed-off-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
+Signed-off-by: Tony Luck <tony.luck@intel.com>
+Link: https://lore.kernel.org/r/20250212083354.31919-1-qiuxu.zhuo@intel.com
+---
+ drivers/edac/igen6_edac.c | 21 +++++++++++++++------
+ 1 file changed, 15 insertions(+), 6 deletions(-)
+
+--- a/drivers/edac/igen6_edac.c
+++ b/drivers/edac/igen6_edac.c
+@@ -785,13 +785,22 @@ static u64 ecclog_read_and_clear(struct
+ {
+ 	u64 ecclog = readq(imc->window + ECC_ERROR_LOG_OFFSET);
+ 
+-	if (ecclog & (ECC_ERROR_LOG_CE | ECC_ERROR_LOG_UE)) {
+-		/* Clear CE/UE bits by writing 1s */
+-		writeq(ecclog, imc->window + ECC_ERROR_LOG_OFFSET);
+-		return ecclog;
+-	}
+	/*
+	 * Quirk: The ECC_ERROR_LOG register of certain SoCs may contain
+	 *        the invalid value ~0. This will result in a flood of invalid
+	 *        error reports in polling mode. Skip it.
+	 */
+	if (ecclog == ~0)
+		return 0;
+ 
+-	return 0;
+	/* Neither a CE nor a UE. Skip it.*/
+	if (!(ecclog & (ECC_ERROR_LOG_CE | ECC_ERROR_LOG_UE)))
+		return 0;
+
+	/* Clear CE/UE bits by writing 1s */
+	writeq(ecclog, imc->window + ECC_ERROR_LOG_OFFSET);
+
+	return ecclog;
+ }
+ 
+ static void errsts_clear(struct igen6_imc *imc)
--- a/debian/patches/patchset-pf/fixes/0004-x86-tools-Drop-duplicate-unlikely-definition-in-insn.patch
+++ b/debian/patches/patchset-pf/fixes/0004-x86-tools-Drop-duplicate-unlikely-definition-in-insn.patch
@@ -0,0 +1,36 @@
+From b40bdfdcffa333ad169327c5b8fe1b93542c7e0a Mon Sep 17 00:00:00 2001
+From: Nathan Chancellor <nathan@kernel.org>
+Date: Tue, 18 Mar 2025 15:32:30 -0700
+Subject: x86/tools: Drop duplicate unlikely() definition in
+ insn_decoder_test.c
+
+After commit c104c16073b7 ("Kunit to check the longest symbol length"),
+there is a warning when building with clang because there is now a
+definition of unlikely from compiler.h in tools/include/linux, which
+conflicts with the one in the instruction decoder selftest:
+
+  arch/x86/tools/insn_decoder_test.c:15:9: warning: 'unlikely' macro redefined [-Wmacro-redefined]
+
+Remove the second unlikely() definition, as it is no longer necessary,
+clearing up the warning.
+
+Fixes: c104c16073b7 ("Kunit to check the longest symbol length")
+Signed-off-by: Nathan Chancellor <nathan@kernel.org>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Acked-by: Shuah Khan <skhan@linuxfoundation.org>
+Link: https://lore.kernel.org/r/20250318-x86-decoder-test-fix-unlikely-redef-v1-1-74c84a7bf05b@kernel.org
+---
+ arch/x86/tools/insn_decoder_test.c | 2 --
+ 1 file changed, 2 deletions(-)
+
+--- a/arch/x86/tools/insn_decoder_test.c
+++ b/arch/x86/tools/insn_decoder_test.c
+@@ -11,8 +11,6 @@
+ #include <unistd.h>
+ #include <stdarg.h>
+ 
+-#define unlikely(cond) (cond)
+-
+ #include <asm/insn.h>
+ #include <inat.c>
+ #include <insn.c>
--- a/debian/patches/patchset-pf/fixes/0005-tpm-tpm_tis-Fix-timeout-handling-when-waiting-for-TP.patch
+++ b/debian/patches/patchset-pf/fixes/0005-tpm-tpm_tis-Fix-timeout-handling-when-waiting-for-TP.patch
@@ -0,0 +1,44 @@
+From 073fb5ff9a001882fa884a0a8efddc88860ad791 Mon Sep 17 00:00:00 2001
+From: Jonathan McDowell <noodles@meta.com>
+Date: Wed, 12 Mar 2025 07:31:57 +0200
+Subject: tpm, tpm_tis: Fix timeout handling when waiting for TPM status
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+The change to only use interrupts to handle supported status changes
+introduced an issue when it is necessary to poll for the status. Rather
+than checking for the status after sleeping, the code now sleeps after
+the check. This means a correct, but slower, status change on the part
+of the TPM can be missed, resulting in a spurious timeout error,
+especially on a more loaded system. Switch back to sleeping *then*
+checking. An up front check of the status has been done at the start of
+the function, so this does not cause an additional delay when the status
+is already what we're looking for.
+
+Cc: stable@vger.kernel.org # v6.4+
+Fixes: e87fcf0dc2b4 ("tpm, tpm_tis: Only handle supported interrupts")
+Signed-off-by: Jonathan McDowell <noodles@meta.com>
+Reviewed-by: Michal Suchánek <msuchanek@suse.de>
+Reviewed-by: Lino Sanfilippo <l.sanfilippo@kunbus.com>
+Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
+Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
+---
+ drivers/char/tpm/tpm_tis_core.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/drivers/char/tpm/tpm_tis_core.c
+++ b/drivers/char/tpm/tpm_tis_core.c
+@@ -114,11 +114,10 @@ again:
+ 		return 0;
+ 	/* process status changes without irq support */
+ 	do {
+		usleep_range(priv->timeout_min, priv->timeout_max);
+ 		status = chip->ops->status(chip);
+ 		if ((status & mask) == mask)
+ 			return 0;
+-		usleep_range(priv->timeout_min,
+-			     priv->timeout_max);
+ 	} while (time_before(jiffies, stop));
+ 	return -ETIME;
+ }
--- a/debian/patches/patchset-pf/fixes/0006-x86-mm-Fix-flush_tlb_range-when-used-for-zapping-nor.patch
+++ b/debian/patches/patchset-pf/fixes/0006-x86-mm-Fix-flush_tlb_range-when-used-for-zapping-nor.patch
@@ -0,0 +1,50 @@
+From e24882a961e2d85cc4c8319a56734a0d7c7867fc Mon Sep 17 00:00:00 2001
+From: Jann Horn <jannh@google.com>
+Date: Fri, 3 Jan 2025 19:39:38 +0100
+Subject: x86/mm: Fix flush_tlb_range() when used for zapping normal PMDs
+
+On the following path, flush_tlb_range() can be used for zapping normal
+PMD entries (PMD entries that point to page tables) together with the PTE
+entries in the pointed-to page table:
+
+    collapse_pte_mapped_thp
+      pmdp_collapse_flush
+        flush_tlb_range
+
+The arm64 version of flush_tlb_range() has a comment describing that it can
+be used for page table removal, and does not use any last-level
+invalidation optimizations. Fix the X86 version by making it behave the
+same way.
+
+Currently, X86 only uses this information for the following two purposes,
+which I think means the issue doesn't have much impact:
+
+ - In native_flush_tlb_multi() for checking if lazy TLB CPUs need to be
+   IPI'd to avoid issues with speculative page table walks.
+ - In Hyper-V TLB paravirtualization, again for lazy TLB stuff.
+
+The patch "x86/mm: only invalidate final translations with INVLPGB" which
+is currently under review (see
+<https://lore.kernel.org/all/20241230175550.4046587-13-riel@surriel.com/>)
+would probably be making the impact of this a lot worse.
+
+Fixes: 016c4d92cd16 ("x86/mm/tlb: Add freed_tables argument to flush_tlb_mm_range")
+Signed-off-by: Jann Horn <jannh@google.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: stable@vger.kernel.org
+Link: https://lkml.kernel.org/r/20250103-x86-collapse-flush-fix-v1-1-3c521856cfa6@google.com
+---
+ arch/x86/include/asm/tlbflush.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
+@@ -311,7 +311,7 @@ static inline bool mm_in_asid_transition
+ 	flush_tlb_mm_range((vma)->vm_mm, start, end,			\
+ 			   ((vma)->vm_flags & VM_HUGETLB)		\
+ 				? huge_page_shift(hstate_vma(vma))	\
+-				: PAGE_SHIFT, false)
+				: PAGE_SHIFT, true)
+ 
+ extern void flush_tlb_all(void);
+ extern void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
--- a/debian/patches/patchset-pf/fixes/0007-x86-tsc-Always-save-restore-TSC-sched_clock-on-suspe.patch
+++ b/debian/patches/patchset-pf/fixes/0007-x86-tsc-Always-save-restore-TSC-sched_clock-on-suspe.patch
@@ -0,0 +1,68 @@
+From 7a0abf17cceb511425b7af34291243b4a270e770 Mon Sep 17 00:00:00 2001
+From: "Guilherme G. Piccoli" <gpiccoli@igalia.com>
+Date: Sat, 15 Feb 2025 17:58:16 -0300
+Subject: x86/tsc: Always save/restore TSC sched_clock() on suspend/resume
+
+TSC could be reset in deep ACPI sleep states, even with invariant TSC.
+
+That's the reason we have sched_clock() save/restore functions, to deal
+with this situation. But what happens is that such functions are guarded
+with a check for the stability of sched_clock - if not considered stable,
+the save/restore routines aren't executed.
+
+On top of that, we have a clear comment in native_sched_clock() saying
+that *even* with TSC unstable, we continue using TSC for sched_clock due
+to its speed.
+
+In other words, if we have a situation of TSC getting detected as unstable,
+it marks the sched_clock as unstable as well, so subsequent S3 sleep cycles
+could bring bogus sched_clock values due to the lack of the save/restore
+mechanism, causing warnings like this:
+
+  [22.954918] ------------[ cut here ]------------
+  [22.954923] Delta way too big! 18446743750843854390 ts=18446744072977390405 before=322133536015 after=322133536015 write stamp=18446744072977390405
+  [22.954923] If you just came from a suspend/resume,
+  [22.954923] please switch to the trace global clock:
+  [22.954923]   echo global > /sys/kernel/tracing/trace_clock
+  [22.954923] or add trace_clock=global to the kernel command line
+  [22.954937] WARNING: CPU: 2 PID: 5728 at kernel/trace/ring_buffer.c:2890 rb_add_timestamp+0x193/0x1c0
+
+Notice that the above was reproduced even with "trace_clock=global".
+
+The fix for that is to _always_ save/restore the sched_clock on suspend
+cycle _if TSC is used_ as sched_clock - only if we fall back to jiffies
+the sched_clock_stable() check becomes relevant to save/restore the
+sched_clock.
+
+Debugged-by: Thadeu Lima de Souza Cascardo <cascardo@igalia.com>
+Signed-off-by: Guilherme G. Piccoli <gpiccoli@igalia.com>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Cc: stable@vger.kernel.org
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Link: https://lore.kernel.org/r/20250215210314.351480-1-gpiccoli@igalia.com
+---
+ arch/x86/kernel/tsc.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
+@@ -959,7 +959,7 @@ static unsigned long long cyc2ns_suspend
+ 
+ void tsc_save_sched_clock_state(void)
+ {
+-	if (!sched_clock_stable())
+	if (!static_branch_likely(&__use_tsc) && !sched_clock_stable())
+ 		return;
+ 
+ 	cyc2ns_suspend = sched_clock();
+@@ -979,7 +979,7 @@ void tsc_restore_sched_clock_state(void)
+ 	unsigned long flags;
+ 	int cpu;
+ 
+-	if (!sched_clock_stable())
+	if (!static_branch_likely(&__use_tsc) && !sched_clock_stable())
+ 		return;
+ 
+ 	local_irq_save(flags);
--- a/debian/patches/patchset-pf/fixes/0008-uprobes-x86-Harden-uretprobe-syscall-trampoline-chec.patch
+++ b/debian/patches/patchset-pf/fixes/0008-uprobes-x86-Harden-uretprobe-syscall-trampoline-chec.patch
@@ -0,0 +1,87 @@
+From bbbc88e65bb8036be1fe3386c0061d9be4c5a442 Mon Sep 17 00:00:00 2001
+From: Jiri Olsa <jolsa@kernel.org>
+Date: Wed, 12 Feb 2025 23:04:33 +0100
+Subject: uprobes/x86: Harden uretprobe syscall trampoline check
+
+Jann reported a possible issue when trampoline_check_ip() returns an
+address near the bottom of the address space that is allowed to
+call into the syscall if uretprobes are not set up:
+
+   https://lore.kernel.org/bpf/202502081235.5A6F352985@keescook/T/#m9d416df341b8fbc11737dacbcd29f0054413cbbf
+
+Though the mmap minimum address restrictions will typically prevent
+creating mappings there, let's make sure uretprobe syscall checks
+for that.
+
+Fixes: ff474a78cef5 ("uprobe: Add uretprobe syscall to speed up return probe")
+Reported-by: Jann Horn <jannh@google.com>
+Signed-off-by: Jiri Olsa <jolsa@kernel.org>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Reviewed-by: Oleg Nesterov <oleg@redhat.com>
+Reviewed-by: Kees Cook <kees@kernel.org>
+Acked-by: Andrii Nakryiko <andrii@kernel.org>
+Acked-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
+Acked-by: Alexei Starovoitov <alexei.starovoitov@gmail.com>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20250212220433.3624297-1-jolsa@kernel.org
+---
+ arch/x86/kernel/uprobes.c | 14 +++++++++-----
+ include/linux/uprobes.h   |  2 ++
+ kernel/events/uprobes.c   |  2 +-
+ 3 files changed, 12 insertions(+), 6 deletions(-)
+
+--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
+@@ -357,19 +357,23 @@ void *arch_uprobe_trampoline(unsigned lo
+ 	return &insn;
+ }
+ 
+-static unsigned long trampoline_check_ip(void)
+static unsigned long trampoline_check_ip(unsigned long tramp)
+ {
+-	unsigned long tramp = uprobe_get_trampoline_vaddr();
+-
+ 	return tramp + (uretprobe_syscall_check - uretprobe_trampoline_entry);
+ }
+ 
+ SYSCALL_DEFINE0(uretprobe)
+ {
+ 	struct pt_regs *regs = task_pt_regs(current);
+-	unsigned long err, ip, sp, r11_cx_ax[3];
+	unsigned long err, ip, sp, r11_cx_ax[3], tramp;
+
+	/* If there's no trampoline, we are called from wrong place. */
+	tramp = uprobe_get_trampoline_vaddr();
+	if (unlikely(tramp == UPROBE_NO_TRAMPOLINE_VADDR))
+		goto sigill;
+ 
+-	if (regs->ip != trampoline_check_ip())
+	/* Make sure the ip matches the only allowed sys_uretprobe caller. */
+	if (unlikely(regs->ip != trampoline_check_ip(tramp)))
+ 		goto sigill;
+ 
+ 	err = copy_from_user(r11_cx_ax, (void __user *)regs->sp, sizeof(r11_cx_ax));
+--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
+@@ -39,6 +39,8 @@ struct page;
+ 
+ #define MAX_URETPROBE_DEPTH		64
+ 
+#define UPROBE_NO_TRAMPOLINE_VADDR	(~0UL)
+
+ struct uprobe_consumer {
+ 	/*
+ 	 * handler() can return UPROBE_HANDLER_REMOVE to signal the need to
+--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
+@@ -2169,8 +2169,8 @@ void uprobe_copy_process(struct task_str
+  */
+ unsigned long uprobe_get_trampoline_vaddr(void)
+ {
+	unsigned long trampoline_vaddr = UPROBE_NO_TRAMPOLINE_VADDR;
+ 	struct xol_area *area;
+-	unsigned long trampoline_vaddr = -1;
+ 
+ 	/* Pairs with xol_add_vma() smp_store_release() */
+ 	area = READ_ONCE(current->mm->uprobes_state.xol_area); /* ^^^ */
--- a/debian/patches/patchset-pf/fixes/0009-block-make-sure-nr_integrity_segments-is-cloned-in-b.patch
+++ b/debian/patches/patchset-pf/fixes/0009-block-make-sure-nr_integrity_segments-is-cloned-in-b.patch
@@ -0,0 +1,32 @@
+From f4511f63677bd3e7831561b1407a69a71cb519bc Mon Sep 17 00:00:00 2001
+From: Ming Lei <ming.lei@redhat.com>
+Date: Mon, 10 Mar 2025 19:54:53 +0800
+Subject: block: make sure ->nr_integrity_segments is cloned in
+ blk_rq_prep_clone
+
+Make sure ->nr_integrity_segments is cloned in blk_rq_prep_clone(),
+otherwise requests cloned by device-mapper multipath will not have the
+proper nr_integrity_segments values set, then BUG() is hit from
+sg_alloc_table_chained().
+
+Fixes: b0fd271d5fba ("block: add request clone interface (v2)")
+Cc: stable@vger.kernel.org
+Cc: Christoph Hellwig <hch@infradead.org>
+Signed-off-by: Ming Lei <ming.lei@redhat.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Link: https://lore.kernel.org/r/20250310115453.2271109-1-ming.lei@redhat.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+---
+ block/blk-mq.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/block/blk-mq.c
+++ b/block/blk-mq.c
+@@ -3314,6 +3314,7 @@ int blk_rq_prep_clone(struct request *rq
+ 		rq->special_vec = rq_src->special_vec;
+ 	}
+ 	rq->nr_phys_segments = rq_src->nr_phys_segments;
+	rq->nr_integrity_segments = rq_src->nr_integrity_segments;
+ 
+ 	if (rq->bio && blk_crypto_rq_bio_prep(rq, rq->bio, gfp_mask) < 0)
+ 		goto free_and_out;
--- a/debian/patches/patchset-pf/fixes/0010-PCI-Fix-wrong-length-of-devres-array.patch
+++ b/debian/patches/patchset-pf/fixes/0010-PCI-Fix-wrong-length-of-devres-array.patch
@@ -0,0 +1,40 @@
+From 46b8c87f1aa08a0794b45b394c5462f33bec54b0 Mon Sep 17 00:00:00 2001
+From: Philipp Stanner <phasta@kernel.org>
+Date: Wed, 12 Mar 2025 09:06:34 +0100
+Subject: PCI: Fix wrong length of devres array
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+The array for the iomapping cookie addresses has a length of
+PCI_STD_NUM_BARS. This constant, however, only describes standard BARs,
+while PCI can allow for additional, special BARs.
+
+The total number of PCI resources is described by constant
+PCI_NUM_RESOURCES, which is also used in, e.g., pci_select_bars().
+
+Thus, the devres array has so far been too small.
+
+Change the length of the devres array to PCI_NUM_RESOURCES.
+
+Link: https://lore.kernel.org/r/20250312080634.13731-3-phasta@kernel.org
+Fixes: bbaff68bf4a4 ("PCI: Add managed partial-BAR request and map infrastructure")
+Signed-off-by: Philipp Stanner <phasta@kernel.org>
+Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
+Signed-off-by: Krzysztof Wilczyński <kwilczynski@kernel.org>
+Cc: stable@vger.kernel.org	# v6.11+
+---
+ drivers/pci/devres.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/pci/devres.c
+++ b/drivers/pci/devres.c
+@@ -40,7 +40,7 @@
+  * Legacy struct storing addresses to whole mapped BARs.
+  */
+ struct pcim_iomap_devres {
+-	void __iomem *table[PCI_STD_NUM_BARS];
+	void __iomem *table[PCI_NUM_RESOURCES];
+ };
+ 
+ /* Used to restore the old INTx state on driver detach. */
--- a/debian/patches/patchset-pf/fixes/0011-exec-fix-the-racy-usage-of-fs_struct-in_exec.patch
+++ b/debian/patches/patchset-pf/fixes/0011-exec-fix-the-racy-usage-of-fs_struct-in_exec.patch
@@ -0,0 +1,84 @@
+From 9741b8592433f51ed477c9dba6d304562aa7de18 Mon Sep 17 00:00:00 2001
+From: Oleg Nesterov <oleg@redhat.com>
+Date: Mon, 24 Mar 2025 17:00:03 +0100
+Subject: exec: fix the racy usage of fs_struct->in_exec
+
+check_unsafe_exec() sets fs->in_exec under cred_guard_mutex, then execve()
+paths clear fs->in_exec lockless. This is fine if exec succeeds, but if it
+fails we have the following race:
+
+	T1 sets fs->in_exec = 1, fails, drops cred_guard_mutex
+
+	T2 sets fs->in_exec = 1
+
+	T1 clears fs->in_exec
+
+	T2 continues with fs->in_exec == 0
+
+Change fs/exec.c to clear fs->in_exec with cred_guard_mutex held.
+
+Reported-by: syzbot+1c486d0b62032c82a968@syzkaller.appspotmail.com
+Closes: https://lore.kernel.org/all/67dc67f0.050a0220.25ae54.001f.GAE@google.com/
+Cc: stable@vger.kernel.org
+Signed-off-by: Oleg Nesterov <oleg@redhat.com>
+Link: https://lore.kernel.org/r/20250324160003.GA8878@redhat.com
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+---
+ fs/exec.c | 15 +++++++++------
+ 1 file changed, 9 insertions(+), 6 deletions(-)
+
+--- a/fs/exec.c
+++ b/fs/exec.c
+@@ -1229,13 +1229,12 @@ int begin_new_exec(struct linux_binprm *
+ 	 */
+ 	bprm->point_of_no_return = true;
+ 
+-	/*
+-	 * Make this the only thread in the thread group.
+-	 */
+	/* Make this the only thread in the thread group */
+ 	retval = de_thread(me);
+ 	if (retval)
+ 		goto out;
+-
+	/* see the comment in check_unsafe_exec() */
+	current->fs->in_exec = 0;
+ 	/*
+ 	 * Cancel any io_uring activity across execve
+ 	 */
+@@ -1497,6 +1496,8 @@ static void free_bprm(struct linux_binpr
+ 	}
+ 	free_arg_pages(bprm);
+ 	if (bprm->cred) {
+		/* in case exec fails before de_thread() succeeds */
+		current->fs->in_exec = 0;
+ 		mutex_unlock(&current->signal->cred_guard_mutex);
+ 		abort_creds(bprm->cred);
+ 	}
+@@ -1618,6 +1619,10 @@ static void check_unsafe_exec(struct lin
+ 	 * suid exec because the differently privileged task
+ 	 * will be able to manipulate the current directory, etc.
+ 	 * It would be nice to force an unshare instead...
+	 *
+	 * Otherwise we set fs->in_exec = 1 to deny clone(CLONE_FS)
+	 * from another sub-thread until de_thread() succeeds, this
+	 * state is protected by cred_guard_mutex we hold.
+ 	 */
+ 	n_fs = 1;
+ 	spin_lock(&p->fs->lock);
+@@ -1862,7 +1867,6 @@ static int bprm_execve(struct linux_binp
+ 
+ 	sched_mm_cid_after_execve(current);
+ 	/* execve succeeded */
+-	current->fs->in_exec = 0;
+ 	current->in_execve = 0;
+ 	rseq_execve(current);
+ 	user_events_execve(current);
+@@ -1881,7 +1885,6 @@ out:
+ 		force_fatal_sig(SIGSEGV);
+ 
+ 	sched_mm_cid_after_execve(current);
+-	current->fs->in_exec = 0;
+ 	current->in_execve = 0;
+ 
+ 	return retval;