release 6.14.4

2025-04-26 01:02:31 +03:00
parent f2e779751a
commit 23be27fbba
43 changed files with 497 additions and 637 deletions
--- a/debian/patches/patchset-pf/fixes/0001-Kunit-to-check-the-longest-symbol-length.patch
+++ b/debian/patches/patchset-pf/fixes/0001-Kunit-to-check-the-longest-symbol-length.patch
@@ -1,4 +1,4 @@
-From a1eb9a3160dc9e3cee6abdeab8e41c2265a2d7a1 Mon Sep 17 00:00:00 2001
+From 4506de20739ac4726a258faa98609a552184d2d2 Mon Sep 17 00:00:00 2001
 From: =?UTF-8?q?Sergio=20Gonz=C3=A1lez=20Collado?=
 <sergio.collado@gmail.com>
 Date: Sun, 2 Mar 2025 23:15:18 +0100
--- a/debian/patches/patchset-pf/fixes/0002-x86-tools-Drop-duplicate-unlikely-definition-in-insn.patch
+++ b/debian/patches/patchset-pf/fixes/0002-x86-tools-Drop-duplicate-unlikely-definition-in-insn.patch
@@ -1,4 +1,4 @@
-From 1ff7499aaa4cec11be79e97c118978fd781073a6 Mon Sep 17 00:00:00 2001
+From b5a4b82efd19d0687a5582a58f6830bf714e34fc Mon Sep 17 00:00:00 2001
 From: Nathan Chancellor <nathan@kernel.org>
 Date: Tue, 18 Mar 2025 15:32:30 -0700
 Subject: x86/tools: Drop duplicate unlikely() definition in
--- a/debian/patches/patchset-pf/fixes/0003-drm-amdgpu-mes11-optimize-MES-pipe-FW-version-fetchi.patch
+++ b/debian/patches/patchset-pf/fixes/0003-drm-amdgpu-mes11-optimize-MES-pipe-FW-version-fetchi.patch
@@ -1,29 +0,0 @@
-From 72096487bfe8ebc52731c264536418c51854d999 Mon Sep 17 00:00:00 2001
-From: Alex Deucher <alexander.deucher@amd.com>
-Date: Thu, 27 Mar 2025 17:33:49 -0400
-Subject: drm/amdgpu/mes11: optimize MES pipe FW version fetching
-
-Don't fetch it again if we already have it.  It seems the
-don't reliably have the proper value at resume in some
-cases.
-
-Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4083
-Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
-Cherry-picked-for: https://gitlab.archlinux.org/archlinux/packaging/packages/linux/-/issues/121
----
- drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 4 ++++
- 1 file changed, 4 insertions(+)
-
---- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
-+++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
-@@ -899,6 +899,10 @@ static void mes_v11_0_get_fw_version(str
- {
- 	int pipe;
- 
-+	/* return early if we have already fetched these */
-+	if (adev->mes.sched_version && adev->mes.kiq_version)
-+		return;
-+
- 	/* get MES scheduler/KIQ versions */
- 	mutex_lock(&adev->srbm_mutex);
- 
--- a/debian/patches/patchset-pf/fixes/0003-tpm-Mask-TPM-RC-in-tpm2_start_auth_session.patch
+++ b/debian/patches/patchset-pf/fixes/0003-tpm-Mask-TPM-RC-in-tpm2_start_auth_session.patch
@@ -1,4 +1,4 @@
-From a1dfb99dca82ff97b00ce76f8f987ade471875d1 Mon Sep 17 00:00:00 2001
+From 762de1df7e501e019c3ae273c7e5e2d4c04b303c Mon Sep 17 00:00:00 2001
 From: Jarkko Sakkinen <jarkko@kernel.org>
 Date: Mon, 7 Apr 2025 15:28:05 +0300
 Subject: tpm: Mask TPM RC in tpm2_start_auth_session()
--- a/debian/patches/patchset-pf/fixes/0004-ice-mark-ice_write_prof_mask_reg-as-noinline.patch
+++ b/debian/patches/patchset-pf/fixes/0004-ice-mark-ice_write_prof_mask_reg-as-noinline.patch
@@ -1,4 +1,4 @@
-From 7b594a3c7b41db58884da466607417ca27c08a1d Mon Sep 17 00:00:00 2001
+From e3d18eed972374cfbac1e58cf109209b07c1e27e Mon Sep 17 00:00:00 2001
 From: Oleksandr Natalenko <oleksandr@natalenko.name>
 Date: Tue, 8 Apr 2025 12:02:36 +0200
 Subject: ice: mark ice_write_prof_mask_reg() as noinline
--- a/debian/patches/patchset-pf/fixes/0005-fixes-6.14-update-tpm2_start_auth_session-fix.patch
+++ b/debian/patches/patchset-pf/fixes/0005-fixes-6.14-update-tpm2_start_auth_session-fix.patch
@@ -1,4 +1,4 @@
-From 42a4f494db975d62916c73f5d637aef9be343d70 Mon Sep 17 00:00:00 2001
+From 74c95e079dc8b3c53ade90b2070458c0c69f3fdf Mon Sep 17 00:00:00 2001
 From: Oleksandr Natalenko <oleksandr@natalenko.name>
 Date: Tue, 8 Apr 2025 19:51:44 +0200
 Subject: fixes-6.14: update tpm2_start_auth_session() fix
--- a/debian/patches/patchset-pf/fixes/0006-wifi-ath12k-Abort-scan-before-removing-link-interfac.patch
+++ b/debian/patches/patchset-pf/fixes/0006-wifi-ath12k-Abort-scan-before-removing-link-interfac.patch
@@ -1,4 +1,4 @@
-From d3140c22ed2bc3c98dcf251659d78572e154a993 Mon Sep 17 00:00:00 2001
+From e56acee381a8e07edf1920fb58f3166f911b6e5c Mon Sep 17 00:00:00 2001
 From: Lingbo Kong <quic_lingbok@quicinc.com>
 Date: Wed, 26 Feb 2025 19:31:18 +0800
 Subject: wifi: ath12k: Abort scan before removing link interface to prevent
--- a/debian/patches/patchset-pf/fixes/0007-Kconfig-switch-CONFIG_SYSFS_SYCALL-default-to-n.patch
+++ b/debian/patches/patchset-pf/fixes/0007-Kconfig-switch-CONFIG_SYSFS_SYCALL-default-to-n.patch
@@ -1,4 +1,4 @@
-From fa165a32074fba27286cc9d2464a647642ad6bc7 Mon Sep 17 00:00:00 2001
+From 8d0e02f81d08c7b1e082028af0f55a22e7e1dfb2 Mon Sep 17 00:00:00 2001
 From: Christian Brauner <brauner@kernel.org>
 Date: Tue, 15 Apr 2025 10:22:04 +0200
 Subject: Kconfig: switch CONFIG_SYSFS_SYCALL default to n
--- a/debian/patches/patchset-pf/fixes/0007-drm-amdgpu-mes12-optimize-MES-pipe-FW-version-fetchi.patch
+++ b/debian/patches/patchset-pf/fixes/0007-drm-amdgpu-mes12-optimize-MES-pipe-FW-version-fetchi.patch
@@ -1,47 +0,0 @@
-From f1e8e30bef3757904d9e963f02ef297cd0c33240 Mon Sep 17 00:00:00 2001
-From: Alex Deucher <alexander.deucher@amd.com>
-Date: Fri, 28 Mar 2025 09:08:57 -0400
-Subject: drm/amdgpu/mes12: optimize MES pipe FW version fetching
-
-Don't fetch it again if we already have it.  It seems the
-registers don't reliably have the value at resume in some
-cases.
-
-Fixes: 785f0f9fe742 ("drm/amdgpu: Add mes v12_0 ip block support (v4)")
-Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
----
- drivers/gpu/drm/amd/amdgpu/mes_v12_0.c | 21 ++++++++++++---------
- 1 file changed, 12 insertions(+), 9 deletions(-)
-
---- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
-+++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
-@@ -1390,17 +1390,20 @@ static int mes_v12_0_queue_init(struct a
- 		mes_v12_0_queue_init_register(ring);
- 	}
- 
--	/* get MES scheduler/KIQ versions */
--	mutex_lock(&adev->srbm_mutex);
--	soc21_grbm_select(adev, 3, pipe, 0, 0);
-+	if (((pipe == AMDGPU_MES_SCHED_PIPE) && !adev->mes.sched_version) ||
-+	    ((pipe == AMDGPU_MES_KIQ_PIPE) && !adev->mes.kiq_version)) {
-+		/* get MES scheduler/KIQ versions */
-+		mutex_lock(&adev->srbm_mutex);
-+		soc21_grbm_select(adev, 3, pipe, 0, 0);
- 
--	if (pipe == AMDGPU_MES_SCHED_PIPE)
--		adev->mes.sched_version = RREG32_SOC15(GC, 0, regCP_MES_GP3_LO);
--	else if (pipe == AMDGPU_MES_KIQ_PIPE && adev->enable_mes_kiq)
--		adev->mes.kiq_version = RREG32_SOC15(GC, 0, regCP_MES_GP3_LO);
-+		if (pipe == AMDGPU_MES_SCHED_PIPE)
-+			adev->mes.sched_version = RREG32_SOC15(GC, 0, regCP_MES_GP3_LO);
-+		else if (pipe == AMDGPU_MES_KIQ_PIPE && adev->enable_mes_kiq)
-+			adev->mes.kiq_version = RREG32_SOC15(GC, 0, regCP_MES_GP3_LO);
- 
--	soc21_grbm_select(adev, 0, 0, 0, 0);
--	mutex_unlock(&adev->srbm_mutex);
-+		soc21_grbm_select(adev, 0, 0, 0, 0);
-+		mutex_unlock(&adev->srbm_mutex);
-+	}
- 
- 	return 0;
- }
--- a/debian/patches/patchset-pf/fixes/0008-sched-eevdf-Fix-se-slice-being-set-to-U64_MAX-and-re.patch
+++ b/debian/patches/patchset-pf/fixes/0008-sched-eevdf-Fix-se-slice-being-set-to-U64_MAX-and-re.patch
@@ -0,0 +1,80 @@
+From ea3ec10cacc746176a25dbd74c8d168e1c096a62 Mon Sep 17 00:00:00 2001
+From: Omar Sandoval <osandov@fb.com>
+Date: Fri, 25 Apr 2025 01:51:24 -0700
+Subject: sched/eevdf: Fix se->slice being set to U64_MAX and resulting crash
+
+There is a code path in dequeue_entities() that can set the slice of a
+sched_entity to U64_MAX, which sometimes results in a crash.
+
+The offending case is when dequeue_entities() is called to dequeue a
+delayed group entity, and then the entity's parent's dequeue is delayed.
+In that case:
+
+1. In the if (entity_is_task(se)) else block at the beginning of
+   dequeue_entities(), slice is set to
+   cfs_rq_min_slice(group_cfs_rq(se)). If the entity was delayed, then
+   it has no queued tasks, so cfs_rq_min_slice() returns U64_MAX.
+2. The first for_each_sched_entity() loop dequeues the entity.
+3. If the entity was its parent's only child, then the next iteration
+   tries to dequeue the parent.
+4. If the parent's dequeue needs to be delayed, then it breaks from the
+   first for_each_sched_entity() loop _without updating slice_.
+5. The second for_each_sched_entity() loop sets the parent's ->slice to
+   the saved slice, which is still U64_MAX.
+
+This throws off subsequent calculations with potentially catastrophic
+results. A manifestation we saw in production was:
+
+6. In update_entity_lag(), se->slice is used to calculate limit, which
+   ends up as a huge negative number.
+7. limit is used in se->vlag = clamp(vlag, -limit, limit). Because limit
+   is negative, vlag > limit, so se->vlag is set to the same huge
+   negative number.
+8. In place_entity(), se->vlag is scaled, which overflows and results in
+   another huge (positive or negative) number.
+9. The adjusted lag is subtracted from se->vruntime, which increases or
+   decreases se->vruntime by a huge number.
+10. pick_eevdf() calls entity_eligible()/vruntime_eligible(), which
+    incorrectly returns false because the vruntime is so far from the
+    other vruntimes on the queue, causing the
+    (vruntime - cfs_rq->min_vruntime) * load calculation to overflow.
+11. Nothing appears to be eligible, so pick_eevdf() returns NULL.
+12. pick_next_entity() tries to dereference the return value of
+    pick_eevdf() and crashes.
+
+Dumping the cfs_rq states from the core dumps with drgn showed tell-tale
+huge vruntime ranges and bogus vlag values, and I also traced se->slice
+being set to U64_MAX on live systems (which was usually "benign" since
+the rest of the runqueue needed to be in a particular state to crash).
+
+Fix it in dequeue_entities() by always setting slice from the first
+non-empty cfs_rq.
+
+Fixes: aef6987d8954 ("sched/eevdf: Propagate min_slice up the cgroup hierarchy")
+Signed-off-by: Omar Sandoval <osandov@fb.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://lkml.kernel.org/r/f0c2d1072be229e1bdddc73c0703919a8b00c652.1745570998.git.osandov@fb.com
+---
+ kernel/sched/fair.c | 4 +---
+ 1 file changed, 1 insertion(+), 3 deletions(-)
+
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -7096,9 +7096,6 @@ static int dequeue_entities(struct rq *r
+ 		h_nr_idle = task_has_idle_policy(p);
+ 		if (task_sleep || task_delayed || !se->sched_delayed)
+ 			h_nr_runnable = 1;
+-	} else {
+-		cfs_rq = group_cfs_rq(se);
+-		slice = cfs_rq_min_slice(cfs_rq);
+ 	}
+ 
+ 	for_each_sched_entity(se) {
+@@ -7108,6 +7105,7 @@ static int dequeue_entities(struct rq *r
+ 			if (p && &p->se == se)
+ 				return -1;
+ 
++			slice = cfs_rq_min_slice(cfs_rq);
+ 			break;
+ 		}
+ 
--- a/debian/patches/patchset-pf/fixes/0008-wifi-iwlwifi-pcie-set-state-to-no-FW-before-reset-ha.patch
+++ b/debian/patches/patchset-pf/fixes/0008-wifi-iwlwifi-pcie-set-state-to-no-FW-before-reset-ha.patch
@@ -1,50 +0,0 @@
-From 81c23adad48324b73fe0993f332407c5be050bb5 Mon Sep 17 00:00:00 2001
-From: Johannes Berg <johannes.berg@intel.com>
-Date: Thu, 3 Apr 2025 11:04:37 +0000
-Subject: wifi: iwlwifi: pcie: set state to no-FW before reset handshake
-
-The reset handshake attempts to kill the firmware, and it'll go
-into a pretty much dead state once we do that. However, if it
-times out, then we'll attempt to dump the firmware to be able
-to see why it didn't respond. During this dump, we cannot treat
-it as if it was still running, since we just tried to kill it,
-otherwise dumping will attempt to send a DBGC stop command. As
-this command will time out, we'll go into a reset loop.
-
-For now, fix this by setting the trans->state to say firmware
-isn't running before doing the reset handshake. In the longer
-term, we should clean up the way this state is handled.
-
-It's not entirely clear but it seems likely that this issue was
-introduced by my rework of the error handling, prior to that it
-would've been synchronous at that point and (I think) not have
-attempted to reset since it was already doing down.
-
-Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219967
-Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219968
-Closes: https://gitlab.archlinux.org/archlinux/packaging/packages/linux/-/issues/128
-Fixes: 7391b2a4f7db ("wifi: iwlwifi: rework firmware error handling")
-Signed-off-by: Johannes Berg <johannes.berg@intel.com>
-Signed-off-by: Oleksandr Natalenko <oleksandr@natalenko.name>
----
- drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c | 8 +++++++-
- 1 file changed, 7 insertions(+), 1 deletion(-)
-
---- a/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c
-+++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c
-@@ -147,8 +147,14 @@ static void _iwl_trans_pcie_gen2_stop_de
- 		return;
- 
- 	if (trans->state >= IWL_TRANS_FW_STARTED &&
--	    trans_pcie->fw_reset_handshake)
-+	    trans_pcie->fw_reset_handshake) {
-+		/*
-+		 * Reset handshake can dump firmware on timeout, but that
-+		 * should assume that the firmware is already dead.
-+		 */
-+		trans->state = IWL_TRANS_NO_FW;
- 		iwl_trans_pcie_fw_reset_handshake(trans);
-+	}
- 
- 	trans_pcie->is_down = true;
-