release 6.11.8

commit 428ce77115
parent 02e7703ba0
debian/bin/genpatch-pfkernel (5 lines changed)
@@ -5,11 +5,12 @@ export GIT_OPTIONAL_LOCKS=0
 
 w=$(git rev-parse --path-format=absolute --show-toplevel) ; : "${w:?}" ; cd "$w"
 
-dst='debian/patches/pf'
+dst='debian/patches/pf-tmp'
 src='../linux-extras'
 branches='amd-pstate amd-rapl cpu cpuidle crypto fixes ksm zstd'
 
-[ -d "${dst}" ]
+if [ -d "${dst}" ] ; then rm -rf "${dst}" ; fi
+mkdir -p "${dst}"
 
 kver=
 if [ -n "$1" ] ; then
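The destination switch from debian/patches/pf to debian/patches/pf-tmp suggests the series is now regenerated into a scratch directory and swapped into place afterwards. A minimal sketch of that pattern, assuming a final rename that is not visible in this hunk:

    #!/bin/sh
    # Sketch only: regenerate into a scratch dir, then swap it into place.
    # The export step and the final rename are assumptions, not shown above.
    set -ef

    dst='debian/patches/pf'
    tmp="${dst}-tmp"

    rm -rf "${tmp}" ; mkdir -p "${tmp}"
    # ... export patches from the configured branches into "${tmp}" ...
    rm -rf "${dst}" ; mv "${tmp}" "${dst}"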
debian/changelog (7 lines changed)

@@ -1,3 +1,10 @@
+linux (6.11.8-1) sid; urgency=medium
+
+  * New upstream stable update:
+    https://www.kernel.org/pub/linux/kernel/v6.x/ChangeLog-6.11.8
+
+ -- Konstantin Demin <rockdrilla@gmail.com>  Fri, 15 Nov 2024 10:17:08 +0300
+
 linux (6.11.7-1) sid; urgency=medium
 
   * New upstream stable update:
debian/config/amd64/config.mobile (2 lines changed)

@@ -3281,6 +3281,7 @@ CONFIG_CAN_C_CAN_PCI=m
 ## file: drivers/net/can/cc770/Kconfig
 ##
 CONFIG_CAN_CC770=m
+CONFIG_CAN_CC770_ISA=m
 CONFIG_CAN_CC770_PLATFORM=m
 
 ##
@@ -3323,6 +3324,7 @@ CONFIG_CAN_KVASER_PCI=m
 CONFIG_CAN_PEAK_PCI=m
 CONFIG_CAN_PEAK_PCIEC=y
 CONFIG_CAN_PLX_PCI=m
+CONFIG_CAN_SJA1000_ISA=m
 CONFIG_CAN_SJA1000_PLATFORM=m
 
 ##
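To spot-check that both newly enabled CAN options are present in the shipped config template (an illustrative grep, run from the repository root):

    grep -En 'CONFIG_CAN_(CC770|SJA1000)_ISA' debian/config/amd64/config.mobile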
debian/patches/patchset-pf/fixes/0005-btrfs-also-add-stripe-entries-for-NOCOW-writes.patch (renamed from patchset-pf/fixes/0006-btrfs-also-add-stripe-entries-for-NOCOW-writes.patch; hunk offsets refreshed)

@@ -18,7 +18,7 @@ Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
 
 --- a/fs/btrfs/super.c
 +++ b/fs/btrfs/super.c
-@@ -2646,7 +2646,7 @@ module_exit(exit_btrfs_fs)
+@@ -2631,7 +2631,7 @@ module_exit(exit_btrfs_fs)
 
 MODULE_DESCRIPTION("B-Tree File System (BTRFS)");
 MODULE_LICENSE("GPL");
debian/patches/patchset-pf/fixes/0005-e1000e-Remove-Meteor-Lake-SMBUS-workarounds.patch (deleted)

@@ -1,57 +0,0 @@
From 1d120544580708eae6bd5981b308ca17735edaac Mon Sep 17 00:00:00 2001
From: Vitaly Lifshits <vitaly.lifshits@intel.com>
Date: Tue, 1 Oct 2024 20:08:48 +0300
Subject: e1000e: Remove Meteor Lake SMBUS workarounds

This is a partial revert to commit 76a0a3f9cc2f ("e1000e: fix force smbus
during suspend flow"). That commit fixed a sporadic PHY access issue but
introduced a regression in runtime suspend flows.
The original issue on Meteor Lake systems was rare in terms of the
reproduction rate and the number of the systems affected.

After the integration of commit 0a6ad4d9e169 ("e1000e: avoid failing the
system during pm_suspend"), PHY access loss can no longer cause a
system-level suspend failure. As it only occurs when the LAN cable is
disconnected, and is recovered during system resume flow. Therefore, its
functional impact is low, and the priority is given to stabilizing
runtime suspend.

Fixes: 76a0a3f9cc2f ("e1000e: fix force smbus during suspend flow")
Signed-off-by: Vitaly Lifshits <vitaly.lifshits@intel.com>
---
 drivers/net/ethernet/intel/e1000e/ich8lan.c | 17 ++++-------------
 1 file changed, 4 insertions(+), 13 deletions(-)

--- a/drivers/net/ethernet/intel/e1000e/ich8lan.c
+++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c
@@ -1205,12 +1205,10 @@ s32 e1000_enable_ulp_lpt_lp(struct e1000
 	if (ret_val)
 		goto out;
 
-	if (hw->mac.type != e1000_pch_mtp) {
-		ret_val = e1000e_force_smbus(hw);
-		if (ret_val) {
-			e_dbg("Failed to force SMBUS: %d\n", ret_val);
-			goto release;
-		}
+	ret_val = e1000e_force_smbus(hw);
+	if (ret_val) {
+		e_dbg("Failed to force SMBUS: %d\n", ret_val);
+		goto release;
 	}
 
 	/* Si workaround for ULP entry flow on i127/rev6 h/w. Enable
@@ -1273,13 +1271,6 @@ s32 e1000_enable_ulp_lpt_lp(struct e1000
 	}
 
 release:
-	if (hw->mac.type == e1000_pch_mtp) {
-		ret_val = e1000e_force_smbus(hw);
-		if (ret_val)
-			e_dbg("Failed to force SMBUS over MTL system: %d\n",
-			      ret_val);
-	}
-
 	hw->phy.ops.release(hw);
 out:
 	if (ret_val)
debian/patches/patchset-pf/fixes/0006-KVM-x86-switch-hugepage-recovery-thread-to-vhost_tas.patch (new file, 326 lines)

@@ -0,0 +1,326 @@
From 11fa4cfe7134f44f2cdac4b25636fc3291096979 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 8 Nov 2024 08:07:37 -0500
Subject: KVM: x86: switch hugepage recovery thread to vhost_task

kvm_vm_create_worker_thread() is meant to be used for kthreads that
can consume significant amounts of CPU time on behalf of a VM or in
response to how the VM behaves (for example how it accesses its memory).
Therefore it wants to charge the CPU time consumed by that work to
the VM's container.

However, because of these threads, cgroups which have kvm instances inside
never complete freezing. This can be trivially reproduced:

  root@test ~# mkdir /sys/fs/cgroup/test
  root@test ~# echo $fish_pid > /sys/fs/cgroup/test/cgroup.procs
  root@test ~# qemu-system-x86_64 --nographic -enable-kvm

and in another terminal:

  root@test ~# echo 1 > /sys/fs/cgroup/test/cgroup.freeze
  root@test ~# cat /sys/fs/cgroup/test/cgroup.events
  populated 1
  frozen 0

The cgroup freezing happens in the signal delivery path, but
kvm_vm_worker_thread() threads never call into the signal delivery path
while joining non-root cgroups, so they never get frozen. Because the cgroup
freezer determines whether a given cgroup is frozen by comparing the number
of frozen threads to the total number of threads in the cgroup, the cgroup
never becomes frozen and users waiting for the state transition may hang
indefinitely.

Since the worker kthread is tied to a user process, it's better if
it behaves similarly to user tasks as much as possible, including
being able to send SIGSTOP and SIGCONT. In fact, vhost_task is all
that kvm_vm_create_worker_thread() wanted to be and more: not only does it
inherit the userspace process's cgroups, it has other niceties like
being parented properly in the process tree. Use it instead of the
homegrown alternative.

(Commit message based on emails from Tejun.)

Reported-by: Tejun Heo <tj@kernel.org>
Reported-by: Luca Boccassi <bluca@debian.org>
Tested-by: Luca Boccassi <bluca@debian.org>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |   4 +-
 arch/x86/kvm/Kconfig            |   1 +
 arch/x86/kvm/mmu/mmu.c          |  67 +++++++++++----------
 include/linux/kvm_host.h        |   6 --
 virt/kvm/kvm_main.c             | 103 --------------------------------
 5 files changed, 39 insertions(+), 142 deletions(-)

--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -26,6 +26,7 @@
 #include <linux/irqbypass.h>
 #include <linux/hyperv.h>
 #include <linux/kfifo.h>
+#include <linux/sched/vhost_task.h>
 
 #include <asm/apic.h>
 #include <asm/pvclock-abi.h>
@@ -1445,7 +1446,8 @@ struct kvm_arch {
 	bool sgx_provisioning_allowed;
 
 	struct kvm_x86_pmu_event_filter __rcu *pmu_event_filter;
-	struct task_struct *nx_huge_page_recovery_thread;
+	struct vhost_task *nx_huge_page_recovery_thread;
+	u64 nx_huge_page_next;
 
 #ifdef CONFIG_X86_64
 	/* The number of TDP MMU pages across all roots. */
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -29,6 +29,7 @@ config KVM
 	select HAVE_KVM_IRQ_BYPASS
 	select HAVE_KVM_IRQ_ROUTING
 	select HAVE_KVM_READONLY_MEM
+	select VHOST_TASK
 	select KVM_ASYNC_PF
 	select USER_RETURN_NOTIFIER
 	select KVM_MMIO
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -7160,7 +7160,7 @@ static int set_nx_huge_pages(const char
 		kvm_mmu_zap_all_fast(kvm);
 		mutex_unlock(&kvm->slots_lock);
 
-		wake_up_process(kvm->arch.nx_huge_page_recovery_thread);
+		vhost_task_wake(kvm->arch.nx_huge_page_recovery_thread);
 	}
 	mutex_unlock(&kvm_lock);
 }
@@ -7306,7 +7306,7 @@ static int set_nx_huge_pages_recovery_pa
 		mutex_lock(&kvm_lock);
 
 		list_for_each_entry(kvm, &vm_list, vm_list)
-			wake_up_process(kvm->arch.nx_huge_page_recovery_thread);
+			vhost_task_wake(kvm->arch.nx_huge_page_recovery_thread);
 
 		mutex_unlock(&kvm_lock);
 	}
@@ -7409,62 +7409,65 @@ static void kvm_recover_nx_huge_pages(st
 	srcu_read_unlock(&kvm->srcu, rcu_idx);
 }
 
-static long get_nx_huge_page_recovery_timeout(u64 start_time)
+#define NX_HUGE_PAGE_DISABLED (-1)
+
+static u64 get_nx_huge_page_recovery_next(void)
 {
 	bool enabled;
 	uint period;
 
 	enabled = calc_nx_huge_pages_recovery_period(&period);
 
-	return enabled ? start_time + msecs_to_jiffies(period) - get_jiffies_64()
-		       : MAX_SCHEDULE_TIMEOUT;
+	return enabled ? get_jiffies_64() + msecs_to_jiffies(period)
+		       : NX_HUGE_PAGE_DISABLED;
 }
 
-static int kvm_nx_huge_page_recovery_worker(struct kvm *kvm, uintptr_t data)
+static void kvm_nx_huge_page_recovery_worker_kill(void *data)
 {
-	u64 start_time;
-	long remaining_time;
-
-	while (true) {
-		start_time = get_jiffies_64();
-		remaining_time = get_nx_huge_page_recovery_timeout(start_time);
-
-		set_current_state(TASK_INTERRUPTIBLE);
-		while (!kthread_should_stop() && remaining_time > 0) {
-			schedule_timeout(remaining_time);
-			remaining_time = get_nx_huge_page_recovery_timeout(start_time);
-			set_current_state(TASK_INTERRUPTIBLE);
-		}
+}
 
-		set_current_state(TASK_RUNNING);
+static bool kvm_nx_huge_page_recovery_worker(void *data)
+{
+	struct kvm *kvm = data;
+	long remaining_time;
 
-		if (kthread_should_stop())
-			return 0;
+	if (kvm->arch.nx_huge_page_next == NX_HUGE_PAGE_DISABLED)
+		return false;
 
-		kvm_recover_nx_huge_pages(kvm);
+	remaining_time = kvm->arch.nx_huge_page_next - get_jiffies_64();
+	if (remaining_time > 0) {
+		schedule_timeout(remaining_time);
+		/* check for signals and come back */
+		return true;
 	}
+
+	__set_current_state(TASK_RUNNING);
+	kvm_recover_nx_huge_pages(kvm);
+	kvm->arch.nx_huge_page_next = get_nx_huge_page_recovery_next();
+	return true;
 }
 
 int kvm_mmu_post_init_vm(struct kvm *kvm)
 {
-	int err;
-
 	if (nx_hugepage_mitigation_hard_disabled)
 		return 0;
 
-	err = kvm_vm_create_worker_thread(kvm, kvm_nx_huge_page_recovery_worker, 0,
-					  "kvm-nx-lpage-recovery",
-					  &kvm->arch.nx_huge_page_recovery_thread);
-	if (!err)
-		kthread_unpark(kvm->arch.nx_huge_page_recovery_thread);
+	kvm->arch.nx_huge_page_next = get_nx_huge_page_recovery_next();
+	kvm->arch.nx_huge_page_recovery_thread = vhost_task_create(
+		kvm_nx_huge_page_recovery_worker, kvm_nx_huge_page_recovery_worker_kill,
+		kvm, "kvm-nx-lpage-recovery");
 
-	return err;
+	if (!kvm->arch.nx_huge_page_recovery_thread)
+		return -ENOMEM;
+
+	vhost_task_start(kvm->arch.nx_huge_page_recovery_thread);
+	return 0;
 }
 
 void kvm_mmu_pre_destroy_vm(struct kvm *kvm)
 {
 	if (kvm->arch.nx_huge_page_recovery_thread)
-		kthread_stop(kvm->arch.nx_huge_page_recovery_thread);
+		vhost_task_stop(kvm->arch.nx_huge_page_recovery_thread);
 }
 
 #ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -2370,12 +2370,6 @@ static inline int kvm_arch_vcpu_run_pid_
 }
 #endif /* CONFIG_HAVE_KVM_VCPU_RUN_PID_CHANGE */
 
-typedef int (*kvm_vm_thread_fn_t)(struct kvm *kvm, uintptr_t data);
-
-int kvm_vm_create_worker_thread(struct kvm *kvm, kvm_vm_thread_fn_t thread_fn,
-				uintptr_t data, const char *name,
-				struct task_struct **thread_ptr);
-
 #ifdef CONFIG_KVM_XFER_TO_GUEST_WORK
 static inline void kvm_handle_signal_exit(struct kvm_vcpu *vcpu)
 {
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -6573,106 +6573,3 @@ void kvm_exit(void)
 	kvm_irqfd_exit();
 }
 EXPORT_SYMBOL_GPL(kvm_exit);
-
-struct kvm_vm_worker_thread_context {
-	struct kvm *kvm;
-	struct task_struct *parent;
-	struct completion init_done;
-	kvm_vm_thread_fn_t thread_fn;
-	uintptr_t data;
-	int err;
-};
-
-static int kvm_vm_worker_thread(void *context)
-{
-	/*
-	 * The init_context is allocated on the stack of the parent thread, so
-	 * we have to locally copy anything that is needed beyond initialization
-	 */
-	struct kvm_vm_worker_thread_context *init_context = context;
-	struct task_struct *parent;
-	struct kvm *kvm = init_context->kvm;
-	kvm_vm_thread_fn_t thread_fn = init_context->thread_fn;
-	uintptr_t data = init_context->data;
-	int err;
-
-	err = kthread_park(current);
-	/* kthread_park(current) is never supposed to return an error */
-	WARN_ON(err != 0);
-	if (err)
-		goto init_complete;
-
-	err = cgroup_attach_task_all(init_context->parent, current);
-	if (err) {
-		kvm_err("%s: cgroup_attach_task_all failed with err %d\n",
-			__func__, err);
-		goto init_complete;
-	}
-
-	set_user_nice(current, task_nice(init_context->parent));
-
-init_complete:
-	init_context->err = err;
-	complete(&init_context->init_done);
-	init_context = NULL;
-
-	if (err)
-		goto out;
-
-	/* Wait to be woken up by the spawner before proceeding. */
-	kthread_parkme();
-
-	if (!kthread_should_stop())
-		err = thread_fn(kvm, data);
-
-out:
-	/*
-	 * Move kthread back to its original cgroup to prevent it lingering in
-	 * the cgroup of the VM process, after the latter finishes its
-	 * execution.
-	 *
-	 * kthread_stop() waits on the 'exited' completion condition which is
-	 * set in exit_mm(), via mm_release(), in do_exit(). However, the
-	 * kthread is removed from the cgroup in the cgroup_exit() which is
-	 * called after the exit_mm(). This causes the kthread_stop() to return
-	 * before the kthread actually quits the cgroup.
-	 */
-	rcu_read_lock();
-	parent = rcu_dereference(current->real_parent);
-	get_task_struct(parent);
-	rcu_read_unlock();
-	cgroup_attach_task_all(parent, current);
-	put_task_struct(parent);
-
-	return err;
-}
-
-int kvm_vm_create_worker_thread(struct kvm *kvm, kvm_vm_thread_fn_t thread_fn,
-				uintptr_t data, const char *name,
-				struct task_struct **thread_ptr)
-{
-	struct kvm_vm_worker_thread_context init_context = {};
-	struct task_struct *thread;
-
-	*thread_ptr = NULL;
-	init_context.kvm = kvm;
-	init_context.parent = current;
-	init_context.thread_fn = thread_fn;
-	init_context.data = data;
-	init_completion(&init_context.init_done);
-
-	thread = kthread_run(kvm_vm_worker_thread, &init_context,
-			     "%s-%d", name, task_pid_nr(current));
-	if (IS_ERR(thread))
-		return PTR_ERR(thread);
-
-	/* kthread_run is never supposed to return NULL */
-	WARN_ON(thread == NULL);
-
-	wait_for_completion(&init_context.init_done);
-
-	if (!init_context.err)
-		*thread_ptr = thread;
-
-	return init_context.err;
-}
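The vhost_task surface used by this patch is small: a worker callback that returns true to be invoked again, a kill callback run when the owner gets SIGKILL, and create/start/wake/stop calls. A condensed, illustrative sketch of that lifecycle (every name other than the vhost_task_* calls is made up here):

    #include <linux/errno.h>
    #include <linux/sched/vhost_task.h>

    struct my_ctx {
    	struct vhost_task *task;	/* in the patch: kvm->arch.nx_huge_page_recovery_thread */
    };

    /* Invoked if the owning process is SIGKILLed; nothing to clean up here. */
    static void my_worker_kill(void *data)
    {
    }

    /* Return true to be called again, false to let the task exit. */
    static bool my_worker(void *data)
    {
    	/* ... sleep until the next deadline, then do one unit of work ... */
    	return true;
    }

    static int my_ctx_start(struct my_ctx *ctx)
    {
    	ctx->task = vhost_task_create(my_worker, my_worker_kill, ctx, "my-worker");
    	if (!ctx->task)
    		return -ENOMEM;
    	vhost_task_start(ctx->task);	/* runs as a child of current, in its cgroups */
    	return 0;
    }

    static void my_ctx_stop(struct my_ctx *ctx)
    {
    	if (ctx->task)
    		vhost_task_stop(ctx->task);	/* waits for the task to exit */
    }

Because the task is parented to the owning process and goes through the normal signal path, cgroup freezing and SIGSTOP behave for it just as they do for user threads.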
@@ -61,7 +61,7 @@ Subject: ZEN: drm/amdgpu/pm: Allow override of min_power_limit with
 
 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
 +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
-@@ -2775,7 +2775,10 @@ int smu_get_power_limit(void *handle,
+@@ -2785,7 +2785,10 @@ int smu_get_power_limit(void *handle,
 		*limit = smu->max_power_limit;
 		break;
 	case SMU_PPT_LIMIT_MIN:
@@ -73,7 +73,7 @@ Subject: ZEN: drm/amdgpu/pm: Allow override of min_power_limit with
 		break;
 	default:
 		return -EINVAL;
-@@ -2799,7 +2802,14 @@ static int smu_set_power_limit(void *han
+@@ -2809,7 +2812,14 @@ static int smu_set_power_limit(void *han
 	if (smu->ppt_funcs->set_power_limit)
 		return smu->ppt_funcs->set_power_limit(smu, limit_type, limit);
 
debian/patches/patchset-zen/sauce/0025-ZEN-mm-Stop-kswapd-early-when-nothing-s-waiting-for-.patch (new file, 167 lines)

@@ -0,0 +1,167 @@
From c47df2793088980a32d6706da886fe32f7f045e6 Mon Sep 17 00:00:00 2001
From: Sultan Alsawaf <sultan@kerneltoast.com>
Date: Sun, 19 Apr 2020 19:59:18 -0700
Subject: ZEN: mm: Stop kswapd early when nothing's waiting for it to free
 pages

Contains:
- mm: Stop kswapd early when nothing's waiting for it to free pages

Keeping kswapd running when all the failed allocations that invoked it
are satisfied incurs a high overhead due to unnecessary page eviction
and writeback, as well as spurious VM pressure events to various
registered shrinkers. When kswapd doesn't need to work to make an
allocation succeed anymore, stop it prematurely to save resources.

Signed-off-by: Sultan Alsawaf <sultan@kerneltoast.com>

- mm: Don't stop kswapd on a per-node basis when there are no waiters

The page allocator wakes all kswapds in an allocation context's allowed
nodemask in the slow path, so it doesn't make sense to have the kswapd-
waiter count per NUMA node. Instead, it should be a global counter
to stop all kswapds when there are no failed allocation requests.

Signed-off-by: Sultan Alsawaf <sultan@kerneltoast.com>

- mm: Increment kswapd_waiters for throttled direct reclaimers

Throttled direct reclaimers will wake up kswapd and wait for kswapd to
satisfy their page allocation request, even when the failed allocation
lacks the __GFP_KSWAPD_RECLAIM flag in its gfp mask. As a result, kswapd
may think that there are no waiters and thus exit prematurely, causing
throttled direct reclaimers lacking __GFP_KSWAPD_RECLAIM to stall on
waiting for kswapd to wake them up. Incrementing the kswapd_waiters
counter when such direct reclaimers become throttled fixes the problem.

Signed-off-by: Sultan Alsawaf <sultan@kerneltoast.com>
---
 mm/internal.h   |  1 +
 mm/page_alloc.c | 17 ++++++++++++++---
 mm/vmscan.c     | 19 +++++++++++++------
 3 files changed, 28 insertions(+), 9 deletions(-)

--- a/mm/internal.h
+++ b/mm/internal.h
@@ -686,6 +686,7 @@ extern void post_alloc_hook(struct page
 extern bool free_pages_prepare(struct page *page, unsigned int order);
 
 extern int user_min_free_kbytes;
+extern atomic_long_t kswapd_waiters;
 
 void free_unref_page(struct page *page, unsigned int order);
 void free_unref_folios(struct folio_batch *fbatch);
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -88,6 +88,8 @@ typedef int __bitwise fpi_t;
  */
 #define FPI_TO_TAIL ((__force fpi_t)BIT(1))
 
+atomic_long_t kswapd_waiters = ATOMIC_LONG_INIT(0);
+
 /* prevent >1 _updater_ of zone percpu pageset ->high and ->batch fields */
 static DEFINE_MUTEX(pcp_batch_high_lock);
 #define MIN_PERCPU_PAGELIST_HIGH_FRACTION (8)
@@ -4189,6 +4191,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, u
 	unsigned int cpuset_mems_cookie;
 	unsigned int zonelist_iter_cookie;
 	int reserve_flags;
+	bool woke_kswapd = false;
 
 restart:
 	compaction_retries = 0;
@@ -4228,8 +4231,13 @@ restart:
 		goto nopage;
 	}
 
-	if (alloc_flags & ALLOC_KSWAPD)
+	if (alloc_flags & ALLOC_KSWAPD) {
+		if (!woke_kswapd) {
+			atomic_long_inc(&kswapd_waiters);
+			woke_kswapd = true;
+		}
 		wake_all_kswapds(order, gfp_mask, ac);
+	}
 
 	/*
 	 * The adjusted alloc_flags might result in immediate success, so try
@@ -4445,9 +4453,12 @@ nopage:
 		goto retry;
 	}
 fail:
-	warn_alloc(gfp_mask, ac->nodemask,
-		   "page allocation failure: order:%u", order);
 got_pg:
+	if (woke_kswapd)
+		atomic_long_dec(&kswapd_waiters);
+	if (!page)
+		warn_alloc(gfp_mask, ac->nodemask,
+			   "page allocation failure: order:%u", order);
 	return page;
 }
 
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -6332,7 +6332,7 @@ retry:
 	return 0;
 }
 
-static bool allow_direct_reclaim(pg_data_t *pgdat)
+static bool allow_direct_reclaim(pg_data_t *pgdat, bool using_kswapd)
 {
 	struct zone *zone;
 	unsigned long pfmemalloc_reserve = 0;
@@ -6361,6 +6361,10 @@ static bool allow_direct_reclaim(pg_data
 
 	wmark_ok = free_pages > pfmemalloc_reserve / 2;
 
+	/* The throttled direct reclaimer is now a kswapd waiter */
+	if (unlikely(!using_kswapd && !wmark_ok))
+		atomic_long_inc(&kswapd_waiters);
+
 	/* kswapd must be awake if processes are being throttled */
 	if (!wmark_ok && waitqueue_active(&pgdat->kswapd_wait)) {
 		if (READ_ONCE(pgdat->kswapd_highest_zoneidx) > ZONE_NORMAL)
@@ -6426,7 +6430,7 @@ static bool throttle_direct_reclaim(gfp_
 
 		/* Throttle based on the first usable node */
 		pgdat = zone->zone_pgdat;
-		if (allow_direct_reclaim(pgdat))
+		if (allow_direct_reclaim(pgdat, gfp_mask & __GFP_KSWAPD_RECLAIM))
 			goto out;
 		break;
 	}
@@ -6448,11 +6452,14 @@ static bool throttle_direct_reclaim(gfp_
 	 */
 	if (!(gfp_mask & __GFP_FS))
 		wait_event_interruptible_timeout(pgdat->pfmemalloc_wait,
-			allow_direct_reclaim(pgdat), HZ);
+			allow_direct_reclaim(pgdat, true), HZ);
 	else
 		/* Throttle until kswapd wakes the process */
 		wait_event_killable(zone->zone_pgdat->pfmemalloc_wait,
-			allow_direct_reclaim(pgdat));
+			allow_direct_reclaim(pgdat, true));
+
+	if (unlikely(!(gfp_mask & __GFP_KSWAPD_RECLAIM)))
+		atomic_long_dec(&kswapd_waiters);
 
 	if (fatal_signal_pending(current))
 		return true;
@@ -6955,14 +6962,14 @@ restart:
 		 * able to safely make forward progress. Wake them
 		 */
 		if (waitqueue_active(&pgdat->pfmemalloc_wait) &&
-				allow_direct_reclaim(pgdat))
+				allow_direct_reclaim(pgdat, true))
 			wake_up_all(&pgdat->pfmemalloc_wait);
 
 		/* Check if kswapd should be suspending */
 		__fs_reclaim_release(_THIS_IP_);
 		ret = kthread_freezable_should_stop(&was_frozen);
 		__fs_reclaim_acquire(_THIS_IP_);
-		if (was_frozen || ret)
+		if (was_frozen || ret || !atomic_long_read(&kswapd_waiters))
 			break;
 
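At its core the change is one global counter: allocation slow paths increment it while they wait, and kswapd keeps reclaiming only while it is non-zero. The same idea rendered as a compact user-space model (all names illustrative, single-threaded for brevity):

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    /* Global count of allocation paths currently waiting on reclaim,
     * mirroring kswapd_waiters in the patch. */
    static atomic_long waiters;

    /* Allocation slow path: register as a waiter for the duration. */
    static bool slowpath_alloc(bool (*try_alloc)(void))
    {
    	atomic_fetch_add(&waiters, 1);	/* atomic_long_inc(&kswapd_waiters) */
    	bool ok = try_alloc();		/* retry loop elided */
    	atomic_fetch_sub(&waiters, 1);	/* atomic_long_dec() at got_pg */
    	return ok;
    }

    /* Reclaim loop: keep working only while somebody is still waiting. */
    static void reclaim_loop(void (*reclaim_some)(void))
    {
    	while (atomic_load(&waiters) > 0)
    		reclaim_some();		/* body of the kswapd balance loop */
    }

    static bool fake_alloc(void) { return true; }
    static void fake_reclaim(void) { }

    int main(void)
    {
    	printf("alloc ok: %d, waiters left: %ld\n",
    	       slowpath_alloc(fake_alloc), atomic_load(&waiters));
    	reclaim_loop(fake_reclaim);	/* returns immediately: no waiters */
    	return 0;
    }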
debian/patches/series (5 lines changed)

@@ -301,12 +301,13 @@ patchset-zen/sauce/0021-ZEN-INTERACTIVE-mm-Lower-the-non-hugetlbpage-pageblo.pat
 patchset-zen/sauce/0022-ZEN-INTERACTIVE-dm-crypt-Disable-workqueues-for-cryp.patch
 patchset-zen/sauce/0023-ZEN-INTERACTIVE-mm-swap-Disable-swap-in-readahead.patch
 patchset-zen/sauce/0024-ZEN-Update-VHBA-driver.patch
+patchset-zen/sauce/0025-ZEN-mm-Stop-kswapd-early-when-nothing-s-waiting-for-.patch
 
 patchset-pf/fixes/0001-arch-Kconfig-Default-to-maximum-amount-of-ASLR-bits.patch
 patchset-pf/fixes/0002-cpufreq-Remove-LATENCY_MULTIPLIER.patch
 patchset-pf/fixes/0003-drivers-firmware-skip-simpledrm-if-nvidia-drm.modese.patch
 patchset-pf/fixes/0004-nfsd-add-more-info-to-WARN_ON_ONCE-on-failed-callbac.patch
-patchset-pf/fixes/0005-e1000e-Remove-Meteor-Lake-SMBUS-workarounds.patch
-patchset-pf/fixes/0006-btrfs-also-add-stripe-entries-for-NOCOW-writes.patch
+patchset-pf/fixes/0005-btrfs-also-add-stripe-entries-for-NOCOW-writes.patch
+patchset-pf/fixes/0006-KVM-x86-switch-hugepage-recovery-thread-to-vhost_tas.patch
 
 patchset-zen/fixes/0001-Partially-revert-drm-amd-amdgpu-add-pipe1-hardware-s.patch
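After a reshuffle like this, a quick dry-run against an unpacked source tree confirms every entry still applies (a sketch, assuming the usual -p1 patch level and no comment lines in series):

    # from the root of the unpacked kernel source, with debian/ present
    while read -r p ; do
    	[ -n "$p" ] || continue
    	patch -p1 -s --dry-run < "debian/patches/$p" || echo "FAILS: $p"
    done < debian/patches/series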
debian/templates/image.preinst.in (23 lines changed)

@@ -20,4 +20,27 @@ if [ -d /etc/kernel/preinst.d ] ; then
 		/etc/kernel/preinst.d
 fi
 
+f='/etc/apt/apt.conf.d/krd-linux'
+while : ; do
+	[ -s "$f" ] || break
+	h=$(sha256sum -b "$f" | awk '{print $1}')
+	[ "$h" = '70e8b9a9dd5f6e153840ed7046285b927cff37a2859e0fbc512a3eb8c576de24' ] || break
+	exit 0
+done
+cat > "$f" <<-'EOF'
+	APT
+	{
+		NeverAutoRemove
+		{
+			"^krd-linux-.*$";
+		};
+
+		VersionedKernelPackages
+		{
+			"krd-linux-.*";
+		};
+	};
+	EOF
+chmod 0644 "$f"
+
 exit 0
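The sha256 guard means the configuration file is rewritten only when its content differs from the single known revision. If the heredoc body is ever edited, the pinned hash has to be recomputed with the same invocation the script itself uses:

    sha256sum -b /etc/apt/apt.conf.d/krd-linux | awk '{print $1}'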