From 428ce77115d56d34c1ee190f1a5c5a673f4501ee Mon Sep 17 00:00:00 2001 From: Konstantin Demin Date: Fri, 15 Nov 2024 10:44:41 +0300 Subject: [PATCH] release 6.11.8 --- debian/bin/genpatch-pfkernel | 5 +- debian/changelog | 7 + debian/config/amd64/config.mobile | 2 + ...ftdep-declarations-for-hard-coded-cr.patch | 2 +- ...add-stripe-entries-for-NOCOW-writes.patch} | 0 ...Remove-Meteor-Lake-SMBUS-workarounds.patch | 57 --- ...ugepage-recovery-thread-to-vhost_tas.patch | 326 ++++++++++++++++++ ...m-Allow-override-of-min_power_limit-.patch | 4 +- ...pd-early-when-nothing-s-waiting-for-.patch | 167 +++++++++ debian/patches/series | 5 +- debian/templates/image.preinst.in | 23 ++ 11 files changed, 534 insertions(+), 64 deletions(-) rename debian/patches/patchset-pf/fixes/{0006-btrfs-also-add-stripe-entries-for-NOCOW-writes.patch => 0005-btrfs-also-add-stripe-entries-for-NOCOW-writes.patch} (100%) delete mode 100644 debian/patches/patchset-pf/fixes/0005-e1000e-Remove-Meteor-Lake-SMBUS-workarounds.patch create mode 100644 debian/patches/patchset-pf/fixes/0006-KVM-x86-switch-hugepage-recovery-thread-to-vhost_tas.patch create mode 100644 debian/patches/patchset-zen/sauce/0025-ZEN-mm-Stop-kswapd-early-when-nothing-s-waiting-for-.patch diff --git a/debian/bin/genpatch-pfkernel b/debian/bin/genpatch-pfkernel index d7490ce..b8b8591 100755 --- a/debian/bin/genpatch-pfkernel +++ b/debian/bin/genpatch-pfkernel @@ -5,11 +5,12 @@ export GIT_OPTIONAL_LOCKS=0 w=$(git rev-parse --path-format=absolute --show-toplevel) ; : "${w:?}" ; cd "$w" -dst='debian/patches/pf' +dst='debian/patches/pf-tmp' src='../linux-extras' branches='amd-pstate amd-rapl cpu cpuidle crypto fixes ksm zstd' -[ -d "${dst}" ] +if [ -d "${dst}" ] ; then rm -rf "${dst}" ; fi +mkdir -p "${dst}" kver= if [ -n "$1" ] ; then diff --git a/debian/changelog b/debian/changelog index d55c9dc..bdd344f 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,10 @@ +linux (6.11.8-1) sid; urgency=medium + + * New upstream stable update: + https://www.kernel.org/pub/linux/kernel/v6.x/ChangeLog-6.11.8 + + -- Konstantin Demin Fri, 15 Nov 2024 10:17:08 +0300 + linux (6.11.7-1) sid; urgency=medium * New upstream stable update: diff --git a/debian/config/amd64/config.mobile b/debian/config/amd64/config.mobile index 8b418a5..e35bab8 100644 --- a/debian/config/amd64/config.mobile +++ b/debian/config/amd64/config.mobile @@ -3281,6 +3281,7 @@ CONFIG_CAN_C_CAN_PCI=m ## file: drivers/net/can/cc770/Kconfig ## CONFIG_CAN_CC770=m +CONFIG_CAN_CC770_ISA=m CONFIG_CAN_CC770_PLATFORM=m ## @@ -3323,6 +3324,7 @@ CONFIG_CAN_KVASER_PCI=m CONFIG_CAN_PEAK_PCI=m CONFIG_CAN_PEAK_PCIEC=y CONFIG_CAN_PLX_PCI=m +CONFIG_CAN_SJA1000_ISA=m CONFIG_CAN_SJA1000_PLATFORM=m ## diff --git a/debian/patches/bugfix/all/fs-add-module_softdep-declarations-for-hard-coded-cr.patch b/debian/patches/bugfix/all/fs-add-module_softdep-declarations-for-hard-coded-cr.patch index 6ab61bd..30e99bc 100644 --- a/debian/patches/bugfix/all/fs-add-module_softdep-declarations-for-hard-coded-cr.patch +++ b/debian/patches/bugfix/all/fs-add-module_softdep-declarations-for-hard-coded-cr.patch @@ -18,7 +18,7 @@ Signed-off-by: Ben Hutchings --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c -@@ -2646,7 +2646,7 @@ module_exit(exit_btrfs_fs) +@@ -2631,7 +2631,7 @@ module_exit(exit_btrfs_fs) MODULE_DESCRIPTION("B-Tree File System (BTRFS)"); MODULE_LICENSE("GPL"); diff --git a/debian/patches/patchset-pf/fixes/0006-btrfs-also-add-stripe-entries-for-NOCOW-writes.patch b/debian/patches/patchset-pf/fixes/0005-btrfs-also-add-stripe-entries-for-NOCOW-writes.patch similarity index 100% rename from debian/patches/patchset-pf/fixes/0006-btrfs-also-add-stripe-entries-for-NOCOW-writes.patch rename to debian/patches/patchset-pf/fixes/0005-btrfs-also-add-stripe-entries-for-NOCOW-writes.patch diff --git a/debian/patches/patchset-pf/fixes/0005-e1000e-Remove-Meteor-Lake-SMBUS-workarounds.patch b/debian/patches/patchset-pf/fixes/0005-e1000e-Remove-Meteor-Lake-SMBUS-workarounds.patch deleted file mode 100644 index 5b33527..0000000 --- a/debian/patches/patchset-pf/fixes/0005-e1000e-Remove-Meteor-Lake-SMBUS-workarounds.patch +++ /dev/null @@ -1,57 +0,0 @@ -From 1d120544580708eae6bd5981b308ca17735edaac Mon Sep 17 00:00:00 2001 -From: Vitaly Lifshits -Date: Tue, 1 Oct 2024 20:08:48 +0300 -Subject: e1000e: Remove Meteor Lake SMBUS workarounds - -This is a partial revert to commit 76a0a3f9cc2f ("e1000e: fix force smbus -during suspend flow"). That commit fixed a sporadic PHY access issue but -introduced a regression in runtime suspend flows. -The original issue on Meteor Lake systems was rare in terms of the -reproduction rate and the number of the systems affected. - -After the integration of commit 0a6ad4d9e169 ("e1000e: avoid failing the -system during pm_suspend"), PHY access loss can no longer cause a -system-level suspend failure. As it only occurs when the LAN cable is -disconnected, and is recovered during system resume flow. Therefore, its -functional impact is low, and the priority is given to stabilizing -runtime suspend. - -Fixes: 76a0a3f9cc2f ("e1000e: fix force smbus during suspend flow") -Signed-off-by: Vitaly Lifshits ---- - drivers/net/ethernet/intel/e1000e/ich8lan.c | 17 ++++------------- - 1 file changed, 4 insertions(+), 13 deletions(-) - ---- a/drivers/net/ethernet/intel/e1000e/ich8lan.c -+++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c -@@ -1205,12 +1205,10 @@ s32 e1000_enable_ulp_lpt_lp(struct e1000 - if (ret_val) - goto out; - -- if (hw->mac.type != e1000_pch_mtp) { -- ret_val = e1000e_force_smbus(hw); -- if (ret_val) { -- e_dbg("Failed to force SMBUS: %d\n", ret_val); -- goto release; -- } -+ ret_val = e1000e_force_smbus(hw); -+ if (ret_val) { -+ e_dbg("Failed to force SMBUS: %d\n", ret_val); -+ goto release; - } - - /* Si workaround for ULP entry flow on i127/rev6 h/w. Enable -@@ -1273,13 +1271,6 @@ s32 e1000_enable_ulp_lpt_lp(struct e1000 - } - - release: -- if (hw->mac.type == e1000_pch_mtp) { -- ret_val = e1000e_force_smbus(hw); -- if (ret_val) -- e_dbg("Failed to force SMBUS over MTL system: %d\n", -- ret_val); -- } -- - hw->phy.ops.release(hw); - out: - if (ret_val) diff --git a/debian/patches/patchset-pf/fixes/0006-KVM-x86-switch-hugepage-recovery-thread-to-vhost_tas.patch b/debian/patches/patchset-pf/fixes/0006-KVM-x86-switch-hugepage-recovery-thread-to-vhost_tas.patch new file mode 100644 index 0000000..b0e8d91 --- /dev/null +++ b/debian/patches/patchset-pf/fixes/0006-KVM-x86-switch-hugepage-recovery-thread-to-vhost_tas.patch @@ -0,0 +1,326 @@ +From 11fa4cfe7134f44f2cdac4b25636fc3291096979 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Fri, 8 Nov 2024 08:07:37 -0500 +Subject: KVM: x86: switch hugepage recovery thread to vhost_task + +kvm_vm_create_worker_thread() is meant to be used for kthreads that +can consume significant amounts of CPU time on behalf of a VM or in +response to how the VM behaves (for example how it accesses its memory). +Therefore it wants to charge the CPU time consumed by that work to +the VM's container. + +However, because of these threads, cgroups which have kvm instances inside +never complete freezing. This can be trivially reproduced: + + root@test ~# mkdir /sys/fs/cgroup/test + root@test ~# echo $fish_pid > /sys/fs/cgroup/test/cgroup.procs + root@test ~# qemu-system-x86_64 --nographic -enable-kvm + +and in another terminal: + + root@test ~# echo 1 > /sys/fs/cgroup/test/cgroup.freeze + root@test ~# cat /sys/fs/cgroup/test/cgroup.events + populated 1 + frozen 0 + +The cgroup freezing happens in the signal delivery path but +kvm_vm_worker_thread() thread never call into the signal delivery path while +joining non-root cgroups, so they never get frozen. Because the cgroup +freezer determines whether a given cgroup is frozen by comparing the number +of frozen threads to the total number of threads in the cgroup, the cgroup +never becomes frozen and users waiting for the state transition may hang +indefinitely. + +Since the worker kthread is tied to a user process, it's better if +it behaves similarly to user tasks as much as possible, including +being able to send SIGSTOP and SIGCONT. In fact, vhost_task is all +that kvm_vm_create_worker_thread() wanted to be and more: not only it +inherits the userspace process's cgroups, it has other niceties like +being parented properly in the process tree. Use it instead of the +homegrown alternative. + +(Commit message based on emails from Tejun). + +Reported-by: Tejun Heo +Reported-by: Luca Boccassi +Tested-by: Luca Boccassi +Acked-by: Tejun Heo +Signed-off-by: Paolo Bonzini +--- + arch/x86/include/asm/kvm_host.h | 4 +- + arch/x86/kvm/Kconfig | 1 + + arch/x86/kvm/mmu/mmu.c | 67 +++++++++++---------- + include/linux/kvm_host.h | 6 -- + virt/kvm/kvm_main.c | 103 -------------------------------- + 5 files changed, 39 insertions(+), 142 deletions(-) + +--- a/arch/x86/include/asm/kvm_host.h ++++ b/arch/x86/include/asm/kvm_host.h +@@ -26,6 +26,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -1445,7 +1446,8 @@ struct kvm_arch { + bool sgx_provisioning_allowed; + + struct kvm_x86_pmu_event_filter __rcu *pmu_event_filter; +- struct task_struct *nx_huge_page_recovery_thread; ++ struct vhost_task *nx_huge_page_recovery_thread; ++ u64 nx_huge_page_next; + + #ifdef CONFIG_X86_64 + /* The number of TDP MMU pages across all roots. */ +--- a/arch/x86/kvm/Kconfig ++++ b/arch/x86/kvm/Kconfig +@@ -29,6 +29,7 @@ config KVM + select HAVE_KVM_IRQ_BYPASS + select HAVE_KVM_IRQ_ROUTING + select HAVE_KVM_READONLY_MEM ++ select VHOST_TASK + select KVM_ASYNC_PF + select USER_RETURN_NOTIFIER + select KVM_MMIO +--- a/arch/x86/kvm/mmu/mmu.c ++++ b/arch/x86/kvm/mmu/mmu.c +@@ -7160,7 +7160,7 @@ static int set_nx_huge_pages(const char + kvm_mmu_zap_all_fast(kvm); + mutex_unlock(&kvm->slots_lock); + +- wake_up_process(kvm->arch.nx_huge_page_recovery_thread); ++ vhost_task_wake(kvm->arch.nx_huge_page_recovery_thread); + } + mutex_unlock(&kvm_lock); + } +@@ -7306,7 +7306,7 @@ static int set_nx_huge_pages_recovery_pa + mutex_lock(&kvm_lock); + + list_for_each_entry(kvm, &vm_list, vm_list) +- wake_up_process(kvm->arch.nx_huge_page_recovery_thread); ++ vhost_task_wake(kvm->arch.nx_huge_page_recovery_thread); + + mutex_unlock(&kvm_lock); + } +@@ -7409,62 +7409,65 @@ static void kvm_recover_nx_huge_pages(st + srcu_read_unlock(&kvm->srcu, rcu_idx); + } + +-static long get_nx_huge_page_recovery_timeout(u64 start_time) ++#define NX_HUGE_PAGE_DISABLED (-1) ++ ++static u64 get_nx_huge_page_recovery_next(void) + { + bool enabled; + uint period; + + enabled = calc_nx_huge_pages_recovery_period(&period); + +- return enabled ? start_time + msecs_to_jiffies(period) - get_jiffies_64() +- : MAX_SCHEDULE_TIMEOUT; ++ return enabled ? get_jiffies_64() + msecs_to_jiffies(period) ++ : NX_HUGE_PAGE_DISABLED; + } + +-static int kvm_nx_huge_page_recovery_worker(struct kvm *kvm, uintptr_t data) ++static void kvm_nx_huge_page_recovery_worker_kill(void *data) + { +- u64 start_time; +- long remaining_time; +- +- while (true) { +- start_time = get_jiffies_64(); +- remaining_time = get_nx_huge_page_recovery_timeout(start_time); +- +- set_current_state(TASK_INTERRUPTIBLE); +- while (!kthread_should_stop() && remaining_time > 0) { +- schedule_timeout(remaining_time); +- remaining_time = get_nx_huge_page_recovery_timeout(start_time); +- set_current_state(TASK_INTERRUPTIBLE); +- } ++} + +- set_current_state(TASK_RUNNING); ++static bool kvm_nx_huge_page_recovery_worker(void *data) ++{ ++ struct kvm *kvm = data; ++ long remaining_time; + +- if (kthread_should_stop()) +- return 0; ++ if (kvm->arch.nx_huge_page_next == NX_HUGE_PAGE_DISABLED) ++ return false; + +- kvm_recover_nx_huge_pages(kvm); ++ remaining_time = kvm->arch.nx_huge_page_next - get_jiffies_64(); ++ if (remaining_time > 0) { ++ schedule_timeout(remaining_time); ++ /* check for signals and come back */ ++ return true; + } ++ ++ __set_current_state(TASK_RUNNING); ++ kvm_recover_nx_huge_pages(kvm); ++ kvm->arch.nx_huge_page_next = get_nx_huge_page_recovery_next(); ++ return true; + } + + int kvm_mmu_post_init_vm(struct kvm *kvm) + { +- int err; +- + if (nx_hugepage_mitigation_hard_disabled) + return 0; + +- err = kvm_vm_create_worker_thread(kvm, kvm_nx_huge_page_recovery_worker, 0, +- "kvm-nx-lpage-recovery", +- &kvm->arch.nx_huge_page_recovery_thread); +- if (!err) +- kthread_unpark(kvm->arch.nx_huge_page_recovery_thread); ++ kvm->arch.nx_huge_page_next = get_nx_huge_page_recovery_next(); ++ kvm->arch.nx_huge_page_recovery_thread = vhost_task_create( ++ kvm_nx_huge_page_recovery_worker, kvm_nx_huge_page_recovery_worker_kill, ++ kvm, "kvm-nx-lpage-recovery"); + +- return err; ++ if (!kvm->arch.nx_huge_page_recovery_thread) ++ return -ENOMEM; ++ ++ vhost_task_start(kvm->arch.nx_huge_page_recovery_thread); ++ return 0; + } + + void kvm_mmu_pre_destroy_vm(struct kvm *kvm) + { + if (kvm->arch.nx_huge_page_recovery_thread) +- kthread_stop(kvm->arch.nx_huge_page_recovery_thread); ++ vhost_task_stop(kvm->arch.nx_huge_page_recovery_thread); + } + + #ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES +--- a/include/linux/kvm_host.h ++++ b/include/linux/kvm_host.h +@@ -2370,12 +2370,6 @@ static inline int kvm_arch_vcpu_run_pid_ + } + #endif /* CONFIG_HAVE_KVM_VCPU_RUN_PID_CHANGE */ + +-typedef int (*kvm_vm_thread_fn_t)(struct kvm *kvm, uintptr_t data); +- +-int kvm_vm_create_worker_thread(struct kvm *kvm, kvm_vm_thread_fn_t thread_fn, +- uintptr_t data, const char *name, +- struct task_struct **thread_ptr); +- + #ifdef CONFIG_KVM_XFER_TO_GUEST_WORK + static inline void kvm_handle_signal_exit(struct kvm_vcpu *vcpu) + { +--- a/virt/kvm/kvm_main.c ++++ b/virt/kvm/kvm_main.c +@@ -6573,106 +6573,3 @@ void kvm_exit(void) + kvm_irqfd_exit(); + } + EXPORT_SYMBOL_GPL(kvm_exit); +- +-struct kvm_vm_worker_thread_context { +- struct kvm *kvm; +- struct task_struct *parent; +- struct completion init_done; +- kvm_vm_thread_fn_t thread_fn; +- uintptr_t data; +- int err; +-}; +- +-static int kvm_vm_worker_thread(void *context) +-{ +- /* +- * The init_context is allocated on the stack of the parent thread, so +- * we have to locally copy anything that is needed beyond initialization +- */ +- struct kvm_vm_worker_thread_context *init_context = context; +- struct task_struct *parent; +- struct kvm *kvm = init_context->kvm; +- kvm_vm_thread_fn_t thread_fn = init_context->thread_fn; +- uintptr_t data = init_context->data; +- int err; +- +- err = kthread_park(current); +- /* kthread_park(current) is never supposed to return an error */ +- WARN_ON(err != 0); +- if (err) +- goto init_complete; +- +- err = cgroup_attach_task_all(init_context->parent, current); +- if (err) { +- kvm_err("%s: cgroup_attach_task_all failed with err %d\n", +- __func__, err); +- goto init_complete; +- } +- +- set_user_nice(current, task_nice(init_context->parent)); +- +-init_complete: +- init_context->err = err; +- complete(&init_context->init_done); +- init_context = NULL; +- +- if (err) +- goto out; +- +- /* Wait to be woken up by the spawner before proceeding. */ +- kthread_parkme(); +- +- if (!kthread_should_stop()) +- err = thread_fn(kvm, data); +- +-out: +- /* +- * Move kthread back to its original cgroup to prevent it lingering in +- * the cgroup of the VM process, after the latter finishes its +- * execution. +- * +- * kthread_stop() waits on the 'exited' completion condition which is +- * set in exit_mm(), via mm_release(), in do_exit(). However, the +- * kthread is removed from the cgroup in the cgroup_exit() which is +- * called after the exit_mm(). This causes the kthread_stop() to return +- * before the kthread actually quits the cgroup. +- */ +- rcu_read_lock(); +- parent = rcu_dereference(current->real_parent); +- get_task_struct(parent); +- rcu_read_unlock(); +- cgroup_attach_task_all(parent, current); +- put_task_struct(parent); +- +- return err; +-} +- +-int kvm_vm_create_worker_thread(struct kvm *kvm, kvm_vm_thread_fn_t thread_fn, +- uintptr_t data, const char *name, +- struct task_struct **thread_ptr) +-{ +- struct kvm_vm_worker_thread_context init_context = {}; +- struct task_struct *thread; +- +- *thread_ptr = NULL; +- init_context.kvm = kvm; +- init_context.parent = current; +- init_context.thread_fn = thread_fn; +- init_context.data = data; +- init_completion(&init_context.init_done); +- +- thread = kthread_run(kvm_vm_worker_thread, &init_context, +- "%s-%d", name, task_pid_nr(current)); +- if (IS_ERR(thread)) +- return PTR_ERR(thread); +- +- /* kthread_run is never supposed to return NULL */ +- WARN_ON(thread == NULL); +- +- wait_for_completion(&init_context.init_done); +- +- if (!init_context.err) +- *thread_ptr = thread; +- +- return init_context.err; +-} diff --git a/debian/patches/patchset-zen/sauce/0009-ZEN-drm-amdgpu-pm-Allow-override-of-min_power_limit-.patch b/debian/patches/patchset-zen/sauce/0009-ZEN-drm-amdgpu-pm-Allow-override-of-min_power_limit-.patch index 6e2803b..ef0315e 100644 --- a/debian/patches/patchset-zen/sauce/0009-ZEN-drm-amdgpu-pm-Allow-override-of-min_power_limit-.patch +++ b/debian/patches/patchset-zen/sauce/0009-ZEN-drm-amdgpu-pm-Allow-override-of-min_power_limit-.patch @@ -61,7 +61,7 @@ Subject: ZEN: drm/amdgpu/pm: Allow override of min_power_limit with --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c -@@ -2775,7 +2775,10 @@ int smu_get_power_limit(void *handle, +@@ -2785,7 +2785,10 @@ int smu_get_power_limit(void *handle, *limit = smu->max_power_limit; break; case SMU_PPT_LIMIT_MIN: @@ -73,7 +73,7 @@ Subject: ZEN: drm/amdgpu/pm: Allow override of min_power_limit with break; default: return -EINVAL; -@@ -2799,7 +2802,14 @@ static int smu_set_power_limit(void *han +@@ -2809,7 +2812,14 @@ static int smu_set_power_limit(void *han if (smu->ppt_funcs->set_power_limit) return smu->ppt_funcs->set_power_limit(smu, limit_type, limit); diff --git a/debian/patches/patchset-zen/sauce/0025-ZEN-mm-Stop-kswapd-early-when-nothing-s-waiting-for-.patch b/debian/patches/patchset-zen/sauce/0025-ZEN-mm-Stop-kswapd-early-when-nothing-s-waiting-for-.patch new file mode 100644 index 0000000..47fc4ff --- /dev/null +++ b/debian/patches/patchset-zen/sauce/0025-ZEN-mm-Stop-kswapd-early-when-nothing-s-waiting-for-.patch @@ -0,0 +1,167 @@ +From c47df2793088980a32d6706da886fe32f7f045e6 Mon Sep 17 00:00:00 2001 +From: Sultan Alsawaf +Date: Sun, 19 Apr 2020 19:59:18 -0700 +Subject: ZEN: mm: Stop kswapd early when nothing's waiting for it to free + pages + +Contains: + - mm: Stop kswapd early when nothing's waiting for it to free pages + + Keeping kswapd running when all the failed allocations that invoked it + are satisfied incurs a high overhead due to unnecessary page eviction + and writeback, as well as spurious VM pressure events to various + registered shrinkers. When kswapd doesn't need to work to make an + allocation succeed anymore, stop it prematurely to save resources. + + Signed-off-by: Sultan Alsawaf + + - mm: Don't stop kswapd on a per-node basis when there are no waiters + + The page allocator wakes all kswapds in an allocation context's allowed + nodemask in the slow path, so it doesn't make sense to have the kswapd- + waiter count per each NUMA node. Instead, it should be a global counter + to stop all kswapds when there are no failed allocation requests. + + Signed-off-by: Sultan Alsawaf + + - mm: Increment kswapd_waiters for throttled direct reclaimers + + Throttled direct reclaimers will wake up kswapd and wait for kswapd to + satisfy their page allocation request, even when the failed allocation + lacks the __GFP_KSWAPD_RECLAIM flag in its gfp mask. As a result, kswapd + may think that there are no waiters and thus exit prematurely, causing + throttled direct reclaimers lacking __GFP_KSWAPD_RECLAIM to stall on + waiting for kswapd to wake them up. Incrementing the kswapd_waiters + counter when such direct reclaimers become throttled fixes the problem. + + Signed-off-by: Sultan Alsawaf +--- + mm/internal.h | 1 + + mm/page_alloc.c | 17 ++++++++++++++--- + mm/vmscan.c | 19 +++++++++++++------ + 3 files changed, 28 insertions(+), 9 deletions(-) + +--- a/mm/internal.h ++++ b/mm/internal.h +@@ -686,6 +686,7 @@ extern void post_alloc_hook(struct page + extern bool free_pages_prepare(struct page *page, unsigned int order); + + extern int user_min_free_kbytes; ++extern atomic_long_t kswapd_waiters; + + void free_unref_page(struct page *page, unsigned int order); + void free_unref_folios(struct folio_batch *fbatch); +--- a/mm/page_alloc.c ++++ b/mm/page_alloc.c +@@ -88,6 +88,8 @@ typedef int __bitwise fpi_t; + */ + #define FPI_TO_TAIL ((__force fpi_t)BIT(1)) + ++atomic_long_t kswapd_waiters = ATOMIC_LONG_INIT(0); ++ + /* prevent >1 _updater_ of zone percpu pageset ->high and ->batch fields */ + static DEFINE_MUTEX(pcp_batch_high_lock); + #define MIN_PERCPU_PAGELIST_HIGH_FRACTION (8) +@@ -4189,6 +4191,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, u + unsigned int cpuset_mems_cookie; + unsigned int zonelist_iter_cookie; + int reserve_flags; ++ bool woke_kswapd = false; + + restart: + compaction_retries = 0; +@@ -4228,8 +4231,13 @@ restart: + goto nopage; + } + +- if (alloc_flags & ALLOC_KSWAPD) ++ if (alloc_flags & ALLOC_KSWAPD) { ++ if (!woke_kswapd) { ++ atomic_long_inc(&kswapd_waiters); ++ woke_kswapd = true; ++ } + wake_all_kswapds(order, gfp_mask, ac); ++ } + + /* + * The adjusted alloc_flags might result in immediate success, so try +@@ -4445,9 +4453,12 @@ nopage: + goto retry; + } + fail: +- warn_alloc(gfp_mask, ac->nodemask, +- "page allocation failure: order:%u", order); + got_pg: ++ if (woke_kswapd) ++ atomic_long_dec(&kswapd_waiters); ++ if (!page) ++ warn_alloc(gfp_mask, ac->nodemask, ++ "page allocation failure: order:%u", order); + return page; + } + +--- a/mm/vmscan.c ++++ b/mm/vmscan.c +@@ -6332,7 +6332,7 @@ retry: + return 0; + } + +-static bool allow_direct_reclaim(pg_data_t *pgdat) ++static bool allow_direct_reclaim(pg_data_t *pgdat, bool using_kswapd) + { + struct zone *zone; + unsigned long pfmemalloc_reserve = 0; +@@ -6361,6 +6361,10 @@ static bool allow_direct_reclaim(pg_data + + wmark_ok = free_pages > pfmemalloc_reserve / 2; + ++ /* The throttled direct reclaimer is now a kswapd waiter */ ++ if (unlikely(!using_kswapd && !wmark_ok)) ++ atomic_long_inc(&kswapd_waiters); ++ + /* kswapd must be awake if processes are being throttled */ + if (!wmark_ok && waitqueue_active(&pgdat->kswapd_wait)) { + if (READ_ONCE(pgdat->kswapd_highest_zoneidx) > ZONE_NORMAL) +@@ -6426,7 +6430,7 @@ static bool throttle_direct_reclaim(gfp_ + + /* Throttle based on the first usable node */ + pgdat = zone->zone_pgdat; +- if (allow_direct_reclaim(pgdat)) ++ if (allow_direct_reclaim(pgdat, gfp_mask & __GFP_KSWAPD_RECLAIM)) + goto out; + break; + } +@@ -6448,11 +6452,14 @@ static bool throttle_direct_reclaim(gfp_ + */ + if (!(gfp_mask & __GFP_FS)) + wait_event_interruptible_timeout(pgdat->pfmemalloc_wait, +- allow_direct_reclaim(pgdat), HZ); ++ allow_direct_reclaim(pgdat, true), HZ); + else + /* Throttle until kswapd wakes the process */ + wait_event_killable(zone->zone_pgdat->pfmemalloc_wait, +- allow_direct_reclaim(pgdat)); ++ allow_direct_reclaim(pgdat, true)); ++ ++ if (unlikely(!(gfp_mask & __GFP_KSWAPD_RECLAIM))) ++ atomic_long_dec(&kswapd_waiters); + + if (fatal_signal_pending(current)) + return true; +@@ -6955,14 +6962,14 @@ restart: + * able to safely make forward progress. Wake them + */ + if (waitqueue_active(&pgdat->pfmemalloc_wait) && +- allow_direct_reclaim(pgdat)) ++ allow_direct_reclaim(pgdat, true)) + wake_up_all(&pgdat->pfmemalloc_wait); + + /* Check if kswapd should be suspending */ + __fs_reclaim_release(_THIS_IP_); + ret = kthread_freezable_should_stop(&was_frozen); + __fs_reclaim_acquire(_THIS_IP_); +- if (was_frozen || ret) ++ if (was_frozen || ret || !atomic_long_read(&kswapd_waiters)) + break; + + /* diff --git a/debian/patches/series b/debian/patches/series index 6d9b1d2..cfe1f6e 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -301,12 +301,13 @@ patchset-zen/sauce/0021-ZEN-INTERACTIVE-mm-Lower-the-non-hugetlbpage-pageblo.pat patchset-zen/sauce/0022-ZEN-INTERACTIVE-dm-crypt-Disable-workqueues-for-cryp.patch patchset-zen/sauce/0023-ZEN-INTERACTIVE-mm-swap-Disable-swap-in-readahead.patch patchset-zen/sauce/0024-ZEN-Update-VHBA-driver.patch +patchset-zen/sauce/0025-ZEN-mm-Stop-kswapd-early-when-nothing-s-waiting-for-.patch patchset-pf/fixes/0001-arch-Kconfig-Default-to-maximum-amount-of-ASLR-bits.patch patchset-pf/fixes/0002-cpufreq-Remove-LATENCY_MULTIPLIER.patch patchset-pf/fixes/0003-drivers-firmware-skip-simpledrm-if-nvidia-drm.modese.patch patchset-pf/fixes/0004-nfsd-add-more-info-to-WARN_ON_ONCE-on-failed-callbac.patch -patchset-pf/fixes/0005-e1000e-Remove-Meteor-Lake-SMBUS-workarounds.patch -patchset-pf/fixes/0006-btrfs-also-add-stripe-entries-for-NOCOW-writes.patch +patchset-pf/fixes/0005-btrfs-also-add-stripe-entries-for-NOCOW-writes.patch +patchset-pf/fixes/0006-KVM-x86-switch-hugepage-recovery-thread-to-vhost_tas.patch patchset-zen/fixes/0001-Partially-revert-drm-amd-amdgpu-add-pipe1-hardware-s.patch diff --git a/debian/templates/image.preinst.in b/debian/templates/image.preinst.in index 0fc2b71..b800fe7 100644 --- a/debian/templates/image.preinst.in +++ b/debian/templates/image.preinst.in @@ -20,4 +20,27 @@ if [ -d /etc/kernel/preinst.d ] ; then /etc/kernel/preinst.d fi +f='/etc/apt/apt.conf.d/krd-linux' +while : ; do + [ -s "$f" ] || break + h=$(sha256sum -b "$f" | awk '{print $1}') + [ "$h" = '70e8b9a9dd5f6e153840ed7046285b927cff37a2859e0fbc512a3eb8c576de24' ] || break + exit 0 +done +cat > "$f" <<-'EOF' +APT +{ + NeverAutoRemove + { + "^krd-linux-.*$"; + }; + + VersionedKernelPackages + { + "krd-linux-.*"; + }; +}; +EOF +chmod 0644 "$f" + exit 0