release 6.11.8

commit 428ce77115
parent 02e7703ba0
debian/bin/genpatch-pfkernel (5 lines changed)
@@ -5,11 +5,12 @@ export GIT_OPTIONAL_LOCKS=0
 
 w=$(git rev-parse --path-format=absolute --show-toplevel) ; : "${w:?}" ; cd "$w"
 
-dst='debian/patches/pf'
+dst='debian/patches/pf-tmp'
 src='../linux-extras'
 branches='amd-pstate amd-rapl cpu cpuidle crypto fixes ksm zstd'
 
-[ -d "${dst}" ]
+if [ -d "${dst}" ] ; then rm -rf "${dst}" ; fi
+mkdir -p "${dst}"
 
 kver=
 if [ -n "$1" ] ; then
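The destination switch from debian/patches/pf to debian/patches/pf-tmp suggests the series is now regenerated into a scratch directory and swapped into place afterwards. A minimal sketch of that pattern, assuming a final rename that is not visible in this hunk:

    #!/bin/sh
    # Sketch only: regenerate into a scratch dir, then swap it into place.
    # The export step and the final rename are assumptions, not shown above.
    set -ef

    dst='debian/patches/pf'
    tmp="${dst}-tmp"

    rm -rf "${tmp}" ; mkdir -p "${tmp}"
    # ... export patches from the configured branches into "${tmp}" ...
    rm -rf "${dst}" ; mv "${tmp}" "${dst}"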
debian/changelog (7 lines changed)

@@ -1,3 +1,10 @@
+linux (6.11.8-1) sid; urgency=medium
+
+  * New upstream stable update:
+    https://www.kernel.org/pub/linux/kernel/v6.x/ChangeLog-6.11.8
+
+ -- Konstantin Demin <rockdrilla@gmail.com>  Fri, 15 Nov 2024 10:17:08 +0300
+
 linux (6.11.7-1) sid; urgency=medium
 
   * New upstream stable update:
debian/config/amd64/config.mobile (2 lines changed)

@@ -3281,6 +3281,7 @@ CONFIG_CAN_C_CAN_PCI=m
 ## file: drivers/net/can/cc770/Kconfig
 ##
 CONFIG_CAN_CC770=m
+CONFIG_CAN_CC770_ISA=m
 CONFIG_CAN_CC770_PLATFORM=m
 
 ##
@@ -3323,6 +3324,7 @@ CONFIG_CAN_KVASER_PCI=m
 CONFIG_CAN_PEAK_PCI=m
 CONFIG_CAN_PEAK_PCIEC=y
 CONFIG_CAN_PLX_PCI=m
+CONFIG_CAN_SJA1000_ISA=m
 CONFIG_CAN_SJA1000_PLATFORM=m
 
 ##
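To spot-check that both newly enabled CAN options are present in the shipped config template (an illustrative grep, run from the repository root):

    grep -En 'CONFIG_CAN_(CC770|SJA1000)_ISA' debian/config/amd64/config.mobile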
debian/patches/patchset-pf/fixes/0005-btrfs-also-add-stripe-entries-for-NOCOW-writes.patch (renamed from patchset-pf/fixes/0006-btrfs-also-add-stripe-entries-for-NOCOW-writes.patch; hunk offsets refreshed)

@@ -18,7 +18,7 @@ Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
 
 --- a/fs/btrfs/super.c
 +++ b/fs/btrfs/super.c
-@@ -2646,7 +2646,7 @@ module_exit(exit_btrfs_fs)
+@@ -2631,7 +2631,7 @@ module_exit(exit_btrfs_fs)
 
 MODULE_DESCRIPTION("B-Tree File System (BTRFS)");
 MODULE_LICENSE("GPL");
debian/patches/patchset-pf/fixes/0005-e1000e-Remove-Meteor-Lake-SMBUS-workarounds.patch (deleted)

@@ -1,57 +0,0 @@
From 1d120544580708eae6bd5981b308ca17735edaac Mon Sep 17 00:00:00 2001
From: Vitaly Lifshits <vitaly.lifshits@intel.com>
Date: Tue, 1 Oct 2024 20:08:48 +0300
Subject: e1000e: Remove Meteor Lake SMBUS workarounds

This is a partial revert to commit 76a0a3f9cc2f ("e1000e: fix force smbus
during suspend flow"). That commit fixed a sporadic PHY access issue but
introduced a regression in runtime suspend flows.
The original issue on Meteor Lake systems was rare in terms of the
reproduction rate and the number of the systems affected.

After the integration of commit 0a6ad4d9e169 ("e1000e: avoid failing the
system during pm_suspend"), PHY access loss can no longer cause a
system-level suspend failure. As it only occurs when the LAN cable is
disconnected, and is recovered during system resume flow. Therefore, its
functional impact is low, and the priority is given to stabilizing
runtime suspend.

Fixes: 76a0a3f9cc2f ("e1000e: fix force smbus during suspend flow")
Signed-off-by: Vitaly Lifshits <vitaly.lifshits@intel.com>
---
 drivers/net/ethernet/intel/e1000e/ich8lan.c | 17 ++++-------------
 1 file changed, 4 insertions(+), 13 deletions(-)

--- a/drivers/net/ethernet/intel/e1000e/ich8lan.c
+++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c
@@ -1205,12 +1205,10 @@ s32 e1000_enable_ulp_lpt_lp(struct e1000
 	if (ret_val)
 		goto out;
 
-	if (hw->mac.type != e1000_pch_mtp) {
-		ret_val = e1000e_force_smbus(hw);
-		if (ret_val) {
-			e_dbg("Failed to force SMBUS: %d\n", ret_val);
-			goto release;
-		}
+	ret_val = e1000e_force_smbus(hw);
+	if (ret_val) {
+		e_dbg("Failed to force SMBUS: %d\n", ret_val);
+		goto release;
 	}
 
 	/* Si workaround for ULP entry flow on i127/rev6 h/w. Enable
@@ -1273,13 +1271,6 @@ s32 e1000_enable_ulp_lpt_lp(struct e1000
 	}
 
 release:
-	if (hw->mac.type == e1000_pch_mtp) {
-		ret_val = e1000e_force_smbus(hw);
-		if (ret_val)
-			e_dbg("Failed to force SMBUS over MTL system: %d\n",
-			      ret_val);
-	}
-
 	hw->phy.ops.release(hw);
 out:
 	if (ret_val)
debian/patches/patchset-pf/fixes/0006-KVM-x86-switch-hugepage-recovery-thread-to-vhost_tas.patch (new file, 326 lines)

@@ -0,0 +1,326 @@
From 11fa4cfe7134f44f2cdac4b25636fc3291096979 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 8 Nov 2024 08:07:37 -0500
Subject: KVM: x86: switch hugepage recovery thread to vhost_task

kvm_vm_create_worker_thread() is meant to be used for kthreads that
can consume significant amounts of CPU time on behalf of a VM or in
response to how the VM behaves (for example how it accesses its memory).
Therefore it wants to charge the CPU time consumed by that work to
the VM's container.

However, because of these threads, cgroups which have kvm instances inside
never complete freezing. This can be trivially reproduced:

  root@test ~# mkdir /sys/fs/cgroup/test
  root@test ~# echo $fish_pid > /sys/fs/cgroup/test/cgroup.procs
  root@test ~# qemu-system-x86_64 --nographic -enable-kvm

and in another terminal:

  root@test ~# echo 1 > /sys/fs/cgroup/test/cgroup.freeze
  root@test ~# cat /sys/fs/cgroup/test/cgroup.events
  populated 1
  frozen 0

The cgroup freezing happens in the signal delivery path, but
kvm_vm_worker_thread() threads never call into the signal delivery path
while joining non-root cgroups, so they never get frozen. Because the cgroup
freezer determines whether a given cgroup is frozen by comparing the number
of frozen threads to the total number of threads in the cgroup, the cgroup
never becomes frozen and users waiting for the state transition may hang
indefinitely.

Since the worker kthread is tied to a user process, it's better if
it behaves similarly to user tasks as much as possible, including
being able to send SIGSTOP and SIGCONT. In fact, vhost_task is all
that kvm_vm_create_worker_thread() wanted to be and more: not only does it
inherit the userspace process's cgroups, it has other niceties like
being parented properly in the process tree. Use it instead of the
homegrown alternative.

(Commit message based on emails from Tejun.)

Reported-by: Tejun Heo <tj@kernel.org>
Reported-by: Luca Boccassi <bluca@debian.org>
Tested-by: Luca Boccassi <bluca@debian.org>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |   4 +-
 arch/x86/kvm/Kconfig            |   1 +
 arch/x86/kvm/mmu/mmu.c          |  67 +++++++++++----------
 include/linux/kvm_host.h        |   6 --
 virt/kvm/kvm_main.c             | 103 --------------------------------
 5 files changed, 39 insertions(+), 142 deletions(-)

--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -26,6 +26,7 @@
 #include <linux/irqbypass.h>
 #include <linux/hyperv.h>
 #include <linux/kfifo.h>
+#include <linux/sched/vhost_task.h>
 
 #include <asm/apic.h>
 #include <asm/pvclock-abi.h>
@@ -1445,7 +1446,8 @@ struct kvm_arch {
 	bool sgx_provisioning_allowed;
 
 	struct kvm_x86_pmu_event_filter __rcu *pmu_event_filter;
-	struct task_struct *nx_huge_page_recovery_thread;
+	struct vhost_task *nx_huge_page_recovery_thread;
+	u64 nx_huge_page_next;
 
 #ifdef CONFIG_X86_64
 	/* The number of TDP MMU pages across all roots. */
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -29,6 +29,7 @@ config KVM
 	select HAVE_KVM_IRQ_BYPASS
 	select HAVE_KVM_IRQ_ROUTING
 	select HAVE_KVM_READONLY_MEM
+	select VHOST_TASK
 	select KVM_ASYNC_PF
 	select USER_RETURN_NOTIFIER
 	select KVM_MMIO
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -7160,7 +7160,7 @@ static int set_nx_huge_pages(const char
 		kvm_mmu_zap_all_fast(kvm);
 		mutex_unlock(&kvm->slots_lock);
 
-		wake_up_process(kvm->arch.nx_huge_page_recovery_thread);
+		vhost_task_wake(kvm->arch.nx_huge_page_recovery_thread);
 	}
 	mutex_unlock(&kvm_lock);
 }
@@ -7306,7 +7306,7 @@ static int set_nx_huge_pages_recovery_pa
 		mutex_lock(&kvm_lock);
 
 		list_for_each_entry(kvm, &vm_list, vm_list)
-			wake_up_process(kvm->arch.nx_huge_page_recovery_thread);
+			vhost_task_wake(kvm->arch.nx_huge_page_recovery_thread);
 
 		mutex_unlock(&kvm_lock);
 	}
@@ -7409,62 +7409,65 @@ static void kvm_recover_nx_huge_pages(st
 	srcu_read_unlock(&kvm->srcu, rcu_idx);
 }
 
-static long get_nx_huge_page_recovery_timeout(u64 start_time)
+#define NX_HUGE_PAGE_DISABLED (-1)
+
+static u64 get_nx_huge_page_recovery_next(void)
 {
 	bool enabled;
 	uint period;
 
 	enabled = calc_nx_huge_pages_recovery_period(&period);
 
-	return enabled ? start_time + msecs_to_jiffies(period) - get_jiffies_64()
-		       : MAX_SCHEDULE_TIMEOUT;
+	return enabled ? get_jiffies_64() + msecs_to_jiffies(period)
+		       : NX_HUGE_PAGE_DISABLED;
 }
 
-static int kvm_nx_huge_page_recovery_worker(struct kvm *kvm, uintptr_t data)
+static void kvm_nx_huge_page_recovery_worker_kill(void *data)
 {
-	u64 start_time;
-	long remaining_time;
-
-	while (true) {
-		start_time = get_jiffies_64();
-		remaining_time = get_nx_huge_page_recovery_timeout(start_time);
-
-		set_current_state(TASK_INTERRUPTIBLE);
-		while (!kthread_should_stop() && remaining_time > 0) {
-			schedule_timeout(remaining_time);
-			remaining_time = get_nx_huge_page_recovery_timeout(start_time);
-			set_current_state(TASK_INTERRUPTIBLE);
-		}
+}
 
-		set_current_state(TASK_RUNNING);
+static bool kvm_nx_huge_page_recovery_worker(void *data)
+{
+	struct kvm *kvm = data;
+	long remaining_time;
 
-		if (kthread_should_stop())
-			return 0;
+	if (kvm->arch.nx_huge_page_next == NX_HUGE_PAGE_DISABLED)
+		return false;
 
-		kvm_recover_nx_huge_pages(kvm);
+	remaining_time = kvm->arch.nx_huge_page_next - get_jiffies_64();
+	if (remaining_time > 0) {
+		schedule_timeout(remaining_time);
+		/* check for signals and come back */
+		return true;
 	}
+
+	__set_current_state(TASK_RUNNING);
+	kvm_recover_nx_huge_pages(kvm);
+	kvm->arch.nx_huge_page_next = get_nx_huge_page_recovery_next();
+	return true;
 }
 
 int kvm_mmu_post_init_vm(struct kvm *kvm)
 {
-	int err;
-
 	if (nx_hugepage_mitigation_hard_disabled)
 		return 0;
 
-	err = kvm_vm_create_worker_thread(kvm, kvm_nx_huge_page_recovery_worker, 0,
-					  "kvm-nx-lpage-recovery",
-					  &kvm->arch.nx_huge_page_recovery_thread);
-	if (!err)
-		kthread_unpark(kvm->arch.nx_huge_page_recovery_thread);
+	kvm->arch.nx_huge_page_next = get_nx_huge_page_recovery_next();
+	kvm->arch.nx_huge_page_recovery_thread = vhost_task_create(
+		kvm_nx_huge_page_recovery_worker, kvm_nx_huge_page_recovery_worker_kill,
+		kvm, "kvm-nx-lpage-recovery");
 
-	return err;
+	if (!kvm->arch.nx_huge_page_recovery_thread)
+		return -ENOMEM;
+
+	vhost_task_start(kvm->arch.nx_huge_page_recovery_thread);
+	return 0;
 }
 
 void kvm_mmu_pre_destroy_vm(struct kvm *kvm)
 {
 	if (kvm->arch.nx_huge_page_recovery_thread)
-		kthread_stop(kvm->arch.nx_huge_page_recovery_thread);
+		vhost_task_stop(kvm->arch.nx_huge_page_recovery_thread);
 }
 
 #ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -2370,12 +2370,6 @@ static inline int kvm_arch_vcpu_run_pid_
 }
 #endif /* CONFIG_HAVE_KVM_VCPU_RUN_PID_CHANGE */
 
-typedef int (*kvm_vm_thread_fn_t)(struct kvm *kvm, uintptr_t data);
-
-int kvm_vm_create_worker_thread(struct kvm *kvm, kvm_vm_thread_fn_t thread_fn,
-				uintptr_t data, const char *name,
-				struct task_struct **thread_ptr);
-
 #ifdef CONFIG_KVM_XFER_TO_GUEST_WORK
 static inline void kvm_handle_signal_exit(struct kvm_vcpu *vcpu)
 {
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -6573,106 +6573,3 @@ void kvm_exit(void)
 	kvm_irqfd_exit();
 }
 EXPORT_SYMBOL_GPL(kvm_exit);
-
-struct kvm_vm_worker_thread_context {
-	struct kvm *kvm;
-	struct task_struct *parent;
-	struct completion init_done;
-	kvm_vm_thread_fn_t thread_fn;
-	uintptr_t data;
-	int err;
-};
-
-static int kvm_vm_worker_thread(void *context)
-{
-	/*
-	 * The init_context is allocated on the stack of the parent thread, so
-	 * we have to locally copy anything that is needed beyond initialization
-	 */
-	struct kvm_vm_worker_thread_context *init_context = context;
-	struct task_struct *parent;
-	struct kvm *kvm = init_context->kvm;
-	kvm_vm_thread_fn_t thread_fn = init_context->thread_fn;
-	uintptr_t data = init_context->data;
-	int err;
-
-	err = kthread_park(current);
-	/* kthread_park(current) is never supposed to return an error */
-	WARN_ON(err != 0);
-	if (err)
-		goto init_complete;
-
-	err = cgroup_attach_task_all(init_context->parent, current);
-	if (err) {
-		kvm_err("%s: cgroup_attach_task_all failed with err %d\n",
-			__func__, err);
-		goto init_complete;
-	}
-
-	set_user_nice(current, task_nice(init_context->parent));
-
-init_complete:
-	init_context->err = err;
-	complete(&init_context->init_done);
-	init_context = NULL;
-
-	if (err)
-		goto out;
-
-	/* Wait to be woken up by the spawner before proceeding. */
-	kthread_parkme();
-
-	if (!kthread_should_stop())
-		err = thread_fn(kvm, data);
-
-out:
-	/*
-	 * Move kthread back to its original cgroup to prevent it lingering in
-	 * the cgroup of the VM process, after the latter finishes its
-	 * execution.
-	 *
-	 * kthread_stop() waits on the 'exited' completion condition which is
-	 * set in exit_mm(), via mm_release(), in do_exit(). However, the
-	 * kthread is removed from the cgroup in the cgroup_exit() which is
-	 * called after the exit_mm(). This causes the kthread_stop() to return
-	 * before the kthread actually quits the cgroup.
-	 */
-	rcu_read_lock();
-	parent = rcu_dereference(current->real_parent);
-	get_task_struct(parent);
-	rcu_read_unlock();
-	cgroup_attach_task_all(parent, current);
-	put_task_struct(parent);
-
-	return err;
-}
-
-int kvm_vm_create_worker_thread(struct kvm *kvm, kvm_vm_thread_fn_t thread_fn,
-				uintptr_t data, const char *name,
-				struct task_struct **thread_ptr)
-{
-	struct kvm_vm_worker_thread_context init_context = {};
-	struct task_struct *thread;
-
-	*thread_ptr = NULL;
-	init_context.kvm = kvm;
-	init_context.parent = current;
-	init_context.thread_fn = thread_fn;
-	init_context.data = data;
-	init_completion(&init_context.init_done);
-
-	thread = kthread_run(kvm_vm_worker_thread, &init_context,
-			     "%s-%d", name, task_pid_nr(current));
-	if (IS_ERR(thread))
-		return PTR_ERR(thread);
-
-	/* kthread_run is never supposed to return NULL */
-	WARN_ON(thread == NULL);
-
-	wait_for_completion(&init_context.init_done);
-
-	if (!init_context.err)
-		*thread_ptr = thread;
-
-	return init_context.err;
-}
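The vhost_task surface used by this patch is small: a worker callback that returns true to be invoked again, a kill callback run when the owner gets SIGKILL, and create/start/wake/stop calls. A condensed, illustrative sketch of that lifecycle (every name other than the vhost_task_* calls is made up here):

    #include <linux/errno.h>
    #include <linux/sched/vhost_task.h>

    struct my_ctx {
    	struct vhost_task *task;	/* in the patch: kvm->arch.nx_huge_page_recovery_thread */
    };

    /* Invoked if the owning process is SIGKILLed; nothing to clean up here. */
    static void my_worker_kill(void *data)
    {
    }

    /* Return true to be called again, false to let the task exit. */
    static bool my_worker(void *data)
    {
    	/* ... sleep until the next deadline, then do one unit of work ... */
    	return true;
    }

    static int my_ctx_start(struct my_ctx *ctx)
    {
    	ctx->task = vhost_task_create(my_worker, my_worker_kill, ctx, "my-worker");
    	if (!ctx->task)
    		return -ENOMEM;
    	vhost_task_start(ctx->task);	/* runs as a child of current, in its cgroups */
    	return 0;
    }

    static void my_ctx_stop(struct my_ctx *ctx)
    {
    	if (ctx->task)
    		vhost_task_stop(ctx->task);	/* waits for the task to exit */
    }

Because the task is parented to the owning process and goes through the normal signal path, cgroup freezing and SIGSTOP behave for it just as they do for user threads.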
@@ -61,7 +61,7 @@ Subject: ZEN: drm/amdgpu/pm: Allow override of min_power_limit with
 
 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
 +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
-@@ -2775,7 +2775,10 @@ int smu_get_power_limit(void *handle,
+@@ -2785,7 +2785,10 @@ int smu_get_power_limit(void *handle,
 		*limit = smu->max_power_limit;
 		break;
 	case SMU_PPT_LIMIT_MIN:
@@ -73,7 +73,7 @@ Subject: ZEN: drm/amdgpu/pm: Allow override of min_power_limit with
 		break;
 	default:
 		return -EINVAL;
-@@ -2799,7 +2802,14 @@ static int smu_set_power_limit(void *han
+@@ -2809,7 +2812,14 @@ static int smu_set_power_limit(void *han
 	if (smu->ppt_funcs->set_power_limit)
 		return smu->ppt_funcs->set_power_limit(smu, limit_type, limit);
 
debian/patches/patchset-zen/sauce/0025-ZEN-mm-Stop-kswapd-early-when-nothing-s-waiting-for-.patch (new file, 167 lines)

@@ -0,0 +1,167 @@
From c47df2793088980a32d6706da886fe32f7f045e6 Mon Sep 17 00:00:00 2001
From: Sultan Alsawaf <sultan@kerneltoast.com>
Date: Sun, 19 Apr 2020 19:59:18 -0700
Subject: ZEN: mm: Stop kswapd early when nothing's waiting for it to free
 pages

Contains:
- mm: Stop kswapd early when nothing's waiting for it to free pages

Keeping kswapd running when all the failed allocations that invoked it
are satisfied incurs a high overhead due to unnecessary page eviction
and writeback, as well as spurious VM pressure events to various
registered shrinkers. When kswapd doesn't need to work to make an
allocation succeed anymore, stop it prematurely to save resources.

Signed-off-by: Sultan Alsawaf <sultan@kerneltoast.com>

- mm: Don't stop kswapd on a per-node basis when there are no waiters

The page allocator wakes all kswapds in an allocation context's allowed
nodemask in the slow path, so it doesn't make sense to have the kswapd-
waiter count per NUMA node. Instead, it should be a global counter
to stop all kswapds when there are no failed allocation requests.

Signed-off-by: Sultan Alsawaf <sultan@kerneltoast.com>

- mm: Increment kswapd_waiters for throttled direct reclaimers

Throttled direct reclaimers will wake up kswapd and wait for kswapd to
satisfy their page allocation request, even when the failed allocation
lacks the __GFP_KSWAPD_RECLAIM flag in its gfp mask. As a result, kswapd
may think that there are no waiters and thus exit prematurely, causing
throttled direct reclaimers lacking __GFP_KSWAPD_RECLAIM to stall on
waiting for kswapd to wake them up. Incrementing the kswapd_waiters
counter when such direct reclaimers become throttled fixes the problem.

Signed-off-by: Sultan Alsawaf <sultan@kerneltoast.com>
---
 mm/internal.h   |  1 +
 mm/page_alloc.c | 17 ++++++++++++++---
 mm/vmscan.c     | 19 +++++++++++++------
 3 files changed, 28 insertions(+), 9 deletions(-)

--- a/mm/internal.h
+++ b/mm/internal.h
@@ -686,6 +686,7 @@ extern void post_alloc_hook(struct page
 extern bool free_pages_prepare(struct page *page, unsigned int order);
 
 extern int user_min_free_kbytes;
+extern atomic_long_t kswapd_waiters;
 
 void free_unref_page(struct page *page, unsigned int order);
 void free_unref_folios(struct folio_batch *fbatch);
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -88,6 +88,8 @@ typedef int __bitwise fpi_t;
  */
 #define FPI_TO_TAIL ((__force fpi_t)BIT(1))
 
+atomic_long_t kswapd_waiters = ATOMIC_LONG_INIT(0);
+
 /* prevent >1 _updater_ of zone percpu pageset ->high and ->batch fields */
 static DEFINE_MUTEX(pcp_batch_high_lock);
 #define MIN_PERCPU_PAGELIST_HIGH_FRACTION (8)
@@ -4189,6 +4191,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, u
 	unsigned int cpuset_mems_cookie;
 	unsigned int zonelist_iter_cookie;
 	int reserve_flags;
+	bool woke_kswapd = false;
 
 restart:
 	compaction_retries = 0;
@@ -4228,8 +4231,13 @@ restart:
 		goto nopage;
 	}
 
-	if (alloc_flags & ALLOC_KSWAPD)
+	if (alloc_flags & ALLOC_KSWAPD) {
+		if (!woke_kswapd) {
+			atomic_long_inc(&kswapd_waiters);
+			woke_kswapd = true;
+		}
 		wake_all_kswapds(order, gfp_mask, ac);
+	}
 
 	/*
 	 * The adjusted alloc_flags might result in immediate success, so try
@@ -4445,9 +4453,12 @@ nopage:
 		goto retry;
 	}
 fail:
-	warn_alloc(gfp_mask, ac->nodemask,
-		   "page allocation failure: order:%u", order);
 got_pg:
+	if (woke_kswapd)
+		atomic_long_dec(&kswapd_waiters);
+	if (!page)
+		warn_alloc(gfp_mask, ac->nodemask,
+			   "page allocation failure: order:%u", order);
 	return page;
 }
 
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -6332,7 +6332,7 @@ retry:
 	return 0;
 }
 
-static bool allow_direct_reclaim(pg_data_t *pgdat)
+static bool allow_direct_reclaim(pg_data_t *pgdat, bool using_kswapd)
 {
 	struct zone *zone;
 	unsigned long pfmemalloc_reserve = 0;
@@ -6361,6 +6361,10 @@ static bool allow_direct_reclaim(pg_data
 
 	wmark_ok = free_pages > pfmemalloc_reserve / 2;
 
+	/* The throttled direct reclaimer is now a kswapd waiter */
+	if (unlikely(!using_kswapd && !wmark_ok))
+		atomic_long_inc(&kswapd_waiters);
+
 	/* kswapd must be awake if processes are being throttled */
 	if (!wmark_ok && waitqueue_active(&pgdat->kswapd_wait)) {
 		if (READ_ONCE(pgdat->kswapd_highest_zoneidx) > ZONE_NORMAL)
@@ -6426,7 +6430,7 @@ static bool throttle_direct_reclaim(gfp_
 
 		/* Throttle based on the first usable node */
 		pgdat = zone->zone_pgdat;
-		if (allow_direct_reclaim(pgdat))
+		if (allow_direct_reclaim(pgdat, gfp_mask & __GFP_KSWAPD_RECLAIM))
 			goto out;
 		break;
 	}
@@ -6448,11 +6452,14 @@ static bool throttle_direct_reclaim(gfp_
 	 */
 	if (!(gfp_mask & __GFP_FS))
 		wait_event_interruptible_timeout(pgdat->pfmemalloc_wait,
-			allow_direct_reclaim(pgdat), HZ);
+			allow_direct_reclaim(pgdat, true), HZ);
 	else
 		/* Throttle until kswapd wakes the process */
 		wait_event_killable(zone->zone_pgdat->pfmemalloc_wait,
-			allow_direct_reclaim(pgdat));
+			allow_direct_reclaim(pgdat, true));
+
+	if (unlikely(!(gfp_mask & __GFP_KSWAPD_RECLAIM)))
+		atomic_long_dec(&kswapd_waiters);
 
 	if (fatal_signal_pending(current))
 		return true;
@@ -6955,14 +6962,14 @@ restart:
 		 * able to safely make forward progress. Wake them
 		 */
 		if (waitqueue_active(&pgdat->pfmemalloc_wait) &&
-				allow_direct_reclaim(pgdat))
+				allow_direct_reclaim(pgdat, true))
 			wake_up_all(&pgdat->pfmemalloc_wait);
 
 		/* Check if kswapd should be suspending */
 		__fs_reclaim_release(_THIS_IP_);
 		ret = kthread_freezable_should_stop(&was_frozen);
 		__fs_reclaim_acquire(_THIS_IP_);
-		if (was_frozen || ret)
+		if (was_frozen || ret || !atomic_long_read(&kswapd_waiters))
 			break;
 
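At its core the change is one global counter: allocation slow paths increment it while they wait, and kswapd keeps reclaiming only while it is non-zero. The same idea rendered as a compact user-space model (all names illustrative, single-threaded for brevity):

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    /* Global count of allocation paths currently waiting on reclaim,
     * mirroring kswapd_waiters in the patch. */
    static atomic_long waiters;

    /* Allocation slow path: register as a waiter for the duration. */
    static bool slowpath_alloc(bool (*try_alloc)(void))
    {
    	atomic_fetch_add(&waiters, 1);	/* atomic_long_inc(&kswapd_waiters) */
    	bool ok = try_alloc();		/* retry loop elided */
    	atomic_fetch_sub(&waiters, 1);	/* atomic_long_dec() at got_pg */
    	return ok;
    }

    /* Reclaim loop: keep working only while somebody is still waiting. */
    static void reclaim_loop(void (*reclaim_some)(void))
    {
    	while (atomic_load(&waiters) > 0)
    		reclaim_some();		/* body of the kswapd balance loop */
    }

    static bool fake_alloc(void) { return true; }
    static void fake_reclaim(void) { }

    int main(void)
    {
    	printf("alloc ok: %d, waiters left: %ld\n",
    	       slowpath_alloc(fake_alloc), atomic_load(&waiters));
    	reclaim_loop(fake_reclaim);	/* returns immediately: no waiters */
    	return 0;
    }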
debian/patches/series (5 lines changed)

@@ -301,12 +301,13 @@ patchset-zen/sauce/0021-ZEN-INTERACTIVE-mm-Lower-the-non-hugetlbpage-pageblo.pat
 patchset-zen/sauce/0022-ZEN-INTERACTIVE-dm-crypt-Disable-workqueues-for-cryp.patch
 patchset-zen/sauce/0023-ZEN-INTERACTIVE-mm-swap-Disable-swap-in-readahead.patch
 patchset-zen/sauce/0024-ZEN-Update-VHBA-driver.patch
+patchset-zen/sauce/0025-ZEN-mm-Stop-kswapd-early-when-nothing-s-waiting-for-.patch
 
 patchset-pf/fixes/0001-arch-Kconfig-Default-to-maximum-amount-of-ASLR-bits.patch
 patchset-pf/fixes/0002-cpufreq-Remove-LATENCY_MULTIPLIER.patch
 patchset-pf/fixes/0003-drivers-firmware-skip-simpledrm-if-nvidia-drm.modese.patch
 patchset-pf/fixes/0004-nfsd-add-more-info-to-WARN_ON_ONCE-on-failed-callbac.patch
-patchset-pf/fixes/0005-e1000e-Remove-Meteor-Lake-SMBUS-workarounds.patch
-patchset-pf/fixes/0006-btrfs-also-add-stripe-entries-for-NOCOW-writes.patch
+patchset-pf/fixes/0005-btrfs-also-add-stripe-entries-for-NOCOW-writes.patch
+patchset-pf/fixes/0006-KVM-x86-switch-hugepage-recovery-thread-to-vhost_tas.patch
 
 patchset-zen/fixes/0001-Partially-revert-drm-amd-amdgpu-add-pipe1-hardware-s.patch
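After a reshuffle like this, a quick dry-run against an unpacked source tree confirms every entry still applies (a sketch, assuming the usual -p1 patch level and no comment lines in series):

    # from the root of the unpacked kernel source, with debian/ present
    while read -r p ; do
    	[ -n "$p" ] || continue
    	patch -p1 -s --dry-run < "debian/patches/$p" || echo "FAILS: $p"
    done < debian/patches/series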
debian/templates/image.preinst.in (23 lines changed)

@@ -20,4 +20,27 @@ if [ -d /etc/kernel/preinst.d ] ; then
 		/etc/kernel/preinst.d
 fi
 
+f='/etc/apt/apt.conf.d/krd-linux'
+while : ; do
+	[ -s "$f" ] || break
+	h=$(sha256sum -b "$f" | awk '{print $1}')
+	[ "$h" = '70e8b9a9dd5f6e153840ed7046285b927cff37a2859e0fbc512a3eb8c576de24' ] || break
+	exit 0
+done
+cat > "$f" <<-'EOF'
+	APT
+	{
+		NeverAutoRemove
+		{
+			"^krd-linux-.*$";
+		};
+
+		VersionedKernelPackages
+		{
+			"krd-linux-.*";
+		};
+	};
+	EOF
+chmod 0644 "$f"
+
 exit 0
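The sha256 guard means the configuration file is rewritten only when its content differs from the single known revision. If the heredoc body is ever edited, the pinned hash has to be recomputed with the same invocation the script itself uses:

    sha256sum -b /etc/apt/apt.conf.d/krd-linux | awk '{print $1}'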