
release 6.11.11

Konstantin Demin 2024-12-05 18:46:18 +03:00
parent bf0cfbdd57
commit 9debc8729c
41 changed files with 62 additions and 472 deletions

debian/changelog (vendored, 7 changes)

@@ -1,3 +1,10 @@
+linux (6.11.11-1) sid; urgency=medium
+
+  * New upstream stable update:
+    https://www.kernel.org/pub/linux/kernel/v6.x/ChangeLog-6.11.11
+
+ -- Konstantin Demin <rockdrilla@gmail.com>  Thu, 05 Dec 2024 18:41:08 +0300
+
 linux (6.11.10-1) sid; urgency=medium
 
   * New upstream stable update:


@@ -29,7 +29,7 @@ Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
MODULE_SOFTDEP("pre: blake2b-256");
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
-@@ -7435,6 +7435,6 @@ static void __exit ext4_exit_fs(void)
+@@ -7447,6 +7447,6 @@ static void __exit ext4_exit_fs(void)
MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
MODULE_DESCRIPTION("Fourth Extended Filesystem");
MODULE_LICENSE("GPL");


@@ -34,7 +34,7 @@ Signed-off-by: Serge Hallyn <serge.hallyn@ubuntu.com>
/*
* Minimum number of threads to boot the kernel
*/
-@@ -2136,6 +2142,10 @@ __latent_entropy struct task_struct *cop
+@@ -2156,6 +2162,10 @@ __latent_entropy struct task_struct *cop
if ((clone_flags & (CLONE_NEWUSER|CLONE_FS)) == (CLONE_NEWUSER|CLONE_FS))
return ERR_PTR(-EINVAL);
@@ -45,7 +45,7 @@ Signed-off-by: Serge Hallyn <serge.hallyn@ubuntu.com>
/*
* Thread groups must share signals as well, and detached threads
* can only be started up within the thread group.
-@@ -3285,6 +3295,12 @@ int ksys_unshare(unsigned long unshare_f
+@@ -3305,6 +3315,12 @@ int ksys_unshare(unsigned long unshare_f
if (unshare_flags & CLONE_NEWNS)
unshare_flags |= CLONE_FS;


@@ -42,7 +42,7 @@ Export the currently un-exported symbols it depends on.
struct msg_msgseg *next;
--- a/ipc/namespace.c
+++ b/ipc/namespace.c
-@@ -205,6 +205,7 @@ void put_ipc_ns(struct ipc_namespace *ns
+@@ -207,6 +207,7 @@ void put_ipc_ns(struct ipc_namespace *ns
schedule_work(&free_ipc_work);
}
}


@@ -68,7 +68,7 @@ Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
} else if (!strncmp(str, "forcedac", 8)) {
pr_warn("intel_iommu=forcedac deprecated; use iommu.forcedac instead\n");
iommu_dma_forcedac = true;
-@@ -2156,6 +2164,9 @@ static int device_def_domain_type(struct
+@@ -2168,6 +2176,9 @@ static int device_def_domain_type(struct
if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
return IOMMU_DOMAIN_IDENTITY;
@@ -78,7 +78,7 @@ Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
}
return 0;
-@@ -2456,6 +2467,9 @@ static int __init init_dmars(void)
+@@ -2468,6 +2479,9 @@ static int __init init_dmars(void)
iommu_set_root_entry(iommu);
}


@@ -107,7 +107,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
if (sk_can_gso(sk)) {
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
-@@ -2009,7 +2009,7 @@ void ieee80211_color_collision_detection
+@@ -2010,7 +2010,7 @@ void ieee80211_color_collision_detection
/* interface handling */
#define MAC80211_SUPPORTED_FEATURES_TX (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | \
NETIF_F_HW_CSUM | NETIF_F_SG | \


@@ -25,7 +25,7 @@ dependency on CONFIG_ARC and adds RUSTFLAGS.
KBUILD_RUSTFLAGS += -Copt-level=s
--- a/init/Kconfig
+++ b/init/Kconfig
-@@ -1407,6 +1413,12 @@ config CC_OPTIMIZE_FOR_PERFORMANCE
+@@ -1416,6 +1422,12 @@ config CC_OPTIMIZE_FOR_PERFORMANCE
with the "-O2" compiler flag for best performance and most
helpful compile-time warnings.


@@ -48,7 +48,7 @@ Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
struct quirk_entry {
u32 nominal_freq;
u32 lowest_freq;
-@@ -1402,7 +1398,7 @@ static ssize_t amd_pstate_show_status(ch
+@@ -1380,7 +1376,7 @@ static ssize_t amd_pstate_show_status(ch
return sysfs_emit(buf, "%s\n", amd_pstate_mode_string[cppc_state]);
}
@@ -57,7 +57,7 @@ Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
{
int mode_idx;
-@@ -1419,6 +1415,7 @@ static int amd_pstate_update_status(cons
+@@ -1397,6 +1393,7 @@ static int amd_pstate_update_status(cons
return 0;
}


@@ -14,7 +14,7 @@ Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
-@@ -1605,7 +1605,7 @@ static void amd_pstate_epp_cpu_exit(stru
+@@ -1583,7 +1583,7 @@ static void amd_pstate_epp_cpu_exit(stru
pr_debug("CPU %d exiting\n", policy->cpu);
}
@@ -23,7 +23,7 @@ Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
{
struct amd_cpudata *cpudata = policy->driver_data;
u32 max_perf, min_perf, min_limit_perf, max_limit_perf;
-@@ -1655,7 +1655,7 @@ static void amd_pstate_epp_update_limit(
+@@ -1633,7 +1633,7 @@ static void amd_pstate_epp_update_limit(
* This return value can only be negative for shared_memory
* systems where EPP register read/write not supported.
*/
@@ -32,7 +32,7 @@ Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
}
if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
-@@ -1668,12 +1668,13 @@ static void amd_pstate_epp_update_limit(
+@@ -1646,12 +1646,13 @@ static void amd_pstate_epp_update_limit(
}
WRITE_ONCE(cpudata->cppc_req_cached, value);
@@ -47,7 +47,7 @@ Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
if (!policy->cpuinfo.max_freq)
return -ENODEV;
-@@ -1683,7 +1684,9 @@ static int amd_pstate_epp_set_policy(str
+@@ -1661,7 +1662,9 @@ static int amd_pstate_epp_set_policy(str
cpudata->policy = policy->policy;


@@ -42,7 +42,7 @@ Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
+EXPORT_SYMBOL_GPL(amd_get_highest_perf);
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
-@@ -1201,22 +1201,6 @@ unsigned long amd_get_dr_addr_mask(unsig
+@@ -1202,22 +1202,6 @@ unsigned long amd_get_dr_addr_mask(unsig
}
EXPORT_SYMBOL_GPL(amd_get_dr_addr_mask);


@@ -60,7 +60,7 @@ Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
* @cpu: CPU to get numerator for.
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
-@@ -837,36 +837,6 @@ static void amd_pstste_sched_prefcore_wo
+@@ -815,36 +815,6 @@ static void amd_pstste_sched_prefcore_wo
}
static DECLARE_WORK(sched_prefcore_work, amd_pstste_sched_prefcore_workfn);
@@ -97,7 +97,7 @@ Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
#define CPPC_MAX_PERF U8_MAX
static void amd_pstate_init_prefcore(struct amd_cpudata *cpudata)
-@@ -874,7 +844,7 @@ static void amd_pstate_init_prefcore(str
+@@ -852,7 +822,7 @@ static void amd_pstate_init_prefcore(str
int ret, prio;
u32 highest_perf;
@@ -106,7 +106,7 @@ Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
if (ret)
return;
-@@ -918,7 +888,7 @@ static void amd_pstate_update_limits(uns
+@@ -896,7 +866,7 @@ static void amd_pstate_update_limits(uns
if ((!amd_pstate_prefcore) || (!cpudata->hw_prefcore))
goto free_cpufreq_put;


@@ -149,7 +149,7 @@ Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
}
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
-@@ -841,32 +841,18 @@ static DECLARE_WORK(sched_prefcore_work,
+@@ -819,32 +819,18 @@ static DECLARE_WORK(sched_prefcore_work,
static void amd_pstate_init_prefcore(struct amd_cpudata *cpudata)
{
@@ -185,7 +185,7 @@ Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
schedule_work(&sched_prefcore_work);
}
-@@ -1037,12 +1023,12 @@ static int amd_pstate_cpu_init(struct cp
+@@ -1015,12 +1001,12 @@ static int amd_pstate_cpu_init(struct cp
cpudata->cpu = policy->cpu;
@ -200,7 +200,7 @@ Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
ret = amd_pstate_init_freq(cpudata);
if (ret)
goto free_cpudata1;
-@@ -1503,12 +1489,12 @@ static int amd_pstate_epp_cpu_init(struc
+@@ -1481,12 +1467,12 @@ static int amd_pstate_epp_cpu_init(struc
cpudata->cpu = policy->cpu;
cpudata->epp_policy = 0;
@ -215,7 +215,7 @@ Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
ret = amd_pstate_init_freq(cpudata);
if (ret)
goto free_cpudata1;
-@@ -1970,6 +1956,12 @@ static int __init amd_pstate_init(void)
+@@ -1948,6 +1934,12 @@ static int __init amd_pstate_init(void)
static_call_update(amd_pstate_update_perf, cppc_update_perf);
}


@@ -143,7 +143,7 @@ Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
WRITE_ONCE(cpudata->nominal_perf, cppc_perf.nominal_perf);
WRITE_ONCE(cpudata->lowest_nonlinear_perf,
cppc_perf.lowest_nonlinear_perf);
-@@ -944,8 +910,8 @@ static u32 amd_pstate_get_transition_lat
+@@ -922,8 +888,8 @@ static u32 amd_pstate_get_transition_lat
static int amd_pstate_init_freq(struct amd_cpudata *cpudata)
{
int ret;
@@ -154,7 +154,7 @@ Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
u32 nominal_perf, nominal_freq;
u32 lowest_nonlinear_perf, lowest_nonlinear_freq;
u32 boost_ratio, lowest_nonlinear_ratio;
-@@ -967,8 +933,10 @@ static int amd_pstate_init_freq(struct a
+@@ -945,8 +911,10 @@ static int amd_pstate_init_freq(struct a
nominal_perf = READ_ONCE(cpudata->nominal_perf);


@@ -17,7 +17,7 @@ Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
-@@ -836,17 +836,17 @@ static void amd_pstate_update_limits(uns
+@@ -814,17 +814,17 @@ static void amd_pstate_update_limits(uns
cpudata = policy->driver_data;


@@ -13,7 +13,7 @@ Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
-@@ -1796,7 +1796,7 @@ static int __init amd_pstate_set_driver(
+@@ -1774,7 +1774,7 @@ static int __init amd_pstate_set_driver(
return -EINVAL;
}


@@ -93,7 +93,7 @@ Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
u32 min_perf, u32 des_perf,
u32 max_perf, bool fast_switch)
{
-@@ -1919,9 +1919,9 @@ static int __init amd_pstate_init(void)
+@@ -1897,9 +1897,9 @@ static int __init amd_pstate_init(void)
current_pstate_driver->adjust_perf = amd_pstate_adjust_perf;
} else {
pr_debug("AMD CPPC shared memory based functionality is supported\n");


@@ -33,7 +33,7 @@ Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
-@@ -1025,13 +1025,6 @@ static int amd_pstate_cpu_init(struct cp
+@@ -1003,13 +1003,6 @@ static int amd_pstate_cpu_init(struct cp
if (cpu_feature_enabled(X86_FEATURE_CPPC))
policy->fast_switch_possible = true;
@@ -47,7 +47,7 @@ Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
ret = freq_qos_add_request(&policy->constraints, &cpudata->req[1],
FREQ_QOS_MAX, policy->cpuinfo.max_freq);
if (ret < 0) {
-@@ -1746,6 +1739,13 @@ static int amd_pstate_epp_resume(struct
+@@ -1724,6 +1717,13 @@ static int amd_pstate_epp_resume(struct
return 0;
}
@@ -61,7 +61,7 @@ Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
static struct cpufreq_driver amd_pstate_driver = {
.flags = CPUFREQ_CONST_LOOPS | CPUFREQ_NEED_UPDATE_LIMITS,
.verify = amd_pstate_verify,
-@@ -1759,6 +1759,7 @@ static struct cpufreq_driver amd_pstate_
+@@ -1737,6 +1737,7 @@ static struct cpufreq_driver amd_pstate_
.update_limits = amd_pstate_update_limits,
.name = "amd-pstate",
.attr = amd_pstate_attr,
@@ -69,7 +69,7 @@ Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
};
static struct cpufreq_driver amd_pstate_epp_driver = {
-@@ -1775,6 +1776,7 @@ static struct cpufreq_driver amd_pstate_
+@@ -1753,6 +1754,7 @@ static struct cpufreq_driver amd_pstate_
.set_boost = amd_pstate_set_boost,
.name = "amd-pstate-epp",
.attr = amd_pstate_epp_attr,


@@ -16,7 +16,7 @@ Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
-@@ -726,7 +726,7 @@ static int amd_pstate_cpu_boost_update(s
+@@ -704,7 +704,7 @@ static int amd_pstate_cpu_boost_update(s
policy->max = policy->cpuinfo.max_freq;
if (cppc_state == AMD_PSTATE_PASSIVE) {
@@ -25,7 +25,7 @@ Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
if (ret < 0)
pr_debug("Failed to update freq constraint: CPU%d\n", cpudata->cpu);
}
-@@ -993,17 +993,17 @@ static int amd_pstate_cpu_init(struct cp
+@@ -971,17 +971,17 @@ static int amd_pstate_cpu_init(struct cp
ret = amd_pstate_init_perf(cpudata);
if (ret)
@@ -46,7 +46,7 @@ Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
min_freq = READ_ONCE(cpudata->min_freq);
max_freq = READ_ONCE(cpudata->max_freq);
-@@ -1025,11 +1025,11 @@ static int amd_pstate_cpu_init(struct cp
+@@ -1003,11 +1003,11 @@ static int amd_pstate_cpu_init(struct cp
if (cpu_feature_enabled(X86_FEATURE_CPPC))
policy->fast_switch_possible = true;
@@ -60,7 +60,7 @@ Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
}
cpudata->max_limit_freq = max_freq;
-@@ -1042,9 +1042,7 @@ static int amd_pstate_cpu_init(struct cp
+@@ -1020,9 +1020,7 @@ static int amd_pstate_cpu_init(struct cp
return 0;
@@ -71,7 +71,7 @@ Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
kfree(cpudata);
return ret;
}
-@@ -1053,8 +1051,7 @@ static void amd_pstate_cpu_exit(struct c
+@@ -1031,8 +1029,7 @@ static void amd_pstate_cpu_exit(struct c
{
struct amd_cpudata *cpudata = policy->driver_data;


@@ -38,7 +38,7 @@ Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
lowest_perf = READ_ONCE(cpudata->lowest_perf);
if (min_limit_perf < lowest_perf)
-@@ -1526,10 +1531,13 @@ static int amd_pstate_epp_update_limit(s
+@@ -1504,10 +1509,13 @@ static int amd_pstate_epp_update_limit(s
u64 value;
s16 epp;


@@ -1,55 +0,0 @@
From 01ad0fb3da95867947d923596a26b18d844afe3c Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Sat, 12 Oct 2024 12:45:17 -0500
Subject: cpufreq/amd-pstate: Don't update CPPC request in
amd_pstate_cpu_boost_update()
When boost is changed, the CPPC value is changed in amd_pstate_cpu_boost_update(),
but then changed again when refresh_frequency_limits() and all its callbacks
occur. The first is a pointless write, so instead just update the limits for
the policy and let the policy refresh anchor everything properly.
Fixes: c8c68c38b56f ("cpufreq: amd-pstate: initialize core precision boost state")
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
---
drivers/cpufreq/amd-pstate.c | 24 +-----------------------
1 file changed, 1 insertion(+), 23 deletions(-)
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -695,34 +695,12 @@ static void amd_pstate_adjust_perf(unsig
static int amd_pstate_cpu_boost_update(struct cpufreq_policy *policy, bool on)
{
struct amd_cpudata *cpudata = policy->driver_data;
- struct cppc_perf_ctrls perf_ctrls;
- u32 highest_perf, nominal_perf, nominal_freq, max_freq;
+ u32 nominal_freq, max_freq;
int ret = 0;
- highest_perf = READ_ONCE(cpudata->highest_perf);
- nominal_perf = READ_ONCE(cpudata->nominal_perf);
nominal_freq = READ_ONCE(cpudata->nominal_freq);
max_freq = READ_ONCE(cpudata->max_freq);
- if (boot_cpu_has(X86_FEATURE_CPPC)) {
- u64 value = READ_ONCE(cpudata->cppc_req_cached);
-
- value &= ~GENMASK_ULL(7, 0);
- value |= on ? highest_perf : nominal_perf;
- WRITE_ONCE(cpudata->cppc_req_cached, value);
-
- wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value);
- } else {
- perf_ctrls.max_perf = on ? highest_perf : nominal_perf;
- ret = cppc_set_perf(cpudata->cpu, &perf_ctrls);
- if (ret) {
- cpufreq_cpu_release(policy);
- pr_debug("Failed to set max perf on CPU:%d. ret:%d\n",
- cpudata->cpu, ret);
- return ret;
- }
- }
-
if (on)
policy->cpuinfo.max_freq = max_freq;
else if (policy->cpuinfo.max_freq > nominal_freq * 1000)
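
The net effect of the removal is easier to see in one piece. Below is a condensed sketch of amd_pstate_cpu_boost_update() as it stands after this patch, reassembled from the context lines visible in this commit's hunks; the tail of the function is not shown in the hunk above, so the passive-mode QoS branch here is an assumption based on the passive-mode hunk earlier in this commit:

static int amd_pstate_cpu_boost_update(struct cpufreq_policy *policy, bool on)
{
	struct amd_cpudata *cpudata = policy->driver_data;
	u32 nominal_freq, max_freq;
	int ret = 0;

	nominal_freq = READ_ONCE(cpudata->nominal_freq);
	max_freq = READ_ONCE(cpudata->max_freq);

	/* No MSR or shared-memory CPPC write here any more: only the
	 * advertised limits change, and the later policy refresh
	 * propagates them to the CPPC request. */
	if (on)
		policy->cpuinfo.max_freq = max_freq;
	else if (policy->cpuinfo.max_freq > nominal_freq * 1000)
		policy->cpuinfo.max_freq = nominal_freq * 1000;

	policy->max = policy->cpuinfo.max_freq;

	if (cppc_state == AMD_PSTATE_PASSIVE) {
		/* refresh the max-frequency QoS request; assumed from the
		 * passive-mode hunk shown earlier in this commit */
		ret = freq_qos_update_request(&cpudata->req[1],
					      policy->cpuinfo.max_freq);
		if (ret < 0)
			pr_debug("Failed to update freq constraint: CPU%d\n",
				 cpudata->cpu);
	}

	return ret < 0 ? ret : 0;
}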


@@ -1,33 +0,0 @@
From e82b9b5a56bcac18cae68878fe67263279805735 Mon Sep 17 00:00:00 2001
From: "Gautham R. Shenoy" <gautham.shenoy@amd.com>
Date: Mon, 21 Oct 2024 15:48:35 +0530
Subject: amd-pstate: Set min_perf to nominal_perf for active mode performance
gov
The amd-pstate driver sets CPPC_REQ.min_perf to CPPC_REQ.max_perf when
in active mode with the performance governor. Typically CPPC_REQ.max_perf
is set to CPPC.highest_perf. This causes frequency throttling on
power-limited platforms, which causes performance regressions on
certain classes of workloads.
Hence, set CPPC_REQ.min_perf to CPPC.nominal_perf or
CPPC_REQ.max_perf, whichever of the two is lower.
Fixes: ffa5096a7c33 ("cpufreq: amd-pstate: implement Pstate EPP support for the AMD processors")
Signed-off-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
---
drivers/cpufreq/amd-pstate.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -1565,7 +1565,7 @@ static int amd_pstate_epp_update_limit(s
value = READ_ONCE(cpudata->cppc_req_cached);
if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
- min_perf = max_perf;
+ min_perf = min(cpudata->nominal_perf, max_perf);
/* Initial min/max values for CPPC Performance Controls Register */
value &= ~AMD_CPPC_MIN_PERF(~0L);


@@ -95,7 +95,7 @@ Signed-off-by: Christian Loehle <christian.loehle@arm.com>
policy->governor_data = sg_policy;
sg_policy->tunables = tunables;
-@@ -834,6 +870,8 @@ static int sugov_start(struct cpufreq_po
+@@ -833,6 +869,8 @@ static int sugov_start(struct cpufreq_po
sg_policy->limits_changed = false;
sg_policy->cached_raw_freq = 0;


@@ -314,7 +314,7 @@ Signed-off-by: Christian Loehle <christian.loehle@arm.com>
policy->governor_data = sg_policy;
sg_policy->tunables = tunables;
-@@ -870,8 +697,6 @@ static int sugov_start(struct cpufreq_po
+@@ -869,8 +696,6 @@ static int sugov_start(struct cpufreq_po
sg_policy->limits_changed = false;
sg_policy->cached_raw_freq = 0;


@@ -1,326 +0,0 @@
From 11fa4cfe7134f44f2cdac4b25636fc3291096979 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 8 Nov 2024 08:07:37 -0500
Subject: KVM: x86: switch hugepage recovery thread to vhost_task
kvm_vm_create_worker_thread() is meant to be used for kthreads that
can consume significant amounts of CPU time on behalf of a VM or in
response to how the VM behaves (for example how it accesses its memory).
Therefore it wants to charge the CPU time consumed by that work to
the VM's container.
However, because of these threads, cgroups which have kvm instances inside
never complete freezing. This can be trivially reproduced:
root@test ~# mkdir /sys/fs/cgroup/test
root@test ~# echo $fish_pid > /sys/fs/cgroup/test/cgroup.procs
root@test ~# qemu-system-x86_64 --nographic -enable-kvm
and in another terminal:
root@test ~# echo 1 > /sys/fs/cgroup/test/cgroup.freeze
root@test ~# cat /sys/fs/cgroup/test/cgroup.events
populated 1
frozen 0
The cgroup freezing happens in the signal delivery path, but the
kvm_vm_worker_thread() thread never calls into the signal delivery path while
joining non-root cgroups, so it never gets frozen. Because the cgroup
freezer determines whether a given cgroup is frozen by comparing the number
of frozen threads to the total number of threads in the cgroup, the cgroup
never becomes frozen and users waiting for the state transition may hang
indefinitely.
Since the worker kthread is tied to a user process, it's better if
it behaves similarly to user tasks as much as possible, including
being able to send SIGSTOP and SIGCONT. In fact, vhost_task is all
that kvm_vm_create_worker_thread() wanted to be and more: not only does it
inherit the userspace process's cgroups, it has other niceties like
being parented properly in the process tree. Use it instead of the
homegrown alternative.
(Commit message based on emails from Tejun).
Reported-by: Tejun Heo <tj@kernel.org>
Reported-by: Luca Boccassi <bluca@debian.org>
Tested-by: Luca Boccassi <bluca@debian.org>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
arch/x86/include/asm/kvm_host.h | 4 +-
arch/x86/kvm/Kconfig | 1 +
arch/x86/kvm/mmu/mmu.c | 67 +++++++++++----------
include/linux/kvm_host.h | 6 --
virt/kvm/kvm_main.c | 103 --------------------------------
5 files changed, 39 insertions(+), 142 deletions(-)
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -26,6 +26,7 @@
#include <linux/irqbypass.h>
#include <linux/hyperv.h>
#include <linux/kfifo.h>
+#include <linux/sched/vhost_task.h>
#include <asm/apic.h>
#include <asm/pvclock-abi.h>
@@ -1445,7 +1446,8 @@ struct kvm_arch {
bool sgx_provisioning_allowed;
struct kvm_x86_pmu_event_filter __rcu *pmu_event_filter;
- struct task_struct *nx_huge_page_recovery_thread;
+ struct vhost_task *nx_huge_page_recovery_thread;
+ u64 nx_huge_page_next;
#ifdef CONFIG_X86_64
/* The number of TDP MMU pages across all roots. */
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -29,6 +29,7 @@ config KVM
select HAVE_KVM_IRQ_BYPASS
select HAVE_KVM_IRQ_ROUTING
select HAVE_KVM_READONLY_MEM
+ select VHOST_TASK
select KVM_ASYNC_PF
select USER_RETURN_NOTIFIER
select KVM_MMIO
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -7160,7 +7160,7 @@ static int set_nx_huge_pages(const char
kvm_mmu_zap_all_fast(kvm);
mutex_unlock(&kvm->slots_lock);
- wake_up_process(kvm->arch.nx_huge_page_recovery_thread);
+ vhost_task_wake(kvm->arch.nx_huge_page_recovery_thread);
}
mutex_unlock(&kvm_lock);
}
@@ -7306,7 +7306,7 @@ static int set_nx_huge_pages_recovery_pa
mutex_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list)
- wake_up_process(kvm->arch.nx_huge_page_recovery_thread);
+ vhost_task_wake(kvm->arch.nx_huge_page_recovery_thread);
mutex_unlock(&kvm_lock);
}
@@ -7409,62 +7409,65 @@ static void kvm_recover_nx_huge_pages(st
srcu_read_unlock(&kvm->srcu, rcu_idx);
}
-static long get_nx_huge_page_recovery_timeout(u64 start_time)
+#define NX_HUGE_PAGE_DISABLED (-1)
+
+static u64 get_nx_huge_page_recovery_next(void)
{
bool enabled;
uint period;
enabled = calc_nx_huge_pages_recovery_period(&period);
- return enabled ? start_time + msecs_to_jiffies(period) - get_jiffies_64()
- : MAX_SCHEDULE_TIMEOUT;
+ return enabled ? get_jiffies_64() + msecs_to_jiffies(period)
+ : NX_HUGE_PAGE_DISABLED;
}
-static int kvm_nx_huge_page_recovery_worker(struct kvm *kvm, uintptr_t data)
+static void kvm_nx_huge_page_recovery_worker_kill(void *data)
{
- u64 start_time;
- long remaining_time;
-
- while (true) {
- start_time = get_jiffies_64();
- remaining_time = get_nx_huge_page_recovery_timeout(start_time);
-
- set_current_state(TASK_INTERRUPTIBLE);
- while (!kthread_should_stop() && remaining_time > 0) {
- schedule_timeout(remaining_time);
- remaining_time = get_nx_huge_page_recovery_timeout(start_time);
- set_current_state(TASK_INTERRUPTIBLE);
- }
+}
- set_current_state(TASK_RUNNING);
+static bool kvm_nx_huge_page_recovery_worker(void *data)
+{
+ struct kvm *kvm = data;
+ long remaining_time;
- if (kthread_should_stop())
- return 0;
+ if (kvm->arch.nx_huge_page_next == NX_HUGE_PAGE_DISABLED)
+ return false;
- kvm_recover_nx_huge_pages(kvm);
+ remaining_time = kvm->arch.nx_huge_page_next - get_jiffies_64();
+ if (remaining_time > 0) {
+ schedule_timeout(remaining_time);
+ /* check for signals and come back */
+ return true;
}
+
+ __set_current_state(TASK_RUNNING);
+ kvm_recover_nx_huge_pages(kvm);
+ kvm->arch.nx_huge_page_next = get_nx_huge_page_recovery_next();
+ return true;
}
int kvm_mmu_post_init_vm(struct kvm *kvm)
{
- int err;
-
if (nx_hugepage_mitigation_hard_disabled)
return 0;
- err = kvm_vm_create_worker_thread(kvm, kvm_nx_huge_page_recovery_worker, 0,
- "kvm-nx-lpage-recovery",
- &kvm->arch.nx_huge_page_recovery_thread);
- if (!err)
- kthread_unpark(kvm->arch.nx_huge_page_recovery_thread);
+ kvm->arch.nx_huge_page_next = get_nx_huge_page_recovery_next();
+ kvm->arch.nx_huge_page_recovery_thread = vhost_task_create(
+ kvm_nx_huge_page_recovery_worker, kvm_nx_huge_page_recovery_worker_kill,
+ kvm, "kvm-nx-lpage-recovery");
- return err;
+ if (!kvm->arch.nx_huge_page_recovery_thread)
+ return -ENOMEM;
+
+ vhost_task_start(kvm->arch.nx_huge_page_recovery_thread);
+ return 0;
}
void kvm_mmu_pre_destroy_vm(struct kvm *kvm)
{
if (kvm->arch.nx_huge_page_recovery_thread)
- kthread_stop(kvm->arch.nx_huge_page_recovery_thread);
+ vhost_task_stop(kvm->arch.nx_huge_page_recovery_thread);
}
#ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -2370,12 +2370,6 @@ static inline int kvm_arch_vcpu_run_pid_
}
#endif /* CONFIG_HAVE_KVM_VCPU_RUN_PID_CHANGE */
-typedef int (*kvm_vm_thread_fn_t)(struct kvm *kvm, uintptr_t data);
-
-int kvm_vm_create_worker_thread(struct kvm *kvm, kvm_vm_thread_fn_t thread_fn,
- uintptr_t data, const char *name,
- struct task_struct **thread_ptr);
-
#ifdef CONFIG_KVM_XFER_TO_GUEST_WORK
static inline void kvm_handle_signal_exit(struct kvm_vcpu *vcpu)
{
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -6573,106 +6573,3 @@ void kvm_exit(void)
kvm_irqfd_exit();
}
EXPORT_SYMBOL_GPL(kvm_exit);
-
-struct kvm_vm_worker_thread_context {
- struct kvm *kvm;
- struct task_struct *parent;
- struct completion init_done;
- kvm_vm_thread_fn_t thread_fn;
- uintptr_t data;
- int err;
-};
-
-static int kvm_vm_worker_thread(void *context)
-{
- /*
- * The init_context is allocated on the stack of the parent thread, so
- * we have to locally copy anything that is needed beyond initialization
- */
- struct kvm_vm_worker_thread_context *init_context = context;
- struct task_struct *parent;
- struct kvm *kvm = init_context->kvm;
- kvm_vm_thread_fn_t thread_fn = init_context->thread_fn;
- uintptr_t data = init_context->data;
- int err;
-
- err = kthread_park(current);
- /* kthread_park(current) is never supposed to return an error */
- WARN_ON(err != 0);
- if (err)
- goto init_complete;
-
- err = cgroup_attach_task_all(init_context->parent, current);
- if (err) {
- kvm_err("%s: cgroup_attach_task_all failed with err %d\n",
- __func__, err);
- goto init_complete;
- }
-
- set_user_nice(current, task_nice(init_context->parent));
-
-init_complete:
- init_context->err = err;
- complete(&init_context->init_done);
- init_context = NULL;
-
- if (err)
- goto out;
-
- /* Wait to be woken up by the spawner before proceeding. */
- kthread_parkme();
-
- if (!kthread_should_stop())
- err = thread_fn(kvm, data);
-
-out:
- /*
- * Move kthread back to its original cgroup to prevent it lingering in
- * the cgroup of the VM process, after the latter finishes its
- * execution.
- *
- * kthread_stop() waits on the 'exited' completion condition which is
- * set in exit_mm(), via mm_release(), in do_exit(). However, the
- * kthread is removed from the cgroup in the cgroup_exit() which is
- * called after the exit_mm(). This causes the kthread_stop() to return
- * before the kthread actually quits the cgroup.
- */
- rcu_read_lock();
- parent = rcu_dereference(current->real_parent);
- get_task_struct(parent);
- rcu_read_unlock();
- cgroup_attach_task_all(parent, current);
- put_task_struct(parent);
-
- return err;
-}
-
-int kvm_vm_create_worker_thread(struct kvm *kvm, kvm_vm_thread_fn_t thread_fn,
- uintptr_t data, const char *name,
- struct task_struct **thread_ptr)
-{
- struct kvm_vm_worker_thread_context init_context = {};
- struct task_struct *thread;
-
- *thread_ptr = NULL;
- init_context.kvm = kvm;
- init_context.parent = current;
- init_context.thread_fn = thread_fn;
- init_context.data = data;
- init_completion(&init_context.init_done);
-
- thread = kthread_run(kvm_vm_worker_thread, &init_context,
- "%s-%d", name, task_pid_nr(current));
- if (IS_ERR(thread))
- return PTR_ERR(thread);
-
- /* kthread_run is never supposed to return NULL */
- WARN_ON(thread == NULL);
-
- wait_for_completion(&init_context.init_done);
-
- if (!init_context.err)
- *thread_ptr = thread;
-
- return init_context.err;
-}
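
For readers who only want the shape of the replacement, here is a condensed sketch of the new lifecycle, using just the vhost_task calls that appear in the hunks above (vhost_task_create, vhost_task_start, vhost_task_wake, vhost_task_stop); the worker and helper names below are illustrative, and the real deadline bookkeeping lives in the patch itself:

#include <linux/sched/vhost_task.h>

/* Illustrative worker: return true to be invoked again, false to end.
 * Unlike the old kthread loop, signal handling and cgroup freezing are
 * taken care of between invocations. */
static bool nx_recovery_worker(void *data)
{
	struct kvm *kvm = data;

	/* sleep until the next recovery deadline, run one pass ... */
	return true;
}

static void nx_recovery_kill(void *data)
{
	/* nothing to tear down in this sketch */
}

static int nx_recovery_start(struct kvm *kvm)
{
	struct vhost_task *t;

	/* the task is parented to the creating process and joins its
	 * cgroups, which is what lets cgroup.freeze complete */
	t = vhost_task_create(nx_recovery_worker, nx_recovery_kill,
			      kvm, "kvm-nx-lpage-recovery");
	if (!t)
		return -ENOMEM;
	kvm->arch.nx_huge_page_recovery_thread = t;
	vhost_task_start(t);
	return 0;
}

/* module-parameter writers nudge the task with vhost_task_wake();
 * VM teardown stops it with vhost_task_stop(). */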


@@ -174,7 +174,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
struct user_namespace *user_ns, struct ipc_namespace *ns);
--- a/ipc/namespace.c
+++ b/ipc/namespace.c
-@@ -207,6 +207,22 @@ void put_ipc_ns(struct ipc_namespace *ns
+@@ -209,6 +209,22 @@ void put_ipc_ns(struct ipc_namespace *ns
}
EXPORT_SYMBOL_GPL(put_ipc_ns);


@@ -11,7 +11,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
-@@ -605,7 +605,8 @@ enum {
+@@ -615,7 +615,8 @@ enum {
QUEUE_FLAG_MAX
};


@@ -35,7 +35,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
no_console_suspend
--- a/init/Kconfig
+++ b/init/Kconfig
-@@ -1309,6 +1309,18 @@ config SCHED_AUTOGROUP
+@@ -1318,6 +1318,18 @@ config SCHED_AUTOGROUP
desktop applications. Task group autogeneration is currently based
upon task session.


@@ -9,7 +9,7 @@ Subject: ZEN: INTERACTIVE: Base config item
--- a/init/Kconfig
+++ b/init/Kconfig
-@@ -134,6 +134,12 @@ config THREAD_INFO_IN_TASK
+@@ -143,6 +143,12 @@ config THREAD_INFO_IN_TASK
menu "General setup"


@@ -24,7 +24,7 @@ Subject: ZEN: INTERACTIVE: Use BFQ as the elevator for SQ devices
/*
--- a/init/Kconfig
+++ b/init/Kconfig
-@@ -140,6 +140,10 @@ config ZEN_INTERACTIVE
+@@ -149,6 +149,10 @@ config ZEN_INTERACTIVE
help
Tunes the kernel for responsiveness at the cost of throughput and power usage.


@@ -26,7 +26,7 @@ Subject: ZEN: INTERACTIVE: Use Kyber as the elevator for MQ devices
return elevator_find_get("bfq");
--- a/init/Kconfig
+++ b/init/Kconfig
-@@ -143,6 +143,7 @@ config ZEN_INTERACTIVE
+@@ -152,6 +152,7 @@ config ZEN_INTERACTIVE
--- Block Layer ----------------------------------------
Default scheduler for SQ..: mq-deadline -> bfq


@@ -32,7 +32,7 @@ Reasoning and details in the original patch: https://lwn.net/Articles/711248/
--- a/init/Kconfig
+++ b/init/Kconfig
-@@ -145,6 +145,10 @@ config ZEN_INTERACTIVE
+@@ -154,6 +154,10 @@ config ZEN_INTERACTIVE
Default scheduler for SQ..: mq-deadline -> bfq
Default scheduler for MQ..: none -> kyber


@@ -17,7 +17,7 @@ that don't know they need it.
--- a/init/Kconfig
+++ b/init/Kconfig
-@@ -148,6 +148,7 @@ config ZEN_INTERACTIVE
+@@ -157,6 +157,7 @@ config ZEN_INTERACTIVE
--- Virtual Memory Subsystem ---------------------------
Background-reclaim hugepages...: no -> yes


@@ -42,7 +42,7 @@ caused by rebalancing too many tasks at once.
--- a/init/Kconfig
+++ b/init/Kconfig
-@@ -150,6 +150,13 @@ config ZEN_INTERACTIVE
+@@ -159,6 +159,13 @@ config ZEN_INTERACTIVE
Background-reclaim hugepages...: no -> yes
MG-LRU minimum cache TTL.......: 0 -> 1000 ms


@@ -75,7 +75,7 @@ Remove MuQSS cpufreq configuration.
--- a/init/Kconfig
+++ b/init/Kconfig
-@@ -157,6 +157,12 @@ config ZEN_INTERACTIVE
+@@ -166,6 +166,12 @@ config ZEN_INTERACTIVE
Bandwidth slice size...........: 5 -> 3 ms
Task rebalancing threshold.....: 32 -> 8


@@ -12,7 +12,7 @@ turn it off when CONFIG_ZEN_INTERACTIVE is set as well.
--- a/init/Kconfig
+++ b/init/Kconfig
-@@ -149,6 +149,7 @@ config ZEN_INTERACTIVE
+@@ -158,6 +158,7 @@ config ZEN_INTERACTIVE
Background-reclaim hugepages...: no -> yes
MG-LRU minimum cache TTL.......: 0 -> 1000 ms


@@ -14,7 +14,7 @@ Signed-off-by: Sultan Alsawaf <sultan@kerneltoast.com>
--- a/init/Kconfig
+++ b/init/Kconfig
-@@ -150,6 +150,7 @@ config ZEN_INTERACTIVE
+@@ -159,6 +159,7 @@ config ZEN_INTERACTIVE
Background-reclaim hugepages...: no -> yes
MG-LRU minimum cache TTL.......: 0 -> 1000 ms
Compact unevictable............: yes -> no


@@ -33,7 +33,7 @@ Signed-off-by: Sultan Alsawaf <sultan@kerneltoast.com>
--- a/init/Kconfig
+++ b/init/Kconfig
-@@ -151,6 +151,7 @@ config ZEN_INTERACTIVE
+@@ -160,6 +160,7 @@ config ZEN_INTERACTIVE
MG-LRU minimum cache TTL.......: 0 -> 1000 ms
Compact unevictable............: yes -> no
Compaction proactiveness.......: 20 -> 0


@@ -47,7 +47,7 @@ Signed-off-by: Sultan Alsawaf <sultan@kerneltoast.com>
--- a/init/Kconfig
+++ b/init/Kconfig
-@@ -152,6 +152,7 @@ config ZEN_INTERACTIVE
+@@ -161,6 +161,7 @@ config ZEN_INTERACTIVE
Compact unevictable............: yes -> no
Compaction proactiveness.......: 20 -> 0
Watermark boost factor.........: 1.5 -> 0


@@ -34,7 +34,7 @@ Fixes: https://github.com/zen-kernel/zen-kernel/issues/282
goto bad;
--- a/init/Kconfig
+++ b/init/Kconfig
-@@ -144,6 +144,7 @@ config ZEN_INTERACTIVE
+@@ -153,6 +153,7 @@ config ZEN_INTERACTIVE
Default scheduler for SQ..: mq-deadline -> bfq
Default scheduler for MQ..: none -> kyber


@@ -20,7 +20,7 @@ same change so Zen Kernel users benefit.
--- a/init/Kconfig
+++ b/init/Kconfig
-@@ -154,6 +154,7 @@ config ZEN_INTERACTIVE
+@@ -163,6 +163,7 @@ config ZEN_INTERACTIVE
Compaction proactiveness.......: 20 -> 0
Watermark boost factor.........: 1.5 -> 0
Pageblock order................: 10 -> 3


@@ -188,7 +188,6 @@ patchset-pf/amd-pstate/0019-cpufreq-Add-a-callback-to-update-the-min_freq_req-fr
 patchset-pf/amd-pstate/0020-cpufreq-amd-pstate-Set-the-initial-min_freq-to-lowes.patch
 patchset-pf/amd-pstate/0021-cpufreq-amd-pstate-Cleanup-the-old-min_freq-qos-requ.patch
 patchset-pf/amd-pstate/0022-cpufreq-amd-pstate-Use-nominal-perf-for-limits-when-.patch
-patchset-pf/amd-pstate/0023-cpufreq-amd-pstate-Don-t-update-CPPC-request-in-amd_.patch
 patchset-pf/amd-pstate/0024-cpufreq-amd-pstate-Use-amd_pstate_update_min_max_lim.patch
 patchset-pf/amd-pstate/0025-cpufreq-amd-pstate-Drop-needless-EPP-initialization.patch
 patchset-pf/amd-pstate/0026-amd-pstate-6.11-update-setting-the-minimum-frequency.patch
@@ -197,7 +196,6 @@ patchset-pf/amd-pstate/0028-cpufreq-amd-pstate-Call-amd_pstate_set_driver-in-amd
 patchset-pf/amd-pstate/0029-cpufreq-amd-pstate-Remove-the-switch-case-in-amd_pst.patch
 patchset-pf/amd-pstate/0030-cpufreq-amd-pstate-Remove-the-redundant-amd_pstate_s.patch
 patchset-pf/amd-pstate/0031-cpufreq-amd-pstate-ut-Add-fix-for-min-freq-unit-test.patch
-patchset-pf/amd-pstate/0032-amd-pstate-Set-min_perf-to-nominal_perf-for-active-m.patch
 patchset-pf/amd-pstate/0033-amd-pstate-Switch-to-amd-pstate-by-default-on-some-S.patch
 patchset-pf/amd-pstate/0034-cpufreq-amd-pstate-Push-adjust_perf-vfunc-init-into-.patch
 patchset-pf/amd-pstate/0035-ACPI-processor-Move-arch_init_invariance_cppc-call-l.patch
@@ -307,6 +305,5 @@ patchset-pf/fixes/0002-cpufreq-Remove-LATENCY_MULTIPLIER.patch
 patchset-pf/fixes/0003-drivers-firmware-skip-simpledrm-if-nvidia-drm.modese.patch
 patchset-pf/fixes/0004-nfsd-add-more-info-to-WARN_ON_ONCE-on-failed-callbac.patch
 patchset-pf/fixes/0005-btrfs-also-add-stripe-entries-for-NOCOW-writes.patch
-patchset-pf/fixes/0006-KVM-x86-switch-hugepage-recovery-thread-to-vhost_tas.patch
 patchset-zen/fixes/0001-Partially-revert-drm-amd-amdgpu-add-pipe1-hardware-s.patch