201 lines
6.9 KiB
Diff
201 lines
6.9 KiB
Diff
From 08b1e02fc44abc04d813dbc827812db9ebca0dad Mon Sep 17 00:00:00 2001
|
|
From: Luo Gengkun <luogengkun@huaweicloud.com>
|
|
Date: Mon, 21 Apr 2025 03:50:21 +0000
|
|
Subject: watchdog: fix watchdog may detect false positive of softlockup
|
|
|
|
When updating `watchdog_thresh`, there is a race condition between writing
|
|
the new `watchdog_thresh` value and stopping the old watchdog timer. If
|
|
the old timer triggers during this window, it may falsely detect a
|
|
softlockup due to the old interval and the new `watchdog_thresh` value
|
|
being used. The problem can be described as follow:
|
|
|
|
# We assume previous watchdog_thresh is 60, so the watchdog timer is
|
|
# coming every 24s.
|
|
echo 10 > /proc/sys/kernel/watchdog_thresh (User space)
|
|
|
|
|
+------>+ update watchdog_thresh (We are in kernel now)
|
|
|
|
|
| # using old interval and new `watchdog_thresh`
|
|
+------>+ watchdog hrtimer (irq context: detect softlockup)
|
|
|
|
|
|
|
|
+-------+
|
|
|
|
|
|
|
|
+ softlockup_stop_all
|
|
|
|
To fix this problem, introduce a shadow variable for `watchdog_thresh`.
|
|
The update to the actual `watchdog_thresh` is delayed until after the old
|
|
timer is stopped, preventing false positives.
|
|
|
|
The following testcase may help to understand this problem.
|
|
|
|
---------------------------------------------
|
|
echo RT_RUNTIME_SHARE > /sys/kernel/debug/sched/features
|
|
echo -1 > /proc/sys/kernel/sched_rt_runtime_us
|
|
echo 0 > /sys/kernel/debug/sched/fair_server/cpu3/runtime
|
|
echo 60 > /proc/sys/kernel/watchdog_thresh
|
|
taskset -c 3 chrt -r 99 /bin/bash -c "while true;do true; done" &
|
|
echo 10 > /proc/sys/kernel/watchdog_thresh &
|
|
---------------------------------------------
|
|
|
|
The test case above first removes the throttling restrictions for
|
|
real-time tasks. It then sets watchdog_thresh to 60 and executes a
|
|
real-time task, a simple while(1) loop, on cpu3. Consequently, the final
|
|
command gets blocked because the presence of this real-time thread
|
|
prevents kworker:3 from being selected by the scheduler. This eventually
|
|
triggers a softlockup detection on cpu3 due to watchdog_timer_fn operating
|
|
with inconsistent variables - using both the old interval and the updated
|
|
watchdog_thresh simultaneously.
|
|
|
|
[nysal@linux.ibm.com: fix the SOFTLOCKUP_DETECTOR=n case]
|
|
Link: https://lkml.kernel.org/r/20250502111120.282690-1-nysal@linux.ibm.com
|
|
Link: https://lkml.kernel.org/r/20250421035021.3507649-1-luogengkun@huaweicloud.com
|
|
Signed-off-by: Luo Gengkun <luogengkun@huaweicloud.com>
|
|
Signed-off-by: Nysal Jan K.A. <nysal@linux.ibm.com>
|
|
Cc: Doug Anderson <dianders@chromium.org>
|
|
Cc: Joel Granados <joel.granados@kernel.org>
|
|
Cc: Song Liu <song@kernel.org>
|
|
Cc: Thomas Gleixner <tglx@linutronix.de>
|
|
Cc: "Nysal Jan K.A." <nysal@linux.ibm.com>
|
|
Cc: Venkat Rao Bagalkote <venkat88@linux.ibm.com>
|
|
Cc: <stable@vger.kernel.org>
|
|
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
|
---
|
|
kernel/watchdog.c | 41 +++++++++++++++++++++++++++--------------
|
|
1 file changed, 27 insertions(+), 14 deletions(-)
|
|
|
|
--- a/kernel/watchdog.c
|
|
+++ b/kernel/watchdog.c
|
|
@@ -47,6 +47,7 @@ int __read_mostly watchdog_user_enabled
|
|
static int __read_mostly watchdog_hardlockup_user_enabled = WATCHDOG_HARDLOCKUP_DEFAULT;
|
|
static int __read_mostly watchdog_softlockup_user_enabled = 1;
|
|
int __read_mostly watchdog_thresh = 10;
|
|
+static int __read_mostly watchdog_thresh_next;
|
|
static int __read_mostly watchdog_hardlockup_available;
|
|
|
|
struct cpumask watchdog_cpumask __read_mostly;
|
|
@@ -870,12 +871,20 @@ int lockup_detector_offline_cpu(unsigned
|
|
return 0;
|
|
}
|
|
|
|
-static void __lockup_detector_reconfigure(void)
|
|
+static void __lockup_detector_reconfigure(bool thresh_changed)
|
|
{
|
|
cpus_read_lock();
|
|
watchdog_hardlockup_stop();
|
|
|
|
softlockup_stop_all();
|
|
+ /*
|
|
+ * To prevent watchdog_timer_fn from using the old interval and
|
|
+ * the new watchdog_thresh at the same time, which could lead to
|
|
+ * false softlockup reports, it is necessary to update the
|
|
+ * watchdog_thresh after the softlockup is completed.
|
|
+ */
|
|
+ if (thresh_changed)
|
|
+ watchdog_thresh = READ_ONCE(watchdog_thresh_next);
|
|
set_sample_period();
|
|
lockup_detector_update_enable();
|
|
if (watchdog_enabled && watchdog_thresh)
|
|
@@ -888,7 +897,7 @@ static void __lockup_detector_reconfigur
|
|
void lockup_detector_reconfigure(void)
|
|
{
|
|
mutex_lock(&watchdog_mutex);
|
|
- __lockup_detector_reconfigure();
|
|
+ __lockup_detector_reconfigure(false);
|
|
mutex_unlock(&watchdog_mutex);
|
|
}
|
|
|
|
@@ -908,27 +917,29 @@ static __init void lockup_detector_setup
|
|
return;
|
|
|
|
mutex_lock(&watchdog_mutex);
|
|
- __lockup_detector_reconfigure();
|
|
+ __lockup_detector_reconfigure(false);
|
|
softlockup_initialized = true;
|
|
mutex_unlock(&watchdog_mutex);
|
|
}
|
|
|
|
#else /* CONFIG_SOFTLOCKUP_DETECTOR */
|
|
-static void __lockup_detector_reconfigure(void)
|
|
+static void __lockup_detector_reconfigure(bool thresh_changed)
|
|
{
|
|
cpus_read_lock();
|
|
watchdog_hardlockup_stop();
|
|
+ if (thresh_changed)
|
|
+ watchdog_thresh = READ_ONCE(watchdog_thresh_next);
|
|
lockup_detector_update_enable();
|
|
watchdog_hardlockup_start();
|
|
cpus_read_unlock();
|
|
}
|
|
void lockup_detector_reconfigure(void)
|
|
{
|
|
- __lockup_detector_reconfigure();
|
|
+ __lockup_detector_reconfigure(false);
|
|
}
|
|
static inline void lockup_detector_setup(void)
|
|
{
|
|
- __lockup_detector_reconfigure();
|
|
+ __lockup_detector_reconfigure(false);
|
|
}
|
|
#endif /* !CONFIG_SOFTLOCKUP_DETECTOR */
|
|
|
|
@@ -946,11 +957,11 @@ void lockup_detector_soft_poweroff(void)
|
|
#ifdef CONFIG_SYSCTL
|
|
|
|
/* Propagate any changes to the watchdog infrastructure */
|
|
-static void proc_watchdog_update(void)
|
|
+static void proc_watchdog_update(bool thresh_changed)
|
|
{
|
|
/* Remove impossible cpus to keep sysctl output clean. */
|
|
cpumask_and(&watchdog_cpumask, &watchdog_cpumask, cpu_possible_mask);
|
|
- __lockup_detector_reconfigure();
|
|
+ __lockup_detector_reconfigure(thresh_changed);
|
|
}
|
|
|
|
/*
|
|
@@ -984,7 +995,7 @@ static int proc_watchdog_common(int whic
|
|
} else {
|
|
err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
|
|
if (!err && old != READ_ONCE(*param))
|
|
- proc_watchdog_update();
|
|
+ proc_watchdog_update(false);
|
|
}
|
|
mutex_unlock(&watchdog_mutex);
|
|
return err;
|
|
@@ -1035,11 +1046,13 @@ static int proc_watchdog_thresh(const st
|
|
|
|
mutex_lock(&watchdog_mutex);
|
|
|
|
- old = READ_ONCE(watchdog_thresh);
|
|
+ watchdog_thresh_next = READ_ONCE(watchdog_thresh);
|
|
+
|
|
+ old = watchdog_thresh_next;
|
|
err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
|
|
|
|
- if (!err && write && old != READ_ONCE(watchdog_thresh))
|
|
- proc_watchdog_update();
|
|
+ if (!err && write && old != READ_ONCE(watchdog_thresh_next))
|
|
+ proc_watchdog_update(true);
|
|
|
|
mutex_unlock(&watchdog_mutex);
|
|
return err;
|
|
@@ -1060,7 +1073,7 @@ static int proc_watchdog_cpumask(const s
|
|
|
|
err = proc_do_large_bitmap(table, write, buffer, lenp, ppos);
|
|
if (!err && write)
|
|
- proc_watchdog_update();
|
|
+ proc_watchdog_update(false);
|
|
|
|
mutex_unlock(&watchdog_mutex);
|
|
return err;
|
|
@@ -1080,7 +1093,7 @@ static const struct ctl_table watchdog_s
|
|
},
|
|
{
|
|
.procname = "watchdog_thresh",
|
|
- .data = &watchdog_thresh,
|
|
+ .data = &watchdog_thresh_next,
|
|
.maxlen = sizeof(int),
|
|
.mode = 0644,
|
|
.proc_handler = proc_watchdog_thresh,
|