
drop "rt" featureset

Konstantin Demin 2024-10-29 05:12:06 +03:00
parent ae7b2275b8
commit 3a08f39a8b
113 changed files with 0 additions and 11261 deletions

View File

@@ -19,12 +19,6 @@ hardware_long = 'cloud platforms including Amazon EC2, Microsoft Azure, and Goog
[[featureset]]
name = 'none'
-[[featureset]]
-name = 'rt'
-# Override available flavours in rt featureset
-[[featureset.flavour]]
-name = 'amd64'
[build]
enable_signed = true
enable_vdso = true

View File

@@ -25,12 +25,6 @@ hardware = '64-bit ARMv8 machines with 16k pages'
[[featureset]]
name = 'none'
-[[featureset]]
-name = 'rt'
-# Override available flavours in rt featureset
-[[featureset.flavour]]
-name = 'arm64'
[build]
compiler_gnutype_compat = 'arm-linux-gnueabihf'
enable_signed = true

View File

@@ -1,6 +0,0 @@
##
## file: arch/arm64/kvm/Kconfig
##
#. ARCH_SUPPORTS_RT depends on HAVE_POSIX_CPU_TIMERS_TASK_WORK
#. HAVE_POSIX_CPU_TIMERS_TASK_WORK depends on !KVM
# CONFIG_KVM is not set

View File

@@ -15,12 +15,6 @@ hardware_long = 'ARMv7 multiplatform kernel supporting LPAE. See https://wiki.de
[[featureset]]
name = 'none'
-[[featureset]]
-name = 'rt'
-# Override available flavours in rt featureset
-[[featureset.flavour]]
-name = 'armmp'
[build]
enable_vdso = true
kernel_file = 'arch/arm/boot/zImage'

View File

@@ -84,16 +84,6 @@ name = 'x86'
[[featureset]]
name = 'none'
-[[featureset]]
-name = 'rt'
-enable = true
-[featureset.description]
-parts = ['rt']
-[featureset.description.long]
-rt = 'This kernel includes the PREEMPT_RT realtime patch set.'
-[featureset.description.short]
-rt = 'PREEMPT_RT'
[build]
compiler = 'gcc-14'

View File

@@ -1,25 +0,0 @@
##
## file: init/Kconfig
##
# CONFIG_SCHED_AUTOGROUP is not set
##
## file: kernel/Kconfig.preempt
##
## choice: Preemption Model
# CONFIG_PREEMPT_VOLUNTARY is not set
CONFIG_PREEMPT_RT=y
## end choice
##
## file: kernel/rcu/Kconfig
##
CONFIG_RCU_EXPERT=y
##
## file: kernel/trace/Kconfig
##
CONFIG_SCHED_TRACER=y
CONFIG_HWLAT_TRACER=y
CONFIG_OSNOISE_TRACER=y
CONFIG_TIMERLAT_TRACER=y

View File

@@ -1,75 +0,0 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Fri, 19 May 2023 16:57:29 +0200
Subject: [PATCH 1/4] ARM: vfp: Provide vfp_lock() for VFP locking.
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
kernel_neon_begin() uses local_bh_disable() to ensure exclusive access
to the VFP unit. This is broken on PREEMPT_RT because a BH disabled
section remains preemptible on PREEMPT_RT.
Introduce vfp_lock(), which uses local_bh_disable() on non-RT kernels and
preempt_disable() on PREEMPT_RT. Since softirqs are always processed in
thread context on PREEMPT_RT, disabling preemption is enough to ensure
that the current context won't get interrupted by something that is using
the VFP. Use it in kernel_neon_begin().
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
arch/arm/vfp/vfpmodule.c | 32 ++++++++++++++++++++++++++++++--
1 file changed, 30 insertions(+), 2 deletions(-)
--- a/arch/arm/vfp/vfpmodule.c
+++ b/arch/arm/vfp/vfpmodule.c
@@ -56,6 +56,34 @@ extern unsigned int VFP_arch_feroceon __
union vfp_state *vfp_current_hw_state[NR_CPUS];
/*
+ * Claim ownership of the VFP unit.
+ *
+ * The caller may change VFP registers until vfp_unlock() is called.
+ *
+ * local_bh_disable() is used to disable preemption and to disable VFP
+ * processing in softirq context. On PREEMPT_RT kernels local_bh_disable() is
+ * not sufficient because it only serializes soft interrupt related sections
+ * via a local lock, but stays preemptible. Disabling preemption is the right
+ * choice here as bottom half processing is always in thread context on RT
+ * kernels so it implicitly prevents bottom half processing as well.
+ */
+static void vfp_lock(void)
+{
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ local_bh_disable();
+ else
+ preempt_disable();
+}
+
+static void vfp_unlock(void)
+{
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ local_bh_enable();
+ else
+ preempt_enable();
+}
+
+/*
* Is 'thread's most up to date state stored in this CPUs hardware?
* Must be called from non-preemptible context.
*/
@@ -837,7 +865,7 @@ void kernel_neon_begin(void)
unsigned int cpu;
u32 fpexc;
- local_bh_disable();
+ vfp_lock();
/*
* Kernel mode NEON is only allowed outside of hardirq context with
@@ -868,7 +896,7 @@ void kernel_neon_end(void)
{
/* Disable the NEON/VFP unit. */
fmxr(FPEXC, fmrx(FPEXC) & ~FPEXC_EN);
- local_bh_enable();
+ vfp_unlock();
}
EXPORT_SYMBOL(kernel_neon_end);
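
For context, the bracket this patch adjusts is the usual kernel-mode NEON
pattern. Below is a minimal sketch of a hypothetical in-kernel user
(accelerated_copy() is an invented example, not part of the patch); with
this patch applied, the begin/end pair takes and releases vfp_lock()
internally:

#include <asm/neon.h>
#include <linux/types.h>

static void accelerated_copy(void *dst, const void *src, size_t len)
{
	kernel_neon_begin();	/* claims the VFP/NEON unit, now via vfp_lock() */
	/* ... NEON-accelerated work on dst/src ... */
	kernel_neon_end();	/* releases the unit via vfp_unlock() */
}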

View File

@@ -1,36 +0,0 @@
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 8 Jul 2015 17:14:48 +0200
Subject: [PATCH 1/2] arm: Disable jump-label on PREEMPT_RT.
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
jump-labels are used to efficiently switch between two possible code
paths. To achieve this, stop_machine() is used to keep the CPU in a
known state while the opcode is modified. The usage of stop_machine()
here leads to large latency spikes which can be observed on PREEMPT_RT.
Jump labels may change their target at runtime and are not restricted to
the debug or "configuration/setup" phase of a PREEMPT_RT system, where
high latencies could be considered acceptable.
Disable jump-label support on PREEMPT_RT systems.
[bigeasy: Patch description.]
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Link: https://lkml.kernel.org/r/20220613182447.112191-2-bigeasy@linutronix.de
---
arch/arm/Kconfig | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -78,7 +78,7 @@ config ARM
select HAS_IOPORT
select HAVE_ARCH_AUDITSYSCALL if AEABI && !OABI_COMPAT
select HAVE_ARCH_BITREVERSE if (CPU_32v7M || CPU_32v7) && !CPU_32v6
- select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU
+ select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU && !PREEMPT_RT
select HAVE_ARCH_KFENCE if MMU && !XIP_KERNEL
select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32 && MMU
select HAVE_ARCH_KASAN if MMU && !XIP_KERNEL
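
To make the latency source concrete, here is a sketch of a typical static
key user (my_feature_key and the message are invented for illustration;
the API is from <linux/jump_label.h>). Flipping the key rewrites the
branch site, which on ARM goes through stop_machine(); without
HAVE_ARCH_JUMP_LABEL the same code falls back to a plain conditional:

#include <linux/jump_label.h>
#include <linux/printk.h>

DEFINE_STATIC_KEY_FALSE(my_feature_key);

static void hot_path(void)
{
	/* With jump labels: a patchable NOP/branch. Without: a load and test. */
	if (static_branch_unlikely(&my_feature_key))
		pr_info("feature enabled\n");
}

static void slow_path_toggle(void)
{
	static_branch_enable(&my_feature_key);	/* patches the code at runtime */
}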

View File

@@ -1,107 +0,0 @@
From: Mike Galbraith <umgwanakikbuti@gmail.com>
Date: Sat, 27 Feb 2016 08:09:11 +0100
Subject: [PATCH 1/8] drm/i915: Use preempt_disable/enable_rt() where
recommended
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
Mario Kleiner suggested in commit
ad3543ede630f ("drm/intel: Push get_scanout_position() timestamping into kms driver.")
spots where preemption should be disabled on PREEMPT_RT. The
difference is that on PREEMPT_RT the intel_uncore::lock disables neither
preemption nor interrupts, and so the region remains preemptible.
The area covers only register reads and writes. The part that worries me
is:
- __intel_get_crtc_scanline() the worst case is 100us if no match is
found.
- intel_crtc_scanlines_since_frame_timestamp() not sure how long this
may take in the worst case.
It was in the RT queue for a while and nobody complained.
Disable preemption on PREEMPT_RT during timestamping.
[bigeasy: patch description.]
Cc: Mario Kleiner <mario.kleiner.de@gmail.com>
Signed-off-by: Mike Galbraith <umgwanakikbuti@gmail.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
drivers/gpu/drm/i915/display/intel_vblank.c | 38 ++++++++++++++++++++--------
1 file changed, 28 insertions(+), 10 deletions(-)
--- a/drivers/gpu/drm/i915/display/intel_vblank.c
+++ b/drivers/gpu/drm/i915/display/intel_vblank.c
@@ -303,6 +303,26 @@ int intel_crtc_scanline_to_hw(struct int
* all register accesses to the same cacheline to be serialized,
* otherwise they may hang.
*/
+static void intel_vblank_section_enter_irqsave(struct drm_i915_private *i915, unsigned long *flags)
+ __acquires(i915->uncore.lock)
+{
+#ifdef I915
+ spin_lock_irqsave(&i915->uncore.lock, *flags);
+#else
+ *flags = 0;
+#endif
+}
+
+static void intel_vblank_section_exit_irqrestore(struct drm_i915_private *i915, unsigned long flags)
+ __releases(i915->uncore.lock)
+{
+#ifdef I915
+ spin_unlock_irqrestore(&i915->uncore.lock, flags);
+#else
+ if (flags)
+ return;
+#endif
+}
static void intel_vblank_section_enter(struct drm_i915_private *i915)
__acquires(i915->uncore.lock)
{
@@ -354,10 +374,10 @@ static bool i915_get_crtc_scanoutpos(str
* timing critical raw register reads, potentially with
* preemption disabled, so the following code must not block.
*/
- local_irq_save(irqflags);
- intel_vblank_section_enter(dev_priv);
+ intel_vblank_section_enter_irqsave(dev_priv, &irqflags);
- /* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */
+ if (IS_ENABLED(CONFIG_PREEMPT_RT))
+ preempt_disable();
/* Get optional system timestamp before query. */
if (stime)
@@ -421,10 +441,10 @@ static bool i915_get_crtc_scanoutpos(str
if (etime)
*etime = ktime_get();
- /* preempt_enable_rt() should go right here in PREEMPT_RT patchset. */
+ if (IS_ENABLED(CONFIG_PREEMPT_RT))
+ preempt_enable();
- intel_vblank_section_exit(dev_priv);
- local_irq_restore(irqflags);
+ intel_vblank_section_exit_irqrestore(dev_priv, irqflags);
/*
* While in vblank, position will be negative
@@ -462,13 +482,11 @@ int intel_get_crtc_scanline(struct intel
unsigned long irqflags;
int position;
- local_irq_save(irqflags);
- intel_vblank_section_enter(dev_priv);
+ intel_vblank_section_enter_irqsave(dev_priv, &irqflags);
position = __intel_get_crtc_scanline(crtc);
- intel_vblank_section_exit(dev_priv);
- local_irq_restore(irqflags);
+ intel_vblank_section_exit_irqrestore(dev_priv, irqflags);
return position;
}

View File

@@ -1,58 +0,0 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Mon, 12 Aug 2024 12:39:02 +0200
Subject: [PATCH 1/4] locking/rt: Add sparse annotation PREEMPT_RT's sleeping
locks.
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
The sleeping locks on PREEMPT_RT (rt_spin_lock() and friends) lack
sparse annotation. Therefore a missing spin_unlock() won't be spotted by
sparse in a PREEMPT_RT build while it is noticed on a !PREEMPT_RT build.
Add the __acquires/__releases macros to the lock/ unlock functions. The
trylock functions already use the __cond_lock() wrapper.
Link: https://lore.kernel.org/r/20240812104200.2239232-2-bigeasy@linutronix.de
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
include/linux/rwlock_rt.h | 10 +++++-----
include/linux/spinlock_rt.h | 8 ++++----
2 files changed, 9 insertions(+), 9 deletions(-)
--- a/include/linux/rwlock_rt.h
+++ b/include/linux/rwlock_rt.h
@@ -24,13 +24,13 @@ do { \
__rt_rwlock_init(rwl, #rwl, &__key); \
} while (0)
-extern void rt_read_lock(rwlock_t *rwlock);
+extern void rt_read_lock(rwlock_t *rwlock) __acquires(rwlock);
extern int rt_read_trylock(rwlock_t *rwlock);
-extern void rt_read_unlock(rwlock_t *rwlock);
-extern void rt_write_lock(rwlock_t *rwlock);
-extern void rt_write_lock_nested(rwlock_t *rwlock, int subclass);
+extern void rt_read_unlock(rwlock_t *rwlock) __releases(rwlock);
+extern void rt_write_lock(rwlock_t *rwlock) __acquires(rwlock);
+extern void rt_write_lock_nested(rwlock_t *rwlock, int subclass) __acquires(rwlock);
extern int rt_write_trylock(rwlock_t *rwlock);
-extern void rt_write_unlock(rwlock_t *rwlock);
+extern void rt_write_unlock(rwlock_t *rwlock) __releases(rwlock);
static __always_inline void read_lock(rwlock_t *rwlock)
{
--- a/include/linux/spinlock_rt.h
+++ b/include/linux/spinlock_rt.h
@@ -32,10 +32,10 @@ do { \
__rt_spin_lock_init(slock, #slock, &__key, true); \
} while (0)
-extern void rt_spin_lock(spinlock_t *lock);
-extern void rt_spin_lock_nested(spinlock_t *lock, int subclass);
-extern void rt_spin_lock_nest_lock(spinlock_t *lock, struct lockdep_map *nest_lock);
-extern void rt_spin_unlock(spinlock_t *lock);
+extern void rt_spin_lock(spinlock_t *lock) __acquires(lock);
+extern void rt_spin_lock_nested(spinlock_t *lock, int subclass) __acquires(lock);
+extern void rt_spin_lock_nest_lock(spinlock_t *lock, struct lockdep_map *nest_lock) __acquires(lock);
+extern void rt_spin_unlock(spinlock_t *lock) __releases(lock);
extern void rt_spin_lock_unlock(spinlock_t *lock);
extern int rt_spin_trylock_bh(spinlock_t *lock);
extern int rt_spin_trylock(spinlock_t *lock);
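
For reference, these annotations only mean something to sparse; a
shortened form of their definitions (abridged from
include/linux/compiler_types.h, shown here for orientation):

#ifdef __CHECKER__
# define __acquires(x)		__attribute__((context(x, 0, 1)))
# define __releases(x)		__attribute__((context(x, 1, 0)))
# define __acquire(x)		__context__(x, 1)
# define __cond_lock(x, c)	((c) ? ({ __acquire(x); 1; }) : 0)
#else
# define __acquires(x)
# define __releases(x)
# define __acquire(x)		(void)0
# define __cond_lock(x, c)	(c)
#endif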

View File

@@ -1,39 +0,0 @@
From: John Ogness <john.ogness@linutronix.de>
Date: Tue, 20 Aug 2024 08:35:27 +0206
Subject: [PATCH 01/54] printk: Add notation to console_srcu locking
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
kernel/printk/printk.c:284:5: sparse: sparse: context imbalance in
'console_srcu_read_lock' - wrong count at exit
include/linux/srcu.h:301:9: sparse: sparse: context imbalance in
'console_srcu_read_unlock' - unexpected unlock
Fixes: 6c4afa79147e ("printk: Prepare for SRCU console list protection")
Signed-off-by: John Ogness <john.ogness@linutronix.de>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Acked-by: Paul E. McKenney <paulmck@kernel.org>
Link: https://lore.kernel.org/r/20240820063001.36405-2-john.ogness@linutronix.de
Signed-off-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/printk/printk.c | 2 ++
1 file changed, 2 insertions(+)
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -282,6 +282,7 @@ EXPORT_SYMBOL(console_list_unlock);
* Return: A cookie to pass to console_srcu_read_unlock().
*/
int console_srcu_read_lock(void)
+ __acquires(&console_srcu)
{
return srcu_read_lock_nmisafe(&console_srcu);
}
@@ -295,6 +296,7 @@ EXPORT_SYMBOL(console_srcu_read_lock);
* Counterpart to console_srcu_read_lock()
*/
void console_srcu_read_unlock(int cookie)
+ __releases(&console_srcu)
{
srcu_read_unlock_nmisafe(&console_srcu, cookie);
}
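
The reader-side pattern these annotations describe, as a sketch
(walk_consoles() is an invented example; the lock/unlock helpers and the
for_each_console_srcu() iterator are from <linux/console.h>):

#include <linux/console.h>

static void walk_consoles(void)
{
	struct console *con;
	int cookie;

	cookie = console_srcu_read_lock();	/* sparse: acquires console_srcu */
	for_each_console_srcu(con) {
		/* ... inspect console state under SRCU protection ... */
	}
	console_srcu_read_unlock(cookie);	/* sparse: releases console_srcu */
}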

View File

@@ -1,54 +0,0 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Fri, 4 Aug 2023 13:30:37 +0200
Subject: [PATCH 1/3] sched/core: Provide a method to check if a task is
PI-boosted.
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
Provide a method to check if a task inherited the priority from another
task. This happens if a task owns a lock which is requested by a task
with higher priority. This can be used as a hint to add a preemption
point to the critical section.
Provide a function which reports true if the task is PI-boosted.
Link: https://lore.kernel.org/r/20230804113039.419794-2-bigeasy@linutronix.de
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
include/linux/sched.h | 1 +
kernel/sched/core.c | 15 +++++++++++++++
2 files changed, 16 insertions(+)
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1814,6 +1814,7 @@ static inline int dl_task_check_affinity
}
#endif
+extern bool task_is_pi_boosted(const struct task_struct *p);
extern int yield_to(struct task_struct *p, bool preempt);
extern void set_user_nice(struct task_struct *p, long nice);
extern int task_prio(const struct task_struct *p);
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7362,6 +7362,21 @@ static inline void preempt_dynamic_init(
#endif /* CONFIG_PREEMPT_DYNAMIC */
+/*
+ * task_is_pi_boosted - Check if task has been PI boosted.
+ * @p: Task to check.
+ *
+ * Return true if task is subject to priority inheritance.
+ */
+bool task_is_pi_boosted(const struct task_struct *p)
+{
+ int prio = p->prio;
+
+ if (!rt_prio(prio))
+ return false;
+ return prio != p->normal_prio;
+}
+
int io_schedule_prepare(void)
{
int old_iowait = current->in_iowait;
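
A sketch of the intended use as a preemption hint; this is the shape a
later patch in this series ("time: Allow to preempt after a callback")
gives the timer softirq:

/* After finishing one callback, let the boosting RT task run. */
if (task_is_pi_boosted(current))
	softirq_preempt();	/* provided by a companion patch in this series */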

View File

@@ -1,29 +0,0 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Mon, 12 Aug 2024 12:51:04 +0200
Subject: [PATCH 1/2] timers: Add sparse annotation for
timer_sync_wait_running().
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
timer_sync_wait_running() first releases two locks and then acquires
them again. This is unexpected and sparse complains about it.
Add sparse annotation for timer_sync_wait_running() to note that the
locking is expected.
Link: https://lore.kernel.org/r/20240812105326.2240000-2-bigeasy@linutronix.de
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/time/timer.c | 2 ++
1 file changed, 2 insertions(+)
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -1561,6 +1561,8 @@ static inline void timer_base_unlock_exp
* the waiter to acquire the lock and make progress.
*/
static void timer_sync_wait_running(struct timer_base *base)
+ __releases(&base->lock) __releases(&base->expiry_lock)
+ __acquires(&base->expiry_lock) __acquires(&base->lock)
{
if (atomic_read(&base->timer_waiters)) {
raw_spin_unlock_irq(&base->lock);

View File

@@ -1,83 +0,0 @@
From: Mike Galbraith <umgwanakikbuti@gmail.com>
Date: Thu, 31 Mar 2016 04:08:28 +0200
Subject: [PATCH 1/3] zram: Replace bit spinlocks with a spinlock_t.
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
The bit spinlock disables preemption. spinlock_t becomes a sleeping lock
on PREEMPT_RT and cannot be acquired in this context. Within this locked
section, zs_free() acquires zs_pool::lock, and zram::wb_limit_lock is
accessed.
Add a spinlock_t for locking. Keep setting/clearing the ZRAM_LOCK bit
after the lock has been acquired/dropped. The size of struct
zram_table_entry increases by 4 bytes due to the lock, plus an additional
4 bytes of padding with CONFIG_ZRAM_TRACK_ENTRY_ACTIME enabled.
Signed-off-by: Mike Galbraith <umgwanakikbuti@gmail.com>
Reviewed-by: Sergey Senozhatsky <senozhatsky@chromium.org>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Link: https://lore.kernel.org/20240906141520.730009-2-bigeasy@linutronix.de
---
drivers/block/zram/zram_drv.c | 18 ++++++++++++++----
drivers/block/zram/zram_drv.h | 1 +
2 files changed, 15 insertions(+), 4 deletions(-)
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -59,17 +59,24 @@ static int zram_read_page(struct zram *z
static int zram_slot_trylock(struct zram *zram, u32 index)
{
- return bit_spin_trylock(ZRAM_LOCK, &zram->table[index].flags);
+ int ret;
+
+ ret = spin_trylock(&zram->table[index].lock);
+ if (ret)
+ __set_bit(ZRAM_LOCK, &zram->table[index].flags);
+ return ret;
}
static void zram_slot_lock(struct zram *zram, u32 index)
{
- bit_spin_lock(ZRAM_LOCK, &zram->table[index].flags);
+ spin_lock(&zram->table[index].lock);
+ __set_bit(ZRAM_LOCK, &zram->table[index].flags);
}
static void zram_slot_unlock(struct zram *zram, u32 index)
{
- bit_spin_unlock(ZRAM_LOCK, &zram->table[index].flags);
+ __clear_bit(ZRAM_LOCK, &zram->table[index].flags);
+ spin_unlock(&zram->table[index].lock);
}
static inline bool init_done(struct zram *zram)
@@ -1211,7 +1218,7 @@ static void zram_meta_free(struct zram *
static bool zram_meta_alloc(struct zram *zram, u64 disksize)
{
- size_t num_pages;
+ size_t num_pages, index;
num_pages = disksize >> PAGE_SHIFT;
zram->table = vzalloc(array_size(num_pages, sizeof(*zram->table)));
@@ -1226,6 +1233,9 @@ static bool zram_meta_alloc(struct zram
if (!huge_class_size)
huge_class_size = zs_huge_class_size(zram->mem_pool);
+
+ for (index = 0; index < num_pages; index++)
+ spin_lock_init(&zram->table[index].lock);
return true;
}
--- a/drivers/block/zram/zram_drv.h
+++ b/drivers/block/zram/zram_drv.h
@@ -69,6 +69,7 @@ struct zram_table_entry {
unsigned long element;
};
unsigned long flags;
+ spinlock_t lock;
#ifdef CONFIG_ZRAM_TRACK_ENTRY_ACTIME
ktime_t ac_time;
#endif
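
The problem with the old scheme, sketched (slot_op() is an invented
example, not the actual zram code; ZRAM_LOCK is the zram flag bit from
zram_drv.h): bit_spin_lock() spins with preemption disabled, so a
spinlock_t taken inside the section, a sleeping lock on PREEMPT_RT,
would sleep in atomic context:

#include <linux/bit_spinlock.h>

static void slot_op(unsigned long *flags)
{
	bit_spin_lock(ZRAM_LOCK, flags);	/* preempt_disable() + spin */
	/* zs_free() -> spin_lock(&pool->lock) must not sleep here on RT */
	bit_spin_unlock(ZRAM_LOCK, flags);
}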

View File

@@ -1,44 +0,0 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Fri, 19 May 2023 16:57:30 +0200
Subject: [PATCH 2/4] ARM: vfp: Use vfp_lock() in vfp_sync_hwstate().
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
vfp_sync_hwstate() uses preempt_disable() followed by local_bh_disable()
to ensure that it won't get interrupted while checking the VFP state.
This harms PREEMPT_RT because softirq handling can get preempted and
local_bh_disable() synchronizes the related section with a sleeping lock,
which does not work with preemption disabled.
Use vfp_lock() to synchronize the access.
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
arch/arm/vfp/vfpmodule.c | 9 +++------
1 file changed, 3 insertions(+), 6 deletions(-)
--- a/arch/arm/vfp/vfpmodule.c
+++ b/arch/arm/vfp/vfpmodule.c
@@ -540,11 +540,9 @@ static inline void vfp_pm_init(void) { }
*/
void vfp_sync_hwstate(struct thread_info *thread)
{
- unsigned int cpu = get_cpu();
+ vfp_lock();
- local_bh_disable();
-
- if (vfp_state_in_hw(cpu, thread)) {
+ if (vfp_state_in_hw(raw_smp_processor_id(), thread)) {
u32 fpexc = fmrx(FPEXC);
/*
@@ -555,8 +553,7 @@ void vfp_sync_hwstate(struct thread_info
fmxr(FPEXC, fpexc);
}
- local_bh_enable();
- put_cpu();
+ vfp_unlock();
}
/* Ensure that the thread reloads the hardware VFP state on the next use. */

View File

@@ -1,119 +0,0 @@
From: Mike Galbraith <umgwanakikbuti@gmail.com>
Date: Sat, 27 Feb 2016 09:01:42 +0100
Subject: [PATCH 2/8] drm/i915: Don't disable interrupts on PREEMPT_RT during
atomic updates
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
Commit
8d7849db3eab7 ("drm/i915: Make sprite updates atomic")
started disabling interrupts across atomic updates. This breaks on PREEMPT_RT
because within this section the code attempts to acquire spinlock_t locks,
which are sleeping locks on PREEMPT_RT.
According to the comment, the interrupts are disabled to avoid random delays
and are not required for protection or synchronisation.
If this needed to happen with disabled interrupts on PREEMPT_RT, with the
whole section restricted to register access, then all sleeping locks would
need to be acquired before interrupts are disabled and some functions would
have to be moved until after interrupts are enabled again.
This includes:
- prepare_to_wait() + finish_wait() due its wake queue.
- drm_crtc_vblank_put() -> vblank_disable_fn() drm_device::vbl_lock.
- skl_pfit_enable(), intel_update_plane(), vlv_atomic_update_fifo() and
maybe others due to intel_uncore::lock
- drm_crtc_arm_vblank_event() due to drm_device::event_lock and
drm_device::vblank_time_lock.
Don't disable interrupts on PREEMPT_RT during atomic updates.
[bigeasy: drop local locks, commit message]
Signed-off-by: Mike Galbraith <umgwanakikbuti@gmail.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
drivers/gpu/drm/i915/display/intel_crtc.c | 9 ++++++---
drivers/gpu/drm/i915/display/intel_cursor.c | 9 ++++++---
drivers/gpu/drm/i915/display/intel_vblank.c | 6 ++++--
3 files changed, 16 insertions(+), 8 deletions(-)
--- a/drivers/gpu/drm/i915/display/intel_crtc.c
+++ b/drivers/gpu/drm/i915/display/intel_crtc.c
@@ -521,7 +521,8 @@ void intel_pipe_update_start(struct inte
*/
intel_psr_wait_for_idle_locked(new_crtc_state);
- local_irq_disable();
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ local_irq_disable();
crtc->debug.min_vbl = evade.min;
crtc->debug.max_vbl = evade.max;
@@ -539,7 +540,8 @@ void intel_pipe_update_start(struct inte
return;
irq_disable:
- local_irq_disable();
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ local_irq_disable();
}
#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_VBLANK_EVADE)
@@ -668,7 +670,8 @@ void intel_pipe_update_end(struct intel_
*/
intel_vrr_send_push(new_crtc_state);
- local_irq_enable();
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ local_irq_enable();
if (intel_vgpu_active(dev_priv))
goto out;
--- a/drivers/gpu/drm/i915/display/intel_cursor.c
+++ b/drivers/gpu/drm/i915/display/intel_cursor.c
@@ -895,13 +895,15 @@ intel_legacy_cursor_update(struct drm_pl
*/
intel_psr_wait_for_idle_locked(crtc_state);
- local_irq_disable();
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ local_irq_disable();
intel_vblank_evade(&evade);
drm_crtc_vblank_put(&crtc->base);
} else {
- local_irq_disable();
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ local_irq_disable();
}
if (new_plane_state->uapi.visible) {
@@ -911,7 +913,8 @@ intel_legacy_cursor_update(struct drm_pl
intel_plane_disable_arm(plane, crtc_state);
}
- local_irq_enable();
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ local_irq_enable();
intel_psr_unlock(crtc_state);
--- a/drivers/gpu/drm/i915/display/intel_vblank.c
+++ b/drivers/gpu/drm/i915/display/intel_vblank.c
@@ -705,11 +705,13 @@ int intel_vblank_evade(struct intel_vbla
break;
}
- local_irq_enable();
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ local_irq_enable();
timeout = schedule_timeout(timeout);
- local_irq_disable();
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ local_irq_disable();
}
finish_wait(wq, &wait);

View File

@@ -1,33 +0,0 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Mon, 12 Aug 2024 12:51:05 +0200
Subject: [PATCH 2/2] hrtimer: Annotate hrtimer_cpu_base_.*_expiry() for
sparse.
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
The two hrtimer_cpu_base_.*_expiry() functions are wrappers around the
locking functions and sparse complains about the missing counterpart.
Add sparse annotations to denote that this behaviour is expected.
Link: https://lore.kernel.org/r/20240812105326.2240000-3-bigeasy@linutronix.de
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/time/hrtimer.c | 2 ++
1 file changed, 2 insertions(+)
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1351,11 +1351,13 @@ static void hrtimer_cpu_base_init_expiry
}
static void hrtimer_cpu_base_lock_expiry(struct hrtimer_cpu_base *base)
+ __acquires(&base->softirq_expiry_lock)
{
spin_lock(&base->softirq_expiry_lock);
}
static void hrtimer_cpu_base_unlock_expiry(struct hrtimer_cpu_base *base)
+ __releases(&base->softirq_expiry_lock)
{
spin_unlock(&base->softirq_expiry_lock);
}

View File

@@ -1,44 +0,0 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Mon, 12 Aug 2024 12:39:03 +0200
Subject: [PATCH 2/4] locking/rt: Remove one __cond_lock() in RT's
spin_trylock_irqsave()
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
spin_trylock_irqsave() has a __cond_lock() wrapper which points to
__spin_trylock_irqsave(). The function then invokes spin_trylock() which
has another __cond_lock() finally pointing to rt_spin_trylock().
The compiler has no problem parsing this, but sparse does not recognise
that users of spin_trylock_irqsave() acquire a conditional lock and it
complains.
Remove one layer of __cond_lock() so that sparse recognises conditional
locking.
Link: https://lore.kernel.org/r/20240812104200.2239232-3-bigeasy@linutronix.de
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
include/linux/spinlock_rt.h | 5 +----
1 file changed, 1 insertion(+), 4 deletions(-)
--- a/include/linux/spinlock_rt.h
+++ b/include/linux/spinlock_rt.h
@@ -132,7 +132,7 @@ static __always_inline void spin_unlock_
#define spin_trylock_irq(lock) \
__cond_lock(lock, rt_spin_trylock(lock))
-#define __spin_trylock_irqsave(lock, flags) \
+#define spin_trylock_irqsave(lock, flags) \
({ \
int __locked; \
\
@@ -142,9 +142,6 @@ static __always_inline void spin_unlock_
__locked; \
})
-#define spin_trylock_irqsave(lock, flags) \
- __cond_lock(lock, __spin_trylock_irqsave(lock, flags))
-
#define spin_is_contended(lock) (((void)(lock), 0))
static inline int spin_is_locked(spinlock_t *lock)
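
The caller pattern sparse can now follow, as a sketch (example_lock and
try_do_work() are invented for illustration): the single remaining
__cond_lock() ties the conditional acquire to the boolean result, which
pairs with the unlock below:

#include <linux/spinlock.h>

static DEFINE_SPINLOCK(example_lock);

static void try_do_work(void)
{
	unsigned long flags;

	if (spin_trylock_irqsave(&example_lock, flags)) {
		/* ... critical section ... */
		spin_unlock_irqrestore(&example_lock, flags);
	}
}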

View File

@@ -1,109 +0,0 @@
From: John Ogness <john.ogness@linutronix.de>
Date: Tue, 20 Aug 2024 08:35:28 +0206
Subject: [PATCH 02/54] printk: nbcon: Consolidate alloc() and init()
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
Rather than splitting the nbcon allocation and initialization into
two pieces, perform all initialization in nbcon_alloc(). Later,
the initial sequence is calculated and can be explicitly set using
nbcon_seq_force(). This removes the need for the strong rules of
nbcon_init() that even included a BUG_ON().
Signed-off-by: John Ogness <john.ogness@linutronix.de>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Link: https://lore.kernel.org/r/20240820063001.36405-3-john.ogness@linutronix.de
Signed-off-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/printk/internal.h | 2 --
kernel/printk/nbcon.c | 37 +++++++++++--------------------------
kernel/printk/printk.c | 2 +-
3 files changed, 12 insertions(+), 29 deletions(-)
--- a/kernel/printk/internal.h
+++ b/kernel/printk/internal.h
@@ -75,7 +75,6 @@ u16 printk_parse_prefix(const char *text
u64 nbcon_seq_read(struct console *con);
void nbcon_seq_force(struct console *con, u64 seq);
bool nbcon_alloc(struct console *con);
-void nbcon_init(struct console *con);
void nbcon_free(struct console *con);
#else
@@ -96,7 +95,6 @@ static inline bool printk_percpu_data_re
static inline u64 nbcon_seq_read(struct console *con) { return 0; }
static inline void nbcon_seq_force(struct console *con, u64 seq) { }
static inline bool nbcon_alloc(struct console *con) { return false; }
-static inline void nbcon_init(struct console *con) { }
static inline void nbcon_free(struct console *con) { }
#endif /* CONFIG_PRINTK */
--- a/kernel/printk/nbcon.c
+++ b/kernel/printk/nbcon.c
@@ -929,17 +929,22 @@ static bool nbcon_emit_next_record(struc
}
/**
- * nbcon_alloc - Allocate buffers needed by the nbcon console
- * @con: Console to allocate buffers for
+ * nbcon_alloc - Allocate and init the nbcon console specific data
+ * @con: Console to initialize
*
- * Return: True on success. False otherwise and the console cannot
- * be used.
+ * Return: True if the console was fully allocated and initialized.
+ * Otherwise @con must not be registered.
*
- * This is not part of nbcon_init() because buffer allocation must
- * be performed earlier in the console registration process.
+ * When allocation and init was successful, the console must be properly
+ * freed using nbcon_free() once it is no longer needed.
*/
bool nbcon_alloc(struct console *con)
{
+ struct nbcon_state state = { };
+
+ nbcon_state_set(con, &state);
+ atomic_long_set(&ACCESS_PRIVATE(con, nbcon_seq), 0);
+
if (con->flags & CON_BOOT) {
/*
* Boot console printing is synchronized with legacy console
@@ -959,26 +964,6 @@ bool nbcon_alloc(struct console *con)
}
/**
- * nbcon_init - Initialize the nbcon console specific data
- * @con: Console to initialize
- *
- * nbcon_alloc() *must* be called and succeed before this function
- * is called.
- *
- * This function expects that the legacy @con->seq has been set.
- */
-void nbcon_init(struct console *con)
-{
- struct nbcon_state state = { };
-
- /* nbcon_alloc() must have been called and successful! */
- BUG_ON(!con->pbufs);
-
- nbcon_seq_force(con, con->seq);
- nbcon_state_set(con, &state);
-}
-
-/**
* nbcon_free - Free and cleanup the nbcon console specific data
* @con: Console to free/cleanup nbcon data
*/
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -3619,7 +3619,7 @@ void register_console(struct console *ne
console_init_seq(newcon, bootcon_registered);
if (newcon->flags & CON_NBCON)
- nbcon_init(newcon);
+ nbcon_seq_force(newcon, newcon->seq);
/*
* Put this console in the list - keep the

View File

@@ -1,60 +0,0 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Fri, 4 Aug 2023 13:30:38 +0200
Subject: [PATCH 2/3] softirq: Add function to preempt serving softirqs.
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
Add functionality for the softirq handler to preempt its current work if
needed. The softirq core has no particular state. It reads and resets
the pending softirq bits and then processes them one after the other.
It can already be preempted while it invokes a certain softirq handler.
By enabling BH, the softirq core releases the per-CPU bh lock which
serializes all softirq handlers. This is safe as long as the code does
not expect any serialisation in between. A typical scenario would be
after the invocation of a callback, where no state needs to be preserved
before the next callback is invoked.
Add functionality to preempt the serving softirqs.
Link: https://lore.kernel.org/r/20230804113039.419794-3-bigeasy@linutronix.de
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
include/linux/bottom_half.h | 2 ++
kernel/softirq.c | 13 +++++++++++++
2 files changed, 15 insertions(+)
--- a/include/linux/bottom_half.h
+++ b/include/linux/bottom_half.h
@@ -35,8 +35,10 @@ static inline void local_bh_enable(void)
#ifdef CONFIG_PREEMPT_RT
extern bool local_bh_blocked(void);
+extern void softirq_preempt(void);
#else
static inline bool local_bh_blocked(void) { return false; }
+static inline void softirq_preempt(void) { }
#endif
#endif /* _LINUX_BH_H */
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -248,6 +248,19 @@ void __local_bh_enable_ip(unsigned long
}
EXPORT_SYMBOL(__local_bh_enable_ip);
+void softirq_preempt(void)
+{
+ if (WARN_ON_ONCE(!preemptible()))
+ return;
+
+ if (WARN_ON_ONCE(__this_cpu_read(softirq_ctrl.cnt) != SOFTIRQ_OFFSET))
+ return;
+
+ __local_bh_enable(SOFTIRQ_OFFSET, true);
+ /* preemption point */
+ __local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET);
+}
+
/*
* Invoked from ksoftirqd_run() outside of the interrupt disabled section
* to acquire the per CPU local lock for reentrancy protection.

View File

@@ -1,67 +0,0 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 20 Jun 2024 12:27:11 +0200
Subject: [PATCH 2/3] zram: Remove ZRAM_LOCK
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
ZRAM_LOCK was used for locking; after the addition of the spinlock_t the
bit is still set and cleared, but there is no reader of it.
Remove the ZRAM_LOCK bit.
Reviewed-by: Sergey Senozhatsky <senozhatsky@chromium.org>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Link: https://lore.kernel.org/20240906141520.730009-3-bigeasy@linutronix.de
---
drivers/block/zram/zram_drv.c | 11 ++---------
drivers/block/zram/zram_drv.h | 4 +---
2 files changed, 3 insertions(+), 12 deletions(-)
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -59,23 +59,16 @@ static int zram_read_page(struct zram *z
static int zram_slot_trylock(struct zram *zram, u32 index)
{
- int ret;
-
- ret = spin_trylock(&zram->table[index].lock);
- if (ret)
- __set_bit(ZRAM_LOCK, &zram->table[index].flags);
- return ret;
+ return spin_trylock(&zram->table[index].lock);
}
static void zram_slot_lock(struct zram *zram, u32 index)
{
spin_lock(&zram->table[index].lock);
- __set_bit(ZRAM_LOCK, &zram->table[index].flags);
}
static void zram_slot_unlock(struct zram *zram, u32 index)
{
- __clear_bit(ZRAM_LOCK, &zram->table[index].flags);
spin_unlock(&zram->table[index].lock);
}
@@ -1293,7 +1286,7 @@ static void zram_free_page(struct zram *
zram_set_handle(zram, index, 0);
zram_set_obj_size(zram, index, 0);
WARN_ON_ONCE(zram->table[index].flags &
- ~(1UL << ZRAM_LOCK | 1UL << ZRAM_UNDER_WB));
+ ~(1UL << ZRAM_UNDER_WB));
}
/*
--- a/drivers/block/zram/zram_drv.h
+++ b/drivers/block/zram/zram_drv.h
@@ -45,9 +45,7 @@
/* Flags for zram pages (table[page_no].flags) */
enum zram_pageflags {
- /* zram slot is locked */
- ZRAM_LOCK = ZRAM_FLAG_SHIFT,
- ZRAM_SAME, /* Page consists the same element */
+ ZRAM_SAME = ZRAM_FLAG_SHIFT, /* Page consists the same element */
ZRAM_WB, /* page is stored on backing_device */
ZRAM_UNDER_WB, /* page is under writeback */
ZRAM_HUGE, /* Incompressible page */

View File

@@ -1,48 +0,0 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Wed, 28 Jun 2023 09:36:10 +0200
Subject: [PATCH 3/4] ARM: vfp: Use vfp_lock() in vfp_support_entry().
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
vfp_support_entry() is invoked from the exception handler and is fully
preemptible. It uses local_bh_disable() to remain uninterrupted while
checking the VFP state.
This does not work on PREEMPT_RT because local_bh_disable()
synchronizes the relevant section but the context remains fully
preemptible.
Use vfp_lock() for uninterrupted access.
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
arch/arm/vfp/vfpmodule.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
--- a/arch/arm/vfp/vfpmodule.c
+++ b/arch/arm/vfp/vfpmodule.c
@@ -708,7 +708,7 @@ static int vfp_support_entry(struct pt_r
if (!user_mode(regs))
return vfp_kmode_exception(regs, trigger);
- local_bh_disable();
+ vfp_lock();
fpexc = fmrx(FPEXC);
/*
@@ -787,7 +787,7 @@ static int vfp_support_entry(struct pt_r
if (!(fpscr & FPSCR_IXE)) {
if (!(fpscr & FPSCR_LENGTH_MASK)) {
pr_debug("not VFP\n");
- local_bh_enable();
+ vfp_unlock();
return -ENOEXEC;
}
fpexc |= FPEXC_DEX;
@@ -797,7 +797,7 @@ bounce: regs->ARM_pc += 4;
VFP_bounce(trigger, fpexc, regs);
}
- local_bh_enable();
+ vfp_unlock();
return 0;
}

View File

@@ -1,39 +0,0 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Mon, 25 Oct 2021 15:05:18 +0200
Subject: [PATCH 3/8] drm/i915: Don't check for atomic context on PREEMPT_RT
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
The !in_atomic() check in _wait_for_atomic() triggers on PREEMPT_RT
because the uncore::lock is a spinlock_t and does not disable
preemption or interrupts.
Changing the uncore::lock to a raw_spinlock_t doubles the worst case
latency on an otherwise idle testbox during testing.
Ignore _WAIT_FOR_ATOMIC_CHECK() on PREEMPT_RT.
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
Link: https://lore.kernel.org/all/20211006164628.s2mtsdd2jdbfyf7g@linutronix.de/
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
drivers/gpu/drm/i915/i915_utils.h | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
--- a/drivers/gpu/drm/i915/i915_utils.h
+++ b/drivers/gpu/drm/i915/i915_utils.h
@@ -273,8 +273,13 @@ wait_remaining_ms_from_jiffies(unsigned
(Wmax))
#define wait_for(COND, MS) _wait_for((COND), (MS) * 1000, 10, 1000)
-/* If CONFIG_PREEMPT_COUNT is disabled, in_atomic() always reports false. */
-#if defined(CONFIG_DRM_I915_DEBUG) && defined(CONFIG_PREEMPT_COUNT)
+/*
+ * If CONFIG_PREEMPT_COUNT is disabled, in_atomic() always reports false.
+ * On PREEMPT_RT the context isn't becoming atomic because it is used in an
+ * interrupt handler or because a spinlock_t is acquired. This leads to
+ * warnings which don't occur otherwise and therefore the check is disabled.
+ */
+#if defined(CONFIG_DRM_I915_DEBUG) && defined(CONFIG_PREEMPT_COUNT) && !defined(CONFIG_PREEMPT_RT)
# define _WAIT_FOR_ATOMIC_CHECK(ATOMIC) WARN_ON_ONCE((ATOMIC) && !in_atomic())
#else
# define _WAIT_FOR_ATOMIC_CHECK(ATOMIC) do { } while (0)

View File

@@ -1,84 +0,0 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Mon, 12 Aug 2024 12:39:04 +0200
Subject: [PATCH 3/4] locking/rt: Add sparse annotation for RCU.
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
Every lock that becomes a sleeping lock on PREEMPT_RT starts an RCU read
section. There is no sparse annotation for this and sparse complains
about unbalanced locking.
Add __acquires()/__releases() for the RCU lock. This covers all but the
trylock functions. I tried the __cond_acquires() annotation but it
didn't work.
Link: https://lore.kernel.org/r/20240812104200.2239232-4-bigeasy@linutronix.de
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/locking/spinlock_rt.c | 14 +++++++-------
1 file changed, 7 insertions(+), 7 deletions(-)
--- a/kernel/locking/spinlock_rt.c
+++ b/kernel/locking/spinlock_rt.c
@@ -51,7 +51,7 @@ static __always_inline void __rt_spin_lo
migrate_disable();
}
-void __sched rt_spin_lock(spinlock_t *lock)
+void __sched rt_spin_lock(spinlock_t *lock) __acquires(RCU)
{
spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
__rt_spin_lock(lock);
@@ -75,7 +75,7 @@ void __sched rt_spin_lock_nest_lock(spin
EXPORT_SYMBOL(rt_spin_lock_nest_lock);
#endif
-void __sched rt_spin_unlock(spinlock_t *lock)
+void __sched rt_spin_unlock(spinlock_t *lock) __releases(RCU)
{
spin_release(&lock->dep_map, _RET_IP_);
migrate_enable();
@@ -225,7 +225,7 @@ int __sched rt_write_trylock(rwlock_t *r
}
EXPORT_SYMBOL(rt_write_trylock);
-void __sched rt_read_lock(rwlock_t *rwlock)
+void __sched rt_read_lock(rwlock_t *rwlock) __acquires(RCU)
{
rtlock_might_resched();
rwlock_acquire_read(&rwlock->dep_map, 0, 0, _RET_IP_);
@@ -235,7 +235,7 @@ void __sched rt_read_lock(rwlock_t *rwlo
}
EXPORT_SYMBOL(rt_read_lock);
-void __sched rt_write_lock(rwlock_t *rwlock)
+void __sched rt_write_lock(rwlock_t *rwlock) __acquires(RCU)
{
rtlock_might_resched();
rwlock_acquire(&rwlock->dep_map, 0, 0, _RET_IP_);
@@ -246,7 +246,7 @@ void __sched rt_write_lock(rwlock_t *rwl
EXPORT_SYMBOL(rt_write_lock);
#ifdef CONFIG_DEBUG_LOCK_ALLOC
-void __sched rt_write_lock_nested(rwlock_t *rwlock, int subclass)
+void __sched rt_write_lock_nested(rwlock_t *rwlock, int subclass) __acquires(RCU)
{
rtlock_might_resched();
rwlock_acquire(&rwlock->dep_map, subclass, 0, _RET_IP_);
@@ -257,7 +257,7 @@ void __sched rt_write_lock_nested(rwlock
EXPORT_SYMBOL(rt_write_lock_nested);
#endif
-void __sched rt_read_unlock(rwlock_t *rwlock)
+void __sched rt_read_unlock(rwlock_t *rwlock) __releases(RCU)
{
rwlock_release(&rwlock->dep_map, _RET_IP_);
migrate_enable();
@@ -266,7 +266,7 @@ void __sched rt_read_unlock(rwlock_t *rw
}
EXPORT_SYMBOL(rt_read_unlock);
-void __sched rt_write_unlock(rwlock_t *rwlock)
+void __sched rt_write_unlock(rwlock_t *rwlock) __releases(RCU)
{
rwlock_release(&rwlock->dep_map, _RET_IP_);
rcu_read_unlock();
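
For context, the RCU read section these annotations track is opened by
the RT lock slow path itself (abridged here from
kernel/locking/spinlock_rt.c), so existing spin_lock() users keep their
implicit RCU protection on PREEMPT_RT:

static __always_inline void __rt_spin_lock(spinlock_t *lock)
{
	rtlock_might_resched();
	rtlock_lock(&lock->lock);
	rcu_read_lock();	/* the acquire that sparse is told about */
	migrate_disable();
}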

View File

@@ -1,148 +0,0 @@
From: Petr Mladek <pmladek@suse.com>
Date: Tue, 20 Aug 2024 08:35:29 +0206
Subject: [PATCH 03/54] printk: Properly deal with nbcon consoles on seq init
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
If a non-boot console is registering and boot consoles exist,
the consoles are flushed before being unregistered. This allows
the non-boot console to continue where the boot console left
off.
If for whatever reason flushing fails, the lowest seq found from
any of the enabled boot consoles is used. Until now con->seq was
checked. However, if it is an nbcon boot console, the function
nbcon_seq_read() must be used to read seq because con->seq is
not updated for nbcon consoles.
Check if it is an nbcon boot console and if so call
nbcon_seq_read() to read seq.
Also, avoid usage of con->seq as temporary storage of the
starting record. Instead, rename console_init_seq() to
get_init_console_seq() and just return the value. For nbcon
consoles set the sequence via nbcon_seq_force(), for legacy
consoles set con->seq.
The cleaned design should make sure that the value stays and is
set before the console is added to the console list. It also
unifies the sequence number initialization for legacy and nbcon
consoles.
Reviewed-by: John Ogness <john.ogness@linutronix.de>
Link: https://lore.kernel.org/r/20240820063001.36405-4-john.ogness@linutronix.de
Signed-off-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/printk/nbcon.c | 3 ---
kernel/printk/printk.c | 41 +++++++++++++++++++++++++++++------------
2 files changed, 29 insertions(+), 15 deletions(-)
--- a/kernel/printk/nbcon.c
+++ b/kernel/printk/nbcon.c
@@ -172,9 +172,6 @@ void nbcon_seq_force(struct console *con
u64 valid_seq = max_t(u64, seq, prb_first_valid_seq(prb));
atomic_long_set(&ACCESS_PRIVATE(con, nbcon_seq), __u64seq_to_ulseq(valid_seq));
-
- /* Clear con->seq since nbcon consoles use con->nbcon_seq instead. */
- con->seq = 0;
}
/**
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -3449,19 +3449,21 @@ static void try_enable_default_console(s
newcon->flags |= CON_CONSDEV;
}
-static void console_init_seq(struct console *newcon, bool bootcon_registered)
+/* Return the starting sequence number for a newly registered console. */
+static u64 get_init_console_seq(struct console *newcon, bool bootcon_registered)
{
struct console *con;
bool handover;
+ u64 init_seq;
if (newcon->flags & (CON_PRINTBUFFER | CON_BOOT)) {
/* Get a consistent copy of @syslog_seq. */
mutex_lock(&syslog_lock);
- newcon->seq = syslog_seq;
+ init_seq = syslog_seq;
mutex_unlock(&syslog_lock);
} else {
/* Begin with next message added to ringbuffer. */
- newcon->seq = prb_next_seq(prb);
+ init_seq = prb_next_seq(prb);
/*
* If any enabled boot consoles are due to be unregistered
@@ -3482,7 +3484,7 @@ static void console_init_seq(struct cons
* Flush all consoles and set the console to start at
* the next unprinted sequence number.
*/
- if (!console_flush_all(true, &newcon->seq, &handover)) {
+ if (!console_flush_all(true, &init_seq, &handover)) {
/*
* Flushing failed. Just choose the lowest
* sequence of the enabled boot consoles.
@@ -3495,19 +3497,30 @@ static void console_init_seq(struct cons
if (handover)
console_lock();
- newcon->seq = prb_next_seq(prb);
+ init_seq = prb_next_seq(prb);
for_each_console(con) {
- if ((con->flags & CON_BOOT) &&
- (con->flags & CON_ENABLED) &&
- con->seq < newcon->seq) {
- newcon->seq = con->seq;
+ u64 seq;
+
+ if (!(con->flags & CON_BOOT) ||
+ !(con->flags & CON_ENABLED)) {
+ continue;
}
+
+ if (con->flags & CON_NBCON)
+ seq = nbcon_seq_read(con);
+ else
+ seq = con->seq;
+
+ if (seq < init_seq)
+ init_seq = seq;
}
}
console_unlock();
}
}
+
+ return init_seq;
}
#define console_first() \
@@ -3539,6 +3552,7 @@ void register_console(struct console *ne
struct console *con;
bool bootcon_registered = false;
bool realcon_registered = false;
+ u64 init_seq;
int err;
console_list_lock();
@@ -3616,10 +3630,13 @@ void register_console(struct console *ne
}
newcon->dropped = 0;
- console_init_seq(newcon, bootcon_registered);
+ init_seq = get_init_console_seq(newcon, bootcon_registered);
- if (newcon->flags & CON_NBCON)
- nbcon_seq_force(newcon, newcon->seq);
+ if (newcon->flags & CON_NBCON) {
+ nbcon_seq_force(newcon, init_seq);
+ } else {
+ newcon->seq = init_seq;
+ }
/*
* Put this console in the list - keep the

View File

@@ -1,47 +0,0 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Fri, 4 Aug 2023 13:30:39 +0200
Subject: [PATCH 3/3] time: Allow to preempt after a callback.
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
The TIMER_SOFTIRQ handler invokes timer callbacks of the expired timers.
Before each invocation the timer_base::lock is dropped. The only locks
that are still held are the timer_base::expiry_lock and the per-CPU
bh-lock taken as part of local_bh_disable(). The former is released as
part of lockup prevention if the timer is preempted by the caller, which
is waiting for its completion.
Both locks are already released as part of timer_sync_wait_running().
This can be extended by also releasing the bh-lock. The timer core does
not rely on any state that is serialized by the bh-lock. The timer
callback expects the bh-state to be serialized by the lock, but there is
no need to keep state synchronized while invoking multiple callbacks.
Preempt the handling of softirqs and release all locks after a timer
invocation if the current task has inherited priority.
Link: https://lore.kernel.org/r/20230804113039.419794-4-bigeasy@linutronix.de
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/time/timer.c | 9 ++++++++-
1 file changed, 8 insertions(+), 1 deletion(-)
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -1564,9 +1564,16 @@ static void timer_sync_wait_running(stru
__releases(&base->lock) __releases(&base->expiry_lock)
__acquires(&base->expiry_lock) __acquires(&base->lock)
{
- if (atomic_read(&base->timer_waiters)) {
+ bool need_preempt;
+
+ need_preempt = task_is_pi_boosted(current);
+ if (need_preempt || atomic_read(&base->timer_waiters)) {
raw_spin_unlock_irq(&base->lock);
spin_unlock(&base->expiry_lock);
+
+ if (need_preempt)
+ softirq_preempt();
+
spin_lock(&base->expiry_lock);
raw_spin_lock_irq(&base->lock);
}

View File

@@ -1,49 +0,0 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 20 Jun 2024 12:53:06 +0200
Subject: [PATCH 3/3] zram: Shrink zram_table_entry::flags.
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
The zram_table_entry::flags member is of type long and uses 8 bytes on a
64-bit architecture. With a PAGE_SIZE of 256KiB we have a PAGE_SHIFT of 18,
which in turn leads to __NR_ZRAM_PAGEFLAGS = 27. This still fits in an
ordinary integer.
By reducing the size of `flags' to four bytes, the size of the struct
goes back to 16 bytes. The padding between the lock and ac_time (if
enabled) is also gone.
Make zram_table_entry::flags an unsigned int and update the build test
to reflect the change.
Reviewed-by: Sergey Senozhatsky <senozhatsky@chromium.org>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Link: https://lore.kernel.org/20240906141520.730009-4-bigeasy@linutronix.de
---
drivers/block/zram/zram_drv.c | 3 ++-
drivers/block/zram/zram_drv.h | 2 +-
2 files changed, 3 insertions(+), 2 deletions(-)
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -2404,9 +2404,10 @@ static void destroy_devices(void)
static int __init zram_init(void)
{
+ struct zram_table_entry zram_te;
int ret;
- BUILD_BUG_ON(__NR_ZRAM_PAGEFLAGS > BITS_PER_LONG);
+ BUILD_BUG_ON(__NR_ZRAM_PAGEFLAGS > sizeof(zram_te.flags) * 8);
ret = cpuhp_setup_state_multi(CPUHP_ZCOMP_PREPARE, "block/zram:prepare",
zcomp_cpu_up_prepare, zcomp_cpu_dead);
--- a/drivers/block/zram/zram_drv.h
+++ b/drivers/block/zram/zram_drv.h
@@ -66,7 +66,7 @@ struct zram_table_entry {
unsigned long handle;
unsigned long element;
};
- unsigned long flags;
+ unsigned int flags;
spinlock_t lock;
#ifdef CONFIG_ZRAM_TRACK_ENTRY_ACTIME
ktime_t ac_time;
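
The size arithmetic, sketched for a 64-bit build without lock debugging
(where sizeof(spinlock_t) == 4; an assumption, not stated in the patch;
entry_after is an invented stand-in for the real struct):

#include <linux/spinlock.h>
#include <linux/build_bug.h>

/* before: 8 (handle/element) + 8 (unsigned long flags) + 4 (lock) + 4 pad = 24
 * after:  8 (handle/element) + 4 (unsigned int flags)  + 4 (lock)         = 16 */
struct entry_after {
	unsigned long handle;
	unsigned int flags;
	spinlock_t lock;
};
static_assert(sizeof(struct entry_after) == 16);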

View File

@@ -1,121 +0,0 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Wed, 28 Jun 2023 09:39:33 +0200
Subject: [PATCH 4/4] ARM: vfp: Move sending signals outside of vfp_lock()ed
section.
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
VFP_bounce() is invoked from within vfp_support_entry() and may send a
signal. Sending a signal uses spinlock_t which becomes a sleeping lock
on PREEMPT_RT and must not be acquired within a preempt-disabled
section.
Move the vfp_raise_sigfpe() block outside of the vfp_lock() section.
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
arch/arm/vfp/vfpmodule.c | 29 ++++++++++++++++++-----------
1 file changed, 18 insertions(+), 11 deletions(-)
--- a/arch/arm/vfp/vfpmodule.c
+++ b/arch/arm/vfp/vfpmodule.c
@@ -268,7 +268,7 @@ static void vfp_panic(char *reason, u32
/*
* Process bitmask of exception conditions.
*/
-static void vfp_raise_exceptions(u32 exceptions, u32 inst, u32 fpscr, struct pt_regs *regs)
+static int vfp_raise_exceptions(u32 exceptions, u32 inst, u32 fpscr)
{
int si_code = 0;
@@ -276,8 +276,7 @@ static void vfp_raise_exceptions(u32 exc
if (exceptions == VFP_EXCEPTION_ERROR) {
vfp_panic("unhandled bounce", inst);
- vfp_raise_sigfpe(FPE_FLTINV, regs);
- return;
+ return FPE_FLTINV;
}
/*
@@ -305,8 +304,7 @@ static void vfp_raise_exceptions(u32 exc
RAISE(FPSCR_OFC, FPSCR_OFE, FPE_FLTOVF);
RAISE(FPSCR_IOC, FPSCR_IOE, FPE_FLTINV);
- if (si_code)
- vfp_raise_sigfpe(si_code, regs);
+ return si_code;
}
/*
@@ -352,6 +350,8 @@ static u32 vfp_emulate_instruction(u32 i
static void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs)
{
u32 fpscr, orig_fpscr, fpsid, exceptions;
+ int si_code2 = 0;
+ int si_code = 0;
pr_debug("VFP: bounce: trigger %08x fpexc %08x\n", trigger, fpexc);
@@ -397,8 +397,8 @@ static void VFP_bounce(u32 trigger, u32
* unallocated VFP instruction but with FPSCR.IXE set and not
* on VFP subarch 1.
*/
- vfp_raise_exceptions(VFP_EXCEPTION_ERROR, trigger, fpscr, regs);
- return;
+ si_code = vfp_raise_exceptions(VFP_EXCEPTION_ERROR, trigger, fpscr);
+ goto exit;
}
/*
@@ -422,14 +422,14 @@ static void VFP_bounce(u32 trigger, u32
*/
exceptions = vfp_emulate_instruction(trigger, fpscr, regs);
if (exceptions)
- vfp_raise_exceptions(exceptions, trigger, orig_fpscr, regs);
+ si_code2 = vfp_raise_exceptions(exceptions, trigger, orig_fpscr);
/*
* If there isn't a second FP instruction, exit now. Note that
* the FPEXC.FP2V bit is valid only if FPEXC.EX is 1.
*/
if ((fpexc & (FPEXC_EX | FPEXC_FP2V)) != (FPEXC_EX | FPEXC_FP2V))
- return;
+ goto exit;
/*
* The barrier() here prevents fpinst2 being read
@@ -441,7 +441,13 @@ static void VFP_bounce(u32 trigger, u32
emulate:
exceptions = vfp_emulate_instruction(trigger, orig_fpscr, regs);
if (exceptions)
- vfp_raise_exceptions(exceptions, trigger, orig_fpscr, regs);
+ si_code = vfp_raise_exceptions(exceptions, trigger, orig_fpscr);
+exit:
+ vfp_unlock();
+ if (si_code2)
+ vfp_raise_sigfpe(si_code2, regs);
+ if (si_code)
+ vfp_raise_sigfpe(si_code, regs);
}
static void vfp_enable(void *unused)
@@ -773,6 +779,7 @@ static int vfp_support_entry(struct pt_r
* replay the instruction that trapped.
*/
fmxr(FPEXC, fpexc);
+ vfp_unlock();
} else {
/* Check for synchronous or asynchronous exceptions */
if (!(fpexc & (FPEXC_EX | FPEXC_DEX))) {
@@ -794,10 +801,10 @@ static int vfp_support_entry(struct pt_r
}
}
bounce: regs->ARM_pc += 4;
+ /* VFP_bounce() will invoke vfp_unlock() */
VFP_bounce(trigger, fpexc, regs);
}
- vfp_unlock();
return 0;
}

View File

@@ -1,60 +0,0 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 6 Dec 2018 09:52:20 +0100
Subject: [PATCH 4/8] drm/i915: Disable tracing points on PREEMPT_RT
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
Luca Abeni reported this:
| BUG: scheduling while atomic: kworker/u8:2/15203/0x00000003
| CPU: 1 PID: 15203 Comm: kworker/u8:2 Not tainted 4.19.1-rt3 #10
| Call Trace:
| rt_spin_lock+0x3f/0x50
| gen6_read32+0x45/0x1d0 [i915]
| g4x_get_vblank_counter+0x36/0x40 [i915]
| trace_event_raw_event_i915_pipe_update_start+0x7d/0xf0 [i915]
Tracing events such as trace_intel_pipe_update_start() use functions
that acquire spinlock_t locks, which are transformed into sleeping locks
on PREEMPT_RT. A few trace points use intel_get_crtc_scanline(), others
use ->get_vblank_counter(), which also might acquire sleeping locks on
PREEMPT_RT.
At the time the arguments are evaluated within a trace point, preemption
is disabled, and so the locks must not be acquired on PREEMPT_RT.
Based on this I don't see any other way than to disable trace points on
PREEMPT_RT.
Acked-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
Reported-by: Luca Abeni <lucabe72@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
drivers/gpu/drm/i915/display/intel_display_trace.h | 4 ++++
drivers/gpu/drm/i915/i915_trace.h | 4 ++++
2 files changed, 8 insertions(+)
--- a/drivers/gpu/drm/i915/display/intel_display_trace.h
+++ b/drivers/gpu/drm/i915/display/intel_display_trace.h
@@ -9,6 +9,10 @@
#if !defined(__INTEL_DISPLAY_TRACE_H__) || defined(TRACE_HEADER_MULTI_READ)
#define __INTEL_DISPLAY_TRACE_H__
+#if defined(CONFIG_PREEMPT_RT) && !defined(NOTRACE)
+#define NOTRACE
+#endif
+
#include <linux/string_helpers.h>
#include <linux/types.h>
#include <linux/tracepoint.h>
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -6,6 +6,10 @@
#if !defined(_I915_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ)
#define _I915_TRACE_H_
+#if defined(CONFIG_PREEMPT_RT) && !defined(NOTRACE)
+#define NOTRACE
+#endif
+
#include <linux/stringify.h>
#include <linux/types.h>
#include <linux/tracepoint.h>

View File

@@ -1,35 +0,0 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Mon, 12 Aug 2024 12:39:05 +0200
Subject: [PATCH 4/4] locking/rt: Annotate unlock followed by lock for sparse.
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
rt_mutex_slowlock_block() and rtlock_slowlock_locked() both unlock
lock::wait_lock and then lock it later. This is unusual and sparse
complains about it.
Add __releases() + __acquires() annotation to mark that it is expected.
Link: https://lore.kernel.org/r/20240812104200.2239232-5-bigeasy@linutronix.de
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/locking/rtmutex.c | 2 ++
1 file changed, 2 insertions(+)
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -1601,6 +1601,7 @@ static int __sched rt_mutex_slowlock_blo
unsigned int state,
struct hrtimer_sleeper *timeout,
struct rt_mutex_waiter *waiter)
+ __releases(&lock->wait_lock) __acquires(&lock->wait_lock)
{
struct rt_mutex *rtm = container_of(lock, struct rt_mutex, rtmutex);
struct task_struct *owner;
@@ -1805,6 +1806,7 @@ static __always_inline int __rt_mutex_lo
* @lock: The underlying RT mutex
*/
static void __sched rtlock_slowlock_locked(struct rt_mutex_base *lock)
+ __releases(&lock->wait_lock) __acquires(&lock->wait_lock)
{
struct rt_mutex_waiter waiter;
struct task_struct *owner;
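
The annotations are easier to see in isolation. A minimal sketch (the struct
and function are illustrative, not from the kernel) of a function that
legitimately exits and re-enters a lock context:

struct foo {
	spinlock_t		lock;
	wait_queue_head_t	wq;
	bool			ready;
};

static void foo_wait(struct foo *f)
	__releases(&f->lock) __acquires(&f->lock)
{
	spin_unlock(&f->lock);		/* drop the lock while blocking */
	wait_event(f->wq, f->ready);
	spin_lock(&f->lock);		/* re-take it before returning */
}

Without the two annotations, sparse sees a function that releases a lock it
never took and warns about the context imbalance; with them, the imbalance is
declared intent.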

View File

@ -1,76 +0,0 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Tue, 20 Aug 2024 08:35:30 +0206
Subject: [PATCH 04/54] printk: Check printk_deferred_enter()/_exit() usage
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
Add validation that printk_deferred_enter()/_exit() are called from
contexts that cannot migrate between CPUs.
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: John Ogness <john.ogness@linutronix.de>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Link: https://lore.kernel.org/r/20240820063001.36405-5-john.ogness@linutronix.de
Signed-off-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
include/linux/printk.h | 9 +++++----
kernel/printk/internal.h | 3 +++
kernel/printk/printk_safe.c | 12 ++++++++++++
3 files changed, 20 insertions(+), 4 deletions(-)
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -161,15 +161,16 @@ int _printk(const char *fmt, ...);
*/
__printf(1, 2) __cold int _printk_deferred(const char *fmt, ...);
-extern void __printk_safe_enter(void);
-extern void __printk_safe_exit(void);
+extern void __printk_deferred_enter(void);
+extern void __printk_deferred_exit(void);
+
/*
* The printk_deferred_enter/exit macros are available only as a hack for
* some code paths that need to defer all printk console printing. Interrupts
* must be disabled for the deferred duration.
*/
-#define printk_deferred_enter __printk_safe_enter
-#define printk_deferred_exit __printk_safe_exit
+#define printk_deferred_enter() __printk_deferred_enter()
+#define printk_deferred_exit() __printk_deferred_exit()
/*
* Please don't use printk_ratelimit(), because it shares ratelimiting state
--- a/kernel/printk/internal.h
+++ b/kernel/printk/internal.h
@@ -53,6 +53,9 @@ int vprintk_store(int facility, int leve
__printf(1, 0) int vprintk_default(const char *fmt, va_list args);
__printf(1, 0) int vprintk_deferred(const char *fmt, va_list args);
+void __printk_safe_enter(void);
+void __printk_safe_exit(void);
+
bool printk_percpu_data_ready(void);
#define printk_safe_enter_irqsave(flags) \
--- a/kernel/printk/printk_safe.c
+++ b/kernel/printk/printk_safe.c
@@ -26,6 +26,18 @@ void __printk_safe_exit(void)
this_cpu_dec(printk_context);
}
+void __printk_deferred_enter(void)
+{
+ cant_migrate();
+ __printk_safe_enter();
+}
+
+void __printk_deferred_exit(void)
+{
+ cant_migrate();
+ __printk_safe_exit();
+}
+
asmlinkage int vprintk(const char *fmt, va_list args)
{
#ifdef CONFIG_KGDB_KDB
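
A sketch of the calling convention these checks enforce (the surrounding
function is illustrative): interrupts stay disabled for the whole deferred
section, which also rules out migration, so cant_migrate() is satisfied.

static void dump_state_deferred(void)
{
	unsigned long flags;

	local_irq_save(flags);		/* no migration with IRQs off */
	printk_deferred_enter();	/* cant_migrate() passes */

	/* printk() here only stores the record; console output is deferred */
	printk("state dump\n");

	printk_deferred_exit();
	local_irq_restore(flags);
}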

View File

@ -1,89 +0,0 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Wed, 8 Sep 2021 19:03:41 +0200
Subject: [PATCH 5/8] drm/i915/gt: Use spin_lock_irq() instead of
local_irq_disable() + spin_lock()
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
execlists_dequeue() is invoked from a function which uses
local_irq_disable() to disable interrupts so the spin_lock() behaves
like spin_lock_irq().
This breaks PREEMPT_RT because local_irq_disable() + spin_lock() is not
the same as spin_lock_irq().
execlists_dequeue_irq() and execlists_dequeue() each have only one
caller. If intel_engine_cs::active::lock is acquired and released with the
_irq suffix, then execlists_dequeue() behaves almost as if it were
invoked with interrupts disabled. The difference is the last part of the
function, which is then run with interrupts enabled.
I can't tell whether this makes a difference. From looking at it, it might
work to move the last unlock to the end of the function, as I didn't find
anything that would acquire the lock again.
Reported-by: Clark Williams <williams@redhat.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Reviewed-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
---
drivers/gpu/drm/i915/gt/intel_execlists_submission.c | 17 +++++------------
1 file changed, 5 insertions(+), 12 deletions(-)
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -1303,7 +1303,7 @@ static void execlists_dequeue(struct int
* and context switches) submission.
*/
- spin_lock(&sched_engine->lock);
+ spin_lock_irq(&sched_engine->lock);
/*
* If the queue is higher priority than the last
@@ -1403,7 +1403,7 @@ static void execlists_dequeue(struct int
* Even if ELSP[1] is occupied and not worthy
* of timeslices, our queue might be.
*/
- spin_unlock(&sched_engine->lock);
+ spin_unlock_irq(&sched_engine->lock);
return;
}
}
@@ -1429,7 +1429,7 @@ static void execlists_dequeue(struct int
if (last && !can_merge_rq(last, rq)) {
spin_unlock(&ve->base.sched_engine->lock);
- spin_unlock(&engine->sched_engine->lock);
+ spin_unlock_irq(&engine->sched_engine->lock);
return; /* leave this for another sibling */
}
@@ -1591,7 +1591,7 @@ static void execlists_dequeue(struct int
*/
sched_engine->queue_priority_hint = queue_prio(sched_engine);
i915_sched_engine_reset_on_empty(sched_engine);
- spin_unlock(&sched_engine->lock);
+ spin_unlock_irq(&sched_engine->lock);
/*
* We can skip poking the HW if we ended up with exactly the same set
@@ -1617,13 +1617,6 @@ static void execlists_dequeue(struct int
}
}
-static void execlists_dequeue_irq(struct intel_engine_cs *engine)
-{
- local_irq_disable(); /* Suspend interrupts across request submission */
- execlists_dequeue(engine);
- local_irq_enable(); /* flush irq_work (e.g. breadcrumb enabling) */
-}
-
static void clear_ports(struct i915_request **ports, int count)
{
memset_p((void **)ports, NULL, count);
@@ -2478,7 +2471,7 @@ static void execlists_submission_tasklet
}
if (!engine->execlists.pending[0]) {
- execlists_dequeue_irq(engine);
+ execlists_dequeue(engine);
start_timeslice(engine);
}
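
The underlying rule can be shown in isolation; this is a sketch of the
pattern, not code from the driver:

static DEFINE_SPINLOCK(lock);

static void broken_on_rt(void)
{
	local_irq_disable();
	spin_lock(&lock);	/* RT: sleeping rtmutex; sleeping with hard IRQs off is a bug */
	spin_unlock(&lock);
	local_irq_enable();
}

static void fine_everywhere(void)
{
	spin_lock_irq(&lock);	/* non-RT: disables IRQs and spins; RT: just takes the sleeping lock */
	spin_unlock_irq(&lock);
}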

View File

@ -1,110 +0,0 @@
From: John Ogness <john.ogness@linutronix.de>
Date: Tue, 20 Aug 2024 08:35:31 +0206
Subject: [PATCH 05/54] printk: nbcon: Clarify rules of the owner/waiter
matching
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
The functions nbcon_owner_matches() and nbcon_waiter_matches()
use a minimal set of data to determine if a context matches.
The existing kerneldoc and comments were not clear enough and
caused the printk folks to re-prove that the functions are
indeed reliable in all cases.
Update and expand the explanations so that it is clear that the
implementations are sufficient for all cases.
Signed-off-by: John Ogness <john.ogness@linutronix.de>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Link: https://lore.kernel.org/r/20240820063001.36405-6-john.ogness@linutronix.de
Signed-off-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/printk/nbcon.c | 56 +++++++++++++++++++++++++++++++++++++++++---------
1 file changed, 46 insertions(+), 10 deletions(-)
--- a/kernel/printk/nbcon.c
+++ b/kernel/printk/nbcon.c
@@ -228,6 +228,13 @@ static int nbcon_context_try_acquire_dir
struct nbcon_state new;
do {
+ /*
+ * Panic does not imply that the console is owned. However, it
+ * is critical that non-panic CPUs during panic are unable to
+ * acquire ownership in order to satisfy the assumptions of
+ * nbcon_waiter_matches(). In particular, the assumption that
+ * lower priorities are ignored during panic.
+ */
if (other_cpu_in_panic())
return -EPERM;
@@ -259,18 +266,29 @@ static bool nbcon_waiter_matches(struct
/*
* The request context is well defined by the @req_prio because:
*
- * - Only a context with a higher priority can take over the request.
+ * - Only a context with a priority higher than the owner can become
+ * a waiter.
+ * - Only a context with a priority higher than the waiter can
+ * directly take over the request.
* - There are only three priorities.
* - Only one CPU is allowed to request PANIC priority.
* - Lower priorities are ignored during panic() until reboot.
*
* As a result, the following scenario is *not* possible:
*
- * 1. Another context with a higher priority directly takes ownership.
- * 2. The higher priority context releases the ownership.
- * 3. A lower priority context takes the ownership.
- * 4. Another context with the same priority as this context
+ * 1. This context is currently a waiter.
+ * 2. Another context with a higher priority than this context
+ * directly takes ownership.
+ * 3. The higher priority context releases the ownership.
+ * 4. Another lower priority context takes the ownership.
+ * 5. Another context with the same priority as this context
* creates a request and starts waiting.
+ *
+ * Event #1 implies this context is EMERGENCY.
+ * Event #2 implies the new context is PANIC.
+ * Event #3 occurs when panic() has flushed the console.
+ * Events #4 and #5 are not possible due to the other_cpu_in_panic()
+ * check in nbcon_context_try_acquire_direct().
*/
return (cur->req_prio == expected_prio);
@@ -578,11 +596,29 @@ static bool nbcon_owner_matches(struct n
int expected_prio)
{
/*
- * Since consoles can only be acquired by higher priorities,
- * owning contexts are uniquely identified by @prio. However,
- * since contexts can unexpectedly lose ownership, it is
- * possible that later another owner appears with the same
- * priority. For this reason @cpu is also needed.
+ * A similar function, nbcon_waiter_matches(), only deals with
+ * EMERGENCY and PANIC priorities. However, this function must also
+ * deal with the NORMAL priority, which requires additional checks
+ * and constraints.
+ *
+ * For the case where preemption and interrupts are disabled, it is
+ * enough to also verify that the owning CPU has not changed.
+ *
+ * For the case where preemption or interrupts are enabled, an
+ * external synchronization method *must* be used. In particular,
+ * the driver-specific locking mechanism used in device_lock()
+ * (including disabling migration) should be used. It prevents
+ * scenarios such as:
+ *
+ * 1. [Task A] owns a context with NBCON_PRIO_NORMAL on [CPU X] and
+ * is scheduled out.
+ * 2. Another context takes over the lock with NBCON_PRIO_EMERGENCY
+ * and releases it.
+ * 3. [Task B] acquires a context with NBCON_PRIO_NORMAL on [CPU X]
+ * and is scheduled out.
+ * 4. [Task A] gets running on [CPU X] and sees that the console is
+ * still owned by a task on [CPU X] with NBCON_PRIO_NORMAL. Thus
+ * [Task A] thinks it is the owner when it is not.
*/
if (cur->prio != expected_prio)

View File

@ -1,40 +0,0 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Fri, 1 Oct 2021 20:01:03 +0200
Subject: [PATCH 6/8] drm/i915: Drop the irqs_disabled() check
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
The !irqs_disabled() check triggers on PREEMPT_RT even with
i915_sched_engine::lock acquired. The reason is that the lock is transformed
into a sleeping lock on PREEMPT_RT and does not disable interrupts.
There is no need to check for disabled interrupts: the lockdep
annotation below already checks that the lock has been acquired by the
caller and will yell if the interrupts are not disabled.
Remove the !irqs_disabled() check.
Reported-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Acked-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
drivers/gpu/drm/i915/i915_request.c | 2 --
1 file changed, 2 deletions(-)
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -608,7 +608,6 @@ bool __i915_request_submit(struct i915_r
RQ_TRACE(request, "\n");
- GEM_BUG_ON(!irqs_disabled());
lockdep_assert_held(&engine->sched_engine->lock);
/*
@@ -717,7 +716,6 @@ void __i915_request_unsubmit(struct i915
*/
RQ_TRACE(request, "\n");
- GEM_BUG_ON(!irqs_disabled());
lockdep_assert_held(&engine->sched_engine->lock);
/*

View File

@ -1,71 +0,0 @@
From: John Ogness <john.ogness@linutronix.de>
Date: Tue, 20 Aug 2024 08:35:32 +0206
Subject: [PATCH 06/54] printk: nbcon: Remove return value for write_atomic()
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
The return value of write_atomic() does not provide any useful
information. On the contrary, it makes it more complicated for the
caller to deal with the information appropriately.
Change write_atomic() to not have a return value. If the
message did not get printed due to loss of ownership, the
caller will notice this on its own. If ownership was not lost,
it will be assumed that the driver successfully printed the
message and the sequence number for that console will be
incremented.
Signed-off-by: John Ogness <john.ogness@linutronix.de>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Link: https://lore.kernel.org/r/20240820063001.36405-7-john.ogness@linutronix.de
Signed-off-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
include/linux/console.h | 2 +-
kernel/printk/nbcon.c | 15 +++++++--------
2 files changed, 8 insertions(+), 9 deletions(-)
--- a/include/linux/console.h
+++ b/include/linux/console.h
@@ -345,7 +345,7 @@ struct console {
struct hlist_node node;
/* nbcon console specific members */
- bool (*write_atomic)(struct console *con,
+ void (*write_atomic)(struct console *con,
struct nbcon_write_context *wctxt);
atomic_t __private nbcon_state;
atomic_long_t __private nbcon_seq;
--- a/kernel/printk/nbcon.c
+++ b/kernel/printk/nbcon.c
@@ -885,7 +885,6 @@ static bool nbcon_emit_next_record(struc
unsigned long con_dropped;
struct nbcon_state cur;
unsigned long dropped;
- bool done;
/*
* The printk buffers are filled within an unsafe section. This
@@ -925,16 +924,16 @@ static bool nbcon_emit_next_record(struc
wctxt->unsafe_takeover = cur.unsafe_takeover;
if (con->write_atomic) {
- done = con->write_atomic(con, wctxt);
+ con->write_atomic(con, wctxt);
} else {
- nbcon_context_release(ctxt);
+ /*
+ * This function should never be called for legacy consoles.
+ * Handle it as if ownership was lost and try to continue.
+ */
WARN_ON_ONCE(1);
- done = false;
- }
-
- /* If not done, the emit was aborted. */
- if (!done)
+ nbcon_context_release(ctxt);
return false;
+ }
/*
* Since any dropped message was successfully output, reset the

View File

@ -1,30 +0,0 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Tue, 3 Oct 2023 21:37:21 +0200
Subject: [PATCH 7/8] drm/i915/guc: Consider also RCU depth in busy loop.
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
intel_guc_send_busy_loop() looks at in_atomic() and irqs_disabled() to
decide whether it should busy-spin while waiting or whether it may sleep.
Both checks report false on PREEMPT_RT even while sleeping spinlocks are
held, leading to RCU splats when the function sleeps.
Also check whether the task is inside an RCU read-side critical section
(rcu_preempt_depth()), which is where PREEMPT_RT's sleeping spinlocks
leave it.
Reported-by: "John B. Wyatt IV" <jwyatt@redhat.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
drivers/gpu/drm/i915/gt/uc/intel_guc.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -362,7 +362,7 @@ static inline int intel_guc_send_busy_lo
{
int err;
unsigned int sleep_period_ms = 1;
- bool not_atomic = !in_atomic() && !irqs_disabled();
+ bool not_atomic = !in_atomic() && !irqs_disabled() && !rcu_preempt_depth();
/*
* FIXME: Have caller pass in if we are in an atomic context to avoid
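
The flag feeds a sleep-versus-spin decision roughly like the following
sketch (a paraphrase, not the exact i915 code; the delay interval is
illustrative):

	if (not_atomic)
		msleep(sleep_period_ms);	/* sleeping is legal here */
	else
		udelay(10);			/* atomic context: must busy-wait */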

View File

@ -1,75 +0,0 @@
From: John Ogness <john.ogness@linutronix.de>
Date: Tue, 20 Aug 2024 08:35:33 +0206
Subject: [PATCH 07/54] printk: nbcon: Add detailed doc for write_atomic()
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
The write_atomic() callback has special requirements and is
allowed to use special helper functions. Provide detailed
documentation of the callback so that a developer has a
chance of implementing it correctly.
Signed-off-by: John Ogness <john.ogness@linutronix.de>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Link: https://lore.kernel.org/r/20240820063001.36405-8-john.ogness@linutronix.de
Signed-off-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
include/linux/console.h | 33 +++++++++++++++++++++++++++++----
1 file changed, 29 insertions(+), 4 deletions(-)
--- a/include/linux/console.h
+++ b/include/linux/console.h
@@ -303,7 +303,7 @@ struct nbcon_write_context {
/**
* struct console - The console descriptor structure
* @name: The name of the console driver
- * @write: Write callback to output messages (Optional)
+ * @write: Legacy write callback to output messages (Optional)
* @read: Read callback for console input (Optional)
* @device: The underlying TTY device driver (Optional)
* @unblank: Callback to unblank the console (Optional)
@@ -320,7 +320,6 @@ struct nbcon_write_context {
* @data: Driver private data
* @node: hlist node for the console list
*
- * @write_atomic: Write callback for atomic context
* @nbcon_state: State for nbcon consoles
* @nbcon_seq: Sequence number of the next record for nbcon to print
* @pbufs: Pointer to nbcon private buffer
@@ -345,8 +344,34 @@ struct console {
struct hlist_node node;
/* nbcon console specific members */
- void (*write_atomic)(struct console *con,
- struct nbcon_write_context *wctxt);
+
+ /**
+ * @write_atomic:
+ *
+ * NBCON callback to write out text in any context. (Optional)
+ *
+ * This callback is called with the console already acquired. However,
+ * a higher priority context is allowed to take it over by default.
+ *
+ * The callback must call nbcon_enter_unsafe() and nbcon_exit_unsafe()
+ * around any code where the takeover is not safe, for example, when
+ * manipulating the serial port registers.
+ *
+ * nbcon_enter_unsafe() will fail if the context has lost the console
+ * ownership in the meantime. In this case, the callback is no longer
+ * allowed to go forward. It must back out immediately and carefully.
+ * The buffer content is also no longer trusted since it no longer
+ * belongs to the context.
+ *
+ * The callback should allow the takeover whenever it is safe. It
+ * increases the chance to see messages when the system is in trouble.
+ *
+ * The callback can be called from any context (including NMI).
+ * Therefore it must avoid usage of any locking and instead rely
+ * on the console ownership for synchronization.
+ */
+ void (*write_atomic)(struct console *con, struct nbcon_write_context *wctxt);
+
atomic_t __private nbcon_state;
atomic_long_t __private nbcon_seq;
struct printk_buffers *pbufs;
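
Put together, a conforming callback follows the shape of this sketch; the
driver name and the register-writing helper are hypothetical:

static void my_write_atomic(struct console *con,
			    struct nbcon_write_context *wctxt)
{
	if (!nbcon_enter_unsafe(wctxt))
		return;		/* ownership lost: back out, touch nothing */

	/* Hardware access only inside the unsafe section. */
	my_hw_write_chars(con, wctxt->outbuf, wctxt->len);	/* hypothetical helper */

	nbcon_exit_unsafe(wctxt);
}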

View File

@ -1,24 +0,0 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Mon, 21 Feb 2022 17:59:14 +0100
Subject: [PATCH 8/8] Revert "drm/i915: Depend on !PREEMPT_RT."
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
Once the known issues are addressed, it should be safe to enable the
driver.
Acked-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
drivers/gpu/drm/i915/Kconfig | 1 -
1 file changed, 1 deletion(-)
--- a/drivers/gpu/drm/i915/Kconfig
+++ b/drivers/gpu/drm/i915/Kconfig
@@ -3,7 +3,6 @@ config DRM_I915
tristate "Intel 8xx/9xx/G3x/G4x/HD Graphics"
depends on DRM
depends on X86 && PCI
- depends on !PREEMPT_RT
select INTEL_GTT if X86
select INTERVAL_TREE
# we need shmfs for the swappable backing store, and in particular

View File

@ -1,87 +0,0 @@
From: John Ogness <john.ogness@linutronix.de>
Date: Tue, 20 Aug 2024 08:35:34 +0206
Subject: [PATCH 08/54] printk: nbcon: Add callbacks to synchronize with driver
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
Console drivers typically must deal with access to the hardware
via user input/output (such as an interactive login shell) and
output of kernel messages via printk() calls. To provide the
necessary synchronization, usually some driver-specific locking
mechanism is used (for example, the port spinlock for uart
serial consoles).
Until now, usage of this driver-specific locking has been hidden
from the printk-subsystem and implemented within the various
console callbacks. However, nbcon consoles would need to use it
even in the generic code.
Add device_lock() and device_unlock() callbacks which will need
to be implemented by nbcon consoles.
The callbacks will use whatever synchronization mechanism the
driver is using for itself. The minimum requirement is to
prevent CPU migration. This allows a context-friendly
acquiring of nbcon console ownership in non-emergency and
non-panic contexts.
Signed-off-by: John Ogness <john.ogness@linutronix.de>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Link: https://lore.kernel.org/r/20240820063001.36405-9-john.ogness@linutronix.de
Signed-off-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
include/linux/console.h | 43 +++++++++++++++++++++++++++++++++++++++++++
1 file changed, 43 insertions(+)
--- a/include/linux/console.h
+++ b/include/linux/console.h
@@ -372,6 +372,49 @@ struct console {
*/
void (*write_atomic)(struct console *con, struct nbcon_write_context *wctxt);
+ /**
+ * @device_lock:
+ *
+ * NBCON callback to begin synchronization with driver code.
+ *
+ * Console drivers typically must deal with access to the hardware
+ * via user input/output (such as an interactive login shell) and
+ * output of kernel messages via printk() calls. This callback is
+ * called by the printk-subsystem whenever it needs to synchronize
+ * with hardware access by the driver. It should be implemented to
+ * use whatever synchronization mechanism the driver is using for
+ * itself (for example, the port lock for uart serial consoles).
+ *
+ * The callback is always called from task context. It may use any
+ * synchronization method required by the driver.
+ *
+ * IMPORTANT: The callback MUST disable migration. The console driver
+ * may be using a synchronization mechanism that already takes
+ * care of this (such as spinlocks). Otherwise this function must
+ * explicitly call migrate_disable().
+ *
+ * The flags argument is provided as a convenience to the driver. It
+ * will be passed again to device_unlock(). It can be ignored if the
+ * driver does not need it.
+ */
+ void (*device_lock)(struct console *con, unsigned long *flags);
+
+ /**
+ * @device_unlock:
+ *
+ * NBCON callback to finish synchronization with driver code.
+ *
+ * It is the counterpart to device_lock().
+ *
+ * This callback is always called from task context. It must
+ * appropriately re-enable migration (depending on how device_lock()
+ * disabled migration).
+ *
+ * The flags argument is the value of the same variable that was
+ * passed to device_lock().
+ */
+ void (*device_unlock)(struct console *con, unsigned long flags);
+
atomic_t __private nbcon_state;
atomic_long_t __private nbcon_seq;
struct printk_buffers *pbufs;
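
For a driver whose internal lock is a mutex, which does not pin the CPU by
itself, the callbacks could look like this sketch (all names are
hypothetical):

static DEFINE_MUTEX(my_hw_lock);

static void my_device_lock(struct console *con, unsigned long *flags)
{
	mutex_lock(&my_hw_lock);
	migrate_disable();	/* the contract: no CPU migration */
	*flags = 0;		/* this driver has no use for @flags */
}

static void my_device_unlock(struct console *con, unsigned long flags)
{
	migrate_enable();
	mutex_unlock(&my_hw_lock);
}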

View File

@ -1,124 +0,0 @@
From: John Ogness <john.ogness@linutronix.de>
Date: Tue, 20 Aug 2024 08:35:35 +0206
Subject: [PATCH 09/54] printk: nbcon: Use driver synchronization while
(un)registering
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
Console drivers typically have to deal with access to the
hardware via user input/output (such as an interactive login
shell) and output of kernel messages via printk() calls.
They use some classic driver-specific locking mechanism in most
situations. But console->write_atomic() callbacks, used by nbcon
consoles, are synchronized only by acquiring the console
context.
The synchronization via the console context ownership is possible
only when the console driver is registered. It is when a
particular device driver is connected with a particular console
driver.
The two synchronization mechanisms must be coordinated with
each other. This is tricky because the console context ownership
is quite special: it might be taken over by a higher priority
context, and CPU migration must be disabled. The trickiest
part is to (dis)connect these two mechanisms during console
(un)registration.
Use the driver-specific locking callbacks: device_lock(),
device_unlock(). They allow taking the device-specific lock
while the device is being (un)registered by the related console
driver.
For example, these callbacks lock/unlock the port lock for
serial port drivers.
Note that the driver-specific locking is only needed during
(un)register if it is an nbcon console with the write_atomic()
callback implemented. If write_atomic() is not implemented, the
driver should never attempt to access the hardware without
first acquiring its driver-specific lock.
Signed-off-by: John Ogness <john.ogness@linutronix.de>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Link: https://lore.kernel.org/r/20240820063001.36405-10-john.ogness@linutronix.de
Signed-off-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/printk/printk.c | 33 ++++++++++++++++++++++++++++++++-
1 file changed, 32 insertions(+), 1 deletion(-)
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -3549,9 +3549,11 @@ static int unregister_console_locked(str
*/
void register_console(struct console *newcon)
{
- struct console *con;
+ bool use_device_lock = (newcon->flags & CON_NBCON) && newcon->write_atomic;
bool bootcon_registered = false;
bool realcon_registered = false;
+ struct console *con;
+ unsigned long flags;
u64 init_seq;
int err;
@@ -3639,6 +3641,19 @@ void register_console(struct console *ne
}
/*
+ * If another context is actively using the hardware of this new
+ * console, it will not be aware of the nbcon synchronization. This
+ * is a risk that two contexts could access the hardware
+ * simultaneously if this new console is used for atomic printing
+ * and the other context is still using the hardware.
+ *
+ * Use the driver synchronization to ensure that the hardware is not
+ * in use while this new console transitions to being registered.
+ */
+ if (use_device_lock)
+ newcon->device_lock(newcon, &flags);
+
+ /*
* Put this console in the list - keep the
* preferred driver at the head of the list.
*/
@@ -3662,6 +3677,10 @@ void register_console(struct console *ne
* register_console() completes.
*/
+ /* This new console is now registered. */
+ if (use_device_lock)
+ newcon->device_unlock(newcon, flags);
+
console_sysfs_notify();
/*
@@ -3690,6 +3709,8 @@ EXPORT_SYMBOL(register_console);
/* Must be called under console_list_lock(). */
static int unregister_console_locked(struct console *console)
{
+ bool use_device_lock = (console->flags & CON_NBCON) && console->write_atomic;
+ unsigned long flags;
int res;
lockdep_assert_console_list_lock_held();
@@ -3708,8 +3729,18 @@ static int unregister_console_locked(str
if (!console_is_registered_locked(console))
return -ENODEV;
+ /*
+ * Use the driver synchronization to ensure that the hardware is not
+ * in use while this console transitions to being unregistered.
+ */
+ if (use_device_lock)
+ console->device_lock(console, &flags);
+
hlist_del_init_rcu(&console->node);
+ if (use_device_lock)
+ console->device_unlock(console, flags);
+
/*
* <HISTORICAL>
* If this isn't the last console and it has CON_CONSDEV set, we

View File

@ -1,52 +0,0 @@
From: John Ogness <john.ogness@linutronix.de>
Date: Tue, 20 Aug 2024 08:35:36 +0206
Subject: [PATCH 10/54] serial: core: Provide low-level functions to lock port
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
It will be necessary at times for the uart nbcon console
drivers to acquire the port lock directly (without the
additional nbcon functionality of the port lock wrappers).
These are special cases such as the implementation of the
device_lock()/device_unlock() callbacks or for internal
port lock wrapper synchronization.
Provide low-level variants __uart_port_lock_irqsave() and
__uart_port_unlock_irqrestore() for this purpose.
Signed-off-by: John Ogness <john.ogness@linutronix.de>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Link: https://lore.kernel.org/r/20240820063001.36405-11-john.ogness@linutronix.de
Signed-off-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
include/linux/serial_core.h | 18 ++++++++++++++++++
1 file changed, 18 insertions(+)
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -590,6 +590,24 @@ struct uart_port {
void *private_data; /* generic platform data pointer */
};
+/*
+ * Only for console->device_lock()/_unlock() callbacks and internal
+ * port lock wrapper synchronization.
+ */
+static inline void __uart_port_lock_irqsave(struct uart_port *up, unsigned long *flags)
+{
+ spin_lock_irqsave(&up->lock, *flags);
+}
+
+/*
+ * Only for console->device_lock()/_unlock() callbacks and internal
+ * port lock wrapper synchronization.
+ */
+static inline void __uart_port_unlock_irqrestore(struct uart_port *up, unsigned long flags)
+{
+ spin_unlock_irqrestore(&up->lock, flags);
+}
+
/**
* uart_port_lock - Lock the UART port
* @up: Pointer to UART port structure
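
These helpers are what a uart console's device_lock()/device_unlock()
callbacks, described two patches earlier, can be built on: the port spinlock
already prevents migration. A sketch, with a hypothetical lookup helper:

static void my_console_device_lock(struct console *con, unsigned long *flags)
{
	struct uart_port *up = my_console_to_port(con);	/* hypothetical */

	__uart_port_lock_irqsave(up, flags);
}

static void my_console_device_unlock(struct console *con, unsigned long flags)
{
	struct uart_port *up = my_console_to_port(con);	/* hypothetical */

	__uart_port_unlock_irqrestore(up, flags);
}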

View File

@ -1,125 +0,0 @@
From: John Ogness <john.ogness@linutronix.de>
Date: Tue, 20 Aug 2024 08:35:37 +0206
Subject: [PATCH 11/54] serial: core: Introduce wrapper to set @uart_port->cons
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
Introduce uart_port_set_cons() as a wrapper to set @cons of a
uart_port. The wrapper sets @cons under the port lock in order
to prevent @cons from disappearing while another context is
holding the port lock. This is necessary for a follow-up
commit relating to the port lock wrappers, which rely on @cons
not changing between lock and unlock.
Signed-off-by: John Ogness <john.ogness@linutronix.de>
Tested-by: Théo Lebrun <theo.lebrun@bootlin.com> # EyeQ5, AMBA-PL011
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Link: https://lore.kernel.org/r/20240820063001.36405-12-john.ogness@linutronix.de
Signed-off-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
drivers/tty/serial/8250/8250_core.c | 6 +++---
drivers/tty/serial/amba-pl011.c | 2 +-
drivers/tty/serial/serial_core.c | 16 ++++++++--------
include/linux/serial_core.h | 17 +++++++++++++++++
4 files changed, 29 insertions(+), 12 deletions(-)
--- a/drivers/tty/serial/8250/8250_core.c
+++ b/drivers/tty/serial/8250/8250_core.c
@@ -423,11 +423,11 @@ static int univ8250_console_setup(struct
port = &serial8250_ports[co->index].port;
/* link port to console */
- port->cons = co;
+ uart_port_set_cons(port, co);
retval = serial8250_console_setup(port, options, false);
if (retval != 0)
- port->cons = NULL;
+ uart_port_set_cons(port, NULL);
return retval;
}
@@ -485,7 +485,7 @@ static int univ8250_console_match(struct
continue;
co->index = i;
- port->cons = co;
+ uart_port_set_cons(port, co);
return serial8250_console_setup(port, options, true);
}
--- a/drivers/tty/serial/amba-pl011.c
+++ b/drivers/tty/serial/amba-pl011.c
@@ -2480,7 +2480,7 @@ static int pl011_console_match(struct co
continue;
co->index = i;
- port->cons = co;
+ uart_port_set_cons(port, co);
return pl011_console_setup(co, options);
}
--- a/drivers/tty/serial/serial_core.c
+++ b/drivers/tty/serial/serial_core.c
@@ -3176,8 +3176,15 @@ static int serial_core_add_one_port(stru
state->uart_port = uport;
uport->state = state;
+ /*
+ * If this port is in use as a console then the spinlock is already
+ * initialised.
+ */
+ if (!uart_console_registered(uport))
+ uart_port_spin_lock_init(uport);
+
state->pm_state = UART_PM_STATE_UNDEFINED;
- uport->cons = drv->cons;
+ uart_port_set_cons(uport, drv->cons);
uport->minor = drv->tty_driver->minor_start + uport->line;
uport->name = kasprintf(GFP_KERNEL, "%s%d", drv->dev_name,
drv->tty_driver->name_base + uport->line);
@@ -3186,13 +3193,6 @@ static int serial_core_add_one_port(stru
goto out;
}
- /*
- * If this port is in use as a console then the spinlock is already
- * initialised.
- */
- if (!uart_console_registered(uport))
- uart_port_spin_lock_init(uport);
-
if (uport->cons && uport->dev)
of_console_check(uport->dev->of_node, uport->cons->name, uport->line);
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -609,6 +609,23 @@ static inline void __uart_port_unlock_ir
}
/**
+ * uart_port_set_cons - Safely set the @cons field for a uart
+ * @up: The uart port to set
+ * @con: The new console to set to
+ *
+ * This function must be used to set @up->cons. It uses the port lock to
+ * synchronize with the port lock wrappers in order to ensure that the console
+ * cannot change or disappear while another context is holding the port lock.
+ */
+static inline void uart_port_set_cons(struct uart_port *up, struct console *con)
+{
+ unsigned long flags;
+
+ __uart_port_lock_irqsave(up, &flags);
+ up->cons = con;
+ __uart_port_unlock_irqrestore(up, flags);
+}
+/**
* uart_port_lock - Lock the UART port
* @up: Pointer to UART port structure
*/

View File

@ -1,75 +0,0 @@
From: John Ogness <john.ogness@linutronix.de>
Date: Tue, 20 Aug 2024 08:35:38 +0206
Subject: [PATCH 12/54] console: Improve console_srcu_read_flags() comments
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
It was not clear when exactly console_srcu_read_flags() must be
used vs. directly reading @console->flags.
Refactor and clarify that console_srcu_read_flags() is only
needed if the console is registered or the caller is in a
context where the registration status of the console may change
(due to another context).
The function requires that the caller hold @console_srcu, which
ensures that the caller sees an appropriate @flags value for the
registered console and that exit/cleanup routines will not run
if the console is in the process of unregistration.
Signed-off-by: John Ogness <john.ogness@linutronix.de>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Link: https://lore.kernel.org/r/20240820063001.36405-13-john.ogness@linutronix.de
Signed-off-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
include/linux/console.h | 28 +++++++++++++++++-----------
1 file changed, 17 insertions(+), 11 deletions(-)
--- a/include/linux/console.h
+++ b/include/linux/console.h
@@ -446,28 +446,34 @@ extern void console_list_unlock(void) __
extern struct hlist_head console_list;
/**
- * console_srcu_read_flags - Locklessly read the console flags
+ * console_srcu_read_flags - Locklessly read flags of a possibly registered
+ * console
* @con: struct console pointer of console to read flags from
*
- * This function provides the necessary READ_ONCE() and data_race()
- * notation for locklessly reading the console flags. The READ_ONCE()
- * in this function matches the WRITE_ONCE() when @flags are modified
- * for registered consoles with console_srcu_write_flags().
+ * Locklessly reading @con->flags provides a consistent read value because
+ * there is at most one CPU modifying @con->flags and that CPU is using only
+ * read-modify-write operations to do so.
*
- * Only use this function to read console flags when locklessly
- * iterating the console list via srcu.
+ * Requires console_srcu_read_lock to be held, which implies that @con might
+ * be a registered console. The purpose of holding console_srcu_read_lock is
+ * to guarantee that the console state is valid (CON_SUSPENDED/CON_ENABLED)
+ * and that no exit/cleanup routines will run if the console is currently
+ * undergoing unregistration.
+ *
+ * If the caller is holding the console_list_lock or it is _certain_ that
+ * @con is not and will not become registered, the caller may read
+ * @con->flags directly instead.
*
* Context: Any context.
+ * Return: The current value of the @con->flags field.
*/
static inline short console_srcu_read_flags(const struct console *con)
{
WARN_ON_ONCE(!console_srcu_read_lock_is_held());
/*
- * Locklessly reading console->flags provides a consistent
- * read value because there is at most one CPU modifying
- * console->flags and that CPU is using only read-modify-write
- * operations to do so.
+ * The READ_ONCE() matches the WRITE_ONCE() when @flags are modified
+ * for registered consoles with console_srcu_write_flags().
*/
return data_race(READ_ONCE(con->flags));
}
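
The intended reader-side pattern looks like this sketch:

static void visit_usable_consoles(void)
{
	struct console *con;
	int cookie;

	cookie = console_srcu_read_lock();
	for_each_console_srcu(con) {
		short flags = console_srcu_read_flags(con);

		if (!(flags & CON_ENABLED))
			continue;
		/* ... act on the registered console ... */
	}
	console_srcu_read_unlock(cookie);
}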

View File

@ -1,180 +0,0 @@
From: John Ogness <john.ogness@linutronix.de>
Date: Tue, 20 Aug 2024 08:35:39 +0206
Subject: [PATCH 13/54] nbcon: Add API to acquire context for non-printing
operations
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
Provide functions nbcon_device_try_acquire() and
nbcon_device_release() which will try to acquire the nbcon
console ownership with NBCON_PRIO_NORMAL and mark it unsafe for
handover/takeover.
These functions are to be used together with the device-specific
locking when performing non-printing activities on the console
device. They allow synchronization against the write_atomic()
callback which, for higher priority contexts, is serialized only
by acquiring the console context ownership.
Pitfalls:
The API must be called from a context with migration disabled
because it uses per-CPU variables internally.
The context is marked unsafe for a takeover the whole time. This
guarantees full serialization against any write_atomic() caller
except for the final flush in panic(), which might try an unsafe
takeover.
Signed-off-by: John Ogness <john.ogness@linutronix.de>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Link: https://lore.kernel.org/r/20240820063001.36405-14-john.ogness@linutronix.de
Signed-off-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
include/linux/console.h | 2 +
include/linux/printk.h | 14 +++++++++++
kernel/printk/nbcon.c | 58 +++++++++++++++++++++++++++++++++++++++++++++++-
3 files changed, 73 insertions(+), 1 deletion(-)
--- a/include/linux/console.h
+++ b/include/linux/console.h
@@ -322,6 +322,7 @@ struct nbcon_write_context {
*
* @nbcon_state: State for nbcon consoles
* @nbcon_seq: Sequence number of the next record for nbcon to print
+ * @nbcon_device_ctxt: Context available for non-printing operations
* @pbufs: Pointer to nbcon private buffer
*/
struct console {
@@ -417,6 +418,7 @@ struct console {
atomic_t __private nbcon_state;
atomic_long_t __private nbcon_seq;
+ struct nbcon_context __private nbcon_device_ctxt;
struct printk_buffers *pbufs;
};
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -9,6 +9,8 @@
#include <linux/ratelimit_types.h>
#include <linux/once_lite.h>
+struct console;
+
extern const char linux_banner[];
extern const char linux_proc_banner[];
@@ -198,6 +200,8 @@ extern asmlinkage void dump_stack_lvl(co
extern asmlinkage void dump_stack(void) __cold;
void printk_trigger_flush(void);
void console_try_replay_all(void);
+extern bool nbcon_device_try_acquire(struct console *con);
+extern void nbcon_device_release(struct console *con);
#else
static inline __printf(1, 0)
int vprintk(const char *s, va_list args)
@@ -280,6 +284,16 @@ static inline void printk_trigger_flush(
static inline void console_try_replay_all(void)
{
}
+
+static inline bool nbcon_device_try_acquire(struct console *con)
+{
+ return false;
+}
+
+static inline void nbcon_device_release(struct console *con)
+{
+}
+
#endif
bool this_cpu_in_panic(void);
--- a/kernel/printk/nbcon.c
+++ b/kernel/printk/nbcon.c
@@ -5,7 +5,9 @@
#include <linux/kernel.h>
#include <linux/console.h>
#include <linux/delay.h>
+#include <linux/export.h>
#include <linux/slab.h>
+#include <linux/string.h>
#include "internal.h"
/*
* Printk console printing implementation for consoles which does not depend
@@ -546,6 +548,7 @@ static struct printk_buffers panic_nbcon
* nbcon_context_try_acquire - Try to acquire nbcon console
* @ctxt: The context of the caller
*
+ * Context: Under @ctxt->con->device_lock() or local_irq_save().
* Return: True if the console was acquired. False otherwise.
*
* If the caller allowed an unsafe hostile takeover, on success the
@@ -553,7 +556,6 @@ static struct printk_buffers panic_nbcon
* in an unsafe state. Otherwise, on success the caller may assume
* the console is not in an unsafe state.
*/
-__maybe_unused
static bool nbcon_context_try_acquire(struct nbcon_context *ctxt)
{
unsigned int cpu = smp_processor_id();
@@ -1011,3 +1013,57 @@ void nbcon_free(struct console *con)
con->pbufs = NULL;
}
+
+/**
+ * nbcon_device_try_acquire - Try to acquire nbcon console and enter unsafe
+ * section
+ * @con: The nbcon console to acquire
+ *
+ * Context: Under the locking mechanism implemented in
+ * @con->device_lock() including disabling migration.
+ * Return: True if the console was acquired. False otherwise.
+ *
+ * Console drivers will usually use their own internal synchronization
+ * mechanism to synchronize between console printing and non-printing
+ * activities (such as setting baud rates). However, nbcon console drivers
+ * supporting atomic consoles may also want to mark unsafe sections when
+ * performing non-printing activities in order to synchronize against their
+ * atomic_write() callback.
+ *
+ * This function acquires the nbcon console using priority NBCON_PRIO_NORMAL
+ * and marks it unsafe for handover/takeover.
+ */
+bool nbcon_device_try_acquire(struct console *con)
+{
+ struct nbcon_context *ctxt = &ACCESS_PRIVATE(con, nbcon_device_ctxt);
+
+ cant_migrate();
+
+ memset(ctxt, 0, sizeof(*ctxt));
+ ctxt->console = con;
+ ctxt->prio = NBCON_PRIO_NORMAL;
+
+ if (!nbcon_context_try_acquire(ctxt))
+ return false;
+
+ if (!nbcon_context_enter_unsafe(ctxt))
+ return false;
+
+ return true;
+}
+EXPORT_SYMBOL_GPL(nbcon_device_try_acquire);
+
+/**
+ * nbcon_device_release - Exit unsafe section and release the nbcon console
+ * @con: The nbcon console acquired in nbcon_device_try_acquire()
+ */
+void nbcon_device_release(struct console *con)
+{
+ struct nbcon_context *ctxt = &ACCESS_PRIVATE(con, nbcon_device_ctxt);
+
+ if (!nbcon_context_exit_unsafe(ctxt))
+ return;
+
+ nbcon_context_release(ctxt);
+}
+EXPORT_SYMBOL_GPL(nbcon_device_release);
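
A sketch of the intended pairing with the driver-specific lock, for a
non-printing operation such as changing the baud rate (the device-lock and
hardware helpers are hypothetical):

static void my_set_baud(struct console *con, unsigned int baud)
{
	unsigned long flags;

	my_device_lock(con, &flags);		/* disables migration */

	while (!nbcon_device_try_acquire(con))
		cpu_relax();			/* wait out a higher priority owner */

	my_hw_program_baud(con, baud);		/* serialized against write_atomic() */

	nbcon_device_release(con);
	my_device_unlock(con, flags);
}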

View File

@ -1,185 +0,0 @@
From: John Ogness <john.ogness@linutronix.de>
Date: Tue, 20 Aug 2024 08:35:40 +0206
Subject: [PATCH 14/54] serial: core: Acquire nbcon context in port->lock
wrapper
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
Currently the port->lock wrappers uart_port_lock(),
uart_port_unlock() (and their variants) only lock/unlock
the spin_lock.
If the port is an nbcon console that has implemented the
write_atomic() callback, the wrappers must also acquire/release
the console context and mark the region as unsafe. This allows
general port->lock synchronization to be synchronized against
the nbcon write_atomic() callback.
Note that __uart_port_using_nbcon() relies on the port->lock
being held while a console is added and removed from the
console list (i.e. all uart nbcon drivers *must* take the
port->lock in their device_lock() callbacks).
Signed-off-by: John Ogness <john.ogness@linutronix.de>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Link: https://lore.kernel.org/r/20240820063001.36405-15-john.ogness@linutronix.de
Signed-off-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
include/linux/serial_core.h | 82 ++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 80 insertions(+), 2 deletions(-)
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -11,6 +11,8 @@
#include <linux/compiler.h>
#include <linux/console.h>
#include <linux/interrupt.h>
+#include <linux/lockdep.h>
+#include <linux/printk.h>
#include <linux/spinlock.h>
#include <linux/sched.h>
#include <linux/tty.h>
@@ -625,6 +627,60 @@ static inline void uart_port_set_cons(st
up->cons = con;
__uart_port_unlock_irqrestore(up, flags);
}
+
+/* Only for internal port lock wrapper usage. */
+static inline bool __uart_port_using_nbcon(struct uart_port *up)
+{
+ lockdep_assert_held_once(&up->lock);
+
+ if (likely(!uart_console(up)))
+ return false;
+
+ /*
+ * @up->cons is only modified under the port lock. Therefore it is
+ * certain that it cannot disappear here.
+ *
+ * @up->cons->node is added/removed from the console list under the
+ * port lock. Therefore it is certain that the registration status
+ * cannot change here, thus @up->cons->flags can be read directly.
+ */
+ if (hlist_unhashed_lockless(&up->cons->node) ||
+ !(up->cons->flags & CON_NBCON) ||
+ !up->cons->write_atomic) {
+ return false;
+ }
+
+ return true;
+}
+
+/* Only for internal port lock wrapper usage. */
+static inline bool __uart_port_nbcon_try_acquire(struct uart_port *up)
+{
+ if (!__uart_port_using_nbcon(up))
+ return true;
+
+ return nbcon_device_try_acquire(up->cons);
+}
+
+/* Only for internal port lock wrapper usage. */
+static inline void __uart_port_nbcon_acquire(struct uart_port *up)
+{
+ if (!__uart_port_using_nbcon(up))
+ return;
+
+ while (!nbcon_device_try_acquire(up->cons))
+ cpu_relax();
+}
+
+/* Only for internal port lock wrapper usage. */
+static inline void __uart_port_nbcon_release(struct uart_port *up)
+{
+ if (!__uart_port_using_nbcon(up))
+ return;
+
+ nbcon_device_release(up->cons);
+}
+
/**
* uart_port_lock - Lock the UART port
* @up: Pointer to UART port structure
@@ -632,6 +688,7 @@ static inline void uart_port_set_cons(st
static inline void uart_port_lock(struct uart_port *up)
{
spin_lock(&up->lock);
+ __uart_port_nbcon_acquire(up);
}
/**
@@ -641,6 +698,7 @@ static inline void uart_port_lock(struct
static inline void uart_port_lock_irq(struct uart_port *up)
{
spin_lock_irq(&up->lock);
+ __uart_port_nbcon_acquire(up);
}
/**
@@ -651,6 +709,7 @@ static inline void uart_port_lock_irq(st
static inline void uart_port_lock_irqsave(struct uart_port *up, unsigned long *flags)
{
spin_lock_irqsave(&up->lock, *flags);
+ __uart_port_nbcon_acquire(up);
}
/**
@@ -661,7 +720,15 @@ static inline void uart_port_lock_irqsav
*/
static inline bool uart_port_trylock(struct uart_port *up)
{
- return spin_trylock(&up->lock);
+ if (!spin_trylock(&up->lock))
+ return false;
+
+ if (!__uart_port_nbcon_try_acquire(up)) {
+ spin_unlock(&up->lock);
+ return false;
+ }
+
+ return true;
}
/**
@@ -673,7 +740,15 @@ static inline bool uart_port_trylock(str
*/
static inline bool uart_port_trylock_irqsave(struct uart_port *up, unsigned long *flags)
{
- return spin_trylock_irqsave(&up->lock, *flags);
+ if (!spin_trylock_irqsave(&up->lock, *flags))
+ return false;
+
+ if (!__uart_port_nbcon_try_acquire(up)) {
+ spin_unlock_irqrestore(&up->lock, *flags);
+ return false;
+ }
+
+ return true;
}
/**
@@ -682,6 +757,7 @@ static inline bool uart_port_trylock_irq
*/
static inline void uart_port_unlock(struct uart_port *up)
{
+ __uart_port_nbcon_release(up);
spin_unlock(&up->lock);
}
@@ -691,6 +767,7 @@ static inline void uart_port_unlock(stru
*/
static inline void uart_port_unlock_irq(struct uart_port *up)
{
+ __uart_port_nbcon_release(up);
spin_unlock_irq(&up->lock);
}
@@ -701,6 +778,7 @@ static inline void uart_port_unlock_irq(
*/
static inline void uart_port_unlock_irqrestore(struct uart_port *up, unsigned long flags)
{
+ __uart_port_nbcon_release(up);
spin_unlock_irqrestore(&up->lock, flags);
}
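
The effect is that existing driver call sites gain the nbcon synchronization
without changes, as in this sketch:

static void my_driver_configure(struct uart_port *port)
{
	unsigned long flags;

	uart_port_lock_irqsave(port, &flags);	/* spinlock + nbcon context */
	/* program the hardware; write_atomic() cannot run concurrently here */
	uart_port_unlock_irqrestore(port, flags);
}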

View File

@ -1,90 +0,0 @@
From: John Ogness <john.ogness@linutronix.de>
Date: Tue, 20 Aug 2024 08:35:41 +0206
Subject: [PATCH 15/54] printk: nbcon: Do not rely on proxy headers
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
The headers kernel.h, serial_core.h, and console.h allow for the
definitions of many types and functions from other headers.
Rather than relying on these as proxy headers, explicitly
include all headers providing needed definitions. Also sort the
list alphabetically to be able to easily detect duplicates.
Suggested-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: John Ogness <john.ogness@linutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Petr Mladek <pmladek@suse.com>
Link: https://lore.kernel.org/r/20240820063001.36405-16-john.ogness@linutronix.de
Signed-off-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/printk/internal.h | 8 ++++++--
kernel/printk/nbcon.c | 13 ++++++++++++-
kernel/printk/printk_ringbuffer.h | 2 ++
3 files changed, 20 insertions(+), 3 deletions(-)
--- a/kernel/printk/internal.h
+++ b/kernel/printk/internal.h
@@ -2,11 +2,12 @@
/*
* internal.h - printk internal definitions
*/
-#include <linux/percpu.h>
#include <linux/console.h>
-#include "printk_ringbuffer.h"
+#include <linux/percpu.h>
+#include <linux/types.h>
#if defined(CONFIG_PRINTK) && defined(CONFIG_SYSCTL)
+struct ctl_table;
void __init printk_sysctl_init(void);
int devkmsg_sysctl_set_loglvl(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos);
@@ -43,6 +44,9 @@ enum printk_info_flags {
LOG_CONT = 8, /* text is a fragment of a continuation line */
};
+struct printk_ringbuffer;
+struct dev_printk_info;
+
extern struct printk_ringbuffer *prb;
__printf(4, 0)
--- a/kernel/printk/nbcon.c
+++ b/kernel/printk/nbcon.c
@@ -2,13 +2,24 @@
// Copyright (C) 2022 Linutronix GmbH, John Ogness
// Copyright (C) 2022 Intel, Thomas Gleixner
-#include <linux/kernel.h>
+#include <linux/atomic.h>
+#include <linux/bug.h>
#include <linux/console.h>
#include <linux/delay.h>
+#include <linux/errno.h>
#include <linux/export.h>
+#include <linux/init.h>
+#include <linux/irqflags.h>
+#include <linux/minmax.h>
+#include <linux/percpu.h>
+#include <linux/preempt.h>
#include <linux/slab.h>
+#include <linux/smp.h>
+#include <linux/stddef.h>
#include <linux/string.h>
+#include <linux/types.h>
#include "internal.h"
+#include "printk_ringbuffer.h"
/*
* Printk console printing implementation for consoles which does not depend
* on the legacy style console_lock mechanism.
--- a/kernel/printk/printk_ringbuffer.h
+++ b/kernel/printk/printk_ringbuffer.h
@@ -5,6 +5,8 @@
#include <linux/atomic.h>
#include <linux/dev_printk.h>
+#include <linux/stddef.h>
+#include <linux/types.h>
/*
* Meta information about each stored message.

View File

@ -1,105 +0,0 @@
From: John Ogness <john.ogness@linutronix.de>
Date: Tue, 20 Aug 2024 08:35:42 +0206
Subject: [PATCH 16/54] printk: Make console_is_usable() available to nbcon.c
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
Move console_is_usable() as-is into internal.h so that it can
be used by nbcon printing functions as well.
Signed-off-by: John Ogness <john.ogness@linutronix.de>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Link: https://lore.kernel.org/r/20240820063001.36405-17-john.ogness@linutronix.de
Signed-off-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/printk/internal.h | 32 ++++++++++++++++++++++++++++++++
kernel/printk/printk.c | 30 ------------------------------
2 files changed, 32 insertions(+), 30 deletions(-)
--- a/kernel/printk/internal.h
+++ b/kernel/printk/internal.h
@@ -84,6 +84,36 @@ void nbcon_seq_force(struct console *con
bool nbcon_alloc(struct console *con);
void nbcon_free(struct console *con);
+/*
+ * Check if the given console is currently capable and allowed to print
+ * records.
+ *
+ * Requires the console_srcu_read_lock.
+ */
+static inline bool console_is_usable(struct console *con)
+{
+ short flags = console_srcu_read_flags(con);
+
+ if (!(flags & CON_ENABLED))
+ return false;
+
+ if ((flags & CON_SUSPENDED))
+ return false;
+
+ if (!con->write)
+ return false;
+
+ /*
+ * Console drivers may assume that per-cpu resources have been
+ * allocated. So unless they're explicitly marked as being able to
+ * cope (CON_ANYTIME) don't call them until this CPU is officially up.
+ */
+ if (!cpu_online(raw_smp_processor_id()) && !(flags & CON_ANYTIME))
+ return false;
+
+ return true;
+}
+
#else
#define PRINTK_PREFIX_MAX 0
@@ -104,6 +134,8 @@ static inline void nbcon_seq_force(struc
static inline bool nbcon_alloc(struct console *con) { return false; }
static inline void nbcon_free(struct console *con) { }
+static inline bool console_is_usable(struct console *con) { return false; }
+
#endif /* CONFIG_PRINTK */
extern struct printk_buffers printk_shared_pbufs;
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -2768,36 +2768,6 @@ int is_console_locked(void)
}
EXPORT_SYMBOL(is_console_locked);
-/*
- * Check if the given console is currently capable and allowed to print
- * records.
- *
- * Requires the console_srcu_read_lock.
- */
-static inline bool console_is_usable(struct console *con)
-{
- short flags = console_srcu_read_flags(con);
-
- if (!(flags & CON_ENABLED))
- return false;
-
- if ((flags & CON_SUSPENDED))
- return false;
-
- if (!con->write)
- return false;
-
- /*
- * Console drivers may assume that per-cpu resources have been
- * allocated. So unless they're explicitly marked as being able to
- * cope (CON_ANYTIME) don't call them until this CPU is officially up.
- */
- if (!cpu_online(raw_smp_processor_id()) && !(flags & CON_ANYTIME))
- return false;
-
- return true;
-}
-
static void __console_unlock(void)
{
console_locked = 0;

View File

@ -1,45 +0,0 @@
From: John Ogness <john.ogness@linutronix.de>
Date: Tue, 20 Aug 2024 08:35:43 +0206
Subject: [PATCH 17/54] printk: Let console_is_usable() handle nbcon
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
The nbcon consoles use a different printing callback. For nbcon
consoles, check for the write_atomic() callback instead of
write().
Signed-off-by: John Ogness <john.ogness@linutronix.de>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Link: https://lore.kernel.org/r/20240820063001.36405-18-john.ogness@linutronix.de
Signed-off-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/printk/internal.h | 11 +++++++++--
1 file changed, 9 insertions(+), 2 deletions(-)
--- a/kernel/printk/internal.h
+++ b/kernel/printk/internal.h
@@ -86,6 +86,8 @@ void nbcon_free(struct console *con);
/*
* Check if the given console is currently capable and allowed to print
+ * records. Note that this function does not consider the current context,
+ * which can also play a role in deciding if @con can be used to print
* records.
*
* Requires the console_srcu_read_lock.
@@ -100,8 +102,13 @@ static inline bool console_is_usable(str
if ((flags & CON_SUSPENDED))
return false;
- if (!con->write)
- return false;
+ if (flags & CON_NBCON) {
+ if (!con->write_atomic)
+ return false;
+ } else {
+ if (!con->write)
+ return false;
+ }
/*
* Console drivers may assume that per-cpu resources have been

View File

@ -1,69 +0,0 @@
From: John Ogness <john.ogness@linutronix.de>
Date: Tue, 20 Aug 2024 08:35:44 +0206
Subject: [PATCH 18/54] printk: Add @flags argument for console_is_usable()
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
The caller of console_is_usable() usually needs @console->flags
for its own checks. Rather than having console_is_usable() read
its own copy, make the caller pass in the @flags. This also
ensures that the caller saw the same @flags value.
Signed-off-by: John Ogness <john.ogness@linutronix.de>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Link: https://lore.kernel.org/r/20240820063001.36405-19-john.ogness@linutronix.de
Signed-off-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/printk/internal.h | 8 ++------
kernel/printk/printk.c | 5 +++--
2 files changed, 5 insertions(+), 8 deletions(-)
--- a/kernel/printk/internal.h
+++ b/kernel/printk/internal.h
@@ -89,13 +89,9 @@ void nbcon_free(struct console *con);
* records. Note that this function does not consider the current context,
* which can also play a role in deciding if @con can be used to print
* records.
- *
- * Requires the console_srcu_read_lock.
*/
-static inline bool console_is_usable(struct console *con)
+static inline bool console_is_usable(struct console *con, short flags)
{
- short flags = console_srcu_read_flags(con);
-
if (!(flags & CON_ENABLED))
return false;
@@ -141,7 +137,7 @@ static inline void nbcon_seq_force(struc
static inline bool nbcon_alloc(struct console *con) { return false; }
static inline void nbcon_free(struct console *con) { }
-static inline bool console_is_usable(struct console *con) { return false; }
+static inline bool console_is_usable(struct console *con, short flags) { return false; }
#endif /* CONFIG_PRINTK */
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -3013,9 +3013,10 @@ static bool console_flush_all(bool do_co
cookie = console_srcu_read_lock();
for_each_console_srcu(con) {
+ short flags = console_srcu_read_flags(con);
bool progress;
- if (!console_is_usable(con))
+ if (!console_is_usable(con, flags))
continue;
any_usable = true;
@@ -3926,7 +3927,7 @@ static bool __pr_flush(struct console *c
* that they make forward progress, so only increment
* @diff for usable consoles.
*/
- if (!console_is_usable(c))
+ if (!console_is_usable(c, flags))
continue;
if (flags & CON_NBCON) {
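
The point of the signature change is that the caller and console_is_usable() act on a single flags snapshot. A small sketch of that pattern, with the SRCU-protected read reduced to a plain load and all names used as stand-ins:

#include <stdbool.h>
#include <stdio.h>

#define CON_ENABLED 0x0001 /* stand-in flag values */
#define CON_NBCON   0x0400

struct console { short flags; };

/* In the kernel this is an SRCU-protected read; a plain load here. */
static short console_srcu_read_flags(const struct console *con)
{
    return con->flags;
}

static bool console_is_usable(const struct console *con, short flags)
{
    (void)con;
    return flags & CON_ENABLED; /* remaining checks elided */
}

static void flush_one(struct console *con)
{
    /* One snapshot serves both the caller's checks and the helper,
     * so both observe the same state even under concurrent changes. */
    short flags = console_srcu_read_flags(con);

    if (!console_is_usable(con, flags))
        return;
    printf("%s path\n", (flags & CON_NBCON) ? "nbcon" : "legacy");
}

int main(void)
{
    struct console con = { .flags = CON_ENABLED | CON_NBCON };
    flush_one(&con);
    return 0;
}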

View File

@ -1,72 +0,0 @@
From: John Ogness <john.ogness@linutronix.de>
Date: Tue, 20 Aug 2024 08:35:45 +0206
Subject: [PATCH 19/54] printk: nbcon: Add helper to assign priority based on
CPU state
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
Add a helper function to use the current state of the CPU to
determine which priority to assign to the printing context.
The EMERGENCY priority handling is added in a follow-up commit.
It will use a per-CPU variable.
Note: nbcon_device_try_acquire(), which is used by console
drivers to acquire the nbcon console for non-printing
activities, is hard-coded to always use NORMAL priority.
Signed-off-by: John Ogness <john.ogness@linutronix.de>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Link: https://lore.kernel.org/r/20240820063001.36405-20-john.ogness@linutronix.de
Signed-off-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/printk/internal.h | 2 ++
kernel/printk/nbcon.c | 19 +++++++++++++++++++
2 files changed, 21 insertions(+)
--- a/kernel/printk/internal.h
+++ b/kernel/printk/internal.h
@@ -83,6 +83,7 @@ u64 nbcon_seq_read(struct console *con);
void nbcon_seq_force(struct console *con, u64 seq);
bool nbcon_alloc(struct console *con);
void nbcon_free(struct console *con);
+enum nbcon_prio nbcon_get_default_prio(void);
/*
* Check if the given console is currently capable and allowed to print
@@ -136,6 +137,7 @@ static inline u64 nbcon_seq_read(struct
static inline void nbcon_seq_force(struct console *con, u64 seq) { }
static inline bool nbcon_alloc(struct console *con) { return false; }
static inline void nbcon_free(struct console *con) { }
+static inline enum nbcon_prio nbcon_get_default_prio(void) { return NBCON_PRIO_NONE; }
static inline bool console_is_usable(struct console *con, short flags) { return false; }
--- a/kernel/printk/nbcon.c
+++ b/kernel/printk/nbcon.c
@@ -974,6 +974,25 @@ static bool nbcon_emit_next_record(struc
}
/**
+ * nbcon_get_default_prio - The appropriate nbcon priority to use for nbcon
+ * printing on the current CPU
+ *
+ * Context: Any context.
+ * Return: The nbcon_prio to use for acquiring an nbcon console in this
+ * context for printing.
+ *
+ * The function is safe for reading per-CPU data in any context because
+ * preemption is disabled if the current CPU is in the panic state.
+ */
+enum nbcon_prio nbcon_get_default_prio(void)
+{
+ if (this_cpu_in_panic())
+ return NBCON_PRIO_PANIC;
+
+ return NBCON_PRIO_NORMAL;
+}
+
+/**
* nbcon_alloc - Allocate and init the nbcon console specific data
* @con: Console to initialize
*
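
Since the helper's only input is the CPU state, it can be modeled directly. A sketch under that assumption, with this_cpu_in_panic() reduced to a stub; the EMERGENCY case arrives in a later patch:

#include <stdbool.h>
#include <stdio.h>

enum nbcon_prio { NBCON_PRIO_NONE, NBCON_PRIO_NORMAL, NBCON_PRIO_PANIC };

static bool this_cpu_in_panic(void) { return false; } /* stand-in stub */

/* Model of the helper: the priority is derived purely from CPU state,
 * so it is callable from any context. */
static enum nbcon_prio nbcon_get_default_prio(void)
{
    if (this_cpu_in_panic())
        return NBCON_PRIO_PANIC;
    return NBCON_PRIO_NORMAL;
}

int main(void)
{
    printf("prio = %d\n", nbcon_get_default_prio());
    return 0;
}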

View File

@ -1,225 +0,0 @@
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 20 Aug 2024 08:35:46 +0206
Subject: [PATCH 20/54] printk: nbcon: Provide function to flush using
write_atomic()
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
Provide nbcon_atomic_flush_pending() to perform flushing of all
registered nbcon consoles using their write_atomic() callback.
Unlike console_flush_all(), nbcon_atomic_flush_pending() will
only flush up through the newest record at the time of the
call. This prevents a CPU from printing unbounded when other
CPUs are adding records. If new records are added while
flushing, it is expected that the dedicated printer threads
will print those records. If the printer thread is not
available (which is always the case at this point in the
rework), nbcon_atomic_flush_pending() _will_ flush all records
in the ringbuffer.
Unlike console_flush_all(), nbcon_atomic_flush_pending() will
fully flush one console before flushing the next. This helps to
guarantee that a block of pending records (such as a stack
trace in an emergency situation) can be printed atomically at
once before releasing console ownership.
nbcon_atomic_flush_pending() is safe in any context because it
uses write_atomic() and acquires with unsafe_takeover disabled.
Co-developed-by: John Ogness <john.ogness@linutronix.de>
Signed-off-by: John Ogness <john.ogness@linutronix.de>
Signed-off-by: Thomas Gleixner (Intel) <tglx@linutronix.de>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Link: https://lore.kernel.org/r/20240820063001.36405-21-john.ogness@linutronix.de
Signed-off-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/printk/internal.h | 2
kernel/printk/nbcon.c | 151 ++++++++++++++++++++++++++++++++++++++++++++++-
2 files changed, 152 insertions(+), 1 deletion(-)
--- a/kernel/printk/internal.h
+++ b/kernel/printk/internal.h
@@ -84,6 +84,7 @@ void nbcon_seq_force(struct console *con
bool nbcon_alloc(struct console *con);
void nbcon_free(struct console *con);
enum nbcon_prio nbcon_get_default_prio(void);
+void nbcon_atomic_flush_pending(void);
/*
* Check if the given console is currently capable and allowed to print
@@ -138,6 +139,7 @@ static inline void nbcon_seq_force(struc
static inline bool nbcon_alloc(struct console *con) { return false; }
static inline void nbcon_free(struct console *con) { }
static inline enum nbcon_prio nbcon_get_default_prio(void) { return NBCON_PRIO_NONE; }
+static inline void nbcon_atomic_flush_pending(void) { }
static inline bool console_is_usable(struct console *con, short flags) { return false; }
--- a/kernel/printk/nbcon.c
+++ b/kernel/printk/nbcon.c
@@ -886,7 +886,6 @@ EXPORT_SYMBOL_GPL(nbcon_exit_unsafe);
* When true is returned, @wctxt->ctxt.backlog indicates whether there are
 * still records pending in the ringbuffer.
*/
-__maybe_unused
static bool nbcon_emit_next_record(struct nbcon_write_context *wctxt)
{
struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt);
@@ -992,6 +991,156 @@ enum nbcon_prio nbcon_get_default_prio(v
return NBCON_PRIO_NORMAL;
}
+/*
+ * __nbcon_atomic_flush_pending_con - Flush specified nbcon console using its
+ * write_atomic() callback
+ * @con: The nbcon console to flush
+ * @stop_seq: Flush up until this record
+ *
+ * Return: 0 if @con was flushed up to @stop_seq. Otherwise, error code on
+ * failure.
+ *
+ * Errors:
+ *
+ * -EPERM: Unable to acquire console ownership.
+ *
+ * -EAGAIN: Another context took over ownership while printing.
+ *
+ * -ENOENT: A record before @stop_seq is not available.
+ *
+ * If flushing up to @stop_seq was not successful, it only makes sense for the
+ * caller to try again when -EAGAIN was returned. When -EPERM is returned,
+ * this context is not allowed to acquire the console. When -ENOENT is
+ * returned, it cannot be expected that the unfinalized record will become
+ * available.
+ */
+static int __nbcon_atomic_flush_pending_con(struct console *con, u64 stop_seq)
+{
+ struct nbcon_write_context wctxt = { };
+ struct nbcon_context *ctxt = &ACCESS_PRIVATE(&wctxt, ctxt);
+ int err = 0;
+
+ ctxt->console = con;
+ ctxt->spinwait_max_us = 2000;
+ ctxt->prio = nbcon_get_default_prio();
+
+ if (!nbcon_context_try_acquire(ctxt))
+ return -EPERM;
+
+ while (nbcon_seq_read(con) < stop_seq) {
+ /*
+ * nbcon_emit_next_record() returns false when the console was
+ * handed over or taken over. In both cases the context is no
+ * longer valid.
+ */
+ if (!nbcon_emit_next_record(&wctxt))
+ return -EAGAIN;
+
+ if (!ctxt->backlog) {
+ /* Are there reserved but not yet finalized records? */
+ if (nbcon_seq_read(con) < stop_seq)
+ err = -ENOENT;
+ break;
+ }
+ }
+
+ nbcon_context_release(ctxt);
+ return err;
+}
+
+/**
+ * nbcon_atomic_flush_pending_con - Flush specified nbcon console using its
+ * write_atomic() callback
+ * @con: The nbcon console to flush
+ * @stop_seq: Flush up until this record
+ *
+ * This will stop flushing before @stop_seq if another context has ownership.
+ * That context is then responsible for the flushing. Likewise, if new records
+ * are added while this context was flushing and there is no other context
+ * to handle the printing, this context must also flush those records.
+ */
+static void nbcon_atomic_flush_pending_con(struct console *con, u64 stop_seq)
+{
+ unsigned long flags;
+ int err;
+
+again:
+ /*
+ * Atomic flushing does not use console driver synchronization (i.e.
+ * it does not hold the port lock for uart consoles). Therefore IRQs
+ * must be disabled to avoid being interrupted and then calling into
+ * a driver that will deadlock trying to acquire console ownership.
+ */
+ local_irq_save(flags);
+
+ err = __nbcon_atomic_flush_pending_con(con, stop_seq);
+
+ local_irq_restore(flags);
+
+ /*
+ * If there was a new owner (-EPERM, -EAGAIN), that context is
+ * responsible for completing.
+ *
+ * Do not wait for records not yet finalized (-ENOENT) to avoid a
+ * possible deadlock. They will either get flushed by the writer or
+ * eventually skipped on panic CPU.
+ */
+ if (err)
+ return;
+
+ /*
+ * If flushing was successful but more records are available, this
+ * context must flush those remaining records because there is no
+ * other context that will do it.
+ */
+ if (prb_read_valid(prb, nbcon_seq_read(con), NULL)) {
+ stop_seq = prb_next_reserve_seq(prb);
+ goto again;
+ }
+}
+
+/**
+ * __nbcon_atomic_flush_pending - Flush all nbcon consoles using their
+ * write_atomic() callback
+ * @stop_seq: Flush up until this record
+ */
+static void __nbcon_atomic_flush_pending(u64 stop_seq)
+{
+ struct console *con;
+ int cookie;
+
+ cookie = console_srcu_read_lock();
+ for_each_console_srcu(con) {
+ short flags = console_srcu_read_flags(con);
+
+ if (!(flags & CON_NBCON))
+ continue;
+
+ if (!console_is_usable(con, flags))
+ continue;
+
+ if (nbcon_seq_read(con) >= stop_seq)
+ continue;
+
+ nbcon_atomic_flush_pending_con(con, stop_seq);
+ }
+ console_srcu_read_unlock(cookie);
+}
+
+/**
+ * nbcon_atomic_flush_pending - Flush all nbcon consoles using their
+ * write_atomic() callback
+ *
+ * Flush the backlog up through the currently newest record. Any new
+ * records added while flushing will not be flushed if there is another
+ * context available to handle the flushing. This is to avoid one CPU
+ * printing unbounded because other CPUs continue to add records.
+ */
+void nbcon_atomic_flush_pending(void)
+{
+ __nbcon_atomic_flush_pending(prb_next_reserve_seq(prb));
+}
+
/**
* nbcon_alloc - Allocate and init the nbcon console specific data
* @con: Console to initialize
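
The contract of the flush loop, flush up to @stop_seq and report why flushing stopped, can be modeled with plain counters. A sketch under that simplification; ownership handover (-EPERM/-EAGAIN) is omitted, so only the unfinalized-record case is exercised:

#include <stdio.h>

#define ENOENT_ 3 /* stand-in for the kernel's -ENOENT */

static unsigned long long con_seq;       /* next record the console prints */
static unsigned long long finalized_seq; /* records finalized so far */

static int emit_next_record(void)
{
    if (con_seq >= finalized_seq)
        return 0; /* backlog empty */
    printf("emit record %llu\n", con_seq);
    con_seq++;
    return 1;
}

/* Model of the loop body: stop at stop_seq, and report when a record
 * before stop_seq is reserved but not yet finalized. */
static int flush_pending(unsigned long long stop_seq)
{
    while (con_seq < stop_seq) {
        if (!emit_next_record())
            return -ENOENT_;
    }
    return 0;
}

int main(void)
{
    finalized_seq = 3;
    printf("rc = %d\n", flush_pending(5)); /* stops early with -ENOENT_ */
    return 0;
}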

View File

@ -1,82 +0,0 @@
From: John Ogness <john.ogness@linutronix.de>
Date: Tue, 20 Aug 2024 08:35:47 +0206
Subject: [PATCH 21/54] printk: Track registered boot consoles
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
Unfortunately it is not known if a boot console and a regular
(legacy or nbcon) console use the same hardware. For this reason
they must not be allowed to print simultaneously.
For legacy consoles this is not an issue because they are
already synchronized with the boot consoles using the console
lock. However nbcon consoles can be triggered separately.
Add a global flag @have_boot_console to identify if any boot
consoles are registered. This will be used in follow-up commits
to ensure that boot consoles and nbcon consoles cannot print
simultaneously.
Signed-off-by: John Ogness <john.ogness@linutronix.de>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Link: https://lore.kernel.org/r/20240820063001.36405-22-john.ogness@linutronix.de
Signed-off-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/printk/printk.c | 24 ++++++++++++++++++++++++
1 file changed, 24 insertions(+)
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -463,6 +463,14 @@ static int console_msg_format = MSG_FORM
/* syslog_lock protects syslog_* variables and write access to clear_seq. */
static DEFINE_MUTEX(syslog_lock);
+/*
+ * Specifies if a boot console is registered. If boot consoles are present,
+ * nbcon consoles cannot print simultaneously and must be synchronized by
+ * the console lock. This is because boot consoles and nbcon consoles may
+ * have mapped the same hardware.
+ */
+static bool have_boot_console;
+
#ifdef CONFIG_PRINTK
DECLARE_WAIT_QUEUE_HEAD(log_wait);
/* All 3 protected by @syslog_lock. */
@@ -3611,6 +3619,9 @@ void register_console(struct console *ne
newcon->seq = init_seq;
}
+ if (newcon->flags & CON_BOOT)
+ have_boot_console = true;
+
/*
* If another context is actively using the hardware of this new
* console, it will not be aware of the nbcon synchronization. This
@@ -3681,7 +3692,9 @@ EXPORT_SYMBOL(register_console);
static int unregister_console_locked(struct console *console)
{
bool use_device_lock = (console->flags & CON_NBCON) && console->write_atomic;
+ bool found_boot_con = false;
unsigned long flags;
+ struct console *c;
int res;
lockdep_assert_console_list_lock_held();
@@ -3739,6 +3752,17 @@ static int unregister_console_locked(str
if (console->exit)
res = console->exit(console);
+ /*
+ * With this console gone, the global flags tracking registered
+ * console types may have changed. Update them.
+ */
+ for_each_console(c) {
+ if (c->flags & CON_BOOT)
+ found_boot_con = true;
+ }
+ if (!found_boot_con)
+ have_boot_console = found_boot_con;
+
return res;
}
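
The unregister path recomputes the global flag from scratch rather than decrementing a counter, which stays correct even if the flag was stale. A sketch of that rescan, with a plain array standing in for the console list (values and layout are hypothetical):

#include <stdbool.h>
#include <stdio.h>

#define CON_BOOT 0x0002 /* stand-in flag value */

struct console { short flags; };

static struct console consoles[] = { { 0 }, { CON_BOOT } };
static int nr_consoles = 2;
static bool have_boot_console = true;

static void unregister_console(int idx)
{
    bool found_boot_con = false;
    int i;

    /* remove by swapping with the last entry */
    consoles[idx] = consoles[--nr_consoles];

    /* rescan the remaining consoles, as the patch does */
    for (i = 0; i < nr_consoles; i++)
        if (consoles[i].flags & CON_BOOT)
            found_boot_con = true;
    if (!found_boot_con)
        have_boot_console = false;
}

int main(void)
{
    unregister_console(1); /* drop the only boot console */
    printf("have_boot_console = %d\n", have_boot_console);
    return 0;
}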

View File

@ -1,212 +0,0 @@
From: John Ogness <john.ogness@linutronix.de>
Date: Tue, 20 Aug 2024 08:35:48 +0206
Subject: [PATCH 22/54] printk: nbcon: Use nbcon consoles in
console_flush_all()
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
Allow nbcon consoles to print messages in the legacy printk()
caller context (printing via unlock) by integrating them into
console_flush_all(). The write_atomic() callback is used for
printing.
Provide nbcon_legacy_emit_next_record(), which acts as the
nbcon variant of console_emit_next_record(). Call this variant
within console_flush_all() for nbcon consoles. Since nbcon
consoles use their own @nbcon_seq variable to track the next
record to print, this also must be appropriately handled in
console_flush_all().
Note that the legacy printing logic uses @handover to detect
handovers for printing all consoles. For nbcon consoles,
handovers/takeovers occur on a per-console basis and thus do
not cause the console_flush_all() loop to abort.
Signed-off-by: John Ogness <john.ogness@linutronix.de>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Link: https://lore.kernel.org/r/20240820063001.36405-23-john.ogness@linutronix.de
Signed-off-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/printk/internal.h | 6 +++
kernel/printk/nbcon.c | 87 +++++++++++++++++++++++++++++++++++++++++++++++
kernel/printk/printk.c | 17 ++++++---
3 files changed, 105 insertions(+), 5 deletions(-)
--- a/kernel/printk/internal.h
+++ b/kernel/printk/internal.h
@@ -78,6 +78,8 @@ void defer_console_output(void);
u16 printk_parse_prefix(const char *text, int *level,
enum printk_info_flags *flags);
+void console_lock_spinning_enable(void);
+int console_lock_spinning_disable_and_check(int cookie);
u64 nbcon_seq_read(struct console *con);
void nbcon_seq_force(struct console *con, u64 seq);
@@ -85,6 +87,8 @@ bool nbcon_alloc(struct console *con);
void nbcon_free(struct console *con);
enum nbcon_prio nbcon_get_default_prio(void);
void nbcon_atomic_flush_pending(void);
+bool nbcon_legacy_emit_next_record(struct console *con, bool *handover,
+ int cookie);
/*
* Check if the given console is currently capable and allowed to print
@@ -140,6 +144,8 @@ static inline bool nbcon_alloc(struct co
static inline void nbcon_free(struct console *con) { }
static inline enum nbcon_prio nbcon_get_default_prio(void) { return NBCON_PRIO_NONE; }
static inline void nbcon_atomic_flush_pending(void) { }
+static inline bool nbcon_legacy_emit_next_record(struct console *con, bool *handover,
+ int cookie) { return false; }
static inline bool console_is_usable(struct console *con, short flags) { return false; }
--- a/kernel/printk/nbcon.c
+++ b/kernel/printk/nbcon.c
@@ -992,6 +992,93 @@ enum nbcon_prio nbcon_get_default_prio(v
}
/*
+ * nbcon_atomic_emit_one - Print one record for an nbcon console using the
+ * write_atomic() callback
+ * @wctxt: An initialized write context struct to use for this context
+ *
+ * Return: True, when a record has been printed and there are still
+ * pending records. The caller might want to continue flushing.
+ *
+ * False, when there is no pending record, or when the console
+ * context cannot be acquired, or the ownership has been lost.
+ * The caller should give up. Either the job is done, cannot be
+ * done, or will be handled by the owning context.
+ *
+ * This is an internal helper to handle the locking of the console before
+ * calling nbcon_emit_next_record().
+ */
+static bool nbcon_atomic_emit_one(struct nbcon_write_context *wctxt)
+{
+ struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt);
+
+ if (!nbcon_context_try_acquire(ctxt))
+ return false;
+
+ /*
+ * nbcon_emit_next_record() returns false when the console was
+ * handed over or taken over. In both cases the context is no
+ * longer valid.
+ *
+ * The higher priority printing context takes over responsibility
+ * to print the pending records.
+ */
+ if (!nbcon_emit_next_record(wctxt))
+ return false;
+
+ nbcon_context_release(ctxt);
+
+ return ctxt->backlog;
+}
+
+/**
+ * nbcon_legacy_emit_next_record - Print one record for an nbcon console
+ * in legacy contexts
+ * @con: The console to print on
+ * @handover: Will be set to true if a printk waiter has taken over the
+ * console_lock, in which case the caller is no longer holding
+ * both the console_lock and the SRCU read lock. Otherwise it
+ * is set to false.
+ * @cookie: The cookie from the SRCU read lock.
+ *
+ * Context: Any context except NMI.
+ * Return: True, when a record has been printed and there are still
+ * pending records. The caller might want to continue flushing.
+ *
+ * False, when there is no pending record, or when the console
+ * context cannot be acquired, or the ownership has been lost.
+ * The caller should give up. Either the job is done, cannot be
+ * done, or will be handled by the owning context.
+ *
+ * This function is meant to be called by console_flush_all() to print records
+ * on nbcon consoles from legacy context (printing via console unlocking).
+ * Essentially it is the nbcon version of console_emit_next_record().
+ */
+bool nbcon_legacy_emit_next_record(struct console *con, bool *handover,
+ int cookie)
+{
+ struct nbcon_write_context wctxt = { };
+ struct nbcon_context *ctxt = &ACCESS_PRIVATE(&wctxt, ctxt);
+ unsigned long flags;
+ bool progress;
+
+ /* Use the same procedure as console_emit_next_record(). */
+ printk_safe_enter_irqsave(flags);
+ console_lock_spinning_enable();
+ stop_critical_timings();
+
+ ctxt->console = con;
+ ctxt->prio = nbcon_get_default_prio();
+
+ progress = nbcon_atomic_emit_one(&wctxt);
+
+ start_critical_timings();
+ *handover = console_lock_spinning_disable_and_check(cookie);
+ printk_safe_exit_irqrestore(flags);
+
+ return progress;
+}
+
+/**
* __nbcon_atomic_flush_pending_con - Flush specified nbcon console using its
* write_atomic() callback
* @con: The nbcon console to flush
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -1860,7 +1860,7 @@ static bool console_waiter;
* there may be a waiter spinning (like a spinlock). Also it must be
* ready to hand over the lock at the end of the section.
*/
-static void console_lock_spinning_enable(void)
+void console_lock_spinning_enable(void)
{
/*
* Do not use spinning in panic(). The panic CPU wants to keep the lock.
@@ -1899,7 +1899,7 @@ static void console_lock_spinning_enable
*
* Return: 1 if the lock rights were passed, 0 otherwise.
*/
-static int console_lock_spinning_disable_and_check(int cookie)
+int console_lock_spinning_disable_and_check(int cookie)
{
int waiter;
@@ -3022,13 +3022,20 @@ static bool console_flush_all(bool do_co
cookie = console_srcu_read_lock();
for_each_console_srcu(con) {
short flags = console_srcu_read_flags(con);
+ u64 printk_seq;
bool progress;
if (!console_is_usable(con, flags))
continue;
any_usable = true;
- progress = console_emit_next_record(con, handover, cookie);
+ if (flags & CON_NBCON) {
+ progress = nbcon_legacy_emit_next_record(con, handover, cookie);
+ printk_seq = nbcon_seq_read(con);
+ } else {
+ progress = console_emit_next_record(con, handover, cookie);
+ printk_seq = con->seq;
+ }
/*
* If a handover has occurred, the SRCU read lock
@@ -3038,8 +3045,8 @@ static bool console_flush_all(bool do_co
return false;
/* Track the next of the highest seq flushed. */
- if (con->seq > *next_seq)
- *next_seq = con->seq;
+ if (printk_seq > *next_seq)
+ *next_seq = printk_seq;
if (!progress)
continue;
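
The loop body now branches on the console type and reads progress from the matching sequence counter. A compact model of that dispatch, with the emit functions and types reduced to stand-ins:

#include <stdbool.h>
#include <stdio.h>

#define CON_NBCON 0x0400 /* stand-in flag value */

struct console {
    short flags;
    unsigned long long seq;       /* legacy progress */
    unsigned long long nbcon_seq; /* nbcon progress */
};

static void legacy_emit(struct console *c) { c->seq++; }
static void nbcon_emit(struct console *c)  { c->nbcon_seq++; }

static void flush_all(struct console *cons, int n,
                      unsigned long long *next_seq)
{
    int i;

    for (i = 0; i < n; i++) {
        struct console *con = &cons[i];
        unsigned long long printk_seq;

        if (con->flags & CON_NBCON) {
            nbcon_emit(con);
            printk_seq = con->nbcon_seq;
        } else {
            legacy_emit(con);
            printk_seq = con->seq;
        }
        /* Track the next of the highest seq flushed. */
        if (printk_seq > *next_seq)
            *next_seq = printk_seq;
    }
}

int main(void)
{
    struct console cons[2] = { { 0, 4, 0 }, { CON_NBCON, 0, 7 } };
    unsigned long long next_seq = 0;

    flush_all(cons, 2, &next_seq);
    printf("next_seq = %llu\n", next_seq);
    return 0;
}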

View File

@ -1,68 +0,0 @@
From: John Ogness <john.ogness@linutronix.de>
Date: Tue, 20 Aug 2024 08:35:49 +0206
Subject: [PATCH 23/54] printk: Add is_printk_legacy_deferred()
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
If printk has been explicitly deferred or is called from NMI
context, legacy console printing must be deferred to an irq_work
context. Introduce a helper function is_printk_legacy_deferred()
for a CPU to query if it must defer legacy console printing.
In follow-up commits this helper will be needed at other call
sites as well.
Signed-off-by: John Ogness <john.ogness@linutronix.de>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Link: https://lore.kernel.org/r/20240820063001.36405-24-john.ogness@linutronix.de
Signed-off-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/printk/internal.h | 2 ++
kernel/printk/printk_safe.c | 11 ++++++++++-
2 files changed, 12 insertions(+), 1 deletion(-)
--- a/kernel/printk/internal.h
+++ b/kernel/printk/internal.h
@@ -75,6 +75,7 @@ bool printk_percpu_data_ready(void);
} while (0)
void defer_console_output(void);
+bool is_printk_legacy_deferred(void);
u16 printk_parse_prefix(const char *text, int *level,
enum printk_info_flags *flags);
@@ -138,6 +139,7 @@ static inline bool console_is_usable(str
#define printk_safe_exit_irqrestore(flags) local_irq_restore(flags)
static inline bool printk_percpu_data_ready(void) { return false; }
+static inline bool is_printk_legacy_deferred(void) { return false; }
static inline u64 nbcon_seq_read(struct console *con) { return 0; }
static inline void nbcon_seq_force(struct console *con, u64 seq) { }
static inline bool nbcon_alloc(struct console *con) { return false; }
--- a/kernel/printk/printk_safe.c
+++ b/kernel/printk/printk_safe.c
@@ -38,6 +38,15 @@ void __printk_deferred_exit(void)
__printk_safe_exit();
}
+bool is_printk_legacy_deferred(void)
+{
+ /*
+ * The per-CPU variable @printk_context can be read safely in any
+ * context. CPU migration is always disabled when set.
+ */
+ return (this_cpu_read(printk_context) || in_nmi());
+}
+
asmlinkage int vprintk(const char *fmt, va_list args)
{
#ifdef CONFIG_KGDB_KDB
@@ -50,7 +59,7 @@ asmlinkage int vprintk(const char *fmt,
* Use the main logbuf even in NMI. But avoid calling console
* drivers that might have their own locks.
*/
- if (this_cpu_read(printk_context) || in_nmi())
+ if (is_printk_legacy_deferred())
return vprintk_deferred(fmt, args);
/* No obstacles. */
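
The helper only centralizes a condition that vprintk() already used, which is what makes it reusable at the later call sites. A sketch of the call-site pattern, with the per-CPU counter and in_nmi() modeled as globals:

#include <stdbool.h>
#include <stdio.h>

static int printk_context; /* models the per-CPU counter */
static bool in_nmi_flag;   /* models in_nmi() */

static bool is_printk_legacy_deferred(void)
{
    return printk_context || in_nmi_flag;
}

static void vprintk_model(const char *msg)
{
    if (is_printk_legacy_deferred()) {
        printf("defer: %s\n", msg);  /* store only, flush via irq_work */
        return;
    }
    printf("direct: %s\n", msg);     /* safe to call console drivers */
}

int main(void)
{
    vprintk_model("hello");
    in_nmi_flag = true;
    vprintk_model("from NMI");
    return 0;
}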

View File

@ -1,79 +0,0 @@
From: John Ogness <john.ogness@linutronix.de>
Date: Tue, 20 Aug 2024 08:35:50 +0206
Subject: [PATCH 24/54] printk: nbcon: Flush new records on device_release()
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
There may be new records that were added while a driver was
holding the nbcon context for non-printing purposes. These
new records must be flushed by the nbcon_device_release()
context because no other context will do it.
If boot consoles are registered, the legacy loop is used
(either direct or per irq_work) to handle the flushing.
Signed-off-by: John Ogness <john.ogness@linutronix.de>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Link: https://lore.kernel.org/r/20240820063001.36405-25-john.ogness@linutronix.de
Signed-off-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/printk/internal.h | 2 ++
kernel/printk/nbcon.c | 20 ++++++++++++++++++++
kernel/printk/printk.c | 2 +-
3 files changed, 23 insertions(+), 1 deletion(-)
--- a/kernel/printk/internal.h
+++ b/kernel/printk/internal.h
@@ -153,6 +153,8 @@ static inline bool console_is_usable(str
#endif /* CONFIG_PRINTK */
+extern bool have_boot_console;
+
extern struct printk_buffers printk_shared_pbufs;
/**
--- a/kernel/printk/nbcon.c
+++ b/kernel/printk/nbcon.c
@@ -1326,10 +1326,30 @@ EXPORT_SYMBOL_GPL(nbcon_device_try_acqui
void nbcon_device_release(struct console *con)
{
struct nbcon_context *ctxt = &ACCESS_PRIVATE(con, nbcon_device_ctxt);
+ int cookie;
if (!nbcon_context_exit_unsafe(ctxt))
return;
nbcon_context_release(ctxt);
+
+ /*
+ * This context must flush any new records added while the console
+ * was locked. The console_srcu_read_lock must be taken to ensure
+ * the console is usable throughout flushing.
+ */
+ cookie = console_srcu_read_lock();
+ if (console_is_usable(con, console_srcu_read_flags(con)) &&
+ prb_read_valid(prb, nbcon_seq_read(con), NULL)) {
+ if (!have_boot_console) {
+ __nbcon_atomic_flush_pending_con(con, prb_next_reserve_seq(prb));
+ } else if (!is_printk_legacy_deferred()) {
+ if (console_trylock())
+ console_unlock();
+ } else {
+ printk_trigger_flush();
+ }
+ }
+ console_srcu_read_unlock(cookie);
}
EXPORT_SYMBOL_GPL(nbcon_device_release);
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -469,7 +469,7 @@ static DEFINE_MUTEX(syslog_lock);
* the console lock. This is because boot consoles and nbcon consoles may
* have mapped the same hardware.
*/
-static bool have_boot_console;
+bool have_boot_console;
#ifdef CONFIG_PRINTK
DECLARE_WAIT_QUEUE_HEAD(log_wait);
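
The release path tries three flush methods in order of preference. Here is the decision chain reduced to booleans standing in for the kernel predicates of the same intent:

#include <stdbool.h>
#include <stdio.h>

static bool records_pending = true;
static bool have_boot_console;
static bool legacy_deferred;

static void release_flush(void)
{
    if (!records_pending)
        return;
    if (!have_boot_console)
        printf("flush directly via write_atomic()\n");
    else if (!legacy_deferred)
        printf("flush via console_trylock()/console_unlock()\n");
    else
        printf("trigger irq_work flush\n");
}

int main(void)
{
    release_flush();            /* no boot consoles: direct flush */
    have_boot_console = true;
    legacy_deferred = true;
    release_flush();            /* must defer: irq_work */
    return 0;
}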

View File

@ -1,30 +0,0 @@
From: John Ogness <john.ogness@linutronix.de>
Date: Tue, 20 Aug 2024 08:35:51 +0206
Subject: [PATCH 25/54] printk: Flush nbcon consoles first on panic
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
In console_flush_on_panic(), flush the nbcon consoles before
flushing legacy consoles. The legacy write() callbacks are not
fully safe when oops_in_progress is set.
Signed-off-by: John Ogness <john.ogness@linutronix.de>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Link: https://lore.kernel.org/r/20240820063001.36405-26-john.ogness@linutronix.de
Signed-off-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/printk/printk.c | 3 +++
1 file changed, 3 insertions(+)
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -3270,6 +3270,9 @@ void console_flush_on_panic(enum con_flu
if (mode == CONSOLE_REPLAY_ALL)
__console_rewind_all();
+ if (!have_boot_console)
+ nbcon_atomic_flush_pending();
+
console_flush_all(false, &next_seq, &handover);
}

View File

@ -1,161 +0,0 @@
From: John Ogness <john.ogness@linutronix.de>
Date: Tue, 20 Aug 2024 08:35:52 +0206
Subject: [PATCH 26/54] printk: nbcon: Add unsafe flushing on panic
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
Add nbcon_atomic_flush_unsafe() to flush all nbcon consoles
using the write_atomic() callback and allowing unsafe hostile
takeovers. Call this at the end of panic() as a final attempt
to flush any pending messages.
Note that legacy consoles use unsafe methods for flushing
from the beginning of panic (see bust_spinlocks()). Therefore,
systems using both legacy and nbcon consoles may still fail to
see panic messages due to unsafe legacy console usage.
Signed-off-by: John Ogness <john.ogness@linutronix.de>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Link: https://lore.kernel.org/r/20240820063001.36405-27-john.ogness@linutronix.de
Signed-off-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
include/linux/printk.h | 5 +++++
kernel/panic.c | 1 +
kernel/printk/nbcon.c | 32 +++++++++++++++++++++++++-------
3 files changed, 31 insertions(+), 7 deletions(-)
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -202,6 +202,7 @@ void printk_trigger_flush(void);
void console_try_replay_all(void);
extern bool nbcon_device_try_acquire(struct console *con);
extern void nbcon_device_release(struct console *con);
+void nbcon_atomic_flush_unsafe(void);
#else
static inline __printf(1, 0)
int vprintk(const char *s, va_list args)
@@ -294,6 +295,10 @@ static inline void nbcon_device_release(
{
}
+static inline void nbcon_atomic_flush_unsafe(void)
+{
+}
+
#endif
bool this_cpu_in_panic(void);
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -463,6 +463,7 @@ void panic(const char *fmt, ...)
* Explicitly flush the kernel log buffer one last time.
*/
console_flush_on_panic(CONSOLE_FLUSH_PENDING);
+ nbcon_atomic_flush_unsafe();
local_irq_enable();
for (i = 0; ; i += PANIC_TIMER_STEP) {
--- a/kernel/printk/nbcon.c
+++ b/kernel/printk/nbcon.c
@@ -1083,6 +1083,7 @@ bool nbcon_legacy_emit_next_record(struc
* write_atomic() callback
* @con: The nbcon console to flush
* @stop_seq: Flush up until this record
+ * @allow_unsafe_takeover: True, to allow unsafe hostile takeovers
*
 * Return: 0 if @con was flushed up to @stop_seq. Otherwise, error code on
* failure.
@@ -1101,7 +1102,8 @@ bool nbcon_legacy_emit_next_record(struc
* returned, it cannot be expected that the unfinalized record will become
* available.
*/
-static int __nbcon_atomic_flush_pending_con(struct console *con, u64 stop_seq)
+static int __nbcon_atomic_flush_pending_con(struct console *con, u64 stop_seq,
+ bool allow_unsafe_takeover)
{
struct nbcon_write_context wctxt = { };
struct nbcon_context *ctxt = &ACCESS_PRIVATE(&wctxt, ctxt);
@@ -1110,6 +1112,7 @@ static int __nbcon_atomic_flush_pending_
ctxt->console = con;
ctxt->spinwait_max_us = 2000;
ctxt->prio = nbcon_get_default_prio();
+ ctxt->allow_unsafe_takeover = allow_unsafe_takeover;
if (!nbcon_context_try_acquire(ctxt))
return -EPERM;
@@ -1140,13 +1143,15 @@ static int __nbcon_atomic_flush_pending_
* write_atomic() callback
* @con: The nbcon console to flush
* @stop_seq: Flush up until this record
+ * @allow_unsafe_takeover: True, to allow unsafe hostile takeovers
*
* This will stop flushing before @stop_seq if another context has ownership.
* That context is then responsible for the flushing. Likewise, if new records
* are added while this context was flushing and there is no other context
* to handle the printing, this context must also flush those records.
*/
-static void nbcon_atomic_flush_pending_con(struct console *con, u64 stop_seq)
+static void nbcon_atomic_flush_pending_con(struct console *con, u64 stop_seq,
+ bool allow_unsafe_takeover)
{
unsigned long flags;
int err;
@@ -1160,7 +1165,7 @@ static void nbcon_atomic_flush_pending_c
*/
local_irq_save(flags);
- err = __nbcon_atomic_flush_pending_con(con, stop_seq);
+ err = __nbcon_atomic_flush_pending_con(con, stop_seq, allow_unsafe_takeover);
local_irq_restore(flags);
@@ -1190,8 +1195,9 @@ static void nbcon_atomic_flush_pending_c
* __nbcon_atomic_flush_pending - Flush all nbcon consoles using their
* write_atomic() callback
* @stop_seq: Flush up until this record
+ * @allow_unsafe_takeover: True, to allow unsafe hostile takeovers
*/
-static void __nbcon_atomic_flush_pending(u64 stop_seq)
+static void __nbcon_atomic_flush_pending(u64 stop_seq, bool allow_unsafe_takeover)
{
struct console *con;
int cookie;
@@ -1209,7 +1215,7 @@ static void __nbcon_atomic_flush_pending
if (nbcon_seq_read(con) >= stop_seq)
continue;
- nbcon_atomic_flush_pending_con(con, stop_seq);
+ nbcon_atomic_flush_pending_con(con, stop_seq, allow_unsafe_takeover);
}
console_srcu_read_unlock(cookie);
}
@@ -1225,7 +1231,19 @@ static void __nbcon_atomic_flush_pending
*/
void nbcon_atomic_flush_pending(void)
{
- __nbcon_atomic_flush_pending(prb_next_reserve_seq(prb));
+ __nbcon_atomic_flush_pending(prb_next_reserve_seq(prb), false);
+}
+
+/**
+ * nbcon_atomic_flush_unsafe - Flush all nbcon consoles using their
+ * write_atomic() callback and allowing unsafe hostile takeovers
+ *
+ * Flush the backlog up through the currently newest record. Unsafe hostile
+ * takeovers will be performed, if necessary.
+ */
+void nbcon_atomic_flush_unsafe(void)
+{
+ __nbcon_atomic_flush_pending(prb_next_reserve_seq(prb), true);
}
/**
@@ -1342,7 +1360,7 @@ void nbcon_device_release(struct console
if (console_is_usable(con, console_srcu_read_flags(con)) &&
prb_read_valid(prb, nbcon_seq_read(con), NULL)) {
if (!have_boot_console) {
- __nbcon_atomic_flush_pending_con(con, prb_next_reserve_seq(prb));
+ __nbcon_atomic_flush_pending_con(con, prb_next_reserve_seq(prb), false);
} else if (!is_printk_legacy_deferred()) {
if (console_trylock())
console_unlock();
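
The mechanical change is small: one boolean threaded from two public entry points into the shared worker. A sketch of that plumbing with placeholder bodies and a stubbed sequence source:

#include <stdbool.h>
#include <stdio.h>

static void __flush_pending(unsigned long long stop_seq,
                            bool allow_unsafe_takeover)
{
    printf("flush up to %llu, unsafe takeover %s\n",
           stop_seq, allow_unsafe_takeover ? "allowed" : "forbidden");
}

static unsigned long long next_reserve_seq(void) { return 42; } /* stub */

static void nbcon_atomic_flush_pending(void)
{
    __flush_pending(next_reserve_seq(), false);
}

static void nbcon_atomic_flush_unsafe(void)
{
    __flush_pending(next_reserve_seq(), true);
}

int main(void)
{
    nbcon_atomic_flush_pending(); /* normal operation */
    nbcon_atomic_flush_unsafe();  /* final attempt at the end of panic() */
    return 0;
}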

View File

@ -1,139 +0,0 @@
From: John Ogness <john.ogness@linutronix.de>
Date: Tue, 20 Aug 2024 08:35:53 +0206
Subject: [PATCH 27/54] printk: Avoid console_lock dance if no legacy or boot
consoles
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
Currently the console lock is used to attempt legacy-type
printing even if there are no legacy or boot consoles registered.
If no such consoles are registered, the console lock does not
need to be taken.
Add tracking of legacy console registration and use it with
boot console tracking to avoid unnecessary code paths, i.e.
do not use the console lock if there are no boot consoles
and no legacy consoles.
Signed-off-by: John Ogness <john.ogness@linutronix.de>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Link: https://lore.kernel.org/r/20240820063001.36405-28-john.ogness@linutronix.de
Signed-off-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/printk/printk.c | 39 ++++++++++++++++++++++++++++++++++-----
1 file changed, 34 insertions(+), 5 deletions(-)
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -464,6 +464,13 @@ static int console_msg_format = MSG_FORM
static DEFINE_MUTEX(syslog_lock);
/*
+ * Specifies if a legacy console is registered. If legacy consoles are
+ * present, it is necessary to perform the console lock/unlock dance
+ * whenever console flushing should occur.
+ */
+static bool have_legacy_console;
+
+/*
* Specifies if a boot console is registered. If boot consoles are present,
* nbcon consoles cannot print simultaneously and must be synchronized by
* the console lock. This is because boot consoles and nbcon consoles may
@@ -471,6 +478,14 @@ static DEFINE_MUTEX(syslog_lock);
*/
bool have_boot_console;
+/*
+ * Specifies if the console lock/unlock dance is needed for console
+ * printing. If @have_boot_console is true, the nbcon consoles will
+ * be printed serially along with the legacy consoles because nbcon
+ * consoles cannot print simultaneously with boot consoles.
+ */
+#define printing_via_unlock (have_legacy_console || have_boot_console)
+
#ifdef CONFIG_PRINTK
DECLARE_WAIT_QUEUE_HEAD(log_wait);
/* All 3 protected by @syslog_lock. */
@@ -2339,7 +2354,7 @@ asmlinkage int vprintk_emit(int facility
printed_len = vprintk_store(facility, level, dev_info, fmt, args);
/* If called from the scheduler, we can not call up(). */
- if (!in_sched) {
+ if (!in_sched && printing_via_unlock) {
/*
* The caller may be holding system-critical or
* timing-sensitive locks. Disable preemption during
@@ -2359,7 +2374,7 @@ asmlinkage int vprintk_emit(int facility
preempt_enable();
}
- if (in_sched)
+ if (in_sched && printing_via_unlock)
defer_console_output();
else
wake_up_klogd();
@@ -2719,7 +2734,7 @@ void resume_console(void)
*/
static int console_cpu_notify(unsigned int cpu)
{
- if (!cpuhp_tasks_frozen) {
+ if (!cpuhp_tasks_frozen && printing_via_unlock) {
/* If trylock fails, someone else is doing the printing */
if (console_trylock())
console_unlock();
@@ -3626,6 +3641,7 @@ void register_console(struct console *ne
if (newcon->flags & CON_NBCON) {
nbcon_seq_force(newcon, init_seq);
} else {
+ have_legacy_console = true;
newcon->seq = init_seq;
}
@@ -3702,6 +3718,7 @@ EXPORT_SYMBOL(register_console);
static int unregister_console_locked(struct console *console)
{
bool use_device_lock = (console->flags & CON_NBCON) && console->write_atomic;
+ bool found_legacy_con = false;
bool found_boot_con = false;
unsigned long flags;
struct console *c;
@@ -3769,9 +3786,13 @@ static int unregister_console_locked(str
for_each_console(c) {
if (c->flags & CON_BOOT)
found_boot_con = true;
+ if (!(c->flags & CON_NBCON))
+ found_legacy_con = true;
}
if (!found_boot_con)
have_boot_console = found_boot_con;
+ if (!found_legacy_con)
+ have_legacy_console = found_legacy_con;
return res;
}
@@ -3932,8 +3953,10 @@ static bool __pr_flush(struct console *c
seq = prb_next_reserve_seq(prb);
/* Flush the consoles so that records up to @seq are printed. */
- console_lock();
- console_unlock();
+ if (printing_via_unlock) {
+ console_lock();
+ console_unlock();
+ }
for (;;) {
unsigned long begin_jiffies;
@@ -3946,6 +3969,12 @@ static bool __pr_flush(struct console *c
* console->seq. Releasing console_lock flushes more
* records in case @seq is still not printed on all
* usable consoles.
+ *
+ * Holding the console_lock is not necessary if there
+ * are no legacy or boot consoles. However, such a
+ * console could register at any time. Always hold the
+ * console_lock as a precaution rather than
+ * synchronizing against register_console().
*/
console_lock();
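
Everything in the patch keys off one derived condition. A sketch of the macro and a guarded call site, with registration tracking reduced to globals:

#include <stdbool.h>
#include <stdio.h>

static bool have_legacy_console;
static bool have_boot_console;

#define printing_via_unlock (have_legacy_console || have_boot_console)

static void maybe_flush(void)
{
    if (printing_via_unlock)
        printf("console_lock(); console_unlock();\n");
    else
        printf("skip the console lock entirely\n");
}

int main(void)
{
    maybe_flush();              /* nothing registered: skip the dance */
    have_legacy_console = true;
    maybe_flush();              /* legacy present: lock/unlock */
    return 0;
}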

View File

@ -1,68 +0,0 @@
From: John Ogness <john.ogness@linutronix.de>
Date: Tue, 20 Aug 2024 08:35:54 +0206
Subject: [PATCH 28/54] printk: Track nbcon consoles
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
Add a global flag @have_nbcon_console to identify if any nbcon
consoles are registered. This will be used in follow-up commits
to preserve legacy behavior when no nbcon consoles are registered.
Signed-off-by: John Ogness <john.ogness@linutronix.de>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Link: https://lore.kernel.org/r/20240820063001.36405-29-john.ogness@linutronix.de
Signed-off-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/printk/printk.c | 14 +++++++++++++-
1 file changed, 13 insertions(+), 1 deletion(-)
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -471,6 +471,11 @@ static DEFINE_MUTEX(syslog_lock);
static bool have_legacy_console;
/*
+ * Specifies if an nbcon console is registered.
+ */
+static bool have_nbcon_console;
+
+/*
* Specifies if a boot console is registered. If boot consoles are present,
* nbcon consoles cannot print simultaneously and must be synchronized by
* the console lock. This is because boot consoles and nbcon consoles may
@@ -3639,6 +3644,7 @@ void register_console(struct console *ne
init_seq = get_init_console_seq(newcon, bootcon_registered);
if (newcon->flags & CON_NBCON) {
+ have_nbcon_console = true;
nbcon_seq_force(newcon, init_seq);
} else {
have_legacy_console = true;
@@ -3719,6 +3725,7 @@ static int unregister_console_locked(str
{
bool use_device_lock = (console->flags & CON_NBCON) && console->write_atomic;
bool found_legacy_con = false;
+ bool found_nbcon_con = false;
bool found_boot_con = false;
unsigned long flags;
struct console *c;
@@ -3786,13 +3793,18 @@ static int unregister_console_locked(str
for_each_console(c) {
if (c->flags & CON_BOOT)
found_boot_con = true;
- if (!(c->flags & CON_NBCON))
+
+ if (c->flags & CON_NBCON)
+ found_nbcon_con = true;
+ else
found_legacy_con = true;
}
if (!found_boot_con)
have_boot_console = found_boot_con;
if (!found_legacy_con)
have_legacy_console = found_legacy_con;
+ if (!found_nbcon_con)
+ have_nbcon_console = found_nbcon_con;
return res;
}

View File

@ -1,184 +0,0 @@
From: John Ogness <john.ogness@linutronix.de>
Date: Tue, 20 Aug 2024 08:35:55 +0206
Subject: [PATCH 29/54] printk: Coordinate direct printing in panic
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
If legacy and nbcon consoles are registered and the nbcon
consoles are allowed to flush (i.e. no boot consoles
registered), the legacy consoles will no longer perform
direct printing on the panic CPU until after the backtrace
has been stored. This will give the safe nbcon consoles a
chance to print the panic messages before allowing the
unsafe legacy consoles to print.
If no nbcon consoles are registered or they are not allowed
to flush because boot consoles are registered, there is no
change in behavior (i.e. legacy consoles will always attempt
to print from the printk() caller context).
Signed-off-by: John Ogness <john.ogness@linutronix.de>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Link: https://lore.kernel.org/r/20240820063001.36405-30-john.ogness@linutronix.de
Signed-off-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
include/linux/printk.h | 5 ++++
kernel/panic.c | 2 +
kernel/printk/internal.h | 1
kernel/printk/printk.c | 55 +++++++++++++++++++++++++++++++++++++++++------
4 files changed, 56 insertions(+), 7 deletions(-)
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -200,6 +200,7 @@ extern asmlinkage void dump_stack_lvl(co
extern asmlinkage void dump_stack(void) __cold;
void printk_trigger_flush(void);
void console_try_replay_all(void);
+void printk_legacy_allow_panic_sync(void);
extern bool nbcon_device_try_acquire(struct console *con);
extern void nbcon_device_release(struct console *con);
void nbcon_atomic_flush_unsafe(void);
@@ -286,6 +287,10 @@ static inline void console_try_replay_al
{
}
+static inline void printk_legacy_allow_panic_sync(void)
+{
+}
+
static inline bool nbcon_device_try_acquire(struct console *con)
{
return false;
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -374,6 +374,8 @@ void panic(const char *fmt, ...)
panic_other_cpus_shutdown(_crash_kexec_post_notifiers);
+ printk_legacy_allow_panic_sync();
+
/*
* Run any panic handlers, including those that might need to
* add information to the kmsg dump output.
--- a/kernel/printk/internal.h
+++ b/kernel/printk/internal.h
@@ -154,6 +154,7 @@ static inline bool console_is_usable(str
#endif /* CONFIG_PRINTK */
extern bool have_boot_console;
+extern bool legacy_allow_panic_sync;
extern struct printk_buffers printk_shared_pbufs;
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -471,7 +471,9 @@ static DEFINE_MUTEX(syslog_lock);
static bool have_legacy_console;
/*
- * Specifies if an nbcon console is registered.
+ * Specifies if an nbcon console is registered. If nbcon consoles are present,
+ * synchronous printing of legacy consoles will not occur during panic until
+ * the backtrace has been stored to the ringbuffer.
*/
static bool have_nbcon_console;
@@ -483,6 +485,9 @@ static bool have_nbcon_console;
*/
bool have_boot_console;
+/* See printk_legacy_allow_panic_sync() for details. */
+bool legacy_allow_panic_sync;
+
/*
* Specifies if the console lock/unlock dance is needed for console
* printing. If @have_boot_console is true, the nbcon consoles will
@@ -2330,12 +2335,28 @@ int vprintk_store(int facility, int leve
return ret;
}
+/*
+ * This acts as a one-way switch to allow legacy consoles to print from
+ * the printk() caller context on a panic CPU. It also attempts to flush
+ * the legacy consoles in this context.
+ */
+void printk_legacy_allow_panic_sync(void)
+{
+ legacy_allow_panic_sync = true;
+
+ if (printing_via_unlock && !is_printk_legacy_deferred()) {
+ if (console_trylock())
+ console_unlock();
+ }
+}
+
asmlinkage int vprintk_emit(int facility, int level,
const struct dev_printk_info *dev_info,
const char *fmt, va_list args)
{
+ bool do_trylock_unlock = printing_via_unlock;
+ bool defer_legacy = false;
int printed_len;
- bool in_sched = false;
/* Suppress unimportant messages after panic happens */
if (unlikely(suppress_printk))
@@ -2349,17 +2370,35 @@ asmlinkage int vprintk_emit(int facility
if (other_cpu_in_panic() && !panic_triggering_all_cpu_backtrace)
return 0;
+ /* If called from the scheduler, we can not call up(). */
if (level == LOGLEVEL_SCHED) {
level = LOGLEVEL_DEFAULT;
- in_sched = true;
+ defer_legacy = do_trylock_unlock;
+ do_trylock_unlock = false;
}
printk_delay(level);
printed_len = vprintk_store(facility, level, dev_info, fmt, args);
- /* If called from the scheduler, we can not call up(). */
- if (!in_sched && printing_via_unlock) {
+ if (have_nbcon_console && !have_boot_console) {
+ nbcon_atomic_flush_pending();
+
+ /*
+ * In panic, the legacy consoles are not allowed to print from
+ * the printk calling context unless explicitly allowed. This
+ * gives the safe nbcon consoles a chance to print out all the
+ * panic messages first. This restriction only applies if
+ * there are nbcon consoles registered and they are allowed to
+ * flush.
+ */
+ if (this_cpu_in_panic() && !legacy_allow_panic_sync) {
+ do_trylock_unlock = false;
+ defer_legacy = false;
+ }
+ }
+
+ if (do_trylock_unlock) {
/*
* The caller may be holding system-critical or
* timing-sensitive locks. Disable preemption during
@@ -2379,7 +2418,7 @@ asmlinkage int vprintk_emit(int facility
preempt_enable();
}
- if (in_sched && printing_via_unlock)
+ if (defer_legacy)
defer_console_output();
else
wake_up_klogd();
@@ -3293,7 +3332,9 @@ void console_flush_on_panic(enum con_flu
if (!have_boot_console)
nbcon_atomic_flush_pending();
- console_flush_all(false, &next_seq, &handover);
+ /* Flush legacy consoles once allowed, even when dangerous. */
+ if (legacy_allow_panic_sync)
+ console_flush_all(false, &next_seq, &handover);
}
/*
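
The coordination reduces to a one-way switch consulted on the panic CPU. A model of that ordering with all kernel state collapsed into globals (hypothetical, for illustration only):

#include <stdbool.h>
#include <stdio.h>

static bool have_nbcon_console = true;
static bool have_boot_console;
static bool legacy_allow_panic_sync;
static bool cpu_in_panic = true;

static void emit(void)
{
    bool do_legacy_direct = true;

    if (have_nbcon_console && !have_boot_console) {
        printf("nbcon atomic flush\n");
        /* Hold back legacy printing until explicitly allowed. */
        if (cpu_in_panic && !legacy_allow_panic_sync)
            do_legacy_direct = false;
    }
    printf("legacy direct: %s\n", do_legacy_direct ? "yes" : "no");
}

int main(void)
{
    emit();                          /* nbcon first, legacy held back */
    legacy_allow_panic_sync = true;  /* the one-way switch */
    emit();                          /* legacy may print again */
    return 0;
}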

View File

@ -1,348 +0,0 @@
From: John Ogness <john.ogness@linutronix.de>
Date: Tue, 20 Aug 2024 08:35:56 +0206
Subject: [PATCH 30/54] printk: Add helper for flush type logic
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
There are many call sites where console flushing occur.
Depending on the system state and types of consoles, the flush
methods to use are different. A flush call site generally must
consider:
@have_boot_console
@have_nbcon_console
@have_legacy_console
@legacy_allow_panic_sync
is_printk_preferred()
and take into account the current CPU state:
NBCON_PRIO_NORMAL
NBCON_PRIO_EMERGENCY
NBCON_PRIO_PANIC
in order to decide if it should:
flush nbcon directly via write_atomic() callback
flush legacy directly via console_unlock
flush legacy via offload to irq_work
All of these call sites use their own logic to make this
decision, which is complicated and error prone. Especially
later when two more flush methods will be introduced:
flush nbcon via offload to kthread
flush legacy via offload to kthread
Introduce a new internal struct console_flush_type that specifies
which console flushing methods should be used in the context of
the caller.
Introduce a helper function to fill out console_flush_type to
be used for flushing call sites.
Replace the logic of all flushing call sites to use the new
helper.
This change standardizes behavior, leading to both fixes and
optimizations across various call sites. For instance, in
console_cpu_notify(), the new logic ensures that nbcon consoles
are flushed when they aren't managed by the legacy loop.
Similarly, in console_flush_on_panic(), the system no longer
needs to flush nbcon consoles if none are present.
Signed-off-by: John Ogness <john.ogness@linutronix.de>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Link: https://lore.kernel.org/r/20240820063001.36405-31-john.ogness@linutronix.de
[pmladek@suse.com: Updated the commit message.]
Signed-off-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/printk/internal.h | 73 +++++++++++++++++++++++++++++++++++++++++++++++
kernel/printk/nbcon.c | 12 +++++--
kernel/printk/printk.c | 68 +++++++++++++++++++------------------------
3 files changed, 112 insertions(+), 41 deletions(-)
--- a/kernel/printk/internal.h
+++ b/kernel/printk/internal.h
@@ -154,8 +154,81 @@ static inline bool console_is_usable(str
#endif /* CONFIG_PRINTK */
extern bool have_boot_console;
+extern bool have_nbcon_console;
+extern bool have_legacy_console;
extern bool legacy_allow_panic_sync;
+/**
+ * struct console_flush_type - Define available console flush methods
+ * @nbcon_atomic: Flush directly using nbcon_atomic() callback
+ * @legacy_direct: Call the legacy loop in this context
+ * @legacy_offload: Offload the legacy loop into IRQ
+ *
+ * Note that the legacy loop also flushes the nbcon consoles.
+ */
+struct console_flush_type {
+ bool nbcon_atomic;
+ bool legacy_direct;
+ bool legacy_offload;
+};
+
+/*
+ * Identify which console flushing methods should be used in the context of
+ * the caller.
+ */
+static inline void printk_get_console_flush_type(struct console_flush_type *ft)
+{
+ memset(ft, 0, sizeof(*ft));
+
+ switch (nbcon_get_default_prio()) {
+ case NBCON_PRIO_NORMAL:
+ if (have_nbcon_console && !have_boot_console)
+ ft->nbcon_atomic = true;
+
+ /* Legacy consoles are flushed directly when possible. */
+ if (have_legacy_console || have_boot_console) {
+ if (!is_printk_legacy_deferred())
+ ft->legacy_direct = true;
+ else
+ ft->legacy_offload = true;
+ }
+ break;
+
+ case NBCON_PRIO_PANIC:
+ /*
+ * In panic, the nbcon consoles will directly print. But
+ * only allowed if there are no boot consoles.
+ */
+ if (have_nbcon_console && !have_boot_console)
+ ft->nbcon_atomic = true;
+
+ if (have_legacy_console || have_boot_console) {
+ /*
+ * This is the same decision as NBCON_PRIO_NORMAL
+ * except that offloading never occurs in panic.
+ *
+ * Note that console_flush_on_panic() will flush
+ * legacy consoles anyway, even if unsafe.
+ */
+ if (!is_printk_legacy_deferred())
+ ft->legacy_direct = true;
+
+ /*
+ * In panic, if nbcon atomic printing occurs,
+ * the legacy consoles must remain silent until
+ * explicitly allowed.
+ */
+ if (ft->nbcon_atomic && !legacy_allow_panic_sync)
+ ft->legacy_direct = false;
+ }
+ break;
+
+ default:
+ WARN_ON_ONCE(1);
+ break;
+ }
+}
+
extern struct printk_buffers printk_shared_pbufs;
/**
--- a/kernel/printk/nbcon.c
+++ b/kernel/printk/nbcon.c
@@ -1344,6 +1344,7 @@ EXPORT_SYMBOL_GPL(nbcon_device_try_acqui
void nbcon_device_release(struct console *con)
{
struct nbcon_context *ctxt = &ACCESS_PRIVATE(con, nbcon_device_ctxt);
+ struct console_flush_type ft;
int cookie;
if (!nbcon_context_exit_unsafe(ctxt))
@@ -1359,12 +1360,17 @@ void nbcon_device_release(struct console
cookie = console_srcu_read_lock();
if (console_is_usable(con, console_srcu_read_flags(con)) &&
prb_read_valid(prb, nbcon_seq_read(con), NULL)) {
- if (!have_boot_console) {
+ /*
+ * If nbcon_atomic flushing is not available, fallback to
+ * using the legacy loop.
+ */
+ printk_get_console_flush_type(&ft);
+ if (ft.nbcon_atomic) {
__nbcon_atomic_flush_pending_con(con, prb_next_reserve_seq(prb), false);
- } else if (!is_printk_legacy_deferred()) {
+ } else if (ft.legacy_direct) {
if (console_trylock())
console_unlock();
- } else {
+ } else if (ft.legacy_offload) {
printk_trigger_flush();
}
}
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -468,14 +468,14 @@ static DEFINE_MUTEX(syslog_lock);
* present, it is necessary to perform the console lock/unlock dance
* whenever console flushing should occur.
*/
-static bool have_legacy_console;
+bool have_legacy_console;
/*
* Specifies if an nbcon console is registered. If nbcon consoles are present,
* synchronous printing of legacy consoles will not occur during panic until
* the backtrace has been stored to the ringbuffer.
*/
-static bool have_nbcon_console;
+bool have_nbcon_console;
/*
* Specifies if a boot console is registered. If boot consoles are present,
@@ -488,14 +488,6 @@ bool have_boot_console;
/* See printk_legacy_allow_panic_sync() for details. */
bool legacy_allow_panic_sync;
-/*
- * Specifies if the console lock/unlock dance is needed for console
- * printing. If @have_boot_console is true, the nbcon consoles will
- * be printed serially along with the legacy consoles because nbcon
- * consoles cannot print simultaneously with boot consoles.
- */
-#define printing_via_unlock (have_legacy_console || have_boot_console)
-
#ifdef CONFIG_PRINTK
DECLARE_WAIT_QUEUE_HEAD(log_wait);
/* All 3 protected by @syslog_lock. */
@@ -2342,9 +2334,12 @@ int vprintk_store(int facility, int leve
*/
void printk_legacy_allow_panic_sync(void)
{
+ struct console_flush_type ft;
+
legacy_allow_panic_sync = true;
- if (printing_via_unlock && !is_printk_legacy_deferred()) {
+ printk_get_console_flush_type(&ft);
+ if (ft.legacy_direct) {
if (console_trylock())
console_unlock();
}
@@ -2354,8 +2349,7 @@ asmlinkage int vprintk_emit(int facility
const struct dev_printk_info *dev_info,
const char *fmt, va_list args)
{
- bool do_trylock_unlock = printing_via_unlock;
- bool defer_legacy = false;
+ struct console_flush_type ft;
int printed_len;
/* Suppress unimportant messages after panic happens */
@@ -2370,35 +2364,23 @@ asmlinkage int vprintk_emit(int facility
if (other_cpu_in_panic() && !panic_triggering_all_cpu_backtrace)
return 0;
+ printk_get_console_flush_type(&ft);
+
/* If called from the scheduler, we can not call up(). */
if (level == LOGLEVEL_SCHED) {
level = LOGLEVEL_DEFAULT;
- defer_legacy = do_trylock_unlock;
- do_trylock_unlock = false;
+ ft.legacy_offload |= ft.legacy_direct;
+ ft.legacy_direct = false;
}
printk_delay(level);
printed_len = vprintk_store(facility, level, dev_info, fmt, args);
- if (have_nbcon_console && !have_boot_console) {
+ if (ft.nbcon_atomic)
nbcon_atomic_flush_pending();
- /*
- * In panic, the legacy consoles are not allowed to print from
- * the printk calling context unless explicitly allowed. This
- * gives the safe nbcon consoles a chance to print out all the
- * panic messages first. This restriction only applies if
- * there are nbcon consoles registered and they are allowed to
- * flush.
- */
- if (this_cpu_in_panic() && !legacy_allow_panic_sync) {
- do_trylock_unlock = false;
- defer_legacy = false;
- }
- }
-
- if (do_trylock_unlock) {
+ if (ft.legacy_direct) {
/*
* The caller may be holding system-critical or
* timing-sensitive locks. Disable preemption during
@@ -2418,7 +2400,7 @@ asmlinkage int vprintk_emit(int facility
preempt_enable();
}
- if (defer_legacy)
+ if (ft.legacy_offload)
defer_console_output();
else
wake_up_klogd();
@@ -2778,10 +2760,16 @@ void resume_console(void)
*/
static int console_cpu_notify(unsigned int cpu)
{
- if (!cpuhp_tasks_frozen && printing_via_unlock) {
- /* If trylock fails, someone else is doing the printing */
- if (console_trylock())
- console_unlock();
+ struct console_flush_type ft;
+
+ if (!cpuhp_tasks_frozen) {
+ printk_get_console_flush_type(&ft);
+ if (ft.nbcon_atomic)
+ nbcon_atomic_flush_pending();
+ if (ft.legacy_direct) {
+ if (console_trylock())
+ console_unlock();
+ }
}
return 0;
}
@@ -3306,6 +3294,7 @@ static void __console_rewind_all(void)
*/
void console_flush_on_panic(enum con_flush_mode mode)
{
+ struct console_flush_type ft;
bool handover;
u64 next_seq;
@@ -3329,7 +3318,8 @@ void console_flush_on_panic(enum con_flu
if (mode == CONSOLE_REPLAY_ALL)
__console_rewind_all();
- if (!have_boot_console)
+ printk_get_console_flush_type(&ft);
+ if (ft.nbcon_atomic)
nbcon_atomic_flush_pending();
/* Flush legacy consoles once allowed, even when dangerous. */
@@ -3993,6 +3983,7 @@ static bool __pr_flush(struct console *c
{
unsigned long timeout_jiffies = msecs_to_jiffies(timeout_ms);
unsigned long remaining_jiffies = timeout_jiffies;
+ struct console_flush_type ft;
struct console *c;
u64 last_diff = 0;
u64 printk_seq;
@@ -4006,7 +3997,8 @@ static bool __pr_flush(struct console *c
seq = prb_next_reserve_seq(prb);
/* Flush the consoles so that records up to @seq are printed. */
- if (printing_via_unlock) {
+ printk_get_console_flush_type(&ft);
+ if (ft.legacy_direct) {
console_lock();
console_unlock();
}
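The hunks above share a single pattern: callers snapshot the flush type once via printk_get_console_flush_type() and then branch on its fields. A minimal sketch of that calling pattern, using only the fields exercised above (nbcon_atomic, legacy_direct, legacy_offload):

	struct console_flush_type ft;

	/* Snapshot which flush methods apply in the current context. */
	printk_get_console_flush_type(&ft);

	/* Flush nbcon consoles directly using their atomic callbacks. */
	if (ft.nbcon_atomic)
		nbcon_atomic_flush_pending();

	/* Flush legacy consoles directly; if trylock fails, someone
	 * else is already printing. */
	if (ft.legacy_direct) {
		if (console_trylock())
			console_unlock();
	}

	/* Otherwise hand legacy printing off to another context. */
	if (ft.legacy_offload)
		defer_console_output();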

View File

@ -1,173 +0,0 @@
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 20 Aug 2024 08:35:57 +0206
Subject: [PATCH 31/54] printk: nbcon: Implement emergency sections
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
In emergency situations (something has gone wrong but the
system continues to operate), important information (such as
a backtrace) is usually generated via printk(). This
information should be pushed out to the consoles ASAP.
Add per-CPU emergency nesting tracking because an emergency
can arise while in an emergency situation.
Add functions to mark the beginning and end of emergency
sections where the urgent messages are generated.
Perform direct console flushing at the emergency priority if
the current CPU is in an emergency state and it is safe to do
so.
Note that the emergency state is not system-wide. While one CPU
is in an emergency state, another CPU may attempt to print
console messages at normal priority.
Also note that printk() already attempts to flush consoles in
the caller context for normal priority. However, follow-up
changes will introduce printing kthreads, in which case the
normal priority printk() calls will offload to the kthreads.
Co-developed-by: John Ogness <john.ogness@linutronix.de>
Signed-off-by: John Ogness <john.ogness@linutronix.de>
Signed-off-by: Thomas Gleixner (Intel) <tglx@linutronix.de>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Link: https://lore.kernel.org/r/20240820063001.36405-32-john.ogness@linutronix.de
Signed-off-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
include/linux/console.h | 4 ++
kernel/printk/internal.h | 1
kernel/printk/nbcon.c | 75 ++++++++++++++++++++++++++++++++++++++++++++++-
3 files changed, 79 insertions(+), 1 deletion(-)
--- a/include/linux/console.h
+++ b/include/linux/console.h
@@ -553,10 +553,14 @@ static inline bool console_is_registered
hlist_for_each_entry(con, &console_list, node)
#ifdef CONFIG_PRINTK
+extern void nbcon_cpu_emergency_enter(void);
+extern void nbcon_cpu_emergency_exit(void);
extern bool nbcon_can_proceed(struct nbcon_write_context *wctxt);
extern bool nbcon_enter_unsafe(struct nbcon_write_context *wctxt);
extern bool nbcon_exit_unsafe(struct nbcon_write_context *wctxt);
#else
+static inline void nbcon_cpu_emergency_enter(void) { }
+static inline void nbcon_cpu_emergency_exit(void) { }
static inline bool nbcon_can_proceed(struct nbcon_write_context *wctxt) { return false; }
static inline bool nbcon_enter_unsafe(struct nbcon_write_context *wctxt) { return false; }
static inline bool nbcon_exit_unsafe(struct nbcon_write_context *wctxt) { return false; }
--- a/kernel/printk/internal.h
+++ b/kernel/printk/internal.h
@@ -182,6 +182,7 @@ static inline void printk_get_console_fl
switch (nbcon_get_default_prio()) {
case NBCON_PRIO_NORMAL:
+ case NBCON_PRIO_EMERGENCY:
if (have_nbcon_console && !have_boot_console)
ft->nbcon_atomic = true;
--- a/kernel/printk/nbcon.c
+++ b/kernel/printk/nbcon.c
@@ -972,6 +972,36 @@ static bool nbcon_emit_next_record(struc
return nbcon_context_exit_unsafe(ctxt);
}
+/* Track the nbcon emergency nesting per CPU. */
+static DEFINE_PER_CPU(unsigned int, nbcon_pcpu_emergency_nesting);
+static unsigned int early_nbcon_pcpu_emergency_nesting __initdata;
+
+/**
+ * nbcon_get_cpu_emergency_nesting - Get the per CPU emergency nesting pointer
+ *
+ * Context: For reading, any context. For writing, any context which cannot
+ * be migrated to another CPU.
+ * Return: Either a pointer to the per CPU emergency nesting counter of
+ * the current CPU or to the init data during early boot.
+ *
+ * The function is safe for reading per-CPU variables in any context because
+ * preemption is disabled if the current CPU is in the emergency state. See
+ * also nbcon_cpu_emergency_enter().
+ */
+static __ref unsigned int *nbcon_get_cpu_emergency_nesting(void)
+{
+ /*
+ * The value of __printk_percpu_data_ready gets set in normal
+ * context and before SMP initialization. As a result it could
+ * never change while inside an nbcon emergency section.
+ */
+ if (!printk_percpu_data_ready())
+ return &early_nbcon_pcpu_emergency_nesting;
+
+ /* Open code this_cpu_ptr() without checking migration. */
+ return per_cpu_ptr(&nbcon_pcpu_emergency_nesting, raw_smp_processor_id());
+}
+
/**
* nbcon_get_default_prio - The appropriate nbcon priority to use for nbcon
* printing on the current CPU
@@ -981,13 +1011,20 @@ static bool nbcon_emit_next_record(struc
* context for printing.
*
* The function is safe for reading per-CPU data in any context because
- * preemption is disabled if the current CPU is in the panic state.
+ * preemption is disabled if the current CPU is in the emergency or panic
+ * state.
*/
enum nbcon_prio nbcon_get_default_prio(void)
{
+ unsigned int *cpu_emergency_nesting;
+
if (this_cpu_in_panic())
return NBCON_PRIO_PANIC;
+ cpu_emergency_nesting = nbcon_get_cpu_emergency_nesting();
+ if (*cpu_emergency_nesting)
+ return NBCON_PRIO_EMERGENCY;
+
return NBCON_PRIO_NORMAL;
}
@@ -1247,6 +1284,42 @@ void nbcon_atomic_flush_unsafe(void)
}
/**
+ * nbcon_cpu_emergency_enter - Enter an emergency section where printk()
+ * messages for that CPU are flushed directly
+ *
+ * Context: Any context. Disables preemption.
+ *
+ * When within an emergency section, printk() calls will attempt to flush any
+ * pending messages in the ringbuffer.
+ */
+void nbcon_cpu_emergency_enter(void)
+{
+ unsigned int *cpu_emergency_nesting;
+
+ preempt_disable();
+
+ cpu_emergency_nesting = nbcon_get_cpu_emergency_nesting();
+ (*cpu_emergency_nesting)++;
+}
+
+/**
+ * nbcon_cpu_emergency_exit - Exit an emergency section
+ *
+ * Context: Within an emergency section. Enables preemption.
+ */
+void nbcon_cpu_emergency_exit(void)
+{
+ unsigned int *cpu_emergency_nesting;
+
+ cpu_emergency_nesting = nbcon_get_cpu_emergency_nesting();
+
+ if (!WARN_ON_ONCE(*cpu_emergency_nesting == 0))
+ (*cpu_emergency_nesting)--;
+
+ preempt_enable();
+}
+
+/**
* nbcon_alloc - Allocate and init the nbcon console specific data
* @con: Console to initialize
*
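Taken together with the follow-up patches below (__warn(), oops, RCU stalls, lockdep), the intended usage is to bracket a multi-line report so that every printk() inside it is flushed at EMERGENCY priority. A minimal sketch of such a caller:

	nbcon_cpu_emergency_enter();
	pr_warn("something went wrong:\n");
	dump_stack();	/* flushed directly at EMERGENCY priority */
	nbcon_cpu_emergency_exit();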

View File

@ -1,40 +0,0 @@
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 20 Aug 2024 08:35:58 +0206
Subject: [PATCH 32/54] panic: Mark emergency section in warn
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
Mark the full contents of __warn() as an emergency section. In
this section, every printk() call will attempt to directly
flush to the consoles using the EMERGENCY priority.
Co-developed-by: John Ogness <john.ogness@linutronix.de>
Signed-off-by: John Ogness <john.ogness@linutronix.de>
Signed-off-by: Thomas Gleixner (Intel) <tglx@linutronix.de>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Link: https://lore.kernel.org/r/20240820063001.36405-33-john.ogness@linutronix.de
Signed-off-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/panic.c | 4 ++++
1 file changed, 4 insertions(+)
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -718,6 +718,8 @@ struct warn_args {
void __warn(const char *file, int line, void *caller, unsigned taint,
struct pt_regs *regs, struct warn_args *args)
{
+ nbcon_cpu_emergency_enter();
+
disable_trace_on_warning();
if (file)
@@ -753,6 +755,8 @@ void __warn(const char *file, int line,
/* Just a warning, don't kill lockdep. */
add_taint(taint, LOCKDEP_STILL_OK);
+
+ nbcon_cpu_emergency_exit();
}
#ifdef CONFIG_BUG

View File

@ -1,42 +0,0 @@
From: John Ogness <john.ogness@linutronix.de>
Date: Tue, 20 Aug 2024 08:35:59 +0206
Subject: [PATCH 33/54] panic: Mark emergency section in oops
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
Mark an emergency section beginning with oops_enter() until the
end of oops_exit(). In this section, every printk() call will
attempt to directly flush to the consoles using the EMERGENCY
priority.
The very end of oops_exit() performs a kmsg_dump(). This is not
included in the emergency section because it is another
flushing mechanism that should occur after the consoles have
flushed the oops messages.
Signed-off-by: John Ogness <john.ogness@linutronix.de>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Link: https://lore.kernel.org/r/20240820063001.36405-34-john.ogness@linutronix.de
Signed-off-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/panic.c | 2 ++
1 file changed, 2 insertions(+)
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -685,6 +685,7 @@ bool oops_may_print(void)
*/
void oops_enter(void)
{
+ nbcon_cpu_emergency_enter();
tracing_off();
/* can't trust the integrity of the kernel anymore: */
debug_locks_off();
@@ -707,6 +708,7 @@ void oops_exit(void)
{
do_oops_enter_exit();
print_oops_end_marker();
+ nbcon_cpu_emergency_exit();
kmsg_dump(KMSG_DUMP_OOPS);
}

View File

@ -1,97 +0,0 @@
From: John Ogness <john.ogness@linutronix.de>
Date: Tue, 20 Aug 2024 08:36:00 +0206
Subject: [PATCH 34/54] rcu: Mark emergency sections in rcu stalls
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
Mark emergency sections wherever multiple lines of
rcu stall information are generated. In an emergency
section, every printk() call will attempt to directly
flush to the consoles using the EMERGENCY priority.
Signed-off-by: John Ogness <john.ogness@linutronix.de>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Acked-by: Paul E. McKenney <paulmck@kernel.org>
Link: https://lore.kernel.org/r/20240820063001.36405-35-john.ogness@linutronix.de
Signed-off-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/rcu/tree_exp.h | 7 +++++++
kernel/rcu/tree_stall.h | 9 +++++++++
2 files changed, 16 insertions(+)
--- a/kernel/rcu/tree_exp.h
+++ b/kernel/rcu/tree_exp.h
@@ -7,6 +7,7 @@
* Authors: Paul E. McKenney <paulmck@linux.ibm.com>
*/
+#include <linux/console.h>
#include <linux/lockdep.h>
static void rcu_exp_handler(void *unused);
@@ -590,6 +591,9 @@ static void synchronize_rcu_expedited_wa
return;
if (rcu_stall_is_suppressed())
continue;
+
+ nbcon_cpu_emergency_enter();
+
j = jiffies;
rcu_stall_notifier_call_chain(RCU_STALL_NOTIFY_EXP, (void *)(j - jiffies_start));
trace_rcu_stall_warning(rcu_state.name, TPS("ExpeditedStall"));
@@ -643,6 +647,9 @@ static void synchronize_rcu_expedited_wa
rcu_exp_print_detail_task_stall_rnp(rnp);
}
jiffies_stall = 3 * rcu_exp_jiffies_till_stall_check() + 3;
+
+ nbcon_cpu_emergency_exit();
+
panic_on_rcu_stall();
}
}
--- a/kernel/rcu/tree_stall.h
+++ b/kernel/rcu/tree_stall.h
@@ -7,6 +7,7 @@
* Author: Paul E. McKenney <paulmck@linux.ibm.com>
*/
+#include <linux/console.h>
#include <linux/kvm_para.h>
#include <linux/rcu_notifier.h>
@@ -605,6 +606,8 @@ static void print_other_cpu_stall(unsign
if (rcu_stall_is_suppressed())
return;
+ nbcon_cpu_emergency_enter();
+
/*
* OK, time to rat on our buddy...
* See Documentation/RCU/stallwarn.rst for info on how to debug
@@ -657,6 +660,8 @@ static void print_other_cpu_stall(unsign
rcu_check_gp_kthread_expired_fqs_timer();
rcu_check_gp_kthread_starvation();
+ nbcon_cpu_emergency_exit();
+
panic_on_rcu_stall();
rcu_force_quiescent_state(); /* Kick them all. */
@@ -677,6 +682,8 @@ static void print_cpu_stall(unsigned lon
if (rcu_stall_is_suppressed())
return;
+ nbcon_cpu_emergency_enter();
+
/*
* OK, time to rat on ourselves...
* See Documentation/RCU/stallwarn.rst for info on how to debug
@@ -706,6 +713,8 @@ static void print_cpu_stall(unsigned lon
jiffies + 3 * rcu_jiffies_till_stall_check() + 3);
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+ nbcon_cpu_emergency_exit();
+
panic_on_rcu_stall();
/*

View File

@ -1,453 +0,0 @@
From: John Ogness <john.ogness@linutronix.de>
Date: Tue, 20 Aug 2024 08:36:01 +0206
Subject: [PATCH 35/54] lockdep: Mark emergency sections in lockdep splats
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
Mark emergency sections wherever multiple lines of
lock debugging output are generated. In an emergency
section, every printk() call will attempt to directly
flush to the consoles using the EMERGENCY priority.
Note that debug_show_all_locks() and
lockdep_print_held_locks() rely on their callers to
enter the emergency section. This is because these
functions can also be called in non-emergency
situations (such as sysrq).
Signed-off-by: John Ogness <john.ogness@linutronix.de>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Link: https://lore.kernel.org/r/20240820063001.36405-36-john.ogness@linutronix.de
Signed-off-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/locking/lockdep.c | 83 +++++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 81 insertions(+), 2 deletions(-)
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -56,6 +56,7 @@
#include <linux/kprobes.h>
#include <linux/lockdep.h>
#include <linux/context_tracking.h>
+#include <linux/console.h>
#include <asm/sections.h>
@@ -573,8 +574,10 @@ static struct lock_trace *save_trace(voi
if (!debug_locks_off_graph_unlock())
return NULL;
+ nbcon_cpu_emergency_enter();
print_lockdep_off("BUG: MAX_STACK_TRACE_ENTRIES too low!");
dump_stack();
+ nbcon_cpu_emergency_exit();
return NULL;
}
@@ -887,11 +890,13 @@ look_up_lock_class(const struct lockdep_
if (unlikely(subclass >= MAX_LOCKDEP_SUBCLASSES)) {
instrumentation_begin();
debug_locks_off();
+ nbcon_cpu_emergency_enter();
printk(KERN_ERR
"BUG: looking up invalid subclass: %u\n", subclass);
printk(KERN_ERR
"turning off the locking correctness validator.\n");
dump_stack();
+ nbcon_cpu_emergency_exit();
instrumentation_end();
return NULL;
}
@@ -968,11 +973,13 @@ static bool assign_lock_key(struct lockd
else {
/* Debug-check: all keys must be persistent! */
debug_locks_off();
+ nbcon_cpu_emergency_enter();
pr_err("INFO: trying to register non-static key.\n");
pr_err("The code is fine but needs lockdep annotation, or maybe\n");
pr_err("you didn't initialize this object before use?\n");
pr_err("turning off the locking correctness validator.\n");
dump_stack();
+ nbcon_cpu_emergency_exit();
return false;
}
@@ -1316,8 +1323,10 @@ register_lock_class(struct lockdep_map *
return NULL;
}
+ nbcon_cpu_emergency_enter();
print_lockdep_off("BUG: MAX_LOCKDEP_KEYS too low!");
dump_stack();
+ nbcon_cpu_emergency_exit();
return NULL;
}
nr_lock_classes++;
@@ -1349,11 +1358,13 @@ register_lock_class(struct lockdep_map *
if (verbose(class)) {
graph_unlock();
+ nbcon_cpu_emergency_enter();
printk("\nnew class %px: %s", class->key, class->name);
if (class->name_version > 1)
printk(KERN_CONT "#%d", class->name_version);
printk(KERN_CONT "\n");
dump_stack();
+ nbcon_cpu_emergency_exit();
if (!graph_lock()) {
return NULL;
@@ -1392,8 +1403,10 @@ static struct lock_list *alloc_list_entr
if (!debug_locks_off_graph_unlock())
return NULL;
+ nbcon_cpu_emergency_enter();
print_lockdep_off("BUG: MAX_LOCKDEP_ENTRIES too low!");
dump_stack();
+ nbcon_cpu_emergency_exit();
return NULL;
}
nr_list_entries++;
@@ -2039,6 +2052,8 @@ static noinline void print_circular_bug(
depth = get_lock_depth(target);
+ nbcon_cpu_emergency_enter();
+
print_circular_bug_header(target, depth, check_src, check_tgt);
parent = get_lock_parent(target);
@@ -2057,6 +2072,8 @@ static noinline void print_circular_bug(
printk("\nstack backtrace:\n");
dump_stack();
+
+ nbcon_cpu_emergency_exit();
}
static noinline void print_bfs_bug(int ret)
@@ -2569,6 +2586,8 @@ print_bad_irq_dependency(struct task_str
if (!debug_locks_off_graph_unlock() || debug_locks_silent)
return;
+ nbcon_cpu_emergency_enter();
+
pr_warn("\n");
pr_warn("=====================================================\n");
pr_warn("WARNING: %s-safe -> %s-unsafe lock order detected\n",
@@ -2618,11 +2637,13 @@ print_bad_irq_dependency(struct task_str
pr_warn(" and %s-irq-unsafe lock:\n", irqclass);
next_root->trace = save_trace();
if (!next_root->trace)
- return;
+ goto out;
print_shortest_lock_dependencies(forwards_entry, next_root);
pr_warn("\nstack backtrace:\n");
dump_stack();
+out:
+ nbcon_cpu_emergency_exit();
}
static const char *state_names[] = {
@@ -2987,6 +3008,8 @@ print_deadlock_bug(struct task_struct *c
if (!debug_locks_off_graph_unlock() || debug_locks_silent)
return;
+ nbcon_cpu_emergency_enter();
+
pr_warn("\n");
pr_warn("============================================\n");
pr_warn("WARNING: possible recursive locking detected\n");
@@ -3009,6 +3032,8 @@ print_deadlock_bug(struct task_struct *c
pr_warn("\nstack backtrace:\n");
dump_stack();
+
+ nbcon_cpu_emergency_exit();
}
/*
@@ -3606,6 +3631,8 @@ static void print_collision(struct task_
struct held_lock *hlock_next,
struct lock_chain *chain)
{
+ nbcon_cpu_emergency_enter();
+
pr_warn("\n");
pr_warn("============================\n");
pr_warn("WARNING: chain_key collision\n");
@@ -3622,6 +3649,8 @@ static void print_collision(struct task_
pr_warn("\nstack backtrace:\n");
dump_stack();
+
+ nbcon_cpu_emergency_exit();
}
#endif
@@ -3712,8 +3741,10 @@ static inline int add_chain_cache(struct
if (!debug_locks_off_graph_unlock())
return 0;
+ nbcon_cpu_emergency_enter();
print_lockdep_off("BUG: MAX_LOCKDEP_CHAINS too low!");
dump_stack();
+ nbcon_cpu_emergency_exit();
return 0;
}
chain->chain_key = chain_key;
@@ -3730,8 +3761,10 @@ static inline int add_chain_cache(struct
if (!debug_locks_off_graph_unlock())
return 0;
+ nbcon_cpu_emergency_enter();
print_lockdep_off("BUG: MAX_LOCKDEP_CHAIN_HLOCKS too low!");
dump_stack();
+ nbcon_cpu_emergency_exit();
return 0;
}
@@ -3970,6 +4003,8 @@ print_usage_bug(struct task_struct *curr
if (!debug_locks_off() || debug_locks_silent)
return;
+ nbcon_cpu_emergency_enter();
+
pr_warn("\n");
pr_warn("================================\n");
pr_warn("WARNING: inconsistent lock state\n");
@@ -3998,6 +4033,8 @@ print_usage_bug(struct task_struct *curr
pr_warn("\nstack backtrace:\n");
dump_stack();
+
+ nbcon_cpu_emergency_exit();
}
/*
@@ -4032,6 +4069,8 @@ print_irq_inversion_bug(struct task_stru
if (!debug_locks_off_graph_unlock() || debug_locks_silent)
return;
+ nbcon_cpu_emergency_enter();
+
pr_warn("\n");
pr_warn("========================================================\n");
pr_warn("WARNING: possible irq lock inversion dependency detected\n");
@@ -4072,11 +4111,13 @@ print_irq_inversion_bug(struct task_stru
pr_warn("\nthe shortest dependencies between 2nd lock and 1st lock:\n");
root->trace = save_trace();
if (!root->trace)
- return;
+ goto out;
print_shortest_lock_dependencies(other, root);
pr_warn("\nstack backtrace:\n");
dump_stack();
+out:
+ nbcon_cpu_emergency_exit();
}
/*
@@ -4153,6 +4194,8 @@ void print_irqtrace_events(struct task_s
{
const struct irqtrace_events *trace = &curr->irqtrace;
+ nbcon_cpu_emergency_enter();
+
printk("irq event stamp: %u\n", trace->irq_events);
printk("hardirqs last enabled at (%u): [<%px>] %pS\n",
trace->hardirq_enable_event, (void *)trace->hardirq_enable_ip,
@@ -4166,6 +4209,8 @@ void print_irqtrace_events(struct task_s
printk("softirqs last disabled at (%u): [<%px>] %pS\n",
trace->softirq_disable_event, (void *)trace->softirq_disable_ip,
(void *)trace->softirq_disable_ip);
+
+ nbcon_cpu_emergency_exit();
}
static int HARDIRQ_verbose(struct lock_class *class)
@@ -4686,10 +4731,12 @@ static int mark_lock(struct task_struct
* We must printk outside of the graph_lock:
*/
if (ret == 2) {
+ nbcon_cpu_emergency_enter();
printk("\nmarked lock as {%s}:\n", usage_str[new_bit]);
print_lock(this);
print_irqtrace_events(curr);
dump_stack();
+ nbcon_cpu_emergency_exit();
}
return ret;
@@ -4730,6 +4777,8 @@ print_lock_invalid_wait_context(struct t
if (debug_locks_silent)
return 0;
+ nbcon_cpu_emergency_enter();
+
pr_warn("\n");
pr_warn("=============================\n");
pr_warn("[ BUG: Invalid wait context ]\n");
@@ -4749,6 +4798,8 @@ print_lock_invalid_wait_context(struct t
pr_warn("stack backtrace:\n");
dump_stack();
+ nbcon_cpu_emergency_exit();
+
return 0;
}
@@ -4956,6 +5007,8 @@ print_lock_nested_lock_not_held(struct t
if (debug_locks_silent)
return;
+ nbcon_cpu_emergency_enter();
+
pr_warn("\n");
pr_warn("==================================\n");
pr_warn("WARNING: Nested lock was not taken\n");
@@ -4976,6 +5029,8 @@ print_lock_nested_lock_not_held(struct t
pr_warn("\nstack backtrace:\n");
dump_stack();
+
+ nbcon_cpu_emergency_exit();
}
static int __lock_is_held(const struct lockdep_map *lock, int read);
@@ -5024,11 +5079,13 @@ static int __lock_acquire(struct lockdep
debug_class_ops_inc(class);
if (very_verbose(class)) {
+ nbcon_cpu_emergency_enter();
printk("\nacquire class [%px] %s", class->key, class->name);
if (class->name_version > 1)
printk(KERN_CONT "#%d", class->name_version);
printk(KERN_CONT "\n");
dump_stack();
+ nbcon_cpu_emergency_exit();
}
/*
@@ -5155,6 +5212,7 @@ static int __lock_acquire(struct lockdep
#endif
if (unlikely(curr->lockdep_depth >= MAX_LOCK_DEPTH)) {
debug_locks_off();
+ nbcon_cpu_emergency_enter();
print_lockdep_off("BUG: MAX_LOCK_DEPTH too low!");
printk(KERN_DEBUG "depth: %i max: %lu!\n",
curr->lockdep_depth, MAX_LOCK_DEPTH);
@@ -5162,6 +5220,7 @@ static int __lock_acquire(struct lockdep
lockdep_print_held_locks(current);
debug_show_all_locks();
dump_stack();
+ nbcon_cpu_emergency_exit();
return 0;
}
@@ -5181,6 +5240,8 @@ static void print_unlock_imbalance_bug(s
if (debug_locks_silent)
return;
+ nbcon_cpu_emergency_enter();
+
pr_warn("\n");
pr_warn("=====================================\n");
pr_warn("WARNING: bad unlock balance detected!\n");
@@ -5197,6 +5258,8 @@ static void print_unlock_imbalance_bug(s
pr_warn("\nstack backtrace:\n");
dump_stack();
+
+ nbcon_cpu_emergency_exit();
}
static noinstr int match_held_lock(const struct held_lock *hlock,
@@ -5901,6 +5964,8 @@ static void print_lock_contention_bug(st
if (debug_locks_silent)
return;
+ nbcon_cpu_emergency_enter();
+
pr_warn("\n");
pr_warn("=================================\n");
pr_warn("WARNING: bad contention detected!\n");
@@ -5917,6 +5982,8 @@ static void print_lock_contention_bug(st
pr_warn("\nstack backtrace:\n");
dump_stack();
+
+ nbcon_cpu_emergency_exit();
}
static void
@@ -6536,6 +6603,8 @@ print_freed_lock_bug(struct task_struct
if (debug_locks_silent)
return;
+ nbcon_cpu_emergency_enter();
+
pr_warn("\n");
pr_warn("=========================\n");
pr_warn("WARNING: held lock freed!\n");
@@ -6548,6 +6617,8 @@ print_freed_lock_bug(struct task_struct
pr_warn("\nstack backtrace:\n");
dump_stack();
+
+ nbcon_cpu_emergency_exit();
}
static inline int not_in_range(const void* mem_from, unsigned long mem_len,
@@ -6594,6 +6665,8 @@ static void print_held_locks_bug(void)
if (debug_locks_silent)
return;
+ nbcon_cpu_emergency_enter();
+
pr_warn("\n");
pr_warn("====================================\n");
pr_warn("WARNING: %s/%d still has locks held!\n",
@@ -6603,6 +6676,8 @@ static void print_held_locks_bug(void)
lockdep_print_held_locks(current);
pr_warn("\nstack backtrace:\n");
dump_stack();
+
+ nbcon_cpu_emergency_exit();
}
void debug_check_no_locks_held(void)
@@ -6660,6 +6735,7 @@ asmlinkage __visible void lockdep_sys_ex
if (unlikely(curr->lockdep_depth)) {
if (!debug_locks_off())
return;
+ nbcon_cpu_emergency_enter();
pr_warn("\n");
pr_warn("================================================\n");
pr_warn("WARNING: lock held when returning to user space!\n");
@@ -6668,6 +6744,7 @@ asmlinkage __visible void lockdep_sys_ex
pr_warn("%s/%d is leaving the kernel with locks still held!\n",
curr->comm, curr->pid);
lockdep_print_held_locks(curr);
+ nbcon_cpu_emergency_exit();
}
/*
@@ -6684,6 +6761,7 @@ void lockdep_rcu_suspicious(const char *
bool rcu = warn_rcu_enter();
/* Note: the following can be executed concurrently, so be careful. */
+ nbcon_cpu_emergency_enter();
pr_warn("\n");
pr_warn("=============================\n");
pr_warn("WARNING: suspicious RCU usage\n");
@@ -6722,6 +6800,7 @@ void lockdep_rcu_suspicious(const char *
lockdep_print_held_locks(curr);
pr_warn("\nstack backtrace:\n");
dump_stack();
+ nbcon_cpu_emergency_exit();
warn_rcu_exit(rcu);
}
EXPORT_SYMBOL_GPL(lockdep_rcu_suspicious);

View File

@ -1,37 +0,0 @@
From: Jinjie Ruan <ruanjinjie@huawei.com>
Date: Tue, 3 Sep 2024 11:53:58 +0800
Subject: [PATCH 36/54] printk: Use the BITS_PER_LONG macro
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
sizeof(unsigned long) * 8 is the number of bits in an unsigned long
variable; replace it with the BITS_PER_LONG macro to make it simpler.
Signed-off-by: Jinjie Ruan <ruanjinjie@huawei.com>
Reviewed-by: John Ogness <john.ogness@linutronix.de>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Link: https://lore.kernel.org/r/20240903035358.308482-1-ruanjinjie@huawei.com
Signed-off-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/printk/printk_ringbuffer.h | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
--- a/kernel/printk/printk_ringbuffer.h
+++ b/kernel/printk/printk_ringbuffer.h
@@ -4,6 +4,7 @@
#define _KERNEL_PRINTK_RINGBUFFER_H
#include <linux/atomic.h>
+#include <linux/bits.h>
#include <linux/dev_printk.h>
#include <linux/stddef.h>
#include <linux/types.h>
@@ -122,7 +123,7 @@ enum desc_state {
#define _DATA_SIZE(sz_bits) (1UL << (sz_bits))
#define _DESCS_COUNT(ct_bits) (1U << (ct_bits))
-#define DESC_SV_BITS (sizeof(unsigned long) * 8)
+#define DESC_SV_BITS BITS_PER_LONG
#define DESC_FLAGS_SHIFT (DESC_SV_BITS - 2)
#define DESC_FLAGS_MASK (3UL << DESC_FLAGS_SHIFT)
#define DESC_STATE(sv) (3UL & (sv >> DESC_FLAGS_SHIFT))

View File

@ -1,29 +0,0 @@
From: John Ogness <john.ogness@linutronix.de>
Date: Tue, 27 Aug 2024 16:25:31 +0206
Subject: [PATCH 37/54] printk: nbcon: Use raw_cpu_ptr() instead of open coding
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
There is no need to open code a non-migration-checking
this_cpu_ptr(). That is exactly what raw_cpu_ptr() is.
Signed-off-by: John Ogness <john.ogness@linutronix.de>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Link: https://lore.kernel.org/r/87plpum4jw.fsf@jogness.linutronix.de
Signed-off-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/printk/nbcon.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
--- a/kernel/printk/nbcon.c
+++ b/kernel/printk/nbcon.c
@@ -998,8 +998,7 @@ static __ref unsigned int *nbcon_get_cpu
if (!printk_percpu_data_ready())
return &early_nbcon_pcpu_emergency_nesting;
- /* Open code this_cpu_ptr() without checking migration. */
- return per_cpu_ptr(&nbcon_pcpu_emergency_nesting, raw_smp_processor_id());
+ return raw_cpu_ptr(&nbcon_pcpu_emergency_nesting);
}
/**

View File

@ -1,188 +0,0 @@
From: John Ogness <john.ogness@linutronix.de>
Date: Wed, 4 Sep 2024 14:11:20 +0206
Subject: [PATCH 38/54] printk: nbcon: Add function for printers to reacquire
ownership
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
Since ownership can be lost at any time due to handover or
takeover, a printing context _must_ be prepared to back out
immediately and carefully. However, there are scenarios where
the printing context must reacquire ownership in order to
finalize or revert hardware changes.
One such example is when interrupts are disabled during
printing. No other context will automagically re-enable the
interrupts. For this case, the disabling context _must_
reacquire nbcon ownership so that it can re-enable the
interrupts.
Provide nbcon_reacquire_nobuf() for exactly this purpose. It
allows a printing context to reacquire ownership using the same
priority as its previous ownership.
Note that after a successful reacquire the printing context
will have no output buffer because that has been lost. This
function cannot be used to resume printing.
Signed-off-by: John Ogness <john.ogness@linutronix.de>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Link: https://lore.kernel.org/r/20240904120536.115780-2-john.ogness@linutronix.de
Signed-off-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
include/linux/console.h | 6 +++
kernel/printk/nbcon.c | 74 +++++++++++++++++++++++++++++++++++++++++++-----
2 files changed, 73 insertions(+), 7 deletions(-)
--- a/include/linux/console.h
+++ b/include/linux/console.h
@@ -366,6 +366,10 @@ struct console {
*
* The callback should allow the takeover whenever it is safe. It
* increases the chance to see messages when the system is in trouble.
+ * If the driver must reacquire ownership in order to finalize or
+ * revert hardware changes, nbcon_reacquire_nobuf() can be used.
+ * However, on reacquire the buffer content is no longer available. A
+ * reacquire cannot be used to resume printing.
*
* The callback can be called from any context (including NMI).
* Therefore it must avoid usage of any locking and instead rely
@@ -558,12 +562,14 @@ extern void nbcon_cpu_emergency_exit(voi
extern bool nbcon_can_proceed(struct nbcon_write_context *wctxt);
extern bool nbcon_enter_unsafe(struct nbcon_write_context *wctxt);
extern bool nbcon_exit_unsafe(struct nbcon_write_context *wctxt);
+extern void nbcon_reacquire_nobuf(struct nbcon_write_context *wctxt);
#else
static inline void nbcon_cpu_emergency_enter(void) { }
static inline void nbcon_cpu_emergency_exit(void) { }
static inline bool nbcon_can_proceed(struct nbcon_write_context *wctxt) { return false; }
static inline bool nbcon_enter_unsafe(struct nbcon_write_context *wctxt) { return false; }
static inline bool nbcon_exit_unsafe(struct nbcon_write_context *wctxt) { return false; }
+static inline void nbcon_reacquire_nobuf(struct nbcon_write_context *wctxt) { }
#endif
extern int console_set_on_cmdline;
--- a/kernel/printk/nbcon.c
+++ b/kernel/printk/nbcon.c
@@ -830,6 +830,19 @@ static bool __nbcon_context_update_unsaf
return nbcon_context_can_proceed(ctxt, &cur);
}
+static void nbcon_write_context_set_buf(struct nbcon_write_context *wctxt,
+ char *buf, unsigned int len)
+{
+ struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt);
+ struct console *con = ctxt->console;
+ struct nbcon_state cur;
+
+ wctxt->outbuf = buf;
+ wctxt->len = len;
+ nbcon_state_read(con, &cur);
+ wctxt->unsafe_takeover = cur.unsafe_takeover;
+}
+
/**
* nbcon_enter_unsafe - Enter an unsafe region in the driver
* @wctxt: The write context that was handed to the write function
@@ -845,8 +858,12 @@ static bool __nbcon_context_update_unsaf
bool nbcon_enter_unsafe(struct nbcon_write_context *wctxt)
{
struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt);
+ bool is_owner;
- return nbcon_context_enter_unsafe(ctxt);
+ is_owner = nbcon_context_enter_unsafe(ctxt);
+ if (!is_owner)
+ nbcon_write_context_set_buf(wctxt, NULL, 0);
+ return is_owner;
}
EXPORT_SYMBOL_GPL(nbcon_enter_unsafe);
@@ -865,12 +882,44 @@ EXPORT_SYMBOL_GPL(nbcon_enter_unsafe);
bool nbcon_exit_unsafe(struct nbcon_write_context *wctxt)
{
struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt);
+ bool ret;
- return nbcon_context_exit_unsafe(ctxt);
+ ret = nbcon_context_exit_unsafe(ctxt);
+ if (!ret)
+ nbcon_write_context_set_buf(wctxt, NULL, 0);
+ return ret;
}
EXPORT_SYMBOL_GPL(nbcon_exit_unsafe);
/**
+ * nbcon_reacquire_nobuf - Reacquire a console after losing ownership
+ * while printing
+ * @wctxt: The write context that was handed to the write callback
+ *
+ * Since ownership can be lost at any time due to handover or takeover, a
+ * printing context _must_ be prepared to back out immediately and
+ * carefully. However, there are scenarios where the printing context must
+ * reacquire ownership in order to finalize or revert hardware changes.
+ *
+ * This function allows a printing context to reacquire ownership using the
+ * same priority as its previous ownership.
+ *
+ * Note that after a successful reacquire the printing context will have no
+ * output buffer because that has been lost. This function cannot be used to
+ * resume printing.
+ */
+void nbcon_reacquire_nobuf(struct nbcon_write_context *wctxt)
+{
+ struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt);
+
+ while (!nbcon_context_try_acquire(ctxt))
+ cpu_relax();
+
+ nbcon_write_context_set_buf(wctxt, NULL, 0);
+}
+EXPORT_SYMBOL_GPL(nbcon_reacquire_nobuf);
+
+/**
* nbcon_emit_next_record - Emit a record in the acquired context
* @wctxt: The write context that will be handed to the write function
*
@@ -895,7 +944,6 @@ static bool nbcon_emit_next_record(struc
.pbufs = ctxt->pbufs,
};
unsigned long con_dropped;
- struct nbcon_state cur;
unsigned long dropped;
/*
@@ -930,10 +978,7 @@ static bool nbcon_emit_next_record(struc
goto update_con;
/* Initialize the write context for driver callbacks. */
- wctxt->outbuf = &pmsg.pbufs->outbuf[0];
- wctxt->len = pmsg.outbuf_len;
- nbcon_state_read(con, &cur);
- wctxt->unsafe_takeover = cur.unsafe_takeover;
+ nbcon_write_context_set_buf(wctxt, &pmsg.pbufs->outbuf[0], pmsg.outbuf_len);
if (con->write_atomic) {
con->write_atomic(con, wctxt);
@@ -947,6 +992,21 @@ static bool nbcon_emit_next_record(struc
return false;
}
+ if (!wctxt->outbuf) {
+ /*
+ * Ownership was lost and reacquired by the driver. Handle it
+ * as if ownership was lost.
+ */
+ nbcon_context_release(ctxt);
+ return false;
+ }
+
+ /*
+ * Ownership may have been lost but _not_ reacquired by the driver.
+ * This case is detected and handled when entering unsafe to update
+ * dropped/seq values.
+ */
+
/*
* Since any dropped message was successfully output, reset the
* dropped count for the console.
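A sketch of the scenario described above: a hypothetical write_atomic() callback that disables interrupts around hardware access and, if ownership is lost, reacquires it purely to restore the interrupt state (my_hw_emit() is a placeholder, not a real API):

	static void my_write_atomic(struct console *con,
				    struct nbcon_write_context *wctxt)
	{
		unsigned long flags;

		local_irq_save(flags);

		if (nbcon_enter_unsafe(wctxt)) {
			my_hw_emit(wctxt->outbuf, wctxt->len);	/* hypothetical I/O */
			if (!nbcon_exit_unsafe(wctxt))
				nbcon_reacquire_nobuf(wctxt);
		} else {
			nbcon_reacquire_nobuf(wctxt);
		}

		/*
		 * Ownership is held here, either still the original or
		 * reacquired, so the interrupt state can be safely restored.
		 * Printing is not resumed after a reacquire.
		 */
		local_irq_restore(flags);
	}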

View File

@ -1,79 +0,0 @@
From: John Ogness <john.ogness@linutronix.de>
Date: Wed, 4 Sep 2024 14:11:21 +0206
Subject: [PATCH 39/54] printk: Fail pr_flush() if before SYSTEM_SCHEDULING
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
A follow-up change adds pr_flush() to console unregistration.
However, with boot consoles, unregistration can happen very
early if regular consoles are registering as well.
In this case the pr_flush() is not important because all
consoles are flushed when checking the initial console sequence
number.
Allow pr_flush() to fail if @system_state has not yet reached
SYSTEM_SCHEDULING. This avoids might_sleep() and msleep()
explosions that would otherwise occur:
[ 0.436739][ T0] printk: legacy console [ttyS0] enabled
[ 0.439820][ T0] printk: legacy bootconsole [earlyser0] disabled
[ 0.446822][ T0] BUG: scheduling while atomic: swapper/0/0/0x00000002
[ 0.450491][ T0] 1 lock held by swapper/0/0:
[ 0.457897][ T0] #0: ffffffff82ae5f88 (console_mutex){+.+.}-{4:4}, at: console_list_lock+0x20/0x70
[ 0.463141][ T0] Modules linked in:
[ 0.465307][ T0] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 6.10.0-rc1+ #372
[ 0.469394][ T0] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-2 04/01/2014
[ 0.474402][ T0] Call Trace:
[ 0.476246][ T0] <TASK>
[ 0.481473][ T0] dump_stack_lvl+0x93/0xb0
[ 0.483949][ T0] dump_stack+0x10/0x20
[ 0.486256][ T0] __schedule_bug+0x68/0x90
[ 0.488753][ T0] __schedule+0xb9b/0xd80
[ 0.491179][ T0] ? lock_release+0xb5/0x270
[ 0.493732][ T0] schedule+0x43/0x170
[ 0.495998][ T0] schedule_timeout+0xc5/0x1e0
[ 0.498634][ T0] ? __pfx_process_timeout+0x10/0x10
[ 0.501522][ T0] ? msleep+0x13/0x50
[ 0.503728][ T0] msleep+0x3c/0x50
[ 0.505847][ T0] __pr_flush.constprop.0.isra.0+0x56/0x500
[ 0.509050][ T0] ? _printk+0x58/0x80
[ 0.511332][ T0] ? lock_is_held_type+0x9c/0x110
[ 0.514106][ T0] unregister_console_locked+0xe1/0x450
[ 0.517144][ T0] register_console+0x509/0x620
[ 0.519827][ T0] ? __pfx_univ8250_console_init+0x10/0x10
[ 0.523042][ T0] univ8250_console_init+0x24/0x40
[ 0.525845][ T0] console_init+0x43/0x210
[ 0.528280][ T0] start_kernel+0x493/0x980
[ 0.530773][ T0] x86_64_start_reservations+0x18/0x30
[ 0.533755][ T0] x86_64_start_kernel+0xae/0xc0
[ 0.536473][ T0] common_startup_64+0x12c/0x138
[ 0.539210][ T0] </TASK>
And then the kernel goes into an infinite loop complaining about:
1. releasing a pinned lock
2. unpinning an unpinned lock
3. bad: scheduling from the idle thread!
4. goto 1
Signed-off-by: John Ogness <john.ogness@linutronix.de>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Link: https://lore.kernel.org/r/20240904120536.115780-3-john.ogness@linutronix.de
Signed-off-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/printk/printk.c | 4 ++++
1 file changed, 4 insertions(+)
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -3992,6 +3992,10 @@ static bool __pr_flush(struct console *c
u64 diff;
u64 seq;
+ /* Sorry, pr_flush() will not work this early. */
+ if (system_state < SYSTEM_SCHEDULING)
+ return false;
+
might_sleep();
seq = prb_next_reserve_seq(prb);

View File

@ -1,39 +0,0 @@
From: John Ogness <john.ogness@linutronix.de>
Date: Wed, 4 Sep 2024 14:11:22 +0206
Subject: [PATCH 40/54] printk: Flush console on unregister_console()
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
Ensure consoles have flushed pending records before
unregistering. The console should print up to at least its
related "console disabled" record.
Signed-off-by: John Ogness <john.ogness@linutronix.de>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Link: https://lore.kernel.org/r/20240904120536.115780-4-john.ogness@linutronix.de
Signed-off-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/printk/printk.c | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -3772,11 +3772,16 @@ static int unregister_console_locked(str
if (res > 0)
return 0;
+ if (!console_is_registered_locked(console))
+ res = -ENODEV;
+ else if (console_is_usable(console, console->flags))
+ __pr_flush(console, 1000, true);
+
/* Disable it unconditionally */
console_srcu_write_flags(console, console->flags & ~CON_ENABLED);
- if (!console_is_registered_locked(console))
- return -ENODEV;
+ if (res < 0)
+ return res;
/*
* Use the driver synchronization to ensure that the hardware is not

View File

@ -1,184 +0,0 @@
From: John Ogness <john.ogness@linutronix.de>
Date: Wed, 4 Sep 2024 14:11:23 +0206
Subject: [PATCH 41/54] printk: nbcon: Add context to usable() and emit()
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
The nbcon consoles will have two callbacks to be used for
different contexts. In order to determine if an nbcon console
is usable, console_is_usable() must know if it is a context
that will need to use the optional write_atomic() callback.
Also, nbcon_emit_next_record() must know which callback it
needs to call.
Add an extra parameter @use_atomic to console_is_usable() and
nbcon_emit_next_record() to specify this.
Since so far only the write_atomic() callback exists,
@use_atomic is set to true for all call sites.
For legacy consoles, @use_atomic is not used.
Signed-off-by: John Ogness <john.ogness@linutronix.de>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Link: https://lore.kernel.org/r/20240904120536.115780-5-john.ogness@linutronix.de
Signed-off-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/printk/internal.h | 8 +++++---
kernel/printk/nbcon.c | 34 +++++++++++++++++++---------------
kernel/printk/printk.c | 6 +++---
3 files changed, 27 insertions(+), 21 deletions(-)
--- a/kernel/printk/internal.h
+++ b/kernel/printk/internal.h
@@ -97,7 +97,7 @@ bool nbcon_legacy_emit_next_record(struc
* which can also play a role in deciding if @con can be used to print
* records.
*/
-static inline bool console_is_usable(struct console *con, short flags)
+static inline bool console_is_usable(struct console *con, short flags, bool use_atomic)
{
if (!(flags & CON_ENABLED))
return false;
@@ -106,7 +106,8 @@ static inline bool console_is_usable(str
return false;
if (flags & CON_NBCON) {
- if (!con->write_atomic)
+ /* The write_atomic() callback is optional. */
+ if (use_atomic && !con->write_atomic)
return false;
} else {
if (!con->write)
@@ -149,7 +150,8 @@ static inline void nbcon_atomic_flush_pe
static inline bool nbcon_legacy_emit_next_record(struct console *con, bool *handover,
int cookie) { return false; }
-static inline bool console_is_usable(struct console *con, short flags) { return false; }
+static inline bool console_is_usable(struct console *con, short flags,
+ bool use_atomic) { return false; }
#endif /* CONFIG_PRINTK */
--- a/kernel/printk/nbcon.c
+++ b/kernel/printk/nbcon.c
@@ -922,6 +922,7 @@ EXPORT_SYMBOL_GPL(nbcon_reacquire_nobuf)
/**
* nbcon_emit_next_record - Emit a record in the acquired context
* @wctxt: The write context that will be handed to the write function
+ * @use_atomic: True if the write_atomic() callback is to be used
*
* Return: True if this context still owns the console. False if
* ownership was handed over or taken.
@@ -935,7 +936,7 @@ EXPORT_SYMBOL_GPL(nbcon_reacquire_nobuf)
* When true is returned, @wctxt->ctxt.backlog indicates whether there are
* still records pending in the ringbuffer.
*/
-static bool nbcon_emit_next_record(struct nbcon_write_context *wctxt)
+static bool nbcon_emit_next_record(struct nbcon_write_context *wctxt, bool use_atomic)
{
struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt);
struct console *con = ctxt->console;
@@ -947,6 +948,18 @@ static bool nbcon_emit_next_record(struc
unsigned long dropped;
/*
+ * This function should never be called for consoles that have not
+ * implemented the necessary callback for writing: i.e. legacy
+ * consoles and, when atomic, nbcon consoles with no write_atomic().
+ * Handle it as if ownership was lost and try to continue.
+ */
+ if (WARN_ON_ONCE((use_atomic && !con->write_atomic) ||
+ !(console_srcu_read_flags(con) & CON_NBCON))) {
+ nbcon_context_release(ctxt);
+ return false;
+ }
+
+ /*
* The printk buffers are filled within an unsafe section. This
* prevents NBCON_PRIO_NORMAL and NBCON_PRIO_EMERGENCY from
* clobbering each other.
@@ -980,17 +993,8 @@ static bool nbcon_emit_next_record(struc
/* Initialize the write context for driver callbacks. */
nbcon_write_context_set_buf(wctxt, &pmsg.pbufs->outbuf[0], pmsg.outbuf_len);
- if (con->write_atomic) {
+ if (use_atomic)
con->write_atomic(con, wctxt);
- } else {
- /*
- * This function should never be called for legacy consoles.
- * Handle it as if ownership was lost and try to continue.
- */
- WARN_ON_ONCE(1);
- nbcon_context_release(ctxt);
- return false;
- }
if (!wctxt->outbuf) {
/*
@@ -1118,7 +1122,7 @@ static bool nbcon_atomic_emit_one(struct
* The higher priority printing context takes over responsibility
* to print the pending records.
*/
- if (!nbcon_emit_next_record(wctxt))
+ if (!nbcon_emit_next_record(wctxt, true))
return false;
nbcon_context_release(ctxt);
@@ -1219,7 +1223,7 @@ static int __nbcon_atomic_flush_pending_
* handed over or taken over. In both cases the context is no
* longer valid.
*/
- if (!nbcon_emit_next_record(&wctxt))
+ if (!nbcon_emit_next_record(&wctxt, true))
return -EAGAIN;
if (!ctxt->backlog) {
@@ -1305,7 +1309,7 @@ static void __nbcon_atomic_flush_pending
if (!(flags & CON_NBCON))
continue;
- if (!console_is_usable(con, flags))
+ if (!console_is_usable(con, flags, true))
continue;
if (nbcon_seq_read(con) >= stop_seq)
@@ -1490,7 +1494,7 @@ void nbcon_device_release(struct console
* the console is usable throughout flushing.
*/
cookie = console_srcu_read_lock();
- if (console_is_usable(con, console_srcu_read_flags(con)) &&
+ if (console_is_usable(con, console_srcu_read_flags(con), true) &&
prb_read_valid(prb, nbcon_seq_read(con), NULL)) {
/*
* If nbcon_atomic flushing is not available, fallback to
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -3072,7 +3072,7 @@ static bool console_flush_all(bool do_co
u64 printk_seq;
bool progress;
- if (!console_is_usable(con, flags))
+ if (!console_is_usable(con, flags, true))
continue;
any_usable = true;
@@ -3774,7 +3774,7 @@ static int unregister_console_locked(str
if (!console_is_registered_locked(console))
res = -ENODEV;
- else if (console_is_usable(console, console->flags))
+ else if (console_is_usable(console, console->flags, true))
__pr_flush(console, 1000, true);
/* Disable it unconditionally */
@@ -4044,7 +4044,7 @@ static bool __pr_flush(struct console *c
* that they make forward progress, so only increment
* @diff for usable consoles.
*/
- if (!console_is_usable(c, flags))
+ if (!console_is_usable(c, flags, true))
continue;
if (flags & CON_NBCON) {

View File

@ -1,57 +0,0 @@
From: John Ogness <john.ogness@linutronix.de>
Date: Wed, 4 Sep 2024 14:11:24 +0206
Subject: [PATCH 42/54] printk: nbcon: Init @nbcon_seq to highest possible
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
When initializing an nbcon console, have nbcon_alloc() set
@nbcon_seq to the highest possible sequence number. For all
practical purposes, this will guarantee that the console
will have nothing to print until later when @nbcon_seq is
set to the proper initial printing value.
This will be particularly important once kthread printing is
introduced because nbcon_alloc() can create/start the kthread
before the desired initial sequence number is known.
Signed-off-by: John Ogness <john.ogness@linutronix.de>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Link: https://lore.kernel.org/r/20240904120536.115780-6-john.ogness@linutronix.de
Signed-off-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/printk/nbcon.c | 8 +++++++-
kernel/printk/printk_ringbuffer.h | 2 ++
2 files changed, 9 insertions(+), 1 deletion(-)
--- a/kernel/printk/nbcon.c
+++ b/kernel/printk/nbcon.c
@@ -1397,7 +1397,13 @@ bool nbcon_alloc(struct console *con)
struct nbcon_state state = { };
nbcon_state_set(con, &state);
- atomic_long_set(&ACCESS_PRIVATE(con, nbcon_seq), 0);
+
+ /*
+ * Initialize @nbcon_seq to the highest possible sequence number so
+ * that practically speaking it will have nothing to print until a
+ * desired initial sequence number has been set via nbcon_seq_force().
+ */
+ atomic_long_set(&ACCESS_PRIVATE(con, nbcon_seq), ULSEQ_MAX(prb));
if (con->flags & CON_BOOT) {
/*
--- a/kernel/printk/printk_ringbuffer.h
+++ b/kernel/printk/printk_ringbuffer.h
@@ -404,10 +404,12 @@ u64 prb_next_reserve_seq(struct printk_r
#define __u64seq_to_ulseq(u64seq) (u64seq)
#define __ulseq_to_u64seq(rb, ulseq) (ulseq)
+#define ULSEQ_MAX(rb) (-1)
#else /* CONFIG_64BIT */
#define __u64seq_to_ulseq(u64seq) ((u32)u64seq)
+#define ULSEQ_MAX(rb) __u64seq_to_ulseq(prb_first_seq(rb) + 0x80000000UL)
static inline u64 __ulseq_to_u64seq(struct printk_ringbuffer *rb, u32 ulseq)
{
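Why prb_first_seq(rb) + 0x80000000UL acts as the highest possible value on 32-bit deserves a note; the reasoning below is an assumption based on the folding scheme above, not text from the patch:

	/*
	 * On 32-bit, sequence numbers are stored folded to 32 bits and
	 * unfolded relative to the ringbuffer head (see __ulseq_to_u64seq()).
	 * An offset of 0x80000000 (2^31) from prb_first_seq(rb) is the
	 * farthest "future" sequence that still unfolds unambiguously, so
	 * it serves as the effective maximum. On 64-bit, -1 is simply the
	 * largest u64 value.
	 */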

View File

@ -1,696 +0,0 @@
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 4 Sep 2024 14:11:25 +0206
Subject: [PATCH 43/54] printk: nbcon: Introduce printer kthreads
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
Provide the main implementation for running a printer kthread
per nbcon console that is takeover/handover aware. This
includes:
- new mandatory write_thread() callback
- kthread creation
- kthread main printing loop
- kthread wakeup mechanism
- kthread shutdown
kthread creation is a bit tricky because consoles may register
before kthreads can be created. In such cases, registration
will succeed, even though no kthread exists. Once kthreads can
be created, an early_initcall will set @printk_kthreads_ready.
If there are no registered boot consoles, the early_initcall
creates the kthreads for all registered nbcon consoles. If
kthread creation fails, the related console is unregistered.
If there are registered boot consoles when
@printk_kthreads_ready is set, no kthreads are created until
the final boot console unregisters.
Once kthread creation finally occurs, @printk_kthreads_running
is set so that the system knows kthreads are available for all
registered nbcon consoles.
If @printk_kthreads_running is already set when the console
is registering, the kthread is created during registration. If
kthread creation fails, the registration will fail.
Until @printk_kthreads_running is set, console printing occurs
directly via the console_lock.
kthread shutdown on system shutdown/reboot is necessary to
ensure the printer kthreads finish their printing so that the
system can cleanly transition back to direct printing via the
console_lock in order to reliably push out the final
shutdown/reboot messages. @printk_kthreads_running is cleared
before shutting down the individual kthreads.
The kthread uses a new mandatory write_thread() callback that
is called with both device_lock() and the console context
acquired.
The console ownership handling is necessary for synchronization
against write_atomic() which is synchronized only via the
console context ownership.
The device_lock() serializes acquiring the console context with
NBCON_PRIO_NORMAL. It is needed in case the device_lock() does
not disable preemption. It prevents the following race:
CPU0                                    CPU1

[ task A ]
nbcon_context_try_acquire()
  # success with NORMAL prio
  # .unsafe == false; // safe for takeover

[ schedule: task A -> B ]

                                        WARN_ON()
                                          nbcon_atomic_flush_pending()
                                            nbcon_context_try_acquire()
                                              # success with EMERGENCY prio
                                              # flushing
                                              nbcon_context_release()

                                              # HERE: con->nbcon_state is free
                                              #       to take by anyone !!!

[ task B ]
nbcon_context_try_acquire()
  # success with NORMAL prio

[ schedule: task B -> A ]

nbcon_enter_unsafe()
  nbcon_context_can_proceed()
BUG: nbcon_context_can_proceed() returns "true" because
the console is owned by a context on CPU0 with
NBCON_PRIO_NORMAL.
But it should return "false". The console is owned
by a context from task B and we do the check
in a context from task A.
Note that with these changes, the printer kthreads do not yet
take over full responsibility for nbcon printing during normal
operation. These changes only focus on the lifecycle of the
kthreads.
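The device_lock()/device_unlock() callbacks referenced here are driver-provided. A minimal sketch of what such callbacks might look like for a spinlock-protected port; struct my_port and reaching it via con->data are assumptions for illustration:

	static void my_device_lock(struct console *con, unsigned long *flags)
	{
		struct my_port *port = con->data;	/* hypothetical back-pointer */

		spin_lock_irqsave(&port->lock, *flags);
	}

	static void my_device_unlock(struct console *con, unsigned long flags)
	{
		struct my_port *port = con->data;

		spin_unlock_irqrestore(&port->lock, flags);
	}

Holding such a lock while the kthread acquires the console context at NBCON_PRIO_NORMAL is what rules out the NORMAL-vs-NORMAL race shown above.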
Co-developed-by: John Ogness <john.ogness@linutronix.de>
Signed-off-by: John Ogness <john.ogness@linutronix.de>
Signed-off-by: Thomas Gleixner (Intel) <tglx@linutronix.de>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Link: https://lore.kernel.org/r/20240904120536.115780-7-john.ogness@linutronix.de
Signed-off-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
include/linux/console.h | 40 +++++++
kernel/printk/internal.h | 27 +++++
kernel/printk/nbcon.c | 245 +++++++++++++++++++++++++++++++++++++++++++++++
kernel/printk/printk.c | 108 ++++++++++++++++++++
4 files changed, 420 insertions(+)
--- a/include/linux/console.h
+++ b/include/linux/console.h
@@ -16,7 +16,9 @@
#include <linux/atomic.h>
#include <linux/bits.h>
+#include <linux/irq_work.h>
#include <linux/rculist.h>
+#include <linux/rcuwait.h>
#include <linux/types.h>
#include <linux/vesa.h>
@@ -324,6 +326,9 @@ struct nbcon_write_context {
* @nbcon_seq: Sequence number of the next record for nbcon to print
* @nbcon_device_ctxt: Context available for non-printing operations
* @pbufs: Pointer to nbcon private buffer
+ * @kthread: Printer kthread for this console
+ * @rcuwait: RCU-safe wait object for @kthread waking
+ * @irq_work: Defer @kthread waking to IRQ work context
*/
struct console {
char name[16];
@@ -378,6 +383,37 @@ struct console {
void (*write_atomic)(struct console *con, struct nbcon_write_context *wctxt);
/**
+ * @write_thread:
+ *
+ * NBCON callback to write out text in task context.
+ *
+ * This callback must be called only in task context with both
+ * device_lock() and the nbcon console acquired with
+ * NBCON_PRIO_NORMAL.
+ *
+ * The same rules for console ownership verification and unsafe
+ * sections handling applies as with write_atomic().
+ *
+ * The console ownership handling is necessary for synchronization
+ * against write_atomic() which is synchronized only via the context.
+ *
+ * The device_lock() provides the primary serialization for operations
+ * on the device. It might be as relaxed (mutex)[*] or as tight
+ * (disabled preemption and interrupts) as needed. It allows
+ * the kthread to operate in the least restrictive mode[**].
+ *
+ * [*] Standalone nbcon_context_try_acquire() is not safe with
+ * preemption enabled, see nbcon_owner_matches(). But it
+ * can be safe when always called in a preemptible context
+ * under the device_lock().
+ *
+ * [**] The device_lock() makes sure that nbcon_context_try_acquire()
+ * would never need to spin which is important especially with
+ * PREEMPT_RT.
+ */
+ void (*write_thread)(struct console *con, struct nbcon_write_context *wctxt);
+
+ /**
* @device_lock:
*
* NBCON callback to begin synchronization with driver code.
@@ -423,7 +459,11 @@ struct console {
atomic_t __private nbcon_state;
atomic_long_t __private nbcon_seq;
struct nbcon_context __private nbcon_device_ctxt;
+
struct printk_buffers *pbufs;
+ struct task_struct *kthread;
+ struct rcuwait rcuwait;
+ struct irq_work irq_work;
};
#ifdef CONFIG_LOCKDEP
--- a/kernel/printk/internal.h
+++ b/kernel/printk/internal.h
@@ -48,6 +48,7 @@ struct printk_ringbuffer;
struct dev_printk_info;
extern struct printk_ringbuffer *prb;
+extern bool printk_kthreads_running;
__printf(4, 0)
int vprintk_store(int facility, int level,
@@ -90,6 +91,9 @@ enum nbcon_prio nbcon_get_default_prio(v
void nbcon_atomic_flush_pending(void);
bool nbcon_legacy_emit_next_record(struct console *con, bool *handover,
int cookie);
+bool nbcon_kthread_create(struct console *con);
+void nbcon_kthread_stop(struct console *con);
+void nbcon_kthreads_wake(void);
/*
* Check if the given console is currently capable and allowed to print
@@ -125,12 +129,34 @@ static inline bool console_is_usable(str
return true;
}
+/**
+ * nbcon_kthread_wake - Wake up a console printing thread
+ * @con: Console to operate on
+ */
+static inline void nbcon_kthread_wake(struct console *con)
+{
+ /*
+ * Guarantee any new records can be seen by tasks preparing to wait
+ * before this context checks if the rcuwait is empty.
+ *
+ * The full memory barrier in rcuwait_wake_up() pairs with the full
+ * memory barrier within set_current_state() of
+ * ___rcuwait_wait_event(), which is called after prepare_to_rcuwait()
+ * adds the waiter but before it has checked the wait condition.
+ *
+ * This pairs with nbcon_kthread_func:A.
+ */
+ rcuwait_wake_up(&con->rcuwait); /* LMM(nbcon_kthread_wake:A) */
+}
+
#else
#define PRINTK_PREFIX_MAX 0
#define PRINTK_MESSAGE_MAX 0
#define PRINTKRB_RECORD_MAX 0
+#define printk_kthreads_running (false)
+
/*
* In !PRINTK builds we still export console_sem
* semaphore and some of console functions (console_unlock()/etc.), so
@@ -149,6 +175,7 @@ static inline enum nbcon_prio nbcon_get_
static inline void nbcon_atomic_flush_pending(void) { }
static inline bool nbcon_legacy_emit_next_record(struct console *con, bool *handover,
int cookie) { return false; }
+static inline void nbcon_kthread_wake(struct console *con) { }
static inline bool console_is_usable(struct console *con, short flags,
bool use_atomic) { return false; }
--- a/kernel/printk/nbcon.c
+++ b/kernel/printk/nbcon.c
@@ -10,6 +10,7 @@
#include <linux/export.h>
#include <linux/init.h>
#include <linux/irqflags.h>
+#include <linux/kthread.h>
#include <linux/minmax.h>
#include <linux/percpu.h>
#include <linux/preempt.h>
@@ -952,6 +953,9 @@ static bool nbcon_emit_next_record(struc
* implemented the necessary callback for writing: i.e. legacy
* consoles and, when atomic, nbcon consoles with no write_atomic().
* Handle it as if ownership was lost and try to continue.
+ *
+ * Note that for nbcon consoles the write_thread() callback is
+ * mandatory and was already checked in nbcon_alloc().
*/
if (WARN_ON_ONCE((use_atomic && !con->write_atomic) ||
!(console_srcu_read_flags(con) & CON_NBCON))) {
@@ -995,6 +999,8 @@ static bool nbcon_emit_next_record(struc
if (use_atomic)
con->write_atomic(con, wctxt);
+ else
+ con->write_thread(con, wctxt);
if (!wctxt->outbuf) {
/*
@@ -1036,6 +1042,228 @@ static bool nbcon_emit_next_record(struc
return nbcon_context_exit_unsafe(ctxt);
}
+/**
+ * nbcon_kthread_should_wakeup - Check whether a printer thread should wakeup
+ * @con: Console to operate on
+ * @ctxt: The nbcon context from nbcon_context_try_acquire()
+ *
+ * Return: True if the thread should shutdown or if the console is
+ * allowed to print and a record is available. False otherwise.
+ *
+ * After the thread wakes up, it must first check if it should shutdown before
+ * attempting any printing.
+ */
+static bool nbcon_kthread_should_wakeup(struct console *con, struct nbcon_context *ctxt)
+{
+ bool ret = false;
+ short flags;
+ int cookie;
+
+ if (kthread_should_stop())
+ return true;
+
+ cookie = console_srcu_read_lock();
+
+ flags = console_srcu_read_flags(con);
+ if (console_is_usable(con, flags, false)) {
+ /* Bring the sequence in @ctxt up to date */
+ ctxt->seq = nbcon_seq_read(con);
+
+ ret = prb_read_valid(prb, ctxt->seq, NULL);
+ }
+
+ console_srcu_read_unlock(cookie);
+ return ret;
+}
+
+/**
+ * nbcon_kthread_func - The printer thread function
+ * @__console: Console to operate on
+ *
+ * Return: 0
+ */
+static int nbcon_kthread_func(void *__console)
+{
+ struct console *con = __console;
+ struct nbcon_write_context wctxt = {
+ .ctxt.console = con,
+ .ctxt.prio = NBCON_PRIO_NORMAL,
+ };
+ struct nbcon_context *ctxt = &ACCESS_PRIVATE(&wctxt, ctxt);
+ short con_flags;
+ bool backlog;
+ int cookie;
+
+wait_for_event:
+ /*
+ * Guarantee this task is visible on the rcuwait before
+ * checking the wake condition.
+ *
+ * The full memory barrier within set_current_state() of
+ * ___rcuwait_wait_event() pairs with the full memory
+ * barrier within rcuwait_has_sleeper().
+ *
+ * This pairs with rcuwait_has_sleeper:A and nbcon_kthread_wake:A.
+ */
+ rcuwait_wait_event(&con->rcuwait,
+ nbcon_kthread_should_wakeup(con, ctxt),
+ TASK_INTERRUPTIBLE); /* LMM(nbcon_kthread_func:A) */
+
+ do {
+ if (kthread_should_stop())
+ return 0;
+
+ backlog = false;
+
+ /*
+ * Keep the srcu read lock around the entire operation so that
+ * synchronize_srcu() can guarantee that the kthread stopped
+ * or suspended printing.
+ */
+ cookie = console_srcu_read_lock();
+
+ con_flags = console_srcu_read_flags(con);
+
+ if (console_is_usable(con, con_flags, false)) {
+ unsigned long lock_flags;
+
+ con->device_lock(con, &lock_flags);
+
+ /*
+ * Ensure this stays on the CPU to make handover and
+ * takeover possible.
+ */
+ cant_migrate();
+
+ if (nbcon_context_try_acquire(ctxt)) {
+ /*
+ * If the emit fails, this context is no
+ * longer the owner.
+ */
+ if (nbcon_emit_next_record(&wctxt, false)) {
+ nbcon_context_release(ctxt);
+ backlog = ctxt->backlog;
+ }
+ }
+
+ con->device_unlock(con, lock_flags);
+ }
+
+ console_srcu_read_unlock(cookie);
+
+ cond_resched();
+
+ } while (backlog);
+
+ goto wait_for_event;
+}
+
+/**
+ * nbcon_irq_work - irq work to wake console printer thread
+ * @irq_work: The irq work to operate on
+ */
+static void nbcon_irq_work(struct irq_work *irq_work)
+{
+ struct console *con = container_of(irq_work, struct console, irq_work);
+
+ nbcon_kthread_wake(con);
+}
+
+static inline bool rcuwait_has_sleeper(struct rcuwait *w)
+{
+ /*
+ * Guarantee any new records can be seen by tasks preparing to wait
+ * before this context checks if the rcuwait is empty.
+ *
+ * This full memory barrier pairs with the full memory barrier within
+ * set_current_state() of ___rcuwait_wait_event(), which is called
+ * after prepare_to_rcuwait() adds the waiter but before it has
+ * checked the wait condition.
+ *
+ * This pairs with nbcon_kthread_func:A.
+ */
+ smp_mb(); /* LMM(rcuwait_has_sleeper:A) */
+ return rcuwait_active(w);
+}
+
+/**
+ * nbcon_kthreads_wake - Wake up printing threads using irq_work
+ */
+void nbcon_kthreads_wake(void)
+{
+ struct console *con;
+ int cookie;
+
+ if (!printk_kthreads_running)
+ return;
+
+ cookie = console_srcu_read_lock();
+ for_each_console_srcu(con) {
+ if (!(console_srcu_read_flags(con) & CON_NBCON))
+ continue;
+
+ /*
+ * Only schedule irq_work if the printing thread is
+ * actively waiting. If not waiting, the thread will
+ * notice by itself that it has work to do.
+ */
+ if (rcuwait_has_sleeper(&con->rcuwait))
+ irq_work_queue(&con->irq_work);
+ }
+ console_srcu_read_unlock(cookie);
+}
+
+/*
+ * nbcon_kthread_stop - Stop a console printer thread
+ * @con: Console to operate on
+ */
+void nbcon_kthread_stop(struct console *con)
+{
+ lockdep_assert_console_list_lock_held();
+
+ if (!con->kthread)
+ return;
+
+ kthread_stop(con->kthread);
+ con->kthread = NULL;
+}
+
+/**
+ * nbcon_kthread_create - Create a console printer thread
+ * @con: Console to operate on
+ *
+ * Return: True if the kthread was started or already exists.
+ * Otherwise false and @con must not be registered.
+ *
+ * This function is called when it is expected that nbcon consoles will be
+ * flushed using the kthread. Messages printed with NBCON_PRIO_NORMAL will
+ * no longer be flushed by the legacy loop, which is why failure must be
+ * fatal for console registration.
+ *
+ * If @con was already registered and this function fails, @con must be
+ * unregistered before the global state variable @printk_kthreads_running
+ * can be set.
+ */
+bool nbcon_kthread_create(struct console *con)
+{
+ struct task_struct *kt;
+
+ lockdep_assert_console_list_lock_held();
+
+ if (con->kthread)
+ return true;
+
+ kt = kthread_run(nbcon_kthread_func, con, "pr/%s%d", con->name, con->index);
+ if (WARN_ON(IS_ERR(kt))) {
+ con_printk(KERN_ERR, con, "failed to start printing thread\n");
+ return false;
+ }
+
+ con->kthread = kt;
+
+ return true;
+}
+
/* Track the nbcon emergency nesting per CPU. */
static DEFINE_PER_CPU(unsigned int, nbcon_pcpu_emergency_nesting);
static unsigned int early_nbcon_pcpu_emergency_nesting __initdata;
@@ -1396,6 +1624,12 @@ bool nbcon_alloc(struct console *con)
{
struct nbcon_state state = { };
+ /* The write_thread() callback is mandatory. */
+ if (WARN_ON(!con->write_thread))
+ return false;
+
+ rcuwait_init(&con->rcuwait);
+ init_irq_work(&con->irq_work, nbcon_irq_work);
nbcon_state_set(con, &state);
/*
@@ -1418,6 +1652,14 @@ bool nbcon_alloc(struct console *con)
con_printk(KERN_ERR, con, "failed to allocate printing buffer\n");
return false;
}
+
+ if (printk_kthreads_running) {
+ if (!nbcon_kthread_create(con)) {
+ kfree(con->pbufs);
+ con->pbufs = NULL;
+ return false;
+ }
+ }
}
return true;
@@ -1431,6 +1673,9 @@ void nbcon_free(struct console *con)
{
struct nbcon_state state = { };
+ if (printk_kthreads_running)
+ nbcon_kthread_stop(con);
+
nbcon_state_set(con, &state);
/* Boot consoles share global printk buffers. */
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -34,6 +34,7 @@
#include <linux/security.h>
#include <linux/memblock.h>
#include <linux/syscalls.h>
+#include <linux/syscore_ops.h>
#include <linux/vmcore_info.h>
#include <linux/ratelimit.h>
#include <linux/kmsg_dump.h>
@@ -496,6 +497,9 @@ static u64 syslog_seq;
static size_t syslog_partial;
static bool syslog_time;
+/* True when _all_ printer threads are available for printing. */
+bool printk_kthreads_running;
+
struct latched_seq {
seqcount_latch_t latch;
u64 val[2];
@@ -3028,6 +3032,8 @@ static bool console_emit_next_record(str
return false;
}
+static inline void printk_kthreads_check_locked(void) { }
+
#endif /* CONFIG_PRINTK */
/*
@@ -3388,6 +3394,102 @@ void console_start(struct console *conso
}
EXPORT_SYMBOL(console_start);
+#ifdef CONFIG_PRINTK
+static int unregister_console_locked(struct console *console);
+
+/* True when system boot is far enough to create printer threads. */
+static bool printk_kthreads_ready __ro_after_init;
+
+/**
+ * printk_kthreads_shutdown - shut down all threaded printers
+ *
+ * On system shutdown all threaded printers are stopped. This allows printk
+ * to transition back to atomic printing, thus providing a robust mechanism
+ * for the final shutdown/reboot messages to be output.
+ */
+static void printk_kthreads_shutdown(void)
+{
+ struct console *con;
+
+ console_list_lock();
+ if (printk_kthreads_running) {
+ printk_kthreads_running = false;
+
+ for_each_console(con) {
+ if (con->flags & CON_NBCON)
+ nbcon_kthread_stop(con);
+ }
+
+ /*
+ * The threads may have been stopped while printing a
+ * backlog. Flush any records left over.
+ */
+ nbcon_atomic_flush_pending();
+ }
+ console_list_unlock();
+}
+
+static struct syscore_ops printk_syscore_ops = {
+ .shutdown = printk_kthreads_shutdown,
+};
+
+/*
+ * If appropriate, start nbcon kthreads and set @printk_kthreads_running.
+ * If any kthreads fail to start, those consoles are unregistered.
+ *
+ * Must be called under console_list_lock().
+ */
+static void printk_kthreads_check_locked(void)
+{
+ struct hlist_node *tmp;
+ struct console *con;
+
+ lockdep_assert_console_list_lock_held();
+
+ if (!printk_kthreads_ready)
+ return;
+
+ /*
+ * Printer threads cannot be started as long as any boot console is
+ * registered because there is no way to synchronize the hardware
+ * registers between boot console code and regular console code.
+ * It can only be known that there will be no new boot consoles when
+ * an nbcon console is registered.
+ */
+ if (have_boot_console || !have_nbcon_console) {
+ /* Clear flag in case all nbcon consoles unregistered. */
+ printk_kthreads_running = false;
+ return;
+ }
+
+ if (printk_kthreads_running)
+ return;
+
+ hlist_for_each_entry_safe(con, tmp, &console_list, node) {
+ if (!(con->flags & CON_NBCON))
+ continue;
+
+ if (!nbcon_kthread_create(con))
+ unregister_console_locked(con);
+ }
+
+ printk_kthreads_running = true;
+}
+
+static int __init printk_set_kthreads_ready(void)
+{
+ register_syscore_ops(&printk_syscore_ops);
+
+ console_list_lock();
+ printk_kthreads_ready = true;
+ printk_kthreads_check_locked();
+ console_list_unlock();
+
+ return 0;
+}
+early_initcall(printk_set_kthreads_ready);
+#endif /* CONFIG_PRINTK */
+
static int __read_mostly keep_bootcon;
static int __init keep_bootcon_setup(char *str)
@@ -3746,6 +3848,9 @@ void register_console(struct console *ne
unregister_console_locked(con);
}
}
+
+ /* Changed console list, may require printer threads to start/stop. */
+ printk_kthreads_check_locked();
unlock:
console_list_unlock();
}
@@ -3842,6 +3947,9 @@ static int unregister_console_locked(str
if (!found_nbcon_con)
have_nbcon_console = found_nbcon_con;
+ /* Changed console list, may require printer threads to start/stop. */
+ printk_kthreads_check_locked();
+
return res;
}

View File

@ -1,111 +0,0 @@
From: John Ogness <john.ogness@linutronix.de>
Date: Wed, 4 Sep 2024 14:11:26 +0206
Subject: [PATCH 44/54] printk: nbcon: Relocate nbcon_atomic_emit_one()
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
Move nbcon_atomic_emit_one() so that it can be used by
nbcon_kthread_func() in a follow-up commit.
Signed-off-by: John Ogness <john.ogness@linutronix.de>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Link: https://lore.kernel.org/r/20240904120536.115780-8-john.ogness@linutronix.de
Signed-off-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/printk/nbcon.c | 78 +++++++++++++++++++++++++-------------------------
1 file changed, 39 insertions(+), 39 deletions(-)
--- a/kernel/printk/nbcon.c
+++ b/kernel/printk/nbcon.c
@@ -1042,6 +1042,45 @@ static bool nbcon_emit_next_record(struc
return nbcon_context_exit_unsafe(ctxt);
}
+/*
+ * nbcon_atomic_emit_one - Print one record for an nbcon console using the
+ * write_atomic() callback
+ * @wctxt: An initialized write context struct to use for this context
+ *
+ * Return: True, when a record has been printed and there are still
+ * pending records. The caller might want to continue flushing.
+ *
+ * False, when there is no pending record, or when the console
+ * context cannot be acquired, or the ownership has been lost.
+ * The caller should give up. Either the job is done, cannot be
+ * done, or will be handled by the owning context.
+ *
+ * This is an internal helper to handle the locking of the console before
+ * calling nbcon_emit_next_record().
+ */
+static bool nbcon_atomic_emit_one(struct nbcon_write_context *wctxt)
+{
+ struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt);
+
+ if (!nbcon_context_try_acquire(ctxt))
+ return false;
+
+ /*
+ * nbcon_emit_next_record() returns false when the console was
+ * handed over or taken over. In both cases the context is no
+ * longer valid.
+ *
+ * The higher priority printing context takes over responsibility
+ * to print the pending records.
+ */
+ if (!nbcon_emit_next_record(wctxt, true))
+ return false;
+
+ nbcon_context_release(ctxt);
+
+ return ctxt->backlog;
+}
+
/**
* nbcon_kthread_should_wakeup - Check whether a printer thread should wakeup
* @con: Console to operate on
@@ -1319,45 +1358,6 @@ enum nbcon_prio nbcon_get_default_prio(v
return NBCON_PRIO_NORMAL;
}
-/*
- * nbcon_atomic_emit_one - Print one record for an nbcon console using the
- * write_atomic() callback
- * @wctxt: An initialized write context struct to use for this context
- *
- * Return: True, when a record has been printed and there are still
- * pending records. The caller might want to continue flushing.
- *
- * False, when there is no pending record, or when the console
- * context cannot be acquired, or the ownership has been lost.
- * The caller should give up. Either the job is done, cannot be
- * done, or will be handled by the owning context.
- *
- * This is an internal helper to handle the locking of the console before
- * calling nbcon_emit_next_record().
- */
-static bool nbcon_atomic_emit_one(struct nbcon_write_context *wctxt)
-{
- struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt);
-
- if (!nbcon_context_try_acquire(ctxt))
- return false;
-
- /*
- * nbcon_emit_next_record() returns false when the console was
- * handed over or taken over. In both cases the context is no
- * longer valid.
- *
- * The higher priority printing context takes over responsibility
- * to print the pending records.
- */
- if (!nbcon_emit_next_record(wctxt, true))
- return false;
-
- nbcon_context_release(ctxt);
-
- return ctxt->backlog;
-}
-
/**
* nbcon_legacy_emit_next_record - Print one record for an nbcon console
* in legacy contexts

View File

@ -1,228 +0,0 @@
From: John Ogness <john.ogness@linutronix.de>
Date: Wed, 4 Sep 2024 14:11:27 +0206
Subject: [PATCH 45/54] printk: nbcon: Use thread callback if in task context
for legacy
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
When printing via console_lock, the write_atomic() callback is
used for nbcon consoles. However, if it is known that the
current context is a task context, the write_thread() callback
can be used instead.
Using write_thread() instead of write_atomic() helps to reduce
large disabled preemption regions when the device_lock does not
disable preemption.
This is mainly a preparatory change to allow avoiding
write_atomic() completely during normal operation if boot
consoles are registered.
As a side-effect, it also allows consolidating the printing
code for legacy printing and the kthread printer.
Signed-off-by: John Ogness <john.ogness@linutronix.de>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Link: https://lore.kernel.org/r/20240904120536.115780-9-john.ogness@linutronix.de
Signed-off-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/printk/internal.h | 4 -
kernel/printk/nbcon.c | 95 ++++++++++++++++++++++++++---------------------
kernel/printk/printk.c | 5 +-
3 files changed, 59 insertions(+), 45 deletions(-)
--- a/kernel/printk/internal.h
+++ b/kernel/printk/internal.h
@@ -90,7 +90,7 @@ void nbcon_free(struct console *con);
enum nbcon_prio nbcon_get_default_prio(void);
void nbcon_atomic_flush_pending(void);
bool nbcon_legacy_emit_next_record(struct console *con, bool *handover,
- int cookie);
+ int cookie, bool use_atomic);
bool nbcon_kthread_create(struct console *con);
void nbcon_kthread_stop(struct console *con);
void nbcon_kthreads_wake(void);
@@ -174,7 +174,7 @@ static inline void nbcon_free(struct con
static inline enum nbcon_prio nbcon_get_default_prio(void) { return NBCON_PRIO_NONE; }
static inline void nbcon_atomic_flush_pending(void) { }
static inline bool nbcon_legacy_emit_next_record(struct console *con, bool *handover,
- int cookie) { return false; }
+ int cookie, bool use_atomic) { return false; }
static inline void nbcon_kthread_wake(struct console *con) { }
static inline bool console_is_usable(struct console *con, short flags,
--- a/kernel/printk/nbcon.c
+++ b/kernel/printk/nbcon.c
@@ -1043,9 +1043,10 @@ static bool nbcon_emit_next_record(struc
}
/*
- * nbcon_atomic_emit_one - Print one record for an nbcon console using the
- * write_atomic() callback
+ * nbcon_emit_one - Print one record for an nbcon console using the
+ * specified callback
* @wctxt: An initialized write context struct to use for this context
+ * @use_atomic: True if the write_atomic() callback is to be used
*
* Return: True, when a record has been printed and there are still
* pending records. The caller might want to continue flushing.
@@ -1058,12 +1059,25 @@ static bool nbcon_emit_next_record(struc
* This is an internal helper to handle the locking of the console before
* calling nbcon_emit_next_record().
*/
-static bool nbcon_atomic_emit_one(struct nbcon_write_context *wctxt)
+static bool nbcon_emit_one(struct nbcon_write_context *wctxt, bool use_atomic)
{
struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt);
+ struct console *con = ctxt->console;
+ unsigned long flags;
+ bool ret = false;
+
+ if (!use_atomic) {
+ con->device_lock(con, &flags);
+
+ /*
+ * Ensure this stays on the CPU to make handover and
+ * takeover possible.
+ */
+ cant_migrate();
+ }
if (!nbcon_context_try_acquire(ctxt))
- return false;
+ goto out;
/*
* nbcon_emit_next_record() returns false when the console was
@@ -1073,12 +1087,16 @@ static bool nbcon_atomic_emit_one(struct
* The higher priority printing context takes over responsibility
* to print the pending records.
*/
- if (!nbcon_emit_next_record(wctxt, true))
- return false;
+ if (!nbcon_emit_next_record(wctxt, use_atomic))
+ goto out;
nbcon_context_release(ctxt);
- return ctxt->backlog;
+ ret = ctxt->backlog;
+out:
+ if (!use_atomic)
+ con->device_unlock(con, flags);
+ return ret;
}
/**
@@ -1163,30 +1181,8 @@ static int nbcon_kthread_func(void *__co
con_flags = console_srcu_read_flags(con);
- if (console_is_usable(con, con_flags, false)) {
- unsigned long lock_flags;
-
- con->device_lock(con, &lock_flags);
-
- /*
- * Ensure this stays on the CPU to make handover and
- * takeover possible.
- */
- cant_migrate();
-
- if (nbcon_context_try_acquire(ctxt)) {
- /*
- * If the emit fails, this context is no
- * longer the owner.
- */
- if (nbcon_emit_next_record(&wctxt, false)) {
- nbcon_context_release(ctxt);
- backlog = ctxt->backlog;
- }
- }
-
- con->device_unlock(con, lock_flags);
- }
+ if (console_is_usable(con, con_flags, false))
+ backlog = nbcon_emit_one(&wctxt, false);
console_srcu_read_unlock(cookie);
@@ -1367,6 +1363,13 @@ enum nbcon_prio nbcon_get_default_prio(v
* both the console_lock and the SRCU read lock. Otherwise it
* is set to false.
* @cookie: The cookie from the SRCU read lock.
+ * @use_atomic: Set true when called in an atomic or unknown context.
+ * It affects which nbcon callback will be used: write_atomic()
+ * or write_thread().
+ *
+ * When false, the write_thread() callback is used and would be
+ *	called in a preemptible context unless disabled by the
+ * device_lock. The legacy handover is not allowed in this mode.
*
* Context: Any context except NMI.
* Return: True, when a record has been printed and there are still
@@ -1382,26 +1385,36 @@ enum nbcon_prio nbcon_get_default_prio(v
* Essentially it is the nbcon version of console_emit_next_record().
*/
bool nbcon_legacy_emit_next_record(struct console *con, bool *handover,
- int cookie)
+ int cookie, bool use_atomic)
{
struct nbcon_write_context wctxt = { };
struct nbcon_context *ctxt = &ACCESS_PRIVATE(&wctxt, ctxt);
unsigned long flags;
bool progress;
- /* Use the same procedure as console_emit_next_record(). */
- printk_safe_enter_irqsave(flags);
- console_lock_spinning_enable();
- stop_critical_timings();
-
ctxt->console = con;
ctxt->prio = nbcon_get_default_prio();
- progress = nbcon_atomic_emit_one(&wctxt);
+ if (use_atomic) {
+ /*
+ * In an atomic or unknown context, use the same procedure as
+		 * in console_emit_next_record(). It allows handover.
+ */
+ printk_safe_enter_irqsave(flags);
+ console_lock_spinning_enable();
+ stop_critical_timings();
+ }
- start_critical_timings();
- *handover = console_lock_spinning_disable_and_check(cookie);
- printk_safe_exit_irqrestore(flags);
+ progress = nbcon_emit_one(&wctxt, use_atomic);
+
+ if (use_atomic) {
+ start_critical_timings();
+ *handover = console_lock_spinning_disable_and_check(cookie);
+ printk_safe_exit_irqrestore(flags);
+ } else {
+ /* Non-atomic does not perform legacy spinning handovers. */
+ *handover = false;
+ }
return progress;
}
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -3078,12 +3078,13 @@ static bool console_flush_all(bool do_co
u64 printk_seq;
bool progress;
- if (!console_is_usable(con, flags, true))
+ if (!console_is_usable(con, flags, !do_cond_resched))
continue;
any_usable = true;
if (flags & CON_NBCON) {
- progress = nbcon_legacy_emit_next_record(con, handover, cookie);
+ progress = nbcon_legacy_emit_next_record(con, handover, cookie,
+ !do_cond_resched);
printk_seq = nbcon_seq_read(con);
} else {
progress = console_emit_next_record(con, handover, cookie);

View File

@ -1,287 +0,0 @@
From: John Ogness <john.ogness@linutronix.de>
Date: Wed, 4 Sep 2024 14:11:28 +0206
Subject: [PATCH 46/54] printk: nbcon: Rely on kthreads for normal operation
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
Once the kthread is running and available
(i.e. @printk_kthreads_running is set), the kthread becomes
responsible for flushing any pending messages which are added
in NBCON_PRIO_NORMAL context. Namely the legacy
console_flush_all() and device_release() no longer flush the
console. And nbcon_atomic_flush_pending() used by
nbcon_cpu_emergency_exit() no longer flushes messages added
after the emergency messages.
The console context is safe when used by the kthread only when
one of the following conditions is true:
1. Other caller acquires the console context with
NBCON_PRIO_NORMAL with preemption disabled. It will
release the context before rescheduling.
2. Other caller acquires the console context with
NBCON_PRIO_NORMAL under the device_lock.
3. The kthread is the only context which acquires the console
with NBCON_PRIO_NORMAL.
This is satisfied for all atomic printing call sites:
nbcon_legacy_emit_next_record() (#1)
nbcon_atomic_flush_pending_con() (#1)
nbcon_device_release() (#2)
It is even double guaranteed when @printk_kthreads_running
is set because then _only_ the kthread will print for
NBCON_PRIO_NORMAL. (#3)
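
As a rough sketch (not part of this patch), a call site honoring
condition #1 would look like this, given an already initialized
write context and using the context helpers from this series:

  preempt_disable();
  if (nbcon_context_try_acquire(ctxt)) {
          /*
           * nbcon_emit_next_record() returns false when ownership
           * was handed over or taken over; only release on success.
           */
          if (nbcon_emit_next_record(wctxt, true))
                  nbcon_context_release(ctxt);
  }
  preempt_enable();

Because the context is released before this CPU can reschedule, the
kthread can never observe a stale NBCON_PRIO_NORMAL owner.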
Signed-off-by: John Ogness <john.ogness@linutronix.de>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Link: https://lore.kernel.org/r/20240904120536.115780-10-john.ogness@linutronix.de
Signed-off-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/printk/internal.h | 26 +++++++++++++++++++++++++
kernel/printk/nbcon.c | 17 ++++++++++------
kernel/printk/printk.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++-
3 files changed, 84 insertions(+), 7 deletions(-)
--- a/kernel/printk/internal.h
+++ b/kernel/printk/internal.h
@@ -113,6 +113,13 @@ static inline bool console_is_usable(str
/* The write_atomic() callback is optional. */
if (use_atomic && !con->write_atomic)
return false;
+
+ /*
+ * For the !use_atomic case, @printk_kthreads_running is not
+ * checked because the write_thread() callback is also used
+ * via the legacy loop when the printer threads are not
+ * available.
+ */
} else {
if (!con->write)
return false;
@@ -176,6 +183,7 @@ static inline void nbcon_atomic_flush_pe
static inline bool nbcon_legacy_emit_next_record(struct console *con, bool *handover,
int cookie, bool use_atomic) { return false; }
static inline void nbcon_kthread_wake(struct console *con) { }
+static inline void nbcon_kthreads_wake(void) { }
static inline bool console_is_usable(struct console *con, short flags,
bool use_atomic) { return false; }
@@ -190,6 +198,7 @@ extern bool legacy_allow_panic_sync;
/**
* struct console_flush_type - Define available console flush methods
* @nbcon_atomic: Flush directly using nbcon_atomic() callback
+ * @nbcon_offload: Offload flush to printer thread
* @legacy_direct: Call the legacy loop in this context
* @legacy_offload: Offload the legacy loop into IRQ
*
@@ -197,6 +206,7 @@ extern bool legacy_allow_panic_sync;
*/
struct console_flush_type {
bool nbcon_atomic;
+ bool nbcon_offload;
bool legacy_direct;
bool legacy_offload;
};
@@ -211,6 +221,22 @@ static inline void printk_get_console_fl
switch (nbcon_get_default_prio()) {
case NBCON_PRIO_NORMAL:
+ if (have_nbcon_console && !have_boot_console) {
+ if (printk_kthreads_running)
+ ft->nbcon_offload = true;
+ else
+ ft->nbcon_atomic = true;
+ }
+
+ /* Legacy consoles are flushed directly when possible. */
+ if (have_legacy_console || have_boot_console) {
+ if (!is_printk_legacy_deferred())
+ ft->legacy_direct = true;
+ else
+ ft->legacy_offload = true;
+ }
+ break;
+
case NBCON_PRIO_EMERGENCY:
if (have_nbcon_console && !have_boot_console)
ft->nbcon_atomic = true;
--- a/kernel/printk/nbcon.c
+++ b/kernel/printk/nbcon.c
@@ -1494,6 +1494,7 @@ static int __nbcon_atomic_flush_pending_
static void nbcon_atomic_flush_pending_con(struct console *con, u64 stop_seq,
bool allow_unsafe_takeover)
{
+ struct console_flush_type ft;
unsigned long flags;
int err;
@@ -1523,10 +1524,12 @@ static void nbcon_atomic_flush_pending_c
/*
* If flushing was successful but more records are available, this
- * context must flush those remaining records because there is no
- * other context that will do it.
+ * context must flush those remaining records if the printer thread
+	 * is not available to do it.
*/
- if (prb_read_valid(prb, nbcon_seq_read(con), NULL)) {
+ printk_get_console_flush_type(&ft);
+ if (!ft.nbcon_offload &&
+ prb_read_valid(prb, nbcon_seq_read(con), NULL)) {
stop_seq = prb_next_reserve_seq(prb);
goto again;
}
@@ -1754,17 +1757,19 @@ void nbcon_device_release(struct console
/*
* This context must flush any new records added while the console
- * was locked. The console_srcu_read_lock must be taken to ensure
- * the console is usable throughout flushing.
+ * was locked if the printer thread is not available to do it. The
+ * console_srcu_read_lock must be taken to ensure the console is
+ * usable throughout flushing.
*/
cookie = console_srcu_read_lock();
+ printk_get_console_flush_type(&ft);
if (console_is_usable(con, console_srcu_read_flags(con), true) &&
+ !ft.nbcon_offload &&
prb_read_valid(prb, nbcon_seq_read(con), NULL)) {
/*
* If nbcon_atomic flushing is not available, fallback to
* using the legacy loop.
*/
- printk_get_console_flush_type(&ft);
if (ft.nbcon_atomic) {
__nbcon_atomic_flush_pending_con(con, prb_next_reserve_seq(prb), false);
} else if (ft.legacy_direct) {
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -2384,6 +2384,9 @@ asmlinkage int vprintk_emit(int facility
if (ft.nbcon_atomic)
nbcon_atomic_flush_pending();
+ if (ft.nbcon_offload)
+ nbcon_kthreads_wake();
+
if (ft.legacy_direct) {
/*
* The caller may be holding system-critical or
@@ -2733,6 +2736,7 @@ void suspend_console(void)
void resume_console(void)
{
+ struct console_flush_type ft;
struct console *con;
if (!console_suspend_enabled)
@@ -2750,6 +2754,10 @@ void resume_console(void)
*/
synchronize_srcu(&console_srcu);
+ printk_get_console_flush_type(&ft);
+ if (ft.nbcon_offload)
+ nbcon_kthreads_wake();
+
pr_flush(1000, true);
}
@@ -3061,6 +3069,7 @@ static inline void printk_kthreads_check
*/
static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handover)
{
+ struct console_flush_type ft;
bool any_usable = false;
struct console *con;
bool any_progress;
@@ -3072,12 +3081,22 @@ static bool console_flush_all(bool do_co
do {
any_progress = false;
+ printk_get_console_flush_type(&ft);
+
cookie = console_srcu_read_lock();
for_each_console_srcu(con) {
short flags = console_srcu_read_flags(con);
u64 printk_seq;
bool progress;
+ /*
+ * console_flush_all() is only responsible for nbcon
+ * consoles when the nbcon consoles cannot print via
+ * their atomic or threaded flushing.
+ */
+ if ((flags & CON_NBCON) && (ft.nbcon_atomic || ft.nbcon_offload))
+ continue;
+
if (!console_is_usable(con, flags, !do_cond_resched))
continue;
any_usable = true;
@@ -3388,9 +3407,25 @@ EXPORT_SYMBOL(console_stop);
void console_start(struct console *console)
{
+ struct console_flush_type ft;
+ bool is_nbcon;
+
console_list_lock();
console_srcu_write_flags(console, console->flags | CON_ENABLED);
+ is_nbcon = console->flags & CON_NBCON;
console_list_unlock();
+
+ /*
+ * Ensure that all SRCU list walks have completed. The related
+ * printing context must be able to see it is enabled so that
+ * it is guaranteed to wake up and resume printing.
+ */
+ synchronize_srcu(&console_srcu);
+
+ printk_get_console_flush_type(&ft);
+ if (is_nbcon && ft.nbcon_offload)
+ nbcon_kthread_wake(console);
+
__pr_flush(console, 1000, true);
}
EXPORT_SYMBOL(console_start);
@@ -4116,6 +4151,8 @@ static bool __pr_flush(struct console *c
/* Flush the consoles so that records up to @seq are printed. */
printk_get_console_flush_type(&ft);
+ if (ft.nbcon_atomic)
+ nbcon_atomic_flush_pending();
if (ft.legacy_direct) {
console_lock();
console_unlock();
@@ -4153,8 +4190,10 @@ static bool __pr_flush(struct console *c
* that they make forward progress, so only increment
* @diff for usable consoles.
*/
- if (!console_is_usable(c, flags, true))
+ if (!console_is_usable(c, flags, true) &&
+ !console_is_usable(c, flags, false)) {
continue;
+ }
if (flags & CON_NBCON) {
printk_seq = nbcon_seq_read(c);
@@ -4630,8 +4669,15 @@ EXPORT_SYMBOL_GPL(kmsg_dump_rewind);
*/
void console_try_replay_all(void)
{
+ struct console_flush_type ft;
+
+ printk_get_console_flush_type(&ft);
if (console_trylock()) {
__console_rewind_all();
+ if (ft.nbcon_atomic)
+ nbcon_atomic_flush_pending();
+ if (ft.nbcon_offload)
+ nbcon_kthreads_wake();
/* Consoles are flushed as part of console_unlock(). */
console_unlock();
}

View File

@ -1,83 +0,0 @@
From: John Ogness <john.ogness@linutronix.de>
Date: Wed, 4 Sep 2024 14:11:29 +0206
Subject: [PATCH 47/54] printk: Provide helper for message prepending
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
In order to support prepending different texts to printk
messages, split out the prepending code into a helper
function.
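
With the varargs helper in place, a new prefix message reduces to a
one-line wrapper; the replay message added later in this series is
exactly that:

  void console_prepend_replay(struct printk_message *pmsg)
  {
          console_prepend_message(pmsg, "** replaying previous printk message **\n");
  }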
Signed-off-by: John Ogness <john.ogness@linutronix.de>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Link: https://lore.kernel.org/r/20240904120536.115780-11-john.ogness@linutronix.de
Signed-off-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/printk/printk.c | 36 +++++++++++++++++++++++++-----------
1 file changed, 25 insertions(+), 11 deletions(-)
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -2844,30 +2844,31 @@ static void __console_unlock(void)
#ifdef CONFIG_PRINTK
/*
- * Prepend the message in @pmsg->pbufs->outbuf with a "dropped message". This
- * is achieved by shifting the existing message over and inserting the dropped
- * message.
+ * Prepend the message in @pmsg->pbufs->outbuf. This is achieved by shifting
+ * the existing message over and inserting the scratchbuf message.
*
- * @pmsg is the printk message to prepend.
- *
- * @dropped is the dropped count to report in the dropped message.
+ * @pmsg is the original printk message.
+ * @fmt is the printf format of the message which will prepend the existing one.
*
- * If the message text in @pmsg->pbufs->outbuf does not have enough space for
- * the dropped message, the message text will be sufficiently truncated.
+ * If there is not enough space in @pmsg->pbufs->outbuf, the existing
+ * message text will be sufficiently truncated.
*
* If @pmsg->pbufs->outbuf is modified, @pmsg->outbuf_len is updated.
*/
-void console_prepend_dropped(struct printk_message *pmsg, unsigned long dropped)
+__printf(2, 3)
+static void console_prepend_message(struct printk_message *pmsg, const char *fmt, ...)
{
struct printk_buffers *pbufs = pmsg->pbufs;
const size_t scratchbuf_sz = sizeof(pbufs->scratchbuf);
const size_t outbuf_sz = sizeof(pbufs->outbuf);
char *scratchbuf = &pbufs->scratchbuf[0];
char *outbuf = &pbufs->outbuf[0];
+ va_list args;
size_t len;
- len = scnprintf(scratchbuf, scratchbuf_sz,
- "** %lu printk messages dropped **\n", dropped);
+ va_start(args, fmt);
+ len = vscnprintf(scratchbuf, scratchbuf_sz, fmt, args);
+ va_end(args);
/*
* Make sure outbuf is sufficiently large before prepending.
@@ -2890,6 +2891,19 @@ void console_prepend_dropped(struct prin
}
/*
+ * Prepend the message in @pmsg->pbufs->outbuf with a "dropped message".
+ * @pmsg->outbuf_len is updated appropriately.
+ *
+ * @pmsg is the printk message to prepend.
+ *
+ * @dropped is the dropped count to report in the dropped message.
+ */
+void console_prepend_dropped(struct printk_message *pmsg, unsigned long dropped)
+{
+ console_prepend_message(pmsg, "** %lu printk messages dropped **\n", dropped);
+}
+
+/*
* Read and format the specified record (or a later record if the specified
* record is not available).
*

View File

@ -1,133 +0,0 @@
From: John Ogness <john.ogness@linutronix.de>
Date: Wed, 4 Sep 2024 14:11:30 +0206
Subject: [PATCH 48/54] printk: nbcon: Show replay message on takeover
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
An emergency or panic context can take over console ownership
while the current owner is printing a printk message. The
atomic printer will re-print the message that the previous
owner was printing. However, this can look confusing to the
user and may even seem as though a message was lost.
[3430014.1
[3430014.181123] usb 1-2: Product: USB Audio
Add a new field @nbcon_prev_seq to struct console to track
the sequence number to print that was assigned to the previous
console owner. If this matches the sequence number to print
that the current owner is assigned, then a takeover must have
occurred. In this case, print an additional message to inform
the user that the previous message is being printed again.
[3430014.1
** replaying previous printk message **
[3430014.181123] usb 1-2: Product: USB Audio
Signed-off-by: John Ogness <john.ogness@linutronix.de>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Link: https://lore.kernel.org/r/20240904120536.115780-12-john.ogness@linutronix.de
Signed-off-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
include/linux/console.h | 2 ++
kernel/printk/internal.h | 1 +
kernel/printk/nbcon.c | 26 ++++++++++++++++++++++++++
kernel/printk/printk.c | 11 +++++++++++
4 files changed, 40 insertions(+)
--- a/include/linux/console.h
+++ b/include/linux/console.h
@@ -325,6 +325,7 @@ struct nbcon_write_context {
* @nbcon_state: State for nbcon consoles
* @nbcon_seq: Sequence number of the next record for nbcon to print
* @nbcon_device_ctxt: Context available for non-printing operations
+ * @nbcon_prev_seq: Seq num the previous nbcon owner was assigned to print
* @pbufs: Pointer to nbcon private buffer
* @kthread: Printer kthread for this console
* @rcuwait: RCU-safe wait object for @kthread waking
@@ -459,6 +460,7 @@ struct console {
atomic_t __private nbcon_state;
atomic_long_t __private nbcon_seq;
struct nbcon_context __private nbcon_device_ctxt;
+ atomic_long_t __private nbcon_prev_seq;
struct printk_buffers *pbufs;
struct task_struct *kthread;
--- a/kernel/printk/internal.h
+++ b/kernel/printk/internal.h
@@ -319,4 +319,5 @@ bool printk_get_next_message(struct prin
#ifdef CONFIG_PRINTK
void console_prepend_dropped(struct printk_message *pmsg, unsigned long dropped);
+void console_prepend_replay(struct printk_message *pmsg);
#endif
--- a/kernel/printk/nbcon.c
+++ b/kernel/printk/nbcon.c
@@ -946,7 +946,9 @@ static bool nbcon_emit_next_record(struc
.pbufs = ctxt->pbufs,
};
unsigned long con_dropped;
+ struct nbcon_state cur;
unsigned long dropped;
+ unsigned long ulseq;
/*
* This function should never be called for consoles that have not
@@ -987,6 +989,29 @@ static bool nbcon_emit_next_record(struc
if (dropped && !is_extended)
console_prepend_dropped(&pmsg, dropped);
+ /*
+ * If the previous owner was assigned the same record, this context
+ * has taken over ownership and is replaying the record. Prepend a
+ * message to let the user know the record is replayed.
+ */
+ ulseq = atomic_long_read(&ACCESS_PRIVATE(con, nbcon_prev_seq));
+ if (__ulseq_to_u64seq(prb, ulseq) == pmsg.seq) {
+ console_prepend_replay(&pmsg);
+ } else {
+ /*
+ * Ensure this context is still the owner before trying to
+ * update @nbcon_prev_seq. Otherwise the value in @ulseq may
+ * not be from the previous owner and instead be some later
+ * value from the context that took over ownership.
+ */
+ nbcon_state_read(con, &cur);
+ if (!nbcon_context_can_proceed(ctxt, &cur))
+ return false;
+
+ atomic_long_try_cmpxchg(&ACCESS_PRIVATE(con, nbcon_prev_seq), &ulseq,
+ __u64seq_to_ulseq(pmsg.seq));
+ }
+
if (!nbcon_context_exit_unsafe(ctxt))
return false;
@@ -1646,6 +1671,7 @@ bool nbcon_alloc(struct console *con)
rcuwait_init(&con->rcuwait);
init_irq_work(&con->irq_work, nbcon_irq_work);
+ atomic_long_set(&ACCESS_PRIVATE(con, nbcon_prev_seq), -1UL);
nbcon_state_set(con, &state);
/*
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -2904,6 +2904,17 @@ void console_prepend_dropped(struct prin
}
/*
+ * Prepend the message in @pmsg->pbufs->outbuf with a "replay message".
+ * @pmsg->outbuf_len is updated appropriately.
+ *
+ * @pmsg is the printk message to prepend.
+ */
+void console_prepend_replay(struct printk_message *pmsg)
+{
+ console_prepend_message(pmsg, "** replaying previous printk message **\n");
+}
+
+/*
* Read and format the specified record (or a later record if the specified
* record is not available).
*

View File

@ -1,37 +0,0 @@
From: John Ogness <john.ogness@linutronix.de>
Date: Wed, 4 Sep 2024 14:11:31 +0206
Subject: [PATCH 49/54] proc: consoles: Add notation to c_start/c_stop
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
fs/proc/consoles.c:78:13: warning: context imbalance in 'c_start'
- wrong count at exit
fs/proc/consoles.c:104:13: warning: context imbalance in 'c_stop'
- unexpected unlock
Signed-off-by: John Ogness <john.ogness@linutronix.de>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Link: https://lore.kernel.org/r/20240904120536.115780-13-john.ogness@linutronix.de
Signed-off-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
fs/proc/consoles.c | 2 ++
1 file changed, 2 insertions(+)
--- a/fs/proc/consoles.c
+++ b/fs/proc/consoles.c
@@ -68,6 +68,7 @@ static int show_console_dev(struct seq_f
}
static void *c_start(struct seq_file *m, loff_t *pos)
+ __acquires(&console_mutex)
{
struct console *con;
loff_t off = 0;
@@ -94,6 +95,7 @@ static void *c_next(struct seq_file *m,
}
static void c_stop(struct seq_file *m, void *v)
+ __releases(&console_mutex)
{
console_list_unlock();
}

View File

@ -1,42 +0,0 @@
From: John Ogness <john.ogness@linutronix.de>
Date: Wed, 4 Sep 2024 14:11:32 +0206
Subject: [PATCH 50/54] proc: Add nbcon support for /proc/consoles
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
Update /proc/consoles output to show 'W' if an nbcon console is
registered. Since the write_thread() callback is mandatory, it is
enough just to check if it is an nbcon console.
Also update /proc/consoles output to show 'N' if it is an
nbcon console.
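
An enabled nbcon serial console would then be listed roughly as
follows (illustrative output only; the exact spacing and flag
positions follow the seq_printf() format strings in the hunk below):

  ttyS0                -W- (EC Np a)    4:64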
Signed-off-by: John Ogness <john.ogness@linutronix.de>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Link: https://lore.kernel.org/r/20240904120536.115780-14-john.ogness@linutronix.de
Signed-off-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
fs/proc/consoles.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
--- a/fs/proc/consoles.c
+++ b/fs/proc/consoles.c
@@ -21,6 +21,7 @@ static int show_console_dev(struct seq_f
{ CON_ENABLED, 'E' },
{ CON_CONSDEV, 'C' },
{ CON_BOOT, 'B' },
+ { CON_NBCON, 'N' },
{ CON_PRINTBUFFER, 'p' },
{ CON_BRL, 'b' },
{ CON_ANYTIME, 'a' },
@@ -58,8 +59,8 @@ static int show_console_dev(struct seq_f
seq_printf(m, "%s%d", con->name, con->index);
seq_pad(m, ' ');
seq_printf(m, "%c%c%c (%s)", con->read ? 'R' : '-',
- con->write ? 'W' : '-', con->unblank ? 'U' : '-',
- flags);
+ ((con->flags & CON_NBCON) || con->write) ? 'W' : '-',
+ con->unblank ? 'U' : '-', flags);
if (dev)
seq_printf(m, " %4d:%d", MAJOR(dev), MINOR(dev));

View File

@ -1,27 +0,0 @@
From: John Ogness <john.ogness@linutronix.de>
Date: Wed, 4 Sep 2024 14:11:33 +0206
Subject: [PATCH 51/54] tty: sysfs: Add nbcon support for 'active'
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
Allow the 'active' attribute to list nbcon consoles.
Signed-off-by: John Ogness <john.ogness@linutronix.de>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Link: https://lore.kernel.org/r/20240904120536.115780-15-john.ogness@linutronix.de
Signed-off-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
drivers/tty/tty_io.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/drivers/tty/tty_io.c
+++ b/drivers/tty/tty_io.c
@@ -3567,7 +3567,7 @@ static ssize_t show_cons_active(struct d
for_each_console(c) {
if (!c->device)
continue;
- if (!c->write)
+ if (!(c->flags & CON_NBCON) && !c->write)
continue;
if ((c->flags & CON_ENABLED) == 0)
continue;

View File

@ -1,324 +0,0 @@
From: John Ogness <john.ogness@linutronix.de>
Date: Wed, 4 Sep 2024 14:11:34 +0206
Subject: [PATCH 52/54] printk: Implement legacy printer kthread for PREEMPT_RT
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
The write() callback of legacy consoles usually makes use of
spinlocks. This is not permitted with PREEMPT_RT in atomic
contexts.
For PREEMPT_RT, create a new kthread to handle printing of all
the legacy consoles (and nbcon consoles if boot consoles are
registered). This allows legacy consoles to work on PREEMPT_RT
without requiring modification. (However they will not have
the reliability properties guaranteed by nbcon atomic
consoles.)
Use the existing printk_kthreads_check_locked() to start/stop
the legacy kthread as needed.
Introduce the macro force_legacy_kthread() to query if the
forced threading of legacy consoles is in effect. Although
currently only enabled for PREEMPT_RT, this acts as a simple
mechanism for the future to allow other preemption models to
easily take advantage of the non-interference property provided
by the legacy kthread.
When force_legacy_kthread() is true, the console_flush_type
@legacy_offload is fulfilled by waking the legacy kthread instead
of printing via the
console_lock in the irq_work. If the legacy kthread is not
yet available, no legacy printing takes place (unless in
panic).
If for some reason the legacy kthread cannot be created, any
legacy consoles are unregistered. With force_legacy_kthread(),
the legacy kthread is a critical component for legacy consoles.
These changes only affect CONFIG_PREEMPT_RT.
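
To illustrate the underlying problem, a typical legacy write()
callback looks roughly like this (hypothetical driver; demo_hw_lock
and demo_hw_put_char() are placeholders, not from this series):

  static void demo_console_write(struct console *con, const char *s,
                                 unsigned int count)
  {
          unsigned long flags;

          /* A sleeping lock on PREEMPT_RT: illegal in atomic context. */
          spin_lock_irqsave(&demo_hw_lock, flags);
          while (count--)
                  demo_hw_put_char(*s++);
          spin_unlock_irqrestore(&demo_hw_lock, flags);
  }

Running such callbacks only from the dedicated kthread guarantees the
required task context without modifying every legacy driver.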
Signed-off-by: John Ogness <john.ogness@linutronix.de>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Link: https://lore.kernel.org/r/20240904120536.115780-16-john.ogness@linutronix.de
Signed-off-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/printk/internal.h | 16 ++++
kernel/printk/printk.c | 157 +++++++++++++++++++++++++++++++++++++++-----
kernel/printk/printk_safe.c | 4 -
3 files changed, 159 insertions(+), 18 deletions(-)
--- a/kernel/printk/internal.h
+++ b/kernel/printk/internal.h
@@ -21,6 +21,19 @@ int devkmsg_sysctl_set_loglvl(const stru
(con->flags & CON_BOOT) ? "boot" : "", \
con->name, con->index, ##__VA_ARGS__)
+/*
+ * Identify if legacy printing is forced in a dedicated kthread. If
+ * true, all printing via console lock occurs within a dedicated
+ * legacy printer thread. The only exception is on panic, after the
+ * nbcon consoles have had their chance to print the panic messages
+ * first.
+ */
+#ifdef CONFIG_PREEMPT_RT
+# define force_legacy_kthread() (true)
+#else
+# define force_legacy_kthread() (false)
+#endif
+
#ifdef CONFIG_PRINTK
#ifdef CONFIG_PRINTK_CALLER
@@ -173,6 +186,7 @@ static inline void nbcon_kthread_wake(st
#define printk_safe_exit_irqrestore(flags) local_irq_restore(flags)
static inline bool printk_percpu_data_ready(void) { return false; }
+static inline void defer_console_output(void) { }
static inline bool is_printk_legacy_deferred(void) { return false; }
static inline u64 nbcon_seq_read(struct console *con) { return 0; }
static inline void nbcon_seq_force(struct console *con, u64 seq) { }
@@ -200,7 +214,7 @@ extern bool legacy_allow_panic_sync;
* @nbcon_atomic: Flush directly using nbcon_atomic() callback
* @nbcon_offload: Offload flush to printer thread
* @legacy_direct: Call the legacy loop in this context
- * @legacy_offload: Offload the legacy loop into IRQ
+ * @legacy_offload: Offload the legacy loop into IRQ or legacy thread
*
* Note that the legacy loop also flushes the nbcon consoles.
*/
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -491,6 +491,7 @@ bool legacy_allow_panic_sync;
#ifdef CONFIG_PRINTK
DECLARE_WAIT_QUEUE_HEAD(log_wait);
+static DECLARE_WAIT_QUEUE_HEAD(legacy_wait);
/* All 3 protected by @syslog_lock. */
/* the next printk record to read by syslog(READ) or /proc/kmsg */
static u64 syslog_seq;
@@ -2757,6 +2758,8 @@ void resume_console(void)
printk_get_console_flush_type(&ft);
if (ft.nbcon_offload)
nbcon_kthreads_wake();
+ if (ft.legacy_offload)
+ defer_console_output();
pr_flush(1000, true);
}
@@ -3167,19 +3170,7 @@ static bool console_flush_all(bool do_co
return false;
}
-/**
- * console_unlock - unblock the console subsystem from printing
- *
- * Releases the console_lock which the caller holds to block printing of
- * the console subsystem.
- *
- * While the console_lock was held, console output may have been buffered
- * by printk(). If this is the case, console_unlock(); emits
- * the output prior to releasing the lock.
- *
- * console_unlock(); may be called from any context.
- */
-void console_unlock(void)
+static void __console_flush_and_unlock(void)
{
bool do_cond_resched;
bool handover;
@@ -3223,6 +3214,29 @@ void console_unlock(void)
*/
} while (prb_read_valid(prb, next_seq, NULL) && console_trylock());
}
+
+/**
+ * console_unlock - unblock the legacy console subsystem from printing
+ *
+ * Releases the console_lock which the caller holds to block printing of
+ * the legacy console subsystem.
+ *
+ * While the console_lock was held, console output may have been buffered
+ * by printk(). If this is the case, console_unlock() emits the output on
+ * legacy consoles prior to releasing the lock.
+ *
+ * console_unlock() may be called from any context.
+ */
+void console_unlock(void)
+{
+ struct console_flush_type ft;
+
+ printk_get_console_flush_type(&ft);
+ if (ft.legacy_direct)
+ __console_flush_and_unlock();
+ else
+ __console_unlock();
+}
EXPORT_SYMBOL(console_unlock);
/**
@@ -3450,6 +3464,8 @@ void console_start(struct console *conso
printk_get_console_flush_type(&ft);
if (is_nbcon && ft.nbcon_offload)
nbcon_kthread_wake(console);
+ else if (ft.legacy_offload)
+ defer_console_output();
__pr_flush(console, 1000, true);
}
@@ -3461,6 +3477,88 @@ static int unregister_console_locked(str
/* True when system boot is far enough to create printer threads. */
static bool printk_kthreads_ready __ro_after_init;
+static struct task_struct *printk_legacy_kthread;
+
+static bool legacy_kthread_should_wakeup(void)
+{
+ struct console_flush_type ft;
+ struct console *con;
+ bool ret = false;
+ int cookie;
+
+ if (kthread_should_stop())
+ return true;
+
+ printk_get_console_flush_type(&ft);
+
+ cookie = console_srcu_read_lock();
+ for_each_console_srcu(con) {
+ short flags = console_srcu_read_flags(con);
+ u64 printk_seq;
+
+ /*
+ * The legacy printer thread is only responsible for nbcon
+ * consoles when the nbcon consoles cannot print via their
+ * atomic or threaded flushing.
+ */
+ if ((flags & CON_NBCON) && (ft.nbcon_atomic || ft.nbcon_offload))
+ continue;
+
+ if (!console_is_usable(con, flags, false))
+ continue;
+
+ if (flags & CON_NBCON) {
+ printk_seq = nbcon_seq_read(con);
+ } else {
+ /*
+ * It is safe to read @seq because only this
+ * thread context updates @seq.
+ */
+ printk_seq = con->seq;
+ }
+
+ if (prb_read_valid(prb, printk_seq, NULL)) {
+ ret = true;
+ break;
+ }
+ }
+ console_srcu_read_unlock(cookie);
+
+ return ret;
+}
+
+static int legacy_kthread_func(void *unused)
+{
+ for (;;) {
+ wait_event_interruptible(legacy_wait, legacy_kthread_should_wakeup());
+
+ if (kthread_should_stop())
+ break;
+
+ console_lock();
+ __console_flush_and_unlock();
+ }
+
+ return 0;
+}
+
+static bool legacy_kthread_create(void)
+{
+ struct task_struct *kt;
+
+ lockdep_assert_console_list_lock_held();
+
+ kt = kthread_run(legacy_kthread_func, NULL, "pr/legacy");
+ if (WARN_ON(IS_ERR(kt))) {
+ pr_err("failed to start legacy printing thread\n");
+ return false;
+ }
+
+ printk_legacy_kthread = kt;
+
+ return true;
+}
+
/**
 * printk_kthreads_shutdown - shut down all threaded printers
*
@@ -3510,6 +3608,27 @@ static void printk_kthreads_check_locked
if (!printk_kthreads_ready)
return;
+ if (have_legacy_console || have_boot_console) {
+ if (!printk_legacy_kthread &&
+ force_legacy_kthread() &&
+ !legacy_kthread_create()) {
+ /*
+ * All legacy consoles must be unregistered. If there
+ * are any nbcon consoles, they will set up their own
+ * kthread.
+ */
+ hlist_for_each_entry_safe(con, tmp, &console_list, node) {
+ if (con->flags & CON_NBCON)
+ continue;
+
+ unregister_console_locked(con);
+ }
+ }
+ } else if (printk_legacy_kthread) {
+ kthread_stop(printk_legacy_kthread);
+ printk_legacy_kthread = NULL;
+ }
+
/*
* Printer threads cannot be started as long as any boot console is
* registered because there is no way to synchronize the hardware
@@ -4286,9 +4405,13 @@ static void wake_up_klogd_work_func(stru
int pending = this_cpu_xchg(printk_pending, 0);
if (pending & PRINTK_PENDING_OUTPUT) {
- /* If trylock fails, someone else is doing the printing */
- if (console_trylock())
- console_unlock();
+ if (force_legacy_kthread()) {
+ if (printk_legacy_kthread)
+ wake_up_interruptible(&legacy_wait);
+ } else {
+ if (console_trylock())
+ console_unlock();
+ }
}
if (pending & PRINTK_PENDING_WAKEUP)
@@ -4703,6 +4826,8 @@ void console_try_replay_all(void)
nbcon_atomic_flush_pending();
if (ft.nbcon_offload)
nbcon_kthreads_wake();
+ if (ft.legacy_offload)
+ defer_console_output();
/* Consoles are flushed as part of console_unlock(). */
console_unlock();
}
--- a/kernel/printk/printk_safe.c
+++ b/kernel/printk/printk_safe.c
@@ -44,7 +44,9 @@ bool is_printk_legacy_deferred(void)
* The per-CPU variable @printk_context can be read safely in any
* context. CPU migration is always disabled when set.
*/
- return (this_cpu_read(printk_context) || in_nmi());
+ return (force_legacy_kthread() ||
+ this_cpu_read(printk_context) ||
+ in_nmi());
}
asmlinkage int vprintk(const char *fmt, va_list args)

View File

@ -1,48 +0,0 @@
From: John Ogness <john.ogness@linutronix.de>
Date: Wed, 4 Sep 2024 14:11:35 +0206
Subject: [PATCH 53/54] printk: nbcon: Assign nice -20 for printing threads
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
It is important that console printing threads are scheduled
shortly after a printk call and with generous runtime budgets.
Signed-off-by: John Ogness <john.ogness@linutronix.de>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Link: https://lore.kernel.org/r/20240904120536.115780-17-john.ogness@linutronix.de
Signed-off-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/printk/nbcon.c | 6 ++++++
kernel/printk/printk.c | 6 ++++++
2 files changed, 12 insertions(+)
--- a/kernel/printk/nbcon.c
+++ b/kernel/printk/nbcon.c
@@ -1321,6 +1321,12 @@ bool nbcon_kthread_create(struct console
con->kthread = kt;
+ /*
+ * It is important that console printing threads are scheduled
+ * shortly after a printk call and with generous runtime budgets.
+ */
+ sched_set_normal(con->kthread, -20);
+
return true;
}
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -3556,6 +3556,12 @@ static bool legacy_kthread_create(void)
printk_legacy_kthread = kt;
+ /*
+ * It is important that console printing threads are scheduled
+ * shortly after a printk call and with generous runtime budgets.
+ */
+ sched_set_normal(printk_legacy_kthread, -20);
+
return true;
}

View File

@ -1,334 +0,0 @@
From: John Ogness <john.ogness@linutronix.de>
Date: Wed, 13 Sep 2023 15:30:36 +0000
Subject: [PATCH 53/54] serial: 8250: Switch to nbcon console
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
Implement the necessary callbacks to switch the 8250 console driver
to perform as an nbcon console.
Add implementations for the nbcon console callbacks (write_atomic,
write_thread, device_lock, device_unlock) and add CON_NBCON to the
initial flags.
The legacy code is kept in order to easily switch back to legacy mode
by defining CONFIG_SERIAL_8250_LEGACY_CONSOLE.
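
Stripped to its essentials, the registration side of an nbcon console
looks like this (hypothetical skeleton mirroring the 8250 changes
below; the demo_* callbacks are placeholders):

  static void demo_write_thread(struct console *con,
                                struct nbcon_write_context *wctxt)
  {
          if (!nbcon_enter_unsafe(wctxt))
                  return;
          /* Program the hardware from wctxt->outbuf / wctxt->len. */
          nbcon_exit_unsafe(wctxt);
  }

  static struct console demo_console = {
          .name          = "demo",
          .write_atomic  = demo_write_atomic,
          .write_thread  = demo_write_thread,
          .device_lock   = demo_device_lock,
          .device_unlock = demo_device_unlock,
          .flags         = CON_PRINTBUFFER | CON_ANYTIME | CON_NBCON,
          .index         = -1,
  };

Only write_thread() is mandatory; write_atomic() may be left NULL when
the hardware cannot be driven safely from atomic context, as the rs485
case below demonstrates.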
Signed-off-by: John Ogness <john.ogness@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
drivers/tty/serial/8250/8250_core.c | 42 +++++++++
drivers/tty/serial/8250/8250_port.c | 154 +++++++++++++++++++++++++++++++++++-
include/linux/serial_8250.h | 6 +
3 files changed, 199 insertions(+), 3 deletions(-)
--- a/drivers/tty/serial/8250/8250_core.c
+++ b/drivers/tty/serial/8250/8250_core.c
@@ -388,6 +388,7 @@ void __init serial8250_register_ports(st
#ifdef CONFIG_SERIAL_8250_CONSOLE
+#ifdef CONFIG_SERIAL_8250_LEGACY_CONSOLE
static void univ8250_console_write(struct console *co, const char *s,
unsigned int count)
{
@@ -395,6 +396,37 @@ static void univ8250_console_write(struc
serial8250_console_write(up, s, count);
}
+#else
+static void univ8250_console_write_atomic(struct console *co,
+ struct nbcon_write_context *wctxt)
+{
+ struct uart_8250_port *up = &serial8250_ports[co->index];
+
+ serial8250_console_write_atomic(up, wctxt);
+}
+
+static void univ8250_console_write_thread(struct console *co,
+ struct nbcon_write_context *wctxt)
+{
+ struct uart_8250_port *up = &serial8250_ports[co->index];
+
+ serial8250_console_write_thread(up, wctxt);
+}
+
+static void univ8250_console_device_lock(struct console *con, unsigned long *flags)
+{
+ struct uart_port *up = &serial8250_ports[con->index].port;
+
+ __uart_port_lock_irqsave(up, flags);
+}
+
+static void univ8250_console_device_unlock(struct console *con, unsigned long flags)
+{
+ struct uart_port *up = &serial8250_ports[con->index].port;
+
+ __uart_port_unlock_irqrestore(up, flags);
+}
+#endif /* CONFIG_SERIAL_8250_LEGACY_CONSOLE */
static int univ8250_console_setup(struct console *co, char *options)
{
@@ -494,12 +526,20 @@ static int univ8250_console_match(struct
static struct console univ8250_console = {
.name = "ttyS",
+#ifdef CONFIG_SERIAL_8250_LEGACY_CONSOLE
.write = univ8250_console_write,
+ .flags = CON_PRINTBUFFER | CON_ANYTIME,
+#else
+ .write_atomic = univ8250_console_write_atomic,
+ .write_thread = univ8250_console_write_thread,
+ .device_lock = univ8250_console_device_lock,
+ .device_unlock = univ8250_console_device_unlock,
+ .flags = CON_PRINTBUFFER | CON_ANYTIME | CON_NBCON,
+#endif
.device = uart_console_device,
.setup = univ8250_console_setup,
.exit = univ8250_console_exit,
.match = univ8250_console_match,
- .flags = CON_PRINTBUFFER | CON_ANYTIME,
.index = -1,
.data = &serial8250_reg,
};
--- a/drivers/tty/serial/8250/8250_port.c
+++ b/drivers/tty/serial/8250/8250_port.c
@@ -546,6 +546,13 @@ static int serial8250_em485_init(struct
if (!p->em485)
return -ENOMEM;
+#ifndef CONFIG_SERIAL_8250_LEGACY_CONSOLE
+ if (uart_console(&p->port)) {
+ dev_warn(p->port.dev, "no atomic printing for rs485 consoles\n");
+ p->port.cons->write_atomic = NULL;
+ }
+#endif
+
hrtimer_init(&p->em485->stop_tx_timer, CLOCK_MONOTONIC,
HRTIMER_MODE_REL);
hrtimer_init(&p->em485->start_tx_timer, CLOCK_MONOTONIC,
@@ -691,7 +698,11 @@ static void serial8250_set_sleep(struct
serial8250_rpm_put(p);
}
-static void serial8250_clear_IER(struct uart_8250_port *up)
+/*
+ * Only to be used by write_atomic() and the legacy write(), which do not
+ * require port lock.
+ */
+static void __serial8250_clear_IER(struct uart_8250_port *up)
{
if (up->capabilities & UART_CAP_UUE)
serial_out(up, UART_IER, UART_IER_UUE);
@@ -699,6 +710,11 @@ static void serial8250_clear_IER(struct
serial_out(up, UART_IER, 0);
}
+static inline void serial8250_clear_IER(struct uart_8250_port *up)
+{
+ __serial8250_clear_IER(up);
+}
+
#ifdef CONFIG_SERIAL_8250_RSA
/*
* Attempts to turn on the RSA FIFO. Returns zero on failure.
@@ -3269,6 +3285,11 @@ static void serial8250_console_putchar(s
wait_for_xmitr(up, UART_LSR_THRE);
serial_port_out(port, UART_TX, ch);
+
+ if (ch == '\n')
+ up->console_newline_needed = false;
+ else
+ up->console_newline_needed = true;
}
/*
@@ -3297,6 +3318,7 @@ static void serial8250_console_restore(s
serial8250_out_MCR(up, up->mcr | UART_MCR_DTR | UART_MCR_RTS);
}
+#ifdef CONFIG_SERIAL_8250_LEGACY_CONSOLE
/*
* Print a string to the serial port using the device FIFO
*
@@ -3355,7 +3377,7 @@ void serial8250_console_write(struct uar
* First save the IER then disable the interrupts
*/
ier = serial_port_in(port, UART_IER);
- serial8250_clear_IER(up);
+ __serial8250_clear_IER(up);
/* check scratch reg to see if port powered off during system sleep */
if (up->canary && (up->canary != serial_port_in(port, UART_SCR))) {
@@ -3421,6 +3443,131 @@ void serial8250_console_write(struct uar
if (locked)
uart_port_unlock_irqrestore(port, flags);
}
+#else
+void serial8250_console_write_thread(struct uart_8250_port *up,
+ struct nbcon_write_context *wctxt)
+{
+ struct uart_8250_em485 *em485 = up->em485;
+ struct uart_port *port = &up->port;
+ unsigned int ier;
+
+ touch_nmi_watchdog();
+
+ if (!nbcon_enter_unsafe(wctxt))
+ return;
+
+ /* First save IER then disable the interrupts. */
+ ier = serial_port_in(port, UART_IER);
+ serial8250_clear_IER(up);
+
+ /* Check scratch reg if port powered off during system sleep. */
+ if (up->canary && (up->canary != serial_port_in(port, UART_SCR))) {
+ serial8250_console_restore(up);
+ up->canary = 0;
+ }
+
+ if (em485) {
+ if (em485->tx_stopped)
+ up->rs485_start_tx(up);
+ mdelay(port->rs485.delay_rts_before_send);
+ }
+
+ if (nbcon_exit_unsafe(wctxt)) {
+ int len = READ_ONCE(wctxt->len);
+ int i;
+
+ /*
+ * Write out the message. Toggle unsafe for each byte in order
+ * to give another (higher priority) context the opportunity
+ * for a friendly takeover. If such a takeover occurs, this
+ * context must reacquire ownership in order to perform final
+ * actions (such as re-enabling the interrupts).
+ *
+ * IMPORTANT: wctxt->outbuf and wctxt->len are no longer valid
+ * after a reacquire so writing the message must be
+ * aborted.
+ */
+ for (i = 0; i < len; i++) {
+ if (!nbcon_enter_unsafe(wctxt)) {
+ nbcon_reacquire_nobuf(wctxt);
+ break;
+ }
+
+ uart_console_write(port, wctxt->outbuf + i, 1, serial8250_console_putchar);
+
+ if (!nbcon_exit_unsafe(wctxt)) {
+ nbcon_reacquire_nobuf(wctxt);
+ break;
+ }
+ }
+ } else {
+ nbcon_reacquire_nobuf(wctxt);
+ }
+
+ while (!nbcon_enter_unsafe(wctxt))
+ nbcon_reacquire_nobuf(wctxt);
+
+ /* Finally, wait for transmitter to become empty and restore IER. */
+ wait_for_xmitr(up, UART_LSR_BOTH_EMPTY);
+ if (em485) {
+ mdelay(port->rs485.delay_rts_after_send);
+ if (em485->tx_stopped)
+ up->rs485_stop_tx(up);
+ }
+ serial_port_out(port, UART_IER, ier);
+
+ /*
+ * The receive handling will happen properly because the receive ready
+ * bit will still be set; it is not cleared on read. However, modem
+ * control will not, we must call it if we have saved something in the
+ * saved flags while processing with interrupts off.
+ */
+ if (up->msr_saved_flags)
+ serial8250_modem_status(up);
+
+ nbcon_exit_unsafe(wctxt);
+}
+
+void serial8250_console_write_atomic(struct uart_8250_port *up,
+ struct nbcon_write_context *wctxt)
+{
+ struct uart_port *port = &up->port;
+ unsigned int ier;
+
+ /* Atomic console not supported for rs485 mode. */
+ if (WARN_ON_ONCE(up->em485))
+ return;
+
+ touch_nmi_watchdog();
+
+ if (!nbcon_enter_unsafe(wctxt))
+ return;
+
+ /*
+ * First save IER then disable the interrupts. The special variant to
+ * clear IER is used because atomic printing may occur without holding
+ * the port lock.
+ */
+ ier = serial_port_in(port, UART_IER);
+ __serial8250_clear_IER(up);
+
+ /* Check scratch reg if port powered off during system sleep. */
+ if (up->canary && (up->canary != serial_port_in(port, UART_SCR))) {
+ serial8250_console_restore(up);
+ up->canary = 0;
+ }
+
+ if (up->console_newline_needed)
+ uart_console_write(port, "\n", 1, serial8250_console_putchar);
+ uart_console_write(port, wctxt->outbuf, wctxt->len, serial8250_console_putchar);
+
+ /* Finally, wait for transmitter to become empty and restore IER. */
+ wait_for_xmitr(up, UART_LSR_BOTH_EMPTY);
+ serial_port_out(port, UART_IER, ier);
+
+ nbcon_exit_unsafe(wctxt);
+}
+#endif /* CONFIG_SERIAL_8250_LEGACY_CONSOLE */
static unsigned int probe_baud(struct uart_port *port)
{
@@ -3439,6 +3586,7 @@ static unsigned int probe_baud(struct ua
int serial8250_console_setup(struct uart_port *port, char *options, bool probe)
{
+ struct uart_8250_port *up = up_to_u8250p(port);
int baud = 9600;
int bits = 8;
int parity = 'n';
@@ -3448,6 +3596,8 @@ int serial8250_console_setup(struct uart
if (!port->iobase && !port->membase)
return -ENODEV;
+ up->console_newline_needed = false;
+
if (options)
uart_parse_options(options, &baud, &parity, &bits, &flow);
else if (probe)
--- a/include/linux/serial_8250.h
+++ b/include/linux/serial_8250.h
@@ -153,6 +153,8 @@ struct uart_8250_port {
#define MSR_SAVE_FLAGS UART_MSR_ANY_DELTA
unsigned char msr_saved_flags;
+ bool console_newline_needed;
+
struct uart_8250_dma *dma;
const struct uart_8250_ops *ops;
@@ -204,6 +206,10 @@ void serial8250_init_port(struct uart_82
void serial8250_set_defaults(struct uart_8250_port *up);
void serial8250_console_write(struct uart_8250_port *up, const char *s,
unsigned int count);
+void serial8250_console_write_atomic(struct uart_8250_port *up,
+ struct nbcon_write_context *wctxt);
+void serial8250_console_write_thread(struct uart_8250_port *up,
+ struct nbcon_write_context *wctxt);
int serial8250_console_setup(struct uart_port *port, char *options, bool probe);
int serial8250_console_exit(struct uart_port *port);
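
The per-byte ownership dance in serial8250_console_write_thread() above is the general nbcon pattern rather than anything 8250-specific. A minimal sketch of the callback shape, assuming a made-up emit_byte() in place of the real TX path (the nbcon_* calls are the actual API used in the hunk):

static void example_write_thread(struct console *con,
                                 struct nbcon_write_context *wctxt)
{
        int len, i;

        if (!nbcon_enter_unsafe(wctxt))
                return;
        /* ... save and mask the device's interrupt enables ... */
        if (nbcon_exit_unsafe(wctxt)) {
                len = READ_ONCE(wctxt->len);
                for (i = 0; i < len; i++) {
                        /*
                         * Go unsafe only around the register access so a
                         * higher-priority context can take over between
                         * bytes. After a takeover, outbuf/len are stale
                         * and the loop must abort.
                         */
                        if (!nbcon_enter_unsafe(wctxt)) {
                                nbcon_reacquire_nobuf(wctxt);
                                break;
                        }
                        emit_byte(con, wctxt->outbuf[i]);       /* made up */
                        if (!nbcon_exit_unsafe(wctxt)) {
                                nbcon_reacquire_nobuf(wctxt);
                                break;
                        }
                }
        } else {
                nbcon_reacquire_nobuf(wctxt);
        }
        /* Regain ownership for the final device restore. */
        while (!nbcon_enter_unsafe(wctxt))
                nbcon_reacquire_nobuf(wctxt);
        /* ... restore the device's interrupt enables ... */
        nbcon_exit_unsafe(wctxt);
}

Keeping the unsafe sections one byte wide bounds how long a panic-priority writer has to wait before it can take the port over.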


@ -1,138 +0,0 @@
From: John Ogness <john.ogness@linutronix.de>
Date: Wed, 4 Sep 2024 14:11:36 +0206
Subject: [PATCH 54/54] printk: Avoid false positive lockdep report for legacy
printing
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
Legacy console printing from printk() caller context may invoke
the console driver from atomic context. This leads to a lockdep
splat because the console driver will acquire a sleeping lock
and the caller may already hold a spinning lock. This is noticed
by lockdep on !PREEMPT_RT configurations because it will lead to
a problem on PREEMPT_RT.
However, on PREEMPT_RT the printing path from atomic context is
always avoided and the console driver is always invoked from a
dedicated thread. Thus the lockdep splat on !PREEMPT_RT is a
false positive.
For !PREEMPT_RT override the lock-context before invoking the
console driver to avoid the false positive.
Do not override the lock-context for PREEMPT_RT in order to
allow lockdep to catch any real locking context issues related
to the write callback usage.
Signed-off-by: John Ogness <john.ogness@linutronix.de>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Link: https://lore.kernel.org/r/20240904120536.115780-18-john.ogness@linutronix.de
Signed-off-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/printk/printk.c | 83 +++++++++++++++++++++++++++++++++++++------------
1 file changed, 63 insertions(+), 20 deletions(-)
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -2983,6 +2983,34 @@ bool printk_get_next_message(struct prin
}
/*
+ * Legacy console printing from printk() caller context does not respect
+ * raw_spinlock/spinlock nesting. For !PREEMPT_RT the lockdep warning is a
+ * false positive. For PREEMPT_RT the false positive condition does not
+ * occur.
+ *
+ * This map is used to temporarily establish LD_WAIT_SLEEP context for the
+ * console write() callback when legacy printing to avoid false positive
+ * lockdep complaints, thus allowing lockdep to continue to function for
+ * real issues.
+ */
+#ifdef CONFIG_PREEMPT_RT
+static inline void printk_legacy_allow_spinlock_enter(void) { }
+static inline void printk_legacy_allow_spinlock_exit(void) { }
+#else
+static DEFINE_WAIT_OVERRIDE_MAP(printk_legacy_map, LD_WAIT_SLEEP);
+
+static inline void printk_legacy_allow_spinlock_enter(void)
+{
+ lock_map_acquire_try(&printk_legacy_map);
+}
+
+static inline void printk_legacy_allow_spinlock_exit(void)
+{
+ lock_map_release(&printk_legacy_map);
+}
+#endif /* CONFIG_PREEMPT_RT */
+
+/*
* Used as the printk buffers for non-panic, serialized console printing.
* This is for legacy (!CON_NBCON) as well as all boot (CON_BOOT) consoles.
* Its usage requires the console_lock held.
@@ -3031,31 +3059,46 @@ static bool console_emit_next_record(str
con->dropped = 0;
}
- /*
- * While actively printing out messages, if another printk()
- * were to occur on another CPU, it may wait for this one to
- * finish. This task can not be preempted if there is a
- * waiter waiting to take over.
- *
- * Interrupts are disabled because the hand over to a waiter
- * must not be interrupted until the hand over is completed
- * (@console_waiter is cleared).
- */
- printk_safe_enter_irqsave(flags);
- console_lock_spinning_enable();
+ /* Write everything out to the hardware. */
- /* Do not trace print latency. */
- stop_critical_timings();
+ if (force_legacy_kthread() && !panic_in_progress()) {
+ /*
+ * With forced threading this function is in a task context
+ * (either legacy kthread or get_init_console_seq()). There
+ * is no need for concern about printk reentrance, handovers,
+ * or lockdep complaints.
+ */
- /* Write everything out to the hardware. */
- con->write(con, outbuf, pmsg.outbuf_len);
+ con->write(con, outbuf, pmsg.outbuf_len);
+ con->seq = pmsg.seq + 1;
+ } else {
+ /*
+ * While actively printing out messages, if another printk()
+ * were to occur on another CPU, it may wait for this one to
+ * finish. This task can not be preempted if there is a
+ * waiter waiting to take over.
+ *
+ * Interrupts are disabled because the hand over to a waiter
+ * must not be interrupted until the hand over is completed
+ * (@console_waiter is cleared).
+ */
+ printk_safe_enter_irqsave(flags);
+ console_lock_spinning_enable();
+
+ /* Do not trace print latency. */
+ stop_critical_timings();
+
+ printk_legacy_allow_spinlock_enter();
+ con->write(con, outbuf, pmsg.outbuf_len);
+ printk_legacy_allow_spinlock_exit();
- start_critical_timings();
+ start_critical_timings();
- con->seq = pmsg.seq + 1;
+ con->seq = pmsg.seq + 1;
- *handover = console_lock_spinning_disable_and_check(cookie);
- printk_safe_exit_irqrestore(flags);
+ *handover = console_lock_spinning_disable_and_check(cookie);
+ printk_safe_exit_irqrestore(flags);
+ }
skip:
return true;
}
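
The override map itself is a small, reusable pattern; a sketch of just that piece, with example_map and console_example_write() as invented placeholder names:

#include <linux/lockdep.h>

/* A lockdep map whose wait type says "this context may sleep". */
static DEFINE_WAIT_OVERRIDE_MAP(example_map, LD_WAIT_SLEEP);

static void call_write_callback(struct console *con, const char *buf,
                                unsigned int len)
{
        /*
         * Everything inside is judged against LD_WAIT_SLEEP instead of
         * the caller's spinning-lock context, so a sleeping lock taken
         * by the driver does not raise a wait-type splat.
         */
        lock_map_acquire_try(&example_map);
        console_example_write(con, buf, len);   /* invented driver hook */
        lock_map_release(&example_map);
}

Lockdep still records the locks taken inside the section, so real ordering problems stay visible; only the wait-type check is relaxed.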


@ -1,29 +0,0 @@
From: John Ogness <john.ogness@linutronix.de>
Date: Mon, 2 Oct 2023 15:30:43 +0000
Subject: [PATCH 54/54] serial: 8250: Revert "drop lockdep annotation from
serial8250_clear_IER()"
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
The 8250 driver no longer depends on @oops_in_progress and
will no longer violate the port->lock locking constraints.
This reverts commit 3d9e6f556e235ddcdc9f73600fdd46fe1736b090.
Signed-off-by: John Ogness <john.ogness@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
drivers/tty/serial/8250/8250_port.c | 3 +++
1 file changed, 3 insertions(+)
--- a/drivers/tty/serial/8250/8250_port.c
+++ b/drivers/tty/serial/8250/8250_port.c
@@ -712,6 +712,9 @@ static void __serial8250_clear_IER(struc
static inline void serial8250_clear_IER(struct uart_8250_port *up)
{
+ /* Port locked to synchronize UART_IER access against the console. */
+ lockdep_assert_held_once(&up->port.lock);
+
__serial8250_clear_IER(up);
}


@ -1,27 +0,0 @@
Subject: ARM64: Allow to enable RT
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Fri Oct 11 13:14:35 2019 +0200
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
Allow selecting RT.
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
arch/arm64/Kconfig | 1 +
1 file changed, 1 insertion(+)
---
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -99,6 +99,7 @@ config ARM64
select ARCH_SUPPORTS_NUMA_BALANCING
select ARCH_SUPPORTS_PAGE_TABLE_CHECK
select ARCH_SUPPORTS_PER_VMA_LOCK
+ select ARCH_SUPPORTS_RT
select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
select ARCH_WANT_COMPAT_IPC_PARSE_VERSION if COMPAT
select ARCH_WANT_DEFAULT_BPF_JIT


@ -1,35 +0,0 @@
Subject: ARM: Allow to enable RT
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Fri Oct 11 13:14:29 2019 +0200
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
Allow selecting RT.
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
arch/arm/Kconfig | 2 ++
1 file changed, 2 insertions(+)
---
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -39,6 +39,7 @@ config ARM
select ARCH_SUPPORTS_CFI_CLANG
select ARCH_SUPPORTS_HUGETLBFS if ARM_LPAE
select ARCH_SUPPORTS_PER_VMA_LOCK
+ select ARCH_SUPPORTS_RT if HAVE_POSIX_CPU_TIMERS_TASK_WORK
select ARCH_USE_BUILTIN_BSWAP
select ARCH_USE_CMPXCHG_LOCKREF
select ARCH_USE_MEMTEST
@@ -126,6 +127,7 @@ config ARM
select HAVE_PERF_EVENTS
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
+ select HAVE_POSIX_CPU_TIMERS_TASK_WORK if !KVM
select MMU_GATHER_RCU_TABLE_FREE if SMP && ARM_LPAE
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_RSEQ


@ -1,91 +0,0 @@
Subject: ARM: enable irq in translation/section permission fault handlers
From: Yadi.hu <yadi.hu@windriver.com>
Date: Wed Dec 10 10:32:09 2014 +0800
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
Probably happens on all ARM, with
CONFIG_PREEMPT_RT
CONFIG_DEBUG_ATOMIC_SLEEP
This simple program....
int main() {
*((char*)0xc0001000) = 0;
};
[ 512.742724] BUG: sleeping function called from invalid context at kernel/rtmutex.c:658
[ 512.743000] in_atomic(): 0, irqs_disabled(): 128, pid: 994, name: a
[ 512.743217] INFO: lockdep is turned off.
[ 512.743360] irq event stamp: 0
[ 512.743482] hardirqs last enabled at (0): [< (null)>] (null)
[ 512.743714] hardirqs last disabled at (0): [<c0426370>] copy_process+0x3b0/0x11c0
[ 512.744013] softirqs last enabled at (0): [<c0426370>] copy_process+0x3b0/0x11c0
[ 512.744303] softirqs last disabled at (0): [< (null)>] (null)
[ 512.744631] [<c041872c>] (unwind_backtrace+0x0/0x104)
[ 512.745001] [<c09af0c4>] (dump_stack+0x20/0x24)
[ 512.745355] [<c0462490>] (__might_sleep+0x1dc/0x1e0)
[ 512.745717] [<c09b6770>] (rt_spin_lock+0x34/0x6c)
[ 512.746073] [<c0441bf0>] (do_force_sig_info+0x34/0xf0)
[ 512.746457] [<c0442668>] (force_sig_info+0x18/0x1c)
[ 512.746829] [<c041d880>] (__do_user_fault+0x9c/0xd8)
[ 512.747185] [<c041d938>] (do_bad_area+0x7c/0x94)
[ 512.747536] [<c041d990>] (do_sect_fault+0x40/0x48)
[ 512.747898] [<c040841c>] (do_DataAbort+0x40/0xa0)
[ 512.748181] Exception stack(0xecaa1fb0 to 0xecaa1ff8)
0xc0000000 belongs to the kernel address space; a user task must not be
allowed to access it. Under this condition the correct result is that
the test case receives a “segmentation fault” and exits, instead of
producing the splat above.
The root cause is commit 02fe2845d6a8 ("avoid enabling interrupts in
prefetch/data abort handlers"): it deletes the irq-enable block from the
data abort assembly code and moves it into the page/breakpoint/alignment
fault handlers instead, but does not enable irqs in the translation/
section permission fault handlers. ARM disables irqs when it enters
exception/interrupt mode; if the kernel does not re-enable them, they
stay disabled for the whole translation/section permission fault.
We see the above splat because do_force_sig_info is still called with
IRQs off, and that code eventually does a:
spin_lock_irqsave(&t->sighand->siglock, flags);
As this is architecture-independent code, and no other architecture has
needed the siglock converted to a raw lock, we conclude that irqs should
be enabled for the ARM translation/section permission exceptions.
Signed-off-by: Yadi.hu <yadi.hu@windriver.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
arch/arm/mm/fault.c | 6 ++++++
1 file changed, 6 insertions(+)
---
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -474,6 +474,9 @@ do_translation_fault(unsigned long addr,
if (addr < TASK_SIZE)
return do_page_fault(addr, fsr, regs);
+ if (interrupts_enabled(regs))
+ local_irq_enable();
+
if (user_mode(regs))
goto bad_area;
@@ -544,6 +547,9 @@ do_translation_fault(unsigned long addr,
static int
do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
+ if (interrupts_enabled(regs))
+ local_irq_enable();
+
do_bad_area(addr, fsr, regs);
return 0;
}


@ -1,19 +0,0 @@
Subject: Add localversion for -RT release
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri Jul 8 20:25:16 2011 +0200
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
localversion-rt | 1 +
1 file changed, 1 insertion(+)
create mode 100644 localversion-rt
---
--- /dev/null
+++ b/localversion-rt
@@ -0,0 +1 @@
+-rt7


@ -1,35 +0,0 @@
Subject: POWERPC: Allow to enable RT
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Fri Oct 11 13:14:41 2019 +0200
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
Allow selecting RT.
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
arch/powerpc/Kconfig | 2 ++
1 file changed, 2 insertions(+)
---
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -167,6 +167,7 @@ config PPC
select ARCH_STACKWALK
select ARCH_SUPPORTS_ATOMIC_RMW
select ARCH_SUPPORTS_DEBUG_PAGEALLOC if PPC_BOOK3S || PPC_8xx
+ select ARCH_SUPPORTS_RT if HAVE_POSIX_CPU_TIMERS_TASK_WORK
select ARCH_USE_BUILTIN_BSWAP
select ARCH_USE_CMPXCHG_LOCKREF if PPC64
select ARCH_USE_MEMTEST
@@ -271,6 +272,7 @@ config PPC
select HAVE_PERF_USER_STACK_DUMP
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_RELIABLE_STACKTRACE
+ select HAVE_POSIX_CPU_TIMERS_TASK_WORK if !KVM
select HAVE_RSEQ
select HAVE_SETUP_PER_CPU_AREA if PPC64
select HAVE_SOFTIRQ_ON_OWN_STACK


@ -1,779 +0,0 @@
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sat, 23 Sep 2023 03:11:05 +0200
Subject: [PATCH] sched: define TIF_ALLOW_RESCHED
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
On Fri, Sep 22 2023 at 00:55, Thomas Gleixner wrote:
> On Thu, Sep 21 2023 at 09:00, Linus Torvalds wrote:
>> That said - I think as a proof of concept and "look, with this we get
>> the expected scheduling event counts", that patch is perfect. I think
>> you more than proved the concept.
>
> There is certainly quite some analyis work to do to make this a one to
> one replacement.
>
> With a handful of benchmarks the PoC (tweaked with some obvious fixes)
> is pretty much on par with the current mainline variants (NONE/FULL),
> but the memtier benchmark makes a massive dent.
>
> It sports a whopping 10% regression with the LAZY mode versus the mainline
> NONE model. Non-LAZY and FULL behave unsurprisingly in the same way.
>
> That benchmark is really sensitive to the preemption model. With current
> mainline (DYNAMIC_PREEMPT enabled) the preempt=FULL model has ~20%
> performance drop versus preempt=NONE.
That 20% was a tired pilot error. The real number is in the 5% ballpark.
> I have no clue what's going on there yet, but that shows that there is
> obviously quite some work ahead to get this sorted.
It took some head scratching to figure that out. The initial fix broke
the handling of the hog issue, i.e. the problem that Ankur tried to
solve, but I hacked up a "solution" for that too.
With that the memtier benchmark is roughly back to the mainline numbers,
but my throughput-benchmark know-how is pretty close to zero, so that
should be looked at by people who actually understand these things.
Likewise the hog prevention is just at the PoC level and clearly beyond
my knowledge of scheduler details: It unconditionally forces a
reschedule when the looping task is not responding to a lazy reschedule
request before the next tick. IOW it forces a reschedule on the second
tick, which is obviously different from the cond_resched()/might_sleep()
behaviour.
The changes vs. the original PoC aside of the bug and thinko fixes:
1) A hack to utilize the TRACE_FLAG_IRQS_NOSUPPORT flag to trace the
lazy preempt bit as the trace_entry::flags field is full already.
That obviously breaks the tracer ABI, but if we go there then
this needs to be fixed. Steven?
2) debugfs file to validate that loops can be force preempted w/o
cond_resched()
The usage is:
# taskset -c 1 bash
# echo 1 > /sys/kernel/debug/sched/hog &
# echo 1 > /sys/kernel/debug/sched/hog &
# echo 1 > /sys/kernel/debug/sched/hog &
top shows ~33% CPU for each of the hogs and tracing confirms that
the crude hack in the scheduler tick works:
bash-4559 [001] dlh2. 2253.331202: resched_curr <-__update_curr
bash-4560 [001] dlh2. 2253.340199: resched_curr <-__update_curr
bash-4561 [001] dlh2. 2253.346199: resched_curr <-__update_curr
bash-4559 [001] dlh2. 2253.353199: resched_curr <-__update_curr
bash-4561 [001] dlh2. 2253.358199: resched_curr <-__update_curr
bash-4560 [001] dlh2. 2253.370202: resched_curr <-__update_curr
bash-4559 [001] dlh2. 2253.378198: resched_curr <-__update_curr
bash-4561 [001] dlh2. 2253.389199: resched_curr <-__update_curr
The 'l' instead of the usual 'N' reflects that the lazy resched
bit is set. That makes __update_curr() invoke resched_curr()
instead of the lazy variant. resched_curr() sets TIF_NEED_RESCHED
and folds it into preempt_count so that preemption happens at the
next possible point, i.e. either in return from interrupt or at
the next preempt_enable().
That's as much as I wanted to demonstrate and I'm not going to spend
more cycles on it, as I already have too many other things in flight and
the resulting scheduler woes are clearly outside of my expertise.
Though definitely I'm putting a permanent NAK in place for any attempts
to duct tape the preempt=NONE model any further by sprinkling more
cond*() and whatever warts around.
Thanks,
tglx
[tglx: s@CONFIG_PREEMPT_AUTO@CONFIG_PREEMPT_BUILD_AUTO@ ]
Link: https://lore.kernel.org/all/87jzshhexi.ffs@tglx/
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
arch/x86/Kconfig | 1
arch/x86/include/asm/thread_info.h | 6 ++--
drivers/acpi/processor_idle.c | 2 -
include/linux/entry-common.h | 2 -
include/linux/entry-kvm.h | 2 -
include/linux/sched.h | 12 +++++---
include/linux/sched/idle.h | 8 ++---
include/linux/thread_info.h | 24 +++++++++++++++++
include/linux/trace_events.h | 8 ++---
kernel/Kconfig.preempt | 17 +++++++++++-
kernel/entry/common.c | 4 +-
kernel/entry/kvm.c | 2 -
kernel/sched/core.c | 50 +++++++++++++++++++++++++------------
kernel/sched/debug.c | 19 ++++++++++++++
kernel/sched/fair.c | 46 ++++++++++++++++++++++------------
kernel/sched/features.h | 2 +
kernel/sched/idle.c | 3 --
kernel/sched/sched.h | 1
kernel/trace/trace.c | 2 +
kernel/trace/trace_output.c | 16 ++++++++++-
20 files changed, 171 insertions(+), 56 deletions(-)
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -282,6 +282,7 @@
select HAVE_STATIC_CALL
select HAVE_STATIC_CALL_INLINE if HAVE_OBJTOOL
select HAVE_PREEMPT_DYNAMIC_CALL
+ select HAVE_PREEMPT_AUTO
select HAVE_RSEQ
select HAVE_RUST if X86_64
select HAVE_SYSCALL_TRACEPOINTS
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -87,8 +87,9 @@
#define TIF_NOTIFY_RESUME 1 /* callback before returning to user */
#define TIF_SIGPENDING 2 /* signal pending */
#define TIF_NEED_RESCHED 3 /* rescheduling necessary */
-#define TIF_SINGLESTEP 4 /* reenable singlestep on user return*/
-#define TIF_SSBD 5 /* Speculative store bypass disable */
+#define TIF_ARCH_RESCHED_LAZY 4 /* Lazy rescheduling */
+#define TIF_SINGLESTEP 5 /* reenable singlestep on user return*/
+#define TIF_SSBD 6 /* Speculative store bypass disable */
#define TIF_SPEC_IB 9 /* Indirect branch speculation mitigation */
#define TIF_SPEC_L1D_FLUSH 10 /* Flush L1D on mm switches (processes) */
#define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */
@@ -110,6 +111,7 @@
#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
+#define _TIF_ARCH_RESCHED_LAZY (1 << TIF_ARCH_RESCHED_LAZY)
#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP)
#define _TIF_SSBD (1 << TIF_SSBD)
#define _TIF_SPEC_IB (1 << TIF_SPEC_IB)
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -107,7 +107,7 @@
*/
static void __cpuidle acpi_safe_halt(void)
{
- if (!tif_need_resched()) {
+ if (!need_resched()) {
raw_safe_halt();
raw_local_irq_disable();
}
--- a/include/linux/entry-common.h
+++ b/include/linux/entry-common.h
@@ -65,7 +65,7 @@
#define EXIT_TO_USER_MODE_WORK \
(_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE | \
_TIF_NEED_RESCHED | _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL | \
- ARCH_EXIT_TO_USER_MODE_WORK)
+ _TIF_NEED_RESCHED_LAZY | ARCH_EXIT_TO_USER_MODE_WORK)
/**
* arch_enter_from_user_mode - Architecture specific sanity check for user mode regs
--- a/include/linux/entry-kvm.h
+++ b/include/linux/entry-kvm.h
@@ -18,7 +18,7 @@
#define XFER_TO_GUEST_MODE_WORK \
(_TIF_NEED_RESCHED | _TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL | \
- _TIF_NOTIFY_RESUME | ARCH_XFER_TO_GUEST_MODE_WORK)
+ _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED_LAZY | ARCH_XFER_TO_GUEST_MODE_WORK)
struct kvm_vcpu;
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1957,17 +1957,17 @@
update_ti_thread_flag(task_thread_info(tsk), flag, value);
}
-static inline int test_and_set_tsk_thread_flag(struct task_struct *tsk, int flag)
+static inline bool test_and_set_tsk_thread_flag(struct task_struct *tsk, int flag)
{
return test_and_set_ti_thread_flag(task_thread_info(tsk), flag);
}
-static inline int test_and_clear_tsk_thread_flag(struct task_struct *tsk, int flag)
+static inline bool test_and_clear_tsk_thread_flag(struct task_struct *tsk, int flag)
{
return test_and_clear_ti_thread_flag(task_thread_info(tsk), flag);
}
-static inline int test_tsk_thread_flag(struct task_struct *tsk, int flag)
+static inline bool test_tsk_thread_flag(struct task_struct *tsk, int flag)
{
return test_ti_thread_flag(task_thread_info(tsk), flag);
}
@@ -1980,9 +1980,11 @@
static inline void clear_tsk_need_resched(struct task_struct *tsk)
{
clear_tsk_thread_flag(tsk,TIF_NEED_RESCHED);
+ if (IS_ENABLED(CONFIG_PREEMPT_BUILD_AUTO))
+ clear_tsk_thread_flag(tsk, TIF_NEED_RESCHED_LAZY);
}
-static inline int test_tsk_need_resched(struct task_struct *tsk)
+static inline bool test_tsk_need_resched(struct task_struct *tsk)
{
return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED));
}
@@ -2082,7 +2084,7 @@
static __always_inline bool need_resched(void)
{
- return unlikely(tif_need_resched());
+ return unlikely(tif_need_resched_lazy() || tif_need_resched());
}
/*
--- a/include/linux/sched/idle.h
+++ b/include/linux/sched/idle.h
@@ -63,7 +63,7 @@
*/
smp_mb__after_atomic();
- return unlikely(tif_need_resched());
+ return unlikely(need_resched());
}
static __always_inline bool __must_check current_clr_polling_and_test(void)
@@ -76,7 +76,7 @@
*/
smp_mb__after_atomic();
- return unlikely(tif_need_resched());
+ return unlikely(need_resched());
}
#else
@@ -85,11 +85,11 @@
static inline bool __must_check current_set_polling_and_test(void)
{
- return unlikely(tif_need_resched());
+ return unlikely(need_resched());
}
static inline bool __must_check current_clr_polling_and_test(void)
{
- return unlikely(tif_need_resched());
+ return unlikely(need_resched());
}
#endif
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -59,6 +59,16 @@
#include <asm/thread_info.h>
+#ifdef CONFIG_PREEMPT_BUILD_AUTO
+# define TIF_NEED_RESCHED_LAZY TIF_ARCH_RESCHED_LAZY
+# define _TIF_NEED_RESCHED_LAZY _TIF_ARCH_RESCHED_LAZY
+# define TIF_NEED_RESCHED_LAZY_OFFSET (TIF_NEED_RESCHED_LAZY - TIF_NEED_RESCHED)
+#else
+# define TIF_NEED_RESCHED_LAZY TIF_NEED_RESCHED
+# define _TIF_NEED_RESCHED_LAZY _TIF_NEED_RESCHED
+# define TIF_NEED_RESCHED_LAZY_OFFSET 0
+#endif
+
#ifdef __KERNEL__
#ifndef arch_set_restart_data
@@ -185,6 +195,13 @@
(unsigned long *)(&current_thread_info()->flags));
}
+static __always_inline bool tif_need_resched_lazy(void)
+{
+ return IS_ENABLED(CONFIG_PREEMPT_BUILD_AUTO) &&
+ arch_test_bit(TIF_NEED_RESCHED_LAZY,
+ (unsigned long *)(&current_thread_info()->flags));
+}
+
#else
static __always_inline bool tif_need_resched(void)
@@ -193,6 +210,13 @@
(unsigned long *)(&current_thread_info()->flags));
}
+static __always_inline bool tif_need_resched_lazy(void)
+{
+ return IS_ENABLED(CONFIG_PREEMPT_BUILD_AUTO) &&
+ test_bit(TIF_NEED_RESCHED_LAZY,
+ (unsigned long *)(&current_thread_info()->flags));
+}
+
#endif /* _ASM_GENERIC_BITOPS_INSTRUMENTED_NON_ATOMIC_H */
#ifndef CONFIG_HAVE_ARCH_WITHIN_STACK_FRAMES
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -184,8 +184,8 @@
enum trace_flag_type {
TRACE_FLAG_IRQS_OFF = 0x01,
- TRACE_FLAG_IRQS_NOSUPPORT = 0x02,
- TRACE_FLAG_NEED_RESCHED = 0x04,
+ TRACE_FLAG_NEED_RESCHED = 0x02,
+ TRACE_FLAG_NEED_RESCHED_LAZY = 0x04,
TRACE_FLAG_HARDIRQ = 0x08,
TRACE_FLAG_SOFTIRQ = 0x10,
TRACE_FLAG_PREEMPT_RESCHED = 0x20,
@@ -211,11 +211,11 @@
static inline unsigned int tracing_gen_ctx_flags(unsigned long irqflags)
{
- return tracing_gen_ctx_irq_test(TRACE_FLAG_IRQS_NOSUPPORT);
+ return tracing_gen_ctx_irq_test(0);
}
static inline unsigned int tracing_gen_ctx(void)
{
- return tracing_gen_ctx_irq_test(TRACE_FLAG_IRQS_NOSUPPORT);
+ return tracing_gen_ctx_irq_test(0);
}
#endif
--- a/kernel/Kconfig.preempt
+++ b/kernel/Kconfig.preempt
@@ -11,6 +11,13 @@
select PREEMPTION
select UNINLINE_SPIN_UNLOCK if !ARCH_INLINE_SPIN_UNLOCK
+config PREEMPT_BUILD_AUTO
+ bool
+ select PREEMPT_BUILD
+
+config HAVE_PREEMPT_AUTO
+ bool
+
choice
prompt "Preemption Model"
default PREEMPT_NONE
@@ -67,9 +74,17 @@
embedded system with latency requirements in the milliseconds
range.
+config PREEMPT_AUTO
+ bool "Automagic preemption mode with runtime tweaking support"
+ depends on HAVE_PREEMPT_AUTO
+ select PREEMPT_BUILD_AUTO
+ help
+ Add some sensible blurb here
+
config PREEMPT_RT
bool "Fully Preemptible Kernel (Real-Time)"
depends on EXPERT && ARCH_SUPPORTS_RT
+ select PREEMPT_BUILD_AUTO if HAVE_PREEMPT_AUTO
select PREEMPTION
help
This option turns the kernel into a real-time kernel by replacing
@@ -95,7 +110,7 @@
config PREEMPT_DYNAMIC
bool "Preemption behaviour defined on boot"
- depends on HAVE_PREEMPT_DYNAMIC && !PREEMPT_RT
+ depends on HAVE_PREEMPT_DYNAMIC && !PREEMPT_RT && !PREEMPT_AUTO
select JUMP_LABEL if HAVE_PREEMPT_DYNAMIC_KEY
select PREEMPT_BUILD
default y if HAVE_PREEMPT_DYNAMIC_CALL
--- a/kernel/entry/common.c
+++ b/kernel/entry/common.c
@@ -98,7 +98,7 @@
local_irq_enable_exit_to_user(ti_work);
- if (ti_work & _TIF_NEED_RESCHED)
+ if (ti_work & (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY))
schedule();
if (ti_work & _TIF_UPROBE)
@@ -307,7 +307,7 @@
rcu_irq_exit_check_preempt();
if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
WARN_ON_ONCE(!on_thread_stack());
- if (need_resched())
+ if (test_tsk_need_resched(current))
preempt_schedule_irq();
}
}
--- a/kernel/entry/kvm.c
+++ b/kernel/entry/kvm.c
@@ -13,7 +13,7 @@
return -EINTR;
}
- if (ti_work & _TIF_NEED_RESCHED)
+ if (ti_work & (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY))
schedule();
if (ti_work & _TIF_NOTIFY_RESUME)
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -899,14 +899,15 @@
#if defined(CONFIG_SMP) && defined(TIF_POLLING_NRFLAG)
/*
- * Atomically set TIF_NEED_RESCHED and test for TIF_POLLING_NRFLAG,
+ * Atomically set TIF_NEED_RESCHED[_LAZY] and test for TIF_POLLING_NRFLAG,
* this avoids any races wrt polling state changes and thereby avoids
* spurious IPIs.
*/
-static inline bool set_nr_and_not_polling(struct task_struct *p)
+static inline bool set_nr_and_not_polling(struct task_struct *p, int tif_bit)
{
struct thread_info *ti = task_thread_info(p);
- return !(fetch_or(&ti->flags, _TIF_NEED_RESCHED) & _TIF_POLLING_NRFLAG);
+
+ return !(fetch_or(&ti->flags, 1 << tif_bit) & _TIF_POLLING_NRFLAG);
}
/*
@@ -923,7 +924,7 @@
do {
if (!(val & _TIF_POLLING_NRFLAG))
return false;
- if (val & _TIF_NEED_RESCHED)
+ if (val & (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY))
return true;
} while (!try_cmpxchg(&ti->flags, &val, val | _TIF_NEED_RESCHED));
@@ -931,9 +932,9 @@
}
#else
-static inline bool set_nr_and_not_polling(struct task_struct *p)
+static inline bool set_nr_and_not_polling(struct task_struct *p, int tif_bit)
{
- set_tsk_need_resched(p);
+ set_tsk_thread_flag(p, tif_bit);
return true;
}
@@ -1038,28 +1039,47 @@
* might also involve a cross-CPU call to trigger the scheduler on
* the target CPU.
*/
-void resched_curr(struct rq *rq)
+static void __resched_curr(struct rq *rq, int lazy)
{
+ int cpu, tif_bit = TIF_NEED_RESCHED + lazy;
struct task_struct *curr = rq->curr;
- int cpu;
lockdep_assert_rq_held(rq);
- if (test_tsk_need_resched(curr))
+ if (unlikely(test_tsk_thread_flag(curr, tif_bit)))
return;
cpu = cpu_of(rq);
if (cpu == smp_processor_id()) {
- set_tsk_need_resched(curr);
- set_preempt_need_resched();
+ set_tsk_thread_flag(curr, tif_bit);
+ if (!lazy)
+ set_preempt_need_resched();
return;
}
- if (set_nr_and_not_polling(curr))
- smp_send_reschedule(cpu);
- else
+ if (set_nr_and_not_polling(curr, tif_bit)) {
+ if (!lazy)
+ smp_send_reschedule(cpu);
+ } else {
trace_sched_wake_idle_without_ipi(cpu);
+ }
+}
+
+void resched_curr(struct rq *rq)
+{
+ __resched_curr(rq, 0);
+}
+
+void resched_curr_lazy(struct rq *rq)
+{
+ int lazy = IS_ENABLED(CONFIG_PREEMPT_BUILD_AUTO) && !sched_feat(FORCE_NEED_RESCHED) ?
+ TIF_NEED_RESCHED_LAZY_OFFSET : 0;
+
+ if (lazy && unlikely(test_tsk_thread_flag(rq->curr, TIF_NEED_RESCHED)))
+ return;
+
+ __resched_curr(rq, lazy);
}
void resched_cpu(int cpu)
@@ -1154,7 +1174,7 @@
* and testing of the above solutions didn't appear to report
* much benefits.
*/
- if (set_nr_and_not_polling(rq->idle))
+ if (set_nr_and_not_polling(rq->idle, TIF_NEED_RESCHED))
smp_send_reschedule(cpu);
else
trace_sched_wake_idle_without_ipi(cpu);
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -333,6 +333,23 @@
.release = seq_release,
};
+static ssize_t sched_hog_write(struct file *filp, const char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ unsigned long end = jiffies + 60 * HZ;
+
+ for (; time_before(jiffies, end) && !signal_pending(current);)
+ cpu_relax();
+
+ return cnt;
+}
+
+static const struct file_operations sched_hog_fops = {
+ .write = sched_hog_write,
+ .open = simple_open,
+ .llseek = default_llseek,
+};
+
static struct dentry *debugfs_sched;
static __init int sched_init_debug(void)
@@ -374,6 +391,8 @@
debugfs_create_file("debug", 0444, debugfs_sched, NULL, &sched_debug_fops);
+ debugfs_create_file("hog", 0200, debugfs_sched, NULL, &sched_hog_fops);
+
return 0;
}
late_initcall(sched_init_debug);
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -974,8 +974,10 @@
* XXX: strictly: vd_i += N*r_i/w_i such that: vd_i > ve_i
* this is probably good enough.
*/
-static void update_deadline(struct cfs_rq *cfs_rq, struct sched_entity *se)
+static void update_deadline(struct cfs_rq *cfs_rq, struct sched_entity *se, bool tick)
{
+ struct rq *rq = rq_of(cfs_rq);
+
if ((s64)(se->vruntime - se->deadline) < 0)
return;
@@ -994,10 +996,19 @@
/*
* The task has consumed its request, reschedule.
*/
- if (cfs_rq->nr_running > 1) {
- resched_curr(rq_of(cfs_rq));
- clear_buddies(cfs_rq, se);
+ if (cfs_rq->nr_running < 2)
+ return;
+
+ if (!IS_ENABLED(CONFIG_PREEMPT_BUILD_AUTO) || sched_feat(FORCE_NEED_RESCHED)) {
+ resched_curr(rq);
+ } else {
+ /* Did the task ignore the lazy reschedule request? */
+ if (tick && test_tsk_thread_flag(rq->curr, TIF_NEED_RESCHED_LAZY))
+ resched_curr(rq);
+ else
+ resched_curr_lazy(rq);
}
+ clear_buddies(cfs_rq, se);
}
#include "pelt.h"
@@ -1153,7 +1164,7 @@
/*
* Update the current task's runtime statistics.
*/
-static void update_curr(struct cfs_rq *cfs_rq)
+static void __update_curr(struct cfs_rq *cfs_rq, bool tick)
{
struct sched_entity *curr = cfs_rq->curr;
s64 delta_exec;
@@ -1166,7 +1177,7 @@
return;
curr->vruntime += calc_delta_fair(delta_exec, curr);
- update_deadline(cfs_rq, curr);
+ update_deadline(cfs_rq, curr, tick);
update_min_vruntime(cfs_rq);
if (entity_is_task(curr))
@@ -1175,6 +1186,11 @@
account_cfs_rq_runtime(cfs_rq, delta_exec);
}
+static inline void update_curr(struct cfs_rq *cfs_rq)
+{
+ __update_curr(cfs_rq, false);
+}
+
static void update_curr_fair(struct rq *rq)
{
update_curr(cfs_rq_of(&rq->curr->se));
@@ -5520,7 +5536,7 @@
/*
* Update run-time statistics of the 'current'.
*/
- update_curr(cfs_rq);
+ __update_curr(cfs_rq, true);
/*
* Ensure that runnable average is periodically updated.
@@ -5534,7 +5550,7 @@
* validating it and just reschedule.
*/
if (queued) {
- resched_curr(rq_of(cfs_rq));
+ resched_curr_lazy(rq_of(cfs_rq));
return;
}
/*
@@ -5680,7 +5696,7 @@
* hierarchy can be throttled
*/
if (!assign_cfs_rq_runtime(cfs_rq) && likely(cfs_rq->curr))
- resched_curr(rq_of(cfs_rq));
+ resched_curr_lazy(rq_of(cfs_rq));
}
static __always_inline
@@ -5940,7 +5956,7 @@
/* Determine whether we need to wake up potentially idle CPU: */
if (rq->curr == rq->idle && rq->cfs.nr_running)
- resched_curr(rq);
+ resched_curr_lazy(rq);
}
#ifdef CONFIG_SMP
@@ -6655,7 +6671,7 @@
if (delta < 0) {
if (task_current(rq, p))
- resched_curr(rq);
+ resched_curr_lazy(rq);
return;
}
hrtick_start(rq, delta);
@@ -8387,7 +8403,7 @@
* prevents us from potentially nominating it as a false LAST_BUDDY
* below.
*/
- if (test_tsk_need_resched(curr))
+ if (need_resched())
return;
if (!sched_feat(WAKEUP_PREEMPTION))
@@ -8425,7 +8441,7 @@
return;
preempt:
- resched_curr(rq);
+ resched_curr_lazy(rq);
}
#ifdef CONFIG_SMP
@@ -12579,7 +12595,7 @@
*/
if (rq->core->core_forceidle_count && rq->cfs.nr_running == 1 &&
__entity_slice_used(&curr->se, MIN_NR_TASKS_DURING_FORCEIDLE))
- resched_curr(rq);
+ resched_curr_lazy(rq);
}
/*
@@ -12746,7 +12762,7 @@
*/
if (task_current(rq, p)) {
if (p->prio > oldprio)
- resched_curr(rq);
+ resched_curr_lazy(rq);
} else
wakeup_preempt(rq, p, 0);
}
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -87,3 +87,5 @@
SCHED_FEAT(LATENCY_WARN, false)
SCHED_FEAT(HZ_BW, true)
+
+SCHED_FEAT(FORCE_NEED_RESCHED, false)
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -57,8 +57,7 @@
ct_cpuidle_enter();
raw_local_irq_enable();
- while (!tif_need_resched() &&
- (cpu_idle_force_poll || tick_check_broadcast_expired()))
+ while (!need_resched() && (cpu_idle_force_poll || tick_check_broadcast_expired()))
cpu_relax();
raw_local_irq_disable();
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2513,6 +2513,7 @@
extern void reweight_task(struct task_struct *p, const struct load_weight *lw);
extern void resched_curr(struct rq *rq);
+extern void resched_curr_lazy(struct rq *rq);
extern void resched_cpu(int cpu);
extern struct rt_bandwidth def_rt_bandwidth;
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2515,6 +2515,8 @@
if (tif_need_resched())
trace_flags |= TRACE_FLAG_NEED_RESCHED;
+ if (tif_need_resched_lazy())
+ trace_flags |= TRACE_FLAG_NEED_RESCHED_LAZY;
if (test_preempt_need_resched())
trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -460,17 +460,29 @@
(entry->flags & TRACE_FLAG_IRQS_OFF && bh_off) ? 'D' :
(entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' :
bh_off ? 'b' :
- (entry->flags & TRACE_FLAG_IRQS_NOSUPPORT) ? 'X' :
+ !IS_ENABLED(CONFIG_TRACE_IRQFLAGS_SUPPORT) ? 'X' :
'.';
- switch (entry->flags & (TRACE_FLAG_NEED_RESCHED |
+ switch (entry->flags & (TRACE_FLAG_NEED_RESCHED | TRACE_FLAG_NEED_RESCHED_LAZY |
TRACE_FLAG_PREEMPT_RESCHED)) {
+ case TRACE_FLAG_NEED_RESCHED | TRACE_FLAG_NEED_RESCHED_LAZY | TRACE_FLAG_PREEMPT_RESCHED:
+ need_resched = 'B';
+ break;
case TRACE_FLAG_NEED_RESCHED | TRACE_FLAG_PREEMPT_RESCHED:
need_resched = 'N';
break;
+ case TRACE_FLAG_NEED_RESCHED_LAZY | TRACE_FLAG_PREEMPT_RESCHED:
+ need_resched = 'L';
+ break;
+ case TRACE_FLAG_NEED_RESCHED | TRACE_FLAG_NEED_RESCHED_LAZY:
+ need_resched = 'b';
+ break;
case TRACE_FLAG_NEED_RESCHED:
need_resched = 'n';
break;
+ case TRACE_FLAG_NEED_RESCHED_LAZY:
+ need_resched = 'l';
+ break;
case TRACE_FLAG_PREEMPT_RESCHED:
need_resched = 'p';
break;
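
For reference, the need_resched letters produced by the trace_output.c hunk above decode as follows (the existing '.' default for no bits set is unchanged):

  B  need_resched + lazy + preempt_resched
  N  need_resched + preempt_resched
  L  lazy + preempt_resched
  b  need_resched + lazy
  n  need_resched only
  l  lazy only
  p  preempt_resched only

This matches the 'l' entries in the hog trace earlier, where only the lazy bit was pending.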


@ -1,35 +0,0 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Tue, 23 Jan 2024 12:56:21 +0100
Subject: [PATCH] arm: Disable FAST_GUP on PREEMPT_RT if HIGHPTE is also
enabled.
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
gup_pgd_range() is invoked with disabled interrupts and invokes
__kmap_local_page_prot() via gup_p4d_range() and pte_offset_map().
With HIGHPTE enabled, __kmap_local_page_prot() invokes kmap_high_get()
which uses a spinlock_t via lock_kmap_any(). This leads to a
sleeping-while-atomic error on PREEMPT_RT because spinlock_t becomes a
sleeping lock there and must not be acquired in atomic context.
The loop in map_new_virtual() uses a wait_queue_head_t for wakeup,
which also relies on a spinlock_t.
Additionally limit HAVE_GUP_FAST so that it stays disabled on
PREEMPT_RT with HIGHPTE enabled.
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
arch/arm/Kconfig | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -102,7 +102,7 @@ config ARM
select HAVE_DYNAMIC_FTRACE_WITH_REGS if HAVE_DYNAMIC_FTRACE
select HAVE_EFFICIENT_UNALIGNED_ACCESS if (CPU_V6 || CPU_V6K || CPU_V7) && MMU
select HAVE_EXIT_THREAD
- select HAVE_GUP_FAST if ARM_LPAE
+ select HAVE_GUP_FAST if ARM_LPAE && !(PREEMPT_RT && HIGHPTE)
select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL
select HAVE_FUNCTION_ERROR_INJECTION
select HAVE_FUNCTION_GRAPH_TRACER


@ -1,238 +0,0 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Mon, 19 Aug 2024 15:29:12 +0200
Subject: [PATCH] netfilter: nft_counter: Use u64_stats_t for statistic.
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
The nft_counter expression uses two s64 counters for statistics. Those
two are protected by a seqcount to ensure that the 64bit variable is
always properly seen during updates even on 32bit architectures where
the store is performed by two writes. A side effect is that the two
counters (bytes and packets) are written and read together in the same
window.
This can be replaced with u64_stats_t. write_seqcount_begin()/end() is
replaced with u64_stats_update_begin()/end() and behaves the same way
as with seqcount_t on 32bit architectures. Additionally there is a
preempt_disable() on PREEMPT_RT to ensure that a reader does not preempt
a writer.
On 64bit architectures the macros are removed and the reads happen
without any retries. This also means that the reader can observe one
counter (bytes) from before the update and the other counter (packets)
from after it, but that is okay since there is no requirement to have
both counters from the same update window.
Convert the statistic to u64_stats_t. There is one optimisation:
nft_counter_do_init() and nft_counter_clone() allocate a new per-CPU
counter and assign a value to it. During this assignment preemption is
disabled, which is not needed because the counter is not yet exposed to
the system, so there cannot be another writer or reader. Therefore
disabling preemption is omitted and raw_cpu_ptr() is used to obtain a
pointer to a counter for the assignment.
Cc: stable-rt@vger.kernel.org
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
net/netfilter/nft_counter.c | 90 ++++++++++++++++++++++----------------------
1 file changed, 46 insertions(+), 44 deletions(-)
--- a/net/netfilter/nft_counter.c
+++ b/net/netfilter/nft_counter.c
@@ -8,7 +8,7 @@
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
-#include <linux/seqlock.h>
+#include <linux/u64_stats_sync.h>
#include <linux/netlink.h>
#include <linux/netfilter.h>
#include <linux/netfilter/nf_tables.h>
@@ -17,6 +17,11 @@
#include <net/netfilter/nf_tables_offload.h>
struct nft_counter {
+ u64_stats_t bytes;
+ u64_stats_t packets;
+};
+
+struct nft_counter_tot {
s64 bytes;
s64 packets;
};
@@ -25,25 +30,24 @@ struct nft_counter_percpu_priv {
struct nft_counter __percpu *counter;
};
-static DEFINE_PER_CPU(seqcount_t, nft_counter_seq);
+static DEFINE_PER_CPU(struct u64_stats_sync, nft_counter_sync);
static inline void nft_counter_do_eval(struct nft_counter_percpu_priv *priv,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
+ struct u64_stats_sync *nft_sync;
struct nft_counter *this_cpu;
- seqcount_t *myseq;
local_bh_disable();
this_cpu = this_cpu_ptr(priv->counter);
- myseq = this_cpu_ptr(&nft_counter_seq);
-
- write_seqcount_begin(myseq);
+ nft_sync = this_cpu_ptr(&nft_counter_sync);
- this_cpu->bytes += pkt->skb->len;
- this_cpu->packets++;
+ u64_stats_update_begin(nft_sync);
+ u64_stats_add(&this_cpu->bytes, pkt->skb->len);
+ u64_stats_inc(&this_cpu->packets);
+ u64_stats_update_end(nft_sync);
- write_seqcount_end(myseq);
local_bh_enable();
}
@@ -66,17 +70,16 @@ static int nft_counter_do_init(const str
if (cpu_stats == NULL)
return -ENOMEM;
- preempt_disable();
- this_cpu = this_cpu_ptr(cpu_stats);
+ this_cpu = raw_cpu_ptr(cpu_stats);
if (tb[NFTA_COUNTER_PACKETS]) {
- this_cpu->packets =
- be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_PACKETS]));
+ u64_stats_set(&this_cpu->packets,
+ be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_PACKETS])));
}
if (tb[NFTA_COUNTER_BYTES]) {
- this_cpu->bytes =
- be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_BYTES]));
+ u64_stats_set(&this_cpu->bytes,
+ be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_BYTES])));
}
- preempt_enable();
+
priv->counter = cpu_stats;
return 0;
}
@@ -104,40 +107,41 @@ static void nft_counter_obj_destroy(cons
}
static void nft_counter_reset(struct nft_counter_percpu_priv *priv,
- struct nft_counter *total)
+ struct nft_counter_tot *total)
{
+ struct u64_stats_sync *nft_sync;
struct nft_counter *this_cpu;
- seqcount_t *myseq;
local_bh_disable();
this_cpu = this_cpu_ptr(priv->counter);
- myseq = this_cpu_ptr(&nft_counter_seq);
+ nft_sync = this_cpu_ptr(&nft_counter_sync);
+
+ u64_stats_update_begin(nft_sync);
+ u64_stats_add(&this_cpu->packets, -total->packets);
+ u64_stats_add(&this_cpu->bytes, -total->bytes);
+ u64_stats_update_end(nft_sync);
- write_seqcount_begin(myseq);
- this_cpu->packets -= total->packets;
- this_cpu->bytes -= total->bytes;
- write_seqcount_end(myseq);
local_bh_enable();
}
static void nft_counter_fetch(struct nft_counter_percpu_priv *priv,
- struct nft_counter *total)
+ struct nft_counter_tot *total)
{
struct nft_counter *this_cpu;
- const seqcount_t *myseq;
u64 bytes, packets;
unsigned int seq;
int cpu;
memset(total, 0, sizeof(*total));
for_each_possible_cpu(cpu) {
- myseq = per_cpu_ptr(&nft_counter_seq, cpu);
+ struct u64_stats_sync *nft_sync = per_cpu_ptr(&nft_counter_sync, cpu);
+
this_cpu = per_cpu_ptr(priv->counter, cpu);
do {
- seq = read_seqcount_begin(myseq);
- bytes = this_cpu->bytes;
- packets = this_cpu->packets;
- } while (read_seqcount_retry(myseq, seq));
+ seq = u64_stats_fetch_begin(nft_sync);
+ bytes = u64_stats_read(&this_cpu->bytes);
+ packets = u64_stats_read(&this_cpu->packets);
+ } while (u64_stats_fetch_retry(nft_sync, seq));
total->bytes += bytes;
total->packets += packets;
@@ -148,7 +152,7 @@ static int nft_counter_do_dump(struct sk
struct nft_counter_percpu_priv *priv,
bool reset)
{
- struct nft_counter total;
+ struct nft_counter_tot total;
nft_counter_fetch(priv, &total);
@@ -237,7 +241,7 @@ static int nft_counter_clone(struct nft_
struct nft_counter_percpu_priv *priv_clone = nft_expr_priv(dst);
struct nft_counter __percpu *cpu_stats;
struct nft_counter *this_cpu;
- struct nft_counter total;
+ struct nft_counter_tot total;
nft_counter_fetch(priv, &total);
@@ -245,11 +249,9 @@ static int nft_counter_clone(struct nft_
if (cpu_stats == NULL)
return -ENOMEM;
- preempt_disable();
- this_cpu = this_cpu_ptr(cpu_stats);
- this_cpu->packets = total.packets;
- this_cpu->bytes = total.bytes;
- preempt_enable();
+ this_cpu = raw_cpu_ptr(cpu_stats);
+ u64_stats_set(&this_cpu->packets, total.packets);
+ u64_stats_set(&this_cpu->bytes, total.bytes);
priv_clone->counter = cpu_stats;
return 0;
@@ -267,17 +269,17 @@ static void nft_counter_offload_stats(st
const struct flow_stats *stats)
{
struct nft_counter_percpu_priv *priv = nft_expr_priv(expr);
+ struct u64_stats_sync *nft_sync;
struct nft_counter *this_cpu;
- seqcount_t *myseq;
local_bh_disable();
this_cpu = this_cpu_ptr(priv->counter);
- myseq = this_cpu_ptr(&nft_counter_seq);
+ nft_sync = this_cpu_ptr(&nft_counter_sync);
- write_seqcount_begin(myseq);
- this_cpu->packets += stats->pkts;
- this_cpu->bytes += stats->bytes;
- write_seqcount_end(myseq);
+ u64_stats_update_begin(nft_sync);
+ u64_stats_add(&this_cpu->packets, stats->pkts);
+ u64_stats_add(&this_cpu->bytes, stats->bytes);
+ u64_stats_update_end(nft_sync);
local_bh_enable();
}
@@ -286,7 +288,7 @@ void nft_counter_init_seqcount(void)
int cpu;
for_each_possible_cpu(cpu)
- seqcount_init(per_cpu_ptr(&nft_counter_seq, cpu));
+ u64_stats_init(per_cpu_ptr(&nft_counter_sync, cpu));
}
struct nft_expr_type nft_counter_type;
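
The counter scheme above generalizes to any per-CPU statistic. A minimal sketch with invented names (pkt_stats/pkt_sync); the u64_stats_* helpers are the same ones the patch uses:

#include <linux/percpu.h>
#include <linux/u64_stats_sync.h>

struct pkt_stats {
        u64_stats_t bytes;
        u64_stats_t packets;
};
static DEFINE_PER_CPU(struct pkt_stats, pkt_stats);
static DEFINE_PER_CPU(struct u64_stats_sync, pkt_sync);

/* Writer: a plain 64bit add on 64bit, seqcount-protected on 32bit. */
static void pkt_stats_account(unsigned int len)
{
        struct u64_stats_sync *sync;
        struct pkt_stats *s;

        local_bh_disable();
        s = this_cpu_ptr(&pkt_stats);
        sync = this_cpu_ptr(&pkt_sync);
        u64_stats_update_begin(sync);
        u64_stats_add(&s->bytes, len);
        u64_stats_inc(&s->packets);
        u64_stats_update_end(sync);
        local_bh_enable();
}

/* Reader: sums all CPUs, retrying only if a 32bit writer was active. */
static void pkt_stats_sum(u64 *bytes, u64 *packets)
{
        unsigned int seq;
        u64 b, p;
        int cpu;

        *bytes = *packets = 0;
        for_each_possible_cpu(cpu) {
                struct u64_stats_sync *sync = per_cpu_ptr(&pkt_sync, cpu);
                struct pkt_stats *s = per_cpu_ptr(&pkt_stats, cpu);

                do {
                        seq = u64_stats_fetch_begin(sync);
                        b = u64_stats_read(&s->bytes);
                        p = u64_stats_read(&s->packets);
                } while (u64_stats_fetch_retry(sync, seq));
                *bytes += b;
                *packets += p;
        }
}

Each pkt_sync instance still needs u64_stats_init() once per possible CPU at startup, exactly as nft_counter_init_seqcount() does above.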


@ -1,27 +0,0 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 9 Mar 2023 09:13:52 +0100
Subject: [PATCH] powerpc/pseries: Select the generic memory allocator.
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
The RTAS work area allocator is using the generic memory allocator and
as such it must select it.
Select the generic memory allocator on pseries.
Fixes: 43033bc62d349 ("powerpc/pseries: add RTAS work area allocator")
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Link: https://lore.kernel.org/20230309135110.uAxhqRFk@linutronix.de
---
arch/powerpc/platforms/pseries/Kconfig | 1 +
1 file changed, 1 insertion(+)
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -2,6 +2,7 @@
config PPC_PSERIES
depends on PPC64 && PPC_BOOK3S
bool "IBM pSeries & new (POWER5-based) iSeries"
+ select GENERIC_ALLOCATOR
select HAVE_PCSPKR_PLATFORM
select MPIC
select OF_DYNAMIC


@ -1,38 +0,0 @@
Subject: powerpc: traps: Use PREEMPT_RT
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Fri Jul 26 11:30:49 2019 +0200
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
Add PREEMPT_RT to the backtrace if enabled.
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
arch/powerpc/kernel/traps.c | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
---
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -261,12 +261,17 @@ static char *get_mmu_str(void)
static int __die(const char *str, struct pt_regs *regs, long err)
{
+ const char *pr = "";
+
printk("Oops: %s, sig: %ld [#%d]\n", str, err, ++die_counter);
+ if (IS_ENABLED(CONFIG_PREEMPTION))
+ pr = IS_ENABLED(CONFIG_PREEMPT_RT) ? " PREEMPT_RT" : " PREEMPT";
+
printk("%s PAGE_SIZE=%luK%s%s%s%s%s%s %s\n",
IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN) ? "LE" : "BE",
PAGE_SIZE / 1024, get_mmu_str(),
- IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT" : "",
+ pr,
IS_ENABLED(CONFIG_SMP) ? " SMP" : "",
IS_ENABLED(CONFIG_SMP) ? (" NR_CPUS=" __stringify(NR_CPUS)) : "",
debug_pagealloc_enabled() ? " DEBUG_PAGEALLOC" : "",


@ -1,43 +0,0 @@
Subject: powerpc/kvm: Disable in-kernel MPIC emulation for PREEMPT_RT
From: Bogdan Purcareata <bogdan.purcareata@freescale.com>
Date: Fri Apr 24 15:53:13 2015 +0000
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
While converting the openpic emulation code to use a raw_spinlock_t enables
guests to run on RT, there's still a performance issue. For interrupts sent in
directed delivery mode with a multiple-CPU mask, the emulated openpic will loop
through all of the VCPUs, and for each VCPU it calls IRQ_check, which will loop
through all the pending interrupts for that VCPU. This is done while holding the
raw_lock, meaning that for all this time interrupts and preemption are
disabled on the host Linux. A malicious user app can max out both of these
numbers and cause a DoS.
This temporary fix is sent for two reasons. First, so that users who want to
use the in-kernel MPIC emulation are aware of the potential latencies and can
make sure that their usage scenario does not involve interrupts sent in
directed delivery mode and keeps the number of possible pending interrupts
small. Secondly, this should incentivize the development of a proper openpic
emulation that would be better suited for RT.
Acked-by: Scott Wood <scottwood@freescale.com>
Signed-off-by: Bogdan Purcareata <bogdan.purcareata@freescale.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
arch/powerpc/kvm/Kconfig | 1 +
1 file changed, 1 insertion(+)
---
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -221,6 +221,7 @@ config KVM_E500MC
config KVM_MPIC
bool "KVM in-kernel MPIC emulation"
depends on KVM && PPC_E500
+ depends on !PREEMPT_RT
select HAVE_KVM_IRQCHIP
select HAVE_KVM_IRQ_ROUTING
select HAVE_KVM_MSI


@ -1,115 +0,0 @@
Subject: powerpc/pseries/iommu: Use a locallock instead of local_irq_save()
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Tue Mar 26 18:31:54 2019 +0100
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
The local lock protects the per-CPU variable tce_page. The function
attempts to allocate memory while tce_page is protected (by disabling
interrupts).
Use local_lock_irqsave()/local_lock_irq() instead of
local_irq_save()/local_irq_disable().
Cc: stable-rt@vger.kernel.org
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
arch/powerpc/platforms/pseries/iommu.c | 31 ++++++++++++++++++++-----------
1 file changed, 20 insertions(+), 11 deletions(-)
---
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -26,6 +26,7 @@
#include <linux/of_address.h>
#include <linux/iommu.h>
#include <linux/rculist.h>
+#include <linux/local_lock.h>
#include <asm/io.h>
#include <asm/prom.h>
#include <asm/rtas.h>
@@ -244,7 +245,13 @@ static int tce_build_pSeriesLP(unsigned
return ret;
}
-static DEFINE_PER_CPU(__be64 *, tce_page);
+struct tce_page {
+ __be64 * page;
+ local_lock_t lock;
+};
+static DEFINE_PER_CPU(struct tce_page, tce_page) = {
+ .lock = INIT_LOCAL_LOCK(lock),
+};
static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
long npages, unsigned long uaddr,
@@ -267,9 +274,10 @@ static int tce_buildmulti_pSeriesLP(stru
direction, attrs);
}
- local_irq_save(flags); /* to protect tcep and the page behind it */
+ /* to protect tcep and the page behind it */
+ local_lock_irqsave(&tce_page.lock, flags);
- tcep = __this_cpu_read(tce_page);
+ tcep = __this_cpu_read(tce_page.page);
/* This is safe to do since interrupts are off when we're called
* from iommu_alloc{,_sg}()
@@ -278,12 +286,12 @@ static int tce_buildmulti_pSeriesLP(stru
tcep = (__be64 *)__get_free_page(GFP_ATOMIC);
/* If allocation fails, fall back to the loop implementation */
if (!tcep) {
- local_irq_restore(flags);
+ local_unlock_irqrestore(&tce_page.lock, flags);
return tce_build_pSeriesLP(tbl->it_index, tcenum,
tceshift,
npages, uaddr, direction, attrs);
}
- __this_cpu_write(tce_page, tcep);
+ __this_cpu_write(tce_page.page, tcep);
}
rpn = __pa(uaddr) >> tceshift;
@@ -313,7 +321,7 @@ static int tce_buildmulti_pSeriesLP(stru
tcenum += limit;
} while (npages > 0 && !rc);
- local_irq_restore(flags);
+ local_unlock_irqrestore(&tce_page.lock, flags);
if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) {
ret = (int)rc;
@@ -497,16 +505,17 @@ static int tce_setrange_multi_pSeriesLP(
DMA_BIDIRECTIONAL, 0);
}
- local_irq_disable(); /* to protect tcep and the page behind it */
- tcep = __this_cpu_read(tce_page);
+ /* to protect tcep and the page behind it */
+ local_lock_irq(&tce_page.lock);
+ tcep = __this_cpu_read(tce_page.page);
if (!tcep) {
tcep = (__be64 *)__get_free_page(GFP_ATOMIC);
if (!tcep) {
- local_irq_enable();
+ local_unlock_irq(&tce_page.lock);
return -ENOMEM;
}
- __this_cpu_write(tce_page, tcep);
+ __this_cpu_write(tce_page.page, tcep);
}
proto_tce = TCE_PCI_READ | TCE_PCI_WRITE;
@@ -549,7 +558,7 @@ static int tce_setrange_multi_pSeriesLP(
/* error cleanup: caller will clear whole range */
- local_irq_enable();
+ local_unlock_irq(&tce_page.lock);
return rc;
}
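
The same conversion recipe applies to any per-CPU scratch data that used to be guarded by local_irq_save()/local_irq_disable(): embed a local_lock_t next to the data. A generic sketch with invented names:

#include <linux/local_lock.h>
#include <linux/percpu.h>

struct pcpu_scratch {
        void *page;
        local_lock_t lock;
};
static DEFINE_PER_CPU(struct pcpu_scratch, pcpu_scratch) = {
        .lock = INIT_LOCAL_LOCK(lock),
};

static void use_scratch(void)
{
        unsigned long flags;
        void *p;

        /*
         * On !PREEMPT_RT this compiles down to local_irq_save(); on
         * PREEMPT_RT it is a per-CPU sleeping lock and the section
         * stays preemptible.
         */
        local_lock_irqsave(&pcpu_scratch.lock, flags);
        p = __this_cpu_read(pcpu_scratch.page);
        /* ... use p, or __this_cpu_write() a freshly allocated page ... */
        local_unlock_irqrestore(&pcpu_scratch.lock, flags);
}

That the section stays preemptible on RT is what makes the GFP_ATOMIC allocation under the lock in the hunk above acceptable there.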


@ -1,37 +0,0 @@
Subject: powerpc/stackprotector: work around stack-guard init from atomic
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Tue Mar 26 18:31:29 2019 +0100
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.11/older/patches-6.11-rt7.tar.xz
This is invoked from the secondary CPU in atomic context. On x86 we use
the TSC instead. On Power we XOR it against mftb(), so let's use the
stack address as the initial value.
Cc: stable-rt@vger.kernel.org
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
arch/powerpc/include/asm/stackprotector.h | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
---
--- a/arch/powerpc/include/asm/stackprotector.h
+++ b/arch/powerpc/include/asm/stackprotector.h
@@ -19,8 +19,13 @@
*/
static __always_inline void boot_init_stack_canary(void)
{
- unsigned long canary = get_random_canary();
+ unsigned long canary;
+#ifndef CONFIG_PREEMPT_RT
+ canary = get_random_canary();
+#else
+ canary = ((unsigned long)&canary) & CANARY_MASK;
+#endif
current->stack_canary = canary;
#ifdef CONFIG_PPC64
get_paca()->canary = canary;
