From 67e1b0052f6bb82be84e30a5af7d27f29533a83b Mon Sep 17 00:00:00 2001 From: Daniil Tatianin Date: Fri, 5 Sep 2025 17:41:51 +0300 Subject: printk_ringbuffer: don't needlessly wrap data blocks around Previously, data blocks that perfectly fit the data ring buffer would get wrapped around to the beginning for no reason since the calculated offset of the next data block would belong to the next wrap. Since this offset is not actually part of the data block, but rather the offset of where the next data block is going to start, there is no reason to include it when deciding whether the current block fits the buffer. Signed-off-by: Daniil Tatianin Reviewed-by: Petr Mladek Tested-by: Petr Mladek Reviewed-by: John Ogness Tested-by: John Ogness Link: https://patch.msgid.link/20250905144152.9137-2-d-tatianin@yandex-team.ru [pmladek@suse.com: Updated indentation.] Signed-off-by: Petr Mladek --- kernel/printk/printk_ringbuffer.c | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) (limited to 'kernel') diff --git a/kernel/printk/printk_ringbuffer.c b/kernel/printk/printk_ringbuffer.c index 40198bffb7d0..839f504db6d3 100644 --- a/kernel/printk/printk_ringbuffer.c +++ b/kernel/printk/printk_ringbuffer.c @@ -999,6 +999,17 @@ static bool desc_reserve(struct printk_ringbuffer *rb, unsigned long *id_out) return true; } +static bool is_blk_wrapped(struct prb_data_ring *data_ring, + unsigned long begin_lpos, unsigned long next_lpos) +{ + /* + * Subtract one from next_lpos since it's not actually part of this data + * block. This allows perfectly fitting records to not wrap. + */ + return DATA_WRAPS(data_ring, begin_lpos) != + DATA_WRAPS(data_ring, next_lpos - 1); +} + /* Determine the end of a data block. */ static unsigned long get_next_lpos(struct prb_data_ring *data_ring, unsigned long lpos, unsigned int size) @@ -1010,7 +1021,7 @@ static unsigned long get_next_lpos(struct prb_data_ring *data_ring, next_lpos = lpos + size; /* First check if the data block does not wrap. */ - if (DATA_WRAPS(data_ring, begin_lpos) == DATA_WRAPS(data_ring, next_lpos)) + if (!is_blk_wrapped(data_ring, begin_lpos, next_lpos)) return next_lpos; /* Wrapping data blocks store their data at the beginning. */ @@ -1087,7 +1098,7 @@ static char *data_alloc(struct printk_ringbuffer *rb, unsigned int size, blk = to_block(data_ring, begin_lpos); blk->id = id; /* LMM(data_alloc:B) */ - if (DATA_WRAPS(data_ring, begin_lpos) != DATA_WRAPS(data_ring, next_lpos)) { + if (is_blk_wrapped(data_ring, begin_lpos, next_lpos)) { /* Wrapping data blocks store their data at the beginning. */ blk = to_block(data_ring, 0); @@ -1131,7 +1142,7 @@ static char *data_realloc(struct printk_ringbuffer *rb, unsigned int size, return NULL; /* Keep track if @blk_lpos was a wrapping data block. */ - wrapped = (DATA_WRAPS(data_ring, blk_lpos->begin) != DATA_WRAPS(data_ring, blk_lpos->next)); + wrapped = is_blk_wrapped(data_ring, blk_lpos->begin, blk_lpos->next); size = to_blk_size(size); @@ -1167,7 +1178,7 @@ static char *data_realloc(struct printk_ringbuffer *rb, unsigned int size, blk = to_block(data_ring, blk_lpos->begin); - if (DATA_WRAPS(data_ring, blk_lpos->begin) != DATA_WRAPS(data_ring, next_lpos)) { + if (is_blk_wrapped(data_ring, blk_lpos->begin, next_lpos)) { struct prb_data_block *old_blk = blk; /* Wrapping data blocks store their data at the beginning. 
*/ @@ -1203,7 +1214,7 @@ static unsigned int space_used(struct prb_data_ring *data_ring, if (BLK_DATALESS(blk_lpos)) return 0; - if (DATA_WRAPS(data_ring, blk_lpos->begin) == DATA_WRAPS(data_ring, blk_lpos->next)) { + if (!is_blk_wrapped(data_ring, blk_lpos->begin, blk_lpos->next)) { /* Data block does not wrap. */ return (DATA_INDEX(data_ring, blk_lpos->next) - DATA_INDEX(data_ring, blk_lpos->begin)); @@ -1250,14 +1261,15 @@ static const char *get_data(struct prb_data_ring *data_ring, } /* Regular data block: @begin less than @next and in same wrap. */ - if (DATA_WRAPS(data_ring, blk_lpos->begin) == DATA_WRAPS(data_ring, blk_lpos->next) && + if (!is_blk_wrapped(data_ring, blk_lpos->begin, blk_lpos->next) && blk_lpos->begin < blk_lpos->next) { db = to_block(data_ring, blk_lpos->begin); *data_size = blk_lpos->next - blk_lpos->begin; /* Wrapping data block: @begin is one wrap behind @next. */ - } else if (DATA_WRAPS(data_ring, blk_lpos->begin + DATA_SIZE(data_ring)) == - DATA_WRAPS(data_ring, blk_lpos->next)) { + } else if (!is_blk_wrapped(data_ring, + blk_lpos->begin + DATA_SIZE(data_ring), + blk_lpos->next)) { db = to_block(data_ring, 0); *data_size = DATA_INDEX(data_ring, blk_lpos->next); -- cgit v1.2.3 From 741ea7aa95dd9ac77f861e7d0961d8d231ac8448 Mon Sep 17 00:00:00 2001 From: Andrew Murray Date: Mon, 20 Oct 2025 16:38:05 +0100 Subject: printk: Introduce console_flush_one_record console_flush_all prints all remaining records to all usable consoles whilst its caller holds console_lock. This can result in large waiting times for those waiting for console_lock especially where there is a large volume of records or where the console is slow (e.g. serial). Let's extract the parts of this function which print a single record into a new function named console_flush_one_record. This can later be used for functions that will release and reacquire console_lock between records. This commit should not change existing functionality. Reviewed-by: Petr Mladek Signed-off-by: Andrew Murray Reviewed-by: John Ogness Link: https://patch.msgid.link/20251020-printk_legacy_thread_console_lock-v3-1-00f1f0ac055a@thegoodpenguin.co.uk Signed-off-by: Petr Mladek --- kernel/printk/printk.c | 158 +++++++++++++++++++++++++++++++------------------ 1 file changed, 99 insertions(+), 59 deletions(-) (limited to 'kernel') diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 5aee9ffb16b9..1c048c66d099 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -3134,6 +3134,99 @@ static inline void printk_kthreads_check_locked(void) { } #endif /* CONFIG_PRINTK */ + +/* + * Print out one record for each console. + * + * @do_cond_resched is set by the caller. It can be true only in schedulable + * context. + * + * @next_seq is set to the sequence number after the last available record. + * The value is valid only when there is at least one usable console and all + * usable consoles were flushed. + * + * @handover will be set to true if a printk waiter has taken over the + * console_lock, in which case the caller is no longer holding the + * console_lock. Otherwise it is set to false. + * + * @any_usable will be set to true if there are any usable consoles. + * + * Returns true when there was at least one usable console and a record was + * flushed. A returned false indicates there were no records to flush for any + * of the consoles. It may also indicate that there were no usable consoles, + * the context has been lost or there is a panic situation. 
Regardless of the + * reason, the caller should assume it is not useful to immediately try again. + * + * Requires the console_lock. + */ +static bool console_flush_one_record(bool do_cond_resched, u64 *next_seq, bool *handover, + bool *any_usable) +{ + struct console_flush_type ft; + bool any_progress = false; + struct console *con; + int cookie; + + printk_get_console_flush_type(&ft); + + cookie = console_srcu_read_lock(); + for_each_console_srcu(con) { + short flags = console_srcu_read_flags(con); + u64 printk_seq; + bool progress; + + /* + * console_flush_one_record() is only responsible for + * nbcon consoles when the nbcon consoles cannot print via + * their atomic or threaded flushing. + */ + if ((flags & CON_NBCON) && (ft.nbcon_atomic || ft.nbcon_offload)) + continue; + + if (!console_is_usable(con, flags, !do_cond_resched)) + continue; + *any_usable = true; + + if (flags & CON_NBCON) { + progress = nbcon_legacy_emit_next_record(con, handover, cookie, + !do_cond_resched); + printk_seq = nbcon_seq_read(con); + } else { + progress = console_emit_next_record(con, handover, cookie); + printk_seq = con->seq; + } + + /* + * If a handover has occurred, the SRCU read lock + * is already released. + */ + if (*handover) + return false; + + /* Track the next of the highest seq flushed. */ + if (printk_seq > *next_seq) + *next_seq = printk_seq; + + if (!progress) + continue; + any_progress = true; + + /* Allow panic_cpu to take over the consoles safely. */ + if (panic_on_other_cpu()) + goto abandon; + + if (do_cond_resched) + cond_resched(); + } + console_srcu_read_unlock(cookie); + + return any_progress; + +abandon: + console_srcu_read_unlock(cookie); + return false; +} + /* * Print out all remaining records to all consoles. * @@ -3159,77 +3252,24 @@ static inline void printk_kthreads_check_locked(void) { } */ static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handover) { - struct console_flush_type ft; bool any_usable = false; - struct console *con; bool any_progress; - int cookie; *next_seq = 0; *handover = false; do { - any_progress = false; + any_progress = console_flush_one_record(do_cond_resched, next_seq, handover, + &any_usable); - printk_get_console_flush_type(&ft); - - cookie = console_srcu_read_lock(); - for_each_console_srcu(con) { - short flags = console_srcu_read_flags(con); - u64 printk_seq; - bool progress; + if (*handover) + return false; - /* - * console_flush_all() is only responsible for nbcon - * consoles when the nbcon consoles cannot print via - * their atomic or threaded flushing. - */ - if ((flags & CON_NBCON) && (ft.nbcon_atomic || ft.nbcon_offload)) - continue; - - if (!console_is_usable(con, flags, !do_cond_resched)) - continue; - any_usable = true; - - if (flags & CON_NBCON) { - progress = nbcon_legacy_emit_next_record(con, handover, cookie, - !do_cond_resched); - printk_seq = nbcon_seq_read(con); - } else { - progress = console_emit_next_record(con, handover, cookie); - printk_seq = con->seq; - } - - /* - * If a handover has occurred, the SRCU read lock - * is already released. - */ - if (*handover) - return false; - - /* Track the next of the highest seq flushed. */ - if (printk_seq > *next_seq) - *next_seq = printk_seq; - - if (!progress) - continue; - any_progress = true; - - /* Allow panic_cpu to take over the consoles safely. 
*/ - if (panic_on_other_cpu()) - goto abandon; - - if (do_cond_resched) - cond_resched(); - } - console_srcu_read_unlock(cookie); + if (panic_on_other_cpu()) + return false; } while (any_progress); return any_usable; - -abandon: - console_srcu_read_unlock(cookie); - return false; } static void __console_flush_and_unlock(void) -- cgit v1.2.3 From ba00f7c4d0051e351ee284490c531a004fca4c7d Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Mon, 20 Oct 2025 16:38:06 +0100 Subject: printk: console_flush_one_record() code cleanup console_flush_one_record() and console_flush_all() duplicate several checks. They both want to tell the caller that consoles are no longer usable in this context because it has lost the lock or the lock has to be reserved for the panic CPU. Remove the duplication by changing the semantics of the function console_flush_one_record() return value and parameters. The function will return true when it is able to do the job. It means that there is at least one usable console. And the flushing was not interrupted by a takeover or panic_on_other_cpu(). Also replace the @any_usable parameter with @try_again. The @try_again parameter will be set to true when the function could do the job and at least one console made progress. Motivation: The callers need to know when + they should continue flushing => @try_again + when the console is flushed => can_do_the_job(return) && !@try_again + when @next_seq is valid => same as flushed + when lost console_lock => @takeover The proposed change makes it clear when the function can do the job. It simplifies the answer for the other questions. Also the return value from console_flush_one_record() can be used as return value from console_flush_all(). Reviewed-by: John Ogness Link: https://patch.msgid.link/20251020-printk_legacy_thread_console_lock-v3-2-00f1f0ac055a@thegoodpenguin.co.uk [pmladek@suse.com: Fixed type of any_usable variable reported by John] Signed-off-by: Petr Mladek --- kernel/printk/printk.c | 59 ++++++++++++++++++++++++++------------------------ 1 file changed, 31 insertions(+), 28 deletions(-) (limited to 'kernel') diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 1c048c66d099..60e39cfb2bc0 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -3142,31 +3142,33 @@ static inline void printk_kthreads_check_locked(void) { } * context. * * @next_seq is set to the sequence number after the last available record. - * The value is valid only when there is at least one usable console and all - * usable consoles were flushed. + * The value is valid only when all usable consoles were flushed. This is + * when the function returns true (can do the job) and the @try_again parameter + * is set to false, see below. * * @handover will be set to true if a printk waiter has taken over the * console_lock, in which case the caller is no longer holding the * console_lock. Otherwise it is set to false. * - * @any_usable will be set to true if there are any usable consoles. + * @try_again will be set to true when it still makes sense to call this + * function again. The function could do the job, see the return value. + * And some consoles still make progress. * - * Returns true when there was at least one usable console and a record was - * flushed. A returned false indicates there were no records to flush for any - * of the consoles. It may also indicate that there were no usable consoles, - * the context has been lost or there is a panic situation. 
Regardless of the - * reason, the caller should assume it is not useful to immediately try again. + * Returns true when the function could do the job. Some consoles are usable, + * and there was no takeover and no panic_on_other_cpu(). * * Requires the console_lock. */ static bool console_flush_one_record(bool do_cond_resched, u64 *next_seq, bool *handover, - bool *any_usable) + bool *try_again) { struct console_flush_type ft; - bool any_progress = false; + bool any_usable = false; struct console *con; int cookie; + *try_again = false; + printk_get_console_flush_type(&ft); cookie = console_srcu_read_lock(); @@ -3185,7 +3187,7 @@ static bool console_flush_one_record(bool do_cond_resched, u64 *next_seq, bool * if (!console_is_usable(con, flags, !do_cond_resched)) continue; - *any_usable = true; + any_usable = true; if (flags & CON_NBCON) { progress = nbcon_legacy_emit_next_record(con, handover, cookie, @@ -3201,7 +3203,7 @@ static bool console_flush_one_record(bool do_cond_resched, u64 *next_seq, bool * * is already released. */ if (*handover) - return false; + goto fail; /* Track the next of the highest seq flushed. */ if (printk_seq > *next_seq) @@ -3209,21 +3211,28 @@ static bool console_flush_one_record(bool do_cond_resched, u64 *next_seq, bool * if (!progress) continue; - any_progress = true; + + /* + * A usable console made progress. There might still be + * pending messages. + */ + *try_again = true; /* Allow panic_cpu to take over the consoles safely. */ if (panic_on_other_cpu()) - goto abandon; + goto fail_srcu; if (do_cond_resched) cond_resched(); } console_srcu_read_unlock(cookie); - return any_progress; + return any_usable; -abandon: +fail_srcu: console_srcu_read_unlock(cookie); +fail: + *try_again = false; return false; } @@ -3252,24 +3261,18 @@ abandon: */ static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handover) { - bool any_usable = false; - bool any_progress; + bool try_again; + bool ret; *next_seq = 0; *handover = false; do { - any_progress = console_flush_one_record(do_cond_resched, next_seq, handover, - &any_usable); + ret = console_flush_one_record(do_cond_resched, next_seq, + handover, &try_again); + } while (try_again); - if (*handover) - return false; - - if (panic_on_other_cpu()) - return false; - } while (any_progress); - - return any_usable; + return ret; } static void __console_flush_and_unlock(void) -- cgit v1.2.3 From 1bc9a28f076fa68736603515dcf4ec027cb70102 Mon Sep 17 00:00:00 2001 From: Andrew Murray Date: Mon, 20 Oct 2025 16:38:07 +0100 Subject: printk: Use console_flush_one_record for legacy printer kthread The legacy printer kthread uses console_lock and __console_flush_and_unlock to flush records to the console. This approach results in the console_lock being held for the entire duration of a flush. This can result in large waiting times for those waiting for console_lock especially where there is a large volume of records or where the console is slow (e.g. serial). This contention is observed during boot, as the call to filp_open in console_on_rootfs will delay progression to userspace until any in-flight flush is completed. Let's instead use console_flush_one_record and release/reacquire the console_lock between records. 
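To make the locking change concrete, here is a condensed sketch of the loop the legacy kthread switches to. It is simplified from the actual diff further below; the wait_event_interruptible() wrapper and the kthread_should_stop() handling are omitted:

	bool try_again;

	/* Flush records one at a time, dropping console_lock in between. */
	do {
		bool handover = false;
		u64 next_seq = 0;

		console_lock();
		console_flush_one_record(true, &next_seq, &handover, &try_again);

		/* On handover, the waiter now owns console_lock. */
		if (!handover)
			__console_unlock();
	} while (try_again);

Every iteration gives console_lock waiters, such as the filp_open() call in console_on_rootfs() measured below, a window to take the lock between records.
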
On a PocketBeagle 2, with the following boot args: "console=ttyS2,9600 initcall_debug=1 loglevel=10" Without this patch: [ 5.613166] +console_on_rootfs/filp_open [ 5.643473] mmc1: SDHCI controller on fa00000.mmc [fa00000.mmc] using ADMA 64-bit [ 5.643823] probe of fa00000.mmc returned 0 after 258244 usecs [ 5.710520] mmc1: new UHS-I speed SDR104 SDHC card at address 5048 [ 5.721976] mmcblk1: mmc1:5048 SD32G 29.7 GiB [ 5.747258] mmcblk1: p1 p2 [ 5.753324] probe of mmc1:5048 returned 0 after 40002 usecs [ 15.595240] ti_sci_pm_domains 44043000.system-controller:power-controller: sync_state() pending due to 30040000.pruss [ 15.595282] ti_sci_pm_domains 44043000.system-controller:power-controller: sync_state() pending due to e010000.watchdog [ 15.595297] ti_sci_pm_domains 44043000.system-controller:power-controller: sync_state() pending due to e000000.watchdog [ 15.595437] ti_sci_pm_domains 44043000.system-controller:power-controller: sync_state() pending due to 30300000.crc [ 146.275961] -console_on_rootfs/filp_open ... and with: [ 5.477122] +console_on_rootfs/filp_open [ 5.595814] mmc1: SDHCI controller on fa00000.mmc [fa00000.mmc] using ADMA 64-bit [ 5.596181] probe of fa00000.mmc returned 0 after 312757 usecs [ 5.662813] mmc1: new UHS-I speed SDR104 SDHC card at address 5048 [ 5.674367] mmcblk1: mmc1:5048 SD32G 29.7 GiB [ 5.699320] mmcblk1: p1 p2 [ 5.705494] probe of mmc1:5048 returned 0 after 39987 usecs [ 6.418682] -console_on_rootfs/filp_open ... ... [ 15.593509] ti_sci_pm_domains 44043000.system-controller:power-controller: sync_state() pending due to 30040000.pruss [ 15.593551] ti_sci_pm_domains 44043000.system-controller:power-controller: sync_state() pending due to e010000.watchdog [ 15.593566] ti_sci_pm_domains 44043000.system-controller:power-controller: sync_state() pending due to e000000.watchdog [ 15.593704] ti_sci_pm_domains 44043000.system-controller:power-controller: sync_state() pending due to 30300000.crc Where I've added a printk surrounding the call in console_on_rootfs to filp_open. Suggested-by: Petr Mladek Signed-off-by: Andrew Murray Reviewed-by: Petr Mladek Reviewed-by: John Ogness Link: https://patch.msgid.link/20251020-printk_legacy_thread_console_lock-v3-3-00f1f0ac055a@thegoodpenguin.co.uk [pmladek@suse.com: Fixed ordering of variable definition suggested by John.] 
Signed-off-by: Petr Mladek --- kernel/printk/printk.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) (limited to 'kernel') diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 60e39cfb2bc0..cb79d1d2e6e5 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -3644,17 +3644,26 @@ static bool legacy_kthread_should_wakeup(void) static int legacy_kthread_func(void *unused) { - for (;;) { - wait_event_interruptible(legacy_wait, legacy_kthread_should_wakeup()); + bool try_again; + +wait_for_event: + wait_event_interruptible(legacy_wait, legacy_kthread_should_wakeup()); + + do { + bool handover = false; + u64 next_seq = 0; if (kthread_should_stop()) - break; + return 0; console_lock(); - __console_flush_and_unlock(); - } + console_flush_one_record(true, &next_seq, &handover, &try_again); + if (!handover) + __console_unlock(); - return 0; + } while (try_again); + + goto wait_for_event; } static bool legacy_kthread_create(void) -- cgit v1.2.3 From 4da42aaa82d6e3fa2e822e6e771d031c2e20a6c7 Mon Sep 17 00:00:00 2001 From: Marcos Paulo de Souza Date: Thu, 16 Oct 2025 11:47:54 -0300 Subject: printk: nbcon: Export console_is_usable The helper will be used on KDB code in the next commits. Reviewed-by: Petr Mladek Reviewed-by: John Ogness Signed-off-by: Marcos Paulo de Souza Link: https://patch.msgid.link/20251016-nbcon-kgdboc-v6-1-866aac60a80e@suse.com Signed-off-by: Petr Mladek --- kernel/printk/internal.h | 45 --------------------------------------------- 1 file changed, 45 deletions(-) (limited to 'kernel') diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h index f72bbfa266d6..7e3128ec9336 100644 --- a/kernel/printk/internal.h +++ b/kernel/printk/internal.h @@ -3,7 +3,6 @@ * internal.h - printk internal definitions */ #include -#include #include #if defined(CONFIG_PRINTK) && defined(CONFIG_SYSCTL) @@ -112,47 +111,6 @@ bool nbcon_kthread_create(struct console *con); void nbcon_kthread_stop(struct console *con); void nbcon_kthreads_wake(void); -/* - * Check if the given console is currently capable and allowed to print - * records. Note that this function does not consider the current context, - * which can also play a role in deciding if @con can be used to print - * records. - */ -static inline bool console_is_usable(struct console *con, short flags, bool use_atomic) -{ - if (!(flags & CON_ENABLED)) - return false; - - if ((flags & CON_SUSPENDED)) - return false; - - if (flags & CON_NBCON) { - /* The write_atomic() callback is optional. */ - if (use_atomic && !con->write_atomic) - return false; - - /* - * For the !use_atomic case, @printk_kthreads_running is not - * checked because the write_thread() callback is also used - * via the legacy loop when the printer threads are not - * available. - */ - } else { - if (!con->write) - return false; - } - - /* - * Console drivers may assume that per-cpu resources have been - * allocated. So unless they're explicitly marked as being able to - * cope (CON_ANYTIME) don't call them until this CPU is officially up. 
- */ - if (!cpu_online(raw_smp_processor_id()) && !(flags & CON_ANYTIME)) - return false; - - return true; -} - /** * nbcon_kthread_wake - Wake up a console printing thread * @con: Console to operate on @@ -204,9 +162,6 @@ static inline bool nbcon_legacy_emit_next_record(struct console *con, bool *hand static inline void nbcon_kthread_wake(struct console *con) { } static inline void nbcon_kthreads_wake(void) { } -static inline bool console_is_usable(struct console *con, short flags, - bool use_atomic) { return false; } - #endif /* CONFIG_PRINTK */ extern bool have_boot_console; -- cgit v1.2.3 From 49f7d3054e84617395a37a058251c81320a3614a Mon Sep 17 00:00:00 2001 From: Marcos Paulo de Souza Date: Thu, 16 Oct 2025 11:47:55 -0300 Subject: printk: nbcon: Introduce KDB helpers These helpers will be used when calling console->write_atomic in KDB code in the next patch. It's basically the same implementation as nbcon_device_try_acquire, but using NBCON_PRIO_EMERGENCY when acquiring the context. If the acquire succeeds, the message and message length are assigned to nbcon_write_context so ->write_atomic can print the message. After release, try to flush the console since there may be a backlog of messages in the ringbuffer. The kthread console printers do not get a chance to run while kdb is active. Reviewed-by: Petr Mladek Reviewed-by: John Ogness Signed-off-by: Marcos Paulo de Souza Link: https://patch.msgid.link/20251016-nbcon-kgdboc-v6-2-866aac60a80e@suse.com Signed-off-by: Petr Mladek --- kernel/printk/nbcon.c | 61 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) (limited to 'kernel') diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c index 558ef3177976..e1bf5409cb6b 100644 --- a/kernel/printk/nbcon.c +++ b/kernel/printk/nbcon.c @@ -1855,3 +1855,64 @@ void nbcon_device_release(struct console *con) console_srcu_read_unlock(cookie); } EXPORT_SYMBOL_GPL(nbcon_device_release); + +/** + * nbcon_kdb_try_acquire - Try to acquire nbcon console and enter unsafe + * section + * @con: The nbcon console to acquire + * @wctxt: The nbcon write context to be used on success + * + * Context: Under console_srcu_read_lock() for emitting a single kdb message + * using the given con->write_atomic() callback. Can be called + * only when the console is usable at the moment. + * + * Return: True if the console was acquired. False otherwise. + * + * kdb emits messages on consoles registered for printk() without + * storing them into the ring buffer. It has to acquire the console + * ownership so that it can call the con->write_atomic() callback in a safe way. + * + * This function acquires the nbcon console using priority NBCON_PRIO_EMERGENCY + * and marks it unsafe for handover/takeover. 
+ */ +bool nbcon_kdb_try_acquire(struct console *con, + struct nbcon_write_context *wctxt) +{ + struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); + + memset(ctxt, 0, sizeof(*ctxt)); + ctxt->console = con; + ctxt->prio = NBCON_PRIO_EMERGENCY; + + if (!nbcon_context_try_acquire(ctxt, false)) + return false; + + if (!nbcon_context_enter_unsafe(ctxt)) + return false; + + return true; +} + +/** + * nbcon_kdb_release - Exit unsafe section and release the nbcon console + * + * @wctxt: The nbcon write context initialized by a successful + * nbcon_kdb_try_acquire() + */ +void nbcon_kdb_release(struct nbcon_write_context *wctxt) +{ + struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); + + if (!nbcon_context_exit_unsafe(ctxt)) + return; + + nbcon_context_release(ctxt); + + /* + * Flush any new printk() messages added when the console was blocked. + * Only the console used by the given write context was blocked. + * The console was locked only when the write_atomic() callback + * was usable. + */ + __nbcon_atomic_flush_pending_con(ctxt->console, prb_next_reserve_seq(prb), false); +} -- cgit v1.2.3 From 286b113d70007e932d18aa0acfce1a3f5b25d8d1 Mon Sep 17 00:00:00 2001 From: Marcos Paulo de Souza Date: Thu, 16 Oct 2025 11:47:56 -0300 Subject: printk: nbcon: Allow KDB to acquire the NBCON context KDB can interrupt any console to execute the "mirrored printing" at any time, so add an exception to nbcon_context_try_acquire_direct to allow getting the context if the current CPU is the same as kdb_printf_cpu. This change will be necessary for the next patch, which fixes kdb_msg_write to work with NBCON consoles by calling ->write_atomic on such consoles. But to print, it first needs to acquire the ownership of the console, so nbcon_context_try_acquire_direct is fixed here. Reviewed-by: John Ogness Signed-off-by: Marcos Paulo de Souza Reviewed-by: Petr Mladek Link: https://patch.msgid.link/20251016-nbcon-kgdboc-v6-3-866aac60a80e@suse.com [pmladek@suse.com: Fix compilation with !CONFIG_KGDB_KDB.] Signed-off-by: Petr Mladek --- kernel/printk/nbcon.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c index e1bf5409cb6b..5be018493909 100644 --- a/kernel/printk/nbcon.c +++ b/kernel/printk/nbcon.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -249,13 +250,16 @@ static int nbcon_context_try_acquire_direct(struct nbcon_context *ctxt, * since all non-panic CPUs are stopped during panic(), it * is safer to have them avoid gaining console ownership. * - * If this acquire is a reacquire (and an unsafe takeover + * One exception is when kdb has locked for printing on this CPU. + * + * Second exception is a reacquire (and an unsafe takeover * has not previously occurred) then it is allowed to attempt * a direct acquire in panic. This gives console drivers an * opportunity to perform any necessary cleanup if they were * interrupted by the panic CPU while printing. */ if (panic_on_other_cpu() && + !kdb_printf_on_this_cpu() && (!is_reacquire || cur->unsafe_takeover)) { return -EPERM; } -- cgit v1.2.3 From 4349cf0df34f37d2470d246bc9be8d9836dfa49e Mon Sep 17 00:00:00 2001 From: Marcos Paulo de Souza Date: Thu, 16 Oct 2025 11:47:57 -0300 Subject: printk: nbcon: Export nbcon_write_context_set_buf This function will be used in the next patch to allow a driver to set both the message and message length of a nbcon_write_context. 
This is necessary because the function also initializes the ->unsafe_takeover struct member. By using this helper we ensure that the struct is initialized correctly. Reviewed-by: Petr Mladek Reviewed-by: John Ogness Signed-off-by: Marcos Paulo de Souza Link: https://patch.msgid.link/20251016-nbcon-kgdboc-v6-4-866aac60a80e@suse.com Signed-off-by: Petr Mladek --- kernel/printk/nbcon.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c index 5be018493909..fdd1cbebe77d 100644 --- a/kernel/printk/nbcon.c +++ b/kernel/printk/nbcon.c @@ -854,8 +854,8 @@ out: return nbcon_context_can_proceed(ctxt, &cur); } -static void nbcon_write_context_set_buf(struct nbcon_write_context *wctxt, - char *buf, unsigned int len) +void nbcon_write_context_set_buf(struct nbcon_write_context *wctxt, + char *buf, unsigned int len) { struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); struct console *con = ctxt->console; -- cgit v1.2.3 From 62627bf0cadf6eae87d92fecf604c42160fe16ef Mon Sep 17 00:00:00 2001 From: Marcos Paulo de Souza Date: Thu, 16 Oct 2025 11:47:58 -0300 Subject: kdb: Adapt kdb_msg_write to work with NBCON consoles Function kdb_msg_write was calling con->write for any found console, but it won't work on NBCON consoles. In this case we should acquire the ownership of the console using NBCON_PRIO_EMERGENCY, since printing kdb messages should only be interrupted by a panic. At this point, the console is required to use the atomic callback. The console is skipped if the write_atomic callback is not set or if the context could not be acquired. The validation of NBCON is done by the console_is_usable helper. The context is released right after write_atomic finishes. The oops_in_progress handling is only needed in the legacy consoles, so it was moved around the con->write callback. Suggested-by: Petr Mladek Reviewed-by: Petr Mladek Reviewed-by: John Ogness Signed-off-by: Marcos Paulo de Souza Link: https://patch.msgid.link/20251016-nbcon-kgdboc-v6-5-866aac60a80e@suse.com [pmladek@suse.com: Fixed compilation with !CONFIG_PRINTK.] Signed-off-by: Petr Mladek --- kernel/debug/kdb/kdb_io.c | 47 ++++++++++++++++++++++++++++++++--------------- 1 file changed, 32 insertions(+), 15 deletions(-) (limited to 'kernel') diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c index b12b9db75c1d..61c1690058ed 100644 --- a/kernel/debug/kdb/kdb_io.c +++ b/kernel/debug/kdb/kdb_io.c @@ -589,24 +589,41 @@ static void kdb_msg_write(const char *msg, int msg_len) */ cookie = console_srcu_read_lock(); for_each_console_srcu(c) { - if (!(console_srcu_read_flags(c) & CON_ENABLED)) + short flags = console_srcu_read_flags(c); + + if (!console_is_usable(c, flags, true)) continue; if (c == dbg_io_ops->cons) continue; - if (!c->write) - continue; - /* - * Set oops_in_progress to encourage the console drivers to - * disregard their internal spin locks: in the current calling - * context the risk of deadlock is a bigger problem than risks - * due to re-entering the console driver. We operate directly on - * oops_in_progress rather than using bust_spinlocks() because - * the calls bust_spinlocks() makes on exit are not appropriate - * for this calling context. - */ - ++oops_in_progress; - c->write(c, msg, msg_len); - --oops_in_progress; + + if (flags & CON_NBCON) { + struct nbcon_write_context wctxt = { }; + + /* + * Do not continue if the console is NBCON and the context + * can't be acquired. 
*/ + if (!nbcon_kdb_try_acquire(c, &wctxt)) + continue; + + nbcon_write_context_set_buf(&wctxt, (char *)msg, msg_len); + + c->write_atomic(c, &wctxt); + nbcon_kdb_release(&wctxt); + } else { + /* + * Set oops_in_progress to encourage the console drivers to + * disregard their internal spin locks: in the current calling + * context the risk of deadlock is a bigger problem than risks + * due to re-entering the console driver. We operate directly on + * oops_in_progress rather than using bust_spinlocks() because + * the calls bust_spinlocks() makes on exit are not appropriate + * for this calling context. + */ + ++oops_in_progress; + c->write(c, msg, msg_len); + --oops_in_progress; + } touch_nmi_watchdog(); } console_srcu_read_unlock(cookie); -- cgit v1.2.3 From 20793955835fe29a5582f68ff031fdcd35cc98ed Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 26 Oct 2025 16:07:26 +0100 Subject: printk_legacy_map: use LD_WAIT_CONFIG instead of LD_WAIT_SLEEP printk_legacy_map is used to hide lock nesting violations caused by legacy drivers and is using the wrong override type. LD_WAIT_SLEEP is for always sleeping lock types such as mutex_t. LD_WAIT_CONFIG is for lock types which are sleeping while spinning on PREEMPT_RT such as spinlock_t. Signed-off-by: Oleg Nesterov Reviewed-by: Petr Mladek Reviewed-by: John Ogness Reviewed-by: Sebastian Andrzej Siewior Link: https://patch.msgid.link/20251026150726.GA23223@redhat.com [pmladek@suse.com: Fixed indentation.] Signed-off-by: Petr Mladek --- kernel/printk/printk.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) (limited to 'kernel') diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 578723208f14..b51ba9f9e79b 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -2982,21 +2982,18 @@ out: } /* - * Legacy console printing from printk() caller context does not respect - * raw_spinlock/spinlock nesting. For !PREEMPT_RT the lockdep warning is a - * false positive. For PREEMPT_RT the false positive condition does not - * occur. - * - * This map is used to temporarily establish LD_WAIT_SLEEP context for the - * console write() callback when legacy printing to avoid false positive - * lockdep complaints, thus allowing lockdep to continue to function for - * real issues. + * The legacy console always acquires a spinlock_t from its printing + * callback. This violates lock nesting if the caller acquired an always + * spinning lock (raw_spinlock_t) while invoking printk(). This is not a + * problem on PREEMPT_RT because legacy consoles print always from a + * dedicated thread and never from within printk(). Therefore we tell + * lockdep that a sleeping spin lock (spinlock_t) is valid here. */ #ifdef CONFIG_PREEMPT_RT static inline void printk_legacy_allow_spinlock_enter(void) { } static inline void printk_legacy_allow_spinlock_exit(void) { } #else -static DEFINE_WAIT_OVERRIDE_MAP(printk_legacy_map, LD_WAIT_SLEEP); +static DEFINE_WAIT_OVERRIDE_MAP(printk_legacy_map, LD_WAIT_CONFIG); static inline void printk_legacy_allow_spinlock_enter(void) { -- cgit v1.2.3 From c41c0ebfa1e0eb40cfb11846a7a579eb8d9dfb5f Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Fri, 26 Sep 2025 14:49:10 +0200 Subject: printk/nbcon: Block printk kthreads when any CPU is in an emergency context In emergency contexts, printk() tries to flush messages directly even on nbcon consoles. And it is allowed to take over the console ownership and interrupt the printk kthread in the middle of a message. 
Only one takeover and one repeated message should be enough in most situations. The first emergency message flushes the backlog and printk kthreads get to sleep. Next emergency messages are flushed directly and printk() does not wake up the kthreads. However, the one takeover is not guaranteed. Any printk() in normal context on another CPU could wake up the kthreads. Or a new emergency message might be added before the kthreads get to sleep. Note that the interrupted .write_thread() callbacks usually have to call nbcon_reacquire_nobuf() and restore the original device setting before checking for pending messages. The risk of the repeated takeovers will be even bigger because __nbcon_atomic_flush_pending_con is going to release the console ownership after each emitted record. It will be needed to prevent hardlockup reports on other CPUs which are busy waiting for the context ownership, for example, by nbcon_reacquire_nobuf() or __uart_port_nbcon_acquire(). The repeated takeovers break the output, for example: [ 5042.650211][ T2220] Call Trace: [ 5042.6511 ** replaying previous printk message ** [ 5042.651192][ T2220] [ 5042.652160][ T2220] kunit_run_ ** replaying previous printk message ** [ 5042.652160][ T2220] kunit_run_tests+0x72/0x90 [ 5042.653340][ T22 ** replaying previous printk message ** [ 5042.653340][ T2220] ? srso_alias_return_thunk+0x5/0xfbef5 [ 5042.654628][ T2220] ? stack_trace_save+0x4d/0x70 [ 5042.6553 ** replaying previous printk message ** [ 5042.655394][ T2220] ? srso_alias_return_thunk+0x5/0xfbef5 [ 5042.656713][ T2220] ? save_trace+0x5b/0x180 A more robust solution is to block the printk kthread entirely whenever *any* CPU enters an emergency context. This ensures that critical messages can be flushed without contention from the normal, non-atomic printing path. Link: https://lore.kernel.org/all/aNQO-zl3k1l4ENfy@pathway.suse.cz Reviewed-by: Andrew Murray Reviewed-by: John Ogness Link: https://patch.msgid.link/20250926124912.243464-2-pmladek@suse.com [pmladek@suse.com: Added changes proposed by John Ogness] Signed-off-by: Petr Mladek --- kernel/printk/nbcon.c | 38 +++++++++++++++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c index 558ef3177976..dda336ffabdd 100644 --- a/kernel/printk/nbcon.c +++ b/kernel/printk/nbcon.c @@ -118,6 +118,9 @@ * from scratch. */ +/* Counter of active nbcon emergency contexts. */ +static atomic_t nbcon_cpu_emergency_cnt = ATOMIC_INIT(0); + /** * nbcon_state_set - Helper function to set the console state * @con: Console to update @@ -1163,6 +1166,16 @@ static bool nbcon_kthread_should_wakeup(struct console *con, struct nbcon_contex if (kthread_should_stop()) return true; + /* + * Block the kthread when the system is in an emergency or panic mode. + * It increases the chance that these contexts would be able to show + * the messages directly. And it reduces the risk of interrupted writes + * where the context with a higher priority takes over the nbcon console + * ownership in the middle of a message. + */ + if (unlikely(atomic_read(&nbcon_cpu_emergency_cnt))) + return false; + cookie = console_srcu_read_lock(); flags = console_srcu_read_flags(con); @@ -1214,6 +1227,13 @@ wait_for_event: if (kthread_should_stop()) return 0; + /* + * Block the kthread when the system is in an emergency or panic + * mode. See nbcon_kthread_should_wakeup() for more details. 
+ */ + if (unlikely(atomic_read(&nbcon_cpu_emergency_cnt))) + goto wait_for_event; + backlog = false; /* @@ -1655,6 +1675,8 @@ void nbcon_cpu_emergency_enter(void) preempt_disable(); + atomic_inc(&nbcon_cpu_emergency_cnt); + cpu_emergency_nesting = nbcon_get_cpu_emergency_nesting(); (*cpu_emergency_nesting)++; } @@ -1669,10 +1691,24 @@ void nbcon_cpu_emergency_exit(void) unsigned int *cpu_emergency_nesting; cpu_emergency_nesting = nbcon_get_cpu_emergency_nesting(); - if (!WARN_ON_ONCE(*cpu_emergency_nesting == 0)) (*cpu_emergency_nesting)--; + /* + * Wake up kthreads because there might be some pending messages + * added by other CPUs with normal priority since the last flush + * in the emergency context. + */ + if (!WARN_ON_ONCE(atomic_read(&nbcon_cpu_emergency_cnt) == 0)) { + if (atomic_dec_return(&nbcon_cpu_emergency_cnt) == 0) { + struct console_flush_type ft; + + printk_get_console_flush_type(&ft); + if (ft.nbcon_offload) + nbcon_kthreads_wake(); + } + } + preempt_enable(); } -- cgit v1.2.3 From 4c3ba0d5925685d27490078bf2d54ff9c0a0e67b Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Fri, 26 Sep 2025 14:49:11 +0200 Subject: printk/nbcon/panic: Allow printk kthread to sleep when the system is in panic The printk kthread might be running when there is a panic in progress. But it is not able to acquire the console ownership any longer. Prevent the desperate attempts to acquire the ownership and allow sleeping in panic. It would make it behave the same as when there is any CPU in an emergency context. Reviewed-by: Andrew Murray Reviewed-by: John Ogness Link: https://patch.msgid.link/20250926124912.243464-3-pmladek@suse.com [pmladek@suse.com: Rebased on top of 6.18-rc1 (panic_in_progress() moved to panic.c)] Signed-off-by: Petr Mladek --- kernel/printk/nbcon.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c index dda336ffabdd..2fd2f906e134 100644 --- a/kernel/printk/nbcon.c +++ b/kernel/printk/nbcon.c @@ -1173,7 +1173,8 @@ static bool nbcon_kthread_should_wakeup(struct console *con, struct nbcon_contex * where the context with a higher priority takes over the nbcon console * ownership in the middle of a message. */ - if (unlikely(atomic_read(&nbcon_cpu_emergency_cnt))) + if (unlikely(atomic_read(&nbcon_cpu_emergency_cnt)) || + unlikely(panic_in_progress())) return false; cookie = console_srcu_read_lock(); @@ -1231,7 +1232,8 @@ wait_for_event: * Block the kthread when the system is in an emergency or panic * mode. See nbcon_kthread_should_wakeup() for more details. */ - if (unlikely(atomic_read(&nbcon_cpu_emergency_cnt))) + if (unlikely(atomic_read(&nbcon_cpu_emergency_cnt)) || + unlikely(panic_in_progress())) goto wait_for_event; backlog = false; -- cgit v1.2.3 From d5d399efff65773ed82ddaf6c11a0fcfdb5eb029 Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Fri, 26 Sep 2025 14:49:12 +0200 Subject: printk/nbcon: Release nbcon consoles ownership in atomic flush after each emitted record printk() tries to flush messages with NBCON_PRIO_EMERGENCY on nbcon consoles immediately. It might take seconds to flush all pending lines on slow serial consoles. Note that there might be hundreds of messages, for example: [ 3.771531][ T1] pci 0000:3e:08.1: [8086:324 ** replaying previous printk message ** [ 3.771531][ T1] pci 0000:3e:08.1: [8086:3246] type 00 class 0x088000 PCIe Root Complex Integrated Endpoint [ ... more than 2000 lines, about 200kB messages ... 
] [ 3.837752][ T1] pci 0000:20:01.0: Adding to iommu group 18 [ 3.837851][ T ** replaying previous printk message ** [ 3.837851][ T1] pci 0000:20:03.0: Adding to iommu group 19 [ 3.837946][ T1] pci 0000:20:05.0: Adding to iommu group 20 [ ... more than 500 messages for iommu groups 21-590 ...] [ 3.912932][ T1] pci 0000:f6:00.1: Adding to iommu group 591 [ 3.913070][ T1] pci 0000:f6:00.2: Adding to iommu group 592 [ 3.913243][ T1] DMAR: Intel(R) Virtualization Technology for Directed I/O [ 3.913245][ T1] PCI-DMA: Using software bounce buffering for IO (SWIOTLB) [ 3.913245][ T1] software IO TLB: mapped [mem 0x000000004f000000-0x0000000053000000] (64MB) [ 3.913324][ T1] RAPL PMU: API unit is 2^-32 Joules, 3 fixed counters, 655360 ms ovfl timer [ 3.913325][ T1] RAPL PMU: hw unit of domain package 2^-14 Joules [ 3.913326][ T1] RAPL PMU: hw unit of domain dram 2^-14 Joules [ 3.913327][ T1] RAPL PMU: hw unit of domain psys 2^-0 Joules [ 3.933486][ T1] ------------[ cut here ]------------ [ 3.933488][ T1] WARNING: CPU: 2 PID: 1 at arch/x86/events/intel/uncore.c:1156 uncore_pci_pmu_register+0x15e/0x180 [ 3.930291][ C0] watchdog: Watchdog detected hard LOCKUP on cpu 0 [ 3.930291][ C0] Kernel panic - not syncing: Hard LOCKUP [...] [ 3.930291][ C0] CPU: 0 UID: 0 PID: 18 Comm: pr/ttyS0 Not tainted... [...] [ 3.930291][ C0] RIP: 0010:nbcon_reacquire_nobuf+0x11/0x50 [ 3.930291][ C0] Call Trace: [...] [ 3.930291][ C0] [ 3.930291][ C0] serial8250_console_write+0x16d/0x5c0 [ 3.930291][ C0] nbcon_emit_next_record+0x22c/0x250 [ 3.930291][ C0] nbcon_emit_one+0x93/0xe0 [ 3.930291][ C0] nbcon_kthread_func+0x13c/0x1c0 There are two visible takeovers of the console ownership: - The 1st one is triggered by the "WARNING: CPU: 2 PID: 1 at arch/x86/..." line printed with NBCON_PRIO_EMERGENCY. - The 2nd one is triggered by the "Kernel panic - not syncing: Hard LOCKUP" line printed with NBCON_PRIO_PANIC. There are more than 2500 lines, about 240kB, emitted between the takeover and the 1st "WARNING" line in the emergency context. This amount of pending messages had to be flushed by nbcon_atomic_flush_pending() when WARN() printed its first line. The atomic flush was holding the nbcon console context for so long that it triggered a hard lockup on the CPU running the printk kthread "pr/ttyS0". The kthread needed to reacquire the console ownership to restore the original serial port state in serial8250_console_write(). Prevent the hardlockup by releasing the nbcon console ownership after each emitted record. Note that __nbcon_atomic_flush_pending_con() used to hold the console ownership all the time because it blocked the printk kthread. Otherwise the kthread tried to flush the messages in parallel which caused repeated takeovers and more replayed messages. It is no longer a problem because the repeated takeovers are blocked by the counter of emergency contexts, see nbcon_cpu_emergency_cnt. 
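The shape of the fix can be seen in a condensed sketch of the reworked __nbcon_atomic_flush_pending_con() loop. It is simplified from the diff below (ctxt/wctxt are set up as before, and the handling of reserved but not yet finalized records is omitted):

	while (nbcon_seq_read(con) < stop_seq) {
		/* Re-acquire ownership for every single record. */
		if (!nbcon_context_try_acquire(ctxt, false))
			return -EPERM;

		/* Emit exactly one record, then give up ownership again. */
		if (!nbcon_emit_next_record(&wctxt, true))
			return -EAGAIN;

		nbcon_context_release(ctxt);

		if (!ctxt->backlog)
			break;
	}

Between iterations, a CPU spinning in nbcon_reacquire_nobuf() or __uart_port_nbcon_acquire() can now obtain the ownership instead of busy waiting long enough to trigger the hardlockup detector.
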
Link: https://lore.kernel.org/all/aNQO-zl3k1l4ENfy@pathway.suse.cz Reviewed-by: Andrew Murray Reviewed-by: John Ogness Link: https://patch.msgid.link/20250926124912.243464-4-pmladek@suse.com Signed-off-by: Petr Mladek --- kernel/printk/nbcon.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c index 2fd2f906e134..3060360fb357 100644 --- a/kernel/printk/nbcon.c +++ b/kernel/printk/nbcon.c @@ -1527,10 +1527,10 @@ static int __nbcon_atomic_flush_pending_con(struct console *con, u64 stop_seq, ctxt->prio = nbcon_get_default_prio(); ctxt->allow_unsafe_takeover = allow_unsafe_takeover; - if (!nbcon_context_try_acquire(ctxt, false)) - return -EPERM; - while (nbcon_seq_read(con) < stop_seq) { + if (!nbcon_context_try_acquire(ctxt, false)) + return -EPERM; + /* * nbcon_emit_next_record() returns false when the console was * handed over or taken over. In both cases the context is no @@ -1539,6 +1539,8 @@ static int __nbcon_atomic_flush_pending_con(struct console *con, u64 stop_seq, if (!nbcon_emit_next_record(&wctxt, true)) return -EAGAIN; + nbcon_context_release(ctxt); + if (!ctxt->backlog) { /* Are there reserved but not yet finalized records? */ if (nbcon_seq_read(con) < stop_seq) @@ -1547,7 +1549,6 @@ static int __nbcon_atomic_flush_pending_con(struct console *con, u64 stop_seq, } } - nbcon_context_release(ctxt); return err; } -- cgit v1.2.3 From 187de7c212e5fa87779e1026bf949337bca0cdaa Mon Sep 17 00:00:00 2001 From: John Ogness Date: Mon, 27 Oct 2025 17:18:03 +0106 Subject: printk: nbcon: Allow unsafe write_atomic() for panic There may be console drivers that have not yet figured out a way to implement safe atomic printing (->write_atomic() callback). These drivers could choose to only implement threaded printing (->write_thread() callback), but then it is guaranteed that _no_ output will be printed during panic. Not even attempted. As a result, developers may be tempted to implement unsafe ->write_atomic() callbacks and/or implement some sort of custom deferred printing trickery to try to make it work. This goes against the principal intention of the nbcon API as well as endangers other nbcon drivers that are doing things correctly (safely). As a compromise, allow nbcon drivers to implement unsafe ->write_atomic() callbacks by providing a new console flag CON_NBCON_ATOMIC_UNSAFE. When specified, the ->write_atomic() callback for that console will _only_ be called during the final "hope and pray" flush attempt at the end of a panic: nbcon_atomic_flush_unsafe(). Signed-off-by: John Ogness Link: https://lore.kernel.org/lkml/b2qps3uywhmjaym4mht2wpxul4yqtuuayeoq4iv4k3zf5wdgh3@tocu6c7mj4lt Reviewed-by: Petr Mladek Link: https://lore.kernel.org/all/swdpckuwwlv3uiessmtnf2jwlx3jusw6u7fpk5iggqo4t2vdws@7rpjso4gr7qp/ [1] Link: https://lore.kernel.org/all/20251103-fix_netpoll_aa-v4-1-4cfecdf6da7c@debian.org/ [2] Link: https://patch.msgid.link/20251027161212.334219-2-john.ogness@linutronix.de [pmladek@suse.com: Fix build with rework/nbcon-in-kdb branch.] 
Signed-off-by: Petr Mladek --- kernel/printk/nbcon.c | 47 ++++++++++++++++++++++++++++++++--------------- 1 file changed, 32 insertions(+), 15 deletions(-) (limited to 'kernel') diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c index fdd1cbebe77d..90412c2b2961 100644 --- a/kernel/printk/nbcon.c +++ b/kernel/printk/nbcon.c @@ -1408,6 +1408,26 @@ enum nbcon_prio nbcon_get_default_prio(void) return NBCON_PRIO_NORMAL; } +/* + * Track if it is allowed to perform unsafe hostile takeovers of console + * ownership. When true, console drivers might perform unsafe actions while + * printing. It is externally available via nbcon_allow_unsafe_takeover(). + */ +static bool panic_nbcon_allow_unsafe_takeover; + +/** + * nbcon_allow_unsafe_takeover - Check if unsafe console takeovers are allowed + * + * Return: True, when it is permitted to perform unsafe console printing + * + * This is also used by console_is_usable() to determine if it is allowed to + * call write_atomic() callbacks flagged as unsafe (CON_NBCON_ATOMIC_UNSAFE). + */ +bool nbcon_allow_unsafe_takeover(void) +{ + return panic_on_this_cpu() && panic_nbcon_allow_unsafe_takeover; +} + /** * nbcon_legacy_emit_next_record - Print one record for an nbcon console * in legacy contexts @@ -1478,7 +1498,6 @@ bool nbcon_legacy_emit_next_record(struct console *con, bool *handover, * write_atomic() callback * @con: The nbcon console to flush * @stop_seq: Flush up until this record - * @allow_unsafe_takeover: True, to allow unsafe hostile takeovers * * Return: 0 if @con was flushed up to @stop_seq Otherwise, error code on * failure. @@ -1497,8 +1516,7 @@ bool nbcon_legacy_emit_next_record(struct console *con, bool *handover, * returned, it cannot be expected that the unfinalized record will become * available. */ -static int __nbcon_atomic_flush_pending_con(struct console *con, u64 stop_seq, - bool allow_unsafe_takeover) +static int __nbcon_atomic_flush_pending_con(struct console *con, u64 stop_seq) { struct nbcon_write_context wctxt = { }; struct nbcon_context *ctxt = &ACCESS_PRIVATE(&wctxt, ctxt); @@ -1507,7 +1525,7 @@ static int __nbcon_atomic_flush_pending_con(struct console *con, u64 stop_seq, ctxt->console = con; ctxt->spinwait_max_us = 2000; ctxt->prio = nbcon_get_default_prio(); - ctxt->allow_unsafe_takeover = allow_unsafe_takeover; + ctxt->allow_unsafe_takeover = nbcon_allow_unsafe_takeover(); if (!nbcon_context_try_acquire(ctxt, false)) return -EPERM; @@ -1538,15 +1556,13 @@ static int __nbcon_atomic_flush_pending_con(struct console *con, u64 stop_seq, * write_atomic() callback * @con: The nbcon console to flush * @stop_seq: Flush up until this record - * @allow_unsafe_takeover: True, to allow unsafe hostile takeovers * * This will stop flushing before @stop_seq if another context has ownership. * That context is then responsible for the flushing. Likewise, if new records * are added while this context was flushing and there is no other context * to handle the printing, this context must also flush those records. 
*/ -static void nbcon_atomic_flush_pending_con(struct console *con, u64 stop_seq, - bool allow_unsafe_takeover) +static void nbcon_atomic_flush_pending_con(struct console *con, u64 stop_seq) { struct console_flush_type ft; unsigned long flags; @@ -1561,7 +1577,7 @@ again: */ local_irq_save(flags); - err = __nbcon_atomic_flush_pending_con(con, stop_seq, allow_unsafe_takeover); + err = __nbcon_atomic_flush_pending_con(con, stop_seq); local_irq_restore(flags); @@ -1593,9 +1609,8 @@ again: * __nbcon_atomic_flush_pending - Flush all nbcon consoles using their * write_atomic() callback * @stop_seq: Flush up until this record - * @allow_unsafe_takeover: True, to allow unsafe hostile takeovers */ -static void __nbcon_atomic_flush_pending(u64 stop_seq, bool allow_unsafe_takeover) +static void __nbcon_atomic_flush_pending(u64 stop_seq) { struct console *con; int cookie; @@ -1613,7 +1628,7 @@ static void __nbcon_atomic_flush_pending(u64 stop_seq, bool allow_unsafe_takeove if (nbcon_seq_read(con) >= stop_seq) continue; - nbcon_atomic_flush_pending_con(con, stop_seq, allow_unsafe_takeover); + nbcon_atomic_flush_pending_con(con, stop_seq); } console_srcu_read_unlock(cookie); } @@ -1629,7 +1644,7 @@ static void __nbcon_atomic_flush_pending(u64 stop_seq, bool allow_unsafe_takeove */ void nbcon_atomic_flush_pending(void) { - __nbcon_atomic_flush_pending(prb_next_reserve_seq(prb), false); + __nbcon_atomic_flush_pending(prb_next_reserve_seq(prb)); } /** @@ -1641,7 +1656,9 @@ void nbcon_atomic_flush_pending(void) */ void nbcon_atomic_flush_unsafe(void) { - __nbcon_atomic_flush_pending(prb_next_reserve_seq(prb), true); + panic_nbcon_allow_unsafe_takeover = true; + __nbcon_atomic_flush_pending(prb_next_reserve_seq(prb)); + panic_nbcon_allow_unsafe_takeover = false; } /** @@ -1848,7 +1865,7 @@ void nbcon_device_release(struct console *con) * using the legacy loop. */ if (ft.nbcon_atomic) { - __nbcon_atomic_flush_pending_con(con, prb_next_reserve_seq(prb), false); + __nbcon_atomic_flush_pending_con(con, prb_next_reserve_seq(prb)); } else if (ft.legacy_direct) { if (console_trylock()) console_unlock(); @@ -1918,5 +1935,5 @@ void nbcon_kdb_release(struct nbcon_write_context *wctxt) * The console was locked only when the write_atomic() callback * was usable. */ - __nbcon_atomic_flush_pending_con(ctxt->console, prb_next_reserve_seq(prb), false); + __nbcon_atomic_flush_pending_con(ctxt->console, prb_next_reserve_seq(prb)); } -- cgit v1.2.3 From cc3bad11de6e0d6012460487903e7167d3e73957 Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Fri, 7 Nov 2025 20:47:19 +0100 Subject: printk_ringbuffer: Fix check of valid data size when blk_lpos overflows The commit 67e1b0052f6bb8 ("printk_ringbuffer: don't needlessly wrap data blocks around") allows using the last 4 bytes of the ring buffer. But the check for the @data_size was not properly updated in get_data(). It fails when "blk_lpos->next" overflows to "0". In this case: + is_blk_wrapped(data_ring, blk_lpos->begin, blk_lpos->next) returns "false" because it checks "blk_lpos->next - 1". + "blk_lpos->begin < blk_lpos->next" fails because "blk_lpos->next" is already 0. + is_blk_wrapped(data_ring, blk_lpos->begin + DATA_SIZE(data_ring), blk_lpos->next) returns "true" because "begin_lpos" is from the next wrap but "next_lpos - 1" is from the previous one. 
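A worked example with made-up numbers may help. It assumes a hypothetical 4 KiB data ring, so DATA_WRAPS(data_ring, lpos) is effectively lpos >> 12, and an 8-byte block whose end coincides exactly with the end of the lpos space:

	/*
	 * blk_lpos->begin = ULONG_MAX - 7    -> last wrap of the lpos space
	 * blk_lpos->next  = 0                -> begin + 8 overflowed
	 *
	 * Check 1: DATA_WRAPS(begin) == DATA_WRAPS(next - 1 == ULONG_MAX),
	 *          both in the last wrap -> is_blk_wrapped() == false.
	 * Check 2: begin < next is false because next == 0.
	 * Check 3: begin + DATA_SIZE(data_ring) overflows into wrap 0 while
	 *          next - 1 == ULONG_MAX stays in the last wrap
	 *          -> is_blk_wrapped() == true, so the "wrapped" branch is
	 *          not taken either.
	 */
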
As a result, get_data() triggers the WARN_ON_ONCE() for "Illegal block description", for example: [ 216.317316][ T7652] loop0: detected capacity change from 0 to 16 ** 1 printk messages dropped ** [ 216.327750][ T7652] ------------[ cut here ]------------ [ 216.327789][ T7652] WARNING: kernel/printk/printk_ringbuffer.c:1278 at get_data+0x48a/0x840, CPU#1: syz.0.585/7652 [ 216.327848][ T7652] Modules linked in: [ 216.327907][ T7652] CPU: 1 UID: 0 PID: 7652 Comm: syz.0.585 Not tainted syzkaller #0 PREEMPT(full) [ 216.327933][ T7652] Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/02/2025 [ 216.327953][ T7652] RIP: 0010:get_data+0x48a/0x840 [ 216.327986][ T7652] Code: 83 c4 f8 48 b8 00 00 00 00 00 fc ff df 41 0f b6 04 07 84 c0 0f 85 ee 01 00 00 44 89 65 00 49 83 c5 08 eb 13 e8 a7 19 1f 00 90 <0f> 0b 90 eb 05 e8 9c 19 1f 00 45 31 ed 4c 89 e8 48 83 c4 28 5b 41 [ 216.328007][ T7652] RSP: 0018:ffffc900035170e0 EFLAGS: 00010293 [ 216.328029][ T7652] RAX: ffffffff81a1eee9 RBX: 00003fffffffffff RCX: ffff888033255b80 [ 216.328048][ T7652] RDX: 0000000000000000 RSI: 00003fffffffffff RDI: 0000000000000000 [ 216.328063][ T7652] RBP: 0000000000000012 R08: 0000000000000e55 R09: 000000325e213cc7 [ 216.328079][ T7652] R10: 000000325e213cc7 R11: 00001de4c2000037 R12: 0000000000000012 [ 216.328095][ T7652] R13: 0000000000000000 R14: ffffc90003517228 R15: 1ffffffff1bca646 [ 216.328111][ T7652] FS: 00007f44eb8da6c0(0000) GS:ffff888125fda000(0000) knlGS:0000000000000000 [ 216.328131][ T7652] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 216.328147][ T7652] CR2: 00007f44ea9722e0 CR3: 0000000066344000 CR4: 00000000003526f0 [ 216.328168][ T7652] Call Trace: [ 216.328178][ T7652] [ 216.328199][ T7652] _prb_read_valid+0x672/0xa90 [ 216.328328][ T7652] ? desc_read+0x1b8/0x3f0 [ 216.328381][ T7652] ? __pfx__prb_read_valid+0x10/0x10 [ 216.328422][ T7652] ? panic_on_this_cpu+0x32/0x40 [ 216.328450][ T7652] prb_read_valid+0x3c/0x60 [ 216.328482][ T7652] printk_get_next_message+0x15c/0x7b0 [ 216.328526][ T7652] ? __pfx_printk_get_next_message+0x10/0x10 [ 216.328561][ T7652] ? __lock_acquire+0xab9/0xd20 [ 216.328595][ T7652] ? console_flush_all+0x131/0xb10 [ 216.328621][ T7652] ? console_flush_all+0x478/0xb10 [ 216.328648][ T7652] console_flush_all+0x4cc/0xb10 [ 216.328673][ T7652] ? console_flush_all+0x131/0xb10 [ 216.328704][ T7652] ? __pfx_console_flush_all+0x10/0x10 [ 216.328748][ T7652] ? is_printk_cpu_sync_owner+0x32/0x40 [ 216.328781][ T7652] console_unlock+0xbb/0x190 [ 216.328815][ T7652] ? __pfx___down_trylock_console_sem+0x10/0x10 [ 216.328853][ T7652] ? __pfx_console_unlock+0x10/0x10 [ 216.328899][ T7652] vprintk_emit+0x4c5/0x590 [ 216.328935][ T7652] ? __pfx_vprintk_emit+0x10/0x10 [ 216.328993][ T7652] _printk+0xcf/0x120 [ 216.329028][ T7652] ? __pfx__printk+0x10/0x10 [ 216.329051][ T7652] ? kernfs_get+0x5a/0x90 [ 216.329090][ T7652] _erofs_printk+0x349/0x410 [ 216.329130][ T7652] ? __pfx__erofs_printk+0x10/0x10 [ 216.329161][ T7652] ? __raw_spin_lock_init+0x45/0x100 [ 216.329186][ T7652] ? __init_swait_queue_head+0xa9/0x150 [ 216.329231][ T7652] erofs_fc_fill_super+0x1591/0x1b20 [ 216.329285][ T7652] ? __pfx_erofs_fc_fill_super+0x10/0x10 [ 216.329324][ T7652] ? sb_set_blocksize+0x104/0x180 [ 216.329356][ T7652] ? setup_bdev_super+0x4c1/0x5b0 [ 216.329385][ T7652] get_tree_bdev_flags+0x40e/0x4d0 [ 216.329410][ T7652] ? __pfx_erofs_fc_fill_super+0x10/0x10 [ 216.329444][ T7652] ? 
__pfx_get_tree_bdev_flags+0x10/0x10 [ 216.329483][ T7652] vfs_get_tree+0x92/0x2b0 [ 216.329512][ T7652] do_new_mount+0x302/0xa10 [ 216.329537][ T7652] ? apparmor_capable+0x137/0x1b0 [ 216.329576][ T7652] ? __pfx_do_new_mount+0x10/0x10 [ 216.329605][ T7652] ? ns_capable+0x8a/0xf0 [ 216.329637][ T7652] ? kmem_cache_free+0x19b/0x690 [ 216.329682][ T7652] __se_sys_mount+0x313/0x410 [ 216.329717][ T7652] ? __pfx___se_sys_mount+0x10/0x10 [ 216.329836][ T7652] ? do_syscall_64+0xbe/0xfa0 [ 216.329869][ T7652] ? __x64_sys_mount+0x20/0xc0 [ 216.329901][ T7652] do_syscall_64+0xfa/0xfa0 [ 216.329932][ T7652] ? lockdep_hardirqs_on+0x9c/0x150 [ 216.329964][ T7652] ? entry_SYSCALL_64_after_hwframe+0x77/0x7f [ 216.329988][ T7652] ? clear_bhb_loop+0x60/0xb0 [ 216.330017][ T7652] entry_SYSCALL_64_after_hwframe+0x77/0x7f [ 216.330040][ T7652] RIP: 0033:0x7f44ea99076a [ 216.330080][ T7652] Code: d8 64 89 02 48 c7 c0 ff ff ff ff eb a6 e8 de 1a 00 00 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 49 89 ca b8 a5 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 a8 ff ff ff f7 d8 64 89 01 48 [ 216.330100][ T7652] RSP: 002b:00007f44eb8d9e68 EFLAGS: 00000246 ORIG_RAX: 00000000000000a5 [ 216.330128][ T7652] RAX: ffffffffffffffda RBX: 00007f44eb8d9ef0 RCX: 00007f44ea99076a [ 216.330146][ T7652] RDX: 0000200000000180 RSI: 00002000000001c0 RDI: 00007f44eb8d9eb0 [ 216.330164][ T7652] RBP: 0000200000000180 R08: 00007f44eb8d9ef0 R09: 0000000000000000 [ 216.330181][ T7652] R10: 0000000000000000 R11: 0000000000000246 R12: 00002000000001c0 [ 216.330196][ T7652] R13: 00007f44eb8d9eb0 R14: 00000000000001a1 R15: 0000200000000080 [ 216.330233][ T7652] Solve the problem by moving and fixing the sanity check. The problematic if-else-if-else code now just distinguishes three basic scenarios: "regular" vs. "wrapped" vs. "too many times wrapped" block. The new sanity check is more precise. A valid "data_size" must be smaller than half of the data buffer size and must not be zero at this stage. This allows catching a problematic "data_size" even for wrapped blocks. Closes: https://lore.kernel.org/all/69096836.a70a0220.88fb8.0006.GAE@google.com/ Closes: https://lore.kernel.org/all/69078fb6.050a0220.29fc44.0029.GAE@google.com/ Fixes: 67e1b0052f6bb82 ("printk_ringbuffer: don't needlessly wrap data blocks around") Reviewed-by: John Ogness Tested-by: John Ogness Link: https://patch.msgid.link/20251107194720.1231457-2-pmladek@suse.com Signed-off-by: Petr Mladek --- kernel/printk/printk_ringbuffer.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/printk/printk_ringbuffer.c b/kernel/printk/printk_ringbuffer.c index 839f504db6d3..3e6fd8d6fa9f 100644 --- a/kernel/printk/printk_ringbuffer.c +++ b/kernel/printk/printk_ringbuffer.c @@ -1260,9 +1260,8 @@ static const char *get_data(struct prb_data_ring *data_ring, return NULL; } - /* Regular data block: @begin less than @next and in same wrap. */ - if (!is_blk_wrapped(data_ring, blk_lpos->begin, blk_lpos->next) && - blk_lpos->begin < blk_lpos->next) { + /* Regular data block: @begin and @next in the same wrap. */ + if (!is_blk_wrapped(data_ring, blk_lpos->begin, blk_lpos->next)) { db = to_block(data_ring, blk_lpos->begin); *data_size = blk_lpos->next - blk_lpos->begin; @@ -1279,6 +1278,10 @@ static const char *get_data(struct prb_data_ring *data_ring, return NULL; } + /* Sanity check. Data-less blocks were handled earlier.
*/ + if (WARN_ON_ONCE(!data_check_size(data_ring, *data_size) || !*data_size)) + return NULL; + /* A valid data block will always be aligned to the ID size. */ if (WARN_ON_ONCE(blk_lpos->begin != ALIGN(blk_lpos->begin, sizeof(db->id))) || WARN_ON_ONCE(blk_lpos->next != ALIGN(blk_lpos->next, sizeof(db->id)))) { -- cgit v1.2.3 From 394aa576c0b783ae728d87ed98fe4f1831dfd720 Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Fri, 7 Nov 2025 20:47:20 +0100 Subject: printk_ringbuffer: Create a helper function to decide whether more space is needed Deciding whether more space is needed is tricky in the printk ring buffer code: 1. The given lpos values might overflow. A subtraction must be used instead of a simple "lower than" check. 2. Another CPU might reuse the space in the meantime. This can be detected when the result of the subtraction is bigger than DATA_SIZE(data_ring). 3. There is exactly enough space when the result of the subtraction is zero. But more space is needed when the result is exactly DATA_SIZE(data_ring). Add a helper function to make sure that the check is done correctly in all situations. It also helps to keep the code consistent and better documented. Suggested-by: John Ogness Link: https://lore.kernel.org/r/87tsz7iea2.fsf@jogness.linutronix.de Reviewed-by: John Ogness Link: https://patch.msgid.link/20251107194720.1231457-3-pmladek@suse.com [pmladek@suse.com: Updated wording as suggested by John] Signed-off-by: Petr Mladek --- kernel/printk/printk_ringbuffer.c | 32 ++++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/printk/printk_ringbuffer.c b/kernel/printk/printk_ringbuffer.c index 3e6fd8d6fa9f..56c8e3d031f4 100644 --- a/kernel/printk/printk_ringbuffer.c +++ b/kernel/printk/printk_ringbuffer.c @@ -411,6 +411,23 @@ static bool data_check_size(struct prb_data_ring *data_ring, unsigned int size) return to_blk_size(size) <= DATA_SIZE(data_ring) / 2; } +/* + * Compare the current and requested logical position and decide + * whether more space is needed. + * + * Return false when @lpos_current is already at or beyond @lpos_target. + * + * Also return false when the difference between the positions is bigger + * than the size of the data buffer. It might happen only when the caller + * raced with another CPU(s) which already made and used the space. + */ +static bool need_more_space(struct prb_data_ring *data_ring, + unsigned long lpos_current, + unsigned long lpos_target) +{ + return lpos_target - lpos_current - 1 < DATA_SIZE(data_ring); +} + /* Query the state of a descriptor. */ static enum desc_state get_desc_state(unsigned long id, unsigned long state_val) @@ -577,7 +594,7 @@ static bool data_make_reusable(struct printk_ringbuffer *rb, unsigned long id; /* Loop until @lpos_begin has advanced to or beyond @lpos_end. */ - while ((lpos_end - lpos_begin) - 1 < DATA_SIZE(data_ring)) { + while (need_more_space(data_ring, lpos_begin, lpos_end)) { blk = to_block(data_ring, lpos_begin); /* @@ -668,7 +685,7 @@ static bool data_push_tail(struct printk_ringbuffer *rb, unsigned long lpos) * sees the new tail lpos, any descriptor states that transitioned to * the reusable state must already be visible. */ - while ((lpos - tail_lpos) - 1 < DATA_SIZE(data_ring)) { + while (need_more_space(data_ring, tail_lpos, lpos)) { /* * Make all descriptors reusable that are associated with * data blocks before @lpos.
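/*
 * Worked examples of need_more_space() (a sketch with a made-up 16-byte
 * ring, i.e. DATA_SIZE(data_ring) == 16; the real size is always a power
 * of two derived from the ring's size_bits):
 *
 *   lpos_current   lpos_target   target - current - 1   need_more_space()
 *   32             32            ULONG_MAX              false (already there)
 *   32             40            7                      true
 *   32             48            15                     true  (exactly DATA_SIZE apart)
 *   32             64            31                     false (raced, stale positions)
 *   ULONG_MAX - 7  8             15                     true  (works across overflow)
 */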
@@ -1148,8 +1165,15 @@ static char *data_realloc(struct printk_ringbuffer *rb, unsigned int size, next_lpos = get_next_lpos(data_ring, blk_lpos->begin, size); - /* If the data block does not increase, there is nothing to do. */ - if (head_lpos - next_lpos < DATA_SIZE(data_ring)) { + /* + * Use the current data block when the size does not increase, i.e. + * when @head_lpos is already able to accommodate the new @next_lpos. + * + * Note that need_more_space() could never return false here because + * the difference between the positions was bigger than the data + * buffer size. The data block is reopened and can't get reused. + */ + if (!need_more_space(data_ring, head_lpos, next_lpos)) { if (wrapped) blk = to_block(data_ring, 0); else -- cgit v1.2.3 From ace38521705bd79a47e5f46b6bae6dc044f3cfdc Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 13 Nov 2025 15:32:35 +0100 Subject: tracing: Switch to use %ptSp Use %ptSp instead of open-coded variants to print the content of struct timespec64 in a human-readable format. Acked-by: Steven Rostedt (Google) Signed-off-by: Andy Shevchenko Link: https://patch.msgid.link/20251113150217.3030010-22-andriy.shevchenko@linux.intel.com Signed-off-by: Petr Mladek --- kernel/trace/trace_output.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 97db0b0ccf3e..14f86f0a8bc7 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -1467,12 +1467,12 @@ trace_hwlat_print(struct trace_iterator *iter, int flags, trace_assign_type(field, entry); - trace_seq_printf(s, "#%-5u inner/outer(us): %4llu/%-5llu ts:%lld.%09ld count:%d", + trace_seq_printf(s, "#%-5u inner/outer(us): %4llu/%-5llu ts:%ptSp count:%d", field->seqnum, field->duration, field->outer_duration, - (long long)field->timestamp.tv_sec, - field->timestamp.tv_nsec, field->count); + &field->timestamp, + field->count); if (field->nmi_count) { /* -- cgit v1.2.3 From d01ff281bd9b1bfeac9ab98ec8a9ee41da900d5e Mon Sep 17 00:00:00 2001 From: John Ogness Date: Thu, 13 Nov 2025 17:09:47 +0106 Subject: printk: Allow printk_trigger_flush() to flush all types Currently printk_trigger_flush() only triggers legacy offloaded flushing, even though that may not be the appropriate flushing method for the currently registered consoles. (The function predates the NBCON consoles.) Since commit 6690d6b52726 ("printk: Add helper for flush type logic") there is printk_get_console_flush_type(), which also considers NBCON consoles and reports all the flushing methods appropriate for the system state and the available consoles. Update printk_trigger_flush() to use printk_get_console_flush_type() to appropriately flush registered consoles.
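In reduced stand-alone form, the resulting dispatch looks roughly like this (the flag names follow struct console_flush_type; the helpers are stubs standing in for the real printk primitives):

#include <stdbool.h>
#include <stdio.h>

/* Reduced model of struct console_flush_type. */
struct console_flush_type {
	bool nbcon_atomic;
	bool nbcon_offload;
	bool legacy_direct;
	bool legacy_offload;
};

/* Stubs standing in for the real flush primitives. */
static void nbcon_atomic_flush_pending(void) { puts("nbcon atomic flush"); }
static void nbcon_kthreads_wake(void) { puts("wake nbcon kthreads"); }
static void legacy_direct_flush(void) { puts("legacy direct flush"); }
static void defer_console_output(void) { puts("defer via irq_work"); }

/* The methods are not exclusive: every applicable one is triggered. */
static void trigger_flush(const struct console_flush_type *ft)
{
	if (ft->nbcon_atomic)
		nbcon_atomic_flush_pending();
	if (ft->nbcon_offload)
		nbcon_kthreads_wake();
	if (ft->legacy_direct)
		legacy_direct_flush();
	if (ft->legacy_offload)
		defer_console_output();
}

int main(void)
{
	/* E.g. nbcon kthreads running alongside a legacy console. */
	struct console_flush_type ft = { .nbcon_offload = true, .legacy_direct = true };

	trigger_flush(&ft);
	return 0;
}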
Suggested-by: Petr Mladek Signed-off-by: John Ogness Reviewed-by: Petr Mladek Link: https://lore.kernel.org/stable/20251113160351.113031-2-john.ogness%40linutronix.de Tested-by: Sherry Sun Link: https://patch.msgid.link/20251113160351.113031-2-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- kernel/printk/nbcon.c | 2 +- kernel/printk/printk.c | 23 ++++++++++++++++++++++- 2 files changed, 23 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c index 558ef3177976..73f315fd97a3 100644 --- a/kernel/printk/nbcon.c +++ b/kernel/printk/nbcon.c @@ -1849,7 +1849,7 @@ void nbcon_device_release(struct console *con) if (console_trylock()) console_unlock(); } else if (ft.legacy_offload) { - printk_trigger_flush(); + defer_console_output(); } } console_srcu_read_unlock(cookie); diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 5aee9ffb16b9..dc89239cf1b5 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -4567,9 +4567,30 @@ void defer_console_output(void) __wake_up_klogd(PRINTK_PENDING_WAKEUP | PRINTK_PENDING_OUTPUT); } +/** + * printk_trigger_flush - Attempt to flush printk buffer to consoles. + * + * If possible, flush the printk buffer to all consoles in the caller's + * context. If offloading is available, trigger deferred printing. + * + * This is best effort. Depending on the system state, console states, + * and caller context, no actual flushing may result from this call. + */ void printk_trigger_flush(void) { - defer_console_output(); + struct console_flush_type ft; + + printk_get_console_flush_type(&ft); + if (ft.nbcon_atomic) + nbcon_atomic_flush_pending(); + if (ft.nbcon_offload) + nbcon_kthreads_wake(); + if (ft.legacy_direct) { + if (console_trylock()) + console_unlock(); + } + if (ft.legacy_offload) + defer_console_output(); } int vprintk_deferred(const char *fmt, va_list args) -- cgit v1.2.3 From 26873e3e7f0cb26c45e6ad63656f9fe36b2aa31b Mon Sep 17 00:00:00 2001 From: John Ogness Date: Thu, 13 Nov 2025 17:09:48 +0106 Subject: printk: Avoid scheduling irq_work on suspend Allowing irq_work to be scheduled while trying to suspend has been shown to cause problems because some architectures interpret the pending interrupts as a reason not to suspend. This became a problem for printk() with the introduction of NBCON consoles. With every printk() call, NBCON console printing kthreads are woken by queueing irq_work. This means that irq_work continues to be queued due to printk() calls late in the suspend procedure. Avoid this problem by preventing printk() from queueing irq_work once console suspending has begun. This applies to triggering NBCON and legacy deferred printing as well as waking klogd waiters. Since triggering of NBCON threaded printing relies on irq_work, the pr_flush() within console_suspend_all() is used to perform the final flushing before suspending consoles and blocking irq_work queueing. NBCON consoles that are not suspended (due to the usage of the "no_console_suspend" boot argument) transition to atomic flushing. Introduce a new global variable @console_irqwork_blocked to flag when irq_work queueing is to be avoided. The flag is used by printk_get_console_flush_type() to avoid allowing deferred printing and to switch NBCON consoles to atomic flushing. It is also used by vprintk_emit() to avoid waking klogd. Add WARN_ON_ONCE(console_irqwork_blocked) to the irq_work queueing functions to catch any code that attempts to queue printk irq_work during the suspending/resuming procedure.
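The gating pattern, reduced to a runnable sketch (the flag name mirrors the patch; the queueing helper is a stand-in for the real irq_work machinery):

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

static bool console_irqwork_blocked;	/* set across the suspend window */

/* Stand-in for queueing printk irq_work (kthread wakeups, klogd, ...). */
static void queue_printk_irq_work(void)
{
	/* Mirrors the added WARN_ON_ONCE(): queueing here is a bug. */
	assert(!console_irqwork_blocked);
	puts("irq_work queued");
}

static void emit(const char *msg)
{
	printf("printk: %s\n", msg);

	/* Deferred wakeups are skipped entirely while suspending. */
	if (!console_irqwork_blocked)
		queue_printk_irq_work();
}

int main(void)
{
	emit("before suspend");			/* queues irq_work */
	console_irqwork_blocked = true;		/* console_suspend_all() */
	emit("during suspend");			/* no irq_work queued */
	console_irqwork_blocked = false;	/* console_resume_all() */
	emit("after resume");			/* irq_work queued again */
	return 0;
}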
Cc: stable@vger.kernel.org # 6.13.x because no drivers in 6.12.x Fixes: 6b93bb41f6ea ("printk: Add non-BKL (nbcon) console basic infrastructure") Closes: https://lore.kernel.org/lkml/DB9PR04MB8429E7DDF2D93C2695DE401D92C4A@DB9PR04MB8429.eurprd04.prod.outlook.com Signed-off-by: John Ogness Reviewed-by: Petr Mladek Tested-by: Sherry Sun Link: https://patch.msgid.link/20251113160351.113031-3-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- kernel/printk/internal.h | 8 ++++--- kernel/printk/nbcon.c | 7 ++++++ kernel/printk/printk.c | 58 +++++++++++++++++++++++++++++++++++------------- 3 files changed, 55 insertions(+), 18 deletions(-) (limited to 'kernel') diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h index f72bbfa266d6..b20929b7d71f 100644 --- a/kernel/printk/internal.h +++ b/kernel/printk/internal.h @@ -230,6 +230,8 @@ struct console_flush_type { bool legacy_offload; }; +extern bool console_irqwork_blocked; + /* * Identify which console flushing methods should be used in the context of * the caller. @@ -241,7 +243,7 @@ static inline void printk_get_console_flush_type(struct console_flush_type *ft) switch (nbcon_get_default_prio()) { case NBCON_PRIO_NORMAL: if (have_nbcon_console && !have_boot_console) { - if (printk_kthreads_running) + if (printk_kthreads_running && !console_irqwork_blocked) ft->nbcon_offload = true; else ft->nbcon_atomic = true; @@ -251,7 +253,7 @@ static inline void printk_get_console_flush_type(struct console_flush_type *ft) if (have_legacy_console || have_boot_console) { if (!is_printk_legacy_deferred()) ft->legacy_direct = true; - else + else if (!console_irqwork_blocked) ft->legacy_offload = true; } break; @@ -264,7 +266,7 @@ static inline void printk_get_console_flush_type(struct console_flush_type *ft) if (have_legacy_console || have_boot_console) { if (!is_printk_legacy_deferred()) ft->legacy_direct = true; - else + else if (!console_irqwork_blocked) ft->legacy_offload = true; } break; diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c index 73f315fd97a3..730d14f6cbc5 100644 --- a/kernel/printk/nbcon.c +++ b/kernel/printk/nbcon.c @@ -1276,6 +1276,13 @@ void nbcon_kthreads_wake(void) if (!printk_kthreads_running) return; + /* + * It is not allowed to call this function when console irq_work + * is blocked. + */ + if (WARN_ON_ONCE(console_irqwork_blocked)) + return; + cookie = console_srcu_read_lock(); for_each_console_srcu(con) { if (!(console_srcu_read_flags(con) & CON_NBCON)) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index dc89239cf1b5..b1c0d35cf3ca 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -462,6 +462,9 @@ bool have_boot_console; /* See printk_legacy_allow_panic_sync() for details. */ bool legacy_allow_panic_sync; +/* Avoid using irq_work when suspending. */ +bool console_irqwork_blocked; + #ifdef CONFIG_PRINTK DECLARE_WAIT_QUEUE_HEAD(log_wait); static DECLARE_WAIT_QUEUE_HEAD(legacy_wait); @@ -2426,7 +2429,7 @@ asmlinkage int vprintk_emit(int facility, int level, if (ft.legacy_offload) defer_console_output(); - else + else if (!console_irqwork_blocked) wake_up_klogd(); return printed_len; @@ -2730,10 +2733,20 @@ void console_suspend_all(void) { struct console *con; + if (console_suspend_enabled) + pr_info("Suspending console(s) (use no_console_suspend to debug)\n"); + + /* + * Flush any console backlog and then avoid queueing irq_work until + * console_resume_all(). 
Until then deferred printing is no longer + * triggered, NBCON consoles transition to atomic flushing, and + * any klogd waiters are not triggered. + */ + pr_flush(1000, true); + console_irqwork_blocked = true; + if (!console_suspend_enabled) return; - pr_info("Suspending console(s) (use no_console_suspend to debug)\n"); - pr_flush(1000, true); console_list_lock(); for_each_console(con) @@ -2754,26 +2767,34 @@ void console_resume_all(void) struct console_flush_type ft; struct console *con; - if (!console_suspend_enabled) - return; - - console_list_lock(); - for_each_console(con) - console_srcu_write_flags(con, con->flags & ~CON_SUSPENDED); - console_list_unlock(); - /* - * Ensure that all SRCU list walks have completed. All printing - * contexts must be able to see they are no longer suspended so - * that they are guaranteed to wake up and resume printing. + * Allow queueing irq_work. After restoring console state, deferred + * printing and any klogd waiters need to be triggered in case there + * is now a console backlog. */ - synchronize_srcu(&console_srcu); + console_irqwork_blocked = false; + + if (console_suspend_enabled) { + console_list_lock(); + for_each_console(con) + console_srcu_write_flags(con, con->flags & ~CON_SUSPENDED); + console_list_unlock(); + + /* + * Ensure that all SRCU list walks have completed. All printing + * contexts must be able to see they are no longer suspended so + * that they are guaranteed to wake up and resume printing. + */ + synchronize_srcu(&console_srcu); + } printk_get_console_flush_type(&ft); if (ft.nbcon_offload) nbcon_kthreads_wake(); if (ft.legacy_offload) defer_console_output(); + else + wake_up_klogd(); pr_flush(1000, true); } @@ -4511,6 +4532,13 @@ static void __wake_up_klogd(int val) if (!printk_percpu_data_ready()) return; + /* + * It is not allowed to call this function when console irq_work + * is blocked. + */ + if (WARN_ON_ONCE(console_irqwork_blocked)) + return; + preempt_disable(); /* * Guarantee any new records can be seen by tasks preparing to wait -- cgit v1.2.3 From 66e7c1e0ee08cfb6db64f8f3f6e5a3cc930145c8 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Fri, 21 Nov 2025 11:26:00 +0106 Subject: printk: Avoid irq_work for printk_deferred() on suspend With the commit ("printk: Avoid scheduling irq_work on suspend"), the implementation of printk_get_console_flush_type() was modified to avoid offloading when irq_work should be blocked during suspend. Since printk uses the returned flush type to determine which flushing methods are used, this was thought to be sufficient for avoiding irq_work usage during the suspend phase. However, vprintk_emit() implements a hack to support printk_deferred(). In this hack, the returned flush type is adjusted to make sure no legacy direct printing occurs when printk_deferred() is used. Because of this hack, the legacy offload flushing method can still be chosen, causing irq_work to be queued when it should not be. Adjust the vprintk_emit() hack to also consider @console_irqwork_blocked so that legacy offloading will not be chosen when irq_work should be blocked.
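The effect of the one-line change can be enumerated with a small stand-alone sketch (only the two flag names come from the kernel code; the loop is purely illustrative):

#include <stdbool.h>
#include <stdio.h>

int main(void)
{
	for (int direct = 0; direct <= 1; direct++) {
		for (int blocked = 0; blocked <= 1; blocked++) {
			bool legacy_offload = false;
			bool legacy_direct = direct;

			/* The adjusted LOGLEVEL_SCHED hack: demote direct
			 * printing to offload only while irq_work is allowed.
			 */
			legacy_offload |= legacy_direct && !blocked;
			legacy_direct = false;

			printf("direct=%d blocked=%d -> offload=%d\n",
			       direct, blocked, (int)legacy_offload);
		}
	}
	return 0;
}

With both flags set, no legacy flushing method remains selected, which is the intended behavior: nothing may queue irq_work until console_resume_all().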
Link: https://lore.kernel.org/lkml/87fra90xv4.fsf@jogness.linutronix.de Signed-off-by: John Ogness Fixes: 26873e3e7f0c ("printk: Avoid scheduling irq_work on suspend") Reviewed-by: Petr Mladek Signed-off-by: Petr Mladek --- kernel/printk/printk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index b1c0d35cf3ca..c27fc7fc64eb 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -2393,7 +2393,7 @@ asmlinkage int vprintk_emit(int facility, int level, /* If called from the scheduler, we can not call up(). */ if (level == LOGLEVEL_SCHED) { level = LOGLEVEL_DEFAULT; - ft.legacy_offload |= ft.legacy_direct; + ft.legacy_offload |= ft.legacy_direct && !console_irqwork_blocked; ft.legacy_direct = false; } -- cgit v1.2.3 From 466348abb0c364cfaf01c6a1142e32cb0d704980 Mon Sep 17 00:00:00 2001 From: Marcos Paulo de Souza Date: Fri, 21 Nov 2025 15:50:35 -0300 Subject: printk: Use console_is_usable on console_unblank The for_each_console_srcu() macro iterates over all registered consoles. It is implied that all registered consoles have the CON_ENABLED flag set, making the check for the flag unnecessary. Call the console_is_usable() function to fully verify that the given console is usable before calling the ->unblank callback. Signed-off-by: Marcos Paulo de Souza Reviewed-by: Petr Mladek Link: https://patch.msgid.link/20251121-printk-cleanup-part2-v2-3-57b8b78647f4@suse.com Signed-off-by: Petr Mladek --- kernel/printk/printk.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'kernel') diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 5aee9ffb16b9..ac5f38213f82 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -3331,12 +3331,10 @@ void console_unblank(void) */ cookie = console_srcu_read_lock(); for_each_console_srcu(c) { - short flags = console_srcu_read_flags(c); - - if (flags & CON_SUSPENDED) + if (!console_is_usable(c, console_srcu_read_flags(c), true)) continue; - if ((flags & CON_ENABLED) && c->unblank) { + if (c->unblank) { found_unblank = true; break; } @@ -3373,12 +3371,10 @@ void console_unblank(void) cookie = console_srcu_read_lock(); for_each_console_srcu(c) { - short flags = console_srcu_read_flags(c); - - if (flags & CON_SUSPENDED) + if (!console_is_usable(c, console_srcu_read_flags(c), true)) continue; - if ((flags & CON_ENABLED) && c->unblank) + if (c->unblank) c->unblank(); } console_srcu_read_unlock(cookie); -- cgit v1.2.3