From 0e1d5731d3c1e2214249ef36dcd13ad51ad304cf Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 20 Aug 2024 08:35:30 +0206 Subject: printk: Check printk_deferred_enter()/_exit() usage Add validation that printk_deferred_enter()/_exit() are called in non-migration contexts. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: John Ogness Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240820063001.36405-5-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- include/linux/printk.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/printk.h b/include/linux/printk.h index b937cefcb31c..eee8e97da681 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -161,15 +161,16 @@ int _printk(const char *fmt, ...); */ __printf(1, 2) __cold int _printk_deferred(const char *fmt, ...); -extern void __printk_safe_enter(void); -extern void __printk_safe_exit(void); +extern void __printk_deferred_enter(void); +extern void __printk_deferred_exit(void); + /* * The printk_deferred_enter/exit macros are available only as a hack for * some code paths that need to defer all printk console printing. Interrupts * must be disabled for the deferred duration. */ -#define printk_deferred_enter __printk_safe_enter -#define printk_deferred_exit __printk_safe_exit +#define printk_deferred_enter() __printk_deferred_enter() +#define printk_deferred_exit() __printk_deferred_exit() /* * Please don't use printk_ratelimit(), because it shares ratelimiting state -- cgit v1.2.3 From b7049d88c1763d5f133b0e93e39da8e89a9f944b Mon Sep 17 00:00:00 2001 From: John Ogness Date: Tue, 20 Aug 2024 08:35:32 +0206 Subject: printk: nbcon: Remove return value for write_atomic() The return value of write_atomic() does not provide any useful information. On the contrary, it makes things more complicated for the caller to appropriately deal with the information. Change write_atomic() to not have a return value. If the message did not get printed due to loss of ownership, the caller will notice this on its own. If ownership was not lost, it will be assumed that the driver successfully printed the message and the sequence number for that console will be incremented. Signed-off-by: John Ogness Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240820063001.36405-7-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- include/linux/console.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/console.h b/include/linux/console.h index 31a8f5b85f5d..577b157fe4fb 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -345,7 +345,7 @@ struct console { struct hlist_node node; /* nbcon console specific members */ - bool (*write_atomic)(struct console *con, + void (*write_atomic)(struct console *con, struct nbcon_write_context *wctxt); atomic_t __private nbcon_state; atomic_long_t __private nbcon_seq; -- cgit v1.2.3 From 7d56936c1e5208b5eeea2a9a2f70e85248f6da49 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Tue, 20 Aug 2024 08:35:33 +0206 Subject: printk: nbcon: Add detailed doc for write_atomic() The write_atomic() callback has special requirements and is allowed to use special helper functions. Provide detailed documentation of the callback so that a developer has a chance of implementing it correctly. Signed-off-by: John Ogness Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240820063001.36405-8-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- include/linux/console.h | 33 +++++++++++++++++++++++++++++---- 1 file changed, 29 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/console.h b/include/linux/console.h index 577b157fe4fb..35c64ee3827b 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -303,7 +303,7 @@ struct nbcon_write_context { /** * struct console - The console descriptor structure * @name: The name of the console driver - * @write: Write callback to output messages (Optional) + * @write: Legacy write callback to output messages (Optional) * @read: Read callback for console input (Optional) * @device: The underlying TTY device driver (Optional) * @unblank: Callback to unblank the console (Optional) @@ -320,7 +320,6 @@ struct nbcon_write_context { * @data: Driver private data * @node: hlist node for the console list * - * @write_atomic: Write callback for atomic context * @nbcon_state: State for nbcon consoles * @nbcon_seq: Sequence number of the next record for nbcon to print * @pbufs: Pointer to nbcon private buffer @@ -345,8 +344,34 @@ struct console { struct hlist_node node; /* nbcon console specific members */ - void (*write_atomic)(struct console *con, - struct nbcon_write_context *wctxt); + + /** + * @write_atomic: + * + * NBCON callback to write out text in any context. (Optional) + * + * This callback is called with the console already acquired. However, + * a higher priority context is allowed to take it over by default. + * + * The callback must call nbcon_enter_unsafe() and nbcon_exit_unsafe() + * around any code where the takeover is not safe, for example, when + * manipulating the serial port registers. + * + * nbcon_enter_unsafe() will fail if the context has lost the console + * ownership in the meantime. In this case, the callback is no longer + * allowed to go forward. It must back out immediately and carefully. + * The buffer content is also no longer trusted since it no longer + * belongs to the context. + * + * The callback should allow the takeover whenever it is safe. It + * increases the chance to see messages when the system is in trouble. + * + * The callback can be called from any context (including NMI). + * Therefore it must avoid usage of any locking and instead rely + * on the console ownership for synchronization. + */ + void (*write_atomic)(struct console *con, struct nbcon_write_context *wctxt); + atomic_t __private nbcon_state; atomic_long_t __private nbcon_seq; struct printk_buffers *pbufs; -- cgit v1.2.3 From 7a16a771890746b4060333d665ebe674945a3cfa Mon Sep 17 00:00:00 2001 From: John Ogness Date: Tue, 20 Aug 2024 08:35:34 +0206 Subject: printk: nbcon: Add callbacks to synchronize with driver Console drivers typically must deal with access to the hardware via user input/output (such as an interactive login shell) and output of kernel messages via printk() calls. To provide the necessary synchronization, usually some driver-specific locking mechanism is used (for example, the port spinlock for uart serial consoles). Until now, usage of this driver-specific locking has been hidden from the printk-subsystem and implemented within the various console callbacks. However, nbcon consoles would need to use it even in the generic code. Add device_lock() and device_unlock() callback which will need to get implemented by nbcon consoles. The callbacks will use whatever synchronization mechanism the driver is using for itself. The minimum requirement is to prevent CPU migration. It would allow a context friendly acquiring of nbcon console ownership in non-emergency and non-panic context. Signed-off-by: John Ogness Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240820063001.36405-9-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- include/linux/console.h | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) (limited to 'include/linux') diff --git a/include/linux/console.h b/include/linux/console.h index 35c64ee3827b..46b3c210b931 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -372,6 +372,49 @@ struct console { */ void (*write_atomic)(struct console *con, struct nbcon_write_context *wctxt); + /** + * @device_lock: + * + * NBCON callback to begin synchronization with driver code. + * + * Console drivers typically must deal with access to the hardware + * via user input/output (such as an interactive login shell) and + * output of kernel messages via printk() calls. This callback is + * called by the printk-subsystem whenever it needs to synchronize + * with hardware access by the driver. It should be implemented to + * use whatever synchronization mechanism the driver is using for + * itself (for example, the port lock for uart serial consoles). + * + * The callback is always called from task context. It may use any + * synchronization method required by the driver. + * + * IMPORTANT: The callback MUST disable migration. The console driver + * may be using a synchronization mechanism that already takes + * care of this (such as spinlocks). Otherwise this function must + * explicitly call migrate_disable(). + * + * The flags argument is provided as a convenience to the driver. It + * will be passed again to device_unlock(). It can be ignored if the + * driver does not need it. + */ + void (*device_lock)(struct console *con, unsigned long *flags); + + /** + * @device_unlock: + * + * NBCON callback to finish synchronization with driver code. + * + * It is the counterpart to device_lock(). + * + * This callback is always called from task context. It must + * appropriately re-enable migration (depending on how device_lock() + * disabled migration). + * + * The flags argument is the value of the same variable that was + * passed to device_lock(). + */ + void (*device_unlock)(struct console *con, unsigned long flags); + atomic_t __private nbcon_state; atomic_long_t __private nbcon_seq; struct printk_buffers *pbufs; -- cgit v1.2.3 From 77e73c0687f5bc884fa1b0cb97aca912bcc01957 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Tue, 20 Aug 2024 08:35:36 +0206 Subject: serial: core: Provide low-level functions to lock port It will be necessary at times for the uart nbcon console drivers to acquire the port lock directly (without the additional nbcon functionality of the port lock wrappers). These are special cases such as the implementation of the device_lock()/device_unlock() callbacks or for internal port lock wrapper synchronization. Provide low-level variants __uart_port_lock_irqsave() and __uart_port_unlock_irqrestore() for this purpose. Signed-off-by: John Ogness Reviewed-by: Petr Mladek Acked-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20240820063001.36405-11-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- include/linux/serial_core.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index aea25eef9a1a..8872cd21e70a 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -590,6 +590,24 @@ struct uart_port { void *private_data; /* generic platform data pointer */ }; +/* + * Only for console->device_lock()/_unlock() callbacks and internal + * port lock wrapper synchronization. + */ +static inline void __uart_port_lock_irqsave(struct uart_port *up, unsigned long *flags) +{ + spin_lock_irqsave(&up->lock, *flags); +} + +/* + * Only for console->device_lock()/_unlock() callbacks and internal + * port lock wrapper synchronization. + */ +static inline void __uart_port_unlock_irqrestore(struct uart_port *up, unsigned long flags) +{ + spin_unlock_irqrestore(&up->lock, flags); +} + /** * uart_port_lock - Lock the UART port * @up: Pointer to UART port structure -- cgit v1.2.3 From eabd4600dafacf9895184259373f2445ff748654 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Tue, 20 Aug 2024 08:35:37 +0206 Subject: serial: core: Introduce wrapper to set @uart_port->cons MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce uart_port_set_cons() as a wrapper to set @cons of a uart_port. The wrapper sets @cons under the port lock in order to prevent @cons from disappearing while another context is holding the port lock. This is necessary for a follow-up commit relating to the port lock wrappers, which rely on @cons not changing between lock and unlock. Signed-off-by: John Ogness Tested-by: Théo Lebrun # EyeQ5, AMBA-PL011 Acked-by: Greg Kroah-Hartman Reviewed-by: Petr Mladek Reviewed-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20240820063001.36405-12-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- include/linux/serial_core.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 8872cd21e70a..2cf03ff2056a 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -608,6 +608,23 @@ static inline void __uart_port_unlock_irqrestore(struct uart_port *up, unsigned spin_unlock_irqrestore(&up->lock, flags); } +/** + * uart_port_set_cons - Safely set the @cons field for a uart + * @up: The uart port to set + * @con: The new console to set to + * + * This function must be used to set @up->cons. It uses the port lock to + * synchronize with the port lock wrappers in order to ensure that the console + * cannot change or disappear while another context is holding the port lock. + */ +static inline void uart_port_set_cons(struct uart_port *up, struct console *con) +{ + unsigned long flags; + + __uart_port_lock_irqsave(up, &flags); + up->cons = con; + __uart_port_unlock_irqrestore(up, flags); +} /** * uart_port_lock - Lock the UART port * @up: Pointer to UART port structure -- cgit v1.2.3 From dc219d8d858d95549543aa7a41b6e64a0da9e406 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Tue, 20 Aug 2024 08:35:38 +0206 Subject: console: Improve console_srcu_read_flags() comments It was not clear when exactly console_srcu_read_flags() must be used vs. directly reading @console->flags. Refactor and clarify that console_srcu_read_flags() is only needed if the console is registered or the caller is in a context where the registration status of the console may change (due to another context). The function requires the caller holds @console_srcu, which will ensure that the caller sees an appropriate @flags value for the registered console and that exit/cleanup routines will not run if the console is in the process of unregistration. Signed-off-by: John Ogness Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240820063001.36405-13-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- include/linux/console.h | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/console.h b/include/linux/console.h index 46b3c210b931..aafe3121b74e 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -446,28 +446,34 @@ extern void console_list_unlock(void) __releases(console_mutex); extern struct hlist_head console_list; /** - * console_srcu_read_flags - Locklessly read the console flags + * console_srcu_read_flags - Locklessly read flags of a possibly registered + * console * @con: struct console pointer of console to read flags from * - * This function provides the necessary READ_ONCE() and data_race() - * notation for locklessly reading the console flags. The READ_ONCE() - * in this function matches the WRITE_ONCE() when @flags are modified - * for registered consoles with console_srcu_write_flags(). + * Locklessly reading @con->flags provides a consistent read value because + * there is at most one CPU modifying @con->flags and that CPU is using only + * read-modify-write operations to do so. * - * Only use this function to read console flags when locklessly - * iterating the console list via srcu. + * Requires console_srcu_read_lock to be held, which implies that @con might + * be a registered console. The purpose of holding console_srcu_read_lock is + * to guarantee that the console state is valid (CON_SUSPENDED/CON_ENABLED) + * and that no exit/cleanup routines will run if the console is currently + * undergoing unregistration. + * + * If the caller is holding the console_list_lock or it is _certain_ that + * @con is not and will not become registered, the caller may read + * @con->flags directly instead. * * Context: Any context. + * Return: The current value of the @con->flags field. */ static inline short console_srcu_read_flags(const struct console *con) { WARN_ON_ONCE(!console_srcu_read_lock_is_held()); /* - * Locklessly reading console->flags provides a consistent - * read value because there is at most one CPU modifying - * console->flags and that CPU is using only read-modify-write - * operations to do so. + * The READ_ONCE() matches the WRITE_ONCE() when @flags are modified + * for registered consoles with console_srcu_write_flags(). */ return data_race(READ_ONCE(con->flags)); } -- cgit v1.2.3 From adf6f37d142e14896115929f3892bbc0a86e25bf Mon Sep 17 00:00:00 2001 From: John Ogness Date: Tue, 20 Aug 2024 08:35:39 +0206 Subject: nbcon: Add API to acquire context for non-printing operations Provide functions nbcon_device_try_acquire() and nbcon_device_release() which will try to acquire the nbcon console ownership with NBCON_PRIO_NORMAL and mark it unsafe for handover/takeover. These functions are to be used together with the device-specific locking when performing non-printing activities on the console device. They will allow synchronization against the atomic_write() callback which will be serialized, for higher priority contexts, only by acquiring the console context ownership. Pitfalls: The API requires to be called in a context with migration disabled because it uses per-CPU variables internally. The context is set unsafe for a takeover all the time. It guarantees full serialization against any atomic_write() caller except for the final flush in panic() which might try an unsafe takeover. Signed-off-by: John Ogness Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240820063001.36405-14-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- include/linux/console.h | 2 ++ include/linux/printk.h | 14 ++++++++++++++ 2 files changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/console.h b/include/linux/console.h index aafe3121b74e..3706f944de46 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -322,6 +322,7 @@ struct nbcon_write_context { * * @nbcon_state: State for nbcon consoles * @nbcon_seq: Sequence number of the next record for nbcon to print + * @nbcon_device_ctxt: Context available for non-printing operations * @pbufs: Pointer to nbcon private buffer */ struct console { @@ -417,6 +418,7 @@ struct console { atomic_t __private nbcon_state; atomic_long_t __private nbcon_seq; + struct nbcon_context __private nbcon_device_ctxt; struct printk_buffers *pbufs; }; diff --git a/include/linux/printk.h b/include/linux/printk.h index eee8e97da681..9687089f5ace 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -9,6 +9,8 @@ #include #include +struct console; + extern const char linux_banner[]; extern const char linux_proc_banner[]; @@ -198,6 +200,8 @@ extern asmlinkage void dump_stack_lvl(const char *log_lvl) __cold; extern asmlinkage void dump_stack(void) __cold; void printk_trigger_flush(void); void console_try_replay_all(void); +extern bool nbcon_device_try_acquire(struct console *con); +extern void nbcon_device_release(struct console *con); #else static inline __printf(1, 0) int vprintk(const char *s, va_list args) @@ -280,6 +284,16 @@ static inline void printk_trigger_flush(void) static inline void console_try_replay_all(void) { } + +static inline bool nbcon_device_try_acquire(struct console *con) +{ + return false; +} + +static inline void nbcon_device_release(struct console *con) +{ +} + #endif bool this_cpu_in_panic(void); -- cgit v1.2.3 From 4126f149c48b2ae757d11ea24675f7071ab22ebf Mon Sep 17 00:00:00 2001 From: John Ogness Date: Tue, 20 Aug 2024 08:35:40 +0206 Subject: serial: core: Acquire nbcon context in port->lock wrapper Currently the port->lock wrappers uart_port_lock(), uart_port_unlock() (and their variants) only lock/unlock the spin_lock. If the port is an nbcon console that has implemented the write_atomic() callback, the wrappers must also acquire/release the console context and mark the region as unsafe. This allows general port->lock synchronization to be synchronized against the nbcon write_atomic() callback. Note that __uart_port_using_nbcon() relies on the port->lock being held while a console is added and removed from the console list (i.e. all uart nbcon drivers *must* take the port->lock in their device_lock() callbacks). Signed-off-by: John Ogness Acked-by: Greg Kroah-Hartman Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240820063001.36405-15-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- include/linux/serial_core.h | 82 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 80 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 2cf03ff2056a..4ab65874a850 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -11,6 +11,8 @@ #include #include #include +#include +#include #include #include #include @@ -625,6 +627,60 @@ static inline void uart_port_set_cons(struct uart_port *up, struct console *con) up->cons = con; __uart_port_unlock_irqrestore(up, flags); } + +/* Only for internal port lock wrapper usage. */ +static inline bool __uart_port_using_nbcon(struct uart_port *up) +{ + lockdep_assert_held_once(&up->lock); + + if (likely(!uart_console(up))) + return false; + + /* + * @up->cons is only modified under the port lock. Therefore it is + * certain that it cannot disappear here. + * + * @up->cons->node is added/removed from the console list under the + * port lock. Therefore it is certain that the registration status + * cannot change here, thus @up->cons->flags can be read directly. + */ + if (hlist_unhashed_lockless(&up->cons->node) || + !(up->cons->flags & CON_NBCON) || + !up->cons->write_atomic) { + return false; + } + + return true; +} + +/* Only for internal port lock wrapper usage. */ +static inline bool __uart_port_nbcon_try_acquire(struct uart_port *up) +{ + if (!__uart_port_using_nbcon(up)) + return true; + + return nbcon_device_try_acquire(up->cons); +} + +/* Only for internal port lock wrapper usage. */ +static inline void __uart_port_nbcon_acquire(struct uart_port *up) +{ + if (!__uart_port_using_nbcon(up)) + return; + + while (!nbcon_device_try_acquire(up->cons)) + cpu_relax(); +} + +/* Only for internal port lock wrapper usage. */ +static inline void __uart_port_nbcon_release(struct uart_port *up) +{ + if (!__uart_port_using_nbcon(up)) + return; + + nbcon_device_release(up->cons); +} + /** * uart_port_lock - Lock the UART port * @up: Pointer to UART port structure @@ -632,6 +688,7 @@ static inline void uart_port_set_cons(struct uart_port *up, struct console *con) static inline void uart_port_lock(struct uart_port *up) { spin_lock(&up->lock); + __uart_port_nbcon_acquire(up); } /** @@ -641,6 +698,7 @@ static inline void uart_port_lock(struct uart_port *up) static inline void uart_port_lock_irq(struct uart_port *up) { spin_lock_irq(&up->lock); + __uart_port_nbcon_acquire(up); } /** @@ -651,6 +709,7 @@ static inline void uart_port_lock_irq(struct uart_port *up) static inline void uart_port_lock_irqsave(struct uart_port *up, unsigned long *flags) { spin_lock_irqsave(&up->lock, *flags); + __uart_port_nbcon_acquire(up); } /** @@ -661,7 +720,15 @@ static inline void uart_port_lock_irqsave(struct uart_port *up, unsigned long *f */ static inline bool uart_port_trylock(struct uart_port *up) { - return spin_trylock(&up->lock); + if (!spin_trylock(&up->lock)) + return false; + + if (!__uart_port_nbcon_try_acquire(up)) { + spin_unlock(&up->lock); + return false; + } + + return true; } /** @@ -673,7 +740,15 @@ static inline bool uart_port_trylock(struct uart_port *up) */ static inline bool uart_port_trylock_irqsave(struct uart_port *up, unsigned long *flags) { - return spin_trylock_irqsave(&up->lock, *flags); + if (!spin_trylock_irqsave(&up->lock, *flags)) + return false; + + if (!__uart_port_nbcon_try_acquire(up)) { + spin_unlock_irqrestore(&up->lock, *flags); + return false; + } + + return true; } /** @@ -682,6 +757,7 @@ static inline bool uart_port_trylock_irqsave(struct uart_port *up, unsigned long */ static inline void uart_port_unlock(struct uart_port *up) { + __uart_port_nbcon_release(up); spin_unlock(&up->lock); } @@ -691,6 +767,7 @@ static inline void uart_port_unlock(struct uart_port *up) */ static inline void uart_port_unlock_irq(struct uart_port *up) { + __uart_port_nbcon_release(up); spin_unlock_irq(&up->lock); } @@ -701,6 +778,7 @@ static inline void uart_port_unlock_irq(struct uart_port *up) */ static inline void uart_port_unlock_irqrestore(struct uart_port *up, unsigned long flags) { + __uart_port_nbcon_release(up); spin_unlock_irqrestore(&up->lock, flags); } -- cgit v1.2.3 From 5dde3b7354133846079fa51e55e74ef90a836759 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Tue, 20 Aug 2024 08:35:52 +0206 Subject: printk: nbcon: Add unsafe flushing on panic Add nbcon_atomic_flush_unsafe() to flush all nbcon consoles using the write_atomic() callback and allowing unsafe hostile takeovers. Call this at the end of panic() as a final attempt to flush any pending messages. Note that legacy consoles use unsafe methods for flushing from the beginning of panic (see bust_spinlocks()). Therefore, systems using both legacy and nbcon consoles may still fail to see panic messages due to unsafe legacy console usage. Signed-off-by: John Ogness Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240820063001.36405-27-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- include/linux/printk.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/printk.h b/include/linux/printk.h index 9687089f5ace..2e083f01f8a3 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -202,6 +202,7 @@ void printk_trigger_flush(void); void console_try_replay_all(void); extern bool nbcon_device_try_acquire(struct console *con); extern void nbcon_device_release(struct console *con); +void nbcon_atomic_flush_unsafe(void); #else static inline __printf(1, 0) int vprintk(const char *s, va_list args) @@ -294,6 +295,10 @@ static inline void nbcon_device_release(struct console *con) { } +static inline void nbcon_atomic_flush_unsafe(void) +{ +} + #endif bool this_cpu_in_panic(void); -- cgit v1.2.3 From e35a8884270bae11196eedf3b0a5bf22619f11f2 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Tue, 20 Aug 2024 08:35:55 +0206 Subject: printk: Coordinate direct printing in panic If legacy and nbcon consoles are registered and the nbcon consoles are allowed to flush (i.e. no boot consoles registered), the legacy consoles will no longer perform direct printing on the panic CPU until after the backtrace has been stored. This will give the safe nbcon consoles a chance to print the panic messages before allowing the unsafe legacy consoles to print. If no nbcon consoles are registered or they are not allowed to flush because boot consoles are registered, there is no change in behavior (i.e. legacy consoles will always attempt to print from the printk() caller context). Signed-off-by: John Ogness Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240820063001.36405-30-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- include/linux/printk.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/printk.h b/include/linux/printk.h index 2e083f01f8a3..eca9bb2ee637 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -200,6 +200,7 @@ extern asmlinkage void dump_stack_lvl(const char *log_lvl) __cold; extern asmlinkage void dump_stack(void) __cold; void printk_trigger_flush(void); void console_try_replay_all(void); +void printk_legacy_allow_panic_sync(void); extern bool nbcon_device_try_acquire(struct console *con); extern void nbcon_device_release(struct console *con); void nbcon_atomic_flush_unsafe(void); @@ -286,6 +287,10 @@ static inline void console_try_replay_all(void) { } +static inline void printk_legacy_allow_panic_sync(void) +{ +} + static inline bool nbcon_device_try_acquire(struct console *con) { return false; -- cgit v1.2.3 From ecb5e1aa82c86642ec1eaafefd4e317dfba3a238 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 20 Aug 2024 08:35:57 +0206 Subject: printk: nbcon: Implement emergency sections In emergency situations (something has gone wrong but the system continues to operate), usually important information (such as a backtrace) is generated via printk(). This information should be pushed out to the consoles ASAP. Add per-CPU emergency nesting tracking because an emergency can arise while in an emergency situation. Add functions to mark the beginning and end of emergency sections where the urgent messages are generated. Perform direct console flushing at the emergency priority if the current CPU is in an emergency state and it is safe to do so. Note that the emergency state is not system-wide. While one CPU is in an emergency state, another CPU may attempt to print console messages at normal priority. Also note that printk() already attempts to flush consoles in the caller context for normal priority. However, follow-up changes will introduce printing kthreads, in which case the normal priority printk() calls will offload to the kthreads. Co-developed-by: John Ogness Signed-off-by: John Ogness Signed-off-by: Thomas Gleixner (Intel) Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240820063001.36405-32-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- include/linux/console.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/console.h b/include/linux/console.h index 3706f944de46..9a13f91b0c43 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -553,10 +553,14 @@ static inline bool console_is_registered(const struct console *con) hlist_for_each_entry(con, &console_list, node) #ifdef CONFIG_PRINTK +extern void nbcon_cpu_emergency_enter(void); +extern void nbcon_cpu_emergency_exit(void); extern bool nbcon_can_proceed(struct nbcon_write_context *wctxt); extern bool nbcon_enter_unsafe(struct nbcon_write_context *wctxt); extern bool nbcon_exit_unsafe(struct nbcon_write_context *wctxt); #else +static inline void nbcon_cpu_emergency_enter(void) { } +static inline void nbcon_cpu_emergency_exit(void) { } static inline bool nbcon_can_proceed(struct nbcon_write_context *wctxt) { return false; } static inline bool nbcon_enter_unsafe(struct nbcon_write_context *wctxt) { return false; } static inline bool nbcon_exit_unsafe(struct nbcon_write_context *wctxt) { return false; } -- cgit v1.2.3 From bd07d864522e7c3e4ee364e91aee8754992f5855 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Wed, 4 Sep 2024 14:11:20 +0206 Subject: printk: nbcon: Add function for printers to reacquire ownership Since ownership can be lost at any time due to handover or takeover, a printing context _must_ be prepared to back out immediately and carefully. However, there are scenarios where the printing context must reacquire ownership in order to finalize or revert hardware changes. One such example is when interrupts are disabled during printing. No other context will automagically re-enable the interrupts. For this case, the disabling context _must_ reacquire nbcon ownership so that it can re-enable the interrupts. Provide nbcon_reacquire_nobuf() for exactly this purpose. It allows a printing context to reacquire ownership using the same priority as its previous ownership. Note that after a successful reacquire the printing context will have no output buffer because that has been lost. This function cannot be used to resume printing. Signed-off-by: John Ogness Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240904120536.115780-2-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- include/linux/console.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/console.h b/include/linux/console.h index 9a13f91b0c43..88050d30a9cc 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -366,6 +366,10 @@ struct console { * * The callback should allow the takeover whenever it is safe. It * increases the chance to see messages when the system is in trouble. + * If the driver must reacquire ownership in order to finalize or + * revert hardware changes, nbcon_reacquire_nobuf() can be used. + * However, on reacquire the buffer content is no longer available. A + * reacquire cannot be used to resume printing. * * The callback can be called from any context (including NMI). * Therefore it must avoid usage of any locking and instead rely @@ -558,12 +562,14 @@ extern void nbcon_cpu_emergency_exit(void); extern bool nbcon_can_proceed(struct nbcon_write_context *wctxt); extern bool nbcon_enter_unsafe(struct nbcon_write_context *wctxt); extern bool nbcon_exit_unsafe(struct nbcon_write_context *wctxt); +extern void nbcon_reacquire_nobuf(struct nbcon_write_context *wctxt); #else static inline void nbcon_cpu_emergency_enter(void) { } static inline void nbcon_cpu_emergency_exit(void) { } static inline bool nbcon_can_proceed(struct nbcon_write_context *wctxt) { return false; } static inline bool nbcon_enter_unsafe(struct nbcon_write_context *wctxt) { return false; } static inline bool nbcon_exit_unsafe(struct nbcon_write_context *wctxt) { return false; } +static inline void nbcon_reacquire_nobuf(struct nbcon_write_context *wctxt) { } #endif extern int console_set_on_cmdline; -- cgit v1.2.3 From 76f258bf3f2aae570209319944703a92ac64e29e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 4 Sep 2024 14:11:25 +0206 Subject: printk: nbcon: Introduce printer kthreads Provide the main implementation for running a printer kthread per nbcon console that is takeover/handover aware. This includes: - new mandatory write_thread() callback - kthread creation - kthread main printing loop - kthread wakeup mechanism - kthread shutdown kthread creation is a bit tricky because consoles may register before kthreads can be created. In such cases, registration will succeed, even though no kthread exists. Once kthreads can be created, an early_initcall will set @printk_kthreads_ready. If there are no registered boot consoles, the early_initcall creates the kthreads for all registered nbcon consoles. If kthread creation fails, the related console is unregistered. If there are registered boot consoles when @printk_kthreads_ready is set, no kthreads are created until the final boot console unregisters. Once kthread creation finally occurs, @printk_kthreads_running is set so that the system knows kthreads are available for all registered nbcon consoles. If @printk_kthreads_running is already set when the console is registering, the kthread is created during registration. If kthread creation fails, the registration will fail. Until @printk_kthreads_running is set, console printing occurs directly via the console_lock. kthread shutdown on system shutdown/reboot is necessary to ensure the printer kthreads finish their printing so that the system can cleanly transition back to direct printing via the console_lock in order to reliably push out the final shutdown/reboot messages. @printk_kthreads_running is cleared before shutting down the individual kthreads. The kthread uses a new mandatory write_thread() callback that is called with both device_lock() and the console context acquired. The console ownership handling is necessary for synchronization against write_atomic() which is synchronized only via the console context ownership. The device_lock() serializes acquiring the console context with NBCON_PRIO_NORMAL. It is needed in case the device_lock() does not disable preemption. It prevents the following race: CPU0 CPU1 [ task A ] nbcon_context_try_acquire() # success with NORMAL prio # .unsafe == false; // safe for takeover [ schedule: task A -> B ] WARN_ON() nbcon_atomic_flush_pending() nbcon_context_try_acquire() # success with EMERGENCY prio # flushing nbcon_context_release() # HERE: con->nbcon_state is free # to take by anyone !!! nbcon_context_try_acquire() # success with NORMAL prio [ task B ] [ schedule: task B -> A ] nbcon_enter_unsafe() nbcon_context_can_proceed() BUG: nbcon_context_can_proceed() returns "true" because the console is owned by a context on CPU0 with NBCON_PRIO_NORMAL. But it should return "false". The console is owned by a context from task B and we do the check in a context from task A. Note that with these changes, the printer kthreads do not yet take over full responsibility for nbcon printing during normal operation. These changes only focus on the lifecycle of the kthreads. Co-developed-by: John Ogness Signed-off-by: John Ogness Signed-off-by: Thomas Gleixner (Intel) Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240904120536.115780-7-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- include/linux/console.h | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) (limited to 'include/linux') diff --git a/include/linux/console.h b/include/linux/console.h index 88050d30a9cc..788ce9c829f6 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -16,7 +16,9 @@ #include #include +#include #include +#include #include #include @@ -324,6 +326,9 @@ struct nbcon_write_context { * @nbcon_seq: Sequence number of the next record for nbcon to print * @nbcon_device_ctxt: Context available for non-printing operations * @pbufs: Pointer to nbcon private buffer + * @kthread: Printer kthread for this console + * @rcuwait: RCU-safe wait object for @kthread waking + * @irq_work: Defer @kthread waking to IRQ work context */ struct console { char name[16]; @@ -377,6 +382,37 @@ struct console { */ void (*write_atomic)(struct console *con, struct nbcon_write_context *wctxt); + /** + * @write_thread: + * + * NBCON callback to write out text in task context. + * + * This callback must be called only in task context with both + * device_lock() and the nbcon console acquired with + * NBCON_PRIO_NORMAL. + * + * The same rules for console ownership verification and unsafe + * sections handling applies as with write_atomic(). + * + * The console ownership handling is necessary for synchronization + * against write_atomic() which is synchronized only via the context. + * + * The device_lock() provides the primary serialization for operations + * on the device. It might be as relaxed (mutex)[*] or as tight + * (disabled preemption and interrupts) as needed. It allows + * the kthread to operate in the least restrictive mode[**]. + * + * [*] Standalone nbcon_context_try_acquire() is not safe with + * the preemption enabled, see nbcon_owner_matches(). But it + * can be safe when always called in the preemptive context + * under the device_lock(). + * + * [**] The device_lock() makes sure that nbcon_context_try_acquire() + * would never need to spin which is important especially with + * PREEMPT_RT. + */ + void (*write_thread)(struct console *con, struct nbcon_write_context *wctxt); + /** * @device_lock: * @@ -423,7 +459,11 @@ struct console { atomic_t __private nbcon_state; atomic_long_t __private nbcon_seq; struct nbcon_context __private nbcon_device_ctxt; + struct printk_buffers *pbufs; + struct task_struct *kthread; + struct rcuwait rcuwait; + struct irq_work irq_work; }; #ifdef CONFIG_LOCKDEP -- cgit v1.2.3 From 5102981d5e2a5a7a12424b5d26c7524ac2899898 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Wed, 4 Sep 2024 14:11:30 +0206 Subject: printk: nbcon: Show replay message on takeover An emergency or panic context can takeover console ownership while the current owner was printing a printk message. The atomic printer will re-print the message that the previous owner was printing. However, this can look confusing to the user and may even seem as though a message was lost. [3430014.1 [3430014.181123] usb 1-2: Product: USB Audio Add a new field @nbcon_prev_seq to struct console to track the sequence number to print that was assigned to the previous console owner. If this matches the sequence number to print that the current owner is assigned, then a takeover must have occurred. In this case, print an additional message to inform the user that the previous message is being printed again. [3430014.1 ** replaying previous printk message ** [3430014.181123] usb 1-2: Product: USB Audio Signed-off-by: John Ogness Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240904120536.115780-12-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- include/linux/console.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/console.h b/include/linux/console.h index 788ce9c829f6..eba367bf605d 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -325,6 +325,7 @@ struct nbcon_write_context { * @nbcon_state: State for nbcon consoles * @nbcon_seq: Sequence number of the next record for nbcon to print * @nbcon_device_ctxt: Context available for non-printing operations + * @nbcon_prev_seq: Seq num the previous nbcon owner was assigned to print * @pbufs: Pointer to nbcon private buffer * @kthread: Printer kthread for this console * @rcuwait: RCU-safe wait object for @kthread waking @@ -459,6 +460,7 @@ struct console { atomic_t __private nbcon_state; atomic_long_t __private nbcon_seq; struct nbcon_context __private nbcon_device_ctxt; + atomic_long_t __private nbcon_prev_seq; struct printk_buffers *pbufs; struct task_struct *kthread; -- cgit v1.2.3