From 52103be07d8b08311955f8c30e535c2dda290cf4 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 26 May 2020 18:10:59 +0200 Subject: smp: Optimize flush_smp_call_function_queue() The call_single_queue can contain (two) different callbacks, synchronous and asynchronous. The current interrupt handler runs them in-order, which means that remote CPUs that are waiting for their synchronous call can be delayed by running asynchronous callbacks. Rework the interrupt handler to first run the synchronous callbacks. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20200526161907.836818381@infradead.org --- kernel/smp.c | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) (limited to 'kernel/smp.c') diff --git a/kernel/smp.c b/kernel/smp.c index 786092aabdcd..db2f73808db5 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -209,9 +209,9 @@ void generic_smp_call_function_single_interrupt(void) */ static void flush_smp_call_function_queue(bool warn_cpu_offline) { - struct llist_head *head; - struct llist_node *entry; call_single_data_t *csd, *csd_next; + struct llist_node *entry, *prev; + struct llist_head *head; static bool warned; lockdep_assert_irqs_disabled(); @@ -235,20 +235,39 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline) csd->func); } + /* + * First; run all SYNC callbacks, people are waiting for us. + */ + prev = NULL; llist_for_each_entry_safe(csd, csd_next, entry, llist) { smp_call_func_t func = csd->func; void *info = csd->info; /* Do we wait until *after* callback? */ if (csd->flags & CSD_FLAG_SYNCHRONOUS) { + if (prev) { + prev->next = &csd_next->llist; + } else { + entry = &csd_next->llist; + } func(info); csd_unlock(csd); } else { - csd_unlock(csd); - func(info); + prev = &csd->llist; } } + /* + * Second; run all !SYNC callbacks.
+ */ + llist_for_each_entry_safe(csd, csd_next, entry, llist) { + smp_call_func_t func = csd->func; + void *info = csd->info; + + csd_unlock(csd); + func(info); + } + /* * Handle irq works queued remotely by irq_work_queue_on(). * Smp functions above are typically synchronous so they -- cgit v1.2.3 From afaa653c564da38c0b34c4baba31e88c46a8764c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 26 May 2020 18:11:00 +0200 Subject: smp: Move irq_work_run() out of flush_smp_call_function_queue() This ensures flush_smp_call_function_queue() is strictly about call_single_queue. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20200526161907.895109676@infradead.org --- kernel/smp.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'kernel/smp.c') diff --git a/kernel/smp.c b/kernel/smp.c index db2f73808db5..f720e38e880d 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -84,6 +84,7 @@ int smpcfd_dying_cpu(unsigned int cpu) * still pending. */ flush_smp_call_function_queue(false); + irq_work_run(); return 0; } @@ -191,6 +192,14 @@ static int generic_exec_single(int cpu, call_single_data_t *csd, void generic_smp_call_function_single_interrupt(void) { flush_smp_call_function_queue(true); + + /* + * Handle irq works queued remotely by irq_work_queue_on(). + * Smp functions above are typically synchronous so they + * better run first since some other CPUs may be busy waiting + * for them. + */ + irq_work_run(); } /** @@ -267,14 +276,6 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline) csd_unlock(csd); func(info); } - - /* - * Handle irq works queued remotely by irq_work_queue_on(). - * Smp functions above are typically synchronous so they - * better run first since some other CPUs may be busy waiting - * for them. 
- */ - irq_work_run(); } /* -- cgit v1.2.3 From b2a02fc43a1f40ef4eb2fb2b06357382608d4d84 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 26 May 2020 18:11:01 +0200 Subject: smp: Optimize send_call_function_single_ipi() Just like the ttwu_queue_remote() IPI, make use of _TIF_POLLING_NRFLAG to avoid sending IPIs to idle CPUs. [ mingo: Fix UP build bug. ] Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20200526161907.953304789@infradead.org --- kernel/smp.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'kernel/smp.c') diff --git a/kernel/smp.c b/kernel/smp.c index f720e38e880d..9f1181375141 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -135,6 +135,8 @@ static __always_inline void csd_unlock(call_single_data_t *csd) static DEFINE_PER_CPU_SHARED_ALIGNED(call_single_data_t, csd_data); +extern void send_call_function_single_ipi(int cpu); + /* * Insert a previously allocated call_single_data_t element * for execution on the given CPU. data must already have @@ -178,7 +180,7 @@ static int generic_exec_single(int cpu, call_single_data_t *csd, * equipped to do the right thing... */ if (llist_add(&csd->llist, &per_cpu(call_single_queue, cpu))) - arch_send_call_function_single_ipi(cpu); + send_call_function_single_ipi(cpu); return 0; } @@ -278,6 +280,18 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline) } } +void flush_smp_call_function_from_idle(void) +{ + unsigned long flags; + + if (llist_empty(this_cpu_ptr(&call_single_queue))) + return; + + local_irq_save(flags); + flush_smp_call_function_queue(true); + local_irq_restore(flags); +} + /* * smp_call_function_single - Run a function on a specific CPU * @func: The function to run. This must be fast and non-blocking. 
-- cgit v1.2.3 From 4b44a21dd640b692d4e9b12d3e37c24825f90baa Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 26 May 2020 18:11:02 +0200 Subject: irq_work, smp: Allow irq_work on call_single_queue Currently irq_work_queue_on() will issue an unconditional arch_send_call_function_single_ipi() and has the handler do irq_work_run(). This is unfortunate in that it makes the IPI handler look at a second cacheline and it misses the opportunity to avoid the IPI. Instead note that struct irq_work and struct __call_single_data are very similar in layout, so use a few bits in the flags word to encode a type and stick the irq_work on the call_single_queue list. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20200526161908.011635912@infradead.org --- kernel/smp.c | 119 ++++++++++++++++++++++++++++++++++++----------------------- 1 file changed, 73 insertions(+), 46 deletions(-) (limited to 'kernel/smp.c') diff --git a/kernel/smp.c b/kernel/smp.c index 9f1181375141..856562b80794 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -23,10 +23,8 @@ #include "smpboot.h" -enum { - CSD_FLAG_LOCK = 0x01, - CSD_FLAG_SYNCHRONOUS = 0x02, -}; + +#define CSD_TYPE(_csd) ((_csd)->flags & CSD_FLAG_TYPE_MASK) struct call_function_data { call_single_data_t __percpu *csd; @@ -137,15 +135,33 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(call_single_data_t, csd_data); extern void send_call_function_single_ipi(int cpu); +void __smp_call_single_queue(int cpu, struct llist_node *node) +{ + /* + * The list addition should be visible before sending the IPI + * handler locks the list to pull the entry off it because of + * normal cache coherency rules implied by spinlocks. + * + * If IPIs can go out of order to the cache coherency protocol + * in an architecture, sufficient synchronisation should be added + * to arch code to make it appear to obey cache coherency WRT + * locking and barrier primitives. 
Generic code isn't really + * equipped to do the right thing... + */ + if (llist_add(node, &per_cpu(call_single_queue, cpu))) + send_call_function_single_ipi(cpu); +} + /* * Insert a previously allocated call_single_data_t element * for execution on the given CPU. data must already have * ->func, ->info, and ->flags set. */ -static int generic_exec_single(int cpu, call_single_data_t *csd, - smp_call_func_t func, void *info) +static int generic_exec_single(int cpu, call_single_data_t *csd) { if (cpu == smp_processor_id()) { + smp_call_func_t func = csd->func; + void *info = csd->info; unsigned long flags; /* @@ -159,28 +175,12 @@ static int generic_exec_single(int cpu, call_single_data_t *csd, return 0; } - if ((unsigned)cpu >= nr_cpu_ids || !cpu_online(cpu)) { csd_unlock(csd); return -ENXIO; } - csd->func = func; - csd->info = info; - - /* - * The list addition should be visible before sending the IPI - * handler locks the list to pull the entry off it because of - * normal cache coherency rules implied by spinlocks. - * - * If IPIs can go out of order to the cache coherency protocol - * in an architecture, sufficient synchronisation should be added - * to arch code to make it appear to obey cache coherency WRT - * locking and barrier primitives. Generic code isn't really - * equipped to do the right thing... - */ - if (llist_add(&csd->llist, &per_cpu(call_single_queue, cpu))) - send_call_function_single_ipi(cpu); + __smp_call_single_queue(cpu, &csd->llist); return 0; } @@ -194,16 +194,10 @@ static int generic_exec_single(int cpu, call_single_data_t *csd, void generic_smp_call_function_single_interrupt(void) { flush_smp_call_function_queue(true); - - /* - * Handle irq works queued remotely by irq_work_queue_on(). - * Smp functions above are typically synchronous so they - * better run first since some other CPUs may be busy waiting - * for them. 
- */ - irq_work_run(); } +extern void irq_work_single(void *); + /** * flush_smp_call_function_queue - Flush pending smp-call-function callbacks * @@ -241,9 +235,21 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline) * We don't have to use the _safe() variant here * because we are not invoking the IPI handlers yet. */ - llist_for_each_entry(csd, entry, llist) - pr_warn("IPI callback %pS sent to offline CPU\n", - csd->func); + llist_for_each_entry(csd, entry, llist) { + switch (CSD_TYPE(csd)) { + case CSD_TYPE_ASYNC: + case CSD_TYPE_SYNC: + case CSD_TYPE_IRQ_WORK: + pr_warn("IPI callback %pS sent to offline CPU\n", + csd->func); + break; + + default: + pr_warn("IPI callback, unknown type %d, sent to offline CPU\n", + CSD_TYPE(csd)); + break; + } + } } /* @@ -251,16 +257,17 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline) */ prev = NULL; llist_for_each_entry_safe(csd, csd_next, entry, llist) { - smp_call_func_t func = csd->func; - void *info = csd->info; - /* Do we wait until *after* callback? */ - if (csd->flags & CSD_FLAG_SYNCHRONOUS) { + if (CSD_TYPE(csd) == CSD_TYPE_SYNC) { + smp_call_func_t func = csd->func; + void *info = csd->info; + if (prev) { prev->next = &csd_next->llist; } else { entry = &csd_next->llist; } + func(info); csd_unlock(csd); } else { @@ -272,11 +279,17 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline) * Second; run all !SYNC callbacks. 
*/ llist_for_each_entry_safe(csd, csd_next, entry, llist) { - smp_call_func_t func = csd->func; - void *info = csd->info; + int type = CSD_TYPE(csd); - csd_unlock(csd); - func(info); + if (type == CSD_TYPE_ASYNC) { + smp_call_func_t func = csd->func; + void *info = csd->info; + + csd_unlock(csd); + func(info); + } else if (type == CSD_TYPE_IRQ_WORK) { + irq_work_single(csd); + } } } @@ -305,7 +318,7 @@ int smp_call_function_single(int cpu, smp_call_func_t func, void *info, { call_single_data_t *csd; call_single_data_t csd_stack = { - .flags = CSD_FLAG_LOCK | CSD_FLAG_SYNCHRONOUS, + .flags = CSD_FLAG_LOCK | CSD_TYPE_SYNC, }; int this_cpu; int err; @@ -339,7 +352,10 @@ int smp_call_function_single(int cpu, smp_call_func_t func, void *info, csd_lock(csd); } - err = generic_exec_single(cpu, csd, func, info); + csd->func = func; + csd->info = info; + + err = generic_exec_single(cpu, csd); if (wait) csd_lock_wait(csd); @@ -385,7 +401,7 @@ int smp_call_function_single_async(int cpu, call_single_data_t *csd) csd->flags = CSD_FLAG_LOCK; smp_wmb(); - err = generic_exec_single(cpu, csd, csd->func, csd->info); + err = generic_exec_single(cpu, csd); out: preempt_enable(); @@ -500,7 +516,7 @@ static void smp_call_function_many_cond(const struct cpumask *mask, csd_lock(csd); if (wait) - csd->flags |= CSD_FLAG_SYNCHRONOUS; + csd->flags |= CSD_TYPE_SYNC; csd->func = func; csd->info = info; if (llist_add(&csd->llist, &per_cpu(call_single_queue, cpu))) @@ -632,6 +648,17 @@ void __init smp_init(void) { int num_nodes, num_cpus; + /* + * Ensure struct irq_work layout matches so that + * flush_smp_call_function_queue() can do horrible things. 
+ */ + BUILD_BUG_ON(offsetof(struct irq_work, llnode) != + offsetof(struct __call_single_data, llist)); + BUILD_BUG_ON(offsetof(struct irq_work, func) != + offsetof(struct __call_single_data, func)); + BUILD_BUG_ON(offsetof(struct irq_work, flags) != + offsetof(struct __call_single_data, flags)); + idle_threads_init(); cpuhp_threads_init(); -- cgit v1.2.3 From a148866489fbe243c936fe43e4525d8dbfa0318f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 26 May 2020 18:11:04 +0200 Subject: sched: Replace rq::wake_list The recent commit: 90b5363acd47 ("sched: Clean up scheduler_ipi()") got smp_call_function_single_async() subtly wrong. Even though it will return -EBUSY when trying to re-use a csd, that condition is not atomic and still requires external serialization. The change in ttwu_queue_remote() got this wrong. While on first reading ttwu_queue_remote() has an atomic test-and-set that appears to serialize the use, the matching 'release' is not in the right place to actually guarantee this serialization. The actual race is vs the sched_ttwu_pending() call in the idle loop; that can run the wakeup-list without consuming the CSD. Instead of trying to chain the lists, merge them. 
Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20200526161908.129371594@infradead.org --- kernel/smp.c | 47 ++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 40 insertions(+), 7 deletions(-) (limited to 'kernel/smp.c') diff --git a/kernel/smp.c b/kernel/smp.c index 856562b80794..0d61dc060b01 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -196,6 +196,7 @@ void generic_smp_call_function_single_interrupt(void) flush_smp_call_function_queue(true); } +extern void sched_ttwu_pending(void *); extern void irq_work_single(void *); /** @@ -244,6 +245,10 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline) csd->func); break; + case CSD_TYPE_TTWU: + pr_warn("IPI task-wakeup sent to offline CPU\n"); + break; + default: pr_warn("IPI callback, unknown type %d, sent to offline CPU\n", CSD_TYPE(csd)); @@ -275,22 +280,43 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline) } } + if (!entry) + return; + /* * Second; run all !SYNC callbacks. */ + prev = NULL; llist_for_each_entry_safe(csd, csd_next, entry, llist) { int type = CSD_TYPE(csd); - if (type == CSD_TYPE_ASYNC) { - smp_call_func_t func = csd->func; - void *info = csd->info; + if (type != CSD_TYPE_TTWU) { + if (prev) { + prev->next = &csd_next->llist; + } else { + entry = &csd_next->llist; + } - csd_unlock(csd); - func(info); - } else if (type == CSD_TYPE_IRQ_WORK) { - irq_work_single(csd); + if (type == CSD_TYPE_ASYNC) { + smp_call_func_t func = csd->func; + void *info = csd->info; + + csd_unlock(csd); + func(info); + } else if (type == CSD_TYPE_IRQ_WORK) { + irq_work_single(csd); + } + + } else { + prev = &csd->llist; } } + + /* + * Third; only CSD_TYPE_TTWU is left, issue those. 
+ */ + if (entry) + sched_ttwu_pending(entry); } void flush_smp_call_function_from_idle(void) @@ -659,6 +685,13 @@ void __init smp_init(void) BUILD_BUG_ON(offsetof(struct irq_work, flags) != offsetof(struct __call_single_data, flags)); + /* + * Assert the CSD_TYPE_TTWU layout is similar enough + * for task_struct to be on the @call_single_queue. + */ + BUILD_BUG_ON(offsetof(struct task_struct, wake_entry_type) - offsetof(struct task_struct, wake_entry) != + offsetof(struct __call_single_data, flags) - offsetof(struct __call_single_data, llist)); + idle_threads_init(); cpuhp_threads_init(); -- cgit v1.2.3 From 1f8db4150536431b031585ecc2a6793f69245de2 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 28 May 2020 11:01:34 +0200 Subject: sched/headers: Split out open-coded prototypes into kernel/sched/smp.h Move the prototypes for sched_ttwu_pending() and send_call_function_single_ipi() into the newly created kernel/sched/smp.h header, to make sure they are all the same, and to keep architectures happy that use -Wmissing-prototypes.
Signed-off-by: Ingo Molnar --- kernel/smp.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'kernel/smp.c') diff --git a/kernel/smp.c b/kernel/smp.c index 0d61dc060b01..4dec04f7fdc5 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -22,7 +22,7 @@ #include #include "smpboot.h" - +#include "sched/smp.h" #define CSD_TYPE(_csd) ((_csd)->flags & CSD_FLAG_TYPE_MASK) @@ -133,8 +133,6 @@ static __always_inline void csd_unlock(call_single_data_t *csd) static DEFINE_PER_CPU_SHARED_ALIGNED(call_single_data_t, csd_data); -extern void send_call_function_single_ipi(int cpu); - void __smp_call_single_queue(int cpu, struct llist_node *node) { /* @@ -196,7 +194,6 @@ void generic_smp_call_function_single_interrupt(void) flush_smp_call_function_queue(true); } -extern void sched_ttwu_pending(void *); extern void irq_work_single(void *); /** -- cgit v1.2.3 From 25de110d148666752dc0e0da7a0b69de31cd7098 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 2 Jun 2020 12:08:39 +0200 Subject: irq_work: Define irq_work_single() on !CONFIG_IRQ_WORK too Some SMP platforms don't have CONFIG_IRQ_WORK defined, resulting in a link error at build time. Define a stub and clean up the prototype definitions. Reported-by: kbuild test robot Signed-off-by: Ingo Molnar Acked-by: Peter Zijlstra Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- kernel/smp.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'kernel/smp.c') diff --git a/kernel/smp.c b/kernel/smp.c index 4dec04f7fdc5..c80486a7e3b8 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -194,8 +194,6 @@ void generic_smp_call_function_single_interrupt(void) flush_smp_call_function_queue(true); } -extern void irq_work_single(void *); - /** * flush_smp_call_function_queue - Flush pending smp-call-function callbacks * -- cgit v1.2.3