diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2025-09-30 15:55:25 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2025-09-30 15:55:25 -0700 |
commit | 3b2074c77d25f453247163300d5638adfab4e4fa (patch) | |
tree | 8f389af5fd4feb1d4a59d7d2b2fa7ff47273d67c | |
parent | 1d17e808cf2aad182f0eb2ea83e329e4a6795428 (diff) | |
parent | 3253cb49cbad4772389d6ef55be75db1f97da910 (diff) |
Merge tag 'irq-core-2025-09-29' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull irq core updates from Thomas Gleixner:
"A set of updates for the interrupt core subsystem:
- Introduce irq_chip_[startup|shutdown]_parent() to prepare for
addressing a few short comings in the PCI/MSI interrupt subsystem.
It allows to utilize the interrupt chip startup/shutdown callbacks
for initializing the interrupt chip hierarchy properly on certain
RISCV implementations and provides a mechanism to reduce the
overhead of masking and unmasking PCI/MSI interrupts during
operation when the underlying MSI provider can mask the interrupt.
The actual usage comes with the interrupt driver pull request.
- Add generic error handling for devm_request_*_irq()
This allows to remove the zoo of random error printk's all over the
usage sites.
- Add a mechanism to warn about long-running interrupt handlers
Long running interrupt handlers can introduce latencies and
tracking them down is a tedious task. The tracking has to be
enabled with a threshold on the kernel command line and utilizes a
static branch to remove the overhead when disabled.
- Update and extend the selftests which validate the CPU hotplug
interrupt migration logic
- Allow dropping the per CPU softirq lock on PREEMPT_RT kernels,
which causes contention and latencies all over the place.
The serialization requirements have been pushed down into the
actual affected usage sites already.
- The usual small cleanups and improvements"
* tag 'irq-core-2025-09-29' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
softirq: Allow to drop the softirq-BKL lock on PREEMPT_RT
softirq: Provide a handshake for canceling tasklets via polling
genirq/test: Ensure CPU 1 is online for hotplug test
genirq/test: Drop CONFIG_GENERIC_IRQ_MIGRATION assumptions
genirq/test: Depend on SPARSE_IRQ
genirq/test: Fail early if interrupt request fails
genirq/test: Factor out fake-virq setup
genirq/test: Select IRQ_DOMAIN
genirq/test: Fix depth tests on architectures with NOREQUEST by default.
genirq: Add support for warning on long-running interrupt handlers
genirq/devres: Add error handling in devm_request_*_irq()
genirq: Add irq_chip_(startup/shutdown)_parent()
genirq: Remove GENERIC_IRQ_LEGACY
-rw-r--r-- | Documentation/admin-guide/kernel-parameters.txt | 5 | ||||
-rw-r--r-- | include/linux/irq.h | 6 | ||||
-rw-r--r-- | kernel/Kconfig.preempt | 13 | ||||
-rw-r--r-- | kernel/irq/Kconfig | 6 | ||||
-rw-r--r-- | kernel/irq/chip.c | 37 | ||||
-rw-r--r-- | kernel/irq/devres.c | 127 | ||||
-rw-r--r-- | kernel/irq/handle.c | 49 | ||||
-rw-r--r-- | kernel/irq/irq_test.c | 55 | ||||
-rw-r--r-- | kernel/irq/irqdesc.c | 7 | ||||
-rw-r--r-- | kernel/softirq.c | 145 |
10 files changed, 344 insertions, 106 deletions
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index bf1e6ca6aeb8..e019db1633fd 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2606,6 +2606,11 @@ for it. Intended to get systems with badly broken firmware running. + irqhandler.duration_warn_us= [KNL] + Warn if an IRQ handler exceeds the specified duration + threshold in microseconds. Useful for identifying + long-running IRQs in the system. + irqpoll [HW] When an interrupt is not handled search all handlers for it. Also check all handlers each timer diff --git a/include/linux/irq.h b/include/linux/irq.h index 1d6b606a81ef..c67e76fbcc07 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -669,6 +669,8 @@ extern int irq_chip_set_parent_state(struct irq_data *data, extern int irq_chip_get_parent_state(struct irq_data *data, enum irqchip_irq_state which, bool *state); +extern void irq_chip_shutdown_parent(struct irq_data *data); +extern unsigned int irq_chip_startup_parent(struct irq_data *data); extern void irq_chip_enable_parent(struct irq_data *data); extern void irq_chip_disable_parent(struct irq_data *data); extern void irq_chip_ack_parent(struct irq_data *data); @@ -976,10 +978,6 @@ static inline void irq_free_desc(unsigned int irq) irq_free_descs(irq, 1); } -#ifdef CONFIG_GENERIC_IRQ_LEGACY -void irq_init_desc(unsigned int irq); -#endif - /** * struct irq_chip_regs - register offsets for struct irq_gci * @enable: Enable register offset to reg_base diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt index 54ea59ff8fbe..da326800c1c9 100644 --- a/kernel/Kconfig.preempt +++ b/kernel/Kconfig.preempt @@ -103,6 +103,19 @@ config PREEMPT_RT Select this if you are building a kernel for systems which require real-time guarantees. +config PREEMPT_RT_NEEDS_BH_LOCK + bool "Enforce softirq synchronisation on PREEMPT_RT" + depends on PREEMPT_RT + help + Enforce synchronisation across the softirqs context. On PREEMPT_RT + the softirq is preemptible. This enforces the same per-CPU BLK + semantic non-PREEMPT_RT builds have. This should not be needed + because per-CPU locks were added to avoid the per-CPU BKL. + + This switch provides the old behaviour for testing reasons. Select + this if you suspect an error with preemptible softirq and want test + the old synchronized behaviour. + config PREEMPT_COUNT bool diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig index 1da5e9d9da71..1b4254d19a73 100644 --- a/kernel/irq/Kconfig +++ b/kernel/irq/Kconfig @@ -6,10 +6,6 @@ menu "IRQ subsystem" config MAY_HAVE_SPARSE_IRQ bool -# Legacy support, required for itanic -config GENERIC_IRQ_LEGACY - bool - # Enable the generic irq autoprobe mechanism config GENERIC_IRQ_PROBE bool @@ -147,7 +143,9 @@ config GENERIC_IRQ_KEXEC_CLEAR_VM_FORWARD config IRQ_KUNIT_TEST bool "KUnit tests for IRQ management APIs" if !KUNIT_ALL_TESTS depends on KUNIT=y + depends on SPARSE_IRQ default KUNIT_ALL_TESTS + select IRQ_DOMAIN imply SMP help This option enables KUnit tests for the IRQ subsystem API. These are diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 0d0276378c70..3ffa0d80ddd1 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -1260,6 +1260,43 @@ int irq_chip_get_parent_state(struct irq_data *data, EXPORT_SYMBOL_GPL(irq_chip_get_parent_state); /** + * irq_chip_shutdown_parent - Shutdown the parent interrupt + * @data: Pointer to interrupt specific data + * + * Invokes the irq_shutdown() callback of the parent if available or falls + * back to irq_chip_disable_parent(). + */ +void irq_chip_shutdown_parent(struct irq_data *data) +{ + struct irq_data *parent = data->parent_data; + + if (parent->chip->irq_shutdown) + parent->chip->irq_shutdown(parent); + else + irq_chip_disable_parent(data); +} +EXPORT_SYMBOL_GPL(irq_chip_shutdown_parent); + +/** + * irq_chip_startup_parent - Startup the parent interrupt + * @data: Pointer to interrupt specific data + * + * Invokes the irq_startup() callback of the parent if available or falls + * back to irq_chip_enable_parent(). + */ +unsigned int irq_chip_startup_parent(struct irq_data *data) +{ + struct irq_data *parent = data->parent_data; + + if (parent->chip->irq_startup) + return parent->chip->irq_startup(parent); + + irq_chip_enable_parent(data); + return 0; +} +EXPORT_SYMBOL_GPL(irq_chip_startup_parent); + +/** * irq_chip_enable_parent - Enable the parent interrupt (defaults to unmask if * NULL) * @data: Pointer to interrupt specific data diff --git a/kernel/irq/devres.c b/kernel/irq/devres.c index eb16a58e0322..b41188698622 100644 --- a/kernel/irq/devres.c +++ b/kernel/irq/devres.c @@ -30,29 +30,22 @@ static int devm_irq_match(struct device *dev, void *res, void *data) return this->irq == match->irq && this->dev_id == match->dev_id; } -/** - * devm_request_threaded_irq - allocate an interrupt line for a managed device - * @dev: device to request interrupt for - * @irq: Interrupt line to allocate - * @handler: Function to be called when the IRQ occurs - * @thread_fn: function to be called in a threaded interrupt context. NULL - * for devices which handle everything in @handler - * @irqflags: Interrupt type flags - * @devname: An ascii name for the claiming device, dev_name(dev) if NULL - * @dev_id: A cookie passed back to the handler function - * - * Except for the extra @dev argument, this function takes the - * same arguments and performs the same function as - * request_threaded_irq(). IRQs requested with this function will be - * automatically freed on driver detach. - * - * If an IRQ allocated with this function needs to be freed - * separately, devm_free_irq() must be used. - */ -int devm_request_threaded_irq(struct device *dev, unsigned int irq, - irq_handler_t handler, irq_handler_t thread_fn, - unsigned long irqflags, const char *devname, - void *dev_id) +static int devm_request_result(struct device *dev, int rc, unsigned int irq, + irq_handler_t handler, irq_handler_t thread_fn, + const char *devname) +{ + if (rc >= 0) + return rc; + + return dev_err_probe(dev, rc, "request_irq(%u) %ps %ps %s\n", + irq, handler, thread_fn, devname ? : ""); +} + +static int __devm_request_threaded_irq(struct device *dev, unsigned int irq, + irq_handler_t handler, + irq_handler_t thread_fn, + unsigned long irqflags, + const char *devname, void *dev_id) { struct irq_devres *dr; int rc; @@ -78,28 +71,48 @@ int devm_request_threaded_irq(struct device *dev, unsigned int irq, return 0; } -EXPORT_SYMBOL(devm_request_threaded_irq); /** - * devm_request_any_context_irq - allocate an interrupt line for a managed device - * @dev: device to request interrupt for - * @irq: Interrupt line to allocate - * @handler: Function to be called when the IRQ occurs - * @irqflags: Interrupt type flags - * @devname: An ascii name for the claiming device, dev_name(dev) if NULL - * @dev_id: A cookie passed back to the handler function + * devm_request_threaded_irq - allocate an interrupt line for a managed device with error logging + * @dev: Device to request interrupt for + * @irq: Interrupt line to allocate + * @handler: Function to be called when the interrupt occurs + * @thread_fn: Function to be called in a threaded interrupt context. NULL + * for devices which handle everything in @handler + * @irqflags: Interrupt type flags + * @devname: An ascii name for the claiming device, dev_name(dev) if NULL + * @dev_id: A cookie passed back to the handler function * - * Except for the extra @dev argument, this function takes the - * same arguments and performs the same function as - * request_any_context_irq(). IRQs requested with this function will be - * automatically freed on driver detach. + * Except for the extra @dev argument, this function takes the same + * arguments and performs the same function as request_threaded_irq(). + * Interrupts requested with this function will be automatically freed on + * driver detach. + * + * If an interrupt allocated with this function needs to be freed + * separately, devm_free_irq() must be used. + * + * When the request fails, an error message is printed with contextual + * information (device name, interrupt number, handler functions and + * error code). Don't add extra error messages at the call sites. * - * If an IRQ allocated with this function needs to be freed - * separately, devm_free_irq() must be used. + * Return: 0 on success or a negative error number. */ -int devm_request_any_context_irq(struct device *dev, unsigned int irq, - irq_handler_t handler, unsigned long irqflags, - const char *devname, void *dev_id) +int devm_request_threaded_irq(struct device *dev, unsigned int irq, + irq_handler_t handler, irq_handler_t thread_fn, + unsigned long irqflags, const char *devname, + void *dev_id) +{ + int rc = __devm_request_threaded_irq(dev, irq, handler, thread_fn, + irqflags, devname, dev_id); + + return devm_request_result(dev, rc, irq, handler, thread_fn, devname); +} +EXPORT_SYMBOL(devm_request_threaded_irq); + +static int __devm_request_any_context_irq(struct device *dev, unsigned int irq, + irq_handler_t handler, + unsigned long irqflags, + const char *devname, void *dev_id) { struct irq_devres *dr; int rc; @@ -124,6 +137,40 @@ int devm_request_any_context_irq(struct device *dev, unsigned int irq, return rc; } + +/** + * devm_request_any_context_irq - allocate an interrupt line for a managed device with error logging + * @dev: Device to request interrupt for + * @irq: Interrupt line to allocate + * @handler: Function to be called when the interrupt occurs + * @irqflags: Interrupt type flags + * @devname: An ascii name for the claiming device, dev_name(dev) if NULL + * @dev_id: A cookie passed back to the handler function + * + * Except for the extra @dev argument, this function takes the same + * arguments and performs the same function as request_any_context_irq(). + * Interrupts requested with this function will be automatically freed on + * driver detach. + * + * If an interrupt allocated with this function needs to be freed + * separately, devm_free_irq() must be used. + * + * When the request fails, an error message is printed with contextual + * information (device name, interrupt number, handler functions and + * error code). Don't add extra error messages at the call sites. + * + * Return: IRQC_IS_HARDIRQ or IRQC_IS_NESTED on success, or a negative error + * number. + */ +int devm_request_any_context_irq(struct device *dev, unsigned int irq, + irq_handler_t handler, unsigned long irqflags, + const char *devname, void *dev_id) +{ + int rc = __devm_request_any_context_irq(dev, irq, handler, irqflags, + devname, dev_id); + + return devm_request_result(dev, rc, irq, handler, NULL, devname); +} EXPORT_SYMBOL(devm_request_any_context_irq); /** diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index 9489f93b3db3..e103451243a0 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -136,6 +136,44 @@ void __irq_wake_thread(struct irq_desc *desc, struct irqaction *action) wake_up_process(action->thread); } +static DEFINE_STATIC_KEY_FALSE(irqhandler_duration_check_enabled); +static u64 irqhandler_duration_threshold_ns __ro_after_init; + +static int __init irqhandler_duration_check_setup(char *arg) +{ + unsigned long val; + int ret; + + ret = kstrtoul(arg, 0, &val); + if (ret) { + pr_err("Unable to parse irqhandler.duration_warn_us setting: ret=%d\n", ret); + return 0; + } + + if (!val) { + pr_err("Invalid irqhandler.duration_warn_us setting, must be > 0\n"); + return 0; + } + + irqhandler_duration_threshold_ns = val * 1000; + static_branch_enable(&irqhandler_duration_check_enabled); + + return 1; +} +__setup("irqhandler.duration_warn_us=", irqhandler_duration_check_setup); + +static inline void irqhandler_duration_check(u64 ts_start, unsigned int irq, + const struct irqaction *action) +{ + u64 delta_ns = local_clock() - ts_start; + + if (unlikely(delta_ns > irqhandler_duration_threshold_ns)) { + pr_warn_ratelimited("[CPU%u] long duration of IRQ[%u:%ps], took: %llu us\n", + smp_processor_id(), irq, action->handler, + div_u64(delta_ns, NSEC_PER_USEC)); + } +} + irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc) { irqreturn_t retval = IRQ_NONE; @@ -155,7 +193,16 @@ irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc) lockdep_hardirq_threaded(); trace_irq_handler_entry(irq, action); - res = action->handler(irq, action->dev_id); + + if (static_branch_unlikely(&irqhandler_duration_check_enabled)) { + u64 ts_start = local_clock(); + + res = action->handler(irq, action->dev_id); + irqhandler_duration_check(ts_start, irq, action); + } else { + res = action->handler(irq, action->dev_id); + } + trace_irq_handler_exit(irq, action, res); if (WARN_ONCE(!irqs_disabled(),"irq %u handler %pS enabled interrupts\n", diff --git a/kernel/irq/irq_test.c b/kernel/irq/irq_test.c index a75abebed7f2..e2d31914b3c4 100644 --- a/kernel/irq/irq_test.c +++ b/kernel/irq/irq_test.c @@ -41,21 +41,37 @@ static struct irq_chip fake_irq_chip = { .flags = IRQCHIP_SKIP_SET_WAKE, }; -static void irq_disable_depth_test(struct kunit *test) +static int irq_test_setup_fake_irq(struct kunit *test, struct irq_affinity_desc *affd) { struct irq_desc *desc; - int virq, ret; + int virq; - virq = irq_domain_alloc_descs(-1, 1, 0, NUMA_NO_NODE, NULL); + virq = irq_domain_alloc_descs(-1, 1, 0, NUMA_NO_NODE, affd); KUNIT_ASSERT_GE(test, virq, 0); - irq_set_chip_and_handler(virq, &dummy_irq_chip, handle_simple_irq); + irq_set_chip_and_handler(virq, &fake_irq_chip, handle_simple_irq); + + desc = irq_to_desc(virq); + KUNIT_ASSERT_PTR_NE(test, desc, NULL); + + /* On some architectures, IRQs are NOREQUEST | NOPROBE by default. */ + irq_settings_clr_norequest(desc); + + return virq; +} + +static void irq_disable_depth_test(struct kunit *test) +{ + struct irq_desc *desc; + int virq, ret; + + virq = irq_test_setup_fake_irq(test, NULL); desc = irq_to_desc(virq); KUNIT_ASSERT_PTR_NE(test, desc, NULL); ret = request_irq(virq, noop_handler, 0, "test_irq", NULL); - KUNIT_EXPECT_EQ(test, ret, 0); + KUNIT_ASSERT_EQ(test, ret, 0); KUNIT_EXPECT_EQ(test, desc->depth, 0); @@ -73,16 +89,13 @@ static void irq_free_disabled_test(struct kunit *test) struct irq_desc *desc; int virq, ret; - virq = irq_domain_alloc_descs(-1, 1, 0, NUMA_NO_NODE, NULL); - KUNIT_ASSERT_GE(test, virq, 0); - - irq_set_chip_and_handler(virq, &dummy_irq_chip, handle_simple_irq); + virq = irq_test_setup_fake_irq(test, NULL); desc = irq_to_desc(virq); KUNIT_ASSERT_PTR_NE(test, desc, NULL); ret = request_irq(virq, noop_handler, 0, "test_irq", NULL); - KUNIT_EXPECT_EQ(test, ret, 0); + KUNIT_ASSERT_EQ(test, ret, 0); KUNIT_EXPECT_EQ(test, desc->depth, 0); @@ -93,7 +106,7 @@ static void irq_free_disabled_test(struct kunit *test) KUNIT_EXPECT_GE(test, desc->depth, 1); ret = request_irq(virq, noop_handler, 0, "test_irq", NULL); - KUNIT_EXPECT_EQ(test, ret, 0); + KUNIT_ASSERT_EQ(test, ret, 0); KUNIT_EXPECT_EQ(test, desc->depth, 0); free_irq(virq, NULL); @@ -112,10 +125,7 @@ static void irq_shutdown_depth_test(struct kunit *test) if (!IS_ENABLED(CONFIG_SMP)) kunit_skip(test, "requires CONFIG_SMP for managed shutdown"); - virq = irq_domain_alloc_descs(-1, 1, 0, NUMA_NO_NODE, &affinity); - KUNIT_ASSERT_GE(test, virq, 0); - - irq_set_chip_and_handler(virq, &dummy_irq_chip, handle_simple_irq); + virq = irq_test_setup_fake_irq(test, &affinity); desc = irq_to_desc(virq); KUNIT_ASSERT_PTR_NE(test, desc, NULL); @@ -124,7 +134,7 @@ static void irq_shutdown_depth_test(struct kunit *test) KUNIT_ASSERT_PTR_NE(test, data, NULL); ret = request_irq(virq, noop_handler, 0, "test_irq", NULL); - KUNIT_EXPECT_EQ(test, ret, 0); + KUNIT_ASSERT_EQ(test, ret, 0); KUNIT_EXPECT_TRUE(test, irqd_is_activated(data)); KUNIT_EXPECT_TRUE(test, irqd_is_started(data)); @@ -169,13 +179,12 @@ static void irq_cpuhotplug_test(struct kunit *test) kunit_skip(test, "requires more than 1 CPU for CPU hotplug"); if (!cpu_is_hotpluggable(1)) kunit_skip(test, "CPU 1 must be hotpluggable"); + if (!cpu_online(1)) + kunit_skip(test, "CPU 1 must be online"); cpumask_copy(&affinity.mask, cpumask_of(1)); - virq = irq_domain_alloc_descs(-1, 1, 0, NUMA_NO_NODE, &affinity); - KUNIT_ASSERT_GE(test, virq, 0); - - irq_set_chip_and_handler(virq, &fake_irq_chip, handle_simple_irq); + virq = irq_test_setup_fake_irq(test, &affinity); desc = irq_to_desc(virq); KUNIT_ASSERT_PTR_NE(test, desc, NULL); @@ -184,7 +193,7 @@ static void irq_cpuhotplug_test(struct kunit *test) KUNIT_ASSERT_PTR_NE(test, data, NULL); ret = request_irq(virq, noop_handler, 0, "test_irq", NULL); - KUNIT_EXPECT_EQ(test, ret, 0); + KUNIT_ASSERT_EQ(test, ret, 0); KUNIT_EXPECT_TRUE(test, irqd_is_activated(data)); KUNIT_EXPECT_TRUE(test, irqd_is_started(data)); @@ -196,13 +205,9 @@ static void irq_cpuhotplug_test(struct kunit *test) KUNIT_EXPECT_EQ(test, desc->depth, 1); KUNIT_EXPECT_EQ(test, remove_cpu(1), 0); - KUNIT_EXPECT_FALSE(test, irqd_is_activated(data)); - KUNIT_EXPECT_FALSE(test, irqd_is_started(data)); KUNIT_EXPECT_GE(test, desc->depth, 1); KUNIT_EXPECT_EQ(test, add_cpu(1), 0); - KUNIT_EXPECT_FALSE(test, irqd_is_activated(data)); - KUNIT_EXPECT_FALSE(test, irqd_is_started(data)); KUNIT_EXPECT_EQ(test, desc->depth, 1); enable_irq(virq); diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index b64c57b44c20..db714d3014b5 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -653,13 +653,6 @@ void irq_mark_irq(unsigned int irq) irq_insert_desc(irq, irq_desc + irq); } -#ifdef CONFIG_GENERIC_IRQ_LEGACY -void irq_init_desc(unsigned int irq) -{ - free_desc(irq); -} -#endif - #endif /* !CONFIG_SPARSE_IRQ */ int handle_irq_desc(struct irq_desc *desc) diff --git a/kernel/softirq.c b/kernel/softirq.c index 513b1945987c..77198911b8dd 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -165,7 +165,11 @@ void __local_bh_disable_ip(unsigned long ip, unsigned int cnt) /* First entry of a task into a BH disabled section? */ if (!current->softirq_disable_cnt) { if (preemptible()) { - local_lock(&softirq_ctrl.lock); + if (IS_ENABLED(CONFIG_PREEMPT_RT_NEEDS_BH_LOCK)) + local_lock(&softirq_ctrl.lock); + else + migrate_disable(); + /* Required to meet the RCU bottomhalf requirements. */ rcu_read_lock(); } else { @@ -177,17 +181,34 @@ void __local_bh_disable_ip(unsigned long ip, unsigned int cnt) * Track the per CPU softirq disabled state. On RT this is per CPU * state to allow preemption of bottom half disabled sections. */ - newcnt = __this_cpu_add_return(softirq_ctrl.cnt, cnt); - /* - * Reflect the result in the task state to prevent recursion on the - * local lock and to make softirq_count() & al work. - */ - current->softirq_disable_cnt = newcnt; + if (IS_ENABLED(CONFIG_PREEMPT_RT_NEEDS_BH_LOCK)) { + newcnt = this_cpu_add_return(softirq_ctrl.cnt, cnt); + /* + * Reflect the result in the task state to prevent recursion on the + * local lock and to make softirq_count() & al work. + */ + current->softirq_disable_cnt = newcnt; - if (IS_ENABLED(CONFIG_TRACE_IRQFLAGS) && newcnt == cnt) { - raw_local_irq_save(flags); - lockdep_softirqs_off(ip); - raw_local_irq_restore(flags); + if (IS_ENABLED(CONFIG_TRACE_IRQFLAGS) && newcnt == cnt) { + raw_local_irq_save(flags); + lockdep_softirqs_off(ip); + raw_local_irq_restore(flags); + } + } else { + bool sirq_dis = false; + + if (!current->softirq_disable_cnt) + sirq_dis = true; + + this_cpu_add(softirq_ctrl.cnt, cnt); + current->softirq_disable_cnt += cnt; + WARN_ON_ONCE(current->softirq_disable_cnt < 0); + + if (IS_ENABLED(CONFIG_TRACE_IRQFLAGS) && sirq_dis) { + raw_local_irq_save(flags); + lockdep_softirqs_off(ip); + raw_local_irq_restore(flags); + } } } EXPORT_SYMBOL(__local_bh_disable_ip); @@ -195,23 +216,42 @@ EXPORT_SYMBOL(__local_bh_disable_ip); static void __local_bh_enable(unsigned int cnt, bool unlock) { unsigned long flags; + bool sirq_en = false; int newcnt; - DEBUG_LOCKS_WARN_ON(current->softirq_disable_cnt != - this_cpu_read(softirq_ctrl.cnt)); + if (IS_ENABLED(CONFIG_PREEMPT_RT_NEEDS_BH_LOCK)) { + DEBUG_LOCKS_WARN_ON(current->softirq_disable_cnt != + this_cpu_read(softirq_ctrl.cnt)); + if (softirq_count() == cnt) + sirq_en = true; + } else { + if (current->softirq_disable_cnt == cnt) + sirq_en = true; + } - if (IS_ENABLED(CONFIG_TRACE_IRQFLAGS) && softirq_count() == cnt) { + if (IS_ENABLED(CONFIG_TRACE_IRQFLAGS) && sirq_en) { raw_local_irq_save(flags); lockdep_softirqs_on(_RET_IP_); raw_local_irq_restore(flags); } - newcnt = __this_cpu_sub_return(softirq_ctrl.cnt, cnt); - current->softirq_disable_cnt = newcnt; + if (IS_ENABLED(CONFIG_PREEMPT_RT_NEEDS_BH_LOCK)) { + newcnt = this_cpu_sub_return(softirq_ctrl.cnt, cnt); + current->softirq_disable_cnt = newcnt; - if (!newcnt && unlock) { - rcu_read_unlock(); - local_unlock(&softirq_ctrl.lock); + if (!newcnt && unlock) { + rcu_read_unlock(); + local_unlock(&softirq_ctrl.lock); + } + } else { + current->softirq_disable_cnt -= cnt; + this_cpu_sub(softirq_ctrl.cnt, cnt); + if (unlock && !current->softirq_disable_cnt) { + migrate_enable(); + rcu_read_unlock(); + } else { + WARN_ON_ONCE(current->softirq_disable_cnt < 0); + } } } @@ -228,7 +268,10 @@ void __local_bh_enable_ip(unsigned long ip, unsigned int cnt) lock_map_release(&bh_lock_map); local_irq_save(flags); - curcnt = __this_cpu_read(softirq_ctrl.cnt); + if (IS_ENABLED(CONFIG_PREEMPT_RT_NEEDS_BH_LOCK)) + curcnt = this_cpu_read(softirq_ctrl.cnt); + else + curcnt = current->softirq_disable_cnt; /* * If this is not reenabling soft interrupts, no point in trying to @@ -805,6 +848,58 @@ static bool tasklet_clear_sched(struct tasklet_struct *t) return false; } +#ifdef CONFIG_PREEMPT_RT +struct tasklet_sync_callback { + spinlock_t cb_lock; + atomic_t cb_waiters; +}; + +static DEFINE_PER_CPU(struct tasklet_sync_callback, tasklet_sync_callback) = { + .cb_lock = __SPIN_LOCK_UNLOCKED(tasklet_sync_callback.cb_lock), + .cb_waiters = ATOMIC_INIT(0), +}; + +static void tasklet_lock_callback(void) +{ + spin_lock(this_cpu_ptr(&tasklet_sync_callback.cb_lock)); +} + +static void tasklet_unlock_callback(void) +{ + spin_unlock(this_cpu_ptr(&tasklet_sync_callback.cb_lock)); +} + +static void tasklet_callback_cancel_wait_running(void) +{ + struct tasklet_sync_callback *sync_cb = this_cpu_ptr(&tasklet_sync_callback); + + atomic_inc(&sync_cb->cb_waiters); + spin_lock(&sync_cb->cb_lock); + atomic_dec(&sync_cb->cb_waiters); + spin_unlock(&sync_cb->cb_lock); +} + +static void tasklet_callback_sync_wait_running(void) +{ + struct tasklet_sync_callback *sync_cb = this_cpu_ptr(&tasklet_sync_callback); + + if (atomic_read(&sync_cb->cb_waiters)) { + spin_unlock(&sync_cb->cb_lock); + spin_lock(&sync_cb->cb_lock); + } +} + +#else /* !CONFIG_PREEMPT_RT: */ + +static void tasklet_lock_callback(void) { } +static void tasklet_unlock_callback(void) { } +static void tasklet_callback_sync_wait_running(void) { } + +#ifdef CONFIG_SMP +static void tasklet_callback_cancel_wait_running(void) { } +#endif +#endif /* !CONFIG_PREEMPT_RT */ + static void tasklet_action_common(struct tasklet_head *tl_head, unsigned int softirq_nr) { @@ -816,6 +911,7 @@ static void tasklet_action_common(struct tasklet_head *tl_head, tl_head->tail = &tl_head->head; local_irq_enable(); + tasklet_lock_callback(); while (list) { struct tasklet_struct *t = list; @@ -835,6 +931,7 @@ static void tasklet_action_common(struct tasklet_head *tl_head, } } tasklet_unlock(t); + tasklet_callback_sync_wait_running(); continue; } tasklet_unlock(t); @@ -847,6 +944,7 @@ static void tasklet_action_common(struct tasklet_head *tl_head, __raise_softirq_irqoff(softirq_nr); local_irq_enable(); } + tasklet_unlock_callback(); } static __latent_entropy void tasklet_action(void) @@ -897,12 +995,9 @@ void tasklet_unlock_spin_wait(struct tasklet_struct *t) /* * Prevent a live lock when current preempted soft * interrupt processing or prevents ksoftirqd from - * running. If the tasklet runs on a different CPU - * then this has no effect other than doing the BH - * disable/enable dance for nothing. + * running. */ - local_bh_disable(); - local_bh_enable(); + tasklet_callback_cancel_wait_running(); } else { cpu_relax(); } |