From 80ae7b1a918e78b0bae88b0c0ad413d3fdced968 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Jun 2018 17:33:53 +0200 Subject: x86/apic/vector: Prevent hlist corruption and leaks Several people observed the WARN_ON() in irq_matrix_free() which triggers when the caller tries to free an vector which is not in the allocation range. Song provided the trace information which allowed to decode the root cause. The rework of the vector allocation mechanism failed to preserve a sanity check, which prevents setting a new target vector/CPU when the previous affinity change has not fully completed. As a result a half finished affinity change can be overwritten, which can cause the leak of a irq descriptor pointer on the previous target CPU and double enqueue of the hlist head into the cleanup lists of two or more CPUs. After one CPU cleaned up its vector the next CPU will invoke the cleanup handler with vector 0, which triggers the out of range warning in the matrix allocator. Prevent this by checking the apic_data of the interrupt whether the move_in_progress flag is false and the hlist node is not hashed. Return -EBUSY if not. This prevents the damage and restores the behaviour before the vector allocation rework, but due to other changes in that area it also widens the chance that user space can observe -EBUSY. In theory this should be fine, but actually not all user space tools handle -EBUSY correctly. Addressing that is not part of this fix, but will be addressed in follow up patches. Fixes: 69cde0004a4b ("x86/vector: Use matrix allocator for vector assignment") Reported-by: Dmitry Safonov <0x7f454c46@gmail.com> Reported-by: Tariq Toukan Reported-by: Song Liu Signed-off-by: Thomas Gleixner Tested-by: Song Liu Cc: Joerg Roedel Cc: Peter Zijlstra Cc: stable@vger.kernel.org Cc: Mike Travis Cc: Borislav Petkov Link: https://lkml.kernel.org/r/20180604162224.303870257@linutronix.de --- arch/x86/kernel/apic/vector.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch/x86/kernel/apic/vector.c') diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index bb6f7a2148d7..72b575a0b662 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -235,6 +235,15 @@ static int allocate_vector(struct irq_data *irqd, const struct cpumask *dest) if (vector && cpu_online(cpu) && cpumask_test_cpu(cpu, dest)) return 0; + /* + * Careful here. @apicd might either have move_in_progress set or + * be enqueued for cleanup. Assigning a new vector would either + * leave a stale vector on some CPU around or in case of a pending + * cleanup corrupt the hlist. + */ + if (apicd->move_in_progress || !hlist_unhashed(&apicd->clist)) + return -EBUSY; + vector = irq_matrix_alloc(vector_matrix, dest, resvd, &cpu); if (vector > 0) apic_update_vector(irqd, vector, cpu); -- cgit v1.2.3 From c0255770ccdc77ef2184d2a0a2e0cde09d2b44a4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Jun 2018 17:33:55 +0200 Subject: x86/apic: Provide apic_ack_irq() apic_ack_edge() is explicitely for handling interrupt affinity cleanup when interrupt remapping is not available or disable. Remapped interrupts and also some of the platform specific special interrupts, e.g. UV, invoke ack_APIC_irq() directly. To address the issue of failing an affinity update with -EBUSY the delayed affinity mechanism can be reused, but ack_APIC_irq() does not handle that. Adding this to ack_APIC_irq() is not possible, because that function is also used for exceptions and directly handled interrupts like IPIs. Create a new function, which just contains the conditional invocation of irq_move_irq() and the final ack_APIC_irq(). Reuse the new function in apic_ack_edge(). Preparatory change for the real fix. Fixes: dccfe3147b42 ("x86/vector: Simplify vector move cleanup") Signed-off-by: Thomas Gleixner Tested-by: Song Liu Cc: Joerg Roedel Cc: Peter Zijlstra Cc: Song Liu Cc: Dmitry Safonov <0x7f454c46@gmail.com> Cc: stable@vger.kernel.org Cc: Mike Travis Cc: Borislav Petkov Cc: Tariq Toukan Link: https://lkml.kernel.org/r/20180604162224.471925894@linutronix.de --- arch/x86/kernel/apic/vector.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel/apic/vector.c') diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 72b575a0b662..b708f597eee3 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -809,13 +809,18 @@ static int apic_retrigger_irq(struct irq_data *irqd) return 1; } -void apic_ack_edge(struct irq_data *irqd) +void apic_ack_irq(struct irq_data *irqd) { - irq_complete_move(irqd_cfg(irqd)); irq_move_irq(irqd); ack_APIC_irq(); } +void apic_ack_edge(struct irq_data *irqd) +{ + irq_complete_move(irqd_cfg(irqd)); + apic_ack_irq(irqd); +} + static struct irq_chip lapic_controller = { .name = "APIC", .irq_ack = apic_ack_edge, -- cgit v1.2.3 From a07771ac6a78860777a9da5d9bc38830ec993fe7 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Jun 2018 17:34:00 +0200 Subject: x86/apic/vector: Print APIC control bits in debugfs Extend the debugability of the vector management by adding the state bits to the debugfs output. Signed-off-by: Thomas Gleixner Tested-by: Song Liu Cc: Joerg Roedel Cc: Peter Zijlstra Cc: Song Liu Cc: Dmitry Safonov <0x7f454c46@gmail.com> Cc: Mike Travis Cc: Borislav Petkov Cc: Tariq Toukan Link: https://lkml.kernel.org/r/20180604162224.908136099@linutronix.de --- arch/x86/kernel/apic/vector.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) (limited to 'arch/x86/kernel/apic/vector.c') diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index b708f597eee3..35aaee4fc028 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -588,8 +588,7 @@ error: static void x86_vector_debug_show(struct seq_file *m, struct irq_domain *d, struct irq_data *irqd, int ind) { - unsigned int cpu, vector, prev_cpu, prev_vector; - struct apic_chip_data *apicd; + struct apic_chip_data apicd; unsigned long flags; int irq; @@ -605,24 +604,26 @@ static void x86_vector_debug_show(struct seq_file *m, struct irq_domain *d, return; } - apicd = irqd->chip_data; - if (!apicd) { + if (!irqd->chip_data) { seq_printf(m, "%*sVector: Not assigned\n", ind, ""); return; } raw_spin_lock_irqsave(&vector_lock, flags); - cpu = apicd->cpu; - vector = apicd->vector; - prev_cpu = apicd->prev_cpu; - prev_vector = apicd->prev_vector; + memcpy(&apicd, irqd->chip_data, sizeof(apicd)); raw_spin_unlock_irqrestore(&vector_lock, flags); - seq_printf(m, "%*sVector: %5u\n", ind, "", vector); - seq_printf(m, "%*sTarget: %5u\n", ind, "", cpu); - if (prev_vector) { - seq_printf(m, "%*sPrevious vector: %5u\n", ind, "", prev_vector); - seq_printf(m, "%*sPrevious target: %5u\n", ind, "", prev_cpu); + + seq_printf(m, "%*sVector: %5u\n", ind, "", apicd.vector); + seq_printf(m, "%*sTarget: %5u\n", ind, "", apicd.cpu); + if (apicd.prev_vector) { + seq_printf(m, "%*sPrevious vector: %5u\n", ind, "", apicd.prev_vector); + seq_printf(m, "%*sPrevious target: %5u\n", ind, "", apicd.prev_cpu); } + seq_printf(m, "%*smove_in_progress: %u\n", ind, "", apicd.move_in_progress ? 1 : 0); + seq_printf(m, "%*sis_managed: %u\n", ind, "", apicd.is_managed ? 1 : 0); + seq_printf(m, "%*scan_reserve: %u\n", ind, "", apicd.can_reserve ? 1 : 0); + seq_printf(m, "%*shas_reserved: %u\n", ind, "", apicd.has_reserved ? 1 : 0); + seq_printf(m, "%*scleanup_pending: %u\n", ind, "", !hlist_unhashed(&apicd.clist)); } #endif -- cgit v1.2.3