From a01379671d67d34f254cc81f42cf854aa628f3a3 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 7 Sep 2020 15:47:27 +0200 Subject: xen/events: avoid removing an event channel while handling it MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 073d0552ead5bfc7a3a9c01de590e924f11b5dd2 upstream. Today it can happen that an event channel is being removed from the system while the event handling loop is active. This can lead to a race resulting in crashes or WARN() splats when trying to access the irq_info structure related to the event channel. Fix this problem by using a rwlock taken as reader in the event handling loop and as writer when deallocating the irq_info structure. As the observed problem was a NULL dereference in evtchn_from_irq() make this function more robust against races by testing the irq_info pointer to be not NULL before dereferencing it. And finally make all accesses to evtchn_to_irq[row][col] atomic ones in order to avoid seeing partial updates of an array element in irq handling. Note that irq handling can be entered only for event channels which have been valid before, so any not populated row isn't a problem in this regard, as rows are only ever added and never removed. This is XSA-331. Cc: stable@vger.kernel.org Reported-by: Marek Marczykowski-Górecki Reported-by: Jinoh Kang Signed-off-by: Juergen Gross Reviewed-by: Stefano Stabellini Reviewed-by: Wei Liu Signed-off-by: Greg Kroah-Hartman --- drivers/xen/events/events_base.c | 40 +++++++++++++++++++++++++++++++++++----- 1 file changed, 35 insertions(+), 5 deletions(-) diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index e402620b8920..46cc722cf26a 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -33,6 +33,7 @@ #include #include #include +#include #ifdef CONFIG_X86 #include @@ -70,6 +71,23 @@ const struct evtchn_ops *evtchn_ops; */ static DEFINE_MUTEX(irq_mapping_update_lock); +/* + * Lock protecting event handling loop against removing event channels. + * Adding of event channels is no issue as the associated IRQ becomes active + * only after everything is setup (before request_[threaded_]irq() the handler + * can't be entered for an event, as the event channel will be unmasked only + * then). + */ +static DEFINE_RWLOCK(evtchn_rwlock); + +/* + * Lock hierarchy: + * + * irq_mapping_update_lock + * evtchn_rwlock + * IRQ-desc lock + */ + static LIST_HEAD(xen_irq_list_head); /* IRQ <-> VIRQ mapping. */ @@ -104,7 +122,7 @@ static void clear_evtchn_to_irq_row(unsigned row) unsigned col; for (col = 0; col < EVTCHN_PER_ROW; col++) - evtchn_to_irq[row][col] = -1; + WRITE_ONCE(evtchn_to_irq[row][col], -1); } static void clear_evtchn_to_irq_all(void) @@ -141,7 +159,7 @@ static int set_evtchn_to_irq(unsigned evtchn, unsigned irq) clear_evtchn_to_irq_row(row); } - evtchn_to_irq[row][col] = irq; + WRITE_ONCE(evtchn_to_irq[row][col], irq); return 0; } @@ -151,7 +169,7 @@ int get_evtchn_to_irq(unsigned evtchn) return -1; if (evtchn_to_irq[EVTCHN_ROW(evtchn)] == NULL) return -1; - return evtchn_to_irq[EVTCHN_ROW(evtchn)][EVTCHN_COL(evtchn)]; + return READ_ONCE(evtchn_to_irq[EVTCHN_ROW(evtchn)][EVTCHN_COL(evtchn)]); } /* Get info for IRQ */ @@ -260,10 +278,14 @@ static void xen_irq_info_cleanup(struct irq_info *info) */ unsigned int evtchn_from_irq(unsigned irq) { - if (WARN(irq >= nr_irqs, "Invalid irq %d!\n", irq)) + const struct irq_info *info = NULL; + + if (likely(irq < nr_irqs)) + info = info_for_irq(irq); + if (!info) return 0; - return info_for_irq(irq)->evtchn; + return info->evtchn; } unsigned irq_from_evtchn(unsigned int evtchn) @@ -439,16 +461,21 @@ static int __must_check xen_allocate_irq_gsi(unsigned gsi) static void xen_free_irq(unsigned irq) { struct irq_info *info = info_for_irq(irq); + unsigned long flags; if (WARN_ON(!info)) return; + write_lock_irqsave(&evtchn_rwlock, flags); + list_del(&info->list); set_info_for_irq(irq, NULL); WARN_ON(info->refcnt > 0); + write_unlock_irqrestore(&evtchn_rwlock, flags); + kfree(info); /* Legacy IRQ descriptors are managed by the arch. */ @@ -1234,6 +1261,8 @@ static void __xen_evtchn_do_upcall(void) int cpu = get_cpu(); unsigned count; + read_lock(&evtchn_rwlock); + do { vcpu_info->evtchn_upcall_pending = 0; @@ -1249,6 +1278,7 @@ static void __xen_evtchn_do_upcall(void) } while (count != 1 || vcpu_info->evtchn_upcall_pending); out: + read_unlock(&evtchn_rwlock); put_cpu(); } -- cgit v1.2.3 From 4bea575a10691a99b03d5e9055f3079040b59868 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 7 Sep 2020 15:47:27 +0200 Subject: xen/events: add a proper barrier to 2-level uevent unmasking commit 4d3fe31bd993ef504350989786858aefdb877daa upstream. A follow-up patch will require certain write to happen before an event channel is unmasked. While the memory barrier is not strictly necessary for all the callers, the main one will need it. In order to avoid an extra memory barrier when using fifo event channels, mandate evtchn_unmask() to provide write ordering. The 2-level event handling unmask operation is missing an appropriate barrier, so add it. Fifo event channels are fine in this regard due to using sync_cmpxchg(). This is part of XSA-332. Cc: stable@vger.kernel.org Suggested-by: Julien Grall Signed-off-by: Juergen Gross Reviewed-by: Julien Grall Reviewed-by: Wei Liu Signed-off-by: Greg Kroah-Hartman --- drivers/xen/events/events_2l.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/xen/events/events_2l.c b/drivers/xen/events/events_2l.c index 8edef51c92e5..e4b75693600e 100644 --- a/drivers/xen/events/events_2l.c +++ b/drivers/xen/events/events_2l.c @@ -91,6 +91,8 @@ static void evtchn_2l_unmask(unsigned port) BUG_ON(!irqs_disabled()); + smp_wmb(); /* All writes before unmask must be visible. */ + if (unlikely((cpu != cpu_from_evtchn(port)))) do_hypercall = 1; else { -- cgit v1.2.3 From 44a455e06d87b09a54dd8a679751ae54fef8e371 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Tue, 20 Oct 2020 06:52:55 +0200 Subject: xen/events: fix race in evtchn_fifo_unmask() commit f01337197419b7e8a492e83089552b77d3b5fb90 upstream. Unmasking a fifo event channel can result in unmasking it twice, once directly in the kernel and once via a hypercall in case the event was pending. Fix that by doing the local unmask only if the event is not pending. This is part of XSA-332. Cc: stable@vger.kernel.org Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich Signed-off-by: Greg Kroah-Hartman --- drivers/xen/events/events_fifo.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/xen/events/events_fifo.c b/drivers/xen/events/events_fifo.c index 76b318e88382..3071256a9413 100644 --- a/drivers/xen/events/events_fifo.c +++ b/drivers/xen/events/events_fifo.c @@ -227,19 +227,25 @@ static bool evtchn_fifo_is_masked(unsigned port) return sync_test_bit(EVTCHN_FIFO_BIT(MASKED, word), BM(word)); } /* - * Clear MASKED, spinning if BUSY is set. + * Clear MASKED if not PENDING, spinning if BUSY is set. + * Return true if mask was cleared. */ -static void clear_masked(volatile event_word_t *word) +static bool clear_masked_cond(volatile event_word_t *word) { event_word_t new, old, w; w = *word; do { + if (w & (1 << EVTCHN_FIFO_PENDING)) + return false; + old = w & ~(1 << EVTCHN_FIFO_BUSY); new = old & ~(1 << EVTCHN_FIFO_MASKED); w = sync_cmpxchg(word, old, new); } while (w != old); + + return true; } static void evtchn_fifo_unmask(unsigned port) @@ -248,8 +254,7 @@ static void evtchn_fifo_unmask(unsigned port) BUG_ON(!irqs_disabled()); - clear_masked(word); - if (evtchn_fifo_is_pending(port)) { + if (!clear_masked_cond(word)) { struct evtchn_unmask unmask = { .port = port }; (void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask); } -- cgit v1.2.3 From df54eca9ae8aace17463f2e8402fcc62efddd0d5 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 7 Sep 2020 15:47:27 +0200 Subject: xen/events: add a new "late EOI" evtchn framework commit 54c9de89895e0a36047fcc4ae754ea5b8655fb9d upstream. In order to avoid tight event channel related IRQ loops add a new framework of "late EOI" handling: the IRQ the event channel is bound to will be masked until the event has been handled and the related driver is capable to handle another event. The driver is responsible for unmasking the event channel via the new function xen_irq_lateeoi(). This is similar to binding an event channel to a threaded IRQ, but without having to structure the driver accordingly. In order to support a future special handling in case a rogue guest is sending lots of unsolicited events, add a flag to xen_irq_lateeoi() which can be set by the caller to indicate the event was a spurious one. This is part of XSA-332. Cc: stable@vger.kernel.org Reported-by: Julien Grall Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich Reviewed-by: Stefano Stabellini Reviewed-by: Wei Liu Signed-off-by: Greg Kroah-Hartman --- drivers/xen/events/events_base.c | 151 ++++++++++++++++++++++++++++++++++----- include/xen/events.h | 29 ++++++-- 2 files changed, 159 insertions(+), 21 deletions(-) diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index 46cc722cf26a..1a1325e09d50 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -112,6 +112,7 @@ static bool (*pirq_needs_eoi)(unsigned irq); static struct irq_info *legacy_info_ptrs[NR_IRQS_LEGACY]; static struct irq_chip xen_dynamic_chip; +static struct irq_chip xen_lateeoi_chip; static struct irq_chip xen_percpu_chip; static struct irq_chip xen_pirq_chip; static void enable_dynirq(struct irq_data *data); @@ -396,6 +397,33 @@ void notify_remote_via_irq(int irq) } EXPORT_SYMBOL_GPL(notify_remote_via_irq); +static void xen_irq_lateeoi_locked(struct irq_info *info) +{ + evtchn_port_t evtchn; + + evtchn = info->evtchn; + if (!VALID_EVTCHN(evtchn)) + return; + + unmask_evtchn(evtchn); +} + +void xen_irq_lateeoi(unsigned int irq, unsigned int eoi_flags) +{ + struct irq_info *info; + unsigned long flags; + + read_lock_irqsave(&evtchn_rwlock, flags); + + info = info_for_irq(irq); + + if (info) + xen_irq_lateeoi_locked(info); + + read_unlock_irqrestore(&evtchn_rwlock, flags); +} +EXPORT_SYMBOL_GPL(xen_irq_lateeoi); + static void xen_irq_init(unsigned irq) { struct irq_info *info; @@ -867,7 +895,7 @@ int xen_pirq_from_irq(unsigned irq) } EXPORT_SYMBOL_GPL(xen_pirq_from_irq); -int bind_evtchn_to_irq(unsigned int evtchn) +static int bind_evtchn_to_irq_chip(evtchn_port_t evtchn, struct irq_chip *chip) { int irq; int ret; @@ -884,7 +912,7 @@ int bind_evtchn_to_irq(unsigned int evtchn) if (irq < 0) goto out; - irq_set_chip_and_handler_name(irq, &xen_dynamic_chip, + irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "event"); ret = xen_irq_info_evtchn_setup(irq, evtchn); @@ -905,8 +933,19 @@ out: return irq; } + +int bind_evtchn_to_irq(evtchn_port_t evtchn) +{ + return bind_evtchn_to_irq_chip(evtchn, &xen_dynamic_chip); +} EXPORT_SYMBOL_GPL(bind_evtchn_to_irq); +int bind_evtchn_to_irq_lateeoi(evtchn_port_t evtchn) +{ + return bind_evtchn_to_irq_chip(evtchn, &xen_lateeoi_chip); +} +EXPORT_SYMBOL_GPL(bind_evtchn_to_irq_lateeoi); + static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu) { struct evtchn_bind_ipi bind_ipi; @@ -948,8 +987,9 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu) return irq; } -int bind_interdomain_evtchn_to_irq(unsigned int remote_domain, - unsigned int remote_port) +static int bind_interdomain_evtchn_to_irq_chip(unsigned int remote_domain, + evtchn_port_t remote_port, + struct irq_chip *chip) { struct evtchn_bind_interdomain bind_interdomain; int err; @@ -960,10 +1000,26 @@ int bind_interdomain_evtchn_to_irq(unsigned int remote_domain, err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain, &bind_interdomain); - return err ? : bind_evtchn_to_irq(bind_interdomain.local_port); + return err ? : bind_evtchn_to_irq_chip(bind_interdomain.local_port, + chip); +} + +int bind_interdomain_evtchn_to_irq(unsigned int remote_domain, + evtchn_port_t remote_port) +{ + return bind_interdomain_evtchn_to_irq_chip(remote_domain, remote_port, + &xen_dynamic_chip); } EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irq); +int bind_interdomain_evtchn_to_irq_lateeoi(unsigned int remote_domain, + evtchn_port_t remote_port) +{ + return bind_interdomain_evtchn_to_irq_chip(remote_domain, remote_port, + &xen_lateeoi_chip); +} +EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irq_lateeoi); + static int find_virq(unsigned int virq, unsigned int cpu) { struct evtchn_status status; @@ -1059,14 +1115,15 @@ static void unbind_from_irq(unsigned int irq) mutex_unlock(&irq_mapping_update_lock); } -int bind_evtchn_to_irqhandler(unsigned int evtchn, - irq_handler_t handler, - unsigned long irqflags, - const char *devname, void *dev_id) +static int bind_evtchn_to_irqhandler_chip(evtchn_port_t evtchn, + irq_handler_t handler, + unsigned long irqflags, + const char *devname, void *dev_id, + struct irq_chip *chip) { int irq, retval; - irq = bind_evtchn_to_irq(evtchn); + irq = bind_evtchn_to_irq_chip(evtchn, chip); if (irq < 0) return irq; retval = request_irq(irq, handler, irqflags, devname, dev_id); @@ -1077,18 +1134,38 @@ int bind_evtchn_to_irqhandler(unsigned int evtchn, return irq; } + +int bind_evtchn_to_irqhandler(evtchn_port_t evtchn, + irq_handler_t handler, + unsigned long irqflags, + const char *devname, void *dev_id) +{ + return bind_evtchn_to_irqhandler_chip(evtchn, handler, irqflags, + devname, dev_id, + &xen_dynamic_chip); +} EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler); -int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain, - unsigned int remote_port, - irq_handler_t handler, - unsigned long irqflags, - const char *devname, - void *dev_id) +int bind_evtchn_to_irqhandler_lateeoi(evtchn_port_t evtchn, + irq_handler_t handler, + unsigned long irqflags, + const char *devname, void *dev_id) +{ + return bind_evtchn_to_irqhandler_chip(evtchn, handler, irqflags, + devname, dev_id, + &xen_lateeoi_chip); +} +EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler_lateeoi); + +static int bind_interdomain_evtchn_to_irqhandler_chip( + unsigned int remote_domain, evtchn_port_t remote_port, + irq_handler_t handler, unsigned long irqflags, + const char *devname, void *dev_id, struct irq_chip *chip) { int irq, retval; - irq = bind_interdomain_evtchn_to_irq(remote_domain, remote_port); + irq = bind_interdomain_evtchn_to_irq_chip(remote_domain, remote_port, + chip); if (irq < 0) return irq; @@ -1100,8 +1177,33 @@ int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain, return irq; } + +int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain, + evtchn_port_t remote_port, + irq_handler_t handler, + unsigned long irqflags, + const char *devname, + void *dev_id) +{ + return bind_interdomain_evtchn_to_irqhandler_chip(remote_domain, + remote_port, handler, irqflags, devname, + dev_id, &xen_dynamic_chip); +} EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irqhandler); +int bind_interdomain_evtchn_to_irqhandler_lateeoi(unsigned int remote_domain, + evtchn_port_t remote_port, + irq_handler_t handler, + unsigned long irqflags, + const char *devname, + void *dev_id) +{ + return bind_interdomain_evtchn_to_irqhandler_chip(remote_domain, + remote_port, handler, irqflags, devname, + dev_id, &xen_lateeoi_chip); +} +EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irqhandler_lateeoi); + int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu, irq_handler_t handler, unsigned long irqflags, const char *devname, void *dev_id) @@ -1645,6 +1747,21 @@ static struct irq_chip xen_dynamic_chip __read_mostly = { .irq_retrigger = retrigger_dynirq, }; +static struct irq_chip xen_lateeoi_chip __read_mostly = { + /* The chip name needs to contain "xen-dyn" for irqbalance to work. */ + .name = "xen-dyn-lateeoi", + + .irq_disable = disable_dynirq, + .irq_mask = disable_dynirq, + .irq_unmask = enable_dynirq, + + .irq_ack = mask_ack_dynirq, + .irq_mask_ack = mask_ack_dynirq, + + .irq_set_affinity = set_affinity_irq, + .irq_retrigger = retrigger_dynirq, +}; + static struct irq_chip xen_pirq_chip __read_mostly = { .name = "xen-pirq", diff --git a/include/xen/events.h b/include/xen/events.h index c0e6a0598397..31952308a6d5 100644 --- a/include/xen/events.h +++ b/include/xen/events.h @@ -14,11 +14,16 @@ unsigned xen_evtchn_nr_channels(void); -int bind_evtchn_to_irq(unsigned int evtchn); -int bind_evtchn_to_irqhandler(unsigned int evtchn, +int bind_evtchn_to_irq(evtchn_port_t evtchn); +int bind_evtchn_to_irq_lateeoi(evtchn_port_t evtchn); +int bind_evtchn_to_irqhandler(evtchn_port_t evtchn, irq_handler_t handler, unsigned long irqflags, const char *devname, void *dev_id); +int bind_evtchn_to_irqhandler_lateeoi(evtchn_port_t evtchn, + irq_handler_t handler, + unsigned long irqflags, const char *devname, + void *dev_id); int bind_virq_to_irq(unsigned int virq, unsigned int cpu, bool percpu); int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu, irq_handler_t handler, @@ -31,13 +36,21 @@ int bind_ipi_to_irqhandler(enum ipi_vector ipi, const char *devname, void *dev_id); int bind_interdomain_evtchn_to_irq(unsigned int remote_domain, - unsigned int remote_port); + evtchn_port_t remote_port); +int bind_interdomain_evtchn_to_irq_lateeoi(unsigned int remote_domain, + evtchn_port_t remote_port); int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain, - unsigned int remote_port, + evtchn_port_t remote_port, irq_handler_t handler, unsigned long irqflags, const char *devname, void *dev_id); +int bind_interdomain_evtchn_to_irqhandler_lateeoi(unsigned int remote_domain, + evtchn_port_t remote_port, + irq_handler_t handler, + unsigned long irqflags, + const char *devname, + void *dev_id); /* * Common unbind function for all event sources. Takes IRQ to unbind from. @@ -46,6 +59,14 @@ int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain, */ void unbind_from_irqhandler(unsigned int irq, void *dev_id); +/* + * Send late EOI for an IRQ bound to an event channel via one of the *_lateeoi + * functions above. + */ +void xen_irq_lateeoi(unsigned int irq, unsigned int eoi_flags); +/* Signal an event was spurious, i.e. there was no action resulting from it. */ +#define XEN_EOI_FLAG_SPURIOUS 0x00000001 + #define XEN_IRQ_PRIORITY_MAX EVTCHN_FIFO_PRIORITY_MAX #define XEN_IRQ_PRIORITY_DEFAULT EVTCHN_FIFO_PRIORITY_DEFAULT #define XEN_IRQ_PRIORITY_MIN EVTCHN_FIFO_PRIORITY_MIN -- cgit v1.2.3 From ade6bd5af7f9fe27af96fe92b4d50b81c2de0d91 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 7 Sep 2020 15:47:27 +0200 Subject: xen/blkback: use lateeoi irq binding commit 01263a1fabe30b4d542f34c7e2364a22587ddaf2 upstream. In order to reduce the chance for the system becoming unresponsive due to event storms triggered by a misbehaving blkfront use the lateeoi irq binding for blkback and unmask the event channel only after processing all pending requests. As the thread processing requests is used to do purging work in regular intervals an EOI may be sent only after having received an event. If there was no pending I/O request flag the EOI as spurious. This is part of XSA-332. Cc: stable@vger.kernel.org Reported-by: Julien Grall Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich Reviewed-by: Wei Liu Signed-off-by: Greg Kroah-Hartman --- drivers/block/xen-blkback/blkback.c | 22 +++++++++++++++++----- drivers/block/xen-blkback/xenbus.c | 5 ++--- 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 3666afa639d1..b18f0162cb9c 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -202,7 +202,7 @@ static inline void shrink_free_pagepool(struct xen_blkif_ring *ring, int num) #define vaddr(page) ((unsigned long)pfn_to_kaddr(page_to_pfn(page))) -static int do_block_io_op(struct xen_blkif_ring *ring); +static int do_block_io_op(struct xen_blkif_ring *ring, unsigned int *eoi_flags); static int dispatch_rw_block_io(struct xen_blkif_ring *ring, struct blkif_request *req, struct pending_req *pending_req); @@ -615,6 +615,8 @@ int xen_blkif_schedule(void *arg) struct xen_vbd *vbd = &blkif->vbd; unsigned long timeout; int ret; + bool do_eoi; + unsigned int eoi_flags = XEN_EOI_FLAG_SPURIOUS; set_freezable(); while (!kthread_should_stop()) { @@ -639,16 +641,23 @@ int xen_blkif_schedule(void *arg) if (timeout == 0) goto purge_gnt_list; + do_eoi = ring->waiting_reqs; + ring->waiting_reqs = 0; smp_mb(); /* clear flag *before* checking for work */ - ret = do_block_io_op(ring); + ret = do_block_io_op(ring, &eoi_flags); if (ret > 0) ring->waiting_reqs = 1; if (ret == -EACCES) wait_event_interruptible(ring->shutdown_wq, kthread_should_stop()); + if (do_eoi && !ring->waiting_reqs) { + xen_irq_lateeoi(ring->irq, eoi_flags); + eoi_flags |= XEN_EOI_FLAG_SPURIOUS; + } + purge_gnt_list: if (blkif->vbd.feature_gnt_persistent && time_after(jiffies, ring->next_lru)) { @@ -1121,7 +1130,7 @@ static void end_block_io_op(struct bio *bio) * and transmute it to the block API to hand it over to the proper block disk. */ static int -__do_block_io_op(struct xen_blkif_ring *ring) +__do_block_io_op(struct xen_blkif_ring *ring, unsigned int *eoi_flags) { union blkif_back_rings *blk_rings = &ring->blk_rings; struct blkif_request req; @@ -1144,6 +1153,9 @@ __do_block_io_op(struct xen_blkif_ring *ring) if (RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, rc)) break; + /* We've seen a request, so clear spurious eoi flag. */ + *eoi_flags &= ~XEN_EOI_FLAG_SPURIOUS; + if (kthread_should_stop()) { more_to_do = 1; break; @@ -1202,13 +1214,13 @@ done: } static int -do_block_io_op(struct xen_blkif_ring *ring) +do_block_io_op(struct xen_blkif_ring *ring, unsigned int *eoi_flags) { union blkif_back_rings *blk_rings = &ring->blk_rings; int more_to_do; do { - more_to_do = __do_block_io_op(ring); + more_to_do = __do_block_io_op(ring, eoi_flags); if (more_to_do) break; diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index c4cd68116e7f..192ca58cc3c7 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -229,9 +229,8 @@ static int xen_blkif_map(struct xen_blkif_ring *ring, grant_ref_t *gref, BUG(); } - err = bind_interdomain_evtchn_to_irqhandler(blkif->domid, evtchn, - xen_blkif_be_int, 0, - "blkif-backend", ring); + err = bind_interdomain_evtchn_to_irqhandler_lateeoi(blkif->domid, + evtchn, xen_blkif_be_int, 0, "blkif-backend", ring); if (err < 0) { xenbus_unmap_ring_vfree(blkif->be->dev, ring->blk_ring); ring->blk_rings.common.sring = NULL; -- cgit v1.2.3 From e6ea898e56029b8ea77f7696b496629b7b644c79 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 7 Sep 2020 15:47:28 +0200 Subject: xen/netback: use lateeoi irq binding commit 23025393dbeb3b8b3b60ebfa724cdae384992e27 upstream. In order to reduce the chance for the system becoming unresponsive due to event storms triggered by a misbehaving netfront use the lateeoi irq binding for netback and unmask the event channel only just before going to sleep waiting for new events. Make sure not to issue an EOI when none is pending by introducing an eoi_pending element to struct xenvif_queue. When no request has been consumed set the spurious flag when sending the EOI for an interrupt. This is part of XSA-332. Cc: stable@vger.kernel.org Reported-by: Julien Grall Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich Reviewed-by: Wei Liu Signed-off-by: Greg Kroah-Hartman --- drivers/net/xen-netback/common.h | 15 +++++++++ drivers/net/xen-netback/interface.c | 61 +++++++++++++++++++++++++++++++------ drivers/net/xen-netback/netback.c | 11 ++++++- drivers/net/xen-netback/rx.c | 13 +++++--- 4 files changed, 86 insertions(+), 14 deletions(-) diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h index 05847eb91a1b..32fe131ba366 100644 --- a/drivers/net/xen-netback/common.h +++ b/drivers/net/xen-netback/common.h @@ -140,6 +140,20 @@ struct xenvif_queue { /* Per-queue data for xenvif */ char name[QUEUE_NAME_SIZE]; /* DEVNAME-qN */ struct xenvif *vif; /* Parent VIF */ + /* + * TX/RX common EOI handling. + * When feature-split-event-channels = 0, interrupt handler sets + * NETBK_COMMON_EOI, otherwise NETBK_RX_EOI and NETBK_TX_EOI are set + * by the RX and TX interrupt handlers. + * RX and TX handler threads will issue an EOI when either + * NETBK_COMMON_EOI or their specific bits (NETBK_RX_EOI or + * NETBK_TX_EOI) are set and they will reset those bits. + */ + atomic_t eoi_pending; +#define NETBK_RX_EOI 0x01 +#define NETBK_TX_EOI 0x02 +#define NETBK_COMMON_EOI 0x04 + /* Use NAPI for guest TX */ struct napi_struct napi; /* When feature-split-event-channels = 0, tx_irq = rx_irq. */ @@ -375,6 +389,7 @@ int xenvif_dealloc_kthread(void *data); irqreturn_t xenvif_ctrl_irq_fn(int irq, void *data); +bool xenvif_have_rx_work(struct xenvif_queue *queue, bool test_kthread); void xenvif_rx_action(struct xenvif_queue *queue); void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb); diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index 103ed00775eb..e889488b84a0 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -77,12 +77,28 @@ int xenvif_schedulable(struct xenvif *vif) !vif->disabled; } +static bool xenvif_handle_tx_interrupt(struct xenvif_queue *queue) +{ + bool rc; + + rc = RING_HAS_UNCONSUMED_REQUESTS(&queue->tx); + if (rc) + napi_schedule(&queue->napi); + return rc; +} + static irqreturn_t xenvif_tx_interrupt(int irq, void *dev_id) { struct xenvif_queue *queue = dev_id; + int old; - if (RING_HAS_UNCONSUMED_REQUESTS(&queue->tx)) - napi_schedule(&queue->napi); + old = atomic_fetch_or(NETBK_TX_EOI, &queue->eoi_pending); + WARN(old & NETBK_TX_EOI, "Interrupt while EOI pending\n"); + + if (!xenvif_handle_tx_interrupt(queue)) { + atomic_andnot(NETBK_TX_EOI, &queue->eoi_pending); + xen_irq_lateeoi(irq, XEN_EOI_FLAG_SPURIOUS); + } return IRQ_HANDLED; } @@ -116,19 +132,46 @@ static int xenvif_poll(struct napi_struct *napi, int budget) return work_done; } +static bool xenvif_handle_rx_interrupt(struct xenvif_queue *queue) +{ + bool rc; + + rc = xenvif_have_rx_work(queue, false); + if (rc) + xenvif_kick_thread(queue); + return rc; +} + static irqreturn_t xenvif_rx_interrupt(int irq, void *dev_id) { struct xenvif_queue *queue = dev_id; + int old; - xenvif_kick_thread(queue); + old = atomic_fetch_or(NETBK_RX_EOI, &queue->eoi_pending); + WARN(old & NETBK_RX_EOI, "Interrupt while EOI pending\n"); + + if (!xenvif_handle_rx_interrupt(queue)) { + atomic_andnot(NETBK_RX_EOI, &queue->eoi_pending); + xen_irq_lateeoi(irq, XEN_EOI_FLAG_SPURIOUS); + } return IRQ_HANDLED; } irqreturn_t xenvif_interrupt(int irq, void *dev_id) { - xenvif_tx_interrupt(irq, dev_id); - xenvif_rx_interrupt(irq, dev_id); + struct xenvif_queue *queue = dev_id; + int old; + + old = atomic_fetch_or(NETBK_COMMON_EOI, &queue->eoi_pending); + WARN(old, "Interrupt while EOI pending\n"); + + /* Use bitwise or as we need to call both functions. */ + if ((!xenvif_handle_tx_interrupt(queue) | + !xenvif_handle_rx_interrupt(queue))) { + atomic_andnot(NETBK_COMMON_EOI, &queue->eoi_pending); + xen_irq_lateeoi(irq, XEN_EOI_FLAG_SPURIOUS); + } return IRQ_HANDLED; } @@ -595,7 +638,7 @@ int xenvif_connect_ctrl(struct xenvif *vif, grant_ref_t ring_ref, shared = (struct xen_netif_ctrl_sring *)addr; BACK_RING_INIT(&vif->ctrl, shared, XEN_PAGE_SIZE); - err = bind_interdomain_evtchn_to_irq(vif->domid, evtchn); + err = bind_interdomain_evtchn_to_irq_lateeoi(vif->domid, evtchn); if (err < 0) goto err_unmap; @@ -653,7 +696,7 @@ int xenvif_connect_data(struct xenvif_queue *queue, if (tx_evtchn == rx_evtchn) { /* feature-split-event-channels == 0 */ - err = bind_interdomain_evtchn_to_irqhandler( + err = bind_interdomain_evtchn_to_irqhandler_lateeoi( queue->vif->domid, tx_evtchn, xenvif_interrupt, 0, queue->name, queue); if (err < 0) @@ -664,7 +707,7 @@ int xenvif_connect_data(struct xenvif_queue *queue, /* feature-split-event-channels == 1 */ snprintf(queue->tx_irq_name, sizeof(queue->tx_irq_name), "%s-tx", queue->name); - err = bind_interdomain_evtchn_to_irqhandler( + err = bind_interdomain_evtchn_to_irqhandler_lateeoi( queue->vif->domid, tx_evtchn, xenvif_tx_interrupt, 0, queue->tx_irq_name, queue); if (err < 0) @@ -674,7 +717,7 @@ int xenvif_connect_data(struct xenvif_queue *queue, snprintf(queue->rx_irq_name, sizeof(queue->rx_irq_name), "%s-rx", queue->name); - err = bind_interdomain_evtchn_to_irqhandler( + err = bind_interdomain_evtchn_to_irqhandler_lateeoi( queue->vif->domid, rx_evtchn, xenvif_rx_interrupt, 0, queue->rx_irq_name, queue); if (err < 0) diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 0020b2e8c279..fa1ac0abc924 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -162,6 +162,10 @@ void xenvif_napi_schedule_or_enable_events(struct xenvif_queue *queue) if (more_to_do) napi_schedule(&queue->napi); + else if (atomic_fetch_andnot(NETBK_TX_EOI | NETBK_COMMON_EOI, + &queue->eoi_pending) & + (NETBK_TX_EOI | NETBK_COMMON_EOI)) + xen_irq_lateeoi(queue->tx_irq, 0); } static void tx_add_credit(struct xenvif_queue *queue) @@ -1622,9 +1626,14 @@ static bool xenvif_ctrl_work_todo(struct xenvif *vif) irqreturn_t xenvif_ctrl_irq_fn(int irq, void *data) { struct xenvif *vif = data; + unsigned int eoi_flag = XEN_EOI_FLAG_SPURIOUS; - while (xenvif_ctrl_work_todo(vif)) + while (xenvif_ctrl_work_todo(vif)) { xenvif_ctrl_action(vif); + eoi_flag = 0; + } + + xen_irq_lateeoi(irq, eoi_flag); return IRQ_HANDLED; } diff --git a/drivers/net/xen-netback/rx.c b/drivers/net/xen-netback/rx.c index ef5887037b22..9b62f65b630e 100644 --- a/drivers/net/xen-netback/rx.c +++ b/drivers/net/xen-netback/rx.c @@ -490,13 +490,13 @@ static bool xenvif_rx_queue_ready(struct xenvif_queue *queue) return queue->stalled && prod - cons >= 1; } -static bool xenvif_have_rx_work(struct xenvif_queue *queue) +bool xenvif_have_rx_work(struct xenvif_queue *queue, bool test_kthread) { return xenvif_rx_ring_slots_available(queue) || (queue->vif->stall_timeout && (xenvif_rx_queue_stalled(queue) || xenvif_rx_queue_ready(queue))) || - kthread_should_stop() || + (test_kthread && kthread_should_stop()) || queue->vif->disabled; } @@ -527,15 +527,20 @@ static void xenvif_wait_for_rx_work(struct xenvif_queue *queue) { DEFINE_WAIT(wait); - if (xenvif_have_rx_work(queue)) + if (xenvif_have_rx_work(queue, true)) return; for (;;) { long ret; prepare_to_wait(&queue->wq, &wait, TASK_INTERRUPTIBLE); - if (xenvif_have_rx_work(queue)) + if (xenvif_have_rx_work(queue, true)) break; + if (atomic_fetch_andnot(NETBK_RX_EOI | NETBK_COMMON_EOI, + &queue->eoi_pending) & + (NETBK_RX_EOI | NETBK_COMMON_EOI)) + xen_irq_lateeoi(queue->rx_irq, 0); + ret = schedule_timeout(xenvif_rx_queue_timeout(queue)); if (!ret) break; -- cgit v1.2.3 From 5441639a38df4c416fdfa68eecbafe4a782bbd2d Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 7 Sep 2020 15:47:28 +0200 Subject: xen/scsiback: use lateeoi irq binding commit 86991b6e7ea6c613b7692f65106076943449b6b7 upstream. In order to reduce the chance for the system becoming unresponsive due to event storms triggered by a misbehaving scsifront use the lateeoi irq binding for scsiback and unmask the event channel only just before leaving the event handling function. In case of a ring protocol error don't issue an EOI in order to avoid the possibility to use that for producing an event storm. This at once will result in no further call of scsiback_irq_fn(), so the ring_error struct member can be dropped and scsiback_do_cmd_fn() can signal the protocol error via a negative return value. This is part of XSA-332. Cc: stable@vger.kernel.org Reported-by: Julien Grall Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich Reviewed-by: Wei Liu Signed-off-by: Greg Kroah-Hartman --- drivers/xen/xen-scsiback.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/drivers/xen/xen-scsiback.c b/drivers/xen/xen-scsiback.c index ba0942e481bc..33d6499d8472 100644 --- a/drivers/xen/xen-scsiback.c +++ b/drivers/xen/xen-scsiback.c @@ -91,7 +91,6 @@ struct vscsibk_info { unsigned int irq; struct vscsiif_back_ring ring; - int ring_error; spinlock_t ring_lock; atomic_t nr_unreplied_reqs; @@ -722,7 +721,8 @@ static struct vscsibk_pend *prepare_pending_reqs(struct vscsibk_info *info, return pending_req; } -static int scsiback_do_cmd_fn(struct vscsibk_info *info) +static int scsiback_do_cmd_fn(struct vscsibk_info *info, + unsigned int *eoi_flags) { struct vscsiif_back_ring *ring = &info->ring; struct vscsiif_request ring_req; @@ -739,11 +739,12 @@ static int scsiback_do_cmd_fn(struct vscsibk_info *info) rc = ring->rsp_prod_pvt; pr_warn("Dom%d provided bogus ring requests (%#x - %#x = %u). Halting ring processing\n", info->domid, rp, rc, rp - rc); - info->ring_error = 1; - return 0; + return -EINVAL; } while ((rc != rp)) { + *eoi_flags &= ~XEN_EOI_FLAG_SPURIOUS; + if (RING_REQUEST_CONS_OVERFLOW(ring, rc)) break; @@ -802,13 +803,16 @@ static int scsiback_do_cmd_fn(struct vscsibk_info *info) static irqreturn_t scsiback_irq_fn(int irq, void *dev_id) { struct vscsibk_info *info = dev_id; + int rc; + unsigned int eoi_flags = XEN_EOI_FLAG_SPURIOUS; - if (info->ring_error) - return IRQ_HANDLED; - - while (scsiback_do_cmd_fn(info)) + while ((rc = scsiback_do_cmd_fn(info, &eoi_flags)) > 0) cond_resched(); + /* In case of a ring error we keep the event channel masked. */ + if (!rc) + xen_irq_lateeoi(irq, eoi_flags); + return IRQ_HANDLED; } @@ -829,7 +833,7 @@ static int scsiback_init_sring(struct vscsibk_info *info, grant_ref_t ring_ref, sring = (struct vscsiif_sring *)area; BACK_RING_INIT(&info->ring, sring, PAGE_SIZE); - err = bind_interdomain_evtchn_to_irq(info->domid, evtchn); + err = bind_interdomain_evtchn_to_irq_lateeoi(info->domid, evtchn); if (err < 0) goto unmap_page; @@ -1252,7 +1256,6 @@ static int scsiback_probe(struct xenbus_device *dev, info->domid = dev->otherend_id; spin_lock_init(&info->ring_lock); - info->ring_error = 0; atomic_set(&info->nr_unreplied_reqs, 0); init_waitqueue_head(&info->waiting_to_free); info->dev = dev; -- cgit v1.2.3 From 9396de462aa6e9f289f2876de8540ba340bd0f89 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 7 Sep 2020 15:47:28 +0200 Subject: xen/pvcallsback: use lateeoi irq binding commit c8d647a326f06a39a8e5f0f1af946eacfa1835f8 upstream. In order to reduce the chance for the system becoming unresponsive due to event storms triggered by a misbehaving pvcallsfront use the lateeoi irq binding for pvcallsback and unmask the event channel only after handling all write requests, which are the ones coming in via an irq. This requires modifying the logic a little bit to not require an event for each write request, but to keep the ioworker running until no further data is found on the ring page to be processed. This is part of XSA-332. Cc: stable@vger.kernel.org Reported-by: Julien Grall Signed-off-by: Juergen Gross Reviewed-by: Stefano Stabellini Reviewed-by: Wei Liu Signed-off-by: Greg Kroah-Hartman --- drivers/xen/pvcalls-back.c | 76 ++++++++++++++++++++++++++++------------------ 1 file changed, 46 insertions(+), 30 deletions(-) diff --git a/drivers/xen/pvcalls-back.c b/drivers/xen/pvcalls-back.c index ffe9bd843922..9439de2ca0e4 100644 --- a/drivers/xen/pvcalls-back.c +++ b/drivers/xen/pvcalls-back.c @@ -66,6 +66,7 @@ struct sock_mapping { atomic_t write; atomic_t io; atomic_t release; + atomic_t eoi; void (*saved_data_ready)(struct sock *sk); struct pvcalls_ioworker ioworker; }; @@ -87,7 +88,7 @@ static int pvcalls_back_release_active(struct xenbus_device *dev, struct pvcalls_fedata *fedata, struct sock_mapping *map); -static void pvcalls_conn_back_read(void *opaque) +static bool pvcalls_conn_back_read(void *opaque) { struct sock_mapping *map = (struct sock_mapping *)opaque; struct msghdr msg; @@ -107,17 +108,17 @@ static void pvcalls_conn_back_read(void *opaque) virt_mb(); if (error) - return; + return false; size = pvcalls_queued(prod, cons, array_size); if (size >= array_size) - return; + return false; spin_lock_irqsave(&map->sock->sk->sk_receive_queue.lock, flags); if (skb_queue_empty(&map->sock->sk->sk_receive_queue)) { atomic_set(&map->read, 0); spin_unlock_irqrestore(&map->sock->sk->sk_receive_queue.lock, flags); - return; + return true; } spin_unlock_irqrestore(&map->sock->sk->sk_receive_queue.lock, flags); wanted = array_size - size; @@ -141,7 +142,7 @@ static void pvcalls_conn_back_read(void *opaque) ret = inet_recvmsg(map->sock, &msg, wanted, MSG_DONTWAIT); WARN_ON(ret > wanted); if (ret == -EAGAIN) /* shouldn't happen */ - return; + return true; if (!ret) ret = -ENOTCONN; spin_lock_irqsave(&map->sock->sk->sk_receive_queue.lock, flags); @@ -160,10 +161,10 @@ static void pvcalls_conn_back_read(void *opaque) virt_wmb(); notify_remote_via_irq(map->irq); - return; + return true; } -static void pvcalls_conn_back_write(struct sock_mapping *map) +static bool pvcalls_conn_back_write(struct sock_mapping *map) { struct pvcalls_data_intf *intf = map->ring; struct pvcalls_data *data = &map->data; @@ -180,7 +181,7 @@ static void pvcalls_conn_back_write(struct sock_mapping *map) array_size = XEN_FLEX_RING_SIZE(map->ring_order); size = pvcalls_queued(prod, cons, array_size); if (size == 0) - return; + return false; memset(&msg, 0, sizeof(msg)); msg.msg_flags |= MSG_DONTWAIT; @@ -198,12 +199,11 @@ static void pvcalls_conn_back_write(struct sock_mapping *map) atomic_set(&map->write, 0); ret = inet_sendmsg(map->sock, &msg, size); - if (ret == -EAGAIN || (ret >= 0 && ret < size)) { + if (ret == -EAGAIN) { atomic_inc(&map->write); atomic_inc(&map->io); + return true; } - if (ret == -EAGAIN) - return; /* write the data, then update the indexes */ virt_wmb(); @@ -216,9 +216,13 @@ static void pvcalls_conn_back_write(struct sock_mapping *map) } /* update the indexes, then notify the other end */ virt_wmb(); - if (prod != cons + ret) + if (prod != cons + ret) { atomic_inc(&map->write); + atomic_inc(&map->io); + } notify_remote_via_irq(map->irq); + + return true; } static void pvcalls_back_ioworker(struct work_struct *work) @@ -227,6 +231,7 @@ static void pvcalls_back_ioworker(struct work_struct *work) struct pvcalls_ioworker, register_work); struct sock_mapping *map = container_of(ioworker, struct sock_mapping, ioworker); + unsigned int eoi_flags = XEN_EOI_FLAG_SPURIOUS; while (atomic_read(&map->io) > 0) { if (atomic_read(&map->release) > 0) { @@ -234,10 +239,18 @@ static void pvcalls_back_ioworker(struct work_struct *work) return; } - if (atomic_read(&map->read) > 0) - pvcalls_conn_back_read(map); - if (atomic_read(&map->write) > 0) - pvcalls_conn_back_write(map); + if (atomic_read(&map->read) > 0 && + pvcalls_conn_back_read(map)) + eoi_flags = 0; + if (atomic_read(&map->write) > 0 && + pvcalls_conn_back_write(map)) + eoi_flags = 0; + + if (atomic_read(&map->eoi) > 0 && !atomic_read(&map->write)) { + atomic_set(&map->eoi, 0); + xen_irq_lateeoi(map->irq, eoi_flags); + eoi_flags = XEN_EOI_FLAG_SPURIOUS; + } atomic_dec(&map->io); } @@ -334,12 +347,9 @@ static struct sock_mapping *pvcalls_new_active_socket( goto out; map->bytes = page; - ret = bind_interdomain_evtchn_to_irqhandler(fedata->dev->otherend_id, - evtchn, - pvcalls_back_conn_event, - 0, - "pvcalls-backend", - map); + ret = bind_interdomain_evtchn_to_irqhandler_lateeoi( + fedata->dev->otherend_id, evtchn, + pvcalls_back_conn_event, 0, "pvcalls-backend", map); if (ret < 0) goto out; map->irq = ret; @@ -873,15 +883,18 @@ static irqreturn_t pvcalls_back_event(int irq, void *dev_id) { struct xenbus_device *dev = dev_id; struct pvcalls_fedata *fedata = NULL; + unsigned int eoi_flags = XEN_EOI_FLAG_SPURIOUS; - if (dev == NULL) - return IRQ_HANDLED; + if (dev) { + fedata = dev_get_drvdata(&dev->dev); + if (fedata) { + pvcalls_back_work(fedata); + eoi_flags = 0; + } + } - fedata = dev_get_drvdata(&dev->dev); - if (fedata == NULL) - return IRQ_HANDLED; + xen_irq_lateeoi(irq, eoi_flags); - pvcalls_back_work(fedata); return IRQ_HANDLED; } @@ -891,12 +904,15 @@ static irqreturn_t pvcalls_back_conn_event(int irq, void *sock_map) struct pvcalls_ioworker *iow; if (map == NULL || map->sock == NULL || map->sock->sk == NULL || - map->sock->sk->sk_user_data != map) + map->sock->sk->sk_user_data != map) { + xen_irq_lateeoi(irq, 0); return IRQ_HANDLED; + } iow = &map->ioworker; atomic_inc(&map->write); + atomic_inc(&map->eoi); atomic_inc(&map->io); queue_work(iow->wq, &iow->register_work); @@ -931,7 +947,7 @@ static int backend_connect(struct xenbus_device *dev) goto error; } - err = bind_interdomain_evtchn_to_irq(dev->otherend_id, evtchn); + err = bind_interdomain_evtchn_to_irq_lateeoi(dev->otherend_id, evtchn); if (err < 0) goto error; fedata->irq = err; -- cgit v1.2.3 From 48b533aa838d1cf49e717775cb625caf86770a34 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 7 Sep 2020 15:47:29 +0200 Subject: xen/pciback: use lateeoi irq binding commit c2711441bc961b37bba0615dd7135857d189035f upstream. In order to reduce the chance for the system becoming unresponsive due to event storms triggered by a misbehaving pcifront use the lateeoi irq binding for pciback and unmask the event channel only just before leaving the event handling function. Restructure the handling to support that scheme. Basically an event can come in for two reasons: either a normal request for a pciback action, which is handled in a worker, or in case the guest has finished an AER request which was requested by pciback. When an AER request is issued to the guest and a normal pciback action is currently active issue an EOI early in order to be able to receive another event when the AER request has been finished by the guest. Let the worker processing the normal requests run until no further request is pending, instead of starting a new worker ion that case. Issue the EOI only just before leaving the worker. This scheme allows to drop calling the generic function xen_pcibk_test_and_schedule_op() after processing of any request as the handling of both request types is now separated more cleanly. This is part of XSA-332. Cc: stable@vger.kernel.org Reported-by: Julien Grall Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich Reviewed-by: Wei Liu Signed-off-by: Greg Kroah-Hartman --- drivers/xen/xen-pciback/pci_stub.c | 14 +++++----- drivers/xen/xen-pciback/pciback.h | 12 +++++++-- drivers/xen/xen-pciback/pciback_ops.c | 48 +++++++++++++++++++++++++++-------- drivers/xen/xen-pciback/xenbus.c | 2 +- 4 files changed, 56 insertions(+), 20 deletions(-) diff --git a/drivers/xen/xen-pciback/pci_stub.c b/drivers/xen/xen-pciback/pci_stub.c index 097410a7cdb7..adf3aae2939f 100644 --- a/drivers/xen/xen-pciback/pci_stub.c +++ b/drivers/xen/xen-pciback/pci_stub.c @@ -733,10 +733,17 @@ static pci_ers_result_t common_process(struct pcistub_device *psdev, wmb(); notify_remote_via_irq(pdev->evtchn_irq); + /* Enable IRQ to signal "request done". */ + xen_pcibk_lateeoi(pdev, 0); + ret = wait_event_timeout(xen_pcibk_aer_wait_queue, !(test_bit(_XEN_PCIB_active, (unsigned long *) &sh_info->flags)), 300*HZ); + /* Enable IRQ for pcifront request if not already active. */ + if (!test_bit(_PDEVF_op_active, &pdev->flags)) + xen_pcibk_lateeoi(pdev, 0); + if (!ret) { if (test_bit(_XEN_PCIB_active, (unsigned long *)&sh_info->flags)) { @@ -750,13 +757,6 @@ static pci_ers_result_t common_process(struct pcistub_device *psdev, } clear_bit(_PCIB_op_pending, (unsigned long *)&pdev->flags); - if (test_bit(_XEN_PCIF_active, - (unsigned long *)&sh_info->flags)) { - dev_dbg(&psdev->dev->dev, - "schedule pci_conf service in " DRV_NAME "\n"); - xen_pcibk_test_and_schedule_op(psdev->pdev); - } - res = (pci_ers_result_t)aer_op->err; return res; } diff --git a/drivers/xen/xen-pciback/pciback.h b/drivers/xen/xen-pciback/pciback.h index 263c059bff90..235cdfe13494 100644 --- a/drivers/xen/xen-pciback/pciback.h +++ b/drivers/xen/xen-pciback/pciback.h @@ -14,6 +14,7 @@ #include #include #include +#include #include #define DRV_NAME "xen-pciback" @@ -27,6 +28,8 @@ struct pci_dev_entry { #define PDEVF_op_active (1<<(_PDEVF_op_active)) #define _PCIB_op_pending (1) #define PCIB_op_pending (1<<(_PCIB_op_pending)) +#define _EOI_pending (2) +#define EOI_pending (1<<(_EOI_pending)) struct xen_pcibk_device { void *pci_dev_data; @@ -182,12 +185,17 @@ static inline void xen_pcibk_release_devices(struct xen_pcibk_device *pdev) irqreturn_t xen_pcibk_handle_event(int irq, void *dev_id); void xen_pcibk_do_op(struct work_struct *data); +static inline void xen_pcibk_lateeoi(struct xen_pcibk_device *pdev, + unsigned int eoi_flag) +{ + if (test_and_clear_bit(_EOI_pending, &pdev->flags)) + xen_irq_lateeoi(pdev->evtchn_irq, eoi_flag); +} + int xen_pcibk_xenbus_register(void); void xen_pcibk_xenbus_unregister(void); extern int verbose_request; - -void xen_pcibk_test_and_schedule_op(struct xen_pcibk_device *pdev); #endif /* Handles shared IRQs that can to device domain and control domain. */ diff --git a/drivers/xen/xen-pciback/pciback_ops.c b/drivers/xen/xen-pciback/pciback_ops.c index 787966f44589..c4ed2c634ca7 100644 --- a/drivers/xen/xen-pciback/pciback_ops.c +++ b/drivers/xen/xen-pciback/pciback_ops.c @@ -297,26 +297,41 @@ int xen_pcibk_disable_msix(struct xen_pcibk_device *pdev, return 0; } #endif + +static inline bool xen_pcibk_test_op_pending(struct xen_pcibk_device *pdev) +{ + return test_bit(_XEN_PCIF_active, + (unsigned long *)&pdev->sh_info->flags) && + !test_and_set_bit(_PDEVF_op_active, &pdev->flags); +} + /* * Now the same evtchn is used for both pcifront conf_read_write request * as well as pcie aer front end ack. We use a new work_queue to schedule * xen_pcibk conf_read_write service for avoiding confict with aer_core * do_recovery job which also use the system default work_queue */ -void xen_pcibk_test_and_schedule_op(struct xen_pcibk_device *pdev) +static void xen_pcibk_test_and_schedule_op(struct xen_pcibk_device *pdev) { + bool eoi = true; + /* Check that frontend is requesting an operation and that we are not * already processing a request */ - if (test_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags) - && !test_and_set_bit(_PDEVF_op_active, &pdev->flags)) { + if (xen_pcibk_test_op_pending(pdev)) { schedule_work(&pdev->op_work); + eoi = false; } /*_XEN_PCIB_active should have been cleared by pcifront. And also make sure xen_pcibk is waiting for ack by checking _PCIB_op_pending*/ if (!test_bit(_XEN_PCIB_active, (unsigned long *)&pdev->sh_info->flags) && test_bit(_PCIB_op_pending, &pdev->flags)) { wake_up(&xen_pcibk_aer_wait_queue); + eoi = false; } + + /* EOI if there was nothing to do. */ + if (eoi) + xen_pcibk_lateeoi(pdev, XEN_EOI_FLAG_SPURIOUS); } /* Performing the configuration space reads/writes must not be done in atomic @@ -324,10 +339,8 @@ void xen_pcibk_test_and_schedule_op(struct xen_pcibk_device *pdev) * use of semaphores). This function is intended to be called from a work * queue in process context taking a struct xen_pcibk_device as a parameter */ -void xen_pcibk_do_op(struct work_struct *data) +static void xen_pcibk_do_one_op(struct xen_pcibk_device *pdev) { - struct xen_pcibk_device *pdev = - container_of(data, struct xen_pcibk_device, op_work); struct pci_dev *dev; struct xen_pcibk_dev_data *dev_data = NULL; struct xen_pci_op *op = &pdev->op; @@ -400,16 +413,31 @@ void xen_pcibk_do_op(struct work_struct *data) smp_mb__before_atomic(); /* /after/ clearing PCIF_active */ clear_bit(_PDEVF_op_active, &pdev->flags); smp_mb__after_atomic(); /* /before/ final check for work */ +} - /* Check to see if the driver domain tried to start another request in - * between clearing _XEN_PCIF_active and clearing _PDEVF_op_active. - */ - xen_pcibk_test_and_schedule_op(pdev); +void xen_pcibk_do_op(struct work_struct *data) +{ + struct xen_pcibk_device *pdev = + container_of(data, struct xen_pcibk_device, op_work); + + do { + xen_pcibk_do_one_op(pdev); + } while (xen_pcibk_test_op_pending(pdev)); + + xen_pcibk_lateeoi(pdev, 0); } irqreturn_t xen_pcibk_handle_event(int irq, void *dev_id) { struct xen_pcibk_device *pdev = dev_id; + bool eoi; + + /* IRQs might come in before pdev->evtchn_irq is written. */ + if (unlikely(pdev->evtchn_irq != irq)) + pdev->evtchn_irq = irq; + + eoi = test_and_set_bit(_EOI_pending, &pdev->flags); + WARN(eoi, "IRQ while EOI pending\n"); xen_pcibk_test_and_schedule_op(pdev); diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c index 833b2d2c4318..e7a670235965 100644 --- a/drivers/xen/xen-pciback/xenbus.c +++ b/drivers/xen/xen-pciback/xenbus.c @@ -123,7 +123,7 @@ static int xen_pcibk_do_attach(struct xen_pcibk_device *pdev, int gnt_ref, pdev->sh_info = vaddr; - err = bind_interdomain_evtchn_to_irqhandler( + err = bind_interdomain_evtchn_to_irqhandler_lateeoi( pdev->xdev->otherend_id, remote_evtchn, xen_pcibk_handle_event, 0, DRV_NAME, pdev); if (err < 0) { -- cgit v1.2.3 From b7d6a66e21722df4ad2ea2eba1e3bc7b62911dbc Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 7 Sep 2020 15:47:29 +0200 Subject: xen/events: switch user event channels to lateeoi model commit c44b849cee8c3ac587da3b0980e01f77500d158c upstream. Instead of disabling the irq when an event is received and enabling it again when handled by the user process use the lateeoi model. This is part of XSA-332. Cc: stable@vger.kernel.org Reported-by: Julien Grall Signed-off-by: Juergen Gross Tested-by: Stefano Stabellini Reviewed-by: Stefano Stabellini Reviewed-by: Jan Beulich Reviewed-by: Wei Liu Signed-off-by: Greg Kroah-Hartman --- drivers/xen/evtchn.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c index 052b55a14ebc..a43930191e20 100644 --- a/drivers/xen/evtchn.c +++ b/drivers/xen/evtchn.c @@ -166,7 +166,6 @@ static irqreturn_t evtchn_interrupt(int irq, void *data) "Interrupt for port %d, but apparently not enabled; per-user %p\n", evtchn->port, u); - disable_irq_nosync(irq); evtchn->enabled = false; spin_lock(&u->ring_prod_lock); @@ -292,7 +291,7 @@ static ssize_t evtchn_write(struct file *file, const char __user *buf, evtchn = find_evtchn(u, port); if (evtchn && !evtchn->enabled) { evtchn->enabled = true; - enable_irq(irq_from_evtchn(port)); + xen_irq_lateeoi(irq_from_evtchn(port), 0); } } @@ -392,8 +391,8 @@ static int evtchn_bind_to_user(struct per_user_data *u, int port) if (rc < 0) goto err; - rc = bind_evtchn_to_irqhandler(port, evtchn_interrupt, 0, - u->name, evtchn); + rc = bind_evtchn_to_irqhandler_lateeoi(port, evtchn_interrupt, 0, + u->name, evtchn); if (rc < 0) goto err; -- cgit v1.2.3 From 25c23f03345764ec290566f502c2bce84c2a458f Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Sun, 13 Sep 2020 14:23:02 +0200 Subject: xen/events: use a common cpu hotplug hook for event channels commit 7beb290caa2adb0a399e735a1e175db9aae0523a upstream. Today only fifo event channels have a cpu hotplug callback. In order to prepare for more percpu (de)init work move that callback into events_base.c and add percpu_init() and percpu_deinit() hooks to struct evtchn_ops. This is part of XSA-332. Cc: stable@vger.kernel.org Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich Reviewed-by: Wei Liu Signed-off-by: Greg Kroah-Hartman --- drivers/xen/events/events_base.c | 25 ++++++++++++++++++++++ drivers/xen/events/events_fifo.c | 40 +++++++++++++++++------------------- drivers/xen/events/events_internal.h | 3 +++ 3 files changed, 47 insertions(+), 21 deletions(-) diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index 1a1325e09d50..b2b5eccdc5df 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -34,6 +34,7 @@ #include #include #include +#include #ifdef CONFIG_X86 #include @@ -1834,6 +1835,26 @@ void xen_callback_vector(void) {} static bool fifo_events = true; module_param(fifo_events, bool, 0); +static int xen_evtchn_cpu_prepare(unsigned int cpu) +{ + int ret = 0; + + if (evtchn_ops->percpu_init) + ret = evtchn_ops->percpu_init(cpu); + + return ret; +} + +static int xen_evtchn_cpu_dead(unsigned int cpu) +{ + int ret = 0; + + if (evtchn_ops->percpu_deinit) + ret = evtchn_ops->percpu_deinit(cpu); + + return ret; +} + void __init xen_init_IRQ(void) { int ret = -EINVAL; @@ -1844,6 +1865,10 @@ void __init xen_init_IRQ(void) if (ret < 0) xen_evtchn_2l_init(); + cpuhp_setup_state_nocalls(CPUHP_XEN_EVTCHN_PREPARE, + "xen/evtchn:prepare", + xen_evtchn_cpu_prepare, xen_evtchn_cpu_dead); + evtchn_to_irq = kcalloc(EVTCHN_ROW(xen_evtchn_max_channels()), sizeof(*evtchn_to_irq), GFP_KERNEL); BUG_ON(!evtchn_to_irq); diff --git a/drivers/xen/events/events_fifo.c b/drivers/xen/events/events_fifo.c index 3071256a9413..59e6002c9699 100644 --- a/drivers/xen/events/events_fifo.c +++ b/drivers/xen/events/events_fifo.c @@ -385,21 +385,6 @@ static void evtchn_fifo_resume(void) event_array_pages = 0; } -static const struct evtchn_ops evtchn_ops_fifo = { - .max_channels = evtchn_fifo_max_channels, - .nr_channels = evtchn_fifo_nr_channels, - .setup = evtchn_fifo_setup, - .bind_to_cpu = evtchn_fifo_bind_to_cpu, - .clear_pending = evtchn_fifo_clear_pending, - .set_pending = evtchn_fifo_set_pending, - .is_pending = evtchn_fifo_is_pending, - .test_and_set_mask = evtchn_fifo_test_and_set_mask, - .mask = evtchn_fifo_mask, - .unmask = evtchn_fifo_unmask, - .handle_events = evtchn_fifo_handle_events, - .resume = evtchn_fifo_resume, -}; - static int evtchn_fifo_alloc_control_block(unsigned cpu) { void *control_block = NULL; @@ -422,19 +407,36 @@ static int evtchn_fifo_alloc_control_block(unsigned cpu) return ret; } -static int xen_evtchn_cpu_prepare(unsigned int cpu) +static int evtchn_fifo_percpu_init(unsigned int cpu) { if (!per_cpu(cpu_control_block, cpu)) return evtchn_fifo_alloc_control_block(cpu); return 0; } -static int xen_evtchn_cpu_dead(unsigned int cpu) +static int evtchn_fifo_percpu_deinit(unsigned int cpu) { __evtchn_fifo_handle_events(cpu, true); return 0; } +static const struct evtchn_ops evtchn_ops_fifo = { + .max_channels = evtchn_fifo_max_channels, + .nr_channels = evtchn_fifo_nr_channels, + .setup = evtchn_fifo_setup, + .bind_to_cpu = evtchn_fifo_bind_to_cpu, + .clear_pending = evtchn_fifo_clear_pending, + .set_pending = evtchn_fifo_set_pending, + .is_pending = evtchn_fifo_is_pending, + .test_and_set_mask = evtchn_fifo_test_and_set_mask, + .mask = evtchn_fifo_mask, + .unmask = evtchn_fifo_unmask, + .handle_events = evtchn_fifo_handle_events, + .resume = evtchn_fifo_resume, + .percpu_init = evtchn_fifo_percpu_init, + .percpu_deinit = evtchn_fifo_percpu_deinit, +}; + int __init xen_evtchn_fifo_init(void) { int cpu = smp_processor_id(); @@ -448,9 +450,5 @@ int __init xen_evtchn_fifo_init(void) evtchn_ops = &evtchn_ops_fifo; - cpuhp_setup_state_nocalls(CPUHP_XEN_EVTCHN_PREPARE, - "xen/evtchn:prepare", - xen_evtchn_cpu_prepare, xen_evtchn_cpu_dead); - return ret; } diff --git a/drivers/xen/events/events_internal.h b/drivers/xen/events/events_internal.h index 82938cff6c7a..fef1d645261e 100644 --- a/drivers/xen/events/events_internal.h +++ b/drivers/xen/events/events_internal.h @@ -69,6 +69,9 @@ struct evtchn_ops { void (*handle_events)(unsigned cpu); void (*resume)(void); + + int (*percpu_init)(unsigned int cpu); + int (*percpu_deinit)(unsigned int cpu); }; extern const struct evtchn_ops *evtchn_ops; -- cgit v1.2.3 From 1d628c330fa6e1fe58b00624d46b4ce0a2502f54 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 7 Sep 2020 15:47:30 +0200 Subject: xen/events: defer eoi in case of excessive number of events commit e99502f76271d6bc4e374fe368c50c67a1fd3070 upstream. In case rogue guests are sending events at high frequency it might happen that xen_evtchn_do_upcall() won't stop processing events in dom0. As this is done in irq handling a crash might be the result. In order to avoid that, delay further inter-domain events after some time in xen_evtchn_do_upcall() by forcing eoi processing into a worker on the same cpu, thus inhibiting new events coming in. The time after which eoi processing is to be delayed is configurable via a new module parameter "event_loop_timeout" which specifies the maximum event loop time in jiffies (default: 2, the value was chosen after some tests showing that a value of 2 was the lowest with an only slight drop of dom0 network throughput while multiple guests performed an event storm). How long eoi processing will be delayed can be specified via another parameter "event_eoi_delay" (again in jiffies, default 10, again the value was chosen after testing with different delay values). This is part of XSA-332. Cc: stable@vger.kernel.org Reported-by: Julien Grall Signed-off-by: Juergen Gross Reviewed-by: Stefano Stabellini Reviewed-by: Wei Liu Signed-off-by: Greg Kroah-Hartman --- Documentation/admin-guide/kernel-parameters.txt | 8 + drivers/xen/events/events_2l.c | 7 +- drivers/xen/events/events_base.c | 189 +++++++++++++++++++++++- drivers/xen/events/events_fifo.c | 30 ++-- drivers/xen/events/events_internal.h | 14 +- 5 files changed, 216 insertions(+), 32 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 988a0d2535b2..5b4753e602de 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -5462,6 +5462,14 @@ as generic guest with no PV drivers. Currently support XEN HVM, KVM, HYPER_V and VMWARE guest. + xen.event_eoi_delay= [XEN] + How long to delay EOI handling in case of event + storms (jiffies). Default is 10. + + xen.event_loop_timeout= [XEN] + After which time (jiffies) the event handling loop + should start to delay EOI handling. Default is 2. + xirc2ps_cs= [NET,PCMCIA] Format: ,,,,,[,[,[,]]] diff --git a/drivers/xen/events/events_2l.c b/drivers/xen/events/events_2l.c index e4b75693600e..f026624898e7 100644 --- a/drivers/xen/events/events_2l.c +++ b/drivers/xen/events/events_2l.c @@ -161,7 +161,7 @@ static inline xen_ulong_t active_evtchns(unsigned int cpu, * a bitset of words which contain pending event bits. The second * level is a bitset of pending events themselves. */ -static void evtchn_2l_handle_events(unsigned cpu) +static void evtchn_2l_handle_events(unsigned cpu, struct evtchn_loop_ctrl *ctrl) { int irq; xen_ulong_t pending_words; @@ -242,10 +242,7 @@ static void evtchn_2l_handle_events(unsigned cpu) /* Process port. */ port = (word_idx * BITS_PER_EVTCHN_WORD) + bit_idx; - irq = get_evtchn_to_irq(port); - - if (irq != -1) - generic_handle_irq(irq); + handle_irq_for_port(port, ctrl); bit_idx = (bit_idx + 1) % BITS_PER_EVTCHN_WORD; diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index b2b5eccdc5df..f67b92a488da 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -35,6 +35,8 @@ #include #include #include +#include +#include #ifdef CONFIG_X86 #include @@ -64,6 +66,15 @@ #include "events_internal.h" +#undef MODULE_PARAM_PREFIX +#define MODULE_PARAM_PREFIX "xen." + +static uint __read_mostly event_loop_timeout = 2; +module_param(event_loop_timeout, uint, 0644); + +static uint __read_mostly event_eoi_delay = 10; +module_param(event_eoi_delay, uint, 0644); + const struct evtchn_ops *evtchn_ops; /* @@ -87,6 +98,7 @@ static DEFINE_RWLOCK(evtchn_rwlock); * irq_mapping_update_lock * evtchn_rwlock * IRQ-desc lock + * percpu eoi_list_lock */ static LIST_HEAD(xen_irq_list_head); @@ -119,6 +131,8 @@ static struct irq_chip xen_pirq_chip; static void enable_dynirq(struct irq_data *data); static void disable_dynirq(struct irq_data *data); +static DEFINE_PER_CPU(unsigned int, irq_epoch); + static void clear_evtchn_to_irq_row(unsigned row) { unsigned col; @@ -398,17 +412,120 @@ void notify_remote_via_irq(int irq) } EXPORT_SYMBOL_GPL(notify_remote_via_irq); +struct lateeoi_work { + struct delayed_work delayed; + spinlock_t eoi_list_lock; + struct list_head eoi_list; +}; + +static DEFINE_PER_CPU(struct lateeoi_work, lateeoi); + +static void lateeoi_list_del(struct irq_info *info) +{ + struct lateeoi_work *eoi = &per_cpu(lateeoi, info->eoi_cpu); + unsigned long flags; + + spin_lock_irqsave(&eoi->eoi_list_lock, flags); + list_del_init(&info->eoi_list); + spin_unlock_irqrestore(&eoi->eoi_list_lock, flags); +} + +static void lateeoi_list_add(struct irq_info *info) +{ + struct lateeoi_work *eoi = &per_cpu(lateeoi, info->eoi_cpu); + struct irq_info *elem; + u64 now = get_jiffies_64(); + unsigned long delay; + unsigned long flags; + + if (now < info->eoi_time) + delay = info->eoi_time - now; + else + delay = 1; + + spin_lock_irqsave(&eoi->eoi_list_lock, flags); + + if (list_empty(&eoi->eoi_list)) { + list_add(&info->eoi_list, &eoi->eoi_list); + mod_delayed_work_on(info->eoi_cpu, system_wq, + &eoi->delayed, delay); + } else { + list_for_each_entry_reverse(elem, &eoi->eoi_list, eoi_list) { + if (elem->eoi_time <= info->eoi_time) + break; + } + list_add(&info->eoi_list, &elem->eoi_list); + } + + spin_unlock_irqrestore(&eoi->eoi_list_lock, flags); +} + static void xen_irq_lateeoi_locked(struct irq_info *info) { evtchn_port_t evtchn; + unsigned int cpu; evtchn = info->evtchn; - if (!VALID_EVTCHN(evtchn)) + if (!VALID_EVTCHN(evtchn) || !list_empty(&info->eoi_list)) return; + cpu = info->eoi_cpu; + if (info->eoi_time && info->irq_epoch == per_cpu(irq_epoch, cpu)) { + lateeoi_list_add(info); + return; + } + + info->eoi_time = 0; unmask_evtchn(evtchn); } +static void xen_irq_lateeoi_worker(struct work_struct *work) +{ + struct lateeoi_work *eoi; + struct irq_info *info; + u64 now = get_jiffies_64(); + unsigned long flags; + + eoi = container_of(to_delayed_work(work), struct lateeoi_work, delayed); + + read_lock_irqsave(&evtchn_rwlock, flags); + + while (true) { + spin_lock(&eoi->eoi_list_lock); + + info = list_first_entry_or_null(&eoi->eoi_list, struct irq_info, + eoi_list); + + if (info == NULL || now < info->eoi_time) { + spin_unlock(&eoi->eoi_list_lock); + break; + } + + list_del_init(&info->eoi_list); + + spin_unlock(&eoi->eoi_list_lock); + + info->eoi_time = 0; + + xen_irq_lateeoi_locked(info); + } + + if (info) + mod_delayed_work_on(info->eoi_cpu, system_wq, + &eoi->delayed, info->eoi_time - now); + + read_unlock_irqrestore(&evtchn_rwlock, flags); +} + +static void xen_cpu_init_eoi(unsigned int cpu) +{ + struct lateeoi_work *eoi = &per_cpu(lateeoi, cpu); + + INIT_DELAYED_WORK(&eoi->delayed, xen_irq_lateeoi_worker); + spin_lock_init(&eoi->eoi_list_lock); + INIT_LIST_HEAD(&eoi->eoi_list); +} + void xen_irq_lateeoi(unsigned int irq, unsigned int eoi_flags) { struct irq_info *info; @@ -428,6 +545,7 @@ EXPORT_SYMBOL_GPL(xen_irq_lateeoi); static void xen_irq_init(unsigned irq) { struct irq_info *info; + #ifdef CONFIG_SMP /* By default all event channels notify CPU#0. */ cpumask_copy(irq_get_affinity_mask(irq), cpumask_of(0)); @@ -442,6 +560,7 @@ static void xen_irq_init(unsigned irq) set_info_for_irq(irq, info); + INIT_LIST_HEAD(&info->eoi_list); list_add_tail(&info->list, &xen_irq_list_head); } @@ -497,6 +616,9 @@ static void xen_free_irq(unsigned irq) write_lock_irqsave(&evtchn_rwlock, flags); + if (!list_empty(&info->eoi_list)) + lateeoi_list_del(info); + list_del(&info->list); set_info_for_irq(irq, NULL); @@ -1356,6 +1478,54 @@ void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector) notify_remote_via_irq(irq); } +struct evtchn_loop_ctrl { + ktime_t timeout; + unsigned count; + bool defer_eoi; +}; + +void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl) +{ + int irq; + struct irq_info *info; + + irq = get_evtchn_to_irq(port); + if (irq == -1) + return; + + /* + * Check for timeout every 256 events. + * We are setting the timeout value only after the first 256 + * events in order to not hurt the common case of few loop + * iterations. The 256 is basically an arbitrary value. + * + * In case we are hitting the timeout we need to defer all further + * EOIs in order to ensure to leave the event handling loop rather + * sooner than later. + */ + if (!ctrl->defer_eoi && !(++ctrl->count & 0xff)) { + ktime_t kt = ktime_get(); + + if (!ctrl->timeout) { + kt = ktime_add_ms(kt, + jiffies_to_msecs(event_loop_timeout)); + ctrl->timeout = kt; + } else if (kt > ctrl->timeout) { + ctrl->defer_eoi = true; + } + } + + info = info_for_irq(irq); + + if (ctrl->defer_eoi) { + info->eoi_cpu = smp_processor_id(); + info->irq_epoch = __this_cpu_read(irq_epoch); + info->eoi_time = get_jiffies_64() + event_eoi_delay; + } + + generic_handle_irq(irq); +} + static DEFINE_PER_CPU(unsigned, xed_nesting_count); static void __xen_evtchn_do_upcall(void) @@ -1363,6 +1533,7 @@ static void __xen_evtchn_do_upcall(void) struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu); int cpu = get_cpu(); unsigned count; + struct evtchn_loop_ctrl ctrl = { 0 }; read_lock(&evtchn_rwlock); @@ -1372,7 +1543,7 @@ static void __xen_evtchn_do_upcall(void) if (__this_cpu_inc_return(xed_nesting_count) - 1) goto out; - xen_evtchn_handle_events(cpu); + xen_evtchn_handle_events(cpu, &ctrl); BUG_ON(!irqs_disabled()); @@ -1383,6 +1554,13 @@ static void __xen_evtchn_do_upcall(void) out: read_unlock(&evtchn_rwlock); + /* + * Increment irq_epoch only now to defer EOIs only for + * xen_irq_lateeoi() invocations occurring from inside the loop + * above. + */ + __this_cpu_inc(irq_epoch); + put_cpu(); } @@ -1829,9 +2007,6 @@ void xen_callback_vector(void) void xen_callback_vector(void) {} #endif -#undef MODULE_PARAM_PREFIX -#define MODULE_PARAM_PREFIX "xen." - static bool fifo_events = true; module_param(fifo_events, bool, 0); @@ -1839,6 +2014,8 @@ static int xen_evtchn_cpu_prepare(unsigned int cpu) { int ret = 0; + xen_cpu_init_eoi(cpu); + if (evtchn_ops->percpu_init) ret = evtchn_ops->percpu_init(cpu); @@ -1865,6 +2042,8 @@ void __init xen_init_IRQ(void) if (ret < 0) xen_evtchn_2l_init(); + xen_cpu_init_eoi(smp_processor_id()); + cpuhp_setup_state_nocalls(CPUHP_XEN_EVTCHN_PREPARE, "xen/evtchn:prepare", xen_evtchn_cpu_prepare, xen_evtchn_cpu_dead); diff --git a/drivers/xen/events/events_fifo.c b/drivers/xen/events/events_fifo.c index 59e6002c9699..33462521bfd0 100644 --- a/drivers/xen/events/events_fifo.c +++ b/drivers/xen/events/events_fifo.c @@ -275,19 +275,9 @@ static uint32_t clear_linked(volatile event_word_t *word) return w & EVTCHN_FIFO_LINK_MASK; } -static void handle_irq_for_port(unsigned port) -{ - int irq; - - irq = get_evtchn_to_irq(port); - if (irq != -1) - generic_handle_irq(irq); -} - -static void consume_one_event(unsigned cpu, +static void consume_one_event(unsigned cpu, struct evtchn_loop_ctrl *ctrl, struct evtchn_fifo_control_block *control_block, - unsigned priority, unsigned long *ready, - bool drop) + unsigned priority, unsigned long *ready) { struct evtchn_fifo_queue *q = &per_cpu(cpu_queue, cpu); uint32_t head; @@ -320,16 +310,17 @@ static void consume_one_event(unsigned cpu, clear_bit(priority, ready); if (evtchn_fifo_is_pending(port) && !evtchn_fifo_is_masked(port)) { - if (unlikely(drop)) + if (unlikely(!ctrl)) pr_warn("Dropping pending event for port %u\n", port); else - handle_irq_for_port(port); + handle_irq_for_port(port, ctrl); } q->head[priority] = head; } -static void __evtchn_fifo_handle_events(unsigned cpu, bool drop) +static void __evtchn_fifo_handle_events(unsigned cpu, + struct evtchn_loop_ctrl *ctrl) { struct evtchn_fifo_control_block *control_block; unsigned long ready; @@ -341,14 +332,15 @@ static void __evtchn_fifo_handle_events(unsigned cpu, bool drop) while (ready) { q = find_first_bit(&ready, EVTCHN_FIFO_MAX_QUEUES); - consume_one_event(cpu, control_block, q, &ready, drop); + consume_one_event(cpu, ctrl, control_block, q, &ready); ready |= xchg(&control_block->ready, 0); } } -static void evtchn_fifo_handle_events(unsigned cpu) +static void evtchn_fifo_handle_events(unsigned cpu, + struct evtchn_loop_ctrl *ctrl) { - __evtchn_fifo_handle_events(cpu, false); + __evtchn_fifo_handle_events(cpu, ctrl); } static void evtchn_fifo_resume(void) @@ -416,7 +408,7 @@ static int evtchn_fifo_percpu_init(unsigned int cpu) static int evtchn_fifo_percpu_deinit(unsigned int cpu) { - __evtchn_fifo_handle_events(cpu, true); + __evtchn_fifo_handle_events(cpu, NULL); return 0; } diff --git a/drivers/xen/events/events_internal.h b/drivers/xen/events/events_internal.h index fef1d645261e..756c87532d33 100644 --- a/drivers/xen/events/events_internal.h +++ b/drivers/xen/events/events_internal.h @@ -30,11 +30,15 @@ enum xen_irq_type { */ struct irq_info { struct list_head list; + struct list_head eoi_list; int refcnt; enum xen_irq_type type; /* type */ unsigned irq; unsigned int evtchn; /* event channel */ unsigned short cpu; /* cpu bound */ + unsigned short eoi_cpu; /* EOI must happen on this cpu */ + unsigned int irq_epoch; /* If eoi_cpu valid: irq_epoch of event */ + u64 eoi_time; /* Time in jiffies when to EOI. */ union { unsigned short virq; @@ -53,6 +57,8 @@ struct irq_info { #define PIRQ_SHAREABLE (1 << 1) #define PIRQ_MSI_GROUP (1 << 2) +struct evtchn_loop_ctrl; + struct evtchn_ops { unsigned (*max_channels)(void); unsigned (*nr_channels)(void); @@ -67,7 +73,7 @@ struct evtchn_ops { void (*mask)(unsigned port); void (*unmask)(unsigned port); - void (*handle_events)(unsigned cpu); + void (*handle_events)(unsigned cpu, struct evtchn_loop_ctrl *ctrl); void (*resume)(void); int (*percpu_init)(unsigned int cpu); @@ -78,6 +84,7 @@ extern const struct evtchn_ops *evtchn_ops; extern int **evtchn_to_irq; int get_evtchn_to_irq(unsigned int evtchn); +void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl); struct irq_info *info_for_irq(unsigned irq); unsigned cpu_from_irq(unsigned irq); @@ -135,9 +142,10 @@ static inline void unmask_evtchn(unsigned port) return evtchn_ops->unmask(port); } -static inline void xen_evtchn_handle_events(unsigned cpu) +static inline void xen_evtchn_handle_events(unsigned cpu, + struct evtchn_loop_ctrl *ctrl) { - return evtchn_ops->handle_events(cpu); + return evtchn_ops->handle_events(cpu, ctrl); } static inline void xen_evtchn_resume(void) -- cgit v1.2.3 From 85d9d02a49e29f9805d221ea249a4cd0c16c5fe2 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 14 Sep 2020 14:01:02 +0200 Subject: xen/events: block rogue events for some time commit 5f7f77400ab5b357b5fdb7122c3442239672186c upstream. In order to avoid high dom0 load due to rogue guests sending events at high frequency, block those events in case there was no action needed in dom0 to handle the events. This is done by adding a per-event counter, which set to zero in case an EOI without the XEN_EOI_FLAG_SPURIOUS is received from a backend driver, and incremented when this flag has been set. In case the counter is 2 or higher delay the EOI by 1 << (cnt - 2) jiffies, but not more than 1 second. In order not to waste memory shorten the per-event refcnt to two bytes (it should normally never exceed a value of 2). Add an overflow check to evtchn_get() to make sure the 2 bytes really won't overflow. This is part of XSA-332. Cc: stable@vger.kernel.org Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich Reviewed-by: Stefano Stabellini Reviewed-by: Wei Liu Signed-off-by: Greg Kroah-Hartman --- drivers/xen/events/events_base.c | 27 ++++++++++++++++++++++----- drivers/xen/events/events_internal.h | 3 ++- 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index f67b92a488da..26df84c45db4 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -460,17 +460,34 @@ static void lateeoi_list_add(struct irq_info *info) spin_unlock_irqrestore(&eoi->eoi_list_lock, flags); } -static void xen_irq_lateeoi_locked(struct irq_info *info) +static void xen_irq_lateeoi_locked(struct irq_info *info, bool spurious) { evtchn_port_t evtchn; unsigned int cpu; + unsigned int delay = 0; evtchn = info->evtchn; if (!VALID_EVTCHN(evtchn) || !list_empty(&info->eoi_list)) return; + if (spurious) { + if ((1 << info->spurious_cnt) < (HZ << 2)) + info->spurious_cnt++; + if (info->spurious_cnt > 1) { + delay = 1 << (info->spurious_cnt - 2); + if (delay > HZ) + delay = HZ; + if (!info->eoi_time) + info->eoi_cpu = smp_processor_id(); + info->eoi_time = get_jiffies_64() + delay; + } + } else { + info->spurious_cnt = 0; + } + cpu = info->eoi_cpu; - if (info->eoi_time && info->irq_epoch == per_cpu(irq_epoch, cpu)) { + if (info->eoi_time && + (info->irq_epoch == per_cpu(irq_epoch, cpu) || delay)) { lateeoi_list_add(info); return; } @@ -507,7 +524,7 @@ static void xen_irq_lateeoi_worker(struct work_struct *work) info->eoi_time = 0; - xen_irq_lateeoi_locked(info); + xen_irq_lateeoi_locked(info, false); } if (info) @@ -536,7 +553,7 @@ void xen_irq_lateeoi(unsigned int irq, unsigned int eoi_flags) info = info_for_irq(irq); if (info) - xen_irq_lateeoi_locked(info); + xen_irq_lateeoi_locked(info, eoi_flags & XEN_EOI_FLAG_SPURIOUS); read_unlock_irqrestore(&evtchn_rwlock, flags); } @@ -1439,7 +1456,7 @@ int evtchn_get(unsigned int evtchn) goto done; err = -EINVAL; - if (info->refcnt <= 0) + if (info->refcnt <= 0 || info->refcnt == SHRT_MAX) goto done; info->refcnt++; diff --git a/drivers/xen/events/events_internal.h b/drivers/xen/events/events_internal.h index 756c87532d33..a35c8c7ac606 100644 --- a/drivers/xen/events/events_internal.h +++ b/drivers/xen/events/events_internal.h @@ -31,7 +31,8 @@ enum xen_irq_type { struct irq_info { struct list_head list; struct list_head eoi_list; - int refcnt; + short refcnt; + short spurious_cnt; enum xen_irq_type type; /* type */ unsigned irq; unsigned int evtchn; /* event channel */ -- cgit v1.2.3 From aedcfe9a02f869b053f1acd901da1f8f89c43dcd Mon Sep 17 00:00:00 2001 From: Etienne Carriere Date: Thu, 8 Oct 2020 16:37:22 +0200 Subject: firmware: arm_scmi: Fix ARCH_COLD_RESET [ Upstream commit 45b9e04d5ba0b043783dfe2b19bb728e712cb32e ] The defination for ARCH_COLD_RESET is wrong. Let us fix it according to the SCMI specification. Link: https://lore.kernel.org/r/20201008143722.21888-5-etienne.carriere@linaro.org Fixes: 95a15d80aa0d ("firmware: arm_scmi: Add RESET protocol in SCMI v2.0") Signed-off-by: Etienne Carriere Signed-off-by: Sudeep Holla Signed-off-by: Sasha Levin --- drivers/firmware/arm_scmi/reset.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/firmware/arm_scmi/reset.c b/drivers/firmware/arm_scmi/reset.c index ab42c21c5517..6d223f345b6c 100644 --- a/drivers/firmware/arm_scmi/reset.c +++ b/drivers/firmware/arm_scmi/reset.c @@ -35,9 +35,7 @@ struct scmi_msg_reset_domain_reset { #define EXPLICIT_RESET_ASSERT BIT(1) #define ASYNCHRONOUS_RESET BIT(2) __le32 reset_state; -#define ARCH_RESET_TYPE BIT(31) -#define COLD_RESET_STATE BIT(0) -#define ARCH_COLD_RESET (ARCH_RESET_TYPE | COLD_RESET_STATE) +#define ARCH_COLD_RESET 0 }; struct reset_dom_info { -- cgit v1.2.3 From 6937c143e3d3dee432b77213ce546128fcb34995 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Mon, 12 Oct 2020 14:26:24 +0100 Subject: firmware: arm_scmi: Add missing Rx size re-initialisation [ Upstream commit 9724722fde8f9bbd2b87340f00b9300c9284001e ] Few commands provide the list of description partially and require to be called consecutively until all the descriptors are fetched completely. In such cases, we don't release the buffers and reuse them for consecutive transmits. However, currently we don't reset the Rx size which will be set as per the response for the last transmit. This may result in incorrect response size being interpretted as the firmware may repond with size greater than the one set but we read only upto the size set by previous response. Let us reset the receive buffer size to max possible in such cases as we don't know the exact size of the response. Link: https://lore.kernel.org/r/20201012141746.32575-1-sudeep.holla@arm.com Fixes: b6f20ff8bd94 ("firmware: arm_scmi: add common infrastructure and support for base protocol") Reported-by: Etienne Carriere Signed-off-by: Sudeep Holla Signed-off-by: Sasha Levin --- drivers/firmware/arm_scmi/base.c | 2 ++ drivers/firmware/arm_scmi/clock.c | 2 ++ drivers/firmware/arm_scmi/common.h | 2 ++ drivers/firmware/arm_scmi/driver.c | 8 ++++++++ drivers/firmware/arm_scmi/perf.c | 2 ++ drivers/firmware/arm_scmi/sensors.c | 2 ++ 6 files changed, 18 insertions(+) diff --git a/drivers/firmware/arm_scmi/base.c b/drivers/firmware/arm_scmi/base.c index f804e8af6521..f986ee8919f0 100644 --- a/drivers/firmware/arm_scmi/base.c +++ b/drivers/firmware/arm_scmi/base.c @@ -173,6 +173,8 @@ static int scmi_base_implementation_list_get(const struct scmi_handle *handle, protocols_imp[tot_num_ret + loop] = *(list + loop); tot_num_ret += loop_num_ret; + + scmi_reset_rx_to_maxsz(handle, t); } while (loop_num_ret); scmi_xfer_put(handle, t); diff --git a/drivers/firmware/arm_scmi/clock.c b/drivers/firmware/arm_scmi/clock.c index 32526a793f3a..38400a8d0ca8 100644 --- a/drivers/firmware/arm_scmi/clock.c +++ b/drivers/firmware/arm_scmi/clock.c @@ -177,6 +177,8 @@ scmi_clock_describe_rates_get(const struct scmi_handle *handle, u32 clk_id, } tot_rate_cnt += num_returned; + + scmi_reset_rx_to_maxsz(handle, t); /* * check for both returned and remaining to avoid infinite * loop due to buggy firmware diff --git a/drivers/firmware/arm_scmi/common.h b/drivers/firmware/arm_scmi/common.h index 5237c2ff79fe..9a680b9af9e5 100644 --- a/drivers/firmware/arm_scmi/common.h +++ b/drivers/firmware/arm_scmi/common.h @@ -103,6 +103,8 @@ int scmi_do_xfer_with_response(const struct scmi_handle *h, struct scmi_xfer *xfer); int scmi_xfer_get_init(const struct scmi_handle *h, u8 msg_id, u8 prot_id, size_t tx_size, size_t rx_size, struct scmi_xfer **p); +void scmi_reset_rx_to_maxsz(const struct scmi_handle *handle, + struct scmi_xfer *xfer); int scmi_handle_put(const struct scmi_handle *handle); struct scmi_handle *scmi_handle_get(struct device *dev); void scmi_set_handle(struct scmi_device *scmi_dev); diff --git a/drivers/firmware/arm_scmi/driver.c b/drivers/firmware/arm_scmi/driver.c index 3eb0382491ce..11078199abed 100644 --- a/drivers/firmware/arm_scmi/driver.c +++ b/drivers/firmware/arm_scmi/driver.c @@ -481,6 +481,14 @@ int scmi_do_xfer(const struct scmi_handle *handle, struct scmi_xfer *xfer) return ret; } +void scmi_reset_rx_to_maxsz(const struct scmi_handle *handle, + struct scmi_xfer *xfer) +{ + struct scmi_info *info = handle_to_scmi_info(handle); + + xfer->rx.len = info->desc->max_msg_size; +} + #define SCMI_MAX_RESPONSE_TIMEOUT (2 * MSEC_PER_SEC) /** diff --git a/drivers/firmware/arm_scmi/perf.c b/drivers/firmware/arm_scmi/perf.c index 601af4edad5e..129a2887e964 100644 --- a/drivers/firmware/arm_scmi/perf.c +++ b/drivers/firmware/arm_scmi/perf.c @@ -281,6 +281,8 @@ scmi_perf_describe_levels_get(const struct scmi_handle *handle, u32 domain, } tot_opp_cnt += num_returned; + + scmi_reset_rx_to_maxsz(handle, t); /* * check for both returned and remaining to avoid infinite * loop due to buggy firmware diff --git a/drivers/firmware/arm_scmi/sensors.c b/drivers/firmware/arm_scmi/sensors.c index a400ea805fc2..931208bc48f1 100644 --- a/drivers/firmware/arm_scmi/sensors.c +++ b/drivers/firmware/arm_scmi/sensors.c @@ -154,6 +154,8 @@ static int scmi_sensor_description_get(const struct scmi_handle *handle, } desc_index += num_returned; + + scmi_reset_rx_to_maxsz(handle, t); /* * check for both returned and remaining to avoid infinite * loop due to buggy firmware -- cgit v1.2.3 From f7e7de28d10656a7128ba80d06434fc84f18ff50 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Wed, 14 Oct 2020 07:30:51 +0200 Subject: x86/unwind/orc: Fix inactive tasks with stack pointer in %sp on GCC 10 compiled kernels [ Upstream commit f2ac57a4c49d40409c21c82d23b5706df9b438af ] GCC 10 optimizes the scheduler code differently than its predecessors. When CONFIG_DEBUG_SECTION_MISMATCH=y, the Makefile forces GCC not to inline some functions (-fno-inline-functions-called-once). Before GCC 10, "no-inlined" __schedule() starts with the usual prologue: push %bp mov %sp, %bp So the ORC unwinder simply picks stack pointer from %bp and unwinds from __schedule() just perfectly: $ cat /proc/1/stack [<0>] ep_poll+0x3e9/0x450 [<0>] do_epoll_wait+0xaa/0xc0 [<0>] __x64_sys_epoll_wait+0x1a/0x20 [<0>] do_syscall_64+0x33/0x40 [<0>] entry_SYSCALL_64_after_hwframe+0x44/0xa9 But now, with GCC 10, there is no %bp prologue in __schedule(): $ cat /proc/1/stack The ORC entry of the point in __schedule() is: sp:sp+88 bp:last_sp-48 type:call end:0 In this case, nobody subtracts sizeof "struct inactive_task_frame" in __unwind_start(). The struct is put on the stack by __switch_to_asm() and only then __switch_to_asm() stores %sp to task->thread.sp. But we start unwinding from a point in __schedule() (stored in frame->ret_addr by 'call') and not in __switch_to_asm(). So for these example values in __unwind_start(): sp=ffff94b50001fdc8 bp=ffff8e1f41d29340 ip=__schedule+0x1f0 The stack is: ffff94b50001fdc8: ffff8e1f41578000 # struct inactive_task_frame ffff94b50001fdd0: 0000000000000000 ffff94b50001fdd8: ffff8e1f41d29340 ffff94b50001fde0: ffff8e1f41611d40 # ... ffff94b50001fde8: ffffffff93c41920 # bx ffff94b50001fdf0: ffff8e1f41d29340 # bp ffff94b50001fdf8: ffffffff9376cad0 # ret_addr (and end of the struct) 0xffffffff9376cad0 is __schedule+0x1f0 (after the call to __switch_to_asm). Now follow those 88 bytes from the ORC entry (sp+88). The entry is correct, __schedule() really pushes 48 bytes (8*7) + 32 bytes via subq to store some local values (like 4U below). So to unwind, look at the offset 88-sizeof(long) = 0x50 from here: ffff94b50001fe00: ffff8e1f41578618 ffff94b50001fe08: 00000cc000000255 ffff94b50001fe10: 0000000500000004 ffff94b50001fe18: 7793fab6956b2d00 # NOTE (see below) ffff94b50001fe20: ffff8e1f41578000 ffff94b50001fe28: ffff8e1f41578000 ffff94b50001fe30: ffff8e1f41578000 ffff94b50001fe38: ffff8e1f41578000 ffff94b50001fe40: ffff94b50001fed8 ffff94b50001fe48: ffff8e1f41577ff0 ffff94b50001fe50: ffffffff9376cf12 Here ^^^^^^^^^^^^^^^^ is the correct ret addr from __schedule(). It translates to schedule+0x42 (insn after a call to __schedule()). BUT, unwind_next_frame() tries to take the address starting from 0xffff94b50001fdc8. That is exactly from thread.sp+88-sizeof(long) = 0xffff94b50001fdc8+88-8 = 0xffff94b50001fe18, which is garbage marked as NOTE above. So this quits the unwinding as 7793fab6956b2d00 is obviously not a kernel address. There was a fix to skip 'struct inactive_task_frame' in unwind_get_return_address_ptr in the following commit: 187b96db5ca7 ("x86/unwind/orc: Fix unwind_get_return_address_ptr() for inactive tasks") But we need to skip the struct already in the unwinder proper. So subtract the size (increase the stack pointer) of the structure in __unwind_start() directly. This allows for removal of the code added by commit 187b96db5ca7 completely, as the address is now at '(unsigned long *)state->sp - 1', the same as in the generic case. [ mingo: Cleaned up the changelog a bit, for better readability. ] Fixes: ee9f8fce9964 ("x86/unwind: Add the ORC unwinder") Bug: https://bugzilla.suse.com/show_bug.cgi?id=1176907 Signed-off-by: Jiri Slaby Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20201014053051.24199-1-jslaby@suse.cz Signed-off-by: Sasha Levin --- arch/x86/kernel/unwind_orc.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c index 187a86e0e753..f29f015a5e7f 100644 --- a/arch/x86/kernel/unwind_orc.c +++ b/arch/x86/kernel/unwind_orc.c @@ -311,19 +311,12 @@ EXPORT_SYMBOL_GPL(unwind_get_return_address); unsigned long *unwind_get_return_address_ptr(struct unwind_state *state) { - struct task_struct *task = state->task; - if (unwind_done(state)) return NULL; if (state->regs) return &state->regs->ip; - if (task != current && state->sp == task->thread.sp) { - struct inactive_task_frame *frame = (void *)task->thread.sp; - return &frame->ret_addr; - } - if (state->sp) return (unsigned long *)state->sp - 1; @@ -653,7 +646,7 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task, } else { struct inactive_task_frame *frame = (void *)task->thread.sp; - state->sp = task->thread.sp; + state->sp = task->thread.sp + sizeof(*frame); state->bp = READ_ONCE_NOCHECK(frame->bp); state->ip = READ_ONCE_NOCHECK(frame->ret_addr); state->signal = (void *)state->ip == ret_from_fork; -- cgit v1.2.3 From d90dd1599cf3518e072ac141a1a0996bdf27cef5 Mon Sep 17 00:00:00 2001 From: Amit Cohen Date: Sat, 24 Oct 2020 16:37:33 +0300 Subject: mlxsw: core: Fix use-after-free in mlxsw_emad_trans_finish() [ Upstream commit 0daf2bf5a2dcf33d446b76360908f109816e2e21 ] Each EMAD transaction stores the skb used to issue the EMAD request ('trans->tx_skb') so that the request could be retried in case of a timeout. The skb can be freed when a corresponding response is received or as part of the retry logic (e.g., failed retransmit, exceeded maximum number of retries). The two tasks (i.e., response processing and retransmits) are synchronized by the atomic 'trans->active' field which ensures that responses to inactive transactions are ignored. In case of a failed retransmit the transaction is finished and all of its resources are freed. However, the current code does not mark it as inactive. Syzkaller was able to hit a race condition in which a concurrent response is processed while the transaction's resources are being freed, resulting in a use-after-free [1]. Fix the issue by making sure to mark the transaction as inactive after a failed retransmit and free its resources only if a concurrent task did not already do that. [1] BUG: KASAN: use-after-free in consume_skb+0x30/0x370 net/core/skbuff.c:833 Read of size 4 at addr ffff88804f570494 by task syz-executor.0/1004 CPU: 0 PID: 1004 Comm: syz-executor.0 Not tainted 5.8.0-rc7+ #68 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.1-0-ga5cab58e9a3f-prebuilt.qemu.org 04/01/2014 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0xf6/0x16e lib/dump_stack.c:118 print_address_description.constprop.0+0x1c/0x250 mm/kasan/report.c:383 __kasan_report mm/kasan/report.c:513 [inline] kasan_report.cold+0x1f/0x37 mm/kasan/report.c:530 check_memory_region_inline mm/kasan/generic.c:186 [inline] check_memory_region+0x14e/0x1b0 mm/kasan/generic.c:192 instrument_atomic_read include/linux/instrumented.h:56 [inline] atomic_read include/asm-generic/atomic-instrumented.h:27 [inline] refcount_read include/linux/refcount.h:147 [inline] skb_unref include/linux/skbuff.h:1044 [inline] consume_skb+0x30/0x370 net/core/skbuff.c:833 mlxsw_emad_trans_finish+0x64/0x1c0 drivers/net/ethernet/mellanox/mlxsw/core.c:592 mlxsw_emad_process_response drivers/net/ethernet/mellanox/mlxsw/core.c:651 [inline] mlxsw_emad_rx_listener_func+0x5c9/0xac0 drivers/net/ethernet/mellanox/mlxsw/core.c:672 mlxsw_core_skb_receive+0x4df/0x770 drivers/net/ethernet/mellanox/mlxsw/core.c:2063 mlxsw_pci_cqe_rdq_handle drivers/net/ethernet/mellanox/mlxsw/pci.c:595 [inline] mlxsw_pci_cq_tasklet+0x12a6/0x2520 drivers/net/ethernet/mellanox/mlxsw/pci.c:651 tasklet_action_common.isra.0+0x13f/0x3e0 kernel/softirq.c:550 __do_softirq+0x223/0x964 kernel/softirq.c:292 asm_call_on_stack+0x12/0x20 arch/x86/entry/entry_64.S:711 Allocated by task 1006: save_stack+0x1b/0x40 mm/kasan/common.c:48 set_track mm/kasan/common.c:56 [inline] __kasan_kmalloc mm/kasan/common.c:494 [inline] __kasan_kmalloc.constprop.0+0xc2/0xd0 mm/kasan/common.c:467 slab_post_alloc_hook mm/slab.h:586 [inline] slab_alloc_node mm/slub.c:2824 [inline] slab_alloc mm/slub.c:2832 [inline] kmem_cache_alloc+0xcd/0x2e0 mm/slub.c:2837 __build_skb+0x21/0x60 net/core/skbuff.c:311 __netdev_alloc_skb+0x1e2/0x360 net/core/skbuff.c:464 netdev_alloc_skb include/linux/skbuff.h:2810 [inline] mlxsw_emad_alloc drivers/net/ethernet/mellanox/mlxsw/core.c:756 [inline] mlxsw_emad_reg_access drivers/net/ethernet/mellanox/mlxsw/core.c:787 [inline] mlxsw_core_reg_access_emad+0x1ab/0x1420 drivers/net/ethernet/mellanox/mlxsw/core.c:1817 mlxsw_reg_trans_query+0x39/0x50 drivers/net/ethernet/mellanox/mlxsw/core.c:1831 mlxsw_sp_sb_pm_occ_clear drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c:260 [inline] mlxsw_sp_sb_occ_max_clear+0xbff/0x10a0 drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c:1365 mlxsw_devlink_sb_occ_max_clear+0x76/0xb0 drivers/net/ethernet/mellanox/mlxsw/core.c:1037 devlink_nl_cmd_sb_occ_max_clear_doit+0x1ec/0x280 net/core/devlink.c:1765 genl_family_rcv_msg_doit net/netlink/genetlink.c:669 [inline] genl_family_rcv_msg net/netlink/genetlink.c:714 [inline] genl_rcv_msg+0x617/0x980 net/netlink/genetlink.c:731 netlink_rcv_skb+0x152/0x440 net/netlink/af_netlink.c:2470 genl_rcv+0x24/0x40 net/netlink/genetlink.c:742 netlink_unicast_kernel net/netlink/af_netlink.c:1304 [inline] netlink_unicast+0x53a/0x750 net/netlink/af_netlink.c:1330 netlink_sendmsg+0x850/0xd90 net/netlink/af_netlink.c:1919 sock_sendmsg_nosec net/socket.c:651 [inline] sock_sendmsg+0x150/0x190 net/socket.c:671 ____sys_sendmsg+0x6d8/0x840 net/socket.c:2359 ___sys_sendmsg+0xff/0x170 net/socket.c:2413 __sys_sendmsg+0xe5/0x1b0 net/socket.c:2446 do_syscall_64+0x56/0xa0 arch/x86/entry/common.c:384 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Freed by task 73: save_stack+0x1b/0x40 mm/kasan/common.c:48 set_track mm/kasan/common.c:56 [inline] kasan_set_free_info mm/kasan/common.c:316 [inline] __kasan_slab_free+0x12c/0x170 mm/kasan/common.c:455 slab_free_hook mm/slub.c:1474 [inline] slab_free_freelist_hook mm/slub.c:1507 [inline] slab_free mm/slub.c:3072 [inline] kmem_cache_free+0xbe/0x380 mm/slub.c:3088 kfree_skbmem net/core/skbuff.c:622 [inline] kfree_skbmem+0xef/0x1b0 net/core/skbuff.c:616 __kfree_skb net/core/skbuff.c:679 [inline] consume_skb net/core/skbuff.c:837 [inline] consume_skb+0xe1/0x370 net/core/skbuff.c:831 mlxsw_emad_trans_finish+0x64/0x1c0 drivers/net/ethernet/mellanox/mlxsw/core.c:592 mlxsw_emad_transmit_retry.isra.0+0x9d/0xc0 drivers/net/ethernet/mellanox/mlxsw/core.c:613 mlxsw_emad_trans_timeout_work+0x43/0x50 drivers/net/ethernet/mellanox/mlxsw/core.c:625 process_one_work+0xa3e/0x17a0 kernel/workqueue.c:2269 worker_thread+0x9e/0x1050 kernel/workqueue.c:2415 kthread+0x355/0x470 kernel/kthread.c:291 ret_from_fork+0x22/0x30 arch/x86/entry/entry_64.S:293 The buggy address belongs to the object at ffff88804f5703c0 which belongs to the cache skbuff_head_cache of size 224 The buggy address is located 212 bytes inside of 224-byte region [ffff88804f5703c0, ffff88804f5704a0) The buggy address belongs to the page: page:ffffea00013d5c00 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 flags: 0x100000000000200(slab) raw: 0100000000000200 dead000000000100 dead000000000122 ffff88806c625400 raw: 0000000000000000 00000000000c000c 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff88804f570380: fc fc fc fc fc fc fc fc fb fb fb fb fb fb fb fb ffff88804f570400: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb >ffff88804f570480: fb fb fb fb fc fc fc fc fc fc fc fc fc fc fc fc ^ ffff88804f570500: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ffff88804f570580: 00 00 00 00 00 00 00 00 00 00 00 00 fc fc fc fc Fixes: caf7297e7ab5f ("mlxsw: core: Introduce support for asynchronous EMAD register access") Signed-off-by: Amit Cohen Reviewed-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlxsw/core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index 7277706847b1..8f0eec9fb17b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -493,6 +493,9 @@ static void mlxsw_emad_transmit_retry(struct mlxsw_core *mlxsw_core, err = mlxsw_emad_transmit(trans->core, trans); if (err == 0) return; + + if (!atomic_dec_and_test(&trans->active)) + return; } else { err = -EIO; } -- cgit v1.2.3 From da8e2fbe458c4b3baaf1da5957347b9bcf858af3 Mon Sep 17 00:00:00 2001 From: Alok Prasad Date: Wed, 21 Oct 2020 11:50:08 +0000 Subject: RDMA/qedr: Fix memory leak in iWARP CM [ Upstream commit a2267f8a52eea9096861affd463f691be0f0e8c9 ] Fixes memory leak in iWARP CM Fixes: e411e0587e0d ("RDMA/qedr: Add iWARP connection management functions") Link: https://lore.kernel.org/r/20201021115008.28138-1-palok@marvell.com Signed-off-by: Michal Kalderon Signed-off-by: Igor Russkikh Signed-off-by: Alok Prasad Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/qedr/qedr_iw_cm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/hw/qedr/qedr_iw_cm.c b/drivers/infiniband/hw/qedr/qedr_iw_cm.c index e521f3c3dbbf..653ddf30973e 100644 --- a/drivers/infiniband/hw/qedr/qedr_iw_cm.c +++ b/drivers/infiniband/hw/qedr/qedr_iw_cm.c @@ -727,6 +727,7 @@ int qedr_iw_destroy_listen(struct iw_cm_id *cm_id) listener->qed_handle); cm_id->rem_ref(cm_id); + kfree(listener); return rc; } -- cgit v1.2.3 From 87d9ac94c7e77fb44d275912f467376cc115c840 Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Fri, 8 May 2020 07:28:19 +0200 Subject: ata: sata_nv: Fix retrieving of active qcs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 8e4c309f9f33b76c09daa02b796ef87918eee494 ] ata_qc_complete_multiple() has to be called with the tags physically active, that is the hw tag is at bit 0. ap->qc_active has the same tag at bit ATA_TAG_INTERNAL instead, so call ata_qc_get_active() to fix that up. This is done in the vein of 8385d756e114 ("libata: Fix retrieving of active qcs"). Fixes: 28361c403683 ("libata: add extra internal command") Tested-by: Pali Rohár Signed-off-by: Sascha Hauer Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/ata/sata_nv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ata/sata_nv.c b/drivers/ata/sata_nv.c index 18b147c182b9..0514aa7e80e3 100644 --- a/drivers/ata/sata_nv.c +++ b/drivers/ata/sata_nv.c @@ -2100,7 +2100,7 @@ static int nv_swncq_sdbfis(struct ata_port *ap) pp->dhfis_bits &= ~done_mask; pp->dmafis_bits &= ~done_mask; pp->sdbfis_bits |= done_mask; - ata_qc_complete_multiple(ap, ap->qc_active ^ done_mask); + ata_qc_complete_multiple(ap, ata_qc_get_active(ap) ^ done_mask); if (!ap->qc_active) { DPRINTK("over\n"); -- cgit v1.2.3 From 2db7590371520735366639647352b44c0eeda11f Mon Sep 17 00:00:00 2001 From: Mateusz Nosek Date: Sun, 27 Sep 2020 02:08:58 +0200 Subject: futex: Fix incorrect should_fail_futex() handling [ Upstream commit 921c7ebd1337d1a46783d7e15a850e12aed2eaa0 ] If should_futex_fail() returns true in futex_wake_pi(), then the 'ret' variable is set to -EFAULT and then immediately overwritten. So the failure injection is non-functional. Fix it by actually leaving the function and returning -EFAULT. The Fixes tag is kinda blury because the initial commit which introduced failure injection was already sloppy, but the below mentioned commit broke it completely. [ tglx: Massaged changelog ] Fixes: 6b4f4bc9cb22 ("locking/futex: Allow low-level atomic operations to return -EAGAIN") Signed-off-by: Mateusz Nosek Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20200927000858.24219-1-mateusznosek0@gmail.com Signed-off-by: Sasha Levin --- kernel/futex.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/futex.c b/kernel/futex.c index 5660c02b01b0..17fba7a986e0 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -1594,8 +1594,10 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_pi_state *pi_ */ newval = FUTEX_WAITERS | task_pid_vnr(new_owner); - if (unlikely(should_fail_futex(true))) + if (unlikely(should_fail_futex(true))) { ret = -EFAULT; + goto out_unlock; + } ret = cmpxchg_futex_value_locked(&curval, uaddr, uval, newval); if (!ret && (curval != uval)) { -- cgit v1.2.3 From dc17b990ee90c07ce041722a588678681001d4a7 Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Tue, 4 Aug 2020 10:54:05 +1000 Subject: powerpc/powernv/smp: Fix spurious DBG() warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit f6bac19cf65c5be21d14a0c9684c8f560f2096dd ] When building with W=1 we get the following warning: arch/powerpc/platforms/powernv/smp.c: In function ‘pnv_smp_cpu_kill_self’: arch/powerpc/platforms/powernv/smp.c:276:16: error: suggest braces around empty body in an ‘if’ statement [-Werror=empty-body] 276 | cpu, srr1); | ^ cc1: all warnings being treated as errors The full context is this block: if (srr1 && !generic_check_cpu_restart(cpu)) DBG("CPU%d Unexpected exit while offline srr1=%lx!\n", cpu, srr1); When building with DEBUG undefined DBG() expands to nothing and GCC emits the warning due to the lack of braces around an empty statement. Signed-off-by: Oliver O'Halloran Reviewed-by: Joel Stanley Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200804005410.146094-2-oohall@gmail.com Signed-off-by: Sasha Levin --- arch/powerpc/platforms/powernv/smp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c index b2ba3e95bda7..bbf361f23ae8 100644 --- a/arch/powerpc/platforms/powernv/smp.c +++ b/arch/powerpc/platforms/powernv/smp.c @@ -43,7 +43,7 @@ #include #define DBG(fmt...) udbg_printf(fmt) #else -#define DBG(fmt...) +#define DBG(fmt...) do { } while (0) #endif static void pnv_smp_setup_cpu(int cpu) -- cgit v1.2.3 From 82e93f94ac653d57373f1368a4dcb066d24af490 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Mon, 14 Sep 2020 14:52:16 +1000 Subject: mm: fix exec activate_mm vs TLB shootdown and lazy tlb switching race [ Upstream commit d53c3dfb23c45f7d4f910c3a3ca84bf0a99c6143 ] Reading and modifying current->mm and current->active_mm and switching mm should be done with irqs off, to prevent races seeing an intermediate state. This is similar to commit 38cf307c1f20 ("mm: fix kthread_use_mm() vs TLB invalidate"). At exec-time when the new mm is activated, the old one should usually be single-threaded and no longer used, unless something else is holding an mm_users reference (which may be possible). Absent other mm_users, there is also a race with preemption and lazy tlb switching. Consider the kernel_execve case where the current thread is using a lazy tlb active mm: call_usermodehelper() kernel_execve() old_mm = current->mm; active_mm = current->active_mm; *** preempt *** --------------------> schedule() prev->active_mm = NULL; mmdrop(prev active_mm); ... <-------------------- schedule() current->mm = mm; current->active_mm = mm; if (!old_mm) mmdrop(active_mm); If we switch back to the kernel thread from a different mm, there is a double free of the old active_mm, and a missing free of the new one. Closing this race only requires interrupts to be disabled while ->mm and ->active_mm are being switched, but the TLB problem requires also holding interrupts off over activate_mm. Unfortunately not all archs can do that yet, e.g., arm defers the switch if irqs are disabled and expects finish_arch_post_lock_switch() to be called to complete the flush; um takes a blocking lock in activate_mm(). So as a first step, disable interrupts across the mm/active_mm updates to close the lazy tlb preempt race, and provide an arch option to extend that to activate_mm which allows architectures doing IPI based TLB shootdowns to close the second race. This is a bit ugly, but in the interest of fixing the bug and backporting before all architectures are converted this is a compromise. Signed-off-by: Nicholas Piggin Acked-by: Peter Zijlstra (Intel) Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200914045219.3736466-2-npiggin@gmail.com Signed-off-by: Sasha Levin --- arch/Kconfig | 7 +++++++ fs/exec.c | 17 +++++++++++++++-- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/arch/Kconfig b/arch/Kconfig index 238dccfa7691..84653a823d3b 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -405,6 +405,13 @@ config MMU_GATHER_NO_RANGE config HAVE_MMU_GATHER_NO_GATHER bool +config ARCH_WANT_IRQS_OFF_ACTIVATE_MM + bool + help + Temporary select until all architectures can be converted to have + irqs disabled over activate_mm. Architectures that do IPI based TLB + shootdowns should enable this. + config ARCH_HAVE_NMI_SAFE_CMPXCHG bool diff --git a/fs/exec.c b/fs/exec.c index de833553ae27..2441eb1a1e2d 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1044,11 +1044,24 @@ static int exec_mmap(struct mm_struct *mm) } task_lock(tsk); - active_mm = tsk->active_mm; membarrier_exec_mmap(mm); - tsk->mm = mm; + + local_irq_disable(); + active_mm = tsk->active_mm; tsk->active_mm = mm; + tsk->mm = mm; + /* + * This prevents preemption while active_mm is being loaded and + * it and mm are being updated, which could cause problems for + * lazy tlb mm refcounting when these are updated by context + * switches. Not all architectures can handle irqs off over + * activate_mm yet. + */ + if (!IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM)) + local_irq_enable(); activate_mm(active_mm, mm); + if (IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM)) + local_irq_enable(); tsk->mm->vmacache_seqnum = 0; vmacache_flush(tsk); task_unlock(tsk); -- cgit v1.2.3 From 7d59323cff67def586fcf8ca66fa60892650c1c5 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Mon, 14 Sep 2020 14:52:17 +1000 Subject: powerpc: select ARCH_WANT_IRQS_OFF_ACTIVATE_MM [ Upstream commit 66acd46080bd9e5ad2be4b0eb1d498d5145d058e ] powerpc uses IPIs in some situations to switch a kernel thread away from a lazy tlb mm, which is subject to the TLB flushing race described in the changelog introducing ARCH_WANT_IRQS_OFF_ACTIVATE_MM. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200914045219.3736466-3-npiggin@gmail.com Signed-off-by: Sasha Levin --- arch/powerpc/Kconfig | 1 + arch/powerpc/include/asm/mmu_context.h | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index ad620637cbd1..27ef333e96f6 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -147,6 +147,7 @@ config PPC select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_CMPXCHG_LOCKREF if PPC64 select ARCH_WANT_IPC_PARSE_VERSION + select ARCH_WANT_IRQS_OFF_ACTIVATE_MM select ARCH_WEAK_RELEASE_ACQUIRE select BINFMT_ELF select BUILDTIME_EXTABLE_SORT diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index 58efca934311..f132b418a8c7 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -216,7 +216,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, */ static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next) { - switch_mm(prev, next, current); + switch_mm_irqs_off(prev, next, current); } /* We don't currently use enter_lazy_tlb() for anything */ -- cgit v1.2.3 From 2eab8974aea88f4b46dfb524c6679aa09baeb145 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Mon, 14 Sep 2020 14:52:18 +1000 Subject: sparc64: remove mm_cpumask clearing to fix kthread_use_mm race [ Upstream commit bafb056ce27940c9994ea905336aa8f27b4f7275 ] The de facto (and apparently uncommented) standard for using an mm had, thanks to this code in sparc if nothing else, been that you must have a reference on mm_users *and that reference must have been obtained with mmget()*, i.e., from a thread with a reference to mm_users that had used the mm. The introduction of mmget_not_zero() in commit d2005e3f41d4 ("userfaultfd: don't pin the user memory in userfaultfd_file_create()") allowed mm_count holders to aoperate on user mappings asynchronously from the actual threads using the mm, but they were not to load those mappings into their TLB (i.e., walking vmas and page tables is okay, kthread_use_mm() is not). io_uring 2b188cc1bb857 ("Add io_uring IO interface") added code which does a kthread_use_mm() from a mmget_not_zero() refcount. The problem with this is code which previously assumed mm == current->mm and mm->mm_users == 1 implies the mm will remain single-threaded at least until this thread creates another mm_users reference, has now broken. arch/sparc/kernel/smp_64.c: if (atomic_read(&mm->mm_users) == 1) { cpumask_copy(mm_cpumask(mm), cpumask_of(cpu)); goto local_flush_and_out; } vs fs/io_uring.c if (unlikely(!(ctx->flags & IORING_SETUP_SQPOLL) || !mmget_not_zero(ctx->sqo_mm))) return -EFAULT; kthread_use_mm(ctx->sqo_mm); mmget_not_zero() could come in right after the mm_users == 1 test, then kthread_use_mm() which sets its CPU in the mm_cpumask. That update could be lost if cpumask_copy() occurs afterward. I propose we fix this by allowing mmget_not_zero() to be a first-class reference, and not have this obscure undocumented and unchecked restriction. The basic fix for sparc64 is to remove its mm_cpumask clearing code. The optimisation could be effectively restored by sending IPIs to mm_cpumask members and having them remove themselves from mm_cpumask. This is more tricky so I leave it as an exercise for someone with a sparc64 SMP. powerpc has a (currently similarly broken) example. Signed-off-by: Nicholas Piggin Acked-by: David S. Miller Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200914045219.3736466-4-npiggin@gmail.com Signed-off-by: Sasha Levin --- arch/sparc/kernel/smp_64.c | 65 ++++++++++------------------------------------ 1 file changed, 14 insertions(+), 51 deletions(-) diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index a8275fea4b70..aa81c25b44cf 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -1039,38 +1039,9 @@ void smp_fetch_global_pmu(void) * are flush_tlb_*() routines, and these run after flush_cache_*() * which performs the flushw. * - * The SMP TLB coherency scheme we use works as follows: - * - * 1) mm->cpu_vm_mask is a bit mask of which cpus an address - * space has (potentially) executed on, this is the heuristic - * we use to avoid doing cross calls. - * - * Also, for flushing from kswapd and also for clones, we - * use cpu_vm_mask as the list of cpus to make run the TLB. - * - * 2) TLB context numbers are shared globally across all processors - * in the system, this allows us to play several games to avoid - * cross calls. - * - * One invariant is that when a cpu switches to a process, and - * that processes tsk->active_mm->cpu_vm_mask does not have the - * current cpu's bit set, that tlb context is flushed locally. - * - * If the address space is non-shared (ie. mm->count == 1) we avoid - * cross calls when we want to flush the currently running process's - * tlb state. This is done by clearing all cpu bits except the current - * processor's in current->mm->cpu_vm_mask and performing the - * flush locally only. This will force any subsequent cpus which run - * this task to flush the context from the local tlb if the process - * migrates to another cpu (again). - * - * 3) For shared address spaces (threads) and swapping we bite the - * bullet for most cases and perform the cross call (but only to - * the cpus listed in cpu_vm_mask). - * - * The performance gain from "optimizing" away the cross call for threads is - * questionable (in theory the big win for threads is the massive sharing of - * address space state across processors). + * mm->cpu_vm_mask is a bit mask of which cpus an address + * space has (potentially) executed on, this is the heuristic + * we use to limit cross calls. */ /* This currently is only used by the hugetlb arch pre-fault @@ -1080,18 +1051,13 @@ void smp_fetch_global_pmu(void) void smp_flush_tlb_mm(struct mm_struct *mm) { u32 ctx = CTX_HWBITS(mm->context); - int cpu = get_cpu(); - if (atomic_read(&mm->mm_users) == 1) { - cpumask_copy(mm_cpumask(mm), cpumask_of(cpu)); - goto local_flush_and_out; - } + get_cpu(); smp_cross_call_masked(&xcall_flush_tlb_mm, ctx, 0, 0, mm_cpumask(mm)); -local_flush_and_out: __flush_tlb_mm(ctx, SECONDARY_CONTEXT); put_cpu(); @@ -1114,17 +1080,15 @@ void smp_flush_tlb_pending(struct mm_struct *mm, unsigned long nr, unsigned long { u32 ctx = CTX_HWBITS(mm->context); struct tlb_pending_info info; - int cpu = get_cpu(); + + get_cpu(); info.ctx = ctx; info.nr = nr; info.vaddrs = vaddrs; - if (mm == current->mm && atomic_read(&mm->mm_users) == 1) - cpumask_copy(mm_cpumask(mm), cpumask_of(cpu)); - else - smp_call_function_many(mm_cpumask(mm), tlb_pending_func, - &info, 1); + smp_call_function_many(mm_cpumask(mm), tlb_pending_func, + &info, 1); __flush_tlb_pending(ctx, nr, vaddrs); @@ -1134,14 +1098,13 @@ void smp_flush_tlb_pending(struct mm_struct *mm, unsigned long nr, unsigned long void smp_flush_tlb_page(struct mm_struct *mm, unsigned long vaddr) { unsigned long context = CTX_HWBITS(mm->context); - int cpu = get_cpu(); - if (mm == current->mm && atomic_read(&mm->mm_users) == 1) - cpumask_copy(mm_cpumask(mm), cpumask_of(cpu)); - else - smp_cross_call_masked(&xcall_flush_tlb_page, - context, vaddr, 0, - mm_cpumask(mm)); + get_cpu(); + + smp_cross_call_masked(&xcall_flush_tlb_page, + context, vaddr, 0, + mm_cpumask(mm)); + __flush_tlb_page(context, vaddr); put_cpu(); -- cgit v1.2.3 From 40b357f7436dce5ee5432e9c8b120d07545627b7 Mon Sep 17 00:00:00 2001 From: Zhang Qilong Date: Mon, 21 Sep 2020 20:45:44 +0800 Subject: f2fs: add trace exit in exception path [ Upstream commit 9b66482282888d02832b7d90239e1cdb18e4b431 ] Missing the trace exit in f2fs_sync_dirty_inodes Signed-off-by: Zhang Qilong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin --- fs/f2fs/checkpoint.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index bbd07fe8a492..3d7f9e20a54b 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1044,8 +1044,12 @@ int f2fs_sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type) get_pages(sbi, is_dir ? F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA)); retry: - if (unlikely(f2fs_cp_error(sbi))) + if (unlikely(f2fs_cp_error(sbi))) { + trace_f2fs_sync_dirty_inodes_exit(sbi->sb, is_dir, + get_pages(sbi, is_dir ? + F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA)); return -EIO; + } spin_lock(&sbi->inode_lock[type]); -- cgit v1.2.3 From 1544dcb514ad322d08ef711609f8bd2cd6a5465e Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 29 Sep 2020 09:22:50 +0800 Subject: f2fs: fix uninit-value in f2fs_lookup [ Upstream commit 6d7ab88a98c1b7a47c228f8ffb4f44d631eaf284 ] As syzbot reported: Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x21c/0x280 lib/dump_stack.c:118 kmsan_report+0xf7/0x1e0 mm/kmsan/kmsan_report.c:122 __msan_warning+0x58/0xa0 mm/kmsan/kmsan_instr.c:219 f2fs_lookup+0xe05/0x1a80 fs/f2fs/namei.c:503 lookup_open fs/namei.c:3082 [inline] open_last_lookups fs/namei.c:3177 [inline] path_openat+0x2729/0x6a90 fs/namei.c:3365 do_filp_open+0x2b8/0x710 fs/namei.c:3395 do_sys_openat2+0xa88/0x1140 fs/open.c:1168 do_sys_open fs/open.c:1184 [inline] __do_compat_sys_openat fs/open.c:1242 [inline] __se_compat_sys_openat+0x2a4/0x310 fs/open.c:1240 __ia32_compat_sys_openat+0x56/0x70 fs/open.c:1240 do_syscall_32_irqs_on arch/x86/entry/common.c:80 [inline] __do_fast_syscall_32+0x129/0x180 arch/x86/entry/common.c:139 do_fast_syscall_32+0x6a/0xc0 arch/x86/entry/common.c:162 do_SYSENTER_32+0x73/0x90 arch/x86/entry/common.c:205 entry_SYSENTER_compat_after_hwframe+0x4d/0x5c In f2fs_lookup(), @res_page could be used before being initialized, because in __f2fs_find_entry(), once F2FS_I(dir)->i_current_depth was been fuzzed to zero, then @res_page will never be initialized, causing this kmsan warning, relocating @res_page initialization place to fix this bug. Reported-by: syzbot+0eac6f0bbd558fd866d7@syzkaller.appspotmail.com Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin --- fs/f2fs/dir.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index e9af46dc06f7..78d041f9775a 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -303,16 +303,15 @@ struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir, unsigned int max_depth; unsigned int level; + *res_page = NULL; + if (f2fs_has_inline_dentry(dir)) { - *res_page = NULL; de = f2fs_find_in_inline_dir(dir, fname, res_page); goto out; } - if (npages == 0) { - *res_page = NULL; + if (npages == 0) goto out; - } max_depth = F2FS_I(dir)->i_current_depth; if (unlikely(max_depth > MAX_DIR_HASH_DEPTH)) { @@ -323,7 +322,6 @@ struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir, } for (level = 0; level < max_depth; level++) { - *res_page = NULL; de = find_in_level(dir, level, fname, res_page); if (de || IS_ERR(*res_page)) break; -- cgit v1.2.3 From 9804eda4a97569bae944b64b3c657397648c5fdf Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 29 Sep 2020 09:23:12 +0800 Subject: f2fs: fix to check segment boundary during SIT page readahead [ Upstream commit 6a257471fa42c8c9c04a875cd3a2a22db148e0f0 ] As syzbot reported: kernel BUG at fs/f2fs/segment.h:657! invalid opcode: 0000 [#1] PREEMPT SMP KASAN CPU: 1 PID: 16220 Comm: syz-executor.0 Not tainted 5.9.0-rc5-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:f2fs_ra_meta_pages+0xa51/0xdc0 fs/f2fs/segment.h:657 Call Trace: build_sit_entries fs/f2fs/segment.c:4195 [inline] f2fs_build_segment_manager+0x4b8a/0xa3c0 fs/f2fs/segment.c:4779 f2fs_fill_super+0x377d/0x6b80 fs/f2fs/super.c:3633 mount_bdev+0x32e/0x3f0 fs/super.c:1417 legacy_get_tree+0x105/0x220 fs/fs_context.c:592 vfs_get_tree+0x89/0x2f0 fs/super.c:1547 do_new_mount fs/namespace.c:2875 [inline] path_mount+0x1387/0x2070 fs/namespace.c:3192 do_mount fs/namespace.c:3205 [inline] __do_sys_mount fs/namespace.c:3413 [inline] __se_sys_mount fs/namespace.c:3390 [inline] __x64_sys_mount+0x27f/0x300 fs/namespace.c:3390 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xa9 @blkno in f2fs_ra_meta_pages could exceed max segment count, causing panic in following sanity check in current_sit_addr(), add check condition to avoid this issue. Reported-by: syzbot+3698081bcf0bb2d12174@syzkaller.appspotmail.com Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin --- fs/f2fs/checkpoint.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 3d7f9e20a54b..6d9be7783d25 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -243,6 +243,8 @@ int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, blkno * NAT_ENTRY_PER_BLOCK); break; case META_SIT: + if (unlikely(blkno >= TOTAL_SEGS(sbi))) + goto out; /* get sit block addr */ fio.new_blkaddr = current_sit_addr(sbi, blkno * SIT_ENTRY_PER_BLOCK); -- cgit v1.2.3 From fb9b18150e3f843b37fe99d712ec4d7a9fd131f7 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Thu, 24 Sep 2020 19:07:04 +0200 Subject: s390/startup: avoid save_area_sync overflow [ Upstream commit 2835c2ea95d50625108e47a459e1a47f6be836ce ] Currently we overflow save_area_sync and write over save_area_async. Although this is not a real problem make startup_pgm_check_handler consistent with late pgm check handler and store [%r0,%r7] directly into gpregs_save_area. Reviewed-by: Sven Schnelle Signed-off-by: Vasily Gorbik Signed-off-by: Sasha Levin --- arch/s390/boot/head.S | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/arch/s390/boot/head.S b/arch/s390/boot/head.S index 4b86a8d3c121..e6bf5f40bff3 100644 --- a/arch/s390/boot/head.S +++ b/arch/s390/boot/head.S @@ -360,22 +360,23 @@ ENTRY(startup_kdump) # the save area and does disabled wait with a faulty address. # ENTRY(startup_pgm_check_handler) - stmg %r0,%r15,__LC_SAVE_AREA_SYNC - la %r1,4095 - stctg %c0,%c15,__LC_CREGS_SAVE_AREA-4095(%r1) - mvc __LC_GPREGS_SAVE_AREA-4095(128,%r1),__LC_SAVE_AREA_SYNC - mvc __LC_PSW_SAVE_AREA-4095(16,%r1),__LC_PGM_OLD_PSW + stmg %r8,%r15,__LC_SAVE_AREA_SYNC + la %r8,4095 + stctg %c0,%c15,__LC_CREGS_SAVE_AREA-4095(%r8) + stmg %r0,%r7,__LC_GPREGS_SAVE_AREA-4095(%r8) + mvc __LC_GPREGS_SAVE_AREA-4095+64(64,%r8),__LC_SAVE_AREA_SYNC + mvc __LC_PSW_SAVE_AREA-4095(16,%r8),__LC_PGM_OLD_PSW mvc __LC_RETURN_PSW(16),__LC_PGM_OLD_PSW ni __LC_RETURN_PSW,0xfc # remove IO and EX bits ni __LC_RETURN_PSW+1,0xfb # remove MCHK bit oi __LC_RETURN_PSW+1,0x2 # set wait state bit - larl %r2,.Lold_psw_disabled_wait - stg %r2,__LC_PGM_NEW_PSW+8 - l %r15,.Ldump_info_stack-.Lold_psw_disabled_wait(%r2) + larl %r9,.Lold_psw_disabled_wait + stg %r9,__LC_PGM_NEW_PSW+8 + l %r15,.Ldump_info_stack-.Lold_psw_disabled_wait(%r9) brasl %r14,print_pgm_check_info .Lold_psw_disabled_wait: - la %r1,4095 - lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1) + la %r8,4095 + lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r8) lpswe __LC_RETURN_PSW # disabled wait .Ldump_info_stack: .long 0x5000 + PAGE_SIZE - STACK_FRAME_OVERHEAD -- cgit v1.2.3 From 15c7ec03ddb8ec7990d9d903cf29c0afa33403be Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 4 Jun 2020 13:23:17 +0200 Subject: um: change sigio_spinlock to a mutex [ Upstream commit f2d05059e15af3f70502074f4e3a504530af504a ] Lockdep complains at boot: ============================= [ BUG: Invalid wait context ] 5.7.0-05093-g46d91ecd597b #98 Not tainted ----------------------------- swapper/1 is trying to lock: 0000000060931b98 (&desc[i].request_mutex){+.+.}-{3:3}, at: __setup_irq+0x11d/0x623 other info that might help us debug this: context-{4:4} 1 lock held by swapper/1: #0: 000000006074fed8 (sigio_spinlock){+.+.}-{2:2}, at: sigio_lock+0x1a/0x1c stack backtrace: CPU: 0 PID: 1 Comm: swapper Not tainted 5.7.0-05093-g46d91ecd597b #98 Stack: 7fa4fab0 6028dfd1 0000002a 6008bea5 7fa50700 7fa50040 7fa4fac0 6028e016 7fa4fb50 6007f6da 60959c18 00000000 Call Trace: [<60023a0e>] show_stack+0x13b/0x155 [<6028e016>] dump_stack+0x2a/0x2c [<6007f6da>] __lock_acquire+0x515/0x15f2 [<6007eb50>] lock_acquire+0x245/0x273 [<6050d9f1>] __mutex_lock+0xbd/0x325 [<6050dc76>] mutex_lock_nested+0x1d/0x1f [<6008e27e>] __setup_irq+0x11d/0x623 [<6008e8ed>] request_threaded_irq+0x169/0x1a6 [<60021eb0>] um_request_irq+0x1ee/0x24b [<600234ee>] write_sigio_irq+0x3b/0x76 [<600383ca>] sigio_broken+0x146/0x2e4 [<60020bd8>] do_one_initcall+0xde/0x281 Because we hold sigio_spinlock and then get into requesting an interrupt with a mutex. Change the spinlock to a mutex to avoid that. Signed-off-by: Johannes Berg Signed-off-by: Richard Weinberger Signed-off-by: Sasha Levin --- arch/um/kernel/sigio.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/um/kernel/sigio.c b/arch/um/kernel/sigio.c index 10c99e058fca..d1cffc2a7f21 100644 --- a/arch/um/kernel/sigio.c +++ b/arch/um/kernel/sigio.c @@ -35,14 +35,14 @@ int write_sigio_irq(int fd) } /* These are called from os-Linux/sigio.c to protect its pollfds arrays. */ -static DEFINE_SPINLOCK(sigio_spinlock); +static DEFINE_MUTEX(sigio_mutex); void sigio_lock(void) { - spin_lock(&sigio_spinlock); + mutex_lock(&sigio_mutex); } void sigio_unlock(void) { - spin_unlock(&sigio_spinlock); + mutex_unlock(&sigio_mutex); } -- cgit v1.2.3 From df5b07f2172a159a4d9f0c280e6ecddba992ba35 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 2 Oct 2020 14:17:35 -0700 Subject: f2fs: handle errors of f2fs_get_meta_page_nofail [ Upstream commit 86f33603f8c51537265ff7ac0320638fd2cbdb1b ] First problem is we hit BUG_ON() in f2fs_get_sum_page given EIO on f2fs_get_meta_page_nofail(). Quick fix was not to give any error with infinite loop, but syzbot caught a case where it goes to that loop from fuzzed image. In turned out we abused f2fs_get_meta_page_nofail() like in the below call stack. - f2fs_fill_super - f2fs_build_segment_manager - build_sit_entries - get_current_sit_page INFO: task syz-executor178:6870 can't die for more than 143 seconds. task:syz-executor178 state:R stack:26960 pid: 6870 ppid: 6869 flags:0x00004006 Call Trace: Showing all locks held in the system: 1 lock held by khungtaskd/1179: #0: ffffffff8a554da0 (rcu_read_lock){....}-{1:2}, at: debug_show_all_locks+0x53/0x260 kernel/locking/lockdep.c:6242 1 lock held by systemd-journal/3920: 1 lock held by in:imklog/6769: #0: ffff88809eebc130 (&f->f_pos_lock){+.+.}-{3:3}, at: __fdget_pos+0xe9/0x100 fs/file.c:930 1 lock held by syz-executor178/6870: #0: ffff8880925120e0 (&type->s_umount_key#47/1){+.+.}-{3:3}, at: alloc_super+0x201/0xaf0 fs/super.c:229 Actually, we didn't have to use _nofail in this case, since we could return error to mount(2) already with the error handler. As a result, this patch tries to 1) remove _nofail callers as much as possible, 2) deal with error case in last remaining caller, f2fs_get_sum_page(). Reported-by: syzbot+ee250ac8137be41d7b13@syzkaller.appspotmail.com Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin --- fs/f2fs/checkpoint.c | 2 +- fs/f2fs/f2fs.h | 2 +- fs/f2fs/node.c | 2 +- fs/f2fs/segment.c | 12 +++++++++--- 4 files changed, 12 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 6d9be7783d25..c966ccc44c15 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -108,7 +108,7 @@ struct page *f2fs_get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index) return __get_meta_page(sbi, index, true); } -struct page *f2fs_get_meta_page_nofail(struct f2fs_sb_info *sbi, pgoff_t index) +struct page *f2fs_get_meta_page_retry(struct f2fs_sb_info *sbi, pgoff_t index) { struct page *page; int count = 0; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index b3b7e63394be..63440abe58c4 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3149,7 +3149,7 @@ enum rw_hint f2fs_io_type_to_rw_hint(struct f2fs_sb_info *sbi, void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io); struct page *f2fs_grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index); struct page *f2fs_get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index); -struct page *f2fs_get_meta_page_nofail(struct f2fs_sb_info *sbi, pgoff_t index); +struct page *f2fs_get_meta_page_retry(struct f2fs_sb_info *sbi, pgoff_t index); struct page *f2fs_get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index); bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index ed12e9668184..2a4a382f28fe 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -109,7 +109,7 @@ static void clear_node_page_dirty(struct page *page) static struct page *get_current_nat_page(struct f2fs_sb_info *sbi, nid_t nid) { - return f2fs_get_meta_page_nofail(sbi, current_nat_addr(sbi, nid)); + return f2fs_get_meta_page(sbi, current_nat_addr(sbi, nid)); } static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 7d8578401267..5ba677f85533 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2310,7 +2310,9 @@ int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra) */ struct page *f2fs_get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno) { - return f2fs_get_meta_page_nofail(sbi, GET_SUM_BLOCK(sbi, segno)); + if (unlikely(f2fs_cp_error(sbi))) + return ERR_PTR(-EIO); + return f2fs_get_meta_page_retry(sbi, GET_SUM_BLOCK(sbi, segno)); } void f2fs_update_meta_page(struct f2fs_sb_info *sbi, @@ -2582,7 +2584,11 @@ static void change_curseg(struct f2fs_sb_info *sbi, int type) __next_free_blkoff(sbi, curseg, 0); sum_page = f2fs_get_sum_page(sbi, new_segno); - f2fs_bug_on(sbi, IS_ERR(sum_page)); + if (IS_ERR(sum_page)) { + /* GC won't be able to use stale summary pages by cp_error */ + memset(curseg->sum_blk, 0, SUM_ENTRY_SIZE); + return; + } sum_node = (struct f2fs_summary_block *)page_address(sum_page); memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE); f2fs_put_page(sum_page, 1); @@ -3713,7 +3719,7 @@ int f2fs_lookup_journal_in_cursum(struct f2fs_journal *journal, int type, static struct page *get_current_sit_page(struct f2fs_sb_info *sbi, unsigned int segno) { - return f2fs_get_meta_page_nofail(sbi, current_sit_addr(sbi, segno)); + return f2fs_get_meta_page(sbi, current_sit_addr(sbi, segno)); } static struct page *get_next_sit_page(struct f2fs_sb_info *sbi, -- cgit v1.2.3 From 13081d5ddb5882bf13e7884103d4f7f43c9b0f68 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Thu, 6 Aug 2020 23:24:35 +0100 Subject: ARM: 8997/2: hw_breakpoint: Handle inexact watchpoint addresses [ Upstream commit 22c9e58299e5f18274788ce54c03d4fb761e3c5d ] This is commit fdfeff0f9e3d ("arm64: hw_breakpoint: Handle inexact watchpoint addresses") but ported to arm32, which has the same problem. This problem was found by Android CTS tests, notably the "watchpoint_imprecise" test [1]. I tested locally against a copycat (simplified) version of the test though. [1] https://android.googlesource.com/platform/bionic/+/master/tests/sys_ptrace_test.cpp Link: https://lkml.kernel.org/r/20191019111216.1.I82eae759ca6dc28a245b043f485ca490e3015321@changeid Signed-off-by: Douglas Anderson Reviewed-by: Matthias Kaehlcke Acked-by: Will Deacon Signed-off-by: Russell King Signed-off-by: Sasha Levin --- arch/arm/kernel/hw_breakpoint.c | 100 +++++++++++++++++++++++++++++----------- 1 file changed, 72 insertions(+), 28 deletions(-) diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c index 5f95e4b911a0..7021ef0b4e71 100644 --- a/arch/arm/kernel/hw_breakpoint.c +++ b/arch/arm/kernel/hw_breakpoint.c @@ -680,6 +680,40 @@ static void disable_single_step(struct perf_event *bp) arch_install_hw_breakpoint(bp); } +/* + * Arm32 hardware does not always report a watchpoint hit address that matches + * one of the watchpoints set. It can also report an address "near" the + * watchpoint if a single instruction access both watched and unwatched + * addresses. There is no straight-forward way, short of disassembling the + * offending instruction, to map that address back to the watchpoint. This + * function computes the distance of the memory access from the watchpoint as a + * heuristic for the likelyhood that a given access triggered the watchpoint. + * + * See this same function in the arm64 platform code, which has the same + * problem. + * + * The function returns the distance of the address from the bytes watched by + * the watchpoint. In case of an exact match, it returns 0. + */ +static u32 get_distance_from_watchpoint(unsigned long addr, u32 val, + struct arch_hw_breakpoint_ctrl *ctrl) +{ + u32 wp_low, wp_high; + u32 lens, lene; + + lens = __ffs(ctrl->len); + lene = __fls(ctrl->len); + + wp_low = val + lens; + wp_high = val + lene; + if (addr < wp_low) + return wp_low - addr; + else if (addr > wp_high) + return addr - wp_high; + else + return 0; +} + static int watchpoint_fault_on_uaccess(struct pt_regs *regs, struct arch_hw_breakpoint *info) { @@ -689,23 +723,25 @@ static int watchpoint_fault_on_uaccess(struct pt_regs *regs, static void watchpoint_handler(unsigned long addr, unsigned int fsr, struct pt_regs *regs) { - int i, access; - u32 val, ctrl_reg, alignment_mask; + int i, access, closest_match = 0; + u32 min_dist = -1, dist; + u32 val, ctrl_reg; struct perf_event *wp, **slots; struct arch_hw_breakpoint *info; struct arch_hw_breakpoint_ctrl ctrl; slots = this_cpu_ptr(wp_on_reg); + /* + * Find all watchpoints that match the reported address. If no exact + * match is found. Attribute the hit to the closest watchpoint. + */ + rcu_read_lock(); for (i = 0; i < core_num_wrps; ++i) { - rcu_read_lock(); - wp = slots[i]; - if (wp == NULL) - goto unlock; + continue; - info = counter_arch_bp(wp); /* * The DFAR is an unknown value on debug architectures prior * to 7.1. Since we only allow a single watchpoint on these @@ -714,33 +750,31 @@ static void watchpoint_handler(unsigned long addr, unsigned int fsr, */ if (debug_arch < ARM_DEBUG_ARCH_V7_1) { BUG_ON(i > 0); + info = counter_arch_bp(wp); info->trigger = wp->attr.bp_addr; } else { - if (info->ctrl.len == ARM_BREAKPOINT_LEN_8) - alignment_mask = 0x7; - else - alignment_mask = 0x3; - - /* Check if the watchpoint value matches. */ - val = read_wb_reg(ARM_BASE_WVR + i); - if (val != (addr & ~alignment_mask)) - goto unlock; - - /* Possible match, check the byte address select. */ - ctrl_reg = read_wb_reg(ARM_BASE_WCR + i); - decode_ctrl_reg(ctrl_reg, &ctrl); - if (!((1 << (addr & alignment_mask)) & ctrl.len)) - goto unlock; - /* Check that the access type matches. */ if (debug_exception_updates_fsr()) { access = (fsr & ARM_FSR_ACCESS_MASK) ? HW_BREAKPOINT_W : HW_BREAKPOINT_R; if (!(access & hw_breakpoint_type(wp))) - goto unlock; + continue; } + val = read_wb_reg(ARM_BASE_WVR + i); + ctrl_reg = read_wb_reg(ARM_BASE_WCR + i); + decode_ctrl_reg(ctrl_reg, &ctrl); + dist = get_distance_from_watchpoint(addr, val, &ctrl); + if (dist < min_dist) { + min_dist = dist; + closest_match = i; + } + /* Is this an exact match? */ + if (dist != 0) + continue; + /* We have a winner. */ + info = counter_arch_bp(wp); info->trigger = addr; } @@ -762,13 +796,23 @@ static void watchpoint_handler(unsigned long addr, unsigned int fsr, * we can single-step over the watchpoint trigger. */ if (!is_default_overflow_handler(wp)) - goto unlock; - + continue; step: enable_single_step(wp, instruction_pointer(regs)); -unlock: - rcu_read_unlock(); } + + if (min_dist > 0 && min_dist != -1) { + /* No exact match found. */ + wp = slots[closest_match]; + info = counter_arch_bp(wp); + info->trigger = addr; + pr_debug("watchpoint fired: address = 0x%x\n", info->trigger); + perf_bp_event(wp, regs); + if (is_default_overflow_handler(wp)) + enable_single_step(wp, instruction_pointer(regs)); + } + + rcu_read_unlock(); } static void watchpoint_single_step_handler(unsigned long pc) -- cgit v1.2.3 From 036b0f4d7671f9195433a0ee988f7b72f29aef26 Mon Sep 17 00:00:00 2001 From: Dave Wysochanski Date: Tue, 4 Aug 2020 12:11:47 -0400 Subject: NFS4: Fix oops when copy_file_range is attempted with NFS4.0 source [ Upstream commit d8a6ad913c286d4763ae20b14c02fe6f39d7cd9f ] The following oops is seen during xfstest/565 when the 'test' (source of the copy) is NFS4.0 and 'scratch' (destination) is NFS4.2 [ 59.692458] run fstests generic/565 at 2020-08-01 05:50:35 [ 60.613588] BUG: kernel NULL pointer dereference, address: 0000000000000008 [ 60.624970] #PF: supervisor read access in kernel mode [ 60.627671] #PF: error_code(0x0000) - not-present page [ 60.630347] PGD 0 P4D 0 [ 60.631853] Oops: 0000 [#1] SMP PTI [ 60.634086] CPU: 6 PID: 2828 Comm: xfs_io Kdump: loaded Not tainted 5.8.0-rc3 #1 [ 60.637676] Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 [ 60.639901] RIP: 0010:nfs4_check_serverowner_major_id+0x5/0x30 [nfsv4] [ 60.642719] Code: 89 ff e8 3e b3 b8 e1 e9 71 fe ff ff 41 bc da d8 ff ff e9 c3 fe ff ff e8 e9 9d 08 e2 66 0f 1f 84 00 00 00 00 00 66 66 66 66 90 <8b> 57 08 31 c0 3b 56 08 75 12 48 83 c6 0c 48 83 c7 0c e8 c4 97 bb [ 60.652629] RSP: 0018:ffffc265417f7e10 EFLAGS: 00010287 [ 60.655379] RAX: ffffa0664b066400 RBX: 0000000000000000 RCX: 0000000000000001 [ 60.658754] RDX: ffffa066725fb000 RSI: ffffa066725fd000 RDI: 0000000000000000 [ 60.662292] RBP: 0000000000020000 R08: 0000000000020000 R09: 0000000000000000 [ 60.666189] R10: 0000000000000003 R11: 0000000000000000 R12: ffffa06648258d00 [ 60.669914] R13: 0000000000000000 R14: 0000000000000000 R15: ffffa06648258100 [ 60.673645] FS: 00007faa9fb35800(0000) GS:ffffa06677d80000(0000) knlGS:0000000000000000 [ 60.677698] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 60.680773] CR2: 0000000000000008 CR3: 0000000203f14000 CR4: 00000000000406e0 [ 60.684476] Call Trace: [ 60.685809] nfs4_copy_file_range+0xfc/0x230 [nfsv4] [ 60.688704] vfs_copy_file_range+0x2ee/0x310 [ 60.691104] __x64_sys_copy_file_range+0xd6/0x210 [ 60.693527] do_syscall_64+0x4d/0x90 [ 60.695512] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 60.698006] RIP: 0033:0x7faa9febc1bd Signed-off-by: Dave Wysochanski Signed-off-by: Anna Schumaker Signed-off-by: Sasha Levin --- fs/nfs/nfs4file.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index 534b6fd70ffd..6b31cb5f9c9d 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -138,7 +138,8 @@ static ssize_t __nfs4_copy_file_range(struct file *file_in, loff_t pos_in, /* Only offload copy if superblock is the same */ if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb) return -EXDEV; - if (!nfs_server_capable(file_inode(file_out), NFS_CAP_COPY)) + if (!nfs_server_capable(file_inode(file_out), NFS_CAP_COPY) || + !nfs_server_capable(file_inode(file_in), NFS_CAP_COPY)) return -EOPNOTSUPP; if (file_inode(file_in) == file_inode(file_out)) return -EOPNOTSUPP; -- cgit v1.2.3 From a10ed3b55fed6bdcaf67853bac0f15f2f3453aec Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sat, 19 Sep 2020 16:04:14 +0200 Subject: power: supply: bq27xxx: report "not charging" on all types [ Upstream commit 7bf738ba110722b63e9dc8af760d3fb2aef25593 ] Commit 6f24ff97e323 ("power: supply: bq27xxx_battery: Add the BQ27Z561 Battery monitor") and commit d74534c27775 ("power: bq27xxx_battery: Add support for additional bq27xxx family devices") added support for new device types by copying most of the code and adding necessary quirks. However they did not copy the code in bq27xxx_battery_status() responsible for returning POWER_SUPPLY_STATUS_NOT_CHARGING. Unify the bq27xxx_battery_status() so for all types when charger is supplied, it will return "not charging" status. Signed-off-by: Krzysztof Kozlowski Signed-off-by: Sebastian Reichel Signed-off-by: Sasha Levin --- drivers/power/supply/bq27xxx_battery.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/power/supply/bq27xxx_battery.c b/drivers/power/supply/bq27xxx_battery.c index 664e50103eaa..aff0a0a5e7f8 100644 --- a/drivers/power/supply/bq27xxx_battery.c +++ b/drivers/power/supply/bq27xxx_battery.c @@ -1678,8 +1678,6 @@ static int bq27xxx_battery_status(struct bq27xxx_device_info *di, status = POWER_SUPPLY_STATUS_FULL; else if (di->cache.flags & BQ27000_FLAG_CHGS) status = POWER_SUPPLY_STATUS_CHARGING; - else if (power_supply_am_i_supplied(di->bat) > 0) - status = POWER_SUPPLY_STATUS_NOT_CHARGING; else status = POWER_SUPPLY_STATUS_DISCHARGING; } else { @@ -1691,6 +1689,10 @@ static int bq27xxx_battery_status(struct bq27xxx_device_info *di, status = POWER_SUPPLY_STATUS_CHARGING; } + if ((status == POWER_SUPPLY_STATUS_DISCHARGING) && + (power_supply_am_i_supplied(di->bat) > 0)) + status = POWER_SUPPLY_STATUS_NOT_CHARGING; + val->intval = status; return 0; -- cgit v1.2.3 From b2844ba3d37cba994fc9e591e7bed17c7a0408d7 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 7 Oct 2020 13:55:16 -0700 Subject: xfs: fix realtime bitmap/summary file truncation when growing rt volume [ Upstream commit f4c32e87de7d66074d5612567c5eac7325024428 ] The realtime bitmap and summary files are regular files that are hidden away from the directory tree. Since they're regular files, inode inactivation will try to purge what it thinks are speculative preallocations beyond the incore size of the file. Unfortunately, xfs_growfs_rt forgets to update the incore size when it resizes the inodes, with the result that inactivating the rt inodes at unmount time will cause their contents to be truncated. Fix this by updating the incore size when we change the ondisk size as part of updating the superblock. Note that we don't do this when we're allocating blocks to the rt inodes because we actually want those blocks to get purged if the growfs fails. This fixes corruption complaints from the online rtsummary checker when running xfs/233. Since that test requires rmap, one can also trigger this by growing an rt volume, cycling the mount, and creating rt files. Signed-off-by: Darrick J. Wong Reviewed-by: Chandan Babu R Signed-off-by: Sasha Levin --- fs/xfs/xfs_rtalloc.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index b58366937082..6d5ddc4e5135 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -1021,10 +1021,13 @@ xfs_growfs_rt( xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, mp->m_rbmip, XFS_ILOCK_EXCL); /* - * Update the bitmap inode's size. + * Update the bitmap inode's size ondisk and incore. We need + * to update the incore size so that inode inactivation won't + * punch what it thinks are "posteof" blocks. */ mp->m_rbmip->i_d.di_size = nsbp->sb_rbmblocks * nsbp->sb_blocksize; + i_size_write(VFS_I(mp->m_rbmip), mp->m_rbmip->i_d.di_size); xfs_trans_log_inode(tp, mp->m_rbmip, XFS_ILOG_CORE); /* * Get the summary inode into the transaction. @@ -1032,9 +1035,12 @@ xfs_growfs_rt( xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, mp->m_rsumip, XFS_ILOCK_EXCL); /* - * Update the summary inode's size. + * Update the summary inode's size. We need to update the + * incore size so that inode inactivation won't punch what it + * thinks are "posteof" blocks. */ mp->m_rsumip->i_d.di_size = nmp->m_rsumsize; + i_size_write(VFS_I(mp->m_rsumip), mp->m_rsumip->i_d.di_size); xfs_trans_log_inode(tp, mp->m_rsumip, XFS_ILOG_CORE); /* * Copy summary data from old to new sizes. -- cgit v1.2.3 From 759721fb58862b67e97a178e6e5341c39900edb5 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Mon, 20 Jul 2020 12:18:45 -0700 Subject: video: fbdev: pvr2fb: initialize variables [ Upstream commit 8e1ba47c60bcd325fdd097cd76054639155e5d2e ] clang static analysis reports this repesentative error pvr2fb.c:1049:2: warning: 1st function call argument is an uninitialized value [core.CallAndMessage] if (*cable_arg) ^~~~~~~~~~~~~~~ Problem is that cable_arg depends on the input loop to set the cable_arg[0]. If it does not, then some random value from the stack is used. A similar problem exists for output_arg. So initialize cable_arg and output_arg. Signed-off-by: Tom Rix Acked-by: Arnd Bergmann Signed-off-by: Sam Ravnborg Link: https://patchwork.freedesktop.org/patch/msgid/20200720191845.20115-1-trix@redhat.com Signed-off-by: Sasha Levin --- drivers/video/fbdev/pvr2fb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/video/fbdev/pvr2fb.c b/drivers/video/fbdev/pvr2fb.c index 0a3b2b7c7891..c916e9161443 100644 --- a/drivers/video/fbdev/pvr2fb.c +++ b/drivers/video/fbdev/pvr2fb.c @@ -1016,6 +1016,8 @@ static int __init pvr2fb_setup(char *options) if (!options || !*options) return 0; + cable_arg[0] = output_arg[0] = 0; + while ((this_opt = strsep(&options, ","))) { if (!*this_opt) continue; -- cgit v1.2.3 From b30a5c8d9defdad0e0fccf232809614f184889ff Mon Sep 17 00:00:00 2001 From: Wen Gong Date: Fri, 14 Aug 2020 18:17:08 +0300 Subject: ath10k: start recovery process when payload length exceeds max htc length for sdio [ Upstream commit 2fd3c8f34d08af0a6236085f9961866ad92ef9ec ] When simulate random transfer fail for sdio write and read, it happened "payload length exceeds max htc length" and recovery later sometimes. Test steps: 1. Add config and update kernel: CONFIG_FAIL_MMC_REQUEST=y CONFIG_FAULT_INJECTION=y CONFIG_FAULT_INJECTION_DEBUG_FS=y 2. Run simulate fail: cd /sys/kernel/debug/mmc1/fail_mmc_request echo 10 > probability echo 10 > times # repeat until hitting issues 3. It happened payload length exceeds max htc length. [ 199.935506] ath10k_sdio mmc1:0001:1: payload length 57005 exceeds max htc length: 4088 .... [ 264.990191] ath10k_sdio mmc1:0001:1: payload length 57005 exceeds max htc length: 4088 4. after some time, such as 60 seconds, it start recovery which triggered by wmi command timeout for periodic scan. [ 269.229232] ieee80211 phy0: Hardware restart was requested [ 269.734693] ath10k_sdio mmc1:0001:1: device successfully recovered The simulate fail of sdio is not a real sdio transter fail, it only set an error status in mmc_should_fail_request after the transfer end, actually the transfer is success, then sdio_io_rw_ext_helper will return error status and stop transfer the left data. For example, the really RX len is 286 bytes, then it will split to 2 blocks in sdio_io_rw_ext_helper, one is 256 bytes, left is 30 bytes, if the first 256 bytes get an error status by mmc_should_fail_request,then the left 30 bytes will not read in this RX operation. Then when the next RX arrive, the left 30 bytes will be considered as the header of the read, the top 4 bytes of the 30 bytes will be considered as lookaheads, but actually the 4 bytes is not the lookaheads, so the len from this lookaheads is not correct, it exceeds max htc length 4088 sometimes. When happened exceeds, the buffer chain is not matched between firmware and ath10k, then it need to start recovery ASAP. Recently then recovery will be started by wmi command timeout, but it will be long time later, for example, it is 60+ seconds later from the periodic scan, if it does not have periodic scan, it will be longer. Start recovery when it happened "payload length exceeds max htc length" will be reasonable. This patch only effect sdio chips. Tested with QCA6174 SDIO with firmware WLAN.RMH.4.4.1-00029. Signed-off-by: Wen Gong Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20200108031957.22308-3-wgong@codeaurora.org Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath10k/sdio.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/wireless/ath/ath10k/sdio.c b/drivers/net/wireless/ath/ath10k/sdio.c index 8fe626deadeb..24b1927a0751 100644 --- a/drivers/net/wireless/ath/ath10k/sdio.c +++ b/drivers/net/wireless/ath/ath10k/sdio.c @@ -550,6 +550,10 @@ static int ath10k_sdio_mbox_rx_alloc(struct ath10k *ar, le16_to_cpu(htc_hdr->len), ATH10K_HTC_MBOX_MAX_PAYLOAD_LENGTH); ret = -ENOMEM; + + queue_work(ar->workqueue, &ar->restart_work); + ath10k_warn(ar, "exceeds length, start recovery\n"); + goto err; } -- cgit v1.2.3 From ed0bd7b12939e1ccc60529c48c489bc7602ad102 Mon Sep 17 00:00:00 2001 From: Sathishkumar Muruganandam Date: Fri, 14 Aug 2020 13:46:11 +0530 Subject: ath10k: fix VHT NSS calculation when STBC is enabled [ Upstream commit 99f41b8e43b8b4b31262adb8ac3e69088fff1289 ] When STBC is enabled, NSTS_SU value need to be accounted for VHT NSS calculation for SU case. Without this fix, 1SS + STBC enabled case was reported wrongly as 2SS in radiotap header on monitor mode capture. Tested-on: QCA9984 10.4-3.10-00047 Signed-off-by: Sathishkumar Muruganandam Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/1597392971-3897-1-git-send-email-murugana@codeaurora.org Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath10k/htt_rx.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c b/drivers/net/wireless/ath/ath10k/htt_rx.c index 8ca0a808a644..04095f91d301 100644 --- a/drivers/net/wireless/ath/ath10k/htt_rx.c +++ b/drivers/net/wireless/ath/ath10k/htt_rx.c @@ -949,6 +949,7 @@ static void ath10k_htt_rx_h_rates(struct ath10k *ar, u8 preamble = 0; u8 group_id; u32 info1, info2, info3; + u32 stbc, nsts_su; info1 = __le32_to_cpu(rxd->ppdu_start.info1); info2 = __le32_to_cpu(rxd->ppdu_start.info2); @@ -993,11 +994,16 @@ static void ath10k_htt_rx_h_rates(struct ath10k *ar, */ bw = info2 & 3; sgi = info3 & 1; + stbc = (info2 >> 3) & 1; group_id = (info2 >> 4) & 0x3F; if (GROUP_ID_IS_SU_MIMO(group_id)) { mcs = (info3 >> 4) & 0x0F; - nss = ((info2 >> 10) & 0x07) + 1; + nsts_su = ((info2 >> 10) & 0x07); + if (stbc) + nss = (nsts_su >> 2) + 1; + else + nss = (nsts_su + 1); } else { /* Hardware doesn't decode VHT-SIG-B into Rx descriptor * so it's impossible to decode MCS. Also since -- cgit v1.2.3 From 581940d9b9c85b3869e2897112e03ec8e9f69c8a Mon Sep 17 00:00:00 2001 From: Nadezda Lutovinova Date: Wed, 19 Aug 2020 17:37:56 +0300 Subject: drm/brige/megachips: Add checking if ge_b850v3_lvds_init() is working correctly [ Upstream commit f688a345f0d7a6df4dd2aeca8e4f3c05e123a0ee ] If ge_b850v3_lvds_init() does not allocate memory for ge_b850v3_lvds_ptr, then a null pointer dereference is accessed. The patch adds checking of the return value of ge_b850v3_lvds_init(). Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Nadezda Lutovinova Signed-off-by: Sam Ravnborg Link: https://patchwork.freedesktop.org/patch/msgid/20200819143756.30626-1-lutovinova@ispras.ru Signed-off-by: Sasha Levin --- drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c b/drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c index 6e81e5db57f2..b050fd1f3d20 100644 --- a/drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c +++ b/drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c @@ -295,8 +295,12 @@ static int stdp4028_ge_b850v3_fw_probe(struct i2c_client *stdp4028_i2c, const struct i2c_device_id *id) { struct device *dev = &stdp4028_i2c->dev; + int ret; + + ret = ge_b850v3_lvds_init(dev); - ge_b850v3_lvds_init(dev); + if (ret) + return ret; ge_b850v3_lvds_ptr->stdp4028_i2c = stdp4028_i2c; i2c_set_clientdata(stdp4028_i2c, ge_b850v3_lvds_ptr); @@ -354,8 +358,12 @@ static int stdp2690_ge_b850v3_fw_probe(struct i2c_client *stdp2690_i2c, const struct i2c_device_id *id) { struct device *dev = &stdp2690_i2c->dev; + int ret; + + ret = ge_b850v3_lvds_init(dev); - ge_b850v3_lvds_init(dev); + if (ret) + return ret; ge_b850v3_lvds_ptr->stdp2690_i2c = stdp2690_i2c; i2c_set_clientdata(stdp2690_i2c, ge_b850v3_lvds_ptr); -- cgit v1.2.3 From ac437801e3c212e183ac373f75c3d75dbacf2dc8 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Wed, 26 Aug 2020 10:00:45 -0700 Subject: selftests/x86/fsgsbase: Reap a forgotten child [ Upstream commit ab2dd173330a3f07142e68cd65682205036cd00f ] The ptrace() test forgot to reap its child. Reap it. Signed-off-by: Andy Lutomirski Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/e7700a503f30e79ab35a63103938a19893dbeff2.1598461151.git.luto@kernel.org Signed-off-by: Sasha Levin --- tools/testing/selftests/x86/fsgsbase.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/testing/selftests/x86/fsgsbase.c b/tools/testing/selftests/x86/fsgsbase.c index 15a329da59fa..5f3aea210e01 100644 --- a/tools/testing/selftests/x86/fsgsbase.c +++ b/tools/testing/selftests/x86/fsgsbase.c @@ -499,6 +499,9 @@ static void test_ptrace_write_gsbase(void) END: ptrace(PTRACE_CONT, child, NULL, NULL); + wait(&status); + if (!WIFEXITED(status)) + printf("[WARN]\tChild didn't exit cleanly.\n"); } int main() -- cgit v1.2.3 From 3a85688062852cb2dec93dfda83239fd400ac8a2 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Thu, 20 Aug 2020 12:47:16 +0200 Subject: media: videodev2.h: RGB BT2020 and HSV are always full range [ Upstream commit b305dfe2e93434b12d438434461b709641f62af4 ] The default RGB quantization range for BT.2020 is full range (just as for all the other RGB pixel encodings), not limited range. Update the V4L2_MAP_QUANTIZATION_DEFAULT macro and documentation accordingly. Also mention that HSV is always full range and cannot be limited range. When RGB BT2020 was introduced in V4L2 it was not clear whether it should be limited or full range, but full range is the right (and consistent) choice. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- Documentation/media/uapi/v4l/colorspaces-defs.rst | 9 ++++----- Documentation/media/uapi/v4l/colorspaces-details.rst | 5 ++--- include/uapi/linux/videodev2.h | 17 ++++++++--------- 3 files changed, 14 insertions(+), 17 deletions(-) diff --git a/Documentation/media/uapi/v4l/colorspaces-defs.rst b/Documentation/media/uapi/v4l/colorspaces-defs.rst index e122bbe3d799..aabb08130354 100644 --- a/Documentation/media/uapi/v4l/colorspaces-defs.rst +++ b/Documentation/media/uapi/v4l/colorspaces-defs.rst @@ -36,8 +36,7 @@ whole range, 0-255, dividing the angular value by 1.41. The enum :c:type:`v4l2_hsv_encoding` specifies which encoding is used. .. note:: The default R'G'B' quantization is full range for all - colorspaces except for BT.2020 which uses limited range R'G'B' - quantization. + colorspaces. HSV formats are always full range. .. tabularcolumns:: |p{6.7cm}|p{10.8cm}| @@ -169,8 +168,8 @@ whole range, 0-255, dividing the angular value by 1.41. The enum - Details * - ``V4L2_QUANTIZATION_DEFAULT`` - Use the default quantization encoding as defined by the - colorspace. This is always full range for R'G'B' (except for the - BT.2020 colorspace) and HSV. It is usually limited range for Y'CbCr. + colorspace. This is always full range for R'G'B' and HSV. + It is usually limited range for Y'CbCr. * - ``V4L2_QUANTIZATION_FULL_RANGE`` - Use the full range quantization encoding. I.e. the range [0…1] is mapped to [0…255] (with possible clipping to [1…254] to avoid the @@ -180,4 +179,4 @@ whole range, 0-255, dividing the angular value by 1.41. The enum * - ``V4L2_QUANTIZATION_LIM_RANGE`` - Use the limited range quantization encoding. I.e. the range [0…1] is mapped to [16…235]. Cb and Cr are mapped from [-0.5…0.5] to - [16…240]. + [16…240]. Limited Range cannot be used with HSV. diff --git a/Documentation/media/uapi/v4l/colorspaces-details.rst b/Documentation/media/uapi/v4l/colorspaces-details.rst index 8b0ba3668101..fd0cf57691d8 100644 --- a/Documentation/media/uapi/v4l/colorspaces-details.rst +++ b/Documentation/media/uapi/v4l/colorspaces-details.rst @@ -377,9 +377,8 @@ Colorspace BT.2020 (V4L2_COLORSPACE_BT2020) The :ref:`itu2020` standard defines the colorspace used by Ultra-high definition television (UHDTV). The default transfer function is ``V4L2_XFER_FUNC_709``. The default Y'CbCr encoding is -``V4L2_YCBCR_ENC_BT2020``. The default R'G'B' quantization is limited -range (!), and so is the default Y'CbCr quantization. The chromaticities -of the primary colors and the white reference are: +``V4L2_YCBCR_ENC_BT2020``. The default Y'CbCr quantization is limited range. +The chromaticities of the primary colors and the white reference are: diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 530638dffd93..3210b3c82a4a 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -371,9 +371,9 @@ enum v4l2_hsv_encoding { enum v4l2_quantization { /* - * The default for R'G'B' quantization is always full range, except - * for the BT2020 colorspace. For Y'CbCr the quantization is always - * limited range, except for COLORSPACE_JPEG: this is full range. + * The default for R'G'B' quantization is always full range. + * For Y'CbCr the quantization is always limited range, except + * for COLORSPACE_JPEG: this is full range. */ V4L2_QUANTIZATION_DEFAULT = 0, V4L2_QUANTIZATION_FULL_RANGE = 1, @@ -382,14 +382,13 @@ enum v4l2_quantization { /* * Determine how QUANTIZATION_DEFAULT should map to a proper quantization. - * This depends on whether the image is RGB or not, the colorspace and the - * Y'CbCr encoding. + * This depends on whether the image is RGB or not, the colorspace. + * The Y'CbCr encoding is not used anymore, but is still there for backwards + * compatibility. */ #define V4L2_MAP_QUANTIZATION_DEFAULT(is_rgb_or_hsv, colsp, ycbcr_enc) \ - (((is_rgb_or_hsv) && (colsp) == V4L2_COLORSPACE_BT2020) ? \ - V4L2_QUANTIZATION_LIM_RANGE : \ - (((is_rgb_or_hsv) || (colsp) == V4L2_COLORSPACE_JPEG) ? \ - V4L2_QUANTIZATION_FULL_RANGE : V4L2_QUANTIZATION_LIM_RANGE)) + (((is_rgb_or_hsv) || (colsp) == V4L2_COLORSPACE_JPEG) ? \ + V4L2_QUANTIZATION_FULL_RANGE : V4L2_QUANTIZATION_LIM_RANGE) /* * Deprecated names for opRGB colorspace (IEC 61966-2-5) -- cgit v1.2.3 From 5472c5d1d505fc214799a098b42912ed6799e709 Mon Sep 17 00:00:00 2001 From: Xia Jiang Date: Fri, 14 Aug 2020 09:11:35 +0200 Subject: media: platform: Improve queue set up flow for bug fixing [ Upstream commit 5095a6413a0cf896ab468009b6142cb0fe617e66 ] Add checking created buffer size follow in mtk_jpeg_queue_setup(). Reviewed-by: Tomasz Figa Signed-off-by: Xia Jiang Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/platform/mtk-jpeg/mtk_jpeg_core.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/media/platform/mtk-jpeg/mtk_jpeg_core.c b/drivers/media/platform/mtk-jpeg/mtk_jpeg_core.c index ee802fc3bcdf..9fa1bc5514f3 100644 --- a/drivers/media/platform/mtk-jpeg/mtk_jpeg_core.c +++ b/drivers/media/platform/mtk-jpeg/mtk_jpeg_core.c @@ -571,6 +571,13 @@ static int mtk_jpeg_queue_setup(struct vb2_queue *q, if (!q_data) return -EINVAL; + if (*num_planes) { + for (i = 0; i < *num_planes; i++) + if (sizes[i] < q_data->sizeimage[i]) + return -EINVAL; + return 0; + } + *num_planes = q_data->fmt->colplanes; for (i = 0; i < q_data->fmt->colplanes; i++) { sizes[i] = q_data->sizeimage[i]; -- cgit v1.2.3 From 47ab020f32909a79017ace45eab7912dcd37ae4a Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Mon, 17 Aug 2020 11:38:27 -0700 Subject: usb: typec: tcpm: During PR_SWAP, source caps should be sent only after tSwapSourceStart [ Upstream commit 6bbe2a90a0bb4af8dd99c3565e907fe9b5e7fd88 ] The patch addresses the compliance test failures while running TD.PD.CP.E3, TD.PD.CP.E4, TD.PD.CP.E5 of the "Deterministic PD Compliance MOI" test plan published in https://www.usb.org/usbc. For a product to be Type-C compliant, it's expected that these tests are run on usb.org certified Type-C compliance tester as mentioned in https://www.usb.org/usbc. The purpose of the tests TD.PD.CP.E3, TD.PD.CP.E4, TD.PD.CP.E5 is to verify the PR_SWAP response of the device. While doing so, the test asserts that Source Capabilities message is NOT received from the test device within tSwapSourceStart min (20 ms) from the time the last bit of GoodCRC corresponding to the RS_RDY message sent by the UUT was sent. If it does then the test fails. This is in line with the requirements from the USB Power Delivery Specification Revision 3.0, Version 1.2: "6.6.8.1 SwapSourceStartTimer The SwapSourceStartTimer Shall be used by the new Source, after a Power Role Swap or Fast Role Swap, to ensure that it does not send Source_Capabilities Message before the new Sink is ready to receive the Source_Capabilities Message. The new Source Shall Not send the Source_Capabilities Message earlier than tSwapSourceStart after the last bit of the EOP of GoodCRC Message sent in response to the PS_RDY Message sent by the new Source indicating that its power supply is ready." The patch makes sure that TCPM does not send the Source_Capabilities Message within tSwapSourceStart(20ms) by transitioning into SRC_STARTUP only after tSwapSourceStart(20ms). Signed-off-by: Badhri Jagan Sridharan Reviewed-by: Guenter Roeck Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20200817183828.1895015-1-badhri@google.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/typec/tcpm/tcpm.c | 2 +- include/linux/usb/pd.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index 355a2c7fac0b..16e124753df7 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -3482,7 +3482,7 @@ static void run_state_machine(struct tcpm_port *port) */ tcpm_set_pwr_role(port, TYPEC_SOURCE); tcpm_pd_send_control(port, PD_CTRL_PS_RDY); - tcpm_set_state(port, SRC_STARTUP, 0); + tcpm_set_state(port, SRC_STARTUP, PD_T_SWAP_SRC_START); break; case VCONN_SWAP_ACCEPT: diff --git a/include/linux/usb/pd.h b/include/linux/usb/pd.h index 145c38e351c2..6655ce32feff 100644 --- a/include/linux/usb/pd.h +++ b/include/linux/usb/pd.h @@ -442,6 +442,7 @@ static inline unsigned int rdo_max_power(u32 rdo) #define PD_T_ERROR_RECOVERY 100 /* minimum 25 is insufficient */ #define PD_T_SRCSWAPSTDBY 625 /* Maximum of 650ms */ #define PD_T_NEWSRC 250 /* Maximum of 275ms */ +#define PD_T_SWAP_SRC_START 20 /* Minimum of 20ms */ #define PD_T_DRP_TRY 100 /* 75 - 150 ms */ #define PD_T_DRP_TRYWAIT 600 /* 400 - 800 ms */ -- cgit v1.2.3 From 448e5004ad855cbdf6be187715caa84db877a23f Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Mon, 10 Aug 2020 21:25:18 +0200 Subject: media: tw5864: check status of tw5864_frameinterval_get [ Upstream commit 780d815dcc9b34d93ae69385a8465c38d423ff0f ] clang static analysis reports this problem tw5864-video.c:773:32: warning: The left expression of the compound assignment is an uninitialized value. The computed value will also be garbage fintv->stepwise.max.numerator *= std_max_fps; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ^ stepwise.max is set with frameinterval, which comes from ret = tw5864_frameinterval_get(input, &frameinterval); fintv->stepwise.step = frameinterval; fintv->stepwise.min = frameinterval; fintv->stepwise.max = frameinterval; fintv->stepwise.max.numerator *= std_max_fps; When tw5864_frameinterval_get() fails, frameinterval is not set. So check the status and fix another similar problem. Signed-off-by: Tom Rix Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/pci/tw5864/tw5864-video.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/media/pci/tw5864/tw5864-video.c b/drivers/media/pci/tw5864/tw5864-video.c index 09732eed7eb4..656142c7a2cc 100644 --- a/drivers/media/pci/tw5864/tw5864-video.c +++ b/drivers/media/pci/tw5864/tw5864-video.c @@ -767,6 +767,9 @@ static int tw5864_enum_frameintervals(struct file *file, void *priv, fintv->type = V4L2_FRMIVAL_TYPE_STEPWISE; ret = tw5864_frameinterval_get(input, &frameinterval); + if (ret) + return ret; + fintv->stepwise.step = frameinterval; fintv->stepwise.min = frameinterval; fintv->stepwise.max = frameinterval; @@ -785,6 +788,9 @@ static int tw5864_g_parm(struct file *file, void *priv, cp->capability = V4L2_CAP_TIMEPERFRAME; ret = tw5864_frameinterval_get(input, &cp->timeperframe); + if (ret) + return ret; + cp->timeperframe.numerator *= input->frame_interval; cp->capturemode = 0; cp->readbuffers = 2; -- cgit v1.2.3 From 67e18c92e0818005b2caf892e4a274eefaa65e1b Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Fri, 3 Jul 2020 11:20:32 +0200 Subject: media: imx274: fix frame interval handling [ Upstream commit 49b20d981d723fae5a93843c617af2b2c23611ec ] 1) the numerator and/or denominator might be 0, in that case fall back to the default frame interval. This is per the spec and this caused a v4l2-compliance failure. 2) the updated frame interval wasn't returned in the s_frame_interval subdev op. Signed-off-by: Hans Verkuil Reviewed-by: Luca Ceresoli Signed-off-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/i2c/imx274.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/media/i2c/imx274.c b/drivers/media/i2c/imx274.c index 6011cec5e351..e6aa9f32b6a8 100644 --- a/drivers/media/i2c/imx274.c +++ b/drivers/media/i2c/imx274.c @@ -1235,6 +1235,8 @@ static int imx274_s_frame_interval(struct v4l2_subdev *sd, ret = imx274_set_frame_interval(imx274, fi->interval); if (!ret) { + fi->interval = imx274->frame_interval; + /* * exposure time range is decided by frame interval * need to update it after frame interval changes @@ -1730,9 +1732,9 @@ static int imx274_set_frame_interval(struct stimx274 *priv, __func__, frame_interval.numerator, frame_interval.denominator); - if (frame_interval.numerator == 0) { - err = -EINVAL; - goto fail; + if (frame_interval.numerator == 0 || frame_interval.denominator == 0) { + frame_interval.denominator = IMX274_DEF_FRAME_RATE; + frame_interval.numerator = 1; } req_frame_rate = (u32)(frame_interval.denominator -- cgit v1.2.3 From d3fb88a51c047e768634a9a6de5c9743119c3924 Mon Sep 17 00:00:00 2001 From: Madhuparna Bhowmik Date: Sat, 22 Aug 2020 11:45:28 +0530 Subject: mmc: via-sdmmc: Fix data race bug [ Upstream commit 87d7ad089b318b4f319bf57f1daa64eb6d1d10ad ] via_save_pcictrlreg() should be called with host->lock held as it writes to pm_pcictrl_reg, otherwise there can be a race condition between via_sd_suspend() and via_sdc_card_detect(). The same pattern is used in the function via_reset_pcictrl() as well, where via_save_pcictrlreg() is called with host->lock held. Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Madhuparna Bhowmik Link: https://lore.kernel.org/r/20200822061528.7035-1-madhuparnabhowmik10@gmail.com Signed-off-by: Ulf Hansson Signed-off-by: Sasha Levin --- drivers/mmc/host/via-sdmmc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/mmc/host/via-sdmmc.c b/drivers/mmc/host/via-sdmmc.c index 8d96ecba1b55..d12a068b0f9e 100644 --- a/drivers/mmc/host/via-sdmmc.c +++ b/drivers/mmc/host/via-sdmmc.c @@ -1259,11 +1259,14 @@ static void via_init_sdc_pm(struct via_crdr_mmc_host *host) static int via_sd_suspend(struct pci_dev *pcidev, pm_message_t state) { struct via_crdr_mmc_host *host; + unsigned long flags; host = pci_get_drvdata(pcidev); + spin_lock_irqsave(&host->lock, flags); via_save_pcictrlreg(host); via_save_sdcreg(host); + spin_unlock_irqrestore(&host->lock, flags); pci_save_state(pcidev); pci_enable_wake(pcidev, pci_choose_state(pcidev, state), 0); -- cgit v1.2.3 From 7975367a005f6675aca53efd48f2f02d073037e8 Mon Sep 17 00:00:00 2001 From: Antonio Borneo Date: Wed, 1 Jul 2020 21:42:34 +0200 Subject: drm/bridge/synopsys: dsi: add support for non-continuous HS clock [ Upstream commit c6d94e37bdbb6dfe7e581e937a915ab58399b8a5 ] Current code enables the HS clock when video mode is started or to send out a HS command, and disables the HS clock to send out a LP command. This is not what DSI spec specify. Enable HS clock either in command and in video mode. Set automatic HS clock management for panels and devices that support non-continuous HS clock. Signed-off-by: Antonio Borneo Tested-by: Philippe Cornu Reviewed-by: Philippe Cornu Acked-by: Neil Armstrong Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20200701194234.18123-1-yannick.fertre@st.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi.c b/drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi.c index 675442bfc1bd..77384c49fb8d 100644 --- a/drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi.c +++ b/drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi.c @@ -365,7 +365,6 @@ static void dw_mipi_message_config(struct dw_mipi_dsi *dsi, if (lpm) val |= CMD_MODE_ALL_LP; - dsi_write(dsi, DSI_LPCLK_CTRL, lpm ? 0 : PHY_TXREQUESTCLKHS); dsi_write(dsi, DSI_CMD_MODE_CFG, val); } @@ -541,16 +540,22 @@ static void dw_mipi_dsi_video_mode_config(struct dw_mipi_dsi *dsi) static void dw_mipi_dsi_set_mode(struct dw_mipi_dsi *dsi, unsigned long mode_flags) { + u32 val; + dsi_write(dsi, DSI_PWR_UP, RESET); if (mode_flags & MIPI_DSI_MODE_VIDEO) { dsi_write(dsi, DSI_MODE_CFG, ENABLE_VIDEO_MODE); dw_mipi_dsi_video_mode_config(dsi); - dsi_write(dsi, DSI_LPCLK_CTRL, PHY_TXREQUESTCLKHS); } else { dsi_write(dsi, DSI_MODE_CFG, ENABLE_CMD_MODE); } + val = PHY_TXREQUESTCLKHS; + if (dsi->mode_flags & MIPI_DSI_CLOCK_NON_CONTINUOUS) + val |= AUTO_CLKLANE_CTRL; + dsi_write(dsi, DSI_LPCLK_CTRL, val); + dsi_write(dsi, DSI_PWR_UP, POWERUP); } -- cgit v1.2.3 From 80685a94f7c41f8e817d8588ef4e8afb3667101a Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Sat, 29 Aug 2020 14:00:16 +0100 Subject: arm64: topology: Stop using MPIDR for topology information [ Upstream commit 3102bc0e6ac752cc5df896acb557d779af4d82a1 ] In the absence of ACPI or DT topology data, we fallback to haphazardly decoding *something* out of MPIDR. Sadly, the contents of that register are mostly unusable due to the implementation leniancy and things like Aff0 having to be capped to 15 (despite being encoded on 8 bits). Consider a simple system with a single package of 32 cores, all under the same LLC. We ought to be shoving them in the same core_sibling mask, but MPIDR is going to look like: | CPU | 0 | ... | 15 | 16 | ... | 31 | |------+---+-----+----+----+-----+----+ | Aff0 | 0 | ... | 15 | 0 | ... | 15 | | Aff1 | 0 | ... | 0 | 1 | ... | 1 | | Aff2 | 0 | ... | 0 | 0 | ... | 0 | Which will eventually yield core_sibling(0-15) == 0-15 core_sibling(16-31) == 16-31 NUMA woes ========= If we try to play games with this and set up NUMA boundaries within those groups of 16 cores via e.g. QEMU: # Node0: 0-9; Node1: 10-19 $ qemu-system-aarch64 \ -smp 20 -numa node,cpus=0-9,nodeid=0 -numa node,cpus=10-19,nodeid=1 The scheduler's MC domain (all CPUs with same LLC) is going to be built via arch_topology.c::cpu_coregroup_mask() In there we try to figure out a sensible mask out of the topology information we have. In short, here we'll pick the smallest of NUMA or core sibling mask. node_mask(CPU9) == 0-9 core_sibling(CPU9) == 0-15 MC mask for CPU9 will thus be 0-9, not a problem. node_mask(CPU10) == 10-19 core_sibling(CPU10) == 0-15 MC mask for CPU10 will thus be 10-19, not a problem. node_mask(CPU16) == 10-19 core_sibling(CPU16) == 16-19 MC mask for CPU16 will thus be 16-19... Uh oh. CPUs 16-19 are in two different unique MC spans, and the scheduler has no idea what to make of that. That triggers the WARN_ON() added by commit ccf74128d66c ("sched/topology: Assert non-NUMA topology masks don't (partially) overlap") Fixing MPIDR-derived topology ============================= We could try to come up with some cleverer scheme to figure out which of the available masks to pick, but really if one of those masks resulted from MPIDR then it should be discarded because it's bound to be bogus. I was hoping to give MPIDR a chance for SMT, to figure out which threads are in the same core using Aff1-3 as core ID, but Sudeep and Robin pointed out to me that there are systems out there where *all* cores have non-zero values in their higher affinity fields (e.g. RK3288 has "5" in all of its cores' MPIDR.Aff1), which would expose a bogus core ID to userspace. Stop using MPIDR for topology information. When no other source of topology information is available, mark each CPU as its own core and its NUMA node as its LLC domain. Signed-off-by: Valentin Schneider Reviewed-by: Sudeep Holla Link: https://lore.kernel.org/r/20200829130016.26106-1-valentin.schneider@arm.com Signed-off-by: Will Deacon Signed-off-by: Sasha Levin --- arch/arm64/kernel/topology.c | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index fa9528dfd0ce..113903db666c 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -35,21 +35,23 @@ void store_cpu_topology(unsigned int cpuid) if (mpidr & MPIDR_UP_BITMASK) return; - /* Create cpu topology mapping based on MPIDR. */ - if (mpidr & MPIDR_MT_BITMASK) { - /* Multiprocessor system : Multi-threads per core */ - cpuid_topo->thread_id = MPIDR_AFFINITY_LEVEL(mpidr, 0); - cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL(mpidr, 1); - cpuid_topo->package_id = MPIDR_AFFINITY_LEVEL(mpidr, 2) | - MPIDR_AFFINITY_LEVEL(mpidr, 3) << 8; - } else { - /* Multiprocessor system : Single-thread per core */ - cpuid_topo->thread_id = -1; - cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL(mpidr, 0); - cpuid_topo->package_id = MPIDR_AFFINITY_LEVEL(mpidr, 1) | - MPIDR_AFFINITY_LEVEL(mpidr, 2) << 8 | - MPIDR_AFFINITY_LEVEL(mpidr, 3) << 16; - } + /* + * This would be the place to create cpu topology based on MPIDR. + * + * However, it cannot be trusted to depict the actual topology; some + * pieces of the architecture enforce an artificial cap on Aff0 values + * (e.g. GICv3's ICC_SGI1R_EL1 limits it to 15), leading to an + * artificial cycling of Aff1, Aff2 and Aff3 values. IOW, these end up + * having absolutely no relationship to the actual underlying system + * topology, and cannot be reasonably used as core / package ID. + * + * If the MT bit is set, Aff0 *could* be used to define a thread ID, but + * we still wouldn't be able to obtain a sane core ID. This means we + * need to entirely ignore MPIDR for any topology deduction. + */ + cpuid_topo->thread_id = -1; + cpuid_topo->core_id = cpuid; + cpuid_topo->package_id = cpu_to_node(cpuid); pr_debug("CPU%u: cluster %d core %d thread %d mpidr %#016llx\n", cpuid, cpuid_topo->package_id, cpuid_topo->core_id, -- cgit v1.2.3 From b3142fe7ff63b92e6efac0b75e4e7941f338bc53 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Wed, 12 Aug 2020 09:37:22 +0206 Subject: printk: reduce LOG_BUF_SHIFT range for H8300 [ Upstream commit 550c10d28d21bd82a8bb48debbb27e6ed53262f6 ] The .bss section for the h8300 is relatively small. A value of CONFIG_LOG_BUF_SHIFT that is larger than 19 will create a static printk ringbuffer that is too large. Limit the range appropriately for the H8300. Reported-by: kernel test robot Signed-off-by: John Ogness Reviewed-by: Sergey Senozhatsky Acked-by: Steven Rostedt (VMware) Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20200812073122.25412-1-john.ogness@linutronix.de Signed-off-by: Sasha Levin --- init/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/init/Kconfig b/init/Kconfig index 6db3e310a5e4..96fc45d1b686 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -594,7 +594,8 @@ config IKHEADERS config LOG_BUF_SHIFT int "Kernel log buffer size (16 => 64KB, 17 => 128KB)" - range 12 25 + range 12 25 if !H8300 + range 12 19 if H8300 default 17 depends on PRINTK help -- cgit v1.2.3 From 77fa5e15c933a1ec812de61ad709c00aa51e96ae Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 29 Aug 2020 22:01:09 +0900 Subject: ia64: kprobes: Use generic kretprobe trampoline handler [ Upstream commit e792ff804f49720ce003b3e4c618b5d996256a18 ] Use the generic kretprobe trampoline handler. Don't use framepointer verification. Signed-off-by: Masami Hiramatsu Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/159870606883.1229682.12331813108378725668.stgit@devnote2 Signed-off-by: Sasha Levin --- arch/ia64/kernel/kprobes.c | 77 ++-------------------------------------------- 1 file changed, 2 insertions(+), 75 deletions(-) diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c index b8356edbde65..b3dc39050c1a 100644 --- a/arch/ia64/kernel/kprobes.c +++ b/arch/ia64/kernel/kprobes.c @@ -396,83 +396,9 @@ static void kretprobe_trampoline(void) { } -/* - * At this point the target function has been tricked into - * returning into our trampoline. Lookup the associated instance - * and then: - * - call the handler function - * - cleanup by marking the instance as unused - * - long jump back to the original return address - */ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) { - struct kretprobe_instance *ri = NULL; - struct hlist_head *head, empty_rp; - struct hlist_node *tmp; - unsigned long flags, orig_ret_address = 0; - unsigned long trampoline_address = - ((struct fnptr *)kretprobe_trampoline)->ip; - - INIT_HLIST_HEAD(&empty_rp); - kretprobe_hash_lock(current, &head, &flags); - - /* - * It is possible to have multiple instances associated with a given - * task either because an multiple functions in the call path - * have a return probe installed on them, and/or more than one return - * return probe was registered for a target function. - * - * We can handle this because: - * - instances are always inserted at the head of the list - * - when multiple return probes are registered for the same - * function, the first instance's ret_addr will point to the - * real return address, and all the rest will point to - * kretprobe_trampoline - */ - hlist_for_each_entry_safe(ri, tmp, head, hlist) { - if (ri->task != current) - /* another task is sharing our hash bucket */ - continue; - - orig_ret_address = (unsigned long)ri->ret_addr; - if (orig_ret_address != trampoline_address) - /* - * This is the real return address. Any other - * instances associated with this task are for - * other calls deeper on the call stack - */ - break; - } - - regs->cr_iip = orig_ret_address; - - hlist_for_each_entry_safe(ri, tmp, head, hlist) { - if (ri->task != current) - /* another task is sharing our hash bucket */ - continue; - - if (ri->rp && ri->rp->handler) - ri->rp->handler(ri, regs); - - orig_ret_address = (unsigned long)ri->ret_addr; - recycle_rp_inst(ri, &empty_rp); - - if (orig_ret_address != trampoline_address) - /* - * This is the real return address. Any other - * instances associated with this task are for - * other calls deeper on the call stack - */ - break; - } - kretprobe_assert(ri, orig_ret_address, trampoline_address); - - kretprobe_hash_unlock(current, &flags); - - hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) { - hlist_del(&ri->hlist); - kfree(ri); - } + regs->cr_iip = __kretprobe_trampoline_handler(regs, kretprobe_trampoline, NULL); /* * By returning a non-zero value, we are telling * kprobe_handler() that we don't want the post_handler @@ -485,6 +411,7 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs) { ri->ret_addr = (kprobe_opcode_t *)regs->b0; + ri->fp = NULL; /* Replace the return addr with trampoline addr */ regs->b0 = ((struct fnptr *)kretprobe_trampoline)->ip; -- cgit v1.2.3 From f7f7b77ee507ace0edfda7b14efa189eb926bc2e Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Tue, 30 Jun 2020 15:14:38 -0700 Subject: kgdb: Make "kgdbcon" work properly with "kgdb_earlycon" [ Upstream commit b18b099e04f450cdc77bec72acefcde7042bd1f3 ] On my system the kernel processes the "kgdb_earlycon" parameter before the "kgdbcon" parameter. When we setup "kgdb_earlycon" we'll end up in kgdb_register_callbacks() and "kgdb_use_con" won't have been set yet so we'll never get around to starting "kgdbcon". Let's remedy this by detecting that the IO module was already registered when setting "kgdb_use_con" and registering the console then. As part of this, to avoid pre-declaring things, move the handling of the "kgdbcon" further down in the file. Signed-off-by: Douglas Anderson Link: https://lore.kernel.org/r/20200630151422.1.I4aa062751ff5e281f5116655c976dff545c09a46@changeid Signed-off-by: Daniel Thompson Signed-off-by: Sasha Levin --- kernel/debug/debug_core.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c index 2222f3225e53..097ab02989f9 100644 --- a/kernel/debug/debug_core.c +++ b/kernel/debug/debug_core.c @@ -96,14 +96,6 @@ int dbg_switch_cpu; /* Use kdb or gdbserver mode */ int dbg_kdb_mode = 1; -static int __init opt_kgdb_con(char *str) -{ - kgdb_use_con = 1; - return 0; -} - -early_param("kgdbcon", opt_kgdb_con); - module_param(kgdb_use_con, int, 0644); module_param(kgdbreboot, int, 0644); @@ -876,6 +868,20 @@ static struct console kgdbcons = { .index = -1, }; +static int __init opt_kgdb_con(char *str) +{ + kgdb_use_con = 1; + + if (kgdb_io_module_registered && !kgdb_con_registered) { + register_console(&kgdbcons); + kgdb_con_registered = 1; + } + + return 0; +} + +early_param("kgdbcon", opt_kgdb_con); + #ifdef CONFIG_MAGIC_SYSRQ static void sysrq_handle_dbg(int key) { -- cgit v1.2.3 From 4801ffdd6962ad70f32aa6e7d8fc9cfc4789ab81 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 8 Sep 2020 10:57:02 -0700 Subject: bpf: Permit map_ptr arithmetic with opcode add and offset 0 [ Upstream commit 7c6967326267bd5c0dded0a99541357d70dd11ac ] Commit 41c48f3a98231 ("bpf: Support access to bpf map fields") added support to access map fields with CORE support. For example, struct bpf_map { __u32 max_entries; } __attribute__((preserve_access_index)); struct bpf_array { struct bpf_map map; __u32 elem_size; } __attribute__((preserve_access_index)); struct { __uint(type, BPF_MAP_TYPE_ARRAY); __uint(max_entries, 4); __type(key, __u32); __type(value, __u32); } m_array SEC(".maps"); SEC("cgroup_skb/egress") int cg_skb(void *ctx) { struct bpf_array *array = (struct bpf_array *)&m_array; /* .. array->map.max_entries .. */ } In kernel, bpf_htab has similar structure, struct bpf_htab { struct bpf_map map; ... } In the above cg_skb(), to access array->map.max_entries, with CORE, the clang will generate two builtin's. base = &m_array; /* access array.map */ map_addr = __builtin_preserve_struct_access_info(base, 0, 0); /* access array.map.max_entries */ max_entries_addr = __builtin_preserve_struct_access_info(map_addr, 0, 0); max_entries = *max_entries_addr; In the current llvm, if two builtin's are in the same function or in the same function after inlining, the compiler is smart enough to chain them together and generates like below: base = &m_array; max_entries = *(base + reloc_offset); /* reloc_offset = 0 in this case */ and we are fine. But if we force no inlining for one of functions in test_map_ptr() selftest, e.g., check_default(), the above two __builtin_preserve_* will be in two different functions. In this case, we will have code like: func check_hash(): reloc_offset_map = 0; base = &m_array; map_base = base + reloc_offset_map; check_default(map_base, ...) func check_default(map_base, ...): max_entries = *(map_base + reloc_offset_max_entries); In kernel, map_ptr (CONST_PTR_TO_MAP) does not allow any arithmetic. The above "map_base = base + reloc_offset_map" will trigger a verifier failure. ; VERIFY(check_default(&hash->map, map)); 0: (18) r7 = 0xffffb4fe8018a004 2: (b4) w1 = 110 3: (63) *(u32 *)(r7 +0) = r1 R1_w=invP110 R7_w=map_value(id=0,off=4,ks=4,vs=8,imm=0) R10=fp0 ; VERIFY_TYPE(BPF_MAP_TYPE_HASH, check_hash); 4: (18) r1 = 0xffffb4fe8018a000 6: (b4) w2 = 1 7: (63) *(u32 *)(r1 +0) = r2 R1_w=map_value(id=0,off=0,ks=4,vs=8,imm=0) R2_w=invP1 R7_w=map_value(id=0,off=4,ks=4,vs=8,imm=0) R10=fp0 8: (b7) r2 = 0 9: (18) r8 = 0xffff90bcb500c000 11: (18) r1 = 0xffff90bcb500c000 13: (0f) r1 += r2 R1 pointer arithmetic on map_ptr prohibited To fix the issue, let us permit map_ptr + 0 arithmetic which will result in exactly the same map_ptr. Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200908175702.2463625-1-yhs@fb.com Signed-off-by: Sasha Levin --- kernel/bpf/verifier.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 507474f79195..a67bfa803d98 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -4427,6 +4427,10 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, dst, reg_type_str[ptr_reg->type]); return -EACCES; case CONST_PTR_TO_MAP: + /* smin_val represents the known value */ + if (known && smin_val == 0 && opcode == BPF_ADD) + break; + /* fall-through */ case PTR_TO_PACKET_END: case PTR_TO_SOCKET: case PTR_TO_SOCKET_OR_NULL: -- cgit v1.2.3 From 8f71fb76a312719226f010249c55f6269f4d9a57 Mon Sep 17 00:00:00 2001 From: "Daniel W. S. Almeida" Date: Fri, 7 Aug 2020 10:35:30 +0200 Subject: media: uvcvideo: Fix dereference of out-of-bound list iterator [ Upstream commit f875bcc375c738bf2f599ff2e1c5b918dbd07c45 ] Fixes the following coccinelle report: drivers/media/usb/uvc/uvc_ctrl.c:1860:5-11: ERROR: invalid reference to the index variable of the iterator on line 1854 by adding a boolean variable to check if the loop has found the Found using - Coccinelle (http://coccinelle.lip6.fr) [Replace cursor variable with bool found] Signed-off-by: Daniel W. S. Almeida Signed-off-by: Laurent Pinchart Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/usb/uvc/uvc_ctrl.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/media/usb/uvc/uvc_ctrl.c b/drivers/media/usb/uvc/uvc_ctrl.c index a30a8a731eda..c13ed95cb06f 100644 --- a/drivers/media/usb/uvc/uvc_ctrl.c +++ b/drivers/media/usb/uvc/uvc_ctrl.c @@ -1848,30 +1848,35 @@ int uvc_xu_ctrl_query(struct uvc_video_chain *chain, { struct uvc_entity *entity; struct uvc_control *ctrl; - unsigned int i, found = 0; + unsigned int i; + bool found; u32 reqflags; u16 size; u8 *data = NULL; int ret; /* Find the extension unit. */ + found = false; list_for_each_entry(entity, &chain->entities, chain) { if (UVC_ENTITY_TYPE(entity) == UVC_VC_EXTENSION_UNIT && - entity->id == xqry->unit) + entity->id == xqry->unit) { + found = true; break; + } } - if (entity->id != xqry->unit) { + if (!found) { uvc_trace(UVC_TRACE_CONTROL, "Extension unit %u not found.\n", xqry->unit); return -ENOENT; } /* Find the control and perform delayed initialization if needed. */ + found = false; for (i = 0; i < entity->ncontrols; ++i) { ctrl = &entity->controls[i]; if (ctrl->index == xqry->selector - 1) { - found = 1; + found = true; break; } } -- cgit v1.2.3 From 58c80462e4671f9f146d6424328f1d68412769fa Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Thu, 10 Sep 2020 13:27:18 -0700 Subject: selftests/bpf: Define string const as global for test_sysctl_prog.c [ Upstream commit 6e057fc15a2da4ee03eb1fa6889cf687e690106e ] When tweaking llvm optimizations, I found that selftest build failed with the following error: libbpf: elf: skipping unrecognized data section(6) .rodata.str1.1 libbpf: prog 'sysctl_tcp_mem': bad map relo against '.L__const.is_tcp_mem.tcp_mem_name' in section '.rodata.str1.1' Error: failed to open BPF object file: Relocation failed make: *** [/work/net-next/tools/testing/selftests/bpf/test_sysctl_prog.skel.h] Error 255 make: *** Deleting file `/work/net-next/tools/testing/selftests/bpf/test_sysctl_prog.skel.h' The local string constant "tcp_mem_name" is put into '.rodata.str1.1' section which libbpf cannot handle. Using untweaked upstream llvm, "tcp_mem_name" is completely inlined after loop unrolling. Commit 7fb5eefd7639 ("selftests/bpf: Fix test_sysctl_loop{1, 2} failure due to clang change") solved a similar problem by defining the string const as a global. Let us do the same here for test_sysctl_prog.c so it can weather future potential llvm changes. Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200910202718.956042-1-yhs@fb.com Signed-off-by: Sasha Levin --- tools/testing/selftests/bpf/progs/test_sysctl_prog.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_prog.c b/tools/testing/selftests/bpf/progs/test_sysctl_prog.c index 5cbbff416998..4396faf33394 100644 --- a/tools/testing/selftests/bpf/progs/test_sysctl_prog.c +++ b/tools/testing/selftests/bpf/progs/test_sysctl_prog.c @@ -19,11 +19,11 @@ #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) #endif +const char tcp_mem_name[] = "net/ipv4/tcp_mem"; static __always_inline int is_tcp_mem(struct bpf_sysctl *ctx) { - char tcp_mem_name[] = "net/ipv4/tcp_mem"; unsigned char i; - char name[64]; + char name[sizeof(tcp_mem_name)]; int ret; memset(name, 0, sizeof(name)); -- cgit v1.2.3 From 7762afa04fd4810e88f380cf066c1d5945fb7b7b Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Thu, 10 Sep 2020 10:31:05 +0200 Subject: samples/bpf: Fix possible deadlock in xdpsock [ Upstream commit 5a2a0dd88f0f267ac5953acd81050ae43a82201f ] Fix a possible deadlock in the l2fwd application in xdpsock that can occur when there is no space in the Tx ring. There are two ways to get the kernel to consume entries in the Tx ring: calling sendto() to make it send packets and freeing entries from the completion ring, as the kernel will not send a packet if there is no space for it to add a completion entry in the completion ring. The Tx loop in l2fwd only used to call sendto(). This patches adds cleaning the completion ring in that loop. Signed-off-by: Magnus Karlsson Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/1599726666-8431-3-git-send-email-magnus.karlsson@gmail.com Signed-off-by: Sasha Levin --- samples/bpf/xdpsock_user.c | 1 + 1 file changed, 1 insertion(+) diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c index df011ac33402..79d1005ff2ee 100644 --- a/samples/bpf/xdpsock_user.c +++ b/samples/bpf/xdpsock_user.c @@ -677,6 +677,7 @@ static void l2fwd(struct xsk_socket_info *xsk, struct pollfd *fds) while (ret != rcvd) { if (ret < 0) exit_with_error(-ret); + complete_tx_l2fwd(xsk, fds); if (xsk_ring_prod__needs_wakeup(&xsk->tx)) kick_tx(xsk); ret = xsk_ring_prod__reserve(&xsk->tx, rcvd, &idx_tx); -- cgit v1.2.3 From 2eab702ee94587333b256eba7f9f1c9cfed57c32 Mon Sep 17 00:00:00 2001 From: Zong Li Date: Mon, 31 Aug 2020 15:33:49 +0800 Subject: riscv: Define AT_VECTOR_SIZE_ARCH for ARCH_DLINFO [ Upstream commit b5fca7c55f9fbab5ad732c3bce00f31af6ba5cfa ] AT_VECTOR_SIZE_ARCH should be defined with the maximum number of NEW_AUX_ENT entries that ARCH_DLINFO can contain, but it wasn't defined for RISC-V at all even though ARCH_DLINFO will contain one NEW_AUX_ENT for the VDSO address. Signed-off-by: Zong Li Reviewed-by: Palmer Dabbelt Reviewed-by: Pekka Enberg Signed-off-by: Palmer Dabbelt Signed-off-by: Sasha Levin --- arch/riscv/include/uapi/asm/auxvec.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/riscv/include/uapi/asm/auxvec.h b/arch/riscv/include/uapi/asm/auxvec.h index d86cb17bbabe..22e0ae888406 100644 --- a/arch/riscv/include/uapi/asm/auxvec.h +++ b/arch/riscv/include/uapi/asm/auxvec.h @@ -10,4 +10,7 @@ /* vDSO location */ #define AT_SYSINFO_EHDR 33 +/* entries in ARCH_DLINFO */ +#define AT_VECTOR_SIZE_ARCH 1 + #endif /* _UAPI_ASM_RISCV_AUXVEC_H */ -- cgit v1.2.3 From 65052761eeb9931c7478dd578e3616857e0d4b66 Mon Sep 17 00:00:00 2001 From: Alain Volmat Date: Mon, 31 Aug 2020 08:10:11 +0200 Subject: cpufreq: sti-cpufreq: add stih418 support [ Upstream commit 01a163c52039e9426c7d3d3ab16ca261ad622597 ] The STiH418 can be controlled the same way as STiH407 & STiH410 regarding cpufreq. Signed-off-by: Alain Volmat Signed-off-by: Viresh Kumar Signed-off-by: Sasha Levin --- drivers/cpufreq/sti-cpufreq.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/sti-cpufreq.c b/drivers/cpufreq/sti-cpufreq.c index 8f16bbb164b8..2855b7878a20 100644 --- a/drivers/cpufreq/sti-cpufreq.c +++ b/drivers/cpufreq/sti-cpufreq.c @@ -141,7 +141,8 @@ static const struct reg_field sti_stih407_dvfs_regfields[DVFS_MAX_REGFIELDS] = { static const struct reg_field *sti_cpufreq_match(void) { if (of_machine_is_compatible("st,stih407") || - of_machine_is_compatible("st,stih410")) + of_machine_is_compatible("st,stih410") || + of_machine_is_compatible("st,stih418")) return sti_stih407_dvfs_regfields; return NULL; @@ -258,7 +259,8 @@ static int sti_cpufreq_init(void) int ret; if ((!of_machine_is_compatible("st,stih407")) && - (!of_machine_is_compatible("st,stih410"))) + (!of_machine_is_compatible("st,stih410")) && + (!of_machine_is_compatible("st,stih418"))) return -ENODEV; ddata.cpu = get_cpu_device(0); -- cgit v1.2.3 From 16b9e40d2989871df227d032af7f9a79bb1556bd Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Thu, 17 Sep 2020 13:26:00 +0200 Subject: USB: adutux: fix debugging [ Upstream commit c56150c1bc8da5524831b1dac2eec3c67b89f587 ] Handling for removal of the controller was missing at one place. Add it. Signed-off-by: Oliver Neukum Link: https://lore.kernel.org/r/20200917112600.26508-1-oneukum@suse.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/misc/adutux.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/misc/adutux.c b/drivers/usb/misc/adutux.c index d8d157c4c271..96495fcd952a 100644 --- a/drivers/usb/misc/adutux.c +++ b/drivers/usb/misc/adutux.c @@ -209,6 +209,7 @@ static void adu_interrupt_out_callback(struct urb *urb) if (status != 0) { if ((status != -ENOENT) && + (status != -ESHUTDOWN) && (status != -ECONNRESET)) { dev_dbg(&dev->udev->dev, "%s :nonzero status received: %d\n", __func__, -- cgit v1.2.3 From 2502107a9ccd23c36331eeff575fe0f29f3ddc75 Mon Sep 17 00:00:00 2001 From: Lang Dai Date: Mon, 14 Sep 2020 11:26:41 +0800 Subject: uio: free uio id after uio file node is freed [ Upstream commit 8fd0e2a6df262539eaa28b0a2364cca10d1dc662 ] uio_register_device() do two things. 1) get an uio id from a global pool, e.g. the id is 2) create file nodes like /sys/class/uio/uio uio_unregister_device() do two things. 1) free the uio id and return it to the global pool 2) free the file node /sys/class/uio/uio There is a situation is that one worker is calling uio_unregister_device(), and another worker is calling uio_register_device(). If the two workers are X and Y, they go as below sequence, 1) X free the uio id 2) Y get an uio id 3) Y create file node /sys/class/uio/uio 4) X free the file note /sys/class/uio/uio Then it will failed at the 3rd step and cause the phenomenon we saw as it is creating a duplicated file node. Failure reports as follows: sysfs: cannot create duplicate filename '/class/uio/uio10' Call Trace: sysfs_do_create_link_sd.isra.2+0x9e/0xb0 sysfs_create_link+0x25/0x40 device_add+0x2c4/0x640 __uio_register_device+0x1c5/0x576 [uio] adf_uio_init_bundle_dev+0x231/0x280 [intel_qat] adf_uio_register+0x1c0/0x340 [intel_qat] adf_dev_start+0x202/0x370 [intel_qat] adf_dev_start_async+0x40/0xa0 [intel_qat] process_one_work+0x14d/0x410 worker_thread+0x4b/0x460 kthread+0x105/0x140 ? process_one_work+0x410/0x410 ? kthread_bind+0x40/0x40 ret_from_fork+0x1f/0x40 Code: 85 c0 48 89 c3 74 12 b9 00 10 00 00 48 89 c2 31 f6 4c 89 ef e8 ec c4 ff ff 4c 89 e2 48 89 de 48 c7 c7 e8 b4 ee b4 e8 6a d4 d7 ff <0f> 0b 48 89 df e8 20 fa f3 ff 5b 41 5c 41 5d 5d c3 66 0f 1f 84 ---[ end trace a7531c1ed5269e84 ]--- c6xxvf b002:00:00.0: Failed to register UIO devices c6xxvf b002:00:00.0: Failed to register UIO devices Signed-off-by: Lang Dai Link: https://lore.kernel.org/r/1600054002-17722-1-git-send-email-lang.dai@intel.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/uio/uio.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c index a57698985f9c..8313f81968d5 100644 --- a/drivers/uio/uio.c +++ b/drivers/uio/uio.c @@ -1010,8 +1010,6 @@ void uio_unregister_device(struct uio_info *info) idev = info->uio_dev; - uio_free_minor(idev); - mutex_lock(&idev->info_lock); uio_dev_del_attributes(idev); @@ -1026,6 +1024,8 @@ void uio_unregister_device(struct uio_info *info) device_unregister(&idev->dev); + uio_free_minor(idev); + return; } EXPORT_SYMBOL_GPL(uio_unregister_device); -- cgit v1.2.3 From 8fd52a21ab570e80f84f39e12affce42a5300e91 Mon Sep 17 00:00:00 2001 From: Linu Cherian Date: Wed, 16 Sep 2020 13:17:35 -0600 Subject: coresight: Make sysfs functional on topologies with per core sink [ Upstream commit 6d578258b955fc8888e1bbd9a8fefe7b10065a84 ] Coresight driver assumes sink is common across all the ETMs, and tries to build a path between ETM and the first enabled sink found using bus based search. This breaks sysFS usage on implementations that has multiple per core sinks in enabled state. To fix this, coresight_get_enabled_sink API is updated to do a connection based search starting from the given source, instead of bus based search. With sink selection using sysfs depecrated for perf interface, provision for reset is removed as well in this API. Signed-off-by: Linu Cherian [Fixed indentation problem and removed obsolete comment] Signed-off-by: Mathieu Poirier Link: https://lore.kernel.org/r/20200916191737.4001561-15-mathieu.poirier@linaro.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/hwtracing/coresight/coresight-priv.h | 3 +- drivers/hwtracing/coresight/coresight.c | 62 ++++++++++++---------------- 2 files changed, 29 insertions(+), 36 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-priv.h b/drivers/hwtracing/coresight/coresight-priv.h index 82e563cdc879..dfd24b85a577 100644 --- a/drivers/hwtracing/coresight/coresight-priv.h +++ b/drivers/hwtracing/coresight/coresight-priv.h @@ -147,7 +147,8 @@ static inline void coresight_write_reg_pair(void __iomem *addr, u64 val, void coresight_disable_path(struct list_head *path); int coresight_enable_path(struct list_head *path, u32 mode, void *sink_data); struct coresight_device *coresight_get_sink(struct list_head *path); -struct coresight_device *coresight_get_enabled_sink(bool reset); +struct coresight_device * +coresight_get_enabled_sink(struct coresight_device *source); struct coresight_device *coresight_get_sink_by_id(u32 id); struct list_head *coresight_build_path(struct coresight_device *csdev, struct coresight_device *sink); diff --git a/drivers/hwtracing/coresight/coresight.c b/drivers/hwtracing/coresight/coresight.c index 0bbce0d29158..90ecd04a2f20 100644 --- a/drivers/hwtracing/coresight/coresight.c +++ b/drivers/hwtracing/coresight/coresight.c @@ -481,50 +481,46 @@ struct coresight_device *coresight_get_sink(struct list_head *path) return csdev; } -static int coresight_enabled_sink(struct device *dev, const void *data) +static struct coresight_device * +coresight_find_enabled_sink(struct coresight_device *csdev) { - const bool *reset = data; - struct coresight_device *csdev = to_coresight_device(dev); + int i; + struct coresight_device *sink; if ((csdev->type == CORESIGHT_DEV_TYPE_SINK || csdev->type == CORESIGHT_DEV_TYPE_LINKSINK) && - csdev->activated) { - /* - * Now that we have a handle on the sink for this session, - * disable the sysFS "enable_sink" flag so that possible - * concurrent perf session that wish to use another sink don't - * trip on it. Doing so has no ramification for the current - * session. - */ - if (*reset) - csdev->activated = false; + csdev->activated) + return csdev; - return 1; + /* + * Recursively explore each port found on this element. + */ + for (i = 0; i < csdev->pdata->nr_outport; i++) { + struct coresight_device *child_dev; + + child_dev = csdev->pdata->conns[i].child_dev; + if (child_dev) + sink = coresight_find_enabled_sink(child_dev); + if (sink) + return sink; } - return 0; + return NULL; } /** - * coresight_get_enabled_sink - returns the first enabled sink found on the bus - * @deactivate: Whether the 'enable_sink' flag should be reset + * coresight_get_enabled_sink - returns the first enabled sink using + * connection based search starting from the source reference * - * When operated from perf the deactivate parameter should be set to 'true'. - * That way the "enabled_sink" flag of the sink that was selected can be reset, - * allowing for other concurrent perf sessions to choose a different sink. - * - * When operated from sysFS users have full control and as such the deactivate - * parameter should be set to 'false', hence mandating users to explicitly - * clear the flag. + * @source: Coresight source device reference */ -struct coresight_device *coresight_get_enabled_sink(bool deactivate) +struct coresight_device * +coresight_get_enabled_sink(struct coresight_device *source) { - struct device *dev = NULL; - - dev = bus_find_device(&coresight_bustype, NULL, &deactivate, - coresight_enabled_sink); + if (!source) + return NULL; - return dev ? to_coresight_device(dev) : NULL; + return coresight_find_enabled_sink(source); } static int coresight_sink_by_id(struct device *dev, const void *data) @@ -764,11 +760,7 @@ int coresight_enable(struct coresight_device *csdev) goto out; } - /* - * Search for a valid sink for this session but don't reset the - * "enable_sink" flag in sysFS. Users get to do that explicitly. - */ - sink = coresight_get_enabled_sink(false); + sink = coresight_get_enabled_sink(csdev); if (!sink) { ret = -EINVAL; goto out; -- cgit v1.2.3 From 7f7f437277ac745697c24e01012c234f958d10c9 Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Fri, 18 Sep 2020 16:17:49 +0300 Subject: usb: xhci: omit duplicate actions when suspending a runtime suspended host. [ Upstream commit 18a367e8947d72dd91b6fc401e88a2952c6363f7 ] If the xhci-plat.c is the platform driver, after the runtime pm is enabled, the xhci_suspend is called if nothing is connected on the port. When the system goes to suspend, it will call xhci_suspend again if USB wakeup is enabled. Since the runtime suspend wakeup setting is not always the same as system suspend wakeup setting, eg, at runtime suspend we always need wakeup if the controller is in low power mode; but at system suspend, we may not need wakeup. So, we move the judgement after changing wakeup setting. [commit message rewording -Mathias] Reviewed-by: Jun Li Signed-off-by: Peter Chen Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20200918131752.16488-8-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/host/xhci.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 0d10ede581cb..7123ab44671b 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -982,12 +982,15 @@ int xhci_suspend(struct xhci_hcd *xhci, bool do_wakeup) xhci->shared_hcd->state != HC_STATE_SUSPENDED) return -EINVAL; - xhci_dbc_suspend(xhci); - /* Clear root port wake on bits if wakeup not allowed. */ if (!do_wakeup) xhci_disable_port_wake_on_bits(xhci); + if (!HCD_HW_ACCESSIBLE(hcd)) + return 0; + + xhci_dbc_suspend(xhci); + /* Don't poll the roothubs on bus suspend. */ xhci_dbg(xhci, "%s: stopping port polling.\n", __func__); clear_bit(HCD_FLAG_POLL_RH, &hcd->flags); -- cgit v1.2.3 From ea888a14ac6e563f4289c95049bbd49a8e8c42a3 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 8 Jul 2020 16:09:53 -0400 Subject: SUNRPC: Mitigate cond_resched() in xprt_transmit() [ Upstream commit 6f9f17287e78e5049931af2037b15b26d134a32a ] The original purpose of this expensive call is to prevent a long queue of requests from blocking other work. The cond_resched() call is unnecessary after just a single send operation. For longer queues, instead of invoking the kernel scheduler, simply release the transport send lock and return to the RPC scheduler. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker Signed-off-by: Sasha Levin --- net/sunrpc/xprt.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 41df4c507193..a6fee86f400e 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1503,10 +1503,13 @@ xprt_transmit(struct rpc_task *task) { struct rpc_rqst *next, *req = task->tk_rqstp; struct rpc_xprt *xprt = req->rq_xprt; - int status; + int counter, status; spin_lock(&xprt->queue_lock); + counter = 0; while (!list_empty(&xprt->xmit_queue)) { + if (++counter == 20) + break; next = list_first_entry(&xprt->xmit_queue, struct rpc_rqst, rq_xmit); xprt_pin_rqst(next); @@ -1514,7 +1517,6 @@ xprt_transmit(struct rpc_task *task) status = xprt_request_transmit(next, task); if (status == -EBADMSG && next != req) status = 0; - cond_resched(); spin_lock(&xprt->queue_lock); xprt_unpin_rqst(next); if (status == 0) { -- cgit v1.2.3 From 64129ad98b74b47120da035e723ceccf75a65323 Mon Sep 17 00:00:00 2001 From: Zhengyuan Liu Date: Mon, 21 Sep 2020 10:39:36 +0800 Subject: arm64/mm: return cpu_all_mask when node is NUMA_NO_NODE [ Upstream commit a194c5f2d2b3a05428805146afcabe5140b5d378 ] The @node passed to cpumask_of_node() can be NUMA_NO_NODE, in that case it will trigger the following WARN_ON(node >= nr_node_ids) due to mismatched data types of @node and @nr_node_ids. Actually we should return cpu_all_mask just like most other architectures do if passed NUMA_NO_NODE. Also add a similar check to the inline cpumask_of_node() in numa.h. Signed-off-by: Zhengyuan Liu Reviewed-by: Gavin Shan Link: https://lore.kernel.org/r/20200921023936.21846-1-liuzhengyuan@tj.kylinos.cn Signed-off-by: Will Deacon Signed-off-by: Sasha Levin --- arch/arm64/include/asm/numa.h | 3 +++ arch/arm64/mm/numa.c | 6 +++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/numa.h b/arch/arm64/include/asm/numa.h index 626ad01e83bf..dd870390d639 100644 --- a/arch/arm64/include/asm/numa.h +++ b/arch/arm64/include/asm/numa.h @@ -25,6 +25,9 @@ const struct cpumask *cpumask_of_node(int node); /* Returns a pointer to the cpumask of CPUs on Node 'node'. */ static inline const struct cpumask *cpumask_of_node(int node) { + if (node == NUMA_NO_NODE) + return cpu_all_mask; + return node_to_cpumask_map[node]; } #endif diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c index 4decf1659700..53ebb4babf3a 100644 --- a/arch/arm64/mm/numa.c +++ b/arch/arm64/mm/numa.c @@ -46,7 +46,11 @@ EXPORT_SYMBOL(node_to_cpumask_map); */ const struct cpumask *cpumask_of_node(int node) { - if (WARN_ON(node >= nr_node_ids)) + + if (node == NUMA_NO_NODE) + return cpu_all_mask; + + if (WARN_ON(node < 0 || node >= nr_node_ids)) return cpu_none_mask; if (WARN_ON(node_to_cpumask_map[node] == NULL)) -- cgit v1.2.3 From 7551e2f4fddd49968ee7f6beba6baacff6235663 Mon Sep 17 00:00:00 2001 From: Joakim Zhang Date: Tue, 10 Dec 2019 09:00:13 +0000 Subject: can: flexcan: disable clocks during stop mode [ Upstream commit 02f71c6605e1f8259c07f16178330db766189a74 ] Disable clocks while CAN core is in stop mode. Signed-off-by: Joakim Zhang Tested-by: Sean Nyekjaer Link: https://lore.kernel.org/r/20191210085721.9853-2-qiangqing.zhang@nxp.com Signed-off-by: Marc Kleine-Budde Signed-off-by: Sasha Levin --- drivers/net/can/flexcan.c | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index aaa7ed1dc97e..d59c6c87164f 100644 --- a/drivers/net/can/flexcan.c +++ b/drivers/net/can/flexcan.c @@ -1703,8 +1703,6 @@ static int __maybe_unused flexcan_suspend(struct device *device) err = flexcan_chip_disable(priv); if (err) return err; - - err = pm_runtime_force_suspend(device); } netif_stop_queue(dev); netif_device_detach(dev); @@ -1730,10 +1728,6 @@ static int __maybe_unused flexcan_resume(struct device *device) if (err) return err; } else { - err = pm_runtime_force_resume(device); - if (err) - return err; - err = flexcan_chip_enable(priv); } } @@ -1764,8 +1758,16 @@ static int __maybe_unused flexcan_noirq_suspend(struct device *device) struct net_device *dev = dev_get_drvdata(device); struct flexcan_priv *priv = netdev_priv(dev); - if (netif_running(dev) && device_may_wakeup(device)) - flexcan_enable_wakeup_irq(priv, true); + if (netif_running(dev)) { + int err; + + if (device_may_wakeup(device)) + flexcan_enable_wakeup_irq(priv, true); + + err = pm_runtime_force_suspend(device); + if (err) + return err; + } return 0; } @@ -1775,8 +1777,16 @@ static int __maybe_unused flexcan_noirq_resume(struct device *device) struct net_device *dev = dev_get_drvdata(device); struct flexcan_priv *priv = netdev_priv(dev); - if (netif_running(dev) && device_may_wakeup(device)) - flexcan_enable_wakeup_irq(priv, false); + if (netif_running(dev)) { + int err; + + err = pm_runtime_force_resume(device); + if (err) + return err; + + if (device_may_wakeup(device)) + flexcan_enable_wakeup_irq(priv, false); + } return 0; } -- cgit v1.2.3 From 5880a0d1c83536e02502d014f24c186b127effdd Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 21 Sep 2020 09:15:08 -0700 Subject: xfs: don't free rt blocks when we're doing a REMAP bunmapi call [ Upstream commit 8df0fa39bdd86ca81a8d706a6ed9d33cc65ca625 ] When callers pass XFS_BMAPI_REMAP into xfs_bunmapi, they want the extent to be unmapped from the given file fork without the extent being freed. We do this for non-rt files, but we forgot to do this for realtime files. So far this isn't a big deal since nobody makes a bunmapi call to a rt file with the REMAP flag set, but don't leave a logic bomb. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Sasha Levin --- fs/xfs/libxfs/xfs_bmap.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index f8db3fe616df..c114d24be619 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -4985,20 +4985,25 @@ xfs_bmap_del_extent_real( flags = XFS_ILOG_CORE; if (whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip)) { - xfs_fsblock_t bno; xfs_filblks_t len; xfs_extlen_t mod; - bno = div_u64_rem(del->br_startblock, mp->m_sb.sb_rextsize, - &mod); - ASSERT(mod == 0); len = div_u64_rem(del->br_blockcount, mp->m_sb.sb_rextsize, &mod); ASSERT(mod == 0); - error = xfs_rtfree_extent(tp, bno, (xfs_extlen_t)len); - if (error) - goto done; + if (!(bflags & XFS_BMAPI_REMAP)) { + xfs_fsblock_t bno; + + bno = div_u64_rem(del->br_startblock, + mp->m_sb.sb_rextsize, &mod); + ASSERT(mod == 0); + + error = xfs_rtfree_extent(tp, bno, (xfs_extlen_t)len); + if (error) + goto done; + } + do_fx = 0; nblks = len * mp->m_sb.sb_rextsize; qfield = XFS_TRANS_DQ_RTBCOUNT; -- cgit v1.2.3 From cf9cc49cd881139d56bdd7303ee897d8ff35b383 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Tue, 18 Aug 2020 22:24:25 +0800 Subject: ACPI: Add out of bounds and numa_off protections to pxm_to_node() [ Upstream commit 8a3decac087aa897df5af04358c2089e52e70ac4 ] The function should check the validity of the pxm value before using it to index the pxm_to_node_map[] array. Whilst hardening this code may be good in general, the main intent here is to enable following patches that use this function to replace acpi_map_pxm_to_node() for non SRAT usecases which should return NO_NUMA_NODE for PXM entries not matching with those in SRAT. Signed-off-by: Jonathan Cameron Reviewed-by: Barry Song Reviewed-by: Hanjun Guo Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/acpi/numa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/numa.c b/drivers/acpi/numa.c index eadbf90e65d1..85e01752fbe4 100644 --- a/drivers/acpi/numa.c +++ b/drivers/acpi/numa.c @@ -31,7 +31,7 @@ int acpi_numa __initdata; int pxm_to_node(int pxm) { - if (pxm < 0) + if (pxm < 0 || pxm >= MAX_PXM_DOMAINS || numa_off) return NUMA_NO_NODE; return pxm_to_node_map[pxm]; } -- cgit v1.2.3 From 592cbc0a6a83c06e245365e127c5452a400c8aec Mon Sep 17 00:00:00 2001 From: Wright Feng Date: Mon, 28 Sep 2020 00:49:22 -0500 Subject: brcmfmac: Fix warning message after dongle setup failed [ Upstream commit 6aa5a83a7ed8036c1388a811eb8bdfa77b21f19c ] Brcmfmac showed warning message in fweh.c when checking the size of event queue which is not initialized. Therefore, we only cancel the worker and reset event handler only when it is initialized. [ 145.505899] brcmfmac 0000:02:00.0: brcmf_pcie_setup: Dongle setup [ 145.929970] ------------[ cut here ]------------ [ 145.929994] WARNING: CPU: 0 PID: 288 at drivers/net/wireless/broadcom/brcm80211/brcmfmac/fweh.c:312 brcmf_fweh_detach+0xbc/0xd0 [brcmfmac] ... [ 145.930029] Call Trace: [ 145.930036] brcmf_detach+0x77/0x100 [brcmfmac] [ 145.930043] brcmf_pcie_remove+0x79/0x130 [brcmfmac] [ 145.930046] pci_device_remove+0x39/0xc0 [ 145.930048] device_release_driver_internal+0x141/0x200 [ 145.930049] device_release_driver+0x12/0x20 [ 145.930054] brcmf_pcie_setup+0x101/0x3c0 [brcmfmac] [ 145.930060] brcmf_fw_request_done+0x11d/0x1f0 [brcmfmac] [ 145.930062] ? lock_timer_base+0x7d/0xa0 [ 145.930063] ? internal_add_timer+0x1f/0xa0 [ 145.930064] ? add_timer+0x11a/0x1d0 [ 145.930066] ? __kmalloc_track_caller+0x18c/0x230 [ 145.930068] ? kstrdup_const+0x23/0x30 [ 145.930069] ? add_dr+0x46/0x80 [ 145.930070] ? devres_add+0x3f/0x50 [ 145.930072] ? usermodehelper_read_unlock+0x15/0x20 [ 145.930073] ? _request_firmware+0x288/0xa20 [ 145.930075] request_firmware_work_func+0x36/0x60 [ 145.930077] process_one_work+0x144/0x360 [ 145.930078] worker_thread+0x4d/0x3c0 [ 145.930079] kthread+0x112/0x150 [ 145.930080] ? rescuer_thread+0x340/0x340 [ 145.930081] ? kthread_park+0x60/0x60 [ 145.930083] ret_from_fork+0x25/0x30 Signed-off-by: Wright Feng Signed-off-by: Chi-hsien Lin Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20200928054922.44580-3-wright.feng@cypress.com Signed-off-by: Sasha Levin --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/fweh.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fweh.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fweh.c index 79c8a858b6d6..a30fcfbf2ee7 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fweh.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fweh.c @@ -304,10 +304,12 @@ void brcmf_fweh_detach(struct brcmf_pub *drvr) { struct brcmf_fweh_info *fweh = &drvr->fweh; - /* cancel the worker */ - cancel_work_sync(&fweh->event_work); - WARN_ON(!list_empty(&fweh->event_q)); - memset(fweh->evt_handler, 0, sizeof(fweh->evt_handler)); + /* cancel the worker if initialized */ + if (fweh->event_work.func) { + cancel_work_sync(&fweh->event_work); + WARN_ON(!list_empty(&fweh->event_q)); + memset(fweh->evt_handler, 0, sizeof(fweh->evt_handler)); + } } /** -- cgit v1.2.3 From 0606a8df86fe0cc0e03869cf2f6d01dec554b163 Mon Sep 17 00:00:00 2001 From: Xie He Date: Mon, 28 Sep 2020 05:56:43 -0700 Subject: drivers/net/wan/hdlc_fr: Correctly handle special skb->protocol values [ Upstream commit 8306266c1d51aac9aa7aa907fe99032a58c6382c ] The fr_hard_header function is used to prepend the header to skbs before transmission. It is used in 3 situations: 1) When a control packet is generated internally in this driver; 2) When a user sends an skb on an Ethernet-emulating PVC device; 3) When a user sends an skb on a normal PVC device. These 3 situations need to be handled differently by fr_hard_header. Different headers should be prepended to the skb in different situations. Currently fr_hard_header distinguishes these 3 situations using skb->protocol. For situation 1 and 2, a special skb->protocol value will be assigned before calling fr_hard_header, so that it can recognize these 2 situations. All skb->protocol values other than these special ones are treated by fr_hard_header as situation 3. However, it is possible that in situation 3, the user sends an skb with one of the special skb->protocol values. In this case, fr_hard_header would incorrectly treat it as situation 1 or 2. This patch tries to solve this issue by using skb->dev instead of skb->protocol to distinguish between these 3 situations. For situation 1, skb->dev would be NULL; for situation 2, skb->dev->type would be ARPHRD_ETHER; and for situation 3, skb->dev->type would be ARPHRD_DLCI. This way fr_hard_header would be able to distinguish these 3 situations correctly regardless what skb->protocol value the user tries to use in situation 3. Cc: Krzysztof Halasa Signed-off-by: Xie He Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/wan/hdlc_fr.c | 98 ++++++++++++++++++++++++----------------------- 1 file changed, 51 insertions(+), 47 deletions(-) diff --git a/drivers/net/wan/hdlc_fr.c b/drivers/net/wan/hdlc_fr.c index d6cfd51613ed..3a44dad87602 100644 --- a/drivers/net/wan/hdlc_fr.c +++ b/drivers/net/wan/hdlc_fr.c @@ -273,63 +273,69 @@ static inline struct net_device **get_dev_p(struct pvc_device *pvc, static int fr_hard_header(struct sk_buff **skb_p, u16 dlci) { - u16 head_len; struct sk_buff *skb = *skb_p; - switch (skb->protocol) { - case cpu_to_be16(NLPID_CCITT_ANSI_LMI): - head_len = 4; - skb_push(skb, head_len); - skb->data[3] = NLPID_CCITT_ANSI_LMI; - break; - - case cpu_to_be16(NLPID_CISCO_LMI): - head_len = 4; - skb_push(skb, head_len); - skb->data[3] = NLPID_CISCO_LMI; - break; - - case cpu_to_be16(ETH_P_IP): - head_len = 4; - skb_push(skb, head_len); - skb->data[3] = NLPID_IP; - break; - - case cpu_to_be16(ETH_P_IPV6): - head_len = 4; - skb_push(skb, head_len); - skb->data[3] = NLPID_IPV6; - break; - - case cpu_to_be16(ETH_P_802_3): - head_len = 10; - if (skb_headroom(skb) < head_len) { - struct sk_buff *skb2 = skb_realloc_headroom(skb, - head_len); + if (!skb->dev) { /* Control packets */ + switch (dlci) { + case LMI_CCITT_ANSI_DLCI: + skb_push(skb, 4); + skb->data[3] = NLPID_CCITT_ANSI_LMI; + break; + + case LMI_CISCO_DLCI: + skb_push(skb, 4); + skb->data[3] = NLPID_CISCO_LMI; + break; + + default: + return -EINVAL; + } + + } else if (skb->dev->type == ARPHRD_DLCI) { + switch (skb->protocol) { + case htons(ETH_P_IP): + skb_push(skb, 4); + skb->data[3] = NLPID_IP; + break; + + case htons(ETH_P_IPV6): + skb_push(skb, 4); + skb->data[3] = NLPID_IPV6; + break; + + default: + skb_push(skb, 10); + skb->data[3] = FR_PAD; + skb->data[4] = NLPID_SNAP; + /* OUI 00-00-00 indicates an Ethertype follows */ + skb->data[5] = 0x00; + skb->data[6] = 0x00; + skb->data[7] = 0x00; + /* This should be an Ethertype: */ + *(__be16 *)(skb->data + 8) = skb->protocol; + } + + } else if (skb->dev->type == ARPHRD_ETHER) { + if (skb_headroom(skb) < 10) { + struct sk_buff *skb2 = skb_realloc_headroom(skb, 10); if (!skb2) return -ENOBUFS; dev_kfree_skb(skb); skb = *skb_p = skb2; } - skb_push(skb, head_len); + skb_push(skb, 10); skb->data[3] = FR_PAD; skb->data[4] = NLPID_SNAP; - skb->data[5] = FR_PAD; + /* OUI 00-80-C2 stands for the 802.1 organization */ + skb->data[5] = 0x00; skb->data[6] = 0x80; skb->data[7] = 0xC2; + /* PID 00-07 stands for Ethernet frames without FCS */ skb->data[8] = 0x00; - skb->data[9] = 0x07; /* bridged Ethernet frame w/out FCS */ - break; + skb->data[9] = 0x07; - default: - head_len = 10; - skb_push(skb, head_len); - skb->data[3] = FR_PAD; - skb->data[4] = NLPID_SNAP; - skb->data[5] = FR_PAD; - skb->data[6] = FR_PAD; - skb->data[7] = FR_PAD; - *(__be16*)(skb->data + 8) = skb->protocol; + } else { + return -EINVAL; } dlci_to_q922(skb->data, dlci); @@ -425,8 +431,8 @@ static netdev_tx_t pvc_xmit(struct sk_buff *skb, struct net_device *dev) skb_put(skb, pad); memset(skb->data + len, 0, pad); } - skb->protocol = cpu_to_be16(ETH_P_802_3); } + skb->dev = dev; if (!fr_hard_header(&skb, pvc->dlci)) { dev->stats.tx_bytes += skb->len; dev->stats.tx_packets++; @@ -494,10 +500,8 @@ static void fr_lmi_send(struct net_device *dev, int fullrep) memset(skb->data, 0, len); skb_reserve(skb, 4); if (lmi == LMI_CISCO) { - skb->protocol = cpu_to_be16(NLPID_CISCO_LMI); fr_hard_header(&skb, LMI_CISCO_DLCI); } else { - skb->protocol = cpu_to_be16(NLPID_CCITT_ANSI_LMI); fr_hard_header(&skb, LMI_CCITT_ANSI_DLCI); } data = skb_tail_pointer(skb); -- cgit v1.2.3 From 37464a8a7f68373536f76fd9dbaa5293a88f1823 Mon Sep 17 00:00:00 2001 From: Diana Craciun Date: Tue, 29 Sep 2020 11:54:38 +0300 Subject: bus/fsl_mc: Do not rely on caller to provide non NULL mc_io [ Upstream commit 5026cf605143e764e1785bbf9158559d17f8d260 ] Before destroying the mc_io, check first that it was allocated. Reviewed-by: Laurentiu Tudor Acked-by: Laurentiu Tudor Signed-off-by: Diana Craciun Link: https://lore.kernel.org/r/20200929085441.17448-11-diana.craciun@oss.nxp.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/bus/fsl-mc/mc-io.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/bus/fsl-mc/mc-io.c b/drivers/bus/fsl-mc/mc-io.c index d9629fc13a15..0a4a387b615d 100644 --- a/drivers/bus/fsl-mc/mc-io.c +++ b/drivers/bus/fsl-mc/mc-io.c @@ -129,7 +129,12 @@ error_destroy_mc_io: */ void fsl_destroy_mc_io(struct fsl_mc_io *mc_io) { - struct fsl_mc_device *dpmcp_dev = mc_io->dpmcp_dev; + struct fsl_mc_device *dpmcp_dev; + + if (!mc_io) + return; + + dpmcp_dev = mc_io->dpmcp_dev; if (dpmcp_dev) fsl_mc_io_unset_dpmcp(mc_io); -- cgit v1.2.3 From cf5a6124f23725d8510b1fba976a4f341f5d1925 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Wed, 30 Sep 2020 22:05:45 +0800 Subject: ACPI: HMAT: Fix handling of changes from ACPI 6.2 to ACPI 6.3 [ Upstream commit 2c5b9bde95c96942f2873cea6ef383c02800e4a8 ] In ACPI 6.3, the Memory Proximity Domain Attributes Structure changed substantially. One of those changes was that the flag for "Memory Proximity Domain field is valid" was deprecated. This was because the field "Proximity Domain for the Memory" became a required field and hence having a validity flag makes no sense. So the correct logic is to always assume the field is there. Current code assumes it never is. Signed-off-by: Jonathan Cameron Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/acpi/hmat/hmat.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/acpi/hmat/hmat.c b/drivers/acpi/hmat/hmat.c index 8b0de8a3c647..0f1c939b7e90 100644 --- a/drivers/acpi/hmat/hmat.c +++ b/drivers/acpi/hmat/hmat.c @@ -403,7 +403,8 @@ static int __init hmat_parse_proximity_domain(union acpi_subtable_headers *heade pr_info("HMAT: Memory Flags:%04x Processor Domain:%d Memory Domain:%d\n", p->flags, p->processor_PD, p->memory_PD); - if (p->flags & ACPI_HMAT_MEMORY_PD_VALID && hmat_revision == 1) { + if ((hmat_revision == 1 && p->flags & ACPI_HMAT_MEMORY_PD_VALID) || + hmat_revision > 1) { target = find_mem_target(p->memory_PD); if (!target) { pr_debug("HMAT: Memory Domain missing from SRAT\n"); -- cgit v1.2.3 From 3ef6095d65872de3e42eff559ad06a150840d7d8 Mon Sep 17 00:00:00 2001 From: Xiongfeng Wang Date: Fri, 4 Sep 2020 14:09:58 +0800 Subject: power: supply: test_power: add missing newlines when printing parameters by sysfs [ Upstream commit c07fa6c1631333f02750cf59f22b615d768b4d8f ] When I cat some module parameters by sysfs, it displays as follows. It's better to add a newline for easy reading. root@syzkaller:~# cd /sys/module/test_power/parameters/ root@syzkaller:/sys/module/test_power/parameters# cat ac_online onroot@syzkaller:/sys/module/test_power/parameters# cat battery_present trueroot@syzkaller:/sys/module/test_power/parameters# cat battery_health goodroot@syzkaller:/sys/module/test_power/parameters# cat battery_status dischargingroot@syzkaller:/sys/module/test_power/parameters# cat battery_technology LIONroot@syzkaller:/sys/module/test_power/parameters# cat usb_online onroot@syzkaller:/sys/module/test_power/parameters# Signed-off-by: Xiongfeng Wang Signed-off-by: Sebastian Reichel Signed-off-by: Sasha Levin --- drivers/power/supply/test_power.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/power/supply/test_power.c b/drivers/power/supply/test_power.c index c3cad2b6daba..1139ca725195 100644 --- a/drivers/power/supply/test_power.c +++ b/drivers/power/supply/test_power.c @@ -341,6 +341,7 @@ static int param_set_ac_online(const char *key, const struct kernel_param *kp) static int param_get_ac_online(char *buffer, const struct kernel_param *kp) { strcpy(buffer, map_get_key(map_ac_online, ac_online, "unknown")); + strcat(buffer, "\n"); return strlen(buffer); } @@ -354,6 +355,7 @@ static int param_set_usb_online(const char *key, const struct kernel_param *kp) static int param_get_usb_online(char *buffer, const struct kernel_param *kp) { strcpy(buffer, map_get_key(map_ac_online, usb_online, "unknown")); + strcat(buffer, "\n"); return strlen(buffer); } @@ -368,6 +370,7 @@ static int param_set_battery_status(const char *key, static int param_get_battery_status(char *buffer, const struct kernel_param *kp) { strcpy(buffer, map_get_key(map_status, battery_status, "unknown")); + strcat(buffer, "\n"); return strlen(buffer); } @@ -382,6 +385,7 @@ static int param_set_battery_health(const char *key, static int param_get_battery_health(char *buffer, const struct kernel_param *kp) { strcpy(buffer, map_get_key(map_health, battery_health, "unknown")); + strcat(buffer, "\n"); return strlen(buffer); } @@ -397,6 +401,7 @@ static int param_get_battery_present(char *buffer, const struct kernel_param *kp) { strcpy(buffer, map_get_key(map_present, battery_present, "unknown")); + strcat(buffer, "\n"); return strlen(buffer); } @@ -414,6 +419,7 @@ static int param_get_battery_technology(char *buffer, { strcpy(buffer, map_get_key(map_technology, battery_technology, "unknown")); + strcat(buffer, "\n"); return strlen(buffer); } -- cgit v1.2.3 From 5759f38a63dbc0130080d925af557e9cdc63b7bf Mon Sep 17 00:00:00 2001 From: Fangzhi Zuo Date: Mon, 21 Sep 2020 17:52:43 -0400 Subject: drm/amd/display: HDMI remote sink need mode validation for Linux [ Upstream commit 95d620adb48f7728e67d82f56f756e8d451cf8d2 ] [Why] Currently mode validation is bypassed if remote sink exists. That leads to mode set issue when a BW bottle neck exists in the link path, e.g., a DP-to-HDMI converter that only supports HDMI 1.4. Any invalid mode passed to Linux user space will cause the modeset failure due to limitation of Linux user space implementation. [How] Mode validation is skipped only if in edid override. For real remote sink, clock limit check should be done for HDMI remote sink. Have HDMI related remote sink going through mode validation to elimiate modes which pixel clock exceeds BW limitation. Signed-off-by: Fangzhi Zuo Reviewed-by: Hersen Wu Acked-by: Eryk Brol Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/display/dc/core/dc_link.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index 3efee7b3378a..47cefc05fd3f 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -2268,7 +2268,7 @@ enum dc_status dc_link_validate_mode_timing( /* A hack to avoid failing any modes for EDID override feature on * topology change such as lower quality cable for DP or different dongle */ - if (link->remote_sinks[0]) + if (link->remote_sinks[0] && link->remote_sinks[0]->sink_signal == SIGNAL_TYPE_VIRTUAL) return DC_OK; /* Passive Dongle */ -- cgit v1.2.3 From 1f145a1193ea8c87a11f3d58a96ce665832cbb13 Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Thu, 24 Sep 2020 15:17:54 +0800 Subject: ARC: [dts] fix the errors detected by dtbs_check [ Upstream commit 05b1be68c4d6d76970025e6139bfd735c2256ee5 ] xxx/arc/boot/dts/axs101.dt.yaml: dw-apb-ictl@e0012000: $nodename:0: \ 'dw-apb-ictl@e0012000' does not match '^interrupt-controller(@[0-9a-f,]+)*$' From schema: xxx/interrupt-controller/snps,dw-apb-ictl.yaml The node name of the interrupt controller must start with "interrupt-controller" instead of "dw-apb-ictl". Signed-off-by: Zhen Lei Signed-off-by: Vineet Gupta Signed-off-by: Sasha Levin --- arch/arc/boot/dts/axc001.dtsi | 2 +- arch/arc/boot/dts/axc003.dtsi | 2 +- arch/arc/boot/dts/axc003_idu.dtsi | 2 +- arch/arc/boot/dts/vdk_axc003.dtsi | 2 +- arch/arc/boot/dts/vdk_axc003_idu.dtsi | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/arc/boot/dts/axc001.dtsi b/arch/arc/boot/dts/axc001.dtsi index 6ec1fcdfc0d7..92247288d056 100644 --- a/arch/arc/boot/dts/axc001.dtsi +++ b/arch/arc/boot/dts/axc001.dtsi @@ -85,7 +85,7 @@ * avoid duplicating the MB dtsi file given that IRQ from * this intc to cpu intc are different for axs101 and axs103 */ - mb_intc: dw-apb-ictl@e0012000 { + mb_intc: interrupt-controller@e0012000 { #interrupt-cells = <1>; compatible = "snps,dw-apb-ictl"; reg = < 0x0 0xe0012000 0x0 0x200 >; diff --git a/arch/arc/boot/dts/axc003.dtsi b/arch/arc/boot/dts/axc003.dtsi index ac8e1b463a70..cd1edcf4f95e 100644 --- a/arch/arc/boot/dts/axc003.dtsi +++ b/arch/arc/boot/dts/axc003.dtsi @@ -129,7 +129,7 @@ * avoid duplicating the MB dtsi file given that IRQ from * this intc to cpu intc are different for axs101 and axs103 */ - mb_intc: dw-apb-ictl@e0012000 { + mb_intc: interrupt-controller@e0012000 { #interrupt-cells = <1>; compatible = "snps,dw-apb-ictl"; reg = < 0x0 0xe0012000 0x0 0x200 >; diff --git a/arch/arc/boot/dts/axc003_idu.dtsi b/arch/arc/boot/dts/axc003_idu.dtsi index 9da21e7fd246..70779386ca79 100644 --- a/arch/arc/boot/dts/axc003_idu.dtsi +++ b/arch/arc/boot/dts/axc003_idu.dtsi @@ -135,7 +135,7 @@ * avoid duplicating the MB dtsi file given that IRQ from * this intc to cpu intc are different for axs101 and axs103 */ - mb_intc: dw-apb-ictl@e0012000 { + mb_intc: interrupt-controller@e0012000 { #interrupt-cells = <1>; compatible = "snps,dw-apb-ictl"; reg = < 0x0 0xe0012000 0x0 0x200 >; diff --git a/arch/arc/boot/dts/vdk_axc003.dtsi b/arch/arc/boot/dts/vdk_axc003.dtsi index f8be7ba8dad4..c21d0eb07bf6 100644 --- a/arch/arc/boot/dts/vdk_axc003.dtsi +++ b/arch/arc/boot/dts/vdk_axc003.dtsi @@ -46,7 +46,7 @@ }; - mb_intc: dw-apb-ictl@e0012000 { + mb_intc: interrupt-controller@e0012000 { #interrupt-cells = <1>; compatible = "snps,dw-apb-ictl"; reg = < 0xe0012000 0x200 >; diff --git a/arch/arc/boot/dts/vdk_axc003_idu.dtsi b/arch/arc/boot/dts/vdk_axc003_idu.dtsi index 0afa3e53a4e3..4d348853ac7c 100644 --- a/arch/arc/boot/dts/vdk_axc003_idu.dtsi +++ b/arch/arc/boot/dts/vdk_axc003_idu.dtsi @@ -54,7 +54,7 @@ }; - mb_intc: dw-apb-ictl@e0012000 { + mb_intc: interrupt-controller@e0012000 { #interrupt-cells = <1>; compatible = "snps,dw-apb-ictl"; reg = < 0xe0012000 0x200 >; -- cgit v1.2.3 From 4ebdad05129ed4ecfe32e559b7df116e85b1d9f6 Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Sat, 5 Sep 2020 01:34:22 +0800 Subject: btrfs: fix replace of seed device [ Upstream commit c6a5d954950c5031444173ad2195efc163afcac9 ] If you replace a seed device in a sprouted fs, it appears to have successfully replaced the seed device, but if you look closely, it didn't. Here is an example. $ mkfs.btrfs /dev/sda $ btrfstune -S1 /dev/sda $ mount /dev/sda /btrfs $ btrfs device add /dev/sdb /btrfs $ umount /btrfs $ btrfs device scan --forget $ mount -o device=/dev/sda /dev/sdb /btrfs $ btrfs replace start -f /dev/sda /dev/sdc /btrfs $ echo $? 0 BTRFS info (device sdb): dev_replace from /dev/sda (devid 1) to /dev/sdc started BTRFS info (device sdb): dev_replace from /dev/sda (devid 1) to /dev/sdc finished $ btrfs fi show Label: none uuid: ab2c88b7-be81-4a7e-9849-c3666e7f9f4f Total devices 2 FS bytes used 256.00KiB devid 1 size 3.00GiB used 520.00MiB path /dev/sdc devid 2 size 3.00GiB used 896.00MiB path /dev/sdb Label: none uuid: 10bd3202-0415-43af-96a8-d5409f310a7e Total devices 1 FS bytes used 128.00KiB devid 1 size 3.00GiB used 536.00MiB path /dev/sda So as per the replace start command and kernel log replace was successful. Now let's try to clean mount. $ umount /btrfs $ btrfs device scan --forget $ mount -o device=/dev/sdc /dev/sdb /btrfs mount: /btrfs: wrong fs type, bad option, bad superblock on /dev/sdb, missing codepage or helper program, or other error. [ 636.157517] BTRFS error (device sdc): failed to read chunk tree: -2 [ 636.180177] BTRFS error (device sdc): open_ctree failed That's because per dev items it is still looking for the original seed device. $ btrfs inspect-internal dump-tree -d /dev/sdb item 0 key (DEV_ITEMS DEV_ITEM 1) itemoff 16185 itemsize 98 devid 1 total_bytes 3221225472 bytes_used 545259520 io_align 4096 io_width 4096 sector_size 4096 type 0 generation 6 start_offset 0 dev_group 0 seek_speed 0 bandwidth 0 uuid 59368f50-9af2-4b17-91da-8a783cc418d4 <--- seed uuid fsid 10bd3202-0415-43af-96a8-d5409f310a7e <--- seed fsid item 1 key (DEV_ITEMS DEV_ITEM 2) itemoff 16087 itemsize 98 devid 2 total_bytes 3221225472 bytes_used 939524096 io_align 4096 io_width 4096 sector_size 4096 type 0 generation 0 start_offset 0 dev_group 0 seek_speed 0 bandwidth 0 uuid 56a0a6bc-4630-4998-8daf-3c3030c4256a <- sprout uuid fsid ab2c88b7-be81-4a7e-9849-c3666e7f9f4f <- sprout fsid But the replaced target has the following uuid+fsid in its superblock which doesn't match with the expected uuid+fsid in its devitem. $ btrfs in dump-super /dev/sdc | egrep '^generation|dev_item.uuid|dev_item.fsid|devid' generation 20 dev_item.uuid 59368f50-9af2-4b17-91da-8a783cc418d4 dev_item.fsid ab2c88b7-be81-4a7e-9849-c3666e7f9f4f [match] dev_item.devid 1 So if you provide the original seed device the mount shall be successful. Which so long happening in the test case btrfs/163. $ btrfs device scan --forget $ mount -o device=/dev/sda /dev/sdb /btrfs Fix in this patch: If a seed is not sprouted then there is no replacement of it, because of its read-only filesystem with a read-only device. Similarly, in the case of a sprouted filesystem, the seed device is still read only. So, mark it as you can't replace a seed device, you can only add a new device and then delete the seed device. If replace is attempted then returns -EINVAL. Signed-off-by: Anand Jain Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/dev-replace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 196bd241e701..34ddf2d75c1a 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -190,7 +190,7 @@ static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, int ret = 0; *device_out = NULL; - if (fs_info->fs_devices->seeding) { + if (srcdev->fs_devices->seeding) { btrfs_err(fs_info, "the filesystem is a seed filesystem!"); return -EINVAL; } -- cgit v1.2.3 From 78452408bb3e488b11243698acb7322a22be3d36 Mon Sep 17 00:00:00 2001 From: Zhao Heming Date: Tue, 6 Oct 2020 00:00:24 +0800 Subject: md/bitmap: md_bitmap_get_counter returns wrong blocks [ Upstream commit d837f7277f56e70d82b3a4a037d744854e62f387 ] md_bitmap_get_counter() has code: ``` if (bitmap->bp[page].hijacked || bitmap->bp[page].map == NULL) csize = ((sector_t)1) << (bitmap->chunkshift + PAGE_COUNTER_SHIFT - 1); ``` The minus 1 is wrong, this branch should report 2048 bits of space. With "-1" action, this only report 1024 bit of space. This bug code returns wrong blocks, but it doesn't inflence bitmap logic: 1. Most callers focus this function return value (the counter of offset), not the parameter blocks. 2. The bug is only triggered when hijacked is true or map is NULL. the hijacked true condition is very rare. the "map == null" only true when array is creating or resizing. 3. Even the caller gets wrong blocks, current code makes caller just to call md_bitmap_get_counter() one more time. Signed-off-by: Zhao Heming Signed-off-by: Song Liu Signed-off-by: Sasha Levin --- drivers/md/md-bitmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c index 7227d03dbbea..0a6c200e3dcb 100644 --- a/drivers/md/md-bitmap.c +++ b/drivers/md/md-bitmap.c @@ -1372,7 +1372,7 @@ __acquires(bitmap->lock) if (bitmap->bp[page].hijacked || bitmap->bp[page].map == NULL) csize = ((sector_t)1) << (bitmap->chunkshift + - PAGE_COUNTER_SHIFT - 1); + PAGE_COUNTER_SHIFT); else csize = ((sector_t)1) << bitmap->chunkshift; *blocks = csize - (offset & (csize - 1)); -- cgit v1.2.3 From dfcfccd0507534737a19055d1f7d424c376aa559 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 12 Oct 2020 05:10:51 -0400 Subject: bnxt_en: Log unknown link speed appropriately. [ Upstream commit 8eddb3e7ce124dd6375d3664f1aae13873318b0f ] If the VF virtual link is set to always enabled, the speed may be unknown when the physical link is down. The driver currently logs the link speed as 4294967295 Mbps which is SPEED_UNKNOWN. Modify the link up log message as "speed unknown" which makes more sense. Reviewed-by: Vasundhara Volam Reviewed-by: Edwin Peer Signed-off-by: Michael Chan Link: https://lore.kernel.org/r/1602493854-29283-7-git-send-email-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index cdd3764760ed..6f777e9b4b93 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -8375,6 +8375,11 @@ static void bnxt_report_link(struct bnxt *bp) u16 fec; netif_carrier_on(bp->dev); + speed = bnxt_fw_to_ethtool_speed(bp->link_info.link_speed); + if (speed == SPEED_UNKNOWN) { + netdev_info(bp->dev, "NIC Link is Up, speed unknown\n"); + return; + } if (bp->link_info.duplex == BNXT_LINK_DUPLEX_FULL) duplex = "full"; else @@ -8387,7 +8392,6 @@ static void bnxt_report_link(struct bnxt *bp) flow_ctrl = "ON - receive"; else flow_ctrl = "none"; - speed = bnxt_fw_to_ethtool_speed(bp->link_info.link_speed); netdev_info(bp->dev, "NIC Link is Up, %u Mbps %s duplex, Flow control: %s\n", speed, duplex, flow_ctrl); if (bp->flags & BNXT_FLAG_EEE_CAP) -- cgit v1.2.3 From f66125e1c4df1eb634732d78ebecc96ee902d5fb Mon Sep 17 00:00:00 2001 From: Chris Lew Date: Wed, 24 Jun 2020 22:15:18 +0530 Subject: rpmsg: glink: Use complete_all for open states [ Upstream commit 4fcdaf6e28d11e2f3820d54dd23cd12a47ddd44e ] The open_req and open_ack completion variables are the state variables to represet a remote channel as open. Use complete_all so there are no races with waiters and using completion_done. Signed-off-by: Chris Lew Signed-off-by: Arun Kumar Neelakantam Signed-off-by: Deepak Kumar Singh Link: https://lore.kernel.org/r/1593017121-7953-2-git-send-email-deesin@codeaurora.org Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- drivers/rpmsg/qcom_glink_native.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/rpmsg/qcom_glink_native.c b/drivers/rpmsg/qcom_glink_native.c index 1995f5b3ea67..d5114abcde19 100644 --- a/drivers/rpmsg/qcom_glink_native.c +++ b/drivers/rpmsg/qcom_glink_native.c @@ -970,7 +970,7 @@ static int qcom_glink_rx_open_ack(struct qcom_glink *glink, unsigned int lcid) return -EINVAL; } - complete(&channel->open_ack); + complete_all(&channel->open_ack); return 0; } @@ -1178,7 +1178,7 @@ static int qcom_glink_announce_create(struct rpmsg_device *rpdev) __be32 *val = defaults; int size; - if (glink->intentless) + if (glink->intentless || !completion_done(&channel->open_ack)) return 0; prop = of_find_property(np, "qcom,intents", NULL); @@ -1413,7 +1413,7 @@ static int qcom_glink_rx_open(struct qcom_glink *glink, unsigned int rcid, channel->rcid = ret; spin_unlock_irqrestore(&glink->idr_lock, flags); - complete(&channel->open_req); + complete_all(&channel->open_req); if (create_device) { rpdev = kzalloc(sizeof(*rpdev), GFP_KERNEL); -- cgit v1.2.3 From 660e2d9d14174a06da03d29c3a6e01e40b47f60b Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Mon, 7 Sep 2020 11:25:59 +0300 Subject: clk: ti: clockdomain: fix static checker warning [ Upstream commit b7a7943fe291b983b104bcbd2f16e8e896f56590 ] Fix a memory leak induced by not calling clk_put after doing of_clk_get. Reported-by: Dan Murphy Signed-off-by: Tero Kristo Link: https://lore.kernel.org/r/20200907082600.454-3-t-kristo@ti.com Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/ti/clockdomain.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/clk/ti/clockdomain.c b/drivers/clk/ti/clockdomain.c index 423a99b9f10c..8d0dea188a28 100644 --- a/drivers/clk/ti/clockdomain.c +++ b/drivers/clk/ti/clockdomain.c @@ -146,10 +146,12 @@ static void __init of_ti_clockdomain_setup(struct device_node *node) if (!omap2_clk_is_hw_omap(clk_hw)) { pr_warn("can't setup clkdm for basic clk %s\n", __clk_get_name(clk)); + clk_put(clk); continue; } to_clk_hw_omap(clk_hw)->clkdm_name = clkdm_name; omap2_init_clk_clkdm(clk_hw); + clk_put(clk); } } -- cgit v1.2.3 From 229bdf0b13198d1c4ef0fa1f1c3ab05b2e0d4075 Mon Sep 17 00:00:00 2001 From: Anant Thazhemadam Date: Mon, 12 Oct 2020 09:54:04 +0530 Subject: net: 9p: initialize sun_server.sun_path to have addr's value only when addr is valid [ Upstream commit 7ca1db21ef8e0e6725b4d25deed1ca196f7efb28 ] In p9_fd_create_unix, checking is performed to see if the addr (passed as an argument) is NULL or not. However, no check is performed to see if addr is a valid address, i.e., it doesn't entirely consist of only 0's. The initialization of sun_server.sun_path to be equal to this faulty addr value leads to an uninitialized variable, as detected by KMSAN. Checking for this (faulty addr) and returning a negative error number appropriately, resolves this issue. Link: http://lkml.kernel.org/r/20201012042404.2508-1-anant.thazhemadam@gmail.com Reported-by: syzbot+75d51fe5bf4ebe988518@syzkaller.appspotmail.com Tested-by: syzbot+75d51fe5bf4ebe988518@syzkaller.appspotmail.com Signed-off-by: Anant Thazhemadam Signed-off-by: Dominique Martinet Signed-off-by: Sasha Levin --- net/9p/trans_fd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index 12ecacf0c55f..60eb9a2b209b 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -1023,7 +1023,7 @@ p9_fd_create_unix(struct p9_client *client, const char *addr, char *args) csocket = NULL; - if (addr == NULL) + if (!addr || !strlen(addr)) return -EINVAL; if (strlen(addr) >= UNIX_PATH_MAX) { -- cgit v1.2.3 From d01b6332079910051ae08b763c84018e37a2f425 Mon Sep 17 00:00:00 2001 From: Madhuparna Bhowmik Date: Fri, 7 Aug 2020 16:59:02 +0530 Subject: drivers: watchdog: rdc321x_wdt: Fix race condition bugs [ Upstream commit 4b2e7f99cdd314263c9d172bc17193b8b6bba463 ] In rdc321x_wdt_probe(), rdc321x_wdt_device.queue is initialized after misc_register(), hence if ioctl is called before its initialization which can call rdc321x_wdt_start() function, it will see an uninitialized value of rdc321x_wdt_device.queue, hence initialize it before misc_register(). Also, rdc321x_wdt_device.default_ticks is accessed in reset() function called from write callback, thus initialize it before misc_register(). Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Madhuparna Bhowmik Reviewed-by: Guenter Roeck Reviewed-by: Florian Fainelli Link: https://lore.kernel.org/r/20200807112902.28764-1-madhuparnabhowmik10@gmail.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck Signed-off-by: Sasha Levin --- drivers/watchdog/rdc321x_wdt.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/watchdog/rdc321x_wdt.c b/drivers/watchdog/rdc321x_wdt.c index 2e608ae6cbc7..e0efbc583198 100644 --- a/drivers/watchdog/rdc321x_wdt.c +++ b/drivers/watchdog/rdc321x_wdt.c @@ -230,6 +230,8 @@ static int rdc321x_wdt_probe(struct platform_device *pdev) rdc321x_wdt_device.sb_pdev = pdata->sb_pdev; rdc321x_wdt_device.base_reg = r->start; + rdc321x_wdt_device.queue = 0; + rdc321x_wdt_device.default_ticks = ticks; err = misc_register(&rdc321x_wdt_misc); if (err < 0) { @@ -244,14 +246,11 @@ static int rdc321x_wdt_probe(struct platform_device *pdev) rdc321x_wdt_device.base_reg, RDC_WDT_RST); init_completion(&rdc321x_wdt_device.stop); - rdc321x_wdt_device.queue = 0; clear_bit(0, &rdc321x_wdt_device.inuse); timer_setup(&rdc321x_wdt_device.timer, rdc321x_wdt_trigger, 0); - rdc321x_wdt_device.default_ticks = ticks; - dev_info(&pdev->dev, "watchdog init success\n"); return 0; -- cgit v1.2.3 From d7d7920a7f6614e32ebdae205e108ccc2d85d9af Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 15 Oct 2020 13:03:30 +0200 Subject: ext4: Detect already used quota file early [ Upstream commit e0770e91424f694b461141cbc99adf6b23006b60 ] When we try to use file already used as a quota file again (for the same or different quota type), strange things can happen. At the very least lockdep annotations may be wrong but also inode flags may be wrongly set / reset. When the file is used for two quota types at once we can even corrupt the file and likely crash the kernel. Catch all these cases by checking whether passed file is already used as quota file and bail early in that case. This fixes occasional generic/219 failure due to lockdep complaint. Reviewed-by: Andreas Dilger Reported-by: Ritesh Harjani Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20201015110330.28716-1-jack@suse.cz Signed-off-by: Theodore Ts'o Signed-off-by: Sasha Levin --- fs/ext4/super.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 4aae7e3e89a1..2603537b1f66 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -5856,6 +5856,11 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id, /* Quotafile not on the same filesystem? */ if (path->dentry->d_sb != sb) return -EXDEV; + + /* Quota already enabled for this file? */ + if (IS_NOQUOTA(d_inode(path->dentry))) + return -EBUSY; + /* Journaling quota? */ if (EXT4_SB(sb)->s_qf_names[type]) { /* Quotafile not in fs root? */ -- cgit v1.2.3 From 9b58c55ba81c55e5428efd2c0dcd8bd9c0283c9e Mon Sep 17 00:00:00 2001 From: Fabiano Rosas Date: Fri, 11 Sep 2020 01:16:07 -0300 Subject: KVM: PPC: Book3S HV: Do not allocate HPT for a nested guest [ Upstream commit 05e6295dc7de859c9d56334805485c4d20bebf25 ] The current nested KVM code does not support HPT guests. This is informed/enforced in some ways: - Hosts < P9 will not be able to enable the nested HV feature; - The nested hypervisor MMU capabilities will not contain KVM_CAP_PPC_MMU_HASH_V3; - QEMU reflects the MMU capabilities in the 'ibm,arch-vec-5-platform-support' device-tree property; - The nested guest, at 'prom_parse_mmu_model' ignores the 'disable_radix' kernel command line option if HPT is not supported; - The KVM_PPC_CONFIGURE_V3_MMU ioctl will fail if trying to use HPT. There is, however, still a way to start a HPT guest by using max-compat-cpu=power8 at the QEMU machine options. This leads to the guest being set to use hash after QEMU calls the KVM_PPC_ALLOCATE_HTAB ioctl. With the guest set to hash, the nested hypervisor goes through the entry path that has no knowledge of nesting (kvmppc_run_vcpu) and crashes when it tries to execute an hypervisor-privileged (mtspr HDEC) instruction at __kvmppc_vcore_entry: root@L1:~ $ qemu-system-ppc64 -machine pseries,max-cpu-compat=power8 ... [ 538.543303] CPU: 83 PID: 25185 Comm: CPU 0/KVM Not tainted 5.9.0-rc4 #1 [ 538.543355] NIP: c00800000753f388 LR: c00800000753f368 CTR: c0000000001e5ec0 [ 538.543417] REGS: c0000013e91e33b0 TRAP: 0700 Not tainted (5.9.0-rc4) [ 538.543470] MSR: 8000000002843033 CR: 22422882 XER: 20040000 [ 538.543546] CFAR: c00800000753f4b0 IRQMASK: 3 GPR00: c0080000075397a0 c0000013e91e3640 c00800000755e600 0000000080000000 GPR04: 0000000000000000 c0000013eab19800 c000001394de0000 00000043a054db72 GPR08: 00000000003b1652 0000000000000000 0000000000000000 c0080000075502e0 GPR12: c0000000001e5ec0 c0000007ffa74200 c0000013eab19800 0000000000000008 GPR16: 0000000000000000 c00000139676c6c0 c000000001d23948 c0000013e91e38b8 GPR20: 0000000000000053 0000000000000000 0000000000000001 0000000000000000 GPR24: 0000000000000001 0000000000000001 0000000000000000 0000000000000001 GPR28: 0000000000000001 0000000000000053 c0000013eab19800 0000000000000001 [ 538.544067] NIP [c00800000753f388] __kvmppc_vcore_entry+0x90/0x104 [kvm_hv] [ 538.544121] LR [c00800000753f368] __kvmppc_vcore_entry+0x70/0x104 [kvm_hv] [ 538.544173] Call Trace: [ 538.544196] [c0000013e91e3640] [c0000013e91e3680] 0xc0000013e91e3680 (unreliable) [ 538.544260] [c0000013e91e3820] [c0080000075397a0] kvmppc_run_core+0xbc8/0x19d0 [kvm_hv] [ 538.544325] [c0000013e91e39e0] [c00800000753d99c] kvmppc_vcpu_run_hv+0x404/0xc00 [kvm_hv] [ 538.544394] [c0000013e91e3ad0] [c0080000072da4fc] kvmppc_vcpu_run+0x34/0x48 [kvm] [ 538.544472] [c0000013e91e3af0] [c0080000072d61b8] kvm_arch_vcpu_ioctl_run+0x310/0x420 [kvm] [ 538.544539] [c0000013e91e3b80] [c0080000072c7450] kvm_vcpu_ioctl+0x298/0x778 [kvm] [ 538.544605] [c0000013e91e3ce0] [c0000000004b8c2c] sys_ioctl+0x1dc/0xc90 [ 538.544662] [c0000013e91e3dc0] [c00000000002f9a4] system_call_exception+0xe4/0x1c0 [ 538.544726] [c0000013e91e3e20] [c00000000000d140] system_call_common+0xf0/0x27c [ 538.544787] Instruction dump: [ 538.544821] f86d1098 60000000 60000000 48000099 e8ad0fe8 e8c500a0 e9264140 75290002 [ 538.544886] 7d1602a6 7cec42a6 40820008 7d0807b4 <7d164ba6> 7d083a14 f90d10a0 480104fd [ 538.544953] ---[ end trace 74423e2b948c2e0c ]--- This patch makes the KVM_PPC_ALLOCATE_HTAB ioctl fail when running in the nested hypervisor, causing QEMU to abort. Reported-by: Satheesh Rajendran Signed-off-by: Fabiano Rosas Reviewed-by: Greg Kurz Reviewed-by: David Gibson Signed-off-by: Paul Mackerras Signed-off-by: Sasha Levin --- arch/powerpc/kvm/book3s_hv.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index e2183fed947d..dd9b19b1f459 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -5191,6 +5191,12 @@ static long kvm_arch_vm_ioctl_hv(struct file *filp, case KVM_PPC_ALLOCATE_HTAB: { u32 htab_order; + /* If we're a nested hypervisor, we currently only support radix */ + if (kvmhv_on_pseries()) { + r = -EOPNOTSUPP; + break; + } + r = -EFAULT; if (get_user(htab_order, (u32 __user *)argp)) break; -- cgit v1.2.3 From 9f7e4bfadfe935d285918cad51b15d151175fdcc Mon Sep 17 00:00:00 2001 From: Jamie Iles Date: Mon, 12 Oct 2020 14:13:09 +0100 Subject: gfs2: use-after-free in sysfs deregistration [ Upstream commit c2a04b02c060c4858762edce4674d5cba3e5a96f ] syzkaller found the following splat with CONFIG_DEBUG_KOBJECT_RELEASE=y: Read of size 1 at addr ffff000028e896b8 by task kworker/1:2/228 CPU: 1 PID: 228 Comm: kworker/1:2 Tainted: G S 5.9.0-rc8+ #101 Hardware name: linux,dummy-virt (DT) Workqueue: events kobject_delayed_cleanup Call trace: dump_backtrace+0x0/0x4d8 show_stack+0x34/0x48 dump_stack+0x174/0x1f8 print_address_description.constprop.0+0x5c/0x550 kasan_report+0x13c/0x1c0 __asan_report_load1_noabort+0x34/0x60 memcmp+0xd0/0xd8 gfs2_uevent+0xc4/0x188 kobject_uevent_env+0x54c/0x1240 kobject_uevent+0x2c/0x40 __kobject_del+0x190/0x1d8 kobject_delayed_cleanup+0x2bc/0x3b8 process_one_work+0x96c/0x18c0 worker_thread+0x3f0/0xc30 kthread+0x390/0x498 ret_from_fork+0x10/0x18 Allocated by task 1110: kasan_save_stack+0x28/0x58 __kasan_kmalloc.isra.0+0xc8/0xe8 kasan_kmalloc+0x10/0x20 kmem_cache_alloc_trace+0x1d8/0x2f0 alloc_super+0x64/0x8c0 sget_fc+0x110/0x620 get_tree_bdev+0x190/0x648 gfs2_get_tree+0x50/0x228 vfs_get_tree+0x84/0x2e8 path_mount+0x1134/0x1da8 do_mount+0x124/0x138 __arm64_sys_mount+0x164/0x238 el0_svc_common.constprop.0+0x15c/0x598 do_el0_svc+0x60/0x150 el0_svc+0x34/0xb0 el0_sync_handler+0xc8/0x5b4 el0_sync+0x15c/0x180 Freed by task 228: kasan_save_stack+0x28/0x58 kasan_set_track+0x28/0x40 kasan_set_free_info+0x24/0x48 __kasan_slab_free+0x118/0x190 kasan_slab_free+0x14/0x20 slab_free_freelist_hook+0x6c/0x210 kfree+0x13c/0x460 Use the same pattern as f2fs + ext4 where the kobject destruction must complete before allowing the FS itself to be freed. This means that we need an explicit free_sbd in the callers. Cc: Bob Peterson Cc: Andreas Gruenbacher Signed-off-by: Jamie Iles [Also go to fail_free when init_names fails.] Signed-off-by: Andreas Gruenbacher Signed-off-by: Sasha Levin --- fs/gfs2/incore.h | 1 + fs/gfs2/ops_fstype.c | 22 +++++----------------- fs/gfs2/super.c | 1 + fs/gfs2/sys.c | 5 ++++- 4 files changed, 11 insertions(+), 18 deletions(-) diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 5f89c515f5bb..33a6b074209d 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -694,6 +694,7 @@ struct gfs2_sbd { struct super_block *sd_vfs; struct gfs2_pcpu_lkstats __percpu *sd_lkstats; struct kobject sd_kobj; + struct completion sd_kobj_unregister; unsigned long sd_flags; /* SDF_... */ struct gfs2_sb_host sd_sb; diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index e0c55765b06d..338666a97fff 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -1094,26 +1094,14 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc) } error = init_names(sdp, silent); - if (error) { - /* In this case, we haven't initialized sysfs, so we have to - manually free the sdp. */ - free_sbd(sdp); - sb->s_fs_info = NULL; - return error; - } + if (error) + goto fail_free; snprintf(sdp->sd_fsname, sizeof(sdp->sd_fsname), "%s", sdp->sd_table_name); error = gfs2_sys_fs_add(sdp); - /* - * If we hit an error here, gfs2_sys_fs_add will have called function - * kobject_put which causes the sysfs usage count to go to zero, which - * causes sysfs to call function gfs2_sbd_release, which frees sdp. - * Subsequent error paths here will call gfs2_sys_fs_del, which also - * kobject_put to free sdp. - */ if (error) - return error; + goto fail_free; gfs2_create_debugfs_file(sdp); @@ -1210,9 +1198,9 @@ fail_lm: gfs2_lm_unmount(sdp); fail_debug: gfs2_delete_debugfs_file(sdp); - /* gfs2_sys_fs_del must be the last thing we do, since it causes - * sysfs to call function gfs2_sbd_release, which frees sdp. */ gfs2_sys_fs_del(sdp); +fail_free: + free_sbd(sdp); sb->s_fs_info = NULL; return error; } diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 5fa1eec4fb4f..5935ce5ae563 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -695,6 +695,7 @@ restart: /* At this point, we're through participating in the lockspace */ gfs2_sys_fs_del(sdp); + free_sbd(sdp); } /** diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index dd15b8e4af2c..1c6e52dc878e 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c @@ -302,7 +302,7 @@ static void gfs2_sbd_release(struct kobject *kobj) { struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj); - free_sbd(sdp); + complete(&sdp->sd_kobj_unregister); } static struct kobj_type gfs2_ktype = { @@ -652,6 +652,7 @@ int gfs2_sys_fs_add(struct gfs2_sbd *sdp) sprintf(ro, "RDONLY=%d", sb_rdonly(sb)); sprintf(spectator, "SPECTATOR=%d", sdp->sd_args.ar_spectator ? 1 : 0); + init_completion(&sdp->sd_kobj_unregister); sdp->sd_kobj.kset = gfs2_kset; error = kobject_init_and_add(&sdp->sd_kobj, &gfs2_ktype, NULL, "%s", sdp->sd_table_name); @@ -682,6 +683,7 @@ fail_tune: fail_reg: fs_err(sdp, "error %d adding sysfs files\n", error); kobject_put(&sdp->sd_kobj); + wait_for_completion(&sdp->sd_kobj_unregister); sb->s_fs_info = NULL; return error; } @@ -692,6 +694,7 @@ void gfs2_sys_fs_del(struct gfs2_sbd *sdp) sysfs_remove_group(&sdp->sd_kobj, &tune_group); sysfs_remove_group(&sdp->sd_kobj, &lock_module_group); kobject_put(&sdp->sd_kobj); + wait_for_completion(&sdp->sd_kobj_unregister); } static int gfs2_uevent(struct kset *kset, struct kobject *kobj, -- cgit v1.2.3 From 3c78eb161c26550d07abb545473f607bfa251357 Mon Sep 17 00:00:00 2001 From: Anant Thazhemadam Date: Wed, 14 Oct 2020 22:01:09 +0530 Subject: gfs2: add validation checks for size of superblock [ Upstream commit 0ddc5154b24c96f20e94d653b0a814438de6032b ] In gfs2_check_sb(), no validation checks are performed with regards to the size of the superblock. syzkaller detected a slab-out-of-bounds bug that was primarily caused because the block size for a superblock was set to zero. A valid size for a superblock is a power of 2 between 512 and PAGE_SIZE. Performing validation checks and ensuring that the size of the superblock is valid fixes this bug. Reported-by: syzbot+af90d47a37376844e731@syzkaller.appspotmail.com Tested-by: syzbot+af90d47a37376844e731@syzkaller.appspotmail.com Suggested-by: Andrew Price Signed-off-by: Anant Thazhemadam [Minor code reordering.] Signed-off-by: Andreas Gruenbacher Signed-off-by: Sasha Levin --- fs/gfs2/ops_fstype.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 338666a97fff..29b27d769860 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -169,15 +169,19 @@ static int gfs2_check_sb(struct gfs2_sbd *sdp, int silent) return -EINVAL; } - /* If format numbers match exactly, we're done. */ - - if (sb->sb_fs_format == GFS2_FORMAT_FS && - sb->sb_multihost_format == GFS2_FORMAT_MULTI) - return 0; + if (sb->sb_fs_format != GFS2_FORMAT_FS || + sb->sb_multihost_format != GFS2_FORMAT_MULTI) { + fs_warn(sdp, "Unknown on-disk format, unable to mount\n"); + return -EINVAL; + } - fs_warn(sdp, "Unknown on-disk format, unable to mount\n"); + if (sb->sb_bsize < 512 || sb->sb_bsize > PAGE_SIZE || + (sb->sb_bsize & (sb->sb_bsize - 1))) { + pr_warn("Invalid superblock size\n"); + return -EINVAL; + } - return -EINVAL; + return 0; } static void end_bio_io_page(struct bio *bio) -- cgit v1.2.3 From e2dca8845c37923200751b9b3f87d6d7320dc07f Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Fri, 9 Oct 2020 09:32:56 +1000 Subject: cifs: handle -EINTR in cifs_setattr [ Upstream commit c6cc4c5a72505a0ecefc9b413f16bec512f38078 ] RHBZ: 1848178 Some calls that set attributes, like utimensat(), are not supposed to return -EINTR and thus do not have handlers for this in glibc which causes us to leak -EINTR to the applications which are also unprepared to handle it. For example tar will break if utimensat() return -EINTR and abort unpacking the archive. Other applications may break too. To handle this we add checks, and retry, for -EINTR in cifs_setattr() Signed-off-by: Ronnie Sahlberg Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/cifs/inode.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 17df90b5f57a..fd9e289f3e72 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -2614,13 +2614,18 @@ cifs_setattr(struct dentry *direntry, struct iattr *attrs) { struct cifs_sb_info *cifs_sb = CIFS_SB(direntry->d_sb); struct cifs_tcon *pTcon = cifs_sb_master_tcon(cifs_sb); + int rc, retries = 0; - if (pTcon->unix_ext) - return cifs_setattr_unix(direntry, attrs); - - return cifs_setattr_nounix(direntry, attrs); + do { + if (pTcon->unix_ext) + rc = cifs_setattr_unix(direntry, attrs); + else + rc = cifs_setattr_nounix(direntry, attrs); + retries++; + } while (is_retryable_error(rc) && retries < 2); /* BB: add cifs_setattr_legacy for really old servers */ + return rc; } #if 0 -- cgit v1.2.3 From c70f909e7ad6a8c2b94b3db0d598333abb1526f9 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Fri, 17 Jul 2020 21:33:21 +0900 Subject: arm64: dts: renesas: ulcb: add full-pwr-cycle-in-suspend into eMMC nodes [ Upstream commit 992d7a8b88c83c05664b649fc54501ce58e19132 ] Add full-pwr-cycle-in-suspend property to do a graceful shutdown of the eMMC device in system suspend. Signed-off-by: Yoshihiro Shimoda Link: https://lore.kernel.org/r/1594989201-24228-1-git-send-email-yoshihiro.shimoda.uh@renesas.com Signed-off-by: Geert Uytterhoeven Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/renesas/ulcb.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/renesas/ulcb.dtsi b/arch/arm64/boot/dts/renesas/ulcb.dtsi index 3ef89171538f..d8fccf3d4987 100644 --- a/arch/arm64/boot/dts/renesas/ulcb.dtsi +++ b/arch/arm64/boot/dts/renesas/ulcb.dtsi @@ -470,6 +470,7 @@ mmc-hs200-1_8v; mmc-hs400-1_8v; non-removable; + full-pwr-cycle-in-suspend; status = "okay"; }; -- cgit v1.2.3 From 2f98e2843b692fdc2e78c83a88c99cddfd6bd1cd Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Tue, 10 Mar 2020 14:02:48 -0700 Subject: ARM: dts: omap4: Fix sgx clock rate for 4430 [ Upstream commit 19d3e9a0bdd57b90175f30390edeb06851f5f9f3 ] We currently have a different clock rate for droid4 compared to the stock v3.0.8 based Android Linux kernel: # cat /sys/kernel/debug/clk/dpll_*_m7x2_ck/clk_rate 266666667 307200000 # cat /sys/kernel/debug/clk/l3_gfx_cm:clk:0000:0/clk_rate 307200000 Let's fix this by configuring sgx to use 153.6 MHz instead of 307.2 MHz. Looks like also at least duover needs this change to avoid hangs, so let's apply it for all 4430. This helps a bit with thermal issues that seem to be related to memory corruption when using sgx. It seems that other driver related issues still remain though. Cc: Arthur Demchenkov Cc: Merlijn Wajer Cc: Sebastian Reichel Signed-off-by: Tony Lindgren Signed-off-by: Sasha Levin --- arch/arm/boot/dts/omap4.dtsi | 2 +- arch/arm/boot/dts/omap443x.dtsi | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/omap4.dtsi b/arch/arm/boot/dts/omap4.dtsi index e5506ab669fc..904852006b9b 100644 --- a/arch/arm/boot/dts/omap4.dtsi +++ b/arch/arm/boot/dts/omap4.dtsi @@ -328,7 +328,7 @@ status = "disabled"; }; - target-module@56000000 { + sgx_module: target-module@56000000 { compatible = "ti,sysc-omap4", "ti,sysc"; reg = <0x5600fe00 0x4>, <0x5600fe10 0x4>; diff --git a/arch/arm/boot/dts/omap443x.dtsi b/arch/arm/boot/dts/omap443x.dtsi index cbcdcb4e7d1c..86b9caf461df 100644 --- a/arch/arm/boot/dts/omap443x.dtsi +++ b/arch/arm/boot/dts/omap443x.dtsi @@ -74,3 +74,13 @@ }; /include/ "omap443x-clocks.dtsi" + +/* + * Use dpll_per for sgx at 153.6MHz like droid4 stock v3.0.8 Android kernel + */ +&sgx_module { + assigned-clocks = <&l3_gfx_clkctrl OMAP4_GPU_CLKCTRL 24>, + <&dpll_per_m7x2_ck>; + assigned-clock-rates = <0>, <153600000>; + assigned-clock-parents = <&dpll_per_m7x2_ck>; +}; -- cgit v1.2.3 From c6029d9bc68de0674d3ed12d4661b34d4a276f50 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 26 Aug 2020 14:37:59 +0300 Subject: memory: emif: Remove bogus debugfs error handling [ Upstream commit fd22781648080cc400772b3c68aa6b059d2d5420 ] Callers are generally not supposed to check the return values from debugfs functions. Debugfs functions never return NULL so this error handling will never trigger. (Historically debugfs functions used to return a mix of NULL and error pointers but it was eventually deemed too complicated for something which wasn't intended to be used in normal situations). Delete all the error handling. Signed-off-by: Dan Carpenter Acked-by: Santosh Shilimkar Link: https://lore.kernel.org/r/20200826113759.GF393664@mwanda Signed-off-by: Krzysztof Kozlowski Signed-off-by: Sasha Levin --- drivers/memory/emif.c | 33 +++++---------------------------- 1 file changed, 5 insertions(+), 28 deletions(-) diff --git a/drivers/memory/emif.c b/drivers/memory/emif.c index 402c6bc8e621..af296b6fcbbd 100644 --- a/drivers/memory/emif.c +++ b/drivers/memory/emif.c @@ -163,35 +163,12 @@ static const struct file_operations emif_mr4_fops = { static int __init_or_module emif_debugfs_init(struct emif_data *emif) { - struct dentry *dentry; - int ret; - - dentry = debugfs_create_dir(dev_name(emif->dev), NULL); - if (!dentry) { - ret = -ENOMEM; - goto err0; - } - emif->debugfs_root = dentry; - - dentry = debugfs_create_file("regcache_dump", S_IRUGO, - emif->debugfs_root, emif, &emif_regdump_fops); - if (!dentry) { - ret = -ENOMEM; - goto err1; - } - - dentry = debugfs_create_file("mr4", S_IRUGO, - emif->debugfs_root, emif, &emif_mr4_fops); - if (!dentry) { - ret = -ENOMEM; - goto err1; - } - + emif->debugfs_root = debugfs_create_dir(dev_name(emif->dev), NULL); + debugfs_create_file("regcache_dump", S_IRUGO, emif->debugfs_root, emif, + &emif_regdump_fops); + debugfs_create_file("mr4", S_IRUGO, emif->debugfs_root, emif, + &emif_mr4_fops); return 0; -err1: - debugfs_remove_recursive(emif->debugfs_root); -err0: - return ret; } static void __exit emif_debugfs_exit(struct emif_data *emif) -- cgit v1.2.3 From 8c1b47e8aa4317534df3fd97a400e50c3ba61d87 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 7 Sep 2020 18:11:21 +0200 Subject: ARM: dts: s5pv210: remove DMA controller bus node name to fix dtschema warnings [ Upstream commit ea4e792f3c8931fffec4d700cf6197d84e9f35a6 ] There is no need to keep DMA controller nodes under AMBA bus node. Remove the "amba" node to fix dtschema warnings like: amba: $nodename:0: 'amba' does not match '^([a-z][a-z0-9\\-]+-bus|bus|soc|axi|ahb|apb)(@[0-9a-f]+)?$' Signed-off-by: Krzysztof Kozlowski Tested-by: Jonathan Bakker Link: https://lore.kernel.org/r/20200907161141.31034-6-krzk@kernel.org Signed-off-by: Sasha Levin --- arch/arm/boot/dts/s5pv210.dtsi | 49 ++++++++++++++++++------------------------ 1 file changed, 21 insertions(+), 28 deletions(-) diff --git a/arch/arm/boot/dts/s5pv210.dtsi b/arch/arm/boot/dts/s5pv210.dtsi index 2ad642f51fd9..8b194da334a5 100644 --- a/arch/arm/boot/dts/s5pv210.dtsi +++ b/arch/arm/boot/dts/s5pv210.dtsi @@ -128,35 +128,28 @@ }; }; - amba { - #address-cells = <1>; - #size-cells = <1>; - compatible = "simple-bus"; - ranges; - - pdma0: dma@e0900000 { - compatible = "arm,pl330", "arm,primecell"; - reg = <0xe0900000 0x1000>; - interrupt-parent = <&vic0>; - interrupts = <19>; - clocks = <&clocks CLK_PDMA0>; - clock-names = "apb_pclk"; - #dma-cells = <1>; - #dma-channels = <8>; - #dma-requests = <32>; - }; + pdma0: dma@e0900000 { + compatible = "arm,pl330", "arm,primecell"; + reg = <0xe0900000 0x1000>; + interrupt-parent = <&vic0>; + interrupts = <19>; + clocks = <&clocks CLK_PDMA0>; + clock-names = "apb_pclk"; + #dma-cells = <1>; + #dma-channels = <8>; + #dma-requests = <32>; + }; - pdma1: dma@e0a00000 { - compatible = "arm,pl330", "arm,primecell"; - reg = <0xe0a00000 0x1000>; - interrupt-parent = <&vic0>; - interrupts = <20>; - clocks = <&clocks CLK_PDMA1>; - clock-names = "apb_pclk"; - #dma-cells = <1>; - #dma-channels = <8>; - #dma-requests = <32>; - }; + pdma1: dma@e0a00000 { + compatible = "arm,pl330", "arm,primecell"; + reg = <0xe0a00000 0x1000>; + interrupt-parent = <&vic0>; + interrupts = <20>; + clocks = <&clocks CLK_PDMA1>; + clock-names = "apb_pclk"; + #dma-cells = <1>; + #dma-channels = <8>; + #dma-requests = <32>; }; spi0: spi@e1300000 { -- cgit v1.2.3 From 8a9024f6e29f6113f607ae75a64537fde2b71b09 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 7 Sep 2020 18:11:22 +0200 Subject: ARM: dts: s5pv210: move fixed clocks under root node [ Upstream commit d38cae370e5f2094cbc38db3082b8e9509ae52ce ] The fixed clocks are kept under dedicated 'external-clocks' node, thus a fake 'reg' was added. This is not correct with dtschema as fixed-clock binding does not have a 'reg' property. Moving fixed clocks out of 'soc' to root node fixes multiple dtbs_check warnings: external-clocks: $nodename:0: 'external-clocks' does not match '^([a-z][a-z0-9\\-]+-bus|bus|soc|axi|ahb|apb)(@[0-9a-f]+)?$' external-clocks: #size-cells:0:0: 0 is not one of [1, 2] external-clocks: oscillator@0:reg:0: [0] is too short external-clocks: oscillator@1:reg:0: [1] is too short external-clocks: 'ranges' is a required property oscillator@0: 'reg' does not match any of the regexes: 'pinctrl-[0-9]+' Signed-off-by: Krzysztof Kozlowski Tested-by: Jonathan Bakker Link: https://lore.kernel.org/r/20200907161141.31034-7-krzk@kernel.org Signed-off-by: Sasha Levin --- arch/arm/boot/dts/s5pv210.dtsi | 36 ++++++++++++++---------------------- 1 file changed, 14 insertions(+), 22 deletions(-) diff --git a/arch/arm/boot/dts/s5pv210.dtsi b/arch/arm/boot/dts/s5pv210.dtsi index 8b194da334a5..ec41e46edace 100644 --- a/arch/arm/boot/dts/s5pv210.dtsi +++ b/arch/arm/boot/dts/s5pv210.dtsi @@ -52,34 +52,26 @@ }; }; + xxti: oscillator-0 { + compatible = "fixed-clock"; + clock-frequency = <0>; + clock-output-names = "xxti"; + #clock-cells = <0>; + }; + + xusbxti: oscillator-1 { + compatible = "fixed-clock"; + clock-frequency = <0>; + clock-output-names = "xusbxti"; + #clock-cells = <0>; + }; + soc { compatible = "simple-bus"; #address-cells = <1>; #size-cells = <1>; ranges; - external-clocks { - compatible = "simple-bus"; - #address-cells = <1>; - #size-cells = <0>; - - xxti: oscillator@0 { - compatible = "fixed-clock"; - reg = <0>; - clock-frequency = <0>; - clock-output-names = "xxti"; - #clock-cells = <0>; - }; - - xusbxti: oscillator@1 { - compatible = "fixed-clock"; - reg = <1>; - clock-frequency = <0>; - clock-output-names = "xusbxti"; - #clock-cells = <0>; - }; - }; - onenand: onenand@b0600000 { compatible = "samsung,s5pv210-onenand"; reg = <0xb0600000 0x2000>, -- cgit v1.2.3 From 3ad1464467e7fa1a9daf9933e20b041f8f0ada16 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 7 Sep 2020 18:11:23 +0200 Subject: ARM: dts: s5pv210: move PMU node out of clock controller [ Upstream commit bb98fff84ad1ea321823759edaba573a16fa02bd ] The Power Management Unit (PMU) is a separate device which has little common with clock controller. Moving it to one level up (from clock controller child to SoC) allows to remove fake simple-bus compatible and dtbs_check warnings like: clock-controller@e0100000: $nodename:0: 'clock-controller@e0100000' does not match '^([a-z][a-z0-9\\-]+-bus|bus|soc|axi|ahb|apb)(@[0-9a-f]+)?$' Signed-off-by: Krzysztof Kozlowski Tested-by: Jonathan Bakker Link: https://lore.kernel.org/r/20200907161141.31034-8-krzk@kernel.org Signed-off-by: Sasha Levin --- arch/arm/boot/dts/s5pv210.dtsi | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/arch/arm/boot/dts/s5pv210.dtsi b/arch/arm/boot/dts/s5pv210.dtsi index ec41e46edace..f10139bd80a5 100644 --- a/arch/arm/boot/dts/s5pv210.dtsi +++ b/arch/arm/boot/dts/s5pv210.dtsi @@ -92,19 +92,16 @@ }; clocks: clock-controller@e0100000 { - compatible = "samsung,s5pv210-clock", "simple-bus"; + compatible = "samsung,s5pv210-clock"; reg = <0xe0100000 0x10000>; clock-names = "xxti", "xusbxti"; clocks = <&xxti>, <&xusbxti>; #clock-cells = <1>; - #address-cells = <1>; - #size-cells = <1>; - ranges; + }; - pmu_syscon: syscon@e0108000 { - compatible = "samsung-s5pv210-pmu", "syscon"; - reg = <0xe0108000 0x8000>; - }; + pmu_syscon: syscon@e0108000 { + compatible = "samsung-s5pv210-pmu", "syscon"; + reg = <0xe0108000 0x8000>; }; pinctrl0: pinctrl@e0200000 { -- cgit v1.2.3 From b71dbaf08f9f5b8fafdeca143c7ed89ed62c1030 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 7 Sep 2020 18:11:24 +0200 Subject: ARM: dts: s5pv210: remove dedicated 'audio-subsystem' node [ Upstream commit 6c17a2974abf68a58517f75741b15c4aba42b4b8 ] The 'audio-subsystem' node is an artificial creation, not representing real hardware. The hardware is described by its nodes - AUDSS clock controller and I2S0. Remove the 'audio-subsystem' node along with its undocumented compatible to fix dtbs_check warnings like: audio-subsystem: $nodename:0: 'audio-subsystem' does not match '^([a-z][a-z0-9\\-]+-bus|bus|soc|axi|ahb|apb)(@[0-9a-f]+)?$' Signed-off-by: Krzysztof Kozlowski Tested-by: Jonathan Bakker Link: https://lore.kernel.org/r/20200907161141.31034-9-krzk@kernel.org Signed-off-by: Sasha Levin --- arch/arm/boot/dts/s5pv210.dtsi | 65 +++++++++++++++++++----------------------- 1 file changed, 29 insertions(+), 36 deletions(-) diff --git a/arch/arm/boot/dts/s5pv210.dtsi b/arch/arm/boot/dts/s5pv210.dtsi index f10139bd80a5..61822afa30ab 100644 --- a/arch/arm/boot/dts/s5pv210.dtsi +++ b/arch/arm/boot/dts/s5pv210.dtsi @@ -211,43 +211,36 @@ status = "disabled"; }; - audio-subsystem { - compatible = "samsung,s5pv210-audss", "simple-bus"; - #address-cells = <1>; - #size-cells = <1>; - ranges; - - clk_audss: clock-controller@eee10000 { - compatible = "samsung,s5pv210-audss-clock"; - reg = <0xeee10000 0x1000>; - clock-names = "hclk", "xxti", - "fout_epll", - "sclk_audio0"; - clocks = <&clocks DOUT_HCLKP>, <&xxti>, - <&clocks FOUT_EPLL>, - <&clocks SCLK_AUDIO0>; - #clock-cells = <1>; - }; + clk_audss: clock-controller@eee10000 { + compatible = "samsung,s5pv210-audss-clock"; + reg = <0xeee10000 0x1000>; + clock-names = "hclk", "xxti", + "fout_epll", + "sclk_audio0"; + clocks = <&clocks DOUT_HCLKP>, <&xxti>, + <&clocks FOUT_EPLL>, + <&clocks SCLK_AUDIO0>; + #clock-cells = <1>; + }; - i2s0: i2s@eee30000 { - compatible = "samsung,s5pv210-i2s"; - reg = <0xeee30000 0x1000>; - interrupt-parent = <&vic2>; - interrupts = <16>; - dma-names = "rx", "tx", "tx-sec"; - dmas = <&pdma1 9>, <&pdma1 10>, <&pdma1 11>; - clock-names = "iis", - "i2s_opclk0", - "i2s_opclk1"; - clocks = <&clk_audss CLK_I2S>, - <&clk_audss CLK_I2S>, - <&clk_audss CLK_DOUT_AUD_BUS>; - samsung,idma-addr = <0xc0010000>; - pinctrl-names = "default"; - pinctrl-0 = <&i2s0_bus>; - #sound-dai-cells = <0>; - status = "disabled"; - }; + i2s0: i2s@eee30000 { + compatible = "samsung,s5pv210-i2s"; + reg = <0xeee30000 0x1000>; + interrupt-parent = <&vic2>; + interrupts = <16>; + dma-names = "rx", "tx", "tx-sec"; + dmas = <&pdma1 9>, <&pdma1 10>, <&pdma1 11>; + clock-names = "iis", + "i2s_opclk0", + "i2s_opclk1"; + clocks = <&clk_audss CLK_I2S>, + <&clk_audss CLK_I2S>, + <&clk_audss CLK_DOUT_AUD_BUS>; + samsung,idma-addr = <0xc0010000>; + pinctrl-names = "default"; + pinctrl-0 = <&i2s0_bus>; + #sound-dai-cells = <0>; + status = "disabled"; }; i2s1: i2s@e2100000 { -- cgit v1.2.3 From 742fd49cf811ca164489e339b862e3fb8e240a73 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Tue, 13 Oct 2020 22:45:14 -0400 Subject: nbd: make the config put is called before the notifying the waiter [ Upstream commit 87aac3a80af5cbad93e63250e8a1e19095ba0d30 ] There has one race case for ceph's rbd-nbd tool. When do mapping it may fail with EBUSY from ioctl(nbd, NBD_DO_IT), but actually the nbd device has already unmaped. It dues to if just after the wake_up(), the recv_work() is scheduled out and defers calling the nbd_config_put(), though the map process has exited the "nbd->recv_task" is not cleared. Signed-off-by: Xiubo Li Reviewed-by: Josef Bacik Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/block/nbd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 7c577cabb9c3..742f8160b6e2 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -787,9 +787,9 @@ static void recv_work(struct work_struct *work) blk_mq_complete_request(blk_mq_rq_from_pdu(cmd)); } + nbd_config_put(nbd); atomic_dec(&config->recv_threads); wake_up(&config->recv_wq); - nbd_config_put(nbd); kfree(args); } -- cgit v1.2.3 From c421c082088ef10baec2612cea5973907a6acc21 Mon Sep 17 00:00:00 2001 From: Douglas Gilbert Date: Thu, 15 Oct 2020 14:57:35 -0400 Subject: sgl_alloc_order: fix memory leak [ Upstream commit b2a182a40278bc5849730e66bca01a762188ed86 ] sgl_alloc_order() can fail when 'length' is large on a memory constrained system. When order > 0 it will potentially be making several multi-page allocations with the later ones more likely to fail than the earlier one. So it is important that sgl_alloc_order() frees up any pages it has obtained before returning NULL. In the case when order > 0 it calls the wrong free page function and leaks. In testing the leak was sufficient to bring down my 8 GiB laptop with OOM. Reviewed-by: Bart Van Assche Signed-off-by: Douglas Gilbert Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- lib/scatterlist.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/scatterlist.c b/lib/scatterlist.c index 5813072bc589..29346184fcf2 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -514,7 +514,7 @@ struct scatterlist *sgl_alloc_order(unsigned long long length, elem_len = min_t(u64, length, PAGE_SIZE << order); page = alloc_pages(gfp, order); if (!page) { - sgl_free(sgl); + sgl_free_order(sgl, order); return NULL; } -- cgit v1.2.3 From a7aa5d578fedeb7fa672654fe293ef2defcecf23 Mon Sep 17 00:00:00 2001 From: Chao Leng Date: Mon, 12 Oct 2020 16:10:40 +0800 Subject: nvme-rdma: fix crash when connect rejected [ Upstream commit 43efdb8e870ee0f58633fd579aa5b5185bf5d39e ] A crash can happened when a connect is rejected. The host establishes the connection after received ConnectReply, and then continues to send the fabrics Connect command. If the controller does not receive the ReadyToUse capsule, host may receive a ConnectReject reply. Call nvme_rdma_destroy_queue_ib after the host received the RDMA_CM_EVENT_REJECTED event. Then when the fabrics Connect command times out, nvme_rdma_timeout calls nvme_rdma_complete_rq to fail the request. A crash happenes due to use after free in nvme_rdma_complete_rq. nvme_rdma_destroy_queue_ib is redundant when handling the RDMA_CM_EVENT_REJECTED event as nvme_rdma_destroy_queue_ib is already called in connection failure handler. Signed-off-by: Chao Leng Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Signed-off-by: Sasha Levin --- drivers/nvme/host/rdma.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index abe4fe496d05..a41ee9feab8e 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1679,7 +1679,6 @@ static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id, complete(&queue->cm_done); return 0; case RDMA_CM_EVENT_REJECTED: - nvme_rdma_destroy_queue_ib(queue); cm_error = nvme_rdma_conn_rejected(queue, ev); break; case RDMA_CM_EVENT_ROUTE_ERROR: -- cgit v1.2.3 From b91d4797b3da0a0644e31cbc2f41a14a61472ab6 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Mon, 5 Oct 2020 09:35:21 -0700 Subject: md/raid5: fix oops during stripe resizing commit b44c018cdf748b96b676ba09fdbc5b34fc443ada upstream. KoWei reported crash during raid5 reshape: [ 1032.252932] Oops: 0002 [#1] SMP PTI [...] [ 1032.252943] RIP: 0010:memcpy_erms+0x6/0x10 [...] [ 1032.252947] RSP: 0018:ffffba1ac0c03b78 EFLAGS: 00010286 [ 1032.252949] RAX: 0000784ac0000000 RBX: ffff91bec3d09740 RCX: 0000000000001000 [ 1032.252951] RDX: 0000000000001000 RSI: ffff91be6781c000 RDI: 0000784ac0000000 [ 1032.252953] RBP: ffffba1ac0c03bd8 R08: 0000000000001000 R09: ffffba1ac0c03bf8 [ 1032.252954] R10: 0000000000000000 R11: 0000000000000000 R12: ffffba1ac0c03bf8 [ 1032.252955] R13: 0000000000001000 R14: 0000000000000000 R15: 0000000000000000 [ 1032.252958] FS: 0000000000000000(0000) GS:ffff91becf500000(0000) knlGS:0000000000000000 [ 1032.252959] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 1032.252961] CR2: 0000784ac0000000 CR3: 000000031780a002 CR4: 00000000001606e0 [ 1032.252962] Call Trace: [ 1032.252969] ? async_memcpy+0x179/0x1000 [async_memcpy] [ 1032.252977] ? raid5_release_stripe+0x8e/0x110 [raid456] [ 1032.252982] handle_stripe_expansion+0x15a/0x1f0 [raid456] [ 1032.252988] handle_stripe+0x592/0x1270 [raid456] [ 1032.252993] handle_active_stripes.isra.0+0x3cb/0x5a0 [raid456] [ 1032.252999] raid5d+0x35c/0x550 [raid456] [ 1032.253002] ? schedule+0x42/0xb0 [ 1032.253006] ? schedule_timeout+0x10e/0x160 [ 1032.253011] md_thread+0x97/0x160 [ 1032.253015] ? wait_woken+0x80/0x80 [ 1032.253019] kthread+0x104/0x140 [ 1032.253022] ? md_start_sync+0x60/0x60 [ 1032.253024] ? kthread_park+0x90/0x90 [ 1032.253027] ret_from_fork+0x35/0x40 This is because cache_size_mutex was unlocked too early in resize_stripes, which races with grow_one_stripe() that grow_one_stripe() allocates a stripe with wrong pool_size. Fix this issue by unlocking cache_size_mutex after updating pool_size. Cc: # v4.4+ Reported-by: KoWei Sung Signed-off-by: Song Liu Signed-off-by: Greg Kroah-Hartman --- drivers/md/raid5.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 02acd5d5a848..08a7f97750f7 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -2407,8 +2407,6 @@ static int resize_stripes(struct r5conf *conf, int newsize) } else err = -ENOMEM; - mutex_unlock(&conf->cache_size_mutex); - conf->slab_cache = sc; conf->active_name = 1-conf->active_name; @@ -2431,6 +2429,8 @@ static int resize_stripes(struct r5conf *conf, int newsize) if (!err) conf->pool_size = newsize; + mutex_unlock(&conf->cache_size_mutex); + return err; } -- cgit v1.2.3 From 3f56e94b6f7c7d1a07814b07f78268a0e6922357 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 18 Aug 2020 13:45:08 +0300 Subject: mmc: sdhci: Add LTR support for some Intel BYT based controllers commit 46f4a69ec8ed6ab9f6a6172afe50df792c8bc1b6 upstream. Some Intel BYT based host controllers support the setting of latency tolerance. Accordingly, implement the PM QoS ->set_latency_tolerance() callback. The raw register values are also exposed via debugfs. Intel EHL controllers require this support. Signed-off-by: Adrian Hunter Fixes: cb3a7d4a0aec4e ("mmc: sdhci-pci: Add support for Intel EHL") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20200818104508.7149-1-adrian.hunter@intel.com Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci-pci-core.c | 154 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 154 insertions(+) diff --git a/drivers/mmc/host/sdhci-pci-core.c b/drivers/mmc/host/sdhci-pci-core.c index 91d0cb08238c..ddea4621cda1 100644 --- a/drivers/mmc/host/sdhci-pci-core.c +++ b/drivers/mmc/host/sdhci-pci-core.c @@ -24,6 +24,8 @@ #include #include #include +#include +#include #include #include #include @@ -520,6 +522,8 @@ struct intel_host { bool rpm_retune_ok; u32 glk_rx_ctrl1; u32 glk_tun_val; + u32 active_ltr; + u32 idle_ltr; }; static const guid_t intel_dsm_guid = @@ -764,6 +768,108 @@ static int intel_execute_tuning(struct mmc_host *mmc, u32 opcode) return 0; } +#define INTEL_ACTIVELTR 0x804 +#define INTEL_IDLELTR 0x808 + +#define INTEL_LTR_REQ BIT(15) +#define INTEL_LTR_SCALE_MASK GENMASK(11, 10) +#define INTEL_LTR_SCALE_1US (2 << 10) +#define INTEL_LTR_SCALE_32US (3 << 10) +#define INTEL_LTR_VALUE_MASK GENMASK(9, 0) + +static void intel_cache_ltr(struct sdhci_pci_slot *slot) +{ + struct intel_host *intel_host = sdhci_pci_priv(slot); + struct sdhci_host *host = slot->host; + + intel_host->active_ltr = readl(host->ioaddr + INTEL_ACTIVELTR); + intel_host->idle_ltr = readl(host->ioaddr + INTEL_IDLELTR); +} + +static void intel_ltr_set(struct device *dev, s32 val) +{ + struct sdhci_pci_chip *chip = dev_get_drvdata(dev); + struct sdhci_pci_slot *slot = chip->slots[0]; + struct intel_host *intel_host = sdhci_pci_priv(slot); + struct sdhci_host *host = slot->host; + u32 ltr; + + pm_runtime_get_sync(dev); + + /* + * Program latency tolerance (LTR) accordingly what has been asked + * by the PM QoS layer or disable it in case we were passed + * negative value or PM_QOS_LATENCY_ANY. + */ + ltr = readl(host->ioaddr + INTEL_ACTIVELTR); + + if (val == PM_QOS_LATENCY_ANY || val < 0) { + ltr &= ~INTEL_LTR_REQ; + } else { + ltr |= INTEL_LTR_REQ; + ltr &= ~INTEL_LTR_SCALE_MASK; + ltr &= ~INTEL_LTR_VALUE_MASK; + + if (val > INTEL_LTR_VALUE_MASK) { + val >>= 5; + if (val > INTEL_LTR_VALUE_MASK) + val = INTEL_LTR_VALUE_MASK; + ltr |= INTEL_LTR_SCALE_32US | val; + } else { + ltr |= INTEL_LTR_SCALE_1US | val; + } + } + + if (ltr == intel_host->active_ltr) + goto out; + + writel(ltr, host->ioaddr + INTEL_ACTIVELTR); + writel(ltr, host->ioaddr + INTEL_IDLELTR); + + /* Cache the values into lpss structure */ + intel_cache_ltr(slot); +out: + pm_runtime_put_autosuspend(dev); +} + +static bool intel_use_ltr(struct sdhci_pci_chip *chip) +{ + switch (chip->pdev->device) { + case PCI_DEVICE_ID_INTEL_BYT_EMMC: + case PCI_DEVICE_ID_INTEL_BYT_EMMC2: + case PCI_DEVICE_ID_INTEL_BYT_SDIO: + case PCI_DEVICE_ID_INTEL_BYT_SD: + case PCI_DEVICE_ID_INTEL_BSW_EMMC: + case PCI_DEVICE_ID_INTEL_BSW_SDIO: + case PCI_DEVICE_ID_INTEL_BSW_SD: + return false; + default: + return true; + } +} + +static void intel_ltr_expose(struct sdhci_pci_chip *chip) +{ + struct device *dev = &chip->pdev->dev; + + if (!intel_use_ltr(chip)) + return; + + dev->power.set_latency_tolerance = intel_ltr_set; + dev_pm_qos_expose_latency_tolerance(dev); +} + +static void intel_ltr_hide(struct sdhci_pci_chip *chip) +{ + struct device *dev = &chip->pdev->dev; + + if (!intel_use_ltr(chip)) + return; + + dev_pm_qos_hide_latency_tolerance(dev); + dev->power.set_latency_tolerance = NULL; +} + static void byt_probe_slot(struct sdhci_pci_slot *slot) { struct mmc_host_ops *ops = &slot->host->mmc_host_ops; @@ -778,6 +884,43 @@ static void byt_probe_slot(struct sdhci_pci_slot *slot) ops->start_signal_voltage_switch = intel_start_signal_voltage_switch; device_property_read_u32(dev, "max-frequency", &mmc->f_max); + + if (!mmc->slotno) { + slot->chip->slots[mmc->slotno] = slot; + intel_ltr_expose(slot->chip); + } +} + +static void byt_add_debugfs(struct sdhci_pci_slot *slot) +{ + struct intel_host *intel_host = sdhci_pci_priv(slot); + struct mmc_host *mmc = slot->host->mmc; + struct dentry *dir = mmc->debugfs_root; + + if (!intel_use_ltr(slot->chip)) + return; + + debugfs_create_x32("active_ltr", 0444, dir, &intel_host->active_ltr); + debugfs_create_x32("idle_ltr", 0444, dir, &intel_host->idle_ltr); + + intel_cache_ltr(slot); +} + +static int byt_add_host(struct sdhci_pci_slot *slot) +{ + int ret = sdhci_add_host(slot->host); + + if (!ret) + byt_add_debugfs(slot); + return ret; +} + +static void byt_remove_slot(struct sdhci_pci_slot *slot, int dead) +{ + struct mmc_host *mmc = slot->host->mmc; + + if (!mmc->slotno) + intel_ltr_hide(slot->chip); } static int byt_emmc_probe_slot(struct sdhci_pci_slot *slot) @@ -859,6 +1002,8 @@ static int glk_emmc_add_host(struct sdhci_pci_slot *slot) if (ret) goto cleanup; + byt_add_debugfs(slot); + return 0; cleanup: @@ -1036,6 +1181,8 @@ static const struct sdhci_pci_fixes sdhci_intel_byt_emmc = { #endif .allow_runtime_pm = true, .probe_slot = byt_emmc_probe_slot, + .add_host = byt_add_host, + .remove_slot = byt_remove_slot, .quirks = SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC | SDHCI_QUIRK_NO_LED, .quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN | @@ -1049,6 +1196,7 @@ static const struct sdhci_pci_fixes sdhci_intel_glk_emmc = { .allow_runtime_pm = true, .probe_slot = glk_emmc_probe_slot, .add_host = glk_emmc_add_host, + .remove_slot = byt_remove_slot, #ifdef CONFIG_PM_SLEEP .suspend = sdhci_cqhci_suspend, .resume = sdhci_cqhci_resume, @@ -1079,6 +1227,8 @@ static const struct sdhci_pci_fixes sdhci_ni_byt_sdio = { SDHCI_QUIRK2_PRESET_VALUE_BROKEN, .allow_runtime_pm = true, .probe_slot = ni_byt_sdio_probe_slot, + .add_host = byt_add_host, + .remove_slot = byt_remove_slot, .ops = &sdhci_intel_byt_ops, .priv_size = sizeof(struct intel_host), }; @@ -1096,6 +1246,8 @@ static const struct sdhci_pci_fixes sdhci_intel_byt_sdio = { SDHCI_QUIRK2_PRESET_VALUE_BROKEN, .allow_runtime_pm = true, .probe_slot = byt_sdio_probe_slot, + .add_host = byt_add_host, + .remove_slot = byt_remove_slot, .ops = &sdhci_intel_byt_ops, .priv_size = sizeof(struct intel_host), }; @@ -1115,6 +1267,8 @@ static const struct sdhci_pci_fixes sdhci_intel_byt_sd = { .allow_runtime_pm = true, .own_cd_for_runtime_pm = true, .probe_slot = byt_sd_probe_slot, + .add_host = byt_add_host, + .remove_slot = byt_remove_slot, .ops = &sdhci_intel_byt_ops, .priv_size = sizeof(struct intel_host), }; -- cgit v1.2.3 From 470c8c409e1c8e8f2fafa94c6269dedd59c96b54 Mon Sep 17 00:00:00 2001 From: Raul E Rangel Date: Mon, 28 Sep 2020 15:59:20 -0600 Subject: mmc: sdhci-acpi: AMDI0040: Set SDHCI_QUIRK2_PRESET_VALUE_BROKEN commit f23cc3ba491af77395cea3f9d51204398729f26b upstream. This change fixes HS400 tuning for devices with invalid presets. SDHCI presets are not currently used for eMMC HS/HS200/HS400, but are used for DDR52. The HS400 retuning sequence is: HS400->DDR52->HS->HS200->Perform Tuning->HS->HS400 This means that when HS400 tuning happens, we transition through DDR52 for a very brief period. This causes presets to be enabled unintentionally and stay enabled when transitioning back to HS200 or HS400. Some firmware has invalid presets, so we end up with driver strengths that can cause I/O problems. Fixes: 34597a3f60b1 ("mmc: sdhci-acpi: Add support for ACPI HID of AMD Controller with HS400") Signed-off-by: Raul E Rangel Acked-by: Adrian Hunter Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20200928154718.1.Icc21d4b2f354e83e26e57e270dc952f5fe0b0a40@changeid Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci-acpi.c | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/drivers/mmc/host/sdhci-acpi.c b/drivers/mmc/host/sdhci-acpi.c index b2d924c5e82e..0dc8eafdc81d 100644 --- a/drivers/mmc/host/sdhci-acpi.c +++ b/drivers/mmc/host/sdhci-acpi.c @@ -658,6 +658,43 @@ static int sdhci_acpi_emmc_amd_probe_slot(struct platform_device *pdev, (host->mmc->caps & MMC_CAP_1_8V_DDR)) host->mmc->caps2 = MMC_CAP2_HS400_1_8V; + /* + * There are two types of presets out in the wild: + * 1) Default/broken presets. + * These presets have two sets of problems: + * a) The clock divisor for SDR12, SDR25, and SDR50 is too small. + * This results in clock frequencies that are 2x higher than + * acceptable. i.e., SDR12 = 25 MHz, SDR25 = 50 MHz, SDR50 = + * 100 MHz.x + * b) The HS200 and HS400 driver strengths don't match. + * By default, the SDR104 preset register has a driver strength of + * A, but the (internal) HS400 preset register has a driver + * strength of B. As part of initializing HS400, HS200 tuning + * needs to be performed. Having different driver strengths + * between tuning and operation is wrong. It results in different + * rise/fall times that lead to incorrect sampling. + * 2) Firmware with properly initialized presets. + * These presets have proper clock divisors. i.e., SDR12 => 12MHz, + * SDR25 => 25 MHz, SDR50 => 50 MHz. Additionally the HS200 and + * HS400 preset driver strengths match. + * + * Enabling presets for HS400 doesn't work for the following reasons: + * 1) sdhci_set_ios has a hard coded list of timings that are used + * to determine if presets should be enabled. + * 2) sdhci_get_preset_value is using a non-standard register to + * read out HS400 presets. The AMD controller doesn't support this + * non-standard register. In fact, it doesn't expose the HS400 + * preset register anywhere in the SDHCI memory map. This results + * in reading a garbage value and using the wrong presets. + * + * Since HS400 and HS200 presets must be identical, we could + * instead use the the SDR104 preset register. + * + * If the above issues are resolved we could remove this quirk for + * firmware that that has valid presets (i.e., SDR12 <= 12 MHz). + */ + host->quirks2 |= SDHCI_QUIRK2_PRESET_VALUE_BROKEN; + host->mmc_host_ops.select_drive_strength = amd_select_drive_strength; host->mmc_host_ops.set_ios = amd_set_ios; host->mmc_host_ops.execute_tuning = amd_sdhci_execute_tuning; -- cgit v1.2.3 From 2d1c48227780432605bd6e79b60171886c44da30 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Mon, 5 Oct 2020 03:44:01 +0200 Subject: seccomp: Make duplicate listener detection non-racy commit dfe719fef03d752f1682fa8aeddf30ba501c8555 upstream. Currently, init_listener() tries to prevent adding a filter with SECCOMP_FILTER_FLAG_NEW_LISTENER if one of the existing filters already has a listener. However, this check happens without holding any lock that would prevent another thread from concurrently installing a new filter (potentially with a listener) on top of the ones we already have. Theoretically, this is also a data race: The plain load from current->seccomp.filter can race with concurrent writes to the same location. Fix it by moving the check into the region that holds the siglock to guard against concurrent TSYNC. (The "Fixes" tag points to the commit that introduced the theoretical data race; concurrent installation of another filter with TSYNC only became possible later, in commit 51891498f2da ("seccomp: allow TSYNC and USER_NOTIF together").) Fixes: 6a21cc50f0c7 ("seccomp: add a return code to trap to userspace") Reviewed-by: Tycho Andersen Signed-off-by: Jann Horn Signed-off-by: Kees Cook Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20201005014401.490175-1-jannh@google.com Signed-off-by: Greg Kroah-Hartman --- kernel/seccomp.c | 38 +++++++++++++++++++++++++++++++------- 1 file changed, 31 insertions(+), 7 deletions(-) diff --git a/kernel/seccomp.c b/kernel/seccomp.c index e0fd97235653..0d991e9626f6 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -1219,13 +1219,7 @@ static const struct file_operations seccomp_notify_ops = { static struct file *init_listener(struct seccomp_filter *filter) { - struct file *ret = ERR_PTR(-EBUSY); - struct seccomp_filter *cur; - - for (cur = current->seccomp.filter; cur; cur = cur->prev) { - if (cur->notif) - goto out; - } + struct file *ret; ret = ERR_PTR(-ENOMEM); filter->notif = kzalloc(sizeof(*(filter->notif)), GFP_KERNEL); @@ -1252,6 +1246,31 @@ out: return ret; } +/* + * Does @new_child have a listener while an ancestor also has a listener? + * If so, we'll want to reject this filter. + * This only has to be tested for the current process, even in the TSYNC case, + * because TSYNC installs @child with the same parent on all threads. + * Note that @new_child is not hooked up to its parent at this point yet, so + * we use current->seccomp.filter. + */ +static bool has_duplicate_listener(struct seccomp_filter *new_child) +{ + struct seccomp_filter *cur; + + /* must be protected against concurrent TSYNC */ + lockdep_assert_held(¤t->sighand->siglock); + + if (!new_child->notif) + return false; + for (cur = current->seccomp.filter; cur; cur = cur->prev) { + if (cur->notif) + return true; + } + + return false; +} + /** * seccomp_set_mode_filter: internal function for setting seccomp filter * @flags: flags to change filter behavior @@ -1321,6 +1340,11 @@ static long seccomp_set_mode_filter(unsigned int flags, if (!seccomp_may_assign_mode(seccomp_mode)) goto out; + if (has_duplicate_listener(prepared)) { + ret = -EBUSY; + goto out; + } + ret = seccomp_attach_filter(flags, prepared); if (ret) goto out; -- cgit v1.2.3 From 3674b0445b70e49105f03db7f3ac16eea1051d8c Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Wed, 26 Aug 2020 10:00:46 -0700 Subject: selftests/x86/fsgsbase: Test PTRACE_PEEKUSER for GSBASE with invalid LDT GS commit 1b9abd1755ad947d7c9913e92e7837b533124c90 upstream. This tests commit: 8ab49526b53d ("x86/fsgsbase/64: Fix NULL deref in 86_fsgsbase_read_task") Unpatched kernels will OOPS. Signed-off-by: Andy Lutomirski Signed-off-by: Ingo Molnar Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/c618ae86d1f757e01b1a8e79869f553cb88acf9a.1598461151.git.luto@kernel.org Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/x86/fsgsbase.c | 65 ++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) diff --git a/tools/testing/selftests/x86/fsgsbase.c b/tools/testing/selftests/x86/fsgsbase.c index 5f3aea210e01..757bdb218a66 100644 --- a/tools/testing/selftests/x86/fsgsbase.c +++ b/tools/testing/selftests/x86/fsgsbase.c @@ -442,6 +442,68 @@ static void test_unexpected_base(void) #define USER_REGS_OFFSET(r) offsetof(struct user_regs_struct, r) +static void test_ptrace_write_gs_read_base(void) +{ + int status; + pid_t child = fork(); + + if (child < 0) + err(1, "fork"); + + if (child == 0) { + printf("[RUN]\tPTRACE_POKE GS, read GSBASE back\n"); + + printf("[RUN]\tARCH_SET_GS to 1\n"); + if (syscall(SYS_arch_prctl, ARCH_SET_GS, 1) != 0) + err(1, "ARCH_SET_GS"); + + if (ptrace(PTRACE_TRACEME, 0, NULL, NULL) != 0) + err(1, "PTRACE_TRACEME"); + + raise(SIGTRAP); + _exit(0); + } + + wait(&status); + + if (WSTOPSIG(status) == SIGTRAP) { + unsigned long base; + unsigned long gs_offset = USER_REGS_OFFSET(gs); + unsigned long base_offset = USER_REGS_OFFSET(gs_base); + + /* Read the initial base. It should be 1. */ + base = ptrace(PTRACE_PEEKUSER, child, base_offset, NULL); + if (base == 1) { + printf("[OK]\tGSBASE started at 1\n"); + } else { + nerrs++; + printf("[FAIL]\tGSBASE started at 0x%lx\n", base); + } + + printf("[RUN]\tSet GS = 0x7, read GSBASE\n"); + + /* Poke an LDT selector into GS. */ + if (ptrace(PTRACE_POKEUSER, child, gs_offset, 0x7) != 0) + err(1, "PTRACE_POKEUSER"); + + /* And read the base. */ + base = ptrace(PTRACE_PEEKUSER, child, base_offset, NULL); + + if (base == 0 || base == 1) { + printf("[OK]\tGSBASE reads as 0x%lx with invalid GS\n", base); + } else { + nerrs++; + printf("[FAIL]\tGSBASE=0x%lx (should be 0 or 1)\n", base); + } + } + + ptrace(PTRACE_CONT, child, NULL, NULL); + + wait(&status); + if (!WIFEXITED(status)) + printf("[WARN]\tChild didn't exit cleanly.\n"); +} + static void test_ptrace_write_gsbase(void) { int status; @@ -511,6 +573,9 @@ int main() shared_scratch = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_SHARED, -1, 0); + /* Do these tests before we have an LDT. */ + test_ptrace_write_gs_read_base(); + /* Probe FSGSBASE */ sethandler(SIGILL, sigill, 0); if (sigsetjmp(jmpbuf, 1) == 0) { -- cgit v1.2.3 From f9a48ff99961cb9c1bbb735066892e37da23f9da Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Mon, 28 Sep 2020 06:47:26 -0700 Subject: perf/x86/intel: Fix Ice Lake event constraint table commit 010cb00265f150bf82b23c02ad1fb87ce5c781e1 upstream. An error occues when sampling non-PEBS INST_RETIRED.PREC_DIST(0x01c0) event. perf record -e cpu/event=0xc0,umask=0x01/ -- sleep 1 Error: The sys_perf_event_open() syscall returned with 22 (Invalid argument) for event (cpu/event=0xc0,umask=0x01/). /bin/dmesg | grep -i perf may provide additional information. The idxmsk64 of the event is set to 0. The event never be successfully scheduled. The event should be limit to the fixed counter 0. Fixes: 6017608936c1 ("perf/x86/intel: Add Icelake support") Reported-by: Yi, Ammy Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20200928134726.13090-1-kan.liang@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/events/intel/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 0461ab257df6..c4def9077747 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -243,7 +243,7 @@ static struct extra_reg intel_skl_extra_regs[] __read_mostly = { static struct event_constraint intel_icl_event_constraints[] = { FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ - INTEL_UEVENT_CONSTRAINT(0x1c0, 0), /* INST_RETIRED.PREC_DIST */ + FIXED_EVENT_CONSTRAINT(0x01c0, 0), /* INST_RETIRED.PREC_DIST */ FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ FIXED_EVENT_CONSTRAINT(0x0400, 3), /* SLOTS */ -- cgit v1.2.3 From 2e2a324641f9f70b304d702d3c5b56a232fe78b3 Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Tue, 8 Sep 2020 16:47:37 -0500 Subject: perf/x86/amd/ibs: Don't include randomized bits in get_ibs_op_count() commit 680d69635005ba0e58fe3f4c52fc162b8fc743b0 upstream. get_ibs_op_count() adds hardware's current count (IbsOpCurCnt) bits to its count regardless of hardware's valid status. According to the PPR for AMD Family 17h Model 31h B0 55803 Rev 0.54, if the counter rolls over, valid status is set, and the lower 7 bits of IbsOpCurCnt are randomized by hardware. Don't include those bits in the driver's event count. Fixes: 8b1e13638d46 ("perf/x86-ibs: Fix usage of IBS op current count") Signed-off-by: Kim Phillips Signed-off-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org Link: https://bugzilla.kernel.org/show_bug.cgi?id=206537 Signed-off-by: Greg Kroah-Hartman --- arch/x86/events/amd/ibs.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c index a023cbe21230..6a241d6323df 100644 --- a/arch/x86/events/amd/ibs.c +++ b/arch/x86/events/amd/ibs.c @@ -335,11 +335,15 @@ static u64 get_ibs_op_count(u64 config) { u64 count = 0; + /* + * If the internal 27-bit counter rolled over, the count is MaxCnt + * and the lower 7 bits of CurCnt are randomized. + * Otherwise CurCnt has the full 27-bit current counter value. + */ if (config & IBS_OP_VAL) - count += (config & IBS_OP_MAX_CNT) << 4; /* cnt rolled over */ - - if (ibs_caps & IBS_CAPS_RDWROPCNT) - count += (config & IBS_OP_CUR_CNT) >> 32; + count = (config & IBS_OP_MAX_CNT) << 4; + else if (ibs_caps & IBS_CAPS_RDWROPCNT) + count = (config & IBS_OP_CUR_CNT) >> 32; return count; } -- cgit v1.2.3 From d789e1c5b1ce2fbaca88fdefd550f37bb73885e6 Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Tue, 8 Sep 2020 16:47:38 -0500 Subject: perf/x86/amd/ibs: Fix raw sample data accumulation commit 36e1be8ada994d509538b3b1d0af8b63c351e729 upstream. Neither IbsBrTarget nor OPDATA4 are populated in IBS Fetch mode. Don't accumulate them into raw sample user data in that case. Also, in Fetch mode, add saving the IBS Fetch Control Extended MSR. Technically, there is an ABI change here with respect to the IBS raw sample data format, but I don't see any perf driver version information being included in perf.data file headers, but, existing users can detect whether the size of the sample record has reduced by 8 bytes to determine whether the IBS driver has this fix. Fixes: 904cb3677f3a ("perf/x86/amd/ibs: Update IBS MSRs and feature definitions") Reported-by: Stephane Eranian Signed-off-by: Kim Phillips Signed-off-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20200908214740.18097-6-kim.phillips@amd.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/events/amd/ibs.c | 26 ++++++++++++++++---------- arch/x86/include/asm/msr-index.h | 1 + 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c index 6a241d6323df..39169885adfa 100644 --- a/arch/x86/events/amd/ibs.c +++ b/arch/x86/events/amd/ibs.c @@ -636,18 +636,24 @@ fail: perf_ibs->offset_max, offset + 1); } while (offset < offset_max); + /* + * Read IbsBrTarget, IbsOpData4, and IbsExtdCtl separately + * depending on their availability. + * Can't add to offset_max as they are staggered + */ if (event->attr.sample_type & PERF_SAMPLE_RAW) { - /* - * Read IbsBrTarget and IbsOpData4 separately - * depending on their availability. - * Can't add to offset_max as they are staggered - */ - if (ibs_caps & IBS_CAPS_BRNTRGT) { - rdmsrl(MSR_AMD64_IBSBRTARGET, *buf++); - size++; + if (perf_ibs == &perf_ibs_op) { + if (ibs_caps & IBS_CAPS_BRNTRGT) { + rdmsrl(MSR_AMD64_IBSBRTARGET, *buf++); + size++; + } + if (ibs_caps & IBS_CAPS_OPDATA4) { + rdmsrl(MSR_AMD64_IBSOPDATA4, *buf++); + size++; + } } - if (ibs_caps & IBS_CAPS_OPDATA4) { - rdmsrl(MSR_AMD64_IBSOPDATA4, *buf++); + if (perf_ibs == &perf_ibs_fetch && (ibs_caps & IBS_CAPS_FETCHCTLEXTD)) { + rdmsrl(MSR_AMD64_ICIBSEXTDCTL, *buf++); size++; } } diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 391812e0384e..f312b6f6ac48 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -432,6 +432,7 @@ #define MSR_AMD64_IBSOP_REG_MASK ((1UL< Date: Tue, 1 Sep 2020 17:27:03 +0200 Subject: spi: sprd: Release DMA channel also on probe deferral commit 687a2e76186dcfa42f22c14b655c3fb159839e79 upstream. If dma_request_chan() for TX channel fails with EPROBE_DEFER, the RX channel would not be released and on next re-probe it would be requested second time. Fixes: 386119bc7be9 ("spi: sprd: spi: sprd: Add DMA mode support") Cc: Signed-off-by: Krzysztof Kozlowski Acked-by: Chunyan Zhang Link: https://lore.kernel.org/r/20200901152713.18629-1-krzk@kernel.org Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-sprd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-sprd.c b/drivers/spi/spi-sprd.c index fa597e27be17..e60aff990395 100644 --- a/drivers/spi/spi-sprd.c +++ b/drivers/spi/spi-sprd.c @@ -563,11 +563,11 @@ static int sprd_spi_dma_request(struct sprd_spi *ss) ss->dma.dma_chan[SPRD_SPI_TX] = dma_request_chan(ss->dev, "tx_chn"); if (IS_ERR_OR_NULL(ss->dma.dma_chan[SPRD_SPI_TX])) { + dma_release_channel(ss->dma.dma_chan[SPRD_SPI_RX]); if (PTR_ERR(ss->dma.dma_chan[SPRD_SPI_TX]) == -EPROBE_DEFER) return PTR_ERR(ss->dma.dma_chan[SPRD_SPI_TX]); dev_err(ss->dev, "request TX DMA channel failed!\n"); - dma_release_channel(ss->dma.dma_chan[SPRD_SPI_RX]); return PTR_ERR(ss->dma.dma_chan[SPRD_SPI_TX]); } -- cgit v1.2.3 From a908e29705ee0cadccf53b729bdd8f005d1b771f Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 17 Aug 2020 09:00:00 +0200 Subject: extcon: ptn5150: Fix usage of atomic GPIO with sleeping GPIO chips commit 6aaad58c872db062f7ea2761421ca748bd0931cc upstream. The driver uses atomic version of gpiod_set_value() without any real reason. It is called in a workqueue under mutex so it could sleep there. Changing it to "can_sleep" flavor allows to use the driver with all GPIO chips. Fixes: 4ed754de2d66 ("extcon: Add support for ptn5150 extcon driver") Cc: Signed-off-by: Krzysztof Kozlowski Reviewed-by: Vijai Kumar K Signed-off-by: Chanwoo Choi Signed-off-by: Greg Kroah-Hartman --- drivers/extcon/extcon-ptn5150.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/extcon/extcon-ptn5150.c b/drivers/extcon/extcon-ptn5150.c index d1c997599390..5f5252752644 100644 --- a/drivers/extcon/extcon-ptn5150.c +++ b/drivers/extcon/extcon-ptn5150.c @@ -127,7 +127,7 @@ static void ptn5150_irq_work(struct work_struct *work) case PTN5150_DFP_ATTACHED: extcon_set_state_sync(info->edev, EXTCON_USB_HOST, false); - gpiod_set_value(info->vbus_gpiod, 0); + gpiod_set_value_cansleep(info->vbus_gpiod, 0); extcon_set_state_sync(info->edev, EXTCON_USB, true); break; @@ -138,9 +138,9 @@ static void ptn5150_irq_work(struct work_struct *work) PTN5150_REG_CC_VBUS_DETECTION_MASK) >> PTN5150_REG_CC_VBUS_DETECTION_SHIFT); if (vbus) - gpiod_set_value(info->vbus_gpiod, 0); + gpiod_set_value_cansleep(info->vbus_gpiod, 0); else - gpiod_set_value(info->vbus_gpiod, 1); + gpiod_set_value_cansleep(info->vbus_gpiod, 1); extcon_set_state_sync(info->edev, EXTCON_USB_HOST, true); @@ -156,7 +156,7 @@ static void ptn5150_irq_work(struct work_struct *work) EXTCON_USB_HOST, false); extcon_set_state_sync(info->edev, EXTCON_USB, false); - gpiod_set_value(info->vbus_gpiod, 0); + gpiod_set_value_cansleep(info->vbus_gpiod, 0); } } -- cgit v1.2.3 From 8ac92a5e5fd76ca866ac7279dd8954304110379f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Beh=C3=BAn?= Date: Fri, 18 Sep 2020 00:32:58 +0200 Subject: leds: bcm6328, bcm6358: use devres LED registering function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit ff5c89d44453e7ad99502b04bf798a3fc32c758b upstream. These two drivers do not provide remove method and use devres for allocation of other resources, yet they use led_classdev_register instead of the devres variant, devm_led_classdev_register. Fix this. Signed-off-by: Marek Behún Cc: Álvaro Fernández Rojas Cc: Kevin Cernekee Cc: Jaedon Shin Signed-off-by: Pavel Machek Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/leds/leds-bcm6328.c | 2 +- drivers/leds/leds-bcm6358.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/leds/leds-bcm6328.c b/drivers/leds/leds-bcm6328.c index c50d34e2b098..de932755bcb3 100644 --- a/drivers/leds/leds-bcm6328.c +++ b/drivers/leds/leds-bcm6328.c @@ -332,7 +332,7 @@ static int bcm6328_led(struct device *dev, struct device_node *nc, u32 reg, led->cdev.brightness_set = bcm6328_led_set; led->cdev.blink_set = bcm6328_blink_set; - rc = led_classdev_register(dev, &led->cdev); + rc = devm_led_classdev_register(dev, &led->cdev); if (rc < 0) return rc; diff --git a/drivers/leds/leds-bcm6358.c b/drivers/leds/leds-bcm6358.c index aec285fd21c0..dbb8953334d9 100644 --- a/drivers/leds/leds-bcm6358.c +++ b/drivers/leds/leds-bcm6358.c @@ -137,7 +137,7 @@ static int bcm6358_led(struct device *dev, struct device_node *nc, u32 reg, led->cdev.brightness_set = bcm6358_led_set; - rc = led_classdev_register(dev, &led->cdev); + rc = devm_led_classdev_register(dev, &led->cdev); if (rc < 0) return rc; -- cgit v1.2.3 From 2f3cb993a6f206a2c4e598a640fd3013cd3358ad Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 28 Jul 2020 13:22:08 +0200 Subject: media: uvcvideo: Fix uvc_ctrl_fixup_xu_info() not having any effect commit 93df48d37c3f03886d84831992926333e7810640 upstream. uvc_ctrl_add_info() calls uvc_ctrl_get_flags() which will override the fixed-up flags set by uvc_ctrl_fixup_xu_info(). uvc_ctrl_init_xu_ctrl() already calls uvc_ctrl_get_flags() before calling uvc_ctrl_add_info(), so the uvc_ctrl_get_flags() call in uvc_ctrl_add_info() is not necessary for xu ctrls. This commit moves the uvc_ctrl_get_flags() call for normal controls from uvc_ctrl_add_info() to uvc_ctrl_init_ctrl(), so that we no longer call uvc_ctrl_get_flags() twice for xu controls and so that we no longer override the fixed-up flags set by uvc_ctrl_fixup_xu_info(). This fixes the xu motor controls not working properly on a Logitech 046d:08cc, and presumably also on the other Logitech models which have a quirk for this in the uvc_ctrl_fixup_xu_info() function. Cc: stable@vger.kernel.org Signed-off-by: Hans de Goede Signed-off-by: Laurent Pinchart Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/uvc/uvc_ctrl.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/media/usb/uvc/uvc_ctrl.c b/drivers/media/usb/uvc/uvc_ctrl.c index c13ed95cb06f..36abe47997b0 100644 --- a/drivers/media/usb/uvc/uvc_ctrl.c +++ b/drivers/media/usb/uvc/uvc_ctrl.c @@ -2033,13 +2033,6 @@ static int uvc_ctrl_add_info(struct uvc_device *dev, struct uvc_control *ctrl, goto done; } - /* - * Retrieve control flags from the device. Ignore errors and work with - * default flag values from the uvc_ctrl array when the device doesn't - * properly implement GET_INFO on standard controls. - */ - uvc_ctrl_get_flags(dev, ctrl, &ctrl->info); - ctrl->initialized = 1; uvc_trace(UVC_TRACE_CONTROL, "Added control %pUl/%u to device %s " @@ -2262,6 +2255,13 @@ static void uvc_ctrl_init_ctrl(struct uvc_device *dev, struct uvc_control *ctrl) if (uvc_entity_match_guid(ctrl->entity, info->entity) && ctrl->index == info->index) { uvc_ctrl_add_info(dev, ctrl, info); + /* + * Retrieve control flags from the device. Ignore errors + * and work with default flag values from the uvc_ctrl + * array when the device doesn't properly implement + * GET_INFO on standard controls. + */ + uvc_ctrl_get_flags(dev, ctrl, &ctrl->info); break; } } -- cgit v1.2.3 From f8a6a2ed4b7d1c3c8631eeb6d00572bc853094a8 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 4 Sep 2020 10:58:51 +0200 Subject: fs: Don't invalidate page buffers in block_write_full_page() commit 6dbf7bb555981fb5faf7b691e8f6169fc2b2e63b upstream. If block_write_full_page() is called for a page that is beyond current inode size, it will truncate page buffers for the page and return 0. This logic has been added in 2.5.62 in commit 81eb69062588 ("fix ext3 BUG due to race with truncate") in history.git tree to fix a problem with ext3 in data=ordered mode. This particular problem doesn't exist anymore because ext3 is long gone and ext4 handles ordered data differently. Also normally buffers are invalidated by truncate code and there's no need to specially handle this in ->writepage() code. This invalidation of page buffers in block_write_full_page() is causing issues to filesystems (e.g. ext4 or ocfs2) when block device is shrunk under filesystem's hands and metadata buffers get discarded while being tracked by the journalling layer. Although it is obviously "not supported" it can cause kernel crashes like: [ 7986.689400] BUG: unable to handle kernel NULL pointer dereference at +0000000000000008 [ 7986.697197] PGD 0 P4D 0 [ 7986.699724] Oops: 0002 [#1] SMP PTI [ 7986.703200] CPU: 4 PID: 203778 Comm: jbd2/dm-3-8 Kdump: loaded Tainted: G +O --------- - - 4.18.0-147.5.0.5.h126.eulerosv2r9.x86_64 #1 [ 7986.716438] Hardware name: Huawei RH2288H V3/BC11HGSA0, BIOS 1.57 08/11/2015 [ 7986.723462] RIP: 0010:jbd2_journal_grab_journal_head+0x1b/0x40 [jbd2] ... [ 7986.810150] Call Trace: [ 7986.812595] __jbd2_journal_insert_checkpoint+0x23/0x70 [jbd2] [ 7986.818408] jbd2_journal_commit_transaction+0x155f/0x1b60 [jbd2] [ 7986.836467] kjournald2+0xbd/0x270 [jbd2] which is not great. The crash happens because bh->b_private is suddently NULL although BH_JBD flag is still set (this is because block_invalidatepage() cleared BH_Mapped flag and subsequent bh lookup found buffer without BH_Mapped set, called init_page_buffers() which has rewritten bh->b_private). So just remove the invalidation in block_write_full_page(). Note that the buffer cache invalidation when block device changes size is already careful to avoid similar problems by using invalidate_mapping_pages() which skips busy buffers so it was only this odd block_write_full_page() behavior that could tear down bdev buffers under filesystem's hands. Reported-by: Ye Bin Signed-off-by: Jan Kara Reviewed-by: Christoph Hellwig CC: stable@vger.kernel.org Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- fs/buffer.c | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/fs/buffer.c b/fs/buffer.c index 22d8ac4a8c40..0d7bd7712076 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2739,16 +2739,6 @@ int nobh_writepage(struct page *page, get_block_t *get_block, /* Is the page fully outside i_size? (truncate in progress) */ offset = i_size & (PAGE_SIZE-1); if (page->index >= end_index+1 || !offset) { - /* - * The page may have dirty, unmapped buffers. For example, - * they may have been added in ext3_writepage(). Make them - * freeable here, so the page does not leak. - */ -#if 0 - /* Not really sure about this - do we need this ? */ - if (page->mapping->a_ops->invalidatepage) - page->mapping->a_ops->invalidatepage(page, offset); -#endif unlock_page(page); return 0; /* don't care */ } @@ -2943,12 +2933,6 @@ int block_write_full_page(struct page *page, get_block_t *get_block, /* Is the page fully outside i_size? (truncate in progress) */ offset = i_size & (PAGE_SIZE-1); if (page->index >= end_index+1 || !offset) { - /* - * The page may have dirty, unmapped buffers. For example, - * they may have been added in ext3_writepage(). Make them - * freeable here, so the page does not leak. - */ - do_invalidatepage(page, 0, PAGE_SIZE); unlock_page(page); return 0; /* don't care */ } -- cgit v1.2.3 From c75b77cb9f014031e05b2dcc9c0ff5ad57fbeda5 Mon Sep 17 00:00:00 2001 From: Ashish Sangwan Date: Mon, 5 Oct 2020 02:22:43 -0700 Subject: NFS: fix nfs_path in case of a rename retry commit 247db73560bc3e5aef6db50c443c3c0db115bc93 upstream. We are generating incorrect path in case of rename retry because we are restarting from wrong dentry. We should restart from the dentry which was received in the call to nfs_path. CC: stable@vger.kernel.org Signed-off-by: Ashish Sangwan Signed-off-by: Anna Schumaker Signed-off-by: Greg Kroah-Hartman --- fs/nfs/namespace.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index 9287eb666322..2db17fdf516b 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -31,9 +31,9 @@ int nfs_mountpoint_expiry_timeout = 500 * HZ; /* * nfs_path - reconstruct the path given an arbitrary dentry * @base - used to return pointer to the end of devname part of path - * @dentry - pointer to dentry + * @dentry_in - pointer to dentry * @buffer - result buffer - * @buflen - length of buffer + * @buflen_in - length of buffer * @flags - options (see below) * * Helper function for constructing the server pathname @@ -48,15 +48,19 @@ int nfs_mountpoint_expiry_timeout = 500 * HZ; * the original device (export) name * (if unset, the original name is returned verbatim) */ -char *nfs_path(char **p, struct dentry *dentry, char *buffer, ssize_t buflen, - unsigned flags) +char *nfs_path(char **p, struct dentry *dentry_in, char *buffer, + ssize_t buflen_in, unsigned flags) { char *end; int namelen; unsigned seq; const char *base; + struct dentry *dentry; + ssize_t buflen; rename_retry: + buflen = buflen_in; + dentry = dentry_in; end = buffer+buflen; *--end = '\0'; buflen--; -- cgit v1.2.3 From 5e25b44cc2eb9317864426a262445fe52d9fe02c Mon Sep 17 00:00:00 2001 From: "dmitry.torokhov@gmail.com" Date: Sun, 4 Oct 2020 22:11:25 -0700 Subject: ACPI: button: fix handling lid state changes when input device closed commit 21988a8e51479ceffe7b0568b170effabb708dfe upstream. The original intent of 84d3f6b76447 was to delay evaluating lid state until all drivers have been loaded, with input device being opened from userspace serving as a signal for this condition. Let's ensure that state updates happen even if userspace closed (or in the future inhibited) input device. Note that if we go through suspend/resume cycle we assume the system has been fully initialized even if LID input device has not been opened yet. This has a side-effect of fixing access to input->users outside of input->mutex protections by the way of eliminating said accesses and using driver private flag. Fixes: 84d3f6b76447 ("ACPI / button: Delay acpi_lid_initialize_state() until first user space open") Signed-off-by: Dmitry Torokhov Reviewed-by: Hans de Goede Cc: 4.15+ # 4.15+ Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/button.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/acpi/button.c b/drivers/acpi/button.c index 985afc62da82..690bfe67e643 100644 --- a/drivers/acpi/button.c +++ b/drivers/acpi/button.c @@ -136,6 +136,7 @@ struct acpi_button { int last_state; ktime_t last_time; bool suspended; + bool lid_state_initialized; }; static BLOCKING_NOTIFIER_HEAD(acpi_lid_notifier); @@ -391,6 +392,8 @@ static int acpi_lid_update_state(struct acpi_device *device, static void acpi_lid_initialize_state(struct acpi_device *device) { + struct acpi_button *button = acpi_driver_data(device); + switch (lid_init_state) { case ACPI_BUTTON_LID_INIT_OPEN: (void)acpi_lid_notify_state(device, 1); @@ -402,13 +405,14 @@ static void acpi_lid_initialize_state(struct acpi_device *device) default: break; } + + button->lid_state_initialized = true; } static void acpi_button_notify(struct acpi_device *device, u32 event) { struct acpi_button *button = acpi_driver_data(device); struct input_dev *input; - int users; switch (event) { case ACPI_FIXED_HARDWARE_EVENT: @@ -417,10 +421,7 @@ static void acpi_button_notify(struct acpi_device *device, u32 event) case ACPI_BUTTON_NOTIFY_STATUS: input = button->input; if (button->type == ACPI_BUTTON_TYPE_LID) { - mutex_lock(&button->input->mutex); - users = button->input->users; - mutex_unlock(&button->input->mutex); - if (users) + if (button->lid_state_initialized) acpi_lid_update_state(device, true); } else { int keycode; @@ -465,7 +466,7 @@ static int acpi_button_resume(struct device *dev) struct acpi_button *button = acpi_driver_data(device); button->suspended = false; - if (button->type == ACPI_BUTTON_TYPE_LID && button->input->users) { + if (button->type == ACPI_BUTTON_TYPE_LID) { button->last_state = !!acpi_lid_evaluate_state(device); button->last_time = ktime_get(); acpi_lid_initialize_state(device); -- cgit v1.2.3 From 9578d7381432e4ea64e2d4120305370fe4904392 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Sun, 27 Sep 2020 22:50:42 +0100 Subject: ACPI / extlog: Check for RDMSR failure commit 7cecb47f55e00282f972a1e0b09136c8cd938221 upstream. extlog_init() uses rdmsrl() to read an MSR, which on older CPUs provokes a error message at boot: unchecked MSR access error: RDMSR from 0x179 at rIP: 0xcd047307 (native_read_msr+0x7/0x40) Use rdmsrl_safe() instead, and return -ENODEV if it fails. Reported-by: jim@photojim.ca References: https://bugs.debian.org/971058 Cc: All applicable Signed-off-by: Ben Hutchings Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/acpi_extlog.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/acpi/acpi_extlog.c b/drivers/acpi/acpi_extlog.c index 8596a106a933..91d0b0fc392b 100644 --- a/drivers/acpi/acpi_extlog.c +++ b/drivers/acpi/acpi_extlog.c @@ -223,9 +223,9 @@ static int __init extlog_init(void) u64 cap; int rc; - rdmsrl(MSR_IA32_MCG_CAP, cap); - - if (!(cap & MCG_ELOG_P) || !extlog_get_l1addr()) + if (rdmsrl_safe(MSR_IA32_MCG_CAP, &cap) || + !(cap & MCG_ELOG_P) || + !extlog_get_l1addr()) return -ENODEV; if (edac_get_report_status() == EDAC_REPORTING_FORCE) { -- cgit v1.2.3 From 1a5f62a3c6942b280835cb7a35319f14ae947396 Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Sun, 13 Sep 2020 16:34:03 -0600 Subject: ACPI: video: use ACPI backlight for HP 635 Notebook commit b226faab4e7890bbbccdf794e8b94276414f9058 upstream. The default backlight interface is AMD's radeon_bl0 which does not work on this system, so use the ACPI backlight interface on it instead. BugLink: https://bugs.launchpad.net/bugs/1894667 Cc: All applicable Signed-off-by: Alex Hung [ rjw: Changelog edits ] Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/video_detect.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c index 5bcb4c01ec5f..55af78b55c51 100644 --- a/drivers/acpi/video_detect.c +++ b/drivers/acpi/video_detect.c @@ -282,6 +282,15 @@ static const struct dmi_system_id video_detect_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "530U4E/540U4E"), }, }, + /* https://bugs.launchpad.net/bugs/1894667 */ + { + .callback = video_detect_force_video, + .ident = "HP 635 Notebook", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP 635 Notebook PC"), + }, + }, /* Non win8 machines which need native backlight nevertheless */ { -- cgit v1.2.3 From 6341984bef17a91fa91d8d9d7b31feeb81b70475 Mon Sep 17 00:00:00 2001 From: Jamie Iles Date: Mon, 12 Oct 2020 14:04:46 +0100 Subject: ACPI: debug: don't allow debugging when ACPI is disabled commit 0fada277147ffc6d694aa32162f51198d4f10d94 upstream. If ACPI is disabled then loading the acpi_dbg module will result in the following splat when lock debugging is enabled. DEBUG_LOCKS_WARN_ON(lock->magic != lock) WARNING: CPU: 0 PID: 1 at kernel/locking/mutex.c:938 __mutex_lock+0xa10/0x1290 Kernel panic - not syncing: panic_on_warn set ... CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.9.0-rc8+ #103 Hardware name: linux,dummy-virt (DT) Call trace: dump_backtrace+0x0/0x4d8 show_stack+0x34/0x48 dump_stack+0x174/0x1f8 panic+0x360/0x7a0 __warn+0x244/0x2ec report_bug+0x240/0x398 bug_handler+0x50/0xc0 call_break_hook+0x160/0x1d8 brk_handler+0x30/0xc0 do_debug_exception+0x184/0x340 el1_dbg+0x48/0xb0 el1_sync_handler+0x170/0x1c8 el1_sync+0x80/0x100 __mutex_lock+0xa10/0x1290 mutex_lock_nested+0x6c/0xc0 acpi_register_debugger+0x40/0x88 acpi_aml_init+0xc4/0x114 do_one_initcall+0x24c/0xb10 kernel_init_freeable+0x690/0x728 kernel_init+0x20/0x1e8 ret_from_fork+0x10/0x18 This is because acpi_debugger.lock has not been initialized as acpi_debugger_init() is not called when ACPI is disabled. Fail module loading to avoid this and any subsequent problems that might arise by trying to debug AML when ACPI is disabled. Fixes: 8cfb0cdf07e2 ("ACPI / debugger: Add IO interface to access debugger functionalities") Reviewed-by: Hanjun Guo Signed-off-by: Jamie Iles Cc: 4.10+ # 4.10+ Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/acpi_dbg.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/acpi/acpi_dbg.c b/drivers/acpi/acpi_dbg.c index 7a265c2171c0..cc4b509bad94 100644 --- a/drivers/acpi/acpi_dbg.c +++ b/drivers/acpi/acpi_dbg.c @@ -749,6 +749,9 @@ int __init acpi_aml_init(void) { int ret; + if (acpi_disabled) + return -ENODEV; + /* Initialize AML IO interface */ mutex_init(&acpi_aml_io.lock); init_waitqueue_head(&acpi_aml_io.wait); -- cgit v1.2.3 From e7f52fd6e0ef5fb62d6137de53c52a61403b4ddc Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Fri, 2 Oct 2020 07:10:12 +0200 Subject: PCI/ACPI: Whitelist hotplug ports for D3 if power managed by ACPI commit c6e331312ebfb52b7186e5d82d517d68b4d2f2d8 upstream. Recent laptops with dual AMD GPUs fail to suspend the discrete GPU, thus causing lockups on system sleep and high power consumption at runtime. The discrete GPU would normally be suspended to D3cold by turning off ACPI _PR3 Power Resources of the Root Port above the GPU. However on affected systems, the Root Port is hotplug-capable and pci_bridge_d3_possible() only allows hotplug ports to go to D3 if they belong to a Thunderbolt device or if the Root Port possesses a "HotPlugSupportInD3" ACPI property. Neither is the case on affected laptops. The reason for whitelisting only specific, known to work hotplug ports for D3 is that there have been reports of SkyLake Xeon-SP systems raising Hardware Error NMIs upon suspending their hotplug ports: https://lore.kernel.org/linux-pci/20170503180426.GA4058@otc-nc-03/ But if a hotplug port is power manageable by ACPI (as can be detected through presence of Power Resources and corresponding _PS0 and _PS3 methods) then it ought to be safe to suspend it to D3. To this end, amend acpi_pci_bridge_d3() to whitelist such ports for D3. Link: https://gitlab.freedesktop.org/drm/amd/-/issues/1222 Link: https://gitlab.freedesktop.org/drm/amd/-/issues/1252 Link: https://gitlab.freedesktop.org/drm/amd/-/issues/1304 Reported-and-tested-by: Arthur Borsboom Reported-and-tested-by: matoro Reported-by: Aaron Zakhrov Reported-by: Michal Rostecki Reported-by: Shai Coleman Signed-off-by: Lukas Wunner Acked-by: Alex Deucher Cc: 5.4+ # 5.4+ Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/pci/pci-acpi.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c index 0c02d500158f..3f40f951a6cd 100644 --- a/drivers/pci/pci-acpi.c +++ b/drivers/pci/pci-acpi.c @@ -944,6 +944,16 @@ static bool acpi_pci_bridge_d3(struct pci_dev *dev) if (!dev->is_hotplug_bridge) return false; + /* Assume D3 support if the bridge is power-manageable by ACPI. */ + adev = ACPI_COMPANION(&dev->dev); + if (!adev && !pci_dev_is_added(dev)) { + adev = acpi_pci_find_companion(&dev->dev); + ACPI_COMPANION_SET(&dev->dev, adev); + } + + if (adev && acpi_device_power_manageable(adev)) + return true; + /* * Look for a special _DSD property for the root port and if it * is set we know the hierarchy behind it supports D3 just fine. -- cgit v1.2.3 From 0adf4dbae9c049155e5a7e09db60eb59a7866a90 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 5 Oct 2020 19:13:15 +0200 Subject: ACPI: EC: PM: Flush EC work unconditionally after wakeup commit 5e92442bb4121562231e6daf8a2d1306cb5f8805 upstream. Commit 607b9df63057 ("ACPI: EC: PM: Avoid flushing EC work when EC GPE is inactive") has been reported to cause some power button wakeup events to be missed on some systems, so modify acpi_ec_dispatch_gpe() to call acpi_ec_flush_work() unconditionally to effectively reverse the changes made by that commit. Also note that the problem which prompted commit 607b9df63057 is not reproducible any more on the affected machine. Fixes: 607b9df63057 ("ACPI: EC: PM: Avoid flushing EC work when EC GPE is inactive") Reported-by: Raymond Tan Cc: 5.4+ # 5.4+ Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/ec.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index 1ec55345252b..317f43f670ff 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -1976,12 +1976,11 @@ bool acpi_ec_dispatch_gpe(void) * to allow the caller to process events properly after that. */ ret = acpi_dispatch_gpe(NULL, first_ec->gpe); - if (ret == ACPI_INTERRUPT_HANDLED) { + if (ret == ACPI_INTERRUPT_HANDLED) pm_pr_dbg("EC GPE dispatched\n"); - /* Flush the event and query workqueues. */ - acpi_ec_flush_work(); - } + /* Flush the event and query workqueues. */ + acpi_ec_flush_work(); return false; } -- cgit v1.2.3 From 7f9d9a007e59be420c5d0de4ff8e3e7310ef7fd4 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 5 Oct 2020 19:13:46 +0200 Subject: ACPI: EC: PM: Drop ec_no_wakeup check from acpi_ec_dispatch_gpe() commit e0e9ce390d7bc6a705653d4a8aa4ea92c9a65e53 upstream. It turns out that in some cases there are EC events to flush in acpi_ec_dispatch_gpe() even though the ec_no_wakeup kernel parameter is set and the EC GPE is disabled while sleeping, so drop the ec_no_wakeup check that prevents those events from being processed from acpi_ec_dispatch_gpe(). Reported-by: Todd Brandt Cc: 5.4+ # 5.4+ Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/ec.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index 317f43f670ff..c64001e789ed 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -1968,9 +1968,6 @@ bool acpi_ec_dispatch_gpe(void) if (acpi_any_gpe_status_set(first_ec->gpe)) return true; - if (ec_no_wakeup) - return false; - /* * Dispatch the EC GPE in-band, but do not report wakeup in any case * to allow the caller to process events properly after that. -- cgit v1.2.3 From a034ea12bdd4bdb844460248827483f8f43126d5 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Sun, 18 Oct 2020 22:57:41 -0500 Subject: acpi-cpufreq: Honor _PSD table setting on new AMD CPUs commit 5368512abe08a28525d9b24abbfc2a72493e8dba upstream. acpi-cpufreq has a old quirk that overrides the _PSD table supplied by BIOS on AMD CPUs. However the _PSD table of new AMD CPUs (Family 19h+) now accurately reports the P-state dependency of CPU cores. Hence this quirk needs to be fixed in order to support new CPUs' frequency control. Fixes: acd316248205 ("acpi-cpufreq: Add quirk to disable _PSD usage on all AMD CPUs") Signed-off-by: Wei Huang [ rjw: Subject edit ] Cc: 3.10+ # 3.10+ Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/cpufreq/acpi-cpufreq.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index d6f7df33ab8c..4195834a4591 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -688,7 +688,8 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) cpumask_copy(policy->cpus, topology_core_cpumask(cpu)); } - if (check_amd_hwpstate_cpu(cpu) && !acpi_pstate_strict) { + if (check_amd_hwpstate_cpu(cpu) && boot_cpu_data.x86 < 0x19 && + !acpi_pstate_strict) { cpumask_clear(policy->cpus); cpumask_set_cpu(cpu, policy->cpus); cpumask_copy(data->freqdomain_cpus, -- cgit v1.2.3 From 3fc2cbba40693892d19ca8edba765a3d7bd9d3a3 Mon Sep 17 00:00:00 2001 From: Martin Fuzzey Date: Wed, 30 Sep 2020 10:36:46 +0200 Subject: w1: mxc_w1: Fix timeout resolution problem leading to bus error commit c9723750a699c3bd465493ac2be8992b72ccb105 upstream. On my platform (i.MX53) bus access sometimes fails with w1_search: max_slave_count 64 reached, will continue next search. The reason is the use of jiffies to implement a 200us timeout in mxc_w1_ds2_touch_bit(). On some platforms the jiffies timer resolution is insufficient for this. Fix by replacing jiffies by ktime_get(). For consistency apply the same change to the other use of jiffies in mxc_w1_ds2_reset_bus(). Fixes: f80b2581a706 ("w1: mxc_w1: Optimize mxc_w1_ds2_touch_bit()") Cc: stable Signed-off-by: Martin Fuzzey Link: https://lore.kernel.org/r/1601455030-6607-1-git-send-email-martin.fuzzey@flowbird.group Signed-off-by: Greg Kroah-Hartman --- drivers/w1/masters/mxc_w1.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/w1/masters/mxc_w1.c b/drivers/w1/masters/mxc_w1.c index 1ca880e01476..090cbbf9e1e2 100644 --- a/drivers/w1/masters/mxc_w1.c +++ b/drivers/w1/masters/mxc_w1.c @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include #include @@ -40,12 +40,12 @@ struct mxc_w1_device { static u8 mxc_w1_ds2_reset_bus(void *data) { struct mxc_w1_device *dev = data; - unsigned long timeout; + ktime_t timeout; writeb(MXC_W1_CONTROL_RPP, dev->regs + MXC_W1_CONTROL); /* Wait for reset sequence 511+512us, use 1500us for sure */ - timeout = jiffies + usecs_to_jiffies(1500); + timeout = ktime_add_us(ktime_get(), 1500); udelay(511 + 512); @@ -55,7 +55,7 @@ static u8 mxc_w1_ds2_reset_bus(void *data) /* PST bit is valid after the RPP bit is self-cleared */ if (!(ctrl & MXC_W1_CONTROL_RPP)) return !(ctrl & MXC_W1_CONTROL_PST); - } while (time_is_after_jiffies(timeout)); + } while (ktime_before(ktime_get(), timeout)); return 1; } @@ -68,12 +68,12 @@ static u8 mxc_w1_ds2_reset_bus(void *data) static u8 mxc_w1_ds2_touch_bit(void *data, u8 bit) { struct mxc_w1_device *dev = data; - unsigned long timeout; + ktime_t timeout; writeb(MXC_W1_CONTROL_WR(bit), dev->regs + MXC_W1_CONTROL); /* Wait for read/write bit (60us, Max 120us), use 200us for sure */ - timeout = jiffies + usecs_to_jiffies(200); + timeout = ktime_add_us(ktime_get(), 200); udelay(60); @@ -83,7 +83,7 @@ static u8 mxc_w1_ds2_touch_bit(void *data, u8 bit) /* RDST bit is valid after the WR1/RD bit is self-cleared */ if (!(ctrl & MXC_W1_CONTROL_WR(bit))) return !!(ctrl & MXC_W1_CONTROL_RDST); - } while (time_is_after_jiffies(timeout)); + } while (ktime_before(ktime_get(), timeout)); return 0; } -- cgit v1.2.3 From f0ef0e2299f5ab0ee6f34c7e0c6e41b3341bf64c Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Thu, 22 Oct 2020 11:00:05 +0200 Subject: scsi: mptfusion: Fix null pointer dereferences in mptscsih_remove() commit 2f4843b172c2c0360ee7792ad98025fae7baefde upstream. The mptscsih_remove() function triggers a kernel oops if the Scsi_Host pointer (ioc->sh) is NULL, as can be seen in this syslog: ioc0: LSI53C1030 B2: Capabilities={Initiator,Target} Begin: Waiting for root file system ... scsi host2: error handler thread failed to spawn, error = -4 mptspi: ioc0: WARNING - Unable to register controller with SCSI subsystem Backtrace: [<000000001045b7cc>] mptspi_probe+0x248/0x3d0 [mptspi] [<0000000040946470>] pci_device_probe+0x1ac/0x2d8 [<0000000040add668>] really_probe+0x1bc/0x988 [<0000000040ade704>] driver_probe_device+0x160/0x218 [<0000000040adee24>] device_driver_attach+0x160/0x188 [<0000000040adef90>] __driver_attach+0x144/0x320 [<0000000040ad7c78>] bus_for_each_dev+0xd4/0x158 [<0000000040adc138>] driver_attach+0x4c/0x80 [<0000000040adb3ec>] bus_add_driver+0x3e0/0x498 [<0000000040ae0130>] driver_register+0xf4/0x298 [<00000000409450c4>] __pci_register_driver+0x78/0xa8 [<000000000007d248>] mptspi_init+0x18c/0x1c4 [mptspi] This patch adds the necessary NULL-pointer checks. Successfully tested on a HP C8000 parisc workstation with buggy SCSI drives. Link: https://lore.kernel.org/r/20201022090005.GA9000@ls3530.fritz.box Cc: Signed-off-by: Helge Deller Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/message/fusion/mptscsih.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/message/fusion/mptscsih.c b/drivers/message/fusion/mptscsih.c index 1491561d2e5c..4bc43d8eeb9e 100644 --- a/drivers/message/fusion/mptscsih.c +++ b/drivers/message/fusion/mptscsih.c @@ -1176,8 +1176,10 @@ mptscsih_remove(struct pci_dev *pdev) MPT_SCSI_HOST *hd; int sz1; - if((hd = shost_priv(host)) == NULL) - return; + if (host == NULL) + hd = NULL; + else + hd = shost_priv(host); mptscsih_shutdown(pdev); @@ -1193,14 +1195,15 @@ mptscsih_remove(struct pci_dev *pdev) "Free'd ScsiLookup (%d) memory\n", ioc->name, sz1)); - kfree(hd->info_kbuf); + if (hd) + kfree(hd->info_kbuf); /* NULL the Scsi_Host pointer */ ioc->sh = NULL; - scsi_host_put(host); - + if (host) + scsi_host_put(host); mpt_detach(pdev); } -- cgit v1.2.3 From a0bdb5b16392754b930a50d56a43ff29f5cdb160 Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Tue, 29 Sep 2020 03:21:50 -0700 Subject: scsi: qla2xxx: Fix crash on session cleanup with unload commit 50457dab670f396557e60c07f086358460876353 upstream. On unload, session cleanup prematurely gave the signal for driver unload path to advance. Link: https://lore.kernel.org/r/20200929102152.32278-6-njavali@marvell.com Fixes: 726b85487067 ("qla2xxx: Add framework for async fabric discovery") Cc: stable@vger.kernel.org Reviewed-by: Himanshu Madhani Signed-off-by: Quinn Tran Signed-off-by: Nilesh Javali Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_target.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index d84d95cac2a1..412009e2b948 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -1230,14 +1230,15 @@ void qlt_schedule_sess_for_deletion(struct fc_port *sess) case DSC_DELETE_PEND: return; case DSC_DELETED: - if (tgt && tgt->tgt_stop && (tgt->sess_count == 0)) - wake_up_all(&tgt->waitQ); - if (sess->vha->fcport_count == 0) - wake_up_all(&sess->vha->fcport_waitQ); - if (!sess->plogi_link[QLT_PLOGI_LINK_SAME_WWN] && - !sess->plogi_link[QLT_PLOGI_LINK_CONFLICT]) + !sess->plogi_link[QLT_PLOGI_LINK_CONFLICT]) { + if (tgt && tgt->tgt_stop && tgt->sess_count == 0) + wake_up_all(&tgt->waitQ); + + if (sess->vha->fcport_count == 0) + wake_up_all(&sess->vha->fcport_waitQ); return; + } break; case DSC_UPD_FCPORT: /* -- cgit v1.2.3 From 1e2f16dd611b9cc29ea3bce7a0dd9360cf0c01cd Mon Sep 17 00:00:00 2001 From: Xiang Chen Date: Tue, 22 Sep 2020 21:11:06 +0800 Subject: PM: runtime: Remove link state checks in rpm_get/put_supplier() commit d12544fb2aa9944b180c35914031a8384ab082c1 upstream. To support runtime PM for hisi SAS driver (the driver is in directory drivers/scsi/hisi_sas), we add device link between scsi_device->sdev_gendev (consumer device) and hisi_hba->dev(supplier device) with flags DL_FLAG_PM_RUNTIME | DL_FLAG_RPM_ACTIVE. After runtime suspended consumers and supplier, unload the dirver which causes a hung. We found that it called function device_release_driver_internal() to release the supplier device (hisi_hba->dev), as the device link was busy, it set the device link state to DL_STATE_SUPPLIER_UNBIND, and then it called device_release_driver_internal() to release the consumer device (scsi_device->sdev_gendev). Then it would try to call pm_runtime_get_sync() to resume the consumer device, but because consumer-supplier relation existed, it would try to resume the supplier first, but as the link state was already DL_STATE_SUPPLIER_UNBIND, so it skipped resuming the supplier and only resumed the consumer which hanged (it sends IOs to resume scsi_device while the SAS controller is suspended). Simple flow is as follows: device_release_driver_internal -> (supplier device) if device_links_busy -> device_links_unbind_consumers -> ... WRITE_ONCE(link->status, DL_STATE_SUPPLIER_UNBIND) device_release_driver_internal (consumer device) pm_runtime_get_sync -> (consumer device) ... __rpm_callback -> rpm_get_suppliers -> if link->state == DL_STATE_SUPPLIER_UNBIND -> skip the action of resuming the supplier ... pm_runtime_clean_up_links ... Correct suspend/resume ordering between a supplier device and its consumer devices (resume the supplier device before resuming consumer devices, and suspend consumer devices before suspending the supplier device) should be guaranteed by runtime PM, but the state checks in rpm_get_supplier() and rpm_put_supplier() break this rule, so remove them. Signed-off-by: Xiang Chen [ rjw: Subject and changelog edits ] Cc: All applicable Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/base/power/runtime.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index 48616f358854..4244e22e4b40 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -291,8 +291,7 @@ static int rpm_get_suppliers(struct device *dev) device_links_read_lock_held()) { int retval; - if (!(link->flags & DL_FLAG_PM_RUNTIME) || - READ_ONCE(link->status) == DL_STATE_SUPPLIER_UNBIND) + if (!(link->flags & DL_FLAG_PM_RUNTIME)) continue; retval = pm_runtime_get_sync(link->supplier); @@ -312,8 +311,6 @@ static void rpm_put_suppliers(struct device *dev) list_for_each_entry_rcu(link, &dev->links.suppliers, c_node, device_links_read_lock_held()) { - if (READ_ONCE(link->status) == DL_STATE_SUPPLIER_UNBIND) - continue; while (refcount_dec_not_one(&link->rpm_active)) pm_runtime_put(link->supplier); -- cgit v1.2.3 From c5f2a5091263e9b6e983049a2dd5dd8fa571b25d Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 24 Jul 2020 14:46:09 +0800 Subject: btrfs: qgroup: fix wrong qgroup metadata reserve for delayed inode commit b4c5d8fdfff3e2b6c4fa4a5043e8946dff500f8c upstream. For delayed inode facility, qgroup metadata is reserved for it, and later freed. However we're freeing more bytes than we reserved. In btrfs_delayed_inode_reserve_metadata(): num_bytes = btrfs_calc_metadata_size(fs_info, 1); ... ret = btrfs_qgroup_reserve_meta_prealloc(root, fs_info->nodesize, true); ... if (!ret) { node->bytes_reserved = num_bytes; But in btrfs_delayed_inode_release_metadata(): if (qgroup_free) btrfs_qgroup_free_meta_prealloc(node->root, node->bytes_reserved); else btrfs_qgroup_convert_reserved_meta(node->root, node->bytes_reserved); This means, we're always releasing more qgroup metadata rsv than we have reserved. This won't trigger selftest warning, as btrfs qgroup metadata rsv has extra protection against cases like quota enabled half-way. But we still need to fix this problem any way. This patch will use the same num_bytes for qgroup metadata rsv so we could handle it correctly. Fixes: f218ea6c4792 ("btrfs: delayed-inode: Remove wrong qgroup meta reservation calls") CC: stable@vger.kernel.org # 4.19+ Reviewed-by: Josef Bacik Signed-off-by: Qu Wenruo Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/delayed-inode.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index a34ee9c2f315..bef62b01824d 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -627,8 +627,7 @@ static int btrfs_delayed_inode_reserve_metadata( */ if (!src_rsv || (!trans->bytes_reserved && src_rsv->type != BTRFS_BLOCK_RSV_DELALLOC)) { - ret = btrfs_qgroup_reserve_meta_prealloc(root, - fs_info->nodesize, true); + ret = btrfs_qgroup_reserve_meta_prealloc(root, num_bytes, true); if (ret < 0) return ret; ret = btrfs_block_rsv_add(root, dst_rsv, num_bytes, -- cgit v1.2.3 From 223b462744b38d8ac1e64b5e58713880e49ea093 Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Thu, 3 Sep 2020 21:30:12 +0800 Subject: btrfs: improve device scanning messages commit 79dae17d8d44b2d15779e332180080af45df5352 upstream. Systems booting without the initramfs seems to scan an unusual kind of device path (/dev/root). And at a later time, the device is updated to the correct path. We generally print the process name and PID of the process scanning the device but we don't capture the same information if the device path is rescanned with a different pathname. The current message is too long, so drop the unnecessary UUID and add process name and PID. While at this also update the duplicate device warning to include the process name and PID so the messages are consistent CC: stable@vger.kernel.org # 4.19+ Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=89721 Signed-off-by: Anand Jain Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/volumes.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index e798caee978e..76a937440335 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1123,16 +1123,18 @@ static noinline struct btrfs_device *device_list_add(const char *path, bdput(path_bdev); mutex_unlock(&fs_devices->device_list_mutex); btrfs_warn_in_rcu(device->fs_info, - "duplicate device fsid:devid for %pU:%llu old:%s new:%s", - disk_super->fsid, devid, - rcu_str_deref(device->name), path); + "duplicate device %s devid %llu generation %llu scanned by %s (%d)", + path, devid, found_transid, + current->comm, + task_pid_nr(current)); return ERR_PTR(-EEXIST); } bdput(path_bdev); btrfs_info_in_rcu(device->fs_info, - "device fsid %pU devid %llu moved old:%s new:%s", - disk_super->fsid, devid, - rcu_str_deref(device->name), path); + "devid %llu device path %s changed to %s scanned by %s (%d)", + devid, rcu_str_deref(device->name), + path, current->comm, + task_pid_nr(current)); } name = rcu_string_strdup(path, GFP_NOFS); -- cgit v1.2.3 From e1cf034899b6a48adc84e8ff0087fe323c91f5a0 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 14 Sep 2020 15:27:50 +0100 Subject: btrfs: reschedule if necessary when logging directory items commit bb56f02f26fe23798edb1b2175707419b28c752a upstream. Logging directories with many entries can take a significant amount of time, and in some cases monopolize a cpu/core for a long time if the logging task doesn't happen to block often enough. Johannes and Lu Fengqi reported test case generic/041 triggering a soft lockup when the kernel has CONFIG_SOFTLOCKUP_DETECTOR=y. For this test case we log an inode with 3002 hard links, and because the test removed one hard link before fsyncing the file, the inode logging causes the parent directory do be logged as well, which has 6004 directory items to log (3002 BTRFS_DIR_ITEM_KEY items plus 3002 BTRFS_DIR_INDEX_KEY items), so it can take a significant amount of time and trigger the soft lockup. So just make tree-log.c:log_dir_items() reschedule when necessary, releasing the current search path before doing so and then resume from where it was before the reschedule. The stack trace produced when the soft lockup happens is the following: [10480.277653] watchdog: BUG: soft lockup - CPU#2 stuck for 22s! [xfs_io:28172] [10480.279418] Modules linked in: dm_thin_pool dm_persistent_data (...) [10480.284915] irq event stamp: 29646366 [10480.285987] hardirqs last enabled at (29646365): [] __slab_alloc.constprop.0+0x56/0x60 [10480.288482] hardirqs last disabled at (29646366): [] irqentry_enter+0x1d/0x50 [10480.290856] softirqs last enabled at (4612): [] __do_softirq+0x323/0x56c [10480.293615] softirqs last disabled at (4483): [] asm_call_on_stack+0xf/0x20 [10480.296428] CPU: 2 PID: 28172 Comm: xfs_io Not tainted 5.9.0-rc4-default+ #1248 [10480.298948] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0-59-gc9ba527-rebuilt.opensuse.org 04/01/2014 [10480.302455] RIP: 0010:__slab_alloc.constprop.0+0x19/0x60 [10480.304151] Code: 86 e8 31 75 21 00 66 66 2e 0f 1f 84 00 00 00 (...) [10480.309558] RSP: 0018:ffffadbe09397a58 EFLAGS: 00000282 [10480.311179] RAX: ffff8a495ab92840 RBX: 0000000000000282 RCX: 0000000000000006 [10480.313242] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffffffff85249b66 [10480.315260] RBP: ffff8a497d04b740 R08: 0000000000000001 R09: 0000000000000001 [10480.317229] R10: ffff8a497d044800 R11: ffff8a495ab93c40 R12: 0000000000000000 [10480.319169] R13: 0000000000000000 R14: 0000000000000c40 R15: ffffffffc01daf70 [10480.321104] FS: 00007fa1dc5c0e40(0000) GS:ffff8a497da00000(0000) knlGS:0000000000000000 [10480.323559] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [10480.325235] CR2: 00007fa1dc5befb8 CR3: 0000000004f8a006 CR4: 0000000000170ea0 [10480.327259] Call Trace: [10480.328286] ? overwrite_item+0x1f0/0x5a0 [btrfs] [10480.329784] __kmalloc+0x831/0xa20 [10480.331009] ? btrfs_get_32+0xb0/0x1d0 [btrfs] [10480.332464] overwrite_item+0x1f0/0x5a0 [btrfs] [10480.333948] log_dir_items+0x2ee/0x570 [btrfs] [10480.335413] log_directory_changes+0x82/0xd0 [btrfs] [10480.336926] btrfs_log_inode+0xc9b/0xda0 [btrfs] [10480.338374] ? init_once+0x20/0x20 [btrfs] [10480.339711] btrfs_log_inode_parent+0x8d3/0xd10 [btrfs] [10480.341257] ? dget_parent+0x97/0x2e0 [10480.342480] btrfs_log_dentry_safe+0x3a/0x50 [btrfs] [10480.343977] btrfs_sync_file+0x24b/0x5e0 [btrfs] [10480.345381] do_fsync+0x38/0x70 [10480.346483] __x64_sys_fsync+0x10/0x20 [10480.347703] do_syscall_64+0x2d/0x70 [10480.348891] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [10480.350444] RIP: 0033:0x7fa1dc80970b [10480.351642] Code: 0f 05 48 3d 00 f0 ff ff 77 45 c3 0f 1f 40 00 48 (...) [10480.356952] RSP: 002b:00007fffb3d081d0 EFLAGS: 00000293 ORIG_RAX: 000000000000004a [10480.359458] RAX: ffffffffffffffda RBX: 0000562d93d45e40 RCX: 00007fa1dc80970b [10480.361426] RDX: 0000562d93d44ab0 RSI: 0000562d93d45e60 RDI: 0000000000000003 [10480.363367] RBP: 0000000000000001 R08: 0000000000000000 R09: 00007fa1dc7b2a40 [10480.365317] R10: 0000562d93d0e366 R11: 0000000000000293 R12: 0000000000000001 [10480.367299] R13: 0000562d93d45290 R14: 0000562d93d45e40 R15: 0000562d93d45e60 Link: https://lore.kernel.org/linux-btrfs/20180713090216.GC575@fnst.localdomain/ Reported-by: Johannes Thumshirn CC: stable@vger.kernel.org # 4.4+ Tested-by: Johannes Thumshirn Reviewed-by: Johannes Thumshirn Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/tree-log.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 7042b84edc89..de53e5166997 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -3639,6 +3639,7 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, * search and this search we'll not find the key again and can just * bail. */ +search: ret = btrfs_search_slot(NULL, root, &min_key, path, 0, 0); if (ret != 0) goto done; @@ -3658,6 +3659,13 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, if (min_key.objectid != ino || min_key.type != key_type) goto done; + + if (need_resched()) { + btrfs_release_path(path); + cond_resched(); + goto search; + } + ret = overwrite_item(trans, log, dst_path, src, i, &min_key); if (ret) { -- cgit v1.2.3 From c2dcc9b03b7f70aa66e9998689a18eb2a39b1cb6 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 21 Sep 2020 14:13:29 +0100 Subject: btrfs: send, orphanize first all conflicting inodes when processing references commit 98272bb77bf4cc20ed1ffca89832d713e70ebf09 upstream. When doing an incremental send it is possible that when processing the new references for an inode we end up issuing rename or link operations that have an invalid path, which contains the orphanized name of a directory before we actually orphanized it, causing the receiver to fail. The following reproducer triggers such scenario: $ cat reproducer.sh #!/bin/bash mkfs.btrfs -f /dev/sdi >/dev/null mount /dev/sdi /mnt/sdi touch /mnt/sdi/a touch /mnt/sdi/b mkdir /mnt/sdi/testdir # We want "a" to have a lower inode number then "testdir" (257 vs 259). mv /mnt/sdi/a /mnt/sdi/testdir/a # Filesystem looks like: # # . (ino 256) # |----- testdir/ (ino 259) # | |----- a (ino 257) # | # |----- b (ino 258) btrfs subvolume snapshot -r /mnt/sdi /mnt/sdi/snap1 btrfs send -f /tmp/snap1.send /mnt/sdi/snap1 # Now rename 259 to "testdir_2", then change the name of 257 to # "testdir" and make it a direct descendant of the root inode (256). # Also create a new link for inode 257 with the old name of inode 258. # By swapping the names and location of several inodes and create a # nasty dependency chain of rename and link operations. mv /mnt/sdi/testdir/a /mnt/sdi/a2 touch /mnt/sdi/testdir/a mv /mnt/sdi/b /mnt/sdi/b2 ln /mnt/sdi/a2 /mnt/sdi/b mv /mnt/sdi/testdir /mnt/sdi/testdir_2 mv /mnt/sdi/a2 /mnt/sdi/testdir # Filesystem now looks like: # # . (ino 256) # |----- testdir_2/ (ino 259) # | |----- a (ino 260) # | # |----- testdir (ino 257) # |----- b (ino 257) # |----- b2 (ino 258) btrfs subvolume snapshot -r /mnt/sdi /mnt/sdi/snap2 btrfs send -f /tmp/snap2.send -p /mnt/sdi/snap1 /mnt/sdi/snap2 mkfs.btrfs -f /dev/sdj >/dev/null mount /dev/sdj /mnt/sdj btrfs receive -f /tmp/snap1.send /mnt/sdj btrfs receive -f /tmp/snap2.send /mnt/sdj umount /mnt/sdi umount /mnt/sdj When running the reproducer, the receive of the incremental send stream fails: $ ./reproducer.sh Create a readonly snapshot of '/mnt/sdi' in '/mnt/sdi/snap1' At subvol /mnt/sdi/snap1 Create a readonly snapshot of '/mnt/sdi' in '/mnt/sdi/snap2' At subvol /mnt/sdi/snap2 At subvol snap1 At snapshot snap2 ERROR: link b -> o259-6-0/a failed: No such file or directory The problem happens because of the following: 1) Before we start iterating the list of new references for inode 257, we generate its current path and store it at @valid_path, done at the very beginning of process_recorded_refs(). The generated path is "o259-6-0/a", containing the orphanized name for inode 259; 2) Then we iterate over the list of new references, which has the references "b" and "testdir" in that specific order; 3) We process reference "b" first, because it is in the list before reference "testdir". We then issue a link operation to create the new reference "b" using a target path corresponding to the content at @valid_path, which corresponds to "o259-6-0/a". However we haven't yet orphanized inode 259, its name is still "testdir", and not "o259-6-0". The orphanization of 259 did not happen yet because we will process the reference named "testdir" for inode 257 only in the next iteration of the loop that goes over the list of new references. Fix the issue by having a preliminar iteration over all the new references at process_recorded_refs(). This iteration is responsible only for doing the orphanization of other inodes that have and old reference that conflicts with one of the new references of the inode we are currently processing. The emission of rename and link operations happen now in the next iteration of the new references. A test case for fstests will follow soon. CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/send.c | 127 ++++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 87 insertions(+), 40 deletions(-) diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index b0e5dfb9be7a..2a26901d3b19 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -3880,52 +3880,56 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move) goto out; } + /* + * Before doing any rename and link operations, do a first pass on the + * new references to orphanize any unprocessed inodes that may have a + * reference that conflicts with one of the new references of the current + * inode. This needs to happen first because a new reference may conflict + * with the old reference of a parent directory, so we must make sure + * that the path used for link and rename commands don't use an + * orphanized name when an ancestor was not yet orphanized. + * + * Example: + * + * Parent snapshot: + * + * . (ino 256) + * |----- testdir/ (ino 259) + * | |----- a (ino 257) + * | + * |----- b (ino 258) + * + * Send snapshot: + * + * . (ino 256) + * |----- testdir_2/ (ino 259) + * | |----- a (ino 260) + * | + * |----- testdir (ino 257) + * |----- b (ino 257) + * |----- b2 (ino 258) + * + * Processing the new reference for inode 257 with name "b" may happen + * before processing the new reference with name "testdir". If so, we + * must make sure that by the time we send a link command to create the + * hard link "b", inode 259 was already orphanized, since the generated + * path in "valid_path" already contains the orphanized name for 259. + * We are processing inode 257, so only later when processing 259 we do + * the rename operation to change its temporary (orphanized) name to + * "testdir_2". + */ list_for_each_entry(cur, &sctx->new_refs, list) { - /* - * We may have refs where the parent directory does not exist - * yet. This happens if the parent directories inum is higher - * than the current inum. To handle this case, we create the - * parent directory out of order. But we need to check if this - * did already happen before due to other refs in the same dir. - */ ret = get_cur_inode_state(sctx, cur->dir, cur->dir_gen); if (ret < 0) goto out; - if (ret == inode_state_will_create) { - ret = 0; - /* - * First check if any of the current inodes refs did - * already create the dir. - */ - list_for_each_entry(cur2, &sctx->new_refs, list) { - if (cur == cur2) - break; - if (cur2->dir == cur->dir) { - ret = 1; - break; - } - } - - /* - * If that did not happen, check if a previous inode - * did already create the dir. - */ - if (!ret) - ret = did_create_dir(sctx, cur->dir); - if (ret < 0) - goto out; - if (!ret) { - ret = send_create_inode(sctx, cur->dir); - if (ret < 0) - goto out; - } - } + if (ret == inode_state_will_create) + continue; /* - * Check if this new ref would overwrite the first ref of - * another unprocessed inode. If yes, orphanize the - * overwritten inode. If we find an overwritten ref that is - * not the first ref, simply unlink it. + * Check if this new ref would overwrite the first ref of another + * unprocessed inode. If yes, orphanize the overwritten inode. + * If we find an overwritten ref that is not the first ref, + * simply unlink it. */ ret = will_overwrite_ref(sctx, cur->dir, cur->dir_gen, cur->name, cur->name_len, @@ -4004,6 +4008,49 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move) } } + } + + list_for_each_entry(cur, &sctx->new_refs, list) { + /* + * We may have refs where the parent directory does not exist + * yet. This happens if the parent directories inum is higher + * than the current inum. To handle this case, we create the + * parent directory out of order. But we need to check if this + * did already happen before due to other refs in the same dir. + */ + ret = get_cur_inode_state(sctx, cur->dir, cur->dir_gen); + if (ret < 0) + goto out; + if (ret == inode_state_will_create) { + ret = 0; + /* + * First check if any of the current inodes refs did + * already create the dir. + */ + list_for_each_entry(cur2, &sctx->new_refs, list) { + if (cur == cur2) + break; + if (cur2->dir == cur->dir) { + ret = 1; + break; + } + } + + /* + * If that did not happen, check if a previous inode + * did already create the dir. + */ + if (!ret) + ret = did_create_dir(sctx, cur->dir); + if (ret < 0) + goto out; + if (!ret) { + ret = send_create_inode(sctx, cur->dir); + if (ret < 0) + goto out; + } + } + if (S_ISDIR(sctx->cur_inode_mode) && sctx->parent_root) { ret = wait_for_dest_dir_move(sctx, cur, is_orphan); if (ret < 0) -- cgit v1.2.3 From 6ec4b82fc322a56aa68777f0fb1e14ff57d1f857 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 21 Sep 2020 14:13:30 +0100 Subject: btrfs: send, recompute reference path after orphanization of a directory commit 9c2b4e0347067396ceb3ae929d6888c81d610259 upstream. During an incremental send, when an inode has multiple new references we might end up emitting rename operations for orphanizations that have a source path that is no longer valid due to a previous orphanization of some directory inode. This causes the receiver to fail since it tries to rename a path that does not exists. Example reproducer: $ cat reproducer.sh #!/bin/bash mkfs.btrfs -f /dev/sdi >/dev/null mount /dev/sdi /mnt/sdi touch /mnt/sdi/f1 touch /mnt/sdi/f2 mkdir /mnt/sdi/d1 mkdir /mnt/sdi/d1/d2 # Filesystem looks like: # # . (ino 256) # |----- f1 (ino 257) # |----- f2 (ino 258) # |----- d1/ (ino 259) # |----- d2/ (ino 260) btrfs subvolume snapshot -r /mnt/sdi /mnt/sdi/snap1 btrfs send -f /tmp/snap1.send /mnt/sdi/snap1 # Now do a series of changes such that: # # *) inode 258 has one new hardlink and the previous name changed # # *) both names conflict with the old names of two other inodes: # # 1) the new name "d1" conflicts with the old name of inode 259, # under directory inode 256 (root) # # 2) the new name "d2" conflicts with the old name of inode 260 # under directory inode 259 # # *) inodes 259 and 260 now have the old names of inode 258 # # *) inode 257 is now located under inode 260 - an inode with a number # smaller than the inode (258) for which we created a second hard # link and swapped its names with inodes 259 and 260 # ln /mnt/sdi/f2 /mnt/sdi/d1/f2_link mv /mnt/sdi/f1 /mnt/sdi/d1/d2/f1 # Swap d1 and f2. mv /mnt/sdi/d1 /mnt/sdi/tmp mv /mnt/sdi/f2 /mnt/sdi/d1 mv /mnt/sdi/tmp /mnt/sdi/f2 # Swap d2 and f2_link mv /mnt/sdi/f2/d2 /mnt/sdi/tmp mv /mnt/sdi/f2/f2_link /mnt/sdi/f2/d2 mv /mnt/sdi/tmp /mnt/sdi/f2/f2_link # Filesystem now looks like: # # . (ino 256) # |----- d1 (ino 258) # |----- f2/ (ino 259) # |----- f2_link/ (ino 260) # | |----- f1 (ino 257) # | # |----- d2 (ino 258) btrfs subvolume snapshot -r /mnt/sdi /mnt/sdi/snap2 btrfs send -f /tmp/snap2.send -p /mnt/sdi/snap1 /mnt/sdi/snap2 mkfs.btrfs -f /dev/sdj >/dev/null mount /dev/sdj /mnt/sdj btrfs receive -f /tmp/snap1.send /mnt/sdj btrfs receive -f /tmp/snap2.send /mnt/sdj umount /mnt/sdi umount /mnt/sdj When executed the receive of the incremental stream fails: $ ./reproducer.sh Create a readonly snapshot of '/mnt/sdi' in '/mnt/sdi/snap1' At subvol /mnt/sdi/snap1 Create a readonly snapshot of '/mnt/sdi' in '/mnt/sdi/snap2' At subvol /mnt/sdi/snap2 At subvol snap1 At snapshot snap2 ERROR: rename d1/d2 -> o260-6-0 failed: No such file or directory This happens because: 1) When processing inode 257 we end up computing the name for inode 259 because it is an ancestor in the send snapshot, and at that point it still has its old name, "d1", from the parent snapshot because inode 259 was not yet processed. We then cache that name, which is valid until we start processing inode 259 (or set the progress to 260 after processing its references); 2) Later we start processing inode 258 and collecting all its new references into the list sctx->new_refs. The first reference in the list happens to be the reference for name "d1" while the reference for name "d2" is next (the last element of the list). We compute the full path "d1/d2" for this second reference and store it in the reference (its ->full_path member). The path used for the new parent directory was "d1" and not "f2" because inode 259, the new parent, was not yet processed; 3) When we start processing the new references at process_recorded_refs() we start with the first reference in the list, for the new name "d1". Because there is a conflicting inode that was not yet processed, which is directory inode 259, we orphanize it, renaming it from "d1" to "o259-6-0"; 4) Then we start processing the new reference for name "d2", and we realize it conflicts with the reference of inode 260 in the parent snapshot. So we issue an orphanization operation for inode 260 by emitting a rename operation with a destination path of "o260-6-0" and a source path of "d1/d2" - this source path is the value we stored in the reference earlier at step 2), corresponding to the ->full_path member of the reference, however that path is no longer valid due to the orphanization of the directory inode 259 in step 3). This makes the receiver fail since the path does not exists, it should have been "o259-6-0/d2". Fix this by recomputing the full path of a reference before emitting an orphanization if we previously orphanized any directory, since that directory could be a parent in the new path. This is a rare scenario so keeping it simple and not checking if that previously orphanized directory is in fact an ancestor of the inode we are trying to orphanize. A test case for fstests follows soon. CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/send.c | 72 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 2a26901d3b19..40e56d0f90cf 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -3812,6 +3812,72 @@ static int update_ref_path(struct send_ctx *sctx, struct recorded_ref *ref) return 0; } +/* + * When processing the new references for an inode we may orphanize an existing + * directory inode because its old name conflicts with one of the new references + * of the current inode. Later, when processing another new reference of our + * inode, we might need to orphanize another inode, but the path we have in the + * reference reflects the pre-orphanization name of the directory we previously + * orphanized. For example: + * + * parent snapshot looks like: + * + * . (ino 256) + * |----- f1 (ino 257) + * |----- f2 (ino 258) + * |----- d1/ (ino 259) + * |----- d2/ (ino 260) + * + * send snapshot looks like: + * + * . (ino 256) + * |----- d1 (ino 258) + * |----- f2/ (ino 259) + * |----- f2_link/ (ino 260) + * | |----- f1 (ino 257) + * | + * |----- d2 (ino 258) + * + * When processing inode 257 we compute the name for inode 259 as "d1", and we + * cache it in the name cache. Later when we start processing inode 258, when + * collecting all its new references we set a full path of "d1/d2" for its new + * reference with name "d2". When we start processing the new references we + * start by processing the new reference with name "d1", and this results in + * orphanizing inode 259, since its old reference causes a conflict. Then we + * move on the next new reference, with name "d2", and we find out we must + * orphanize inode 260, as its old reference conflicts with ours - but for the + * orphanization we use a source path corresponding to the path we stored in the + * new reference, which is "d1/d2" and not "o259-6-0/d2" - this makes the + * receiver fail since the path component "d1/" no longer exists, it was renamed + * to "o259-6-0/" when processing the previous new reference. So in this case we + * must recompute the path in the new reference and use it for the new + * orphanization operation. + */ +static int refresh_ref_path(struct send_ctx *sctx, struct recorded_ref *ref) +{ + char *name; + int ret; + + name = kmemdup(ref->name, ref->name_len, GFP_KERNEL); + if (!name) + return -ENOMEM; + + fs_path_reset(ref->full_path); + ret = get_cur_path(sctx, ref->dir, ref->dir_gen, ref->full_path); + if (ret < 0) + goto out; + + ret = fs_path_add(ref->full_path, name, ref->name_len); + if (ret < 0) + goto out; + + /* Update the reference's base name pointer. */ + set_ref_path(ref, ref->full_path); +out: + kfree(name); + return ret; +} + /* * This does all the move/link/unlink/rmdir magic. */ @@ -3946,6 +4012,12 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move) struct name_cache_entry *nce; struct waiting_dir_move *wdm; + if (orphanized_dir) { + ret = refresh_ref_path(sctx, cur); + if (ret < 0) + goto out; + } + ret = orphanize_inode(sctx, ow_inode, ow_gen, cur->full_path); if (ret < 0) -- cgit v1.2.3 From 4b82b8aba08d436b075226a448881b98807981f2 Mon Sep 17 00:00:00 2001 From: Denis Efremov Date: Mon, 21 Sep 2020 20:03:35 +0300 Subject: btrfs: use kvzalloc() to allocate clone_roots in btrfs_ioctl_send() commit 8eb2fd00153a3a96a19c62ac9c6d48c2efebe5e8 upstream. btrfs_ioctl_send() used open-coded kvzalloc implementation earlier. The code was accidentally replaced with kzalloc() call [1]. Restore the original code by using kvzalloc() to allocate sctx->clone_roots. [1] https://patchwork.kernel.org/patch/9757891/#20529627 Fixes: 818e010bf9d0 ("btrfs: replace opencoded kvzalloc with the helper") CC: stable@vger.kernel.org # 4.14+ Signed-off-by: Denis Efremov Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/send.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 40e56d0f90cf..88940f494428 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -7352,7 +7352,7 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg) alloc_size = sizeof(struct clone_root) * (arg->clone_sources_count + 1); - sctx->clone_roots = kzalloc(alloc_size, GFP_KERNEL); + sctx->clone_roots = kvzalloc(alloc_size, GFP_KERNEL); if (!sctx->clone_roots) { ret = -ENOMEM; goto out; -- cgit v1.2.3 From d3ce2d0fb8b28823573a5e9051e473f5c5e870bb Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Tue, 22 Sep 2020 10:37:01 +0800 Subject: btrfs: tree-checker: fix false alert caused by legacy btrfs root item commit 1465af12e254a68706e110846f59cf0f09683184 upstream. Commit 259ee7754b67 ("btrfs: tree-checker: Add ROOT_ITEM check") introduced btrfs root item size check, however btrfs root item has two versions, the legacy one which just ends before generation_v2 member, is smaller than current btrfs root item size. This caused btrfs kernel to reject valid but old tree root leaves. Fix this problem by also allowing legacy root item, since kernel can already handle them pretty well and upgrade to newer root item format when needed. Reported-by: Martin Steigerwald Fixes: 259ee7754b67 ("btrfs: tree-checker: Add ROOT_ITEM check") CC: stable@vger.kernel.org # 5.4+ Tested-By: Martin Steigerwald Reviewed-by: Josef Bacik Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/tree-checker.c | 17 ++++++++++++----- include/uapi/linux/btrfs_tree.h | 14 ++++++++++++++ 2 files changed, 26 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index 84b8d6ebf98f..e79f9cb6f691 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -869,7 +869,7 @@ static int check_root_item(struct extent_buffer *leaf, struct btrfs_key *key, int slot) { struct btrfs_fs_info *fs_info = leaf->fs_info; - struct btrfs_root_item ri; + struct btrfs_root_item ri = { 0 }; const u64 valid_root_flags = BTRFS_ROOT_SUBVOL_RDONLY | BTRFS_ROOT_SUBVOL_DEAD; @@ -889,14 +889,21 @@ static int check_root_item(struct extent_buffer *leaf, struct btrfs_key *key, return -EUCLEAN; } - if (btrfs_item_size_nr(leaf, slot) != sizeof(ri)) { + if (btrfs_item_size_nr(leaf, slot) != sizeof(ri) && + btrfs_item_size_nr(leaf, slot) != btrfs_legacy_root_item_size()) { generic_err(leaf, slot, - "invalid root item size, have %u expect %zu", - btrfs_item_size_nr(leaf, slot), sizeof(ri)); + "invalid root item size, have %u expect %zu or %u", + btrfs_item_size_nr(leaf, slot), sizeof(ri), + btrfs_legacy_root_item_size()); } + /* + * For legacy root item, the members starting at generation_v2 will be + * all filled with 0. + * And since we allow geneartion_v2 as 0, it will still pass the check. + */ read_extent_buffer(leaf, &ri, btrfs_item_ptr_offset(leaf, slot), - sizeof(ri)); + btrfs_item_size_nr(leaf, slot)); /* Generation related */ if (btrfs_root_generation(&ri) > diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h index b65c7ee75bc7..035e59eef05b 100644 --- a/include/uapi/linux/btrfs_tree.h +++ b/include/uapi/linux/btrfs_tree.h @@ -4,6 +4,11 @@ #include #include +#ifdef __KERNEL__ +#include +#else +#include +#endif /* * This header contains the structure definitions and constants used @@ -650,6 +655,15 @@ struct btrfs_root_item { __le64 reserved[8]; /* for future */ } __attribute__ ((__packed__)); +/* + * Btrfs root item used to be smaller than current size. The old format ends + * at where member generation_v2 is. + */ +static inline __u32 btrfs_legacy_root_item_size(void) +{ + return offsetof(struct btrfs_root_item, generation_v2); +} + /* * this is used for both forward and backward root refs */ -- cgit v1.2.3 From 1cedc54ad3d47af758d4a317e350534c85780939 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 29 Sep 2020 08:53:54 -0400 Subject: btrfs: cleanup cow block on error commit 572c83acdcdafeb04e70aa46be1fa539310be20c upstream. In fstest btrfs/064 a transaction abort in __btrfs_cow_block could lead to a system lockup. It gets stuck trying to write back inodes, and the write back thread was trying to lock an extent buffer: $ cat /proc/2143497/stack [<0>] __btrfs_tree_lock+0x108/0x250 [<0>] lock_extent_buffer_for_io+0x35e/0x3a0 [<0>] btree_write_cache_pages+0x15a/0x3b0 [<0>] do_writepages+0x28/0xb0 [<0>] __writeback_single_inode+0x54/0x5c0 [<0>] writeback_sb_inodes+0x1e8/0x510 [<0>] wb_writeback+0xcc/0x440 [<0>] wb_workfn+0xd7/0x650 [<0>] process_one_work+0x236/0x560 [<0>] worker_thread+0x55/0x3c0 [<0>] kthread+0x13a/0x150 [<0>] ret_from_fork+0x1f/0x30 This is because we got an error while COWing a block, specifically here if (test_bit(BTRFS_ROOT_SHAREABLE, &root->state)) { ret = btrfs_reloc_cow_block(trans, root, buf, cow); if (ret) { btrfs_abort_transaction(trans, ret); return ret; } } [16402.241552] BTRFS: Transaction aborted (error -2) [16402.242362] WARNING: CPU: 1 PID: 2563188 at fs/btrfs/ctree.c:1074 __btrfs_cow_block+0x376/0x540 [16402.249469] CPU: 1 PID: 2563188 Comm: fsstress Not tainted 5.9.0-rc6+ #8 [16402.249936] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.13.0-2.fc32 04/01/2014 [16402.250525] RIP: 0010:__btrfs_cow_block+0x376/0x540 [16402.252417] RSP: 0018:ffff9cca40e578b0 EFLAGS: 00010282 [16402.252787] RAX: 0000000000000025 RBX: 0000000000000002 RCX: ffff9132bbd19388 [16402.253278] RDX: 00000000ffffffd8 RSI: 0000000000000027 RDI: ffff9132bbd19380 [16402.254063] RBP: ffff9132b41a49c0 R08: 0000000000000000 R09: 0000000000000000 [16402.254887] R10: 0000000000000000 R11: ffff91324758b080 R12: ffff91326ef17ce0 [16402.255694] R13: ffff91325fc0f000 R14: ffff91326ef176b0 R15: ffff9132815e2000 [16402.256321] FS: 00007f542c6d7b80(0000) GS:ffff9132bbd00000(0000) knlGS:0000000000000000 [16402.256973] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [16402.257374] CR2: 00007f127b83f250 CR3: 0000000133480002 CR4: 0000000000370ee0 [16402.257867] Call Trace: [16402.258072] btrfs_cow_block+0x109/0x230 [16402.258356] btrfs_search_slot+0x530/0x9d0 [16402.258655] btrfs_lookup_file_extent+0x37/0x40 [16402.259155] __btrfs_drop_extents+0x13c/0xd60 [16402.259628] ? btrfs_block_rsv_migrate+0x4f/0xb0 [16402.259949] btrfs_replace_file_extents+0x190/0x820 [16402.260873] btrfs_clone+0x9ae/0xc00 [16402.261139] btrfs_extent_same_range+0x66/0x90 [16402.261771] btrfs_remap_file_range+0x353/0x3b1 [16402.262333] vfs_dedupe_file_range_one.part.0+0xd5/0x140 [16402.262821] vfs_dedupe_file_range+0x189/0x220 [16402.263150] do_vfs_ioctl+0x552/0x700 [16402.263662] __x64_sys_ioctl+0x62/0xb0 [16402.264023] do_syscall_64+0x33/0x40 [16402.264364] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [16402.264862] RIP: 0033:0x7f542c7d15cb [16402.266901] RSP: 002b:00007ffd35944ea8 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 [16402.267627] RAX: ffffffffffffffda RBX: 00000000009d1968 RCX: 00007f542c7d15cb [16402.268298] RDX: 00000000009d2490 RSI: 00000000c0189436 RDI: 0000000000000003 [16402.268958] RBP: 00000000009d2520 R08: 0000000000000036 R09: 00000000009d2e64 [16402.269726] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000002 [16402.270659] R13: 000000000001f000 R14: 00000000009d1970 R15: 00000000009d2e80 [16402.271498] irq event stamp: 0 [16402.271846] hardirqs last enabled at (0): [<0000000000000000>] 0x0 [16402.272497] hardirqs last disabled at (0): [] copy_process+0x6b9/0x1ba0 [16402.273343] softirqs last enabled at (0): [] copy_process+0x6b9/0x1ba0 [16402.273905] softirqs last disabled at (0): [<0000000000000000>] 0x0 [16402.274338] ---[ end trace 737874a5a41a8236 ]--- [16402.274669] BTRFS: error (device dm-9) in __btrfs_cow_block:1074: errno=-2 No such entry [16402.276179] BTRFS info (device dm-9): forced readonly [16402.277046] BTRFS: error (device dm-9) in btrfs_replace_file_extents:2723: errno=-2 No such entry [16402.278744] BTRFS: error (device dm-9) in __btrfs_cow_block:1074: errno=-2 No such entry [16402.279968] BTRFS: error (device dm-9) in __btrfs_cow_block:1074: errno=-2 No such entry [16402.280582] BTRFS info (device dm-9): balance: ended with status: -30 The problem here is that as soon as we allocate the new block it is locked and marked dirty in the btree inode. This means that we could attempt to writeback this block and need to lock the extent buffer. However we're not unlocking it here and thus we deadlock. Fix this by unlocking the cow block if we have any errors inside of __btrfs_cow_block, and also free it so we do not leak it. CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Filipe Manana Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/ctree.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index c05127f50637..e25133a9e9df 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1103,6 +1103,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, ret = update_ref_for_cow(trans, root, buf, cow, &last_ref); if (ret) { + btrfs_tree_unlock(cow); + free_extent_buffer(cow); btrfs_abort_transaction(trans, ret); return ret; } @@ -1110,6 +1112,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, if (test_bit(BTRFS_ROOT_REF_COWS, &root->state)) { ret = btrfs_reloc_cow_block(trans, root, buf, cow); if (ret) { + btrfs_tree_unlock(cow); + free_extent_buffer(cow); btrfs_abort_transaction(trans, ret); return ret; } @@ -1142,6 +1146,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, if (last_ref) { ret = tree_mod_log_free_eb(buf); if (ret) { + btrfs_tree_unlock(cow); + free_extent_buffer(cow); btrfs_abort_transaction(trans, ret); return ret; } -- cgit v1.2.3 From 834a61b2123ba89d576385afb11a37148fa76170 Mon Sep 17 00:00:00 2001 From: Daniel Xu Date: Thu, 8 Oct 2020 18:09:10 -0700 Subject: btrfs: tree-checker: validate number of chunk stripes and parity commit 85d07fbe09efd1c529ff3e025e2f0d2c6c96a1b7 upstream. If there's no parity and num_stripes < ncopies, a crafted image can trigger a division by zero in calc_stripe_length(). The image was generated through fuzzing. CC: stable@vger.kernel.org # 5.4+ Reviewed-by: Qu Wenruo Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=209587 Signed-off-by: Daniel Xu Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/tree-checker.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index e79f9cb6f691..48e46323d519 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -577,18 +577,36 @@ int btrfs_check_chunk_valid(struct extent_buffer *leaf, u64 type; u64 features; bool mixed = false; + int raid_index; + int nparity; + int ncopies; length = btrfs_chunk_length(leaf, chunk); stripe_len = btrfs_chunk_stripe_len(leaf, chunk); num_stripes = btrfs_chunk_num_stripes(leaf, chunk); sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk); type = btrfs_chunk_type(leaf, chunk); + raid_index = btrfs_bg_flags_to_raid_index(type); + ncopies = btrfs_raid_array[raid_index].ncopies; + nparity = btrfs_raid_array[raid_index].nparity; if (!num_stripes) { chunk_err(leaf, chunk, logical, "invalid chunk num_stripes, have %u", num_stripes); return -EUCLEAN; } + if (num_stripes < ncopies) { + chunk_err(leaf, chunk, logical, + "invalid chunk num_stripes < ncopies, have %u < %d", + num_stripes, ncopies); + return -EUCLEAN; + } + if (nparity && num_stripes == nparity) { + chunk_err(leaf, chunk, logical, + "invalid chunk num_stripes == nparity, have %u == %d", + num_stripes, nparity); + return -EUCLEAN; + } if (!IS_ALIGNED(logical, fs_info->sectorsize)) { chunk_err(leaf, chunk, logical, "invalid chunk logical, have %llu should aligned to %u", -- cgit v1.2.3 From dfda50e882f57c709ca91f5659af87e915e5af4e Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 12 Oct 2020 11:55:23 +0100 Subject: btrfs: fix use-after-free on readahead extent after failure to create it commit 83bc1560e02e25c6439341352024ebe8488f4fbd upstream. If we fail to find suitable zones for a new readahead extent, we end up leaving a stale pointer in the global readahead extents radix tree (fs_info->reada_tree), which can trigger the following trace later on: [13367.696354] BUG: kernel NULL pointer dereference, address: 00000000000000b0 [13367.696802] #PF: supervisor read access in kernel mode [13367.697249] #PF: error_code(0x0000) - not-present page [13367.697721] PGD 0 P4D 0 [13367.698171] Oops: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC PTI [13367.698632] CPU: 6 PID: 851214 Comm: btrfs Tainted: G W 5.9.0-rc6-btrfs-next-69 #1 [13367.699100] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 [13367.700069] RIP: 0010:__lock_acquire+0x20a/0x3970 [13367.700562] Code: ff 1f 0f b7 c0 48 0f (...) [13367.701609] RSP: 0018:ffffb14448f57790 EFLAGS: 00010046 [13367.702140] RAX: 0000000000000000 RBX: 29b935140c15e8cf RCX: 0000000000000000 [13367.702698] RDX: 0000000000000002 RSI: ffffffffb3d66bd0 RDI: 0000000000000046 [13367.703240] RBP: ffff8a52ba8ac040 R08: 00000c2866ad9288 R09: 0000000000000001 [13367.703783] R10: 0000000000000001 R11: 00000000b66d9b53 R12: ffff8a52ba8ac9b0 [13367.704330] R13: 0000000000000000 R14: ffff8a532b6333e8 R15: 0000000000000000 [13367.704880] FS: 00007fe1df6b5700(0000) GS:ffff8a5376600000(0000) knlGS:0000000000000000 [13367.705438] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [13367.705995] CR2: 00000000000000b0 CR3: 000000022cca8004 CR4: 00000000003706e0 [13367.706565] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [13367.707127] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [13367.707686] Call Trace: [13367.708246] ? ___slab_alloc+0x395/0x740 [13367.708820] ? reada_add_block+0xae/0xee0 [btrfs] [13367.709383] lock_acquire+0xb1/0x480 [13367.709955] ? reada_add_block+0xe0/0xee0 [btrfs] [13367.710537] ? reada_add_block+0xae/0xee0 [btrfs] [13367.711097] ? rcu_read_lock_sched_held+0x5d/0x90 [13367.711659] ? kmem_cache_alloc_trace+0x8d2/0x990 [13367.712221] ? lock_acquired+0x33b/0x470 [13367.712784] _raw_spin_lock+0x34/0x80 [13367.713356] ? reada_add_block+0xe0/0xee0 [btrfs] [13367.713966] reada_add_block+0xe0/0xee0 [btrfs] [13367.714529] ? btrfs_root_node+0x15/0x1f0 [btrfs] [13367.715077] btrfs_reada_add+0x117/0x170 [btrfs] [13367.715620] scrub_stripe+0x21e/0x10d0 [btrfs] [13367.716141] ? kvm_sched_clock_read+0x5/0x10 [13367.716657] ? __lock_acquire+0x41e/0x3970 [13367.717184] ? scrub_chunk+0x60/0x140 [btrfs] [13367.717697] ? find_held_lock+0x32/0x90 [13367.718254] ? scrub_chunk+0x60/0x140 [btrfs] [13367.718773] ? lock_acquired+0x33b/0x470 [13367.719278] ? scrub_chunk+0xcd/0x140 [btrfs] [13367.719786] scrub_chunk+0xcd/0x140 [btrfs] [13367.720291] scrub_enumerate_chunks+0x270/0x5c0 [btrfs] [13367.720787] ? finish_wait+0x90/0x90 [13367.721281] btrfs_scrub_dev+0x1ee/0x620 [btrfs] [13367.721762] ? rcu_read_lock_any_held+0x8e/0xb0 [13367.722235] ? preempt_count_add+0x49/0xa0 [13367.722710] ? __sb_start_write+0x19b/0x290 [13367.723192] btrfs_ioctl+0x7f5/0x36f0 [btrfs] [13367.723660] ? __fget_files+0x101/0x1d0 [13367.724118] ? find_held_lock+0x32/0x90 [13367.724559] ? __fget_files+0x101/0x1d0 [13367.724982] ? __x64_sys_ioctl+0x83/0xb0 [13367.725399] __x64_sys_ioctl+0x83/0xb0 [13367.725802] do_syscall_64+0x33/0x80 [13367.726188] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [13367.726574] RIP: 0033:0x7fe1df7add87 [13367.726948] Code: 00 00 00 48 8b 05 09 91 (...) [13367.727763] RSP: 002b:00007fe1df6b4d48 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 [13367.728179] RAX: ffffffffffffffda RBX: 000055ce1fb596a0 RCX: 00007fe1df7add87 [13367.728604] RDX: 000055ce1fb596a0 RSI: 00000000c400941b RDI: 0000000000000003 [13367.729021] RBP: 0000000000000000 R08: 00007fe1df6b5700 R09: 0000000000000000 [13367.729431] R10: 00007fe1df6b5700 R11: 0000000000000246 R12: 00007ffd922b07de [13367.729842] R13: 00007ffd922b07df R14: 00007fe1df6b4e40 R15: 0000000000802000 [13367.730275] Modules linked in: btrfs blake2b_generic xor (...) [13367.732638] CR2: 00000000000000b0 [13367.733166] ---[ end trace d298b6805556acd9 ]--- What happens is the following: 1) At reada_find_extent() we don't find any existing readahead extent for the metadata extent starting at logical address X; 2) So we proceed to create a new one. We then call btrfs_map_block() to get information about which stripes contain extent X; 3) After that we iterate over the stripes and create only one zone for the readahead extent - only one because reada_find_zone() returned NULL for all iterations except for one, either because a memory allocation failed or it couldn't find the block group of the extent (it may have just been deleted); 4) We then add the new readahead extent to the readahead extents radix tree at fs_info->reada_tree; 5) Then we iterate over each zone of the new readahead extent, and find that the device used for that zone no longer exists, because it was removed or it was the source device of a device replace operation. Since this left 'have_zone' set to 0, after finishing the loop we jump to the 'error' label, call kfree() on the new readahead extent and return without removing it from the radix tree at fs_info->reada_tree; 6) Any future call to reada_find_extent() for the logical address X will find the stale pointer in the readahead extents radix tree, increment its reference counter, which can trigger the use-after-free right away or return it to the caller reada_add_block() that results in the use-after-free of the example trace above. So fix this by making sure we delete the readahead extent from the radix tree if we fail to setup zones for it (when 'have_zone = 0'). Fixes: 319450211842ba ("btrfs: reada: bypass adding extent when all zone failed") CC: stable@vger.kernel.org # 4.9+ Reviewed-by: Johannes Thumshirn Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/reada.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c index 1feaeadc8cf5..dd007258b2fb 100644 --- a/fs/btrfs/reada.c +++ b/fs/btrfs/reada.c @@ -445,6 +445,8 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info, } have_zone = 1; } + if (!have_zone) + radix_tree_delete(&fs_info->reada_tree, index); spin_unlock(&fs_info->reada_lock); up_read(&fs_info->dev_replace.rwsem); -- cgit v1.2.3 From c964d386e84918522c8d2f36e5c1d2ae6ec1ec20 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 12 Oct 2020 11:55:24 +0100 Subject: btrfs: fix readahead hang and use-after-free after removing a device commit 66d204a16c94f24ad08290a7663ab67e7fc04e82 upstream. Very sporadically I had test case btrfs/069 from fstests hanging (for years, it is not a recent regression), with the following traces in dmesg/syslog: [162301.160628] BTRFS info (device sdc): dev_replace from /dev/sdd (devid 2) to /dev/sdg started [162301.181196] BTRFS info (device sdc): scrub: finished on devid 4 with status: 0 [162301.287162] BTRFS info (device sdc): dev_replace from /dev/sdd (devid 2) to /dev/sdg finished [162513.513792] INFO: task btrfs-transacti:1356167 blocked for more than 120 seconds. [162513.514318] Not tainted 5.9.0-rc6-btrfs-next-69 #1 [162513.514522] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [162513.514747] task:btrfs-transacti state:D stack: 0 pid:1356167 ppid: 2 flags:0x00004000 [162513.514751] Call Trace: [162513.514761] __schedule+0x5ce/0xd00 [162513.514765] ? _raw_spin_unlock_irqrestore+0x3c/0x60 [162513.514771] schedule+0x46/0xf0 [162513.514844] wait_current_trans+0xde/0x140 [btrfs] [162513.514850] ? finish_wait+0x90/0x90 [162513.514864] start_transaction+0x37c/0x5f0 [btrfs] [162513.514879] transaction_kthread+0xa4/0x170 [btrfs] [162513.514891] ? btrfs_cleanup_transaction+0x660/0x660 [btrfs] [162513.514894] kthread+0x153/0x170 [162513.514897] ? kthread_stop+0x2c0/0x2c0 [162513.514902] ret_from_fork+0x22/0x30 [162513.514916] INFO: task fsstress:1356184 blocked for more than 120 seconds. [162513.515192] Not tainted 5.9.0-rc6-btrfs-next-69 #1 [162513.515431] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [162513.515680] task:fsstress state:D stack: 0 pid:1356184 ppid:1356177 flags:0x00004000 [162513.515682] Call Trace: [162513.515688] __schedule+0x5ce/0xd00 [162513.515691] ? _raw_spin_unlock_irqrestore+0x3c/0x60 [162513.515697] schedule+0x46/0xf0 [162513.515712] wait_current_trans+0xde/0x140 [btrfs] [162513.515716] ? finish_wait+0x90/0x90 [162513.515729] start_transaction+0x37c/0x5f0 [btrfs] [162513.515743] btrfs_attach_transaction_barrier+0x1f/0x50 [btrfs] [162513.515753] btrfs_sync_fs+0x61/0x1c0 [btrfs] [162513.515758] ? __ia32_sys_fdatasync+0x20/0x20 [162513.515761] iterate_supers+0x87/0xf0 [162513.515765] ksys_sync+0x60/0xb0 [162513.515768] __do_sys_sync+0xa/0x10 [162513.515771] do_syscall_64+0x33/0x80 [162513.515774] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [162513.515781] RIP: 0033:0x7f5238f50bd7 [162513.515782] Code: Bad RIP value. [162513.515784] RSP: 002b:00007fff67b978e8 EFLAGS: 00000206 ORIG_RAX: 00000000000000a2 [162513.515786] RAX: ffffffffffffffda RBX: 000055b1fad2c560 RCX: 00007f5238f50bd7 [162513.515788] RDX: 00000000ffffffff RSI: 000000000daf0e74 RDI: 000000000000003a [162513.515789] RBP: 0000000000000032 R08: 000000000000000a R09: 00007f5239019be0 [162513.515791] R10: fffffffffffff24f R11: 0000000000000206 R12: 000000000000003a [162513.515792] R13: 00007fff67b97950 R14: 00007fff67b97906 R15: 000055b1fad1a340 [162513.515804] INFO: task fsstress:1356185 blocked for more than 120 seconds. [162513.516064] Not tainted 5.9.0-rc6-btrfs-next-69 #1 [162513.516329] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [162513.516617] task:fsstress state:D stack: 0 pid:1356185 ppid:1356177 flags:0x00000000 [162513.516620] Call Trace: [162513.516625] __schedule+0x5ce/0xd00 [162513.516628] ? _raw_spin_unlock_irqrestore+0x3c/0x60 [162513.516634] schedule+0x46/0xf0 [162513.516647] wait_current_trans+0xde/0x140 [btrfs] [162513.516650] ? finish_wait+0x90/0x90 [162513.516662] start_transaction+0x4d7/0x5f0 [btrfs] [162513.516679] btrfs_setxattr_trans+0x3c/0x100 [btrfs] [162513.516686] __vfs_setxattr+0x66/0x80 [162513.516691] __vfs_setxattr_noperm+0x70/0x200 [162513.516697] vfs_setxattr+0x6b/0x120 [162513.516703] setxattr+0x125/0x240 [162513.516709] ? lock_acquire+0xb1/0x480 [162513.516712] ? mnt_want_write+0x20/0x50 [162513.516721] ? rcu_read_lock_any_held+0x8e/0xb0 [162513.516723] ? preempt_count_add+0x49/0xa0 [162513.516725] ? __sb_start_write+0x19b/0x290 [162513.516727] ? preempt_count_add+0x49/0xa0 [162513.516732] path_setxattr+0xba/0xd0 [162513.516739] __x64_sys_setxattr+0x27/0x30 [162513.516741] do_syscall_64+0x33/0x80 [162513.516743] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [162513.516745] RIP: 0033:0x7f5238f56d5a [162513.516746] Code: Bad RIP value. [162513.516748] RSP: 002b:00007fff67b97868 EFLAGS: 00000202 ORIG_RAX: 00000000000000bc [162513.516750] RAX: ffffffffffffffda RBX: 0000000000000001 RCX: 00007f5238f56d5a [162513.516751] RDX: 000055b1fbb0d5a0 RSI: 00007fff67b978a0 RDI: 000055b1fbb0d470 [162513.516753] RBP: 000055b1fbb0d5a0 R08: 0000000000000001 R09: 00007fff67b97700 [162513.516754] R10: 0000000000000004 R11: 0000000000000202 R12: 0000000000000004 [162513.516756] R13: 0000000000000024 R14: 0000000000000001 R15: 00007fff67b978a0 [162513.516767] INFO: task fsstress:1356196 blocked for more than 120 seconds. [162513.517064] Not tainted 5.9.0-rc6-btrfs-next-69 #1 [162513.517365] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [162513.517763] task:fsstress state:D stack: 0 pid:1356196 ppid:1356177 flags:0x00004000 [162513.517780] Call Trace: [162513.517786] __schedule+0x5ce/0xd00 [162513.517789] ? _raw_spin_unlock_irqrestore+0x3c/0x60 [162513.517796] schedule+0x46/0xf0 [162513.517810] wait_current_trans+0xde/0x140 [btrfs] [162513.517814] ? finish_wait+0x90/0x90 [162513.517829] start_transaction+0x37c/0x5f0 [btrfs] [162513.517845] btrfs_attach_transaction_barrier+0x1f/0x50 [btrfs] [162513.517857] btrfs_sync_fs+0x61/0x1c0 [btrfs] [162513.517862] ? __ia32_sys_fdatasync+0x20/0x20 [162513.517865] iterate_supers+0x87/0xf0 [162513.517869] ksys_sync+0x60/0xb0 [162513.517872] __do_sys_sync+0xa/0x10 [162513.517875] do_syscall_64+0x33/0x80 [162513.517878] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [162513.517881] RIP: 0033:0x7f5238f50bd7 [162513.517883] Code: Bad RIP value. [162513.517885] RSP: 002b:00007fff67b978e8 EFLAGS: 00000206 ORIG_RAX: 00000000000000a2 [162513.517887] RAX: ffffffffffffffda RBX: 000055b1fad2c560 RCX: 00007f5238f50bd7 [162513.517889] RDX: 0000000000000000 RSI: 000000007660add2 RDI: 0000000000000053 [162513.517891] RBP: 0000000000000032 R08: 0000000000000067 R09: 00007f5239019be0 [162513.517893] R10: fffffffffffff24f R11: 0000000000000206 R12: 0000000000000053 [162513.517895] R13: 00007fff67b97950 R14: 00007fff67b97906 R15: 000055b1fad1a340 [162513.517908] INFO: task fsstress:1356197 blocked for more than 120 seconds. [162513.518298] Not tainted 5.9.0-rc6-btrfs-next-69 #1 [162513.518672] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [162513.519157] task:fsstress state:D stack: 0 pid:1356197 ppid:1356177 flags:0x00000000 [162513.519160] Call Trace: [162513.519165] __schedule+0x5ce/0xd00 [162513.519168] ? _raw_spin_unlock_irqrestore+0x3c/0x60 [162513.519174] schedule+0x46/0xf0 [162513.519190] wait_current_trans+0xde/0x140 [btrfs] [162513.519193] ? finish_wait+0x90/0x90 [162513.519206] start_transaction+0x4d7/0x5f0 [btrfs] [162513.519222] btrfs_create+0x57/0x200 [btrfs] [162513.519230] lookup_open+0x522/0x650 [162513.519246] path_openat+0x2b8/0xa50 [162513.519270] do_filp_open+0x91/0x100 [162513.519275] ? find_held_lock+0x32/0x90 [162513.519280] ? lock_acquired+0x33b/0x470 [162513.519285] ? do_raw_spin_unlock+0x4b/0xc0 [162513.519287] ? _raw_spin_unlock+0x29/0x40 [162513.519295] do_sys_openat2+0x20d/0x2d0 [162513.519300] do_sys_open+0x44/0x80 [162513.519304] do_syscall_64+0x33/0x80 [162513.519307] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [162513.519309] RIP: 0033:0x7f5238f4a903 [162513.519310] Code: Bad RIP value. [162513.519312] RSP: 002b:00007fff67b97758 EFLAGS: 00000246 ORIG_RAX: 0000000000000055 [162513.519314] RAX: ffffffffffffffda RBX: 00000000ffffffff RCX: 00007f5238f4a903 [162513.519316] RDX: 0000000000000000 RSI: 00000000000001b6 RDI: 000055b1fbb0d470 [162513.519317] RBP: 00007fff67b978c0 R08: 0000000000000001 R09: 0000000000000002 [162513.519319] R10: 00007fff67b974f7 R11: 0000000000000246 R12: 0000000000000013 [162513.519320] R13: 00000000000001b6 R14: 00007fff67b97906 R15: 000055b1fad1c620 [162513.519332] INFO: task btrfs:1356211 blocked for more than 120 seconds. [162513.519727] Not tainted 5.9.0-rc6-btrfs-next-69 #1 [162513.520115] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [162513.520508] task:btrfs state:D stack: 0 pid:1356211 ppid:1356178 flags:0x00004002 [162513.520511] Call Trace: [162513.520516] __schedule+0x5ce/0xd00 [162513.520519] ? _raw_spin_unlock_irqrestore+0x3c/0x60 [162513.520525] schedule+0x46/0xf0 [162513.520544] btrfs_scrub_pause+0x11f/0x180 [btrfs] [162513.520548] ? finish_wait+0x90/0x90 [162513.520562] btrfs_commit_transaction+0x45a/0xc30 [btrfs] [162513.520574] ? start_transaction+0xe0/0x5f0 [btrfs] [162513.520596] btrfs_dev_replace_finishing+0x6d8/0x711 [btrfs] [162513.520619] btrfs_dev_replace_by_ioctl.cold+0x1cc/0x1fd [btrfs] [162513.520639] btrfs_ioctl+0x2a25/0x36f0 [btrfs] [162513.520643] ? do_sigaction+0xf3/0x240 [162513.520645] ? find_held_lock+0x32/0x90 [162513.520648] ? do_sigaction+0xf3/0x240 [162513.520651] ? lock_acquired+0x33b/0x470 [162513.520655] ? _raw_spin_unlock_irq+0x24/0x50 [162513.520657] ? lockdep_hardirqs_on+0x7d/0x100 [162513.520660] ? _raw_spin_unlock_irq+0x35/0x50 [162513.520662] ? do_sigaction+0xf3/0x240 [162513.520671] ? __x64_sys_ioctl+0x83/0xb0 [162513.520672] __x64_sys_ioctl+0x83/0xb0 [162513.520677] do_syscall_64+0x33/0x80 [162513.520679] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [162513.520681] RIP: 0033:0x7fc3cd307d87 [162513.520682] Code: Bad RIP value. [162513.520684] RSP: 002b:00007ffe30a56bb8 EFLAGS: 00000202 ORIG_RAX: 0000000000000010 [162513.520686] RAX: ffffffffffffffda RBX: 0000000000000004 RCX: 00007fc3cd307d87 [162513.520687] RDX: 00007ffe30a57a30 RSI: 00000000ca289435 RDI: 0000000000000003 [162513.520689] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000 [162513.520690] R10: 0000000000000008 R11: 0000000000000202 R12: 0000000000000003 [162513.520692] R13: 0000557323a212e0 R14: 00007ffe30a5a520 R15: 0000000000000001 [162513.520703] Showing all locks held in the system: [162513.520712] 1 lock held by khungtaskd/54: [162513.520713] #0: ffffffffb40a91a0 (rcu_read_lock){....}-{1:2}, at: debug_show_all_locks+0x15/0x197 [162513.520728] 1 lock held by in:imklog/596: [162513.520729] #0: ffff8f3f0d781400 (&f->f_pos_lock){+.+.}-{3:3}, at: __fdget_pos+0x4d/0x60 [162513.520782] 1 lock held by btrfs-transacti/1356167: [162513.520784] #0: ffff8f3d810cc848 (&fs_info->transaction_kthread_mutex){+.+.}-{3:3}, at: transaction_kthread+0x4a/0x170 [btrfs] [162513.520798] 1 lock held by btrfs/1356190: [162513.520800] #0: ffff8f3d57644470 (sb_writers#15){.+.+}-{0:0}, at: mnt_want_write_file+0x22/0x60 [162513.520805] 1 lock held by fsstress/1356184: [162513.520806] #0: ffff8f3d576440e8 (&type->s_umount_key#62){++++}-{3:3}, at: iterate_supers+0x6f/0xf0 [162513.520811] 3 locks held by fsstress/1356185: [162513.520812] #0: ffff8f3d57644470 (sb_writers#15){.+.+}-{0:0}, at: mnt_want_write+0x20/0x50 [162513.520815] #1: ffff8f3d80a650b8 (&type->i_mutex_dir_key#10){++++}-{3:3}, at: vfs_setxattr+0x50/0x120 [162513.520820] #2: ffff8f3d57644690 (sb_internal#2){.+.+}-{0:0}, at: start_transaction+0x40e/0x5f0 [btrfs] [162513.520833] 1 lock held by fsstress/1356196: [162513.520834] #0: ffff8f3d576440e8 (&type->s_umount_key#62){++++}-{3:3}, at: iterate_supers+0x6f/0xf0 [162513.520838] 3 locks held by fsstress/1356197: [162513.520839] #0: ffff8f3d57644470 (sb_writers#15){.+.+}-{0:0}, at: mnt_want_write+0x20/0x50 [162513.520843] #1: ffff8f3d506465e8 (&type->i_mutex_dir_key#10){++++}-{3:3}, at: path_openat+0x2a7/0xa50 [162513.520846] #2: ffff8f3d57644690 (sb_internal#2){.+.+}-{0:0}, at: start_transaction+0x40e/0x5f0 [btrfs] [162513.520858] 2 locks held by btrfs/1356211: [162513.520859] #0: ffff8f3d810cde30 (&fs_info->dev_replace.lock_finishing_cancel_unmount){+.+.}-{3:3}, at: btrfs_dev_replace_finishing+0x52/0x711 [btrfs] [162513.520877] #1: ffff8f3d57644690 (sb_internal#2){.+.+}-{0:0}, at: start_transaction+0x40e/0x5f0 [btrfs] This was weird because the stack traces show that a transaction commit, triggered by a device replace operation, is blocking trying to pause any running scrubs but there are no stack traces of blocked tasks doing a scrub. After poking around with drgn, I noticed there was a scrub task that was constantly running and blocking for shorts periods of time: >>> t = find_task(prog, 1356190) >>> prog.stack_trace(t) #0 __schedule+0x5ce/0xcfc #1 schedule+0x46/0xe4 #2 schedule_timeout+0x1df/0x475 #3 btrfs_reada_wait+0xda/0x132 #4 scrub_stripe+0x2a8/0x112f #5 scrub_chunk+0xcd/0x134 #6 scrub_enumerate_chunks+0x29e/0x5ee #7 btrfs_scrub_dev+0x2d5/0x91b #8 btrfs_ioctl+0x7f5/0x36e7 #9 __x64_sys_ioctl+0x83/0xb0 #10 do_syscall_64+0x33/0x77 #11 entry_SYSCALL_64+0x7c/0x156 Which corresponds to: int btrfs_reada_wait(void *handle) { struct reada_control *rc = handle; struct btrfs_fs_info *fs_info = rc->fs_info; while (atomic_read(&rc->elems)) { if (!atomic_read(&fs_info->reada_works_cnt)) reada_start_machine(fs_info); wait_event_timeout(rc->wait, atomic_read(&rc->elems) == 0, (HZ + 9) / 10); } (...) So the counter "rc->elems" was set to 1 and never decreased to 0, causing the scrub task to loop forever in that function. Then I used the following script for drgn to check the readahead requests: $ cat dump_reada.py import sys import drgn from drgn import NULL, Object, cast, container_of, execscript, \ reinterpret, sizeof from drgn.helpers.linux import * mnt_path = b"/home/fdmanana/btrfs-tests/scratch_1" mnt = None for mnt in for_each_mount(prog, dst = mnt_path): pass if mnt is None: sys.stderr.write(f'Error: mount point {mnt_path} not found\n') sys.exit(1) fs_info = cast('struct btrfs_fs_info *', mnt.mnt.mnt_sb.s_fs_info) def dump_re(re): nzones = re.nzones.value_() print(f're at {hex(re.value_())}') print(f'\t logical {re.logical.value_()}') print(f'\t refcnt {re.refcnt.value_()}') print(f'\t nzones {nzones}') for i in range(nzones): dev = re.zones[i].device name = dev.name.str.string_() print(f'\t\t dev id {dev.devid.value_()} name {name}') print() for _, e in radix_tree_for_each(fs_info.reada_tree): re = cast('struct reada_extent *', e) dump_re(re) $ drgn dump_reada.py re at 0xffff8f3da9d25ad8 logical 38928384 refcnt 1 nzones 1 dev id 0 name b'/dev/sdd' $ So there was one readahead extent with a single zone corresponding to the source device of that last device replace operation logged in dmesg/syslog. Also the ID of that zone's device was 0 which is a special value set in the source device of a device replace operation when the operation finishes (constant BTRFS_DEV_REPLACE_DEVID set at btrfs_dev_replace_finishing()), confirming again that device /dev/sdd was the source of a device replace operation. Normally there should be as many zones in the readahead extent as there are devices, and I wasn't expecting the extent to be in a block group with a 'single' profile, so I went and confirmed with the following drgn script that there weren't any single profile block groups: $ cat dump_block_groups.py import sys import drgn from drgn import NULL, Object, cast, container_of, execscript, \ reinterpret, sizeof from drgn.helpers.linux import * mnt_path = b"/home/fdmanana/btrfs-tests/scratch_1" mnt = None for mnt in for_each_mount(prog, dst = mnt_path): pass if mnt is None: sys.stderr.write(f'Error: mount point {mnt_path} not found\n') sys.exit(1) fs_info = cast('struct btrfs_fs_info *', mnt.mnt.mnt_sb.s_fs_info) BTRFS_BLOCK_GROUP_DATA = (1 << 0) BTRFS_BLOCK_GROUP_SYSTEM = (1 << 1) BTRFS_BLOCK_GROUP_METADATA = (1 << 2) BTRFS_BLOCK_GROUP_RAID0 = (1 << 3) BTRFS_BLOCK_GROUP_RAID1 = (1 << 4) BTRFS_BLOCK_GROUP_DUP = (1 << 5) BTRFS_BLOCK_GROUP_RAID10 = (1 << 6) BTRFS_BLOCK_GROUP_RAID5 = (1 << 7) BTRFS_BLOCK_GROUP_RAID6 = (1 << 8) BTRFS_BLOCK_GROUP_RAID1C3 = (1 << 9) BTRFS_BLOCK_GROUP_RAID1C4 = (1 << 10) def bg_flags_string(bg): flags = bg.flags.value_() ret = '' if flags & BTRFS_BLOCK_GROUP_DATA: ret = 'data' if flags & BTRFS_BLOCK_GROUP_METADATA: if len(ret) > 0: ret += '|' ret += 'meta' if flags & BTRFS_BLOCK_GROUP_SYSTEM: if len(ret) > 0: ret += '|' ret += 'system' if flags & BTRFS_BLOCK_GROUP_RAID0: ret += ' raid0' elif flags & BTRFS_BLOCK_GROUP_RAID1: ret += ' raid1' elif flags & BTRFS_BLOCK_GROUP_DUP: ret += ' dup' elif flags & BTRFS_BLOCK_GROUP_RAID10: ret += ' raid10' elif flags & BTRFS_BLOCK_GROUP_RAID5: ret += ' raid5' elif flags & BTRFS_BLOCK_GROUP_RAID6: ret += ' raid6' elif flags & BTRFS_BLOCK_GROUP_RAID1C3: ret += ' raid1c3' elif flags & BTRFS_BLOCK_GROUP_RAID1C4: ret += ' raid1c4' else: ret += ' single' return ret def dump_bg(bg): print() print(f'block group at {hex(bg.value_())}') print(f'\t start {bg.start.value_()} length {bg.length.value_()}') print(f'\t flags {bg.flags.value_()} - {bg_flags_string(bg)}') bg_root = fs_info.block_group_cache_tree.address_of_() for bg in rbtree_inorder_for_each_entry('struct btrfs_block_group', bg_root, 'cache_node'): dump_bg(bg) $ drgn dump_block_groups.py block group at 0xffff8f3d673b0400 start 22020096 length 16777216 flags 258 - system raid6 block group at 0xffff8f3d53ddb400 start 38797312 length 536870912 flags 260 - meta raid6 block group at 0xffff8f3d5f4d9c00 start 575668224 length 2147483648 flags 257 - data raid6 block group at 0xffff8f3d08189000 start 2723151872 length 67108864 flags 258 - system raid6 block group at 0xffff8f3db70ff000 start 2790260736 length 1073741824 flags 260 - meta raid6 block group at 0xffff8f3d5f4dd800 start 3864002560 length 67108864 flags 258 - system raid6 block group at 0xffff8f3d67037000 start 3931111424 length 2147483648 flags 257 - data raid6 $ So there were only 2 reasons left for having a readahead extent with a single zone: reada_find_zone(), called when creating a readahead extent, returned NULL either because we failed to find the corresponding block group or because a memory allocation failed. With some additional and custom tracing I figured out that on every further ocurrence of the problem the block group had just been deleted when we were looping to create the zones for the readahead extent (at reada_find_extent()), so we ended up with only one zone in the readahead extent, corresponding to a device that ends up getting replaced. So after figuring that out it became obvious why the hang happens: 1) Task A starts a scrub on any device of the filesystem, except for device /dev/sdd; 2) Task B starts a device replace with /dev/sdd as the source device; 3) Task A calls btrfs_reada_add() from scrub_stripe() and it is currently starting to scrub a stripe from block group X. This call to btrfs_reada_add() is the one for the extent tree. When btrfs_reada_add() calls reada_add_block(), it passes the logical address of the extent tree's root node as its 'logical' argument - a value of 38928384; 4) Task A then enters reada_find_extent(), called from reada_add_block(). It finds there isn't any existing readahead extent for the logical address 38928384, so it proceeds to the path of creating a new one. It calls btrfs_map_block() to find out which stripes exist for the block group X. On the first iteration of the for loop that iterates over the stripes, it finds the stripe for device /dev/sdd, so it creates one zone for that device and adds it to the readahead extent. Before getting into the second iteration of the loop, the cleanup kthread deletes block group X because it was empty. So in the iterations for the remaining stripes it does not add more zones to the readahead extent, because the calls to reada_find_zone() returned NULL because they couldn't find block group X anymore. As a result the new readahead extent has a single zone, corresponding to the device /dev/sdd; 4) Before task A returns to btrfs_reada_add() and queues the readahead job for the readahead work queue, task B finishes the device replace and at btrfs_dev_replace_finishing() swaps the device /dev/sdd with the new device /dev/sdg; 5) Task A returns to reada_add_block(), which increments the counter "->elems" of the reada_control structure allocated at btrfs_reada_add(). Then it returns back to btrfs_reada_add() and calls reada_start_machine(). This queues a job in the readahead work queue to run the function reada_start_machine_worker(), which calls __reada_start_machine(). At __reada_start_machine() we take the device list mutex and for each device found in the current device list, we call reada_start_machine_dev() to start the readahead work. However at this point the device /dev/sdd was already freed and is not in the device list anymore. This means the corresponding readahead for the extent at 38928384 is never started, and therefore the "->elems" counter of the reada_control structure allocated at btrfs_reada_add() never goes down to 0, causing the call to btrfs_reada_wait(), done by the scrub task, to wait forever. Note that the readahead request can be made either after the device replace started or before it started, however in pratice it is very unlikely that a device replace is able to start after a readahead request is made and is able to complete before the readahead request completes - maybe only on a very small and nearly empty filesystem. This hang however is not the only problem we can have with readahead and device removals. When the readahead extent has other zones other than the one corresponding to the device that is being removed (either by a device replace or a device remove operation), we risk having a use-after-free on the device when dropping the last reference of the readahead extent. For example if we create a readahead extent with two zones, one for the device /dev/sdd and one for the device /dev/sde: 1) Before the readahead worker starts, the device /dev/sdd is removed, and the corresponding btrfs_device structure is freed. However the readahead extent still has the zone pointing to the device structure; 2) When the readahead worker starts, it only finds device /dev/sde in the current device list of the filesystem; 3) It starts the readahead work, at reada_start_machine_dev(), using the device /dev/sde; 4) Then when it finishes reading the extent from device /dev/sde, it calls __readahead_hook() which ends up dropping the last reference on the readahead extent through the last call to reada_extent_put(); 5) At reada_extent_put() it iterates over each zone of the readahead extent and attempts to delete an element from the device's 'reada_extents' radix tree, resulting in a use-after-free, as the device pointer of the zone for /dev/sdd is now stale. We can also access the device after dropping the last reference of a zone, through reada_zone_release(), also called by reada_extent_put(). And a device remove suffers the same problem, however since it shrinks the device size down to zero before removing the device, it is very unlikely to still have readahead requests not completed by the time we free the device, the only possibility is if the device has a very little space allocated. While the hang problem is exclusive to scrub, since it is currently the only user of btrfs_reada_add() and btrfs_reada_wait(), the use-after-free problem affects any path that triggers readhead, which includes btree_readahead_hook() and __readahead_hook() (a readahead worker can trigger readahed for the children of a node) for example - any path that ends up calling reada_add_block() can trigger the use-after-free after a device is removed. So fix this by waiting for any readahead requests for a device to complete before removing a device, ensuring that while waiting for existing ones no new ones can be made. This problem has been around for a very long time - the readahead code was added in 2011, device remove exists since 2008 and device replace was introduced in 2013, hard to pick a specific commit for a git Fixes tag. CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/ctree.h | 2 ++ fs/btrfs/dev-replace.c | 5 +++++ fs/btrfs/reada.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/volumes.c | 3 +++ fs/btrfs/volumes.h | 1 + 5 files changed, 56 insertions(+) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 23b4f38e2392..27128164fac9 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3404,6 +3404,8 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root, int btrfs_reada_wait(void *handle); void btrfs_reada_detach(void *handle); int btree_readahead_hook(struct extent_buffer *eb, int err); +void btrfs_reada_remove_dev(struct btrfs_device *dev); +void btrfs_reada_undo_remove_dev(struct btrfs_device *dev); static inline int is_fstree(u64 rootid) { diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 34ddf2d75c1a..96843934dcbb 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -631,6 +631,9 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, } btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1); + if (!scrub_ret) + btrfs_reada_remove_dev(src_device); + /* * We have to use this loop approach because at this point src_device * has to be available for transaction commit to complete, yet new @@ -639,6 +642,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, while (1) { trans = btrfs_start_transaction(root, 0); if (IS_ERR(trans)) { + btrfs_reada_undo_remove_dev(src_device); mutex_unlock(&dev_replace->lock_finishing_cancel_unmount); return PTR_ERR(trans); } @@ -689,6 +693,7 @@ error: up_write(&dev_replace->rwsem); mutex_unlock(&fs_info->chunk_mutex); mutex_unlock(&fs_info->fs_devices->device_list_mutex); + btrfs_reada_undo_remove_dev(src_device); btrfs_rm_dev_replace_blocked(fs_info); if (tgt_device) btrfs_destroy_dev_replace_tgtdev(tgt_device); diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c index dd007258b2fb..2656dc8de99c 100644 --- a/fs/btrfs/reada.c +++ b/fs/btrfs/reada.c @@ -421,6 +421,9 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info, if (!dev->bdev) continue; + if (test_bit(BTRFS_DEV_STATE_NO_READA, &dev->dev_state)) + continue; + if (dev_replace_is_ongoing && dev == fs_info->dev_replace.tgtdev) { /* @@ -1014,3 +1017,45 @@ void btrfs_reada_detach(void *handle) kref_put(&rc->refcnt, reada_control_release); } + +/* + * Before removing a device (device replace or device remove ioctls), call this + * function to wait for all existing readahead requests on the device and to + * make sure no one queues more readahead requests for the device. + * + * Must be called without holding neither the device list mutex nor the device + * replace semaphore, otherwise it will deadlock. + */ +void btrfs_reada_remove_dev(struct btrfs_device *dev) +{ + struct btrfs_fs_info *fs_info = dev->fs_info; + + /* Serialize with readahead extent creation at reada_find_extent(). */ + spin_lock(&fs_info->reada_lock); + set_bit(BTRFS_DEV_STATE_NO_READA, &dev->dev_state); + spin_unlock(&fs_info->reada_lock); + + /* + * There might be readahead requests added to the radix trees which + * were not yet added to the readahead work queue. We need to start + * them and wait for their completion, otherwise we can end up with + * use-after-free problems when dropping the last reference on the + * readahead extents and their zones, as they need to access the + * device structure. + */ + reada_start_machine(fs_info); + btrfs_flush_workqueue(fs_info->readahead_workers); +} + +/* + * If when removing a device (device replace or device remove ioctls) an error + * happens after calling btrfs_reada_remove_dev(), call this to undo what that + * function did. This is safe to call even if btrfs_reada_remove_dev() was not + * called before. + */ +void btrfs_reada_undo_remove_dev(struct btrfs_device *dev) +{ + spin_lock(&dev->fs_info->reada_lock); + clear_bit(BTRFS_DEV_STATE_NO_READA, &dev->dev_state); + spin_unlock(&dev->fs_info->reada_lock); +} diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 76a937440335..58910a0a3e4a 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -2208,6 +2208,8 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path, mutex_unlock(&uuid_mutex); ret = btrfs_shrink_device(device, 0); + if (!ret) + btrfs_reada_remove_dev(device); mutex_lock(&uuid_mutex); if (ret) goto error_undo; @@ -2294,6 +2296,7 @@ out: return ret; error_undo: + btrfs_reada_undo_remove_dev(device); if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) { mutex_lock(&fs_info->chunk_mutex); list_add(&device->dev_alloc_list, diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 5acf5c507ec2..aa6a6d7b2978 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -56,6 +56,7 @@ struct btrfs_io_geometry { #define BTRFS_DEV_STATE_MISSING (2) #define BTRFS_DEV_STATE_REPLACE_TGT (3) #define BTRFS_DEV_STATE_FLUSH_SENT (4) +#define BTRFS_DEV_STATE_NO_READA (5) struct btrfs_device { struct list_head dev_list; /* device_list_mutex */ -- cgit v1.2.3 From 2600a131e1f619769bb6970a5d73474466f1c78d Mon Sep 17 00:00:00 2001 From: Sandeep Singh Date: Wed, 28 Oct 2020 22:31:23 +0200 Subject: usb: xhci: Workaround for S3 issue on AMD SNPS 3.0 xHC commit 2a632815683d2d34df52b701a36fe5ac6654e719 upstream. On some platform of AMD, S3 fails with HCE and SRE errors. To fix this, need to disable a bit which is enable in sparse controller. Cc: stable@vger.kernel.org #v4.19+ Signed-off-by: Sanket Goswami Signed-off-by: Sandeep Singh Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20201028203124.375344-3-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-pci.c | 17 +++++++++++++++++ drivers/usb/host/xhci.h | 1 + 2 files changed, 18 insertions(+) diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index bbd616324faa..3c90c14390d6 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -21,6 +21,8 @@ #define SSIC_PORT_CFG2_OFFSET 0x30 #define PROG_DONE (1 << 30) #define SSIC_PORT_UNUSED (1 << 31) +#define SPARSE_DISABLE_BIT 17 +#define SPARSE_CNTL_ENABLE 0xC12C /* Device for a quirk */ #define PCI_VENDOR_ID_FRESCO_LOGIC 0x1b73 @@ -149,6 +151,9 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) (pdev->device == 0x15e0 || pdev->device == 0x15e1)) xhci->quirks |= XHCI_SNPS_BROKEN_SUSPEND; + if (pdev->vendor == PCI_VENDOR_ID_AMD && pdev->device == 0x15e5) + xhci->quirks |= XHCI_DISABLE_SPARSE; + if (pdev->vendor == PCI_VENDOR_ID_AMD) xhci->quirks |= XHCI_TRUST_TX_LENGTH; @@ -467,6 +472,15 @@ static void xhci_pme_quirk(struct usb_hcd *hcd) readl(reg); } +static void xhci_sparse_control_quirk(struct usb_hcd *hcd) +{ + u32 reg; + + reg = readl(hcd->regs + SPARSE_CNTL_ENABLE); + reg &= ~BIT(SPARSE_DISABLE_BIT); + writel(reg, hcd->regs + SPARSE_CNTL_ENABLE); +} + static int xhci_pci_suspend(struct usb_hcd *hcd, bool do_wakeup) { struct xhci_hcd *xhci = hcd_to_xhci(hcd); @@ -486,6 +500,9 @@ static int xhci_pci_suspend(struct usb_hcd *hcd, bool do_wakeup) if (xhci->quirks & XHCI_SSIC_PORT_UNUSED) xhci_ssic_port_unused_quirk(hcd, true); + if (xhci->quirks & XHCI_DISABLE_SPARSE) + xhci_sparse_control_quirk(hcd); + ret = xhci_suspend(xhci, do_wakeup); if (ret && (xhci->quirks & XHCI_SSIC_PORT_UNUSED)) xhci_ssic_port_unused_quirk(hcd, false); diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index c656b41b57b5..b483317bcb17 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1873,6 +1873,7 @@ struct xhci_hcd { #define XHCI_DEFAULT_PM_RUNTIME_ALLOW BIT_ULL(33) #define XHCI_RESET_PLL_ON_DISCONNECT BIT_ULL(34) #define XHCI_SNPS_BROKEN_SUSPEND BIT_ULL(35) +#define XHCI_DISABLE_SPARSE BIT_ULL(38) unsigned int num_active_eps; unsigned int limit_active_eps; -- cgit v1.2.3 From 3468cbceb563ac37cc7e96c87bc9fd1931d2b297 Mon Sep 17 00:00:00 2001 From: Raymond Tan Date: Fri, 21 Aug 2020 16:11:01 +0300 Subject: usb: dwc3: pci: Allow Elkhart Lake to utilize DSM method for PM functionality commit a609ce2a13360d639b384b6ca783b38c1247f2db upstream. Similar to some other IA platforms, Elkhart Lake too depends on the PMU register write to request transition of Dx power state. Thus, we add the PCI_DEVICE_ID_INTEL_EHLLP to the list of devices that shall execute the ACPI _DSM method during D0/D3 sequence. [heikki.krogerus@linux.intel.com: included Fixes tag] Fixes: dbb0569de852 ("usb: dwc3: pci: Add Support for Intel Elkhart Lake Devices") Cc: stable@vger.kernel.org Signed-off-by: Raymond Tan Signed-off-by: Heikki Krogerus Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/dwc3-pci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c index 139474c3e77b..ba88039449e0 100644 --- a/drivers/usb/dwc3/dwc3-pci.c +++ b/drivers/usb/dwc3/dwc3-pci.c @@ -147,7 +147,8 @@ static int dwc3_pci_quirks(struct dwc3_pci *dwc) if (pdev->vendor == PCI_VENDOR_ID_INTEL) { if (pdev->device == PCI_DEVICE_ID_INTEL_BXT || - pdev->device == PCI_DEVICE_ID_INTEL_BXT_M) { + pdev->device == PCI_DEVICE_ID_INTEL_BXT_M || + pdev->device == PCI_DEVICE_ID_INTEL_EHLLP) { guid_parse(PCI_INTEL_BXT_DSM_GUID, &dwc->guid); dwc->has_dsm_for_pm = true; } -- cgit v1.2.3 From 1c9e86c933eaf0bcd579f26453f76e1114a8d834 Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Thu, 24 Sep 2020 01:21:43 -0700 Subject: usb: dwc3: ep0: Fix ZLP for OUT ep0 requests commit 66706077dc89c66a4777a4c6298273816afb848c upstream. The current ZLP handling for ep0 requests is only for control IN requests. For OUT direction, DWC3 needs to check and setup for MPS alignment. Usually, control OUT requests can indicate its transfer size via the wLength field of the control message. So usb_request->zero is usually not needed for OUT direction. To handle ZLP OUT for control endpoint, make sure the TRB is MPS size. Cc: stable@vger.kernel.org Fixes: c7fcdeb2627c ("usb: dwc3: ep0: simplify EP0 state machine") Fixes: d6e5a549cc4d ("usb: dwc3: simplify ZLP handling") Signed-off-by: Thinh Nguyen Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/ep0.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/usb/dwc3/ep0.c b/drivers/usb/dwc3/ep0.c index 6dee4dabc0a4..697656aad6ea 100644 --- a/drivers/usb/dwc3/ep0.c +++ b/drivers/usb/dwc3/ep0.c @@ -942,12 +942,16 @@ static void dwc3_ep0_xfer_complete(struct dwc3 *dwc, static void __dwc3_ep0_do_control_data(struct dwc3 *dwc, struct dwc3_ep *dep, struct dwc3_request *req) { + unsigned int trb_length = 0; int ret; req->direction = !!dep->number; if (req->request.length == 0) { - dwc3_ep0_prepare_one_trb(dep, dwc->ep0_trb_addr, 0, + if (!req->direction) + trb_length = dep->endpoint.maxpacket; + + dwc3_ep0_prepare_one_trb(dep, dwc->bounce_addr, trb_length, DWC3_TRBCTL_CONTROL_DATA, false); ret = dwc3_ep0_start_trans(dep); } else if (!IS_ALIGNED(req->request.length, dep->endpoint.maxpacket) @@ -994,9 +998,12 @@ static void __dwc3_ep0_do_control_data(struct dwc3 *dwc, req->trb = &dwc->ep0_trb[dep->trb_enqueue - 1]; + if (!req->direction) + trb_length = dep->endpoint.maxpacket; + /* Now prepare one extra TRB to align transfer size */ dwc3_ep0_prepare_one_trb(dep, dwc->bounce_addr, - 0, DWC3_TRBCTL_CONTROL_DATA, + trb_length, DWC3_TRBCTL_CONTROL_DATA, false); ret = dwc3_ep0_start_trans(dep); } else { -- cgit v1.2.3 From f935b70cf724ba77f21189e0335954c75804451e Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Thu, 24 Sep 2020 01:21:18 -0700 Subject: usb: dwc3: gadget: Check MPS of the request length commit ca3df3468eec87f6374662f7de425bc44c3810c1 upstream. When preparing for SG, not all the entries are prepared at once. When resume, don't use the remaining request length to calculate for MPS alignment. Use the entire request->length to do that. Cc: stable@vger.kernel.org Fixes: 5d187c0454ef ("usb: dwc3: gadget: Don't setup more than requested") Signed-off-by: Thinh Nguyen Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 809103254fc6..bcf554e76827 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -1057,6 +1057,8 @@ static void dwc3_prepare_one_trb_sg(struct dwc3_ep *dep, struct scatterlist *s; int i; unsigned int length = req->request.length; + unsigned int maxp = usb_endpoint_maxp(dep->endpoint.desc); + unsigned int rem = length % maxp; unsigned int remaining = req->request.num_mapped_sgs - req->num_queued_sgs; @@ -1068,8 +1070,6 @@ static void dwc3_prepare_one_trb_sg(struct dwc3_ep *dep, length -= sg_dma_len(s); for_each_sg(sg, s, remaining, i) { - unsigned int maxp = usb_endpoint_maxp(dep->endpoint.desc); - unsigned int rem = length % maxp; unsigned int trb_length; unsigned chain = true; -- cgit v1.2.3 From 726f638e7cd1ac994db3fbe5817f9c0f186e9196 Mon Sep 17 00:00:00 2001 From: Li Jun Date: Tue, 28 Jul 2020 20:42:41 +0800 Subject: usb: dwc3: core: add phy cleanup for probe error handling commit 03c1fd622f72c7624c81b64fdba4a567ae5ee9cb upstream. Add the phy cleanup if dwc3 mode init fail, which is the missing part of de-init for dwc3 core init. Fixes: c499ff71ff2a ("usb: dwc3: core: re-factor init and exit paths") Cc: Signed-off-by: Li Jun Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/core.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index 4cbf29539006..1d9949b79dde 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -1535,6 +1535,17 @@ static int dwc3_probe(struct platform_device *pdev) err5: dwc3_event_buffers_cleanup(dwc); + + usb_phy_shutdown(dwc->usb2_phy); + usb_phy_shutdown(dwc->usb3_phy); + phy_exit(dwc->usb2_generic_phy); + phy_exit(dwc->usb3_generic_phy); + + usb_phy_set_suspend(dwc->usb2_phy, 1); + usb_phy_set_suspend(dwc->usb3_phy, 1); + phy_power_off(dwc->usb2_generic_phy); + phy_power_off(dwc->usb3_generic_phy); + dwc3_ulpi_exit(dwc); err4: -- cgit v1.2.3 From 97224cdc0440b006b60239b90cd2c1ffb8bb23aa Mon Sep 17 00:00:00 2001 From: Li Jun Date: Tue, 28 Jul 2020 20:42:40 +0800 Subject: usb: dwc3: core: don't trigger runtime pm when remove driver commit 266d0493900ac5d6a21cdbe6b1624ed2da94d47a upstream. No need to trigger runtime pm in driver removal, otherwise if user disable auto suspend via sys file, runtime suspend may be entered, which will call dwc3_core_exit() again and there will be clock disable not balance warning: [ 2026.820154] xhci-hcd xhci-hcd.0.auto: remove, state 4 [ 2026.825268] usb usb2: USB disconnect, device number 1 [ 2026.831017] xhci-hcd xhci-hcd.0.auto: USB bus 2 deregistered [ 2026.836806] xhci-hcd xhci-hcd.0.auto: remove, state 4 [ 2026.842029] usb usb1: USB disconnect, device number 1 [ 2026.848029] xhci-hcd xhci-hcd.0.auto: USB bus 1 deregistered [ 2026.865889] ------------[ cut here ]------------ [ 2026.870506] usb2_ctrl_root_clk already disabled [ 2026.875082] WARNING: CPU: 0 PID: 731 at drivers/clk/clk.c:958 clk_core_disable+0xa0/0xa8 [ 2026.883170] Modules linked in: dwc3(-) phy_fsl_imx8mq_usb [last unloaded: dwc3] [ 2026.890488] CPU: 0 PID: 731 Comm: rmmod Not tainted 5.8.0-rc7-00280-g9d08cca-dirty #245 [ 2026.898489] Hardware name: NXP i.MX8MQ EVK (DT) [ 2026.903020] pstate: 20000085 (nzCv daIf -PAN -UAO BTYPE=--) [ 2026.908594] pc : clk_core_disable+0xa0/0xa8 [ 2026.912777] lr : clk_core_disable+0xa0/0xa8 [ 2026.916958] sp : ffff8000121b39a0 [ 2026.920271] x29: ffff8000121b39a0 x28: ffff0000b11f3700 [ 2026.925583] x27: 0000000000000000 x26: ffff0000b539c700 [ 2026.930895] x25: 000001d7e44e1232 x24: ffff0000b76fa800 [ 2026.936208] x23: ffff0000b76fa6f8 x22: ffff800008d01040 [ 2026.941520] x21: ffff0000b539ce00 x20: ffff0000b7105000 [ 2026.946832] x19: ffff0000b7105000 x18: 0000000000000010 [ 2026.952144] x17: 0000000000000001 x16: 0000000000000000 [ 2026.957456] x15: ffff0000b11f3b70 x14: ffffffffffffffff [ 2026.962768] x13: ffff8000921b36f7 x12: ffff8000121b36ff [ 2026.968080] x11: ffff8000119e1000 x10: ffff800011bf26d0 [ 2026.973392] x9 : 0000000000000000 x8 : ffff800011bf3000 [ 2026.978704] x7 : ffff800010695d68 x6 : 0000000000000252 [ 2026.984016] x5 : ffff0000bb9881f0 x4 : 0000000000000000 [ 2026.989327] x3 : 0000000000000027 x2 : 0000000000000023 [ 2026.994639] x1 : ac2fa471aa7cab00 x0 : 0000000000000000 [ 2026.999951] Call trace: [ 2027.002401] clk_core_disable+0xa0/0xa8 [ 2027.006238] clk_core_disable_lock+0x20/0x38 [ 2027.010508] clk_disable+0x1c/0x28 [ 2027.013911] clk_bulk_disable+0x34/0x50 [ 2027.017758] dwc3_core_exit+0xec/0x110 [dwc3] [ 2027.022122] dwc3_suspend_common+0x84/0x188 [dwc3] [ 2027.026919] dwc3_runtime_suspend+0x74/0x9c [dwc3] [ 2027.031712] pm_generic_runtime_suspend+0x28/0x40 [ 2027.036419] genpd_runtime_suspend+0xa0/0x258 [ 2027.040777] __rpm_callback+0x88/0x140 [ 2027.044526] rpm_callback+0x20/0x80 [ 2027.048015] rpm_suspend+0xd0/0x418 [ 2027.051503] __pm_runtime_suspend+0x58/0xa0 [ 2027.055693] dwc3_runtime_idle+0x7c/0x90 [dwc3] [ 2027.060224] __rpm_callback+0x88/0x140 [ 2027.063973] rpm_idle+0x78/0x150 [ 2027.067201] __pm_runtime_idle+0x58/0xa0 [ 2027.071130] dwc3_remove+0x64/0xc0 [dwc3] [ 2027.075140] platform_drv_remove+0x28/0x48 [ 2027.079239] device_release_driver_internal+0xf4/0x1c0 [ 2027.084377] driver_detach+0x4c/0xd8 [ 2027.087954] bus_remove_driver+0x54/0xa8 [ 2027.091877] driver_unregister+0x2c/0x58 [ 2027.095799] platform_driver_unregister+0x10/0x18 [ 2027.100509] dwc3_driver_exit+0x14/0x1408 [dwc3] [ 2027.105129] __arm64_sys_delete_module+0x178/0x218 [ 2027.109922] el0_svc_common.constprop.0+0x68/0x160 [ 2027.114714] do_el0_svc+0x20/0x80 [ 2027.118031] el0_sync_handler+0x88/0x190 [ 2027.121953] el0_sync+0x140/0x180 [ 2027.125267] ---[ end trace 027f4f8189958f1f ]--- [ 2027.129976] ------------[ cut here ]------------ Fixes: fc8bb91bc83e ("usb: dwc3: implement runtime PM") Cc: Signed-off-by: Li Jun Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index 1d9949b79dde..440dbf55ddf7 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -1581,9 +1581,9 @@ static int dwc3_remove(struct platform_device *pdev) dwc3_core_exit(dwc); dwc3_ulpi_exit(dwc); - pm_runtime_put_sync(&pdev->dev); - pm_runtime_allow(&pdev->dev); pm_runtime_disable(&pdev->dev); + pm_runtime_put_noidle(&pdev->dev); + pm_runtime_set_suspended(&pdev->dev); dwc3_free_event_buffers(dwc); dwc3_free_scratch_buffers(dwc); -- cgit v1.2.3 From 206dcd6ce82f1b5ae6f357694c3688693a1e22cd Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Wed, 2 Sep 2020 18:42:58 -0700 Subject: usb: dwc3: gadget: Resume pending requests after CLEAR_STALL commit c503672abe1348f10f5a54a662336358c6e1a297 upstream. The function driver may queue new requests right after halting the endpoint (i.e. queue new requests while the endpoint is stalled). There's no restriction preventing it from doing so. However, dwc3 currently drops those requests after CLEAR_STALL. The driver should only drop started requests. Keep the pending requests in the pending list to resume and process them after the host issues ClearFeature(Halt) to the endpoint. Cc: stable@vger.kernel.org Fixes: cb11ea56f37a ("usb: dwc3: gadget: Properly handle ClearFeature(halt)") Signed-off-by: Thinh Nguyen Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index bcf554e76827..6c8e495455d4 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -1521,8 +1521,13 @@ static int __dwc3_gadget_ep_queue(struct dwc3_ep *dep, struct dwc3_request *req) list_add_tail(&req->list, &dep->pending_list); req->status = DWC3_REQUEST_STATUS_QUEUED; - /* Start the transfer only after the END_TRANSFER is completed */ - if (dep->flags & DWC3_EP_END_TRANSFER_PENDING) { + /* + * Start the transfer only after the END_TRANSFER is completed + * and endpoint STALL is cleared. + */ + if ((dep->flags & DWC3_EP_END_TRANSFER_PENDING) || + (dep->flags & DWC3_EP_WEDGE) || + (dep->flags & DWC3_EP_STALL)) { dep->flags |= DWC3_EP_DELAY_START; return 0; } @@ -1728,13 +1733,14 @@ int __dwc3_gadget_ep_set_halt(struct dwc3_ep *dep, int value, int protocol) list_for_each_entry_safe(req, tmp, &dep->started_list, list) dwc3_gadget_move_cancelled_request(req); - list_for_each_entry_safe(req, tmp, &dep->pending_list, list) - dwc3_gadget_move_cancelled_request(req); - - if (!(dep->flags & DWC3_EP_END_TRANSFER_PENDING)) { - dep->flags &= ~DWC3_EP_DELAY_START; + if (!(dep->flags & DWC3_EP_END_TRANSFER_PENDING)) dwc3_gadget_ep_cleanup_cancelled_requests(dep); - } + + if ((dep->flags & DWC3_EP_DELAY_START) && + !usb_endpoint_xfer_isoc(dep->endpoint.desc)) + __dwc3_gadget_kick_transfer(dep); + + dep->flags &= ~DWC3_EP_DELAY_START; } return ret; -- cgit v1.2.3 From 2850f148cd7fde40b24108a168ed84f58195314b Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Wed, 2 Sep 2020 18:43:04 -0700 Subject: usb: dwc3: gadget: END_TRANSFER before CLEAR_STALL command commit d97c78a1908e59a1fdbcbece87cd0440b5d7a1f2 upstream. According the programming guide (for all DWC3 IPs), when the driver handles ClearFeature(halt) request, it should issue CLEAR_STALL command _after_ the END_TRANSFER command completes. The END_TRANSFER command may take some time to complete. So, delay the ClearFeature(halt) request control status stage and wait for END_TRANSFER command completion interrupt. Only after END_TRANSFER command completes that the driver may issue CLEAR_STALL command. Cc: stable@vger.kernel.org Fixes: cb11ea56f37a ("usb: dwc3: gadget: Properly handle ClearFeature(halt)") Signed-off-by: Thinh Nguyen Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/core.h | 1 + drivers/usb/dwc3/ep0.c | 16 ++++++++++++++++ drivers/usb/dwc3/gadget.c | 40 ++++++++++++++++++++++++++++++++-------- drivers/usb/dwc3/gadget.h | 1 + 4 files changed, 50 insertions(+), 8 deletions(-) diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h index 4dfbffa944de..c848f9164f92 100644 --- a/drivers/usb/dwc3/core.h +++ b/drivers/usb/dwc3/core.h @@ -700,6 +700,7 @@ struct dwc3_ep { #define DWC3_EP_END_TRANSFER_PENDING BIT(4) #define DWC3_EP_PENDING_REQUEST BIT(5) #define DWC3_EP_DELAY_START BIT(6) +#define DWC3_EP_PENDING_CLEAR_STALL BIT(11) /* This last one is specific to EP0 */ #define DWC3_EP0_DIR_IN BIT(31) diff --git a/drivers/usb/dwc3/ep0.c b/drivers/usb/dwc3/ep0.c index 697656aad6ea..991cab9a7491 100644 --- a/drivers/usb/dwc3/ep0.c +++ b/drivers/usb/dwc3/ep0.c @@ -524,6 +524,11 @@ static int dwc3_ep0_handle_endpoint(struct dwc3 *dwc, ret = __dwc3_gadget_ep_set_halt(dep, set, true); if (ret) return -EINVAL; + + /* ClearFeature(Halt) may need delayed status */ + if (!set && (dep->flags & DWC3_EP_END_TRANSFER_PENDING)) + return USB_GADGET_DELAYED_STATUS; + break; default: return -EINVAL; @@ -1049,6 +1054,17 @@ static void dwc3_ep0_do_control_status(struct dwc3 *dwc, __dwc3_ep0_do_control_status(dwc, dep); } +void dwc3_ep0_send_delayed_status(struct dwc3 *dwc) +{ + unsigned int direction = !dwc->ep0_expect_in; + + if (dwc->ep0state != EP0_STATUS_PHASE) + return; + + dwc->delayed_status = false; + __dwc3_ep0_do_control_status(dwc, dwc->eps[direction]); +} + static void dwc3_ep0_end_control_data(struct dwc3 *dwc, struct dwc3_ep *dep) { struct dwc3_gadget_ep_cmd_params params; diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 6c8e495455d4..1d65de84464d 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -1719,6 +1719,18 @@ int __dwc3_gadget_ep_set_halt(struct dwc3_ep *dep, int value, int protocol) return 0; } + dwc3_stop_active_transfer(dep, true, true); + + list_for_each_entry_safe(req, tmp, &dep->started_list, list) + dwc3_gadget_move_cancelled_request(req); + + if (dep->flags & DWC3_EP_END_TRANSFER_PENDING) { + dep->flags |= DWC3_EP_PENDING_CLEAR_STALL; + return 0; + } + + dwc3_gadget_ep_cleanup_cancelled_requests(dep); + ret = dwc3_send_clear_stall_ep_cmd(dep); if (ret) { dev_err(dwc->dev, "failed to clear STALL on %s\n", @@ -1728,14 +1740,6 @@ int __dwc3_gadget_ep_set_halt(struct dwc3_ep *dep, int value, int protocol) dep->flags &= ~(DWC3_EP_STALL | DWC3_EP_WEDGE); - dwc3_stop_active_transfer(dep, true, true); - - list_for_each_entry_safe(req, tmp, &dep->started_list, list) - dwc3_gadget_move_cancelled_request(req); - - if (!(dep->flags & DWC3_EP_END_TRANSFER_PENDING)) - dwc3_gadget_ep_cleanup_cancelled_requests(dep); - if ((dep->flags & DWC3_EP_DELAY_START) && !usb_endpoint_xfer_isoc(dep->endpoint.desc)) __dwc3_gadget_kick_transfer(dep); @@ -2767,6 +2771,26 @@ static void dwc3_endpoint_interrupt(struct dwc3 *dwc, dep->flags &= ~DWC3_EP_END_TRANSFER_PENDING; dep->flags &= ~DWC3_EP_TRANSFER_STARTED; dwc3_gadget_ep_cleanup_cancelled_requests(dep); + + if (dep->flags & DWC3_EP_PENDING_CLEAR_STALL) { + struct dwc3 *dwc = dep->dwc; + + dep->flags &= ~DWC3_EP_PENDING_CLEAR_STALL; + if (dwc3_send_clear_stall_ep_cmd(dep)) { + struct usb_ep *ep0 = &dwc->eps[0]->endpoint; + + dev_err(dwc->dev, "failed to clear STALL on %s\n", + dep->name); + if (dwc->delayed_status) + __dwc3_gadget_ep0_set_halt(ep0, 1); + return; + } + + dep->flags &= ~(DWC3_EP_STALL | DWC3_EP_WEDGE); + if (dwc->delayed_status) + dwc3_ep0_send_delayed_status(dwc); + } + if ((dep->flags & DWC3_EP_DELAY_START) && !usb_endpoint_xfer_isoc(dep->endpoint.desc)) __dwc3_gadget_kick_transfer(dep); diff --git a/drivers/usb/dwc3/gadget.h b/drivers/usb/dwc3/gadget.h index 5faf4d1249e0..f207e59c7d03 100644 --- a/drivers/usb/dwc3/gadget.h +++ b/drivers/usb/dwc3/gadget.h @@ -111,6 +111,7 @@ int dwc3_gadget_ep0_set_halt(struct usb_ep *ep, int value); int dwc3_gadget_ep0_queue(struct usb_ep *ep, struct usb_request *request, gfp_t gfp_flags); int __dwc3_gadget_ep_set_halt(struct dwc3_ep *dep, int value, int protocol); +void dwc3_ep0_send_delayed_status(struct dwc3 *dwc); /** * dwc3_gadget_ep_get_transfer_index - Gets transfer index from HW -- cgit v1.2.3 From 543432d078c015a27e259032bac7ce90f3fe33e9 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Mon, 19 Oct 2020 19:07:02 +0200 Subject: usb: cdc-acm: fix cooldown mechanism commit 38203b8385bf6283537162bde7d499f830964711 upstream. Commit a4e7279cd1d1 ("cdc-acm: introduce a cool down") is causing regression if there is some USB error, such as -EPROTO. This has been reported on some samples of the Odroid-N2 using the Combee II Zibgee USB dongle. > struct acm *acm = container_of(work, struct acm, work) is incorrect in case of a delayed work and causes warnings, usually from the workqueue: > WARNING: CPU: 0 PID: 0 at kernel/workqueue.c:1474 __queue_work+0x480/0x528. When this happens, USB eventually stops working completely after a while. Also the ACM_ERROR_DELAY bit is never set, so the cooldown mechanism previously introduced cannot be triggered and acm_submit_read_urb() is never called. This changes makes the cdc-acm driver use a single delayed work, fixing the pointer arithmetic in acm_softint() and set the ACM_ERROR_DELAY when the cooldown mechanism appear to be needed. Fixes: a4e7279cd1d1 ("cdc-acm: introduce a cool down") Cc: Oliver Neukum Reported-by: Pascal Vizeli Acked-by: Oliver Neukum Signed-off-by: Jerome Brunet Link: https://lore.kernel.org/r/20201019170702.150534-1-jbrunet@baylibre.com Cc: stable Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-acm.c | 12 +++++------- drivers/usb/class/cdc-acm.h | 3 +-- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index 808722b8294a..ed99d98172f4 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -507,6 +507,7 @@ static void acm_read_bulk_callback(struct urb *urb) "%s - cooling babbling device\n", __func__); usb_mark_last_busy(acm->dev); set_bit(rb->index, &acm->urbs_in_error_delay); + set_bit(ACM_ERROR_DELAY, &acm->flags); cooldown = true; break; default: @@ -532,7 +533,7 @@ static void acm_read_bulk_callback(struct urb *urb) if (stopped || stalled || cooldown) { if (stalled) - schedule_work(&acm->work); + schedule_delayed_work(&acm->dwork, 0); else if (cooldown) schedule_delayed_work(&acm->dwork, HZ / 2); return; @@ -562,13 +563,13 @@ static void acm_write_bulk(struct urb *urb) acm_write_done(acm, wb); spin_unlock_irqrestore(&acm->write_lock, flags); set_bit(EVENT_TTY_WAKEUP, &acm->flags); - schedule_work(&acm->work); + schedule_delayed_work(&acm->dwork, 0); } static void acm_softint(struct work_struct *work) { int i; - struct acm *acm = container_of(work, struct acm, work); + struct acm *acm = container_of(work, struct acm, dwork.work); if (test_bit(EVENT_RX_STALL, &acm->flags)) { smp_mb(); /* against acm_suspend() */ @@ -584,7 +585,7 @@ static void acm_softint(struct work_struct *work) if (test_and_clear_bit(ACM_ERROR_DELAY, &acm->flags)) { for (i = 0; i < acm->rx_buflimit; i++) if (test_and_clear_bit(i, &acm->urbs_in_error_delay)) - acm_submit_read_urb(acm, i, GFP_NOIO); + acm_submit_read_urb(acm, i, GFP_KERNEL); } if (test_and_clear_bit(EVENT_TTY_WAKEUP, &acm->flags)) @@ -1364,7 +1365,6 @@ made_compressed_probe: acm->ctrlsize = ctrlsize; acm->readsize = readsize; acm->rx_buflimit = num_rx_buf; - INIT_WORK(&acm->work, acm_softint); INIT_DELAYED_WORK(&acm->dwork, acm_softint); init_waitqueue_head(&acm->wioctl); spin_lock_init(&acm->write_lock); @@ -1574,7 +1574,6 @@ static void acm_disconnect(struct usb_interface *intf) } acm_kill_urbs(acm); - cancel_work_sync(&acm->work); cancel_delayed_work_sync(&acm->dwork); tty_unregister_device(acm_tty_driver, acm->minor); @@ -1617,7 +1616,6 @@ static int acm_suspend(struct usb_interface *intf, pm_message_t message) return 0; acm_kill_urbs(acm); - cancel_work_sync(&acm->work); cancel_delayed_work_sync(&acm->dwork); acm->urbs_in_error_delay = 0; diff --git a/drivers/usb/class/cdc-acm.h b/drivers/usb/class/cdc-acm.h index cd5e9d8ab237..b95ff769072e 100644 --- a/drivers/usb/class/cdc-acm.h +++ b/drivers/usb/class/cdc-acm.h @@ -112,8 +112,7 @@ struct acm { # define ACM_ERROR_DELAY 3 unsigned long urbs_in_error_delay; /* these need to be restarted after a delay */ struct usb_cdc_line_coding line; /* bits, stop, parity */ - struct work_struct work; /* work queue entry for various purposes*/ - struct delayed_work dwork; /* for cool downs needed in error recovery */ + struct delayed_work dwork; /* work queue entry for various purposes */ unsigned int ctrlin; /* input control lines (DCD, DSR, RI, break, overruns) */ unsigned int ctrlout; /* output control lines (DTR, RTS) */ struct async_icount iocount; /* counters for control line changes */ -- cgit v1.2.3 From 75d0d4ff5970e613d636cf9448cd9fbd18826306 Mon Sep 17 00:00:00 2001 From: Li Jun Date: Mon, 12 Oct 2020 19:03:12 +0800 Subject: usb: typec: tcpm: reset hard_reset_count for any disconnect MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 2d9c6442a9c81f4f8dee678d0b3c183173ab1e2d upstream. Current tcpm_detach() only reset hard_reset_count if port->attached is true, this may cause this counter clear is missed if the CC disconnect event is generated after tcpm_port_reset() is done by other events, e.g. VBUS off comes first before CC disconect for a power sink, in that case the first tcpm_detach() will only clear port->attached flag but leave hard_reset_count there because tcpm_port_is_disconnected() is still false, then later tcpm_detach() by CC disconnect will directly return due to port->attached is cleared, finally this will result tcpm will not try hard reset or error recovery for later attach. ChiYuan reported this issue on his platform with below tcpm trace: After power sink session setup after hard reset 2 times, detach from the power source and then attach: [ 4848.046358] VBUS off [ 4848.046384] state change SNK_READY -> SNK_UNATTACHED [ 4848.050908] Setting voltage/current limit 0 mV 0 mA [ 4848.050936] polarity 0 [ 4848.052593] Requesting mux state 0, usb-role 0, orientation 0 [ 4848.053222] Start toggling [ 4848.086500] state change SNK_UNATTACHED -> TOGGLING [ 4848.089983] CC1: 0 -> 0, CC2: 3 -> 3 [state TOGGLING, polarity 0, connected] [ 4848.089993] state change TOGGLING -> SNK_ATTACH_WAIT [ 4848.090031] pending state change SNK_ATTACH_WAIT -> SNK_DEBOUNCED @200 ms [ 4848.141162] CC1: 0 -> 0, CC2: 3 -> 0 [state SNK_ATTACH_WAIT, polarity 0, disconnected] [ 4848.141170] state change SNK_ATTACH_WAIT -> SNK_ATTACH_WAIT [ 4848.141184] pending state change SNK_ATTACH_WAIT -> SNK_UNATTACHED @20 ms [ 4848.163156] state change SNK_ATTACH_WAIT -> SNK_UNATTACHED [delayed 20 ms] [ 4848.163162] Start toggling [ 4848.216918] CC1: 0 -> 0, CC2: 0 -> 3 [state TOGGLING, polarity 0, connected] [ 4848.216954] state change TOGGLING -> SNK_ATTACH_WAIT [ 4848.217080] pending state change SNK_ATTACH_WAIT -> SNK_DEBOUNCED @200 ms [ 4848.231771] CC1: 0 -> 0, CC2: 3 -> 0 [state SNK_ATTACH_WAIT, polarity 0, disconnected] [ 4848.231800] state change SNK_ATTACH_WAIT -> SNK_ATTACH_WAIT [ 4848.231857] pending state change SNK_ATTACH_WAIT -> SNK_UNATTACHED @20 ms [ 4848.256022] state change SNK_ATTACH_WAIT -> SNK_UNATTACHED [delayed20 ms] [ 4848.256049] Start toggling [ 4848.871148] VBUS on [ 4848.885324] CC1: 0 -> 0, CC2: 0 -> 3 [state TOGGLING, polarity 0, connected] [ 4848.885372] state change TOGGLING -> SNK_ATTACH_WAIT [ 4848.885548] pending state change SNK_ATTACH_WAIT -> SNK_DEBOUNCED @200 ms [ 4849.088240] state change SNK_ATTACH_WAIT -> SNK_DEBOUNCED [delayed200 ms] [ 4849.088284] state change SNK_DEBOUNCED -> SNK_ATTACHED [ 4849.088291] polarity 1 [ 4849.088769] Requesting mux state 1, usb-role 2, orientation 2 [ 4849.088895] state change SNK_ATTACHED -> SNK_STARTUP [ 4849.088907] state change SNK_STARTUP -> SNK_DISCOVERY [ 4849.088915] Setting voltage/current limit 5000 mV 0 mA [ 4849.088927] vbus=0 charge:=1 [ 4849.090505] state change SNK_DISCOVERY -> SNK_WAIT_CAPABILITIES [ 4849.090828] pending state change SNK_WAIT_CAPABILITIES -> SNK_READY @240 ms [ 4849.335878] state change SNK_WAIT_CAPABILITIES -> SNK_READY [delayed240 ms] this patch fix this issue by clear hard_reset_count at any cases of cc disconnect, í.e. don't check port->attached flag. Fixes: 4b4e02c83167 ("typec: tcpm: Move out of staging") Cc: stable@vger.kernel.org Reported-and-tested-by: ChiYuan Huang Reviewed-by: Guenter Roeck Reviewed-by: Heikki Krogerus Signed-off-by: Li Jun Link: https://lore.kernel.org/r/1602500592-3817-1-git-send-email-jun.li@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpm.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index 16e124753df7..5bb84cb4876a 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -2723,12 +2723,12 @@ static void tcpm_reset_port(struct tcpm_port *port) static void tcpm_detach(struct tcpm_port *port) { - if (!port->attached) - return; - if (tcpm_port_is_disconnected(port)) port->hard_reset_count = 0; + if (!port->attached) + return; + tcpm_reset_port(port); } -- cgit v1.2.3 From 94478c1dc57d88413c7927a303522eafad83c1dc Mon Sep 17 00:00:00 2001 From: Ran Wang Date: Sat, 10 Oct 2020 14:03:08 +0800 Subject: usb: host: fsl-mph-dr-of: check return of dma_set_mask() commit 3cd54a618834430a26a648d880dd83d740f2ae30 upstream. fsl_usb2_device_register() should stop init if dma_set_mask() return error. Fixes: cae058610465 ("drivers/usb/host: fsl: Set DMA_MASK of usb platform device") Reviewed-by: Peter Chen Signed-off-by: Ran Wang Link: https://lore.kernel.org/r/20201010060308.33693-1-ran.wang_1@nxp.com Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/fsl-mph-dr-of.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/usb/host/fsl-mph-dr-of.c b/drivers/usb/host/fsl-mph-dr-of.c index ae8f60f6e6a5..44a7e58a26e3 100644 --- a/drivers/usb/host/fsl-mph-dr-of.c +++ b/drivers/usb/host/fsl-mph-dr-of.c @@ -94,10 +94,13 @@ static struct platform_device *fsl_usb2_device_register( pdev->dev.coherent_dma_mask = ofdev->dev.coherent_dma_mask; - if (!pdev->dev.dma_mask) + if (!pdev->dev.dma_mask) { pdev->dev.dma_mask = &ofdev->dev.coherent_dma_mask; - else - dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)); + } else { + retval = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)); + if (retval) + goto error; + } retval = platform_device_add_data(pdev, pdata, sizeof(*pdata)); if (retval) -- cgit v1.2.3 From 8c16ca600657820b95ded8bb8d07136329c86036 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 19 Oct 2020 11:15:23 +0100 Subject: drm/i915: Force VT'd workarounds when running as a guest OS commit 8195400f7ea95399f721ad21f4d663a62c65036f upstream. If i915.ko is being used as a passthrough device, it does not know if the host is using intel_iommu. Mixing the iommu and gfx causes a few issues (such as scanout overfetch) which we need to workaround inside the driver, so if we detect we are running under a hypervisor, also assume the device access is being virtualised. Reported-by: Stefan Fritsch Suggested-by: Stefan Fritsch Signed-off-by: Chris Wilson Cc: Zhenyu Wang Cc: Joonas Lahtinen Cc: Stefan Fritsch Cc: stable@vger.kernel.org Tested-by: Stefan Fritsch Reviewed-by: Zhenyu Wang Link: https://patchwork.freedesktop.org/patch/msgid/20201019101523.4145-1-chris@chris-wilson.co.uk (cherry picked from commit f566fdcd6cc49a9d5b5d782f56e3e7cb243f01b8) Signed-off-by: Rodrigo Vivi Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_drv.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 89b6112bd66b..126a0eb6e054 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -33,6 +33,8 @@ #include #include +#include + #include #include #include @@ -2197,7 +2199,9 @@ static inline bool intel_vtd_active(void) if (intel_iommu_gfx_mapped) return true; #endif - return false; + + /* Running as a guest, we assume the host is enforcing VT'd */ + return !hypervisor_is_type(X86_HYPER_NATIVE); } static inline bool intel_scanout_needs_vtd_wa(struct drm_i915_private *dev_priv) -- cgit v1.2.3 From eb4c460e2e0628786872a85315ff310ddaebed24 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Mon, 19 Oct 2020 10:55:16 +0200 Subject: vt: keyboard, simplify vt_kdgkbsent commit 6ca03f90527e499dd5e32d6522909e2ad390896b upstream. Use 'strlen' of the string, add one for NUL terminator and simply do 'copy_to_user' instead of the explicit 'for' loop. This makes the KDGKBSENT case more compact. The only thing we need to take care about is NULL 'func_table[i]'. Use an empty string in that case. The original check for overflow could never trigger as the func_buf strings are always shorter or equal to 'struct kbsentry's. Cc: Signed-off-by: Jiri Slaby Link: https://lore.kernel.org/r/20201019085517.10176-1-jslaby@suse.cz Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/keyboard.c | 28 +++++++++------------------- 1 file changed, 9 insertions(+), 19 deletions(-) diff --git a/drivers/tty/vt/keyboard.c b/drivers/tty/vt/keyboard.c index 568b2171f335..e64e3a7aa2f1 100644 --- a/drivers/tty/vt/keyboard.c +++ b/drivers/tty/vt/keyboard.c @@ -1994,9 +1994,7 @@ out: int vt_do_kdgkb_ioctl(int cmd, struct kbsentry __user *user_kdgkb, int perm) { struct kbsentry *kbs; - char *p; u_char *q; - u_char __user *up; int sz, fnw_sz; int delta; char *first_free, *fj, *fnw; @@ -2022,23 +2020,15 @@ int vt_do_kdgkb_ioctl(int cmd, struct kbsentry __user *user_kdgkb, int perm) i = kbs->kb_func; switch (cmd) { - case KDGKBSENT: - sz = sizeof(kbs->kb_string) - 1; /* sz should have been - a struct member */ - up = user_kdgkb->kb_string; - p = func_table[i]; - if(p) - for ( ; *p && sz; p++, sz--) - if (put_user(*p, up++)) { - ret = -EFAULT; - goto reterr; - } - if (put_user('\0', up)) { - ret = -EFAULT; - goto reterr; - } - kfree(kbs); - return ((p && *p) ? -EOVERFLOW : 0); + case KDGKBSENT: { + /* size should have been a struct member */ + unsigned char *from = func_table[i] ? : ""; + + ret = copy_to_user(user_kdgkb->kb_string, from, + strlen(from) + 1) ? -EFAULT : 0; + + goto reterr; + } case KDSKBSENT: if (!perm) { ret = -EPERM; -- cgit v1.2.3 From 87d398f348b8a2d5246d3670a93fb63d4fd9f62a Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Mon, 19 Oct 2020 10:55:17 +0200 Subject: vt: keyboard, extend func_buf_lock to readers commit 82e61c3909db51d91b9d3e2071557b6435018b80 upstream. Both read-side users of func_table/func_buf need locking. Without that, one can easily confuse the code by repeatedly setting altering strings like: while (1) for (a = 0; a < 2; a++) { struct kbsentry kbs = {}; strcpy((char *)kbs.kb_string, a ? ".\n" : "88888\n"); ioctl(fd, KDSKBSENT, &kbs); } When that program runs, one can get unexpected output by holding F1 (note the unxpected period on the last line): . 88888 .8888 So protect all accesses to 'func_table' (and func_buf) by preexisting 'func_buf_lock'. It is easy in 'k_fn' handler as 'puts_queue' is expected not to sleep. On the other hand, KDGKBSENT needs a local (atomic) copy of the string because copy_to_user can sleep. Use already allocated, but unused 'kbs->kb_string' for that purpose. Note that the program above needs at least CAP_SYS_TTY_CONFIG. This depends on the previous patch and on the func_buf_lock lock added in commit 46ca3f735f34 (tty/vt: fix write/write race in ioctl(KDSKBSENT) handler) in 5.2. Likely fixes CVE-2020-25656. Cc: Reported-by: Minh Yuan Signed-off-by: Jiri Slaby Link: https://lore.kernel.org/r/20201019085517.10176-2-jslaby@suse.cz Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/keyboard.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/tty/vt/keyboard.c b/drivers/tty/vt/keyboard.c index e64e3a7aa2f1..b6e78fdbfdff 100644 --- a/drivers/tty/vt/keyboard.c +++ b/drivers/tty/vt/keyboard.c @@ -742,8 +742,13 @@ static void k_fn(struct vc_data *vc, unsigned char value, char up_flag) return; if ((unsigned)value < ARRAY_SIZE(func_table)) { + unsigned long flags; + + spin_lock_irqsave(&func_buf_lock, flags); if (func_table[value]) puts_queue(vc, func_table[value]); + spin_unlock_irqrestore(&func_buf_lock, flags); + } else pr_err("k_fn called with value=%d\n", value); } @@ -1990,7 +1995,7 @@ out: #undef s #undef v -/* FIXME: This one needs untangling and locking */ +/* FIXME: This one needs untangling */ int vt_do_kdgkb_ioctl(int cmd, struct kbsentry __user *user_kdgkb, int perm) { struct kbsentry *kbs; @@ -2022,10 +2027,14 @@ int vt_do_kdgkb_ioctl(int cmd, struct kbsentry __user *user_kdgkb, int perm) switch (cmd) { case KDGKBSENT: { /* size should have been a struct member */ - unsigned char *from = func_table[i] ? : ""; + ssize_t len = sizeof(user_kdgkb->kb_string); + + spin_lock_irqsave(&func_buf_lock, flags); + len = strlcpy(kbs->kb_string, func_table[i] ? : "", len); + spin_unlock_irqrestore(&func_buf_lock, flags); - ret = copy_to_user(user_kdgkb->kb_string, from, - strlen(from) + 1) ? -EFAULT : 0; + ret = copy_to_user(user_kdgkb->kb_string, kbs->kb_string, + len + 1) ? -EFAULT : 0; goto reterr; } -- cgit v1.2.3 From 93da9dcee2d2de4b59066fc36741a63d9c906292 Mon Sep 17 00:00:00 2001 From: Jason Gerecke Date: Wed, 23 Sep 2020 13:14:56 -0700 Subject: HID: wacom: Avoid entering wacom_wac_pen_report for pad / battery commit d9216d753b2b1406b801243b12aaf00a5ce5b861 upstream. It has recently been reported that the "heartbeat" report from devices like the 2nd-gen Intuos Pro (PTH-460, PTH-660, PTH-860) or the 2nd-gen Bluetooth-enabled Intuos tablets (CTL-4100WL, CTL-6100WL) can cause the driver to send a spurious BTN_TOUCH=0 once per second in the middle of drawing. This can result in broken lines while drawing on Chrome OS. The source of the issue has been traced back to a change which modified the driver to only call `wacom_wac_pad_report()` once per report instead of once per collection. As part of this change, pad-handling code was removed from `wacom_wac_collection()` under the assumption that the `WACOM_PEN_FIELD` and `WACOM_TOUCH_FIELD` checks would not be satisfied when a pad or battery collection was being processed. To be clear, the macros `WACOM_PAD_FIELD` and `WACOM_PEN_FIELD` do not currently check exclusive conditions. In fact, most "pad" fields will also appear to be "pen" fields simply due to their presence inside of a Digitizer application collection. Because of this, the removal of the check from `wacom_wac_collection()` just causes pad / battery collections to instead trigger a call to `wacom_wac_pen_report()` instead. The pen report function in turn resets the tip switch state just prior to exiting, resulting in the observed BTN_TOUCH=0 symptom. To correct this, we restore a version of the `WACOM_PAD_FIELD` check in `wacom_wac_collection()` and return early. This effectively prevents pad / battery collections from being reported until the very end of the report as originally intended. Fixes: d4b8efeb46d9 ("HID: wacom: generic: Correct pad syncing") Cc: stable@vger.kernel.org # v4.17+ Signed-off-by: Jason Gerecke Reviewed-by: Ping Cheng Tested-by: Ping Cheng Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/wacom_wac.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c index 1c96809b51c9..b74acbd5997b 100644 --- a/drivers/hid/wacom_wac.c +++ b/drivers/hid/wacom_wac.c @@ -2773,7 +2773,9 @@ static int wacom_wac_collection(struct hid_device *hdev, struct hid_report *repo if (report->type != HID_INPUT_REPORT) return -1; - if (WACOM_PEN_FIELD(field) && wacom->wacom_wac.pen_input) + if (WACOM_PAD_FIELD(field)) + return 0; + else if (WACOM_PEN_FIELD(field) && wacom->wacom_wac.pen_input) wacom_wac_pen_report(hdev, report); else if (WACOM_FINGER_FIELD(field) && wacom->wacom_wac.touch_input) wacom_wac_finger_report(hdev, report); -- cgit v1.2.3 From f707ccb2f10cb7c6388f8ba8ffab1c38b47c02d8 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 22 Sep 2020 12:20:14 +0200 Subject: udf: Fix memory leak when mounting commit a7be300de800e755714c71103ae4a0d205e41e99 upstream. udf_process_sequence() allocates temporary array for processing partition descriptors on volume which it fails to free. Free the array when it is not needed anymore. Fixes: 7b78fd02fb19 ("udf: Fix handling of Partition Descriptors") CC: stable@vger.kernel.org Reported-by: syzbot+128f4dd6e796c98b3760@syzkaller.appspotmail.com Signed-off-by: Jan Kara Signed-off-by: Greg Kroah-Hartman --- fs/udf/super.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/fs/udf/super.c b/fs/udf/super.c index a0cd766b41cd..4aba4878ed96 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -1703,7 +1703,8 @@ static noinline int udf_process_sequence( "Pointers (max %u supported)\n", UDF_MAX_TD_NESTING); brelse(bh); - return -EIO; + ret = -EIO; + goto out; } vdp = (struct volDescPtr *)bh->b_data; @@ -1723,7 +1724,8 @@ static noinline int udf_process_sequence( curr = get_volume_descriptor_record(ident, bh, &data); if (IS_ERR(curr)) { brelse(bh); - return PTR_ERR(curr); + ret = PTR_ERR(curr); + goto out; } /* Descriptor we don't care about? */ if (!curr) @@ -1745,28 +1747,31 @@ static noinline int udf_process_sequence( */ if (!data.vds[VDS_POS_PRIMARY_VOL_DESC].block) { udf_err(sb, "Primary Volume Descriptor not found!\n"); - return -EAGAIN; + ret = -EAGAIN; + goto out; } ret = udf_load_pvoldesc(sb, data.vds[VDS_POS_PRIMARY_VOL_DESC].block); if (ret < 0) - return ret; + goto out; if (data.vds[VDS_POS_LOGICAL_VOL_DESC].block) { ret = udf_load_logicalvol(sb, data.vds[VDS_POS_LOGICAL_VOL_DESC].block, fileset); if (ret < 0) - return ret; + goto out; } /* Now handle prevailing Partition Descriptors */ for (i = 0; i < data.num_part_descs; i++) { ret = udf_load_partdesc(sb, data.part_descs_loc[i].rec.block); if (ret < 0) - return ret; + goto out; } - - return 0; + ret = 0; +out: + kfree(data.part_descs_loc); + return ret; } /* -- cgit v1.2.3 From a4f02a81c7e6ce74be47ff7ba45b60c39e82eb5b Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Sun, 4 Oct 2020 16:03:07 +0200 Subject: dmaengine: dma-jz4780: Fix race in jz4780_dma_tx_status commit baf6fd97b16ea8f981b8a8b04039596f32fc2972 upstream. The jz4780_dma_tx_status() function would check if a channel's cookie state was set to 'completed', and if not, it would enter the critical section. However, in that time frame, the jz4780_dma_chan_irq() function was able to set the cookie to 'completed', and clear the jzchan->vchan pointer, which was deferenced in the critical section of the first function. Fix this race by checking the channel's cookie state after entering the critical function and not before. Fixes: d894fc6046fe ("dmaengine: jz4780: add driver for the Ingenic JZ4780 DMA controller") Cc: stable@vger.kernel.org # v4.0 Signed-off-by: Paul Cercueil Reported-by: Artur Rojek Tested-by: Artur Rojek Link: https://lore.kernel.org/r/20201004140307.885556-1-paul@crapouillou.net Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/dma-jz4780.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/dma/dma-jz4780.c b/drivers/dma/dma-jz4780.c index 0ecb724b394f..e27043427653 100644 --- a/drivers/dma/dma-jz4780.c +++ b/drivers/dma/dma-jz4780.c @@ -639,11 +639,11 @@ static enum dma_status jz4780_dma_tx_status(struct dma_chan *chan, unsigned long flags; unsigned long residue = 0; + spin_lock_irqsave(&jzchan->vchan.lock, flags); + status = dma_cookie_status(chan, cookie, txstate); if ((status == DMA_COMPLETE) || (txstate == NULL)) - return status; - - spin_lock_irqsave(&jzchan->vchan.lock, flags); + goto out_unlock_irqrestore; vdesc = vchan_find_desc(&jzchan->vchan, cookie); if (vdesc) { @@ -660,6 +660,7 @@ static enum dma_status jz4780_dma_tx_status(struct dma_chan *chan, && jzchan->desc->status & (JZ_DMA_DCS_AR | JZ_DMA_DCS_HLT)) status = DMA_ERROR; +out_unlock_irqrestore: spin_unlock_irqrestore(&jzchan->vchan.lock, flags); return status; } -- cgit v1.2.3 From ad877be5b983a740800e02ec5bb70f4f1ddf4c94 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Wed, 22 Jul 2020 16:50:44 +0100 Subject: iio:light:si1145: Fix timestamp alignment and prevent data leak. commit 0456ecf34d466261970e0ff92b2b9c78a4908637 upstream. One of a class of bugs pointed out by Lars in a recent review. iio_push_to_buffers_with_timestamp assumes the buffer used is aligned to the size of the timestamp (8 bytes). This is not guaranteed in this driver which uses a 24 byte array of smaller elements on the stack. As Lars also noted this anti pattern can involve a leak of data to userspace and that indeed can happen here. We close both issues by moving to a suitable array in the iio_priv() data with alignment explicitly requested. This data is allocated with kzalloc so no data can leak appart from previous readings. Depending on the enabled channels, the location of the timestamp can be at various aligned offsets through the buffer. As such we any use of a structure to enforce this alignment would incorrectly suggest a single location for the timestamp. Comments adjusted to express this clearly in the code. Fixes: ac45e57f1590 ("iio: light: Add driver for Silabs si1132, si1141/2/3 and si1145/6/7 ambient light, uv index and proximity sensors") Reported-by: Lars-Peter Clausen Signed-off-by: Jonathan Cameron Reviewed-by: Andy Shevchenko Cc: Peter Meerwald-Stadler Cc: Link: https://lore.kernel.org/r/20200722155103.979802-9-jic23@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/iio/light/si1145.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/iio/light/si1145.c b/drivers/iio/light/si1145.c index 982bba0c54e7..cb40345e7b51 100644 --- a/drivers/iio/light/si1145.c +++ b/drivers/iio/light/si1145.c @@ -169,6 +169,7 @@ struct si1145_part_info { * @part_info: Part information * @trig: Pointer to iio trigger * @meas_rate: Value of MEAS_RATE register. Only set in HW in auto mode + * @buffer: Used to pack data read from sensor. */ struct si1145_data { struct i2c_client *client; @@ -180,6 +181,14 @@ struct si1145_data { bool autonomous; struct iio_trigger *trig; int meas_rate; + /* + * Ensure timestamp will be naturally aligned if present. + * Maximum buffer size (may be only partly used if not all + * channels are enabled): + * 6*2 bytes channels data + 4 bytes alignment + + * 8 bytes timestamp + */ + u8 buffer[24] __aligned(8); }; /** @@ -441,12 +450,6 @@ static irqreturn_t si1145_trigger_handler(int irq, void *private) struct iio_poll_func *pf = private; struct iio_dev *indio_dev = pf->indio_dev; struct si1145_data *data = iio_priv(indio_dev); - /* - * Maximum buffer size: - * 6*2 bytes channels data + 4 bytes alignment + - * 8 bytes timestamp - */ - u8 buffer[24]; int i, j = 0; int ret; u8 irq_status = 0; @@ -479,7 +482,7 @@ static irqreturn_t si1145_trigger_handler(int irq, void *private) ret = i2c_smbus_read_i2c_block_data_or_emulated( data->client, indio_dev->channels[i].address, - sizeof(u16) * run, &buffer[j]); + sizeof(u16) * run, &data->buffer[j]); if (ret < 0) goto done; j += run * sizeof(u16); @@ -494,7 +497,7 @@ static irqreturn_t si1145_trigger_handler(int irq, void *private) goto done; } - iio_push_to_buffers_with_timestamp(indio_dev, buffer, + iio_push_to_buffers_with_timestamp(indio_dev, data->buffer, iio_get_time_ns(indio_dev)); done: -- cgit v1.2.3 From a8c59abdbc6b0c93235f699f4d68740e67d6d646 Mon Sep 17 00:00:00 2001 From: Tobias Jordan Date: Sat, 26 Sep 2020 18:19:46 +0200 Subject: iio: adc: gyroadc: fix leak of device node iterator commit da4410d4078ba4ead9d6f1027d6db77c5a74ecee upstream. Add missing of_node_put calls when exiting the for_each_child_of_node loop in rcar_gyroadc_parse_subdevs early. Also add goto-exception handling for the error paths in that loop. Fixes: 059c53b32329 ("iio: adc: Add Renesas GyroADC driver") Signed-off-by: Tobias Jordan Link: https://lore.kernel.org/r/20200926161946.GA10240@agrajag.zerfleddert.de Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/adc/rcar-gyroadc.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/drivers/iio/adc/rcar-gyroadc.c b/drivers/iio/adc/rcar-gyroadc.c index c37f201294b2..b1fb1fd763fc 100644 --- a/drivers/iio/adc/rcar-gyroadc.c +++ b/drivers/iio/adc/rcar-gyroadc.c @@ -357,7 +357,7 @@ static int rcar_gyroadc_parse_subdevs(struct iio_dev *indio_dev) num_channels = ARRAY_SIZE(rcar_gyroadc_iio_channels_3); break; default: - return -EINVAL; + goto err_e_inval; } /* @@ -374,7 +374,7 @@ static int rcar_gyroadc_parse_subdevs(struct iio_dev *indio_dev) dev_err(dev, "Failed to get child reg property of ADC \"%pOFn\".\n", child); - return ret; + goto err_of_node_put; } /* Channel number is too high. */ @@ -382,7 +382,7 @@ static int rcar_gyroadc_parse_subdevs(struct iio_dev *indio_dev) dev_err(dev, "Only %i channels supported with %pOFn, but reg = <%i>.\n", num_channels, child, reg); - return -EINVAL; + goto err_e_inval; } } @@ -391,7 +391,7 @@ static int rcar_gyroadc_parse_subdevs(struct iio_dev *indio_dev) dev_err(dev, "Channel %i uses different ADC mode than the rest.\n", reg); - return -EINVAL; + goto err_e_inval; } /* Channel is valid, grab the regulator. */ @@ -401,7 +401,8 @@ static int rcar_gyroadc_parse_subdevs(struct iio_dev *indio_dev) if (IS_ERR(vref)) { dev_dbg(dev, "Channel %i 'vref' supply not connected.\n", reg); - return PTR_ERR(vref); + ret = PTR_ERR(vref); + goto err_of_node_put; } priv->vref[reg] = vref; @@ -425,8 +426,10 @@ static int rcar_gyroadc_parse_subdevs(struct iio_dev *indio_dev) * attached to the GyroADC at a time, so if we found it, * we can stop parsing here. */ - if (childmode == RCAR_GYROADC_MODE_SELECT_1_MB88101A) + if (childmode == RCAR_GYROADC_MODE_SELECT_1_MB88101A) { + of_node_put(child); break; + } } if (first) { @@ -435,6 +438,12 @@ static int rcar_gyroadc_parse_subdevs(struct iio_dev *indio_dev) } return 0; + +err_e_inval: + ret = -EINVAL; +err_of_node_put: + of_node_put(child); + return ret; } static void rcar_gyroadc_deinit_supplies(struct iio_dev *indio_dev) -- cgit v1.2.3 From 96a5134423ae881f01076922914f4e2cd845c1e7 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Wed, 22 Jul 2020 16:51:00 +0100 Subject: iio:adc:ti-adc0832 Fix alignment issue with timestamp commit 39e91f3be4cba51c1560bcda3a343ed1f64dc916 upstream. One of a class of bugs pointed out by Lars in a recent review. iio_push_to_buffers_with_timestamp assumes the buffer used is aligned to the size of the timestamp (8 bytes). This is not guaranteed in this driver which uses an array of smaller elements on the stack. We fix this issues by moving to a suitable structure in the iio_priv() data with alignment explicitly requested. This data is allocated with kzalloc so no data can leak apart from previous readings. Note that previously no data could leak 'including' previous readings but I don't think it is an issue to potentially leak them like this now does. In this case the postioning of the timestamp is depends on what other channels are enabled. As such we cannot use a structure to make the alignment explicit as it would be missleading by suggesting only one possible location for the timestamp. Fixes: 815bbc87462a ("iio: ti-adc0832: add triggered buffer support") Reported-by: Lars-Peter Clausen Signed-off-by: Jonathan Cameron Reviewed-by: Andy Shevchenko Cc: Akinobu Mita Cc: Link: https://lore.kernel.org/r/20200722155103.979802-25-jic23@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/iio/adc/ti-adc0832.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/iio/adc/ti-adc0832.c b/drivers/iio/adc/ti-adc0832.c index 6ea39f4bbb37..55abd2fd2ccc 100644 --- a/drivers/iio/adc/ti-adc0832.c +++ b/drivers/iio/adc/ti-adc0832.c @@ -28,6 +28,12 @@ struct adc0832 { struct regulator *reg; struct mutex lock; u8 mux_bits; + /* + * Max size needed: 16x 1 byte ADC data + 8 bytes timestamp + * May be shorter if not all channels are enabled subject + * to the timestamp remaining 8 byte aligned. + */ + u8 data[24] __aligned(8); u8 tx_buf[2] ____cacheline_aligned; u8 rx_buf[2]; @@ -199,7 +205,6 @@ static irqreturn_t adc0832_trigger_handler(int irq, void *p) struct iio_poll_func *pf = p; struct iio_dev *indio_dev = pf->indio_dev; struct adc0832 *adc = iio_priv(indio_dev); - u8 data[24] = { }; /* 16x 1 byte ADC data + 8 bytes timestamp */ int scan_index; int i = 0; @@ -217,10 +222,10 @@ static irqreturn_t adc0832_trigger_handler(int irq, void *p) goto out; } - data[i] = ret; + adc->data[i] = ret; i++; } - iio_push_to_buffers_with_timestamp(indio_dev, data, + iio_push_to_buffers_with_timestamp(indio_dev, adc->data, iio_get_time_ns(indio_dev)); out: mutex_unlock(&adc->lock); -- cgit v1.2.3 From 9f4f75df4b47f4f1e6c153f822e747297882621c Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Wed, 22 Jul 2020 16:51:01 +0100 Subject: iio:adc:ti-adc12138 Fix alignment issue with timestamp commit 293e809b2e8e608b65a949101aaf7c0bd1224247 upstream. One of a class of bugs pointed out by Lars in a recent review. iio_push_to_buffers_with_timestamp assumes the buffer used is aligned to the size of the timestamp (8 bytes). This is not guaranteed in this driver which uses an array of smaller elements on the stack. We move to a suitable structure in the iio_priv() data with alignment explicitly requested. This data is allocated with kzalloc so no data can leak apart from previous readings. Note that previously no leak at all could occur, but previous readings should never be a problem. In this case the timestamp location depends on what other channels are enabled. As such we can't use a structure without misleading by suggesting only one possible timestamp location. Fixes: 50a6edb1b6e0 ("iio: adc: add ADC12130/ADC12132/ADC12138 ADC driver") Reported-by: Lars-Peter Clausen Signed-off-by: Jonathan Cameron Reviewed-by: Andy Shevchenko Cc: Akinobu Mita Cc: Link: https://lore.kernel.org/r/20200722155103.979802-26-jic23@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/iio/adc/ti-adc12138.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/iio/adc/ti-adc12138.c b/drivers/iio/adc/ti-adc12138.c index 68a9dcb8faa2..db476482cc75 100644 --- a/drivers/iio/adc/ti-adc12138.c +++ b/drivers/iio/adc/ti-adc12138.c @@ -47,6 +47,12 @@ struct adc12138 { struct completion complete; /* The number of cclk periods for the S/H's acquisition time */ unsigned int acquisition_time; + /* + * Maximum size needed: 16x 2 bytes ADC data + 8 bytes timestamp. + * Less may be need if not all channels are enabled, as long as + * the 8 byte alignment of the timestamp is maintained. + */ + __be16 data[20] __aligned(8); u8 tx_buf[2] ____cacheline_aligned; u8 rx_buf[2]; @@ -329,7 +335,6 @@ static irqreturn_t adc12138_trigger_handler(int irq, void *p) struct iio_poll_func *pf = p; struct iio_dev *indio_dev = pf->indio_dev; struct adc12138 *adc = iio_priv(indio_dev); - __be16 data[20] = { }; /* 16x 2 bytes ADC data + 8 bytes timestamp */ __be16 trash; int ret; int scan_index; @@ -345,7 +350,7 @@ static irqreturn_t adc12138_trigger_handler(int irq, void *p) reinit_completion(&adc->complete); ret = adc12138_start_and_read_conv(adc, scan_chan, - i ? &data[i - 1] : &trash); + i ? &adc->data[i - 1] : &trash); if (ret) { dev_warn(&adc->spi->dev, "failed to start conversion\n"); @@ -362,7 +367,7 @@ static irqreturn_t adc12138_trigger_handler(int irq, void *p) } if (i) { - ret = adc12138_read_conv_data(adc, &data[i - 1]); + ret = adc12138_read_conv_data(adc, &adc->data[i - 1]); if (ret) { dev_warn(&adc->spi->dev, "failed to get conversion data\n"); @@ -370,7 +375,7 @@ static irqreturn_t adc12138_trigger_handler(int irq, void *p) } } - iio_push_to_buffers_with_timestamp(indio_dev, data, + iio_push_to_buffers_with_timestamp(indio_dev, adc->data, iio_get_time_ns(indio_dev)); out: mutex_unlock(&adc->lock); -- cgit v1.2.3 From 829c0a9634b916487969edde88a06039fd26f349 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Wed, 22 Jul 2020 16:50:41 +0100 Subject: iio:gyro:itg3200: Fix timestamp alignment and prevent data leak. commit 10ab7cfd5522f0041028556dac864a003e158556 upstream. One of a class of bugs pointed out by Lars in a recent review. iio_push_to_buffers_with_timestamp assumes the buffer used is aligned to the size of the timestamp (8 bytes). This is not guaranteed in this driver which uses a 16 byte array of smaller elements on the stack. This is fixed by using an explicit c structure. As there are no holes in the structure, there is no possiblity of data leakage in this case. The explicit alignment of ts is not strictly necessary but potentially makes the code slightly less fragile. It also removes the possibility of this being cut and paste into another driver where the alignment isn't already true. Fixes: 36e0371e7764 ("iio:itg3200: Use iio_push_to_buffers_with_timestamp()") Reported-by: Lars-Peter Clausen Signed-off-by: Jonathan Cameron Reviewed-by: Andy Shevchenko Cc: Link: https://lore.kernel.org/r/20200722155103.979802-6-jic23@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/iio/gyro/itg3200_buffer.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/iio/gyro/itg3200_buffer.c b/drivers/iio/gyro/itg3200_buffer.c index d3fbe9d86467..1c3c1bd53374 100644 --- a/drivers/iio/gyro/itg3200_buffer.c +++ b/drivers/iio/gyro/itg3200_buffer.c @@ -46,13 +46,20 @@ static irqreturn_t itg3200_trigger_handler(int irq, void *p) struct iio_poll_func *pf = p; struct iio_dev *indio_dev = pf->indio_dev; struct itg3200 *st = iio_priv(indio_dev); - __be16 buf[ITG3200_SCAN_ELEMENTS + sizeof(s64)/sizeof(u16)]; - - int ret = itg3200_read_all_channels(st->i2c, buf); + /* + * Ensure correct alignment and padding including for the + * timestamp that may be inserted. + */ + struct { + __be16 buf[ITG3200_SCAN_ELEMENTS]; + s64 ts __aligned(8); + } scan; + + int ret = itg3200_read_all_channels(st->i2c, scan.buf); if (ret < 0) goto error_ret; - iio_push_to_buffers_with_timestamp(indio_dev, buf, pf->timestamp); + iio_push_to_buffers_with_timestamp(indio_dev, &scan, pf->timestamp); iio_trigger_notify_done(indio_dev->trig); -- cgit v1.2.3 From 73597ab2a9b9ab069779aaf1ef8551b8569247e6 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 7 Oct 2020 17:18:33 +0530 Subject: powerpc/drmem: Make lmb_size 64 bit commit ec72024e35dddb88a81e40071c87ceb18b5ee835 upstream. Similar to commit 89c140bbaeee ("pseries: Fix 64 bit logical memory block panic") make sure different variables tracking lmb_size are updated to be 64 bit. This was found by code audit. Cc: stable@vger.kernel.org Signed-off-by: Aneesh Kumar K.V Acked-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201007114836.282468-2-aneesh.kumar@linux.ibm.com Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/drmem.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/drmem.h b/arch/powerpc/include/asm/drmem.h index bea7a2405ba5..ee61542c6c3d 100644 --- a/arch/powerpc/include/asm/drmem.h +++ b/arch/powerpc/include/asm/drmem.h @@ -20,7 +20,7 @@ struct drmem_lmb { struct drmem_lmb_info { struct drmem_lmb *lmbs; int n_lmbs; - u32 lmb_size; + u64 lmb_size; }; extern struct drmem_lmb_info *drmem_info; @@ -79,7 +79,7 @@ struct of_drconf_cell_v2 { #define DRCONF_MEM_AI_INVALID 0x00000040 #define DRCONF_MEM_RESERVED 0x00000080 -static inline u32 drmem_lmb_size(void) +static inline u64 drmem_lmb_size(void) { return drmem_info->lmb_size; } -- cgit v1.2.3 From 58a7dc5f521a14f93482fe9bf829a9dbf653ed92 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Wed, 14 Oct 2020 22:34:56 +0100 Subject: MIPS: DEC: Restore bootmem reservation for firmware working memory area commit cf3af0a4d3b62ab48e0b90180ea161d0f5d4953f upstream. Fix a crash on DEC platforms starting with: VFS: Mounted root (nfs filesystem) on device 0:11. Freeing unused PROM memory: 124k freed BUG: Bad page state in process swapper pfn:00001 page:(ptrval) refcount:0 mapcount:-128 mapping:00000000 index:0x1 pfn:0x1 flags: 0x0() raw: 00000000 00000100 00000122 00000000 00000001 00000000 ffffff7f 00000000 page dumped because: nonzero mapcount Modules linked in: CPU: 0 PID: 1 Comm: swapper Not tainted 5.9.0-00858-g865c50e1d279 #1 Stack : 8065dc48 0000000b 8065d2b8 9bc27dcc 80645bfc 9bc259a4 806a1b97 80703124 80710000 8064a900 00000001 80099574 806b116c 1000ec00 9bc27d88 806a6f30 00000000 00000000 80645bfc 00000000 31232039 80706ba4 2e392e35 8039f348 2d383538 00000070 0000000a 35363867 00000000 806c2830 80710000 806b0000 80710000 8064a900 00000001 81000000 00000000 00000000 8035af2c 80700000 ... Call Trace: [<8004bc5c>] show_stack+0x34/0x104 [<8015675c>] bad_page+0xfc/0x128 [<80157714>] free_pcppages_bulk+0x1f4/0x5dc [<801591cc>] free_unref_page+0xc0/0x130 [<8015cb04>] free_reserved_area+0x144/0x1d8 [<805abd78>] kernel_init+0x20/0x100 [<80046070>] ret_from_kernel_thread+0x14/0x1c Disabling lock debugging due to kernel taint caused by an attempt to free bootmem space that as from commit b93ddc4f9156 ("mips: Reserve memory for the kernel image resources") has not been anymore reserved due to the removal of generic MIPS arch code that used to reserve all the memory from the beginning of RAM up to the kernel load address. This memory does need to be reserved on DEC platforms however as it is used by REX firmware as working area, as per the TURBOchannel firmware specification[1]: Table 2-2 REX Memory Regions ------------------------------------------------------------------------- Starting Ending Region Address Address Use ------------------------------------------------------------------------- 0 0xa0000000 0xa000ffff Restart block, exception vectors, REX stack and bss 1 0xa0010000 0xa0017fff Keyboard or tty drivers 2 0xa0018000 0xa001f3ff 1) CRT driver 3 0xa0020000 0xa002ffff boot, cnfg, init and t objects 4 0xa0020000 0xa002ffff 64KB scratch space ------------------------------------------------------------------------- 1) Note that the last 3 Kbytes of region 2 are reserved for backward compatibility with previous system software. ------------------------------------------------------------------------- (this table uses KSEG2 unmapped virtual addresses, which in the MIPS architecture are offset from physical addresses by a fixed value of 0xa0000000 and therefore the regions referred do correspond to the beginning of the physical address space) and we call into the firmware on several occasions throughout the bootstrap process. It is believed that pre-REX firmware used with non-TURBOchannel DEC platforms has the same requirements, as hinted by note #1 cited. Recreate the discarded reservation then, in DEC platform code, removing the crash. References: [1] "TURBOchannel Firmware Specification", On-line version, EK-TCAAD-FS-004, Digital Equipment Corporation, January 1993, Chapter 2 "System Module Firmware", p. 2-5 Signed-off-by: Maciej W. Rozycki Fixes: b93ddc4f9156 ("mips: Reserve memory for the kernel image resources") Cc: stable@vger.kernel.org # v5.2+ Signed-off-by: Greg Kroah-Hartman Signed-off-by: Thomas Bogendoerfer --- arch/mips/dec/setup.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/mips/dec/setup.c b/arch/mips/dec/setup.c index 61a0bf13e308..1fc8dffa8d1d 100644 --- a/arch/mips/dec/setup.c +++ b/arch/mips/dec/setup.c @@ -6,7 +6,7 @@ * for more details. * * Copyright (C) 1998 Harald Koerfgen - * Copyright (C) 2000, 2001, 2002, 2003, 2005 Maciej W. Rozycki + * Copyright (C) 2000, 2001, 2002, 2003, 2005, 2020 Maciej W. Rozycki */ #include #include @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -22,6 +23,7 @@ #include #include +#include #include #include #include @@ -29,7 +31,9 @@ #include #include #include +#include #include +#include #include #include #include @@ -166,6 +170,9 @@ void __init plat_mem_setup(void) ioport_resource.start = ~0UL; ioport_resource.end = 0UL; + + /* Stay away from the firmware working memory area for now. */ + memblock_reserve(PHYS_OFFSET, __pa_symbol(&_text) - PHYS_OFFSET); } /* -- cgit v1.2.3 From 551bf7c4bc24918c3629c1984910fe48cba68f85 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Tue, 15 Sep 2020 08:53:50 +0200 Subject: s390/stp: add locking to sysfs functions commit b3bd02495cb339124f13135d51940cf48d83e5cb upstream. The sysfs function might race with stp_work_fn. To prevent that, add the required locking. Another issue is that the sysfs functions are checking the stp_online flag, but this flag just holds the user setting whether STP is enabled. Add a flag to clock_sync_flag whether stp_info holds valid data and use that instead. Cc: stable@vger.kernel.org Signed-off-by: Sven Schnelle Reviewed-by: Alexander Egorenkov Signed-off-by: Vasily Gorbik Signed-off-by: Greg Kroah-Hartman --- arch/s390/kernel/time.c | 118 ++++++++++++++++++++++++++++++++++-------------- 1 file changed, 85 insertions(+), 33 deletions(-) diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 8ea9db599d38..11c32b228f51 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -354,8 +354,9 @@ static DEFINE_PER_CPU(atomic_t, clock_sync_word); static DEFINE_MUTEX(clock_sync_mutex); static unsigned long clock_sync_flags; -#define CLOCK_SYNC_HAS_STP 0 -#define CLOCK_SYNC_STP 1 +#define CLOCK_SYNC_HAS_STP 0 +#define CLOCK_SYNC_STP 1 +#define CLOCK_SYNC_STPINFO_VALID 2 /* * The get_clock function for the physical clock. It will get the current @@ -592,6 +593,22 @@ void stp_queue_work(void) queue_work(time_sync_wq, &stp_work); } +static int __store_stpinfo(void) +{ + int rc = chsc_sstpi(stp_page, &stp_info, sizeof(struct stp_sstpi)); + + if (rc) + clear_bit(CLOCK_SYNC_STPINFO_VALID, &clock_sync_flags); + else + set_bit(CLOCK_SYNC_STPINFO_VALID, &clock_sync_flags); + return rc; +} + +static int stpinfo_valid(void) +{ + return stp_online && test_bit(CLOCK_SYNC_STPINFO_VALID, &clock_sync_flags); +} + static int stp_sync_clock(void *data) { struct clock_sync_data *sync = data; @@ -613,8 +630,7 @@ static int stp_sync_clock(void *data) if (rc == 0) { sync->clock_delta = clock_delta; clock_sync_global(clock_delta); - rc = chsc_sstpi(stp_page, &stp_info, - sizeof(struct stp_sstpi)); + rc = __store_stpinfo(); if (rc == 0 && stp_info.tmd != 2) rc = -EAGAIN; } @@ -659,7 +675,7 @@ static void stp_work_fn(struct work_struct *work) if (rc) goto out_unlock; - rc = chsc_sstpi(stp_page, &stp_info, sizeof(struct stp_sstpi)); + rc = __store_stpinfo(); if (rc || stp_info.c == 0) goto out_unlock; @@ -696,10 +712,14 @@ static ssize_t stp_ctn_id_show(struct device *dev, struct device_attribute *attr, char *buf) { - if (!stp_online) - return -ENODATA; - return sprintf(buf, "%016llx\n", - *(unsigned long long *) stp_info.ctnid); + ssize_t ret = -ENODATA; + + mutex_lock(&stp_work_mutex); + if (stpinfo_valid()) + ret = sprintf(buf, "%016llx\n", + *(unsigned long long *) stp_info.ctnid); + mutex_unlock(&stp_work_mutex); + return ret; } static DEVICE_ATTR(ctn_id, 0400, stp_ctn_id_show, NULL); @@ -708,9 +728,13 @@ static ssize_t stp_ctn_type_show(struct device *dev, struct device_attribute *attr, char *buf) { - if (!stp_online) - return -ENODATA; - return sprintf(buf, "%i\n", stp_info.ctn); + ssize_t ret = -ENODATA; + + mutex_lock(&stp_work_mutex); + if (stpinfo_valid()) + ret = sprintf(buf, "%i\n", stp_info.ctn); + mutex_unlock(&stp_work_mutex); + return ret; } static DEVICE_ATTR(ctn_type, 0400, stp_ctn_type_show, NULL); @@ -719,9 +743,13 @@ static ssize_t stp_dst_offset_show(struct device *dev, struct device_attribute *attr, char *buf) { - if (!stp_online || !(stp_info.vbits & 0x2000)) - return -ENODATA; - return sprintf(buf, "%i\n", (int)(s16) stp_info.dsto); + ssize_t ret = -ENODATA; + + mutex_lock(&stp_work_mutex); + if (stpinfo_valid() && (stp_info.vbits & 0x2000)) + ret = sprintf(buf, "%i\n", (int)(s16) stp_info.dsto); + mutex_unlock(&stp_work_mutex); + return ret; } static DEVICE_ATTR(dst_offset, 0400, stp_dst_offset_show, NULL); @@ -730,9 +758,13 @@ static ssize_t stp_leap_seconds_show(struct device *dev, struct device_attribute *attr, char *buf) { - if (!stp_online || !(stp_info.vbits & 0x8000)) - return -ENODATA; - return sprintf(buf, "%i\n", (int)(s16) stp_info.leaps); + ssize_t ret = -ENODATA; + + mutex_lock(&stp_work_mutex); + if (stpinfo_valid() && (stp_info.vbits & 0x8000)) + ret = sprintf(buf, "%i\n", (int)(s16) stp_info.leaps); + mutex_unlock(&stp_work_mutex); + return ret; } static DEVICE_ATTR(leap_seconds, 0400, stp_leap_seconds_show, NULL); @@ -741,9 +773,13 @@ static ssize_t stp_stratum_show(struct device *dev, struct device_attribute *attr, char *buf) { - if (!stp_online) - return -ENODATA; - return sprintf(buf, "%i\n", (int)(s16) stp_info.stratum); + ssize_t ret = -ENODATA; + + mutex_lock(&stp_work_mutex); + if (stpinfo_valid()) + ret = sprintf(buf, "%i\n", (int)(s16) stp_info.stratum); + mutex_unlock(&stp_work_mutex); + return ret; } static DEVICE_ATTR(stratum, 0400, stp_stratum_show, NULL); @@ -752,9 +788,13 @@ static ssize_t stp_time_offset_show(struct device *dev, struct device_attribute *attr, char *buf) { - if (!stp_online || !(stp_info.vbits & 0x0800)) - return -ENODATA; - return sprintf(buf, "%i\n", (int) stp_info.tto); + ssize_t ret = -ENODATA; + + mutex_lock(&stp_work_mutex); + if (stpinfo_valid() && (stp_info.vbits & 0x0800)) + ret = sprintf(buf, "%i\n", (int) stp_info.tto); + mutex_unlock(&stp_work_mutex); + return ret; } static DEVICE_ATTR(time_offset, 0400, stp_time_offset_show, NULL); @@ -763,9 +803,13 @@ static ssize_t stp_time_zone_offset_show(struct device *dev, struct device_attribute *attr, char *buf) { - if (!stp_online || !(stp_info.vbits & 0x4000)) - return -ENODATA; - return sprintf(buf, "%i\n", (int)(s16) stp_info.tzo); + ssize_t ret = -ENODATA; + + mutex_lock(&stp_work_mutex); + if (stpinfo_valid() && (stp_info.vbits & 0x4000)) + ret = sprintf(buf, "%i\n", (int)(s16) stp_info.tzo); + mutex_unlock(&stp_work_mutex); + return ret; } static DEVICE_ATTR(time_zone_offset, 0400, @@ -775,9 +819,13 @@ static ssize_t stp_timing_mode_show(struct device *dev, struct device_attribute *attr, char *buf) { - if (!stp_online) - return -ENODATA; - return sprintf(buf, "%i\n", stp_info.tmd); + ssize_t ret = -ENODATA; + + mutex_lock(&stp_work_mutex); + if (stpinfo_valid()) + ret = sprintf(buf, "%i\n", stp_info.tmd); + mutex_unlock(&stp_work_mutex); + return ret; } static DEVICE_ATTR(timing_mode, 0400, stp_timing_mode_show, NULL); @@ -786,9 +834,13 @@ static ssize_t stp_timing_state_show(struct device *dev, struct device_attribute *attr, char *buf) { - if (!stp_online) - return -ENODATA; - return sprintf(buf, "%i\n", stp_info.tst); + ssize_t ret = -ENODATA; + + mutex_lock(&stp_work_mutex); + if (stpinfo_valid()) + ret = sprintf(buf, "%i\n", stp_info.tst); + mutex_unlock(&stp_work_mutex); + return ret; } static DEVICE_ATTR(timing_state, 0400, stp_timing_state_show, NULL); -- cgit v1.2.3 From 240baebeda09e1e010fff58acc9183992f41f638 Mon Sep 17 00:00:00 2001 From: Andrew Donnellan Date: Thu, 20 Aug 2020 14:45:12 +1000 Subject: powerpc/rtas: Restrict RTAS requests from userspace commit bd59380c5ba4147dcbaad3e582b55ccfd120b764 upstream. A number of userspace utilities depend on making calls to RTAS to retrieve information and update various things. The existing API through which we expose RTAS to userspace exposes more RTAS functionality than we actually need, through the sys_rtas syscall, which allows root (or anyone with CAP_SYS_ADMIN) to make any RTAS call they want with arbitrary arguments. Many RTAS calls take the address of a buffer as an argument, and it's up to the caller to specify the physical address of the buffer as an argument. We allocate a buffer (the "RMO buffer") in the Real Memory Area that RTAS can access, and then expose the physical address and size of this buffer in /proc/powerpc/rtas/rmo_buffer. Userspace is expected to read this address, poke at the buffer using /dev/mem, and pass an address in the RMO buffer to the RTAS call. However, there's nothing stopping the caller from specifying whatever address they want in the RTAS call, and it's easy to construct a series of RTAS calls that can overwrite arbitrary bytes (even without /dev/mem access). Additionally, there are some RTAS calls that do potentially dangerous things and for which there are no legitimate userspace use cases. In the past, this would not have been a particularly big deal as it was assumed that root could modify all system state freely, but with Secure Boot and lockdown we need to care about this. We can't fundamentally change the ABI at this point, however we can address this by implementing a filter that checks RTAS calls against a list of permitted calls and forces the caller to use addresses within the RMO buffer. The list is based off the list of calls that are used by the librtas userspace library, and has been tested with a number of existing userspace RTAS utilities. For compatibility with any applications we are not aware of that require other calls, the filter can be turned off at build time. Cc: stable@vger.kernel.org Reported-by: Daniel Axtens Signed-off-by: Andrew Donnellan Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200820044512.7543-1-ajd@linux.ibm.com Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/Kconfig | 13 ++++ arch/powerpc/kernel/rtas.c | 153 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 166 insertions(+) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 27ef333e96f6..cb285e474c88 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -1024,6 +1024,19 @@ config FSL_RIO Include support for RapidIO controller on Freescale embedded processors (MPC8548, MPC8641, etc). +config PPC_RTAS_FILTER + bool "Enable filtering of RTAS syscalls" + default y + depends on PPC_RTAS + help + The RTAS syscall API has security issues that could be used to + compromise system integrity. This option enforces restrictions on the + RTAS calls and arguments passed by userspace programs to mitigate + these issues. + + Say Y unless you know what you are doing and the filter is causing + problems for you. + endmenu config NONSTATIC_KERNEL diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 01210593d60c..c62ff66d44ad 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -940,6 +940,147 @@ struct pseries_errorlog *get_pseries_errorlog(struct rtas_error_log *log, return NULL; } +#ifdef CONFIG_PPC_RTAS_FILTER + +/* + * The sys_rtas syscall, as originally designed, allows root to pass + * arbitrary physical addresses to RTAS calls. A number of RTAS calls + * can be abused to write to arbitrary memory and do other things that + * are potentially harmful to system integrity, and thus should only + * be used inside the kernel and not exposed to userspace. + * + * All known legitimate users of the sys_rtas syscall will only ever + * pass addresses that fall within the RMO buffer, and use a known + * subset of RTAS calls. + * + * Accordingly, we filter RTAS requests to check that the call is + * permitted, and that provided pointers fall within the RMO buffer. + * The rtas_filters list contains an entry for each permitted call, + * with the indexes of the parameters which are expected to contain + * addresses and sizes of buffers allocated inside the RMO buffer. + */ +struct rtas_filter { + const char *name; + int token; + /* Indexes into the args buffer, -1 if not used */ + int buf_idx1; + int size_idx1; + int buf_idx2; + int size_idx2; + + int fixed_size; +}; + +static struct rtas_filter rtas_filters[] __ro_after_init = { + { "ibm,activate-firmware", -1, -1, -1, -1, -1 }, + { "ibm,configure-connector", -1, 0, -1, 1, -1, 4096 }, /* Special cased */ + { "display-character", -1, -1, -1, -1, -1 }, + { "ibm,display-message", -1, 0, -1, -1, -1 }, + { "ibm,errinjct", -1, 2, -1, -1, -1, 1024 }, + { "ibm,close-errinjct", -1, -1, -1, -1, -1 }, + { "ibm,open-errinct", -1, -1, -1, -1, -1 }, + { "ibm,get-config-addr-info2", -1, -1, -1, -1, -1 }, + { "ibm,get-dynamic-sensor-state", -1, 1, -1, -1, -1 }, + { "ibm,get-indices", -1, 2, 3, -1, -1 }, + { "get-power-level", -1, -1, -1, -1, -1 }, + { "get-sensor-state", -1, -1, -1, -1, -1 }, + { "ibm,get-system-parameter", -1, 1, 2, -1, -1 }, + { "get-time-of-day", -1, -1, -1, -1, -1 }, + { "ibm,get-vpd", -1, 0, -1, 1, 2 }, + { "ibm,lpar-perftools", -1, 2, 3, -1, -1 }, + { "ibm,platform-dump", -1, 4, 5, -1, -1 }, + { "ibm,read-slot-reset-state", -1, -1, -1, -1, -1 }, + { "ibm,scan-log-dump", -1, 0, 1, -1, -1 }, + { "ibm,set-dynamic-indicator", -1, 2, -1, -1, -1 }, + { "ibm,set-eeh-option", -1, -1, -1, -1, -1 }, + { "set-indicator", -1, -1, -1, -1, -1 }, + { "set-power-level", -1, -1, -1, -1, -1 }, + { "set-time-for-power-on", -1, -1, -1, -1, -1 }, + { "ibm,set-system-parameter", -1, 1, -1, -1, -1 }, + { "set-time-of-day", -1, -1, -1, -1, -1 }, + { "ibm,suspend-me", -1, -1, -1, -1, -1 }, + { "ibm,update-nodes", -1, 0, -1, -1, -1, 4096 }, + { "ibm,update-properties", -1, 0, -1, -1, -1, 4096 }, + { "ibm,physical-attestation", -1, 0, 1, -1, -1 }, +}; + +static bool in_rmo_buf(u32 base, u32 end) +{ + return base >= rtas_rmo_buf && + base < (rtas_rmo_buf + RTAS_RMOBUF_MAX) && + base <= end && + end >= rtas_rmo_buf && + end < (rtas_rmo_buf + RTAS_RMOBUF_MAX); +} + +static bool block_rtas_call(int token, int nargs, + struct rtas_args *args) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(rtas_filters); i++) { + struct rtas_filter *f = &rtas_filters[i]; + u32 base, size, end; + + if (token != f->token) + continue; + + if (f->buf_idx1 != -1) { + base = be32_to_cpu(args->args[f->buf_idx1]); + if (f->size_idx1 != -1) + size = be32_to_cpu(args->args[f->size_idx1]); + else if (f->fixed_size) + size = f->fixed_size; + else + size = 1; + + end = base + size - 1; + if (!in_rmo_buf(base, end)) + goto err; + } + + if (f->buf_idx2 != -1) { + base = be32_to_cpu(args->args[f->buf_idx2]); + if (f->size_idx2 != -1) + size = be32_to_cpu(args->args[f->size_idx2]); + else if (f->fixed_size) + size = f->fixed_size; + else + size = 1; + end = base + size - 1; + + /* + * Special case for ibm,configure-connector where the + * address can be 0 + */ + if (!strcmp(f->name, "ibm,configure-connector") && + base == 0) + return false; + + if (!in_rmo_buf(base, end)) + goto err; + } + + return false; + } + +err: + pr_err_ratelimited("sys_rtas: RTAS call blocked - exploit attempt?\n"); + pr_err_ratelimited("sys_rtas: token=0x%x, nargs=%d (called by %s)\n", + token, nargs, current->comm); + return true; +} + +#else + +static bool block_rtas_call(int token, int nargs, + struct rtas_args *args) +{ + return false; +} + +#endif /* CONFIG_PPC_RTAS_FILTER */ + /* We assume to be passed big endian arguments */ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs) { @@ -977,6 +1118,9 @@ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs) args.rets = &args.args[nargs]; memset(args.rets, 0, nret * sizeof(rtas_arg_t)); + if (block_rtas_call(token, nargs, &args)) + return -EINVAL; + /* Need to handle ibm,suspend_me call specially */ if (token == ibm_suspend_me_token) { @@ -1038,6 +1182,9 @@ void __init rtas_initialize(void) unsigned long rtas_region = RTAS_INSTANTIATE_MAX; u32 base, size, entry; int no_base, no_size, no_entry; +#ifdef CONFIG_PPC_RTAS_FILTER + int i; +#endif /* Get RTAS dev node and fill up our "rtas" structure with infos * about it. @@ -1077,6 +1224,12 @@ void __init rtas_initialize(void) #ifdef CONFIG_RTAS_ERROR_LOGGING rtas_last_error_token = rtas_token("rtas-last-error"); #endif + +#ifdef CONFIG_PPC_RTAS_FILTER + for (i = 0; i < ARRAY_SIZE(rtas_filters); i++) { + rtas_filters[i].token = rtas_token(rtas_filters[i].name); + } +#endif } int __init early_init_dt_scan_rtas(unsigned long node, -- cgit v1.2.3 From 3fa03b7f21a3edcee18190c2a4ddccc0b4128dc5 Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Wed, 2 Sep 2020 09:30:11 +0930 Subject: powerpc: Warn about use of smt_snooze_delay commit a02f6d42357acf6e5de6ffc728e6e77faf3ad217 upstream. It's not done anything for a long time. Save the percpu variable, and emit a warning to remind users to not expect it to do anything. This uses pr_warn_once instead of pr_warn_ratelimit as testing 'ppc64_cpu --smt=off' on a 24 core / 4 SMT system showed the warning to be noisy, as the online/offline loop is slow. Fixes: 3fa8cad82b94 ("powerpc/pseries/cpuidle: smt-snooze-delay cleanup.") Cc: stable@vger.kernel.org # v3.14 Signed-off-by: Joel Stanley Acked-by: Gautham R. Shenoy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200902000012.3440389-1-joel@jms.id.au Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/sysfs.c | 42 +++++++++++++++++------------------------- 1 file changed, 17 insertions(+), 25 deletions(-) diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index 80a676da11cb..f08ca604a394 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -31,29 +31,27 @@ static DEFINE_PER_CPU(struct cpu, cpu_devices); -/* - * SMT snooze delay stuff, 64-bit only for now - */ - #ifdef CONFIG_PPC64 -/* Time in microseconds we delay before sleeping in the idle loop */ -static DEFINE_PER_CPU(long, smt_snooze_delay) = { 100 }; +/* + * Snooze delay has not been hooked up since 3fa8cad82b94 ("powerpc/pseries/cpuidle: + * smt-snooze-delay cleanup.") and has been broken even longer. As was foretold in + * 2014: + * + * "ppc64_util currently utilises it. Once we fix ppc64_util, propose to clean + * up the kernel code." + * + * powerpc-utils stopped using it as of 1.3.8. At some point in the future this + * code should be removed. + */ static ssize_t store_smt_snooze_delay(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - struct cpu *cpu = container_of(dev, struct cpu, dev); - ssize_t ret; - long snooze; - - ret = sscanf(buf, "%ld", &snooze); - if (ret != 1) - return -EINVAL; - - per_cpu(smt_snooze_delay, cpu->dev.id) = snooze; + pr_warn_once("%s (%d) stored to unsupported smt_snooze_delay, which has no effect.\n", + current->comm, current->pid); return count; } @@ -61,9 +59,9 @@ static ssize_t show_smt_snooze_delay(struct device *dev, struct device_attribute *attr, char *buf) { - struct cpu *cpu = container_of(dev, struct cpu, dev); - - return sprintf(buf, "%ld\n", per_cpu(smt_snooze_delay, cpu->dev.id)); + pr_warn_once("%s (%d) read from unsupported smt_snooze_delay\n", + current->comm, current->pid); + return sprintf(buf, "100\n"); } static DEVICE_ATTR(smt_snooze_delay, 0644, show_smt_snooze_delay, @@ -71,16 +69,10 @@ static DEVICE_ATTR(smt_snooze_delay, 0644, show_smt_snooze_delay, static int __init setup_smt_snooze_delay(char *str) { - unsigned int cpu; - long snooze; - if (!cpu_has_feature(CPU_FTR_SMT)) return 1; - snooze = simple_strtol(str, NULL, 10); - for_each_possible_cpu(cpu) - per_cpu(smt_snooze_delay, cpu) = snooze; - + pr_warn("smt-snooze-delay command line option has no effect\n"); return 1; } __setup("smt-snooze-delay=", setup_smt_snooze_delay); -- cgit v1.2.3 From 7850dd0851a3b36118a81302cf1bf6a207786811 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 7 Oct 2020 17:18:34 +0530 Subject: powerpc/memhotplug: Make lmb size 64bit commit 301d2ea6572386245c5d2d2dc85c3b5a737b85ac upstream. Similar to commit 89c140bbaeee ("pseries: Fix 64 bit logical memory block panic") make sure different variables tracking lmb_size are updated to be 64 bit. This was found by code audit. Cc: stable@vger.kernel.org Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201007114836.282468-3-aneesh.kumar@linux.ibm.com Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/platforms/pseries/hotplug-memory.c | 43 +++++++++++++++++-------- 1 file changed, 29 insertions(+), 14 deletions(-) diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 66b32f46702d..f364909d0c08 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -279,7 +279,7 @@ static int dlpar_offline_lmb(struct drmem_lmb *lmb) return dlpar_change_lmb_state(lmb, false); } -static int pseries_remove_memblock(unsigned long base, unsigned int memblock_size) +static int pseries_remove_memblock(unsigned long base, unsigned long memblock_size) { unsigned long block_sz, start_pfn; int sections_per_block; @@ -310,10 +310,11 @@ out: static int pseries_remove_mem_node(struct device_node *np) { - const __be32 *regs; + const __be32 *prop; unsigned long base; - unsigned int lmb_size; + unsigned long lmb_size; int ret = -EINVAL; + int addr_cells, size_cells; /* * Check to see if we are actually removing memory @@ -324,12 +325,19 @@ static int pseries_remove_mem_node(struct device_node *np) /* * Find the base address and size of the memblock */ - regs = of_get_property(np, "reg", NULL); - if (!regs) + prop = of_get_property(np, "reg", NULL); + if (!prop) return ret; - base = be64_to_cpu(*(unsigned long *)regs); - lmb_size = be32_to_cpu(regs[3]); + addr_cells = of_n_addr_cells(np); + size_cells = of_n_size_cells(np); + + /* + * "reg" property represents (addr,size) tuple. + */ + base = of_read_number(prop, addr_cells); + prop += addr_cells; + lmb_size = of_read_number(prop, size_cells); pseries_remove_memblock(base, lmb_size); return 0; @@ -620,7 +628,7 @@ static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index) #else static inline int pseries_remove_memblock(unsigned long base, - unsigned int memblock_size) + unsigned long memblock_size) { return -EOPNOTSUPP; } @@ -953,10 +961,11 @@ int dlpar_memory(struct pseries_hp_errorlog *hp_elog) static int pseries_add_mem_node(struct device_node *np) { - const __be32 *regs; + const __be32 *prop; unsigned long base; - unsigned int lmb_size; + unsigned long lmb_size; int ret = -EINVAL; + int addr_cells, size_cells; /* * Check to see if we are actually adding memory @@ -967,12 +976,18 @@ static int pseries_add_mem_node(struct device_node *np) /* * Find the base and size of the memblock */ - regs = of_get_property(np, "reg", NULL); - if (!regs) + prop = of_get_property(np, "reg", NULL); + if (!prop) return ret; - base = be64_to_cpu(*(unsigned long *)regs); - lmb_size = be32_to_cpu(regs[3]); + addr_cells = of_n_addr_cells(np); + size_cells = of_n_size_cells(np); + /* + * "reg" property represents (addr,size) tuple. + */ + base = of_read_number(prop, addr_cells); + prop += addr_cells; + lmb_size = of_read_number(prop, size_cells); /* * Update memory region to represent the memory add -- cgit v1.2.3 From 61ed8c1b940d7b1c833cceabb0888d1771754f87 Mon Sep 17 00:00:00 2001 From: Mahesh Salgaonkar Date: Tue, 6 Oct 2020 13:02:18 +0530 Subject: powerpc/powernv/elog: Fix race while processing OPAL error log event. commit aea948bb80b478ddc2448f7359d574387521a52d upstream. Every error log reported by OPAL is exported to userspace through a sysfs interface and notified using kobject_uevent(). The userspace daemon (opal_errd) then reads the error log and acknowledges the error log is saved safely to disk. Once acknowledged the kernel removes the respective sysfs file entry causing respective resources to be released including kobject. However it's possible the userspace daemon may already be scanning elog entries when a new sysfs elog entry is created by the kernel. User daemon may read this new entry and ack it even before kernel can notify userspace about it through kobject_uevent() call. If that happens then we have a potential race between elog_ack_store->kobject_put() and kobject_uevent which can lead to use-after-free of a kernfs object resulting in a kernel crash. eg: BUG: Unable to handle kernel data access on read at 0x6b6b6b6b6b6b6bfb Faulting instruction address: 0xc0000000008ff2a0 Oops: Kernel access of bad area, sig: 11 [#1] LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA PowerNV CPU: 27 PID: 805 Comm: irq/29-opal-elo Not tainted 5.9.0-rc2-gcc-8.2.0-00214-g6f56a67bcbb5-dirty #363 ... NIP kobject_uevent_env+0xa0/0x910 LR elog_event+0x1f4/0x2d0 Call Trace: 0x5deadbeef0000122 (unreliable) elog_event+0x1f4/0x2d0 irq_thread_fn+0x4c/0xc0 irq_thread+0x1c0/0x2b0 kthread+0x1c4/0x1d0 ret_from_kernel_thread+0x5c/0x6c This patch fixes this race by protecting the sysfs file creation/notification by holding a reference count on kobject until we safely send kobject_uevent(). The function create_elog_obj() returns the elog object which if used by caller function will end up in use-after-free problem again. However, the return value of create_elog_obj() function isn't being used today and there is no need as well. Hence change it to return void to make this fix complete. Fixes: 774fea1a38c6 ("powerpc/powernv: Read OPAL error log and export it through sysfs") Cc: stable@vger.kernel.org # v3.15+ Reported-by: Oliver O'Halloran Signed-off-by: Mahesh Salgaonkar Signed-off-by: Aneesh Kumar K.V Reviewed-by: Oliver O'Halloran Reviewed-by: Vasant Hegde [mpe: Rework the logic to use a single return, reword comments, add oops] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201006122051.190176-1-mpe@ellerman.id.au Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/platforms/powernv/opal-elog.c | 33 +++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/platforms/powernv/opal-elog.c b/arch/powerpc/platforms/powernv/opal-elog.c index 62ef7ad995da..5e33b1fc67c2 100644 --- a/arch/powerpc/platforms/powernv/opal-elog.c +++ b/arch/powerpc/platforms/powernv/opal-elog.c @@ -179,14 +179,14 @@ static ssize_t raw_attr_read(struct file *filep, struct kobject *kobj, return count; } -static struct elog_obj *create_elog_obj(uint64_t id, size_t size, uint64_t type) +static void create_elog_obj(uint64_t id, size_t size, uint64_t type) { struct elog_obj *elog; int rc; elog = kzalloc(sizeof(*elog), GFP_KERNEL); if (!elog) - return NULL; + return; elog->kobj.kset = elog_kset; @@ -219,18 +219,37 @@ static struct elog_obj *create_elog_obj(uint64_t id, size_t size, uint64_t type) rc = kobject_add(&elog->kobj, NULL, "0x%llx", id); if (rc) { kobject_put(&elog->kobj); - return NULL; + return; } + /* + * As soon as the sysfs file for this elog is created/activated there is + * a chance the opal_errd daemon (or any userspace) might read and + * acknowledge the elog before kobject_uevent() is called. If that + * happens then there is a potential race between + * elog_ack_store->kobject_put() and kobject_uevent() which leads to a + * use-after-free of a kernfs object resulting in a kernel crash. + * + * To avoid that, we need to take a reference on behalf of the bin file, + * so that our reference remains valid while we call kobject_uevent(). + * We then drop our reference before exiting the function, leaving the + * bin file to drop the last reference (if it hasn't already). + */ + + /* Take a reference for the bin file */ + kobject_get(&elog->kobj); rc = sysfs_create_bin_file(&elog->kobj, &elog->raw_attr); - if (rc) { + if (rc == 0) { + kobject_uevent(&elog->kobj, KOBJ_ADD); + } else { + /* Drop the reference taken for the bin file */ kobject_put(&elog->kobj); - return NULL; } - kobject_uevent(&elog->kobj, KOBJ_ADD); + /* Drop our reference */ + kobject_put(&elog->kobj); - return elog; + return; } static irqreturn_t elog_event(int irq, void *data) -- cgit v1.2.3 From 94e27f13694c04334b8c55e60cfcf1c13e5a5f5d Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 11 Sep 2020 10:29:15 +0000 Subject: powerpc/powermac: Fix low_sleep_handler with KUAP and KUEP commit 2c637d2df4ee4830e9d3eb2bd5412250522ce96e upstream. low_sleep_handler() has an hardcoded restore of segment registers that doesn't take KUAP and KUEP into account. Use head_32's load_segment_registers() routine instead. Fixes: a68c31fc01ef ("powerpc/32s: Implement Kernel Userspace Access Protection") Fixes: 31ed2b13c48d ("powerpc/32s: Implement Kernel Userspace Execution Prevention.") Cc: stable@vger.kernel.org Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/21b05f7298c1b18f73e6e5b4cd5005aafa24b6da.1599820109.git.christophe.leroy@csgroup.eu Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/head_32.S | 2 +- arch/powerpc/platforms/powermac/sleep.S | 9 +-------- 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S index 4a24f8f026c7..5e2f2fd78b94 100644 --- a/arch/powerpc/kernel/head_32.S +++ b/arch/powerpc/kernel/head_32.S @@ -843,7 +843,7 @@ BEGIN_MMU_FTR_SECTION END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) blr -load_segment_registers: +_GLOBAL(load_segment_registers) li r0, NUM_USER_SEGMENTS /* load up user segment register values */ mtctr r0 /* for context 0 */ li r3, 0 /* Kp = 0, Ks = 0, VSID = 0 */ diff --git a/arch/powerpc/platforms/powermac/sleep.S b/arch/powerpc/platforms/powermac/sleep.S index bd6085b470b7..935dcac4c02d 100644 --- a/arch/powerpc/platforms/powermac/sleep.S +++ b/arch/powerpc/platforms/powermac/sleep.S @@ -293,14 +293,7 @@ grackle_wake_up: * we do any r1 memory access as we are not sure they * are in a sane state above the first 256Mb region */ - li r0,16 /* load up segment register values */ - mtctr r0 /* for context 0 */ - lis r3,0x2000 /* Ku = 1, VSID = 0 */ - li r4,0 -3: mtsrin r3,r4 - addi r3,r3,0x111 /* increment VSID */ - addis r4,r4,0x1000 /* address of next segment */ - bdnz 3b + bl load_segment_registers sync isync -- cgit v1.2.3 From 415043c3ec0dea6bf3a347f2ce22b0461b9e161a Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Tue, 13 Oct 2020 15:37:40 +1100 Subject: powerpc: Fix undetected data corruption with P9N DD2.1 VSX CI load emulation commit 1da4a0272c5469169f78cd76cf175ff984f52f06 upstream. __get_user_atomic_128_aligned() stores to kaddr using stvx which is a VMX store instruction, hence kaddr must be 16 byte aligned otherwise the store won't occur as expected. Unfortunately when we call __get_user_atomic_128_aligned() in p9_hmi_special_emu(), the buffer we pass as kaddr (ie. vbuf) isn't guaranteed to be 16B aligned. This means that the write to vbuf in __get_user_atomic_128_aligned() has the bottom bits of the address truncated. This results in other local variables being overwritten. Also vbuf will not contain the correct data which results in the userspace emulation being wrong and hence undetected user data corruption. In the past we've been mostly lucky as vbuf has ended up aligned but this is fragile and isn't always true. CONFIG_STACKPROTECTOR in particular can change the stack arrangement enough that our luck runs out. This issue only occurs on POWER9 Nimbus <= DD2.1 bare metal. The fix is to align vbuf to a 16 byte boundary. Fixes: 5080332c2c89 ("powerpc/64s: Add workaround for P9 vector CI load issue") Cc: stable@vger.kernel.org # v4.15+ Signed-off-by: Michael Neuling Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201013043741.743413-1-mikey@neuling.org Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/traps.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 9432fc6af28a..206032c9b545 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -877,7 +877,7 @@ static void p9_hmi_special_emu(struct pt_regs *regs) { unsigned int ra, rb, t, i, sel, instr, rc; const void __user *addr; - u8 vbuf[16], *vdst; + u8 vbuf[16] __aligned(16), *vdst; unsigned long ea, msr, msr_mask; bool swap; -- cgit v1.2.3 From c342001cab7f058d6fbf50f05ca4ea499c985ad9 Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Fri, 25 Sep 2020 15:48:39 -0400 Subject: NFSv4: Wait for stateid updates after CLOSE/OPEN_DOWNGRADE commit b4868b44c5628995fdd8ef2e24dda73cef963a75 upstream. Since commit 0e0cb35b417f ("NFSv4: Handle NFS4ERR_OLD_STATEID in CLOSE/OPEN_DOWNGRADE") the following livelock may occur if a CLOSE races with the update of the nfs_state: Process 1 Process 2 Server ========= ========= ======== OPEN file OPEN file Reply OPEN (1) Reply OPEN (2) Update state (1) CLOSE file (1) Reply OLD_STATEID (1) CLOSE file (2) Reply CLOSE (-1) Update state (2) wait for state change OPEN file wake CLOSE file OPEN file wake CLOSE file ... ... We can avoid this situation by not issuing an immediate retry with a bumped seqid when CLOSE/OPEN_DOWNGRADE receives NFS4ERR_OLD_STATEID. Instead, take the same approach used by OPEN and wait at least 5 seconds for outstanding stateid updates to complete if we can detect that we're out of sequence. Note that after this change it is still possible (though unlikely) that CLOSE waits a full 5 seconds, bumps the seqid, and retries -- and that attempt races with another OPEN at the same time. In order to avoid this race (which would result in the livelock), update nfs_need_update_open_stateid() to handle the case where: - the state is NFS_OPEN_STATE, and - the stateid doesn't match the current open stateid Finally, nfs_need_update_open_stateid() is modified to be idempotent and renamed to better suit the purpose of signaling that the stateid passed is the next stateid in sequence. Fixes: 0e0cb35b417f ("NFSv4: Handle NFS4ERR_OLD_STATEID in CLOSE/OPEN_DOWNGRADE") Cc: stable@vger.kernel.org # v5.4+ Signed-off-by: Benjamin Coddington Signed-off-by: Anna Schumaker Signed-off-by: Greg Kroah-Hartman --- fs/nfs/nfs4_fs.h | 8 ++++++ fs/nfs/nfs4proc.c | 81 +++++++++++++++++++++++++++++++----------------------- fs/nfs/nfs4trace.h | 1 + 3 files changed, 56 insertions(+), 34 deletions(-) diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index bb322d9de313..c4a98cbda6dd 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -570,6 +570,14 @@ static inline bool nfs4_stateid_is_newer(const nfs4_stateid *s1, const nfs4_stat return (s32)(be32_to_cpu(s1->seqid) - be32_to_cpu(s2->seqid)) > 0; } +static inline bool nfs4_stateid_is_next(const nfs4_stateid *s1, const nfs4_stateid *s2) +{ + u32 seq1 = be32_to_cpu(s1->seqid); + u32 seq2 = be32_to_cpu(s2->seqid); + + return seq2 == seq1 + 1U || (seq2 == 1U && seq1 == 0xffffffffU); +} + static inline void nfs4_stateid_seqid_inc(nfs4_stateid *s1) { u32 seqid = be32_to_cpu(s1->seqid); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 00435556db0c..f77b7041eaef 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1515,19 +1515,6 @@ static void nfs_state_log_update_open_stateid(struct nfs4_state *state) wake_up_all(&state->waitq); } -static void nfs_state_log_out_of_order_open_stateid(struct nfs4_state *state, - const nfs4_stateid *stateid) -{ - u32 state_seqid = be32_to_cpu(state->open_stateid.seqid); - u32 stateid_seqid = be32_to_cpu(stateid->seqid); - - if (stateid_seqid == state_seqid + 1U || - (stateid_seqid == 1U && state_seqid == 0xffffffffU)) - nfs_state_log_update_open_stateid(state); - else - set_bit(NFS_STATE_CHANGE_WAIT, &state->flags); -} - static void nfs_test_and_clear_all_open_stateid(struct nfs4_state *state) { struct nfs_client *clp = state->owner->so_server->nfs_client; @@ -1553,21 +1540,19 @@ static void nfs_test_and_clear_all_open_stateid(struct nfs4_state *state) * i.e. The stateid seqids have to be initialised to 1, and * are then incremented on every state transition. */ -static bool nfs_need_update_open_stateid(struct nfs4_state *state, +static bool nfs_stateid_is_sequential(struct nfs4_state *state, const nfs4_stateid *stateid) { - if (test_bit(NFS_OPEN_STATE, &state->flags) == 0 || - !nfs4_stateid_match_other(stateid, &state->open_stateid)) { + if (test_bit(NFS_OPEN_STATE, &state->flags)) { + /* The common case - we're updating to a new sequence number */ + if (nfs4_stateid_match_other(stateid, &state->open_stateid) && + nfs4_stateid_is_next(&state->open_stateid, stateid)) { + return true; + } + } else { + /* This is the first OPEN in this generation */ if (stateid->seqid == cpu_to_be32(1)) - nfs_state_log_update_open_stateid(state); - else - set_bit(NFS_STATE_CHANGE_WAIT, &state->flags); - return true; - } - - if (nfs4_stateid_is_newer(stateid, &state->open_stateid)) { - nfs_state_log_out_of_order_open_stateid(state, stateid); - return true; + return true; } return false; } @@ -1641,16 +1626,16 @@ static void nfs_set_open_stateid_locked(struct nfs4_state *state, int status = 0; for (;;) { - if (!nfs_need_update_open_stateid(state, stateid)) - return; - if (!test_bit(NFS_STATE_CHANGE_WAIT, &state->flags)) + if (nfs_stateid_is_sequential(state, stateid)) break; + if (status) break; /* Rely on seqids for serialisation with NFSv4.0 */ if (!nfs4_has_session(NFS_SERVER(state->inode)->nfs_client)) break; + set_bit(NFS_STATE_CHANGE_WAIT, &state->flags); prepare_to_wait(&state->waitq, &wait, TASK_KILLABLE); /* * Ensure we process the state changes in the same order @@ -1661,6 +1646,7 @@ static void nfs_set_open_stateid_locked(struct nfs4_state *state, spin_unlock(&state->owner->so_lock); rcu_read_unlock(); trace_nfs4_open_stateid_update_wait(state->inode, stateid, 0); + if (!signal_pending(current)) { if (schedule_timeout(5*HZ) == 0) status = -EAGAIN; @@ -3397,7 +3383,8 @@ static bool nfs4_refresh_open_old_stateid(nfs4_stateid *dst, __be32 seqid_open; u32 dst_seqid; bool ret; - int seq; + int seq, status = -EAGAIN; + DEFINE_WAIT(wait); for (;;) { ret = false; @@ -3409,15 +3396,41 @@ static bool nfs4_refresh_open_old_stateid(nfs4_stateid *dst, continue; break; } + + write_seqlock(&state->seqlock); seqid_open = state->open_stateid.seqid; - if (read_seqretry(&state->seqlock, seq)) - continue; dst_seqid = be32_to_cpu(dst->seqid); - if ((s32)(dst_seqid - be32_to_cpu(seqid_open)) >= 0) - dst->seqid = cpu_to_be32(dst_seqid + 1); - else + + /* Did another OPEN bump the state's seqid? try again: */ + if ((s32)(be32_to_cpu(seqid_open) - dst_seqid) > 0) { dst->seqid = seqid_open; + write_sequnlock(&state->seqlock); + ret = true; + break; + } + + /* server says we're behind but we haven't seen the update yet */ + set_bit(NFS_STATE_CHANGE_WAIT, &state->flags); + prepare_to_wait(&state->waitq, &wait, TASK_KILLABLE); + write_sequnlock(&state->seqlock); + trace_nfs4_close_stateid_update_wait(state->inode, dst, 0); + + if (signal_pending(current)) + status = -EINTR; + else + if (schedule_timeout(5*HZ) != 0) + status = 0; + + finish_wait(&state->waitq, &wait); + + if (!status) + continue; + if (status == -EINTR) + break; + + /* we slept the whole 5 seconds, we must have lost a seqid */ + dst->seqid = cpu_to_be32(dst_seqid + 1); ret = true; break; } diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index 9398c0b6e0a3..2295a934a154 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -1291,6 +1291,7 @@ DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_setattr); DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_delegreturn); DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_open_stateid_update); DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_open_stateid_update_wait); +DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_close_stateid_update_wait); DECLARE_EVENT_CLASS(nfs4_getattr_event, TP_PROTO( -- cgit v1.2.3 From da86bb4c214fd70f9c6f17a130878c423ef156d2 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Fri, 16 Oct 2020 09:25:45 -0400 Subject: NFSv4.2: support EXCHGID4_FLAG_SUPP_FENCE_OPS 4.2 EXCHANGE_ID flag commit 8c39076c276be0b31982e44654e2c2357473258a upstream. RFC 7862 introduced a new flag that either client or server is allowed to set: EXCHGID4_FLAG_SUPP_FENCE_OPS. Client needs to update its bitmask to allow for this flag value. v2: changed minor version argument to unsigned int Signed-off-by: Olga Kornievskaia CC: Signed-off-by: Anna Schumaker Signed-off-by: Greg Kroah-Hartman --- fs/nfs/nfs4proc.c | 9 ++++++--- include/uapi/linux/nfs4.h | 3 +++ 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index f77b7041eaef..ddc900df461c 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -7859,9 +7859,11 @@ int nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, * both PNFS and NON_PNFS flags set, and not having one of NON_PNFS, PNFS, or * DS flags set. */ -static int nfs4_check_cl_exchange_flags(u32 flags) +static int nfs4_check_cl_exchange_flags(u32 flags, u32 version) { - if (flags & ~EXCHGID4_FLAG_MASK_R) + if (version >= 2 && (flags & ~EXCHGID4_2_FLAG_MASK_R)) + goto out_inval; + else if (version < 2 && (flags & ~EXCHGID4_FLAG_MASK_R)) goto out_inval; if ((flags & EXCHGID4_FLAG_USE_PNFS_MDS) && (flags & EXCHGID4_FLAG_USE_NON_PNFS)) @@ -8274,7 +8276,8 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, const struct cred *cre if (status != 0) goto out; - status = nfs4_check_cl_exchange_flags(resp->flags); + status = nfs4_check_cl_exchange_flags(resp->flags, + clp->cl_mvops->minor_version); if (status != 0) goto out; diff --git a/include/uapi/linux/nfs4.h b/include/uapi/linux/nfs4.h index 8572930cf5b0..54a78529c8b3 100644 --- a/include/uapi/linux/nfs4.h +++ b/include/uapi/linux/nfs4.h @@ -136,6 +136,8 @@ #define EXCHGID4_FLAG_UPD_CONFIRMED_REC_A 0x40000000 #define EXCHGID4_FLAG_CONFIRMED_R 0x80000000 + +#define EXCHGID4_FLAG_SUPP_FENCE_OPS 0x00000004 /* * Since the validity of these bits depends on whether * they're set in the argument or response, have separate @@ -143,6 +145,7 @@ */ #define EXCHGID4_FLAG_MASK_A 0x40070103 #define EXCHGID4_FLAG_MASK_R 0x80070103 +#define EXCHGID4_2_FLAG_MASK_R 0x80070107 #define SEQ4_STATUS_CB_PATH_DOWN 0x00000001 #define SEQ4_STATUS_CB_GSS_CONTEXTS_EXPIRING 0x00000002 -- cgit v1.2.3 From c1ea3c4a4302ee33da9f2684e049c19806074c3e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 1 Oct 2020 18:58:56 -0400 Subject: NFSD: Add missing NFSv2 .pc_func methods commit 6b3dccd48de8a4c650b01499a0b09d1e2279649e upstream. There's no protection in nfsd_dispatch() against a NULL .pc_func helpers. A malicious NFS client can trigger a crash by invoking the unused/unsupported NFSv2 ROOT or WRITECACHE procedures. The current NFSD dispatcher does not support returning a void reply to a non-NULL procedure, so the reply to both of these is wrong, for the moment. Cc: Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfsproc.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index c83ddac22f38..754c763374dd 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -118,6 +118,13 @@ done: return nfsd_return_attrs(nfserr, resp); } +/* Obsolete, replaced by MNTPROC_MNT. */ +static __be32 +nfsd_proc_root(struct svc_rqst *rqstp) +{ + return nfs_ok; +} + /* * Look up a path name component * Note: the dentry in the resp->fh may be negative if the file @@ -203,6 +210,13 @@ nfsd_proc_read(struct svc_rqst *rqstp) return fh_getattr(&resp->fh, &resp->stat); } +/* Reserved */ +static __be32 +nfsd_proc_writecache(struct svc_rqst *rqstp) +{ + return nfs_ok; +} + /* * Write data to a file * N.B. After this call resp->fh needs an fh_put @@ -617,6 +631,7 @@ static const struct svc_procedure nfsd_procedures2[18] = { .pc_xdrressize = ST+AT, }, [NFSPROC_ROOT] = { + .pc_func = nfsd_proc_root, .pc_decode = nfssvc_decode_void, .pc_encode = nfssvc_encode_void, .pc_argsize = sizeof(struct nfsd_void), @@ -654,6 +669,7 @@ static const struct svc_procedure nfsd_procedures2[18] = { .pc_xdrressize = ST+AT+1+NFSSVC_MAXBLKSIZE_V2/4, }, [NFSPROC_WRITECACHE] = { + .pc_func = nfsd_proc_writecache, .pc_decode = nfssvc_decode_void, .pc_encode = nfssvc_encode_void, .pc_argsize = sizeof(struct nfsd_void), -- cgit v1.2.3 From 213c836b239658f808c63d058b442ad81a399eb3 Mon Sep 17 00:00:00 2001 From: Zhihao Cheng Date: Mon, 1 Jun 2020 17:10:37 +0800 Subject: ubifs: dent: Fix some potential memory leaks while iterating entries commit 58f6e78a65f1fcbf732f60a7478ccc99873ff3ba upstream. Fix some potential memory leaks in error handling branches while iterating dent entries. For example, function dbg_check_dir() forgets to free pdent if it exists. Signed-off-by: Zhihao Cheng Cc: Fixes: 1e51764a3c2ac05a2 ("UBIFS: add new flash file system") Signed-off-by: Richard Weinberger Signed-off-by: Greg Kroah-Hartman --- fs/ubifs/debug.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index e4b52783819d..992b74f9c941 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c @@ -1123,6 +1123,7 @@ int dbg_check_dir(struct ubifs_info *c, const struct inode *dir) err = PTR_ERR(dent); if (err == -ENOENT) break; + kfree(pdent); return err; } -- cgit v1.2.3 From a779274697600a8269722f80d2599204bba0fe58 Mon Sep 17 00:00:00 2001 From: Zhihao Cheng Date: Mon, 1 Jun 2020 17:10:36 +0800 Subject: ubifs: xattr: Fix some potential memory leaks while iterating entries commit f2aae745b82c842221f4f233051f9ac641790959 upstream. Fix some potential memory leaks in error handling branches while iterating xattr entries. For example, function ubifs_tnc_remove_ino() forgets to free pxent if it exists. Similar problems also exist in ubifs_purge_xattrs(), ubifs_add_orphan() and ubifs_jnl_write_inode(). Signed-off-by: Zhihao Cheng Cc: Fixes: 1e51764a3c2ac05a2 ("UBIFS: add new flash file system") Signed-off-by: Richard Weinberger Signed-off-by: Greg Kroah-Hartman --- fs/ubifs/journal.c | 2 ++ fs/ubifs/orphan.c | 2 ++ fs/ubifs/tnc.c | 3 +++ fs/ubifs/xattr.c | 2 ++ 4 files changed, 9 insertions(+) diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c index 5f2ac5ef0891..037b7a7549f5 100644 --- a/fs/ubifs/journal.c +++ b/fs/ubifs/journal.c @@ -894,6 +894,7 @@ int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode) if (err == -ENOENT) break; + kfree(pxent); goto out_release; } @@ -906,6 +907,7 @@ int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode) ubifs_err(c, "dead directory entry '%s', error %d", xent->name, err); ubifs_ro_mode(c, err); + kfree(pxent); kfree(xent); goto out_release; } diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c index 283f9eb48410..b0117878b3a0 100644 --- a/fs/ubifs/orphan.c +++ b/fs/ubifs/orphan.c @@ -173,6 +173,7 @@ int ubifs_add_orphan(struct ubifs_info *c, ino_t inum) err = PTR_ERR(xent); if (err == -ENOENT) break; + kfree(pxent); return err; } @@ -182,6 +183,7 @@ int ubifs_add_orphan(struct ubifs_info *c, ino_t inum) xattr_orphan = orphan_add(c, xattr_inum, orphan); if (IS_ERR(xattr_orphan)) { + kfree(pxent); kfree(xent); return PTR_ERR(xattr_orphan); } diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c index e8e7b0e9532e..33742ee3945b 100644 --- a/fs/ubifs/tnc.c +++ b/fs/ubifs/tnc.c @@ -2885,6 +2885,7 @@ int ubifs_tnc_remove_ino(struct ubifs_info *c, ino_t inum) err = PTR_ERR(xent); if (err == -ENOENT) break; + kfree(pxent); return err; } @@ -2898,6 +2899,7 @@ int ubifs_tnc_remove_ino(struct ubifs_info *c, ino_t inum) fname_len(&nm) = le16_to_cpu(xent->nlen); err = ubifs_tnc_remove_nm(c, &key1, &nm); if (err) { + kfree(pxent); kfree(xent); return err; } @@ -2906,6 +2908,7 @@ int ubifs_tnc_remove_ino(struct ubifs_info *c, ino_t inum) highest_ino_key(c, &key2, xattr_inum); err = ubifs_tnc_remove_range(c, &key1, &key2); if (err) { + kfree(pxent); kfree(xent); return err; } diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c index 9aefbb60074f..a0b9b349efe6 100644 --- a/fs/ubifs/xattr.c +++ b/fs/ubifs/xattr.c @@ -522,6 +522,7 @@ int ubifs_purge_xattrs(struct inode *host) xent->name, err); ubifs_ro_mode(c, err); kfree(pxent); + kfree(xent); return err; } @@ -531,6 +532,7 @@ int ubifs_purge_xattrs(struct inode *host) err = remove_xattr(c, host, xino, &nm); if (err) { kfree(pxent); + kfree(xent); iput(xino); ubifs_err(c, "cannot remove xattr, error %d", err); return err; -- cgit v1.2.3 From bc202c839b5d38096210a7b466313af0c8fe2202 Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Mon, 28 Sep 2020 20:58:59 +0200 Subject: ubifs: journal: Make sure to not dirty twice for auth nodes commit 78c7d49f55d8631b67c09f9bfbe8155211a9ea06 upstream. When removing the last reference of an inode the size of an auth node is already part of write_len. So we must not call ubifs_add_auth_dirt(). Call it only when needed. Cc: Cc: Sascha Hauer Cc: Kristof Havasi Fixes: 6a98bc4614de ("ubifs: Add authentication nodes to journal") Reported-and-tested-by: Kristof Havasi Reviewed-by: Sascha Hauer Signed-off-by: Richard Weinberger Signed-off-by: Greg Kroah-Hartman --- fs/ubifs/journal.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c index 037b7a7549f5..f78c3e3ef931 100644 --- a/fs/ubifs/journal.c +++ b/fs/ubifs/journal.c @@ -938,8 +938,6 @@ int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode) inode->i_ino); release_head(c, BASEHD); - ubifs_add_auth_dirt(c, lnum); - if (last_reference) { err = ubifs_tnc_remove_ino(c, inode->i_ino); if (err) @@ -949,6 +947,8 @@ int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode) } else { union ubifs_key key; + ubifs_add_auth_dirt(c, lnum); + ino_key_init(c, &key, inode->i_ino); err = ubifs_tnc_add(c, &key, lnum, offs, ilen, hash); } -- cgit v1.2.3 From 748057df47b90b338ec60ec784ae7f3c9161094a Mon Sep 17 00:00:00 2001 From: Zhihao Cheng Date: Tue, 29 Sep 2020 20:45:29 +0800 Subject: ubifs: Fix a memleak after dumping authentication mount options commit 47f6d9ce45b03a40c34b668a9884754c58122b39 upstream. Fix a memory leak after dumping authentication mount options in error handling branch. Signed-off-by: Zhihao Cheng Cc: # 4.20+ Fixes: d8a22773a12c6d7 ("ubifs: Enable authentication support") Reviewed-by: Sascha Hauer Signed-off-by: Richard Weinberger Signed-off-by: Greg Kroah-Hartman --- fs/ubifs/super.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 7fc2f3f07c16..05ce40ee7d64 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -1123,6 +1123,18 @@ static int ubifs_parse_options(struct ubifs_info *c, char *options, return 0; } +/* + * ubifs_release_options - release mount parameters which have been dumped. + * @c: UBIFS file-system description object + */ +static void ubifs_release_options(struct ubifs_info *c) +{ + kfree(c->auth_key_name); + c->auth_key_name = NULL; + kfree(c->auth_hash_name); + c->auth_hash_name = NULL; +} + /** * destroy_journal - destroy journal data structures. * @c: UBIFS file-system description object @@ -1632,8 +1644,7 @@ static void ubifs_umount(struct ubifs_info *c) ubifs_lpt_free(c, 0); ubifs_exit_authentication(c); - kfree(c->auth_key_name); - kfree(c->auth_hash_name); + ubifs_release_options(c); kfree(c->cbuf); kfree(c->rcvrd_mst_node); kfree(c->mst_node); @@ -2201,6 +2212,7 @@ out_umount: out_unlock: mutex_unlock(&c->umount_mutex); out_close: + ubifs_release_options(c); ubi_close_volume(c->ubi); out: return err; -- cgit v1.2.3 From 9ba6324ca9c4a1f4fc24c3620ae41fc10239a4cf Mon Sep 17 00:00:00 2001 From: Zhihao Cheng Date: Tue, 29 Sep 2020 20:45:30 +0800 Subject: ubifs: Don't parse authentication mount options in remount process commit bb674a4d4de1032837fcbf860a63939e66f0b7ad upstream. There is no need to dump authentication options while remounting, because authentication initialization can only be doing once in the first mount process. Dumping authentication mount options in remount process may cause memory leak if UBIFS has already been mounted with old authentication mount options. Signed-off-by: Zhihao Cheng Cc: # 4.20+ Fixes: d8a22773a12c6d7 ("ubifs: Enable authentication support") Reviewed-by: Sascha Hauer Signed-off-by: Richard Weinberger Signed-off-by: Greg Kroah-Hartman --- fs/ubifs/super.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 05ce40ee7d64..94ddb0ff1ad7 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -1092,14 +1092,20 @@ static int ubifs_parse_options(struct ubifs_info *c, char *options, break; } case Opt_auth_key: - c->auth_key_name = kstrdup(args[0].from, GFP_KERNEL); - if (!c->auth_key_name) - return -ENOMEM; + if (!is_remount) { + c->auth_key_name = kstrdup(args[0].from, + GFP_KERNEL); + if (!c->auth_key_name) + return -ENOMEM; + } break; case Opt_auth_hash_name: - c->auth_hash_name = kstrdup(args[0].from, GFP_KERNEL); - if (!c->auth_hash_name) - return -ENOMEM; + if (!is_remount) { + c->auth_hash_name = kstrdup(args[0].from, + GFP_KERNEL); + if (!c->auth_hash_name) + return -ENOMEM; + } break; case Opt_ignore: break; -- cgit v1.2.3 From a99cbd20a5c511ad7b1b96730bc87fcba80e21d1 Mon Sep 17 00:00:00 2001 From: Zhihao Cheng Date: Tue, 29 Sep 2020 20:45:31 +0800 Subject: ubifs: mount_ubifs: Release authentication resource in error handling path commit e2a05cc7f8229e150243cdae40f2af9021d67a4a upstream. Release the authentication related resource in some error handling branches in mount_ubifs(). Signed-off-by: Zhihao Cheng Cc: # 4.20+ Fixes: d8a22773a12c6d7 ("ubifs: Enable authentication support") Reviewed-by: Sascha Hauer Signed-off-by: Richard Weinberger Signed-off-by: Greg Kroah-Hartman --- fs/ubifs/super.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 94ddb0ff1ad7..e49bd69dfc1c 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -1313,7 +1313,7 @@ static int mount_ubifs(struct ubifs_info *c) err = ubifs_read_superblock(c); if (err) - goto out_free; + goto out_auth; c->probing = 0; @@ -1325,18 +1325,18 @@ static int mount_ubifs(struct ubifs_info *c) ubifs_err(c, "'compressor \"%s\" is not compiled in", ubifs_compr_name(c, c->default_compr)); err = -ENOTSUPP; - goto out_free; + goto out_auth; } err = init_constants_sb(c); if (err) - goto out_free; + goto out_auth; sz = ALIGN(c->max_idx_node_sz, c->min_io_size) * 2; c->cbuf = kmalloc(sz, GFP_NOFS); if (!c->cbuf) { err = -ENOMEM; - goto out_free; + goto out_auth; } err = alloc_wbufs(c); @@ -1611,6 +1611,8 @@ out_wbufs: free_wbufs(c); out_cbuf: kfree(c->cbuf); +out_auth: + ubifs_exit_authentication(c); out_free: kfree(c->write_reserve_buf); kfree(c->bu.buf); -- cgit v1.2.3 From 46881159583318cf84e9210d794ae820f7196dca Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 28 Sep 2020 22:11:35 +0200 Subject: perf python scripting: Fix printable strings in python3 scripts commit 6fcd5ddc3b1467b3586972ef785d0d926ae4cdf4 upstream. Hagen reported broken strings in python3 tracepoint scripts: make PYTHON=python3 perf record -e sched:sched_switch -a -- sleep 5 perf script --gen-script py perf script -s ./perf-script.py [..] sched__sched_switch 7 563231.759525792 0 swapper prev_comm=bytearray(b'swapper/7\x00\x00\x00\x00\x00\x00\x00'), prev_pid=0, prev_prio=120, prev_state=, next_comm=bytearray(b'mutex-thread-co\x00'), The problem is in the is_printable_array function that does not take the zero byte into account and claim such string as not printable, so the code will create byte array instead of string. Committer testing: After this fix: sched__sched_switch 3 484522.497072626 1158680 kworker/3:0-eve prev_comm=kworker/3:0, prev_pid=1158680, prev_prio=120, prev_state=I, next_comm=swapper/3, next_pid=0, next_prio=120 Sample: {addr=0, cpu=3, datasrc=84410401, datasrc_decode=N/A|SNP N/A|TLB N/A|LCK N/A, ip=18446744071841817196, period=1, phys_addr=0, pid=1158680, tid=1158680, time=484522497072626, transaction=0, values=[(0, 0)], weight=0} sched__sched_switch 4 484522.497085610 1225814 perf prev_comm=perf, prev_pid=1225814, prev_prio=120, prev_state=, next_comm=migration/4, next_pid=30, next_prio=0 Sample: {addr=0, cpu=4, datasrc=84410401, datasrc_decode=N/A|SNP N/A|TLB N/A|LCK N/A, ip=18446744071841817196, period=1, phys_addr=0, pid=1225814, tid=1225814, time=484522497085610, transaction=0, values=[(0, 0)], weight=0} Fixes: 249de6e07458 ("perf script python: Fix string vs byte array resolving") Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Tested-by: Hagen Paul Pfeifer Cc: Alexander Shishkin Cc: Mark Rutland Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Cc: stable@vger.kernel.org Link: http://lore.kernel.org/lkml/20200928201135.3633850-1-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/print_binary.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/print_binary.c b/tools/perf/util/print_binary.c index 599a1543871d..13fdc51c61d9 100644 --- a/tools/perf/util/print_binary.c +++ b/tools/perf/util/print_binary.c @@ -50,7 +50,7 @@ int is_printable_array(char *p, unsigned int len) len--; - for (i = 0; i < len; i++) { + for (i = 0; i < len && p[i]; i++) { if (!isprint(p[i]) && !isspace(p[i])) return 0; } -- cgit v1.2.3 From 6d0beeebd15d86482b5accf2d1e94c3b71571193 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 22 Oct 2020 03:16:22 -0700 Subject: ARC: perf: redo the pct irq missing in device-tree handling commit 8c42a5c02bec6c7eccf08957be3c6c8fccf9790b upstream. commit feb92d7d3813456c11dce21 "(ARC: perf: don't bail setup if pct irq missing in device-tree)" introduced a silly brown-paper bag bug: The assignment and comparison in an if statement were not bracketed correctly leaving the order of evaluation undefined. | | if (has_interrupts && (irq = platform_get_irq(pdev, 0) >= 0)) { | ^^^ ^^^^ And given such a chance, the compiler will bite you hard, fully entitled to generating this piece of beauty: | | # if (has_interrupts && (irq = platform_get_irq(pdev, 0) >= 0)) { | | bl.d @platform_get_irq <-- irq returned in r0 | | setge r2, r0, 0 <-- r2 is bool 1 or 0 if irq >= 0 true/false | brlt.d r0, 0, @.L114 | | st_s r2,[sp] <-- irq saved is bool 1 or 0, not actual return val | st 1,[r3,160] # arc_pmu.18_29->irq <-- drops bool and assumes 1 | | # return __request_percpu_irq(irq, handler, 0, | | bl.d @__request_percpu_irq; | mov_s r0,1 <-- drops even bool and assumes 1 which fails With the snafu fixed, everything is as expected. | bl.d @platform_get_irq <-- returns irq in r0 | | mov_s r2,r0 | brlt.d r2, 0, @.L112 | | st_s r0,[sp] <-- irq isaved is actual return value above | st r0,[r13,160] #arc_pmu.18_27->irq | | bl.d @__request_percpu_irq <-- r0 unchanged so actual irq returned | add r4,r4,r12 #, tmp363, __ptr Cc: Signed-off-by: Vineet Gupta Signed-off-by: Greg Kroah-Hartman --- arch/arc/kernel/perf_event.c | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/arch/arc/kernel/perf_event.c b/arch/arc/kernel/perf_event.c index 79849f37e782..145722f80c9b 100644 --- a/arch/arc/kernel/perf_event.c +++ b/arch/arc/kernel/perf_event.c @@ -562,7 +562,7 @@ static int arc_pmu_device_probe(struct platform_device *pdev) { struct arc_reg_pct_build pct_bcr; struct arc_reg_cc_build cc_bcr; - int i, has_interrupts, irq; + int i, has_interrupts, irq = -1; int counter_size; /* in bits */ union cc_name { @@ -637,19 +637,28 @@ static int arc_pmu_device_probe(struct platform_device *pdev) .attr_groups = arc_pmu->attr_groups, }; - if (has_interrupts && (irq = platform_get_irq(pdev, 0) >= 0)) { + if (has_interrupts) { + irq = platform_get_irq(pdev, 0); + if (irq >= 0) { + int ret; - arc_pmu->irq = irq; + arc_pmu->irq = irq; - /* intc map function ensures irq_set_percpu_devid() called */ - request_percpu_irq(irq, arc_pmu_intr, "ARC perf counters", - this_cpu_ptr(&arc_pmu_cpu)); + /* intc map function ensures irq_set_percpu_devid() called */ + ret = request_percpu_irq(irq, arc_pmu_intr, "ARC perf counters", + this_cpu_ptr(&arc_pmu_cpu)); + + if (!ret) + on_each_cpu(arc_cpu_pmu_irq_init, &irq, 1); + else + irq = -1; + } - on_each_cpu(arc_cpu_pmu_irq_init, &irq, 1); - } else { - arc_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; } + if (irq == -1) + arc_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; + /* * perf parser doesn't really like '-' symbol in events name, so let's * use '_' in arc pct name as it goes to kernel PMU event prefix. -- cgit v1.2.3 From da3bb6fa23f1a4a2f0efa625621106d1d3e5892c Mon Sep 17 00:00:00 2001 From: Zhihao Cheng Date: Mon, 1 Jun 2020 17:12:31 +0800 Subject: ubi: check kthread_should_stop() after the setting of task state commit d005f8c6588efcfbe88099b6edafc6f58c84a9c1 upstream. A detach hung is possible when a race occurs between the detach process and the ubi background thread. The following sequences outline the race: ubi thread: if (list_empty(&ubi->works)... ubi detach: set_bit(KTHREAD_SHOULD_STOP, &kthread->flags) => by kthread_stop() wake_up_process() => ubi thread is still running, so 0 is returned ubi thread: set_current_state(TASK_INTERRUPTIBLE) schedule() => ubi thread will never be scheduled again ubi detach: wait_for_completion() => hung task! To fix that, we need to check kthread_should_stop() after we set the task state, so the ubi thread will either see the stop bit and exit or the task state is reset to runnable such that it isn't scheduled out indefinitely. Signed-off-by: Zhihao Cheng Cc: Fixes: 801c135ce73d5df1ca ("UBI: Unsorted Block Images") Reported-by: syzbot+853639d0cb16c31c7a14@syzkaller.appspotmail.com Signed-off-by: Richard Weinberger Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/ubi/wl.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c index 5d77a38dba54..7def041bbe48 100644 --- a/drivers/mtd/ubi/wl.c +++ b/drivers/mtd/ubi/wl.c @@ -1629,6 +1629,19 @@ int ubi_thread(void *u) !ubi->thread_enabled || ubi_dbg_is_bgt_disabled(ubi)) { set_current_state(TASK_INTERRUPTIBLE); spin_unlock(&ubi->wl_lock); + + /* + * Check kthread_should_stop() after we set the task + * state to guarantee that we either see the stop bit + * and exit or the task state is reset to runnable such + * that it's not scheduled out indefinitely and detects + * the stop bit at kthread_should_stop(). + */ + if (kthread_should_stop()) { + set_current_state(TASK_RUNNING); + break; + } + schedule(); continue; } -- cgit v1.2.3 From e17afa6d1de3d7ce7b2cc49fa2eb8c0e26962746 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sat, 17 Oct 2020 16:13:37 -0700 Subject: ia64: fix build error with !COREDUMP commit 7404840d87557c4092bf0272bce5e0354c774bf9 upstream. Fix linkage error when CONFIG_BINFMT_ELF is selected but CONFIG_COREDUMP is not: ia64-linux-ld: arch/ia64/kernel/elfcore.o: in function `elf_core_write_extra_phdrs': elfcore.c:(.text+0x172): undefined reference to `dump_emit' ia64-linux-ld: arch/ia64/kernel/elfcore.o: in function `elf_core_write_extra_data': elfcore.c:(.text+0x2b2): undefined reference to `dump_emit' Fixes: 1fcccbac89f5 ("elf coredump: replace ELF_CORE_EXTRA_* macros by functions") Reported-by: kernel test robot Signed-off-by: Krzysztof Kozlowski Signed-off-by: Andrew Morton Cc: Tony Luck Cc: Fenghua Yu Cc: Link: https://lkml.kernel.org/r/20200819064146.12529-1-krzk@kernel.org Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/ia64/kernel/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile index 1a8df6669eee..18d6008b151f 100644 --- a/arch/ia64/kernel/Makefile +++ b/arch/ia64/kernel/Makefile @@ -41,7 +41,7 @@ obj-y += esi_stub.o # must be in kernel proper endif obj-$(CONFIG_INTEL_IOMMU) += pci-dma.o -obj-$(CONFIG_BINFMT_ELF) += elfcore.o +obj-$(CONFIG_ELF_CORE) += elfcore.o # fp_emulate() expects f2-f5,f16-f31 to contain the user-level state. CFLAGS_traps.o += -mfixed-range=f2-f5,f16-f31 -- cgit v1.2.3 From da3ccf5b20458d1ceafc399130cb47aaac56428b Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 14 Sep 2020 17:45:48 +0200 Subject: rtc: rx8010: don't modify the global rtc ops commit d3b14296da69adb7825022f3224ac6137eb30abf upstream. The way the driver is implemented is buggy for the (admittedly unlikely) use case where there are two RTCs with one having an interrupt configured and the second not. This is caused by the fact that we use a global rtc_class_ops struct which we modify depending on whether the irq number is present or not. Fix it by using two const ops structs with and without alarm operations. While at it: not being able to request a configured interrupt is an error so don't ignore it and bail out of probe(). Fixes: ed13d89b08e3 ("rtc: Add Epson RX8010SJ RTC driver") Signed-off-by: Bartosz Golaszewski Signed-off-by: Alexandre Belloni Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20200914154601.32245-2-brgl@bgdev.pl Signed-off-by: Greg Kroah-Hartman --- drivers/rtc/rtc-rx8010.c | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/drivers/rtc/rtc-rx8010.c b/drivers/rtc/rtc-rx8010.c index 8102469e27c0..0c436aea0da4 100644 --- a/drivers/rtc/rtc-rx8010.c +++ b/drivers/rtc/rtc-rx8010.c @@ -424,16 +424,26 @@ static int rx8010_ioctl(struct device *dev, unsigned int cmd, unsigned long arg) } } -static struct rtc_class_ops rx8010_rtc_ops = { +static const struct rtc_class_ops rx8010_rtc_ops_default = { .read_time = rx8010_get_time, .set_time = rx8010_set_time, .ioctl = rx8010_ioctl, }; +static const struct rtc_class_ops rx8010_rtc_ops_alarm = { + .read_time = rx8010_get_time, + .set_time = rx8010_set_time, + .ioctl = rx8010_ioctl, + .read_alarm = rx8010_read_alarm, + .set_alarm = rx8010_set_alarm, + .alarm_irq_enable = rx8010_alarm_irq_enable, +}; + static int rx8010_probe(struct i2c_client *client, const struct i2c_device_id *id) { struct i2c_adapter *adapter = client->adapter; + const struct rtc_class_ops *rtc_ops; struct rx8010_data *rx8010; int err = 0; @@ -464,16 +474,16 @@ static int rx8010_probe(struct i2c_client *client, if (err) { dev_err(&client->dev, "unable to request IRQ\n"); - client->irq = 0; - } else { - rx8010_rtc_ops.read_alarm = rx8010_read_alarm; - rx8010_rtc_ops.set_alarm = rx8010_set_alarm; - rx8010_rtc_ops.alarm_irq_enable = rx8010_alarm_irq_enable; + return err; } + + rtc_ops = &rx8010_rtc_ops_alarm; + } else { + rtc_ops = &rx8010_rtc_ops_default; } rx8010->rtc = devm_rtc_device_register(&client->dev, client->name, - &rx8010_rtc_ops, THIS_MODULE); + rtc_ops, THIS_MODULE); if (IS_ERR(rx8010->rtc)) { dev_err(&client->dev, "unable to register the class device\n"); -- cgit v1.2.3 From 2c58d5e0c754c0cd11c226a5061beea26414c067 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 20 Sep 2020 23:12:38 +0200 Subject: i2c: imx: Fix external abort on interrupt in exit paths commit e50e4f0b85be308a01b830c5fbdffc657e1a6dd0 upstream. If interrupt comes late, during probe error path or device remove (could be triggered with CONFIG_DEBUG_SHIRQ), the interrupt handler i2c_imx_isr() will access registers with the clock being disabled. This leads to external abort on non-linefetch on Toradex Colibri VF50 module (with Vybrid VF5xx): Unhandled fault: external abort on non-linefetch (0x1008) at 0x8882d003 Internal error: : 1008 [#1] ARM Modules linked in: CPU: 0 PID: 1 Comm: swapper Not tainted 5.7.0 #607 Hardware name: Freescale Vybrid VF5xx/VF6xx (Device Tree) (i2c_imx_isr) from [<8017009c>] (free_irq+0x25c/0x3b0) (free_irq) from [<805844ec>] (release_nodes+0x178/0x284) (release_nodes) from [<80580030>] (really_probe+0x10c/0x348) (really_probe) from [<80580380>] (driver_probe_device+0x60/0x170) (driver_probe_device) from [<80580630>] (device_driver_attach+0x58/0x60) (device_driver_attach) from [<805806bc>] (__driver_attach+0x84/0xc0) (__driver_attach) from [<8057e228>] (bus_for_each_dev+0x68/0xb4) (bus_for_each_dev) from [<8057f3ec>] (bus_add_driver+0x144/0x1ec) (bus_add_driver) from [<80581320>] (driver_register+0x78/0x110) (driver_register) from [<8010213c>] (do_one_initcall+0xa8/0x2f4) (do_one_initcall) from [<80c0100c>] (kernel_init_freeable+0x178/0x1dc) (kernel_init_freeable) from [<80807048>] (kernel_init+0x8/0x110) (kernel_init) from [<80100114>] (ret_from_fork+0x14/0x20) Additionally, the i2c_imx_isr() could wake up the wait queue (imx_i2c_struct->queue) before its initialization happens. The resource-managed framework should not be used for interrupt handling, because the resource will be released too late - after disabling clocks. The interrupt handler is not prepared for such case. Fixes: 1c4b6c3bcf30 ("i2c: imx: implement bus recovery") Cc: Signed-off-by: Krzysztof Kozlowski Acked-by: Oleksij Rempel Signed-off-by: Wolfram Sang Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-imx.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c index a3b61336fe55..9543c9816eed 100644 --- a/drivers/i2c/busses/i2c-imx.c +++ b/drivers/i2c/busses/i2c-imx.c @@ -1112,14 +1112,6 @@ static int i2c_imx_probe(struct platform_device *pdev) return ret; } - /* Request IRQ */ - ret = devm_request_irq(&pdev->dev, irq, i2c_imx_isr, IRQF_SHARED, - pdev->name, i2c_imx); - if (ret) { - dev_err(&pdev->dev, "can't claim irq %d\n", irq); - goto clk_disable; - } - /* Init queue */ init_waitqueue_head(&i2c_imx->queue); @@ -1138,6 +1130,14 @@ static int i2c_imx_probe(struct platform_device *pdev) if (ret < 0) goto rpm_disable; + /* Request IRQ */ + ret = request_threaded_irq(irq, i2c_imx_isr, NULL, IRQF_SHARED, + pdev->name, i2c_imx); + if (ret) { + dev_err(&pdev->dev, "can't claim irq %d\n", irq); + goto rpm_disable; + } + /* Set up clock divider */ i2c_imx->bitrate = IMX_I2C_BIT_RATE; ret = of_property_read_u32(pdev->dev.of_node, @@ -1180,13 +1180,12 @@ static int i2c_imx_probe(struct platform_device *pdev) clk_notifier_unregister: clk_notifier_unregister(i2c_imx->clk, &i2c_imx->clk_change_nb); + free_irq(irq, i2c_imx); rpm_disable: pm_runtime_put_noidle(&pdev->dev); pm_runtime_disable(&pdev->dev); pm_runtime_set_suspended(&pdev->dev); pm_runtime_dont_use_autosuspend(&pdev->dev); - -clk_disable: clk_disable_unprepare(i2c_imx->clk); return ret; } @@ -1194,7 +1193,7 @@ clk_disable: static int i2c_imx_remove(struct platform_device *pdev) { struct imx_i2c_struct *i2c_imx = platform_get_drvdata(pdev); - int ret; + int irq, ret; ret = pm_runtime_get_sync(&pdev->dev); if (ret < 0) @@ -1214,6 +1213,9 @@ static int i2c_imx_remove(struct platform_device *pdev) imx_i2c_write_reg(0, i2c_imx, IMX_I2C_I2SR); clk_notifier_unregister(i2c_imx->clk, &i2c_imx->clk_change_nb); + irq = platform_get_irq(pdev, 0); + if (irq >= 0) + free_irq(irq, i2c_imx); clk_disable_unprepare(i2c_imx->clk); pm_runtime_put_noidle(&pdev->dev); -- cgit v1.2.3 From 987d3814c92c1288a29cf482ab4b0bc2e1db6857 Mon Sep 17 00:00:00 2001 From: Madhav Chauhan Date: Fri, 16 Oct 2020 18:03:07 +0530 Subject: drm/amdgpu: don't map BO in reserved region MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit c4aa8dff6091cc9536aeb255e544b0b4ba29faf4 upstream. 2MB area is reserved at top inside VM. Suggested-by: Christian König Signed-off-by: Madhav Chauhan Reviewed-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 5fa5158d18ee..9d61d1b7a569 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -561,6 +561,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, struct ww_acquire_ctx ticket; struct list_head list, duplicates; uint64_t va_flags; + uint64_t vm_size; int r = 0; if (args->va_address < AMDGPU_VA_RESERVED_SIZE) { @@ -581,6 +582,15 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, args->va_address &= AMDGPU_GMC_HOLE_MASK; + vm_size = adev->vm_manager.max_pfn * AMDGPU_GPU_PAGE_SIZE; + vm_size -= AMDGPU_VA_RESERVED_SIZE; + if (args->va_address + args->map_size > vm_size) { + dev_dbg(&dev->pdev->dev, + "va_address 0x%llx is in top reserved area 0x%llx\n", + args->va_address + args->map_size, vm_size); + return -EINVAL; + } + if ((args->flags & ~valid_flags) && (args->flags & ~prt_flags)) { dev_dbg(&dev->pdev->dev, "invalid flags combination 0x%08X\n", args->flags); -- cgit v1.2.3 From 9887a48d49f0fa67f123fa601745929c2749c9e7 Mon Sep 17 00:00:00 2001 From: Wesley Chalmers Date: Wed, 9 Sep 2020 17:41:53 -0400 Subject: drm/amd/display: Increase timeout for DP Disable commit 37b7cb10f07c1174522faafc1d51c6591b1501d4 upstream. [WHY] When disabling DP video, the current REG_WAIT timeout of 50ms is too low for certain cases with very high VSYNC intervals. [HOW] Increase the timeout to 102ms, so that refresh rates as low as 10Hz can be handled properly. Signed-off-by: Wesley Chalmers Reviewed-by: Aric Cyr Acked-by: Qingqing Zhuo Signed-off-by: Alex Deucher Cc: Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c index ddf66046616d..6718777c826d 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c @@ -898,10 +898,10 @@ void enc1_stream_encoder_dp_blank( */ REG_UPDATE(DP_VID_STREAM_CNTL, DP_VID_STREAM_DIS_DEFER, 2); /* Larger delay to wait until VBLANK - use max retry of - * 10us*5000=50ms. This covers 41.7ms of minimum 24 Hz mode + + * 10us*10200=102ms. This covers 100.0ms of minimum 10 Hz mode + * a little more because we may not trust delay accuracy. */ - max_retries = DP_BLANK_MAX_RETRY * 250; + max_retries = DP_BLANK_MAX_RETRY * 501; /* disable DP stream */ REG_UPDATE(DP_VID_STREAM_CNTL, DP_VID_STREAM_ENABLE, 0); -- cgit v1.2.3 From d417026c408187cf676d3d2d5920fcdc5eafd079 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Thu, 15 Oct 2020 14:57:46 +0800 Subject: drm/amdgpu: correct the gpu reset handling for job != NULL case commit 207ac684792560acdb9e06f9d707ebf63c84b0e0 upstream. Current code wrongly treat all cases as job == NULL. Signed-off-by: Evan Quan Reviewed-and-tested-by: Jane Jian Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 4105fbf57167..29141bff4b57 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3890,7 +3890,7 @@ retry: /* Rest of adevs pre asic reset from XGMI hive. */ amdgpu_device_lock_adev(tmp_adev, false); r = amdgpu_device_pre_asic_reset(tmp_adev, - NULL, + (tmp_adev == adev) ? job : NULL, &need_full_reset); /*TODO Should we stop ?*/ if (r) { -- cgit v1.2.3 From 1e460aa7353da2e4044f1ec0693ce2080865623a Mon Sep 17 00:00:00 2001 From: Jay Cornwall Date: Sat, 17 Oct 2020 08:38:43 -0500 Subject: drm/amdkfd: Use same SQ prefetch setting as amdgpu commit d56b1980d7efe9ef08469e856fc0703d0cef65e4 upstream. 0 causes instruction fetch stall at cache line boundary under some conditions on Navi10. A non-zero prefetch is the preferred default in any case. Fixes soft hang in Luxmark. Signed-off-by: Jay Cornwall Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c index 72e4d61ac752..ad0593342333 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c @@ -58,8 +58,9 @@ static int update_qpd_v10(struct device_queue_manager *dqm, /* check if sh_mem_config register already configured */ if (qpd->sh_mem_config == 0) { qpd->sh_mem_config = - SH_MEM_ALIGNMENT_MODE_UNALIGNED << - SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT; + (SH_MEM_ALIGNMENT_MODE_UNALIGNED << + SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | + (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT); #if 0 /* TODO: * This shouldn't be an issue with Navi10. Verify. -- cgit v1.2.3 From adff3a805c9748116597605c596cd16de080a286 Mon Sep 17 00:00:00 2001 From: Andrey Grodzovsky Date: Wed, 14 Oct 2020 13:12:30 -0400 Subject: drm/amd/display: Avoid MST manager resource leak. commit 5dff80bdce9e385af5716ed083f9e33e814484ab upstream. On connector destruction call drm_dp_mst_topology_mgr_destroy to release resources allocated in drm_dp_mst_topology_mgr_init. Do it only if MST manager was initilized before otherwsie a crash is seen on driver unload/device unplug. Reviewed-by: Nicholas Kazlauskas Signed-off-by: Andrey Grodzovsky Acked-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 7c5808503173..d2dd387c95d8 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -3956,6 +3956,13 @@ static void amdgpu_dm_connector_destroy(struct drm_connector *connector) struct amdgpu_device *adev = connector->dev->dev_private; struct amdgpu_display_manager *dm = &adev->dm; + /* + * Call only if mst_mgr was iniitalized before since it's not done + * for all connector types. + */ + if (aconnector->mst_mgr.dev) + drm_dp_mst_topology_mgr_destroy(&aconnector->mst_mgr); + #if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) ||\ defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE) -- cgit v1.2.3 From d1628cdacfb0b8bd390e8bce81ea481dc9986f13 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Mon, 12 Oct 2020 13:09:36 +0200 Subject: drm/amdgpu: increase the reserved VM size to 2MB MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 55bb919be4e4973cd037a04f527ecc6686800437 upstream. Ideally this should be a multiple of the VM block size. 2MB should at least fit for Vega/Navi. Signed-off-by: Christian König Reviewed-by: Madhav Chauhan Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 2eda3a8c330d..4a64825b53cb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -105,8 +105,8 @@ struct amdgpu_bo_list_entry; #define AMDGPU_MMHUB_0 1 #define AMDGPU_MMHUB_1 2 -/* hardcode that limit for now */ -#define AMDGPU_VA_RESERVED_SIZE (1ULL << 20) +/* Reserve 2MB at top/bottom of address space for kernel use */ +#define AMDGPU_VA_RESERVED_SIZE (2ULL << 20) /* max vmids dedicated for process */ #define AMDGPU_VM_MAX_RESERVED_VMID 1 -- cgit v1.2.3 From d7e22dbc662db61309ff6d8dc345b161b0962a30 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 23 Oct 2020 09:46:55 +0200 Subject: drm/amd/display: Don't invoke kgdb_breakpoint() unconditionally commit 8b7dc1fe1a5c1093551f6cd7dfbb941bd9081c2e upstream. ASSERT_CRITICAL() invokes kgdb_breakpoint() whenever either CONFIG_KGDB or CONFIG_HAVE_KGDB is set. This, however, may lead to a kernel panic when no kdb stuff is attached, since the kgdb_breakpoint() call issues INT3. It's nothing but a surprise for normal end-users. For avoiding the pitfall, make the kgdb_breakpoint() call only when CONFIG_DEBUG_KERNEL_DC is set. https://bugzilla.opensuse.org/show_bug.cgi?id=1177973 Cc: Acked-by: Alex Deucher Reviewed-by: Nicholas Kazlauskas Signed-off-by: Takashi Iwai Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/display/dc/os_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/os_types.h b/drivers/gpu/drm/amd/display/dc/os_types.h index 30ec80ac6fc8..8da322582b68 100644 --- a/drivers/gpu/drm/amd/display/dc/os_types.h +++ b/drivers/gpu/drm/amd/display/dc/os_types.h @@ -57,7 +57,7 @@ * general debug capabilities * */ -#if defined(CONFIG_HAVE_KGDB) || defined(CONFIG_KGDB) +#if defined(CONFIG_DEBUG_KERNEL_DC) && (defined(CONFIG_HAVE_KGDB) || defined(CONFIG_KGDB)) #define ASSERT_CRITICAL(expr) do { \ if (WARN_ON(!(expr))) { \ kgdb_breakpoint(); \ -- cgit v1.2.3 From 9cdccb4761e54d12e9cb5f19864c2b747115f82b Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 23 Oct 2020 09:46:54 +0200 Subject: drm/amd/display: Fix kernel panic by dal_gpio_open() error commit 920bb38c518408fa2600eaefa0af9e82cf48f166 upstream. Currently both error code paths handled in dal_gpio_open_ex() issues ASSERT_CRITICAL(), and this leads to a kernel panic unnecessarily if CONFIG_KGDB is enabled. Since basically both are non-critical errors and can be recovered, drop those assert calls and use a safer one, BREAK_TO_DEBUGGER(), for allowing the debugging, instead. BugLink: https://bugzilla.opensuse.org/show_bug.cgi?id=1177973 Cc: Acked-by: Alex Deucher Reviewed-by: Nicholas Kazlauskas Signed-off-by: Takashi Iwai Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/display/dc/gpio/gpio_base.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/gpio/gpio_base.c b/drivers/gpu/drm/amd/display/dc/gpio/gpio_base.c index f8f85490e77e..8a6fb58d0199 100644 --- a/drivers/gpu/drm/amd/display/dc/gpio/gpio_base.c +++ b/drivers/gpu/drm/amd/display/dc/gpio/gpio_base.c @@ -63,13 +63,13 @@ enum gpio_result dal_gpio_open_ex( enum gpio_mode mode) { if (gpio->pin) { - ASSERT_CRITICAL(false); + BREAK_TO_DEBUGGER(); return GPIO_RESULT_ALREADY_OPENED; } // No action if allocation failed during gpio construct if (!gpio->hw_container.ddc) { - ASSERT_CRITICAL(false); + BREAK_TO_DEBUGGER(); return GPIO_RESULT_NON_SPECIFIC_ERROR; } gpio->mode = mode; -- cgit v1.2.3 From d4fdbedef767143cfce7218086c6b3b39bc42373 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sun, 4 Oct 2020 19:04:24 +0100 Subject: ceph: promote to unsigned long long before shifting commit c403c3a2fbe24d4ed33e10cabad048583ebd4edf upstream. On 32-bit systems, this shift will overflow for files larger than 4GB. Cc: stable@vger.kernel.org Fixes: 61f68816211e ("ceph: check caps in filemap_fault and page_mkwrite") Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov Signed-off-by: Greg Kroah-Hartman --- fs/ceph/addr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 7ab616601141..a02e845eb0fb 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -1427,7 +1427,7 @@ static vm_fault_t ceph_filemap_fault(struct vm_fault *vmf) struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_file_info *fi = vma->vm_file->private_data; struct page *pinned_page = NULL; - loff_t off = vmf->pgoff << PAGE_SHIFT; + loff_t off = (loff_t)vmf->pgoff << PAGE_SHIFT; int want, got, err; sigset_t oldset; vm_fault_t ret = VM_FAULT_SIGBUS; -- cgit v1.2.3 From 51135ffbb54d01d77038bd01edbc54b875a0be03 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 7 Oct 2020 20:06:48 +0200 Subject: libceph: clear con->out_msg on Policy::stateful_server faults commit 28e1581c3b4ea5f98530064a103c6217bedeea73 upstream. con->out_msg must be cleared on Policy::stateful_server (!CEPH_MSG_CONNECT_LOSSY) faults. Not doing so botches the reconnection attempt, because after writing the banner the messenger moves on to writing the data section of that message (either from where it got interrupted by the connection reset or from the beginning) instead of writing struct ceph_msg_connect. This results in a bizarre error message because the server sends CEPH_MSGR_TAG_BADPROTOVER but we think we wrote struct ceph_msg_connect: libceph: mds0 (1)172.21.15.45:6828 socket error on write ceph: mds0 reconnect start libceph: mds0 (1)172.21.15.45:6829 socket closed (con state OPEN) libceph: mds0 (1)172.21.15.45:6829 protocol version mismatch, my 32 != server's 32 libceph: mds0 (1)172.21.15.45:6829 protocol version mismatch AFAICT this bug goes back to the dawn of the kernel client. The reason it survived for so long is that only MDS sessions are stateful and only two MDS messages have a data section: CEPH_MSG_CLIENT_RECONNECT (always, but reconnecting is rare) and CEPH_MSG_CLIENT_REQUEST (only when xattrs are involved). The connection has to get reset precisely when such message is being sent -- in this case it was the former. Cc: stable@vger.kernel.org Link: https://tracker.ceph.com/issues/47723 Signed-off-by: Ilya Dryomov Reviewed-by: Jeff Layton Signed-off-by: Greg Kroah-Hartman --- net/ceph/messenger.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 3d2e9f944e0f..49726c378aab 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -3007,6 +3007,11 @@ static void con_fault(struct ceph_connection *con) ceph_msg_put(con->in_msg); con->in_msg = NULL; } + if (con->out_msg) { + BUG_ON(con->out_msg->con != con); + ceph_msg_put(con->out_msg); + con->out_msg = NULL; + } /* Requeue anything that hasn't been acked */ list_splice_init(&con->out_sent, &con->out_queue); -- cgit v1.2.3 From 0db6e7161e3333a2a119e592cc215909d53d8ecc Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sun, 4 Oct 2020 19:04:22 +0100 Subject: 9P: Cast to loff_t before multiplying commit f5f7ab168b9a60e12a4b8f2bb6fcc91321dc23c1 upstream. On 32-bit systems, this multiplication will overflow for files larger than 4GB. Link: http://lkml.kernel.org/r/20201004180428.14494-2-willy@infradead.org Cc: stable@vger.kernel.org Fixes: fb89b45cdfdc ("9P: introduction of a new cache=mmap model.") Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Dominique Martinet Signed-off-by: Greg Kroah-Hartman --- fs/9p/vfs_file.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index fe7f0bd2048e..ee9cabac1204 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -609,9 +609,9 @@ static void v9fs_mmap_vm_close(struct vm_area_struct *vma) struct writeback_control wbc = { .nr_to_write = LONG_MAX, .sync_mode = WB_SYNC_ALL, - .range_start = vma->vm_pgoff * PAGE_SIZE, + .range_start = (loff_t)vma->vm_pgoff * PAGE_SIZE, /* absolute end, byte at end included */ - .range_end = vma->vm_pgoff * PAGE_SIZE + + .range_end = (loff_t)vma->vm_pgoff * PAGE_SIZE + (vma->vm_end - vma->vm_start - 1), }; -- cgit v1.2.3 From 3cfbc13ab3f00df35612929143bad651a2eafcc8 Mon Sep 17 00:00:00 2001 From: Qiujun Huang Date: Mon, 19 Oct 2020 22:22:42 +0800 Subject: ring-buffer: Return 0 on success from ring_buffer_resize() commit 0a1754b2a97efa644aa6e84d1db5b17c42251483 upstream. We don't need to check the new buffer size, and the return value had confused resize_buffer_duplicate_size(). ... ret = ring_buffer_resize(trace_buf->buffer, per_cpu_ptr(size_buf->data,cpu_id)->entries, cpu_id); if (ret == 0) per_cpu_ptr(trace_buf->data, cpu_id)->entries = per_cpu_ptr(size_buf->data, cpu_id)->entries; ... Link: https://lkml.kernel.org/r/20201019142242.11560-1-hqjagain@gmail.com Cc: stable@vger.kernel.org Fixes: d60da506cbeb3 ("tracing: Add a resize function to make one buffer equivalent to another buffer") Signed-off-by: Qiujun Huang Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/ring_buffer.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 9a2581fe7ed5..67cdb401c6ce 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1717,18 +1717,18 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size, { struct ring_buffer_per_cpu *cpu_buffer; unsigned long nr_pages; - int cpu, err = 0; + int cpu, err; /* * Always succeed at resizing a non-existent buffer: */ if (!buffer) - return size; + return 0; /* Make sure the requested buffer exists */ if (cpu_id != RING_BUFFER_ALL_CPUS && !cpumask_test_cpu(cpu_id, buffer->cpumask)) - return size; + return 0; nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE); @@ -1868,7 +1868,7 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size, } mutex_unlock(&buffer->mutex); - return size; + return 0; out_err: for_each_buffer_cpu(buffer, cpu) { -- cgit v1.2.3 From b11e9dd66e3a1d306a2d5697299a99537250b7ec Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Thu, 8 Oct 2020 22:42:56 +0200 Subject: vringh: fix __vringh_iov() when riov and wiov are different commit 5745bcfbbf89b158416075374254d3c013488f21 upstream. If riov and wiov are both defined and they point to different objects, only riov is initialized. If the wiov is not initialized by the caller, the function fails returning -EINVAL and printing "Readable desc 0x... after writable" error message. This issue happens when descriptors have both readable and writable buffers (eg. virtio-blk devices has virtio_blk_outhdr in the readable buffer and status as last byte of writable buffer) and we call __vringh_iov() to get both type of buffers in two different iovecs. Let's replace the 'else if' clause with 'if' to initialize both riov and wiov if they are not NULL. As checkpatch pointed out, we also avoid crashing the kernel when riov and wiov are both NULL, replacing BUG() with WARN_ON() and returning -EINVAL. Fixes: f87d0fbb5798 ("vringh: host-side implementation of virtio rings.") Cc: stable@vger.kernel.org Signed-off-by: Stefano Garzarella Link: https://lore.kernel.org/r/20201008204256.162292-1-sgarzare@redhat.com Signed-off-by: Michael S. Tsirkin Signed-off-by: Greg Kroah-Hartman --- drivers/vhost/vringh.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c index a0a2d74967ef..026a37ee4177 100644 --- a/drivers/vhost/vringh.c +++ b/drivers/vhost/vringh.c @@ -274,13 +274,14 @@ __vringh_iov(struct vringh *vrh, u16 i, desc_max = vrh->vring.num; up_next = -1; + /* You must want something! */ + if (WARN_ON(!riov && !wiov)) + return -EINVAL; + if (riov) riov->i = riov->used = 0; - else if (wiov) + if (wiov) wiov->i = wiov->used = 0; - else - /* You must want something! */ - BUG(); for (;;) { void *addr; -- cgit v1.2.3 From c0de3cf2f28642a181707418b5ed55c115d8d0b1 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 22 Sep 2020 09:24:56 -0700 Subject: ext4: fix leaking sysfs kobject after failed mount commit cb8d53d2c97369029cc638c9274ac7be0a316c75 upstream. ext4_unregister_sysfs() only deletes the kobject. The reference to it needs to be put separately, like ext4_put_super() does. This addresses the syzbot report "memory leak in kobject_set_name_vargs (3)" (https://syzkaller.appspot.com/bug?extid=9f864abad79fae7c17e1). Reported-by: syzbot+9f864abad79fae7c17e1@syzkaller.appspotmail.com Fixes: 72ba74508b28 ("ext4: release sysfs kobject when failing to enable quotas on mount") Cc: stable@vger.kernel.org Signed-off-by: Eric Biggers Link: https://lore.kernel.org/r/20200922162456.93657-1-ebiggers@kernel.org Reviewed-by: Jan Kara Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/super.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 2603537b1f66..6a260cc8bce6 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4684,6 +4684,7 @@ cantfind_ext4: failed_mount8: ext4_unregister_sysfs(sb); + kobject_put(&sbi->s_kobj); failed_mount7: ext4_unregister_li_request(sb); failed_mount6: -- cgit v1.2.3 From ae05fdc6d60a48d44f1ed9329218c9c6407ff818 Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Sat, 29 Aug 2020 10:54:02 +0800 Subject: ext4: fix error handling code in add_new_gdb commit c9e87161cc621cbdcfc472fa0b2d81c63780c8f5 upstream. When ext4_journal_get_write_access() fails, we should terminate the execution flow and release n_group_desc, iloc.bh, dind and gdb_bh. Cc: stable@kernel.org Signed-off-by: Dinghao Liu Reviewed-by: Andreas Dilger Link: https://lore.kernel.org/r/20200829025403.3139-1-dinghao.liu@zju.edu.cn Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/resize.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 080e25f6ef56..ad1d4c8faf44 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -861,8 +861,10 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, BUFFER_TRACE(dind, "get_write_access"); err = ext4_journal_get_write_access(handle, dind); - if (unlikely(err)) + if (unlikely(err)) { ext4_std_error(sb, err); + goto errout; + } /* ext4_reserve_inode_write() gets a reference on the iloc */ err = ext4_reserve_inode_write(handle, inode, &iloc); -- cgit v1.2.3 From b60edf37d5d34f5375de90b7ba68bc59cced5c68 Mon Sep 17 00:00:00 2001 From: Luo Meng Date: Tue, 20 Oct 2020 09:36:31 +0800 Subject: ext4: fix invalid inode checksum commit 1322181170bb01bce3c228b82ae3d5c6b793164f upstream. During the stability test, there are some errors: ext4_lookup:1590: inode #6967: comm fsstress: iget: checksum invalid. If the inode->i_iblocks too big and doesn't set huge file flag, checksum will not be recalculated when update the inode information to it's buffer. If other inode marks the buffer dirty, then the inconsistent inode will be flushed to disk. Fix this problem by checking i_blocks in advance. Cc: stable@kernel.org Signed-off-by: Luo Meng Reviewed-by: Darrick J. Wong Link: https://lore.kernel.org/r/20201020013631.3796673-1-luomeng12@huawei.com Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/inode.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 95a8a04c77dd..cbd028a31daf 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5271,6 +5271,12 @@ static int ext4_do_update_inode(handle_t *handle, if (ext4_test_inode_state(inode, EXT4_STATE_NEW)) memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size); + err = ext4_inode_blocks_set(handle, raw_inode, ei); + if (err) { + spin_unlock(&ei->i_raw_lock); + goto out_brelse; + } + raw_inode->i_mode = cpu_to_le16(inode->i_mode); i_uid = i_uid_read(inode); i_gid = i_gid_read(inode); @@ -5304,11 +5310,6 @@ static int ext4_do_update_inode(handle_t *handle, EXT4_INODE_SET_XTIME(i_atime, inode, raw_inode); EXT4_EINODE_SET_XTIME(i_crtime, ei, raw_inode); - err = ext4_inode_blocks_set(handle, raw_inode, ei); - if (err) { - spin_unlock(&ei->i_raw_lock); - goto out_brelse; - } raw_inode->i_dtime = cpu_to_le32(ei->i_dtime); raw_inode->i_flags = cpu_to_le32(ei->i_flags & 0xFFFFFFFF); if (likely(!test_opt2(inode->i_sb, HURD_COMPAT))) -- cgit v1.2.3 From b7e1a637eae9c6fbae8e8aa98c4b6f9952d27887 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Tue, 20 Oct 2020 08:22:53 +1000 Subject: drm/ttm: fix eviction valuable range check. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit fea456d82c19d201c21313864105876deabe148b upstream. This was adding size to start, but pfn and start are in pages, so it should be using num_pages. Not sure this fixes anything in the real world, just noticed it during refactoring. Signed-off-by: Dave Airlie Reviewed-by: Christian König Cc: stable@vger.kernel.org Link: https://patchwork.freedesktop.org/patch/msgid/20201019222257.1684769-2-airlied@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/ttm/ttm_bo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 3ce8ad7603c7..3ffcbaa13878 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -761,7 +761,7 @@ bool ttm_bo_eviction_valuable(struct ttm_buffer_object *bo, /* Don't evict this BO if it's outside of the * requested placement range */ - if (place->fpfn >= (bo->mem.start + bo->mem.size) || + if (place->fpfn >= (bo->mem.start + bo->mem.num_pages) || (place->lpfn && place->lpfn <= bo->mem.start)) return false; -- cgit v1.2.3 From fb4e2a67e19369da0f2fd8903bc59cce6c5481c8 Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Fri, 23 Oct 2020 00:23:37 +0200 Subject: mmc: sdhci-of-esdhc: set timeout to max before tuning commit 0add6e9b88d0632a25323aaf4987dbacb0e4ae64 upstream. On rare occations there is the following error: mmc0: Tuning timeout, falling back to fixed sampling clock There are SD cards which takes a significant longer time to reply to the first CMD19 command. The eSDHC takes the data timeout value into account during the tuning period. The SDHCI core doesn't explicitly set this timeout for the tuning procedure. Thus on the slow cards, there might be a spurious "Buffer Read Ready" interrupt, which in turn triggers a wrong sequence of events. In the end this will lead to an unsuccessful tuning procedure and to the above error. To workaround this, set the timeout to the maximum value (which is the best we can do) and the SDHCI core will take care of the proper timeout handling. Fixes: ba49cbd0936e ("mmc: sdhci-of-esdhc: add tuning support") Signed-off-by: Michael Walle Acked-by: Adrian Hunter Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20201022222337.19857-1-michael@walle.cc Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci-of-esdhc.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/mmc/host/sdhci-of-esdhc.c b/drivers/mmc/host/sdhci-of-esdhc.c index 66ad46d0ba88..64196c1b1c8f 100644 --- a/drivers/mmc/host/sdhci-of-esdhc.c +++ b/drivers/mmc/host/sdhci-of-esdhc.c @@ -1004,6 +1004,17 @@ static int esdhc_execute_tuning(struct mmc_host *mmc, u32 opcode) esdhc_tuning_block_enable(host, true); + /* + * The eSDHC controller takes the data timeout value into account + * during tuning. If the SD card is too slow sending the response, the + * timer will expire and a "Buffer Read Ready" interrupt without data + * is triggered. This leads to tuning errors. + * + * Just set the timeout to the maximum value because the core will + * already take care of it in sdhci_send_tuning(). + */ + sdhci_writeb(host, 0xe, SDHCI_TIMEOUT_CONTROL); + hs400_tuning = host->flags & SDHCI_HS400_TUNING; do { -- cgit v1.2.3 From 071b3300c9516ada95b82c2b65d9e2d9b81dbd23 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Thu, 15 Oct 2020 17:41:15 +0800 Subject: mmc: sdhci: Use Auto CMD Auto Select only when v4_mode is true commit b3e1ea16fb39fb6e1a1cf1dbdd6738531de3dc7d upstream. sdhci-of-dwcmshc meets an eMMC read performance regression with below command after commit 427b6514d095 ("mmc: sdhci: Add Auto CMD Auto Select support"): dd if=/dev/mmcblk0 of=/dev/null bs=8192 count=100000 Before the commit, the above command gives 120MB/s After the commit, the above command gives 51.3 MB/s So it looks like sdhci-of-dwcmshc expects Version 4 Mode for Auto CMD Auto Select. Fix the performance degradation by ensuring v4_mode is true to use Auto CMD Auto Select. Fixes: 427b6514d095 ("mmc: sdhci: Add Auto CMD Auto Select support") Signed-off-by: Jisheng Zhang Acked-by: Adrian Hunter Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20201015174115.4cf2c19a@xhacker.debian Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 136f9737713d..a1aeb2e10564 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -1162,9 +1162,11 @@ static inline void sdhci_auto_cmd_select(struct sdhci_host *host, /* * In case of Version 4.10 or later, use of 'Auto CMD Auto * Select' is recommended rather than use of 'Auto CMD12 - * Enable' or 'Auto CMD23 Enable'. + * Enable' or 'Auto CMD23 Enable'. We require Version 4 Mode + * here because some controllers (e.g sdhci-of-dwmshc) expect it. */ - if (host->version >= SDHCI_SPEC_410 && (use_cmd12 || use_cmd23)) { + if (host->version >= SDHCI_SPEC_410 && host->v4_mode && + (use_cmd12 || use_cmd23)) { *mode |= SDHCI_TRNS_AUTO_SEL; ctrl2 = sdhci_readw(host, SDHCI_HOST_CONTROL2); -- cgit v1.2.3 From beb5d0dfc154298e3e59a0abfb7ac3723278cc6f Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Wed, 2 Sep 2020 16:10:10 +0800 Subject: drm/amd/pm: increase mclk switch threshold to 200 us commit 83da6eea3af669ee0b1f1bc05ffd6150af984994 upstream. To avoid underflow seen on Polaris10 with some 3440x1440 144Hz displays. As the threshold of 190 us cuts too close to minVBlankTime of 192 us. Signed-off-by: Evan Quan Acked-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c index 1b55f037ba4a..35e6cbe805eb 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c @@ -2864,7 +2864,7 @@ static int smu7_vblank_too_short(struct pp_hwmgr *hwmgr, if (hwmgr->is_kicker) switch_limit_us = data->is_memory_gddr5 ? 450 : 150; else - switch_limit_us = data->is_memory_gddr5 ? 190 : 150; + switch_limit_us = data->is_memory_gddr5 ? 200 : 150; break; case CHIP_VEGAM: switch_limit_us = 30; -- cgit v1.2.3 From c2313d7818b979f8b3751f052a8db34a7ed26780 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 26 Oct 2020 13:15:23 -0700 Subject: tty: make FONTX ioctl use the tty pointer they were actually passed commit 90bfdeef83f1d6c696039b6a917190dcbbad3220 upstream. Some of the font tty ioctl's always used the current foreground VC for their operations. Don't do that then. This fixes a data race on fg_console. Side note: both Michael Ellerman and Jiri Slaby point out that all these ioctls are deprecated, and should probably have been removed long ago, and everything seems to be using the KDFONTOP ioctl instead. In fact, Michael points out that it looks like busybox's loadfont program seems to have switched over to using KDFONTOP exactly _because_ of this bug (ahem.. 12 years ago ;-). Reported-by: Minh Yuan Acked-by: Michael Ellerman Acked-by: Jiri Slaby Cc: Greg KH Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/vt_ioctl.c | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/drivers/tty/vt/vt_ioctl.c b/drivers/tty/vt/vt_ioctl.c index cbc85c995d92..4f51be17427c 100644 --- a/drivers/tty/vt/vt_ioctl.c +++ b/drivers/tty/vt/vt_ioctl.c @@ -244,7 +244,7 @@ int vt_waitactive(int n) static inline int -do_fontx_ioctl(int cmd, struct consolefontdesc __user *user_cfd, int perm, struct console_font_op *op) +do_fontx_ioctl(struct vc_data *vc, int cmd, struct consolefontdesc __user *user_cfd, int perm, struct console_font_op *op) { struct consolefontdesc cfdarg; int i; @@ -262,15 +262,16 @@ do_fontx_ioctl(int cmd, struct consolefontdesc __user *user_cfd, int perm, struc op->height = cfdarg.charheight; op->charcount = cfdarg.charcount; op->data = cfdarg.chardata; - return con_font_op(vc_cons[fg_console].d, op); - case GIO_FONTX: { + return con_font_op(vc, op); + + case GIO_FONTX: op->op = KD_FONT_OP_GET; op->flags = KD_FONT_FLAG_OLD; op->width = 8; op->height = cfdarg.charheight; op->charcount = cfdarg.charcount; op->data = cfdarg.chardata; - i = con_font_op(vc_cons[fg_console].d, op); + i = con_font_op(vc, op); if (i) return i; cfdarg.charheight = op->height; @@ -278,7 +279,6 @@ do_fontx_ioctl(int cmd, struct consolefontdesc __user *user_cfd, int perm, struc if (copy_to_user(user_cfd, &cfdarg, sizeof(struct consolefontdesc))) return -EFAULT; return 0; - } } return -EINVAL; } @@ -924,7 +924,7 @@ int vt_ioctl(struct tty_struct *tty, op.height = 0; op.charcount = 256; op.data = up; - ret = con_font_op(vc_cons[fg_console].d, &op); + ret = con_font_op(vc, &op); break; } @@ -935,7 +935,7 @@ int vt_ioctl(struct tty_struct *tty, op.height = 32; op.charcount = 256; op.data = up; - ret = con_font_op(vc_cons[fg_console].d, &op); + ret = con_font_op(vc, &op); break; } @@ -952,7 +952,7 @@ int vt_ioctl(struct tty_struct *tty, case PIO_FONTX: case GIO_FONTX: - ret = do_fontx_ioctl(cmd, up, perm, &op); + ret = do_fontx_ioctl(vc, cmd, up, perm, &op); break; case PIO_FONTRESET: @@ -969,11 +969,11 @@ int vt_ioctl(struct tty_struct *tty, { op.op = KD_FONT_OP_SET_DEFAULT; op.data = NULL; - ret = con_font_op(vc_cons[fg_console].d, &op); + ret = con_font_op(vc, &op); if (ret) break; console_lock(); - con_set_default_unimap(vc_cons[fg_console].d); + con_set_default_unimap(vc); console_unlock(); break; } @@ -1100,8 +1100,9 @@ struct compat_consolefontdesc { }; static inline int -compat_fontx_ioctl(int cmd, struct compat_consolefontdesc __user *user_cfd, - int perm, struct console_font_op *op) +compat_fontx_ioctl(struct vc_data *vc, int cmd, + struct compat_consolefontdesc __user *user_cfd, + int perm, struct console_font_op *op) { struct compat_consolefontdesc cfdarg; int i; @@ -1119,7 +1120,8 @@ compat_fontx_ioctl(int cmd, struct compat_consolefontdesc __user *user_cfd, op->height = cfdarg.charheight; op->charcount = cfdarg.charcount; op->data = compat_ptr(cfdarg.chardata); - return con_font_op(vc_cons[fg_console].d, op); + return con_font_op(vc, op); + case GIO_FONTX: op->op = KD_FONT_OP_GET; op->flags = KD_FONT_FLAG_OLD; @@ -1127,7 +1129,7 @@ compat_fontx_ioctl(int cmd, struct compat_consolefontdesc __user *user_cfd, op->height = cfdarg.charheight; op->charcount = cfdarg.charcount; op->data = compat_ptr(cfdarg.chardata); - i = con_font_op(vc_cons[fg_console].d, op); + i = con_font_op(vc, op); if (i) return i; cfdarg.charheight = op->height; @@ -1217,7 +1219,7 @@ long vt_compat_ioctl(struct tty_struct *tty, */ case PIO_FONTX: case GIO_FONTX: - return compat_fontx_ioctl(cmd, up, perm, &op); + return compat_fontx_ioctl(vc, cmd, up, perm, &op); case KDFONTOP: return compat_kdfontop_ioctl(up, perm, &op, vc); -- cgit v1.2.3 From 4bf2a744a4e77d340700c83b40b7f859deac7d03 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Fri, 9 Oct 2020 15:08:31 +0800 Subject: arm64: berlin: Select DW_APB_TIMER_OF commit b0fc70ce1f028e14a37c186d9f7a55e51439b83a upstream. Berlin SoCs always contain some DW APB timers which can be used as an always-on broadcast timer. Link: https://lore.kernel.org/r/20201009150536.214181fb@xhacker.debian Cc: # v3.14+ Signed-off-by: Jisheng Zhang Signed-off-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- arch/arm64/Kconfig.platforms | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms index 16d761475a86..9dccf4db319b 100644 --- a/arch/arm64/Kconfig.platforms +++ b/arch/arm64/Kconfig.platforms @@ -54,6 +54,7 @@ config ARCH_BCM_IPROC config ARCH_BERLIN bool "Marvell Berlin SoC Family" select DW_APB_ICTL + select DW_APB_TIMER_OF select GPIOLIB select PINCTRL help -- cgit v1.2.3 From 81190a9efde0159ba255d9d629e50b4dda51d043 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 26 Oct 2020 09:12:10 +0000 Subject: cachefiles: Handle readpage error correctly commit 9480b4e75b7108ee68ecf5bc6b4bd68e8031c521 upstream. If ->readpage returns an error, it has already unlocked the page. Fixes: 5e929b33c393 ("CacheFiles: Handle truncate unlocking the page we're reading") Cc: stable@vger.kernel.org Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: David Howells Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/cachefiles/rdwr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c index ad057ed2b30b..bd5fe8d00d00 100644 --- a/fs/cachefiles/rdwr.c +++ b/fs/cachefiles/rdwr.c @@ -121,7 +121,7 @@ static int cachefiles_read_reissue(struct cachefiles_object *object, _debug("reissue read"); ret = bmapping->a_ops->readpage(NULL, backpage); if (ret < 0) - goto unlock_discard; + goto discard; } /* but the page may have been read before the monitor was installed, so @@ -138,6 +138,7 @@ static int cachefiles_read_reissue(struct cachefiles_object *object, unlock_discard: unlock_page(backpage); +discard: spin_lock_irq(&object->work_lock); list_del(&monitor->op_link); spin_unlock_irq(&object->work_lock); -- cgit v1.2.3 From f3d8023e064764a99aafbdfcf92632df23a1164e Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Mon, 19 Oct 2020 16:57:50 +0200 Subject: hil/parisc: Disable HIL driver when it gets stuck commit 879bc2d27904354b98ca295b6168718e045c4aa2 upstream. When starting a HP machine with HIL driver but without an HIL keyboard or HIL mouse attached, it may happen that data written to the HIL loop gets stuck (e.g. because the transaction queue is full). Usually one will then have to reboot the machine because all you see is and endless output of: Transaction add failed: transaction already queued? In the higher layers hp_sdc_enqueue_transaction() is called to queued up a HIL packet. This function returns an error code, and this patch adds the necessary checks for this return code and disables the HIL driver if further packets can't be sent. Tested on a HP 730 and a HP 715/64 machine. Signed-off-by: Helge Deller Cc: Signed-off-by: Greg Kroah-Hartman --- drivers/input/serio/hil_mlc.c | 21 ++++++++++++++++++--- drivers/input/serio/hp_sdc_mlc.c | 8 ++++---- include/linux/hil_mlc.h | 2 +- 3 files changed, 23 insertions(+), 8 deletions(-) diff --git a/drivers/input/serio/hil_mlc.c b/drivers/input/serio/hil_mlc.c index e1423f7648d6..4c039e4125d9 100644 --- a/drivers/input/serio/hil_mlc.c +++ b/drivers/input/serio/hil_mlc.c @@ -74,7 +74,7 @@ EXPORT_SYMBOL(hil_mlc_unregister); static LIST_HEAD(hil_mlcs); static DEFINE_RWLOCK(hil_mlcs_lock); static struct timer_list hil_mlcs_kicker; -static int hil_mlcs_probe; +static int hil_mlcs_probe, hil_mlc_stop; static void hil_mlcs_process(unsigned long unused); static DECLARE_TASKLET_DISABLED(hil_mlcs_tasklet, hil_mlcs_process, 0); @@ -702,9 +702,13 @@ static int hilse_donode(hil_mlc *mlc) if (!mlc->ostarted) { mlc->ostarted = 1; mlc->opacket = pack; - mlc->out(mlc); + rc = mlc->out(mlc); nextidx = HILSEN_DOZE; write_unlock_irqrestore(&mlc->lock, flags); + if (rc) { + hil_mlc_stop = 1; + return 1; + } break; } mlc->ostarted = 0; @@ -715,8 +719,13 @@ static int hilse_donode(hil_mlc *mlc) case HILSE_CTS: write_lock_irqsave(&mlc->lock, flags); - nextidx = mlc->cts(mlc) ? node->bad : node->good; + rc = mlc->cts(mlc); + nextidx = rc ? node->bad : node->good; write_unlock_irqrestore(&mlc->lock, flags); + if (rc) { + hil_mlc_stop = 1; + return 1; + } break; default: @@ -780,6 +789,12 @@ static void hil_mlcs_process(unsigned long unused) static void hil_mlcs_timer(struct timer_list *unused) { + if (hil_mlc_stop) { + /* could not send packet - stop immediately. */ + pr_warn(PREFIX "HIL seems stuck - Disabling HIL MLC.\n"); + return; + } + hil_mlcs_probe = 1; tasklet_schedule(&hil_mlcs_tasklet); /* Re-insert the periodic task. */ diff --git a/drivers/input/serio/hp_sdc_mlc.c b/drivers/input/serio/hp_sdc_mlc.c index 232d30c825bd..3e85e9039374 100644 --- a/drivers/input/serio/hp_sdc_mlc.c +++ b/drivers/input/serio/hp_sdc_mlc.c @@ -210,7 +210,7 @@ static int hp_sdc_mlc_cts(hil_mlc *mlc) priv->tseq[2] = 1; priv->tseq[3] = 0; priv->tseq[4] = 0; - __hp_sdc_enqueue_transaction(&priv->trans); + return __hp_sdc_enqueue_transaction(&priv->trans); busy: return 1; done: @@ -219,7 +219,7 @@ static int hp_sdc_mlc_cts(hil_mlc *mlc) return 0; } -static void hp_sdc_mlc_out(hil_mlc *mlc) +static int hp_sdc_mlc_out(hil_mlc *mlc) { struct hp_sdc_mlc_priv_s *priv; @@ -234,7 +234,7 @@ static void hp_sdc_mlc_out(hil_mlc *mlc) do_data: if (priv->emtestmode) { up(&mlc->osem); - return; + return 0; } /* Shouldn't be sending commands when loop may be busy */ BUG_ON(down_trylock(&mlc->csem)); @@ -296,7 +296,7 @@ static void hp_sdc_mlc_out(hil_mlc *mlc) BUG_ON(down_trylock(&mlc->csem)); } enqueue: - hp_sdc_enqueue_transaction(&priv->trans); + return hp_sdc_enqueue_transaction(&priv->trans); } static int __init hp_sdc_mlc_init(void) diff --git a/include/linux/hil_mlc.h b/include/linux/hil_mlc.h index 774f7d3b8f6a..369221fd5518 100644 --- a/include/linux/hil_mlc.h +++ b/include/linux/hil_mlc.h @@ -103,7 +103,7 @@ struct hilse_node { /* Methods for back-end drivers, e.g. hp_sdc_mlc */ typedef int (hil_mlc_cts) (hil_mlc *mlc); -typedef void (hil_mlc_out) (hil_mlc *mlc); +typedef int (hil_mlc_out) (hil_mlc *mlc); typedef int (hil_mlc_in) (hil_mlc *mlc, suseconds_t timeout); struct hil_mlc_devinfo { -- cgit v1.2.3 From 0808ca98e67e914ac48cc8f11910509d3b04379c Mon Sep 17 00:00:00 2001 From: Frank Wunderlich Date: Mon, 7 Sep 2020 09:05:17 +0200 Subject: arm: dts: mt7623: add missing pause for switchport commit 36f0a5fc5284838c544218666c63ee8cfa46a9c3 upstream. port6 of mt7530 switch (= cpu port 0) on bananapi-r2 misses pause option which causes rx drops on running iperf. Fixes: f4ff257cd160 ("arm: dts: mt7623: add support for Bananapi R2 (BPI-R2) board") Signed-off-by: Frank Wunderlich Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20200907070517.51715-1-linux@fw-web.de Signed-off-by: Matthias Brugger Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/mt7623n-bananapi-bpi-r2.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/mt7623n-bananapi-bpi-r2.dts b/arch/arm/boot/dts/mt7623n-bananapi-bpi-r2.dts index 2b760f90f38c..5375c6699843 100644 --- a/arch/arm/boot/dts/mt7623n-bananapi-bpi-r2.dts +++ b/arch/arm/boot/dts/mt7623n-bananapi-bpi-r2.dts @@ -192,6 +192,7 @@ fixed-link { speed = <1000>; full-duplex; + pause; }; }; }; -- cgit v1.2.3 From 2937774ef43aee8675c1219a37240e7a2b2c523f Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 10 Sep 2020 17:41:49 +0200 Subject: ARM: samsung: fix PM debug build with DEBUG_LL but !MMU MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 7be0d19c751b02db778ca95e3274d5ea7f31891c upstream. Selecting CONFIG_SAMSUNG_PM_DEBUG (depending on CONFIG_DEBUG_LL) but without CONFIG_MMU leads to build errors: arch/arm/plat-samsung/pm-debug.c: In function ‘s3c_pm_uart_base’: arch/arm/plat-samsung/pm-debug.c:57:2: error: implicit declaration of function ‘debug_ll_addr’ [-Werror=implicit-function-declaration] Fixes: 99b2fc2b8b40 ("ARM: SAMSUNG: Use debug_ll_addr() to get UART base address") Reported-by: kernel test robot Signed-off-by: Krzysztof Kozlowski Cc: Link: https://lore.kernel.org/r/20200910154150.3318-1-krzk@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/arm/plat-samsung/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/plat-samsung/Kconfig b/arch/arm/plat-samsung/Kconfig index 301e572651c0..790c87ee7271 100644 --- a/arch/arm/plat-samsung/Kconfig +++ b/arch/arm/plat-samsung/Kconfig @@ -241,6 +241,7 @@ config SAMSUNG_PM_DEBUG depends on PM && DEBUG_KERNEL depends on PLAT_S3C24XX || ARCH_S3C64XX || ARCH_S5PV210 depends on DEBUG_EXYNOS_UART || DEBUG_S3C24XX_UART || DEBUG_S3C2410_UART + depends on DEBUG_LL && MMU help Say Y here if you want verbose debugging from the PM Suspend and Resume code. See -- cgit v1.2.3 From e793fc391351b1231cc615a995bb7692b741bf90 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 4 Aug 2020 21:26:49 +0200 Subject: ARM: s3c24xx: fix missing system reset commit f6d7cde84f6c5551586c8b9b68d70f8e6dc9a000 upstream. Commit f6361c6b3880 ("ARM: S3C24XX: remove separate restart code") removed usage of the watchdog reset platform code in favor of the Samsung SoC watchdog driver. However the latter was not selected thus S3C24xx platforms lost reset abilities. Cc: Fixes: f6361c6b3880 ("ARM: S3C24XX: remove separate restart code") Signed-off-by: Krzysztof Kozlowski Signed-off-by: Greg Kroah-Hartman --- arch/arm/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 05c9bbfe444d..9aa88715f196 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -507,8 +507,10 @@ config ARCH_S3C24XX select HAVE_S3C2410_WATCHDOG if WATCHDOG select HAVE_S3C_RTC if RTC_CLASS select NEED_MACH_IO_H + select S3C2410_WATCHDOG select SAMSUNG_ATAGS select USE_OF + select WATCHDOG help Samsung S3C2410, S3C2412, S3C2413, S3C2416, S3C2440, S3C2442, S3C2443 and S3C2450 SoCs based systems, such as the Simtec Electronics BAST -- cgit v1.2.3 From 26086875476fe188846f482109a5bc931756fe44 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 22 Oct 2020 21:40:59 +0300 Subject: device property: Keep secondary firmware node secondary by type commit d5dcce0c414fcbfe4c2037b66ac69ea5f9b3f75c upstream. Behind primary and secondary we understand the type of the nodes which might define their ordering. However, if primary node gone, we can't maintain the ordering by definition of the linked list. Thus, by ordering secondary node becomes first in the list. But in this case the meaning of it is still secondary (or auxiliary). The type of the node is maintained by the secondary pointer in it: secondary pointer Meaning NULL or valid primary node ERR_PTR(-ENODEV) secondary node So, if by some reason we do the following sequence of calls set_primary_fwnode(dev, NULL); set_primary_fwnode(dev, primary); we should preserve secondary node. This concept is supported by the description of set_primary_fwnode() along with implementation of set_secondary_fwnode(). Hence, fix the commit c15e1bdda436 to follow this as well. Fixes: c15e1bdda436 ("device property: Fix the secondary firmware node handling in set_primary_fwnode()") Cc: Ferry Toth Signed-off-by: Andy Shevchenko Reviewed-by: Heikki Krogerus Tested-by: Ferry Toth Cc: 5.9+ # 5.9+ Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index 94df2ba1bbed..b6a8545aaee2 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -3414,7 +3414,7 @@ void set_primary_fwnode(struct device *dev, struct fwnode_handle *fwnode) } else { if (fwnode_is_primary(fn)) { dev->fwnode = fn->secondary; - fn->secondary = NULL; + fn->secondary = ERR_PTR(-ENODEV); } else { dev->fwnode = NULL; } -- cgit v1.2.3 From 4cb29cdd50439e89e349119c936ebb9000d180bf Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 22 Oct 2020 21:41:00 +0300 Subject: device property: Don't clear secondary pointer for shared primary firmware node commit 99aed9227073fb34ce2880cbc7063e04185a65e1 upstream. It appears that firmware nodes can be shared between devices. In such case when a (child) device is about to be deleted, its firmware node may be shared and ACPI_COMPANION_SET(..., NULL) call for it breaks the secondary link of the shared primary firmware node. In order to prevent that, check, if the device has a parent and parent's firmware node is shared with its child, and avoid crashing the link. Fixes: c15e1bdda436 ("device property: Fix the secondary firmware node handling in set_primary_fwnode()") Reported-by: Ferry Toth Signed-off-by: Andy Shevchenko Reviewed-by: Heikki Krogerus Tested-by: Ferry Toth Cc: 5.9+ # 5.9+ Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index b6a8545aaee2..0fde3e9e63ee 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -3400,6 +3400,7 @@ static inline bool fwnode_is_primary(struct fwnode_handle *fwnode) */ void set_primary_fwnode(struct device *dev, struct fwnode_handle *fwnode) { + struct device *parent = dev->parent; struct fwnode_handle *fn = dev->fwnode; if (fwnode) { @@ -3414,7 +3415,8 @@ void set_primary_fwnode(struct device *dev, struct fwnode_handle *fwnode) } else { if (fwnode_is_primary(fn)) { dev->fwnode = fn->secondary; - fn->secondary = ERR_PTR(-ENODEV); + if (!(parent && fn == parent->fwnode)) + fn->secondary = ERR_PTR(-ENODEV); } else { dev->fwnode = NULL; } -- cgit v1.2.3 From 8fd792948e76d473f30a39d2d74d539abeda6153 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 29 Oct 2020 17:24:09 +0000 Subject: KVM: arm64: Fix AArch32 handling of DBGD{CCINT,SCRext} and DBGVCR commit 4a1c2c7f63c52ccb11770b5ae25920a6b79d3548 upstream. The DBGD{CCINT,SCRext} and DBGVCR register entries in the cp14 array are missing their target register, resulting in all accesses being targetted at the guard sysreg (indexed by __INVALID_SYSREG__). Point the emulation code at the actual register entries. Fixes: bdfb4b389c8d ("arm64: KVM: add trap handlers for AArch32 debug registers") Signed-off-by: Marc Zyngier Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20201029172409.2768336-1-maz@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/kvm_host.h | 1 + arch/arm64/kvm/sys_regs.c | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index d719c6b4dd81..7140701f65f9 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -209,6 +209,7 @@ enum vcpu_sysreg { #define cp14_DBGWCR0 (DBGWCR0_EL1 * 2) #define cp14_DBGWVR0 (DBGWVR0_EL1 * 2) #define cp14_DBGDCCINT (MDCCINT_EL1 * 2) +#define cp14_DBGVCR (DBGVCR32_EL2 * 2) #define NR_COPRO_REGS (NR_SYS_REGS * 2) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index d43f44b3377e..0ed7598dfa6a 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1746,9 +1746,9 @@ static const struct sys_reg_desc cp14_regs[] = { { Op1( 0), CRn( 0), CRm( 1), Op2( 0), trap_raz_wi }, DBG_BCR_BVR_WCR_WVR(1), /* DBGDCCINT */ - { Op1( 0), CRn( 0), CRm( 2), Op2( 0), trap_debug32 }, + { Op1( 0), CRn( 0), CRm( 2), Op2( 0), trap_debug32, NULL, cp14_DBGDCCINT }, /* DBGDSCRext */ - { Op1( 0), CRn( 0), CRm( 2), Op2( 2), trap_debug32 }, + { Op1( 0), CRn( 0), CRm( 2), Op2( 2), trap_debug32, NULL, cp14_DBGDSCRext }, DBG_BCR_BVR_WCR_WVR(2), /* DBGDTR[RT]Xint */ { Op1( 0), CRn( 0), CRm( 3), Op2( 0), trap_raz_wi }, @@ -1763,7 +1763,7 @@ static const struct sys_reg_desc cp14_regs[] = { { Op1( 0), CRn( 0), CRm( 6), Op2( 2), trap_raz_wi }, DBG_BCR_BVR_WCR_WVR(6), /* DBGVCR */ - { Op1( 0), CRn( 0), CRm( 7), Op2( 0), trap_debug32 }, + { Op1( 0), CRn( 0), CRm( 7), Op2( 0), trap_debug32, NULL, cp14_DBGVCR }, DBG_BCR_BVR_WCR_WVR(7), DBG_BCR_BVR_WCR_WVR(8), DBG_BCR_BVR_WCR_WVR(9), -- cgit v1.2.3 From 4d934fe936fd6c52417d408529b5d45714bea247 Mon Sep 17 00:00:00 2001 From: Jing Xiangfeng Date: Mon, 12 Oct 2020 21:24:04 +0800 Subject: staging: fieldbus: anybuss: jump to correct label in an error path commit 7e97e4cbf30026b49b0145c3bfe06087958382c5 upstream. In current code, controller_probe() misses to call ida_simple_remove() in an error path. Jump to correct label to fix it. Fixes: 17614978ed34 ("staging: fieldbus: anybus-s: support the Arcx anybus controller") Reviewed-by: Sven Van Asbroeck Signed-off-by: Jing Xiangfeng Cc: stable Link: https://lore.kernel.org/r/20201012132404.113031-1-jingxiangfeng@huawei.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/staging/fieldbus/anybuss/arcx-anybus.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/fieldbus/anybuss/arcx-anybus.c b/drivers/staging/fieldbus/anybuss/arcx-anybus.c index 2ecffa42e561..fbe693816f07 100644 --- a/drivers/staging/fieldbus/anybuss/arcx-anybus.c +++ b/drivers/staging/fieldbus/anybuss/arcx-anybus.c @@ -297,7 +297,7 @@ static int controller_probe(struct platform_device *pdev) regulator = devm_regulator_register(dev, &can_power_desc, &config); if (IS_ERR(regulator)) { err = PTR_ERR(regulator); - goto out_reset; + goto out_ida; } /* make controller info visible to userspace */ cd->class_dev = kzalloc(sizeof(*cd->class_dev), GFP_KERNEL); -- cgit v1.2.3 From 15506ee68893711b5fb20c76d433fb49928574ea Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Wed, 21 Oct 2020 13:21:42 +0100 Subject: staging: comedi: cb_pcidas: Allow 2-channel commands for AO subdevice commit 647a6002cb41d358d9ac5de101a8a6dc74748a59 upstream. The "cb_pcidas" driver supports asynchronous commands on the analog output (AO) subdevice for those boards that have an AO FIFO. The code (in `cb_pcidas_ao_check_chanlist()` and `cb_pcidas_ao_cmd()`) to validate and set up the command supports output to a single channel or to two channels simultaneously (the boards have two AO channels). However, the code in `cb_pcidas_auto_attach()` that initializes the subdevices neglects to initialize the AO subdevice's `len_chanlist` member, leaving it set to 0, but the Comedi core will "correct" it to 1 if the driver neglected to set it. This limits commands to use a single channel (either channel 0 or 1), but the limit should be two channels. Set the AO subdevice's `len_chanlist` member to be the same value as the `n_chan` member, which will be 2. Cc: Signed-off-by: Ian Abbott Link: https://lore.kernel.org/r/20201021122142.81628-1-abbotti@mev.co.uk Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/drivers/cb_pcidas.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/staging/comedi/drivers/cb_pcidas.c b/drivers/staging/comedi/drivers/cb_pcidas.c index 02ae00c95313..1893c70de0b9 100644 --- a/drivers/staging/comedi/drivers/cb_pcidas.c +++ b/drivers/staging/comedi/drivers/cb_pcidas.c @@ -1342,6 +1342,7 @@ static int cb_pcidas_auto_attach(struct comedi_device *dev, if (dev->irq && board->has_ao_fifo) { dev->write_subdev = s; s->subdev_flags |= SDF_CMD_WRITE; + s->len_chanlist = s->n_chan; s->do_cmdtest = cb_pcidas_ao_cmdtest; s->do_cmd = cb_pcidas_ao_cmd; s->cancel = cb_pcidas_ao_cancel; -- cgit v1.2.3 From b869f6b67274926600824e8702106d5cb62734fd Mon Sep 17 00:00:00 2001 From: Alexander Sverdlin Date: Fri, 16 Oct 2020 12:18:57 +0200 Subject: staging: octeon: repair "fixed-link" support commit 179f5dc36b0a1aa31538d7d8823deb65c39847b3 upstream. The PHYs must be registered once in device probe function, not in device open callback because it's only possible to register them once. Fixes: a25e278020bf ("staging: octeon: support fixed-link phys") Signed-off-by: Alexander Sverdlin Cc: stable Link: https://lore.kernel.org/r/20201016101858.11374-1-alexander.sverdlin@nokia.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/octeon/ethernet-mdio.c | 6 ------ drivers/staging/octeon/ethernet.c | 9 +++++++++ 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/staging/octeon/ethernet-mdio.c b/drivers/staging/octeon/ethernet-mdio.c index ffac0c4b3f5c..022c71e69a23 100644 --- a/drivers/staging/octeon/ethernet-mdio.c +++ b/drivers/staging/octeon/ethernet-mdio.c @@ -147,12 +147,6 @@ int cvm_oct_phy_setup_device(struct net_device *dev) phy_node = of_parse_phandle(priv->of_node, "phy-handle", 0); if (!phy_node && of_phy_is_fixed_link(priv->of_node)) { - int rc; - - rc = of_phy_register_fixed_link(priv->of_node); - if (rc) - return rc; - phy_node = of_node_get(priv->of_node); } if (!phy_node) diff --git a/drivers/staging/octeon/ethernet.c b/drivers/staging/octeon/ethernet.c index cf8e9a23ebf9..77b7c1fd9d78 100644 --- a/drivers/staging/octeon/ethernet.c +++ b/drivers/staging/octeon/ethernet.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -894,6 +895,14 @@ static int cvm_oct_probe(struct platform_device *pdev) break; } + if (priv->of_node && of_phy_is_fixed_link(priv->of_node)) { + if (of_phy_register_fixed_link(priv->of_node)) { + netdev_err(dev, "Failed to register fixed link for interface %d, port %d\n", + interface, priv->port); + dev->netdev_ops = NULL; + } + } + if (!dev->netdev_ops) { free_netdev(dev); } else if (register_netdev(dev) < 0) { -- cgit v1.2.3 From 6ce4da84e5f479b8a2e402589c0713cd9fcefff9 Mon Sep 17 00:00:00 2001 From: Alexander Sverdlin Date: Fri, 16 Oct 2020 16:56:30 +0200 Subject: staging: octeon: Drop on uncorrectable alignment or FCS error commit 49d28ebdf1e30d806410eefc7de0a7a1ca5d747c upstream. Currently in case of alignment or FCS error if the packet cannot be corrected it's still not dropped. Report the error properly and drop the packet while making the code around a little bit more readable. Fixes: 80ff0fd3ab64 ("Staging: Add octeon-ethernet driver files.") Signed-off-by: Alexander Sverdlin Cc: stable Link: https://lore.kernel.org/r/20201016145630.41852-1-alexander.sverdlin@nokia.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/octeon/ethernet-rx.c | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/drivers/staging/octeon/ethernet-rx.c b/drivers/staging/octeon/ethernet-rx.c index 0e65955c746b..9a242d68a26b 100644 --- a/drivers/staging/octeon/ethernet-rx.c +++ b/drivers/staging/octeon/ethernet-rx.c @@ -69,15 +69,17 @@ static inline int cvm_oct_check_rcv_error(cvmx_wqe_t *work) else port = work->word1.cn38xx.ipprt; - if ((work->word2.snoip.err_code == 10) && (work->word1.len <= 64)) { + if ((work->word2.snoip.err_code == 10) && (work->word1.len <= 64)) /* * Ignore length errors on min size packets. Some * equipment incorrectly pads packets to 64+4FCS * instead of 60+4FCS. Note these packets still get * counted as frame errors. */ - } else if (work->word2.snoip.err_code == 5 || - work->word2.snoip.err_code == 7) { + return 0; + + if (work->word2.snoip.err_code == 5 || + work->word2.snoip.err_code == 7) { /* * We received a packet with either an alignment error * or a FCS error. This may be signalling that we are @@ -108,7 +110,10 @@ static inline int cvm_oct_check_rcv_error(cvmx_wqe_t *work) /* Port received 0xd5 preamble */ work->packet_ptr.s.addr += i + 1; work->word1.len -= i + 5; - } else if ((*ptr & 0xf) == 0xd) { + return 0; + } + + if ((*ptr & 0xf) == 0xd) { /* Port received 0xd preamble */ work->packet_ptr.s.addr += i; work->word1.len -= i + 4; @@ -118,21 +123,20 @@ static inline int cvm_oct_check_rcv_error(cvmx_wqe_t *work) ((*(ptr + 1) & 0xf) << 4); ptr++; } - } else { - printk_ratelimited("Port %d unknown preamble, packet dropped\n", - port); - cvm_oct_free_work(work); - return 1; + return 0; } + + printk_ratelimited("Port %d unknown preamble, packet dropped\n", + port); + cvm_oct_free_work(work); + return 1; } - } else { - printk_ratelimited("Port %d receive error code %d, packet dropped\n", - port, work->word2.snoip.err_code); - cvm_oct_free_work(work); - return 1; } - return 0; + printk_ratelimited("Port %d receive error code %d, packet dropped\n", + port, work->word2.snoip.err_code); + cvm_oct_free_work(work); + return 1; } static void copy_segments_to_skb(cvmx_wqe_t *work, struct sk_buff *skb) -- cgit v1.2.3 From 6e97ed6efa701db070da0054b055c085895aba86 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 5 Nov 2020 11:43:38 +0100 Subject: Linux 5.4.75 Tested-by: Jon Hunter Tested-by: Linux Kernel Functional Testing Tested-by: Guenter Roeck Link: https://lore.kernel.org/r/20201103203249.448706377@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 3be5a9c352b9..d38d0cab8e9a 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 PATCHLEVEL = 4 -SUBLEVEL = 74 +SUBLEVEL = 75 EXTRAVERSION = NAME = Kleptomaniac Octopus -- cgit v1.2.3 From 391a7c6dfd13dbece8211b269d74c37875da0f67 Mon Sep 17 00:00:00 2001 From: Michael Doswald Date: Thu, 5 Nov 2020 14:31:10 +0100 Subject: irq-imx-irqsteer: fix compile error if CONFIG_PM_SLEEP is not set - Fixed a compile error in imx_irqsteer_chans_enable where the type name was used instead of the variable name - Removed CONFIG_PM_SLEEP guards and marked runtime suspend api with __maybe_unused Fixes: 1b98a5088798 ("MLK-17290-01 irqchip: imx-irqsteer: add runtime pm support") Signed-off-by: Michael Doswald --- drivers/irqchip/irq-imx-irqsteer.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/irqchip/irq-imx-irqsteer.c b/drivers/irqchip/irq-imx-irqsteer.c index f766265abf16..5aa67ebd291a 100644 --- a/drivers/irqchip/irq-imx-irqsteer.c +++ b/drivers/irqchip/irq-imx-irqsteer.c @@ -199,7 +199,7 @@ static int imx_irqsteer_chans_enable(struct irqsteer_data *data) { int ret; - ret = clk_prepare_enable(irqsteer_data->ipg_clk); + ret = clk_prepare_enable(data->ipg_clk); if (ret) { dev_err(data->dev, "failed to enable ipg clk: %d\n", ret); return ret; @@ -329,8 +329,7 @@ static int imx_irqsteer_remove(struct platform_device *pdev) return pm_runtime_force_suspend(&pdev->dev); } -#ifdef CONFIG_PM_SLEEP -static void imx_irqsteer_init(struct irqsteer_data *data) +static void __maybe_unused imx_irqsteer_init(struct irqsteer_data *data) { /* steer all IRQs into configured channel */ writel_relaxed(BIT(data->channel), data->regs + CHANCTRL); @@ -341,7 +340,7 @@ static void imx_irqsteer_init(struct irqsteer_data *data) data->inited = true; } -static void imx_irqsteer_save_regs(struct irqsteer_data *data) +static void __maybe_unused imx_irqsteer_save_regs(struct irqsteer_data *data) { int i; @@ -350,7 +349,7 @@ static void imx_irqsteer_save_regs(struct irqsteer_data *data) CHANMASK(i, data->reg_num)); } -static void imx_irqsteer_restore_regs(struct irqsteer_data *data) +static void __maybe_unused imx_irqsteer_restore_regs(struct irqsteer_data *data) { int i; @@ -360,7 +359,7 @@ static void imx_irqsteer_restore_regs(struct irqsteer_data *data) data->regs + CHANMASK(i, data->reg_num)); } -static int imx_irqsteer_runtime_suspend(struct device *dev) +static int __maybe_unused imx_irqsteer_runtime_suspend(struct device *dev) { struct irqsteer_data *irqsteer_data = dev_get_drvdata(dev); @@ -370,7 +369,7 @@ static int imx_irqsteer_runtime_suspend(struct device *dev) return 0; } -static int imx_irqsteer_runtime_resume(struct device *dev) +static int __maybe_unused imx_irqsteer_runtime_resume(struct device *dev) { struct irqsteer_data *irqsteer_data = dev_get_drvdata(dev); int ret; @@ -389,7 +388,6 @@ static int imx_irqsteer_runtime_resume(struct device *dev) return 0; } -#endif static const struct dev_pm_ops imx_irqsteer_pm_ops = { SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, -- cgit v1.2.3 From 5bcd18bf80827702243d868314068be7a50128ea Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 16 Sep 2020 10:00:58 +0100 Subject: drm/i915: Break up error capture compression loops with cond_resched() commit 7d5553147613b50149238ac1385c60e5c7cacb34 upstream. As the error capture will compress user buffers as directed to by the user, it can take an arbitrary amount of time and space. Break up the compression loops with a call to cond_resched(), that will allow other processes to schedule (avoiding the soft lockups) and also serve as a warning should we try to make this loop atomic in the future. Testcase: igt/gem_exec_capture/many-* Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: stable@vger.kernel.org Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20200916090059.3189-2-chris@chris-wilson.co.uk (cherry picked from commit 293f43c80c0027ff9299036c24218ac705ce584e) Signed-off-by: Rodrigo Vivi Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_gpu_error.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index fe9edbba997c..b9af329eb3a3 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -307,6 +307,8 @@ static int compress_page(struct compress *c, if (zlib_deflate(zstream, Z_NO_FLUSH) != Z_OK) return -EIO; + + cond_resched(); } while (zstream->avail_in); /* Fallback to uncompressed if we increase size? */ @@ -392,6 +394,7 @@ static int compress_page(struct compress *c, if (!i915_memcpy_from_wc(ptr, src, PAGE_SIZE)) memcpy(ptr, src, PAGE_SIZE); dst->pages[dst->page_count++] = ptr; + cond_resched(); return 0; } -- cgit v1.2.3 From d321f127eb51ac533b473e56a21b6c65fc9a974a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 15 Oct 2020 20:50:23 +0100 Subject: drm/i915/gt: Delay execlist processing for tgl commit 9b99e5ba3e5d68039bd6b657e4bbe520a3521f4c upstream. When running gem_exec_nop, it floods the system with many requests (with the goal of userspace submitting faster than the HW can process a single empty batch). This causes the driver to continually resubmit new requests onto the end of an active context, a flood of lite-restore preemptions. If we time this just right, Tigerlake hangs. Inserting a small delay between the processing of CS events and submitting the next context, prevents the hang. Naturally it does not occur with debugging enabled. The suspicion then is that this is related to the issues with the CS event buffer, and inserting an mmio read of the CS pointer status appears to be very successful in preventing the hang. Other registers, or uncached reads, or plain mb, do not prevent the hang, suggesting that register is key -- but that the hang can be prevented by a simple udelay, suggests it is just a timing issue like that encountered by commit 233c1ae3c83f ("drm/i915/gt: Wait for CSB entries on Tigerlake"). Also note that the hang is not prevented by applying CTX_DESC_FORCE_RESTORE, or by inserting a delay on the GPU between requests. Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Bruce Chang Cc: Joonas Lahtinen Cc: stable@vger.kernel.org Acked-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20201015195023.32346-1-chris@chris-wilson.co.uk (cherry picked from commit 6ca7217dffaf1abba91558e67a2efb655ac91405) Signed-off-by: Rodrigo Vivi Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/gt/intel_lrc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index c169f0f70f3a..2fa491f82682 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1574,6 +1574,9 @@ static void process_csb(struct intel_engine_cs *engine) if (!inject_preempt_hang(execlists)) ring_set_paused(engine, 0); + /* XXX Magic delay for tgl */ + ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR); + WRITE_ONCE(execlists->pending[0], NULL); break; -- cgit v1.2.3 From e05dfcff26e94ded9049b3130860445a257ee095 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 11 Aug 2020 10:25:32 +0100 Subject: drm/i915: Drop runtime-pm assert from vgpu io accessors commit 5c6c13cd1102caf92d006a3cf4591c0229019daf upstream. The "mmio" writes into vgpu registers are simple memory traps from the guest into the host. We do not need to assert in the guest that the device is awake for the io as we do not write to the device itself. However, over time we have refactored all the mmio accessors with the result that the vgpu reuses the gen2 accessors and so inherits the assert for runtime-pm of the native device. The assert though has actually been there since commit 3be0bf5acca6 ("drm/i915: Create vGPU specific MMIO operations to reduce traps"). References: 3be0bf5acca6 ("drm/i915: Create vGPU specific MMIO operations to reduce traps") Signed-off-by: Chris Wilson Cc: Yan Zhao Cc: Zhenyu Wang Reviewed-by: Zhenyu Wang Cc: stable@vger.kernel.org Link: https://patchwork.freedesktop.org/patch/msgid/20200811092532.13753-1-chris@chris-wilson.co.uk (cherry picked from commit 0e65ce24a33c1d37da4bf43c34e080334ec6cb60) Signed-off-by: Rodrigo Vivi Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_uncore.c | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 9e583f13a9e4..62f8a47fda45 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -1124,6 +1124,18 @@ unclaimed_reg_debug(struct intel_uncore *uncore, spin_unlock(&uncore->debug->lock); } +#define __vgpu_read(x) \ +static u##x \ +vgpu_read##x(struct intel_uncore *uncore, i915_reg_t reg, bool trace) { \ + u##x val = __raw_uncore_read##x(uncore, reg); \ + trace_i915_reg_rw(false, reg, val, sizeof(val), trace); \ + return val; \ +} +__vgpu_read(8) +__vgpu_read(16) +__vgpu_read(32) +__vgpu_read(64) + #define GEN2_READ_HEADER(x) \ u##x val = 0; \ assert_rpm_wakelock_held(uncore->rpm); @@ -1327,6 +1339,16 @@ __gen_reg_write_funcs(gen8); #undef GEN6_WRITE_FOOTER #undef GEN6_WRITE_HEADER +#define __vgpu_write(x) \ +static void \ +vgpu_write##x(struct intel_uncore *uncore, i915_reg_t reg, u##x val, bool trace) { \ + trace_i915_reg_rw(true, reg, val, sizeof(val), trace); \ + __raw_uncore_write##x(uncore, reg, val); \ +} +__vgpu_write(8) +__vgpu_write(16) +__vgpu_write(32) + #define ASSIGN_RAW_WRITE_MMIO_VFUNCS(uncore, x) \ do { \ (uncore)->funcs.mmio_writeb = x##_write8; \ @@ -1647,7 +1669,10 @@ static void uncore_raw_init(struct intel_uncore *uncore) { GEM_BUG_ON(intel_uncore_has_forcewake(uncore)); - if (IS_GEN(uncore->i915, 5)) { + if (intel_vgpu_active(uncore->i915)) { + ASSIGN_RAW_WRITE_MMIO_VFUNCS(uncore, vgpu); + ASSIGN_RAW_READ_MMIO_VFUNCS(uncore, vgpu); + } else if (IS_GEN(uncore->i915, 5)) { ASSIGN_RAW_WRITE_MMIO_VFUNCS(uncore, gen5); ASSIGN_RAW_READ_MMIO_VFUNCS(uncore, gen5); } else { -- cgit v1.2.3 From 1ca84322ab5b963b9381a1048e3b3e950b9b5398 Mon Sep 17 00:00:00 2001 From: Mateusz Gorski Date: Mon, 27 Apr 2020 15:27:25 +0200 Subject: ASoC: Intel: Skylake: Add alternative topology binary name commit 1b290ef023b3eeb4f4688b582fecb773915ef937 upstream. Add alternative topology binary file name based on used machine driver and fallback to use this name after failed attempt to load topology file with name based on NHLT. This change addresses multiple issues with current mechanism, for example - there are devices without NHLT table, and that currently results in tplg_name being empty. Signed-off-by: Mateusz Gorski Reviewed-by: Cezary Rojewski Reviewed-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20200427132727.24942-2-mateusz.gorski@linux.intel.com Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/intel/skylake/skl-topology.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/sound/soc/intel/skylake/skl-topology.c b/sound/soc/intel/skylake/skl-topology.c index 69cd7a81bf2a..4b114ece58c6 100644 --- a/sound/soc/intel/skylake/skl-topology.c +++ b/sound/soc/intel/skylake/skl-topology.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -3565,8 +3566,20 @@ int skl_tplg_init(struct snd_soc_component *component, struct hdac_bus *bus) ret = request_firmware(&fw, skl->tplg_name, bus->dev); if (ret < 0) { - dev_info(bus->dev, "tplg fw %s load failed with %d, falling back to dfw_sst.bin", - skl->tplg_name, ret); + char alt_tplg_name[64]; + + snprintf(alt_tplg_name, sizeof(alt_tplg_name), "%s-tplg.bin", + skl->mach->drv_name); + dev_info(bus->dev, "tplg fw %s load failed with %d, trying alternative tplg name %s", + skl->tplg_name, ret, alt_tplg_name); + + ret = request_firmware(&fw, alt_tplg_name, bus->dev); + if (!ret) + goto component_load; + + dev_info(bus->dev, "tplg %s failed with %d, falling back to dfw_sst.bin", + alt_tplg_name, ret); + ret = request_firmware(&fw, "dfw_sst.bin", bus->dev); if (ret < 0) { dev_err(bus->dev, "Fallback tplg fw %s load failed with %d\n", @@ -3575,6 +3588,8 @@ int skl_tplg_init(struct snd_soc_component *component, struct hdac_bus *bus) } } +component_load: + /* * The complete tplg for SKL is loaded as index 0, we don't use * any other index -- cgit v1.2.3 From 840d8c9b3e5f51d1005256e6c63eab4f81cbebfb Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Fri, 11 Oct 2019 13:50:41 +0200 Subject: linkage: Introduce new macros for assembler symbols commit ffedeeb780dc554eff3d3b16e6a462a26a41d7ec upstream. Introduce new C macros for annotations of functions and data in assembly. There is a long-standing mess in macros like ENTRY, END, ENDPROC and similar. They are used in different manners and sometimes incorrectly. So introduce macros with clear use to annotate assembly as follows: a) Support macros for the ones below SYM_T_FUNC -- type used by assembler to mark functions SYM_T_OBJECT -- type used by assembler to mark data SYM_T_NONE -- type used by assembler to mark entries of unknown type They are defined as STT_FUNC, STT_OBJECT, and STT_NOTYPE respectively. According to the gas manual, this is the most portable way. I am not sure about other assemblers, so this can be switched back to %function and %object if this turns into a problem. Architectures can also override them by something like ", @function" if they need. SYM_A_ALIGN, SYM_A_NONE -- align the symbol? SYM_L_GLOBAL, SYM_L_WEAK, SYM_L_LOCAL -- linkage of symbols b) Mostly internal annotations, used by the ones below SYM_ENTRY -- use only if you have to (for non-paired symbols) SYM_START -- use only if you have to (for paired symbols) SYM_END -- use only if you have to (for paired symbols) c) Annotations for code SYM_INNER_LABEL_ALIGN -- only for labels in the middle of code SYM_INNER_LABEL -- only for labels in the middle of code SYM_FUNC_START_LOCAL_ALIAS -- use where there are two local names for one function SYM_FUNC_START_ALIAS -- use where there are two global names for one function SYM_FUNC_END_ALIAS -- the end of LOCAL_ALIASed or ALIASed function SYM_FUNC_START -- use for global functions SYM_FUNC_START_NOALIGN -- use for global functions, w/o alignment SYM_FUNC_START_LOCAL -- use for local functions SYM_FUNC_START_LOCAL_NOALIGN -- use for local functions, w/o alignment SYM_FUNC_START_WEAK -- use for weak functions SYM_FUNC_START_WEAK_NOALIGN -- use for weak functions, w/o alignment SYM_FUNC_END -- the end of SYM_FUNC_START_LOCAL, SYM_FUNC_START, SYM_FUNC_START_WEAK, ... For functions with special (non-C) calling conventions: SYM_CODE_START -- use for non-C (special) functions SYM_CODE_START_NOALIGN -- use for non-C (special) functions, w/o alignment SYM_CODE_START_LOCAL -- use for local non-C (special) functions SYM_CODE_START_LOCAL_NOALIGN -- use for local non-C (special) functions, w/o alignment SYM_CODE_END -- the end of SYM_CODE_START_LOCAL or SYM_CODE_START d) For data SYM_DATA_START -- global data symbol SYM_DATA_START_LOCAL -- local data symbol SYM_DATA_END -- the end of the SYM_DATA_START symbol SYM_DATA_END_LABEL -- the labeled end of SYM_DATA_START symbol SYM_DATA -- start+end wrapper around simple global data SYM_DATA_LOCAL -- start+end wrapper around simple local data ========== The macros allow to pair starts and ends of functions and mark functions correctly in the output ELF objects. All users of the old macros in x86 are converted to use these in further patches. Signed-off-by: Jiri Slaby Signed-off-by: Borislav Petkov Acked-by: Rafael J. Wysocki Cc: Andrew Morton Cc: Andrey Ryabinin Cc: Boris Ostrovsky Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jonathan Corbet Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Len Brown Cc: Linus Torvalds Cc: linux-arch@vger.kernel.org Cc: linux-doc@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-pm@vger.kernel.org Cc: Mark Rutland Cc: Pavel Machek Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Cc: x86-ml Cc: xen-devel@lists.xenproject.org Link: https://lkml.kernel.org/r/20191011115108.12392-2-jslaby@suse.cz Cc: Jian Cai Signed-off-by: Greg Kroah-Hartman --- Documentation/asm-annotations.rst | 216 +++++++++++++++++++++++++++++++++ Documentation/index.rst | 8 ++ arch/x86/include/asm/linkage.h | 10 +- include/linux/linkage.h | 245 ++++++++++++++++++++++++++++++++++++-- 4 files changed, 468 insertions(+), 11 deletions(-) create mode 100644 Documentation/asm-annotations.rst diff --git a/Documentation/asm-annotations.rst b/Documentation/asm-annotations.rst new file mode 100644 index 000000000000..29ccd6e61fe5 --- /dev/null +++ b/Documentation/asm-annotations.rst @@ -0,0 +1,216 @@ +Assembler Annotations +===================== + +Copyright (c) 2017-2019 Jiri Slaby + +This document describes the new macros for annotation of data and code in +assembly. In particular, it contains information about ``SYM_FUNC_START``, +``SYM_FUNC_END``, ``SYM_CODE_START``, and similar. + +Rationale +--------- +Some code like entries, trampolines, or boot code needs to be written in +assembly. The same as in C, such code is grouped into functions and +accompanied with data. Standard assemblers do not force users into precisely +marking these pieces as code, data, or even specifying their length. +Nevertheless, assemblers provide developers with such annotations to aid +debuggers throughout assembly. On top of that, developers also want to mark +some functions as *global* in order to be visible outside of their translation +units. + +Over time, the Linux kernel has adopted macros from various projects (like +``binutils``) to facilitate such annotations. So for historic reasons, +developers have been using ``ENTRY``, ``END``, ``ENDPROC``, and other +annotations in assembly. Due to the lack of their documentation, the macros +are used in rather wrong contexts at some locations. Clearly, ``ENTRY`` was +intended to denote the beginning of global symbols (be it data or code). +``END`` used to mark the end of data or end of special functions with +*non-standard* calling convention. In contrast, ``ENDPROC`` should annotate +only ends of *standard* functions. + +When these macros are used correctly, they help assemblers generate a nice +object with both sizes and types set correctly. For example, the result of +``arch/x86/lib/putuser.S``:: + + Num: Value Size Type Bind Vis Ndx Name + 25: 0000000000000000 33 FUNC GLOBAL DEFAULT 1 __put_user_1 + 29: 0000000000000030 37 FUNC GLOBAL DEFAULT 1 __put_user_2 + 32: 0000000000000060 36 FUNC GLOBAL DEFAULT 1 __put_user_4 + 35: 0000000000000090 37 FUNC GLOBAL DEFAULT 1 __put_user_8 + +This is not only important for debugging purposes. When there are properly +annotated objects like this, tools can be run on them to generate more useful +information. In particular, on properly annotated objects, ``objtool`` can be +run to check and fix the object if needed. Currently, ``objtool`` can report +missing frame pointer setup/destruction in functions. It can also +automatically generate annotations for :doc:`ORC unwinder ` +for most code. Both of these are especially important to support reliable +stack traces which are in turn necessary for :doc:`Kernel live patching +`. + +Caveat and Discussion +--------------------- +As one might realize, there were only three macros previously. That is indeed +insufficient to cover all the combinations of cases: + +* standard/non-standard function +* code/data +* global/local symbol + +There was a discussion_ and instead of extending the current ``ENTRY/END*`` +macros, it was decided that brand new macros should be introduced instead:: + + So how about using macro names that actually show the purpose, instead + of importing all the crappy, historic, essentially randomly chosen + debug symbol macro names from the binutils and older kernels? + +.. _discussion: https://lkml.kernel.org/r/20170217104757.28588-1-jslaby@suse.cz + +Macros Description +------------------ + +The new macros are prefixed with the ``SYM_`` prefix and can be divided into +three main groups: + +1. ``SYM_FUNC_*`` -- to annotate C-like functions. This means functions with + standard C calling conventions, i.e. the stack contains a return address at + the predefined place and a return from the function can happen in a + standard way. When frame pointers are enabled, save/restore of frame + pointer shall happen at the start/end of a function, respectively, too. + + Checking tools like ``objtool`` should ensure such marked functions conform + to these rules. The tools can also easily annotate these functions with + debugging information (like *ORC data*) automatically. + +2. ``SYM_CODE_*`` -- special functions called with special stack. Be it + interrupt handlers with special stack content, trampolines, or startup + functions. + + Checking tools mostly ignore checking of these functions. But some debug + information still can be generated automatically. For correct debug data, + this code needs hints like ``UNWIND_HINT_REGS`` provided by developers. + +3. ``SYM_DATA*`` -- obviously data belonging to ``.data`` sections and not to + ``.text``. Data do not contain instructions, so they have to be treated + specially by the tools: they should not treat the bytes as instructions, + nor assign any debug information to them. + +Instruction Macros +~~~~~~~~~~~~~~~~~~ +This section covers ``SYM_FUNC_*`` and ``SYM_CODE_*`` enumerated above. + +* ``SYM_FUNC_START`` and ``SYM_FUNC_START_LOCAL`` are supposed to be **the + most frequent markings**. They are used for functions with standard calling + conventions -- global and local. Like in C, they both align the functions to + architecture specific ``__ALIGN`` bytes. There are also ``_NOALIGN`` variants + for special cases where developers do not want this implicit alignment. + + ``SYM_FUNC_START_WEAK`` and ``SYM_FUNC_START_WEAK_NOALIGN`` markings are + also offered as an assembler counterpart to the *weak* attribute known from + C. + + All of these **shall** be coupled with ``SYM_FUNC_END``. First, it marks + the sequence of instructions as a function and computes its size to the + generated object file. Second, it also eases checking and processing such + object files as the tools can trivially find exact function boundaries. + + So in most cases, developers should write something like in the following + example, having some asm instructions in between the macros, of course:: + + SYM_FUNC_START(function_hook) + ... asm insns ... + SYM_FUNC_END(function_hook) + + In fact, this kind of annotation corresponds to the now deprecated ``ENTRY`` + and ``ENDPROC`` macros. + +* ``SYM_FUNC_START_ALIAS`` and ``SYM_FUNC_START_LOCAL_ALIAS`` serve for those + who decided to have two or more names for one function. The typical use is:: + + SYM_FUNC_START_ALIAS(__memset) + SYM_FUNC_START(memset) + ... asm insns ... + SYM_FUNC_END(memset) + SYM_FUNC_END_ALIAS(__memset) + + In this example, one can call ``__memset`` or ``memset`` with the same + result, except the debug information for the instructions is generated to + the object file only once -- for the non-``ALIAS`` case. + +* ``SYM_CODE_START`` and ``SYM_CODE_START_LOCAL`` should be used only in + special cases -- if you know what you are doing. This is used exclusively + for interrupt handlers and similar where the calling convention is not the C + one. ``_NOALIGN`` variants exist too. The use is the same as for the ``FUNC`` + category above:: + + SYM_CODE_START_LOCAL(bad_put_user) + ... asm insns ... + SYM_CODE_END(bad_put_user) + + Again, every ``SYM_CODE_START*`` **shall** be coupled by ``SYM_CODE_END``. + + To some extent, this category corresponds to deprecated ``ENTRY`` and + ``END``. Except ``END`` had several other meanings too. + +* ``SYM_INNER_LABEL*`` is used to denote a label inside some + ``SYM_{CODE,FUNC}_START`` and ``SYM_{CODE,FUNC}_END``. They are very similar + to C labels, except they can be made global. An example of use:: + + SYM_CODE_START(ftrace_caller) + /* save_mcount_regs fills in first two parameters */ + ... + + SYM_INNER_LABEL(ftrace_caller_op_ptr, SYM_L_GLOBAL) + /* Load the ftrace_ops into the 3rd parameter */ + ... + + SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL) + call ftrace_stub + ... + retq + SYM_CODE_END(ftrace_caller) + +Data Macros +~~~~~~~~~~~ +Similar to instructions, there is a couple of macros to describe data in the +assembly. + +* ``SYM_DATA_START`` and ``SYM_DATA_START_LOCAL`` mark the start of some data + and shall be used in conjunction with either ``SYM_DATA_END``, or + ``SYM_DATA_END_LABEL``. The latter adds also a label to the end, so that + people can use ``lstack`` and (local) ``lstack_end`` in the following + example:: + + SYM_DATA_START_LOCAL(lstack) + .skip 4096 + SYM_DATA_END_LABEL(lstack, SYM_L_LOCAL, lstack_end) + +* ``SYM_DATA`` and ``SYM_DATA_LOCAL`` are variants for simple, mostly one-line + data:: + + SYM_DATA(HEAP, .long rm_heap) + SYM_DATA(heap_end, .long rm_stack) + + In the end, they expand to ``SYM_DATA_START`` with ``SYM_DATA_END`` + internally. + +Support Macros +~~~~~~~~~~~~~~ +All the above reduce themselves to some invocation of ``SYM_START``, +``SYM_END``, or ``SYM_ENTRY`` at last. Normally, developers should avoid using +these. + +Further, in the above examples, one could see ``SYM_L_LOCAL``. There are also +``SYM_L_GLOBAL`` and ``SYM_L_WEAK``. All are intended to denote linkage of a +symbol marked by them. They are used either in ``_LABEL`` variants of the +earlier macros, or in ``SYM_START``. + + +Overriding Macros +~~~~~~~~~~~~~~~~~ +Architecture can also override any of the macros in their own +``asm/linkage.h``, including macros specifying the type of a symbol +(``SYM_T_FUNC``, ``SYM_T_OBJECT``, and ``SYM_T_NONE``). As every macro +described in this file is surrounded by ``#ifdef`` + ``#endif``, it is enough +to define the macros differently in the aforementioned architecture-dependent +header. diff --git a/Documentation/index.rst b/Documentation/index.rst index b843e313d2f2..2ceab197246f 100644 --- a/Documentation/index.rst +++ b/Documentation/index.rst @@ -135,6 +135,14 @@ needed). mic/index scheduler/index +Architecture-agnostic documentation +----------------------------------- + +.. toctree:: + :maxdepth: 2 + + asm-annotations + Architecture-specific documentation ----------------------------------- diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h index 14caa9d9fb7f..e07188e8d763 100644 --- a/arch/x86/include/asm/linkage.h +++ b/arch/x86/include/asm/linkage.h @@ -13,9 +13,13 @@ #ifdef __ASSEMBLY__ -#define GLOBAL(name) \ - .globl name; \ - name: +/* + * GLOBAL is DEPRECATED + * + * use SYM_DATA_START, SYM_FUNC_START, SYM_INNER_LABEL, SYM_CODE_START, or + * similar + */ +#define GLOBAL(name) SYM_ENTRY(name, SYM_L_GLOBAL, SYM_A_NONE) #if defined(CONFIG_X86_64) || defined(CONFIG_X86_ALIGNMENT_16) #define __ALIGN .p2align 4, 0x90 diff --git a/include/linux/linkage.h b/include/linux/linkage.h index 7e020782ade2..f3ae8f3dea2c 100644 --- a/include/linux/linkage.h +++ b/include/linux/linkage.h @@ -75,32 +75,58 @@ #ifdef __ASSEMBLY__ +/* SYM_T_FUNC -- type used by assembler to mark functions */ +#ifndef SYM_T_FUNC +#define SYM_T_FUNC STT_FUNC +#endif + +/* SYM_T_OBJECT -- type used by assembler to mark data */ +#ifndef SYM_T_OBJECT +#define SYM_T_OBJECT STT_OBJECT +#endif + +/* SYM_T_NONE -- type used by assembler to mark entries of unknown type */ +#ifndef SYM_T_NONE +#define SYM_T_NONE STT_NOTYPE +#endif + +/* SYM_A_* -- align the symbol? */ +#define SYM_A_ALIGN ALIGN +#define SYM_A_NONE /* nothing */ + +/* SYM_L_* -- linkage of symbols */ +#define SYM_L_GLOBAL(name) .globl name +#define SYM_L_WEAK(name) .weak name +#define SYM_L_LOCAL(name) /* nothing */ + #ifndef LINKER_SCRIPT #define ALIGN __ALIGN #define ALIGN_STR __ALIGN_STR +/* === DEPRECATED annotations === */ + #ifndef GLOBAL +/* deprecated, use SYM_DATA*, SYM_ENTRY, or similar */ #define GLOBAL(name) \ .globl name ASM_NL \ name: #endif #ifndef ENTRY +/* deprecated, use SYM_FUNC_START */ #define ENTRY(name) \ - .globl name ASM_NL \ - ALIGN ASM_NL \ - name: + SYM_FUNC_START(name) #endif #endif /* LINKER_SCRIPT */ #ifndef WEAK +/* deprecated, use SYM_FUNC_START_WEAK* */ #define WEAK(name) \ - .weak name ASM_NL \ - ALIGN ASM_NL \ - name: + SYM_FUNC_START_WEAK(name) #endif #ifndef END +/* deprecated, use SYM_FUNC_END, SYM_DATA_END, or SYM_END */ #define END(name) \ .size name, .-name #endif @@ -110,11 +136,214 @@ * static analysis tools such as stack depth analyzer. */ #ifndef ENDPROC +/* deprecated, use SYM_FUNC_END */ #define ENDPROC(name) \ - .type name, @function ASM_NL \ - END(name) + SYM_FUNC_END(name) +#endif + +/* === generic annotations === */ + +/* SYM_ENTRY -- use only if you have to for non-paired symbols */ +#ifndef SYM_ENTRY +#define SYM_ENTRY(name, linkage, align...) \ + linkage(name) ASM_NL \ + align ASM_NL \ + name: +#endif + +/* SYM_START -- use only if you have to */ +#ifndef SYM_START +#define SYM_START(name, linkage, align...) \ + SYM_ENTRY(name, linkage, align) +#endif + +/* SYM_END -- use only if you have to */ +#ifndef SYM_END +#define SYM_END(name, sym_type) \ + .type name sym_type ASM_NL \ + .size name, .-name +#endif + +/* === code annotations === */ + +/* + * FUNC -- C-like functions (proper stack frame etc.) + * CODE -- non-C code (e.g. irq handlers with different, special stack etc.) + * + * Objtool validates stack for FUNC, but not for CODE. + * Objtool generates debug info for both FUNC & CODE, but needs special + * annotations for each CODE's start (to describe the actual stack frame). + * + * ALIAS -- does not generate debug info -- the aliased function will + */ + +/* SYM_INNER_LABEL_ALIGN -- only for labels in the middle of code */ +#ifndef SYM_INNER_LABEL_ALIGN +#define SYM_INNER_LABEL_ALIGN(name, linkage) \ + .type name SYM_T_NONE ASM_NL \ + SYM_ENTRY(name, linkage, SYM_A_ALIGN) +#endif + +/* SYM_INNER_LABEL -- only for labels in the middle of code */ +#ifndef SYM_INNER_LABEL +#define SYM_INNER_LABEL(name, linkage) \ + .type name SYM_T_NONE ASM_NL \ + SYM_ENTRY(name, linkage, SYM_A_NONE) +#endif + +/* + * SYM_FUNC_START_LOCAL_ALIAS -- use where there are two local names for one + * function + */ +#ifndef SYM_FUNC_START_LOCAL_ALIAS +#define SYM_FUNC_START_LOCAL_ALIAS(name) \ + SYM_START(name, SYM_L_LOCAL, SYM_A_ALIGN) +#endif + +/* + * SYM_FUNC_START_ALIAS -- use where there are two global names for one + * function + */ +#ifndef SYM_FUNC_START_ALIAS +#define SYM_FUNC_START_ALIAS(name) \ + SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) +#endif + +/* SYM_FUNC_START -- use for global functions */ +#ifndef SYM_FUNC_START +/* + * The same as SYM_FUNC_START_ALIAS, but we will need to distinguish these two + * later. + */ +#define SYM_FUNC_START(name) \ + SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) +#endif + +/* SYM_FUNC_START_NOALIGN -- use for global functions, w/o alignment */ +#ifndef SYM_FUNC_START_NOALIGN +#define SYM_FUNC_START_NOALIGN(name) \ + SYM_START(name, SYM_L_GLOBAL, SYM_A_NONE) +#endif + +/* SYM_FUNC_START_LOCAL -- use for local functions */ +#ifndef SYM_FUNC_START_LOCAL +/* the same as SYM_FUNC_START_LOCAL_ALIAS, see comment near SYM_FUNC_START */ +#define SYM_FUNC_START_LOCAL(name) \ + SYM_START(name, SYM_L_LOCAL, SYM_A_ALIGN) #endif +/* SYM_FUNC_START_LOCAL_NOALIGN -- use for local functions, w/o alignment */ +#ifndef SYM_FUNC_START_LOCAL_NOALIGN +#define SYM_FUNC_START_LOCAL_NOALIGN(name) \ + SYM_START(name, SYM_L_LOCAL, SYM_A_NONE) #endif +/* SYM_FUNC_START_WEAK -- use for weak functions */ +#ifndef SYM_FUNC_START_WEAK +#define SYM_FUNC_START_WEAK(name) \ + SYM_START(name, SYM_L_WEAK, SYM_A_ALIGN) #endif + +/* SYM_FUNC_START_WEAK_NOALIGN -- use for weak functions, w/o alignment */ +#ifndef SYM_FUNC_START_WEAK_NOALIGN +#define SYM_FUNC_START_WEAK_NOALIGN(name) \ + SYM_START(name, SYM_L_WEAK, SYM_A_NONE) +#endif + +/* SYM_FUNC_END_ALIAS -- the end of LOCAL_ALIASed or ALIASed function */ +#ifndef SYM_FUNC_END_ALIAS +#define SYM_FUNC_END_ALIAS(name) \ + SYM_END(name, SYM_T_FUNC) +#endif + +/* + * SYM_FUNC_END -- the end of SYM_FUNC_START_LOCAL, SYM_FUNC_START, + * SYM_FUNC_START_WEAK, ... + */ +#ifndef SYM_FUNC_END +/* the same as SYM_FUNC_END_ALIAS, see comment near SYM_FUNC_START */ +#define SYM_FUNC_END(name) \ + SYM_END(name, SYM_T_FUNC) +#endif + +/* SYM_CODE_START -- use for non-C (special) functions */ +#ifndef SYM_CODE_START +#define SYM_CODE_START(name) \ + SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) +#endif + +/* SYM_CODE_START_NOALIGN -- use for non-C (special) functions, w/o alignment */ +#ifndef SYM_CODE_START_NOALIGN +#define SYM_CODE_START_NOALIGN(name) \ + SYM_START(name, SYM_L_GLOBAL, SYM_A_NONE) +#endif + +/* SYM_CODE_START_LOCAL -- use for local non-C (special) functions */ +#ifndef SYM_CODE_START_LOCAL +#define SYM_CODE_START_LOCAL(name) \ + SYM_START(name, SYM_L_LOCAL, SYM_A_ALIGN) +#endif + +/* + * SYM_CODE_START_LOCAL_NOALIGN -- use for local non-C (special) functions, + * w/o alignment + */ +#ifndef SYM_CODE_START_LOCAL_NOALIGN +#define SYM_CODE_START_LOCAL_NOALIGN(name) \ + SYM_START(name, SYM_L_LOCAL, SYM_A_NONE) +#endif + +/* SYM_CODE_END -- the end of SYM_CODE_START_LOCAL, SYM_CODE_START, ... */ +#ifndef SYM_CODE_END +#define SYM_CODE_END(name) \ + SYM_END(name, SYM_T_NONE) +#endif + +/* === data annotations === */ + +/* SYM_DATA_START -- global data symbol */ +#ifndef SYM_DATA_START +#define SYM_DATA_START(name) \ + SYM_START(name, SYM_L_GLOBAL, SYM_A_NONE) +#endif + +/* SYM_DATA_START -- local data symbol */ +#ifndef SYM_DATA_START_LOCAL +#define SYM_DATA_START_LOCAL(name) \ + SYM_START(name, SYM_L_LOCAL, SYM_A_NONE) +#endif + +/* SYM_DATA_END -- the end of SYM_DATA_START symbol */ +#ifndef SYM_DATA_END +#define SYM_DATA_END(name) \ + SYM_END(name, SYM_T_OBJECT) +#endif + +/* SYM_DATA_END_LABEL -- the labeled end of SYM_DATA_START symbol */ +#ifndef SYM_DATA_END_LABEL +#define SYM_DATA_END_LABEL(name, linkage, label) \ + linkage(label) ASM_NL \ + .type label SYM_T_OBJECT ASM_NL \ + label: \ + SYM_END(name, SYM_T_OBJECT) +#endif + +/* SYM_DATA -- start+end wrapper around simple global data */ +#ifndef SYM_DATA +#define SYM_DATA(name, data...) \ + SYM_DATA_START(name) ASM_NL \ + data ASM_NL \ + SYM_DATA_END(name) +#endif + +/* SYM_DATA_LOCAL -- start+end wrapper around simple local data */ +#ifndef SYM_DATA_LOCAL +#define SYM_DATA_LOCAL(name, data...) \ + SYM_DATA_START_LOCAL(name) ASM_NL \ + data ASM_NL \ + SYM_DATA_END(name) +#endif + +#endif /* __ASSEMBLY__ */ + +#endif /* _LINUX_LINKAGE_H */ -- cgit v1.2.3 From 3e7050661d954c470e1896bd394f4b5ecf2c7681 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 6 Jan 2020 19:58:16 +0000 Subject: arm64: asm: Add new-style position independent function annotations commit 35e61c77ef386555f3df1bc2057098c6997ca10b upstream. As part of an effort to make the annotations in assembly code clearer and more consistent new macros have been introduced, including replacements for ENTRY() and ENDPROC(). On arm64 we have ENDPIPROC(), a custom version of ENDPROC() which is used for code that will need to run in position independent environments like EFI, it creates an alias for the function with the prefix __pi_ and then emits the standard ENDPROC. Add new-style macros to replace this which expand to the standard SYM_FUNC_*() and SYM_FUNC_ALIAS_*(), resulting in the same object code. These are added in linkage.h for consistency with where the generic assembler code has its macros. Signed-off-by: Mark Brown [will: Rename 'WEAK' macro, use ';' instead of ASM_NL, deprecate ENDPIPROC] Signed-off-by: Will Deacon Cc: Jian Cai Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/assembler.h | 1 + arch/arm64/include/asm/linkage.h | 16 ++++++++++++++++ 2 files changed, 17 insertions(+) diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index b8cf7c85ffa2..4a4258f17c86 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -462,6 +462,7 @@ USER(\label, ic ivau, \tmp2) // invalidate I line PoU .endm /* + * Deprecated! Use SYM_FUNC_{START,START_WEAK,END}_PI instead. * Annotate a function as position independent, i.e., safe to be called before * the kernel virtual mapping is activated. */ diff --git a/arch/arm64/include/asm/linkage.h b/arch/arm64/include/asm/linkage.h index 1b266292f0be..ebee3113a62f 100644 --- a/arch/arm64/include/asm/linkage.h +++ b/arch/arm64/include/asm/linkage.h @@ -4,4 +4,20 @@ #define __ALIGN .align 2 #define __ALIGN_STR ".align 2" +/* + * Annotate a function as position independent, i.e., safe to be called before + * the kernel virtual mapping is activated. + */ +#define SYM_FUNC_START_PI(x) \ + SYM_FUNC_START_ALIAS(__pi_##x); \ + SYM_FUNC_START(x) + +#define SYM_FUNC_START_WEAK_PI(x) \ + SYM_FUNC_START_ALIAS(__pi_##x); \ + SYM_FUNC_START_WEAK(x) + +#define SYM_FUNC_END_PI(x) \ + SYM_FUNC_END(x); \ + SYM_FUNC_END_ALIAS(__pi_##x) + #endif -- cgit v1.2.3 From d94589900d98a20ea28324a7ae4568edb5db77fd Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 6 Jan 2020 19:58:17 +0000 Subject: arm64: lib: Use modern annotations for assembly functions commit 3ac0f4526dfb80625f5c2365bccd85be68db93ef upstream. In an effort to clarify and simplify the annotation of assembly functions in the kernel new macros have been introduced. These replace ENTRY and ENDPROC and also add a new annotation for static functions which previously had no ENTRY equivalent. Update the annotations in the library code to the new macros. Signed-off-by: Mark Brown [will: Use SYM_FUNC_START_WEAK_PI] Signed-off-by: Will Deacon Cc: Jian Cai Signed-off-by: Greg Kroah-Hartman --- arch/arm64/lib/clear_page.S | 4 ++-- arch/arm64/lib/clear_user.S | 4 ++-- arch/arm64/lib/copy_from_user.S | 4 ++-- arch/arm64/lib/copy_in_user.S | 4 ++-- arch/arm64/lib/copy_page.S | 4 ++-- arch/arm64/lib/copy_to_user.S | 4 ++-- arch/arm64/lib/crc32.S | 8 ++++---- arch/arm64/lib/memchr.S | 4 ++-- arch/arm64/lib/memcmp.S | 4 ++-- arch/arm64/lib/memcpy.S | 8 ++++---- arch/arm64/lib/memmove.S | 8 ++++---- arch/arm64/lib/memset.S | 8 ++++---- arch/arm64/lib/strchr.S | 4 ++-- arch/arm64/lib/strcmp.S | 4 ++-- arch/arm64/lib/strlen.S | 4 ++-- arch/arm64/lib/strncmp.S | 4 ++-- arch/arm64/lib/strnlen.S | 4 ++-- arch/arm64/lib/strrchr.S | 4 ++-- arch/arm64/lib/tishift.S | 12 ++++++------ 19 files changed, 50 insertions(+), 50 deletions(-) diff --git a/arch/arm64/lib/clear_page.S b/arch/arm64/lib/clear_page.S index 78a9ef66288a..073acbf02a7c 100644 --- a/arch/arm64/lib/clear_page.S +++ b/arch/arm64/lib/clear_page.S @@ -14,7 +14,7 @@ * Parameters: * x0 - dest */ -ENTRY(clear_page) +SYM_FUNC_START(clear_page) mrs x1, dczid_el0 and w1, w1, #0xf mov x2, #4 @@ -25,5 +25,5 @@ ENTRY(clear_page) tst x0, #(PAGE_SIZE - 1) b.ne 1b ret -ENDPROC(clear_page) +SYM_FUNC_END(clear_page) EXPORT_SYMBOL(clear_page) diff --git a/arch/arm64/lib/clear_user.S b/arch/arm64/lib/clear_user.S index aeafc03e961a..48a3a26eff66 100644 --- a/arch/arm64/lib/clear_user.S +++ b/arch/arm64/lib/clear_user.S @@ -19,7 +19,7 @@ * * Alignment fixed up by hardware. */ -ENTRY(__arch_clear_user) +SYM_FUNC_START(__arch_clear_user) mov x2, x1 // save the size for fixup return subs x1, x1, #8 b.mi 2f @@ -40,7 +40,7 @@ uao_user_alternative 9f, strh, sttrh, wzr, x0, 2 uao_user_alternative 9f, strb, sttrb, wzr, x0, 0 5: mov x0, #0 ret -ENDPROC(__arch_clear_user) +SYM_FUNC_END(__arch_clear_user) EXPORT_SYMBOL(__arch_clear_user) .section .fixup,"ax" diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S index ebb3c06cbb5d..8e25e89ad01f 100644 --- a/arch/arm64/lib/copy_from_user.S +++ b/arch/arm64/lib/copy_from_user.S @@ -53,12 +53,12 @@ .endm end .req x5 -ENTRY(__arch_copy_from_user) +SYM_FUNC_START(__arch_copy_from_user) add end, x0, x2 #include "copy_template.S" mov x0, #0 // Nothing to copy ret -ENDPROC(__arch_copy_from_user) +SYM_FUNC_END(__arch_copy_from_user) EXPORT_SYMBOL(__arch_copy_from_user) .section .fixup,"ax" diff --git a/arch/arm64/lib/copy_in_user.S b/arch/arm64/lib/copy_in_user.S index 3d8153a1ebce..667139013ed1 100644 --- a/arch/arm64/lib/copy_in_user.S +++ b/arch/arm64/lib/copy_in_user.S @@ -55,12 +55,12 @@ end .req x5 -ENTRY(__arch_copy_in_user) +SYM_FUNC_START(__arch_copy_in_user) add end, x0, x2 #include "copy_template.S" mov x0, #0 ret -ENDPROC(__arch_copy_in_user) +SYM_FUNC_END(__arch_copy_in_user) EXPORT_SYMBOL(__arch_copy_in_user) .section .fixup,"ax" diff --git a/arch/arm64/lib/copy_page.S b/arch/arm64/lib/copy_page.S index bbb8562396af..e125a84eb400 100644 --- a/arch/arm64/lib/copy_page.S +++ b/arch/arm64/lib/copy_page.S @@ -17,7 +17,7 @@ * x0 - dest * x1 - src */ -ENTRY(copy_page) +SYM_FUNC_START(copy_page) alternative_if ARM64_HAS_NO_HW_PREFETCH // Prefetch three cache lines ahead. prfm pldl1strm, [x1, #128] @@ -75,5 +75,5 @@ alternative_else_nop_endif stnp x16, x17, [x0, #112] ret -ENDPROC(copy_page) +SYM_FUNC_END(copy_page) EXPORT_SYMBOL(copy_page) diff --git a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S index 357eae2c18eb..1a104d0089f3 100644 --- a/arch/arm64/lib/copy_to_user.S +++ b/arch/arm64/lib/copy_to_user.S @@ -52,12 +52,12 @@ .endm end .req x5 -ENTRY(__arch_copy_to_user) +SYM_FUNC_START(__arch_copy_to_user) add end, x0, x2 #include "copy_template.S" mov x0, #0 ret -ENDPROC(__arch_copy_to_user) +SYM_FUNC_END(__arch_copy_to_user) EXPORT_SYMBOL(__arch_copy_to_user) .section .fixup,"ax" diff --git a/arch/arm64/lib/crc32.S b/arch/arm64/lib/crc32.S index e6135f16649b..243e107e9896 100644 --- a/arch/arm64/lib/crc32.S +++ b/arch/arm64/lib/crc32.S @@ -85,17 +85,17 @@ CPU_BE( rev16 w3, w3 ) .endm .align 5 -ENTRY(crc32_le) +SYM_FUNC_START(crc32_le) alternative_if_not ARM64_HAS_CRC32 b crc32_le_base alternative_else_nop_endif __crc32 -ENDPROC(crc32_le) +SYM_FUNC_END(crc32_le) .align 5 -ENTRY(__crc32c_le) +SYM_FUNC_START(__crc32c_le) alternative_if_not ARM64_HAS_CRC32 b __crc32c_le_base alternative_else_nop_endif __crc32 c -ENDPROC(__crc32c_le) +SYM_FUNC_END(__crc32c_le) diff --git a/arch/arm64/lib/memchr.S b/arch/arm64/lib/memchr.S index 48a3ab636e4f..edf6b970a277 100644 --- a/arch/arm64/lib/memchr.S +++ b/arch/arm64/lib/memchr.S @@ -19,7 +19,7 @@ * Returns: * x0 - address of first occurrence of 'c' or 0 */ -WEAK(memchr) +SYM_FUNC_START_WEAK_PI(memchr) and w1, w1, #0xff 1: subs x2, x2, #1 b.mi 2f @@ -30,5 +30,5 @@ WEAK(memchr) ret 2: mov x0, #0 ret -ENDPIPROC(memchr) +SYM_FUNC_END_PI(memchr) EXPORT_SYMBOL_NOKASAN(memchr) diff --git a/arch/arm64/lib/memcmp.S b/arch/arm64/lib/memcmp.S index b297bdaaf549..c0671e793ea9 100644 --- a/arch/arm64/lib/memcmp.S +++ b/arch/arm64/lib/memcmp.S @@ -46,7 +46,7 @@ pos .req x11 limit_wd .req x12 mask .req x13 -WEAK(memcmp) +SYM_FUNC_START_WEAK_PI(memcmp) cbz limit, .Lret0 eor tmp1, src1, src2 tst tmp1, #7 @@ -243,5 +243,5 @@ CPU_LE( rev data2, data2 ) .Lret0: mov result, #0 ret -ENDPIPROC(memcmp) +SYM_FUNC_END_PI(memcmp) EXPORT_SYMBOL_NOKASAN(memcmp) diff --git a/arch/arm64/lib/memcpy.S b/arch/arm64/lib/memcpy.S index d79f48994dbb..9f382adfa88a 100644 --- a/arch/arm64/lib/memcpy.S +++ b/arch/arm64/lib/memcpy.S @@ -57,11 +57,11 @@ .endm .weak memcpy -ENTRY(__memcpy) -ENTRY(memcpy) +SYM_FUNC_START_ALIAS(__memcpy) +SYM_FUNC_START_PI(memcpy) #include "copy_template.S" ret -ENDPIPROC(memcpy) +SYM_FUNC_END_PI(memcpy) EXPORT_SYMBOL(memcpy) -ENDPROC(__memcpy) +SYM_FUNC_END_ALIAS(__memcpy) EXPORT_SYMBOL(__memcpy) diff --git a/arch/arm64/lib/memmove.S b/arch/arm64/lib/memmove.S index 784775136480..02cda2e33bde 100644 --- a/arch/arm64/lib/memmove.S +++ b/arch/arm64/lib/memmove.S @@ -46,8 +46,8 @@ D_l .req x13 D_h .req x14 .weak memmove -ENTRY(__memmove) -ENTRY(memmove) +SYM_FUNC_START_ALIAS(__memmove) +SYM_FUNC_START_PI(memmove) cmp dstin, src b.lo __memcpy add tmp1, src, count @@ -184,7 +184,7 @@ ENTRY(memmove) tst count, #0x3f b.ne .Ltail63 ret -ENDPIPROC(memmove) +SYM_FUNC_END_PI(memmove) EXPORT_SYMBOL(memmove) -ENDPROC(__memmove) +SYM_FUNC_END_ALIAS(__memmove) EXPORT_SYMBOL(__memmove) diff --git a/arch/arm64/lib/memset.S b/arch/arm64/lib/memset.S index 9fb97e6bc560..77c3c7ba0084 100644 --- a/arch/arm64/lib/memset.S +++ b/arch/arm64/lib/memset.S @@ -43,8 +43,8 @@ tmp3w .req w9 tmp3 .req x9 .weak memset -ENTRY(__memset) -ENTRY(memset) +SYM_FUNC_START_ALIAS(__memset) +SYM_FUNC_START_PI(memset) mov dst, dstin /* Preserve return value. */ and A_lw, val, #255 orr A_lw, A_lw, A_lw, lsl #8 @@ -203,7 +203,7 @@ ENTRY(memset) ands count, count, zva_bits_x b.ne .Ltail_maybe_long ret -ENDPIPROC(memset) +SYM_FUNC_END_PI(memset) EXPORT_SYMBOL(memset) -ENDPROC(__memset) +SYM_FUNC_END_ALIAS(__memset) EXPORT_SYMBOL(__memset) diff --git a/arch/arm64/lib/strchr.S b/arch/arm64/lib/strchr.S index ca3ec18171a4..1f47eae3b0d6 100644 --- a/arch/arm64/lib/strchr.S +++ b/arch/arm64/lib/strchr.S @@ -18,7 +18,7 @@ * Returns: * x0 - address of first occurrence of 'c' or 0 */ -WEAK(strchr) +SYM_FUNC_START_WEAK(strchr) and w1, w1, #0xff 1: ldrb w2, [x0], #1 cmp w2, w1 @@ -28,5 +28,5 @@ WEAK(strchr) cmp w2, w1 csel x0, x0, xzr, eq ret -ENDPROC(strchr) +SYM_FUNC_END(strchr) EXPORT_SYMBOL_NOKASAN(strchr) diff --git a/arch/arm64/lib/strcmp.S b/arch/arm64/lib/strcmp.S index e9aefbe0b740..4767540d1b94 100644 --- a/arch/arm64/lib/strcmp.S +++ b/arch/arm64/lib/strcmp.S @@ -48,7 +48,7 @@ tmp3 .req x9 zeroones .req x10 pos .req x11 -WEAK(strcmp) +SYM_FUNC_START_WEAK_PI(strcmp) eor tmp1, src1, src2 mov zeroones, #REP8_01 tst tmp1, #7 @@ -219,5 +219,5 @@ CPU_BE( orr syndrome, diff, has_nul ) lsr data1, data1, #56 sub result, data1, data2, lsr #56 ret -ENDPIPROC(strcmp) +SYM_FUNC_END_PI(strcmp) EXPORT_SYMBOL_NOKASAN(strcmp) diff --git a/arch/arm64/lib/strlen.S b/arch/arm64/lib/strlen.S index 87b0cb066915..ee3ed882dd79 100644 --- a/arch/arm64/lib/strlen.S +++ b/arch/arm64/lib/strlen.S @@ -44,7 +44,7 @@ pos .req x12 #define REP8_7f 0x7f7f7f7f7f7f7f7f #define REP8_80 0x8080808080808080 -WEAK(strlen) +SYM_FUNC_START_WEAK_PI(strlen) mov zeroones, #REP8_01 bic src, srcin, #15 ands tmp1, srcin, #15 @@ -111,5 +111,5 @@ CPU_LE( lsr tmp2, tmp2, tmp1 ) /* Shift (tmp1 & 63). */ csinv data1, data1, xzr, le csel data2, data2, data2a, le b .Lrealigned -ENDPIPROC(strlen) +SYM_FUNC_END_PI(strlen) EXPORT_SYMBOL_NOKASAN(strlen) diff --git a/arch/arm64/lib/strncmp.S b/arch/arm64/lib/strncmp.S index f571581888fa..2a7ee949ed47 100644 --- a/arch/arm64/lib/strncmp.S +++ b/arch/arm64/lib/strncmp.S @@ -52,7 +52,7 @@ limit_wd .req x13 mask .req x14 endloop .req x15 -WEAK(strncmp) +SYM_FUNC_START_WEAK_PI(strncmp) cbz limit, .Lret0 eor tmp1, src1, src2 mov zeroones, #REP8_01 @@ -295,5 +295,5 @@ CPU_BE( orr syndrome, diff, has_nul ) .Lret0: mov result, #0 ret -ENDPIPROC(strncmp) +SYM_FUNC_END_PI(strncmp) EXPORT_SYMBOL_NOKASAN(strncmp) diff --git a/arch/arm64/lib/strnlen.S b/arch/arm64/lib/strnlen.S index c0bac9493c68..b72913a99038 100644 --- a/arch/arm64/lib/strnlen.S +++ b/arch/arm64/lib/strnlen.S @@ -47,7 +47,7 @@ limit_wd .req x14 #define REP8_7f 0x7f7f7f7f7f7f7f7f #define REP8_80 0x8080808080808080 -WEAK(strnlen) +SYM_FUNC_START_WEAK_PI(strnlen) cbz limit, .Lhit_limit mov zeroones, #REP8_01 bic src, srcin, #15 @@ -156,5 +156,5 @@ CPU_LE( lsr tmp2, tmp2, tmp4 ) /* Shift (tmp1 & 63). */ .Lhit_limit: mov len, limit ret -ENDPIPROC(strnlen) +SYM_FUNC_END_PI(strnlen) EXPORT_SYMBOL_NOKASAN(strnlen) diff --git a/arch/arm64/lib/strrchr.S b/arch/arm64/lib/strrchr.S index 794ac49ea433..13132d1ed6d1 100644 --- a/arch/arm64/lib/strrchr.S +++ b/arch/arm64/lib/strrchr.S @@ -18,7 +18,7 @@ * Returns: * x0 - address of last occurrence of 'c' or 0 */ -WEAK(strrchr) +SYM_FUNC_START_WEAK_PI(strrchr) mov x3, #0 and w1, w1, #0xff 1: ldrb w2, [x0], #1 @@ -29,5 +29,5 @@ WEAK(strrchr) b 1b 2: mov x0, x3 ret -ENDPIPROC(strrchr) +SYM_FUNC_END_PI(strrchr) EXPORT_SYMBOL_NOKASAN(strrchr) diff --git a/arch/arm64/lib/tishift.S b/arch/arm64/lib/tishift.S index 047622536535..a88613834fb0 100644 --- a/arch/arm64/lib/tishift.S +++ b/arch/arm64/lib/tishift.S @@ -7,7 +7,7 @@ #include -ENTRY(__ashlti3) +SYM_FUNC_START(__ashlti3) cbz x2, 1f mov x3, #64 sub x3, x3, x2 @@ -26,10 +26,10 @@ ENTRY(__ashlti3) lsl x1, x0, x1 mov x0, x2 ret -ENDPROC(__ashlti3) +SYM_FUNC_END(__ashlti3) EXPORT_SYMBOL(__ashlti3) -ENTRY(__ashrti3) +SYM_FUNC_START(__ashrti3) cbz x2, 1f mov x3, #64 sub x3, x3, x2 @@ -48,10 +48,10 @@ ENTRY(__ashrti3) asr x0, x1, x0 mov x1, x2 ret -ENDPROC(__ashrti3) +SYM_FUNC_END(__ashrti3) EXPORT_SYMBOL(__ashrti3) -ENTRY(__lshrti3) +SYM_FUNC_START(__lshrti3) cbz x2, 1f mov x3, #64 sub x3, x3, x2 @@ -70,5 +70,5 @@ ENTRY(__lshrti3) lsr x0, x1, x0 mov x1, x2 ret -ENDPROC(__lshrti3) +SYM_FUNC_END(__lshrti3) EXPORT_SYMBOL(__lshrti3) -- cgit v1.2.3 From ca16a42f5f0da332a1756e7b48b228b6d3bcf24b Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Thu, 29 Oct 2020 11:19:51 -0700 Subject: arm64: Change .weak to SYM_FUNC_START_WEAK_PI for arch/arm64/lib/mem*.S commit ec9d78070de986ecf581ea204fd322af4d2477ec upstream. Commit 39d114ddc682 ("arm64: add KASAN support") added .weak directives to arch/arm64/lib/mem*.S instead of changing the existing SYM_FUNC_START_PI macros. This can lead to the assembly snippet `.weak memcpy ... .globl memcpy` which will produce a STB_WEAK memcpy with GNU as but STB_GLOBAL memcpy with LLVM's integrated assembler before LLVM 12. LLVM 12 (since https://reviews.llvm.org/D90108) will error on such an overridden symbol binding. Use the appropriate SYM_FUNC_START_WEAK_PI instead. Fixes: 39d114ddc682 ("arm64: add KASAN support") Reported-by: Sami Tolvanen Signed-off-by: Fangrui Song Tested-by: Sami Tolvanen Tested-by: Nick Desaulniers Reviewed-by: Nick Desaulniers Cc: Link: https://lore.kernel.org/r/20201029181951.1866093-1-maskray@google.com Signed-off-by: Will Deacon Cc: Jian Cai Signed-off-by: Greg Kroah-Hartman --- arch/arm64/lib/memcpy.S | 3 +-- arch/arm64/lib/memmove.S | 3 +-- arch/arm64/lib/memset.S | 3 +-- 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/arch/arm64/lib/memcpy.S b/arch/arm64/lib/memcpy.S index 9f382adfa88a..b03cbb3455d4 100644 --- a/arch/arm64/lib/memcpy.S +++ b/arch/arm64/lib/memcpy.S @@ -56,9 +56,8 @@ stp \ptr, \regB, [\regC], \val .endm - .weak memcpy SYM_FUNC_START_ALIAS(__memcpy) -SYM_FUNC_START_PI(memcpy) +SYM_FUNC_START_WEAK_PI(memcpy) #include "copy_template.S" ret SYM_FUNC_END_PI(memcpy) diff --git a/arch/arm64/lib/memmove.S b/arch/arm64/lib/memmove.S index 02cda2e33bde..1035dce4bdaf 100644 --- a/arch/arm64/lib/memmove.S +++ b/arch/arm64/lib/memmove.S @@ -45,9 +45,8 @@ C_h .req x12 D_l .req x13 D_h .req x14 - .weak memmove SYM_FUNC_START_ALIAS(__memmove) -SYM_FUNC_START_PI(memmove) +SYM_FUNC_START_WEAK_PI(memmove) cmp dstin, src b.lo __memcpy add tmp1, src, count diff --git a/arch/arm64/lib/memset.S b/arch/arm64/lib/memset.S index 77c3c7ba0084..a9c1c9a01ea9 100644 --- a/arch/arm64/lib/memset.S +++ b/arch/arm64/lib/memset.S @@ -42,9 +42,8 @@ dst .req x8 tmp3w .req w9 tmp3 .req x9 - .weak memset SYM_FUNC_START_ALIAS(__memset) -SYM_FUNC_START_PI(memset) +SYM_FUNC_START_WEAK_PI(memset) mov dst, dstin /* Preserve return value. */ and A_lw, val, #255 orr A_lw, A_lw, A_lw, lsl #8 -- cgit v1.2.3 From 76e5bba75a631602d10399eba4b38a791bc8a1ec Mon Sep 17 00:00:00 2001 From: Hoang Huu Le Date: Thu, 27 Aug 2020 09:56:51 +0700 Subject: tipc: fix use-after-free in tipc_bcast_get_mode commit fdeba99b1e58ecd18c2940c453e19e4ef20ff591 upstream. Syzbot has reported those issues as: ================================================================== BUG: KASAN: use-after-free in tipc_bcast_get_mode+0x3ab/0x400 net/tipc/bcast.c:759 Read of size 1 at addr ffff88805e6b3571 by task kworker/0:6/3850 CPU: 0 PID: 3850 Comm: kworker/0:6 Not tainted 5.8.0-rc7-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Workqueue: events tipc_net_finalize_work Thread 1's call trace: [...] kfree+0x103/0x2c0 mm/slab.c:3757 <- bcbase releasing tipc_bcast_stop+0x1b0/0x2f0 net/tipc/bcast.c:721 tipc_exit_net+0x24/0x270 net/tipc/core.c:112 [...] Thread 2's call trace: [...] tipc_bcast_get_mode+0x3ab/0x400 net/tipc/bcast.c:759 <- bcbase has already been freed by Thread 1 tipc_node_broadcast+0x9e/0xcc0 net/tipc/node.c:1744 tipc_nametbl_publish+0x60b/0x970 net/tipc/name_table.c:752 tipc_net_finalize net/tipc/net.c:141 [inline] tipc_net_finalize+0x1fa/0x310 net/tipc/net.c:131 tipc_net_finalize_work+0x55/0x80 net/tipc/net.c:150 [...] ================================================================== BUG: KASAN: use-after-free in tipc_named_reinit+0xef/0x290 net/tipc/name_distr.c:344 Read of size 8 at addr ffff888052ab2000 by task kworker/0:13/30628 CPU: 0 PID: 30628 Comm: kworker/0:13 Not tainted 5.8.0-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Workqueue: events tipc_net_finalize_work Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x1f0/0x31e lib/dump_stack.c:118 print_address_description+0x66/0x5a0 mm/kasan/report.c:383 __kasan_report mm/kasan/report.c:513 [inline] kasan_report+0x132/0x1d0 mm/kasan/report.c:530 tipc_named_reinit+0xef/0x290 net/tipc/name_distr.c:344 tipc_net_finalize+0x85/0xe0 net/tipc/net.c:138 tipc_net_finalize_work+0x50/0x70 net/tipc/net.c:150 process_one_work+0x789/0xfc0 kernel/workqueue.c:2269 worker_thread+0xaa4/0x1460 kernel/workqueue.c:2415 kthread+0x37e/0x3a0 drivers/block/aoe/aoecmd.c:1234 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:293 [...] Freed by task 14058: save_stack mm/kasan/common.c:48 [inline] set_track mm/kasan/common.c:56 [inline] kasan_set_free_info mm/kasan/common.c:316 [inline] __kasan_slab_free+0x114/0x170 mm/kasan/common.c:455 __cache_free mm/slab.c:3426 [inline] kfree+0x10a/0x220 mm/slab.c:3757 tipc_exit_net+0x29/0x50 net/tipc/core.c:113 ops_exit_list net/core/net_namespace.c:186 [inline] cleanup_net+0x708/0xba0 net/core/net_namespace.c:603 process_one_work+0x789/0xfc0 kernel/workqueue.c:2269 worker_thread+0xaa4/0x1460 kernel/workqueue.c:2415 kthread+0x37e/0x3a0 drivers/block/aoe/aoecmd.c:1234 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:293 Fix it by calling flush_scheduled_work() to make sure the tipc_net_finalize_work() stopped before releasing bcbase object. Reported-by: syzbot+6ea1f7a8df64596ef4d7@syzkaller.appspotmail.com Reported-by: syzbot+e9cc557752ab126c1b99@syzkaller.appspotmail.com Acked-by: Jon Maloy Signed-off-by: Hoang Huu Le Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/tipc/core.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/tipc/core.c b/net/tipc/core.c index 12192e7f4050..2374adb50558 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -98,6 +98,11 @@ static void __net_exit tipc_exit_net(struct net *net) { tipc_detach_loopback(net); tipc_net_stop(net); + + /* Make sure the tipc_net_finalize_work stopped + * before releasing the resources. + */ + flush_scheduled_work(); tipc_bcast_stop(net); tipc_nametbl_stop(net); tipc_sk_rht_destroy(net); -- cgit v1.2.3 From 1695fca8a923df8ec971bada7e3dce5ff74099c4 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 1 Nov 2020 17:07:44 -0800 Subject: ptrace: fix task_join_group_stop() for the case when current is traced commit 7b3c36fc4c231ca532120bbc0df67a12f09c1d96 upstream. This testcase #include #include #include #include #include #include #include void *tf(void *arg) { return NULL; } int main(void) { int pid = fork(); if (!pid) { kill(getpid(), SIGSTOP); pthread_t th; pthread_create(&th, NULL, tf, NULL); return 0; } waitpid(pid, NULL, WSTOPPED); ptrace(PTRACE_SEIZE, pid, 0, PTRACE_O_TRACECLONE); waitpid(pid, NULL, 0); ptrace(PTRACE_CONT, pid, 0,0); waitpid(pid, NULL, 0); int status; int thread = waitpid(-1, &status, 0); assert(thread > 0 && thread != pid); assert(status == 0x80137f); return 0; } fails and triggers WARN_ON_ONCE(!signr) in do_jobctl_trap(). This is because task_join_group_stop() has 2 problems when current is traced: 1. We can't rely on the "JOBCTL_STOP_PENDING" check, a stopped tracee can be woken up by debugger and it can clone another thread which should join the group-stop. We need to check group_stop_count || SIGNAL_STOP_STOPPED. 2. If SIGNAL_STOP_STOPPED is already set, we should not increment sig->group_stop_count and add JOBCTL_STOP_CONSUME. The new thread should stop without another do_notify_parent_cldstop() report. To clarify, the problem is very old and we should blame ptrace_init_task(). But now that we have task_join_group_stop() it makes more sense to fix this helper to avoid the code duplication. Reported-by: syzbot+3485e3773f7da290eecc@syzkaller.appspotmail.com Signed-off-by: Oleg Nesterov Signed-off-by: Andrew Morton Cc: Jens Axboe Cc: Christian Brauner Cc: "Eric W . Biederman" Cc: Zhiqiang Liu Cc: Tejun Heo Cc: Link: https://lkml.kernel.org/r/20201019134237.GA18810@redhat.com Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- kernel/signal.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/kernel/signal.c b/kernel/signal.c index 595a36ab87d0..8c97fc72d78b 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -391,16 +391,17 @@ static bool task_participate_group_stop(struct task_struct *task) void task_join_group_stop(struct task_struct *task) { + unsigned long mask = current->jobctl & JOBCTL_STOP_SIGMASK; + struct signal_struct *sig = current->signal; + + if (sig->group_stop_count) { + sig->group_stop_count++; + mask |= JOBCTL_STOP_CONSUME; + } else if (!(sig->flags & SIGNAL_STOP_STOPPED)) + return; + /* Have the new thread join an on-going signal group stop */ - unsigned long jobctl = current->jobctl; - if (jobctl & JOBCTL_STOP_PENDING) { - struct signal_struct *sig = current->signal; - unsigned long signr = jobctl & JOBCTL_STOP_SIGMASK; - unsigned long gstop = JOBCTL_STOP_PENDING | JOBCTL_STOP_CONSUME; - if (task_set_jobctl_pending(task, signr | gstop)) { - sig->group_stop_count++; - } - } + task_set_jobctl_pending(task, mask | JOBCTL_STOP_PENDING); } /* -- cgit v1.2.3 From 57bb59f9d8fb6d32a5f22aa0708a1419d311e25c Mon Sep 17 00:00:00 2001 From: Mark Deneen Date: Fri, 30 Oct 2020 15:58:14 +0000 Subject: cadence: force nonlinear buffers to be cloned MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 403dc16796f5516acf23d94a1cd9eba564d03210 ] In my test setup, I had a SAMA5D27 device configured with ip forwarding, and second device with usb ethernet (r8152) sending ICMP packets.  If the packet was larger than about 220 bytes, the SAMA5 device would "oops" with the following trace: kernel BUG at net/core/skbuff.c:1863! Internal error: Oops - BUG: 0 [#1] ARM Modules linked in: xt_MASQUERADE ppp_async ppp_generic slhc iptable_nat xt_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 can_raw can bridge stp llc ipt_REJECT nf_reject_ipv4 sd_mod cdc_ether usbnet usb_storage r8152 scsi_mod mii o ption usb_wwan usbserial micrel macb at91_sama5d2_adc phylink gpio_sama5d2_piobu m_can_platform m_can industrialio_triggered_buffer kfifo_buf of_mdio can_dev fixed_phy sdhci_of_at91 sdhci_pltfm libphy sdhci mmc_core ohci_at91 ehci_atmel o hci_hcd iio_rescale industrialio sch_fq_codel spidev prox2_hal(O) CPU: 0 PID: 0 Comm: swapper Tainted: G           O      5.9.1-prox2+ #1 Hardware name: Atmel SAMA5 PC is at skb_put+0x3c/0x50 LR is at macb_start_xmit+0x134/0xad0 [macb] pc : []    lr : []    psr: 20070113 sp : c0d01a60  ip : c07232c0  fp : c4250000 r10: c0d03cc8  r9 : 00000000  r8 : c0d038c0 r7 : 00000000  r6 : 00000008  r5 : c59b66c0  r4 : 0000002a r3 : 8f659eff  r2 : c59e9eea  r1 : 00000001  r0 : c59b66c0 Flags: nzCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment none Control: 10c53c7d  Table: 2640c059  DAC: 00000051 Process swapper (pid: 0, stack limit = 0x75002d81) [] (skb_put) from [] (macb_start_xmit+0x134/0xad0 [macb]) [] (macb_start_xmit [macb]) from [] (dev_hard_start_xmit+0x90/0x11c) [] (dev_hard_start_xmit) from [] (sch_direct_xmit+0x124/0x260) [] (sch_direct_xmit) from [] (__dev_queue_xmit+0x4b0/0x6d0) [] (__dev_queue_xmit) from [] (ip_finish_output2+0x350/0x580) [] (ip_finish_output2) from [] (ip_output+0xb4/0x13c) [] (ip_output) from [] (ip_forward+0x474/0x500) [] (ip_forward) from [] (ip_sublist_rcv_finish+0x3c/0x50) [] (ip_sublist_rcv_finish) from [] (ip_sublist_rcv+0x11c/0x188) [] (ip_sublist_rcv) from [] (ip_list_rcv+0xf8/0x124) [] (ip_list_rcv) from [] (__netif_receive_skb_list_core+0x1a0/0x20c) [] (__netif_receive_skb_list_core) from [] (netif_receive_skb_list_internal+0x194/0x230) [] (netif_receive_skb_list_internal) from [] (gro_normal_list.part.0+0x14/0x28) [] (gro_normal_list.part.0) from [] (napi_complete_done+0x16c/0x210) [] (napi_complete_done) from [] (r8152_poll+0x684/0x708 [r8152]) [] (r8152_poll [r8152]) from [] (net_rx_action+0x100/0x328) [] (net_rx_action) from [] (__do_softirq+0xec/0x274) [] (__do_softirq) from [] (irq_exit+0xcc/0xd0) [] (irq_exit) from [] (__handle_domain_irq+0x58/0xa4) [] (__handle_domain_irq) from [] (__irq_svc+0x6c/0x90) Exception stack(0xc0d01ef0 to 0xc0d01f38) 1ee0:                                     00000000 0000003d 0c31f383 c0d0fa00 1f00: c0d2eb80 00000000 c0d2e630 4dad8c49 4da967b0 0000003d 0000003d 00000000 1f20: fffffff5 c0d01f40 c04e0f88 c04e0f8c 30070013 ffffffff [] (__irq_svc) from [] (cpuidle_enter_state+0x7c/0x378) [] (cpuidle_enter_state) from [] (cpuidle_enter+0x28/0x38) [] (cpuidle_enter) from [] (do_idle+0x194/0x214) [] (do_idle) from [] (cpu_startup_entry+0xc/0x14) [] (cpu_startup_entry) from [] (start_kernel+0x46c/0x4a0) Code: e580c054 8a000002 e1a00002 e8bd8070 (e7f001f2) ---[ end trace 146c8a334115490c ]--- The solution was to force nonlinear buffers to be cloned.  This was previously reported by Klaus Doth (https://www.spinics.net/lists/netdev/msg556937.html) but never formally submitted as a patch. This is the third revision, hopefully the formatting is correct this time! Suggested-by: Klaus Doth Fixes: 653e92a9175e ("net: macb: add support for padding and fcs computation") Signed-off-by: Mark Deneen Link: https://lore.kernel.org/r/20201030155814.622831-1-mdeneen@saucontech.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/cadence/macb_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index a5c4d4d66df3..3f74416bb874 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -1718,7 +1718,8 @@ static inline int macb_clear_csum(struct sk_buff *skb) static int macb_pad_and_fcs(struct sk_buff **skb, struct net_device *ndev) { - bool cloned = skb_cloned(*skb) || skb_header_cloned(*skb); + bool cloned = skb_cloned(*skb) || skb_header_cloned(*skb) || + skb_is_nonlinear(*skb); int padlen = ETH_ZLEN - (*skb)->len; int headroom = skb_headroom(*skb); int tailroom = skb_tailroom(*skb); -- cgit v1.2.3 From 14d755a4815ee7c168cf6b839d4b312f166a2e37 Mon Sep 17 00:00:00 2001 From: Vinay Kumar Yadav Date: Mon, 2 Nov 2020 23:06:51 +0530 Subject: chelsio/chtls: fix memory leaks caused by a race [ Upstream commit 8080b462b6aa856ae05ea010441a702599e579f2 ] race between user context and softirq causing memleak, consider the call sequence scenario chtls_setkey() //user context chtls_peer_close() chtls_abort_req_rss() chtls_setkey() //user context work request skb queued in chtls_setkey() won't be freed because resources are already cleaned for this connection, fix it by not queuing work request while socket is closing. v1->v2: - fix W=1 warning. v2->v3: - separate it out from another memleak fix. Fixes: cc35c88ae4db ("crypto : chtls - CPL handler definition") Signed-off-by: Vinay Kumar Yadav Link: https://lore.kernel.org/r/20201102173650.24754-1-vinay.yadav@chelsio.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/chelsio/chtls/chtls_hw.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/crypto/chelsio/chtls/chtls_hw.c b/drivers/crypto/chelsio/chtls/chtls_hw.c index a217fe72602d..3ef723e08953 100644 --- a/drivers/crypto/chelsio/chtls/chtls_hw.c +++ b/drivers/crypto/chelsio/chtls/chtls_hw.c @@ -357,6 +357,9 @@ int chtls_setkey(struct chtls_sock *csk, u32 keylen, u32 optname) if (ret) goto out_notcb; + if (unlikely(csk_flag(sk, CSK_ABORT_SHUTDOWN))) + goto out_notcb; + set_wr_txq(skb, CPL_PRIORITY_DATA, csk->tlshws.txqid); csk->wr_credits -= DIV_ROUND_UP(len, 16); csk->wr_unacked += DIV_ROUND_UP(len, 16); -- cgit v1.2.3 From 7bf7b7c385a145221a37c81d485baa37fea901a0 Mon Sep 17 00:00:00 2001 From: Vinay Kumar Yadav Date: Mon, 2 Nov 2020 23:09:10 +0530 Subject: chelsio/chtls: fix always leaking ctrl_skb [ Upstream commit dbfe394dad33f99cf8458be50483ec40a5d29c34 ] Correct skb refcount in alloc_ctrl_skb(), causing skb memleak when chtls_send_abort() called with NULL skb. it was always leaking the skb, correct it by incrementing skb refs by one. Fixes: cc35c88ae4db ("crypto : chtls - CPL handler definition") Signed-off-by: Vinay Kumar Yadav Link: https://lore.kernel.org/r/20201102173909.24826-1-vinay.yadav@chelsio.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/chelsio/chtls/chtls_cm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/chelsio/chtls/chtls_cm.c b/drivers/crypto/chelsio/chtls/chtls_cm.c index e2c64c943cfc..385bd4dc6686 100644 --- a/drivers/crypto/chelsio/chtls/chtls_cm.c +++ b/drivers/crypto/chelsio/chtls/chtls_cm.c @@ -174,7 +174,7 @@ static struct sk_buff *alloc_ctrl_skb(struct sk_buff *skb, int len) { if (likely(skb && !skb_shared(skb) && !skb_cloned(skb))) { __skb_trim(skb, 0); - refcount_add(2, &skb->users); + refcount_inc(&skb->users); } else { skb = alloc_skb(len, GFP_KERNEL | __GFP_NOFAIL); } -- cgit v1.2.3 From 5b66a5b6a9e239d1d1f84b74539f745d72d00f16 Mon Sep 17 00:00:00 2001 From: Claudiu Manoil Date: Thu, 29 Oct 2020 10:10:56 +0200 Subject: gianfar: Replace skb_realloc_headroom with skb_cow_head for PTP [ Upstream commit d145c9031325fed963a887851d9fa42516efd52b ] When PTP timestamping is enabled on Tx, the controller inserts the Tx timestamp at the beginning of the frame buffer, between SFD and the L2 frame header. This means that the skb provided by the stack is required to have enough headroom otherwise a new skb needs to be created by the driver to accommodate the timestamp inserted by h/w. Up until now the driver was relying on skb_realloc_headroom() to create new skbs to accommodate PTP frames. Turns out that this method is not reliable in this context at least, as skb_realloc_headroom() for PTP frames can cause random crashes, mostly in subsequent skb_*() calls, when multiple concurrent TCP streams are run at the same time with the PTP flow on the same device (as seen in James' report). I also noticed that when the system is loaded by sending multiple TCP streams, the driver receives cloned skbs in large numbers. skb_cow_head() instead proves to be stable in this scenario, and not only handles cloned skbs too but it's also more efficient and widely used in other drivers. The commit introducing skb_realloc_headroom in the driver goes back to 2009, commit 93c1285c5d92 ("gianfar: reallocate skb when headroom is not enough for fcb"). For practical purposes I'm referencing a newer commit (from 2012) that brings the code to its current structure (and fixes the PTP case). Fixes: 9c4886e5e63b ("gianfar: Fix invalid TX frames returned on error queue when time stamping") Reported-by: James Jurack Suggested-by: Jakub Kicinski Signed-off-by: Claudiu Manoil Link: https://lore.kernel.org/r/20201029081057.8506-1-claudiu.manoil@nxp.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/freescale/gianfar.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index 3978d82c9598..d1bcabeda049 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -1826,20 +1826,12 @@ static netdev_tx_t gfar_start_xmit(struct sk_buff *skb, struct net_device *dev) fcb_len = GMAC_FCB_LEN + GMAC_TXPAL_LEN; /* make space for additional header when fcb is needed */ - if (fcb_len && unlikely(skb_headroom(skb) < fcb_len)) { - struct sk_buff *skb_new; - - skb_new = skb_realloc_headroom(skb, fcb_len); - if (!skb_new) { + if (fcb_len) { + if (unlikely(skb_cow_head(skb, fcb_len))) { dev->stats.tx_errors++; dev_kfree_skb_any(skb); return NETDEV_TX_OK; } - - if (skb->sk) - skb_set_owner_w(skb_new, skb->sk); - dev_consume_skb_any(skb); - skb = skb_new; } /* total number of fragments in the SKB */ -- cgit v1.2.3 From 6ef3bcc25a3e85c1325398c3f605123715814b98 Mon Sep 17 00:00:00 2001 From: Claudiu Manoil Date: Tue, 20 Oct 2020 20:36:05 +0300 Subject: gianfar: Account for Tx PTP timestamp in the skb headroom [ Upstream commit d6a076d68c6b5d6a5800f3990a513facb7016dea ] When PTP timestamping is enabled on Tx, the controller inserts the Tx timestamp at the beginning of the frame buffer, between SFD and the L2 frame header. This means that the skb provided by the stack is required to have enough headroom otherwise a new skb needs to be created by the driver to accommodate the timestamp inserted by h/w. Up until now the driver was relying on the second option, using skb_realloc_headroom() to create a new skb to accommodate PTP frames. Turns out that this method is not reliable, as reallocation of skbs for PTP frames along with the required overhead (skb_set_owner_w, consume_skb) is causing random crashes in subsequent skb_*() calls, when multiple concurrent TCP streams are run at the same time on the same device (as seen in James' report). Note that these crashes don't occur with a single TCP stream, nor with multiple concurrent UDP streams, but only when multiple TCP streams are run concurrently with the PTP packet flow (doing skb reallocation). This patch enforces the first method, by requesting enough headroom from the stack to accommodate PTP frames, and so avoiding skb_realloc_headroom() & co, and the crashes no longer occur. There's no reason not to set needed_headroom to a large enough value to accommodate PTP frames, so in this regard this patch is a fix. Reported-by: James Jurack Fixes: bee9e58c9e98 ("gianfar:don't add FCB length to hard_header_len") Signed-off-by: Claudiu Manoil Link: https://lore.kernel.org/r/20201020173605.1173-1-claudiu.manoil@nxp.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/freescale/gianfar.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index d1bcabeda049..5cb58ab1eec9 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -3369,7 +3369,7 @@ static int gfar_probe(struct platform_device *ofdev) if (dev->features & NETIF_F_IP_CSUM || priv->device_flags & FSL_GIANFAR_DEV_HAS_TIMER) - dev->needed_headroom = GMAC_FCB_LEN; + dev->needed_headroom = GMAC_FCB_LEN + GMAC_TXPAL_LEN; /* Initializing some of the rx/tx queue level parameters */ for (i = 0; i < priv->num_tx_queues; i++) { -- cgit v1.2.3 From ac343efb572c55f6a237a5fd37f44efd58fe7194 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Wed, 4 Nov 2020 11:56:06 -0800 Subject: ionic: check port ptr before use [ Upstream commit 2bcbf42add911ef63a6d90e92001dc2bcb053e68 ] Check for corner case of port_init failure before using the port_info pointer. Fixes: 4d03e00a2140 ("ionic: Add initial ethtool support") Signed-off-by: Shannon Nelson Link: https://lore.kernel.org/r/20201104195606.61184-1-snelson@pensando.io Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/pensando/ionic/ionic_ethtool.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c index 5aacc00962df..b8de3784d976 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c @@ -125,6 +125,11 @@ static int ionic_get_link_ksettings(struct net_device *netdev, ethtool_link_ksettings_zero_link_mode(ks, supported); + if (!idev->port_info) { + netdev_err(netdev, "port_info not initialized\n"); + return -EOPNOTSUPP; + } + /* The port_info data is found in a DMA space that the NIC keeps * up-to-date, so there's no need to request the data from the * NIC, we already have it in our memory space. -- cgit v1.2.3 From 8e3c047f814bf1f28449893613b7f5ae5f7e9674 Mon Sep 17 00:00:00 2001 From: wenxu Date: Fri, 30 Oct 2020 11:32:08 +0800 Subject: ip_tunnel: fix over-mtu packet send fail without TUNNEL_DONT_FRAGMENT flags [ Upstream commit 20149e9eb68c003eaa09e7c9a49023df40779552 ] The tunnel device such as vxlan, bareudp and geneve in the lwt mode set the outer df only based TUNNEL_DONT_FRAGMENT. And this was also the behavior for gre device before switching to use ip_md_tunnel_xmit in commit 962924fa2b7a ("ip_gre: Refactor collect metatdata mode tunnel xmit to ip_md_tunnel_xmit") When the ip_gre in lwt mode xmit with ip_md_tunnel_xmi changed the rule and make the discrepancy between handling of DF by different tunnels. So in the ip_md_tunnel_xmit should follow the same rule like other tunnels. Fixes: cfc7381b3002 ("ip_tunnel: add collect_md mode to IPIP tunnel") Signed-off-by: wenxu Link: https://lore.kernel.org/r/1604028728-31100-1-git-send-email-wenxu@ucloud.cn Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_tunnel.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index a0b4dc54f8a6..f61c5a0b502a 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -614,9 +614,6 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, ttl = ip4_dst_hoplimit(&rt->dst); } - if (!df && skb->protocol == htons(ETH_P_IP)) - df = inner_iph->frag_off & htons(IP_DF); - headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len; if (headroom > dev->needed_headroom) dev->needed_headroom = headroom; -- cgit v1.2.3 From 92e65059bedadea4aa4c5e7078f17011c2523d13 Mon Sep 17 00:00:00 2001 From: Daniele Palmas Date: Mon, 2 Nov 2020 12:01:08 +0100 Subject: net: usb: qmi_wwan: add Telit LE910Cx 0x1230 composition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 5fd8477ed8ca77e64b93d44a6dae4aa70c191396 ] Add support for Telit LE910Cx 0x1230 composition: 0x1230: tty, adb, rmnet, audio, tty, tty, tty, tty Signed-off-by: Daniele Palmas Acked-by: Bjørn Mork Link: https://lore.kernel.org/r/20201102110108.17244-1-dnlplm@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 21d905d90650..868acb410141 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1331,6 +1331,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x1bc7, 0x1101, 3)}, /* Telit ME910 dual modem */ {QMI_FIXED_INTF(0x1bc7, 0x1200, 5)}, /* Telit LE920 */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1201, 2)}, /* Telit LE920, LE920A4 */ + {QMI_QUIRK_SET_DTR(0x1bc7, 0x1230, 2)}, /* Telit LE910Cx */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1260, 2)}, /* Telit LE910Cx */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1261, 2)}, /* Telit LE910Cx */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1900, 1)}, /* Telit LN940 series */ -- cgit v1.2.3 From 26ffb891605967304c6f5cc28b88c0c319b738fc Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Fri, 30 Oct 2020 10:07:11 -0700 Subject: powerpc/vnic: Extend "failover pending" window [ Upstream commit 1d8504937478fdc2f3ef2174a816fd3302eca882 ] Commit 5a18e1e0c193b introduced the 'failover_pending' state to track the "failover pending window" - where we wait for the partner to become ready (after a transport event) before actually attempting to failover. i.e window is between following two events: a. we get a transport event due to a FAILOVER b. later, we get CRQ_INITIALIZED indicating the partner is ready at which point we schedule a FAILOVER reset. and ->failover_pending is true during this window. If during this window, we attempt to open (or close) a device, we pretend that the operation succeded and let the FAILOVER reset path complete the operation. This is fine, except if the transport event ("a" above) occurs during the open and after open has already checked whether a failover is pending. If that happens, we fail the open, which can cause the boot scripts to leave the interface down requiring administrator to manually bring up the device. This fix "extends" the failover pending window till we are _actually_ ready to perform the failover reset (i.e until after we get the RTNL lock). Since open() holds the RTNL lock, we can be sure that we either finish the open or if the open() fails due to the failover pending window, we can again pretend that open is done and let the failover complete it. We could try and block the open until failover is completed but a) that could still timeout the application and b) Existing code "pretends" that failover occurred "just after" open succeeded, so marks the open successful and lets the failover complete the open. So, mark the open successful even if the transport event occurs before we actually start the open. Fixes: 5a18e1e0c193 ("ibmvnic: Fix failover case for non-redundant configuration") Signed-off-by: Sukadev Bhattiprolu Acked-by: Dany Madden Link: https://lore.kernel.org/r/20201030170711.1562994-1-sukadev@linux.ibm.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/ibm/ibmvnic.c | 36 ++++++++++++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 91559a52c7ad..f357b9cbfee7 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -1109,18 +1109,27 @@ static int ibmvnic_open(struct net_device *netdev) if (adapter->state != VNIC_CLOSED) { rc = ibmvnic_login(netdev); if (rc) - return rc; + goto out; rc = init_resources(adapter); if (rc) { netdev_err(netdev, "failed to initialize resources\n"); release_resources(adapter); - return rc; + goto out; } } rc = __ibmvnic_open(netdev); +out: + /* + * If open fails due to a pending failover, set device state and + * return. Device operation will be handled by reset routine. + */ + if (rc && adapter->failover_pending) { + adapter->state = VNIC_OPEN; + rc = 0; + } return rc; } @@ -1842,6 +1851,13 @@ static int do_reset(struct ibmvnic_adapter *adapter, rwi->reset_reason); rtnl_lock(); + /* + * Now that we have the rtnl lock, clear any pending failover. + * This will ensure ibmvnic_open() has either completed or will + * block until failover is complete. + */ + if (rwi->reset_reason == VNIC_RESET_FAILOVER) + adapter->failover_pending = false; netif_carrier_off(netdev); adapter->reset_reason = rwi->reset_reason; @@ -2112,6 +2128,13 @@ static void __ibmvnic_reset(struct work_struct *work) /* CHANGE_PARAM requestor holds rtnl_lock */ rc = do_change_param_reset(adapter, rwi, reset_state); } else if (adapter->force_reset_recovery) { + /* + * Since we are doing a hard reset now, clear the + * failover_pending flag so we don't ignore any + * future MOBILITY or other resets. + */ + adapter->failover_pending = false; + /* Transport event occurred during previous reset */ if (adapter->wait_for_reset) { /* Previous was CHANGE_PARAM; caller locked */ @@ -2176,9 +2199,15 @@ static int ibmvnic_reset(struct ibmvnic_adapter *adapter, unsigned long flags; int ret; + /* + * If failover is pending don't schedule any other reset. + * Instead let the failover complete. If there is already a + * a failover reset scheduled, we will detect and drop the + * duplicate reset when walking the ->rwi_list below. + */ if (adapter->state == VNIC_REMOVING || adapter->state == VNIC_REMOVED || - adapter->failover_pending) { + (adapter->failover_pending && reason != VNIC_RESET_FAILOVER)) { ret = EBUSY; netdev_dbg(netdev, "Adapter removing or pending failover, skipping reset\n"); goto err; @@ -4532,7 +4561,6 @@ static void ibmvnic_handle_crq(union ibmvnic_crq *crq, case IBMVNIC_CRQ_INIT: dev_info(dev, "Partner initialized\n"); adapter->from_passive_init = true; - adapter->failover_pending = false; if (!completion_done(&adapter->init_done)) { complete(&adapter->init_done); adapter->init_done_rc = -EIO; -- cgit v1.2.3 From 9b5458effeeefa503dab92fc397bb9d0230c8db1 Mon Sep 17 00:00:00 2001 From: Petr Malat Date: Fri, 30 Oct 2020 14:26:33 +0100 Subject: sctp: Fix COMM_LOST/CANT_STR_ASSOC err reporting on big-endian platforms [ Upstream commit b6df8c81412190fbd5eaa3cec7f642142d9c16cd ] Commit 978aa0474115 ("sctp: fix some type cast warnings introduced since very beginning")' broke err reading from sctp_arg, because it reads the value as 32-bit integer, although the value is stored as 16-bit integer. Later this value is passed to the userspace in 16-bit variable, thus the user always gets 0 on big-endian platforms. Fix it by reading the __u16 field of sctp_arg union, as reading err field would produce a sparse warning. Fixes: 978aa0474115 ("sctp: fix some type cast warnings introduced since very beginning") Signed-off-by: Petr Malat Acked-by: Marcelo Ricardo Leitner Link: https://lore.kernel.org/r/20201030132633.7045-1-oss@malat.biz Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/sctp/sm_sideeffect.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 6927b658dad3..4b7edb3645c3 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -1600,12 +1600,12 @@ static int sctp_cmd_interpreter(enum sctp_event_type event_type, break; case SCTP_CMD_INIT_FAILED: - sctp_cmd_init_failed(commands, asoc, cmd->obj.u32); + sctp_cmd_init_failed(commands, asoc, cmd->obj.u16); break; case SCTP_CMD_ASSOC_FAILED: sctp_cmd_assoc_failed(commands, asoc, event_type, - subtype, chunk, cmd->obj.u32); + subtype, chunk, cmd->obj.u16); break; case SCTP_CMD_INIT_COUNTER_INC: -- cgit v1.2.3 From e5ea79bb19f8f8ff57abb14ce20bfad7d1531935 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Sat, 31 Oct 2020 11:10:53 +0800 Subject: sfp: Fix error handing in sfp_probe() [ Upstream commit 9621618130bf7e83635367c13b9a6ee53935bb37 ] gpiod_to_irq() never return 0, but returns negative in case of error, check it and set gpio_irq to 0. Fixes: 73970055450e ("sfp: add SFP module support") Signed-off-by: YueHaibing Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20201031031053.25264-1-yuehaibing@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/phy/sfp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c index 272d5773573e..27b67f12ec45 100644 --- a/drivers/net/phy/sfp.c +++ b/drivers/net/phy/sfp.c @@ -1970,7 +1970,8 @@ static int sfp_probe(struct platform_device *pdev) continue; sfp->gpio_irq[i] = gpiod_to_irq(sfp->gpio[i]); - if (!sfp->gpio_irq[i]) { + if (sfp->gpio_irq[i] < 0) { + sfp->gpio_irq[i] = 0; poll = true; continue; } -- cgit v1.2.3 From 61402d61a2afd49b3be2e447bd8f15340b43f0bb Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Mon, 2 Nov 2020 13:32:42 -0500 Subject: Fonts: Replace discarded const qualifier commit 9522750c66c689b739e151fcdf895420dc81efc0 upstream. Commit 6735b4632def ("Fonts: Support FONT_EXTRA_WORDS macros for built-in fonts") introduced the following error when building rpc_defconfig (only this build appears to be affected): `acorndata_8x8' referenced in section `.text' of arch/arm/boot/compressed/ll_char_wr.o: defined in discarded section `.data' of arch/arm/boot/compressed/font.o `acorndata_8x8' referenced in section `.data.rel.ro' of arch/arm/boot/compressed/font.o: defined in discarded section `.data' of arch/arm/boot/compressed/font.o make[3]: *** [/scratch/linux/arch/arm/boot/compressed/Makefile:191: arch/arm/boot/compressed/vmlinux] Error 1 make[2]: *** [/scratch/linux/arch/arm/boot/Makefile:61: arch/arm/boot/compressed/vmlinux] Error 2 make[1]: *** [/scratch/linux/arch/arm/Makefile:317: zImage] Error 2 The .data section is discarded at link time. Reinstating acorndata_8x8 as const ensures it is still available after linking. Do the same for the other 12 built-in fonts as well, for consistency purposes. Cc: Cc: Russell King Reviewed-by: Greg Kroah-Hartman Fixes: 6735b4632def ("Fonts: Support FONT_EXTRA_WORDS macros for built-in fonts") Signed-off-by: Lee Jones Co-developed-by: Peilin Ye Signed-off-by: Peilin Ye Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20201102183242.2031659-1-yepeilin.cs@gmail.com Signed-off-by: Greg Kroah-Hartman --- lib/fonts/font_10x18.c | 2 +- lib/fonts/font_6x10.c | 2 +- lib/fonts/font_6x11.c | 2 +- lib/fonts/font_7x14.c | 2 +- lib/fonts/font_8x16.c | 2 +- lib/fonts/font_8x8.c | 2 +- lib/fonts/font_acorn_8x8.c | 2 +- lib/fonts/font_mini_4x6.c | 2 +- lib/fonts/font_pearl_8x8.c | 2 +- lib/fonts/font_sun12x22.c | 2 +- lib/fonts/font_sun8x16.c | 2 +- lib/fonts/font_ter16x32.c | 2 +- 12 files changed, 12 insertions(+), 12 deletions(-) diff --git a/lib/fonts/font_10x18.c b/lib/fonts/font_10x18.c index 0e2deac97da0..e02f9df24d1e 100644 --- a/lib/fonts/font_10x18.c +++ b/lib/fonts/font_10x18.c @@ -8,7 +8,7 @@ #define FONTDATAMAX 9216 -static struct font_data fontdata_10x18 = { +static const struct font_data fontdata_10x18 = { { 0, 0, FONTDATAMAX, 0 }, { /* 0 0x00 '^@' */ 0x00, 0x00, /* 0000000000 */ diff --git a/lib/fonts/font_6x10.c b/lib/fonts/font_6x10.c index 87da8acd07db..6e3c4b7691c8 100644 --- a/lib/fonts/font_6x10.c +++ b/lib/fonts/font_6x10.c @@ -3,7 +3,7 @@ #define FONTDATAMAX 2560 -static struct font_data fontdata_6x10 = { +static const struct font_data fontdata_6x10 = { { 0, 0, FONTDATAMAX, 0 }, { /* 0 0x00 '^@' */ 0x00, /* 00000000 */ diff --git a/lib/fonts/font_6x11.c b/lib/fonts/font_6x11.c index 5e975dfa10a5..2d22a24e816f 100644 --- a/lib/fonts/font_6x11.c +++ b/lib/fonts/font_6x11.c @@ -9,7 +9,7 @@ #define FONTDATAMAX (11*256) -static struct font_data fontdata_6x11 = { +static const struct font_data fontdata_6x11 = { { 0, 0, FONTDATAMAX, 0 }, { /* 0 0x00 '^@' */ 0x00, /* 00000000 */ diff --git a/lib/fonts/font_7x14.c b/lib/fonts/font_7x14.c index 86d298f38505..9cc7ae2e03f7 100644 --- a/lib/fonts/font_7x14.c +++ b/lib/fonts/font_7x14.c @@ -8,7 +8,7 @@ #define FONTDATAMAX 3584 -static struct font_data fontdata_7x14 = { +static const struct font_data fontdata_7x14 = { { 0, 0, FONTDATAMAX, 0 }, { /* 0 0x00 '^@' */ 0x00, /* 0000000 */ diff --git a/lib/fonts/font_8x16.c b/lib/fonts/font_8x16.c index 37cedd36ca5e..bab25dc59e8d 100644 --- a/lib/fonts/font_8x16.c +++ b/lib/fonts/font_8x16.c @@ -10,7 +10,7 @@ #define FONTDATAMAX 4096 -static struct font_data fontdata_8x16 = { +static const struct font_data fontdata_8x16 = { { 0, 0, FONTDATAMAX, 0 }, { /* 0 0x00 '^@' */ 0x00, /* 00000000 */ diff --git a/lib/fonts/font_8x8.c b/lib/fonts/font_8x8.c index 8ab695538395..109d0572368f 100644 --- a/lib/fonts/font_8x8.c +++ b/lib/fonts/font_8x8.c @@ -9,7 +9,7 @@ #define FONTDATAMAX 2048 -static struct font_data fontdata_8x8 = { +static const struct font_data fontdata_8x8 = { { 0, 0, FONTDATAMAX, 0 }, { /* 0 0x00 '^@' */ 0x00, /* 00000000 */ diff --git a/lib/fonts/font_acorn_8x8.c b/lib/fonts/font_acorn_8x8.c index 069b3e80c434..fb395f0d4031 100644 --- a/lib/fonts/font_acorn_8x8.c +++ b/lib/fonts/font_acorn_8x8.c @@ -5,7 +5,7 @@ #define FONTDATAMAX 2048 -static struct font_data acorndata_8x8 = { +static const struct font_data acorndata_8x8 = { { 0, 0, FONTDATAMAX, 0 }, { /* 00 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* ^@ */ /* 01 */ 0x7e, 0x81, 0xa5, 0x81, 0xbd, 0x99, 0x81, 0x7e, /* ^A */ diff --git a/lib/fonts/font_mini_4x6.c b/lib/fonts/font_mini_4x6.c index 1449876c6a27..592774a90917 100644 --- a/lib/fonts/font_mini_4x6.c +++ b/lib/fonts/font_mini_4x6.c @@ -43,7 +43,7 @@ __END__; #define FONTDATAMAX 1536 -static struct font_data fontdata_mini_4x6 = { +static const struct font_data fontdata_mini_4x6 = { { 0, 0, FONTDATAMAX, 0 }, { /*{*/ /* Char 0: ' ' */ diff --git a/lib/fonts/font_pearl_8x8.c b/lib/fonts/font_pearl_8x8.c index 32d65551e7ed..a6f95ebce950 100644 --- a/lib/fonts/font_pearl_8x8.c +++ b/lib/fonts/font_pearl_8x8.c @@ -14,7 +14,7 @@ #define FONTDATAMAX 2048 -static struct font_data fontdata_pearl8x8 = { +static const struct font_data fontdata_pearl8x8 = { { 0, 0, FONTDATAMAX, 0 }, { /* 0 0x00 '^@' */ 0x00, /* 00000000 */ diff --git a/lib/fonts/font_sun12x22.c b/lib/fonts/font_sun12x22.c index 641a6b4dca42..a5b65bd49604 100644 --- a/lib/fonts/font_sun12x22.c +++ b/lib/fonts/font_sun12x22.c @@ -3,7 +3,7 @@ #define FONTDATAMAX 11264 -static struct font_data fontdata_sun12x22 = { +static const struct font_data fontdata_sun12x22 = { { 0, 0, FONTDATAMAX, 0 }, { /* 0 0x00 '^@' */ 0x00, 0x00, /* 000000000000 */ diff --git a/lib/fonts/font_sun8x16.c b/lib/fonts/font_sun8x16.c index 193fe6d988e0..e577e76a6a7c 100644 --- a/lib/fonts/font_sun8x16.c +++ b/lib/fonts/font_sun8x16.c @@ -3,7 +3,7 @@ #define FONTDATAMAX 4096 -static struct font_data fontdata_sun8x16 = { +static const struct font_data fontdata_sun8x16 = { { 0, 0, FONTDATAMAX, 0 }, { /* */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* */ 0x00,0x00,0x7e,0x81,0xa5,0x81,0x81,0xbd,0x99,0x81,0x81,0x7e,0x00,0x00,0x00,0x00, diff --git a/lib/fonts/font_ter16x32.c b/lib/fonts/font_ter16x32.c index 91b9c283bd9c..f7c3abb6b99e 100644 --- a/lib/fonts/font_ter16x32.c +++ b/lib/fonts/font_ter16x32.c @@ -4,7 +4,7 @@ #define FONTDATAMAX 16384 -static struct font_data fontdata_ter16x32 = { +static const struct font_data fontdata_ter16x32 = { { 0, 0, FONTDATAMAX, 0 }, { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7f, 0xfc, 0x7f, 0xfc, -- cgit v1.2.3 From f7d0f72424058ee224ba66e10a5bd84b84ce7607 Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Tue, 27 Oct 2020 16:46:38 +0800 Subject: ALSA: hda/realtek - Fixed HP headset Mic can't be detected commit 8a8de09cb2adc119104f35044d1a840dd47aa9d8 upstream. System boot with plugged headset. It will not detect headset Mic. It will happen on cold boot restart resume state. Quirk by SSID change to quirk by pin verb. Fixes: 13468bfa8c58 ("ALSA: hda/realtek - set mic to auto detect on a HP AIO machine") Signed-off-by: Kailang Yang Cc: Link: https://lore.kernel.org/r/f42ae1ede1cf47029ae2bef1a42caf03@realtek.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 54 ++++++++++++++++++++++++++++++++++--------- 1 file changed, 43 insertions(+), 11 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 7a24e9f0d2fe..9bbb299d9733 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5990,6 +5990,27 @@ static void alc285_fixup_invalidate_dacs(struct hda_codec *codec, snd_hda_override_wcaps(codec, 0x03, 0); } +static void alc_combo_jack_hp_jd_restart(struct hda_codec *codec) +{ + switch (codec->core.vendor_id) { + case 0x10ec0274: + case 0x10ec0294: + case 0x10ec0225: + case 0x10ec0295: + case 0x10ec0299: + alc_update_coef_idx(codec, 0x4a, 0x8000, 1 << 15); /* Reset HP JD */ + alc_update_coef_idx(codec, 0x4a, 0x8000, 0 << 15); + break; + case 0x10ec0235: + case 0x10ec0236: + case 0x10ec0255: + case 0x10ec0256: + alc_update_coef_idx(codec, 0x1b, 0x8000, 1 << 15); /* Reset HP JD */ + alc_update_coef_idx(codec, 0x1b, 0x8000, 0 << 15); + break; + } +} + static void alc295_fixup_chromebook(struct hda_codec *codec, const struct hda_fixup *fix, int action) { @@ -6000,16 +6021,7 @@ static void alc295_fixup_chromebook(struct hda_codec *codec, spec->ultra_low_power = true; break; case HDA_FIXUP_ACT_INIT: - switch (codec->core.vendor_id) { - case 0x10ec0295: - alc_update_coef_idx(codec, 0x4a, 0x8000, 1 << 15); /* Reset HP JD */ - alc_update_coef_idx(codec, 0x4a, 0x8000, 0 << 15); - break; - case 0x10ec0236: - alc_update_coef_idx(codec, 0x1b, 0x8000, 1 << 15); /* Reset HP JD */ - alc_update_coef_idx(codec, 0x1b, 0x8000, 0 << 15); - break; - } + alc_combo_jack_hp_jd_restart(codec); break; } } @@ -6065,6 +6077,16 @@ static void alc285_fixup_hp_gpio_amp_init(struct hda_codec *codec, alc_write_coef_idx(codec, 0x65, 0x0); } +static void alc274_fixup_hp_headset_mic(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + switch (action) { + case HDA_FIXUP_ACT_INIT: + alc_combo_jack_hp_jd_restart(codec); + break; + } +} + /* for hda_fixup_thinkpad_acpi() */ #include "thinkpad_helper.c" @@ -6259,6 +6281,7 @@ enum { ALC256_FIXUP_INTEL_NUC8_RUGGED, ALC255_FIXUP_XIAOMI_HEADSET_MIC, ALC274_FIXUP_HP_MIC, + ALC274_FIXUP_HP_HEADSET_MIC, }; static const struct hda_fixup alc269_fixups[] = { @@ -7646,6 +7669,12 @@ static const struct hda_fixup alc269_fixups[] = { { } }, }, + [ALC274_FIXUP_HP_HEADSET_MIC] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc274_fixup_hp_headset_mic, + .chained = true, + .chain_id = ALC274_FIXUP_HP_MIC + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -7797,7 +7826,6 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x869d, "HP", ALC236_FIXUP_HP_MUTE_LED), SND_PCI_QUIRK(0x103c, 0x8729, "HP", ALC285_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8736, "HP", ALC285_FIXUP_HP_GPIO_AMP_INIT), - SND_PCI_QUIRK(0x103c, 0x874e, "HP", ALC274_FIXUP_HP_MIC), SND_PCI_QUIRK(0x103c, 0x8760, "HP", ALC285_FIXUP_HP_MUTE_LED), SND_PCI_QUIRK(0x103c, 0x877a, "HP", ALC285_FIXUP_HP_MUTE_LED), SND_PCI_QUIRK(0x103c, 0x877d, "HP", ALC236_FIXUP_HP_MUTE_LED), @@ -8353,6 +8381,10 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { {0x1a, 0x90a70130}, {0x1b, 0x90170110}, {0x21, 0x03211020}), + SND_HDA_PIN_QUIRK(0x10ec0274, 0x103c, "HP", ALC274_FIXUP_HP_HEADSET_MIC, + {0x17, 0x90170110}, + {0x19, 0x03a11030}, + {0x21, 0x03211020}), SND_HDA_PIN_QUIRK(0x10ec0280, 0x103c, "HP", ALC280_FIXUP_HP_GPIO4, {0x12, 0x90a60130}, {0x14, 0x90170110}, -- cgit v1.2.3 From 72ce616ed55adbae313e4711efc048ab532b8213 Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Tue, 3 Nov 2020 15:40:35 +0800 Subject: ALSA: hda/realtek - Enable headphone for ASUS TM420 commit ef9ce66fab959c66d270bbee7ca79b92ee957893 upstream. ASUS TM420 had depop circuit for headphone. It need to turn on by COEF bit. [ fixed the missing enum definition by tiwai ] Signed-off-by: Kailang Yang Cc: Link: https://lore.kernel.org/r/3d6177d7023b4783bf2793861c577ada@realtek.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 9bbb299d9733..d25c3bee56f8 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6282,6 +6282,7 @@ enum { ALC255_FIXUP_XIAOMI_HEADSET_MIC, ALC274_FIXUP_HP_MIC, ALC274_FIXUP_HP_HEADSET_MIC, + ALC256_FIXUP_ASUS_HPE, }; static const struct hda_fixup alc269_fixups[] = { @@ -7675,6 +7676,17 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC274_FIXUP_HP_MIC }, + [ALC256_FIXUP_ASUS_HPE] = { + .type = HDA_FIXUP_VERBS, + .v.verbs = (const struct hda_verb[]) { + /* Set EAPD high */ + { 0x20, AC_VERB_SET_COEF_INDEX, 0x0f }, + { 0x20, AC_VERB_SET_PROC_COEF, 0x7778 }, + { } + }, + .chained = true, + .chain_id = ALC294_FIXUP_ASUS_HEADSET_MIC + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -7858,6 +7870,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x1bbd, "ASUS Z550MA", ALC255_FIXUP_ASUS_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x1c23, "Asus X55U", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x1043, 0x1ccd, "ASUS X555UB", ALC256_FIXUP_ASUS_MIC), + SND_PCI_QUIRK(0x1043, 0x1d4e, "ASUS TM420", ALC256_FIXUP_ASUS_HPE), SND_PCI_QUIRK(0x1043, 0x1e11, "ASUS Zephyrus G15", ALC289_FIXUP_ASUS_GA502), SND_PCI_QUIRK(0x1043, 0x1f11, "ASUS Zephyrus G14", ALC289_FIXUP_ASUS_GA401), SND_PCI_QUIRK(0x1043, 0x1881, "ASUS Zephyrus S/M", ALC294_FIXUP_ASUS_GX502_PINS), -- cgit v1.2.3 From 65457e345f3c658e9a6d0c3f2e446d61eb377718 Mon Sep 17 00:00:00 2001 From: Keith Winstein Date: Sun, 25 Oct 2020 22:05:47 -0700 Subject: ALSA: usb-audio: Add implicit feedback quirk for Zoom UAC-2 commit f15cfca818d756dd1c9492530091dfd583359db3 upstream. The Zoom UAC-2 USB audio interface provides an async playback endpoint ("1 OUT (ASYNC)") and capture endpoint ("2 IN (ASYNC)"), both with 2-channel S32_LE in 44.1, 48, 88.2, 96, 176.4, or 192 kilosamples/s. The device provides explicit feedback to adjust the host's playback rate, but the feedback appears unstable and biased relative to the device's capture rate. "alsaloop -t 1000" experiences playback underruns and tries to resample the captured audio to match the varying playback rate. Forcing the kernel to use implicit feedback appears to produce more stable results. This causes the host to transmit one playback sample for each capture sample received. (Zoom North America has been notified of this change.) Signed-off-by: Keith Winstein Tested-by: Keith Winstein Cc: BugLink: https://lore.kernel.org/r/20201027071841.GA164525@trolley.csail.mit.edu Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/pcm.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c index 878f1201aad6..1c2032ab1751 100644 --- a/sound/usb/pcm.c +++ b/sound/usb/pcm.c @@ -339,6 +339,10 @@ static int set_sync_ep_implicit_fb_quirk(struct snd_usb_substream *subs, ep = 0x81; ifnum = 2; goto add_sync_ep_from_ifnum; + case USB_ID(0x1686, 0xf029): /* Zoom UAC-2 */ + ep = 0x82; + ifnum = 2; + goto add_sync_ep_from_ifnum; case USB_ID(0x1397, 0x0001): /* Behringer UFX1604 */ case USB_ID(0x1397, 0x0002): /* Behringer UFX1204 */ ep = 0x81; -- cgit v1.2.3 From a46e830d017ee1b85d2a215fdd4202bf38582989 Mon Sep 17 00:00:00 2001 From: Artem Lapkin Date: Tue, 3 Nov 2020 18:08:09 +0800 Subject: ALSA: usb-audio: add usb vendor id as DSD-capable for Khadas devices commit 07815a2b3501adeaae6384a25b9c4a9c81dae59f upstream. Khadas audio devices ( USB_ID_VENDOR 0x3353 ) have DSD-capable implementations from XMOS need add new usb vendor id for recognition Signed-off-by: Artem Lapkin Cc: Link: https://lore.kernel.org/r/20201103103311.5435-1-art@khadas.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/quirks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index cc75d9749e9f..825b6f2efada 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1732,6 +1732,7 @@ u64 snd_usb_interface_dsd_format_quirks(struct snd_usb_audio *chip, case 0x278b: /* Rotel? */ case 0x292b: /* Gustard/Ess based devices */ case 0x2ab6: /* T+A devices */ + case 0x3353: /* Khadas devices */ case 0x3842: /* EVGA */ case 0xc502: /* HiBy devices */ if (fp->dsd_raw) -- cgit v1.2.3 From 26a871cf86cb277fdd815f1df1e437f23d7eb144 Mon Sep 17 00:00:00 2001 From: "Geoffrey D. Bennett" Date: Wed, 4 Nov 2020 22:27:17 +1030 Subject: ALSA: usb-audio: Add implicit feedback quirk for Qu-16 commit 0938ecae432e7ac8b01080c35dd81d50a1e43033 upstream. This patch fixes audio distortion on playback for the Allen&Heath Qu-16. Signed-off-by: Geoffrey D. Bennett Cc: Link: https://lore.kernel.org/r/20201104115717.GA19046@b4.vu Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/pcm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c index 1c2032ab1751..60f3df76f01b 100644 --- a/sound/usb/pcm.c +++ b/sound/usb/pcm.c @@ -323,6 +323,7 @@ static int set_sync_ep_implicit_fb_quirk(struct snd_usb_substream *subs, switch (subs->stream->chip->usb_id) { case USB_ID(0x0763, 0x2030): /* M-Audio Fast Track C400 */ case USB_ID(0x0763, 0x2031): /* M-Audio Fast Track C600 */ + case USB_ID(0x22f0, 0x0006): /* Allen&Heath Qu-16 */ ep = 0x81; ifnum = 3; goto add_sync_ep_from_ifnum; -- cgit v1.2.3 From f7c2913d606b37f1dd8b76647cb11172fcfac988 Mon Sep 17 00:00:00 2001 From: "Geoffrey D. Bennett" Date: Wed, 4 Nov 2020 22:37:05 +1030 Subject: ALSA: usb-audio: Add implicit feedback quirk for MODX commit 26201ddc1373c99b2a67c5774da2f0eecd749b93 upstream. This patch fixes audio distortion on playback for the Yamaha MODX. Signed-off-by: Geoffrey D. Bennett Tested-by: Frank Slotta Cc: Link: https://lore.kernel.org/r/20201104120705.GA19126@b4.vu Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/pcm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c index 60f3df76f01b..1a5e555002b2 100644 --- a/sound/usb/pcm.c +++ b/sound/usb/pcm.c @@ -333,6 +333,7 @@ static int set_sync_ep_implicit_fb_quirk(struct snd_usb_substream *subs, ifnum = 2; goto add_sync_ep_from_ifnum; case USB_ID(0x2466, 0x8003): /* Fractal Audio Axe-Fx II */ + case USB_ID(0x0499, 0x172a): /* Yamaha MODX */ ep = 0x86; ifnum = 2; goto add_sync_ep_from_ifnum; -- cgit v1.2.3 From c1f729c7dec0df04d62550d981af849f970a660d Mon Sep 17 00:00:00 2001 From: Shijie Luo Date: Sun, 1 Nov 2020 17:07:40 -0800 Subject: mm: mempolicy: fix potential pte_unmap_unlock pte error commit 3f08842098e842c51e3b97d0dcdebf810b32558e upstream. When flags in queue_pages_pte_range don't have MPOL_MF_MOVE or MPOL_MF_MOVE_ALL bits, code breaks and passing origin pte - 1 to pte_unmap_unlock seems like not a good idea. queue_pages_pte_range can run in MPOL_MF_MOVE_ALL mode which doesn't migrate misplaced pages but returns with EIO when encountering such a page. Since commit a7f40cfe3b7a ("mm: mempolicy: make mbind() return -EIO when MPOL_MF_STRICT is specified") and early break on the first pte in the range results in pte_unmap_unlock on an underflow pte. This can lead to lockups later on when somebody tries to lock the pte resp. page_table_lock again.. Fixes: a7f40cfe3b7a ("mm: mempolicy: make mbind() return -EIO when MPOL_MF_STRICT is specified") Signed-off-by: Shijie Luo Signed-off-by: Miaohe Lin Signed-off-by: Andrew Morton Reviewed-by: Oscar Salvador Acked-by: Michal Hocko Cc: Miaohe Lin Cc: Feilong Lin Cc: Shijie Luo Cc: Link: https://lkml.kernel.org/r/20201019074853.50856-1-luoshijie1@huawei.com Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/mempolicy.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 787c5fc91b21..87d165923fee 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -496,7 +496,7 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr, unsigned long flags = qp->flags; int ret; bool has_unmovable = false; - pte_t *pte; + pte_t *pte, *mapped_pte; spinlock_t *ptl; ptl = pmd_trans_huge_lock(pmd, vma); @@ -510,7 +510,7 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr, if (pmd_trans_unstable(pmd)) return 0; - pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); + mapped_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); for (; addr != end; pte++, addr += PAGE_SIZE) { if (!pte_present(*pte)) continue; @@ -542,7 +542,7 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr, } else break; } - pte_unmap_unlock(pte - 1, ptl); + pte_unmap_unlock(mapped_pte, ptl); cond_resched(); if (has_unmovable) -- cgit v1.2.3 From b1d16be4f2f4a6cbdc5ff1dddc8917fad11d3623 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Sun, 1 Nov 2020 17:07:47 -0800 Subject: lib/crc32test: remove extra local_irq_disable/enable commit aa4e460f0976351fddd2f5ac6e08b74320c277a1 upstream. Commit 4d004099a668 ("lockdep: Fix lockdep recursion") uncovered the following issue in lib/crc32test reported on s390: BUG: using __this_cpu_read() in preemptible [00000000] code: swapper/0/1 caller is lockdep_hardirqs_on_prepare+0x48/0x270 CPU: 6 PID: 1 Comm: swapper/0 Not tainted 5.9.0-next-20201015-15164-g03d992bd2de6 #19 Hardware name: IBM 3906 M04 704 (LPAR) Call Trace: lockdep_hardirqs_on_prepare+0x48/0x270 trace_hardirqs_on+0x9c/0x1b8 crc32_test.isra.0+0x170/0x1c0 crc32test_init+0x1c/0x40 do_one_initcall+0x40/0x130 do_initcalls+0x126/0x150 kernel_init_freeable+0x1f6/0x230 kernel_init+0x22/0x150 ret_from_fork+0x24/0x2c no locks held by swapper/0/1. Remove extra local_irq_disable/local_irq_enable helpers calls. Fixes: 5fb7f87408f1 ("lib: add module support to crc32 tests") Signed-off-by: Vasily Gorbik Signed-off-by: Andrew Morton Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Greg Kroah-Hartman Link: https://lkml.kernel.org/r/patch.git-4369da00c06e.your-ad-here.call-01602859837-ext-1679@work.hours Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- lib/crc32test.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/lib/crc32test.c b/lib/crc32test.c index 97d6a57cefcc..61ddce2cff77 100644 --- a/lib/crc32test.c +++ b/lib/crc32test.c @@ -683,7 +683,6 @@ static int __init crc32c_test(void) /* reduce OS noise */ local_irq_save(flags); - local_irq_disable(); nsec = ktime_get_ns(); for (i = 0; i < 100; i++) { @@ -694,7 +693,6 @@ static int __init crc32c_test(void) nsec = ktime_get_ns() - nsec; local_irq_restore(flags); - local_irq_enable(); pr_info("crc32c: CRC_LE_BITS = %d\n", CRC_LE_BITS); @@ -768,7 +766,6 @@ static int __init crc32_test(void) /* reduce OS noise */ local_irq_save(flags); - local_irq_disable(); nsec = ktime_get_ns(); for (i = 0; i < 100; i++) { @@ -783,7 +780,6 @@ static int __init crc32_test(void) nsec = ktime_get_ns() - nsec; local_irq_restore(flags); - local_irq_enable(); pr_info("crc32: CRC_LE_BITS = %d, CRC_BE BITS = %d\n", CRC_LE_BITS, CRC_BE_BITS); -- cgit v1.2.3 From 1b8490d6b809c56a95287c3ec3e2bb6a358763e7 Mon Sep 17 00:00:00 2001 From: Zqiang Date: Sun, 1 Nov 2020 17:07:53 -0800 Subject: kthread_worker: prevent queuing delayed work from timer_fn when it is being canceled commit 6993d0fdbee0eb38bfac350aa016f65ad11ed3b1 upstream. There is a small race window when a delayed work is being canceled and the work still might be queued from the timer_fn: CPU0 CPU1 kthread_cancel_delayed_work_sync() __kthread_cancel_work_sync() __kthread_cancel_work() work->canceling++; kthread_delayed_work_timer_fn() kthread_insert_work(); BUG: kthread_insert_work() should not get called when work->canceling is set. Signed-off-by: Zqiang Signed-off-by: Andrew Morton Reviewed-by: Petr Mladek Acked-by: Tejun Heo Cc: Link: https://lkml.kernel.org/r/20201014083030.16895-1-qiang.zhang@windriver.com Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- kernel/kthread.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/kthread.c b/kernel/kthread.c index bfbfa481be3a..e51f0006057d 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -873,7 +873,8 @@ void kthread_delayed_work_timer_fn(struct timer_list *t) /* Move the work from worker->delayed_work_list. */ WARN_ON_ONCE(list_empty(&work->node)); list_del_init(&work->node); - kthread_insert_work(worker, work, &worker->work_list); + if (!work->canceling) + kthread_insert_work(worker, work, &worker->work_list); raw_spin_unlock_irqrestore(&worker->lock, flags); } -- cgit v1.2.3 From d2286457bd838e78f6e12a6f4a0d99aa64dc2cc0 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Sun, 1 Nov 2020 17:08:00 -0800 Subject: mm: always have io_remap_pfn_range() set pgprot_decrypted() commit f8f6ae5d077a9bdaf5cbf2ac960a5d1a04b47482 upstream. The purpose of io_remap_pfn_range() is to map IO memory, such as a memory mapped IO exposed through a PCI BAR. IO devices do not understand encryption, so this memory must always be decrypted. Automatically call pgprot_decrypted() as part of the generic implementation. This fixes a bug where enabling AMD SME causes subsystems, such as RDMA, using io_remap_pfn_range() to expose BAR pages to user space to fail. The CPU will encrypt access to those BAR pages instead of passing unencrypted IO directly to the device. Places not mapping IO should use remap_pfn_range(). Fixes: aca20d546214 ("x86/mm: Add support to make use of Secure Memory Encryption") Signed-off-by: Jason Gunthorpe Signed-off-by: Andrew Morton Cc: Arnd Bergmann Cc: Tom Lendacky Cc: Thomas Gleixner Cc: Andrey Ryabinin Cc: Borislav Petkov Cc: Brijesh Singh Cc: Jonathan Corbet Cc: Dmitry Vyukov Cc: "Dave Young" Cc: Alexander Potapenko Cc: Konrad Rzeszutek Wilk Cc: Andy Lutomirski Cc: Larry Woodman Cc: Matt Fleming Cc: Ingo Molnar Cc: "Michael S. Tsirkin" Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Rik van Riel Cc: Toshimitsu Kani Cc: Link: https://lkml.kernel.org/r/0-v1-025d64bdf6c4+e-amd_sme_fix_jgg@nvidia.com Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- include/asm-generic/pgtable.h | 4 ---- include/linux/mm.h | 9 +++++++++ 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index 6fd08cf04add..ea39a0b54c63 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -1159,10 +1159,6 @@ static inline bool arch_has_pfn_modify_check(void) #endif /* !__ASSEMBLY__ */ -#ifndef io_remap_pfn_range -#define io_remap_pfn_range remap_pfn_range -#endif - #ifndef has_transparent_hugepage #ifdef CONFIG_TRANSPARENT_HUGEPAGE #define has_transparent_hugepage() 1 diff --git a/include/linux/mm.h b/include/linux/mm.h index 34119f393a80..7249cf58f78d 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2572,6 +2572,15 @@ static inline vm_fault_t vmf_insert_page(struct vm_area_struct *vma, return VM_FAULT_NOPAGE; } +#ifndef io_remap_pfn_range +static inline int io_remap_pfn_range(struct vm_area_struct *vma, + unsigned long addr, unsigned long pfn, + unsigned long size, pgprot_t prot) +{ + return remap_pfn_range(vma, addr, pfn, size, pgprot_decrypted(prot)); +} +#endif + static inline vm_fault_t vmf_error(int err) { if (err == -ENOMEM) -- cgit v1.2.3 From 2cd71743e7fff055bc9382b50f924e289b1dc740 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Mon, 26 Oct 2020 10:52:29 -0400 Subject: gfs2: Wake up when sd_glock_disposal becomes zero commit da7d554f7c62d0c17c1ac3cc2586473c2d99f0bd upstream. Commit fc0e38dae645 ("GFS2: Fix glock deallocation race") fixed a sd_glock_disposal accounting bug by adding a missing atomic_dec statement, but it failed to wake up sd_glock_wait when that decrement causes sd_glock_disposal to reach zero. As a consequence, gfs2_gl_hash_clear can now run into a 10-minute timeout instead of being woken up. Add the missing wakeup. Fixes: fc0e38dae645 ("GFS2: Fix glock deallocation race") Cc: stable@vger.kernel.org # v2.6.39+ Signed-off-by: Alexander Aring Signed-off-by: Andreas Gruenbacher Signed-off-by: Greg Kroah-Hartman --- fs/gfs2/glock.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 0290a22ebccf..9e1685a30bf8 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -873,7 +873,8 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, out_free: kfree(gl->gl_lksb.sb_lvbptr); kmem_cache_free(cachep, gl); - atomic_dec(&sdp->sd_glock_disposal); + if (atomic_dec_and_test(&sdp->sd_glock_disposal)) + wake_up(&sdp->sd_glock_wait); out: return ret; -- cgit v1.2.3 From aef59b5e5bdfc800f0e7aec7e74a33e98bf142c0 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 2 Nov 2020 15:31:27 -0500 Subject: ring-buffer: Fix recursion protection transitions between interrupt context commit b02414c8f045ab3b9afc816c3735bc98c5c3d262 upstream. The recursion protection of the ring buffer depends on preempt_count() to be correct. But it is possible that the ring buffer gets called after an interrupt comes in but before it updates the preempt_count(). This will trigger a false positive in the recursion code. Use the same trick from the ftrace function callback recursion code which uses a "transition" bit that gets set, to allow for a single recursion for to handle transitions between contexts. Cc: stable@vger.kernel.org Fixes: 567cd4da54ff4 ("ring-buffer: User context bit recursion checking") Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/ring_buffer.c | 58 ++++++++++++++++++++++++++++++++++++---------- 1 file changed, 46 insertions(+), 12 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 67cdb401c6ce..6e5c6b023dc3 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -422,14 +422,16 @@ struct rb_event_info { /* * Used for which event context the event is in. - * NMI = 0 - * IRQ = 1 - * SOFTIRQ = 2 - * NORMAL = 3 + * TRANSITION = 0 + * NMI = 1 + * IRQ = 2 + * SOFTIRQ = 3 + * NORMAL = 4 * * See trace_recursive_lock() comment below for more details. */ enum { + RB_CTX_TRANSITION, RB_CTX_NMI, RB_CTX_IRQ, RB_CTX_SOFTIRQ, @@ -2660,10 +2662,10 @@ rb_wakeups(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer) * a bit of overhead in something as critical as function tracing, * we use a bitmask trick. * - * bit 0 = NMI context - * bit 1 = IRQ context - * bit 2 = SoftIRQ context - * bit 3 = normal context. + * bit 1 = NMI context + * bit 2 = IRQ context + * bit 3 = SoftIRQ context + * bit 4 = normal context. * * This works because this is the order of contexts that can * preempt other contexts. A SoftIRQ never preempts an IRQ @@ -2686,6 +2688,30 @@ rb_wakeups(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer) * The least significant bit can be cleared this way, and it * just so happens that it is the same bit corresponding to * the current context. + * + * Now the TRANSITION bit breaks the above slightly. The TRANSITION bit + * is set when a recursion is detected at the current context, and if + * the TRANSITION bit is already set, it will fail the recursion. + * This is needed because there's a lag between the changing of + * interrupt context and updating the preempt count. In this case, + * a false positive will be found. To handle this, one extra recursion + * is allowed, and this is done by the TRANSITION bit. If the TRANSITION + * bit is already set, then it is considered a recursion and the function + * ends. Otherwise, the TRANSITION bit is set, and that bit is returned. + * + * On the trace_recursive_unlock(), the TRANSITION bit will be the first + * to be cleared. Even if it wasn't the context that set it. That is, + * if an interrupt comes in while NORMAL bit is set and the ring buffer + * is called before preempt_count() is updated, since the check will + * be on the NORMAL bit, the TRANSITION bit will then be set. If an + * NMI then comes in, it will set the NMI bit, but when the NMI code + * does the trace_recursive_unlock() it will clear the TRANSTION bit + * and leave the NMI bit set. But this is fine, because the interrupt + * code that set the TRANSITION bit will then clear the NMI bit when it + * calls trace_recursive_unlock(). If another NMI comes in, it will + * set the TRANSITION bit and continue. + * + * Note: The TRANSITION bit only handles a single transition between context. */ static __always_inline int @@ -2701,8 +2727,16 @@ trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer) bit = pc & NMI_MASK ? RB_CTX_NMI : pc & HARDIRQ_MASK ? RB_CTX_IRQ : RB_CTX_SOFTIRQ; - if (unlikely(val & (1 << (bit + cpu_buffer->nest)))) - return 1; + if (unlikely(val & (1 << (bit + cpu_buffer->nest)))) { + /* + * It is possible that this was called by transitioning + * between interrupt context, and preempt_count() has not + * been updated yet. In this case, use the TRANSITION bit. + */ + bit = RB_CTX_TRANSITION; + if (val & (1 << (bit + cpu_buffer->nest))) + return 1; + } val |= (1 << (bit + cpu_buffer->nest)); cpu_buffer->current_context = val; @@ -2717,8 +2751,8 @@ trace_recursive_unlock(struct ring_buffer_per_cpu *cpu_buffer) cpu_buffer->current_context - (1 << cpu_buffer->nest); } -/* The recursive locking above uses 4 bits */ -#define NESTED_BITS 4 +/* The recursive locking above uses 5 bits */ +#define NESTED_BITS 5 /** * ring_buffer_nest_start - Allow to trace while nested -- cgit v1.2.3 From cfaf010cf34527d683e1f82fedb42438a84e329c Mon Sep 17 00:00:00 2001 From: Alexander Sverdlin Date: Mon, 5 Oct 2020 10:48:03 +0200 Subject: mtd: spi-nor: Don't copy self-pointing struct around commit 69a8eed58cc09aea3b01a64997031dd5d3c02c07 upstream. spi_nor_parse_sfdp() modifies the passed structure so that it points to itself (params.erase_map.regions to params.erase_map.uniform_region). This makes it impossible to copy the local struct anywhere else. Therefore only use memcpy() in backup-restore scenario. The bug may show up like below: BUG: unable to handle page fault for address: ffffc90000b377f8 Oops: 0000 [#1] PREEMPT SMP NOPTI CPU: 4 PID: 3500 Comm: flashcp Tainted: G O 5.4.53-... #1 ... RIP: 0010:spi_nor_erase+0x8e/0x5c0 Code: 64 24 18 89 db 4d 8b b5 d0 04 00 00 4c 89 64 24 18 4c 89 64 24 20 eb 12 a8 10 0f 85 59 02 00 00 49 83 c6 10 0f 84 4f 02 00 00 <49> 8b 06 48 89 c2 48 83 e2 c0 48 89 d1 49 03 4e 08 48 39 cb 73 d8 RSP: 0018:ffffc9000217fc48 EFLAGS: 00010206 RAX: 0000000000740000 RBX: 0000000000000000 RCX: 0000000000740000 RDX: ffff8884550c9980 RSI: ffff88844f9c0bc0 RDI: ffff88844ede7bb8 RBP: 0000000000740000 R08: ffffffff815bfbe0 R09: ffff88844f9c0bc0 R10: 0000000000000000 R11: 0000000000000000 R12: ffffc9000217fc60 R13: ffff88844ede7818 R14: ffffc90000b377f8 R15: 0000000000000000 FS: 00007f4699780500(0000) GS:ffff88846ff00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffc90000b377f8 CR3: 00000004538ee000 CR4: 0000000000340fe0 Call Trace: part_erase+0x27/0x50 mtdchar_ioctl+0x831/0xba0 ? filemap_map_pages+0x186/0x3d0 ? do_filp_open+0xad/0x110 ? _copy_to_user+0x22/0x30 ? cp_new_stat+0x150/0x180 mtdchar_unlocked_ioctl+0x2a/0x40 do_vfs_ioctl+0xa0/0x630 ? __do_sys_newfstat+0x3c/0x60 ksys_ioctl+0x70/0x80 __x64_sys_ioctl+0x16/0x20 do_syscall_64+0x6a/0x200 ? prepare_exit_to_usermode+0x50/0xd0 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f46996b6817 Cc: stable@vger.kernel.org Fixes: c46872170a54 ("mtd: spi-nor: Move erase_map to 'struct spi_nor_flash_parameter'") Co-developed-by: Matija Glavinic Pecotic Signed-off-by: Matija Glavinic Pecotic Signed-off-by: Alexander Sverdlin Signed-off-by: Vignesh Raghavendra Tested-by: Baurzhan Ismagulov Reviewed-by: Tudor Ambarus Link: https://lore.kernel.org/r/20201005084803.23460-1-alexander.sverdlin@nokia.com Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/spi-nor/spi-nor.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c index f417fb680cd8..40586ad17f52 100644 --- a/drivers/mtd/spi-nor/spi-nor.c +++ b/drivers/mtd/spi-nor/spi-nor.c @@ -4444,11 +4444,10 @@ static void spi_nor_sfdp_init_params(struct spi_nor *nor) memcpy(&sfdp_params, &nor->params, sizeof(sfdp_params)); - if (spi_nor_parse_sfdp(nor, &sfdp_params)) { + if (spi_nor_parse_sfdp(nor, &nor->params)) { + memcpy(&nor->params, &sfdp_params, sizeof(nor->params)); nor->addr_width = 0; nor->flags &= ~SNOR_F_4B_OPCODES; - } else { - memcpy(&nor->params, &sfdp_params, sizeof(nor->params)); } } -- cgit v1.2.3 From 3058420f40fbae3707d4e82748f487b23e219238 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 29 Oct 2020 17:31:45 -0400 Subject: ftrace: Fix recursion check for NMI test commit ee11b93f95eabdf8198edd4668bf9102e7248270 upstream. The code that checks recursion will work to only do the recursion check once if there's nested checks. The top one will do the check, the other nested checks will see recursion was already checked and return zero for its "bit". On the return side, nothing will be done if the "bit" is zero. The problem is that zero is returned for the "good" bit when in NMI context. This will set the bit for NMIs making it look like *all* NMI tracing is recursing, and prevent tracing of anything in NMI context! The simple fix is to return "bit + 1" and subtract that bit on the end to get the real bit. Cc: stable@vger.kernel.org Fixes: edc15cafcbfa3 ("tracing: Avoid unnecessary multiple recursion checks") Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 4055158c1dd2..b84fa0d2d6ea 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -653,7 +653,7 @@ static __always_inline int trace_test_and_set_recursion(int start, int max) current->trace_recursion = val; barrier(); - return bit; + return bit + 1; } static __always_inline void trace_clear_recursion(int bit) @@ -663,6 +663,7 @@ static __always_inline void trace_clear_recursion(int bit) if (!bit) return; + bit--; bit = 1 << bit; val &= ~bit; -- cgit v1.2.3 From a69af5baed80d918bcc10f7b0a86069558ae642e Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 29 Oct 2020 19:35:08 -0400 Subject: ftrace: Handle tracing when switching between context commit 726b3d3f141fba6f841d715fc4d8a4a84f02c02a upstream. When an interrupt or NMI comes in and switches the context, there's a delay from when the preempt_count() shows the update. As the preempt_count() is used to detect recursion having each context have its own bit get set when tracing starts, and if that bit is already set, it is considered a recursion and the function exits. But if this happens in that section where context has changed but preempt_count() has not been updated, this will be incorrectly flagged as a recursion. To handle this case, create another bit call TRANSITION and test it if the current context bit is already set. Flag the call as a recursion if the TRANSITION bit is already set, and if not, set it and continue. The TRANSITION bit will be cleared normally on the return of the function that set it, or if the current context bit is clear, set it and clear the TRANSITION bit to allow for another transition between the current context and an even higher one. Cc: stable@vger.kernel.org Fixes: edc15cafcbfa3 ("tracing: Avoid unnecessary multiple recursion checks") Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.h | 23 +++++++++++++++++++++-- kernel/trace/trace_selftest.c | 9 +++++++-- 2 files changed, 28 insertions(+), 4 deletions(-) diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index b84fa0d2d6ea..fc3aa81a43e3 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -592,6 +592,12 @@ enum { * function is called to clear it. */ TRACE_GRAPH_NOTRACE_BIT, + + /* + * When transitioning between context, the preempt_count() may + * not be correct. Allow for a single recursion to cover this case. + */ + TRACE_TRANSITION_BIT, }; #define trace_recursion_set(bit) do { (current)->trace_recursion |= (1<<(bit)); } while (0) @@ -646,8 +652,21 @@ static __always_inline int trace_test_and_set_recursion(int start, int max) return 0; bit = trace_get_context_bit() + start; - if (unlikely(val & (1 << bit))) - return -1; + if (unlikely(val & (1 << bit))) { + /* + * It could be that preempt_count has not been updated during + * a switch between contexts. Allow for a single recursion. + */ + bit = TRACE_TRANSITION_BIT; + if (trace_recursion_test(bit)) + return -1; + trace_recursion_set(bit); + barrier(); + return bit + 1; + } + + /* Normal check passed, clear the transition to allow it again */ + trace_recursion_clear(TRACE_TRANSITION_BIT); val |= 1 << bit; current->trace_recursion = val; diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 69ee8ef12cee..0838c290ac7f 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -492,8 +492,13 @@ trace_selftest_function_recursion(void) unregister_ftrace_function(&test_rec_probe); ret = -1; - if (trace_selftest_recursion_cnt != 1) { - pr_cont("*callback not called once (%d)* ", + /* + * Recursion allows for transitions between context, + * and may call the callback twice. + */ + if (trace_selftest_recursion_cnt != 1 && + trace_selftest_recursion_cnt != 2) { + pr_cont("*callback not called once (or twice) (%d)* ", trace_selftest_recursion_cnt); goto out; } -- cgit v1.2.3 From f352cca84625b49c170dba841b60791dee737357 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Mon, 2 Nov 2020 22:27:27 +0100 Subject: regulator: defer probe when trying to get voltage from unresolved supply MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit cf1ad559a20d1930aa7b47a52f54e1f8718de301 upstream. regulator_get_voltage_rdev() is called in regulator probe() when applying machine constraints. The "fixed" commit exposed the problem that non-bypassed regulators can forward the request to its parent (like bypassed ones) supply. Return -EPROBE_DEFER when the supply is expected but not resolved yet. Fixes: aea6cb99703e ("regulator: resolve supply after creating regulator") Cc: stable@vger.kernel.org Signed-off-by: Michał Mirosław Reported-by: Ondřej Jirman Reported-by: Corentin Labbe Tested-by: Ondřej Jirman Link: https://lore.kernel.org/r/a9041d68b4d35e4a2dd71629c8a6422662acb5ee.1604351936.git.mirq-linux@rere.qmqm.pl Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/regulator/core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index ee850cffe154..4da2d6ad22b0 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -4045,6 +4045,8 @@ int regulator_get_voltage_rdev(struct regulator_dev *rdev) ret = rdev->desc->fixed_uV; } else if (rdev->supply) { ret = regulator_get_voltage_rdev(rdev->supply->rdev); + } else if (rdev->supply_name) { + return -EPROBE_DEFER; } else { return -EINVAL; } -- cgit v1.2.3 From 9f6883fce69454809f6105041be02d2e71dc358a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Hundeb=C3=B8ll?= Date: Wed, 14 Oct 2020 11:02:30 +0200 Subject: spi: bcm2835: fix gpio cs level inversion MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 5e31ba0c0543a04483b53151eb5b7413efece94c upstream. The work on improving gpio chip-select in spi core, and the following fixes, has caused the bcm2835 spi driver to use wrong levels. Fix this by simply removing level handling in the bcm2835 driver, and let the core do its work. Fixes: 3e5ec1db8bfe ("spi: Fix SPI_CS_HIGH setting when using native and GPIO CS") Cc: Signed-off-by: Martin Hundebøll Link: https://lore.kernel.org/r/20201014090230.2706810-1-martin@geanix.com Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-bcm2835.c | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/drivers/spi/spi-bcm2835.c b/drivers/spi/spi-bcm2835.c index c88f5d99c906..cfd9176e6413 100644 --- a/drivers/spi/spi-bcm2835.c +++ b/drivers/spi/spi-bcm2835.c @@ -1245,18 +1245,6 @@ static int bcm2835_spi_setup(struct spi_device *spi) if (!chip) return 0; - /* - * Retrieve the corresponding GPIO line used for CS. - * The inversion semantics will be handled by the GPIO core - * code, so we pass GPIOS_OUT_LOW for "unasserted" and - * the correct flag for inversion semantics. The SPI_CS_HIGH - * on spi->mode cannot be checked for polarity in this case - * as the flag use_gpio_descriptors enforces SPI_CS_HIGH. - */ - if (of_property_read_bool(spi->dev.of_node, "spi-cs-high")) - lflags = GPIO_ACTIVE_HIGH; - else - lflags = GPIO_ACTIVE_LOW; spi->cs_gpiod = gpiochip_request_own_desc(chip, 8 - spi->chip_select, DRV_NAME, lflags, -- cgit v1.2.3 From ec5f524e0293e19d2bf89219d7d589e1fb654e87 Mon Sep 17 00:00:00 2001 From: Qiujun Huang Date: Fri, 30 Oct 2020 00:19:05 +0800 Subject: tracing: Fix out of bounds write in get_trace_buf commit c1acb4ac1a892cf08d27efcb964ad281728b0545 upstream. The nesting count of trace_printk allows for 4 levels of nesting. The nesting counter starts at zero and is incremented before being used to retrieve the current context's buffer. But the index to the buffer uses the nesting counter after it was incremented, and not its original number, which in needs to do. Link: https://lkml.kernel.org/r/20201029161905.4269-1-hqjagain@gmail.com Cc: stable@vger.kernel.org Fixes: 3d9622c12c887 ("tracing: Add barrier to trace_printk() buffer nesting modification") Signed-off-by: Qiujun Huang Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 5b2a664812b1..2a357bda45cf 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3012,7 +3012,7 @@ static char *get_trace_buf(void) /* Interrupts must see nesting incremented before we use the buffer */ barrier(); - return &buffer->buffer[buffer->nesting][0]; + return &buffer->buffer[buffer->nesting - 1][0]; } static void put_trace_buf(void) -- cgit v1.2.3 From 2716e78a6486814537df95a82efec4e9e4e081d9 Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Wed, 4 Nov 2020 16:12:44 +0100 Subject: futex: Handle transient "ownerless" rtmutex state correctly commit 9f5d1c336a10c0d24e83e40b4c1b9539f7dba627 upstream. Gratian managed to trigger the BUG_ON(!newowner) in fixup_pi_state_owner(). This is one possible chain of events leading to this: Task Prio Operation T1 120 lock(F) T2 120 lock(F) -> blocks (top waiter) T3 50 (RT) lock(F) -> boosts T1 and blocks (new top waiter) XX timeout/ -> wakes T2 signal T1 50 unlock(F) -> wakes T3 (rtmutex->owner == NULL, waiter bit is set) T2 120 cleanup -> try_to_take_mutex() fails because T3 is the top waiter and the lower priority T2 cannot steal the lock. -> fixup_pi_state_owner() sees newowner == NULL -> BUG_ON() The comment states that this is invalid and rt_mutex_real_owner() must return a non NULL owner when the trylock failed, but in case of a queued and woken up waiter rt_mutex_real_owner() == NULL is a valid transient state. The higher priority waiter has simply not yet managed to take over the rtmutex. The BUG_ON() is therefore wrong and this is just another retry condition in fixup_pi_state_owner(). Drop the locks, so that T3 can make progress, and then try the fixup again. Gratian provided a great analysis, traces and a reproducer. The analysis is to the point, but it confused the hell out of that tglx dude who had to page in all the futex horrors again. Condensed version is above. [ tglx: Wrote comment and changelog ] Fixes: c1e2f0eaf015 ("futex: Avoid violating the 10th rule of futex") Reported-by: Gratian Crisan Signed-off-by: Mike Galbraith Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/87a6w6x7bb.fsf@ni.com Link: https://lore.kernel.org/r/87sg9pkvf7.fsf@nanos.tec.linutronix.de Signed-off-by: Greg Kroah-Hartman --- kernel/futex.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/kernel/futex.c b/kernel/futex.c index 17fba7a986e0..9c4f9b868a49 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -2511,10 +2511,22 @@ retry: } /* - * Since we just failed the trylock; there must be an owner. + * The trylock just failed, so either there is an owner or + * there is a higher priority waiter than this one. */ newowner = rt_mutex_owner(&pi_state->pi_mutex); - BUG_ON(!newowner); + /* + * If the higher priority waiter has not yet taken over the + * rtmutex then newowner is NULL. We can't return here with + * that state because it's inconsistent vs. the user space + * state. So drop the locks and try again. It's a valid + * situation and not any different from the other retry + * conditions. + */ + if (unlikely(!newowner)) { + err = -EAGAIN; + goto handle_err; + } } else { WARN_ON_ONCE(argowner != current); if (oldowner == current) { -- cgit v1.2.3 From 69e0e917c7c85e88cd63954a9b6366e7c157c727 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20P=C3=A9ron?= Date: Sat, 3 Oct 2020 12:03:32 +0200 Subject: ARM: dts: sun4i-a10: fix cpu_alert temperature MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit dea252fa41cd8ce332d148444e4799235a8a03ec ] When running dtbs_check thermal_zone warn about the temperature declared. thermal-zones: cpu-thermal:trips:cpu-alert0:temperature:0:0: 850000 is greater than the maximum of 200000 It's indeed wrong the real value is 85°C and not 850°C. Signed-off-by: Clément Péron Signed-off-by: Maxime Ripard Link: https://lore.kernel.org/r/20201003100332.431178-1-peron.clem@gmail.com Signed-off-by: Sasha Levin --- arch/arm/boot/dts/sun4i-a10.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/sun4i-a10.dtsi b/arch/arm/boot/dts/sun4i-a10.dtsi index e0a9b371c248..2265ca24c0c7 100644 --- a/arch/arm/boot/dts/sun4i-a10.dtsi +++ b/arch/arm/boot/dts/sun4i-a10.dtsi @@ -143,7 +143,7 @@ trips { cpu_alert0: cpu-alert0 { /* milliCelsius */ - temperature = <850000>; + temperature = <85000>; hysteresis = <2000>; type = "passive"; }; -- cgit v1.2.3 From 3283d4d78412e9097c6fa3cfad34876bc26f52b2 Mon Sep 17 00:00:00 2001 From: Scott K Logan Date: Fri, 25 Sep 2020 01:43:53 -0700 Subject: arm64: dts: meson: add missing g12 rng clock [ Upstream commit a1afbbb0285797e01313779c71287d936d069245 ] This adds the missing perpheral clock for the RNG for Amlogic G12. As stated in amlogic,meson-rng.yaml, this isn't always necessary for the RNG to function, but is better to have in case the clock is disabled for some reason prior to loading. Signed-off-by: Scott K Logan Suggested-by: Neil Armstrong Reviewed-by: Neil Armstrong Signed-off-by: Kevin Hilman Link: https://lore.kernel.org/r/520a1a8ec7a958b3d918d89563ec7e93a4100a45.camel@cottsay.net Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi b/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi index 1234bc797429..354ef2f3eac6 100644 --- a/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi @@ -167,6 +167,8 @@ hwrng: rng@218 { compatible = "amlogic,meson-rng"; reg = <0x0 0x218 0x0 0x4>; + clocks = <&clkc CLKID_RNG0>; + clock-names = "core"; }; }; -- cgit v1.2.3 From 6e02c29e4ac45b7b18854d988915a129a3014371 Mon Sep 17 00:00:00 2001 From: Kairui Song Date: Wed, 14 Oct 2020 17:24:28 +0800 Subject: x86/kexec: Use up-to-dated screen_info copy to fill boot params [ Upstream commit afc18069a2cb7ead5f86623a5f3d4ad6e21f940d ] kexec_file_load() currently reuses the old boot_params.screen_info, but if drivers have change the hardware state, boot_param.screen_info could contain invalid info. For example, the video type might be no longer VGA, or the frame buffer address might be changed. If the kexec kernel keeps using the old screen_info, kexec'ed kernel may attempt to write to an invalid framebuffer memory region. There are two screen_info instances globally available, boot_params.screen_info and screen_info. Later one is a copy, and is updated by drivers. So let kexec_file_load use the updated copy. [ mingo: Tidied up the changelog. ] Signed-off-by: Kairui Song Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20201014092429.1415040-2-kasong@redhat.com Signed-off-by: Sasha Levin --- arch/x86/kernel/kexec-bzimage64.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c index d2f4e706a428..b8b3b84308ed 100644 --- a/arch/x86/kernel/kexec-bzimage64.c +++ b/arch/x86/kernel/kexec-bzimage64.c @@ -210,8 +210,7 @@ setup_boot_parameters(struct kimage *image, struct boot_params *params, params->hdr.hardware_subarch = boot_params.hdr.hardware_subarch; /* Copying screen_info will do? */ - memcpy(¶ms->screen_info, &boot_params.screen_info, - sizeof(struct screen_info)); + memcpy(¶ms->screen_info, &screen_info, sizeof(struct screen_info)); /* Fill in memsize later */ params->screen_info.ext_mem_k = 0; -- cgit v1.2.3 From 147e3743cf7a97d1ccbb941e39b14649f916086c Mon Sep 17 00:00:00 2001 From: Vincent Whitchurch Date: Wed, 21 Oct 2020 11:53:59 +0200 Subject: of: Fix reserved-memory overlap detection [ Upstream commit ca05f33316559a04867295dd49f85aeedbfd6bfd ] The reserved-memory overlap detection code fails to detect overlaps if either of the regions starts at address 0x0. The code explicitly checks for and ignores such regions, apparently in order to ignore dynamically allocated regions which have an address of 0x0 at this point. These dynamically allocated regions also have a size of 0x0 at this point, so fix this by removing the check and sorting the dynamically allocated regions ahead of any static regions at address 0x0. For example, there are two overlaps in this case but they are not currently reported: foo@0 { reg = <0x0 0x2000>; }; bar@0 { reg = <0x0 0x1000>; }; baz@1000 { reg = <0x1000 0x1000>; }; quux { size = <0x1000>; }; but they are after this patch: OF: reserved mem: OVERLAP DETECTED! bar@0 (0x00000000--0x00001000) overlaps with foo@0 (0x00000000--0x00002000) OF: reserved mem: OVERLAP DETECTED! foo@0 (0x00000000--0x00002000) overlaps with baz@1000 (0x00001000--0x00002000) Signed-off-by: Vincent Whitchurch Link: https://lore.kernel.org/r/ded6fd6b47b58741aabdcc6967f73eca6a3f311e.1603273666.git-series.vincent.whitchurch@axis.com Signed-off-by: Rob Herring Signed-off-by: Sasha Levin --- drivers/of/of_reserved_mem.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c index 6bd610ee2cd7..3fb5d8caffd5 100644 --- a/drivers/of/of_reserved_mem.c +++ b/drivers/of/of_reserved_mem.c @@ -200,6 +200,16 @@ static int __init __rmem_cmp(const void *a, const void *b) if (ra->base > rb->base) return 1; + /* + * Put the dynamic allocations (address == 0, size == 0) before static + * allocations at address 0x0 so that overlap detection works + * correctly. + */ + if (ra->size < rb->size) + return -1; + if (ra->size > rb->size) + return 1; + return 0; } @@ -217,8 +227,7 @@ static void __init __rmem_check_for_overlap(void) this = &reserved_mem[i]; next = &reserved_mem[i + 1]; - if (!(this->base && next->base)) - continue; + if (this->base + this->size > next->base) { phys_addr_t this_end, next_end; -- cgit v1.2.3 From 6d7b41a67687ba3691c7f5057f254743258f4205 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Thu, 15 Oct 2020 11:36:40 +0200 Subject: drm/sun4i: frontend: Rework a bit the phase data [ Upstream commit 84c971b356379c621df595bd00c3114579dfa59f ] The scaler filter phase setup in the allwinner kernel has two different cases for setting up the scaler filter, the first one using different phase parameters for the two channels, and the second one reusing the first channel parameters on the second channel. The allwinner kernel has a third option where the horizontal phase of the second channel will be set to a different value than the vertical one (and seems like it's the same value than one used on the first channel). However, that code path seems to never be taken, so we can ignore it for now, and it's essentially what we're doing so far as well. Since we will have always the same values across each components of the filter setup for a given channel, we can simplify a bit our frontend structure by only storing the phase value we want to apply to a given channel. Signed-off-by: Maxime Ripard Acked-by: Jernej Skrabec Link: https://patchwork.freedesktop.org/patch/msgid/20201015093642.261440-1-maxime@cerno.tech Signed-off-by: Sasha Levin --- drivers/gpu/drm/sun4i/sun4i_frontend.c | 34 ++++++++-------------------------- drivers/gpu/drm/sun4i/sun4i_frontend.h | 6 +----- 2 files changed, 9 insertions(+), 31 deletions(-) diff --git a/drivers/gpu/drm/sun4i/sun4i_frontend.c b/drivers/gpu/drm/sun4i/sun4i_frontend.c index ec2a032e07b9..7462801b1fa8 100644 --- a/drivers/gpu/drm/sun4i/sun4i_frontend.c +++ b/drivers/gpu/drm/sun4i/sun4i_frontend.c @@ -443,17 +443,17 @@ int sun4i_frontend_update_formats(struct sun4i_frontend *frontend, * related to the scaler FIR filter phase parameters. */ regmap_write(frontend->regs, SUN4I_FRONTEND_CH0_HORZPHASE_REG, - frontend->data->ch_phase[0].horzphase); + frontend->data->ch_phase[0]); regmap_write(frontend->regs, SUN4I_FRONTEND_CH1_HORZPHASE_REG, - frontend->data->ch_phase[1].horzphase); + frontend->data->ch_phase[1]); regmap_write(frontend->regs, SUN4I_FRONTEND_CH0_VERTPHASE0_REG, - frontend->data->ch_phase[0].vertphase[0]); + frontend->data->ch_phase[0]); regmap_write(frontend->regs, SUN4I_FRONTEND_CH1_VERTPHASE0_REG, - frontend->data->ch_phase[1].vertphase[0]); + frontend->data->ch_phase[1]); regmap_write(frontend->regs, SUN4I_FRONTEND_CH0_VERTPHASE1_REG, - frontend->data->ch_phase[0].vertphase[1]); + frontend->data->ch_phase[0]); regmap_write(frontend->regs, SUN4I_FRONTEND_CH1_VERTPHASE1_REG, - frontend->data->ch_phase[1].vertphase[1]); + frontend->data->ch_phase[1]); /* * Checking the input format is sufficient since we currently only @@ -687,30 +687,12 @@ static const struct dev_pm_ops sun4i_frontend_pm_ops = { }; static const struct sun4i_frontend_data sun4i_a10_frontend = { - .ch_phase = { - { - .horzphase = 0, - .vertphase = { 0, 0 }, - }, - { - .horzphase = 0xfc000, - .vertphase = { 0xfc000, 0xfc000 }, - }, - }, + .ch_phase = { 0x000, 0xfc000 }, .has_coef_rdy = true, }; static const struct sun4i_frontend_data sun8i_a33_frontend = { - .ch_phase = { - { - .horzphase = 0x400, - .vertphase = { 0x400, 0x400 }, - }, - { - .horzphase = 0x400, - .vertphase = { 0x400, 0x400 }, - }, - }, + .ch_phase = { 0x400, 0x400 }, .has_coef_access_ctrl = true, }; diff --git a/drivers/gpu/drm/sun4i/sun4i_frontend.h b/drivers/gpu/drm/sun4i/sun4i_frontend.h index 0c382c1ddb0f..2e7b76e50c2b 100644 --- a/drivers/gpu/drm/sun4i/sun4i_frontend.h +++ b/drivers/gpu/drm/sun4i/sun4i_frontend.h @@ -115,11 +115,7 @@ struct reset_control; struct sun4i_frontend_data { bool has_coef_access_ctrl; bool has_coef_rdy; - - struct { - u32 horzphase; - u32 vertphase[2]; - } ch_phase[2]; + u32 ch_phase[2]; }; struct sun4i_frontend { -- cgit v1.2.3 From f743f73f42a77deb4832ec76a2403452429067f2 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Thu, 15 Oct 2020 11:36:41 +0200 Subject: drm/sun4i: frontend: Reuse the ch0 phase for RGB formats [ Upstream commit 2db9ef9d9e6ea89a9feb5338f58d1f8f83875577 ] When using the scaler on the A10-like frontend with single-planar formats, the current code will setup the channel 0 filter (used for the R or Y component) with a different phase parameter than the channel 1 filter (used for the G/B or U/V components). This creates a bleed out that keeps repeating on of the last line of the RGB plane across the rest of the display. The Allwinner BSP either applies the same phase parameter over both channels or use a separate one, the condition being whether the input format is YUV420 or not. Since YUV420 is both subsampled and multi-planar, and since YUYV is subsampled but single-planar, we can rule out the subsampling and assume that the condition is actually whether the format is single or multi-planar. And it looks like applying the same phase parameter over both channels for single-planar formats fixes our issue, while we keep the multi-planar formats working properly. Reported-by: Taras Galchenko Signed-off-by: Maxime Ripard Acked-by: Jernej Skrabec Link: https://patchwork.freedesktop.org/patch/msgid/20201015093642.261440-2-maxime@cerno.tech Signed-off-by: Sasha Levin --- drivers/gpu/drm/sun4i/sun4i_frontend.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/sun4i/sun4i_frontend.c b/drivers/gpu/drm/sun4i/sun4i_frontend.c index 7462801b1fa8..c4959d9e1639 100644 --- a/drivers/gpu/drm/sun4i/sun4i_frontend.c +++ b/drivers/gpu/drm/sun4i/sun4i_frontend.c @@ -407,6 +407,7 @@ int sun4i_frontend_update_formats(struct sun4i_frontend *frontend, struct drm_framebuffer *fb = state->fb; const struct drm_format_info *format = fb->format; uint64_t modifier = fb->modifier; + unsigned int ch1_phase_idx; u32 out_fmt_val; u32 in_fmt_val, in_mod_val, in_ps_val; unsigned int i; @@ -442,18 +443,19 @@ int sun4i_frontend_update_formats(struct sun4i_frontend *frontend, * I have no idea what this does exactly, but it seems to be * related to the scaler FIR filter phase parameters. */ + ch1_phase_idx = (format->num_planes > 1) ? 1 : 0; regmap_write(frontend->regs, SUN4I_FRONTEND_CH0_HORZPHASE_REG, frontend->data->ch_phase[0]); regmap_write(frontend->regs, SUN4I_FRONTEND_CH1_HORZPHASE_REG, - frontend->data->ch_phase[1]); + frontend->data->ch_phase[ch1_phase_idx]); regmap_write(frontend->regs, SUN4I_FRONTEND_CH0_VERTPHASE0_REG, frontend->data->ch_phase[0]); regmap_write(frontend->regs, SUN4I_FRONTEND_CH1_VERTPHASE0_REG, - frontend->data->ch_phase[1]); + frontend->data->ch_phase[ch1_phase_idx]); regmap_write(frontend->regs, SUN4I_FRONTEND_CH0_VERTPHASE1_REG, frontend->data->ch_phase[0]); regmap_write(frontend->regs, SUN4I_FRONTEND_CH1_VERTPHASE1_REG, - frontend->data->ch_phase[1]); + frontend->data->ch_phase[ch1_phase_idx]); /* * Checking the input format is sufficient since we currently only -- cgit v1.2.3 From 914fc55242614caf63427fdfae21368ea8cfca32 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Thu, 15 Oct 2020 11:36:42 +0200 Subject: drm/sun4i: frontend: Fix the scaler phase on A33 [ Upstream commit e3190b5e9462067714d267c40d8c8c1d0463dda3 ] The A33 has a different phase parameter in the Allwinner BSP on the channel1 than the one currently applied. Fix this. Signed-off-by: Maxime Ripard Acked-by: Jernej Skrabec Link: https://patchwork.freedesktop.org/patch/msgid/20201015093642.261440-3-maxime@cerno.tech Signed-off-by: Sasha Levin --- drivers/gpu/drm/sun4i/sun4i_frontend.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/sun4i/sun4i_frontend.c b/drivers/gpu/drm/sun4i/sun4i_frontend.c index c4959d9e1639..7186ba73d8e1 100644 --- a/drivers/gpu/drm/sun4i/sun4i_frontend.c +++ b/drivers/gpu/drm/sun4i/sun4i_frontend.c @@ -694,7 +694,7 @@ static const struct sun4i_frontend_data sun4i_a10_frontend = { }; static const struct sun4i_frontend_data sun8i_a33_frontend = { - .ch_phase = { 0x400, 0x400 }, + .ch_phase = { 0x400, 0xfc400 }, .has_coef_access_ctrl = true, }; -- cgit v1.2.3 From f77756ea6641fdb84301c75bcc7434715028628c Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Thu, 22 Oct 2020 16:58:41 -0400 Subject: blk-cgroup: Fix memleak on error path [ Upstream commit 52abfcbd57eefdd54737fc8c2dc79d8f46d4a3e5 ] If new_blkg allocation raced with blk_policy change and blkg_lookup_check fails, new_blkg is leaked. Acked-by: Tejun Heo Signed-off-by: Gabriel Krisman Bertazi Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- block/blk-cgroup.c | 1 + 1 file changed, 1 insertion(+) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 0c7addcd1985..a4793cfb68f2 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -861,6 +861,7 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, blkg = blkg_lookup_check(pos, pol, q); if (IS_ERR(blkg)) { ret = PTR_ERR(blkg); + blkg_free(new_blkg); goto fail_unlock; } -- cgit v1.2.3 From 3c52715ceaaecca7ad4ae0647dc789126890f5cd Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Thu, 22 Oct 2020 16:58:42 -0400 Subject: blk-cgroup: Pre-allocate tree node on blkg_conf_prep [ Upstream commit f255c19b3ab46d3cad3b1b2e1036f4c926cb1d0c ] Similarly to commit 457e490f2b741 ("blkcg: allocate struct blkcg_gq outside request queue spinlock"), blkg_create can also trigger occasional -ENOMEM failures at the radix insertion because any allocation inside blkg_create has to be non-blocking, making it more likely to fail. This causes trouble for userspace tools trying to configure io weights who need to deal with this condition. This patch reduces the occurrence of -ENOMEMs on this path by preloading the radix tree element on a GFP_KERNEL context, such that we guarantee the later non-blocking insertion won't fail. A similar solution exists in blkcg_init_queue for the same situation. Acked-by: Tejun Heo Signed-off-by: Gabriel Krisman Bertazi Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- block/blk-cgroup.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index a4793cfb68f2..3d34ac02d76e 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -855,6 +855,12 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, goto fail; } + if (radix_tree_preload(GFP_KERNEL)) { + blkg_free(new_blkg); + ret = -ENOMEM; + goto fail; + } + rcu_read_lock(); spin_lock_irq(&q->queue_lock); @@ -862,7 +868,7 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, if (IS_ERR(blkg)) { ret = PTR_ERR(blkg); blkg_free(new_blkg); - goto fail_unlock; + goto fail_preloaded; } if (blkg) { @@ -871,10 +877,12 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, blkg = blkg_create(pos, q, new_blkg); if (IS_ERR(blkg)) { ret = PTR_ERR(blkg); - goto fail_unlock; + goto fail_preloaded; } } + radix_tree_preload_end(); + if (pos == blkcg) goto success; } @@ -884,6 +892,8 @@ success: ctx->body = input; return 0; +fail_preloaded: + radix_tree_preload_end(); fail_unlock: spin_unlock_irq(&q->queue_lock); rcu_read_unlock(); -- cgit v1.2.3 From fd4fb5080725142dea7b9f88fffec162eae9a5f1 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sat, 10 Oct 2020 11:25:39 +0800 Subject: scsi: core: Don't start concurrent async scan on same host [ Upstream commit 831e3405c2a344018a18fcc2665acc5a38c3a707 ] The current scanning mechanism is supposed to fall back to a synchronous host scan if an asynchronous scan is in progress. However, this rule isn't strictly respected, scsi_prep_async_scan() doesn't hold scan_mutex when checking shost->async_scan. When scsi_scan_host() is called concurrently, two async scans on same host can be started and a hang in do_scan_async() is observed. Fixes this issue by checking & setting shost->async_scan atomically with shost->scan_mutex. Link: https://lore.kernel.org/r/20201010032539.426615-1-ming.lei@redhat.com Cc: Christoph Hellwig Cc: Ewan D. Milne Cc: Hannes Reinecke Cc: Bart Van Assche Reviewed-by: Lee Duncan Reviewed-by: Bart Van Assche Signed-off-by: Ming Lei Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/scsi_scan.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index 058079f915f1..79232cef1af1 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -1715,15 +1715,16 @@ static void scsi_sysfs_add_devices(struct Scsi_Host *shost) */ static struct async_scan_data *scsi_prep_async_scan(struct Scsi_Host *shost) { - struct async_scan_data *data; + struct async_scan_data *data = NULL; unsigned long flags; if (strncmp(scsi_scan_type, "sync", 4) == 0) return NULL; + mutex_lock(&shost->scan_mutex); if (shost->async_scan) { shost_printk(KERN_DEBUG, shost, "%s called twice\n", __func__); - return NULL; + goto err; } data = kmalloc(sizeof(*data), GFP_KERNEL); @@ -1734,7 +1735,6 @@ static struct async_scan_data *scsi_prep_async_scan(struct Scsi_Host *shost) goto err; init_completion(&data->prev_finished); - mutex_lock(&shost->scan_mutex); spin_lock_irqsave(shost->host_lock, flags); shost->async_scan = 1; spin_unlock_irqrestore(shost->host_lock, flags); @@ -1749,6 +1749,7 @@ static struct async_scan_data *scsi_prep_async_scan(struct Scsi_Host *shost) return data; err: + mutex_unlock(&shost->scan_mutex); kfree(data); return NULL; } -- cgit v1.2.3 From 1247f4e291888c18f4cd65047c888287206fdc0a Mon Sep 17 00:00:00 2001 From: "Tianci.Yin" Date: Wed, 14 Oct 2020 17:05:50 +0800 Subject: drm/amdgpu: add DID for navi10 blockchain SKU [ Upstream commit 8942881144a7365143f196f5eafed24783a424a3 ] Reviewed-by: Alex Deucher Reviewed-by: Guchun Chen Signed-off-by: Tianci.Yin Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index fa2c0f29ad4d..e8e172010416 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -1011,6 +1011,7 @@ static const struct pci_device_id pciidlist[] = { {0x1002, 0x7319, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10}, {0x1002, 0x731A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10}, {0x1002, 0x731B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10}, + {0x1002, 0x731E, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10}, {0x1002, 0x731F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10}, /* Navi14 */ {0x1002, 0x7340, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14}, -- cgit v1.2.3 From 2149aa583068d7bc6d9dc05132a8062dc0cfd209 Mon Sep 17 00:00:00 2001 From: Tyrel Datwyler Date: Sat, 24 Oct 2020 19:13:55 -0500 Subject: scsi: ibmvscsi: Fix potential race after loss of transport [ Upstream commit 665e0224a3d76f36da40bd9012270fa629aa42ed ] After a loss of transport due to an adapter migration or crash/disconnect from the host partner there is a tiny window where we can race adjusting the request_limit of the adapter. The request limit is atomically increased/decreased to track the number of inflight requests against the allowed limit of our VIOS partner. After a transport loss we set the request_limit to zero to reflect this state. However, there is a window where the adapter may attempt to queue a command because the transport loss event hasn't been fully processed yet and request_limit is still greater than zero. The hypercall to send the event will fail and the error path will increment the request_limit as a result. If the adapter processes the transport event prior to this increment the request_limit becomes out of sync with the adapter state and can result in SCSI commands being submitted on the now reset connection prior to an SRP Login resulting in a protocol violation. Fix this race by protecting request_limit with the host lock when changing the value via atomic_set() to indicate no transport. Link: https://lore.kernel.org/r/20201025001355.4527-1-tyreld@linux.ibm.com Signed-off-by: Tyrel Datwyler Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/ibmvscsi/ibmvscsi.c | 36 ++++++++++++++++++++++++++---------- 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c b/drivers/scsi/ibmvscsi/ibmvscsi.c index c5711c659b51..1ab0a61e3fb5 100644 --- a/drivers/scsi/ibmvscsi/ibmvscsi.c +++ b/drivers/scsi/ibmvscsi/ibmvscsi.c @@ -806,6 +806,22 @@ static void purge_requests(struct ibmvscsi_host_data *hostdata, int error_code) spin_unlock_irqrestore(hostdata->host->host_lock, flags); } +/** + * ibmvscsi_set_request_limit - Set the adapter request_limit in response to + * an adapter failure, reset, or SRP Login. Done under host lock to prevent + * race with SCSI command submission. + * @hostdata: adapter to adjust + * @limit: new request limit + */ +static void ibmvscsi_set_request_limit(struct ibmvscsi_host_data *hostdata, int limit) +{ + unsigned long flags; + + spin_lock_irqsave(hostdata->host->host_lock, flags); + atomic_set(&hostdata->request_limit, limit); + spin_unlock_irqrestore(hostdata->host->host_lock, flags); +} + /** * ibmvscsi_reset_host - Reset the connection to the server * @hostdata: struct ibmvscsi_host_data to reset @@ -813,7 +829,7 @@ static void purge_requests(struct ibmvscsi_host_data *hostdata, int error_code) static void ibmvscsi_reset_host(struct ibmvscsi_host_data *hostdata) { scsi_block_requests(hostdata->host); - atomic_set(&hostdata->request_limit, 0); + ibmvscsi_set_request_limit(hostdata, 0); purge_requests(hostdata, DID_ERROR); hostdata->action = IBMVSCSI_HOST_ACTION_RESET; @@ -1146,13 +1162,13 @@ static void login_rsp(struct srp_event_struct *evt_struct) dev_info(hostdata->dev, "SRP_LOGIN_REJ reason %u\n", evt_struct->xfer_iu->srp.login_rej.reason); /* Login failed. */ - atomic_set(&hostdata->request_limit, -1); + ibmvscsi_set_request_limit(hostdata, -1); return; default: dev_err(hostdata->dev, "Invalid login response typecode 0x%02x!\n", evt_struct->xfer_iu->srp.login_rsp.opcode); /* Login failed. */ - atomic_set(&hostdata->request_limit, -1); + ibmvscsi_set_request_limit(hostdata, -1); return; } @@ -1163,7 +1179,7 @@ static void login_rsp(struct srp_event_struct *evt_struct) * This value is set rather than added to request_limit because * request_limit could have been set to -1 by this client. */ - atomic_set(&hostdata->request_limit, + ibmvscsi_set_request_limit(hostdata, be32_to_cpu(evt_struct->xfer_iu->srp.login_rsp.req_lim_delta)); /* If we had any pending I/Os, kick them */ @@ -1195,13 +1211,13 @@ static int send_srp_login(struct ibmvscsi_host_data *hostdata) login->req_buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT | SRP_BUF_FORMAT_INDIRECT); - spin_lock_irqsave(hostdata->host->host_lock, flags); /* Start out with a request limit of 0, since this is negotiated in * the login request we are just sending and login requests always * get sent by the driver regardless of request_limit. */ - atomic_set(&hostdata->request_limit, 0); + ibmvscsi_set_request_limit(hostdata, 0); + spin_lock_irqsave(hostdata->host->host_lock, flags); rc = ibmvscsi_send_srp_event(evt_struct, hostdata, login_timeout * 2); spin_unlock_irqrestore(hostdata->host->host_lock, flags); dev_info(hostdata->dev, "sent SRP login\n"); @@ -1781,7 +1797,7 @@ static void ibmvscsi_handle_crq(struct viosrp_crq *crq, return; case VIOSRP_CRQ_XPORT_EVENT: /* Hypervisor telling us the connection is closed */ scsi_block_requests(hostdata->host); - atomic_set(&hostdata->request_limit, 0); + ibmvscsi_set_request_limit(hostdata, 0); if (crq->format == 0x06) { /* We need to re-setup the interpartition connection */ dev_info(hostdata->dev, "Re-enabling adapter!\n"); @@ -2137,12 +2153,12 @@ static void ibmvscsi_do_work(struct ibmvscsi_host_data *hostdata) } hostdata->action = IBMVSCSI_HOST_ACTION_NONE; + spin_unlock_irqrestore(hostdata->host->host_lock, flags); if (rc) { - atomic_set(&hostdata->request_limit, -1); + ibmvscsi_set_request_limit(hostdata, -1); dev_err(hostdata->dev, "error after %s\n", action); } - spin_unlock_irqrestore(hostdata->host->host_lock, flags); scsi_unblock_requests(hostdata->host); } @@ -2226,7 +2242,7 @@ static int ibmvscsi_probe(struct vio_dev *vdev, const struct vio_device_id *id) init_waitqueue_head(&hostdata->work_wait_q); hostdata->host = host; hostdata->dev = dev; - atomic_set(&hostdata->request_limit, -1); + ibmvscsi_set_request_limit(hostdata, -1); hostdata->host->max_sectors = IBMVSCSI_MAX_SECTORS_DEFAULT; if (map_persist_bufs(hostdata)) { -- cgit v1.2.3 From 2fd9e60760ef30243c883c2556f86bc7e04d18b9 Mon Sep 17 00:00:00 2001 From: Jeff Vander Stoep Date: Fri, 23 Oct 2020 16:37:57 +0200 Subject: vsock: use ns_capable_noaudit() on socket create [ Upstream commit af545bb5ee53f5261db631db2ac4cde54038bdaf ] During __vsock_create() CAP_NET_ADMIN is used to determine if the vsock_sock->trusted should be set to true. This value is used later for determing if a remote connection should be allowed to connect to a restricted VM. Unfortunately, if the caller doesn't have CAP_NET_ADMIN, an audit message such as an selinux denial is generated even if the caller does not want a trusted socket. Logging errors on success is confusing. To avoid this, switch the capable(CAP_NET_ADMIN) check to the noaudit version. Reported-by: Roman Kiryanov https://android-review.googlesource.com/c/device/generic/goldfish/+/1468545/ Signed-off-by: Jeff Vander Stoep Reviewed-by: James Morris Link: https://lore.kernel.org/r/20201023143757.377574-1-jeffv@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/vmw_vsock/af_vsock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 7bd6c8199ca6..3a074a03d382 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -621,7 +621,7 @@ struct sock *__vsock_create(struct net *net, vsk->owner = get_cred(psk->owner); vsk->connect_timeout = psk->connect_timeout; } else { - vsk->trusted = capable(CAP_NET_ADMIN); + vsk->trusted = ns_capable_noaudit(&init_user_ns, CAP_NET_ADMIN); vsk->owner = get_current_cred(); vsk->connect_timeout = VSOCK_DEFAULT_CONNECT_TIMEOUT; } -- cgit v1.2.3 From 8c9c03432500dbeac952dcf806f1f7caa348363b Mon Sep 17 00:00:00 2001 From: zhenwei pi Date: Sun, 25 Oct 2020 19:51:24 +0800 Subject: nvme-rdma: handle unexpected nvme completion data length [ Upstream commit 25c1ca6ecaba3b751d3f7ff92d5cddff3b05f8d0 ] Receiving a zero length message leads to the following warnings because the CQE is processed twice: refcount_t: underflow; use-after-free. WARNING: CPU: 0 PID: 0 at lib/refcount.c:28 RIP: 0010:refcount_warn_saturate+0xd9/0xe0 Call Trace: nvme_rdma_recv_done+0xf3/0x280 [nvme_rdma] __ib_process_cq+0x76/0x150 [ib_core] ... Sanity check the received data length, to avoids this. Thanks to Chao Leng & Sagi for suggestions. Signed-off-by: zhenwei pi Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Signed-off-by: Sasha Levin --- drivers/nvme/host/rdma.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index a41ee9feab8e..e957ad0a07f5 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1520,6 +1520,14 @@ static void nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc) return; } + /* sanity checking for received data length */ + if (unlikely(wc->byte_len < len)) { + dev_err(queue->ctrl->ctrl.device, + "Unexpected nvme completion length(%d)\n", wc->byte_len); + nvme_rdma_error_recovery(queue->ctrl); + return; + } + ib_dma_sync_single_for_cpu(ibdev, qe->dma, len, DMA_FROM_DEVICE); /* * AEN requests are special as they don't time out and can -- cgit v1.2.3 From a04cec1dd2937880895364a0ccade0e5e4d51683 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Thu, 22 Oct 2020 16:58:21 -0700 Subject: nvmet: fix a NULL pointer dereference when tracing the flush command [ Upstream commit 3c3751f2daf6675f6b5bee83b792354c272f5bd2 ] When target side trace in turned on and flush command is issued from the host it results in the following Oops. [ 856.789724] BUG: kernel NULL pointer dereference, address: 0000000000000068 [ 856.790686] #PF: supervisor read access in kernel mode [ 856.791262] #PF: error_code(0x0000) - not-present page [ 856.791863] PGD 6d7110067 P4D 6d7110067 PUD 66f0ad067 PMD 0 [ 856.792527] Oops: 0000 [#1] SMP NOPTI [ 856.792950] CPU: 15 PID: 7034 Comm: nvme Tainted: G OE 5.9.0nvme-5.9+ #71 [ 856.793790] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0-59-gc9ba5276e3214 [ 856.794956] RIP: 0010:trace_event_raw_event_nvmet_req_init+0x13e/0x170 [nvmet] [ 856.795734] Code: 41 5c 41 5d c3 31 d2 31 f6 e8 4e 9b b8 e0 e9 0e ff ff ff 49 8b 55 00 48 8b 38 8b 0 [ 856.797740] RSP: 0018:ffffc90001be3a60 EFLAGS: 00010246 [ 856.798375] RAX: 0000000000000000 RBX: ffff8887e7d2c01c RCX: 0000000000000000 [ 856.799234] RDX: 0000000000000020 RSI: 0000000057e70ea2 RDI: ffff8887e7d2c034 [ 856.800088] RBP: ffff88869f710578 R08: ffff888807500d40 R09: 00000000fffffffe [ 856.800951] R10: 0000000064c66670 R11: 00000000ef955201 R12: ffff8887e7d2c034 [ 856.801807] R13: ffff88869f7105c8 R14: 0000000000000040 R15: ffff88869f710440 [ 856.802667] FS: 00007f6a22bd8780(0000) GS:ffff888813a00000(0000) knlGS:0000000000000000 [ 856.803635] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 856.804367] CR2: 0000000000000068 CR3: 00000006d73e0000 CR4: 00000000003506e0 [ 856.805283] Call Trace: [ 856.805613] nvmet_req_init+0x27c/0x480 [nvmet] [ 856.806200] nvme_loop_queue_rq+0xcb/0x1d0 [nvme_loop] [ 856.806862] blk_mq_dispatch_rq_list+0x123/0x7b0 [ 856.807459] ? kvm_sched_clock_read+0x14/0x30 [ 856.808025] __blk_mq_sched_dispatch_requests+0xc7/0x170 [ 856.808708] blk_mq_sched_dispatch_requests+0x30/0x60 [ 856.809372] __blk_mq_run_hw_queue+0x70/0x100 [ 856.809935] __blk_mq_delay_run_hw_queue+0x156/0x170 [ 856.810574] blk_mq_run_hw_queue+0x86/0xe0 [ 856.811104] blk_mq_sched_insert_request+0xef/0x160 [ 856.811733] blk_execute_rq+0x69/0xc0 [ 856.812212] ? blk_mq_rq_ctx_init+0xd0/0x230 [ 856.812784] nvme_execute_passthru_rq+0x57/0x130 [nvme_core] [ 856.813461] nvme_submit_user_cmd+0xeb/0x300 [nvme_core] [ 856.814099] nvme_user_cmd.isra.82+0x11e/0x1a0 [nvme_core] [ 856.814752] blkdev_ioctl+0x1dc/0x2c0 [ 856.815197] block_ioctl+0x3f/0x50 [ 856.815606] __x64_sys_ioctl+0x84/0xc0 [ 856.816074] do_syscall_64+0x33/0x40 [ 856.816533] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 856.817168] RIP: 0033:0x7f6a222ed107 [ 856.817617] Code: 44 00 00 48 8b 05 81 cd 2c 00 64 c7 00 26 00 00 00 48 c7 c0 ff ff ff ff c3 66 2e 8 [ 856.819901] RSP: 002b:00007ffca848f058 EFLAGS: 00000202 ORIG_RAX: 0000000000000010 [ 856.820846] RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 00007f6a222ed107 [ 856.821726] RDX: 00007ffca848f060 RSI: 00000000c0484e43 RDI: 0000000000000003 [ 856.822603] RBP: 0000000000000003 R08: 000000000000003f R09: 0000000000000005 [ 856.823478] R10: 00007ffca848ece0 R11: 0000000000000202 R12: 00007ffca84912d3 [ 856.824359] R13: 00007ffca848f4d0 R14: 0000000000000002 R15: 000000000067e900 [ 856.825236] Modules linked in: nvme_loop(OE) nvmet(OE) nvme_fabrics(OE) null_blk nvme(OE) nvme_corel Move the nvmet_req_init() tracepoint after we parse the command in nvmet_req_init() so that we can get rid of the duplicate nvmet_find_namespace() call. Rename __assign_disk_name() -> __assign_req_name(). Now that we call tracepoint after parsing the command simplify the newly added __assign_req_name() which fixes this bug. Signed-off-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig Signed-off-by: Sasha Levin --- drivers/nvme/target/core.c | 4 ++-- drivers/nvme/target/trace.h | 21 +++++++-------------- 2 files changed, 9 insertions(+), 16 deletions(-) diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 6b2f1e290fa7..cca5a00c098a 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -878,8 +878,6 @@ bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq, req->error_loc = NVMET_NO_ERROR_LOC; req->error_slba = 0; - trace_nvmet_req_init(req, req->cmd); - /* no support for fused commands yet */ if (unlikely(flags & (NVME_CMD_FUSE_FIRST | NVME_CMD_FUSE_SECOND))) { req->error_loc = offsetof(struct nvme_common_command, flags); @@ -913,6 +911,8 @@ bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq, if (status) goto fail; + trace_nvmet_req_init(req, req->cmd); + if (unlikely(!percpu_ref_tryget_live(&sq->ref))) { status = NVME_SC_INVALID_FIELD | NVME_SC_DNR; goto fail; diff --git a/drivers/nvme/target/trace.h b/drivers/nvme/target/trace.h index e645caa882dd..3f61b6657175 100644 --- a/drivers/nvme/target/trace.h +++ b/drivers/nvme/target/trace.h @@ -46,19 +46,12 @@ static inline struct nvmet_ctrl *nvmet_req_to_ctrl(struct nvmet_req *req) return req->sq->ctrl; } -static inline void __assign_disk_name(char *name, struct nvmet_req *req, - bool init) +static inline void __assign_req_name(char *name, struct nvmet_req *req) { - struct nvmet_ctrl *ctrl = nvmet_req_to_ctrl(req); - struct nvmet_ns *ns; - - if ((init && req->sq->qid) || (!init && req->cq->qid)) { - ns = nvmet_find_namespace(ctrl, req->cmd->rw.nsid); - strncpy(name, ns->device_path, DISK_NAME_LEN); - return; - } - - memset(name, 0, DISK_NAME_LEN); + if (req->ns) + strncpy(name, req->ns->device_path, DISK_NAME_LEN); + else + memset(name, 0, DISK_NAME_LEN); } #endif @@ -81,7 +74,7 @@ TRACE_EVENT(nvmet_req_init, TP_fast_assign( __entry->cmd = cmd; __entry->ctrl = nvmet_req_to_ctrl(req); - __assign_disk_name(__entry->disk, req, true); + __assign_req_name(__entry->disk, req); __entry->qid = req->sq->qid; __entry->cid = cmd->common.command_id; __entry->opcode = cmd->common.opcode; @@ -121,7 +114,7 @@ TRACE_EVENT(nvmet_req_complete, __entry->cid = req->cqe->command_id; __entry->result = le64_to_cpu(req->cqe->result.u64); __entry->status = le16_to_cpu(req->cqe->status) >> 1; - __assign_disk_name(__entry->disk, req, false); + __assign_req_name(__entry->disk, req); ), TP_printk("nvmet%s: %sqid=%d, cmdid=%u, res=%#llx, status=%#x", __print_ctrl_name(__entry->ctrl), -- cgit v1.2.3 From 16476c2b26caf75bf64d558e4f511fef0c67b8e9 Mon Sep 17 00:00:00 2001 From: Hoegeun Kwon Date: Tue, 27 Oct 2020 13:14:42 +0900 Subject: drm/vc4: drv: Add error handding for bind [ Upstream commit 9ce0af3e9573fb84c4c807183d13ea2a68271e4b ] There is a problem that if vc4_drm bind fails, a memory leak occurs on the drm_property_create side. Add error handding for drm_mode_config. Signed-off-by: Hoegeun Kwon Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20201027041442.30352-2-hoegeun.kwon@samsung.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/vc4/vc4_drv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c index 5e6fb6c2307f..0d78ba017a29 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.c +++ b/drivers/gpu/drm/vc4/vc4_drv.c @@ -309,6 +309,7 @@ unbind_all: component_unbind_all(dev, drm); gem_destroy: vc4_gem_destroy(drm); + drm_mode_config_cleanup(drm); vc4_bo_cache_destroy(drm); dev_put: drm_dev_put(drm); -- cgit v1.2.3 From 937753df482c57b7e761411bcb257a5f0e410d16 Mon Sep 17 00:00:00 2001 From: Zhang Qilong Date: Tue, 27 Oct 2020 21:49:01 +0800 Subject: ACPI: NFIT: Fix comparison to '-ENXIO' [ Upstream commit 85f971b65a692b68181438e099b946cc06ed499b ] Initial value of rc is '-ENXIO', and we should use the initial value to check it. Signed-off-by: Zhang Qilong Reviewed-by: Pankaj Gupta Reviewed-by: Vishal Verma [ rjw: Subject edit ] Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/acpi/nfit/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index 12d980aafc5f..9d78f29cf996 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -1553,7 +1553,7 @@ static ssize_t format1_show(struct device *dev, le16_to_cpu(nfit_dcr->dcr->code)); break; } - if (rc != ENXIO) + if (rc != -ENXIO) break; } mutex_unlock(&acpi_desc->init_mutex); -- cgit v1.2.3 From 4dab0fd40323f55c94c2377cf29aaac1b4408176 Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Thu, 22 Oct 2020 08:55:03 +0800 Subject: usb: cdns3: gadget: suspicious implicit sign extension [ Upstream commit 5fca3f062879f8e5214c56f3e3e2be6727900f5d ] The code: trb->length = cpu_to_le32(TRB_BURST_LEN(priv_ep->trb_burst_size) | TRB_LEN(length)); TRB_BURST_LEN(priv_ep->trb_burst_size) may be overflow for int 32 if priv_ep->trb_burst_size is equal or larger than 0x80; Below is the Coverity warning: sign_extension: Suspicious implicit sign extension: priv_ep->trb_burst_size with type u8 (8 bits, unsigned) is promoted in priv_ep->trb_burst_size << 24 to type int (32 bits, signed), then sign-extended to type unsigned long (64 bits, unsigned). If priv_ep->trb_burst_size << 24 is greater than 0x7FFFFFFF, the upper bits of the result will all be 1. To fix it, it needs to add an explicit cast to unsigned int type for ((p) << 24). Reviewed-by: Jun Li Signed-off-by: Peter Chen Signed-off-by: Sasha Levin --- drivers/usb/cdns3/gadget.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/cdns3/gadget.h b/drivers/usb/cdns3/gadget.h index bc4024041ef2..ec5c05454531 100644 --- a/drivers/usb/cdns3/gadget.h +++ b/drivers/usb/cdns3/gadget.h @@ -1057,7 +1057,7 @@ struct cdns3_trb { #define TRB_TDL_SS_SIZE_GET(p) (((p) & GENMASK(23, 17)) >> 17) /* transfer_len bitmasks - bits 31:24 */ -#define TRB_BURST_LEN(p) (((p) << 24) & GENMASK(31, 24)) +#define TRB_BURST_LEN(p) ((unsigned int)((p) << 24) & GENMASK(31, 24)) #define TRB_BURST_LEN_GET(p) (((p) & GENMASK(31, 24)) >> 24) /* Data buffer pointer bitmasks*/ -- cgit v1.2.3 From 7d0de6f87257187495b17d31591959262ebe6b7e Mon Sep 17 00:00:00 2001 From: Ralph Campbell Date: Mon, 31 Aug 2020 13:31:11 -0700 Subject: drm/nouveau/nouveau: fix the start/end range for migration [ Upstream commit cfa736f5a6f31ca8a05459b5720aac030247ad1b ] The user level OpenCL code shouldn't have to align start and end addresses to a page boundary. That is better handled in the nouveau driver. The npages field is also redundant since it can be computed from the start and end addresses. Signed-off-by: Ralph Campbell Signed-off-by: Ben Skeggs Signed-off-by: Sasha Levin --- drivers/gpu/drm/nouveau/nouveau_svm.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c b/drivers/gpu/drm/nouveau/nouveau_svm.c index 824654742a60..0be4668c780b 100644 --- a/drivers/gpu/drm/nouveau/nouveau_svm.c +++ b/drivers/gpu/drm/nouveau/nouveau_svm.c @@ -112,11 +112,11 @@ nouveau_svmm_bind(struct drm_device *dev, void *data, struct nouveau_cli *cli = nouveau_cli(file_priv); struct drm_nouveau_svm_bind *args = data; unsigned target, cmd, priority; - unsigned long addr, end, size; + unsigned long addr, end; struct mm_struct *mm; args->va_start &= PAGE_MASK; - args->va_end &= PAGE_MASK; + args->va_end = ALIGN(args->va_end, PAGE_SIZE); /* Sanity check arguments */ if (args->reserved0 || args->reserved1) @@ -125,8 +125,6 @@ nouveau_svmm_bind(struct drm_device *dev, void *data, return -EINVAL; if (args->va_start >= args->va_end) return -EINVAL; - if (!args->npages) - return -EINVAL; cmd = args->header >> NOUVEAU_SVM_BIND_COMMAND_SHIFT; cmd &= NOUVEAU_SVM_BIND_COMMAND_MASK; @@ -158,12 +156,6 @@ nouveau_svmm_bind(struct drm_device *dev, void *data, if (args->stride) return -EINVAL; - size = ((unsigned long)args->npages) << PAGE_SHIFT; - if ((args->va_start + size) <= args->va_start) - return -EINVAL; - if ((args->va_start + size) > args->va_end) - return -EINVAL; - /* * Ok we are ask to do something sane, for now we only support migrate * commands but we will add things like memory policy (what to do on @@ -178,7 +170,7 @@ nouveau_svmm_bind(struct drm_device *dev, void *data, return -EINVAL; } - for (addr = args->va_start, end = args->va_start + size; addr < end;) { + for (addr = args->va_start, end = args->va_end; addr < end;) { struct vm_area_struct *vma; unsigned long next; -- cgit v1.2.3 From eceb94287dbf239848f18875429eaa0a076ff146 Mon Sep 17 00:00:00 2001 From: Karol Herbst Date: Wed, 7 Oct 2020 00:08:09 +0200 Subject: drm/nouveau/gem: fix "refcount_t: underflow; use-after-free" [ Upstream commit 925681454d7b557d404b5d28ef4469fac1b2e105 ] we can't use nouveau_bo_ref here as no ttm object was allocated and nouveau_bo_ref mainly deals with that. Simply deallocate the object. Signed-off-by: Karol Herbst Signed-off-by: Ben Skeggs Signed-off-by: Sasha Levin --- drivers/gpu/drm/nouveau/nouveau_gem.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c index 7d39d4949ee7..2dd9fcab464b 100644 --- a/drivers/gpu/drm/nouveau/nouveau_gem.c +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c @@ -197,7 +197,8 @@ nouveau_gem_new(struct nouveau_cli *cli, u64 size, int align, uint32_t domain, * to the caller, instead of a normal nouveau_bo ttm reference. */ ret = drm_gem_object_init(drm->dev, &nvbo->bo.base, size); if (ret) { - nouveau_bo_ref(NULL, &nvbo); + drm_gem_object_release(&nvbo->bo.base); + kfree(nvbo); return ret; } -- cgit v1.2.3 From 8ee6a0f25457605e4f2c72dddebd1f73f7140fab Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Wed, 28 Oct 2020 14:26:14 -0400 Subject: arm64/smp: Move rcu_cpu_starting() earlier [ Upstream commit ce3d31ad3cac765484463b4f5a0b6b1f8f1a963e ] The call to rcu_cpu_starting() in secondary_start_kernel() is not early enough in the CPU-hotplug onlining process, which results in lockdep splats as follows: WARNING: suspicious RCU usage ----------------------------- kernel/locking/lockdep.c:3497 RCU-list traversed in non-reader section!! other info that might help us debug this: RCU used illegally from offline CPU! rcu_scheduler_active = 1, debug_locks = 1 no locks held by swapper/1/0. Call trace: dump_backtrace+0x0/0x3c8 show_stack+0x14/0x60 dump_stack+0x14c/0x1c4 lockdep_rcu_suspicious+0x134/0x14c __lock_acquire+0x1c30/0x2600 lock_acquire+0x274/0xc48 _raw_spin_lock+0xc8/0x140 vprintk_emit+0x90/0x3d0 vprintk_default+0x34/0x40 vprintk_func+0x378/0x590 printk+0xa8/0xd4 __cpuinfo_store_cpu+0x71c/0x868 cpuinfo_store_cpu+0x2c/0xc8 secondary_start_kernel+0x244/0x318 This is avoided by moving the call to rcu_cpu_starting up near the beginning of the secondary_start_kernel() function. Signed-off-by: Qian Cai Acked-by: Paul E. McKenney Link: https://lore.kernel.org/lkml/160223032121.7002.1269740091547117869.tip-bot2@tip-bot2/ Link: https://lore.kernel.org/r/20201028182614.13655-1-cai@redhat.com Signed-off-by: Will Deacon Signed-off-by: Sasha Levin --- arch/arm64/kernel/smp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 102dc3e7f2e1..426409e0d071 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -215,6 +215,7 @@ asmlinkage notrace void secondary_start_kernel(void) if (system_uses_irq_prio_masking()) init_gic_priority_masking(); + rcu_cpu_starting(cpu); preempt_disable(); trace_hardirqs_off(); -- cgit v1.2.3 From cfd9d7137759127d67adf5b1f9a7b171a1bac52f Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Sun, 8 Nov 2020 08:28:09 -0500 Subject: Revert "coresight: Make sysfs functional on topologies with per core sink" This reverts commit 8fd52a21ab570e80f84f39e12affce42a5300e91. Guenter Roeck writes: I get the following build warning in v5.4.75. drivers/hwtracing/coresight/coresight-etm-perf.c: In function 'etm_setup_aux': drivers/hwtracing/coresight/coresight-etm-perf.c:226:37: warning: passing argument 1 of 'coresight_get_enabled_sink' makes pointer from integer without a cast Actually, the warning is fatal, since the call is sink = coresight_get_enabled_sink(true); However, the argument to coresight_get_enabled_sink() is now a pointer. The parameter change was introduced with commit 8fd52a21ab57 ("coresight: Make sysfs functional on topologies with per core sink"). In the upstream kernel, the call is removed with commit bb1860efc817 ("coresight: etm: perf: Sink selection using sysfs is deprecated"). That commit alone would, however, likely not solve the problem. It looks like at least two more commits would be needed. 716f5652a131 coresight: etm: perf: Fix warning caused by etm_setup_aux failure 8e264c52e1da coresight: core: Allow the coresight core driver to be built as a module 39a7661dcf65 coresight: Fix uninitialised pointer bug in etm_setup_aux() Looking into the coresight code, I see several additional commits affecting the sysfs interface since v5.4. I have no idea what would actually be needed for stable code in v5.4.y, short of applying them all. With all this in mind, I would suggest to revert commit 8fd52a21ab57 ("coresight: Make sysfs functional on topologies with per core sink") from v5.4.y, especially since it is not marked as bug fix or for stable. Signed-off-by: Sasha Levin --- drivers/hwtracing/coresight/coresight-priv.h | 3 +- drivers/hwtracing/coresight/coresight.c | 62 ++++++++++++++++------------ 2 files changed, 36 insertions(+), 29 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-priv.h b/drivers/hwtracing/coresight/coresight-priv.h index dfd24b85a577..82e563cdc879 100644 --- a/drivers/hwtracing/coresight/coresight-priv.h +++ b/drivers/hwtracing/coresight/coresight-priv.h @@ -147,8 +147,7 @@ static inline void coresight_write_reg_pair(void __iomem *addr, u64 val, void coresight_disable_path(struct list_head *path); int coresight_enable_path(struct list_head *path, u32 mode, void *sink_data); struct coresight_device *coresight_get_sink(struct list_head *path); -struct coresight_device * -coresight_get_enabled_sink(struct coresight_device *source); +struct coresight_device *coresight_get_enabled_sink(bool reset); struct coresight_device *coresight_get_sink_by_id(u32 id); struct list_head *coresight_build_path(struct coresight_device *csdev, struct coresight_device *sink); diff --git a/drivers/hwtracing/coresight/coresight.c b/drivers/hwtracing/coresight/coresight.c index 90ecd04a2f20..0bbce0d29158 100644 --- a/drivers/hwtracing/coresight/coresight.c +++ b/drivers/hwtracing/coresight/coresight.c @@ -481,46 +481,50 @@ struct coresight_device *coresight_get_sink(struct list_head *path) return csdev; } -static struct coresight_device * -coresight_find_enabled_sink(struct coresight_device *csdev) +static int coresight_enabled_sink(struct device *dev, const void *data) { - int i; - struct coresight_device *sink; + const bool *reset = data; + struct coresight_device *csdev = to_coresight_device(dev); if ((csdev->type == CORESIGHT_DEV_TYPE_SINK || csdev->type == CORESIGHT_DEV_TYPE_LINKSINK) && - csdev->activated) - return csdev; - - /* - * Recursively explore each port found on this element. - */ - for (i = 0; i < csdev->pdata->nr_outport; i++) { - struct coresight_device *child_dev; + csdev->activated) { + /* + * Now that we have a handle on the sink for this session, + * disable the sysFS "enable_sink" flag so that possible + * concurrent perf session that wish to use another sink don't + * trip on it. Doing so has no ramification for the current + * session. + */ + if (*reset) + csdev->activated = false; - child_dev = csdev->pdata->conns[i].child_dev; - if (child_dev) - sink = coresight_find_enabled_sink(child_dev); - if (sink) - return sink; + return 1; } - return NULL; + return 0; } /** - * coresight_get_enabled_sink - returns the first enabled sink using - * connection based search starting from the source reference + * coresight_get_enabled_sink - returns the first enabled sink found on the bus + * @deactivate: Whether the 'enable_sink' flag should be reset * - * @source: Coresight source device reference + * When operated from perf the deactivate parameter should be set to 'true'. + * That way the "enabled_sink" flag of the sink that was selected can be reset, + * allowing for other concurrent perf sessions to choose a different sink. + * + * When operated from sysFS users have full control and as such the deactivate + * parameter should be set to 'false', hence mandating users to explicitly + * clear the flag. */ -struct coresight_device * -coresight_get_enabled_sink(struct coresight_device *source) +struct coresight_device *coresight_get_enabled_sink(bool deactivate) { - if (!source) - return NULL; + struct device *dev = NULL; + + dev = bus_find_device(&coresight_bustype, NULL, &deactivate, + coresight_enabled_sink); - return coresight_find_enabled_sink(source); + return dev ? to_coresight_device(dev) : NULL; } static int coresight_sink_by_id(struct device *dev, const void *data) @@ -760,7 +764,11 @@ int coresight_enable(struct coresight_device *csdev) goto out; } - sink = coresight_get_enabled_sink(csdev); + /* + * Search for a valid sink for this session but don't reset the + * "enable_sink" flag in sysFS. Users get to do that explicitly. + */ + sink = coresight_get_enabled_sink(false); if (!sink) { ret = -EINVAL; goto out; -- cgit v1.2.3 From 642181fe3567419d84d2457b58f262c37467f525 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Sun, 8 Nov 2020 16:38:06 +0100 Subject: vt: Disable KD_FONT_OP_COPY commit 3c4e0dff2095c579b142d5a0693257f1c58b4804 upstream. It's buggy: On Fri, Nov 06, 2020 at 10:30:08PM +0800, Minh Yuan wrote: > We recently discovered a slab-out-of-bounds read in fbcon in the latest > kernel ( v5.10-rc2 for now ). The root cause of this vulnerability is that > "fbcon_do_set_font" did not handle "vc->vc_font.data" and > "vc->vc_font.height" correctly, and the patch > for VT_RESIZEX can't handle this > issue. > > Specifically, we use KD_FONT_OP_SET to set a small font.data for tty6, and > use KD_FONT_OP_SET again to set a large font.height for tty1. After that, > we use KD_FONT_OP_COPY to assign tty6's vc_font.data to tty1's vc_font.data > in "fbcon_do_set_font", while tty1 retains the original larger > height. Obviously, this will cause an out-of-bounds read, because we can > access a smaller vc_font.data with a larger vc_font.height. Further there was only one user ever. - Android's loadfont, busybox and console-tools only ever use OP_GET and OP_SET - fbset documentation only mentions the kernel cmdline font: option, not anything else. - systemd used OP_COPY before release 232 published in Nov 2016 Now unfortunately the crucial report seems to have gone down with gmane, and the commit message doesn't say much. But the pull request hints at OP_COPY being broken https://github.com/systemd/systemd/pull/3651 So in other words, this never worked, and the only project which foolishly every tried to use it, realized that rather quickly too. Instead of trying to fix security issues here on dead code by adding missing checks, fix the entire thing by removing the functionality. Note that systemd code using the OP_COPY function ignored the return value, so it doesn't matter what we're doing here really - just in case a lone server somewhere happens to be extremely unlucky and running an affected old version of systemd. The relevant code from font_copy_to_all_vcs() in systemd was: /* copy font from active VT, where the font was uploaded to */ cfo.op = KD_FONT_OP_COPY; cfo.height = vcs.v_active-1; /* tty1 == index 0 */ (void) ioctl(vcfd, KDFONTOP, &cfo); Note this just disables the ioctl, garbage collecting the now unused callbacks is left for -next. v2: Tetsuo found the old mail, which allowed me to find it on another archive. Add the link too. Acked-by: Peilin Ye Reported-by: Minh Yuan References: https://lists.freedesktop.org/archives/systemd-devel/2016-June/036935.html References: https://github.com/systemd/systemd/pull/3651 Cc: Greg KH Cc: Peilin Ye Cc: Tetsuo Handa Signed-off-by: Daniel Vetter Link: https://lore.kernel.org/r/20201108153806.3140315-1-daniel.vetter@ffwll.ch Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/vt.c | 24 ++---------------------- 1 file changed, 2 insertions(+), 22 deletions(-) diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index d07a9c9c7608..c55b6d7ccaf7 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -4620,27 +4620,6 @@ static int con_font_default(struct vc_data *vc, struct console_font_op *op) return rc; } -static int con_font_copy(struct vc_data *vc, struct console_font_op *op) -{ - int con = op->height; - int rc; - - - console_lock(); - if (vc->vc_mode != KD_TEXT) - rc = -EINVAL; - else if (!vc->vc_sw->con_font_copy) - rc = -ENOSYS; - else if (con < 0 || !vc_cons_allocated(con)) - rc = -ENOTTY; - else if (con == vc->vc_num) /* nothing to do */ - rc = 0; - else - rc = vc->vc_sw->con_font_copy(vc, con); - console_unlock(); - return rc; -} - int con_font_op(struct vc_data *vc, struct console_font_op *op) { switch (op->op) { @@ -4651,7 +4630,8 @@ int con_font_op(struct vc_data *vc, struct console_font_op *op) case KD_FONT_OP_SET_DEFAULT: return con_font_default(vc, op); case KD_FONT_OP_COPY: - return con_font_copy(vc, op); + /* was buggy and never really used */ + return -EINVAL; } return -ENOSYS; } -- cgit v1.2.3 From beeb658cfd3544ceca894375c36b6572e4ae7a5f Mon Sep 17 00:00:00 2001 From: Eddy Wu Date: Sat, 7 Nov 2020 14:47:22 +0800 Subject: fork: fix copy_process(CLONE_PARENT) race with the exiting ->real_parent commit b4e00444cab4c3f3fec876dc0cccc8cbb0d1a948 upstream. current->group_leader->exit_signal may change during copy_process() if current->real_parent exits. Move the assignment inside tasklist_lock to avoid the race. Signed-off-by: Eddy Wu Acked-by: Oleg Nesterov Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- kernel/fork.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/kernel/fork.c b/kernel/fork.c index e3d5963d8c6f..419fff8eb9e5 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2100,14 +2100,9 @@ static __latent_entropy struct task_struct *copy_process( /* ok, now we should be set up.. */ p->pid = pid_nr(pid); if (clone_flags & CLONE_THREAD) { - p->exit_signal = -1; p->group_leader = current->group_leader; p->tgid = current->tgid; } else { - if (clone_flags & CLONE_PARENT) - p->exit_signal = current->group_leader->exit_signal; - else - p->exit_signal = args->exit_signal; p->group_leader = p; p->tgid = p->pid; } @@ -2152,9 +2147,14 @@ static __latent_entropy struct task_struct *copy_process( if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) { p->real_parent = current->real_parent; p->parent_exec_id = current->parent_exec_id; + if (clone_flags & CLONE_THREAD) + p->exit_signal = -1; + else + p->exit_signal = current->group_leader->exit_signal; } else { p->real_parent = current; p->parent_exec_id = current->self_exec_id; + p->exit_signal = args->exit_signal; } klp_copy_process(p); -- cgit v1.2.3 From b33a1039564c1052032e6ab27d35af13dc4ec7e0 Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Tue, 15 Sep 2020 11:00:17 +0200 Subject: s390/pkey: fix paes selftest failure with paes and pkey static build commit 5b35047eb467c8cdd38a31beb9ac109221777843 upstream. When both the paes and the pkey kernel module are statically build into the kernel, the paes cipher selftests run before the pkey kernel module is initialized. So a static variable set in the pkey init function and used in the pkey_clr2protkey function is not initialized when the paes cipher's selftests request to call pckmo for transforming a clear key value into a protected key. This patch moves the initial setup of the static variable into the function pck_clr2protkey. So it's possible, to use the function for transforming a clear to a protected key even before the pkey init function has been called and the paes selftests may run successful. Reported-by: Alexander Egorenkov Cc: # 4.20 Fixes: f822ad2c2c03 ("s390/pkey: move pckmo subfunction available checks away from module init") Signed-off-by: Harald Freudenberger Signed-off-by: Heiko Carstens Signed-off-by: Greg Kroah-Hartman --- drivers/s390/crypto/pkey_api.c | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/drivers/s390/crypto/pkey_api.c b/drivers/s390/crypto/pkey_api.c index 5c9898e934d9..0658aa5030c6 100644 --- a/drivers/s390/crypto/pkey_api.c +++ b/drivers/s390/crypto/pkey_api.c @@ -33,9 +33,6 @@ MODULE_DESCRIPTION("s390 protected key interface"); #define KEYBLOBBUFSIZE 8192 /* key buffer size used for internal processing */ #define MAXAPQNSINLIST 64 /* max 64 apqns within a apqn list */ -/* mask of available pckmo subfunctions, fetched once at module init */ -static cpacf_mask_t pckmo_functions; - /* * debug feature data and functions */ @@ -78,6 +75,9 @@ static int pkey_clr2protkey(u32 keytype, const struct pkey_clrkey *clrkey, struct pkey_protkey *protkey) { + /* mask of available pckmo subfunctions */ + static cpacf_mask_t pckmo_functions; + long fc; int keysize; u8 paramblock[64]; @@ -101,11 +101,13 @@ static int pkey_clr2protkey(u32 keytype, return -EINVAL; } - /* - * Check if the needed pckmo subfunction is available. - * These subfunctions can be enabled/disabled by customers - * in the LPAR profile or may even change on the fly. - */ + /* Did we already check for PCKMO ? */ + if (!pckmo_functions.bytes[0]) { + /* no, so check now */ + if (!cpacf_query(CPACF_PCKMO, &pckmo_functions)) + return -ENODEV; + } + /* check for the pckmo subfunction we need now */ if (!cpacf_test_func(&pckmo_functions, fc)) { DEBUG_ERR("%s pckmo functions not available\n", __func__); return -ENODEV; @@ -1504,7 +1506,7 @@ static struct miscdevice pkey_dev = { */ static int __init pkey_init(void) { - cpacf_mask_t kmc_functions; + cpacf_mask_t func_mask; /* * The pckmo instruction should be available - even if we don't @@ -1512,15 +1514,15 @@ static int __init pkey_init(void) * is also the minimum level for the kmc instructions which * are able to work with protected keys. */ - if (!cpacf_query(CPACF_PCKMO, &pckmo_functions)) + if (!cpacf_query(CPACF_PCKMO, &func_mask)) return -ENODEV; /* check for kmc instructions available */ - if (!cpacf_query(CPACF_KMC, &kmc_functions)) + if (!cpacf_query(CPACF_KMC, &func_mask)) return -ENODEV; - if (!cpacf_test_func(&kmc_functions, CPACF_KMC_PAES_128) || - !cpacf_test_func(&kmc_functions, CPACF_KMC_PAES_192) || - !cpacf_test_func(&kmc_functions, CPACF_KMC_PAES_256)) + if (!cpacf_test_func(&func_mask, CPACF_KMC_PAES_128) || + !cpacf_test_func(&func_mask, CPACF_KMC_PAES_192) || + !cpacf_test_func(&func_mask, CPACF_KMC_PAES_256)) return -ENODEV; pkey_debug_init(); -- cgit v1.2.3 From 085fc4784e4bf7799eb23a6ed202d14fb5b75029 Mon Sep 17 00:00:00 2001 From: Claire Chang Date: Mon, 2 Nov 2020 20:07:49 +0800 Subject: serial: 8250_mtk: Fix uart_get_baud_rate warning commit 912ab37c798770f21b182d656937072b58553378 upstream. Mediatek 8250 port supports speed higher than uartclk / 16. If the baud rates in both the new and the old termios setting are higher than uartclk / 16, the WARN_ON in uart_get_baud_rate() will be triggered. Passing NULL as the old termios so uart_get_baud_rate() will use uartclk / 16 - 1 as the new baud rate which will be replaced by the original baud rate later by tty_termios_encode_baud_rate() in mtk8250_set_termios(). Fixes: 551e553f0d4a ("serial: 8250_mtk: Fix high-speed baud rates clamping") Signed-off-by: Claire Chang Link: https://lore.kernel.org/r/20201102120749.374458-1-tientzu@chromium.org Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_mtk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/8250/8250_mtk.c b/drivers/tty/serial/8250/8250_mtk.c index 2b59a4305077..e9eb454c2472 100644 --- a/drivers/tty/serial/8250/8250_mtk.c +++ b/drivers/tty/serial/8250/8250_mtk.c @@ -316,7 +316,7 @@ mtk8250_set_termios(struct uart_port *port, struct ktermios *termios, */ baud = tty_termios_baud_rate(termios); - serial8250_do_set_termios(port, termios, old); + serial8250_do_set_termios(port, termios, NULL); tty_termios_encode_baud_rate(termios, baud, baud); -- cgit v1.2.3 From 62c4b2b21e3b6dcfc3659c8c22027a8888dd0814 Mon Sep 17 00:00:00 2001 From: Qinglang Miao Date: Tue, 3 Nov 2020 16:49:42 +0800 Subject: serial: txx9: add missing platform_driver_unregister() on error in serial_txx9_init commit 0c5fc92622ed5531ff324b20f014e9e3092f0187 upstream. Add the missing platform_driver_unregister() before return from serial_txx9_init in the error handling case when failed to register serial_txx9_pci_driver with macro ENABLE_SERIAL_TXX9_PCI defined. Fixes: ab4382d27412 ("tty: move drivers/serial/ to drivers/tty/serial/") Signed-off-by: Qinglang Miao Link: https://lore.kernel.org/r/20201103084942.109076-1-miaoqinglang@huawei.com Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/serial_txx9.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/tty/serial/serial_txx9.c b/drivers/tty/serial/serial_txx9.c index d22ccb32aa9b..8507f18900d0 100644 --- a/drivers/tty/serial/serial_txx9.c +++ b/drivers/tty/serial/serial_txx9.c @@ -1283,6 +1283,9 @@ static int __init serial_txx9_init(void) #ifdef ENABLE_SERIAL_TXX9_PCI ret = pci_register_driver(&serial_txx9_pci_driver); + if (ret) { + platform_driver_unregister(&serial_txx9_plat_driver); + } #endif if (ret == 0) goto out; -- cgit v1.2.3 From 9d34dbab6ef4cd3494b8127d8883100f67e0e4a9 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 26 Oct 2020 09:25:48 +0100 Subject: USB: serial: cyberjack: fix write-URB completion race commit 985616f0457d9f555fff417d0da56174f70cc14f upstream. The write-URB busy flag was being cleared before the completion handler was done with the URB, something which could lead to corrupt transfers due to a racing write request if the URB is resubmitted. Fixes: 507ca9bc0476 ("[PATCH] USB: add ability for usb-serial drivers to determine if their write urb is currently being used.") Cc: stable # 2.6.13 Reviewed-by: Greg Kroah-Hartman Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/cyberjack.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/usb/serial/cyberjack.c b/drivers/usb/serial/cyberjack.c index ebd76ab07b72..36dd688b5795 100644 --- a/drivers/usb/serial/cyberjack.c +++ b/drivers/usb/serial/cyberjack.c @@ -357,11 +357,12 @@ static void cyberjack_write_bulk_callback(struct urb *urb) struct device *dev = &port->dev; int status = urb->status; unsigned long flags; + bool resubmitted = false; - set_bit(0, &port->write_urbs_free); if (status) { dev_dbg(dev, "%s - nonzero write bulk status received: %d\n", __func__, status); + set_bit(0, &port->write_urbs_free); return; } @@ -394,6 +395,8 @@ static void cyberjack_write_bulk_callback(struct urb *urb) goto exit; } + resubmitted = true; + dev_dbg(dev, "%s - priv->wrsent=%d\n", __func__, priv->wrsent); dev_dbg(dev, "%s - priv->wrfilled=%d\n", __func__, priv->wrfilled); @@ -410,6 +413,8 @@ static void cyberjack_write_bulk_callback(struct urb *urb) exit: spin_unlock_irqrestore(&priv->lock, flags); + if (!resubmitted) + set_bit(0, &port->write_urbs_free); usb_serial_port_softint(port); } -- cgit v1.2.3 From b7f74775c2bb7037b811cf531235acf1ecacd4fb Mon Sep 17 00:00:00 2001 From: Ziyi Cao Date: Tue, 20 Oct 2020 00:08:06 +0800 Subject: USB: serial: option: add Quectel EC200T module support commit a46b973bced1ba57420752bf38426acd9f6cbfa6 upstream. Add usb product id of the Quectel EC200T module. Signed-off-by: Ziyi Cao Link: https://lore.kernel.org/r/17f8a2a3-ce0f-4be7-8544-8fdf286907d0@www.fastmail.com Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index eb5538a44ee9..dd81fff168ed 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -250,6 +250,7 @@ static void option_instat_callback(struct urb *urb); #define QUECTEL_PRODUCT_EP06 0x0306 #define QUECTEL_PRODUCT_EM12 0x0512 #define QUECTEL_PRODUCT_RM500Q 0x0800 +#define QUECTEL_PRODUCT_EC200T 0x6026 #define CMOTECH_VENDOR_ID 0x16d8 #define CMOTECH_PRODUCT_6001 0x6001 @@ -1117,6 +1118,7 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM500Q, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM500Q, 0xff, 0xff, 0x10), .driver_info = ZLP }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC200T, 0xff, 0, 0) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6001) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_CMU_300) }, -- cgit v1.2.3 From 7f7be9341b860608d20acaa4e1402e316b000e91 Mon Sep 17 00:00:00 2001 From: Daniele Palmas Date: Sat, 31 Oct 2020 23:54:58 +0100 Subject: USB: serial: option: add LE910Cx compositions 0x1203, 0x1230, 0x1231 commit 489979b4aab490b6b917c11dc02d81b4b742784a upstream. Add following Telit LE910Cx compositions: 0x1203: rndis, tty, adb, tty, tty, tty, tty 0x1230: tty, adb, rmnet, audio, tty, tty, tty, tty 0x1231: rndis, tty, adb, audio, tty, tty, tty, tty Signed-off-by: Daniele Palmas Link: https://lore.kernel.org/r/20201031225458.10512-1-dnlplm@gmail.com [ johan: add comments after entries ] Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index dd81fff168ed..0dc256a26131 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1203,6 +1203,8 @@ static const struct usb_device_id option_ids[] = { .driver_info = NCTRL(0) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910), .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1203, 0xff), /* Telit LE910Cx (RNDIS) */ + .driver_info = NCTRL(2) | RSVD(3) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910_USBCFG4), .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) | RSVD(3) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920), @@ -1217,6 +1219,10 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920A4_1213, 0xff) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920A4_1214), .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) | RSVD(3) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1230, 0xff), /* Telit LE910Cx (rmnet) */ + .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1231, 0xff), /* Telit LE910Cx (RNDIS) */ + .driver_info = NCTRL(2) | RSVD(3) }, { USB_DEVICE(TELIT_VENDOR_ID, 0x1260), .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) }, { USB_DEVICE(TELIT_VENDOR_ID, 0x1261), -- cgit v1.2.3 From d5d3cca9d61f518e3c031fb7908291e76d7e4a1f Mon Sep 17 00:00:00 2001 From: Daniele Palmas Date: Tue, 3 Nov 2020 13:44:25 +0100 Subject: USB: serial: option: add Telit FN980 composition 0x1055 commit db0362eeb22992502764e825c79b922d7467e0eb upstream. Add the following Telit FN980 composition: 0x1055: tty, adb, tty, tty, tty, tty Signed-off-by: Daniele Palmas Link: https://lore.kernel.org/r/20201103124425.12940-1-dnlplm@gmail.com Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 0dc256a26131..741c72bd499a 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1191,6 +1191,8 @@ static const struct usb_device_id option_ids[] = { .driver_info = NCTRL(0) | RSVD(1) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1054, 0xff), /* Telit FT980-KS */ .driver_info = NCTRL(2) | RSVD(3) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1055, 0xff), /* Telit FN980 (PCIe) */ + .driver_info = NCTRL(0) | RSVD(1) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910), .driver_info = NCTRL(0) | RSVD(1) | RSVD(3) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910_DUAL_MODEM), -- cgit v1.2.3 From 8febdfb5973db600c998b5e65d7cdf3cd250de39 Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Fri, 6 Mar 2020 22:44:32 +0100 Subject: tty: serial: fsl_lpuart: add LS1028A support commit c2f448cff22a7ed09281f02bde084b0ce3bc61ed upstream. The LS1028A uses little endian register access and has a different FIFO size encoding. Signed-off-by: Michael Walle Link: https://lore.kernel.org/r/20200306214433.23215-4-michael@walle.cc Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/fsl_lpuart.c | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c index ec8e608b46ba..378e2d4b196c 100644 --- a/drivers/tty/serial/fsl_lpuart.c +++ b/drivers/tty/serial/fsl_lpuart.c @@ -238,6 +238,7 @@ static DEFINE_IDA(fsl_lpuart_ida); enum lpuart_type { VF610_LPUART, LS1021A_LPUART, + LS1028A_LPUART, IMX7ULP_LPUART, IMX8QXP_LPUART, }; @@ -282,11 +283,16 @@ static const struct lpuart_soc_data vf_data = { .iotype = UPIO_MEM, }; -static const struct lpuart_soc_data ls_data = { +static const struct lpuart_soc_data ls1021a_data = { .devtype = LS1021A_LPUART, .iotype = UPIO_MEM32BE, }; +static const struct lpuart_soc_data ls1028a_data = { + .devtype = LS1028A_LPUART, + .iotype = UPIO_MEM32, +}; + static struct lpuart_soc_data imx7ulp_data = { .devtype = IMX7ULP_LPUART, .iotype = UPIO_MEM32, @@ -301,7 +307,8 @@ static struct lpuart_soc_data imx8qxp_data = { static const struct of_device_id lpuart_dt_ids[] = { { .compatible = "fsl,vf610-lpuart", .data = &vf_data, }, - { .compatible = "fsl,ls1021a-lpuart", .data = &ls_data, }, + { .compatible = "fsl,ls1021a-lpuart", .data = &ls1021a_data, }, + { .compatible = "fsl,ls1028a-lpuart", .data = &ls1028a_data, }, { .compatible = "fsl,imx7ulp-lpuart", .data = &imx7ulp_data, }, { .compatible = "fsl,imx8qxp-lpuart", .data = &imx8qxp_data, }, { /* sentinel */ } @@ -311,6 +318,11 @@ MODULE_DEVICE_TABLE(of, lpuart_dt_ids); /* Forward declare this for the dma callbacks*/ static void lpuart_dma_tx_complete(void *arg); +static inline bool is_ls1028a_lpuart(struct lpuart_port *sport) +{ + return sport->devtype == LS1028A_LPUART; +} + static inline bool is_imx8qxp_lpuart(struct lpuart_port *sport) { return sport->devtype == IMX8QXP_LPUART; @@ -1553,6 +1565,17 @@ static int lpuart32_startup(struct uart_port *port) sport->rxfifo_size = UARTFIFO_DEPTH((temp >> UARTFIFO_RXSIZE_OFF) & UARTFIFO_FIFOSIZE_MASK); + /* + * The LS1028A has a fixed length of 16 words. Although it supports the + * RX/TXSIZE fields their encoding is different. Eg the reference manual + * states 0b101 is 16 words. + */ + if (is_ls1028a_lpuart(sport)) { + sport->rxfifo_size = 16; + sport->txfifo_size = 16; + sport->port.fifosize = sport->txfifo_size; + } + spin_lock_irqsave(&sport->port.lock, flags); lpuart32_setup_watermark_enable(sport); -- cgit v1.2.3 From 86875e1d6426ac1308c969abd1eb0eaf24ab40b4 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 23 Oct 2020 04:34:29 +0300 Subject: tty: serial: fsl_lpuart: LS1021A has a FIFO size of 16 words, like LS1028A MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit c97f2a6fb3dfbfbbc88edc8ea62ef2b944e18849 upstream. Prior to the commit that this one fixes, the FIFO size was derived from the read-only register LPUARTx_FIFO[TXFIFOSIZE] using the following formula: TX FIFO size = 2 ^ (LPUARTx_FIFO[TXFIFOSIZE] - 1) The documentation for LS1021A is a mess. Under chapter 26.1.3 LS1021A LPUART module special consideration, it mentions TXFIFO_SZ and RXFIFO_SZ being equal to 4, and in the register description for LPUARTx_FIFO, it shows the out-of-reset value of TXFIFOSIZE and RXFIFOSIZE fields as "011", even though these registers read as "101" in reality. And when LPUART on LS1021A was working, the "101" value did correspond to "16 datawords", by applying the formula above, even though the documentation is wrong again (!!!!) and says that "101" means 64 datawords (hint: it doesn't). So the "new" formula created by commit f77ebb241ce0 has all the premises of being wrong for LS1021A, because it relied only on false data and no actual experimentation. Interestingly, in commit c2f448cff22a ("tty: serial: fsl_lpuart: add LS1028A support"), Michael Walle applied a workaround to this by manually setting the FIFO widths for LS1028A. It looks like the same values are used by LS1021A as well, in fact. When the driver thinks that it has a deeper FIFO than it really has, getty (user space) output gets truncated. Many thanks to Michael for pointing out where to look. Fixes: f77ebb241ce0 ("tty: serial: fsl_lpuart: correct the FIFO depth size") Suggested-by: Michael Walle Signed-off-by: Vladimir Oltean Link: https://lore.kernel.org/r/20201023013429.3551026-1-vladimir.oltean@nxp.com Reviewed-by:Fugang Duan Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/fsl_lpuart.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c index 378e2d4b196c..6ad985e8dc65 100644 --- a/drivers/tty/serial/fsl_lpuart.c +++ b/drivers/tty/serial/fsl_lpuart.c @@ -318,9 +318,10 @@ MODULE_DEVICE_TABLE(of, lpuart_dt_ids); /* Forward declare this for the dma callbacks*/ static void lpuart_dma_tx_complete(void *arg); -static inline bool is_ls1028a_lpuart(struct lpuart_port *sport) +static inline bool is_layerscape_lpuart(struct lpuart_port *sport) { - return sport->devtype == LS1028A_LPUART; + return (sport->devtype == LS1021A_LPUART || + sport->devtype == LS1028A_LPUART); } static inline bool is_imx8qxp_lpuart(struct lpuart_port *sport) @@ -1566,11 +1567,11 @@ static int lpuart32_startup(struct uart_port *port) UARTFIFO_FIFOSIZE_MASK); /* - * The LS1028A has a fixed length of 16 words. Although it supports the - * RX/TXSIZE fields their encoding is different. Eg the reference manual - * states 0b101 is 16 words. + * The LS1021A and LS1028A have a fixed FIFO depth of 16 words. + * Although they support the RX/TXSIZE fields, their encoding is + * different. Eg the reference manual states 0b101 is 16 words. */ - if (is_ls1028a_lpuart(sport)) { + if (is_layerscape_lpuart(sport)) { sport->rxfifo_size = 16; sport->txfifo_size = 16; sport->port.fifosize = sport->txfifo_size; -- cgit v1.2.3 From 290fcf3e0c0c825cf89101f4b2c0c2d9717f0ca8 Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Thu, 22 Oct 2020 15:44:59 -0700 Subject: usb: dwc3: ep0: Fix delay status handling commit fa27e2f6c5e674f3f1225f9ca7a7821faaf393bb upstream. If we want to send a control status on our own time (through delayed_status), make sure to handle a case where we may queue the delayed status before the host requesting for it (when XferNotReady is generated). Otherwise, the driver won't send anything because it's not EP0_STATUS_PHASE yet. To resolve this, regardless whether dwc->ep0state is EP0_STATUS_PHASE, make sure to clear the dwc->delayed_status flag if dwc3_ep0_send_delayed_status() is called. The control status can be sent when the host requests it later. Cc: Fixes: d97c78a1908e ("usb: dwc3: gadget: END_TRANSFER before CLEAR_STALL command") Signed-off-by: Thinh Nguyen Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/ep0.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/ep0.c b/drivers/usb/dwc3/ep0.c index 991cab9a7491..4de8e5f091cb 100644 --- a/drivers/usb/dwc3/ep0.c +++ b/drivers/usb/dwc3/ep0.c @@ -1058,10 +1058,11 @@ void dwc3_ep0_send_delayed_status(struct dwc3 *dwc) { unsigned int direction = !dwc->ep0_expect_in; + dwc->delayed_status = false; + if (dwc->ep0state != EP0_STATUS_PHASE) return; - dwc->delayed_status = false; __dwc3_ep0_do_control_status(dwc, dwc->eps[direction]); } -- cgit v1.2.3 From b0d03a1bdb3cd35d483eea25f860b688f906ffb1 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Mon, 2 Nov 2020 09:58:21 -0500 Subject: USB: Add NO_LPM quirk for Kingston flash drive commit afaa2e745a246c5ab95103a65b1ed00101e1bc63 upstream. In Bugzilla #208257, Julien Humbert reports that a 32-GB Kingston flash drive spontaneously disconnects and reconnects, over and over. Testing revealed that disabling Link Power Management for the drive fixed the problem. This patch adds a quirk entry for that drive to turn off LPM permanently. CC: Hans de Goede CC: Reported-and-tested-by: Julien Humbert Signed-off-by: Alan Stern Link: https://lore.kernel.org/r/20201102145821.GA1478741@rowland.harvard.edu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 4ee810531098..5ad14cdd9762 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -378,6 +378,9 @@ static const struct usb_device_id usb_quirk_list[] = { { USB_DEVICE(0x0926, 0x3333), .driver_info = USB_QUIRK_CONFIG_INTF_STRINGS }, + /* Kingston DataTraveler 3.0 */ + { USB_DEVICE(0x0951, 0x1666), .driver_info = USB_QUIRK_NO_LPM }, + /* X-Rite/Gretag-Macbeth Eye-One Pro display colorimeter */ { USB_DEVICE(0x0971, 0x2000), .driver_info = USB_QUIRK_NO_SET_INTF }, -- cgit v1.2.3 From b9d91fa921642581ef749f4f6ee2b90f0dba9710 Mon Sep 17 00:00:00 2001 From: Macpaul Lin Date: Fri, 6 Nov 2020 13:54:29 +0800 Subject: usb: mtu3: fix panic in mtu3_gadget_stop() commit 20914919ad31849ee2b9cfe0428f4a20335c9e2a upstream. This patch fixes a possible issue when mtu3_gadget_stop() already assigned NULL to mtu->gadget_driver during mtu_gadget_disconnect(). [] notifier_call_chain+0xa4/0x128 [] __atomic_notifier_call_chain+0x84/0x138 [] notify_die+0xb0/0x120 [] die+0x1f8/0x5d0 [] __do_kernel_fault+0x19c/0x280 [] do_bad_area+0x44/0x140 [] do_translation_fault+0x4c/0x90 [] do_mem_abort+0xb8/0x258 [] el1_da+0x24/0x3c [] mtu3_gadget_disconnect+0xac/0x128 [] mtu3_irq+0x34c/0xc18 [] __handle_irq_event_percpu+0x2ac/0xcd0 [] handle_irq_event_percpu+0x80/0x138 [] handle_irq_event+0xac/0x148 [] handle_fasteoi_irq+0x234/0x568 [] generic_handle_irq+0x48/0x68 [] __handle_domain_irq+0x264/0x1740 [] gic_handle_irq+0x14c/0x250 [] el1_irq+0xec/0x194 [] dma_pool_alloc+0x6e4/0xae0 [] cmdq_mbox_pool_alloc_impl+0xb0/0x238 [] cmdq_pkt_alloc_buf+0x2dc/0x7c0 [] cmdq_pkt_add_cmd_buffer+0x178/0x270 [] cmdq_pkt_perf_begin+0x108/0x148 [] cmdq_pkt_create+0x178/0x1f0 [] mtk_crtc_config_default_path+0x328/0x7a0 [] mtk_drm_idlemgr_kick+0xa6c/0x1460 [] mtk_drm_crtc_atomic_begin+0x1a4/0x1a68 [] drm_atomic_helper_commit_planes+0x154/0x878 [] mtk_atomic_complete.isra.16+0xe80/0x19c8 [] mtk_atomic_commit+0x258/0x898 [] drm_atomic_commit+0xcc/0x108 [] drm_mode_atomic_ioctl+0x1c20/0x2580 [] drm_ioctl_kernel+0x118/0x1b0 [] drm_ioctl+0x5c0/0x920 [] do_vfs_ioctl+0x188/0x1820 [] SyS_ioctl+0x8c/0xa0 Fixes: df2069acb005 ("usb: Add MediaTek USB3 DRD driver") Signed-off-by: Macpaul Lin Acked-by: Chunfeng Yun Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/1604642069-20961-1-git-send-email-macpaul.lin@mediatek.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/mtu3/mtu3_gadget.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/mtu3/mtu3_gadget.c b/drivers/usb/mtu3/mtu3_gadget.c index f93732e53fd8..08db02f3172d 100644 --- a/drivers/usb/mtu3/mtu3_gadget.c +++ b/drivers/usb/mtu3/mtu3_gadget.c @@ -587,6 +587,7 @@ static int mtu3_gadget_stop(struct usb_gadget *g) spin_unlock_irqrestore(&mtu->lock, flags); + synchronize_irq(mtu->irq); return 0; } -- cgit v1.2.3 From d61edc06002f6448b863bfa8d164c5fbc64de509 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Sun, 1 Nov 2020 18:40:16 +0100 Subject: drm/panfrost: Fix a deadlock between the shrinker and madvise path commit 7d2d6d01293e6d9b42a6cb410be4158571f7fe9d upstream. panfrost_ioctl_madvise() and panfrost_gem_purge() acquire the mappings and shmem locks in different orders, thus leading to a potential the mappings lock first. Fixes: bdefca2d8dc0 ("drm/panfrost: Add the panfrost_gem_mapping concept") Cc: Cc: Christian Hewitt Reported-by: Christian Hewitt Signed-off-by: Boris Brezillon Reviewed-by: Steven Price Link: https://patchwork.freedesktop.org/patch/msgid/20201101174016.839110-1-boris.brezillon@collabora.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/panfrost/panfrost_gem.c | 4 +--- drivers/gpu/drm/panfrost/panfrost_gem.h | 2 +- drivers/gpu/drm/panfrost/panfrost_gem_shrinker.c | 14 +++++++++++--- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.c b/drivers/gpu/drm/panfrost/panfrost_gem.c index c05e013bb8e3..ac9de37a8280 100644 --- a/drivers/gpu/drm/panfrost/panfrost_gem.c +++ b/drivers/gpu/drm/panfrost/panfrost_gem.c @@ -105,14 +105,12 @@ void panfrost_gem_mapping_put(struct panfrost_gem_mapping *mapping) kref_put(&mapping->refcount, panfrost_gem_mapping_release); } -void panfrost_gem_teardown_mappings(struct panfrost_gem_object *bo) +void panfrost_gem_teardown_mappings_locked(struct panfrost_gem_object *bo) { struct panfrost_gem_mapping *mapping; - mutex_lock(&bo->mappings.lock); list_for_each_entry(mapping, &bo->mappings.list, node) panfrost_gem_teardown_mapping(mapping); - mutex_unlock(&bo->mappings.lock); } int panfrost_gem_open(struct drm_gem_object *obj, struct drm_file *file_priv) diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.h b/drivers/gpu/drm/panfrost/panfrost_gem.h index b3517ff9630c..8088d5fd8480 100644 --- a/drivers/gpu/drm/panfrost/panfrost_gem.h +++ b/drivers/gpu/drm/panfrost/panfrost_gem.h @@ -82,7 +82,7 @@ struct panfrost_gem_mapping * panfrost_gem_mapping_get(struct panfrost_gem_object *bo, struct panfrost_file_priv *priv); void panfrost_gem_mapping_put(struct panfrost_gem_mapping *mapping); -void panfrost_gem_teardown_mappings(struct panfrost_gem_object *bo); +void panfrost_gem_teardown_mappings_locked(struct panfrost_gem_object *bo); void panfrost_gem_shrinker_init(struct drm_device *dev); void panfrost_gem_shrinker_cleanup(struct drm_device *dev); diff --git a/drivers/gpu/drm/panfrost/panfrost_gem_shrinker.c b/drivers/gpu/drm/panfrost/panfrost_gem_shrinker.c index 288e46c40673..1b9f68d8e9aa 100644 --- a/drivers/gpu/drm/panfrost/panfrost_gem_shrinker.c +++ b/drivers/gpu/drm/panfrost/panfrost_gem_shrinker.c @@ -40,18 +40,26 @@ static bool panfrost_gem_purge(struct drm_gem_object *obj) { struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj); struct panfrost_gem_object *bo = to_panfrost_bo(obj); + bool ret = false; if (atomic_read(&bo->gpu_usecount)) return false; - if (!mutex_trylock(&shmem->pages_lock)) + if (!mutex_trylock(&bo->mappings.lock)) return false; - panfrost_gem_teardown_mappings(bo); + if (!mutex_trylock(&shmem->pages_lock)) + goto unlock_mappings; + + panfrost_gem_teardown_mappings_locked(bo); drm_gem_shmem_purge_locked(obj); + ret = true; mutex_unlock(&shmem->pages_lock); - return true; + +unlock_mappings: + mutex_unlock(&bo->mappings.lock); + return ret; } static unsigned long -- cgit v1.2.3 From fbfca92c7840db5f4a980b111789ab0a7af745ef Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Tue, 27 Oct 2020 15:01:17 -0700 Subject: ARC: stack unwinding: avoid indefinite looping commit 328d2168ca524d501fc4b133d6be076142bd305c upstream. Currently stack unwinder is a while(1) loop which relies on the dwarf unwinder to signal termination, which in turn relies on dwarf info to do so. This in theory could cause an infinite loop if the dwarf info was somehow messed up or the register contents were etc. This fix thus detects the excessive looping and breaks the loop. | Mem: 26184K used, 1009136K free, 0K shrd, 0K buff, 14416K cached | CPU: 0.0% usr 72.8% sys 0.0% nic 27.1% idle 0.0% io 0.0% irq 0.0% sirq | Load average: 4.33 2.60 1.11 2/74 139 | PID PPID USER STAT VSZ %VSZ CPU %CPU COMMAND | 133 2 root SWN 0 0.0 3 22.9 [rcu_torture_rea] | 132 2 root SWN 0 0.0 0 22.0 [rcu_torture_rea] | 131 2 root SWN 0 0.0 3 21.5 [rcu_torture_rea] | 126 2 root RW 0 0.0 2 5.4 [rcu_torture_wri] | 129 2 root SWN 0 0.0 0 0.2 [rcu_torture_fak] | 137 2 root SW 0 0.0 0 0.2 [rcu_torture_cbf] | 127 2 root SWN 0 0.0 0 0.1 [rcu_torture_fak] | 138 115 root R 1464 0.1 2 0.1 top | 130 2 root SWN 0 0.0 0 0.1 [rcu_torture_fak] | 128 2 root SWN 0 0.0 0 0.1 [rcu_torture_fak] | 115 1 root S 1472 0.1 1 0.0 -/bin/sh | 104 1 root S 1464 0.1 0 0.0 inetd | 1 0 root S 1456 0.1 2 0.0 init | 78 1 root S 1456 0.1 0 0.0 syslogd -O /var/log/messages | 134 2 root SW 0 0.0 2 0.0 [rcu_torture_sta] | 10 2 root IW 0 0.0 1 0.0 [rcu_preempt] | 88 2 root IW 0 0.0 1 0.0 [kworker/1:1-eve] | 66 2 root IW 0 0.0 2 0.0 [kworker/2:2-eve] | 39 2 root IW 0 0.0 2 0.0 [kworker/2:1-eve] | unwinder looping too long, aborting ! Cc: Signed-off-by: Vineet Gupta Signed-off-by: Greg Kroah-Hartman --- arch/arc/kernel/stacktrace.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/arc/kernel/stacktrace.c b/arch/arc/kernel/stacktrace.c index 1e440bbfa876..fc65d2921e3b 100644 --- a/arch/arc/kernel/stacktrace.c +++ b/arch/arc/kernel/stacktrace.c @@ -112,7 +112,7 @@ arc_unwind_core(struct task_struct *tsk, struct pt_regs *regs, int (*consumer_fn) (unsigned int, void *), void *arg) { #ifdef CONFIG_ARC_DW2_UNWIND - int ret = 0; + int ret = 0, cnt = 0; unsigned int address; struct unwind_frame_info frame_info; @@ -132,6 +132,11 @@ arc_unwind_core(struct task_struct *tsk, struct pt_regs *regs, break; frame_info.regs.r63 = frame_info.regs.r31; + + if (cnt++ > 128) { + printk("unwinder looping too long, aborting !\n"); + return 0; + } } return address; /* return the last address it saw */ -- cgit v1.2.3 From 874dfb5c6aa3001b8528eb5b0ac57cdabc6a41e8 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 21 Oct 2020 21:12:15 +0200 Subject: PM: runtime: Drop runtime PM references to supplier on link removal commit e0e398e204634db8fb71bd89cf2f6e3e5bd09b51 upstream. While removing a device link, drop the supplier device's runtime PM usage counter as many times as needed to drop all of the runtime PM references to it from the consumer in addition to dropping the consumer's link count. Fixes: baa8809f6097 ("PM / runtime: Optimize the use of device links") Signed-off-by: Rafael J. Wysocki Cc: 5.1+ # 5.1+ Tested-by: Xiang Chen Reviewed-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 6 ++---- drivers/base/power/runtime.c | 21 ++++++++++++++++++++- include/linux/pm_runtime.h | 4 ++-- 3 files changed, 24 insertions(+), 7 deletions(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index 0fde3e9e63ee..ddfbd62d8bfc 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -454,8 +454,7 @@ static void __device_link_del(struct kref *kref) dev_dbg(link->consumer, "Dropping the link to %s\n", dev_name(link->supplier)); - if (link->flags & DL_FLAG_PM_RUNTIME) - pm_runtime_drop_link(link->consumer); + pm_runtime_drop_link(link); list_del_rcu(&link->s_node); list_del_rcu(&link->c_node); @@ -469,8 +468,7 @@ static void __device_link_del(struct kref *kref) dev_info(link->consumer, "Dropping the link to %s\n", dev_name(link->supplier)); - if (link->flags & DL_FLAG_PM_RUNTIME) - pm_runtime_drop_link(link->consumer); + pm_runtime_drop_link(link); list_del(&link->s_node); list_del(&link->c_node); diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index 4244e22e4b40..159e9c6e16c2 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -1702,7 +1702,7 @@ void pm_runtime_new_link(struct device *dev) spin_unlock_irq(&dev->power.lock); } -void pm_runtime_drop_link(struct device *dev) +static void pm_runtime_drop_link_count(struct device *dev) { spin_lock_irq(&dev->power.lock); WARN_ON(dev->power.links_count == 0); @@ -1710,6 +1710,25 @@ void pm_runtime_drop_link(struct device *dev) spin_unlock_irq(&dev->power.lock); } +/** + * pm_runtime_drop_link - Prepare for device link removal. + * @link: Device link going away. + * + * Drop the link count of the consumer end of @link and decrement the supplier + * device's runtime PM usage counter as many times as needed to drop all of the + * PM runtime reference to it from the consumer. + */ +void pm_runtime_drop_link(struct device_link *link) +{ + if (!(link->flags & DL_FLAG_PM_RUNTIME)) + return; + + pm_runtime_drop_link_count(link->consumer); + + while (refcount_dec_not_one(&link->rpm_active)) + pm_runtime_put(link->supplier); +} + static bool pm_runtime_need_not_resume(struct device *dev) { return atomic_read(&dev->power.usage_count) <= 1 && diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 22af69d237a6..d805fb1ae880 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -58,7 +58,7 @@ extern void pm_runtime_clean_up_links(struct device *dev); extern void pm_runtime_get_suppliers(struct device *dev); extern void pm_runtime_put_suppliers(struct device *dev); extern void pm_runtime_new_link(struct device *dev); -extern void pm_runtime_drop_link(struct device *dev); +extern void pm_runtime_drop_link(struct device_link *link); static inline void pm_suspend_ignore_children(struct device *dev, bool enable) { @@ -177,7 +177,7 @@ static inline void pm_runtime_clean_up_links(struct device *dev) {} static inline void pm_runtime_get_suppliers(struct device *dev) {} static inline void pm_runtime_put_suppliers(struct device *dev) {} static inline void pm_runtime_new_link(struct device *dev) {} -static inline void pm_runtime_drop_link(struct device *dev) {} +static inline void pm_runtime_drop_link(struct device_link *link) {} #endif /* !CONFIG_PM */ -- cgit v1.2.3 From 37f75c6aa8ddfea0bb9d6823c24b0da398a649d0 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 21 Oct 2020 21:13:10 +0200 Subject: PM: runtime: Drop pm_runtime_clean_up_links() commit d6e36668598154820177bfd78c1621d8e6c580a2 upstream. After commit d12544fb2aa9 ("PM: runtime: Remove link state checks in rpm_get/put_supplier()") nothing prevents the consumer device's runtime PM from acquiring additional references to the supplier device after pm_runtime_clean_up_links() has run (or even while it is running), so calling this function from __device_release_driver() may be pointless (or even harmful). Moreover, it ignores stateless device links, so the runtime PM handling of managed and stateless device links is inconsistent because of it, so better get rid of it entirely. Fixes: d12544fb2aa9 ("PM: runtime: Remove link state checks in rpm_get/put_supplier()") Signed-off-by: Rafael J. Wysocki Cc: 5.1+ # 5.1+ Tested-by: Xiang Chen Reviewed-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/base/dd.c | 1 - drivers/base/power/runtime.c | 36 ------------------------------------ include/linux/pm_runtime.h | 2 -- 3 files changed, 39 deletions(-) diff --git a/drivers/base/dd.c b/drivers/base/dd.c index 84e757860ebb..85463c4f7653 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -1121,7 +1121,6 @@ static void __device_release_driver(struct device *dev, struct device *parent) } pm_runtime_get_sync(dev); - pm_runtime_clean_up_links(dev); driver_sysfs_remove(dev); diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index 159e9c6e16c2..137a7ba053d7 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -1615,42 +1615,6 @@ void pm_runtime_remove(struct device *dev) pm_runtime_reinit(dev); } -/** - * pm_runtime_clean_up_links - Prepare links to consumers for driver removal. - * @dev: Device whose driver is going to be removed. - * - * Check links from this device to any consumers and if any of them have active - * runtime PM references to the device, drop the usage counter of the device - * (as many times as needed). - * - * Links with the DL_FLAG_MANAGED flag unset are ignored. - * - * Since the device is guaranteed to be runtime-active at the point this is - * called, nothing else needs to be done here. - * - * Moreover, this is called after device_links_busy() has returned 'false', so - * the status of each link is guaranteed to be DL_STATE_SUPPLIER_UNBIND and - * therefore rpm_active can't be manipulated concurrently. - */ -void pm_runtime_clean_up_links(struct device *dev) -{ - struct device_link *link; - int idx; - - idx = device_links_read_lock(); - - list_for_each_entry_rcu(link, &dev->links.consumers, s_node, - device_links_read_lock_held()) { - if (!(link->flags & DL_FLAG_MANAGED)) - continue; - - while (refcount_dec_not_one(&link->rpm_active)) - pm_runtime_put_noidle(dev); - } - - device_links_read_unlock(idx); -} - /** * pm_runtime_get_suppliers - Resume and reference-count supplier devices. * @dev: Consumer device. diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index d805fb1ae880..fe61e3b9a9ca 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -54,7 +54,6 @@ extern u64 pm_runtime_autosuspend_expiration(struct device *dev); extern void pm_runtime_update_max_time_suspended(struct device *dev, s64 delta_ns); extern void pm_runtime_set_memalloc_noio(struct device *dev, bool enable); -extern void pm_runtime_clean_up_links(struct device *dev); extern void pm_runtime_get_suppliers(struct device *dev); extern void pm_runtime_put_suppliers(struct device *dev); extern void pm_runtime_new_link(struct device *dev); @@ -173,7 +172,6 @@ static inline u64 pm_runtime_autosuspend_expiration( struct device *dev) { return 0; } static inline void pm_runtime_set_memalloc_noio(struct device *dev, bool enable){} -static inline void pm_runtime_clean_up_links(struct device *dev) {} static inline void pm_runtime_get_suppliers(struct device *dev) {} static inline void pm_runtime_put_suppliers(struct device *dev) {} static inline void pm_runtime_new_link(struct device *dev) {} -- cgit v1.2.3 From 258d01b1577e98feda9cb3d4f141aa318e59b715 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 22 Oct 2020 17:38:22 +0200 Subject: PM: runtime: Resume the device earlier in __device_release_driver() commit 9226c504e364158a17a68ff1fe9d67d266922f50 upstream. Since the device is resumed from runtime-suspend in __device_release_driver() anyway, it is better to do that before looking for busy managed device links from it to consumers, because if there are any, device_links_unbind_consumers() will be called and it will cause the consumer devices' drivers to unbind, so the consumer devices will be runtime-resumed. In turn, resuming each consumer device will cause the supplier to be resumed and when the runtime PM references from the given consumer to it are dropped, it may be suspended. Then, the runtime-resume of the next consumer will cause the supplier to resume again and so on. Update the code accordingly. Signed-off-by: Rafael J. Wysocki Fixes: 9ed9895370ae ("driver core: Functional dependencies tracking support") Cc: All applicable # All applicable Tested-by: Xiang Chen Reviewed-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/base/dd.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/base/dd.c b/drivers/base/dd.c index 85463c4f7653..32823f36cffd 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -1105,6 +1105,8 @@ static void __device_release_driver(struct device *dev, struct device *parent) drv = dev->driver; if (drv) { + pm_runtime_get_sync(dev); + while (device_links_busy(dev)) { __device_driver_unlock(dev, parent); @@ -1116,12 +1118,12 @@ static void __device_release_driver(struct device *dev, struct device *parent) * have released the driver successfully while this one * was waiting, so check for that. */ - if (dev->driver != drv) + if (dev->driver != drv) { + pm_runtime_put(dev); return; + } } - pm_runtime_get_sync(dev); - driver_sysfs_remove(dev); if (dev->bus) -- cgit v1.2.3 From 21ab13af8c507b36ccea18d8d1f1c731ee623a9a Mon Sep 17 00:00:00 2001 From: Andy Strohman Date: Thu, 5 Nov 2020 20:28:50 +0000 Subject: xfs: flush for older, xfs specific ioctls 837a6e7f5cdb ("fs: add generic UNRESVSP and ZERO_RANGE ioctl handlers") changed ioctls XFS_IOC_UNRESVSP XFS_IOC_UNRESVSP64 and XFS_IOC_ZERO_RANGE to be generic instead of xfs specific. Because of this change, 36f11775da75 ("xfs: properly serialise fallocate against AIO+DIO") needed adaptation, as 5.4 still uses the xfs specific ioctls. Without this, xfstests xfs/242 and xfs/290 fail. Both of these tests test XFS_IOC_ZERO_RANGE. Fixes: 36f11775da75 ("xfs: properly serialise fallocate against AIO+DIO") Tested-by: Andy Strohman Signed-off-by: Darrick J. Wong Reviewed-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_ioctl.c | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index bf0435dbec43..b3021d9b34a5 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -622,7 +622,6 @@ xfs_ioc_space( error = xfs_break_layouts(inode, &iolock, BREAK_UNMAP); if (error) goto out_unlock; - inode_dio_wait(inode); switch (bf->l_whence) { case 0: /*SEEK_SET*/ @@ -668,6 +667,31 @@ xfs_ioc_space( goto out_unlock; } + /* + * Must wait for all AIO to complete before we continue as AIO can + * change the file size on completion without holding any locks we + * currently hold. We must do this first because AIO can update both + * the on disk and in memory inode sizes, and the operations that follow + * require the in-memory size to be fully up-to-date. + */ + inode_dio_wait(inode); + + /* + * Now that AIO and DIO has drained we can flush and (if necessary) + * invalidate the cached range over the first operation we are about to + * run. We include zero range here because it starts with a hole punch + * over the target range. + */ + switch (cmd) { + case XFS_IOC_ZERO_RANGE: + case XFS_IOC_UNRESVSP: + case XFS_IOC_UNRESVSP64: + error = xfs_flush_unmap_range(ip, bf->l_start, bf->l_len); + if (error) + goto out_unlock; + break; + } + switch (cmd) { case XFS_IOC_ZERO_RANGE: flags |= XFS_PREALLOC_SET; -- cgit v1.2.3 From b7f7474b392194530d1ec07203c8668e81b7fdb9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?kiyin=28=E5=B0=B9=E4=BA=AE=29?= Date: Wed, 4 Nov 2020 08:23:22 +0300 Subject: perf/core: Fix a memory leak in perf_event_parse_addr_filter() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 7bdb157cdebbf95a1cd94ed2e01b338714075d00 upstream. As shown through runtime testing, the "filename" allocation is not always freed in perf_event_parse_addr_filter(). There are three possible ways that this could happen: - It could be allocated twice on subsequent iterations through the loop, - or leaked on the success path, - or on the failure path. Clean up the code flow to make it obvious that 'filename' is always freed in the reallocation path and in the two return paths as well. We rely on the fact that kfree(NULL) is NOP and filename is initialized with NULL. This fixes the leak. No other side effects expected. [ Dan Carpenter: cleaned up the code flow & added a changelog. ] [ Ingo Molnar: updated the changelog some more. ] Fixes: 375637bc5249 ("perf/core: Introduce address range filtering") Signed-off-by: "kiyin(尹亮)" Signed-off-by: Dan Carpenter Signed-off-by: Ingo Molnar Cc: "Srivatsa S. Bhat" Cc: Anthony Liguori -- kernel/events/core.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) Signed-off-by: Greg Kroah-Hartman --- kernel/events/core.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 09e1cc22221f..1b60f9c508c9 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -9415,6 +9415,7 @@ perf_event_parse_addr_filter(struct perf_event *event, char *fstr, if (token == IF_SRC_FILE || token == IF_SRC_FILEADDR) { int fpos = token == IF_SRC_FILE ? 2 : 1; + kfree(filename); filename = match_strdup(&args[fpos]); if (!filename) { ret = -ENOMEM; @@ -9461,16 +9462,13 @@ perf_event_parse_addr_filter(struct perf_event *event, char *fstr, */ ret = -EOPNOTSUPP; if (!event->ctx->task) - goto fail_free_name; + goto fail; /* look up the path and grab its inode */ ret = kern_path(filename, LOOKUP_FOLLOW, &filter->path); if (ret) - goto fail_free_name; - - kfree(filename); - filename = NULL; + goto fail; ret = -EINVAL; if (!filter->path.dentry || @@ -9490,13 +9488,13 @@ perf_event_parse_addr_filter(struct perf_event *event, char *fstr, if (state != IF_STATE_ACTION) goto fail; + kfree(filename); kfree(orig); return 0; -fail_free_name: - kfree(filename); fail: + kfree(filename); free_filters_list(filters); kfree(orig); -- cgit v1.2.3 From c3d60c695712781268addd7c6bbdb953744c98a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Mon, 7 Sep 2020 13:27:17 +0200 Subject: arm64: dts: marvell: espressobin: Add ethernet switch aliases MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit b64d814257b027e29a474bcd660f6372490138c7 upstream. Espressobin boards have 3 ethernet ports and some of them got assigned more then one MAC address. MAC addresses are stored in U-Boot environment. Since commit a2c7023f7075c ("net: dsa: read mac address from DT for slave device") kernel can use MAC addresses from DT for particular DSA port. Currently Espressobin DTS file contains alias just for ethernet0. This patch defines additional ethernet aliases in Espressobin DTS files, so bootloader can fill correct MAC address for DSA switch ports if more MAC addresses were specified. DT alias ethernet1 is used for wan port, DT aliases ethernet2 and ethernet3 are used for lan ports for both Espressobin revisions (V5 and V7). Fixes: 5253cb8c00a6f ("arm64: dts: marvell: espressobin: add ethernet alias") Cc: # a2c7023f7075c: dsa: read mac address Signed-off-by: Pali Rohár Reviewed-by: Andrew Lunn Reviewed-by: Andre Heider Signed-off-by: Gregory CLEMENT [pali: Backported Espressobin rev V5 changes to 5.4 and 4.19 versions] Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/marvell/armada-3720-espressobin.dts | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/arch/arm64/boot/dts/marvell/armada-3720-espressobin.dts b/arch/arm64/boot/dts/marvell/armada-3720-espressobin.dts index 05dc58c13fa4..6226e7e80980 100644 --- a/arch/arm64/boot/dts/marvell/armada-3720-espressobin.dts +++ b/arch/arm64/boot/dts/marvell/armada-3720-espressobin.dts @@ -21,6 +21,10 @@ aliases { ethernet0 = ð0; + /* for dsa slave device */ + ethernet1 = &switch0port1; + ethernet2 = &switch0port2; + ethernet3 = &switch0port3; serial0 = &uart0; serial1 = &uart1; }; @@ -147,7 +151,7 @@ #address-cells = <1>; #size-cells = <0>; - port@0 { + switch0port0: port@0 { reg = <0>; label = "cpu"; ethernet = <ð0>; @@ -158,19 +162,19 @@ }; }; - port@1 { + switch0port1: port@1 { reg = <1>; label = "wan"; phy-handle = <&switch0phy0>; }; - port@2 { + switch0port2: port@2 { reg = <2>; label = "lan0"; phy-handle = <&switch0phy1>; }; - port@3 { + switch0port3: port@3 { reg = <3>; label = "lan1"; phy-handle = <&switch0phy2>; -- cgit v1.2.3 From ec9c6b417e271ee76d1430d2b197794858238d3b Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 10 Nov 2020 12:37:34 +0100 Subject: Linux 5.4.76 Tested-by: Jon Hunter Tested-by: Guenter Roeck Tested-by: Shuah Khan Tested-by: Linux Kernel Functional Testing Link: https://lore.kernel.org/r/20201109125022.614792961@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index d38d0cab8e9a..842ed8411810 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 PATCHLEVEL = 4 -SUBLEVEL = 75 +SUBLEVEL = 76 EXTRAVERSION = NAME = Kleptomaniac Octopus -- cgit v1.2.3 From 19f6d91bdad42200aac557a683c17b1f65ee6c94 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Tue, 10 Nov 2020 13:00:00 -0800 Subject: powercap: restrict energy meter to root access commit 949dd0104c496fa7c14991a23c03c62e44637e71 upstream. Remove non-privileged user access to power data contained in /sys/class/powercap/intel-rapl*/*/energy_uj Non-privileged users currently have read access to power data and can use this data to form a security attack. Some privileged drivers/applications need read access to this data, but don't expose it to non-privileged users. For example, thermald uses this data to ensure that power management works correctly. Thus removing non-privileged access is preferred over completely disabling this power reporting capability with CONFIG_INTEL_RAPL=n. Fixes: 95677a9a3847 ("PowerCap: Fix mode for energy counter") Signed-off-by: Len Brown Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/powercap/powercap_sys.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/powercap/powercap_sys.c b/drivers/powercap/powercap_sys.c index f808c5fa9838..3f0b8e2ef3d4 100644 --- a/drivers/powercap/powercap_sys.c +++ b/drivers/powercap/powercap_sys.c @@ -367,9 +367,9 @@ static void create_power_zone_common_attributes( &dev_attr_max_energy_range_uj.attr; if (power_zone->ops->get_energy_uj) { if (power_zone->ops->reset_energy_uj) - dev_attr_energy_uj.attr.mode = S_IWUSR | S_IRUGO; + dev_attr_energy_uj.attr.mode = S_IWUSR | S_IRUSR; else - dev_attr_energy_uj.attr.mode = S_IRUGO; + dev_attr_energy_uj.attr.mode = S_IRUSR; power_zone->zone_dev_attrs[count++] = &dev_attr_energy_uj.attr; } -- cgit v1.2.3 From 2544d06afd8d060f35b159809274e4b7477e63e8 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 10 Nov 2020 21:13:20 +0100 Subject: Linux 5.4.77 Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 842ed8411810..2e24b568b93f 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 PATCHLEVEL = 4 -SUBLEVEL = 76 +SUBLEVEL = 77 EXTRAVERSION = NAME = Kleptomaniac Octopus -- cgit v1.2.3