diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/bpf/btf.c | 3 | ||||
-rw-r--r-- | kernel/bpf/cgroup.c | 2 | ||||
-rw-r--r-- | kernel/bpf/hashtab.c | 4 | ||||
-rw-r--r-- | kernel/bpf/percpu_freelist.c | 41 | ||||
-rw-r--r-- | kernel/bpf/percpu_freelist.h | 4 | ||||
-rw-r--r-- | kernel/bpf/syscall.c | 12 | ||||
-rw-r--r-- | kernel/futex.c | 32 | ||||
-rw-r--r-- | kernel/locking/rtmutex.c | 37 | ||||
-rw-r--r-- | kernel/relay.c | 4 | ||||
-rw-r--r-- | kernel/signal.c | 63 | ||||
-rw-r--r-- | kernel/trace/bpf_trace.c | 14 | ||||
-rw-r--r-- | kernel/trace/trace_uprobe.c | 9 |
12 files changed, 172 insertions, 53 deletions
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index befe570be5ba..c57bd10340ed 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -1459,7 +1459,8 @@ static int btf_modifier_resolve(struct btf_verifier_env *env, /* "typedef void new_void", "const void"...etc */ if (!btf_type_is_void(next_type) && - !btf_type_is_fwd(next_type)) { + !btf_type_is_fwd(next_type) && + !btf_type_is_func_proto(next_type)) { btf_verifier_log_type(env, v->t, "Invalid type_id"); return -EINVAL; } diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index ab612fe9862f..d17d05570a3f 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -572,7 +572,7 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk, bpf_compute_and_save_data_end(skb, &saved_data_end); ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb, - bpf_prog_run_save_cb); + __bpf_prog_run_save_cb); bpf_restore_data_end(skb, saved_data_end); __skb_pull(skb, offset); skb->sk = save_sk; diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 4b7c76765d9d..f9274114c88d 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -686,7 +686,7 @@ static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l) } if (htab_is_prealloc(htab)) { - pcpu_freelist_push(&htab->freelist, &l->fnode); + __pcpu_freelist_push(&htab->freelist, &l->fnode); } else { atomic_dec(&htab->count); l->htab = htab; @@ -748,7 +748,7 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key, } else { struct pcpu_freelist_node *l; - l = pcpu_freelist_pop(&htab->freelist); + l = __pcpu_freelist_pop(&htab->freelist); if (!l) return ERR_PTR(-E2BIG); l_new = container_of(l, struct htab_elem, fnode); diff --git a/kernel/bpf/percpu_freelist.c b/kernel/bpf/percpu_freelist.c index 673fa6fe2d73..0c1b4ba9e90e 100644 --- a/kernel/bpf/percpu_freelist.c +++ b/kernel/bpf/percpu_freelist.c @@ -28,8 +28,8 @@ void pcpu_freelist_destroy(struct pcpu_freelist *s) free_percpu(s->freelist); } -static inline void __pcpu_freelist_push(struct pcpu_freelist_head *head, - struct pcpu_freelist_node *node) +static inline void ___pcpu_freelist_push(struct pcpu_freelist_head *head, + struct pcpu_freelist_node *node) { raw_spin_lock(&head->lock); node->next = head->first; @@ -37,12 +37,22 @@ static inline void __pcpu_freelist_push(struct pcpu_freelist_head *head, raw_spin_unlock(&head->lock); } -void pcpu_freelist_push(struct pcpu_freelist *s, +void __pcpu_freelist_push(struct pcpu_freelist *s, struct pcpu_freelist_node *node) { struct pcpu_freelist_head *head = this_cpu_ptr(s->freelist); - __pcpu_freelist_push(head, node); + ___pcpu_freelist_push(head, node); +} + +void pcpu_freelist_push(struct pcpu_freelist *s, + struct pcpu_freelist_node *node) +{ + unsigned long flags; + + local_irq_save(flags); + __pcpu_freelist_push(s, node); + local_irq_restore(flags); } void pcpu_freelist_populate(struct pcpu_freelist *s, void *buf, u32 elem_size, @@ -63,7 +73,7 @@ void pcpu_freelist_populate(struct pcpu_freelist *s, void *buf, u32 elem_size, for_each_possible_cpu(cpu) { again: head = per_cpu_ptr(s->freelist, cpu); - __pcpu_freelist_push(head, buf); + ___pcpu_freelist_push(head, buf); i++; buf += elem_size; if (i == nr_elems) @@ -74,14 +84,12 @@ again: local_irq_restore(flags); } -struct pcpu_freelist_node *pcpu_freelist_pop(struct pcpu_freelist *s) +struct pcpu_freelist_node *__pcpu_freelist_pop(struct pcpu_freelist *s) { struct pcpu_freelist_head *head; struct pcpu_freelist_node *node; - unsigned long flags; int orig_cpu, cpu; - local_irq_save(flags); orig_cpu = cpu = raw_smp_processor_id(); while (1) { head = per_cpu_ptr(s->freelist, cpu); @@ -89,16 +97,25 @@ struct pcpu_freelist_node *pcpu_freelist_pop(struct pcpu_freelist *s) node = head->first; if (node) { head->first = node->next; - raw_spin_unlock_irqrestore(&head->lock, flags); + raw_spin_unlock(&head->lock); return node; } raw_spin_unlock(&head->lock); cpu = cpumask_next(cpu, cpu_possible_mask); if (cpu >= nr_cpu_ids) cpu = 0; - if (cpu == orig_cpu) { - local_irq_restore(flags); + if (cpu == orig_cpu) return NULL; - } } } + +struct pcpu_freelist_node *pcpu_freelist_pop(struct pcpu_freelist *s) +{ + struct pcpu_freelist_node *ret; + unsigned long flags; + + local_irq_save(flags); + ret = __pcpu_freelist_pop(s); + local_irq_restore(flags); + return ret; +} diff --git a/kernel/bpf/percpu_freelist.h b/kernel/bpf/percpu_freelist.h index 3049aae8ea1e..c3960118e617 100644 --- a/kernel/bpf/percpu_freelist.h +++ b/kernel/bpf/percpu_freelist.h @@ -22,8 +22,12 @@ struct pcpu_freelist_node { struct pcpu_freelist_node *next; }; +/* pcpu_freelist_* do spin_lock_irqsave. */ void pcpu_freelist_push(struct pcpu_freelist *, struct pcpu_freelist_node *); struct pcpu_freelist_node *pcpu_freelist_pop(struct pcpu_freelist *); +/* __pcpu_freelist_* do spin_lock only. caller must disable irqs. */ +void __pcpu_freelist_push(struct pcpu_freelist *, struct pcpu_freelist_node *); +struct pcpu_freelist_node *__pcpu_freelist_pop(struct pcpu_freelist *); void pcpu_freelist_populate(struct pcpu_freelist *s, void *buf, u32 elem_size, u32 nr_elems); int pcpu_freelist_init(struct pcpu_freelist *); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index b155cd17c1bd..8577bb7f8be6 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -713,8 +713,13 @@ static int map_lookup_elem(union bpf_attr *attr) if (bpf_map_is_dev_bound(map)) { err = bpf_map_offload_lookup_elem(map, key, value); - } else if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || - map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) { + goto done; + } + + preempt_disable(); + this_cpu_inc(bpf_prog_active); + if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || + map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) { err = bpf_percpu_hash_copy(map, key, value); } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { err = bpf_percpu_array_copy(map, key, value); @@ -744,7 +749,10 @@ static int map_lookup_elem(union bpf_attr *attr) } rcu_read_unlock(); } + this_cpu_dec(bpf_prog_active); + preempt_enable(); +done: if (err) goto free_value; diff --git a/kernel/futex.c b/kernel/futex.c index fdd312da0992..a0514e01c3eb 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -2221,11 +2221,11 @@ static inline struct futex_hash_bucket *queue_lock(struct futex_q *q) * decrement the counter at queue_unlock() when some error has * occurred and we don't end up adding the task to the list. */ - hb_waiters_inc(hb); + hb_waiters_inc(hb); /* implies smp_mb(); (A) */ q->lock_ptr = &hb->lock; - spin_lock(&hb->lock); /* implies smp_mb(); (A) */ + spin_lock(&hb->lock); return hb; } @@ -2861,35 +2861,39 @@ retry_private: * and BUG when futex_unlock_pi() interleaves with this. * * Therefore acquire wait_lock while holding hb->lock, but drop the - * latter before calling rt_mutex_start_proxy_lock(). This still fully - * serializes against futex_unlock_pi() as that does the exact same - * lock handoff sequence. + * latter before calling __rt_mutex_start_proxy_lock(). This + * interleaves with futex_unlock_pi() -- which does a similar lock + * handoff -- such that the latter can observe the futex_q::pi_state + * before __rt_mutex_start_proxy_lock() is done. */ raw_spin_lock_irq(&q.pi_state->pi_mutex.wait_lock); spin_unlock(q.lock_ptr); + /* + * __rt_mutex_start_proxy_lock() unconditionally enqueues the @rt_waiter + * such that futex_unlock_pi() is guaranteed to observe the waiter when + * it sees the futex_q::pi_state. + */ ret = __rt_mutex_start_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter, current); raw_spin_unlock_irq(&q.pi_state->pi_mutex.wait_lock); if (ret) { if (ret == 1) ret = 0; - - spin_lock(q.lock_ptr); - goto no_block; + goto cleanup; } - if (unlikely(to)) hrtimer_start_expires(&to->timer, HRTIMER_MODE_ABS); ret = rt_mutex_wait_proxy_lock(&q.pi_state->pi_mutex, to, &rt_waiter); +cleanup: spin_lock(q.lock_ptr); /* - * If we failed to acquire the lock (signal/timeout), we must + * If we failed to acquire the lock (deadlock/signal/timeout), we must * first acquire the hb->lock before removing the lock from the - * rt_mutex waitqueue, such that we can keep the hb and rt_mutex - * wait lists consistent. + * rt_mutex waitqueue, such that we can keep the hb and rt_mutex wait + * lists consistent. * * In particular; it is important that futex_unlock_pi() can not * observe this inconsistency. @@ -3013,6 +3017,10 @@ retry: * there is no point where we hold neither; and therefore * wake_futex_pi() must observe a state consistent with what we * observed. + * + * In particular; this forces __rt_mutex_start_proxy() to + * complete such that we're guaranteed to observe the + * rt_waiter. Also see the WARN in wake_futex_pi(). */ raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); spin_unlock(&hb->lock); diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index 581edcc63c26..978d63a8261c 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -1726,12 +1726,33 @@ void rt_mutex_proxy_unlock(struct rt_mutex *lock, rt_mutex_set_owner(lock, NULL); } +/** + * __rt_mutex_start_proxy_lock() - Start lock acquisition for another task + * @lock: the rt_mutex to take + * @waiter: the pre-initialized rt_mutex_waiter + * @task: the task to prepare + * + * Starts the rt_mutex acquire; it enqueues the @waiter and does deadlock + * detection. It does not wait, see rt_mutex_wait_proxy_lock() for that. + * + * NOTE: does _NOT_ remove the @waiter on failure; must either call + * rt_mutex_wait_proxy_lock() or rt_mutex_cleanup_proxy_lock() after this. + * + * Returns: + * 0 - task blocked on lock + * 1 - acquired the lock for task, caller should wake it up + * <0 - error + * + * Special API call for PI-futex support. + */ int __rt_mutex_start_proxy_lock(struct rt_mutex *lock, struct rt_mutex_waiter *waiter, struct task_struct *task) { int ret; + lockdep_assert_held(&lock->wait_lock); + if (try_to_take_rt_mutex(lock, task, NULL)) return 1; @@ -1749,9 +1770,6 @@ int __rt_mutex_start_proxy_lock(struct rt_mutex *lock, ret = 0; } - if (unlikely(ret)) - remove_waiter(lock, waiter); - debug_rt_mutex_print_deadlock(waiter); return ret; @@ -1763,12 +1781,18 @@ int __rt_mutex_start_proxy_lock(struct rt_mutex *lock, * @waiter: the pre-initialized rt_mutex_waiter * @task: the task to prepare * + * Starts the rt_mutex acquire; it enqueues the @waiter and does deadlock + * detection. It does not wait, see rt_mutex_wait_proxy_lock() for that. + * + * NOTE: unlike __rt_mutex_start_proxy_lock this _DOES_ remove the @waiter + * on failure. + * * Returns: * 0 - task blocked on lock * 1 - acquired the lock for task, caller should wake it up * <0 - error * - * Special API call for FUTEX_REQUEUE_PI support. + * Special API call for PI-futex support. */ int rt_mutex_start_proxy_lock(struct rt_mutex *lock, struct rt_mutex_waiter *waiter, @@ -1778,6 +1802,8 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock, raw_spin_lock_irq(&lock->wait_lock); ret = __rt_mutex_start_proxy_lock(lock, waiter, task); + if (unlikely(ret)) + remove_waiter(lock, waiter); raw_spin_unlock_irq(&lock->wait_lock); return ret; @@ -1845,7 +1871,8 @@ int rt_mutex_wait_proxy_lock(struct rt_mutex *lock, * @lock: the rt_mutex we were woken on * @waiter: the pre-initialized rt_mutex_waiter * - * Attempt to clean up after a failed rt_mutex_wait_proxy_lock(). + * Attempt to clean up after a failed __rt_mutex_start_proxy_lock() or + * rt_mutex_wait_proxy_lock(). * * Unless we acquired the lock; we're still enqueued on the wait-list and can * in fact still be granted ownership until we're removed. Therefore we can diff --git a/kernel/relay.c b/kernel/relay.c index 04f248644e06..9e0f52375487 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -428,6 +428,8 @@ static struct dentry *relay_create_buf_file(struct rchan *chan, dentry = chan->cb->create_buf_file(tmpname, chan->parent, S_IRUSR, buf, &chan->is_global); + if (IS_ERR(dentry)) + dentry = NULL; kfree(tmpname); @@ -461,7 +463,7 @@ static struct rchan_buf *relay_open_buf(struct rchan *chan, unsigned int cpu) dentry = chan->cb->create_buf_file(NULL, NULL, S_IRUSR, buf, &chan->is_global); - if (WARN_ON(dentry)) + if (IS_ERR_OR_NULL(dentry)) goto free_buf; } diff --git a/kernel/signal.c b/kernel/signal.c index e1d7ad8e6ab1..99fa8ff06fd9 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -688,6 +688,48 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, kernel_siginfo_t *in } EXPORT_SYMBOL_GPL(dequeue_signal); +static int dequeue_synchronous_signal(kernel_siginfo_t *info) +{ + struct task_struct *tsk = current; + struct sigpending *pending = &tsk->pending; + struct sigqueue *q, *sync = NULL; + + /* + * Might a synchronous signal be in the queue? + */ + if (!((pending->signal.sig[0] & ~tsk->blocked.sig[0]) & SYNCHRONOUS_MASK)) + return 0; + + /* + * Return the first synchronous signal in the queue. + */ + list_for_each_entry(q, &pending->list, list) { + /* Synchronous signals have a postive si_code */ + if ((q->info.si_code > SI_USER) && + (sigmask(q->info.si_signo) & SYNCHRONOUS_MASK)) { + sync = q; + goto next; + } + } + return 0; +next: + /* + * Check if there is another siginfo for the same signal. + */ + list_for_each_entry_continue(q, &pending->list, list) { + if (q->info.si_signo == sync->info.si_signo) + goto still_pending; + } + + sigdelset(&pending->signal, sync->info.si_signo); + recalc_sigpending(); +still_pending: + list_del_init(&sync->list); + copy_siginfo(info, &sync->info); + __sigqueue_free(sync); + return info->si_signo; +} + /* * Tell a process that it has a new active signal.. * @@ -1057,10 +1099,9 @@ static int __send_signal(int sig, struct kernel_siginfo *info, struct task_struc result = TRACE_SIGNAL_DELIVERED; /* - * Skip useless siginfo allocation for SIGKILL SIGSTOP, - * and kernel threads. + * Skip useless siginfo allocation for SIGKILL and kernel threads. */ - if (sig_kernel_only(sig) || (t->flags & PF_KTHREAD)) + if ((sig == SIGKILL) || (t->flags & PF_KTHREAD)) goto out_set; /* @@ -2394,6 +2435,11 @@ relock: goto relock; } + /* Has this task already been marked for death? */ + ksig->info.si_signo = signr = SIGKILL; + if (signal_group_exit(signal)) + goto fatal; + for (;;) { struct k_sigaction *ka; @@ -2407,7 +2453,15 @@ relock: goto relock; } - signr = dequeue_signal(current, ¤t->blocked, &ksig->info); + /* + * Signals generated by the execution of an instruction + * need to be delivered before any other pending signals + * so that the instruction pointer in the signal stack + * frame points to the faulting instruction. + */ + signr = dequeue_synchronous_signal(&ksig->info); + if (!signr) + signr = dequeue_signal(current, ¤t->blocked, &ksig->info); if (!signr) break; /* will return 0 */ @@ -2489,6 +2543,7 @@ relock: continue; } + fatal: spin_unlock_irq(&sighand->siglock); /* diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 8b068adb9da1..f1a86a0d881d 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1204,22 +1204,12 @@ static int __bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog * int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog) { - int err; - - mutex_lock(&bpf_event_mutex); - err = __bpf_probe_register(btp, prog); - mutex_unlock(&bpf_event_mutex); - return err; + return __bpf_probe_register(btp, prog); } int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog) { - int err; - - mutex_lock(&bpf_event_mutex); - err = tracepoint_probe_unregister(btp->tp, (void *)btp->bpf_func, prog); - mutex_unlock(&bpf_event_mutex); - return err; + return tracepoint_probe_unregister(btp->tp, (void *)btp->bpf_func, prog); } int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id, diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index e335576b9411..9bde07c06362 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -5,7 +5,7 @@ * Copyright (C) IBM Corporation, 2010-2012 * Author: Srikar Dronamraju <srikar@linux.vnet.ibm.com> */ -#define pr_fmt(fmt) "trace_kprobe: " fmt +#define pr_fmt(fmt) "trace_uprobe: " fmt #include <linux/ctype.h> #include <linux/module.h> @@ -160,6 +160,13 @@ fetch_store_string(unsigned long addr, void *dest, void *base) if (ret >= 0) { if (ret == maxlen) dst[ret - 1] = '\0'; + else + /* + * Include the terminating null byte. In this case it + * was copied by strncpy_from_user but not accounted + * for in ret. + */ + ret++; *(u32 *)dest = make_data_loc(ret, (void *)dst - base); } |