diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/bpf/verifier.c | 98 | ||||
-rw-r--r-- | kernel/events/ring_buffer.c | 3 | ||||
-rw-r--r-- | kernel/events/uprobes.c | 4 | ||||
-rw-r--r-- | kernel/exit.c | 12 | ||||
-rw-r--r-- | kernel/fork.c | 13 | ||||
-rw-r--r-- | kernel/futex.c | 7 | ||||
-rw-r--r-- | kernel/hung_task.c | 8 | ||||
-rw-r--r-- | kernel/locking/rtmutex.c | 52 | ||||
-rw-r--r-- | kernel/locking/rtmutex_common.h | 8 | ||||
-rw-r--r-- | kernel/memremap.c | 11 | ||||
-rw-r--r-- | kernel/rcu/tree.c | 22 | ||||
-rw-r--r-- | kernel/signal.c | 61 | ||||
-rw-r--r-- | kernel/sysctl.c | 11 | ||||
-rw-r--r-- | kernel/time/posix-cpu-timers.c | 2 | ||||
-rw-r--r-- | kernel/time/posix-timers.c | 29 | ||||
-rw-r--r-- | kernel/time/timekeeping.c | 4 | ||||
-rw-r--r-- | kernel/time/timer_list.c | 2 | ||||
-rw-r--r-- | kernel/trace/bpf_trace.c | 8 | ||||
-rw-r--r-- | kernel/trace/ftrace.c | 1 | ||||
-rw-r--r-- | kernel/trace/trace.c | 1 | ||||
-rw-r--r-- | kernel/trace/trace_events_trigger.c | 6 | ||||
-rw-r--r-- | kernel/trace/trace_uprobe.c | 9 |
22 files changed, 293 insertions, 79 deletions
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 35dfa9e9d69e..c43ca9857479 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -191,6 +191,7 @@ struct bpf_insn_aux_data { enum bpf_reg_type ptr_type; /* pointer type for load/store insns */ struct bpf_map *map_ptr; /* pointer for call insn into lookup_elem */ }; + int sanitize_stack_off; /* stack slot to be cleared */ bool seen; /* this insn was processed by the verifier */ }; @@ -569,10 +570,11 @@ static bool is_spillable_regtype(enum bpf_reg_type type) /* check_stack_read/write functions track spill/fill of registers, * stack boundary and alignment are checked in check_mem_access() */ -static int check_stack_write(struct verifier_state *state, int off, int size, - int value_regno) +static int check_stack_write(struct verifier_env *env, + struct verifier_state *state, int off, + int size, int value_regno, int insn_idx) { - int i; + int i, spi = (MAX_BPF_STACK + off) / BPF_REG_SIZE; /* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0, * so it's aligned access and [off, off + size) are within stack limits */ @@ -587,15 +589,37 @@ static int check_stack_write(struct verifier_state *state, int off, int size, } /* save register state */ - state->spilled_regs[(MAX_BPF_STACK + off) / BPF_REG_SIZE] = - state->regs[value_regno]; - - for (i = 0; i < BPF_REG_SIZE; i++) + state->spilled_regs[spi] = state->regs[value_regno]; + + for (i = 0; i < BPF_REG_SIZE; i++) { + if (state->stack_slot_type[MAX_BPF_STACK + off + i] == STACK_MISC && + !env->allow_ptr_leaks) { + int *poff = &env->insn_aux_data[insn_idx].sanitize_stack_off; + int soff = (-spi - 1) * BPF_REG_SIZE; + + /* detected reuse of integer stack slot with a pointer + * which means either llvm is reusing stack slot or + * an attacker is trying to exploit CVE-2018-3639 + * (speculative store bypass) + * Have to sanitize that slot with preemptive + * store of zero. + */ + if (*poff && *poff != soff) { + /* disallow programs where single insn stores + * into two different stack slots, since verifier + * cannot sanitize them + */ + verbose("insn %d cannot access two stack slots fp%d and fp%d", + insn_idx, *poff, soff); + return -EINVAL; + } + *poff = soff; + } state->stack_slot_type[MAX_BPF_STACK + off + i] = STACK_SPILL; + } } else { /* regular write of data into stack */ - state->spilled_regs[(MAX_BPF_STACK + off) / BPF_REG_SIZE] = - (struct reg_state) {}; + state->spilled_regs[spi] = (struct reg_state) {}; for (i = 0; i < size; i++) state->stack_slot_type[MAX_BPF_STACK + off + i] = STACK_MISC; @@ -696,7 +720,7 @@ static bool is_ctx_reg(struct verifier_env *env, int regno) * if t==write && value_regno==-1, some unknown value is stored into memory * if t==read && value_regno==-1, don't care what we read from memory */ -static int check_mem_access(struct verifier_env *env, u32 regno, int off, +static int check_mem_access(struct verifier_env *env, int insn_idx, u32 regno, int off, int bpf_size, enum bpf_access_type t, int value_regno) { @@ -748,7 +772,8 @@ static int check_mem_access(struct verifier_env *env, u32 regno, int off, verbose("attempt to corrupt spilled pointer on stack\n"); return -EACCES; } - err = check_stack_write(state, off, size, value_regno); + err = check_stack_write(env, state, off, size, + value_regno, insn_idx); } else { err = check_stack_read(state, off, size, value_regno); } @@ -760,7 +785,7 @@ static int check_mem_access(struct verifier_env *env, u32 regno, int off, return err; } -static int check_xadd(struct verifier_env *env, struct bpf_insn *insn) +static int check_xadd(struct verifier_env *env, int insn_idx, struct bpf_insn *insn) { struct reg_state *regs = env->cur_state.regs; int err; @@ -793,13 +818,13 @@ static int check_xadd(struct verifier_env *env, struct bpf_insn *insn) } /* check whether atomic_add can read the memory */ - err = check_mem_access(env, insn->dst_reg, insn->off, + err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off, BPF_SIZE(insn->code), BPF_READ, -1); if (err) return err; /* check whether atomic_add can write into the same memory */ - return check_mem_access(env, insn->dst_reg, insn->off, + return check_mem_access(env, insn_idx, insn->dst_reg, insn->off, BPF_SIZE(insn->code), BPF_WRITE, -1); } @@ -1838,13 +1863,14 @@ static int do_check(struct verifier_env *env) /* check that memory (src_reg + off) is readable, * the state of dst_reg will be updated by this func */ - err = check_mem_access(env, insn->src_reg, insn->off, + err = check_mem_access(env, insn_idx, insn->src_reg, insn->off, BPF_SIZE(insn->code), BPF_READ, insn->dst_reg); if (err) return err; - if (BPF_SIZE(insn->code) != BPF_W) { + if (BPF_SIZE(insn->code) != BPF_W && + BPF_SIZE(insn->code) != BPF_DW) { insn_idx++; continue; } @@ -1876,7 +1902,7 @@ static int do_check(struct verifier_env *env) enum bpf_reg_type *prev_dst_type, dst_reg_type; if (BPF_MODE(insn->code) == BPF_XADD) { - err = check_xadd(env, insn); + err = check_xadd(env, insn_idx, insn); if (err) return err; insn_idx++; @@ -1895,7 +1921,7 @@ static int do_check(struct verifier_env *env) dst_reg_type = regs[insn->dst_reg].type; /* check that memory (dst_reg + off) is writeable */ - err = check_mem_access(env, insn->dst_reg, insn->off, + err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off, BPF_SIZE(insn->code), BPF_WRITE, insn->src_reg); if (err) @@ -1930,7 +1956,7 @@ static int do_check(struct verifier_env *env) } /* check that memory (dst_reg + off) is writeable */ - err = check_mem_access(env, insn->dst_reg, insn->off, + err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off, BPF_SIZE(insn->code), BPF_WRITE, -1); if (err) @@ -2220,13 +2246,43 @@ static int convert_ctx_accesses(struct verifier_env *env) for (i = 0; i < insn_cnt; i++, insn++) { u32 cnt; - if (insn->code == (BPF_LDX | BPF_MEM | BPF_W)) + if (insn->code == (BPF_LDX | BPF_MEM | BPF_W) || + insn->code == (BPF_LDX | BPF_MEM | BPF_DW)) type = BPF_READ; - else if (insn->code == (BPF_STX | BPF_MEM | BPF_W)) + else if (insn->code == (BPF_STX | BPF_MEM | BPF_W) || + insn->code == (BPF_STX | BPF_MEM | BPF_DW)) type = BPF_WRITE; else continue; + if (type == BPF_WRITE && + env->insn_aux_data[i + delta].sanitize_stack_off) { + struct bpf_insn patch[] = { + /* Sanitize suspicious stack slot with zero. + * There are no memory dependencies for this store, + * since it's only using frame pointer and immediate + * constant of zero + */ + BPF_ST_MEM(BPF_DW, BPF_REG_FP, + env->insn_aux_data[i + delta].sanitize_stack_off, + 0), + /* the original STX instruction will immediately + * overwrite the same stack slot with appropriate value + */ + *insn, + }; + + cnt = ARRAY_SIZE(patch); + new_prog = bpf_patch_insn_data(env, i + delta, patch, cnt); + if (!new_prog) + return -ENOMEM; + + delta += cnt - 1; + env->prog = new_prog; + insn = new_prog->insnsi + i + delta; + continue; + } + if (env->insn_aux_data[i + delta].ptr_type != PTR_TO_CTX) continue; diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index 58013ef228a1..358bb53c1e74 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -637,6 +637,9 @@ struct ring_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags) size = sizeof(struct ring_buffer); size += nr_pages * sizeof(void *); + if (order_base_2(size) >= PAGE_SHIFT+MAX_ORDER) + goto fail; + rb = kzalloc(size, GFP_KERNEL); if (!rb) goto fail; diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 7108097fa2f2..aad43c88a668 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -299,7 +299,7 @@ int uprobe_write_opcode(struct mm_struct *mm, unsigned long vaddr, retry: /* Read the page with vaddr into memory */ - ret = get_user_pages(NULL, mm, vaddr, 1, 0, 1, &old_page, &vma); + ret = get_user_pages(NULL, mm, vaddr, 1, FOLL_FORCE, &old_page, &vma); if (ret <= 0) return ret; @@ -1700,7 +1700,7 @@ static int is_trap_at_addr(struct mm_struct *mm, unsigned long vaddr) if (likely(result == 0)) goto out; - result = get_user_pages(NULL, mm, vaddr, 1, 0, 1, &page, NULL); + result = get_user_pages(NULL, mm, vaddr, 1, FOLL_FORCE, &page, NULL); if (result < 0) return result; diff --git a/kernel/exit.c b/kernel/exit.c index f20e6339761b..03f6722302b5 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -450,12 +450,14 @@ static struct task_struct *find_alive_thread(struct task_struct *p) return NULL; } -static struct task_struct *find_child_reaper(struct task_struct *father) +static struct task_struct *find_child_reaper(struct task_struct *father, + struct list_head *dead) __releases(&tasklist_lock) __acquires(&tasklist_lock) { struct pid_namespace *pid_ns = task_active_pid_ns(father); struct task_struct *reaper = pid_ns->child_reaper; + struct task_struct *p, *n; if (likely(reaper != father)) return reaper; @@ -471,6 +473,12 @@ static struct task_struct *find_child_reaper(struct task_struct *father) panic("Attempted to kill init! exitcode=0x%08x\n", father->signal->group_exit_code ?: father->exit_code); } + + list_for_each_entry_safe(p, n, dead, ptrace_entry) { + list_del_init(&p->ptrace_entry); + release_task(p); + } + zap_pid_ns_processes(pid_ns); write_lock_irq(&tasklist_lock); @@ -557,7 +565,7 @@ static void forget_original_parent(struct task_struct *father, exit_ptrace(father, dead); /* Can drop and reacquire tasklist_lock */ - reaper = find_child_reaper(father); + reaper = find_child_reaper(father, dead); if (list_empty(&father->children)) return; diff --git a/kernel/fork.c b/kernel/fork.c index dd2f79ac0771..e4b81913a998 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1411,8 +1411,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, posix_cpu_timers_init(p); - p->start_time = ktime_get_ns(); - p->real_start_time = ktime_get_boot_ns(); p->io_context = NULL; p->audit_context = NULL; cgroup_fork(p); @@ -1573,6 +1571,17 @@ static struct task_struct *copy_process(unsigned long clone_flags, goto bad_fork_free_pid; /* + * From this point on we must avoid any synchronous user-space + * communication until we take the tasklist-lock. In particular, we do + * not want user-space to be able to predict the process start-time by + * stalling fork(2) after we recorded the start_time but before it is + * visible to the system. + */ + + p->start_time = ktime_get_ns(); + p->real_start_time = ktime_get_boot_ns(); + + /* * Make it visible to the rest of the system, but dont wake it up yet. * Need tasklist lock for parent etc handling! */ diff --git a/kernel/futex.c b/kernel/futex.c index a26d217c99fe..0c92c8d34ffa 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -2923,10 +2923,13 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, */ WARN_ON(!q.pi_state); pi_mutex = &q.pi_state->pi_mutex; - ret = rt_mutex_finish_proxy_lock(pi_mutex, to, &rt_waiter); - debug_rt_mutex_free_waiter(&rt_waiter); + ret = rt_mutex_wait_proxy_lock(pi_mutex, to, &rt_waiter); spin_lock(q.lock_ptr); + if (ret && !rt_mutex_cleanup_proxy_lock(pi_mutex, &rt_waiter)) + ret = 0; + + debug_rt_mutex_free_waiter(&rt_waiter); /* * Fixup the pi_state owner and possibly acquire the lock if we * haven't already. diff --git a/kernel/hung_task.c b/kernel/hung_task.c index e0f90c2b57aa..cc05b97ba569 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c @@ -30,7 +30,7 @@ int __read_mostly sysctl_hung_task_check_count = PID_MAX_LIMIT; * is disabled during the critical section. It also controls the size of * the RCU grace period. So it needs to be upper-bound. */ -#define HUNG_TASK_BATCHING 1024 +#define HUNG_TASK_LOCK_BREAK (HZ / 10) /* * Zero means infinite timeout - no checking done: @@ -158,7 +158,7 @@ static bool rcu_lock_break(struct task_struct *g, struct task_struct *t) static void check_hung_uninterruptible_tasks(unsigned long timeout) { int max_count = sysctl_hung_task_check_count; - int batch_count = HUNG_TASK_BATCHING; + unsigned long last_break = jiffies; struct task_struct *g, *t; /* @@ -172,10 +172,10 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout) for_each_process_thread(g, t) { if (!max_count--) goto unlock; - if (!--batch_count) { - batch_count = HUNG_TASK_BATCHING; + if (time_after(jiffies, last_break + HUNG_TASK_LOCK_BREAK)) { if (!rcu_lock_break(g, t)) goto unlock; + last_break = jiffies; } /* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */ if (t->state == TASK_UNINTERRUPTIBLE) diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index b066724d7a5b..dd173df9ee5e 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -1712,21 +1712,23 @@ struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock) } /** - * rt_mutex_finish_proxy_lock() - Complete lock acquisition + * rt_mutex_wait_proxy_lock() - Wait for lock acquisition * @lock: the rt_mutex we were woken on * @to: the timeout, null if none. hrtimer should already have * been started. * @waiter: the pre-initialized rt_mutex_waiter * - * Complete the lock acquisition started our behalf by another thread. + * Wait for the the lock acquisition started on our behalf by + * rt_mutex_start_proxy_lock(). Upon failure, the caller must call + * rt_mutex_cleanup_proxy_lock(). * * Returns: * 0 - success * <0 - error, one of -EINTR, -ETIMEDOUT * - * Special API call for PI-futex requeue support + * Special API call for PI-futex support */ -int rt_mutex_finish_proxy_lock(struct rt_mutex *lock, +int rt_mutex_wait_proxy_lock(struct rt_mutex *lock, struct hrtimer_sleeper *to, struct rt_mutex_waiter *waiter) { @@ -1739,9 +1741,6 @@ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock, /* sleep on the mutex */ ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter); - if (unlikely(ret)) - remove_waiter(lock, waiter); - /* * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might * have to fix that up. @@ -1752,3 +1751,42 @@ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock, return ret; } + +/** + * rt_mutex_cleanup_proxy_lock() - Cleanup failed lock acquisition + * @lock: the rt_mutex we were woken on + * @waiter: the pre-initialized rt_mutex_waiter + * + * Attempt to clean up after a failed rt_mutex_wait_proxy_lock(). + * + * Unless we acquired the lock; we're still enqueued on the wait-list and can + * in fact still be granted ownership until we're removed. Therefore we can + * find we are in fact the owner and must disregard the + * rt_mutex_wait_proxy_lock() failure. + * + * Returns: + * true - did the cleanup, we done. + * false - we acquired the lock after rt_mutex_wait_proxy_lock() returned, + * caller should disregards its return value. + * + * Special API call for PI-futex support + */ +bool rt_mutex_cleanup_proxy_lock(struct rt_mutex *lock, + struct rt_mutex_waiter *waiter) +{ + bool cleanup = false; + + raw_spin_lock_irq(&lock->wait_lock); + /* + * Unless we're the owner; we're still enqueued on the wait_list. + * So check if we became owner, if not, take us off the wait_list. + */ + if (rt_mutex_owner(lock) != current) { + remove_waiter(lock, waiter); + fixup_rt_mutex_waiters(lock); + cleanup = true; + } + raw_spin_unlock_irq(&lock->wait_lock); + + return cleanup; +} diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h index e317e1cbb3eb..6f8f68edb700 100644 --- a/kernel/locking/rtmutex_common.h +++ b/kernel/locking/rtmutex_common.h @@ -106,9 +106,11 @@ extern void rt_mutex_proxy_unlock(struct rt_mutex *lock, extern int rt_mutex_start_proxy_lock(struct rt_mutex *lock, struct rt_mutex_waiter *waiter, struct task_struct *task); -extern int rt_mutex_finish_proxy_lock(struct rt_mutex *lock, - struct hrtimer_sleeper *to, - struct rt_mutex_waiter *waiter); +extern int rt_mutex_wait_proxy_lock(struct rt_mutex *lock, + struct hrtimer_sleeper *to, + struct rt_mutex_waiter *waiter); +extern bool rt_mutex_cleanup_proxy_lock(struct rt_mutex *lock, + struct rt_mutex_waiter *waiter); extern int rt_mutex_timed_futex_lock(struct rt_mutex *l, struct hrtimer_sleeper *to); extern bool rt_mutex_futex_unlock(struct rt_mutex *lock, struct wake_q_head *wqh); diff --git a/kernel/memremap.c b/kernel/memremap.c index f719c925cb54..1be42f9b3e00 100644 --- a/kernel/memremap.c +++ b/kernel/memremap.c @@ -171,15 +171,12 @@ void *devm_memremap_pages(struct device *dev, struct resource *res) struct page_map *page_map; int error, nid; - if (is_ram == REGION_MIXED) { - WARN_ONCE(1, "%s attempted on mixed region %pr\n", - __func__, res); + if (is_ram != REGION_DISJOINT) { + WARN_ONCE(1, "%s attempted on %s region %pr\n", __func__, + is_ram == REGION_MIXED ? "mixed" : "ram", res); return ERR_PTR(-ENXIO); } - if (is_ram == REGION_INTERSECTS) - return __va(res->start); - page_map = devres_alloc_node(devm_memremap_pages_release, sizeof(*page_map), GFP_KERNEL, dev_to_node(dev)); if (!page_map) @@ -202,5 +199,5 @@ void *devm_memremap_pages(struct device *dev, struct resource *res) devres_add(dev, page_map); return __va(res->start); } -EXPORT_SYMBOL(devm_memremap_pages); +EXPORT_SYMBOL_GPL(devm_memremap_pages); #endif /* CONFIG_ZONE_DEVICE */ diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 8a62cbfe1f2f..082aedefe29c 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -1611,15 +1611,23 @@ static int rcu_future_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp) } /* - * Awaken the grace-period kthread for the specified flavor of RCU. - * Don't do a self-awaken, and don't bother awakening when there is - * nothing for the grace-period kthread to do (as in several CPUs - * raced to awaken, and we lost), and finally don't try to awaken - * a kthread that has not yet been created. + * Awaken the grace-period kthread. Don't do a self-awaken (unless in + * an interrupt or softirq handler), and don't bother awakening when there + * is nothing for the grace-period kthread to do (as in several CPUs raced + * to awaken, and we lost), and finally don't try to awaken a kthread that + * has not yet been created. If all those checks are passed, track some + * debug information and awaken. + * + * So why do the self-wakeup when in an interrupt or softirq handler + * in the grace-period kthread's context? Because the kthread might have + * been interrupted just as it was going to sleep, and just after the final + * pre-sleep check of the awaken condition. In this case, a wakeup really + * is required, and is therefore supplied. */ static void rcu_gp_kthread_wake(struct rcu_state *rsp) { - if (current == rsp->gp_kthread || + if ((current == rsp->gp_kthread && + !in_interrupt() && !in_serving_softirq()) || !READ_ONCE(rsp->gp_flags) || !rsp->gp_kthread) return; @@ -3817,7 +3825,7 @@ static void synchronize_sched_expedited_wait(struct rcu_state *rsp) continue; rdp = per_cpu_ptr(rsp->rda, cpu); pr_cont(" %d-%c%c%c", cpu, - "O."[cpu_online(cpu)], + "O."[!!cpu_online(cpu)], "o."[!!(rdp->grpmask & rnp->expmaskinit)], "N."[!!(rdp->grpmask & rnp->expmaskinitnext)]); } diff --git a/kernel/signal.c b/kernel/signal.c index 5b1313309356..96e8c3cbfa38 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -696,6 +696,48 @@ static inline bool si_fromuser(const struct siginfo *info) (!is_si_special(info) && SI_FROMUSER(info)); } +static int dequeue_synchronous_signal(siginfo_t *info) +{ + struct task_struct *tsk = current; + struct sigpending *pending = &tsk->pending; + struct sigqueue *q, *sync = NULL; + + /* + * Might a synchronous signal be in the queue? + */ + if (!((pending->signal.sig[0] & ~tsk->blocked.sig[0]) & SYNCHRONOUS_MASK)) + return 0; + + /* + * Return the first synchronous signal in the queue. + */ + list_for_each_entry(q, &pending->list, list) { + /* Synchronous signals have a postive si_code */ + if ((q->info.si_code > SI_USER) && + (sigmask(q->info.si_signo) & SYNCHRONOUS_MASK)) { + sync = q; + goto next; + } + } + return 0; +next: + /* + * Check if there is another siginfo for the same signal. + */ + list_for_each_entry_continue(q, &pending->list, list) { + if (q->info.si_signo == sync->info.si_signo) + goto still_pending; + } + + sigdelset(&pending->signal, sync->info.si_signo); + recalc_sigpending(); +still_pending: + list_del_init(&sync->list); + copy_siginfo(info, &sync->info); + __sigqueue_free(sync); + return info->si_signo; +} + /* * called with RCU read lock from check_kill_permission() */ @@ -2198,6 +2240,14 @@ relock: goto relock; } + /* Has this task already been marked for death? */ + if (signal_group_exit(signal)) { + ksig->info.si_signo = signr = SIGKILL; + sigdelset(¤t->pending.signal, SIGKILL); + recalc_sigpending(); + goto fatal; + } + for (;;) { struct k_sigaction *ka; @@ -2211,7 +2261,15 @@ relock: goto relock; } - signr = dequeue_signal(current, ¤t->blocked, &ksig->info); + /* + * Signals generated by the execution of an instruction + * need to be delivered before any other pending signals + * so that the instruction pointer in the signal stack + * frame points to the faulting instruction. + */ + signr = dequeue_synchronous_signal(&ksig->info); + if (!signr) + signr = dequeue_signal(current, ¤t->blocked, &ksig->info); if (!signr) break; /* will return 0 */ @@ -2293,6 +2351,7 @@ relock: continue; } + fatal: spin_unlock_irq(&sighand->siglock); /* diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 7e832f9a8f42..beadcf83ceba 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -2306,7 +2306,16 @@ static int do_proc_dointvec_minmax_conv(bool *negp, unsigned long *lvalp, { struct do_proc_dointvec_minmax_conv_param *param = data; if (write) { - int val = *negp ? -*lvalp : *lvalp; + int val; + if (*negp) { + if (*lvalp > (unsigned long) INT_MAX + 1) + return -EINVAL; + val = -*lvalp; + } else { + if (*lvalp > (unsigned long) INT_MAX) + return -EINVAL; + val = *lvalp; + } if ((param->min && *param->min > val) || (param->max && *param->max < val)) return -EINVAL; diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index 80016b329d94..8fc68e60c795 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -103,7 +103,7 @@ static void bump_cpu_timer(struct k_itimer *timer, continue; timer->it.cpu.expires += incr; - timer->it_overrun += 1 << i; + timer->it_overrun += 1LL << i; delta -= incr; } } diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index fc7c37ad90a0..0e6ed2e7d066 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -355,6 +355,17 @@ static __init int init_posix_timers(void) __initcall(init_posix_timers); +/* + * The siginfo si_overrun field and the return value of timer_getoverrun(2) + * are of type int. Clamp the overrun value to INT_MAX + */ +static inline int timer_overrun_to_int(struct k_itimer *timr, int baseval) +{ + s64 sum = timr->it_overrun_last + (s64)baseval; + + return sum > (s64)INT_MAX ? INT_MAX : (int)sum; +} + static void schedule_next_timer(struct k_itimer *timr) { struct hrtimer *timer = &timr->it.real.timer; @@ -362,12 +373,11 @@ static void schedule_next_timer(struct k_itimer *timr) if (timr->it.real.interval.tv64 == 0) return; - timr->it_overrun += (unsigned int) hrtimer_forward(timer, - timer->base->get_time(), - timr->it.real.interval); + timr->it_overrun += hrtimer_forward(timer, timer->base->get_time(), + timr->it.real.interval); timr->it_overrun_last = timr->it_overrun; - timr->it_overrun = -1; + timr->it_overrun = -1LL; ++timr->it_requeue_pending; hrtimer_restart(timer); } @@ -396,7 +406,7 @@ void do_schedule_next_timer(struct siginfo *info) else schedule_next_timer(timr); - info->si_overrun += timr->it_overrun_last; + info->si_overrun = timer_overrun_to_int(timr, info->si_overrun); } if (timr) @@ -491,8 +501,7 @@ static enum hrtimer_restart posix_timer_fn(struct hrtimer *timer) now = ktime_add(now, kj); } #endif - timr->it_overrun += (unsigned int) - hrtimer_forward(timer, now, + timr->it_overrun += hrtimer_forward(timer, now, timr->it.real.interval); ret = HRTIMER_RESTART; ++timr->it_requeue_pending; @@ -633,7 +642,7 @@ SYSCALL_DEFINE3(timer_create, const clockid_t, which_clock, it_id_set = IT_ID_SET; new_timer->it_id = (timer_t) new_timer_id; new_timer->it_clock = which_clock; - new_timer->it_overrun = -1; + new_timer->it_overrun = -1LL; if (timer_event_spec) { if (copy_from_user(&event, timer_event_spec, sizeof (event))) { @@ -762,7 +771,7 @@ common_timer_get(struct k_itimer *timr, struct itimerspec *cur_setting) */ if (iv.tv64 && (timr->it_requeue_pending & REQUEUE_PENDING || timr->it_sigev_notify == SIGEV_NONE)) - timr->it_overrun += (unsigned int) hrtimer_forward(timer, now, iv); + timr->it_overrun += hrtimer_forward(timer, now, iv); remaining = __hrtimer_expires_remaining_adjusted(timer, now); /* Return 0 only, when the timer is expired and not pending */ @@ -824,7 +833,7 @@ SYSCALL_DEFINE1(timer_getoverrun, timer_t, timer_id) if (!timr) return -EINVAL; - overrun = timr->it_overrun_last; + overrun = timer_overrun_to_int(timr, 0); unlock_timer(timr, flags); return overrun; diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index fed86b2dfc89..d9837d25dfe0 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -39,7 +39,9 @@ static struct { seqcount_t seq; struct timekeeper timekeeper; -} tk_core ____cacheline_aligned; +} tk_core ____cacheline_aligned = { + .seq = SEQCNT_ZERO(tk_core.seq), +}; static DEFINE_RAW_SPINLOCK(timekeeper_lock); static struct timekeeper shadow_timekeeper; diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index ef4f16e81283..1407ed20ea93 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c @@ -399,7 +399,7 @@ static int __init init_timer_list_procfs(void) { struct proc_dir_entry *pe; - pe = proc_create("timer_list", 0444, NULL, &timer_list_fops); + pe = proc_create("timer_list", 0400, NULL, &timer_list_fops); if (!pe) return -ENOMEM; return 0; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 4228fd3682c3..3dd40c736067 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -119,11 +119,13 @@ static u64 bpf_trace_printk(u64 r1, u64 fmt_size, u64 r3, u64 r4, u64 r5) i++; } else if (fmt[i] == 'p' || fmt[i] == 's') { mod[fmt_cnt]++; - i++; - if (!isspace(fmt[i]) && !ispunct(fmt[i]) && fmt[i] != 0) + /* disallow any further format extensions */ + if (fmt[i + 1] != 0 && + !isspace(fmt[i + 1]) && + !ispunct(fmt[i + 1])) return -EINVAL; fmt_cnt++; - if (fmt[i - 1] == 's') { + if (fmt[i] == 's') { if (str_seen) /* allow only one '%s' per fmt string */ return -EINVAL; diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index ac758a53fcea..d90b42b39908 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -4767,6 +4767,7 @@ void ftrace_destroy_filter_files(struct ftrace_ops *ops) if (ops->flags & FTRACE_OPS_FL_ENABLED) ftrace_shutdown(ops, 0); ops->flags |= FTRACE_OPS_FL_DELETED; + ftrace_free_filter(ops); mutex_unlock(&ftrace_lock); } diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 1a47a64d623f..8c097de8a596 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4646,7 +4646,6 @@ out: return ret; fail: - kfree(iter->trace); kfree(iter); __trace_array_put(tr); mutex_unlock(&trace_types_lock); diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index b8a894adab2c..8be66a2b0cac 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -727,8 +727,10 @@ static int set_trigger_filter(char *filter_str, /* The filter is for the 'trigger' event, not the triggered event */ ret = create_event_filter(file->event_call, filter_str, false, &filter); - if (ret) - goto out; + /* + * If create_event_filter() fails, filter still needs to be freed. + * Which the calling code will do with data->filter. + */ assign: tmp = rcu_access_pointer(data->filter); diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 1dc887bab085..518e62a398d2 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -150,7 +150,14 @@ static void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs, ret = strncpy_from_user(dst, src, maxlen); if (ret == maxlen) - dst[--ret] = '\0'; + dst[ret - 1] = '\0'; + else if (ret >= 0) + /* + * Include the terminating null byte. In this case it + * was copied by strncpy_from_user but not accounted + * for in ret. + */ + ret++; if (ret < 0) { /* Failed to fetch string */ ((u8 *)get_rloc_data(dest))[0] = '\0'; |