Diffstat (limited to 'kernel')
-rw-r--r--   kernel/bpf/helpers.c                  11
-rw-r--r--   kernel/cgroup/cpuset.c                23
-rw-r--r--   kernel/dma/pool.c                      4
-rw-r--r--   kernel/events/core.c                  20
-rw-r--r--   kernel/fork.c                          2
-rw-r--r--   kernel/power/console.c                 7
-rw-r--r--   kernel/rcu/tree_plugin.h               3
-rw-r--r--   kernel/sched/core.c                   19
-rw-r--r--   kernel/sched/deadline.c                4
-rw-r--r--   kernel/sched/loadavg.c                 2
-rw-r--r--   kernel/sched/rt.c                      6
-rw-r--r--   kernel/sched/sched.h                   2
-rw-r--r--   kernel/trace/ftrace.c                 19
-rw-r--r--   kernel/trace/preemptirq_delay_test.c  13
-rw-r--r--   kernel/trace/trace.c                  37
-rw-r--r--   kernel/trace/trace.h                   8
-rw-r--r--   kernel/trace/trace_events.c            5
-rw-r--r--   kernel/ucount.c                        2
18 files changed, 139 insertions, 48 deletions
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index d3feaf6d68eb..b40ca025d9a4 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -832,6 +832,13 @@ int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args,
 		if (fmt[i] == 'p') {
 			sizeof_cur_arg = sizeof(long);

+			if (fmt[i + 1] == 0 || isspace(fmt[i + 1]) ||
+			    ispunct(fmt[i + 1])) {
+				if (tmp_buf)
+					cur_arg = raw_args[num_spec];
+				goto nocopy_fmt;
+			}
+
 			if ((fmt[i + 1] == 'k' || fmt[i + 1] == 'u') &&
 			    fmt[i + 2] == 's') {
 				fmt_ptype = fmt[i + 1];
@@ -839,11 +846,9 @@ int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args,
 				goto fmt_str;
 			}

-			if (fmt[i + 1] == 0 || isspace(fmt[i + 1]) ||
-			    ispunct(fmt[i + 1]) || fmt[i + 1] == 'K' ||
+			if (fmt[i + 1] == 'K' ||
 			    fmt[i + 1] == 'x' || fmt[i + 1] == 's' ||
 			    fmt[i + 1] == 'S') {
-				/* just kernel pointers */
 				if (tmp_buf)
 					cur_arg = raw_args[num_spec];
 				i++;
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index d6e70aa7e151..78c499021768 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -71,6 +71,13 @@
 DEFINE_STATIC_KEY_FALSE(cpusets_pre_enable_key);
 DEFINE_STATIC_KEY_FALSE(cpusets_enabled_key);

+/*
+ * There could be abnormal cpuset configurations for cpu or memory
+ * node binding, add this key to provide a quick low-cost judgement
+ * of the situation.
+ */
+DEFINE_STATIC_KEY_FALSE(cpusets_insane_config_key);
+
 /* See "Frequency meter" comments, below. */

 struct fmeter {
@@ -397,6 +404,17 @@ static DECLARE_WORK(cpuset_hotplug_work, cpuset_hotplug_workfn);

 static DECLARE_WAIT_QUEUE_HEAD(cpuset_attach_wq);

+static inline void check_insane_mems_config(nodemask_t *nodes)
+{
+	if (!cpusets_insane_config() &&
+	    movable_only_nodes(nodes)) {
+		static_branch_enable_cpuslocked(&cpusets_insane_config_key);
+		pr_info("Unsupported (movable nodes only) cpuset configuration detected (nmask=%*pbl)!\n"
+			"Cpuset allocations might fail even with a lot of memory available.\n",
+			nodemask_pr_args(nodes));
+	}
+}
+
 /*
  * Cgroup v2 behavior is used on the "cpus" and "mems" control files when
  * on default hierarchy or when the cpuset_v2_mode flag is set by mounting
@@ -1915,6 +1933,8 @@ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs,
 	if (retval < 0)
 		goto done;

+	check_insane_mems_config(&trialcs->mems_allowed);
+
 	spin_lock_irq(&callback_lock);
 	cs->mems_allowed = trialcs->mems_allowed;
 	spin_unlock_irq(&callback_lock);
@@ -3262,6 +3282,9 @@ update_tasks:
 	cpus_updated = !cpumask_equal(&new_cpus, cs->effective_cpus);
 	mems_updated = !nodes_equal(new_mems, cs->effective_mems);

+	if (mems_updated)
+		check_insane_mems_config(&new_mems);
+
 	if (is_in_v2_mode())
 		hotplug_update_tasks(cs, &new_cpus, &new_mems,
 				     cpus_updated, mems_updated);
diff --git a/kernel/dma/pool.c b/kernel/dma/pool.c
index 4d40dcce7604..37d3ddd36ae5 100644
--- a/kernel/dma/pool.c
+++ b/kernel/dma/pool.c
@@ -102,8 +102,8 @@ static int atomic_pool_expand(struct gen_pool *pool, size_t pool_size,

 #ifdef CONFIG_DMA_DIRECT_REMAP
 	addr = dma_common_contiguous_remap(page, pool_size,
-					   pgprot_dmacoherent(PAGE_KERNEL),
-					   __builtin_return_address(0));
+			pgprot_decrypted(pgprot_dmacoherent(PAGE_KERNEL)),
+			__builtin_return_address(0));
 	if (!addr)
 		goto free_page;
 #else
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 3e98d1029314..156221bd5661 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -6381,11 +6381,21 @@ out_put:
 	ring_buffer_put(rb); /* could be last */
 }

+static int perf_mmap_may_split(struct vm_area_struct *vma, unsigned long addr)
+{
+	/*
+	 * Forbid splitting perf mappings to prevent refcount leaks due to
+	 * the resulting non-matching offsets and sizes. See open()/close().
+	 */
+	return -EINVAL;
+}
+
 static const struct vm_operations_struct perf_mmap_vmops = {
 	.open		= perf_mmap_open,
 	.close		= perf_mmap_close, /* non mergeable */
 	.fault		= perf_mmap_fault,
 	.page_mkwrite	= perf_mmap_fault,
+	.may_split	= perf_mmap_may_split,
 };

 static int perf_mmap(struct file *file, struct vm_area_struct *vma)
@@ -6477,9 +6487,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 			goto unlock;
 		}

-		atomic_set(&rb->aux_mmap_count, 1);
 		user_extra = nr_pages;
-
 		goto accounting;
 	}

@@ -6581,8 +6589,10 @@ accounting:
 	} else {
 		ret = rb_alloc_aux(rb, event, vma->vm_pgoff, nr_pages,
 				   event->attr.aux_watermark, flags);
-		if (!ret)
+		if (!ret) {
+			atomic_set(&rb->aux_mmap_count, 1);
 			rb->aux_mmap_locked = extra;
+		}
 	}

 unlock:
@@ -6592,6 +6602,7 @@ unlock:

 		atomic_inc(&event->mmap_count);
 	} else if (rb) {
+		/* AUX allocation failed */
 		atomic_dec(&rb->mmap_count);
 	}
 aux_unlock:
@@ -6599,6 +6610,9 @@ aux_unlock:
 		mutex_unlock(aux_mutex);
 	mutex_unlock(&event->mmap_mutex);

+	if (ret)
+		return ret;
+
 	/*
 	 * Since pinned accounting is per vm we cannot allow fork() to copy our
 	 * vma.
diff --git a/kernel/fork.c b/kernel/fork.c
index 22313f31dd8c..2fd9c431bf45 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -580,7 +580,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,

 			get_file(file);
 			i_mmap_lock_write(mapping);
-			if (tmp->vm_flags & VM_SHARED)
+			if (vma_is_shared_maywrite(tmp))
 				mapping_allow_writable(mapping);
 			flush_dcache_mmap_lock(mapping);
 			/* insert tmp into the share list, just after mpnt */
diff --git a/kernel/power/console.c b/kernel/power/console.c
index fcdf0e14a47d..19c48aa5355d 100644
--- a/kernel/power/console.c
+++ b/kernel/power/console.c
@@ -16,6 +16,7 @@
 #define SUSPEND_CONSOLE	(MAX_NR_CONSOLES-1)

 static int orig_fgconsole, orig_kmsg;
+static bool vt_switch_done;

 static DEFINE_MUTEX(vt_switch_mutex);

@@ -136,17 +137,21 @@ void pm_prepare_console(void)
 	if (orig_fgconsole < 0)
 		return;

+	vt_switch_done = true;
+
 	orig_kmsg = vt_kmsg_redirect(SUSPEND_CONSOLE);
 	return;
 }

 void pm_restore_console(void)
 {
-	if (!pm_vt_switch())
+	if (!pm_vt_switch() && !vt_switch_done)
 		return;

 	if (orig_fgconsole >= 0) {
 		vt_move_to_console(orig_fgconsole, 0);
 		vt_kmsg_redirect(orig_kmsg);
 	}
+
+	vt_switch_done = false;
 }
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 9e84d603e882..73483f0fcf6b 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -607,10 +607,13 @@ static notrace void rcu_preempt_deferred_qs(struct task_struct *t)
  */
 static void rcu_preempt_deferred_qs_handler(struct irq_work *iwp)
 {
+	unsigned long flags;
 	struct rcu_data *rdp;

 	rdp = container_of(iwp, struct rcu_data, defer_qs_iw);
+	local_irq_save(flags);
 	rdp->defer_qs_iw_pending = false;
+	local_irq_restore(flags);
 }

 /*
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 380938831b13..c1d219289872 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1963,6 +1963,25 @@ bool sched_task_on_rq(struct task_struct *p)
 	return task_on_rq_queued(p);
 }

+unsigned long get_wchan(struct task_struct *p)
+{
+	unsigned long ip = 0;
+	unsigned int state;
+
+	if (!p || p == current)
+		return 0;
+
+	/* Only get wchan if task is blocked and we can keep it that way. */
+	raw_spin_lock_irq(&p->pi_lock);
+	state = READ_ONCE(p->__state);
+	smp_rmb(); /* see try_to_wake_up() */
+	if (state != TASK_RUNNING && state != TASK_WAKING && !p->on_rq)
+		ip = __get_wchan(p);
+	raw_spin_unlock_irq(&p->pi_lock);
+
+	return ip;
+}
+
 static inline void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
 {
 	if (!(flags & ENQUEUE_NOCLOCK))
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 66eb68c59f0b..708c3960bd06 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -2661,6 +2661,9 @@ void sched_dl_do_global(void)

 	if (global_rt_runtime() != RUNTIME_INF)
 		new_bw = to_ratio(global_rt_period(), global_rt_runtime());

+	for_each_possible_cpu(cpu)
+		init_dl_rq_bw_ratio(&cpu_rq(cpu)->dl);
+
 	for_each_possible_cpu(cpu) {
 		rcu_read_lock_sched();
@@ -2676,7 +2679,6 @@ void sched_dl_do_global(void)
 		raw_spin_unlock_irqrestore(&dl_b->lock, flags);

 		rcu_read_unlock_sched();
-		init_dl_rq_bw_ratio(&cpu_rq(cpu)->dl);
 	}
 }

diff --git a/kernel/sched/loadavg.c b/kernel/sched/loadavg.c
index 954b229868d9..1c79896f1bc0 100644
--- a/kernel/sched/loadavg.c
+++ b/kernel/sched/loadavg.c
@@ -81,7 +81,7 @@ long calc_load_fold_active(struct rq *this_rq, long adjust)
 	long nr_active, delta = 0;

 	nr_active = this_rq->nr_running - adjust;
-	nr_active += (int)this_rq->nr_uninterruptible;
+	nr_active += (long)this_rq->nr_uninterruptible;

 	if (nr_active != this_rq->calc_load_active) {
 		delta = nr_active - this_rq->calc_load_active;
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 5fc99dce5145..9720b3c19ab9 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -2861,6 +2861,12 @@ undo:
 	}
 	mutex_unlock(&mutex);

+	/*
+	 * After changing maximum available bandwidth for DEADLINE, we need to
+	 * recompute per root domain and per cpus variables accordingly.
+	 */
+	rebuild_sched_domains();
+
 	return ret;
 }

diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 6fc16bc13abf..8eba31190d30 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -971,7 +971,7 @@ struct rq {
 	 * one CPU and if it got migrated afterwards it may decrease
 	 * it on another CPU. Always updated under the runqueue lock:
 	 */
-	unsigned int		nr_uninterruptible;
+	unsigned long		nr_uninterruptible;

 	struct task_struct __rcu	*curr;
 	struct task_struct	*idle;
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 645edd69b150..e86064b14259 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -3825,13 +3825,17 @@ ftrace_regex_open(struct ftrace_ops *ops, int flag,
 		} else {
 			iter->hash = alloc_and_copy_ftrace_hash(size_bits, hash);
 		}
+	} else {
+		if (hash)
+			iter->hash = alloc_and_copy_ftrace_hash(hash->size_bits, hash);
+		else
+			iter->hash = EMPTY_HASH;
+	}

-		if (!iter->hash) {
-			trace_parser_put(&iter->parser);
-			goto out_unlock;
-		}
-	} else
-		iter->hash = hash;
+	if (!iter->hash) {
+		trace_parser_put(&iter->parser);
+		goto out_unlock;
+	}

 	ret = 0;

@@ -5700,9 +5704,6 @@ int ftrace_regex_release(struct inode *inode, struct file *file)
 		ftrace_hash_move_and_update_ops(iter->ops, orig_hash,
 						      iter->hash, filter_hash);
 		mutex_unlock(&ftrace_lock);
-	} else {
-		/* For read only, the hash is the ops hash */
-		iter->hash = NULL;
 	}

 	mutex_unlock(&iter->ops->func_hash->regex_lock);
diff --git a/kernel/trace/preemptirq_delay_test.c b/kernel/trace/preemptirq_delay_test.c
index cb0871fbdb07..8af92dbe98f0 100644
--- a/kernel/trace/preemptirq_delay_test.c
+++ b/kernel/trace/preemptirq_delay_test.c
@@ -119,12 +119,15 @@ static int preemptirq_delay_run(void *data)
 {
 	int i;
 	int s = MIN(burst_size, NR_TEST_FUNCS);
-	struct cpumask cpu_mask;
+	cpumask_var_t cpu_mask;
+
+	if (!alloc_cpumask_var(&cpu_mask, GFP_KERNEL))
+		return -ENOMEM;

 	if (cpu_affinity > -1) {
-		cpumask_clear(&cpu_mask);
-		cpumask_set_cpu(cpu_affinity, &cpu_mask);
-		if (set_cpus_allowed_ptr(current, &cpu_mask))
+		cpumask_clear(cpu_mask);
+		cpumask_set_cpu(cpu_affinity, cpu_mask);
+		if (set_cpus_allowed_ptr(current, cpu_mask))
 			pr_err("cpu_affinity:%d, failed\n", cpu_affinity);
 	}

@@ -141,6 +144,8 @@ static int preemptirq_delay_run(void *data)

 	__set_current_state(TASK_RUNNING);

+	free_cpumask_var(cpu_mask);
+
 	return 0;
 }

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 35b9f08a9a37..7af8bbc57531 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1605,7 +1605,7 @@ int trace_get_user(struct trace_parser *parser, const char __user *ubuf,

 	ret = get_user(ch, ubuf++);
 	if (ret)
-		goto out;
+		goto fail;

 	read++;
 	cnt--;
@@ -1619,7 +1619,7 @@ int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
 		while (cnt && isspace(ch)) {
 			ret = get_user(ch, ubuf++);
 			if (ret)
-				goto out;
+				goto fail;
 			read++;
 			cnt--;
 		}
@@ -1629,8 +1629,7 @@ int trace_get_user(struct trace_parser *parser, const char __user *ubuf,

 		/* only spaces were written */
 		if (isspace(ch) || !ch) {
 			*ppos += read;
-			ret = read;
-			goto out;
+			return read;
 		}
 	}
@@ -1640,11 +1639,12 @@ int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
 			parser->buffer[parser->idx++] = ch;
 		else {
 			ret = -EINVAL;
-			goto out;
+			goto fail;
 		}
+
 		ret = get_user(ch, ubuf++);
 		if (ret)
-			goto out;
+			goto fail;
 		read++;
 		cnt--;
 	}
@@ -1660,13 +1660,13 @@ int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
 		parser->buffer[parser->idx] = 0;
 	} else {
 		ret = -EINVAL;
-		goto out;
+		goto fail;
 	}

 	*ppos += read;
-	ret = read;
-
-out:
+	return read;
+fail:
+	trace_parser_fail(parser);
 	return ret;
 }

@@ -2136,10 +2136,10 @@ int __init register_tracer(struct tracer *type)
 	mutex_unlock(&trace_types_lock);

 	if (ret || !default_bootup_tracer)
-		goto out_unlock;
+		return ret;

 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
-		goto out_unlock;
+		return 0;

 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
 	/* Do we want this tracer to start on bootup? */
@@ -2151,8 +2151,7 @@ int __init register_tracer(struct tracer *type)

 	/* disable other selftests, since this will break it. */
 	disable_tracing_selftest("running a tracer");

- out_unlock:
-	return ret;
+	return 0;
 }

@@ -8711,11 +8710,10 @@ ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,

  out_reg:
 	ret = tracing_alloc_snapshot_instance(tr);
 	if (ret < 0)
-		goto out;
+		return ret;

 	ret = register_ftrace_function_probe(glob, tr, ops, count);

- out:
 	return ret < 0 ? ret : 0;
 }
@@ -10124,10 +10122,10 @@ void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
 			ret = print_trace_line(&iter);
 			if (ret != TRACE_TYPE_NO_CONSUME)
 				trace_consume(&iter);
+
+			trace_printk_seq(&iter.seq);
 		}
 		touch_nmi_watchdog();
-
-		trace_printk_seq(&iter.seq);
 	}

 	if (!cnt)
@@ -10231,7 +10229,7 @@ __init static int tracer_alloc_buffers(void)
 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);

 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
-		goto out;
+		return -ENOMEM;

 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
 		goto out_free_buffer_mask;
@@ -10344,7 +10342,6 @@ out_free_cpumask:
 	free_cpumask_var(global_trace.tracing_cpumask);
 out_free_buffer_mask:
 	free_cpumask_var(tracing_buffer_mask);
-out:
 	return ret;
 }

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 49b4353997fa..7464e0c9c4b1 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -1132,6 +1132,7 @@ bool ftrace_event_is_function(struct trace_event_call *call);
  */
 struct trace_parser {
 	bool		cont;
+	bool		fail;
 	char		*buffer;
 	unsigned	idx;
 	unsigned	size;
@@ -1139,7 +1140,7 @@ bool ftrace_event_is_function(struct trace_event_call *call);

 static inline bool trace_parser_loaded(struct trace_parser *parser)
 {
-	return (parser->idx != 0);
+	return !parser->fail && parser->idx != 0;
 }

 static inline bool trace_parser_cont(struct trace_parser *parser)
@@ -1153,6 +1154,11 @@ static inline void trace_parser_clear(struct trace_parser *parser)
 	parser->idx = 0;
 }

+static inline void trace_parser_fail(struct trace_parser *parser)
+{
+	parser->fail = true;
+}
+
 extern int trace_parser_get_init(struct trace_parser *parser, int size);
 extern void trace_parser_put(struct trace_parser *parser);
 extern int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index e9d40f9fb09b..df8bb7fa823b 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -2685,7 +2685,10 @@ __register_event(struct trace_event_call *call, struct module *mod)
 	if (ret < 0)
 		return ret;

+	down_write(&trace_event_sem);
 	list_add(&call->list, &ftrace_events);
+	up_write(&trace_event_sem);
+
 	if (call->flags & TRACE_EVENT_FL_DYNAMIC)
 		atomic_set(&call->refcnt, 0);
 	else
@@ -3156,6 +3159,8 @@ __trace_add_event_dirs(struct trace_array *tr)
 	struct trace_event_call *call;
 	int ret;

+	lockdep_assert_held(&trace_event_sem);
+
 	list_for_each_entry(call, &ftrace_events, list) {
 		ret = __trace_add_new_event(call, tr);
 		if (ret < 0)
diff --git a/kernel/ucount.c b/kernel/ucount.c
index 85d7c19b0b80..8c21398f7b4f 100644
--- a/kernel/ucount.c
+++ b/kernel/ucount.c
@@ -210,7 +210,7 @@ void put_ucounts(struct ucounts *ucounts)
 	}
 }

-static inline bool atomic_long_inc_below(atomic_long_t *v, int u)
+static inline bool atomic_long_inc_below(atomic_long_t *v, long u)
 {
 	long c, old;
 	c = atomic_long_read(v);
