| author | Vitor Soares <vitor.soares@toradex.com> | 2025-12-04 13:49:42 +0000 |
|---|---|---|
| committer | Vitor Soares <vitor.soares@toradex.com> | 2025-12-04 13:49:51 +0000 |
| commit | e0d2c0556a62886a8db485beef41d45be5b24a76 | |
| tree | fe285624899543b24fbe8dd15f55f8f42421f695 /kernel | |
| parent | 88e9f1ec02f87bb86c41152f0b18eec78d0fbfeb | |
| parent | 4ac2b983648e08576dc45697ba3680a07b0d2b29 | |
Merge commit '4ac2b983648e' of github.com/Freescale/linux-fslc into toradex_5.15-2.2.x-imx
Sync with linux-fslc branch 5.15-2.2.x-imx up to commit 4ac2b983648e
which includes the v5.15.196 stable update.
Signed-off-by: Vitor Soares <vitor.soares@toradex.com>
Diffstat (limited to 'kernel')
| mode | path | lines changed |
|---|---|---|
| -rw-r--r-- | kernel/bpf/arraymap.c | 1 |
| -rw-r--r-- | kernel/bpf/core.c | 73 |
| -rw-r--r-- | kernel/bpf/inode.c | 4 |
| -rw-r--r-- | kernel/bpf/syscall.c | 22 |
| -rw-r--r-- | kernel/bpf/verifier.c | 4 |
| -rw-r--r-- | kernel/cgroup/cgroup.c | 43 |
| -rw-r--r-- | kernel/fork.c | 2 |
| -rw-r--r-- | kernel/irq/manage.c | 8 |
| -rw-r--r-- | kernel/padata.c | 6 |
| -rw-r--r-- | kernel/pid.c | 2 |
| -rw-r--r-- | kernel/sched/cpufreq_schedutil.c | 28 |
| -rw-r--r-- | kernel/sched/fair.c | 38 |
| -rw-r--r-- | kernel/smp.c | 11 |
| -rw-r--r-- | kernel/time/hrtimer.c | 50 |
| -rw-r--r-- | kernel/trace/preemptirq_delay_test.c | 2 |
| -rw-r--r-- | kernel/trace/trace.c | 4 |
| -rw-r--r-- | kernel/trace/trace_dynevent.c | 4 |
| -rw-r--r-- | kernel/trace/trace_events_synth.c | 2 |
| -rw-r--r-- | kernel/trace/trace_kprobe.c | 11 |
| -rw-r--r-- | kernel/trace/trace_probe.h | 9 |
| -rw-r--r-- | kernel/trace/trace_uprobe.c | 12 |
21 files changed, 212 insertions, 124 deletions
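
The largest change below, in kernel/bpf/core.c and kernel/bpf/syscall.c, retires the prog-array-only owner tracking in bpf_array_aux in favour of a lazily allocated map->owner protected by map->owner_lock: the first program attached to a map records its type and JIT state, and later programs are accepted only if they match. As a rough illustration of that rule, here is a minimal userspace C model; prog_info, map_model and prog_map_compatible are made-up names for this sketch, not kernel APIs.

```c
/*
 * Minimal userspace model of the owner-compatibility idea from the
 * kernel/bpf/core.c hunk below: the first program attached to a map
 * records its type and JIT state as the "owner", and later programs
 * are accepted only if they match.  All names are illustrative.
 */
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct prog_info {
	int type;
	bool jited;
};

struct map_model {
	struct prog_info *owner;	/* NULL until the first program attaches */
};

static bool prog_map_compatible(struct map_model *map, const struct prog_info *fp)
{
	if (!map->owner) {
		map->owner = malloc(sizeof(*map->owner));
		if (!map->owner)
			return false;
		*map->owner = *fp;	/* first attach becomes the owner */
		return true;
	}
	/* later attaches must match the recorded owner */
	return map->owner->type == fp->type && map->owner->jited == fp->jited;
}

int main(void)
{
	struct map_model map = { 0 };
	struct prog_info a = { .type = 1, .jited = true };
	struct prog_info b = { .type = 2, .jited = true };

	printf("first:  %d\n", prog_map_compatible(&map, &a));	/* 1: becomes owner */
	printf("same:   %d\n", prog_map_compatible(&map, &a));	/* 1: matches owner */
	printf("other:  %d\n", prog_map_compatible(&map, &b));	/* 0: type mismatch */
	free(map.owner);
	return 0;
}
```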
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 2788da290c21..dc42970dda97 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -1044,7 +1044,6 @@ static struct bpf_map *prog_array_map_alloc(union bpf_attr *attr) INIT_WORK(&aux->work, prog_array_map_clear_deferred); INIT_LIST_HEAD(&aux->poke_progs); mutex_init(&aux->poke_mutex); - spin_lock_init(&aux->owner.lock); map = array_map_alloc(attr); if (IS_ERR(map)) { diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 1ded3eb492b8..73a1c66e5417 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1832,31 +1832,74 @@ static unsigned int __bpf_prog_ret0_warn(const void *ctx, } #endif -bool bpf_prog_array_compatible(struct bpf_array *array, - const struct bpf_prog *fp) +static bool __bpf_prog_map_compatible(struct bpf_map *map, + const struct bpf_prog *fp) { - bool ret; + enum bpf_prog_type prog_type = fp->aux->dst_prog ? fp->aux->dst_prog->type : fp->type; + struct bpf_prog_aux *aux = fp->aux; + enum bpf_cgroup_storage_type i; + bool ret = false; + u64 cookie; if (fp->kprobe_override) - return false; - - spin_lock(&array->aux->owner.lock); + return ret; - if (!array->aux->owner.type) { - /* There's no owner yet where we could check for - * compatibility. - */ - array->aux->owner.type = fp->type; - array->aux->owner.jited = fp->jited; + spin_lock(&map->owner_lock); + /* There's no owner yet where we could check for compatibility. */ + if (!map->owner) { + map->owner = bpf_map_owner_alloc(map); + if (!map->owner) + goto err; + map->owner->type = prog_type; + map->owner->jited = fp->jited; + /* Note: xdp_has_frags doesn't exist in aux yet in our branch */ + /* map->owner->xdp_has_frags = aux->xdp_has_frags; */ + map->owner->attach_func_proto = aux->attach_func_proto; + for_each_cgroup_storage_type(i) { + map->owner->storage_cookie[i] = + aux->cgroup_storage[i] ? + aux->cgroup_storage[i]->cookie : 0; + } ret = true; } else { - ret = array->aux->owner.type == fp->type && - array->aux->owner.jited == fp->jited; + ret = map->owner->type == prog_type && + map->owner->jited == fp->jited; + /* Note: xdp_has_frags check would go here when available */ + /* && map->owner->xdp_has_frags == aux->xdp_has_frags; */ + for_each_cgroup_storage_type(i) { + if (!ret) + break; + cookie = aux->cgroup_storage[i] ? 
+ aux->cgroup_storage[i]->cookie : 0; + ret = map->owner->storage_cookie[i] == cookie || + !cookie; + } + if (ret && + map->owner->attach_func_proto != aux->attach_func_proto) { + switch (prog_type) { + case BPF_PROG_TYPE_TRACING: + case BPF_PROG_TYPE_LSM: + case BPF_PROG_TYPE_EXT: + case BPF_PROG_TYPE_STRUCT_OPS: + ret = false; + break; + default: + break; + } + } } - spin_unlock(&array->aux->owner.lock); +err: + spin_unlock(&map->owner_lock); return ret; } +bool bpf_prog_array_compatible(struct bpf_array *array, + const struct bpf_prog *fp) +{ + struct bpf_map *map = &array->map; + return __bpf_prog_map_compatible(map, fp); +} + static int bpf_check_tail_call(const struct bpf_prog *fp) { struct bpf_prog_aux *aux = fp->aux; diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c index 5a8d9f7467bf..849df8268af5 100644 --- a/kernel/bpf/inode.c +++ b/kernel/bpf/inode.c @@ -610,7 +610,7 @@ static int bpf_show_options(struct seq_file *m, struct dentry *root) return 0; } -static void bpf_free_inode(struct inode *inode) +static void bpf_destroy_inode(struct inode *inode) { enum bpf_type type; @@ -625,7 +625,7 @@ static const struct super_operations bpf_super_ops = { .statfs = simple_statfs, .drop_inode = generic_delete_inode, .show_options = bpf_show_options, - .free_inode = bpf_free_inode, + .destroy_inode = bpf_destroy_inode, }; enum { diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 6f309248f13f..b80d125dcea9 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -31,6 +31,7 @@ #include <linux/bpf-netns.h> #include <linux/rcupdate_trace.h> #include <linux/memcontrol.h> +#include <linux/cookie.h> #define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \ (map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \ @@ -43,6 +44,7 @@ #define BPF_OBJ_FLAG_MASK (BPF_F_RDONLY | BPF_F_WRONLY) DEFINE_PER_CPU(int, bpf_prog_active); +DEFINE_COOKIE(bpf_map_cookie); static DEFINE_IDR(prog_idr); static DEFINE_SPINLOCK(prog_idr_lock); static DEFINE_IDR(map_idr); @@ -475,6 +477,7 @@ static void bpf_map_free_deferred(struct work_struct *work) security_bpf_map_free(map); bpf_map_release_memcg(map); + bpf_map_owner_free(map); /* implementation dependent freeing */ map->ops->map_free(map); } @@ -574,17 +577,15 @@ static unsigned long bpf_map_memory_footprint(const struct bpf_map *map) static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp) { - const struct bpf_map *map = filp->private_data; - const struct bpf_array *array; + struct bpf_map *map = filp->private_data; u32 type = 0, jited = 0; - if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) { - array = container_of(map, struct bpf_array, map); - spin_lock(&array->aux->owner.lock); - type = array->aux->owner.type; - jited = array->aux->owner.jited; - spin_unlock(&array->aux->owner.lock); + spin_lock(&map->owner_lock); + if (map->owner) { + type = map->owner->type; + jited = map->owner->jited; } + spin_unlock(&map->owner_lock); seq_printf(m, "map_type:\t%u\n" @@ -886,9 +887,14 @@ static int map_create(union bpf_attr *attr) if (err < 0) goto free_map; + preempt_disable(); + map->cookie = gen_cookie_next(&bpf_map_cookie); + preempt_enable(); + atomic64_set(&map->refcnt, 1); atomic64_set(&map->usercnt, 1); mutex_init(&map->freeze_mutex); + spin_lock_init(&map->owner_lock); map->spin_lock_off = -EINVAL; map->timer_off = -EINVAL; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 89b4fa815a9b..4b7c9a60a735 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5071,6 +5071,10 @@ static int 
process_timer_func(struct bpf_verifier_env *env, int regno, verbose(env, "verifier bug. Two map pointers in a timer helper\n"); return -EFAULT; } + if (IS_ENABLED(CONFIG_PREEMPT_RT)) { + verbose(env, "bpf_timer cannot be used for PREEMPT_RT.\n"); + return -EOPNOTSUPP; + } meta->map_uid = reg->map_uid; meta->map_ptr = map; return 0; diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 1a3b2e1436db..e5fe4ffff7cd 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -122,8 +122,31 @@ DEFINE_PERCPU_RWSEM(cgroup_threadgroup_rwsem); * of concurrent destructions. Use a separate workqueue so that cgroup * destruction work items don't end up filling up max_active of system_wq * which may lead to deadlock. + * + * A cgroup destruction should enqueue work sequentially to: + * cgroup_offline_wq: use for css offline work + * cgroup_release_wq: use for css release work + * cgroup_free_wq: use for free work + * + * Rationale for using separate workqueues: + * The cgroup root free work may depend on completion of other css offline + * operations. If all tasks were enqueued to a single workqueue, this could + * create a deadlock scenario where: + * - Free work waits for other css offline work to complete. + * - But other css offline work is queued after free work in the same queue. + * + * Example deadlock scenario with single workqueue (cgroup_destroy_wq): + * 1. umount net_prio + * 2. net_prio root destruction enqueues work to cgroup_destroy_wq (CPUx) + * 3. perf_event CSS A offline enqueues work to same cgroup_destroy_wq (CPUx) + * 4. net_prio cgroup_destroy_root->cgroup_lock_and_drain_offline. + * 5. net_prio root destruction blocks waiting for perf_event CSS A offline, + * which can never complete as it's behind in the same queue and + * workqueue's max_active is 1. 
*/ -static struct workqueue_struct *cgroup_destroy_wq; +static struct workqueue_struct *cgroup_offline_wq; +static struct workqueue_struct *cgroup_release_wq; +static struct workqueue_struct *cgroup_free_wq; /* generate an array of cgroup subsystem pointers */ #define SUBSYS(_x) [_x ## _cgrp_id] = &_x ## _cgrp_subsys, @@ -5263,7 +5286,7 @@ static void css_release_work_fn(struct work_struct *work) mutex_unlock(&cgroup_mutex); INIT_RCU_WORK(&css->destroy_rwork, css_free_rwork_fn); - queue_rcu_work(cgroup_destroy_wq, &css->destroy_rwork); + queue_rcu_work(cgroup_free_wq, &css->destroy_rwork); } static void css_release(struct percpu_ref *ref) @@ -5272,7 +5295,7 @@ static void css_release(struct percpu_ref *ref) container_of(ref, struct cgroup_subsys_state, refcnt); INIT_WORK(&css->destroy_work, css_release_work_fn); - queue_work(cgroup_destroy_wq, &css->destroy_work); + queue_work(cgroup_release_wq, &css->destroy_work); } static void init_and_link_css(struct cgroup_subsys_state *css, @@ -5394,7 +5417,7 @@ err_list_del: err_free_css: list_del_rcu(&css->rstat_css_node); INIT_RCU_WORK(&css->destroy_rwork, css_free_rwork_fn); - queue_rcu_work(cgroup_destroy_wq, &css->destroy_rwork); + queue_rcu_work(cgroup_free_wq, &css->destroy_rwork); return ERR_PTR(err); } @@ -5631,7 +5654,7 @@ static void css_killed_ref_fn(struct percpu_ref *ref) if (atomic_dec_and_test(&css->online_cnt)) { INIT_WORK(&css->destroy_work, css_killed_work_fn); - queue_work(cgroup_destroy_wq, &css->destroy_work); + queue_work(cgroup_offline_wq, &css->destroy_work); } } @@ -6008,8 +6031,14 @@ static int __init cgroup_wq_init(void) * We would prefer to do this in cgroup_init() above, but that * is called before init_workqueues(): so leave this until after. */ - cgroup_destroy_wq = alloc_workqueue("cgroup_destroy", 0, 1); - BUG_ON(!cgroup_destroy_wq); + cgroup_offline_wq = alloc_workqueue("cgroup_offline", 0, 1); + BUG_ON(!cgroup_offline_wq); + + cgroup_release_wq = alloc_workqueue("cgroup_release", 0, 1); + BUG_ON(!cgroup_release_wq); + + cgroup_free_wq = alloc_workqueue("cgroup_free", 0, 1); + BUG_ON(!cgroup_free_wq); return 0; } core_initcall(cgroup_wq_init); diff --git a/kernel/fork.c b/kernel/fork.c index 2fd9c431bf45..2c99d39e2bc0 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1595,7 +1595,7 @@ static int copy_io(unsigned long clone_flags, struct task_struct *tsk) return 0; } -static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk) +static int copy_sighand(u64 clone_flags, struct task_struct *tsk) { struct sighand_struct *sig; diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index b46fbfbb929f..ce0433446a8e 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -501,7 +501,8 @@ int irq_force_affinity(unsigned int irq, const struct cpumask *cpumask) } EXPORT_SYMBOL_GPL(irq_force_affinity); -int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m) +int __irq_apply_affinity_hint(unsigned int irq, const struct cpumask *m, + bool setaffinity) { unsigned long flags; struct irq_desc *desc = irq_get_desc_lock(irq, &flags, IRQ_GET_DESC_CHECK_GLOBAL); @@ -510,12 +511,11 @@ int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m) return -EINVAL; desc->affinity_hint = m; irq_put_desc_unlock(desc, flags); - /* set the initial affinity to prevent every interrupt being on CPU0 */ - if (m) + if (m && setaffinity) __irq_set_affinity(irq, m, false); return 0; } -EXPORT_SYMBOL_GPL(irq_set_affinity_hint); +EXPORT_SYMBOL_GPL(__irq_apply_affinity_hint); static void 
irq_affinity_notify(struct work_struct *work) { diff --git a/kernel/padata.c b/kernel/padata.c index b443e19e64cf..5453f5750906 100644 --- a/kernel/padata.c +++ b/kernel/padata.c @@ -282,7 +282,11 @@ static struct padata_priv *padata_find_next(struct parallel_data *pd, if (remove_object) { list_del_init(&padata->list); ++pd->processed; - pd->cpu = cpumask_next_wrap(cpu, pd->cpumask.pcpu, -1, false); + /* When sequence wraps around, reset to the first CPU. */ + if (unlikely(pd->processed == 0)) + pd->cpu = cpumask_first(pd->cpumask.pcpu); + else + pd->cpu = cpumask_next_wrap(cpu, pd->cpumask.pcpu, -1, false); } spin_unlock(&reorder->lock); diff --git a/kernel/pid.c b/kernel/pid.c index efe87db44683..61f6649568b2 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -474,7 +474,7 @@ pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns) struct upid *upid; pid_t nr = 0; - if (pid && ns->level <= pid->level) { + if (pid && ns && ns->level <= pid->level) { upid = &pid->numbers[ns->level]; if (upid->ns == ns) nr = upid->nr; diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 519f742d44f4..954a85b8c275 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -89,9 +89,20 @@ static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time) if (!cpufreq_this_cpu_can_update(sg_policy->policy)) return false; - if (unlikely(sg_policy->limits_changed)) { - sg_policy->limits_changed = false; + if (unlikely(READ_ONCE(sg_policy->limits_changed))) { + WRITE_ONCE(sg_policy->limits_changed, false); sg_policy->need_freq_update = true; + + /* + * The above limits_changed update must occur before the reads + * of policy limits in cpufreq_driver_resolve_freq() or a policy + * limits update might be missed, so use a memory barrier to + * ensure it. + * + * This pairs with the write memory barrier in sugov_limits(). + */ + smp_mb(); + return true; } @@ -326,7 +337,7 @@ static inline bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu) { return false; } static inline void ignore_dl_rate_limit(struct sugov_cpu *sg_cpu) { if (cpu_bw_dl(cpu_rq(sg_cpu->cpu)) > sg_cpu->bw_dl) - sg_cpu->sg_policy->limits_changed = true; + WRITE_ONCE(sg_cpu->sg_policy->limits_changed, true); } static inline bool sugov_update_single_common(struct sugov_cpu *sg_cpu, @@ -826,7 +837,16 @@ static void sugov_limits(struct cpufreq_policy *policy) mutex_unlock(&sg_policy->work_lock); } - sg_policy->limits_changed = true; + /* + * The limits_changed update below must take place before the updates + * of policy limits in cpufreq_set_policy() or a policy limits update + * might be missed, so use a memory barrier to ensure it. + * + * This pairs with the memory barrier in sugov_should_update_freq(). 
+ */ + smp_wmb(); + + WRITE_ONCE(sg_policy->limits_changed, true); } struct cpufreq_governor schedutil_gov = { diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index ea707ee9ddac..87f32cf8aa02 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -3959,7 +3959,7 @@ static inline unsigned long cfs_rq_load_avg(struct cfs_rq *cfs_rq) return cfs_rq->avg.load_avg; } -static int newidle_balance(struct rq *this_rq, struct rq_flags *rf); +static int sched_balance_newidle(struct rq *this_rq, struct rq_flags *rf); static inline unsigned long task_util(struct task_struct *p) { @@ -4291,7 +4291,7 @@ attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) {} static inline void detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) {} -static inline int newidle_balance(struct rq *rq, struct rq_flags *rf) +static inline int sched_balance_newidle(struct rq *rq, struct rq_flags *rf) { return 0; } @@ -7280,7 +7280,7 @@ balance_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) if (rq->nr_running) return 1; - return newidle_balance(rq, rf) != 0; + return sched_balance_newidle(rq, rf) != 0; } #endif /* CONFIG_SMP */ @@ -7613,21 +7613,21 @@ done: __maybe_unused; return p; idle: - if (!rf) - return NULL; - - new_tasks = newidle_balance(rq, rf); + if (rf) { + new_tasks = sched_balance_newidle(rq, rf); - /* - * Because newidle_balance() releases (and re-acquires) rq->lock, it is - * possible for any higher priority task to appear. In that case we - * must re-start the pick_next_entity() loop. - */ - if (new_tasks < 0) - return RETRY_TASK; + /* + * Because sched_balance_newidle() releases (and re-acquires) + * rq->lock, it is possible for any higher priority task to + * appear. In that case we must re-start the pick_next_entity() + * loop. + */ + if (new_tasks < 0) + return RETRY_TASK; - if (new_tasks > 0) - goto again; + if (new_tasks > 0) + goto again; + } /* * rq is about to be idle, check if we need to update the @@ -10427,7 +10427,7 @@ out_one_pinned: ld_moved = 0; /* - * newidle_balance() disregards balance intervals, so we could + * sched_balance_newidle() disregards balance intervals, so we could * repeatedly reach this code, which would lead to balance_interval * skyrocketing in a short amount of time. Skip the balance_interval * increase logic to avoid that. @@ -11155,7 +11155,7 @@ static inline void nohz_newidle_balance(struct rq *this_rq) { } #endif /* CONFIG_NO_HZ_COMMON */ /* - * newidle_balance is called by schedule() if this_cpu is about to become + * sched_balance_newidle is called by schedule() if this_cpu is about to become * idle. Attempts to pull tasks from other CPUs. * * Returns: @@ -11163,7 +11163,7 @@ static inline void nohz_newidle_balance(struct rq *this_rq) { } * 0 - failed, no new tasks * > 0 - success, new (fair) tasks present */ -static int newidle_balance(struct rq *this_rq, struct rq_flags *rf) +static int sched_balance_newidle(struct rq *this_rq, struct rq_flags *rf) { unsigned long next_balance = jiffies + HZ; int this_cpu = this_rq->cpu; diff --git a/kernel/smp.c b/kernel/smp.c index b60525b34ab0..387df30ca560 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -976,16 +976,15 @@ static void smp_call_function_many_cond(const struct cpumask *mask, * @mask: The set of cpus to run on (only runs on online subset). * @func: The function to run. This must be fast and non-blocking. * @info: An arbitrary pointer to pass to the function. - * @wait: Bitmask that controls the operation. 
If %SCF_WAIT is set, wait - * (atomically) until function has completed on other CPUs. If - * %SCF_RUN_LOCAL is set, the function will also be run locally - * if the local CPU is set in the @cpumask. - * - * If @wait is true, then returns once @func has returned. + * @wait: If true, wait (atomically) until function has completed + * on other CPUs. * * You must not call this function with disabled interrupts or from a * hardware interrupt handler or from a bottom half handler. Preemption * must be disabled when calling this function. + * + * @func is not called on the local CPU even if @mask contains it. Consider + * using on_each_cpu_cond_mask() instead if this is not desirable. */ void smp_call_function_many(const struct cpumask *mask, smp_call_func_t func, void *info, bool wait) diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 2e4b63f3c6dd..7e2ed34e9803 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -631,17 +631,12 @@ static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base) /* * Is the high resolution mode active ? */ -static inline int __hrtimer_hres_active(struct hrtimer_cpu_base *cpu_base) +static inline int hrtimer_hres_active(struct hrtimer_cpu_base *cpu_base) { return IS_ENABLED(CONFIG_HIGH_RES_TIMERS) ? cpu_base->hres_active : 0; } -static inline int hrtimer_hres_active(void) -{ - return __hrtimer_hres_active(this_cpu_ptr(&hrtimer_bases)); -} - static void __hrtimer_reprogram(struct hrtimer_cpu_base *cpu_base, struct hrtimer *next_timer, ktime_t expires_next) @@ -665,7 +660,7 @@ static void __hrtimer_reprogram(struct hrtimer_cpu_base *cpu_base, * set. So we'd effectively block all timers until the T2 event * fires. */ - if (!__hrtimer_hres_active(cpu_base) || cpu_base->hang_detected) + if (!hrtimer_hres_active(cpu_base) || cpu_base->hang_detected) return; tick_program_event(expires_next, 1); @@ -775,13 +770,13 @@ static void retrigger_next_event(void *arg) * of the next expiring timer is enough. The return from the SMP * function call will take care of the reprogramming in case the * CPU was in a NOHZ idle sleep. + * + * In periodic low resolution mode, the next softirq expiration + * must also be updated. 
*/ - if (!__hrtimer_hres_active(base) && !tick_nohz_active) - return; - raw_spin_lock(&base->lock); hrtimer_update_base(base); - if (__hrtimer_hres_active(base)) + if (hrtimer_hres_active(base)) hrtimer_force_reprogram(base, 0); else hrtimer_update_next_event(base); @@ -938,7 +933,7 @@ void clock_was_set(unsigned int bases) cpumask_var_t mask; int cpu; - if (!__hrtimer_hres_active(cpu_base) && !tick_nohz_active) + if (!hrtimer_hres_active(cpu_base) && !tick_nohz_active) goto out_timerfd; if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) { @@ -1489,7 +1484,7 @@ u64 hrtimer_get_next_event(void) raw_spin_lock_irqsave(&cpu_base->lock, flags); - if (!__hrtimer_hres_active(cpu_base)) + if (!hrtimer_hres_active(cpu_base)) expires = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_ALL); raw_spin_unlock_irqrestore(&cpu_base->lock, flags); @@ -1512,7 +1507,7 @@ u64 hrtimer_next_event_without(const struct hrtimer *exclude) raw_spin_lock_irqsave(&cpu_base->lock, flags); - if (__hrtimer_hres_active(cpu_base)) { + if (hrtimer_hres_active(cpu_base)) { unsigned int active; if (!cpu_base->softirq_activated) { @@ -1873,25 +1868,7 @@ retry: tick_program_event(expires_next, 1); pr_warn_once("hrtimer: interrupt took %llu ns\n", ktime_to_ns(delta)); } - -/* called with interrupts disabled */ -static inline void __hrtimer_peek_ahead_timers(void) -{ - struct tick_device *td; - - if (!hrtimer_hres_active()) - return; - - td = this_cpu_ptr(&tick_cpu_device); - if (td && td->evtdev) - hrtimer_interrupt(td->evtdev); -} - -#else /* CONFIG_HIGH_RES_TIMERS */ - -static inline void __hrtimer_peek_ahead_timers(void) { } - -#endif /* !CONFIG_HIGH_RES_TIMERS */ +#endif /* !CONFIG_HIGH_RES_TIMERS */ /* * Called from run_local_timers in hardirq context every jiffy @@ -1902,7 +1879,7 @@ void hrtimer_run_queues(void) unsigned long flags; ktime_t now; - if (__hrtimer_hres_active(cpu_base)) + if (hrtimer_hres_active(cpu_base)) return; /* @@ -2252,11 +2229,6 @@ int hrtimers_cpu_dying(unsigned int dying_cpu) &new_base->clock_base[i]); } - /* - * The migration might have changed the first expiring softirq - * timer on this CPU. Update it. - */ - __hrtimer_get_next_event(new_base, HRTIMER_ACTIVE_SOFT); /* Tell the other CPU to retrigger the next event */ smp_call_function_single(ncpu, retrigger_next_event, NULL, 0); diff --git a/kernel/trace/preemptirq_delay_test.c b/kernel/trace/preemptirq_delay_test.c index 8af92dbe98f0..acb0c971a408 100644 --- a/kernel/trace/preemptirq_delay_test.c +++ b/kernel/trace/preemptirq_delay_test.c @@ -34,8 +34,6 @@ MODULE_PARM_DESC(cpu_affinity, "Cpu num test is running on"); static struct completion done; -#define MIN(x, y) ((x) < (y) ? 
(x) : (y)) - static void busy_wait(ulong time) { u64 start, end; diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 7af8bbc57531..a6040a707abb 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -7233,7 +7233,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, entry = ring_buffer_event_data(event); entry->ip = _THIS_IP_; - len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt); + len = copy_from_user_nofault(&entry->buf, ubuf, cnt); if (len) { memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE); cnt = FAULTED_SIZE; @@ -7308,7 +7308,7 @@ tracing_mark_raw_write(struct file *filp, const char __user *ubuf, entry = ring_buffer_event_data(event); - len = __copy_from_user_inatomic(&entry->id, ubuf, cnt); + len = copy_from_user_nofault(&entry->id, ubuf, cnt); if (len) { entry->id = -1; memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE); diff --git a/kernel/trace/trace_dynevent.c b/kernel/trace/trace_dynevent.c index 6d0e9f869ad6..3d8ffa81a1fa 100644 --- a/kernel/trace/trace_dynevent.c +++ b/kernel/trace/trace_dynevent.c @@ -239,6 +239,10 @@ static int dyn_event_open(struct inode *inode, struct file *file) { int ret; + ret = security_locked_down(LOCKDOWN_TRACEFS); + if (ret) + return ret; + ret = tracing_check_open_get_tr(NULL); if (ret) return ret; diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c index ab54810bd8d9..62d146254f47 100644 --- a/kernel/trace/trace_events_synth.c +++ b/kernel/trace/trace_events_synth.c @@ -364,13 +364,11 @@ static enum print_line_t print_synth_event(struct trace_iterator *iter, str_field = (char *)entry + data_offset; trace_seq_printf(s, print_fmt, se->fields[i]->name, - STR_VAR_LEN_MAX, str_field, i == se->n_fields - 1 ? "" : " "); n_u64++; } else { trace_seq_printf(s, print_fmt, se->fields[i]->name, - STR_VAR_LEN_MAX, (char *)&entry->fields[n_u64], i == se->n_fields - 1 ? 
"" : " "); n_u64 += STR_VAR_LEN_MAX / sizeof(u64); diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index e062f4efec8d..03d4ac41d903 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1722,14 +1722,15 @@ static int kprobe_register(struct trace_event_call *event, static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs) { struct trace_kprobe *tk = container_of(kp, struct trace_kprobe, rp.kp); + unsigned int flags = trace_probe_load_flag(&tk->tp); int ret = 0; raw_cpu_inc(*tk->nhit); - if (trace_probe_test_flag(&tk->tp, TP_FLAG_TRACE)) + if (flags & TP_FLAG_TRACE) kprobe_trace_func(tk, regs); #ifdef CONFIG_PERF_EVENTS - if (trace_probe_test_flag(&tk->tp, TP_FLAG_PROFILE)) + if (flags & TP_FLAG_PROFILE) ret = kprobe_perf_func(tk, regs); #endif return ret; @@ -1741,6 +1742,7 @@ kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs) { struct kretprobe *rp = get_kretprobe(ri); struct trace_kprobe *tk; + unsigned int flags; /* * There is a small chance that get_kretprobe(ri) returns NULL when @@ -1753,10 +1755,11 @@ kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs) tk = container_of(rp, struct trace_kprobe, rp); raw_cpu_inc(*tk->nhit); - if (trace_probe_test_flag(&tk->tp, TP_FLAG_TRACE)) + flags = trace_probe_load_flag(&tk->tp); + if (flags & TP_FLAG_TRACE) kretprobe_trace_func(tk, ri, regs); #ifdef CONFIG_PERF_EVENTS - if (trace_probe_test_flag(&tk->tp, TP_FLAG_PROFILE)) + if (flags & TP_FLAG_PROFILE) kretprobe_perf_func(tk, ri, regs); #endif return 0; /* We don't tweak kernel, so just return 0 */ diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index 82e1df8aefcb..b08aa3946868 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -258,16 +258,21 @@ struct event_file_link { struct list_head list; }; +static inline unsigned int trace_probe_load_flag(struct trace_probe *tp) +{ + return smp_load_acquire(&tp->event->flags); +} + static inline bool trace_probe_test_flag(struct trace_probe *tp, unsigned int flag) { - return !!(tp->event->flags & flag); + return !!(trace_probe_load_flag(tp) & flag); } static inline void trace_probe_set_flag(struct trace_probe *tp, unsigned int flag) { - tp->event->flags |= flag; + smp_store_release(&tp->event->flags, tp->event->flags | flag); } static inline void trace_probe_clear_flag(struct trace_probe *tp, diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 322d56661d04..707c5373476a 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -1485,6 +1485,7 @@ static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs) struct uprobe_dispatch_data udd; struct uprobe_cpu_buffer *ucb; int dsize, esize; + unsigned int flags; int ret = 0; @@ -1505,11 +1506,12 @@ static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs) ucb = uprobe_buffer_get(); store_trace_args(ucb->buf, &tu->tp, regs, esize, dsize); - if (trace_probe_test_flag(&tu->tp, TP_FLAG_TRACE)) + flags = trace_probe_load_flag(&tu->tp); + if (flags & TP_FLAG_TRACE) ret |= uprobe_trace_func(tu, regs, ucb, dsize); #ifdef CONFIG_PERF_EVENTS - if (trace_probe_test_flag(&tu->tp, TP_FLAG_PROFILE)) + if (flags & TP_FLAG_PROFILE) ret |= uprobe_perf_func(tu, regs, ucb, dsize); #endif uprobe_buffer_put(ucb); @@ -1523,6 +1525,7 @@ static int uretprobe_dispatcher(struct uprobe_consumer *con, struct uprobe_dispatch_data udd; struct uprobe_cpu_buffer *ucb; int dsize, esize; + unsigned int flags; tu 
= container_of(con, struct trace_uprobe, consumer); @@ -1540,11 +1543,12 @@ static int uretprobe_dispatcher(struct uprobe_consumer *con, ucb = uprobe_buffer_get(); store_trace_args(ucb->buf, &tu->tp, regs, esize, dsize); - if (trace_probe_test_flag(&tu->tp, TP_FLAG_TRACE)) + flags = trace_probe_load_flag(&tu->tp); + if (flags & TP_FLAG_TRACE) uretprobe_trace_func(tu, func, regs, ucb, dsize); #ifdef CONFIG_PERF_EVENTS - if (trace_probe_test_flag(&tu->tp, TP_FLAG_PROFILE)) + if (flags & TP_FLAG_PROFILE) uretprobe_perf_func(tu, func, regs, ucb, dsize); #endif uprobe_buffer_put(ucb); |
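
The trace_kprobe and trace_uprobe hunks above stop testing tp->event->flags bit by bit and instead take one snapshot per event through the new trace_probe_load_flag(), an smp_load_acquire() that pairs with the smp_store_release() in trace_probe_set_flag(). Below is a small userspace sketch of that snapshot pattern using C11 stdatomic as a stand-in for the kernel primitives; it illustrates the idea only and is not the kernel code.

```c
/*
 * Userspace sketch of the acquire/release flag pattern from the
 * trace_probe.h hunk: readers take one snapshot of the flags with
 * acquire semantics, writers publish updates with release semantics,
 * so a dispatcher never acts on a half-published state.  Names here
 * are illustrative, not kernel symbols.
 */
#include <stdatomic.h>
#include <stdio.h>

#define TP_FLAG_TRACE	(1u << 0)
#define TP_FLAG_PROFILE	(1u << 1)

static _Atomic unsigned int probe_flags;

static unsigned int probe_load_flags(void)
{
	/* stand-in for smp_load_acquire(&tp->event->flags) */
	return atomic_load_explicit(&probe_flags, memory_order_acquire);
}

static void probe_set_flag(unsigned int flag)
{
	unsigned int old = atomic_load_explicit(&probe_flags, memory_order_relaxed);

	/* stand-in for smp_store_release() in trace_probe_set_flag() */
	atomic_store_explicit(&probe_flags, old | flag, memory_order_release);
}

static void dispatcher(void)
{
	/* one snapshot per event, as in kprobe_dispatcher() above */
	unsigned int flags = probe_load_flags();

	if (flags & TP_FLAG_TRACE)
		printf("trace handler\n");
	if (flags & TP_FLAG_PROFILE)
		printf("perf handler\n");
}

int main(void)
{
	dispatcher();			/* no handlers enabled yet */
	probe_set_flag(TP_FLAG_TRACE);
	dispatcher();			/* trace handler only */
	return 0;
}
```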

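In the same spirit, the cpufreq_schedutil hunks pair an smp_wmb() after the policy-limits update in sugov_limits() with an smp_mb() after clearing limits_changed in sugov_should_update_freq(), so the flag clear cannot be reordered past the later reads of the policy limits and a limits update is never lost. The following single-threaded sketch only mirrors the barrier placement with C11 fences; limits_changed and policy_max_freq are illustrative stand-ins, not kernel symbols, and in the kernel the two sides run concurrently on different CPUs.

```c
/*
 * Sketch of the limits_changed ordering from the cpufreq_schedutil hunk,
 * modelled with C11 fences.  Writer and reader would normally run on
 * different CPUs; this program only mirrors the barrier placement.
 */
#include <stdatomic.h>
#include <stdio.h>

static _Atomic int limits_changed;
static int policy_max_freq = 1000000;	/* stand-in for the policy limits */

/* sugov_limits() side: publish the new limits, then set the flag. */
static void update_limits(int new_max)
{
	policy_max_freq = new_max;
	atomic_thread_fence(memory_order_release);	/* ~ smp_wmb() */
	atomic_store_explicit(&limits_changed, 1, memory_order_relaxed);
}

/* sugov_should_update_freq() side: clear the flag, then read the limits. */
static int should_update_freq(void)
{
	if (atomic_load_explicit(&limits_changed, memory_order_relaxed)) {
		atomic_store_explicit(&limits_changed, 0, memory_order_relaxed);
		atomic_thread_fence(memory_order_seq_cst);	/* ~ smp_mb() */
		printf("limits changed, resolving against max %d\n", policy_max_freq);
		return 1;
	}
	return 0;
}

int main(void)
{
	update_limits(800000);
	return !should_update_freq();	/* exit 0 when the update is seen */
}
```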