author     Vitor Soares <vitor.soares@toradex.com>  2025-12-04 13:49:42 +0000
committer  Vitor Soares <vitor.soares@toradex.com>  2025-12-04 13:49:51 +0000
commit     e0d2c0556a62886a8db485beef41d45be5b24a76 (patch)
tree       fe285624899543b24fbe8dd15f55f8f42421f695 /kernel
parent     88e9f1ec02f87bb86c41152f0b18eec78d0fbfeb (diff)
parent     4ac2b983648e08576dc45697ba3680a07b0d2b29 (diff)
Merge commit '4ac2b983648e' of github.com/Freescale/linux-fslc into toradex_5.15-2.2.x-imx
Sync with linux-fslc branch 5.15-2.2.x-imx up to commit 4ac2b983648e, which includes the v5.15.196 stable update.

Signed-off-by: Vitor Soares <vitor.soares@toradex.com>
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/bpf/arraymap.c                  1
-rw-r--r--  kernel/bpf/core.c                     73
-rw-r--r--  kernel/bpf/inode.c                     4
-rw-r--r--  kernel/bpf/syscall.c                  22
-rw-r--r--  kernel/bpf/verifier.c                  4
-rw-r--r--  kernel/cgroup/cgroup.c                43
-rw-r--r--  kernel/fork.c                          2
-rw-r--r--  kernel/irq/manage.c                    8
-rw-r--r--  kernel/padata.c                        6
-rw-r--r--  kernel/pid.c                           2
-rw-r--r--  kernel/sched/cpufreq_schedutil.c      28
-rw-r--r--  kernel/sched/fair.c                   38
-rw-r--r--  kernel/smp.c                          11
-rw-r--r--  kernel/time/hrtimer.c                 50
-rw-r--r--  kernel/trace/preemptirq_delay_test.c   2
-rw-r--r--  kernel/trace/trace.c                   4
-rw-r--r--  kernel/trace/trace_dynevent.c          4
-rw-r--r--  kernel/trace/trace_events_synth.c      2
-rw-r--r--  kernel/trace/trace_kprobe.c           11
-rw-r--r--  kernel/trace/trace_probe.h             9
-rw-r--r--  kernel/trace/trace_uprobe.c           12
21 files changed, 212 insertions, 124 deletions
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 2788da290c21..dc42970dda97 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -1044,7 +1044,6 @@ static struct bpf_map *prog_array_map_alloc(union bpf_attr *attr)
INIT_WORK(&aux->work, prog_array_map_clear_deferred);
INIT_LIST_HEAD(&aux->poke_progs);
mutex_init(&aux->poke_mutex);
- spin_lock_init(&aux->owner.lock);
map = array_map_alloc(attr);
if (IS_ERR(map)) {
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 1ded3eb492b8..73a1c66e5417 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1832,31 +1832,74 @@ static unsigned int __bpf_prog_ret0_warn(const void *ctx,
}
#endif
-bool bpf_prog_array_compatible(struct bpf_array *array,
- const struct bpf_prog *fp)
+static bool __bpf_prog_map_compatible(struct bpf_map *map,
+ const struct bpf_prog *fp)
{
- bool ret;
+ enum bpf_prog_type prog_type = fp->aux->dst_prog ? fp->aux->dst_prog->type : fp->type;
+ struct bpf_prog_aux *aux = fp->aux;
+ enum bpf_cgroup_storage_type i;
+ bool ret = false;
+ u64 cookie;
if (fp->kprobe_override)
- return false;
-
- spin_lock(&array->aux->owner.lock);
+ return ret;
- if (!array->aux->owner.type) {
- /* There's no owner yet where we could check for
- * compatibility.
- */
- array->aux->owner.type = fp->type;
- array->aux->owner.jited = fp->jited;
+ spin_lock(&map->owner_lock);
+ /* There's no owner yet where we could check for compatibility. */
+ if (!map->owner) {
+ map->owner = bpf_map_owner_alloc(map);
+ if (!map->owner)
+ goto err;
+ map->owner->type = prog_type;
+ map->owner->jited = fp->jited;
+ /* Note: xdp_has_frags doesn't exist in aux yet in our branch */
+ /* map->owner->xdp_has_frags = aux->xdp_has_frags; */
+ map->owner->attach_func_proto = aux->attach_func_proto;
+ for_each_cgroup_storage_type(i) {
+ map->owner->storage_cookie[i] =
+ aux->cgroup_storage[i] ?
+ aux->cgroup_storage[i]->cookie : 0;
+ }
ret = true;
} else {
- ret = array->aux->owner.type == fp->type &&
- array->aux->owner.jited == fp->jited;
+ ret = map->owner->type == prog_type &&
+ map->owner->jited == fp->jited;
+ /* Note: xdp_has_frags check would go here when available */
+ /* && map->owner->xdp_has_frags == aux->xdp_has_frags; */
+ for_each_cgroup_storage_type(i) {
+ if (!ret)
+ break;
+ cookie = aux->cgroup_storage[i] ?
+ aux->cgroup_storage[i]->cookie : 0;
+ ret = map->owner->storage_cookie[i] == cookie ||
+ !cookie;
+ }
+ if (ret &&
+ map->owner->attach_func_proto != aux->attach_func_proto) {
+ switch (prog_type) {
+ case BPF_PROG_TYPE_TRACING:
+ case BPF_PROG_TYPE_LSM:
+ case BPF_PROG_TYPE_EXT:
+ case BPF_PROG_TYPE_STRUCT_OPS:
+ ret = false;
+ break;
+ default:
+ break;
+ }
+ }
}
- spin_unlock(&array->aux->owner.lock);
+err:
+ spin_unlock(&map->owner_lock);
return ret;
}
+bool bpf_prog_array_compatible(struct bpf_array *array,
+ const struct bpf_prog *fp)
+{
+ struct bpf_map *map = &array->map;
+ return __bpf_prog_map_compatible(map, fp);
+}
+
static int bpf_check_tail_call(const struct bpf_prog *fp)
{
struct bpf_prog_aux *aux = fp->aux;
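Aside on the change above: the core of __bpf_prog_map_compatible() is that the first program to use a map becomes its owner and records the relevant properties (program type, JIT state, and so on); every later program must match them. A minimal standalone C sketch of that idea follows. It is not kernel code, and the names (map_owner_demo, map_claim_or_check) are invented for illustration; the real function additionally takes map->owner_lock and compares cgroup-storage cookies and the attach function prototype.

#include <stdbool.h>
#include <stdio.h>

/* Simplified model of the map-owner check: the first program that uses
 * the map claims ownership and records its properties; later programs
 * are accepted only if their properties match. */
struct map_owner_demo {
	bool has_owner;
	int  prog_type;
	bool jited;
};

static bool map_claim_or_check(struct map_owner_demo *owner,
			       int prog_type, bool jited)
{
	if (!owner->has_owner) {
		/* No owner yet: this program defines the expected properties. */
		owner->has_owner = true;
		owner->prog_type = prog_type;
		owner->jited = jited;
		return true;
	}
	/* Owner already present: only compatible programs are accepted. */
	return owner->prog_type == prog_type && owner->jited == jited;
}

int main(void)
{
	struct map_owner_demo owner = { 0 };

	printf("%d\n", map_claim_or_check(&owner, 1, true));  /* 1: claims ownership */
	printf("%d\n", map_claim_or_check(&owner, 1, true));  /* 1: compatible */
	printf("%d\n", map_claim_or_check(&owner, 2, true));  /* 0: type mismatch */
	return 0;
}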
diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index 5a8d9f7467bf..849df8268af5 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -610,7 +610,7 @@ static int bpf_show_options(struct seq_file *m, struct dentry *root)
return 0;
}
-static void bpf_free_inode(struct inode *inode)
+static void bpf_destroy_inode(struct inode *inode)
{
enum bpf_type type;
@@ -625,7 +625,7 @@ static const struct super_operations bpf_super_ops = {
.statfs = simple_statfs,
.drop_inode = generic_delete_inode,
.show_options = bpf_show_options,
- .free_inode = bpf_free_inode,
+ .destroy_inode = bpf_destroy_inode,
};
enum {
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 6f309248f13f..b80d125dcea9 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -31,6 +31,7 @@
#include <linux/bpf-netns.h>
#include <linux/rcupdate_trace.h>
#include <linux/memcontrol.h>
+#include <linux/cookie.h>
#define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \
(map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \
@@ -43,6 +44,7 @@
#define BPF_OBJ_FLAG_MASK (BPF_F_RDONLY | BPF_F_WRONLY)
DEFINE_PER_CPU(int, bpf_prog_active);
+DEFINE_COOKIE(bpf_map_cookie);
static DEFINE_IDR(prog_idr);
static DEFINE_SPINLOCK(prog_idr_lock);
static DEFINE_IDR(map_idr);
@@ -475,6 +477,7 @@ static void bpf_map_free_deferred(struct work_struct *work)
security_bpf_map_free(map);
bpf_map_release_memcg(map);
+ bpf_map_owner_free(map);
/* implementation dependent freeing */
map->ops->map_free(map);
}
@@ -574,17 +577,15 @@ static unsigned long bpf_map_memory_footprint(const struct bpf_map *map)
static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
{
- const struct bpf_map *map = filp->private_data;
- const struct bpf_array *array;
+ struct bpf_map *map = filp->private_data;
u32 type = 0, jited = 0;
- if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) {
- array = container_of(map, struct bpf_array, map);
- spin_lock(&array->aux->owner.lock);
- type = array->aux->owner.type;
- jited = array->aux->owner.jited;
- spin_unlock(&array->aux->owner.lock);
+ spin_lock(&map->owner_lock);
+ if (map->owner) {
+ type = map->owner->type;
+ jited = map->owner->jited;
}
+ spin_unlock(&map->owner_lock);
seq_printf(m,
"map_type:\t%u\n"
@@ -886,9 +887,14 @@ static int map_create(union bpf_attr *attr)
if (err < 0)
goto free_map;
+ preempt_disable();
+ map->cookie = gen_cookie_next(&bpf_map_cookie);
+ preempt_enable();
+
atomic64_set(&map->refcnt, 1);
atomic64_set(&map->usercnt, 1);
mutex_init(&map->freeze_mutex);
+ spin_lock_init(&map->owner_lock);
map->spin_lock_off = -EINVAL;
map->timer_off = -EINVAL;
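Aside on the cookie assignment above: gen_cookie_next() hands each new map a distinct non-zero 64-bit value, which __bpf_prog_map_compatible() later uses to compare cgroup-storage maps by identity. A userspace analogy is simply an atomic counter; the sketch below (gen_cookie_demo is an invented name) shows the idea, whereas the kernel implementation uses per-CPU counters and therefore disables preemption around the call.

#include <inttypes.h>
#include <stdatomic.h>
#include <stdio.h>

/* Stand-in for a cookie generator: a global atomic counter that hands
 * out a distinct, non-zero 64-bit value on each call. */
static _Atomic uint64_t cookie_counter;

static uint64_t gen_cookie_demo(void)
{
	/* fetch_add returns the previous value; +1 keeps cookies non-zero. */
	return atomic_fetch_add_explicit(&cookie_counter, 1,
					 memory_order_relaxed) + 1;
}

int main(void)
{
	uint64_t a = gen_cookie_demo();
	uint64_t b = gen_cookie_demo();

	printf("map A cookie: %" PRIu64 ", map B cookie: %" PRIu64 "\n", a, b);
	return 0;
}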
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 89b4fa815a9b..4b7c9a60a735 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -5071,6 +5071,10 @@ static int process_timer_func(struct bpf_verifier_env *env, int regno,
verbose(env, "verifier bug. Two map pointers in a timer helper\n");
return -EFAULT;
}
+ if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
+ verbose(env, "bpf_timer cannot be used for PREEMPT_RT.\n");
+ return -EOPNOTSUPP;
+ }
meta->map_uid = reg->map_uid;
meta->map_ptr = map;
return 0;
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 1a3b2e1436db..e5fe4ffff7cd 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -122,8 +122,31 @@ DEFINE_PERCPU_RWSEM(cgroup_threadgroup_rwsem);
* of concurrent destructions. Use a separate workqueue so that cgroup
* destruction work items don't end up filling up max_active of system_wq
* which may lead to deadlock.
+ *
+ * A cgroup destruction should enqueue work sequentially to:
+ * cgroup_offline_wq: use for css offline work
+ * cgroup_release_wq: use for css release work
+ * cgroup_free_wq: use for free work
+ *
+ * Rationale for using separate workqueues:
+ * The cgroup root free work may depend on completion of other css offline
+ * operations. If all tasks were enqueued to a single workqueue, this could
+ * create a deadlock scenario where:
+ * - Free work waits for other css offline work to complete.
+ * - But other css offline work is queued after free work in the same queue.
+ *
+ * Example deadlock scenario with single workqueue (cgroup_destroy_wq):
+ * 1. umount net_prio
+ * 2. net_prio root destruction enqueues work to cgroup_destroy_wq (CPUx)
+ * 3. perf_event CSS A offline enqueues work to same cgroup_destroy_wq (CPUx)
+ * 4. net_prio cgroup_destroy_root->cgroup_lock_and_drain_offline.
+ * 5. net_prio root destruction blocks waiting for perf_event CSS A offline,
+ * which can never complete as it's behind in the same queue and
+ * workqueue's max_active is 1.
*/
-static struct workqueue_struct *cgroup_destroy_wq;
+static struct workqueue_struct *cgroup_offline_wq;
+static struct workqueue_struct *cgroup_release_wq;
+static struct workqueue_struct *cgroup_free_wq;
/* generate an array of cgroup subsystem pointers */
#define SUBSYS(_x) [_x ## _cgrp_id] = &_x ## _cgrp_subsys,
@@ -5263,7 +5286,7 @@ static void css_release_work_fn(struct work_struct *work)
mutex_unlock(&cgroup_mutex);
INIT_RCU_WORK(&css->destroy_rwork, css_free_rwork_fn);
- queue_rcu_work(cgroup_destroy_wq, &css->destroy_rwork);
+ queue_rcu_work(cgroup_free_wq, &css->destroy_rwork);
}
static void css_release(struct percpu_ref *ref)
@@ -5272,7 +5295,7 @@ static void css_release(struct percpu_ref *ref)
container_of(ref, struct cgroup_subsys_state, refcnt);
INIT_WORK(&css->destroy_work, css_release_work_fn);
- queue_work(cgroup_destroy_wq, &css->destroy_work);
+ queue_work(cgroup_release_wq, &css->destroy_work);
}
static void init_and_link_css(struct cgroup_subsys_state *css,
@@ -5394,7 +5417,7 @@ err_list_del:
err_free_css:
list_del_rcu(&css->rstat_css_node);
INIT_RCU_WORK(&css->destroy_rwork, css_free_rwork_fn);
- queue_rcu_work(cgroup_destroy_wq, &css->destroy_rwork);
+ queue_rcu_work(cgroup_free_wq, &css->destroy_rwork);
return ERR_PTR(err);
}
@@ -5631,7 +5654,7 @@ static void css_killed_ref_fn(struct percpu_ref *ref)
if (atomic_dec_and_test(&css->online_cnt)) {
INIT_WORK(&css->destroy_work, css_killed_work_fn);
- queue_work(cgroup_destroy_wq, &css->destroy_work);
+ queue_work(cgroup_offline_wq, &css->destroy_work);
}
}
@@ -6008,8 +6031,14 @@ static int __init cgroup_wq_init(void)
* We would prefer to do this in cgroup_init() above, but that
* is called before init_workqueues(): so leave this until after.
*/
- cgroup_destroy_wq = alloc_workqueue("cgroup_destroy", 0, 1);
- BUG_ON(!cgroup_destroy_wq);
+ cgroup_offline_wq = alloc_workqueue("cgroup_offline", 0, 1);
+ BUG_ON(!cgroup_offline_wq);
+
+ cgroup_release_wq = alloc_workqueue("cgroup_release", 0, 1);
+ BUG_ON(!cgroup_release_wq);
+
+ cgroup_free_wq = alloc_workqueue("cgroup_free", 0, 1);
+ BUG_ON(!cgroup_free_wq);
return 0;
}
core_initcall(cgroup_wq_init);
diff --git a/kernel/fork.c b/kernel/fork.c
index 2fd9c431bf45..2c99d39e2bc0 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1595,7 +1595,7 @@ static int copy_io(unsigned long clone_flags, struct task_struct *tsk)
return 0;
}
-static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk)
+static int copy_sighand(u64 clone_flags, struct task_struct *tsk)
{
struct sighand_struct *sig;
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index b46fbfbb929f..ce0433446a8e 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -501,7 +501,8 @@ int irq_force_affinity(unsigned int irq, const struct cpumask *cpumask)
}
EXPORT_SYMBOL_GPL(irq_force_affinity);
-int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m)
+int __irq_apply_affinity_hint(unsigned int irq, const struct cpumask *m,
+ bool setaffinity)
{
unsigned long flags;
struct irq_desc *desc = irq_get_desc_lock(irq, &flags, IRQ_GET_DESC_CHECK_GLOBAL);
@@ -510,12 +511,11 @@ int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m)
return -EINVAL;
desc->affinity_hint = m;
irq_put_desc_unlock(desc, flags);
- /* set the initial affinity to prevent every interrupt being on CPU0 */
- if (m)
+ if (m && setaffinity)
__irq_set_affinity(irq, m, false);
return 0;
}
-EXPORT_SYMBOL_GPL(irq_set_affinity_hint);
+EXPORT_SYMBOL_GPL(__irq_apply_affinity_hint);
static void irq_affinity_notify(struct work_struct *work)
{
diff --git a/kernel/padata.c b/kernel/padata.c
index b443e19e64cf..5453f5750906 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -282,7 +282,11 @@ static struct padata_priv *padata_find_next(struct parallel_data *pd,
if (remove_object) {
list_del_init(&padata->list);
++pd->processed;
- pd->cpu = cpumask_next_wrap(cpu, pd->cpumask.pcpu, -1, false);
+ /* When sequence wraps around, reset to the first CPU. */
+ if (unlikely(pd->processed == 0))
+ pd->cpu = cpumask_first(pd->cpumask.pcpu);
+ else
+ pd->cpu = cpumask_next_wrap(cpu, pd->cpumask.pcpu, -1, false);
}
spin_unlock(&reorder->lock);
diff --git a/kernel/pid.c b/kernel/pid.c
index efe87db44683..61f6649568b2 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -474,7 +474,7 @@ pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns)
struct upid *upid;
pid_t nr = 0;
- if (pid && ns->level <= pid->level) {
+ if (pid && ns && ns->level <= pid->level) {
upid = &pid->numbers[ns->level];
if (upid->ns == ns)
nr = upid->nr;
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 519f742d44f4..954a85b8c275 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -89,9 +89,20 @@ static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time)
if (!cpufreq_this_cpu_can_update(sg_policy->policy))
return false;
- if (unlikely(sg_policy->limits_changed)) {
- sg_policy->limits_changed = false;
+ if (unlikely(READ_ONCE(sg_policy->limits_changed))) {
+ WRITE_ONCE(sg_policy->limits_changed, false);
sg_policy->need_freq_update = true;
+
+ /*
+ * The above limits_changed update must occur before the reads
+ * of policy limits in cpufreq_driver_resolve_freq() or a policy
+ * limits update might be missed, so use a memory barrier to
+ * ensure it.
+ *
+ * This pairs with the write memory barrier in sugov_limits().
+ */
+ smp_mb();
+
return true;
}
@@ -326,7 +337,7 @@ static inline bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu) { return false; }
static inline void ignore_dl_rate_limit(struct sugov_cpu *sg_cpu)
{
if (cpu_bw_dl(cpu_rq(sg_cpu->cpu)) > sg_cpu->bw_dl)
- sg_cpu->sg_policy->limits_changed = true;
+ WRITE_ONCE(sg_cpu->sg_policy->limits_changed, true);
}
static inline bool sugov_update_single_common(struct sugov_cpu *sg_cpu,
@@ -826,7 +837,16 @@ static void sugov_limits(struct cpufreq_policy *policy)
mutex_unlock(&sg_policy->work_lock);
}
- sg_policy->limits_changed = true;
+ /*
+ * The limits_changed update below must take place before the updates
+ * of policy limits in cpufreq_set_policy() or a policy limits update
+ * might be missed, so use a memory barrier to ensure it.
+ *
+ * This pairs with the memory barrier in sugov_should_update_freq().
+ */
+ smp_wmb();
+
+ WRITE_ONCE(sg_policy->limits_changed, true);
}
struct cpufreq_governor schedutil_gov = {
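Aside on the barriers above: per the comments in the hunks, the smp_wmb() in sugov_limits() pairs with the smp_mb() in sugov_should_update_freq() so that a policy-limits update cannot be missed when the limits_changed flag is consumed. The standalone C11 sketch below shows the generic paired-barrier (message passing) pattern those primitives belong to; it is an analogy only, with an invented payload variable, and does not reproduce the exact store-then-load ordering requirement that makes the schedutil code use a full barrier on the reader side.

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

/* Generic paired-barrier (message passing) pattern: the producer
 * publishes data before setting a flag; a consumer that observes the
 * flag is then guaranteed to observe the data as well. */
static int payload;                 /* plain data guarded by the flag */
static _Atomic int flag;

static void *producer(void *arg)
{
	(void)arg;
	payload = 42;                                   /* write the data first */
	atomic_thread_fence(memory_order_release);      /* roughly smp_wmb() */
	atomic_store_explicit(&flag, 1, memory_order_relaxed);
	return NULL;
}

static void *consumer(void *arg)
{
	(void)arg;
	while (!atomic_load_explicit(&flag, memory_order_relaxed))
		;                                       /* wait for the flag */
	atomic_thread_fence(memory_order_acquire);      /* pairs with the release fence */
	printf("payload = %d\n", payload);              /* guaranteed to print 42 */
	return NULL;
}

int main(void)
{
	pthread_t p, c;

	pthread_create(&c, NULL, consumer, NULL);
	pthread_create(&p, NULL, producer, NULL);
	pthread_join(p, NULL);
	pthread_join(c, NULL);
	return 0;
}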
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index ea707ee9ddac..87f32cf8aa02 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3959,7 +3959,7 @@ static inline unsigned long cfs_rq_load_avg(struct cfs_rq *cfs_rq)
return cfs_rq->avg.load_avg;
}
-static int newidle_balance(struct rq *this_rq, struct rq_flags *rf);
+static int sched_balance_newidle(struct rq *this_rq, struct rq_flags *rf);
static inline unsigned long task_util(struct task_struct *p)
{
@@ -4291,7 +4291,7 @@ attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) {}
static inline void
detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) {}
-static inline int newidle_balance(struct rq *rq, struct rq_flags *rf)
+static inline int sched_balance_newidle(struct rq *rq, struct rq_flags *rf)
{
return 0;
}
@@ -7280,7 +7280,7 @@ balance_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
if (rq->nr_running)
return 1;
- return newidle_balance(rq, rf) != 0;
+ return sched_balance_newidle(rq, rf) != 0;
}
#endif /* CONFIG_SMP */
@@ -7613,21 +7613,21 @@ done: __maybe_unused;
return p;
idle:
- if (!rf)
- return NULL;
-
- new_tasks = newidle_balance(rq, rf);
+ if (rf) {
+ new_tasks = sched_balance_newidle(rq, rf);
- /*
- * Because newidle_balance() releases (and re-acquires) rq->lock, it is
- * possible for any higher priority task to appear. In that case we
- * must re-start the pick_next_entity() loop.
- */
- if (new_tasks < 0)
- return RETRY_TASK;
+ /*
+ * Because sched_balance_newidle() releases (and re-acquires)
+ * rq->lock, it is possible for any higher priority task to
+ * appear. In that case we must re-start the pick_next_entity()
+ * loop.
+ */
+ if (new_tasks < 0)
+ return RETRY_TASK;
- if (new_tasks > 0)
- goto again;
+ if (new_tasks > 0)
+ goto again;
+ }
/*
* rq is about to be idle, check if we need to update the
@@ -10427,7 +10427,7 @@ out_one_pinned:
ld_moved = 0;
/*
- * newidle_balance() disregards balance intervals, so we could
+ * sched_balance_newidle() disregards balance intervals, so we could
* repeatedly reach this code, which would lead to balance_interval
* skyrocketing in a short amount of time. Skip the balance_interval
* increase logic to avoid that.
@@ -11155,7 +11155,7 @@ static inline void nohz_newidle_balance(struct rq *this_rq) { }
#endif /* CONFIG_NO_HZ_COMMON */
/*
- * newidle_balance is called by schedule() if this_cpu is about to become
+ * sched_balance_newidle is called by schedule() if this_cpu is about to become
* idle. Attempts to pull tasks from other CPUs.
*
* Returns:
@@ -11163,7 +11163,7 @@ static inline void nohz_newidle_balance(struct rq *this_rq) { }
* 0 - failed, no new tasks
* > 0 - success, new (fair) tasks present
*/
-static int newidle_balance(struct rq *this_rq, struct rq_flags *rf)
+static int sched_balance_newidle(struct rq *this_rq, struct rq_flags *rf)
{
unsigned long next_balance = jiffies + HZ;
int this_cpu = this_rq->cpu;
diff --git a/kernel/smp.c b/kernel/smp.c
index b60525b34ab0..387df30ca560 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -976,16 +976,15 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
* @mask: The set of cpus to run on (only runs on online subset).
* @func: The function to run. This must be fast and non-blocking.
* @info: An arbitrary pointer to pass to the function.
- * @wait: Bitmask that controls the operation. If %SCF_WAIT is set, wait
- * (atomically) until function has completed on other CPUs. If
- * %SCF_RUN_LOCAL is set, the function will also be run locally
- * if the local CPU is set in the @cpumask.
- *
- * If @wait is true, then returns once @func has returned.
+ * @wait: If true, wait (atomically) until function has completed
+ * on other CPUs.
*
* You must not call this function with disabled interrupts or from a
* hardware interrupt handler or from a bottom half handler. Preemption
* must be disabled when calling this function.
+ *
+ * @func is not called on the local CPU even if @mask contains it. Consider
+ * using on_each_cpu_cond_mask() instead if this is not desirable.
*/
void smp_call_function_many(const struct cpumask *mask,
smp_call_func_t func, void *info, bool wait)
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 2e4b63f3c6dd..7e2ed34e9803 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -631,17 +631,12 @@ static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base)
/*
* Is the high resolution mode active ?
*/
-static inline int __hrtimer_hres_active(struct hrtimer_cpu_base *cpu_base)
+static inline int hrtimer_hres_active(struct hrtimer_cpu_base *cpu_base)
{
return IS_ENABLED(CONFIG_HIGH_RES_TIMERS) ?
cpu_base->hres_active : 0;
}
-static inline int hrtimer_hres_active(void)
-{
- return __hrtimer_hres_active(this_cpu_ptr(&hrtimer_bases));
-}
-
static void __hrtimer_reprogram(struct hrtimer_cpu_base *cpu_base,
struct hrtimer *next_timer,
ktime_t expires_next)
@@ -665,7 +660,7 @@ static void __hrtimer_reprogram(struct hrtimer_cpu_base *cpu_base,
* set. So we'd effectively block all timers until the T2 event
* fires.
*/
- if (!__hrtimer_hres_active(cpu_base) || cpu_base->hang_detected)
+ if (!hrtimer_hres_active(cpu_base) || cpu_base->hang_detected)
return;
tick_program_event(expires_next, 1);
@@ -775,13 +770,13 @@ static void retrigger_next_event(void *arg)
* of the next expiring timer is enough. The return from the SMP
* function call will take care of the reprogramming in case the
* CPU was in a NOHZ idle sleep.
+ *
+ * In periodic low resolution mode, the next softirq expiration
+ * must also be updated.
*/
- if (!__hrtimer_hres_active(base) && !tick_nohz_active)
- return;
-
raw_spin_lock(&base->lock);
hrtimer_update_base(base);
- if (__hrtimer_hres_active(base))
+ if (hrtimer_hres_active(base))
hrtimer_force_reprogram(base, 0);
else
hrtimer_update_next_event(base);
@@ -938,7 +933,7 @@ void clock_was_set(unsigned int bases)
cpumask_var_t mask;
int cpu;
- if (!__hrtimer_hres_active(cpu_base) && !tick_nohz_active)
+ if (!hrtimer_hres_active(cpu_base) && !tick_nohz_active)
goto out_timerfd;
if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) {
@@ -1489,7 +1484,7 @@ u64 hrtimer_get_next_event(void)
raw_spin_lock_irqsave(&cpu_base->lock, flags);
- if (!__hrtimer_hres_active(cpu_base))
+ if (!hrtimer_hres_active(cpu_base))
expires = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_ALL);
raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
@@ -1512,7 +1507,7 @@ u64 hrtimer_next_event_without(const struct hrtimer *exclude)
raw_spin_lock_irqsave(&cpu_base->lock, flags);
- if (__hrtimer_hres_active(cpu_base)) {
+ if (hrtimer_hres_active(cpu_base)) {
unsigned int active;
if (!cpu_base->softirq_activated) {
@@ -1873,25 +1868,7 @@ retry:
tick_program_event(expires_next, 1);
pr_warn_once("hrtimer: interrupt took %llu ns\n", ktime_to_ns(delta));
}
-
-/* called with interrupts disabled */
-static inline void __hrtimer_peek_ahead_timers(void)
-{
- struct tick_device *td;
-
- if (!hrtimer_hres_active())
- return;
-
- td = this_cpu_ptr(&tick_cpu_device);
- if (td && td->evtdev)
- hrtimer_interrupt(td->evtdev);
-}
-
-#else /* CONFIG_HIGH_RES_TIMERS */
-
-static inline void __hrtimer_peek_ahead_timers(void) { }
-
-#endif /* !CONFIG_HIGH_RES_TIMERS */
+#endif /* !CONFIG_HIGH_RES_TIMERS */
/*
* Called from run_local_timers in hardirq context every jiffy
@@ -1902,7 +1879,7 @@ void hrtimer_run_queues(void)
unsigned long flags;
ktime_t now;
- if (__hrtimer_hres_active(cpu_base))
+ if (hrtimer_hres_active(cpu_base))
return;
/*
@@ -2252,11 +2229,6 @@ int hrtimers_cpu_dying(unsigned int dying_cpu)
&new_base->clock_base[i]);
}
- /*
- * The migration might have changed the first expiring softirq
- * timer on this CPU. Update it.
- */
- __hrtimer_get_next_event(new_base, HRTIMER_ACTIVE_SOFT);
/* Tell the other CPU to retrigger the next event */
smp_call_function_single(ncpu, retrigger_next_event, NULL, 0);
diff --git a/kernel/trace/preemptirq_delay_test.c b/kernel/trace/preemptirq_delay_test.c
index 8af92dbe98f0..acb0c971a408 100644
--- a/kernel/trace/preemptirq_delay_test.c
+++ b/kernel/trace/preemptirq_delay_test.c
@@ -34,8 +34,6 @@ MODULE_PARM_DESC(cpu_affinity, "Cpu num test is running on");
static struct completion done;
-#define MIN(x, y) ((x) < (y) ? (x) : (y))
-
static void busy_wait(ulong time)
{
u64 start, end;
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 7af8bbc57531..a6040a707abb 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -7233,7 +7233,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
entry = ring_buffer_event_data(event);
entry->ip = _THIS_IP_;
- len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
+ len = copy_from_user_nofault(&entry->buf, ubuf, cnt);
if (len) {
memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
cnt = FAULTED_SIZE;
@@ -7308,7 +7308,7 @@ tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
entry = ring_buffer_event_data(event);
- len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
+ len = copy_from_user_nofault(&entry->id, ubuf, cnt);
if (len) {
entry->id = -1;
memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
diff --git a/kernel/trace/trace_dynevent.c b/kernel/trace/trace_dynevent.c
index 6d0e9f869ad6..3d8ffa81a1fa 100644
--- a/kernel/trace/trace_dynevent.c
+++ b/kernel/trace/trace_dynevent.c
@@ -239,6 +239,10 @@ static int dyn_event_open(struct inode *inode, struct file *file)
{
int ret;
+ ret = security_locked_down(LOCKDOWN_TRACEFS);
+ if (ret)
+ return ret;
+
ret = tracing_check_open_get_tr(NULL);
if (ret)
return ret;
diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c
index ab54810bd8d9..62d146254f47 100644
--- a/kernel/trace/trace_events_synth.c
+++ b/kernel/trace/trace_events_synth.c
@@ -364,13 +364,11 @@ static enum print_line_t print_synth_event(struct trace_iterator *iter,
str_field = (char *)entry + data_offset;
trace_seq_printf(s, print_fmt, se->fields[i]->name,
- STR_VAR_LEN_MAX,
str_field,
i == se->n_fields - 1 ? "" : " ");
n_u64++;
} else {
trace_seq_printf(s, print_fmt, se->fields[i]->name,
- STR_VAR_LEN_MAX,
(char *)&entry->fields[n_u64],
i == se->n_fields - 1 ? "" : " ");
n_u64 += STR_VAR_LEN_MAX / sizeof(u64);
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index e062f4efec8d..03d4ac41d903 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1722,14 +1722,15 @@ static int kprobe_register(struct trace_event_call *event,
static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
{
struct trace_kprobe *tk = container_of(kp, struct trace_kprobe, rp.kp);
+ unsigned int flags = trace_probe_load_flag(&tk->tp);
int ret = 0;
raw_cpu_inc(*tk->nhit);
- if (trace_probe_test_flag(&tk->tp, TP_FLAG_TRACE))
+ if (flags & TP_FLAG_TRACE)
kprobe_trace_func(tk, regs);
#ifdef CONFIG_PERF_EVENTS
- if (trace_probe_test_flag(&tk->tp, TP_FLAG_PROFILE))
+ if (flags & TP_FLAG_PROFILE)
ret = kprobe_perf_func(tk, regs);
#endif
return ret;
@@ -1741,6 +1742,7 @@ kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
{
struct kretprobe *rp = get_kretprobe(ri);
struct trace_kprobe *tk;
+ unsigned int flags;
/*
* There is a small chance that get_kretprobe(ri) returns NULL when
@@ -1753,10 +1755,11 @@ kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
tk = container_of(rp, struct trace_kprobe, rp);
raw_cpu_inc(*tk->nhit);
- if (trace_probe_test_flag(&tk->tp, TP_FLAG_TRACE))
+ flags = trace_probe_load_flag(&tk->tp);
+ if (flags & TP_FLAG_TRACE)
kretprobe_trace_func(tk, ri, regs);
#ifdef CONFIG_PERF_EVENTS
- if (trace_probe_test_flag(&tk->tp, TP_FLAG_PROFILE))
+ if (flags & TP_FLAG_PROFILE)
kretprobe_perf_func(tk, ri, regs);
#endif
return 0; /* We don't tweak kernel, so just return 0 */
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index 82e1df8aefcb..b08aa3946868 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -258,16 +258,21 @@ struct event_file_link {
struct list_head list;
};
+static inline unsigned int trace_probe_load_flag(struct trace_probe *tp)
+{
+ return smp_load_acquire(&tp->event->flags);
+}
+
static inline bool trace_probe_test_flag(struct trace_probe *tp,
unsigned int flag)
{
- return !!(tp->event->flags & flag);
+ return !!(trace_probe_load_flag(tp) & flag);
}
static inline void trace_probe_set_flag(struct trace_probe *tp,
unsigned int flag)
{
- tp->event->flags |= flag;
+ smp_store_release(&tp->event->flags, tp->event->flags | flag);
}
static inline void trace_probe_clear_flag(struct trace_probe *tp,
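Aside on trace_probe_load_flag(): the kprobe and uprobe dispatchers changed elsewhere in this series read the flags word once into a local snapshot and test both TP_FLAG_TRACE and TP_FLAG_PROFILE against that single value, instead of re-reading a word that trace_probe_set_flag() may be updating concurrently with a store-release. A minimal standalone sketch of that load-once pattern follows (DEMO_FLAG_* and dispatcher_demo are invented names, not kernel symbols).

#include <stdatomic.h>
#include <stdio.h>

#define DEMO_FLAG_TRACE   (1u << 0)
#define DEMO_FLAG_PROFILE (1u << 1)

/* Flags word that another thread may update concurrently with a
 * store-release (analogous to trace_probe_set_flag()). */
static _Atomic unsigned int demo_flags;

static void dispatcher_demo(void)
{
	/* Load the flags once (acquire, like trace_probe_load_flag()) and
	 * test both bits against that single snapshot, so the two checks
	 * cannot see two different intermediate states of the word. */
	unsigned int flags = atomic_load_explicit(&demo_flags,
						  memory_order_acquire);

	if (flags & DEMO_FLAG_TRACE)
		printf("trace handler would run\n");
	if (flags & DEMO_FLAG_PROFILE)
		printf("perf handler would run\n");
}

int main(void)
{
	atomic_store_explicit(&demo_flags,
			      DEMO_FLAG_TRACE | DEMO_FLAG_PROFILE,
			      memory_order_release);
	dispatcher_demo();
	return 0;
}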
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index 322d56661d04..707c5373476a 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -1485,6 +1485,7 @@ static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs)
struct uprobe_dispatch_data udd;
struct uprobe_cpu_buffer *ucb;
int dsize, esize;
+ unsigned int flags;
int ret = 0;
@@ -1505,11 +1506,12 @@ static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs)
ucb = uprobe_buffer_get();
store_trace_args(ucb->buf, &tu->tp, regs, esize, dsize);
- if (trace_probe_test_flag(&tu->tp, TP_FLAG_TRACE))
+ flags = trace_probe_load_flag(&tu->tp);
+ if (flags & TP_FLAG_TRACE)
ret |= uprobe_trace_func(tu, regs, ucb, dsize);
#ifdef CONFIG_PERF_EVENTS
- if (trace_probe_test_flag(&tu->tp, TP_FLAG_PROFILE))
+ if (flags & TP_FLAG_PROFILE)
ret |= uprobe_perf_func(tu, regs, ucb, dsize);
#endif
uprobe_buffer_put(ucb);
@@ -1523,6 +1525,7 @@ static int uretprobe_dispatcher(struct uprobe_consumer *con,
struct uprobe_dispatch_data udd;
struct uprobe_cpu_buffer *ucb;
int dsize, esize;
+ unsigned int flags;
tu = container_of(con, struct trace_uprobe, consumer);
@@ -1540,11 +1543,12 @@ static int uretprobe_dispatcher(struct uprobe_consumer *con,
ucb = uprobe_buffer_get();
store_trace_args(ucb->buf, &tu->tp, regs, esize, dsize);
- if (trace_probe_test_flag(&tu->tp, TP_FLAG_TRACE))
+ flags = trace_probe_load_flag(&tu->tp);
+ if (flags & TP_FLAG_TRACE)
uretprobe_trace_func(tu, func, regs, ucb, dsize);
#ifdef CONFIG_PERF_EVENTS
- if (trace_probe_test_flag(&tu->tp, TP_FLAG_PROFILE))
+ if (flags & TP_FLAG_PROFILE)
uretprobe_perf_func(tu, func, regs, ucb, dsize);
#endif
uprobe_buffer_put(ucb);