summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/audit_watch.c12
-rw-r--r--kernel/events/core.c4
-rw-r--r--kernel/fork.c17
-rw-r--r--kernel/locking/osq_lock.c13
-rw-r--r--kernel/locking/rwsem-xadd.c27
-rw-r--r--kernel/sched/fair.c3
-rw-r--r--kernel/time/timer.c29
-rw-r--r--kernel/trace/ring_buffer.c2
8 files changed, 87 insertions, 20 deletions
diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c
index 690e1e3c59f7..f036b6ada6ef 100644
--- a/kernel/audit_watch.c
+++ b/kernel/audit_watch.c
@@ -419,6 +419,13 @@ int audit_add_watch(struct audit_krule *krule, struct list_head **list)
struct path parent_path;
int h, ret = 0;
+ /*
+ * When we will be calling audit_add_to_parent, krule->watch might have
+ * been updated and watch might have been freed.
+ * So we need to keep a reference of watch.
+ */
+ audit_get_watch(watch);
+
mutex_unlock(&audit_filter_mutex);
/* Avoid calling path_lookup under audit_filter_mutex. */
@@ -427,8 +434,10 @@ int audit_add_watch(struct audit_krule *krule, struct list_head **list)
/* caller expects mutex locked */
mutex_lock(&audit_filter_mutex);
- if (ret)
+ if (ret) {
+ audit_put_watch(watch);
return ret;
+ }
/* either find an old parent or attach a new one */
parent = audit_find_parent(d_backing_inode(parent_path.dentry));
@@ -446,6 +455,7 @@ int audit_add_watch(struct audit_krule *krule, struct list_head **list)
*list = &audit_inode_hash[h];
error:
path_put(&parent_path);
+ audit_put_watch(watch);
return ret;
}
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 6e6ec229c780..95bd00d9f2c3 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -5563,6 +5563,7 @@ perf_output_sample_ustack(struct perf_output_handle *handle, u64 dump_size,
unsigned long sp;
unsigned int rem;
u64 dyn_size;
+ mm_segment_t fs;
/*
* We dump:
@@ -5580,7 +5581,10 @@ perf_output_sample_ustack(struct perf_output_handle *handle, u64 dump_size,
/* Data. */
sp = perf_user_stack_pointer(regs);
+ fs = get_fs();
+ set_fs(USER_DS);
rem = __output_copy_user(handle, (void *) sp, dump_size);
+ set_fs(fs);
dyn_size = dump_size - rem;
perf_output_skip(handle, rem);
diff --git a/kernel/fork.c b/kernel/fork.c
index 5d0e2f366766..73beb8dfa9df 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1532,6 +1532,18 @@ static __latent_entropy struct task_struct *copy_process(
if (!p)
goto fork_out;
+ /*
+ * This _must_ happen before we call free_task(), i.e. before we jump
+ * to any of the bad_fork_* labels. This is to avoid freeing
+ * p->set_child_tid which is (ab)used as a kthread's data pointer for
+ * kernel threads (PF_KTHREAD).
+ */
+ p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
+ /*
+ * Clear TID on mm_release()?
+ */
+ p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr : NULL;
+
ftrace_graph_init_task(p);
rt_mutex_init_task(p);
@@ -1693,11 +1705,6 @@ static __latent_entropy struct task_struct *copy_process(
}
}
- p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
- /*
- * Clear TID on mm_release()?
- */
- p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr : NULL;
#ifdef CONFIG_BLOCK
p->plug = NULL;
#endif
diff --git a/kernel/locking/osq_lock.c b/kernel/locking/osq_lock.c
index 05a37857ab55..8d7047ecef4e 100644
--- a/kernel/locking/osq_lock.c
+++ b/kernel/locking/osq_lock.c
@@ -104,6 +104,19 @@ bool osq_lock(struct optimistic_spin_queue *lock)
prev = decode_cpu(old);
node->prev = prev;
+
+ /*
+ * osq_lock() unqueue
+ *
+ * node->prev = prev osq_wait_next()
+ * WMB MB
+ * prev->next = node next->prev = prev // unqueue-C
+ *
+ * Here 'node->prev' and 'next->prev' are the same variable and we need
+ * to ensure these stores happen in-order to avoid corrupting the list.
+ */
+ smp_wmb();
+
WRITE_ONCE(prev->next, node);
/*
diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c
index 2337b4bb2366..a4112dfcd0fb 100644
--- a/kernel/locking/rwsem-xadd.c
+++ b/kernel/locking/rwsem-xadd.c
@@ -574,6 +574,33 @@ struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
WAKE_Q(wake_q);
/*
+ * __rwsem_down_write_failed_common(sem)
+ * rwsem_optimistic_spin(sem)
+ * osq_unlock(sem->osq)
+ * ...
+ * atomic_long_add_return(&sem->count)
+ *
+ * - VS -
+ *
+ * __up_write()
+ * if (atomic_long_sub_return_release(&sem->count) < 0)
+ * rwsem_wake(sem)
+ * osq_is_locked(&sem->osq)
+ *
+ * And __up_write() must observe !osq_is_locked() when it observes the
+ * atomic_long_add_return() in order to not miss a wakeup.
+ *
+ * This boils down to:
+ *
+ * [S.rel] X = 1 [RmW] r0 = (Y += 0)
+ * MB RMB
+ * [RmW] Y += 1 [L] r1 = X
+ *
+ * exists (r0=1 /\ r1=0)
+ */
+ smp_rmb();
+
+ /*
* If a spinner is present, it is not necessary to do the wakeup.
* Try to do wakeup only if the trylock succeeds to minimize
* spinlock contention which may introduce too much delay in the
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index f6e8727f7fa3..f81adb476c03 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -8639,7 +8639,8 @@ static inline bool vruntime_normalized(struct task_struct *p)
* - A task which has been woken up by try_to_wake_up() and
* waiting for actually being woken up by sched_ttwu_pending().
*/
- if (!se->sum_exec_runtime || p->state == TASK_WAKING)
+ if (!se->sum_exec_runtime ||
+ (p->state == TASK_WAKING && p->sched_remote_wakeup))
return true;
return false;
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 7c477912f36d..b625cc7fcc1c 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -1649,6 +1649,22 @@ static inline void __run_timers(struct timer_base *base)
spin_lock_irq(&base->lock);
+ /*
+ * timer_base::must_forward_clk must be cleared before running
+ * timers so that any timer functions that call mod_timer() will
+ * not try to forward the base. Idle tracking / clock forwarding
+ * logic is only used with BASE_STD timers.
+ *
+ * The must_forward_clk flag is cleared unconditionally also for
+ * the deferrable base. The deferrable base is not affected by idle
+ * tracking and never forwarded, so clearing the flag is a NOOP.
+ *
+ * The fact that the deferrable base is never forwarded can cause
+ * large variations in granularity for deferrable timers, but they
+ * can be deferred for long periods due to idle anyway.
+ */
+ base->must_forward_clk = false;
+
while (time_after_eq(jiffies, base->clk)) {
levels = collect_expired_timers(base, heads);
@@ -1668,19 +1684,6 @@ static __latent_entropy void run_timer_softirq(struct softirq_action *h)
{
struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);
- /*
- * must_forward_clk must be cleared before running timers so that any
- * timer functions that call mod_timer will not try to forward the
- * base. idle trcking / clock forwarding logic is only used with
- * BASE_STD timers.
- *
- * The deferrable base does not do idle tracking at all, so we do
- * not forward it. This can result in very large variations in
- * granularity for deferrable timers, but they can be deferred for
- * long periods due to idle.
- */
- base->must_forward_clk = false;
-
__run_timers(base);
if (IS_ENABLED(CONFIG_NO_HZ_COMMON))
__run_timers(this_cpu_ptr(&timer_bases[BASE_DEF]));
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index dc29b600d2cb..f316e90ad538 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -1504,6 +1504,8 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned long nr_pages)
tmp_iter_page = first_page;
do {
+ cond_resched();
+
to_remove_page = tmp_iter_page;
rb_inc_page(cpu_buffer, &tmp_iter_page);