Diffstat (limited to 'kernel/rcu')
-rw-r--r--  kernel/rcu/rcuscale.c      2
-rw-r--r--  kernel/rcu/refscale.c      3
-rw-r--r--  kernel/rcu/tasks.h        36
-rw-r--r--  kernel/rcu/tree.c         21
-rw-r--r--  kernel/rcu/tree.h          4
-rw-r--r--  kernel/rcu/tree_exp.h      5
-rw-r--r--  kernel/rcu/tree_plugin.h   8
-rw-r--r--  kernel/rcu/tree_stall.h   20
8 files changed, 79 insertions(+), 20 deletions(-)
diff --git a/kernel/rcu/rcuscale.c b/kernel/rcu/rcuscale.c
index 57ec414710bb..a83cb29d3760 100644
--- a/kernel/rcu/rcuscale.c
+++ b/kernel/rcu/rcuscale.c
@@ -402,7 +402,7 @@ rcu_scale_writer(void *arg)
sched_set_fifo_low(current);
if (holdoff)
- schedule_timeout_uninterruptible(holdoff * HZ);
+ schedule_timeout_idle(holdoff * HZ);
/*
* Wait until rcu_end_inkernel_boot() is called for normal GP tests
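
For context on the substitution above: schedule_timeout_idle() sleeps in
TASK_IDLE, which is just as uninterruptible as the state used by the call it
replaces but is excluded from the load average, so a long holdoff no longer
makes an otherwise idle system look busy. A minimal sketch of the difference
(editorial illustration, not part of the patch):

	/* The two helpers differ only in the task state they sleep in. */
	__set_current_state(TASK_UNINTERRUPTIBLE);	/* counts toward loadavg */
	schedule_timeout(timeout);

	__set_current_state(TASK_IDLE);	/* TASK_UNINTERRUPTIBLE | TASK_NOLOAD */
	schedule_timeout(timeout);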
diff --git a/kernel/rcu/refscale.c b/kernel/rcu/refscale.c
index 5abb0cf52803..fda220e2c042 100644
--- a/kernel/rcu/refscale.c
+++ b/kernel/rcu/refscale.c
@@ -849,12 +849,11 @@ ref_scale_init(void)
VERBOSE_SCALEOUT("Starting %d reader threads\n", nreaders);
for (i = 0; i < nreaders; i++) {
+ init_waitqueue_head(&reader_tasks[i].wq);
firsterr = torture_create_kthread(ref_scale_reader, (void *)i,
reader_tasks[i].task);
if (firsterr)
goto unwind;
-
- init_waitqueue_head(&(reader_tasks[i].wq));
}
// Main Task
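
The hunk above closes an initialization race: the reader kthread can begin
running, and sleep on its waitqueue, as soon as torture_create_kthread()
returns, so the waitqueue must be initialized first. A minimal sketch of the
hazard (hypothetical names, for illustration only):

	/* BAD: the new thread may call wait_event() on rt->wq before the
	 * waitqueue's spinlock and list head are initialized. */
	task = kthread_run(reader_fn, rt, "reader");
	init_waitqueue_head(&rt->wq);

	/* GOOD: initialize first; only then can a waiter run. */
	init_waitqueue_head(&rt->wq);
	task = kthread_run(reader_fn, rt, "reader");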
diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h
index 94b8ee84bc78..28f628c70245 100644
--- a/kernel/rcu/tasks.h
+++ b/kernel/rcu/tasks.h
@@ -951,7 +951,7 @@ reset_ipi:
static bool trc_inspect_reader(struct task_struct *t, void *arg)
{
int cpu = task_cpu(t);
- bool in_qs = false;
+ int nesting;
bool ofl = cpu_is_offline(cpu);
if (task_curr(t)) {
@@ -971,18 +971,18 @@ static bool trc_inspect_reader(struct task_struct *t, void *arg)
n_heavy_reader_updates++;
if (ofl)
n_heavy_reader_ofl_updates++;
- in_qs = true;
+ nesting = 0;
} else {
// The task is not running, so C-language access is safe.
- in_qs = likely(!t->trc_reader_nesting);
+ nesting = t->trc_reader_nesting;
}
- // Mark as checked so that the grace-period kthread will
- // remove it from the holdout list.
- t->trc_reader_checked = true;
-
- if (in_qs)
- return true; // Already in quiescent state, done!!!
+ // If not exiting a read-side critical section, mark as checked
+	// so that the grace-period kthread will remove it from the
+	// holdout list.
+ t->trc_reader_checked = nesting >= 0;
+ if (nesting <= 0)
+ return !nesting; // If in QS, done, otherwise try again later.
// The task is in a read-side critical section, so set up its
// state so that it will awaken the grace-period kthread upon exit
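
The switch from a boolean to the raw nesting value lets this function
distinguish three cases. A sketch of the encoding implied by the checks above:

	/*
	 * t->trc_reader_nesting:
	 *   > 0  -- inside a read-side critical section: fall through and
	 *           arm the exit-time wakeup (checked stays true).
	 *   == 0 -- quiescent state: mark checked, return true, done.
	 *   < 0  -- midway through exiting a critical section: leave the
	 *           task unchecked (nesting >= 0 is false) and return
	 *           false so the caller retries later.
	 */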
@@ -1041,9 +1041,11 @@ static void trc_wait_for_one_reader(struct task_struct *t,
if (smp_call_function_single(cpu, trc_read_check_handler, t, 0)) {
// Just in case there is some other reason for
// failure than the target CPU being offline.
+ WARN_ONCE(1, "%s(): smp_call_function_single() failed for CPU: %d\n",
+			  __func__, cpu);
rcu_tasks_trace.n_ipis_fails++;
per_cpu(trc_ipi_to_cpu, cpu) = false;
- t->trc_ipi_to_cpu = cpu;
+ t->trc_ipi_to_cpu = -1;
}
}
}
@@ -1164,14 +1166,28 @@ static void check_all_holdout_tasks_trace(struct list_head *hop,
}
}
+static void rcu_tasks_trace_empty_fn(void *unused)
+{
+}
+
/* Wait for grace period to complete and provide ordering. */
static void rcu_tasks_trace_postgp(struct rcu_tasks *rtp)
{
+ int cpu;
bool firstreport;
struct task_struct *g, *t;
LIST_HEAD(holdouts);
long ret;
+ // Wait for any lingering IPI handlers to complete. Note that
+	// if a CPU has gone offline or transitioned to userspace in the
+	// meantime, all IPI handlers should have been drained beforehand.
+	// Yes, this assumes that CPUs process IPIs in order. If that ever
+	// changes, there will need to be a recheck and/or timed wait.
+ for_each_online_cpu(cpu)
+ if (smp_load_acquire(per_cpu_ptr(&trc_ipi_to_cpu, cpu)))
+ smp_call_function_single(cpu, rcu_tasks_trace_empty_fn, NULL, 1);
+
// Remove the safety count.
smp_mb__before_atomic(); // Order vs. earlier atomics
atomic_dec(&trc_n_readers_need_end);
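
The drain loop added above relies on a synchronous empty IPI as an ordering
barrier. A standalone sketch of the idiom (editorial, mirroring the code in
this hunk):

	static void drain_fn(void *unused)
	{
	}

	/* With wait=1, smp_call_function_single() does not return until
	 * drain_fn has executed on the target CPU.  Assuming in-order IPI
	 * processing (as the comment above notes), every IPI handler sent
	 * to that CPU earlier must therefore have completed as well. */
	smp_call_function_single(cpu, drain_fn, NULL, 1);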
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index df016f6d0662..66c1ca01dd0e 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -31,6 +31,7 @@
#include <linux/bitops.h>
#include <linux/export.h>
#include <linux/completion.h>
+#include <linux/kmemleak.h>
#include <linux/moduleparam.h>
#include <linux/panic.h>
#include <linux/panic_notifier.h>
@@ -1906,10 +1907,22 @@ static bool rcu_gp_fqs_check_wake(int *gfp)
*/
static void rcu_gp_fqs(bool first_time)
{
+ int nr_fqs = READ_ONCE(rcu_state.nr_fqs_jiffies_stall);
struct rcu_node *rnp = rcu_get_root();
WRITE_ONCE(rcu_state.gp_activity, jiffies);
WRITE_ONCE(rcu_state.n_force_qs, rcu_state.n_force_qs + 1);
+
+ WARN_ON_ONCE(nr_fqs > 3);
+	/* Only count down nr_fqs for stall purposes if jiffies moves. */
+ if (nr_fqs) {
+ if (nr_fqs == 1) {
+ WRITE_ONCE(rcu_state.jiffies_stall,
+				   jiffies + rcu_jiffies_till_stall_check());
+ }
+ WRITE_ONCE(rcu_state.nr_fqs_jiffies_stall, --nr_fqs);
+ }
+
if (first_time) {
/* Collect dyntick-idle snapshots. */
force_qs_rnp(dyntick_save_progress_counter);
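
This countdown is one half of a handshake with rcu_cpu_stall_reset() and
check_cpu_stall() in tree_stall.h further down. A condensed editorial sketch
of the mechanism as introduced by this patch:

	/*
	 * rcu_cpu_stall_reset():   nr_fqs_jiffies_stall = 3;
	 *                          jiffies_stall = ULONG_MAX;  // checks off
	 * rcu_gp_fqs(), per pass:  3 -> 2 -> 1 -> 0; on the 1 -> 0 pass,
	 *                          enough FQS waits have elapsed for jiffies
	 *                          to be fresh, so rearm jiffies_stall.
	 * check_cpu_stall():       return early while the count is nonzero.
	 */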
@@ -3597,6 +3610,14 @@ void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
WRITE_ONCE(krcp->count, krcp->count + 1);
+ /*
+	 * The kvfree_rcu() caller considers the pointer freed at this point
+	 * and likely removes any references to it. Since the actual slab
+	 * freeing (and kmemleak_free()) is deferred, tell kmemleak to ignore
+	 * this object (no scanning or false-positive reporting).
+	 */
+ kmemleak_ignore(ptr);
+
// Set timer to drain after KFREE_DRAIN_JIFFIES.
if (rcu_scheduler_active == RCU_SCHEDULER_RUNNING &&
!krcp->monitor_todo) {
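
kmemleak_ignore() is kmemleak's standard escape hatch for objects whose
remaining references live somewhere the scanner cannot see. A generic sketch
of the pattern (hypothetical object, not from this patch):

	#include <linux/kmemleak.h>

	void *obj = kmalloc(size, GFP_KERNEL);
	/* The only reference to obj now moves somewhere unscannable (here,
	 * a kvfree_rcu() bulk page), so suppress both scanning of obj and
	 * leak reports about it until it is really freed. */
	kmemleak_ignore(obj);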
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 305cf6aeb408..2da96d8b894a 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -351,6 +351,10 @@ struct rcu_state {
/* in jiffies. */
unsigned long jiffies_stall; /* Time at which to check */
/* for CPU stalls. */
+	int nr_fqs_jiffies_stall;	/* Number of fqs loops after
+					 * which to read jiffies and
+					 * set jiffies_stall. Stall
+					 * warnings disabled if !0. */
unsigned long jiffies_resched; /* Time at which to resched */
/* a reluctant CPU. */
unsigned long n_force_qs_gpstart; /* Snapshot of n_force_qs at */
diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
index f46c0c1a5eb3..407941a2903b 100644
--- a/kernel/rcu/tree_exp.h
+++ b/kernel/rcu/tree_exp.h
@@ -507,7 +507,10 @@ static void synchronize_rcu_expedited_wait(void)
if (rdp->rcu_forced_tick_exp)
continue;
rdp->rcu_forced_tick_exp = true;
- tick_dep_set_cpu(cpu, TICK_DEP_BIT_RCU_EXP);
+ preempt_disable();
+ if (cpu_online(cpu))
+ tick_dep_set_cpu(cpu, TICK_DEP_BIT_RCU_EXP);
+ preempt_enable();
}
}
j = READ_ONCE(jiffies_till_first_fqs);
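
The preempt_disable()/preempt_enable() pair added above keeps the
cpu_online() check and the tick_dep_set_cpu() call coherent: the stop_machine
phase that completes CPU offlining cannot run while any CPU, including this
one, sits in a preempt-disabled region, so the online check cannot be
invalidated in between. The general shape (editorial sketch, hypothetical
callee):

	preempt_disable();
	if (cpu_online(cpu))		/* result stays valid ... */
		poke_cpu(cpu);		/* ... across this call  */
	preempt_enable();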
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index f1a73a1f8472..b48b42d76474 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -458,7 +458,7 @@ static bool rcu_preempt_has_tasks(struct rcu_node *rnp)
* be quite short, for example, in the case of the call from
* rcu_read_unlock_special().
*/
-static void
+static notrace void
rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
{
bool empty_exp;
@@ -578,7 +578,7 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
* is disabled. This function cannot be expected to understand these
* nuances, so the caller must handle them.
*/
-static bool rcu_preempt_need_deferred_qs(struct task_struct *t)
+static notrace bool rcu_preempt_need_deferred_qs(struct task_struct *t)
{
return (__this_cpu_read(rcu_data.exp_deferred_qs) ||
READ_ONCE(t->rcu_read_unlock_special.s)) &&
@@ -592,7 +592,7 @@ static bool rcu_preempt_need_deferred_qs(struct task_struct *t)
* evaluate safety in terms of interrupt, softirq, and preemption
* disabling.
*/
-static void rcu_preempt_deferred_qs(struct task_struct *t)
+static notrace void rcu_preempt_deferred_qs(struct task_struct *t)
{
unsigned long flags;
@@ -923,7 +923,7 @@ static bool rcu_preempt_has_tasks(struct rcu_node *rnp)
* Because there is no preemptible RCU, there can be no deferred quiescent
* states.
*/
-static bool rcu_preempt_need_deferred_qs(struct task_struct *t)
+static notrace bool rcu_preempt_need_deferred_qs(struct task_struct *t)
{
return false;
}
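
The notrace markings keep the function tracer out of these helpers, which can
themselves be reached from tracing and preempt-off paths where recursing into
ftrace would be unsafe. For reference, on most architectures the annotation
boils down to a compiler attribute (see include/linux/compiler_types.h):

	#define notrace	__attribute__((__no_instrument_function__))

	/* Usage, as in the hunks above: */
	static notrace bool rcu_preempt_need_deferred_qs(struct task_struct *t);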
diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h
index 677ee3d8671b..6c1234fd927c 100644
--- a/kernel/rcu/tree_stall.h
+++ b/kernel/rcu/tree_stall.h
@@ -121,12 +121,17 @@ static void panic_on_rcu_stall(void)
/**
* rcu_cpu_stall_reset - restart stall-warning timeout for current grace period
*
+ * To honor the reset request from the caller, disable stall detection until
+ * 3 fqs loops have passed. This is required to ensure that a fresh jiffies
+ * value is loaded. It should be safe to rearm from the fqs loop, as enough
+ * timer interrupts and context switches will have occurred by then.
+ *
* The caller must disable hard irqs.
*/
void rcu_cpu_stall_reset(void)
{
- WRITE_ONCE(rcu_state.jiffies_stall,
- jiffies + rcu_jiffies_till_stall_check());
+ WRITE_ONCE(rcu_state.nr_fqs_jiffies_stall, 3);
+ WRITE_ONCE(rcu_state.jiffies_stall, ULONG_MAX);
}
//////////////////////////////////////////////////////////////////////////////
@@ -142,6 +147,7 @@ static void record_gp_stall_check_time(void)
WRITE_ONCE(rcu_state.gp_start, j);
j1 = rcu_jiffies_till_stall_check();
smp_mb(); // ->gp_start before ->jiffies_stall and caller's ->gp_seq.
+ WRITE_ONCE(rcu_state.nr_fqs_jiffies_stall, 0);
WRITE_ONCE(rcu_state.jiffies_stall, j + j1);
rcu_state.jiffies_resched = j + j1 / 2;
rcu_state.n_force_qs_gpstart = READ_ONCE(rcu_state.n_force_qs);
@@ -662,6 +668,16 @@ static void check_cpu_stall(struct rcu_data *rdp)
!rcu_gp_in_progress())
return;
rcu_stall_kick_kthreads();
+
+ /*
+	 * Check if it was requested (via rcu_cpu_stall_reset()) that the FQS
+	 * loop set jiffies_stall to ensure a non-stale jiffies value. This
+	 * is required to obtain a good jiffies value after coming out of a
+	 * long break in jiffies updates. Not doing so can cause false positives.
+	 */
+ if (READ_ONCE(rcu_state.nr_fqs_jiffies_stall) > 0)
+ return;
+
j = jiffies;
/*