summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-12-14 17:21:16 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2020-12-14 17:21:16 -0800
commit8c1dccc80380fca8db09c2a81f5deb3c49b112b2 (patch)
treefb52b154f469b3a9fc433fc450a59b3077bc99a5 /kernel
parent1ac0884d5474fea8dc6ceabbd0e870d1bf4b7b42 (diff)
parent50df51d12c3175573de9c94968639bdd625ec549 (diff)
Merge tag 'core-rcu-2020-12-14' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull RCU updates from Thomas Gleixner: "RCU, LKMM and KCSAN updates collected by Paul McKenney. RCU: - Avoid cpuinfo-induced IPI pileups and idle-CPU IPIs - Lockdep-RCU updates reducing the need for __maybe_unused - Tasks-RCU updates - Miscellaneous fixes - Documentation updates - Torture-test updates KCSAN: - updates for selftests, avoiding setting watchpoints on NULL pointers - fix to watchpoint encoding LKMM: - updates for documentation along with some updates to example-code litmus tests" * tag 'core-rcu-2020-12-14' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (72 commits) srcu: Take early exit on memory-allocation failure rcu/tree: Defer kvfree_rcu() allocation to a clean context rcu: Do not report strict GPs for outgoing CPUs rcu: Fix a typo in rcu_blocking_is_gp() header comment rcu: Prevent lockdep-RCU splats on lock acquisition/release rcu/tree: nocb: Avoid raising softirq for offloaded ready-to-execute CBs rcu,ftrace: Fix ftrace recursion rcu/tree: Make struct kernel_param_ops definitions const rcu/tree: Add a warning if CPU being onlined did not report QS already rcu: Clarify nocb kthreads naming in RCU_NOCB_CPU config rcu: Fix single-CPU check in rcu_blocking_is_gp() rcu: Implement rcu_segcblist_is_offloaded() config dependent list.h: Update comment to explicitly note circular lists rcu: Panic after fixed number of stalls x86/smpboot: Move rcu_cpu_starting() earlier rcu: Allow rcu_irq_enter_check_tick() from NMI tools/memory-model: Label MP tests' producers and consumers tools/memory-model: Use "buf" and "flag" for message-passing tests tools/memory-model: Add types to litmus tests tools/memory-model: Add a glossary of LKMM terms ...
Diffstat (limited to 'kernel')
-rw-r--r--kernel/kcsan/encoding.h20
-rw-r--r--kernel/kcsan/selftest.c3
-rw-r--r--kernel/locking/locktorture.c36
-rw-r--r--kernel/rcu/Kconfig20
-rw-r--r--kernel/rcu/rcu.h16
-rw-r--r--kernel/rcu/rcu_segcblist.h2
-rw-r--r--kernel/rcu/rcuscale.c37
-rw-r--r--kernel/rcu/rcutorture.c52
-rw-r--r--kernel/rcu/refscale.c11
-rw-r--r--kernel/rcu/srcutree.c6
-rw-r--r--kernel/rcu/tasks.h49
-rw-r--r--kernel/rcu/tree.c200
-rw-r--r--kernel/rcu/tree.h2
-rw-r--r--kernel/rcu/tree_plugin.h2
-rw-r--r--kernel/rcu/tree_stall.h6
-rw-r--r--kernel/scftorture.c49
-rw-r--r--kernel/sysctl.c11
-rw-r--r--kernel/torture.c34
18 files changed, 398 insertions, 158 deletions
diff --git a/kernel/kcsan/encoding.h b/kernel/kcsan/encoding.h
index 1a6db2f797ac..7ee405524904 100644
--- a/kernel/kcsan/encoding.h
+++ b/kernel/kcsan/encoding.h
@@ -37,18 +37,20 @@
*/
#define WATCHPOINT_ADDR_BITS (BITS_PER_LONG-1 - WATCHPOINT_SIZE_BITS)
-/*
- * Masks to set/retrieve the encoded data.
- */
-#define WATCHPOINT_WRITE_MASK BIT(BITS_PER_LONG-1)
-#define WATCHPOINT_SIZE_MASK \
- GENMASK(BITS_PER_LONG-2, BITS_PER_LONG-2 - WATCHPOINT_SIZE_BITS)
-#define WATCHPOINT_ADDR_MASK \
- GENMASK(BITS_PER_LONG-3 - WATCHPOINT_SIZE_BITS, 0)
+/* Bitmasks for the encoded watchpoint access information. */
+#define WATCHPOINT_WRITE_MASK BIT(BITS_PER_LONG-1)
+#define WATCHPOINT_SIZE_MASK GENMASK(BITS_PER_LONG-2, WATCHPOINT_ADDR_BITS)
+#define WATCHPOINT_ADDR_MASK GENMASK(WATCHPOINT_ADDR_BITS-1, 0)
+static_assert(WATCHPOINT_ADDR_MASK == (1UL << WATCHPOINT_ADDR_BITS) - 1);
+static_assert((WATCHPOINT_WRITE_MASK ^ WATCHPOINT_SIZE_MASK ^ WATCHPOINT_ADDR_MASK) == ~0UL);
static inline bool check_encodable(unsigned long addr, size_t size)
{
- return size <= MAX_ENCODABLE_SIZE;
+ /*
+ * While we can encode addrs<PAGE_SIZE, avoid crashing with a NULL
+ * pointer deref inside KCSAN.
+ */
+ return addr >= PAGE_SIZE && size <= MAX_ENCODABLE_SIZE;
}
static inline long
diff --git a/kernel/kcsan/selftest.c b/kernel/kcsan/selftest.c
index d98bc208d06d..9014a3a82cf9 100644
--- a/kernel/kcsan/selftest.c
+++ b/kernel/kcsan/selftest.c
@@ -33,6 +33,9 @@ static bool test_encode_decode(void)
unsigned long addr;
prandom_bytes(&addr, sizeof(addr));
+ if (addr < PAGE_SIZE)
+ addr = PAGE_SIZE;
+
if (WARN_ON(!check_encodable(addr, size)))
return false;
diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c
index 62d215b2e39f..fd838cea3934 100644
--- a/kernel/locking/locktorture.c
+++ b/kernel/locking/locktorture.c
@@ -29,6 +29,7 @@
#include <linux/slab.h>
#include <linux/percpu-rwsem.h>
#include <linux/torture.h>
+#include <linux/reboot.h>
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Paul E. McKenney <paulmck@linux.ibm.com>");
@@ -60,6 +61,7 @@ static struct task_struct **reader_tasks;
static bool lock_is_write_held;
static bool lock_is_read_held;
+static unsigned long last_lock_release;
struct lock_stress_stats {
long n_lock_fail;
@@ -74,6 +76,7 @@ static void lock_torture_cleanup(void);
*/
struct lock_torture_ops {
void (*init)(void);
+ void (*exit)(void);
int (*writelock)(void);
void (*write_delay)(struct torture_random_state *trsp);
void (*task_boost)(struct torture_random_state *trsp);
@@ -90,12 +93,13 @@ struct lock_torture_cxt {
int nrealwriters_stress;
int nrealreaders_stress;
bool debug_lock;
+ bool init_called;
atomic_t n_lock_torture_errors;
struct lock_torture_ops *cur_ops;
struct lock_stress_stats *lwsa; /* writer statistics */
struct lock_stress_stats *lrsa; /* reader statistics */
};
-static struct lock_torture_cxt cxt = { 0, 0, false,
+static struct lock_torture_cxt cxt = { 0, 0, false, false,
ATOMIC_INIT(0),
NULL, NULL};
/*
@@ -571,6 +575,11 @@ static void torture_percpu_rwsem_init(void)
BUG_ON(percpu_init_rwsem(&pcpu_rwsem));
}
+static void torture_percpu_rwsem_exit(void)
+{
+ percpu_free_rwsem(&pcpu_rwsem);
+}
+
static int torture_percpu_rwsem_down_write(void) __acquires(pcpu_rwsem)
{
percpu_down_write(&pcpu_rwsem);
@@ -595,6 +604,7 @@ static void torture_percpu_rwsem_up_read(void) __releases(pcpu_rwsem)
static struct lock_torture_ops percpu_rwsem_lock_ops = {
.init = torture_percpu_rwsem_init,
+ .exit = torture_percpu_rwsem_exit,
.writelock = torture_percpu_rwsem_down_write,
.write_delay = torture_rwsem_write_delay,
.task_boost = torture_boost_dummy,
@@ -632,6 +642,7 @@ static int lock_torture_writer(void *arg)
lwsp->n_lock_acquired++;
cxt.cur_ops->write_delay(&rand);
lock_is_write_held = false;
+ WRITE_ONCE(last_lock_release, jiffies);
cxt.cur_ops->writeunlock();
stutter_wait("lock_torture_writer");
@@ -786,9 +797,10 @@ static void lock_torture_cleanup(void)
/*
* Indicates early cleanup, meaning that the test has not run,
- * such as when passing bogus args when loading the module. As
- * such, only perform the underlying torture-specific cleanups,
- * and avoid anything related to locktorture.
+ * such as when passing bogus args when loading the module.
+ * However cxt->cur_ops.init() may have been invoked, so beside
+ * perform the underlying torture-specific cleanups, cur_ops.exit()
+ * will be invoked if needed.
*/
if (!cxt.lwsa && !cxt.lrsa)
goto end;
@@ -828,6 +840,11 @@ static void lock_torture_cleanup(void)
cxt.lrsa = NULL;
end:
+ if (cxt.init_called) {
+ if (cxt.cur_ops->exit)
+ cxt.cur_ops->exit();
+ cxt.init_called = false;
+ }
torture_cleanup_end();
}
@@ -868,14 +885,17 @@ static int __init lock_torture_init(void)
goto unwind;
}
- if (nwriters_stress == 0 && nreaders_stress == 0) {
+ if (nwriters_stress == 0 &&
+ (!cxt.cur_ops->readlock || nreaders_stress == 0)) {
pr_alert("lock-torture: must run at least one locking thread\n");
firsterr = -EINVAL;
goto unwind;
}
- if (cxt.cur_ops->init)
+ if (cxt.cur_ops->init) {
cxt.cur_ops->init();
+ cxt.init_called = true;
+ }
if (nwriters_stress >= 0)
cxt.nrealwriters_stress = nwriters_stress;
@@ -1038,6 +1058,10 @@ static int __init lock_torture_init(void)
unwind:
torture_init_end();
lock_torture_cleanup();
+ if (shutdown_secs) {
+ WARN_ON(!IS_MODULE(CONFIG_LOCK_TORTURE_TEST));
+ kernel_power_off();
+ }
return firsterr;
}
diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig
index b71e21f73c40..cdc57b4f6d48 100644
--- a/kernel/rcu/Kconfig
+++ b/kernel/rcu/Kconfig
@@ -221,19 +221,23 @@ config RCU_NOCB_CPU
Use this option to reduce OS jitter for aggressive HPC or
real-time workloads. It can also be used to offload RCU
callback invocation to energy-efficient CPUs in battery-powered
- asymmetric multiprocessors.
+ asymmetric multiprocessors. The price of this reduced jitter
+ is that the overhead of call_rcu() increases and that some
+ workloads will incur significant increases in context-switch
+ rates.
This option offloads callback invocation from the set of CPUs
specified at boot time by the rcu_nocbs parameter. For each
such CPU, a kthread ("rcuox/N") will be created to invoke
callbacks, where the "N" is the CPU being offloaded, and where
- the "p" for RCU-preempt (PREEMPTION kernels) and "s" for RCU-sched
- (!PREEMPTION kernels). Nothing prevents this kthread from running
- on the specified CPUs, but (1) the kthreads may be preempted
- between each callback, and (2) affinity or cgroups can be used
- to force the kthreads to run on whatever set of CPUs is desired.
-
- Say Y here if you want to help to debug reduced OS jitter.
+ the "x" is "p" for RCU-preempt (PREEMPTION kernels) and "s" for
+ RCU-sched (!PREEMPTION kernels). Nothing prevents this kthread
+ from running on the specified CPUs, but (1) the kthreads may be
+ preempted between each callback, and (2) affinity or cgroups can
+ be used to force the kthreads to run on whatever set of CPUs is
+ desired.
+
+ Say Y here if you need reduced OS jitter, despite added overhead.
Say N here if you are unsure.
config TASKS_TRACE_RCU_READ_MB
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index e01cba5e4b52..59ef1ae6dc37 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -533,4 +533,20 @@ static inline bool rcu_is_nocb_cpu(int cpu) { return false; }
static inline void rcu_bind_current_to_nocb(void) { }
#endif
+#if !defined(CONFIG_TINY_RCU) && defined(CONFIG_TASKS_RCU)
+void show_rcu_tasks_classic_gp_kthread(void);
+#else
+static inline void show_rcu_tasks_classic_gp_kthread(void) {}
+#endif
+#if !defined(CONFIG_TINY_RCU) && defined(CONFIG_TASKS_RUDE_RCU)
+void show_rcu_tasks_rude_gp_kthread(void);
+#else
+static inline void show_rcu_tasks_rude_gp_kthread(void) {}
+#endif
+#if !defined(CONFIG_TINY_RCU) && defined(CONFIG_TASKS_TRACE_RCU)
+void show_rcu_tasks_trace_gp_kthread(void);
+#else
+static inline void show_rcu_tasks_trace_gp_kthread(void) {}
+#endif
+
#endif /* __LINUX_RCU_H */
diff --git a/kernel/rcu/rcu_segcblist.h b/kernel/rcu/rcu_segcblist.h
index 5c293afc07b8..492262bcb591 100644
--- a/kernel/rcu/rcu_segcblist.h
+++ b/kernel/rcu/rcu_segcblist.h
@@ -62,7 +62,7 @@ static inline bool rcu_segcblist_is_enabled(struct rcu_segcblist *rsclp)
/* Is the specified rcu_segcblist offloaded? */
static inline bool rcu_segcblist_is_offloaded(struct rcu_segcblist *rsclp)
{
- return rsclp->offloaded;
+ return IS_ENABLED(CONFIG_RCU_NOCB_CPU) && rsclp->offloaded;
}
/*
diff --git a/kernel/rcu/rcuscale.c b/kernel/rcu/rcuscale.c
index 2819b95479af..06491d5530db 100644
--- a/kernel/rcu/rcuscale.c
+++ b/kernel/rcu/rcuscale.c
@@ -38,6 +38,7 @@
#include <asm/byteorder.h>
#include <linux/torture.h>
#include <linux/vmalloc.h>
+#include <linux/rcupdate_trace.h>
#include "rcu.h"
@@ -294,6 +295,35 @@ static struct rcu_scale_ops tasks_ops = {
.name = "tasks"
};
+/*
+ * Definitions for RCU-tasks-trace scalability testing.
+ */
+
+static int tasks_trace_scale_read_lock(void)
+{
+ rcu_read_lock_trace();
+ return 0;
+}
+
+static void tasks_trace_scale_read_unlock(int idx)
+{
+ rcu_read_unlock_trace();
+}
+
+static struct rcu_scale_ops tasks_tracing_ops = {
+ .ptype = RCU_TASKS_FLAVOR,
+ .init = rcu_sync_scale_init,
+ .readlock = tasks_trace_scale_read_lock,
+ .readunlock = tasks_trace_scale_read_unlock,
+ .get_gp_seq = rcu_no_completed,
+ .gp_diff = rcu_seq_diff,
+ .async = call_rcu_tasks_trace,
+ .gp_barrier = rcu_barrier_tasks_trace,
+ .sync = synchronize_rcu_tasks_trace,
+ .exp_sync = synchronize_rcu_tasks_trace,
+ .name = "tasks-tracing"
+};
+
static unsigned long rcuscale_seq_diff(unsigned long new, unsigned long old)
{
if (!cur_ops->gp_diff)
@@ -754,7 +784,7 @@ rcu_scale_init(void)
long i;
int firsterr = 0;
static struct rcu_scale_ops *scale_ops[] = {
- &rcu_ops, &srcu_ops, &srcud_ops, &tasks_ops,
+ &rcu_ops, &srcu_ops, &srcud_ops, &tasks_ops, &tasks_tracing_ops
};
if (!torture_init_begin(scale_type, verbose))
@@ -772,7 +802,6 @@ rcu_scale_init(void)
for (i = 0; i < ARRAY_SIZE(scale_ops); i++)
pr_cont(" %s", scale_ops[i]->name);
pr_cont("\n");
- WARN_ON(!IS_MODULE(CONFIG_RCU_SCALE_TEST));
firsterr = -EINVAL;
cur_ops = NULL;
goto unwind;
@@ -846,6 +875,10 @@ rcu_scale_init(void)
unwind:
torture_init_end();
rcu_scale_cleanup();
+ if (shutdown) {
+ WARN_ON(!IS_MODULE(CONFIG_RCU_SCALE_TEST));
+ kernel_power_off();
+ }
return firsterr;
}
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 916ea4f66e4b..528ed10b78fd 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -317,6 +317,7 @@ struct rcu_torture_ops {
void (*cb_barrier)(void);
void (*fqs)(void);
void (*stats)(void);
+ void (*gp_kthread_dbg)(void);
int (*stall_dur)(void);
int irq_capable;
int can_boost;
@@ -466,6 +467,7 @@ static struct rcu_torture_ops rcu_ops = {
.cb_barrier = rcu_barrier,
.fqs = rcu_force_quiescent_state,
.stats = NULL,
+ .gp_kthread_dbg = show_rcu_gp_kthreads,
.stall_dur = rcu_jiffies_till_stall_check,
.irq_capable = 1,
.can_boost = rcu_can_boost(),
@@ -693,6 +695,7 @@ static struct rcu_torture_ops tasks_ops = {
.exp_sync = synchronize_rcu_mult_test,
.call = call_rcu_tasks,
.cb_barrier = rcu_barrier_tasks,
+ .gp_kthread_dbg = show_rcu_tasks_classic_gp_kthread,
.fqs = NULL,
.stats = NULL,
.irq_capable = 1,
@@ -762,6 +765,7 @@ static struct rcu_torture_ops tasks_rude_ops = {
.exp_sync = synchronize_rcu_tasks_rude,
.call = call_rcu_tasks_rude,
.cb_barrier = rcu_barrier_tasks_rude,
+ .gp_kthread_dbg = show_rcu_tasks_rude_gp_kthread,
.fqs = NULL,
.stats = NULL,
.irq_capable = 1,
@@ -800,6 +804,7 @@ static struct rcu_torture_ops tasks_tracing_ops = {
.exp_sync = synchronize_rcu_tasks_trace,
.call = call_rcu_tasks_trace,
.cb_barrier = rcu_barrier_tasks_trace,
+ .gp_kthread_dbg = show_rcu_tasks_trace_gp_kthread,
.fqs = NULL,
.stats = NULL,
.irq_capable = 1,
@@ -912,7 +917,8 @@ static int rcu_torture_boost(void *arg)
oldstarttime = boost_starttime;
while (time_before(jiffies, oldstarttime)) {
schedule_timeout_interruptible(oldstarttime - jiffies);
- stutter_wait("rcu_torture_boost");
+ if (stutter_wait("rcu_torture_boost"))
+ sched_set_fifo_low(current);
if (torture_must_stop())
goto checkwait;
}
@@ -932,7 +938,8 @@ static int rcu_torture_boost(void *arg)
jiffies);
call_rcu_time = jiffies;
}
- stutter_wait("rcu_torture_boost");
+ if (stutter_wait("rcu_torture_boost"))
+ sched_set_fifo_low(current);
if (torture_must_stop())
goto checkwait;
}
@@ -964,7 +971,8 @@ static int rcu_torture_boost(void *arg)
}
/* Go do the stutter. */
-checkwait: stutter_wait("rcu_torture_boost");
+checkwait: if (stutter_wait("rcu_torture_boost"))
+ sched_set_fifo_low(current);
} while (!torture_must_stop());
/* Clean up and exit. */
@@ -987,6 +995,7 @@ rcu_torture_fqs(void *arg)
{
unsigned long fqs_resume_time;
int fqs_burst_remaining;
+ int oldnice = task_nice(current);
VERBOSE_TOROUT_STRING("rcu_torture_fqs task started");
do {
@@ -1002,7 +1011,8 @@ rcu_torture_fqs(void *arg)
udelay(fqs_holdoff);
fqs_burst_remaining -= fqs_holdoff;
}
- stutter_wait("rcu_torture_fqs");
+ if (stutter_wait("rcu_torture_fqs"))
+ sched_set_normal(current, oldnice);
} while (!torture_must_stop());
torture_kthread_stopping("rcu_torture_fqs");
return 0;
@@ -1022,9 +1032,11 @@ rcu_torture_writer(void *arg)
bool gp_cond1 = gp_cond, gp_exp1 = gp_exp, gp_normal1 = gp_normal;
bool gp_sync1 = gp_sync;
int i;
+ int oldnice = task_nice(current);
struct rcu_torture *rp;
struct rcu_torture *old_rp;
static DEFINE_TORTURE_RANDOM(rand);
+ bool stutter_waited;
int synctype[] = { RTWS_DEF_FREE, RTWS_EXP_SYNC,
RTWS_COND_GET, RTWS_SYNC };
int nsynctypes = 0;
@@ -1143,7 +1155,8 @@ rcu_torture_writer(void *arg)
!rcu_gp_is_normal();
}
rcu_torture_writer_state = RTWS_STUTTER;
- if (stutter_wait("rcu_torture_writer") &&
+ stutter_waited = stutter_wait("rcu_torture_writer");
+ if (stutter_waited &&
!READ_ONCE(rcu_fwd_cb_nodelay) &&
!cur_ops->slow_gps &&
!torture_must_stop() &&
@@ -1155,6 +1168,8 @@ rcu_torture_writer(void *arg)
rcu_ftrace_dump(DUMP_ALL);
WARN(1, "%s: rtort_pipe_count: %d\n", __func__, rcu_tortures[i].rtort_pipe_count);
}
+ if (stutter_waited)
+ sched_set_normal(current, oldnice);
} while (!torture_must_stop());
rcu_torture_current = NULL; // Let stats task know that we are done.
/* Reset expediting back to unexpedited. */
@@ -1594,7 +1609,8 @@ rcu_torture_stats_print(void)
sched_show_task(wtp);
splatted = true;
}
- show_rcu_gp_kthreads();
+ if (cur_ops->gp_kthread_dbg)
+ cur_ops->gp_kthread_dbg();
rcu_ftrace_dump(DUMP_ALL);
}
rtcv_snap = rcu_torture_current_version;
@@ -1913,7 +1929,9 @@ static void rcu_torture_fwd_prog_nr(struct rcu_fwd *rfp,
unsigned long stopat;
static DEFINE_TORTURE_RANDOM(trs);
- if (cur_ops->call && cur_ops->sync && cur_ops->cb_barrier) {
+ if (!cur_ops->sync)
+ return; // Cannot do need_resched() forward progress testing without ->sync.
+ if (cur_ops->call && cur_ops->cb_barrier) {
init_rcu_head_on_stack(&fcs.rh);
selfpropcb = true;
}
@@ -2103,6 +2121,7 @@ static struct notifier_block rcutorture_oom_nb = {
/* Carry out grace-period forward-progress testing. */
static int rcu_torture_fwd_prog(void *args)
{
+ int oldnice = task_nice(current);
struct rcu_fwd *rfp = args;
int tested = 0;
int tested_tries = 0;
@@ -2121,7 +2140,8 @@ static int rcu_torture_fwd_prog(void *args)
rcu_torture_fwd_prog_cr(rfp);
/* Avoid slow periods, better to test when busy. */
- stutter_wait("rcu_torture_fwd_prog");
+ if (stutter_wait("rcu_torture_fwd_prog"))
+ sched_set_normal(current, oldnice);
} while (!torture_must_stop());
/* Short runs might not contain a valid forward-progress attempt. */
WARN_ON(!tested && tested_tries >= 5);
@@ -2137,8 +2157,8 @@ static int __init rcu_torture_fwd_prog_init(void)
if (!fwd_progress)
return 0; /* Not requested, so don't do it. */
- if (!cur_ops->stall_dur || cur_ops->stall_dur() <= 0 ||
- cur_ops == &rcu_busted_ops) {
+ if ((!cur_ops->sync && !cur_ops->call) ||
+ !cur_ops->stall_dur || cur_ops->stall_dur() <= 0 || cur_ops == &rcu_busted_ops) {
VERBOSE_TOROUT_STRING("rcu_torture_fwd_prog_init: Disabled, unsupported by RCU flavor under test");
return 0;
}
@@ -2472,7 +2492,8 @@ rcu_torture_cleanup(void)
return;
}
- show_rcu_gp_kthreads();
+ if (cur_ops->gp_kthread_dbg)
+ cur_ops->gp_kthread_dbg();
rcu_torture_read_exit_cleanup();
rcu_torture_barrier_cleanup();
rcu_torture_fwd_prog_cleanup();
@@ -2484,13 +2505,13 @@ rcu_torture_cleanup(void)
torture_stop_kthread(rcu_torture_reader,
reader_tasks[i]);
kfree(reader_tasks);
+ reader_tasks = NULL;
}
if (fakewriter_tasks) {
- for (i = 0; i < nfakewriters; i++) {
+ for (i = 0; i < nfakewriters; i++)
torture_stop_kthread(rcu_torture_fakewriter,
fakewriter_tasks[i]);
- }
kfree(fakewriter_tasks);
fakewriter_tasks = NULL;
}
@@ -2647,7 +2668,6 @@ rcu_torture_init(void)
for (i = 0; i < ARRAY_SIZE(torture_ops); i++)
pr_cont(" %s", torture_ops[i]->name);
pr_cont("\n");
- WARN_ON(!IS_MODULE(CONFIG_RCU_TORTURE_TEST));
firsterr = -EINVAL;
cur_ops = NULL;
goto unwind;
@@ -2815,6 +2835,10 @@ rcu_torture_init(void)
unwind:
torture_init_end();
rcu_torture_cleanup();
+ if (shutdown_secs) {
+ WARN_ON(!IS_MODULE(CONFIG_RCU_TORTURE_TEST));
+ kernel_power_off();
+ }
return firsterr;
}
diff --git a/kernel/rcu/refscale.c b/kernel/rcu/refscale.c
index 952595c678b3..23ff36a66f97 100644
--- a/kernel/rcu/refscale.c
+++ b/kernel/rcu/refscale.c
@@ -658,7 +658,6 @@ ref_scale_init(void)
for (i = 0; i < ARRAY_SIZE(scale_ops); i++)
pr_cont(" %s", scale_ops[i]->name);
pr_cont("\n");
- WARN_ON(!IS_MODULE(CONFIG_RCU_REF_SCALE_TEST));
firsterr = -EINVAL;
cur_ops = NULL;
goto unwind;
@@ -681,6 +680,12 @@ ref_scale_init(void)
// Reader tasks (default to ~75% of online CPUs).
if (nreaders < 0)
nreaders = (num_online_cpus() >> 1) + (num_online_cpus() >> 2);
+ if (WARN_ONCE(loops <= 0, "%s: loops = %ld, adjusted to 1\n", __func__, loops))
+ loops = 1;
+ if (WARN_ONCE(nreaders <= 0, "%s: nreaders = %d, adjusted to 1\n", __func__, nreaders))
+ nreaders = 1;
+ if (WARN_ONCE(nruns <= 0, "%s: nruns = %d, adjusted to 1\n", __func__, nruns))
+ nruns = 1;
reader_tasks = kcalloc(nreaders, sizeof(reader_tasks[0]),
GFP_KERNEL);
if (!reader_tasks) {
@@ -712,6 +717,10 @@ ref_scale_init(void)
unwind:
torture_init_end();
ref_scale_cleanup();
+ if (shutdown) {
+ WARN_ON(!IS_MODULE(CONFIG_RCU_REF_SCALE_TEST));
+ kernel_power_off();
+ }
return firsterr;
}
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index c13348ee80a5..0f23d20d485a 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -177,11 +177,13 @@ static int init_srcu_struct_fields(struct srcu_struct *ssp, bool is_static)
INIT_DELAYED_WORK(&ssp->work, process_srcu);
if (!is_static)
ssp->sda = alloc_percpu(struct srcu_data);
+ if (!ssp->sda)
+ return -ENOMEM;
init_srcu_struct_nodes(ssp, is_static);
ssp->srcu_gp_seq_needed_exp = 0;
ssp->srcu_last_gp_end = ktime_get_mono_fast_ns();
smp_store_release(&ssp->srcu_gp_seq_needed, 0); /* Init done. */
- return ssp->sda ? 0 : -ENOMEM;
+ return 0;
}
#ifdef CONFIG_DEBUG_LOCK_ALLOC
@@ -906,7 +908,7 @@ static void __synchronize_srcu(struct srcu_struct *ssp, bool do_norm)
{
struct rcu_synchronize rcu;
- RCU_LOCKDEP_WARN(lock_is_held(&ssp->dep_map) ||
+ RCU_LOCKDEP_WARN(lockdep_is_held(ssp) ||
lock_is_held(&rcu_bh_lock_map) ||
lock_is_held(&rcu_lock_map) ||
lock_is_held(&rcu_sched_lock_map),
diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h
index d5d9f2d03e8a..35bdcfd84d42 100644
--- a/kernel/rcu/tasks.h
+++ b/kernel/rcu/tasks.h
@@ -290,7 +290,7 @@ static void show_rcu_tasks_generic_gp_kthread(struct rcu_tasks *rtp, char *s)
".C"[!!data_race(rtp->cbs_head)],
s);
}
-#endif /* #ifndef CONFIG_TINY_RCU */
+#endif // #ifndef CONFIG_TINY_RCU
static void exit_tasks_rcu_finish_trace(struct task_struct *t);
@@ -335,23 +335,18 @@ static void rcu_tasks_wait_gp(struct rcu_tasks *rtp)
// Start off with initial wait and slowly back off to 1 HZ wait.
fract = rtp->init_fract;
- if (fract > HZ)
- fract = HZ;
- for (;;) {
+ while (!list_empty(&holdouts)) {
bool firstreport;
bool needreport;
int rtst;
- if (list_empty(&holdouts))
- break;
-
/* Slowly back off waiting for holdouts */
set_tasks_gp_state(rtp, RTGS_WAIT_SCAN_HOLDOUTS);
- schedule_timeout_idle(HZ/fract);
+ schedule_timeout_idle(fract);
- if (fract > 1)
- fract--;
+ if (fract < HZ)
+ fract++;
rtst = READ_ONCE(rcu_task_stall_timeout);
needreport = rtst > 0 && time_after(jiffies, lastreport + rtst);
@@ -560,7 +555,7 @@ EXPORT_SYMBOL_GPL(rcu_barrier_tasks);
static int __init rcu_spawn_tasks_kthread(void)
{
rcu_tasks.gp_sleep = HZ / 10;
- rcu_tasks.init_fract = 10;
+ rcu_tasks.init_fract = HZ / 10;
rcu_tasks.pregp_func = rcu_tasks_pregp_step;
rcu_tasks.pertask_func = rcu_tasks_pertask;
rcu_tasks.postscan_func = rcu_tasks_postscan;
@@ -571,12 +566,13 @@ static int __init rcu_spawn_tasks_kthread(void)
}
core_initcall(rcu_spawn_tasks_kthread);
-#ifndef CONFIG_TINY_RCU
-static void show_rcu_tasks_classic_gp_kthread(void)
+#if !defined(CONFIG_TINY_RCU)
+void show_rcu_tasks_classic_gp_kthread(void)
{
show_rcu_tasks_generic_gp_kthread(&rcu_tasks, "");
}
-#endif /* #ifndef CONFIG_TINY_RCU */
+EXPORT_SYMBOL_GPL(show_rcu_tasks_classic_gp_kthread);
+#endif // !defined(CONFIG_TINY_RCU)
/* Do the srcu_read_lock() for the above synchronize_srcu(). */
void exit_tasks_rcu_start(void) __acquires(&tasks_rcu_exit_srcu)
@@ -598,7 +594,6 @@ void exit_tasks_rcu_finish(void) __releases(&tasks_rcu_exit_srcu)
}
#else /* #ifdef CONFIG_TASKS_RCU */
-static inline void show_rcu_tasks_classic_gp_kthread(void) { }
void exit_tasks_rcu_start(void) { }
void exit_tasks_rcu_finish(void) { exit_tasks_rcu_finish_trace(current); }
#endif /* #else #ifdef CONFIG_TASKS_RCU */
@@ -699,16 +694,14 @@ static int __init rcu_spawn_tasks_rude_kthread(void)
}
core_initcall(rcu_spawn_tasks_rude_kthread);
-#ifndef CONFIG_TINY_RCU
-static void show_rcu_tasks_rude_gp_kthread(void)
+#if !defined(CONFIG_TINY_RCU)
+void show_rcu_tasks_rude_gp_kthread(void)
{
show_rcu_tasks_generic_gp_kthread(&rcu_tasks_rude, "");
}
-#endif /* #ifndef CONFIG_TINY_RCU */
-
-#else /* #ifdef CONFIG_TASKS_RUDE_RCU */
-static void show_rcu_tasks_rude_gp_kthread(void) {}
-#endif /* #else #ifdef CONFIG_TASKS_RUDE_RCU */
+EXPORT_SYMBOL_GPL(show_rcu_tasks_rude_gp_kthread);
+#endif // !defined(CONFIG_TINY_RCU)
+#endif /* #ifdef CONFIG_TASKS_RUDE_RCU */
////////////////////////////////////////////////////////////////////////
//
@@ -1183,12 +1176,12 @@ static int __init rcu_spawn_tasks_trace_kthread(void)
{
if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB)) {
rcu_tasks_trace.gp_sleep = HZ / 10;
- rcu_tasks_trace.init_fract = 10;
+ rcu_tasks_trace.init_fract = HZ / 10;
} else {
rcu_tasks_trace.gp_sleep = HZ / 200;
if (rcu_tasks_trace.gp_sleep <= 0)
rcu_tasks_trace.gp_sleep = 1;
- rcu_tasks_trace.init_fract = HZ / 5;
+ rcu_tasks_trace.init_fract = HZ / 200;
if (rcu_tasks_trace.init_fract <= 0)
rcu_tasks_trace.init_fract = 1;
}
@@ -1202,8 +1195,8 @@ static int __init rcu_spawn_tasks_trace_kthread(void)
}
core_initcall(rcu_spawn_tasks_trace_kthread);
-#ifndef CONFIG_TINY_RCU
-static void show_rcu_tasks_trace_gp_kthread(void)
+#if !defined(CONFIG_TINY_RCU)
+void show_rcu_tasks_trace_gp_kthread(void)
{
char buf[64];
@@ -1213,11 +1206,11 @@ static void show_rcu_tasks_trace_gp_kthread(void)
data_race(n_heavy_reader_attempts));
show_rcu_tasks_generic_gp_kthread(&rcu_tasks_trace, buf);
}
-#endif /* #ifndef CONFIG_TINY_RCU */
+EXPORT_SYMBOL_GPL(show_rcu_tasks_trace_gp_kthread);
+#endif // !defined(CONFIG_TINY_RCU)
#else /* #ifdef CONFIG_TASKS_TRACE_RCU */
static void exit_tasks_rcu_finish_trace(struct task_struct *t) { }
-static inline void show_rcu_tasks_trace_gp_kthread(void) {}
#endif /* #else #ifdef CONFIG_TASKS_TRACE_RCU */
#ifndef CONFIG_TINY_RCU
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index bd04b09b84b3..b7124c119c0b 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -177,7 +177,7 @@ module_param(rcu_unlock_delay, int, 0444);
* per-CPU. Object size is equal to one page. This value
* can be changed at boot time.
*/
-static int rcu_min_cached_objs = 2;
+static int rcu_min_cached_objs = 5;
module_param(rcu_min_cached_objs, int, 0444);
/* Retrieve RCU kthreads priority for rcutorture */
@@ -341,6 +341,14 @@ static bool rcu_dynticks_in_eqs(int snap)
return !(snap & RCU_DYNTICK_CTRL_CTR);
}
+/* Return true if the specified CPU is currently idle from an RCU viewpoint. */
+bool rcu_is_idle_cpu(int cpu)
+{
+ struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
+
+ return rcu_dynticks_in_eqs(rcu_dynticks_snap(rdp));
+}
+
/*
* Return true if the CPU corresponding to the specified rcu_data
* structure has spent some time in an extended quiescent state since
@@ -546,12 +554,12 @@ static int param_set_next_fqs_jiffies(const char *val, const struct kernel_param
return ret;
}
-static struct kernel_param_ops first_fqs_jiffies_ops = {
+static const struct kernel_param_ops first_fqs_jiffies_ops = {
.set = param_set_first_fqs_jiffies,
.get = param_get_ulong,
};
-static struct kernel_param_ops next_fqs_jiffies_ops = {
+static const struct kernel_param_ops next_fqs_jiffies_ops = {
.set = param_set_next_fqs_jiffies,
.get = param_get_ulong,
};
@@ -928,8 +936,8 @@ void __rcu_irq_enter_check_tick(void)
{
struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
- // Enabling the tick is unsafe in NMI handlers.
- if (WARN_ON_ONCE(in_nmi()))
+ // If we're here from NMI there's nothing to do.
+ if (in_nmi())
return;
RCU_LOCKDEP_WARN(rcu_dynticks_curr_cpu_in_eqs(),
@@ -1093,8 +1101,11 @@ static void rcu_disable_urgency_upon_qs(struct rcu_data *rdp)
* CPU can safely enter RCU read-side critical sections. In other words,
* if the current CPU is not in its idle loop or is in an interrupt or
* NMI handler, return true.
+ *
+ * Make notrace because it can be called by the internal functions of
+ * ftrace, and making this notrace removes unnecessary recursion calls.
*/
-bool rcu_is_watching(void)
+notrace bool rcu_is_watching(void)
{
bool ret;
@@ -1149,7 +1160,7 @@ bool rcu_lockdep_current_cpu_online(void)
preempt_disable_notrace();
rdp = this_cpu_ptr(&rcu_data);
rnp = rdp->mynode;
- if (rdp->grpmask & rcu_rnp_online_cpus(rnp))
+ if (rdp->grpmask & rcu_rnp_online_cpus(rnp) || READ_ONCE(rnp->ofl_seq) & 0x1)
ret = true;
preempt_enable_notrace();
return ret;
@@ -1603,8 +1614,7 @@ static bool __note_gp_changes(struct rcu_node *rnp, struct rcu_data *rdp)
{
bool ret = false;
bool need_qs;
- const bool offloaded = IS_ENABLED(CONFIG_RCU_NOCB_CPU) &&
- rcu_segcblist_is_offloaded(&rdp->cblist);
+ const bool offloaded = rcu_segcblist_is_offloaded(&rdp->cblist);
raw_lockdep_assert_held_rcu_node(rnp);
@@ -1715,6 +1725,7 @@ static void rcu_strict_gp_boundary(void *unused)
*/
static bool rcu_gp_init(void)
{
+ unsigned long firstseq;
unsigned long flags;
unsigned long oldmask;
unsigned long mask;
@@ -1758,6 +1769,12 @@ static bool rcu_gp_init(void)
*/
rcu_state.gp_state = RCU_GP_ONOFF;
rcu_for_each_leaf_node(rnp) {
+ smp_mb(); // Pair with barriers used when updating ->ofl_seq to odd values.
+ firstseq = READ_ONCE(rnp->ofl_seq);
+ if (firstseq & 0x1)
+ while (firstseq == READ_ONCE(rnp->ofl_seq))
+ schedule_timeout_idle(1); // Can't wake unless RCU is watching.
+ smp_mb(); // Pair with barriers used when updating ->ofl_seq to even values.
raw_spin_lock(&rcu_state.ofl_lock);
raw_spin_lock_irq_rcu_node(rnp);
if (rnp->qsmaskinit == rnp->qsmaskinitnext &&
@@ -2048,8 +2065,7 @@ static void rcu_gp_cleanup(void)
needgp = true;
}
/* Advance CBs to reduce false positives below. */
- offloaded = IS_ENABLED(CONFIG_RCU_NOCB_CPU) &&
- rcu_segcblist_is_offloaded(&rdp->cblist);
+ offloaded = rcu_segcblist_is_offloaded(&rdp->cblist);
if ((offloaded || !rcu_accelerate_cbs(rnp, rdp)) && needgp) {
WRITE_ONCE(rcu_state.gp_flags, RCU_GP_FLAG_INIT);
WRITE_ONCE(rcu_state.gp_req_activity, jiffies);
@@ -2248,8 +2264,7 @@ rcu_report_qs_rdp(struct rcu_data *rdp)
unsigned long flags;
unsigned long mask;
bool needwake = false;
- const bool offloaded = IS_ENABLED(CONFIG_RCU_NOCB_CPU) &&
- rcu_segcblist_is_offloaded(&rdp->cblist);
+ const bool offloaded = rcu_segcblist_is_offloaded(&rdp->cblist);
struct rcu_node *rnp;
WARN_ON_ONCE(rdp->cpu != smp_processor_id());
@@ -2399,6 +2414,7 @@ int rcutree_dead_cpu(unsigned int cpu)
if (!IS_ENABLED(CONFIG_HOTPLUG_CPU))
return 0;
+ WRITE_ONCE(rcu_state.n_online_cpus, rcu_state.n_online_cpus - 1);
/* Adjust any no-longer-needed kthreads. */
rcu_boost_kthread_setaffinity(rnp, -1);
/* Do any needed no-CB deferred wakeups from this CPU. */
@@ -2417,8 +2433,7 @@ static void rcu_do_batch(struct rcu_data *rdp)
{
int div;
unsigned long flags;
- const bool offloaded = IS_ENABLED(CONFIG_RCU_NOCB_CPU) &&
- rcu_segcblist_is_offloaded(&rdp->cblist);
+ const bool offloaded = rcu_segcblist_is_offloaded(&rdp->cblist);
struct rcu_head *rhp;
struct rcu_cblist rcl = RCU_CBLIST_INITIALIZER(rcl);
long bl, count;
@@ -2675,8 +2690,7 @@ static __latent_entropy void rcu_core(void)
unsigned long flags;
struct rcu_data *rdp = raw_cpu_ptr(&rcu_data);
struct rcu_node *rnp = rdp->mynode;
- const bool offloaded = IS_ENABLED(CONFIG_RCU_NOCB_CPU) &&
- rcu_segcblist_is_offloaded(&rdp->cblist);
+ const bool offloaded = rcu_segcblist_is_offloaded(&rdp->cblist);
if (cpu_is_offline(smp_processor_id()))
return;
@@ -2978,8 +2992,7 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func)
rcu_segcblist_n_cbs(&rdp->cblist));
/* Go handle any RCU core processing required. */
- if (IS_ENABLED(CONFIG_RCU_NOCB_CPU) &&
- unlikely(rcu_segcblist_is_offloaded(&rdp->cblist))) {
+ if (unlikely(rcu_segcblist_is_offloaded(&rdp->cblist))) {
__call_rcu_nocb_wake(rdp, was_alldone, flags); /* unlocks */
} else {
__call_rcu_core(rdp, head, flags);
@@ -3084,6 +3097,9 @@ struct kfree_rcu_cpu_work {
* In order to save some per-cpu space the list is singular.
* Even though it is lockless an access has to be protected by the
* per-cpu lock.
+ * @page_cache_work: A work to refill the cache when it is empty
+ * @work_in_progress: Indicates that page_cache_work is running
+ * @hrtimer: A hrtimer for scheduling a page_cache_work
* @nr_bkv_objs: number of allocated objects at @bkvcache.
*
* This is a per-CPU structure. The reason that it is not included in
@@ -3100,6 +3116,11 @@ struct kfree_rcu_cpu {
bool monitor_todo;
bool initialized;
int count;
+
+ struct work_struct page_cache_work;
+ atomic_t work_in_progress;
+ struct hrtimer hrtimer;
+
struct llist_head bkvcache;
int nr_bkv_objs;
};
@@ -3217,10 +3238,10 @@ static void kfree_rcu_work(struct work_struct *work)
}
rcu_lock_release(&rcu_callback_map);
- krcp = krc_this_cpu_lock(&flags);
+ raw_spin_lock_irqsave(&krcp->lock, flags);
if (put_cached_bnode(krcp, bkvhead[i]))
bkvhead[i] = NULL;
- krc_this_cpu_unlock(krcp, flags);
+ raw_spin_unlock_irqrestore(&krcp->lock, flags);
if (bkvhead[i])
free_page((unsigned long) bkvhead[i]);
@@ -3347,6 +3368,57 @@ static void kfree_rcu_monitor(struct work_struct *work)
raw_spin_unlock_irqrestore(&krcp->lock, flags);
}
+static enum hrtimer_restart
+schedule_page_work_fn(struct hrtimer *t)
+{
+ struct kfree_rcu_cpu *krcp =
+ container_of(t, struct kfree_rcu_cpu, hrtimer);
+
+ queue_work(system_highpri_wq, &krcp->page_cache_work);
+ return HRTIMER_NORESTART;
+}
+
+static void fill_page_cache_func(struct work_struct *work)
+{
+ struct kvfree_rcu_bulk_data *bnode;
+ struct kfree_rcu_cpu *krcp =
+ container_of(work, struct kfree_rcu_cpu,
+ page_cache_work);
+ unsigned long flags;
+ bool pushed;
+ int i;
+
+ for (i = 0; i < rcu_min_cached_objs; i++) {
+ bnode = (struct kvfree_rcu_bulk_data *)
+ __get_free_page(GFP_KERNEL | __GFP_NOWARN);
+
+ if (bnode) {
+ raw_spin_lock_irqsave(&krcp->lock, flags);
+ pushed = put_cached_bnode(krcp, bnode);
+ raw_spin_unlock_irqrestore(&krcp->lock, flags);
+
+ if (!pushed) {
+ free_page((unsigned long) bnode);
+ break;
+ }
+ }
+ }
+
+ atomic_set(&krcp->work_in_progress, 0);
+}
+
+static void
+run_page_cache_worker(struct kfree_rcu_cpu *krcp)
+{
+ if (rcu_scheduler_active == RCU_SCHEDULER_RUNNING &&
+ !atomic_xchg(&krcp->work_in_progress, 1)) {
+ hrtimer_init(&krcp->hrtimer, CLOCK_MONOTONIC,
+ HRTIMER_MODE_REL);
+ krcp->hrtimer.function = schedule_page_work_fn;
+ hrtimer_start(&krcp->hrtimer, 0, HRTIMER_MODE_REL);
+ }
+}
+
static inline bool
kvfree_call_rcu_add_ptr_to_bulk(struct kfree_rcu_cpu *krcp, void *ptr)
{
@@ -3363,32 +3435,8 @@ kvfree_call_rcu_add_ptr_to_bulk(struct kfree_rcu_cpu *krcp, void *ptr)
if (!krcp->bkvhead[idx] ||
krcp->bkvhead[idx]->nr_records == KVFREE_BULK_MAX_ENTR) {
bnode = get_cached_bnode(krcp);
- if (!bnode) {
- /*
- * To keep this path working on raw non-preemptible
- * sections, prevent the optional entry into the
- * allocator as it uses sleeping locks. In fact, even
- * if the caller of kfree_rcu() is preemptible, this
- * path still is not, as krcp->lock is a raw spinlock.
- * With additional page pre-allocation in the works,
- * hitting this return is going to be much less likely.
- */
- if (IS_ENABLED(CONFIG_PREEMPT_RT))
- return false;
-
- /*
- * NOTE: For one argument of kvfree_rcu() we can
- * drop the lock and get the page in sleepable
- * context. That would allow to maintain an array
- * for the CONFIG_PREEMPT_RT as well if no cached
- * pages are available.
- */
- bnode = (struct kvfree_rcu_bulk_data *)
- __get_free_page(GFP_NOWAIT | __GFP_NOWARN);
- }
-
/* Switch to emergency path. */
- if (unlikely(!bnode))
+ if (!bnode)
return false;
/* Initialize the new block. */
@@ -3452,12 +3500,10 @@ void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
goto unlock_return;
}
- /*
- * Under high memory pressure GFP_NOWAIT can fail,
- * in that case the emergency path is maintained.
- */
success = kvfree_call_rcu_add_ptr_to_bulk(krcp, ptr);
if (!success) {
+ run_page_cache_worker(krcp);
+
if (head == NULL)
// Inline if kvfree_rcu(one_arg) call.
goto unlock_return;
@@ -3567,7 +3613,7 @@ void __init kfree_rcu_scheduler_running(void)
* During early boot, any blocking grace-period wait automatically
* implies a grace period. Later on, this is never the case for PREEMPTION.
*
- * Howevr, because a context switch is a grace period for !PREEMPTION, any
+ * However, because a context switch is a grace period for !PREEMPTION, any
* blocking grace-period wait automatically implies a grace period if
* there is only one CPU online at any point time during execution of
* either synchronize_rcu() or synchronize_rcu_expedited(). It is OK to
@@ -3583,7 +3629,20 @@ static int rcu_blocking_is_gp(void)
return rcu_scheduler_active == RCU_SCHEDULER_INACTIVE;
might_sleep(); /* Check for RCU read-side critical section. */
preempt_disable();
- ret = num_online_cpus() <= 1;
+ /*
+ * If the rcu_state.n_online_cpus counter is equal to one,
+ * there is only one CPU, and that CPU sees all prior accesses
+ * made by any CPU that was online at the time of its access.
+ * Furthermore, if this counter is equal to one, its value cannot
+ * change until after the preempt_enable() below.
+ *
+ * Furthermore, if rcu_state.n_online_cpus is equal to one here,
+ * all later CPUs (both this one and any that come online later
+ * on) are guaranteed to see all accesses prior to this point
+ * in the code, without the need for additional memory barriers.
+ * Those memory barriers are provided by CPU-hotplug code.
+ */
+ ret = READ_ONCE(rcu_state.n_online_cpus) <= 1;
preempt_enable();
return ret;
}
@@ -3628,7 +3687,7 @@ void synchronize_rcu(void)
lock_is_held(&rcu_sched_lock_map),
"Illegal synchronize_rcu() in RCU read-side critical section");
if (rcu_blocking_is_gp())
- return;
+ return; // Context allows vacuous grace periods.
if (rcu_gp_is_expedited())
synchronize_rcu_expedited();
else
@@ -3707,13 +3766,13 @@ static int rcu_pending(int user)
return 1;
/* Does this CPU have callbacks ready to invoke? */
- if (rcu_segcblist_ready_cbs(&rdp->cblist))
+ if (!rcu_segcblist_is_offloaded(&rdp->cblist) &&
+ rcu_segcblist_ready_cbs(&rdp->cblist))
return 1;
/* Has RCU gone idle with this CPU needing another grace period? */
if (!gp_in_progress && rcu_segcblist_is_enabled(&rdp->cblist) &&
- (!IS_ENABLED(CONFIG_RCU_NOCB_CPU) ||
- !rcu_segcblist_is_offloaded(&rdp->cblist)) &&
+ !rcu_segcblist_is_offloaded(&rdp->cblist) &&
!rcu_segcblist_restempty(&rdp->cblist, RCU_NEXT_READY_TAIL))
return 1;
@@ -3969,6 +4028,7 @@ int rcutree_prepare_cpu(unsigned int cpu)
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
rcu_prepare_kthreads(cpu);
rcu_spawn_cpu_nocb_kthread(cpu);
+ WRITE_ONCE(rcu_state.n_online_cpus, rcu_state.n_online_cpus + 1);
return 0;
}
@@ -4057,6 +4117,9 @@ void rcu_cpu_starting(unsigned int cpu)
rnp = rdp->mynode;
mask = rdp->grpmask;
+ WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1);
+ WARN_ON_ONCE(!(rnp->ofl_seq & 0x1));
+ smp_mb(); // Pair with rcu_gp_cleanup()'s ->ofl_seq barrier().
raw_spin_lock_irqsave_rcu_node(rnp, flags);
WRITE_ONCE(rnp->qsmaskinitnext, rnp->qsmaskinitnext | mask);
newcpu = !(rnp->expmaskinitnext & mask);
@@ -4067,13 +4130,18 @@ void rcu_cpu_starting(unsigned int cpu)
rcu_gpnum_ovf(rnp, rdp); /* Offline-induced counter wrap? */
rdp->rcu_onl_gp_seq = READ_ONCE(rcu_state.gp_seq);
rdp->rcu_onl_gp_flags = READ_ONCE(rcu_state.gp_flags);
- if (rnp->qsmask & mask) { /* RCU waiting on incoming CPU? */
+
+ /* An incoming CPU should never be blocking a grace period. */
+ if (WARN_ON_ONCE(rnp->qsmask & mask)) { /* RCU waiting on incoming CPU? */
rcu_disable_urgency_upon_qs(rdp);
/* Report QS -after- changing ->qsmaskinitnext! */
rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags);
} else {
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
}
+ smp_mb(); // Pair with rcu_gp_cleanup()'s ->ofl_seq barrier().
+ WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1);
+ WARN_ON_ONCE(rnp->ofl_seq & 0x1);
smp_mb(); /* Ensure RCU read-side usage follows above initialization. */
}
@@ -4100,6 +4168,9 @@ void rcu_report_dead(unsigned int cpu)
/* Remove outgoing CPU from mask in the leaf rcu_node structure. */
mask = rdp->grpmask;
+ WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1);
+ WARN_ON_ONCE(!(rnp->ofl_seq & 0x1));
+ smp_mb(); // Pair with rcu_gp_cleanup()'s ->ofl_seq barrier().
raw_spin_lock(&rcu_state.ofl_lock);
raw_spin_lock_irqsave_rcu_node(rnp, flags); /* Enforce GP memory-order guarantee. */
rdp->rcu_ofl_gp_seq = READ_ONCE(rcu_state.gp_seq);
@@ -4112,6 +4183,9 @@ void rcu_report_dead(unsigned int cpu)
WRITE_ONCE(rnp->qsmaskinitnext, rnp->qsmaskinitnext & ~mask);
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
raw_spin_unlock(&rcu_state.ofl_lock);
+ smp_mb(); // Pair with rcu_gp_cleanup()'s ->ofl_seq barrier().
+ WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1);
+ WARN_ON_ONCE(rnp->ofl_seq & 0x1);
rdp->cpu_started = false;
}
@@ -4449,24 +4523,14 @@ static void __init kfree_rcu_batch_init(void)
for_each_possible_cpu(cpu) {
struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu);
- struct kvfree_rcu_bulk_data *bnode;
for (i = 0; i < KFREE_N_BATCHES; i++) {
INIT_RCU_WORK(&krcp->krw_arr[i].rcu_work, kfree_rcu_work);
krcp->krw_arr[i].krcp = krcp;
}
- for (i = 0; i < rcu_min_cached_objs; i++) {
- bnode = (struct kvfree_rcu_bulk_data *)
- __get_free_page(GFP_NOWAIT | __GFP_NOWARN);
-
- if (bnode)
- put_cached_bnode(krcp, bnode);
- else
- pr_err("Failed to preallocate for %d CPU!\n", cpu);
- }
-
INIT_DELAYED_WORK(&krcp->monitor_work, kfree_rcu_monitor);
+ INIT_WORK(&krcp->page_cache_work, fill_page_cache_func);
krcp->initialized = true;
}
if (register_shrinker(&kfree_rcu_shrinker))
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index e4f66b8f7c47..7708ed161f4a 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -56,6 +56,7 @@ struct rcu_node {
/* Initialized from ->qsmaskinitnext at the */
/* beginning of each grace period. */
unsigned long qsmaskinitnext;
+ unsigned long ofl_seq; /* CPU-hotplug operation sequence count. */
/* Online CPUs for next grace period. */
unsigned long expmask; /* CPUs or groups that need to check in */
/* to allow the current expedited GP */
@@ -298,6 +299,7 @@ struct rcu_state {
/* Hierarchy levels (+1 to */
/* shut bogus gcc warning) */
int ncpus; /* # CPUs seen so far. */
+ int n_online_cpus; /* # CPUs online for RCU. */
/* The following fields are guarded by the root rcu_node's lock. */
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index fd8a52e9a887..7e291ce0a1d6 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -628,7 +628,7 @@ static void rcu_read_unlock_special(struct task_struct *t)
set_tsk_need_resched(current);
set_preempt_need_resched();
if (IS_ENABLED(CONFIG_IRQ_WORK) && irqs_were_disabled &&
- !rdp->defer_qs_iw_pending && exp) {
+ !rdp->defer_qs_iw_pending && exp && cpu_online(rdp->cpu)) {
// Get scheduler to re-evaluate and call hooks.
// If !IRQ_WORK, FQS scan will eventually IPI.
init_irq_work(&rdp->defer_qs_iw,
diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h
index ca21d28a0f98..70d48c52fabc 100644
--- a/kernel/rcu/tree_stall.h
+++ b/kernel/rcu/tree_stall.h
@@ -13,6 +13,7 @@
/* panic() on RCU Stall sysctl. */
int sysctl_panic_on_rcu_stall __read_mostly;
+int sysctl_max_rcu_stall_to_panic __read_mostly;
#ifdef CONFIG_PROVE_RCU
#define RCU_STALL_DELAY_DELTA (5 * HZ)
@@ -106,6 +107,11 @@ early_initcall(check_cpu_stall_init);
/* If so specified via sysctl, panic, yielding cleaner stall-warning output. */
static void panic_on_rcu_stall(void)
{
+ static int cpu_stall;
+
+ if (++cpu_stall < sysctl_max_rcu_stall_to_panic)
+ return;
+
if (sysctl_panic_on_rcu_stall)
panic("RCU Stall\n");
}
diff --git a/kernel/scftorture.c b/kernel/scftorture.c
index 554a521ee235..d55a9f8cda3d 100644
--- a/kernel/scftorture.c
+++ b/kernel/scftorture.c
@@ -59,9 +59,10 @@ torture_param(int, onoff_holdoff, 0, "Time after boot before CPU hotplugs (s)");
torture_param(int, onoff_interval, 0, "Time between CPU hotplugs (s), 0=disable");
torture_param(int, shutdown_secs, 0, "Shutdown time (ms), <= zero to disable.");
torture_param(int, stat_interval, 60, "Number of seconds between stats printk()s.");
-torture_param(int, stutter_cpus, 5, "Number of jiffies to change CPUs under test, 0=disable");
+torture_param(int, stutter, 5, "Number of jiffies to run/halt test, 0=disable");
torture_param(bool, use_cpus_read_lock, 0, "Use cpus_read_lock() to exclude CPU hotplug.");
torture_param(int, verbose, 0, "Enable verbose debugging printk()s");
+torture_param(int, weight_resched, -1, "Testing weight for resched_cpu() operations.");
torture_param(int, weight_single, -1, "Testing weight for single-CPU no-wait operations.");
torture_param(int, weight_single_wait, -1, "Testing weight for single-CPU operations.");
torture_param(int, weight_many, -1, "Testing weight for multi-CPU no-wait operations.");
@@ -82,6 +83,7 @@ torture_param(bool, shutdown, SCFTORT_SHUTDOWN, "Shutdown at end of torture test
struct scf_statistics {
struct task_struct *task;
int cpu;
+ long long n_resched;
long long n_single;
long long n_single_ofl;
long long n_single_wait;
@@ -97,12 +99,15 @@ static struct task_struct *scf_torture_stats_task;
static DEFINE_PER_CPU(long long, scf_invoked_count);
// Data for random primitive selection
-#define SCF_PRIM_SINGLE 0
-#define SCF_PRIM_MANY 1
-#define SCF_PRIM_ALL 2
-#define SCF_NPRIMS (2 * 3) // Need wait and no-wait versions of each.
+#define SCF_PRIM_RESCHED 0
+#define SCF_PRIM_SINGLE 1
+#define SCF_PRIM_MANY 2
+#define SCF_PRIM_ALL 3
+#define SCF_NPRIMS 7 // Need wait and no-wait versions of each,
+ // except for SCF_PRIM_RESCHED.
static char *scf_prim_name[] = {
+ "resched_cpu",
"smp_call_function_single",
"smp_call_function_many",
"smp_call_function",
@@ -136,6 +141,8 @@ static char *bangstr = "";
static DEFINE_TORTURE_RANDOM_PERCPU(scf_torture_rand);
+extern void resched_cpu(int cpu); // An alternative IPI vector.
+
// Print torture statistics. Caller must ensure serialization.
static void scf_torture_stats_print(void)
{
@@ -148,6 +155,7 @@ static void scf_torture_stats_print(void)
for_each_possible_cpu(cpu)
invoked_count += data_race(per_cpu(scf_invoked_count, cpu));
for (i = 0; i < nthreads; i++) {
+ scfs.n_resched += scf_stats_p[i].n_resched;
scfs.n_single += scf_stats_p[i].n_single;
scfs.n_single_ofl += scf_stats_p[i].n_single_ofl;
scfs.n_single_wait += scf_stats_p[i].n_single_wait;
@@ -160,8 +168,8 @@ static void scf_torture_stats_print(void)
if (atomic_read(&n_errs) || atomic_read(&n_mb_in_errs) ||
atomic_read(&n_mb_out_errs) || atomic_read(&n_alloc_errs))
bangstr = "!!! ";
- pr_alert("%s %sscf_invoked_count %s: %lld single: %lld/%lld single_ofl: %lld/%lld many: %lld/%lld all: %lld/%lld ",
- SCFTORT_FLAG, bangstr, isdone ? "VER" : "ver", invoked_count,
+ pr_alert("%s %sscf_invoked_count %s: %lld resched: %lld single: %lld/%lld single_ofl: %lld/%lld many: %lld/%lld all: %lld/%lld ",
+ SCFTORT_FLAG, bangstr, isdone ? "VER" : "ver", invoked_count, scfs.n_resched,
scfs.n_single, scfs.n_single_wait, scfs.n_single_ofl, scfs.n_single_wait_ofl,
scfs.n_many, scfs.n_many_wait, scfs.n_all, scfs.n_all_wait);
torture_onoff_stats();
@@ -314,6 +322,13 @@ static void scftorture_invoke_one(struct scf_statistics *scfp, struct torture_ra
}
}
switch (scfsp->scfs_prim) {
+ case SCF_PRIM_RESCHED:
+ if (IS_BUILTIN(CONFIG_SCF_TORTURE_TEST)) {
+ cpu = torture_random(trsp) % nr_cpu_ids;
+ scfp->n_resched++;
+ resched_cpu(cpu);
+ }
+ break;
case SCF_PRIM_SINGLE:
cpu = torture_random(trsp) % nr_cpu_ids;
if (scfsp->scfs_wait)
@@ -421,6 +436,7 @@ static int scftorture_invoker(void *arg)
was_offline = false;
}
cond_resched();
+ stutter_wait("scftorture_invoker");
} while (!torture_must_stop());
VERBOSE_SCFTORTOUT("scftorture_invoker %d ended", scfp->cpu);
@@ -433,8 +449,8 @@ static void
scftorture_print_module_parms(const char *tag)
{
pr_alert(SCFTORT_FLAG
- "--- %s: verbose=%d holdoff=%d longwait=%d nthreads=%d onoff_holdoff=%d onoff_interval=%d shutdown_secs=%d stat_interval=%d stutter_cpus=%d use_cpus_read_lock=%d, weight_single=%d, weight_single_wait=%d, weight_many=%d, weight_many_wait=%d, weight_all=%d, weight_all_wait=%d\n", tag,
- verbose, holdoff, longwait, nthreads, onoff_holdoff, onoff_interval, shutdown, stat_interval, stutter_cpus, use_cpus_read_lock, weight_single, weight_single_wait, weight_many, weight_many_wait, weight_all, weight_all_wait);
+ "--- %s: verbose=%d holdoff=%d longwait=%d nthreads=%d onoff_holdoff=%d onoff_interval=%d shutdown_secs=%d stat_interval=%d stutter=%d use_cpus_read_lock=%d, weight_resched=%d, weight_single=%d, weight_single_wait=%d, weight_many=%d, weight_many_wait=%d, weight_all=%d, weight_all_wait=%d\n", tag,
+ verbose, holdoff, longwait, nthreads, onoff_holdoff, onoff_interval, shutdown, stat_interval, stutter, use_cpus_read_lock, weight_resched, weight_single, weight_single_wait, weight_many, weight_many_wait, weight_all, weight_all_wait);
}
static void scf_cleanup_handler(void *unused)
@@ -475,6 +491,7 @@ static int __init scf_torture_init(void)
{
long i;
int firsterr = 0;
+ unsigned long weight_resched1 = weight_resched;
unsigned long weight_single1 = weight_single;
unsigned long weight_single_wait1 = weight_single_wait;
unsigned long weight_many1 = weight_many;
@@ -487,9 +504,10 @@ static int __init scf_torture_init(void)
scftorture_print_module_parms("Start of test");
- if (weight_single == -1 && weight_single_wait == -1 &&
+ if (weight_resched == -1 && weight_single == -1 && weight_single_wait == -1 &&
weight_many == -1 && weight_many_wait == -1 &&
weight_all == -1 && weight_all_wait == -1) {
+ weight_resched1 = 2 * nr_cpu_ids;
weight_single1 = 2 * nr_cpu_ids;
weight_single_wait1 = 2 * nr_cpu_ids;
weight_many1 = 2;
@@ -497,6 +515,8 @@ static int __init scf_torture_init(void)
weight_all1 = 1;
weight_all_wait1 = 1;
} else {
+ if (weight_resched == -1)
+ weight_resched1 = 0;
if (weight_single == -1)
weight_single1 = 0;
if (weight_single_wait == -1)
@@ -517,6 +537,10 @@ static int __init scf_torture_init(void)
firsterr = -EINVAL;
goto unwind;
}
+ if (IS_BUILTIN(CONFIG_SCF_TORTURE_TEST))
+ scf_sel_add(weight_resched1, SCF_PRIM_RESCHED, false);
+ else if (weight_resched1)
+ VERBOSE_SCFTORTOUT_ERRSTRING("built as module, weight_resched ignored");
scf_sel_add(weight_single1, SCF_PRIM_SINGLE, false);
scf_sel_add(weight_single_wait1, SCF_PRIM_SINGLE, true);
scf_sel_add(weight_many1, SCF_PRIM_MANY, false);
@@ -535,6 +559,11 @@ static int __init scf_torture_init(void)
if (firsterr)
goto unwind;
}
+ if (stutter > 0) {
+ firsterr = torture_stutter_init(stutter, stutter);
+ if (firsterr)
+ goto unwind;
+ }
// Worker tasks invoking smp_call_function().
if (nthreads < 0)
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index afad085960b8..c9fbdd848138 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -2650,6 +2650,17 @@ static struct ctl_table kern_table[] = {
.extra2 = SYSCTL_ONE,
},
#endif
+#if defined(CONFIG_TREE_RCU)
+ {
+ .procname = "max_rcu_stall_to_panic",
+ .data = &sysctl_max_rcu_stall_to_panic,
+ .maxlen = sizeof(sysctl_max_rcu_stall_to_panic),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ONE,
+ .extra2 = SYSCTL_INT_MAX,
+ },
+#endif
#ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE
{
.procname = "stack_erasing",
diff --git a/kernel/torture.c b/kernel/torture.c
index 1061492f14bd..8562ac18d2eb 100644
--- a/kernel/torture.c
+++ b/kernel/torture.c
@@ -602,18 +602,29 @@ static int stutter_gap;
*/
bool stutter_wait(const char *title)
{
- int spt;
+ ktime_t delay;
+ unsigned int i = 0;
bool ret = false;
+ int spt;
cond_resched_tasks_rcu_qs();
spt = READ_ONCE(stutter_pause_test);
for (; spt; spt = READ_ONCE(stutter_pause_test)) {
- ret = true;
+ if (!ret) {
+ sched_set_normal(current, MAX_NICE);
+ ret = true;
+ }
if (spt == 1) {
schedule_timeout_interruptible(1);
} else if (spt == 2) {
- while (READ_ONCE(stutter_pause_test))
+ while (READ_ONCE(stutter_pause_test)) {
+ if (!(i++ & 0xffff)) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ delay = 10 * NSEC_PER_USEC;
+ schedule_hrtimeout(&delay, HRTIMER_MODE_REL);
+ }
cond_resched();
+ }
} else {
schedule_timeout_interruptible(round_jiffies_relative(HZ));
}
@@ -629,20 +640,27 @@ EXPORT_SYMBOL_GPL(stutter_wait);
*/
static int torture_stutter(void *arg)
{
+ ktime_t delay;
+ DEFINE_TORTURE_RANDOM(rand);
int wtime;
VERBOSE_TOROUT_STRING("torture_stutter task started");
do {
if (!torture_must_stop() && stutter > 1) {
wtime = stutter;
- if (stutter > HZ + 1) {
+ if (stutter > 2) {
WRITE_ONCE(stutter_pause_test, 1);
- wtime = stutter - HZ - 1;
- schedule_timeout_interruptible(wtime);
- wtime = HZ + 1;
+ wtime = stutter - 3;
+ delay = ktime_divns(NSEC_PER_SEC * wtime, HZ);
+ delay += (torture_random(&rand) >> 3) % NSEC_PER_MSEC;
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule_hrtimeout(&delay, HRTIMER_MODE_REL);
+ wtime = 2;
}
WRITE_ONCE(stutter_pause_test, 2);
- schedule_timeout_interruptible(wtime);
+ delay = ktime_divns(NSEC_PER_SEC * wtime, HZ);
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule_hrtimeout(&delay, HRTIMER_MODE_REL);
}
WRITE_ONCE(stutter_pause_test, 0);
if (!torture_must_stop())