From b9585e940a0d78770cda8f9aebf81b17b4d19e6d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 31 Jul 2015 16:04:45 -0700 Subject: rcu: Consolidate tree setup for synchronize_rcu_expedited() This commit replaces sync_rcu_preempt_exp_init1() and sync_rcu_preempt_exp_init2() with sync_exp_reset_tree_hotplug() and sync_exp_reset_tree(), which will also be used by synchronize_sched_expedited(), and sync_rcu_exp_select_nodes(), which contains code specific to synchronize_rcu_expedited(). Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.h | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) (limited to 'kernel/rcu/tree.h') diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 2e991f8361e4..a57f25ecca58 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -171,16 +171,21 @@ struct rcu_node { /* an rcu_data structure, otherwise, each */ /* bit corresponds to a child rcu_node */ /* structure. */ - unsigned long expmask; /* Groups that have ->blkd_tasks */ - /* elements that need to drain to allow the */ - /* current expedited grace period to */ - /* complete (only for PREEMPT_RCU). */ unsigned long qsmaskinit; - /* Per-GP initial value for qsmask & expmask. */ + /* Per-GP initial value for qsmask. */ /* Initialized from ->qsmaskinitnext at the */ /* beginning of each grace period. */ unsigned long qsmaskinitnext; /* Online CPUs for next grace period. */ + unsigned long expmask; /* CPUs or groups that need to check in */ + /* to allow the current expedited GP */ + /* to complete. */ + unsigned long expmaskinit; + /* Per-GP initial values for expmask. */ + /* Initialized from ->expmaskinitnext at the */ + /* beginning of each expedited GP. */ + unsigned long expmaskinitnext; + /* Online CPUs for next expedited GP. */ unsigned long grpmask; /* Mask to apply to parent qsmask. */ /* Only one bit will be set in this mask. */ int grplo; /* lowest-numbered CPU or group here. 
*/ @@ -466,6 +471,7 @@ struct rcu_state { struct rcu_data __percpu *rda; /* pointer of percu rcu_data. */ void (*call)(struct rcu_head *head, /* call_rcu() flavor. */ void (*func)(struct rcu_head *head)); + int ncpus; /* # CPUs seen so far. */ /* The following fields are guarded by the root rcu_node's lock. */ @@ -508,6 +514,7 @@ struct rcu_state { atomic_long_t expedited_normal; /* # fallbacks to normal. */ atomic_t expedited_need_qs; /* # CPUs left to check in. */ wait_queue_head_t expedited_wq; /* Wait for check-ins. */ + int ncpus_snap; /* # CPUs seen last time. */ unsigned long jiffies_force_qs; /* Time at which to invoke */ /* force_quiescent_state(). */ -- cgit v1.2.3 From bce5fa12aad148e15efd9bc0015dc4898b6e723b Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 5 Aug 2015 16:03:54 -0700 Subject: rcu: Move synchronize_sched_expedited() to combining tree Currently, synchronize_sched_expedited() uses a single global counter to track the number of remaining context switches that the current expedited grace period must wait on. This is problematic on large systems, where the resulting memory contention can be pathological. This commit therefore makes synchronize_sched_expedited() instead use the combining tree in the same manner as synchronize_rcu_expedited(), keeping memory contention down to a dull roar. This commit creates a temporary function sync_sched_exp_select_cpus() that is very similar to sync_rcu_exp_select_cpus(). A later commit will consolidate these two functions, which becomes possible when synchronize_sched_expedited() switches from stop_one_cpu_nowait() to smp_call_function_single(). Signed-off-by: Paul E. 
McKenney --- kernel/rcu/tree.h | 1 - 1 file changed, 1 deletion(-) (limited to 'kernel/rcu/tree.h') diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index a57f25ecca58..efe361c764ab 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -383,7 +383,6 @@ struct rcu_data { struct rcu_head oom_head; #endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */ struct mutex exp_funnel_mutex; - bool exp_done; /* Expedited QS for this CPU? */ /* 7) Callback offloading. */ #ifdef CONFIG_RCU_NOCB_CPU -- cgit v1.2.3 From 97c668b8e983b722e2ed765b98b05f644aff1b13 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 6 Aug 2015 11:31:51 -0700 Subject: rcu: Rename qs_pending to core_needs_qs An upcoming commit needs to invert the sense of the ->passed_quiesce rcu_data structure field, so this commit is taking this opportunity to clarify things a bit by renaming ->qs_pending to ->core_needs_qs. So if !rdp->core_needs_qs, then this CPU need not concern itself with quiescent states, in particular, it need not acquire its leaf rcu_node structure's ->lock to check. Otherwise, it needs to report the next quiescent state. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/rcu/tree.h') diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index efe361c764ab..4a0f30676ba8 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -303,7 +303,7 @@ struct rcu_data { unsigned long rcu_qs_ctr_snap;/* Snapshot of rcu_qs_ctr to check */ /* for rcu_all_qs() invocations. */ bool passed_quiesce; /* User-mode/idle loop etc. */ - bool qs_pending; /* Core waits for quiesc state. */ + bool core_needs_qs; /* Core waits for quiesc state. */ bool beenonline; /* CPU online at least once. */ bool gpwrap; /* Possible gpnum/completed wrap. */ struct rcu_node *mynode; /* This CPU's leaf of hierarchy */ @@ -368,7 +368,7 @@ struct rcu_data { /* 5) __rcu_pending() statistics. */ unsigned long n_rcu_pending; /* rcu_pending() calls since boot. 
*/ - unsigned long n_rp_qs_pending; + unsigned long n_rp_core_needs_qs; unsigned long n_rp_report_qs; unsigned long n_rp_cb_ready; unsigned long n_rp_cpu_needs_gp; -- cgit v1.2.3 From 0d43eb34f9aabcddf41c99b7af2d0ced33e9a3cc Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 6 Aug 2015 14:17:29 -0700 Subject: rcu: Invert passed_quiesce and rename to cpu_no_qs This commit inverts the sense of the rcu_data structure's ->passed_quiesce field and renames it to ->cpu_no_qs. This will allow a later commit to use an "aggregate OR" operation to test expedited as well as normal grace periods without added overhead. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/rcu/tree.h') diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 4a0f30676ba8..ded4ceebed76 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -302,7 +302,7 @@ struct rcu_data { /* is aware of having started. */ unsigned long rcu_qs_ctr_snap;/* Snapshot of rcu_qs_ctr to check */ /* for rcu_all_qs() invocations. */ - bool passed_quiesce; /* User-mode/idle loop etc. */ + bool cpu_no_qs; /* No QS yet for this CPU. */ bool core_needs_qs; /* Core waits for quiesc state. */ bool beenonline; /* CPU online at least once. */ bool gpwrap; /* Possible gpnum/completed wrap. */ -- cgit v1.2.3 From 5b74c458906fc4a62f932ee8bb801d29baf15fec Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 6 Aug 2015 15:16:57 -0700 Subject: rcu: Make ->cpu_no_qs be a union for aggregate OR This commit converts the rcu_data structure's ->cpu_no_qs field to a union. The bytewise side of this union allows individual access to indications as to whether this CPU needs to find a quiescent state for a normal (.norm) and/or expedited (.exp) grace period. The setwise side of the union allows testing whether or not a quiescent state is needed at all, for either type of grace period. For now, only .norm is used. 
A later commit will introduce the expedited usage. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'kernel/rcu/tree.h') diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index ded4ceebed76..3eee48bcf52b 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -286,6 +286,18 @@ struct rcu_node { for ((rnp) = (rsp)->level[rcu_num_lvls - 1]; \ (rnp) < &(rsp)->node[rcu_num_nodes]; (rnp)++) +/* + * Union to allow "aggregate OR" operation on the need for a quiescent + * state by the normal and expedited grace periods. + */ +union rcu_noqs { + struct { + u8 norm; + u8 exp; + } b; /* Bits. */ + u16 s; /* Set of bits, aggregate OR here. */ +}; + /* Index values for nxttail array in struct rcu_data. */ #define RCU_DONE_TAIL 0 /* Also RCU_WAIT head. */ #define RCU_WAIT_TAIL 1 /* Also RCU_NEXT_READY head. */ @@ -302,7 +314,7 @@ struct rcu_data { /* is aware of having started. */ unsigned long rcu_qs_ctr_snap;/* Snapshot of rcu_qs_ctr to check */ /* for rcu_all_qs() invocations. */ - bool cpu_no_qs; /* No QS yet for this CPU. */ + union rcu_noqs cpu_no_qs; /* No QSes yet for this CPU. */ bool core_needs_qs; /* Core waits for quiesc state. */ bool beenonline; /* CPU online at least once. */ bool gpwrap; /* Possible gpnum/completed wrap. */ -- cgit v1.2.3 From 6587a23b6b9bdb47205ec96c703e5bf8a2d39701 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 6 Aug 2015 16:50:39 -0700 Subject: rcu: Switch synchronize_sched_expedited() to IPI This commit switches synchronize_sched_expedited() from stop_one_cpu_nowait() to smp_call_function_single(), thus moving from an IPI and a pair of context switches to an IPI and a single pass through the scheduler. Of course, if the scheduler actually does decide to switch to a different task, there will still be a pair of context switches, but there would likely have been a pair of context switches anyway, just a bit later. Signed-off-by: Paul E. 
McKenney --- kernel/rcu/tree.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'kernel/rcu/tree.h') diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 3eee48bcf52b..1b969cef8fe4 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -324,9 +324,6 @@ struct rcu_data { /* ticks this CPU has handled */ /* during and after the last grace */ /* period it is aware of. */ - struct cpu_stop_work exp_stop_work; - /* Expedited grace-period control */ - /* for CPU stopping. */ /* 2) batch handling */ /* -- cgit v1.2.3 From 83c2c735e78da1a0d994911f730f6e1d36c88d7a Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 6 Aug 2015 20:43:02 -0700 Subject: rcu: Stop silencing lockdep false positive for expedited grace periods This reverts commit af859beaaba4 (rcu: Silence lockdep false positive for expedited grace periods). Because synchronize_rcu_expedited() no longer invokes synchronize_sched_expedited(), ->exp_funnel_mutex acquisition is no longer nested, so the false positive no longer happens. This commit therefore removes the extra lockdep data structures, as they are no longer needed. 
--- kernel/rcu/tree.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'kernel/rcu/tree.h') diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 1b969cef8fe4..6f3b63b68886 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -70,8 +70,6 @@ # define RCU_NODE_NAME_INIT { "rcu_node_0" } # define RCU_FQS_NAME_INIT { "rcu_node_fqs_0" } # define RCU_EXP_NAME_INIT { "rcu_node_exp_0" } -# define RCU_EXP_SCHED_NAME_INIT \ - { "rcu_node_exp_sched_0" } #elif NR_CPUS <= RCU_FANOUT_2 # define RCU_NUM_LVLS 2 # define NUM_RCU_LVL_0 1 @@ -81,8 +79,6 @@ # define RCU_NODE_NAME_INIT { "rcu_node_0", "rcu_node_1" } # define RCU_FQS_NAME_INIT { "rcu_node_fqs_0", "rcu_node_fqs_1" } # define RCU_EXP_NAME_INIT { "rcu_node_exp_0", "rcu_node_exp_1" } -# define RCU_EXP_SCHED_NAME_INIT \ - { "rcu_node_exp_sched_0", "rcu_node_exp_sched_1" } #elif NR_CPUS <= RCU_FANOUT_3 # define RCU_NUM_LVLS 3 # define NUM_RCU_LVL_0 1 @@ -93,8 +89,6 @@ # define RCU_NODE_NAME_INIT { "rcu_node_0", "rcu_node_1", "rcu_node_2" } # define RCU_FQS_NAME_INIT { "rcu_node_fqs_0", "rcu_node_fqs_1", "rcu_node_fqs_2" } # define RCU_EXP_NAME_INIT { "rcu_node_exp_0", "rcu_node_exp_1", "rcu_node_exp_2" } -# define RCU_EXP_SCHED_NAME_INIT \ - { "rcu_node_exp_sched_0", "rcu_node_exp_sched_1", "rcu_node_exp_sched_2" } #elif NR_CPUS <= RCU_FANOUT_4 # define RCU_NUM_LVLS 4 # define NUM_RCU_LVL_0 1 @@ -106,8 +100,6 @@ # define RCU_NODE_NAME_INIT { "rcu_node_0", "rcu_node_1", "rcu_node_2", "rcu_node_3" } # define RCU_FQS_NAME_INIT { "rcu_node_fqs_0", "rcu_node_fqs_1", "rcu_node_fqs_2", "rcu_node_fqs_3" } # define RCU_EXP_NAME_INIT { "rcu_node_exp_0", "rcu_node_exp_1", "rcu_node_exp_2", "rcu_node_exp_3" } -# define RCU_EXP_SCHED_NAME_INIT \ - { "rcu_node_exp_sched_0", "rcu_node_exp_sched_1", "rcu_node_exp_sched_2", "rcu_node_exp_sched_3" } #else # error "CONFIG_RCU_FANOUT insufficient for NR_CPUS" #endif /* #if (NR_CPUS) <= RCU_FANOUT_1 */ -- cgit v1.2.3 From 74611ecb0fc4c850a8f89a744ce99cbf0dd43cb2 Mon Sep 17 00:00:00 
2001 From: "Paul E. McKenney" Date: Tue, 18 Aug 2015 10:20:43 -0700 Subject: rcu: Add online/offline info to expedited stall warning message This commit makes the RCU CPU stall warning message print three online/offline indications immediately after the CPU number. An "O" indicates that the CPU is globally offline and a "." that it is globally online; an "o" indicates that RCU believes that the CPU is offline for the current grace period, and a "." otherwise; and an "N" indicates that RCU believes that the CPU will be offline for the next grace period, and a "." otherwise. So for CPU 10, you would normally see "10-...:", indicating that everything believes that the CPU is online. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.h | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel/rcu/tree.h') diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 6f3b63b68886..191aa3678575 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -589,6 +589,7 @@ static bool rcu_preempt_has_tasks(struct rcu_node *rnp); #endif /* #ifdef CONFIG_HOTPLUG_CPU */ static void rcu_print_detail_task_stall(struct rcu_state *rsp); static int rcu_print_task_stall(struct rcu_node *rnp); +static int rcu_print_task_exp_stall(struct rcu_node *rnp); static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp); static void rcu_preempt_check_callbacks(void); void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)); -- cgit v1.2.3