From 26ece8ef6eca97f19eb5ad5186b8c1a29ab25d76 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 28 Jan 2016 18:48:37 -0800 Subject: rcu: Fix synchronize_rcu_expedited() header comment This commit brings the synchronize_rcu_expedited() function's header comment into line with the new implementation. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree_plugin.h | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) (limited to 'kernel/rcu/tree_plugin.h') diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index efdf7b61ce12..a2ac2628ef8e 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -722,13 +722,19 @@ static void sync_rcu_exp_handler(void *info) * synchronize_rcu_expedited - Brute-force RCU grace period * * Wait for an RCU-preempt grace period, but expedite it. The basic - * idea is to invoke synchronize_sched_expedited() to push all the tasks to - * the ->blkd_tasks lists and wait for this list to drain. This consumes - * significant time on all CPUs and is unfriendly to real-time workloads, - * so is thus not recommended for any sort of common-case code. - * In fact, if you are using synchronize_rcu_expedited() in a loop, - * please restructure your code to batch your updates, and then Use a - * single synchronize_rcu() instead. + * idea is to IPI all non-idle non-nohz online CPUs. The IPI handler + * checks whether the CPU is in an RCU-preempt critical section, and + * if so, it sets a flag that causes the outermost rcu_read_unlock() + * to report the quiescent state. On the other hand, if the CPU is + * not in an RCU read-side critical section, the IPI handler reports + * the quiescent state immediately. + * + * Although this is a great improvement over previous expedited + * implementations, it is still unfriendly to real-time workloads, so is + * thus not recommended for any sort of common-case code. 
In fact, if + * you are using synchronize_rcu_expedited() in a loop, please restructure + * your code to batch your updates, and then use a single synchronize_rcu() + * instead. */ void synchronize_rcu_expedited(void) { -- cgit v1.2.3 From bea2de44ae647698dc848a671fdee6e53c192423 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 28 Jan 2016 20:30:06 -0800 Subject: rcu: Add funnel-locking tracing for expedited grace periods Signed-off-by: Paul E. McKenney --- kernel/rcu/tree_plugin.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'kernel/rcu/tree_plugin.h') diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index a2ac2628ef8e..cd2dae43ff48 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -767,6 +767,9 @@ void synchronize_rcu_expedited(void) /* Clean up and exit. */ rcu_exp_gp_seq_end(rsp); mutex_unlock(&rnp_unlock->exp_funnel_mutex); + trace_rcu_exp_funnel_lock(rsp->name, rnp_unlock->level, + rnp_unlock->grplo, rnp_unlock->grphi, + TPS("rel")); } EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); -- cgit v1.2.3 From 4f41530245c7fd4837152e264d120d05ae940eb0 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 28 Jan 2016 20:49:49 -0800 Subject: rcu: Add expedited-grace-period event tracing Signed-off-by: Paul E. McKenney --- kernel/rcu/tree_plugin.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'kernel/rcu/tree_plugin.h') diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index cd2dae43ff48..36e94aed38a7 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -750,12 +750,14 @@ void synchronize_rcu_expedited(void) } s = rcu_exp_gp_seq_snap(rsp); + trace_rcu_exp_grace_period(rsp->name, s, TPS("snap")); rnp_unlock = exp_funnel_lock(rsp, s); if (rnp_unlock == NULL) return; /* Someone else did our work for us. */ rcu_exp_gp_seq_start(rsp); + trace_rcu_exp_grace_period(rsp->name, s, TPS("start")); /* Initialize the rcu_node tree in preparation for the wait. 
*/ sync_rcu_exp_select_cpus(rsp, sync_rcu_exp_handler); @@ -766,6 +768,7 @@ void synchronize_rcu_expedited(void) /* Clean up and exit. */ rcu_exp_gp_seq_end(rsp); + trace_rcu_exp_grace_period(rsp->name, s, TPS("end")); mutex_unlock(&rnp_unlock->exp_funnel_mutex); trace_rcu_exp_funnel_lock(rsp->name, rnp_unlock->level, rnp_unlock->grplo, rnp_unlock->grphi, -- cgit v1.2.3 From f6a12f34a448cc8a624070fd365c29c890138a48 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 30 Jan 2016 17:57:35 -0800 Subject: rcu: Enforce expedited-GP fairness via funnel wait queue The current mutex-based funnel-locking approach used by expedited grace periods is subject to severe unfairness. The problem arises when a few tasks, making a path from leaves to root, all wake up before other tasks do. A new task can then follow this path all the way to the root, which needlessly delays tasks whose grace period is done, but who do not happen to acquire the lock quickly enough. This commit avoids this problem by maintaining per-rcu_node wait queues, along with a per-rcu_node counter that tracks the latest grace period sought by an earlier task to visit this node. If that grace period would satisfy the current task, instead of proceeding up the tree, it waits on the current rcu_node structure using a pair of wait queues provided for that purpose. This decouples awakening of old tasks from the arrival of new tasks. If the wakeups prove to be a bottleneck, additional kthreads can be brought to bear for that purpose. Signed-off-by: Paul E. 
McKenney --- kernel/rcu/tree_plugin.h | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) (limited to 'kernel/rcu/tree_plugin.h') diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 36e94aed38a7..c82c3640493f 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -738,8 +738,6 @@ static void sync_rcu_exp_handler(void *info) */ void synchronize_rcu_expedited(void) { - struct rcu_node *rnp; - struct rcu_node *rnp_unlock; struct rcu_state *rsp = rcu_state_p; unsigned long s; @@ -752,8 +750,7 @@ void synchronize_rcu_expedited(void) s = rcu_exp_gp_seq_snap(rsp); trace_rcu_exp_grace_period(rsp->name, s, TPS("snap")); - rnp_unlock = exp_funnel_lock(rsp, s); - if (rnp_unlock == NULL) + if (exp_funnel_lock(rsp, s)) return; /* Someone else did our work for us. */ rcu_exp_gp_seq_start(rsp); @@ -763,16 +760,13 @@ void synchronize_rcu_expedited(void) sync_rcu_exp_select_cpus(rsp, sync_rcu_exp_handler); /* Wait for snapshotted ->blkd_tasks lists to drain. */ - rnp = rcu_get_root(rsp); synchronize_sched_expedited_wait(rsp); - - /* Clean up and exit. */ rcu_exp_gp_seq_end(rsp); trace_rcu_exp_grace_period(rsp->name, s, TPS("end")); - mutex_unlock(&rnp_unlock->exp_funnel_mutex); - trace_rcu_exp_funnel_lock(rsp->name, rnp_unlock->level, - rnp_unlock->grplo, rnp_unlock->grphi, - TPS("rel")); + rcu_exp_wake(rsp, s); + + trace_rcu_exp_grace_period(rsp->name, s, TPS("endwake")); + mutex_unlock(&rsp->exp_mutex); } EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); -- cgit v1.2.3 From 4ea3e85b113ab37a2d55cfabf0d709ddec088bb3 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 16 Mar 2016 16:22:25 -0700 Subject: rcu: Consolidate expedited GP code into rcu_exp_wait_wake() Currently, synchronize_rcu_expedited() and rcu_sched_expedited() have significant duplicate code. This commit therefore consolidates some of this code into rcu_exp_wake(), which is now renamed to rcu_exp_wait_wake() in recognition of its added responsibilities. 
Signed-off-by: Paul E. McKenney --- kernel/rcu/tree_plugin.h | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'kernel/rcu/tree_plugin.h') diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index c82c3640493f..b6d5dde6eab9 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -759,14 +759,8 @@ void synchronize_rcu_expedited(void) /* Initialize the rcu_node tree in preparation for the wait. */ sync_rcu_exp_select_cpus(rsp, sync_rcu_exp_handler); - /* Wait for snapshotted ->blkd_tasks lists to drain. */ - synchronize_sched_expedited_wait(rsp); - rcu_exp_gp_seq_end(rsp); - trace_rcu_exp_grace_period(rsp->name, s, TPS("end")); - rcu_exp_wake(rsp, s); - - trace_rcu_exp_grace_period(rsp->name, s, TPS("endwake")); - mutex_unlock(&rsp->exp_mutex); + /* Wait for ->blkd_tasks lists to drain, then wake everyone up. */ + rcu_exp_wait_wake(rsp, s); } EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); -- cgit v1.2.3 From 179e5dcd1e5bdfac1128431d131b31322aedd2bc Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 16 Mar 2016 16:27:44 -0700 Subject: rcu: Consolidate expedited GP tracing into rcu_exp_gp_seq_snap() This commit moves some duplicate code from synchronize_rcu_expedited() and synchronize_sched_expedited() into rcu_exp_gp_seq_snap(). This doesn't save lines of code, but does eliminate a "tell me twice" issue. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree_plugin.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'kernel/rcu/tree_plugin.h') diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index b6d5dde6eab9..529a44085a63 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -748,8 +748,6 @@ void synchronize_rcu_expedited(void) } s = rcu_exp_gp_seq_snap(rsp); - trace_rcu_exp_grace_period(rsp->name, s, TPS("snap")); - if (exp_funnel_lock(rsp, s)) return; /* Someone else did our work for us. 
*/ -- cgit v1.2.3 From aff12cdf86e6fa891d1c30c0fad112d138bd7b10 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 16 Mar 2016 16:32:24 -0700 Subject: rcu: Consolidate expedited GP code into exp_funnel_lock() This commit pulls the grace-period-start counter adjustment and tracing from synchronize_rcu_expedited() and synchronize_sched_expedited() into exp_funnel_lock(), thus eliminating some code duplication. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree_plugin.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'kernel/rcu/tree_plugin.h') diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 529a44085a63..ff1cd4e1188d 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -751,9 +751,6 @@ void synchronize_rcu_expedited(void) if (exp_funnel_lock(rsp, s)) return; /* Someone else did our work for us. */ - rcu_exp_gp_seq_start(rsp); - trace_rcu_exp_grace_period(rsp->name, s, TPS("start")); - /* Initialize the rcu_node tree in preparation for the wait. */ sync_rcu_exp_select_cpus(rsp, sync_rcu_exp_handler); -- cgit v1.2.3