diff options
Diffstat (limited to 'kernel/rcutree.c')
-rw-r--r-- | kernel/rcutree.c | 227 |
1 files changed, 137 insertions, 90 deletions
diff --git a/kernel/rcutree.c b/kernel/rcutree.c index f3077c0ab181..683c4f3b87e0 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -176,9 +176,29 @@ static struct rcu_node *rcu_get_root(struct rcu_state *rsp) return &rsp->node[0]; } +/* + * Record the specified "completed" value, which is later used to validate + * dynticks counter manipulations and CPU-offline checks. Specify + * "rsp->completed - 1" to unconditionally invalidate any future dynticks + * manipulations and CPU-offline checks. Such invalidation is useful at + * the beginning of a grace period. + */ +static void dyntick_record_completed(struct rcu_state *rsp, long comp) +{ + rsp->dynticks_completed = comp; +} + #ifdef CONFIG_SMP /* + * Recall the previously recorded value of the completion for dynticks. + */ +static long dyntick_recall_completed(struct rcu_state *rsp) +{ + return rsp->dynticks_completed; +} + +/* * If the specified CPU is offline, tell the caller that it is in * a quiescent state. Otherwise, whack it with a reschedule IPI. * Grace periods can end up waiting on an offline CPU when that @@ -335,28 +355,9 @@ void rcu_irq_exit(void) set_need_resched(); } -/* - * Record the specified "completed" value, which is later used to validate - * dynticks counter manipulations. Specify "rsp->completed - 1" to - * unconditionally invalidate any future dynticks manipulations (which is - * useful at the beginning of a grace period). - */ -static void dyntick_record_completed(struct rcu_state *rsp, long comp) -{ - rsp->dynticks_completed = comp; -} - #ifdef CONFIG_SMP /* - * Recall the previously recorded value of the completion for dynticks. - */ -static long dyntick_recall_completed(struct rcu_state *rsp) -{ - return rsp->dynticks_completed; -} - -/* * Snapshot the specified CPU's dynticks counter so that we can later * credit them with an implicit quiescent state. Return 1 if this CPU * is in dynticks idle mode, which is an extended quiescent state. @@ -419,24 +420,8 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) #else /* #ifdef CONFIG_NO_HZ */ -static void dyntick_record_completed(struct rcu_state *rsp, long comp) -{ -} - #ifdef CONFIG_SMP -/* - * If there are no dynticks, then the only way that a CPU can passively - * be in a quiescent state is to be offline. Unlike dynticks idle, which - * is a point in time during the prior (already finished) grace period, - * an offline CPU is always in a quiescent state, and thus can be - * unconditionally applied. So just return the current value of completed. - */ -static long dyntick_recall_completed(struct rcu_state *rsp) -{ - return rsp->completed; -} - static int dyntick_save_progress_counter(struct rcu_data *rdp) { return 0; @@ -553,13 +538,33 @@ static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp) /* * Update CPU-local rcu_data state to record the newly noticed grace period. * This is used both when we started the grace period and when we notice - * that someone else started the grace period. + * that someone else started the grace period. The caller must hold the + * ->lock of the leaf rcu_node structure corresponding to the current CPU, + * and must have irqs disabled. */ +static void __note_new_gpnum(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp) +{ + if (rdp->gpnum != rnp->gpnum) { + rdp->qs_pending = 1; + rdp->passed_quiesc = 0; + rdp->gpnum = rnp->gpnum; + } +} + static void note_new_gpnum(struct rcu_state *rsp, struct rcu_data *rdp) { - rdp->qs_pending = 1; - rdp->passed_quiesc = 0; - rdp->gpnum = rsp->gpnum; + unsigned long flags; + struct rcu_node *rnp; + + local_irq_save(flags); + rnp = rdp->mynode; + if (rdp->gpnum == ACCESS_ONCE(rnp->gpnum) || /* outside lock. */ + !spin_trylock(&rnp->lock)) { /* irqs already off, retry later. */ + local_irq_restore(flags); + return; + } + __note_new_gpnum(rsp, rnp, rdp); + spin_unlock_irqrestore(&rnp->lock, flags); } /* @@ -583,6 +588,79 @@ check_for_new_grace_period(struct rcu_state *rsp, struct rcu_data *rdp) } /* + * Advance this CPU's callbacks, but only if the current grace period + * has ended. This may be called only from the CPU to whom the rdp + * belongs. In addition, the corresponding leaf rcu_node structure's + * ->lock must be held by the caller, with irqs disabled. + */ +static void +__rcu_process_gp_end(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp) +{ + /* Did another grace period end? */ + if (rdp->completed != rnp->completed) { + + /* Advance callbacks. No harm if list empty. */ + rdp->nxttail[RCU_DONE_TAIL] = rdp->nxttail[RCU_WAIT_TAIL]; + rdp->nxttail[RCU_WAIT_TAIL] = rdp->nxttail[RCU_NEXT_READY_TAIL]; + rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; + + /* Remember that we saw this grace-period completion. */ + rdp->completed = rnp->completed; + } +} + +/* + * Advance this CPU's callbacks, but only if the current grace period + * has ended. This may be called only from the CPU to whom the rdp + * belongs. + */ +static void +rcu_process_gp_end(struct rcu_state *rsp, struct rcu_data *rdp) +{ + unsigned long flags; + struct rcu_node *rnp; + + local_irq_save(flags); + rnp = rdp->mynode; + if (rdp->completed == ACCESS_ONCE(rnp->completed) || /* outside lock. */ + !spin_trylock(&rnp->lock)) { /* irqs already off, retry later. */ + local_irq_restore(flags); + return; + } + __rcu_process_gp_end(rsp, rnp, rdp); + spin_unlock_irqrestore(&rnp->lock, flags); +} + +/* + * Do per-CPU grace-period initialization for running CPU. The caller + * must hold the lock of the leaf rcu_node structure corresponding to + * this CPU. + */ +static void +rcu_start_gp_per_cpu(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp) +{ + /* Prior grace period ended, so advance callbacks for current CPU. */ + __rcu_process_gp_end(rsp, rnp, rdp); + + /* + * Because this CPU just now started the new grace period, we know + * that all of its callbacks will be covered by this upcoming grace + * period, even the ones that were registered arbitrarily recently. + * Therefore, advance all outstanding callbacks to RCU_WAIT_TAIL. + * + * Other CPUs cannot be sure exactly when the grace period started. + * Therefore, their recently registered callbacks must pass through + * an additional RCU_NEXT_READY stage, so that they will be handled + * by the next RCU grace period. + */ + rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; + rdp->nxttail[RCU_WAIT_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; + + /* Set state so that this CPU will detect the next quiescent state. */ + __note_new_gpnum(rsp, rnp, rdp); +} + +/* * Start a new RCU grace period if warranted, re-initializing the hierarchy * in preparation for detecting the next grace period. The caller must hold * the root node's ->lock, which is released before return. Hard irqs must @@ -607,28 +685,15 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags) rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS; record_gp_stall_check_time(rsp); dyntick_record_completed(rsp, rsp->completed - 1); - note_new_gpnum(rsp, rdp); - - /* - * Because this CPU just now started the new grace period, we know - * that all of its callbacks will be covered by this upcoming grace - * period, even the ones that were registered arbitrarily recently. - * Therefore, advance all outstanding callbacks to RCU_WAIT_TAIL. - * - * Other CPUs cannot be sure exactly when the grace period started. - * Therefore, their recently registered callbacks must pass through - * an additional RCU_NEXT_READY stage, so that they will be handled - * by the next RCU grace period. - */ - rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; - rdp->nxttail[RCU_WAIT_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; /* Special-case the common single-level case. */ if (NUM_RCU_NODES == 1) { rcu_preempt_check_blocked_tasks(rnp); rnp->qsmask = rnp->qsmaskinit; rnp->gpnum = rsp->gpnum; + rnp->completed = rsp->completed; rsp->signaled = RCU_SIGNAL_INIT; /* force_quiescent_state OK. */ + rcu_start_gp_per_cpu(rsp, rnp, rdp); spin_unlock_irqrestore(&rnp->lock, flags); return; } @@ -661,6 +726,9 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags) rcu_preempt_check_blocked_tasks(rnp); rnp->qsmask = rnp->qsmaskinit; rnp->gpnum = rsp->gpnum; + rnp->completed = rsp->completed; + if (rnp == rdp->mynode) + rcu_start_gp_per_cpu(rsp, rnp, rdp); spin_unlock(&rnp->lock); /* irqs remain disabled. */ } @@ -672,34 +740,6 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags) } /* - * Advance this CPU's callbacks, but only if the current grace period - * has ended. This may be called only from the CPU to whom the rdp - * belongs. - */ -static void -rcu_process_gp_end(struct rcu_state *rsp, struct rcu_data *rdp) -{ - long completed_snap; - unsigned long flags; - - local_irq_save(flags); - completed_snap = ACCESS_ONCE(rsp->completed); /* outside of lock. */ - - /* Did another grace period end? */ - if (rdp->completed != completed_snap) { - - /* Advance callbacks. No harm if list empty. */ - rdp->nxttail[RCU_DONE_TAIL] = rdp->nxttail[RCU_WAIT_TAIL]; - rdp->nxttail[RCU_WAIT_TAIL] = rdp->nxttail[RCU_NEXT_READY_TAIL]; - rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; - - /* Remember that we saw this grace-period completion. */ - rdp->completed = completed_snap; - } - local_irq_restore(flags); -} - -/* * Clean up after the prior grace period and let rcu_start_gp() start up * the next grace period if one is needed. Note that the caller must * hold rnp->lock, as required by rcu_start_gp(), which will release it. @@ -710,7 +750,6 @@ static void cpu_quiet_msk_finish(struct rcu_state *rsp, unsigned long flags) WARN_ON_ONCE(!rcu_gp_in_progress(rsp)); rsp->completed = rsp->gpnum; rsp->signaled = RCU_GP_IDLE; - rcu_process_gp_end(rsp, rsp->rda[smp_processor_id()]); rcu_start_gp(rsp, flags); /* releases root node's rnp->lock. */ } @@ -1144,6 +1183,7 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) long lastcomp; struct rcu_node *rnp = rcu_get_root(rsp); u8 signaled; + u8 forcenow; if (!rcu_gp_in_progress(rsp)) return; /* No grace period in progress, nothing to force. */ @@ -1180,16 +1220,23 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) if (rcu_process_dyntick(rsp, lastcomp, dyntick_save_progress_counter)) goto unlock_ret; + /* fall into next case. */ + + case RCU_SAVE_COMPLETED: /* Update state, record completion counter. */ + forcenow = 0; spin_lock(&rnp->lock); if (lastcomp == rsp->completed && - rsp->signaled == RCU_SAVE_DYNTICK) { + rsp->signaled == signaled) { rsp->signaled = RCU_FORCE_QS; dyntick_record_completed(rsp, lastcomp); + forcenow = signaled == RCU_SAVE_COMPLETED; } spin_unlock(&rnp->lock); - break; + if (!forcenow) + break; + /* fall into next case. */ case RCU_FORCE_QS: @@ -1544,21 +1591,16 @@ static void __cpuinit rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptable) { unsigned long flags; - long lastcomp; unsigned long mask; struct rcu_data *rdp = rsp->rda[cpu]; struct rcu_node *rnp = rcu_get_root(rsp); /* Set up local state, ensuring consistent view of global state. */ spin_lock_irqsave(&rnp->lock, flags); - lastcomp = rsp->completed; - rdp->completed = lastcomp; - rdp->gpnum = lastcomp; rdp->passed_quiesc = 0; /* We could be racing with new GP, */ rdp->qs_pending = 1; /* so set up to respond to current GP. */ rdp->beenonline = 1; /* We have now been online. */ rdp->preemptable = preemptable; - rdp->passed_quiesc_completed = lastcomp - 1; rdp->qlen_last_fqs_check = 0; rdp->n_force_qs_snap = rsp->n_force_qs; rdp->blimit = blimit; @@ -1580,6 +1622,11 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptable) spin_lock(&rnp->lock); /* irqs already disabled. */ rnp->qsmaskinit |= mask; mask = rnp->grpmask; + if (rnp == rdp->mynode) { + rdp->gpnum = rnp->completed; /* if GP in progress... */ + rdp->completed = rnp->completed; + rdp->passed_quiesc_completed = rnp->completed - 1; + } spin_unlock(&rnp->lock); /* irqs already disabled. */ rnp = rnp->parent; } while (rnp != NULL && !(rnp->qsmaskinit & mask)); |