summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEric Dumazet <edumazet@google.com>2026-04-21 14:29:44 +0000
committerJakub Kicinski <kuba@kernel.org>2026-04-22 21:12:47 -0700
commit5154561d9b119f781249f8e845fecf059b38b483 (patch)
tree7bd8fb4566c8024016ea023a4c8db81ef4945f16
parenta6edf2cd4156b71e07258876b7626692e158f7e8 (diff)
net/sched: sch_pie: annotate data-races in pie_dump_stats()
pie_dump_stats() only runs with RTNL held, reading fields that can be changed in qdisc fast path. Add READ_ONCE()/WRITE_ONCE() annotations. Alternative would be to acquire the qdisc spinlock, but our long-term goal is to make qdisc dump operations lockless as much as we can. tc_pie_xstats fields don't need to be latched atomically, otherwise this bug would have been caught earlier. Fixes: edb09eb17ed8 ("net: sched: do not acquire qdisc spinlock in qdisc/class stats dump") Signed-off-by: Eric Dumazet <edumazet@google.com> Reviewed-by: Jamal Hadi Salim <jhs@mojatatu.com> Link: https://patch.msgid.link/20260421142944.4009941-1-edumazet@google.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
-rw-r--r--include/net/pie.h2
-rw-r--r--net/sched/sch_pie.c38
2 files changed, 20 insertions, 20 deletions
diff --git a/include/net/pie.h b/include/net/pie.h
index 01cbc66825a4..1f3db0c35514 100644
--- a/include/net/pie.h
+++ b/include/net/pie.h
@@ -104,7 +104,7 @@ static inline void pie_vars_init(struct pie_vars *vars)
vars->dq_tstamp = DTIME_INVALID;
vars->accu_prob = 0;
vars->dq_count = DQCOUNT_INVALID;
- vars->avg_dq_rate = 0;
+ WRITE_ONCE(vars->avg_dq_rate, 0);
}
static inline struct pie_skb_cb *get_pie_cb(const struct sk_buff *skb)
diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c
index 16f3f629cb8e..fb53fbf0e328 100644
--- a/net/sched/sch_pie.c
+++ b/net/sched/sch_pie.c
@@ -90,7 +90,7 @@ static int pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
bool enqueue = false;
if (unlikely(qdisc_qlen(sch) >= sch->limit)) {
- q->stats.overlimit++;
+ WRITE_ONCE(q->stats.overlimit, q->stats.overlimit + 1);
goto out;
}
@@ -104,7 +104,7 @@ static int pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
/* If packet is ecn capable, mark it if drop probability
* is lower than 10%, else drop it.
*/
- q->stats.ecn_mark++;
+ WRITE_ONCE(q->stats.ecn_mark, q->stats.ecn_mark + 1);
enqueue = true;
}
@@ -114,15 +114,15 @@ static int pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
if (!q->params.dq_rate_estimator)
pie_set_enqueue_time(skb);
- q->stats.packets_in++;
+ WRITE_ONCE(q->stats.packets_in, q->stats.packets_in + 1);
if (qdisc_qlen(sch) > q->stats.maxq)
- q->stats.maxq = qdisc_qlen(sch);
+ WRITE_ONCE(q->stats.maxq, qdisc_qlen(sch));
return qdisc_enqueue_tail(skb, sch);
}
out:
- q->stats.dropped++;
+ WRITE_ONCE(q->stats.dropped, q->stats.dropped + 1);
q->vars.accu_prob = 0;
return qdisc_drop_reason(skb, sch, to_free, reason);
}
@@ -267,11 +267,11 @@ void pie_process_dequeue(struct sk_buff *skb, struct pie_params *params,
count = count / dtime;
if (vars->avg_dq_rate == 0)
- vars->avg_dq_rate = count;
+ WRITE_ONCE(vars->avg_dq_rate, count);
else
- vars->avg_dq_rate =
+ WRITE_ONCE(vars->avg_dq_rate,
(vars->avg_dq_rate -
- (vars->avg_dq_rate >> 3)) + (count >> 3);
+ (vars->avg_dq_rate >> 3)) + (count >> 3));
/* If the queue has receded below the threshold, we hold
* on to the last drain rate calculated, else we reset
@@ -381,7 +381,7 @@ void pie_calculate_probability(struct pie_params *params, struct pie_vars *vars,
if (delta > 0) {
/* prevent overflow */
if (vars->prob < oldprob) {
- vars->prob = MAX_PROB;
+ WRITE_ONCE(vars->prob, MAX_PROB);
/* Prevent normalization error. If probability is at
* maximum value already, we normalize it here, and
* skip the check to do a non-linear drop in the next
@@ -392,7 +392,7 @@ void pie_calculate_probability(struct pie_params *params, struct pie_vars *vars,
} else {
/* prevent underflow */
if (vars->prob > oldprob)
- vars->prob = 0;
+ WRITE_ONCE(vars->prob, 0);
}
/* Non-linear drop in probability: Reduce drop probability quickly if
@@ -403,7 +403,7 @@ void pie_calculate_probability(struct pie_params *params, struct pie_vars *vars,
/* Reduce drop probability to 98.4% */
vars->prob -= vars->prob / 64;
- vars->qdelay = qdelay;
+ WRITE_ONCE(vars->qdelay, qdelay);
vars->backlog_old = backlog;
/* We restart the measurement cycle if the following conditions are met
@@ -502,21 +502,21 @@ static int pie_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
struct pie_sched_data *q = qdisc_priv(sch);
struct tc_pie_xstats st = {
.prob = q->vars.prob << BITS_PER_BYTE,
- .delay = ((u32)PSCHED_TICKS2NS(q->vars.qdelay)) /
+ .delay = ((u32)PSCHED_TICKS2NS(READ_ONCE(q->vars.qdelay))) /
NSEC_PER_USEC,
- .packets_in = q->stats.packets_in,
- .overlimit = q->stats.overlimit,
- .maxq = q->stats.maxq,
- .dropped = q->stats.dropped,
- .ecn_mark = q->stats.ecn_mark,
+ .packets_in = READ_ONCE(q->stats.packets_in),
+ .overlimit = READ_ONCE(q->stats.overlimit),
+ .maxq = READ_ONCE(q->stats.maxq),
+ .dropped = READ_ONCE(q->stats.dropped),
+ .ecn_mark = READ_ONCE(q->stats.ecn_mark),
};
/* avg_dq_rate is only valid if dq_rate_estimator is enabled */
st.dq_rate_estimating = q->params.dq_rate_estimator;
/* unscale and return dq_rate in bytes per sec */
- if (q->params.dq_rate_estimator)
- st.avg_dq_rate = q->vars.avg_dq_rate *
+ if (st.dq_rate_estimating)
+ st.avg_dq_rate = READ_ONCE(q->vars.avg_dq_rate) *
(PSCHED_TICKS_PER_SEC) >> PIE_SCALE;
return gnet_stats_copy_app(d, &st, sizeof(st));