summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2021-10-18 12:54:41 +0100
committerDavid S. Miller <davem@davemloft.net>2021-10-18 12:54:41 +0100
commitf8ba22a14268b432f685fd8463a958a3594b7fb6 (patch)
tree6feb3ee12123611d1988813e9e139c4c81280aec /include
parent254ec036db1123b10e23e1412c191a3cf70dce71 (diff)
parent29cbcd85828372333aa87542c51f2b2b0fd4380c (diff)
Merge branch 'remove-qdisc-running-counter'
Sebastian Andrzej Siewior says: ==================== Try to simplify the gnet_stats and remove qdisc->running sequence counter. The first few patches is a follow up to https://lore.kernel.org/all/20211007175000.2334713-1-bigeasy@linutronix.de/ The remaining patches (#5+) remove the seqcount_t (Qdisc::running) from the Qdisc. The statistics (Qdisc::bstats and Qdisc::cpu_bstats) use u64_stats_t and the "running state" is now represented by a bit in Qdisc::state. By removing the seqcount_t from Qdisc and decoupling the bstats statistics from the seqcount_t it is possible to query the statistics even if the Qdisc is running instead of waiting until it is idle again. The try-lock like usage of the seqcount_t in qdisc_run_begin() is problematic on PREEMPT_RT. Inside the qdisc_run_begin/end() qdisc->running sequence counter write sections, at sch_direct_xmit(), the seqcount write serialization lock is released then re-acquired. This is fine for !RT, because the writer is in a BH disabled region and there is a no in-IRQ reader. For RT though, BH sections are preemptible. The earlier introduced seqcount_LOCKNAME_t mechanism, which for RT the reader acquires then relesaes the write serailization lock to avoid infinite spinning if it preempts a seqcount write section, cannot work: the qdisc->running write serialization lock is already intermittingly released inside the seqcount write section. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include')
-rw-r--r--include/linux/netdevice.h4
-rw-r--r--include/linux/u64_stats_sync.h10
-rw-r--r--include/net/act_api.h10
-rw-r--r--include/net/gen_stats.h59
-rw-r--r--include/net/netfilter/xt_rateest.h2
-rw-r--r--include/net/pkt_cls.h4
-rw-r--r--include/net/sch_generic.h74
7 files changed, 75 insertions, 88 deletions
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 173984414f38..f9cd6fea213f 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1916,7 +1916,6 @@ enum netdev_ml_priv_type {
* @sfp_bus: attached &struct sfp_bus structure.
*
* @qdisc_tx_busylock: lockdep class annotating Qdisc->busylock spinlock
- * @qdisc_running_key: lockdep class annotating Qdisc->running seqcount
*
* @proto_down: protocol port state information can be sent to the
* switch driver and used to set the phys state of the
@@ -2250,7 +2249,6 @@ struct net_device {
struct phy_device *phydev;
struct sfp_bus *sfp_bus;
struct lock_class_key *qdisc_tx_busylock;
- struct lock_class_key *qdisc_running_key;
bool proto_down;
unsigned wol_enabled:1;
unsigned threaded:1;
@@ -2360,13 +2358,11 @@ static inline void netdev_for_each_tx_queue(struct net_device *dev,
#define netdev_lockdep_set_classes(dev) \
{ \
static struct lock_class_key qdisc_tx_busylock_key; \
- static struct lock_class_key qdisc_running_key; \
static struct lock_class_key qdisc_xmit_lock_key; \
static struct lock_class_key dev_addr_list_lock_key; \
unsigned int i; \
\
(dev)->qdisc_tx_busylock = &qdisc_tx_busylock_key; \
- (dev)->qdisc_running_key = &qdisc_running_key; \
lockdep_set_class(&(dev)->addr_list_lock, \
&dev_addr_list_lock_key); \
for (i = 0; i < (dev)->num_tx_queues; i++) \
diff --git a/include/linux/u64_stats_sync.h b/include/linux/u64_stats_sync.h
index e81856c0ba13..e8ec116c916b 100644
--- a/include/linux/u64_stats_sync.h
+++ b/include/linux/u64_stats_sync.h
@@ -83,6 +83,11 @@ static inline u64 u64_stats_read(const u64_stats_t *p)
return local64_read(&p->v);
}
+static inline void u64_stats_set(u64_stats_t *p, u64 val)
+{
+ local64_set(&p->v, val);
+}
+
static inline void u64_stats_add(u64_stats_t *p, unsigned long val)
{
local64_add(val, &p->v);
@@ -104,6 +109,11 @@ static inline u64 u64_stats_read(const u64_stats_t *p)
return p->v;
}
+static inline void u64_stats_set(u64_stats_t *p, u64 val)
+{
+ p->v = val;
+}
+
static inline void u64_stats_add(u64_stats_t *p, unsigned long val)
{
p->v += val;
diff --git a/include/net/act_api.h b/include/net/act_api.h
index f19f7f4a463c..b5b624c7e488 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -30,13 +30,13 @@ struct tc_action {
atomic_t tcfa_bindcnt;
int tcfa_action;
struct tcf_t tcfa_tm;
- struct gnet_stats_basic_packed tcfa_bstats;
- struct gnet_stats_basic_packed tcfa_bstats_hw;
+ struct gnet_stats_basic_sync tcfa_bstats;
+ struct gnet_stats_basic_sync tcfa_bstats_hw;
struct gnet_stats_queue tcfa_qstats;
struct net_rate_estimator __rcu *tcfa_rate_est;
spinlock_t tcfa_lock;
- struct gnet_stats_basic_cpu __percpu *cpu_bstats;
- struct gnet_stats_basic_cpu __percpu *cpu_bstats_hw;
+ struct gnet_stats_basic_sync __percpu *cpu_bstats;
+ struct gnet_stats_basic_sync __percpu *cpu_bstats_hw;
struct gnet_stats_queue __percpu *cpu_qstats;
struct tc_cookie __rcu *act_cookie;
struct tcf_chain __rcu *goto_chain;
@@ -206,7 +206,7 @@ static inline void tcf_action_update_bstats(struct tc_action *a,
struct sk_buff *skb)
{
if (likely(a->cpu_bstats)) {
- bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), skb);
+ bstats_update(this_cpu_ptr(a->cpu_bstats), skb);
return;
}
spin_lock(&a->tcfa_lock);
diff --git a/include/net/gen_stats.h b/include/net/gen_stats.h
index 1424e02cef90..7aa2b8e1fb29 100644
--- a/include/net/gen_stats.h
+++ b/include/net/gen_stats.h
@@ -7,14 +7,17 @@
#include <linux/rtnetlink.h>
#include <linux/pkt_sched.h>
-/* Note: this used to be in include/uapi/linux/gen_stats.h */
-struct gnet_stats_basic_packed {
- __u64 bytes;
- __u64 packets;
-};
-
-struct gnet_stats_basic_cpu {
- struct gnet_stats_basic_packed bstats;
+/* Throughput stats.
+ * Must be initialized beforehand with gnet_stats_basic_sync_init().
+ *
+ * If no reads can ever occur parallel to writes (e.g. stack-allocated
+ * bstats), then the internal stat values can be written to and read
+ * from directly. Otherwise, use _bstats_set/update() for writes and
+ * gnet_stats_add_basic() for reads.
+ */
+struct gnet_stats_basic_sync {
+ u64_stats_t bytes;
+ u64_stats_t packets;
struct u64_stats_sync syncp;
} __aligned(2 * sizeof(u64));
@@ -34,6 +37,7 @@ struct gnet_dump {
struct tc_stats tc_stats;
};
+void gnet_stats_basic_sync_init(struct gnet_stats_basic_sync *b);
int gnet_stats_start_copy(struct sk_buff *skb, int type, spinlock_t *lock,
struct gnet_dump *d, int padattr);
@@ -42,41 +46,38 @@ int gnet_stats_start_copy_compat(struct sk_buff *skb, int type,
spinlock_t *lock, struct gnet_dump *d,
int padattr);
-int gnet_stats_copy_basic(const seqcount_t *running,
- struct gnet_dump *d,
- struct gnet_stats_basic_cpu __percpu *cpu,
- struct gnet_stats_basic_packed *b);
-void __gnet_stats_copy_basic(const seqcount_t *running,
- struct gnet_stats_basic_packed *bstats,
- struct gnet_stats_basic_cpu __percpu *cpu,
- struct gnet_stats_basic_packed *b);
-int gnet_stats_copy_basic_hw(const seqcount_t *running,
- struct gnet_dump *d,
- struct gnet_stats_basic_cpu __percpu *cpu,
- struct gnet_stats_basic_packed *b);
+int gnet_stats_copy_basic(struct gnet_dump *d,
+ struct gnet_stats_basic_sync __percpu *cpu,
+ struct gnet_stats_basic_sync *b, bool running);
+void gnet_stats_add_basic(struct gnet_stats_basic_sync *bstats,
+ struct gnet_stats_basic_sync __percpu *cpu,
+ struct gnet_stats_basic_sync *b, bool running);
+int gnet_stats_copy_basic_hw(struct gnet_dump *d,
+ struct gnet_stats_basic_sync __percpu *cpu,
+ struct gnet_stats_basic_sync *b, bool running);
int gnet_stats_copy_rate_est(struct gnet_dump *d,
struct net_rate_estimator __rcu **ptr);
int gnet_stats_copy_queue(struct gnet_dump *d,
struct gnet_stats_queue __percpu *cpu_q,
struct gnet_stats_queue *q, __u32 qlen);
-void __gnet_stats_copy_queue(struct gnet_stats_queue *qstats,
- const struct gnet_stats_queue __percpu *cpu_q,
- const struct gnet_stats_queue *q, __u32 qlen);
+void gnet_stats_add_queue(struct gnet_stats_queue *qstats,
+ const struct gnet_stats_queue __percpu *cpu_q,
+ const struct gnet_stats_queue *q);
int gnet_stats_copy_app(struct gnet_dump *d, void *st, int len);
int gnet_stats_finish_copy(struct gnet_dump *d);
-int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
- struct gnet_stats_basic_cpu __percpu *cpu_bstats,
+int gen_new_estimator(struct gnet_stats_basic_sync *bstats,
+ struct gnet_stats_basic_sync __percpu *cpu_bstats,
struct net_rate_estimator __rcu **rate_est,
spinlock_t *lock,
- seqcount_t *running, struct nlattr *opt);
+ bool running, struct nlattr *opt);
void gen_kill_estimator(struct net_rate_estimator __rcu **ptr);
-int gen_replace_estimator(struct gnet_stats_basic_packed *bstats,
- struct gnet_stats_basic_cpu __percpu *cpu_bstats,
+int gen_replace_estimator(struct gnet_stats_basic_sync *bstats,
+ struct gnet_stats_basic_sync __percpu *cpu_bstats,
struct net_rate_estimator __rcu **ptr,
spinlock_t *lock,
- seqcount_t *running, struct nlattr *opt);
+ bool running, struct nlattr *opt);
bool gen_estimator_active(struct net_rate_estimator __rcu **ptr);
bool gen_estimator_read(struct net_rate_estimator __rcu **ptr,
struct gnet_stats_rate_est64 *sample);
diff --git a/include/net/netfilter/xt_rateest.h b/include/net/netfilter/xt_rateest.h
index 832ab69efda5..4c3809e141f4 100644
--- a/include/net/netfilter/xt_rateest.h
+++ b/include/net/netfilter/xt_rateest.h
@@ -6,7 +6,7 @@
struct xt_rateest {
/* keep lock and bstats on same cache line to speedup xt_rateest_tg() */
- struct gnet_stats_basic_packed bstats;
+ struct gnet_stats_basic_sync bstats;
spinlock_t lock;
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 83a6d0792180..4a5833108083 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -765,7 +765,7 @@ struct tc_cookie {
};
struct tc_qopt_offload_stats {
- struct gnet_stats_basic_packed *bstats;
+ struct gnet_stats_basic_sync *bstats;
struct gnet_stats_queue *qstats;
};
@@ -885,7 +885,7 @@ struct tc_gred_qopt_offload_params {
};
struct tc_gred_qopt_offload_stats {
- struct gnet_stats_basic_packed bstats[MAX_DPs];
+ struct gnet_stats_basic_sync bstats[MAX_DPs];
struct gnet_stats_queue qstats[MAX_DPs];
struct red_stats *xstats[MAX_DPs];
};
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 5a011f8d394e..baad2ab4d971 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -38,6 +38,10 @@ enum qdisc_state_t {
__QDISC_STATE_DEACTIVATED,
__QDISC_STATE_MISSED,
__QDISC_STATE_DRAINING,
+ /* Only for !TCQ_F_NOLOCK qdisc. Never access it directly.
+ * Use qdisc_run_begin/end() or qdisc_is_running() instead.
+ */
+ __QDISC_STATE_RUNNING,
};
#define QDISC_STATE_MISSED BIT(__QDISC_STATE_MISSED)
@@ -97,7 +101,7 @@ struct Qdisc {
struct netdev_queue *dev_queue;
struct net_rate_estimator __rcu *rate_est;
- struct gnet_stats_basic_cpu __percpu *cpu_bstats;
+ struct gnet_stats_basic_sync __percpu *cpu_bstats;
struct gnet_stats_queue __percpu *cpu_qstats;
int pad;
refcount_t refcnt;
@@ -107,8 +111,7 @@ struct Qdisc {
*/
struct sk_buff_head gso_skb ____cacheline_aligned_in_smp;
struct qdisc_skb_head q;
- struct gnet_stats_basic_packed bstats;
- seqcount_t running;
+ struct gnet_stats_basic_sync bstats;
struct gnet_stats_queue qstats;
unsigned long state;
struct Qdisc *next_sched;
@@ -143,11 +146,15 @@ static inline struct Qdisc *qdisc_refcount_inc_nz(struct Qdisc *qdisc)
return NULL;
}
+/* For !TCQ_F_NOLOCK qdisc: callers must either call this within a qdisc
+ * root_lock section, or provide their own memory barriers -- ordering
+ * against qdisc_run_begin/end() atomic bit operations.
+ */
static inline bool qdisc_is_running(struct Qdisc *qdisc)
{
if (qdisc->flags & TCQ_F_NOLOCK)
return spin_is_locked(&qdisc->seqlock);
- return (raw_read_seqcount(&qdisc->running) & 1) ? true : false;
+ return test_bit(__QDISC_STATE_RUNNING, &qdisc->state);
}
static inline bool nolock_qdisc_is_empty(const struct Qdisc *qdisc)
@@ -167,6 +174,9 @@ static inline bool qdisc_is_empty(const struct Qdisc *qdisc)
return !READ_ONCE(qdisc->q.qlen);
}
+/* For !TCQ_F_NOLOCK qdisc, qdisc_run_begin/end() must be invoked with
+ * the qdisc root lock acquired.
+ */
static inline bool qdisc_run_begin(struct Qdisc *qdisc)
{
if (qdisc->flags & TCQ_F_NOLOCK) {
@@ -206,15 +216,8 @@ static inline bool qdisc_run_begin(struct Qdisc *qdisc)
* after it releases the lock at the end of qdisc_run_end().
*/
return spin_trylock(&qdisc->seqlock);
- } else if (qdisc_is_running(qdisc)) {
- return false;
}
- /* Variant of write_seqcount_begin() telling lockdep a trylock
- * was attempted.
- */
- raw_write_seqcount_begin(&qdisc->running);
- seqcount_acquire(&qdisc->running.dep_map, 0, 1, _RET_IP_);
- return true;
+ return test_and_set_bit(__QDISC_STATE_RUNNING, &qdisc->state);
}
static inline void qdisc_run_end(struct Qdisc *qdisc)
@@ -226,7 +229,7 @@ static inline void qdisc_run_end(struct Qdisc *qdisc)
&qdisc->state)))
__netif_schedule(qdisc);
} else {
- write_seqcount_end(&qdisc->running);
+ clear_bit(__QDISC_STATE_RUNNING, &qdisc->state);
}
}
@@ -592,14 +595,6 @@ static inline spinlock_t *qdisc_root_sleeping_lock(const struct Qdisc *qdisc)
return qdisc_lock(root);
}
-static inline seqcount_t *qdisc_root_sleeping_running(const struct Qdisc *qdisc)
-{
- struct Qdisc *root = qdisc_root_sleeping(qdisc);
-
- ASSERT_RTNL();
- return &root->running;
-}
-
static inline struct net_device *qdisc_dev(const struct Qdisc *qdisc)
{
return qdisc->dev_queue->dev;
@@ -849,14 +844,16 @@ static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
return sch->enqueue(skb, sch, to_free);
}
-static inline void _bstats_update(struct gnet_stats_basic_packed *bstats,
+static inline void _bstats_update(struct gnet_stats_basic_sync *bstats,
__u64 bytes, __u32 packets)
{
- bstats->bytes += bytes;
- bstats->packets += packets;
+ u64_stats_update_begin(&bstats->syncp);
+ u64_stats_add(&bstats->bytes, bytes);
+ u64_stats_add(&bstats->packets, packets);
+ u64_stats_update_end(&bstats->syncp);
}
-static inline void bstats_update(struct gnet_stats_basic_packed *bstats,
+static inline void bstats_update(struct gnet_stats_basic_sync *bstats,
const struct sk_buff *skb)
{
_bstats_update(bstats,
@@ -864,26 +861,10 @@ static inline void bstats_update(struct gnet_stats_basic_packed *bstats,
skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1);
}
-static inline void _bstats_cpu_update(struct gnet_stats_basic_cpu *bstats,
- __u64 bytes, __u32 packets)
-{
- u64_stats_update_begin(&bstats->syncp);
- _bstats_update(&bstats->bstats, bytes, packets);
- u64_stats_update_end(&bstats->syncp);
-}
-
-static inline void bstats_cpu_update(struct gnet_stats_basic_cpu *bstats,
- const struct sk_buff *skb)
-{
- u64_stats_update_begin(&bstats->syncp);
- bstats_update(&bstats->bstats, skb);
- u64_stats_update_end(&bstats->syncp);
-}
-
static inline void qdisc_bstats_cpu_update(struct Qdisc *sch,
const struct sk_buff *skb)
{
- bstats_cpu_update(this_cpu_ptr(sch->cpu_bstats), skb);
+ bstats_update(this_cpu_ptr(sch->cpu_bstats), skb);
}
static inline void qdisc_bstats_update(struct Qdisc *sch,
@@ -972,10 +953,9 @@ static inline void qdisc_qstats_qlen_backlog(struct Qdisc *sch, __u32 *qlen,
__u32 *backlog)
{
struct gnet_stats_queue qstats = { 0 };
- __u32 len = qdisc_qlen_sum(sch);
- __gnet_stats_copy_queue(&qstats, sch->cpu_qstats, &sch->qstats, len);
- *qlen = qstats.qlen;
+ gnet_stats_add_queue(&qstats, sch->cpu_qstats, &sch->qstats);
+ *qlen = qstats.qlen + qdisc_qlen(sch);
*backlog = qstats.backlog;
}
@@ -1316,7 +1296,7 @@ void psched_ppscfg_precompute(struct psched_pktrate *r, u64 pktrate64);
struct mini_Qdisc {
struct tcf_proto *filter_list;
struct tcf_block *block;
- struct gnet_stats_basic_cpu __percpu *cpu_bstats;
+ struct gnet_stats_basic_sync __percpu *cpu_bstats;
struct gnet_stats_queue __percpu *cpu_qstats;
struct rcu_head rcu;
};
@@ -1324,7 +1304,7 @@ struct mini_Qdisc {
static inline void mini_qdisc_bstats_cpu_update(struct mini_Qdisc *miniq,
const struct sk_buff *skb)
{
- bstats_cpu_update(this_cpu_ptr(miniq->cpu_bstats), skb);
+ bstats_update(this_cpu_ptr(miniq->cpu_bstats), skb);
}
static inline void mini_qdisc_qstats_cpu_drop(struct mini_Qdisc *miniq)