author     Xin Long <lucien.xin@gmail.com>          2025-01-15 09:27:54 -0500
committer  David S. Miller <davem@davemloft.net>    2025-01-20 09:21:27 +0000
commit     a12c76a03386e32413ae8eaaefa337e491880632 (patch)
tree       2d251a5f78836e97a80c6fbb714e844f0d96c7a5 /include
parent     59372af69d4d71e6487614f1b35712cf241eadb4 (diff)
net: sched: refine software bypass handling in tc_run
This patch addresses issues with filter counting in block (tcf_block),
particularly for software bypass scenarios, by introducing a more accurate
mechanism using useswcnt.

Previously, filtercnt and skipswcnt were introduced by:

  Commit 2081fd3445fe ("net: sched: cls_api: add filter counter") and
  Commit f631ef39d819 ("net: sched: cls_api: add skip_sw counter")

filtercnt tracked all tp (tcf_proto) objects added to a block, and skipswcnt
counted tp objects with the skipsw attribute set.

The problem is that a single tp can contain multiple filters, some with
skipsw and others without, and the current implementation fails in that
case: when the first filter in a tp has skipsw, both skipswcnt and filtercnt
are incremented; adding a second filter without skipsw to the same tp then
does not modify these counters because tp->counted is already set. Software
bypass is decided solely on skipswcnt equaling filtercnt, even when the
block includes filters without skipsw, so filters without skipsw are
inadvertently bypassed.

To address this, the patch introduces useswcnt in block to explicitly count
tp objects containing at least one filter without skipsw. Key changes
include:

  Whenever a filter without skipsw is added, its tp is marked with usesw
  and counted in useswcnt.

  tc_run() now uses useswcnt to determine software bypass, eliminating
  reliance on filtercnt and skipswcnt.

This refined approach prevents software bypass for blocks containing mixed
filters, ensuring correct behavior in tc_run().

Additionally, as atomic operations on useswcnt ensure thread safety and
tp->lock guards access to tp->usesw and tp->counted, the broader lock
down_write(&block->cb_lock) is no longer required in tc_new_tfilter(). This
resolves a performance regression caused by the filter counting mechanism
during parallel filter insertions.

The improvement can be demonstrated with the following script:

  # cat insert_tc_rules.sh
  tc qdisc add dev ens1f0np0 ingress
  for i in $(seq 16); do
      taskset -c $i tc -b rules_$i.txt &
  done
  wait

Each rules_$i.txt file above adds 100000 tc filter rules to ens1f0np0, a
mlx5 driver NIC.

Without this patch:

  # time sh insert_tc_rules.sh
  real    0m50.780s
  user    0m23.556s
  sys     4m13.032s

With this patch:

  # time sh insert_tc_rules.sh
  real    0m17.718s
  user    0m7.807s
  sys     3m45.050s

Fixes: 047f340b36fc ("net: sched: make skip_sw actually skip software")
Reported-by: Shuang Li <shuali@redhat.com>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Reviewed-by: Asbjørn Sloth Tønnesen <ast@fiberby.net>
Tested-by: Asbjørn Sloth Tønnesen <ast@fiberby.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
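For readers less familiar with the cls_api internals, the counting scheme
described above can be modeled in a few lines of user-space C. The names
below deliberately mirror the kernel structures, but this is a standalone
sketch of the idea, not the kernel implementation: the real helper also
checks tc_in_hw(), and the useswcnt update happens in the cls_api core
rather than in the helper itself.

/* Standalone model of the useswcnt accounting described above. */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct block_model {
	atomic_int useswcnt;	/* tps with >= 1 filter that needs software */
};

struct tp_model {
	bool usesw;		/* latched once sw processing is needed */
	struct block_model *block;
};

/* Called for every filter added to a tp: only the first filter that needs
 * software flips tp->usesw and bumps the block counter, so later skip_sw
 * filters in the same tp can never hide it again.  (The kernel helper
 * additionally requires tc_in_hw(flags) before treating a skip_sw filter
 * as not needing software.) */
static void filter_added(struct tp_model *tp, bool skip_sw)
{
	if (tp->usesw || skip_sw)
		return;
	tp->usesw = true;
	atomic_fetch_add_explicit(&tp->block->useswcnt, 1,
				  memory_order_relaxed);
}

/* Mirrors tcf_block_bypass_sw(): software classification may be bypassed
 * only while no tp in the block carries a filter that needs software. */
static bool bypass_sw(struct block_model *block)
{
	return atomic_load_explicit(&block->useswcnt,
				    memory_order_relaxed) == 0;
}

int main(void)
{
	struct block_model block = { .useswcnt = 0 };
	struct tp_model tp = { .usesw = false, .block = &block };

	filter_added(&tp, true);	/* first filter: skip_sw */
	printf("bypass after skip_sw only:  %d\n", bypass_sw(&block)); /* 1 */

	filter_added(&tp, false);	/* second filter: no skip_sw */
	printf("bypass after mixed filters: %d\n", bypass_sw(&block)); /* 0 */
	return 0;
}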
Diffstat (limited to 'include')
-rw-r--r--  include/net/pkt_cls.h      13
-rw-r--r--  include/net/sch_generic.h   5
2 files changed, 13 insertions, 5 deletions
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 22c5ab4269d7..c64fd896b1f9 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -75,11 +75,11 @@ static inline bool tcf_block_non_null_shared(struct tcf_block *block)
}
#ifdef CONFIG_NET_CLS_ACT
-DECLARE_STATIC_KEY_FALSE(tcf_bypass_check_needed_key);
+DECLARE_STATIC_KEY_FALSE(tcf_sw_enabled_key);
static inline bool tcf_block_bypass_sw(struct tcf_block *block)
{
- return block && block->bypass_wanted;
+ return block && !atomic_read(&block->useswcnt);
}
#endif
@@ -760,6 +760,15 @@ tc_cls_common_offload_init(struct flow_cls_common_offload *cls_common,
cls_common->extack = extack;
}
+static inline void tcf_proto_update_usesw(struct tcf_proto *tp, u32 flags)
+{
+ if (tp->usesw)
+ return;
+ if (tc_skip_sw(flags) && tc_in_hw(flags))
+ return;
+ tp->usesw = true;
+}
+
#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
static inline struct tc_skb_ext *tc_skb_ext_alloc(struct sk_buff *skb)
{
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 8074322dd636..d635c5b47eba 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -425,6 +425,7 @@ struct tcf_proto {
spinlock_t lock;
bool deleting;
bool counted;
+ bool usesw;
refcount_t refcnt;
struct rcu_head rcu;
struct hlist_node destroy_ht_node;
@@ -474,9 +475,7 @@ struct tcf_block {
struct flow_block flow_block;
struct list_head owner_list;
bool keep_dst;
- bool bypass_wanted;
- atomic_t filtercnt; /* Number of filters */
- atomic_t skipswcnt; /* Number of skip_sw filters */
+ atomic_t useswcnt;
atomic_t offloadcnt; /* Number of offloaded filters */
unsigned int nooffloaddevcnt; /* Number of devs unable to do offload */
unsigned int lockeddevcnt; /* Number of devs that require rtnl lock. */
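The diffstat above is limited to 'include', so the call sites that consume
tcf_proto_update_usesw() are not part of this view. A hedged, illustrative
sketch of how a classifier's ->change() path could invoke the helper follows;
the function name and placement are placeholders, not the actual call sites
added by the rest of the series.

// SPDX-License-Identifier: GPL-2.0
/* Illustrative sketch only, not the actual patch. */
#include <net/pkt_cls.h>
#include <net/sch_generic.h>

static int example_cls_change(struct tcf_proto *tp, u32 flags)
{
	/* ... validate the request, install the new filter, and attempt
	 * hardware offload so that tc_in_hw(flags) reflects reality ... */

	/* Unless the filter is both skip_sw and actually in hardware, the
	 * tp must keep classifying in software.  The helper latches
	 * tp->usesw; the cls_api core then accounts the tp once in
	 * block->useswcnt, which tc_run() consults for the bypass check. */
	tcf_proto_update_usesw(tp, flags);

	return 0;
}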