summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorEric Dumazet <edumazet@google.com>2025-05-15 10:03:54 +0000
committerJakub Kicinski <kuba@kernel.org>2025-05-16 16:03:48 -0700
commit9cd5ef0b8c04c46a15c8f5d002f02ea0d0477790 (patch)
tree8fe626057698681774a6e9affe03a7d020530e99 /include
parentf24f7b2f3af9e008ded20f804d7829ee2efd43f2 (diff)
net: rfs: add sock_rps_delete_flow() helper
RFS can exhibit lower performance for workloads using short-lived flows and a small set of 4-tuple. This is often the case for load-testers, using a pair of hosts, if the server has a single listener port. Typical use case : Server : tcp_crr -T128 -F1000 -6 -U -l30 -R 14250 Client : tcp_crr -T128 -F1000 -6 -U -l30 -c -H server | grep local_throughput This is because RFS global hash table contains stale information, when the same RSS key is recycled for another socket and another cpu. Make sure to undo the changes and go back to initial state when a flow is disconnected. Performance of the above test is increased by 22 %, going from 372604 transactions per second to 457773. Signed-off-by: Eric Dumazet <edumazet@google.com> Reported-by: Octavian Purdila <tavip@google.com> Reviewed-by: Neal Cardwell <ncardwell@google.com> Link: https://patch.msgid.link/20250515100354.3339920-1-edumazet@google.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'include')
-rw-r--r--include/net/rps.h24
1 files changed, 24 insertions, 0 deletions
diff --git a/include/net/rps.h b/include/net/rps.h
index 507f4aa5d39b..d8ab3a08bcc4 100644
--- a/include/net/rps.h
+++ b/include/net/rps.h
@@ -123,6 +123,30 @@ static inline void sock_rps_record_flow(const struct sock *sk)
#endif
}
+static inline void sock_rps_delete_flow(const struct sock *sk)
+{
+#ifdef CONFIG_RPS
+ struct rps_sock_flow_table *table;
+ u32 hash, index;
+
+ if (!static_branch_unlikely(&rfs_needed))
+ return;
+
+ hash = READ_ONCE(sk->sk_rxhash);
+ if (!hash)
+ return;
+
+ rcu_read_lock();
+ table = rcu_dereference(net_hotdata.rps_sock_flow_table);
+ if (table) {
+ index = hash & table->mask;
+ if (READ_ONCE(table->ents[index]) != RPS_NO_CPU)
+ WRITE_ONCE(table->ents[index], RPS_NO_CPU);
+ }
+ rcu_read_unlock();
+#endif
+}
+
static inline u32 rps_input_queue_tail_incr(struct softnet_data *sd)
{
#ifdef CONFIG_RPS