summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorEric Dumazet <edumazet@google.com>2025-10-24 09:12:40 +0000
committerJakub Kicinski <kuba@kernel.org>2025-10-28 17:41:17 -0700
commitc72568c21b97dbc48d02b769f4eec6667ad13d5a (patch)
treeff30418520f52173e8bbcb0c4ba251e52e25e66a /include
parenta086e9860ce6a751acd71dbec54d24a819dd6baa (diff)
net: rps: softnet_data reorg to make enqueue_to_backlog() fast
enqueue_to_backlog() is showing up in kernel profiles on hosts with many cores, when RFS/RPS is used.

The following softnet_data fields need to be updated:

- input_queue_tail
- input_pkt_queue (next, prev, qlen, lock)
- backlog.state (if input_pkt_queue was empty)

Unfortunately they are currently using two cache lines:

/* --- cacheline 3 boundary (192 bytes) --- */
call_single_data_t csd __attribute__((__aligned__(64))); /* 0xc0 0x20 */
struct softnet_data * rps_ipi_next; /* 0xe0 0x8 */
unsigned int cpu; /* 0xe8 0x4 */
unsigned int input_queue_tail; /* 0xec 0x4 */
struct sk_buff_head input_pkt_queue; /* 0xf0 0x18 */
/* --- cacheline 4 boundary (256 bytes) was 8 bytes ago --- */
struct napi_struct backlog __attribute__((__aligned__(8))); /* 0x108 0x1f0 */

Add one ____cacheline_aligned_in_smp to make sure they now are using a single cache line.

Also, because napi_struct has written fields, make @state its first field.

We want to make sure that cpus adding packets to sd->input_pkt_queue are not slowing down cpus processing their backlog because of false sharing.

After this patch new layout is:

/* --- cacheline 5 boundary (320 bytes) --- */
long int pad[3] __attribute__((__aligned__(64))); /* 0x140 0x18 */
unsigned int input_queue_tail; /* 0x158 0x4 */
/* XXX 4 bytes hole, try to pack */
struct sk_buff_head input_pkt_queue; /* 0x160 0x18 */
struct napi_struct backlog __attribute__((__aligned__(8))); /* 0x178 0x1f0 */

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@google.com>
Link: https://patch.msgid.link/20251024091240.3292546-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'include')
-rw-r--r--include/linux/netdevice.h11
1 files changed, 10 insertions, 1 deletions
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 7f5aad5cc9a1..9c1e5042c5e7 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -377,6 +377,8 @@ struct napi_config {
* Structure for NAPI scheduling similar to tasklet but with weighting
*/
struct napi_struct {
+ /* This field should be first or softnet_data.backlog needs tweaks. */
+ unsigned long state;
/* The poll_list must only be managed by the entity which
* changes the state of the NAPI_STATE_SCHED bit. This means
* whoever atomically sets that bit can add this napi_struct
@@ -385,7 +387,6 @@ struct napi_struct {
*/
struct list_head poll_list;
- unsigned long state;
int weight;
u32 defer_hard_irqs_count;
int (*poll)(struct napi_struct *, int);
@@ -3529,9 +3530,17 @@ struct softnet_data {
call_single_data_t csd ____cacheline_aligned_in_smp;
struct softnet_data *rps_ipi_next;
unsigned int cpu;
+
+ /* We force a cacheline alignment from here, to hold together
+ * input_queue_tail, input_pkt_queue and backlog.state.
+ * We add holes so that backlog.state is the last field
+ * of this cache line.
+ */
+ long pad[3] ____cacheline_aligned_in_smp;
unsigned int input_queue_tail;
#endif
struct sk_buff_head input_pkt_queue;
+
struct napi_struct backlog;
struct numa_drop_counters drop_counters;