| author | Eric Dumazet <edumazet@google.com> | 2025-10-24 09:12:40 +0000 |
|---|---|---|
| committer | Jakub Kicinski <kuba@kernel.org> | 2025-10-28 17:41:17 -0700 |
| commit | c72568c21b97dbc48d02b769f4eec6667ad13d5a | |
| tree | ff30418520f52173e8bbcb0c4ba251e52e25e66a | |
| parent | a086e9860ce6a751acd71dbec54d24a819dd6baa | |
net: rps: softnet_data reorg to make enqueue_to_backlog() fast
enqueue_to_backlog() is showing up in kernel profiles on hosts
with many cores, when RFS/RPS is used.
The following softnet_data fields need to be updated (see the sketch after this list):
- input_queue_tail
- input_pkt_queue (next, prev, qlen, lock)
- backlog.state (if input_pkt_queue was empty)
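As a rough illustration, here is a heavily simplified userspace mock of that
write path, with locking elided and hypothetical type names; this is not the
kernel source:

/* Heavily simplified userspace sketch of the enqueue_to_backlog() write
 * path; mock types, no locking, not the kernel source. */
#include <stdbool.h>

struct mock_skb { struct mock_skb *next, *prev; };

struct mock_softnet_data {
	unsigned int input_queue_tail;		/* bumped on every enqueue  */
	struct {
		struct mock_skb *next, *prev;	/* list head pointers       */
		unsigned int qlen;		/* queue length             */
		unsigned int lock;		/* a spinlock in the kernel */
	} input_pkt_queue;
	struct { unsigned long state; } backlog; /* napi_struct, truncated */
};

static void mock_enqueue(struct mock_softnet_data *sd, struct mock_skb *skb)
{
	(void)skb;	/* the real code tail-inserts it via __skb_queue_tail() */

	/* 1. Tail insertion dirties input_pkt_queue.{next,prev,qlen,lock}. */
	bool was_empty = (sd->input_pkt_queue.qlen++ == 0);

	/* 2. The tail counter feeds RFS flow accounting. */
	sd->input_queue_tail++;

	/* 3. If the queue was empty, the backlog NAPI gets scheduled by
	 *    setting a bit in backlog.state (NAPI_STATE_SCHED). */
	if (was_empty)
		sd->backlog.state |= 1UL;
}

int main(void)
{
	struct mock_softnet_data sd = { 0 };
	struct mock_skb skb = { 0 };

	mock_enqueue(&sd, &skb);
	return 0;
}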
Unfortunately they currently span two cache lines:
/* --- cacheline 3 boundary (192 bytes) --- */
call_single_data_t csd __attribute__((__aligned__(64))); /* 0xc0 0x20 */
struct softnet_data * rps_ipi_next; /* 0xe0 0x8 */
unsigned int cpu; /* 0xe8 0x4 */
unsigned int input_queue_tail; /* 0xec 0x4 */
struct sk_buff_head input_pkt_queue; /* 0xf0 0x18 */
/* --- cacheline 4 boundary (256 bytes) was 8 bytes ago --- */
struct napi_struct backlog __attribute__((__aligned__(8))); /* 0x108 0x1f0 */
Add one ____cacheline_aligned_in_smp to make sure they now fit in
a single cache line.
Also, because napi_struct contains written fields, make @state its first
field, so that backlog.state lands in this same cache line.
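For reference, on SMP builds ____cacheline_aligned_in_smp boils down to a
plain alignment attribute. A simplified rendering of the include/linux/cache.h
definitions follows; the exact macro chain and byte count are arch- and
config-dependent:

/* Simplified rendering of include/linux/cache.h; the real headers derive
 * SMP_CACHE_BYTES from L1_CACHE_BYTES and make the attribute a no-op on
 * !CONFIG_SMP builds. */
#define SMP_CACHE_BYTES 64	/* typical x86_64 value */
#define ____cacheline_aligned_in_smp __attribute__((__aligned__(SMP_CACHE_BYTES)))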
We want to make sure that cpus adding packets to sd->input_pkt_queue
are not slowing down cpus processing their backlog because of
false sharing.
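False sharing is easy to reproduce in userspace. In the sketch below
(illustrative names only, not a model of the kernel structures), two threads
increment two counters; when the counters share a 64-byte line, each store
invalidates the other thread's cached copy, and the run is typically several
times slower than with the counters on separate lines:

/* Userspace illustration of false sharing.  Build with:
 *   gcc -O2 -pthread false_sharing.c
 */
#include <pthread.h>
#include <stdio.h>
#include <time.h>

#define ITERS 100000000UL

static struct shared {
	_Alignas(64) unsigned long enqueue_ctr;	/* line 0           */
	unsigned long near_ctr;			/* line 0: shared   */
	_Alignas(64) unsigned long far_ctr;	/* line 1: private  */
} s;

static void *bump(void *arg)
{
	volatile unsigned long *ctr = arg;

	for (unsigned long i = 0; i < ITERS; i++)
		(*ctr)++;
	return NULL;
}

static double timed_run(unsigned long *other)
{
	pthread_t a, b;
	struct timespec t0, t1;

	clock_gettime(CLOCK_MONOTONIC, &t0);
	pthread_create(&a, NULL, bump, &s.enqueue_ctr);
	pthread_create(&b, NULL, bump, other);
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	clock_gettime(CLOCK_MONOTONIC, &t1);
	return (t1.tv_sec - t0.tv_sec) + (t1.tv_nsec - t0.tv_nsec) / 1e9;
}

int main(void)
{
	printf("same cache line:      %.2fs\n", timed_run(&s.near_ctr));
	printf("separate cache lines: %.2fs\n", timed_run(&s.far_ctr));
	return 0;
}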
After this patch, the new layout is as follows (the offsets are re-checked
in the sketch after the dump):
/* --- cacheline 5 boundary (320 bytes) --- */
long int pad[3] __attribute__((__aligned__(64))); /* 0x140 0x18 */
unsigned int input_queue_tail; /* 0x158 0x4 */
/* XXX 4 bytes hole, try to pack */
struct sk_buff_head input_pkt_queue; /* 0x160 0x18 */
struct napi_struct backlog __attribute__((__aligned__(8))); /* 0x178 0x1f0 */
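As a sanity check of the arithmetic above (assuming the 64-byte cache lines
shown by pahole), a mock struct with the same member sizes reproduces the
offsets at compile time; mock_sd is a hypothetical stand-in, not the kernel's
softnet_data:

/* Compile-time re-check of the pahole offsets above, using a mock struct
 * with the same member sizes and alignments. */
#include <assert.h>
#include <stddef.h>

struct mock_sd {
	char before[0x140];			/* cachelines 0-4          */
	_Alignas(64) long pad[3];		/* 0x140, 0x18 bytes       */
	unsigned int input_queue_tail;		/* 0x158, then 4-byte hole */
	struct {
		void *next, *prev;		/* sk_buff_head layout:    */
		unsigned int qlen, lock;	/* 2 pointers + 2 u32      */
	} input_pkt_queue;			/* 0x160, 0x18 bytes       */
	struct {
		unsigned long state;		/* now napi_struct's first */
	} backlog;				/* field: 0x178 - 0x180    */
};

static_assert(offsetof(struct mock_sd, pad) == 0x140, "pad");
static_assert(offsetof(struct mock_sd, input_queue_tail) == 0x158, "tail");
static_assert(offsetof(struct mock_sd, input_pkt_queue) == 0x160, "queue");
static_assert(offsetof(struct mock_sd, backlog) == 0x178, "state");
/* 0x140 / 64 == 0x178 / 64 == 5: everything sits in cacheline 5, and
 * backlog.state ends exactly at the 0x180 (384-byte) boundary. */
static_assert(offsetof(struct mock_sd, backlog) / 64 ==
	      offsetof(struct mock_sd, pad) / 64, "one cache line");

int main(void) { return 0; }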
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@google.com>
Link: https://patch.msgid.link/20251024091240.3292546-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'include')
| -rw-r--r-- | include/linux/netdevice.h | 11 |
1 file changed, 10 insertions(+), 1 deletion(-)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 7f5aad5cc9a1..9c1e5042c5e7 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -377,6 +377,8 @@ struct napi_config {
  * Structure for NAPI scheduling similar to tasklet but with weighting
  */
 struct napi_struct {
+	/* This field should be first or softnet_data.backlog needs tweaks. */
+	unsigned long	state;
 	/* The poll_list must only be managed by the entity which
 	 * changes the state of the NAPI_STATE_SCHED bit. This means
 	 * whoever atomically sets that bit can add this napi_struct
@@ -385,7 +387,6 @@ struct napi_struct {
 	 */
 	struct list_head	poll_list;
 
-	unsigned long		state;
 	int			weight;
 	u32			defer_hard_irqs_count;
 	int			(*poll)(struct napi_struct *, int);
@@ -3529,9 +3530,17 @@ struct softnet_data {
 	call_single_data_t	csd ____cacheline_aligned_in_smp;
 	struct softnet_data	*rps_ipi_next;
 	unsigned int		cpu;
+
+	/* We force a cacheline alignment from here, to hold together
+	 * input_queue_tail, input_pkt_queue and backlog.state.
+	 * We add holes so that backlog.state is the last field
+	 * of this cache line.
+	 */
+	long			pad[3] ____cacheline_aligned_in_smp;
 	unsigned int		input_queue_tail;
 #endif
 	struct sk_buff_head	input_pkt_queue;
+
 	struct napi_struct	backlog;
 	struct numa_drop_counters	drop_counters;
