summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Eric Dumazet <edumazet@google.com> 2025-09-28 08:49:33 +0000
committer Paolo Abeni <pabeni@redhat.com> 2025-09-30 15:45:53 +0200
commit 844c9db7f7f5fe1b0b53ed9f1c2bc7313b3021c8 (patch)
tree 9dd9c2e17b08ae6dd785ea2e7dbf7ac65c3f70f6
parent 9c94ae6bb0b2895024b6e29fcc1cbec968b4776a (diff)
net: use llist for sd->defer_list

Get rid of sd->defer_lock and adopt llist operations.

We optimize skb_attempt_defer_free() for the common case, where the packet is queued. Otherwise sd->defer_count is increasing, until skb_defer_free_flush() clears it.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Jason Xing <kerneljasonxing@gmail.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@google.com>
Link: https://patch.msgid.link/20250928084934.3266948-3-edumazet@google.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>

-rw-r--r-- include/linux/netdevice.h 8
-rw-r--r-- net/core/dev.c 18
-rw-r--r-- net/core/skbuff.c 15
3 files changed, 17 insertions, 24 deletions
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 27e3fa69253f..5c9aa16933d1 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3537,10 +3537,10 @@ struct softnet_data {
struct numa_drop_counters drop_counters;
/* Another possibly contended cache line */
- spinlock_t defer_lock ____cacheline_aligned_in_smp;
- atomic_t defer_count;
- int defer_ipi_scheduled;
- struct sk_buff *defer_list;
+ struct llist_head defer_list ____cacheline_aligned_in_smp;
+ atomic_long_t defer_count;
+
+ int defer_ipi_scheduled ____cacheline_aligned_in_smp;
call_single_data_t defer_csd;
};
diff --git a/net/core/dev.c b/net/core/dev.c
index 8566678d8344..fb67372774de 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6717,22 +6717,16 @@ EXPORT_SYMBOL(napi_complete_done);
static void skb_defer_free_flush(struct softnet_data *sd)
{
+ struct llist_node *free_list;
struct sk_buff *skb, *next;
- /* Paired with WRITE_ONCE() in skb_attempt_defer_free() */
- if (!READ_ONCE(sd->defer_list))
+ if (llist_empty(&sd->defer_list))
return;
+ atomic_long_set(&sd->defer_count, 0);
+ free_list = llist_del_all(&sd->defer_list);
- spin_lock(&sd->defer_lock);
- skb = sd->defer_list;
- sd->defer_list = NULL;
- atomic_set(&sd->defer_count, 0);
- spin_unlock(&sd->defer_lock);
-
- while (skb != NULL) {
- next = skb->next;
+ llist_for_each_entry_safe(skb, next, free_list, ll_node) {
napi_consume_skb(skb, 1);
- skb = next;
}
}
@@ -12995,7 +12989,7 @@ static int __init net_dev_init(void)
sd->cpu = i;
#endif
INIT_CSD(&sd->defer_csd, trigger_rx_softirq, sd);
- spin_lock_init(&sd->defer_lock);
+ init_llist_head(&sd->defer_list);
gro_init(&sd->backlog.gro);
sd->backlog.poll = process_backlog;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 16cd357d62a6..17455fc1e692 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -7185,6 +7185,7 @@ static void kfree_skb_napi_cache(struct sk_buff *skb)
*/
void skb_attempt_defer_free(struct sk_buff *skb)
{
+ unsigned long defer_count;
int cpu = skb->alloc_cpu;
struct softnet_data *sd;
unsigned int defer_max;
@@ -7202,17 +7203,15 @@ nodefer: kfree_skb_napi_cache(skb);
sd = &per_cpu(softnet_data, cpu);
defer_max = READ_ONCE(net_hotdata.sysctl_skb_defer_max);
- if (atomic_read(&sd->defer_count) >= defer_max)
+ defer_count = atomic_long_inc_return(&sd->defer_count);
+
+ if (defer_count >= defer_max)
goto nodefer;
- spin_lock_bh(&sd->defer_lock);
- /* Send an IPI every time queue reaches half capacity. */
- kick = (atomic_inc_return(&sd->defer_count) - 1) == (defer_max >> 1);
+ llist_add(&skb->ll_node, &sd->defer_list);
- skb->next = sd->defer_list;
- /* Paired with READ_ONCE() in skb_defer_free_flush() */
- WRITE_ONCE(sd->defer_list, skb);
- spin_unlock_bh(&sd->defer_lock);
+ /* Send an IPI every time queue reaches half capacity. */
+ kick = (defer_count - 1) == (defer_max >> 1);
/* Make sure to trigger NET_RX_SOFTIRQ on the remote CPU
* if we are unlucky enough (this seems very unlikely).