From 291515c7640962f8865e4c54897a5e91526b450c Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Tue, 25 Feb 2025 18:17:43 +0100 Subject: net: gro: decouple GRO from the NAPI layer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In fact, these two are not tied closely to each other. The only requirements to GRO are to use it in the BH context and have some sane limits on the packet batches, e.g. NAPI has a limit of its budget (64/8/etc.). Move purely GRO fields into a new structure, &gro_node. Embed it into &napi_struct and adjust all the references. gro_node::cached_napi_id is effectively the same as napi_struct::napi_id, but to be used on GRO hotpath to mark skbs. napi_struct::napi_id is now a fully control path field. Three Ethernet drivers use napi_gro_flush() not really meant to be exported, so move it to and add that include there. napi_gro_receive() is used in more than 100 drivers, keep it in . This does not make GRO ready to use outside of the NAPI context yet. Tested-by: Daniel Xu Acked-by: Jakub Kicinski Reviewed-by: Toke Høiland-Jørgensen Signed-off-by: Alexander Lobakin Signed-off-by: Paolo Abeni --- include/linux/netdevice.h | 37 ++++++++++++++++++++++++++++--------- include/net/busy_poll.h | 12 +++++++++--- include/net/gro.h | 35 +++++++++++++++++++++++++---------- 3 files changed, 62 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 2094d3edda73..26a0c4e4d963 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -340,11 +340,27 @@ struct gro_list { }; /* - * size of gro hash buckets, must less than bit number of - * napi_struct::gro_bitmask + * size of gro hash buckets, must be <= the number of bits in + * gro_node::bitmask */ #define GRO_HASH_BUCKETS 8 +/** + * struct gro_node - structure to support Generic Receive Offload + * @bitmask: bitmask to indicate used buckets in @hash + * @hash: hashtable of pending aggregated skbs, separated by flows + * @rx_list: list of pending ``GRO_NORMAL`` skbs + * @rx_count: cached current length of @rx_list + * @cached_napi_id: napi_struct::napi_id cached for hotpath, 0 for standalone + */ +struct gro_node { + unsigned long bitmask; + struct gro_list hash[GRO_HASH_BUCKETS]; + struct list_head rx_list; + u32 rx_count; + u32 cached_napi_id; +}; + /* * Structure for per-NAPI config */ @@ -371,7 +387,6 @@ struct napi_struct { unsigned long state; int weight; u32 defer_hard_irqs_count; - unsigned long gro_bitmask; int (*poll)(struct napi_struct *, int); #ifdef CONFIG_NETPOLL /* CPU actively polling if netpoll is configured */ @@ -380,11 +395,8 @@ struct napi_struct { /* CPU on which NAPI has been scheduled for processing */ int list_owner; struct net_device *dev; - struct gro_list gro_hash[GRO_HASH_BUCKETS]; struct sk_buff *skb; - struct list_head rx_list; /* Pending GRO_NORMAL skbs */ - int rx_count; /* length of rx_list */ - unsigned int napi_id; /* protected by netdev_lock */ + struct gro_node gro; struct hrtimer timer; /* all fields past this point are write-protected by netdev_lock */ struct task_struct *thread; @@ -392,6 +404,7 @@ struct napi_struct { unsigned long irq_suspend_timeout; u32 defer_hard_irqs; /* control-path-only fields follow */ + u32 napi_id; struct list_head dev_list; struct hlist_node napi_hash_node; int irq; @@ -4131,8 +4144,14 @@ int netif_receive_skb(struct sk_buff *skb); int netif_receive_skb_core(struct sk_buff *skb); void netif_receive_skb_list_internal(struct list_head *head); void netif_receive_skb_list(struct list_head *head); -gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb); -void napi_gro_flush(struct napi_struct *napi, bool flush_old); +gro_result_t gro_receive_skb(struct gro_node *gro, struct sk_buff *skb); + +static inline gro_result_t napi_gro_receive(struct napi_struct *napi, + struct sk_buff *skb) +{ + return gro_receive_skb(&napi->gro, skb); +} + struct sk_buff *napi_get_frags(struct napi_struct *napi); gro_result_t napi_gro_frags(struct napi_struct *napi); diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h index cab6146a510a..6e172d0f6ef5 100644 --- a/include/net/busy_poll.h +++ b/include/net/busy_poll.h @@ -127,18 +127,24 @@ static inline void sk_busy_loop(struct sock *sk, int nonblock) } /* used in the NIC receive handler to mark the skb */ -static inline void skb_mark_napi_id(struct sk_buff *skb, - struct napi_struct *napi) +static inline void __skb_mark_napi_id(struct sk_buff *skb, + const struct gro_node *gro) { #ifdef CONFIG_NET_RX_BUSY_POLL /* If the skb was already marked with a valid NAPI ID, avoid overwriting * it. */ if (!napi_id_valid(skb->napi_id)) - skb->napi_id = napi->napi_id; + skb->napi_id = gro->cached_napi_id; #endif } +static inline void skb_mark_napi_id(struct sk_buff *skb, + const struct napi_struct *napi) +{ + __skb_mark_napi_id(skb, &napi->gro); +} + /* used in the protocol handler to propagate the napi_id to the socket */ static inline void sk_mark_napi_id(struct sock *sk, const struct sk_buff *skb) { diff --git a/include/net/gro.h b/include/net/gro.h index 7b548f91754b..38d70c69ff80 100644 --- a/include/net/gro.h +++ b/include/net/gro.h @@ -509,26 +509,41 @@ static inline int gro_receive_network_flush(const void *th, const void *th2, int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb); int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb); +void __gro_flush(struct gro_node *gro, bool flush_old); + +static inline void gro_flush(struct gro_node *gro, bool flush_old) +{ + if (!gro->bitmask) + return; + + __gro_flush(gro, flush_old); +} + +static inline void napi_gro_flush(struct napi_struct *napi, bool flush_old) +{ + gro_flush(&napi->gro, flush_old); +} /* Pass the currently batched GRO_NORMAL SKBs up to the stack. */ -static inline void gro_normal_list(struct napi_struct *napi) +static inline void gro_normal_list(struct gro_node *gro) { - if (!napi->rx_count) + if (!gro->rx_count) return; - netif_receive_skb_list_internal(&napi->rx_list); - INIT_LIST_HEAD(&napi->rx_list); - napi->rx_count = 0; + netif_receive_skb_list_internal(&gro->rx_list); + INIT_LIST_HEAD(&gro->rx_list); + gro->rx_count = 0; } /* Queue one GRO_NORMAL SKB up for list processing. If batch size exceeded, * pass the whole batch up to the stack. */ -static inline void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb, int segs) +static inline void gro_normal_one(struct gro_node *gro, struct sk_buff *skb, + int segs) { - list_add_tail(&skb->list, &napi->rx_list); - napi->rx_count += segs; - if (napi->rx_count >= READ_ONCE(net_hotdata.gro_normal_batch)) - gro_normal_list(napi); + list_add_tail(&skb->list, &gro->rx_list); + gro->rx_count += segs; + if (gro->rx_count >= READ_ONCE(net_hotdata.gro_normal_batch)) + gro_normal_list(gro); } /* This function is the alternative of 'inet_iif' and 'inet_sdif' -- cgit v1.2.3 From 388d31417ce0f1d08a1a86cab4c1dd700e9e9481 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Tue, 25 Feb 2025 18:17:44 +0100 Subject: net: gro: expose GRO init/cleanup to use outside of NAPI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make GRO init and cleanup functions global to be able to use GRO without a NAPI instance. Taking into account already global gro_flush(), it's now fully usable standalone. New functions are not exported, since they're not supposed to be used outside of the kernel core code. Tested-by: Daniel Xu Reviewed-by: Jakub Kicinski Reviewed-by: Toke Høiland-Jørgensen Signed-off-by: Alexander Lobakin Signed-off-by: Paolo Abeni --- include/net/gro.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/net/gro.h b/include/net/gro.h index 38d70c69ff80..22d3a69e4404 100644 --- a/include/net/gro.h +++ b/include/net/gro.h @@ -546,6 +546,9 @@ static inline void gro_normal_one(struct gro_node *gro, struct sk_buff *skb, gro_normal_list(gro); } +void gro_init(struct gro_node *gro); +void gro_cleanup(struct gro_node *gro); + /* This function is the alternative of 'inet_iif' and 'inet_sdif' * functions in case we can not rely on fields of IPCB. * -- cgit v1.2.3 From 859d6acd94cc4ad65e9eb3fa2a9815a19e5b35cf Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Tue, 25 Feb 2025 18:17:47 +0100 Subject: net: skbuff: introduce napi_skb_cache_get_bulk() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a function to get an array of skbs from the NAPI percpu cache. It's supposed to be a drop-in replacement for kmem_cache_alloc_bulk(skbuff_head_cache, GFP_ATOMIC) and xdp_alloc_skb_bulk(GFP_ATOMIC). The difference (apart from the requirement to call it only from the BH) is that it tries to use as many NAPI cache entries for skbs as possible, and allocate new ones only if needed. The logic is as follows: * there is enough skbs in the cache: decache them and return to the caller; * not enough: try refilling the cache first. If there is now enough skbs, return; * still not enough: try allocating skbs directly to the output array with %GFP_ZERO, maybe we'll be able to get some. If there's now enough, return; * still not enough: return as many as we were able to obtain. Most of times, if called from the NAPI polling loop, the first one will be true, sometimes (rarely) the second one. The third and the fourth -- only under heavy memory pressure. It can save significant amounts of CPU cycles if there are GRO cycles and/or Tx completion cycles (anything that descends to napi_skb_cache_put()) happening on this CPU. Tested-by: Daniel Xu Reviewed-by: Toke Høiland-Jørgensen Signed-off-by: Alexander Lobakin Signed-off-by: Paolo Abeni --- include/linux/skbuff.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 171aa15f6541..14517e95a46c 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1320,6 +1320,7 @@ struct sk_buff *build_skb_around(struct sk_buff *skb, void *data, unsigned int frag_size); void skb_attempt_defer_free(struct sk_buff *skb); +u32 napi_skb_cache_get_bulk(void **skbs, u32 n); struct sk_buff *napi_build_skb(void *data, unsigned int frag_size); struct sk_buff *slab_build_skb(void *data); -- cgit v1.2.3 From b696d289c07d8480a7d4752e448f4ee2bee9e443 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Tue, 25 Feb 2025 18:17:50 +0100 Subject: xdp: remove xdp_alloc_skb_bulk() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The only user was veth, which now uses napi_skb_cache_get_bulk(). It's now preferred over a direct allocation and is exported as well, so remove this one. Reviewed-by: Toke Høiland-Jørgensen Signed-off-by: Alexander Lobakin Signed-off-by: Paolo Abeni --- include/net/xdp.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/xdp.h b/include/net/xdp.h index 4dafc5e021f1..48efacbaa35d 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -343,7 +343,6 @@ struct sk_buff *__xdp_build_skb_from_frame(struct xdp_frame *xdpf, struct net_device *dev); struct sk_buff *xdp_build_skb_from_frame(struct xdp_frame *xdpf, struct net_device *dev); -int xdp_alloc_skb_bulk(void **skbs, int n_skb, gfp_t gfp); struct xdp_frame *xdpf_clone(struct xdp_frame *xdpf); static inline -- cgit v1.2.3