diff options
| author | Jakub Kicinski <kuba@kernel.org> | 2026-04-09 18:24:34 -0700 |
|---|---|---|
| committer | Jakub Kicinski <kuba@kernel.org> | 2026-04-09 18:24:35 -0700 |
| commit | 15089225889ba4b29f0263757cd66932fa676cb0 (patch) | |
| tree | 73b8cc252fcebbafad57f5b100c2f774eb7a42c1 /include/net | |
| parent | b6e39e48469e37057fce27a1b87cf6d3e456aa42 (diff) | |
| parent | 65d657d806848add1e1f0632562d7f47d5d5c188 (diff) | |
Merge branch 'netkit-support-for-io_uring-zero-copy-and-af_xdp'
Daniel Borkmann says:
====================
netkit: Support for io_uring zero-copy and AF_XDP
Containers use virtual netdevs to route traffic from a physical netdev
in the host namespace. They do not have access to the physical netdev
in the host and thus can't use memory providers or AF_XDP that require
reconfiguring/restarting queues in the physical netdev.
This patchset adds the concept of queue leasing to virtual netdevs that
allow containers to use memory providers and AF_XDP at native speed.
Leased queues are bound to a real queue in a physical netdev and act
as a proxy.
Memory providers and AF_XDP operations take an ifindex and queue id,
so containers would pass in an ifindex for a virtual netdev and a queue
id of a leased queue, which then gets proxied to the underlying real
queue.
We have implemented support for this concept in netkit and tested the
latter against Nvidia ConnectX-6 (mlx5) as well as Broadcom BCM957504
(bnxt_en) 100G NICs. For more details see the individual patches.
====================
Link: https://patch.msgid.link/20260402231031.447597-1-daniel@iogearbox.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'include/net')
| -rw-r--r-- | include/net/netdev_queues.h | 23 | ||||
| -rw-r--r-- | include/net/netdev_rx_queue.h | 29 | ||||
| -rw-r--r-- | include/net/page_pool/memory_provider.h | 8 |
3 files changed, 48 insertions, 12 deletions
diff --git a/include/net/netdev_queues.h b/include/net/netdev_queues.h index 95ed28212f4e..70c9fe9e83cc 100644 --- a/include/net/netdev_queues.h +++ b/include/net/netdev_queues.h @@ -150,6 +150,11 @@ enum { * When NIC-wide config is changed the callback will * be invoked for all queues. * + * @ndo_queue_create: Create a new RX queue on a virtual device that will + * be paired with a physical device's queue via leasing. + * Return the new queue id on success, negative error + * on failure. + * * @supported_params: Bitmask of supported parameters, see QCFG_*. * * Note that @ndo_queue_mem_alloc and @ndo_queue_mem_free may be called while @@ -178,6 +183,8 @@ struct netdev_queue_mgmt_ops { struct netlink_ext_ack *extack); struct device * (*ndo_queue_get_dma_dev)(struct net_device *dev, int idx); + int (*ndo_queue_create)(struct net_device *dev, + struct netlink_ext_ack *extack); unsigned int supported_params; }; @@ -185,7 +192,7 @@ struct netdev_queue_mgmt_ops { void netdev_queue_config(struct net_device *dev, int rxq, struct netdev_queue_config *qcfg); -bool netif_rxq_has_unreadable_mp(struct net_device *dev, int idx); +bool netif_rxq_has_unreadable_mp(struct net_device *dev, unsigned int rxq_idx); /** * DOC: Lockless queue stopping / waking helpers. @@ -373,6 +380,14 @@ static inline unsigned int netif_xmit_timeout_ms(struct netdev_queue *txq) get_desc, start_thrs); \ }) -struct device *netdev_queue_get_dma_dev(struct net_device *dev, int idx); - -#endif +struct device *netdev_queue_get_dma_dev(struct net_device *dev, + unsigned int idx, + enum netdev_queue_type type); +bool netdev_can_create_queue(const struct net_device *dev, + struct netlink_ext_ack *extack); +bool netdev_can_lease_queue(const struct net_device *dev, + struct netlink_ext_ack *extack); +bool netdev_queue_busy(struct net_device *dev, unsigned int idx, + enum netdev_queue_type type, + struct netlink_ext_ack *extack); +#endif /* _LINUX_NET_QUEUES_H */ diff --git a/include/net/netdev_rx_queue.h b/include/net/netdev_rx_queue.h index 08f81329fc11..7e98c679ea84 100644 --- a/include/net/netdev_rx_queue.h +++ b/include/net/netdev_rx_queue.h @@ -31,6 +31,14 @@ struct netdev_rx_queue { struct napi_struct *napi; struct netdev_queue_config qcfg; struct pp_memory_provider_params mp_params; + + /* If a queue is leased, then the lease pointer is always + * valid. From the physical device it points to the virtual + * queue, and from the virtual device it points to the + * physical queue. + */ + struct netdev_rx_queue *lease; + netdevice_tracker lease_tracker; } ____cacheline_aligned_in_smp; /* @@ -59,6 +67,23 @@ get_netdev_rx_queue_index(struct netdev_rx_queue *queue) return index; } -int netdev_rx_queue_restart(struct net_device *dev, unsigned int rxq); +enum netif_lease_dir { + NETIF_VIRT_TO_PHYS, + NETIF_PHYS_TO_VIRT, +}; -#endif +struct netdev_rx_queue * +__netif_get_rx_queue_lease(struct net_device **dev, unsigned int *rxq, + enum netif_lease_dir dir); + +struct netdev_rx_queue * +netif_get_rx_queue_lease_locked(struct net_device **dev, unsigned int *rxq); +void netif_put_rx_queue_lease_locked(struct net_device *orig_dev, + struct net_device *dev); + +int netdev_rx_queue_restart(struct net_device *dev, unsigned int rxq); +void netdev_rx_queue_lease(struct netdev_rx_queue *rxq_dst, + struct netdev_rx_queue *rxq_src); +void netdev_rx_queue_unlease(struct netdev_rx_queue *rxq_dst, + struct netdev_rx_queue *rxq_src); +#endif /* _LINUX_NETDEV_RX_QUEUE_H */ diff --git a/include/net/page_pool/memory_provider.h b/include/net/page_pool/memory_provider.h index ada4f968960a..255ce4cfd975 100644 --- a/include/net/page_pool/memory_provider.h +++ b/include/net/page_pool/memory_provider.h @@ -23,14 +23,10 @@ bool net_mp_niov_set_dma_addr(struct net_iov *niov, dma_addr_t addr); void net_mp_niov_set_page_pool(struct page_pool *pool, struct net_iov *niov); void net_mp_niov_clear_page_pool(struct net_iov *niov); -int net_mp_open_rxq(struct net_device *dev, unsigned ifq_idx, - struct pp_memory_provider_params *p); -int __net_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx, +int netif_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx, const struct pp_memory_provider_params *p, struct netlink_ext_ack *extack); -void net_mp_close_rxq(struct net_device *dev, unsigned ifq_idx, - struct pp_memory_provider_params *old_p); -void __net_mp_close_rxq(struct net_device *dev, unsigned int rxq_idx, +void netif_mp_close_rxq(struct net_device *dev, unsigned int rxq_idx, const struct pp_memory_provider_params *old_p); /** |
