summaryrefslogtreecommitdiff
path: root/include/net
diff options
context:
space:
mode:
authorJakub Kicinski <kuba@kernel.org>2026-04-09 18:24:34 -0700
committerJakub Kicinski <kuba@kernel.org>2026-04-09 18:24:35 -0700
commit15089225889ba4b29f0263757cd66932fa676cb0 (patch)
tree73b8cc252fcebbafad57f5b100c2f774eb7a42c1 /include/net
parentb6e39e48469e37057fce27a1b87cf6d3e456aa42 (diff)
parent65d657d806848add1e1f0632562d7f47d5d5c188 (diff)
Merge branch 'netkit-support-for-io_uring-zero-copy-and-af_xdp'
Daniel Borkmann says: ==================== netkit: Support for io_uring zero-copy and AF_XDP Containers use virtual netdevs to route traffic from a physical netdev in the host namespace. They do not have access to the physical netdev in the host and thus can't use memory providers or AF_XDP that require reconfiguring/restarting queues in the physical netdev. This patchset adds the concept of queue leasing to virtual netdevs that allow containers to use memory providers and AF_XDP at native speed. Leased queues are bound to a real queue in a physical netdev and act as a proxy. Memory providers and AF_XDP operations take an ifindex and queue id, so containers would pass in an ifindex for a virtual netdev and a queue id of a leased queue, which then gets proxied to the underlying real queue. We have implemented support for this concept in netkit and tested the latter against Nvidia ConnectX-6 (mlx5) as well as Broadcom BCM957504 (bnxt_en) 100G NICs. For more details see the individual patches. ==================== Link: https://patch.msgid.link/20260402231031.447597-1-daniel@iogearbox.net Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'include/net')
-rw-r--r--include/net/netdev_queues.h23
-rw-r--r--include/net/netdev_rx_queue.h29
-rw-r--r--include/net/page_pool/memory_provider.h8
3 files changed, 48 insertions, 12 deletions
diff --git a/include/net/netdev_queues.h b/include/net/netdev_queues.h
index 95ed28212f4e..70c9fe9e83cc 100644
--- a/include/net/netdev_queues.h
+++ b/include/net/netdev_queues.h
@@ -150,6 +150,11 @@ enum {
* When NIC-wide config is changed the callback will
* be invoked for all queues.
*
+ * @ndo_queue_create: Create a new RX queue on a virtual device that will
+ * be paired with a physical device's queue via leasing.
+ * Return the new queue id on success, negative error
+ * on failure.
+ *
* @supported_params: Bitmask of supported parameters, see QCFG_*.
*
* Note that @ndo_queue_mem_alloc and @ndo_queue_mem_free may be called while
@@ -178,6 +183,8 @@ struct netdev_queue_mgmt_ops {
struct netlink_ext_ack *extack);
struct device * (*ndo_queue_get_dma_dev)(struct net_device *dev,
int idx);
+ int (*ndo_queue_create)(struct net_device *dev,
+ struct netlink_ext_ack *extack);
unsigned int supported_params;
};
@@ -185,7 +192,7 @@ struct netdev_queue_mgmt_ops {
void netdev_queue_config(struct net_device *dev, int rxq,
struct netdev_queue_config *qcfg);
-bool netif_rxq_has_unreadable_mp(struct net_device *dev, int idx);
+bool netif_rxq_has_unreadable_mp(struct net_device *dev, unsigned int rxq_idx);
/**
* DOC: Lockless queue stopping / waking helpers.
@@ -373,6 +380,14 @@ static inline unsigned int netif_xmit_timeout_ms(struct netdev_queue *txq)
get_desc, start_thrs); \
})
-struct device *netdev_queue_get_dma_dev(struct net_device *dev, int idx);
-
-#endif
+struct device *netdev_queue_get_dma_dev(struct net_device *dev,
+ unsigned int idx,
+ enum netdev_queue_type type);
+bool netdev_can_create_queue(const struct net_device *dev,
+ struct netlink_ext_ack *extack);
+bool netdev_can_lease_queue(const struct net_device *dev,
+ struct netlink_ext_ack *extack);
+bool netdev_queue_busy(struct net_device *dev, unsigned int idx,
+ enum netdev_queue_type type,
+ struct netlink_ext_ack *extack);
+#endif /* _LINUX_NET_QUEUES_H */
diff --git a/include/net/netdev_rx_queue.h b/include/net/netdev_rx_queue.h
index 08f81329fc11..7e98c679ea84 100644
--- a/include/net/netdev_rx_queue.h
+++ b/include/net/netdev_rx_queue.h
@@ -31,6 +31,14 @@ struct netdev_rx_queue {
struct napi_struct *napi;
struct netdev_queue_config qcfg;
struct pp_memory_provider_params mp_params;
+
+ /* If a queue is leased, then the lease pointer is always
+ * valid. From the physical device it points to the virtual
+ * queue, and from the virtual device it points to the
+ * physical queue.
+ */
+ struct netdev_rx_queue *lease;
+ netdevice_tracker lease_tracker;
} ____cacheline_aligned_in_smp;
/*
@@ -59,6 +67,23 @@ get_netdev_rx_queue_index(struct netdev_rx_queue *queue)
return index;
}
-int netdev_rx_queue_restart(struct net_device *dev, unsigned int rxq);
+enum netif_lease_dir {
+ NETIF_VIRT_TO_PHYS,
+ NETIF_PHYS_TO_VIRT,
+};
-#endif
+struct netdev_rx_queue *
+__netif_get_rx_queue_lease(struct net_device **dev, unsigned int *rxq,
+ enum netif_lease_dir dir);
+
+struct netdev_rx_queue *
+netif_get_rx_queue_lease_locked(struct net_device **dev, unsigned int *rxq);
+void netif_put_rx_queue_lease_locked(struct net_device *orig_dev,
+ struct net_device *dev);
+
+int netdev_rx_queue_restart(struct net_device *dev, unsigned int rxq);
+void netdev_rx_queue_lease(struct netdev_rx_queue *rxq_dst,
+ struct netdev_rx_queue *rxq_src);
+void netdev_rx_queue_unlease(struct netdev_rx_queue *rxq_dst,
+ struct netdev_rx_queue *rxq_src);
+#endif /* _LINUX_NETDEV_RX_QUEUE_H */
diff --git a/include/net/page_pool/memory_provider.h b/include/net/page_pool/memory_provider.h
index ada4f968960a..255ce4cfd975 100644
--- a/include/net/page_pool/memory_provider.h
+++ b/include/net/page_pool/memory_provider.h
@@ -23,14 +23,10 @@ bool net_mp_niov_set_dma_addr(struct net_iov *niov, dma_addr_t addr);
void net_mp_niov_set_page_pool(struct page_pool *pool, struct net_iov *niov);
void net_mp_niov_clear_page_pool(struct net_iov *niov);
-int net_mp_open_rxq(struct net_device *dev, unsigned ifq_idx,
- struct pp_memory_provider_params *p);
-int __net_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx,
+int netif_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx,
const struct pp_memory_provider_params *p,
struct netlink_ext_ack *extack);
-void net_mp_close_rxq(struct net_device *dev, unsigned ifq_idx,
- struct pp_memory_provider_params *old_p);
-void __net_mp_close_rxq(struct net_device *dev, unsigned int rxq_idx,
+void netif_mp_close_rxq(struct net_device *dev, unsigned int rxq_idx,
const struct pp_memory_provider_params *old_p);
/**