summary | refs | log | tree | commit | diff
path: root/Documentation
diff options
context:
space:
mode:
author: Jakub Kicinski <kuba@kernel.org> 2026-04-09 18:24:34 -0700
committer: Jakub Kicinski <kuba@kernel.org> 2026-04-09 18:24:35 -0700
commit: 15089225889ba4b29f0263757cd66932fa676cb0 (patch)
tree: 73b8cc252fcebbafad57f5b100c2f774eb7a42c1 /Documentation
parent: b6e39e48469e37057fce27a1b87cf6d3e456aa42 (diff)
parent: 65d657d806848add1e1f0632562d7f47d5d5c188 (diff)
Merge branch 'netkit-support-for-io_uring-zero-copy-and-af_xdp'
Daniel Borkmann says:

====================
netkit: Support for io_uring zero-copy and AF_XDP

Containers use virtual netdevs to route traffic from a physical netdev in the host namespace. They do not have access to the physical netdev in the host and thus can't use memory providers or AF_XDP that require reconfiguring/restarting queues in the physical netdev.

This patchset adds the concept of queue leasing to virtual netdevs that allow containers to use memory providers and AF_XDP at native speed. Leased queues are bound to a real queue in a physical netdev and act as a proxy. Memory providers and AF_XDP operations take an ifindex and queue id, so containers would pass in an ifindex for a virtual netdev and a queue id of a leased queue, which then gets proxied to the underlying real queue.

We have implemented support for this concept in netkit and tested the latter against Nvidia ConnectX-6 (mlx5) as well as Broadcom BCM957504 (bnxt_en) 100G NICs. For more details see the individual patches.
====================

Link: https://patch.msgid.link/20260402231031.447597-1-daniel@iogearbox.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'Documentation')
-rw-r--r-- Documentation/netlink/specs/netdev.yaml | 46
-rw-r--r-- Documentation/netlink/specs/rt-link.yaml | 11
-rw-r--r-- Documentation/networking/netdevices.rst | 6
3 files changed, 63 insertions, 0 deletions
diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml
index 596c306ce52b..b93beb247a11 100644
--- a/Documentation/netlink/specs/netdev.yaml
+++ b/Documentation/netlink/specs/netdev.yaml
@@ -339,6 +339,15 @@ attribute-sets:
doc: XSK information for this queue, if any.
type: nest
nested-attributes: xsk-info
+ -
+ name: lease
+ doc: |
+ A queue from a virtual device can have a lease which refers to
+ another queue from a physical device. This is useful for memory
+ providers and AF_XDP operations which take an ifindex and queue id
+ to allow applications to bind against virtual devices in containers.
+ type: nest
+ nested-attributes: lease
-
name: qstats
doc: |
@@ -538,6 +547,26 @@ attribute-sets:
-
name: type
-
+ name: lease
+ attributes:
+ -
+ name: ifindex
+ doc: The netdev ifindex to lease the queue from.
+ type: u32
+ checks:
+ min: 1
+ -
+ name: queue
+ doc: The netdev queue to lease from.
+ type: nest
+ nested-attributes: queue-id
+ -
+ name: netns-id
+ doc: The network namespace id of the netdev.
+ type: s32
+ checks:
+ min: 0
+ -
name: dmabuf
attributes:
-
@@ -686,6 +715,7 @@ operations:
- dmabuf
- io-uring
- xsk
+ - lease
dump:
request:
attributes:
@@ -797,6 +827,22 @@ operations:
reply:
attributes:
- id
+ -
+ name: queue-create
+ doc: |
+ Create a new queue for the given netdevice. Whether this operation
+ is supported depends on the device and the driver.
+ attribute-set: queue
+ flags: [admin-perm]
+ do:
+ request:
+ attributes:
+ - ifindex
+ - type
+ - lease
+ reply: &queue-create-op
+ attributes:
+ - id
kernel-family:
headers: ["net/netdev_netlink.h"]
diff --git a/Documentation/netlink/specs/rt-link.yaml b/Documentation/netlink/specs/rt-link.yaml
index df4b56beb818..fcb5aaf0926f 100644
--- a/Documentation/netlink/specs/rt-link.yaml
+++ b/Documentation/netlink/specs/rt-link.yaml
@@ -826,6 +826,13 @@ definitions:
- name: none
- name: default
-
+ name: netkit-pairing
+ type: enum
+ enum-name: netkit-pairing
+ entries:
+ - name: pair
+ - name: single
+ -
name: ovpn-mode
enum-name: ovpn-mode
name-prefix: ovpn-mode
@@ -2299,6 +2306,10 @@ attribute-sets:
-
name: tailroom
type: u16
+ -
+ name: pairing
+ type: u32
+ enum: netkit-pairing
-
name: linkinfo-ovpn-attrs
name-prefix: ifla-ovpn-
diff --git a/Documentation/networking/netdevices.rst b/Documentation/networking/netdevices.rst
index 35704d115312..83e28b96884f 100644
--- a/Documentation/networking/netdevices.rst
+++ b/Documentation/networking/netdevices.rst
@@ -329,6 +329,12 @@ by setting ``request_ops_lock`` to true. Code comments and docs refer
to drivers which have ops called under the instance lock as "ops locked".
See also the documentation of the ``lock`` member of struct net_device.
+There is also a case of taking two per-netdev locks in sequence when netdev
+queues are leased, that is, the netdev-scope lock is taken for both the
+virtual and the physical device. To prevent deadlocks, the virtual device's
+lock must always be acquired before the physical device's (see
+``netdev_nl_queue_create_doit``).
+
In the future, there will be an option for individual
drivers to opt out of using ``rtnl_lock`` and instead perform their control
operations directly under the netdev instance lock.