summaryrefslogtreecommitdiff
path: root/include/linux
diff options
context:
space:
mode:
authorJakub Kicinski <kuba@kernel.org>2025-07-10 13:32:35 -0700
committerJakub Kicinski <kuba@kernel.org>2025-07-10 13:32:35 -0700
commitb430f6c38da629381f43b5ee723bd7e325ffa784 (patch)
tree47181e16765012b8ab8fb4ecf2f2bbe920b11ba2 /include/linux
parent3321e97eab71df7d632b35276da9f8503e6e040f (diff)
parentbbca931fce262cdb3e5fddcc39e62f3bf9ac25cc (diff)
Merge branch 'virtio_udp_tunnel_08_07_2025' of https://github.com/pabeni/linux-devel
Paolo Abeni says: ==================== virtio: introduce GSO over UDP tunnel Some virtualized deployments use UDP tunnel pervasively and are impacted negatively by the lack of GSO support for such kind of traffic in the virtual NIC driver. The virtio_net specification recently introduced support for GSO over UDP tunnel, this series updates the virtio implementation to support such a feature. Currently the kernel virtio support limits the feature space to 64, while the virtio specification allows for a larger number of features. Specifically the GSO-over-UDP-tunnel-related virtio features use bits 65-69. The first four patches in this series rework the virtio and vhost feature support to cope with up to 128 bits. The limit is set by a define and could be easily raised in future, as needed. This implementation choice is aimed at keeping the code churn as limited as possible. For the same reason, only the virtio_net driver is reworked to leverage the extended feature space; all other virtio/vhost drivers are unaffected, but could be upgraded to support the extended features space in a later time. The last four patches bring in the actual GSO over UDP tunnel support. As per specification, some additional fields are introduced into the virtio net header to support the new offload. The presence of such fields depends on the negotiated features. New helpers are introduced to convert the UDP-tunneled skb metadata to an extended virtio net header and vice versa. Such helpers are used by the tun and virtio_net driver to cope with the newly supported offloads. Tested with basic stream transfer with all the possible permutations of host kernel/qemu/guest kernel with/without GSO over UDP tunnel support. ==================== Link: https://patch.msgid.link/cover.1751874094.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'include/linux')
-rw-r--r--include/linux/virtio.h9
-rw-r--r--include/linux/virtio_config.h43
-rw-r--r--include/linux/virtio_features.h88
-rw-r--r--include/linux/virtio_net.h197
-rw-r--r--include/linux/virtio_pci_modern.h43
5 files changed, 343 insertions, 37 deletions
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 64cb4b04be7a..04b90c88d164 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -11,6 +11,7 @@
#include <linux/gfp.h>
#include <linux/dma-mapping.h>
#include <linux/completion.h>
+#include <linux/virtio_features.h>
/**
* struct virtqueue - a queue to register buffers for sending or receiving.
@@ -141,7 +142,9 @@ struct virtio_admin_cmd {
* @config: the configuration ops for this device.
* @vringh_config: configuration ops for host vrings.
* @vqs: the list of virtqueues for this device.
- * @features: the features supported by both driver and device.
+ * @features: the 64 lower features supported by both driver and device.
+ * @features_array: the full features space supported by both driver and
+ * device.
* @priv: private pointer for the driver's use.
* @debugfs_dir: debugfs directory entry.
* @debugfs_filter_features: features to be filtered set by debugfs.
@@ -159,11 +162,11 @@ struct virtio_device {
const struct virtio_config_ops *config;
const struct vringh_config_ops *vringh_config;
struct list_head vqs;
- u64 features;
+ VIRTIO_DECLARE_FEATURES(features);
void *priv;
#ifdef CONFIG_VIRTIO_DEBUG
struct dentry *debugfs_dir;
- u64 debugfs_filter_features;
+ u64 debugfs_filter_features[VIRTIO_FEATURES_DWORDS];
#endif
};
diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
index b3e1d30c765b..918cf25cd3c6 100644
--- a/include/linux/virtio_config.h
+++ b/include/linux/virtio_config.h
@@ -77,7 +77,11 @@ struct virtqueue_info {
* vdev: the virtio_device
* @get_features: get the array of feature bits for this device.
* vdev: the virtio_device
- * Returns the first 64 feature bits (all we currently need).
+ * Returns the first 64 feature bits.
+ * @get_extended_features:
+ * vdev: the virtio_device
+ * Returns the first VIRTIO_FEATURES_MAX feature bits (all we currently
+ * need).
* @finalize_features: confirm what device features we'll be using.
* vdev: the virtio_device
* This sends the driver feature bits to the device: it can change
@@ -121,6 +125,8 @@ struct virtio_config_ops {
void (*del_vqs)(struct virtio_device *);
void (*synchronize_cbs)(struct virtio_device *);
u64 (*get_features)(struct virtio_device *vdev);
+ void (*get_extended_features)(struct virtio_device *vdev,
+ u64 *features);
int (*finalize_features)(struct virtio_device *vdev);
const char *(*bus_name)(struct virtio_device *vdev);
int (*set_vq_affinity)(struct virtqueue *vq,
@@ -147,13 +153,7 @@ void virtio_check_driver_offered_feature(const struct virtio_device *vdev,
static inline bool __virtio_test_bit(const struct virtio_device *vdev,
unsigned int fbit)
{
- /* Did you forget to fix assumptions on max features? */
- if (__builtin_constant_p(fbit))
- BUILD_BUG_ON(fbit >= 64);
- else
- BUG_ON(fbit >= 64);
-
- return vdev->features & BIT_ULL(fbit);
+ return virtio_features_test_bit(vdev->features_array, fbit);
}
/**
@@ -164,13 +164,7 @@ static inline bool __virtio_test_bit(const struct virtio_device *vdev,
static inline void __virtio_set_bit(struct virtio_device *vdev,
unsigned int fbit)
{
- /* Did you forget to fix assumptions on max features? */
- if (__builtin_constant_p(fbit))
- BUILD_BUG_ON(fbit >= 64);
- else
- BUG_ON(fbit >= 64);
-
- vdev->features |= BIT_ULL(fbit);
+ virtio_features_set_bit(vdev->features_array, fbit);
}
/**
@@ -181,13 +175,7 @@ static inline void __virtio_set_bit(struct virtio_device *vdev,
static inline void __virtio_clear_bit(struct virtio_device *vdev,
unsigned int fbit)
{
- /* Did you forget to fix assumptions on max features? */
- if (__builtin_constant_p(fbit))
- BUILD_BUG_ON(fbit >= 64);
- else
- BUG_ON(fbit >= 64);
-
- vdev->features &= ~BIT_ULL(fbit);
+ virtio_features_clear_bit(vdev->features_array, fbit);
}
/**
@@ -204,6 +192,17 @@ static inline bool virtio_has_feature(const struct virtio_device *vdev,
return __virtio_test_bit(vdev, fbit);
}
+static inline void virtio_get_features(struct virtio_device *vdev,
+ u64 *features)
+{
+ if (vdev->config->get_extended_features) {
+ vdev->config->get_extended_features(vdev, features);
+ return;
+ }
+
+ virtio_features_from_u64(features, vdev->config->get_features(vdev));
+}
+
/**
* virtio_has_dma_quirk - determine whether this device has the DMA quirk
* @vdev: the device
diff --git a/include/linux/virtio_features.h b/include/linux/virtio_features.h
new file mode 100644
index 000000000000..f748f2f87de8
--- /dev/null
+++ b/include/linux/virtio_features.h
@@ -0,0 +1,88 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_VIRTIO_FEATURES_H
+#define _LINUX_VIRTIO_FEATURES_H
+
+#include <linux/bits.h>
+
+#define VIRTIO_FEATURES_DWORDS 2
+#define VIRTIO_FEATURES_MAX (VIRTIO_FEATURES_DWORDS * 64)
+#define VIRTIO_FEATURES_WORDS (VIRTIO_FEATURES_DWORDS * 2)
+#define VIRTIO_BIT(b) BIT_ULL((b) & 0x3f)
+#define VIRTIO_DWORD(b) ((b) >> 6)
+#define VIRTIO_DECLARE_FEATURES(name) \
+ union { \
+ u64 name; \
+ u64 name##_array[VIRTIO_FEATURES_DWORDS];\
+ }
+
+static inline bool virtio_features_chk_bit(unsigned int bit)
+{
+ if (__builtin_constant_p(bit)) {
+ /*
+ * Don't care returning the correct value: the build
+ * will fail before any bad features access
+ */
+ BUILD_BUG_ON(bit >= VIRTIO_FEATURES_MAX);
+ } else {
+ if (WARN_ON_ONCE(bit >= VIRTIO_FEATURES_MAX))
+ return false;
+ }
+ return true;
+}
+
+static inline bool virtio_features_test_bit(const u64 *features,
+ unsigned int bit)
+{
+ return virtio_features_chk_bit(bit) &&
+ !!(features[VIRTIO_DWORD(bit)] & VIRTIO_BIT(bit));
+}
+
+static inline void virtio_features_set_bit(u64 *features,
+ unsigned int bit)
+{
+ if (virtio_features_chk_bit(bit))
+ features[VIRTIO_DWORD(bit)] |= VIRTIO_BIT(bit);
+}
+
+static inline void virtio_features_clear_bit(u64 *features,
+ unsigned int bit)
+{
+ if (virtio_features_chk_bit(bit))
+ features[VIRTIO_DWORD(bit)] &= ~VIRTIO_BIT(bit);
+}
+
+static inline void virtio_features_zero(u64 *features)
+{
+ memset(features, 0, sizeof(features[0]) * VIRTIO_FEATURES_DWORDS);
+}
+
+static inline void virtio_features_from_u64(u64 *features, u64 from)
+{
+ virtio_features_zero(features);
+ features[0] = from;
+}
+
+static inline bool virtio_features_equal(const u64 *f1, const u64 *f2)
+{
+ int i;
+
+ for (i = 0; i < VIRTIO_FEATURES_DWORDS; ++i)
+ if (f1[i] != f2[i])
+ return false;
+ return true;
+}
+
+static inline void virtio_features_copy(u64 *to, const u64 *from)
+{
+ memcpy(to, from, sizeof(to[0]) * VIRTIO_FEATURES_DWORDS);
+}
+
+static inline void virtio_features_andnot(u64 *to, const u64 *f1, const u64 *f2)
+{
+ int i;
+
+ for (i = 0; i < VIRTIO_FEATURES_DWORDS; i++)
+ to[i] = f1[i] & ~f2[i];
+}
+
+#endif
diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
index 02a9f4dc594d..20e0584db1dd 100644
--- a/include/linux/virtio_net.h
+++ b/include/linux/virtio_net.h
@@ -47,9 +47,9 @@ static inline int virtio_net_hdr_set_proto(struct sk_buff *skb,
return 0;
}
-static inline int virtio_net_hdr_to_skb(struct sk_buff *skb,
- const struct virtio_net_hdr *hdr,
- bool little_endian)
+static inline int __virtio_net_hdr_to_skb(struct sk_buff *skb,
+ const struct virtio_net_hdr *hdr,
+ bool little_endian, u8 hdr_gso_type)
{
unsigned int nh_min_len = sizeof(struct iphdr);
unsigned int gso_type = 0;
@@ -57,8 +57,8 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb,
unsigned int p_off = 0;
unsigned int ip_proto;
- if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
- switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
+ if (hdr_gso_type != VIRTIO_NET_HDR_GSO_NONE) {
+ switch (hdr_gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
case VIRTIO_NET_HDR_GSO_TCPV4:
gso_type = SKB_GSO_TCPV4;
ip_proto = IPPROTO_TCP;
@@ -84,7 +84,7 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb,
return -EINVAL;
}
- if (hdr->gso_type & VIRTIO_NET_HDR_GSO_ECN)
+ if (hdr_gso_type & VIRTIO_NET_HDR_GSO_ECN)
gso_type |= SKB_GSO_TCP_ECN;
if (hdr->gso_size == 0)
@@ -122,7 +122,8 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb,
if (!protocol)
virtio_net_hdr_set_proto(skb, hdr);
- else if (!virtio_net_hdr_match_proto(protocol, hdr->gso_type))
+ else if (!virtio_net_hdr_match_proto(protocol,
+ hdr_gso_type))
return -EINVAL;
else
skb->protocol = protocol;
@@ -153,7 +154,7 @@ retry:
}
}
- if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
+ if (hdr_gso_type != VIRTIO_NET_HDR_GSO_NONE) {
u16 gso_size = __virtio16_to_cpu(little_endian, hdr->gso_size);
unsigned int nh_off = p_off;
struct skb_shared_info *shinfo = skb_shinfo(skb);
@@ -199,6 +200,13 @@ retry:
return 0;
}
+static inline int virtio_net_hdr_to_skb(struct sk_buff *skb,
+ const struct virtio_net_hdr *hdr,
+ bool little_endian)
+{
+ return __virtio_net_hdr_to_skb(skb, hdr, little_endian, hdr->gso_type);
+}
+
static inline int virtio_net_hdr_from_skb(const struct sk_buff *skb,
struct virtio_net_hdr *hdr,
bool little_endian,
@@ -242,4 +250,177 @@ static inline int virtio_net_hdr_from_skb(const struct sk_buff *skb,
return 0;
}
+static inline unsigned int virtio_l3min(bool is_ipv6)
+{
+ return is_ipv6 ? sizeof(struct ipv6hdr) : sizeof(struct iphdr);
+}
+
+static inline int
+virtio_net_hdr_tnl_to_skb(struct sk_buff *skb,
+ const struct virtio_net_hdr_v1_hash_tunnel *vhdr,
+ bool tnl_hdr_negotiated,
+ bool tnl_csum_negotiated,
+ bool little_endian)
+{
+ const struct virtio_net_hdr *hdr = (const struct virtio_net_hdr *)vhdr;
+ unsigned int inner_nh, outer_th, inner_th;
+ unsigned int inner_l3min, outer_l3min;
+ u8 gso_inner_type, gso_tunnel_type;
+ bool outer_isv6, inner_isv6;
+ int ret;
+
+ gso_tunnel_type = hdr->gso_type & VIRTIO_NET_HDR_GSO_UDP_TUNNEL;
+ if (!gso_tunnel_type)
+ return virtio_net_hdr_to_skb(skb, hdr, little_endian);
+
+ /* Tunnel not supported/negotiated, but the hdr asks for it. */
+ if (!tnl_hdr_negotiated)
+ return -EINVAL;
+
+ /* Either ipv4 or ipv6. */
+ if (gso_tunnel_type == VIRTIO_NET_HDR_GSO_UDP_TUNNEL)
+ return -EINVAL;
+
+ /* The UDP tunnel must carry a GSO packet, but no UFO. */
+ gso_inner_type = hdr->gso_type & ~(VIRTIO_NET_HDR_GSO_ECN |
+ VIRTIO_NET_HDR_GSO_UDP_TUNNEL);
+ if (!gso_inner_type || gso_inner_type == VIRTIO_NET_HDR_GSO_UDP)
+ return -EINVAL;
+
+ /* Rely on csum being present. */
+ if (!(hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM))
+ return -EINVAL;
+
+ /* Validate offsets. */
+ outer_isv6 = gso_tunnel_type & VIRTIO_NET_HDR_GSO_UDP_TUNNEL_IPV6;
+ inner_isv6 = gso_inner_type == VIRTIO_NET_HDR_GSO_TCPV6;
+ inner_l3min = virtio_l3min(inner_isv6);
+ outer_l3min = ETH_HLEN + virtio_l3min(outer_isv6);
+
+ inner_th = __virtio16_to_cpu(little_endian, hdr->csum_start);
+ inner_nh = le16_to_cpu(vhdr->inner_nh_offset);
+ outer_th = le16_to_cpu(vhdr->outer_th_offset);
+ if (outer_th < outer_l3min ||
+ inner_nh < outer_th + sizeof(struct udphdr) ||
+ inner_th < inner_nh + inner_l3min)
+ return -EINVAL;
+
+ /* Let the basic parsing deal with plain GSO features. */
+ ret = __virtio_net_hdr_to_skb(skb, hdr, true,
+ hdr->gso_type & ~gso_tunnel_type);
+ if (ret)
+ return ret;
+
+ /* In case of USO, the inner protocol is still unknown and
+ * `inner_isv6` is just a guess, additional parsing is needed.
+ * The previous validation ensures that accessing an ipv4 inner
+ * network header is safe.
+ */
+ if (gso_inner_type == VIRTIO_NET_HDR_GSO_UDP_L4) {
+ struct iphdr *iphdr = (struct iphdr *)(skb->data + inner_nh);
+
+ inner_isv6 = iphdr->version == 6;
+ inner_l3min = virtio_l3min(inner_isv6);
+ if (inner_th < inner_nh + inner_l3min)
+ return -EINVAL;
+ }
+
+ skb_set_inner_protocol(skb, inner_isv6 ? htons(ETH_P_IPV6) :
+ htons(ETH_P_IP));
+ if (hdr->flags & VIRTIO_NET_HDR_F_UDP_TUNNEL_CSUM) {
+ if (!tnl_csum_negotiated)
+ return -EINVAL;
+
+ skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL_CSUM;
+ } else {
+ skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL;
+ }
+
+ skb->inner_transport_header = inner_th + skb_headroom(skb);
+ skb->inner_network_header = inner_nh + skb_headroom(skb);
+ skb->inner_mac_header = inner_nh + skb_headroom(skb);
+ skb->transport_header = outer_th + skb_headroom(skb);
+ skb->encapsulation = 1;
+ return 0;
+}
+
+/* Checksum-related fields validation for the driver */
+static inline int virtio_net_handle_csum_offload(struct sk_buff *skb,
+ struct virtio_net_hdr *hdr,
+ bool tnl_csum_negotiated)
+{
+ if (!(hdr->gso_type & VIRTIO_NET_HDR_GSO_UDP_TUNNEL)) {
+ if (!(hdr->flags & VIRTIO_NET_HDR_F_DATA_VALID))
+ return 0;
+
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ if (!(hdr->flags & VIRTIO_NET_HDR_F_UDP_TUNNEL_CSUM))
+ return 0;
+
+ /* tunnel csum packets are invalid when the related
+ * feature has not been negotiated
+ */
+ if (!tnl_csum_negotiated)
+ return -EINVAL;
+ skb->csum_level = 1;
+ return 0;
+ }
+
+ /* DATA_VALID is mutually exclusive with NEEDS_CSUM, and GSO
+ * over UDP tunnel requires the latter
+ */
+ if (hdr->flags & VIRTIO_NET_HDR_F_DATA_VALID)
+ return -EINVAL;
+ return 0;
+}
+
+/*
+ * vlan_hlen always refers to the outermost MAC header. That also
+ * means it refers to the only MAC header, if the packet does not carry
+ * any encapsulation.
+ */
+static inline int
+virtio_net_hdr_tnl_from_skb(const struct sk_buff *skb,
+ struct virtio_net_hdr_v1_hash_tunnel *vhdr,
+ bool tnl_hdr_negotiated,
+ bool little_endian,
+ int vlan_hlen)
+{
+ struct virtio_net_hdr *hdr = (struct virtio_net_hdr *)vhdr;
+ unsigned int inner_nh, outer_th;
+ int tnl_gso_type;
+ int ret;
+
+ tnl_gso_type = skb_shinfo(skb)->gso_type & (SKB_GSO_UDP_TUNNEL |
+ SKB_GSO_UDP_TUNNEL_CSUM);
+ if (!tnl_gso_type)
+ return virtio_net_hdr_from_skb(skb, hdr, little_endian, false,
+ vlan_hlen);
+
+ /* Tunnel support not negotiated but skb ask for it. */
+ if (!tnl_hdr_negotiated)
+ return -EINVAL;
+
+ /* Let the basic parsing deal with plain GSO features. */
+ skb_shinfo(skb)->gso_type &= ~tnl_gso_type;
+ ret = virtio_net_hdr_from_skb(skb, hdr, true, false, vlan_hlen);
+ skb_shinfo(skb)->gso_type |= tnl_gso_type;
+ if (ret)
+ return ret;
+
+ if (skb->protocol == htons(ETH_P_IPV6))
+ hdr->gso_type |= VIRTIO_NET_HDR_GSO_UDP_TUNNEL_IPV6;
+ else
+ hdr->gso_type |= VIRTIO_NET_HDR_GSO_UDP_TUNNEL_IPV4;
+
+ if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM)
+ hdr->flags |= VIRTIO_NET_HDR_F_UDP_TUNNEL_CSUM;
+
+ inner_nh = skb->inner_network_header - skb_headroom(skb);
+ outer_th = skb->transport_header - skb_headroom(skb);
+ vhdr->inner_nh_offset = cpu_to_le16(inner_nh);
+ vhdr->outer_th_offset = cpu_to_le16(outer_th);
+ return 0;
+}
+
#endif /* _LINUX_VIRTIO_NET_H */
diff --git a/include/linux/virtio_pci_modern.h b/include/linux/virtio_pci_modern.h
index c0b1b1ca1163..48bc12d1045b 100644
--- a/include/linux/virtio_pci_modern.h
+++ b/include/linux/virtio_pci_modern.h
@@ -3,6 +3,7 @@
#define _LINUX_VIRTIO_PCI_MODERN_H
#include <linux/pci.h>
+#include <linux/virtio_config.h>
#include <linux/virtio_pci.h>
/**
@@ -95,10 +96,44 @@ static inline void vp_iowrite64_twopart(u64 val,
vp_iowrite32(val >> 32, hi);
}
-u64 vp_modern_get_features(struct virtio_pci_modern_device *mdev);
-u64 vp_modern_get_driver_features(struct virtio_pci_modern_device *mdev);
-void vp_modern_set_features(struct virtio_pci_modern_device *mdev,
- u64 features);
+void
+vp_modern_get_driver_extended_features(struct virtio_pci_modern_device *mdev,
+ u64 *features);
+void vp_modern_get_extended_features(struct virtio_pci_modern_device *mdev,
+ u64 *features);
+void vp_modern_set_extended_features(struct virtio_pci_modern_device *mdev,
+ const u64 *features);
+
+static inline u64
+vp_modern_get_features(struct virtio_pci_modern_device *mdev)
+{
+ u64 features_array[VIRTIO_FEATURES_DWORDS];
+
+ vp_modern_get_extended_features(mdev, features_array);
+ return features_array[0];
+}
+
+static inline u64
+vp_modern_get_driver_features(struct virtio_pci_modern_device *mdev)
+{
+ u64 features_array[VIRTIO_FEATURES_DWORDS];
+ int i;
+
+ vp_modern_get_driver_extended_features(mdev, features_array);
+ for (i = 1; i < VIRTIO_FEATURES_DWORDS; ++i)
+ WARN_ON_ONCE(features_array[i]);
+ return features_array[0];
+}
+
+static inline void
+vp_modern_set_features(struct virtio_pci_modern_device *mdev, u64 features)
+{
+ u64 features_array[VIRTIO_FEATURES_DWORDS];
+
+ virtio_features_from_u64(features_array, features);
+ vp_modern_set_extended_features(mdev, features_array);
+}
+
u32 vp_modern_generation(struct virtio_pci_modern_device *mdev);
u8 vp_modern_get_status(struct virtio_pci_modern_device *mdev);
void vp_modern_set_status(struct virtio_pci_modern_device *mdev,