From e7d4c1c5a54648fd5b787a4a0f81521ec7260bcd Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Tue, 8 Jul 2025 17:54:49 +0200
Subject: virtio: introduce extended features

The virtio specification allows for up to 128 bits of device features.
Soon we are going to use some of the 'extended' feature bits (above 64)
for the virtio_net driver.

Introduce extended features as a fixed-size array of u64. To minimize
the diffstat, allow legacy drivers to access the low 64 bits via a
transparent union.

Introduce a get_extended_features configuration callback that devices
supporting the extended features range must implement in place of the
traditional one.

Note that legacy and transport features don't need any change, as they
always sit in the low 64-bit range.

Acked-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/linux/virtio.h          |  9 +++--
 include/linux/virtio_config.h   | 43 ++++++++++----------
 include/linux/virtio_features.h | 88 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 115 insertions(+), 25 deletions(-)
 create mode 100644 include/linux/virtio_features.h
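To make the new 128-bit layout concrete before diving into the diff, here is a
small worked example (editorial, not part of the patch) of how a feature bit
above 63 maps onto the fixed-size array introduced below, and of how the
transparent union keeps legacy accesses working:

    u64 features[VIRTIO_FEATURES_DWORDS];

    virtio_features_from_u64(features, BIT_ULL(40));  /* legacy-range bit */
    virtio_features_set_bit(features, 65);            /* extended-range bit */

    /* Bit 65 lands in the second word: VIRTIO_DWORD(65) == 65 >> 6 == 1,
     * VIRTIO_BIT(65) == BIT_ULL(65 & 0x3f) == BIT_ULL(1).
     */
    WARN_ON(!virtio_features_test_bit(features, 65));
    WARN_ON(features[1] != BIT_ULL(1));

    /* For a struct virtio_device, VIRTIO_DECLARE_FEATURES(features) makes
     * vdev->features alias vdev->features_array[0], so code that only cares
     * about the low 64 bits keeps compiling unchanged.
     */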
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 64cb4b04be7a..04b90c88d164 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -11,6 +11,7 @@
 #include <linux/gfp.h>
 #include <linux/dma-mapping.h>
 #include <linux/completion.h>
+#include <linux/virtio_features.h>
 
 /**
  * struct virtqueue - a queue to register buffers for sending or receiving.
@@ -141,7 +142,9 @@ struct virtio_admin_cmd {
  * @config: the configuration ops for this device.
  * @vringh_config: configuration ops for host vrings.
  * @vqs: the list of virtqueues for this device.
- * @features: the features supported by both driver and device.
+ * @features: the 64 lower features supported by both driver and device.
+ * @features_array: the full features space supported by both driver and
+ *                  device.
  * @priv: private pointer for the driver's use.
  * @debugfs_dir: debugfs directory entry.
  * @debugfs_filter_features: features to be filtered set by debugfs.
@@ -159,11 +162,11 @@ struct virtio_device {
        const struct virtio_config_ops *config;
        const struct vringh_config_ops *vringh_config;
        struct list_head vqs;
-       u64 features;
+       VIRTIO_DECLARE_FEATURES(features);
        void *priv;
 #ifdef CONFIG_VIRTIO_DEBUG
        struct dentry *debugfs_dir;
-       u64 debugfs_filter_features;
+       u64 debugfs_filter_features[VIRTIO_FEATURES_DWORDS];
 #endif
 };
 
diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
index b3e1d30c765b..918cf25cd3c6 100644
--- a/include/linux/virtio_config.h
+++ b/include/linux/virtio_config.h
@@ -77,7 +77,11 @@ struct virtqueue_info {
  *     vdev: the virtio_device
  * @get_features: get the array of feature bits for this device.
  *     vdev: the virtio_device
- *     Returns the first 64 feature bits (all we currently need).
+ *     Returns the first 64 feature bits.
+ * @get_extended_features:
+ *     vdev: the virtio_device
+ *     Returns the first VIRTIO_FEATURES_MAX feature bits (all we currently
+ *     need).
  * @finalize_features: confirm what device features we'll be using.
  *     vdev: the virtio_device
  *     This sends the driver feature bits to the device: it can change
@@ -121,6 +125,8 @@ struct virtio_config_ops {
        void (*del_vqs)(struct virtio_device *);
        void (*synchronize_cbs)(struct virtio_device *);
        u64 (*get_features)(struct virtio_device *vdev);
+       void (*get_extended_features)(struct virtio_device *vdev,
+                                     u64 *features);
        int (*finalize_features)(struct virtio_device *vdev);
        const char *(*bus_name)(struct virtio_device *vdev);
        int (*set_vq_affinity)(struct virtqueue *vq,
@@ -147,13 +153,7 @@ void virtio_check_driver_offered_feature(const struct virtio_device *vdev,
 static inline bool __virtio_test_bit(const struct virtio_device *vdev,
                                      unsigned int fbit)
 {
-       /* Did you forget to fix assumptions on max features? */
-       if (__builtin_constant_p(fbit))
-               BUILD_BUG_ON(fbit >= 64);
-       else
-               BUG_ON(fbit >= 64);
-
-       return vdev->features & BIT_ULL(fbit);
+       return virtio_features_test_bit(vdev->features_array, fbit);
 }
 
 /**
@@ -164,13 +164,7 @@ static inline bool __virtio_test_bit(const struct virtio_device *vdev,
 static inline void __virtio_set_bit(struct virtio_device *vdev,
                                     unsigned int fbit)
 {
-       /* Did you forget to fix assumptions on max features? */
-       if (__builtin_constant_p(fbit))
-               BUILD_BUG_ON(fbit >= 64);
-       else
-               BUG_ON(fbit >= 64);
-
-       vdev->features |= BIT_ULL(fbit);
+       virtio_features_set_bit(vdev->features_array, fbit);
 }
 
 /**
@@ -181,13 +175,7 @@ static inline void __virtio_set_bit(struct virtio_device *vdev,
 static inline void __virtio_clear_bit(struct virtio_device *vdev,
                                       unsigned int fbit)
 {
-       /* Did you forget to fix assumptions on max features? */
-       if (__builtin_constant_p(fbit))
-               BUILD_BUG_ON(fbit >= 64);
-       else
-               BUG_ON(fbit >= 64);
-
-       vdev->features &= ~BIT_ULL(fbit);
+       virtio_features_clear_bit(vdev->features_array, fbit);
 }
 
 /**
@@ -204,6 +192,17 @@ static inline bool virtio_has_feature(const struct virtio_device *vdev,
        return __virtio_test_bit(vdev, fbit);
 }
 
+static inline void virtio_get_features(struct virtio_device *vdev,
+                                      u64 *features)
+{
+       if (vdev->config->get_extended_features) {
+               vdev->config->get_extended_features(vdev, features);
+               return;
+       }
+
+       virtio_features_from_u64(features, vdev->config->get_features(vdev));
+}
+
 /**
  * virtio_has_dma_quirk - determine whether this device has the DMA quirk
  * @vdev: the device
diff --git a/include/linux/virtio_features.h b/include/linux/virtio_features.h
new file mode 100644
index 000000000000..f748f2f87de8
--- /dev/null
+++ b/include/linux/virtio_features.h
@@ -0,0 +1,88 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_VIRTIO_FEATURES_H
+#define _LINUX_VIRTIO_FEATURES_H
+
+#include <linux/bits.h>
+
+#define VIRTIO_FEATURES_DWORDS 2
+#define VIRTIO_FEATURES_MAX    (VIRTIO_FEATURES_DWORDS * 64)
+#define VIRTIO_FEATURES_WORDS  (VIRTIO_FEATURES_DWORDS * 2)
+#define VIRTIO_BIT(b)          BIT_ULL((b) & 0x3f)
+#define VIRTIO_DWORD(b)        ((b) >> 6)
+#define VIRTIO_DECLARE_FEATURES(name)                  \
+       union {                                         \
+               u64 name;                               \
+               u64 name##_array[VIRTIO_FEATURES_DWORDS];\
+       }
+
+static inline bool virtio_features_chk_bit(unsigned int bit)
+{
+       if (__builtin_constant_p(bit)) {
+               /*
+                * Don't care returning the correct value: the build
+                * will fail before any bad features access
+                */
+               BUILD_BUG_ON(bit >= VIRTIO_FEATURES_MAX);
+       } else {
+               if (WARN_ON_ONCE(bit >= VIRTIO_FEATURES_MAX))
+                       return false;
+       }
+       return true;
+}
+
+static inline bool virtio_features_test_bit(const u64 *features,
+                                           unsigned int bit)
+{
+       return virtio_features_chk_bit(bit) &&
+              !!(features[VIRTIO_DWORD(bit)] & VIRTIO_BIT(bit));
+}
+
+static inline void virtio_features_set_bit(u64 *features,
+                                          unsigned int bit)
+{
+       if (virtio_features_chk_bit(bit))
+               features[VIRTIO_DWORD(bit)] |= VIRTIO_BIT(bit);
+}
+
+static inline void virtio_features_clear_bit(u64 *features,
+                                            unsigned int bit)
+{
+       if (virtio_features_chk_bit(bit))
+               features[VIRTIO_DWORD(bit)] &= ~VIRTIO_BIT(bit);
+}
+
+static inline void virtio_features_zero(u64 *features)
+{
+       memset(features, 0, sizeof(features[0]) * VIRTIO_FEATURES_DWORDS);
+}
+
+static inline void virtio_features_from_u64(u64 *features, u64 from)
+{
+       virtio_features_zero(features);
+       features[0] = from;
+}
+
+static inline bool virtio_features_equal(const u64 *f1, const u64 *f2)
+{
+       int i;
+
+       for (i = 0; i < VIRTIO_FEATURES_DWORDS; ++i)
+               if (f1[i] != f2[i])
+                       return false;
+       return true;
+}
+
+static inline void virtio_features_copy(u64 *to, const u64 *from)
+{
+       memcpy(to, from, sizeof(to[0]) * VIRTIO_FEATURES_DWORDS);
+}
+
+static inline void virtio_features_andnot(u64 *to, const u64 *f1, const u64 *f2)
+{
+       int i;
+
+       for (i = 0; i < VIRTIO_FEATURES_DWORDS; i++)
+               to[i] = f1[i] & ~f2[i];
+}
+
+#endif
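The new callback is consumed by virtio_get_features() above: when a transport
provides ->get_extended_features the core uses it, otherwise it widens the
64-bit result of ->get_features. A minimal sketch of how a transport could
wire this up follows; everything named foo_* is hypothetical and only meant to
illustrate the calling convention:

    /* Hypothetical transport callback: fills all VIRTIO_FEATURES_DWORDS words. */
    static void foo_get_extended_features(struct virtio_device *vdev,
                                          u64 *features)
    {
            virtio_features_zero(features);
            features[0] = foo_read_features_word(vdev, 0);  /* bits  0..63   */
            features[1] = foo_read_features_word(vdev, 1);  /* bits 64..127  */
    }

    static const struct virtio_config_ops foo_config_ops = {
            .get_extended_features = foo_get_extended_features,
            /* ... remaining ops ... */
    };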
From 69b9461512246599ed80cf13358e7e6aff7285f9 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Tue, 8 Jul 2025 17:54:52 +0200
Subject: virtio_pci_modern: allow configuring extended features

The virtio specification allows for up to 128 bits of device features.
Soon we are going to use some of the 'extended' feature bits (above 64)
for the virtio_net driver.

Extend the virtio PCI modern driver to support configuring the full
virtio features range: replace the unrolled loops that read and write
the features space with explicit loops bounded by the actual size of
the features space in words, and implement the get_extended_features
callback.

Note that in vp_finalize_features() we only need to cache the lower 64
feature bits, to process the transport features.

Acked-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/linux/virtio_pci_modern.h | 43 +++++++++++++++++++++++++++++++++++----
 1 file changed, 39 insertions(+), 4 deletions(-)

diff --git a/include/linux/virtio_pci_modern.h b/include/linux/virtio_pci_modern.h
index c0b1b1ca1163..48bc12d1045b 100644
--- a/include/linux/virtio_pci_modern.h
+++ b/include/linux/virtio_pci_modern.h
@@ -3,6 +3,7 @@
 #define _LINUX_VIRTIO_PCI_MODERN_H
 
 #include <linux/pci.h>
+#include <linux/virtio_features.h>
 #include <linux/virtio_pci.h>
 
 /**
@@ -95,10 +96,44 @@ static inline void vp_iowrite64_twopart(u64 val,
        vp_iowrite32(val >> 32, hi);
 }
 
-u64 vp_modern_get_features(struct virtio_pci_modern_device *mdev);
-u64 vp_modern_get_driver_features(struct virtio_pci_modern_device *mdev);
-void vp_modern_set_features(struct virtio_pci_modern_device *mdev,
-                           u64 features);
+void
+vp_modern_get_driver_extended_features(struct virtio_pci_modern_device *mdev,
+                                      u64 *features);
+void vp_modern_get_extended_features(struct virtio_pci_modern_device *mdev,
+                                    u64 *features);
+void vp_modern_set_extended_features(struct virtio_pci_modern_device *mdev,
+                                    const u64 *features);
+
+static inline u64
+vp_modern_get_features(struct virtio_pci_modern_device *mdev)
+{
+       u64 features_array[VIRTIO_FEATURES_DWORDS];
+
+       vp_modern_get_extended_features(mdev, features_array);
+       return features_array[0];
+}
+
+static inline u64
+vp_modern_get_driver_features(struct virtio_pci_modern_device *mdev)
+{
+       u64 features_array[VIRTIO_FEATURES_DWORDS];
+       int i;
+
+       vp_modern_get_driver_extended_features(mdev, features_array);
+       for (i = 1; i < VIRTIO_FEATURES_DWORDS; ++i)
+               WARN_ON_ONCE(features_array[i]);
+       return features_array[0];
+}
+
+static inline void
+vp_modern_set_features(struct virtio_pci_modern_device *mdev, u64 features)
+{
+       u64 features_array[VIRTIO_FEATURES_DWORDS];
+
+       virtio_features_from_u64(features_array, features);
+       vp_modern_set_extended_features(mdev, features_array);
+}
+
 u32 vp_modern_generation(struct virtio_pci_modern_device *mdev);
 u8 vp_modern_get_status(struct virtio_pci_modern_device *mdev);
 void vp_modern_set_status(struct virtio_pci_modern_device *mdev,
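The implementations of vp_modern_get_extended_features() and friends live in
drivers/virtio/virtio_pci_modern_dev.c, which is outside this include/linux
excerpt. Conceptually, the bounded loop the commit message refers to walks the
feature space one 32-bit word at a time through the device_feature_select /
device_feature window of the standard virtio PCI common configuration; a
simplified, editorial sketch (not the actual code) looks like this:

    static void example_read_device_features(struct virtio_pci_modern_device *mdev,
                                             u64 *features)
    {
            struct virtio_pci_common_cfg __iomem *cfg = mdev->common;
            int i;

            virtio_features_zero(features);
            for (i = 0; i < VIRTIO_FEATURES_WORDS; i++) {
                    u64 cur;

                    /* Select the i-th 32-bit word, then read it back. */
                    vp_iowrite32(i, &cfg->device_feature_select);
                    cur = vp_ioread32(&cfg->device_feature);
                    features[i / 2] |= cur << (32 * (i % 2));
            }
    }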
From a2fb4bc4e2a6a031683910d85b278c1d25ae5420 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Tue, 8 Jul 2025 17:54:59 +0200
Subject: net: implement virtio helpers to handle UDP GSO tunneling.

The virtio specification is introducing support for GSO over UDP
tunnel. This patch brings in the needed defines and the additional
virtio hdr parsing/building helpers.

The UDP tunnel support uses additional fields in the virtio hdr, and
the location of such fields can change depending on other negotiated
features - specifically VIRTIO_NET_F_HASH_REPORT.

Try to be as conservative as possible with the new field validation.
The existing implementations of the plain GSO offloads allow for
invalid/self-contradictory values of such fields; with GSO over UDP
tunnel we can be stricter, as there is no legacy implementation to deal
with.

Since the checksum-related field validation is asymmetric in the driver
and in the device, introduce a separate helper to implement the new
checks (to be used only on the driver side).

Note that while the feature space now exceeds the 64-bit boundary, the
guest offload space is fixed at 64 bits by the specification of the
VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET command. Prior to the UDP tunnel GSO
support, each guest offload bit corresponded to the feature bit with
the same value and vice versa.

Due to the limited 'guest offload' space, the relevant features in the
high 64-bit range are 'mapped' to free bits in the lower range. That is
simpler than defining a new command (and associated features) to
exchange an extended guest offloads set. As a consequence, the uAPIs
also specify the mapped guest offload value corresponding to the UDP
tunnel GSO features.

Acked-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
--
v4 -> v5:
  - avoid lines above 80 chars

v3 -> v4:
  - fixed the offset for UDP GSO tunnel, updated the helpers accordingly
  - tried to clarify the vlan_hlen semantics
  - virtio_net_chk_data_valid() -> virtio_net_handle_csum_offload()

v2 -> v3:
  - add definitions for possible vnet hdr layouts with tunnel support

v1 -> v2:
  - 'relay' -> 'rely' typo
  - less unclear comment WRT enforced inner GSO checks
  - inner header fields are allowed only with 'modern' virtio, thus are
    always le
  - clarified in the commit message the need for 'mapped features'
    defines
  - assume little_endian is true when UDP GSO is enabled.
  - fix inner proto type value
---
 include/linux/virtio_net.h | 197 +++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 189 insertions(+), 8 deletions(-)
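The helpers below read and write two new little-endian offsets carried in an
extended virtio-net header. The uAPI definition of struct
virtio_net_hdr_v1_hash_tunnel is added by the uapi side of this series and is
not part of this include/linux excerpt; based on the fields the helpers use,
its shape is roughly the following (editorial sketch only - the field order
and the embedded base header are assumptions):

    /* Sketch: the authoritative layout lives in the uAPI header added by
     * this series, not in this excerpt.
     */
    struct virtio_net_hdr_v1_hash_tunnel {
            struct virtio_net_hdr_v1_hash hash_hdr; /* v1 header + hash report fields */
            __le16 outer_th_offset;                 /* start of the outer (UDP) transport header */
            __le16 inner_nh_offset;                 /* start of the inner network header */
    };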
diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
index 02a9f4dc594d..20e0584db1dd 100644
--- a/include/linux/virtio_net.h
+++ b/include/linux/virtio_net.h
@@ -47,9 +47,9 @@ static inline int virtio_net_hdr_set_proto(struct sk_buff *skb,
        return 0;
 }
 
-static inline int virtio_net_hdr_to_skb(struct sk_buff *skb,
-                                       const struct virtio_net_hdr *hdr,
-                                       bool little_endian)
+static inline int __virtio_net_hdr_to_skb(struct sk_buff *skb,
+                                         const struct virtio_net_hdr *hdr,
+                                         bool little_endian, u8 hdr_gso_type)
 {
        unsigned int nh_min_len = sizeof(struct iphdr);
        unsigned int gso_type = 0;
@@ -57,8 +57,8 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb,
        unsigned int p_off = 0;
        unsigned int ip_proto;
 
-       if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
-               switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
+       if (hdr_gso_type != VIRTIO_NET_HDR_GSO_NONE) {
+               switch (hdr_gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
                case VIRTIO_NET_HDR_GSO_TCPV4:
                        gso_type = SKB_GSO_TCPV4;
                        ip_proto = IPPROTO_TCP;
@@ -84,7 +84,7 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb,
                        return -EINVAL;
                }
 
-               if (hdr->gso_type & VIRTIO_NET_HDR_GSO_ECN)
+               if (hdr_gso_type & VIRTIO_NET_HDR_GSO_ECN)
                        gso_type |= SKB_GSO_TCP_ECN;
 
                if (hdr->gso_size == 0)
@@ -122,7 +122,8 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb,
 
                                if (!protocol)
                                        virtio_net_hdr_set_proto(skb, hdr);
-                               else if (!virtio_net_hdr_match_proto(protocol, hdr->gso_type))
+                               else if (!virtio_net_hdr_match_proto(protocol,
+                                                                    hdr_gso_type))
                                        return -EINVAL;
                                else
                                        skb->protocol = protocol;
@@ -153,7 +154,7 @@ retry:
                }
        }
 
-       if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
+       if (hdr_gso_type != VIRTIO_NET_HDR_GSO_NONE) {
                u16 gso_size = __virtio16_to_cpu(little_endian, hdr->gso_size);
                unsigned int nh_off = p_off;
                struct skb_shared_info *shinfo = skb_shinfo(skb);
@@ -199,6 +200,13 @@ retry:
        return 0;
 }
 
+static inline int virtio_net_hdr_to_skb(struct sk_buff *skb,
+                                       const struct virtio_net_hdr *hdr,
+                                       bool little_endian)
+{
+       return __virtio_net_hdr_to_skb(skb, hdr, little_endian, hdr->gso_type);
+}
+
 static inline int virtio_net_hdr_from_skb(const struct sk_buff *skb,
                                          struct virtio_net_hdr *hdr,
                                          bool little_endian,
@@ -242,4 +250,177 @@ static inline int virtio_net_hdr_from_skb(const struct sk_buff *skb,
        return 0;
 }
 
+static inline unsigned int virtio_l3min(bool is_ipv6)
+{
+       return is_ipv6 ? sizeof(struct ipv6hdr) : sizeof(struct iphdr);
+}
+
+static inline int
+virtio_net_hdr_tnl_to_skb(struct sk_buff *skb,
+                         const struct virtio_net_hdr_v1_hash_tunnel *vhdr,
+                         bool tnl_hdr_negotiated,
+                         bool tnl_csum_negotiated,
+                         bool little_endian)
+{
+       const struct virtio_net_hdr *hdr = (const struct virtio_net_hdr *)vhdr;
+       unsigned int inner_nh, outer_th, inner_th;
+       unsigned int inner_l3min, outer_l3min;
+       u8 gso_inner_type, gso_tunnel_type;
+       bool outer_isv6, inner_isv6;
+       int ret;
+
+       gso_tunnel_type = hdr->gso_type & VIRTIO_NET_HDR_GSO_UDP_TUNNEL;
+       if (!gso_tunnel_type)
+               return virtio_net_hdr_to_skb(skb, hdr, little_endian);
+
+       /* Tunnel not supported/negotiated, but the hdr asks for it. */
+       if (!tnl_hdr_negotiated)
+               return -EINVAL;
+
+       /* Either ipv4 or ipv6. */
+       if (gso_tunnel_type == VIRTIO_NET_HDR_GSO_UDP_TUNNEL)
+               return -EINVAL;
+
+       /* The UDP tunnel must carry a GSO packet, but no UFO. */
+       gso_inner_type = hdr->gso_type & ~(VIRTIO_NET_HDR_GSO_ECN |
+                                          VIRTIO_NET_HDR_GSO_UDP_TUNNEL);
+       if (!gso_inner_type || gso_inner_type == VIRTIO_NET_HDR_GSO_UDP)
+               return -EINVAL;
+
+       /* Rely on csum being present. */
+       if (!(hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM))
+               return -EINVAL;
+
+       /* Validate offsets. */
+       outer_isv6 = gso_tunnel_type & VIRTIO_NET_HDR_GSO_UDP_TUNNEL_IPV6;
+       inner_isv6 = gso_inner_type == VIRTIO_NET_HDR_GSO_TCPV6;
+       inner_l3min = virtio_l3min(inner_isv6);
+       outer_l3min = ETH_HLEN + virtio_l3min(outer_isv6);
+
+       inner_th = __virtio16_to_cpu(little_endian, hdr->csum_start);
+       inner_nh = le16_to_cpu(vhdr->inner_nh_offset);
+       outer_th = le16_to_cpu(vhdr->outer_th_offset);
+       if (outer_th < outer_l3min ||
+           inner_nh < outer_th + sizeof(struct udphdr) ||
+           inner_th < inner_nh + inner_l3min)
+               return -EINVAL;
+
+       /* Let the basic parsing deal with plain GSO features. */
+       ret = __virtio_net_hdr_to_skb(skb, hdr, true,
+                                     hdr->gso_type & ~gso_tunnel_type);
+       if (ret)
+               return ret;
+
+       /* In case of USO, the inner protocol is still unknown and
+        * `inner_isv6` is just a guess, additional parsing is needed.
+        * The previous validation ensures that accessing an ipv4 inner
+        * network header is safe.
+        */
+       if (gso_inner_type == VIRTIO_NET_HDR_GSO_UDP_L4) {
+               struct iphdr *iphdr = (struct iphdr *)(skb->data + inner_nh);
+
+               inner_isv6 = iphdr->version == 6;
+               inner_l3min = virtio_l3min(inner_isv6);
+               if (inner_th < inner_nh + inner_l3min)
+                       return -EINVAL;
+       }
+
+       skb_set_inner_protocol(skb, inner_isv6 ? htons(ETH_P_IPV6) :
+                                                htons(ETH_P_IP));
+       if (hdr->flags & VIRTIO_NET_HDR_F_UDP_TUNNEL_CSUM) {
+               if (!tnl_csum_negotiated)
+                       return -EINVAL;
+
+               skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL_CSUM;
+       } else {
+               skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL;
+       }
+
+       skb->inner_transport_header = inner_th + skb_headroom(skb);
+       skb->inner_network_header = inner_nh + skb_headroom(skb);
+       skb->inner_mac_header = inner_nh + skb_headroom(skb);
+       skb->transport_header = outer_th + skb_headroom(skb);
+       skb->encapsulation = 1;
+       return 0;
+}
+
+/* Checksum-related fields validation for the driver */
+static inline int virtio_net_handle_csum_offload(struct sk_buff *skb,
+                                                struct virtio_net_hdr *hdr,
+                                                bool tnl_csum_negotiated)
+{
+       if (!(hdr->gso_type & VIRTIO_NET_HDR_GSO_UDP_TUNNEL)) {
+               if (!(hdr->flags & VIRTIO_NET_HDR_F_DATA_VALID))
+                       return 0;
+
+               skb->ip_summed = CHECKSUM_UNNECESSARY;
+               if (!(hdr->flags & VIRTIO_NET_HDR_F_UDP_TUNNEL_CSUM))
+                       return 0;
+
+               /* tunnel csum packets are invalid when the related
+                * feature has not been negotiated
+                */
+               if (!tnl_csum_negotiated)
+                       return -EINVAL;
+               skb->csum_level = 1;
+               return 0;
+       }
+
+       /* DATA_VALID is mutually exclusive with NEEDS_CSUM, and GSO
+        * over UDP tunnel requires the latter
+        */
+       if (hdr->flags & VIRTIO_NET_HDR_F_DATA_VALID)
+               return -EINVAL;
+       return 0;
+}
+
+/*
+ * vlan_hlen always refers to the outermost MAC header. That also
+ * means it refers to the only MAC header, if the packet does not carry
+ * any encapsulation.
+ */
+static inline int
+virtio_net_hdr_tnl_from_skb(const struct sk_buff *skb,
+                           struct virtio_net_hdr_v1_hash_tunnel *vhdr,
+                           bool tnl_hdr_negotiated,
+                           bool little_endian,
+                           int vlan_hlen)
+{
+       struct virtio_net_hdr *hdr = (struct virtio_net_hdr *)vhdr;
+       unsigned int inner_nh, outer_th;
+       int tnl_gso_type;
+       int ret;
+
+       tnl_gso_type = skb_shinfo(skb)->gso_type & (SKB_GSO_UDP_TUNNEL |
+                                                   SKB_GSO_UDP_TUNNEL_CSUM);
+       if (!tnl_gso_type)
+               return virtio_net_hdr_from_skb(skb, hdr, little_endian, false,
+                                              vlan_hlen);
+
+       /* Tunnel support not negotiated, but the skb asks for it. */
+       if (!tnl_hdr_negotiated)
+               return -EINVAL;
+
+       /* Let the basic parsing deal with plain GSO features. */
+       skb_shinfo(skb)->gso_type &= ~tnl_gso_type;
+       ret = virtio_net_hdr_from_skb(skb, hdr, true, false, vlan_hlen);
+       skb_shinfo(skb)->gso_type |= tnl_gso_type;
+       if (ret)
+               return ret;
+
+       if (skb->protocol == htons(ETH_P_IPV6))
+               hdr->gso_type |= VIRTIO_NET_HDR_GSO_UDP_TUNNEL_IPV6;
+       else
+               hdr->gso_type |= VIRTIO_NET_HDR_GSO_UDP_TUNNEL_IPV4;
+
+       if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM)
+               hdr->flags |= VIRTIO_NET_HDR_F_UDP_TUNNEL_CSUM;
+
+       inner_nh = skb->inner_network_header - skb_headroom(skb);
+       outer_th = skb->transport_header - skb_headroom(skb);
+       vhdr->inner_nh_offset = cpu_to_le16(inner_nh);
+       vhdr->outer_th_offset = cpu_to_le16(outer_th);
+       return 0;
+}
+
 #endif /* _LINUX_VIRTIO_NET_H */
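Finally, to show how a driver is expected to combine these helpers, here is an
editorial usage sketch. The function names, the negotiated-feature booleans
and the packet geometry in the comments are hypothetical; only the
virtio_net_* helpers and their signatures come from the patch above. The
geometry assumes an IPv4, VXLAN-like encapsulation with no VLAN tag:

    /* RX: device -> driver. With the sketched geometry the offsets are:
     *   outer Ethernet (14) + outer IPv4 (20)       -> outer_th_offset = 34
     *   + UDP (8) + VXLAN (8) + inner Ethernet (14) -> inner_nh_offset = 64
     *   + inner IPv4 (20)                           -> csum_start      = 84
     * which satisfies the checks in virtio_net_hdr_tnl_to_skb():
     *   34 >= ETH_HLEN + 20, 64 >= 34 + sizeof(struct udphdr), 84 >= 64 + 20.
     */
    static int example_rx(struct sk_buff *skb,
                          struct virtio_net_hdr_v1_hash_tunnel *vhdr,
                          bool tnl_rx_negotiated, bool tnl_csum_negotiated)
    {
            struct virtio_net_hdr *hdr = (struct virtio_net_hdr *)vhdr;
            int ret;

            ret = virtio_net_hdr_tnl_to_skb(skb, vhdr, tnl_rx_negotiated,
                                            tnl_csum_negotiated, true);
            if (ret)
                    return ret;

            /* Driver-side checksum flag validation for the same header. */
            return virtio_net_handle_csum_offload(skb, hdr, tnl_csum_negotiated);
    }

    /* TX: driver -> device. vlan_hlen is 0 because the sketch assumes no VLAN
     * tag; per the comment above virtio_net_hdr_tnl_from_skb() it always
     * refers to the outermost MAC header.
     */
    static int example_tx(const struct sk_buff *skb,
                          struct virtio_net_hdr_v1_hash_tunnel *vhdr,
                          bool tnl_tx_negotiated)
    {
            return virtio_net_hdr_tnl_from_skb(skb, vhdr, tnl_tx_negotiated,
                                               true, 0);
    }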