From 117aef12a7b1b797bce9f66b156c65eab850b5b5 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Wed, 27 Mar 2024 16:23:52 +0100 Subject: ip_tunnel: use a separate struct to store tunnel params in the kernel Unlike IPv6 tunnels which use purely-kernel __ip6_tnl_parm structure to store params inside the kernel, IPv4 tunnel code uses the same ip_tunnel_parm which is being used to talk with the userspace. This makes it difficult to alter or add any fields or use a different format for whatever data. Define struct ip_tunnel_parm_kern, a 1:1 copy of ip_tunnel_parm for now, and use it throughout the code. Define the pieces, where the copy user <-> kernel happens, as standalone functions, and copy the data there field-by-field, so that the kernel-side structure could be easily modified later on and the users wouldn't have to care about this. Reviewed-by: Simon Horman Signed-off-by: Alexander Lobakin Signed-off-by: David S. Miller --- include/net/ip_tunnels.h | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) (limited to 'include/net/ip_tunnels.h') diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 5cd64bb2104d..20f0319ab149 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -110,6 +110,17 @@ struct ip_tunnel_prl_entry { struct metadata_dst; +/* Kernel-side copy of ip_tunnel_parm */ +struct ip_tunnel_parm_kern { + char name[IFNAMSIZ]; + int link; + __be16 i_flags; + __be16 o_flags; + __be32 i_key; + __be32 o_key; + struct iphdr iph; +}; + struct ip_tunnel { struct ip_tunnel __rcu *next; struct hlist_node hash_node; @@ -136,7 +147,7 @@ struct ip_tunnel { struct dst_cache dst_cache; - struct ip_tunnel_parm parms; + struct ip_tunnel_parm_kern parms; int mlink; int encap_hlen; /* Encap header length (FOU,GUE) */ @@ -291,7 +302,11 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, const struct iphdr *tnl_params, const u8 protocol); void ip_md_tunnel_xmit(struct sk_buff *skb, struct 
net_device *dev, const u8 proto, int tunnel_hlen); -int ip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd); +int ip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm_kern *p, + int cmd); +bool ip_tunnel_parm_from_user(struct ip_tunnel_parm_kern *kp, + const void __user *data); +bool ip_tunnel_parm_to_user(void __user *data, struct ip_tunnel_parm_kern *kp); int ip_tunnel_siocdevprivate(struct net_device *dev, struct ifreq *ifr, void __user *data, int cmd); int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict); @@ -307,16 +322,16 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst, bool log_ecn_error); int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[], - struct ip_tunnel_parm *p, __u32 fwmark); + struct ip_tunnel_parm_kern *p, __u32 fwmark); int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[], - struct ip_tunnel_parm *p, __u32 fwmark); + struct ip_tunnel_parm_kern *p, __u32 fwmark); void ip_tunnel_setup(struct net_device *dev, unsigned int net_id); bool ip_tunnel_netlink_encap_parms(struct nlattr *data[], struct ip_tunnel_encap *encap); void ip_tunnel_netlink_parms(struct nlattr *data[], - struct ip_tunnel_parm *parms); + struct ip_tunnel_parm_kern *parms); extern const struct header_ops ip_tunnel_header_ops; __be16 ip_tunnel_parse_protocol(const struct sk_buff *skb); -- cgit v1.2.3 From 5832c4a77d6931cebf9ba737129ae8f14b66ee1d Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Wed, 27 Mar 2024 16:23:53 +0100 Subject: ip_tunnel: convert __be16 tunnel flags to bitmaps Historically, tunnel flags like TUNNEL_CSUM or TUNNEL_ERSPAN_OPT have been defined as __be16. Now all of those 16 bits are occupied and there's no more free space for new flags. 
It can't be simply switched to a bigger container with no adjustments to the values, since it's an explicit Endian storage, and on LE systems (__be16)0x0001 is equal to (__be64)0x0001000000000000. We could probably define new 64-bit flags depending on the Endianness, i.e. (__be64)0x0001 on BE and (__be64)0x00010000... on LE, but that would introduce an Endianness dependency and spawn a ton of Sparse warnings. To mitigate them, all of those places which were adjusted with this change would be touched anyway, so why not define stuff properly if there's no choice. Define IP_TUNNEL_*_BIT counterparts as a bit number instead of the value already coded and a fistful of <16 <-> bitmap> converters and helpers. The two flags which have a different bit position are SIT_ISATAP_BIT and VTI_ISVTI_BIT, as they were defined not as __cpu_to_be16(), but as (__force __be16), i.e. had different positions on LE and BE. Now they both have strongly defined places. Change all __be16 fields which were used to store those flags, to IP_TUNNEL_DECLARE_FLAGS() -> DECLARE_BITMAP(__IP_TUNNEL_FLAG_NUM) -> unsigned long[1] for now, and replace all TUNNEL_* occurrences with their bitmap counterparts. Use the converters in the places which talk to the userspace, hardware (NFP) or other hosts (GRE header). The rest must explicitly use the new flags only. This must be done at once, otherwise there will be too many conversions throughout the code in the intermediate commits. Finally, disable the old __be16 flags for use in the kernel code (except for the two 'irregular' flags mentioned above), to prevent any accidental (mis)use of them. For the userspace, nothing is changed, only additions were made. 
Most noticeable bloat-o-meter difference (.text): vmlinux: 307/-1 (306) gre.ko: 62/0 (62) ip_gre.ko: 941/-217 (724) [*] ip_tunnel.ko: 390/-900 (-510) [**] ip_vti.ko: 138/0 (138) ip6_gre.ko: 534/-18 (516) [*] ip6_tunnel.ko: 118/-10 (108) [*] gre_flags_to_tnl_flags() grew, but still is inlined [**] ip_tunnel_find() got uninlined, hence such decrease The average code size increase in non-extreme case is 100-200 bytes per module, mostly due to sizeof(long) > sizeof(__be16), as %__IP_TUNNEL_FLAG_NUM is less than %BITS_PER_LONG and the compilers are able to expand the majority of bitmap_*() calls here into direct operations on scalars. Reviewed-by: Simon Horman Signed-off-by: Alexander Lobakin Signed-off-by: David S. Miller --- include/net/ip_tunnels.h | 119 +++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 100 insertions(+), 19 deletions(-) (limited to 'include/net/ip_tunnels.h') diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 20f0319ab149..ed8e48cc9054 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -36,6 +36,24 @@ (sizeof_field(struct ip_tunnel_key, u) - \ sizeof_field(struct ip_tunnel_key, u.ipv4)) +#define __ipt_flag_op(op, ...) \ + op(__VA_ARGS__, __IP_TUNNEL_FLAG_NUM) + +#define IP_TUNNEL_DECLARE_FLAGS(...) \ + __ipt_flag_op(DECLARE_BITMAP, __VA_ARGS__) + +#define ip_tunnel_flags_zero(...) __ipt_flag_op(bitmap_zero, __VA_ARGS__) +#define ip_tunnel_flags_copy(...) __ipt_flag_op(bitmap_copy, __VA_ARGS__) +#define ip_tunnel_flags_and(...) __ipt_flag_op(bitmap_and, __VA_ARGS__) +#define ip_tunnel_flags_or(...) __ipt_flag_op(bitmap_or, __VA_ARGS__) + +#define ip_tunnel_flags_empty(...) \ + __ipt_flag_op(bitmap_empty, __VA_ARGS__) +#define ip_tunnel_flags_intersect(...) \ + __ipt_flag_op(bitmap_intersects, __VA_ARGS__) +#define ip_tunnel_flags_subset(...) 
\ + __ipt_flag_op(bitmap_subset, __VA_ARGS__) + struct ip_tunnel_key { __be64 tun_id; union { @@ -48,11 +66,11 @@ struct ip_tunnel_key { struct in6_addr dst; } ipv6; } u; - __be16 tun_flags; - u8 tos; /* TOS for IPv4, TC for IPv6 */ - u8 ttl; /* TTL for IPv4, HL for IPv6 */ + IP_TUNNEL_DECLARE_FLAGS(tun_flags); __be32 label; /* Flow Label for IPv6 */ u32 nhid; + u8 tos; /* TOS for IPv4, TC for IPv6 */ + u8 ttl; /* TTL for IPv4, HL for IPv6 */ __be16 tp_src; __be16 tp_dst; __u8 flow_flags; @@ -110,14 +128,14 @@ struct ip_tunnel_prl_entry { struct metadata_dst; -/* Kernel-side copy of ip_tunnel_parm */ +/* Kernel-side variant of ip_tunnel_parm */ struct ip_tunnel_parm_kern { char name[IFNAMSIZ]; - int link; - __be16 i_flags; - __be16 o_flags; + IP_TUNNEL_DECLARE_FLAGS(i_flags); + IP_TUNNEL_DECLARE_FLAGS(o_flags); __be32 i_key; __be32 o_key; + int link; struct iphdr iph; }; @@ -168,7 +186,7 @@ struct ip_tunnel { }; struct tnl_ptk_info { - __be16 flags; + IP_TUNNEL_DECLARE_FLAGS(flags); __be16 proto; __be32 key; __be32 seq; @@ -190,11 +208,77 @@ struct ip_tunnel_net { int type; }; +static inline void ip_tunnel_set_options_present(unsigned long *flags) +{ + IP_TUNNEL_DECLARE_FLAGS(present) = { }; + + __set_bit(IP_TUNNEL_GENEVE_OPT_BIT, present); + __set_bit(IP_TUNNEL_VXLAN_OPT_BIT, present); + __set_bit(IP_TUNNEL_ERSPAN_OPT_BIT, present); + __set_bit(IP_TUNNEL_GTP_OPT_BIT, present); + + ip_tunnel_flags_or(flags, flags, present); +} + +static inline void ip_tunnel_clear_options_present(unsigned long *flags) +{ + IP_TUNNEL_DECLARE_FLAGS(present) = { }; + + __set_bit(IP_TUNNEL_GENEVE_OPT_BIT, present); + __set_bit(IP_TUNNEL_VXLAN_OPT_BIT, present); + __set_bit(IP_TUNNEL_ERSPAN_OPT_BIT, present); + __set_bit(IP_TUNNEL_GTP_OPT_BIT, present); + + __ipt_flag_op(bitmap_andnot, flags, flags, present); +} + +static inline bool ip_tunnel_is_options_present(const unsigned long *flags) +{ + IP_TUNNEL_DECLARE_FLAGS(present) = { }; + + __set_bit(IP_TUNNEL_GENEVE_OPT_BIT, present); + 
__set_bit(IP_TUNNEL_VXLAN_OPT_BIT, present); + __set_bit(IP_TUNNEL_ERSPAN_OPT_BIT, present); + __set_bit(IP_TUNNEL_GTP_OPT_BIT, present); + + return ip_tunnel_flags_intersect(flags, present); +} + +static inline bool ip_tunnel_flags_is_be16_compat(const unsigned long *flags) +{ + IP_TUNNEL_DECLARE_FLAGS(supp) = { }; + + bitmap_set(supp, 0, BITS_PER_TYPE(__be16)); + __set_bit(IP_TUNNEL_VTI_BIT, supp); + + return ip_tunnel_flags_subset(flags, supp); +} + +static inline void ip_tunnel_flags_from_be16(unsigned long *dst, __be16 flags) +{ + ip_tunnel_flags_zero(dst); + + bitmap_write(dst, be16_to_cpu(flags), 0, BITS_PER_TYPE(__be16)); + __assign_bit(IP_TUNNEL_VTI_BIT, dst, flags & VTI_ISVTI); +} + +static inline __be16 ip_tunnel_flags_to_be16(const unsigned long *flags) +{ + __be16 ret; + + ret = cpu_to_be16(bitmap_read(flags, 0, BITS_PER_TYPE(__be16))); + if (test_bit(IP_TUNNEL_VTI_BIT, flags)) + ret |= VTI_ISVTI; + + return ret; +} + static inline void ip_tunnel_key_init(struct ip_tunnel_key *key, __be32 saddr, __be32 daddr, u8 tos, u8 ttl, __be32 label, __be16 tp_src, __be16 tp_dst, - __be64 tun_id, __be16 tun_flags) + __be64 tun_id, + const unsigned long *tun_flags) { key->tun_id = tun_id; key->u.ipv4.src = saddr; @@ -204,7 +288,7 @@ static inline void ip_tunnel_key_init(struct ip_tunnel_key *key, key->tos = tos; key->ttl = ttl; key->label = label; - key->tun_flags = tun_flags; + ip_tunnel_flags_copy(key->tun_flags, tun_flags); /* For the tunnel types on the top of IPsec, the tp_src and tp_dst of * the upper tunnel are used. 
@@ -225,12 +309,8 @@ ip_tunnel_dst_cache_usable(const struct sk_buff *skb, { if (skb->mark) return false; - if (!info) - return true; - if (info->key.tun_flags & TUNNEL_NOCACHE) - return false; - return true; + return !info || !test_bit(IP_TUNNEL_NOCACHE_BIT, info->key.tun_flags); } static inline unsigned short ip_tunnel_info_af(const struct ip_tunnel_info @@ -313,7 +393,7 @@ int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict); int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu); struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn, - int link, __be16 flags, + int link, const unsigned long *flags, __be32 remote, __be32 local, __be32 key); @@ -529,12 +609,13 @@ static inline void ip_tunnel_info_opts_get(void *to, static inline void ip_tunnel_info_opts_set(struct ip_tunnel_info *info, const void *from, int len, - __be16 flags) + const unsigned long *flags) { info->options_len = len; if (len > 0) { memcpy(ip_tunnel_info_opts(info), from, len); - info->key.tun_flags |= flags; + ip_tunnel_flags_or(info->key.tun_flags, info->key.tun_flags, + flags); } } @@ -578,7 +659,7 @@ static inline void ip_tunnel_info_opts_get(void *to, static inline void ip_tunnel_info_opts_set(struct ip_tunnel_info *info, const void *from, int len, - __be16 flags) + const unsigned long *flags) { info->options_len = 0; } -- cgit v1.2.3 From 6dd514f48110ebb4bf36875b9e7e02d07b589caa Mon Sep 17 00:00:00 2001 From: Michal Swiatkowski Date: Wed, 27 Mar 2024 16:23:56 +0100 Subject: pfcp: always set pfcp metadata In PFCP receive path set metadata needed by flower code to do correct classification based on this metadata. Signed-off-by: Michal Swiatkowski Signed-off-by: Marcin Szycik Reviewed-by: Simon Horman Signed-off-by: Alexander Lobakin Signed-off-by: David S. 
Miller --- include/net/ip_tunnels.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/net/ip_tunnels.h') diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index ed8e48cc9054..d8f574fbb11e 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -216,6 +216,7 @@ static inline void ip_tunnel_set_options_present(unsigned long *flags) __set_bit(IP_TUNNEL_VXLAN_OPT_BIT, present); __set_bit(IP_TUNNEL_ERSPAN_OPT_BIT, present); __set_bit(IP_TUNNEL_GTP_OPT_BIT, present); + __set_bit(IP_TUNNEL_PFCP_OPT_BIT, present); ip_tunnel_flags_or(flags, flags, present); } @@ -228,6 +229,7 @@ static inline void ip_tunnel_clear_options_present(unsigned long *flags) __set_bit(IP_TUNNEL_VXLAN_OPT_BIT, present); __set_bit(IP_TUNNEL_ERSPAN_OPT_BIT, present); __set_bit(IP_TUNNEL_GTP_OPT_BIT, present); + __set_bit(IP_TUNNEL_PFCP_OPT_BIT, present); __ipt_flag_op(bitmap_andnot, flags, flags, present); } @@ -240,6 +242,7 @@ static inline bool ip_tunnel_is_options_present(const unsigned long *flags) __set_bit(IP_TUNNEL_VXLAN_OPT_BIT, present); __set_bit(IP_TUNNEL_ERSPAN_OPT_BIT, present); __set_bit(IP_TUNNEL_GTP_OPT_BIT, present); + __set_bit(IP_TUNNEL_PFCP_OPT_BIT, present); return ip_tunnel_flags_intersect(flags, present); } -- cgit v1.2.3