From 93d7a7ed07342f5e3da2d250cfd67f899d0b5318 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 10 Oct 2025 12:32:23 +0200 Subject: netfilter: flowtable: move path discovery infrastructure to its own file This file contains the path discovery that is run from the forward chain for the packet offloading the flow into the flowtable. This consists of a series of calls to dev_fill_forward_path() for each device stack. More topologies may be supported in the future, so move this code to its own file to separate it from the nftables flow_offload expression. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_flow_table.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index c003cd194fa2..e9f72d2558e9 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -222,6 +222,12 @@ struct nf_flow_route { struct flow_offload *flow_offload_alloc(struct nf_conn *ct); void flow_offload_free(struct flow_offload *flow); +struct nft_flowtable; +struct nft_pktinfo; +int nft_flow_route(const struct nft_pktinfo *pkt, const struct nf_conn *ct, + struct nf_flow_route *route, enum ip_conntrack_dir dir, + struct nft_flowtable *ft); + static inline int nf_flow_table_offload_add_cb(struct nf_flowtable *flow_table, flow_setup_cb_t *cb, void *cb_priv) -- cgit v1.2.3 From b5964aac51e0c286a50e68225e0dfcf11fb554cb Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 10 Oct 2025 12:32:35 +0200 Subject: netfilter: flowtable: consolidate xmit path Use dev_queue_xmit() for the XMIT_NEIGH case. Store the interface index of the real device behind the vlan/pppoe device, this introduces an extra lookup for the real device in the xmit path because rt->dst.dev provides the vlan/pppoe device. XMIT_NEIGH now looks more similar to XMIT_DIRECT but the check for stale dst and the neighbour lookup still remain in place which is convenient to deal with network topology changes. Note that nft_flow_route() needs to relax the check for _XMIT_NEIGH so the existing basic xfrm offload (which only works in one direction) does not break. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_flow_table.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index e9f72d2558e9..7c330caae52b 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -141,6 +141,7 @@ struct flow_offload_tuple { union { struct { struct dst_entry *dst_cache; + u32 ifidx; u32 dst_cookie; }; struct { -- cgit v1.2.3 From 030feea3097c41ed268c81240e5c334d9977b1c4 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 10 Oct 2025 12:50:07 +0200 Subject: netfilter: flowtable: remove hw_ifidx hw_ifidx was originally introduced to store the real netdevice as a requirement for the hardware offload support in: 73f97025a972 ("netfilter: nft_flow_offload: use direct xmit if hardware offload is enabled") Since ("netfilter: flowtable: consolidate xmit path"), ifidx and hw_ifidx points to the real device in the xmit path, remove it. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_flow_table.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index 7c330caae52b..f7306276ece7 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -146,7 +146,6 @@ struct flow_offload_tuple { }; struct { u32 ifidx; - u32 hw_ifidx; u8 h_source[ETH_ALEN]; u8 h_dest[ETH_ALEN]; } out; -- cgit v1.2.3 From ab427db17885814069bae891834f20842f0ac3a4 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Fri, 7 Nov 2025 12:14:46 +0100 Subject: netfilter: flowtable: Add IPIP rx sw acceleration Introduce sw acceleration for rx path of IPIP tunnels relying on the netfilter flowtable infrastructure. Subsequent patches will add sw acceleration for IPIP tunnels tx path. This series introduces basic infrastructure to accelerate other tunnel types (e.g. IP6IP6). IPIP rx sw acceleration can be tested running the following scenario where the traffic is forwarded between two NICs (eth0 and eth1) and an IPIP tunnel is used to access a remote site (using eth1 as the underlay device): ETH0 -- TUN0 <==> ETH1 -- [IP network] -- TUN1 (192.168.100.2) $ip addr show 6: eth0: mtu 1500 qdisc noqueue state UP group default qlen 1000 link/ether 00:00:22:33:11:55 brd ff:ff:ff:ff:ff:ff inet 192.168.0.2/24 scope global eth0 valid_lft forever preferred_lft forever 7: eth1: mtu 1500 qdisc noqueue state UP group default qlen 1000 link/ether 00:11:22:33:11:55 brd ff:ff:ff:ff:ff:ff inet 192.168.1.1/24 scope global eth1 valid_lft forever preferred_lft forever 8: tun0@NONE: mtu 1480 qdisc noqueue state UNKNOWN group default qlen 1000 link/ipip 192.168.1.1 peer 192.168.1.2 inet 192.168.100.1/24 scope global tun0 valid_lft forever preferred_lft forever $ip route show default via 192.168.100.2 dev tun0 192.168.0.0/24 dev eth0 proto kernel scope link src 192.168.0.2 192.168.1.0/24 dev eth1 proto kernel scope link src 192.168.1.1 192.168.100.0/24 dev tun0 proto kernel scope link src 192.168.100.1 $nft list ruleset table inet filter { flowtable ft { hook ingress priority filter devices = { eth0, eth1 } } chain forward { type filter hook forward priority filter; policy accept; meta l4proto { tcp, udp } flow add @ft } } Reproducing the scenario described above using veths I got the following results: - TCP stream received from the IPIP tunnel: - net-next: (baseline) ~ 71Gbps - net-next + IPIP flowtbale support: ~101Gbps Signed-off-by: Lorenzo Bianconi Signed-off-by: Pablo Neira Ayuso --- include/linux/netdevice.h | 13 +++++++++++++ include/net/netfilter/nf_flow_table.h | 18 ++++++++++++++++++ 2 files changed, 31 insertions(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e808071dbb7d..bf99fe8622da 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -877,6 +877,7 @@ enum net_device_path_type { DEV_PATH_PPPOE, DEV_PATH_DSA, DEV_PATH_MTK_WDMA, + DEV_PATH_TUN, }; struct net_device_path { @@ -888,6 +889,18 @@ struct net_device_path { __be16 proto; u8 h_dest[ETH_ALEN]; } encap; + struct { + union { + struct in_addr src_v4; + struct in6_addr src_v6; + }; + union { + struct in_addr dst_v4; + struct in6_addr dst_v6; + }; + + u8 l3_proto; + } tun; struct { enum { DEV_PATH_BR_VLAN_KEEP, diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index f7306276ece7..b09c11c048d5 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -107,6 +107,19 @@ enum flow_offload_xmit_type { #define NF_FLOW_TABLE_ENCAP_MAX 2 +struct flow_offload_tunnel { + union { + struct in_addr src_v4; + struct in6_addr src_v6; + }; + union { + struct in_addr dst_v4; + struct in6_addr dst_v6; + }; + + u8 l3_proto; +}; + struct flow_offload_tuple { union { struct in_addr src_v4; @@ -130,12 +143,15 @@ struct flow_offload_tuple { __be16 proto; } encap[NF_FLOW_TABLE_ENCAP_MAX]; + struct flow_offload_tunnel tun; + /* All members above are keys for lookups, see flow_offload_hash(). */ struct { } __hash; u8 dir:2, xmit_type:3, encap_num:2, + tun_num:2, in_vlan_ingress:2; u16 mtu; union { @@ -206,7 +222,9 @@ struct nf_flow_route { u16 id; __be16 proto; } encap[NF_FLOW_TABLE_ENCAP_MAX]; + struct flow_offload_tunnel tun; u8 num_encaps:2, + num_tuns:2, ingress_vlans:2; } in; struct { -- cgit v1.2.3 From be102eb6a0e7c03db00e50540622f4e43b2d2844 Mon Sep 17 00:00:00 2001 From: Fernando Fernandez Mancera Date: Fri, 21 Nov 2025 01:14:30 +0100 Subject: netfilter: nf_conncount: rework API to use sk_buff directly When using nf_conncount infrastructure for non-confirmed connections a duplicated track is possible due to an optimization introduced since commit d265929930e2 ("netfilter: nf_conncount: reduce unnecessary GC"). In order to fix this introduce a new conncount API that receives directly an sk_buff struct. It fetches the tuple and zone and the corresponding ct from it. It comes with both existing conncount variants nf_conncount_count_skb() and nf_conncount_add_skb(). In addition remove the old API and adjust all the users to use the new one. This way, for each sk_buff struct it is possible to check if there is a ct present and already confirmed. If so, skip the add operation. Fixes: d265929930e2 ("netfilter: nf_conncount: reduce unnecessary GC") Signed-off-by: Fernando Fernandez Mancera Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_count.h | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_count.h b/include/net/netfilter/nf_conntrack_count.h index 1b58b5b91ff6..52a06de41aa0 100644 --- a/include/net/netfilter/nf_conntrack_count.h +++ b/include/net/netfilter/nf_conntrack_count.h @@ -18,15 +18,14 @@ struct nf_conncount_list { struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int keylen); void nf_conncount_destroy(struct net *net, struct nf_conncount_data *data); -unsigned int nf_conncount_count(struct net *net, - struct nf_conncount_data *data, - const u32 *key, - const struct nf_conntrack_tuple *tuple, - const struct nf_conntrack_zone *zone); - -int nf_conncount_add(struct net *net, struct nf_conncount_list *list, - const struct nf_conntrack_tuple *tuple, - const struct nf_conntrack_zone *zone); +unsigned int nf_conncount_count_skb(struct net *net, + const struct sk_buff *skb, + u16 l3num, + struct nf_conncount_data *data, + const u32 *key); + +int nf_conncount_add_skb(struct net *net, const struct sk_buff *skb, + u16 l3num, struct nf_conncount_list *list); void nf_conncount_list_init(struct nf_conncount_list *list); -- cgit v1.2.3 From c4f0ab06e1e0c1331e6febd03538a7f621f15134 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 1 Nov 2025 12:20:50 -0700 Subject: netfilter: ip6t_srh: fix UAPI kernel-doc comments format Fix the kernel-doc format for struct members to be "@member" instead of "@ member" to avoid kernel-doc warnings. Warning: ip6t_srh.h:60 struct member 'next_hdr' not described in 'ip6t_srh' Warning: ip6t_srh.h:60 struct member 'hdr_len' not described in 'ip6t_srh' Warning: ip6t_srh.h:60 struct member 'segs_left' not described in 'ip6t_srh' Warning: ip6t_srh.h:60 struct member 'last_entry' not described in 'ip6t_srh' Warning: ip6t_srh.h:60 struct member 'tag' not described in 'ip6t_srh' Warning: ip6t_srh.h:60 struct member 'mt_flags' not described in 'ip6t_srh' Warning: ip6t_srh.h:60 struct member 'mt_invflags' not described in 'ip6t_srh' Warning: ip6t_srh.h:93 struct member 'next_hdr' not described in 'ip6t_srh1' Warning: ip6t_srh.h:93 struct member 'hdr_len' not described in 'ip6t_srh1' Warning: ip6t_srh.h:93 struct member 'segs_left' not described in 'ip6t_srh1' Warning: ip6t_srh.h:93 struct member 'last_entry' not described in 'ip6t_srh1' Warning: ip6t_srh.h:93 struct member 'tag' not described in 'ip6t_srh1' Warning: ip6t_srh.h:93 struct member 'psid_addr' not described in 'ip6t_srh1' Warning: ip6t_srh.h:93 struct member 'nsid_addr' not described in 'ip6t_srh1' Warning: ip6t_srh.h:93 struct member 'lsid_addr' not described in 'ip6t_srh1' Warning: ip6t_srh.h:93 struct member 'psid_msk' not described in 'ip6t_srh1' Warning: ip6t_srh.h:93 struct member 'nsid_msk' not described in 'ip6t_srh1' Warning: ip6t_srh.h:93 struct member 'lsid_msk' not described in 'ip6t_srh1' Warning: ip6t_srh.h:93 struct member 'mt_flags' not described in 'ip6t_srh1' Warning: ip6t_srh.h:93 struct member 'mt_invflags' not described in 'ip6t_srh1' Signed-off-by: Randy Dunlap Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter_ipv6/ip6t_srh.h | 40 ++++++++++++++-------------- 1 file changed, 20 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/netfilter_ipv6/ip6t_srh.h b/include/uapi/linux/netfilter_ipv6/ip6t_srh.h index 54ed83360dac..80c66c8ece82 100644 --- a/include/uapi/linux/netfilter_ipv6/ip6t_srh.h +++ b/include/uapi/linux/netfilter_ipv6/ip6t_srh.h @@ -41,13 +41,13 @@ /** * struct ip6t_srh - SRH match options - * @ next_hdr: Next header field of SRH - * @ hdr_len: Extension header length field of SRH - * @ segs_left: Segments left field of SRH - * @ last_entry: Last entry field of SRH - * @ tag: Tag field of SRH - * @ mt_flags: match options - * @ mt_invflags: Invert the sense of match options + * @next_hdr: Next header field of SRH + * @hdr_len: Extension header length field of SRH + * @segs_left: Segments left field of SRH + * @last_entry: Last entry field of SRH + * @tag: Tag field of SRH + * @mt_flags: match options + * @mt_invflags: Invert the sense of match options */ struct ip6t_srh { @@ -62,19 +62,19 @@ struct ip6t_srh { /** * struct ip6t_srh1 - SRH match options (revision 1) - * @ next_hdr: Next header field of SRH - * @ hdr_len: Extension header length field of SRH - * @ segs_left: Segments left field of SRH - * @ last_entry: Last entry field of SRH - * @ tag: Tag field of SRH - * @ psid_addr: Address of previous SID in SRH SID list - * @ nsid_addr: Address of NEXT SID in SRH SID list - * @ lsid_addr: Address of LAST SID in SRH SID list - * @ psid_msk: Mask of previous SID in SRH SID list - * @ nsid_msk: Mask of next SID in SRH SID list - * @ lsid_msk: MAsk of last SID in SRH SID list - * @ mt_flags: match options - * @ mt_invflags: Invert the sense of match options + * @next_hdr: Next header field of SRH + * @hdr_len: Extension header length field of SRH + * @segs_left: Segments left field of SRH + * @last_entry: Last entry field of SRH + * @tag: Tag field of SRH + * @psid_addr: Address of previous SID in SRH SID list + * @nsid_addr: Address of NEXT SID in SRH SID list + * @lsid_addr: Address of LAST SID in SRH SID list + * @psid_msk: Mask of previous SID in SRH SID list + * @nsid_msk: Mask of next SID in SRH SID list + * @lsid_msk: MAsk of last SID in SRH SID list + * @mt_flags: match options + * @mt_invflags: Invert the sense of match options */ struct ip6t_srh1 { -- cgit v1.2.3 From d3a439e55c193b930e0007967cf8d7a29890449b Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 1 Nov 2025 12:20:38 -0700 Subject: netfilter: nf_tables: improve UAPI kernel-doc comments In include/uapi/linux/netfilter/nf_tables.h, correct the kernel-doc comments for mistyped enum names and enum values to avoid these kernel-doc warnings and improve the documentation: nf_tables.h:896: warning: Enum value 'NFT_EXTHDR_OP_TCPOPT' not described in enum 'nft_exthdr_op' nf_tables.h:896: warning: Excess enum value 'NFT_EXTHDR_OP_TCP' description in 'nft_exthdr_op' nf_tables.h:1210: warning: expecting prototype for enum nft_flow_attributes. Prototype was for enum nft_offload_attributes instead nf_tables.h:1428: warning: expecting prototype for enum nft_reject_code. Prototype was for enum nft_reject_inet_code instead (add beginning '@' to each enum value description:) nf_tables.h:1493: warning: Enum value 'NFTA_TPROXY_FAMILY' not described in enum 'nft_tproxy_attributes' nf_tables.h:1493: warning: Enum value 'NFTA_TPROXY_REG_ADDR' not described in enum 'nft_tproxy_attributes' nf_tables.h:1493: warning: Enum value 'NFTA_TPROXY_REG_PORT' not described in enum 'nft_tproxy_attributes' nf_tables.h:1796: warning: expecting prototype for enum nft_device_attributes. Prototype was for enum nft_devices_attributes instead Signed-off-by: Randy Dunlap Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 7c0c915f0306..45c71f7d21c2 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -881,7 +881,7 @@ enum nft_exthdr_flags { * enum nft_exthdr_op - nf_tables match options * * @NFT_EXTHDR_OP_IPV6: match against ipv6 extension headers - * @NFT_EXTHDR_OP_TCP: match against tcp options + * @NFT_EXTHDR_OP_TCPOPT: match against tcp options * @NFT_EXTHDR_OP_IPV4: match against ipv4 options * @NFT_EXTHDR_OP_SCTP: match against sctp chunks * @NFT_EXTHDR_OP_DCCP: match against dccp otions @@ -1200,7 +1200,7 @@ enum nft_ct_attributes { #define NFTA_CT_MAX (__NFTA_CT_MAX - 1) /** - * enum nft_flow_attributes - ct offload expression attributes + * enum nft_offload_attributes - ct offload expression attributes * @NFTA_FLOW_TABLE_NAME: flow table name (NLA_STRING) */ enum nft_offload_attributes { @@ -1410,7 +1410,7 @@ enum nft_reject_types { }; /** - * enum nft_reject_code - Generic reject codes for IPv4/IPv6 + * enum nft_reject_inet_code - Generic reject codes for IPv4/IPv6 * * @NFT_REJECT_ICMPX_NO_ROUTE: no route to host / network unreachable * @NFT_REJECT_ICMPX_PORT_UNREACH: port unreachable @@ -1480,9 +1480,9 @@ enum nft_nat_attributes { /** * enum nft_tproxy_attributes - nf_tables tproxy expression netlink attributes * - * NFTA_TPROXY_FAMILY: Target address family (NLA_U32: nft_registers) - * NFTA_TPROXY_REG_ADDR: Target address register (NLA_U32: nft_registers) - * NFTA_TPROXY_REG_PORT: Target port register (NLA_U32: nft_registers) + * @NFTA_TPROXY_FAMILY: Target address family (NLA_U32: nft_registers) + * @NFTA_TPROXY_REG_ADDR: Target address register (NLA_U32: nft_registers) + * @NFTA_TPROXY_REG_PORT: Target port register (NLA_U32: nft_registers) */ enum nft_tproxy_attributes { NFTA_TPROXY_UNSPEC, @@ -1783,7 +1783,7 @@ enum nft_synproxy_attributes { #define NFTA_SYNPROXY_MAX (__NFTA_SYNPROXY_MAX - 1) /** - * enum nft_device_attributes - nf_tables device netlink attributes + * enum nft_devices_attributes - nf_tables device netlink attributes * * @NFTA_DEVICE_NAME: name of this device (NLA_STRING) * @NFTA_DEVICE_PREFIX: device name prefix, a simple wildcard (NLA_STRING) -- cgit v1.2.3