From eefca7ec514262aef08d0ef261552f2f604bd851 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 11 May 2023 11:49:50 -0400 Subject: net/handshake: Enable the SNI extension to work properly Enable the upper layer protocol to specify the SNI peername. This avoids the need for tlshd to use a DNS lookup, which can return a hostname that doesn't match the incoming certificate's SubjectName. Fixes: 2fd5532044a8 ("net/handshake: Add a kernel API for requesting a TLSv1.3 handshake") Reviewed-by: Simon Horman Signed-off-by: Chuck Lever Signed-off-by: David S. Miller --- include/uapi/linux/handshake.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/handshake.h b/include/uapi/linux/handshake.h index 1de4d0b95325..3d7ea58778c9 100644 --- a/include/uapi/linux/handshake.h +++ b/include/uapi/linux/handshake.h @@ -44,6 +44,7 @@ enum { HANDSHAKE_A_ACCEPT_AUTH_MODE, HANDSHAKE_A_ACCEPT_PEER_IDENTITY, HANDSHAKE_A_ACCEPT_CERTIFICATE, + HANDSHAKE_A_ACCEPT_PEERNAME, __HANDSHAKE_A_ACCEPT_MAX, HANDSHAKE_A_ACCEPT_MAX = (__HANDSHAKE_A_ACCEPT_MAX - 1) -- cgit v1.2.3 From 69474a8a5837be63f13c6f60a7d622b98ed5c539 Mon Sep 17 00:00:00 2001 From: Vladimir Nikishkin Date: Fri, 12 May 2023 11:40:33 +0800 Subject: net: vxlan: Add nolocalbypass option to vxlan. If a packet needs to be encapsulated towards a local destination IP, the packet will undergo a "local bypass" and be injected into the Rx path as if it was received by the target VXLAN device without undergoing encapsulation. If such a device does not exist, the packet will be dropped. There are scenarios where we do not want to perform such a bypass, but instead want the packet to be encapsulated and locally received by a user space program for post-processing. To that end, add a new VXLAN device attribute that controls whether a "local bypass" is performed or not. Default to performing a bypass to maintain existing behavior. Signed-off-by: Vladimir Nikishkin Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 4ac1000b0ef2..0f6a0fe09bdb 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -828,6 +828,7 @@ enum { IFLA_VXLAN_TTL_INHERIT, IFLA_VXLAN_DF, IFLA_VXLAN_VNIFILTER, /* only applicable with COLLECT_METADATA mode */ + IFLA_VXLAN_LOCALBYPASS, __IFLA_VXLAN_MAX }; #define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1) -- cgit v1.2.3 From b9f9a485fb0eb80b0e2b90410b28cbb9b0e85687 Mon Sep 17 00:00:00 2001 From: Jeremy Sowden Date: Tue, 9 May 2023 22:19:45 +0100 Subject: netfilter: nft_exthdr: add boolean DCCP option matching The xt_dccp iptables module supports the matching of DCCP packets based on the presence or absence of DCCP options. Extend nft_exthdr to add this functionality to nftables. Link: https://bugzilla.netfilter.org/show_bug.cgi?id=930 Signed-off-by: Jeremy Sowden Signed-off-by: Florian Westphal --- include/uapi/linux/netfilter/nf_tables.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index c4d4d8e42dc8..e059dc2644df 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -859,12 +859,14 @@ enum nft_exthdr_flags { * @NFT_EXTHDR_OP_TCP: match against tcp options * @NFT_EXTHDR_OP_IPV4: match against ipv4 options * @NFT_EXTHDR_OP_SCTP: match against sctp chunks + * @NFT_EXTHDR_OP_DCCP: match against dccp otions */ enum nft_exthdr_op { NFT_EXTHDR_OP_IPV6, NFT_EXTHDR_OP_TCPOPT, NFT_EXTHDR_OP_IPV4, NFT_EXTHDR_OP_SCTP, + NFT_EXTHDR_OP_DCCP, __NFT_EXTHDR_OP_MAX }; #define NFT_EXTHDR_OP_MAX (__NFT_EXTHDR_OP_MAX - 1) -- cgit v1.2.3 From cb8edce28073a906401c9e421eca7c99f3396da1 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 15 May 2023 16:48:06 -0700 Subject: bpf: Support O_PATH FDs in BPF_OBJ_PIN and BPF_OBJ_GET commands Current UAPI of BPF_OBJ_PIN and BPF_OBJ_GET commands of bpf() syscall forces users to specify pinning location as a string-based absolute or relative (to current working directory) path. This has various implications related to security (e.g., symlink-based attacks), forces BPF FS to be exposed in the file system, which can cause races with other applications. One of the feedbacks we got from folks working with containers heavily was that inability to use purely FD-based location specification was an unfortunate limitation and hindrance for BPF_OBJ_PIN and BPF_OBJ_GET commands. This patch closes this oversight, adding path_fd field to BPF_OBJ_PIN and BPF_OBJ_GET UAPI, following conventions established by *at() syscalls for dirfd + pathname combinations. This now allows interesting possibilities like working with detached BPF FS mount (e.g., to perform multiple pinnings without running a risk of someone interfering with them), and generally making pinning/getting more secure and not prone to any races and/or security attacks. This is demonstrated by a selftest added in subsequent patch that takes advantage of new mount APIs (fsopen, fsconfig, fsmount) to demonstrate creating detached BPF FS mount, pinning, and then getting BPF map out of it, all while never exposing this private instance of BPF FS to outside worlds. Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Reviewed-by: Christian Brauner Link: https://lore.kernel.org/bpf/20230523170013.728457-4-andrii@kernel.org --- include/uapi/linux/bpf.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 1bb11a6ee667..9273c654743c 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1272,6 +1272,9 @@ enum { /* Create a map that will be registered/unregesitered by the backed bpf_link */ BPF_F_LINK = (1U << 13), + +/* Get path from provided FD in BPF_OBJ_PIN/BPF_OBJ_GET commands */ + BPF_F_PATH_FD = (1U << 14), }; /* Flags for BPF_PROG_QUERY. */ @@ -1420,6 +1423,13 @@ union bpf_attr { __aligned_u64 pathname; __u32 bpf_fd; __u32 file_flags; + /* Same as dirfd in openat() syscall; see openat(2) + * manpage for details of path FD and pathname semantics; + * path_fd should accompanied by BPF_F_PATH_FD flag set in + * file_flags field, otherwise it should be set to zero; + * if BPF_F_PATH_FD flag is not set, AT_FDCWD is assumed. + */ + __s32 path_fd; }; struct { /* anonymous struct used by BPF_PROG_ATTACH/DETACH commands */ -- cgit v1.2.3 From e9261467ae86a6544bb602a55a1eab52696e71e3 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 23 May 2023 11:15:48 +0100 Subject: net: mdio: add clause 73 to ethtool conversion helper Add a helper to convert a clause 73 advertisement to an ethtool bitmap. Reviewed-by: Andrew Lunn Signed-off-by: Russell King (Oracle) Signed-off-by: Jakub Kicinski --- include/uapi/linux/mdio.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/mdio.h b/include/uapi/linux/mdio.h index 256b463e47a6..b826598d1e94 100644 --- a/include/uapi/linux/mdio.h +++ b/include/uapi/linux/mdio.h @@ -231,6 +231,30 @@ #define MDIO_PMA_EXTABLE_BT1 0x0800 /* BASE-T1 ability */ #define MDIO_PMA_EXTABLE_NBT 0x4000 /* 2.5/5GBASE-T ability */ +/* AN Clause 73 linkword */ +#define MDIO_AN_C73_0_S_MASK GENMASK(4, 0) +#define MDIO_AN_C73_0_E_MASK GENMASK(9, 5) +#define MDIO_AN_C73_0_PAUSE BIT(10) +#define MDIO_AN_C73_0_ASM_DIR BIT(11) +#define MDIO_AN_C73_0_C2 BIT(12) +#define MDIO_AN_C73_0_RF BIT(13) +#define MDIO_AN_C73_0_ACK BIT(14) +#define MDIO_AN_C73_0_NP BIT(15) +#define MDIO_AN_C73_1_T_MASK GENMASK(4, 0) +#define MDIO_AN_C73_1_1000BASE_KX BIT(5) +#define MDIO_AN_C73_1_10GBASE_KX4 BIT(6) +#define MDIO_AN_C73_1_10GBASE_KR BIT(7) +#define MDIO_AN_C73_1_40GBASE_KR4 BIT(8) +#define MDIO_AN_C73_1_40GBASE_CR4 BIT(9) +#define MDIO_AN_C73_1_100GBASE_CR10 BIT(10) +#define MDIO_AN_C73_1_100GBASE_KP4 BIT(11) +#define MDIO_AN_C73_1_100GBASE_KR4 BIT(12) +#define MDIO_AN_C73_1_100GBASE_CR4 BIT(13) +#define MDIO_AN_C73_1_25GBASE_R_S BIT(14) +#define MDIO_AN_C73_1_25GBASE_R BIT(15) +#define MDIO_AN_C73_2_2500BASE_KX BIT(0) +#define MDIO_AN_C73_2_5GBASE_KR BIT(1) + /* PHY XGXS lane state register. */ #define MDIO_PHYXS_LNSTAT_SYNC0 0x0001 #define MDIO_PHYXS_LNSTAT_SYNC1 0x0002 -- cgit v1.2.3 From 1a432018c0cdf51a77a2e134b19ba6cab4c29c89 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 29 May 2023 14:48:30 +0300 Subject: net/sched: flower: Allow matching on layer 2 miss Add the 'TCA_FLOWER_L2_MISS' netlink attribute that allows user space to match on packets that encountered a layer 2 miss. The miss indication is set as metadata in the tc skb extension by the bridge driver upon FDB or MDB lookup miss and dissected by the flow dissector to the 'FLOW_DISSECTOR_KEY_META' key. The use of this skb extension is guarded by the 'tc_skb_ext_tc' static key. As such, enable / disable this key when filters that match on layer 2 miss are added / deleted. Tested: # cat tc_skb_ext_tc.py #!/usr/bin/env -S drgn -s vmlinux refcount = prog["tc_skb_ext_tc"].key.enabled.counter.value_() print(f"tc_skb_ext_tc reference count is {refcount}") # ./tc_skb_ext_tc.py tc_skb_ext_tc reference count is 0 # tc filter add dev swp1 egress proto all handle 101 pref 1 flower src_mac 00:11:22:33:44:55 action drop # tc filter add dev swp1 egress proto all handle 102 pref 2 flower src_mac 00:11:22:33:44:55 l2_miss true action drop # tc filter add dev swp1 egress proto all handle 103 pref 3 flower src_mac 00:11:22:33:44:55 l2_miss false action drop # ./tc_skb_ext_tc.py tc_skb_ext_tc reference count is 2 # tc filter replace dev swp1 egress proto all handle 102 pref 2 flower src_mac 00:01:02:03:04:05 l2_miss false action drop # ./tc_skb_ext_tc.py tc_skb_ext_tc reference count is 2 # tc filter del dev swp1 egress proto all handle 103 pref 3 flower # tc filter del dev swp1 egress proto all handle 102 pref 2 flower # tc filter del dev swp1 egress proto all handle 101 pref 1 flower # ./tc_skb_ext_tc.py tc_skb_ext_tc reference count is 0 Signed-off-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov Signed-off-by: Jakub Kicinski --- include/uapi/linux/pkt_cls.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 648a82f32666..00933dda7b10 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -594,6 +594,8 @@ enum { TCA_FLOWER_KEY_L2TPV3_SID, /* be32 */ + TCA_FLOWER_L2_MISS, /* u8 */ + __TCA_FLOWER_MAX, }; -- cgit v1.2.3 From 6c1adb650c8d85c6cb471dbc900c2468f462995a Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 30 May 2023 12:19:46 +0300 Subject: net/sched: taprio: add netlink reporting for offload statistics counters Offloading drivers may report some additional statistics counters, some of them even suggested by 802.1Q, like TransmissionOverrun. In my opinion we don't have to limit ourselves to reporting counters only globally to the Qdisc/interface, especially if the device has more detailed reporting (per traffic class), since the more detailed info is valuable for debugging and can help identifying who is exceeding its time slot. But on the other hand, some devices may not be able to report both per TC and global stats. So we end up reporting both ways, and use the good old ethtool_put_stat() strategy to determine which statistics are supported by this NIC. Statistics which aren't set are simply not reported to netlink. For this reason, we need something dynamic (a nlattr nest) to be reported through TCA_STATS_APP, and not something daft like the fixed-size and inextensible struct tc_codel_xstats. A good model for xstats which are a nlattr nest rather than a fixed struct seems to be cake. # Global stats $ tc -s qdisc show dev eth0 root # Per-tc stats $ tc -s class show dev eth0 Signed-off-by: Vladimir Oltean Acked-by: Vinicius Costa Gomes Signed-off-by: David S. Miller --- include/uapi/linux/pkt_sched.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index 51a7addc56c6..00f6ff0aff1f 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -1259,6 +1259,16 @@ enum { TCA_TAPRIO_TC_ENTRY_MAX = (__TCA_TAPRIO_TC_ENTRY_CNT - 1) }; +enum { + TCA_TAPRIO_OFFLOAD_STATS_PAD = 1, /* u64 */ + TCA_TAPRIO_OFFLOAD_STATS_WINDOW_DROPS, /* u64 */ + TCA_TAPRIO_OFFLOAD_STATS_TX_OVERRUNS, /* u64 */ + + /* add new constants above here */ + __TCA_TAPRIO_OFFLOAD_STATS_CNT, + TCA_TAPRIO_OFFLOAD_STATS_MAX = (__TCA_TAPRIO_OFFLOAD_STATS_CNT - 1) +}; + enum { TCA_TAPRIO_ATTR_UNSPEC, TCA_TAPRIO_ATTR_PRIOMAP, /* struct tc_mqprio_qopt */ -- cgit v1.2.3 From 8ad77e72caae22a1ddcfd0c03f2884929e93b7a4 Mon Sep 17 00:00:00 2001 From: Louis DeLosSantos Date: Wed, 31 May 2023 15:38:48 -0400 Subject: bpf: Add table ID to bpf_fib_lookup BPF helper Add ability to specify routing table ID to the `bpf_fib_lookup` BPF helper. A new field `tbid` is added to `struct bpf_fib_lookup` used as parameters to the `bpf_fib_lookup` BPF helper. When the helper is called with the `BPF_FIB_LOOKUP_DIRECT` and `BPF_FIB_LOOKUP_TBID` flags the `tbid` field in `struct bpf_fib_lookup` will be used as the table ID for the fib lookup. If the `tbid` does not exist the fib lookup will fail with `BPF_FIB_LKUP_RET_NOT_FWDED`. The `tbid` field becomes a union over the vlan related output fields in `struct bpf_fib_lookup` and will be zeroed immediately after usage. This functionality is useful in containerized environments. For instance, if a CNI wants to dictate the next-hop for traffic leaving a container it can create a container-specific routing table and perform a fib lookup against this table in a "host-net-namespace-side" TC program. This functionality also allows `ip rule` like functionality at the TC layer, allowing an eBPF program to pick a routing table based on some aspect of the sk_buff. As a concrete use case, this feature will be used in Cilium's SRv6 L3VPN datapath. When egress traffic leaves a Pod an eBPF program attached by Cilium will determine which VRF the egress traffic should target, and then perform a FIB lookup in a specific table representing this VRF's FIB. Signed-off-by: Louis DeLosSantos Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20230505-bpf-add-tbid-fib-lookup-v2-1-0a31c22c748c@gmail.com --- include/uapi/linux/bpf.h | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 9273c654743c..a7b5e91dd768 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3177,6 +3177,10 @@ union bpf_attr { * **BPF_FIB_LOOKUP_DIRECT** * Do a direct table lookup vs full lookup using FIB * rules. + * **BPF_FIB_LOOKUP_TBID** + * Used with BPF_FIB_LOOKUP_DIRECT. + * Use the routing table ID present in *params*->tbid + * for the fib lookup. * **BPF_FIB_LOOKUP_OUTPUT** * Perform lookup from an egress perspective (default is * ingress). @@ -6831,6 +6835,7 @@ enum { BPF_FIB_LOOKUP_DIRECT = (1U << 0), BPF_FIB_LOOKUP_OUTPUT = (1U << 1), BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2), + BPF_FIB_LOOKUP_TBID = (1U << 3), }; enum { @@ -6891,9 +6896,19 @@ struct bpf_fib_lookup { __u32 ipv6_dst[4]; /* in6_addr; network order */ }; - /* output */ - __be16 h_vlan_proto; - __be16 h_vlan_TCI; + union { + struct { + /* output */ + __be16 h_vlan_proto; + __be16 h_vlan_TCI; + }; + /* input: when accompanied with the + * 'BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_TBID` flags, a + * specific routing table to use for the fib lookup. + */ + __u32 tbid; + }; + __u8 smac[6]; /* ETH_ALEN */ __u8 dmac[6]; /* ETH_ALEN */ }; -- cgit v1.2.3 From e069ba07e6c7af69e119316bc87ff44869095f49 Mon Sep 17 00:00:00 2001 From: Aaron Conole Date: Fri, 9 Jun 2023 09:59:55 -0400 Subject: net: openvswitch: add support for l4 symmetric hashing Since its introduction, the ovs module execute_hash action allowed hash algorithms other than the skb->l4_hash to be used. However, additional hash algorithms were not implemented. This means flows requiring different hash distributions weren't able to use the kernel datapath. Now, introduce support for symmetric hashing algorithm as an alternative hash supported by the ovs module using the flow dissector. Output of flow using l4_sym hash: recirc_id(0),in_port(3),eth(),eth_type(0x0800), ipv4(dst=64.0.0.0/192.0.0.0,proto=6,frag=no), packets:30473425, bytes:45902883702, used:0.000s, flags:SP., actions:hash(sym_l4(0)),recirc(0xd) Some performance testing with no GRO/GSO, two veths, single flow: hash(l4(0)): 4.35 GBits/s hash(l4_sym(0)): 4.24 GBits/s Signed-off-by: Aaron Conole Signed-off-by: David S. Miller --- include/uapi/linux/openvswitch.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index c5d62ee82567..e94870e77ee9 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -765,6 +765,7 @@ struct ovs_action_push_vlan { */ enum ovs_hash_alg { OVS_HASH_ALG_L4, + OVS_HASH_ALG_SYM_L4, }; /* -- cgit v1.2.3 From 5e2ff6704a275be009be8979af17c52361b79b89 Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Thu, 8 Jun 2023 22:26:25 +0200 Subject: scm: add SO_PASSPIDFD and SCM_PIDFD Implement SCM_PIDFD, a new type of CMSG type analogical to SCM_CREDENTIALS, but it contains pidfd instead of plain pid, which allows programmers not to care about PID reuse problem. We mask SO_PASSPIDFD feature if CONFIG_UNIX is not builtin because it depends on a pidfd_prepare() API which is not exported to the kernel modules. Idea comes from UAPI kernel group: https://uapi-group.org/kernel-features/ Big thanks to Christian Brauner and Lennart Poettering for productive discussions about this. Cc: "David S. Miller" Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Paolo Abeni Cc: Leon Romanovsky Cc: David Ahern Cc: Arnd Bergmann Cc: Kees Cook Cc: Christian Brauner Cc: Kuniyuki Iwashima Cc: Lennart Poettering Cc: Luca Boccassi Cc: linux-kernel@vger.kernel.org Cc: netdev@vger.kernel.org Cc: linux-arch@vger.kernel.org Tested-by: Luca Boccassi Reviewed-by: Kuniyuki Iwashima Reviewed-by: Christian Brauner Signed-off-by: Alexander Mikhalitsyn Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- include/uapi/asm-generic/socket.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h index 638230899e98..b76169fdb80b 100644 --- a/include/uapi/asm-generic/socket.h +++ b/include/uapi/asm-generic/socket.h @@ -132,6 +132,8 @@ #define SO_RCVMARK 75 +#define SO_PASSPIDFD 76 + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__)) -- cgit v1.2.3 From 7b26952a91cf65ff1cc867a2382a8964d8c0ee7d Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Thu, 8 Jun 2023 22:26:26 +0200 Subject: net: core: add getsockopt SO_PEERPIDFD Add SO_PEERPIDFD which allows to get pidfd of peer socket holder pidfd. This thing is direct analog of SO_PEERCRED which allows to get plain PID. Cc: "David S. Miller" Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Paolo Abeni Cc: Leon Romanovsky Cc: David Ahern Cc: Arnd Bergmann Cc: Kees Cook Cc: Christian Brauner Cc: Kuniyuki Iwashima Cc: Lennart Poettering Cc: Luca Boccassi Cc: Daniel Borkmann Cc: Stanislav Fomichev Cc: bpf@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: netdev@vger.kernel.org Cc: linux-arch@vger.kernel.org Reviewed-by: Christian Brauner Acked-by: Stanislav Fomichev Tested-by: Luca Boccassi Signed-off-by: Alexander Mikhalitsyn Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- include/uapi/asm-generic/socket.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h index b76169fdb80b..8ce8a39a1e5f 100644 --- a/include/uapi/asm-generic/socket.h +++ b/include/uapi/asm-generic/socket.h @@ -133,6 +133,7 @@ #define SO_RCVMARK 75 #define SO_PASSPIDFD 76 +#define SO_PEERPIDFD 77 #if !defined(__KERNEL__) -- cgit v1.2.3 From 7cfffd5fed3e385010583840402f0bf66c4ed147 Mon Sep 17 00:00:00 2001 From: Zahari Doychev Date: Thu, 8 Jun 2023 12:56:47 +0200 Subject: net: flower: add support for matching cfm fields Add support to the tc flower classifier to match based on fields in CFM information elements like level and opcode. tc filter add dev ens6 ingress protocol 802.1q \ flower vlan_id 698 vlan_ethtype 0x8902 cfm mdl 5 op 46 \ action drop Signed-off-by: Zahari Doychev Reviewed-by: Simon Horman Reviewed-by: Ido Schimmel Signed-off-by: Jakub Kicinski --- include/uapi/linux/pkt_cls.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 00933dda7b10..7865f5a9885b 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -596,6 +596,8 @@ enum { TCA_FLOWER_L2_MISS, /* u8 */ + TCA_FLOWER_KEY_CFM, /* nested */ + __TCA_FLOWER_MAX, }; @@ -704,6 +706,13 @@ enum { TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST = (1 << 1), }; +enum { + TCA_FLOWER_KEY_CFM_OPT_UNSPEC, + TCA_FLOWER_KEY_CFM_MD_LEVEL, + TCA_FLOWER_KEY_CFM_OPCODE, + TCA_FLOWER_KEY_CFM_OPT_MAX, +}; + #define TCA_FLOWER_MASK_FLAGS_RANGE (1 << 0) /* Range-based match */ /* Match-all classifier */ -- cgit v1.2.3 From 2ad66fcb2fded5359a676f7146cf442641d28307 Mon Sep 17 00:00:00 2001 From: Gilad Itzkovitch Date: Thu, 18 May 2023 12:07:23 +1200 Subject: wifi: cfg80211: S1G rate information and calculations Increase the size of S1G rate_info flags to support S1G and add flags for new S1G MCS and the supported bandwidths. Also, include S1G rate information to netlink STA rate message. Lastly, add rate calculation function for S1G MCS. Signed-off-by: Gilad Itzkovitch Link: https://lore.kernel.org/r/20230518000723.991912-1-gilad.itzkovitch@morsemicro.com Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index c59fec406da5..435c4ac5d9bf 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -3667,6 +3667,13 @@ enum nl80211_eht_ru_alloc { * (u8, see &enum nl80211_eht_gi) * @NL80211_RATE_INFO_EHT_RU_ALLOC: EHT RU allocation, if not present then * non-OFDMA was used (u8, see &enum nl80211_eht_ru_alloc) + * @NL80211_RATE_INFO_S1G_MCS: S1G MCS index (u8, 0-10) + * @NL80211_RATE_INFO_S1G_NSS: S1G NSS value (u8, 1-4) + * @NL80211_RATE_INFO_1_MHZ_WIDTH: 1 MHz S1G rate + * @NL80211_RATE_INFO_2_MHZ_WIDTH: 2 MHz S1G rate + * @NL80211_RATE_INFO_4_MHZ_WIDTH: 4 MHz S1G rate + * @NL80211_RATE_INFO_8_MHZ_WIDTH: 8 MHz S1G rate + * @NL80211_RATE_INFO_16_MHZ_WIDTH: 16 MHz S1G rate * @__NL80211_RATE_INFO_AFTER_LAST: internal use */ enum nl80211_rate_info { @@ -3693,6 +3700,13 @@ enum nl80211_rate_info { NL80211_RATE_INFO_EHT_NSS, NL80211_RATE_INFO_EHT_GI, NL80211_RATE_INFO_EHT_RU_ALLOC, + NL80211_RATE_INFO_S1G_MCS, + NL80211_RATE_INFO_S1G_NSS, + NL80211_RATE_INFO_1_MHZ_WIDTH, + NL80211_RATE_INFO_2_MHZ_WIDTH, + NL80211_RATE_INFO_4_MHZ_WIDTH, + NL80211_RATE_INFO_8_MHZ_WIDTH, + NL80211_RATE_INFO_16_MHZ_WIDTH, /* keep last */ __NL80211_RATE_INFO_AFTER_LAST, -- cgit v1.2.3 From 6cf963edbbd3b279185e28e4864c9698ffaa23c3 Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Thu, 8 Jun 2023 16:36:10 +0300 Subject: wifi: cfg80211: Support association to AP MLD with disabled links An AP part of an AP MLD might be temporarily disabled, and might be enabled later. Such a link should be included in the association exchange, but should not be used until enabled. Extend the NL80211_CMD_ASSOCIATE to also indicate disabled links. Signed-off-by: Ilan Peer Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230608163202.c4c61ee4c4a5.I784ef4a0d619fc9120514b5615458fbef3b3684a@changeid Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 435c4ac5d9bf..03939bdb0e48 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -11,7 +11,7 @@ * Copyright 2008 Jouni Malinen * Copyright 2008 Colin McCabe * Copyright 2015-2017 Intel Deutschland GmbH - * Copyright (C) 2018-2022 Intel Corporation + * Copyright (C) 2018-2023 Intel Corporation * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -2805,6 +2805,9 @@ enum nl80211_commands { * index. If the userspace includes more RNR elements than number of * MBSSID elements then these will be added in every EMA beacon. * + * @NL80211_ATTR_MLO_LINK_DISABLED: Flag attribute indicating that the link is + * disabled. + * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -3341,6 +3344,8 @@ enum nl80211_attrs { NL80211_ATTR_EMA_RNR_ELEMS, + NL80211_ATTR_MLO_LINK_DISABLED, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, -- cgit v1.2.3 From 065563b20a664a6575dc158688dfb0e121c25b38 Mon Sep 17 00:00:00 2001 From: Veerendranath Jakkam Date: Fri, 17 Mar 2023 19:51:53 +0530 Subject: wifi: cfg80211/nl80211: Add support to indicate STA MLD setup links removal STA MLD setup links may get removed if AP MLD remove the corresponding affiliated APs with Multi-Link reconfiguration as described in P802.11be_D3.0, section 35.3.6.2.2 Removing affiliated APs. Currently, there is no support to notify such operation to cfg80211 and userspace. Add support for the drivers to indicate STA MLD setup links removal to cfg80211 and notify the same to userspace. Upon receiving such indication from the driver, clear the MLO links information of the removed links in the WDEV. Signed-off-by: Veerendranath Jakkam Link: https://lore.kernel.org/r/20230317142153.237900-1-quic_vjakkam@quicinc.com [rename function and attribute, fix kernel-doc] Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 03939bdb0e48..3190d34269ef 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1309,6 +1309,11 @@ * The number of peers that HW timestamping can be enabled for concurrently * is indicated by %NL80211_ATTR_MAX_HW_TIMESTAMP_PEERS. * + * @NL80211_CMD_LINKS_REMOVED: Notify userspace about the removal of STA MLD + * setup links due to AP MLD removing the corresponding affiliated APs with + * Multi-Link reconfiguration. %NL80211_ATTR_MLO_LINKS is used to provide + * information about the removed STA MLD setup links. + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -1562,6 +1567,8 @@ enum nl80211_commands { NL80211_CMD_SET_HW_TIMESTAMP, + NL80211_CMD_LINKS_REMOVED, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ -- cgit v1.2.3 From c3b60ab7a4dff6e6e608e685b70ddc3d6b2aca81 Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Mon, 12 Jun 2023 14:14:56 -0700 Subject: ptp: Add .getmaxphase callback to ptp_clock_info Enables advertisement of the maximum offset supported by the phase control functionality of PHCs. The callback is used to return an error if an offset not supported by the PHC is used in ADJ_OFFSET. The ioctls PTP_CLOCK_GETCAPS and PTP_CLOCK_GETCAPS2 now advertise the maximum offset a PHC's phase control functionality is capable of supporting. Introduce new sysfs node, max_phase_adjustment. Cc: Jakub Kicinski Cc: Shuah Khan Cc: Richard Cochran Cc: Maciek Machnikowski Signed-off-by: Rahul Rameshbabu Acked-by: Richard Cochran Signed-off-by: David S. Miller --- include/uapi/linux/ptp_clock.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/ptp_clock.h b/include/uapi/linux/ptp_clock.h index 1d108d597f66..05cc35fc94ac 100644 --- a/include/uapi/linux/ptp_clock.h +++ b/include/uapi/linux/ptp_clock.h @@ -95,7 +95,8 @@ struct ptp_clock_caps { int cross_timestamping; /* Whether the clock supports adjust phase */ int adjust_phase; - int rsv[12]; /* Reserved for future use. */ + int max_phase_adj; /* Maximum phase adjustment in nanoseconds. */ + int rsv[11]; /* Reserved for future use. */ }; struct ptp_extts_request { -- cgit v1.2.3 From 6c5b9a3296e146cc74b1d006c6a546ea92534ade Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 19 Jun 2023 16:26:53 +0300 Subject: wifi: nl80211/reg: add no-EHT regulatory flag This just propagates to the channel flags, like no-HE and similar other flags before it. Signed-off-by: Johannes Berg Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230619161906.74ce2983aed8.Ifa343ba89c11760491daad5aee5a81209d5735a7@changeid Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 3190d34269ef..88eb85c63029 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -4450,6 +4450,7 @@ enum nl80211_sched_scan_match_attr { * @NL80211_RRF_NO_160MHZ: 160MHz operation not allowed * @NL80211_RRF_NO_HE: HE operation not allowed * @NL80211_RRF_NO_320MHZ: 320MHz operation not allowed + * @NL80211_RRF_NO_EHT: EHT operation not allowed */ enum nl80211_reg_rule_flags { NL80211_RRF_NO_OFDM = 1<<0, @@ -4469,6 +4470,7 @@ enum nl80211_reg_rule_flags { NL80211_RRF_NO_160MHZ = 1<<16, NL80211_RRF_NO_HE = 1<<17, NL80211_RRF_NO_320MHZ = 1<<18, + NL80211_RRF_NO_EHT = 1<<19, }; #define NL80211_RRF_PASSIVE_SCAN NL80211_RRF_NO_IR -- cgit v1.2.3 From 38967f424b5be79c4c676712e5640d846efd07e3 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 20 Jun 2023 18:30:15 +0200 Subject: mptcp: track some aggregate data counters Currently there are no data transfer counters accounting for all the subflows used by a given MPTCP socket. The user-space can compute such figures aggregating the subflow info, but that is inaccurate if any subflow is closed before the MPTCP socket itself. Add the new counters in the MPTCP socket itself and expose them via the existing diag and sockopt. While touching mptcp_diag_fill_info(), acquire the relevant locks before fetching the msk data, to ensure better data consistency Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/385 Signed-off-by: Paolo Abeni Reviewed-by: Matthieu Baerts Signed-off-by: Matthieu Baerts Signed-off-by: Jakub Kicinski --- include/uapi/linux/mptcp.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h index 32af2d278cb4..a124be6ebbba 100644 --- a/include/uapi/linux/mptcp.h +++ b/include/uapi/linux/mptcp.h @@ -123,6 +123,11 @@ struct mptcp_info { __u8 mptcpi_local_addr_used; __u8 mptcpi_local_addr_max; __u8 mptcpi_csum_enabled; + __u32 mptcpi_retransmits; + __u64 mptcpi_bytes_retrans; + __u64 mptcpi_bytes_sent; + __u64 mptcpi_bytes_received; + __u64 mptcpi_bytes_acked; }; /* -- cgit v1.2.3 From 492432074e4fce4f8880213bf009b47adbf94a3a Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 20 Jun 2023 18:30:18 +0200 Subject: mptcp: introduce MPTCP_FULL_INFO getsockopt Some user-space applications want to monitor the subflows utilization. Dumping the per subflow tcp_info is not enough, as the PM could close and re-create the subflows under-the-hood, fooling the accounting. Even checking the src/dst addresses used by each subflow could not be enough, because new subflows could re-use the same address/port of the just closed one. This patch introduces a new socket option, allow dumping all the relevant information all-at-once (everything, everywhere...), in a consistent manner. Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/388 Signed-off-by: Paolo Abeni Reviewed-by: Matthieu Baerts Signed-off-by: Matthieu Baerts Signed-off-by: Jakub Kicinski --- include/uapi/linux/mptcp.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h index a124be6ebbba..ee9c49f949a2 100644 --- a/include/uapi/linux/mptcp.h +++ b/include/uapi/linux/mptcp.h @@ -249,9 +249,33 @@ struct mptcp_subflow_addrs { }; }; +struct mptcp_subflow_info { + __u32 id; + struct mptcp_subflow_addrs addrs; +}; + +struct mptcp_full_info { + __u32 size_tcpinfo_kernel; /* must be 0, set by kernel */ + __u32 size_tcpinfo_user; + __u32 size_sfinfo_kernel; /* must be 0, set by kernel */ + __u32 size_sfinfo_user; + __u32 num_subflows; /* must be 0, set by kernel (real subflow count) */ + __u32 size_arrays_user; /* max subflows that userspace is interested in; + * the buffers at subflow_info/tcp_info + * are respectively at least: + * size_arrays * size_sfinfo_user + * size_arrays * size_tcpinfo_user + * bytes wide + */ + __aligned_u64 subflow_info; + __aligned_u64 tcp_info; + struct mptcp_info mptcp_info; +}; + /* MPTCP socket options */ #define MPTCP_INFO 1 #define MPTCP_TCPINFO 2 #define MPTCP_SUBFLOW_ADDRS 3 +#define MPTCP_FULL_INFO 4 #endif /* _UAPI_MPTCP_H */ -- cgit v1.2.3 From 735d86a8aaf660e2a5fd5d711ee05fa817e8d567 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Fri, 9 Jun 2023 14:10:51 +0200 Subject: can: uapi: move CAN_RAW_FILTER_MAX definition to raw.h CAN_RAW_FILTER_MAX is only relevant for CAN_RAW sockets and used in linux/can/raw.c or in userspace applications that include the raw.h file anyway. Signed-off-by: Oliver Hartkopp Link: https://lore.kernel.org/all/20230609121051.9631-1-socketcan@hartkopp.net Signed-off-by: Marc Kleine-Budde --- include/uapi/linux/can.h | 1 - include/uapi/linux/can/raw.h | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/can.h b/include/uapi/linux/can.h index dd645ea72306..939db2388208 100644 --- a/include/uapi/linux/can.h +++ b/include/uapi/linux/can.h @@ -285,6 +285,5 @@ struct can_filter { }; #define CAN_INV_FILTER 0x20000000U /* to be set in can_filter.can_id */ -#define CAN_RAW_FILTER_MAX 512 /* maximum number of can_filter set via setsockopt() */ #endif /* !_UAPI_CAN_H */ diff --git a/include/uapi/linux/can/raw.h b/include/uapi/linux/can/raw.h index ff12f525c37c..31622c9b7988 100644 --- a/include/uapi/linux/can/raw.h +++ b/include/uapi/linux/can/raw.h @@ -49,6 +49,8 @@ #include #define SOL_CAN_RAW (SOL_CAN_BASE + CAN_RAW) +#define CAN_RAW_FILTER_MAX 512 /* maximum number of can_filter set via setsockopt() */ + enum { SCM_CAN_RAW_ERRQUEUE = 1, }; -- cgit v1.2.3 From 079cd633219d7298d087cd115c17682264244c18 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Thu, 15 Jun 2023 16:31:40 +0200 Subject: netfilter: nf_tables: Introduce NFT_MSG_GETSETELEM_RESET Analogous to NFT_MSG_GETOBJ_RESET, but for set elements with a timeout or attached stateful expressions like counters or quotas - reset them all at once. Respect a per element timeout value if present to reset the 'expires' value to. Signed-off-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index e059dc2644df..8466c2a9938f 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -105,6 +105,7 @@ enum nft_verdicts { * @NFT_MSG_DESTROYSETELEM: destroy a set element (enum nft_set_elem_attributes) * @NFT_MSG_DESTROYOBJ: destroy a stateful object (enum nft_object_attributes) * @NFT_MSG_DESTROYFLOWTABLE: destroy flow table (enum nft_flowtable_attributes) + * @NFT_MSG_GETSETELEM_RESET: get set elements and reset attached stateful expressions (enum nft_set_elem_attributes) */ enum nf_tables_msg_types { NFT_MSG_NEWTABLE, @@ -140,6 +141,7 @@ enum nf_tables_msg_types { NFT_MSG_DESTROYSETELEM, NFT_MSG_DESTROYOBJ, NFT_MSG_DESTROYFLOWTABLE, + NFT_MSG_GETSETELEM_RESET, NFT_MSG_MAX, }; -- cgit v1.2.3