From 391145ba2accc48b596f3d438af1a6255b62a555 Mon Sep 17 00:00:00 2001 From: Dave Marchevsky Date: Tue, 31 Oct 2023 14:56:24 -0700 Subject: bpf: Add __bpf_kfunc_{start,end}_defs macros BPF kfuncs are meant to be called from BPF programs. Accordingly, most kfuncs are not called from anywhere in the kernel, which the -Wmissing-prototypes warning is unhappy about. We've peppered __diag_ignore_all("-Wmissing-prototypes", ... everywhere kfuncs are defined in the codebase to suppress this warning. This patch adds two macros meant to bound one or many kfunc definitions. All existing kfunc definitions which use these __diag calls to suppress -Wmissing-prototypes are migrated to use the newly-introduced macros. A new __diag_ignore_all - for "-Wmissing-declarations" - is added to the __bpf_kfunc_start_defs macro based on feedback from Andrii on an earlier version of this patch [0] and another recent mailing list thread [1]. In the future we might need to ignore different warnings or do other kfunc-specific things. This change will make it easier to make such modifications for all kfunc defs. [0]: https://lore.kernel.org/bpf/CAEf4BzaE5dRWtK6RPLnjTW-MW9sx9K3Fn6uwqCTChK2Dcb1Xig@mail.gmail.com/ [1]: https://lore.kernel.org/bpf/ZT+2qCc%2FaXep0%2FLf@krava/ Signed-off-by: Dave Marchevsky Suggested-by: Andrii Nakryiko Acked-by: Andrii Nakryiko Cc: Jiri Olsa Acked-by: Jiri Olsa Acked-by: David Vernet Acked-by: Yafang Shao Link: https://lore.kernel.org/r/20231031215625.2343848-1-davemarchevsky@fb.com Signed-off-by: Alexei Starovoitov --- include/linux/btf.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/btf.h b/include/linux/btf.h index c2231c64d60b..dc5ce962f600 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -84,6 +84,15 @@ */ #define __bpf_kfunc __used noinline +#define __bpf_kfunc_start_defs() \ + __diag_push(); \ + __diag_ignore_all("-Wmissing-declarations", \ + "Global kfuncs as their definitions will be in BTF");\ + __diag_ignore_all("-Wmissing-prototypes", \ + "Global kfuncs as their definitions will be in BTF") + +#define __bpf_kfunc_end_defs() __diag_pop() + /* * Return the name of the passed struct, if exists, or halt the build if for * example the structure gets renamed. In this way, developers have to revisit -- cgit v1.2.3 From 15fb6f2b6c4c3c129adc2412ae12ec15e60a6adb Mon Sep 17 00:00:00 2001 From: Dave Marchevsky Date: Tue, 31 Oct 2023 14:56:25 -0700 Subject: bpf: Add __bpf_hook_{start,end} macros Not all uses of __diag_ignore_all(...) in BPF-related code in order to suppress warnings are wrapping kfunc definitions. Some "hook point" definitions - small functions meant to be used as attach points for fentry and similar BPF progs - need to suppress -Wmissing-declarations. We could use __bpf_kfunc_{start,end}_defs added in the previous patch in such cases, but this might be confusing to someone unfamiliar with BPF internals. Instead, this patch adds __bpf_hook_{start,end} macros, currently having the same effect as __bpf_kfunc_{start,end}_defs, then uses them to suppress warnings for two hook points in the kernel itself and some bpf_testmod hook points as well. Signed-off-by: Dave Marchevsky Cc: Yafang Shao Acked-by: Jiri Olsa Acked-by: Yafang Shao Link: https://lore.kernel.org/r/20231031215625.2343848-2-davemarchevsky@fb.com Signed-off-by: Alexei Starovoitov --- include/linux/btf.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/btf.h b/include/linux/btf.h index dc5ce962f600..59d404e22814 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -92,6 +92,8 @@ "Global kfuncs as their definitions will be in BTF") #define __bpf_kfunc_end_defs() __diag_pop() +#define __bpf_hook_start() __bpf_kfunc_start_defs() +#define __bpf_hook_end() __bpf_kfunc_end_defs() /* * Return the name of the passed struct, if exists, or halt the build if for -- cgit v1.2.3 From 1726483b79a72e0150734d5367e4a0238bf8fcff Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 25 Oct 2023 14:10:37 +0000 Subject: inet: shrink struct flowi_common I am looking at syzbot reports triggering kernel stack overflows involving a cascade of ipvlan devices. We can save 8 bytes in struct flowi_common. This patch alone will not fix the issue, but is a start. Fixes: 24ba14406c5c ("route: Add multipath_hash in flowi_common to make user-define hash") Signed-off-by: Eric Dumazet Cc: wenxu Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20231025141037.3448203-1-edumazet@google.com Signed-off-by: Paolo Abeni --- include/net/flow.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/flow.h b/include/net/flow.h index 7f0adda3bf2f..335bbc52171c 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -40,8 +40,8 @@ struct flowi_common { #define FLOWI_FLAG_KNOWN_NH 0x02 __u32 flowic_secid; kuid_t flowic_uid; - struct flowi_tunnel flowic_tun_key; __u32 flowic_multipath_hash; + struct flowi_tunnel flowic_tun_key; }; union flowi_uli { -- cgit v1.2.3 From f55d8e60f10909dbc5524e261041e1d28d7d20d8 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sat, 28 Oct 2023 21:25:11 +0200 Subject: net: ethtool: Fix documentation of ethtool_sprintf() This function takes a pointer to a pointer, unlike sprintf() which is passed a plain pointer. Fix up the documentation to make this clear. Fixes: 7888fe53b706 ("ethtool: Add common function for filling out strings") Cc: Alexander Duyck Cc: Justin Stitt Cc: stable@vger.kernel.org Signed-off-by: Andrew Lunn Reviewed-by: Justin Stitt Link: https://lore.kernel.org/r/20231028192511.100001-1-andrew@lunn.ch Signed-off-by: Paolo Abeni --- include/linux/ethtool.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 226a36ed5aa1..689028257fcc 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -1045,10 +1045,10 @@ static inline int ethtool_mm_frag_size_min_to_add(u32 val_min, u32 *val_add, /** * ethtool_sprintf - Write formatted string to ethtool string data - * @data: Pointer to start of string to update + * @data: Pointer to a pointer to the start of string to update * @fmt: Format of string to write * - * Write formatted string to data. Update data to point at start of + * Write formatted string to *data. Update *data to point at start of * next string. */ extern __printf(2, 3) void ethtool_sprintf(u8 **data, const char *fmt, ...); -- cgit v1.2.3 From e8ae8ad479e2d037daa33756e5e72850a7bd37a9 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 24 Oct 2023 09:53:33 +1100 Subject: Fix termination state for idr_for_each_entry_ul() The comment for idr_for_each_entry_ul() states after normal termination @entry is left with the value NULL This is not correct in the case where UINT_MAX has an entry in the idr. In that case @entry will be non-NULL after termination. No current code depends on the documentation being correct, but to save future code we should fix it. Also fix idr_for_each_entry_continue_ul(). While this is not documented as leaving @entry as NULL, the mellanox driver appears to depend on it doing so. So make that explicit in the documentation as well as in the code. Fixes: e33d2b74d805 ("idr: fix overflow case for idr_for_each_entry_ul()") Cc: Matthew Wilcox Cc: Chris Mi Cc: Cong Wang Signed-off-by: NeilBrown Signed-off-by: David S. Miller --- include/linux/idr.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/idr.h b/include/linux/idr.h index a0dce14090a9..da5f5fa4a3a6 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -200,7 +200,7 @@ static inline void idr_preload_end(void) */ #define idr_for_each_entry_ul(idr, entry, tmp, id) \ for (tmp = 0, id = 0; \ - tmp <= id && ((entry) = idr_get_next_ul(idr, &(id))) != NULL; \ + ((entry) = tmp <= id ? idr_get_next_ul(idr, &(id)) : NULL) != NULL; \ tmp = id, ++id) /** @@ -224,10 +224,12 @@ static inline void idr_preload_end(void) * @id: Entry ID. * * Continue to iterate over entries, continuing after the current position. + * After normal termination @entry is left with the value NULL. This + * is convenient for a "not found" value. */ #define idr_for_each_entry_continue_ul(idr, entry, tmp, id) \ for (tmp = id; \ - tmp <= id && ((entry) = idr_get_next_ul(idr, &(id))) != NULL; \ + ((entry) = tmp <= id ? idr_get_next_ul(idr, &(id)) : NULL) != NULL; \ tmp = id, ++id) /* -- cgit v1.2.3 From cdbab6236605dc11780779d9af689aea7d58cab1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 31 Oct 2023 06:19:45 +0000 Subject: tcp: fix fastopen code vs usec TS After blamed commit, TFO client-ack-dropped-then-recovery-ms-timestamps packetdrill test failed. David Morley and Neal Cardwell started investigating and Neal pointed that we had : tcp_conn_request() tcp_try_fastopen() -> tcp_fastopen_create_child -> child = inet_csk(sk)->icsk_af_ops->syn_recv_sock() -> tcp_create_openreq_child() -> copy req_usec_ts from req: newtp->tcp_usec_ts = treq->req_usec_ts; // now the new TFO server socket always does usec TS, no matter // what the route options are... send_synack() -> tcp_make_synack() // disable tcp_rsk(req)->req_usec_ts if route option is not present: if (tcp_rsk(req)->req_usec_ts < 0) tcp_rsk(req)->req_usec_ts = dst_tcp_usec_ts(dst); tcp_conn_request() has the initial dst, we can initialize tcp_rsk(req)->req_usec_ts there instead of later in send_synack(); This means tcp_rsk(req)->req_usec_ts can be a boolean. Many thanks to David an Neal for their help. Fixes: 614e8316aa4c ("tcp: add support for usec resolution in TCP TS values") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202310302216.f79d78bc-oliver.sang@intel.com Suggested-by: Neal Cardwell Signed-off-by: Eric Dumazet Cc: David Morley Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- include/linux/tcp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index ec4e9367f5b0..68f3d315d2e1 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -152,7 +152,7 @@ struct tcp_request_sock { u64 snt_synack; /* first SYNACK sent time */ bool tfo_listener; bool is_mptcp; - s8 req_usec_ts; + bool req_usec_ts; #if IS_ENABLED(CONFIG_MPTCP) bool drop_req; #endif -- cgit v1.2.3 From 02f0717e9835dbdeee26084e42086fbd32e64eb8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 1 Nov 2023 04:52:33 +0000 Subject: net/tcp: fix possible out-of-bounds reads in tcp_hash_fail() syzbot managed to trigger a fault by sending TCP packets with all flags being set. v2: - While fixing this bug, add PSH flag handling and represent flags the way tcpdump does : [S], [S.], [P.] - Print 4-tuples more consistently between families. BUG: KASAN: stack-out-of-bounds in string_nocheck lib/vsprintf.c:645 [inline] BUG: KASAN: stack-out-of-bounds in string+0x394/0x3d0 lib/vsprintf.c:727 Read of size 1 at addr ffffc9000397f3f5 by task syz-executor299/5039 CPU: 1 PID: 5039 Comm: syz-executor299 Not tainted 6.6.0-rc7-syzkaller-02075-g55c900477f5b #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/09/2023 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xd9/0x1b0 lib/dump_stack.c:106 print_address_description mm/kasan/report.c:364 [inline] print_report+0xc4/0x620 mm/kasan/report.c:475 kasan_report+0xda/0x110 mm/kasan/report.c:588 string_nocheck lib/vsprintf.c:645 [inline] string+0x394/0x3d0 lib/vsprintf.c:727 vsnprintf+0xc5f/0x1870 lib/vsprintf.c:2818 vprintk_store+0x3a0/0xb80 kernel/printk/printk.c:2191 vprintk_emit+0x14c/0x5f0 kernel/printk/printk.c:2288 vprintk+0x7b/0x90 kernel/printk/printk_safe.c:45 _printk+0xc8/0x100 kernel/printk/printk.c:2332 tcp_inbound_hash.constprop.0+0xdb2/0x10d0 include/net/tcp.h:2760 tcp_v6_rcv+0x2b31/0x34d0 net/ipv6/tcp_ipv6.c:1882 ip6_protocol_deliver_rcu+0x33b/0x13d0 net/ipv6/ip6_input.c:438 ip6_input_finish+0x14f/0x2f0 net/ipv6/ip6_input.c:483 NF_HOOK include/linux/netfilter.h:314 [inline] NF_HOOK include/linux/netfilter.h:308 [inline] ip6_input+0xce/0x440 net/ipv6/ip6_input.c:492 dst_input include/net/dst.h:461 [inline] ip6_rcv_finish net/ipv6/ip6_input.c:79 [inline] NF_HOOK include/linux/netfilter.h:314 [inline] NF_HOOK include/linux/netfilter.h:308 [inline] ipv6_rcv+0x563/0x720 net/ipv6/ip6_input.c:310 __netif_receive_skb_one_core+0x115/0x180 net/core/dev.c:5527 __netif_receive_skb+0x1f/0x1b0 net/core/dev.c:5641 netif_receive_skb_internal net/core/dev.c:5727 [inline] netif_receive_skb+0x133/0x700 net/core/dev.c:5786 tun_rx_batched+0x429/0x780 drivers/net/tun.c:1579 tun_get_user+0x29e7/0x3bc0 drivers/net/tun.c:2002 tun_chr_write_iter+0xe8/0x210 drivers/net/tun.c:2048 call_write_iter include/linux/fs.h:1956 [inline] new_sync_write fs/read_write.c:491 [inline] vfs_write+0x650/0xe40 fs/read_write.c:584 ksys_write+0x12f/0x250 fs/read_write.c:637 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x38/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd Fixes: 2717b5adea9e ("net/tcp: Add tcp_hash_fail() ratelimited logs") Reported-by: syzbot Signed-off-by: Eric Dumazet Cc: Dmitry Safonov Cc: Francesco Ruggeri Cc: David Ahern Reviewed-by: Dmitry Safonov Signed-off-by: David S. Miller --- include/net/tcp_ao.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/tcp_ao.h b/include/net/tcp_ao.h index a375a171ef3c..b56be10838f0 100644 --- a/include/net/tcp_ao.h +++ b/include/net/tcp_ao.h @@ -124,7 +124,7 @@ struct tcp_ao_info { #define tcp_hash_fail(msg, family, skb, fmt, ...) \ do { \ const struct tcphdr *th = tcp_hdr(skb); \ - char hdr_flags[5] = {}; \ + char hdr_flags[6]; \ char *f = hdr_flags; \ \ if (th->fin) \ @@ -133,17 +133,18 @@ do { \ *f++ = 'S'; \ if (th->rst) \ *f++ = 'R'; \ + if (th->psh) \ + *f++ = 'P'; \ if (th->ack) \ - *f++ = 'A'; \ - if (f != hdr_flags) \ - *f = ' '; \ + *f++ = '.'; \ + *f = 0; \ if ((family) == AF_INET) { \ - net_info_ratelimited("%s for (%pI4, %d)->(%pI4, %d) %s" fmt "\n", \ + net_info_ratelimited("%s for %pI4.%d->%pI4.%d [%s] " fmt "\n", \ msg, &ip_hdr(skb)->saddr, ntohs(th->source), \ &ip_hdr(skb)->daddr, ntohs(th->dest), \ hdr_flags, ##__VA_ARGS__); \ } else { \ - net_info_ratelimited("%s for [%pI6c]:%u->[%pI6c]:%u %s" fmt "\n", \ + net_info_ratelimited("%s for [%pI6c].%d->[%pI6c].%d [%s]" fmt "\n", \ msg, &ipv6_hdr(skb)->saddr, ntohs(th->source), \ &ipv6_hdr(skb)->daddr, ntohs(th->dest), \ hdr_flags, ##__VA_ARGS__); \ -- cgit v1.2.3 From d93f9528573e1d419b69ca5ff4130201d05f6b90 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 2 Nov 2023 11:52:27 -0700 Subject: nfsd: regenerate user space parsers after ynl-gen changes Commit 8cea95b0bd79 ("tools: ynl-gen: handle do ops with no input attrs") added support for some of the previously-skipped ops in nfsd. Regenerate the user space parsers to fill them in. Signed-off-by: Jakub Kicinski Acked-by: Chuck Lever Signed-off-by: David S. Miller --- include/uapi/linux/nfsd_netlink.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/nfsd_netlink.h b/include/uapi/linux/nfsd_netlink.h index c8ae72466ee6..3cd044edee5d 100644 --- a/include/uapi/linux/nfsd_netlink.h +++ b/include/uapi/linux/nfsd_netlink.h @@ -3,8 +3,8 @@ /* Documentation/netlink/specs/nfsd.yaml */ /* YNL-GEN uapi header */ -#ifndef _UAPI_LINUX_NFSD_H -#define _UAPI_LINUX_NFSD_H +#ifndef _UAPI_LINUX_NFSD_NETLINK_H +#define _UAPI_LINUX_NFSD_NETLINK_H #define NFSD_FAMILY_NAME "nfsd" #define NFSD_FAMILY_VERSION 1 @@ -36,4 +36,4 @@ enum { NFSD_CMD_MAX = (__NFSD_CMD_MAX - 1) }; -#endif /* _UAPI_LINUX_NFSD_H */ +#endif /* _UAPI_LINUX_NFSD_NETLINK_H */ -- cgit v1.2.3 From 9bc64bd0cd765f696fcd40fc98909b1f7c73b2ba Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Fri, 3 Nov 2023 16:14:10 +0100 Subject: net/sched: act_ct: Always fill offloading tuple iifidx Referenced commit doesn't always set iifidx when offloading the flow to hardware. Fix the following cases: - nf_conn_act_ct_ext_fill() is called before extension is created with nf_conn_act_ct_ext_add() in tcf_ct_act(). This can cause rule offload with unspecified iifidx when connection is offloaded after only single original-direction packet has been processed by tc data path. Always fill the new nf_conn_act_ct_ext instance after creating it in nf_conn_act_ct_ext_add(). - Offloading of unidirectional UDP NEW connections is now supported, but ct flow iifidx field is not updated when connection is promoted to bidirectional which can result reply-direction iifidx to be zero when refreshing the connection. Fill in the extension and update flow iifidx before calling flow_offload_refresh(). Fixes: 9795ded7f924 ("net/sched: act_ct: Fill offloading tuple iifidx") Reviewed-by: Paul Blakey Signed-off-by: Vlad Buslov Reviewed-by: Simon Horman Fixes: 6a9bad0069cf ("net/sched: act_ct: offload UDP NEW connections") Link: https://lore.kernel.org/r/20231103151410.764271-1-vladbu@nvidia.com Signed-off-by: Jakub Kicinski --- include/net/netfilter/nf_conntrack_act_ct.h | 30 ++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_act_ct.h b/include/net/netfilter/nf_conntrack_act_ct.h index 078d3c52c03f..e5f2f0b73a9a 100644 --- a/include/net/netfilter/nf_conntrack_act_ct.h +++ b/include/net/netfilter/nf_conntrack_act_ct.h @@ -20,7 +20,22 @@ static inline struct nf_conn_act_ct_ext *nf_conn_act_ct_ext_find(const struct nf #endif } -static inline struct nf_conn_act_ct_ext *nf_conn_act_ct_ext_add(struct nf_conn *ct) +static inline void nf_conn_act_ct_ext_fill(struct sk_buff *skb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo) +{ +#if IS_ENABLED(CONFIG_NET_ACT_CT) + struct nf_conn_act_ct_ext *act_ct_ext; + + act_ct_ext = nf_conn_act_ct_ext_find(ct); + if (dev_net(skb->dev) == &init_net && act_ct_ext) + act_ct_ext->ifindex[CTINFO2DIR(ctinfo)] = skb->dev->ifindex; +#endif +} + +static inline struct +nf_conn_act_ct_ext *nf_conn_act_ct_ext_add(struct sk_buff *skb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo) { #if IS_ENABLED(CONFIG_NET_ACT_CT) struct nf_conn_act_ct_ext *act_ct = nf_ct_ext_find(ct, NF_CT_EXT_ACT_CT); @@ -29,22 +44,11 @@ static inline struct nf_conn_act_ct_ext *nf_conn_act_ct_ext_add(struct nf_conn * return act_ct; act_ct = nf_ct_ext_add(ct, NF_CT_EXT_ACT_CT, GFP_ATOMIC); + nf_conn_act_ct_ext_fill(skb, ct, ctinfo); return act_ct; #else return NULL; #endif } -static inline void nf_conn_act_ct_ext_fill(struct sk_buff *skb, struct nf_conn *ct, - enum ip_conntrack_info ctinfo) -{ -#if IS_ENABLED(CONFIG_NET_ACT_CT) - struct nf_conn_act_ct_ext *act_ct_ext; - - act_ct_ext = nf_conn_act_ct_ext_find(ct); - if (dev_net(skb->dev) == &init_net && act_ct_ext) - act_ct_ext->ifindex[CTINFO2DIR(ctinfo)] = skb->dev->ifindex; -#endif -} - #endif /* _NF_CONNTRACK_ACT_CT_H */ -- cgit v1.2.3