From d8accf661fcf3088ef4c349ab56ecd19a5071b19 Mon Sep 17 00:00:00 2001
From: Paul Chaignon
Date: Thu, 9 Oct 2025 22:11:07 +0200
Subject: bpf: Refactor cleanup of bpf_prog_test_run_skb

This bit of refactoring aims to simplify how we free memory in
bpf_prog_test_run_skb and avoid code duplication.

Signed-off-by: Paul Chaignon
Signed-off-by: Martin KaFai Lau
Tested-by: syzbot@syzkaller.appspotmail.com
Link: https://patch.msgid.link/8971e01ae87b84f5af6b8b40defd3c310faf1c0f.1760037899.git.paul.chaignon@gmail.com
---
 net/bpf/test_run.c | 26 ++++++++++++++------------
 1 file changed, 14 insertions(+), 12 deletions(-)

(limited to 'net/bpf/test_run.c')

diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index dfb03ee0bb62..a39b26739a1e 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -990,10 +990,10 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
 	u32 size = kattr->test.data_size_in;
 	u32 repeat = kattr->test.repeat;
 	struct __sk_buff *ctx = NULL;
+	struct sk_buff *skb = NULL;
+	struct sock *sk = NULL;
 	u32 retval, duration;
 	int hh_len = ETH_HLEN;
-	struct sk_buff *skb;
-	struct sock *sk;
 	void *data;
 	int ret;
 
@@ -1012,8 +1012,9 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
 
 	ctx = bpf_ctx_init(kattr, sizeof(struct __sk_buff));
 	if (IS_ERR(ctx)) {
-		kfree(data);
-		return PTR_ERR(ctx);
+		ret = PTR_ERR(ctx);
+		ctx = NULL;
+		goto out;
 	}
 
 	switch (prog->type) {
@@ -1033,21 +1034,20 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
 
 	sk = sk_alloc(net, AF_UNSPEC, GFP_USER, &bpf_dummy_proto, 1);
 	if (!sk) {
-		kfree(data);
-		kfree(ctx);
-		return -ENOMEM;
+		ret = -ENOMEM;
+		goto out;
 	}
 	sock_init_data(NULL, sk);
 
 	skb = slab_build_skb(data);
 	if (!skb) {
-		kfree(data);
-		kfree(ctx);
-		sk_free(sk);
-		return -ENOMEM;
+		ret = -ENOMEM;
+		goto out;
 	}
 	skb->sk = sk;
+	data = NULL; /* data released via kfree_skb */
+
 	skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
 	__skb_put(skb, size);
 
@@ -1142,7 +1142,9 @@ out:
 	if (dev && dev != net->loopback_dev)
 		dev_put(dev);
 	kfree_skb(skb);
-	sk_free(sk);
+	kfree(data);
+	if (sk)
+		sk_free(sk);
 	kfree(ctx);
 	return ret;
 }
--
cgit v1.2.3
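The idiom this refactor converges on is the usual kernel one: every resource
pointer starts out NULL, every failure path jumps to a single out: label, and
that label frees whatever happens to be non-NULL. The following is a standalone
userspace sketch of that pattern, not the test_run code itself; malloc()/free()
stand in for the kernel allocators and run() is a hypothetical name.

#include <errno.h>
#include <stdlib.h>

/* Sketch of the cleanup pattern: resources start as NULL, all error
 * paths jump to the single "out" label, and the label frees whatever
 * has been allocated so far.
 */
int run(size_t len)
{
	void *data = NULL, *ctx = NULL;
	int ret = 0;

	data = malloc(len);
	if (!data) {
		ret = -ENOMEM;
		goto out;	/* ctx is still NULL, cleanup stays safe */
	}

	ctx = malloc(len);
	if (!ctx) {
		ret = -ENOMEM;
		goto out;	/* frees data only */
	}

	/* ... run the test ... */
out:
	free(ctx);	/* free(NULL) is a no-op, like kfree(NULL) */
	free(data);
	return ret;
}

This is exactly why the patch turns skb, sk, and data into NULL-initialized
variables: the out: label can then call kfree_skb() and kfree() unconditionally,
and sk_free() behind a simple NULL check, no matter where the function bailed out.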
From 57bb2f6717930a5e670da179c316159719b724a2 Mon Sep 17 00:00:00 2001
From: Paul Chaignon
Date: Thu, 9 Oct 2025 22:11:22 +0200
Subject: bpf: Reorder bpf_prog_test_run_skb initialization

This patch reorders the initialization of bpf_prog_test_run_skb to
simplify the subsequent patch. Program types are checked first,
followed by the ctx init, and finally the data init. With the
subsequent patch, program types and the ctx init provide information
that is used in the data init. Thus, we need the data init to happen
last.

Signed-off-by: Paul Chaignon
Signed-off-by: Martin KaFai Lau
Link: https://patch.msgid.link/063475176f15828a882c07846017394baf72f682.1760037899.git.paul.chaignon@gmail.com
---
 net/bpf/test_run.c | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

(limited to 'net/bpf/test_run.c')

diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index a39b26739a1e..b9b49d0c7014 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -1004,19 +1004,6 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
 	if (size < ETH_HLEN)
 		return -EINVAL;
 
-	data = bpf_test_init(kattr, kattr->test.data_size_in,
-			     size, NET_SKB_PAD + NET_IP_ALIGN,
-			     SKB_DATA_ALIGN(sizeof(struct skb_shared_info)));
-	if (IS_ERR(data))
-		return PTR_ERR(data);
-
-	ctx = bpf_ctx_init(kattr, sizeof(struct __sk_buff));
-	if (IS_ERR(ctx)) {
-		ret = PTR_ERR(ctx);
-		ctx = NULL;
-		goto out;
-	}
-
 	switch (prog->type) {
 	case BPF_PROG_TYPE_SCHED_CLS:
 	case BPF_PROG_TYPE_SCHED_ACT:
@@ -1032,6 +1019,19 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
 		break;
 	}
 
+	ctx = bpf_ctx_init(kattr, sizeof(struct __sk_buff));
+	if (IS_ERR(ctx))
+		return PTR_ERR(ctx);
+
+	data = bpf_test_init(kattr, kattr->test.data_size_in,
+			     size, NET_SKB_PAD + NET_IP_ALIGN,
+			     SKB_DATA_ALIGN(sizeof(struct skb_shared_info)));
+	if (IS_ERR(data)) {
+		ret = PTR_ERR(data);
+		data = NULL;
+		goto out;
+	}
+
 	sk = sk_alloc(net, AF_UNSPEC, GFP_USER, &bpf_dummy_proto, 1);
 	if (!sk) {
 		ret = -ENOMEM;
--
cgit v1.2.3
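The dependency that makes this ordering necessary only appears in the next
patch: the size requested from the data init is derived both from the program
type (LWT programs stay linear) and from the user-supplied ctx (ctx->data_end
picks the linear area). Below is a self-contained sketch of that derivation,
with a hypothetical pick_linear_sz() that mirrors the follow-up logic rather
than quoting it.

#include <stdint.h>
#include <stdio.h>

#define ETH_HLEN 14

/* Mirrors the follow-up patch's rule: without a ctx (or with data_end == 0)
 * the skb stays fully linear; otherwise the linear area is ctx->data_end,
 * clamped to at least ETH_HLEN, and LWT programs are rejected.
 */
static int pick_linear_sz(uint32_t data_end, uint32_t data_size_in,
			  int is_lwt, uint32_t *linear_sz)
{
	*linear_sz = data_size_in;
	if (!data_end)
		return 0;
	if (is_lwt)
		return -1;
	*linear_sz = data_end < ETH_HLEN ? ETH_HLEN : data_end;
	return 0;
}

int main(void)
{
	uint32_t linear_sz;

	if (!pick_linear_sz(64, 4096, 0, &linear_sz))
		printf("linear area: %u bytes, remaining 4032 in frags\n",
		       linear_sz);
	return 0;
}

Both inputs - the is_lwt flag from the program-type switch and data_end from
the ctx - therefore have to be known before bpf_test_init() runs, which is what
this reordering prepares for.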
From 838baa351cee86526973fb3ef49c0f4c1b2f3b0c Mon Sep 17 00:00:00 2001
From: Paul Chaignon
Date: Thu, 9 Oct 2025 22:11:43 +0200
Subject: bpf: Craft non-linear skbs in BPF_PROG_TEST_RUN

This patch adds support for crafting non-linear skbs in BPF test runs
for tc programs. The size of the linear area is given by ctx->data_end,
with a minimum of ETH_HLEN always pulled in the linear area. If ctx or
ctx->data_end is null, a linear skb is used.

This is particularly useful to test support for non-linear skbs in
large codebases such as Cilium. We've had multiple bugs in the past few
years where we were missing calls to bpf_skb_pull_data(). This support
in BPF_PROG_TEST_RUN would allow us to automatically cover this case in
our BPF tests.

LWT program types are currently excluded in this patch. Allowing
non-linear skbs for these programs would require a bit more care
because they are able to call helpers (e.g., bpf_clone_redirect,
bpf_redirect) that themselves call eth_type_trans(). eth_type_trans()
assumes there are at least ETH_HLEN bytes in the linear area. That may
not be true for LWT programs as we already pulled the L2 header via the
eth_type_trans() call in bpf_prog_test_run_skb().

In addition to the selftests introduced later in the series, this patch
was tested by enabling non-linear skbs for all tc selftest programs and
checking that test failures were expected.

Suggested-by: Daniel Borkmann
Signed-off-by: Paul Chaignon
Signed-off-by: Martin KaFai Lau
Tested-by: syzbot@syzkaller.appspotmail.com
Link: https://patch.msgid.link/5694d4d1af31bddf974afcb1bbb1e28b8998dcd0.1760037899.git.paul.chaignon@gmail.com
---
 net/bpf/test_run.c | 103 ++++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 82 insertions(+), 21 deletions(-)

(limited to 'net/bpf/test_run.c')

diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index b9b49d0c7014..05e30ff5b6f9 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -447,7 +447,7 @@ static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
 
 static int bpf_test_finish(const union bpf_attr *kattr,
 			   union bpf_attr __user *uattr, const void *data,
-			   struct skb_shared_info *sinfo, u32 size,
+			   struct skb_shared_info *sinfo, u32 size, u32 frag_size,
 			   u32 retval, u32 duration)
 {
 	void __user *data_out = u64_to_user_ptr(kattr->test.data_out);
@@ -464,7 +464,7 @@ static int bpf_test_finish(const union bpf_attr *kattr,
 	}
 
 	if (data_out) {
-		int len = sinfo ? copy_size - sinfo->xdp_frags_size : copy_size;
+		int len = sinfo ? copy_size - frag_size : copy_size;
 
 		if (len < 0) {
 			err = -ENOSPC;
@@ -910,6 +910,12 @@ static int convert___skb_to_skb(struct sk_buff *skb, struct __sk_buff *__skb)
 	/* cb is allowed */
 
 	if (!range_is_zero(__skb, offsetofend(struct __sk_buff, cb),
+			   offsetof(struct __sk_buff, data_end)))
+		return -EINVAL;
+
+	/* data_end is allowed, but not copied to skb */
+
+	if (!range_is_zero(__skb, offsetofend(struct __sk_buff, data_end),
 			   offsetof(struct __sk_buff, tstamp)))
 		return -EINVAL;
 
@@ -984,34 +990,39 @@ static struct proto bpf_dummy_proto = {
 int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
 			  union bpf_attr __user *uattr)
 {
-	bool is_l2 = false, is_direct_pkt_access = false;
+	bool is_l2 = false, is_direct_pkt_access = false, is_lwt = false;
+	u32 tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
 	struct net *net = current->nsproxy->net_ns;
 	struct net_device *dev = net->loopback_dev;
-	u32 size = kattr->test.data_size_in;
+	u32 headroom = NET_SKB_PAD + NET_IP_ALIGN;
+	u32 linear_sz = kattr->test.data_size_in;
 	u32 repeat = kattr->test.repeat;
 	struct __sk_buff *ctx = NULL;
 	struct sk_buff *skb = NULL;
 	struct sock *sk = NULL;
 	u32 retval, duration;
 	int hh_len = ETH_HLEN;
-	void *data;
+	void *data = NULL;
 	int ret;
 
 	if ((kattr->test.flags & ~BPF_F_TEST_SKB_CHECKSUM_COMPLETE) ||
 	    kattr->test.cpu || kattr->test.batch_size)
 		return -EINVAL;
 
-	if (size < ETH_HLEN)
+	if (kattr->test.data_size_in < ETH_HLEN)
 		return -EINVAL;
 
 	switch (prog->type) {
 	case BPF_PROG_TYPE_SCHED_CLS:
 	case BPF_PROG_TYPE_SCHED_ACT:
+		is_direct_pkt_access = true;
 		is_l2 = true;
-		fallthrough;
+		break;
 	case BPF_PROG_TYPE_LWT_IN:
 	case BPF_PROG_TYPE_LWT_OUT:
 	case BPF_PROG_TYPE_LWT_XMIT:
+		is_lwt = true;
+		fallthrough;
 	case BPF_PROG_TYPE_CGROUP_SKB:
 		is_direct_pkt_access = true;
 		break;
@@ -1023,9 +1034,24 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
 	if (IS_ERR(ctx))
 		return PTR_ERR(ctx);
 
-	data = bpf_test_init(kattr, kattr->test.data_size_in,
-			     size, NET_SKB_PAD + NET_IP_ALIGN,
-			     SKB_DATA_ALIGN(sizeof(struct skb_shared_info)));
+	if (ctx) {
+		if (ctx->data_end > kattr->test.data_size_in || ctx->data || ctx->data_meta) {
+			ret = -EINVAL;
+			goto out;
+		}
+		if (ctx->data_end) {
+			/* Non-linear LWT test_run is unsupported for now. */
+			if (is_lwt) {
+				ret = -EINVAL;
+				goto out;
+			}
+			linear_sz = max(ETH_HLEN, ctx->data_end);
+		}
+	}
+
+	linear_sz = min_t(u32, linear_sz, PAGE_SIZE - headroom - tailroom);
+
+	data = bpf_test_init(kattr, linear_sz, linear_sz, headroom, tailroom);
 	if (IS_ERR(data)) {
 		ret = PTR_ERR(data);
 		data = NULL;
 		goto out;
@@ -1049,7 +1075,43 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
 	data = NULL; /* data released via kfree_skb */
 
 	skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
-	__skb_put(skb, size);
+	__skb_put(skb, linear_sz);
+
+	if (unlikely(kattr->test.data_size_in > linear_sz)) {
+		void __user *data_in = u64_to_user_ptr(kattr->test.data_in);
+		struct skb_shared_info *sinfo = skb_shinfo(skb);
+		u32 copied = linear_sz;
+
+		while (copied < kattr->test.data_size_in) {
+			struct page *page;
+			u32 data_len;
+
+			if (sinfo->nr_frags == MAX_SKB_FRAGS) {
+				ret = -ENOMEM;
+				goto out;
+			}
+
+			page = alloc_page(GFP_KERNEL);
+			if (!page) {
+				ret = -ENOMEM;
+				goto out;
+			}
+
+			data_len = min_t(u32, kattr->test.data_size_in - copied,
+					 PAGE_SIZE);
+			skb_fill_page_desc(skb, sinfo->nr_frags, page, 0, data_len);
+
+			if (copy_from_user(page_address(page), data_in + copied,
+					   data_len)) {
+				ret = -EFAULT;
+				goto out;
+			}
+			skb->data_len += data_len;
+			skb->truesize += PAGE_SIZE;
+			skb->len += data_len;
+			copied += data_len;
+		}
+	}
 
 	if (ctx && ctx->ifindex > 1) {
 		dev = dev_get_by_index(net, ctx->ifindex);
@@ -1129,12 +1191,11 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
 
 	convert_skb_to___skb(skb, ctx);
 
-	size = skb->len;
-	/* bpf program can never convert linear skb to non-linear */
-	if (WARN_ON_ONCE(skb_is_nonlinear(skb)))
-		size = skb_headlen(skb);
-	ret = bpf_test_finish(kattr, uattr, skb->data, NULL, size, retval,
-			      duration);
+	if (skb_is_nonlinear(skb))
+		/* bpf program can never convert linear skb to non-linear */
+		WARN_ON_ONCE(linear_sz == kattr->test.data_size_in);
+	ret = bpf_test_finish(kattr, uattr, skb->data, skb_shinfo(skb), skb->len,
+			      skb->data_len, retval, duration);
 	if (!ret)
 		ret = bpf_ctx_finish(kattr, uattr, ctx,
 				     sizeof(struct __sk_buff));
@@ -1342,7 +1403,7 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
 		goto out;
 
 	size = xdp.data_end - xdp.data_meta + sinfo->xdp_frags_size;
-	ret = bpf_test_finish(kattr, uattr, xdp.data_meta, sinfo, size,
+	ret = bpf_test_finish(kattr, uattr, xdp.data_meta, sinfo, size, sinfo->xdp_frags_size,
 			      retval, duration);
 	if (!ret)
 		ret = bpf_ctx_finish(kattr, uattr, ctx,
@@ -1433,7 +1494,7 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
 		goto out;
 
 	ret = bpf_test_finish(kattr, uattr, &flow_keys, NULL,
-			      sizeof(flow_keys), retval, duration);
+			      sizeof(flow_keys), 0, retval, duration);
 	if (!ret)
 		ret = bpf_ctx_finish(kattr, uattr, user_ctx,
 				     sizeof(struct bpf_flow_keys));
@@ -1534,7 +1595,7 @@ int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kat
 		user_ctx->cookie = sock_gen_cookie(ctx.selected_sk);
 	}
 
-	ret = bpf_test_finish(kattr, uattr, NULL, NULL, 0, retval, duration);
+	ret = bpf_test_finish(kattr, uattr, NULL, NULL, 0, 0, retval, duration);
 	if (!ret)
 		ret = bpf_ctx_finish(kattr, uattr, user_ctx, sizeof(*user_ctx));
 
@@ -1734,7 +1795,7 @@ int bpf_prog_test_run_nf(struct bpf_prog *prog,
 	if (ret)
 		goto out;
 
-	ret = bpf_test_finish(kattr, uattr, NULL, NULL, 0, retval, duration);
+	ret = bpf_test_finish(kattr, uattr, NULL, NULL, 0, 0, retval, duration);
 
 out:
 	kfree(user_ctx);
--
cgit v1.2.3
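From userspace, the new behavior is driven entirely through the ctx: a
__sk_buff passed as ctx_in with data_end set to something smaller than
data_size_in asks the kernel to keep only that many bytes linear and to push
the rest into page frags. Here is a hedged libbpf sketch of such a run
(prog_fd is assumed to be an already loaded BPF_PROG_TYPE_SCHED_CLS program;
the real selftests arrive later in the series).

#include <bpf/bpf.h>
#include <linux/bpf.h>

/* Run a tc program over a 2048-byte packet of which only the first
 * 128 bytes sit in the linear area; the remaining bytes land in skb
 * frags, so the program must bpf_skb_pull_data() before reading them.
 */
int run_nonlinear(int prog_fd)
{
	char pkt[2048] = {0};		/* Ethernet header + payload */
	struct __sk_buff ctx = {
		.data_end = 128,	/* linear area, must be >= ETH_HLEN */
	};
	LIBBPF_OPTS(bpf_test_run_opts, opts,
		.data_in = pkt,
		.data_size_in = sizeof(pkt),
		.ctx_in = &ctx,
		.ctx_size_in = sizeof(ctx),
	);
	int err;

	err = bpf_prog_test_run_opts(prog_fd, &opts);
	return err ? err : (int)opts.retval;
}

Note that ctx->data and ctx->data_meta must stay zero and data_end may not
exceed data_size_in, otherwise the run is rejected with -EINVAL per the checks
added above.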
From 04a899573fb87273a656f178b5f920c505f68875 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann
Date: Mon, 20 Oct 2025 09:54:41 +0200
Subject: bpf: Do not let BPF test infra emit invalid GSO types to stack

Yinhao et al. reported that their fuzzer tool was able to trigger a
skb_warn_bad_offload() from netif_skb_features() ->
gso_features_check(). When a BPF program - triggered via BPF test
infra - pushes the packet to the loopback device via
bpf_clone_redirect(), the mentioned offload warning can be seen.
GSO-related features are then rightfully disabled.

We get into this situation due to convert___skb_to_skb() setting
gso_segs and gso_size but not gso_type. Technically, it makes sense
that this warning triggers, since the GSO properties are malformed due
to the unset gso_type. Potentially, the gso_type could be marked
non-trustworthy by setting it at least to SKB_GSO_DODGY without any
other specific assumptions, but that also feels wrong given we should
not go further into the GSO engine in the first place.

The checks were added in 121d57af308d ("gso: validate gso_type in GSO
handlers") because there were malicious (syzbot) senders that combine
a protocol with a non-matching gso_type. If we wanted to drop such
packets, gso_features_check() currently only returns feature flags via
netif_skb_features(), so one location for potentially dropping such
skbs could be validate_xmit_unreadable_skb(), but on the other hand
that would add a check to the fast path for a very rare corner case.

Given bpf_clone_redirect() is the only place where BPF test infra
could emit such packets, let's reject them right there.

Fixes: 850a88cc4096 ("bpf: Expose __sk_buff wire_len/gso_segs to BPF_PROG_TEST_RUN")
Fixes: cf62089b0edd ("bpf: Add gso_size to __sk_buff")
Reported-by: Yinhao Hu
Reported-by: Kaiyan Mei
Reported-by: Dongliang Mu
Signed-off-by: Daniel Borkmann
Signed-off-by: Martin KaFai Lau
Link: https://patch.msgid.link/20251020075441.127980-1-daniel@iogearbox.net
---
 net/bpf/test_run.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'net/bpf/test_run.c')

diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 73d5f0d9f5f4..655efac6f133 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -945,6 +945,11 @@ static int convert___skb_to_skb(struct sk_buff *skb, struct __sk_buff *__skb)
 
 	if (__skb->gso_segs > GSO_MAX_SEGS)
 		return -EINVAL;
+
+	/* Currently GSO type is zero/unset. If this gets extended with
+	 * a small list of accepted GSO types in future, the filter for
+	 * an unset GSO type in bpf_clone_redirect() can be lifted.
+	 */
 	skb_shinfo(skb)->gso_segs = __skb->gso_segs;
 	skb_shinfo(skb)->gso_size = __skb->gso_size;
 	skb_shinfo(skb)->hwtstamps.hwtstamp = __skb->hwtstamp;
--
cgit v1.2.3
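The hunk above only documents the constraint inside convert___skb_to_skb();
per the commit message, the actual rejection of an skb that carries
gso_segs/gso_size but no gso_type lives in bpf_clone_redirect() (outside
net/bpf/test_run.c, so not visible in this diff). A minimal tc-program sketch
of the path the fuzzer exercised - an assumed shape, not the reporters'
reproducer - with the malformed GSO fields coming from the __sk_buff ctx
passed to BPF_PROG_TEST_RUN:

// SPDX-License-Identifier: GPL-2.0
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

/* Clone the test packet to ifindex 1 (loopback in the test netns).
 * When the test-run ctx set gso_segs/gso_size without a gso_type,
 * this helper call is where the malformed skb now gets rejected
 * instead of reaching gso_features_check() on the xmit path.
 */
SEC("tc")
int clone_to_loopback(struct __sk_buff *skb)
{
	bpf_clone_redirect(skb, 1, 0);
	return 0; /* TC_ACT_OK */
}

char _license[] SEC("license") = "GPL";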