summaryrefslogtreecommitdiff
path: root/net/core
diff options
context:
space:
mode:
authorJakub Kicinski <kuba@kernel.org>2025-09-24 10:21:39 -0700
committerJakub Kicinski <kuba@kernel.org>2025-09-24 10:22:37 -0700
commit5e3fee34f626a8cb8715f5b5409416c481714ebf (patch)
treede89f33043d7a2528e5de871df017a2fe2e5f226 /net/core
parentdc1dea796b197aba2c3cae25bfef45f4b3ad46fe (diff)
parent55d5a5154d751023bdf12c196fb0f1accdacf300 (diff)
Merge tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Martin KaFai Lau says: ==================== pull-request: bpf-next 2025-09-23 We've added 9 non-merge commits during the last 33 day(s) which contain a total of 10 files changed, 480 insertions(+), 53 deletions(-). The main changes are: 1) A new bpf_xdp_pull_data kfunc that supports pulling data from a frag into the linear area of a xdp_buff, from Amery Hung. This includes changes in the xdp_native.bpf.c selftest, which Nimrod's future work depends on. It is a merge from a stable branch 'xdp_pull_data' which has also been merged to bpf-next. There is a conflict with recent changes in 'include/net/xdp.h' in the net-next tree that will need to be resolved. 2) A compiler warning fix when CONFIG_NET=n in the recent dynptr skb_meta support, from Jakub Sitnicki. * tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: selftests: drv-net: Pull data before parsing headers selftests/bpf: Test bpf_xdp_pull_data bpf: Support specifying linear xdp packet data size for BPF_PROG_TEST_RUN bpf: Make variables in bpf_prog_test_run_xdp less confusing bpf: Clear packet pointers after changing packet data in kfuncs bpf: Support pulling non-linear xdp data bpf: Allow bpf_xdp_shrink_data to shrink a frag from head and tail bpf: Clear pfmemalloc flag when freeing all fragments bpf: Return an error pointer for skb metadata when CONFIG_NET=n ==================== Link: https://patch.msgid.link/20250924050303.2466356-1-martin.lau@linux.dev Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'net/core')
-rw-r--r--net/core/filter.c135
1 files changed, 118 insertions, 17 deletions
diff --git a/net/core/filter.c b/net/core/filter.c
index b005363f482c..080dc58455a8 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4153,34 +4153,45 @@ static int bpf_xdp_frags_increase_tail(struct xdp_buff *xdp, int offset)
return 0;
}
-static void bpf_xdp_shrink_data_zc(struct xdp_buff *xdp, int shrink,
- enum xdp_mem_type mem_type, bool release)
+static struct xdp_buff *bpf_xdp_shrink_data_zc(struct xdp_buff *xdp, int shrink,
+ bool tail, bool release)
{
- struct xdp_buff *zc_frag = xsk_buff_get_tail(xdp);
+ struct xdp_buff *zc_frag = tail ? xsk_buff_get_tail(xdp) :
+ xsk_buff_get_head(xdp);
if (release) {
- xsk_buff_del_tail(zc_frag);
- __xdp_return(0, mem_type, false, zc_frag);
+ xsk_buff_del_frag(zc_frag);
} else {
- zc_frag->data_end -= shrink;
+ if (tail)
+ zc_frag->data_end -= shrink;
+ else
+ zc_frag->data += shrink;
}
+
+ return zc_frag;
}
static bool bpf_xdp_shrink_data(struct xdp_buff *xdp, skb_frag_t *frag,
- int shrink)
+ int shrink, bool tail)
{
enum xdp_mem_type mem_type = xdp->rxq->mem.type;
bool release = skb_frag_size(frag) == shrink;
+ netmem_ref netmem = skb_frag_netmem(frag);
+ struct xdp_buff *zc_frag = NULL;
if (mem_type == MEM_TYPE_XSK_BUFF_POOL) {
- bpf_xdp_shrink_data_zc(xdp, shrink, mem_type, release);
- goto out;
+ netmem = 0;
+ zc_frag = bpf_xdp_shrink_data_zc(xdp, shrink, tail, release);
}
- if (release)
- __xdp_return(skb_frag_netmem(frag), mem_type, false, NULL);
+ if (release) {
+ __xdp_return(netmem, mem_type, false, zc_frag);
+ } else {
+ if (!tail)
+ skb_frag_off_add(frag, shrink);
+ skb_frag_size_sub(frag, shrink);
+ }
-out:
return release;
}
@@ -4198,18 +4209,15 @@ static int bpf_xdp_frags_shrink_tail(struct xdp_buff *xdp, int offset)
len_free += shrink;
offset -= shrink;
- if (bpf_xdp_shrink_data(xdp, frag, shrink)) {
+ if (bpf_xdp_shrink_data(xdp, frag, shrink, true))
n_frags_free++;
- } else {
- skb_frag_size_sub(frag, shrink);
- break;
- }
}
sinfo->nr_frags -= n_frags_free;
sinfo->xdp_frags_size -= len_free;
if (unlikely(!sinfo->nr_frags)) {
xdp_buff_clear_frags_flag(xdp);
+ xdp_buff_clear_frag_pfmemalloc(xdp);
xdp->data_end -= offset;
}
@@ -12205,6 +12213,98 @@ __bpf_kfunc int bpf_sock_ops_enable_tx_tstamp(struct bpf_sock_ops_kern *skops,
return 0;
}
+/**
+ * bpf_xdp_pull_data() - Pull in non-linear xdp data.
+ * @x: &xdp_md associated with the XDP buffer
+ * @len: length of data to be made directly accessible in the linear part
+ *
+ * Pull in data in case the XDP buffer associated with @x is non-linear and
+ * not all @len are in the linear data area.
+ *
+ * Direct packet access allows reading and writing linear XDP data through
+ * packet pointers (i.e., &xdp_md->data + offsets). The amount of data which
+ * ends up in the linear part of the xdp_buff depends on the NIC and its
+ * configuration. When a frag-capable XDP program wants to directly access
+ * headers that may be in the non-linear area, call this kfunc to make sure
+ * the data is available in the linear area. Alternatively, use dynptr or
+ * bpf_xdp_{load,store}_bytes() to access data without pulling.
+ *
+ * This kfunc can also be used with bpf_xdp_adjust_head() to decapsulate
+ * headers in the non-linear data area.
+ *
+ * A call to this kfunc may reduce headroom. If there is not enough tailroom
+ * in the linear data area, metadata and data will be shifted down.
+ *
+ * A call to this kfunc is susceptible to change the buffer geometry.
+ * Therefore, at load time, all checks on pointers previously done by the
+ * verifier are invalidated and must be performed again, if the kfunc is used
+ * in combination with direct packet access.
+ *
+ * Return:
+ * * %0 - success
+ * * %-EINVAL - invalid len
+ */
+__bpf_kfunc int bpf_xdp_pull_data(struct xdp_md *x, u32 len)
+{
+ struct xdp_buff *xdp = (struct xdp_buff *)x;
+ struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
+ int i, delta, shift, headroom, tailroom, n_frags_free = 0;
+ void *data_hard_end = xdp_data_hard_end(xdp);
+ int data_len = xdp->data_end - xdp->data;
+ void *start;
+
+ if (len <= data_len)
+ return 0;
+
+ if (unlikely(len > xdp_get_buff_len(xdp)))
+ return -EINVAL;
+
+ start = xdp_data_meta_unsupported(xdp) ? xdp->data : xdp->data_meta;
+
+ headroom = start - xdp->data_hard_start - sizeof(struct xdp_frame);
+ tailroom = data_hard_end - xdp->data_end;
+
+ delta = len - data_len;
+ if (unlikely(delta > tailroom + headroom))
+ return -EINVAL;
+
+ shift = delta - tailroom;
+ if (shift > 0) {
+ memmove(start - shift, start, xdp->data_end - start);
+
+ xdp->data_meta -= shift;
+ xdp->data -= shift;
+ xdp->data_end -= shift;
+ }
+
+ for (i = 0; i < sinfo->nr_frags && delta; i++) {
+ skb_frag_t *frag = &sinfo->frags[i];
+ u32 shrink = min_t(u32, delta, skb_frag_size(frag));
+
+ memcpy(xdp->data_end, skb_frag_address(frag), shrink);
+
+ xdp->data_end += shrink;
+ sinfo->xdp_frags_size -= shrink;
+ delta -= shrink;
+ if (bpf_xdp_shrink_data(xdp, frag, shrink, false))
+ n_frags_free++;
+ }
+
+ if (unlikely(n_frags_free)) {
+ memmove(sinfo->frags, sinfo->frags + n_frags_free,
+ (sinfo->nr_frags - n_frags_free) * sizeof(skb_frag_t));
+
+ sinfo->nr_frags -= n_frags_free;
+
+ if (!sinfo->nr_frags) {
+ xdp_buff_clear_frags_flag(xdp);
+ xdp_buff_clear_frag_pfmemalloc(xdp);
+ }
+ }
+
+ return 0;
+}
+
__bpf_kfunc_end_defs();
int bpf_dynptr_from_skb_rdonly(struct __sk_buff *skb, u64 flags,
@@ -12232,6 +12332,7 @@ BTF_KFUNCS_END(bpf_kfunc_check_set_skb_meta)
BTF_KFUNCS_START(bpf_kfunc_check_set_xdp)
BTF_ID_FLAGS(func, bpf_dynptr_from_xdp)
+BTF_ID_FLAGS(func, bpf_xdp_pull_data)
BTF_KFUNCS_END(bpf_kfunc_check_set_xdp)
BTF_KFUNCS_START(bpf_kfunc_check_set_sock_addr)