diff options
| author | Alexei Starovoitov <ast@kernel.org> | 2025-05-12 18:29:04 -0700 |
|---|---|---|
| committer | Alexei Starovoitov <ast@kernel.org> | 2025-05-12 18:32:48 -0700 |
| commit | f4efc73b1ee791cb946156eec6c64e75b6092d48 (patch) | |
| tree | 0408a432e2a56edf773653bc7908c9b2b1ffd7b7 /kernel | |
| parent | fd5fd538a1f4b34cee6823ba0ddda2f7a55aca96 (diff) | |
| parent | c61bcd29eda9ea8db753ad5217fd24d9ee42a96b (diff) | |
Merge branch 'introduce-kfuncs-for-memory-reads-into-dynptrs'
Mykyta Yatsenko says:
====================
Introduce kfuncs for memory reads into dynptrs
From: Mykyta Yatsenko <yatsenko@meta.com>
This patch adds new kfuncs that enable reading variable-length
user or kernel data directly into dynptrs.
These kfuncs provide a way to perform dynamically-sized reads
while maintaining memory safety. Unlike existing
`bpf_probe_read_{user|kernel}` APIs, which are limited to constant-sized
reads, these new kfuncs allow for more flexible data access.
v4 -> v5
* Fix pointers annotations, use __user where necessary, cast where needed
v3 -> v4
* Added pid filtering in selftests
v2 -> v3
* Add KF_TRUSTED_ARGS for kfuncs that take pointer to task_struct
as an argument
* Remove checks for non-NULL task, where it was not necessary
* Added comments on constants used in selftests, etc.
v1 -> v2
* Renaming helper functions to use "user_str" instead of "user_data_str"
suffix
====================
Link: https://patch.msgid.link/20250512205348.191079-1-mykyta.yatsenko5@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Diffstat (limited to 'kernel')
| -rw-r--r-- | kernel/bpf/helpers.c | 22 | ||||
| -rw-r--r-- | kernel/trace/bpf_trace.c | 194 |
2 files changed, 204 insertions, 12 deletions
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index fed53da75025..ebb604e39eb1 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1714,16 +1714,6 @@ void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr) memset(ptr, 0, sizeof(*ptr)); } -static int bpf_dynptr_check_off_len(const struct bpf_dynptr_kern *ptr, u32 offset, u32 len) -{ - u32 size = __bpf_dynptr_size(ptr); - - if (len > size || offset > size - len) - return -E2BIG; - - return 0; -} - BPF_CALL_4(bpf_dynptr_from_mem, void *, data, u32, size, u64, flags, struct bpf_dynptr_kern *, ptr) { int err; @@ -1810,8 +1800,8 @@ static const struct bpf_func_proto bpf_dynptr_read_proto = { .arg5_type = ARG_ANYTHING, }; -static int __bpf_dynptr_write(const struct bpf_dynptr_kern *dst, u32 offset, void *src, - u32 len, u64 flags) +int __bpf_dynptr_write(const struct bpf_dynptr_kern *dst, u32 offset, void *src, + u32 len, u64 flags) { enum bpf_dynptr_type type; int err; @@ -3388,6 +3378,14 @@ BTF_ID_FLAGS(func, bpf_iter_kmem_cache_next, KF_ITER_NEXT | KF_RET_NULL | KF_SLE BTF_ID_FLAGS(func, bpf_iter_kmem_cache_destroy, KF_ITER_DESTROY | KF_SLEEPABLE) BTF_ID_FLAGS(func, bpf_local_irq_save) BTF_ID_FLAGS(func, bpf_local_irq_restore) +BTF_ID_FLAGS(func, bpf_probe_read_user_dynptr) +BTF_ID_FLAGS(func, bpf_probe_read_kernel_dynptr) +BTF_ID_FLAGS(func, bpf_probe_read_user_str_dynptr) +BTF_ID_FLAGS(func, bpf_probe_read_kernel_str_dynptr) +BTF_ID_FLAGS(func, bpf_copy_from_user_dynptr, KF_SLEEPABLE) +BTF_ID_FLAGS(func, bpf_copy_from_user_str_dynptr, KF_SLEEPABLE) +BTF_ID_FLAGS(func, bpf_copy_from_user_task_dynptr, KF_SLEEPABLE | KF_TRUSTED_ARGS) +BTF_ID_FLAGS(func, bpf_copy_from_user_task_str_dynptr, KF_SLEEPABLE | KF_TRUSTED_ARGS) BTF_KFUNCS_END(common_btf_ids) static const struct btf_kfunc_id_set common_kfunc_set = { diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 868920994517..985ce3854af8 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -3466,6 +3466,142 @@ static int __init bpf_kprobe_multi_kfuncs_init(void) late_initcall(bpf_kprobe_multi_kfuncs_init); +typedef int (*copy_fn_t)(void *dst, const void *src, u32 size, struct task_struct *tsk); + +/* + * The __always_inline is to make sure the compiler doesn't + * generate indirect calls into callbacks, which is expensive, + * on some kernel configurations. This allows compiler to put + * direct calls into all the specific callback implementations + * (copy_user_data_sleepable, copy_user_data_nofault, and so on) + */ +static __always_inline int __bpf_dynptr_copy_str(struct bpf_dynptr *dptr, u32 doff, u32 size, + const void *unsafe_src, + copy_fn_t str_copy_fn, + struct task_struct *tsk) +{ + struct bpf_dynptr_kern *dst; + u32 chunk_sz, off; + void *dst_slice; + int cnt, err; + char buf[256]; + + dst_slice = bpf_dynptr_slice_rdwr(dptr, doff, NULL, size); + if (likely(dst_slice)) + return str_copy_fn(dst_slice, unsafe_src, size, tsk); + + dst = (struct bpf_dynptr_kern *)dptr; + if (bpf_dynptr_check_off_len(dst, doff, size)) + return -E2BIG; + + for (off = 0; off < size; off += chunk_sz - 1) { + chunk_sz = min_t(u32, sizeof(buf), size - off); + /* Expect str_copy_fn to return count of copied bytes, including + * zero terminator. Next iteration increment off by chunk_sz - 1 to + * overwrite NUL. + */ + cnt = str_copy_fn(buf, unsafe_src + off, chunk_sz, tsk); + if (cnt < 0) + return cnt; + err = __bpf_dynptr_write(dst, doff + off, buf, cnt, 0); + if (err) + return err; + if (cnt < chunk_sz || chunk_sz == 1) /* we are done */ + return off + cnt; + } + return off; +} + +static __always_inline int __bpf_dynptr_copy(const struct bpf_dynptr *dptr, u32 doff, + u32 size, const void *unsafe_src, + copy_fn_t copy_fn, struct task_struct *tsk) +{ + struct bpf_dynptr_kern *dst; + void *dst_slice; + char buf[256]; + u32 off, chunk_sz; + int err; + + dst_slice = bpf_dynptr_slice_rdwr(dptr, doff, NULL, size); + if (likely(dst_slice)) + return copy_fn(dst_slice, unsafe_src, size, tsk); + + dst = (struct bpf_dynptr_kern *)dptr; + if (bpf_dynptr_check_off_len(dst, doff, size)) + return -E2BIG; + + for (off = 0; off < size; off += chunk_sz) { + chunk_sz = min_t(u32, sizeof(buf), size - off); + err = copy_fn(buf, unsafe_src + off, chunk_sz, tsk); + if (err) + return err; + err = __bpf_dynptr_write(dst, doff + off, buf, chunk_sz, 0); + if (err) + return err; + } + return 0; +} + +static __always_inline int copy_user_data_nofault(void *dst, const void *unsafe_src, + u32 size, struct task_struct *tsk) +{ + return copy_from_user_nofault(dst, (const void __user *)unsafe_src, size); +} + +static __always_inline int copy_user_data_sleepable(void *dst, const void *unsafe_src, + u32 size, struct task_struct *tsk) +{ + int ret; + + if (!tsk) /* Read from the current task */ + return copy_from_user(dst, (const void __user *)unsafe_src, size); + + ret = access_process_vm(tsk, (unsigned long)unsafe_src, dst, size, 0); + if (ret != size) + return -EFAULT; + return 0; +} + +static __always_inline int copy_kernel_data_nofault(void *dst, const void *unsafe_src, + u32 size, struct task_struct *tsk) +{ + return copy_from_kernel_nofault(dst, unsafe_src, size); +} + +static __always_inline int copy_user_str_nofault(void *dst, const void *unsafe_src, + u32 size, struct task_struct *tsk) +{ + return strncpy_from_user_nofault(dst, (const void __user *)unsafe_src, size); +} + +static __always_inline int copy_user_str_sleepable(void *dst, const void *unsafe_src, + u32 size, struct task_struct *tsk) +{ + int ret; + + if (unlikely(size == 0)) + return 0; + + if (tsk) { + ret = copy_remote_vm_str(tsk, (unsigned long)unsafe_src, dst, size, 0); + } else { + ret = strncpy_from_user(dst, (const void __user *)unsafe_src, size - 1); + /* strncpy_from_user does not guarantee NUL termination */ + if (ret >= 0) + ((char *)dst)[ret] = '\0'; + } + + if (ret < 0) + return ret; + return ret + 1; +} + +static __always_inline int copy_kernel_str_nofault(void *dst, const void *unsafe_src, + u32 size, struct task_struct *tsk) +{ + return strncpy_from_kernel_nofault(dst, unsafe_src, size); +} + __bpf_kfunc_start_defs(); __bpf_kfunc int bpf_send_signal_task(struct task_struct *task, int sig, enum pid_type type, @@ -3477,4 +3613,62 @@ __bpf_kfunc int bpf_send_signal_task(struct task_struct *task, int sig, enum pid return bpf_send_signal_common(sig, type, task, value); } +__bpf_kfunc int bpf_probe_read_user_dynptr(struct bpf_dynptr *dptr, u32 off, + u32 size, const void __user *unsafe_ptr__ign) +{ + return __bpf_dynptr_copy(dptr, off, size, (const void *)unsafe_ptr__ign, + copy_user_data_nofault, NULL); +} + +__bpf_kfunc int bpf_probe_read_kernel_dynptr(struct bpf_dynptr *dptr, u32 off, + u32 size, const void *unsafe_ptr__ign) +{ + return __bpf_dynptr_copy(dptr, off, size, unsafe_ptr__ign, + copy_kernel_data_nofault, NULL); +} + +__bpf_kfunc int bpf_probe_read_user_str_dynptr(struct bpf_dynptr *dptr, u32 off, + u32 size, const void __user *unsafe_ptr__ign) +{ + return __bpf_dynptr_copy_str(dptr, off, size, (const void *)unsafe_ptr__ign, + copy_user_str_nofault, NULL); +} + +__bpf_kfunc int bpf_probe_read_kernel_str_dynptr(struct bpf_dynptr *dptr, u32 off, + u32 size, const void *unsafe_ptr__ign) +{ + return __bpf_dynptr_copy_str(dptr, off, size, unsafe_ptr__ign, + copy_kernel_str_nofault, NULL); +} + +__bpf_kfunc int bpf_copy_from_user_dynptr(struct bpf_dynptr *dptr, u32 off, + u32 size, const void __user *unsafe_ptr__ign) +{ + return __bpf_dynptr_copy(dptr, off, size, (const void *)unsafe_ptr__ign, + copy_user_data_sleepable, NULL); +} + +__bpf_kfunc int bpf_copy_from_user_str_dynptr(struct bpf_dynptr *dptr, u32 off, + u32 size, const void __user *unsafe_ptr__ign) +{ + return __bpf_dynptr_copy_str(dptr, off, size, (const void *)unsafe_ptr__ign, + copy_user_str_sleepable, NULL); +} + +__bpf_kfunc int bpf_copy_from_user_task_dynptr(struct bpf_dynptr *dptr, u32 off, + u32 size, const void __user *unsafe_ptr__ign, + struct task_struct *tsk) +{ + return __bpf_dynptr_copy(dptr, off, size, (const void *)unsafe_ptr__ign, + copy_user_data_sleepable, tsk); +} + +__bpf_kfunc int bpf_copy_from_user_task_str_dynptr(struct bpf_dynptr *dptr, u32 off, + u32 size, const void __user *unsafe_ptr__ign, + struct task_struct *tsk) +{ + return __bpf_dynptr_copy_str(dptr, off, size, (const void *)unsafe_ptr__ign, + copy_user_str_sleepable, tsk); +} + __bpf_kfunc_end_defs(); |
