From ea70faa1f244ea0bd30f413e4ec7c79a3b5b96f1 Mon Sep 17 00:00:00 2001 From: Ihor Solodrai Date: Thu, 30 Jan 2025 12:12:35 -0800 Subject: docs/bpf: Document the semantics of BTF tags with kind_flag Explain the meaning of kind_flag in BTF type_tags and decl_tags. Update uapi btf.h kind_flag comment to reflect the changes. Signed-off-by: Ihor Solodrai Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20250130201239.1429648-3-ihor.solodrai@linux.dev --- include/uapi/linux/btf.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/btf.h b/include/uapi/linux/btf.h index ec1798b6d3ff..266d4ffa6c07 100644 --- a/include/uapi/linux/btf.h +++ b/include/uapi/linux/btf.h @@ -36,7 +36,8 @@ struct btf_type { * bits 24-28: kind (e.g. int, ptr, array...etc) * bits 29-30: unused * bit 31: kind_flag, currently used by - * struct, union, enum, fwd and enum64 + * struct, union, enum, fwd, enum64, + * decl_tag and type_tag */ __u32 info; /* "size" is used by INT, ENUM, STRUCT, UNION, DATASEC and ENUM64. -- cgit v1.2.3 From 0abff462d802a352c87b7f5e71b442b09bf9cfff Mon Sep 17 00:00:00 2001 From: Levi Zim Date: Tue, 4 Feb 2025 10:00:21 +0800 Subject: bpf: Add comment about helper freeze Put a comment after the bpf helper list in uapi bpf.h to prevent people from trying to add new helpers there and direct them to kfuncs. Suggested-by: Andrii Nakryiko Signed-off-by: Levi Zim Signed-off-by: Andrii Nakryiko Reviewed-by: Bagas Sanjaya Acked-by: Daniel Xu Link: https://lore.kernel.org/bpf/CAEf4BzZvQF+QQ=oip4vdz5A=9bd+OmN-CXk5YARYieaipK9s+A@mail.gmail.com/ Link: https://lore.kernel.org/bpf/20221231004213.h5fx3loccbs5hyzu@macbook-pro-6.dhcp.thefacebook.com/ Link: https://lore.kernel.org/bpf/20250204-bpf-helper-freeze-v1-1-46efd9ff20dc@outlook.com --- include/uapi/linux/bpf.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 2acf9b336371..fff6cdb8d11a 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -6019,7 +6019,10 @@ union bpf_attr { FN(user_ringbuf_drain, 209, ##ctx) \ FN(cgrp_storage_get, 210, ##ctx) \ FN(cgrp_storage_delete, 211, ##ctx) \ - /* */ + /* This helper list is effectively frozen. If you are trying to \ + * add a new helper, you should add a kfunc instead which has \ + * less stability guarantees. See Documentation/bpf/kfuncs.rst \ + */ /* backwards-compatibility macros for users of __BPF_FUNC_MAPPER that don't * know or care about integer value that is now passed as second argument -- cgit v1.2.3 From 531118f1ccfc209d6d100f338eed064636f6da9b Mon Sep 17 00:00:00 2001 From: Song Liu Date: Thu, 30 Jan 2025 13:35:45 -0800 Subject: fs/xattr: bpf: Introduce security.bpf. xattr name prefix Introduct new xattr name prefix security.bpf., and enable reading these xattrs from bpf kfuncs bpf_get_[file|dentry]_xattr(). As we are on it, correct the comments for return value of bpf_get_[file|dentry]_xattr(), i.e. return length the xattr value on success. Signed-off-by: Song Liu Acked-by: Christian Brauner Reviewed-by: Jan Kara Reviewed-by: Matt Bobrowski Link: https://lore.kernel.org/r/20250130213549.3353349-2-song@kernel.org Signed-off-by: Alexei Starovoitov --- include/uapi/linux/xattr.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/xattr.h b/include/uapi/linux/xattr.h index 9854f9cff3c6..c7c85bb504ba 100644 --- a/include/uapi/linux/xattr.h +++ b/include/uapi/linux/xattr.h @@ -83,6 +83,10 @@ struct xattr_args { #define XATTR_CAPS_SUFFIX "capability" #define XATTR_NAME_CAPS XATTR_SECURITY_PREFIX XATTR_CAPS_SUFFIX +#define XATTR_BPF_LSM_SUFFIX "bpf." +#define XATTR_NAME_BPF_LSM (XATTR_SECURITY_PREFIX XATTR_BPF_LSM_SUFFIX) +#define XATTR_NAME_BPF_LSM_LEN (sizeof(XATTR_NAME_BPF_LSM) - 1) + #define XATTR_POSIX_ACL_ACCESS "posix_acl_access" #define XATTR_NAME_POSIX_ACL_ACCESS XATTR_SYSTEM_PREFIX XATTR_POSIX_ACL_ACCESS #define XATTR_POSIX_ACL_DEFAULT "posix_acl_default" -- cgit v1.2.3 From 4b82b181a26cff8bf7adc3a85a88d121d92edeaf Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 24 Feb 2025 15:01:16 -0800 Subject: bpf: Allow pre-ordering for bpf cgroup progs Currently for bpf progs in a cgroup hierarchy, the effective prog array is computed from bottom cgroup to upper cgroups (post-ordering). For example, the following cgroup hierarchy root cgroup: p1, p2 subcgroup: p3, p4 have BPF_F_ALLOW_MULTI for both cgroup levels. The effective cgroup array ordering looks like p3 p4 p1 p2 and at run time, progs will execute based on that order. But in some cases, it is desirable to have root prog executes earlier than children progs (pre-ordering). For example, - prog p1 intends to collect original pkt dest addresses. - prog p3 will modify original pkt dest addresses to a proxy address for security reason. The end result is that prog p1 gets proxy address which is not what it wants. Putting p1 to every child cgroup is not desirable either as it will duplicate itself in many child cgroups. And this is exactly a use case we are encountering in Meta. To fix this issue, let us introduce a flag BPF_F_PREORDER. If the flag is specified at attachment time, the prog has higher priority and the ordering with that flag will be from top to bottom (pre-ordering). For example, in the above example, root cgroup: p1, p2 subcgroup: p3, p4 Let us say p2 and p4 are marked with BPF_F_PREORDER. The final effective array ordering will be p2 p4 p3 p1 Suggested-by: Andrii Nakryiko Acked-by: Andrii Nakryiko Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20250224230116.283071-1-yonghong.song@linux.dev Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index fff6cdb8d11a..beac5cdf2d2c 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1207,6 +1207,7 @@ enum bpf_perf_event_type { #define BPF_F_BEFORE (1U << 3) #define BPF_F_AFTER (1U << 4) #define BPF_F_ID (1U << 5) +#define BPF_F_PREORDER (1U << 6) #define BPF_F_LINK BPF_F_LINK /* 1 << 13 */ /* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the -- cgit v1.2.3 From 880442305a3908589bf4d6fc1d79edb577ee497c Mon Sep 17 00:00:00 2001 From: Peilin Ye Date: Tue, 4 Mar 2025 01:06:13 +0000 Subject: bpf: Introduce load-acquire and store-release instructions Introduce BPF instructions with load-acquire and store-release semantics, as discussed in [1]. Define 2 new flags: #define BPF_LOAD_ACQ 0x100 #define BPF_STORE_REL 0x110 A "load-acquire" is a BPF_STX | BPF_ATOMIC instruction with the 'imm' field set to BPF_LOAD_ACQ (0x100). Similarly, a "store-release" is a BPF_STX | BPF_ATOMIC instruction with the 'imm' field set to BPF_STORE_REL (0x110). Unlike existing atomic read-modify-write operations that only support BPF_W (32-bit) and BPF_DW (64-bit) size modifiers, load-acquires and store-releases also support BPF_B (8-bit) and BPF_H (16-bit). As an exception, however, 64-bit load-acquires/store-releases are not supported on 32-bit architectures (to fix a build error reported by the kernel test robot). An 8- or 16-bit load-acquire zero-extends the value before writing it to a 32-bit register, just like ARM64 instruction LDARH and friends. Similar to existing atomic read-modify-write operations, misaligned load-acquires/store-releases are not allowed (even if BPF_F_ANY_ALIGNMENT is set). As an example, consider the following 64-bit load-acquire BPF instruction (assuming little-endian): db 10 00 00 00 01 00 00 r0 = load_acquire((u64 *)(r1 + 0x0)) opcode (0xdb): BPF_ATOMIC | BPF_DW | BPF_STX imm (0x00000100): BPF_LOAD_ACQ Similarly, a 16-bit BPF store-release: cb 21 00 00 10 01 00 00 store_release((u16 *)(r1 + 0x0), w2) opcode (0xcb): BPF_ATOMIC | BPF_H | BPF_STX imm (0x00000110): BPF_STORE_REL In arch/{arm64,s390,x86}/net/bpf_jit_comp.c, have bpf_jit_supports_insn(..., /*in_arena=*/true) return false for the new instructions, until the corresponding JIT compiler supports them in arena. [1] https://lore.kernel.org/all/20240729183246.4110549-1-yepeilin@google.com/ Acked-by: Eduard Zingerman Acked-by: Ilya Leoshkevich Cc: kernel test robot Signed-off-by: Peilin Ye Link: https://lore.kernel.org/r/a217f46f0e445fbd573a1a024be5c6bf1d5fe716.1741049567.git.yepeilin@google.com Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index beac5cdf2d2c..bb37897c0393 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -51,6 +51,9 @@ #define BPF_XCHG (0xe0 | BPF_FETCH) /* atomic exchange */ #define BPF_CMPXCHG (0xf0 | BPF_FETCH) /* atomic compare-and-write */ +#define BPF_LOAD_ACQ 0x100 /* load-acquire */ +#define BPF_STORE_REL 0x110 /* store-release */ + enum bpf_cond_pseudo_jmp { BPF_MAY_GOTO = 0, }; -- cgit v1.2.3 From 0de445d18e36ca5914337217c118016ba5db574d Mon Sep 17 00:00:00 2001 From: Mykyta Yatsenko Date: Mon, 17 Mar 2025 17:40:36 +0000 Subject: bpf: BPF token support for BPF_BTF_GET_FD_BY_ID Currently BPF_BTF_GET_FD_BY_ID requires CAP_SYS_ADMIN, which does not allow running it from user namespace. This creates a problem when freplace program running from user namespace needs to query target program BTF. This patch relaxes capable check from CAP_SYS_ADMIN to CAP_BPF and adds support for BPF token that can be passed in attributes to syscall. Signed-off-by: Mykyta Yatsenko Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20250317174039.161275-2-mykyta.yatsenko5@gmail.com --- include/uapi/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index bb37897c0393..661de2444965 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1652,6 +1652,7 @@ union bpf_attr { }; __u32 next_id; __u32 open_flags; + __s32 fd_by_id_token_fd; }; struct { /* anonymous struct used by BPF_OBJ_GET_INFO_BY_FD */ -- cgit v1.2.3