From f64c4acea51fbe2c08c0b0f48b7f5d1657d7a5e4 Mon Sep 17 00:00:00 2001
From: Vadim Fedorenko
Date: Fri, 10 Sep 2021 01:04:08 +0300
Subject: bpf: Add hardware timestamp field to __sk_buff

BPF programs may want to know hardware timestamps if the NIC supports
such timestamping. Expose this data as the hwtstamp field of __sk_buff,
the same way as gso_segs/gso_size. The field can be accessed from the
same programs as the tstamp field, but it is read-only. An explicit
check denying access to the padding data is added to
bpf_skb_is_valid_access. Also update the BPF_PROG_TEST_RUN tests of
the feature.

Signed-off-by: Vadim Fedorenko
Signed-off-by: Daniel Borkmann
Acked-by: Martin KaFai Lau
Link: https://lore.kernel.org/bpf/20210909220409.8804-2-vfedorenko@novek.ru
---
 include/uapi/linux/bpf.h | 2 ++
 1 file changed, 2 insertions(+)
(limited to 'include')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 791f31dd0abe..51cfd91cc387 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -5284,6 +5284,8 @@ struct __sk_buff {
 	__u32 gso_segs;
 	__bpf_md_ptr(struct bpf_sock *, sk);
 	__u32 gso_size;
+	__u32 :32;		/* Padding, future use. */
+	__u64 hwtstamp;
 };
 
 struct bpf_tunnel_key {
--
cgit v1.2.3
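For illustration only (not part of the patch series): a minimal tc classifier
reading the new field might look as follows, assuming a libbpf toolchain and
UAPI headers from a kernel carrying this patch. The section name and the
bpf_printk logging are editorial choices.

#include <linux/bpf.h>
#include <linux/pkt_cls.h>
#include <bpf/bpf_helpers.h>

/* Log the NIC-provided RX timestamp when present. hwtstamp reads as 0
 * if the driver did not attach a hardware timestamp to this skb. */
SEC("classifier")
int log_hwtstamp(struct __sk_buff *skb)
{
	__u64 hwts = skb->hwtstamp;	/* read-only from BPF */

	if (hwts)
		bpf_printk("hw rx timestamp: %llu ns", hwts);
	return TC_ACT_OK;
}

char _license[] SEC("license") = "GPL";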
From c22ac2a3d4bd83411ebf0b1726e9e5fc4f5e7ebf Mon Sep 17 00:00:00 2001
From: Song Liu
Date: Fri, 10 Sep 2021 11:33:50 -0700
Subject: perf: Enable branch record for software events

The typical way to access branch records (e.g. Intel LBR) is via a
hardware perf_event. For CPUs with FREEZE_LBRS_ON_PMI support, the PMI
can capture reliable LBR. On the other hand, LBR can also be useful in
non-PMI scenarios. For example, in a kretprobe or bpf fexit program, LBR
can provide a lot of information on what happened with the function.

Add an API to use branch records from software. Note that, when the
software event triggers, it is necessary to stop the branch record
hardware asap. Therefore, static_call is used to remove some branch
instructions in this process.

Suggested-by: Peter Zijlstra
Signed-off-by: Song Liu
Signed-off-by: Alexei Starovoitov
Acked-by: John Fastabend
Acked-by: Andrii Nakryiko
Acked-by: Peter Zijlstra (Intel)
Link: https://lore.kernel.org/bpf/20210910183352.3151445-2-songliubraving@fb.com
---
 include/linux/perf_event.h | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)
(limited to 'include')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index fe156a8170aa..0cbc5dfe1110 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -57,6 +57,7 @@ struct perf_guest_info_callbacks {
 #include <linux/cgroup.h>
 #include <linux/refcount.h>
 #include <linux/security.h>
+#include <linux/static_call.h>
 #include <asm/local.h>
 
 struct perf_callchain_entry {
@@ -1612,4 +1613,26 @@ extern void __weak arch_perf_update_userpage(struct perf_event *event,
 extern __weak u64 arch_perf_get_page_size(struct mm_struct *mm, unsigned long addr);
 #endif
 
+/*
+ * Snapshot branch stack on software events.
+ *
+ * Branch stack can be very useful in understanding software events. For
+ * example, when a long function, e.g. sys_perf_event_open, returns an
+ * errno, it is not obvious why the function failed. Branch stack could
+ * provide very helpful information in this type of scenario.
+ *
+ * On software event, it is necessary to stop the hardware branch recorder
+ * fast. Otherwise, the hardware register/buffer will be flushed with
+ * entries of the triggering event. Therefore, static call is used to
+ * stop the hardware recorder.
+ */
+
+/*
+ * cnt is the number of entries allocated for entries.
+ * Return number of entries copied to entries.
+ */
+typedef int (perf_snapshot_branch_stack_t)(struct perf_branch_entry *entries,
+					   unsigned int cnt);
+DECLARE_STATIC_CALL(perf_snapshot_branch_stack, perf_snapshot_branch_stack_t);
+
 #endif /* _LINUX_PERF_EVENT_H */
--
cgit v1.2.3

From 856c02dbce4f8d6a5644083db22c11750aa11481 Mon Sep 17 00:00:00 2001
From: Song Liu
Date: Fri, 10 Sep 2021 11:33:51 -0700
Subject: bpf: Introduce helper bpf_get_branch_snapshot

Introduce bpf_get_branch_snapshot(), which allows a tracing program to
get the branch trace from hardware (e.g. Intel LBR). To use the feature,
the user needs to create a perf_event with proper branch_record filtering
on each cpu, and then call bpf_get_branch_snapshot in the bpf function.
On Intel CPUs, the VLBR event (raw event 0x1b00) can be used for this.

Signed-off-by: Song Liu
Signed-off-by: Alexei Starovoitov
Acked-by: John Fastabend
Acked-by: Andrii Nakryiko
Link: https://lore.kernel.org/bpf/20210910183352.3151445-3-songliubraving@fb.com
---
 include/uapi/linux/bpf.h | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)
(limited to 'include')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 51cfd91cc387..d21326558d42 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -4877,6 +4877,27 @@ union bpf_attr {
  *		Get the struct pt_regs associated with **task**.
  *	Return
  *		A pointer to struct pt_regs.
+ *
+ * long bpf_get_branch_snapshot(void *entries, u32 size, u64 flags)
+ *	Description
+ *		Get the branch trace from hardware engines like Intel LBR. The
+ *		hardware engine is stopped shortly after the helper is
+ *		called. Therefore, the user needs to filter branch entries
+ *		based on the actual use case. To capture the branch trace
+ *		before the trigger point of the BPF program, the helper
+ *		should be called at the beginning of the BPF program.
+ *
+ *		The data is stored as struct perf_branch_entry into the
+ *		output buffer *entries*. *size* is the size of *entries* in
+ *		bytes. *flags* is reserved for now and must be zero.
+ *
+ *	Return
+ *		On success, number of bytes written to *entries*. On error, a
+ *		negative value.
+ *
+ *		**-EINVAL** if *flags* is not zero.
+ *
+ *		**-ENOENT** if the architecture does not support branch records.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5055,6 +5076,7 @@
 	FN(get_func_ip),		\
 	FN(get_attach_cookie),		\
 	FN(task_pt_regs),		\
+	FN(get_branch_snapshot),	\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
--
cgit v1.2.3
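The two patches above fit together: the static call lets the kernel stop the
branch recorder with minimal overhead, and the helper exposes the snapshot to
BPF. Below is a hedged sketch of the BPF side, modeled on the pattern the
commit message describes; the fexit attach point kernel_clone is illustrative,
and vmlinux.h plus a helper definition generated from this bpf.h are assumed.

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

#define MAX_LBR_ENTRIES 32	/* deeper than current Intel LBR depth */

/* Global storage the user-space loader can read back after the run. */
struct perf_branch_entry snapshot[MAX_LBR_ENTRIES] = {};
long snapshot_bytes = 0;

SEC("fexit/kernel_clone")
int BPF_PROG(snapshot_branches)
{
	/* Call the helper as early as possible: the branch recorder is
	 * stopped shortly after it runs, so any work done before the
	 * call pollutes the snapshot with the program's own branches. */
	snapshot_bytes = bpf_get_branch_snapshot(snapshot, sizeof(snapshot), 0);
	return 0;
}

char LICENSE[] SEC("license") = "GPL";

Before attaching such a program, user space would open a perf_event using the
VLBR raw event (0x1b00 on Intel) on each CPU, as the commit message notes.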
From 41ced4cd88020c9d4b71ff7c50d020f081efa4a0 Mon Sep 17 00:00:00 2001
From: Yonghong Song
Date: Tue, 14 Sep 2021 15:30:09 -0700
Subject: btf: Change BTF_KIND_* macros to enums

Change the BTF_KIND_* macros to enums so they are encoded in DWARF and
appear in vmlinux.h. This will make it easier for bpf programs to use
these constants without macro definitions.

Signed-off-by: Yonghong Song
Signed-off-by: Alexei Starovoitov
Acked-by: Andrii Nakryiko
Link: https://lore.kernel.org/bpf/20210914223009.245307-1-yhs@fb.com
---
 include/uapi/linux/btf.h | 41 ++++++++++++++++++++++-------------------
 1 file changed, 22 insertions(+), 19 deletions(-)
(limited to 'include')

diff --git a/include/uapi/linux/btf.h b/include/uapi/linux/btf.h
index d27b1708efe9..10e401073dd1 100644
--- a/include/uapi/linux/btf.h
+++ b/include/uapi/linux/btf.h
@@ -56,25 +56,28 @@ struct btf_type {
 #define BTF_INFO_VLEN(info)	((info) & 0xffff)
 #define BTF_INFO_KFLAG(info)	((info) >> 31)
 
-#define BTF_KIND_UNKN		0	/* Unknown	*/
-#define BTF_KIND_INT		1	/* Integer	*/
-#define BTF_KIND_PTR		2	/* Pointer	*/
-#define BTF_KIND_ARRAY		3	/* Array	*/
-#define BTF_KIND_STRUCT		4	/* Struct	*/
-#define BTF_KIND_UNION		5	/* Union	*/
-#define BTF_KIND_ENUM		6	/* Enumeration	*/
-#define BTF_KIND_FWD		7	/* Forward	*/
-#define BTF_KIND_TYPEDEF	8	/* Typedef	*/
-#define BTF_KIND_VOLATILE	9	/* Volatile	*/
-#define BTF_KIND_CONST		10	/* Const	*/
-#define BTF_KIND_RESTRICT	11	/* Restrict	*/
-#define BTF_KIND_FUNC		12	/* Function	*/
-#define BTF_KIND_FUNC_PROTO	13	/* Function Proto	*/
-#define BTF_KIND_VAR		14	/* Variable	*/
-#define BTF_KIND_DATASEC	15	/* Section	*/
-#define BTF_KIND_FLOAT		16	/* Floating point */
-#define BTF_KIND_MAX		BTF_KIND_FLOAT
-#define NR_BTF_KINDS		(BTF_KIND_MAX + 1)
+enum {
+	BTF_KIND_UNKN		= 0,	/* Unknown	*/
+	BTF_KIND_INT		= 1,	/* Integer	*/
+	BTF_KIND_PTR		= 2,	/* Pointer	*/
+	BTF_KIND_ARRAY		= 3,	/* Array	*/
+	BTF_KIND_STRUCT		= 4,	/* Struct	*/
+	BTF_KIND_UNION		= 5,	/* Union	*/
+	BTF_KIND_ENUM		= 6,	/* Enumeration	*/
+	BTF_KIND_FWD		= 7,	/* Forward	*/
+	BTF_KIND_TYPEDEF	= 8,	/* Typedef	*/
+	BTF_KIND_VOLATILE	= 9,	/* Volatile	*/
+	BTF_KIND_CONST		= 10,	/* Const	*/
+	BTF_KIND_RESTRICT	= 11,	/* Restrict	*/
+	BTF_KIND_FUNC		= 12,	/* Function	*/
+	BTF_KIND_FUNC_PROTO	= 13,	/* Function Proto */
+	BTF_KIND_VAR		= 14,	/* Variable	*/
+	BTF_KIND_DATASEC	= 15,	/* Section	*/
+	BTF_KIND_FLOAT		= 16,	/* Floating point */
+
+	NR_BTF_KINDS,
+	BTF_KIND_MAX		= NR_BTF_KINDS - 1,
+};
 
 /* For some specific BTF_KIND, "struct btf_type" is immediately
  * followed by extra data.
--
cgit v1.2.3

From b5ea834dde6b6e7f75e51d5f66dac8cd7c97b5ef Mon Sep 17 00:00:00 2001
From: Yonghong Song
Date: Tue, 14 Sep 2021 15:30:15 -0700
Subject: bpf: Support for new btf kind BTF_KIND_TAG

LLVM 14 added support for a new C attribute ([1]):

  __attribute__((btf_tag("arbitrary_str")))

This attribute will be emitted to DWARF ([2]), and pahole will convert
it to BTF. For the bpf target, the attribute will be emitted to BTF
directly ([3], [4]). The attribute is intended to provide additional
information for:
  - struct/union types or struct/union members
  - static/global variables
  - static/global functions or function parameters.

In the linux kernel, btf_tag can be applied in various places to mark
user pointers, function pre- or post-conditions, function allow/deny in
certain contexts, etc. Such information will be encoded in vmlinux BTF
and can be used by the verifier. btf_tag can also be applied to bpf
programs to help verify global functions, e.g., by specifying
preconditions.

This patch adds basic parsing and checking support in the kernel for the
new BTF_KIND_TAG kind.

[1] https://reviews.llvm.org/D106614
[2] https://reviews.llvm.org/D106621
[3] https://reviews.llvm.org/D106622
[4] https://reviews.llvm.org/D109560

Signed-off-by: Yonghong Song
Signed-off-by: Alexei Starovoitov
Acked-by: Andrii Nakryiko
Link: https://lore.kernel.org/bpf/20210914223015.245546-1-yhs@fb.com
---
 include/uapi/linux/btf.h | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)
(limited to 'include')

diff --git a/include/uapi/linux/btf.h b/include/uapi/linux/btf.h
index 10e401073dd1..642b6ecb37d7 100644
--- a/include/uapi/linux/btf.h
+++ b/include/uapi/linux/btf.h
@@ -43,7 +43,7 @@ struct btf_type {
 	 * "size" tells the size of the type it is describing.
 	 *
 	 * "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT,
-	 * FUNC, FUNC_PROTO and VAR.
+	 * FUNC, FUNC_PROTO, VAR and TAG.
 	 * "type" is a type_id referring to another type.
 	 */
 	union {
@@ -74,6 +74,7 @@ enum {
 	BTF_KIND_VAR		= 14,	/* Variable	*/
 	BTF_KIND_DATASEC	= 15,	/* Section	*/
 	BTF_KIND_FLOAT		= 16,	/* Floating point */
+	BTF_KIND_TAG		= 17,	/* Tag */
 
 	NR_BTF_KINDS,
 	BTF_KIND_MAX		= NR_BTF_KINDS - 1,
@@ -173,4 +174,15 @@ struct btf_var_secinfo {
 	__u32	size;
 };
 
+/* BTF_KIND_TAG is followed by a single "struct btf_tag" to describe
+ * additional information related to the tag applied location.
+ * If component_idx == -1, the tag is applied to a struct, union,
+ * variable or function. Otherwise, it is applied to a struct/union
+ * member or a func argument, and component_idx indicates which member
+ * or argument (0 ... vlen-1).
+ */
+struct btf_tag {
+	__s32	component_idx;
+};
+
 #endif /* _UAPI__LINUX_BTF_H__ */
--
cgit v1.2.3
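To make the new kind concrete, here is a hedged sketch of how the attribute
maps onto struct btf_tag. The tag string and the struct are invented for
illustration, and compiling them to BTF requires LLVM 14+ ([1], [3]).

#define __tag_acquired __attribute__((btf_tag("acquired")))

struct resource {
	void *handle __tag_acquired;	/* member tag: BTF_KIND_TAG with
					 * component_idx == 0 */
	int users;
} __tag_acquired;			/* struct tag: component_idx == -1 */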
From 336562752acc1a723f9a24b5b8129ae22e0478c6 Mon Sep 17 00:00:00 2001
From: Matteo Croce
Date: Wed, 15 Sep 2021 01:54:00 +0200
Subject: bpf: Update bpf_get_smp_processor_id() documentation

BPF programs run with migration disabled regardless of preemption, as
they are protected by migrate_disable(). Update the uapi documentation
accordingly.

Signed-off-by: Matteo Croce
Signed-off-by: Daniel Borkmann
Acked-by: Yonghong Song
Link: https://lore.kernel.org/bpf/20210914235400.59427-1-mcroce@linux.microsoft.com
---
 include/uapi/linux/bpf.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'include')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index d21326558d42..3e9785f1064a 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1629,7 +1629,7 @@ union bpf_attr {
  * u32 bpf_get_smp_processor_id(void)
  *	Description
  *		Get the SMP (symmetric multiprocessing) processor id. Note that
- *		all programs run with preemption disabled, which means that the
+ *		all programs run with migration disabled, which means that the
  *		SMP processor id is stable during all the execution of the
  *		program.
  *	Return
--
cgit v1.2.3
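A sketch of what the clarified guarantee buys (illustrative program, not from
the patch): because migration is disabled while the program runs, the returned
id stays valid for the whole run and can index a per-CPU slot of a plain array
map. Preemption on the same CPU is still possible, so the update itself is
kept atomic.

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(max_entries, 256);	/* assumed >= number of possible CPUs */
	__type(key, __u32);
	__type(value, __u64);
} hits SEC(".maps");

SEC("tracepoint/raw_syscalls/sys_enter")
int count_hits(void *ctx)
{
	/* Stable for the whole run: the program cannot move to another
	 * CPU between this call and the map update below. */
	__u32 cpu = bpf_get_smp_processor_id();
	__u64 *val = bpf_map_lookup_elem(&hits, &cpu);

	if (val)
		__sync_fetch_and_add(val, 1);	/* atomic: preemption is still possible */
	return 0;
}

char LICENSE[] SEC("license") = "GPL";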