summaryrefslogtreecommitdiff
path: root/include/linux/btf.h
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-12-13 15:47:48 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2022-12-13 15:47:48 -0800
commit7e68dd7d07a28faa2e6574dd6b9dbd90cdeaae91 (patch)
treeae0427c5a3b905f24b3a44b510a9bcf35d9b67a3 /include/linux/btf.h
parent1ca06f1c1acecbe02124f14a37cce347b8c1a90c (diff)
parent7c4a6309e27f411743817fe74a832ec2d2798a4b (diff)
Merge tag 'net-next-6.2' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next
Pull networking updates from Paolo Abeni: "Core: - Allow live renaming when an interface is up - Add retpoline wrappers for tc, improving considerably the performances of complex queue discipline configurations - Add inet drop monitor support - A few GRO performance improvements - Add infrastructure for atomic dev stats, addressing long standing data races - De-duplicate common code between OVS and conntrack offloading infrastructure - A bunch of UBSAN_BOUNDS/FORTIFY_SOURCE improvements - Netfilter: introduce packet parser for tunneled packets - Replace IPVS timer-based estimators with kthreads to scale up the workload with the number of available CPUs - Add the helper support for connection-tracking OVS offload BPF: - Support for user defined BPF objects: the use case is to allocate own objects, build own object hierarchies and use the building blocks to build own data structures flexibly, for example, linked lists in BPF - Make cgroup local storage available to non-cgroup attached BPF programs - Avoid unnecessary deadlock detection and failures wrt BPF task storage helpers - A relevant bunch of BPF verifier fixes and improvements - Veristat tool improvements to support custom filtering, sorting, and replay of results - Add LLVM disassembler as default library for dumping JITed code - Lots of new BPF documentation for various BPF maps - Add bpf_rcu_read_{,un}lock() support for sleepable programs - Add RCU grace period chaining to BPF to wait for the completion of access from both sleepable and non-sleepable BPF programs - Add support storing struct task_struct objects as kptrs in maps - Improve helper UAPI by explicitly defining BPF_FUNC_xxx integer values - Add libbpf *_opts API-variants for bpf_*_get_fd_by_id() functions Protocols: - TCP: implement Protective Load Balancing across switch links - TCP: allow dynamically disabling TCP-MD5 static key, reverting back to fast[er]-path - UDP: Introduce optional per-netns hash lookup table - IPv6: simplify and cleanup sockets disposal - Netlink: support different type policies for each generic netlink operation - MPTCP: add MSG_FASTOPEN and FastOpen listener side support - MPTCP: add netlink notification support for listener sockets events - SCTP: add VRF support, allowing sctp sockets binding to VRF devices - Add bridging MAC Authentication Bypass (MAB) support - Extensions for Ethernet VPN bridging implementation to better support multicast scenarios - More work for Wi-Fi 7 support, comprising conversion of all the existing drivers to internal TX queue usage - IPSec: introduce a new offload type (packet offload) allowing complete header processing and crypto offloading - IPSec: extended ack support for more descriptive XFRM error reporting - RXRPC: increase SACK table size and move processing into a per-local endpoint kernel thread, reducing considerably the required locking - IEEE 802154: synchronous send frame and extended filtering support, initial support for scanning available 15.4 networks - Tun: bump the link speed from 10Mbps to 10Gbps - Tun/VirtioNet: implement UDP segmentation offload support Driver API: - PHY/SFP: improve power level switching between standard level 1 and the higher power levels - New API for netdev <-> devlink_port linkage - PTP: convert existing drivers to new frequency adjustment implementation - DSA: add support for rx offloading - Autoload DSA tagging driver when dynamically changing protocol - Add new PCP and APPTRUST attributes to Data Center Bridging - Add configuration support for 800Gbps link speed - Add devlink port function attribute to enable/disable RoCE and migratable - Extend devlink-rate to support strict prioriry and weighted fair queuing - Add devlink support to directly reading from region memory - New device tree helper to fetch MAC address from nvmem - New big TCP helper to simplify temporary header stripping New hardware / drivers: - Ethernet: - Marvel Octeon CNF95N and CN10KB Ethernet Switches - Marvel Prestera AC5X Ethernet Switch - WangXun 10 Gigabit NIC - Motorcomm yt8521 Gigabit Ethernet - Microchip ksz9563 Gigabit Ethernet Switch - Microsoft Azure Network Adapter - Linux Automation 10Base-T1L adapter - PHY: - Aquantia AQR112 and AQR412 - Motorcomm YT8531S - PTP: - Orolia ART-CARD - WiFi: - MediaTek Wi-Fi 7 (802.11be) devices - RealTek rtw8821cu, rtw8822bu, rtw8822cu and rtw8723du USB devices - Bluetooth: - Broadcom BCM4377/4378/4387 Bluetooth chipsets - Realtek RTL8852BE and RTL8723DS - Cypress.CYW4373A0 WiFi + Bluetooth combo device Drivers: - CAN: - gs_usb: bus error reporting support - kvaser_usb: listen only and bus error reporting support - Ethernet NICs: - Intel (100G): - extend action skbedit to RX queue mapping - implement devlink-rate support - support direct read from memory - nVidia/Mellanox (mlx5): - SW steering improvements, increasing rules update rate - Support for enhanced events compression - extend H/W offload packet manipulation capabilities - implement IPSec packet offload mode - nVidia/Mellanox (mlx4): - better big TCP support - Netronome Ethernet NICs (nfp): - IPsec offload support - add support for multicast filter - Broadcom: - RSS and PTP support improvements - AMD/SolarFlare: - netlink extened ack improvements - add basic flower matches to offload, and related stats - Virtual NICs: - ibmvnic: introduce affinity hint support - small / embedded: - FreeScale fec: add initial XDP support - Marvel mv643xx_eth: support MII/GMII/RGMII modes for Kirkwood - TI am65-cpsw: add suspend/resume support - Mediatek MT7986: add RX wireless wthernet dispatch support - Realtek 8169: enable GRO software interrupt coalescing per default - Ethernet high-speed switches: - Microchip (sparx5): - add support for Sparx5 TC/flower H/W offload via VCAP - Mellanox mlxsw: - add 802.1X and MAC Authentication Bypass offload support - add ip6gre support - Embedded Ethernet switches: - Mediatek (mtk_eth_soc): - improve PCS implementation, add DSA untag support - enable flow offload support - Renesas: - add rswitch R-Car Gen4 gPTP support - Microchip (lan966x): - add full XDP support - add TC H/W offload via VCAP - enable PTP on bridge interfaces - Microchip (ksz8): - add MTU support for KSZ8 series - Qualcomm 802.11ax WiFi (ath11k): - support configuring channel dwell time during scan - MediaTek WiFi (mt76): - enable Wireless Ethernet Dispatch (WED) offload support - add ack signal support - enable coredump support - remain_on_channel support - Intel WiFi (iwlwifi): - enable Wi-Fi 7 Extremely High Throughput (EHT) PHY capabilities - 320 MHz channels support - RealTek WiFi (rtw89): - new dynamic header firmware format support - wake-over-WLAN support" * tag 'net-next-6.2' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (2002 commits) ipvs: fix type warning in do_div() on 32 bit net: lan966x: Remove a useless test in lan966x_ptp_add_trap() net: ipa: add IPA v4.7 support dt-bindings: net: qcom,ipa: Add SM6350 compatible bnxt: Use generic HBH removal helper in tx path IPv6/GRO: generic helper to remove temporary HBH/jumbo header in driver selftests: forwarding: Add bridge MDB test selftests: forwarding: Rename bridge_mdb test bridge: mcast: Support replacement of MDB port group entries bridge: mcast: Allow user space to specify MDB entry routing protocol bridge: mcast: Allow user space to add (*, G) with a source list and filter mode bridge: mcast: Add support for (*, G) with a source list and filter mode bridge: mcast: Avoid arming group timer when (S, G) corresponds to a source bridge: mcast: Add a flag for user installed source entries bridge: mcast: Expose __br_multicast_del_group_src() bridge: mcast: Expose br_multicast_new_group_src() bridge: mcast: Add a centralized error path bridge: mcast: Place netlink policy before validation functions bridge: mcast: Split (*, G) and (S, G) addition into different functions bridge: mcast: Do not derive entry type from its filter mode ...
Diffstat (limited to 'include/linux/btf.h')
-rw-r--r--include/linux/btf.h150
1 files changed, 123 insertions, 27 deletions
diff --git a/include/linux/btf.h b/include/linux/btf.h
index f9aababc5d78..5f628f323442 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -6,6 +6,8 @@
#include <linux/types.h>
#include <linux/bpfptr.h>
+#include <linux/bsearch.h>
+#include <linux/btf_ids.h>
#include <uapi/linux/btf.h>
#include <uapi/linux/bpf.h>
@@ -17,40 +19,58 @@
#define KF_RELEASE (1 << 1) /* kfunc is a release function */
#define KF_RET_NULL (1 << 2) /* kfunc returns a pointer that may be NULL */
#define KF_KPTR_GET (1 << 3) /* kfunc returns reference to a kptr */
-/* Trusted arguments are those which are meant to be referenced arguments with
- * unchanged offset. It is used to enforce that pointers obtained from acquire
- * kfuncs remain unmodified when being passed to helpers taking trusted args.
+/* Trusted arguments are those which are guaranteed to be valid when passed to
+ * the kfunc. It is used to enforce that pointers obtained from either acquire
+ * kfuncs, or from the main kernel on a tracepoint or struct_ops callback
+ * invocation, remain unmodified when being passed to helpers taking trusted
+ * args.
*
- * Consider
- * struct foo {
- * int data;
- * struct foo *next;
- * };
+ * Consider, for example, the following new task tracepoint:
*
- * struct bar {
- * int data;
- * struct foo f;
- * };
+ * SEC("tp_btf/task_newtask")
+ * int BPF_PROG(new_task_tp, struct task_struct *task, u64 clone_flags)
+ * {
+ * ...
+ * }
*
- * struct foo *f = alloc_foo(); // Acquire kfunc
- * struct bar *b = alloc_bar(); // Acquire kfunc
+ * And the following kfunc:
*
- * If a kfunc set_foo_data() wants to operate only on the allocated object, it
- * will set the KF_TRUSTED_ARGS flag, which will prevent unsafe usage like:
+ * BTF_ID_FLAGS(func, bpf_task_acquire, KF_ACQUIRE | KF_TRUSTED_ARGS)
*
- * set_foo_data(f, 42); // Allowed
- * set_foo_data(f->next, 42); // Rejected, non-referenced pointer
- * set_foo_data(&f->next, 42);// Rejected, referenced, but wrong type
- * set_foo_data(&b->f, 42); // Rejected, referenced, but bad offset
+ * All invocations to the kfunc must pass the unmodified, unwalked task:
*
- * In the final case, usually for the purposes of type matching, it is deduced
- * by looking at the type of the member at the offset, but due to the
- * requirement of trusted argument, this deduction will be strict and not done
- * for this case.
+ * bpf_task_acquire(task); // Allowed
+ * bpf_task_acquire(task->last_wakee); // Rejected, walked task
+ *
+ * Programs may also pass referenced tasks directly to the kfunc:
+ *
+ * struct task_struct *acquired;
+ *
+ * acquired = bpf_task_acquire(task); // Allowed, same as above
+ * bpf_task_acquire(acquired); // Allowed
+ * bpf_task_acquire(task); // Allowed
+ * bpf_task_acquire(acquired->last_wakee); // Rejected, walked task
+ *
+ * Programs may _not_, however, pass a task from an arbitrary fentry/fexit, or
+ * kprobe/kretprobe to the kfunc, as BPF cannot guarantee that all of these
+ * pointers are guaranteed to be safe. For example, the following BPF program
+ * would be rejected:
+ *
+ * SEC("kretprobe/free_task")
+ * int BPF_PROG(free_task_probe, struct task_struct *tsk)
+ * {
+ * struct task_struct *acquired;
+ *
+ * acquired = bpf_task_acquire(acquired); // Rejected, not a trusted pointer
+ * bpf_task_release(acquired);
+ *
+ * return 0;
+ * }
*/
#define KF_TRUSTED_ARGS (1 << 4) /* kfunc only takes trusted pointer arguments */
#define KF_SLEEPABLE (1 << 5) /* kfunc may sleep */
#define KF_DESTRUCTIVE (1 << 6) /* kfunc performs destructive actions */
+#define KF_RCU (1 << 7) /* kfunc only takes rcu pointer arguments */
/*
* Return the name of the passed struct, if exists, or halt the build if for
@@ -78,6 +98,17 @@ struct btf_id_dtor_kfunc {
u32 kfunc_btf_id;
};
+struct btf_struct_meta {
+ u32 btf_id;
+ struct btf_record *record;
+ struct btf_field_offs *field_offs;
+};
+
+struct btf_struct_metas {
+ u32 cnt;
+ struct btf_struct_meta types[];
+};
+
typedef void (*btf_dtor_kfunc_t)(void *);
extern const struct file_operations btf_fops;
@@ -163,8 +194,10 @@ bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s,
u32 expected_offset, u32 expected_size);
int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t);
int btf_find_timer(const struct btf *btf, const struct btf_type *t);
-struct bpf_map_value_off *btf_parse_kptrs(const struct btf *btf,
- const struct btf_type *t);
+struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type *t,
+ u32 field_mask, u32 value_size);
+int btf_check_and_fixup_fields(const struct btf *btf, struct btf_record *rec);
+struct btf_field_offs *btf_parse_field_offs(struct btf_record *rec);
bool btf_type_is_void(const struct btf_type *t);
s32 btf_find_by_name_kind(const struct btf *btf, const char *name, u8 kind);
const struct btf_type *btf_type_skip_modifiers(const struct btf *btf,
@@ -288,6 +321,11 @@ static inline bool btf_type_is_typedef(const struct btf_type *t)
return BTF_INFO_KIND(t->info) == BTF_KIND_TYPEDEF;
}
+static inline bool btf_type_is_volatile(const struct btf_type *t)
+{
+ return BTF_INFO_KIND(t->info) == BTF_KIND_VOLATILE;
+}
+
static inline bool btf_type_is_func(const struct btf_type *t)
{
return BTF_INFO_KIND(t->info) == BTF_KIND_FUNC;
@@ -318,6 +356,16 @@ static inline bool btf_type_is_struct(const struct btf_type *t)
return kind == BTF_KIND_STRUCT || kind == BTF_KIND_UNION;
}
+static inline bool __btf_type_is_struct(const struct btf_type *t)
+{
+ return BTF_INFO_KIND(t->info) == BTF_KIND_STRUCT;
+}
+
+static inline bool btf_type_is_array(const struct btf_type *t)
+{
+ return BTF_INFO_KIND(t->info) == BTF_KIND_ARRAY;
+}
+
static inline u16 btf_type_vlen(const struct btf_type *t)
{
return BTF_INFO_VLEN(t->info);
@@ -402,9 +450,27 @@ static inline struct btf_param *btf_params(const struct btf_type *t)
return (struct btf_param *)(t + 1);
}
-#ifdef CONFIG_BPF_SYSCALL
+static inline int btf_id_cmp_func(const void *a, const void *b)
+{
+ const int *pa = a, *pb = b;
+
+ return *pa - *pb;
+}
+
+static inline bool btf_id_set_contains(const struct btf_id_set *set, u32 id)
+{
+ return bsearch(&id, set->ids, set->cnt, sizeof(u32), btf_id_cmp_func) != NULL;
+}
+
+static inline void *btf_id_set8_contains(const struct btf_id_set8 *set, u32 id)
+{
+ return bsearch(&id, set->pairs, set->cnt, sizeof(set->pairs[0]), btf_id_cmp_func);
+}
+
struct bpf_prog;
+struct bpf_verifier_log;
+#ifdef CONFIG_BPF_SYSCALL
const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id);
const char *btf_name_by_offset(const struct btf *btf, u32 offset);
struct btf *btf_parse_vmlinux(void);
@@ -412,11 +478,21 @@ struct btf *bpf_prog_get_target_btf(const struct bpf_prog *prog);
u32 *btf_kfunc_id_set_contains(const struct btf *btf,
enum bpf_prog_type prog_type,
u32 kfunc_btf_id);
+u32 *btf_kfunc_is_modify_return(const struct btf *btf, u32 kfunc_btf_id);
int register_btf_kfunc_id_set(enum bpf_prog_type prog_type,
const struct btf_kfunc_id_set *s);
+int register_btf_fmodret_id_set(const struct btf_kfunc_id_set *kset);
s32 btf_find_dtor_kfunc(struct btf *btf, u32 btf_id);
int register_btf_id_dtor_kfuncs(const struct btf_id_dtor_kfunc *dtors, u32 add_cnt,
struct module *owner);
+struct btf_struct_meta *btf_find_struct_meta(const struct btf *btf, u32 btf_id);
+const struct btf_member *
+btf_get_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf,
+ const struct btf_type *t, enum bpf_prog_type prog_type,
+ int arg);
+int get_kern_ctx_btf_id(struct bpf_verifier_log *log, enum bpf_prog_type prog_type);
+bool btf_types_are_same(const struct btf *btf1, u32 id1,
+ const struct btf *btf2, u32 id2);
#else
static inline const struct btf_type *btf_type_by_id(const struct btf *btf,
u32 type_id)
@@ -448,6 +524,26 @@ static inline int register_btf_id_dtor_kfuncs(const struct btf_id_dtor_kfunc *dt
{
return 0;
}
+static inline struct btf_struct_meta *btf_find_struct_meta(const struct btf *btf, u32 btf_id)
+{
+ return NULL;
+}
+static inline const struct btf_member *
+btf_get_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf,
+ const struct btf_type *t, enum bpf_prog_type prog_type,
+ int arg)
+{
+ return NULL;
+}
+static inline int get_kern_ctx_btf_id(struct bpf_verifier_log *log,
+ enum bpf_prog_type prog_type) {
+ return -EINVAL;
+}
+static inline bool btf_types_are_same(const struct btf *btf1, u32 id1,
+ const struct btf *btf2, u32 id2)
+{
+ return false;
+}
#endif
static inline bool btf_type_is_struct_ptr(struct btf *btf, const struct btf_type *t)