author		Linus Torvalds <torvalds@linux-foundation.org>	2026-02-10 11:26:21 -0800
committer	Linus Torvalds <torvalds@linux-foundation.org>	2026-02-10 11:26:21 -0800
commit		f17b474e36647c23801ef8fdaf2255ab66dd2973 (patch)
tree		7fbaa4d93d71d72eb1cf8f61201eb42881daaeb0 /include
parent		a7423e6ea2f8f6f453de79213c26f7a36c86d9a2 (diff)
parent		db975debcb8c4cd367a78811bc1ba84c83f854bd (diff)
Merge tag 'bpf-next-7.0' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Pull bpf updates from Alexei Starovoitov:

 - Support associating BPF program with struct_ops (Amery Hung)

 - Switch BPF local storage to rqspinlock and remove recursion detection
   counters which were causing false positives (Amery Hung)

 - Fix live registers marking for indirect jumps (Anton Protopopov)

 - Introduce execution context detection BPF helpers (Changwoo Min)

 - Improve verifier precision for 32-bit sign extension pattern
   (Cupertino Miranda)

 - Optimize BTF type lookup by sorting vmlinux BTF and doing binary
   search (Donglin Peng)

 - Allow states pruning for misc/invalid slots in iterator loops
   (Eduard Zingerman)

 - In preparation for ASAN support in BPF arenas, teach libbpf to move
   global BPF variables to the end of the region and enable arena kfuncs
   while holding locks (Emil Tsalapatis)

 - Introduce support for implicit arguments in kfuncs and migrate a
   number of them to the new API. This is a prerequisite for cgroup
   sub-schedulers in sched-ext (Ihor Solodrai)

 - Fix incorrect copied_seq calculation in sockmap (Jiayuan Chen)

 - Fix ORC stack unwind from kprobe_multi (Jiri Olsa)

 - Speed up fentry attach by using a single ftrace direct ops in BPF
   trampolines (Jiri Olsa)

 - Require frozen map for calculating map hash (KP Singh)

 - Fix lock entry creation in the TAS fallback in rqspinlock (Kumar
   Kartikeya Dwivedi)

 - Allow user space to select a CPU in lookup/update operations on
   per-cpu array and hash maps (Leon Hwang)

 - Make kfuncs return trusted pointers by default (Matt Bobrowski)

 - Introduce "fsession" support where a single BPF program is executed
   upon entry to and exit from a traced kernel function (Menglong Dong);
   a hedged program sketch follows the commit list below

 - Allow bpf_timer and bpf_wq use in all program types (Mykyta
   Yatsenko, Andrii Nakryiko, Kumar Kartikeya Dwivedi, Alexei
   Starovoitov)

 - Make KF_TRUSTED_ARGS the default for all kfuncs and clean up their
   definitions across the tree (Puranjay Mohan)

 - Allow BPF arena calls from non-sleepable context (Puranjay Mohan)

 - Improve register id comparison logic in the verifier and extend
   linked registers with negative offsets (Puranjay Mohan)

 - In preparation for BPF-OOM, introduce kfuncs to access memcg events
   (Roman Gushchin)

 - Use CFI compatible destructor kfunc type (Sami Tolvanen)

 - Add bitwise tracking for BPF_END in the verifier (Tianci Cao)

 - Add range tracking for BPF_DIV and BPF_MOD in the verifier (Yazhou
   Tang)

 - Make BPF selftests work with 64k page size (Yonghong Song)

* tag 'bpf-next-7.0' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: (268 commits)
  selftests/bpf: Fix outdated test on storage->smap
  selftests/bpf: Choose another percpu variable in bpf for btf_dump test
  selftests/bpf: Remove test_task_storage_map_stress_lookup
  selftests/bpf: Update task_local_storage/task_storage_nodeadlock test
  selftests/bpf: Update task_local_storage/recursion test
  selftests/bpf: Update sk_storage_omem_uncharge test
  bpf: Switch to bpf_selem_unlink_nofail in bpf_local_storage_{map_free, destroy}
  bpf: Support lockless unlink when freeing map or local storage
  bpf: Prepare for bpf_selem_unlink_nofail()
  bpf: Remove unused percpu counter from bpf_local_storage_map_free
  bpf: Remove cgroup local storage percpu counter
  bpf: Remove task local storage percpu counter
  bpf: Change local_storage->lock and b->lock to rqspinlock
  bpf: Convert bpf_selem_unlink to failable
  bpf: Convert bpf_selem_link_map to failable
  bpf: Convert bpf_selem_unlink_map to failable
  bpf: Select bpf_local_storage_map_bucket based on bpf_local_storage
  selftests/xsk: fix number of Tx frags in invalid packet
  selftests/xsk: properly handle batch ending in the middle of a packet
  bpf: Prevent reentrance into call_rcu_tasks_trace()
  ...
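The "fsession" item above corresponds to the new BPF_TRACE_FSESSION attach type and the call_session_cookie program flag visible in the include/ diff below. The following is only a hypothetical sketch of what such a program could look like: the SEC("fsession/...") section name and the reuse of the bpf_session_is_return()/bpf_session_cookie() kfuncs from kprobe/uprobe sessions are assumptions, not something this diff confirms.

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

char LICENSE[] SEC("license") = "GPL";

/* Session kfuncs as used by kprobe/uprobe sessions; assumed to be
 * usable from fsession programs as well. */
extern bool bpf_session_is_return(void) __ksym;
extern __u64 *bpf_session_cookie(void) __ksym;

/* Assumed section name; the traced function is only an example. */
SEC("fsession/do_sys_openat2")
int BPF_PROG(time_openat2)
{
	__u64 *cookie = bpf_session_cookie();

	if (!cookie)
		return 0;

	if (!bpf_session_is_return()) {
		*cookie = bpf_ktime_get_ns();	/* entry: stamp start time */
		return 0;
	}

	/* exit: the same program runs again, the cookie carries the entry stamp */
	bpf_printk("do_sys_openat2 took %llu ns",
		   bpf_ktime_get_ns() - *cookie);
	return 0;
}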
Diffstat (limited to 'include')
-rw-r--r--	include/asm-generic/rqspinlock.h	2
-rw-r--r--	include/linux/bpf-cgroup.h	4
-rw-r--r--	include/linux/bpf.h	178
-rw-r--r--	include/linux/bpf_local_storage.h	29
-rw-r--r--	include/linux/bpf_mprog.h	10
-rw-r--r--	include/linux/bpf_verifier.h	14
-rw-r--r--	include/linux/btf.h	9
-rw-r--r--	include/linux/filter.h	1
-rw-r--r--	include/linux/ftrace.h	31
-rw-r--r--	include/linux/ftrace_regs.h	25
-rw-r--r--	include/linux/memcontrol.h	19
-rw-r--r--	include/linux/skmsg.h	70
-rw-r--r--	include/linux/tnum.h	5
-rw-r--r--	include/uapi/linux/bpf.h	28
14 files changed, 393 insertions, 32 deletions
diff --git a/include/asm-generic/rqspinlock.h b/include/asm-generic/rqspinlock.h
index 0f2dcbbfee2f..5c5cf2f7fc39 100644
--- a/include/asm-generic/rqspinlock.h
+++ b/include/asm-generic/rqspinlock.h
@@ -191,7 +191,7 @@ static __always_inline int res_spin_lock(rqspinlock_t *lock)
#else
-#define res_spin_lock(lock) resilient_tas_spin_lock(lock)
+#define res_spin_lock(lock) ({ grab_held_lock_entry(lock); resilient_tas_spin_lock(lock); })
#endif /* CONFIG_QUEUED_SPINLOCKS */
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index d1eb5c7729cb..2f535331f926 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -172,7 +172,7 @@ void bpf_cgroup_storage_link(struct bpf_cgroup_storage *storage,
void bpf_cgroup_storage_unlink(struct bpf_cgroup_storage *storage);
int bpf_cgroup_storage_assign(struct bpf_prog_aux *aux, struct bpf_map *map);
-int bpf_percpu_cgroup_storage_copy(struct bpf_map *map, void *key, void *value);
+int bpf_percpu_cgroup_storage_copy(struct bpf_map *map, void *key, void *value, u64 flags);
int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
void *value, u64 flags);
@@ -470,7 +470,7 @@ static inline struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(
static inline void bpf_cgroup_storage_free(
struct bpf_cgroup_storage *storage) {}
static inline int bpf_percpu_cgroup_storage_copy(struct bpf_map *map, void *key,
- void *value) {
+ void *value, u64 flags) {
return 0;
}
static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map,
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index e5be698256d1..cd9b96434904 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -287,6 +287,7 @@ struct bpf_map_owner {
enum bpf_prog_type type;
bool jited;
bool xdp_has_frags;
+ bool sleepable;
u64 storage_cookie[MAX_BPF_CGROUP_STORAGE_TYPE];
const struct btf_type *attach_func_proto;
enum bpf_attach_type expected_attach_type;
@@ -673,6 +674,22 @@ void bpf_map_free_internal_structs(struct bpf_map *map, void *obj);
int bpf_dynptr_from_file_sleepable(struct file *file, u32 flags,
struct bpf_dynptr *ptr__uninit);
+#if defined(CONFIG_MMU) && defined(CONFIG_64BIT)
+void *bpf_arena_alloc_pages_non_sleepable(void *p__map, void *addr__ign, u32 page_cnt, int node_id,
+ u64 flags);
+void bpf_arena_free_pages_non_sleepable(void *p__map, void *ptr__ign, u32 page_cnt);
+#else
+static inline void *bpf_arena_alloc_pages_non_sleepable(void *p__map, void *addr__ign, u32 page_cnt,
+ int node_id, u64 flags)
+{
+ return NULL;
+}
+
+static inline void bpf_arena_free_pages_non_sleepable(void *p__map, void *ptr__ign, u32 page_cnt)
+{
+}
+#endif
+
extern const struct bpf_map_ops bpf_map_offload_ops;
/* bpf_type_flag contains a set of flags that are applicable to the values of
@@ -737,7 +754,7 @@ enum bpf_type_flag {
MEM_ALLOC = BIT(11 + BPF_BASE_TYPE_BITS),
/* PTR was passed from the kernel in a trusted context, and may be
- * passed to KF_TRUSTED_ARGS kfuncs or BPF helper functions.
+ * passed to kfuncs or BPF helper functions.
* Confusingly, this is _not_ the opposite of PTR_UNTRUSTED above.
* PTR_UNTRUSTED refers to a kptr that was read directly from a map
* without invoking bpf_kptr_xchg(). What we really need to know is
@@ -1213,6 +1230,9 @@ enum {
#endif
};
+#define BPF_TRAMP_COOKIE_INDEX_SHIFT 8
+#define BPF_TRAMP_IS_RETURN_SHIFT 63
+
struct bpf_tramp_links {
struct bpf_tramp_link *links[BPF_MAX_TRAMP_LINKS];
int nr_links;
@@ -1293,6 +1313,7 @@ enum bpf_tramp_prog_type {
BPF_TRAMP_MODIFY_RETURN,
BPF_TRAMP_MAX,
BPF_TRAMP_REPLACE, /* more than MAX */
+ BPF_TRAMP_FSESSION,
};
struct bpf_tramp_image {
@@ -1309,14 +1330,17 @@ struct bpf_tramp_image {
};
struct bpf_trampoline {
- /* hlist for trampoline_table */
- struct hlist_node hlist;
+ /* hlist for trampoline_key_table */
+ struct hlist_node hlist_key;
+ /* hlist for trampoline_ip_table */
+ struct hlist_node hlist_ip;
struct ftrace_ops *fops;
/* serializes access to fields of this trampoline */
struct mutex mutex;
refcount_t refcnt;
u32 flags;
u64 key;
+ unsigned long ip;
struct {
struct btf_func_model model;
void *addr;
@@ -1418,7 +1442,7 @@ bool __bpf_dynptr_is_rdonly(const struct bpf_dynptr_kern *ptr);
int __bpf_dynptr_write(const struct bpf_dynptr_kern *dst, u64 offset,
void *src, u64 len, u64 flags);
void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *p, u64 offset,
- void *buffer__opt, u64 buffer__szk);
+ void *buffer__nullable, u64 buffer__szk);
static inline int bpf_dynptr_check_off_len(const struct bpf_dynptr_kern *ptr, u64 offset, u64 len)
{
@@ -1742,8 +1766,12 @@ struct bpf_prog_aux {
struct rcu_head rcu;
};
struct bpf_stream stream[2];
+ struct mutex st_ops_assoc_mutex;
+ struct bpf_map __rcu *st_ops_assoc;
};
+#define BPF_NR_CONTEXTS 4 /* normal, softirq, hardirq, NMI */
+
struct bpf_prog {
u16 pages; /* Number of allocated pages */
u16 jited:1, /* Is our filter JIT'ed? */
@@ -1759,6 +1787,7 @@ struct bpf_prog {
enforce_expected_attach_type:1, /* Enforce expected_attach_type checking at attach time */
call_get_stack:1, /* Do we call bpf_get_stack() or bpf_get_stackid() */
call_get_func_ip:1, /* Do we call get_func_ip() */
+ call_session_cookie:1, /* Do we call bpf_session_cookie() */
tstamp_type_access:1, /* Accessed __sk_buff->tstamp_type */
sleepable:1; /* BPF program is sleepable */
enum bpf_prog_type type; /* Type of BPF program */
@@ -1770,7 +1799,7 @@ struct bpf_prog {
u8 tag[BPF_TAG_SIZE];
};
struct bpf_prog_stats __percpu *stats;
- int __percpu *active;
+ u8 __percpu *active; /* u8[BPF_NR_CONTEXTS] for recursion protection */
unsigned int (*bpf_func)(const void *ctx,
const struct bpf_insn *insn);
struct bpf_prog_aux *aux; /* Auxiliary fields */
@@ -1855,6 +1884,11 @@ struct bpf_tracing_link {
struct bpf_prog *tgt_prog;
};
+struct bpf_fsession_link {
+ struct bpf_tracing_link link;
+ struct bpf_tramp_link fexit;
+};
+
struct bpf_raw_tp_link {
struct bpf_link link;
struct bpf_raw_event_map *btp;
@@ -2002,6 +2036,40 @@ struct bpf_struct_ops_common_value {
enum bpf_struct_ops_state state;
};
+static inline bool bpf_prog_get_recursion_context(struct bpf_prog *prog)
+{
+#ifdef CONFIG_ARM64
+ u8 rctx = interrupt_context_level();
+ u8 *active = this_cpu_ptr(prog->active);
+ u32 val;
+
+ preempt_disable();
+ active[rctx]++;
+ val = le32_to_cpu(*(__le32 *)active);
+ preempt_enable();
+ if (val != BIT(rctx * 8))
+ return false;
+
+ return true;
+#else
+ return this_cpu_inc_return(*(int __percpu *)(prog->active)) == 1;
+#endif
+}
+
+static inline void bpf_prog_put_recursion_context(struct bpf_prog *prog)
+{
+#ifdef CONFIG_ARM64
+ u8 rctx = interrupt_context_level();
+ u8 *active = this_cpu_ptr(prog->active);
+
+ preempt_disable();
+ active[rctx]--;
+ preempt_enable();
+#else
+ this_cpu_dec(*(int __percpu *)(prog->active));
+#endif
+}
+
#if defined(CONFIG_BPF_JIT) && defined(CONFIG_BPF_SYSCALL)
/* This macro helps developer to register a struct_ops type and generate
* type information correctly. Developers should use this macro to register
@@ -2044,6 +2112,9 @@ static inline void bpf_module_put(const void *data, struct module *owner)
module_put(owner);
}
int bpf_struct_ops_link_create(union bpf_attr *attr);
+int bpf_prog_assoc_struct_ops(struct bpf_prog *prog, struct bpf_map *map);
+void bpf_prog_disassoc_struct_ops(struct bpf_prog *prog);
+void *bpf_prog_get_assoc_struct_ops(const struct bpf_prog_aux *aux);
u32 bpf_struct_ops_id(const void *kdata);
#ifdef CONFIG_NET
@@ -2091,6 +2162,17 @@ static inline int bpf_struct_ops_link_create(union bpf_attr *attr)
{
return -EOPNOTSUPP;
}
+static inline int bpf_prog_assoc_struct_ops(struct bpf_prog *prog, struct bpf_map *map)
+{
+ return -EOPNOTSUPP;
+}
+static inline void bpf_prog_disassoc_struct_ops(struct bpf_prog *prog)
+{
+}
+static inline void *bpf_prog_get_assoc_struct_ops(const struct bpf_prog_aux *aux)
+{
+ return NULL;
+}
static inline void bpf_map_struct_ops_info_fill(struct bpf_map_info *info, struct bpf_map *map)
{
}
@@ -2101,6 +2183,37 @@ static inline void bpf_struct_ops_desc_release(struct bpf_struct_ops_desc *st_op
#endif
+static inline int bpf_fsession_cnt(struct bpf_tramp_links *links)
+{
+ struct bpf_tramp_links fentries = links[BPF_TRAMP_FENTRY];
+ int cnt = 0;
+
+ for (int i = 0; i < links[BPF_TRAMP_FENTRY].nr_links; i++) {
+ if (fentries.links[i]->link.prog->expected_attach_type == BPF_TRACE_FSESSION)
+ cnt++;
+ }
+
+ return cnt;
+}
+
+static inline bool bpf_prog_calls_session_cookie(struct bpf_tramp_link *link)
+{
+ return link->link.prog->call_session_cookie;
+}
+
+static inline int bpf_fsession_cookie_cnt(struct bpf_tramp_links *links)
+{
+ struct bpf_tramp_links fentries = links[BPF_TRAMP_FENTRY];
+ int cnt = 0;
+
+ for (int i = 0; i < links[BPF_TRAMP_FENTRY].nr_links; i++) {
+ if (bpf_prog_calls_session_cookie(fentries.links[i]))
+ cnt++;
+ }
+
+ return cnt;
+}
+
int bpf_prog_ctx_arg_info_init(struct bpf_prog *prog,
const struct bpf_ctx_arg_aux *info, u32 cnt);
@@ -2540,6 +2653,10 @@ struct bpf_prog *bpf_prog_get_curr_or_next(u32 *id);
int bpf_map_alloc_pages(const struct bpf_map *map, int nid,
unsigned long nr_pages, struct page **page_array);
#ifdef CONFIG_MEMCG
+void bpf_map_memcg_enter(const struct bpf_map *map, struct mem_cgroup **old_memcg,
+ struct mem_cgroup **new_memcg);
+void bpf_map_memcg_exit(struct mem_cgroup *old_memcg,
+ struct mem_cgroup *memcg);
void *bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags,
int node);
void *bpf_map_kmalloc_nolock(const struct bpf_map *map, size_t size, gfp_t flags,
@@ -2564,6 +2681,17 @@ void __percpu *bpf_map_alloc_percpu(const struct bpf_map *map, size_t size,
kvcalloc(_n, _size, _flags)
#define bpf_map_alloc_percpu(_map, _size, _align, _flags) \
__alloc_percpu_gfp(_size, _align, _flags)
+static inline void bpf_map_memcg_enter(const struct bpf_map *map, struct mem_cgroup **old_memcg,
+ struct mem_cgroup **new_memcg)
+{
+ *new_memcg = NULL;
+ *old_memcg = NULL;
+}
+
+static inline void bpf_map_memcg_exit(struct mem_cgroup *old_memcg,
+ struct mem_cgroup *memcg)
+{
+}
#endif
static inline int
@@ -2764,8 +2892,8 @@ int map_set_for_each_callback_args(struct bpf_verifier_env *env,
struct bpf_func_state *caller,
struct bpf_func_state *callee);
-int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value);
-int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value);
+int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value, u64 flags);
+int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value, u64 flags);
int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value,
u64 flags);
int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
@@ -3243,6 +3371,11 @@ static inline void bpf_prog_report_arena_violation(bool write, unsigned long add
}
#endif /* CONFIG_BPF_SYSCALL */
+static inline bool bpf_net_capable(void)
+{
+ return capable(CAP_NET_ADMIN) || capable(CAP_SYS_ADMIN);
+}
+
static __always_inline int
bpf_probe_read_kernel_common(void *dst, u32 size, const void *unsafe_ptr)
{
@@ -3832,14 +3965,43 @@ bpf_prog_update_insn_ptrs(struct bpf_prog *prog, u32 *offsets, void *image)
}
#endif
+static inline bool bpf_map_supports_cpu_flags(enum bpf_map_type map_type)
+{
+ switch (map_type) {
+ case BPF_MAP_TYPE_PERCPU_ARRAY:
+ case BPF_MAP_TYPE_PERCPU_HASH:
+ case BPF_MAP_TYPE_LRU_PERCPU_HASH:
+ case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE:
+ return true;
+ default:
+ return false;
+ }
+}
+
static inline int bpf_map_check_op_flags(struct bpf_map *map, u64 flags, u64 allowed_flags)
{
- if (flags & ~allowed_flags)
+ u32 cpu;
+
+ if ((u32)flags & ~allowed_flags)
return -EINVAL;
if ((flags & BPF_F_LOCK) && !btf_record_has_field(map->record, BPF_SPIN_LOCK))
return -EINVAL;
+ if (!(flags & BPF_F_CPU) && flags >> 32)
+ return -EINVAL;
+
+ if (flags & (BPF_F_CPU | BPF_F_ALL_CPUS)) {
+ if (!bpf_map_supports_cpu_flags(map->map_type))
+ return -EINVAL;
+ if ((flags & BPF_F_CPU) && (flags & BPF_F_ALL_CPUS))
+ return -EINVAL;
+
+ cpu = flags >> 32;
+ if ((flags & BPF_F_CPU) && cpu >= num_possible_cpus())
+ return -ERANGE;
+ }
+
return 0;
}
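The bpf_map_check_op_flags() change above validates the new BPF_F_CPU / BPF_F_ALL_CPUS flags, with the target CPU carried in the upper 32 bits of the flags word. A minimal user-space sketch of reading a single CPU's slot of a per-cpu array under that scheme follows; the BPF_F_CPU value mirrors the uapi hunk further down, while the map fd, key, and libbpf usage are purely illustrative.

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <bpf/bpf.h>

#ifndef BPF_F_CPU
#define BPF_F_CPU	8ULL		/* mirrors the uapi addition below */
#endif

/* Read only @cpu's slot of a BPF_MAP_TYPE_PERCPU_ARRAY element instead of
 * one value per possible CPU. Illustrative sketch, not from this merge. */
static int lookup_one_cpu(int map_fd, __u32 key, __u32 cpu, __u64 *value)
{
	__u64 flags = BPF_F_CPU | ((__u64)cpu << 32);

	if (bpf_map_lookup_elem_flags(map_fd, &key, value, flags)) {
		fprintf(stderr, "lookup failed: %s\n", strerror(errno));
		return -1;
	}
	return 0;
}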
diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h
index 66432248cd81..85efa9772530 100644
--- a/include/linux/bpf_local_storage.h
+++ b/include/linux/bpf_local_storage.h
@@ -15,12 +15,13 @@
#include <linux/types.h>
#include <linux/bpf_mem_alloc.h>
#include <uapi/linux/btf.h>
+#include <asm/rqspinlock.h>
#define BPF_LOCAL_STORAGE_CACHE_SIZE 16
struct bpf_local_storage_map_bucket {
struct hlist_head list;
- raw_spinlock_t lock;
+ rqspinlock_t lock;
};
/* Thp map is not the primary owner of a bpf_local_storage_elem.
@@ -67,6 +68,11 @@ struct bpf_local_storage_data {
u8 data[] __aligned(8);
};
+#define SELEM_MAP_UNLINKED (1 << 0)
+#define SELEM_STORAGE_UNLINKED (1 << 1)
+#define SELEM_UNLINKED (SELEM_MAP_UNLINKED | SELEM_STORAGE_UNLINKED)
+#define SELEM_TOFREE (1 << 2)
+
/* Linked to bpf_local_storage and bpf_local_storage_map */
struct bpf_local_storage_elem {
struct hlist_node map_node; /* Linked to bpf_local_storage_map */
@@ -79,7 +85,9 @@ struct bpf_local_storage_elem {
* after raw_spin_unlock
*/
};
- /* 8 bytes hole */
+ atomic_t state;
+ bool use_kmalloc_nolock;
+ /* 3 bytes hole */
/* The data is stored in another cacheline to minimize
* the number of cachelines access during a cache hit.
*/
@@ -88,13 +96,14 @@ struct bpf_local_storage_elem {
struct bpf_local_storage {
struct bpf_local_storage_data __rcu *cache[BPF_LOCAL_STORAGE_CACHE_SIZE];
- struct bpf_local_storage_map __rcu *smap;
struct hlist_head list; /* List of bpf_local_storage_elem */
void *owner; /* The object that owns the above "list" of
* bpf_local_storage_elem.
*/
struct rcu_head rcu;
- raw_spinlock_t lock; /* Protect adding/removing from the "list" */
+ rqspinlock_t lock; /* Protect adding/removing from the "list" */
+ u64 mem_charge; /* Copy of mem charged to owner. Protected by "lock" */
+ refcount_t owner_refcnt;/* Used to pin owner when map_free is uncharging */
bool use_kmalloc_nolock;
};
@@ -162,11 +171,10 @@ bpf_local_storage_lookup(struct bpf_local_storage *local_storage,
return SDATA(selem);
}
-void bpf_local_storage_destroy(struct bpf_local_storage *local_storage);
+u32 bpf_local_storage_destroy(struct bpf_local_storage *local_storage);
void bpf_local_storage_map_free(struct bpf_map *map,
- struct bpf_local_storage_cache *cache,
- int __percpu *busy_counter);
+ struct bpf_local_storage_cache *cache);
int bpf_local_storage_map_check_btf(const struct bpf_map *map,
const struct btf *btf,
@@ -176,10 +184,11 @@ int bpf_local_storage_map_check_btf(const struct bpf_map *map,
void bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage,
struct bpf_local_storage_elem *selem);
-void bpf_selem_unlink(struct bpf_local_storage_elem *selem, bool reuse_now);
+int bpf_selem_unlink(struct bpf_local_storage_elem *selem);
-void bpf_selem_link_map(struct bpf_local_storage_map *smap,
- struct bpf_local_storage_elem *selem);
+int bpf_selem_link_map(struct bpf_local_storage_map *smap,
+ struct bpf_local_storage *local_storage,
+ struct bpf_local_storage_elem *selem);
struct bpf_local_storage_elem *
bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner, void *value,
diff --git a/include/linux/bpf_mprog.h b/include/linux/bpf_mprog.h
index 929225f7b095..0b9f4caeeb0a 100644
--- a/include/linux/bpf_mprog.h
+++ b/include/linux/bpf_mprog.h
@@ -340,4 +340,14 @@ static inline bool bpf_mprog_supported(enum bpf_prog_type type)
return false;
}
}
+
+static inline bool bpf_mprog_detach_empty(enum bpf_prog_type type)
+{
+ switch (type) {
+ case BPF_PROG_TYPE_SCHED_CLS:
+ return bpf_net_capable();
+ default:
+ return false;
+ }
+}
#endif /* __BPF_MPROG_H */
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 130bcbd66f60..ef8e45a362d9 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -147,8 +147,12 @@ struct bpf_reg_state {
* registers. Example:
* r1 = r2; both will have r1->id == r2->id == N
* r1 += 10; r1->id == N | BPF_ADD_CONST and r1->off == 10
+ * r3 = r2; both will have r3->id == r2->id == N
+ * w3 += 10; r3->id == N | BPF_ADD_CONST32 and r3->off == 10
*/
-#define BPF_ADD_CONST (1U << 31)
+#define BPF_ADD_CONST64 (1U << 31)
+#define BPF_ADD_CONST32 (1U << 30)
+#define BPF_ADD_CONST (BPF_ADD_CONST64 | BPF_ADD_CONST32)
u32 id;
/* PTR_TO_SOCKET and PTR_TO_TCP_SOCK could be a ptr returned
* from a pointer-cast helper, bpf_sk_fullsock() and
@@ -692,12 +696,16 @@ struct bpf_id_pair {
struct bpf_idmap {
u32 tmp_id_gen;
+ u32 cnt;
struct bpf_id_pair map[BPF_ID_MAP_SIZE];
};
struct bpf_idset {
- u32 count;
- u32 ids[BPF_ID_MAP_SIZE];
+ u32 num_ids;
+ struct {
+ u32 id;
+ u32 cnt;
+ } entries[BPF_ID_MAP_SIZE];
};
/* see verifier.c:compute_scc_callchain() */
diff --git a/include/linux/btf.h b/include/linux/btf.h
index f06976ffb63f..48108471c5b1 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -34,7 +34,7 @@
*
* And the following kfunc:
*
- * BTF_ID_FLAGS(func, bpf_task_acquire, KF_ACQUIRE | KF_TRUSTED_ARGS)
+ * BTF_ID_FLAGS(func, bpf_task_acquire, KF_ACQUIRE)
*
* All invocations to the kfunc must pass the unmodified, unwalked task:
*
@@ -66,7 +66,6 @@
* return 0;
* }
*/
-#define KF_TRUSTED_ARGS (1 << 4) /* kfunc only takes trusted pointer arguments */
#define KF_SLEEPABLE (1 << 5) /* kfunc may sleep */
#define KF_DESTRUCTIVE (1 << 6) /* kfunc performs destructive actions */
#define KF_RCU (1 << 7) /* kfunc takes either rcu or trusted pointer arguments */
@@ -79,6 +78,7 @@
#define KF_ARENA_RET (1 << 13) /* kfunc returns an arena pointer */
#define KF_ARENA_ARG1 (1 << 14) /* kfunc takes an arena pointer as its first argument */
#define KF_ARENA_ARG2 (1 << 15) /* kfunc takes an arena pointer as its second argument */
+#define KF_IMPLICIT_ARGS (1 << 16) /* kfunc has implicit arguments supplied by the verifier */
/*
* Tag marking a kernel function as a kfunc. This is meant to minimize the
@@ -220,6 +220,7 @@ bool btf_is_module(const struct btf *btf);
bool btf_is_vmlinux(const struct btf *btf);
struct module *btf_try_get_module(const struct btf *btf);
u32 btf_nr_types(const struct btf *btf);
+u32 btf_named_start_id(const struct btf *btf, bool own);
struct btf *btf_base_btf(const struct btf *btf);
bool btf_type_is_i32(const struct btf_type *t);
bool btf_type_is_i64(const struct btf_type *t);
@@ -575,8 +576,8 @@ const char *btf_name_by_offset(const struct btf *btf, u32 offset);
const char *btf_str_by_offset(const struct btf *btf, u32 offset);
struct btf *btf_parse_vmlinux(void);
struct btf *bpf_prog_get_target_btf(const struct bpf_prog *prog);
-u32 *btf_kfunc_id_set_contains(const struct btf *btf, u32 kfunc_btf_id,
- const struct bpf_prog *prog);
+u32 *btf_kfunc_flags(const struct btf *btf, u32 kfunc_btf_id, const struct bpf_prog *prog);
+bool btf_kfunc_is_allowed(const struct btf *btf, u32 kfunc_btf_id, const struct bpf_prog *prog);
u32 *btf_kfunc_is_modify_return(const struct btf *btf, u32 kfunc_btf_id,
const struct bpf_prog *prog);
int register_btf_kfunc_id_set(enum bpf_prog_type prog_type,
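With KF_TRUSTED_ARGS removed above, trusted arguments are the default for every kfunc, as the updated bpf_task_acquire example in the comment shows. Below is a minimal sketch of registering a module kfunc under that scheme using the existing BTF_KFUNCS_START / BTF_ID_FLAGS / register_btf_kfunc_id_set machinery; the kfunc itself and the chosen program type are illustrative, not part of this merge.

#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/btf_ids.h>
#include <linux/module.h>
#include <linux/sched.h>

/* Illustrative kfunc: its task argument is treated as trusted by default,
 * so no flag needs to be spelled out any more. */
__bpf_kfunc int bpf_example_task_pid(struct task_struct *task)
{
	return task->pid;
}

BTF_KFUNCS_START(example_kfunc_ids)
BTF_ID_FLAGS(func, bpf_example_task_pid)	/* no KF_TRUSTED_ARGS needed */
BTF_KFUNCS_END(example_kfunc_ids)

static const struct btf_kfunc_id_set example_kfunc_set = {
	.owner	= THIS_MODULE,
	.set	= &example_kfunc_ids,
};

static int __init example_init(void)
{
	return register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING,
					 &example_kfunc_set);
}
module_init(example_init);
MODULE_LICENSE("GPL");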
diff --git a/include/linux/filter.h b/include/linux/filter.h
index fd54fed8f95f..4e1cb4f91f49 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -1167,6 +1167,7 @@ bool bpf_jit_supports_arena(void);
bool bpf_jit_supports_insn(struct bpf_insn *insn, bool in_arena);
bool bpf_jit_supports_private_stack(void);
bool bpf_jit_supports_timed_may_goto(void);
+bool bpf_jit_supports_fsession(void);
u64 bpf_arch_uaddress_limit(void);
void arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, u64 bp), void *cookie);
u64 arch_bpf_timed_may_goto(void);
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index a3a8989e3268..705db0a6d995 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -82,6 +82,7 @@ static inline void early_trace_init(void) { }
struct module;
struct ftrace_hash;
+struct ftrace_func_entry;
#if defined(CONFIG_FUNCTION_TRACER) && defined(CONFIG_MODULES) && \
defined(CONFIG_DYNAMIC_FTRACE)
@@ -359,7 +360,6 @@ enum {
FTRACE_OPS_FL_DIRECT = BIT(17),
FTRACE_OPS_FL_SUBOP = BIT(18),
FTRACE_OPS_FL_GRAPH = BIT(19),
- FTRACE_OPS_FL_JMP = BIT(20),
};
#ifndef CONFIG_DYNAMIC_FTRACE_WITH_ARGS
@@ -403,9 +403,17 @@ enum ftrace_ops_cmd {
* Negative on failure. The return value is dependent on the
* callback.
*/
-typedef int (*ftrace_ops_func_t)(struct ftrace_ops *op, enum ftrace_ops_cmd cmd);
+typedef int (*ftrace_ops_func_t)(struct ftrace_ops *op, unsigned long ip, enum ftrace_ops_cmd cmd);
#ifdef CONFIG_DYNAMIC_FTRACE
+
+#define FTRACE_HASH_DEFAULT_BITS 10
+
+struct ftrace_hash *alloc_ftrace_hash(int size_bits);
+void free_ftrace_hash(struct ftrace_hash *hash);
+struct ftrace_func_entry *add_ftrace_hash_entry_direct(struct ftrace_hash *hash,
+ unsigned long ip, unsigned long direct);
+
/* The hash used to know what functions callbacks trace */
struct ftrace_ops_hash {
struct ftrace_hash __rcu *notrace_hash;
@@ -535,6 +543,10 @@ int unregister_ftrace_direct(struct ftrace_ops *ops, unsigned long addr,
int modify_ftrace_direct(struct ftrace_ops *ops, unsigned long addr);
int modify_ftrace_direct_nolock(struct ftrace_ops *ops, unsigned long addr);
+int update_ftrace_direct_add(struct ftrace_ops *ops, struct ftrace_hash *hash);
+int update_ftrace_direct_del(struct ftrace_ops *ops, struct ftrace_hash *hash);
+int update_ftrace_direct_mod(struct ftrace_ops *ops, struct ftrace_hash *hash, bool do_direct_lock);
+
void ftrace_stub_direct_tramp(void);
#else
@@ -561,6 +573,21 @@ static inline int modify_ftrace_direct_nolock(struct ftrace_ops *ops, unsigned l
return -ENODEV;
}
+static inline int update_ftrace_direct_add(struct ftrace_ops *ops, struct ftrace_hash *hash)
+{
+ return -ENODEV;
+}
+
+static inline int update_ftrace_direct_del(struct ftrace_ops *ops, struct ftrace_hash *hash)
+{
+ return -ENODEV;
+}
+
+static inline int update_ftrace_direct_mod(struct ftrace_ops *ops, struct ftrace_hash *hash, bool do_direct_lock)
+{
+ return -ENODEV;
+}
+
/*
* This must be implemented by the architecture.
* It is the way the ftrace direct_ops helper, when called
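ftrace.h now exports hash helpers and batched update_ftrace_direct_add/del/mod() so a single direct ftrace_ops can cover many functions, which is what the "single ftrace direct ops in BPF trampolines" item in the pull message relies on. The sketch below is based only on the prototypes above; the ownership of the hash after the call and the exact error semantics are assumptions made for illustration.

/* Sketch based only on the prototypes added above (not from this merge):
 * attach one direct trampoline to two functions in a single batch.
 * Whether the caller keeps ownership of @hash afterwards is assumed here. */
static int attach_direct_pair(struct ftrace_ops *ops, unsigned long ip1,
			      unsigned long ip2, unsigned long tramp)
{
	struct ftrace_hash *hash;
	int err;

	hash = alloc_ftrace_hash(FTRACE_HASH_DEFAULT_BITS);
	if (!hash)
		return -ENOMEM;

	if (!add_ftrace_hash_entry_direct(hash, ip1, tramp) ||
	    !add_ftrace_hash_entry_direct(hash, ip2, tramp)) {
		free_ftrace_hash(hash);
		return -ENOMEM;
	}

	err = update_ftrace_direct_add(ops, hash);
	free_ftrace_hash(hash);
	return err;
}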
diff --git a/include/linux/ftrace_regs.h b/include/linux/ftrace_regs.h
index 15627ceea9bc..386fa48c4a95 100644
--- a/include/linux/ftrace_regs.h
+++ b/include/linux/ftrace_regs.h
@@ -33,6 +33,31 @@ struct ftrace_regs;
#define ftrace_regs_get_frame_pointer(fregs) \
frame_pointer(&arch_ftrace_regs(fregs)->regs)
+static __always_inline void
+ftrace_partial_regs_update(struct ftrace_regs *fregs, struct pt_regs *regs) { }
+
+#else
+
+/*
+ * ftrace_partial_regs_update - update the original ftrace_regs from regs
+ * @fregs: The ftrace_regs to update from @regs
+ * @regs: The partial regs from ftrace_partial_regs() that was updated
+ *
+ * Some architectures have the partial regs living in the ftrace_regs
+ * structure, whereas other architectures need to make a different copy
+ * of the @regs. If a partial @regs is retrieved by ftrace_partial_regs() and
+ * if the code using @regs updates a field (like the instruction pointer or
+ * stack pointer) it may need to propagate that change to the original @fregs
+ * it retrieved the partial @regs from. Use this function to guarantee that
+ * update happens.
+ */
+static __always_inline void
+ftrace_partial_regs_update(struct ftrace_regs *fregs, struct pt_regs *regs)
+{
+ ftrace_regs_set_instruction_pointer(fregs, instruction_pointer(regs));
+ ftrace_regs_set_return_value(fregs, regs_return_value(regs));
+}
+
#endif /* HAVE_ARCH_FTRACE_REGS */
/* This can be overridden by the architectures */
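The new ftrace_partial_regs_update() above propagates changes made through a partial pt_regs copy back into the originating ftrace_regs. A minimal sketch of the intended call pattern in an ftrace callback follows; the callback itself and whatever modifies the regs are illustrative.

#include <linux/ftrace.h>

/* Illustrative callback (not from this merge): hand a partial pt_regs to
 * code that may rewrite the IP or return value, then push any such change
 * back into fregs as the comment above requires. */
static void example_callback(unsigned long ip, unsigned long parent_ip,
			     struct ftrace_ops *ops, struct ftrace_regs *fregs)
{
	struct pt_regs regs_storage;
	struct pt_regs *regs = ftrace_partial_regs(fregs, &regs_storage);

	/* ... consumer code (e.g. a probe handler) may modify *regs ... */

	/* Propagate IP/return-value updates back to the original fregs. */
	ftrace_partial_regs_update(fregs, regs);
}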
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 5b004b95648b..67f154de10bc 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -949,7 +949,11 @@ static inline void mod_memcg_page_state(struct page *page,
rcu_read_unlock();
}
+unsigned long memcg_events(struct mem_cgroup *memcg, int event);
unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx);
+unsigned long memcg_page_state_output(struct mem_cgroup *memcg, int item);
+bool memcg_stat_item_valid(int idx);
+bool memcg_vm_event_item_valid(enum vm_event_item idx);
unsigned long lruvec_page_state(struct lruvec *lruvec, enum node_stat_item idx);
unsigned long lruvec_page_state_local(struct lruvec *lruvec,
enum node_stat_item idx);
@@ -1375,6 +1379,21 @@ static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx)
return 0;
}
+static inline unsigned long memcg_page_state_output(struct mem_cgroup *memcg, int item)
+{
+ return 0;
+}
+
+static inline bool memcg_stat_item_valid(int idx)
+{
+ return false;
+}
+
+static inline bool memcg_vm_event_item_valid(enum vm_event_item idx)
+{
+ return false;
+}
+
static inline unsigned long lruvec_page_state(struct lruvec *lruvec,
enum node_stat_item idx)
{
diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index 49847888c287..829b281d6c9c 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -97,6 +97,8 @@ struct sk_psock {
struct sk_buff_head ingress_skb;
struct list_head ingress_msg;
spinlock_t ingress_lock;
+ /** @msg_tot_len: Total bytes queued in ingress_msg list. */
+ u32 msg_tot_len;
unsigned long state;
struct list_head link;
spinlock_t link_lock;
@@ -141,6 +143,8 @@ int sk_msg_memcopy_from_iter(struct sock *sk, struct iov_iter *from,
struct sk_msg *msg, u32 bytes);
int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
int len, int flags);
+int __sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
+ int len, int flags, int *copied_from_self);
bool sk_msg_is_readable(struct sock *sk);
static inline void sk_msg_check_to_free(struct sk_msg *msg, u32 i, u32 bytes)
@@ -319,6 +323,27 @@ static inline void sock_drop(struct sock *sk, struct sk_buff *skb)
kfree_skb(skb);
}
+static inline u32 sk_psock_get_msg_len_nolock(struct sk_psock *psock)
+{
+ /* Used by ioctl to read msg_tot_len only; lock-free for performance */
+ return READ_ONCE(psock->msg_tot_len);
+}
+
+static inline void sk_psock_msg_len_add_locked(struct sk_psock *psock, int diff)
+{
+ /* Use WRITE_ONCE to ensure correct read in sk_psock_get_msg_len_nolock().
+ * ingress_lock should be held to prevent concurrent updates to msg_tot_len
+ */
+ WRITE_ONCE(psock->msg_tot_len, psock->msg_tot_len + diff);
+}
+
+static inline void sk_psock_msg_len_add(struct sk_psock *psock, int diff)
+{
+ spin_lock_bh(&psock->ingress_lock);
+ sk_psock_msg_len_add_locked(psock, diff);
+ spin_unlock_bh(&psock->ingress_lock);
+}
+
static inline bool sk_psock_queue_msg(struct sk_psock *psock,
struct sk_msg *msg)
{
@@ -327,6 +352,7 @@ static inline bool sk_psock_queue_msg(struct sk_psock *psock,
spin_lock_bh(&psock->ingress_lock);
if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) {
list_add_tail(&msg->list, &psock->ingress_msg);
+ sk_psock_msg_len_add_locked(psock, msg->sg.size);
ret = true;
} else {
sk_msg_free(psock->sk, msg);
@@ -343,18 +369,25 @@ static inline struct sk_msg *sk_psock_dequeue_msg(struct sk_psock *psock)
spin_lock_bh(&psock->ingress_lock);
msg = list_first_entry_or_null(&psock->ingress_msg, struct sk_msg, list);
- if (msg)
+ if (msg) {
list_del(&msg->list);
+ sk_psock_msg_len_add_locked(psock, -msg->sg.size);
+ }
spin_unlock_bh(&psock->ingress_lock);
return msg;
}
+static inline struct sk_msg *sk_psock_peek_msg_locked(struct sk_psock *psock)
+{
+ return list_first_entry_or_null(&psock->ingress_msg, struct sk_msg, list);
+}
+
static inline struct sk_msg *sk_psock_peek_msg(struct sk_psock *psock)
{
struct sk_msg *msg;
spin_lock_bh(&psock->ingress_lock);
- msg = list_first_entry_or_null(&psock->ingress_msg, struct sk_msg, list);
+ msg = sk_psock_peek_msg_locked(psock);
spin_unlock_bh(&psock->ingress_lock);
return msg;
}
@@ -521,6 +554,39 @@ static inline bool sk_psock_strp_enabled(struct sk_psock *psock)
return !!psock->saved_data_ready;
}
+/* for tcp only, sk is locked */
+static inline ssize_t sk_psock_msg_inq(struct sock *sk)
+{
+ struct sk_psock *psock;
+ ssize_t inq = 0;
+
+ psock = sk_psock_get(sk);
+ if (likely(psock)) {
+ inq = sk_psock_get_msg_len_nolock(psock);
+ sk_psock_put(sk, psock);
+ }
+ return inq;
+}
+
+/* for udp only, sk is not locked */
+static inline ssize_t sk_msg_first_len(struct sock *sk)
+{
+ struct sk_psock *psock;
+ struct sk_msg *msg;
+ ssize_t inq = 0;
+
+ psock = sk_psock_get(sk);
+ if (likely(psock)) {
+ spin_lock_bh(&psock->ingress_lock);
+ msg = sk_psock_peek_msg_locked(psock);
+ if (msg)
+ inq = msg->sg.size;
+ spin_unlock_bh(&psock->ingress_lock);
+ sk_psock_put(sk, psock);
+ }
+ return inq;
+}
+
#if IS_ENABLED(CONFIG_NET_SOCK_MSG)
#define BPF_F_STRPARSER (1UL << 1)
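The msg_tot_len counter and sk_psock_msg_inq() above let callers report bytes parked on the psock ingress queue without walking the list. The snippet below is an assumed call site, not code from this merge, showing how an ioctl(FIONREAD)-style query for a sockmap TCP socket could combine it with the regular receive-queue count.

#include <net/tcp.h>
#include <linux/skmsg.h>

/* Illustrative consumer (assumed call site): report queued bytes for a
 * sockmap TCP socket, including psock ingress messages. */
static int example_tcp_inq(struct sock *sk)
{
	int answ;

	lock_sock(sk);		/* sk_psock_msg_inq() expects sk locked for TCP */
	answ = tcp_inq(sk) + sk_psock_msg_inq(sk);
	release_sock(sk);

	return answ;
}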
diff --git a/include/linux/tnum.h b/include/linux/tnum.h
index c52b862dad45..fa4654ffb621 100644
--- a/include/linux/tnum.h
+++ b/include/linux/tnum.h
@@ -63,6 +63,11 @@ struct tnum tnum_union(struct tnum t1, struct tnum t2);
/* Return @a with all but the lowest @size bytes cleared */
struct tnum tnum_cast(struct tnum a, u8 size);
+/* Swap the bytes of a tnum */
+struct tnum tnum_bswap16(struct tnum a);
+struct tnum tnum_bswap32(struct tnum a);
+struct tnum tnum_bswap64(struct tnum a);
+
/* Returns true if @a is a known constant */
static inline bool tnum_is_const(struct tnum a)
{
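The tnum_bswap*() declarations above back the "bitwise tracking for BPF_END" item in the pull message: a byte swap permutes known and unknown bytes identically in a tnum's (value, mask) pair. The snippet below is only an illustration of that idea for the 16-bit case, not the kernel's implementation; it assumes the usual tnum convention that set mask bits are unknown and that upper bits are cleared, as with tnum_cast(a, 2).

#include <linux/swab.h>
#include <linux/tnum.h>

/* Illustration only (not the kernel's tnum_bswap16()): known and unknown
 * bits move together under a byte swap, so the same permutation applies
 * to both the value and the mask. */
static struct tnum example_bswap16(struct tnum a)
{
	return TNUM(swab16((u16)a.value), swab16((u16)a.mask));
}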
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index f8d8513eda27..c8d400b7680a 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -119,6 +119,14 @@ enum bpf_cgroup_iter_order {
BPF_CGROUP_ITER_DESCENDANTS_PRE, /* walk descendants in pre-order. */
BPF_CGROUP_ITER_DESCENDANTS_POST, /* walk descendants in post-order. */
BPF_CGROUP_ITER_ANCESTORS_UP, /* walk ancestors upward. */
+ /*
+ * Walks the immediate children of the specified parent
+ * cgroup_subsys_state. Unlike BPF_CGROUP_ITER_DESCENDANTS_PRE,
+ * BPF_CGROUP_ITER_DESCENDANTS_POST, and BPF_CGROUP_ITER_ANCESTORS_UP
+ * the iterator does not include the specified parent as one of the
+ * returned iterator elements.
+ */
+ BPF_CGROUP_ITER_CHILDREN,
};
union bpf_iter_link_info {
@@ -918,6 +926,16 @@ union bpf_iter_link_info {
* Number of bytes read from the stream on success, or -1 if an
* error occurred (in which case, *errno* is set appropriately).
*
+ * BPF_PROG_ASSOC_STRUCT_OPS
+ * Description
+ * Associate a BPF program with a struct_ops map. The struct_ops
+ * map is identified by *map_fd* and the BPF program is
+ * identified by *prog_fd*.
+ *
+ * Return
+ * 0 on success or -1 if an error occurred (in which case,
+ * *errno* is set appropriately).
+ *
* NOTES
* eBPF objects (maps and programs) can be shared between processes.
*
@@ -974,6 +992,7 @@ enum bpf_cmd {
BPF_PROG_BIND_MAP,
BPF_TOKEN_CREATE,
BPF_PROG_STREAM_READ_BY_FD,
+ BPF_PROG_ASSOC_STRUCT_OPS,
__MAX_BPF_CMD,
};
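The BPF_PROG_ASSOC_STRUCT_OPS command described above takes a map fd and a prog fd, matching the prog_assoc_struct_ops attr struct added at the end of this diff. A user-space sketch of issuing it through the raw bpf(2) syscall follows; it assumes a uapi header that already carries this merge, and leaves flags at zero since no flag values are defined in this diff.

#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/bpf.h>

/* Associate @prog_fd with the struct_ops map @map_fd using the new
 * command; returns 0 on success, -1 with errno set on failure. */
static int assoc_prog_with_struct_ops(int prog_fd, int map_fd)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.prog_assoc_struct_ops.prog_fd = prog_fd;
	attr.prog_assoc_struct_ops.map_fd = map_fd;

	return syscall(__NR_bpf, BPF_PROG_ASSOC_STRUCT_OPS, &attr, sizeof(attr));
}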
@@ -1134,6 +1153,7 @@ enum bpf_attach_type {
BPF_NETKIT_PEER,
BPF_TRACE_KPROBE_SESSION,
BPF_TRACE_UPROBE_SESSION,
+ BPF_TRACE_FSESSION,
__MAX_BPF_ATTACH_TYPE
};
@@ -1373,6 +1393,8 @@ enum {
BPF_NOEXIST = 1, /* create new element if it didn't exist */
BPF_EXIST = 2, /* update existing element */
BPF_F_LOCK = 4, /* spin_lock-ed map_lookup/map_update */
+ BPF_F_CPU = 8, /* cpu flag for percpu maps, upper 32-bit of flags is a cpu number */
+ BPF_F_ALL_CPUS = 16, /* update value across all CPUs for percpu maps */
};
/* flags for BPF_MAP_CREATE command */
@@ -1894,6 +1916,12 @@ union bpf_attr {
__u32 prog_fd;
} prog_stream_read;
+ struct {
+ __u32 map_fd;
+ __u32 prog_fd;
+ __u32 flags;
+ } prog_assoc_struct_ops;
+
} __attribute__((aligned(8)));
/* The description below is an attempt at providing documentation to eBPF