summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/bpf/bpf_cgrp_storage.c62
-rw-r--r--kernel/bpf/bpf_inode_storage.c6
-rw-r--r--kernel/bpf/bpf_local_storage.c408
-rw-r--r--kernel/bpf/bpf_task_storage.c154
-rw-r--r--kernel/bpf/helpers.c4
5 files changed, 305 insertions, 329 deletions
diff --git a/kernel/bpf/bpf_cgrp_storage.c b/kernel/bpf/bpf_cgrp_storage.c
index 0687a760974a..c2a2ead1f466 100644
--- a/kernel/bpf/bpf_cgrp_storage.c
+++ b/kernel/bpf/bpf_cgrp_storage.c
@@ -11,29 +11,6 @@
DEFINE_BPF_STORAGE_CACHE(cgroup_cache);
-static DEFINE_PER_CPU(int, bpf_cgrp_storage_busy);
-
-static void bpf_cgrp_storage_lock(void)
-{
- cant_migrate();
- this_cpu_inc(bpf_cgrp_storage_busy);
-}
-
-static void bpf_cgrp_storage_unlock(void)
-{
- this_cpu_dec(bpf_cgrp_storage_busy);
-}
-
-static bool bpf_cgrp_storage_trylock(void)
-{
- cant_migrate();
- if (unlikely(this_cpu_inc_return(bpf_cgrp_storage_busy) != 1)) {
- this_cpu_dec(bpf_cgrp_storage_busy);
- return false;
- }
- return true;
-}
-
static struct bpf_local_storage __rcu **cgroup_storage_ptr(void *owner)
{
struct cgroup *cg = owner;
@@ -45,16 +22,14 @@ void bpf_cgrp_storage_free(struct cgroup *cgroup)
{
struct bpf_local_storage *local_storage;
- rcu_read_lock_dont_migrate();
+ rcu_read_lock();
local_storage = rcu_dereference(cgroup->bpf_cgrp_storage);
if (!local_storage)
goto out;
- bpf_cgrp_storage_lock();
bpf_local_storage_destroy(local_storage);
- bpf_cgrp_storage_unlock();
out:
- rcu_read_unlock_migrate();
+ rcu_read_unlock();
}
static struct bpf_local_storage_data *
@@ -83,9 +58,7 @@ static void *bpf_cgrp_storage_lookup_elem(struct bpf_map *map, void *key)
if (IS_ERR(cgroup))
return ERR_CAST(cgroup);
- bpf_cgrp_storage_lock();
sdata = cgroup_storage_lookup(cgroup, map, true);
- bpf_cgrp_storage_unlock();
cgroup_put(cgroup);
return sdata ? sdata->data : NULL;
}
@@ -102,10 +75,8 @@ static long bpf_cgrp_storage_update_elem(struct bpf_map *map, void *key,
if (IS_ERR(cgroup))
return PTR_ERR(cgroup);
- bpf_cgrp_storage_lock();
sdata = bpf_local_storage_update(cgroup, (struct bpf_local_storage_map *)map,
value, map_flags, false, GFP_ATOMIC);
- bpf_cgrp_storage_unlock();
cgroup_put(cgroup);
return PTR_ERR_OR_ZERO(sdata);
}
@@ -118,8 +89,7 @@ static int cgroup_storage_delete(struct cgroup *cgroup, struct bpf_map *map)
if (!sdata)
return -ENOENT;
- bpf_selem_unlink(SELEM(sdata), false);
- return 0;
+ return bpf_selem_unlink(SELEM(sdata));
}
static long bpf_cgrp_storage_delete_elem(struct bpf_map *map, void *key)
@@ -132,9 +102,7 @@ static long bpf_cgrp_storage_delete_elem(struct bpf_map *map, void *key)
if (IS_ERR(cgroup))
return PTR_ERR(cgroup);
- bpf_cgrp_storage_lock();
err = cgroup_storage_delete(cgroup, map);
- bpf_cgrp_storage_unlock();
cgroup_put(cgroup);
return err;
}
@@ -151,7 +119,7 @@ static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr)
static void cgroup_storage_map_free(struct bpf_map *map)
{
- bpf_local_storage_map_free(map, &cgroup_cache, &bpf_cgrp_storage_busy);
+ bpf_local_storage_map_free(map, &cgroup_cache);
}
/* *gfp_flags* is a hidden argument provided by the verifier */
@@ -159,7 +127,6 @@ BPF_CALL_5(bpf_cgrp_storage_get, struct bpf_map *, map, struct cgroup *, cgroup,
void *, value, u64, flags, gfp_t, gfp_flags)
{
struct bpf_local_storage_data *sdata;
- bool nobusy;
WARN_ON_ONCE(!bpf_rcu_lock_held());
if (flags & ~(BPF_LOCAL_STORAGE_GET_F_CREATE))
@@ -168,38 +135,27 @@ BPF_CALL_5(bpf_cgrp_storage_get, struct bpf_map *, map, struct cgroup *, cgroup,
if (!cgroup)
return (unsigned long)NULL;
- nobusy = bpf_cgrp_storage_trylock();
-
- sdata = cgroup_storage_lookup(cgroup, map, nobusy);
+ sdata = cgroup_storage_lookup(cgroup, map, true);
if (sdata)
- goto unlock;
+ goto out;
/* only allocate new storage, when the cgroup is refcounted */
if (!percpu_ref_is_dying(&cgroup->self.refcnt) &&
- (flags & BPF_LOCAL_STORAGE_GET_F_CREATE) && nobusy)
+ (flags & BPF_LOCAL_STORAGE_GET_F_CREATE))
sdata = bpf_local_storage_update(cgroup, (struct bpf_local_storage_map *)map,
value, BPF_NOEXIST, false, gfp_flags);
-unlock:
- if (nobusy)
- bpf_cgrp_storage_unlock();
+out:
return IS_ERR_OR_NULL(sdata) ? (unsigned long)NULL : (unsigned long)sdata->data;
}
BPF_CALL_2(bpf_cgrp_storage_delete, struct bpf_map *, map, struct cgroup *, cgroup)
{
- int ret;
-
WARN_ON_ONCE(!bpf_rcu_lock_held());
if (!cgroup)
return -EINVAL;
- if (!bpf_cgrp_storage_trylock())
- return -EBUSY;
-
- ret = cgroup_storage_delete(cgroup, map);
- bpf_cgrp_storage_unlock();
- return ret;
+ return cgroup_storage_delete(cgroup, map);
}
const struct bpf_map_ops cgrp_storage_map_ops = {
diff --git a/kernel/bpf/bpf_inode_storage.c b/kernel/bpf/bpf_inode_storage.c
index e54cce2b9175..e86734609f3d 100644
--- a/kernel/bpf/bpf_inode_storage.c
+++ b/kernel/bpf/bpf_inode_storage.c
@@ -110,9 +110,7 @@ static int inode_storage_delete(struct inode *inode, struct bpf_map *map)
if (!sdata)
return -ENOENT;
- bpf_selem_unlink(SELEM(sdata), false);
-
- return 0;
+ return bpf_selem_unlink(SELEM(sdata));
}
static long bpf_fd_inode_storage_delete_elem(struct bpf_map *map, void *key)
@@ -186,7 +184,7 @@ static struct bpf_map *inode_storage_map_alloc(union bpf_attr *attr)
static void inode_storage_map_free(struct bpf_map *map)
{
- bpf_local_storage_map_free(map, &inode_cache, NULL);
+ bpf_local_storage_map_free(map, &inode_cache);
}
const struct bpf_map_ops inode_storage_map_ops = {
diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c
index e2fe6c32822b..b28f07d3a0db 100644
--- a/kernel/bpf/bpf_local_storage.c
+++ b/kernel/bpf/bpf_local_storage.c
@@ -19,9 +19,9 @@
static struct bpf_local_storage_map_bucket *
select_bucket(struct bpf_local_storage_map *smap,
- struct bpf_local_storage_elem *selem)
+ struct bpf_local_storage *local_storage)
{
- return &smap->buckets[hash_ptr(selem, smap->bucket_log)];
+ return &smap->buckets[hash_ptr(local_storage, smap->bucket_log)];
}
static int mem_charge(struct bpf_local_storage_map *smap, void *owner, u32 size)
@@ -61,11 +61,6 @@ static bool selem_linked_to_storage(const struct bpf_local_storage_elem *selem)
return !hlist_unhashed(&selem->snode);
}
-static bool selem_linked_to_map_lockless(const struct bpf_local_storage_elem *selem)
-{
- return !hlist_unhashed_lockless(&selem->map_node);
-}
-
static bool selem_linked_to_map(const struct bpf_local_storage_elem *selem)
{
return !hlist_unhashed(&selem->map_node);
@@ -90,6 +85,8 @@ bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner,
if (selem) {
RCU_INIT_POINTER(SDATA(selem)->smap, smap);
+ atomic_set(&selem->state, 0);
+ selem->use_kmalloc_nolock = smap->use_kmalloc_nolock;
if (value) {
/* No need to call check_and_init_map_value as memory is zero init */
@@ -198,9 +195,11 @@ static void bpf_selem_free_rcu(struct rcu_head *rcu)
/* The bpf_local_storage_map_free will wait for rcu_barrier */
smap = rcu_dereference_check(SDATA(selem)->smap, 1);
- migrate_disable();
- bpf_obj_free_fields(smap->map.record, SDATA(selem)->data);
- migrate_enable();
+ if (smap) {
+ migrate_disable();
+ bpf_obj_free_fields(smap->map.record, SDATA(selem)->data);
+ migrate_enable();
+ }
kfree_nolock(selem);
}
@@ -219,13 +218,14 @@ void bpf_selem_free(struct bpf_local_storage_elem *selem,
smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held());
- if (!smap->use_kmalloc_nolock) {
+ if (!selem->use_kmalloc_nolock) {
/*
* No uptr will be unpin even when reuse_now == false since uptr
* is only supported in task local storage, where
* smap->use_kmalloc_nolock == true.
*/
- bpf_obj_free_fields(smap->map.record, SDATA(selem)->data);
+ if (smap)
+ bpf_obj_free_fields(smap->map.record, SDATA(selem)->data);
__bpf_selem_free(selem, reuse_now);
return;
}
@@ -256,6 +256,36 @@ static void bpf_selem_free_list(struct hlist_head *list, bool reuse_now)
bpf_selem_free(selem, reuse_now);
}
+static void bpf_selem_unlink_storage_nolock_misc(struct bpf_local_storage_elem *selem,
+ struct bpf_local_storage_map *smap,
+ struct bpf_local_storage *local_storage,
+ bool free_local_storage, bool pin_owner)
+{
+ void *owner = local_storage->owner;
+ u32 uncharge = smap->elem_size;
+
+ if (rcu_access_pointer(local_storage->cache[smap->cache_idx]) ==
+ SDATA(selem))
+ RCU_INIT_POINTER(local_storage->cache[smap->cache_idx], NULL);
+
+ if (pin_owner && !refcount_inc_not_zero(&local_storage->owner_refcnt))
+ return;
+
+ uncharge += free_local_storage ? sizeof(*local_storage) : 0;
+ mem_uncharge(smap, local_storage->owner, uncharge);
+ local_storage->mem_charge -= uncharge;
+
+ if (free_local_storage) {
+ local_storage->owner = NULL;
+
+ /* After this RCU_INIT, owner may be freed and cannot be used */
+ RCU_INIT_POINTER(*owner_storage(smap, owner), NULL);
+ }
+
+ if (pin_owner)
+ refcount_dec(&local_storage->owner_refcnt);
+}
+
/* local_storage->lock must be held and selem->local_storage == local_storage.
* The caller must ensure selem->smap is still valid to be
* dereferenced for its smap->elem_size and smap->cache_idx.
@@ -266,124 +296,219 @@ static bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_stor
{
struct bpf_local_storage_map *smap;
bool free_local_storage;
- void *owner;
smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held());
- owner = local_storage->owner;
-
- /* All uncharging on the owner must be done first.
- * The owner may be freed once the last selem is unlinked
- * from local_storage.
- */
- mem_uncharge(smap, owner, smap->elem_size);
free_local_storage = hlist_is_singular_node(&selem->snode,
&local_storage->list);
- if (free_local_storage) {
- mem_uncharge(smap, owner, sizeof(struct bpf_local_storage));
- local_storage->owner = NULL;
- /* After this RCU_INIT, owner may be freed and cannot be used */
- RCU_INIT_POINTER(*owner_storage(smap, owner), NULL);
+ bpf_selem_unlink_storage_nolock_misc(selem, smap, local_storage,
+ free_local_storage, false);
- /* local_storage is not freed now. local_storage->lock is
- * still held and raw_spin_unlock_bh(&local_storage->lock)
- * will be done by the caller.
- *
- * Although the unlock will be done under
- * rcu_read_lock(), it is more intuitive to
- * read if the freeing of the storage is done
- * after the raw_spin_unlock_bh(&local_storage->lock).
- *
- * Hence, a "bool free_local_storage" is returned
- * to the caller which then calls then frees the storage after
- * all the RCU grace periods have expired.
- */
- }
hlist_del_init_rcu(&selem->snode);
- if (rcu_access_pointer(local_storage->cache[smap->cache_idx]) ==
- SDATA(selem))
- RCU_INIT_POINTER(local_storage->cache[smap->cache_idx], NULL);
hlist_add_head(&selem->free_node, free_selem_list);
- if (rcu_access_pointer(local_storage->smap) == smap)
- RCU_INIT_POINTER(local_storage->smap, NULL);
-
return free_local_storage;
}
-static void bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem,
- bool reuse_now)
+void bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage,
+ struct bpf_local_storage_elem *selem)
+{
+ struct bpf_local_storage_map *smap;
+
+ smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held());
+ local_storage->mem_charge += smap->elem_size;
+
+ RCU_INIT_POINTER(selem->local_storage, local_storage);
+ hlist_add_head_rcu(&selem->snode, &local_storage->list);
+}
+
+static int bpf_selem_unlink_map(struct bpf_local_storage_elem *selem)
+{
+ struct bpf_local_storage *local_storage;
+ struct bpf_local_storage_map *smap;
+ struct bpf_local_storage_map_bucket *b;
+ unsigned long flags;
+ int err;
+
+ local_storage = rcu_dereference_check(selem->local_storage,
+ bpf_rcu_lock_held());
+ smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held());
+ b = select_bucket(smap, local_storage);
+ err = raw_res_spin_lock_irqsave(&b->lock, flags);
+ if (err)
+ return err;
+
+ hlist_del_init_rcu(&selem->map_node);
+ raw_res_spin_unlock_irqrestore(&b->lock, flags);
+
+ return 0;
+}
+
+static void bpf_selem_unlink_map_nolock(struct bpf_local_storage_elem *selem)
+{
+ hlist_del_init_rcu(&selem->map_node);
+}
+
+int bpf_selem_link_map(struct bpf_local_storage_map *smap,
+ struct bpf_local_storage *local_storage,
+ struct bpf_local_storage_elem *selem)
+{
+ struct bpf_local_storage_map_bucket *b;
+ unsigned long flags;
+ int err;
+
+ b = select_bucket(smap, local_storage);
+
+ err = raw_res_spin_lock_irqsave(&b->lock, flags);
+ if (err)
+ return err;
+
+ hlist_add_head_rcu(&selem->map_node, &b->list);
+ raw_res_spin_unlock_irqrestore(&b->lock, flags);
+
+ return 0;
+}
+
+static void bpf_selem_link_map_nolock(struct bpf_local_storage_map_bucket *b,
+ struct bpf_local_storage_elem *selem)
+{
+ hlist_add_head_rcu(&selem->map_node, &b->list);
+}
+
+/*
+ * Unlink an selem from map and local storage with lock held.
+ * This is the common path used by local storages to delete an selem.
+ */
+int bpf_selem_unlink(struct bpf_local_storage_elem *selem)
{
struct bpf_local_storage *local_storage;
bool free_local_storage = false;
HLIST_HEAD(selem_free_list);
unsigned long flags;
+ int err;
if (unlikely(!selem_linked_to_storage_lockless(selem)))
/* selem has already been unlinked from sk */
- return;
+ return 0;
local_storage = rcu_dereference_check(selem->local_storage,
bpf_rcu_lock_held());
- raw_spin_lock_irqsave(&local_storage->lock, flags);
- if (likely(selem_linked_to_storage(selem)))
+ err = raw_res_spin_lock_irqsave(&local_storage->lock, flags);
+ if (err)
+ return err;
+
+ if (likely(selem_linked_to_storage(selem))) {
+ /* Always unlink from map before unlinking from local_storage
+ * because selem will be freed after successfully unlinked from
+ * the local_storage.
+ */
+ err = bpf_selem_unlink_map(selem);
+ if (err)
+ goto out;
+
free_local_storage = bpf_selem_unlink_storage_nolock(
local_storage, selem, &selem_free_list);
- raw_spin_unlock_irqrestore(&local_storage->lock, flags);
+ }
+out:
+ raw_res_spin_unlock_irqrestore(&local_storage->lock, flags);
- bpf_selem_free_list(&selem_free_list, reuse_now);
+ bpf_selem_free_list(&selem_free_list, false);
if (free_local_storage)
- bpf_local_storage_free(local_storage, reuse_now);
-}
+ bpf_local_storage_free(local_storage, false);
-void bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage,
- struct bpf_local_storage_elem *selem)
-{
- RCU_INIT_POINTER(selem->local_storage, local_storage);
- hlist_add_head_rcu(&selem->snode, &local_storage->list);
+ return err;
}
-static void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem)
+/*
+ * Unlink an selem from map and local storage with lockless fallback if callers
+ * are racing or rqspinlock returns error. It should only be called by
+ * bpf_local_storage_destroy() or bpf_local_storage_map_free().
+ */
+static void bpf_selem_unlink_nofail(struct bpf_local_storage_elem *selem,
+ struct bpf_local_storage_map_bucket *b)
{
+ bool in_map_free = !!b, free_storage = false;
+ struct bpf_local_storage *local_storage;
struct bpf_local_storage_map *smap;
- struct bpf_local_storage_map_bucket *b;
unsigned long flags;
+ int err, unlink = 0;
- if (unlikely(!selem_linked_to_map_lockless(selem)))
- /* selem has already be unlinked from smap */
- return;
-
+ local_storage = rcu_dereference_check(selem->local_storage, bpf_rcu_lock_held());
smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held());
- b = select_bucket(smap, selem);
- raw_spin_lock_irqsave(&b->lock, flags);
- if (likely(selem_linked_to_map(selem)))
- hlist_del_init_rcu(&selem->map_node);
- raw_spin_unlock_irqrestore(&b->lock, flags);
-}
-void bpf_selem_link_map(struct bpf_local_storage_map *smap,
- struct bpf_local_storage_elem *selem)
-{
- struct bpf_local_storage_map_bucket *b = select_bucket(smap, selem);
- unsigned long flags;
+ if (smap) {
+ b = b ? : select_bucket(smap, local_storage);
+ err = raw_res_spin_lock_irqsave(&b->lock, flags);
+ if (!err) {
+ /*
+ * Call bpf_obj_free_fields() under b->lock to make sure it is done
+ * exactly once for an selem. Safe to free special fields immediately
+ * as no BPF program should be referencing the selem.
+ */
+ if (likely(selem_linked_to_map(selem))) {
+ hlist_del_init_rcu(&selem->map_node);
+ bpf_obj_free_fields(smap->map.record, SDATA(selem)->data);
+ unlink++;
+ }
+ raw_res_spin_unlock_irqrestore(&b->lock, flags);
+ }
+ /*
+ * Highly unlikely scenario: resource leak
+ *
+ * When map_free(selem1), destroy(selem1) and destroy(selem2) are racing
+ * and both selem belong to the same bucket, if destroy(selem2) acquired
+ * b->lock and block for too long, neither map_free(selem1) and
+ * destroy(selem1) will be able to free the special field associated
+ * with selem1 as raw_res_spin_lock_irqsave() returns -ETIMEDOUT.
+ */
+ WARN_ON_ONCE(err && in_map_free);
+ if (!err || in_map_free)
+ RCU_INIT_POINTER(SDATA(selem)->smap, NULL);
+ }
- raw_spin_lock_irqsave(&b->lock, flags);
- hlist_add_head_rcu(&selem->map_node, &b->list);
- raw_spin_unlock_irqrestore(&b->lock, flags);
-}
+ if (local_storage) {
+ err = raw_res_spin_lock_irqsave(&local_storage->lock, flags);
+ if (!err) {
+ if (likely(selem_linked_to_storage(selem))) {
+ free_storage = hlist_is_singular_node(&selem->snode,
+ &local_storage->list);
+ /*
+ * Okay to skip clearing owner_storage and storage->owner in
+ * destroy() since the owner is going away. No user or bpf
+ * programs should be able to reference it.
+ */
+ if (smap && in_map_free)
+ bpf_selem_unlink_storage_nolock_misc(
+ selem, smap, local_storage,
+ free_storage, true);
+ hlist_del_init_rcu(&selem->snode);
+ unlink++;
+ }
+ raw_res_spin_unlock_irqrestore(&local_storage->lock, flags);
+ }
+ if (!err || !in_map_free)
+ RCU_INIT_POINTER(selem->local_storage, NULL);
+ }
-void bpf_selem_unlink(struct bpf_local_storage_elem *selem, bool reuse_now)
-{
- /* Always unlink from map before unlinking from local_storage
- * because selem will be freed after successfully unlinked from
- * the local_storage.
+ if (unlink != 2)
+ atomic_or(in_map_free ? SELEM_MAP_UNLINKED : SELEM_STORAGE_UNLINKED, &selem->state);
+
+ /*
+ * Normally, an selem can be unlinked under local_storage->lock and b->lock, and
+ * then freed after an RCU grace period. However, if destroy() and map_free() are
+ * racing or rqspinlock returns errors in unlikely situations (unlink != 2), free
+ * the selem only after both map_free() and destroy() see the selem.
*/
- bpf_selem_unlink_map(selem);
- bpf_selem_unlink_storage(selem, reuse_now);
+ if (unlink == 2 ||
+ atomic_cmpxchg(&selem->state, SELEM_UNLINKED, SELEM_TOFREE) == SELEM_UNLINKED)
+ bpf_selem_free(selem, true);
+
+ if (free_storage)
+ bpf_local_storage_free(local_storage, true);
}
void __bpf_local_storage_insert_cache(struct bpf_local_storage *local_storage,
@@ -391,16 +516,20 @@ void __bpf_local_storage_insert_cache(struct bpf_local_storage *local_storage,
struct bpf_local_storage_elem *selem)
{
unsigned long flags;
+ int err;
/* spinlock is needed to avoid racing with the
* parallel delete. Otherwise, publishing an already
* deleted sdata to the cache will become a use-after-free
* problem in the next bpf_local_storage_lookup().
*/
- raw_spin_lock_irqsave(&local_storage->lock, flags);
+ err = raw_res_spin_lock_irqsave(&local_storage->lock, flags);
+ if (err)
+ return;
+
if (selem_linked_to_storage(selem))
rcu_assign_pointer(local_storage->cache[smap->cache_idx], SDATA(selem));
- raw_spin_unlock_irqrestore(&local_storage->lock, flags);
+ raw_res_spin_unlock_irqrestore(&local_storage->lock, flags);
}
static int check_flags(const struct bpf_local_storage_data *old_sdata,
@@ -424,6 +553,8 @@ int bpf_local_storage_alloc(void *owner,
{
struct bpf_local_storage *prev_storage, *storage;
struct bpf_local_storage **owner_storage_ptr;
+ struct bpf_local_storage_map_bucket *b;
+ unsigned long flags;
int err;
err = mem_charge(smap, owner, sizeof(*storage));
@@ -441,14 +572,21 @@ int bpf_local_storage_alloc(void *owner,
goto uncharge;
}
- RCU_INIT_POINTER(storage->smap, smap);
INIT_HLIST_HEAD(&storage->list);
- raw_spin_lock_init(&storage->lock);
+ raw_res_spin_lock_init(&storage->lock);
storage->owner = owner;
+ storage->mem_charge = sizeof(*storage);
storage->use_kmalloc_nolock = smap->use_kmalloc_nolock;
+ refcount_set(&storage->owner_refcnt, 1);
bpf_selem_link_storage_nolock(storage, first_selem);
- bpf_selem_link_map(smap, first_selem);
+
+ b = select_bucket(smap, storage);
+ err = raw_res_spin_lock_irqsave(&b->lock, flags);
+ if (err)
+ goto uncharge;
+
+ bpf_selem_link_map_nolock(b, first_selem);
owner_storage_ptr =
(struct bpf_local_storage **)owner_storage(smap, owner);
@@ -464,10 +602,12 @@ int bpf_local_storage_alloc(void *owner,
*/
prev_storage = cmpxchg(owner_storage_ptr, NULL, storage);
if (unlikely(prev_storage)) {
- bpf_selem_unlink_map(first_selem);
+ bpf_selem_unlink_map_nolock(first_selem);
+ raw_res_spin_unlock_irqrestore(&b->lock, flags);
err = -EAGAIN;
goto uncharge;
}
+ raw_res_spin_unlock_irqrestore(&b->lock, flags);
return 0;
@@ -489,8 +629,9 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
struct bpf_local_storage_data *old_sdata = NULL;
struct bpf_local_storage_elem *alloc_selem, *selem = NULL;
struct bpf_local_storage *local_storage;
+ struct bpf_local_storage_map_bucket *b;
HLIST_HEAD(old_selem_free_list);
- unsigned long flags;
+ unsigned long flags, b_flags;
int err;
/* BPF_EXIST and BPF_NOEXIST cannot be both set */
@@ -549,7 +690,9 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
if (!alloc_selem)
return ERR_PTR(-ENOMEM);
- raw_spin_lock_irqsave(&local_storage->lock, flags);
+ err = raw_res_spin_lock_irqsave(&local_storage->lock, flags);
+ if (err)
+ goto free_selem;
/* Recheck local_storage->list under local_storage->lock */
if (unlikely(hlist_empty(&local_storage->list))) {
@@ -574,22 +717,30 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
goto unlock;
}
+ b = select_bucket(smap, local_storage);
+
+ err = raw_res_spin_lock_irqsave(&b->lock, b_flags);
+ if (err)
+ goto unlock;
+
alloc_selem = NULL;
/* First, link the new selem to the map */
- bpf_selem_link_map(smap, selem);
+ bpf_selem_link_map_nolock(b, selem);
/* Second, link (and publish) the new selem to local_storage */
bpf_selem_link_storage_nolock(local_storage, selem);
/* Third, remove old selem, SELEM(old_sdata) */
if (old_sdata) {
- bpf_selem_unlink_map(SELEM(old_sdata));
+ bpf_selem_unlink_map_nolock(SELEM(old_sdata));
bpf_selem_unlink_storage_nolock(local_storage, SELEM(old_sdata),
&old_selem_free_list);
}
+ raw_res_spin_unlock_irqrestore(&b->lock, b_flags);
unlock:
- raw_spin_unlock_irqrestore(&local_storage->lock, flags);
+ raw_res_spin_unlock_irqrestore(&local_storage->lock, flags);
+free_selem:
bpf_selem_free_list(&old_selem_free_list, false);
if (alloc_selem) {
mem_uncharge(smap, owner, smap->elem_size);
@@ -657,13 +808,13 @@ int bpf_local_storage_map_check_btf(const struct bpf_map *map,
return 0;
}
-void bpf_local_storage_destroy(struct bpf_local_storage *local_storage)
+/*
+ * Destroy local storage when the owner is going away. Caller must uncharge memory
+ * if memory charging is used.
+ */
+u32 bpf_local_storage_destroy(struct bpf_local_storage *local_storage)
{
struct bpf_local_storage_elem *selem;
- bool free_storage = false;
- HLIST_HEAD(free_selem_list);
- struct hlist_node *n;
- unsigned long flags;
/* Neither the bpf_prog nor the bpf_map's syscall
* could be modifying the local_storage->list now.
@@ -674,27 +825,20 @@ void bpf_local_storage_destroy(struct bpf_local_storage *local_storage)
* when unlinking elem from the local_storage->list and
* the map's bucket->list.
*/
- raw_spin_lock_irqsave(&local_storage->lock, flags);
- hlist_for_each_entry_safe(selem, n, &local_storage->list, snode) {
- /* Always unlink from map before unlinking from
- * local_storage.
- */
- bpf_selem_unlink_map(selem);
- /* If local_storage list has only one element, the
- * bpf_selem_unlink_storage_nolock() will return true.
- * Otherwise, it will return false. The current loop iteration
- * intends to remove all local storage. So the last iteration
- * of the loop will set the free_cgroup_storage to true.
+ hlist_for_each_entry_rcu(selem, &local_storage->list, snode)
+ bpf_selem_unlink_nofail(selem, NULL);
+
+ if (!refcount_dec_and_test(&local_storage->owner_refcnt)) {
+ while (refcount_read(&local_storage->owner_refcnt))
+ cpu_relax();
+ /*
+ * Paired with refcount_dec() in bpf_selem_unlink_nofail()
+ * to make sure destroy() sees the correct local_storage->mem_charge.
*/
- free_storage = bpf_selem_unlink_storage_nolock(
- local_storage, selem, &free_selem_list);
+ smp_mb();
}
- raw_spin_unlock_irqrestore(&local_storage->lock, flags);
-
- bpf_selem_free_list(&free_selem_list, true);
- if (free_storage)
- bpf_local_storage_free(local_storage, true);
+ return local_storage->mem_charge;
}
u64 bpf_local_storage_map_mem_usage(const struct bpf_map *map)
@@ -736,7 +880,7 @@ bpf_local_storage_map_alloc(union bpf_attr *attr,
for (i = 0; i < nbuckets; i++) {
INIT_HLIST_HEAD(&smap->buckets[i].list);
- raw_spin_lock_init(&smap->buckets[i].lock);
+ raw_res_spin_lock_init(&smap->buckets[i].lock);
}
smap->elem_size = offsetof(struct bpf_local_storage_elem,
@@ -758,8 +902,7 @@ free_smap:
}
void bpf_local_storage_map_free(struct bpf_map *map,
- struct bpf_local_storage_cache *cache,
- int __percpu *busy_counter)
+ struct bpf_local_storage_cache *cache)
{
struct bpf_local_storage_map_bucket *b;
struct bpf_local_storage_elem *selem;
@@ -789,15 +932,14 @@ void bpf_local_storage_map_free(struct bpf_map *map,
rcu_read_lock();
/* No one is adding to b->list now */
- while ((selem = hlist_entry_safe(
- rcu_dereference_raw(hlist_first_rcu(&b->list)),
- struct bpf_local_storage_elem, map_node))) {
- if (busy_counter)
- this_cpu_inc(*busy_counter);
- bpf_selem_unlink(selem, true);
- if (busy_counter)
- this_cpu_dec(*busy_counter);
- cond_resched_rcu();
+restart:
+ hlist_for_each_entry_rcu(selem, &b->list, map_node) {
+ bpf_selem_unlink_nofail(selem, b);
+
+ if (need_resched()) {
+ cond_resched_rcu();
+ goto restart;
+ }
}
rcu_read_unlock();
}
diff --git a/kernel/bpf/bpf_task_storage.c b/kernel/bpf/bpf_task_storage.c
index a1dc1bf0848a..605506792b5b 100644
--- a/kernel/bpf/bpf_task_storage.c
+++ b/kernel/bpf/bpf_task_storage.c
@@ -20,29 +20,6 @@
DEFINE_BPF_STORAGE_CACHE(task_cache);
-static DEFINE_PER_CPU(int, bpf_task_storage_busy);
-
-static void bpf_task_storage_lock(void)
-{
- cant_migrate();
- this_cpu_inc(bpf_task_storage_busy);
-}
-
-static void bpf_task_storage_unlock(void)
-{
- this_cpu_dec(bpf_task_storage_busy);
-}
-
-static bool bpf_task_storage_trylock(void)
-{
- cant_migrate();
- if (unlikely(this_cpu_inc_return(bpf_task_storage_busy) != 1)) {
- this_cpu_dec(bpf_task_storage_busy);
- return false;
- }
- return true;
-}
-
static struct bpf_local_storage __rcu **task_storage_ptr(void *owner)
{
struct task_struct *task = owner;
@@ -70,17 +47,15 @@ void bpf_task_storage_free(struct task_struct *task)
{
struct bpf_local_storage *local_storage;
- rcu_read_lock_dont_migrate();
+ rcu_read_lock();
local_storage = rcu_dereference(task->bpf_storage);
if (!local_storage)
goto out;
- bpf_task_storage_lock();
bpf_local_storage_destroy(local_storage);
- bpf_task_storage_unlock();
out:
- rcu_read_unlock_migrate();
+ rcu_read_unlock();
}
static void *bpf_pid_task_storage_lookup_elem(struct bpf_map *map, void *key)
@@ -106,9 +81,7 @@ static void *bpf_pid_task_storage_lookup_elem(struct bpf_map *map, void *key)
goto out;
}
- bpf_task_storage_lock();
sdata = task_storage_lookup(task, map, true);
- bpf_task_storage_unlock();
put_pid(pid);
return sdata ? sdata->data : NULL;
out:
@@ -143,11 +116,9 @@ static long bpf_pid_task_storage_update_elem(struct bpf_map *map, void *key,
goto out;
}
- bpf_task_storage_lock();
sdata = bpf_local_storage_update(
task, (struct bpf_local_storage_map *)map, value, map_flags,
true, GFP_ATOMIC);
- bpf_task_storage_unlock();
err = PTR_ERR_OR_ZERO(sdata);
out:
@@ -155,8 +126,7 @@ out:
return err;
}
-static int task_storage_delete(struct task_struct *task, struct bpf_map *map,
- bool nobusy)
+static int task_storage_delete(struct task_struct *task, struct bpf_map *map)
{
struct bpf_local_storage_data *sdata;
@@ -164,12 +134,7 @@ static int task_storage_delete(struct task_struct *task, struct bpf_map *map,
if (!sdata)
return -ENOENT;
- if (!nobusy)
- return -EBUSY;
-
- bpf_selem_unlink(SELEM(sdata), false);
-
- return 0;
+ return bpf_selem_unlink(SELEM(sdata));
}
static long bpf_pid_task_storage_delete_elem(struct bpf_map *map, void *key)
@@ -194,111 +159,50 @@ static long bpf_pid_task_storage_delete_elem(struct bpf_map *map, void *key)
goto out;
}
- bpf_task_storage_lock();
- err = task_storage_delete(task, map, true);
- bpf_task_storage_unlock();
+ err = task_storage_delete(task, map);
out:
put_pid(pid);
return err;
}
-/* Called by bpf_task_storage_get*() helpers */
-static void *__bpf_task_storage_get(struct bpf_map *map,
- struct task_struct *task, void *value,
- u64 flags, gfp_t gfp_flags, bool nobusy)
+/* *gfp_flags* is a hidden argument provided by the verifier */
+BPF_CALL_5(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *,
+ task, void *, value, u64, flags, gfp_t, gfp_flags)
{
struct bpf_local_storage_data *sdata;
- sdata = task_storage_lookup(task, map, nobusy);
+ WARN_ON_ONCE(!bpf_rcu_lock_held());
+ if (flags & ~BPF_LOCAL_STORAGE_GET_F_CREATE || !task)
+ return (unsigned long)NULL;
+
+ sdata = task_storage_lookup(task, map, true);
if (sdata)
- return sdata->data;
+ return (unsigned long)sdata->data;
/* only allocate new storage, when the task is refcounted */
if (refcount_read(&task->usage) &&
- (flags & BPF_LOCAL_STORAGE_GET_F_CREATE) && nobusy) {
+ (flags & BPF_LOCAL_STORAGE_GET_F_CREATE)) {
sdata = bpf_local_storage_update(
task, (struct bpf_local_storage_map *)map, value,
BPF_NOEXIST, false, gfp_flags);
- return IS_ERR(sdata) ? NULL : sdata->data;
+ return IS_ERR(sdata) ? (unsigned long)NULL : (unsigned long)sdata->data;
}
- return NULL;
-}
-
-/* *gfp_flags* is a hidden argument provided by the verifier */
-BPF_CALL_5(bpf_task_storage_get_recur, struct bpf_map *, map, struct task_struct *,
- task, void *, value, u64, flags, gfp_t, gfp_flags)
-{
- bool nobusy;
- void *data;
-
- WARN_ON_ONCE(!bpf_rcu_lock_held());
- if (flags & ~BPF_LOCAL_STORAGE_GET_F_CREATE || !task)
- return (unsigned long)NULL;
-
- nobusy = bpf_task_storage_trylock();
- data = __bpf_task_storage_get(map, task, value, flags,
- gfp_flags, nobusy);
- if (nobusy)
- bpf_task_storage_unlock();
- return (unsigned long)data;
-}
-
-/* *gfp_flags* is a hidden argument provided by the verifier */
-BPF_CALL_5(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *,
- task, void *, value, u64, flags, gfp_t, gfp_flags)
-{
- void *data;
-
- WARN_ON_ONCE(!bpf_rcu_lock_held());
- if (flags & ~BPF_LOCAL_STORAGE_GET_F_CREATE || !task)
- return (unsigned long)NULL;
-
- bpf_task_storage_lock();
- data = __bpf_task_storage_get(map, task, value, flags,
- gfp_flags, true);
- bpf_task_storage_unlock();
- return (unsigned long)data;
-}
-
-BPF_CALL_2(bpf_task_storage_delete_recur, struct bpf_map *, map, struct task_struct *,
- task)
-{
- bool nobusy;
- int ret;
-
- WARN_ON_ONCE(!bpf_rcu_lock_held());
- if (!task)
- return -EINVAL;
-
- nobusy = bpf_task_storage_trylock();
- /* This helper must only be called from places where the lifetime of the task
- * is guaranteed. Either by being refcounted or by being protected
- * by an RCU read-side critical section.
- */
- ret = task_storage_delete(task, map, nobusy);
- if (nobusy)
- bpf_task_storage_unlock();
- return ret;
+ return (unsigned long)NULL;
}
BPF_CALL_2(bpf_task_storage_delete, struct bpf_map *, map, struct task_struct *,
task)
{
- int ret;
-
WARN_ON_ONCE(!bpf_rcu_lock_held());
if (!task)
return -EINVAL;
- bpf_task_storage_lock();
/* This helper must only be called from places where the lifetime of the task
* is guaranteed. Either by being refcounted or by being protected
* by an RCU read-side critical section.
*/
- ret = task_storage_delete(task, map, true);
- bpf_task_storage_unlock();
- return ret;
+ return task_storage_delete(task, map);
}
static int notsupp_get_next_key(struct bpf_map *map, void *key, void *next_key)
@@ -313,7 +217,7 @@ static struct bpf_map *task_storage_map_alloc(union bpf_attr *attr)
static void task_storage_map_free(struct bpf_map *map)
{
- bpf_local_storage_map_free(map, &task_cache, &bpf_task_storage_busy);
+ bpf_local_storage_map_free(map, &task_cache);
}
BTF_ID_LIST_GLOBAL_SINGLE(bpf_local_storage_map_btf_id, struct, bpf_local_storage_map)
@@ -332,17 +236,6 @@ const struct bpf_map_ops task_storage_map_ops = {
.map_owner_storage_ptr = task_storage_ptr,
};
-const struct bpf_func_proto bpf_task_storage_get_recur_proto = {
- .func = bpf_task_storage_get_recur,
- .gpl_only = false,
- .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
- .arg1_type = ARG_CONST_MAP_PTR,
- .arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL,
- .arg2_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK],
- .arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL,
- .arg4_type = ARG_ANYTHING,
-};
-
const struct bpf_func_proto bpf_task_storage_get_proto = {
.func = bpf_task_storage_get,
.gpl_only = false,
@@ -354,15 +247,6 @@ const struct bpf_func_proto bpf_task_storage_get_proto = {
.arg4_type = ARG_ANYTHING,
};
-const struct bpf_func_proto bpf_task_storage_delete_recur_proto = {
- .func = bpf_task_storage_delete_recur,
- .gpl_only = false,
- .ret_type = RET_INTEGER,
- .arg1_type = ARG_CONST_MAP_PTR,
- .arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL,
- .arg2_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK],
-};
-
const struct bpf_func_proto bpf_task_storage_delete_proto = {
.func = bpf_task_storage_delete,
.gpl_only = false,
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 0458597134da..7ac32798eb04 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -2167,12 +2167,8 @@ bpf_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_get_cgroup_classid_curr_proto;
#endif
case BPF_FUNC_task_storage_get:
- if (bpf_prog_check_recur(prog))
- return &bpf_task_storage_get_recur_proto;
return &bpf_task_storage_get_proto;
case BPF_FUNC_task_storage_delete:
- if (bpf_prog_check_recur(prog))
- return &bpf_task_storage_delete_recur_proto;
return &bpf_task_storage_delete_proto;
default:
break;