| author    | Alexei Starovoitov <ast@kernel.org>      | 2026-01-02 14:31:59 -0800 |
|-----------|------------------------------------------|---------------------------|
| committer | Alexei Starovoitov <ast@kernel.org>      | 2026-01-02 14:32:00 -0800 |
| commit    | 7694ff8f6ca7f7cdf2e5636ecbe6dacaeb71678d |                           |
| tree      | 018b2c54148636e944445795b87b9525a517ba28 |                           |
| parent    | e40030a46acc07bb956068e59c614f1a17459a18 |                           |
| parent    | e66fe1bc6d25d6fbced99de6c377f1b3d961a80e |                           |
Merge branch 'memcg-accounting-for-bpf-arena'
Puranjay Mohan says:
====================
memcg accounting for BPF arena
v4: https://lore.kernel.org/all/20260102181333.3033679-1-puranjay@kernel.org/
Changes in v4->v5:
- Remove unused variables from bpf_map_alloc_pages() (CI)
v3: https://lore.kernel.org/all/20260102151852.570285-1-puranjay@kernel.org/
Changes in v3->v4:
- Do memcg set/recover in arena_reserve_pages() rather than
bpf_arena_reserve_pages() for symmetry with other kfuncs (Alexei)
v2: https://lore.kernel.org/all/20251231141434.3416822-1-puranjay@kernel.org/
Changes in v2->v3:
- Remove memcg accounting from bpf_map_alloc_pages() as the caller does
it already. (Alexei)
- Do memcg set/recover in arena_alloc/free_pages() rather than
bpf_arena_alloc/free_pages(), which reduces copy-pasting in the
sleepable/non-sleepable functions.
v1: https://lore.kernel.org/all/20251230153006.1347742-1-puranjay@kernel.org/
Changes in v1->v2:
- Return both pointers through arguments from bpf_map_memcg_enter and
make it return void. (Alexei)
- Add memcg accounting in arena_free_worker (AI)
This set adds memcg accounting logic into arena kfuncs and other places
that do allocations in arena.c.
====================
Link: https://patch.msgid.link/20260102200230.25168-1-puranjay@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
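
The core of the series is the pair of helpers bpf_map_memcg_enter()/bpf_map_memcg_exit() added in kernel/bpf/syscall.c, which select the map's memory cgroup around an allocation and then restore the previous one. The sketch below shows the calling pattern the converted sites follow; the wrapper function name and the plain kmalloc() call are illustrative, not code from the patch:

```c
#include <linux/bpf.h>	/* declares bpf_map_memcg_enter()/bpf_map_memcg_exit() after this series */
#include <linux/slab.h>

/* Hypothetical wrapper showing the pattern: charge an allocation to the memcg
 * of the process that created the map, then restore the previously active memcg.
 */
static void *example_map_alloc(const struct bpf_map *map, size_t size)
{
	struct mem_cgroup *memcg, *old_memcg;
	void *ptr;

	/* Both pointers are returned through arguments and the helper returns void
	 * (see the v1->v2 note); the CONFIG_MEMCG=n stubs simply set them to NULL.
	 */
	bpf_map_memcg_enter(map, &old_memcg, &memcg);
	/* __GFP_ACCOUNT makes the charge land in the active (map's) memcg. */
	ptr = kmalloc(size, GFP_KERNEL | __GFP_ACCOUNT);
	/* Restore the old active memcg and drop the reference taken by enter(). */
	bpf_map_memcg_exit(old_memcg, memcg);

	return ptr;
}
```

This mirrors the refactored bpf_map_kmalloc_node() and friends in the syscall.c hunks below, and it is the same bracket the arena paths (arena_vm_fault(), arena_alloc_pages(), arena_free_pages(), arena_reserve_pages(), arena_free_worker()) now apply around their kmalloc_nolock() and page allocations.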
| -rw-r--r-- | include/linux/bpf.h     | 15 |
| -rw-r--r-- | kernel/bpf/arena.c      | 29 |
| -rw-r--r-- | kernel/bpf/range_tree.c |  5 |
| -rw-r--r-- | kernel/bpf/syscall.c    | 53 |
4 files changed, 68 insertions, 34 deletions
```diff
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 9efb2ddf331c..4e9667ed6630 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -2608,6 +2608,10 @@ struct bpf_prog *bpf_prog_get_curr_or_next(u32 *id);
 int bpf_map_alloc_pages(const struct bpf_map *map, int nid,
 			unsigned long nr_pages, struct page **page_array);
 #ifdef CONFIG_MEMCG
+void bpf_map_memcg_enter(const struct bpf_map *map, struct mem_cgroup **old_memcg,
+			 struct mem_cgroup **new_memcg);
+void bpf_map_memcg_exit(struct mem_cgroup *old_memcg,
+			struct mem_cgroup *memcg);
 void *bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags,
 			   int node);
 void *bpf_map_kmalloc_nolock(const struct bpf_map *map, size_t size, gfp_t flags,
@@ -2632,6 +2636,17 @@ void __percpu *bpf_map_alloc_percpu(const struct bpf_map *map, size_t size,
 	kvcalloc(_n, _size, _flags)
 #define bpf_map_alloc_percpu(_map, _size, _align, _flags) \
 	__alloc_percpu_gfp(_size, _align, _flags)
+static inline void bpf_map_memcg_enter(const struct bpf_map *map, struct mem_cgroup **old_memcg,
+				       struct mem_cgroup **new_memcg)
+{
+	*new_memcg = NULL;
+	*old_memcg = NULL;
+}
+
+static inline void bpf_map_memcg_exit(struct mem_cgroup *old_memcg,
+				      struct mem_cgroup *memcg)
+{
+}
 #endif
 
 static inline int
diff --git a/kernel/bpf/arena.c b/kernel/bpf/arena.c
index 2274319a95e6..42fae0a9f314 100644
--- a/kernel/bpf/arena.c
+++ b/kernel/bpf/arena.c
@@ -360,6 +360,7 @@ static vm_fault_t arena_vm_fault(struct vm_fault *vmf)
 {
 	struct bpf_map *map = vmf->vma->vm_file->private_data;
 	struct bpf_arena *arena = container_of(map, struct bpf_arena, map);
+	struct mem_cgroup *new_memcg, *old_memcg;
 	struct page *page;
 	long kbase, kaddr;
 	unsigned long flags;
@@ -377,6 +378,8 @@ static vm_fault_t arena_vm_fault(struct vm_fault *vmf)
 		/* already have a page vmap-ed */
 		goto out;
 
+	bpf_map_memcg_enter(&arena->map, &old_memcg, &new_memcg);
+
 	if (arena->map.map_flags & BPF_F_SEGV_ON_FAULT)
 		/* User space requested to segfault when page is not allocated by bpf prog */
 		goto out_unlock_sigsegv;
@@ -400,12 +403,14 @@ static vm_fault_t arena_vm_fault(struct vm_fault *vmf)
 		goto out_unlock_sigsegv;
 	}
 	flush_vmap_cache(kaddr, PAGE_SIZE);
+	bpf_map_memcg_exit(old_memcg, new_memcg);
 out:
 	page_ref_add(page, 1);
 	raw_res_spin_unlock_irqrestore(&arena->spinlock, flags);
 	vmf->page = page;
 	return 0;
 out_unlock_sigsegv:
+	bpf_map_memcg_exit(old_memcg, new_memcg);
 	raw_res_spin_unlock_irqrestore(&arena->spinlock, flags);
 	return VM_FAULT_SIGSEGV;
 }
@@ -534,6 +539,7 @@ static long arena_alloc_pages(struct bpf_arena *arena, long uaddr, long page_cnt
 	/* user_vm_end/start are fixed before bpf prog runs */
 	long page_cnt_max = (arena->user_vm_end - arena->user_vm_start) >> PAGE_SHIFT;
 	u64 kern_vm_start = bpf_arena_get_kern_vm_start(arena);
+	struct mem_cgroup *new_memcg, *old_memcg;
 	struct apply_range_data data;
 	struct page **pages = NULL;
 	long remaining, mapped = 0;
@@ -555,11 +561,14 @@ static long arena_alloc_pages(struct bpf_arena *arena, long uaddr, long page_cnt
 		return 0;
 	}
 
+	bpf_map_memcg_enter(&arena->map, &old_memcg, &new_memcg);
 	/* Cap allocation size to KMALLOC_MAX_CACHE_SIZE so kmalloc_nolock() can succeed. */
 	alloc_pages = min(page_cnt, KMALLOC_MAX_CACHE_SIZE / sizeof(struct page *));
-	pages = kmalloc_nolock(alloc_pages * sizeof(struct page *), 0, NUMA_NO_NODE);
-	if (!pages)
+	pages = kmalloc_nolock(alloc_pages * sizeof(struct page *), __GFP_ACCOUNT, NUMA_NO_NODE);
+	if (!pages) {
+		bpf_map_memcg_exit(old_memcg, new_memcg);
 		return 0;
+	}
 
 	data.pages = pages;
 	if (raw_res_spin_lock_irqsave(&arena->spinlock, flags))
@@ -617,6 +626,7 @@ static long arena_alloc_pages(struct bpf_arena *arena, long uaddr, long page_cnt
 	flush_vmap_cache(kern_vm_start + uaddr32, mapped << PAGE_SHIFT);
 	raw_res_spin_unlock_irqrestore(&arena->spinlock, flags);
 	kfree_nolock(pages);
+	bpf_map_memcg_exit(old_memcg, new_memcg);
 	return clear_lo32(arena->user_vm_start) + uaddr32;
 out:
 	range_tree_set(&arena->rt, pgoff + mapped, page_cnt - mapped);
@@ -630,6 +640,7 @@ out_unlock_free_pages:
 	raw_res_spin_unlock_irqrestore(&arena->spinlock, flags);
 out_free_pages:
 	kfree_nolock(pages);
+	bpf_map_memcg_exit(old_memcg, new_memcg);
 	return 0;
 }
 
@@ -651,6 +662,7 @@ static void zap_pages(struct bpf_arena *arena, long uaddr, long page_cnt)
 
 static void arena_free_pages(struct bpf_arena *arena, long uaddr, long page_cnt, bool sleepable)
 {
+	struct mem_cgroup *new_memcg, *old_memcg;
 	u64 full_uaddr, uaddr_end;
 	long kaddr, pgoff;
 	struct page *page;
@@ -671,6 +683,7 @@ static void arena_free_pages(struct bpf_arena *arena, long uaddr, long page_cnt,
 	page_cnt = (uaddr_end - full_uaddr) >> PAGE_SHIFT;
 	pgoff = compute_pgoff(arena, uaddr);
 
+	bpf_map_memcg_enter(&arena->map, &old_memcg, &new_memcg);
 	if (!sleepable)
 		goto defer;
 
@@ -709,11 +722,13 @@ static void arena_free_pages(struct bpf_arena *arena, long uaddr, long page_cnt,
 			zap_pages(arena, full_uaddr, 1);
 		__free_page(page);
 	}
+	bpf_map_memcg_exit(old_memcg, new_memcg);
 
 	return;
 
 defer:
-	s = kmalloc_nolock(sizeof(struct arena_free_span), 0, -1);
+	s = kmalloc_nolock(sizeof(struct arena_free_span), __GFP_ACCOUNT, -1);
+	bpf_map_memcg_exit(old_memcg, new_memcg);
 	if (!s)
 		/*
 		 * If allocation fails in non-sleepable context, pages are intentionally left
@@ -735,6 +750,7 @@ defer:
 static int arena_reserve_pages(struct bpf_arena *arena, long uaddr, u32 page_cnt)
 {
 	long page_cnt_max = (arena->user_vm_end - arena->user_vm_start) >> PAGE_SHIFT;
+	struct mem_cgroup *new_memcg, *old_memcg;
 	unsigned long flags;
 	long pgoff;
 	int ret;
@@ -757,7 +773,9 @@ static int arena_reserve_pages(struct bpf_arena *arena, long uaddr, u32 page_cnt
 	}
 
 	/* "Allocate" the region to prevent it from being allocated. */
+	bpf_map_memcg_enter(&arena->map, &old_memcg, &new_memcg);
 	ret = range_tree_clear(&arena->rt, pgoff, page_cnt);
+	bpf_map_memcg_exit(old_memcg, new_memcg);
 out:
 	raw_res_spin_unlock_irqrestore(&arena->spinlock, flags);
 	return ret;
@@ -766,6 +784,7 @@ out:
 static void arena_free_worker(struct work_struct *work)
 {
 	struct bpf_arena *arena = container_of(work, struct bpf_arena, free_work);
+	struct mem_cgroup *new_memcg, *old_memcg;
 	struct llist_node *list, *pos, *t;
 	struct arena_free_span *s;
 	u64 arena_vm_start, user_vm_start;
@@ -780,6 +799,8 @@ static void arena_free_worker(struct work_struct *work)
 		return;
 	}
 
+	bpf_map_memcg_enter(&arena->map, &old_memcg, &new_memcg);
+
 	init_llist_head(&free_pages);
 	arena_vm_start = bpf_arena_get_kern_vm_start(arena);
 	user_vm_start = bpf_arena_get_user_vm_start(arena);
@@ -820,6 +841,8 @@ static void arena_free_worker(struct work_struct *work)
 		page = llist_entry(pos, struct page, pcp_llist);
 		__free_page(page);
 	}
+
+	bpf_map_memcg_exit(old_memcg, new_memcg);
 }
 
 static void arena_free_irq(struct irq_work *iw)
diff --git a/kernel/bpf/range_tree.c b/kernel/bpf/range_tree.c
index 99c63d982c5d..2f28886f3ff7 100644
--- a/kernel/bpf/range_tree.c
+++ b/kernel/bpf/range_tree.c
@@ -149,7 +149,8 @@ int range_tree_clear(struct range_tree *rt, u32 start, u32 len)
 			range_it_insert(rn, rt);
 
 			/* Add a range */
-			new_rn = kmalloc_nolock(sizeof(struct range_node), 0, NUMA_NO_NODE);
+			new_rn = kmalloc_nolock(sizeof(struct range_node), __GFP_ACCOUNT,
+						NUMA_NO_NODE);
 			if (!new_rn)
 				return -ENOMEM;
 			new_rn->rn_start = last + 1;
@@ -234,7 +235,7 @@ int range_tree_set(struct range_tree *rt, u32 start, u32 len)
 		right->rn_start = start;
 		range_it_insert(right, rt);
 	} else {
-		left = kmalloc_nolock(sizeof(struct range_node), 0, NUMA_NO_NODE);
+		left = kmalloc_nolock(sizeof(struct range_node), __GFP_ACCOUNT, NUMA_NO_NODE);
 		if (!left)
 			return -ENOMEM;
 		left->rn_start = start;
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index a4d38272d8bc..6dd2ad2f9e81 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -505,17 +505,29 @@ static struct mem_cgroup *bpf_map_get_memcg(const struct bpf_map *map)
 	return root_mem_cgroup;
 }
 
+void bpf_map_memcg_enter(const struct bpf_map *map, struct mem_cgroup **old_memcg,
+			 struct mem_cgroup **new_memcg)
+{
+	*new_memcg = bpf_map_get_memcg(map);
+	*old_memcg = set_active_memcg(*new_memcg);
+}
+
+void bpf_map_memcg_exit(struct mem_cgroup *old_memcg,
+			struct mem_cgroup *new_memcg)
+{
+	set_active_memcg(old_memcg);
+	mem_cgroup_put(new_memcg);
+}
+
 void *bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags,
 			   int node)
 {
 	struct mem_cgroup *memcg, *old_memcg;
 	void *ptr;
 
-	memcg = bpf_map_get_memcg(map);
-	old_memcg = set_active_memcg(memcg);
+	bpf_map_memcg_enter(map, &old_memcg, &memcg);
 	ptr = kmalloc_node(size, flags | __GFP_ACCOUNT, node);
-	set_active_memcg(old_memcg);
-	mem_cgroup_put(memcg);
+	bpf_map_memcg_exit(old_memcg, memcg);
 
 	return ptr;
 }
@@ -526,11 +538,9 @@ void *bpf_map_kmalloc_nolock(const struct bpf_map *map, size_t size, gfp_t flags
 	struct mem_cgroup *memcg, *old_memcg;
 	void *ptr;
 
-	memcg = bpf_map_get_memcg(map);
-	old_memcg = set_active_memcg(memcg);
+	bpf_map_memcg_enter(map, &old_memcg, &memcg);
 	ptr = kmalloc_nolock(size, flags | __GFP_ACCOUNT, node);
-	set_active_memcg(old_memcg);
-	mem_cgroup_put(memcg);
+	bpf_map_memcg_exit(old_memcg, memcg);
 
 	return ptr;
 }
@@ -540,11 +550,9 @@ void *bpf_map_kzalloc(const struct bpf_map *map, size_t size, gfp_t flags)
 	struct mem_cgroup *memcg, *old_memcg;
 	void *ptr;
 
-	memcg = bpf_map_get_memcg(map);
-	old_memcg = set_active_memcg(memcg);
+	bpf_map_memcg_enter(map, &old_memcg, &memcg);
 	ptr = kzalloc(size, flags | __GFP_ACCOUNT);
-	set_active_memcg(old_memcg);
-	mem_cgroup_put(memcg);
+	bpf_map_memcg_exit(old_memcg, memcg);
 
 	return ptr;
 }
@@ -555,11 +563,9 @@ void *bpf_map_kvcalloc(struct bpf_map *map, size_t n, size_t size,
 	struct mem_cgroup *memcg, *old_memcg;
 	void *ptr;
 
-	memcg = bpf_map_get_memcg(map);
-	old_memcg = set_active_memcg(memcg);
+	bpf_map_memcg_enter(map, &old_memcg, &memcg);
 	ptr = kvcalloc(n, size, flags | __GFP_ACCOUNT);
-	set_active_memcg(old_memcg);
-	mem_cgroup_put(memcg);
+	bpf_map_memcg_exit(old_memcg, memcg);
 
 	return ptr;
 }
@@ -570,11 +576,9 @@ void __percpu *bpf_map_alloc_percpu(const struct bpf_map *map, size_t size,
 	struct mem_cgroup *memcg, *old_memcg;
 	void __percpu *ptr;
 
-	memcg = bpf_map_get_memcg(map);
-	old_memcg = set_active_memcg(memcg);
+	bpf_map_memcg_enter(map, &old_memcg, &memcg);
 	ptr = __alloc_percpu_gfp(size, align, flags | __GFP_ACCOUNT);
-	set_active_memcg(old_memcg);
-	mem_cgroup_put(memcg);
+	bpf_map_memcg_exit(old_memcg, memcg);
 
 	return ptr;
 }
@@ -612,12 +616,7 @@ int bpf_map_alloc_pages(const struct bpf_map *map, int nid,
 	unsigned long i, j;
 	struct page *pg;
 	int ret = 0;
-#ifdef CONFIG_MEMCG
-	struct mem_cgroup *memcg, *old_memcg;
 
-	memcg = bpf_map_get_memcg(map);
-	old_memcg = set_active_memcg(memcg);
-#endif
 
 	for (i = 0; i < nr_pages; i++) {
 		pg = __bpf_alloc_page(nid);
@@ -631,10 +630,6 @@ int bpf_map_alloc_pages(const struct bpf_map *map, int nid,
 		break;
 	}
 
-#ifdef CONFIG_MEMCG
-	set_active_memcg(old_memcg);
-	mem_cgroup_put(memcg);
-#endif
 	return ret;
 }
```
