diff options
| author | Alexei Starovoitov <ast@kernel.org> | 2024-01-03 21:08:26 -0800 |
|---|---|---|
| committer | Alexei Starovoitov <ast@kernel.org> | 2024-01-03 21:08:27 -0800 |
| commit | f8506c5734902ebda5c7b4778859b46d0a2ae5f3 (patch) | |
| tree | 33e7bd54cb6f7ee386fa8abca5ab0ae9cea74d64 /tools | |
| parent | 417fa6d163df6f13fb2cfad5132eff354c8a472e (diff) | |
| parent | adc8c4549d9e74d2359c217d2478b18ecdd15c91 (diff) | |
Merge branch 'bpf-reduce-memory-usage-for-bpf_global_percpu_ma'
Yonghong Song says:
====================
bpf: Reduce memory usage for bpf_global_percpu_ma
Currently, when a bpf program intends to allocate memory for a percpu kptr,
the verifier will call bpf_mem_alloc_init() to prefill all supported
unit sizes, and this causes very large memory consumption on systems with
a large number of cpus. For example, on a 128-cpu system, the total memory
consumption with initial prefill is ~175MB. Things will become worse for
systems with even more cpus.
Patch 1 avoids unnecessary extra percpu memory allocation.
Patch 2 adds objcg to bpf_mem_alloc at init stage so objcg can be
associated with root cgroup and objcg can be passed to later
bpf_mem_alloc_percpu_unit_init().
Patch 3 addresses the memory consumption issue by avoiding prefilling
with all unit sizes, i.e. only prefilling with the user-specified size.
Patch 4 further reduces memory consumption by limiting the
number of prefill entries for percpu memory allocation.
Patch 5 has much smaller low/high watermarks for percpu allocation
to reduce memory consumption.
Patch 6 rejects percpu memory allocation with bpf_global_percpu_ma
when allocation size is greater than 512 bytes.
Patch 7 fixes the test_bpf_ma test due to the changes in Patch 5.
Patch 8 adds one test to show the verification failure log message.
Changelogs:
v5 -> v6:
. Change bpf_mem_alloc_percpu_init() to add objcg as one of parameters.
For bpf_global_percpu_ma, the objcg is NULL, corresponding to the root memcg.
v4 -> v5:
. Do not do bpf_global_percpu_ma initialization at init stage, instead
doing initialization when the verifier knows it is going to be used
by bpf prog.
. Using much smaller low/high watermarks for percpu allocation.
v3 -> v4:
. Add objcg to bpf_mem_alloc during init stage.
. Initialize objcg at init stage but use it in bpf_mem_alloc_percpu_unit_init().
. Remove check_obj_size() in bpf_mem_alloc_percpu_unit_init().
v2 -> v3:
. Clear the bpf_mem_cache if prefill fails.
. Change test_bpf_ma percpu allocation tests to use bucket_size
as allocation size instead of bucket_size - 8.
. Remove __GFP_ZERO flag from __alloc_percpu_gfp() call.
v1 -> v2:
. Avoid unnecessary extra percpu memory allocation.
. Add a separate function to do bpf_global_percpu_ma initialization.
. Promote function static 'sizes' array to file static.
. Add comments to explain to refill only one item for percpu alloc.
====================
Link: https://lore.kernel.org/r/20231222031729.1287957-1-yonghong.song@linux.dev
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Diffstat (limited to 'tools')
| -rw-r--r-- | tools/testing/selftests/bpf/prog_tests/test_bpf_ma.c | 20 | ||||
| -rw-r--r-- | tools/testing/selftests/bpf/progs/percpu_alloc_fail.c | 18 | ||||
| -rw-r--r-- | tools/testing/selftests/bpf/progs/test_bpf_ma.c | 66 |
3 files changed, 64 insertions, 40 deletions
diff --git a/tools/testing/selftests/bpf/prog_tests/test_bpf_ma.c b/tools/testing/selftests/bpf/prog_tests/test_bpf_ma.c index d3491a84b3b9..ccae0b31ac6c 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_bpf_ma.c +++ b/tools/testing/selftests/bpf/prog_tests/test_bpf_ma.c @@ -14,7 +14,8 @@ static void do_bpf_ma_test(const char *name) struct test_bpf_ma *skel; struct bpf_program *prog; struct btf *btf; - int i, err; + int i, err, id; + char tname[32]; skel = test_bpf_ma__open(); if (!ASSERT_OK_PTR(skel, "open")) @@ -25,16 +26,21 @@ static void do_bpf_ma_test(const char *name) goto out; for (i = 0; i < ARRAY_SIZE(skel->rodata->data_sizes); i++) { - char name[32]; - int id; - - snprintf(name, sizeof(name), "bin_data_%u", skel->rodata->data_sizes[i]); - id = btf__find_by_name_kind(btf, name, BTF_KIND_STRUCT); - if (!ASSERT_GT(id, 0, "bin_data")) + snprintf(tname, sizeof(tname), "bin_data_%u", skel->rodata->data_sizes[i]); + id = btf__find_by_name_kind(btf, tname, BTF_KIND_STRUCT); + if (!ASSERT_GT(id, 0, tname)) goto out; skel->rodata->data_btf_ids[i] = id; } + for (i = 0; i < ARRAY_SIZE(skel->rodata->percpu_data_sizes); i++) { + snprintf(tname, sizeof(tname), "percpu_bin_data_%u", skel->rodata->percpu_data_sizes[i]); + id = btf__find_by_name_kind(btf, tname, BTF_KIND_STRUCT); + if (!ASSERT_GT(id, 0, tname)) + goto out; + skel->rodata->percpu_data_btf_ids[i] = id; + } + prog = bpf_object__find_program_by_name(skel->obj, name); if (!ASSERT_OK_PTR(prog, "invalid prog name")) goto out; diff --git a/tools/testing/selftests/bpf/progs/percpu_alloc_fail.c b/tools/testing/selftests/bpf/progs/percpu_alloc_fail.c index 1a891d30f1fe..f2b8eb2ff76f 100644 --- a/tools/testing/selftests/bpf/progs/percpu_alloc_fail.c +++ b/tools/testing/selftests/bpf/progs/percpu_alloc_fail.c @@ -17,6 +17,10 @@ struct val_with_rb_root_t { struct bpf_spin_lock lock; }; +struct val_600b_t { + char b[600]; +}; + struct elem { long sum; struct val_t __percpu_kptr *pc; @@ -161,4 +165,18 @@ int 
BPF_PROG(test_array_map_7) return 0; } +SEC("?fentry.s/bpf_fentry_test1") +__failure __msg("bpf_percpu_obj_new type size (600) is greater than 512") +int BPF_PROG(test_array_map_8) +{ + struct val_600b_t __percpu_kptr *p; + + p = bpf_percpu_obj_new(struct val_600b_t); + if (!p) + return 0; + + bpf_percpu_obj_drop(p); + return 0; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_bpf_ma.c b/tools/testing/selftests/bpf/progs/test_bpf_ma.c index b78f4f702ae0..3494ca30fa7f 100644 --- a/tools/testing/selftests/bpf/progs/test_bpf_ma.c +++ b/tools/testing/selftests/bpf/progs/test_bpf_ma.c @@ -20,6 +20,9 @@ char _license[] SEC("license") = "GPL"; const unsigned int data_sizes[] = {16, 32, 64, 96, 128, 192, 256, 512, 1024, 2048, 4096}; const volatile unsigned int data_btf_ids[ARRAY_SIZE(data_sizes)] = {}; +const unsigned int percpu_data_sizes[] = {8, 16, 32, 64, 96, 128, 192, 256, 512}; +const volatile unsigned int percpu_data_btf_ids[ARRAY_SIZE(data_sizes)] = {}; + int err = 0; u32 pid = 0; @@ -27,10 +30,10 @@ u32 pid = 0; struct bin_data_##_size { \ char data[_size - sizeof(void *)]; \ }; \ + /* See Commit 5d8d6634ccc, force btf generation for type bin_data_##_size */ \ + struct bin_data_##_size *__bin_data_##_size; \ struct map_value_##_size { \ struct bin_data_##_size __kptr * data; \ - /* To emit BTF info for bin_data_xx */ \ - struct bin_data_##_size not_used; \ }; \ struct { \ __uint(type, BPF_MAP_TYPE_ARRAY); \ @@ -40,8 +43,12 @@ u32 pid = 0; } array_##_size SEC(".maps") #define DEFINE_ARRAY_WITH_PERCPU_KPTR(_size) \ + struct percpu_bin_data_##_size { \ + char data[_size]; \ + }; \ + struct percpu_bin_data_##_size *__percpu_bin_data_##_size; \ struct map_value_percpu_##_size { \ - struct bin_data_##_size __percpu_kptr * data; \ + struct percpu_bin_data_##_size __percpu_kptr * data; \ }; \ struct { \ __uint(type, BPF_MAP_TYPE_ARRAY); \ @@ -114,7 +121,7 @@ static __always_inline void batch_percpu_alloc(struct bpf_map *map, 
unsigned int return; } /* per-cpu allocator may not be able to refill in time */ - new = bpf_percpu_obj_new_impl(data_btf_ids[idx], NULL); + new = bpf_percpu_obj_new_impl(percpu_data_btf_ids[idx], NULL); if (!new) continue; @@ -179,7 +186,7 @@ DEFINE_ARRAY_WITH_KPTR(1024); DEFINE_ARRAY_WITH_KPTR(2048); DEFINE_ARRAY_WITH_KPTR(4096); -/* per-cpu kptr doesn't support bin_data_8 which is a zero-sized array */ +DEFINE_ARRAY_WITH_PERCPU_KPTR(8); DEFINE_ARRAY_WITH_PERCPU_KPTR(16); DEFINE_ARRAY_WITH_PERCPU_KPTR(32); DEFINE_ARRAY_WITH_PERCPU_KPTR(64); @@ -188,9 +195,6 @@ DEFINE_ARRAY_WITH_PERCPU_KPTR(128); DEFINE_ARRAY_WITH_PERCPU_KPTR(192); DEFINE_ARRAY_WITH_PERCPU_KPTR(256); DEFINE_ARRAY_WITH_PERCPU_KPTR(512); -DEFINE_ARRAY_WITH_PERCPU_KPTR(1024); -DEFINE_ARRAY_WITH_PERCPU_KPTR(2048); -DEFINE_ARRAY_WITH_PERCPU_KPTR(4096); SEC("?fentry/" SYS_PREFIX "sys_nanosleep") int test_batch_alloc_free(void *ctx) @@ -246,20 +250,18 @@ int test_batch_percpu_alloc_free(void *ctx) if ((u32)bpf_get_current_pid_tgid() != pid) return 0; - /* Alloc 128 16-bytes per-cpu objects in batch to trigger refilling, - * then free 128 16-bytes per-cpu objects in batch to trigger freeing. + /* Alloc 128 8-bytes per-cpu objects in batch to trigger refilling, + * then free 128 8-bytes per-cpu objects in batch to trigger freeing. 
*/ - CALL_BATCH_PERCPU_ALLOC_FREE(16, 128, 0); - CALL_BATCH_PERCPU_ALLOC_FREE(32, 128, 1); - CALL_BATCH_PERCPU_ALLOC_FREE(64, 128, 2); - CALL_BATCH_PERCPU_ALLOC_FREE(96, 128, 3); - CALL_BATCH_PERCPU_ALLOC_FREE(128, 128, 4); - CALL_BATCH_PERCPU_ALLOC_FREE(192, 128, 5); - CALL_BATCH_PERCPU_ALLOC_FREE(256, 128, 6); - CALL_BATCH_PERCPU_ALLOC_FREE(512, 64, 7); - CALL_BATCH_PERCPU_ALLOC_FREE(1024, 32, 8); - CALL_BATCH_PERCPU_ALLOC_FREE(2048, 16, 9); - CALL_BATCH_PERCPU_ALLOC_FREE(4096, 8, 10); + CALL_BATCH_PERCPU_ALLOC_FREE(8, 128, 0); + CALL_BATCH_PERCPU_ALLOC_FREE(16, 128, 1); + CALL_BATCH_PERCPU_ALLOC_FREE(32, 128, 2); + CALL_BATCH_PERCPU_ALLOC_FREE(64, 128, 3); + CALL_BATCH_PERCPU_ALLOC_FREE(96, 128, 4); + CALL_BATCH_PERCPU_ALLOC_FREE(128, 128, 5); + CALL_BATCH_PERCPU_ALLOC_FREE(192, 128, 6); + CALL_BATCH_PERCPU_ALLOC_FREE(256, 128, 7); + CALL_BATCH_PERCPU_ALLOC_FREE(512, 64, 8); return 0; } @@ -270,20 +272,18 @@ int test_percpu_free_through_map_free(void *ctx) if ((u32)bpf_get_current_pid_tgid() != pid) return 0; - /* Alloc 128 16-bytes per-cpu objects in batch to trigger refilling, + /* Alloc 128 8-bytes per-cpu objects in batch to trigger refilling, * then free these object through map free. 
*/ - CALL_BATCH_PERCPU_ALLOC(16, 128, 0); - CALL_BATCH_PERCPU_ALLOC(32, 128, 1); - CALL_BATCH_PERCPU_ALLOC(64, 128, 2); - CALL_BATCH_PERCPU_ALLOC(96, 128, 3); - CALL_BATCH_PERCPU_ALLOC(128, 128, 4); - CALL_BATCH_PERCPU_ALLOC(192, 128, 5); - CALL_BATCH_PERCPU_ALLOC(256, 128, 6); - CALL_BATCH_PERCPU_ALLOC(512, 64, 7); - CALL_BATCH_PERCPU_ALLOC(1024, 32, 8); - CALL_BATCH_PERCPU_ALLOC(2048, 16, 9); - CALL_BATCH_PERCPU_ALLOC(4096, 8, 10); + CALL_BATCH_PERCPU_ALLOC(8, 128, 0); + CALL_BATCH_PERCPU_ALLOC(16, 128, 1); + CALL_BATCH_PERCPU_ALLOC(32, 128, 2); + CALL_BATCH_PERCPU_ALLOC(64, 128, 3); + CALL_BATCH_PERCPU_ALLOC(96, 128, 4); + CALL_BATCH_PERCPU_ALLOC(128, 128, 5); + CALL_BATCH_PERCPU_ALLOC(192, 128, 6); + CALL_BATCH_PERCPU_ALLOC(256, 128, 7); + CALL_BATCH_PERCPU_ALLOC(512, 64, 8); return 0; } |
