From b2472efe4316b2687c153919c1513a098bd82c17 Mon Sep 17 00:00:00 2001 From: Peng Zhang Date: Fri, 27 Oct 2023 11:38:37 +0800 Subject: maple_tree: introduce {mtree,mas}_lock_nested() In some cases, nested locks may be needed, so {mtree,mas}_lock_nested is introduced. For example, when duplicating maple tree, we need to hold the locks of two trees, in which case nested locks are needed. At the same time, add the definition of spin_lock_nested() in tools for testing. Link: https://lkml.kernel.org/r/20231027033845.90608-3-zhangpeng.00@bytedance.com Signed-off-by: Peng Zhang Reviewed-by: Liam R. Howlett Cc: Christian Brauner Cc: Jonathan Corbet Cc: Mateusz Guzik Cc: Mathieu Desnoyers Cc: Matthew Wilcox Cc: Michael S. Tsirkin Cc: Mike Christie Cc: Nicholas Piggin Cc: Peter Zijlstra Cc: Suren Baghdasaryan Signed-off-by: Andrew Morton --- tools/include/linux/spinlock.h | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/include/linux/spinlock.h b/tools/include/linux/spinlock.h index 622266b197d0..a6cdf25b6b9d 100644 --- a/tools/include/linux/spinlock.h +++ b/tools/include/linux/spinlock.h @@ -11,6 +11,7 @@ #define spin_lock_init(x) pthread_mutex_init(x, NULL) #define spin_lock(x) pthread_mutex_lock(x) +#define spin_lock_nested(x, subclass) pthread_mutex_lock(x) #define spin_unlock(x) pthread_mutex_unlock(x) #define spin_lock_bh(x) pthread_mutex_lock(x) #define spin_unlock_bh(x) pthread_mutex_unlock(x) -- cgit v1.2.3 From 46c99e26f2f86260fed226cab217d0b3ca8dca56 Mon Sep 17 00:00:00 2001 From: Peng Zhang Date: Fri, 27 Oct 2023 11:38:39 +0800 Subject: radix tree test suite: align kmem_cache_alloc_bulk() with kernel behavior. When kmem_cache_alloc_bulk() fails to allocate, leave the freed pointers in the array. This enables a more accurate simulation of the kernel's behavior and allows for testing potential double-free scenarios. Link: https://lkml.kernel.org/r/20231027033845.90608-5-zhangpeng.00@bytedance.com Signed-off-by: Peng Zhang Reviewed-by: Liam R. Howlett Cc: Christian Brauner Cc: Jonathan Corbet Cc: Mateusz Guzik Cc: Mathieu Desnoyers Cc: Matthew Wilcox Cc: Michael S. Tsirkin Cc: Mike Christie Cc: Nicholas Piggin Cc: Peter Zijlstra Cc: Suren Baghdasaryan Signed-off-by: Andrew Morton --- tools/testing/radix-tree/linux.c | 45 +++++++++++++++++++++++++++++----------- 1 file changed, 33 insertions(+), 12 deletions(-) (limited to 'tools') diff --git a/tools/testing/radix-tree/linux.c b/tools/testing/radix-tree/linux.c index 61fe2601cb3a..4eb442206d01 100644 --- a/tools/testing/radix-tree/linux.c +++ b/tools/testing/radix-tree/linux.c @@ -93,13 +93,9 @@ void *kmem_cache_alloc_lru(struct kmem_cache *cachep, struct list_lru *lru, return p; } -void kmem_cache_free_locked(struct kmem_cache *cachep, void *objp) +void __kmem_cache_free_locked(struct kmem_cache *cachep, void *objp) { assert(objp); - uatomic_dec(&nr_allocated); - uatomic_dec(&cachep->nr_allocated); - if (kmalloc_verbose) - printf("Freeing %p to slab\n", objp); if (cachep->nr_objs > 10 || cachep->align) { memset(objp, POISON_FREE, cachep->size); free(objp); @@ -111,6 +107,15 @@ void kmem_cache_free_locked(struct kmem_cache *cachep, void *objp) } } +void kmem_cache_free_locked(struct kmem_cache *cachep, void *objp) +{ + uatomic_dec(&nr_allocated); + uatomic_dec(&cachep->nr_allocated); + if (kmalloc_verbose) + printf("Freeing %p to slab\n", objp); + __kmem_cache_free_locked(cachep, objp); +} + void kmem_cache_free(struct kmem_cache *cachep, void *objp) { pthread_mutex_lock(&cachep->lock); @@ -141,18 +146,17 @@ int kmem_cache_alloc_bulk(struct kmem_cache *cachep, gfp_t gfp, size_t size, if (kmalloc_verbose) pr_debug("Bulk alloc %lu\n", size); - if (!(gfp & __GFP_DIRECT_RECLAIM)) { - if (cachep->non_kernel < size) - return 0; - - cachep->non_kernel -= size; - } - pthread_mutex_lock(&cachep->lock); if (cachep->nr_objs >= size) { struct radix_tree_node *node; for (i = 0; i < size; i++) { + if (!(gfp & __GFP_DIRECT_RECLAIM)) { + if (!cachep->non_kernel) + break; + cachep->non_kernel--; + } + node = cachep->objs; cachep->nr_objs--; cachep->objs = node->parent; @@ -163,11 +167,19 @@ int kmem_cache_alloc_bulk(struct kmem_cache *cachep, gfp_t gfp, size_t size, } else { pthread_mutex_unlock(&cachep->lock); for (i = 0; i < size; i++) { + if (!(gfp & __GFP_DIRECT_RECLAIM)) { + if (!cachep->non_kernel) + break; + cachep->non_kernel--; + } + if (cachep->align) { posix_memalign(&p[i], cachep->align, cachep->size); } else { p[i] = malloc(cachep->size); + if (!p[i]) + break; } if (cachep->ctor) cachep->ctor(p[i]); @@ -176,6 +188,15 @@ int kmem_cache_alloc_bulk(struct kmem_cache *cachep, gfp_t gfp, size_t size, } } + if (i < size) { + size = i; + pthread_mutex_lock(&cachep->lock); + for (i = 0; i < size; i++) + __kmem_cache_free_locked(cachep, p[i]); + pthread_mutex_unlock(&cachep->lock); + return 0; + } + for (i = 0; i < size; i++) { uatomic_inc(&nr_allocated); uatomic_inc(&cachep->nr_allocated); -- cgit v1.2.3 From a2587a7e8d37885dc063255f5400a66299b42e48 Mon Sep 17 00:00:00 2001 From: Peng Zhang Date: Fri, 27 Oct 2023 11:38:40 +0800 Subject: maple_tree: add test for mtree_dup() Add test for mtree_dup(). Test by duplicating different maple trees and then comparing the two trees. Includes tests for duplicating full trees and memory allocation failures on different nodes. Link: https://lkml.kernel.org/r/20231027033845.90608-6-zhangpeng.00@bytedance.com Signed-off-by: Peng Zhang Reviewed-by: Liam R. Howlett Cc: Christian Brauner Cc: Jonathan Corbet Cc: Mateusz Guzik Cc: Mathieu Desnoyers Cc: Matthew Wilcox Cc: Michael S. Tsirkin Cc: Mike Christie Cc: Nicholas Piggin Cc: Peter Zijlstra Cc: Suren Baghdasaryan Signed-off-by: Andrew Morton --- tools/testing/radix-tree/maple.c | 361 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 361 insertions(+) (limited to 'tools') diff --git a/tools/testing/radix-tree/maple.c b/tools/testing/radix-tree/maple.c index e5da1cad70ba..12b3390e9591 100644 --- a/tools/testing/radix-tree/maple.c +++ b/tools/testing/radix-tree/maple.c @@ -35857,6 +35857,363 @@ static noinline void __init check_locky(struct maple_tree *mt) mt_clear_in_rcu(mt); } +/* + * Compares two nodes except for the addresses stored in the nodes. + * Returns zero if they are the same, otherwise returns non-zero. + */ +static int __init compare_node(struct maple_enode *enode_a, + struct maple_enode *enode_b) +{ + struct maple_node *node_a, *node_b; + struct maple_node a, b; + void **slots_a, **slots_b; /* Do not use the rcu tag. */ + enum maple_type type; + int i; + + if (((unsigned long)enode_a & MAPLE_NODE_MASK) != + ((unsigned long)enode_b & MAPLE_NODE_MASK)) { + pr_err("The lower 8 bits of enode are different.\n"); + return -1; + } + + type = mte_node_type(enode_a); + node_a = mte_to_node(enode_a); + node_b = mte_to_node(enode_b); + a = *node_a; + b = *node_b; + + /* Do not compare addresses. */ + if (ma_is_root(node_a) || ma_is_root(node_b)) { + a.parent = (struct maple_pnode *)((unsigned long)a.parent & + MA_ROOT_PARENT); + b.parent = (struct maple_pnode *)((unsigned long)b.parent & + MA_ROOT_PARENT); + } else { + a.parent = (struct maple_pnode *)((unsigned long)a.parent & + MAPLE_NODE_MASK); + b.parent = (struct maple_pnode *)((unsigned long)b.parent & + MAPLE_NODE_MASK); + } + + if (a.parent != b.parent) { + pr_err("The lower 8 bits of parents are different. %p %p\n", + a.parent, b.parent); + return -1; + } + + /* + * If it is a leaf node, the slots do not contain the node address, and + * no special processing of slots is required. + */ + if (ma_is_leaf(type)) + goto cmp; + + slots_a = ma_slots(&a, type); + slots_b = ma_slots(&b, type); + + for (i = 0; i < mt_slots[type]; i++) { + if (!slots_a[i] && !slots_b[i]) + break; + + if (!slots_a[i] || !slots_b[i]) { + pr_err("The number of slots is different.\n"); + return -1; + } + + /* Do not compare addresses in slots. */ + ((unsigned long *)slots_a)[i] &= MAPLE_NODE_MASK; + ((unsigned long *)slots_b)[i] &= MAPLE_NODE_MASK; + } + +cmp: + /* + * Compare all contents of two nodes, including parent (except address), + * slots (except address), pivots, gaps and metadata. + */ + return memcmp(&a, &b, sizeof(struct maple_node)); +} + +/* + * Compare two trees and return 0 if they are the same, non-zero otherwise. + */ +static int __init compare_tree(struct maple_tree *mt_a, struct maple_tree *mt_b) +{ + MA_STATE(mas_a, mt_a, 0, 0); + MA_STATE(mas_b, mt_b, 0, 0); + + if (mt_a->ma_flags != mt_b->ma_flags) { + pr_err("The flags of the two trees are different.\n"); + return -1; + } + + mas_dfs_preorder(&mas_a); + mas_dfs_preorder(&mas_b); + + if (mas_is_ptr(&mas_a) || mas_is_ptr(&mas_b)) { + if (!(mas_is_ptr(&mas_a) && mas_is_ptr(&mas_b))) { + pr_err("One is MAS_ROOT and the other is not.\n"); + return -1; + } + return 0; + } + + while (!mas_is_none(&mas_a) || !mas_is_none(&mas_b)) { + + if (mas_is_none(&mas_a) || mas_is_none(&mas_b)) { + pr_err("One is MAS_NONE and the other is not.\n"); + return -1; + } + + if (mas_a.min != mas_b.min || + mas_a.max != mas_b.max) { + pr_err("mas->min, mas->max do not match.\n"); + return -1; + } + + if (compare_node(mas_a.node, mas_b.node)) { + pr_err("The contents of nodes %p and %p are different.\n", + mas_a.node, mas_b.node); + mt_dump(mt_a, mt_dump_dec); + mt_dump(mt_b, mt_dump_dec); + return -1; + } + + mas_dfs_preorder(&mas_a); + mas_dfs_preorder(&mas_b); + } + + return 0; +} + +static __init void mas_subtree_max_range(struct ma_state *mas) +{ + unsigned long limit = mas->max; + MA_STATE(newmas, mas->tree, 0, 0); + void *entry; + + mas_for_each(mas, entry, limit) { + if (mas->last - mas->index >= + newmas.last - newmas.index) { + newmas = *mas; + } + } + + *mas = newmas; +} + +/* + * build_full_tree() - Build a full tree. + * @mt: The tree to build. + * @flags: Use @flags to build the tree. + * @height: The height of the tree to build. + * + * Build a tree with full leaf nodes and internal nodes. Note that the height + * should not exceed 3, otherwise it will take a long time to build. + * Return: zero if the build is successful, non-zero if it fails. + */ +static __init int build_full_tree(struct maple_tree *mt, unsigned int flags, + int height) +{ + MA_STATE(mas, mt, 0, 0); + unsigned long step; + int ret = 0, cnt = 1; + enum maple_type type; + + mt_init_flags(mt, flags); + mtree_insert_range(mt, 0, ULONG_MAX, xa_mk_value(5), GFP_KERNEL); + + mtree_lock(mt); + + while (1) { + mas_set(&mas, 0); + if (mt_height(mt) < height) { + mas.max = ULONG_MAX; + goto store; + } + + while (1) { + mas_dfs_preorder(&mas); + if (mas_is_none(&mas)) + goto unlock; + + type = mte_node_type(mas.node); + if (mas_data_end(&mas) + 1 < mt_slots[type]) { + mas_set(&mas, mas.min); + goto store; + } + } +store: + mas_subtree_max_range(&mas); + step = mas.last - mas.index; + if (step < 1) { + ret = -1; + goto unlock; + } + + step /= 2; + mas.last = mas.index + step; + mas_store_gfp(&mas, xa_mk_value(5), + GFP_KERNEL); + ++cnt; + } +unlock: + mtree_unlock(mt); + + MT_BUG_ON(mt, mt_height(mt) != height); + /* pr_info("height:%u number of elements:%d\n", mt_height(mt), cnt); */ + return ret; +} + +static noinline void __init check_mtree_dup(struct maple_tree *mt) +{ + DEFINE_MTREE(new); + int i, j, ret, count = 0; + unsigned int rand_seed = 17, rand; + + /* store a value at [0, 0] */ + mt_init_flags(mt, 0); + mtree_store_range(mt, 0, 0, xa_mk_value(0), GFP_KERNEL); + ret = mtree_dup(mt, &new, GFP_KERNEL); + MT_BUG_ON(&new, ret); + mt_validate(&new); + if (compare_tree(mt, &new)) + MT_BUG_ON(&new, 1); + + mtree_destroy(mt); + mtree_destroy(&new); + + /* The two trees have different attributes. */ + mt_init_flags(mt, 0); + mt_init_flags(&new, MT_FLAGS_ALLOC_RANGE); + ret = mtree_dup(mt, &new, GFP_KERNEL); + MT_BUG_ON(&new, ret != -EINVAL); + mtree_destroy(mt); + mtree_destroy(&new); + + /* The new tree is not empty */ + mt_init_flags(mt, 0); + mt_init_flags(&new, 0); + mtree_store(&new, 5, xa_mk_value(5), GFP_KERNEL); + ret = mtree_dup(mt, &new, GFP_KERNEL); + MT_BUG_ON(&new, ret != -EINVAL); + mtree_destroy(mt); + mtree_destroy(&new); + + /* Test for duplicating full trees. */ + for (i = 1; i <= 3; i++) { + ret = build_full_tree(mt, 0, i); + MT_BUG_ON(mt, ret); + mt_init_flags(&new, 0); + + ret = mtree_dup(mt, &new, GFP_KERNEL); + MT_BUG_ON(&new, ret); + mt_validate(&new); + if (compare_tree(mt, &new)) + MT_BUG_ON(&new, 1); + + mtree_destroy(mt); + mtree_destroy(&new); + } + + for (i = 1; i <= 3; i++) { + ret = build_full_tree(mt, MT_FLAGS_ALLOC_RANGE, i); + MT_BUG_ON(mt, ret); + mt_init_flags(&new, MT_FLAGS_ALLOC_RANGE); + + ret = mtree_dup(mt, &new, GFP_KERNEL); + MT_BUG_ON(&new, ret); + mt_validate(&new); + if (compare_tree(mt, &new)) + MT_BUG_ON(&new, 1); + + mtree_destroy(mt); + mtree_destroy(&new); + } + + /* Test for normal duplicating. */ + for (i = 0; i < 1000; i += 3) { + if (i & 1) { + mt_init_flags(mt, 0); + mt_init_flags(&new, 0); + } else { + mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE); + mt_init_flags(&new, MT_FLAGS_ALLOC_RANGE); + } + + for (j = 0; j < i; j++) { + mtree_store_range(mt, j * 10, j * 10 + 5, + xa_mk_value(j), GFP_KERNEL); + } + + ret = mtree_dup(mt, &new, GFP_KERNEL); + MT_BUG_ON(&new, ret); + mt_validate(&new); + if (compare_tree(mt, &new)) + MT_BUG_ON(&new, 1); + + mtree_destroy(mt); + mtree_destroy(&new); + } + + /* Test memory allocation failed. */ + mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE); + for (i = 0; i < 30; i += 3) { + mtree_store_range(mt, j * 10, j * 10 + 5, + xa_mk_value(j), GFP_KERNEL); + } + + /* Failed at the first node. */ + mt_init_flags(&new, MT_FLAGS_ALLOC_RANGE); + mt_set_non_kernel(0); + ret = mtree_dup(mt, &new, GFP_NOWAIT); + mt_set_non_kernel(0); + MT_BUG_ON(&new, ret != -ENOMEM); + mtree_destroy(mt); + mtree_destroy(&new); + + /* Random maple tree fails at a random node. */ + for (i = 0; i < 1000; i += 3) { + if (i & 1) { + mt_init_flags(mt, 0); + mt_init_flags(&new, 0); + } else { + mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE); + mt_init_flags(&new, MT_FLAGS_ALLOC_RANGE); + } + + for (j = 0; j < i; j++) { + mtree_store_range(mt, j * 10, j * 10 + 5, + xa_mk_value(j), GFP_KERNEL); + } + /* + * The rand() library function is not used, so we can generate + * the same random numbers on any platform. + */ + rand_seed = rand_seed * 1103515245 + 12345; + rand = rand_seed / 65536 % 128; + mt_set_non_kernel(rand); + + ret = mtree_dup(mt, &new, GFP_NOWAIT); + mt_set_non_kernel(0); + if (ret != 0) { + MT_BUG_ON(&new, ret != -ENOMEM); + count++; + mtree_destroy(mt); + continue; + } + + mt_validate(&new); + if (compare_tree(mt, &new)) + MT_BUG_ON(&new, 1); + + mtree_destroy(mt); + mtree_destroy(&new); + } + + /* pr_info("mtree_dup() fail %d times\n", count); */ + BUG_ON(!count); +} + extern void test_kmem_cache_bulk(void); void farmer_tests(void) @@ -35904,6 +36261,10 @@ void farmer_tests(void) check_null_expand(&tree); mtree_destroy(&tree); + mt_init_flags(&tree, 0); + check_mtree_dup(&tree); + mtree_destroy(&tree); + /* RCU testing */ mt_init_flags(&tree, 0); check_erase_testset(&tree); -- cgit v1.2.3 From f670fa1caadb4ea532a89012c5451e4c6789bfcc Mon Sep 17 00:00:00 2001 From: Peng Zhang Date: Fri, 27 Oct 2023 11:38:42 +0800 Subject: maple_tree: skip other tests when BENCH is enabled Skip other tests when BENCH is enabled so that performance can be measured in user space. Link: https://lkml.kernel.org/r/20231027033845.90608-8-zhangpeng.00@bytedance.com Signed-off-by: Peng Zhang Reviewed-by: Liam R. Howlett Cc: Christian Brauner Cc: Jonathan Corbet Cc: Mateusz Guzik Cc: Mathieu Desnoyers Cc: Matthew Wilcox Cc: Michael S. Tsirkin Cc: Mike Christie Cc: Nicholas Piggin Cc: Peter Zijlstra Cc: Suren Baghdasaryan Signed-off-by: Andrew Morton --- tools/testing/radix-tree/maple.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools') diff --git a/tools/testing/radix-tree/maple.c b/tools/testing/radix-tree/maple.c index 12b3390e9591..cb5358674521 100644 --- a/tools/testing/radix-tree/maple.c +++ b/tools/testing/radix-tree/maple.c @@ -36299,7 +36299,9 @@ void farmer_tests(void) void maple_tree_tests(void) { +#if !defined(BENCH) farmer_tests(); +#endif maple_tree_seed(); maple_tree_harvest(); } -- cgit v1.2.3 From 446e1867e6df3cbdd19af6be8f8f4ed56176adb4 Mon Sep 17 00:00:00 2001 From: Peng Zhang Date: Fri, 27 Oct 2023 11:38:43 +0800 Subject: maple_tree: update check_forking() and bench_forking() Updated check_forking() and bench_forking() to use __mt_dup() to duplicate maple tree. Link: https://lkml.kernel.org/r/20231027033845.90608-9-zhangpeng.00@bytedance.com Signed-off-by: Peng Zhang Reviewed-by: Liam R. Howlett Cc: Christian Brauner Cc: Jonathan Corbet Cc: Mateusz Guzik Cc: Mathieu Desnoyers Cc: Matthew Wilcox Cc: Michael S. Tsirkin Cc: Mike Christie Cc: Nicholas Piggin Cc: Peter Zijlstra Cc: Suren Baghdasaryan Signed-off-by: Andrew Morton --- tools/include/linux/rwsem.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'tools') diff --git a/tools/include/linux/rwsem.h b/tools/include/linux/rwsem.h index 83971b3cbfce..f8bffd4a987c 100644 --- a/tools/include/linux/rwsem.h +++ b/tools/include/linux/rwsem.h @@ -37,4 +37,8 @@ static inline int up_write(struct rw_semaphore *sem) { return pthread_rwlock_unlock(&sem->lock); } + +#define down_read_nested(sem, subclass) down_read(sem) +#define down_write_nested(sem, subclass) down_write(sem) + #endif /* _TOOLS_RWSEM_H */ -- cgit v1.2.3 From e6a9a2cbc13bf43e4c03f57666e93d511249d5d7 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Mon, 6 Nov 2023 14:09:58 -0800 Subject: fs/proc/task_mmu: report SOFT_DIRTY bits through the PAGEMAP_SCAN ioctl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The PAGEMAP_SCAN ioctl returns information regarding page table entries. It is more efficient compared to reading pagemap files. CRIU can start to utilize this ioctl, but it needs info about soft-dirty bits to track memory changes. We are aware of a new method for tracking memory changes implemented in the PAGEMAP_SCAN ioctl. For CRIU, the primary advantage of this method is its usability by unprivileged users. However, it is not feasible to transparently replace the soft-dirty tracker with the new one. The main problem here is userfault descriptors that have to be preserved between pre-dump iterations. It means criu continues supporting the soft-dirty method to avoid breakage for current users. The new method will be implemented as a separate feature. [avagin@google.com: update tools/include/uapi/linux/fs.h] Link: https://lkml.kernel.org/r/20231107164139.576046-1-avagin@google.com Link: https://lkml.kernel.org/r/20231106220959.296568-1-avagin@google.com Signed-off-by: Andrei Vagin Reviewed-by: Muhammad Usama Anjum Cc: Michał Mirosław Signed-off-by: Andrew Morton --- tools/include/uapi/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/include/uapi/linux/fs.h b/tools/include/uapi/linux/fs.h index da43810b7485..48ad69f7722e 100644 --- a/tools/include/uapi/linux/fs.h +++ b/tools/include/uapi/linux/fs.h @@ -316,6 +316,7 @@ typedef int __bitwise __kernel_rwf_t; #define PAGE_IS_SWAPPED (1 << 4) #define PAGE_IS_PFNZERO (1 << 5) #define PAGE_IS_HUGE (1 << 6) +#define PAGE_IS_SOFT_DIRTY (1 << 7) /* * struct page_region - Page region with flags -- cgit v1.2.3 From 600bca580579d8d8454cc8fe3290e2f8b9c01884 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Mon, 6 Nov 2023 14:09:59 -0800 Subject: selftests/mm: check that PAGEMAP_SCAN returns correct categories MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Right now, tests read page flags from /proc/pid/pagemap files. With this change, tests will check that PAGEMAP_SCAN return correct information too. [colin.i.king@gmail.com: fix spelling mistake "succedded" -> "succeeded"] Link: https://lkml.kernel.org/r/20231121093104.1728332-1-colin.i.king@gmail.com Link: https://lkml.kernel.org/r/20231106220959.296568-2-avagin@google.com Signed-off-by: Andrei Vagin Signed-off-by: Colin Ian King Reviewed-by: Muhammad Usama Anjum Tested-by: Muhammad Usama Anjum Cc: Michał Mirosław [avagin@google.com: allow running tests on old kernels] Link: https://lkml.kernel.org/r/20231117181127.2574897-1-avagin@google.com Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/vm_util.c | 80 ++++++++++++++++++++++++++++++++++-- 1 file changed, 77 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/mm/vm_util.c b/tools/testing/selftests/mm/vm_util.c index 3082b40492dd..05736c615734 100644 --- a/tools/testing/selftests/mm/vm_util.c +++ b/tools/testing/selftests/mm/vm_util.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include "../kselftest.h" @@ -28,19 +29,92 @@ uint64_t pagemap_get_entry(int fd, char *start) return entry; } +static uint64_t __pagemap_scan_get_categories(int fd, char *start, struct page_region *r) +{ + struct pm_scan_arg arg; + + arg.start = (uintptr_t)start; + arg.end = (uintptr_t)(start + psize()); + arg.vec = (uintptr_t)r; + arg.vec_len = 1; + arg.flags = 0; + arg.size = sizeof(struct pm_scan_arg); + arg.max_pages = 0; + arg.category_inverted = 0; + arg.category_mask = 0; + arg.category_anyof_mask = PAGE_IS_WPALLOWED | PAGE_IS_WRITTEN | PAGE_IS_FILE | + PAGE_IS_PRESENT | PAGE_IS_SWAPPED | PAGE_IS_PFNZERO | + PAGE_IS_HUGE | PAGE_IS_SOFT_DIRTY; + arg.return_mask = arg.category_anyof_mask; + + return ioctl(fd, PAGEMAP_SCAN, &arg); +} + +static uint64_t pagemap_scan_get_categories(int fd, char *start) +{ + struct page_region r; + long ret; + + ret = __pagemap_scan_get_categories(fd, start, &r); + if (ret < 0) + ksft_exit_fail_msg("PAGEMAP_SCAN failed: %s\n", strerror(errno)); + if (ret == 0) + return 0; + return r.categories; +} + +/* `start` is any valid address. */ +static bool pagemap_scan_supported(int fd, char *start) +{ + static int supported = -1; + int ret; + + if (supported != -1) + return supported; + + /* Provide an invalid address in order to trigger EFAULT. */ + ret = __pagemap_scan_get_categories(fd, start, (struct page_region *) ~0UL); + if (ret == 0) + ksft_exit_fail_msg("PAGEMAP_SCAN succeeded unexpectedly\n"); + + supported = errno == EFAULT; + + return supported; +} + +static bool page_entry_is(int fd, char *start, char *desc, + uint64_t pagemap_flags, uint64_t pagescan_flags) +{ + bool m = pagemap_get_entry(fd, start) & pagemap_flags; + + if (pagemap_scan_supported(fd, start)) { + bool s = pagemap_scan_get_categories(fd, start) & pagescan_flags; + + if (m == s) + return m; + + ksft_exit_fail_msg( + "read and ioctl return unmatched results for %s: %d %d", desc, m, s); + } + return m; +} + bool pagemap_is_softdirty(int fd, char *start) { - return pagemap_get_entry(fd, start) & PM_SOFT_DIRTY; + return page_entry_is(fd, start, "soft-dirty", + PM_SOFT_DIRTY, PAGE_IS_SOFT_DIRTY); } bool pagemap_is_swapped(int fd, char *start) { - return pagemap_get_entry(fd, start) & PM_SWAP; + return page_entry_is(fd, start, "swap", PM_SWAP, PAGE_IS_SWAPPED); } bool pagemap_is_populated(int fd, char *start) { - return pagemap_get_entry(fd, start) & (PM_PRESENT | PM_SWAP); + return page_entry_is(fd, start, "populated", + PM_PRESENT | PM_SWAP, + PAGE_IS_PRESENT | PAGE_IS_SWAPPED); } unsigned long pagemap_get_pfn(int fd, char *start) -- cgit v1.2.3 From 60433a9d038db006ca2f49e3c5f050dc46aaad3a Mon Sep 17 00:00:00 2001 From: Dmitry Rokosov Date: Thu, 23 Nov 2023 10:19:43 +0300 Subject: samples: introduce new samples subdir for cgroup Patch series "samples: introduce cgroup events listeners", v3. To begin with, this patch series relocates the cgroup example code to the samples/cgroup directory, which is the appropriate location for such code snippets. Furthermore, a new memcg events listener is introduced. This listener is a simple yet effective tool for monitoring memory events and managing counter changes during runtime. Additionally, as per Andrew Morton's suggestion, a helpful reminder comment is included in the memcontrol implementation. This comment serves to ensure that the samples code is updated whenever new events are added. This patch (of 3): Move the cgroup_event_listener for cgroup v1 to the samples directory. This suggestion was proposed by Andrew Morton during the discussion [1]. Link: https://lore.kernel.org/all/20231106140934.3f5d4960141562fe8da53906@linux-foundation.org/ [1] Link: https://lkml.kernel.org/r/20231123071945.25811-1-ddrokosov@salutedevices.com Link: https://lkml.kernel.org/r/20231123071945.25811-2-ddrokosov@salutedevices.com Signed-off-by: Dmitry Rokosov Cc: Johannes Weiner Cc: Michal Hocko Cc: Muchun Song Cc: Roman Gushchin Cc: Shakeel Butt Signed-off-by: Andrew Morton --- tools/cgroup/Makefile | 11 ----- tools/cgroup/cgroup_event_listener.c | 83 ------------------------------------ 2 files changed, 94 deletions(-) delete mode 100644 tools/cgroup/Makefile delete mode 100644 tools/cgroup/cgroup_event_listener.c (limited to 'tools') diff --git a/tools/cgroup/Makefile b/tools/cgroup/Makefile deleted file mode 100644 index ffca068e4a76..000000000000 --- a/tools/cgroup/Makefile +++ /dev/null @@ -1,11 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -# Makefile for cgroup tools - -CFLAGS = -Wall -Wextra - -all: cgroup_event_listener -%: %.c - $(CC) $(CFLAGS) -o $@ $^ - -clean: - $(RM) cgroup_event_listener diff --git a/tools/cgroup/cgroup_event_listener.c b/tools/cgroup/cgroup_event_listener.c deleted file mode 100644 index 3d70dc831a76..000000000000 --- a/tools/cgroup/cgroup_event_listener.c +++ /dev/null @@ -1,83 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * cgroup_event_listener.c - Simple listener of cgroup events - * - * Copyright (C) Kirill A. Shutemov - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#define USAGE_STR "Usage: cgroup_event_listener " - -int main(int argc, char **argv) -{ - int efd = -1; - int cfd = -1; - int event_control = -1; - char event_control_path[PATH_MAX]; - char line[LINE_MAX]; - int ret; - - if (argc != 3) - errx(1, "%s", USAGE_STR); - - cfd = open(argv[1], O_RDONLY); - if (cfd == -1) - err(1, "Cannot open %s", argv[1]); - - ret = snprintf(event_control_path, PATH_MAX, "%s/cgroup.event_control", - dirname(argv[1])); - if (ret >= PATH_MAX) - errx(1, "Path to cgroup.event_control is too long"); - - event_control = open(event_control_path, O_WRONLY); - if (event_control == -1) - err(1, "Cannot open %s", event_control_path); - - efd = eventfd(0, 0); - if (efd == -1) - err(1, "eventfd() failed"); - - ret = snprintf(line, LINE_MAX, "%d %d %s", efd, cfd, argv[2]); - if (ret >= LINE_MAX) - errx(1, "Arguments string is too long"); - - ret = write(event_control, line, strlen(line) + 1); - if (ret == -1) - err(1, "Cannot write to cgroup.event_control"); - - while (1) { - uint64_t result; - - ret = read(efd, &result, sizeof(result)); - if (ret == -1) { - if (errno == EINTR) - continue; - err(1, "Cannot read from eventfd"); - } - assert(ret == sizeof(result)); - - ret = access(event_control_path, W_OK); - if ((ret == -1) && (errno == ENOENT)) { - puts("The cgroup seems to have removed."); - break; - } - - if (ret == -1) - err(1, "cgroup.event_control is not accessible any more"); - - printf("%s %s: crossed\n", argv[1], argv[2]); - } - - return 0; -} -- cgit v1.2.3 From bf857ddd21d0bffc1edafc317e8e2ce0d6d5950c Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Wed, 1 Nov 2023 13:16:20 -0400 Subject: maple_tree: move debug check to __mas_set_range() __mas_set_range() was created to shortcut resetting the maple state and a debug check was added to the caller (the vma iterator) to ensure the internal maple state remains safe to use. Move the debug check from the vma iterator into the maple tree itself so other users do not incorrectly use the advanced maple state modification. Fallout from this change include a large amount of debug setup needed to be moved to earlier in the header, and the maple_tree.h radix-tree test code needed to move the inclusion of the header to after the atomic define. None of those changes have functional changes. Link: https://lkml.kernel.org/r/20231101171629.3612299-4-Liam.Howlett@oracle.com Signed-off-by: Liam R. Howlett Cc: Peng Zhang Signed-off-by: Andrew Morton --- tools/testing/radix-tree/linux/maple_tree.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/radix-tree/linux/maple_tree.h b/tools/testing/radix-tree/linux/maple_tree.h index 7d8d1f445b89..06c89bdcc515 100644 --- a/tools/testing/radix-tree/linux/maple_tree.h +++ b/tools/testing/radix-tree/linux/maple_tree.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0+ */ #define atomic_t int32_t -#include "../../../../include/linux/maple_tree.h" #define atomic_inc(x) uatomic_inc(x) #define atomic_read(x) uatomic_read(x) #define atomic_set(x, y) do {} while (0) #define U8_MAX UCHAR_MAX +#include "../../../../include/linux/maple_tree.h" -- cgit v1.2.3 From 31c532a8af57513228c2b12d281104198ff412b8 Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Wed, 1 Nov 2023 13:16:21 -0400 Subject: maple_tree: add end of node tracking to the maple state Analysis of the mas_for_each() iteration showed that there is a significant time spent finding the end of a node. This time can be greatly reduced if the end of the node is cached in the maple state. Care must be taken to update & invalidate as necessary. Link: https://lkml.kernel.org/r/20231101171629.3612299-5-Liam.Howlett@oracle.com Signed-off-by: Liam R. Howlett Cc: Peng Zhang Signed-off-by: Andrew Morton --- tools/testing/radix-tree/maple.c | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/testing/radix-tree/maple.c b/tools/testing/radix-tree/maple.c index cb5358674521..7095fb0ec026 100644 --- a/tools/testing/radix-tree/maple.c +++ b/tools/testing/radix-tree/maple.c @@ -945,6 +945,7 @@ retry: goto retry; } + mas->end = mas_data_end(mas); return ret; not_found: -- cgit v1.2.3 From 067311d33e650adfe7ae23765959ddcc1ba18510 Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Wed, 1 Nov 2023 13:16:25 -0400 Subject: maple_tree: separate ma_state node from status The maple tree node is overloaded to keep status as well as the active node. This, unfortunately, results in a re-walk on underflow or overflow. Since the maple state has room, the status can be placed in its own enum in the structure. Once an underflow/overflow is detected, certain modes can restore the status to active and others may need to re-walk just that one node to see the entry. The status being an enum has the benefit of detecting unhandled status in switch statements. [Liam.Howlett@oracle.com: fix comments about MAS_*] Link: https://lkml.kernel.org/r/20231106154124.614247-1-Liam.Howlett@oracle.com [Liam.Howlett@oracle.com: update forking to separate maple state and node] Link: https://lkml.kernel.org/r/20231106154551.615042-1-Liam.Howlett@oracle.com [Liam.Howlett@oracle.com: fix mas_prev() state separation code] Link: https://lkml.kernel.org/r/20231207193319.4025462-1-Liam.Howlett@oracle.com Link: https://lkml.kernel.org/r/20231101171629.3612299-9-Liam.Howlett@oracle.com Signed-off-by: Liam R. Howlett Cc: Peng Zhang Signed-off-by: Andrew Morton --- tools/testing/radix-tree/maple.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) (limited to 'tools') diff --git a/tools/testing/radix-tree/maple.c b/tools/testing/radix-tree/maple.c index 7095fb0ec026..857c439e6bbc 100644 --- a/tools/testing/radix-tree/maple.c +++ b/tools/testing/radix-tree/maple.c @@ -118,6 +118,7 @@ static noinline void __init check_new_node(struct maple_tree *mt) MT_BUG_ON(mt, mas.alloc == NULL); MT_BUG_ON(mt, mas.alloc->slot[0] == NULL); mas_push_node(&mas, mn); + mas_reset(&mas); mas_nomem(&mas, GFP_KERNEL); /* free */ mtree_unlock(mt); @@ -141,7 +142,7 @@ static noinline void __init check_new_node(struct maple_tree *mt) mn->parent = ma_parent_ptr(mn); ma_free_rcu(mn); - mas.node = MAS_START; + mas.status = ma_start; mas_nomem(&mas, GFP_KERNEL); /* Allocate 3 nodes, will fail. */ mas_node_count(&mas, 3); @@ -158,6 +159,7 @@ static noinline void __init check_new_node(struct maple_tree *mt) /* Ensure we counted 3. */ MT_BUG_ON(mt, mas_allocated(&mas) != 3); /* Free. */ + mas_reset(&mas); mas_nomem(&mas, GFP_KERNEL); /* Set allocation request to 1. */ @@ -272,6 +274,7 @@ static noinline void __init check_new_node(struct maple_tree *mt) ma_free_rcu(mn); MT_BUG_ON(mt, mas_allocated(&mas) != i - j - 1); } + mas_reset(&mas); MT_BUG_ON(mt, mas_nomem(&mas, GFP_KERNEL)); } @@ -294,6 +297,7 @@ static noinline void __init check_new_node(struct maple_tree *mt) smn = smn->slot[0]; /* next. */ } MT_BUG_ON(mt, mas_allocated(&mas) != total); + mas_reset(&mas); mas_nomem(&mas, GFP_KERNEL); /* Free. */ MT_BUG_ON(mt, mas_allocated(&mas) != 0); @@ -441,7 +445,7 @@ static noinline void __init check_new_node(struct maple_tree *mt) mas.node = MA_ERROR(-ENOMEM); mas_node_count(&mas, 10); /* Request */ mas_nomem(&mas, GFP_KERNEL); /* Fill request */ - mas.node = MAS_START; + mas.status = ma_start; MT_BUG_ON(mt, mas_allocated(&mas) != 10); mas_destroy(&mas); @@ -452,7 +456,7 @@ static noinline void __init check_new_node(struct maple_tree *mt) mas.node = MA_ERROR(-ENOMEM); mas_node_count(&mas, 10 + MAPLE_ALLOC_SLOTS - 1); /* Request */ mas_nomem(&mas, GFP_KERNEL); /* Fill request */ - mas.node = MAS_START; + mas.status = ma_start; MT_BUG_ON(mt, mas_allocated(&mas) != 10 + MAPLE_ALLOC_SLOTS - 1); mas_destroy(&mas); @@ -941,7 +945,7 @@ retry: ret = mas_descend_walk(mas, range_min, range_max); if (unlikely(mte_dead_node(mas->node))) { - mas->node = MAS_START; + mas->status = ma_start; goto retry; } @@ -961,10 +965,10 @@ static inline void *mas_range_load(struct ma_state *mas, unsigned long index = mas->index; if (mas_is_none(mas) || mas_is_paused(mas)) - mas->node = MAS_START; + mas->status = ma_start; retry: if (mas_tree_walk(mas, range_min, range_max)) - if (unlikely(mas->node == MAS_ROOT)) + if (unlikely(mas->status == ma_root)) return mas_root(mas); if (likely(mas->offset != MAPLE_NODE_SLOTS)) @@ -35337,7 +35341,7 @@ static void mas_dfs_preorder(struct ma_state *mas) unsigned char end, slot = 0; unsigned long *pivots; - if (mas->node == MAS_START) { + if (mas->status == ma_start) { mas_start(mas); return; } @@ -35374,7 +35378,7 @@ walk_up: return; done: - mas->node = MAS_NONE; + mas->status = ma_none; } @@ -35833,7 +35837,7 @@ static noinline void __init check_nomem(struct maple_tree *mt) mas_store(&ms, &ms); /* insert 1 -> &ms, fails. */ MT_BUG_ON(mt, ms.node != MA_ERROR(-ENOMEM)); mas_nomem(&ms, GFP_KERNEL); /* Node allocated in here. */ - MT_BUG_ON(mt, ms.node != MAS_START); + MT_BUG_ON(mt, ms.status != ma_start); mtree_unlock(mt); MT_BUG_ON(mt, mtree_insert(mt, 2, mt, GFP_KERNEL) != 0); mtree_lock(mt); @@ -35952,7 +35956,7 @@ static int __init compare_tree(struct maple_tree *mt_a, struct maple_tree *mt_b) if (mas_is_ptr(&mas_a) || mas_is_ptr(&mas_b)) { if (!(mas_is_ptr(&mas_a) && mas_is_ptr(&mas_b))) { - pr_err("One is MAS_ROOT and the other is not.\n"); + pr_err("One is ma_root and the other is not.\n"); return -1; } return 0; @@ -35961,7 +35965,7 @@ static int __init compare_tree(struct maple_tree *mt_a, struct maple_tree *mt_b) while (!mas_is_none(&mas_a) || !mas_is_none(&mas_b)) { if (mas_is_none(&mas_a) || mas_is_none(&mas_b)) { - pr_err("One is MAS_NONE and the other is not.\n"); + pr_err("One is ma_none and the other is not.\n"); return -1; } -- cgit v1.2.3 From 9a40d45c1f2c49273c04938ec3d7849f685eb3c1 Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Wed, 1 Nov 2023 13:16:26 -0400 Subject: maple_tree: remove mas_searchable() Now that the status of the maple state is outside of the node, the mas_searchable() function can be dropped for easier open-coding of what is going on. Link: https://lkml.kernel.org/r/20231101171629.3612299-10-Liam.Howlett@oracle.com Signed-off-by: Liam R. Howlett Cc: Peng Zhang Signed-off-by: Andrew Morton --- tools/testing/radix-tree/maple.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/radix-tree/maple.c b/tools/testing/radix-tree/maple.c index 857c439e6bbc..56ae47291ee0 100644 --- a/tools/testing/radix-tree/maple.c +++ b/tools/testing/radix-tree/maple.c @@ -974,8 +974,10 @@ retry: if (likely(mas->offset != MAPLE_NODE_SLOTS)) entry = mas_get_slot(mas, mas->offset); - if (mas_dead_node(mas, index)) + if (mas_is_active(mas) && mte_dead_node(mas->node)) { + mas_set(mas, index); goto retry; + } return entry; } -- cgit v1.2.3 From a697dc2be925d4814f26d7588347ccdd2c5525ed Mon Sep 17 00:00:00 2001 From: Domenico Cerasuolo Date: Thu, 30 Nov 2023 11:40:22 -0800 Subject: selftests: cgroup: update per-memcg zswap writeback selftest The memcg-zswap self test is updated to adjust to the behavior change implemented by commit 87730b165089 ("zswap: make shrinking memcg-aware"), where zswap performs writeback for specific memcg. Link: https://lkml.kernel.org/r/20231130194023.4102148-6-nphamcs@gmail.com Signed-off-by: Domenico Cerasuolo Signed-off-by: Nhat Pham Tested-by: Bagas Sanjaya Acked-by: Chris Li (Google) Cc: Dan Streetman Cc: Johannes Weiner Cc: Michal Hocko Cc: Muchun Song Cc: Roman Gushchin Cc: Seth Jennings Cc: Shakeel Butt Cc: Shuah Khan Cc: Vitaly Wool Cc: Yosry Ahmed Signed-off-by: Andrew Morton --- tools/testing/selftests/cgroup/test_zswap.c | 74 +++++++++++++++++++---------- 1 file changed, 50 insertions(+), 24 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/cgroup/test_zswap.c b/tools/testing/selftests/cgroup/test_zswap.c index c99d2adaca3f..47fdaa146443 100644 --- a/tools/testing/selftests/cgroup/test_zswap.c +++ b/tools/testing/selftests/cgroup/test_zswap.c @@ -50,9 +50,9 @@ static int get_zswap_stored_pages(size_t *value) return read_int("/sys/kernel/debug/zswap/stored_pages", value); } -static int get_zswap_written_back_pages(size_t *value) +static int get_cg_wb_count(const char *cg) { - return read_int("/sys/kernel/debug/zswap/written_back_pages", value); + return cg_read_key_long(cg, "memory.stat", "zswp_wb"); } static long get_zswpout(const char *cgroup) @@ -73,6 +73,24 @@ static int allocate_bytes(const char *cgroup, void *arg) return 0; } +static char *setup_test_group_1M(const char *root, const char *name) +{ + char *group_name = cg_name(root, name); + + if (!group_name) + return NULL; + if (cg_create(group_name)) + goto fail; + if (cg_write(group_name, "memory.max", "1M")) { + cg_destroy(group_name); + goto fail; + } + return group_name; +fail: + free(group_name); + return NULL; +} + /* * Sanity test to check that pages are written into zswap. */ @@ -117,43 +135,51 @@ out: /* * When trying to store a memcg page in zswap, if the memcg hits its memory - * limit in zswap, writeback should not be triggered. - * - * This was fixed with commit 0bdf0efa180a("zswap: do not shrink if cgroup may - * not zswap"). Needs to be revised when a per memcg writeback mechanism is - * implemented. + * limit in zswap, writeback should affect only the zswapped pages of that + * memcg. */ static int test_no_invasive_cgroup_shrink(const char *root) { - size_t written_back_before, written_back_after; int ret = KSFT_FAIL; - char *test_group; + size_t control_allocation_size = MB(10); + char *control_allocation, *wb_group = NULL, *control_group = NULL; /* Set up */ - test_group = cg_name(root, "no_shrink_test"); - if (!test_group) - goto out; - if (cg_create(test_group)) + wb_group = setup_test_group_1M(root, "per_memcg_wb_test1"); + if (!wb_group) + return KSFT_FAIL; + if (cg_write(wb_group, "memory.zswap.max", "10K")) goto out; - if (cg_write(test_group, "memory.max", "1M")) + control_group = setup_test_group_1M(root, "per_memcg_wb_test2"); + if (!control_group) goto out; - if (cg_write(test_group, "memory.zswap.max", "10K")) + + /* Push some test_group2 memory into zswap */ + if (cg_enter_current(control_group)) goto out; - if (get_zswap_written_back_pages(&written_back_before)) + control_allocation = malloc(control_allocation_size); + for (int i = 0; i < control_allocation_size; i += 4095) + control_allocation[i] = 'a'; + if (cg_read_key_long(control_group, "memory.stat", "zswapped") < 1) goto out; - /* Allocate 10x memory.max to push memory into zswap */ - if (cg_run(test_group, allocate_bytes, (void *)MB(10))) + /* Allocate 10x memory.max to push wb_group memory into zswap and trigger wb */ + if (cg_run(wb_group, allocate_bytes, (void *)MB(10))) goto out; - /* Verify that no writeback happened because of the memcg allocation */ - if (get_zswap_written_back_pages(&written_back_after)) - goto out; - if (written_back_after == written_back_before) + /* Verify that only zswapped memory from gwb_group has been written back */ + if (get_cg_wb_count(wb_group) > 0 && get_cg_wb_count(control_group) == 0) ret = KSFT_PASS; out: - cg_destroy(test_group); - free(test_group); + cg_enter_current(root); + if (control_group) { + cg_destroy(control_group); + free(control_group); + } + cg_destroy(wb_group); + free(wb_group); + if (control_allocation) + free(control_allocation); return ret; } -- cgit v1.2.3 From 4b86316ef18231109e4ebb3661ffc69d816fb56f Mon Sep 17 00:00:00 2001 From: Nico Pache Date: Wed, 29 Nov 2023 15:11:40 -0700 Subject: selftests/mm: dont run ksm_functional_tests twice ksm functional test is already being run. Remove the duplicate call to ./ksm_functional_tests. Link: https://lkml.kernel.org/r/20231129221140.614713-1-npache@redhat.com Fixes: 93fb70aa5904 ("selftests/vm: add KSM unmerge tests") Signed-off-by: Nico Pache Acked-by: Joel Savitz Cc: David Hildenbrand Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/run_vmtests.sh | 2 -- 1 file changed, 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh index 00757445278e..c0212258b852 100755 --- a/tools/testing/selftests/mm/run_vmtests.sh +++ b/tools/testing/selftests/mm/run_vmtests.sh @@ -334,8 +334,6 @@ CATEGORY="ksm_numa" run_test ./ksm_tests -N -m 0 CATEGORY="ksm" run_test ./ksm_functional_tests -run_test ./ksm_functional_tests - # protection_keys tests if [ -x ./protection_keys_32 ] then -- cgit v1.2.3 From 3649caed1c9b7aa57049620c498596c17fc7af9e Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Thu, 30 Nov 2023 02:36:49 +0000 Subject: selftests/damon: test quota goals directory Add DAMON selftests for testing creation/existence of quota goals directories and files, and simple valid input writes. Link: https://lkml.kernel.org/r/20231130023652.50284-7-sj@kernel.org Signed-off-by: SeongJae Park Cc: Brendan Higgins Cc: David Gow Cc: Jonathan Corbet Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/damon/sysfs.sh | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/damon/sysfs.sh b/tools/testing/selftests/damon/sysfs.sh index 56f0230a8b92..e9a976d296e2 100755 --- a/tools/testing/selftests/damon/sysfs.sh +++ b/tools/testing/selftests/damon/sysfs.sh @@ -150,6 +150,32 @@ test_weights() ensure_file "$weights_dir/age_permil" "exist" "600" } +test_goal() +{ + goal_dir=$1 + ensure_dir "$goal_dir" "exist" + ensure_file "$goal_dir/target_value" "exist" "600" + ensure_file "$goal_dir/current_value" "exist" "600" +} + +test_goals() +{ + goals_dir=$1 + ensure_dir "$goals_dir" "exist" + ensure_file "$goals_dir/nr_goals" "exist" "600" + + ensure_write_succ "$goals_dir/nr_goals" "1" "valid input" + test_goal "$goals_dir/0" + + ensure_write_succ "$goals_dir/nr_goals" "2" "valid input" + test_goal "$goals_dir/0" + test_goal "$goals_dir/1" + + ensure_write_succ "$goals_dir/nr_goals" "0" "valid input" + ensure_dir "$goals_dir/0" "not_exist" + ensure_dir "$goals_dir/1" "not_exist" +} + test_quotas() { quotas_dir=$1 @@ -158,6 +184,7 @@ test_quotas() ensure_file "$quotas_dir/bytes" "exist" 600 ensure_file "$quotas_dir/reset_interval_ms" "exist" 600 test_weights "$quotas_dir/weights" + test_goals "$quotas_dir/goals" } test_access_pattern() -- cgit v1.2.3 From b6aab3384cafba151c53d3b5f7e1f8d073aadf03 Mon Sep 17 00:00:00 2001 From: Ryan Roberts Date: Thu, 7 Dec 2023 16:12:06 +0000 Subject: selftests/mm/kugepaged: restore thp settings at exit Previously, the saved thp settings would be restored upon a signal or at the natural end of the test suite. But there are some tests that directly call exit() upon failure. In this case, the thp settings were not being restored, which could then influence other tests. Fix this by installing an atexit() handler to do the actual restore. The signal handler can now just call exit() and the atexit handler is invoked. Link: https://lkml.kernel.org/r/20231207161211.2374093-6-ryan.roberts@arm.com Signed-off-by: Ryan Roberts Reviewed-by: Alistair Popple Reviewed-by: David Hildenbrand Tested-by: Kefeng Wang Tested-by: John Hubbard Cc: Anshuman Khandual Cc: Barry Song Cc: Catalin Marinas Cc: David Rientjes Cc: "Huang, Ying" Cc: Hugh Dickins Cc: Itaru Kitayama Cc: Kirill A. Shutemov Cc: Luis Chamberlain Cc: Matthew Wilcox (Oracle) Cc: Vlastimil Babka Cc: Yang Shi Cc: Yin Fengwei Cc: Yu Zhao Cc: Zi Yan Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/khugepaged.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/mm/khugepaged.c b/tools/testing/selftests/mm/khugepaged.c index 030667cb5533..fc47a1c4944c 100644 --- a/tools/testing/selftests/mm/khugepaged.c +++ b/tools/testing/selftests/mm/khugepaged.c @@ -374,18 +374,22 @@ static void pop_settings(void) write_settings(current_settings()); } -static void restore_settings(int sig) +static void restore_settings_atexit(void) { if (skip_settings_restore) - goto out; + return; printf("Restore THP and khugepaged settings..."); write_settings(&saved_settings); success("OK"); - if (sig) - exit(EXIT_FAILURE); -out: - exit(exit_status); + + skip_settings_restore = true; +} + +static void restore_settings(int sig) +{ + /* exit() will invoke the restore_settings_atexit handler. */ + exit(sig ? EXIT_FAILURE : exit_status); } static void save_settings(void) @@ -415,6 +419,7 @@ static void save_settings(void) success("OK"); + atexit(restore_settings_atexit); signal(SIGTERM, restore_settings); signal(SIGINT, restore_settings); signal(SIGHUP, restore_settings); -- cgit v1.2.3 From 00679a183ac6d2584723cfc2a2c07c8285f802dc Mon Sep 17 00:00:00 2001 From: Ryan Roberts Date: Thu, 7 Dec 2023 16:12:07 +0000 Subject: selftests/mm: factor out thp settings management The khugepaged test has a useful framework for save/restore/pop/push of all thp settings via the sysfs interface. This will be useful to explicitly control multi-size THP settings in other tests, so let's move it out of khugepaged and into its own thp_settings.[c|h] utility. Link: https://lkml.kernel.org/r/20231207161211.2374093-7-ryan.roberts@arm.com Signed-off-by: Ryan Roberts Tested-by: Alistair Popple Acked-by: David Hildenbrand Tested-by: Kefeng Wang Tested-by: John Hubbard Cc: Anshuman Khandual Cc: Barry Song Cc: Catalin Marinas Cc: David Rientjes Cc: "Huang, Ying" Cc: Hugh Dickins Cc: Itaru Kitayama Cc: Kirill A. Shutemov Cc: Luis Chamberlain Cc: Matthew Wilcox (Oracle) Cc: Vlastimil Babka Cc: Yang Shi Cc: Yin Fengwei Cc: Yu Zhao Cc: Zi Yan Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/Makefile | 4 +- tools/testing/selftests/mm/khugepaged.c | 346 ++---------------------------- tools/testing/selftests/mm/thp_settings.c | 296 +++++++++++++++++++++++++ tools/testing/selftests/mm/thp_settings.h | 71 ++++++ 4 files changed, 391 insertions(+), 326 deletions(-) create mode 100644 tools/testing/selftests/mm/thp_settings.c create mode 100644 tools/testing/selftests/mm/thp_settings.h (limited to 'tools') diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile index dede0bcf97a3..2453add65d12 100644 --- a/tools/testing/selftests/mm/Makefile +++ b/tools/testing/selftests/mm/Makefile @@ -117,8 +117,8 @@ TEST_FILES += va_high_addr_switch.sh include ../lib.mk -$(TEST_GEN_PROGS): vm_util.c -$(TEST_GEN_FILES): vm_util.c +$(TEST_GEN_PROGS): vm_util.c thp_settings.c +$(TEST_GEN_FILES): vm_util.c thp_settings.c $(OUTPUT)/uffd-stress: uffd-common.c $(OUTPUT)/uffd-unit-tests: uffd-common.c diff --git a/tools/testing/selftests/mm/khugepaged.c b/tools/testing/selftests/mm/khugepaged.c index fc47a1c4944c..b15e7fd70176 100644 --- a/tools/testing/selftests/mm/khugepaged.c +++ b/tools/testing/selftests/mm/khugepaged.c @@ -22,13 +22,13 @@ #include "linux/magic.h" #include "vm_util.h" +#include "thp_settings.h" #define BASE_ADDR ((void *)(1UL << 30)) static unsigned long hpage_pmd_size; static unsigned long page_size; static int hpage_pmd_nr; -#define THP_SYSFS "/sys/kernel/mm/transparent_hugepage/" #define PID_SMAPS "/proc/self/smaps" #define TEST_FILE "collapse_test_file" @@ -71,78 +71,7 @@ struct file_info { }; static struct file_info finfo; - -enum thp_enabled { - THP_ALWAYS, - THP_MADVISE, - THP_NEVER, -}; - -static const char *thp_enabled_strings[] = { - "always", - "madvise", - "never", - NULL -}; - -enum thp_defrag { - THP_DEFRAG_ALWAYS, - THP_DEFRAG_DEFER, - THP_DEFRAG_DEFER_MADVISE, - THP_DEFRAG_MADVISE, - THP_DEFRAG_NEVER, -}; - -static const char *thp_defrag_strings[] = { - "always", - "defer", - "defer+madvise", - "madvise", - "never", - NULL -}; - -enum shmem_enabled { - SHMEM_ALWAYS, - SHMEM_WITHIN_SIZE, - SHMEM_ADVISE, - SHMEM_NEVER, - SHMEM_DENY, - SHMEM_FORCE, -}; - -static const char *shmem_enabled_strings[] = { - "always", - "within_size", - "advise", - "never", - "deny", - "force", - NULL -}; - -struct khugepaged_settings { - bool defrag; - unsigned int alloc_sleep_millisecs; - unsigned int scan_sleep_millisecs; - unsigned int max_ptes_none; - unsigned int max_ptes_swap; - unsigned int max_ptes_shared; - unsigned long pages_to_scan; -}; - -struct settings { - enum thp_enabled thp_enabled; - enum thp_defrag thp_defrag; - enum shmem_enabled shmem_enabled; - bool use_zero_page; - struct khugepaged_settings khugepaged; - unsigned long read_ahead_kb; -}; - -static struct settings saved_settings; static bool skip_settings_restore; - static int exit_status; static void success(const char *msg) @@ -161,226 +90,13 @@ static void skip(const char *msg) printf(" \e[33m%s\e[0m\n", msg); } -static int read_file(const char *path, char *buf, size_t buflen) -{ - int fd; - ssize_t numread; - - fd = open(path, O_RDONLY); - if (fd == -1) - return 0; - - numread = read(fd, buf, buflen - 1); - if (numread < 1) { - close(fd); - return 0; - } - - buf[numread] = '\0'; - close(fd); - - return (unsigned int) numread; -} - -static int write_file(const char *path, const char *buf, size_t buflen) -{ - int fd; - ssize_t numwritten; - - fd = open(path, O_WRONLY); - if (fd == -1) { - printf("open(%s)\n", path); - exit(EXIT_FAILURE); - return 0; - } - - numwritten = write(fd, buf, buflen - 1); - close(fd); - if (numwritten < 1) { - printf("write(%s)\n", buf); - exit(EXIT_FAILURE); - return 0; - } - - return (unsigned int) numwritten; -} - -static int read_string(const char *name, const char *strings[]) -{ - char path[PATH_MAX]; - char buf[256]; - char *c; - int ret; - - ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name); - if (ret >= PATH_MAX) { - printf("%s: Pathname is too long\n", __func__); - exit(EXIT_FAILURE); - } - - if (!read_file(path, buf, sizeof(buf))) { - perror(path); - exit(EXIT_FAILURE); - } - - c = strchr(buf, '['); - if (!c) { - printf("%s: Parse failure\n", __func__); - exit(EXIT_FAILURE); - } - - c++; - memmove(buf, c, sizeof(buf) - (c - buf)); - - c = strchr(buf, ']'); - if (!c) { - printf("%s: Parse failure\n", __func__); - exit(EXIT_FAILURE); - } - *c = '\0'; - - ret = 0; - while (strings[ret]) { - if (!strcmp(strings[ret], buf)) - return ret; - ret++; - } - - printf("Failed to parse %s\n", name); - exit(EXIT_FAILURE); -} - -static void write_string(const char *name, const char *val) -{ - char path[PATH_MAX]; - int ret; - - ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name); - if (ret >= PATH_MAX) { - printf("%s: Pathname is too long\n", __func__); - exit(EXIT_FAILURE); - } - - if (!write_file(path, val, strlen(val) + 1)) { - perror(path); - exit(EXIT_FAILURE); - } -} - -static const unsigned long _read_num(const char *path) -{ - char buf[21]; - - if (read_file(path, buf, sizeof(buf)) < 0) { - perror("read_file(read_num)"); - exit(EXIT_FAILURE); - } - - return strtoul(buf, NULL, 10); -} - -static const unsigned long read_num(const char *name) -{ - char path[PATH_MAX]; - int ret; - - ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name); - if (ret >= PATH_MAX) { - printf("%s: Pathname is too long\n", __func__); - exit(EXIT_FAILURE); - } - return _read_num(path); -} - -static void _write_num(const char *path, unsigned long num) -{ - char buf[21]; - - sprintf(buf, "%ld", num); - if (!write_file(path, buf, strlen(buf) + 1)) { - perror(path); - exit(EXIT_FAILURE); - } -} - -static void write_num(const char *name, unsigned long num) -{ - char path[PATH_MAX]; - int ret; - - ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name); - if (ret >= PATH_MAX) { - printf("%s: Pathname is too long\n", __func__); - exit(EXIT_FAILURE); - } - _write_num(path, num); -} - -static void write_settings(struct settings *settings) -{ - struct khugepaged_settings *khugepaged = &settings->khugepaged; - - write_string("enabled", thp_enabled_strings[settings->thp_enabled]); - write_string("defrag", thp_defrag_strings[settings->thp_defrag]); - write_string("shmem_enabled", - shmem_enabled_strings[settings->shmem_enabled]); - write_num("use_zero_page", settings->use_zero_page); - - write_num("khugepaged/defrag", khugepaged->defrag); - write_num("khugepaged/alloc_sleep_millisecs", - khugepaged->alloc_sleep_millisecs); - write_num("khugepaged/scan_sleep_millisecs", - khugepaged->scan_sleep_millisecs); - write_num("khugepaged/max_ptes_none", khugepaged->max_ptes_none); - write_num("khugepaged/max_ptes_swap", khugepaged->max_ptes_swap); - write_num("khugepaged/max_ptes_shared", khugepaged->max_ptes_shared); - write_num("khugepaged/pages_to_scan", khugepaged->pages_to_scan); - - if (file_ops && finfo.type == VMA_FILE) - _write_num(finfo.dev_queue_read_ahead_path, - settings->read_ahead_kb); -} - -#define MAX_SETTINGS_DEPTH 4 -static struct settings settings_stack[MAX_SETTINGS_DEPTH]; -static int settings_index; - -static struct settings *current_settings(void) -{ - if (!settings_index) { - printf("Fail: No settings set"); - exit(EXIT_FAILURE); - } - return settings_stack + settings_index - 1; -} - -static void push_settings(struct settings *settings) -{ - if (settings_index >= MAX_SETTINGS_DEPTH) { - printf("Fail: Settings stack exceeded"); - exit(EXIT_FAILURE); - } - settings_stack[settings_index++] = *settings; - write_settings(current_settings()); -} - -static void pop_settings(void) -{ - if (settings_index <= 0) { - printf("Fail: Settings stack empty"); - exit(EXIT_FAILURE); - } - --settings_index; - write_settings(current_settings()); -} - static void restore_settings_atexit(void) { if (skip_settings_restore) return; printf("Restore THP and khugepaged settings..."); - write_settings(&saved_settings); + thp_restore_settings(); success("OK"); skip_settings_restore = true; @@ -395,27 +111,9 @@ static void restore_settings(int sig) static void save_settings(void) { printf("Save THP and khugepaged settings..."); - saved_settings = (struct settings) { - .thp_enabled = read_string("enabled", thp_enabled_strings), - .thp_defrag = read_string("defrag", thp_defrag_strings), - .shmem_enabled = - read_string("shmem_enabled", shmem_enabled_strings), - .use_zero_page = read_num("use_zero_page"), - }; - saved_settings.khugepaged = (struct khugepaged_settings) { - .defrag = read_num("khugepaged/defrag"), - .alloc_sleep_millisecs = - read_num("khugepaged/alloc_sleep_millisecs"), - .scan_sleep_millisecs = - read_num("khugepaged/scan_sleep_millisecs"), - .max_ptes_none = read_num("khugepaged/max_ptes_none"), - .max_ptes_swap = read_num("khugepaged/max_ptes_swap"), - .max_ptes_shared = read_num("khugepaged/max_ptes_shared"), - .pages_to_scan = read_num("khugepaged/pages_to_scan"), - }; if (file_ops && finfo.type == VMA_FILE) - saved_settings.read_ahead_kb = - _read_num(finfo.dev_queue_read_ahead_path); + thp_set_read_ahead_path(finfo.dev_queue_read_ahead_path); + thp_save_settings(); success("OK"); @@ -798,7 +496,7 @@ static void __madvise_collapse(const char *msg, char *p, int nr_hpages, struct mem_ops *ops, bool expect) { int ret; - struct settings settings = *current_settings(); + struct thp_settings settings = *thp_current_settings(); printf("%s...", msg); @@ -808,7 +506,7 @@ static void __madvise_collapse(const char *msg, char *p, int nr_hpages, */ settings.thp_enabled = THP_NEVER; settings.shmem_enabled = SHMEM_NEVER; - push_settings(&settings); + thp_push_settings(&settings); /* Clear VM_NOHUGEPAGE */ madvise(p, nr_hpages * hpage_pmd_size, MADV_HUGEPAGE); @@ -820,7 +518,7 @@ static void __madvise_collapse(const char *msg, char *p, int nr_hpages, else success("OK"); - pop_settings(); + thp_pop_settings(); } static void madvise_collapse(const char *msg, char *p, int nr_hpages, @@ -850,13 +548,13 @@ static bool wait_for_scan(const char *msg, char *p, int nr_hpages, madvise(p, nr_hpages * hpage_pmd_size, MADV_HUGEPAGE); /* Wait until the second full_scan completed */ - full_scans = read_num("khugepaged/full_scans") + 2; + full_scans = thp_read_num("khugepaged/full_scans") + 2; printf("%s...", msg); while (timeout--) { if (ops->check_huge(p, nr_hpages)) break; - if (read_num("khugepaged/full_scans") >= full_scans) + if (thp_read_num("khugepaged/full_scans") >= full_scans) break; printf("."); usleep(TICK); @@ -911,11 +609,11 @@ static bool is_tmpfs(struct mem_ops *ops) static void alloc_at_fault(void) { - struct settings settings = *current_settings(); + struct thp_settings settings = *thp_current_settings(); char *p; settings.thp_enabled = THP_ALWAYS; - push_settings(&settings); + thp_push_settings(&settings); p = alloc_mapping(1); *p = 1; @@ -925,7 +623,7 @@ static void alloc_at_fault(void) else fail("Fail"); - pop_settings(); + thp_pop_settings(); madvise(p, page_size, MADV_DONTNEED); printf("Split huge PMD on MADV_DONTNEED..."); @@ -973,11 +671,11 @@ static void collapse_single_pte_entry(struct collapse_context *c, struct mem_ops static void collapse_max_ptes_none(struct collapse_context *c, struct mem_ops *ops) { int max_ptes_none = hpage_pmd_nr / 2; - struct settings settings = *current_settings(); + struct thp_settings settings = *thp_current_settings(); void *p; settings.khugepaged.max_ptes_none = max_ptes_none; - push_settings(&settings); + thp_push_settings(&settings); p = ops->setup_area(1); @@ -1002,7 +700,7 @@ static void collapse_max_ptes_none(struct collapse_context *c, struct mem_ops *o } skip: ops->cleanup_area(p, hpage_pmd_size); - pop_settings(); + thp_pop_settings(); } static void collapse_swapin_single_pte(struct collapse_context *c, struct mem_ops *ops) @@ -1033,7 +731,7 @@ out: static void collapse_max_ptes_swap(struct collapse_context *c, struct mem_ops *ops) { - int max_ptes_swap = read_num("khugepaged/max_ptes_swap"); + int max_ptes_swap = thp_read_num("khugepaged/max_ptes_swap"); void *p; p = ops->setup_area(1); @@ -1250,11 +948,11 @@ static void collapse_fork_compound(struct collapse_context *c, struct mem_ops *o fail("Fail"); ops->fault(p, 0, page_size); - write_num("khugepaged/max_ptes_shared", hpage_pmd_nr - 1); + thp_write_num("khugepaged/max_ptes_shared", hpage_pmd_nr - 1); c->collapse("Collapse PTE table full of compound pages in child", p, 1, ops, true); - write_num("khugepaged/max_ptes_shared", - current_settings()->khugepaged.max_ptes_shared); + thp_write_num("khugepaged/max_ptes_shared", + thp_current_settings()->khugepaged.max_ptes_shared); validate_memory(p, 0, hpage_pmd_size); ops->cleanup_area(p, hpage_pmd_size); @@ -1275,7 +973,7 @@ static void collapse_fork_compound(struct collapse_context *c, struct mem_ops *o static void collapse_max_ptes_shared(struct collapse_context *c, struct mem_ops *ops) { - int max_ptes_shared = read_num("khugepaged/max_ptes_shared"); + int max_ptes_shared = thp_read_num("khugepaged/max_ptes_shared"); int wstatus; void *p; @@ -1443,7 +1141,7 @@ static void parse_test_type(int argc, const char **argv) int main(int argc, const char **argv) { - struct settings default_settings = { + struct thp_settings default_settings = { .thp_enabled = THP_MADVISE, .thp_defrag = THP_DEFRAG_ALWAYS, .shmem_enabled = SHMEM_ADVISE, @@ -1484,7 +1182,7 @@ int main(int argc, const char **argv) default_settings.khugepaged.pages_to_scan = hpage_pmd_nr * 8; save_settings(); - push_settings(&default_settings); + thp_push_settings(&default_settings); alloc_at_fault(); diff --git a/tools/testing/selftests/mm/thp_settings.c b/tools/testing/selftests/mm/thp_settings.c new file mode 100644 index 000000000000..5e8ec792cac7 --- /dev/null +++ b/tools/testing/selftests/mm/thp_settings.c @@ -0,0 +1,296 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include + +#include "thp_settings.h" + +#define THP_SYSFS "/sys/kernel/mm/transparent_hugepage/" +#define MAX_SETTINGS_DEPTH 4 +static struct thp_settings settings_stack[MAX_SETTINGS_DEPTH]; +static int settings_index; +static struct thp_settings saved_settings; +static char dev_queue_read_ahead_path[PATH_MAX]; + +static const char * const thp_enabled_strings[] = { + "always", + "madvise", + "never", + NULL +}; + +static const char * const thp_defrag_strings[] = { + "always", + "defer", + "defer+madvise", + "madvise", + "never", + NULL +}; + +static const char * const shmem_enabled_strings[] = { + "always", + "within_size", + "advise", + "never", + "deny", + "force", + NULL +}; + +int read_file(const char *path, char *buf, size_t buflen) +{ + int fd; + ssize_t numread; + + fd = open(path, O_RDONLY); + if (fd == -1) + return 0; + + numread = read(fd, buf, buflen - 1); + if (numread < 1) { + close(fd); + return 0; + } + + buf[numread] = '\0'; + close(fd); + + return (unsigned int) numread; +} + +int write_file(const char *path, const char *buf, size_t buflen) +{ + int fd; + ssize_t numwritten; + + fd = open(path, O_WRONLY); + if (fd == -1) { + printf("open(%s)\n", path); + exit(EXIT_FAILURE); + return 0; + } + + numwritten = write(fd, buf, buflen - 1); + close(fd); + if (numwritten < 1) { + printf("write(%s)\n", buf); + exit(EXIT_FAILURE); + return 0; + } + + return (unsigned int) numwritten; +} + +const unsigned long read_num(const char *path) +{ + char buf[21]; + + if (read_file(path, buf, sizeof(buf)) < 0) { + perror("read_file()"); + exit(EXIT_FAILURE); + } + + return strtoul(buf, NULL, 10); +} + +void write_num(const char *path, unsigned long num) +{ + char buf[21]; + + sprintf(buf, "%ld", num); + if (!write_file(path, buf, strlen(buf) + 1)) { + perror(path); + exit(EXIT_FAILURE); + } +} + +int thp_read_string(const char *name, const char * const strings[]) +{ + char path[PATH_MAX]; + char buf[256]; + char *c; + int ret; + + ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name); + if (ret >= PATH_MAX) { + printf("%s: Pathname is too long\n", __func__); + exit(EXIT_FAILURE); + } + + if (!read_file(path, buf, sizeof(buf))) { + perror(path); + exit(EXIT_FAILURE); + } + + c = strchr(buf, '['); + if (!c) { + printf("%s: Parse failure\n", __func__); + exit(EXIT_FAILURE); + } + + c++; + memmove(buf, c, sizeof(buf) - (c - buf)); + + c = strchr(buf, ']'); + if (!c) { + printf("%s: Parse failure\n", __func__); + exit(EXIT_FAILURE); + } + *c = '\0'; + + ret = 0; + while (strings[ret]) { + if (!strcmp(strings[ret], buf)) + return ret; + ret++; + } + + printf("Failed to parse %s\n", name); + exit(EXIT_FAILURE); +} + +void thp_write_string(const char *name, const char *val) +{ + char path[PATH_MAX]; + int ret; + + ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name); + if (ret >= PATH_MAX) { + printf("%s: Pathname is too long\n", __func__); + exit(EXIT_FAILURE); + } + + if (!write_file(path, val, strlen(val) + 1)) { + perror(path); + exit(EXIT_FAILURE); + } +} + +const unsigned long thp_read_num(const char *name) +{ + char path[PATH_MAX]; + int ret; + + ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name); + if (ret >= PATH_MAX) { + printf("%s: Pathname is too long\n", __func__); + exit(EXIT_FAILURE); + } + return read_num(path); +} + +void thp_write_num(const char *name, unsigned long num) +{ + char path[PATH_MAX]; + int ret; + + ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name); + if (ret >= PATH_MAX) { + printf("%s: Pathname is too long\n", __func__); + exit(EXIT_FAILURE); + } + write_num(path, num); +} + +void thp_read_settings(struct thp_settings *settings) +{ + *settings = (struct thp_settings) { + .thp_enabled = thp_read_string("enabled", thp_enabled_strings), + .thp_defrag = thp_read_string("defrag", thp_defrag_strings), + .shmem_enabled = + thp_read_string("shmem_enabled", shmem_enabled_strings), + .use_zero_page = thp_read_num("use_zero_page"), + }; + settings->khugepaged = (struct khugepaged_settings) { + .defrag = thp_read_num("khugepaged/defrag"), + .alloc_sleep_millisecs = + thp_read_num("khugepaged/alloc_sleep_millisecs"), + .scan_sleep_millisecs = + thp_read_num("khugepaged/scan_sleep_millisecs"), + .max_ptes_none = thp_read_num("khugepaged/max_ptes_none"), + .max_ptes_swap = thp_read_num("khugepaged/max_ptes_swap"), + .max_ptes_shared = thp_read_num("khugepaged/max_ptes_shared"), + .pages_to_scan = thp_read_num("khugepaged/pages_to_scan"), + }; + if (dev_queue_read_ahead_path[0]) + settings->read_ahead_kb = read_num(dev_queue_read_ahead_path); +} + +void thp_write_settings(struct thp_settings *settings) +{ + struct khugepaged_settings *khugepaged = &settings->khugepaged; + + thp_write_string("enabled", thp_enabled_strings[settings->thp_enabled]); + thp_write_string("defrag", thp_defrag_strings[settings->thp_defrag]); + thp_write_string("shmem_enabled", + shmem_enabled_strings[settings->shmem_enabled]); + thp_write_num("use_zero_page", settings->use_zero_page); + + thp_write_num("khugepaged/defrag", khugepaged->defrag); + thp_write_num("khugepaged/alloc_sleep_millisecs", + khugepaged->alloc_sleep_millisecs); + thp_write_num("khugepaged/scan_sleep_millisecs", + khugepaged->scan_sleep_millisecs); + thp_write_num("khugepaged/max_ptes_none", khugepaged->max_ptes_none); + thp_write_num("khugepaged/max_ptes_swap", khugepaged->max_ptes_swap); + thp_write_num("khugepaged/max_ptes_shared", khugepaged->max_ptes_shared); + thp_write_num("khugepaged/pages_to_scan", khugepaged->pages_to_scan); + + if (dev_queue_read_ahead_path[0]) + write_num(dev_queue_read_ahead_path, settings->read_ahead_kb); +} + +struct thp_settings *thp_current_settings(void) +{ + if (!settings_index) { + printf("Fail: No settings set"); + exit(EXIT_FAILURE); + } + return settings_stack + settings_index - 1; +} + +void thp_push_settings(struct thp_settings *settings) +{ + if (settings_index >= MAX_SETTINGS_DEPTH) { + printf("Fail: Settings stack exceeded"); + exit(EXIT_FAILURE); + } + settings_stack[settings_index++] = *settings; + thp_write_settings(thp_current_settings()); +} + +void thp_pop_settings(void) +{ + if (settings_index <= 0) { + printf("Fail: Settings stack empty"); + exit(EXIT_FAILURE); + } + --settings_index; + thp_write_settings(thp_current_settings()); +} + +void thp_restore_settings(void) +{ + thp_write_settings(&saved_settings); +} + +void thp_save_settings(void) +{ + thp_read_settings(&saved_settings); +} + +void thp_set_read_ahead_path(char *path) +{ + if (!path) { + dev_queue_read_ahead_path[0] = '\0'; + return; + } + + strncpy(dev_queue_read_ahead_path, path, + sizeof(dev_queue_read_ahead_path)); + dev_queue_read_ahead_path[sizeof(dev_queue_read_ahead_path) - 1] = '\0'; +} diff --git a/tools/testing/selftests/mm/thp_settings.h b/tools/testing/selftests/mm/thp_settings.h new file mode 100644 index 000000000000..ff3d98c30617 --- /dev/null +++ b/tools/testing/selftests/mm/thp_settings.h @@ -0,0 +1,71 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __THP_SETTINGS_H__ +#define __THP_SETTINGS_H__ + +#include +#include +#include + +enum thp_enabled { + THP_ALWAYS, + THP_MADVISE, + THP_NEVER, +}; + +enum thp_defrag { + THP_DEFRAG_ALWAYS, + THP_DEFRAG_DEFER, + THP_DEFRAG_DEFER_MADVISE, + THP_DEFRAG_MADVISE, + THP_DEFRAG_NEVER, +}; + +enum shmem_enabled { + SHMEM_ALWAYS, + SHMEM_WITHIN_SIZE, + SHMEM_ADVISE, + SHMEM_NEVER, + SHMEM_DENY, + SHMEM_FORCE, +}; + +struct khugepaged_settings { + bool defrag; + unsigned int alloc_sleep_millisecs; + unsigned int scan_sleep_millisecs; + unsigned int max_ptes_none; + unsigned int max_ptes_swap; + unsigned int max_ptes_shared; + unsigned long pages_to_scan; +}; + +struct thp_settings { + enum thp_enabled thp_enabled; + enum thp_defrag thp_defrag; + enum shmem_enabled shmem_enabled; + bool use_zero_page; + struct khugepaged_settings khugepaged; + unsigned long read_ahead_kb; +}; + +int read_file(const char *path, char *buf, size_t buflen); +int write_file(const char *path, const char *buf, size_t buflen); +const unsigned long read_num(const char *path); +void write_num(const char *path, unsigned long num); + +int thp_read_string(const char *name, const char * const strings[]); +void thp_write_string(const char *name, const char *val); +const unsigned long thp_read_num(const char *name); +void thp_write_num(const char *name, unsigned long num); + +void thp_write_settings(struct thp_settings *settings); +void thp_read_settings(struct thp_settings *settings); +struct thp_settings *thp_current_settings(void); +void thp_push_settings(struct thp_settings *settings); +void thp_pop_settings(void); +void thp_restore_settings(void); +void thp_save_settings(void); + +void thp_set_read_ahead_path(char *path); + +#endif /* __THP_SETTINGS_H__ */ -- cgit v1.2.3 From 4f5070a5e40db2e9dbf5fff4ec678d6fbb338d5c Mon Sep 17 00:00:00 2001 From: Ryan Roberts Date: Thu, 7 Dec 2023 16:12:08 +0000 Subject: selftests/mm: support multi-size THP interface in thp_settings Save and restore the new per-size hugepage enabled setting, if available on the running kernel. Since the number of per-size directories is not fixed, solve this as simply as possible by catering for a maximum number in the thp_settings struct (20). Each array index is the order. The value of THP_NEVER is changed to 0 so that all of these new settings default to THP_NEVER and the user only needs to fill in the ones they want to enable. Link: https://lkml.kernel.org/r/20231207161211.2374093-8-ryan.roberts@arm.com Signed-off-by: Ryan Roberts Tested-by: Kefeng Wang Tested-by: John Hubbard Cc: Alistair Popple Cc: Anshuman Khandual Cc: Barry Song Cc: Catalin Marinas Cc: David Hildenbrand Cc: David Rientjes Cc: "Huang, Ying" Cc: Hugh Dickins Cc: Itaru Kitayama Cc: Kirill A. Shutemov Cc: Luis Chamberlain Cc: Matthew Wilcox (Oracle) Cc: Vlastimil Babka Cc: Yang Shi Cc: Yin Fengwei Cc: Yu Zhao Cc: Zi Yan Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/khugepaged.c | 3 ++ tools/testing/selftests/mm/thp_settings.c | 55 ++++++++++++++++++++++++++++++- tools/testing/selftests/mm/thp_settings.h | 11 ++++++- 3 files changed, 67 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/mm/khugepaged.c b/tools/testing/selftests/mm/khugepaged.c index b15e7fd70176..7bd3baa9d34b 100644 --- a/tools/testing/selftests/mm/khugepaged.c +++ b/tools/testing/selftests/mm/khugepaged.c @@ -1141,6 +1141,7 @@ static void parse_test_type(int argc, const char **argv) int main(int argc, const char **argv) { + int hpage_pmd_order; struct thp_settings default_settings = { .thp_enabled = THP_MADVISE, .thp_defrag = THP_DEFRAG_ALWAYS, @@ -1175,11 +1176,13 @@ int main(int argc, const char **argv) exit(EXIT_FAILURE); } hpage_pmd_nr = hpage_pmd_size / page_size; + hpage_pmd_order = __builtin_ctz(hpage_pmd_nr); default_settings.khugepaged.max_ptes_none = hpage_pmd_nr - 1; default_settings.khugepaged.max_ptes_swap = hpage_pmd_nr / 8; default_settings.khugepaged.max_ptes_shared = hpage_pmd_nr / 2; default_settings.khugepaged.pages_to_scan = hpage_pmd_nr * 8; + default_settings.hugepages[hpage_pmd_order].enabled = THP_INHERIT; save_settings(); thp_push_settings(&default_settings); diff --git a/tools/testing/selftests/mm/thp_settings.c b/tools/testing/selftests/mm/thp_settings.c index 5e8ec792cac7..a4163438108e 100644 --- a/tools/testing/selftests/mm/thp_settings.c +++ b/tools/testing/selftests/mm/thp_settings.c @@ -16,9 +16,10 @@ static struct thp_settings saved_settings; static char dev_queue_read_ahead_path[PATH_MAX]; static const char * const thp_enabled_strings[] = { + "never", "always", + "inherit", "madvise", - "never", NULL }; @@ -198,6 +199,10 @@ void thp_write_num(const char *name, unsigned long num) void thp_read_settings(struct thp_settings *settings) { + unsigned long orders = thp_supported_orders(); + char path[PATH_MAX]; + int i; + *settings = (struct thp_settings) { .thp_enabled = thp_read_string("enabled", thp_enabled_strings), .thp_defrag = thp_read_string("defrag", thp_defrag_strings), @@ -218,11 +223,26 @@ void thp_read_settings(struct thp_settings *settings) }; if (dev_queue_read_ahead_path[0]) settings->read_ahead_kb = read_num(dev_queue_read_ahead_path); + + for (i = 0; i < NR_ORDERS; i++) { + if (!((1 << i) & orders)) { + settings->hugepages[i].enabled = THP_NEVER; + continue; + } + snprintf(path, PATH_MAX, "hugepages-%ukB/enabled", + (getpagesize() >> 10) << i); + settings->hugepages[i].enabled = + thp_read_string(path, thp_enabled_strings); + } } void thp_write_settings(struct thp_settings *settings) { struct khugepaged_settings *khugepaged = &settings->khugepaged; + unsigned long orders = thp_supported_orders(); + char path[PATH_MAX]; + int enabled; + int i; thp_write_string("enabled", thp_enabled_strings[settings->thp_enabled]); thp_write_string("defrag", thp_defrag_strings[settings->thp_defrag]); @@ -242,6 +262,15 @@ void thp_write_settings(struct thp_settings *settings) if (dev_queue_read_ahead_path[0]) write_num(dev_queue_read_ahead_path, settings->read_ahead_kb); + + for (i = 0; i < NR_ORDERS; i++) { + if (!((1 << i) & orders)) + continue; + snprintf(path, PATH_MAX, "hugepages-%ukB/enabled", + (getpagesize() >> 10) << i); + enabled = settings->hugepages[i].enabled; + thp_write_string(path, thp_enabled_strings[enabled]); + } } struct thp_settings *thp_current_settings(void) @@ -294,3 +323,27 @@ void thp_set_read_ahead_path(char *path) sizeof(dev_queue_read_ahead_path)); dev_queue_read_ahead_path[sizeof(dev_queue_read_ahead_path) - 1] = '\0'; } + +unsigned long thp_supported_orders(void) +{ + unsigned long orders = 0; + char path[PATH_MAX]; + char buf[256]; + int ret; + int i; + + for (i = 0; i < NR_ORDERS; i++) { + ret = snprintf(path, PATH_MAX, THP_SYSFS "hugepages-%ukB/enabled", + (getpagesize() >> 10) << i); + if (ret >= PATH_MAX) { + printf("%s: Pathname is too long\n", __func__); + exit(EXIT_FAILURE); + } + + ret = read_file(path, buf, sizeof(buf)); + if (ret) + orders |= 1UL << i; + } + + return orders; +} diff --git a/tools/testing/selftests/mm/thp_settings.h b/tools/testing/selftests/mm/thp_settings.h index ff3d98c30617..71cbff05f4c7 100644 --- a/tools/testing/selftests/mm/thp_settings.h +++ b/tools/testing/selftests/mm/thp_settings.h @@ -7,9 +7,10 @@ #include enum thp_enabled { + THP_NEVER, THP_ALWAYS, + THP_INHERIT, THP_MADVISE, - THP_NEVER, }; enum thp_defrag { @@ -29,6 +30,12 @@ enum shmem_enabled { SHMEM_FORCE, }; +#define NR_ORDERS 20 + +struct hugepages_settings { + enum thp_enabled enabled; +}; + struct khugepaged_settings { bool defrag; unsigned int alloc_sleep_millisecs; @@ -46,6 +53,7 @@ struct thp_settings { bool use_zero_page; struct khugepaged_settings khugepaged; unsigned long read_ahead_kb; + struct hugepages_settings hugepages[NR_ORDERS]; }; int read_file(const char *path, char *buf, size_t buflen); @@ -67,5 +75,6 @@ void thp_restore_settings(void); void thp_save_settings(void); void thp_set_read_ahead_path(char *path); +unsigned long thp_supported_orders(void); #endif /* __THP_SETTINGS_H__ */ -- cgit v1.2.3 From 9f0704eae8a4edc8dca9c8a297f798d505a4103a Mon Sep 17 00:00:00 2001 From: Ryan Roberts Date: Thu, 7 Dec 2023 16:12:09 +0000 Subject: selftests/mm/khugepaged: enlighten for multi-size THP The `collapse_max_ptes_none` test was previously failing when a THP size less than PMD-size had enabled="always". The root cause is because the test faults in 1 page less than the threshold it set for collapsing. But when THP is enabled always, we "over allocate" and therefore the threshold is passed, and collapse unexpectedly succeeds. Solve this by enlightening khugepaged selftest. Add a command line option to pass in the desired THP size that should be used for all anonymous allocations. The harness will then explicitly configure a THP size as requested and modify the `collapse_max_ptes_none` test so that it faults in the threshold minus the number of pages in the configured THP size. If no command line option is provided, default to order 0, as per previous behaviour. I chose to use an order in the command line interface, since this makes the interface agnostic of base page size, making it easier to invoke from run_vmtests.sh. Link: https://lkml.kernel.org/r/20231207161211.2374093-9-ryan.roberts@arm.com Signed-off-by: Ryan Roberts Tested-by: Kefeng Wang Tested-by: John Hubbard Cc: Alistair Popple Cc: Anshuman Khandual Cc: Barry Song Cc: Catalin Marinas Cc: David Hildenbrand Cc: David Rientjes Cc: "Huang, Ying" Cc: Hugh Dickins Cc: Itaru Kitayama Cc: Kirill A. Shutemov Cc: Luis Chamberlain Cc: Matthew Wilcox (Oracle) Cc: Vlastimil Babka Cc: Yang Shi Cc: Yin Fengwei Cc: Yu Zhao Cc: Zi Yan Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/khugepaged.c | 48 ++++++++++++++++++++++++------- tools/testing/selftests/mm/run_vmtests.sh | 2 ++ 2 files changed, 39 insertions(+), 11 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/mm/khugepaged.c b/tools/testing/selftests/mm/khugepaged.c index 7bd3baa9d34b..829320a519e7 100644 --- a/tools/testing/selftests/mm/khugepaged.c +++ b/tools/testing/selftests/mm/khugepaged.c @@ -28,6 +28,7 @@ static unsigned long hpage_pmd_size; static unsigned long page_size; static int hpage_pmd_nr; +static int anon_order; #define PID_SMAPS "/proc/self/smaps" #define TEST_FILE "collapse_test_file" @@ -607,6 +608,11 @@ static bool is_tmpfs(struct mem_ops *ops) return ops == &__file_ops && finfo.type == VMA_SHMEM; } +static bool is_anon(struct mem_ops *ops) +{ + return ops == &__anon_ops; +} + static void alloc_at_fault(void) { struct thp_settings settings = *thp_current_settings(); @@ -673,6 +679,7 @@ static void collapse_max_ptes_none(struct collapse_context *c, struct mem_ops *o int max_ptes_none = hpage_pmd_nr / 2; struct thp_settings settings = *thp_current_settings(); void *p; + int fault_nr_pages = is_anon(ops) ? 1 << anon_order : 1; settings.khugepaged.max_ptes_none = max_ptes_none; thp_push_settings(&settings); @@ -686,10 +693,10 @@ static void collapse_max_ptes_none(struct collapse_context *c, struct mem_ops *o goto skip; } - ops->fault(p, 0, (hpage_pmd_nr - max_ptes_none - 1) * page_size); + ops->fault(p, 0, (hpage_pmd_nr - max_ptes_none - fault_nr_pages) * page_size); c->collapse("Maybe collapse with max_ptes_none exceeded", p, 1, ops, !c->enforce_pte_scan_limits); - validate_memory(p, 0, (hpage_pmd_nr - max_ptes_none - 1) * page_size); + validate_memory(p, 0, (hpage_pmd_nr - max_ptes_none - fault_nr_pages) * page_size); if (c->enforce_pte_scan_limits) { ops->fault(p, 0, (hpage_pmd_nr - max_ptes_none) * page_size); @@ -1076,7 +1083,7 @@ static void madvise_retracted_page_tables(struct collapse_context *c, static void usage(void) { - fprintf(stderr, "\nUsage: ./khugepaged [dir]\n\n"); + fprintf(stderr, "\nUsage: ./khugepaged [OPTIONS] [dir]\n\n"); fprintf(stderr, "\t\t: :\n"); fprintf(stderr, "\t\t: [all|khugepaged|madvise]\n"); fprintf(stderr, "\t\t: [all|anon|file|shmem]\n"); @@ -1085,15 +1092,34 @@ static void usage(void) fprintf(stderr, "\tCONFIG_READ_ONLY_THP_FOR_FS=y\n"); fprintf(stderr, "\n\tif [dir] is a (sub)directory of a tmpfs mount, tmpfs must be\n"); fprintf(stderr, "\tmounted with huge=madvise option for khugepaged tests to work\n"); + fprintf(stderr, "\n\tSupported Options:\n"); + fprintf(stderr, "\t\t-h: This help message.\n"); + fprintf(stderr, "\t\t-s: mTHP size, expressed as page order.\n"); + fprintf(stderr, "\t\t Defaults to 0. Use this size for anon allocations.\n"); exit(1); } -static void parse_test_type(int argc, const char **argv) +static void parse_test_type(int argc, char **argv) { + int opt; char *buf; const char *token; - if (argc == 1) { + while ((opt = getopt(argc, argv, "s:h")) != -1) { + switch (opt) { + case 's': + anon_order = atoi(optarg); + break; + case 'h': + default: + usage(); + } + } + + argv += optind; + argc -= optind; + + if (argc == 0) { /* Backwards compatibility */ khugepaged_context = &__khugepaged_context; madvise_context = &__madvise_context; @@ -1101,7 +1127,7 @@ static void parse_test_type(int argc, const char **argv) return; } - buf = strdup(argv[1]); + buf = strdup(argv[0]); token = strsep(&buf, ":"); if (!strcmp(token, "all")) { @@ -1135,11 +1161,13 @@ static void parse_test_type(int argc, const char **argv) if (!file_ops) return; - if (argc != 3) + if (argc != 2) usage(); + + get_finfo(argv[1]); } -int main(int argc, const char **argv) +int main(int argc, char **argv) { int hpage_pmd_order; struct thp_settings default_settings = { @@ -1164,9 +1192,6 @@ int main(int argc, const char **argv) parse_test_type(argc, argv); - if (file_ops) - get_finfo(argv[2]); - setbuf(stdout, NULL); page_size = getpagesize(); @@ -1183,6 +1208,7 @@ int main(int argc, const char **argv) default_settings.khugepaged.max_ptes_shared = hpage_pmd_nr / 2; default_settings.khugepaged.pages_to_scan = hpage_pmd_nr * 8; default_settings.hugepages[hpage_pmd_order].enabled = THP_INHERIT; + default_settings.hugepages[anon_order].enabled = THP_ALWAYS; save_settings(); thp_push_settings(&default_settings); diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh index c0212258b852..87f513f5cf91 100755 --- a/tools/testing/selftests/mm/run_vmtests.sh +++ b/tools/testing/selftests/mm/run_vmtests.sh @@ -357,6 +357,8 @@ CATEGORY="cow" run_test ./cow CATEGORY="thp" run_test ./khugepaged +CATEGORY="thp" run_test ./khugepaged -s 2 + CATEGORY="thp" run_test ./transhuge-stress -d 20 CATEGORY="thp" run_test ./split_huge_page_test -- cgit v1.2.3 From 12dc16b38463a671bc91dc2df10f3a014a27ff3b Mon Sep 17 00:00:00 2001 From: Ryan Roberts Date: Thu, 7 Dec 2023 16:12:10 +0000 Subject: selftests/mm/cow: generalize do_run_with_thp() helper do_run_with_thp() prepares (PMD-sized) THP memory into different states before running tests. With the introduction of multi-size THP, we would like to reuse this logic to also test those smaller THP sizes. So let's add a thpsize parameter which tells the function what size THP it should operate on. A separate commit will utilize this change to add new tests for multi-size THP, where available. Link: https://lkml.kernel.org/r/20231207161211.2374093-10-ryan.roberts@arm.com Signed-off-by: Ryan Roberts Reviewed-by: David Hildenbrand Tested-by: Kefeng Wang Tested-by: John Hubbard Cc: Alistair Popple Cc: Anshuman Khandual Cc: Barry Song Cc: Catalin Marinas Cc: David Rientjes Cc: "Huang, Ying" Cc: Hugh Dickins Cc: Itaru Kitayama Cc: Kirill A. Shutemov Cc: Luis Chamberlain Cc: Matthew Wilcox (Oracle) Cc: Vlastimil Babka Cc: Yang Shi Cc: Yin Fengwei Cc: Yu Zhao Cc: Zi Yan Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/cow.c | 121 ++++++++++++++++++++++----------------- 1 file changed, 67 insertions(+), 54 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/mm/cow.c b/tools/testing/selftests/mm/cow.c index 6f2f83990441..a284918b1172 100644 --- a/tools/testing/selftests/mm/cow.c +++ b/tools/testing/selftests/mm/cow.c @@ -32,7 +32,7 @@ static size_t pagesize; static int pagemap_fd; -static size_t thpsize; +static size_t pmdsize; static int nr_hugetlbsizes; static size_t hugetlbsizes[10]; static int gup_fd; @@ -734,7 +734,7 @@ enum thp_run { THP_RUN_PARTIAL_SHARED, }; -static void do_run_with_thp(test_fn fn, enum thp_run thp_run) +static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize) { char *mem, *mmap_mem, *tmp, *mremap_mem = MAP_FAILED; size_t size, mmap_size, mremap_size; @@ -759,11 +759,11 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run) } /* - * Try to populate a THP. Touch the first sub-page and test if we get - * another sub-page populated automatically. + * Try to populate a THP. Touch the first sub-page and test if + * we get the last sub-page populated automatically. */ mem[0] = 0; - if (!pagemap_is_populated(pagemap_fd, mem + pagesize)) { + if (!pagemap_is_populated(pagemap_fd, mem + thpsize - pagesize)) { ksft_test_result_skip("Did not get a THP populated\n"); goto munmap; } @@ -773,12 +773,14 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run) switch (thp_run) { case THP_RUN_PMD: case THP_RUN_PMD_SWAPOUT: + assert(thpsize == pmdsize); break; case THP_RUN_PTE: case THP_RUN_PTE_SWAPOUT: /* * Trigger PTE-mapping the THP by temporarily mapping a single - * subpage R/O. + * subpage R/O. This is a noop if the THP is not pmdsize (and + * therefore already PTE-mapped). */ ret = mprotect(mem + pagesize, pagesize, PROT_READ); if (ret) { @@ -875,52 +877,60 @@ munmap: munmap(mremap_mem, mremap_size); } -static void run_with_thp(test_fn fn, const char *desc) +static void run_with_thp(test_fn fn, const char *desc, size_t size) { - ksft_print_msg("[RUN] %s ... with THP\n", desc); - do_run_with_thp(fn, THP_RUN_PMD); + ksft_print_msg("[RUN] %s ... with THP (%zu kB)\n", + desc, size / 1024); + do_run_with_thp(fn, THP_RUN_PMD, size); } -static void run_with_thp_swap(test_fn fn, const char *desc) +static void run_with_thp_swap(test_fn fn, const char *desc, size_t size) { - ksft_print_msg("[RUN] %s ... with swapped-out THP\n", desc); - do_run_with_thp(fn, THP_RUN_PMD_SWAPOUT); + ksft_print_msg("[RUN] %s ... with swapped-out THP (%zu kB)\n", + desc, size / 1024); + do_run_with_thp(fn, THP_RUN_PMD_SWAPOUT, size); } -static void run_with_pte_mapped_thp(test_fn fn, const char *desc) +static void run_with_pte_mapped_thp(test_fn fn, const char *desc, size_t size) { - ksft_print_msg("[RUN] %s ... with PTE-mapped THP\n", desc); - do_run_with_thp(fn, THP_RUN_PTE); + ksft_print_msg("[RUN] %s ... with PTE-mapped THP (%zu kB)\n", + desc, size / 1024); + do_run_with_thp(fn, THP_RUN_PTE, size); } -static void run_with_pte_mapped_thp_swap(test_fn fn, const char *desc) +static void run_with_pte_mapped_thp_swap(test_fn fn, const char *desc, size_t size) { - ksft_print_msg("[RUN] %s ... with swapped-out, PTE-mapped THP\n", desc); - do_run_with_thp(fn, THP_RUN_PTE_SWAPOUT); + ksft_print_msg("[RUN] %s ... with swapped-out, PTE-mapped THP (%zu kB)\n", + desc, size / 1024); + do_run_with_thp(fn, THP_RUN_PTE_SWAPOUT, size); } -static void run_with_single_pte_of_thp(test_fn fn, const char *desc) +static void run_with_single_pte_of_thp(test_fn fn, const char *desc, size_t size) { - ksft_print_msg("[RUN] %s ... with single PTE of THP\n", desc); - do_run_with_thp(fn, THP_RUN_SINGLE_PTE); + ksft_print_msg("[RUN] %s ... with single PTE of THP (%zu kB)\n", + desc, size / 1024); + do_run_with_thp(fn, THP_RUN_SINGLE_PTE, size); } -static void run_with_single_pte_of_thp_swap(test_fn fn, const char *desc) +static void run_with_single_pte_of_thp_swap(test_fn fn, const char *desc, size_t size) { - ksft_print_msg("[RUN] %s ... with single PTE of swapped-out THP\n", desc); - do_run_with_thp(fn, THP_RUN_SINGLE_PTE_SWAPOUT); + ksft_print_msg("[RUN] %s ... with single PTE of swapped-out THP (%zu kB)\n", + desc, size / 1024); + do_run_with_thp(fn, THP_RUN_SINGLE_PTE_SWAPOUT, size); } -static void run_with_partial_mremap_thp(test_fn fn, const char *desc) +static void run_with_partial_mremap_thp(test_fn fn, const char *desc, size_t size) { - ksft_print_msg("[RUN] %s ... with partially mremap()'ed THP\n", desc); - do_run_with_thp(fn, THP_RUN_PARTIAL_MREMAP); + ksft_print_msg("[RUN] %s ... with partially mremap()'ed THP (%zu kB)\n", + desc, size / 1024); + do_run_with_thp(fn, THP_RUN_PARTIAL_MREMAP, size); } -static void run_with_partial_shared_thp(test_fn fn, const char *desc) +static void run_with_partial_shared_thp(test_fn fn, const char *desc, size_t size) { - ksft_print_msg("[RUN] %s ... with partially shared THP\n", desc); - do_run_with_thp(fn, THP_RUN_PARTIAL_SHARED); + ksft_print_msg("[RUN] %s ... with partially shared THP (%zu kB)\n", + desc, size / 1024); + do_run_with_thp(fn, THP_RUN_PARTIAL_SHARED, size); } static void run_with_hugetlb(test_fn fn, const char *desc, size_t hugetlbsize) @@ -1091,15 +1101,15 @@ static void run_anon_test_case(struct test_case const *test_case) run_with_base_page(test_case->fn, test_case->desc); run_with_base_page_swap(test_case->fn, test_case->desc); - if (thpsize) { - run_with_thp(test_case->fn, test_case->desc); - run_with_thp_swap(test_case->fn, test_case->desc); - run_with_pte_mapped_thp(test_case->fn, test_case->desc); - run_with_pte_mapped_thp_swap(test_case->fn, test_case->desc); - run_with_single_pte_of_thp(test_case->fn, test_case->desc); - run_with_single_pte_of_thp_swap(test_case->fn, test_case->desc); - run_with_partial_mremap_thp(test_case->fn, test_case->desc); - run_with_partial_shared_thp(test_case->fn, test_case->desc); + if (pmdsize) { + run_with_thp(test_case->fn, test_case->desc, pmdsize); + run_with_thp_swap(test_case->fn, test_case->desc, pmdsize); + run_with_pte_mapped_thp(test_case->fn, test_case->desc, pmdsize); + run_with_pte_mapped_thp_swap(test_case->fn, test_case->desc, pmdsize); + run_with_single_pte_of_thp(test_case->fn, test_case->desc, pmdsize); + run_with_single_pte_of_thp_swap(test_case->fn, test_case->desc, pmdsize); + run_with_partial_mremap_thp(test_case->fn, test_case->desc, pmdsize); + run_with_partial_shared_thp(test_case->fn, test_case->desc, pmdsize); } for (i = 0; i < nr_hugetlbsizes; i++) run_with_hugetlb(test_case->fn, test_case->desc, @@ -1120,7 +1130,7 @@ static int tests_per_anon_test_case(void) { int tests = 2 + nr_hugetlbsizes; - if (thpsize) + if (pmdsize) tests += 8; return tests; } @@ -1329,7 +1339,7 @@ static void run_anon_thp_test_cases(void) { int i; - if (!thpsize) + if (!pmdsize) return; ksft_print_msg("[INFO] Anonymous THP tests\n"); @@ -1338,13 +1348,13 @@ static void run_anon_thp_test_cases(void) struct test_case const *test_case = &anon_thp_test_cases[i]; ksft_print_msg("[RUN] %s\n", test_case->desc); - do_run_with_thp(test_case->fn, THP_RUN_PMD); + do_run_with_thp(test_case->fn, THP_RUN_PMD, pmdsize); } } static int tests_per_anon_thp_test_case(void) { - return thpsize ? 1 : 0; + return pmdsize ? 1 : 0; } typedef void (*non_anon_test_fn)(char *mem, const char *smem, size_t size); @@ -1419,7 +1429,7 @@ static void run_with_huge_zeropage(non_anon_test_fn fn, const char *desc) } /* For alignment purposes, we need twice the thp size. */ - mmap_size = 2 * thpsize; + mmap_size = 2 * pmdsize; mmap_mem = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (mmap_mem == MAP_FAILED) { @@ -1434,11 +1444,11 @@ static void run_with_huge_zeropage(non_anon_test_fn fn, const char *desc) } /* We need a THP-aligned memory area. */ - mem = (char *)(((uintptr_t)mmap_mem + thpsize) & ~(thpsize - 1)); - smem = (char *)(((uintptr_t)mmap_smem + thpsize) & ~(thpsize - 1)); + mem = (char *)(((uintptr_t)mmap_mem + pmdsize) & ~(pmdsize - 1)); + smem = (char *)(((uintptr_t)mmap_smem + pmdsize) & ~(pmdsize - 1)); - ret = madvise(mem, thpsize, MADV_HUGEPAGE); - ret |= madvise(smem, thpsize, MADV_HUGEPAGE); + ret = madvise(mem, pmdsize, MADV_HUGEPAGE); + ret |= madvise(smem, pmdsize, MADV_HUGEPAGE); if (ret) { ksft_test_result_fail("MADV_HUGEPAGE failed\n"); goto munmap; @@ -1457,7 +1467,7 @@ static void run_with_huge_zeropage(non_anon_test_fn fn, const char *desc) goto munmap; } - fn(mem, smem, thpsize); + fn(mem, smem, pmdsize); munmap: munmap(mmap_mem, mmap_size); if (mmap_smem != MAP_FAILED) @@ -1650,7 +1660,7 @@ static void run_non_anon_test_case(struct non_anon_test_case const *test_case) run_with_zeropage(test_case->fn, test_case->desc); run_with_memfd(test_case->fn, test_case->desc); run_with_tmpfile(test_case->fn, test_case->desc); - if (thpsize) + if (pmdsize) run_with_huge_zeropage(test_case->fn, test_case->desc); for (i = 0; i < nr_hugetlbsizes; i++) run_with_memfd_hugetlb(test_case->fn, test_case->desc, @@ -1671,7 +1681,7 @@ static int tests_per_non_anon_test_case(void) { int tests = 3 + nr_hugetlbsizes; - if (thpsize) + if (pmdsize) tests += 1; return tests; } @@ -1683,10 +1693,13 @@ int main(int argc, char **argv) ksft_print_header(); pagesize = getpagesize(); - thpsize = read_pmd_pagesize(); - if (thpsize) + pmdsize = read_pmd_pagesize(); + if (pmdsize) { + ksft_print_msg("[INFO] detected PMD size: %zu KiB\n", + pmdsize / 1024); ksft_print_msg("[INFO] detected THP size: %zu KiB\n", - thpsize / 1024); + pmdsize / 1024); + } nr_hugetlbsizes = detect_hugetlb_page_sizes(hugetlbsizes, ARRAY_SIZE(hugetlbsizes)); detect_huge_zeropage(); -- cgit v1.2.3 From c0f79103322c322ea9342d52c2d81528b7b56232 Mon Sep 17 00:00:00 2001 From: Ryan Roberts Date: Thu, 7 Dec 2023 16:12:11 +0000 Subject: selftests/mm/cow: add tests for anonymous multi-size THP Add tests similar to the existing PMD-sized THP tests, but which operate on memory backed by (PTE-mapped) multi-size THP. This reuses all the existing infrastructure. If the test suite detects that multi-size THP is not supported by the kernel, the new tests are skipped. Link: https://lkml.kernel.org/r/20231207161211.2374093-11-ryan.roberts@arm.com Signed-off-by: Ryan Roberts Reviewed-by: David Hildenbrand Tested-by: Kefeng Wang Tested-by: John Hubbard Cc: Alistair Popple Cc: Anshuman Khandual Cc: Barry Song Cc: Catalin Marinas Cc: David Rientjes Cc: "Huang, Ying" Cc: Hugh Dickins Cc: Itaru Kitayama Cc: Kirill A. Shutemov Cc: Luis Chamberlain Cc: Matthew Wilcox (Oracle) Cc: Vlastimil Babka Cc: Yang Shi Cc: Yin Fengwei Cc: Yu Zhao Cc: Zi Yan Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/cow.c | 82 ++++++++++++++++++++++++++++++++++------ 1 file changed, 70 insertions(+), 12 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/mm/cow.c b/tools/testing/selftests/mm/cow.c index a284918b1172..363bf5f801be 100644 --- a/tools/testing/selftests/mm/cow.c +++ b/tools/testing/selftests/mm/cow.c @@ -29,15 +29,49 @@ #include "../../../../mm/gup_test.h" #include "../kselftest.h" #include "vm_util.h" +#include "thp_settings.h" static size_t pagesize; static int pagemap_fd; static size_t pmdsize; +static int nr_thpsizes; +static size_t thpsizes[20]; static int nr_hugetlbsizes; static size_t hugetlbsizes[10]; static int gup_fd; static bool has_huge_zeropage; +static int sz2ord(size_t size) +{ + return __builtin_ctzll(size / pagesize); +} + +static int detect_thp_sizes(size_t sizes[], int max) +{ + int count = 0; + unsigned long orders; + size_t kb; + int i; + + /* thp not supported at all. */ + if (!pmdsize) + return 0; + + orders = 1UL << sz2ord(pmdsize); + orders |= thp_supported_orders(); + + for (i = 0; orders && count < max; i++) { + if (!(orders & (1UL << i))) + continue; + orders &= ~(1UL << i); + kb = (pagesize >> 10) << i; + sizes[count++] = kb * 1024; + ksft_print_msg("[INFO] detected THP size: %zu KiB\n", kb); + } + + return count; +} + static void detect_huge_zeropage(void) { int fd = open("/sys/kernel/mm/transparent_hugepage/use_zero_page", @@ -1101,15 +1135,27 @@ static void run_anon_test_case(struct test_case const *test_case) run_with_base_page(test_case->fn, test_case->desc); run_with_base_page_swap(test_case->fn, test_case->desc); - if (pmdsize) { - run_with_thp(test_case->fn, test_case->desc, pmdsize); - run_with_thp_swap(test_case->fn, test_case->desc, pmdsize); - run_with_pte_mapped_thp(test_case->fn, test_case->desc, pmdsize); - run_with_pte_mapped_thp_swap(test_case->fn, test_case->desc, pmdsize); - run_with_single_pte_of_thp(test_case->fn, test_case->desc, pmdsize); - run_with_single_pte_of_thp_swap(test_case->fn, test_case->desc, pmdsize); - run_with_partial_mremap_thp(test_case->fn, test_case->desc, pmdsize); - run_with_partial_shared_thp(test_case->fn, test_case->desc, pmdsize); + for (i = 0; i < nr_thpsizes; i++) { + size_t size = thpsizes[i]; + struct thp_settings settings = *thp_current_settings(); + + settings.hugepages[sz2ord(pmdsize)].enabled = THP_NEVER; + settings.hugepages[sz2ord(size)].enabled = THP_ALWAYS; + thp_push_settings(&settings); + + if (size == pmdsize) { + run_with_thp(test_case->fn, test_case->desc, size); + run_with_thp_swap(test_case->fn, test_case->desc, size); + } + + run_with_pte_mapped_thp(test_case->fn, test_case->desc, size); + run_with_pte_mapped_thp_swap(test_case->fn, test_case->desc, size); + run_with_single_pte_of_thp(test_case->fn, test_case->desc, size); + run_with_single_pte_of_thp_swap(test_case->fn, test_case->desc, size); + run_with_partial_mremap_thp(test_case->fn, test_case->desc, size); + run_with_partial_shared_thp(test_case->fn, test_case->desc, size); + + thp_pop_settings(); } for (i = 0; i < nr_hugetlbsizes; i++) run_with_hugetlb(test_case->fn, test_case->desc, @@ -1130,8 +1176,9 @@ static int tests_per_anon_test_case(void) { int tests = 2 + nr_hugetlbsizes; + tests += 6 * nr_thpsizes; if (pmdsize) - tests += 8; + tests += 2; return tests; } @@ -1689,16 +1736,22 @@ static int tests_per_non_anon_test_case(void) int main(int argc, char **argv) { int err; + struct thp_settings default_settings; ksft_print_header(); pagesize = getpagesize(); pmdsize = read_pmd_pagesize(); if (pmdsize) { + /* Only if THP is supported. */ + thp_read_settings(&default_settings); + default_settings.hugepages[sz2ord(pmdsize)].enabled = THP_INHERIT; + thp_save_settings(); + thp_push_settings(&default_settings); + ksft_print_msg("[INFO] detected PMD size: %zu KiB\n", pmdsize / 1024); - ksft_print_msg("[INFO] detected THP size: %zu KiB\n", - pmdsize / 1024); + nr_thpsizes = detect_thp_sizes(thpsizes, ARRAY_SIZE(thpsizes)); } nr_hugetlbsizes = detect_hugetlb_page_sizes(hugetlbsizes, ARRAY_SIZE(hugetlbsizes)); @@ -1717,6 +1770,11 @@ int main(int argc, char **argv) run_anon_thp_test_cases(); run_non_anon_test_cases(); + if (pmdsize) { + /* Only if THP is supported. */ + thp_restore_settings(); + } + err = ksft_get_fail_cnt(); if (err) ksft_exit_fail_msg("%d out of %d tests failed\n", -- cgit v1.2.3 From 03d69d49da496e31246f41a017b32b68b9d2362e Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Fri, 8 Dec 2023 10:04:50 +0800 Subject: maple_tree: fix warning comparing pointer to 0 Avoid pointer type value compared with 0 to make code clear. ./tools/testing/radix-tree/maple.c:34142:15-16: WARNING comparing pointer to 0. Link: https://lkml.kernel.org/r/20231208020450.7003-1-jiapeng.chong@linux.alibaba.com Reported-by: Abaci Robot Closes: https://bugzilla.openanolis.cn/show_bug.cgi?id=7696 Signed-off-by: Jiapeng Chong Cc: Liam R. Howlett Cc: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- tools/testing/radix-tree/maple.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/radix-tree/maple.c b/tools/testing/radix-tree/maple.c index 35cc8c2a10f4..f1caf4bcf937 100644 --- a/tools/testing/radix-tree/maple.c +++ b/tools/testing/radix-tree/maple.c @@ -34139,7 +34139,7 @@ STORE, 140501948112896, 140501948116991, mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE); check_erase2_testset(mt, set27, ARRAY_SIZE(set27)); rcu_barrier(); - MT_BUG_ON(mt, 0 != mtree_load(mt, 140415537422336)); + MT_BUG_ON(mt, NULL != mtree_load(mt, 140415537422336)); mt_set_non_kernel(0); mt_validate(mt); mtree_destroy(mt); @@ -34263,7 +34263,7 @@ STORE, 140501948112896, 140501948116991, mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE); check_erase2_testset(mt, set37, ARRAY_SIZE(set37)); rcu_barrier(); - MT_BUG_ON(mt, 0 != mtree_load(mt, 94637033459712)); + MT_BUG_ON(mt, NULL != mtree_load(mt, 94637033459712)); mt_validate(mt); mtree_destroy(mt); @@ -34271,7 +34271,7 @@ STORE, 140501948112896, 140501948116991, mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE); check_erase2_testset(mt, set38, ARRAY_SIZE(set38)); rcu_barrier(); - MT_BUG_ON(mt, 0 != mtree_load(mt, 94637033459712)); + MT_BUG_ON(mt, NULL != mtree_load(mt, 94637033459712)); mt_validate(mt); mtree_destroy(mt); -- cgit v1.2.3 From 306abb63a8cab566bf80860c5430b1fa316646b7 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Tue, 12 Dec 2023 19:48:06 +0000 Subject: selftests/damon: implement a python module for test-purpose DAMON sysfs controls Patch series "selftests/damon: add Python-written DAMON functionality tests", v2. DAMON exports most of its functionality via its sysfs interface. Hence most DAMON functionality tests could be implemented using the interface. However, because the interfaces require simple but multiple operations for many controls, writing all such tests from the scratch could be repetitive and time consuming. Implement a minimum DAMON sysfs control module, and a couple of DAMON functionality tests using the control module. The first test is for ensuring minimum accuracy of data access monitoring, and the second test is for finding if a previously found and fixed bug is introduced again. Note that the DAMON sysfs control module is only for avoiding duplicating code in tests. For convenient and general control of DAMON, users should use DAMON user-space tools that developed for the purpose, such as damo[1]. [1] https://github.com/damonitor/damo Patches Sequence ---------------- This patchset is constructed with five patches. The first three patches implement a Python-written test implementation-purpose DAMON sysfs control module. The implementation is incrementally done in the sequence of the basic data structure (first patch) first, kdamonds start command (second patch) next, and finally DAMOS tried bytes update command (third patch). Then two patches for implementing selftests using the module follows. The fourth patch implements a basic functionality test of DAMON for working set estimation accuracy. Finally, the fifth patch implements a corner case test for a previously found bug. This patch (of 5): Implement a python module for DAMON sysfs controls. The module is aimed to be useful for writing DAMON functionality tests in future. Nonetheless, this module is only representing a subset of DAMON sysfs files. Following commits will implement more DAMON sysfs controls. Link: https://lkml.kernel.org/r/20231212194810.54457-1-sj@kernel.org Link: https://lkml.kernel.org/r/20231212194810.54457-2-sj@kernel.org Signed-off-by: SeongJae Park Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/damon/_damon_sysfs.py | 102 ++++++++++++++++++++++++++ 1 file changed, 102 insertions(+) create mode 100644 tools/testing/selftests/damon/_damon_sysfs.py (limited to 'tools') diff --git a/tools/testing/selftests/damon/_damon_sysfs.py b/tools/testing/selftests/damon/_damon_sysfs.py new file mode 100644 index 000000000000..78101846ab66 --- /dev/null +++ b/tools/testing/selftests/damon/_damon_sysfs.py @@ -0,0 +1,102 @@ +# SPDX-License-Identifier: GPL-2.0 + +class DamosAccessPattern: + size = None + nr_accesses = None + age = None + scheme = None + + def __init__(self, size=None, nr_accesses=None, age=None): + self.size = size + self.nr_accesses = nr_accesses + self.age = age + + if self.size == None: + self.size = [0, 2**64 - 1] + if self.nr_accesses == None: + self.nr_accesses = [0, 2**64 - 1] + if self.age == None: + self.age = [0, 2**64 - 1] + +class Damos: + action = None + access_pattern = None + # todo: Support quotas, watermarks, stats, tried_regions + idx = None + context = None + + def __init__(self, action='stat', access_pattern=DamosAccessPattern()): + self.action = action + self.access_pattern = access_pattern + self.access_pattern.scheme = self + +class DamonTarget: + pid = None + # todo: Support target regions if test is made + idx = None + context = None + + def __init__(self, pid): + self.pid = pid + +class DamonAttrs: + sample_us = None + aggr_us = None + update_us = None + min_nr_regions = None + max_nr_regions = None + context = None + + def __init__(self, sample_us=5000, aggr_us=100000, update_us=1000000, + min_nr_regions=10, max_nr_regions=1000): + self.sample_us = sample_us + self.aggr_us = aggr_us + self.update_us = update_us + self.min_nr_regions = min_nr_regions + self.max_nr_regions = max_nr_regions + +class DamonCtx: + ops = None + monitoring_attrs = None + targets = None + schemes = None + kdamond = None + idx = None + + def __init__(self, ops='paddr', monitoring_attrs=DamonAttrs(), targets=[], + schemes=[]): + self.ops = ops + self.monitoring_attrs = monitoring_attrs + self.monitoring_attrs.context = self + + self.targets = targets + for idx, target in enumerate(self.targets): + target.idx = idx + target.context = self + + self.schemes = schemes + for idx, scheme in enumerate(self.schemes): + scheme.idx = idx + scheme.context = self + +class Kdamond: + state = None + pid = None + contexts = None + idx = None # index of this kdamond between siblings + kdamonds = None # parent + + def __init__(self, contexts=[]): + self.contexts = contexts + for idx, context in enumerate(self.contexts): + context.idx = idx + context.kdamond = self + +class Kdamonds: + kdamonds = [] + + def __init__(self, kdamonds=[]): + self.kdamonds = kdamonds + for idx, kdamond in enumerate(self.kdamonds): + kdamond.idx = idx + kdamond.kdamonds = self -- cgit v1.2.3 From f5f0e5a2bef9e46f7a674b71d7f2a4c4b7e6bc5d Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Tue, 12 Dec 2023 19:48:07 +0000 Subject: selftests/damon/_damon_sysfs: implement kdamonds start function Extend the tests-writing-purpose DAMON sysfs control module to support the kdamonds start functionality. Link: https://lkml.kernel.org/r/20231212194810.54457-3-sj@kernel.org Signed-off-by: SeongJae Park Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/damon/_damon_sysfs.py | 206 ++++++++++++++++++++++++++ 1 file changed, 206 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/damon/_damon_sysfs.py b/tools/testing/selftests/damon/_damon_sysfs.py index 78101846ab66..6b99f87a5f1e 100644 --- a/tools/testing/selftests/damon/_damon_sysfs.py +++ b/tools/testing/selftests/damon/_damon_sysfs.py @@ -1,5 +1,28 @@ # SPDX-License-Identifier: GPL-2.0 +import os + +sysfs_root = '/sys/kernel/mm/damon/admin' + +def write_file(path, string): + "Returns error string if failed, or None otherwise" + string = '%s' % string + try: + with open(path, 'w') as f: + f.write(string) + except Exception as e: + return '%s' % e + return None + +def read_file(path): + '''Returns the read content and error string. The read content is None if + the reading failed''' + try: + with open(path, 'r') as f: + return f.read(), None + except Exception as e: + return None, '%s' % e + class DamosAccessPattern: size = None nr_accesses = None @@ -18,6 +41,35 @@ class DamosAccessPattern: if self.age == None: self.age = [0, 2**64 - 1] + def sysfs_dir(self): + return os.path.join(self.scheme.sysfs_dir(), 'access_pattern') + + def stage(self): + err = write_file( + os.path.join(self.sysfs_dir(), 'sz', 'min'), self.size[0]) + if err != None: + return err + err = write_file( + os.path.join(self.sysfs_dir(), 'sz', 'max'), self.size[1]) + if err != None: + return err + err = write_file(os.path.join(self.sysfs_dir(), 'nr_accesses', 'min'), + self.nr_accesses[0]) + if err != None: + return err + err = write_file(os.path.join(self.sysfs_dir(), 'nr_accesses', 'max'), + self.nr_accesses[1]) + if err != None: + return err + err = write_file( + os.path.join(self.sysfs_dir(), 'age', 'min'), self.age[0]) + if err != None: + return err + err = write_file( + os.path.join(self.sysfs_dir(), 'age', 'max'), self.age[1]) + if err != None: + return err + class Damos: action = None access_pattern = None @@ -30,6 +82,39 @@ class Damos: self.access_pattern = access_pattern self.access_pattern.scheme = self + def sysfs_dir(self): + return os.path.join( + self.context.sysfs_dir(), 'schemes', '%d' % self.idx) + + def stage(self): + err = write_file(os.path.join(self.sysfs_dir(), 'action'), self.action) + if err != None: + return err + err = self.access_pattern.stage() + if err != None: + return err + + # disable quotas + err = write_file(os.path.join(self.sysfs_dir(), 'quotas', 'ms'), '0') + if err != None: + return err + err = write_file( + os.path.join(self.sysfs_dir(), 'quotas', 'bytes'), '0') + if err != None: + return err + + # disable watermarks + err = write_file( + os.path.join(self.sysfs_dir(), 'watermarks', 'metric'), 'none') + if err != None: + return err + + # disable filters + err = write_file( + os.path.join(self.sysfs_dir(), 'filters', 'nr_filters'), '0') + if err != None: + return err + class DamonTarget: pid = None # todo: Support target regions if test is made @@ -39,6 +124,18 @@ class DamonTarget: def __init__(self, pid): self.pid = pid + def sysfs_dir(self): + return os.path.join( + self.context.sysfs_dir(), 'targets', '%d' % self.idx) + + def stage(self): + err = write_file( + os.path.join(self.sysfs_dir(), 'regions', 'nr_regions'), '0') + if err != None: + return err + return write_file( + os.path.join(self.sysfs_dir(), 'pid_target'), self.pid) + class DamonAttrs: sample_us = None aggr_us = None @@ -55,6 +152,40 @@ class DamonAttrs: self.min_nr_regions = min_nr_regions self.max_nr_regions = max_nr_regions + def interval_sysfs_dir(self): + return os.path.join(self.context.sysfs_dir(), 'monitoring_attrs', + 'intervals') + + def nr_regions_range_sysfs_dir(self): + return os.path.join(self.context.sysfs_dir(), 'monitoring_attrs', + 'nr_regions') + + def stage(self): + err = write_file(os.path.join(self.interval_sysfs_dir(), 'sample_us'), + self.sample_us) + if err != None: + return err + err = write_file(os.path.join(self.interval_sysfs_dir(), 'aggr_us'), + self.aggr_us) + if err != None: + return err + err = write_file(os.path.join(self.interval_sysfs_dir(), 'update_us'), + self.update_us) + if err != None: + return err + + err = write_file( + os.path.join(self.nr_regions_range_sysfs_dir(), 'min'), + self.min_nr_regions) + if err != None: + return err + + err = write_file( + os.path.join(self.nr_regions_range_sysfs_dir(), 'max'), + self.max_nr_regions) + if err != None: + return err + class DamonCtx: ops = None monitoring_attrs = None @@ -79,6 +210,46 @@ class DamonCtx: scheme.idx = idx scheme.context = self + def sysfs_dir(self): + return os.path.join(self.kdamond.sysfs_dir(), 'contexts', + '%d' % self.idx) + + def stage(self): + err = write_file( + os.path.join(self.sysfs_dir(), 'operations'), self.ops) + if err != None: + return err + err = self.monitoring_attrs.stage() + if err != None: + return err + + nr_targets_file = os.path.join( + self.sysfs_dir(), 'targets', 'nr_targets') + content, err = read_file(nr_targets_file) + if err != None: + return err + if int(content) != len(self.targets): + err = write_file(nr_targets_file, '%d' % len(self.targets)) + if err != None: + return err + for target in self.targets: + err = target.stage() + if err != None: + return err + + nr_schemes_file = os.path.join( + self.sysfs_dir(), 'schemes', 'nr_schemes') + content, err = read_file(nr_schemes_file) + if int(content) != len(self.schemes): + err = write_file(nr_schemes_file, '%d' % len(self.schemes)) + if err != None: + return err + for scheme in self.schemes: + err = scheme.stage() + if err != None: + return err + return None + class Kdamond: state = None pid = None @@ -92,6 +263,27 @@ class Kdamond: context.idx = idx context.kdamond = self + def sysfs_dir(self): + return os.path.join(self.kdamonds.sysfs_dir(), '%d' % self.idx) + + def start(self): + nr_contexts_file = os.path.join(self.sysfs_dir(), + 'contexts', 'nr_contexts') + content, err = read_file(nr_contexts_file) + if err != None: + return err + if int(content) != len(self.contexts): + err = write_file(nr_contexts_file, '%d' % len(self.contexts)) + if err != None: + return err + + for context in self.contexts: + err = context.stage() + if err != None: + return err + err = write_file(os.path.join(self.sysfs_dir(), 'state'), 'on') + return err + class Kdamonds: kdamonds = [] @@ -100,3 +292,17 @@ class Kdamonds: for idx, kdamond in enumerate(self.kdamonds): kdamond.idx = idx kdamond.kdamonds = self + + def sysfs_dir(self): + return os.path.join(sysfs_root, 'kdamonds') + + def start(self): + err = write_file(os.path.join(self.sysfs_dir(), 'nr_kdamonds'), + '%s' % len(self.kdamonds)) + if err != None: + return err + for kdamond in self.kdamonds: + err = kdamond.start() + if err != None: + return err + return None -- cgit v1.2.3 From 3402c6ce398e33bf1733f619756dd068ca2e2aa5 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Tue, 12 Dec 2023 19:48:08 +0000 Subject: selftests/damon/_damon_sysfs: implement updat_schemes_tried_bytes command Implement update_schemes_tried_bytes command of DAMON sysfs interface in _damon_sysfs.py. It is not only making the update, but also read the updated value from the sysfs interface and store it in the Kdamond python objects so that the user of the module can easily get the value. Link: https://lkml.kernel.org/r/20231212194810.54457-4-sj@kernel.org Signed-off-by: SeongJae Park Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/damon/_damon_sysfs.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/damon/_damon_sysfs.py b/tools/testing/selftests/damon/_damon_sysfs.py index 6b99f87a5f1e..e98cf4b6a4b7 100644 --- a/tools/testing/selftests/damon/_damon_sysfs.py +++ b/tools/testing/selftests/damon/_damon_sysfs.py @@ -76,6 +76,7 @@ class Damos: # todo: Support quotas, watermarks, stats, tried_regions idx = None context = None + tried_bytes = None def __init__(self, action='stat', access_pattern=DamosAccessPattern()): self.action = action @@ -284,6 +285,19 @@ class Kdamond: err = write_file(os.path.join(self.sysfs_dir(), 'state'), 'on') return err + def update_schemes_tried_bytes(self): + err = write_file(os.path.join(self.sysfs_dir(), 'state'), + 'update_schemes_tried_bytes') + if err != None: + return err + for context in self.contexts: + for scheme in context.schemes: + content, err = read_file(os.path.join(scheme.sysfs_dir(), + 'tried_regions', 'total_bytes')) + if err != None: + return err + scheme.tried_bytes = int(content) + class Kdamonds: kdamonds = [] -- cgit v1.2.3 From b5906f5f7359f561c5915dc146ced1bc2733401c Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Tue, 12 Dec 2023 19:48:09 +0000 Subject: selftests/damon: add a test for update_schemes_tried_regions sysfs command Add a selftest for verifying the accuracy of DAMON's access monitoring functionality. The test starts a program of artificial access pattern, monitor the access pattern using DAMON, and check if DAMON finds expected amount of hot data region (working set size) with only acceptable error rate. Note that the acceptable error rate is set with only naive assumptions and small number of tests. Hence failures of the test may not always mean DAMON is broken. Rather than that, those could be a signal to better understand the real accuracy level of DAMON in wider environments. Based on further finding, we could optimize DAMON or adjust the expectation of the test. Link: https://lkml.kernel.org/r/20231212194810.54457-5-sj@kernel.org Signed-off-by: SeongJae Park Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/damon/Makefile | 2 + tools/testing/selftests/damon/access_memory.c | 41 ++++++++++++++++ ..._update_schemes_tried_regions_wss_estimation.py | 55 ++++++++++++++++++++++ 3 files changed, 98 insertions(+) create mode 100644 tools/testing/selftests/damon/access_memory.c create mode 100644 tools/testing/selftests/damon/sysfs_update_schemes_tried_regions_wss_estimation.py (limited to 'tools') diff --git a/tools/testing/selftests/damon/Makefile b/tools/testing/selftests/damon/Makefile index b71247ba7196..90ffafc42c5e 100644 --- a/tools/testing/selftests/damon/Makefile +++ b/tools/testing/selftests/damon/Makefile @@ -2,6 +2,7 @@ # Makefile for damon selftests TEST_GEN_FILES += huge_count_read_write +TEST_GEN_FILES += access_memory TEST_FILES = _chk_dependency.sh _debugfs_common.sh TEST_PROGS = debugfs_attrs.sh debugfs_schemes.sh debugfs_target_ids.sh @@ -9,6 +10,7 @@ TEST_PROGS += debugfs_empty_targets.sh debugfs_huge_count_read_write.sh TEST_PROGS += debugfs_duplicate_context_creation.sh TEST_PROGS += debugfs_rm_non_contexts.sh TEST_PROGS += sysfs.sh sysfs_update_removed_scheme_dir.sh +TEST_PROGS += sysfs_update_schemes_tried_regions_wss_estimation.py TEST_PROGS += reclaim.sh lru_sort.sh include ../lib.mk diff --git a/tools/testing/selftests/damon/access_memory.c b/tools/testing/selftests/damon/access_memory.c new file mode 100644 index 000000000000..585a2fa54329 --- /dev/null +++ b/tools/testing/selftests/damon/access_memory.c @@ -0,0 +1,41 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Artificial memory access program for testing DAMON. + */ + +#include +#include +#include +#include + +int main(int argc, char *argv[]) +{ + char **regions; + clock_t start_clock; + int nr_regions; + int sz_region; + int access_time_ms; + int i; + + if (argc != 4) { + printf("Usage: %s