| author | Shakeel Butt <shakeel.butt@linux.dev> | 2025-12-25 15:21:09 -0800 |
|---|---|---|
| committer | Andrew Morton <akpm@linux-foundation.org> | 2026-01-26 20:02:23 -0800 |
| commit | e77786b4682e69336e3de3eaeb12ec994027f611 (patch) | |
| tree | 412889d31b2b203720403ceb1e4adf63217d56e9 | |
| parent | 2c4c3e29897d43c431b1cf9432fb66977f262ac2 (diff) | |
memcg: introduce private id API for in-kernel users
Patch series "memcg: separate private and public ID namespaces".
The memory cgroup subsystem maintains a private ID infrastructure that
is decoupled from the cgroup IDs. This private ID system exists because
some kernel objects (like swap entries and shadow entries in the
workingset code) can outlive the cgroup they were associated with.
The motivation is best described in commit 73f576c04b941 ("mm:
memcontrol: fix cgroup creation failure after many small jobs").
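To make the lifetime issue concrete, here is a minimal sketch of the pattern (using the post-series names; the function itself is hypothetical, though the helpers mirror the real swapin path in mem_cgroup_swapin_charge_folio()):

```c
/*
 * Hypothetical illustration only: a swapout record stores the 16-bit
 * private memcg ID. The cgroup may be deleted while the page sits in
 * swap, so the ID (pinned by the swap record's reference) must still
 * resolve, or fail gracefully, at swapin time.
 */
static struct mem_cgroup *sketch_swapin_owner(swp_entry_t entry)
{
	unsigned short id = lookup_swap_cgroup_id(entry);
	struct mem_cgroup *memcg;

	rcu_read_lock();
	memcg = mem_cgroup_from_private_id(id);	/* private-ID lookup */
	if (memcg && !css_tryget_online(&memcg->css))
		memcg = NULL;	/* cgroup already offline */
	rcu_read_unlock();
	return memcg;
}
```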
Unfortunately, some in-kernel users (DAMON, LRU gen debugfs interface,
shrinker debugfs) started exposing these private IDs to userspace.
This is problematic because:
1. The private IDs are internal implementation details that could change
2. Userspace already has access to cgroup IDs through the cgroup
filesystem
3. Using different ID namespaces in different interfaces is confusing
This series cleans up the memcg ID infrastructure by:
1. Explicitly marking the private ID APIs with "private" in their names
to make it clear they are for internal use only (swap/workingset)
2. Making the public cgroup ID APIs (mem_cgroup_id/mem_cgroup_get_from_id)
unconditionally available
3. Converting DAMON, LRU gen, and shrinker debugfs interfaces to use
the public cgroup IDs instead of the private IDs
4. Removing the now-unused wrapper functions and renaming the public
APIs for clarity
After this series (see the sketch after this list):
- mem_cgroup_private_id() / mem_cgroup_from_private_id() are used for
internal kernel objects that outlive their cgroup (swap, workingset)
- mem_cgroup_id() / mem_cgroup_get_from_id() return the public cgroup ID
(from cgroup_id()) for use in userspace-facing interfaces
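In code, the intended split looks roughly like this (a sketch, not a patched call site; sketch_id_usage() is hypothetical, and it assumes the later patches make mem_cgroup_id() return the u64 value from cgroup_id()):

```c
/* Hypothetical sketch of the post-series API split. */
static void sketch_id_usage(struct seq_file *m, struct mem_cgroup *memcg,
			    struct folio *folio, swp_entry_t entry)
{
	/*
	 * Userspace-facing interfaces (DAMON, LRU gen and shrinker
	 * debugfs) print the public cgroup ID, the same value the
	 * cgroup filesystem already exposes.
	 */
	seq_printf(m, "memcg %llu\n", (u64)mem_cgroup_id(memcg));

	/*
	 * Kernel-internal objects that can outlive the cgroup (swap
	 * records, workingset shadow entries) keep using the private
	 * 16-bit ID.
	 */
	swap_cgroup_record(folio, mem_cgroup_private_id(memcg), entry);
}
```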
This patch (of 8):
The memory cgroup maintains a private ID infrastructure, decoupled from the cgroup IDs, for swapout records and shadow entries. The main motivation for this private ID infrastructure is best described in commit 73f576c04b941 ("mm: memcontrol: fix cgroup creation failure after many small jobs").
Unfortunately, some users have started exposing these private IDs to userspace, where they should have used the cgroup IDs that are already exposed through the cgroup filesystem. Let's rename the memcg ID APIs to explicitly mark them private.
No functional change is intended.
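Concretely, after this patch the old public names survive as thin wrappers around the renamed private APIs (taken from the memcontrol hunks below), which is why no functional change is expected:

```c
/* Transitional wrappers added by this patch (from the diff below). */
static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg)
{
	return mem_cgroup_private_id(memcg);
}

struct mem_cgroup *mem_cgroup_from_id(unsigned short id)
{
	return mem_cgroup_from_private_id(id);
}
```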
Link: https://lkml.kernel.org/r/20251225232116.294540-1-shakeel.butt@linux.dev
Link: https://lkml.kernel.org/r/20251225232116.294540-2-shakeel.butt@linux.dev
Signed-off-by: Shakeel Butt <shakeel.butt@linux.dev>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: David Hildenbrand <david@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Qi Zheng <zhengqi.arch@bytedance.com>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: SeongJae Park <sj@kernel.org>
Cc: Wei Xu <weixugc@google.com>
Cc: Yuanchu Xie <yuanchu@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
| file | changes |
|---|---|
| include/linux/memcontrol.h | 24 |
| mm/list_lru.c | 2 |
| mm/memcontrol-v1.c | 6 |
| mm/memcontrol-v1.h | 4 |
| mm/memcontrol.c | 55 |
| mm/workingset.c | 8 |

6 files changed, 61 insertions, 38 deletions
```diff
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index fd400082313a..1c4224bcfb23 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -65,7 +65,7 @@ struct mem_cgroup_reclaim_cookie {
 
 #define MEM_CGROUP_ID_SHIFT	16
 
-struct mem_cgroup_id {
+struct mem_cgroup_private_id {
 	int id;
 	refcount_t ref;
 };
@@ -191,7 +191,7 @@ struct mem_cgroup {
 	struct cgroup_subsys_state css;
 
 	/* Private memcg ID. Used to ID objects that outlive the cgroup */
-	struct mem_cgroup_id id;
+	struct mem_cgroup_private_id id;
 
 	/* Accounted resources */
 	struct page_counter memory;	/* Both v1 & v2 */
@@ -821,13 +821,19 @@ void mem_cgroup_iter_break(struct mem_cgroup *, struct mem_cgroup *);
 void mem_cgroup_scan_tasks(struct mem_cgroup *memcg,
 			   int (*)(struct task_struct *, void *), void *arg);
 
-static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg)
+static inline unsigned short mem_cgroup_private_id(struct mem_cgroup *memcg)
 {
 	if (mem_cgroup_disabled())
 		return 0;
 
 	return memcg->id.id;
 }
+struct mem_cgroup *mem_cgroup_from_private_id(unsigned short id);
+
+static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg)
+{
+	return mem_cgroup_private_id(memcg);
+}
 struct mem_cgroup *mem_cgroup_from_id(unsigned short id);
 
 #ifdef CONFIG_SHRINKER_DEBUG
@@ -1290,6 +1296,18 @@ static inline struct mem_cgroup *mem_cgroup_from_id(unsigned short id)
 	return NULL;
 }
 
+static inline unsigned short mem_cgroup_private_id(struct mem_cgroup *memcg)
+{
+	return 0;
+}
+
+static inline struct mem_cgroup *mem_cgroup_from_private_id(unsigned short id)
+{
+	WARN_ON_ONCE(id);
+	/* XXX: This should always return root_mem_cgroup */
+	return NULL;
+}
+
 #ifdef CONFIG_SHRINKER_DEBUG
 static inline unsigned long mem_cgroup_ino(struct mem_cgroup *memcg)
 {
diff --git a/mm/list_lru.c b/mm/list_lru.c
index 37b642f6cbda..13b9f66d950e 100644
--- a/mm/list_lru.c
+++ b/mm/list_lru.c
@@ -369,7 +369,7 @@ unsigned long list_lru_walk_node(struct list_lru *lru, int nid,
 
 	xa_for_each(&lru->xa, index, mlru) {
 		rcu_read_lock();
-		memcg = mem_cgroup_from_id(index);
+		memcg = mem_cgroup_from_private_id(index);
 		if (!mem_cgroup_tryget(memcg)) {
 			rcu_read_unlock();
 			continue;
diff --git a/mm/memcontrol-v1.c b/mm/memcontrol-v1.c
index 0b50cb122ff3..0e3d972fad33 100644
--- a/mm/memcontrol-v1.c
+++ b/mm/memcontrol-v1.c
@@ -635,14 +635,14 @@ void memcg1_swapout(struct folio *folio, swp_entry_t entry)
 	 * have an ID allocated to it anymore, charge the closest online
 	 * ancestor for the swap instead and transfer the memory+swap charge.
 	 */
-	swap_memcg = mem_cgroup_id_get_online(memcg);
+	swap_memcg = mem_cgroup_private_id_get_online(memcg);
 	nr_entries = folio_nr_pages(folio);
 	/* Get references for the tail pages, too */
 	if (nr_entries > 1)
-		mem_cgroup_id_get_many(swap_memcg, nr_entries - 1);
+		mem_cgroup_private_id_get_many(swap_memcg, nr_entries - 1);
 	mod_memcg_state(swap_memcg, MEMCG_SWAP, nr_entries);
-	swap_cgroup_record(folio, mem_cgroup_id(swap_memcg), entry);
+	swap_cgroup_record(folio, mem_cgroup_private_id(swap_memcg), entry);
 
 	folio_unqueue_deferred_split(folio);
 	folio->memcg_data = 0;
diff --git a/mm/memcontrol-v1.h b/mm/memcontrol-v1.h
index e92b21af92b1..49933925b4ba 100644
--- a/mm/memcontrol-v1.h
+++ b/mm/memcontrol-v1.h
@@ -28,8 +28,8 @@ unsigned long memcg_events(struct mem_cgroup *memcg, int event);
 unsigned long memcg_page_state_output(struct mem_cgroup *memcg, int item);
 int memory_stat_show(struct seq_file *m, void *v);
 
-void mem_cgroup_id_get_many(struct mem_cgroup *memcg, unsigned int n);
-struct mem_cgroup *mem_cgroup_id_get_online(struct mem_cgroup *memcg);
+void mem_cgroup_private_id_get_many(struct mem_cgroup *memcg, unsigned int n);
+struct mem_cgroup *mem_cgroup_private_id_get_online(struct mem_cgroup *memcg);
 
 /* Cgroup v1-specific declarations */
 #ifdef CONFIG_MEMCG_V1
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 75fc22a33b28..25ad8433df2e 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3554,38 +3554,38 @@ static void memcg_wb_domain_size_changed(struct mem_cgroup *memcg)
  */
 #define MEM_CGROUP_ID_MAX	((1UL << MEM_CGROUP_ID_SHIFT) - 1)
 
-static DEFINE_XARRAY_ALLOC1(mem_cgroup_ids);
+static DEFINE_XARRAY_ALLOC1(mem_cgroup_private_ids);
 
-static void mem_cgroup_id_remove(struct mem_cgroup *memcg)
+static void mem_cgroup_private_id_remove(struct mem_cgroup *memcg)
 {
 	if (memcg->id.id > 0) {
-		xa_erase(&mem_cgroup_ids, memcg->id.id);
+		xa_erase(&mem_cgroup_private_ids, memcg->id.id);
 		memcg->id.id = 0;
 	}
 }
 
-void __maybe_unused mem_cgroup_id_get_many(struct mem_cgroup *memcg,
+void __maybe_unused mem_cgroup_private_id_get_many(struct mem_cgroup *memcg,
 		unsigned int n)
 {
 	refcount_add(n, &memcg->id.ref);
 }
 
-static void mem_cgroup_id_put_many(struct mem_cgroup *memcg, unsigned int n)
+static void mem_cgroup_private_id_put_many(struct mem_cgroup *memcg, unsigned int n)
 {
 	if (refcount_sub_and_test(n, &memcg->id.ref)) {
-		mem_cgroup_id_remove(memcg);
+		mem_cgroup_private_id_remove(memcg);
 
 		/* Memcg ID pins CSS */
 		css_put(&memcg->css);
 	}
 }
 
-static inline void mem_cgroup_id_put(struct mem_cgroup *memcg)
+static inline void mem_cgroup_private_id_put(struct mem_cgroup *memcg)
 {
-	mem_cgroup_id_put_many(memcg, 1);
+	mem_cgroup_private_id_put_many(memcg, 1);
 }
 
-struct mem_cgroup *mem_cgroup_id_get_online(struct mem_cgroup *memcg)
+struct mem_cgroup *mem_cgroup_private_id_get_online(struct mem_cgroup *memcg)
 {
 	while (!refcount_inc_not_zero(&memcg->id.ref)) {
 		/*
@@ -3604,15 +3604,20 @@ struct mem_cgroup *mem_cgroup_id_get_online(struct mem_cgroup *memcg)
 }
 
 /**
- * mem_cgroup_from_id - look up a memcg from a memcg id
+ * mem_cgroup_from_private_id - look up a memcg from a memcg id
  * @id: the memcg id to look up
  *
  * Caller must hold rcu_read_lock().
  */
-struct mem_cgroup *mem_cgroup_from_id(unsigned short id)
+struct mem_cgroup *mem_cgroup_from_private_id(unsigned short id)
 {
 	WARN_ON_ONCE(!rcu_read_lock_held());
-	return xa_load(&mem_cgroup_ids, id);
+	return xa_load(&mem_cgroup_private_ids, id);
+}
+
+struct mem_cgroup *mem_cgroup_from_id(unsigned short id)
+{
+	return mem_cgroup_from_private_id(id);
 }
 
 #ifdef CONFIG_SHRINKER_DEBUG
@@ -3711,7 +3716,7 @@ static struct mem_cgroup *mem_cgroup_alloc(struct mem_cgroup *parent)
 	if (!memcg)
 		return ERR_PTR(-ENOMEM);
 
-	error = xa_alloc(&mem_cgroup_ids, &memcg->id.id, NULL,
+	error = xa_alloc(&mem_cgroup_private_ids, &memcg->id.id, NULL,
 			 XA_LIMIT(1, MEM_CGROUP_ID_MAX), GFP_KERNEL);
 	if (error)
 		goto fail;
@@ -3771,7 +3776,7 @@ static struct mem_cgroup *mem_cgroup_alloc(struct mem_cgroup *parent)
 	lru_gen_init_memcg(memcg);
 	return memcg;
 fail:
-	mem_cgroup_id_remove(memcg);
+	mem_cgroup_private_id_remove(memcg);
 	__mem_cgroup_free(memcg);
 	return ERR_PTR(error);
 }
@@ -3854,7 +3859,7 @@ static int mem_cgroup_css_online(struct cgroup_subsys_state *css)
 	css_get(css);
 
 	/*
-	 * Ensure mem_cgroup_from_id() works once we're fully online.
+	 * Ensure mem_cgroup_from_private_id() works once we're fully online.
 	 *
 	 * We could do this earlier and require callers to filter with
 	 * css_tryget_online(). But right now there are no users that
@@ -3863,13 +3868,13 @@ static int mem_cgroup_css_online(struct cgroup_subsys_state *css)
 	 * publish it here at the end of onlining. This matches the
 	 * regular ID destruction during offlining.
 	 */
-	xa_store(&mem_cgroup_ids, memcg->id.id, memcg, GFP_KERNEL);
+	xa_store(&mem_cgroup_private_ids, memcg->id.id, memcg, GFP_KERNEL);
 
 	return 0;
 offline_kmem:
 	memcg_offline_kmem(memcg);
 remove_id:
-	mem_cgroup_id_remove(memcg);
+	mem_cgroup_private_id_remove(memcg);
 	return -ENOMEM;
 }
 
@@ -3892,7 +3897,7 @@ static void mem_cgroup_css_offline(struct cgroup_subsys_state *css)
 
 	drain_all_stock(memcg);
 
-	mem_cgroup_id_put(memcg);
+	mem_cgroup_private_id_put(memcg);
 }
 
 static void mem_cgroup_css_released(struct cgroup_subsys_state *css)
@@ -4779,7 +4784,7 @@ int mem_cgroup_swapin_charge_folio(struct folio *folio, struct mm_struct *mm,
 	id = lookup_swap_cgroup_id(entry);
 	rcu_read_lock();
-	memcg = mem_cgroup_from_id(id);
+	memcg = mem_cgroup_from_private_id(id);
 	if (!memcg || !css_tryget_online(&memcg->css))
 		memcg = get_mem_cgroup_from_mm(mm);
 	rcu_read_unlock();
@@ -5174,22 +5179,22 @@ int __mem_cgroup_try_charge_swap(struct folio *folio, swp_entry_t entry)
 		return 0;
 	}
 
-	memcg = mem_cgroup_id_get_online(memcg);
+	memcg = mem_cgroup_private_id_get_online(memcg);
 
 	if (!mem_cgroup_is_root(memcg) &&
 	    !page_counter_try_charge(&memcg->swap, nr_pages, &counter)) {
 		memcg_memory_event(memcg, MEMCG_SWAP_MAX);
 		memcg_memory_event(memcg, MEMCG_SWAP_FAIL);
-		mem_cgroup_id_put(memcg);
+		mem_cgroup_private_id_put(memcg);
 		return -ENOMEM;
 	}
 
 	/* Get references for the tail pages, too */
 	if (nr_pages > 1)
-		mem_cgroup_id_get_many(memcg, nr_pages - 1);
+		mem_cgroup_private_id_get_many(memcg, nr_pages - 1);
 	mod_memcg_state(memcg, MEMCG_SWAP, nr_pages);
 
-	swap_cgroup_record(folio, mem_cgroup_id(memcg), entry);
+	swap_cgroup_record(folio, mem_cgroup_private_id(memcg), entry);
 
 	return 0;
 }
@@ -5206,7 +5211,7 @@ void __mem_cgroup_uncharge_swap(swp_entry_t entry, unsigned int nr_pages)
 	id = swap_cgroup_clear(entry, nr_pages);
 	rcu_read_lock();
-	memcg = mem_cgroup_from_id(id);
+	memcg = mem_cgroup_from_private_id(id);
 	if (memcg) {
 		if (!mem_cgroup_is_root(memcg)) {
 			if (do_memsw_account())
@@ -5215,7 +5220,7 @@ void __mem_cgroup_uncharge_swap(swp_entry_t entry, unsigned int nr_pages)
 				page_counter_uncharge(&memcg->swap, nr_pages);
 		}
 		mod_memcg_state(memcg, MEMCG_SWAP, -nr_pages);
-		mem_cgroup_id_put_many(memcg, nr_pages);
+		mem_cgroup_private_id_put_many(memcg, nr_pages);
 	}
 	rcu_read_unlock();
 }
diff --git a/mm/workingset.c b/mm/workingset.c
index e9f05634747a..13422d304715 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -254,7 +254,7 @@ static void *lru_gen_eviction(struct folio *folio)
 	hist = lru_hist_from_seq(min_seq);
 	atomic_long_add(delta, &lrugen->evicted[hist][type][tier]);
 
-	return pack_shadow(mem_cgroup_id(memcg), pgdat, token, workingset);
+	return pack_shadow(mem_cgroup_private_id(memcg), pgdat, token, workingset);
 }
 
 /*
@@ -271,7 +271,7 @@ static bool lru_gen_test_recent(void *shadow, struct lruvec **lruvec,
 
 	unpack_shadow(shadow, &memcg_id, &pgdat, token, workingset);
 
-	memcg = mem_cgroup_from_id(memcg_id);
+	memcg = mem_cgroup_from_private_id(memcg_id);
 	*lruvec = mem_cgroup_lruvec(memcg, pgdat);
 
 	max_seq = READ_ONCE((*lruvec)->lrugen.max_seq);
@@ -395,7 +395,7 @@ void *workingset_eviction(struct folio *folio, struct mem_cgroup *target_memcg)
 	lruvec = mem_cgroup_lruvec(target_memcg, pgdat);
 	/* XXX: target_memcg can be NULL, go through lruvec */
-	memcgid = mem_cgroup_id(lruvec_memcg(lruvec));
+	memcgid = mem_cgroup_private_id(lruvec_memcg(lruvec));
 	eviction = atomic_long_read(&lruvec->nonresident_age);
 	eviction >>= bucket_order;
 	workingset_age_nonresident(lruvec, folio_nr_pages(folio));
@@ -456,7 +456,7 @@ bool workingset_test_recent(void *shadow, bool file, bool *workingset,
 	 * would be better if the root_mem_cgroup existed in all
 	 * configurations instead.
 	 */
-	eviction_memcg = mem_cgroup_from_id(memcgid);
+	eviction_memcg = mem_cgroup_from_private_id(memcgid);
 	if (!mem_cgroup_tryget(eviction_memcg))
 		eviction_memcg = NULL;
 	rcu_read_unlock();
```
