diff options
| author | Muchun Song <songmuchun@bytedance.com> | 2026-03-05 19:52:28 +0800 |
|---|---|---|
| committer | Andrew Morton <akpm@linux-foundation.org> | 2026-04-18 00:10:45 -0700 |
| commit | 49717c7bd6b8e14329c2d04b1e8ec691175b6f4e (patch) | |
| tree | 713cb5f8dfce919ee6c574c6582df5faea5df379 | |
| parent | d10adce2c1a8ec61b46ff1841d3662f3c7a66d7a (diff) | |
writeback: prevent memory cgroup release in writeback module
In the near future, a folio will no longer pin its corresponding memory
cgroup. From then on, a caller that uses the memory cgroup returned by
folio_memcg() must either hold the RCU read lock or acquire a reference
to that memory cgroup, so that it cannot be released while still in use.
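This patch protects the writeback code against such a release in two ways:
__inode_attach_wb() and wbc_account_cgroup_owner() now obtain the css via
get_mem_cgroup_css_from_folio() and drop it with css_put() when done, while
mem_cgroup_track_foreign_dirty() and the track_foreign_dirty tracepoint
call folio_memcg() under the RCU read lock.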
This serves as a preparatory measure for the reparenting of the
LRU pages.
Link: https://lore.kernel.org/645f99bc344575417f67def3744f975596df2793.1772711148.git.zhengqi.arch@bytedance.com
Signed-off-by: Muchun Song <songmuchun@bytedance.com>
Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
Reviewed-by: Harry Yoo <harry.yoo@oracle.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
Cc: Allen Pais <apais@linux.microsoft.com>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Chengming Zhou <chengming.zhou@linux.dev>
Cc: Chen Ridong <chenridong@huawei.com>
Cc: David Hildenbrand <david@kernel.org>
Cc: Hamza Mahfooz <hamzamahfooz@linux.microsoft.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Imran Khan <imran.f.khan@oracle.com>
Cc: Kamalesh Babulal <kamalesh.babulal@oracle.com>
Cc: Lance Yang <lance.yang@linux.dev>
Cc: Liam Howlett <Liam.Howlett@oracle.com>
Cc: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Michal Koutný <mkoutny@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Nhat Pham <nphamcs@gmail.com>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Usama Arif <usamaarif642@gmail.com>
Cc: Vlastimil Babka <vbabka@kernel.org>
Cc: Wei Xu <weixugc@google.com>
Cc: Yosry Ahmed <yosry@kernel.org>
Cc: Yuanchu Xie <yuanchu@google.com>
Cc: Zi Yan <ziy@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
| -rw-r--r-- | fs/fs-writeback.c | 22 | ||||
| -rw-r--r-- | include/linux/memcontrol.h | 9 | ||||
| -rw-r--r-- | include/trace/events/writeback.h | 3 | ||||
| -rw-r--r-- | mm/memcontrol.c | 14 |
4 files changed, 29 insertions, 19 deletions
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 7c75ed7e8979..c3442a38450c 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -280,15 +280,13 @@ void __inode_attach_wb(struct inode *inode, struct folio *folio) if (inode_cgwb_enabled(inode)) { struct cgroup_subsys_state *memcg_css; - if (folio) { - memcg_css = mem_cgroup_css_from_folio(folio); - wb = wb_get_create(bdi, memcg_css, GFP_ATOMIC); - } else { - /* must pin memcg_css, see wb_get_create() */ + /* must pin memcg_css, see wb_get_create() */ + if (folio) + memcg_css = get_mem_cgroup_css_from_folio(folio); + else memcg_css = task_get_css(current, memory_cgrp_id); - wb = wb_get_create(bdi, memcg_css, GFP_ATOMIC); - css_put(memcg_css); - } + wb = wb_get_create(bdi, memcg_css, GFP_ATOMIC); + css_put(memcg_css); } if (!wb) @@ -979,16 +977,16 @@ void wbc_account_cgroup_owner(struct writeback_control *wbc, struct folio *folio if (!wbc->wb || wbc->no_cgroup_owner) return; - css = mem_cgroup_css_from_folio(folio); + css = get_mem_cgroup_css_from_folio(folio); /* dead cgroups shouldn't contribute to inode ownership arbitration */ if (!css_is_online(css)) - return; + goto out; id = css->id; if (id == wbc->wb_id) { wbc->wb_bytes += bytes; - return; + goto out; } if (id == wbc->wb_lcand_id) @@ -1001,6 +999,8 @@ void wbc_account_cgroup_owner(struct writeback_control *wbc, struct folio *folio wbc->wb_tcand_bytes += bytes; else wbc->wb_tcand_bytes -= min(bytes, wbc->wb_tcand_bytes); +out: + css_put(css); } EXPORT_SYMBOL_GPL(wbc_account_cgroup_owner); diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 9a015258a2ff..4454f03a4acf 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -894,7 +894,7 @@ static inline bool mm_match_cgroup(struct mm_struct *mm, return match; } -struct cgroup_subsys_state *mem_cgroup_css_from_folio(struct folio *folio); +struct cgroup_subsys_state *get_mem_cgroup_css_from_folio(struct folio *folio); ino_t page_cgroup_ino(struct page *page); 
static inline bool mem_cgroup_online(struct mem_cgroup *memcg) @@ -1563,9 +1563,14 @@ static inline void mem_cgroup_track_foreign_dirty(struct folio *folio, if (mem_cgroup_disabled()) return; + if (!folio_memcg_charged(folio)) + return; + + rcu_read_lock(); memcg = folio_memcg(folio); - if (unlikely(memcg && &memcg->css != wb->memcg_css)) + if (unlikely(&memcg->css != wb->memcg_css)) mem_cgroup_track_foreign_dirty_slowpath(folio, wb); + rcu_read_unlock(); } void mem_cgroup_flush_foreign(struct bdi_writeback *wb); diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h index 4d3d8c8f3a1b..b849b8cc96b1 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h @@ -294,7 +294,10 @@ TRACE_EVENT(track_foreign_dirty, __entry->ino = inode ? inode->i_ino : 0; __entry->memcg_id = wb->memcg_css->id; __entry->cgroup_ino = __trace_wb_assign_cgroup(wb); + + rcu_read_lock(); __entry->page_cgroup_ino = cgroup_ino(folio_memcg(folio)->css.cgroup); + rcu_read_unlock(); ), TP_printk("bdi %s[%llu]: ino=%lu memcg_id=%u cgroup_ino=%lu page_cgroup_ino=%lu", diff --git a/mm/memcontrol.c b/mm/memcontrol.c index dbcf0d2bf114..d7d4b44c5af5 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -243,7 +243,7 @@ DEFINE_STATIC_KEY_FALSE(memcg_bpf_enabled_key); EXPORT_SYMBOL(memcg_bpf_enabled_key); /** - * mem_cgroup_css_from_folio - css of the memcg associated with a folio + * get_mem_cgroup_css_from_folio - acquire a css of the memcg associated with a folio * @folio: folio of interest * * If memcg is bound to the default hierarchy, css of the memcg associated @@ -253,14 +253,16 @@ EXPORT_SYMBOL(memcg_bpf_enabled_key); * If memcg is bound to a traditional hierarchy, the css of root_mem_cgroup * is returned. 
*/ -struct cgroup_subsys_state *mem_cgroup_css_from_folio(struct folio *folio) +struct cgroup_subsys_state *get_mem_cgroup_css_from_folio(struct folio *folio) { - struct mem_cgroup *memcg = folio_memcg(folio); + struct mem_cgroup *memcg; - if (!memcg || !cgroup_subsys_on_dfl(memory_cgrp_subsys)) - memcg = root_mem_cgroup; + if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) + return &root_mem_cgroup->css; - return &memcg->css; + memcg = get_mem_cgroup_from_folio(folio); + + return memcg ? &memcg->css : &root_mem_cgroup->css; } /** |
