summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- include/linux/memcontrol.h | 34
-rw-r--r-- include/linux/swap.h | 3
-rw-r--r-- mm/compaction.c | 29
-rw-r--r-- mm/memcontrol.c | 53
-rw-r--r-- mm/swap.c | 6
5 files changed, 73 insertions, 52 deletions
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 38f94c7271c1..12982875073e 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -741,7 +741,15 @@ out:
* folio_lruvec - return lruvec for isolating/putting an LRU folio
* @folio: Pointer to the folio.
*
- * This function relies on folio->mem_cgroup being stable.
+ * Call with rcu_read_lock() held to ensure the lifetime of the returned lruvec.
+ * Note that this alone will NOT guarantee the stability of the folio->lruvec
+ * association; the folio can be reparented to an ancestor if this races with
+ * cgroup deletion.
+ *
+ * Use folio_lruvec_lock() to ensure both lifetime and stability of the binding.
+ * Once a lruvec is locked, folio_lruvec() can be called on other folios, and
+ * their binding is stable if the returned lruvec matches the one the caller has
+ * locked. Useful for lock batching.
*/
static inline struct lruvec *folio_lruvec(struct folio *folio)
{
@@ -764,15 +772,6 @@ struct lruvec *folio_lruvec_lock_irq(struct folio *folio);
struct lruvec *folio_lruvec_lock_irqsave(struct folio *folio,
unsigned long *flags);
-#ifdef CONFIG_DEBUG_VM
-void lruvec_memcg_debug(struct lruvec *lruvec, struct folio *folio);
-#else
-static inline
-void lruvec_memcg_debug(struct lruvec *lruvec, struct folio *folio)
-{
-}
-#endif
-
static inline
struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css){
return css ? container_of(css, struct mem_cgroup, css) : NULL;
@@ -1198,11 +1197,6 @@ static inline struct lruvec *folio_lruvec(struct folio *folio)
return &pgdat->__lruvec;
}
-static inline
-void lruvec_memcg_debug(struct lruvec *lruvec, struct folio *folio)
-{
-}
-
static inline struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg)
{
return NULL;
@@ -1261,6 +1255,7 @@ static inline struct lruvec *folio_lruvec_lock(struct folio *folio)
{
struct pglist_data *pgdat = folio_pgdat(folio);
+ rcu_read_lock();
spin_lock(&pgdat->__lruvec.lru_lock);
return &pgdat->__lruvec;
}
@@ -1269,6 +1264,7 @@ static inline struct lruvec *folio_lruvec_lock_irq(struct folio *folio)
{
struct pglist_data *pgdat = folio_pgdat(folio);
+ rcu_read_lock();
spin_lock_irq(&pgdat->__lruvec.lru_lock);
return &pgdat->__lruvec;
}
@@ -1278,6 +1274,7 @@ static inline struct lruvec *folio_lruvec_lock_irqsave(struct folio *folio,
{
struct pglist_data *pgdat = folio_pgdat(folio);
+ rcu_read_lock();
spin_lock_irqsave(&pgdat->__lruvec.lru_lock, *flagsp);
return &pgdat->__lruvec;
}
@@ -1500,23 +1497,26 @@ static inline struct lruvec *parent_lruvec(struct lruvec *lruvec)
static inline void lruvec_lock_irq(struct lruvec *lruvec)
{
+ rcu_read_lock();
spin_lock_irq(&lruvec->lru_lock);
}
static inline void lruvec_unlock(struct lruvec *lruvec)
{
spin_unlock(&lruvec->lru_lock);
+ rcu_read_unlock();
}
static inline void lruvec_unlock_irq(struct lruvec *lruvec)
{
spin_unlock_irq(&lruvec->lru_lock);
+ rcu_read_unlock();
}
-static inline void lruvec_unlock_irqrestore(struct lruvec *lruvec,
- unsigned long flags)
+static inline void lruvec_unlock_irqrestore(struct lruvec *lruvec, unsigned long flags)
{
spin_unlock_irqrestore(&lruvec->lru_lock, flags);
+ rcu_read_unlock();
}
/* Test requires a stable folio->memcg binding, see folio_memcg() */
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 4b1f13b5bbad..ea08e2afa2b4 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -310,8 +310,7 @@ extern unsigned long totalreserve_pages;
/* linux/mm/swap.c */
void lru_note_cost_unlock_irq(struct lruvec *lruvec, bool file,
- unsigned int nr_io, unsigned int nr_rotated)
- __releases(lruvec->lru_lock);
+ unsigned int nr_io, unsigned int nr_rotated);
void lru_note_cost_refault(struct folio *);
void folio_add_lru(struct folio *);
void folio_add_lru_vma(struct folio *, struct vm_area_struct *);
diff --git a/mm/compaction.c b/mm/compaction.c
index c3e338aaa0ff..3648ce22c807 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -518,6 +518,24 @@ static bool compact_lock_irqsave(spinlock_t *lock, unsigned long *flags,
return true;
}
+static struct lruvec *
+compact_folio_lruvec_lock_irqsave(struct folio *folio, unsigned long *flags,
+ struct compact_control *cc)
+{
+ struct lruvec *lruvec;
+
+ rcu_read_lock();
+retry:
+ lruvec = folio_lruvec(folio);
+ compact_lock_irqsave(&lruvec->lru_lock, flags, cc);
+ if (unlikely(lruvec_memcg(lruvec) != folio_memcg(folio))) {
+ spin_unlock_irqrestore(&lruvec->lru_lock, *flags);
+ goto retry;
+ }
+
+ return lruvec;
+}
+
/*
* Compaction requires the taking of some coarse locks that are potentially
* very heavily contended. The lock should be periodically unlocked to avoid
@@ -839,7 +857,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
{
pg_data_t *pgdat = cc->zone->zone_pgdat;
unsigned long nr_scanned = 0, nr_isolated = 0;
- struct lruvec *lruvec;
+ struct lruvec *lruvec = NULL;
unsigned long flags = 0;
struct lruvec *locked = NULL;
struct folio *folio = NULL;
@@ -1153,18 +1171,17 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
if (!folio_test_clear_lru(folio))
goto isolate_fail_put;
- lruvec = folio_lruvec(folio);
+ if (locked)
+ lruvec = folio_lruvec(folio);
/* If we already hold the lock, we can skip some rechecking */
- if (lruvec != locked) {
+ if (lruvec != locked || !locked) {
if (locked)
lruvec_unlock_irqrestore(locked, flags);
- compact_lock_irqsave(&lruvec->lru_lock, &flags, cc);
+ lruvec = compact_folio_lruvec_lock_irqsave(folio, &flags, cc);
locked = lruvec;
- lruvec_memcg_debug(lruvec, folio);
-
/*
* Try get exclusive access under lock. If marked for
* skip, the scan is aborted unless the current context
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 10021cef176b..0d4eaaea2b54 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1206,23 +1206,6 @@ void mem_cgroup_scan_tasks(struct mem_cgroup *memcg,
}
}
-#ifdef CONFIG_DEBUG_VM
-void lruvec_memcg_debug(struct lruvec *lruvec, struct folio *folio)
-{
- struct mem_cgroup *memcg;
-
- if (mem_cgroup_disabled())
- return;
-
- memcg = folio_memcg(folio);
-
- if (!memcg)
- VM_BUG_ON_FOLIO(!mem_cgroup_is_root(lruvec_memcg(lruvec)), folio);
- else
- VM_BUG_ON_FOLIO(lruvec_memcg(lruvec) != memcg, folio);
-}
-#endif
-
/**
* folio_lruvec_lock - Lock the lruvec for a folio.
* @folio: Pointer to the folio.
@@ -1232,14 +1215,20 @@ void lruvec_memcg_debug(struct lruvec *lruvec, struct folio *folio)
* - folio_test_lru false
* - folio frozen (refcount of 0)
*
- * Return: The lruvec this folio is on with its lock held.
+ * Return: The lruvec this folio is on, with its lock and the RCU read lock held.
*/
struct lruvec *folio_lruvec_lock(struct folio *folio)
{
- struct lruvec *lruvec = folio_lruvec(folio);
+ struct lruvec *lruvec;
+ rcu_read_lock();
+retry:
+ lruvec = folio_lruvec(folio);
spin_lock(&lruvec->lru_lock);
- lruvec_memcg_debug(lruvec, folio);
+ if (unlikely(lruvec_memcg(lruvec) != folio_memcg(folio))) {
+ spin_unlock(&lruvec->lru_lock);
+ goto retry;
+ }
return lruvec;
}
@@ -1254,14 +1243,20 @@ struct lruvec *folio_lruvec_lock(struct folio *folio)
* - folio frozen (refcount of 0)
*
* Return: The lruvec this folio is on with its lock held and interrupts
- * disabled.
+ * disabled, and with the RCU read lock held.
*/
struct lruvec *folio_lruvec_lock_irq(struct folio *folio)
{
- struct lruvec *lruvec = folio_lruvec(folio);
+ struct lruvec *lruvec;
+ rcu_read_lock();
+retry:
+ lruvec = folio_lruvec(folio);
spin_lock_irq(&lruvec->lru_lock);
- lruvec_memcg_debug(lruvec, folio);
+ if (unlikely(lruvec_memcg(lruvec) != folio_memcg(folio))) {
+ spin_unlock_irq(&lruvec->lru_lock);
+ goto retry;
+ }
return lruvec;
}
@@ -1277,15 +1272,21 @@ struct lruvec *folio_lruvec_lock_irq(struct folio *folio)
* - folio frozen (refcount of 0)
*
* Return: The lruvec this folio is on with its lock held and interrupts
- * disabled.
+ * disabled, and with the RCU read lock held.
*/
struct lruvec *folio_lruvec_lock_irqsave(struct folio *folio,
unsigned long *flags)
{
- struct lruvec *lruvec = folio_lruvec(folio);
+ struct lruvec *lruvec;
+ rcu_read_lock();
+retry:
+ lruvec = folio_lruvec(folio);
spin_lock_irqsave(&lruvec->lru_lock, *flags);
- lruvec_memcg_debug(lruvec, folio);
+ if (unlikely(lruvec_memcg(lruvec) != folio_memcg(folio))) {
+ spin_unlock_irqrestore(&lruvec->lru_lock, *flags);
+ goto retry;
+ }
return lruvec;
}
diff --git a/mm/swap.c b/mm/swap.c
index 009b32d6d344..bcd2b52e5def 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -240,6 +240,7 @@ void folio_rotate_reclaimable(struct folio *folio)
void lru_note_cost_unlock_irq(struct lruvec *lruvec, bool file,
unsigned int nr_io, unsigned int nr_rotated)
__releases(lruvec->lru_lock)
+ __releases(rcu)
{
unsigned long cost;
@@ -253,6 +254,7 @@ void lru_note_cost_unlock_irq(struct lruvec *lruvec, bool file,
cost = nr_io * SWAP_CLUSTER_MAX + nr_rotated;
if (!cost) {
spin_unlock_irq(&lruvec->lru_lock);
+ rcu_read_unlock();
return;
}
@@ -285,8 +287,10 @@ void lru_note_cost_unlock_irq(struct lruvec *lruvec, bool file,
spin_unlock_irq(&lruvec->lru_lock);
lruvec = parent_lruvec(lruvec);
- if (!lruvec)
+ if (!lruvec) {
+ rcu_read_unlock();
break;
+ }
spin_lock_irq(&lruvec->lru_lock);
}
}