Diffstat (limited to 'mm/page_alloc.c')
 mm/page_alloc.c | 83 +++++++++++++++++++++++++++++++++-----------
 1 file changed, 61 insertions(+), 22 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 822e05f1a964..f65c4edf199d 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -167,6 +167,33 @@ static inline void __pcp_trylock_noop(unsigned long *flags) { }
pcp_trylock_finish(UP_flags); \
})
+/*
+ * With the UP spinlock implementation, when we spin_lock(&pcp->lock) (e.g.
+ * for a potentially remote cpu drain) and get interrupted by an operation
+ * that attempts pcp_spin_trylock(), we can't rely on the trylock failing,
+ * because the UP spinlock assumptions make the trylock a no-op. So we have
+ * to turn that spin_lock() into a spin_lock_irqsave(). This works because
+ * on UP there are no remote cpus, so we can only be locking the only
+ * existing local one.
+ */
+#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)
+static inline void __flags_noop(unsigned long *flags) { }
+#define pcp_spin_lock_maybe_irqsave(ptr, flags) \
+({ \
+ __flags_noop(&(flags)); \
+ spin_lock(&(ptr)->lock); \
+})
+#define pcp_spin_unlock_maybe_irqrestore(ptr, flags) \
+({ \
+ spin_unlock(&(ptr)->lock); \
+ __flags_noop(&(flags)); \
+})
+#else
+#define pcp_spin_lock_maybe_irqsave(ptr, flags) \
+ spin_lock_irqsave(&(ptr)->lock, flags)
+#define pcp_spin_unlock_maybe_irqrestore(ptr, flags) \
+ spin_unlock_irqrestore(&(ptr)->lock, flags)
+#endif
+
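A minimal sketch of the interleaving the comment above describes (editorial illustration, not part of the patch), assuming the stock !SMP/!PREEMPT_RT spinlock stubs where spin_trylock() can never fail:

	spin_lock(&pcp->lock);	/* UP stub: near no-op, IRQs stay enabled */
	/* IRQ arrives here; the handler calls pcp_spin_trylock(), which
	 * cannot fail on UP, so it also "acquires" the lock and both
	 * contexts walk the pcplists concurrently. */

	/* The helpers above close the window by disabling IRQs instead: */
	pcp_spin_lock_maybe_irqsave(pcp, UP_flags);
	/* ... drain the pcplists safely ... */
	pcp_spin_unlock_maybe_irqrestore(pcp, UP_flags);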
#ifdef CONFIG_USE_PERCPU_NUMA_NODE_ID
DEFINE_PER_CPU(int, numa_node);
EXPORT_PER_CPU_SYMBOL(numa_node);
@@ -914,6 +941,17 @@ buddy_merge_likely(unsigned long pfn, unsigned long buddy_pfn,
NULL) != NULL;
}
+static void change_pageblock_range(struct page *pageblock_page,
+ int start_order, int migratetype)
+{
+ int nr_pageblocks = 1 << (start_order - pageblock_order);
+
+ while (nr_pageblocks--) {
+ set_pageblock_migratetype(pageblock_page, migratetype);
+ pageblock_page += pageblock_nr_pages;
+ }
+}
+
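As a concrete illustration of the helper above (numbers assumed, not from the patch): with pageblock_order == 9, an order-10 buddy spans two pageblocks, so nr_pageblocks = 1 << (10 - 9) = 2 and the loop retags both:

	/* illustrative expansion for start_order == 10, pageblock_order == 9 */
	set_pageblock_migratetype(pageblock_page, migratetype);	/* block 0 */
	pageblock_page += pageblock_nr_pages;			/* advance 512 pages */
	set_pageblock_migratetype(pageblock_page, migratetype);	/* block 1 */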
/*
* Freeing function for a buddy system allocator.
*
@@ -1000,7 +1038,7 @@ static inline void __free_one_page(struct page *page,
* expand() down the line puts the sub-blocks
* on the right freelists.
*/
- set_pageblock_migratetype(buddy, migratetype);
+ change_pageblock_range(buddy, order, migratetype);
}
combined_pfn = buddy_pfn & pfn;
@@ -2147,17 +2185,6 @@ bool pageblock_unisolate_and_move_free_pages(struct zone *zone, struct page *pag
#endif /* CONFIG_MEMORY_ISOLATION */
-static void change_pageblock_range(struct page *pageblock_page,
- int start_order, int migratetype)
-{
- int nr_pageblocks = 1 << (start_order - pageblock_order);
-
- while (nr_pageblocks--) {
- set_pageblock_migratetype(pageblock_page, migratetype);
- pageblock_page += pageblock_nr_pages;
- }
-}
-
static inline bool boost_watermark(struct zone *zone)
{
unsigned long max_boost;
@@ -2556,6 +2583,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
bool decay_pcp_high(struct zone *zone, struct per_cpu_pages *pcp)
{
int high_min, to_drain, to_drain_batched, batch;
+ unsigned long UP_flags;
bool todo = false;
high_min = READ_ONCE(pcp->high_min);
@@ -2575,9 +2603,9 @@ bool decay_pcp_high(struct zone *zone, struct per_cpu_pages *pcp)
to_drain = pcp->count - pcp->high;
while (to_drain > 0) {
to_drain_batched = min(to_drain, batch);
- spin_lock(&pcp->lock);
+ pcp_spin_lock_maybe_irqsave(pcp, UP_flags);
free_pcppages_bulk(zone, to_drain_batched, pcp, 0);
- spin_unlock(&pcp->lock);
+ pcp_spin_unlock_maybe_irqrestore(pcp, UP_flags);
todo = true;
to_drain -= to_drain_batched;
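A worked pass through the loop above, with illustrative numbers: pcp->count == 1000, pcp->high == 200 and batch == 63 give to_drain == 800, so the lock is taken and dropped once per batch rather than held across the whole drain:

	to_drain = 1000 - 200;			/* 800 pages above high */
	/* trips 1-12: to_drain_batched = 63	(12 * 63 = 756 freed) */
	/* trip    13: to_drain_batched = 44	(800 - 756 remaining) */
	/* IRQs (on UP) are only disabled for one batch at a time     */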
@@ -2594,14 +2622,15 @@ bool decay_pcp_high(struct zone *zone, struct per_cpu_pages *pcp)
*/
void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
{
+ unsigned long UP_flags;
int to_drain, batch;
batch = READ_ONCE(pcp->batch);
to_drain = min(pcp->count, batch);
if (to_drain > 0) {
- spin_lock(&pcp->lock);
+ pcp_spin_lock_maybe_irqsave(pcp, UP_flags);
free_pcppages_bulk(zone, to_drain, pcp, 0);
- spin_unlock(&pcp->lock);
+ pcp_spin_unlock_maybe_irqrestore(pcp, UP_flags);
}
}
#endif
@@ -2612,10 +2641,11 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
static void drain_pages_zone(unsigned int cpu, struct zone *zone)
{
struct per_cpu_pages *pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu);
+ unsigned long UP_flags;
int count;
do {
- spin_lock(&pcp->lock);
+ pcp_spin_lock_maybe_irqsave(pcp, UP_flags);
count = pcp->count;
if (count) {
int to_drain = min(count,
@@ -2624,7 +2654,7 @@ static void drain_pages_zone(unsigned int cpu, struct zone *zone)
free_pcppages_bulk(zone, to_drain, pcp, 0);
count -= to_drain;
}
- spin_unlock(&pcp->lock);
+ pcp_spin_unlock_maybe_irqrestore(pcp, UP_flags);
} while (count);
}
@@ -5924,7 +5954,7 @@ static int zone_batchsize(struct zone *zone)
* recycled, this leads to the once large chunks of space being
* fragmented and becoming unavailable for high-order allocations.
*/
- return 0;
+ return 1;
#endif
}
@@ -6109,6 +6139,7 @@ static void zone_pcp_update_cacheinfo(struct zone *zone, unsigned int cpu)
{
struct per_cpu_pages *pcp;
struct cpu_cacheinfo *cci;
+ unsigned long UP_flags;
pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu);
cci = get_cpu_cacheinfo(cpu);
@@ -6119,12 +6150,12 @@ static void zone_pcp_update_cacheinfo(struct zone *zone, unsigned int cpu)
* This can reduce zone lock contention without hurting
* cache-hot pages sharing.
*/
- spin_lock(&pcp->lock);
+ pcp_spin_lock_maybe_irqsave(pcp, UP_flags);
if ((cci->per_cpu_data_slice_size >> PAGE_SHIFT) > 3 * pcp->batch)
pcp->flags |= PCPF_FREE_HIGH_BATCH;
else
pcp->flags &= ~PCPF_FREE_HIGH_BATCH;
- spin_unlock(&pcp->lock);
+ pcp_spin_unlock_maybe_irqrestore(pcp, UP_flags);
}
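To make the cache heuristic above concrete (sizes assumed for illustration): a 1 MiB per-CPU data slice with 4 KiB pages comfortably covers a high free batch, so the flag gets set:

	/* illustrative: 1 MiB slice, 4 KiB pages, pcp->batch == 63        */
	/*   per_cpu_data_slice_size >> PAGE_SHIFT = (1 << 20) >> 12 = 256 */
	/*   3 * pcp->batch = 189; 256 > 189 => set PCPF_FREE_HIGH_BATCH   */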
void setup_pcp_cacheinfo(unsigned int cpu)
@@ -6667,11 +6698,19 @@ static int percpu_pagelist_high_fraction_sysctl_handler(const struct ctl_table *
int old_percpu_pagelist_high_fraction;
int ret;
+ /*
+ * Avoid using pcp_batch_high_lock for reads as the value is read
+ * atomically and a race with offlining is harmless.
+ */
+
+ if (!write)
+ return proc_dointvec_minmax(table, write, buffer, length, ppos);
+
mutex_lock(&pcp_batch_high_lock);
old_percpu_pagelist_high_fraction = percpu_pagelist_high_fraction;
ret = proc_dointvec_minmax(table, write, buffer, length, ppos);
- if (!write || ret < 0)
+ if (ret < 0)
goto out;
/* Sanity checking to avoid pcp imbalance */
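The reasoning behind the lock-free read path, sketched with hypothetical names (new_fraction and val are illustrative, not from the patch): an aligned int is loaded and stored in a single access, so a reader racing with a writer sees either the old or the new fraction, never a torn value:

	percpu_pagelist_high_fraction = new_fraction;	/* writer, under mutex */
	val = percpu_pagelist_high_fraction;		/* lock-free reader    */
	/* val is either the old or the new fraction, so the read side of
	 * proc_dointvec_minmax() needs no pcp_batch_high_lock.           */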