summaryrefslogtreecommitdiff
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r--mm/compaction.c10
-rw-r--r--mm/dmapool.c31
-rw-r--r--mm/memory-failure.c8
-rw-r--r--mm/mempolicy.c22
-rw-r--r--mm/mmap.c14
-rw-r--r--mm/nommu.c15
-rw-r--r--mm/page_alloc.c11
-rw-r--r--mm/shmem.c26
-rw-r--r--mm/sparse.c10
-rw-r--r--mm/vmscan.c83
10 files changed, 124 insertions, 106 deletions
diff --git a/mm/compaction.c b/mm/compaction.c
index 9eef55838fca..694eaabaaebd 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -713,7 +713,15 @@ static void isolate_freepages(struct zone *zone,
/* Found a block suitable for isolating free pages from */
isolated = 0;
- end_pfn = min(pfn + pageblock_nr_pages, zone_end_pfn);
+
+ /*
+ * As pfn may not start aligned, pfn+pageblock_nr_page
+ * may cross a MAX_ORDER_NR_PAGES boundary and miss
+ * a pfn_valid check. Ensure isolate_freepages_block()
+ * only scans within a pageblock
+ */
+ end_pfn = ALIGN(pfn + 1, pageblock_nr_pages);
+ end_pfn = min(end_pfn, zone_end_pfn);
isolated = isolate_freepages_block(cc, pfn, end_pfn,
freelist, false);
nr_freepages += isolated;
diff --git a/mm/dmapool.c b/mm/dmapool.c
index c5ab33bca0a8..da1b0f0b8709 100644
--- a/mm/dmapool.c
+++ b/mm/dmapool.c
@@ -50,7 +50,6 @@ struct dma_pool { /* the pool */
size_t allocation;
size_t boundary;
char name[32];
- wait_queue_head_t waitq;
struct list_head pools;
};
@@ -62,8 +61,6 @@ struct dma_page { /* cacheable header for 'allocation' bytes */
unsigned int offset;
};
-#define POOL_TIMEOUT_JIFFIES ((100 /* msec */ * HZ) / 1000)
-
static DEFINE_MUTEX(pools_lock);
static ssize_t
@@ -172,7 +169,6 @@ struct dma_pool *dma_pool_create(const char *name, struct device *dev,
retval->size = size;
retval->boundary = boundary;
retval->allocation = allocation;
- init_waitqueue_head(&retval->waitq);
if (dev) {
int ret;
@@ -227,7 +223,6 @@ static struct dma_page *pool_alloc_page(struct dma_pool *pool, gfp_t mem_flags)
memset(page->vaddr, POOL_POISON_FREED, pool->allocation);
#endif
pool_initialise_page(pool, page);
- list_add(&page->page_list, &pool->page_list);
page->in_use = 0;
page->offset = 0;
} else {
@@ -315,30 +310,21 @@ void *dma_pool_alloc(struct dma_pool *pool, gfp_t mem_flags,
might_sleep_if(mem_flags & __GFP_WAIT);
spin_lock_irqsave(&pool->lock, flags);
- restart:
list_for_each_entry(page, &pool->page_list, page_list) {
if (page->offset < pool->allocation)
goto ready;
}
- page = pool_alloc_page(pool, GFP_ATOMIC);
- if (!page) {
- if (mem_flags & __GFP_WAIT) {
- DECLARE_WAITQUEUE(wait, current);
- __set_current_state(TASK_UNINTERRUPTIBLE);
- __add_wait_queue(&pool->waitq, &wait);
- spin_unlock_irqrestore(&pool->lock, flags);
+ /* pool_alloc_page() might sleep, so temporarily drop &pool->lock */
+ spin_unlock_irqrestore(&pool->lock, flags);
- schedule_timeout(POOL_TIMEOUT_JIFFIES);
+ page = pool_alloc_page(pool, mem_flags);
+ if (!page)
+ return NULL;
- spin_lock_irqsave(&pool->lock, flags);
- __remove_wait_queue(&pool->waitq, &wait);
- goto restart;
- }
- retval = NULL;
- goto done;
- }
+ spin_lock_irqsave(&pool->lock, flags);
+ list_add(&page->page_list, &pool->page_list);
ready:
page->in_use++;
offset = page->offset;
@@ -348,7 +334,6 @@ void *dma_pool_alloc(struct dma_pool *pool, gfp_t mem_flags,
#ifdef DMAPOOL_DEBUG
memset(retval, POOL_POISON_ALLOCATED, pool->size);
#endif
- done:
spin_unlock_irqrestore(&pool->lock, flags);
return retval;
}
@@ -435,8 +420,6 @@ void dma_pool_free(struct dma_pool *pool, void *vaddr, dma_addr_t dma)
page->in_use--;
*(int *)vaddr = page->offset;
page->offset = offset;
- if (waitqueue_active(&pool->waitq))
- wake_up_locked(&pool->waitq);
/*
* Resist a temptation to do
* if (!is_page_busy(page)) pool_free_page(pool, page);
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 6c5899b9034a..8b20278be6a6 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1476,9 +1476,17 @@ int soft_offline_page(struct page *page, int flags)
{
int ret;
unsigned long pfn = page_to_pfn(page);
+ struct page *hpage = compound_trans_head(page);
if (PageHuge(page))
return soft_offline_huge_page(page, flags);
+ if (PageTransHuge(hpage)) {
+ if (PageAnon(hpage) && unlikely(split_huge_page(hpage))) {
+ pr_info("soft offline: %#lx: failed to split THP\n",
+ pfn);
+ return -EBUSY;
+ }
+ }
ret = get_any_page(page, pfn, flags);
if (ret < 0)
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index d04a8a54c294..4ea600da8940 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2037,28 +2037,6 @@ struct mempolicy *__mpol_dup(struct mempolicy *old)
return new;
}
-/*
- * If *frompol needs [has] an extra ref, copy *frompol to *tompol ,
- * eliminate the * MPOL_F_* flags that require conditional ref and
- * [NOTE!!!] drop the extra ref. Not safe to reference *frompol directly
- * after return. Use the returned value.
- *
- * Allows use of a mempolicy for, e.g., multiple allocations with a single
- * policy lookup, even if the policy needs/has extra ref on lookup.
- * shmem_readahead needs this.
- */
-struct mempolicy *__mpol_cond_copy(struct mempolicy *tompol,
- struct mempolicy *frompol)
-{
- if (!mpol_needs_cond_ref(frompol))
- return frompol;
-
- *tompol = *frompol;
- tompol->flags &= ~MPOL_F_SHARED; /* copy doesn't need unref */
- __mpol_put(frompol);
- return tompol;
-}
-
/* Slow path of a mempolicy comparison */
bool __mpol_equal(struct mempolicy *a, struct mempolicy *b)
{
diff --git a/mm/mmap.c b/mm/mmap.c
index 9a796c41e7d9..7d416055f08c 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -89,6 +89,20 @@ int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT;
struct percpu_counter vm_committed_as ____cacheline_aligned_in_smp;
/*
+ * The global memory commitment made in the system can be a metric
+ * that can be used to drive ballooning decisions when Linux is hosted
+ * as a guest. On Hyper-V, the host implements a policy engine for dynamically
+ * balancing memory across competing virtual machines that are hosted.
+ * Several metrics drive this policy engine including the guest reported
+ * memory commitment.
+ */
+unsigned long vm_memory_committed(void)
+{
+ return percpu_counter_read_positive(&vm_committed_as);
+}
+EXPORT_SYMBOL_GPL(vm_memory_committed);
+
+/*
* Check that a process has enough memory to allocate a new virtual
* mapping. 0 means there is enough memory for the allocation to
* succeed and -ENOMEM implies there is not.
diff --git a/mm/nommu.c b/mm/nommu.c
index 45131b41bcdb..79c3cac87afa 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -66,6 +66,21 @@ int heap_stack_gap = 0;
atomic_long_t mmap_pages_allocated;
+/*
+ * The global memory commitment made in the system can be a metric
+ * that can be used to drive ballooning decisions when Linux is hosted
+ * as a guest. On Hyper-V, the host implements a policy engine for dynamically
+ * balancing memory across competing virtual machines that are hosted.
+ * Several metrics drive this policy engine including the guest reported
+ * memory commitment.
+ */
+unsigned long vm_memory_committed(void)
+{
+ return percpu_counter_read_positive(&vm_committed_as);
+}
+
+EXPORT_SYMBOL_GPL(vm_memory_committed);
+
EXPORT_SYMBOL(mem_map);
EXPORT_SYMBOL(num_physpages);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 7bb35ac0964a..7e208f0ad68c 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1405,7 +1405,7 @@ int capture_free_page(struct page *page, int alloc_order, int migratetype)
mt = get_pageblock_migratetype(page);
if (unlikely(mt != MIGRATE_ISOLATE))
- __mod_zone_freepage_state(zone, -(1UL << order), mt);
+ __mod_zone_freepage_state(zone, -(1UL << alloc_order), mt);
if (alloc_order != order)
expand(zone, page, alloc_order, order,
@@ -1422,7 +1422,7 @@ int capture_free_page(struct page *page, int alloc_order, int migratetype)
}
}
- return 1UL << order;
+ return 1UL << alloc_order;
}
/*
@@ -2416,8 +2416,9 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
goto nopage;
restart:
- wake_all_kswapd(order, zonelist, high_zoneidx,
- zone_idx(preferred_zone));
+ if (!(gfp_mask & __GFP_NO_KSWAPD))
+ wake_all_kswapd(order, zonelist, high_zoneidx,
+ zone_idx(preferred_zone));
/*
* OK, we're below the kswapd watermark and have kicked background
@@ -2494,7 +2495,7 @@ rebalance:
* system then fail the allocation instead of entering direct reclaim.
*/
if ((deferred_compaction || contended_compaction) &&
- (gfp_mask & (__GFP_MOVABLE|__GFP_REPEAT)) == __GFP_MOVABLE)
+ (gfp_mask & __GFP_NO_KSWAPD))
goto nopage;
/* Try direct reclaim and then allocating */
diff --git a/mm/shmem.c b/mm/shmem.c
index 89341b658bd0..50c5b8f3a359 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -910,25 +910,29 @@ static struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo)
static struct page *shmem_swapin(swp_entry_t swap, gfp_t gfp,
struct shmem_inode_info *info, pgoff_t index)
{
- struct mempolicy mpol, *spol;
struct vm_area_struct pvma;
-
- spol = mpol_cond_copy(&mpol,
- mpol_shared_policy_lookup(&info->policy, index));
+ struct page *page;
/* Create a pseudo vma that just contains the policy */
pvma.vm_start = 0;
/* Bias interleave by inode number to distribute better across nodes */
pvma.vm_pgoff = index + info->vfs_inode.i_ino;
pvma.vm_ops = NULL;
- pvma.vm_policy = spol;
- return swapin_readahead(swap, gfp, &pvma, 0);
+ pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, index);
+
+ page = swapin_readahead(swap, gfp, &pvma, 0);
+
+ /* Drop reference taken by mpol_shared_policy_lookup() */
+ mpol_cond_put(pvma.vm_policy);
+
+ return page;
}
static struct page *shmem_alloc_page(gfp_t gfp,
struct shmem_inode_info *info, pgoff_t index)
{
struct vm_area_struct pvma;
+ struct page *page;
/* Create a pseudo vma that just contains the policy */
pvma.vm_start = 0;
@@ -937,10 +941,12 @@ static struct page *shmem_alloc_page(gfp_t gfp,
pvma.vm_ops = NULL;
pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, index);
- /*
- * alloc_page_vma() will drop the shared policy reference
- */
- return alloc_page_vma(gfp, &pvma, 0);
+ page = alloc_page_vma(gfp, &pvma, 0);
+
+ /* Drop reference taken by mpol_shared_policy_lookup() */
+ mpol_cond_put(pvma.vm_policy);
+
+ return page;
}
#else /* !CONFIG_NUMA */
#ifdef CONFIG_TMPFS
diff --git a/mm/sparse.c b/mm/sparse.c
index fac95f2888f2..a83de2f72b30 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -617,7 +617,7 @@ static void __kfree_section_memmap(struct page *memmap, unsigned long nr_pages)
{
return; /* XXX: Not implemented yet */
}
-static void free_map_bootmem(struct page *page, unsigned long nr_pages)
+static void free_map_bootmem(struct page *memmap, unsigned long nr_pages)
{
}
#else
@@ -658,10 +658,11 @@ static void __kfree_section_memmap(struct page *memmap, unsigned long nr_pages)
get_order(sizeof(struct page) * nr_pages));
}
-static void free_map_bootmem(struct page *page, unsigned long nr_pages)
+static void free_map_bootmem(struct page *memmap, unsigned long nr_pages)
{
unsigned long maps_section_nr, removing_section_nr, i;
unsigned long magic;
+ struct page *page = virt_to_page(memmap);
for (i = 0; i < nr_pages; i++, page++) {
magic = (unsigned long) page->lru.next;
@@ -710,13 +711,10 @@ static void free_section_usemap(struct page *memmap, unsigned long *usemap)
*/
if (memmap) {
- struct page *memmap_page;
- memmap_page = virt_to_page(memmap);
-
nr_pages = PAGE_ALIGN(PAGES_PER_SECTION * sizeof(struct page))
>> PAGE_SHIFT;
- free_map_bootmem(memmap_page, nr_pages);
+ free_map_bootmem(memmap, nr_pages);
}
}
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 48550c66f1f2..b7ed37675644 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2207,9 +2207,12 @@ static bool pfmemalloc_watermark_ok(pg_data_t *pgdat)
* Throttle direct reclaimers if backing storage is backed by the network
* and the PFMEMALLOC reserve for the preferred node is getting dangerously
* depleted. kswapd will continue to make progress and wake the processes
- * when the low watermark is reached
+ * when the low watermark is reached.
+ *
+ * Returns true if a fatal signal was delivered during throttling. If this
+ * happens, the page allocator should not consider triggering the OOM killer.
*/
-static void throttle_direct_reclaim(gfp_t gfp_mask, struct zonelist *zonelist,
+static bool throttle_direct_reclaim(gfp_t gfp_mask, struct zonelist *zonelist,
nodemask_t *nodemask)
{
struct zone *zone;
@@ -2224,13 +2227,20 @@ static void throttle_direct_reclaim(gfp_t gfp_mask, struct zonelist *zonelist,
* processes to block on log_wait_commit().
*/
if (current->flags & PF_KTHREAD)
- return;
+ goto out;
+
+ /*
+ * If a fatal signal is pending, this process should not throttle.
+ * It should return quickly so it can exit and free its memory
+ */
+ if (fatal_signal_pending(current))
+ goto out;
/* Check if the pfmemalloc reserves are ok */
first_zones_zonelist(zonelist, high_zoneidx, NULL, &zone);
pgdat = zone->zone_pgdat;
if (pfmemalloc_watermark_ok(pgdat))
- return;
+ goto out;
/* Account for the throttling */
count_vm_event(PGSCAN_DIRECT_THROTTLE);
@@ -2246,12 +2256,20 @@ static void throttle_direct_reclaim(gfp_t gfp_mask, struct zonelist *zonelist,
if (!(gfp_mask & __GFP_FS)) {
wait_event_interruptible_timeout(pgdat->pfmemalloc_wait,
pfmemalloc_watermark_ok(pgdat), HZ);
- return;
+
+ goto check_pending;
}
/* Throttle until kswapd wakes the process */
wait_event_killable(zone->zone_pgdat->pfmemalloc_wait,
pfmemalloc_watermark_ok(pgdat));
+
+check_pending:
+ if (fatal_signal_pending(current))
+ return true;
+
+out:
+ return false;
}
unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
@@ -2273,13 +2291,12 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
.gfp_mask = sc.gfp_mask,
};
- throttle_direct_reclaim(gfp_mask, zonelist, nodemask);
-
/*
- * Do not enter reclaim if fatal signal is pending. 1 is returned so
- * that the page allocator does not consider triggering OOM
+ * Do not enter reclaim if fatal signal was delivered while throttled.
+ * 1 is returned so that the page allocator does not OOM kill at this
+ * point.
*/
- if (fatal_signal_pending(current))
+ if (throttle_direct_reclaim(gfp_mask, zonelist, nodemask))
return 1;
trace_mm_vmscan_direct_reclaim_begin(order,
@@ -2397,6 +2414,19 @@ static void age_active_anon(struct zone *zone, struct scan_control *sc)
} while (memcg);
}
+static bool zone_balanced(struct zone *zone, int order,
+ unsigned long balance_gap, int classzone_idx)
+{
+ if (!zone_watermark_ok_safe(zone, order, high_wmark_pages(zone) +
+ balance_gap, classzone_idx, 0))
+ return false;
+
+ if (COMPACTION_BUILD && order && !compaction_suitable(zone, order))
+ return false;
+
+ return true;
+}
+
/*
* pgdat_balanced is used when checking if a node is balanced for high-order
* allocations. Only zones that meet watermarks and are in a zone allowed
@@ -2475,8 +2505,7 @@ static bool prepare_kswapd_sleep(pg_data_t *pgdat, int order, long remaining,
continue;
}
- if (!zone_watermark_ok_safe(zone, order, high_wmark_pages(zone),
- i, 0))
+ if (!zone_balanced(zone, order, 0, i))
all_zones_ok = false;
else
balanced += zone->present_pages;
@@ -2585,8 +2614,7 @@ loop_again:
break;
}
- if (!zone_watermark_ok_safe(zone, order,
- high_wmark_pages(zone), 0, 0)) {
+ if (!zone_balanced(zone, order, 0, 0)) {
end_zone = i;
break;
} else {
@@ -2662,9 +2690,8 @@ loop_again:
testorder = 0;
if ((buffer_heads_over_limit && is_highmem_idx(i)) ||
- !zone_watermark_ok_safe(zone, testorder,
- high_wmark_pages(zone) + balance_gap,
- end_zone, 0)) {
+ !zone_balanced(zone, testorder,
+ balance_gap, end_zone)) {
shrink_zone(zone, &sc);
reclaim_state->reclaimed_slab = 0;
@@ -2691,8 +2718,7 @@ loop_again:
continue;
}
- if (!zone_watermark_ok_safe(zone, testorder,
- high_wmark_pages(zone), end_zone, 0)) {
+ if (!zone_balanced(zone, testorder, 0, end_zone)) {
all_zones_ok = 0;
/*
* We are still under min water mark. This
@@ -2797,29 +2823,10 @@ out:
if (!populated_zone(zone))
continue;
- if (zone->all_unreclaimable &&
- sc.priority != DEF_PRIORITY)
- continue;
-
- /* Would compaction fail due to lack of free memory? */
- if (COMPACTION_BUILD &&
- compaction_suitable(zone, order) == COMPACT_SKIPPED)
- goto loop_again;
-
- /* Confirm the zone is balanced for order-0 */
- if (!zone_watermark_ok(zone, 0,
- high_wmark_pages(zone), 0, 0)) {
- order = sc.order = 0;
- goto loop_again;
- }
-
/* Check if the memory needs to be defragmented. */
if (zone_watermark_ok(zone, order,
low_wmark_pages(zone), *classzone_idx, 0))
zones_need_compaction = 0;
-
- /* If balanced, clear the congested flag */
- zone_clear_flag(zone, ZONE_CONGESTED);
}
if (zones_need_compaction)