summaryrefslogtreecommitdiff
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r--mm/backing-dev.c52
-rw-r--r--mm/bootmem.c2
-rw-r--r--mm/compaction.c156
-rw-r--r--mm/fadvise.c34
-rw-r--r--mm/filemap.c7
-rw-r--r--mm/fremap.c3
-rw-r--r--mm/huge_memory.c1
-rw-r--r--mm/internal.h1
-rw-r--r--mm/kmemleak.c6
-rw-r--r--mm/memblock.c2
-rw-r--r--mm/memcontrol.c7
-rw-r--r--mm/memory_hotplug.c16
-rw-r--r--mm/mempolicy.c2
-rw-r--r--mm/mmap.c10
-rw-r--r--mm/nommu.c6
-rw-r--r--mm/page-writeback.c1
-rw-r--r--mm/page_alloc.c40
-rw-r--r--mm/readahead.c14
-rw-r--r--mm/shmem.c171
-rw-r--r--mm/slab.c9
-rw-r--r--mm/slub.c15
-rw-r--r--mm/vmscan.c1
-rw-r--r--mm/vmstat.c2
23 files changed, 215 insertions, 343 deletions
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 6b4718e2ee34..b41823cc05e6 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -39,12 +39,6 @@ DEFINE_SPINLOCK(bdi_lock);
LIST_HEAD(bdi_list);
LIST_HEAD(bdi_pending_list);
-static struct task_struct *sync_supers_tsk;
-static struct timer_list sync_supers_timer;
-
-static int bdi_sync_supers(void *);
-static void sync_supers_timer_fn(unsigned long);
-
void bdi_lock_two(struct bdi_writeback *wb1, struct bdi_writeback *wb2)
{
if (wb1 < wb2) {
@@ -250,12 +244,6 @@ static int __init default_bdi_init(void)
{
int err;
- sync_supers_tsk = kthread_run(bdi_sync_supers, NULL, "sync_supers");
- BUG_ON(IS_ERR(sync_supers_tsk));
-
- setup_timer(&sync_supers_timer, sync_supers_timer_fn, 0);
- bdi_arm_supers_timer();
-
err = bdi_init(&default_backing_dev_info);
if (!err)
bdi_register(&default_backing_dev_info, NULL, "default");
@@ -270,46 +258,6 @@ int bdi_has_dirty_io(struct backing_dev_info *bdi)
return wb_has_dirty_io(&bdi->wb);
}
-/*
- * kupdated() used to do this. We cannot do it from the bdi_forker_thread()
- * or we risk deadlocking on ->s_umount. The longer term solution would be
- * to implement sync_supers_bdi() or similar and simply do it from the
- * bdi writeback thread individually.
- */
-static int bdi_sync_supers(void *unused)
-{
- set_user_nice(current, 0);
-
- while (!kthread_should_stop()) {
- set_current_state(TASK_INTERRUPTIBLE);
- schedule();
-
- /*
- * Do this periodically, like kupdated() did before.
- */
- sync_supers();
- }
-
- return 0;
-}
-
-void bdi_arm_supers_timer(void)
-{
- unsigned long next;
-
- if (!dirty_writeback_interval)
- return;
-
- next = msecs_to_jiffies(dirty_writeback_interval * 10) + jiffies;
- mod_timer(&sync_supers_timer, round_jiffies_up(next));
-}
-
-static void sync_supers_timer_fn(unsigned long unused)
-{
- wake_up_process(sync_supers_tsk);
- bdi_arm_supers_timer();
-}
-
static void wakeup_timer_fn(unsigned long data)
{
struct backing_dev_info *bdi = (struct backing_dev_info *)data;
diff --git a/mm/bootmem.c b/mm/bootmem.c
index bcb63ac48cc5..f468185b3b28 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -419,7 +419,7 @@ int __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
}
/**
- * reserve_bootmem - mark a page range as usable
+ * reserve_bootmem - mark a page range as reserved
* @addr: starting address of the range
* @size: size of the range in bytes
* @flags: reservation flags (see linux/bootmem.h)
diff --git a/mm/compaction.c b/mm/compaction.c
index e78cb9688421..7fcd3a52e68d 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -51,6 +51,47 @@ static inline bool migrate_async_suitable(int migratetype)
}
/*
+ * Compaction requires the taking of some coarse locks that are potentially
+ * very heavily contended. Check if the process needs to be scheduled or
+ * if the lock is contended. For async compaction, back out in the event
+ * if contention is severe. For sync compaction, schedule.
+ *
+ * Returns true if the lock is held.
+ * Returns false if the lock is released and compaction should abort
+ */
+static bool compact_checklock_irqsave(spinlock_t *lock, unsigned long *flags,
+ bool locked, struct compact_control *cc)
+{
+ if (need_resched() || spin_is_contended(lock)) {
+ if (locked) {
+ spin_unlock_irqrestore(lock, *flags);
+ locked = false;
+ }
+
+ /* async aborts if taking too long or contended */
+ if (!cc->sync) {
+ if (cc->contended)
+ *cc->contended = true;
+ return false;
+ }
+
+ cond_resched();
+ if (fatal_signal_pending(current))
+ return false;
+ }
+
+ if (!locked)
+ spin_lock_irqsave(lock, *flags);
+ return true;
+}
+
+static inline bool compact_trylock_irqsave(spinlock_t *lock,
+ unsigned long *flags, struct compact_control *cc)
+{
+ return compact_checklock_irqsave(lock, flags, false, cc);
+}
+
+/*
* Isolate free pages onto a private freelist. Caller must hold zone->lock.
* If @strict is true, will abort returning 0 on any invalid PFNs or non-free
* pages inside of the pageblock (even though it may still end up isolating
@@ -173,7 +214,7 @@ isolate_freepages_range(unsigned long start_pfn, unsigned long end_pfn)
}
/* Update the number of anon and file isolated pages in the zone */
-static void acct_isolated(struct zone *zone, struct compact_control *cc)
+static void acct_isolated(struct zone *zone, bool locked, struct compact_control *cc)
{
struct page *page;
unsigned int count[2] = { 0, };
@@ -181,8 +222,14 @@ static void acct_isolated(struct zone *zone, struct compact_control *cc)
list_for_each_entry(page, &cc->migratepages, lru)
count[!!page_is_file_cache(page)]++;
- __mod_zone_page_state(zone, NR_ISOLATED_ANON, count[0]);
- __mod_zone_page_state(zone, NR_ISOLATED_FILE, count[1]);
+ /* If locked we can use the interrupt unsafe versions */
+ if (locked) {
+ __mod_zone_page_state(zone, NR_ISOLATED_ANON, count[0]);
+ __mod_zone_page_state(zone, NR_ISOLATED_FILE, count[1]);
+ } else {
+ mod_zone_page_state(zone, NR_ISOLATED_ANON, count[0]);
+ mod_zone_page_state(zone, NR_ISOLATED_FILE, count[1]);
+ }
}
/* Similar to reclaim, but different enough that they don't share logic */
@@ -228,6 +275,8 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
struct list_head *migratelist = &cc->migratepages;
isolate_mode_t mode = 0;
struct lruvec *lruvec;
+ unsigned long flags;
+ bool locked;
/*
* Ensure that there are not too many pages isolated from the LRU
@@ -247,25 +296,22 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
/* Time to isolate some pages for migration */
cond_resched();
- spin_lock_irq(&zone->lru_lock);
+ spin_lock_irqsave(&zone->lru_lock, flags);
+ locked = true;
for (; low_pfn < end_pfn; low_pfn++) {
struct page *page;
- bool locked = true;
/* give a chance to irqs before checking need_resched() */
if (!((low_pfn+1) % SWAP_CLUSTER_MAX)) {
- spin_unlock_irq(&zone->lru_lock);
+ spin_unlock_irqrestore(&zone->lru_lock, flags);
locked = false;
}
- if (need_resched() || spin_is_contended(&zone->lru_lock)) {
- if (locked)
- spin_unlock_irq(&zone->lru_lock);
- cond_resched();
- spin_lock_irq(&zone->lru_lock);
- if (fatal_signal_pending(current))
- break;
- } else if (!locked)
- spin_lock_irq(&zone->lru_lock);
+
+ /* Check if it is ok to still hold the lock */
+ locked = compact_checklock_irqsave(&zone->lru_lock, &flags,
+ locked, cc);
+ if (!locked)
+ break;
/*
* migrate_pfn does not necessarily start aligned to a
@@ -349,9 +395,10 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
}
}
- acct_isolated(zone, cc);
+ acct_isolated(zone, locked, cc);
- spin_unlock_irq(&zone->lru_lock);
+ if (locked)
+ spin_unlock_irqrestore(&zone->lru_lock, flags);
trace_mm_compaction_isolate_migratepages(nr_scanned, nr_isolated);
@@ -384,6 +431,20 @@ static bool suitable_migration_target(struct page *page)
}
/*
+ * Returns the start pfn of the last page block in a zone. This is the starting
+ * point for full compaction of a zone. Compaction searches for free pages from
+ * the end of each zone, while isolate_freepages_block scans forward inside each
+ * page block.
+ */
+static unsigned long start_free_pfn(struct zone *zone)
+{
+ unsigned long free_pfn;
+ free_pfn = zone->zone_start_pfn + zone->spanned_pages;
+ free_pfn &= ~(pageblock_nr_pages-1);
+ return free_pfn;
+}
+
+/*
* Based on information in the current compact_control, find blocks
* suitable for isolating free pages from and then isolate them.
*/
@@ -422,17 +483,6 @@ static void isolate_freepages(struct zone *zone,
pfn -= pageblock_nr_pages) {
unsigned long isolated;
- /*
- * Skip ahead if another thread is compacting in the area
- * simultaneously. If we wrapped around, we can only skip
- * ahead if zone->compact_cached_free_pfn also wrapped to
- * above our starting point.
- */
- if (cc->order > 0 && (!cc->wrapped ||
- zone->compact_cached_free_pfn >
- cc->start_free_pfn))
- pfn = min(pfn, zone->compact_cached_free_pfn);
-
if (!pfn_valid(pfn))
continue;
@@ -458,7 +508,16 @@ static void isolate_freepages(struct zone *zone,
* are disabled
*/
isolated = 0;
- spin_lock_irqsave(&zone->lock, flags);
+
+ /*
+ * The zone lock must be held to isolate freepages. This
+ * unfortunately this is a very coarse lock and can be
+ * heavily contended if there are parallel allocations
+ * or parallel compactions. For async compaction do not
+ * spin on the lock
+ */
+ if (!compact_trylock_irqsave(&zone->lock, &flags, cc))
+ break;
if (suitable_migration_target(page)) {
end_pfn = min(pfn + pageblock_nr_pages, zone_end_pfn);
isolated = isolate_freepages_block(pfn, end_pfn,
@@ -474,7 +533,15 @@ static void isolate_freepages(struct zone *zone,
*/
if (isolated) {
high_pfn = max(high_pfn, pfn);
- if (cc->order > 0)
+
+ /*
+ * If the free scanner has wrapped, update
+ * compact_cached_free_pfn to point to the highest
+ * pageblock with free pages. This reduces excessive
+ * scanning of full pageblocks near the end of the
+ * zone
+ */
+ if (cc->order > 0 && cc->wrapped)
zone->compact_cached_free_pfn = high_pfn;
}
}
@@ -484,6 +551,11 @@ static void isolate_freepages(struct zone *zone,
cc->free_pfn = high_pfn;
cc->nr_freepages = nr_freepages;
+
+ /* If compact_cached_free_pfn is reset then set it now */
+ if (cc->order > 0 && !cc->wrapped &&
+ zone->compact_cached_free_pfn == start_free_pfn(zone))
+ zone->compact_cached_free_pfn = high_pfn;
}
/*
@@ -570,20 +642,6 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
return ISOLATE_SUCCESS;
}
-/*
- * Returns the start pfn of the last page block in a zone. This is the starting
- * point for full compaction of a zone. Compaction searches for free pages from
- * the end of each zone, while isolate_freepages_block scans forward inside each
- * page block.
- */
-static unsigned long start_free_pfn(struct zone *zone)
-{
- unsigned long free_pfn;
- free_pfn = zone->zone_start_pfn + zone->spanned_pages;
- free_pfn &= ~(pageblock_nr_pages-1);
- return free_pfn;
-}
-
static int compact_finished(struct zone *zone,
struct compact_control *cc)
{
@@ -771,7 +829,7 @@ out:
static unsigned long compact_zone_order(struct zone *zone,
int order, gfp_t gfp_mask,
- bool sync)
+ bool sync, bool *contended)
{
struct compact_control cc = {
.nr_freepages = 0,
@@ -780,6 +838,7 @@ static unsigned long compact_zone_order(struct zone *zone,
.migratetype = allocflags_to_migratetype(gfp_mask),
.zone = zone,
.sync = sync,
+ .contended = contended,
};
INIT_LIST_HEAD(&cc.freepages);
INIT_LIST_HEAD(&cc.migratepages);
@@ -801,7 +860,7 @@ int sysctl_extfrag_threshold = 500;
*/
unsigned long try_to_compact_pages(struct zonelist *zonelist,
int order, gfp_t gfp_mask, nodemask_t *nodemask,
- bool sync)
+ bool sync, bool *contended)
{
enum zone_type high_zoneidx = gfp_zone(gfp_mask);
int may_enter_fs = gfp_mask & __GFP_FS;
@@ -825,7 +884,8 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
nodemask) {
int status;
- status = compact_zone_order(zone, order, gfp_mask, sync);
+ status = compact_zone_order(zone, order, gfp_mask, sync,
+ contended);
rc = max(status, rc);
/* If a normal allocation would succeed, stop compacting */
@@ -861,7 +921,7 @@ static int __compact_pgdat(pg_data_t *pgdat, struct compact_control *cc)
if (cc->order > 0) {
int ok = zone_watermark_ok(zone, cc->order,
low_wmark_pages(zone), 0, 0);
- if (ok && cc->order > zone->compact_order_failed)
+ if (ok && cc->order >= zone->compact_order_failed)
zone->compact_order_failed = cc->order + 1;
/* Currently async compaction is never deferred. */
else if (!ok && cc->sync)
diff --git a/mm/fadvise.c b/mm/fadvise.c
index 9b75a045dbf4..a47f0f50c89f 100644
--- a/mm/fadvise.c
+++ b/mm/fadvise.c
@@ -26,7 +26,7 @@
*/
SYSCALL_DEFINE(fadvise64_64)(int fd, loff_t offset, loff_t len, int advice)
{
- struct file *file = fget(fd);
+ struct fd f = fdget(fd);
struct address_space *mapping;
struct backing_dev_info *bdi;
loff_t endbyte; /* inclusive */
@@ -35,15 +35,15 @@ SYSCALL_DEFINE(fadvise64_64)(int fd, loff_t offset, loff_t len, int advice)
unsigned long nrpages;
int ret = 0;
- if (!file)
+ if (!f.file)
return -EBADF;
- if (S_ISFIFO(file->f_path.dentry->d_inode->i_mode)) {
+ if (S_ISFIFO(f.file->f_path.dentry->d_inode->i_mode)) {
ret = -ESPIPE;
goto out;
}
- mapping = file->f_mapping;
+ mapping = f.file->f_mapping;
if (!mapping || len < 0) {
ret = -EINVAL;
goto out;
@@ -76,21 +76,21 @@ SYSCALL_DEFINE(fadvise64_64)(int fd, loff_t offset, loff_t len, int advice)
switch (advice) {
case POSIX_FADV_NORMAL:
- file->f_ra.ra_pages = bdi->ra_pages;
- spin_lock(&file->f_lock);
- file->f_mode &= ~FMODE_RANDOM;
- spin_unlock(&file->f_lock);
+ f.file->f_ra.ra_pages = bdi->ra_pages;
+ spin_lock(&f.file->f_lock);
+ f.file->f_mode &= ~FMODE_RANDOM;
+ spin_unlock(&f.file->f_lock);
break;
case POSIX_FADV_RANDOM:
- spin_lock(&file->f_lock);
- file->f_mode |= FMODE_RANDOM;
- spin_unlock(&file->f_lock);
+ spin_lock(&f.file->f_lock);
+ f.file->f_mode |= FMODE_RANDOM;
+ spin_unlock(&f.file->f_lock);
break;
case POSIX_FADV_SEQUENTIAL:
- file->f_ra.ra_pages = bdi->ra_pages * 2;
- spin_lock(&file->f_lock);
- file->f_mode &= ~FMODE_RANDOM;
- spin_unlock(&file->f_lock);
+ f.file->f_ra.ra_pages = bdi->ra_pages * 2;
+ spin_lock(&f.file->f_lock);
+ f.file->f_mode &= ~FMODE_RANDOM;
+ spin_unlock(&f.file->f_lock);
break;
case POSIX_FADV_WILLNEED:
/* First and last PARTIAL page! */
@@ -106,7 +106,7 @@ SYSCALL_DEFINE(fadvise64_64)(int fd, loff_t offset, loff_t len, int advice)
* Ignore return value because fadvise() shall return
* success even if filesystem can't retrieve a hint,
*/
- force_page_cache_readahead(mapping, file, start_index,
+ force_page_cache_readahead(mapping, f.file, start_index,
nrpages);
break;
case POSIX_FADV_NOREUSE:
@@ -128,7 +128,7 @@ SYSCALL_DEFINE(fadvise64_64)(int fd, loff_t offset, loff_t len, int advice)
ret = -EINVAL;
}
out:
- fput(file);
+ fdput(f);
return ret;
}
#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
diff --git a/mm/filemap.c b/mm/filemap.c
index fa5ca304148e..384344575c37 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1412,12 +1412,8 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
retval = filemap_write_and_wait_range(mapping, pos,
pos + iov_length(iov, nr_segs) - 1);
if (!retval) {
- struct blk_plug plug;
-
- blk_start_plug(&plug);
retval = mapping->a_ops->direct_IO(READ, iocb,
iov, pos, nr_segs);
- blk_finish_plug(&plug);
}
if (retval > 0) {
*ppos = pos + retval;
@@ -2527,14 +2523,12 @@ ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
{
struct file *file = iocb->ki_filp;
struct inode *inode = file->f_mapping->host;
- struct blk_plug plug;
ssize_t ret;
BUG_ON(iocb->ki_pos != pos);
sb_start_write(inode->i_sb);
mutex_lock(&inode->i_mutex);
- blk_start_plug(&plug);
ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
mutex_unlock(&inode->i_mutex);
@@ -2545,7 +2539,6 @@ ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
if (err < 0 && ret > 0)
ret = err;
}
- blk_finish_plug(&plug);
sb_end_write(inode->i_sb);
return ret;
}
diff --git a/mm/fremap.c b/mm/fremap.c
index 9ed4fd432467..048659c0c03d 100644
--- a/mm/fremap.c
+++ b/mm/fremap.c
@@ -195,10 +195,9 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
*/
if (mapping_cap_account_dirty(mapping)) {
unsigned long addr;
- struct file *file = vma->vm_file;
+ struct file *file = get_file(vma->vm_file);
flags &= MAP_NONBLOCK;
- get_file(file);
addr = mmap_region(file, start, size,
flags, vma->vm_flags, pgoff);
fput(file);
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 57c4b9309015..141dbb695097 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1811,7 +1811,6 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page,
src_page = pte_page(pteval);
copy_user_highpage(page, src_page, address, vma);
VM_BUG_ON(page_mapcount(src_page) != 1);
- VM_BUG_ON(page_count(src_page) != 2);
release_pte_page(src_page);
/*
* ptl mostly unnecessary, but preempt has to
diff --git a/mm/internal.h b/mm/internal.h
index 3314f79d775a..b8c91b342e24 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -130,6 +130,7 @@ struct compact_control {
int order; /* order a direct compactor needs */
int migratetype; /* MOVABLE, RECLAIMABLE etc */
struct zone *zone;
+ bool *contended; /* True if a lock was contended */
};
unsigned long
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index 45eb6217bf38..0de83b4541e9 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -1483,13 +1483,11 @@ static void *kmemleak_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
struct kmemleak_object *prev_obj = v;
struct kmemleak_object *next_obj = NULL;
- struct list_head *n = &prev_obj->object_list;
+ struct kmemleak_object *obj = prev_obj;
++(*pos);
- list_for_each_continue_rcu(n, &object_list) {
- struct kmemleak_object *obj =
- list_entry(n, struct kmemleak_object, object_list);
+ list_for_each_entry_continue_rcu(obj, &object_list, object_list) {
if (get_object(obj)) {
next_obj = obj;
break;
diff --git a/mm/memblock.c b/mm/memblock.c
index 4d9393c7edc9..82aa349d2f7a 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -246,7 +246,7 @@ static int __init_memblock memblock_double_array(struct memblock_type *type,
min(new_area_start, memblock.current_limit),
new_alloc_size, PAGE_SIZE);
- new_array = addr ? __va(addr) : 0;
+ new_array = addr ? __va(addr) : NULL;
}
if (!addr) {
pr_err("memblock: Failed to double %s array from %ld to %ld entries !\n",
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 795e525afaba..a72f2ffdc3d0 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4973,6 +4973,13 @@ mem_cgroup_create(struct cgroup *cont)
} else {
res_counter_init(&memcg->res, NULL);
res_counter_init(&memcg->memsw, NULL);
+ /*
+ * Deeper hierachy with use_hierarchy == false doesn't make
+ * much sense so let cgroup subsystem know about this
+ * unfortunate state in our controller.
+ */
+ if (parent && parent != root_mem_cgroup)
+ mem_cgroup_subsys.broken_hierarchy = true;
}
memcg->last_scanned_node = MAX_NUMNODES;
INIT_LIST_HEAD(&memcg->oom_notify);
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 3ad25f9d1fc1..6a5b90d0cfd7 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -126,9 +126,6 @@ static void register_page_bootmem_info_section(unsigned long start_pfn)
struct mem_section *ms;
struct page *page, *memmap;
- if (!pfn_valid(start_pfn))
- return;
-
section_nr = pfn_to_section_nr(start_pfn);
ms = __nr_to_section(section_nr);
@@ -187,9 +184,16 @@ void register_page_bootmem_info_node(struct pglist_data *pgdat)
end_pfn = pfn + pgdat->node_spanned_pages;
/* register_section info */
- for (; pfn < end_pfn; pfn += PAGES_PER_SECTION)
- register_page_bootmem_info_section(pfn);
-
+ for (; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
+ /*
+ * Some platforms can assign the same pfn to multiple nodes - on
+ * node0 as well as nodeN. To avoid registering a pfn against
+ * multiple nodes we check that this pfn does not already
+ * reside in some other node.
+ */
+ if (pfn_valid(pfn) && (pfn_to_nid(pfn) == node))
+ register_page_bootmem_info_section(pfn);
+ }
}
#endif /* !CONFIG_SPARSEMEM_VMEMMAP */
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index bd92431d4c49..4ada3be6e252 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2562,7 +2562,7 @@ int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol, int no_context)
break;
default:
- BUG();
+ return -EINVAL;
}
l = strlen(policy_modes[mode]);
diff --git a/mm/mmap.c b/mm/mmap.c
index e3e86914f11a..872441e81914 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1301,8 +1301,7 @@ munmap_back:
goto free_vma;
correct_wcount = 1;
}
- vma->vm_file = file;
- get_file(file);
+ vma->vm_file = get_file(file);
error = file->f_op->mmap(file, vma);
if (error)
goto unmap_and_free_vma;
@@ -1356,9 +1355,8 @@ out:
} else if ((flags & MAP_POPULATE) && !(flags & MAP_NONBLOCK))
make_pages_present(addr, addr + len);
- if (file && uprobe_mmap(vma))
- /* matching probes but cannot insert */
- goto unmap_and_free_vma;
+ if (file)
+ uprobe_mmap(vma);
return addr;
@@ -2309,7 +2307,7 @@ void exit_mmap(struct mm_struct *mm)
}
vm_unacct_memory(nr_accounted);
- BUG_ON(mm->nr_ptes > (FIRST_USER_ADDRESS+PMD_SIZE-1)>>PMD_SHIFT);
+ WARN_ON(mm->nr_ptes > (FIRST_USER_ADDRESS+PMD_SIZE-1)>>PMD_SHIFT);
}
/* Insert vm structure into process list sorted by address
diff --git a/mm/nommu.c b/mm/nommu.c
index d4b0c10872de..dee2ff89fd58 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1282,10 +1282,8 @@ unsigned long do_mmap_pgoff(struct file *file,
vma->vm_pgoff = pgoff;
if (file) {
- region->vm_file = file;
- get_file(file);
- vma->vm_file = file;
- get_file(file);
+ region->vm_file = get_file(file);
+ vma->vm_file = get_file(file);
if (vm_flags & VM_EXECUTABLE) {
added_exe_file_vma(current->mm);
vma->vm_mm = current->mm;
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index e5363f34e025..5ad5ce23c1e0 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -1532,7 +1532,6 @@ int dirty_writeback_centisecs_handler(ctl_table *table, int write,
void __user *buffer, size_t *length, loff_t *ppos)
{
proc_dointvec(table, write, buffer, length, ppos);
- bdi_arm_supers_timer();
return 0;
}
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 009ac285fea7..c13ea7538891 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -584,7 +584,7 @@ static inline void __free_one_page(struct page *page,
combined_idx = buddy_idx & page_idx;
higher_page = page + (combined_idx - page_idx);
buddy_idx = __find_buddy_index(combined_idx, order + 1);
- higher_buddy = page + (buddy_idx - combined_idx);
+ higher_buddy = higher_page + (buddy_idx - combined_idx);
if (page_is_buddy(higher_page, higher_buddy, order + 1)) {
list_add_tail(&page->lru,
&zone->free_area[order].free_list[migratetype]);
@@ -1928,6 +1928,17 @@ this_zone_full:
zlc_active = 0;
goto zonelist_scan;
}
+
+ if (page)
+ /*
+ * page->pfmemalloc is set when ALLOC_NO_WATERMARKS was
+ * necessary to allocate the page. The expectation is
+ * that the caller is taking steps that will free more
+ * memory. The caller should avoid the page being used
+ * for !PFMEMALLOC purposes.
+ */
+ page->pfmemalloc = !!(alloc_flags & ALLOC_NO_WATERMARKS);
+
return page;
}
@@ -2091,7 +2102,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
struct zonelist *zonelist, enum zone_type high_zoneidx,
nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone,
int migratetype, bool sync_migration,
- bool *deferred_compaction,
+ bool *contended_compaction, bool *deferred_compaction,
unsigned long *did_some_progress)
{
struct page *page;
@@ -2106,7 +2117,8 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
current->flags |= PF_MEMALLOC;
*did_some_progress = try_to_compact_pages(zonelist, order, gfp_mask,
- nodemask, sync_migration);
+ nodemask, sync_migration,
+ contended_compaction);
current->flags &= ~PF_MEMALLOC;
if (*did_some_progress != COMPACT_SKIPPED) {
@@ -2152,7 +2164,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
struct zonelist *zonelist, enum zone_type high_zoneidx,
nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone,
int migratetype, bool sync_migration,
- bool *deferred_compaction,
+ bool *contended_compaction, bool *deferred_compaction,
unsigned long *did_some_progress)
{
return NULL;
@@ -2325,6 +2337,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
unsigned long did_some_progress;
bool sync_migration = false;
bool deferred_compaction = false;
+ bool contended_compaction = false;
/*
* In the slowpath, we sanity check order to avoid ever trying to
@@ -2389,14 +2402,6 @@ rebalance:
zonelist, high_zoneidx, nodemask,
preferred_zone, migratetype);
if (page) {
- /*
- * page->pfmemalloc is set when ALLOC_NO_WATERMARKS was
- * necessary to allocate the page. The expectation is
- * that the caller is taking steps that will free more
- * memory. The caller should avoid the page being used
- * for !PFMEMALLOC purposes.
- */
- page->pfmemalloc = true;
goto got_pg;
}
}
@@ -2422,6 +2427,7 @@ rebalance:
nodemask,
alloc_flags, preferred_zone,
migratetype, sync_migration,
+ &contended_compaction,
&deferred_compaction,
&did_some_progress);
if (page)
@@ -2431,10 +2437,11 @@ rebalance:
/*
* If compaction is deferred for high-order allocations, it is because
* sync compaction recently failed. In this is the case and the caller
- * has requested the system not be heavily disrupted, fail the
- * allocation now instead of entering direct reclaim
+ * requested a movable allocation that does not heavily disrupt the
+ * system then fail the allocation instead of entering direct reclaim.
*/
- if (deferred_compaction && (gfp_mask & __GFP_NO_KSWAPD))
+ if ((deferred_compaction || contended_compaction) &&
+ (gfp_mask & __GFP_NO_KSWAPD))
goto nopage;
/* Try direct reclaim and then allocating */
@@ -2505,6 +2512,7 @@ rebalance:
nodemask,
alloc_flags, preferred_zone,
migratetype, sync_migration,
+ &contended_compaction,
&deferred_compaction,
&did_some_progress);
if (page)
@@ -2569,8 +2577,6 @@ retry_cpuset:
page = __alloc_pages_slowpath(gfp_mask, order,
zonelist, high_zoneidx, nodemask,
preferred_zone, migratetype);
- else
- page->pfmemalloc = false;
trace_mm_page_alloc(page, order, gfp_mask, migratetype);
diff --git a/mm/readahead.c b/mm/readahead.c
index ea8f8fa21649..7963f2391236 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -579,19 +579,19 @@ do_readahead(struct address_space *mapping, struct file *filp,
SYSCALL_DEFINE(readahead)(int fd, loff_t offset, size_t count)
{
ssize_t ret;
- struct file *file;
+ struct fd f;
ret = -EBADF;
- file = fget(fd);
- if (file) {
- if (file->f_mode & FMODE_READ) {
- struct address_space *mapping = file->f_mapping;
+ f = fdget(fd);
+ if (f.file) {
+ if (f.file->f_mode & FMODE_READ) {
+ struct address_space *mapping = f.file->f_mapping;
pgoff_t start = offset >> PAGE_CACHE_SHIFT;
pgoff_t end = (offset + count - 1) >> PAGE_CACHE_SHIFT;
unsigned long len = end - start + 1;
- ret = do_readahead(mapping, file, start, len);
+ ret = do_readahead(mapping, f.file, start, len);
}
- fput(file);
+ fdput(f);
}
return ret;
}
diff --git a/mm/shmem.c b/mm/shmem.c
index d4e184e2a38e..d3752110c8c7 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -77,13 +77,6 @@ static struct vfsmount *shm_mnt;
/* Symlink up to this size is kmalloc'ed instead of using a swappable page */
#define SHORT_SYMLINK_LEN 128
-struct shmem_xattr {
- struct list_head list; /* anchored by shmem_inode_info->xattr_list */
- char *name; /* xattr name */
- size_t size;
- char value[0];
-};
-
/*
* shmem_fallocate and shmem_writepage communicate via inode->i_private
* (with i_mutex making sure that it has only one user at a time):
@@ -636,7 +629,6 @@ static int shmem_setattr(struct dentry *dentry, struct iattr *attr)
static void shmem_evict_inode(struct inode *inode)
{
struct shmem_inode_info *info = SHMEM_I(inode);
- struct shmem_xattr *xattr, *nxattr;
if (inode->i_mapping->a_ops == &shmem_aops) {
shmem_unacct_size(info->flags, inode->i_size);
@@ -650,10 +642,7 @@ static void shmem_evict_inode(struct inode *inode)
} else
kfree(info->symlink);
- list_for_each_entry_safe(xattr, nxattr, &info->xattr_list, list) {
- kfree(xattr->name);
- kfree(xattr);
- }
+ simple_xattrs_free(&info->xattrs);
BUG_ON(inode->i_blocks);
shmem_free_inode(inode->i_sb);
clear_inode(inode);
@@ -1377,7 +1366,7 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode
spin_lock_init(&info->lock);
info->flags = flags & VM_NORESERVE;
INIT_LIST_HEAD(&info->swaplist);
- INIT_LIST_HEAD(&info->xattr_list);
+ simple_xattrs_init(&info->xattrs);
cache_no_acl(inode);
switch (mode & S_IFMT) {
@@ -2060,28 +2049,6 @@ static void shmem_put_link(struct dentry *dentry, struct nameidata *nd, void *co
*/
/*
- * Allocate new xattr and copy in the value; but leave the name to callers.
- */
-static struct shmem_xattr *shmem_xattr_alloc(const void *value, size_t size)
-{
- struct shmem_xattr *new_xattr;
- size_t len;
-
- /* wrap around? */
- len = sizeof(*new_xattr) + size;
- if (len <= sizeof(*new_xattr))
- return NULL;
-
- new_xattr = kmalloc(len, GFP_KERNEL);
- if (!new_xattr)
- return NULL;
-
- new_xattr->size = size;
- memcpy(new_xattr->value, value, size);
- return new_xattr;
-}
-
-/*
* Callback for security_inode_init_security() for acquiring xattrs.
*/
static int shmem_initxattrs(struct inode *inode,
@@ -2090,11 +2057,11 @@ static int shmem_initxattrs(struct inode *inode,
{
struct shmem_inode_info *info = SHMEM_I(inode);
const struct xattr *xattr;
- struct shmem_xattr *new_xattr;
+ struct simple_xattr *new_xattr;
size_t len;
for (xattr = xattr_array; xattr->name != NULL; xattr++) {
- new_xattr = shmem_xattr_alloc(xattr->value, xattr->value_len);
+ new_xattr = simple_xattr_alloc(xattr->value, xattr->value_len);
if (!new_xattr)
return -ENOMEM;
@@ -2111,91 +2078,12 @@ static int shmem_initxattrs(struct inode *inode,
memcpy(new_xattr->name + XATTR_SECURITY_PREFIX_LEN,
xattr->name, len);
- spin_lock(&info->lock);
- list_add(&new_xattr->list, &info->xattr_list);
- spin_unlock(&info->lock);
+ simple_xattr_list_add(&info->xattrs, new_xattr);
}
return 0;
}
-static int shmem_xattr_get(struct dentry *dentry, const char *name,
- void *buffer, size_t size)
-{
- struct shmem_inode_info *info;
- struct shmem_xattr *xattr;
- int ret = -ENODATA;
-
- info = SHMEM_I(dentry->d_inode);
-
- spin_lock(&info->lock);
- list_for_each_entry(xattr, &info->xattr_list, list) {
- if (strcmp(name, xattr->name))
- continue;
-
- ret = xattr->size;
- if (buffer) {
- if (size < xattr->size)
- ret = -ERANGE;
- else
- memcpy(buffer, xattr->value, xattr->size);
- }
- break;
- }
- spin_unlock(&info->lock);
- return ret;
-}
-
-static int shmem_xattr_set(struct inode *inode, const char *name,
- const void *value, size_t size, int flags)
-{
- struct shmem_inode_info *info = SHMEM_I(inode);
- struct shmem_xattr *xattr;
- struct shmem_xattr *new_xattr = NULL;
- int err = 0;
-
- /* value == NULL means remove */
- if (value) {
- new_xattr = shmem_xattr_alloc(value, size);
- if (!new_xattr)
- return -ENOMEM;
-
- new_xattr->name = kstrdup(name, GFP_KERNEL);
- if (!new_xattr->name) {
- kfree(new_xattr);
- return -ENOMEM;
- }
- }
-
- spin_lock(&info->lock);
- list_for_each_entry(xattr, &info->xattr_list, list) {
- if (!strcmp(name, xattr->name)) {
- if (flags & XATTR_CREATE) {
- xattr = new_xattr;
- err = -EEXIST;
- } else if (new_xattr) {
- list_replace(&xattr->list, &new_xattr->list);
- } else {
- list_del(&xattr->list);
- }
- goto out;
- }
- }
- if (flags & XATTR_REPLACE) {
- xattr = new_xattr;
- err = -ENODATA;
- } else {
- list_add(&new_xattr->list, &info->xattr_list);
- xattr = NULL;
- }
-out:
- spin_unlock(&info->lock);
- if (xattr)
- kfree(xattr->name);
- kfree(xattr);
- return err;
-}
-
static const struct xattr_handler *shmem_xattr_handlers[] = {
#ifdef CONFIG_TMPFS_POSIX_ACL
&generic_acl_access_handler,
@@ -2226,6 +2114,7 @@ static int shmem_xattr_validate(const char *name)
static ssize_t shmem_getxattr(struct dentry *dentry, const char *name,
void *buffer, size_t size)
{
+ struct shmem_inode_info *info = SHMEM_I(dentry->d_inode);
int err;
/*
@@ -2240,12 +2129,13 @@ static ssize_t shmem_getxattr(struct dentry *dentry, const char *name,
if (err)
return err;
- return shmem_xattr_get(dentry, name, buffer, size);
+ return simple_xattr_get(&info->xattrs, name, buffer, size);
}
static int shmem_setxattr(struct dentry *dentry, const char *name,
const void *value, size_t size, int flags)
{
+ struct shmem_inode_info *info = SHMEM_I(dentry->d_inode);
int err;
/*
@@ -2260,15 +2150,12 @@ static int shmem_setxattr(struct dentry *dentry, const char *name,
if (err)
return err;
- if (size == 0)
- value = ""; /* empty EA, do not remove */
-
- return shmem_xattr_set(dentry->d_inode, name, value, size, flags);
-
+ return simple_xattr_set(&info->xattrs, name, value, size, flags);
}
static int shmem_removexattr(struct dentry *dentry, const char *name)
{
+ struct shmem_inode_info *info = SHMEM_I(dentry->d_inode);
int err;
/*
@@ -2283,45 +2170,13 @@ static int shmem_removexattr(struct dentry *dentry, const char *name)
if (err)
return err;
- return shmem_xattr_set(dentry->d_inode, name, NULL, 0, XATTR_REPLACE);
-}
-
-static bool xattr_is_trusted(const char *name)
-{
- return !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN);
+ return simple_xattr_remove(&info->xattrs, name);
}
static ssize_t shmem_listxattr(struct dentry *dentry, char *buffer, size_t size)
{
- bool trusted = capable(CAP_SYS_ADMIN);
- struct shmem_xattr *xattr;
- struct shmem_inode_info *info;
- size_t used = 0;
-
- info = SHMEM_I(dentry->d_inode);
-
- spin_lock(&info->lock);
- list_for_each_entry(xattr, &info->xattr_list, list) {
- size_t len;
-
- /* skip "trusted." attributes for unprivileged callers */
- if (!trusted && xattr_is_trusted(xattr->name))
- continue;
-
- len = strlen(xattr->name) + 1;
- used += len;
- if (buffer) {
- if (size < used) {
- used = -ERANGE;
- break;
- }
- memcpy(buffer, xattr->name, len);
- buffer += len;
- }
- }
- spin_unlock(&info->lock);
-
- return used;
+ struct shmem_inode_info *info = SHMEM_I(dentry->d_inode);
+ return simple_xattr_list(&info->xattrs, buffer, size);
}
#endif /* CONFIG_TMPFS_XATTR */
diff --git a/mm/slab.c b/mm/slab.c
index f8b0d539b482..11339110271e 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -900,7 +900,7 @@ static void __cpuinit start_cpu_timer(int cpu)
*/
if (keventd_up() && reap_work->work.func == NULL) {
init_reap_node(cpu);
- INIT_DELAYED_WORK_DEFERRABLE(reap_work, cache_reap);
+ INIT_DEFERRABLE_WORK(reap_work, cache_reap);
schedule_delayed_work_on(cpu, reap_work,
__round_jiffies_relative(HZ, cpu));
}
@@ -983,7 +983,7 @@ static void *__ac_get_obj(struct kmem_cache *cachep, struct array_cache *ac,
}
/* The caller cannot use PFMEMALLOC objects, find another one */
- for (i = 1; i < ac->avail; i++) {
+ for (i = 0; i < ac->avail; i++) {
/* If a !PFMEMALLOC object is found, swap them */
if (!is_obj_pfmemalloc(ac->entry[i])) {
objp = ac->entry[i];
@@ -1000,7 +1000,7 @@ static void *__ac_get_obj(struct kmem_cache *cachep, struct array_cache *ac,
l3 = cachep->nodelists[numa_mem_id()];
if (!list_empty(&l3->slabs_free) && force_refill) {
struct slab *slabp = virt_to_slab(objp);
- ClearPageSlabPfmemalloc(virt_to_page(slabp->s_mem));
+ ClearPageSlabPfmemalloc(virt_to_head_page(slabp->s_mem));
clear_obj_pfmemalloc(&objp);
recheck_pfmemalloc_active(cachep, ac);
return objp;
@@ -1032,7 +1032,7 @@ static void *__ac_put_obj(struct kmem_cache *cachep, struct array_cache *ac,
{
if (unlikely(pfmemalloc_active)) {
/* Some pfmemalloc slabs exist, check if this is one */
- struct page *page = virt_to_page(objp);
+ struct page *page = virt_to_head_page(objp);
if (PageSlabPfmemalloc(page))
set_obj_pfmemalloc(&objp);
}
@@ -3260,6 +3260,7 @@ force_grow:
/* cache_grow can reenable interrupts, then ac could change. */
ac = cpu_cache_get(cachep);
+ node = numa_mem_id();
/* no objects in sight? abort */
if (!x && (ac->avail == 0 || force_refill))
diff --git a/mm/slub.c b/mm/slub.c
index 8f78e2577031..2fdd96f9e998 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1524,12 +1524,13 @@ static inline void *acquire_slab(struct kmem_cache *s,
}
static int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain);
+static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags);
/*
* Try to allocate a partial slab from a specific node.
*/
-static void *get_partial_node(struct kmem_cache *s,
- struct kmem_cache_node *n, struct kmem_cache_cpu *c)
+static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
+ struct kmem_cache_cpu *c, gfp_t flags)
{
struct page *page, *page2;
void *object = NULL;
@@ -1545,9 +1546,13 @@ static void *get_partial_node(struct kmem_cache *s,
spin_lock(&n->list_lock);
list_for_each_entry_safe(page, page2, &n->partial, lru) {
- void *t = acquire_slab(s, n, page, object == NULL);
+ void *t;
int available;
+ if (!pfmemalloc_match(page, flags))
+ continue;
+
+ t = acquire_slab(s, n, page, object == NULL);
if (!t)
break;
@@ -1614,7 +1619,7 @@ static void *get_any_partial(struct kmem_cache *s, gfp_t flags,
if (n && cpuset_zone_allowed_hardwall(zone, flags) &&
n->nr_partial > s->min_partial) {
- object = get_partial_node(s, n, c);
+ object = get_partial_node(s, n, c, flags);
if (object) {
/*
* Return the object even if
@@ -1643,7 +1648,7 @@ static void *get_partial(struct kmem_cache *s, gfp_t flags, int node,
void *object;
int searchnode = (node == NUMA_NO_NODE) ? numa_node_id() : node;
- object = get_partial_node(s, get_node(s, searchnode), c);
+ object = get_partial_node(s, get_node(s, searchnode), c, flags);
if (object || node != NUMA_NO_NODE)
return object;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 8d01243d9560..99b434b674c0 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -3102,6 +3102,7 @@ int kswapd_run(int nid)
/* failure at boot is fatal */
BUG_ON(system_state == SYSTEM_BOOTING);
printk("Failed to start kswapd on node %d\n",nid);
+ pgdat->kswapd = NULL;
ret = -1;
}
return ret;
diff --git a/mm/vmstat.c b/mm/vmstat.c
index df7a6748231d..b3e3b9d525d0 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1157,7 +1157,7 @@ static void __cpuinit start_cpu_timer(int cpu)
{
struct delayed_work *work = &per_cpu(vmstat_work, cpu);
- INIT_DELAYED_WORK_DEFERRABLE(work, vmstat_update);
+ INIT_DEFERRABLE_WORK(work, vmstat_update);
schedule_delayed_work_on(cpu, work, __round_jiffies_relative(HZ, cpu));
}