diff options
Diffstat (limited to 'mm')
-rw-r--r-- | mm/Makefile | 1 | ||||
-rw-r--r-- | mm/highmem.c | 17 | ||||
-rw-r--r-- | mm/hugetlb.c | 2 | ||||
-rw-r--r-- | mm/memcontrol.c | 8 | ||||
-rw-r--r-- | mm/memory.c | 3 | ||||
-rw-r--r-- | mm/mempolicy.c | 53 | ||||
-rw-r--r-- | mm/migrate.c | 5 | ||||
-rw-r--r-- | mm/mincore.c | 37 | ||||
-rw-r--r-- | mm/mlock.c | 99 | ||||
-rw-r--r-- | mm/mmap.c | 4 | ||||
-rw-r--r-- | mm/nommu.c | 40 | ||||
-rw-r--r-- | mm/oom_kill.c | 2 | ||||
-rw-r--r-- | mm/page_alloc.c | 18 | ||||
-rw-r--r-- | mm/pagewalk.c | 16 | ||||
-rw-r--r-- | mm/shmem.c | 13 | ||||
-rw-r--r-- | mm/slab.c | 2 | ||||
-rw-r--r-- | mm/swap.c | 2 | ||||
-rw-r--r-- | mm/swap_state.c | 70 | ||||
-rw-r--r-- | mm/swapfile.c | 3 | ||||
-rw-r--r-- | mm/vmalloc.c | 8 | ||||
-rw-r--r-- | mm/vmscan.c | 2 |
21 files changed, 240 insertions, 165 deletions
diff --git a/mm/Makefile b/mm/Makefile index f4e009cba6a5..5e0bd6426693 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -22,7 +22,6 @@ obj-$(CONFIG_HUGETLBFS) += hugetlb.o obj-$(CONFIG_NUMA) += mempolicy.o obj-$(CONFIG_SPARSEMEM) += sparse.o obj-$(CONFIG_SPARSEMEM_VMEMMAP) += sparse-vmemmap.o -obj-$(CONFIG_ASHMEM) += ashmem.o obj-$(CONFIG_TMPFS_POSIX_ACL) += shmem_acl.o obj-$(CONFIG_SLOB) += slob.o obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o diff --git a/mm/highmem.c b/mm/highmem.c index 25878cc49daa..9c1e627f282e 100644 --- a/mm/highmem.c +++ b/mm/highmem.c @@ -426,16 +426,21 @@ void __init page_address_init(void) void debug_kmap_atomic(enum km_type type) { - static unsigned warn_count = 10; + static int warn_count = 10; - if (unlikely(warn_count == 0)) + if (unlikely(warn_count < 0)) return; if (unlikely(in_interrupt())) { - if (in_irq()) { + if (in_nmi()) { + if (type != KM_NMI && type != KM_NMI_PTE) { + WARN_ON(1); + warn_count--; + } + } else if (in_irq()) { if (type != KM_IRQ0 && type != KM_IRQ1 && type != KM_BIO_SRC_IRQ && type != KM_BIO_DST_IRQ && - type != KM_BOUNCE_READ) { + type != KM_BOUNCE_READ && type != KM_IRQ_PTE) { WARN_ON(1); warn_count--; } @@ -452,7 +457,9 @@ void debug_kmap_atomic(enum km_type type) } if (type == KM_IRQ0 || type == KM_IRQ1 || type == KM_BOUNCE_READ || - type == KM_BIO_SRC_IRQ || type == KM_BIO_DST_IRQ) { + type == KM_BIO_SRC_IRQ || type == KM_BIO_DST_IRQ || + type == KM_IRQ_PTE || type == KM_NMI || + type == KM_NMI_PTE ) { if (!irqs_disabled()) { WARN_ON(1); warn_count--; diff --git a/mm/hugetlb.c b/mm/hugetlb.c index cafdcee154e8..cae03370a8ea 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1010,6 +1010,7 @@ int __weak alloc_bootmem_huge_page(struct hstate *h) NODE_DATA(h->hugetlb_next_nid), huge_page_size(h), huge_page_size(h), 0); + hstate_next_node(h); if (addr) { /* * Use the beginning of the huge page to store the @@ -1019,7 +1020,6 @@ int __weak alloc_bootmem_huge_page(struct hstate *h) m = addr; goto found; } - hstate_next_node(h); nr_nodes--; } return 0; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index fd4529d86de5..566925e7cdbd 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -496,7 +496,13 @@ int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem) task_unlock(task); if (!curr) return 0; - if (curr->use_hierarchy) + /* + * We should check use_hierarchy of "mem" not "curr". Because checking + * use_hierarchy of "curr" here make this function true if hierarchy is + * enabled in "curr" and "curr" is a child of "mem" in *cgroup* + * hierarchy(even if use_hierarchy is disabled in "mem"). + */ + if (mem->use_hierarchy) ret = css_is_ancestor(&curr->css, &mem->css); else ret = (curr == mem); diff --git a/mm/memory.c b/mm/memory.c index aede2ce3aba4..753b2e6ffd28 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2638,7 +2638,8 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, goto oom_free_page; entry = mk_pte(page, vma->vm_page_prot); - entry = maybe_mkwrite(pte_mkdirty(entry), vma); + if (vma->vm_flags & VM_WRITE) + entry = pte_mkwrite(pte_mkdirty(entry)); page_table = pte_offset_map_lock(mm, pmd, address, &ptl); if (!pte_none(*page_table)) diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 7dd9d9f80694..f29d8d70884d 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1024,7 +1024,7 @@ static long do_mbind(unsigned long start, unsigned long len, err = migrate_prep(); if (err) - return err; + goto mpol_out; } { NODEMASK_SCRATCH(scratch); @@ -1039,10 +1039,9 @@ static long do_mbind(unsigned long start, unsigned long len, err = -ENOMEM; NODEMASK_SCRATCH_FREE(scratch); } - if (err) { - mpol_put(new); - return err; - } + if (err) + goto mpol_out; + vma = check_range(mm, start, end, nmask, flags | MPOL_MF_INVERT, &pagelist); @@ -1058,9 +1057,11 @@ static long do_mbind(unsigned long start, unsigned long len, if (!err && nr_failed && (flags & MPOL_MF_STRICT)) err = -EIO; - } + } else + putback_lru_pages(&pagelist); up_write(&mm->mmap_sem); + mpol_out: mpol_put(new); return err; } @@ -2121,8 +2122,8 @@ int mpol_parse_str(char *str, struct mempolicy **mpol, int no_context) char *rest = nodelist; while (isdigit(*rest)) rest++; - if (!*rest) - err = 0; + if (*rest) + goto out; } break; case MPOL_INTERLEAVE: @@ -2131,7 +2132,6 @@ int mpol_parse_str(char *str, struct mempolicy **mpol, int no_context) */ if (!nodelist) nodes = node_states[N_HIGH_MEMORY]; - err = 0; break; case MPOL_LOCAL: /* @@ -2141,11 +2141,19 @@ int mpol_parse_str(char *str, struct mempolicy **mpol, int no_context) goto out; mode = MPOL_PREFERRED; break; - - /* - * case MPOL_BIND: mpol_new() enforces non-empty nodemask. - * case MPOL_DEFAULT: mpol_new() enforces empty nodemask, ignores flags. - */ + case MPOL_DEFAULT: + /* + * Insist on a empty nodelist + */ + if (!nodelist) + err = 0; + goto out; + case MPOL_BIND: + /* + * Insist on a nodelist + */ + if (!nodelist) + goto out; } mode_flags = 0; @@ -2159,13 +2167,14 @@ int mpol_parse_str(char *str, struct mempolicy **mpol, int no_context) else if (!strcmp(flags, "relative")) mode_flags |= MPOL_F_RELATIVE_NODES; else - err = 1; + goto out; } new = mpol_new(mode, mode_flags, &nodes); if (IS_ERR(new)) - err = 1; - else { + goto out; + + { int ret; NODEMASK_SCRATCH(scratch); if (scratch) { @@ -2176,13 +2185,15 @@ int mpol_parse_str(char *str, struct mempolicy **mpol, int no_context) ret = -ENOMEM; NODEMASK_SCRATCH_FREE(scratch); if (ret) { - err = 1; mpol_put(new); - } else if (no_context) { - /* save for contextualization */ - new->w.user_nodemask = nodes; + goto out; } } + err = 0; + if (no_context) { + /* save for contextualization */ + new->w.user_nodemask = nodes; + } out: /* Restore string for error message */ diff --git a/mm/migrate.c b/mm/migrate.c index 939888f9ddab..eebfb79d7748 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -597,7 +597,7 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private, struct page *newpage = get_new_page(page, private, &result); int rcu_locked = 0; int charge = 0; - struct mem_cgroup *mem; + struct mem_cgroup *mem = NULL; if (!newpage) return -ENOMEM; @@ -937,6 +937,9 @@ static int do_pages_move(struct mm_struct *mm, struct task_struct *task, goto out_pm; err = -ENODEV; + if (node < 0 || node >= MAX_NUMNODES) + goto out_pm; + if (!node_state(node, N_HIGH_MEMORY)) goto out_pm; diff --git a/mm/mincore.c b/mm/mincore.c index 8cb508f84ea4..7a3436ef39eb 100644 --- a/mm/mincore.c +++ b/mm/mincore.c @@ -14,6 +14,7 @@ #include <linux/syscalls.h> #include <linux/swap.h> #include <linux/swapops.h> +#include <linux/hugetlb.h> #include <asm/uaccess.h> #include <asm/pgtable.h> @@ -72,6 +73,42 @@ static long do_mincore(unsigned long addr, unsigned char *vec, unsigned long pag if (!vma || addr < vma->vm_start) return -ENOMEM; +#ifdef CONFIG_HUGETLB_PAGE + if (is_vm_hugetlb_page(vma)) { + struct hstate *h; + unsigned long nr_huge; + unsigned char present; + + i = 0; + nr = min(pages, (vma->vm_end - addr) >> PAGE_SHIFT); + h = hstate_vma(vma); + nr_huge = ((addr + pages * PAGE_SIZE - 1) >> huge_page_shift(h)) + - (addr >> huge_page_shift(h)) + 1; + nr_huge = min(nr_huge, + (vma->vm_end - addr) >> huge_page_shift(h)); + while (1) { + /* hugepage always in RAM for now, + * but generally it needs to be check */ + ptep = huge_pte_offset(current->mm, + addr & huge_page_mask(h)); + present = !!(ptep && + !huge_pte_none(huge_ptep_get(ptep))); + while (1) { + vec[i++] = present; + addr += PAGE_SIZE; + /* reach buffer limit */ + if (i == nr) + return nr; + /* check hugepage border */ + if (!((addr & ~huge_page_mask(h)) + >> PAGE_SHIFT)) + break; + } + } + return nr; + } +#endif + /* * Calculate how many pages there are left in the last level of the * PTE array for our address. diff --git a/mm/mlock.c b/mm/mlock.c index 45eb650b9654..e13918d4fc4f 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -139,49 +139,36 @@ static void munlock_vma_page(struct page *page) } /** - * __mlock_vma_pages_range() - mlock/munlock a range of pages in the vma. + * __mlock_vma_pages_range() - mlock a range of pages in the vma. * @vma: target vma * @start: start address * @end: end address - * @mlock: 0 indicate munlock, otherwise mlock. * - * If @mlock == 0, unlock an mlocked range; - * else mlock the range of pages. This takes care of making the pages present , - * too. + * This takes care of making the pages present too. * * return 0 on success, negative error code on error. * * vma->vm_mm->mmap_sem must be held for at least read. */ static long __mlock_vma_pages_range(struct vm_area_struct *vma, - unsigned long start, unsigned long end, - int mlock) + unsigned long start, unsigned long end) { struct mm_struct *mm = vma->vm_mm; unsigned long addr = start; struct page *pages[16]; /* 16 gives a reasonable batch */ int nr_pages = (end - start) / PAGE_SIZE; int ret = 0; - int gup_flags = 0; + int gup_flags; VM_BUG_ON(start & ~PAGE_MASK); VM_BUG_ON(end & ~PAGE_MASK); VM_BUG_ON(start < vma->vm_start); VM_BUG_ON(end > vma->vm_end); - VM_BUG_ON((!rwsem_is_locked(&mm->mmap_sem)) && - (atomic_read(&mm->mm_users) != 0)); - - /* - * mlock: don't page populate if vma has PROT_NONE permission. - * munlock: always do munlock although the vma has PROT_NONE - * permission, or SIGKILL is pending. - */ - if (!mlock) - gup_flags |= GUP_FLAGS_IGNORE_VMA_PERMISSIONS | - GUP_FLAGS_IGNORE_SIGKILL; + VM_BUG_ON(!rwsem_is_locked(&mm->mmap_sem)); + gup_flags = 0; if (vma->vm_flags & VM_WRITE) - gup_flags |= GUP_FLAGS_WRITE; + gup_flags = GUP_FLAGS_WRITE; while (nr_pages > 0) { int i; @@ -201,19 +188,10 @@ static long __mlock_vma_pages_range(struct vm_area_struct *vma, * This can happen for, e.g., VM_NONLINEAR regions before * a page has been allocated and mapped at a given offset, * or for addresses that map beyond end of a file. - * We'll mlock the the pages if/when they get faulted in. + * We'll mlock the pages if/when they get faulted in. */ if (ret < 0) break; - if (ret == 0) { - /* - * We know the vma is there, so the only time - * we cannot get a single page should be an - * error (ret < 0) case. - */ - WARN_ON(1); - break; - } lru_add_drain(); /* push cached pages to LRU */ @@ -224,28 +202,22 @@ static long __mlock_vma_pages_range(struct vm_area_struct *vma, /* * Because we lock page here and migration is blocked * by the elevated reference, we need only check for - * page truncation (file-cache only). + * file-cache page truncation. This page->mapping + * check also neatly skips over the ZERO_PAGE(), + * though if that's common we'd prefer not to lock it. */ - if (page->mapping) { - if (mlock) - mlock_vma_page(page); - else - munlock_vma_page(page); - } + if (page->mapping) + mlock_vma_page(page); unlock_page(page); - put_page(page); /* ref from get_user_pages() */ - - /* - * here we assume that get_user_pages() has given us - * a list of virtually contiguous pages. - */ - addr += PAGE_SIZE; /* for next get_user_pages() */ - nr_pages--; + put_page(page); /* ref from get_user_pages() */ } + + addr += ret * PAGE_SIZE; + nr_pages -= ret; ret = 0; } - return ret; /* count entire vma as locked_vm */ + return ret; /* 0 or negative error code */ } /* @@ -289,7 +261,7 @@ long mlock_vma_pages_range(struct vm_area_struct *vma, is_vm_hugetlb_page(vma) || vma == get_gate_vma(current))) { - __mlock_vma_pages_range(vma, start, end, 1); + __mlock_vma_pages_range(vma, start, end); /* Hide errors from mmap() and other callers */ return 0; @@ -310,7 +282,6 @@ no_mlock: return nr_pages; /* error or pages NOT mlocked */ } - /* * munlock_vma_pages_range() - munlock all pages in the vma range.' * @vma - vma containing range to be munlock()ed. @@ -330,10 +301,24 @@ no_mlock: * free them. This will result in freeing mlocked pages. */ void munlock_vma_pages_range(struct vm_area_struct *vma, - unsigned long start, unsigned long end) + unsigned long start, unsigned long end) { + unsigned long addr; + + lru_add_drain(); vma->vm_flags &= ~VM_LOCKED; - __mlock_vma_pages_range(vma, start, end, 0); + + for (addr = start; addr < end; addr += PAGE_SIZE) { + struct page *page = follow_page(vma, addr, FOLL_GET); + if (page) { + lock_page(page); + if (page->mapping) + munlock_vma_page(page); + unlock_page(page); + put_page(page); + } + cond_resched(); + } } /* @@ -400,18 +385,14 @@ success: * It's okay if try_to_unmap_one unmaps a page just after we * set VM_LOCKED, __mlock_vma_pages_range will bring it back. */ - vma->vm_flags = newflags; if (lock) { - ret = __mlock_vma_pages_range(vma, start, end, 1); - - if (ret > 0) { - mm->locked_vm -= ret; - ret = 0; - } else - ret = __mlock_posix_error_return(ret); /* translate if needed */ + vma->vm_flags = newflags; + ret = __mlock_vma_pages_range(vma, start, end); + if (ret < 0) + ret = __mlock_posix_error_return(ret); } else { - __mlock_vma_pages_range(vma, start, end, 0); + munlock_vma_pages_range(vma, start, end); } out: diff --git a/mm/mmap.c b/mm/mmap.c index 8101de490c73..cbb7cb308243 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -570,9 +570,9 @@ again: remove_next = 1 + (end > next->vm_end); /* * When changing only vma->vm_end, we don't really need - * anon_vma lock: but is that case worth optimizing out? + * anon_vma lock. */ - if (vma->anon_vma) + if (vma->anon_vma && (insert || importer || start != vma->vm_start)) anon_vma = vma->anon_vma; if (anon_vma) { spin_lock(&anon_vma->lock); diff --git a/mm/nommu.c b/mm/nommu.c index 66e81e7e9fe9..10cdd4c61292 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1056,7 +1056,7 @@ static int do_mmap_shared_file(struct vm_area_struct *vma) ret = vma->vm_file->f_op->mmap(vma->vm_file, vma); if (ret == 0) { vma->vm_region->vm_top = vma->vm_region->vm_end; - return ret; + return 0; } if (ret != -ENOSYS) return ret; @@ -1073,7 +1073,8 @@ static int do_mmap_shared_file(struct vm_area_struct *vma) */ static int do_mmap_private(struct vm_area_struct *vma, struct vm_region *region, - unsigned long len) + unsigned long len, + unsigned long capabilities) { struct page *pages; unsigned long total, point, n, rlen; @@ -1084,13 +1085,13 @@ static int do_mmap_private(struct vm_area_struct *vma, * shared mappings on devices or memory * - VM_MAYSHARE will be set if it may attempt to share */ - if (vma->vm_file) { + if (capabilities & BDI_CAP_MAP_DIRECT) { ret = vma->vm_file->f_op->mmap(vma->vm_file, vma); if (ret == 0) { /* shouldn't return success if we're not sharing */ BUG_ON(!(vma->vm_flags & VM_MAYSHARE)); vma->vm_region->vm_top = vma->vm_region->vm_end; - return ret; + return 0; } if (ret != -ENOSYS) return ret; @@ -1328,7 +1329,7 @@ unsigned long do_mmap_pgoff(struct file *file, * - this is the hook for quasi-memory character devices to * tell us the location of a shared mapping */ - if (file && file->f_op->get_unmapped_area) { + if (capabilities & BDI_CAP_MAP_DIRECT) { addr = file->f_op->get_unmapped_area(file, addr, len, pgoff, flags); if (IS_ERR((void *) addr)) { @@ -1352,15 +1353,17 @@ unsigned long do_mmap_pgoff(struct file *file, } vma->vm_region = region; - add_nommu_region(region); - /* set up the mapping */ + /* set up the mapping + * - the region is filled in if BDI_CAP_MAP_DIRECT is still set + */ if (file && vma->vm_flags & VM_SHARED) ret = do_mmap_shared_file(vma); else - ret = do_mmap_private(vma, region, len); + ret = do_mmap_private(vma, region, len, capabilities); if (ret < 0) - goto error_put_region; + goto error_just_free; + add_nommu_region(region); /* okay... we have a mapping; now we have to register it */ result = vma->vm_start; @@ -1378,25 +1381,14 @@ share: kleave(" = %lx", result); return result; -error_put_region: - __put_nommu_region(region); - if (vma) { - if (vma->vm_file) { - fput(vma->vm_file); - if (vma->vm_flags & VM_EXECUTABLE) - removed_exe_file_vma(vma->vm_mm); - } - kmem_cache_free(vm_area_cachep, vma); - } - kleave(" = %d [pr]", ret); - return ret; - error_just_free: up_write(&nommu_region_sem); error: - fput(region->vm_file); + if (region->vm_file) + fput(region->vm_file); kmem_cache_free(vm_region_jar, region); - fput(vma->vm_file); + if (vma->vm_file) + fput(vma->vm_file); if (vma->vm_flags & VM_EXECUTABLE) removed_exe_file_vma(vma->vm_mm); kmem_cache_free(vm_area_cachep, vma); diff --git a/mm/oom_kill.c b/mm/oom_kill.c index a7b2460e922b..ed452e9485d4 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -400,7 +400,7 @@ static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order, cpuset_print_task_mems_allowed(current); task_unlock(current); dump_stack(); - mem_cgroup_print_oom_info(mem, current); + mem_cgroup_print_oom_info(mem, p); show_mem(); if (sysctl_oom_dump_tasks) dump_tasks(mem); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index a0de15f46987..d04c8428c364 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1167,10 +1167,10 @@ again: } spin_lock_irqsave(&zone->lock, flags); page = __rmqueue(zone, order, migratetype); - __mod_zone_page_state(zone, NR_FREE_PAGES, -(1 << order)); spin_unlock(&zone->lock); if (!page) goto failed; + __mod_zone_page_state(zone, NR_FREE_PAGES, -(1 << order)); } __count_zone_vm_events(PGALLOC, zone, 1 << order); @@ -1715,7 +1715,7 @@ gfp_to_alloc_flags(gfp_t gfp_mask) * See also cpuset_zone_allowed() comment in kernel/cpuset.c. */ alloc_flags &= ~ALLOC_CPUSET; - } else if (unlikely(rt_task(p))) + } else if (unlikely(rt_task(p)) && !in_interrupt()) alloc_flags |= ALLOC_HARDER; if (likely(!(gfp_mask & __GFP_NOMEMALLOC))) { @@ -1763,6 +1763,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, if (NUMA_BUILD && (gfp_mask & GFP_THISNODE) == GFP_THISNODE) goto nopage; +restart: wake_all_kswapd(order, zonelist, high_zoneidx); /* @@ -1772,7 +1773,6 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, */ alloc_flags = gfp_to_alloc_flags(gfp_mask); -restart: /* This is the last chance, in general, before the goto nopage. */ page = get_page_from_freelist(gfp_mask, nodemask, order, zonelist, high_zoneidx, alloc_flags & ~ALLOC_NO_WATERMARKS, @@ -2783,7 +2783,8 @@ static void setup_zone_migrate_reserve(struct zone *zone) { unsigned long start_pfn, pfn, end_pfn; struct page *page; - unsigned long reserve, block_migratetype; + unsigned long block_migratetype; + int reserve; /* Get the start pfn, end pfn and the number of blocks to reserve */ start_pfn = zone->zone_start_pfn; @@ -2791,6 +2792,15 @@ static void setup_zone_migrate_reserve(struct zone *zone) reserve = roundup(min_wmark_pages(zone), pageblock_nr_pages) >> pageblock_order; + /* + * Reserve blocks are generally in place to help high-order atomic + * allocations that are short-lived. A min_free_kbytes value that + * would result in more than 2 reserve blocks for atomic allocations + * is assumed to be in place to help anti-fragmentation for the + * future allocation of hugepages at runtime. + */ + reserve = min(2, reserve); + for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) { if (!pfn_valid(pfn)) continue; diff --git a/mm/pagewalk.c b/mm/pagewalk.c index d5878bed7841..a286915e23ef 100644 --- a/mm/pagewalk.c +++ b/mm/pagewalk.c @@ -1,6 +1,7 @@ #include <linux/mm.h> #include <linux/highmem.h> #include <linux/sched.h> +#include <linux/hugetlb.h> static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, struct mm_walk *walk) @@ -107,6 +108,7 @@ int walk_page_range(unsigned long addr, unsigned long end, pgd_t *pgd; unsigned long next; int err = 0; + struct vm_area_struct *vma; if (addr >= end) return err; @@ -117,11 +119,22 @@ int walk_page_range(unsigned long addr, unsigned long end, pgd = pgd_offset(walk->mm, addr); do { next = pgd_addr_end(addr, end); + + /* skip hugetlb vma to avoid hugepage PMD being cleared + * in pmd_none_or_clear_bad(). */ + vma = find_vma(walk->mm, addr); + if (vma && is_vm_hugetlb_page(vma)) { + if (vma->vm_end < next) + next = vma->vm_end; + continue; + } + if (pgd_none_or_clear_bad(pgd)) { if (walk->pte_hole) err = walk->pte_hole(addr, next, walk); if (err) break; + pgd++; continue; } if (walk->pgd_entry) @@ -131,7 +144,8 @@ int walk_page_range(unsigned long addr, unsigned long end, err = walk_pud_range(pgd, addr, next, walk); if (err) break; - } while (pgd++, addr = next, addr != end); + pgd++; + } while (addr = next, addr != end); return err; } diff --git a/mm/shmem.c b/mm/shmem.c index fb7861108d31..d713239ce2ce 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2668,14 +2668,6 @@ put_memory: } EXPORT_SYMBOL_GPL(shmem_file_setup); -void shmem_set_file(struct vm_area_struct *vma, struct file *file) -{ - if (vma->vm_file) - fput(vma->vm_file); - vma->vm_file = file; - vma->vm_ops = &shmem_vm_ops; -} - /** * shmem_zero_setup - setup a shared anonymous mapping * @vma: the vma to be mmapped is prepared by do_mmap_pgoff @@ -2688,8 +2680,11 @@ int shmem_zero_setup(struct vm_area_struct *vma) file = shmem_file_setup("dev/zero", size, vma->vm_flags); if (IS_ERR(file)) return PTR_ERR(file); - shmem_set_file(vma, file); + if (vma->vm_file) + fput(vma->vm_file); + vma->vm_file = file; + vma->vm_ops = &shmem_vm_ops; return 0; } diff --git a/mm/slab.c b/mm/slab.c index 7b5d4deacfcd..7dfa481c96ba 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -1384,7 +1384,7 @@ void __init kmem_cache_init(void) * Fragmentation resistance on low memory - only use bigger * page orders on machines with more than 32MB of memory. */ - if (num_physpages > (32 << 20) >> PAGE_SHIFT) + if (totalram_pages > (32 << 20) >> PAGE_SHIFT) slab_break_gfp_order = BREAK_GFP_ORDER_HI; /* Bootstrap is tricky, because several objects are allocated diff --git a/mm/swap.c b/mm/swap.c index cb29ae5d33ab..9387f17f99c5 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -496,7 +496,7 @@ EXPORT_SYMBOL(pagevec_lookup_tag); */ void __init swap_setup(void) { - unsigned long megs = num_physpages >> (20 - PAGE_SHIFT); + unsigned long megs = totalram_pages >> (20 - PAGE_SHIFT); #ifdef CONFIG_SWAP bdi_init(swapper_space.backing_dev_info); diff --git a/mm/swap_state.c b/mm/swap_state.c index 42cd38eba79f..0313a135f2a1 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -66,10 +66,10 @@ void show_swap_cache_info(void) } /* - * add_to_swap_cache resembles add_to_page_cache_locked on swapper_space, + * __add_to_swap_cache resembles add_to_page_cache_locked on swapper_space, * but sets SwapCache flag and private instead of mapping and index. */ -int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask) +static int __add_to_swap_cache(struct page *page, swp_entry_t entry) { int error; @@ -77,28 +77,37 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask) VM_BUG_ON(PageSwapCache(page)); VM_BUG_ON(!PageSwapBacked(page)); + page_cache_get(page); + SetPageSwapCache(page); + set_page_private(page, entry.val); + + spin_lock_irq(&swapper_space.tree_lock); + error = radix_tree_insert(&swapper_space.page_tree, entry.val, page); + if (likely(!error)) { + total_swapcache_pages++; + __inc_zone_page_state(page, NR_FILE_PAGES); + INC_CACHE_INFO(add_total); + } + spin_unlock_irq(&swapper_space.tree_lock); + + if (unlikely(error)) { + set_page_private(page, 0UL); + ClearPageSwapCache(page); + page_cache_release(page); + } + + return error; +} + + +int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask) +{ + int error; + error = radix_tree_preload(gfp_mask); if (!error) { - page_cache_get(page); - SetPageSwapCache(page); - set_page_private(page, entry.val); - - spin_lock_irq(&swapper_space.tree_lock); - error = radix_tree_insert(&swapper_space.page_tree, - entry.val, page); - if (likely(!error)) { - total_swapcache_pages++; - __inc_zone_page_state(page, NR_FILE_PAGES); - INC_CACHE_INFO(add_total); - } - spin_unlock_irq(&swapper_space.tree_lock); + error = __add_to_swap_cache(page, entry); radix_tree_preload_end(); - - if (unlikely(error)) { - set_page_private(page, 0UL); - ClearPageSwapCache(page); - page_cache_release(page); - } } return error; } @@ -289,13 +298,24 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, } /* + * call radix_tree_preload() while we can wait. + */ + err = radix_tree_preload(gfp_mask & GFP_KERNEL); + if (err) + break; + + /* * Swap entry may have been freed since our caller observed it. */ err = swapcache_prepare(entry); - if (err == -EEXIST) /* seems racy */ + if (err == -EEXIST) { /* seems racy */ + radix_tree_preload_end(); continue; - if (err) /* swp entry is obsolete ? */ + } + if (err) { /* swp entry is obsolete ? */ + radix_tree_preload_end(); break; + } /* * Associate the page with swap entry in the swap cache. @@ -307,8 +327,9 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, */ __set_page_locked(new_page); SetPageSwapBacked(new_page); - err = add_to_swap_cache(new_page, entry, gfp_mask & GFP_KERNEL); + err = __add_to_swap_cache(new_page, entry); if (likely(!err)) { + radix_tree_preload_end(); /* * Initiate read into locked page and return. */ @@ -316,6 +337,7 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, swap_readpage(new_page); return new_page; } + radix_tree_preload_end(); ClearPageSwapBacked(new_page); __clear_page_locked(new_page); swapcache_free(entry, NULL); diff --git a/mm/swapfile.c b/mm/swapfile.c index 8ffdc0d23c53..b47ccd77f050 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -1149,8 +1149,7 @@ static int try_to_unuse(unsigned int type) } else retval = unuse_mm(mm, entry, page); - if (set_start_mm && - swap_count(*swap_map) < swcount) { + if (set_start_mm && *swap_map < swcount) { mmput(new_start_mm); atomic_inc(&mm->mm_users); new_start_mm = mm; diff --git a/mm/vmalloc.c b/mm/vmalloc.c index f8189a4b3e13..f603667f5d02 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -546,10 +546,8 @@ static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end, } rcu_read_unlock(); - if (nr) { - BUG_ON(nr > atomic_read(&vmap_lazy_nr)); + if (nr) atomic_sub(nr, &vmap_lazy_nr); - } if (nr || force_flush) flush_tlb_kernel_range(*start, *end); @@ -1368,7 +1366,7 @@ void *vmap(struct page **pages, unsigned int count, might_sleep(); - if (count > num_physpages) + if (count > totalram_pages) return NULL; area = get_vm_area_caller((count << PAGE_SHIFT), flags, @@ -1475,7 +1473,7 @@ static void *__vmalloc_node(unsigned long size, gfp_t gfp_mask, pgprot_t prot, unsigned long real_size = size; size = PAGE_ALIGN(size); - if (!size || (size >> PAGE_SHIFT) > num_physpages) + if (!size || (size >> PAGE_SHIFT) > totalram_pages) return NULL; area = __get_vm_area_node(size, VM_ALLOC, VMALLOC_START, VMALLOC_END, diff --git a/mm/vmscan.c b/mm/vmscan.c index 94e86dd6954c..95f35a7d1f05 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1298,7 +1298,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone, * IO, plus JVM can create lots of anon VM_EXEC pages, * so we ignore them here. */ - if ((vm_flags & VM_EXEC) && !PageAnon(page)) { + if ((vm_flags & VM_EXEC) && page_is_file_cache(page)) { list_add(&page->lru, &l_active); continue; } |