author     Xinyu Chen <xinyu.chen@freescale.com>    2012-07-25 16:54:33 +0800
committer  Xinyu Chen <xinyu.chen@freescale.com>    2012-07-25 16:54:33 +0800
commit     10ca2f12149b8c3fde9af51da89736529892dc69 (patch)
tree       1a5118db41796df0a40c20467f37542e957e87a0 /mm
parent     3fb99edfabe05a47803eba7f39109b5eb86e25df (diff)
parent     cf3095062b75f6e518c6ef8a25b47a5b2ced7668 (diff)
Merge remote branch 'fsl-linux-sdk/imx_3.0.35' into imx_3.0.35_android
Conflicts:
arch/arm/configs/imx6_defconfig
arch/arm/configs/imx6_updater_defconfig
arch/arm/configs/imx6s_defconfig
arch/arm/include/asm/dma-mapping.h
arch/arm/kernel/smp.c
arch/arm/mach-mx6/Kconfig
arch/arm/mach-mx6/board-mx6dl_arm2.h
arch/arm/mach-mx6/board-mx6dl_sabresd.h
arch/arm/mach-mx6/board-mx6q_arm2.c
arch/arm/mach-mx6/board-mx6q_arm2.h
arch/arm/mach-mx6/board-mx6q_sabreauto.c
arch/arm/mach-mx6/board-mx6q_sabreauto.h
arch/arm/mach-mx6/board-mx6q_sabrelite.c
arch/arm/mach-mx6/board-mx6q_sabresd.c
arch/arm/mach-mx6/board-mx6q_sabresd.h
arch/arm/mach-mx6/board-mx6sl_arm2.c
arch/arm/mach-mx6/board-mx6sl_arm2.h
arch/arm/mach-mx6/board-mx6solo_sabreauto.h
arch/arm/mach-mx6/bus_freq.c
arch/arm/mach-mx6/clock.c
arch/arm/mach-mx6/clock_mx6sl.c
arch/arm/mach-mx6/cpu.c
arch/arm/mach-mx6/crm_regs.h
arch/arm/mach-mx6/devices-imx6q.h
arch/arm/mach-mx6/devices.c
arch/arm/mach-mx6/mx6_anatop_regulator.c
arch/arm/mach-mx6/pcie.c
arch/arm/mach-mx6/system.c
arch/arm/mm/dma-mapping.c
arch/arm/plat-mxc/devices/Makefile
arch/arm/plat-mxc/devices/platform-imx-dcp.c
arch/arm/plat-mxc/devices/platform-imx-ocotp.c
arch/arm/plat-mxc/devices/platform-imx-rngb.c
arch/arm/plat-mxc/devices/platform-mxc_hdmi.c
arch/arm/plat-mxc/include/mach/devices-common.h
arch/arm/plat-mxc/include/mach/esdhc.h
arch/arm/plat-mxc/include/mach/iomux-mx6dl.h
arch/arm/plat-mxc/include/mach/iomux-mx6q.h
arch/arm/plat-mxc/include/mach/memory.h
arch/arm/plat-mxc/include/mach/mx6.h
arch/arm/plat-mxc/include/mach/mxc_edid.h
arch/arm/plat-mxc/include/mach/mxc_hdmi.h
arch/arm/plat-mxc/system.c
drivers/Kconfig
drivers/char/hw_random/fsl-rngc.c
drivers/cpufreq/Makefile
drivers/cpufreq/cpufreq_interactive.c
drivers/crypto/Kconfig
drivers/crypto/caam/caamalg.c
drivers/crypto/caam/compat.h
drivers/crypto/caam/ctrl.c
drivers/crypto/caam/desc_constr.h
drivers/crypto/caam/intern.h
drivers/crypto/dcp.c
drivers/dma/pch_dma.c
drivers/input/keyboard/gpio_keys.c
drivers/input/touchscreen/egalax_ts.c
drivers/input/touchscreen/max11801_ts.c
drivers/media/video/mxc/capture/Kconfig
drivers/media/video/mxc/capture/adv7180.c
drivers/media/video/mxc/capture/ipu_csi_enc.c
drivers/media/video/mxc/capture/ipu_prp_vf_sdc.c
drivers/media/video/mxc/capture/ipu_prp_vf_sdc_bg.c
drivers/media/video/mxc/capture/mxc_v4l2_capture.c
drivers/media/video/mxc/capture/ov5640_mipi.c
drivers/media/video/mxc/output/mxc_vout.c
drivers/misc/Kconfig
drivers/misc/Makefile
drivers/mmc/card/block.c
drivers/mmc/core/mmc.c
drivers/mmc/host/mmci.c
drivers/mmc/host/sdhci-esdhc-imx.c
drivers/mmc/host/sdhci.c
drivers/mmc/host/sdhci.h
drivers/mxc/Kconfig
drivers/mxc/Makefile
drivers/mxc/asrc/mxc_asrc.c
drivers/mxc/gpu-viv/arch/XAQ2/hal/kernel/gc_hal_kernel_context.c
drivers/mxc/gpu-viv/arch/XAQ2/hal/kernel/gc_hal_kernel_hardware.c
drivers/mxc/gpu-viv/hal/kernel/gc_hal_kernel.c
drivers/mxc/gpu-viv/hal/kernel/gc_hal_kernel.h
drivers/mxc/gpu-viv/hal/kernel/gc_hal_kernel_command.c
drivers/mxc/gpu-viv/hal/kernel/gc_hal_kernel_event.c
drivers/mxc/gpu-viv/hal/kernel/inc/gc_hal.h
drivers/mxc/gpu-viv/hal/kernel/inc/gc_hal_base.h
drivers/mxc/gpu-viv/hal/kernel/inc/gc_hal_options.h
drivers/mxc/gpu-viv/hal/os/linux/kernel/gc_hal_kernel_os.c
drivers/mxc/ipu3/ipu_device.c
drivers/mxc/vpu/mxc_vpu.c
drivers/net/fec.c
drivers/net/wireless/Makefile
drivers/power/sabresd_battery.c
drivers/regulator/core.c
drivers/tty/serial/imx.c
drivers/usb/core/hub.c
drivers/usb/gadget/arcotg_udc.c
drivers/usb/gadget/fsl_updater.c
drivers/usb/gadget/inode.c
drivers/usb/host/ehci-hub.c
drivers/video/mxc/ldb.c
drivers/video/mxc/mipi_dsi.c
drivers/video/mxc/mxc_dispdrv.c
drivers/video/mxc/mxc_dispdrv.h
drivers/video/mxc/mxc_edid.c
drivers/video/mxc/mxc_elcdif_fb.c
drivers/video/mxc/mxc_ipuv3_fb.c
drivers/video/mxc/mxc_spdc_fb.c
drivers/video/mxc_hdmi.c
drivers/watchdog/imx2_wdt.c
fs/proc/base.c
include/linux/mmc/host.h
include/linux/mmc/sdhci.h
include/linux/mxc_v4l2.h
kernel/power/main.c
sound/soc/codecs/mxc_hdmi.c
sound/soc/codecs/mxc_spdif.c
sound/soc/codecs/wm8962.c
sound/soc/imx/Kconfig
sound/soc/imx/Makefile
sound/soc/imx/imx-cs42888.c
sound/soc/imx/imx-esai.c
sound/soc/imx/imx-wm8958.c
sound/soc/imx/imx-wm8962.c
Diffstat (limited to 'mm')
-rw-r--r--  mm/bootmem.c      |  5
-rw-r--r--  mm/compaction.c   | 24
-rw-r--r--  mm/dmapool.c      | 88
-rw-r--r--  mm/filemap.c      | 33
-rw-r--r--  mm/filemap_xip.c  |  7
-rw-r--r--  mm/huge_memory.c  | 10
-rw-r--r--  mm/hugetlb.c      | 34
-rw-r--r--  mm/madvise.c      | 16
-rw-r--r--  mm/memcontrol.c   | 62
-rw-r--r--  mm/memory.c       | 16
-rw-r--r--  mm/mempolicy.c    | 43
-rw-r--r--  mm/mincore.c      |  2
-rw-r--r--  mm/nobootmem.c    |  3
-rw-r--r--  mm/nommu.c        |  9
-rw-r--r--  mm/oom_kill.c     |  2
-rw-r--r--  mm/page_alloc.c   | 11
-rw-r--r--  mm/pagewalk.c     |  2
-rw-r--r--  mm/percpu.c       | 16
-rw-r--r--  mm/slub.c         |  3
-rw-r--r--  mm/sparse.c       | 30
-rw-r--r--  mm/swap.c         |  2
-rw-r--r--  mm/swap_state.c   |  2
-rw-r--r--  mm/swapfile.c     |  4
-rw-r--r--  mm/vmalloc.c      | 11
-rw-r--r--  mm/vmscan.c       |  2
25 files changed, 319 insertions, 118 deletions
diff --git a/mm/bootmem.c b/mm/bootmem.c index 01d5a4b3dd0c..9686c4e3f80d 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -768,14 +768,13 @@ void * __init alloc_bootmem_section(unsigned long size, unsigned long section_nr) { bootmem_data_t *bdata; - unsigned long pfn, goal, limit; + unsigned long pfn, goal; pfn = section_nr_to_pfn(section_nr); goal = pfn << PAGE_SHIFT; - limit = section_nr_to_pfn(section_nr + 1) << PAGE_SHIFT; bdata = &bootmem_node_data[early_pfn_to_nid(pfn)]; - return alloc_bootmem_core(bdata, size, SMP_CACHE_BYTES, goal, limit); + return alloc_bootmem_core(bdata, size, SMP_CACHE_BYTES, goal, 0); } #endif diff --git a/mm/compaction.c b/mm/compaction.c index 6cc604bd5649..c4bc5acf865d 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -320,12 +320,34 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone, } else if (!locked) spin_lock_irq(&zone->lru_lock); + /* + * migrate_pfn does not necessarily start aligned to a + * pageblock. Ensure that pfn_valid is called when moving + * into a new MAX_ORDER_NR_PAGES range in case of large + * memory holes within the zone + */ + if ((low_pfn & (MAX_ORDER_NR_PAGES - 1)) == 0) { + if (!pfn_valid(low_pfn)) { + low_pfn += MAX_ORDER_NR_PAGES - 1; + continue; + } + } + if (!pfn_valid_within(low_pfn)) continue; nr_scanned++; - /* Get the page and skip if free */ + /* + * Get the page and ensure the page is within the same zone. + * See the comment in isolate_freepages about overlapping + * nodes. It is deliberate that the new zone lock is not taken + * as memory compaction should not move pages between nodes. + */ page = pfn_to_page(low_pfn); + if (page_zone(page) != zone) + continue; + + /* Skip if free */ if (PageBuddy(page)) continue; diff --git a/mm/dmapool.c b/mm/dmapool.c index 03bf3bb4519a..98798f414cb9 100644 --- a/mm/dmapool.c +++ b/mm/dmapool.c @@ -212,6 +212,32 @@ static void pool_initialise_page(struct dma_pool *pool, struct dma_page *page) } while (offset < pool->allocation); } +#ifdef CONFIG_FSL_UTP +static struct dma_page *pool_alloc_page_nonbufferable(struct dma_pool *pool, gfp_t mem_flags) +{ + struct dma_page *page; + + page = kmalloc(sizeof(*page), mem_flags); + if (!page) + return NULL; + page->vaddr = dma_alloc_noncacheable(pool->dev, pool->allocation, + &page->dma, mem_flags); + if (page->vaddr) { +#ifdef DMAPOOL_DEBUG + memset(page->vaddr, POOL_POISON_FREED, pool->allocation); +#endif + pool_initialise_page(pool, page); + list_add(&page->page_list, &pool->page_list); + page->in_use = 0; + page->offset = 0; + } else { + kfree(page); + page = NULL; + } + return page; +} +#endif + static struct dma_page *pool_alloc_page(struct dma_pool *pool, gfp_t mem_flags) { struct dma_page *page; @@ -353,6 +379,68 @@ void *dma_pool_alloc(struct dma_pool *pool, gfp_t mem_flags, } EXPORT_SYMBOL(dma_pool_alloc); +#ifdef CONFIG_FSL_UTP +/** + * dma_pool_alloc_nonbufferable - get a block of consistent memory + * @pool: dma pool that will produce the block + * @mem_flags: GFP_* bitmask + * @handle: pointer to dma address of block + * + * This returns the kernel virtual address of a currently unused block, + * and reports its dma address through the handle. + * If such a memory block can't be allocated, %NULL is returned. 
+ */ +void *dma_pool_alloc_nonbufferable(struct dma_pool *pool, gfp_t mem_flags, + dma_addr_t *handle) +{ + unsigned long flags; + struct dma_page *page; + size_t offset; + void *retval; + + might_sleep_if(mem_flags & __GFP_WAIT); + + spin_lock_irqsave(&pool->lock, flags); + restart: + list_for_each_entry(page, &pool->page_list, page_list) { + if (page->offset < pool->allocation) + goto ready; + } + page = pool_alloc_page_nonbufferable(pool, GFP_ATOMIC); + if (!page) { + if (mem_flags & __GFP_WAIT) { + DECLARE_WAITQUEUE(wait, current); + + __set_current_state(TASK_UNINTERRUPTIBLE); + __add_wait_queue(&pool->waitq, &wait); + spin_unlock_irqrestore(&pool->lock, flags); + + schedule_timeout(POOL_TIMEOUT_JIFFIES); + + spin_lock_irqsave(&pool->lock, flags); + __remove_wait_queue(&pool->waitq, &wait); + goto restart; + } + retval = NULL; + goto done; + } + + ready: + page->in_use++; + offset = page->offset; + page->offset = *(int *)(page->vaddr + offset); + retval = offset + page->vaddr; + *handle = offset + page->dma; +#ifdef DMAPOOL_DEBUG + memset(retval, POOL_POISON_ALLOCATED, pool->size); +#endif + done: + spin_unlock_irqrestore(&pool->lock, flags); + return retval; +} +EXPORT_SYMBOL(dma_pool_alloc_nonbufferable); +#endif + static struct dma_page *pool_find_page(struct dma_pool *pool, dma_addr_t dma) { struct dma_page *page; diff --git a/mm/filemap.c b/mm/filemap.c index a8251a8d3457..b7d860390f34 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -396,24 +396,11 @@ EXPORT_SYMBOL(filemap_write_and_wait_range); int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask) { int error; - struct mem_cgroup *memcg = NULL; VM_BUG_ON(!PageLocked(old)); VM_BUG_ON(!PageLocked(new)); VM_BUG_ON(new->mapping); - /* - * This is not page migration, but prepare_migration and - * end_migration does enough work for charge replacement. - * - * In the longer term we probably want a specialized function - * for moving the charge from old to new in a more efficient - * manner. 
- */ - error = mem_cgroup_prepare_migration(old, new, &memcg, gfp_mask); - if (error) - return error; - error = radix_tree_preload(gfp_mask & ~__GFP_HIGHMEM); if (!error) { struct address_space *mapping = old->mapping; @@ -435,13 +422,12 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask) if (PageSwapBacked(new)) __inc_zone_page_state(new, NR_SHMEM); spin_unlock_irq(&mapping->tree_lock); + /* mem_cgroup codes must not be called under tree_lock */ + mem_cgroup_replace_page_cache(old, new); radix_tree_preload_end(); if (freepage) freepage(old); page_cache_release(old); - mem_cgroup_end_migration(memcg, old, new, true); - } else { - mem_cgroup_end_migration(memcg, old, new, false); } return error; @@ -1393,15 +1379,12 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov, unsigned long seg = 0; size_t count; loff_t *ppos = &iocb->ki_pos; - struct blk_plug plug; count = 0; retval = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE); if (retval) return retval; - blk_start_plug(&plug); - /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */ if (filp->f_flags & O_DIRECT) { loff_t size; @@ -1417,8 +1400,12 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov, retval = filemap_write_and_wait_range(mapping, pos, pos + iov_length(iov, nr_segs) - 1); if (!retval) { + struct blk_plug plug; + + blk_start_plug(&plug); retval = mapping->a_ops->direct_IO(READ, iocb, iov, pos, nr_segs); + blk_finish_plug(&plug); } if (retval > 0) { *ppos = pos + retval; @@ -1474,7 +1461,6 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov, break; } out: - blk_finish_plug(&plug); return retval; } EXPORT_SYMBOL(generic_file_aio_read); @@ -1807,7 +1793,7 @@ repeat: page = __page_cache_alloc(gfp | __GFP_COLD); if (!page) return ERR_PTR(-ENOMEM); - err = add_to_page_cache_lru(page, mapping, index, GFP_KERNEL); + err = add_to_page_cache_lru(page, mapping, index, gfp); if (unlikely(err)) { page_cache_release(page); if (err == -EEXIST) @@ -1904,10 +1890,7 @@ static struct page *wait_on_page_read(struct page *page) * @gfp: the page allocator flags to use if allocating * * This is the same as "read_mapping_page(mapping, index, NULL)", but with - * any new page allocations done using the specified allocation flags. Note - * that the Radix tree operations will still use GFP_KERNEL, so you can't - * expect to do this atomically or anything like that - but you can pass in - * other page requirements. + * any new page allocations done using the specified allocation flags. * * If the page does not get brought uptodate, return -EIO. 
*/ diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c index 93356cd12828..dee94297f392 100644 --- a/mm/filemap_xip.c +++ b/mm/filemap_xip.c @@ -263,7 +263,12 @@ found: xip_pfn); if (err == -ENOMEM) return VM_FAULT_OOM; - BUG_ON(err); + /* + * err == -EBUSY is fine, we've raced against another thread + * that faulted-in the same page + */ + if (err != -EBUSY) + BUG_ON(err); return VM_FAULT_NOPAGE; } else { int err, ret = VM_FAULT_OOM; diff --git a/mm/huge_memory.c b/mm/huge_memory.c index cc5acf9998b0..8cc11dda6a74 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -641,6 +641,7 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm, set_pmd_at(mm, haddr, pmd, entry); prepare_pmd_huge_pte(pgtable, mm); add_mm_counter(mm, MM_ANONPAGES, HPAGE_PMD_NR); + mm->nr_ptes++; spin_unlock(&mm->page_table_lock); } @@ -759,6 +760,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, pmd = pmd_mkold(pmd_wrprotect(pmd)); set_pmd_at(dst_mm, addr, dst_pmd, pmd); prepare_pmd_huge_pte(pgtable, dst_mm); + dst_mm->nr_ptes++; ret = 0; out_unlock: @@ -857,7 +859,6 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm, } kfree(pages); - mm->nr_ptes++; smp_wmb(); /* make pte visible before pmd */ pmd_populate(mm, pmd, pgtable); page_remove_rmap(page); @@ -1016,6 +1017,7 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, VM_BUG_ON(page_mapcount(page) < 0); add_mm_counter(tlb->mm, MM_ANONPAGES, -HPAGE_PMD_NR); VM_BUG_ON(!PageHead(page)); + tlb->mm->nr_ptes--; spin_unlock(&tlb->mm->page_table_lock); tlb_remove_page(tlb, page); pte_free(tlb->mm, pgtable); @@ -1310,7 +1312,6 @@ static int __split_huge_page_map(struct page *page, pte_unmap(pte); } - mm->nr_ptes++; smp_wmb(); /* make pte visible before pmd */ /* * Up to this point the pmd is present and huge and @@ -1925,7 +1926,6 @@ static void collapse_huge_page(struct mm_struct *mm, set_pmd_at(mm, address, pmd, _pmd); update_mmu_cache(vma, address, entry); prepare_pmd_huge_pte(pgtable, mm); - mm->nr_ptes--; spin_unlock(&mm->page_table_lock); #ifndef CONFIG_NUMA @@ -2020,7 +2020,7 @@ static void collect_mm_slot(struct mm_slot *mm_slot) { struct mm_struct *mm = mm_slot->mm; - VM_BUG_ON(!spin_is_locked(&khugepaged_mm_lock)); + VM_BUG_ON(NR_CPUS != 1 && !spin_is_locked(&khugepaged_mm_lock)); if (khugepaged_test_exit(mm)) { /* free mm_slot */ @@ -2048,7 +2048,7 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages, int progress = 0; VM_BUG_ON(!pages); - VM_BUG_ON(!spin_is_locked(&khugepaged_mm_lock)); + VM_BUG_ON(NR_CPUS != 1 && !spin_is_locked(&khugepaged_mm_lock)); if (khugepaged_scan.mm_slot) mm_slot = khugepaged_scan.mm_slot; diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 80936a118c38..05f8fd425f69 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -901,7 +901,6 @@ retry: h->resv_huge_pages += delta; ret = 0; - spin_unlock(&hugetlb_lock); /* Free the needed pages to the hugetlb pool */ list_for_each_entry_safe(page, tmp, &surplus_list, lru) { if ((--needed) < 0) @@ -915,6 +914,7 @@ retry: VM_BUG_ON(page_count(page)); enqueue_huge_page(h, page); } + spin_unlock(&hugetlb_lock); /* Free unnecessary surplus pages to the buddy allocator */ free: @@ -2060,6 +2060,15 @@ static void hugetlb_vm_op_open(struct vm_area_struct *vma) kref_get(&reservations->refs); } +static void resv_map_put(struct vm_area_struct *vma) +{ + struct resv_map *reservations = vma_resv_map(vma); + + if (!reservations) + return; + kref_put(&reservations->refs, resv_map_release); +} + static void hugetlb_vm_op_close(struct 
vm_area_struct *vma) { struct hstate *h = hstate_vma(vma); @@ -2075,7 +2084,7 @@ static void hugetlb_vm_op_close(struct vm_area_struct *vma) reserve = (end - start) - region_count(&reservations->regions, start, end); - kref_put(&reservations->refs, resv_map_release); + resv_map_put(vma); if (reserve) { hugetlb_acct_memory(h, -reserve); @@ -2398,7 +2407,6 @@ retry_avoidcopy: if (outside_reserve) { BUG_ON(huge_pte_none(pte)); if (unmap_ref_private(mm, vma, old_page, address)) { - BUG_ON(page_count(old_page) != 1); BUG_ON(huge_pte_none(pte)); spin_lock(&mm->page_table_lock); goto retry_avoidcopy; @@ -2679,6 +2687,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, * so no worry about deadlock. */ page = pte_page(entry); + get_page(page); if (page != pagecache_page) lock_page(page); @@ -2710,6 +2719,7 @@ out_page_table_lock: } if (page != pagecache_page) unlock_page(page); + put_page(page); out_mutex: mutex_unlock(&hugetlb_instantiation_mutex); @@ -2876,12 +2886,16 @@ int hugetlb_reserve_pages(struct inode *inode, set_vma_resv_flags(vma, HPAGE_RESV_OWNER); } - if (chg < 0) - return chg; + if (chg < 0) { + ret = chg; + goto out_err; + } /* There must be enough filesystem quota for the mapping */ - if (hugetlb_get_quota(inode->i_mapping, chg)) - return -ENOSPC; + if (hugetlb_get_quota(inode->i_mapping, chg)) { + ret = -ENOSPC; + goto out_err; + } /* * Check enough hugepages are available for the reservation. @@ -2890,7 +2904,7 @@ int hugetlb_reserve_pages(struct inode *inode, ret = hugetlb_acct_memory(h, chg); if (ret < 0) { hugetlb_put_quota(inode->i_mapping, chg); - return ret; + goto out_err; } /* @@ -2907,6 +2921,10 @@ int hugetlb_reserve_pages(struct inode *inode, if (!vma || vma->vm_flags & VM_MAYSHARE) region_add(&inode->i_mapping->private_list, from, to); return 0; +out_err: + if (vma) + resv_map_put(vma); + return ret; } void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed) diff --git a/mm/madvise.c b/mm/madvise.c index 2221491ed503..deabe5f603ad 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -13,6 +13,7 @@ #include <linux/hugetlb.h> #include <linux/sched.h> #include <linux/ksm.h> +#include <linux/file.h> /* * Any behaviour which results in changes to the vma->vm_flags needs to @@ -197,14 +198,16 @@ static long madvise_remove(struct vm_area_struct *vma, struct address_space *mapping; loff_t offset, endoff; int error; + struct file *f; *prev = NULL; /* tell sys_madvise we drop mmap_sem */ if (vma->vm_flags & (VM_LOCKED|VM_NONLINEAR|VM_HUGETLB)) return -EINVAL; - if (!vma->vm_file || !vma->vm_file->f_mapping - || !vma->vm_file->f_mapping->host) { + f = vma->vm_file; + + if (!f || !f->f_mapping || !f->f_mapping->host) { return -EINVAL; } @@ -218,9 +221,16 @@ static long madvise_remove(struct vm_area_struct *vma, endoff = (loff_t)(end - vma->vm_start - 1) + ((loff_t)vma->vm_pgoff << PAGE_SHIFT); - /* vmtruncate_range needs to take i_mutex and i_alloc_sem */ + /* + * vmtruncate_range may need to take i_mutex and i_alloc_sem. + * We need to explicitly grab a reference because the vma (and + * hence the vma's reference to the file) can go away as soon as + * we drop mmap_sem. 
+ */ + get_file(f); up_read(&current->mm->mmap_sem); error = vmtruncate_range(mapping->host, offset, endoff); + fput(f); down_read(&current->mm->mmap_sem); return error; } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 59ac5d6de478..ffb99b4e7527 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3422,6 +3422,50 @@ int mem_cgroup_shmem_charge_fallback(struct page *page, return ret; } +/* + * At replace page cache, newpage is not under any memcg but it's on + * LRU. So, this function doesn't touch res_counter but handles LRU + * in correct way. Both pages are locked so we cannot race with uncharge. + */ +void mem_cgroup_replace_page_cache(struct page *oldpage, + struct page *newpage) +{ + struct mem_cgroup *memcg; + struct page_cgroup *pc; + struct zone *zone; + enum charge_type type = MEM_CGROUP_CHARGE_TYPE_CACHE; + unsigned long flags; + + if (mem_cgroup_disabled()) + return; + + pc = lookup_page_cgroup(oldpage); + /* fix accounting on old pages */ + lock_page_cgroup(pc); + memcg = pc->mem_cgroup; + mem_cgroup_charge_statistics(memcg, PageCgroupCache(pc), -1); + ClearPageCgroupUsed(pc); + unlock_page_cgroup(pc); + + if (PageSwapBacked(oldpage)) + type = MEM_CGROUP_CHARGE_TYPE_SHMEM; + + zone = page_zone(newpage); + pc = lookup_page_cgroup(newpage); + /* + * Even if newpage->mapping was NULL before starting replacement, + * the newpage may be on LRU(or pagevec for LRU) already. We lock + * LRU while we overwrite pc->mem_cgroup. + */ + spin_lock_irqsave(&zone->lru_lock, flags); + if (PageLRU(newpage)) + del_page_from_lru_list(zone, newpage, page_lru(newpage)); + __mem_cgroup_commit_charge(memcg, newpage, 1, pc, type); + if (PageLRU(newpage)) + add_page_to_lru_list(zone, newpage, page_lru(newpage)); + spin_unlock_irqrestore(&zone->lru_lock, flags); +} + #ifdef CONFIG_DEBUG_VM static struct page_cgroup *lookup_page_cgroup_used(struct page *page) { @@ -4514,6 +4558,9 @@ static void mem_cgroup_usage_unregister_event(struct cgroup *cgrp, */ BUG_ON(!thresholds); + if (!thresholds->primary) + goto unlock; + usage = mem_cgroup_usage(memcg, type == _MEMSWAP); /* Check if a threshold crossed before removing */ @@ -4558,11 +4605,17 @@ static void mem_cgroup_usage_unregister_event(struct cgroup *cgrp, swap_buffers: /* Swap primary and spare array */ thresholds->spare = thresholds->primary; + /* If all events are unregistered, free the spare array */ + if (!new) { + kfree(thresholds->spare); + thresholds->spare = NULL; + } + rcu_assign_pointer(thresholds->primary, new); /* To be sure that nobody uses thresholds */ synchronize_rcu(); - +unlock: mutex_unlock(&memcg->thresholds_lock); } @@ -4963,9 +5016,9 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont) int cpu; enable_swap_cgroup(); parent = NULL; - root_mem_cgroup = mem; if (mem_cgroup_soft_limit_tree_init()) goto free_out; + root_mem_cgroup = mem; for_each_possible_cpu(cpu) { struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu); @@ -5004,7 +5057,6 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont) return &mem->css; free_out: __mem_cgroup_free(mem); - root_mem_cgroup = NULL; return ERR_PTR(error); } @@ -5244,6 +5296,8 @@ static int mem_cgroup_count_precharge_pte_range(pmd_t *pmd, spinlock_t *ptl; split_huge_page_pmd(walk->mm, pmd); + if (pmd_trans_unstable(pmd)) + return 0; pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); for (; addr != end; pte++, addr += PAGE_SIZE) @@ -5405,6 +5459,8 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd, spinlock_t *ptl; split_huge_page_pmd(walk->mm, pmd); + if
(pmd_trans_unstable(pmd)) + return 0; retry: pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); for (; addr != end; addr += PAGE_SIZE) { diff --git a/mm/memory.c b/mm/memory.c index 95a77998ab51..d49b58aba4ae 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1228,16 +1228,24 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb, do { next = pmd_addr_end(addr, end); if (pmd_trans_huge(*pmd)) { - if (next-addr != HPAGE_PMD_SIZE) { + if (next - addr != HPAGE_PMD_SIZE) { VM_BUG_ON(!rwsem_is_locked(&tlb->mm->mmap_sem)); split_huge_page_pmd(vma->vm_mm, pmd); } else if (zap_huge_pmd(tlb, vma, pmd)) - continue; + goto next; /* fall through */ } - if (pmd_none_or_clear_bad(pmd)) - continue; + /* + * Here there can be other concurrent MADV_DONTNEED or + * trans huge page faults running, and if the pmd is + * none or trans huge it can change under us. This is + * because MADV_DONTNEED holds the mmap_sem in read + * mode. + */ + if (pmd_none_or_trans_huge_or_clear_bad(pmd)) + goto next; next = zap_pte_range(tlb, vma, pmd, addr, next, details); +next: cond_resched(); } while (pmd++, addr = next, addr != end); diff --git a/mm/mempolicy.c b/mm/mempolicy.c index e7fb9d25c54e..3dac2d168e47 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -511,7 +511,7 @@ static inline int check_pmd_range(struct vm_area_struct *vma, pud_t *pud, do { next = pmd_addr_end(addr, end); split_huge_page_pmd(vma->vm_mm, pmd); - if (pmd_none_or_clear_bad(pmd)) + if (pmd_none_or_trans_huge_or_clear_bad(pmd)) continue; if (check_pte_range(vma, pmd, addr, next, nodes, flags, private)) @@ -606,27 +606,6 @@ check_range(struct mm_struct *mm, unsigned long start, unsigned long end, return first; } -/* Apply policy to a single VMA */ -static int policy_vma(struct vm_area_struct *vma, struct mempolicy *new) -{ - int err = 0; - struct mempolicy *old = vma->vm_policy; - - pr_debug("vma %lx-%lx/%lx vm_ops %p vm_file %p set_policy %p\n", - vma->vm_start, vma->vm_end, vma->vm_pgoff, - vma->vm_ops, vma->vm_file, - vma->vm_ops ? vma->vm_ops->set_policy : NULL); - - if (vma->vm_ops && vma->vm_ops->set_policy) - err = vma->vm_ops->set_policy(vma, new); - if (!err) { - mpol_get(new); - vma->vm_policy = new; - mpol_put(old); - } - return err; -} - /* Step 2: apply policy to a range and do splits. */ static int mbind_range(struct mm_struct *mm, unsigned long start, unsigned long end, struct mempolicy *new_pol) @@ -666,9 +645,23 @@ static int mbind_range(struct mm_struct *mm, unsigned long start, if (err) goto out; } - err = policy_vma(vma, new_pol); - if (err) - goto out; + + /* + * Apply policy to a single VMA. The reference counting of + * policy for vma_policy linkages has already been handled by + * vma_merge and split_vma as necessary. If this is a shared + * policy then ->set_policy will increment the reference count + * for an sp node. + */ + pr_debug("vma %lx-%lx/%lx vm_ops %p vm_file %p set_policy %p\n", + vma->vm_start, vma->vm_end, vma->vm_pgoff, + vma->vm_ops, vma->vm_file, + vma->vm_ops ? 
vma->vm_ops->set_policy : NULL); + if (vma->vm_ops && vma->vm_ops->set_policy) { + err = vma->vm_ops->set_policy(vma, new_pol); + if (err) + goto out; + } } out: diff --git a/mm/mincore.c b/mm/mincore.c index a4e6b9d75c76..117ff5492795 100644 --- a/mm/mincore.c +++ b/mm/mincore.c @@ -161,7 +161,7 @@ static void mincore_pmd_range(struct vm_area_struct *vma, pud_t *pud, } /* fall through */ } - if (pmd_none_or_clear_bad(pmd)) + if (pmd_none_or_trans_huge_or_clear_bad(pmd)) mincore_unmapped_range(vma, addr, next, vec); else mincore_pte_range(vma, pmd, addr, next, vec); diff --git a/mm/nobootmem.c b/mm/nobootmem.c index 6e93dc7f2586..e39e3efe4a43 100644 --- a/mm/nobootmem.c +++ b/mm/nobootmem.c @@ -83,8 +83,7 @@ void __init free_bootmem_late(unsigned long addr, unsigned long size) static void __init __free_pages_memory(unsigned long start, unsigned long end) { - int i; - unsigned long start_aligned, end_aligned; + unsigned long i, start_aligned, end_aligned; int order = ilog2(BITS_PER_LONG); start_aligned = (start + (BITS_PER_LONG - 1)) & ~(BITS_PER_LONG - 1); diff --git a/mm/nommu.c b/mm/nommu.c index 9edc897a3970..5ff9b35883ee 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -697,9 +697,11 @@ static void add_vma_to_mm(struct mm_struct *mm, struct vm_area_struct *vma) if (vma->vm_file) { mapping = vma->vm_file->f_mapping; + mutex_lock(&mapping->i_mmap_mutex); flush_dcache_mmap_lock(mapping); vma_prio_tree_insert(vma, &mapping->i_mmap); flush_dcache_mmap_unlock(mapping); + mutex_unlock(&mapping->i_mmap_mutex); } /* add the VMA to the tree */ @@ -761,9 +763,11 @@ static void delete_vma_from_mm(struct vm_area_struct *vma) if (vma->vm_file) { mapping = vma->vm_file->f_mapping; + mutex_lock(&mapping->i_mmap_mutex); flush_dcache_mmap_lock(mapping); vma_prio_tree_remove(vma, &mapping->i_mmap); flush_dcache_mmap_unlock(mapping); + mutex_unlock(&mapping->i_mmap_mutex); } /* remove from the MM's tree and list */ @@ -776,8 +780,6 @@ static void delete_vma_from_mm(struct vm_area_struct *vma) if (vma->vm_next) vma->vm_next->vm_prev = vma->vm_prev; - - vma->vm_mm = NULL; } /* @@ -2061,6 +2063,7 @@ int nommu_shrink_inode_mappings(struct inode *inode, size_t size, high = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; down_write(&nommu_region_sem); + mutex_lock(&inode->i_mapping->i_mmap_mutex); /* search for VMAs that fall within the dead zone */ vma_prio_tree_foreach(vma, &iter, &inode->i_mapping->i_mmap, @@ -2068,6 +2071,7 @@ int nommu_shrink_inode_mappings(struct inode *inode, size_t size, /* found one - only interested if it's shared out of the page * cache */ if (vma->vm_flags & VM_SHARED) { + mutex_unlock(&inode->i_mapping->i_mmap_mutex); up_write(&nommu_region_sem); return -ETXTBSY; /* not quite true, but near enough */ } @@ -2095,6 +2099,7 @@ int nommu_shrink_inode_mappings(struct inode *inode, size_t size, } } + mutex_unlock(&inode->i_mapping->i_mmap_mutex); up_write(&nommu_region_sem); return 0; } diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 8093fc766d16..7c72487ca459 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -162,7 +162,7 @@ static bool oom_unkillable_task(struct task_struct *p, unsigned int oom_badness(struct task_struct *p, struct mem_cgroup *mem, const nodemask_t *nodemask, unsigned long totalpages) { - int points; + long points; if (oom_unkillable_task(p, mem, nodemask)) return 0; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index afa80ed07253..e2f474da7ee2 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5588,6 +5588,17 @@ __count_immobile_pages(struct zone *zone, struct page *page, int 
count) bool is_pageblock_removable_nolock(struct page *page) { struct zone *zone = page_zone(page); + unsigned long pfn = page_to_pfn(page); + + /* + * We have to be careful here because we are iterating over memory + * sections which are not zone aware so we might end up outside of + * the zone but still within the section. + */ + if (!zone || zone->zone_start_pfn > pfn || + zone->zone_start_pfn + zone->spanned_pages <= pfn) + return false; + return __count_immobile_pages(zone, page, 0); } diff --git a/mm/pagewalk.c b/mm/pagewalk.c index c3450d533611..87eac0ea2bf1 100644 --- a/mm/pagewalk.c +++ b/mm/pagewalk.c @@ -59,7 +59,7 @@ again: continue; split_huge_page_pmd(walk->mm, pmd); - if (pmd_none_or_clear_bad(pmd)) + if (pmd_none_or_trans_huge_or_clear_bad(pmd)) goto again; err = walk_pte_range(pmd, addr, next, walk); if (err) diff --git a/mm/percpu.c b/mm/percpu.c index 93b5a7c96a7e..af0cc7a58f9f 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1011,9 +1011,11 @@ phys_addr_t per_cpu_ptr_to_phys(void *addr) if (!is_vmalloc_addr(addr)) return __pa(addr); else - return page_to_phys(vmalloc_to_page(addr)); + return page_to_phys(vmalloc_to_page(addr)) + + offset_in_page(addr); } else - return page_to_phys(pcpu_addr_to_page(addr)); + return page_to_phys(pcpu_addr_to_page(addr)) + + offset_in_page(addr); } /** @@ -1628,6 +1630,16 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size, areas[group] = ptr; base = min(ptr, base); + } + + /* + * Copy data and free unused parts. This should happen after all + * allocations are complete; otherwise, we may end up with + * overlapping groups. + */ + for (group = 0; group < ai->nr_groups; group++) { + struct pcpu_group_info *gi = &ai->groups[group]; + void *ptr = areas[group]; for (i = 0; i < gi->nr_units; i++, ptr += ai->unit_size) { if (gi->cpu_map[i] == NR_CPUS) { diff --git a/mm/slub.c b/mm/slub.c index 0d0901ebaea2..10ab2335e2ea 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -3439,13 +3439,14 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size, if (kmem_cache_open(s, n, size, align, flags, ctor)) { list_add(&s->list, &slab_caches); + up_write(&slub_lock); if (sysfs_slab_add(s)) { + down_write(&slub_lock); list_del(&s->list); kfree(n); kfree(s); goto err; } - up_write(&slub_lock); return s; } kfree(n); diff --git a/mm/sparse.c b/mm/sparse.c index aa64b12831a2..4cd05e5f2f43 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -353,29 +353,21 @@ static void __init sparse_early_usemaps_alloc_node(unsigned long**usemap_map, usemap = sparse_early_usemaps_alloc_pgdat_section(NODE_DATA(nodeid), usemap_count); - if (usemap) { - for (pnum = pnum_begin; pnum < pnum_end; pnum++) { - if (!present_section_nr(pnum)) - continue; - usemap_map[pnum] = usemap; - usemap += size; + if (!usemap) { + usemap = alloc_bootmem_node(NODE_DATA(nodeid), size * usemap_count); + if (!usemap) { + printk(KERN_WARNING "%s: allocation failed\n", __func__); + return; } - return; } - usemap = alloc_bootmem_node(NODE_DATA(nodeid), size * usemap_count); - if (usemap) { - for (pnum = pnum_begin; pnum < pnum_end; pnum++) { - if (!present_section_nr(pnum)) - continue; - usemap_map[pnum] = usemap; - usemap += size; - check_usemap_section_nr(nodeid, usemap_map[pnum]); - } - return; + for (pnum = pnum_begin; pnum < pnum_end; pnum++) { + if (!present_section_nr(pnum)) + continue; + usemap_map[pnum] = usemap; + usemap += size; + check_usemap_section_nr(nodeid, usemap_map[pnum]); } - - printk(KERN_WARNING "%s: allocation failed\n", __func__); } #ifndef 
CONFIG_SPARSEMEM_VMEMMAP diff --git a/mm/swap.c b/mm/swap.c index 87627f181c3f..4a1fc6db89e8 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -667,7 +667,7 @@ void lru_add_page_tail(struct zone* zone, VM_BUG_ON(!PageHead(page)); VM_BUG_ON(PageCompound(page_tail)); VM_BUG_ON(PageLRU(page_tail)); - VM_BUG_ON(!spin_is_locked(&zone->lru_lock)); + VM_BUG_ON(NR_CPUS != 1 && !spin_is_locked(&zone->lru_lock)); SetPageLRU(page_tail); diff --git a/mm/swap_state.c b/mm/swap_state.c index 46680461785b..10e9198778cf 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -28,7 +28,7 @@ */ static const struct address_space_operations swap_aops = { .writepage = swap_writepage, - .set_page_dirty = __set_page_dirty_nobuffers, + .set_page_dirty = __set_page_dirty_no_writeback, .migratepage = migrate_page, }; diff --git a/mm/swapfile.c b/mm/swapfile.c index ff8dc1a18cb4..c8f4338848df 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -932,9 +932,7 @@ static inline int unuse_pmd_range(struct vm_area_struct *vma, pud_t *pud, pmd = pmd_offset(pud, addr); do { next = pmd_addr_end(addr, end); - if (unlikely(pmd_trans_huge(*pmd))) - continue; - if (pmd_none_or_clear_bad(pmd)) + if (pmd_none_or_trans_huge_or_clear_bad(pmd)) continue; ret = unuse_pte_range(vma, pmd, addr, next, entry, page); if (ret) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 43b44dbaddaf..bdb70042c123 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -256,7 +256,7 @@ struct vmap_area { struct rb_node rb_node; /* address sorted rbtree */ struct list_head list; /* address sorted list */ struct list_head purge_list; /* "lazy purge" list */ - void *private; + struct vm_struct *vm; struct rcu_head rcu_head; }; @@ -1174,9 +1174,10 @@ void __init vmalloc_init(void) /* Import existing vmlist entries. */ for (tmp = vmlist; tmp; tmp = tmp->next) { va = kzalloc(sizeof(struct vmap_area), GFP_NOWAIT); - va->flags = tmp->flags | VM_VM_AREA; + va->flags = VM_VM_AREA; va->va_start = (unsigned long)tmp->addr; va->va_end = va->va_start + tmp->size; + va->vm = tmp; __insert_vmap_area(va); } @@ -1274,7 +1275,7 @@ static void setup_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va, vm->addr = (void *)va->va_start; vm->size = va->va_end - va->va_start; vm->caller = caller; - va->private = vm; + va->vm = vm; va->flags |= VM_VM_AREA; } @@ -1397,7 +1398,7 @@ static struct vm_struct *find_vm_area(const void *addr) va = find_vmap_area((unsigned long)addr); if (va && va->flags & VM_VM_AREA) - return va->private; + return va->vm; return NULL; } @@ -1416,7 +1417,7 @@ struct vm_struct *remove_vm_area(const void *addr) va = find_vmap_area((unsigned long)addr); if (va && va->flags & VM_VM_AREA) { - struct vm_struct *vm = va->private; + struct vm_struct *vm = va->vm; if (!(vm->flags & VM_UNLIST)) { struct vm_struct *tmp, **p; diff --git a/mm/vmscan.c b/mm/vmscan.c index 6072d74a16f5..769935d17c01 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -665,7 +665,7 @@ static enum page_references page_check_references(struct page *page, return PAGEREF_RECLAIM; if (referenced_ptes) { - if (PageAnon(page)) + if (PageSwapBacked(page)) return PAGEREF_ACTIVATE; /* * All mapped pages start out with page table |
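Note on the mm/dmapool.c hunk above: the CONFIG_FSL_UTP-only dma_pool_alloc_nonbufferable() added by this merge has, per its kernel-doc, the same contract and signature as the stock dma_pool_alloc(). The fragment below is a minimal caller sketch for this patched tree only; the device pointer, pool name, and 64-byte/32-byte pool geometry are illustrative assumptions, not taken from the patch.

#include <linux/dmapool.h>
#include <linux/device.h>
#include <linux/gfp.h>

/* Hypothetical helper showing the allocation/free pairing. */
static int utp_dma_pool_example(struct device *utp_dev)
{
	struct dma_pool *pool;
	dma_addr_t dma_handle;
	void *vaddr;

	/* 64-byte blocks, 32-byte aligned, no boundary constraint. */
	pool = dma_pool_create("utp-example", utp_dev, 64, 32, 0);
	if (!pool)
		return -ENOMEM;

#ifdef CONFIG_FSL_UTP
	/* Non-bufferable variant introduced by this merge. */
	vaddr = dma_pool_alloc_nonbufferable(pool, GFP_KERNEL, &dma_handle);
#else
	vaddr = dma_pool_alloc(pool, GFP_KERNEL, &dma_handle);
#endif
	if (!vaddr) {
		dma_pool_destroy(pool);
		return -ENOMEM;
	}

	/* ... program dma_handle into the hardware, touch vaddr from the CPU ... */

	dma_pool_free(pool, vaddr, dma_handle);
	dma_pool_destroy(pool);
	return 0;
}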