diff options
Diffstat (limited to 'arch/x86/mm')
-rw-r--r-- | arch/x86/mm/fault.c | 25 | ||||
-rw-r--r-- | arch/x86/mm/hugetlbpage.c | 130 | ||||
-rw-r--r-- | arch/x86/mm/init.c | 58 | ||||
-rw-r--r-- | arch/x86/mm/init_32.c | 5 | ||||
-rw-r--r-- | arch/x86/mm/init_64.c | 11 | ||||
-rw-r--r-- | arch/x86/mm/pgtable.c | 2 | ||||
-rw-r--r-- | arch/x86/mm/tlb.c | 10 |
7 files changed, 80 insertions, 161 deletions
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 8e13ecb41bee..027088f2f7dd 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -18,7 +18,7 @@ #include <asm/pgalloc.h> /* pgd_*(), ... */ #include <asm/kmemcheck.h> /* kmemcheck_*(), ... */ #include <asm/fixmap.h> /* VSYSCALL_START */ -#include <asm/rcu.h> /* exception_enter(), ... */ +#include <asm/context_tracking.h> /* exception_enter(), ... */ /* * Page fault error code bits: @@ -803,20 +803,6 @@ bad_area_access_error(struct pt_regs *regs, unsigned long error_code, __bad_area(regs, error_code, address, SEGV_ACCERR); } -/* TODO: fixup for "mm-invoke-oom-killer-from-page-fault.patch" */ -static void -out_of_memory(struct pt_regs *regs, unsigned long error_code, - unsigned long address) -{ - /* - * We ran out of memory, call the OOM killer, and return the userspace - * (which will retry the fault, or kill us if we got oom-killed): - */ - up_read(¤t->mm->mmap_sem); - - pagefault_out_of_memory(); -} - static void do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address, unsigned int fault) @@ -879,7 +865,14 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code, return 1; } - out_of_memory(regs, error_code, address); + up_read(¤t->mm->mmap_sem); + + /* + * We ran out of memory, call the OOM killer, and return the + * userspace (which will retry the fault, or kill us if we got + * oom-killed): + */ + pagefault_out_of_memory(); } else { if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON| VM_FAULT_HWPOISON_LARGE)) diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c index 937bff5cdaa7..ae1aa71d0115 100644 --- a/arch/x86/mm/hugetlbpage.c +++ b/arch/x86/mm/hugetlbpage.c @@ -274,42 +274,15 @@ static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file, unsigned long pgoff, unsigned long flags) { struct hstate *h = hstate_file(file); - struct mm_struct *mm = current->mm; - struct vm_area_struct *vma; - unsigned long start_addr; - - if (len > mm->cached_hole_size) { - start_addr = mm->free_area_cache; - } else { - start_addr = TASK_UNMAPPED_BASE; - mm->cached_hole_size = 0; - } - -full_search: - addr = ALIGN(start_addr, huge_page_size(h)); - - for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { - /* At this point: (!vma || addr < vma->vm_end). */ - if (TASK_SIZE - len < addr) { - /* - * Start a new search - just in case we missed - * some holes. - */ - if (start_addr != TASK_UNMAPPED_BASE) { - start_addr = TASK_UNMAPPED_BASE; - mm->cached_hole_size = 0; - goto full_search; - } - return -ENOMEM; - } - if (!vma || addr + len <= vma->vm_start) { - mm->free_area_cache = addr + len; - return addr; - } - if (addr + mm->cached_hole_size < vma->vm_start) - mm->cached_hole_size = vma->vm_start - addr; - addr = ALIGN(vma->vm_end, huge_page_size(h)); - } + struct vm_unmapped_area_info info; + + info.flags = 0; + info.length = len; + info.low_limit = TASK_UNMAPPED_BASE; + info.high_limit = TASK_SIZE; + info.align_mask = PAGE_MASK & ~huge_page_mask(h); + info.align_offset = 0; + return vm_unmapped_area(&info); } static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file, @@ -317,83 +290,30 @@ static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file, unsigned long pgoff, unsigned long flags) { struct hstate *h = hstate_file(file); - struct mm_struct *mm = current->mm; - struct vm_area_struct *vma; - unsigned long base = mm->mmap_base; - unsigned long addr = addr0; - unsigned long largest_hole = mm->cached_hole_size; - unsigned long start_addr; - - /* don't allow allocations above current base */ - if (mm->free_area_cache > base) - mm->free_area_cache = base; - - if (len <= largest_hole) { - largest_hole = 0; - mm->free_area_cache = base; - } -try_again: - start_addr = mm->free_area_cache; - - /* make sure it can fit in the remaining address space */ - if (mm->free_area_cache < len) - goto fail; - - /* either no address requested or can't fit in requested address hole */ - addr = (mm->free_area_cache - len) & huge_page_mask(h); - do { - /* - * Lookup failure means no vma is above this address, - * i.e. return with success: - */ - vma = find_vma(mm, addr); - if (!vma) - return addr; + struct vm_unmapped_area_info info; + unsigned long addr; - if (addr + len <= vma->vm_start) { - /* remember the address as a hint for next time */ - mm->cached_hole_size = largest_hole; - return (mm->free_area_cache = addr); - } else if (mm->free_area_cache == vma->vm_end) { - /* pull free_area_cache down to the first hole */ - mm->free_area_cache = vma->vm_start; - mm->cached_hole_size = largest_hole; - } + info.flags = VM_UNMAPPED_AREA_TOPDOWN; + info.length = len; + info.low_limit = PAGE_SIZE; + info.high_limit = current->mm->mmap_base; + info.align_mask = PAGE_MASK & ~huge_page_mask(h); + info.align_offset = 0; + addr = vm_unmapped_area(&info); - /* remember the largest hole we saw so far */ - if (addr + largest_hole < vma->vm_start) - largest_hole = vma->vm_start - addr; - - /* try just below the current vma->vm_start */ - addr = (vma->vm_start - len) & huge_page_mask(h); - } while (len <= vma->vm_start); - -fail: - /* - * if hint left us with no space for the requested - * mapping then try again: - */ - if (start_addr != base) { - mm->free_area_cache = base; - largest_hole = 0; - goto try_again; - } /* * A failed mmap() very likely causes application failure, * so fall back to the bottom-up function here. This scenario * can happen with large stack limits and large mmap() * allocations. */ - mm->free_area_cache = TASK_UNMAPPED_BASE; - mm->cached_hole_size = ~0UL; - addr = hugetlb_get_unmapped_area_bottomup(file, addr0, - len, pgoff, flags); - - /* - * Restore the topdown base: - */ - mm->free_area_cache = base; - mm->cached_hole_size = ~0UL; + if (addr & ~PAGE_MASK) { + VM_BUG_ON(addr != -ENOMEM); + info.flags = 0; + info.low_limit = TASK_UNMAPPED_BASE; + info.high_limit = TASK_SIZE; + addr = vm_unmapped_area(&info); + } return addr; } diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index ab1f6a93b527..d7aea41563b3 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -35,40 +35,44 @@ struct map_range { unsigned page_size_mask; }; -static void __init find_early_table_space(struct map_range *mr, unsigned long end, - int use_pse, int use_gbpages) +/* + * First calculate space needed for kernel direct mapping page tables to cover + * mr[0].start to mr[nr_range - 1].end, while accounting for possible 2M and 1GB + * pages. Then find enough contiguous space for those page tables. + */ +static void __init find_early_table_space(struct map_range *mr, int nr_range) { - unsigned long puds, pmds, ptes, tables, start = 0, good_end = end; + int i; + unsigned long puds = 0, pmds = 0, ptes = 0, tables; + unsigned long start = 0, good_end; phys_addr_t base; - puds = (end + PUD_SIZE - 1) >> PUD_SHIFT; - tables = roundup(puds * sizeof(pud_t), PAGE_SIZE); - - if (use_gbpages) { - unsigned long extra; - - extra = end - ((end>>PUD_SHIFT) << PUD_SHIFT); - pmds = (extra + PMD_SIZE - 1) >> PMD_SHIFT; - } else - pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT; + for (i = 0; i < nr_range; i++) { + unsigned long range, extra; - tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE); + range = mr[i].end - mr[i].start; + puds += (range + PUD_SIZE - 1) >> PUD_SHIFT; - if (use_pse) { - unsigned long extra; + if (mr[i].page_size_mask & (1 << PG_LEVEL_1G)) { + extra = range - ((range >> PUD_SHIFT) << PUD_SHIFT); + pmds += (extra + PMD_SIZE - 1) >> PMD_SHIFT; + } else { + pmds += (range + PMD_SIZE - 1) >> PMD_SHIFT; + } - extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT); + if (mr[i].page_size_mask & (1 << PG_LEVEL_2M)) { + extra = range - ((range >> PMD_SHIFT) << PMD_SHIFT); #ifdef CONFIG_X86_32 - extra += PMD_SIZE; + extra += PMD_SIZE; #endif - /* The first 2/4M doesn't use large pages. */ - if (mr->start < PMD_SIZE) - extra += mr->end - mr->start; - - ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT; - } else - ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT; + ptes += (extra + PAGE_SIZE - 1) >> PAGE_SHIFT; + } else { + ptes += (range + PAGE_SIZE - 1) >> PAGE_SHIFT; + } + } + tables = roundup(puds * sizeof(pud_t), PAGE_SIZE); + tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE); tables += roundup(ptes * sizeof(pte_t), PAGE_SIZE); #ifdef CONFIG_X86_32 @@ -86,7 +90,7 @@ static void __init find_early_table_space(struct map_range *mr, unsigned long en pgt_buf_top = pgt_buf_start + (tables >> PAGE_SHIFT); printk(KERN_DEBUG "kernel direct mapping tables up to %#lx @ [mem %#010lx-%#010lx]\n", - end - 1, pgt_buf_start << PAGE_SHIFT, + mr[nr_range - 1].end - 1, pgt_buf_start << PAGE_SHIFT, (pgt_buf_top << PAGE_SHIFT) - 1); } @@ -267,7 +271,7 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, * nodes are discovered. */ if (!after_bootmem) - find_early_table_space(&mr[0], end, use_pse, use_gbpages); + find_early_table_space(mr, nr_range); for (i = 0; i < nr_range; i++) ret = kernel_physical_mapping_init(mr[i].start, mr[i].end, diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 11a58001b4ce..745d66b843c8 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -715,10 +715,7 @@ static void __init test_wp_bit(void) if (!boot_cpu_data.wp_works_ok) { printk(KERN_CONT "No.\n"); -#ifdef CONFIG_X86_WP_WORKS_OK - panic( - "This kernel doesn't support CPU's with broken WP. Recompile it for a 386!"); -#endif + panic("Linux doesn't support CPUs with broken WP."); } else { printk(KERN_CONT "Ok.\n"); } diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index fd4404f19d39..07519a120449 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -393,7 +393,8 @@ phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end, * these mappings are more intelligent. */ if (pte_val(*pte)) { - pages++; + if (!after_bootmem) + pages++; continue; } @@ -458,6 +459,8 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end, * attributes. */ if (page_size_mask & (1 << PG_LEVEL_2M)) { + if (!after_bootmem) + pages++; last_map_addr = next; continue; } @@ -533,6 +536,8 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end, * attributes. */ if (page_size_mask & (1 << PG_LEVEL_1G)) { + if (!after_bootmem) + pages++; last_map_addr = next; continue; } @@ -632,7 +637,9 @@ void __init paging_init(void) * numa support is not compiled in, and later node_set_state * will not set it back. */ - node_clear_state(0, N_NORMAL_MEMORY); + node_clear_state(0, N_MEMORY); + if (N_MEMORY != N_NORMAL_MEMORY) + node_clear_state(0, N_NORMAL_MEMORY); zone_sizes_init(); } diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 8573b83a63d0..217eb705fac0 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -137,7 +137,7 @@ static void pgd_dtor(pgd_t *pgd) * against pageattr.c; it is the unique case in which a valid change * of kernel pagetables can't be lazily synchronized by vmalloc faults. * vmalloc faults work because attached pagetables are never freed. - * -- wli + * -- nyc */ #ifdef CONFIG_X86_PAE diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 0777f042e400..13a6b29e2e5d 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -104,7 +104,7 @@ static void flush_tlb_func(void *info) return; if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK) { - if (f->flush_end == TLB_FLUSH_ALL || !cpu_has_invlpg) + if (f->flush_end == TLB_FLUSH_ALL) local_flush_tlb(); else if (!f->flush_end) __flush_tlb_single(f->flush_start); @@ -197,7 +197,7 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, } if (end == TLB_FLUSH_ALL || tlb_flushall_shift == -1 - || vmflag == VM_HUGETLB) { + || vmflag & VM_HUGETLB) { local_flush_tlb(); goto flush_all; } @@ -337,10 +337,8 @@ static const struct file_operations fops_tlbflush = { static int __cpuinit create_tlb_flushall_shift(void) { - if (cpu_has_invlpg) { - debugfs_create_file("tlb_flushall_shift", S_IRUSR | S_IWUSR, - arch_debugfs_dir, NULL, &fops_tlbflush); - } + debugfs_create_file("tlb_flushall_shift", S_IRUSR | S_IWUSR, + arch_debugfs_dir, NULL, &fops_tlbflush); return 0; } late_initcall(create_tlb_flushall_shift); |