diff options
Diffstat (limited to 'arch/x86/mm')
-rw-r--r-- | arch/x86/mm/fault.c | 56 | ||||
-rw-r--r-- | arch/x86/mm/gup.c | 8 | ||||
-rw-r--r-- | arch/x86/mm/hugetlbpage.c | 2 | ||||
-rw-r--r-- | arch/x86/mm/init.c | 41 | ||||
-rw-r--r-- | arch/x86/mm/init_64.c | 10 | ||||
-rw-r--r-- | arch/x86/mm/kasan_init_64.c | 1 | ||||
-rw-r--r-- | arch/x86/mm/kaslr.c | 4 | ||||
-rw-r--r-- | arch/x86/mm/mpx.c | 12 | ||||
-rw-r--r-- | arch/x86/mm/numa_32.c | 1 | ||||
-rw-r--r-- | arch/x86/mm/pat.c | 28 | ||||
-rw-r--r-- | arch/x86/mm/tlb.c | 4 |
11 files changed, 94 insertions, 73 deletions
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 9f72ca3b2669..8b5ff88aa4f8 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -191,8 +191,7 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr) * 6. T1 : reaches here, sees vma_pkey(vma)=5, when we really * faulted on a pte with its pkey=4. */ -static void fill_sig_info_pkey(int si_code, siginfo_t *info, - struct vm_area_struct *vma) +static void fill_sig_info_pkey(int si_code, siginfo_t *info, u32 *pkey) { /* This is effectively an #ifdef */ if (!boot_cpu_has(X86_FEATURE_OSPKE)) @@ -208,7 +207,7 @@ static void fill_sig_info_pkey(int si_code, siginfo_t *info, * valid VMA, so we should never reach this without a * valid VMA. */ - if (!vma) { + if (!pkey) { WARN_ONCE(1, "PKU fault with no VMA passed in"); info->si_pkey = 0; return; @@ -218,13 +217,12 @@ static void fill_sig_info_pkey(int si_code, siginfo_t *info, * absolutely guranteed to be 100% accurate because of * the race explained above. */ - info->si_pkey = vma_pkey(vma); + info->si_pkey = *pkey; } static void force_sig_info_fault(int si_signo, int si_code, unsigned long address, - struct task_struct *tsk, struct vm_area_struct *vma, - int fault) + struct task_struct *tsk, u32 *pkey, int fault) { unsigned lsb = 0; siginfo_t info; @@ -239,7 +237,7 @@ force_sig_info_fault(int si_signo, int si_code, unsigned long address, lsb = PAGE_SHIFT; info.si_addr_lsb = lsb; - fill_sig_info_pkey(si_code, &info, vma); + fill_sig_info_pkey(si_code, &info, pkey); force_sig_info(si_signo, &info, tsk); } @@ -718,8 +716,6 @@ no_context(struct pt_regs *regs, unsigned long error_code, struct task_struct *tsk = current; unsigned long flags; int sig; - /* No context means no VMA to pass down */ - struct vm_area_struct *vma = NULL; /* Are we prepared to handle this kernel fault? */ if (fixup_exception(regs, X86_TRAP_PF)) { @@ -744,7 +740,7 @@ no_context(struct pt_regs *regs, unsigned long error_code, /* XXX: hwpoison faults will set the wrong code. */ force_sig_info_fault(signal, si_code, address, - tsk, vma, 0); + tsk, NULL, 0); } /* @@ -853,8 +849,7 @@ show_signal_msg(struct pt_regs *regs, unsigned long error_code, static void __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, - unsigned long address, struct vm_area_struct *vma, - int si_code) + unsigned long address, u32 *pkey, int si_code) { struct task_struct *tsk = current; @@ -902,7 +897,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, tsk->thread.error_code = error_code; tsk->thread.trap_nr = X86_TRAP_PF; - force_sig_info_fault(SIGSEGV, si_code, address, tsk, vma, 0); + force_sig_info_fault(SIGSEGV, si_code, address, tsk, pkey, 0); return; } @@ -915,9 +910,9 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, static noinline void bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, - unsigned long address, struct vm_area_struct *vma) + unsigned long address, u32 *pkey) { - __bad_area_nosemaphore(regs, error_code, address, vma, SEGV_MAPERR); + __bad_area_nosemaphore(regs, error_code, address, pkey, SEGV_MAPERR); } static void @@ -925,6 +920,10 @@ __bad_area(struct pt_regs *regs, unsigned long error_code, unsigned long address, struct vm_area_struct *vma, int si_code) { struct mm_struct *mm = current->mm; + u32 pkey; + + if (vma) + pkey = vma_pkey(vma); /* * Something tried to access memory that isn't in our memory map.. @@ -932,7 +931,8 @@ __bad_area(struct pt_regs *regs, unsigned long error_code, */ up_read(&mm->mmap_sem); - __bad_area_nosemaphore(regs, error_code, address, vma, si_code); + __bad_area_nosemaphore(regs, error_code, address, + (vma) ? &pkey : NULL, si_code); } static noinline void @@ -975,7 +975,7 @@ bad_area_access_error(struct pt_regs *regs, unsigned long error_code, static void do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address, - struct vm_area_struct *vma, unsigned int fault) + u32 *pkey, unsigned int fault) { struct task_struct *tsk = current; int code = BUS_ADRERR; @@ -1002,13 +1002,12 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address, code = BUS_MCEERR_AR; } #endif - force_sig_info_fault(SIGBUS, code, address, tsk, vma, fault); + force_sig_info_fault(SIGBUS, code, address, tsk, pkey, fault); } static noinline void mm_fault_error(struct pt_regs *regs, unsigned long error_code, - unsigned long address, struct vm_area_struct *vma, - unsigned int fault) + unsigned long address, u32 *pkey, unsigned int fault) { if (fatal_signal_pending(current) && !(error_code & PF_USER)) { no_context(regs, error_code, address, 0, 0); @@ -1032,9 +1031,9 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code, } else { if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON| VM_FAULT_HWPOISON_LARGE)) - do_sigbus(regs, error_code, address, vma, fault); + do_sigbus(regs, error_code, address, pkey, fault); else if (fault & VM_FAULT_SIGSEGV) - bad_area_nosemaphore(regs, error_code, address, vma); + bad_area_nosemaphore(regs, error_code, address, pkey); else BUG(); } @@ -1220,6 +1219,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code, struct mm_struct *mm; int fault, major = 0; unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; + u32 pkey; tsk = current; mm = tsk->mm; @@ -1393,7 +1393,17 @@ good_area: * make sure we exit gracefully rather than endlessly redo * the fault. Since we never set FAULT_FLAG_RETRY_NOWAIT, if * we get VM_FAULT_RETRY back, the mmap_sem has been unlocked. + * + * Note that handle_userfault() may also release and reacquire mmap_sem + * (and not return with VM_FAULT_RETRY), when returning to userland to + * repeat the page fault later with a VM_FAULT_NOPAGE retval + * (potentially after handling any pending signal during the return to + * userland). The return to userland is identified whenever + * FAULT_FLAG_USER|FAULT_FLAG_KILLABLE are both set in flags. + * Thus we have to be careful about not touching vma after handling the + * fault, so we read the pkey beforehand. */ + pkey = vma_pkey(vma); fault = handle_mm_fault(vma, address, flags); major |= fault & VM_FAULT_MAJOR; @@ -1422,7 +1432,7 @@ good_area: up_read(&mm->mmap_sem); if (unlikely(fault & VM_FAULT_ERROR)) { - mm_fault_error(regs, error_code, address, vma, fault); + mm_fault_error(regs, error_code, address, &pkey, fault); return; } diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c index 0d4fb3ebbbac..1680768d392c 100644 --- a/arch/x86/mm/gup.c +++ b/arch/x86/mm/gup.c @@ -120,6 +120,11 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, return 0; } + if (!pte_allows_gup(pte_val(pte), write)) { + pte_unmap(ptep); + return 0; + } + if (pte_devmap(pte)) { pgmap = get_dev_pagemap(pte_pfn(pte), pgmap); if (unlikely(!pgmap)) { @@ -127,8 +132,7 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, pte_unmap(ptep); return 0; } - } else if (!pte_allows_gup(pte_val(pte), write) || - pte_special(pte)) { + } else if (pte_special(pte)) { pte_unmap(ptep); return 0; } diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c index 2ae8584b44c7..fe342e8ed529 100644 --- a/arch/x86/mm/hugetlbpage.c +++ b/arch/x86/mm/hugetlbpage.c @@ -144,7 +144,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr, addr = ALIGN(addr, huge_page_size(h)); vma = find_vma(mm, addr); if (TASK_SIZE - len >= addr && - (!vma || addr + len <= vma->vm_start)) + (!vma || addr + len <= vm_start_gap(vma))) return addr; } if (mm->get_unmapped_area == arch_get_unmapped_area) diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 22af912d66d2..889e7619a091 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -643,21 +643,40 @@ void __init init_mem_mapping(void) * devmem_is_allowed() checks to see if /dev/mem access to a certain address * is valid. The argument is a physical page number. * - * - * On x86, access has to be given to the first megabyte of ram because that area - * contains BIOS code and data regions used by X and dosemu and similar apps. - * Access has to be given to non-kernel-ram areas as well, these contain the PCI - * mmio resources as well as potential bios/acpi data regions. + * On x86, access has to be given to the first megabyte of RAM because that + * area traditionally contains BIOS code and data regions used by X, dosemu, + * and similar apps. Since they map the entire memory range, the whole range + * must be allowed (for mapping), but any areas that would otherwise be + * disallowed are flagged as being "zero filled" instead of rejected. + * Access has to be given to non-kernel-ram areas as well, these contain the + * PCI mmio resources as well as potential bios/acpi data regions. */ int devmem_is_allowed(unsigned long pagenr) { - if (pagenr < 256) - return 1; - if (iomem_is_exclusive(pagenr << PAGE_SHIFT)) + if (page_is_ram(pagenr)) { + /* + * For disallowed memory regions in the low 1MB range, + * request that the page be shown as all zeros. + */ + if (pagenr < 256) + return 2; + + return 0; + } + + /* + * This must follow RAM test, since System RAM is considered a + * restricted resource under CONFIG_STRICT_IOMEM. + */ + if (iomem_is_exclusive(pagenr << PAGE_SHIFT)) { + /* Low 1MB bypasses iomem restrictions. */ + if (pagenr < 256) + return 1; + return 0; - if (!page_is_ram(pagenr)) - return 1; - return 0; + } + + return 1; } void free_init_pages(char *what, unsigned long begin, unsigned long end) diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 14b9dd71d9e8..3e27ded6ac65 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -94,10 +94,10 @@ __setup("noexec32=", nonx32_setup); */ void sync_global_pgds(unsigned long start, unsigned long end, int removed) { - unsigned long address; + unsigned long addr; - for (address = start; address <= end; address += PGDIR_SIZE) { - const pgd_t *pgd_ref = pgd_offset_k(address); + for (addr = start; addr <= end; addr = ALIGN(addr + 1, PGDIR_SIZE)) { + const pgd_t *pgd_ref = pgd_offset_k(addr); struct page *page; /* @@ -113,7 +113,7 @@ void sync_global_pgds(unsigned long start, unsigned long end, int removed) pgd_t *pgd; spinlock_t *pgt_lock; - pgd = (pgd_t *)page_address(page) + pgd_index(address); + pgd = (pgd_t *)page_address(page) + pgd_index(addr); /* the pgt_lock only for Xen */ pgt_lock = &pgd_page_get_mm(page)->page_table_lock; spin_lock(pgt_lock); @@ -689,7 +689,7 @@ static void __meminit free_pagetable(struct page *page, int order) if (PageReserved(page)) { __ClearPageReserved(page); - magic = (unsigned long)page->lru.next; + magic = (unsigned long)page->freelist; if (magic == SECTION_INFO || magic == MIX_SECTION_INFO) { while (nr_pages--) put_page_bootmem(page++); diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index 0493c17b8a51..333362f992e4 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -1,3 +1,4 @@ +#define DISABLE_BRANCH_PROFILING #define pr_fmt(fmt) "kasan: " fmt #include <linux/bootmem.h> #include <linux/kasan.h> diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c index 887e57182716..aed206475aa7 100644 --- a/arch/x86/mm/kaslr.c +++ b/arch/x86/mm/kaslr.c @@ -48,7 +48,7 @@ static const unsigned long vaddr_start = __PAGE_OFFSET_BASE; #if defined(CONFIG_X86_ESPFIX64) static const unsigned long vaddr_end = ESPFIX_BASE_ADDR; #elif defined(CONFIG_EFI) -static const unsigned long vaddr_end = EFI_VA_START; +static const unsigned long vaddr_end = EFI_VA_END; #else static const unsigned long vaddr_end = __START_KERNEL_map; #endif @@ -105,7 +105,7 @@ void __init kernel_randomize_memory(void) */ BUILD_BUG_ON(vaddr_start >= vaddr_end); BUILD_BUG_ON(IS_ENABLED(CONFIG_X86_ESPFIX64) && - vaddr_end >= EFI_VA_START); + vaddr_end >= EFI_VA_END); BUILD_BUG_ON((IS_ENABLED(CONFIG_X86_ESPFIX64) || IS_ENABLED(CONFIG_EFI)) && vaddr_end >= __START_KERNEL_map); diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c index e4f800999b32..a75103e7f963 100644 --- a/arch/x86/mm/mpx.c +++ b/arch/x86/mm/mpx.c @@ -293,7 +293,7 @@ siginfo_t *mpx_generate_siginfo(struct pt_regs *regs) * We were not able to extract an address from the instruction, * probably because there was something invalid in it. */ - if (info->si_addr == (void *)-1) { + if (info->si_addr == (void __user *)-1) { err = -EINVAL; goto err_out; } @@ -525,15 +525,7 @@ int mpx_handle_bd_fault(void) if (!kernel_managing_mpx_tables(current->mm)) return -EINVAL; - if (do_mpx_bt_fault()) { - force_sig(SIGSEGV, current); - /* - * The force_sig() is essentially "handling" this - * exception, so we do not pass up the error - * from do_mpx_bt_fault(). - */ - } - return 0; + return do_mpx_bt_fault(); } /* diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c index 6b7ce6279133..aca6295350f3 100644 --- a/arch/x86/mm/numa_32.c +++ b/arch/x86/mm/numa_32.c @@ -100,5 +100,6 @@ void __init initmem_init(void) printk(KERN_DEBUG "High memory starts at vaddr %08lx\n", (ulong) pfn_to_kaddr(highstart_pfn)); + __vmalloc_start_set = true; setup_bootmem_allocator(); } diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 83e701f160a9..89d7907c4218 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -36,14 +36,14 @@ #undef pr_fmt #define pr_fmt(fmt) "" fmt -static bool boot_cpu_done; - -static int __read_mostly __pat_enabled = IS_ENABLED(CONFIG_X86_PAT); -static void init_cache_modes(void); +static bool __read_mostly boot_cpu_done; +static bool __read_mostly pat_disabled = !IS_ENABLED(CONFIG_X86_PAT); +static bool __read_mostly pat_initialized; +static bool __read_mostly init_cm_done; void pat_disable(const char *reason) { - if (!__pat_enabled) + if (pat_disabled) return; if (boot_cpu_done) { @@ -51,10 +51,8 @@ void pat_disable(const char *reason) return; } - __pat_enabled = 0; + pat_disabled = true; pr_info("x86/PAT: %s\n", reason); - - init_cache_modes(); } static int __init nopat(char *str) @@ -66,7 +64,7 @@ early_param("nopat", nopat); bool pat_enabled(void) { - return !!__pat_enabled; + return pat_initialized; } EXPORT_SYMBOL_GPL(pat_enabled); @@ -204,6 +202,8 @@ static void __init_cache_modes(u64 pat) update_cache_mode_entry(i, cache); } pr_info("x86/PAT: Configuration [0-7]: %s\n", pat_msg); + + init_cm_done = true; } #define PAT(x, y) ((u64)PAT_ ## y << ((x)*8)) @@ -224,6 +224,7 @@ static void pat_bsp_init(u64 pat) } wrmsrl(MSR_IA32_CR_PAT, pat); + pat_initialized = true; __init_cache_modes(pat); } @@ -241,10 +242,9 @@ static void pat_ap_init(u64 pat) wrmsrl(MSR_IA32_CR_PAT, pat); } -static void init_cache_modes(void) +void init_cache_modes(void) { u64 pat = 0; - static int init_cm_done; if (init_cm_done) return; @@ -286,8 +286,6 @@ static void init_cache_modes(void) } __init_cache_modes(pat); - - init_cm_done = 1; } /** @@ -305,10 +303,8 @@ void pat_init(void) u64 pat; struct cpuinfo_x86 *c = &boot_cpu_data; - if (!pat_enabled()) { - init_cache_modes(); + if (pat_disabled) return; - } if ((c->x86_vendor == X86_VENDOR_INTEL) && (((c->x86 == 0x6) && (c->x86_model <= 0xd)) || diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index a7655f6caf7d..75fb01109f94 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -263,8 +263,6 @@ void native_flush_tlb_others(const struct cpumask *cpumask, { struct flush_tlb_info info; - if (end == 0) - end = start + PAGE_SIZE; info.flush_mm = mm; info.flush_start = start; info.flush_end = end; @@ -393,7 +391,7 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long start) } if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids) - flush_tlb_others(mm_cpumask(mm), mm, start, 0UL); + flush_tlb_others(mm_cpumask(mm), mm, start, start + PAGE_SIZE); preempt_enable(); } |