diff options
Diffstat (limited to 'arch/x86/mm')
-rw-r--r-- | arch/x86/mm/Makefile | 3 | ||||
-rw-r--r-- | arch/x86/mm/fault.c | 10 | ||||
-rw-r--r-- | arch/x86/mm/gup.c | 13 | ||||
-rw-r--r-- | arch/x86/mm/hugetlbpage.c | 31 | ||||
-rw-r--r-- | arch/x86/mm/init.c | 112 | ||||
-rw-r--r-- | arch/x86/mm/init_64.c | 14 | ||||
-rw-r--r-- | arch/x86/mm/kasan_init_64.c | 206 | ||||
-rw-r--r-- | arch/x86/mm/mmap.c | 6 | ||||
-rw-r--r-- | arch/x86/mm/numa.c | 6 | ||||
-rw-r--r-- | arch/x86/mm/pageattr.c | 4 | ||||
-rw-r--r-- | arch/x86/mm/pat.c | 6 | ||||
-rw-r--r-- | arch/x86/mm/pgtable.c | 95 | ||||
-rw-r--r-- | arch/x86/mm/tlb.c | 3 |
13 files changed, 403 insertions, 106 deletions
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index ecfdc46a024a..c4cc74006c61 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -20,6 +20,9 @@ obj-$(CONFIG_HIGHMEM) += highmem_32.o obj-$(CONFIG_KMEMCHECK) += kmemcheck/ +KASAN_SANITIZE_kasan_init_$(BITS).o := n +obj-$(CONFIG_KASAN) += kasan_init_$(BITS).o + obj-$(CONFIG_MMIOTRACE) += mmiotrace.o mmiotrace-y := kmmio.o pf_in.o mmio-mod.o obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index e3ff27a5b634..181c53bac3a7 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -59,7 +59,7 @@ static nokprobe_inline int kprobes_fault(struct pt_regs *regs) int ret = 0; /* kprobe_running() needs smp_processor_id() */ - if (kprobes_built_in() && !user_mode_vm(regs)) { + if (kprobes_built_in() && !user_mode(regs)) { preempt_disable(); if (kprobe_running() && kprobe_fault_handler(regs, 14)) ret = 1; @@ -148,7 +148,7 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr) instr = (void *)convert_ip_to_linear(current, regs); max_instr = instr + 15; - if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE) + if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE_MAX) return 0; while (instr < max_instr) { @@ -600,7 +600,7 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code, printk(nx_warning, from_kuid(&init_user_ns, current_uid())); if (pte && pte_present(*pte) && pte_exec(*pte) && (pgd_flags(*pgd) & _PAGE_USER) && - (read_cr4() & X86_CR4_SMEP)) + (__read_cr4() & X86_CR4_SMEP)) printk(smep_warning, from_kuid(&init_user_ns, current_uid())); } @@ -1035,7 +1035,7 @@ static inline bool smap_violation(int error_code, struct pt_regs *regs) if (error_code & PF_USER) return false; - if (!user_mode_vm(regs) && (regs->flags & X86_EFLAGS_AC)) + if (!user_mode(regs) && (regs->flags & X86_EFLAGS_AC)) return false; return true; @@ -1140,7 +1140,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code, * User-mode registers count as a user access even for any * potential system fault or CPU buglet: */ - if (user_mode_vm(regs)) { + if (user_mode(regs)) { local_irq_enable(); error_code |= PF_USER; flags |= FAULT_FLAG_USER; diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c index d7547824e763..81bf3d2af3eb 100644 --- a/arch/x86/mm/gup.c +++ b/arch/x86/mm/gup.c @@ -84,7 +84,7 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, struct page *page; /* Similar to the PMD case, NUMA hinting must take slow path */ - if (pte_numa(pte)) { + if (pte_protnone(pte)) { pte_unmap(ptep); return 0; } @@ -172,13 +172,13 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, */ if (pmd_none(pmd) || pmd_trans_splitting(pmd)) return 0; - if (unlikely(pmd_large(pmd))) { + if (unlikely(pmd_large(pmd) || !pmd_present(pmd))) { /* * NUMA hinting faults need to be handled in the GUP * slowpath for accounting purposes and so that they * can be serialised against THP migration. */ - if (pmd_numa(pmd)) + if (pmd_protnone(pmd)) return 0; if (!gup_huge_pmd(pmd, addr, next, write, pages, nr)) return 0; @@ -388,10 +388,9 @@ slow_irqon: start += nr << PAGE_SHIFT; pages += nr; - down_read(&mm->mmap_sem); - ret = get_user_pages(current, mm, start, - (end - start) >> PAGE_SHIFT, write, 0, pages, NULL); - up_read(&mm->mmap_sem); + ret = get_user_pages_unlocked(current, mm, start, + (end - start) >> PAGE_SHIFT, + write, 0, pages); /* Have to be a bit careful with return values */ if (nr > 0) { diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c index 8b977ebf9388..42982b26e32b 100644 --- a/arch/x86/mm/hugetlbpage.c +++ b/arch/x86/mm/hugetlbpage.c @@ -52,23 +52,17 @@ int pud_huge(pud_t pud) return 0; } -struct page * -follow_huge_pmd(struct mm_struct *mm, unsigned long address, - pmd_t *pmd, int write) -{ - return NULL; -} #else -struct page * -follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) -{ - return ERR_PTR(-EINVAL); -} - +/* + * pmd_huge() returns 1 if @pmd is hugetlb related entry, that is normal + * hugetlb entry or non-present (migration or hwpoisoned) hugetlb entry. + * Otherwise, returns 0. + */ int pmd_huge(pmd_t pmd) { - return !!(pmd_val(pmd) & _PAGE_PSE); + return !pmd_none(pmd) && + (pmd_val(pmd) & (_PAGE_PRESENT|_PAGE_PSE)) != _PAGE_PRESENT; } int pud_huge(pud_t pud) @@ -178,4 +172,15 @@ static __init int setup_hugepagesz(char *opt) return 1; } __setup("hugepagesz=", setup_hugepagesz); + +#ifdef CONFIG_CMA +static __init int gigantic_pages_init(void) +{ + /* With CMA we can allocate gigantic pages at runtime */ + if (cpu_has_gbpages && !size_to_hstate(1UL << PUD_SHIFT)) + hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT); + return 0; +} +arch_initcall(gigantic_pages_init); +#endif #endif diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 079c3b6a3ff1..1d553186c434 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -29,29 +29,33 @@ /* * Tables translating between page_cache_type_t and pte encoding. - * Minimal supported modes are defined statically, modified if more supported - * cache modes are available. - * Index into __cachemode2pte_tbl is the cachemode. - * Index into __pte2cachemode_tbl are the caching attribute bits of the pte - * (_PAGE_PWT, _PAGE_PCD, _PAGE_PAT) at index bit positions 0, 1, 2. + * + * Minimal supported modes are defined statically, they are modified + * during bootup if more supported cache modes are available. + * + * Index into __cachemode2pte_tbl[] is the cachemode. + * + * Index into __pte2cachemode_tbl[] are the caching attribute bits of the pte + * (_PAGE_PWT, _PAGE_PCD, _PAGE_PAT) at index bit positions 0, 1, 2. */ uint16_t __cachemode2pte_tbl[_PAGE_CACHE_MODE_NUM] = { - [_PAGE_CACHE_MODE_WB] = 0, - [_PAGE_CACHE_MODE_WC] = _PAGE_PWT, - [_PAGE_CACHE_MODE_UC_MINUS] = _PAGE_PCD, - [_PAGE_CACHE_MODE_UC] = _PAGE_PCD | _PAGE_PWT, - [_PAGE_CACHE_MODE_WT] = _PAGE_PCD, - [_PAGE_CACHE_MODE_WP] = _PAGE_PCD, + [_PAGE_CACHE_MODE_WB ] = 0 | 0 , + [_PAGE_CACHE_MODE_WC ] = _PAGE_PWT | 0 , + [_PAGE_CACHE_MODE_UC_MINUS] = 0 | _PAGE_PCD, + [_PAGE_CACHE_MODE_UC ] = _PAGE_PWT | _PAGE_PCD, + [_PAGE_CACHE_MODE_WT ] = 0 | _PAGE_PCD, + [_PAGE_CACHE_MODE_WP ] = 0 | _PAGE_PCD, }; EXPORT_SYMBOL(__cachemode2pte_tbl); + uint8_t __pte2cachemode_tbl[8] = { - [__pte2cm_idx(0)] = _PAGE_CACHE_MODE_WB, - [__pte2cm_idx(_PAGE_PWT)] = _PAGE_CACHE_MODE_WC, - [__pte2cm_idx(_PAGE_PCD)] = _PAGE_CACHE_MODE_UC_MINUS, - [__pte2cm_idx(_PAGE_PWT | _PAGE_PCD)] = _PAGE_CACHE_MODE_UC, - [__pte2cm_idx(_PAGE_PAT)] = _PAGE_CACHE_MODE_WB, - [__pte2cm_idx(_PAGE_PWT | _PAGE_PAT)] = _PAGE_CACHE_MODE_WC, - [__pte2cm_idx(_PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC_MINUS, + [__pte2cm_idx( 0 | 0 | 0 )] = _PAGE_CACHE_MODE_WB, + [__pte2cm_idx(_PAGE_PWT | 0 | 0 )] = _PAGE_CACHE_MODE_WC, + [__pte2cm_idx( 0 | _PAGE_PCD | 0 )] = _PAGE_CACHE_MODE_UC_MINUS, + [__pte2cm_idx(_PAGE_PWT | _PAGE_PCD | 0 )] = _PAGE_CACHE_MODE_UC, + [__pte2cm_idx( 0 | 0 | _PAGE_PAT)] = _PAGE_CACHE_MODE_WB, + [__pte2cm_idx(_PAGE_PWT | 0 | _PAGE_PAT)] = _PAGE_CACHE_MODE_WC, + [__pte2cm_idx(0 | _PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC_MINUS, [__pte2cm_idx(_PAGE_PWT | _PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC, }; EXPORT_SYMBOL(__pte2cachemode_tbl); @@ -131,21 +135,7 @@ void __init early_alloc_pgt_buf(void) int after_bootmem; -int direct_gbpages -#ifdef CONFIG_DIRECT_GBPAGES - = 1 -#endif -; - -static void __init init_gbpages(void) -{ -#ifdef CONFIG_X86_64 - if (direct_gbpages && cpu_has_gbpages) - printk(KERN_INFO "Using GB pages for direct mapping\n"); - else - direct_gbpages = 0; -#endif -} +early_param_on_off("gbpages", "nogbpages", direct_gbpages, CONFIG_X86_DIRECT_GBPAGES); struct map_range { unsigned long start; @@ -157,28 +147,33 @@ static int page_size_mask; static void __init probe_page_size_mask(void) { - init_gbpages(); - #if !defined(CONFIG_DEBUG_PAGEALLOC) && !defined(CONFIG_KMEMCHECK) /* * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages. * This will simplify cpa(), which otherwise needs to support splitting * large pages into small in interrupt context, etc. */ - if (direct_gbpages) - page_size_mask |= 1 << PG_LEVEL_1G; if (cpu_has_pse) page_size_mask |= 1 << PG_LEVEL_2M; #endif /* Enable PSE if available */ if (cpu_has_pse) - set_in_cr4(X86_CR4_PSE); + cr4_set_bits_and_update_boot(X86_CR4_PSE); /* Enable PGE if available */ if (cpu_has_pge) { - set_in_cr4(X86_CR4_PGE); + cr4_set_bits_and_update_boot(X86_CR4_PGE); __supported_pte_mask |= _PAGE_GLOBAL; + } else + __supported_pte_mask &= ~_PAGE_GLOBAL; + + /* Enable 1 GB linear kernel mappings if available: */ + if (direct_gbpages && cpu_has_gbpages) { + printk(KERN_INFO "Using GB pages for direct mapping\n"); + page_size_mask |= 1 << PG_LEVEL_1G; + } else { + direct_gbpages = 0; } } @@ -238,6 +233,31 @@ static void __init_refok adjust_range_page_size_mask(struct map_range *mr, } } +static const char *page_size_string(struct map_range *mr) +{ + static const char str_1g[] = "1G"; + static const char str_2m[] = "2M"; + static const char str_4m[] = "4M"; + static const char str_4k[] = "4k"; + + if (mr->page_size_mask & (1<<PG_LEVEL_1G)) + return str_1g; + /* + * 32-bit without PAE has a 4M large page size. + * PG_LEVEL_2M is misnamed, but we can at least + * print out the right size in the string. + */ + if (IS_ENABLED(CONFIG_X86_32) && + !IS_ENABLED(CONFIG_X86_PAE) && + mr->page_size_mask & (1<<PG_LEVEL_2M)) + return str_4m; + + if (mr->page_size_mask & (1<<PG_LEVEL_2M)) + return str_2m; + + return str_4k; +} + static int __meminit split_mem_range(struct map_range *mr, int nr_range, unsigned long start, unsigned long end) @@ -333,8 +353,7 @@ static int __meminit split_mem_range(struct map_range *mr, int nr_range, for (i = 0; i < nr_range; i++) printk(KERN_DEBUG " [mem %#010lx-%#010lx] page %s\n", mr[i].start, mr[i].end - 1, - (mr[i].page_size_mask & (1<<PG_LEVEL_1G))?"1G":( - (mr[i].page_size_mask & (1<<PG_LEVEL_2M))?"2M":"4k")); + page_size_string(&mr[i])); return nr_range; } @@ -608,7 +627,7 @@ void __init init_mem_mapping(void) * * * On x86, access has to be given to the first megabyte of ram because that area - * contains bios code and data regions used by X and dosemu and similar apps. + * contains BIOS code and data regions used by X and dosemu and similar apps. * Access has to be given to non-kernel-ram areas as well, these contain the PCI * mmio resources as well as potential bios/acpi data regions. */ @@ -713,6 +732,15 @@ void __init zone_sizes_init(void) free_area_init_nodes(max_zone_pfns); } +DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = { +#ifdef CONFIG_SMP + .active_mm = &init_mm, + .state = 0, +#endif + .cr4 = ~0UL, /* fail hard if we screw up cr4 shadow initialization */ +}; +EXPORT_SYMBOL_GPL(cpu_tlbstate); + void update_cache_mode_entry(unsigned entry, enum page_cache_mode cache) { /* entry 0 MUST be WB (hardwired to speed up translations) */ diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 30eb05ae7061..3fba623e3ba5 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -130,20 +130,6 @@ int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page, return 0; } -static int __init parse_direct_gbpages_off(char *arg) -{ - direct_gbpages = 0; - return 0; -} -early_param("nogbpages", parse_direct_gbpages_off); - -static int __init parse_direct_gbpages_on(char *arg) -{ - direct_gbpages = 1; - return 0; -} -early_param("gbpages", parse_direct_gbpages_on); - /* * NOTE: pagetable_init alloc all the fixmap pagetables contiguous on the * physical space so we can cache the place of the first one and move diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c new file mode 100644 index 000000000000..4860906c6b9f --- /dev/null +++ b/arch/x86/mm/kasan_init_64.c @@ -0,0 +1,206 @@ +#include <linux/bootmem.h> +#include <linux/kasan.h> +#include <linux/kdebug.h> +#include <linux/mm.h> +#include <linux/sched.h> +#include <linux/vmalloc.h> + +#include <asm/tlbflush.h> +#include <asm/sections.h> + +extern pgd_t early_level4_pgt[PTRS_PER_PGD]; +extern struct range pfn_mapped[E820_X_MAX]; + +extern unsigned char kasan_zero_page[PAGE_SIZE]; + +static int __init map_range(struct range *range) +{ + unsigned long start; + unsigned long end; + + start = (unsigned long)kasan_mem_to_shadow(pfn_to_kaddr(range->start)); + end = (unsigned long)kasan_mem_to_shadow(pfn_to_kaddr(range->end)); + + /* + * end + 1 here is intentional. We check several shadow bytes in advance + * to slightly speed up fastpath. In some rare cases we could cross + * boundary of mapped shadow, so we just map some more here. + */ + return vmemmap_populate(start, end + 1, NUMA_NO_NODE); +} + +static void __init clear_pgds(unsigned long start, + unsigned long end) +{ + for (; start < end; start += PGDIR_SIZE) + pgd_clear(pgd_offset_k(start)); +} + +void __init kasan_map_early_shadow(pgd_t *pgd) +{ + int i; + unsigned long start = KASAN_SHADOW_START; + unsigned long end = KASAN_SHADOW_END; + + for (i = pgd_index(start); start < end; i++) { + pgd[i] = __pgd(__pa_nodebug(kasan_zero_pud) + | _KERNPG_TABLE); + start += PGDIR_SIZE; + } +} + +static int __init zero_pte_populate(pmd_t *pmd, unsigned long addr, + unsigned long end) +{ + pte_t *pte = pte_offset_kernel(pmd, addr); + + while (addr + PAGE_SIZE <= end) { + WARN_ON(!pte_none(*pte)); + set_pte(pte, __pte(__pa_nodebug(kasan_zero_page) + | __PAGE_KERNEL_RO)); + addr += PAGE_SIZE; + pte = pte_offset_kernel(pmd, addr); + } + return 0; +} + +static int __init zero_pmd_populate(pud_t *pud, unsigned long addr, + unsigned long end) +{ + int ret = 0; + pmd_t *pmd = pmd_offset(pud, addr); + + while (IS_ALIGNED(addr, PMD_SIZE) && addr + PMD_SIZE <= end) { + WARN_ON(!pmd_none(*pmd)); + set_pmd(pmd, __pmd(__pa_nodebug(kasan_zero_pte) + | __PAGE_KERNEL_RO)); + addr += PMD_SIZE; + pmd = pmd_offset(pud, addr); + } + if (addr < end) { + if (pmd_none(*pmd)) { + void *p = vmemmap_alloc_block(PAGE_SIZE, NUMA_NO_NODE); + if (!p) + return -ENOMEM; + set_pmd(pmd, __pmd(__pa_nodebug(p) | _KERNPG_TABLE)); + } + ret = zero_pte_populate(pmd, addr, end); + } + return ret; +} + + +static int __init zero_pud_populate(pgd_t *pgd, unsigned long addr, + unsigned long end) +{ + int ret = 0; + pud_t *pud = pud_offset(pgd, addr); + + while (IS_ALIGNED(addr, PUD_SIZE) && addr + PUD_SIZE <= end) { + WARN_ON(!pud_none(*pud)); + set_pud(pud, __pud(__pa_nodebug(kasan_zero_pmd) + | __PAGE_KERNEL_RO)); + addr += PUD_SIZE; + pud = pud_offset(pgd, addr); + } + + if (addr < end) { + if (pud_none(*pud)) { + void *p = vmemmap_alloc_block(PAGE_SIZE, NUMA_NO_NODE); + if (!p) + return -ENOMEM; + set_pud(pud, __pud(__pa_nodebug(p) | _KERNPG_TABLE)); + } + ret = zero_pmd_populate(pud, addr, end); + } + return ret; +} + +static int __init zero_pgd_populate(unsigned long addr, unsigned long end) +{ + int ret = 0; + pgd_t *pgd = pgd_offset_k(addr); + + while (IS_ALIGNED(addr, PGDIR_SIZE) && addr + PGDIR_SIZE <= end) { + WARN_ON(!pgd_none(*pgd)); + set_pgd(pgd, __pgd(__pa_nodebug(kasan_zero_pud) + | __PAGE_KERNEL_RO)); + addr += PGDIR_SIZE; + pgd = pgd_offset_k(addr); + } + + if (addr < end) { + if (pgd_none(*pgd)) { + void *p = vmemmap_alloc_block(PAGE_SIZE, NUMA_NO_NODE); + if (!p) + return -ENOMEM; + set_pgd(pgd, __pgd(__pa_nodebug(p) | _KERNPG_TABLE)); + } + ret = zero_pud_populate(pgd, addr, end); + } + return ret; +} + + +static void __init populate_zero_shadow(const void *start, const void *end) +{ + if (zero_pgd_populate((unsigned long)start, (unsigned long)end)) + panic("kasan: unable to map zero shadow!"); +} + + +#ifdef CONFIG_KASAN_INLINE +static int kasan_die_handler(struct notifier_block *self, + unsigned long val, + void *data) +{ + if (val == DIE_GPF) { + pr_emerg("CONFIG_KASAN_INLINE enabled"); + pr_emerg("GPF could be caused by NULL-ptr deref or user memory access"); + } + return NOTIFY_OK; +} + +static struct notifier_block kasan_die_notifier = { + .notifier_call = kasan_die_handler, +}; +#endif + +void __init kasan_init(void) +{ + int i; + +#ifdef CONFIG_KASAN_INLINE + register_die_notifier(&kasan_die_notifier); +#endif + + memcpy(early_level4_pgt, init_level4_pgt, sizeof(early_level4_pgt)); + load_cr3(early_level4_pgt); + + clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END); + + populate_zero_shadow((void *)KASAN_SHADOW_START, + kasan_mem_to_shadow((void *)PAGE_OFFSET)); + + for (i = 0; i < E820_X_MAX; i++) { + if (pfn_mapped[i].end == 0) + break; + + if (map_range(&pfn_mapped[i])) + panic("kasan: unable to allocate shadow!"); + } + populate_zero_shadow(kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM), + kasan_mem_to_shadow((void *)__START_KERNEL_map)); + + vmemmap_populate((unsigned long)kasan_mem_to_shadow(_stext), + (unsigned long)kasan_mem_to_shadow(_end), + NUMA_NO_NODE); + + populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_END), + (void *)KASAN_SHADOW_END); + + memset(kasan_zero_page, 0, PAGE_SIZE); + + load_cr3(init_level4_pgt); + init_task.kasan_depth = 0; +} diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c index 919b91205cd4..df4552bd239e 100644 --- a/arch/x86/mm/mmap.c +++ b/arch/x86/mm/mmap.c @@ -35,12 +35,12 @@ struct va_alignment __read_mostly va_align = { .flags = -1, }; -static unsigned int stack_maxrandom_size(void) +static unsigned long stack_maxrandom_size(void) { - unsigned int max = 0; + unsigned long max = 0; if ((current->flags & PF_RANDOMIZE) && !(current->personality & ADDR_NO_RANDOMIZE)) { - max = ((-1U) & STACK_RND_MASK) << PAGE_SHIFT; + max = ((-1UL) & STACK_RND_MASK) << PAGE_SHIFT; } return max; diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index 1a883705a12a..cd4785bbacb9 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -794,7 +794,6 @@ int early_cpu_to_node(int cpu) void debug_cpumask_set_cpu(int cpu, int node, bool enable) { struct cpumask *mask; - char buf[64]; if (node == NUMA_NO_NODE) { /* early_cpu_to_node() already emits a warning and trace */ @@ -812,10 +811,9 @@ void debug_cpumask_set_cpu(int cpu, int node, bool enable) else cpumask_clear_cpu(cpu, mask); - cpulist_scnprintf(buf, sizeof(buf), mask); - printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n", + printk(KERN_DEBUG "%s cpu %d node %d: mask now %*pbl\n", enable ? "numa_add_cpu" : "numa_remove_cpu", - cpu, node, buf); + cpu, node, cpumask_pr_args(mask)); return; } diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 536ea2fb6e33..89af288ec674 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -81,11 +81,9 @@ void arch_report_meminfo(struct seq_file *m) seq_printf(m, "DirectMap4M: %8lu kB\n", direct_pages_count[PG_LEVEL_2M] << 12); #endif -#ifdef CONFIG_X86_64 if (direct_gbpages) seq_printf(m, "DirectMap1G: %8lu kB\n", direct_pages_count[PG_LEVEL_1G] << 20); -#endif } #else static inline void split_page_count(int level) { } @@ -1654,13 +1652,11 @@ int set_memory_ro(unsigned long addr, int numpages) { return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_RW), 0); } -EXPORT_SYMBOL_GPL(set_memory_ro); int set_memory_rw(unsigned long addr, int numpages) { return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_RW), 0); } -EXPORT_SYMBOL_GPL(set_memory_rw); int set_memory_np(unsigned long addr, int numpages) { diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 7ac68698406c..35af6771a95a 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -610,7 +610,7 @@ pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, } #ifdef CONFIG_STRICT_DEVMEM -/* This check is done in drivers/char/mem.c in case of STRICT_DEVMEM*/ +/* This check is done in drivers/char/mem.c in case of STRICT_DEVMEM */ static inline int range_is_allowed(unsigned long pfn, unsigned long size) { return 1; @@ -628,8 +628,8 @@ static inline int range_is_allowed(unsigned long pfn, unsigned long size) while (cursor < to) { if (!devmem_is_allowed(pfn)) { - printk(KERN_INFO "Program %s tried to access /dev/mem between [mem %#010Lx-%#010Lx]\n", - current->comm, from, to - 1); + printk(KERN_INFO "Program %s tried to access /dev/mem between [mem %#010Lx-%#010Lx], PAT prevents it\n", + current->comm, from, to - 1); return 0; } cursor += PAGE_SIZE; diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 6fb6927f9e76..5a7e5252c878 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -190,7 +190,7 @@ void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd) #endif /* CONFIG_X86_PAE */ -static void free_pmds(pmd_t *pmds[]) +static void free_pmds(struct mm_struct *mm, pmd_t *pmds[]) { int i; @@ -198,10 +198,11 @@ static void free_pmds(pmd_t *pmds[]) if (pmds[i]) { pgtable_pmd_page_dtor(virt_to_page(pmds[i])); free_page((unsigned long)pmds[i]); + mm_dec_nr_pmds(mm); } } -static int preallocate_pmds(pmd_t *pmds[]) +static int preallocate_pmds(struct mm_struct *mm, pmd_t *pmds[]) { int i; bool failed = false; @@ -215,11 +216,13 @@ static int preallocate_pmds(pmd_t *pmds[]) pmd = NULL; failed = true; } + if (pmd) + mm_inc_nr_pmds(mm); pmds[i] = pmd; } if (failed) { - free_pmds(pmds); + free_pmds(mm, pmds); return -ENOMEM; } @@ -246,6 +249,7 @@ static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp) paravirt_release_pmd(pgd_val(pgd) >> PAGE_SHIFT); pmd_free(mm, pmd); + mm_dec_nr_pmds(mm); } } } @@ -271,19 +275,94 @@ static void pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmds[]) } } +/* + * Xen paravirt assumes pgd table should be in one page. 64 bit kernel also + * assumes that pgd should be in one page. + * + * But kernel with PAE paging that is not running as a Xen domain + * only needs to allocate 32 bytes for pgd instead of one page. + */ +#ifdef CONFIG_X86_PAE + +#include <linux/slab.h> + +#define PGD_SIZE (PTRS_PER_PGD * sizeof(pgd_t)) +#define PGD_ALIGN 32 + +static struct kmem_cache *pgd_cache; + +static int __init pgd_cache_init(void) +{ + /* + * When PAE kernel is running as a Xen domain, it does not use + * shared kernel pmd. And this requires a whole page for pgd. + */ + if (!SHARED_KERNEL_PMD) + return 0; + + /* + * when PAE kernel is not running as a Xen domain, it uses + * shared kernel pmd. Shared kernel pmd does not require a whole + * page for pgd. We are able to just allocate a 32-byte for pgd. + * During boot time, we create a 32-byte slab for pgd table allocation. + */ + pgd_cache = kmem_cache_create("pgd_cache", PGD_SIZE, PGD_ALIGN, + SLAB_PANIC, NULL); + if (!pgd_cache) + return -ENOMEM; + + return 0; +} +core_initcall(pgd_cache_init); + +static inline pgd_t *_pgd_alloc(void) +{ + /* + * If no SHARED_KERNEL_PMD, PAE kernel is running as a Xen domain. + * We allocate one page for pgd. + */ + if (!SHARED_KERNEL_PMD) + return (pgd_t *)__get_free_page(PGALLOC_GFP); + + /* + * Now PAE kernel is not running as a Xen domain. We can allocate + * a 32-byte slab for pgd to save memory space. + */ + return kmem_cache_alloc(pgd_cache, PGALLOC_GFP); +} + +static inline void _pgd_free(pgd_t *pgd) +{ + if (!SHARED_KERNEL_PMD) + free_page((unsigned long)pgd); + else + kmem_cache_free(pgd_cache, pgd); +} +#else +static inline pgd_t *_pgd_alloc(void) +{ + return (pgd_t *)__get_free_page(PGALLOC_GFP); +} + +static inline void _pgd_free(pgd_t *pgd) +{ + free_page((unsigned long)pgd); +} +#endif /* CONFIG_X86_PAE */ + pgd_t *pgd_alloc(struct mm_struct *mm) { pgd_t *pgd; pmd_t *pmds[PREALLOCATED_PMDS]; - pgd = (pgd_t *)__get_free_page(PGALLOC_GFP); + pgd = _pgd_alloc(); if (pgd == NULL) goto out; mm->pgd = pgd; - if (preallocate_pmds(pmds) != 0) + if (preallocate_pmds(mm, pmds) != 0) goto out_free_pgd; if (paravirt_pgd_alloc(mm) != 0) @@ -304,9 +383,9 @@ pgd_t *pgd_alloc(struct mm_struct *mm) return pgd; out_free_pmds: - free_pmds(pmds); + free_pmds(mm, pmds); out_free_pgd: - free_page((unsigned long)pgd); + _pgd_free(pgd); out: return NULL; } @@ -316,7 +395,7 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd) pgd_mop_up_pmds(mm, pgd); pgd_dtor(pgd); paravirt_pgd_free(mm, pgd); - free_page((unsigned long)pgd); + _pgd_free(pgd); } /* diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index ee61c36d64f8..3250f2371aea 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -14,9 +14,6 @@ #include <asm/uv/uv.h> #include <linux/debugfs.h> -DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) - = { &init_mm, 0, }; - /* * Smarter SMP flushing macros. * c/o Linus Torvalds. |