Diffstat (limited to 'arch/x86/mm')
 arch/x86/mm/Makefile        |   3
 arch/x86/mm/fault.c         |  10
 arch/x86/mm/gup.c           |  13
 arch/x86/mm/hugetlbpage.c   |  31
 arch/x86/mm/init.c          | 112
 arch/x86/mm/init_64.c       |  14
 arch/x86/mm/kasan_init_64.c | 206
 arch/x86/mm/mmap.c          |   6
 arch/x86/mm/numa.c          |   6
 arch/x86/mm/pageattr.c      |   4
 arch/x86/mm/pat.c           |   6
 arch/x86/mm/pgtable.c       |  95
 arch/x86/mm/tlb.c           |   3
 13 files changed, 403 insertions(+), 106 deletions(-)
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index ecfdc46a024a..c4cc74006c61 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -20,6 +20,9 @@ obj-$(CONFIG_HIGHMEM) += highmem_32.o
obj-$(CONFIG_KMEMCHECK) += kmemcheck/
+KASAN_SANITIZE_kasan_init_$(BITS).o := n
+obj-$(CONFIG_KASAN) += kasan_init_$(BITS).o
+
obj-$(CONFIG_MMIOTRACE) += mmiotrace.o
mmiotrace-y := kmmio.o pf_in.o mmio-mod.o
obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index e3ff27a5b634..181c53bac3a7 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -59,7 +59,7 @@ static nokprobe_inline int kprobes_fault(struct pt_regs *regs)
int ret = 0;
/* kprobe_running() needs smp_processor_id() */
- if (kprobes_built_in() && !user_mode_vm(regs)) {
+ if (kprobes_built_in() && !user_mode(regs)) {
preempt_disable();
if (kprobe_running() && kprobe_fault_handler(regs, 14))
ret = 1;
@@ -148,7 +148,7 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr)
instr = (void *)convert_ip_to_linear(current, regs);
max_instr = instr + 15;
- if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE)
+ if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE_MAX)
return 0;
while (instr < max_instr) {
@@ -600,7 +600,7 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code,
printk(nx_warning, from_kuid(&init_user_ns, current_uid()));
if (pte && pte_present(*pte) && pte_exec(*pte) &&
(pgd_flags(*pgd) & _PAGE_USER) &&
- (read_cr4() & X86_CR4_SMEP))
+ (__read_cr4() & X86_CR4_SMEP))
printk(smep_warning, from_kuid(&init_user_ns, current_uid()));
}
@@ -1035,7 +1035,7 @@ static inline bool smap_violation(int error_code, struct pt_regs *regs)
if (error_code & PF_USER)
return false;
- if (!user_mode_vm(regs) && (regs->flags & X86_EFLAGS_AC))
+ if (!user_mode(regs) && (regs->flags & X86_EFLAGS_AC))
return false;
return true;
@@ -1140,7 +1140,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
* User-mode registers count as a user access even for any
* potential system fault or CPU buglet:
*/
- if (user_mode_vm(regs)) {
+ if (user_mode(regs)) {
local_irq_enable();
error_code |= PF_USER;
flags |= FAULT_FLAG_USER;
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
index d7547824e763..81bf3d2af3eb 100644
--- a/arch/x86/mm/gup.c
+++ b/arch/x86/mm/gup.c
@@ -84,7 +84,7 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
struct page *page;
/* Similar to the PMD case, NUMA hinting must take slow path */
- if (pte_numa(pte)) {
+ if (pte_protnone(pte)) {
pte_unmap(ptep);
return 0;
}
@@ -172,13 +172,13 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
*/
if (pmd_none(pmd) || pmd_trans_splitting(pmd))
return 0;
- if (unlikely(pmd_large(pmd))) {
+ if (unlikely(pmd_large(pmd) || !pmd_present(pmd))) {
/*
* NUMA hinting faults need to be handled in the GUP
* slowpath for accounting purposes and so that they
* can be serialised against THP migration.
*/
- if (pmd_numa(pmd))
+ if (pmd_protnone(pmd))
return 0;
if (!gup_huge_pmd(pmd, addr, next, write, pages, nr))
return 0;
@@ -388,10 +388,9 @@ slow_irqon:
start += nr << PAGE_SHIFT;
pages += nr;
- down_read(&mm->mmap_sem);
- ret = get_user_pages(current, mm, start,
- (end - start) >> PAGE_SHIFT, write, 0, pages, NULL);
- up_read(&mm->mmap_sem);
+ ret = get_user_pages_unlocked(current, mm, start,
+ (end - start) >> PAGE_SHIFT,
+ write, 0, pages);
/* Have to be a bit careful with return values */
if (nr > 0) {
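
Note on the gup.c hunk above: get_user_pages_unlocked() folds the removed down_read()/get_user_pages()/up_read() sequence into one call that manages mmap_sem itself and, unlike the open-coded version, may drop the semaphore while faulting pages in. A minimal sketch of the equivalent pattern using the locked variant, assuming the 2015-era get_user_pages_locked() signature:

	int locked = 1;
	long ret;

	down_read(&mm->mmap_sem);
	ret = get_user_pages_locked(current, mm, start,
				    (end - start) >> PAGE_SHIFT,
				    write, 0, pages, &locked);
	if (locked)	/* the fault path may already have dropped mmap_sem */
		up_read(&mm->mmap_sem);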
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index 8b977ebf9388..42982b26e32b 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -52,23 +52,17 @@ int pud_huge(pud_t pud)
return 0;
}
-struct page *
-follow_huge_pmd(struct mm_struct *mm, unsigned long address,
- pmd_t *pmd, int write)
-{
- return NULL;
-}
#else
-struct page *
-follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
-{
- return ERR_PTR(-EINVAL);
-}
-
+/*
+ * pmd_huge() returns 1 if @pmd is hugetlb related entry, that is normal
+ * hugetlb entry or non-present (migration or hwpoisoned) hugetlb entry.
+ * Otherwise, returns 0.
+ */
int pmd_huge(pmd_t pmd)
{
- return !!(pmd_val(pmd) & _PAGE_PSE);
+ return !pmd_none(pmd) &&
+ (pmd_val(pmd) & (_PAGE_PRESENT|_PAGE_PSE)) != _PAGE_PRESENT;
}
int pud_huge(pud_t pud)
@@ -178,4 +172,15 @@ static __init int setup_hugepagesz(char *opt)
return 1;
}
__setup("hugepagesz=", setup_hugepagesz);
+
+#ifdef CONFIG_CMA
+static __init int gigantic_pages_init(void)
+{
+ /* With CMA we can allocate gigantic pages at runtime */
+ if (cpu_has_gbpages && !size_to_hstate(1UL << PUD_SHIFT))
+ hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
+ return 0;
+}
+arch_initcall(gigantic_pages_init);
+#endif
#endif
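
Note on the pmd_huge() change above: the new test treats any non-empty PMD whose PRESENT/PSE bits are not exactly _PAGE_PRESENT as a hugetlb entry, covering both present huge pages and non-present (migration or hwpoisoned) entries. A standalone sketch of the predicate, with illustrative x86 flag values (an assumption, not taken from this hunk):

	#include <stdio.h>

	#define _PAGE_PRESENT 0x001UL	/* bit 0 */
	#define _PAGE_PSE     0x080UL	/* bit 7 */

	static int pmd_huge_like(unsigned long pmdval)
	{
		if (pmdval == 0)	/* pmd_none(): nothing mapped */
			return 0;
		return (pmdval & (_PAGE_PRESENT | _PAGE_PSE)) != _PAGE_PRESENT;
	}

	int main(void)
	{
		printf("%d\n", pmd_huge_like(0));			  /* 0: empty */
		printf("%d\n", pmd_huge_like(_PAGE_PRESENT));		  /* 0: points to a pte page */
		printf("%d\n", pmd_huge_like(_PAGE_PRESENT | _PAGE_PSE)); /* 1: present huge page */
		printf("%d\n", pmd_huge_like(0x200));			  /* 1: non-present swap-style entry */
		return 0;
	}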
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 079c3b6a3ff1..1d553186c434 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -29,29 +29,33 @@
/*
* Tables translating between page_cache_type_t and pte encoding.
- * Minimal supported modes are defined statically, modified if more supported
- * cache modes are available.
- * Index into __cachemode2pte_tbl is the cachemode.
- * Index into __pte2cachemode_tbl are the caching attribute bits of the pte
- * (_PAGE_PWT, _PAGE_PCD, _PAGE_PAT) at index bit positions 0, 1, 2.
+ *
+ * Minimal supported modes are defined statically, they are modified
+ * during bootup if more supported cache modes are available.
+ *
+ * Index into __cachemode2pte_tbl[] is the cachemode.
+ *
+ * Index into __pte2cachemode_tbl[] are the caching attribute bits of the pte
+ * (_PAGE_PWT, _PAGE_PCD, _PAGE_PAT) at index bit positions 0, 1, 2.
*/
uint16_t __cachemode2pte_tbl[_PAGE_CACHE_MODE_NUM] = {
- [_PAGE_CACHE_MODE_WB] = 0,
- [_PAGE_CACHE_MODE_WC] = _PAGE_PWT,
- [_PAGE_CACHE_MODE_UC_MINUS] = _PAGE_PCD,
- [_PAGE_CACHE_MODE_UC] = _PAGE_PCD | _PAGE_PWT,
- [_PAGE_CACHE_MODE_WT] = _PAGE_PCD,
- [_PAGE_CACHE_MODE_WP] = _PAGE_PCD,
+ [_PAGE_CACHE_MODE_WB ] = 0 | 0 ,
+ [_PAGE_CACHE_MODE_WC ] = _PAGE_PWT | 0 ,
+ [_PAGE_CACHE_MODE_UC_MINUS] = 0 | _PAGE_PCD,
+ [_PAGE_CACHE_MODE_UC ] = _PAGE_PWT | _PAGE_PCD,
+ [_PAGE_CACHE_MODE_WT ] = 0 | _PAGE_PCD,
+ [_PAGE_CACHE_MODE_WP ] = 0 | _PAGE_PCD,
};
EXPORT_SYMBOL(__cachemode2pte_tbl);
+
uint8_t __pte2cachemode_tbl[8] = {
- [__pte2cm_idx(0)] = _PAGE_CACHE_MODE_WB,
- [__pte2cm_idx(_PAGE_PWT)] = _PAGE_CACHE_MODE_WC,
- [__pte2cm_idx(_PAGE_PCD)] = _PAGE_CACHE_MODE_UC_MINUS,
- [__pte2cm_idx(_PAGE_PWT | _PAGE_PCD)] = _PAGE_CACHE_MODE_UC,
- [__pte2cm_idx(_PAGE_PAT)] = _PAGE_CACHE_MODE_WB,
- [__pte2cm_idx(_PAGE_PWT | _PAGE_PAT)] = _PAGE_CACHE_MODE_WC,
- [__pte2cm_idx(_PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC_MINUS,
+ [__pte2cm_idx( 0 | 0 | 0 )] = _PAGE_CACHE_MODE_WB,
+ [__pte2cm_idx(_PAGE_PWT | 0 | 0 )] = _PAGE_CACHE_MODE_WC,
+ [__pte2cm_idx( 0 | _PAGE_PCD | 0 )] = _PAGE_CACHE_MODE_UC_MINUS,
+ [__pte2cm_idx(_PAGE_PWT | _PAGE_PCD | 0 )] = _PAGE_CACHE_MODE_UC,
+ [__pte2cm_idx( 0 | 0 | _PAGE_PAT)] = _PAGE_CACHE_MODE_WB,
+ [__pte2cm_idx(_PAGE_PWT | 0 | _PAGE_PAT)] = _PAGE_CACHE_MODE_WC,
+ [__pte2cm_idx(0 | _PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC_MINUS,
[__pte2cm_idx(_PAGE_PWT | _PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC,
};
EXPORT_SYMBOL(__pte2cachemode_tbl);
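
The table indices above come from packing the three PTE caching bits into a 3-bit value. A sketch of that packing (an assumption about __pte2cm_idx()'s behaviour, not its actual macro body):

	/* PWT -> bit 0, PCD -> bit 1, PAT -> bit 2, giving the 8 slots above */
	static unsigned int pte2cm_idx_like(unsigned long prot)
	{
		return ((prot & _PAGE_PWT) ? 1 : 0) |
		       ((prot & _PAGE_PCD) ? 2 : 0) |
		       ((prot & _PAGE_PAT) ? 4 : 0);
	}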
@@ -131,21 +135,7 @@ void __init early_alloc_pgt_buf(void)
int after_bootmem;
-int direct_gbpages
-#ifdef CONFIG_DIRECT_GBPAGES
- = 1
-#endif
-;
-
-static void __init init_gbpages(void)
-{
-#ifdef CONFIG_X86_64
- if (direct_gbpages && cpu_has_gbpages)
- printk(KERN_INFO "Using GB pages for direct mapping\n");
- else
- direct_gbpages = 0;
-#endif
-}
+early_param_on_off("gbpages", "nogbpages", direct_gbpages, CONFIG_X86_DIRECT_GBPAGES);
struct map_range {
unsigned long start;
@@ -157,28 +147,33 @@ static int page_size_mask;
static void __init probe_page_size_mask(void)
{
- init_gbpages();
-
#if !defined(CONFIG_DEBUG_PAGEALLOC) && !defined(CONFIG_KMEMCHECK)
/*
* For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages.
* This will simplify cpa(), which otherwise needs to support splitting
* large pages into small in interrupt context, etc.
*/
- if (direct_gbpages)
- page_size_mask |= 1 << PG_LEVEL_1G;
if (cpu_has_pse)
page_size_mask |= 1 << PG_LEVEL_2M;
#endif
/* Enable PSE if available */
if (cpu_has_pse)
- set_in_cr4(X86_CR4_PSE);
+ cr4_set_bits_and_update_boot(X86_CR4_PSE);
/* Enable PGE if available */
if (cpu_has_pge) {
- set_in_cr4(X86_CR4_PGE);
+ cr4_set_bits_and_update_boot(X86_CR4_PGE);
__supported_pte_mask |= _PAGE_GLOBAL;
+ } else
+ __supported_pte_mask &= ~_PAGE_GLOBAL;
+
+ /* Enable 1 GB linear kernel mappings if available: */
+ if (direct_gbpages && cpu_has_gbpages) {
+ printk(KERN_INFO "Using GB pages for direct mapping\n");
+ page_size_mask |= 1 << PG_LEVEL_1G;
+ } else {
+ direct_gbpages = 0;
}
}
@@ -238,6 +233,31 @@ static void __init_refok adjust_range_page_size_mask(struct map_range *mr,
}
}
+static const char *page_size_string(struct map_range *mr)
+{
+ static const char str_1g[] = "1G";
+ static const char str_2m[] = "2M";
+ static const char str_4m[] = "4M";
+ static const char str_4k[] = "4k";
+
+ if (mr->page_size_mask & (1<<PG_LEVEL_1G))
+ return str_1g;
+ /*
+ * 32-bit without PAE has a 4M large page size.
+ * PG_LEVEL_2M is misnamed, but we can at least
+ * print out the right size in the string.
+ */
+ if (IS_ENABLED(CONFIG_X86_32) &&
+ !IS_ENABLED(CONFIG_X86_PAE) &&
+ mr->page_size_mask & (1<<PG_LEVEL_2M))
+ return str_4m;
+
+ if (mr->page_size_mask & (1<<PG_LEVEL_2M))
+ return str_2m;
+
+ return str_4k;
+}
+
static int __meminit split_mem_range(struct map_range *mr, int nr_range,
unsigned long start,
unsigned long end)
@@ -333,8 +353,7 @@ static int __meminit split_mem_range(struct map_range *mr, int nr_range,
for (i = 0; i < nr_range; i++)
printk(KERN_DEBUG " [mem %#010lx-%#010lx] page %s\n",
mr[i].start, mr[i].end - 1,
- (mr[i].page_size_mask & (1<<PG_LEVEL_1G))?"1G":(
- (mr[i].page_size_mask & (1<<PG_LEVEL_2M))?"2M":"4k"));
+ page_size_string(&mr[i]));
return nr_range;
}
@@ -608,7 +627,7 @@ void __init init_mem_mapping(void)
*
*
* On x86, access has to be given to the first megabyte of ram because that area
- * contains bios code and data regions used by X and dosemu and similar apps.
+ * contains BIOS code and data regions used by X and dosemu and similar apps.
* Access has to be given to non-kernel-ram areas as well, these contain the PCI
* mmio resources as well as potential bios/acpi data regions.
*/
@@ -713,6 +732,15 @@ void __init zone_sizes_init(void)
free_area_init_nodes(max_zone_pfns);
}
+DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = {
+#ifdef CONFIG_SMP
+ .active_mm = &init_mm,
+ .state = 0,
+#endif
+ .cr4 = ~0UL, /* fail hard if we screw up cr4 shadow initialization */
+};
+EXPORT_SYMBOL_GPL(cpu_tlbstate);
+
void update_cache_mode_entry(unsigned entry, enum page_cache_mode cache)
{
/* entry 0 MUST be WB (hardwired to speed up translations) */
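
Note on the init.c hunks above: cr4_set_bits_and_update_boot() replaces set_in_cr4() because CR4 is now tracked in the per-CPU cpu_tlbstate.cr4 shadow defined here; initializing the shadow to ~0UL guarantees the first real update sees a mismatch and writes the hardware register. A rough sketch of how such a shadowed update works (an assumption about the helper, not code from this patch):

	static inline void cr4_set_bits_like(unsigned long mask)
	{
		unsigned long cr4 = this_cpu_read(cpu_tlbstate.cr4);

		if ((cr4 | mask) != cr4) {	/* only touch CR4 on a real change */
			cr4 |= mask;
			this_cpu_write(cpu_tlbstate.cr4, cr4);
			__write_cr4(cr4);
		}
	}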
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 30eb05ae7061..3fba623e3ba5 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -130,20 +130,6 @@ int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
return 0;
}
-static int __init parse_direct_gbpages_off(char *arg)
-{
- direct_gbpages = 0;
- return 0;
-}
-early_param("nogbpages", parse_direct_gbpages_off);
-
-static int __init parse_direct_gbpages_on(char *arg)
-{
- direct_gbpages = 1;
- return 0;
-}
-early_param("gbpages", parse_direct_gbpages_on);
-
/*
* NOTE: pagetable_init alloc all the fixmap pagetables contiguous on the
* physical space so we can cache the place of the first one and move
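
The two boot parameters removed here are what the single early_param_on_off("gbpages", "nogbpages", direct_gbpages, CONFIG_X86_DIRECT_GBPAGES) line in init.c now provides. A sketch of the pattern that one-liner stands for, grounded in the removed handlers (taking the default from Kconfig is an assumption about the new helper):

	int direct_gbpages = IS_ENABLED(CONFIG_X86_DIRECT_GBPAGES);

	static int __init parse_gbpages_on(char *arg)
	{
		direct_gbpages = 1;
		return 0;
	}
	early_param("gbpages", parse_gbpages_on);

	static int __init parse_gbpages_off(char *arg)
	{
		direct_gbpages = 0;
		return 0;
	}
	early_param("nogbpages", parse_gbpages_off);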
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
new file mode 100644
index 000000000000..4860906c6b9f
--- /dev/null
+++ b/arch/x86/mm/kasan_init_64.c
@@ -0,0 +1,206 @@
+#include <linux/bootmem.h>
+#include <linux/kasan.h>
+#include <linux/kdebug.h>
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <linux/vmalloc.h>
+
+#include <asm/tlbflush.h>
+#include <asm/sections.h>
+
+extern pgd_t early_level4_pgt[PTRS_PER_PGD];
+extern struct range pfn_mapped[E820_X_MAX];
+
+extern unsigned char kasan_zero_page[PAGE_SIZE];
+
+static int __init map_range(struct range *range)
+{
+ unsigned long start;
+ unsigned long end;
+
+ start = (unsigned long)kasan_mem_to_shadow(pfn_to_kaddr(range->start));
+ end = (unsigned long)kasan_mem_to_shadow(pfn_to_kaddr(range->end));
+
+ /*
+ * end + 1 here is intentional. We check several shadow bytes in advance
+ * to slightly speed up fastpath. In some rare cases we could cross
+ * boundary of mapped shadow, so we just map some more here.
+ */
+ return vmemmap_populate(start, end + 1, NUMA_NO_NODE);
+}
+
+static void __init clear_pgds(unsigned long start,
+ unsigned long end)
+{
+ for (; start < end; start += PGDIR_SIZE)
+ pgd_clear(pgd_offset_k(start));
+}
+
+void __init kasan_map_early_shadow(pgd_t *pgd)
+{
+ int i;
+ unsigned long start = KASAN_SHADOW_START;
+ unsigned long end = KASAN_SHADOW_END;
+
+ for (i = pgd_index(start); start < end; i++) {
+ pgd[i] = __pgd(__pa_nodebug(kasan_zero_pud)
+ | _KERNPG_TABLE);
+ start += PGDIR_SIZE;
+ }
+}
+
+static int __init zero_pte_populate(pmd_t *pmd, unsigned long addr,
+ unsigned long end)
+{
+ pte_t *pte = pte_offset_kernel(pmd, addr);
+
+ while (addr + PAGE_SIZE <= end) {
+ WARN_ON(!pte_none(*pte));
+ set_pte(pte, __pte(__pa_nodebug(kasan_zero_page)
+ | __PAGE_KERNEL_RO));
+ addr += PAGE_SIZE;
+ pte = pte_offset_kernel(pmd, addr);
+ }
+ return 0;
+}
+
+static int __init zero_pmd_populate(pud_t *pud, unsigned long addr,
+ unsigned long end)
+{
+ int ret = 0;
+ pmd_t *pmd = pmd_offset(pud, addr);
+
+ while (IS_ALIGNED(addr, PMD_SIZE) && addr + PMD_SIZE <= end) {
+ WARN_ON(!pmd_none(*pmd));
+ set_pmd(pmd, __pmd(__pa_nodebug(kasan_zero_pte)
+ | __PAGE_KERNEL_RO));
+ addr += PMD_SIZE;
+ pmd = pmd_offset(pud, addr);
+ }
+ if (addr < end) {
+ if (pmd_none(*pmd)) {
+ void *p = vmemmap_alloc_block(PAGE_SIZE, NUMA_NO_NODE);
+ if (!p)
+ return -ENOMEM;
+ set_pmd(pmd, __pmd(__pa_nodebug(p) | _KERNPG_TABLE));
+ }
+ ret = zero_pte_populate(pmd, addr, end);
+ }
+ return ret;
+}
+
+
+static int __init zero_pud_populate(pgd_t *pgd, unsigned long addr,
+ unsigned long end)
+{
+ int ret = 0;
+ pud_t *pud = pud_offset(pgd, addr);
+
+ while (IS_ALIGNED(addr, PUD_SIZE) && addr + PUD_SIZE <= end) {
+ WARN_ON(!pud_none(*pud));
+ set_pud(pud, __pud(__pa_nodebug(kasan_zero_pmd)
+ | __PAGE_KERNEL_RO));
+ addr += PUD_SIZE;
+ pud = pud_offset(pgd, addr);
+ }
+
+ if (addr < end) {
+ if (pud_none(*pud)) {
+ void *p = vmemmap_alloc_block(PAGE_SIZE, NUMA_NO_NODE);
+ if (!p)
+ return -ENOMEM;
+ set_pud(pud, __pud(__pa_nodebug(p) | _KERNPG_TABLE));
+ }
+ ret = zero_pmd_populate(pud, addr, end);
+ }
+ return ret;
+}
+
+static int __init zero_pgd_populate(unsigned long addr, unsigned long end)
+{
+ int ret = 0;
+ pgd_t *pgd = pgd_offset_k(addr);
+
+ while (IS_ALIGNED(addr, PGDIR_SIZE) && addr + PGDIR_SIZE <= end) {
+ WARN_ON(!pgd_none(*pgd));
+ set_pgd(pgd, __pgd(__pa_nodebug(kasan_zero_pud)
+ | __PAGE_KERNEL_RO));
+ addr += PGDIR_SIZE;
+ pgd = pgd_offset_k(addr);
+ }
+
+ if (addr < end) {
+ if (pgd_none(*pgd)) {
+ void *p = vmemmap_alloc_block(PAGE_SIZE, NUMA_NO_NODE);
+ if (!p)
+ return -ENOMEM;
+ set_pgd(pgd, __pgd(__pa_nodebug(p) | _KERNPG_TABLE));
+ }
+ ret = zero_pud_populate(pgd, addr, end);
+ }
+ return ret;
+}
+
+
+static void __init populate_zero_shadow(const void *start, const void *end)
+{
+ if (zero_pgd_populate((unsigned long)start, (unsigned long)end))
+ panic("kasan: unable to map zero shadow!");
+}
+
+
+#ifdef CONFIG_KASAN_INLINE
+static int kasan_die_handler(struct notifier_block *self,
+ unsigned long val,
+ void *data)
+{
+ if (val == DIE_GPF) {
+ pr_emerg("CONFIG_KASAN_INLINE enabled");
+ pr_emerg("GPF could be caused by NULL-ptr deref or user memory access");
+ }
+ return NOTIFY_OK;
+}
+
+static struct notifier_block kasan_die_notifier = {
+ .notifier_call = kasan_die_handler,
+};
+#endif
+
+void __init kasan_init(void)
+{
+ int i;
+
+#ifdef CONFIG_KASAN_INLINE
+ register_die_notifier(&kasan_die_notifier);
+#endif
+
+ memcpy(early_level4_pgt, init_level4_pgt, sizeof(early_level4_pgt));
+ load_cr3(early_level4_pgt);
+
+ clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END);
+
+ populate_zero_shadow((void *)KASAN_SHADOW_START,
+ kasan_mem_to_shadow((void *)PAGE_OFFSET));
+
+ for (i = 0; i < E820_X_MAX; i++) {
+ if (pfn_mapped[i].end == 0)
+ break;
+
+ if (map_range(&pfn_mapped[i]))
+ panic("kasan: unable to allocate shadow!");
+ }
+ populate_zero_shadow(kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM),
+ kasan_mem_to_shadow((void *)__START_KERNEL_map));
+
+ vmemmap_populate((unsigned long)kasan_mem_to_shadow(_stext),
+ (unsigned long)kasan_mem_to_shadow(_end),
+ NUMA_NO_NODE);
+
+ populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_END),
+ (void *)KASAN_SHADOW_END);
+
+ memset(kasan_zero_page, 0, PAGE_SIZE);
+
+ load_cr3(init_level4_pgt);
+ init_task.kasan_depth = 0;
+}
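
kasan_init() above builds real shadow for the direct mapping and the kernel image, and backs the remaining gaps with a single read-only zero page. The address translation it relies on follows the generic KASAN convention of one shadow byte per 8 bytes of address space, roughly (a sketch, not the header's exact definition):

	static inline void *kasan_mem_to_shadow_like(const void *addr)
	{
		/* one shadow byte covers 8 bytes; KASAN_SHADOW_OFFSET places the shadow region */
		return (void *)(((unsigned long)addr >> 3) + KASAN_SHADOW_OFFSET);
	}

which is why map_range() above only needs to populate a shadow region 1/8th the size of each pfn_mapped range.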
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index 919b91205cd4..df4552bd239e 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -35,12 +35,12 @@ struct va_alignment __read_mostly va_align = {
.flags = -1,
};
-static unsigned int stack_maxrandom_size(void)
+static unsigned long stack_maxrandom_size(void)
{
- unsigned int max = 0;
+ unsigned long max = 0;
if ((current->flags & PF_RANDOMIZE) &&
!(current->personality & ADDR_NO_RANDOMIZE)) {
- max = ((-1U) & STACK_RND_MASK) << PAGE_SHIFT;
+ max = ((-1UL) & STACK_RND_MASK) << PAGE_SHIFT;
}
return max;
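
The widening from unsigned int to unsigned long matters because the 64-bit stack randomization range no longer fits in 32 bits. A quick standalone illustration, taking 0x3fffff as the 64-bit STACK_RND_MASK value and 12 as PAGE_SHIFT (values assumed here, not part of this hunk):

	#include <stdio.h>

	int main(void)
	{
		unsigned int  bad  = ((-1U)  & 0x3fffff) << 12;	/* 32-bit shift wraps: 0xfffff000 */
		unsigned long good = ((-1UL) & 0x3fffff) << 12;	/* 0x3fffff000, roughly 16 GiB of range */

		printf("%#x vs %#lx\n", bad, good);
		return 0;
	}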
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 1a883705a12a..cd4785bbacb9 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -794,7 +794,6 @@ int early_cpu_to_node(int cpu)
void debug_cpumask_set_cpu(int cpu, int node, bool enable)
{
struct cpumask *mask;
- char buf[64];
if (node == NUMA_NO_NODE) {
/* early_cpu_to_node() already emits a warning and trace */
@@ -812,10 +811,9 @@ void debug_cpumask_set_cpu(int cpu, int node, bool enable)
else
cpumask_clear_cpu(cpu, mask);
- cpulist_scnprintf(buf, sizeof(buf), mask);
- printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n",
+ printk(KERN_DEBUG "%s cpu %d node %d: mask now %*pbl\n",
enable ? "numa_add_cpu" : "numa_remove_cpu",
- cpu, node, buf);
+ cpu, node, cpumask_pr_args(mask));
return;
}
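
The cpulist_scnprintf() call and its stack buffer are replaced by the "%*pbl" printf extension, which formats a bitmap as a CPU-list string directly; cpumask_pr_args() supplies the width (nr_cpu_ids) and the bitmap pointer the format expects. A one-line usage sketch (illustrative, not from this hunk):

	pr_debug("node %d cpus: %*pbl\n", node, cpumask_pr_args(cpumask_of_node(node)));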
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 536ea2fb6e33..89af288ec674 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -81,11 +81,9 @@ void arch_report_meminfo(struct seq_file *m)
seq_printf(m, "DirectMap4M: %8lu kB\n",
direct_pages_count[PG_LEVEL_2M] << 12);
#endif
-#ifdef CONFIG_X86_64
if (direct_gbpages)
seq_printf(m, "DirectMap1G: %8lu kB\n",
direct_pages_count[PG_LEVEL_1G] << 20);
-#endif
}
#else
static inline void split_page_count(int level) { }
@@ -1654,13 +1652,11 @@ int set_memory_ro(unsigned long addr, int numpages)
{
return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_RW), 0);
}
-EXPORT_SYMBOL_GPL(set_memory_ro);
int set_memory_rw(unsigned long addr, int numpages)
{
return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_RW), 0);
}
-EXPORT_SYMBOL_GPL(set_memory_rw);
int set_memory_np(unsigned long addr, int numpages)
{
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 7ac68698406c..35af6771a95a 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -610,7 +610,7 @@ pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
}
#ifdef CONFIG_STRICT_DEVMEM
-/* This check is done in drivers/char/mem.c in case of STRICT_DEVMEM*/
+/* This check is done in drivers/char/mem.c in case of STRICT_DEVMEM */
static inline int range_is_allowed(unsigned long pfn, unsigned long size)
{
return 1;
@@ -628,8 +628,8 @@ static inline int range_is_allowed(unsigned long pfn, unsigned long size)
while (cursor < to) {
if (!devmem_is_allowed(pfn)) {
- printk(KERN_INFO "Program %s tried to access /dev/mem between [mem %#010Lx-%#010Lx]\n",
- current->comm, from, to - 1);
+ printk(KERN_INFO "Program %s tried to access /dev/mem between [mem %#010Lx-%#010Lx], PAT prevents it\n",
+ current->comm, from, to - 1);
return 0;
}
cursor += PAGE_SIZE;
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 6fb6927f9e76..5a7e5252c878 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -190,7 +190,7 @@ void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd)
#endif /* CONFIG_X86_PAE */
-static void free_pmds(pmd_t *pmds[])
+static void free_pmds(struct mm_struct *mm, pmd_t *pmds[])
{
int i;
@@ -198,10 +198,11 @@ static void free_pmds(pmd_t *pmds[])
if (pmds[i]) {
pgtable_pmd_page_dtor(virt_to_page(pmds[i]));
free_page((unsigned long)pmds[i]);
+ mm_dec_nr_pmds(mm);
}
}
-static int preallocate_pmds(pmd_t *pmds[])
+static int preallocate_pmds(struct mm_struct *mm, pmd_t *pmds[])
{
int i;
bool failed = false;
@@ -215,11 +216,13 @@ static int preallocate_pmds(pmd_t *pmds[])
pmd = NULL;
failed = true;
}
+ if (pmd)
+ mm_inc_nr_pmds(mm);
pmds[i] = pmd;
}
if (failed) {
- free_pmds(pmds);
+ free_pmds(mm, pmds);
return -ENOMEM;
}
@@ -246,6 +249,7 @@ static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp)
paravirt_release_pmd(pgd_val(pgd) >> PAGE_SHIFT);
pmd_free(mm, pmd);
+ mm_dec_nr_pmds(mm);
}
}
}
@@ -271,19 +275,94 @@ static void pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmds[])
}
}
+/*
+ * Xen paravirt assumes pgd table should be in one page. 64 bit kernel also
+ * assumes that pgd should be in one page.
+ *
+ * But kernel with PAE paging that is not running as a Xen domain
+ * only needs to allocate 32 bytes for pgd instead of one page.
+ */
+#ifdef CONFIG_X86_PAE
+
+#include <linux/slab.h>
+
+#define PGD_SIZE (PTRS_PER_PGD * sizeof(pgd_t))
+#define PGD_ALIGN 32
+
+static struct kmem_cache *pgd_cache;
+
+static int __init pgd_cache_init(void)
+{
+ /*
+ * When PAE kernel is running as a Xen domain, it does not use
+ * shared kernel pmd. And this requires a whole page for pgd.
+ */
+ if (!SHARED_KERNEL_PMD)
+ return 0;
+
+ /*
+ * when PAE kernel is not running as a Xen domain, it uses
+ * shared kernel pmd. Shared kernel pmd does not require a whole
+ * page for pgd. We are able to just allocate a 32-byte for pgd.
+ * During boot time, we create a 32-byte slab for pgd table allocation.
+ */
+ pgd_cache = kmem_cache_create("pgd_cache", PGD_SIZE, PGD_ALIGN,
+ SLAB_PANIC, NULL);
+ if (!pgd_cache)
+ return -ENOMEM;
+
+ return 0;
+}
+core_initcall(pgd_cache_init);
+
+static inline pgd_t *_pgd_alloc(void)
+{
+ /*
+ * If no SHARED_KERNEL_PMD, PAE kernel is running as a Xen domain.
+ * We allocate one page for pgd.
+ */
+ if (!SHARED_KERNEL_PMD)
+ return (pgd_t *)__get_free_page(PGALLOC_GFP);
+
+ /*
+ * Now PAE kernel is not running as a Xen domain. We can allocate
+ * a 32-byte slab for pgd to save memory space.
+ */
+ return kmem_cache_alloc(pgd_cache, PGALLOC_GFP);
+}
+
+static inline void _pgd_free(pgd_t *pgd)
+{
+ if (!SHARED_KERNEL_PMD)
+ free_page((unsigned long)pgd);
+ else
+ kmem_cache_free(pgd_cache, pgd);
+}
+#else
+static inline pgd_t *_pgd_alloc(void)
+{
+ return (pgd_t *)__get_free_page(PGALLOC_GFP);
+}
+
+static inline void _pgd_free(pgd_t *pgd)
+{
+ free_page((unsigned long)pgd);
+}
+#endif /* CONFIG_X86_PAE */
+
pgd_t *pgd_alloc(struct mm_struct *mm)
{
pgd_t *pgd;
pmd_t *pmds[PREALLOCATED_PMDS];
- pgd = (pgd_t *)__get_free_page(PGALLOC_GFP);
+ pgd = _pgd_alloc();
if (pgd == NULL)
goto out;
mm->pgd = pgd;
- if (preallocate_pmds(pmds) != 0)
+ if (preallocate_pmds(mm, pmds) != 0)
goto out_free_pgd;
if (paravirt_pgd_alloc(mm) != 0)
@@ -304,9 +383,9 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
return pgd;
out_free_pmds:
- free_pmds(pmds);
+ free_pmds(mm, pmds);
out_free_pgd:
- free_page((unsigned long)pgd);
+ _pgd_free(pgd);
out:
return NULL;
}
@@ -316,7 +395,7 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd)
pgd_mop_up_pmds(mm, pgd);
pgd_dtor(pgd);
paravirt_pgd_free(mm, pgd);
- free_page((unsigned long)pgd);
+ _pgd_free(pgd);
}
/*
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index ee61c36d64f8..3250f2371aea 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -14,9 +14,6 @@
#include <asm/uv/uv.h>
#include <linux/debugfs.h>
-DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate)
- = { &init_mm, 0, };
-
/*
* Smarter SMP flushing macros.
* c/o Linus Torvalds.