summaryrefslogtreecommitdiff
path: root/arch/arm64/mm
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2015-11-04 14:47:13 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2015-11-04 14:47:13 -0800
commit2dc10ad81fc017837037e60439662e1b16bdffb9 (patch)
treefc2f77874339b2f79499e3b34dc5ecb496b68dfc /arch/arm64/mm
parente627078a0cbdc0c391efeb5a2c4eb287328fd633 (diff)
parentf8f8bdc48851da979c6e0e4808b6031122e4af47 (diff)
Merge tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux
Pull arm64 updates from Catalin Marinas: - "genirq: Introduce generic irq migration for cpu hotunplugged" patch merged from tip/irq/for-arm to allow the arm64-specific part to be upstreamed via the arm64 tree - CPU feature detection reworked to cope with heterogeneous systems where CPUs may not have exactly the same features. The features reported by the kernel via internal data structures or ELF_HWCAP are delayed until all the CPUs are up (and before user space starts) - Support for 16KB pages, with the additional bonus of a 36-bit VA space, though the latter only depending on EXPERT - Implement native {relaxed, acquire, release} atomics for arm64 - New ASID allocation algorithm which avoids IPI on roll-over, together with TLB invalidation optimisations (using local vs global where feasible) - KASan support for arm64 - EFI_STUB clean-up and isolation for the kernel proper (required by KASan) - copy_{to,from,in}_user optimisations (sharing the memcpy template) - perf: moving arm64 to the arm32/64 shared PMU framework - L1_CACHE_BYTES increased to 128 to accommodate Cavium hardware - Support for the contiguous PTE hint on kernel mapping (16 consecutive entries may be able to use a single TLB entry) - Generic CONFIG_HZ now used on arm64 - defconfig updates * tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux: (91 commits) arm64/efi: fix libstub build under CONFIG_MODVERSIONS ARM64: Enable multi-core scheduler support by default arm64/efi: move arm64 specific stub C code to libstub arm64: page-align sections for DEBUG_RODATA arm64: Fix build with CONFIG_ZONE_DMA=n arm64: Fix compat register mappings arm64: Increase the max granular size arm64: remove bogus TASK_SIZE_64 check arm64: make Timer Interrupt Frequency selectable arm64/mm: use PAGE_ALIGNED instead of IS_ALIGNED arm64: cachetype: fix definitions of ICACHEF_* flags arm64: cpufeature: declare enable_cpu_capabilities as static genirq: Make the cpuhotplug migration code less noisy arm64: Constify hwcap name string arrays arm64/kvm: Make use of the system wide safe values arm64/debug: Make use of the system wide safe value arm64: Move FP/ASIMD hwcap handling to common code arm64/HWCAP: Use system wide safe values arm64/capabilities: Make use of system wide safe value arm64: Delay cpu feature capability checks ...
Diffstat (limited to 'arch/arm64/mm')
-rw-r--r--arch/arm64/mm/Makefile3
-rw-r--r--arch/arm64/mm/cache.S10
-rw-r--r--arch/arm64/mm/context.c236
-rw-r--r--arch/arm64/mm/dump.c18
-rw-r--r--arch/arm64/mm/fault.c2
-rw-r--r--arch/arm64/mm/init.c19
-rw-r--r--arch/arm64/mm/kasan_init.c165
-rw-r--r--arch/arm64/mm/mmu.c145
-rw-r--r--arch/arm64/mm/pageattr.c2
-rw-r--r--arch/arm64/mm/pgd.c2
-rw-r--r--arch/arm64/mm/proc.S10
11 files changed, 449 insertions, 163 deletions
diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile
index 773d37a14039..57f57fde5722 100644
--- a/arch/arm64/mm/Makefile
+++ b/arch/arm64/mm/Makefile
@@ -4,3 +4,6 @@ obj-y := dma-mapping.o extable.o fault.o init.o \
context.o proc.o pageattr.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
obj-$(CONFIG_ARM64_PTDUMP) += dump.o
+
+obj-$(CONFIG_KASAN) += kasan_init.o
+KASAN_SANITIZE_kasan_init.o := n
diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
index eb48d5df4a0f..cfa44a6adc0a 100644
--- a/arch/arm64/mm/cache.S
+++ b/arch/arm64/mm/cache.S
@@ -98,7 +98,7 @@ ENTRY(__flush_dcache_area)
b.lo 1b
dsb sy
ret
-ENDPROC(__flush_dcache_area)
+ENDPIPROC(__flush_dcache_area)
/*
* __inval_cache_range(start, end)
@@ -131,7 +131,7 @@ __dma_inv_range:
b.lo 2b
dsb sy
ret
-ENDPROC(__inval_cache_range)
+ENDPIPROC(__inval_cache_range)
ENDPROC(__dma_inv_range)
/*
@@ -171,7 +171,7 @@ ENTRY(__dma_flush_range)
b.lo 1b
dsb sy
ret
-ENDPROC(__dma_flush_range)
+ENDPIPROC(__dma_flush_range)
/*
* __dma_map_area(start, size, dir)
@@ -184,7 +184,7 @@ ENTRY(__dma_map_area)
cmp w2, #DMA_FROM_DEVICE
b.eq __dma_inv_range
b __dma_clean_range
-ENDPROC(__dma_map_area)
+ENDPIPROC(__dma_map_area)
/*
* __dma_unmap_area(start, size, dir)
@@ -197,4 +197,4 @@ ENTRY(__dma_unmap_area)
cmp w2, #DMA_TO_DEVICE
b.ne __dma_inv_range
ret
-ENDPROC(__dma_unmap_area)
+ENDPIPROC(__dma_unmap_area)
diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index d70ff14dbdbd..f636a2639f03 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -17,135 +17,185 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-#include <linux/init.h>
+#include <linux/bitops.h>
#include <linux/sched.h>
+#include <linux/slab.h>
#include <linux/mm.h>
-#include <linux/smp.h>
-#include <linux/percpu.h>
+#include <asm/cpufeature.h>
#include <asm/mmu_context.h>
#include <asm/tlbflush.h>
-#include <asm/cachetype.h>
-#define asid_bits(reg) \
- (((read_cpuid(ID_AA64MMFR0_EL1) & 0xf0) >> 2) + 8)
+static u32 asid_bits;
+static DEFINE_RAW_SPINLOCK(cpu_asid_lock);
-#define ASID_FIRST_VERSION (1 << MAX_ASID_BITS)
+static atomic64_t asid_generation;
+static unsigned long *asid_map;
-static DEFINE_RAW_SPINLOCK(cpu_asid_lock);
-unsigned int cpu_last_asid = ASID_FIRST_VERSION;
+static DEFINE_PER_CPU(atomic64_t, active_asids);
+static DEFINE_PER_CPU(u64, reserved_asids);
+static cpumask_t tlb_flush_pending;
-/*
- * We fork()ed a process, and we need a new context for the child to run in.
- */
-void __init_new_context(struct task_struct *tsk, struct mm_struct *mm)
-{
- mm->context.id = 0;
- raw_spin_lock_init(&mm->context.id_lock);
-}
+#define ASID_MASK (~GENMASK(asid_bits - 1, 0))
+#define ASID_FIRST_VERSION (1UL << asid_bits)
+#define NUM_USER_ASIDS ASID_FIRST_VERSION
-static void flush_context(void)
+static void flush_context(unsigned int cpu)
{
- /* set the reserved TTBR0 before flushing the TLB */
- cpu_set_reserved_ttbr0();
- flush_tlb_all();
- if (icache_is_aivivt())
- __flush_icache_all();
-}
+ int i;
+ u64 asid;
-static void set_mm_context(struct mm_struct *mm, unsigned int asid)
-{
- unsigned long flags;
+ /* Update the list of reserved ASIDs and the ASID bitmap. */
+ bitmap_clear(asid_map, 0, NUM_USER_ASIDS);
/*
- * Locking needed for multi-threaded applications where the same
- * mm->context.id could be set from different CPUs during the
- * broadcast. This function is also called via IPI so the
- * mm->context.id_lock has to be IRQ-safe.
+ * Ensure the generation bump is observed before we xchg the
+ * active_asids.
*/
- raw_spin_lock_irqsave(&mm->context.id_lock, flags);
- if (likely((mm->context.id ^ cpu_last_asid) >> MAX_ASID_BITS)) {
+ smp_wmb();
+
+ for_each_possible_cpu(i) {
+ asid = atomic64_xchg_relaxed(&per_cpu(active_asids, i), 0);
/*
- * Old version of ASID found. Set the new one and reset
- * mm_cpumask(mm).
+ * If this CPU has already been through a
+ * rollover, but hasn't run another task in
+ * the meantime, we must preserve its reserved
+ * ASID, as this is the only trace we have of
+ * the process it is still running.
*/
- mm->context.id = asid;
- cpumask_clear(mm_cpumask(mm));
+ if (asid == 0)
+ asid = per_cpu(reserved_asids, i);
+ __set_bit(asid & ~ASID_MASK, asid_map);
+ per_cpu(reserved_asids, i) = asid;
}
- raw_spin_unlock_irqrestore(&mm->context.id_lock, flags);
- /*
- * Set the mm_cpumask(mm) bit for the current CPU.
- */
- cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
+ /* Queue a TLB invalidate and flush the I-cache if necessary. */
+ cpumask_setall(&tlb_flush_pending);
+
+ if (icache_is_aivivt())
+ __flush_icache_all();
}
-/*
- * Reset the ASID on the current CPU. This function call is broadcast from the
- * CPU handling the ASID rollover and holding cpu_asid_lock.
- */
-static void reset_context(void *info)
+static int is_reserved_asid(u64 asid)
+{
+ int cpu;
+ for_each_possible_cpu(cpu)
+ if (per_cpu(reserved_asids, cpu) == asid)
+ return 1;
+ return 0;
+}
+
+static u64 new_context(struct mm_struct *mm, unsigned int cpu)
{
- unsigned int asid;
- unsigned int cpu = smp_processor_id();
- struct mm_struct *mm = current->active_mm;
+ static u32 cur_idx = 1;
+ u64 asid = atomic64_read(&mm->context.id);
+ u64 generation = atomic64_read(&asid_generation);
+
+ if (asid != 0) {
+ /*
+ * If our current ASID was active during a rollover, we
+ * can continue to use it and this was just a false alarm.
+ */
+ if (is_reserved_asid(asid))
+ return generation | (asid & ~ASID_MASK);
+
+ /*
+ * We had a valid ASID in a previous life, so try to re-use
+ * it if possible.
+ */
+ asid &= ~ASID_MASK;
+ if (!__test_and_set_bit(asid, asid_map))
+ goto bump_gen;
+ }
/*
- * current->active_mm could be init_mm for the idle thread immediately
- * after secondary CPU boot or hotplug. TTBR0_EL1 is already set to
- * the reserved value, so no need to reset any context.
+ * Allocate a free ASID. If we can't find one, take a note of the
+ * currently active ASIDs and mark the TLBs as requiring flushes.
+ * We always count from ASID #1, as we use ASID #0 when setting a
+ * reserved TTBR0 for the init_mm.
*/
- if (mm == &init_mm)
- return;
+ asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, cur_idx);
+ if (asid != NUM_USER_ASIDS)
+ goto set_asid;
- smp_rmb();
- asid = cpu_last_asid + cpu;
+ /* We're out of ASIDs, so increment the global generation count */
+ generation = atomic64_add_return_relaxed(ASID_FIRST_VERSION,
+ &asid_generation);
+ flush_context(cpu);
- flush_context();
- set_mm_context(mm, asid);
+ /* We have at least 1 ASID per CPU, so this will always succeed */
+ asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, 1);
- /* set the new ASID */
- cpu_switch_mm(mm->pgd, mm);
+set_asid:
+ __set_bit(asid, asid_map);
+ cur_idx = asid;
+
+bump_gen:
+ asid |= generation;
+ return asid;
}
-void __new_context(struct mm_struct *mm)
+void check_and_switch_context(struct mm_struct *mm, unsigned int cpu)
{
- unsigned int asid;
- unsigned int bits = asid_bits();
+ unsigned long flags;
+ u64 asid;
+
+ asid = atomic64_read(&mm->context.id);
- raw_spin_lock(&cpu_asid_lock);
/*
- * Check the ASID again, in case the change was broadcast from another
- * CPU before we acquired the lock.
+ * The memory ordering here is subtle. We rely on the control
+ * dependency between the generation read and the update of
+ * active_asids to ensure that we are synchronised with a
+ * parallel rollover (i.e. this pairs with the smp_wmb() in
+ * flush_context).
*/
- if (!unlikely((mm->context.id ^ cpu_last_asid) >> MAX_ASID_BITS)) {
- cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
- raw_spin_unlock(&cpu_asid_lock);
- return;
+ if (!((asid ^ atomic64_read(&asid_generation)) >> asid_bits)
+ && atomic64_xchg_relaxed(&per_cpu(active_asids, cpu), asid))
+ goto switch_mm_fastpath;
+
+ raw_spin_lock_irqsave(&cpu_asid_lock, flags);
+ /* Check that our ASID belongs to the current generation. */
+ asid = atomic64_read(&mm->context.id);
+ if ((asid ^ atomic64_read(&asid_generation)) >> asid_bits) {
+ asid = new_context(mm, cpu);
+ atomic64_set(&mm->context.id, asid);
}
- /*
- * At this point, it is guaranteed that the current mm (with an old
- * ASID) isn't active on any other CPU since the ASIDs are changed
- * simultaneously via IPI.
- */
- asid = ++cpu_last_asid;
- /*
- * If we've used up all our ASIDs, we need to start a new version and
- * flush the TLB.
- */
- if (unlikely((asid & ((1 << bits) - 1)) == 0)) {
- /* increment the ASID version */
- cpu_last_asid += (1 << MAX_ASID_BITS) - (1 << bits);
- if (cpu_last_asid == 0)
- cpu_last_asid = ASID_FIRST_VERSION;
- asid = cpu_last_asid + smp_processor_id();
- flush_context();
- smp_wmb();
- smp_call_function(reset_context, NULL, 1);
- cpu_last_asid += NR_CPUS - 1;
+ if (cpumask_test_and_clear_cpu(cpu, &tlb_flush_pending))
+ local_flush_tlb_all();
+
+ atomic64_set(&per_cpu(active_asids, cpu), asid);
+ raw_spin_unlock_irqrestore(&cpu_asid_lock, flags);
+
+switch_mm_fastpath:
+ cpu_switch_mm(mm->pgd, mm);
+}
+
+static int asids_init(void)
+{
+ int fld = cpuid_feature_extract_field(read_cpuid(ID_AA64MMFR0_EL1), 4);
+
+ switch (fld) {
+ default:
+ pr_warn("Unknown ASID size (%d); assuming 8-bit\n", fld);
+ /* Fallthrough */
+ case 0:
+ asid_bits = 8;
+ break;
+ case 2:
+ asid_bits = 16;
}
- set_mm_context(mm, asid);
- raw_spin_unlock(&cpu_asid_lock);
+ /* If we end up with more CPUs than ASIDs, expect things to crash */
+ WARN_ON(NUM_USER_ASIDS < num_possible_cpus());
+ atomic64_set(&asid_generation, ASID_FIRST_VERSION);
+ asid_map = kzalloc(BITS_TO_LONGS(NUM_USER_ASIDS) * sizeof(*asid_map),
+ GFP_KERNEL);
+ if (!asid_map)
+ panic("Failed to allocate bitmap for %lu ASIDs\n",
+ NUM_USER_ASIDS);
+
+ pr_info("ASID allocator initialised with %lu entries\n", NUM_USER_ASIDS);
+ return 0;
}
+early_initcall(asids_init);
diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c
index f3d6221cd5bd..5a22a119a74c 100644
--- a/arch/arm64/mm/dump.c
+++ b/arch/arm64/mm/dump.c
@@ -67,6 +67,12 @@ static struct addr_marker address_markers[] = {
{ -1, NULL },
};
+/*
+ * The page dumper groups page table entries of the same type into a single
+ * description. It uses pg_state to track the range information while
+ * iterating over the pte entries. When the continuity is broken it then
+ * dumps out a description of the range.
+ */
struct pg_state {
struct seq_file *seq;
const struct addr_marker *marker;
@@ -114,6 +120,16 @@ static const struct prot_bits pte_bits[] = {
.set = "NG",
.clear = " ",
}, {
+ .mask = PTE_CONT,
+ .val = PTE_CONT,
+ .set = "CON",
+ .clear = " ",
+ }, {
+ .mask = PTE_TABLE_BIT,
+ .val = PTE_TABLE_BIT,
+ .set = " ",
+ .clear = "BLK",
+ }, {
.mask = PTE_UXN,
.val = PTE_UXN,
.set = "UXN",
@@ -198,7 +214,7 @@ static void note_page(struct pg_state *st, unsigned long addr, unsigned level,
unsigned long delta;
if (st->current_prot) {
- seq_printf(st->seq, "0x%16lx-0x%16lx ",
+ seq_printf(st->seq, "0x%016lx-0x%016lx ",
st->start_address, addr);
delta = (addr - st->start_address) >> 10;
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 9fadf6d7039b..19211c4a8911 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -556,7 +556,7 @@ asmlinkage int __exception do_debug_exception(unsigned long addr,
}
#ifdef CONFIG_ARM64_PAN
-void cpu_enable_pan(void)
+void cpu_enable_pan(void *__unused)
{
config_sctlr_el1(SCTLR_EL1_SPAN, 0);
}
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index f5c0680d17d9..17bf39ac83ba 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -86,10 +86,10 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max)
memset(zone_size, 0, sizeof(zone_size));
/* 4GB maximum for 32-bit only capable devices */
- if (IS_ENABLED(CONFIG_ZONE_DMA)) {
- max_dma = PFN_DOWN(arm64_dma_phys_limit);
- zone_size[ZONE_DMA] = max_dma - min;
- }
+#ifdef CONFIG_ZONE_DMA
+ max_dma = PFN_DOWN(arm64_dma_phys_limit);
+ zone_size[ZONE_DMA] = max_dma - min;
+#endif
zone_size[ZONE_NORMAL] = max - max_dma;
memcpy(zhole_size, zone_size, sizeof(zhole_size));
@@ -101,11 +101,12 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max)
if (start >= max)
continue;
- if (IS_ENABLED(CONFIG_ZONE_DMA) && start < max_dma) {
+#ifdef CONFIG_ZONE_DMA
+ if (start < max_dma) {
unsigned long dma_end = min(end, max_dma);
zhole_size[ZONE_DMA] -= dma_end - start;
}
-
+#endif
if (end > max_dma) {
unsigned long normal_end = min(end, max);
unsigned long normal_start = max(start, max_dma);
@@ -298,6 +299,9 @@ void __init mem_init(void)
#define MLK_ROUNDUP(b, t) b, t, DIV_ROUND_UP(((t) - (b)), SZ_1K)
pr_notice("Virtual kernel memory layout:\n"
+#ifdef CONFIG_KASAN
+ " kasan : 0x%16lx - 0x%16lx (%6ld GB)\n"
+#endif
" vmalloc : 0x%16lx - 0x%16lx (%6ld GB)\n"
#ifdef CONFIG_SPARSEMEM_VMEMMAP
" vmemmap : 0x%16lx - 0x%16lx (%6ld GB maximum)\n"
@@ -310,6 +314,9 @@ void __init mem_init(void)
" .init : 0x%p" " - 0x%p" " (%6ld KB)\n"
" .text : 0x%p" " - 0x%p" " (%6ld KB)\n"
" .data : 0x%p" " - 0x%p" " (%6ld KB)\n",
+#ifdef CONFIG_KASAN
+ MLG(KASAN_SHADOW_START, KASAN_SHADOW_END),
+#endif
MLG(VMALLOC_START, VMALLOC_END),
#ifdef CONFIG_SPARSEMEM_VMEMMAP
MLG((unsigned long)vmemmap,
diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c
new file mode 100644
index 000000000000..cf038c7d9fa9
--- /dev/null
+++ b/arch/arm64/mm/kasan_init.c
@@ -0,0 +1,165 @@
+/*
+ * This file contains kasan initialization code for ARM64.
+ *
+ * Copyright (c) 2015 Samsung Electronics Co., Ltd.
+ * Author: Andrey Ryabinin <ryabinin.a.a@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#define pr_fmt(fmt) "kasan: " fmt
+#include <linux/kasan.h>
+#include <linux/kernel.h>
+#include <linux/memblock.h>
+#include <linux/start_kernel.h>
+
+#include <asm/page.h>
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+#include <asm/tlbflush.h>
+
+static pgd_t tmp_pg_dir[PTRS_PER_PGD] __initdata __aligned(PGD_SIZE);
+
+static void __init kasan_early_pte_populate(pmd_t *pmd, unsigned long addr,
+ unsigned long end)
+{
+ pte_t *pte;
+ unsigned long next;
+
+ if (pmd_none(*pmd))
+ pmd_populate_kernel(&init_mm, pmd, kasan_zero_pte);
+
+ pte = pte_offset_kernel(pmd, addr);
+ do {
+ next = addr + PAGE_SIZE;
+ set_pte(pte, pfn_pte(virt_to_pfn(kasan_zero_page),
+ PAGE_KERNEL));
+ } while (pte++, addr = next, addr != end && pte_none(*pte));
+}
+
+static void __init kasan_early_pmd_populate(pud_t *pud,
+ unsigned long addr,
+ unsigned long end)
+{
+ pmd_t *pmd;
+ unsigned long next;
+
+ if (pud_none(*pud))
+ pud_populate(&init_mm, pud, kasan_zero_pmd);
+
+ pmd = pmd_offset(pud, addr);
+ do {
+ next = pmd_addr_end(addr, end);
+ kasan_early_pte_populate(pmd, addr, next);
+ } while (pmd++, addr = next, addr != end && pmd_none(*pmd));
+}
+
+static void __init kasan_early_pud_populate(pgd_t *pgd,
+ unsigned long addr,
+ unsigned long end)
+{
+ pud_t *pud;
+ unsigned long next;
+
+ if (pgd_none(*pgd))
+ pgd_populate(&init_mm, pgd, kasan_zero_pud);
+
+ pud = pud_offset(pgd, addr);
+ do {
+ next = pud_addr_end(addr, end);
+ kasan_early_pmd_populate(pud, addr, next);
+ } while (pud++, addr = next, addr != end && pud_none(*pud));
+}
+
+static void __init kasan_map_early_shadow(void)
+{
+ unsigned long addr = KASAN_SHADOW_START;
+ unsigned long end = KASAN_SHADOW_END;
+ unsigned long next;
+ pgd_t *pgd;
+
+ pgd = pgd_offset_k(addr);
+ do {
+ next = pgd_addr_end(addr, end);
+ kasan_early_pud_populate(pgd, addr, next);
+ } while (pgd++, addr = next, addr != end);
+}
+
+asmlinkage void __init kasan_early_init(void)
+{
+ BUILD_BUG_ON(KASAN_SHADOW_OFFSET != KASAN_SHADOW_END - (1UL << 61));
+ BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, PGDIR_SIZE));
+ BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PGDIR_SIZE));
+ kasan_map_early_shadow();
+}
+
+static void __init clear_pgds(unsigned long start,
+ unsigned long end)
+{
+ /*
+ * Remove references to kasan page tables from
+ * swapper_pg_dir. pgd_clear() can't be used
+ * here because it's nop on 2,3-level pagetable setups
+ */
+ for (; start < end; start += PGDIR_SIZE)
+ set_pgd(pgd_offset_k(start), __pgd(0));
+}
+
+static void __init cpu_set_ttbr1(unsigned long ttbr1)
+{
+ asm(
+ " msr ttbr1_el1, %0\n"
+ " isb"
+ :
+ : "r" (ttbr1));
+}
+
+void __init kasan_init(void)
+{
+ struct memblock_region *reg;
+
+ /*
+ * We are going to perform proper setup of shadow memory.
+ * At first we should unmap early shadow (clear_pgds() call bellow).
+ * However, instrumented code couldn't execute without shadow memory.
+ * tmp_pg_dir used to keep early shadow mapped until full shadow
+ * setup will be finished.
+ */
+ memcpy(tmp_pg_dir, swapper_pg_dir, sizeof(tmp_pg_dir));
+ cpu_set_ttbr1(__pa(tmp_pg_dir));
+ flush_tlb_all();
+
+ clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END);
+
+ kasan_populate_zero_shadow((void *)KASAN_SHADOW_START,
+ kasan_mem_to_shadow((void *)MODULES_VADDR));
+
+ for_each_memblock(memory, reg) {
+ void *start = (void *)__phys_to_virt(reg->base);
+ void *end = (void *)__phys_to_virt(reg->base + reg->size);
+
+ if (start >= end)
+ break;
+
+ /*
+ * end + 1 here is intentional. We check several shadow bytes in
+ * advance to slightly speed up fastpath. In some rare cases
+ * we could cross boundary of mapped shadow, so we just map
+ * some more here.
+ */
+ vmemmap_populate((unsigned long)kasan_mem_to_shadow(start),
+ (unsigned long)kasan_mem_to_shadow(end) + 1,
+ pfn_to_nid(virt_to_pfn(start)));
+ }
+
+ memset(kasan_zero_page, 0, PAGE_SIZE);
+ cpu_set_ttbr1(__pa(swapper_pg_dir));
+ flush_tlb_all();
+
+ /* At this point kasan is fully initialized. Enable error messages */
+ init_task.kasan_depth = 0;
+ pr_info("KernelAddressSanitizer initialized\n");
+}
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 9211b8527f25..c2fa6b56613c 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -32,6 +32,7 @@
#include <asm/cputype.h>
#include <asm/fixmap.h>
+#include <asm/kernel-pgtable.h>
#include <asm/sections.h>
#include <asm/setup.h>
#include <asm/sizes.h>
@@ -80,19 +81,55 @@ static void split_pmd(pmd_t *pmd, pte_t *pte)
do {
/*
* Need to have the least restrictive permissions available
- * permissions will be fixed up later
+ * permissions will be fixed up later. Default the new page
+ * range as contiguous ptes.
*/
- set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC));
+ set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC_CONT));
pfn++;
} while (pte++, i++, i < PTRS_PER_PTE);
}
+/*
+ * Given a PTE with the CONT bit set, determine where the CONT range
+ * starts, and clear the entire range of PTE CONT bits.
+ */
+static void clear_cont_pte_range(pte_t *pte, unsigned long addr)
+{
+ int i;
+
+ pte -= CONT_RANGE_OFFSET(addr);
+ for (i = 0; i < CONT_PTES; i++) {
+ set_pte(pte, pte_mknoncont(*pte));
+ pte++;
+ }
+ flush_tlb_all();
+}
+
+/*
+ * Given a range of PTEs set the pfn and provided page protection flags
+ */
+static void __populate_init_pte(pte_t *pte, unsigned long addr,
+ unsigned long end, phys_addr_t phys,
+ pgprot_t prot)
+{
+ unsigned long pfn = __phys_to_pfn(phys);
+
+ do {
+ /* clear all the bits except the pfn, then apply the prot */
+ set_pte(pte, pfn_pte(pfn, prot));
+ pte++;
+ pfn++;
+ addr += PAGE_SIZE;
+ } while (addr != end);
+}
+
static void alloc_init_pte(pmd_t *pmd, unsigned long addr,
- unsigned long end, unsigned long pfn,
+ unsigned long end, phys_addr_t phys,
pgprot_t prot,
void *(*alloc)(unsigned long size))
{
pte_t *pte;
+ unsigned long next;
if (pmd_none(*pmd) || pmd_sect(*pmd)) {
pte = alloc(PTRS_PER_PTE * sizeof(pte_t));
@@ -105,9 +142,27 @@ static void alloc_init_pte(pmd_t *pmd, unsigned long addr,
pte = pte_offset_kernel(pmd, addr);
do {
- set_pte(pte, pfn_pte(pfn, prot));
- pfn++;
- } while (pte++, addr += PAGE_SIZE, addr != end);
+ next = min(end, (addr + CONT_SIZE) & CONT_MASK);
+ if (((addr | next | phys) & ~CONT_MASK) == 0) {
+ /* a block of CONT_PTES */
+ __populate_init_pte(pte, addr, next, phys,
+ prot | __pgprot(PTE_CONT));
+ } else {
+ /*
+ * If the range being split is already inside of a
+ * contiguous range but this PTE isn't going to be
+ * contiguous, then we want to unmark the adjacent
+ * ranges, then update the portion of the range we
+ * are interrested in.
+ */
+ clear_cont_pte_range(pte, addr);
+ __populate_init_pte(pte, addr, next, phys, prot);
+ }
+
+ pte += (next - addr) >> PAGE_SHIFT;
+ phys += next - addr;
+ addr = next;
+ } while (addr != end);
}
void split_pud(pud_t *old_pud, pmd_t *pmd)
@@ -168,8 +223,7 @@ static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud,
}
}
} else {
- alloc_init_pte(pmd, addr, next, __phys_to_pfn(phys),
- prot, alloc);
+ alloc_init_pte(pmd, addr, next, phys, prot, alloc);
}
phys += next - addr;
} while (pmd++, addr = next, addr != end);
@@ -353,14 +407,11 @@ static void __init map_mem(void)
* memory addressable from the initial direct kernel mapping.
*
* The initial direct kernel mapping, located at swapper_pg_dir, gives
- * us PUD_SIZE (4K pages) or PMD_SIZE (64K pages) memory starting from
- * PHYS_OFFSET (which must be aligned to 2MB as per
- * Documentation/arm64/booting.txt).
+ * us PUD_SIZE (with SECTION maps) or PMD_SIZE (without SECTION maps,
+ * memory starting from PHYS_OFFSET (which must be aligned to 2MB as
+ * per Documentation/arm64/booting.txt).
*/
- if (IS_ENABLED(CONFIG_ARM64_64K_PAGES))
- limit = PHYS_OFFSET + PMD_SIZE;
- else
- limit = PHYS_OFFSET + PUD_SIZE;
+ limit = PHYS_OFFSET + SWAPPER_INIT_MAP_SIZE;
memblock_set_current_limit(limit);
/* map all the memory banks */
@@ -371,21 +422,24 @@ static void __init map_mem(void)
if (start >= end)
break;
-#ifndef CONFIG_ARM64_64K_PAGES
- /*
- * For the first memory bank align the start address and
- * current memblock limit to prevent create_mapping() from
- * allocating pte page tables from unmapped memory.
- * When 64K pages are enabled, the pte page table for the
- * first PGDIR_SIZE is already present in swapper_pg_dir.
- */
- if (start < limit)
- start = ALIGN(start, PMD_SIZE);
- if (end < limit) {
- limit = end & PMD_MASK;
- memblock_set_current_limit(limit);
+ if (ARM64_SWAPPER_USES_SECTION_MAPS) {
+ /*
+ * For the first memory bank align the start address and
+ * current memblock limit to prevent create_mapping() from
+ * allocating pte page tables from unmapped memory. With
+ * the section maps, if the first block doesn't end on section
+ * size boundary, create_mapping() will try to allocate a pte
+ * page, which may be returned from an unmapped area.
+ * When section maps are not used, the pte page table for the
+ * current limit is already present in swapper_pg_dir.
+ */
+ if (start < limit)
+ start = ALIGN(start, SECTION_SIZE);
+ if (end < limit) {
+ limit = end & SECTION_MASK;
+ memblock_set_current_limit(limit);
+ }
}
-#endif
__map_memblock(start, end);
}
@@ -456,7 +510,7 @@ void __init paging_init(void)
* point to zero page to avoid speculatively fetching new entries.
*/
cpu_set_reserved_ttbr0();
- flush_tlb_all();
+ local_flush_tlb_all();
cpu_set_default_tcr_t0sz();
}
@@ -498,12 +552,12 @@ int kern_addr_valid(unsigned long addr)
return pfn_valid(pte_pfn(*pte));
}
#ifdef CONFIG_SPARSEMEM_VMEMMAP
-#ifdef CONFIG_ARM64_64K_PAGES
+#if !ARM64_SWAPPER_USES_SECTION_MAPS
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
{
return vmemmap_populate_basepages(start, end, node);
}
-#else /* !CONFIG_ARM64_64K_PAGES */
+#else /* !ARM64_SWAPPER_USES_SECTION_MAPS */
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
{
unsigned long addr = start;
@@ -638,7 +692,7 @@ void *__init fixmap_remap_fdt(phys_addr_t dt_phys)
{
const u64 dt_virt_base = __fix_to_virt(FIX_FDT);
pgprot_t prot = PAGE_KERNEL | PTE_RDONLY;
- int granularity, size, offset;
+ int size, offset;
void *dt_virt;
/*
@@ -664,24 +718,15 @@ void *__init fixmap_remap_fdt(phys_addr_t dt_phys)
*/
BUILD_BUG_ON(dt_virt_base % SZ_2M);
- if (IS_ENABLED(CONFIG_ARM64_64K_PAGES)) {
- BUILD_BUG_ON(__fix_to_virt(FIX_FDT_END) >> PMD_SHIFT !=
- __fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT);
-
- granularity = PAGE_SIZE;
- } else {
- BUILD_BUG_ON(__fix_to_virt(FIX_FDT_END) >> PUD_SHIFT !=
- __fix_to_virt(FIX_BTMAP_BEGIN) >> PUD_SHIFT);
-
- granularity = PMD_SIZE;
- }
+ BUILD_BUG_ON(__fix_to_virt(FIX_FDT_END) >> SWAPPER_TABLE_SHIFT !=
+ __fix_to_virt(FIX_BTMAP_BEGIN) >> SWAPPER_TABLE_SHIFT);
- offset = dt_phys % granularity;
+ offset = dt_phys % SWAPPER_BLOCK_SIZE;
dt_virt = (void *)dt_virt_base + offset;
/* map the first chunk so we can read the size from the header */
- create_mapping(round_down(dt_phys, granularity), dt_virt_base,
- granularity, prot);
+ create_mapping(round_down(dt_phys, SWAPPER_BLOCK_SIZE), dt_virt_base,
+ SWAPPER_BLOCK_SIZE, prot);
if (fdt_check_header(dt_virt) != 0)
return NULL;
@@ -690,9 +735,9 @@ void *__init fixmap_remap_fdt(phys_addr_t dt_phys)
if (size > MAX_FDT_SIZE)
return NULL;
- if (offset + size > granularity)
- create_mapping(round_down(dt_phys, granularity), dt_virt_base,
- round_up(offset + size, granularity), prot);
+ if (offset + size > SWAPPER_BLOCK_SIZE)
+ create_mapping(round_down(dt_phys, SWAPPER_BLOCK_SIZE), dt_virt_base,
+ round_up(offset + size, SWAPPER_BLOCK_SIZE), prot);
memblock_reserve(dt_phys, size);
diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c
index e47ed1c5dce1..3571c7309c5e 100644
--- a/arch/arm64/mm/pageattr.c
+++ b/arch/arm64/mm/pageattr.c
@@ -45,7 +45,7 @@ static int change_memory_common(unsigned long addr, int numpages,
int ret;
struct page_change_data data;
- if (!IS_ALIGNED(addr, PAGE_SIZE)) {
+ if (!PAGE_ALIGNED(addr)) {
start &= PAGE_MASK;
end = start + size;
WARN_ON_ONCE(1);
diff --git a/arch/arm64/mm/pgd.c b/arch/arm64/mm/pgd.c
index 71ca104f97bd..cb3ba1b812e7 100644
--- a/arch/arm64/mm/pgd.c
+++ b/arch/arm64/mm/pgd.c
@@ -28,8 +28,6 @@
#include "mm.h"
-#define PGD_SIZE (PTRS_PER_PGD * sizeof(pgd_t))
-
static struct kmem_cache *pgd_cache;
pgd_t *pgd_alloc(struct mm_struct *mm)
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 7783ff05f74c..cacecc4ad3e5 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -30,7 +30,9 @@
#ifdef CONFIG_ARM64_64K_PAGES
#define TCR_TG_FLAGS TCR_TG0_64K | TCR_TG1_64K
-#else
+#elif defined(CONFIG_ARM64_16K_PAGES)
+#define TCR_TG_FLAGS TCR_TG0_16K | TCR_TG1_16K
+#else /* CONFIG_ARM64_4K_PAGES */
#define TCR_TG_FLAGS TCR_TG0_4K | TCR_TG1_4K
#endif
@@ -130,7 +132,7 @@ ENDPROC(cpu_do_resume)
* - pgd_phys - physical address of new TTB
*/
ENTRY(cpu_do_switch_mm)
- mmid w1, x1 // get mm->context.id
+ mmid x1, x1 // get mm->context.id
bfi x0, x1, #48, #16 // set the ASID
msr ttbr0_el1, x0 // set TTBR0
isb
@@ -146,8 +148,8 @@ ENDPROC(cpu_do_switch_mm)
* value of the SCTLR_EL1 register.
*/
ENTRY(__cpu_setup)
- tlbi vmalle1is // invalidate I + D TLBs
- dsb ish
+ tlbi vmalle1 // Invalidate local TLB
+ dsb nsh
mov x0, #3 << 20
msr cpacr_el1, x0 // Enable FP/ASIMD