summaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2015-11-04 14:47:13 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2015-11-04 14:47:13 -0800
commit2dc10ad81fc017837037e60439662e1b16bdffb9 (patch)
treefc2f77874339b2f79499e3b34dc5ecb496b68dfc /arch
parente627078a0cbdc0c391efeb5a2c4eb287328fd633 (diff)
parentf8f8bdc48851da979c6e0e4808b6031122e4af47 (diff)
Merge tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux
Pull arm64 updates from Catalin Marinas: - "genirq: Introduce generic irq migration for cpu hotunplugged" patch merged from tip/irq/for-arm to allow the arm64-specific part to be upstreamed via the arm64 tree - CPU feature detection reworked to cope with heterogeneous systems where CPUs may not have exactly the same features. The features reported by the kernel via internal data structures or ELF_HWCAP are delayed until all the CPUs are up (and before user space starts) - Support for 16KB pages, with the additional bonus of a 36-bit VA space, though the latter only depending on EXPERT - Implement native {relaxed, acquire, release} atomics for arm64 - New ASID allocation algorithm which avoids IPI on roll-over, together with TLB invalidation optimisations (using local vs global where feasible) - KASan support for arm64 - EFI_STUB clean-up and isolation for the kernel proper (required by KASan) - copy_{to,from,in}_user optimisations (sharing the memcpy template) - perf: moving arm64 to the arm32/64 shared PMU framework - L1_CACHE_BYTES increased to 128 to accommodate Cavium hardware - Support for the contiguous PTE hint on kernel mapping (16 consecutive entries may be able to use a single TLB entry) - Generic CONFIG_HZ now used on arm64 - defconfig updates * tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux: (91 commits) arm64/efi: fix libstub build under CONFIG_MODVERSIONS ARM64: Enable multi-core scheduler support by default arm64/efi: move arm64 specific stub C code to libstub arm64: page-align sections for DEBUG_RODATA arm64: Fix build with CONFIG_ZONE_DMA=n arm64: Fix compat register mappings arm64: Increase the max granular size arm64: remove bogus TASK_SIZE_64 check arm64: make Timer Interrupt Frequency selectable arm64/mm: use PAGE_ALIGNED instead of IS_ALIGNED arm64: cachetype: fix definitions of ICACHEF_* flags arm64: cpufeature: declare enable_cpu_capabilities as static genirq: Make the cpuhotplug migration code less noisy arm64: Constify hwcap name string arrays arm64/kvm: Make use of the system wide safe values arm64/debug: Make use of the system wide safe value arm64: Move FP/ASIMD hwcap handling to common code arm64/HWCAP: Use system wide safe values arm64/capabilities: Make use of system wide safe value arm64: Delay cpu feature capability checks ...
Diffstat (limited to 'arch')
-rw-r--r--arch/arm64/Kconfig69
-rw-r--r--arch/arm64/Kconfig.debug2
-rw-r--r--arch/arm64/Makefile7
-rw-r--r--arch/arm64/boot/dts/arm/juno-r1.dts18
-rw-r--r--arch/arm64/boot/dts/arm/juno.dts18
-rw-r--r--arch/arm64/configs/defconfig9
-rw-r--r--arch/arm64/include/asm/assembler.h11
-rw-r--r--arch/arm64/include/asm/atomic.h63
-rw-r--r--arch/arm64/include/asm/atomic_ll_sc.h98
-rw-r--r--arch/arm64/include/asm/atomic_lse.h193
-rw-r--r--arch/arm64/include/asm/cache.h2
-rw-r--r--arch/arm64/include/asm/cacheflush.h7
-rw-r--r--arch/arm64/include/asm/cachetype.h4
-rw-r--r--arch/arm64/include/asm/cmpxchg.h279
-rw-r--r--arch/arm64/include/asm/cpu.h4
-rw-r--r--arch/arm64/include/asm/cpufeature.h91
-rw-r--r--arch/arm64/include/asm/cputype.h15
-rw-r--r--arch/arm64/include/asm/fixmap.h7
-rw-r--r--arch/arm64/include/asm/hw_breakpoint.h9
-rw-r--r--arch/arm64/include/asm/hwcap.h8
-rw-r--r--arch/arm64/include/asm/irq.h1
-rw-r--r--arch/arm64/include/asm/kasan.h38
-rw-r--r--arch/arm64/include/asm/kernel-pgtable.h83
-rw-r--r--arch/arm64/include/asm/memory.h6
-rw-r--r--arch/arm64/include/asm/mmu.h15
-rw-r--r--arch/arm64/include/asm/mmu_context.h113
-rw-r--r--arch/arm64/include/asm/page.h27
-rw-r--r--arch/arm64/include/asm/pgalloc.h1
-rw-r--r--arch/arm64/include/asm/pgtable-hwdef.h48
-rw-r--r--arch/arm64/include/asm/pgtable.h30
-rw-r--r--arch/arm64/include/asm/pmu.h83
-rw-r--r--arch/arm64/include/asm/processor.h2
-rw-r--r--arch/arm64/include/asm/ptrace.h16
-rw-r--r--arch/arm64/include/asm/string.h16
-rw-r--r--arch/arm64/include/asm/sysreg.h157
-rw-r--r--arch/arm64/include/asm/thread_info.h5
-rw-r--r--arch/arm64/include/asm/tlb.h26
-rw-r--r--arch/arm64/include/asm/tlbflush.h18
-rw-r--r--arch/arm64/kernel/Makefile11
-rw-r--r--arch/arm64/kernel/arm64ksyms.c3
-rw-r--r--arch/arm64/kernel/asm-offsets.c2
-rw-r--r--arch/arm64/kernel/cpu_errata.c2
-rw-r--r--arch/arm64/kernel/cpufeature.c851
-rw-r--r--arch/arm64/kernel/cpuinfo.c254
-rw-r--r--arch/arm64/kernel/debug-monitors.c6
-rw-r--r--arch/arm64/kernel/efi-entry.S10
-rw-r--r--arch/arm64/kernel/efi-stub.c78
-rw-r--r--arch/arm64/kernel/efi.c5
-rw-r--r--arch/arm64/kernel/entry.S2
-rw-r--r--arch/arm64/kernel/fpsimd.c16
-rw-r--r--arch/arm64/kernel/head.S76
-rw-r--r--arch/arm64/kernel/hw_breakpoint.c19
-rw-r--r--arch/arm64/kernel/image.h38
-rw-r--r--arch/arm64/kernel/irq.c62
-rw-r--r--arch/arm64/kernel/module.c16
-rw-r--r--arch/arm64/kernel/perf_event.c1066
-rw-r--r--arch/arm64/kernel/process.c3
-rw-r--r--arch/arm64/kernel/setup.c245
-rw-r--r--arch/arm64/kernel/smp.c22
-rw-r--r--arch/arm64/kernel/suspend.c2
-rw-r--r--arch/arm64/kernel/traps.c15
-rw-r--r--arch/arm64/kernel/vmlinux.lds.S6
-rw-r--r--arch/arm64/kvm/Kconfig3
-rw-r--r--arch/arm64/kvm/reset.c2
-rw-r--r--arch/arm64/kvm/sys_regs.c12
-rw-r--r--arch/arm64/lib/copy_from_user.S78
-rw-r--r--arch/arm64/lib/copy_in_user.S67
-rw-r--r--arch/arm64/lib/copy_template.S193
-rw-r--r--arch/arm64/lib/copy_to_user.S67
-rw-r--r--arch/arm64/lib/memchr.S2
-rw-r--r--arch/arm64/lib/memcmp.S2
-rw-r--r--arch/arm64/lib/memcpy.S184
-rw-r--r--arch/arm64/lib/memmove.S9
-rw-r--r--arch/arm64/lib/memset.S5
-rw-r--r--arch/arm64/lib/strcmp.S2
-rw-r--r--arch/arm64/lib/strlen.S2
-rw-r--r--arch/arm64/lib/strncmp.S2
-rw-r--r--arch/arm64/mm/Makefile3
-rw-r--r--arch/arm64/mm/cache.S10
-rw-r--r--arch/arm64/mm/context.c236
-rw-r--r--arch/arm64/mm/dump.c18
-rw-r--r--arch/arm64/mm/fault.c2
-rw-r--r--arch/arm64/mm/init.c19
-rw-r--r--arch/arm64/mm/kasan_init.c165
-rw-r--r--arch/arm64/mm/mmu.c145
-rw-r--r--arch/arm64/mm/pageattr.c2
-rw-r--r--arch/arm64/mm/pgd.c2
-rw-r--r--arch/arm64/mm/proc.S10
88 files changed, 3177 insertions, 2474 deletions
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 440d906429de..7b10647cab22 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -48,6 +48,7 @@ config ARM64
select HAVE_ARCH_AUDITSYSCALL
select HAVE_ARCH_BITREVERSE
select HAVE_ARCH_JUMP_LABEL
+ select HAVE_ARCH_KASAN if SPARSEMEM_VMEMMAP
select HAVE_ARCH_KGDB
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_TRACEHOOK
@@ -169,10 +170,12 @@ config FIX_EARLYCON_MEM
config PGTABLE_LEVELS
int
+ default 2 if ARM64_16K_PAGES && ARM64_VA_BITS_36
default 2 if ARM64_64K_PAGES && ARM64_VA_BITS_42
default 3 if ARM64_64K_PAGES && ARM64_VA_BITS_48
default 3 if ARM64_4K_PAGES && ARM64_VA_BITS_39
- default 4 if ARM64_4K_PAGES && ARM64_VA_BITS_48
+ default 3 if ARM64_16K_PAGES && ARM64_VA_BITS_47
+ default 4 if !ARM64_64K_PAGES && ARM64_VA_BITS_48
source "init/Kconfig"
@@ -389,25 +392,37 @@ config ARM64_4K_PAGES
help
This feature enables 4KB pages support.
+config ARM64_16K_PAGES
+ bool "16KB"
+ help
+ The system will use 16KB pages support. AArch32 emulation
+ requires applications compiled with 16K (or a multiple of 16K)
+ aligned segments.
+
config ARM64_64K_PAGES
bool "64KB"
help
This feature enables 64KB pages support (4KB by default)
allowing only two levels of page tables and faster TLB
- look-up. AArch32 emulation is not available when this feature
- is enabled.
+ look-up. AArch32 emulation requires applications compiled
+ with 64K aligned segments.
endchoice
choice
prompt "Virtual address space size"
default ARM64_VA_BITS_39 if ARM64_4K_PAGES
+ default ARM64_VA_BITS_47 if ARM64_16K_PAGES
default ARM64_VA_BITS_42 if ARM64_64K_PAGES
help
Allows choosing one of multiple possible virtual address
space sizes. The level of translation table is determined by
a combination of page size and virtual address space size.
+config ARM64_VA_BITS_36
+ bool "36-bit" if EXPERT
+ depends on ARM64_16K_PAGES
+
config ARM64_VA_BITS_39
bool "39-bit"
depends on ARM64_4K_PAGES
@@ -416,6 +431,10 @@ config ARM64_VA_BITS_42
bool "42-bit"
depends on ARM64_64K_PAGES
+config ARM64_VA_BITS_47
+ bool "47-bit"
+ depends on ARM64_16K_PAGES
+
config ARM64_VA_BITS_48
bool "48-bit"
@@ -423,8 +442,10 @@ endchoice
config ARM64_VA_BITS
int
+ default 36 if ARM64_VA_BITS_36
default 39 if ARM64_VA_BITS_39
default 42 if ARM64_VA_BITS_42
+ default 47 if ARM64_VA_BITS_47
default 48 if ARM64_VA_BITS_48
config CPU_BIG_ENDIAN
@@ -454,15 +475,13 @@ config NR_CPUS
config HOTPLUG_CPU
bool "Support for hot-pluggable CPUs"
+ select GENERIC_IRQ_MIGRATION
help
Say Y here to experiment with turning CPUs off and on. CPUs
can be controlled through /sys/devices/system/cpu.
source kernel/Kconfig.preempt
-
-config HZ
- int
- default 100
+source kernel/Kconfig.hz
config ARCH_HAS_HOLES_MEMORYMODEL
def_bool y if SPARSEMEM
@@ -481,12 +500,8 @@ config HAVE_ARCH_PFN_VALID
def_bool ARCH_HAS_HOLES_MEMORYMODEL || !SPARSEMEM
config HW_PERF_EVENTS
- bool "Enable hardware performance counter support for perf events"
- depends on PERF_EVENTS
- default y
- help
- Enable hardware performance counter support for perf events. If
- disabled, perf events will use software events only.
+ def_bool y
+ depends on ARM_PMU
config SYS_SUPPORTS_HUGETLBFS
def_bool y
@@ -495,7 +510,7 @@ config ARCH_WANT_GENERAL_HUGETLB
def_bool y
config ARCH_WANT_HUGE_PMD_SHARE
- def_bool y if !ARM64_64K_PAGES
+ def_bool y if ARM64_4K_PAGES || (ARM64_16K_PAGES && !ARM64_VA_BITS_36)
config HAVE_ARCH_TRANSPARENT_HUGEPAGE
def_bool y
@@ -532,7 +547,25 @@ config XEN
config FORCE_MAX_ZONEORDER
int
default "14" if (ARM64_64K_PAGES && TRANSPARENT_HUGEPAGE)
+ default "12" if (ARM64_16K_PAGES && TRANSPARENT_HUGEPAGE)
default "11"
+ help
+ The kernel memory allocator divides physically contiguous memory
+ blocks into "zones", where each zone is a power of two number of
+ pages. This option selects the largest power of two that the kernel
+ keeps in the memory allocator. If you need to allocate very large
+ blocks of physically contiguous memory, then you may need to
+ increase this value.
+
+ This config option is actually maximum order plus one. For example,
+ a value of 11 means that the largest free memory block is 2^10 pages.
+
+ We make sure that we can allocate upto a HugePage size for each configuration.
+ Hence we have :
+ MAX_ORDER = (PMD_SHIFT - PAGE_SHIFT) + 1 => PAGE_SHIFT - 2
+
+ However for 4K, we choose a higher default value, 11 as opposed to 10, giving us
+ 4M allocations matching the default size used by generic code.
menuconfig ARMV8_DEPRECATED
bool "Emulate deprecated/obsolete ARMv8 instructions"
@@ -707,7 +740,7 @@ source "fs/Kconfig.binfmt"
config COMPAT
bool "Kernel support for 32-bit EL0"
- depends on !ARM64_64K_PAGES || EXPERT
+ depends on ARM64_4K_PAGES || EXPERT
select COMPAT_BINFMT_ELF
select HAVE_UID16
select OLD_SIGSUSPEND3
@@ -718,9 +751,9 @@ config COMPAT
the user helper functions, VFP support and the ptrace interface are
handled appropriately by the kernel.
- If you also enabled CONFIG_ARM64_64K_PAGES, please be aware that you
- will only be able to execute AArch32 binaries that were compiled with
- 64k aligned segments.
+ If you use a page size other than 4KB (i.e, 16KB or 64KB), please be aware
+ that you will only be able to execute AArch32 binaries that were compiled
+ with page size aligned segments.
If you want to execute 32-bit userspace applications, say Y.
diff --git a/arch/arm64/Kconfig.debug b/arch/arm64/Kconfig.debug
index d6285ef9b5f9..c24d6adc0420 100644
--- a/arch/arm64/Kconfig.debug
+++ b/arch/arm64/Kconfig.debug
@@ -77,7 +77,7 @@ config DEBUG_RODATA
If in doubt, say Y
config DEBUG_ALIGN_RODATA
- depends on DEBUG_RODATA && !ARM64_64K_PAGES
+ depends on DEBUG_RODATA && ARM64_4K_PAGES
bool "Align linker sections up to SECTION_SIZE"
help
If this option is enabled, sections that may potentially be marked as
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index d10b5d483022..cd822d8454c0 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -55,6 +55,13 @@ else
TEXT_OFFSET := 0x00080000
endif
+# KASAN_SHADOW_OFFSET = VA_START + (1 << (VA_BITS - 3)) - (1 << 61)
+# in 32-bit arithmetic
+KASAN_SHADOW_OFFSET := $(shell printf "0x%08x00000000\n" $$(( \
+ (0xffffffff & (-1 << ($(CONFIG_ARM64_VA_BITS) - 32))) \
+ + (1 << ($(CONFIG_ARM64_VA_BITS) - 32 - 3)) \
+ - (1 << (64 - 32 - 3)) )) )
+
export TEXT_OFFSET GZFLAGS
core-y += arch/arm64/kernel/ arch/arm64/mm/
diff --git a/arch/arm64/boot/dts/arm/juno-r1.dts b/arch/arm64/boot/dts/arm/juno-r1.dts
index c62751153a4f..734e1272b19f 100644
--- a/arch/arm64/boot/dts/arm/juno-r1.dts
+++ b/arch/arm64/boot/dts/arm/juno-r1.dts
@@ -91,17 +91,21 @@
};
};
- pmu {
- compatible = "arm,armv8-pmuv3";
+ pmu_a57 {
+ compatible = "arm,cortex-a57-pmu";
interrupts = <GIC_SPI 02 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 06 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 18 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 06 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-affinity = <&A57_0>,
+ <&A57_1>;
+ };
+
+ pmu_a53 {
+ compatible = "arm,cortex-a53-pmu";
+ interrupts = <GIC_SPI 18 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 22 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 26 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 30 IRQ_TYPE_LEVEL_HIGH>;
- interrupt-affinity = <&A57_0>,
- <&A57_1>,
- <&A53_0>,
+ interrupt-affinity = <&A53_0>,
<&A53_1>,
<&A53_2>,
<&A53_3>;
diff --git a/arch/arm64/boot/dts/arm/juno.dts b/arch/arm64/boot/dts/arm/juno.dts
index d7cbdd482a61..ffa05aeab3c7 100644
--- a/arch/arm64/boot/dts/arm/juno.dts
+++ b/arch/arm64/boot/dts/arm/juno.dts
@@ -91,17 +91,21 @@
};
};
- pmu {
- compatible = "arm,armv8-pmuv3";
+ pmu_a57 {
+ compatible = "arm,cortex-a57-pmu";
interrupts = <GIC_SPI 02 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 06 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 18 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 06 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-affinity = <&A57_0>,
+ <&A57_1>;
+ };
+
+ pmu_a53 {
+ compatible = "arm,cortex-a53-pmu";
+ interrupts = <GIC_SPI 18 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 22 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 26 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 30 IRQ_TYPE_LEVEL_HIGH>;
- interrupt-affinity = <&A57_0>,
- <&A57_1>,
- <&A53_0>,
+ interrupt-affinity = <&A53_0>,
<&A53_1>,
<&A53_2>,
<&A53_3>;
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index 34d71dd86781..5f760347aee2 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -51,6 +51,7 @@ CONFIG_PCI=y
CONFIG_PCI_MSI=y
CONFIG_PCI_XGENE=y
CONFIG_SMP=y
+CONFIG_SCHED_MC=y
CONFIG_PREEMPT=y
CONFIG_KSM=y
CONFIG_TRANSPARENT_HUGEPAGE=y
@@ -109,6 +110,10 @@ CONFIG_SERIAL_8250_DW=y
CONFIG_SERIAL_8250_MT6577=y
CONFIG_SERIAL_AMBA_PL011=y
CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
+CONFIG_SERIAL_SAMSUNG=y
+CONFIG_SERIAL_SAMSUNG_UARTS_4=y
+CONFIG_SERIAL_SAMSUNG_UARTS=4
+CONFIG_SERIAL_SAMSUNG_CONSOLE=y
CONFIG_SERIAL_MSM=y
CONFIG_SERIAL_MSM_CONSOLE=y
CONFIG_SERIAL_OF_PLATFORM=y
@@ -145,6 +150,10 @@ CONFIG_MMC_ARMMMCI=y
CONFIG_MMC_SDHCI=y
CONFIG_MMC_SDHCI_PLTFM=y
CONFIG_MMC_SPI=y
+CONFIG_MMC_DW=y
+CONFIG_MMC_DW_IDMAC=y
+CONFIG_MMC_DW_PLTFM=y
+CONFIG_MMC_DW_EXYNOS=y
CONFIG_NEW_LEDS=y
CONFIG_LEDS_CLASS=y
CONFIG_LEDS_SYSCON=y
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index b51f2cc22ca9..12eff928ef8b 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -193,4 +193,15 @@ lr .req x30 // link register
str \src, [\tmp, :lo12:\sym]
.endm
+/*
+ * Annotate a function as position independent, i.e., safe to be called before
+ * the kernel virtual mapping is activated.
+ */
+#define ENDPIPROC(x) \
+ .globl __pi_##x; \
+ .type __pi_##x, %function; \
+ .set __pi_##x, x; \
+ .size __pi_##x, . - x; \
+ ENDPROC(x)
+
#endif /* __ASM_ASSEMBLER_H */
diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h
index 1e247ac2601a..f3a3586a421c 100644
--- a/arch/arm64/include/asm/atomic.h
+++ b/arch/arm64/include/asm/atomic.h
@@ -55,13 +55,42 @@
#define atomic_read(v) READ_ONCE((v)->counter)
#define atomic_set(v, i) WRITE_ONCE(((v)->counter), (i))
+
+#define atomic_add_return_relaxed atomic_add_return_relaxed
+#define atomic_add_return_acquire atomic_add_return_acquire
+#define atomic_add_return_release atomic_add_return_release
+#define atomic_add_return atomic_add_return
+
+#define atomic_inc_return_relaxed(v) atomic_add_return_relaxed(1, (v))
+#define atomic_inc_return_acquire(v) atomic_add_return_acquire(1, (v))
+#define atomic_inc_return_release(v) atomic_add_return_release(1, (v))
+#define atomic_inc_return(v) atomic_add_return(1, (v))
+
+#define atomic_sub_return_relaxed atomic_sub_return_relaxed
+#define atomic_sub_return_acquire atomic_sub_return_acquire
+#define atomic_sub_return_release atomic_sub_return_release
+#define atomic_sub_return atomic_sub_return
+
+#define atomic_dec_return_relaxed(v) atomic_sub_return_relaxed(1, (v))
+#define atomic_dec_return_acquire(v) atomic_sub_return_acquire(1, (v))
+#define atomic_dec_return_release(v) atomic_sub_return_release(1, (v))
+#define atomic_dec_return(v) atomic_sub_return(1, (v))
+
+#define atomic_xchg_relaxed(v, new) xchg_relaxed(&((v)->counter), (new))
+#define atomic_xchg_acquire(v, new) xchg_acquire(&((v)->counter), (new))
+#define atomic_xchg_release(v, new) xchg_release(&((v)->counter), (new))
#define atomic_xchg(v, new) xchg(&((v)->counter), (new))
+
+#define atomic_cmpxchg_relaxed(v, old, new) \
+ cmpxchg_relaxed(&((v)->counter), (old), (new))
+#define atomic_cmpxchg_acquire(v, old, new) \
+ cmpxchg_acquire(&((v)->counter), (old), (new))
+#define atomic_cmpxchg_release(v, old, new) \
+ cmpxchg_release(&((v)->counter), (old), (new))
#define atomic_cmpxchg(v, old, new) cmpxchg(&((v)->counter), (old), (new))
#define atomic_inc(v) atomic_add(1, (v))
#define atomic_dec(v) atomic_sub(1, (v))
-#define atomic_inc_return(v) atomic_add_return(1, (v))
-#define atomic_dec_return(v) atomic_sub_return(1, (v))
#define atomic_inc_and_test(v) (atomic_inc_return(v) == 0)
#define atomic_dec_and_test(v) (atomic_dec_return(v) == 0)
#define atomic_sub_and_test(i, v) (atomic_sub_return((i), (v)) == 0)
@@ -75,13 +104,39 @@
#define ATOMIC64_INIT ATOMIC_INIT
#define atomic64_read atomic_read
#define atomic64_set atomic_set
+
+#define atomic64_add_return_relaxed atomic64_add_return_relaxed
+#define atomic64_add_return_acquire atomic64_add_return_acquire
+#define atomic64_add_return_release atomic64_add_return_release
+#define atomic64_add_return atomic64_add_return
+
+#define atomic64_inc_return_relaxed(v) atomic64_add_return_relaxed(1, (v))
+#define atomic64_inc_return_acquire(v) atomic64_add_return_acquire(1, (v))
+#define atomic64_inc_return_release(v) atomic64_add_return_release(1, (v))
+#define atomic64_inc_return(v) atomic64_add_return(1, (v))
+
+#define atomic64_sub_return_relaxed atomic64_sub_return_relaxed
+#define atomic64_sub_return_acquire atomic64_sub_return_acquire
+#define atomic64_sub_return_release atomic64_sub_return_release
+#define atomic64_sub_return atomic64_sub_return
+
+#define atomic64_dec_return_relaxed(v) atomic64_sub_return_relaxed(1, (v))
+#define atomic64_dec_return_acquire(v) atomic64_sub_return_acquire(1, (v))
+#define atomic64_dec_return_release(v) atomic64_sub_return_release(1, (v))
+#define atomic64_dec_return(v) atomic64_sub_return(1, (v))
+
+#define atomic64_xchg_relaxed atomic_xchg_relaxed
+#define atomic64_xchg_acquire atomic_xchg_acquire
+#define atomic64_xchg_release atomic_xchg_release
#define atomic64_xchg atomic_xchg
+
+#define atomic64_cmpxchg_relaxed atomic_cmpxchg_relaxed
+#define atomic64_cmpxchg_acquire atomic_cmpxchg_acquire
+#define atomic64_cmpxchg_release atomic_cmpxchg_release
#define atomic64_cmpxchg atomic_cmpxchg
#define atomic64_inc(v) atomic64_add(1, (v))
#define atomic64_dec(v) atomic64_sub(1, (v))
-#define atomic64_inc_return(v) atomic64_add_return(1, (v))
-#define atomic64_dec_return(v) atomic64_sub_return(1, (v))
#define atomic64_inc_and_test(v) (atomic64_inc_return(v) == 0)
#define atomic64_dec_and_test(v) (atomic64_dec_return(v) == 0)
#define atomic64_sub_and_test(i, v) (atomic64_sub_return((i), (v)) == 0)
diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h
index b3b5c4ae3800..74d0b8eb0799 100644
--- a/arch/arm64/include/asm/atomic_ll_sc.h
+++ b/arch/arm64/include/asm/atomic_ll_sc.h
@@ -55,40 +55,47 @@ __LL_SC_PREFIX(atomic_##op(int i, atomic_t *v)) \
} \
__LL_SC_EXPORT(atomic_##op);
-#define ATOMIC_OP_RETURN(op, asm_op) \
+#define ATOMIC_OP_RETURN(name, mb, acq, rel, cl, op, asm_op) \
__LL_SC_INLINE int \
-__LL_SC_PREFIX(atomic_##op##_return(int i, atomic_t *v)) \
+__LL_SC_PREFIX(atomic_##op##_return##name(int i, atomic_t *v)) \
{ \
unsigned long tmp; \
int result; \
\
- asm volatile("// atomic_" #op "_return\n" \
+ asm volatile("// atomic_" #op "_return" #name "\n" \
" prfm pstl1strm, %2\n" \
-"1: ldxr %w0, %2\n" \
+"1: ld" #acq "xr %w0, %2\n" \
" " #asm_op " %w0, %w0, %w3\n" \
-" stlxr %w1, %w0, %2\n" \
-" cbnz %w1, 1b" \
+" st" #rel "xr %w1, %w0, %2\n" \
+" cbnz %w1, 1b\n" \
+" " #mb \
: "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \
: "Ir" (i) \
- : "memory"); \
+ : cl); \
\
- smp_mb(); \
return result; \
} \
-__LL_SC_EXPORT(atomic_##op##_return);
+__LL_SC_EXPORT(atomic_##op##_return##name);
+
+#define ATOMIC_OPS(...) \
+ ATOMIC_OP(__VA_ARGS__) \
+ ATOMIC_OP_RETURN( , dmb ish, , l, "memory", __VA_ARGS__)
-#define ATOMIC_OPS(op, asm_op) \
- ATOMIC_OP(op, asm_op) \
- ATOMIC_OP_RETURN(op, asm_op)
+#define ATOMIC_OPS_RLX(...) \
+ ATOMIC_OPS(__VA_ARGS__) \
+ ATOMIC_OP_RETURN(_relaxed, , , , , __VA_ARGS__)\
+ ATOMIC_OP_RETURN(_acquire, , a, , "memory", __VA_ARGS__)\
+ ATOMIC_OP_RETURN(_release, , , l, "memory", __VA_ARGS__)
-ATOMIC_OPS(add, add)
-ATOMIC_OPS(sub, sub)
+ATOMIC_OPS_RLX(add, add)
+ATOMIC_OPS_RLX(sub, sub)
ATOMIC_OP(and, and)
ATOMIC_OP(andnot, bic)
ATOMIC_OP(or, orr)
ATOMIC_OP(xor, eor)
+#undef ATOMIC_OPS_RLX
#undef ATOMIC_OPS
#undef ATOMIC_OP_RETURN
#undef ATOMIC_OP
@@ -111,40 +118,47 @@ __LL_SC_PREFIX(atomic64_##op(long i, atomic64_t *v)) \
} \
__LL_SC_EXPORT(atomic64_##op);
-#define ATOMIC64_OP_RETURN(op, asm_op) \
+#define ATOMIC64_OP_RETURN(name, mb, acq, rel, cl, op, asm_op) \
__LL_SC_INLINE long \
-__LL_SC_PREFIX(atomic64_##op##_return(long i, atomic64_t *v)) \
+__LL_SC_PREFIX(atomic64_##op##_return##name(long i, atomic64_t *v)) \
{ \
long result; \
unsigned long tmp; \
\
- asm volatile("// atomic64_" #op "_return\n" \
+ asm volatile("// atomic64_" #op "_return" #name "\n" \
" prfm pstl1strm, %2\n" \
-"1: ldxr %0, %2\n" \
+"1: ld" #acq "xr %0, %2\n" \
" " #asm_op " %0, %0, %3\n" \
-" stlxr %w1, %0, %2\n" \
-" cbnz %w1, 1b" \
+" st" #rel "xr %w1, %0, %2\n" \
+" cbnz %w1, 1b\n" \
+" " #mb \
: "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \
: "Ir" (i) \
- : "memory"); \
+ : cl); \
\
- smp_mb(); \
return result; \
} \
-__LL_SC_EXPORT(atomic64_##op##_return);
+__LL_SC_EXPORT(atomic64_##op##_return##name);
+
+#define ATOMIC64_OPS(...) \
+ ATOMIC64_OP(__VA_ARGS__) \
+ ATOMIC64_OP_RETURN(, dmb ish, , l, "memory", __VA_ARGS__)
-#define ATOMIC64_OPS(op, asm_op) \
- ATOMIC64_OP(op, asm_op) \
- ATOMIC64_OP_RETURN(op, asm_op)
+#define ATOMIC64_OPS_RLX(...) \
+ ATOMIC64_OPS(__VA_ARGS__) \
+ ATOMIC64_OP_RETURN(_relaxed,, , , , __VA_ARGS__) \
+ ATOMIC64_OP_RETURN(_acquire,, a, , "memory", __VA_ARGS__) \
+ ATOMIC64_OP_RETURN(_release,, , l, "memory", __VA_ARGS__)
-ATOMIC64_OPS(add, add)
-ATOMIC64_OPS(sub, sub)
+ATOMIC64_OPS_RLX(add, add)
+ATOMIC64_OPS_RLX(sub, sub)
ATOMIC64_OP(and, and)
ATOMIC64_OP(andnot, bic)
ATOMIC64_OP(or, orr)
ATOMIC64_OP(xor, eor)
+#undef ATOMIC64_OPS_RLX
#undef ATOMIC64_OPS
#undef ATOMIC64_OP_RETURN
#undef ATOMIC64_OP
@@ -172,7 +186,7 @@ __LL_SC_PREFIX(atomic64_dec_if_positive(atomic64_t *v))
}
__LL_SC_EXPORT(atomic64_dec_if_positive);
-#define __CMPXCHG_CASE(w, sz, name, mb, rel, cl) \
+#define __CMPXCHG_CASE(w, sz, name, mb, acq, rel, cl) \
__LL_SC_INLINE unsigned long \
__LL_SC_PREFIX(__cmpxchg_case_##name(volatile void *ptr, \
unsigned long old, \
@@ -182,7 +196,7 @@ __LL_SC_PREFIX(__cmpxchg_case_##name(volatile void *ptr, \
\
asm volatile( \
" prfm pstl1strm, %[v]\n" \
- "1: ldxr" #sz "\t%" #w "[oldval], %[v]\n" \
+ "1: ld" #acq "xr" #sz "\t%" #w "[oldval], %[v]\n" \
" eor %" #w "[tmp], %" #w "[oldval], %" #w "[old]\n" \
" cbnz %" #w "[tmp], 2f\n" \
" st" #rel "xr" #sz "\t%w[tmp], %" #w "[new], %[v]\n" \
@@ -199,14 +213,22 @@ __LL_SC_PREFIX(__cmpxchg_case_##name(volatile void *ptr, \
} \
__LL_SC_EXPORT(__cmpxchg_case_##name);
-__CMPXCHG_CASE(w, b, 1, , , )
-__CMPXCHG_CASE(w, h, 2, , , )
-__CMPXCHG_CASE(w, , 4, , , )
-__CMPXCHG_CASE( , , 8, , , )
-__CMPXCHG_CASE(w, b, mb_1, dmb ish, l, "memory")
-__CMPXCHG_CASE(w, h, mb_2, dmb ish, l, "memory")
-__CMPXCHG_CASE(w, , mb_4, dmb ish, l, "memory")
-__CMPXCHG_CASE( , , mb_8, dmb ish, l, "memory")
+__CMPXCHG_CASE(w, b, 1, , , , )
+__CMPXCHG_CASE(w, h, 2, , , , )
+__CMPXCHG_CASE(w, , 4, , , , )
+__CMPXCHG_CASE( , , 8, , , , )
+__CMPXCHG_CASE(w, b, acq_1, , a, , "memory")
+__CMPXCHG_CASE(w, h, acq_2, , a, , "memory")
+__CMPXCHG_CASE(w, , acq_4, , a, , "memory")
+__CMPXCHG_CASE( , , acq_8, , a, , "memory")
+__CMPXCHG_CASE(w, b, rel_1, , , l, "memory")
+__CMPXCHG_CASE(w, h, rel_2, , , l, "memory")
+__CMPXCHG_CASE(w, , rel_4, , , l, "memory")
+__CMPXCHG_CASE( , , rel_8, , , l, "memory")
+__CMPXCHG_CASE(w, b, mb_1, dmb ish, , l, "memory")
+__CMPXCHG_CASE(w, h, mb_2, dmb ish, , l, "memory")
+__CMPXCHG_CASE(w, , mb_4, dmb ish, , l, "memory")
+__CMPXCHG_CASE( , , mb_8, dmb ish, , l, "memory")
#undef __CMPXCHG_CASE
diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h
index 55d740e63459..1fce7908e690 100644
--- a/arch/arm64/include/asm/atomic_lse.h
+++ b/arch/arm64/include/asm/atomic_lse.h
@@ -75,24 +75,32 @@ static inline void atomic_add(int i, atomic_t *v)
: "x30");
}
-static inline int atomic_add_return(int i, atomic_t *v)
-{
- register int w0 asm ("w0") = i;
- register atomic_t *x1 asm ("x1") = v;
+#define ATOMIC_OP_ADD_RETURN(name, mb, cl...) \
+static inline int atomic_add_return##name(int i, atomic_t *v) \
+{ \
+ register int w0 asm ("w0") = i; \
+ register atomic_t *x1 asm ("x1") = v; \
+ \
+ asm volatile(ARM64_LSE_ATOMIC_INSN( \
+ /* LL/SC */ \
+ " nop\n" \
+ __LL_SC_ATOMIC(add_return##name), \
+ /* LSE atomics */ \
+ " ldadd" #mb " %w[i], w30, %[v]\n" \
+ " add %w[i], %w[i], w30") \
+ : [i] "+r" (w0), [v] "+Q" (v->counter) \
+ : "r" (x1) \
+ : "x30" , ##cl); \
+ \
+ return w0; \
+}
- asm volatile(ARM64_LSE_ATOMIC_INSN(
- /* LL/SC */
- " nop\n"
- __LL_SC_ATOMIC(add_return),
- /* LSE atomics */
- " ldaddal %w[i], w30, %[v]\n"
- " add %w[i], %w[i], w30")
- : [i] "+r" (w0), [v] "+Q" (v->counter)
- : "r" (x1)
- : "x30", "memory");
+ATOMIC_OP_ADD_RETURN(_relaxed, )
+ATOMIC_OP_ADD_RETURN(_acquire, a, "memory")
+ATOMIC_OP_ADD_RETURN(_release, l, "memory")
+ATOMIC_OP_ADD_RETURN( , al, "memory")
- return w0;
-}
+#undef ATOMIC_OP_ADD_RETURN
static inline void atomic_and(int i, atomic_t *v)
{
@@ -128,27 +136,34 @@ static inline void atomic_sub(int i, atomic_t *v)
: "x30");
}
-static inline int atomic_sub_return(int i, atomic_t *v)
-{
- register int w0 asm ("w0") = i;
- register atomic_t *x1 asm ("x1") = v;
-
- asm volatile(ARM64_LSE_ATOMIC_INSN(
- /* LL/SC */
- " nop\n"
- __LL_SC_ATOMIC(sub_return)
- " nop",
- /* LSE atomics */
- " neg %w[i], %w[i]\n"
- " ldaddal %w[i], w30, %[v]\n"
- " add %w[i], %w[i], w30")
- : [i] "+r" (w0), [v] "+Q" (v->counter)
- : "r" (x1)
- : "x30", "memory");
-
- return w0;
+#define ATOMIC_OP_SUB_RETURN(name, mb, cl...) \
+static inline int atomic_sub_return##name(int i, atomic_t *v) \
+{ \
+ register int w0 asm ("w0") = i; \
+ register atomic_t *x1 asm ("x1") = v; \
+ \
+ asm volatile(ARM64_LSE_ATOMIC_INSN( \
+ /* LL/SC */ \
+ " nop\n" \
+ __LL_SC_ATOMIC(sub_return##name) \
+ " nop", \
+ /* LSE atomics */ \
+ " neg %w[i], %w[i]\n" \
+ " ldadd" #mb " %w[i], w30, %[v]\n" \
+ " add %w[i], %w[i], w30") \
+ : [i] "+r" (w0), [v] "+Q" (v->counter) \
+ : "r" (x1) \
+ : "x30" , ##cl); \
+ \
+ return w0; \
}
+ATOMIC_OP_SUB_RETURN(_relaxed, )
+ATOMIC_OP_SUB_RETURN(_acquire, a, "memory")
+ATOMIC_OP_SUB_RETURN(_release, l, "memory")
+ATOMIC_OP_SUB_RETURN( , al, "memory")
+
+#undef ATOMIC_OP_SUB_RETURN
#undef __LL_SC_ATOMIC
#define __LL_SC_ATOMIC64(op) __LL_SC_CALL(atomic64_##op)
@@ -201,24 +216,32 @@ static inline void atomic64_add(long i, atomic64_t *v)
: "x30");
}
-static inline long atomic64_add_return(long i, atomic64_t *v)
-{
- register long x0 asm ("x0") = i;
- register atomic64_t *x1 asm ("x1") = v;
+#define ATOMIC64_OP_ADD_RETURN(name, mb, cl...) \
+static inline long atomic64_add_return##name(long i, atomic64_t *v) \
+{ \
+ register long x0 asm ("x0") = i; \
+ register atomic64_t *x1 asm ("x1") = v; \
+ \
+ asm volatile(ARM64_LSE_ATOMIC_INSN( \
+ /* LL/SC */ \
+ " nop\n" \
+ __LL_SC_ATOMIC64(add_return##name), \
+ /* LSE atomics */ \
+ " ldadd" #mb " %[i], x30, %[v]\n" \
+ " add %[i], %[i], x30") \
+ : [i] "+r" (x0), [v] "+Q" (v->counter) \
+ : "r" (x1) \
+ : "x30" , ##cl); \
+ \
+ return x0; \
+}
- asm volatile(ARM64_LSE_ATOMIC_INSN(
- /* LL/SC */
- " nop\n"
- __LL_SC_ATOMIC64(add_return),
- /* LSE atomics */
- " ldaddal %[i], x30, %[v]\n"
- " add %[i], %[i], x30")
- : [i] "+r" (x0), [v] "+Q" (v->counter)
- : "r" (x1)
- : "x30", "memory");
+ATOMIC64_OP_ADD_RETURN(_relaxed, )
+ATOMIC64_OP_ADD_RETURN(_acquire, a, "memory")
+ATOMIC64_OP_ADD_RETURN(_release, l, "memory")
+ATOMIC64_OP_ADD_RETURN( , al, "memory")
- return x0;
-}
+#undef ATOMIC64_OP_ADD_RETURN
static inline void atomic64_and(long i, atomic64_t *v)
{
@@ -254,26 +277,34 @@ static inline void atomic64_sub(long i, atomic64_t *v)
: "x30");
}
-static inline long atomic64_sub_return(long i, atomic64_t *v)
-{
- register long x0 asm ("x0") = i;
- register atomic64_t *x1 asm ("x1") = v;
+#define ATOMIC64_OP_SUB_RETURN(name, mb, cl...) \
+static inline long atomic64_sub_return##name(long i, atomic64_t *v) \
+{ \
+ register long x0 asm ("x0") = i; \
+ register atomic64_t *x1 asm ("x1") = v; \
+ \
+ asm volatile(ARM64_LSE_ATOMIC_INSN( \
+ /* LL/SC */ \
+ " nop\n" \
+ __LL_SC_ATOMIC64(sub_return##name) \
+ " nop", \
+ /* LSE atomics */ \
+ " neg %[i], %[i]\n" \
+ " ldadd" #mb " %[i], x30, %[v]\n" \
+ " add %[i], %[i], x30") \
+ : [i] "+r" (x0), [v] "+Q" (v->counter) \
+ : "r" (x1) \
+ : "x30" , ##cl); \
+ \
+ return x0; \
+}
- asm volatile(ARM64_LSE_ATOMIC_INSN(
- /* LL/SC */
- " nop\n"
- __LL_SC_ATOMIC64(sub_return)
- " nop",
- /* LSE atomics */
- " neg %[i], %[i]\n"
- " ldaddal %[i], x30, %[v]\n"
- " add %[i], %[i], x30")
- : [i] "+r" (x0), [v] "+Q" (v->counter)
- : "r" (x1)
- : "x30", "memory");
+ATOMIC64_OP_SUB_RETURN(_relaxed, )
+ATOMIC64_OP_SUB_RETURN(_acquire, a, "memory")
+ATOMIC64_OP_SUB_RETURN(_release, l, "memory")
+ATOMIC64_OP_SUB_RETURN( , al, "memory")
- return x0;
-}
+#undef ATOMIC64_OP_SUB_RETURN
static inline long atomic64_dec_if_positive(atomic64_t *v)
{
@@ -333,14 +364,22 @@ static inline unsigned long __cmpxchg_case_##name(volatile void *ptr, \
return x0; \
}
-__CMPXCHG_CASE(w, b, 1, )
-__CMPXCHG_CASE(w, h, 2, )
-__CMPXCHG_CASE(w, , 4, )
-__CMPXCHG_CASE(x, , 8, )
-__CMPXCHG_CASE(w, b, mb_1, al, "memory")
-__CMPXCHG_CASE(w, h, mb_2, al, "memory")
-__CMPXCHG_CASE(w, , mb_4, al, "memory")
-__CMPXCHG_CASE(x, , mb_8, al, "memory")
+__CMPXCHG_CASE(w, b, 1, )
+__CMPXCHG_CASE(w, h, 2, )
+__CMPXCHG_CASE(w, , 4, )
+__CMPXCHG_CASE(x, , 8, )
+__CMPXCHG_CASE(w, b, acq_1, a, "memory")
+__CMPXCHG_CASE(w, h, acq_2, a, "memory")
+__CMPXCHG_CASE(w, , acq_4, a, "memory")
+__CMPXCHG_CASE(x, , acq_8, a, "memory")
+__CMPXCHG_CASE(w, b, rel_1, l, "memory")
+__CMPXCHG_CASE(w, h, rel_2, l, "memory")
+__CMPXCHG_CASE(w, , rel_4, l, "memory")
+__CMPXCHG_CASE(x, , rel_8, l, "memory")
+__CMPXCHG_CASE(w, b, mb_1, al, "memory")
+__CMPXCHG_CASE(w, h, mb_2, al, "memory")
+__CMPXCHG_CASE(w, , mb_4, al, "memory")
+__CMPXCHG_CASE(x, , mb_8, al, "memory")
#undef __LL_SC_CMPXCHG
#undef __CMPXCHG_CASE
diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h
index bde449936e2f..5082b30bc2c0 100644
--- a/arch/arm64/include/asm/cache.h
+++ b/arch/arm64/include/asm/cache.h
@@ -18,7 +18,7 @@
#include <asm/cachetype.h>
-#define L1_CACHE_SHIFT 6
+#define L1_CACHE_SHIFT 7
#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT)
/*
diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
index c75b8d027eb1..54efedaf331f 100644
--- a/arch/arm64/include/asm/cacheflush.h
+++ b/arch/arm64/include/asm/cacheflush.h
@@ -115,6 +115,13 @@ extern void copy_to_user_page(struct vm_area_struct *, struct page *,
#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
extern void flush_dcache_page(struct page *);
+static inline void __local_flush_icache_all(void)
+{
+ asm("ic iallu");
+ dsb(nsh);
+ isb();
+}
+
static inline void __flush_icache_all(void)
{
asm("ic ialluis");
diff --git a/arch/arm64/include/asm/cachetype.h b/arch/arm64/include/asm/cachetype.h
index da2fc9e3cedd..f5588692f1d4 100644
--- a/arch/arm64/include/asm/cachetype.h
+++ b/arch/arm64/include/asm/cachetype.h
@@ -34,8 +34,8 @@
#define CTR_L1IP(ctr) (((ctr) >> CTR_L1IP_SHIFT) & CTR_L1IP_MASK)
-#define ICACHEF_ALIASING BIT(0)
-#define ICACHEF_AIVIVT BIT(1)
+#define ICACHEF_ALIASING 0
+#define ICACHEF_AIVIVT 1
extern unsigned long __icache_flags;
diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h
index 899e9f1d19e4..9ea611ea69df 100644
--- a/arch/arm64/include/asm/cmpxchg.h
+++ b/arch/arm64/include/asm/cmpxchg.h
@@ -25,154 +25,151 @@
#include <asm/barrier.h>
#include <asm/lse.h>
-static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size)
-{
- unsigned long ret, tmp;
-
- switch (size) {
- case 1:
- asm volatile(ARM64_LSE_ATOMIC_INSN(
- /* LL/SC */
- " prfm pstl1strm, %2\n"
- "1: ldxrb %w0, %2\n"
- " stlxrb %w1, %w3, %2\n"
- " cbnz %w1, 1b\n"
- " dmb ish",
- /* LSE atomics */
- " nop\n"
- " nop\n"
- " swpalb %w3, %w0, %2\n"
- " nop\n"
- " nop")
- : "=&r" (ret), "=&r" (tmp), "+Q" (*(u8 *)ptr)
- : "r" (x)
- : "memory");
- break;
- case 2:
- asm volatile(ARM64_LSE_ATOMIC_INSN(
- /* LL/SC */
- " prfm pstl1strm, %2\n"
- "1: ldxrh %w0, %2\n"
- " stlxrh %w1, %w3, %2\n"
- " cbnz %w1, 1b\n"
- " dmb ish",
- /* LSE atomics */
- " nop\n"
- " nop\n"
- " swpalh %w3, %w0, %2\n"
- " nop\n"
- " nop")
- : "=&r" (ret), "=&r" (tmp), "+Q" (*(u16 *)ptr)
- : "r" (x)
- : "memory");
- break;
- case 4:
- asm volatile(ARM64_LSE_ATOMIC_INSN(
- /* LL/SC */
- " prfm pstl1strm, %2\n"
- "1: ldxr %w0, %2\n"
- " stlxr %w1, %w3, %2\n"
- " cbnz %w1, 1b\n"
- " dmb ish",
- /* LSE atomics */
- " nop\n"
- " nop\n"
- " swpal %w3, %w0, %2\n"
- " nop\n"
- " nop")
- : "=&r" (ret), "=&r" (tmp), "+Q" (*(u32 *)ptr)
- : "r" (x)
- : "memory");
- break;
- case 8:
- asm volatile(ARM64_LSE_ATOMIC_INSN(
- /* LL/SC */
- " prfm pstl1strm, %2\n"
- "1: ldxr %0, %2\n"
- " stlxr %w1, %3, %2\n"
- " cbnz %w1, 1b\n"
- " dmb ish",
- /* LSE atomics */
- " nop\n"
- " nop\n"
- " swpal %3, %0, %2\n"
- " nop\n"
- " nop")
- : "=&r" (ret), "=&r" (tmp), "+Q" (*(u64 *)ptr)
- : "r" (x)
- : "memory");
- break;
- default:
- BUILD_BUG();
- }
-
- return ret;
+/*
+ * We need separate acquire parameters for ll/sc and lse, since the full
+ * barrier case is generated as release+dmb for the former and
+ * acquire+release for the latter.
+ */
+#define __XCHG_CASE(w, sz, name, mb, nop_lse, acq, acq_lse, rel, cl) \
+static inline unsigned long __xchg_case_##name(unsigned long x, \
+ volatile void *ptr) \
+{ \
+ unsigned long ret, tmp; \
+ \
+ asm volatile(ARM64_LSE_ATOMIC_INSN( \
+ /* LL/SC */ \
+ " prfm pstl1strm, %2\n" \
+ "1: ld" #acq "xr" #sz "\t%" #w "0, %2\n" \
+ " st" #rel "xr" #sz "\t%w1, %" #w "3, %2\n" \
+ " cbnz %w1, 1b\n" \
+ " " #mb, \
+ /* LSE atomics */ \
+ " nop\n" \
+ " nop\n" \
+ " swp" #acq_lse #rel #sz "\t%" #w "3, %" #w "0, %2\n" \
+ " nop\n" \
+ " " #nop_lse) \
+ : "=&r" (ret), "=&r" (tmp), "+Q" (*(u8 *)ptr) \
+ : "r" (x) \
+ : cl); \
+ \
+ return ret; \
}
-#define xchg(ptr,x) \
-({ \
- __typeof__(*(ptr)) __ret; \
- __ret = (__typeof__(*(ptr))) \
- __xchg((unsigned long)(x), (ptr), sizeof(*(ptr))); \
- __ret; \
+__XCHG_CASE(w, b, 1, , , , , , )
+__XCHG_CASE(w, h, 2, , , , , , )
+__XCHG_CASE(w, , 4, , , , , , )
+__XCHG_CASE( , , 8, , , , , , )
+__XCHG_CASE(w, b, acq_1, , , a, a, , "memory")
+__XCHG_CASE(w, h, acq_2, , , a, a, , "memory")
+__XCHG_CASE(w, , acq_4, , , a, a, , "memory")
+__XCHG_CASE( , , acq_8, , , a, a, , "memory")
+__XCHG_CASE(w, b, rel_1, , , , , l, "memory")
+__XCHG_CASE(w, h, rel_2, , , , , l, "memory")
+__XCHG_CASE(w, , rel_4, , , , , l, "memory")
+__XCHG_CASE( , , rel_8, , , , , l, "memory")
+__XCHG_CASE(w, b, mb_1, dmb ish, nop, , a, l, "memory")
+__XCHG_CASE(w, h, mb_2, dmb ish, nop, , a, l, "memory")
+__XCHG_CASE(w, , mb_4, dmb ish, nop, , a, l, "memory")
+__XCHG_CASE( , , mb_8, dmb ish, nop, , a, l, "memory")
+
+#undef __XCHG_CASE
+
+#define __XCHG_GEN(sfx) \
+static inline unsigned long __xchg##sfx(unsigned long x, \
+ volatile void *ptr, \
+ int size) \
+{ \
+ switch (size) { \
+ case 1: \
+ return __xchg_case##sfx##_1(x, ptr); \
+ case 2: \
+ return __xchg_case##sfx##_2(x, ptr); \
+ case 4: \
+ return __xchg_case##sfx##_4(x, ptr); \
+ case 8: \
+ return __xchg_case##sfx##_8(x, ptr); \
+ default: \
+ BUILD_BUG(); \
+ } \
+ \
+ unreachable(); \
+}
+
+__XCHG_GEN()
+__XCHG_GEN(_acq)
+__XCHG_GEN(_rel)
+__XCHG_GEN(_mb)
+
+#undef __XCHG_GEN
+
+#define __xchg_wrapper(sfx, ptr, x) \
+({ \
+ __typeof__(*(ptr)) __ret; \
+ __ret = (__typeof__(*(ptr))) \
+ __xchg##sfx((unsigned long)(x), (ptr), sizeof(*(ptr))); \
+ __ret; \
})
-static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
- unsigned long new, int size)
-{
- switch (size) {
- case 1:
- return __cmpxchg_case_1(ptr, (u8)old, new);
- case 2:
- return __cmpxchg_case_2(ptr, (u16)old, new);
- case 4:
- return __cmpxchg_case_4(ptr, old, new);
- case 8:
- return __cmpxchg_case_8(ptr, old, new);
- default:
- BUILD_BUG();
- }
-
- unreachable();
+/* xchg */
+#define xchg_relaxed(...) __xchg_wrapper( , __VA_ARGS__)
+#define xchg_acquire(...) __xchg_wrapper(_acq, __VA_ARGS__)
+#define xchg_release(...) __xchg_wrapper(_rel, __VA_ARGS__)
+#define xchg(...) __xchg_wrapper( _mb, __VA_ARGS__)
+
+#define __CMPXCHG_GEN(sfx) \
+static inline unsigned long __cmpxchg##sfx(volatile void *ptr, \
+ unsigned long old, \
+ unsigned long new, \
+ int size) \
+{ \
+ switch (size) { \
+ case 1: \
+ return __cmpxchg_case##sfx##_1(ptr, (u8)old, new); \
+ case 2: \
+ return __cmpxchg_case##sfx##_2(ptr, (u16)old, new); \
+ case 4: \
+ return __cmpxchg_case##sfx##_4(ptr, old, new); \
+ case 8: \
+ return __cmpxchg_case##sfx##_8(ptr, old, new); \
+ default: \
+ BUILD_BUG(); \
+ } \
+ \
+ unreachable(); \
}
-static inline unsigned long __cmpxchg_mb(volatile void *ptr, unsigned long old,
- unsigned long new, int size)
-{
- switch (size) {
- case 1:
- return __cmpxchg_case_mb_1(ptr, (u8)old, new);
- case 2:
- return __cmpxchg_case_mb_2(ptr, (u16)old, new);
- case 4:
- return __cmpxchg_case_mb_4(ptr, old, new);
- case 8:
- return __cmpxchg_case_mb_8(ptr, old, new);
- default:
- BUILD_BUG();
- }
-
- unreachable();
-}
+__CMPXCHG_GEN()
+__CMPXCHG_GEN(_acq)
+__CMPXCHG_GEN(_rel)
+__CMPXCHG_GEN(_mb)
-#define cmpxchg(ptr, o, n) \
-({ \
- __typeof__(*(ptr)) __ret; \
- __ret = (__typeof__(*(ptr))) \
- __cmpxchg_mb((ptr), (unsigned long)(o), (unsigned long)(n), \
- sizeof(*(ptr))); \
- __ret; \
-})
+#undef __CMPXCHG_GEN
-#define cmpxchg_local(ptr, o, n) \
-({ \
- __typeof__(*(ptr)) __ret; \
- __ret = (__typeof__(*(ptr))) \
- __cmpxchg((ptr), (unsigned long)(o), \
- (unsigned long)(n), sizeof(*(ptr))); \
- __ret; \
+#define __cmpxchg_wrapper(sfx, ptr, o, n) \
+({ \
+ __typeof__(*(ptr)) __ret; \
+ __ret = (__typeof__(*(ptr))) \
+ __cmpxchg##sfx((ptr), (unsigned long)(o), \
+ (unsigned long)(n), sizeof(*(ptr))); \
+ __ret; \
})
+/* cmpxchg */
+#define cmpxchg_relaxed(...) __cmpxchg_wrapper( , __VA_ARGS__)
+#define cmpxchg_acquire(...) __cmpxchg_wrapper(_acq, __VA_ARGS__)
+#define cmpxchg_release(...) __cmpxchg_wrapper(_rel, __VA_ARGS__)
+#define cmpxchg(...) __cmpxchg_wrapper( _mb, __VA_ARGS__)
+#define cmpxchg_local cmpxchg_relaxed
+
+/* cmpxchg64 */
+#define cmpxchg64_relaxed cmpxchg_relaxed
+#define cmpxchg64_acquire cmpxchg_acquire
+#define cmpxchg64_release cmpxchg_release
+#define cmpxchg64 cmpxchg
+#define cmpxchg64_local cmpxchg_local
+
+/* cmpxchg_double */
#define system_has_cmpxchg_double() 1
#define __cmpxchg_double_check(ptr1, ptr2) \
@@ -202,6 +199,7 @@ static inline unsigned long __cmpxchg_mb(volatile void *ptr, unsigned long old,
__ret; \
})
+/* this_cpu_cmpxchg */
#define _protect_cmpxchg_local(pcp, o, n) \
({ \
typeof(*raw_cpu_ptr(&(pcp))) __ret; \
@@ -227,9 +225,4 @@ static inline unsigned long __cmpxchg_mb(volatile void *ptr, unsigned long old,
__ret; \
})
-#define cmpxchg64(ptr,o,n) cmpxchg((ptr),(o),(n))
-#define cmpxchg64_local(ptr,o,n) cmpxchg_local((ptr),(o),(n))
-
-#define cmpxchg64_relaxed(ptr,o,n) cmpxchg_local((ptr),(o),(n))
-
#endif /* __ASM_CMPXCHG_H */
diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h
index 8e797b2fcc01..b5e9cee4b5f8 100644
--- a/arch/arm64/include/asm/cpu.h
+++ b/arch/arm64/include/asm/cpu.h
@@ -63,4 +63,8 @@ DECLARE_PER_CPU(struct cpuinfo_arm64, cpu_data);
void cpuinfo_store_cpu(void);
void __init cpuinfo_store_boot_cpu(void);
+void __init init_cpu_features(struct cpuinfo_arm64 *info);
+void update_cpu_features(int cpu, struct cpuinfo_arm64 *info,
+ struct cpuinfo_arm64 *boot);
+
#endif /* __ASM_CPU_H */
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index dbc78d2b8cc6..11d5bb0fdd54 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -10,6 +10,7 @@
#define __ASM_CPUFEATURE_H
#include <asm/hwcap.h>
+#include <asm/sysreg.h>
/*
* In the arm64 world (as in the ARM world), elf_hwcap is used both internally
@@ -35,11 +36,42 @@
#include <linux/kernel.h>
+/* CPU feature register tracking */
+enum ftr_type {
+ FTR_EXACT, /* Use a predefined safe value */
+ FTR_LOWER_SAFE, /* Smaller value is safe */
+ FTR_HIGHER_SAFE,/* Bigger value is safe */
+};
+
+#define FTR_STRICT true /* SANITY check strict matching required */
+#define FTR_NONSTRICT false /* SANITY check ignored */
+
+struct arm64_ftr_bits {
+ bool strict; /* CPU Sanity check: strict matching required ? */
+ enum ftr_type type;
+ u8 shift;
+ u8 width;
+ s64 safe_val; /* safe value for discrete features */
+};
+
+/*
+ * @arm64_ftr_reg - Feature register
+ * @strict_mask Bits which should match across all CPUs for sanity.
+ * @sys_val Safe value across the CPUs (system view)
+ */
+struct arm64_ftr_reg {
+ u32 sys_id;
+ const char *name;
+ u64 strict_mask;
+ u64 sys_val;
+ struct arm64_ftr_bits *ftr_bits;
+};
+
struct arm64_cpu_capabilities {
const char *desc;
u16 capability;
bool (*matches)(const struct arm64_cpu_capabilities *);
- void (*enable)(void);
+ void (*enable)(void *); /* Called on all active CPUs */
union {
struct { /* To be used for erratum handling only */
u32 midr_model;
@@ -47,8 +79,11 @@ struct arm64_cpu_capabilities {
};
struct { /* Feature register checking */
+ u32 sys_reg;
int field_pos;
int min_field_value;
+ int hwcap_type;
+ unsigned long hwcap;
};
};
};
@@ -76,19 +111,59 @@ static inline void cpus_set_cap(unsigned int num)
__set_bit(num, cpu_hwcaps);
}
-static inline int __attribute_const__ cpuid_feature_extract_field(u64 features,
- int field)
+static inline int __attribute_const__
+cpuid_feature_extract_field_width(u64 features, int field, int width)
+{
+ return (s64)(features << (64 - width - field)) >> (64 - width);
+}
+
+static inline int __attribute_const__
+cpuid_feature_extract_field(u64 features, int field)
+{
+ return cpuid_feature_extract_field_width(features, field, 4);
+}
+
+static inline u64 arm64_ftr_mask(struct arm64_ftr_bits *ftrp)
+{
+ return (u64)GENMASK(ftrp->shift + ftrp->width - 1, ftrp->shift);
+}
+
+static inline s64 arm64_ftr_value(struct arm64_ftr_bits *ftrp, u64 val)
+{
+ return cpuid_feature_extract_field_width(val, ftrp->shift, ftrp->width);
+}
+
+static inline bool id_aa64mmfr0_mixed_endian_el0(u64 mmfr0)
{
- return (s64)(features << (64 - 4 - field)) >> (64 - 4);
+ return cpuid_feature_extract_field(mmfr0, ID_AA64MMFR0_BIGENDEL_SHIFT) == 0x1 ||
+ cpuid_feature_extract_field(mmfr0, ID_AA64MMFR0_BIGENDEL0_SHIFT) == 0x1;
}
+void __init setup_cpu_features(void);
-void check_cpu_capabilities(const struct arm64_cpu_capabilities *caps,
+void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps,
const char *info);
void check_local_cpu_errata(void);
-void check_local_cpu_features(void);
-bool cpu_supports_mixed_endian_el0(void);
-bool system_supports_mixed_endian_el0(void);
+
+#ifdef CONFIG_HOTPLUG_CPU
+void verify_local_cpu_capabilities(void);
+#else
+static inline void verify_local_cpu_capabilities(void)
+{
+}
+#endif
+
+u64 read_system_reg(u32 id);
+
+static inline bool cpu_supports_mixed_endian_el0(void)
+{
+ return id_aa64mmfr0_mixed_endian_el0(read_cpuid(ID_AA64MMFR0_EL1));
+}
+
+static inline bool system_supports_mixed_endian_el0(void)
+{
+ return id_aa64mmfr0_mixed_endian_el0(read_system_reg(SYS_ID_AA64MMFR0_EL1));
+}
#endif /* __ASSEMBLY__ */
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index 100a3d1b17c8..1a5949364ed0 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -75,15 +75,6 @@
#define CAVIUM_CPU_PART_THUNDERX 0x0A1
-#define ID_AA64MMFR0_BIGENDEL0_SHIFT 16
-#define ID_AA64MMFR0_BIGENDEL0_MASK (0xf << ID_AA64MMFR0_BIGENDEL0_SHIFT)
-#define ID_AA64MMFR0_BIGENDEL0(mmfr0) \
- (((mmfr0) & ID_AA64MMFR0_BIGENDEL0_MASK) >> ID_AA64MMFR0_BIGENDEL0_SHIFT)
-#define ID_AA64MMFR0_BIGEND_SHIFT 8
-#define ID_AA64MMFR0_BIGEND_MASK (0xf << ID_AA64MMFR0_BIGEND_SHIFT)
-#define ID_AA64MMFR0_BIGEND(mmfr0) \
- (((mmfr0) & ID_AA64MMFR0_BIGEND_MASK) >> ID_AA64MMFR0_BIGEND_SHIFT)
-
#ifndef __ASSEMBLY__
/*
@@ -115,12 +106,6 @@ static inline u32 __attribute_const__ read_cpuid_cachetype(void)
{
return read_cpuid(CTR_EL0);
}
-
-static inline bool id_aa64mmfr0_mixed_endian_el0(u64 mmfr0)
-{
- return (ID_AA64MMFR0_BIGEND(mmfr0) == 0x1) ||
- (ID_AA64MMFR0_BIGENDEL0(mmfr0) == 0x1);
-}
#endif /* __ASSEMBLY__ */
#endif
diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h
index 8b9884c726ad..309704544d22 100644
--- a/arch/arm64/include/asm/fixmap.h
+++ b/arch/arm64/include/asm/fixmap.h
@@ -17,6 +17,7 @@
#ifndef __ASSEMBLY__
#include <linux/kernel.h>
+#include <linux/sizes.h>
#include <asm/boot.h>
#include <asm/page.h>
@@ -55,11 +56,7 @@ enum fixed_addresses {
* Temporary boot-time mappings, used by early_ioremap(),
* before ioremap() is functional.
*/
-#ifdef CONFIG_ARM64_64K_PAGES
-#define NR_FIX_BTMAPS 4
-#else
-#define NR_FIX_BTMAPS 64
-#endif
+#define NR_FIX_BTMAPS (SZ_256K / PAGE_SIZE)
#define FIX_BTMAPS_SLOTS 7
#define TOTAL_FIX_BTMAPS (NR_FIX_BTMAPS * FIX_BTMAPS_SLOTS)
diff --git a/arch/arm64/include/asm/hw_breakpoint.h b/arch/arm64/include/asm/hw_breakpoint.h
index 4c47cb2fbb52..e54415ec6935 100644
--- a/arch/arm64/include/asm/hw_breakpoint.h
+++ b/arch/arm64/include/asm/hw_breakpoint.h
@@ -17,6 +17,7 @@
#define __ASM_HW_BREAKPOINT_H
#include <asm/cputype.h>
+#include <asm/cpufeature.h>
#ifdef __KERNEL__
@@ -137,13 +138,17 @@ extern struct pmu perf_ops_bp;
/* Determine number of BRP registers available. */
static inline int get_num_brps(void)
{
- return ((read_cpuid(ID_AA64DFR0_EL1) >> 12) & 0xf) + 1;
+ return 1 +
+ cpuid_feature_extract_field(read_system_reg(SYS_ID_AA64DFR0_EL1),
+ ID_AA64DFR0_BRPS_SHIFT);
}
/* Determine number of WRP registers available. */
static inline int get_num_wrps(void)
{
- return ((read_cpuid(ID_AA64DFR0_EL1) >> 20) & 0xf) + 1;
+ return 1 +
+ cpuid_feature_extract_field(read_system_reg(SYS_ID_AA64DFR0_EL1),
+ ID_AA64DFR0_WRPS_SHIFT);
}
#endif /* __KERNEL__ */
diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h
index 0ad735166d9f..400b80b49595 100644
--- a/arch/arm64/include/asm/hwcap.h
+++ b/arch/arm64/include/asm/hwcap.h
@@ -52,6 +52,14 @@
extern unsigned int compat_elf_hwcap, compat_elf_hwcap2;
#endif
+enum {
+ CAP_HWCAP = 1,
+#ifdef CONFIG_COMPAT
+ CAP_COMPAT_HWCAP,
+ CAP_COMPAT_HWCAP2,
+#endif
+};
+
extern unsigned long elf_hwcap;
#endif
#endif
diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h
index bbb251b14746..09169296c3cc 100644
--- a/arch/arm64/include/asm/irq.h
+++ b/arch/arm64/include/asm/irq.h
@@ -7,7 +7,6 @@
struct pt_regs;
-extern void migrate_irqs(void);
extern void set_handle_irq(void (*handle_irq)(struct pt_regs *));
static inline void acpi_irq_init(void)
diff --git a/arch/arm64/include/asm/kasan.h b/arch/arm64/include/asm/kasan.h
new file mode 100644
index 000000000000..2774fa384c47
--- /dev/null
+++ b/arch/arm64/include/asm/kasan.h
@@ -0,0 +1,38 @@
+#ifndef __ASM_KASAN_H
+#define __ASM_KASAN_H
+
+#ifndef __ASSEMBLY__
+
+#ifdef CONFIG_KASAN
+
+#include <linux/linkage.h>
+#include <asm/memory.h>
+
+/*
+ * KASAN_SHADOW_START: beginning of the kernel virtual addresses.
+ * KASAN_SHADOW_END: KASAN_SHADOW_START + 1/8 of kernel virtual addresses.
+ */
+#define KASAN_SHADOW_START (VA_START)
+#define KASAN_SHADOW_END (KASAN_SHADOW_START + (1UL << (VA_BITS - 3)))
+
+/*
+ * This value is used to map an address to the corresponding shadow
+ * address by the following formula:
+ * shadow_addr = (address >> 3) + KASAN_SHADOW_OFFSET;
+ *
+ * (1 << 61) shadow addresses - [KASAN_SHADOW_OFFSET,KASAN_SHADOW_END]
+ * cover all 64-bits of virtual addresses. So KASAN_SHADOW_OFFSET
+ * should satisfy the following equation:
+ * KASAN_SHADOW_OFFSET = KASAN_SHADOW_END - (1ULL << 61)
+ */
+#define KASAN_SHADOW_OFFSET (KASAN_SHADOW_END - (1ULL << (64 - 3)))
+
+void kasan_init(void);
+asmlinkage void kasan_early_init(void);
+
+#else
+static inline void kasan_init(void) { }
+#endif
+
+#endif
+#endif
diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h
new file mode 100644
index 000000000000..a459714ee29e
--- /dev/null
+++ b/arch/arm64/include/asm/kernel-pgtable.h
@@ -0,0 +1,83 @@
+/*
+ * Kernel page table mapping
+ *
+ * Copyright (C) 2015 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ASM_KERNEL_PGTABLE_H
+#define __ASM_KERNEL_PGTABLE_H
+
+
+/*
+ * The linear mapping and the start of memory are both 2M aligned (per
+ * the arm64 booting.txt requirements). Hence we can use section mapping
+ * with 4K (section size = 2M) but not with 16K (section size = 32M) or
+ * 64K (section size = 512M).
+ */
+#ifdef CONFIG_ARM64_4K_PAGES
+#define ARM64_SWAPPER_USES_SECTION_MAPS 1
+#else
+#define ARM64_SWAPPER_USES_SECTION_MAPS 0
+#endif
+
+/*
+ * The idmap and swapper page tables need some space reserved in the kernel
+ * image. Both require pgd, pud (4 levels only) and pmd tables to (section)
+ * map the kernel. With the 64K page configuration, swapper and idmap need to
+ * map to pte level. The swapper also maps the FDT (see __create_page_tables
+ * for more information). Note that the number of ID map translation levels
+ * could be increased on the fly if system RAM is out of reach for the default
+ * VA range, so pages required to map highest possible PA are reserved in all
+ * cases.
+ */
+#if ARM64_SWAPPER_USES_SECTION_MAPS
+#define SWAPPER_PGTABLE_LEVELS (CONFIG_PGTABLE_LEVELS - 1)
+#define IDMAP_PGTABLE_LEVELS (ARM64_HW_PGTABLE_LEVELS(PHYS_MASK_SHIFT) - 1)
+#else
+#define SWAPPER_PGTABLE_LEVELS (CONFIG_PGTABLE_LEVELS)
+#define IDMAP_PGTABLE_LEVELS (ARM64_HW_PGTABLE_LEVELS(PHYS_MASK_SHIFT))
+#endif
+
+#define SWAPPER_DIR_SIZE (SWAPPER_PGTABLE_LEVELS * PAGE_SIZE)
+#define IDMAP_DIR_SIZE (IDMAP_PGTABLE_LEVELS * PAGE_SIZE)
+
+/* Initial memory map size */
+#if ARM64_SWAPPER_USES_SECTION_MAPS
+#define SWAPPER_BLOCK_SHIFT SECTION_SHIFT
+#define SWAPPER_BLOCK_SIZE SECTION_SIZE
+#define SWAPPER_TABLE_SHIFT PUD_SHIFT
+#else
+#define SWAPPER_BLOCK_SHIFT PAGE_SHIFT
+#define SWAPPER_BLOCK_SIZE PAGE_SIZE
+#define SWAPPER_TABLE_SHIFT PMD_SHIFT
+#endif
+
+/* The size of the initial kernel direct mapping */
+#define SWAPPER_INIT_MAP_SIZE (_AC(1, UL) << SWAPPER_TABLE_SHIFT)
+
+/*
+ * Initial memory map attributes.
+ */
+#define SWAPPER_PTE_FLAGS (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
+#define SWAPPER_PMD_FLAGS (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
+
+#if ARM64_SWAPPER_USES_SECTION_MAPS
+#define SWAPPER_MM_MMUFLAGS (PMD_ATTRINDX(MT_NORMAL) | SWAPPER_PMD_FLAGS)
+#else
+#define SWAPPER_MM_MMUFLAGS (PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS)
+#endif
+
+
+#endif /* __ASM_KERNEL_PGTABLE_H */
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index 67027c611dbd..853953cd1f08 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -42,12 +42,14 @@
* PAGE_OFFSET - the virtual address of the start of the kernel image (top
* (VA_BITS - 1))
* VA_BITS - the maximum number of bits for virtual addresses.
+ * VA_START - the first kernel virtual address.
* TASK_SIZE - the maximum size of a user space task.
* TASK_UNMAPPED_BASE - the lower boundary of the mmap VM area.
* The module space lives between the addresses given by TASK_SIZE
* and PAGE_OFFSET - it must be within 128MB of the kernel text.
*/
#define VA_BITS (CONFIG_ARM64_VA_BITS)
+#define VA_START (UL(0xffffffffffffffff) << VA_BITS)
#define PAGE_OFFSET (UL(0xffffffffffffffff) << (VA_BITS - 1))
#define MODULES_END (PAGE_OFFSET)
#define MODULES_VADDR (MODULES_END - SZ_64M)
@@ -68,10 +70,6 @@
#define TASK_UNMAPPED_BASE (PAGE_ALIGN(TASK_SIZE / 4))
-#if TASK_SIZE_64 > MODULES_VADDR
-#error Top of 64-bit user space clashes with start of module space
-#endif
-
/*
* Physical vs virtual RAM address space conversion. These are
* private definitions which should NOT be used outside memory.h
diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h
index 030208767185..990124a67eeb 100644
--- a/arch/arm64/include/asm/mmu.h
+++ b/arch/arm64/include/asm/mmu.h
@@ -17,15 +17,16 @@
#define __ASM_MMU_H
typedef struct {
- unsigned int id;
- raw_spinlock_t id_lock;
- void *vdso;
+ atomic64_t id;
+ void *vdso;
} mm_context_t;
-#define INIT_MM_CONTEXT(name) \
- .context.id_lock = __RAW_SPIN_LOCK_UNLOCKED(name.context.id_lock),
-
-#define ASID(mm) ((mm)->context.id & 0xffff)
+/*
+ * This macro is only used by the TLBI code, which cannot race with an
+ * ASID change and therefore doesn't need to reload the counter using
+ * atomic64_read.
+ */
+#define ASID(mm) ((mm)->context.id.counter & 0xffff)
extern void paging_init(void);
extern void __iomem *early_io_map(phys_addr_t phys, unsigned long virt);
diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index 8ec41e5f56f0..c0e87898ba96 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -28,13 +28,6 @@
#include <asm/cputype.h>
#include <asm/pgtable.h>
-#define MAX_ASID_BITS 16
-
-extern unsigned int cpu_last_asid;
-
-void __init_new_context(struct task_struct *tsk, struct mm_struct *mm);
-void __new_context(struct mm_struct *mm);
-
#ifdef CONFIG_PID_IN_CONTEXTIDR
static inline void contextidr_thread_switch(struct task_struct *next)
{
@@ -77,96 +70,38 @@ static inline bool __cpu_uses_extended_idmap(void)
unlikely(idmap_t0sz != TCR_T0SZ(VA_BITS)));
}
-static inline void __cpu_set_tcr_t0sz(u64 t0sz)
-{
- unsigned long tcr;
-
- if (__cpu_uses_extended_idmap())
- asm volatile (
- " mrs %0, tcr_el1 ;"
- " bfi %0, %1, %2, %3 ;"
- " msr tcr_el1, %0 ;"
- " isb"
- : "=&r" (tcr)
- : "r"(t0sz), "I"(TCR_T0SZ_OFFSET), "I"(TCR_TxSZ_WIDTH));
-}
-
-/*
- * Set TCR.T0SZ to the value appropriate for activating the identity map.
- */
-static inline void cpu_set_idmap_tcr_t0sz(void)
-{
- __cpu_set_tcr_t0sz(idmap_t0sz);
-}
-
/*
* Set TCR.T0SZ to its default value (based on VA_BITS)
*/
static inline void cpu_set_default_tcr_t0sz(void)
{
- __cpu_set_tcr_t0sz(TCR_T0SZ(VA_BITS));
-}
-
-static inline void switch_new_context(struct mm_struct *mm)
-{
- unsigned long flags;
-
- __new_context(mm);
+ unsigned long tcr;
- local_irq_save(flags);
- cpu_switch_mm(mm->pgd, mm);
- local_irq_restore(flags);
-}
+ if (!__cpu_uses_extended_idmap())
+ return;
-static inline void check_and_switch_context(struct mm_struct *mm,
- struct task_struct *tsk)
-{
- /*
- * Required during context switch to avoid speculative page table
- * walking with the wrong TTBR.
- */
- cpu_set_reserved_ttbr0();
-
- if (!((mm->context.id ^ cpu_last_asid) >> MAX_ASID_BITS))
- /*
- * The ASID is from the current generation, just switch to the
- * new pgd. This condition is only true for calls from
- * context_switch() and interrupts are already disabled.
- */
- cpu_switch_mm(mm->pgd, mm);
- else if (irqs_disabled())
- /*
- * Defer the new ASID allocation until after the context
- * switch critical region since __new_context() cannot be
- * called with interrupts disabled.
- */
- set_ti_thread_flag(task_thread_info(tsk), TIF_SWITCH_MM);
- else
- /*
- * That is a direct call to switch_mm() or activate_mm() with
- * interrupts enabled and a new context.
- */
- switch_new_context(mm);
+ asm volatile (
+ " mrs %0, tcr_el1 ;"
+ " bfi %0, %1, %2, %3 ;"
+ " msr tcr_el1, %0 ;"
+ " isb"
+ : "=&r" (tcr)
+ : "r"(TCR_T0SZ(VA_BITS)), "I"(TCR_T0SZ_OFFSET), "I"(TCR_TxSZ_WIDTH));
}
-#define init_new_context(tsk,mm) (__init_new_context(tsk,mm),0)
+/*
+ * It would be nice to return ASIDs back to the allocator, but unfortunately
+ * that introduces a race with a generation rollover where we could erroneously
+ * free an ASID allocated in a future generation. We could workaround this by
+ * freeing the ASID from the context of the dying mm (e.g. in arch_exit_mmap),
+ * but we'd then need to make sure that we didn't dirty any TLBs afterwards.
+ * Setting a reserved TTBR0 or EPD0 would work, but it all gets ugly when you
+ * take CPU migration into account.
+ */
#define destroy_context(mm) do { } while(0)
+void check_and_switch_context(struct mm_struct *mm, unsigned int cpu);
-#define finish_arch_post_lock_switch \
- finish_arch_post_lock_switch
-static inline void finish_arch_post_lock_switch(void)
-{
- if (test_and_clear_thread_flag(TIF_SWITCH_MM)) {
- struct mm_struct *mm = current->mm;
- unsigned long flags;
-
- __new_context(mm);
-
- local_irq_save(flags);
- cpu_switch_mm(mm->pgd, mm);
- local_irq_restore(flags);
- }
-}
+#define init_new_context(tsk,mm) ({ atomic64_set(&mm->context.id, 0); 0; })
/*
* This is called when "tsk" is about to enter lazy TLB mode.
@@ -194,6 +129,9 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next,
{
unsigned int cpu = smp_processor_id();
+ if (prev == next)
+ return;
+
/*
* init_mm.pgd does not contain any user mappings and it is always
* active for kernel addresses in TTBR1. Just set the reserved TTBR0.
@@ -203,8 +141,7 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next,
return;
}
- if (!cpumask_test_and_set_cpu(cpu, mm_cpumask(next)) || prev != next)
- check_and_switch_context(next, tsk);
+ check_and_switch_context(next, cpu);
}
#define deactivate_mm(tsk,mm) do { } while (0)
diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h
index 7d9c7e4a424b..9b2f5a9d019d 100644
--- a/arch/arm64/include/asm/page.h
+++ b/arch/arm64/include/asm/page.h
@@ -20,31 +20,22 @@
#define __ASM_PAGE_H
/* PAGE_SHIFT determines the page size */
+/* CONT_SHIFT determines the number of pages which can be tracked together */
#ifdef CONFIG_ARM64_64K_PAGES
#define PAGE_SHIFT 16
+#define CONT_SHIFT 5
+#elif defined(CONFIG_ARM64_16K_PAGES)
+#define PAGE_SHIFT 14
+#define CONT_SHIFT 7
#else
#define PAGE_SHIFT 12
+#define CONT_SHIFT 4
#endif
-#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT)
+#define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT)
#define PAGE_MASK (~(PAGE_SIZE-1))
-/*
- * The idmap and swapper page tables need some space reserved in the kernel
- * image. Both require pgd, pud (4 levels only) and pmd tables to (section)
- * map the kernel. With the 64K page configuration, swapper and idmap need to
- * map to pte level. The swapper also maps the FDT (see __create_page_tables
- * for more information). Note that the number of ID map translation levels
- * could be increased on the fly if system RAM is out of reach for the default
- * VA range, so 3 pages are reserved in all cases.
- */
-#ifdef CONFIG_ARM64_64K_PAGES
-#define SWAPPER_PGTABLE_LEVELS (CONFIG_PGTABLE_LEVELS)
-#else
-#define SWAPPER_PGTABLE_LEVELS (CONFIG_PGTABLE_LEVELS - 1)
-#endif
-
-#define SWAPPER_DIR_SIZE (SWAPPER_PGTABLE_LEVELS * PAGE_SIZE)
-#define IDMAP_DIR_SIZE (3 * PAGE_SIZE)
+#define CONT_SIZE (_AC(1, UL) << (CONT_SHIFT + PAGE_SHIFT))
+#define CONT_MASK (~(CONT_SIZE-1))
#ifndef __ASSEMBLY__
diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h
index 76420568d66a..c15053902942 100644
--- a/arch/arm64/include/asm/pgalloc.h
+++ b/arch/arm64/include/asm/pgalloc.h
@@ -27,6 +27,7 @@
#define check_pgt_cache() do { } while (0)
#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO)
+#define PGD_SIZE (PTRS_PER_PGD * sizeof(pgd_t))
#if CONFIG_PGTABLE_LEVELS > 2
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index 24154b055835..d6739e836f7b 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -16,13 +16,46 @@
#ifndef __ASM_PGTABLE_HWDEF_H
#define __ASM_PGTABLE_HWDEF_H
+/*
+ * Number of page-table levels required to address 'va_bits' wide
+ * address, without section mapping. We resolve the top (va_bits - PAGE_SHIFT)
+ * bits with (PAGE_SHIFT - 3) bits at each page table level. Hence:
+ *
+ * levels = DIV_ROUND_UP((va_bits - PAGE_SHIFT), (PAGE_SHIFT - 3))
+ *
+ * where DIV_ROUND_UP(n, d) => (((n) + (d) - 1) / (d))
+ *
+ * We cannot include linux/kernel.h which defines DIV_ROUND_UP here
+ * due to build issues. So we open code DIV_ROUND_UP here:
+ *
+ * ((((va_bits) - PAGE_SHIFT) + (PAGE_SHIFT - 3) - 1) / (PAGE_SHIFT - 3))
+ *
+ * which gets simplified as :
+ */
+#define ARM64_HW_PGTABLE_LEVELS(va_bits) (((va_bits) - 4) / (PAGE_SHIFT - 3))
+
+/*
+ * Size mapped by an entry at level n ( 0 <= n <= 3)
+ * We map (PAGE_SHIFT - 3) at all translation levels and PAGE_SHIFT bits
+ * in the final page. The maximum number of translation levels supported by
+ * the architecture is 4. Hence, starting at at level n, we have further
+ * ((4 - n) - 1) levels of translation excluding the offset within the page.
+ * So, the total number of bits mapped by an entry at level n is :
+ *
+ * ((4 - n) - 1) * (PAGE_SHIFT - 3) + PAGE_SHIFT
+ *
+ * Rearranging it a bit we get :
+ * (4 - n) * (PAGE_SHIFT - 3) + 3
+ */
+#define ARM64_HW_PGTABLE_LEVEL_SHIFT(n) ((PAGE_SHIFT - 3) * (4 - (n)) + 3)
+
#define PTRS_PER_PTE (1 << (PAGE_SHIFT - 3))
/*
* PMD_SHIFT determines the size a level 2 page table entry can map.
*/
#if CONFIG_PGTABLE_LEVELS > 2
-#define PMD_SHIFT ((PAGE_SHIFT - 3) * 2 + 3)
+#define PMD_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(2)
#define PMD_SIZE (_AC(1, UL) << PMD_SHIFT)
#define PMD_MASK (~(PMD_SIZE-1))
#define PTRS_PER_PMD PTRS_PER_PTE
@@ -32,7 +65,7 @@
* PUD_SHIFT determines the size a level 1 page table entry can map.
*/
#if CONFIG_PGTABLE_LEVELS > 3
-#define PUD_SHIFT ((PAGE_SHIFT - 3) * 3 + 3)
+#define PUD_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(1)
#define PUD_SIZE (_AC(1, UL) << PUD_SHIFT)
#define PUD_MASK (~(PUD_SIZE-1))
#define PTRS_PER_PUD PTRS_PER_PTE
@@ -42,7 +75,7 @@
* PGDIR_SHIFT determines the size a top-level page table entry can map
* (depending on the configuration, this level can be 0, 1 or 2).
*/
-#define PGDIR_SHIFT ((PAGE_SHIFT - 3) * CONFIG_PGTABLE_LEVELS + 3)
+#define PGDIR_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(4 - CONFIG_PGTABLE_LEVELS)
#define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT)
#define PGDIR_MASK (~(PGDIR_SIZE-1))
#define PTRS_PER_PGD (1 << (VA_BITS - PGDIR_SHIFT))
@@ -55,6 +88,13 @@
#define SECTION_MASK (~(SECTION_SIZE-1))
/*
+ * Contiguous page definitions.
+ */
+#define CONT_PTES (_AC(1, UL) << CONT_SHIFT)
+/* the the numerical offset of the PTE within a range of CONT_PTES */
+#define CONT_RANGE_OFFSET(addr) (((addr)>>PAGE_SHIFT)&(CONT_PTES-1))
+
+/*
* Hardware page table definitions.
*
* Level 1 descriptor (PUD).
@@ -83,6 +123,7 @@
#define PMD_SECT_S (_AT(pmdval_t, 3) << 8)
#define PMD_SECT_AF (_AT(pmdval_t, 1) << 10)
#define PMD_SECT_NG (_AT(pmdval_t, 1) << 11)
+#define PMD_SECT_CONT (_AT(pmdval_t, 1) << 52)
#define PMD_SECT_PXN (_AT(pmdval_t, 1) << 53)
#define PMD_SECT_UXN (_AT(pmdval_t, 1) << 54)
@@ -105,6 +146,7 @@
#define PTE_AF (_AT(pteval_t, 1) << 10) /* Access Flag */
#define PTE_NG (_AT(pteval_t, 1) << 11) /* nG */
#define PTE_DBM (_AT(pteval_t, 1) << 51) /* Dirty Bit Management */
+#define PTE_CONT (_AT(pteval_t, 1) << 52) /* Contiguous range */
#define PTE_PXN (_AT(pteval_t, 1) << 53) /* Privileged XN */
#define PTE_UXN (_AT(pteval_t, 1) << 54) /* User XN */
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 571ca0ed4f05..f3acf421ded4 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -41,7 +41,14 @@
* fixed mappings and modules
*/
#define VMEMMAP_SIZE ALIGN((1UL << (VA_BITS - PAGE_SHIFT)) * sizeof(struct page), PUD_SIZE)
-#define VMALLOC_START (UL(0xffffffffffffffff) << VA_BITS)
+
+#ifndef CONFIG_KASAN
+#define VMALLOC_START (VA_START)
+#else
+#include <asm/kasan.h>
+#define VMALLOC_START (KASAN_SHADOW_END + SZ_64K)
+#endif
+
#define VMALLOC_END (PAGE_OFFSET - PUD_SIZE - VMEMMAP_SIZE - SZ_64K)
#define vmemmap ((struct page *)(VMALLOC_END + SZ_64K))
@@ -74,6 +81,7 @@ extern void __pgd_error(const char *file, int line, unsigned long val);
#define PAGE_KERNEL __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE)
#define PAGE_KERNEL_EXEC __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE)
+#define PAGE_KERNEL_EXEC_CONT __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_CONT)
#define PAGE_HYP __pgprot(_PAGE_DEFAULT | PTE_HYP)
#define PAGE_HYP_DEVICE __pgprot(PROT_DEVICE_nGnRE | PTE_HYP)
@@ -142,6 +150,7 @@ extern struct page *empty_zero_page;
#define pte_special(pte) (!!(pte_val(pte) & PTE_SPECIAL))
#define pte_write(pte) (!!(pte_val(pte) & PTE_WRITE))
#define pte_exec(pte) (!(pte_val(pte) & PTE_UXN))
+#define pte_cont(pte) (!!(pte_val(pte) & PTE_CONT))
#ifdef CONFIG_ARM64_HW_AFDBM
#define pte_hw_dirty(pte) (pte_write(pte) && !(pte_val(pte) & PTE_RDONLY))
@@ -204,6 +213,16 @@ static inline pte_t pte_mkspecial(pte_t pte)
return set_pte_bit(pte, __pgprot(PTE_SPECIAL));
}
+static inline pte_t pte_mkcont(pte_t pte)
+{
+ return set_pte_bit(pte, __pgprot(PTE_CONT));
+}
+
+static inline pte_t pte_mknoncont(pte_t pte)
+{
+ return clear_pte_bit(pte, __pgprot(PTE_CONT));
+}
+
static inline void set_pte(pte_t *ptep, pte_t pte)
{
*ptep = pte;
@@ -648,14 +667,17 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep)
{
/*
- * set_pte() does not have a DSB for user mappings, so make sure that
- * the page table write is visible.
+ * We don't do anything here, so there's a very small chance of
+ * us retaking a user fault which we just fixed up. The alternative
+ * is doing a dsb(ishst), but that penalises the fastpath.
*/
- dsb(ishst);
}
#define update_mmu_cache_pmd(vma, address, pmd) do { } while (0)
+#define kc_vaddr_to_offset(v) ((v) & ~VA_START)
+#define kc_offset_to_vaddr(o) ((o) | VA_START)
+
#endif /* !__ASSEMBLY__ */
#endif /* __ASM_PGTABLE_H */
diff --git a/arch/arm64/include/asm/pmu.h b/arch/arm64/include/asm/pmu.h
deleted file mode 100644
index b7710a59672c..000000000000
--- a/arch/arm64/include/asm/pmu.h
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Based on arch/arm/include/asm/pmu.h
- *
- * Copyright (C) 2009 picoChip Designs Ltd, Jamie Iles
- * Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-#ifndef __ASM_PMU_H
-#define __ASM_PMU_H
-
-#ifdef CONFIG_HW_PERF_EVENTS
-
-/* The events for a given PMU register set. */
-struct pmu_hw_events {
- /*
- * The events that are active on the PMU for the given index.
- */
- struct perf_event **events;
-
- /*
- * A 1 bit for an index indicates that the counter is being used for
- * an event. A 0 means that the counter can be used.
- */
- unsigned long *used_mask;
-
- /*
- * Hardware lock to serialize accesses to PMU registers. Needed for the
- * read/modify/write sequences.
- */
- raw_spinlock_t pmu_lock;
-};
-
-struct arm_pmu {
- struct pmu pmu;
- cpumask_t active_irqs;
- int *irq_affinity;
- const char *name;
- irqreturn_t (*handle_irq)(int irq_num, void *dev);
- void (*enable)(struct hw_perf_event *evt, int idx);
- void (*disable)(struct hw_perf_event *evt, int idx);
- int (*get_event_idx)(struct pmu_hw_events *hw_events,
- struct hw_perf_event *hwc);
- int (*set_event_filter)(struct hw_perf_event *evt,
- struct perf_event_attr *attr);
- u32 (*read_counter)(int idx);
- void (*write_counter)(int idx, u32 val);
- void (*start)(void);
- void (*stop)(void);
- void (*reset)(void *);
- int (*map_event)(struct perf_event *event);
- int num_events;
- atomic_t active_events;
- struct mutex reserve_mutex;
- u64 max_period;
- struct platform_device *plat_device;
- struct pmu_hw_events *(*get_hw_events)(void);
-};
-
-#define to_arm_pmu(p) (container_of(p, struct arm_pmu, pmu))
-
-int __init armpmu_register(struct arm_pmu *armpmu, char *name, int type);
-
-u64 armpmu_event_update(struct perf_event *event,
- struct hw_perf_event *hwc,
- int idx);
-
-int armpmu_event_set_period(struct perf_event *event,
- struct hw_perf_event *hwc,
- int idx);
-
-#endif /* CONFIG_HW_PERF_EVENTS */
-#endif /* __ASM_PMU_H */
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 98f32355dc97..4acb7ca94fcd 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -186,6 +186,6 @@ static inline void spin_lock_prefetch(const void *x)
#endif
-void cpu_enable_pan(void);
+void cpu_enable_pan(void *__unused);
#endif /* __ASM_PROCESSOR_H */
diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h
index 536274ed292e..e9e5467e0bf4 100644
--- a/arch/arm64/include/asm/ptrace.h
+++ b/arch/arm64/include/asm/ptrace.h
@@ -83,14 +83,14 @@
#define compat_sp regs[13]
#define compat_lr regs[14]
#define compat_sp_hyp regs[15]
-#define compat_sp_irq regs[16]
-#define compat_lr_irq regs[17]
-#define compat_sp_svc regs[18]
-#define compat_lr_svc regs[19]
-#define compat_sp_abt regs[20]
-#define compat_lr_abt regs[21]
-#define compat_sp_und regs[22]
-#define compat_lr_und regs[23]
+#define compat_lr_irq regs[16]
+#define compat_sp_irq regs[17]
+#define compat_lr_svc regs[18]
+#define compat_sp_svc regs[19]
+#define compat_lr_abt regs[20]
+#define compat_sp_abt regs[21]
+#define compat_lr_und regs[22]
+#define compat_sp_und regs[23]
#define compat_r8_fiq regs[24]
#define compat_r9_fiq regs[25]
#define compat_r10_fiq regs[26]
diff --git a/arch/arm64/include/asm/string.h b/arch/arm64/include/asm/string.h
index 64d2d4884a9d..2eb714c4639f 100644
--- a/arch/arm64/include/asm/string.h
+++ b/arch/arm64/include/asm/string.h
@@ -36,17 +36,33 @@ extern __kernel_size_t strnlen(const char *, __kernel_size_t);
#define __HAVE_ARCH_MEMCPY
extern void *memcpy(void *, const void *, __kernel_size_t);
+extern void *__memcpy(void *, const void *, __kernel_size_t);
#define __HAVE_ARCH_MEMMOVE
extern void *memmove(void *, const void *, __kernel_size_t);
+extern void *__memmove(void *, const void *, __kernel_size_t);
#define __HAVE_ARCH_MEMCHR
extern void *memchr(const void *, int, __kernel_size_t);
#define __HAVE_ARCH_MEMSET
extern void *memset(void *, int, __kernel_size_t);
+extern void *__memset(void *, int, __kernel_size_t);
#define __HAVE_ARCH_MEMCMP
extern int memcmp(const void *, const void *, size_t);
+
+#if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__)
+
+/*
+ * For files that are not instrumented (e.g. mm/slub.c) we
+ * should use not instrumented version of mem* functions.
+ */
+
+#define memcpy(dst, src, len) __memcpy(dst, src, len)
+#define memmove(dst, src, len) __memmove(dst, src, len)
+#define memset(s, c, n) __memset(s, c, n)
+#endif
+
#endif
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index a7f3d4b2514d..d48ab5b41f52 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -22,9 +22,6 @@
#include <asm/opcodes.h>
-#define SCTLR_EL1_CP15BEN (0x1 << 5)
-#define SCTLR_EL1_SED (0x1 << 8)
-
/*
* ARMv8 ARM reserves the following encoding for system registers:
* (Ref: ARMv8 ARM, Section: "System instruction class encoding overview",
@@ -38,12 +35,162 @@
#define sys_reg(op0, op1, crn, crm, op2) \
((((op0)&3)<<19)|((op1)<<16)|((crn)<<12)|((crm)<<8)|((op2)<<5))
-#define REG_PSTATE_PAN_IMM sys_reg(0, 0, 4, 0, 4)
-#define SCTLR_EL1_SPAN (1 << 23)
+#define SYS_MIDR_EL1 sys_reg(3, 0, 0, 0, 0)
+#define SYS_MPIDR_EL1 sys_reg(3, 0, 0, 0, 5)
+#define SYS_REVIDR_EL1 sys_reg(3, 0, 0, 0, 6)
+
+#define SYS_ID_PFR0_EL1 sys_reg(3, 0, 0, 1, 0)
+#define SYS_ID_PFR1_EL1 sys_reg(3, 0, 0, 1, 1)
+#define SYS_ID_DFR0_EL1 sys_reg(3, 0, 0, 1, 2)
+#define SYS_ID_MMFR0_EL1 sys_reg(3, 0, 0, 1, 4)
+#define SYS_ID_MMFR1_EL1 sys_reg(3, 0, 0, 1, 5)
+#define SYS_ID_MMFR2_EL1 sys_reg(3, 0, 0, 1, 6)
+#define SYS_ID_MMFR3_EL1 sys_reg(3, 0, 0, 1, 7)
+
+#define SYS_ID_ISAR0_EL1 sys_reg(3, 0, 0, 2, 0)
+#define SYS_ID_ISAR1_EL1 sys_reg(3, 0, 0, 2, 1)
+#define SYS_ID_ISAR2_EL1 sys_reg(3, 0, 0, 2, 2)
+#define SYS_ID_ISAR3_EL1 sys_reg(3, 0, 0, 2, 3)
+#define SYS_ID_ISAR4_EL1 sys_reg(3, 0, 0, 2, 4)
+#define SYS_ID_ISAR5_EL1 sys_reg(3, 0, 0, 2, 5)
+#define SYS_ID_MMFR4_EL1 sys_reg(3, 0, 0, 2, 6)
+
+#define SYS_MVFR0_EL1 sys_reg(3, 0, 0, 3, 0)
+#define SYS_MVFR1_EL1 sys_reg(3, 0, 0, 3, 1)
+#define SYS_MVFR2_EL1 sys_reg(3, 0, 0, 3, 2)
+
+#define SYS_ID_AA64PFR0_EL1 sys_reg(3, 0, 0, 4, 0)
+#define SYS_ID_AA64PFR1_EL1 sys_reg(3, 0, 0, 4, 1)
+
+#define SYS_ID_AA64DFR0_EL1 sys_reg(3, 0, 0, 5, 0)
+#define SYS_ID_AA64DFR1_EL1 sys_reg(3, 0, 0, 5, 1)
+
+#define SYS_ID_AA64ISAR0_EL1 sys_reg(3, 0, 0, 6, 0)
+#define SYS_ID_AA64ISAR1_EL1 sys_reg(3, 0, 0, 6, 1)
+
+#define SYS_ID_AA64MMFR0_EL1 sys_reg(3, 0, 0, 7, 0)
+#define SYS_ID_AA64MMFR1_EL1 sys_reg(3, 0, 0, 7, 1)
+
+#define SYS_CNTFRQ_EL0 sys_reg(3, 3, 14, 0, 0)
+#define SYS_CTR_EL0 sys_reg(3, 3, 0, 0, 1)
+#define SYS_DCZID_EL0 sys_reg(3, 3, 0, 0, 7)
+
+#define REG_PSTATE_PAN_IMM sys_reg(0, 0, 4, 0, 4)
#define SET_PSTATE_PAN(x) __inst_arm(0xd5000000 | REG_PSTATE_PAN_IMM |\
(!!x)<<8 | 0x1f)
+/* SCTLR_EL1 */
+#define SCTLR_EL1_CP15BEN (0x1 << 5)
+#define SCTLR_EL1_SED (0x1 << 8)
+#define SCTLR_EL1_SPAN (0x1 << 23)
+
+
+/* id_aa64isar0 */
+#define ID_AA64ISAR0_RDM_SHIFT 28
+#define ID_AA64ISAR0_ATOMICS_SHIFT 20
+#define ID_AA64ISAR0_CRC32_SHIFT 16
+#define ID_AA64ISAR0_SHA2_SHIFT 12
+#define ID_AA64ISAR0_SHA1_SHIFT 8
+#define ID_AA64ISAR0_AES_SHIFT 4
+
+/* id_aa64pfr0 */
+#define ID_AA64PFR0_GIC_SHIFT 24
+#define ID_AA64PFR0_ASIMD_SHIFT 20
+#define ID_AA64PFR0_FP_SHIFT 16
+#define ID_AA64PFR0_EL3_SHIFT 12
+#define ID_AA64PFR0_EL2_SHIFT 8
+#define ID_AA64PFR0_EL1_SHIFT 4
+#define ID_AA64PFR0_EL0_SHIFT 0
+
+#define ID_AA64PFR0_FP_NI 0xf
+#define ID_AA64PFR0_FP_SUPPORTED 0x0
+#define ID_AA64PFR0_ASIMD_NI 0xf
+#define ID_AA64PFR0_ASIMD_SUPPORTED 0x0
+#define ID_AA64PFR0_EL1_64BIT_ONLY 0x1
+#define ID_AA64PFR0_EL0_64BIT_ONLY 0x1
+
+/* id_aa64mmfr0 */
+#define ID_AA64MMFR0_TGRAN4_SHIFT 28
+#define ID_AA64MMFR0_TGRAN64_SHIFT 24
+#define ID_AA64MMFR0_TGRAN16_SHIFT 20
+#define ID_AA64MMFR0_BIGENDEL0_SHIFT 16
+#define ID_AA64MMFR0_SNSMEM_SHIFT 12
+#define ID_AA64MMFR0_BIGENDEL_SHIFT 8
+#define ID_AA64MMFR0_ASID_SHIFT 4
+#define ID_AA64MMFR0_PARANGE_SHIFT 0
+
+#define ID_AA64MMFR0_TGRAN4_NI 0xf
+#define ID_AA64MMFR0_TGRAN4_SUPPORTED 0x0
+#define ID_AA64MMFR0_TGRAN64_NI 0xf
+#define ID_AA64MMFR0_TGRAN64_SUPPORTED 0x0
+#define ID_AA64MMFR0_TGRAN16_NI 0x0
+#define ID_AA64MMFR0_TGRAN16_SUPPORTED 0x1
+
+/* id_aa64mmfr1 */
+#define ID_AA64MMFR1_PAN_SHIFT 20
+#define ID_AA64MMFR1_LOR_SHIFT 16
+#define ID_AA64MMFR1_HPD_SHIFT 12
+#define ID_AA64MMFR1_VHE_SHIFT 8
+#define ID_AA64MMFR1_VMIDBITS_SHIFT 4
+#define ID_AA64MMFR1_HADBS_SHIFT 0
+
+/* id_aa64dfr0 */
+#define ID_AA64DFR0_CTX_CMPS_SHIFT 28
+#define ID_AA64DFR0_WRPS_SHIFT 20
+#define ID_AA64DFR0_BRPS_SHIFT 12
+#define ID_AA64DFR0_PMUVER_SHIFT 8
+#define ID_AA64DFR0_TRACEVER_SHIFT 4
+#define ID_AA64DFR0_DEBUGVER_SHIFT 0
+
+#define ID_ISAR5_RDM_SHIFT 24
+#define ID_ISAR5_CRC32_SHIFT 16
+#define ID_ISAR5_SHA2_SHIFT 12
+#define ID_ISAR5_SHA1_SHIFT 8
+#define ID_ISAR5_AES_SHIFT 4
+#define ID_ISAR5_SEVL_SHIFT 0
+
+#define MVFR0_FPROUND_SHIFT 28
+#define MVFR0_FPSHVEC_SHIFT 24
+#define MVFR0_FPSQRT_SHIFT 20
+#define MVFR0_FPDIVIDE_SHIFT 16
+#define MVFR0_FPTRAP_SHIFT 12
+#define MVFR0_FPDP_SHIFT 8
+#define MVFR0_FPSP_SHIFT 4
+#define MVFR0_SIMD_SHIFT 0
+
+#define MVFR1_SIMDFMAC_SHIFT 28
+#define MVFR1_FPHP_SHIFT 24
+#define MVFR1_SIMDHP_SHIFT 20
+#define MVFR1_SIMDSP_SHIFT 16
+#define MVFR1_SIMDINT_SHIFT 12
+#define MVFR1_SIMDLS_SHIFT 8
+#define MVFR1_FPDNAN_SHIFT 4
+#define MVFR1_FPFTZ_SHIFT 0
+
+
+#define ID_AA64MMFR0_TGRAN4_SHIFT 28
+#define ID_AA64MMFR0_TGRAN64_SHIFT 24
+#define ID_AA64MMFR0_TGRAN16_SHIFT 20
+
+#define ID_AA64MMFR0_TGRAN4_NI 0xf
+#define ID_AA64MMFR0_TGRAN4_SUPPORTED 0x0
+#define ID_AA64MMFR0_TGRAN64_NI 0xf
+#define ID_AA64MMFR0_TGRAN64_SUPPORTED 0x0
+#define ID_AA64MMFR0_TGRAN16_NI 0x0
+#define ID_AA64MMFR0_TGRAN16_SUPPORTED 0x1
+
+#if defined(CONFIG_ARM64_4K_PAGES)
+#define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN4_SHIFT
+#define ID_AA64MMFR0_TGRAN_SUPPORTED ID_AA64MMFR0_TGRAN4_SUPPORTED
+#elif defined(CONFIG_ARM64_16K_PAGES)
+#define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN16_SHIFT
+#define ID_AA64MMFR0_TGRAN_SUPPORTED ID_AA64MMFR0_TGRAN16_SUPPORTED
+#elif defined(CONFIG_ARM64_64K_PAGES)
+#define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN64_SHIFT
+#define ID_AA64MMFR0_TGRAN_SUPPORTED ID_AA64MMFR0_TGRAN64_SUPPORTED
+#endif
+
#ifdef __ASSEMBLY__
.irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index dcd06d18a42a..90c7ff233735 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -23,8 +23,10 @@
#include <linux/compiler.h>
-#ifndef CONFIG_ARM64_64K_PAGES
+#ifdef CONFIG_ARM64_4K_PAGES
#define THREAD_SIZE_ORDER 2
+#elif defined(CONFIG_ARM64_16K_PAGES)
+#define THREAD_SIZE_ORDER 0
#endif
#define THREAD_SIZE 16384
@@ -111,7 +113,6 @@ static inline struct thread_info *current_thread_info(void)
#define TIF_RESTORE_SIGMASK 20
#define TIF_SINGLESTEP 21
#define TIF_32BIT 22 /* 32bit process */
-#define TIF_SWITCH_MM 23 /* deferred switch_mm */
#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h
index d6e6b6660380..ffdaea7954bb 100644
--- a/arch/arm64/include/asm/tlb.h
+++ b/arch/arm64/include/asm/tlb.h
@@ -37,17 +37,21 @@ static inline void __tlb_remove_table(void *_table)
static inline void tlb_flush(struct mmu_gather *tlb)
{
- if (tlb->fullmm) {
- flush_tlb_mm(tlb->mm);
- } else {
- struct vm_area_struct vma = { .vm_mm = tlb->mm, };
- /*
- * The intermediate page table levels are already handled by
- * the __(pte|pmd|pud)_free_tlb() functions, so last level
- * TLBI is sufficient here.
- */
- __flush_tlb_range(&vma, tlb->start, tlb->end, true);
- }
+ struct vm_area_struct vma = { .vm_mm = tlb->mm, };
+
+ /*
+ * The ASID allocator will either invalidate the ASID or mark
+ * it as used.
+ */
+ if (tlb->fullmm)
+ return;
+
+ /*
+ * The intermediate page table levels are already handled by
+ * the __(pte|pmd|pud)_free_tlb() functions, so last level
+ * TLBI is sufficient here.
+ */
+ __flush_tlb_range(&vma, tlb->start, tlb->end, true);
}
static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index 7bd2da021658..b460ae28e346 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -63,6 +63,14 @@
* only require the D-TLB to be invalidated.
* - kaddr - Kernel virtual memory address
*/
+static inline void local_flush_tlb_all(void)
+{
+ dsb(nshst);
+ asm("tlbi vmalle1");
+ dsb(nsh);
+ isb();
+}
+
static inline void flush_tlb_all(void)
{
dsb(ishst);
@@ -73,7 +81,7 @@ static inline void flush_tlb_all(void)
static inline void flush_tlb_mm(struct mm_struct *mm)
{
- unsigned long asid = (unsigned long)ASID(mm) << 48;
+ unsigned long asid = ASID(mm) << 48;
dsb(ishst);
asm("tlbi aside1is, %0" : : "r" (asid));
@@ -83,8 +91,7 @@ static inline void flush_tlb_mm(struct mm_struct *mm)
static inline void flush_tlb_page(struct vm_area_struct *vma,
unsigned long uaddr)
{
- unsigned long addr = uaddr >> 12 |
- ((unsigned long)ASID(vma->vm_mm) << 48);
+ unsigned long addr = uaddr >> 12 | (ASID(vma->vm_mm) << 48);
dsb(ishst);
asm("tlbi vale1is, %0" : : "r" (addr));
@@ -101,7 +108,7 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma,
unsigned long start, unsigned long end,
bool last_level)
{
- unsigned long asid = (unsigned long)ASID(vma->vm_mm) << 48;
+ unsigned long asid = ASID(vma->vm_mm) << 48;
unsigned long addr;
if ((end - start) > MAX_TLB_RANGE) {
@@ -154,9 +161,8 @@ static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end
static inline void __flush_tlb_pgtable(struct mm_struct *mm,
unsigned long uaddr)
{
- unsigned long addr = uaddr >> 12 | ((unsigned long)ASID(mm) << 48);
+ unsigned long addr = uaddr >> 12 | (ASID(mm) << 48);
- dsb(ishst);
asm("tlbi vae1is, %0" : : "r" (addr));
dsb(ish);
}
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 22dc9bc781be..474691f8b13a 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -4,7 +4,6 @@
CPPFLAGS_vmlinux.lds := -DTEXT_OFFSET=$(TEXT_OFFSET)
AFLAGS_head.o := -DTEXT_OFFSET=$(TEXT_OFFSET)
-CFLAGS_efi-stub.o := -DTEXT_OFFSET=$(TEXT_OFFSET)
CFLAGS_armv8_deprecated.o := -I$(src)
CFLAGS_REMOVE_ftrace.o = -pg
@@ -20,6 +19,12 @@ arm64-obj-y := debug-monitors.o entry.o irq.o fpsimd.o \
cpufeature.o alternative.o cacheinfo.o \
smp.o smp_spin_table.o topology.o
+extra-$(CONFIG_EFI) := efi-entry.o
+
+OBJCOPYFLAGS := --prefix-symbols=__efistub_
+$(obj)/%.stub.o: $(obj)/%.o FORCE
+ $(call if_changed,objcopy)
+
arm64-obj-$(CONFIG_COMPAT) += sys32.o kuser32.o signal32.o \
sys_compat.o entry32.o \
../../arm/kernel/opcodes.o
@@ -32,7 +37,7 @@ arm64-obj-$(CONFIG_CPU_PM) += sleep.o suspend.o
arm64-obj-$(CONFIG_CPU_IDLE) += cpuidle.o
arm64-obj-$(CONFIG_JUMP_LABEL) += jump_label.o
arm64-obj-$(CONFIG_KGDB) += kgdb.o
-arm64-obj-$(CONFIG_EFI) += efi.o efi-stub.o efi-entry.o
+arm64-obj-$(CONFIG_EFI) += efi.o efi-entry.stub.o
arm64-obj-$(CONFIG_PCI) += pci.o
arm64-obj-$(CONFIG_ARMV8_DEPRECATED) += armv8_deprecated.o
arm64-obj-$(CONFIG_ACPI) += acpi.o
@@ -40,7 +45,7 @@ arm64-obj-$(CONFIG_ACPI) += acpi.o
obj-y += $(arm64-obj-y) vdso/
obj-m += $(arm64-obj-m)
head-y := head.o
-extra-y := $(head-y) vmlinux.lds
+extra-y += $(head-y) vmlinux.lds
# vDSO - this must be built first to generate the symbol offsets
$(call objectify,$(arm64-obj-y)): $(obj)/vdso/vdso-offsets.h
diff --git a/arch/arm64/kernel/arm64ksyms.c b/arch/arm64/kernel/arm64ksyms.c
index a85843ddbde8..3b6d8cc9dfe0 100644
--- a/arch/arm64/kernel/arm64ksyms.c
+++ b/arch/arm64/kernel/arm64ksyms.c
@@ -51,6 +51,9 @@ EXPORT_SYMBOL(strnlen);
EXPORT_SYMBOL(memset);
EXPORT_SYMBOL(memcpy);
EXPORT_SYMBOL(memmove);
+EXPORT_SYMBOL(__memset);
+EXPORT_SYMBOL(__memcpy);
+EXPORT_SYMBOL(__memmove);
EXPORT_SYMBOL(memchr);
EXPORT_SYMBOL(memcmp);
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 8d89cf8dae55..25de8b244961 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -60,7 +60,7 @@ int main(void)
DEFINE(S_SYSCALLNO, offsetof(struct pt_regs, syscallno));
DEFINE(S_FRAME_SIZE, sizeof(struct pt_regs));
BLANK();
- DEFINE(MM_CONTEXT_ID, offsetof(struct mm_struct, context.id));
+ DEFINE(MM_CONTEXT_ID, offsetof(struct mm_struct, context.id.counter));
BLANK();
DEFINE(VMA_VM_MM, offsetof(struct vm_area_struct, vm_mm));
DEFINE(VMA_VM_FLAGS, offsetof(struct vm_area_struct, vm_flags));
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 574450c257a4..24926f2504f7 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -97,5 +97,5 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
void check_local_cpu_errata(void)
{
- check_cpu_capabilities(arm64_errata, "enabling workaround for");
+ update_cpu_capabilities(arm64_errata, "enabling workaround for");
}
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 305f30dc9e63..52f0d7a5a1c2 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -16,12 +16,567 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-#define pr_fmt(fmt) "alternatives: " fmt
+#define pr_fmt(fmt) "CPU features: " fmt
+#include <linux/bsearch.h>
+#include <linux/sort.h>
#include <linux/types.h>
#include <asm/cpu.h>
#include <asm/cpufeature.h>
+#include <asm/cpu_ops.h>
#include <asm/processor.h>
+#include <asm/sysreg.h>
+
+unsigned long elf_hwcap __read_mostly;
+EXPORT_SYMBOL_GPL(elf_hwcap);
+
+#ifdef CONFIG_COMPAT
+#define COMPAT_ELF_HWCAP_DEFAULT \
+ (COMPAT_HWCAP_HALF|COMPAT_HWCAP_THUMB|\
+ COMPAT_HWCAP_FAST_MULT|COMPAT_HWCAP_EDSP|\
+ COMPAT_HWCAP_TLS|COMPAT_HWCAP_VFP|\
+ COMPAT_HWCAP_VFPv3|COMPAT_HWCAP_VFPv4|\
+ COMPAT_HWCAP_NEON|COMPAT_HWCAP_IDIV|\
+ COMPAT_HWCAP_LPAE)
+unsigned int compat_elf_hwcap __read_mostly = COMPAT_ELF_HWCAP_DEFAULT;
+unsigned int compat_elf_hwcap2 __read_mostly;
+#endif
+
+DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
+
+#define ARM64_FTR_BITS(STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \
+ { \
+ .strict = STRICT, \
+ .type = TYPE, \
+ .shift = SHIFT, \
+ .width = WIDTH, \
+ .safe_val = SAFE_VAL, \
+ }
+
+#define ARM64_FTR_END \
+ { \
+ .width = 0, \
+ }
+
+static struct arm64_ftr_bits ftr_id_aa64isar0[] = {
+ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0),
+ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64ISAR0_RDM_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 24, 4, 0),
+ ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_ATOMICS_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_CRC32_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SHA2_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SHA1_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_AES_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 4, 0), /* RAZ */
+ ARM64_FTR_END,
+};
+
+static struct arm64_ftr_bits ftr_id_aa64pfr0[] = {
+ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0),
+ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 28, 4, 0),
+ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64PFR0_GIC_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_ASIMD_SHIFT, 4, ID_AA64PFR0_ASIMD_NI),
+ ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_FP_SHIFT, 4, ID_AA64PFR0_FP_NI),
+ /* Linux doesn't care about the EL3 */
+ ARM64_FTR_BITS(FTR_NONSTRICT, FTR_EXACT, ID_AA64PFR0_EL3_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64PFR0_EL2_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64PFR0_EL1_SHIFT, 4, ID_AA64PFR0_EL1_64BIT_ONLY),
+ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64PFR0_EL0_SHIFT, 4, ID_AA64PFR0_EL0_64BIT_ONLY),
+ ARM64_FTR_END,
+};
+
+static struct arm64_ftr_bits ftr_id_aa64mmfr0[] = {
+ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0),
+ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN4_SHIFT, 4, ID_AA64MMFR0_TGRAN4_NI),
+ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN64_SHIFT, 4, ID_AA64MMFR0_TGRAN64_NI),
+ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN16_SHIFT, 4, ID_AA64MMFR0_TGRAN16_NI),
+ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_BIGENDEL0_SHIFT, 4, 0),
+ /* Linux shouldn't care about secure memory */
+ ARM64_FTR_BITS(FTR_NONSTRICT, FTR_EXACT, ID_AA64MMFR0_SNSMEM_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_BIGENDEL_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_ASID_SHIFT, 4, 0),
+ /*
+ * Differing PARange is fine as long as all peripherals and memory are mapped
+ * within the minimum PARange of all CPUs
+ */
+ ARM64_FTR_BITS(FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_PARANGE_SHIFT, 4, 0),
+ ARM64_FTR_END,
+};
+
+static struct arm64_ftr_bits ftr_id_aa64mmfr1[] = {
+ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0),
+ ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_PAN_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_LOR_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_HPD_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_VHE_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_VMIDBITS_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_HADBS_SHIFT, 4, 0),
+ ARM64_FTR_END,
+};
+
+static struct arm64_ftr_bits ftr_ctr[] = {
+ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 31, 1, 1), /* RAO */
+ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 28, 3, 0),
+ ARM64_FTR_BITS(FTR_STRICT, FTR_HIGHER_SAFE, 24, 4, 0), /* CWG */
+ ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0), /* ERG */
+ ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 1), /* DminLine */
+ /*
+ * Linux can handle differing I-cache policies. Userspace JITs will
+ * make use of *minLine
+ */
+ ARM64_FTR_BITS(FTR_NONSTRICT, FTR_EXACT, 14, 2, 0), /* L1Ip */
+ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 10, 0), /* RAZ */
+ ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), /* IminLine */
+ ARM64_FTR_END,
+};
+
+static struct arm64_ftr_bits ftr_id_mmfr0[] = {
+ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 28, 4, 0), /* InnerShr */
+ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 24, 4, 0), /* FCSE */
+ ARM64_FTR_BITS(FTR_NONSTRICT, FTR_LOWER_SAFE, 20, 4, 0), /* AuxReg */
+ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 16, 4, 0), /* TCM */
+ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 12, 4, 0), /* ShareLvl */
+ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 8, 4, 0), /* OuterShr */
+ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 4, 0), /* PMSA */
+ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 4, 0), /* VMSA */
+ ARM64_FTR_END,
+};
+
+static struct arm64_ftr_bits ftr_id_aa64dfr0[] = {
+ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0),
+ ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_CTX_CMPS_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_WRPS_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_BRPS_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64DFR0_PMUVER_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64DFR0_TRACEVER_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64DFR0_DEBUGVER_SHIFT, 4, 0x6),
+ ARM64_FTR_END,
+};
+
+static struct arm64_ftr_bits ftr_mvfr2[] = {
+ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 8, 24, 0), /* RAZ */
+ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 4, 0), /* FPMisc */
+ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 4, 0), /* SIMDMisc */
+ ARM64_FTR_END,
+};
+
+static struct arm64_ftr_bits ftr_dczid[] = {
+ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 5, 27, 0), /* RAZ */
+ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 1, 1), /* DZP */
+ ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), /* BS */
+ ARM64_FTR_END,
+};
+
+
+static struct arm64_ftr_bits ftr_id_isar5[] = {
+ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_ISAR5_RDM_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 20, 4, 0), /* RAZ */
+ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_ISAR5_CRC32_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_ISAR5_SHA2_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_ISAR5_SHA1_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_ISAR5_AES_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_ISAR5_SEVL_SHIFT, 4, 0),
+ ARM64_FTR_END,
+};
+
+static struct arm64_ftr_bits ftr_id_mmfr4[] = {
+ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 8, 24, 0), /* RAZ */
+ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 4, 0), /* ac2 */
+ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 4, 0), /* RAZ */
+ ARM64_FTR_END,
+};
+
+static struct arm64_ftr_bits ftr_id_pfr0[] = {
+ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 16, 16, 0), /* RAZ */
+ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 12, 4, 0), /* State3 */
+ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 8, 4, 0), /* State2 */
+ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 4, 0), /* State1 */
+ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 4, 0), /* State0 */
+ ARM64_FTR_END,
+};
+
+/*
+ * Common ftr bits for a 32bit register with all hidden, strict
+ * attributes, with 4bit feature fields and a default safe value of
+ * 0. Covers the following 32bit registers:
+ * id_isar[0-4], id_mmfr[1-3], id_pfr1, mvfr[0-1]
+ */
+static struct arm64_ftr_bits ftr_generic_32bits[] = {
+ ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 28, 4, 0),
+ ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 24, 4, 0),
+ ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0),
+ ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 0),
+ ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 12, 4, 0),
+ ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 8, 4, 0),
+ ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 4, 4, 0),
+ ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0),
+ ARM64_FTR_END,
+};
+
+static struct arm64_ftr_bits ftr_generic[] = {
+ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 64, 0),
+ ARM64_FTR_END,
+};
+
+static struct arm64_ftr_bits ftr_generic32[] = {
+ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 32, 0),
+ ARM64_FTR_END,
+};
+
+static struct arm64_ftr_bits ftr_aa64raz[] = {
+ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 64, 0),
+ ARM64_FTR_END,
+};
+
+#define ARM64_FTR_REG(id, table) \
+ { \
+ .sys_id = id, \
+ .name = #id, \
+ .ftr_bits = &((table)[0]), \
+ }
+
+static struct arm64_ftr_reg arm64_ftr_regs[] = {
+
+ /* Op1 = 0, CRn = 0, CRm = 1 */
+ ARM64_FTR_REG(SYS_ID_PFR0_EL1, ftr_id_pfr0),
+ ARM64_FTR_REG(SYS_ID_PFR1_EL1, ftr_generic_32bits),
+ ARM64_FTR_REG(SYS_ID_DFR0_EL1, ftr_generic_32bits),
+ ARM64_FTR_REG(SYS_ID_MMFR0_EL1, ftr_id_mmfr0),
+ ARM64_FTR_REG(SYS_ID_MMFR1_EL1, ftr_generic_32bits),
+ ARM64_FTR_REG(SYS_ID_MMFR2_EL1, ftr_generic_32bits),
+ ARM64_FTR_REG(SYS_ID_MMFR3_EL1, ftr_generic_32bits),
+
+ /* Op1 = 0, CRn = 0, CRm = 2 */
+ ARM64_FTR_REG(SYS_ID_ISAR0_EL1, ftr_generic_32bits),
+ ARM64_FTR_REG(SYS_ID_ISAR1_EL1, ftr_generic_32bits),
+ ARM64_FTR_REG(SYS_ID_ISAR2_EL1, ftr_generic_32bits),
+ ARM64_FTR_REG(SYS_ID_ISAR3_EL1, ftr_generic_32bits),
+ ARM64_FTR_REG(SYS_ID_ISAR4_EL1, ftr_generic_32bits),
+ ARM64_FTR_REG(SYS_ID_ISAR5_EL1, ftr_id_isar5),
+ ARM64_FTR_REG(SYS_ID_MMFR4_EL1, ftr_id_mmfr4),
+
+ /* Op1 = 0, CRn = 0, CRm = 3 */
+ ARM64_FTR_REG(SYS_MVFR0_EL1, ftr_generic_32bits),
+ ARM64_FTR_REG(SYS_MVFR1_EL1, ftr_generic_32bits),
+ ARM64_FTR_REG(SYS_MVFR2_EL1, ftr_mvfr2),
+
+ /* Op1 = 0, CRn = 0, CRm = 4 */
+ ARM64_FTR_REG(SYS_ID_AA64PFR0_EL1, ftr_id_aa64pfr0),
+ ARM64_FTR_REG(SYS_ID_AA64PFR1_EL1, ftr_aa64raz),
+
+ /* Op1 = 0, CRn = 0, CRm = 5 */
+ ARM64_FTR_REG(SYS_ID_AA64DFR0_EL1, ftr_id_aa64dfr0),
+ ARM64_FTR_REG(SYS_ID_AA64DFR1_EL1, ftr_generic),
+
+ /* Op1 = 0, CRn = 0, CRm = 6 */
+ ARM64_FTR_REG(SYS_ID_AA64ISAR0_EL1, ftr_id_aa64isar0),
+ ARM64_FTR_REG(SYS_ID_AA64ISAR1_EL1, ftr_aa64raz),
+
+ /* Op1 = 0, CRn = 0, CRm = 7 */
+ ARM64_FTR_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0),
+ ARM64_FTR_REG(SYS_ID_AA64MMFR1_EL1, ftr_id_aa64mmfr1),
+
+ /* Op1 = 3, CRn = 0, CRm = 0 */
+ ARM64_FTR_REG(SYS_CTR_EL0, ftr_ctr),
+ ARM64_FTR_REG(SYS_DCZID_EL0, ftr_dczid),
+
+ /* Op1 = 3, CRn = 14, CRm = 0 */
+ ARM64_FTR_REG(SYS_CNTFRQ_EL0, ftr_generic32),
+};
+
+static int search_cmp_ftr_reg(const void *id, const void *regp)
+{
+ return (int)(unsigned long)id - (int)((const struct arm64_ftr_reg *)regp)->sys_id;
+}
+
+/*
+ * get_arm64_ftr_reg - Lookup a feature register entry using its
+ * sys_reg() encoding. With the array arm64_ftr_regs sorted in the
+ * ascending order of sys_id , we use binary search to find a matching
+ * entry.
+ *
+ * returns - Upon success, matching ftr_reg entry for id.
+ * - NULL on failure. It is upto the caller to decide
+ * the impact of a failure.
+ */
+static struct arm64_ftr_reg *get_arm64_ftr_reg(u32 sys_id)
+{
+ return bsearch((const void *)(unsigned long)sys_id,
+ arm64_ftr_regs,
+ ARRAY_SIZE(arm64_ftr_regs),
+ sizeof(arm64_ftr_regs[0]),
+ search_cmp_ftr_reg);
+}
+
+static u64 arm64_ftr_set_value(struct arm64_ftr_bits *ftrp, s64 reg, s64 ftr_val)
+{
+ u64 mask = arm64_ftr_mask(ftrp);
+
+ reg &= ~mask;
+ reg |= (ftr_val << ftrp->shift) & mask;
+ return reg;
+}
+
+static s64 arm64_ftr_safe_value(struct arm64_ftr_bits *ftrp, s64 new, s64 cur)
+{
+ s64 ret = 0;
+
+ switch (ftrp->type) {
+ case FTR_EXACT:
+ ret = ftrp->safe_val;
+ break;
+ case FTR_LOWER_SAFE:
+ ret = new < cur ? new : cur;
+ break;
+ case FTR_HIGHER_SAFE:
+ ret = new > cur ? new : cur;
+ break;
+ default:
+ BUG();
+ }
+
+ return ret;
+}
+
+static int __init sort_cmp_ftr_regs(const void *a, const void *b)
+{
+ return ((const struct arm64_ftr_reg *)a)->sys_id -
+ ((const struct arm64_ftr_reg *)b)->sys_id;
+}
+
+static void __init swap_ftr_regs(void *a, void *b, int size)
+{
+ struct arm64_ftr_reg tmp = *(struct arm64_ftr_reg *)a;
+ *(struct arm64_ftr_reg *)a = *(struct arm64_ftr_reg *)b;
+ *(struct arm64_ftr_reg *)b = tmp;
+}
+
+static void __init sort_ftr_regs(void)
+{
+ /* Keep the array sorted so that we can do the binary search */
+ sort(arm64_ftr_regs,
+ ARRAY_SIZE(arm64_ftr_regs),
+ sizeof(arm64_ftr_regs[0]),
+ sort_cmp_ftr_regs,
+ swap_ftr_regs);
+}
+
+/*
+ * Initialise the CPU feature register from Boot CPU values.
+ * Also initiliases the strict_mask for the register.
+ */
+static void __init init_cpu_ftr_reg(u32 sys_reg, u64 new)
+{
+ u64 val = 0;
+ u64 strict_mask = ~0x0ULL;
+ struct arm64_ftr_bits *ftrp;
+ struct arm64_ftr_reg *reg = get_arm64_ftr_reg(sys_reg);
+
+ BUG_ON(!reg);
+
+ for (ftrp = reg->ftr_bits; ftrp->width; ftrp++) {
+ s64 ftr_new = arm64_ftr_value(ftrp, new);
+
+ val = arm64_ftr_set_value(ftrp, val, ftr_new);
+ if (!ftrp->strict)
+ strict_mask &= ~arm64_ftr_mask(ftrp);
+ }
+ reg->sys_val = val;
+ reg->strict_mask = strict_mask;
+}
+
+void __init init_cpu_features(struct cpuinfo_arm64 *info)
+{
+ /* Before we start using the tables, make sure it is sorted */
+ sort_ftr_regs();
+
+ init_cpu_ftr_reg(SYS_CTR_EL0, info->reg_ctr);
+ init_cpu_ftr_reg(SYS_DCZID_EL0, info->reg_dczid);
+ init_cpu_ftr_reg(SYS_CNTFRQ_EL0, info->reg_cntfrq);
+ init_cpu_ftr_reg(SYS_ID_AA64DFR0_EL1, info->reg_id_aa64dfr0);
+ init_cpu_ftr_reg(SYS_ID_AA64DFR1_EL1, info->reg_id_aa64dfr1);
+ init_cpu_ftr_reg(SYS_ID_AA64ISAR0_EL1, info->reg_id_aa64isar0);
+ init_cpu_ftr_reg(SYS_ID_AA64ISAR1_EL1, info->reg_id_aa64isar1);
+ init_cpu_ftr_reg(SYS_ID_AA64MMFR0_EL1, info->reg_id_aa64mmfr0);
+ init_cpu_ftr_reg(SYS_ID_AA64MMFR1_EL1, info->reg_id_aa64mmfr1);
+ init_cpu_ftr_reg(SYS_ID_AA64PFR0_EL1, info->reg_id_aa64pfr0);
+ init_cpu_ftr_reg(SYS_ID_AA64PFR1_EL1, info->reg_id_aa64pfr1);
+ init_cpu_ftr_reg(SYS_ID_DFR0_EL1, info->reg_id_dfr0);
+ init_cpu_ftr_reg(SYS_ID_ISAR0_EL1, info->reg_id_isar0);
+ init_cpu_ftr_reg(SYS_ID_ISAR1_EL1, info->reg_id_isar1);
+ init_cpu_ftr_reg(SYS_ID_ISAR2_EL1, info->reg_id_isar2);
+ init_cpu_ftr_reg(SYS_ID_ISAR3_EL1, info->reg_id_isar3);
+ init_cpu_ftr_reg(SYS_ID_ISAR4_EL1, info->reg_id_isar4);
+ init_cpu_ftr_reg(SYS_ID_ISAR5_EL1, info->reg_id_isar5);
+ init_cpu_ftr_reg(SYS_ID_MMFR0_EL1, info->reg_id_mmfr0);
+ init_cpu_ftr_reg(SYS_ID_MMFR1_EL1, info->reg_id_mmfr1);
+ init_cpu_ftr_reg(SYS_ID_MMFR2_EL1, info->reg_id_mmfr2);
+ init_cpu_ftr_reg(SYS_ID_MMFR3_EL1, info->reg_id_mmfr3);
+ init_cpu_ftr_reg(SYS_ID_PFR0_EL1, info->reg_id_pfr0);
+ init_cpu_ftr_reg(SYS_ID_PFR1_EL1, info->reg_id_pfr1);
+ init_cpu_ftr_reg(SYS_MVFR0_EL1, info->reg_mvfr0);
+ init_cpu_ftr_reg(SYS_MVFR1_EL1, info->reg_mvfr1);
+ init_cpu_ftr_reg(SYS_MVFR2_EL1, info->reg_mvfr2);
+}
+
+static void update_cpu_ftr_reg(struct arm64_ftr_reg *reg, u64 new)
+{
+ struct arm64_ftr_bits *ftrp;
+
+ for (ftrp = reg->ftr_bits; ftrp->width; ftrp++) {
+ s64 ftr_cur = arm64_ftr_value(ftrp, reg->sys_val);
+ s64 ftr_new = arm64_ftr_value(ftrp, new);
+
+ if (ftr_cur == ftr_new)
+ continue;
+ /* Find a safe value */
+ ftr_new = arm64_ftr_safe_value(ftrp, ftr_new, ftr_cur);
+ reg->sys_val = arm64_ftr_set_value(ftrp, reg->sys_val, ftr_new);
+ }
+
+}
+
+static int check_update_ftr_reg(u32 sys_id, int cpu, u64 val, u64 boot)
+{
+ struct arm64_ftr_reg *regp = get_arm64_ftr_reg(sys_id);
+
+ BUG_ON(!regp);
+ update_cpu_ftr_reg(regp, val);
+ if ((boot & regp->strict_mask) == (val & regp->strict_mask))
+ return 0;
+ pr_warn("SANITY CHECK: Unexpected variation in %s. Boot CPU: %#016llx, CPU%d: %#016llx\n",
+ regp->name, boot, cpu, val);
+ return 1;
+}
+
+/*
+ * Update system wide CPU feature registers with the values from a
+ * non-boot CPU. Also performs SANITY checks to make sure that there
+ * aren't any insane variations from that of the boot CPU.
+ */
+void update_cpu_features(int cpu,
+ struct cpuinfo_arm64 *info,
+ struct cpuinfo_arm64 *boot)
+{
+ int taint = 0;
+
+ /*
+ * The kernel can handle differing I-cache policies, but otherwise
+ * caches should look identical. Userspace JITs will make use of
+ * *minLine.
+ */
+ taint |= check_update_ftr_reg(SYS_CTR_EL0, cpu,
+ info->reg_ctr, boot->reg_ctr);
+
+ /*
+ * Userspace may perform DC ZVA instructions. Mismatched block sizes
+ * could result in too much or too little memory being zeroed if a
+ * process is preempted and migrated between CPUs.
+ */
+ taint |= check_update_ftr_reg(SYS_DCZID_EL0, cpu,
+ info->reg_dczid, boot->reg_dczid);
+
+ /* If different, timekeeping will be broken (especially with KVM) */
+ taint |= check_update_ftr_reg(SYS_CNTFRQ_EL0, cpu,
+ info->reg_cntfrq, boot->reg_cntfrq);
+
+ /*
+ * The kernel uses self-hosted debug features and expects CPUs to
+ * support identical debug features. We presently need CTX_CMPs, WRPs,
+ * and BRPs to be identical.
+ * ID_AA64DFR1 is currently RES0.
+ */
+ taint |= check_update_ftr_reg(SYS_ID_AA64DFR0_EL1, cpu,
+ info->reg_id_aa64dfr0, boot->reg_id_aa64dfr0);
+ taint |= check_update_ftr_reg(SYS_ID_AA64DFR1_EL1, cpu,
+ info->reg_id_aa64dfr1, boot->reg_id_aa64dfr1);
+ /*
+ * Even in big.LITTLE, processors should be identical instruction-set
+ * wise.
+ */
+ taint |= check_update_ftr_reg(SYS_ID_AA64ISAR0_EL1, cpu,
+ info->reg_id_aa64isar0, boot->reg_id_aa64isar0);
+ taint |= check_update_ftr_reg(SYS_ID_AA64ISAR1_EL1, cpu,
+ info->reg_id_aa64isar1, boot->reg_id_aa64isar1);
+
+ /*
+ * Differing PARange support is fine as long as all peripherals and
+ * memory are mapped within the minimum PARange of all CPUs.
+ * Linux should not care about secure memory.
+ */
+ taint |= check_update_ftr_reg(SYS_ID_AA64MMFR0_EL1, cpu,
+ info->reg_id_aa64mmfr0, boot->reg_id_aa64mmfr0);
+ taint |= check_update_ftr_reg(SYS_ID_AA64MMFR1_EL1, cpu,
+ info->reg_id_aa64mmfr1, boot->reg_id_aa64mmfr1);
+
+ /*
+ * EL3 is not our concern.
+ * ID_AA64PFR1 is currently RES0.
+ */
+ taint |= check_update_ftr_reg(SYS_ID_AA64PFR0_EL1, cpu,
+ info->reg_id_aa64pfr0, boot->reg_id_aa64pfr0);
+ taint |= check_update_ftr_reg(SYS_ID_AA64PFR1_EL1, cpu,
+ info->reg_id_aa64pfr1, boot->reg_id_aa64pfr1);
+
+ /*
+ * If we have AArch32, we care about 32-bit features for compat. These
+ * registers should be RES0 otherwise.
+ */
+ taint |= check_update_ftr_reg(SYS_ID_DFR0_EL1, cpu,
+ info->reg_id_dfr0, boot->reg_id_dfr0);
+ taint |= check_update_ftr_reg(SYS_ID_ISAR0_EL1, cpu,
+ info->reg_id_isar0, boot->reg_id_isar0);
+ taint |= check_update_ftr_reg(SYS_ID_ISAR1_EL1, cpu,
+ info->reg_id_isar1, boot->reg_id_isar1);
+ taint |= check_update_ftr_reg(SYS_ID_ISAR2_EL1, cpu,
+ info->reg_id_isar2, boot->reg_id_isar2);
+ taint |= check_update_ftr_reg(SYS_ID_ISAR3_EL1, cpu,
+ info->reg_id_isar3, boot->reg_id_isar3);
+ taint |= check_update_ftr_reg(SYS_ID_ISAR4_EL1, cpu,
+ info->reg_id_isar4, boot->reg_id_isar4);
+ taint |= check_update_ftr_reg(SYS_ID_ISAR5_EL1, cpu,
+ info->reg_id_isar5, boot->reg_id_isar5);
+
+ /*
+ * Regardless of the value of the AuxReg field, the AIFSR, ADFSR, and
+ * ACTLR formats could differ across CPUs and therefore would have to
+ * be trapped for virtualization anyway.
+ */
+ taint |= check_update_ftr_reg(SYS_ID_MMFR0_EL1, cpu,
+ info->reg_id_mmfr0, boot->reg_id_mmfr0);
+ taint |= check_update_ftr_reg(SYS_ID_MMFR1_EL1, cpu,
+ info->reg_id_mmfr1, boot->reg_id_mmfr1);
+ taint |= check_update_ftr_reg(SYS_ID_MMFR2_EL1, cpu,
+ info->reg_id_mmfr2, boot->reg_id_mmfr2);
+ taint |= check_update_ftr_reg(SYS_ID_MMFR3_EL1, cpu,
+ info->reg_id_mmfr3, boot->reg_id_mmfr3);
+ taint |= check_update_ftr_reg(SYS_ID_PFR0_EL1, cpu,
+ info->reg_id_pfr0, boot->reg_id_pfr0);
+ taint |= check_update_ftr_reg(SYS_ID_PFR1_EL1, cpu,
+ info->reg_id_pfr1, boot->reg_id_pfr1);
+ taint |= check_update_ftr_reg(SYS_MVFR0_EL1, cpu,
+ info->reg_mvfr0, boot->reg_mvfr0);
+ taint |= check_update_ftr_reg(SYS_MVFR1_EL1, cpu,
+ info->reg_mvfr1, boot->reg_mvfr1);
+ taint |= check_update_ftr_reg(SYS_MVFR2_EL1, cpu,
+ info->reg_mvfr2, boot->reg_mvfr2);
+
+ /*
+ * Mismatched CPU features are a recipe for disaster. Don't even
+ * pretend to support them.
+ */
+ WARN_TAINT_ONCE(taint, TAINT_CPU_OUT_OF_SPEC,
+ "Unsupported CPU feature variation.\n");
+}
+
+u64 read_system_reg(u32 id)
+{
+ struct arm64_ftr_reg *regp = get_arm64_ftr_reg(id);
+
+ /* We shouldn't get a request for an unsupported register */
+ BUG_ON(!regp);
+ return regp->sys_val;
+}
#include <linux/irqchip/arm-gic-v3.h>
@@ -33,25 +588,20 @@ feature_matches(u64 reg, const struct arm64_cpu_capabilities *entry)
return val >= entry->min_field_value;
}
-#define __ID_FEAT_CHK(reg) \
-static bool __maybe_unused \
-has_##reg##_feature(const struct arm64_cpu_capabilities *entry) \
-{ \
- u64 val; \
- \
- val = read_cpuid(reg##_el1); \
- return feature_matches(val, entry); \
-}
+static bool
+has_cpuid_feature(const struct arm64_cpu_capabilities *entry)
+{
+ u64 val;
-__ID_FEAT_CHK(id_aa64pfr0);
-__ID_FEAT_CHK(id_aa64mmfr1);
-__ID_FEAT_CHK(id_aa64isar0);
+ val = read_system_reg(entry->sys_reg);
+ return feature_matches(val, entry);
+}
static bool has_useable_gicv3_cpuif(const struct arm64_cpu_capabilities *entry)
{
bool has_sre;
- if (!has_id_aa64pfr0_feature(entry))
+ if (!has_cpuid_feature(entry))
return false;
has_sre = gic_enable_sre();
@@ -67,15 +617,17 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.desc = "GIC system register CPU interface",
.capability = ARM64_HAS_SYSREG_GIC_CPUIF,
.matches = has_useable_gicv3_cpuif,
- .field_pos = 24,
+ .sys_reg = SYS_ID_AA64PFR0_EL1,
+ .field_pos = ID_AA64PFR0_GIC_SHIFT,
.min_field_value = 1,
},
#ifdef CONFIG_ARM64_PAN
{
.desc = "Privileged Access Never",
.capability = ARM64_HAS_PAN,
- .matches = has_id_aa64mmfr1_feature,
- .field_pos = 20,
+ .matches = has_cpuid_feature,
+ .sys_reg = SYS_ID_AA64MMFR1_EL1,
+ .field_pos = ID_AA64MMFR1_PAN_SHIFT,
.min_field_value = 1,
.enable = cpu_enable_pan,
},
@@ -84,15 +636,101 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
{
.desc = "LSE atomic instructions",
.capability = ARM64_HAS_LSE_ATOMICS,
- .matches = has_id_aa64isar0_feature,
- .field_pos = 20,
+ .matches = has_cpuid_feature,
+ .sys_reg = SYS_ID_AA64ISAR0_EL1,
+ .field_pos = ID_AA64ISAR0_ATOMICS_SHIFT,
.min_field_value = 2,
},
#endif /* CONFIG_AS_LSE && CONFIG_ARM64_LSE_ATOMICS */
{},
};
-void check_cpu_capabilities(const struct arm64_cpu_capabilities *caps,
+#define HWCAP_CAP(reg, field, min_value, type, cap) \
+ { \
+ .desc = #cap, \
+ .matches = has_cpuid_feature, \
+ .sys_reg = reg, \
+ .field_pos = field, \
+ .min_field_value = min_value, \
+ .hwcap_type = type, \
+ .hwcap = cap, \
+ }
+
+static const struct arm64_cpu_capabilities arm64_hwcaps[] = {
+ HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, 2, CAP_HWCAP, HWCAP_PMULL),
+ HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, 1, CAP_HWCAP, HWCAP_AES),
+ HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA1_SHIFT, 1, CAP_HWCAP, HWCAP_SHA1),
+ HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, 1, CAP_HWCAP, HWCAP_SHA2),
+ HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_CRC32_SHIFT, 1, CAP_HWCAP, HWCAP_CRC32),
+ HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_ATOMICS_SHIFT, 2, CAP_HWCAP, HWCAP_ATOMICS),
+ HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, 0, CAP_HWCAP, HWCAP_FP),
+ HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, 0, CAP_HWCAP, HWCAP_ASIMD),
+#ifdef CONFIG_COMPAT
+ HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, 2, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_PMULL),
+ HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_AES),
+ HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA1_SHIFT, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA1),
+ HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA2_SHIFT, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA2),
+ HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_CRC32_SHIFT, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_CRC32),
+#endif
+ {},
+};
+
+static void cap_set_hwcap(const struct arm64_cpu_capabilities *cap)
+{
+ switch (cap->hwcap_type) {
+ case CAP_HWCAP:
+ elf_hwcap |= cap->hwcap;
+ break;
+#ifdef CONFIG_COMPAT
+ case CAP_COMPAT_HWCAP:
+ compat_elf_hwcap |= (u32)cap->hwcap;
+ break;
+ case CAP_COMPAT_HWCAP2:
+ compat_elf_hwcap2 |= (u32)cap->hwcap;
+ break;
+#endif
+ default:
+ WARN_ON(1);
+ break;
+ }
+}
+
+/* Check if we have a particular HWCAP enabled */
+static bool cpus_have_hwcap(const struct arm64_cpu_capabilities *cap)
+{
+ bool rc;
+
+ switch (cap->hwcap_type) {
+ case CAP_HWCAP:
+ rc = (elf_hwcap & cap->hwcap) != 0;
+ break;
+#ifdef CONFIG_COMPAT
+ case CAP_COMPAT_HWCAP:
+ rc = (compat_elf_hwcap & (u32)cap->hwcap) != 0;
+ break;
+ case CAP_COMPAT_HWCAP2:
+ rc = (compat_elf_hwcap2 & (u32)cap->hwcap) != 0;
+ break;
+#endif
+ default:
+ WARN_ON(1);
+ rc = false;
+ }
+
+ return rc;
+}
+
+static void setup_cpu_hwcaps(void)
+{
+ int i;
+ const struct arm64_cpu_capabilities *hwcaps = arm64_hwcaps;
+
+ for (i = 0; hwcaps[i].desc; i++)
+ if (hwcaps[i].matches(&hwcaps[i]))
+ cap_set_hwcap(&hwcaps[i]);
+}
+
+void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps,
const char *info)
{
int i;
@@ -105,15 +743,178 @@ void check_cpu_capabilities(const struct arm64_cpu_capabilities *caps,
pr_info("%s %s\n", info, caps[i].desc);
cpus_set_cap(caps[i].capability);
}
+}
+
+/*
+ * Run through the enabled capabilities and enable() it on all active
+ * CPUs
+ */
+static void enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps)
+{
+ int i;
+
+ for (i = 0; caps[i].desc; i++)
+ if (caps[i].enable && cpus_have_cap(caps[i].capability))
+ on_each_cpu(caps[i].enable, NULL, true);
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+
+/*
+ * Flag to indicate if we have computed the system wide
+ * capabilities based on the boot time active CPUs. This
+ * will be used to determine if a new booting CPU should
+ * go through the verification process to make sure that it
+ * supports the system capabilities, without using a hotplug
+ * notifier.
+ */
+static bool sys_caps_initialised;
+
+static inline void set_sys_caps_initialised(void)
+{
+ sys_caps_initialised = true;
+}
+
+/*
+ * __raw_read_system_reg() - Used by a STARTING cpu before cpuinfo is populated.
+ */
+static u64 __raw_read_system_reg(u32 sys_id)
+{
+ switch (sys_id) {
+ case SYS_ID_PFR0_EL1: return (u64)read_cpuid(ID_PFR0_EL1);
+ case SYS_ID_PFR1_EL1: return (u64)read_cpuid(ID_PFR1_EL1);
+ case SYS_ID_DFR0_EL1: return (u64)read_cpuid(ID_DFR0_EL1);
+ case SYS_ID_MMFR0_EL1: return (u64)read_cpuid(ID_MMFR0_EL1);
+ case SYS_ID_MMFR1_EL1: return (u64)read_cpuid(ID_MMFR1_EL1);
+ case SYS_ID_MMFR2_EL1: return (u64)read_cpuid(ID_MMFR2_EL1);
+ case SYS_ID_MMFR3_EL1: return (u64)read_cpuid(ID_MMFR3_EL1);
+ case SYS_ID_ISAR0_EL1: return (u64)read_cpuid(ID_ISAR0_EL1);
+ case SYS_ID_ISAR1_EL1: return (u64)read_cpuid(ID_ISAR1_EL1);
+ case SYS_ID_ISAR2_EL1: return (u64)read_cpuid(ID_ISAR2_EL1);
+ case SYS_ID_ISAR3_EL1: return (u64)read_cpuid(ID_ISAR3_EL1);
+ case SYS_ID_ISAR4_EL1: return (u64)read_cpuid(ID_ISAR4_EL1);
+ case SYS_ID_ISAR5_EL1: return (u64)read_cpuid(ID_ISAR4_EL1);
+ case SYS_MVFR0_EL1: return (u64)read_cpuid(MVFR0_EL1);
+ case SYS_MVFR1_EL1: return (u64)read_cpuid(MVFR1_EL1);
+ case SYS_MVFR2_EL1: return (u64)read_cpuid(MVFR2_EL1);
+
+ case SYS_ID_AA64PFR0_EL1: return (u64)read_cpuid(ID_AA64PFR0_EL1);
+ case SYS_ID_AA64PFR1_EL1: return (u64)read_cpuid(ID_AA64PFR0_EL1);
+ case SYS_ID_AA64DFR0_EL1: return (u64)read_cpuid(ID_AA64DFR0_EL1);
+ case SYS_ID_AA64DFR1_EL1: return (u64)read_cpuid(ID_AA64DFR0_EL1);
+ case SYS_ID_AA64MMFR0_EL1: return (u64)read_cpuid(ID_AA64MMFR0_EL1);
+ case SYS_ID_AA64MMFR1_EL1: return (u64)read_cpuid(ID_AA64MMFR1_EL1);
+ case SYS_ID_AA64ISAR0_EL1: return (u64)read_cpuid(ID_AA64ISAR0_EL1);
+ case SYS_ID_AA64ISAR1_EL1: return (u64)read_cpuid(ID_AA64ISAR1_EL1);
+
+ case SYS_CNTFRQ_EL0: return (u64)read_cpuid(CNTFRQ_EL0);
+ case SYS_CTR_EL0: return (u64)read_cpuid(CTR_EL0);
+ case SYS_DCZID_EL0: return (u64)read_cpuid(DCZID_EL0);
+ default:
+ BUG();
+ return 0;
+ }
+}
+
+/*
+ * Park the CPU which doesn't have the capability as advertised
+ * by the system.
+ */
+static void fail_incapable_cpu(char *cap_type,
+ const struct arm64_cpu_capabilities *cap)
+{
+ int cpu = smp_processor_id();
+
+ pr_crit("CPU%d: missing %s : %s\n", cpu, cap_type, cap->desc);
+ /* Mark this CPU absent */
+ set_cpu_present(cpu, 0);
+
+ /* Check if we can park ourselves */
+ if (cpu_ops[cpu] && cpu_ops[cpu]->cpu_die)
+ cpu_ops[cpu]->cpu_die(cpu);
+ asm(
+ "1: wfe\n"
+ " wfi\n"
+ " b 1b");
+}
- /* second pass allows enable() to consider interacting capabilities */
+/*
+ * Run through the enabled system capabilities and enable() it on this CPU.
+ * The capabilities were decided based on the available CPUs at the boot time.
+ * Any new CPU should match the system wide status of the capability. If the
+ * new CPU doesn't have a capability which the system now has enabled, we
+ * cannot do anything to fix it up and could cause unexpected failures. So
+ * we park the CPU.
+ */
+void verify_local_cpu_capabilities(void)
+{
+ int i;
+ const struct arm64_cpu_capabilities *caps;
+
+ /*
+ * If we haven't computed the system capabilities, there is nothing
+ * to verify.
+ */
+ if (!sys_caps_initialised)
+ return;
+
+ caps = arm64_features;
for (i = 0; caps[i].desc; i++) {
- if (cpus_have_cap(caps[i].capability) && caps[i].enable)
- caps[i].enable();
+ if (!cpus_have_cap(caps[i].capability) || !caps[i].sys_reg)
+ continue;
+ /*
+ * If the new CPU misses an advertised feature, we cannot proceed
+ * further, park the cpu.
+ */
+ if (!feature_matches(__raw_read_system_reg(caps[i].sys_reg), &caps[i]))
+ fail_incapable_cpu("arm64_features", &caps[i]);
+ if (caps[i].enable)
+ caps[i].enable(NULL);
}
+
+ for (i = 0, caps = arm64_hwcaps; caps[i].desc; i++) {
+ if (!cpus_have_hwcap(&caps[i]))
+ continue;
+ if (!feature_matches(__raw_read_system_reg(caps[i].sys_reg), &caps[i]))
+ fail_incapable_cpu("arm64_hwcaps", &caps[i]);
+ }
+}
+
+#else /* !CONFIG_HOTPLUG_CPU */
+
+static inline void set_sys_caps_initialised(void)
+{
+}
+
+#endif /* CONFIG_HOTPLUG_CPU */
+
+static void setup_feature_capabilities(void)
+{
+ update_cpu_capabilities(arm64_features, "detected feature:");
+ enable_cpu_capabilities(arm64_features);
}
-void check_local_cpu_features(void)
+void __init setup_cpu_features(void)
{
- check_cpu_capabilities(arm64_features, "detected feature:");
+ u32 cwg;
+ int cls;
+
+ /* Set the CPU feature capabilies */
+ setup_feature_capabilities();
+ setup_cpu_hwcaps();
+
+ /* Advertise that we have computed the system capabilities */
+ set_sys_caps_initialised();
+
+ /*
+ * Check for sane CTR_EL0.CWG value.
+ */
+ cwg = cache_type_cwg();
+ cls = cache_line_size();
+ if (!cwg)
+ pr_warn("No Cache Writeback Granule information, assuming cache line size %d\n",
+ cls);
+ if (L1_CACHE_BYTES < cls)
+ pr_warn("L1_CACHE_BYTES smaller than the Cache Writeback Granule (%d < %d)\n",
+ L1_CACHE_BYTES, cls);
}
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index 75d5a867e7fb..706679d0a0b4 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -24,8 +24,11 @@
#include <linux/bug.h>
#include <linux/init.h>
#include <linux/kernel.h>
+#include <linux/personality.h>
#include <linux/preempt.h>
#include <linux/printk.h>
+#include <linux/seq_file.h>
+#include <linux/sched.h>
#include <linux/smp.h>
/*
@@ -35,7 +38,6 @@
*/
DEFINE_PER_CPU(struct cpuinfo_arm64, cpu_data);
static struct cpuinfo_arm64 boot_cpu_data;
-static bool mixed_endian_el0 = true;
static char *icache_policy_str[] = {
[ICACHE_POLICY_RESERVED] = "RESERVED/UNKNOWN",
@@ -46,157 +48,148 @@ static char *icache_policy_str[] = {
unsigned long __icache_flags;
-static void cpuinfo_detect_icache_policy(struct cpuinfo_arm64 *info)
+static const char *const hwcap_str[] = {
+ "fp",
+ "asimd",
+ "evtstrm",
+ "aes",
+ "pmull",
+ "sha1",
+ "sha2",
+ "crc32",
+ "atomics",
+ NULL
+};
+
+#ifdef CONFIG_COMPAT
+static const char *const compat_hwcap_str[] = {
+ "swp",
+ "half",
+ "thumb",
+ "26bit",
+ "fastmult",
+ "fpa",
+ "vfp",
+ "edsp",
+ "java",
+ "iwmmxt",
+ "crunch",
+ "thumbee",
+ "neon",
+ "vfpv3",
+ "vfpv3d16",
+ "tls",
+ "vfpv4",
+ "idiva",
+ "idivt",
+ "vfpd32",
+ "lpae",
+ "evtstrm"
+};
+
+static const char *const compat_hwcap2_str[] = {
+ "aes",
+ "pmull",
+ "sha1",
+ "sha2",
+ "crc32",
+ NULL
+};
+#endif /* CONFIG_COMPAT */
+
+static int c_show(struct seq_file *m, void *v)
{
- unsigned int cpu = smp_processor_id();
- u32 l1ip = CTR_L1IP(info->reg_ctr);
+ int i, j;
+
+ for_each_online_cpu(i) {
+ struct cpuinfo_arm64 *cpuinfo = &per_cpu(cpu_data, i);
+ u32 midr = cpuinfo->reg_midr;
- if (l1ip != ICACHE_POLICY_PIPT) {
/*
- * VIPT caches are non-aliasing if the VA always equals the PA
- * in all bit positions that are covered by the index. This is
- * the case if the size of a way (# of sets * line size) does
- * not exceed PAGE_SIZE.
+ * glibc reads /proc/cpuinfo to determine the number of
+ * online processors, looking for lines beginning with
+ * "processor". Give glibc what it expects.
*/
- u32 waysize = icache_get_numsets() * icache_get_linesize();
+ seq_printf(m, "processor\t: %d\n", i);
- if (l1ip != ICACHE_POLICY_VIPT || waysize > PAGE_SIZE)
- set_bit(ICACHEF_ALIASING, &__icache_flags);
+ /*
+ * Dump out the common processor features in a single line.
+ * Userspace should read the hwcaps with getauxval(AT_HWCAP)
+ * rather than attempting to parse this, but there's a body of
+ * software which does already (at least for 32-bit).
+ */
+ seq_puts(m, "Features\t:");
+ if (personality(current->personality) == PER_LINUX32) {
+#ifdef CONFIG_COMPAT
+ for (j = 0; compat_hwcap_str[j]; j++)
+ if (compat_elf_hwcap & (1 << j))
+ seq_printf(m, " %s", compat_hwcap_str[j]);
+
+ for (j = 0; compat_hwcap2_str[j]; j++)
+ if (compat_elf_hwcap2 & (1 << j))
+ seq_printf(m, " %s", compat_hwcap2_str[j]);
+#endif /* CONFIG_COMPAT */
+ } else {
+ for (j = 0; hwcap_str[j]; j++)
+ if (elf_hwcap & (1 << j))
+ seq_printf(m, " %s", hwcap_str[j]);
+ }
+ seq_puts(m, "\n");
+
+ seq_printf(m, "CPU implementer\t: 0x%02x\n",
+ MIDR_IMPLEMENTOR(midr));
+ seq_printf(m, "CPU architecture: 8\n");
+ seq_printf(m, "CPU variant\t: 0x%x\n", MIDR_VARIANT(midr));
+ seq_printf(m, "CPU part\t: 0x%03x\n", MIDR_PARTNUM(midr));
+ seq_printf(m, "CPU revision\t: %d\n\n", MIDR_REVISION(midr));
}
- if (l1ip == ICACHE_POLICY_AIVIVT)
- set_bit(ICACHEF_AIVIVT, &__icache_flags);
- pr_info("Detected %s I-cache on CPU%d\n", icache_policy_str[l1ip], cpu);
-}
-
-bool cpu_supports_mixed_endian_el0(void)
-{
- return id_aa64mmfr0_mixed_endian_el0(read_cpuid(ID_AA64MMFR0_EL1));
-}
-
-bool system_supports_mixed_endian_el0(void)
-{
- return mixed_endian_el0;
+ return 0;
}
-static void update_mixed_endian_el0_support(struct cpuinfo_arm64 *info)
+static void *c_start(struct seq_file *m, loff_t *pos)
{
- mixed_endian_el0 &= id_aa64mmfr0_mixed_endian_el0(info->reg_id_aa64mmfr0);
+ return *pos < 1 ? (void *)1 : NULL;
}
-static void update_cpu_features(struct cpuinfo_arm64 *info)
+static void *c_next(struct seq_file *m, void *v, loff_t *pos)
{
- update_mixed_endian_el0_support(info);
+ ++*pos;
+ return NULL;
}
-static int check_reg_mask(char *name, u64 mask, u64 boot, u64 cur, int cpu)
+static void c_stop(struct seq_file *m, void *v)
{
- if ((boot & mask) == (cur & mask))
- return 0;
-
- pr_warn("SANITY CHECK: Unexpected variation in %s. Boot CPU: %#016lx, CPU%d: %#016lx\n",
- name, (unsigned long)boot, cpu, (unsigned long)cur);
-
- return 1;
}
-#define CHECK_MASK(field, mask, boot, cur, cpu) \
- check_reg_mask(#field, mask, (boot)->reg_ ## field, (cur)->reg_ ## field, cpu)
-
-#define CHECK(field, boot, cur, cpu) \
- CHECK_MASK(field, ~0ULL, boot, cur, cpu)
+const struct seq_operations cpuinfo_op = {
+ .start = c_start,
+ .next = c_next,
+ .stop = c_stop,
+ .show = c_show
+};
-/*
- * Verify that CPUs don't have unexpected differences that will cause problems.
- */
-static void cpuinfo_sanity_check(struct cpuinfo_arm64 *cur)
+static void cpuinfo_detect_icache_policy(struct cpuinfo_arm64 *info)
{
unsigned int cpu = smp_processor_id();
- struct cpuinfo_arm64 *boot = &boot_cpu_data;
- unsigned int diff = 0;
-
- /*
- * The kernel can handle differing I-cache policies, but otherwise
- * caches should look identical. Userspace JITs will make use of
- * *minLine.
- */
- diff |= CHECK_MASK(ctr, 0xffff3fff, boot, cur, cpu);
-
- /*
- * Userspace may perform DC ZVA instructions. Mismatched block sizes
- * could result in too much or too little memory being zeroed if a
- * process is preempted and migrated between CPUs.
- */
- diff |= CHECK(dczid, boot, cur, cpu);
-
- /* If different, timekeeping will be broken (especially with KVM) */
- diff |= CHECK(cntfrq, boot, cur, cpu);
-
- /*
- * The kernel uses self-hosted debug features and expects CPUs to
- * support identical debug features. We presently need CTX_CMPs, WRPs,
- * and BRPs to be identical.
- * ID_AA64DFR1 is currently RES0.
- */
- diff |= CHECK(id_aa64dfr0, boot, cur, cpu);
- diff |= CHECK(id_aa64dfr1, boot, cur, cpu);
-
- /*
- * Even in big.LITTLE, processors should be identical instruction-set
- * wise.
- */
- diff |= CHECK(id_aa64isar0, boot, cur, cpu);
- diff |= CHECK(id_aa64isar1, boot, cur, cpu);
-
- /*
- * Differing PARange support is fine as long as all peripherals and
- * memory are mapped within the minimum PARange of all CPUs.
- * Linux should not care about secure memory.
- * ID_AA64MMFR1 is currently RES0.
- */
- diff |= CHECK_MASK(id_aa64mmfr0, 0xffffffffffff0ff0, boot, cur, cpu);
- diff |= CHECK(id_aa64mmfr1, boot, cur, cpu);
-
- /*
- * EL3 is not our concern.
- * ID_AA64PFR1 is currently RES0.
- */
- diff |= CHECK_MASK(id_aa64pfr0, 0xffffffffffff0fff, boot, cur, cpu);
- diff |= CHECK(id_aa64pfr1, boot, cur, cpu);
+ u32 l1ip = CTR_L1IP(info->reg_ctr);
- /*
- * If we have AArch32, we care about 32-bit features for compat. These
- * registers should be RES0 otherwise.
- */
- diff |= CHECK(id_dfr0, boot, cur, cpu);
- diff |= CHECK(id_isar0, boot, cur, cpu);
- diff |= CHECK(id_isar1, boot, cur, cpu);
- diff |= CHECK(id_isar2, boot, cur, cpu);
- diff |= CHECK(id_isar3, boot, cur, cpu);
- diff |= CHECK(id_isar4, boot, cur, cpu);
- diff |= CHECK(id_isar5, boot, cur, cpu);
- /*
- * Regardless of the value of the AuxReg field, the AIFSR, ADFSR, and
- * ACTLR formats could differ across CPUs and therefore would have to
- * be trapped for virtualization anyway.
- */
- diff |= CHECK_MASK(id_mmfr0, 0xff0fffff, boot, cur, cpu);
- diff |= CHECK(id_mmfr1, boot, cur, cpu);
- diff |= CHECK(id_mmfr2, boot, cur, cpu);
- diff |= CHECK(id_mmfr3, boot, cur, cpu);
- diff |= CHECK(id_pfr0, boot, cur, cpu);
- diff |= CHECK(id_pfr1, boot, cur, cpu);
+ if (l1ip != ICACHE_POLICY_PIPT) {
+ /*
+ * VIPT caches are non-aliasing if the VA always equals the PA
+ * in all bit positions that are covered by the index. This is
+ * the case if the size of a way (# of sets * line size) does
+ * not exceed PAGE_SIZE.
+ */
+ u32 waysize = icache_get_numsets() * icache_get_linesize();
- diff |= CHECK(mvfr0, boot, cur, cpu);
- diff |= CHECK(mvfr1, boot, cur, cpu);
- diff |= CHECK(mvfr2, boot, cur, cpu);
+ if (l1ip != ICACHE_POLICY_VIPT || waysize > PAGE_SIZE)
+ set_bit(ICACHEF_ALIASING, &__icache_flags);
+ }
+ if (l1ip == ICACHE_POLICY_AIVIVT)
+ set_bit(ICACHEF_AIVIVT, &__icache_flags);
- /*
- * Mismatched CPU features are a recipe for disaster. Don't even
- * pretend to support them.
- */
- WARN_TAINT_ONCE(diff, TAINT_CPU_OUT_OF_SPEC,
- "Unsupported CPU feature variation.\n");
+ pr_info("Detected %s I-cache on CPU%d\n", icache_policy_str[l1ip], cpu);
}
static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
@@ -236,15 +229,13 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
cpuinfo_detect_icache_policy(info);
check_local_cpu_errata();
- check_local_cpu_features();
- update_cpu_features(info);
}
void cpuinfo_store_cpu(void)
{
struct cpuinfo_arm64 *info = this_cpu_ptr(&cpu_data);
__cpuinfo_store_cpu(info);
- cpuinfo_sanity_check(info);
+ update_cpu_features(smp_processor_id(), info, &boot_cpu_data);
}
void __init cpuinfo_store_boot_cpu(void)
@@ -253,4 +244,5 @@ void __init cpuinfo_store_boot_cpu(void)
__cpuinfo_store_cpu(info);
boot_cpu_data = *info;
+ init_cpu_features(&boot_cpu_data);
}
diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c
index 253021ef2769..cd9ea8f078b3 100644
--- a/arch/arm64/kernel/debug-monitors.c
+++ b/arch/arm64/kernel/debug-monitors.c
@@ -26,14 +26,16 @@
#include <linux/stat.h>
#include <linux/uaccess.h>
-#include <asm/debug-monitors.h>
+#include <asm/cpufeature.h>
#include <asm/cputype.h>
+#include <asm/debug-monitors.h>
#include <asm/system_misc.h>
/* Determine debug architecture. */
u8 debug_monitors_arch(void)
{
- return read_cpuid(ID_AA64DFR0_EL1) & 0xf;
+ return cpuid_feature_extract_field(read_system_reg(SYS_ID_AA64DFR0_EL1),
+ ID_AA64DFR0_DEBUGVER_SHIFT);
}
/*
diff --git a/arch/arm64/kernel/efi-entry.S b/arch/arm64/kernel/efi-entry.S
index 8ce9b0577442..a773db92908b 100644
--- a/arch/arm64/kernel/efi-entry.S
+++ b/arch/arm64/kernel/efi-entry.S
@@ -29,7 +29,7 @@
* we want to be. The kernel image wants to be placed at TEXT_OFFSET
* from start of RAM.
*/
-ENTRY(efi_stub_entry)
+ENTRY(entry)
/*
* Create a stack frame to save FP/LR with extra space
* for image_addr variable passed to efi_entry().
@@ -86,8 +86,8 @@ ENTRY(efi_stub_entry)
* entries for the VA range of the current image, so no maintenance is
* necessary.
*/
- adr x0, efi_stub_entry
- adr x1, efi_stub_entry_end
+ adr x0, entry
+ adr x1, entry_end
sub x1, x1, x0
bl __flush_dcache_area
@@ -120,5 +120,5 @@ efi_load_fail:
ldp x29, x30, [sp], #32
ret
-efi_stub_entry_end:
-ENDPROC(efi_stub_entry)
+entry_end:
+ENDPROC(entry)
diff --git a/arch/arm64/kernel/efi-stub.c b/arch/arm64/kernel/efi-stub.c
deleted file mode 100644
index 78dfbd34b6bf..000000000000
--- a/arch/arm64/kernel/efi-stub.c
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- * Copyright (C) 2013, 2014 Linaro Ltd; <roy.franz@linaro.org>
- *
- * This file implements the EFI boot stub for the arm64 kernel.
- * Adapted from ARM version by Mark Salter <msalter@redhat.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- */
-#include <linux/efi.h>
-#include <asm/efi.h>
-#include <asm/sections.h>
-
-efi_status_t __init handle_kernel_image(efi_system_table_t *sys_table_arg,
- unsigned long *image_addr,
- unsigned long *image_size,
- unsigned long *reserve_addr,
- unsigned long *reserve_size,
- unsigned long dram_base,
- efi_loaded_image_t *image)
-{
- efi_status_t status;
- unsigned long kernel_size, kernel_memsize = 0;
- unsigned long nr_pages;
- void *old_image_addr = (void *)*image_addr;
- unsigned long preferred_offset;
-
- /*
- * The preferred offset of the kernel Image is TEXT_OFFSET bytes beyond
- * a 2 MB aligned base, which itself may be lower than dram_base, as
- * long as the resulting offset equals or exceeds it.
- */
- preferred_offset = round_down(dram_base, SZ_2M) + TEXT_OFFSET;
- if (preferred_offset < dram_base)
- preferred_offset += SZ_2M;
-
- /* Relocate the image, if required. */
- kernel_size = _edata - _text;
- if (*image_addr != preferred_offset) {
- kernel_memsize = kernel_size + (_end - _edata);
-
- /*
- * First, try a straight allocation at the preferred offset.
- * This will work around the issue where, if dram_base == 0x0,
- * efi_low_alloc() refuses to allocate at 0x0 (to prevent the
- * address of the allocation to be mistaken for a FAIL return
- * value or a NULL pointer). It will also ensure that, on
- * platforms where the [dram_base, dram_base + TEXT_OFFSET)
- * interval is partially occupied by the firmware (like on APM
- * Mustang), we can still place the kernel at the address
- * 'dram_base + TEXT_OFFSET'.
- */
- *image_addr = *reserve_addr = preferred_offset;
- nr_pages = round_up(kernel_memsize, EFI_ALLOC_ALIGN) /
- EFI_PAGE_SIZE;
- status = efi_call_early(allocate_pages, EFI_ALLOCATE_ADDRESS,
- EFI_LOADER_DATA, nr_pages,
- (efi_physical_addr_t *)reserve_addr);
- if (status != EFI_SUCCESS) {
- kernel_memsize += TEXT_OFFSET;
- status = efi_low_alloc(sys_table_arg, kernel_memsize,
- SZ_2M, reserve_addr);
-
- if (status != EFI_SUCCESS) {
- pr_efi_err(sys_table_arg, "Failed to relocate kernel\n");
- return status;
- }
- *image_addr = *reserve_addr + TEXT_OFFSET;
- }
- memcpy((void *)*image_addr, old_image_addr, kernel_size);
- *reserve_size = kernel_memsize;
- }
-
-
- return EFI_SUCCESS;
-}
diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c
index 61eb1d17586a..de46b50f4cdf 100644
--- a/arch/arm64/kernel/efi.c
+++ b/arch/arm64/kernel/efi.c
@@ -48,7 +48,6 @@ static struct mm_struct efi_mm = {
.mmap_sem = __RWSEM_INITIALIZER(efi_mm.mmap_sem),
.page_table_lock = __SPIN_LOCK_UNLOCKED(efi_mm.page_table_lock),
.mmlist = LIST_HEAD_INIT(efi_mm.mmlist),
- INIT_MM_CONTEXT(efi_mm)
};
static int __init is_normal_ram(efi_memory_desc_t *md)
@@ -335,9 +334,9 @@ static void efi_set_pgd(struct mm_struct *mm)
else
cpu_switch_mm(mm->pgd, mm);
- flush_tlb_all();
+ local_flush_tlb_all();
if (icache_is_aivivt())
- __flush_icache_all();
+ __local_flush_icache_all();
}
void efi_virtmap_load(void)
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 4306c937b1ff..7ed3d75f6304 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -430,6 +430,8 @@ el0_sync_compat:
b.eq el0_fpsimd_acc
cmp x24, #ESR_ELx_EC_FP_EXC32 // FP/ASIMD exception
b.eq el0_fpsimd_exc
+ cmp x24, #ESR_ELx_EC_PC_ALIGN // pc alignment exception
+ b.eq el0_sp_pc
cmp x24, #ESR_ELx_EC_UNKNOWN // unknown exception in EL0
b.eq el0_undef
cmp x24, #ESR_ELx_EC_CP15_32 // CP15 MRC/MCR trap
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index c56956a16d3f..4c46c54a3ad7 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -332,21 +332,15 @@ static inline void fpsimd_hotplug_init(void) { }
*/
static int __init fpsimd_init(void)
{
- u64 pfr = read_cpuid(ID_AA64PFR0_EL1);
-
- if (pfr & (0xf << 16)) {
+ if (elf_hwcap & HWCAP_FP) {
+ fpsimd_pm_init();
+ fpsimd_hotplug_init();
+ } else {
pr_notice("Floating-point is not implemented\n");
- return 0;
}
- elf_hwcap |= HWCAP_FP;
- if (pfr & (0xf << 20))
+ if (!(elf_hwcap & HWCAP_ASIMD))
pr_notice("Advanced SIMD is not implemented\n");
- else
- elf_hwcap |= HWCAP_ASIMD;
-
- fpsimd_pm_init();
- fpsimd_hotplug_init();
return 0;
}
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 351a4de1b1e2..23cfc08fc8ba 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -29,11 +29,13 @@
#include <asm/asm-offsets.h>
#include <asm/cache.h>
#include <asm/cputype.h>
+#include <asm/kernel-pgtable.h>
#include <asm/memory.h>
-#include <asm/thread_info.h>
#include <asm/pgtable-hwdef.h>
#include <asm/pgtable.h>
#include <asm/page.h>
+#include <asm/sysreg.h>
+#include <asm/thread_info.h>
#include <asm/virt.h>
#define __PHYS_OFFSET (KERNEL_START - TEXT_OFFSET)
@@ -46,32 +48,10 @@
#error TEXT_OFFSET must be less than 2MB
#endif
-#ifdef CONFIG_ARM64_64K_PAGES
-#define BLOCK_SHIFT PAGE_SHIFT
-#define BLOCK_SIZE PAGE_SIZE
-#define TABLE_SHIFT PMD_SHIFT
-#else
-#define BLOCK_SHIFT SECTION_SHIFT
-#define BLOCK_SIZE SECTION_SIZE
-#define TABLE_SHIFT PUD_SHIFT
-#endif
-
#define KERNEL_START _text
#define KERNEL_END _end
/*
- * Initial memory map attributes.
- */
-#define PTE_FLAGS PTE_TYPE_PAGE | PTE_AF | PTE_SHARED
-#define PMD_FLAGS PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S
-
-#ifdef CONFIG_ARM64_64K_PAGES
-#define MM_MMUFLAGS PTE_ATTRINDX(MT_NORMAL) | PTE_FLAGS
-#else
-#define MM_MMUFLAGS PMD_ATTRINDX(MT_NORMAL) | PMD_FLAGS
-#endif
-
-/*
* Kernel startup entry point.
* ---------------------------
*
@@ -120,8 +100,8 @@ efi_head:
#endif
#ifdef CONFIG_EFI
- .globl stext_offset
- .set stext_offset, stext - efi_head
+ .globl __efistub_stext_offset
+ .set __efistub_stext_offset, stext - efi_head
.align 3
pe_header:
.ascii "PE"
@@ -144,8 +124,8 @@ optional_header:
.long _end - stext // SizeOfCode
.long 0 // SizeOfInitializedData
.long 0 // SizeOfUninitializedData
- .long efi_stub_entry - efi_head // AddressOfEntryPoint
- .long stext_offset // BaseOfCode
+ .long __efistub_entry - efi_head // AddressOfEntryPoint
+ .long __efistub_stext_offset // BaseOfCode
extra_header_fields:
.quad 0 // ImageBase
@@ -162,7 +142,7 @@ extra_header_fields:
.long _end - efi_head // SizeOfImage
// Everything before the kernel image is considered part of the header
- .long stext_offset // SizeOfHeaders
+ .long __efistub_stext_offset // SizeOfHeaders
.long 0 // CheckSum
.short 0xa // Subsystem (EFI application)
.short 0 // DllCharacteristics
@@ -207,9 +187,9 @@ section_table:
.byte 0
.byte 0 // end of 0 padding of section name
.long _end - stext // VirtualSize
- .long stext_offset // VirtualAddress
+ .long __efistub_stext_offset // VirtualAddress
.long _edata - stext // SizeOfRawData
- .long stext_offset // PointerToRawData
+ .long __efistub_stext_offset // PointerToRawData
.long 0 // PointerToRelocations (0 for executables)
.long 0 // PointerToLineNumbers (0 for executables)
@@ -292,8 +272,11 @@ ENDPROC(preserve_boot_args)
*/
.macro create_pgd_entry, tbl, virt, tmp1, tmp2
create_table_entry \tbl, \virt, PGDIR_SHIFT, PTRS_PER_PGD, \tmp1, \tmp2
-#if SWAPPER_PGTABLE_LEVELS == 3
- create_table_entry \tbl, \virt, TABLE_SHIFT, PTRS_PER_PTE, \tmp1, \tmp2
+#if SWAPPER_PGTABLE_LEVELS > 3
+ create_table_entry \tbl, \virt, PUD_SHIFT, PTRS_PER_PUD, \tmp1, \tmp2
+#endif
+#if SWAPPER_PGTABLE_LEVELS > 2
+ create_table_entry \tbl, \virt, SWAPPER_TABLE_SHIFT, PTRS_PER_PTE, \tmp1, \tmp2
#endif
.endm
@@ -305,15 +288,15 @@ ENDPROC(preserve_boot_args)
* Corrupts: phys, start, end, pstate
*/
.macro create_block_map, tbl, flags, phys, start, end
- lsr \phys, \phys, #BLOCK_SHIFT
- lsr \start, \start, #BLOCK_SHIFT
+ lsr \phys, \phys, #SWAPPER_BLOCK_SHIFT
+ lsr \start, \start, #SWAPPER_BLOCK_SHIFT
and \start, \start, #PTRS_PER_PTE - 1 // table index
- orr \phys, \flags, \phys, lsl #BLOCK_SHIFT // table entry
- lsr \end, \end, #BLOCK_SHIFT
+ orr \phys, \flags, \phys, lsl #SWAPPER_BLOCK_SHIFT // table entry
+ lsr \end, \end, #SWAPPER_BLOCK_SHIFT
and \end, \end, #PTRS_PER_PTE - 1 // table end index
9999: str \phys, [\tbl, \start, lsl #3] // store the entry
add \start, \start, #1 // next entry
- add \phys, \phys, #BLOCK_SIZE // next block
+ add \phys, \phys, #SWAPPER_BLOCK_SIZE // next block
cmp \start, \end
b.ls 9999b
.endm
@@ -350,7 +333,7 @@ __create_page_tables:
cmp x0, x6
b.lo 1b
- ldr x7, =MM_MMUFLAGS
+ ldr x7, =SWAPPER_MM_MMUFLAGS
/*
* Create the identity mapping.
@@ -444,6 +427,9 @@ __mmap_switched:
str_l x21, __fdt_pointer, x5 // Save FDT pointer
str_l x24, memstart_addr, x6 // Save PHYS_OFFSET
mov x29, #0
+#ifdef CONFIG_KASAN
+ bl kasan_early_init
+#endif
b start_kernel
ENDPROC(__mmap_switched)
@@ -630,10 +616,17 @@ ENDPROC(__secondary_switched)
* x0 = SCTLR_EL1 value for turning on the MMU.
* x27 = *virtual* address to jump to upon completion
*
- * other registers depend on the function called upon completion
+ * Other registers depend on the function called upon completion.
+ *
+ * Checks if the selected granule size is supported by the CPU.
+ * If it isn't, park the CPU
*/
.section ".idmap.text", "ax"
__enable_mmu:
+ mrs x1, ID_AA64MMFR0_EL1
+ ubfx x2, x1, #ID_AA64MMFR0_TGRAN_SHIFT, 4
+ cmp x2, #ID_AA64MMFR0_TGRAN_SUPPORTED
+ b.ne __no_granule_support
ldr x5, =vectors
msr vbar_el1, x5
msr ttbr0_el1, x25 // load TTBR0
@@ -651,3 +644,8 @@ __enable_mmu:
isb
br x27
ENDPROC(__enable_mmu)
+
+__no_granule_support:
+ wfe
+ b __no_granule_support
+ENDPROC(__no_granule_support)
diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c
index bba85c8f8037..b45c95d34b83 100644
--- a/arch/arm64/kernel/hw_breakpoint.c
+++ b/arch/arm64/kernel/hw_breakpoint.c
@@ -28,6 +28,7 @@
#include <linux/ptrace.h>
#include <linux/smp.h>
+#include <asm/compat.h>
#include <asm/current.h>
#include <asm/debug-monitors.h>
#include <asm/hw_breakpoint.h>
@@ -163,6 +164,20 @@ enum hw_breakpoint_ops {
HW_BREAKPOINT_RESTORE
};
+static int is_compat_bp(struct perf_event *bp)
+{
+ struct task_struct *tsk = bp->hw.target;
+
+ /*
+ * tsk can be NULL for per-cpu (non-ptrace) breakpoints.
+ * In this case, use the native interface, since we don't have
+ * the notion of a "compat CPU" and could end up relying on
+ * deprecated behaviour if we use unaligned watchpoints in
+ * AArch64 state.
+ */
+ return tsk && is_compat_thread(task_thread_info(tsk));
+}
+
/**
* hw_breakpoint_slot_setup - Find and setup a perf slot according to
* operations
@@ -420,7 +435,7 @@ static int arch_build_bp_info(struct perf_event *bp)
* Watchpoints can be of length 1, 2, 4 or 8 bytes.
*/
if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE) {
- if (is_compat_task()) {
+ if (is_compat_bp(bp)) {
if (info->ctrl.len != ARM_BREAKPOINT_LEN_2 &&
info->ctrl.len != ARM_BREAKPOINT_LEN_4)
return -EINVAL;
@@ -477,7 +492,7 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp)
* AArch32 tasks expect some simple alignment fixups, so emulate
* that here.
*/
- if (is_compat_task()) {
+ if (is_compat_bp(bp)) {
if (info->ctrl.len == ARM_BREAKPOINT_LEN_8)
alignment_mask = 0x7;
else
diff --git a/arch/arm64/kernel/image.h b/arch/arm64/kernel/image.h
index 8fae0756e175..bc2abb8b1599 100644
--- a/arch/arm64/kernel/image.h
+++ b/arch/arm64/kernel/image.h
@@ -47,7 +47,10 @@
#define __HEAD_FLAG_BE 0
#endif
-#define __HEAD_FLAGS (__HEAD_FLAG_BE << 0)
+#define __HEAD_FLAG_PAGE_SIZE ((PAGE_SHIFT - 10) / 2)
+
+#define __HEAD_FLAGS ((__HEAD_FLAG_BE << 0) | \
+ (__HEAD_FLAG_PAGE_SIZE << 1))
/*
* These will output as part of the Image header, which should be little-endian
@@ -59,4 +62,37 @@
_kernel_offset_le = DATA_LE64(TEXT_OFFSET); \
_kernel_flags_le = DATA_LE64(__HEAD_FLAGS);
+#ifdef CONFIG_EFI
+
+/*
+ * The EFI stub has its own symbol namespace prefixed by __efistub_, to
+ * isolate it from the kernel proper. The following symbols are legally
+ * accessed by the stub, so provide some aliases to make them accessible.
+ * Only include data symbols here, or text symbols of functions that are
+ * guaranteed to be safe when executed at another offset than they were
+ * linked at. The routines below are all implemented in assembler in a
+ * position independent manner
+ */
+__efistub_memcmp = __pi_memcmp;
+__efistub_memchr = __pi_memchr;
+__efistub_memcpy = __pi_memcpy;
+__efistub_memmove = __pi_memmove;
+__efistub_memset = __pi_memset;
+__efistub_strlen = __pi_strlen;
+__efistub_strcmp = __pi_strcmp;
+__efistub_strncmp = __pi_strncmp;
+__efistub___flush_dcache_area = __pi___flush_dcache_area;
+
+#ifdef CONFIG_KASAN
+__efistub___memcpy = __pi_memcpy;
+__efistub___memmove = __pi_memmove;
+__efistub___memset = __pi_memset;
+#endif
+
+__efistub__text = _text;
+__efistub__end = _end;
+__efistub__edata = _edata;
+
+#endif
+
#endif /* __ASM_IMAGE_H */
diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c
index 11dc3fd47853..9f17ec071ee0 100644
--- a/arch/arm64/kernel/irq.c
+++ b/arch/arm64/kernel/irq.c
@@ -27,7 +27,6 @@
#include <linux/init.h>
#include <linux/irqchip.h>
#include <linux/seq_file.h>
-#include <linux/ratelimit.h>
unsigned long irq_err_count;
@@ -54,64 +53,3 @@ void __init init_IRQ(void)
if (!handle_arch_irq)
panic("No interrupt controller found.");
}
-
-#ifdef CONFIG_HOTPLUG_CPU
-static bool migrate_one_irq(struct irq_desc *desc)
-{
- struct irq_data *d = irq_desc_get_irq_data(desc);
- const struct cpumask *affinity = irq_data_get_affinity_mask(d);
- struct irq_chip *c;
- bool ret = false;
-
- /*
- * If this is a per-CPU interrupt, or the affinity does not
- * include this CPU, then we have nothing to do.
- */
- if (irqd_is_per_cpu(d) || !cpumask_test_cpu(smp_processor_id(), affinity))
- return false;
-
- if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
- affinity = cpu_online_mask;
- ret = true;
- }
-
- c = irq_data_get_irq_chip(d);
- if (!c->irq_set_affinity)
- pr_debug("IRQ%u: unable to set affinity\n", d->irq);
- else if (c->irq_set_affinity(d, affinity, false) == IRQ_SET_MASK_OK && ret)
- cpumask_copy(irq_data_get_affinity_mask(d), affinity);
-
- return ret;
-}
-
-/*
- * The current CPU has been marked offline. Migrate IRQs off this CPU.
- * If the affinity settings do not allow other CPUs, force them onto any
- * available CPU.
- *
- * Note: we must iterate over all IRQs, whether they have an attached
- * action structure or not, as we need to get chained interrupts too.
- */
-void migrate_irqs(void)
-{
- unsigned int i;
- struct irq_desc *desc;
- unsigned long flags;
-
- local_irq_save(flags);
-
- for_each_irq_desc(i, desc) {
- bool affinity_broken;
-
- raw_spin_lock(&desc->lock);
- affinity_broken = migrate_one_irq(desc);
- raw_spin_unlock(&desc->lock);
-
- if (affinity_broken)
- pr_warn_ratelimited("IRQ%u no longer affine to CPU%u\n",
- i, smp_processor_id());
- }
-
- local_irq_restore(flags);
-}
-#endif /* CONFIG_HOTPLUG_CPU */
diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
index 876eb8df50bf..f4bc779e62e8 100644
--- a/arch/arm64/kernel/module.c
+++ b/arch/arm64/kernel/module.c
@@ -21,6 +21,7 @@
#include <linux/bitops.h>
#include <linux/elf.h>
#include <linux/gfp.h>
+#include <linux/kasan.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/moduleloader.h>
@@ -34,9 +35,18 @@
void *module_alloc(unsigned long size)
{
- return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
- GFP_KERNEL, PAGE_KERNEL_EXEC, 0,
- NUMA_NO_NODE, __builtin_return_address(0));
+ void *p;
+
+ p = __vmalloc_node_range(size, MODULE_ALIGN, MODULES_VADDR, MODULES_END,
+ GFP_KERNEL, PAGE_KERNEL_EXEC, 0,
+ NUMA_NO_NODE, __builtin_return_address(0));
+
+ if (p && (kasan_module_alloc(p, size) < 0)) {
+ vfree(p);
+ return NULL;
+ }
+
+ return p;
}
enum aarch64_reloc_op {
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index f9a74d4fff3b..5b1897e8ca24 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -18,651 +18,12 @@
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-#define pr_fmt(fmt) "hw perfevents: " fmt
-
-#include <linux/bitmap.h>
-#include <linux/interrupt.h>
-#include <linux/irq.h>
-#include <linux/kernel.h>
-#include <linux/export.h>
-#include <linux/of_device.h>
-#include <linux/perf_event.h>
-#include <linux/platform_device.h>
-#include <linux/slab.h>
-#include <linux/spinlock.h>
-#include <linux/uaccess.h>
-#include <asm/cputype.h>
-#include <asm/irq.h>
#include <asm/irq_regs.h>
-#include <asm/pmu.h>
-
-/*
- * ARMv8 supports a maximum of 32 events.
- * The cycle counter is included in this total.
- */
-#define ARMPMU_MAX_HWEVENTS 32
-
-static DEFINE_PER_CPU(struct perf_event * [ARMPMU_MAX_HWEVENTS], hw_events);
-static DEFINE_PER_CPU(unsigned long [BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)], used_mask);
-static DEFINE_PER_CPU(struct pmu_hw_events, cpu_hw_events);
-
-#define to_arm_pmu(p) (container_of(p, struct arm_pmu, pmu))
-
-/* Set at runtime when we know what CPU type we are. */
-static struct arm_pmu *cpu_pmu;
-
-int
-armpmu_get_max_events(void)
-{
- int max_events = 0;
-
- if (cpu_pmu != NULL)
- max_events = cpu_pmu->num_events;
-
- return max_events;
-}
-EXPORT_SYMBOL_GPL(armpmu_get_max_events);
-
-int perf_num_counters(void)
-{
- return armpmu_get_max_events();
-}
-EXPORT_SYMBOL_GPL(perf_num_counters);
-
-#define HW_OP_UNSUPPORTED 0xFFFF
-
-#define C(_x) \
- PERF_COUNT_HW_CACHE_##_x
-
-#define CACHE_OP_UNSUPPORTED 0xFFFF
-
-#define PERF_MAP_ALL_UNSUPPORTED \
- [0 ... PERF_COUNT_HW_MAX - 1] = HW_OP_UNSUPPORTED
-
-#define PERF_CACHE_MAP_ALL_UNSUPPORTED \
-[0 ... C(MAX) - 1] = { \
- [0 ... C(OP_MAX) - 1] = { \
- [0 ... C(RESULT_MAX) - 1] = CACHE_OP_UNSUPPORTED, \
- }, \
-}
-
-static int
-armpmu_map_cache_event(const unsigned (*cache_map)
- [PERF_COUNT_HW_CACHE_MAX]
- [PERF_COUNT_HW_CACHE_OP_MAX]
- [PERF_COUNT_HW_CACHE_RESULT_MAX],
- u64 config)
-{
- unsigned int cache_type, cache_op, cache_result, ret;
-
- cache_type = (config >> 0) & 0xff;
- if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
- return -EINVAL;
-
- cache_op = (config >> 8) & 0xff;
- if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
- return -EINVAL;
-
- cache_result = (config >> 16) & 0xff;
- if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
- return -EINVAL;
-
- ret = (int)(*cache_map)[cache_type][cache_op][cache_result];
-
- if (ret == CACHE_OP_UNSUPPORTED)
- return -ENOENT;
-
- return ret;
-}
-
-static int
-armpmu_map_event(const unsigned (*event_map)[PERF_COUNT_HW_MAX], u64 config)
-{
- int mapping;
-
- if (config >= PERF_COUNT_HW_MAX)
- return -EINVAL;
-
- mapping = (*event_map)[config];
- return mapping == HW_OP_UNSUPPORTED ? -ENOENT : mapping;
-}
-
-static int
-armpmu_map_raw_event(u32 raw_event_mask, u64 config)
-{
- return (int)(config & raw_event_mask);
-}
-
-static int map_cpu_event(struct perf_event *event,
- const unsigned (*event_map)[PERF_COUNT_HW_MAX],
- const unsigned (*cache_map)
- [PERF_COUNT_HW_CACHE_MAX]
- [PERF_COUNT_HW_CACHE_OP_MAX]
- [PERF_COUNT_HW_CACHE_RESULT_MAX],
- u32 raw_event_mask)
-{
- u64 config = event->attr.config;
-
- switch (event->attr.type) {
- case PERF_TYPE_HARDWARE:
- return armpmu_map_event(event_map, config);
- case PERF_TYPE_HW_CACHE:
- return armpmu_map_cache_event(cache_map, config);
- case PERF_TYPE_RAW:
- return armpmu_map_raw_event(raw_event_mask, config);
- }
-
- return -ENOENT;
-}
-
-int
-armpmu_event_set_period(struct perf_event *event,
- struct hw_perf_event *hwc,
- int idx)
-{
- struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
- s64 left = local64_read(&hwc->period_left);
- s64 period = hwc->sample_period;
- int ret = 0;
-
- if (unlikely(left <= -period)) {
- left = period;
- local64_set(&hwc->period_left, left);
- hwc->last_period = period;
- ret = 1;
- }
-
- if (unlikely(left <= 0)) {
- left += period;
- local64_set(&hwc->period_left, left);
- hwc->last_period = period;
- ret = 1;
- }
-
- /*
- * Limit the maximum period to prevent the counter value
- * from overtaking the one we are about to program. In
- * effect we are reducing max_period to account for
- * interrupt latency (and we are being very conservative).
- */
- if (left > (armpmu->max_period >> 1))
- left = armpmu->max_period >> 1;
-
- local64_set(&hwc->prev_count, (u64)-left);
-
- armpmu->write_counter(idx, (u64)(-left) & 0xffffffff);
-
- perf_event_update_userpage(event);
-
- return ret;
-}
-
-u64
-armpmu_event_update(struct perf_event *event,
- struct hw_perf_event *hwc,
- int idx)
-{
- struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
- u64 delta, prev_raw_count, new_raw_count;
-
-again:
- prev_raw_count = local64_read(&hwc->prev_count);
- new_raw_count = armpmu->read_counter(idx);
-
- if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
- new_raw_count) != prev_raw_count)
- goto again;
-
- delta = (new_raw_count - prev_raw_count) & armpmu->max_period;
-
- local64_add(delta, &event->count);
- local64_sub(delta, &hwc->period_left);
-
- return new_raw_count;
-}
-
-static void
-armpmu_read(struct perf_event *event)
-{
- struct hw_perf_event *hwc = &event->hw;
-
- /* Don't read disabled counters! */
- if (hwc->idx < 0)
- return;
-
- armpmu_event_update(event, hwc, hwc->idx);
-}
-
-static void
-armpmu_stop(struct perf_event *event, int flags)
-{
- struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
- struct hw_perf_event *hwc = &event->hw;
-
- /*
- * ARM pmu always has to update the counter, so ignore
- * PERF_EF_UPDATE, see comments in armpmu_start().
- */
- if (!(hwc->state & PERF_HES_STOPPED)) {
- armpmu->disable(hwc, hwc->idx);
- barrier(); /* why? */
- armpmu_event_update(event, hwc, hwc->idx);
- hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
- }
-}
-
-static void
-armpmu_start(struct perf_event *event, int flags)
-{
- struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
- struct hw_perf_event *hwc = &event->hw;
-
- /*
- * ARM pmu always has to reprogram the period, so ignore
- * PERF_EF_RELOAD, see the comment below.
- */
- if (flags & PERF_EF_RELOAD)
- WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
-
- hwc->state = 0;
- /*
- * Set the period again. Some counters can't be stopped, so when we
- * were stopped we simply disabled the IRQ source and the counter
- * may have been left counting. If we don't do this step then we may
- * get an interrupt too soon or *way* too late if the overflow has
- * happened since disabling.
- */
- armpmu_event_set_period(event, hwc, hwc->idx);
- armpmu->enable(hwc, hwc->idx);
-}
-
-static void
-armpmu_del(struct perf_event *event, int flags)
-{
- struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
- struct pmu_hw_events *hw_events = armpmu->get_hw_events();
- struct hw_perf_event *hwc = &event->hw;
- int idx = hwc->idx;
-
- WARN_ON(idx < 0);
-
- armpmu_stop(event, PERF_EF_UPDATE);
- hw_events->events[idx] = NULL;
- clear_bit(idx, hw_events->used_mask);
-
- perf_event_update_userpage(event);
-}
-
-static int
-armpmu_add(struct perf_event *event, int flags)
-{
- struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
- struct pmu_hw_events *hw_events = armpmu->get_hw_events();
- struct hw_perf_event *hwc = &event->hw;
- int idx;
- int err = 0;
-
- perf_pmu_disable(event->pmu);
-
- /* If we don't have a space for the counter then finish early. */
- idx = armpmu->get_event_idx(hw_events, hwc);
- if (idx < 0) {
- err = idx;
- goto out;
- }
-
- /*
- * If there is an event in the counter we are going to use then make
- * sure it is disabled.
- */
- event->hw.idx = idx;
- armpmu->disable(hwc, idx);
- hw_events->events[idx] = event;
-
- hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
- if (flags & PERF_EF_START)
- armpmu_start(event, PERF_EF_RELOAD);
-
- /* Propagate our changes to the userspace mapping. */
- perf_event_update_userpage(event);
-
-out:
- perf_pmu_enable(event->pmu);
- return err;
-}
-
-static int
-validate_event(struct pmu *pmu, struct pmu_hw_events *hw_events,
- struct perf_event *event)
-{
- struct arm_pmu *armpmu;
- struct hw_perf_event fake_event = event->hw;
- struct pmu *leader_pmu = event->group_leader->pmu;
-
- if (is_software_event(event))
- return 1;
-
- /*
- * Reject groups spanning multiple HW PMUs (e.g. CPU + CCI). The
- * core perf code won't check that the pmu->ctx == leader->ctx
- * until after pmu->event_init(event).
- */
- if (event->pmu != pmu)
- return 0;
-
- if (event->pmu != leader_pmu || event->state < PERF_EVENT_STATE_OFF)
- return 1;
-
- if (event->state == PERF_EVENT_STATE_OFF && !event->attr.enable_on_exec)
- return 1;
-
- armpmu = to_arm_pmu(event->pmu);
- return armpmu->get_event_idx(hw_events, &fake_event) >= 0;
-}
-
-static int
-validate_group(struct perf_event *event)
-{
- struct perf_event *sibling, *leader = event->group_leader;
- struct pmu_hw_events fake_pmu;
- DECLARE_BITMAP(fake_used_mask, ARMPMU_MAX_HWEVENTS);
-
- /*
- * Initialise the fake PMU. We only need to populate the
- * used_mask for the purposes of validation.
- */
- memset(fake_used_mask, 0, sizeof(fake_used_mask));
- fake_pmu.used_mask = fake_used_mask;
-
- if (!validate_event(event->pmu, &fake_pmu, leader))
- return -EINVAL;
-
- list_for_each_entry(sibling, &leader->sibling_list, group_entry) {
- if (!validate_event(event->pmu, &fake_pmu, sibling))
- return -EINVAL;
- }
-
- if (!validate_event(event->pmu, &fake_pmu, event))
- return -EINVAL;
-
- return 0;
-}
-
-static void
-armpmu_disable_percpu_irq(void *data)
-{
- unsigned int irq = *(unsigned int *)data;
- disable_percpu_irq(irq);
-}
-
-static void
-armpmu_release_hardware(struct arm_pmu *armpmu)
-{
- int irq;
- unsigned int i, irqs;
- struct platform_device *pmu_device = armpmu->plat_device;
-
- irqs = min(pmu_device->num_resources, num_possible_cpus());
- if (!irqs)
- return;
-
- irq = platform_get_irq(pmu_device, 0);
- if (irq <= 0)
- return;
-
- if (irq_is_percpu(irq)) {
- on_each_cpu(armpmu_disable_percpu_irq, &irq, 1);
- free_percpu_irq(irq, &cpu_hw_events);
- } else {
- for (i = 0; i < irqs; ++i) {
- int cpu = i;
-
- if (armpmu->irq_affinity)
- cpu = armpmu->irq_affinity[i];
-
- if (!cpumask_test_and_clear_cpu(cpu, &armpmu->active_irqs))
- continue;
- irq = platform_get_irq(pmu_device, i);
- if (irq > 0)
- free_irq(irq, armpmu);
- }
- }
-}
-
-static void
-armpmu_enable_percpu_irq(void *data)
-{
- unsigned int irq = *(unsigned int *)data;
- enable_percpu_irq(irq, IRQ_TYPE_NONE);
-}
-
-static int
-armpmu_reserve_hardware(struct arm_pmu *armpmu)
-{
- int err, irq;
- unsigned int i, irqs;
- struct platform_device *pmu_device = armpmu->plat_device;
-
- if (!pmu_device)
- return -ENODEV;
-
- irqs = min(pmu_device->num_resources, num_possible_cpus());
- if (!irqs) {
- pr_err("no irqs for PMUs defined\n");
- return -ENODEV;
- }
-
- irq = platform_get_irq(pmu_device, 0);
- if (irq <= 0) {
- pr_err("failed to get valid irq for PMU device\n");
- return -ENODEV;
- }
-
- if (irq_is_percpu(irq)) {
- err = request_percpu_irq(irq, armpmu->handle_irq,
- "arm-pmu", &cpu_hw_events);
-
- if (err) {
- pr_err("unable to request percpu IRQ%d for ARM PMU counters\n",
- irq);
- armpmu_release_hardware(armpmu);
- return err;
- }
-
- on_each_cpu(armpmu_enable_percpu_irq, &irq, 1);
- } else {
- for (i = 0; i < irqs; ++i) {
- int cpu = i;
-
- err = 0;
- irq = platform_get_irq(pmu_device, i);
- if (irq <= 0)
- continue;
-
- if (armpmu->irq_affinity)
- cpu = armpmu->irq_affinity[i];
-
- /*
- * If we have a single PMU interrupt that we can't shift,
- * assume that we're running on a uniprocessor machine and
- * continue. Otherwise, continue without this interrupt.
- */
- if (irq_set_affinity(irq, cpumask_of(cpu)) && irqs > 1) {
- pr_warning("unable to set irq affinity (irq=%d, cpu=%u)\n",
- irq, cpu);
- continue;
- }
-
- err = request_irq(irq, armpmu->handle_irq,
- IRQF_NOBALANCING | IRQF_NO_THREAD,
- "arm-pmu", armpmu);
- if (err) {
- pr_err("unable to request IRQ%d for ARM PMU counters\n",
- irq);
- armpmu_release_hardware(armpmu);
- return err;
- }
-
- cpumask_set_cpu(cpu, &armpmu->active_irqs);
- }
- }
-
- return 0;
-}
-
-static void
-hw_perf_event_destroy(struct perf_event *event)
-{
- struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
- atomic_t *active_events = &armpmu->active_events;
- struct mutex *pmu_reserve_mutex = &armpmu->reserve_mutex;
-
- if (atomic_dec_and_mutex_lock(active_events, pmu_reserve_mutex)) {
- armpmu_release_hardware(armpmu);
- mutex_unlock(pmu_reserve_mutex);
- }
-}
-
-static int
-event_requires_mode_exclusion(struct perf_event_attr *attr)
-{
- return attr->exclude_idle || attr->exclude_user ||
- attr->exclude_kernel || attr->exclude_hv;
-}
-
-static int
-__hw_perf_event_init(struct perf_event *event)
-{
- struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
- struct hw_perf_event *hwc = &event->hw;
- int mapping, err;
-
- mapping = armpmu->map_event(event);
-
- if (mapping < 0) {
- pr_debug("event %x:%llx not supported\n", event->attr.type,
- event->attr.config);
- return mapping;
- }
-
- /*
- * We don't assign an index until we actually place the event onto
- * hardware. Use -1 to signify that we haven't decided where to put it
- * yet. For SMP systems, each core has it's own PMU so we can't do any
- * clever allocation or constraints checking at this point.
- */
- hwc->idx = -1;
- hwc->config_base = 0;
- hwc->config = 0;
- hwc->event_base = 0;
-
- /*
- * Check whether we need to exclude the counter from certain modes.
- */
- if ((!armpmu->set_event_filter ||
- armpmu->set_event_filter(hwc, &event->attr)) &&
- event_requires_mode_exclusion(&event->attr)) {
- pr_debug("ARM performance counters do not support mode exclusion\n");
- return -EPERM;
- }
-
- /*
- * Store the event encoding into the config_base field.
- */
- hwc->config_base |= (unsigned long)mapping;
-
- if (!hwc->sample_period) {
- /*
- * For non-sampling runs, limit the sample_period to half
- * of the counter width. That way, the new counter value
- * is far less likely to overtake the previous one unless
- * you have some serious IRQ latency issues.
- */
- hwc->sample_period = armpmu->max_period >> 1;
- hwc->last_period = hwc->sample_period;
- local64_set(&hwc->period_left, hwc->sample_period);
- }
-
- err = 0;
- if (event->group_leader != event) {
- err = validate_group(event);
- if (err)
- return -EINVAL;
- }
-
- return err;
-}
-
-static int armpmu_event_init(struct perf_event *event)
-{
- struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
- int err = 0;
- atomic_t *active_events = &armpmu->active_events;
-
- if (armpmu->map_event(event) == -ENOENT)
- return -ENOENT;
-
- event->destroy = hw_perf_event_destroy;
-
- if (!atomic_inc_not_zero(active_events)) {
- mutex_lock(&armpmu->reserve_mutex);
- if (atomic_read(active_events) == 0)
- err = armpmu_reserve_hardware(armpmu);
-
- if (!err)
- atomic_inc(active_events);
- mutex_unlock(&armpmu->reserve_mutex);
- }
- if (err)
- return err;
-
- err = __hw_perf_event_init(event);
- if (err)
- hw_perf_event_destroy(event);
-
- return err;
-}
-
-static void armpmu_enable(struct pmu *pmu)
-{
- struct arm_pmu *armpmu = to_arm_pmu(pmu);
- struct pmu_hw_events *hw_events = armpmu->get_hw_events();
- int enabled = bitmap_weight(hw_events->used_mask, armpmu->num_events);
-
- if (enabled)
- armpmu->start();
-}
-
-static void armpmu_disable(struct pmu *pmu)
-{
- struct arm_pmu *armpmu = to_arm_pmu(pmu);
- armpmu->stop();
-}
-
-static void __init armpmu_init(struct arm_pmu *armpmu)
-{
- atomic_set(&armpmu->active_events, 0);
- mutex_init(&armpmu->reserve_mutex);
-
- armpmu->pmu = (struct pmu) {
- .pmu_enable = armpmu_enable,
- .pmu_disable = armpmu_disable,
- .event_init = armpmu_event_init,
- .add = armpmu_add,
- .del = armpmu_del,
- .start = armpmu_start,
- .stop = armpmu_stop,
- .read = armpmu_read,
- };
-}
-
-int __init armpmu_register(struct arm_pmu *armpmu, char *name, int type)
-{
- armpmu_init(armpmu);
- return perf_pmu_register(&armpmu->pmu, name, type);
-}
+#include <linux/of.h>
+#include <linux/perf/arm_pmu.h>
+#include <linux/platform_device.h>
/*
* ARMv8 PMUv3 Performance Events handling code.
@@ -708,6 +69,21 @@ enum armv8_pmuv3_perf_types {
ARMV8_PMUV3_PERFCTR_BUS_CYCLES = 0x1D,
};
+/* ARMv8 Cortex-A53 specific event types. */
+enum armv8_a53_pmu_perf_types {
+ ARMV8_A53_PERFCTR_PREFETCH_LINEFILL = 0xC2,
+};
+
+/* ARMv8 Cortex-A57 specific event types. */
+enum armv8_a57_perf_types {
+ ARMV8_A57_PERFCTR_L1_DCACHE_ACCESS_LD = 0x40,
+ ARMV8_A57_PERFCTR_L1_DCACHE_ACCESS_ST = 0x41,
+ ARMV8_A57_PERFCTR_L1_DCACHE_REFILL_LD = 0x42,
+ ARMV8_A57_PERFCTR_L1_DCACHE_REFILL_ST = 0x43,
+ ARMV8_A57_PERFCTR_DTLB_REFILL_LD = 0x4c,
+ ARMV8_A57_PERFCTR_DTLB_REFILL_ST = 0x4d,
+};
+
/* PMUv3 HW events mapping. */
static const unsigned armv8_pmuv3_perf_map[PERF_COUNT_HW_MAX] = {
PERF_MAP_ALL_UNSUPPORTED,
@@ -718,6 +94,28 @@ static const unsigned armv8_pmuv3_perf_map[PERF_COUNT_HW_MAX] = {
[PERF_COUNT_HW_BRANCH_MISSES] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
};
+/* ARM Cortex-A53 HW events mapping. */
+static const unsigned armv8_a53_perf_map[PERF_COUNT_HW_MAX] = {
+ PERF_MAP_ALL_UNSUPPORTED,
+ [PERF_COUNT_HW_CPU_CYCLES] = ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES,
+ [PERF_COUNT_HW_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_INSTR_EXECUTED,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS,
+ [PERF_COUNT_HW_CACHE_MISSES] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_PC_WRITE,
+ [PERF_COUNT_HW_BRANCH_MISSES] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
+ [PERF_COUNT_HW_BUS_CYCLES] = ARMV8_PMUV3_PERFCTR_BUS_CYCLES,
+};
+
+static const unsigned armv8_a57_perf_map[PERF_COUNT_HW_MAX] = {
+ PERF_MAP_ALL_UNSUPPORTED,
+ [PERF_COUNT_HW_CPU_CYCLES] = ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES,
+ [PERF_COUNT_HW_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_INSTR_EXECUTED,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS,
+ [PERF_COUNT_HW_CACHE_MISSES] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL,
+ [PERF_COUNT_HW_BRANCH_MISSES] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
+ [PERF_COUNT_HW_BUS_CYCLES] = ARMV8_PMUV3_PERFCTR_BUS_CYCLES,
+};
+
static const unsigned armv8_pmuv3_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
@@ -734,12 +132,60 @@ static const unsigned armv8_pmuv3_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
[C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
};
+static const unsigned armv8_a53_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+ PERF_CACHE_MAP_ALL_UNSUPPORTED,
+
+ [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS,
+ [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL,
+ [C(L1D)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV8_A53_PERFCTR_PREFETCH_LINEFILL,
+
+ [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1_ICACHE_ACCESS,
+ [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1_ICACHE_REFILL,
+
+ [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_ITLB_REFILL,
+
+ [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED,
+ [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
+};
+
+static const unsigned armv8_a57_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+ PERF_CACHE_MAP_ALL_UNSUPPORTED,
+
+ [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_A57_PERFCTR_L1_DCACHE_ACCESS_LD,
+ [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_A57_PERFCTR_L1_DCACHE_REFILL_LD,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_A57_PERFCTR_L1_DCACHE_ACCESS_ST,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_A57_PERFCTR_L1_DCACHE_REFILL_ST,
+
+ [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1_ICACHE_ACCESS,
+ [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1_ICACHE_REFILL,
+
+ [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_A57_PERFCTR_DTLB_REFILL_LD,
+ [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_A57_PERFCTR_DTLB_REFILL_ST,
+
+ [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_ITLB_REFILL,
+
+ [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED,
+ [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
+};
+
+
/*
* Perf Events' indices
*/
#define ARMV8_IDX_CYCLE_COUNTER 0
#define ARMV8_IDX_COUNTER0 1
-#define ARMV8_IDX_COUNTER_LAST (ARMV8_IDX_CYCLE_COUNTER + cpu_pmu->num_events - 1)
+#define ARMV8_IDX_COUNTER_LAST(cpu_pmu) \
+ (ARMV8_IDX_CYCLE_COUNTER + cpu_pmu->num_events - 1)
#define ARMV8_MAX_COUNTERS 32
#define ARMV8_COUNTER_MASK (ARMV8_MAX_COUNTERS - 1)
@@ -805,49 +251,34 @@ static inline int armv8pmu_has_overflowed(u32 pmovsr)
return pmovsr & ARMV8_OVERFLOWED_MASK;
}
-static inline int armv8pmu_counter_valid(int idx)
+static inline int armv8pmu_counter_valid(struct arm_pmu *cpu_pmu, int idx)
{
- return idx >= ARMV8_IDX_CYCLE_COUNTER && idx <= ARMV8_IDX_COUNTER_LAST;
+ return idx >= ARMV8_IDX_CYCLE_COUNTER &&
+ idx <= ARMV8_IDX_COUNTER_LAST(cpu_pmu);
}
static inline int armv8pmu_counter_has_overflowed(u32 pmnc, int idx)
{
- int ret = 0;
- u32 counter;
-
- if (!armv8pmu_counter_valid(idx)) {
- pr_err("CPU%u checking wrong counter %d overflow status\n",
- smp_processor_id(), idx);
- } else {
- counter = ARMV8_IDX_TO_COUNTER(idx);
- ret = pmnc & BIT(counter);
- }
-
- return ret;
+ return pmnc & BIT(ARMV8_IDX_TO_COUNTER(idx));
}
static inline int armv8pmu_select_counter(int idx)
{
- u32 counter;
-
- if (!armv8pmu_counter_valid(idx)) {
- pr_err("CPU%u selecting wrong PMNC counter %d\n",
- smp_processor_id(), idx);
- return -EINVAL;
- }
-
- counter = ARMV8_IDX_TO_COUNTER(idx);
+ u32 counter = ARMV8_IDX_TO_COUNTER(idx);
asm volatile("msr pmselr_el0, %0" :: "r" (counter));
isb();
return idx;
}
-static inline u32 armv8pmu_read_counter(int idx)
+static inline u32 armv8pmu_read_counter(struct perf_event *event)
{
+ struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ int idx = hwc->idx;
u32 value = 0;
- if (!armv8pmu_counter_valid(idx))
+ if (!armv8pmu_counter_valid(cpu_pmu, idx))
pr_err("CPU%u reading wrong counter %d\n",
smp_processor_id(), idx);
else if (idx == ARMV8_IDX_CYCLE_COUNTER)
@@ -858,9 +289,13 @@ static inline u32 armv8pmu_read_counter(int idx)
return value;
}
-static inline void armv8pmu_write_counter(int idx, u32 value)
+static inline void armv8pmu_write_counter(struct perf_event *event, u32 value)
{
- if (!armv8pmu_counter_valid(idx))
+ struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ int idx = hwc->idx;
+
+ if (!armv8pmu_counter_valid(cpu_pmu, idx))
pr_err("CPU%u writing wrong counter %d\n",
smp_processor_id(), idx);
else if (idx == ARMV8_IDX_CYCLE_COUNTER)
@@ -879,65 +314,34 @@ static inline void armv8pmu_write_evtype(int idx, u32 val)
static inline int armv8pmu_enable_counter(int idx)
{
- u32 counter;
-
- if (!armv8pmu_counter_valid(idx)) {
- pr_err("CPU%u enabling wrong PMNC counter %d\n",
- smp_processor_id(), idx);
- return -EINVAL;
- }
-
- counter = ARMV8_IDX_TO_COUNTER(idx);
+ u32 counter = ARMV8_IDX_TO_COUNTER(idx);
asm volatile("msr pmcntenset_el0, %0" :: "r" (BIT(counter)));
return idx;
}
static inline int armv8pmu_disable_counter(int idx)
{
- u32 counter;
-
- if (!armv8pmu_counter_valid(idx)) {
- pr_err("CPU%u disabling wrong PMNC counter %d\n",
- smp_processor_id(), idx);
- return -EINVAL;
- }
-
- counter = ARMV8_IDX_TO_COUNTER(idx);
+ u32 counter = ARMV8_IDX_TO_COUNTER(idx);
asm volatile("msr pmcntenclr_el0, %0" :: "r" (BIT(counter)));
return idx;
}
static inline int armv8pmu_enable_intens(int idx)
{
- u32 counter;
-
- if (!armv8pmu_counter_valid(idx)) {
- pr_err("CPU%u enabling wrong PMNC counter IRQ enable %d\n",
- smp_processor_id(), idx);
- return -EINVAL;
- }
-
- counter = ARMV8_IDX_TO_COUNTER(idx);
+ u32 counter = ARMV8_IDX_TO_COUNTER(idx);
asm volatile("msr pmintenset_el1, %0" :: "r" (BIT(counter)));
return idx;
}
static inline int armv8pmu_disable_intens(int idx)
{
- u32 counter;
-
- if (!armv8pmu_counter_valid(idx)) {
- pr_err("CPU%u disabling wrong PMNC counter IRQ enable %d\n",
- smp_processor_id(), idx);
- return -EINVAL;
- }
-
- counter = ARMV8_IDX_TO_COUNTER(idx);
+ u32 counter = ARMV8_IDX_TO_COUNTER(idx);
asm volatile("msr pmintenclr_el1, %0" :: "r" (BIT(counter)));
isb();
/* Clear the overflow flag in case an interrupt is pending. */
asm volatile("msr pmovsclr_el0, %0" :: "r" (BIT(counter)));
isb();
+
return idx;
}
@@ -955,10 +359,13 @@ static inline u32 armv8pmu_getreset_flags(void)
return value;
}
-static void armv8pmu_enable_event(struct hw_perf_event *hwc, int idx)
+static void armv8pmu_enable_event(struct perf_event *event)
{
unsigned long flags;
- struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+ struct hw_perf_event *hwc = &event->hw;
+ struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+ struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
+ int idx = hwc->idx;
/*
* Enable counter and interrupt, and set the counter to count
@@ -989,10 +396,13 @@ static void armv8pmu_enable_event(struct hw_perf_event *hwc, int idx)
raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
}
-static void armv8pmu_disable_event(struct hw_perf_event *hwc, int idx)
+static void armv8pmu_disable_event(struct perf_event *event)
{
unsigned long flags;
- struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+ struct hw_perf_event *hwc = &event->hw;
+ struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+ struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
+ int idx = hwc->idx;
/*
* Disable counter and interrupt
@@ -1016,7 +426,8 @@ static irqreturn_t armv8pmu_handle_irq(int irq_num, void *dev)
{
u32 pmovsr;
struct perf_sample_data data;
- struct pmu_hw_events *cpuc;
+ struct arm_pmu *cpu_pmu = (struct arm_pmu *)dev;
+ struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events);
struct pt_regs *regs;
int idx;
@@ -1036,7 +447,6 @@ static irqreturn_t armv8pmu_handle_irq(int irq_num, void *dev)
*/
regs = get_irq_regs();
- cpuc = this_cpu_ptr(&cpu_hw_events);
for (idx = 0; idx < cpu_pmu->num_events; ++idx) {
struct perf_event *event = cpuc->events[idx];
struct hw_perf_event *hwc;
@@ -1053,13 +463,13 @@ static irqreturn_t armv8pmu_handle_irq(int irq_num, void *dev)
continue;
hwc = &event->hw;
- armpmu_event_update(event, hwc, idx);
+ armpmu_event_update(event);
perf_sample_data_init(&data, 0, hwc->last_period);
- if (!armpmu_event_set_period(event, hwc, idx))
+ if (!armpmu_event_set_period(event))
continue;
if (perf_event_overflow(event, &data, regs))
- cpu_pmu->disable(hwc, idx);
+ cpu_pmu->disable(event);
}
/*
@@ -1074,10 +484,10 @@ static irqreturn_t armv8pmu_handle_irq(int irq_num, void *dev)
return IRQ_HANDLED;
}
-static void armv8pmu_start(void)
+static void armv8pmu_start(struct arm_pmu *cpu_pmu)
{
unsigned long flags;
- struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+ struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
raw_spin_lock_irqsave(&events->pmu_lock, flags);
/* Enable all counters */
@@ -1085,10 +495,10 @@ static void armv8pmu_start(void)
raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
}
-static void armv8pmu_stop(void)
+static void armv8pmu_stop(struct arm_pmu *cpu_pmu)
{
unsigned long flags;
- struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+ struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
raw_spin_lock_irqsave(&events->pmu_lock, flags);
/* Disable all counters */
@@ -1097,10 +507,12 @@ static void armv8pmu_stop(void)
}
static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc,
- struct hw_perf_event *event)
+ struct perf_event *event)
{
int idx;
- unsigned long evtype = event->config_base & ARMV8_EVTYPE_EVENT;
+ struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ unsigned long evtype = hwc->config_base & ARMV8_EVTYPE_EVENT;
/* Always place a cycle counter into the cycle counter. */
if (evtype == ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES) {
@@ -1151,11 +563,14 @@ static int armv8pmu_set_event_filter(struct hw_perf_event *event,
static void armv8pmu_reset(void *info)
{
+ struct arm_pmu *cpu_pmu = (struct arm_pmu *)info;
u32 idx, nb_cnt = cpu_pmu->num_events;
/* The counter and interrupt enable registers are unknown at reset. */
- for (idx = ARMV8_IDX_CYCLE_COUNTER; idx < nb_cnt; ++idx)
- armv8pmu_disable_event(NULL, idx);
+ for (idx = ARMV8_IDX_CYCLE_COUNTER; idx < nb_cnt; ++idx) {
+ armv8pmu_disable_counter(idx);
+ armv8pmu_disable_intens(idx);
+ }
/* Initialize & Reset PMNC: C and P bits. */
armv8pmu_pmcr_write(ARMV8_PMCR_P | ARMV8_PMCR_C);
@@ -1166,169 +581,104 @@ static void armv8pmu_reset(void *info)
static int armv8_pmuv3_map_event(struct perf_event *event)
{
- return map_cpu_event(event, &armv8_pmuv3_perf_map,
+ return armpmu_map_event(event, &armv8_pmuv3_perf_map,
&armv8_pmuv3_perf_cache_map,
ARMV8_EVTYPE_EVENT);
}
-static struct arm_pmu armv8pmu = {
- .handle_irq = armv8pmu_handle_irq,
- .enable = armv8pmu_enable_event,
- .disable = armv8pmu_disable_event,
- .read_counter = armv8pmu_read_counter,
- .write_counter = armv8pmu_write_counter,
- .get_event_idx = armv8pmu_get_event_idx,
- .start = armv8pmu_start,
- .stop = armv8pmu_stop,
- .reset = armv8pmu_reset,
- .max_period = (1LLU << 32) - 1,
-};
+static int armv8_a53_map_event(struct perf_event *event)
+{
+ return armpmu_map_event(event, &armv8_a53_perf_map,
+ &armv8_a53_perf_cache_map,
+ ARMV8_EVTYPE_EVENT);
+}
-static u32 __init armv8pmu_read_num_pmnc_events(void)
+static int armv8_a57_map_event(struct perf_event *event)
{
- u32 nb_cnt;
+ return armpmu_map_event(event, &armv8_a57_perf_map,
+ &armv8_a57_perf_cache_map,
+ ARMV8_EVTYPE_EVENT);
+}
+
+static void armv8pmu_read_num_pmnc_events(void *info)
+{
+ int *nb_cnt = info;
/* Read the nb of CNTx counters supported from PMNC */
- nb_cnt = (armv8pmu_pmcr_read() >> ARMV8_PMCR_N_SHIFT) & ARMV8_PMCR_N_MASK;
+ *nb_cnt = (armv8pmu_pmcr_read() >> ARMV8_PMCR_N_SHIFT) & ARMV8_PMCR_N_MASK;
- /* Add the CPU cycles counter and return */
- return nb_cnt + 1;
+ /* Add the CPU cycles counter */
+ *nb_cnt += 1;
}
-static struct arm_pmu *__init armv8_pmuv3_pmu_init(void)
+static int armv8pmu_probe_num_events(struct arm_pmu *arm_pmu)
{
- armv8pmu.name = "arm/armv8-pmuv3";
- armv8pmu.map_event = armv8_pmuv3_map_event;
- armv8pmu.num_events = armv8pmu_read_num_pmnc_events();
- armv8pmu.set_event_filter = armv8pmu_set_event_filter;
- return &armv8pmu;
+ return smp_call_function_any(&arm_pmu->supported_cpus,
+ armv8pmu_read_num_pmnc_events,
+ &arm_pmu->num_events, 1);
}
-/*
- * Ensure the PMU has sane values out of reset.
- * This requires SMP to be available, so exists as a separate initcall.
- */
-static int __init
-cpu_pmu_reset(void)
+static void armv8_pmu_init(struct arm_pmu *cpu_pmu)
{
- if (cpu_pmu && cpu_pmu->reset)
- return on_each_cpu(cpu_pmu->reset, NULL, 1);
- return 0;
+ cpu_pmu->handle_irq = armv8pmu_handle_irq,
+ cpu_pmu->enable = armv8pmu_enable_event,
+ cpu_pmu->disable = armv8pmu_disable_event,
+ cpu_pmu->read_counter = armv8pmu_read_counter,
+ cpu_pmu->write_counter = armv8pmu_write_counter,
+ cpu_pmu->get_event_idx = armv8pmu_get_event_idx,
+ cpu_pmu->start = armv8pmu_start,
+ cpu_pmu->stop = armv8pmu_stop,
+ cpu_pmu->reset = armv8pmu_reset,
+ cpu_pmu->max_period = (1LLU << 32) - 1,
+ cpu_pmu->set_event_filter = armv8pmu_set_event_filter;
}
-arch_initcall(cpu_pmu_reset);
-
-/*
- * PMU platform driver and devicetree bindings.
- */
-static const struct of_device_id armpmu_of_device_ids[] = {
- {.compatible = "arm,armv8-pmuv3"},
- {},
-};
-static int armpmu_device_probe(struct platform_device *pdev)
+static int armv8_pmuv3_init(struct arm_pmu *cpu_pmu)
{
- int i, irq, *irqs;
-
- if (!cpu_pmu)
- return -ENODEV;
-
- /* Don't bother with PPIs; they're already affine */
- irq = platform_get_irq(pdev, 0);
- if (irq >= 0 && irq_is_percpu(irq))
- goto out;
-
- irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL);
- if (!irqs)
- return -ENOMEM;
-
- for (i = 0; i < pdev->num_resources; ++i) {
- struct device_node *dn;
- int cpu;
-
- dn = of_parse_phandle(pdev->dev.of_node, "interrupt-affinity",
- i);
- if (!dn) {
- pr_warn("Failed to parse %s/interrupt-affinity[%d]\n",
- of_node_full_name(pdev->dev.of_node), i);
- break;
- }
-
- for_each_possible_cpu(cpu)
- if (dn == of_cpu_device_node_get(cpu))
- break;
-
- if (cpu >= nr_cpu_ids) {
- pr_warn("Failed to find logical CPU for %s\n",
- dn->name);
- of_node_put(dn);
- break;
- }
- of_node_put(dn);
-
- irqs[i] = cpu;
- }
-
- if (i == pdev->num_resources)
- cpu_pmu->irq_affinity = irqs;
- else
- kfree(irqs);
-
-out:
- cpu_pmu->plat_device = pdev;
- return 0;
+ armv8_pmu_init(cpu_pmu);
+ cpu_pmu->name = "armv8_pmuv3";
+ cpu_pmu->map_event = armv8_pmuv3_map_event;
+ return armv8pmu_probe_num_events(cpu_pmu);
}
-static struct platform_driver armpmu_driver = {
- .driver = {
- .name = "arm-pmu",
- .of_match_table = armpmu_of_device_ids,
- },
- .probe = armpmu_device_probe,
-};
-
-static int __init register_pmu_driver(void)
+static int armv8_a53_pmu_init(struct arm_pmu *cpu_pmu)
{
- return platform_driver_register(&armpmu_driver);
+ armv8_pmu_init(cpu_pmu);
+ cpu_pmu->name = "armv8_cortex_a53";
+ cpu_pmu->map_event = armv8_a53_map_event;
+ return armv8pmu_probe_num_events(cpu_pmu);
}
-device_initcall(register_pmu_driver);
-static struct pmu_hw_events *armpmu_get_cpu_events(void)
+static int armv8_a57_pmu_init(struct arm_pmu *cpu_pmu)
{
- return this_cpu_ptr(&cpu_hw_events);
+ armv8_pmu_init(cpu_pmu);
+ cpu_pmu->name = "armv8_cortex_a57";
+ cpu_pmu->map_event = armv8_a57_map_event;
+ return armv8pmu_probe_num_events(cpu_pmu);
}
-static void __init cpu_pmu_init(struct arm_pmu *armpmu)
-{
- int cpu;
- for_each_possible_cpu(cpu) {
- struct pmu_hw_events *events = &per_cpu(cpu_hw_events, cpu);
- events->events = per_cpu(hw_events, cpu);
- events->used_mask = per_cpu(used_mask, cpu);
- raw_spin_lock_init(&events->pmu_lock);
- }
- armpmu->get_hw_events = armpmu_get_cpu_events;
-}
+static const struct of_device_id armv8_pmu_of_device_ids[] = {
+ {.compatible = "arm,armv8-pmuv3", .data = armv8_pmuv3_init},
+ {.compatible = "arm,cortex-a53-pmu", .data = armv8_a53_pmu_init},
+ {.compatible = "arm,cortex-a57-pmu", .data = armv8_a57_pmu_init},
+ {},
+};
-static int __init init_hw_perf_events(void)
+static int armv8_pmu_device_probe(struct platform_device *pdev)
{
- u64 dfr = read_cpuid(ID_AA64DFR0_EL1);
-
- switch ((dfr >> 8) & 0xf) {
- case 0x1: /* PMUv3 */
- cpu_pmu = armv8_pmuv3_pmu_init();
- break;
- }
+ return arm_pmu_device_probe(pdev, armv8_pmu_of_device_ids, NULL);
+}
- if (cpu_pmu) {
- pr_info("enabled with %s PMU driver, %d counters available\n",
- cpu_pmu->name, cpu_pmu->num_events);
- cpu_pmu_init(cpu_pmu);
- armpmu_register(cpu_pmu, "cpu", PERF_TYPE_RAW);
- } else {
- pr_info("no hardware support available\n");
- }
+static struct platform_driver armv8_pmu_driver = {
+ .driver = {
+ .name = "armv8-pmu",
+ .of_match_table = armv8_pmu_of_device_ids,
+ },
+ .probe = armv8_pmu_device_probe,
+};
- return 0;
+static int __init register_armv8_pmu_driver(void)
+{
+ return platform_driver_register(&armv8_pmu_driver);
}
-early_initcall(init_hw_perf_events);
-
+device_initcall(register_armv8_pmu_driver);
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 223b093c9440..f75b540bc3b4 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -44,6 +44,7 @@
#include <linux/hw_breakpoint.h>
#include <linux/personality.h>
#include <linux/notifier.h>
+#include <trace/events/power.h>
#include <asm/compat.h>
#include <asm/cacheflush.h>
@@ -75,8 +76,10 @@ void arch_cpu_idle(void)
* This should do all the clock switching and wait for interrupt
* tricks
*/
+ trace_cpu_idle_rcuidle(1, smp_processor_id());
cpu_do_idle();
local_irq_enable();
+ trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
}
#ifdef CONFIG_HOTPLUG_CPU
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 232247945b1c..8119479147db 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -28,7 +28,6 @@
#include <linux/console.h>
#include <linux/cache.h>
#include <linux/bootmem.h>
-#include <linux/seq_file.h>
#include <linux/screen_info.h>
#include <linux/init.h>
#include <linux/kexec.h>
@@ -44,7 +43,6 @@
#include <linux/of_fdt.h>
#include <linux/of_platform.h>
#include <linux/efi.h>
-#include <linux/personality.h>
#include <linux/psci.h>
#include <asm/acpi.h>
@@ -54,6 +52,7 @@
#include <asm/elf.h>
#include <asm/cpufeature.h>
#include <asm/cpu_ops.h>
+#include <asm/kasan.h>
#include <asm/sections.h>
#include <asm/setup.h>
#include <asm/smp_plat.h>
@@ -64,23 +63,6 @@
#include <asm/efi.h>
#include <asm/xen/hypervisor.h>
-unsigned long elf_hwcap __read_mostly;
-EXPORT_SYMBOL_GPL(elf_hwcap);
-
-#ifdef CONFIG_COMPAT
-#define COMPAT_ELF_HWCAP_DEFAULT \
- (COMPAT_HWCAP_HALF|COMPAT_HWCAP_THUMB|\
- COMPAT_HWCAP_FAST_MULT|COMPAT_HWCAP_EDSP|\
- COMPAT_HWCAP_TLS|COMPAT_HWCAP_VFP|\
- COMPAT_HWCAP_VFPv3|COMPAT_HWCAP_VFPv4|\
- COMPAT_HWCAP_NEON|COMPAT_HWCAP_IDIV|\
- COMPAT_HWCAP_LPAE)
-unsigned int compat_elf_hwcap __read_mostly = COMPAT_ELF_HWCAP_DEFAULT;
-unsigned int compat_elf_hwcap2 __read_mostly;
-#endif
-
-DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
-
phys_addr_t __fdt_pointer __initdata;
/*
@@ -195,104 +177,6 @@ static void __init smp_build_mpidr_hash(void)
__flush_dcache_area(&mpidr_hash, sizeof(struct mpidr_hash));
}
-static void __init setup_processor(void)
-{
- u64 features;
- s64 block;
- u32 cwg;
- int cls;
-
- printk("CPU: AArch64 Processor [%08x] revision %d\n",
- read_cpuid_id(), read_cpuid_id() & 15);
-
- sprintf(init_utsname()->machine, ELF_PLATFORM);
- elf_hwcap = 0;
-
- cpuinfo_store_boot_cpu();
-
- /*
- * Check for sane CTR_EL0.CWG value.
- */
- cwg = cache_type_cwg();
- cls = cache_line_size();
- if (!cwg)
- pr_warn("No Cache Writeback Granule information, assuming cache line size %d\n",
- cls);
- if (L1_CACHE_BYTES < cls)
- pr_warn("L1_CACHE_BYTES smaller than the Cache Writeback Granule (%d < %d)\n",
- L1_CACHE_BYTES, cls);
-
- /*
- * ID_AA64ISAR0_EL1 contains 4-bit wide signed feature blocks.
- * The blocks we test below represent incremental functionality
- * for non-negative values. Negative values are reserved.
- */
- features = read_cpuid(ID_AA64ISAR0_EL1);
- block = cpuid_feature_extract_field(features, 4);
- if (block > 0) {
- switch (block) {
- default:
- case 2:
- elf_hwcap |= HWCAP_PMULL;
- case 1:
- elf_hwcap |= HWCAP_AES;
- case 0:
- break;
- }
- }
-
- if (cpuid_feature_extract_field(features, 8) > 0)
- elf_hwcap |= HWCAP_SHA1;
-
- if (cpuid_feature_extract_field(features, 12) > 0)
- elf_hwcap |= HWCAP_SHA2;
-
- if (cpuid_feature_extract_field(features, 16) > 0)
- elf_hwcap |= HWCAP_CRC32;
-
- block = cpuid_feature_extract_field(features, 20);
- if (block > 0) {
- switch (block) {
- default:
- case 2:
- elf_hwcap |= HWCAP_ATOMICS;
- case 1:
- /* RESERVED */
- case 0:
- break;
- }
- }
-
-#ifdef CONFIG_COMPAT
- /*
- * ID_ISAR5_EL1 carries similar information as above, but pertaining to
- * the AArch32 32-bit execution state.
- */
- features = read_cpuid(ID_ISAR5_EL1);
- block = cpuid_feature_extract_field(features, 4);
- if (block > 0) {
- switch (block) {
- default:
- case 2:
- compat_elf_hwcap2 |= COMPAT_HWCAP2_PMULL;
- case 1:
- compat_elf_hwcap2 |= COMPAT_HWCAP2_AES;
- case 0:
- break;
- }
- }
-
- if (cpuid_feature_extract_field(features, 8) > 0)
- compat_elf_hwcap2 |= COMPAT_HWCAP2_SHA1;
-
- if (cpuid_feature_extract_field(features, 12) > 0)
- compat_elf_hwcap2 |= COMPAT_HWCAP2_SHA2;
-
- if (cpuid_feature_extract_field(features, 16) > 0)
- compat_elf_hwcap2 |= COMPAT_HWCAP2_CRC32;
-#endif
-}
-
static void __init setup_machine_fdt(phys_addr_t dt_phys)
{
void *dt_virt = fixmap_remap_fdt(dt_phys);
@@ -406,8 +290,9 @@ u64 __cpu_logical_map[NR_CPUS] = { [0 ... NR_CPUS-1] = INVALID_HWID };
void __init setup_arch(char **cmdline_p)
{
- setup_processor();
+ pr_info("Boot CPU: AArch64 Processor [%08x]\n", read_cpuid_id());
+ sprintf(init_utsname()->machine, ELF_PLATFORM);
init_mm.start_code = (unsigned long) _text;
init_mm.end_code = (unsigned long) _etext;
init_mm.end_data = (unsigned long) _edata;
@@ -436,6 +321,9 @@ void __init setup_arch(char **cmdline_p)
paging_init();
relocate_initrd();
+
+ kasan_init();
+
request_standard_resources();
early_ioremap_reset();
@@ -493,124 +381,3 @@ static int __init topology_init(void)
return 0;
}
subsys_initcall(topology_init);
-
-static const char *hwcap_str[] = {
- "fp",
- "asimd",
- "evtstrm",
- "aes",
- "pmull",
- "sha1",
- "sha2",
- "crc32",
- "atomics",
- NULL
-};
-
-#ifdef CONFIG_COMPAT
-static const char *compat_hwcap_str[] = {
- "swp",
- "half",
- "thumb",
- "26bit",
- "fastmult",
- "fpa",
- "vfp",
- "edsp",
- "java",
- "iwmmxt",
- "crunch",
- "thumbee",
- "neon",
- "vfpv3",
- "vfpv3d16",
- "tls",
- "vfpv4",
- "idiva",
- "idivt",
- "vfpd32",
- "lpae",
- "evtstrm"
-};
-
-static const char *compat_hwcap2_str[] = {
- "aes",
- "pmull",
- "sha1",
- "sha2",
- "crc32",
- NULL
-};
-#endif /* CONFIG_COMPAT */
-
-static int c_show(struct seq_file *m, void *v)
-{
- int i, j;
-
- for_each_online_cpu(i) {
- struct cpuinfo_arm64 *cpuinfo = &per_cpu(cpu_data, i);
- u32 midr = cpuinfo->reg_midr;
-
- /*
- * glibc reads /proc/cpuinfo to determine the number of
- * online processors, looking for lines beginning with
- * "processor". Give glibc what it expects.
- */
- seq_printf(m, "processor\t: %d\n", i);
-
- /*
- * Dump out the common processor features in a single line.
- * Userspace should read the hwcaps with getauxval(AT_HWCAP)
- * rather than attempting to parse this, but there's a body of
- * software which does already (at least for 32-bit).
- */
- seq_puts(m, "Features\t:");
- if (personality(current->personality) == PER_LINUX32) {
-#ifdef CONFIG_COMPAT
- for (j = 0; compat_hwcap_str[j]; j++)
- if (compat_elf_hwcap & (1 << j))
- seq_printf(m, " %s", compat_hwcap_str[j]);
-
- for (j = 0; compat_hwcap2_str[j]; j++)
- if (compat_elf_hwcap2 & (1 << j))
- seq_printf(m, " %s", compat_hwcap2_str[j]);
-#endif /* CONFIG_COMPAT */
- } else {
- for (j = 0; hwcap_str[j]; j++)
- if (elf_hwcap & (1 << j))
- seq_printf(m, " %s", hwcap_str[j]);
- }
- seq_puts(m, "\n");
-
- seq_printf(m, "CPU implementer\t: 0x%02x\n",
- MIDR_IMPLEMENTOR(midr));
- seq_printf(m, "CPU architecture: 8\n");
- seq_printf(m, "CPU variant\t: 0x%x\n", MIDR_VARIANT(midr));
- seq_printf(m, "CPU part\t: 0x%03x\n", MIDR_PARTNUM(midr));
- seq_printf(m, "CPU revision\t: %d\n\n", MIDR_REVISION(midr));
- }
-
- return 0;
-}
-
-static void *c_start(struct seq_file *m, loff_t *pos)
-{
- return *pos < 1 ? (void *)1 : NULL;
-}
-
-static void *c_next(struct seq_file *m, void *v, loff_t *pos)
-{
- ++*pos;
- return NULL;
-}
-
-static void c_stop(struct seq_file *m, void *v)
-{
-}
-
-const struct seq_operations cpuinfo_op = {
- .start = c_start,
- .next = c_next,
- .stop = c_stop,
- .show = c_show
-};
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index dbdaacddd9a5..2bbdc0e4fd14 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -142,22 +142,27 @@ asmlinkage void secondary_start_kernel(void)
*/
atomic_inc(&mm->mm_count);
current->active_mm = mm;
- cpumask_set_cpu(cpu, mm_cpumask(mm));
set_my_cpu_offset(per_cpu_offset(smp_processor_id()));
- printk("CPU%u: Booted secondary processor\n", cpu);
/*
* TTBR0 is only used for the identity mapping at this stage. Make it
* point to zero page to avoid speculatively fetching new entries.
*/
cpu_set_reserved_ttbr0();
- flush_tlb_all();
+ local_flush_tlb_all();
cpu_set_default_tcr_t0sz();
preempt_disable();
trace_hardirqs_off();
+ /*
+ * If the system has established the capabilities, make sure
+ * this CPU ticks all of those. If it doesn't, the CPU will
+ * fail to come online.
+ */
+ verify_local_cpu_capabilities();
+
if (cpu_ops[cpu]->cpu_postboot)
cpu_ops[cpu]->cpu_postboot();
@@ -178,6 +183,8 @@ asmlinkage void secondary_start_kernel(void)
* the CPU migration code to notice that the CPU is online
* before we continue.
*/
+ pr_info("CPU%u: Booted secondary processor [%08x]\n",
+ cpu, read_cpuid_id());
set_cpu_online(cpu, true);
complete(&cpu_running);
@@ -232,12 +239,7 @@ int __cpu_disable(void)
/*
* OK - migrate IRQs away from this CPU
*/
- migrate_irqs();
-
- /*
- * Remove this CPU from the vm mask set of all processes.
- */
- clear_tasks_mm_cpumask(cpu);
+ irq_migrate_all_off_this_cpu();
return 0;
}
@@ -325,12 +327,14 @@ static void __init hyp_mode_check(void)
void __init smp_cpus_done(unsigned int max_cpus)
{
pr_info("SMP: Total of %d processors activated.\n", num_online_cpus());
+ setup_cpu_features();
hyp_mode_check();
apply_alternatives_all();
}
void __init smp_prepare_boot_cpu(void)
{
+ cpuinfo_store_boot_cpu();
set_my_cpu_offset(per_cpu_offset(smp_processor_id()));
}
diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c
index 44ca4143b013..40f7b33a22da 100644
--- a/arch/arm64/kernel/suspend.c
+++ b/arch/arm64/kernel/suspend.c
@@ -90,7 +90,7 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
* restoration before returning.
*/
cpu_set_reserved_ttbr0();
- flush_tlb_all();
+ local_flush_tlb_all();
cpu_set_default_tcr_t0sz();
if (mm != &init_mm)
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index f93aae5e4307..e9b9b5364393 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -103,12 +103,12 @@ static void dump_mem(const char *lvl, const char *str, unsigned long bottom,
set_fs(fs);
}
-static void dump_backtrace_entry(unsigned long where, unsigned long stack)
+static void dump_backtrace_entry(unsigned long where)
{
+ /*
+ * Note that 'where' can have a physical address, but it's not handled.
+ */
print_ip_sym(where);
- if (in_exception_text(where))
- dump_mem("", "Exception stack", stack,
- stack + sizeof(struct pt_regs), false);
}
static void dump_instr(const char *lvl, struct pt_regs *regs)
@@ -172,12 +172,17 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
pr_emerg("Call trace:\n");
while (1) {
unsigned long where = frame.pc;
+ unsigned long stack;
int ret;
+ dump_backtrace_entry(where);
ret = unwind_frame(&frame);
if (ret < 0)
break;
- dump_backtrace_entry(where, frame.sp);
+ stack = frame.sp;
+ if (in_exception_text(where))
+ dump_mem("", "Exception stack", stack,
+ stack + sizeof(struct pt_regs), false);
}
}
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 98073332e2d0..1ee2c3937d4e 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -5,6 +5,7 @@
*/
#include <asm-generic/vmlinux.lds.h>
+#include <asm/kernel-pgtable.h>
#include <asm/thread_info.h>
#include <asm/memory.h>
#include <asm/page.h>
@@ -60,9 +61,12 @@ PECOFF_FILE_ALIGNMENT = 0x200;
#define PECOFF_EDATA_PADDING
#endif
-#ifdef CONFIG_DEBUG_ALIGN_RODATA
+#if defined(CONFIG_DEBUG_ALIGN_RODATA)
#define ALIGN_DEBUG_RO . = ALIGN(1<<SECTION_SHIFT);
#define ALIGN_DEBUG_RO_MIN(min) ALIGN_DEBUG_RO
+#elif defined(CONFIG_DEBUG_RODATA)
+#define ALIGN_DEBUG_RO . = ALIGN(1<<PAGE_SHIFT);
+#define ALIGN_DEBUG_RO_MIN(min) ALIGN_DEBUG_RO
#else
#define ALIGN_DEBUG_RO
#define ALIGN_DEBUG_RO_MIN(min) . = ALIGN(min);
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index ff5292c6277c..c9d1f34daab1 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -22,6 +22,7 @@ config KVM_ARM_VGIC_V3
config KVM
bool "Kernel-based Virtual Machine (KVM) support"
depends on OF
+ depends on !ARM64_16K_PAGES
select MMU_NOTIFIER
select PREEMPT_NOTIFIERS
select ANON_INODES
@@ -37,6 +38,8 @@ config KVM
select KVM_ARM_VGIC_V3
---help---
Support hosting virtualized guest machines.
+ We don't support KVM with 16K page tables yet, due to the multiple
+ levels of fake page tables.
If unsure, say N.
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index 91cf5350b328..f34745cb3d23 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -53,7 +53,7 @@ static bool cpu_has_32bit_el1(void)
{
u64 pfr0;
- pfr0 = read_cpuid(ID_AA64PFR0_EL1);
+ pfr0 = read_system_reg(SYS_ID_AA64PFR0_EL1);
return !!(pfr0 & 0x20);
}
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index d03d3af17e7e..87a64e8db04c 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -693,13 +693,13 @@ static bool trap_dbgidr(struct kvm_vcpu *vcpu,
if (p->is_write) {
return ignore_write(vcpu, p);
} else {
- u64 dfr = read_cpuid(ID_AA64DFR0_EL1);
- u64 pfr = read_cpuid(ID_AA64PFR0_EL1);
- u32 el3 = !!((pfr >> 12) & 0xf);
+ u64 dfr = read_system_reg(SYS_ID_AA64DFR0_EL1);
+ u64 pfr = read_system_reg(SYS_ID_AA64PFR0_EL1);
+ u32 el3 = !!cpuid_feature_extract_field(pfr, ID_AA64PFR0_EL3_SHIFT);
- *vcpu_reg(vcpu, p->Rt) = ((((dfr >> 20) & 0xf) << 28) |
- (((dfr >> 12) & 0xf) << 24) |
- (((dfr >> 28) & 0xf) << 20) |
+ *vcpu_reg(vcpu, p->Rt) = ((((dfr >> ID_AA64DFR0_WRPS_SHIFT) & 0xf) << 28) |
+ (((dfr >> ID_AA64DFR0_BRPS_SHIFT) & 0xf) << 24) |
+ (((dfr >> ID_AA64DFR0_CTX_CMPS_SHIFT) & 0xf) << 20) |
(6 << 16) | (el3 << 14) | (el3 << 12));
return true;
}
diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S
index 1be9ef27be97..4699cd74f87e 100644
--- a/arch/arm64/lib/copy_from_user.S
+++ b/arch/arm64/lib/copy_from_user.S
@@ -18,6 +18,7 @@
#include <asm/alternative.h>
#include <asm/assembler.h>
+#include <asm/cache.h>
#include <asm/cpufeature.h>
#include <asm/sysreg.h>
@@ -31,49 +32,58 @@
* Returns:
* x0 - bytes not copied
*/
+
+ .macro ldrb1 ptr, regB, val
+ USER(9998f, ldrb \ptr, [\regB], \val)
+ .endm
+
+ .macro strb1 ptr, regB, val
+ strb \ptr, [\regB], \val
+ .endm
+
+ .macro ldrh1 ptr, regB, val
+ USER(9998f, ldrh \ptr, [\regB], \val)
+ .endm
+
+ .macro strh1 ptr, regB, val
+ strh \ptr, [\regB], \val
+ .endm
+
+ .macro ldr1 ptr, regB, val
+ USER(9998f, ldr \ptr, [\regB], \val)
+ .endm
+
+ .macro str1 ptr, regB, val
+ str \ptr, [\regB], \val
+ .endm
+
+ .macro ldp1 ptr, regB, regC, val
+ USER(9998f, ldp \ptr, \regB, [\regC], \val)
+ .endm
+
+ .macro stp1 ptr, regB, regC, val
+ stp \ptr, \regB, [\regC], \val
+ .endm
+
+end .req x5
ENTRY(__copy_from_user)
ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_HAS_PAN, \
CONFIG_ARM64_PAN)
- add x5, x1, x2 // upper user buffer boundary
- subs x2, x2, #16
- b.mi 1f
-0:
-USER(9f, ldp x3, x4, [x1], #16)
- subs x2, x2, #16
- stp x3, x4, [x0], #16
- b.pl 0b
-1: adds x2, x2, #8
- b.mi 2f
-USER(9f, ldr x3, [x1], #8 )
- sub x2, x2, #8
- str x3, [x0], #8
-2: adds x2, x2, #4
- b.mi 3f
-USER(9f, ldr w3, [x1], #4 )
- sub x2, x2, #4
- str w3, [x0], #4
-3: adds x2, x2, #2
- b.mi 4f
-USER(9f, ldrh w3, [x1], #2 )
- sub x2, x2, #2
- strh w3, [x0], #2
-4: adds x2, x2, #1
- b.mi 5f
-USER(9f, ldrb w3, [x1] )
- strb w3, [x0]
-5: mov x0, #0
+ add end, x0, x2
+#include "copy_template.S"
ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_HAS_PAN, \
CONFIG_ARM64_PAN)
+ mov x0, #0 // Nothing to copy
ret
ENDPROC(__copy_from_user)
.section .fixup,"ax"
.align 2
-9: sub x2, x5, x1
- mov x3, x2
-10: strb wzr, [x0], #1 // zero remaining buffer space
- subs x3, x3, #1
- b.ne 10b
- mov x0, x2 // bytes not copied
+9998:
+ sub x0, end, dst
+9999:
+ strb wzr, [dst], #1 // zero remaining buffer space
+ cmp dst, end
+ b.lo 9999b
ret
.previous
diff --git a/arch/arm64/lib/copy_in_user.S b/arch/arm64/lib/copy_in_user.S
index 1b94661e22b3..81c8fc93c100 100644
--- a/arch/arm64/lib/copy_in_user.S
+++ b/arch/arm64/lib/copy_in_user.S
@@ -20,6 +20,7 @@
#include <asm/alternative.h>
#include <asm/assembler.h>
+#include <asm/cache.h>
#include <asm/cpufeature.h>
#include <asm/sysreg.h>
@@ -33,44 +34,52 @@
* Returns:
* x0 - bytes not copied
*/
+ .macro ldrb1 ptr, regB, val
+ USER(9998f, ldrb \ptr, [\regB], \val)
+ .endm
+
+ .macro strb1 ptr, regB, val
+ USER(9998f, strb \ptr, [\regB], \val)
+ .endm
+
+ .macro ldrh1 ptr, regB, val
+ USER(9998f, ldrh \ptr, [\regB], \val)
+ .endm
+
+ .macro strh1 ptr, regB, val
+ USER(9998f, strh \ptr, [\regB], \val)
+ .endm
+
+ .macro ldr1 ptr, regB, val
+ USER(9998f, ldr \ptr, [\regB], \val)
+ .endm
+
+ .macro str1 ptr, regB, val
+ USER(9998f, str \ptr, [\regB], \val)
+ .endm
+
+ .macro ldp1 ptr, regB, regC, val
+ USER(9998f, ldp \ptr, \regB, [\regC], \val)
+ .endm
+
+ .macro stp1 ptr, regB, regC, val
+ USER(9998f, stp \ptr, \regB, [\regC], \val)
+ .endm
+
+end .req x5
ENTRY(__copy_in_user)
ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_HAS_PAN, \
CONFIG_ARM64_PAN)
- add x5, x0, x2 // upper user buffer boundary
- subs x2, x2, #16
- b.mi 1f
-0:
-USER(9f, ldp x3, x4, [x1], #16)
- subs x2, x2, #16
-USER(9f, stp x3, x4, [x0], #16)
- b.pl 0b
-1: adds x2, x2, #8
- b.mi 2f
-USER(9f, ldr x3, [x1], #8 )
- sub x2, x2, #8
-USER(9f, str x3, [x0], #8 )
-2: adds x2, x2, #4
- b.mi 3f
-USER(9f, ldr w3, [x1], #4 )
- sub x2, x2, #4
-USER(9f, str w3, [x0], #4 )
-3: adds x2, x2, #2
- b.mi 4f
-USER(9f, ldrh w3, [x1], #2 )
- sub x2, x2, #2
-USER(9f, strh w3, [x0], #2 )
-4: adds x2, x2, #1
- b.mi 5f
-USER(9f, ldrb w3, [x1] )
-USER(9f, strb w3, [x0] )
-5: mov x0, #0
+ add end, x0, x2
+#include "copy_template.S"
ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_HAS_PAN, \
CONFIG_ARM64_PAN)
+ mov x0, #0
ret
ENDPROC(__copy_in_user)
.section .fixup,"ax"
.align 2
-9: sub x0, x5, x0 // bytes not copied
+9998: sub x0, end, dst // bytes not copied
ret
.previous
diff --git a/arch/arm64/lib/copy_template.S b/arch/arm64/lib/copy_template.S
new file mode 100644
index 000000000000..410fbdb8163f
--- /dev/null
+++ b/arch/arm64/lib/copy_template.S
@@ -0,0 +1,193 @@
+/*
+ * Copyright (C) 2013 ARM Ltd.
+ * Copyright (C) 2013 Linaro.
+ *
+ * This code is based on glibc cortex strings work originally authored by Linaro
+ * and re-licensed under GPLv2 for the Linux kernel. The original code can
+ * be found @
+ *
+ * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
+ * files/head:/src/aarch64/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+/*
+ * Copy a buffer from src to dest (alignment handled by the hardware)
+ *
+ * Parameters:
+ * x0 - dest
+ * x1 - src
+ * x2 - n
+ * Returns:
+ * x0 - dest
+ */
+dstin .req x0
+src .req x1
+count .req x2
+tmp1 .req x3
+tmp1w .req w3
+tmp2 .req x4
+tmp2w .req w4
+dst .req x6
+
+A_l .req x7
+A_h .req x8
+B_l .req x9
+B_h .req x10
+C_l .req x11
+C_h .req x12
+D_l .req x13
+D_h .req x14
+
+ mov dst, dstin
+ cmp count, #16
+ /*When memory length is less than 16, the accessed are not aligned.*/
+ b.lo .Ltiny15
+
+ neg tmp2, src
+ ands tmp2, tmp2, #15/* Bytes to reach alignment. */
+ b.eq .LSrcAligned
+ sub count, count, tmp2
+ /*
+ * Copy the leading memory data from src to dst in an increasing
+ * address order.By this way,the risk of overwritting the source
+ * memory data is eliminated when the distance between src and
+ * dst is less than 16. The memory accesses here are alignment.
+ */
+ tbz tmp2, #0, 1f
+ ldrb1 tmp1w, src, #1
+ strb1 tmp1w, dst, #1
+1:
+ tbz tmp2, #1, 2f
+ ldrh1 tmp1w, src, #2
+ strh1 tmp1w, dst, #2
+2:
+ tbz tmp2, #2, 3f
+ ldr1 tmp1w, src, #4
+ str1 tmp1w, dst, #4
+3:
+ tbz tmp2, #3, .LSrcAligned
+ ldr1 tmp1, src, #8
+ str1 tmp1, dst, #8
+
+.LSrcAligned:
+ cmp count, #64
+ b.ge .Lcpy_over64
+ /*
+ * Deal with small copies quickly by dropping straight into the
+ * exit block.
+ */
+.Ltail63:
+ /*
+ * Copy up to 48 bytes of data. At this point we only need the
+ * bottom 6 bits of count to be accurate.
+ */
+ ands tmp1, count, #0x30
+ b.eq .Ltiny15
+ cmp tmp1w, #0x20
+ b.eq 1f
+ b.lt 2f
+ ldp1 A_l, A_h, src, #16
+ stp1 A_l, A_h, dst, #16
+1:
+ ldp1 A_l, A_h, src, #16
+ stp1 A_l, A_h, dst, #16
+2:
+ ldp1 A_l, A_h, src, #16
+ stp1 A_l, A_h, dst, #16
+.Ltiny15:
+ /*
+ * Prefer to break one ldp/stp into several load/store to access
+ * memory in an increasing address order,rather than to load/store 16
+ * bytes from (src-16) to (dst-16) and to backward the src to aligned
+ * address,which way is used in original cortex memcpy. If keeping
+ * the original memcpy process here, memmove need to satisfy the
+ * precondition that src address is at least 16 bytes bigger than dst
+ * address,otherwise some source data will be overwritten when memove
+ * call memcpy directly. To make memmove simpler and decouple the
+ * memcpy's dependency on memmove, withdrew the original process.
+ */
+ tbz count, #3, 1f
+ ldr1 tmp1, src, #8
+ str1 tmp1, dst, #8
+1:
+ tbz count, #2, 2f
+ ldr1 tmp1w, src, #4
+ str1 tmp1w, dst, #4
+2:
+ tbz count, #1, 3f
+ ldrh1 tmp1w, src, #2
+ strh1 tmp1w, dst, #2
+3:
+ tbz count, #0, .Lexitfunc
+ ldrb1 tmp1w, src, #1
+ strb1 tmp1w, dst, #1
+
+ b .Lexitfunc
+
+.Lcpy_over64:
+ subs count, count, #128
+ b.ge .Lcpy_body_large
+ /*
+ * Less than 128 bytes to copy, so handle 64 here and then jump
+ * to the tail.
+ */
+ ldp1 A_l, A_h, src, #16
+ stp1 A_l, A_h, dst, #16
+ ldp1 B_l, B_h, src, #16
+ ldp1 C_l, C_h, src, #16
+ stp1 B_l, B_h, dst, #16
+ stp1 C_l, C_h, dst, #16
+ ldp1 D_l, D_h, src, #16
+ stp1 D_l, D_h, dst, #16
+
+ tst count, #0x3f
+ b.ne .Ltail63
+ b .Lexitfunc
+
+ /*
+ * Critical loop. Start at a new cache line boundary. Assuming
+ * 64 bytes per line this ensures the entire loop is in one line.
+ */
+ .p2align L1_CACHE_SHIFT
+.Lcpy_body_large:
+ /* pre-get 64 bytes data. */
+ ldp1 A_l, A_h, src, #16
+ ldp1 B_l, B_h, src, #16
+ ldp1 C_l, C_h, src, #16
+ ldp1 D_l, D_h, src, #16
+1:
+ /*
+ * interlace the load of next 64 bytes data block with store of the last
+ * loaded 64 bytes data.
+ */
+ stp1 A_l, A_h, dst, #16
+ ldp1 A_l, A_h, src, #16
+ stp1 B_l, B_h, dst, #16
+ ldp1 B_l, B_h, src, #16
+ stp1 C_l, C_h, dst, #16
+ ldp1 C_l, C_h, src, #16
+ stp1 D_l, D_h, dst, #16
+ ldp1 D_l, D_h, src, #16
+ subs count, count, #64
+ b.ge 1b
+ stp1 A_l, A_h, dst, #16
+ stp1 B_l, B_h, dst, #16
+ stp1 C_l, C_h, dst, #16
+ stp1 D_l, D_h, dst, #16
+
+ tst count, #0x3f
+ b.ne .Ltail63
+.Lexitfunc:
diff --git a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S
index a257b47e2dc4..7512bbbc07ac 100644
--- a/arch/arm64/lib/copy_to_user.S
+++ b/arch/arm64/lib/copy_to_user.S
@@ -18,6 +18,7 @@
#include <asm/alternative.h>
#include <asm/assembler.h>
+#include <asm/cache.h>
#include <asm/cpufeature.h>
#include <asm/sysreg.h>
@@ -31,44 +32,52 @@
* Returns:
* x0 - bytes not copied
*/
+ .macro ldrb1 ptr, regB, val
+ ldrb \ptr, [\regB], \val
+ .endm
+
+ .macro strb1 ptr, regB, val
+ USER(9998f, strb \ptr, [\regB], \val)
+ .endm
+
+ .macro ldrh1 ptr, regB, val
+ ldrh \ptr, [\regB], \val
+ .endm
+
+ .macro strh1 ptr, regB, val
+ USER(9998f, strh \ptr, [\regB], \val)
+ .endm
+
+ .macro ldr1 ptr, regB, val
+ ldr \ptr, [\regB], \val
+ .endm
+
+ .macro str1 ptr, regB, val
+ USER(9998f, str \ptr, [\regB], \val)
+ .endm
+
+ .macro ldp1 ptr, regB, regC, val
+ ldp \ptr, \regB, [\regC], \val
+ .endm
+
+ .macro stp1 ptr, regB, regC, val
+ USER(9998f, stp \ptr, \regB, [\regC], \val)
+ .endm
+
+end .req x5
ENTRY(__copy_to_user)
ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_HAS_PAN, \
CONFIG_ARM64_PAN)
- add x5, x0, x2 // upper user buffer boundary
- subs x2, x2, #16
- b.mi 1f
-0:
- ldp x3, x4, [x1], #16
- subs x2, x2, #16
-USER(9f, stp x3, x4, [x0], #16)
- b.pl 0b
-1: adds x2, x2, #8
- b.mi 2f
- ldr x3, [x1], #8
- sub x2, x2, #8
-USER(9f, str x3, [x0], #8 )
-2: adds x2, x2, #4
- b.mi 3f
- ldr w3, [x1], #4
- sub x2, x2, #4
-USER(9f, str w3, [x0], #4 )
-3: adds x2, x2, #2
- b.mi 4f
- ldrh w3, [x1], #2
- sub x2, x2, #2
-USER(9f, strh w3, [x0], #2 )
-4: adds x2, x2, #1
- b.mi 5f
- ldrb w3, [x1]
-USER(9f, strb w3, [x0] )
-5: mov x0, #0
+ add end, x0, x2
+#include "copy_template.S"
ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_HAS_PAN, \
CONFIG_ARM64_PAN)
+ mov x0, #0
ret
ENDPROC(__copy_to_user)
.section .fixup,"ax"
.align 2
-9: sub x0, x5, x0 // bytes not copied
+9998: sub x0, end, dst // bytes not copied
ret
.previous
diff --git a/arch/arm64/lib/memchr.S b/arch/arm64/lib/memchr.S
index 8636b7549163..4444c1d25f4b 100644
--- a/arch/arm64/lib/memchr.S
+++ b/arch/arm64/lib/memchr.S
@@ -41,4 +41,4 @@ ENTRY(memchr)
ret
2: mov x0, #0
ret
-ENDPROC(memchr)
+ENDPIPROC(memchr)
diff --git a/arch/arm64/lib/memcmp.S b/arch/arm64/lib/memcmp.S
index 6ea0776ba6de..ffbdec00327d 100644
--- a/arch/arm64/lib/memcmp.S
+++ b/arch/arm64/lib/memcmp.S
@@ -255,4 +255,4 @@ CPU_LE( rev data2, data2 )
.Lret0:
mov result, #0
ret
-ENDPROC(memcmp)
+ENDPIPROC(memcmp)
diff --git a/arch/arm64/lib/memcpy.S b/arch/arm64/lib/memcpy.S
index 8a9a96d3ddae..67613937711f 100644
--- a/arch/arm64/lib/memcpy.S
+++ b/arch/arm64/lib/memcpy.S
@@ -36,166 +36,42 @@
* Returns:
* x0 - dest
*/
-dstin .req x0
-src .req x1
-count .req x2
-tmp1 .req x3
-tmp1w .req w3
-tmp2 .req x4
-tmp2w .req w4
-tmp3 .req x5
-tmp3w .req w5
-dst .req x6
+ .macro ldrb1 ptr, regB, val
+ ldrb \ptr, [\regB], \val
+ .endm
-A_l .req x7
-A_h .req x8
-B_l .req x9
-B_h .req x10
-C_l .req x11
-C_h .req x12
-D_l .req x13
-D_h .req x14
+ .macro strb1 ptr, regB, val
+ strb \ptr, [\regB], \val
+ .endm
-ENTRY(memcpy)
- mov dst, dstin
- cmp count, #16
- /*When memory length is less than 16, the accessed are not aligned.*/
- b.lo .Ltiny15
+ .macro ldrh1 ptr, regB, val
+ ldrh \ptr, [\regB], \val
+ .endm
- neg tmp2, src
- ands tmp2, tmp2, #15/* Bytes to reach alignment. */
- b.eq .LSrcAligned
- sub count, count, tmp2
- /*
- * Copy the leading memory data from src to dst in an increasing
- * address order.By this way,the risk of overwritting the source
- * memory data is eliminated when the distance between src and
- * dst is less than 16. The memory accesses here are alignment.
- */
- tbz tmp2, #0, 1f
- ldrb tmp1w, [src], #1
- strb tmp1w, [dst], #1
-1:
- tbz tmp2, #1, 2f
- ldrh tmp1w, [src], #2
- strh tmp1w, [dst], #2
-2:
- tbz tmp2, #2, 3f
- ldr tmp1w, [src], #4
- str tmp1w, [dst], #4
-3:
- tbz tmp2, #3, .LSrcAligned
- ldr tmp1, [src],#8
- str tmp1, [dst],#8
+ .macro strh1 ptr, regB, val
+ strh \ptr, [\regB], \val
+ .endm
-.LSrcAligned:
- cmp count, #64
- b.ge .Lcpy_over64
- /*
- * Deal with small copies quickly by dropping straight into the
- * exit block.
- */
-.Ltail63:
- /*
- * Copy up to 48 bytes of data. At this point we only need the
- * bottom 6 bits of count to be accurate.
- */
- ands tmp1, count, #0x30
- b.eq .Ltiny15
- cmp tmp1w, #0x20
- b.eq 1f
- b.lt 2f
- ldp A_l, A_h, [src], #16
- stp A_l, A_h, [dst], #16
-1:
- ldp A_l, A_h, [src], #16
- stp A_l, A_h, [dst], #16
-2:
- ldp A_l, A_h, [src], #16
- stp A_l, A_h, [dst], #16
-.Ltiny15:
- /*
- * Prefer to break one ldp/stp into several load/store to access
- * memory in an increasing address order,rather than to load/store 16
- * bytes from (src-16) to (dst-16) and to backward the src to aligned
- * address,which way is used in original cortex memcpy. If keeping
- * the original memcpy process here, memmove need to satisfy the
- * precondition that src address is at least 16 bytes bigger than dst
- * address,otherwise some source data will be overwritten when memove
- * call memcpy directly. To make memmove simpler and decouple the
- * memcpy's dependency on memmove, withdrew the original process.
- */
- tbz count, #3, 1f
- ldr tmp1, [src], #8
- str tmp1, [dst], #8
-1:
- tbz count, #2, 2f
- ldr tmp1w, [src], #4
- str tmp1w, [dst], #4
-2:
- tbz count, #1, 3f
- ldrh tmp1w, [src], #2
- strh tmp1w, [dst], #2
-3:
- tbz count, #0, .Lexitfunc
- ldrb tmp1w, [src]
- strb tmp1w, [dst]
+ .macro ldr1 ptr, regB, val
+ ldr \ptr, [\regB], \val
+ .endm
-.Lexitfunc:
- ret
+ .macro str1 ptr, regB, val
+ str \ptr, [\regB], \val
+ .endm
-.Lcpy_over64:
- subs count, count, #128
- b.ge .Lcpy_body_large
- /*
- * Less than 128 bytes to copy, so handle 64 here and then jump
- * to the tail.
- */
- ldp A_l, A_h, [src],#16
- stp A_l, A_h, [dst],#16
- ldp B_l, B_h, [src],#16
- ldp C_l, C_h, [src],#16
- stp B_l, B_h, [dst],#16
- stp C_l, C_h, [dst],#16
- ldp D_l, D_h, [src],#16
- stp D_l, D_h, [dst],#16
+ .macro ldp1 ptr, regB, regC, val
+ ldp \ptr, \regB, [\regC], \val
+ .endm
- tst count, #0x3f
- b.ne .Ltail63
- ret
+ .macro stp1 ptr, regB, regC, val
+ stp \ptr, \regB, [\regC], \val
+ .endm
- /*
- * Critical loop. Start at a new cache line boundary. Assuming
- * 64 bytes per line this ensures the entire loop is in one line.
- */
- .p2align L1_CACHE_SHIFT
-.Lcpy_body_large:
- /* pre-get 64 bytes data. */
- ldp A_l, A_h, [src],#16
- ldp B_l, B_h, [src],#16
- ldp C_l, C_h, [src],#16
- ldp D_l, D_h, [src],#16
-1:
- /*
- * interlace the load of next 64 bytes data block with store of the last
- * loaded 64 bytes data.
- */
- stp A_l, A_h, [dst],#16
- ldp A_l, A_h, [src],#16
- stp B_l, B_h, [dst],#16
- ldp B_l, B_h, [src],#16
- stp C_l, C_h, [dst],#16
- ldp C_l, C_h, [src],#16
- stp D_l, D_h, [dst],#16
- ldp D_l, D_h, [src],#16
- subs count, count, #64
- b.ge 1b
- stp A_l, A_h, [dst],#16
- stp B_l, B_h, [dst],#16
- stp C_l, C_h, [dst],#16
- stp D_l, D_h, [dst],#16
-
- tst count, #0x3f
- b.ne .Ltail63
+ .weak memcpy
+ENTRY(__memcpy)
+ENTRY(memcpy)
+#include "copy_template.S"
ret
-ENDPROC(memcpy)
+ENDPIPROC(memcpy)
+ENDPROC(__memcpy)
diff --git a/arch/arm64/lib/memmove.S b/arch/arm64/lib/memmove.S
index 57b19ea2dad4..a5a4459013b1 100644
--- a/arch/arm64/lib/memmove.S
+++ b/arch/arm64/lib/memmove.S
@@ -57,12 +57,14 @@ C_h .req x12
D_l .req x13
D_h .req x14
+ .weak memmove
+ENTRY(__memmove)
ENTRY(memmove)
cmp dstin, src
- b.lo memcpy
+ b.lo __memcpy
add tmp1, src, count
cmp dstin, tmp1
- b.hs memcpy /* No overlap. */
+ b.hs __memcpy /* No overlap. */
add dst, dstin, count
add src, src, count
@@ -194,4 +196,5 @@ ENTRY(memmove)
tst count, #0x3f
b.ne .Ltail63
ret
-ENDPROC(memmove)
+ENDPIPROC(memmove)
+ENDPROC(__memmove)
diff --git a/arch/arm64/lib/memset.S b/arch/arm64/lib/memset.S
index 7c72dfd36b63..f2670a9f218c 100644
--- a/arch/arm64/lib/memset.S
+++ b/arch/arm64/lib/memset.S
@@ -54,6 +54,8 @@ dst .req x8
tmp3w .req w9
tmp3 .req x9
+ .weak memset
+ENTRY(__memset)
ENTRY(memset)
mov dst, dstin /* Preserve return value. */
and A_lw, val, #255
@@ -213,4 +215,5 @@ ENTRY(memset)
ands count, count, zva_bits_x
b.ne .Ltail_maybe_long
ret
-ENDPROC(memset)
+ENDPIPROC(memset)
+ENDPROC(__memset)
diff --git a/arch/arm64/lib/strcmp.S b/arch/arm64/lib/strcmp.S
index 42f828b06c59..471fe61760ef 100644
--- a/arch/arm64/lib/strcmp.S
+++ b/arch/arm64/lib/strcmp.S
@@ -231,4 +231,4 @@ CPU_BE( orr syndrome, diff, has_nul )
lsr data1, data1, #56
sub result, data1, data2, lsr #56
ret
-ENDPROC(strcmp)
+ENDPIPROC(strcmp)
diff --git a/arch/arm64/lib/strlen.S b/arch/arm64/lib/strlen.S
index 987b68b9ce44..55ccc8e24c08 100644
--- a/arch/arm64/lib/strlen.S
+++ b/arch/arm64/lib/strlen.S
@@ -123,4 +123,4 @@ CPU_LE( lsr tmp2, tmp2, tmp1 ) /* Shift (tmp1 & 63). */
csinv data1, data1, xzr, le
csel data2, data2, data2a, le
b .Lrealigned
-ENDPROC(strlen)
+ENDPIPROC(strlen)
diff --git a/arch/arm64/lib/strncmp.S b/arch/arm64/lib/strncmp.S
index 0224cf5a5533..e267044761c6 100644
--- a/arch/arm64/lib/strncmp.S
+++ b/arch/arm64/lib/strncmp.S
@@ -307,4 +307,4 @@ CPU_BE( orr syndrome, diff, has_nul )
.Lret0:
mov result, #0
ret
-ENDPROC(strncmp)
+ENDPIPROC(strncmp)
diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile
index 773d37a14039..57f57fde5722 100644
--- a/arch/arm64/mm/Makefile
+++ b/arch/arm64/mm/Makefile
@@ -4,3 +4,6 @@ obj-y := dma-mapping.o extable.o fault.o init.o \
context.o proc.o pageattr.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
obj-$(CONFIG_ARM64_PTDUMP) += dump.o
+
+obj-$(CONFIG_KASAN) += kasan_init.o
+KASAN_SANITIZE_kasan_init.o := n
diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
index eb48d5df4a0f..cfa44a6adc0a 100644
--- a/arch/arm64/mm/cache.S
+++ b/arch/arm64/mm/cache.S
@@ -98,7 +98,7 @@ ENTRY(__flush_dcache_area)
b.lo 1b
dsb sy
ret
-ENDPROC(__flush_dcache_area)
+ENDPIPROC(__flush_dcache_area)
/*
* __inval_cache_range(start, end)
@@ -131,7 +131,7 @@ __dma_inv_range:
b.lo 2b
dsb sy
ret
-ENDPROC(__inval_cache_range)
+ENDPIPROC(__inval_cache_range)
ENDPROC(__dma_inv_range)
/*
@@ -171,7 +171,7 @@ ENTRY(__dma_flush_range)
b.lo 1b
dsb sy
ret
-ENDPROC(__dma_flush_range)
+ENDPIPROC(__dma_flush_range)
/*
* __dma_map_area(start, size, dir)
@@ -184,7 +184,7 @@ ENTRY(__dma_map_area)
cmp w2, #DMA_FROM_DEVICE
b.eq __dma_inv_range
b __dma_clean_range
-ENDPROC(__dma_map_area)
+ENDPIPROC(__dma_map_area)
/*
* __dma_unmap_area(start, size, dir)
@@ -197,4 +197,4 @@ ENTRY(__dma_unmap_area)
cmp w2, #DMA_TO_DEVICE
b.ne __dma_inv_range
ret
-ENDPROC(__dma_unmap_area)
+ENDPIPROC(__dma_unmap_area)
diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index d70ff14dbdbd..f636a2639f03 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -17,135 +17,185 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-#include <linux/init.h>
+#include <linux/bitops.h>
#include <linux/sched.h>
+#include <linux/slab.h>
#include <linux/mm.h>
-#include <linux/smp.h>
-#include <linux/percpu.h>
+#include <asm/cpufeature.h>
#include <asm/mmu_context.h>
#include <asm/tlbflush.h>
-#include <asm/cachetype.h>
-#define asid_bits(reg) \
- (((read_cpuid(ID_AA64MMFR0_EL1) & 0xf0) >> 2) + 8)
+static u32 asid_bits;
+static DEFINE_RAW_SPINLOCK(cpu_asid_lock);
-#define ASID_FIRST_VERSION (1 << MAX_ASID_BITS)
+static atomic64_t asid_generation;
+static unsigned long *asid_map;
-static DEFINE_RAW_SPINLOCK(cpu_asid_lock);
-unsigned int cpu_last_asid = ASID_FIRST_VERSION;
+static DEFINE_PER_CPU(atomic64_t, active_asids);
+static DEFINE_PER_CPU(u64, reserved_asids);
+static cpumask_t tlb_flush_pending;
-/*
- * We fork()ed a process, and we need a new context for the child to run in.
- */
-void __init_new_context(struct task_struct *tsk, struct mm_struct *mm)
-{
- mm->context.id = 0;
- raw_spin_lock_init(&mm->context.id_lock);
-}
+#define ASID_MASK (~GENMASK(asid_bits - 1, 0))
+#define ASID_FIRST_VERSION (1UL << asid_bits)
+#define NUM_USER_ASIDS ASID_FIRST_VERSION
-static void flush_context(void)
+static void flush_context(unsigned int cpu)
{
- /* set the reserved TTBR0 before flushing the TLB */
- cpu_set_reserved_ttbr0();
- flush_tlb_all();
- if (icache_is_aivivt())
- __flush_icache_all();
-}
+ int i;
+ u64 asid;
-static void set_mm_context(struct mm_struct *mm, unsigned int asid)
-{
- unsigned long flags;
+ /* Update the list of reserved ASIDs and the ASID bitmap. */
+ bitmap_clear(asid_map, 0, NUM_USER_ASIDS);
/*
- * Locking needed for multi-threaded applications where the same
- * mm->context.id could be set from different CPUs during the
- * broadcast. This function is also called via IPI so the
- * mm->context.id_lock has to be IRQ-safe.
+ * Ensure the generation bump is observed before we xchg the
+ * active_asids.
*/
- raw_spin_lock_irqsave(&mm->context.id_lock, flags);
- if (likely((mm->context.id ^ cpu_last_asid) >> MAX_ASID_BITS)) {
+ smp_wmb();
+
+ for_each_possible_cpu(i) {
+ asid = atomic64_xchg_relaxed(&per_cpu(active_asids, i), 0);
/*
- * Old version of ASID found. Set the new one and reset
- * mm_cpumask(mm).
+ * If this CPU has already been through a
+ * rollover, but hasn't run another task in
+ * the meantime, we must preserve its reserved
+ * ASID, as this is the only trace we have of
+ * the process it is still running.
*/
- mm->context.id = asid;
- cpumask_clear(mm_cpumask(mm));
+ if (asid == 0)
+ asid = per_cpu(reserved_asids, i);
+ __set_bit(asid & ~ASID_MASK, asid_map);
+ per_cpu(reserved_asids, i) = asid;
}
- raw_spin_unlock_irqrestore(&mm->context.id_lock, flags);
- /*
- * Set the mm_cpumask(mm) bit for the current CPU.
- */
- cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
+ /* Queue a TLB invalidate and flush the I-cache if necessary. */
+ cpumask_setall(&tlb_flush_pending);
+
+ if (icache_is_aivivt())
+ __flush_icache_all();
}
-/*
- * Reset the ASID on the current CPU. This function call is broadcast from the
- * CPU handling the ASID rollover and holding cpu_asid_lock.
- */
-static void reset_context(void *info)
+static int is_reserved_asid(u64 asid)
+{
+ int cpu;
+ for_each_possible_cpu(cpu)
+ if (per_cpu(reserved_asids, cpu) == asid)
+ return 1;
+ return 0;
+}
+
+static u64 new_context(struct mm_struct *mm, unsigned int cpu)
{
- unsigned int asid;
- unsigned int cpu = smp_processor_id();
- struct mm_struct *mm = current->active_mm;
+ static u32 cur_idx = 1;
+ u64 asid = atomic64_read(&mm->context.id);
+ u64 generation = atomic64_read(&asid_generation);
+
+ if (asid != 0) {
+ /*
+ * If our current ASID was active during a rollover, we
+ * can continue to use it and this was just a false alarm.
+ */
+ if (is_reserved_asid(asid))
+ return generation | (asid & ~ASID_MASK);
+
+ /*
+ * We had a valid ASID in a previous life, so try to re-use
+ * it if possible.
+ */
+ asid &= ~ASID_MASK;
+ if (!__test_and_set_bit(asid, asid_map))
+ goto bump_gen;
+ }
/*
- * current->active_mm could be init_mm for the idle thread immediately
- * after secondary CPU boot or hotplug. TTBR0_EL1 is already set to
- * the reserved value, so no need to reset any context.
+ * Allocate a free ASID. If we can't find one, take a note of the
+ * currently active ASIDs and mark the TLBs as requiring flushes.
+ * We always count from ASID #1, as we use ASID #0 when setting a
+ * reserved TTBR0 for the init_mm.
*/
- if (mm == &init_mm)
- return;
+ asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, cur_idx);
+ if (asid != NUM_USER_ASIDS)
+ goto set_asid;
- smp_rmb();
- asid = cpu_last_asid + cpu;
+ /* We're out of ASIDs, so increment the global generation count */
+ generation = atomic64_add_return_relaxed(ASID_FIRST_VERSION,
+ &asid_generation);
+ flush_context(cpu);
- flush_context();
- set_mm_context(mm, asid);
+ /* We have at least 1 ASID per CPU, so this will always succeed */
+ asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, 1);
- /* set the new ASID */
- cpu_switch_mm(mm->pgd, mm);
+set_asid:
+ __set_bit(asid, asid_map);
+ cur_idx = asid;
+
+bump_gen:
+ asid |= generation;
+ return asid;
}
-void __new_context(struct mm_struct *mm)
+void check_and_switch_context(struct mm_struct *mm, unsigned int cpu)
{
- unsigned int asid;
- unsigned int bits = asid_bits();
+ unsigned long flags;
+ u64 asid;
+
+ asid = atomic64_read(&mm->context.id);
- raw_spin_lock(&cpu_asid_lock);
/*
- * Check the ASID again, in case the change was broadcast from another
- * CPU before we acquired the lock.
+ * The memory ordering here is subtle. We rely on the control
+ * dependency between the generation read and the update of
+ * active_asids to ensure that we are synchronised with a
+ * parallel rollover (i.e. this pairs with the smp_wmb() in
+ * flush_context).
*/
- if (!unlikely((mm->context.id ^ cpu_last_asid) >> MAX_ASID_BITS)) {
- cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
- raw_spin_unlock(&cpu_asid_lock);
- return;
+ if (!((asid ^ atomic64_read(&asid_generation)) >> asid_bits)
+ && atomic64_xchg_relaxed(&per_cpu(active_asids, cpu), asid))
+ goto switch_mm_fastpath;
+
+ raw_spin_lock_irqsave(&cpu_asid_lock, flags);
+ /* Check that our ASID belongs to the current generation. */
+ asid = atomic64_read(&mm->context.id);
+ if ((asid ^ atomic64_read(&asid_generation)) >> asid_bits) {
+ asid = new_context(mm, cpu);
+ atomic64_set(&mm->context.id, asid);
}
- /*
- * At this point, it is guaranteed that the current mm (with an old
- * ASID) isn't active on any other CPU since the ASIDs are changed
- * simultaneously via IPI.
- */
- asid = ++cpu_last_asid;
- /*
- * If we've used up all our ASIDs, we need to start a new version and
- * flush the TLB.
- */
- if (unlikely((asid & ((1 << bits) - 1)) == 0)) {
- /* increment the ASID version */
- cpu_last_asid += (1 << MAX_ASID_BITS) - (1 << bits);
- if (cpu_last_asid == 0)
- cpu_last_asid = ASID_FIRST_VERSION;
- asid = cpu_last_asid + smp_processor_id();
- flush_context();
- smp_wmb();
- smp_call_function(reset_context, NULL, 1);
- cpu_last_asid += NR_CPUS - 1;
+ if (cpumask_test_and_clear_cpu(cpu, &tlb_flush_pending))
+ local_flush_tlb_all();
+
+ atomic64_set(&per_cpu(active_asids, cpu), asid);
+ raw_spin_unlock_irqrestore(&cpu_asid_lock, flags);
+
+switch_mm_fastpath:
+ cpu_switch_mm(mm->pgd, mm);
+}
+
+static int asids_init(void)
+{
+ int fld = cpuid_feature_extract_field(read_cpuid(ID_AA64MMFR0_EL1), 4);
+
+ switch (fld) {
+ default:
+ pr_warn("Unknown ASID size (%d); assuming 8-bit\n", fld);
+ /* Fallthrough */
+ case 0:
+ asid_bits = 8;
+ break;
+ case 2:
+ asid_bits = 16;
}
- set_mm_context(mm, asid);
- raw_spin_unlock(&cpu_asid_lock);
+ /* If we end up with more CPUs than ASIDs, expect things to crash */
+ WARN_ON(NUM_USER_ASIDS < num_possible_cpus());
+ atomic64_set(&asid_generation, ASID_FIRST_VERSION);
+ asid_map = kzalloc(BITS_TO_LONGS(NUM_USER_ASIDS) * sizeof(*asid_map),
+ GFP_KERNEL);
+ if (!asid_map)
+ panic("Failed to allocate bitmap for %lu ASIDs\n",
+ NUM_USER_ASIDS);
+
+ pr_info("ASID allocator initialised with %lu entries\n", NUM_USER_ASIDS);
+ return 0;
}
+early_initcall(asids_init);
diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c
index f3d6221cd5bd..5a22a119a74c 100644
--- a/arch/arm64/mm/dump.c
+++ b/arch/arm64/mm/dump.c
@@ -67,6 +67,12 @@ static struct addr_marker address_markers[] = {
{ -1, NULL },
};
+/*
+ * The page dumper groups page table entries of the same type into a single
+ * description. It uses pg_state to track the range information while
+ * iterating over the pte entries. When the continuity is broken it then
+ * dumps out a description of the range.
+ */
struct pg_state {
struct seq_file *seq;
const struct addr_marker *marker;
@@ -114,6 +120,16 @@ static const struct prot_bits pte_bits[] = {
.set = "NG",
.clear = " ",
}, {
+ .mask = PTE_CONT,
+ .val = PTE_CONT,
+ .set = "CON",
+ .clear = " ",
+ }, {
+ .mask = PTE_TABLE_BIT,
+ .val = PTE_TABLE_BIT,
+ .set = " ",
+ .clear = "BLK",
+ }, {
.mask = PTE_UXN,
.val = PTE_UXN,
.set = "UXN",
@@ -198,7 +214,7 @@ static void note_page(struct pg_state *st, unsigned long addr, unsigned level,
unsigned long delta;
if (st->current_prot) {
- seq_printf(st->seq, "0x%16lx-0x%16lx ",
+ seq_printf(st->seq, "0x%016lx-0x%016lx ",
st->start_address, addr);
delta = (addr - st->start_address) >> 10;
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 9fadf6d7039b..19211c4a8911 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -556,7 +556,7 @@ asmlinkage int __exception do_debug_exception(unsigned long addr,
}
#ifdef CONFIG_ARM64_PAN
-void cpu_enable_pan(void)
+void cpu_enable_pan(void *__unused)
{
config_sctlr_el1(SCTLR_EL1_SPAN, 0);
}
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index f5c0680d17d9..17bf39ac83ba 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -86,10 +86,10 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max)
memset(zone_size, 0, sizeof(zone_size));
/* 4GB maximum for 32-bit only capable devices */
- if (IS_ENABLED(CONFIG_ZONE_DMA)) {
- max_dma = PFN_DOWN(arm64_dma_phys_limit);
- zone_size[ZONE_DMA] = max_dma - min;
- }
+#ifdef CONFIG_ZONE_DMA
+ max_dma = PFN_DOWN(arm64_dma_phys_limit);
+ zone_size[ZONE_DMA] = max_dma - min;
+#endif
zone_size[ZONE_NORMAL] = max - max_dma;
memcpy(zhole_size, zone_size, sizeof(zhole_size));
@@ -101,11 +101,12 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max)
if (start >= max)
continue;
- if (IS_ENABLED(CONFIG_ZONE_DMA) && start < max_dma) {
+#ifdef CONFIG_ZONE_DMA
+ if (start < max_dma) {
unsigned long dma_end = min(end, max_dma);
zhole_size[ZONE_DMA] -= dma_end - start;
}
-
+#endif
if (end > max_dma) {
unsigned long normal_end = min(end, max);
unsigned long normal_start = max(start, max_dma);
@@ -298,6 +299,9 @@ void __init mem_init(void)
#define MLK_ROUNDUP(b, t) b, t, DIV_ROUND_UP(((t) - (b)), SZ_1K)
pr_notice("Virtual kernel memory layout:\n"
+#ifdef CONFIG_KASAN
+ " kasan : 0x%16lx - 0x%16lx (%6ld GB)\n"
+#endif
" vmalloc : 0x%16lx - 0x%16lx (%6ld GB)\n"
#ifdef CONFIG_SPARSEMEM_VMEMMAP
" vmemmap : 0x%16lx - 0x%16lx (%6ld GB maximum)\n"
@@ -310,6 +314,9 @@ void __init mem_init(void)
" .init : 0x%p" " - 0x%p" " (%6ld KB)\n"
" .text : 0x%p" " - 0x%p" " (%6ld KB)\n"
" .data : 0x%p" " - 0x%p" " (%6ld KB)\n",
+#ifdef CONFIG_KASAN
+ MLG(KASAN_SHADOW_START, KASAN_SHADOW_END),
+#endif
MLG(VMALLOC_START, VMALLOC_END),
#ifdef CONFIG_SPARSEMEM_VMEMMAP
MLG((unsigned long)vmemmap,
diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c
new file mode 100644
index 000000000000..cf038c7d9fa9
--- /dev/null
+++ b/arch/arm64/mm/kasan_init.c
@@ -0,0 +1,165 @@
+/*
+ * This file contains kasan initialization code for ARM64.
+ *
+ * Copyright (c) 2015 Samsung Electronics Co., Ltd.
+ * Author: Andrey Ryabinin <ryabinin.a.a@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#define pr_fmt(fmt) "kasan: " fmt
+#include <linux/kasan.h>
+#include <linux/kernel.h>
+#include <linux/memblock.h>
+#include <linux/start_kernel.h>
+
+#include <asm/page.h>
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+#include <asm/tlbflush.h>
+
+static pgd_t tmp_pg_dir[PTRS_PER_PGD] __initdata __aligned(PGD_SIZE);
+
+static void __init kasan_early_pte_populate(pmd_t *pmd, unsigned long addr,
+ unsigned long end)
+{
+ pte_t *pte;
+ unsigned long next;
+
+ if (pmd_none(*pmd))
+ pmd_populate_kernel(&init_mm, pmd, kasan_zero_pte);
+
+ pte = pte_offset_kernel(pmd, addr);
+ do {
+ next = addr + PAGE_SIZE;
+ set_pte(pte, pfn_pte(virt_to_pfn(kasan_zero_page),
+ PAGE_KERNEL));
+ } while (pte++, addr = next, addr != end && pte_none(*pte));
+}
+
+static void __init kasan_early_pmd_populate(pud_t *pud,
+ unsigned long addr,
+ unsigned long end)
+{
+ pmd_t *pmd;
+ unsigned long next;
+
+ if (pud_none(*pud))
+ pud_populate(&init_mm, pud, kasan_zero_pmd);
+
+ pmd = pmd_offset(pud, addr);
+ do {
+ next = pmd_addr_end(addr, end);
+ kasan_early_pte_populate(pmd, addr, next);
+ } while (pmd++, addr = next, addr != end && pmd_none(*pmd));
+}
+
+static void __init kasan_early_pud_populate(pgd_t *pgd,
+ unsigned long addr,
+ unsigned long end)
+{
+ pud_t *pud;
+ unsigned long next;
+
+ if (pgd_none(*pgd))
+ pgd_populate(&init_mm, pgd, kasan_zero_pud);
+
+ pud = pud_offset(pgd, addr);
+ do {
+ next = pud_addr_end(addr, end);
+ kasan_early_pmd_populate(pud, addr, next);
+ } while (pud++, addr = next, addr != end && pud_none(*pud));
+}
+
+static void __init kasan_map_early_shadow(void)
+{
+ unsigned long addr = KASAN_SHADOW_START;
+ unsigned long end = KASAN_SHADOW_END;
+ unsigned long next;
+ pgd_t *pgd;
+
+ pgd = pgd_offset_k(addr);
+ do {
+ next = pgd_addr_end(addr, end);
+ kasan_early_pud_populate(pgd, addr, next);
+ } while (pgd++, addr = next, addr != end);
+}
+
+asmlinkage void __init kasan_early_init(void)
+{
+ BUILD_BUG_ON(KASAN_SHADOW_OFFSET != KASAN_SHADOW_END - (1UL << 61));
+ BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, PGDIR_SIZE));
+ BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PGDIR_SIZE));
+ kasan_map_early_shadow();
+}
+
+static void __init clear_pgds(unsigned long start,
+ unsigned long end)
+{
+ /*
+ * Remove references to kasan page tables from
+ * swapper_pg_dir. pgd_clear() can't be used
+ * here because it's nop on 2,3-level pagetable setups
+ */
+ for (; start < end; start += PGDIR_SIZE)
+ set_pgd(pgd_offset_k(start), __pgd(0));
+}
+
+static void __init cpu_set_ttbr1(unsigned long ttbr1)
+{
+ asm(
+ " msr ttbr1_el1, %0\n"
+ " isb"
+ :
+ : "r" (ttbr1));
+}
+
+void __init kasan_init(void)
+{
+ struct memblock_region *reg;
+
+ /*
+ * We are going to perform proper setup of shadow memory.
+ * At first we should unmap early shadow (clear_pgds() call bellow).
+ * However, instrumented code couldn't execute without shadow memory.
+ * tmp_pg_dir used to keep early shadow mapped until full shadow
+ * setup will be finished.
+ */
+ memcpy(tmp_pg_dir, swapper_pg_dir, sizeof(tmp_pg_dir));
+ cpu_set_ttbr1(__pa(tmp_pg_dir));
+ flush_tlb_all();
+
+ clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END);
+
+ kasan_populate_zero_shadow((void *)KASAN_SHADOW_START,
+ kasan_mem_to_shadow((void *)MODULES_VADDR));
+
+ for_each_memblock(memory, reg) {
+ void *start = (void *)__phys_to_virt(reg->base);
+ void *end = (void *)__phys_to_virt(reg->base + reg->size);
+
+ if (start >= end)
+ break;
+
+ /*
+ * end + 1 here is intentional. We check several shadow bytes in
+ * advance to slightly speed up fastpath. In some rare cases
+ * we could cross boundary of mapped shadow, so we just map
+ * some more here.
+ */
+ vmemmap_populate((unsigned long)kasan_mem_to_shadow(start),
+ (unsigned long)kasan_mem_to_shadow(end) + 1,
+ pfn_to_nid(virt_to_pfn(start)));
+ }
+
+ memset(kasan_zero_page, 0, PAGE_SIZE);
+ cpu_set_ttbr1(__pa(swapper_pg_dir));
+ flush_tlb_all();
+
+ /* At this point kasan is fully initialized. Enable error messages */
+ init_task.kasan_depth = 0;
+ pr_info("KernelAddressSanitizer initialized\n");
+}
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 9211b8527f25..c2fa6b56613c 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -32,6 +32,7 @@
#include <asm/cputype.h>
#include <asm/fixmap.h>
+#include <asm/kernel-pgtable.h>
#include <asm/sections.h>
#include <asm/setup.h>
#include <asm/sizes.h>
@@ -80,19 +81,55 @@ static void split_pmd(pmd_t *pmd, pte_t *pte)
do {
/*
* Need to have the least restrictive permissions available
- * permissions will be fixed up later
+ * permissions will be fixed up later. Default the new page
+ * range as contiguous ptes.
*/
- set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC));
+ set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC_CONT));
pfn++;
} while (pte++, i++, i < PTRS_PER_PTE);
}
+/*
+ * Given a PTE with the CONT bit set, determine where the CONT range
+ * starts, and clear the entire range of PTE CONT bits.
+ */
+static void clear_cont_pte_range(pte_t *pte, unsigned long addr)
+{
+ int i;
+
+ pte -= CONT_RANGE_OFFSET(addr);
+ for (i = 0; i < CONT_PTES; i++) {
+ set_pte(pte, pte_mknoncont(*pte));
+ pte++;
+ }
+ flush_tlb_all();
+}
+
+/*
+ * Given a range of PTEs set the pfn and provided page protection flags
+ */
+static void __populate_init_pte(pte_t *pte, unsigned long addr,
+ unsigned long end, phys_addr_t phys,
+ pgprot_t prot)
+{
+ unsigned long pfn = __phys_to_pfn(phys);
+
+ do {
+ /* clear all the bits except the pfn, then apply the prot */
+ set_pte(pte, pfn_pte(pfn, prot));
+ pte++;
+ pfn++;
+ addr += PAGE_SIZE;
+ } while (addr != end);
+}
+
static void alloc_init_pte(pmd_t *pmd, unsigned long addr,
- unsigned long end, unsigned long pfn,
+ unsigned long end, phys_addr_t phys,
pgprot_t prot,
void *(*alloc)(unsigned long size))
{
pte_t *pte;
+ unsigned long next;
if (pmd_none(*pmd) || pmd_sect(*pmd)) {
pte = alloc(PTRS_PER_PTE * sizeof(pte_t));
@@ -105,9 +142,27 @@ static void alloc_init_pte(pmd_t *pmd, unsigned long addr,
pte = pte_offset_kernel(pmd, addr);
do {
- set_pte(pte, pfn_pte(pfn, prot));
- pfn++;
- } while (pte++, addr += PAGE_SIZE, addr != end);
+ next = min(end, (addr + CONT_SIZE) & CONT_MASK);
+ if (((addr | next | phys) & ~CONT_MASK) == 0) {
+ /* a block of CONT_PTES */
+ __populate_init_pte(pte, addr, next, phys,
+ prot | __pgprot(PTE_CONT));
+ } else {
+ /*
+ * If the range being split is already inside of a
+ * contiguous range but this PTE isn't going to be
+ * contiguous, then we want to unmark the adjacent
+ * ranges, then update the portion of the range we
+ * are interrested in.
+ */
+ clear_cont_pte_range(pte, addr);
+ __populate_init_pte(pte, addr, next, phys, prot);
+ }
+
+ pte += (next - addr) >> PAGE_SHIFT;
+ phys += next - addr;
+ addr = next;
+ } while (addr != end);
}
void split_pud(pud_t *old_pud, pmd_t *pmd)
@@ -168,8 +223,7 @@ static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud,
}
}
} else {
- alloc_init_pte(pmd, addr, next, __phys_to_pfn(phys),
- prot, alloc);
+ alloc_init_pte(pmd, addr, next, phys, prot, alloc);
}
phys += next - addr;
} while (pmd++, addr = next, addr != end);
@@ -353,14 +407,11 @@ static void __init map_mem(void)
* memory addressable from the initial direct kernel mapping.
*
* The initial direct kernel mapping, located at swapper_pg_dir, gives
- * us PUD_SIZE (4K pages) or PMD_SIZE (64K pages) memory starting from
- * PHYS_OFFSET (which must be aligned to 2MB as per
- * Documentation/arm64/booting.txt).
+ * us PUD_SIZE (with SECTION maps) or PMD_SIZE (without SECTION maps,
+ * memory starting from PHYS_OFFSET (which must be aligned to 2MB as
+ * per Documentation/arm64/booting.txt).
*/
- if (IS_ENABLED(CONFIG_ARM64_64K_PAGES))
- limit = PHYS_OFFSET + PMD_SIZE;
- else
- limit = PHYS_OFFSET + PUD_SIZE;
+ limit = PHYS_OFFSET + SWAPPER_INIT_MAP_SIZE;
memblock_set_current_limit(limit);
/* map all the memory banks */
@@ -371,21 +422,24 @@ static void __init map_mem(void)
if (start >= end)
break;
-#ifndef CONFIG_ARM64_64K_PAGES
- /*
- * For the first memory bank align the start address and
- * current memblock limit to prevent create_mapping() from
- * allocating pte page tables from unmapped memory.
- * When 64K pages are enabled, the pte page table for the
- * first PGDIR_SIZE is already present in swapper_pg_dir.
- */
- if (start < limit)
- start = ALIGN(start, PMD_SIZE);
- if (end < limit) {
- limit = end & PMD_MASK;
- memblock_set_current_limit(limit);
+ if (ARM64_SWAPPER_USES_SECTION_MAPS) {
+ /*
+ * For the first memory bank align the start address and
+ * current memblock limit to prevent create_mapping() from
+ * allocating pte page tables from unmapped memory. With
+ * the section maps, if the first block doesn't end on section
+ * size boundary, create_mapping() will try to allocate a pte
+ * page, which may be returned from an unmapped area.
+ * When section maps are not used, the pte page table for the
+ * current limit is already present in swapper_pg_dir.
+ */
+ if (start < limit)
+ start = ALIGN(start, SECTION_SIZE);
+ if (end < limit) {
+ limit = end & SECTION_MASK;
+ memblock_set_current_limit(limit);
+ }
}
-#endif
__map_memblock(start, end);
}
@@ -456,7 +510,7 @@ void __init paging_init(void)
* point to zero page to avoid speculatively fetching new entries.
*/
cpu_set_reserved_ttbr0();
- flush_tlb_all();
+ local_flush_tlb_all();
cpu_set_default_tcr_t0sz();
}
@@ -498,12 +552,12 @@ int kern_addr_valid(unsigned long addr)
return pfn_valid(pte_pfn(*pte));
}
#ifdef CONFIG_SPARSEMEM_VMEMMAP
-#ifdef CONFIG_ARM64_64K_PAGES
+#if !ARM64_SWAPPER_USES_SECTION_MAPS
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
{
return vmemmap_populate_basepages(start, end, node);
}
-#else /* !CONFIG_ARM64_64K_PAGES */
+#else /* !ARM64_SWAPPER_USES_SECTION_MAPS */
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
{
unsigned long addr = start;
@@ -638,7 +692,7 @@ void *__init fixmap_remap_fdt(phys_addr_t dt_phys)
{
const u64 dt_virt_base = __fix_to_virt(FIX_FDT);
pgprot_t prot = PAGE_KERNEL | PTE_RDONLY;
- int granularity, size, offset;
+ int size, offset;
void *dt_virt;
/*
@@ -664,24 +718,15 @@ void *__init fixmap_remap_fdt(phys_addr_t dt_phys)
*/
BUILD_BUG_ON(dt_virt_base % SZ_2M);
- if (IS_ENABLED(CONFIG_ARM64_64K_PAGES)) {
- BUILD_BUG_ON(__fix_to_virt(FIX_FDT_END) >> PMD_SHIFT !=
- __fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT);
-
- granularity = PAGE_SIZE;
- } else {
- BUILD_BUG_ON(__fix_to_virt(FIX_FDT_END) >> PUD_SHIFT !=
- __fix_to_virt(FIX_BTMAP_BEGIN) >> PUD_SHIFT);
-
- granularity = PMD_SIZE;
- }
+ BUILD_BUG_ON(__fix_to_virt(FIX_FDT_END) >> SWAPPER_TABLE_SHIFT !=
+ __fix_to_virt(FIX_BTMAP_BEGIN) >> SWAPPER_TABLE_SHIFT);
- offset = dt_phys % granularity;
+ offset = dt_phys % SWAPPER_BLOCK_SIZE;
dt_virt = (void *)dt_virt_base + offset;
/* map the first chunk so we can read the size from the header */
- create_mapping(round_down(dt_phys, granularity), dt_virt_base,
- granularity, prot);
+ create_mapping(round_down(dt_phys, SWAPPER_BLOCK_SIZE), dt_virt_base,
+ SWAPPER_BLOCK_SIZE, prot);
if (fdt_check_header(dt_virt) != 0)
return NULL;
@@ -690,9 +735,9 @@ void *__init fixmap_remap_fdt(phys_addr_t dt_phys)
if (size > MAX_FDT_SIZE)
return NULL;
- if (offset + size > granularity)
- create_mapping(round_down(dt_phys, granularity), dt_virt_base,
- round_up(offset + size, granularity), prot);
+ if (offset + size > SWAPPER_BLOCK_SIZE)
+ create_mapping(round_down(dt_phys, SWAPPER_BLOCK_SIZE), dt_virt_base,
+ round_up(offset + size, SWAPPER_BLOCK_SIZE), prot);
memblock_reserve(dt_phys, size);
diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c
index e47ed1c5dce1..3571c7309c5e 100644
--- a/arch/arm64/mm/pageattr.c
+++ b/arch/arm64/mm/pageattr.c
@@ -45,7 +45,7 @@ static int change_memory_common(unsigned long addr, int numpages,
int ret;
struct page_change_data data;
- if (!IS_ALIGNED(addr, PAGE_SIZE)) {
+ if (!PAGE_ALIGNED(addr)) {
start &= PAGE_MASK;
end = start + size;
WARN_ON_ONCE(1);
diff --git a/arch/arm64/mm/pgd.c b/arch/arm64/mm/pgd.c
index 71ca104f97bd..cb3ba1b812e7 100644
--- a/arch/arm64/mm/pgd.c
+++ b/arch/arm64/mm/pgd.c
@@ -28,8 +28,6 @@
#include "mm.h"
-#define PGD_SIZE (PTRS_PER_PGD * sizeof(pgd_t))
-
static struct kmem_cache *pgd_cache;
pgd_t *pgd_alloc(struct mm_struct *mm)
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 7783ff05f74c..cacecc4ad3e5 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -30,7 +30,9 @@
#ifdef CONFIG_ARM64_64K_PAGES
#define TCR_TG_FLAGS TCR_TG0_64K | TCR_TG1_64K
-#else
+#elif defined(CONFIG_ARM64_16K_PAGES)
+#define TCR_TG_FLAGS TCR_TG0_16K | TCR_TG1_16K
+#else /* CONFIG_ARM64_4K_PAGES */
#define TCR_TG_FLAGS TCR_TG0_4K | TCR_TG1_4K
#endif
@@ -130,7 +132,7 @@ ENDPROC(cpu_do_resume)
* - pgd_phys - physical address of new TTB
*/
ENTRY(cpu_do_switch_mm)
- mmid w1, x1 // get mm->context.id
+ mmid x1, x1 // get mm->context.id
bfi x0, x1, #48, #16 // set the ASID
msr ttbr0_el1, x0 // set TTBR0
isb
@@ -146,8 +148,8 @@ ENDPROC(cpu_do_switch_mm)
* value of the SCTLR_EL1 register.
*/
ENTRY(__cpu_setup)
- tlbi vmalle1is // invalidate I + D TLBs
- dsb ish
+ tlbi vmalle1 // Invalidate local TLB
+ dsb nsh
mov x0, #3 << 20
msr cpacr_el1, x0 // Enable FP/ASIMD