summaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/arm/Kconfig66
-rw-r--r--arch/arm/boot/compressed/atags_to_fdt.c2
-rw-r--r--arch/arm/boot/dts/bcm21664.dtsi164
-rw-r--r--arch/arm/boot/dts/marco.dtsi2
-rw-r--r--arch/arm/boot/dts/prima2.dtsi2
-rw-r--r--arch/arm/boot/dts/sun4i-a10.dtsi6
-rw-r--r--arch/arm/boot/dts/sun5i-a10s.dtsi6
-rw-r--r--arch/arm/boot/dts/sun5i-a13.dtsi6
-rw-r--r--arch/arm/boot/dts/sun7i-a20.dtsi10
-rw-r--r--arch/arm/common/mcpm_entry.c6
-rw-r--r--arch/arm/common/mcpm_platsmp.c2
-rw-r--r--arch/arm/include/asm/Kbuild1
-rw-r--r--arch/arm/include/asm/assembler.h2
-rw-r--r--arch/arm/include/asm/cacheflush.h4
-rw-r--r--arch/arm/include/asm/cp15.h25
-rw-r--r--arch/arm/include/asm/cputype.h1
-rw-r--r--arch/arm/include/asm/dma-mapping.h25
-rw-r--r--arch/arm/include/asm/fixmap.h21
-rw-r--r--arch/arm/include/asm/ftrace.h10
-rw-r--r--arch/arm/include/asm/glue-df.h8
-rw-r--r--arch/arm/include/asm/hardware/cache-l2x0.h104
-rw-r--r--arch/arm/include/asm/highmem.h1
-rw-r--r--arch/arm/include/asm/io.h6
-rw-r--r--arch/arm/include/asm/mach/arch.h7
-rw-r--r--arch/arm/include/asm/mcpm.h8
-rw-r--r--arch/arm/include/asm/memblock.h3
-rw-r--r--arch/arm/include/asm/memory.h2
-rw-r--r--arch/arm/include/asm/outercache.h66
-rw-r--r--arch/arm/include/asm/setup.h28
-rw-r--r--arch/arm/kernel/Makefile1
-rw-r--r--arch/arm/kernel/atags_parse.c5
-rw-r--r--arch/arm/kernel/devtree.c4
-rw-r--r--arch/arm/kernel/entry-armv.S19
-rw-r--r--arch/arm/kernel/entry-common.S8
-rw-r--r--arch/arm/kernel/entry-header.S4
-rw-r--r--arch/arm/kernel/ftrace.c13
-rw-r--r--arch/arm/kernel/head-common.S3
-rw-r--r--arch/arm/kernel/head.S2
-rw-r--r--arch/arm/kernel/hibernate.c107
-rw-r--r--arch/arm/kernel/irq.c12
-rw-r--r--arch/arm/kernel/isa.c6
-rw-r--r--arch/arm/kernel/iwmmxt.S16
-rw-r--r--arch/arm/kernel/perf_event_cpu.c1
-rw-r--r--arch/arm/kernel/perf_event_v7.c12
-rw-r--r--arch/arm/kernel/setup.c37
-rw-r--r--arch/arm/kernel/sleep.S5
-rw-r--r--arch/arm/kernel/stacktrace.c60
-rw-r--r--arch/arm/kernel/topology.c8
-rw-r--r--arch/arm/kernel/uprobes.c20
-rw-r--r--arch/arm/mach-at91/sysirq_mask.c22
-rw-r--r--arch/arm/mach-bcm/bcm_5301x.c9
-rw-r--r--arch/arm/mach-berlin/berlin.c17
-rw-r--r--arch/arm/mach-clps711x/board-clep7312.c7
-rw-r--r--arch/arm/mach-clps711x/board-edb7211.c10
-rw-r--r--arch/arm/mach-clps711x/board-p720t.c2
-rw-r--r--arch/arm/mach-cns3xxx/core.c10
-rw-r--r--arch/arm/mach-ep93xx/crunch-bits.S14
-rw-r--r--arch/arm/mach-exynos/common.h1
-rw-r--r--arch/arm/mach-exynos/exynos.c24
-rw-r--r--arch/arm/mach-exynos/sleep.S30
-rw-r--r--arch/arm/mach-footbridge/cats-hw.c2
-rw-r--r--arch/arm/mach-footbridge/netwinder-hw.c2
-rw-r--r--arch/arm/mach-highbank/highbank.c21
-rw-r--r--arch/arm/mach-imx/mach-vf610.c9
-rw-r--r--arch/arm/mach-imx/suspend-imx6.S24
-rw-r--r--arch/arm/mach-imx/system.c8
-rw-r--r--arch/arm/mach-msm/board-halibut.c6
-rw-r--r--arch/arm/mach-msm/board-mahimahi.c13
-rw-r--r--arch/arm/mach-msm/board-msm7x30.c3
-rw-r--r--arch/arm/mach-msm/board-sapphire.c13
-rw-r--r--arch/arm/mach-msm/board-trout.c8
-rw-r--r--arch/arm/mach-mvebu/board-v7.c7
-rw-r--r--arch/arm/mach-nomadik/cpu-8815.c13
-rw-r--r--arch/arm/mach-omap2/Kconfig1
-rw-r--r--arch/arm/mach-omap2/common.h1
-rw-r--r--arch/arm/mach-omap2/io.c2
-rw-r--r--arch/arm/mach-omap2/omap-mpuss-lowpower.c16
-rw-r--r--arch/arm/mach-omap2/omap4-common.c86
-rw-r--r--arch/arm/mach-orion5x/common.c3
-rw-r--r--arch/arm/mach-orion5x/common.h3
-rw-r--r--arch/arm/mach-prima2/Makefile1
-rw-r--r--arch/arm/mach-prima2/common.c6
-rw-r--r--arch/arm/mach-prima2/l2x0.c49
-rw-r--r--arch/arm/mach-prima2/pm.c1
-rw-r--r--arch/arm/mach-pxa/cm-x300.c3
-rw-r--r--arch/arm/mach-pxa/corgi.c10
-rw-r--r--arch/arm/mach-pxa/eseries.c9
-rw-r--r--arch/arm/mach-pxa/poodle.c8
-rw-r--r--arch/arm/mach-pxa/spitz.c8
-rw-r--r--arch/arm/mach-pxa/tosa.c8
-rw-r--r--arch/arm/mach-realview/core.c11
-rw-r--r--arch/arm/mach-realview/core.h3
-rw-r--r--arch/arm/mach-realview/realview_eb.c9
-rw-r--r--arch/arm/mach-realview/realview_pb1176.c16
-rw-r--r--arch/arm/mach-realview/realview_pb11mp.c9
-rw-r--r--arch/arm/mach-realview/realview_pbx.c21
-rw-r--r--arch/arm/mach-rockchip/rockchip.c9
-rw-r--r--arch/arm/mach-s3c24xx/mach-smdk2413.c8
-rw-r--r--arch/arm/mach-s3c24xx/mach-vstms.c8
-rw-r--r--arch/arm/mach-sa1100/assabet.c2
-rw-r--r--arch/arm/mach-shmobile/board-armadillo800eva-reference.c4
-rw-r--r--arch/arm/mach-shmobile/board-armadillo800eva.c4
-rw-r--r--arch/arm/mach-shmobile/board-kzm9g-reference.c4
-rw-r--r--arch/arm/mach-shmobile/board-kzm9g.c4
-rw-r--r--arch/arm/mach-shmobile/setup-r8a7778.c4
-rw-r--r--arch/arm/mach-shmobile/setup-r8a7779.c4
-rw-r--r--arch/arm/mach-socfpga/socfpga.c9
-rw-r--r--arch/arm/mach-spear/platsmp.c19
-rw-r--r--arch/arm/mach-spear/spear13xx.c8
-rw-r--r--arch/arm/mach-sti/board-dt.c27
-rw-r--r--arch/arm/mach-tegra/pm.h2
-rw-r--r--arch/arm/mach-tegra/reset-handler.S11
-rw-r--r--arch/arm/mach-tegra/sleep.h31
-rw-r--r--arch/arm/mach-tegra/tegra.c32
-rw-r--r--arch/arm/mach-ux500/cache-l2x0.c32
-rw-r--r--arch/arm/mach-vexpress/ct-ca9x4.c28
-rw-r--r--arch/arm/mach-vexpress/tc2_pm.c4
-rw-r--r--arch/arm/mach-vexpress/v2m.c3
-rw-r--r--arch/arm/mach-zynq/common.c8
-rw-r--r--arch/arm/mm/Kconfig51
-rw-r--r--arch/arm/mm/Makefile3
-rw-r--r--arch/arm/mm/alignment.c19
-rw-r--r--arch/arm/mm/cache-feroceon-l2.c1
-rw-r--r--arch/arm/mm/cache-l2x0.c1498
-rw-r--r--arch/arm/mm/cache-v7.S12
-rw-r--r--arch/arm/mm/dma-mapping.c11
-rw-r--r--arch/arm/mm/flush.c33
-rw-r--r--arch/arm/mm/highmem.c33
-rw-r--r--arch/arm/mm/init.c81
-rw-r--r--arch/arm/mm/ioremap.c9
-rw-r--r--arch/arm/mm/l2c-common.c20
-rw-r--r--arch/arm/mm/l2c-l2x0-resume.S58
-rw-r--r--arch/arm/mm/mm.h4
-rw-r--r--arch/arm/mm/mmu.c238
-rw-r--r--arch/arm/mm/nommu.c66
-rw-r--r--arch/arm/mm/proc-v7-3level.S18
-rw-r--r--arch/arm/mm/proc-v7.S39
-rw-r--r--arch/arm/plat-samsung/s5p-sleep.S1
-rw-r--r--arch/arm/vfp/entry.S3
-rw-r--r--arch/arm64/Kconfig28
-rw-r--r--arch/arm64/Makefile1
-rw-r--r--arch/arm64/boot/dts/apm-storm.dtsi21
-rw-r--r--arch/arm64/configs/defconfig36
-rw-r--r--arch/arm64/crypto/Kconfig53
-rw-r--r--arch/arm64/crypto/Makefile38
-rw-r--r--arch/arm64/crypto/aes-ce-ccm-core.S222
-rw-r--r--arch/arm64/crypto/aes-ce-ccm-glue.c297
-rw-r--r--arch/arm64/crypto/aes-ce-cipher.c155
-rw-r--r--arch/arm64/crypto/aes-ce.S133
-rw-r--r--arch/arm64/crypto/aes-glue.c446
-rw-r--r--arch/arm64/crypto/aes-modes.S532
-rw-r--r--arch/arm64/crypto/aes-neon.S382
-rw-r--r--arch/arm64/crypto/ghash-ce-core.S95
-rw-r--r--arch/arm64/crypto/ghash-ce-glue.c155
-rw-r--r--arch/arm64/crypto/sha1-ce-core.S153
-rw-r--r--arch/arm64/crypto/sha1-ce-glue.c174
-rw-r--r--arch/arm64/crypto/sha2-ce-core.S156
-rw-r--r--arch/arm64/crypto/sha2-ce-glue.c255
-rw-r--r--arch/arm64/include/asm/Kbuild1
-rw-r--r--arch/arm64/include/asm/assembler.h23
-rw-r--r--arch/arm64/include/asm/atomic.h2
-rw-r--r--arch/arm64/include/asm/barrier.h20
-rw-r--r--arch/arm64/include/asm/cache.h13
-rw-r--r--arch/arm64/include/asm/cacheflush.h4
-rw-r--r--arch/arm64/include/asm/cachetype.h11
-rw-r--r--arch/arm64/include/asm/cmpxchg.h7
-rw-r--r--arch/arm64/include/asm/compat.h5
-rw-r--r--arch/arm64/include/asm/efi.h14
-rw-r--r--arch/arm64/include/asm/esr.h6
-rw-r--r--arch/arm64/include/asm/fpsimd.h23
-rw-r--r--arch/arm64/include/asm/fpsimdmacros.h35
-rw-r--r--arch/arm64/include/asm/ftrace.h59
-rw-r--r--arch/arm64/include/asm/hardirq.h2
-rw-r--r--arch/arm64/include/asm/insn.h2
-rw-r--r--arch/arm64/include/asm/io.h8
-rw-r--r--arch/arm64/include/asm/mmu.h2
-rw-r--r--arch/arm64/include/asm/neon.h6
-rw-r--r--arch/arm64/include/asm/pgtable-hwdef.h2
-rw-r--r--arch/arm64/include/asm/pgtable.h107
-rw-r--r--arch/arm64/include/asm/processor.h1
-rw-r--r--arch/arm64/include/asm/ptrace.h5
-rw-r--r--arch/arm64/include/asm/sigcontext.h31
-rw-r--r--arch/arm64/include/asm/string.h15
-rw-r--r--arch/arm64/include/asm/syscall.h1
-rw-r--r--arch/arm64/include/asm/thread_info.h17
-rw-r--r--arch/arm64/include/asm/tlbflush.h44
-rw-r--r--arch/arm64/include/asm/topology.h3
-rw-r--r--arch/arm64/include/asm/unistd.h2
-rw-r--r--arch/arm64/include/uapi/asm/sigcontext.h7
-rw-r--r--arch/arm64/kernel/Makefile10
-rw-r--r--arch/arm64/kernel/arm64ksyms.c9
-rw-r--r--arch/arm64/kernel/efi-entry.S109
-rw-r--r--arch/arm64/kernel/efi-stub.c81
-rw-r--r--arch/arm64/kernel/efi.c469
-rw-r--r--arch/arm64/kernel/entry-fpsimd.S24
-rw-r--r--arch/arm64/kernel/entry-ftrace.S218
-rw-r--r--arch/arm64/kernel/entry.S90
-rw-r--r--arch/arm64/kernel/fpsimd.c186
-rw-r--r--arch/arm64/kernel/ftrace.c176
-rw-r--r--arch/arm64/kernel/head.S120
-rw-r--r--arch/arm64/kernel/hw_breakpoint.c2
-rw-r--r--arch/arm64/kernel/process.c49
-rw-r--r--arch/arm64/kernel/ptrace.c62
-rw-r--r--arch/arm64/kernel/return_address.c55
-rw-r--r--arch/arm64/kernel/setup.c21
-rw-r--r--arch/arm64/kernel/signal.c52
-rw-r--r--arch/arm64/kernel/signal32.c16
-rw-r--r--arch/arm64/kernel/smp.c19
-rw-r--r--arch/arm64/kernel/smp_spin_table.c39
-rw-r--r--arch/arm64/kernel/stacktrace.c2
-rw-r--r--arch/arm64/kernel/time.c3
-rw-r--r--arch/arm64/kernel/topology.c212
-rw-r--r--arch/arm64/kernel/traps.c7
-rw-r--r--arch/arm64/kernel/vmlinux.lds.S2
-rw-r--r--arch/arm64/kvm/hyp.S12
-rw-r--r--arch/arm64/kvm/sys_regs.c4
-rw-r--r--arch/arm64/lib/Makefile1
-rw-r--r--arch/arm64/lib/memcmp.S258
-rw-r--r--arch/arm64/lib/memcpy.S192
-rw-r--r--arch/arm64/lib/memmove.S190
-rw-r--r--arch/arm64/lib/memset.S207
-rw-r--r--arch/arm64/lib/strcmp.S234
-rw-r--r--arch/arm64/lib/strlen.S126
-rw-r--r--arch/arm64/lib/strncmp.S310
-rw-r--r--arch/arm64/lib/strnlen.S171
-rw-r--r--arch/arm64/mm/Makefile2
-rw-r--r--arch/arm64/mm/cache.S6
-rw-r--r--arch/arm64/mm/dma-mapping.c2
-rw-r--r--arch/arm64/mm/fault.c8
-rw-r--r--arch/arm64/mm/mmu.c124
-rw-r--r--arch/arm64/mm/proc.S2
-rw-r--r--arch/arm64/mm/tlb.S71
-rw-r--r--arch/blackfin/include/asm/ftrace.h11
-rw-r--r--arch/blackfin/kernel/ptrace.c8
-rw-r--r--arch/cris/arch-v10/drivers/gpio.c4
-rw-r--r--arch/cris/arch-v32/drivers/mach-fs/gpio.c6
-rw-r--r--arch/ia64/kernel/crash.c4
-rw-r--r--arch/ia64/kernel/perfmon.c6
-rw-r--r--arch/m68k/include/asm/signal.h9
-rw-r--r--arch/microblaze/configs/mmu_defconfig1
-rw-r--r--arch/microblaze/configs/nommu_defconfig1
-rw-r--r--arch/microblaze/include/asm/Kbuild1
-rw-r--r--arch/microblaze/include/asm/device.h26
-rw-r--r--arch/microblaze/include/asm/dma-mapping.h25
-rw-r--r--arch/microblaze/include/asm/io.h3
-rw-r--r--arch/microblaze/include/asm/pci.h8
-rw-r--r--arch/microblaze/kernel/dma.c30
-rw-r--r--arch/microblaze/kernel/head.S2
-rw-r--r--arch/microblaze/kernel/setup.c34
-rw-r--r--arch/microblaze/pci/pci-common.c19
-rw-r--r--arch/mips/dec/Makefile2
-rw-r--r--arch/mips/dec/platform.c44
-rw-r--r--arch/parisc/include/asm/ftrace.h10
-rw-r--r--arch/powerpc/kernel/irq.c2
-rw-r--r--arch/sh/include/asm/ftrace.h10
-rw-r--r--arch/tile/kernel/proc.c4
-rw-r--r--arch/x86/boot/compressed/eboot.c3
-rw-r--r--arch/x86/boot/compressed/head_64.S2
-rw-r--r--arch/x86/boot/header.S3
-rw-r--r--arch/x86/crypto/ghash-clmulni-intel_asm.S4
-rw-r--r--arch/x86/crypto/ghash-clmulni-intel_glue.c12
-rw-r--r--arch/x86/include/asm/efi.h100
-rw-r--r--arch/x86/include/asm/fpu-internal.h10
-rw-r--r--arch/x86/include/asm/signal.h6
-rw-r--r--arch/x86/kernel/irq.c16
-rw-r--r--arch/x86/kernel/smpboot.c5
-rw-r--r--arch/x86/platform/efi/efi.c51
-rw-r--r--arch/x86/platform/efi/efi_stub_64.S81
-rw-r--r--arch/x86/platform/uv/bios_uv.c2
-rw-r--r--arch/xtensa/include/asm/ftrace.h14
270 files changed, 9713 insertions, 2588 deletions
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index ad89a033f17f..87b63fde06d7 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -165,12 +165,9 @@ config TRACE_IRQFLAGS_SUPPORT
bool
default y
-config RWSEM_GENERIC_SPINLOCK
- bool
- default y
-
config RWSEM_XCHGADD_ALGORITHM
bool
+ default y
config ARCH_HAS_ILOG2_U32
bool
@@ -1089,11 +1086,6 @@ source "arch/arm/firmware/Kconfig"
source arch/arm/mm/Kconfig
-config ARM_NR_BANKS
- int
- default 16 if ARCH_EP93XX
- default 8
-
config IWMMXT
bool "Enable iWMMXt support"
depends on CPU_XSCALE || CPU_XSC3 || CPU_MOHAWK || CPU_PJ4 || CPU_PJ4B
@@ -1214,19 +1206,6 @@ config ARM_ERRATA_742231
register of the Cortex-A9 which reduces the linefill issuing
capabilities of the processor.
-config PL310_ERRATA_588369
- bool "PL310 errata: Clean & Invalidate maintenance operations do not invalidate clean lines"
- depends on CACHE_L2X0
- help
- The PL310 L2 cache controller implements three types of Clean &
- Invalidate maintenance operations: by Physical Address
- (offset 0x7F0), by Index/Way (0x7F8) and by Way (0x7FC).
- They are architecturally defined to behave as the execution of a
- clean operation followed immediately by an invalidate operation,
- both performing to the same memory location. This functionality
- is not correctly implemented in PL310 as clean lines are not
- invalidated as a result of these operations.
-
config ARM_ERRATA_643719
bool "ARM errata: LoUIS bit field in CLIDR register is incorrect"
depends on CPU_V7 && SMP
@@ -1249,17 +1228,6 @@ config ARM_ERRATA_720789
tables. The workaround changes the TLB flushing routines to invalidate
entries regardless of the ASID.
-config PL310_ERRATA_727915
- bool "PL310 errata: Background Clean & Invalidate by Way operation can cause data corruption"
- depends on CACHE_L2X0
- help
- PL310 implements the Clean & Invalidate by Way L2 cache maintenance
- operation (offset 0x7FC). This operation runs in background so that
- PL310 can handle normal accesses while it is in progress. Under very
- rare circumstances, due to this erratum, write data can be lost when
- PL310 treats a cacheable write transaction during a Clean &
- Invalidate by Way operation.
-
config ARM_ERRATA_743622
bool "ARM errata: Faulty hazard checking in the Store Buffer may lead to data corruption"
depends on CPU_V7
@@ -1285,21 +1253,6 @@ config ARM_ERRATA_751472
operation is received by a CPU before the ICIALLUIS has completed,
potentially leading to corrupted entries in the cache or TLB.
-config PL310_ERRATA_753970
- bool "PL310 errata: cache sync operation may be faulty"
- depends on CACHE_PL310
- help
- This option enables the workaround for the 753970 PL310 (r3p0) erratum.
-
- Under some condition the effect of cache sync operation on
- the store buffer still remains when the operation completes.
- This means that the store buffer is always asked to drain and
- this prevents it from merging any further writes. The workaround
- is to replace the normal offset of cache sync operation (0x730)
- by another offset targeting an unmapped PL310 register 0x740.
- This has the same effect as the cache sync operation: store buffer
- drain and waiting for all buffers empty.
-
config ARM_ERRATA_754322
bool "ARM errata: possible faulty MMU translations following an ASID switch"
depends on CPU_V7
@@ -1348,18 +1301,6 @@ config ARM_ERRATA_764369
relevant cache maintenance functions and sets a specific bit
in the diagnostic control register of the SCU.
-config PL310_ERRATA_769419
- bool "PL310 errata: no automatic Store Buffer drain"
- depends on CACHE_L2X0
- help
- On revisions of the PL310 prior to r3p2, the Store Buffer does
- not automatically drain. This can cause normal, non-cacheable
- writes to be retained when the memory system is idle, leading
- to suboptimal I/O performance for drivers using coherent DMA.
- This option adds a write barrier to the cpu_idle loop so that,
- on systems with an outer cache, the store buffer is drained
- explicitly.
-
config ARM_ERRATA_775420
bool "ARM errata: A data cache maintenance operation which aborts, might lead to deadlock"
depends on CPU_V7
@@ -2279,6 +2220,11 @@ config ARCH_SUSPEND_POSSIBLE
config ARM_CPU_SUSPEND
def_bool PM_SLEEP
+config ARCH_HIBERNATION_POSSIBLE
+ bool
+ depends on MMU
+ default y if ARCH_SUSPEND_POSSIBLE
+
endmenu
source "net/Kconfig"
diff --git a/arch/arm/boot/compressed/atags_to_fdt.c b/arch/arm/boot/compressed/atags_to_fdt.c
index d1153c8a765a..9448aa0c6686 100644
--- a/arch/arm/boot/compressed/atags_to_fdt.c
+++ b/arch/arm/boot/compressed/atags_to_fdt.c
@@ -7,6 +7,8 @@
#define do_extend_cmdline 0
#endif
+#define NR_BANKS 16
+
static int node_offset(void *fdt, const char *node_path)
{
int offset = fdt_path_offset(fdt, node_path);
diff --git a/arch/arm/boot/dts/bcm21664.dtsi b/arch/arm/boot/dts/bcm21664.dtsi
index 08a44d41b672..8b366822bb43 100644
--- a/arch/arm/boot/dts/bcm21664.dtsi
+++ b/arch/arm/boot/dts/bcm21664.dtsi
@@ -14,6 +14,8 @@
#include <dt-bindings/interrupt-controller/arm-gic.h>
#include <dt-bindings/interrupt-controller/irq.h>
+#include "dt-bindings/clock/bcm21664.h"
+
#include "skeleton.dtsi"
/ {
@@ -43,7 +45,7 @@
compatible = "brcm,bcm21664-dw-apb-uart", "snps,dw-apb-uart";
status = "disabled";
reg = <0x3e000000 0x118>;
- clocks = <&uartb_clk>;
+ clocks = <&slave_ccu BCM21664_SLAVE_CCU_UARTB>;
interrupts = <GIC_SPI 67 IRQ_TYPE_LEVEL_HIGH>;
reg-shift = <2>;
reg-io-width = <4>;
@@ -53,7 +55,7 @@
compatible = "brcm,bcm21664-dw-apb-uart", "snps,dw-apb-uart";
status = "disabled";
reg = <0x3e001000 0x118>;
- clocks = <&uartb2_clk>;
+ clocks = <&slave_ccu BCM21664_SLAVE_CCU_UARTB2>;
interrupts = <GIC_SPI 66 IRQ_TYPE_LEVEL_HIGH>;
reg-shift = <2>;
reg-io-width = <4>;
@@ -63,7 +65,7 @@
compatible = "brcm,bcm21664-dw-apb-uart", "snps,dw-apb-uart";
status = "disabled";
reg = <0x3e002000 0x118>;
- clocks = <&uartb3_clk>;
+ clocks = <&slave_ccu BCM21664_SLAVE_CCU_UARTB3>;
interrupts = <GIC_SPI 65 IRQ_TYPE_LEVEL_HIGH>;
reg-shift = <2>;
reg-io-width = <4>;
@@ -85,7 +87,7 @@
compatible = "brcm,kona-timer";
reg = <0x35006000 0x1c>;
interrupts = <GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&hub_timer_clk>;
+ clocks = <&aon_ccu BCM21664_AON_CCU_HUB_TIMER>;
};
gpio: gpio@35003000 {
@@ -106,7 +108,7 @@
compatible = "brcm,kona-sdhci";
reg = <0x3f180000 0x801c>;
interrupts = <GIC_SPI 77 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&sdio1_clk>;
+ clocks = <&master_ccu BCM21664_MASTER_CCU_SDIO1>;
status = "disabled";
};
@@ -114,7 +116,7 @@
compatible = "brcm,kona-sdhci";
reg = <0x3f190000 0x801c>;
interrupts = <GIC_SPI 76 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&sdio2_clk>;
+ clocks = <&master_ccu BCM21664_MASTER_CCU_SDIO2>;
status = "disabled";
};
@@ -122,7 +124,7 @@
compatible = "brcm,kona-sdhci";
reg = <0x3f1a0000 0x801c>;
interrupts = <GIC_SPI 74 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&sdio3_clk>;
+ clocks = <&master_ccu BCM21664_MASTER_CCU_SDIO3>;
status = "disabled";
};
@@ -130,7 +132,7 @@
compatible = "brcm,kona-sdhci";
reg = <0x3f1b0000 0x801c>;
interrupts = <GIC_SPI 73 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&sdio4_clk>;
+ clocks = <&master_ccu BCM21664_MASTER_CCU_SDIO4>;
status = "disabled";
};
@@ -140,7 +142,7 @@
interrupts = <GIC_SPI 103 IRQ_TYPE_LEVEL_HIGH>;
#address-cells = <1>;
#size-cells = <0>;
- clocks = <&bsc1_clk>;
+ clocks = <&slave_ccu BCM21664_SLAVE_CCU_BSC1>;
status = "disabled";
};
@@ -150,7 +152,7 @@
interrupts = <GIC_SPI 102 IRQ_TYPE_LEVEL_HIGH>;
#address-cells = <1>;
#size-cells = <0>;
- clocks = <&bsc2_clk>;
+ clocks = <&slave_ccu BCM21664_SLAVE_CCU_BSC2>;
status = "disabled";
};
@@ -160,7 +162,7 @@
interrupts = <GIC_SPI 169 IRQ_TYPE_LEVEL_HIGH>;
#address-cells = <1>;
#size-cells = <0>;
- clocks = <&bsc3_clk>;
+ clocks = <&slave_ccu BCM21664_SLAVE_CCU_BSC3>;
status = "disabled";
};
@@ -170,105 +172,149 @@
interrupts = <GIC_SPI 170 IRQ_TYPE_LEVEL_HIGH>;
#address-cells = <1>;
#size-cells = <0>;
- clocks = <&bsc4_clk>;
+ clocks = <&slave_ccu BCM21664_SLAVE_CCU_BSC4>;
status = "disabled";
};
clocks {
- bsc1_clk: bsc1 {
- compatible = "fixed-clock";
- clock-frequency = <13000000>;
- #clock-cells = <0>;
- };
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges;
- bsc2_clk: bsc2 {
- compatible = "fixed-clock";
- clock-frequency = <13000000>;
+ /*
+ * Fixed clocks are defined before CCUs whose
+ * clocks may depend on them.
+ */
+
+ ref_32k_clk: ref_32k {
#clock-cells = <0>;
+ compatible = "fixed-clock";
+ clock-frequency = <32768>;
};
- bsc3_clk: bsc3 {
- compatible = "fixed-clock";
- clock-frequency = <13000000>;
+ bbl_32k_clk: bbl_32k {
#clock-cells = <0>;
+ compatible = "fixed-clock";
+ clock-frequency = <32768>;
};
- bsc4_clk: bsc4 {
+ ref_13m_clk: ref_13m {
+ #clock-cells = <0>;
compatible = "fixed-clock";
clock-frequency = <13000000>;
- #clock-cells = <0>;
};
- pmu_bsc_clk: pmu_bsc {
+ var_13m_clk: var_13m {
+ #clock-cells = <0>;
compatible = "fixed-clock";
clock-frequency = <13000000>;
- #clock-cells = <0>;
};
- hub_timer_clk: hub_timer {
- compatible = "fixed-clock";
- clock-frequency = <32768>;
+ dft_19_5m_clk: dft_19_5m {
#clock-cells = <0>;
+ compatible = "fixed-clock";
+ clock-frequency = <19500000>;
};
- pwm_clk: pwm {
+ ref_crystal_clk: ref_crystal {
+ #clock-cells = <0>;
compatible = "fixed-clock";
clock-frequency = <26000000>;
- #clock-cells = <0>;
};
- sdio1_clk: sdio1 {
- compatible = "fixed-clock";
- clock-frequency = <48000000>;
+ ref_52m_clk: ref_52m {
#clock-cells = <0>;
+ compatible = "fixed-clock";
+ clock-frequency = <52000000>;
};
- sdio2_clk: sdio2 {
- compatible = "fixed-clock";
- clock-frequency = <48000000>;
+ var_52m_clk: var_52m {
#clock-cells = <0>;
+ compatible = "fixed-clock";
+ clock-frequency = <52000000>;
};
- sdio3_clk: sdio3 {
- compatible = "fixed-clock";
- clock-frequency = <48000000>;
+ usb_otg_ahb_clk: usb_otg_ahb {
#clock-cells = <0>;
+ compatible = "fixed-clock";
+ clock-frequency = <52000000>;
};
- sdio4_clk: sdio4 {
- compatible = "fixed-clock";
- clock-frequency = <48000000>;
+ ref_96m_clk: ref_96m {
#clock-cells = <0>;
+ compatible = "fixed-clock";
+ clock-frequency = <96000000>;
};
- tmon_1m_clk: tmon_1m {
- compatible = "fixed-clock";
- clock-frequency = <1000000>;
+ var_96m_clk: var_96m {
#clock-cells = <0>;
+ compatible = "fixed-clock";
+ clock-frequency = <96000000>;
};
- uartb_clk: uartb {
- compatible = "fixed-clock";
- clock-frequency = <13000000>;
+ ref_104m_clk: ref_104m {
#clock-cells = <0>;
+ compatible = "fixed-clock";
+ clock-frequency = <104000000>;
};
- uartb2_clk: uartb2 {
- compatible = "fixed-clock";
- clock-frequency = <13000000>;
+ var_104m_clk: var_104m {
#clock-cells = <0>;
+ compatible = "fixed-clock";
+ clock-frequency = <104000000>;
};
- uartb3_clk: uartb3 {
- compatible = "fixed-clock";
- clock-frequency = <13000000>;
+ ref_156m_clk: ref_156m {
#clock-cells = <0>;
+ compatible = "fixed-clock";
+ clock-frequency = <156000000>;
};
- usb_otg_ahb_clk: usb_otg_ahb {
- compatible = "fixed-clock";
- clock-frequency = <52000000>;
+ var_156m_clk: var_156m {
#clock-cells = <0>;
+ compatible = "fixed-clock";
+ clock-frequency = <156000000>;
+ };
+
+ root_ccu: root_ccu {
+ compatible = BCM21664_DT_ROOT_CCU_COMPAT;
+ reg = <0x35001000 0x0f00>;
+ #clock-cells = <1>;
+ clock-output-names = "frac_1m";
+ };
+
+ aon_ccu: aon_ccu {
+ compatible = BCM21664_DT_AON_CCU_COMPAT;
+ reg = <0x35002000 0x0f00>;
+ #clock-cells = <1>;
+ clock-output-names = "hub_timer";
+ };
+
+ master_ccu: master_ccu {
+ compatible = BCM21664_DT_MASTER_CCU_COMPAT;
+ reg = <0x3f001000 0x0f00>;
+ #clock-cells = <1>;
+ clock-output-names = "sdio1",
+ "sdio2",
+ "sdio3",
+ "sdio4",
+ "sdio1_sleep",
+ "sdio2_sleep",
+ "sdio3_sleep",
+ "sdio4_sleep";
+ };
+
+ slave_ccu: slave_ccu {
+ compatible = BCM21664_DT_SLAVE_CCU_COMPAT;
+ reg = <0x3e011000 0x0f00>;
+ #clock-cells = <1>;
+ clock-output-names = "uartb",
+ "uartb2",
+ "uartb3",
+ "bsc1",
+ "bsc2",
+ "bsc3",
+ "bsc4";
};
};
diff --git a/arch/arm/boot/dts/marco.dtsi b/arch/arm/boot/dts/marco.dtsi
index 0c9647d28765..fb354225740a 100644
--- a/arch/arm/boot/dts/marco.dtsi
+++ b/arch/arm/boot/dts/marco.dtsi
@@ -36,7 +36,7 @@
ranges = <0x40000000 0x40000000 0xa0000000>;
l2-cache-controller@c0030000 {
- compatible = "sirf,marco-pl310-cache", "arm,pl310-cache";
+ compatible = "arm,pl310-cache";
reg = <0xc0030000 0x1000>;
interrupts = <0 59 0>;
arm,tag-latency = <1 1 1>;
diff --git a/arch/arm/boot/dts/prima2.dtsi b/arch/arm/boot/dts/prima2.dtsi
index 3df7ba860282..963b7e54ab15 100644
--- a/arch/arm/boot/dts/prima2.dtsi
+++ b/arch/arm/boot/dts/prima2.dtsi
@@ -48,7 +48,7 @@
ranges = <0x40000000 0x40000000 0x80000000>;
l2-cache-controller@80040000 {
- compatible = "arm,pl310-cache", "sirf,prima2-pl310-cache";
+ compatible = "arm,pl310-cache";
reg = <0x80040000 0x1000>;
interrupts = <59>;
arm,tag-latency = <1 1 1>;
diff --git a/arch/arm/boot/dts/sun4i-a10.dtsi b/arch/arm/boot/dts/sun4i-a10.dtsi
index c7b794ed337d..d96e179490ce 100644
--- a/arch/arm/boot/dts/sun4i-a10.dtsi
+++ b/arch/arm/boot/dts/sun4i-a10.dtsi
@@ -713,7 +713,7 @@
};
i2c0: i2c@01c2ac00 {
- compatible = "allwinner,sun4i-i2c";
+ compatible = "allwinner,sun4i-a10-i2c";
reg = <0x01c2ac00 0x400>;
interrupts = <7>;
clocks = <&apb1_gates 0>;
@@ -724,7 +724,7 @@
};
i2c1: i2c@01c2b000 {
- compatible = "allwinner,sun4i-i2c";
+ compatible = "allwinner,sun4i-a10-i2c";
reg = <0x01c2b000 0x400>;
interrupts = <8>;
clocks = <&apb1_gates 1>;
@@ -735,7 +735,7 @@
};
i2c2: i2c@01c2b400 {
- compatible = "allwinner,sun4i-i2c";
+ compatible = "allwinner,sun4i-a10-i2c";
reg = <0x01c2b400 0x400>;
interrupts = <9>;
clocks = <&apb1_gates 2>;
diff --git a/arch/arm/boot/dts/sun5i-a10s.dtsi b/arch/arm/boot/dts/sun5i-a10s.dtsi
index aa1dd59dc34a..b64f705d9008 100644
--- a/arch/arm/boot/dts/sun5i-a10s.dtsi
+++ b/arch/arm/boot/dts/sun5i-a10s.dtsi
@@ -560,7 +560,7 @@
i2c0: i2c@01c2ac00 {
#address-cells = <1>;
#size-cells = <0>;
- compatible = "allwinner,sun4i-i2c";
+ compatible = "allwinner,sun5i-a10s-i2c", "allwinner,sun4i-a10-i2c";
reg = <0x01c2ac00 0x400>;
interrupts = <7>;
clocks = <&apb1_gates 0>;
@@ -571,7 +571,7 @@
i2c1: i2c@01c2b000 {
#address-cells = <1>;
#size-cells = <0>;
- compatible = "allwinner,sun4i-i2c";
+ compatible = "allwinner,sun5i-a10s-i2c", "allwinner,sun4i-a10-i2c";
reg = <0x01c2b000 0x400>;
interrupts = <8>;
clocks = <&apb1_gates 1>;
@@ -582,7 +582,7 @@
i2c2: i2c@01c2b400 {
#address-cells = <1>;
#size-cells = <0>;
- compatible = "allwinner,sun4i-i2c";
+ compatible = "allwinner,sun5i-a10s-i2c", "allwinner,sun4i-a10-i2c";
reg = <0x01c2b400 0x400>;
interrupts = <9>;
clocks = <&apb1_gates 2>;
diff --git a/arch/arm/boot/dts/sun5i-a13.dtsi b/arch/arm/boot/dts/sun5i-a13.dtsi
index c9fdb7b3ecd9..3b2a94c40f6e 100644
--- a/arch/arm/boot/dts/sun5i-a13.dtsi
+++ b/arch/arm/boot/dts/sun5i-a13.dtsi
@@ -486,7 +486,7 @@
};
i2c0: i2c@01c2ac00 {
- compatible = "allwinner,sun4i-i2c";
+ compatible = "allwinner,sun5i-a13-i2c", "allwinner,sun4i-a10-i2c";
reg = <0x01c2ac00 0x400>;
interrupts = <7>;
clocks = <&apb1_gates 0>;
@@ -497,7 +497,7 @@
};
i2c1: i2c@01c2b000 {
- compatible = "allwinner,sun4i-i2c";
+ compatible = "allwinner,sun5i-a13-i2c", "allwinner,sun4i-a10-i2c";
reg = <0x01c2b000 0x400>;
interrupts = <8>;
clocks = <&apb1_gates 1>;
@@ -508,7 +508,7 @@
};
i2c2: i2c@01c2b400 {
- compatible = "allwinner,sun4i-i2c";
+ compatible = "allwinner,sun5i-a13-i2c", "allwinner,sun4i-a10-i2c";
reg = <0x01c2b400 0x400>;
interrupts = <9>;
clocks = <&apb1_gates 2>;
diff --git a/arch/arm/boot/dts/sun7i-a20.dtsi b/arch/arm/boot/dts/sun7i-a20.dtsi
index 385933bac114..01e94664232a 100644
--- a/arch/arm/boot/dts/sun7i-a20.dtsi
+++ b/arch/arm/boot/dts/sun7i-a20.dtsi
@@ -863,7 +863,7 @@
};
i2c0: i2c@01c2ac00 {
- compatible = "allwinner,sun4i-i2c";
+ compatible = "allwinner,sun7i-a20-i2c", "allwinner,sun4i-a10-i2c";
reg = <0x01c2ac00 0x400>;
interrupts = <0 7 4>;
clocks = <&apb1_gates 0>;
@@ -874,7 +874,7 @@
};
i2c1: i2c@01c2b000 {
- compatible = "allwinner,sun4i-i2c";
+ compatible = "allwinner,sun7i-a20-i2c", "allwinner,sun4i-a10-i2c";
reg = <0x01c2b000 0x400>;
interrupts = <0 8 4>;
clocks = <&apb1_gates 1>;
@@ -885,7 +885,7 @@
};
i2c2: i2c@01c2b400 {
- compatible = "allwinner,sun4i-i2c";
+ compatible = "allwinner,sun7i-a20-i2c", "allwinner,sun4i-a10-i2c";
reg = <0x01c2b400 0x400>;
interrupts = <0 9 4>;
clocks = <&apb1_gates 2>;
@@ -896,7 +896,7 @@
};
i2c3: i2c@01c2b800 {
- compatible = "allwinner,sun4i-i2c";
+ compatible = "allwinner,sun7i-a20-i2c", "allwinner,sun4i-a10-i2c";
reg = <0x01c2b800 0x400>;
interrupts = <0 88 4>;
clocks = <&apb1_gates 3>;
@@ -907,7 +907,7 @@
};
i2c4: i2c@01c2c000 {
- compatible = "allwinner,sun4i-i2c";
+ compatible = "allwinner,sun7i-a20-i2c", "allwinner,sun4i-a10-i2c";
reg = <0x01c2c000 0x400>;
interrupts = <0 89 4>;
clocks = <&apb1_gates 15>;
diff --git a/arch/arm/common/mcpm_entry.c b/arch/arm/common/mcpm_entry.c
index 86fd60fefbc9..f91136ab447e 100644
--- a/arch/arm/common/mcpm_entry.c
+++ b/arch/arm/common/mcpm_entry.c
@@ -106,14 +106,14 @@ void mcpm_cpu_power_down(void)
BUG();
}
-int mcpm_cpu_power_down_finish(unsigned int cpu, unsigned int cluster)
+int mcpm_wait_for_cpu_powerdown(unsigned int cpu, unsigned int cluster)
{
int ret;
- if (WARN_ON_ONCE(!platform_ops || !platform_ops->power_down_finish))
+ if (WARN_ON_ONCE(!platform_ops || !platform_ops->wait_for_powerdown))
return -EUNATCH;
- ret = platform_ops->power_down_finish(cpu, cluster);
+ ret = platform_ops->wait_for_powerdown(cpu, cluster);
if (ret)
pr_warn("%s: cpu %u, cluster %u failed to power down (%d)\n",
__func__, cpu, cluster, ret);
diff --git a/arch/arm/common/mcpm_platsmp.c b/arch/arm/common/mcpm_platsmp.c
index 177251a4dd9a..92e54d7c6f46 100644
--- a/arch/arm/common/mcpm_platsmp.c
+++ b/arch/arm/common/mcpm_platsmp.c
@@ -62,7 +62,7 @@ static int mcpm_cpu_kill(unsigned int cpu)
cpu_to_pcpu(cpu, &pcpu, &pcluster);
- return !mcpm_cpu_power_down_finish(pcpu, pcluster);
+ return !mcpm_wait_for_cpu_powerdown(pcpu, pcluster);
}
static int mcpm_cpu_disable(unsigned int cpu)
diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild
index 23e728ecf8ab..f5a357601983 100644
--- a/arch/arm/include/asm/Kbuild
+++ b/arch/arm/include/asm/Kbuild
@@ -21,6 +21,7 @@ generic-y += parport.h
generic-y += poll.h
generic-y += preempt.h
generic-y += resource.h
+generic-y += rwsem.h
generic-y += sections.h
generic-y += segment.h
generic-y += sembuf.h
diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index b974184f9941..57f0584e8d97 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -312,7 +312,7 @@
* you cannot return to the original mode.
*/
.macro safe_svcmode_maskall reg:req
-#if __LINUX_ARM_ARCH__ >= 6
+#if __LINUX_ARM_ARCH__ >= 6 && !defined(CONFIG_CPU_V7M)
mrs \reg , cpsr
eor \reg, \reg, #HYP_MODE
tst \reg, #MODE_MASK
diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h
index 8b8b61685a34..fd43f7f55b70 100644
--- a/arch/arm/include/asm/cacheflush.h
+++ b/arch/arm/include/asm/cacheflush.h
@@ -212,7 +212,7 @@ extern void copy_to_user_page(struct vm_area_struct *, struct page *,
static inline void __flush_icache_all(void)
{
__flush_icache_preferred();
- dsb();
+ dsb(ishst);
}
/*
@@ -487,4 +487,6 @@ int set_memory_rw(unsigned long addr, int numpages);
int set_memory_x(unsigned long addr, int numpages);
int set_memory_nx(unsigned long addr, int numpages);
+void flush_uprobe_xol_access(struct page *page, unsigned long uaddr,
+ void *kaddr, unsigned long len);
#endif
diff --git a/arch/arm/include/asm/cp15.h b/arch/arm/include/asm/cp15.h
index 6493802f880a..c3f11524f10c 100644
--- a/arch/arm/include/asm/cp15.h
+++ b/arch/arm/include/asm/cp15.h
@@ -42,24 +42,23 @@
#ifndef __ASSEMBLY__
#if __LINUX_ARM_ARCH__ >= 4
-#define vectors_high() (cr_alignment & CR_V)
+#define vectors_high() (get_cr() & CR_V)
#else
#define vectors_high() (0)
#endif
#ifdef CONFIG_CPU_CP15
-extern unsigned long cr_no_alignment; /* defined in entry-armv.S */
extern unsigned long cr_alignment; /* defined in entry-armv.S */
-static inline unsigned int get_cr(void)
+static inline unsigned long get_cr(void)
{
- unsigned int val;
+ unsigned long val;
asm("mrc p15, 0, %0, c1, c0, 0 @ get CR" : "=r" (val) : : "cc");
return val;
}
-static inline void set_cr(unsigned int val)
+static inline void set_cr(unsigned long val)
{
asm volatile("mcr p15, 0, %0, c1, c0, 0 @ set CR"
: : "r" (val) : "cc");
@@ -80,10 +79,6 @@ static inline void set_auxcr(unsigned int val)
isb();
}
-#ifndef CONFIG_SMP
-extern void adjust_cr(unsigned long mask, unsigned long set);
-#endif
-
#define CPACC_FULL(n) (3 << (n * 2))
#define CPACC_SVC(n) (1 << (n * 2))
#define CPACC_DISABLE(n) (0 << (n * 2))
@@ -106,13 +101,17 @@ static inline void set_copro_access(unsigned int val)
#else /* ifdef CONFIG_CPU_CP15 */
/*
- * cr_alignment and cr_no_alignment are tightly coupled to cp15 (at least in the
- * minds of the developers). Yielding 0 for machines without a cp15 (and making
- * it read-only) is fine for most cases and saves quite some #ifdeffery.
+ * cr_alignment is tightly coupled to cp15 (at least in the minds of the
+ * developers). Yielding 0 for machines without a cp15 (and making it
+ * read-only) is fine for most cases and saves quite some #ifdeffery.
*/
-#define cr_no_alignment UL(0)
#define cr_alignment UL(0)
+static inline unsigned long get_cr(void)
+{
+ return 0;
+}
+
#endif /* ifdef CONFIG_CPU_CP15 / else */
#endif /* ifndef __ASSEMBLY__ */
diff --git a/arch/arm/include/asm/cputype.h b/arch/arm/include/asm/cputype.h
index 4764344367d4..8c2b7321a478 100644
--- a/arch/arm/include/asm/cputype.h
+++ b/arch/arm/include/asm/cputype.h
@@ -72,6 +72,7 @@
#define ARM_CPU_PART_CORTEX_A15 0xC0F0
#define ARM_CPU_PART_CORTEX_A7 0xC070
#define ARM_CPU_PART_CORTEX_A12 0xC0D0
+#define ARM_CPU_PART_CORTEX_A17 0xC0E0
#define ARM_CPU_XSCALE_ARCH_MASK 0xe000
#define ARM_CPU_XSCALE_ARCH_V1 0x2000
diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h
index e701a4d9aa59..c45b61a4b4a5 100644
--- a/arch/arm/include/asm/dma-mapping.h
+++ b/arch/arm/include/asm/dma-mapping.h
@@ -58,21 +58,37 @@ static inline int dma_set_mask(struct device *dev, u64 mask)
#ifndef __arch_pfn_to_dma
static inline dma_addr_t pfn_to_dma(struct device *dev, unsigned long pfn)
{
+ if (dev)
+ pfn -= dev->dma_pfn_offset;
return (dma_addr_t)__pfn_to_bus(pfn);
}
static inline unsigned long dma_to_pfn(struct device *dev, dma_addr_t addr)
{
- return __bus_to_pfn(addr);
+ unsigned long pfn = __bus_to_pfn(addr);
+
+ if (dev)
+ pfn += dev->dma_pfn_offset;
+
+ return pfn;
}
static inline void *dma_to_virt(struct device *dev, dma_addr_t addr)
{
+ if (dev) {
+ unsigned long pfn = dma_to_pfn(dev, addr);
+
+ return phys_to_virt(__pfn_to_phys(pfn));
+ }
+
return (void *)__bus_to_virt((unsigned long)addr);
}
static inline dma_addr_t virt_to_dma(struct device *dev, void *addr)
{
+ if (dev)
+ return pfn_to_dma(dev, virt_to_pfn(addr));
+
return (dma_addr_t)__virt_to_bus((unsigned long)(addr));
}
@@ -105,6 +121,13 @@ static inline unsigned long dma_max_pfn(struct device *dev)
}
#define dma_max_pfn(dev) dma_max_pfn(dev)
+static inline int set_arch_dma_coherent_ops(struct device *dev)
+{
+ set_dma_ops(dev, &arm_coherent_dma_ops);
+ return 0;
+}
+#define set_arch_dma_coherent_ops(dev) set_arch_dma_coherent_ops(dev)
+
static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
{
unsigned int offset = paddr & ~PAGE_MASK;
diff --git a/arch/arm/include/asm/fixmap.h b/arch/arm/include/asm/fixmap.h
index bbae919bceb4..74124b0d0d79 100644
--- a/arch/arm/include/asm/fixmap.h
+++ b/arch/arm/include/asm/fixmap.h
@@ -1,24 +1,11 @@
#ifndef _ASM_FIXMAP_H
#define _ASM_FIXMAP_H
-/*
- * Nothing too fancy for now.
- *
- * On ARM we already have well known fixed virtual addresses imposed by
- * the architecture such as the vector page which is located at 0xffff0000,
- * therefore a second level page table is already allocated covering
- * 0xfff00000 upwards.
- *
- * The cache flushing code in proc-xscale.S uses the virtual area between
- * 0xfffe0000 and 0xfffeffff.
- */
-
-#define FIXADDR_START 0xfff00000UL
-#define FIXADDR_TOP 0xfffe0000UL
+#define FIXADDR_START 0xffc00000UL
+#define FIXADDR_TOP 0xffe00000UL
#define FIXADDR_SIZE (FIXADDR_TOP - FIXADDR_START)
-#define FIX_KMAP_BEGIN 0
-#define FIX_KMAP_END (FIXADDR_SIZE >> PAGE_SHIFT)
+#define FIX_KMAP_NR_PTES (FIXADDR_SIZE >> PAGE_SHIFT)
#define __fix_to_virt(x) (FIXADDR_START + ((x) << PAGE_SHIFT))
#define __virt_to_fix(x) (((x) - FIXADDR_START) >> PAGE_SHIFT)
@@ -27,7 +14,7 @@ extern void __this_fixmap_does_not_exist(void);
static inline unsigned long fix_to_virt(const unsigned int idx)
{
- if (idx >= FIX_KMAP_END)
+ if (idx >= FIX_KMAP_NR_PTES)
__this_fixmap_does_not_exist();
return __fix_to_virt(idx);
}
diff --git a/arch/arm/include/asm/ftrace.h b/arch/arm/include/asm/ftrace.h
index f89515adac60..eb577f4f5f70 100644
--- a/arch/arm/include/asm/ftrace.h
+++ b/arch/arm/include/asm/ftrace.h
@@ -52,15 +52,7 @@ extern inline void *return_address(unsigned int level)
#endif
-#define HAVE_ARCH_CALLER_ADDR
-
-#define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0))
-#define CALLER_ADDR1 ((unsigned long)return_address(1))
-#define CALLER_ADDR2 ((unsigned long)return_address(2))
-#define CALLER_ADDR3 ((unsigned long)return_address(3))
-#define CALLER_ADDR4 ((unsigned long)return_address(4))
-#define CALLER_ADDR5 ((unsigned long)return_address(5))
-#define CALLER_ADDR6 ((unsigned long)return_address(6))
+#define ftrace_return_addr(n) return_address(n)
#endif /* ifndef __ASSEMBLY__ */
diff --git a/arch/arm/include/asm/glue-df.h b/arch/arm/include/asm/glue-df.h
index 6b70f1b46a6e..04e18b656659 100644
--- a/arch/arm/include/asm/glue-df.h
+++ b/arch/arm/include/asm/glue-df.h
@@ -31,14 +31,6 @@
#undef CPU_DABORT_HANDLER
#undef MULTI_DABORT
-#if defined(CONFIG_CPU_ARM710)
-# ifdef CPU_DABORT_HANDLER
-# define MULTI_DABORT 1
-# else
-# define CPU_DABORT_HANDLER cpu_arm7_data_abort
-# endif
-#endif
-
#ifdef CONFIG_CPU_ABRT_EV4
# ifdef CPU_DABORT_HANDLER
# define MULTI_DABORT 1
diff --git a/arch/arm/include/asm/hardware/cache-l2x0.h b/arch/arm/include/asm/hardware/cache-l2x0.h
index 6795ff743b3d..3a5ec1c25659 100644
--- a/arch/arm/include/asm/hardware/cache-l2x0.h
+++ b/arch/arm/include/asm/hardware/cache-l2x0.h
@@ -26,8 +26,8 @@
#define L2X0_CACHE_TYPE 0x004
#define L2X0_CTRL 0x100
#define L2X0_AUX_CTRL 0x104
-#define L2X0_TAG_LATENCY_CTRL 0x108
-#define L2X0_DATA_LATENCY_CTRL 0x10C
+#define L310_TAG_LATENCY_CTRL 0x108
+#define L310_DATA_LATENCY_CTRL 0x10C
#define L2X0_EVENT_CNT_CTRL 0x200
#define L2X0_EVENT_CNT1_CFG 0x204
#define L2X0_EVENT_CNT0_CFG 0x208
@@ -54,53 +54,93 @@
#define L2X0_LOCKDOWN_WAY_D_BASE 0x900
#define L2X0_LOCKDOWN_WAY_I_BASE 0x904
#define L2X0_LOCKDOWN_STRIDE 0x08
-#define L2X0_ADDR_FILTER_START 0xC00
-#define L2X0_ADDR_FILTER_END 0xC04
+#define L310_ADDR_FILTER_START 0xC00
+#define L310_ADDR_FILTER_END 0xC04
#define L2X0_TEST_OPERATION 0xF00
#define L2X0_LINE_DATA 0xF10
#define L2X0_LINE_TAG 0xF30
#define L2X0_DEBUG_CTRL 0xF40
-#define L2X0_PREFETCH_CTRL 0xF60
-#define L2X0_POWER_CTRL 0xF80
-#define L2X0_DYNAMIC_CLK_GATING_EN (1 << 1)
-#define L2X0_STNDBY_MODE_EN (1 << 0)
+#define L310_PREFETCH_CTRL 0xF60
+#define L310_POWER_CTRL 0xF80
+#define L310_DYNAMIC_CLK_GATING_EN (1 << 1)
+#define L310_STNDBY_MODE_EN (1 << 0)
/* Registers shifts and masks */
#define L2X0_CACHE_ID_PART_MASK (0xf << 6)
#define L2X0_CACHE_ID_PART_L210 (1 << 6)
+#define L2X0_CACHE_ID_PART_L220 (2 << 6)
#define L2X0_CACHE_ID_PART_L310 (3 << 6)
#define L2X0_CACHE_ID_RTL_MASK 0x3f
-#define L2X0_CACHE_ID_RTL_R0P0 0x0
-#define L2X0_CACHE_ID_RTL_R1P0 0x2
-#define L2X0_CACHE_ID_RTL_R2P0 0x4
-#define L2X0_CACHE_ID_RTL_R3P0 0x5
-#define L2X0_CACHE_ID_RTL_R3P1 0x6
-#define L2X0_CACHE_ID_RTL_R3P2 0x8
+#define L210_CACHE_ID_RTL_R0P2_02 0x00
+#define L210_CACHE_ID_RTL_R0P1 0x01
+#define L210_CACHE_ID_RTL_R0P2_01 0x02
+#define L210_CACHE_ID_RTL_R0P3 0x03
+#define L210_CACHE_ID_RTL_R0P4 0x0b
+#define L210_CACHE_ID_RTL_R0P5 0x0f
+#define L220_CACHE_ID_RTL_R1P7_01REL0 0x06
+#define L310_CACHE_ID_RTL_R0P0 0x00
+#define L310_CACHE_ID_RTL_R1P0 0x02
+#define L310_CACHE_ID_RTL_R2P0 0x04
+#define L310_CACHE_ID_RTL_R3P0 0x05
+#define L310_CACHE_ID_RTL_R3P1 0x06
+#define L310_CACHE_ID_RTL_R3P1_50REL0 0x07
+#define L310_CACHE_ID_RTL_R3P2 0x08
+#define L310_CACHE_ID_RTL_R3P3 0x09
-#define L2X0_AUX_CTRL_MASK 0xc0000fff
+/* L2C auxiliary control register - bits common to L2C-210/220/310 */
+#define L2C_AUX_CTRL_WAY_SIZE_SHIFT 17
+#define L2C_AUX_CTRL_WAY_SIZE_MASK (7 << 17)
+#define L2C_AUX_CTRL_WAY_SIZE(n) ((n) << 17)
+#define L2C_AUX_CTRL_EVTMON_ENABLE BIT(20)
+#define L2C_AUX_CTRL_PARITY_ENABLE BIT(21)
+#define L2C_AUX_CTRL_SHARED_OVERRIDE BIT(22)
+/* L2C-210/220 common bits */
#define L2X0_AUX_CTRL_DATA_RD_LATENCY_SHIFT 0
-#define L2X0_AUX_CTRL_DATA_RD_LATENCY_MASK 0x7
+#define L2X0_AUX_CTRL_DATA_RD_LATENCY_MASK (7 << 0)
#define L2X0_AUX_CTRL_DATA_WR_LATENCY_SHIFT 3
-#define L2X0_AUX_CTRL_DATA_WR_LATENCY_MASK (0x7 << 3)
+#define L2X0_AUX_CTRL_DATA_WR_LATENCY_MASK (7 << 3)
#define L2X0_AUX_CTRL_TAG_LATENCY_SHIFT 6
-#define L2X0_AUX_CTRL_TAG_LATENCY_MASK (0x7 << 6)
+#define L2X0_AUX_CTRL_TAG_LATENCY_MASK (7 << 6)
#define L2X0_AUX_CTRL_DIRTY_LATENCY_SHIFT 9
-#define L2X0_AUX_CTRL_DIRTY_LATENCY_MASK (0x7 << 9)
-#define L2X0_AUX_CTRL_ASSOCIATIVITY_SHIFT 16
-#define L2X0_AUX_CTRL_WAY_SIZE_SHIFT 17
-#define L2X0_AUX_CTRL_WAY_SIZE_MASK (0x7 << 17)
-#define L2X0_AUX_CTRL_SHARE_OVERRIDE_SHIFT 22
-#define L2X0_AUX_CTRL_NS_LOCKDOWN_SHIFT 26
-#define L2X0_AUX_CTRL_NS_INT_CTRL_SHIFT 27
-#define L2X0_AUX_CTRL_DATA_PREFETCH_SHIFT 28
-#define L2X0_AUX_CTRL_INSTR_PREFETCH_SHIFT 29
-#define L2X0_AUX_CTRL_EARLY_BRESP_SHIFT 30
+#define L2X0_AUX_CTRL_DIRTY_LATENCY_MASK (7 << 9)
+#define L2X0_AUX_CTRL_ASSOC_SHIFT 13
+#define L2X0_AUX_CTRL_ASSOC_MASK (15 << 13)
+/* L2C-210 specific bits */
+#define L210_AUX_CTRL_WRAP_DISABLE BIT(12)
+#define L210_AUX_CTRL_WA_OVERRIDE BIT(23)
+#define L210_AUX_CTRL_EXCLUSIVE_ABORT BIT(24)
+/* L2C-220 specific bits */
+#define L220_AUX_CTRL_EXCLUSIVE_CACHE BIT(12)
+#define L220_AUX_CTRL_FWA_SHIFT 23
+#define L220_AUX_CTRL_FWA_MASK (3 << 23)
+#define L220_AUX_CTRL_NS_LOCKDOWN BIT(26)
+#define L220_AUX_CTRL_NS_INT_CTRL BIT(27)
+/* L2C-310 specific bits */
+#define L310_AUX_CTRL_FULL_LINE_ZERO BIT(0) /* R2P0+ */
+#define L310_AUX_CTRL_HIGHPRIO_SO_DEV BIT(10) /* R2P0+ */
+#define L310_AUX_CTRL_STORE_LIMITATION BIT(11) /* R2P0+ */
+#define L310_AUX_CTRL_EXCLUSIVE_CACHE BIT(12)
+#define L310_AUX_CTRL_ASSOCIATIVITY_16 BIT(16)
+#define L310_AUX_CTRL_CACHE_REPLACE_RR BIT(25) /* R2P0+ */
+#define L310_AUX_CTRL_NS_LOCKDOWN BIT(26)
+#define L310_AUX_CTRL_NS_INT_CTRL BIT(27)
+#define L310_AUX_CTRL_DATA_PREFETCH BIT(28)
+#define L310_AUX_CTRL_INSTR_PREFETCH BIT(29)
+#define L310_AUX_CTRL_EARLY_BRESP BIT(30) /* R2P0+ */
-#define L2X0_LATENCY_CTRL_SETUP_SHIFT 0
-#define L2X0_LATENCY_CTRL_RD_SHIFT 4
-#define L2X0_LATENCY_CTRL_WR_SHIFT 8
+#define L310_LATENCY_CTRL_SETUP(n) ((n) << 0)
+#define L310_LATENCY_CTRL_RD(n) ((n) << 4)
+#define L310_LATENCY_CTRL_WR(n) ((n) << 8)
-#define L2X0_ADDR_FILTER_EN 1
+#define L310_ADDR_FILTER_EN 1
+
+#define L310_PREFETCH_CTRL_OFFSET_MASK 0x1f
+#define L310_PREFETCH_CTRL_DBL_LINEFILL_INCR BIT(23)
+#define L310_PREFETCH_CTRL_PREFETCH_DROP BIT(24)
+#define L310_PREFETCH_CTRL_DBL_LINEFILL_WRAP BIT(27)
+#define L310_PREFETCH_CTRL_DATA_PREFETCH BIT(28)
+#define L310_PREFETCH_CTRL_INSTR_PREFETCH BIT(29)
+#define L310_PREFETCH_CTRL_DBL_LINEFILL BIT(30)
#define L2X0_CTRL_EN 1
diff --git a/arch/arm/include/asm/highmem.h b/arch/arm/include/asm/highmem.h
index 91b99abe7a95..535579511ed0 100644
--- a/arch/arm/include/asm/highmem.h
+++ b/arch/arm/include/asm/highmem.h
@@ -18,6 +18,7 @@
} while (0)
extern pte_t *pkmap_page_table;
+extern pte_t *fixmap_page_table;
extern void *kmap_high(struct page *page);
extern void kunmap_high(struct page *page);
diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h
index 8aa4cca74501..3d23418cbddd 100644
--- a/arch/arm/include/asm/io.h
+++ b/arch/arm/include/asm/io.h
@@ -179,6 +179,12 @@ static inline void __iomem *__typesafe_io(unsigned long addr)
/* PCI fixed i/o mapping */
#define PCI_IO_VIRT_BASE 0xfee00000
+#if defined(CONFIG_PCI)
+void pci_ioremap_set_mem_type(int mem_type);
+#else
+static inline void pci_ioremap_set_mem_type(int mem_type) {}
+#endif
+
extern int pci_ioremap_io(unsigned int offset, phys_addr_t phys_addr);
/*
diff --git a/arch/arm/include/asm/mach/arch.h b/arch/arm/include/asm/mach/arch.h
index 17a3fa2979e8..060a75e99263 100644
--- a/arch/arm/include/asm/mach/arch.h
+++ b/arch/arm/include/asm/mach/arch.h
@@ -14,7 +14,6 @@
#include <linux/reboot.h>
struct tag;
-struct meminfo;
struct pt_regs;
struct smp_operations;
#ifdef CONFIG_SMP
@@ -45,10 +44,12 @@ struct machine_desc {
unsigned char reserve_lp1 :1; /* never has lp1 */
unsigned char reserve_lp2 :1; /* never has lp2 */
enum reboot_mode reboot_mode; /* default restart mode */
+ unsigned l2c_aux_val; /* L2 cache aux value */
+ unsigned l2c_aux_mask; /* L2 cache aux mask */
+ void (*l2c_write_sec)(unsigned long, unsigned);
struct smp_operations *smp; /* SMP operations */
bool (*smp_init)(void);
- void (*fixup)(struct tag *, char **,
- struct meminfo *);
+ void (*fixup)(struct tag *, char **);
void (*init_meminfo)(void);
void (*reserve)(void);/* reserve mem blocks */
void (*map_io)(void);/* IO mapping function */
diff --git a/arch/arm/include/asm/mcpm.h b/arch/arm/include/asm/mcpm.h
index a5ff410dcdb6..d9702eb0b02b 100644
--- a/arch/arm/include/asm/mcpm.h
+++ b/arch/arm/include/asm/mcpm.h
@@ -98,14 +98,14 @@ int mcpm_cpu_power_up(unsigned int cpu, unsigned int cluster);
* previously in which case the caller should take appropriate action.
*
* On success, the CPU is not guaranteed to be truly halted until
- * mcpm_cpu_power_down_finish() subsequently returns non-zero for the
+ * mcpm_wait_for_cpu_powerdown() subsequently returns non-zero for the
* specified cpu. Until then, other CPUs should make sure they do not
* trash memory the target CPU might be executing/accessing.
*/
void mcpm_cpu_power_down(void);
/**
- * mcpm_cpu_power_down_finish - wait for a specified CPU to halt, and
+ * mcpm_wait_for_cpu_powerdown - wait for a specified CPU to halt, and
* make sure it is powered off
*
* @cpu: CPU number within given cluster
@@ -127,7 +127,7 @@ void mcpm_cpu_power_down(void);
* - zero if the CPU is in a safely parked state
* - nonzero otherwise (e.g., timeout)
*/
-int mcpm_cpu_power_down_finish(unsigned int cpu, unsigned int cluster);
+int mcpm_wait_for_cpu_powerdown(unsigned int cpu, unsigned int cluster);
/**
* mcpm_cpu_suspend - bring the calling CPU in a suspended state
@@ -171,7 +171,7 @@ int mcpm_cpu_powered_up(void);
struct mcpm_platform_ops {
int (*power_up)(unsigned int cpu, unsigned int cluster);
void (*power_down)(void);
- int (*power_down_finish)(unsigned int cpu, unsigned int cluster);
+ int (*wait_for_powerdown)(unsigned int cpu, unsigned int cluster);
void (*suspend)(u64);
void (*powered_up)(void);
};
diff --git a/arch/arm/include/asm/memblock.h b/arch/arm/include/asm/memblock.h
index c2f5102ae659..bf47a6c110a2 100644
--- a/arch/arm/include/asm/memblock.h
+++ b/arch/arm/include/asm/memblock.h
@@ -1,10 +1,9 @@
#ifndef _ASM_ARM_MEMBLOCK_H
#define _ASM_ARM_MEMBLOCK_H
-struct meminfo;
struct machine_desc;
-void arm_memblock_init(struct meminfo *, const struct machine_desc *);
+void arm_memblock_init(const struct machine_desc *);
phys_addr_t arm_memblock_steal(phys_addr_t size, phys_addr_t align);
#endif
diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h
index 02fa2558f662..2b751464d6ff 100644
--- a/arch/arm/include/asm/memory.h
+++ b/arch/arm/include/asm/memory.h
@@ -83,8 +83,6 @@
*/
#define IOREMAP_MAX_ORDER 24
-#define CONSISTENT_END (0xffe00000UL)
-
#else /* CONFIG_MMU */
/*
diff --git a/arch/arm/include/asm/outercache.h b/arch/arm/include/asm/outercache.h
index f94784f0e3a6..891a56b35bcf 100644
--- a/arch/arm/include/asm/outercache.h
+++ b/arch/arm/include/asm/outercache.h
@@ -28,53 +28,84 @@ struct outer_cache_fns {
void (*clean_range)(unsigned long, unsigned long);
void (*flush_range)(unsigned long, unsigned long);
void (*flush_all)(void);
- void (*inv_all)(void);
void (*disable)(void);
#ifdef CONFIG_OUTER_CACHE_SYNC
void (*sync)(void);
#endif
- void (*set_debug)(unsigned long);
void (*resume)(void);
+
+ /* This is an ARM L2C thing */
+ void (*write_sec)(unsigned long, unsigned);
};
extern struct outer_cache_fns outer_cache;
#ifdef CONFIG_OUTER_CACHE
-
+/**
+ * outer_inv_range - invalidate range of outer cache lines
+ * @start: starting physical address, inclusive
+ * @end: end physical address, exclusive
+ */
static inline void outer_inv_range(phys_addr_t start, phys_addr_t end)
{
if (outer_cache.inv_range)
outer_cache.inv_range(start, end);
}
+
+/**
+ * outer_clean_range - clean dirty outer cache lines
+ * @start: starting physical address, inclusive
+ * @end: end physical address, exclusive
+ */
static inline void outer_clean_range(phys_addr_t start, phys_addr_t end)
{
if (outer_cache.clean_range)
outer_cache.clean_range(start, end);
}
+
+/**
+ * outer_flush_range - clean and invalidate outer cache lines
+ * @start: starting physical address, inclusive
+ * @end: end physical address, exclusive
+ */
static inline void outer_flush_range(phys_addr_t start, phys_addr_t end)
{
if (outer_cache.flush_range)
outer_cache.flush_range(start, end);
}
+/**
+ * outer_flush_all - clean and invalidate all cache lines in the outer cache
+ *
+ * Note: depending on implementation, this may not be atomic - it must
+ * only be called with interrupts disabled and no other active outer
+ * cache masters.
+ *
+ * It is intended that this function is only used by implementations
+ * needing to override the outer_cache.disable() method due to security.
+ * (Some implementations perform this as a clean followed by an invalidate.)
+ */
static inline void outer_flush_all(void)
{
if (outer_cache.flush_all)
outer_cache.flush_all();
}
-static inline void outer_inv_all(void)
-{
- if (outer_cache.inv_all)
- outer_cache.inv_all();
-}
-
-static inline void outer_disable(void)
-{
- if (outer_cache.disable)
- outer_cache.disable();
-}
+/**
+ * outer_disable - clean, invalidate and disable the outer cache
+ *
+ * Disable the outer cache, ensuring that any data contained in the outer
+ * cache is pushed out to lower levels of system memory. The note and
+ * conditions above concerning outer_flush_all() applies here.
+ */
+extern void outer_disable(void);
+/**
+ * outer_resume - restore the cache configuration and re-enable outer cache
+ *
+ * Restore any configuration that the cache had when previously enabled,
+ * and re-enable the outer cache.
+ */
static inline void outer_resume(void)
{
if (outer_cache.resume)
@@ -90,13 +121,18 @@ static inline void outer_clean_range(phys_addr_t start, phys_addr_t end)
static inline void outer_flush_range(phys_addr_t start, phys_addr_t end)
{ }
static inline void outer_flush_all(void) { }
-static inline void outer_inv_all(void) { }
static inline void outer_disable(void) { }
static inline void outer_resume(void) { }
#endif
#ifdef CONFIG_OUTER_CACHE_SYNC
+/**
+ * outer_sync - perform a sync point for outer cache
+ *
+ * Ensure that all outer cache operations are complete and any store
+ * buffers are drained.
+ */
static inline void outer_sync(void)
{
if (outer_cache.sync)
diff --git a/arch/arm/include/asm/setup.h b/arch/arm/include/asm/setup.h
index 8d6a089dfb76..e0adb9f1bf94 100644
--- a/arch/arm/include/asm/setup.h
+++ b/arch/arm/include/asm/setup.h
@@ -21,34 +21,6 @@
#define __tagtable(tag, fn) \
static const struct tagtable __tagtable_##fn __tag = { tag, fn }
-/*
- * Memory map description
- */
-#define NR_BANKS CONFIG_ARM_NR_BANKS
-
-struct membank {
- phys_addr_t start;
- phys_addr_t size;
- unsigned int highmem;
-};
-
-struct meminfo {
- int nr_banks;
- struct membank bank[NR_BANKS];
-};
-
-extern struct meminfo meminfo;
-
-#define for_each_bank(iter,mi) \
- for (iter = 0; iter < (mi)->nr_banks; iter++)
-
-#define bank_pfn_start(bank) __phys_to_pfn((bank)->start)
-#define bank_pfn_end(bank) __phys_to_pfn((bank)->start + (bank)->size)
-#define bank_pfn_size(bank) ((bank)->size >> PAGE_SHIFT)
-#define bank_phys_start(bank) (bank)->start
-#define bank_phys_end(bank) ((bank)->start + (bank)->size)
-#define bank_phys_size(bank) (bank)->size
-
extern int arm_add_memory(u64 start, u64 size);
extern void early_print(const char *str, ...);
extern void dump_machine_table(void);
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index 040619c32d68..38ddd9f83d0e 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -39,6 +39,7 @@ obj-$(CONFIG_ARTHUR) += arthur.o
obj-$(CONFIG_ISA_DMA) += dma-isa.o
obj-$(CONFIG_PCI) += bios32.o isa.o
obj-$(CONFIG_ARM_CPU_SUSPEND) += sleep.o suspend.o
+obj-$(CONFIG_HIBERNATION) += hibernate.o
obj-$(CONFIG_SMP) += smp.o
ifdef CONFIG_MMU
obj-$(CONFIG_SMP) += smp_tlb.o
diff --git a/arch/arm/kernel/atags_parse.c b/arch/arm/kernel/atags_parse.c
index 8c14de8180c0..7807ef58a2ab 100644
--- a/arch/arm/kernel/atags_parse.c
+++ b/arch/arm/kernel/atags_parse.c
@@ -22,6 +22,7 @@
#include <linux/fs.h>
#include <linux/root_dev.h>
#include <linux/screen_info.h>
+#include <linux/memblock.h>
#include <asm/setup.h>
#include <asm/system_info.h>
@@ -222,10 +223,10 @@ setup_machine_tags(phys_addr_t __atags_pointer, unsigned int machine_nr)
}
if (mdesc->fixup)
- mdesc->fixup(tags, &from, &meminfo);
+ mdesc->fixup(tags, &from);
if (tags->hdr.tag == ATAG_CORE) {
- if (meminfo.nr_banks != 0)
+ if (memblock_phys_mem_size())
squash_mem_tags(tags);
save_atags(tags);
parse_tags(tags);
diff --git a/arch/arm/kernel/devtree.c b/arch/arm/kernel/devtree.c
index ea9ce92a4b52..e94a157ddff1 100644
--- a/arch/arm/kernel/devtree.c
+++ b/arch/arm/kernel/devtree.c
@@ -27,10 +27,6 @@
#include <asm/mach/arch.h>
#include <asm/mach-types.h>
-void __init early_init_dt_add_memory_arch(u64 base, u64 size)
-{
- arm_add_memory(base, size);
-}
#ifdef CONFIG_SMP
extern struct of_cpu_method __cpu_method_of_table[];
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 1879e8dd2acc..52a949a8077d 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -344,7 +344,7 @@ ENDPROC(__pabt_svc)
@
@ Enable the alignment trap while in kernel mode
@
- alignment_trap r0
+ alignment_trap r0, .LCcralign
@
@ Clear FP to mark the first stack frame
@@ -413,6 +413,11 @@ __und_usr:
@
adr r9, BSYM(ret_from_exception)
+ @ IRQs must be enabled before attempting to read the instruction from
+ @ user space since that could cause a page/translation fault if the
+ @ page table was modified by another CPU.
+ enable_irq
+
tst r3, #PSR_T_BIT @ Thumb mode?
bne __und_usr_thumb
sub r4, r2, #4 @ ARM instr at LR - 4
@@ -484,7 +489,8 @@ ENDPROC(__und_usr)
*/
.pushsection .fixup, "ax"
.align 2
-4: mov pc, r9
+4: str r4, [sp, #S_PC] @ retry current instruction
+ mov pc, r9
.popsection
.pushsection __ex_table,"a"
.long 1b, 4b
@@ -517,7 +523,7 @@ ENDPROC(__und_usr)
* r9 = normal "successful" return address
* r10 = this threads thread_info structure
* lr = unrecognised instruction return address
- * IRQs disabled, FIQs enabled.
+ * IRQs enabled, FIQs enabled.
*/
@
@ Fall-through from Thumb-2 __und_usr
@@ -624,7 +630,6 @@ call_fpe:
#endif
do_fpe:
- enable_irq
ldr r4, .LCfp
add r10, r10, #TI_FPSTATE @ r10 = workspace
ldr pc, [r4] @ Call FP module USR entry point
@@ -652,8 +657,7 @@ __und_usr_fault_32:
b 1f
__und_usr_fault_16:
mov r1, #2
-1: enable_irq
- mov r0, sp
+1: mov r0, sp
adr lr, BSYM(ret_from_exception)
b __und_fault
ENDPROC(__und_usr_fault_32)
@@ -1143,11 +1147,8 @@ __vectors_start:
.data
.globl cr_alignment
- .globl cr_no_alignment
cr_alignment:
.space 4
-cr_no_alignment:
- .space 4
#ifdef CONFIG_MULTI_IRQ_HANDLER
.globl handle_arch_irq
diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index a2dcafdf1bc8..7139d4a7dea7 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -365,13 +365,7 @@ ENTRY(vector_swi)
str r0, [sp, #S_OLD_R0] @ Save OLD_R0
#endif
zero_fp
-
-#ifdef CONFIG_ALIGNMENT_TRAP
- ldr ip, __cr_alignment
- ldr ip, [ip]
- mcr p15, 0, ip, c1, c0 @ update control register
-#endif
-
+ alignment_trap ip, __cr_alignment
enable_irq
ct_user_exit
get_thread_info tsk
diff --git a/arch/arm/kernel/entry-header.S b/arch/arm/kernel/entry-header.S
index efb208de75ec..5d702f8900b1 100644
--- a/arch/arm/kernel/entry-header.S
+++ b/arch/arm/kernel/entry-header.S
@@ -37,9 +37,9 @@
#endif
.endm
- .macro alignment_trap, rtemp
+ .macro alignment_trap, rtemp, label
#ifdef CONFIG_ALIGNMENT_TRAP
- ldr \rtemp, .LCcralign
+ ldr \rtemp, \label
ldr \rtemp, [\rtemp]
mcr p15, 0, \rtemp, c1, c0
#endif
diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c
index c108ddcb9ba4..af9a8a927a4e 100644
--- a/arch/arm/kernel/ftrace.c
+++ b/arch/arm/kernel/ftrace.c
@@ -14,6 +14,7 @@
#include <linux/ftrace.h>
#include <linux/uaccess.h>
+#include <linux/module.h>
#include <asm/cacheflush.h>
#include <asm/opcodes.h>
@@ -63,6 +64,18 @@ static unsigned long adjust_address(struct dyn_ftrace *rec, unsigned long addr)
}
#endif
+int ftrace_arch_code_modify_prepare(void)
+{
+ set_all_modules_text_rw();
+ return 0;
+}
+
+int ftrace_arch_code_modify_post_process(void)
+{
+ set_all_modules_text_ro();
+ return 0;
+}
+
static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr)
{
return arm_gen_branch_link(pc, addr);
diff --git a/arch/arm/kernel/head-common.S b/arch/arm/kernel/head-common.S
index c96ecacb2021..572a38335c96 100644
--- a/arch/arm/kernel/head-common.S
+++ b/arch/arm/kernel/head-common.S
@@ -99,8 +99,7 @@ __mmap_switched:
str r1, [r5] @ Save machine type
str r2, [r6] @ Save atags pointer
cmp r7, #0
- bicne r4, r0, #CR_A @ Clear 'A' bit
- stmneia r7, {r0, r4} @ Save control register values
+ strne r0, [r7] @ Save control register values
b start_kernel
ENDPROC(__mmap_switched)
diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index 591d6e4a6492..2c35f0ff2fdc 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -475,7 +475,7 @@ ENDPROC(__turn_mmu_on)
#ifdef CONFIG_SMP_ON_UP
- __INIT
+ __HEAD
__fixup_smp:
and r3, r9, #0x000f0000 @ architecture version
teq r3, #0x000f0000 @ CPU ID supported?
diff --git a/arch/arm/kernel/hibernate.c b/arch/arm/kernel/hibernate.c
new file mode 100644
index 000000000000..bb8b79648643
--- /dev/null
+++ b/arch/arm/kernel/hibernate.c
@@ -0,0 +1,107 @@
+/*
+ * Hibernation support specific for ARM
+ *
+ * Derived from work on ARM hibernation support by:
+ *
+ * Ubuntu project, hibernation support for mach-dove
+ * Copyright (C) 2010 Nokia Corporation (Hiroshi Doyu)
+ * Copyright (C) 2010 Texas Instruments, Inc. (Teerth Reddy et al.)
+ * https://lkml.org/lkml/2010/6/18/4
+ * https://lists.linux-foundation.org/pipermail/linux-pm/2010-June/027422.html
+ * https://patchwork.kernel.org/patch/96442/
+ *
+ * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl>
+ *
+ * License terms: GNU General Public License (GPL) version 2
+ */
+
+#include <linux/mm.h>
+#include <linux/suspend.h>
+#include <asm/system_misc.h>
+#include <asm/idmap.h>
+#include <asm/suspend.h>
+#include <asm/memory.h>
+
+extern const void __nosave_begin, __nosave_end;
+
+int pfn_is_nosave(unsigned long pfn)
+{
+ unsigned long nosave_begin_pfn = virt_to_pfn(&__nosave_begin);
+ unsigned long nosave_end_pfn = virt_to_pfn(&__nosave_end - 1);
+
+ return (pfn >= nosave_begin_pfn) && (pfn <= nosave_end_pfn);
+}
+
+void notrace save_processor_state(void)
+{
+ WARN_ON(num_online_cpus() != 1);
+ local_fiq_disable();
+}
+
+void notrace restore_processor_state(void)
+{
+ local_fiq_enable();
+}
+
+/*
+ * Snapshot kernel memory and reset the system.
+ *
+ * swsusp_save() is executed in the suspend finisher so that the CPU
+ * context pointer and memory are part of the saved image, which is
+ * required by the resume kernel image to restart execution from
+ * swsusp_arch_suspend().
+ *
+ * soft_restart is not technically needed, but is used to get success
+ * returned from cpu_suspend.
+ *
+ * When soft reboot completes, the hibernation snapshot is written out.
+ */
+static int notrace arch_save_image(unsigned long unused)
+{
+ int ret;
+
+ ret = swsusp_save();
+ if (ret == 0)
+ soft_restart(virt_to_phys(cpu_resume));
+ return ret;
+}
+
+/*
+ * Save the current CPU state before suspend / poweroff.
+ */
+int notrace swsusp_arch_suspend(void)
+{
+ return cpu_suspend(0, arch_save_image);
+}
+
+/*
+ * Restore page contents for physical pages that were in use during loading
+ * hibernation image. Switch to idmap_pgd so the physical page tables
+ * are overwritten with the same contents.
+ */
+static void notrace arch_restore_image(void *unused)
+{
+ struct pbe *pbe;
+
+ cpu_switch_mm(idmap_pgd, &init_mm);
+ for (pbe = restore_pblist; pbe; pbe = pbe->next)
+ copy_page(pbe->orig_address, pbe->address);
+
+ soft_restart(virt_to_phys(cpu_resume));
+}
+
+static u64 resume_stack[PAGE_SIZE/2/sizeof(u64)] __nosavedata;
+
+/*
+ * Resume from the hibernation image.
+ * Due to the kernel heap / data restore, stack contents change underneath
+ * and that would make function calls impossible; switch to a temporary
+ * stack within the nosave region to avoid that problem.
+ */
+int swsusp_arch_resume(void)
+{
+ extern void call_with_stack(void (*fn)(void *), void *arg, void *sp);
+ call_with_stack(arch_restore_image, 0,
+ resume_stack + ARRAY_SIZE(resume_stack));
+ return 0;
+}
diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c
index 9723d17b8f38..2c4257604513 100644
--- a/arch/arm/kernel/irq.c
+++ b/arch/arm/kernel/irq.c
@@ -37,6 +37,7 @@
#include <linux/proc_fs.h>
#include <linux/export.h>
+#include <asm/hardware/cache-l2x0.h>
#include <asm/exception.h>
#include <asm/mach/arch.h>
#include <asm/mach/irq.h>
@@ -115,10 +116,21 @@ EXPORT_SYMBOL_GPL(set_irq_flags);
void __init init_IRQ(void)
{
+ int ret;
+
if (IS_ENABLED(CONFIG_OF) && !machine_desc->init_irq)
irqchip_init();
else
machine_desc->init_irq();
+
+ if (IS_ENABLED(CONFIG_OF) && IS_ENABLED(CONFIG_CACHE_L2X0) &&
+ (machine_desc->l2c_aux_mask || machine_desc->l2c_aux_val)) {
+ outer_cache.write_sec = machine_desc->l2c_write_sec;
+ ret = l2x0_of_init(machine_desc->l2c_aux_val,
+ machine_desc->l2c_aux_mask);
+ if (ret)
+ pr_err("L2C: failed to init: %d\n", ret);
+ }
}
#ifdef CONFIG_MULTI_IRQ_HANDLER
diff --git a/arch/arm/kernel/isa.c b/arch/arm/kernel/isa.c
index 346485910732..9d1cf7156895 100644
--- a/arch/arm/kernel/isa.c
+++ b/arch/arm/kernel/isa.c
@@ -20,7 +20,7 @@
static unsigned int isa_membase, isa_portbase, isa_portshift;
-static ctl_table ctl_isa_vars[4] = {
+static struct ctl_table ctl_isa_vars[4] = {
{
.procname = "membase",
.data = &isa_membase,
@@ -44,7 +44,7 @@ static ctl_table ctl_isa_vars[4] = {
static struct ctl_table_header *isa_sysctl_header;
-static ctl_table ctl_isa[2] = {
+static struct ctl_table ctl_isa[2] = {
{
.procname = "isa",
.mode = 0555,
@@ -52,7 +52,7 @@ static ctl_table ctl_isa[2] = {
}, {}
};
-static ctl_table ctl_bus[2] = {
+static struct ctl_table ctl_bus[2] = {
{
.procname = "bus",
.mode = 0555,
diff --git a/arch/arm/kernel/iwmmxt.S b/arch/arm/kernel/iwmmxt.S
index 2452dd1bef53..a5599cfc43cb 100644
--- a/arch/arm/kernel/iwmmxt.S
+++ b/arch/arm/kernel/iwmmxt.S
@@ -18,6 +18,7 @@
#include <asm/ptrace.h>
#include <asm/thread_info.h>
#include <asm/asm-offsets.h>
+#include <asm/assembler.h>
#if defined(CONFIG_CPU_PJ4) || defined(CONFIG_CPU_PJ4B)
#define PJ4(code...) code
@@ -65,17 +66,18 @@
* r9 = ret_from_exception
* lr = undefined instr exit
*
- * called from prefetch exception handler with interrupts disabled
+ * called from prefetch exception handler with interrupts enabled
*/
ENTRY(iwmmxt_task_enable)
+ inc_preempt_count r10, r3
XSC(mrc p15, 0, r2, c15, c1, 0)
PJ4(mrc p15, 0, r2, c1, c0, 2)
@ CP0 and CP1 accessible?
XSC(tst r2, #0x3)
PJ4(tst r2, #0xf)
- movne pc, lr @ if so no business here
+ bne 4f @ if so no business here
@ enable access to CP0 and CP1
XSC(orr r2, r2, #0x3)
XSC(mcr p15, 0, r2, c15, c1, 0)
@@ -136,7 +138,7 @@ concan_dump:
wstrd wR15, [r1, #MMX_WR15]
2: teq r0, #0 @ anything to load?
- moveq pc, lr
+ beq 3f
concan_load:
@@ -169,8 +171,14 @@ concan_load:
@ clear CUP/MUP (only if r1 != 0)
teq r1, #0
mov r2, #0
- moveq pc, lr
+ beq 3f
tmcr wCon, r2
+
+3:
+#ifdef CONFIG_PREEMPT_COUNT
+ get_thread_info r10
+#endif
+4: dec_preempt_count r10, r3
mov pc, lr
/*
diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c
index 51798d7854ac..a71ae1523620 100644
--- a/arch/arm/kernel/perf_event_cpu.c
+++ b/arch/arm/kernel/perf_event_cpu.c
@@ -221,6 +221,7 @@ static struct notifier_block cpu_pmu_hotplug_notifier = {
* PMU platform driver and devicetree bindings.
*/
static struct of_device_id cpu_pmu_of_device_ids[] = {
+ {.compatible = "arm,cortex-a17-pmu", .data = armv7_a17_pmu_init},
{.compatible = "arm,cortex-a15-pmu", .data = armv7_a15_pmu_init},
{.compatible = "arm,cortex-a12-pmu", .data = armv7_a12_pmu_init},
{.compatible = "arm,cortex-a9-pmu", .data = armv7_a9_pmu_init},
diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c
index f4ef3981ed02..2037f7205987 100644
--- a/arch/arm/kernel/perf_event_v7.c
+++ b/arch/arm/kernel/perf_event_v7.c
@@ -1599,6 +1599,13 @@ static int armv7_a12_pmu_init(struct arm_pmu *cpu_pmu)
return 0;
}
+static int armv7_a17_pmu_init(struct arm_pmu *cpu_pmu)
+{
+ armv7_a12_pmu_init(cpu_pmu);
+ cpu_pmu->name = "ARMv7 Cortex-A17";
+ return 0;
+}
+
/*
* Krait Performance Monitor Region Event Selection Register (PMRESRn)
*
@@ -2021,6 +2028,11 @@ static inline int armv7_a12_pmu_init(struct arm_pmu *cpu_pmu)
return -ENODEV;
}
+static inline int armv7_a17_pmu_init(struct arm_pmu *cpu_pmu)
+{
+ return -ENODEV;
+}
+
static inline int krait_pmu_init(struct arm_pmu *cpu_pmu)
{
return -ENODEV;
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 50e198c1e9c8..8a16ee5d8a95 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -72,6 +72,7 @@ static int __init fpe_setup(char *line)
__setup("fpe=", fpe_setup);
#endif
+extern void init_default_cache_policy(unsigned long);
extern void paging_init(const struct machine_desc *desc);
extern void early_paging_init(const struct machine_desc *,
struct proc_info_list *);
@@ -590,7 +591,7 @@ static void __init setup_processor(void)
pr_info("CPU: %s [%08x] revision %d (ARMv%s), cr=%08lx\n",
cpu_name, read_cpuid_id(), read_cpuid_id() & 15,
- proc_arch[cpu_architecture()], cr_alignment);
+ proc_arch[cpu_architecture()], get_cr());
snprintf(init_utsname()->machine, __NEW_UTS_LEN + 1, "%s%c",
list->arch_name, ENDIANNESS);
@@ -603,7 +604,9 @@ static void __init setup_processor(void)
#ifndef CONFIG_ARM_THUMB
elf_hwcap &= ~(HWCAP_THUMB | HWCAP_IDIVT);
#endif
-
+#ifdef CONFIG_MMU
+ init_default_cache_policy(list->__cpu_mm_mmu_flags);
+#endif
erratum_a15_798181_init();
feat_v6_fixup();
@@ -628,15 +631,8 @@ void __init dump_machine_table(void)
int __init arm_add_memory(u64 start, u64 size)
{
- struct membank *bank = &meminfo.bank[meminfo.nr_banks];
u64 aligned_start;
- if (meminfo.nr_banks >= NR_BANKS) {
- pr_crit("NR_BANKS too low, ignoring memory at 0x%08llx\n",
- (long long)start);
- return -EINVAL;
- }
-
/*
* Ensure that start/size are aligned to a page boundary.
* Size is appropriately rounded down, start is rounded up.
@@ -677,17 +673,17 @@ int __init arm_add_memory(u64 start, u64 size)
aligned_start = PHYS_OFFSET;
}
- bank->start = aligned_start;
- bank->size = size & ~(phys_addr_t)(PAGE_SIZE - 1);
+ start = aligned_start;
+ size = size & ~(phys_addr_t)(PAGE_SIZE - 1);
/*
* Check whether this memory region has non-zero size or
* invalid node number.
*/
- if (bank->size == 0)
+ if (size == 0)
return -EINVAL;
- meminfo.nr_banks++;
+ memblock_add(start, size);
return 0;
}
@@ -695,6 +691,7 @@ int __init arm_add_memory(u64 start, u64 size)
* Pick out the memory size. We look for mem=size@start,
* where start and size are "size[KkMm]"
*/
+
static int __init early_mem(char *p)
{
static int usermem __initdata = 0;
@@ -709,7 +706,8 @@ static int __init early_mem(char *p)
*/
if (usermem == 0) {
usermem = 1;
- meminfo.nr_banks = 0;
+ memblock_remove(memblock_start_of_DRAM(),
+ memblock_end_of_DRAM() - memblock_start_of_DRAM());
}
start = PHYS_OFFSET;
@@ -854,13 +852,6 @@ static void __init reserve_crashkernel(void)
static inline void reserve_crashkernel(void) {}
#endif /* CONFIG_KEXEC */
-static int __init meminfo_cmp(const void *_a, const void *_b)
-{
- const struct membank *a = _a, *b = _b;
- long cmp = bank_pfn_start(a) - bank_pfn_start(b);
- return cmp < 0 ? -1 : cmp > 0 ? 1 : 0;
-}
-
void __init hyp_mode_check(void)
{
#ifdef CONFIG_ARM_VIRT_EXT
@@ -903,12 +894,10 @@ void __init setup_arch(char **cmdline_p)
parse_early_param();
- sort(&meminfo.bank, meminfo.nr_banks, sizeof(meminfo.bank[0]), meminfo_cmp, NULL);
-
early_paging_init(mdesc, lookup_processor_type(read_cpuid_id()));
setup_dma_zone(mdesc);
sanity_check_meminfo();
- arm_memblock_init(&meminfo, mdesc);
+ arm_memblock_init(mdesc);
paging_init(mdesc);
request_standard_resources(mdesc);
diff --git a/arch/arm/kernel/sleep.S b/arch/arm/kernel/sleep.S
index b907d9b790ab..1b880db2a033 100644
--- a/arch/arm/kernel/sleep.S
+++ b/arch/arm/kernel/sleep.S
@@ -127,6 +127,10 @@ ENDPROC(cpu_resume_after_mmu)
.align
ENTRY(cpu_resume)
ARM_BE8(setend be) @ ensure we are in BE mode
+#ifdef CONFIG_ARM_VIRT_EXT
+ bl __hyp_stub_install_secondary
+#endif
+ safe_svcmode_maskall r1
mov r1, #0
ALT_SMP(mrc p15, 0, r0, c0, c0, 5)
ALT_UP_B(1f)
@@ -144,7 +148,6 @@ ARM_BE8(setend be) @ ensure we are in BE mode
ldr r0, [r0, #SLEEP_SAVE_SP_PHYS]
ldr r0, [r0, r1, lsl #2]
- setmode PSR_I_BIT | PSR_F_BIT | SVC_MODE, r1 @ set SVC, irqs off
@ load phys pgd, stack, resume fn
ARM( ldmia r0!, {r1, sp, pc} )
THUMB( ldmia r0!, {r1, r2, r3} )
diff --git a/arch/arm/kernel/stacktrace.c b/arch/arm/kernel/stacktrace.c
index af4e8c8a5422..f065eb05d254 100644
--- a/arch/arm/kernel/stacktrace.c
+++ b/arch/arm/kernel/stacktrace.c
@@ -3,6 +3,7 @@
#include <linux/stacktrace.h>
#include <asm/stacktrace.h>
+#include <asm/traps.h>
#if defined(CONFIG_FRAME_POINTER) && !defined(CONFIG_ARM_UNWIND)
/*
@@ -61,6 +62,7 @@ EXPORT_SYMBOL(walk_stackframe);
#ifdef CONFIG_STACKTRACE
struct stack_trace_data {
struct stack_trace *trace;
+ unsigned long last_pc;
unsigned int no_sched_functions;
unsigned int skip;
};
@@ -69,6 +71,7 @@ static int save_trace(struct stackframe *frame, void *d)
{
struct stack_trace_data *data = d;
struct stack_trace *trace = data->trace;
+ struct pt_regs *regs;
unsigned long addr = frame->pc;
if (data->no_sched_functions && in_sched_functions(addr))
@@ -80,16 +83,39 @@ static int save_trace(struct stackframe *frame, void *d)
trace->entries[trace->nr_entries++] = addr;
+ if (trace->nr_entries >= trace->max_entries)
+ return 1;
+
+ /*
+ * in_exception_text() is designed to test if the PC is one of
+ * the functions which has an exception stack above it, but
+ * unfortunately what is in frame->pc is the return LR value,
+ * not the saved PC value. So, we need to track the previous
+ * frame PC value when doing this.
+ */
+ addr = data->last_pc;
+ data->last_pc = frame->pc;
+ if (!in_exception_text(addr))
+ return 0;
+
+ regs = (struct pt_regs *)frame->sp;
+
+ trace->entries[trace->nr_entries++] = regs->ARM_pc;
+
return trace->nr_entries >= trace->max_entries;
}
-void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
+/* This must be noinline to so that our skip calculation works correctly */
+static noinline void __save_stack_trace(struct task_struct *tsk,
+ struct stack_trace *trace, unsigned int nosched)
{
struct stack_trace_data data;
struct stackframe frame;
data.trace = trace;
+ data.last_pc = ULONG_MAX;
data.skip = trace->skip;
+ data.no_sched_functions = nosched;
if (tsk != current) {
#ifdef CONFIG_SMP
@@ -102,7 +128,6 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
trace->entries[trace->nr_entries++] = ULONG_MAX;
return;
#else
- data.no_sched_functions = 1;
frame.fp = thread_saved_fp(tsk);
frame.sp = thread_saved_sp(tsk);
frame.lr = 0; /* recovered from the stack */
@@ -111,11 +136,12 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
} else {
register unsigned long current_sp asm ("sp");
- data.no_sched_functions = 0;
+ /* We don't want this function nor the caller */
+ data.skip += 2;
frame.fp = (unsigned long)__builtin_frame_address(0);
frame.sp = current_sp;
frame.lr = (unsigned long)__builtin_return_address(0);
- frame.pc = (unsigned long)save_stack_trace_tsk;
+ frame.pc = (unsigned long)__save_stack_trace;
}
walk_stackframe(&frame, save_trace, &data);
@@ -123,9 +149,33 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
trace->entries[trace->nr_entries++] = ULONG_MAX;
}
+void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace)
+{
+ struct stack_trace_data data;
+ struct stackframe frame;
+
+ data.trace = trace;
+ data.skip = trace->skip;
+ data.no_sched_functions = 0;
+
+ frame.fp = regs->ARM_fp;
+ frame.sp = regs->ARM_sp;
+ frame.lr = regs->ARM_lr;
+ frame.pc = regs->ARM_pc;
+
+ walk_stackframe(&frame, save_trace, &data);
+ if (trace->nr_entries < trace->max_entries)
+ trace->entries[trace->nr_entries++] = ULONG_MAX;
+}
+
+void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
+{
+ __save_stack_trace(tsk, trace, 1);
+}
+
void save_stack_trace(struct stack_trace *trace)
{
- save_stack_trace_tsk(current, trace);
+ __save_stack_trace(current, trace, 0);
}
EXPORT_SYMBOL_GPL(save_stack_trace);
#endif
diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c
index 71e1fec6d31a..3997c411c140 100644
--- a/arch/arm/kernel/topology.c
+++ b/arch/arm/kernel/topology.c
@@ -91,13 +91,13 @@ static void __init parse_dt_topology(void)
{
const struct cpu_efficiency *cpu_eff;
struct device_node *cn = NULL;
- unsigned long min_capacity = (unsigned long)(-1);
+ unsigned long min_capacity = ULONG_MAX;
unsigned long max_capacity = 0;
unsigned long capacity = 0;
- int alloc_size, cpu = 0;
+ int cpu = 0;
- alloc_size = nr_cpu_ids * sizeof(*__cpu_capacity);
- __cpu_capacity = kzalloc(alloc_size, GFP_NOWAIT);
+ __cpu_capacity = kcalloc(nr_cpu_ids, sizeof(*__cpu_capacity),
+ GFP_NOWAIT);
for_each_possible_cpu(cpu) {
const u32 *rate;
diff --git a/arch/arm/kernel/uprobes.c b/arch/arm/kernel/uprobes.c
index f9bacee973bf..56adf9c1fde0 100644
--- a/arch/arm/kernel/uprobes.c
+++ b/arch/arm/kernel/uprobes.c
@@ -113,6 +113,26 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm,
return 0;
}
+void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr,
+ void *src, unsigned long len)
+{
+ void *xol_page_kaddr = kmap_atomic(page);
+ void *dst = xol_page_kaddr + (vaddr & ~PAGE_MASK);
+
+ preempt_disable();
+
+ /* Initialize the slot */
+ memcpy(dst, src, len);
+
+ /* flush caches (dcache/icache) */
+ flush_uprobe_xol_access(page, vaddr, dst, len);
+
+ preempt_enable();
+
+ kunmap_atomic(xol_page_kaddr);
+}
+
+
int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
{
struct uprobe_task *utask = current->utask;
diff --git a/arch/arm/mach-at91/sysirq_mask.c b/arch/arm/mach-at91/sysirq_mask.c
index 2ba694f9626b..f8bc3511a8c8 100644
--- a/arch/arm/mach-at91/sysirq_mask.c
+++ b/arch/arm/mach-at91/sysirq_mask.c
@@ -25,24 +25,28 @@
#include "generic.h"
-#define AT91_RTC_IDR 0x24 /* Interrupt Disable Register */
-#define AT91_RTC_IMR 0x28 /* Interrupt Mask Register */
+#define AT91_RTC_IDR 0x24 /* Interrupt Disable Register */
+#define AT91_RTC_IMR 0x28 /* Interrupt Mask Register */
+#define AT91_RTC_IRQ_MASK 0x1f /* Available IRQs mask */
void __init at91_sysirq_mask_rtc(u32 rtc_base)
{
void __iomem *base;
- u32 mask;
base = ioremap(rtc_base, 64);
if (!base)
return;
- mask = readl_relaxed(base + AT91_RTC_IMR);
- if (mask) {
- pr_info("AT91: Disabling rtc irq\n");
- writel_relaxed(mask, base + AT91_RTC_IDR);
- (void)readl_relaxed(base + AT91_RTC_IMR); /* flush */
- }
+ /*
+ * sam9x5 SoCs have the following errata:
+ * "RTC: Interrupt Mask Register cannot be used
+ * Interrupt Mask Register read always returns 0."
+ *
+ * Hence we're not relying on IMR values to disable
+ * interrupts.
+ */
+ writel_relaxed(AT91_RTC_IRQ_MASK, base + AT91_RTC_IDR);
+ (void)readl_relaxed(base + AT91_RTC_IMR); /* flush */
iounmap(base);
}
diff --git a/arch/arm/mach-bcm/bcm_5301x.c b/arch/arm/mach-bcm/bcm_5301x.c
index edff69761e04..e9bcbdbce555 100644
--- a/arch/arm/mach-bcm/bcm_5301x.c
+++ b/arch/arm/mach-bcm/bcm_5301x.c
@@ -43,19 +43,14 @@ static void __init bcm5301x_init_early(void)
"imprecise external abort");
}
-static void __init bcm5301x_dt_init(void)
-{
- l2x0_of_init(0, ~0UL);
- of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL);
-}
-
static const char __initconst *bcm5301x_dt_compat[] = {
"brcm,bcm4708",
NULL,
};
DT_MACHINE_START(BCM5301X, "BCM5301X")
+ .l2c_aux_val = 0,
+ .l2c_aux_mask = ~0,
.init_early = bcm5301x_init_early,
- .init_machine = bcm5301x_dt_init,
.dt_compat = bcm5301x_dt_compat,
MACHINE_END
diff --git a/arch/arm/mach-berlin/berlin.c b/arch/arm/mach-berlin/berlin.c
index 025bcb5473eb..ac181c6797ee 100644
--- a/arch/arm/mach-berlin/berlin.c
+++ b/arch/arm/mach-berlin/berlin.c
@@ -18,16 +18,6 @@
#include <asm/hardware/cache-l2x0.h>
#include <asm/mach/arch.h>
-static void __init berlin_init_machine(void)
-{
- /*
- * with DT probing for L2CCs, berlin_init_machine can be removed.
- * Note: 88DE3005 (Armada 1500-mini) uses pl310 l2cc
- */
- l2x0_of_init(0x70c00000, 0xfeffffff);
- of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL);
-}
-
static const char * const berlin_dt_compat[] = {
"marvell,berlin",
NULL,
@@ -35,5 +25,10 @@ static const char * const berlin_dt_compat[] = {
DT_MACHINE_START(BERLIN_DT, "Marvell Berlin")
.dt_compat = berlin_dt_compat,
- .init_machine = berlin_init_machine,
+ /*
+ * with DT probing for L2CCs, berlin_init_machine can be removed.
+ * Note: 88DE3005 (Armada 1500-mini) uses pl310 l2cc
+ */
+ .l2c_aux_val = 0x30c00000,
+ .l2c_aux_mask = 0xfeffffff,
MACHINE_END
diff --git a/arch/arm/mach-clps711x/board-clep7312.c b/arch/arm/mach-clps711x/board-clep7312.c
index 221b9de32dd6..94a7add88a3f 100644
--- a/arch/arm/mach-clps711x/board-clep7312.c
+++ b/arch/arm/mach-clps711x/board-clep7312.c
@@ -18,6 +18,7 @@
#include <linux/init.h>
#include <linux/types.h>
#include <linux/string.h>
+#include <linux/memblock.h>
#include <asm/setup.h>
#include <asm/mach-types.h>
@@ -26,11 +27,9 @@
#include "common.h"
static void __init
-fixup_clep7312(struct tag *tags, char **cmdline, struct meminfo *mi)
+fixup_clep7312(struct tag *tags, char **cmdline)
{
- mi->nr_banks=1;
- mi->bank[0].start = 0xc0000000;
- mi->bank[0].size = 0x01000000;
+ memblock_add(0xc0000000, 0x01000000);
}
MACHINE_START(CLEP7212, "Cirrus Logic 7212/7312")
diff --git a/arch/arm/mach-clps711x/board-edb7211.c b/arch/arm/mach-clps711x/board-edb7211.c
index 077609841f14..f9828f89972a 100644
--- a/arch/arm/mach-clps711x/board-edb7211.c
+++ b/arch/arm/mach-clps711x/board-edb7211.c
@@ -16,6 +16,7 @@
#include <linux/interrupt.h>
#include <linux/backlight.h>
#include <linux/platform_device.h>
+#include <linux/memblock.h>
#include <linux/mtd/physmap.h>
#include <linux/mtd/partitions.h>
@@ -133,7 +134,7 @@ static void __init edb7211_reserve(void)
}
static void __init
-fixup_edb7211(struct tag *tags, char **cmdline, struct meminfo *mi)
+fixup_edb7211(struct tag *tags, char **cmdline)
{
/*
* Bank start addresses are not present in the information
@@ -143,11 +144,8 @@ fixup_edb7211(struct tag *tags, char **cmdline, struct meminfo *mi)
* Banks sizes _are_ present in the param block, but we're
* not using that information yet.
*/
- mi->bank[0].start = 0xc0000000;
- mi->bank[0].size = SZ_8M;
- mi->bank[1].start = 0xc1000000;
- mi->bank[1].size = SZ_8M;
- mi->nr_banks = 2;
+ memblock_add(0xc0000000, SZ_8M);
+ memblock_add(0xc1000000, SZ_8M);
}
static void __init edb7211_init(void)
diff --git a/arch/arm/mach-clps711x/board-p720t.c b/arch/arm/mach-clps711x/board-p720t.c
index 67b733744ed7..0cf0e51e6546 100644
--- a/arch/arm/mach-clps711x/board-p720t.c
+++ b/arch/arm/mach-clps711x/board-p720t.c
@@ -295,7 +295,7 @@ static struct generic_bl_info p720t_lcd_backlight_pdata = {
};
static void __init
-fixup_p720t(struct tag *tag, char **cmdline, struct meminfo *mi)
+fixup_p720t(struct tag *tag, char **cmdline)
{
/*
* Our bootloader doesn't setup any tags (yet).
diff --git a/arch/arm/mach-cns3xxx/core.c b/arch/arm/mach-cns3xxx/core.c
index 2ae28a69e3e5..f85449a6accd 100644
--- a/arch/arm/mach-cns3xxx/core.c
+++ b/arch/arm/mach-cns3xxx/core.c
@@ -272,9 +272,9 @@ void __init cns3xxx_l2x0_init(void)
*
* 1 cycle of latency for setup, read and write accesses
*/
- val = readl(base + L2X0_TAG_LATENCY_CTRL);
+ val = readl(base + L310_TAG_LATENCY_CTRL);
val &= 0xfffff888;
- writel(val, base + L2X0_TAG_LATENCY_CTRL);
+ writel(val, base + L310_TAG_LATENCY_CTRL);
/*
* Data RAM Control register
@@ -285,12 +285,12 @@ void __init cns3xxx_l2x0_init(void)
*
* 1 cycle of latency for setup, read and write accesses
*/
- val = readl(base + L2X0_DATA_LATENCY_CTRL);
+ val = readl(base + L310_DATA_LATENCY_CTRL);
val &= 0xfffff888;
- writel(val, base + L2X0_DATA_LATENCY_CTRL);
+ writel(val, base + L310_DATA_LATENCY_CTRL);
/* 32 KiB, 8-way, parity disable */
- l2x0_init(base, 0x00540000, 0xfe000fff);
+ l2x0_init(base, 0x00500000, 0xfe0f0fff);
}
#endif /* CONFIG_CACHE_L2X0 */
diff --git a/arch/arm/mach-ep93xx/crunch-bits.S b/arch/arm/mach-ep93xx/crunch-bits.S
index 0ec9bb48fab9..e96923a3017b 100644
--- a/arch/arm/mach-ep93xx/crunch-bits.S
+++ b/arch/arm/mach-ep93xx/crunch-bits.S
@@ -16,6 +16,7 @@
#include <asm/ptrace.h>
#include <asm/thread_info.h>
#include <asm/asm-offsets.h>
+#include <asm/assembler.h>
#include <mach/ep93xx-regs.h>
/*
@@ -62,14 +63,16 @@
* r9 = ret_from_exception
* lr = undefined instr exit
*
- * called from prefetch exception handler with interrupts disabled
+ * called from prefetch exception handler with interrupts enabled
*/
ENTRY(crunch_task_enable)
+ inc_preempt_count r10, r3
+
ldr r8, =(EP93XX_APB_VIRT_BASE + 0x00130000) @ syscon addr
ldr r1, [r8, #0x80]
tst r1, #0x00800000 @ access to crunch enabled?
- movne pc, lr @ if so no business here
+ bne 2f @ if so no business here
mov r3, #0xaa @ unlock syscon swlock
str r3, [r8, #0xc0]
orr r1, r1, #0x00800000 @ enable access to crunch
@@ -142,7 +145,7 @@ crunch_save:
teq r0, #0 @ anything to load?
cfldr64eq mvdx0, [r1, #CRUNCH_MVDX0] @ mvdx0 was clobbered
- moveq pc, lr
+ beq 1f
crunch_load:
cfldr64 mvdx0, [r0, #CRUNCH_DSPSC] @ load status word
@@ -190,6 +193,11 @@ crunch_load:
cfldr64 mvdx14, [r0, #CRUNCH_MVDX14]
cfldr64 mvdx15, [r0, #CRUNCH_MVDX15]
+1:
+#ifdef CONFIG_PREEMPT_COUNT
+ get_thread_info r10
+#endif
+2: dec_preempt_count r10, r3
mov pc, lr
/*
diff --git a/arch/arm/mach-exynos/common.h b/arch/arm/mach-exynos/common.h
index 80b90e346ca0..16617bdb37a9 100644
--- a/arch/arm/mach-exynos/common.h
+++ b/arch/arm/mach-exynos/common.h
@@ -153,7 +153,6 @@ enum sys_powerdown {
NUM_SYS_POWERDOWN,
};
-extern unsigned long l2x0_regs_phys;
struct exynos_pmu_conf {
void __iomem *reg;
unsigned int val[NUM_SYS_POWERDOWN];
diff --git a/arch/arm/mach-exynos/exynos.c b/arch/arm/mach-exynos/exynos.c
index a56ce45a3f90..90aab4d75d08 100644
--- a/arch/arm/mach-exynos/exynos.c
+++ b/arch/arm/mach-exynos/exynos.c
@@ -30,9 +30,6 @@
#include "mfc.h"
#include "regs-pmu.h"
-#define L2_AUX_VAL 0x7C470001
-#define L2_AUX_MASK 0xC200ffff
-
static struct map_desc exynos4_iodesc[] __initdata = {
{
.virtual = (unsigned long)S3C_VA_SYS,
@@ -246,25 +243,6 @@ void __init exynos_init_io(void)
exynos_map_io();
}
-static int __init exynos4_l2x0_cache_init(void)
-{
- int ret;
-
- if (!soc_is_exynos4())
- return 0;
-
- ret = l2x0_of_init(L2_AUX_VAL, L2_AUX_MASK);
- if (ret)
- return ret;
-
- if (IS_ENABLED(CONFIG_S5P_SLEEP)) {
- l2x0_regs_phys = virt_to_phys(&l2x0_saved_regs);
- clean_dcache_area(&l2x0_regs_phys, sizeof(unsigned long));
- }
- return 0;
-}
-early_initcall(exynos4_l2x0_cache_init);
-
static void __init exynos_dt_machine_init(void)
{
struct device_node *i2c_np;
@@ -333,6 +311,8 @@ static void __init exynos_reserve(void)
DT_MACHINE_START(EXYNOS_DT, "SAMSUNG EXYNOS (Flattened Device Tree)")
/* Maintainer: Thomas Abraham <thomas.abraham@linaro.org> */
/* Maintainer: Kukjin Kim <kgene.kim@samsung.com> */
+ .l2c_aux_val = 0x3c400001,
+ .l2c_aux_mask = 0xc20fffff,
.smp = smp_ops(exynos_smp_ops),
.map_io = exynos_init_io,
.init_early = exynos_firmware_init,
diff --git a/arch/arm/mach-exynos/sleep.S b/arch/arm/mach-exynos/sleep.S
index a2613e944e10..108a45f4bb62 100644
--- a/arch/arm/mach-exynos/sleep.S
+++ b/arch/arm/mach-exynos/sleep.S
@@ -16,8 +16,6 @@
*/
#include <linux/linkage.h>
-#include <asm/asm-offsets.h>
-#include <asm/hardware/cache-l2x0.h>
#define CPU_MASK 0xff0ffff0
#define CPU_CORTEX_A9 0x410fc090
@@ -53,33 +51,7 @@ ENTRY(exynos_cpu_resume)
and r0, r0, r1
ldr r1, =CPU_CORTEX_A9
cmp r0, r1
- bne skip_l2_resume
- adr r0, l2x0_regs_phys
- ldr r0, [r0]
- cmp r0, #0
- beq skip_l2_resume
- ldr r1, [r0, #L2X0_R_PHY_BASE]
- ldr r2, [r1, #L2X0_CTRL]
- tst r2, #0x1
- bne skip_l2_resume
- ldr r2, [r0, #L2X0_R_AUX_CTRL]
- str r2, [r1, #L2X0_AUX_CTRL]
- ldr r2, [r0, #L2X0_R_TAG_LATENCY]
- str r2, [r1, #L2X0_TAG_LATENCY_CTRL]
- ldr r2, [r0, #L2X0_R_DATA_LATENCY]
- str r2, [r1, #L2X0_DATA_LATENCY_CTRL]
- ldr r2, [r0, #L2X0_R_PREFETCH_CTRL]
- str r2, [r1, #L2X0_PREFETCH_CTRL]
- ldr r2, [r0, #L2X0_R_PWR_CTRL]
- str r2, [r1, #L2X0_POWER_CTRL]
- mov r2, #1
- str r2, [r1, #L2X0_CTRL]
-skip_l2_resume:
+ bleq l2c310_early_resume
#endif
b cpu_resume
ENDPROC(exynos_cpu_resume)
-#ifdef CONFIG_CACHE_L2X0
- .globl l2x0_regs_phys
-l2x0_regs_phys:
- .long 0
-#endif
diff --git a/arch/arm/mach-footbridge/cats-hw.c b/arch/arm/mach-footbridge/cats-hw.c
index da0415094856..8f05489671b7 100644
--- a/arch/arm/mach-footbridge/cats-hw.c
+++ b/arch/arm/mach-footbridge/cats-hw.c
@@ -76,7 +76,7 @@ __initcall(cats_hw_init);
* hard reboots fail on early boards.
*/
static void __init
-fixup_cats(struct tag *tags, char **cmdline, struct meminfo *mi)
+fixup_cats(struct tag *tags, char **cmdline)
{
#if defined(CONFIG_VGA_CONSOLE) || defined(CONFIG_DUMMY_CONSOLE)
screen_info.orig_video_lines = 25;
diff --git a/arch/arm/mach-footbridge/netwinder-hw.c b/arch/arm/mach-footbridge/netwinder-hw.c
index eb1fa5c84723..cdee08c6d239 100644
--- a/arch/arm/mach-footbridge/netwinder-hw.c
+++ b/arch/arm/mach-footbridge/netwinder-hw.c
@@ -620,7 +620,7 @@ __initcall(nw_hw_init);
* the parameter page.
*/
static void __init
-fixup_netwinder(struct tag *tags, char **cmdline, struct meminfo *mi)
+fixup_netwinder(struct tag *tags, char **cmdline)
{
#ifdef CONFIG_ISAPNP
extern int isapnp_disable;
diff --git a/arch/arm/mach-highbank/highbank.c b/arch/arm/mach-highbank/highbank.c
index c7de89b263dd..8c35ae4ff176 100644
--- a/arch/arm/mach-highbank/highbank.c
+++ b/arch/arm/mach-highbank/highbank.c
@@ -51,11 +51,13 @@ static void __init highbank_scu_map_io(void)
}
-static void highbank_l2x0_disable(void)
+static void highbank_l2c310_write_sec(unsigned long val, unsigned reg)
{
- outer_flush_all();
- /* Disable PL310 L2 Cache controller */
- highbank_smc1(0x102, 0x0);
+ if (reg == L2X0_CTRL)
+ highbank_smc1(0x102, val);
+ else
+ WARN_ONCE(1, "Highbank L2C310: ignoring write to reg 0x%x\n",
+ reg);
}
static void __init highbank_init_irq(void)
@@ -64,14 +66,6 @@ static void __init highbank_init_irq(void)
if (of_find_compatible_node(NULL, NULL, "arm,cortex-a9"))
highbank_scu_map_io();
-
- /* Enable PL310 L2 Cache controller */
- if (IS_ENABLED(CONFIG_CACHE_L2X0) &&
- of_find_compatible_node(NULL, NULL, "arm,pl310-cache")) {
- highbank_smc1(0x102, 0x1);
- l2x0_of_init(0, ~0UL);
- outer_cache.disable = highbank_l2x0_disable;
- }
}
static void highbank_power_off(void)
@@ -185,6 +179,9 @@ DT_MACHINE_START(HIGHBANK, "Highbank")
#if defined(CONFIG_ZONE_DMA) && defined(CONFIG_ARM_LPAE)
.dma_zone_size = (4ULL * SZ_1G),
#endif
+ .l2c_aux_val = 0,
+ .l2c_aux_mask = ~0,
+ .l2c_write_sec = highbank_l2c310_write_sec,
.init_irq = highbank_init_irq,
.init_machine = highbank_init,
.dt_compat = highbank_match,
diff --git a/arch/arm/mach-imx/mach-vf610.c b/arch/arm/mach-imx/mach-vf610.c
index 2d8aef5a6efa..c44602758120 100644
--- a/arch/arm/mach-imx/mach-vf610.c
+++ b/arch/arm/mach-imx/mach-vf610.c
@@ -20,19 +20,14 @@ static void __init vf610_init_machine(void)
of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL);
}
-static void __init vf610_init_irq(void)
-{
- l2x0_of_init(0, ~0UL);
- irqchip_init();
-}
-
static const char *vf610_dt_compat[] __initconst = {
"fsl,vf610",
NULL,
};
DT_MACHINE_START(VYBRID_VF610, "Freescale Vybrid VF610 (Device Tree)")
- .init_irq = vf610_init_irq,
+ .l2c_aux_val = 0,
+ .l2c_aux_mask = ~0,
.init_machine = vf610_init_machine,
.dt_compat = vf610_dt_compat,
.restart = mxc_restart,
diff --git a/arch/arm/mach-imx/suspend-imx6.S b/arch/arm/mach-imx/suspend-imx6.S
index 20048ff05739..fe123b079c05 100644
--- a/arch/arm/mach-imx/suspend-imx6.S
+++ b/arch/arm/mach-imx/suspend-imx6.S
@@ -334,28 +334,10 @@ ENDPROC(imx6_suspend)
* turned into relative ones.
*/
-#ifdef CONFIG_CACHE_L2X0
- .macro pl310_resume
- adr r0, l2x0_saved_regs_offset
- ldr r2, [r0]
- add r2, r2, r0
- ldr r0, [r2, #L2X0_R_PHY_BASE] @ get physical base of l2x0
- ldr r1, [r2, #L2X0_R_AUX_CTRL] @ get aux_ctrl value
- str r1, [r0, #L2X0_AUX_CTRL] @ restore aux_ctrl
- mov r1, #0x1
- str r1, [r0, #L2X0_CTRL] @ re-enable L2
- .endm
-
-l2x0_saved_regs_offset:
- .word l2x0_saved_regs - .
-
-#else
- .macro pl310_resume
- .endm
-#endif
-
ENTRY(v7_cpu_resume)
bl v7_invalidate_l1
- pl310_resume
+#ifdef CONFIG_CACHE_L2X0
+ bl l2c310_early_resume
+#endif
b cpu_resume
ENDPROC(v7_cpu_resume)
diff --git a/arch/arm/mach-imx/system.c b/arch/arm/mach-imx/system.c
index 5e3027d3692f..3b0733edb68c 100644
--- a/arch/arm/mach-imx/system.c
+++ b/arch/arm/mach-imx/system.c
@@ -124,7 +124,7 @@ void __init imx_init_l2cache(void)
}
/* Configure the L2 PREFETCH and POWER registers */
- val = readl_relaxed(l2x0_base + L2X0_PREFETCH_CTRL);
+ val = readl_relaxed(l2x0_base + L310_PREFETCH_CTRL);
val |= 0x70800000;
/*
* The L2 cache controller(PL310) version on the i.MX6D/Q is r3p1-50rel0
@@ -137,14 +137,12 @@ void __init imx_init_l2cache(void)
*/
if (cpu_is_imx6q())
val &= ~(1 << 30 | 1 << 23);
- writel_relaxed(val, l2x0_base + L2X0_PREFETCH_CTRL);
- val = L2X0_DYNAMIC_CLK_GATING_EN | L2X0_STNDBY_MODE_EN;
- writel_relaxed(val, l2x0_base + L2X0_POWER_CTRL);
+ writel_relaxed(val, l2x0_base + L310_PREFETCH_CTRL);
iounmap(l2x0_base);
of_node_put(np);
out:
- l2x0_of_init(0, ~0UL);
+ l2x0_of_init(0, ~0);
}
#endif
diff --git a/arch/arm/mach-msm/board-halibut.c b/arch/arm/mach-msm/board-halibut.c
index a77529887cbc..61bfe584a9d7 100644
--- a/arch/arm/mach-msm/board-halibut.c
+++ b/arch/arm/mach-msm/board-halibut.c
@@ -83,11 +83,6 @@ static void __init halibut_init(void)
platform_add_devices(devices, ARRAY_SIZE(devices));
}
-static void __init halibut_fixup(struct tag *tags, char **cmdline,
- struct meminfo *mi)
-{
-}
-
static void __init halibut_map_io(void)
{
msm_map_common_io();
@@ -100,7 +95,6 @@ static void __init halibut_init_late(void)
MACHINE_START(HALIBUT, "Halibut Board (QCT SURF7200A)")
.atag_offset = 0x100,
- .fixup = halibut_fixup,
.map_io = halibut_map_io,
.init_early = halibut_init_early,
.init_irq = halibut_init_irq,
diff --git a/arch/arm/mach-msm/board-mahimahi.c b/arch/arm/mach-msm/board-mahimahi.c
index 7d9981cb400e..873c3ca3cd7e 100644
--- a/arch/arm/mach-msm/board-mahimahi.c
+++ b/arch/arm/mach-msm/board-mahimahi.c
@@ -22,6 +22,7 @@
#include <linux/io.h>
#include <linux/kernel.h>
#include <linux/platform_device.h>
+#include <linux/memblock.h>
#include <asm/mach-types.h>
#include <asm/mach/arch.h>
@@ -52,16 +53,10 @@ static void __init mahimahi_init(void)
platform_add_devices(devices, ARRAY_SIZE(devices));
}
-static void __init mahimahi_fixup(struct tag *tags, char **cmdline,
- struct meminfo *mi)
+static void __init mahimahi_fixup(struct tag *tags, char **cmdline)
{
- mi->nr_banks = 2;
- mi->bank[0].start = PHYS_OFFSET;
- mi->bank[0].node = PHYS_TO_NID(PHYS_OFFSET);
- mi->bank[0].size = (219*1024*1024);
- mi->bank[1].start = MSM_HIGHMEM_BASE;
- mi->bank[1].node = PHYS_TO_NID(MSM_HIGHMEM_BASE);
- mi->bank[1].size = MSM_HIGHMEM_SIZE;
+ memblock_add(PHYS_OFFSET, 219*SZ_1M);
+ memblock_add(MSM_HIGHMEM_BASE, MSM_HIGHMEM_SIZE);
}
static void __init mahimahi_map_io(void)
diff --git a/arch/arm/mach-msm/board-msm7x30.c b/arch/arm/mach-msm/board-msm7x30.c
index 0c4c200e1221..245884319d2e 100644
--- a/arch/arm/mach-msm/board-msm7x30.c
+++ b/arch/arm/mach-msm/board-msm7x30.c
@@ -40,8 +40,7 @@
#include "proc_comm.h"
#include "common.h"
-static void __init msm7x30_fixup(struct tag *tag, char **cmdline,
- struct meminfo *mi)
+static void __init msm7x30_fixup(struct tag *tag, char **cmdline)
{
for (; tag->hdr.size; tag = tag_next(tag))
if (tag->hdr.tag == ATAG_MEM && tag->u.mem.start == 0x200000) {
diff --git a/arch/arm/mach-msm/board-sapphire.c b/arch/arm/mach-msm/board-sapphire.c
index 327605174d63..e50967926dcd 100644
--- a/arch/arm/mach-msm/board-sapphire.c
+++ b/arch/arm/mach-msm/board-sapphire.c
@@ -35,6 +35,7 @@
#include <linux/mtd/nand.h>
#include <linux/mtd/partitions.h>
+#include <linux/memblock.h>
#include "gpio_chip.h"
#include "board-sapphire.h"
@@ -74,22 +75,18 @@ static struct map_desc sapphire_io_desc[] __initdata = {
}
};
-static void __init sapphire_fixup(struct tag *tags, char **cmdline,
- struct meminfo *mi)
+static void __init sapphire_fixup(struct tag *tags, char **cmdline)
{
int smi_sz = parse_tag_smi((const struct tag *)tags);
- mi->nr_banks = 1;
- mi->bank[0].start = PHYS_OFFSET;
- mi->bank[0].node = PHYS_TO_NID(PHYS_OFFSET);
if (smi_sz == 32) {
- mi->bank[0].size = (84*1024*1024);
+ memblock_add(PHYS_OFFSET, 84*SZ_1M);
} else if (smi_sz == 64) {
- mi->bank[0].size = (101*1024*1024);
+ memblock_add(PHYS_OFFSET, 101*SZ_1M);
} else {
+ memblock_add(PHYS_OFFSET, 101*SZ_1M);
/* Give a default value when not get smi size */
smi_sz = 64;
- mi->bank[0].size = (101*1024*1024);
}
}
diff --git a/arch/arm/mach-msm/board-trout.c b/arch/arm/mach-msm/board-trout.c
index 5edfbd904d06..f72b07de2152 100644
--- a/arch/arm/mach-msm/board-trout.c
+++ b/arch/arm/mach-msm/board-trout.c
@@ -19,6 +19,7 @@
#include <linux/init.h>
#include <linux/platform_device.h>
#include <linux/clkdev.h>
+#include <linux/memblock.h>
#include <asm/system_info.h>
#include <asm/mach-types.h>
@@ -55,12 +56,9 @@ static void __init trout_init_irq(void)
msm_init_irq();
}
-static void __init trout_fixup(struct tag *tags, char **cmdline,
- struct meminfo *mi)
+static void __init trout_fixup(struct tag *tags, char **cmdline)
{
- mi->nr_banks = 1;
- mi->bank[0].start = PHYS_OFFSET;
- mi->bank[0].size = (101*1024*1024);
+ memblock_add(PHYS_OFFSET, 101*SZ_1M);
}
static void __init trout_init(void)
diff --git a/arch/arm/mach-mvebu/board-v7.c b/arch/arm/mach-mvebu/board-v7.c
index 01cfce6ac20b..8bb742fdf5ca 100644
--- a/arch/arm/mach-mvebu/board-v7.c
+++ b/arch/arm/mach-mvebu/board-v7.c
@@ -78,7 +78,6 @@ static void __init mvebu_timer_and_clk_init(void)
mvebu_scu_enable();
coherency_init();
BUG_ON(mvebu_mbus_dt_init(coherency_available()));
- l2x0_of_init(0, ~0UL);
if (of_machine_is_compatible("marvell,armada375"))
hook_fault_code(16 + 6, armada_375_external_abort_wa, SIGBUS, 0,
@@ -182,6 +181,8 @@ static const char * const armada_370_xp_dt_compat[] = {
};
DT_MACHINE_START(ARMADA_370_XP_DT, "Marvell Armada 370/XP (Device Tree)")
+ .l2c_aux_val = 0,
+ .l2c_aux_mask = ~0,
.smp = smp_ops(armada_xp_smp_ops),
.init_machine = mvebu_dt_init,
.init_time = mvebu_timer_and_clk_init,
@@ -195,6 +196,8 @@ static const char * const armada_375_dt_compat[] = {
};
DT_MACHINE_START(ARMADA_375_DT, "Marvell Armada 375 (Device Tree)")
+ .l2c_aux_val = 0,
+ .l2c_aux_mask = ~0,
.init_time = mvebu_timer_and_clk_init,
.init_machine = mvebu_dt_init,
.restart = mvebu_restart,
@@ -208,6 +211,8 @@ static const char * const armada_38x_dt_compat[] = {
};
DT_MACHINE_START(ARMADA_38X_DT, "Marvell Armada 380/385 (Device Tree)")
+ .l2c_aux_val = 0,
+ .l2c_aux_mask = ~0,
.init_time = mvebu_timer_and_clk_init,
.restart = mvebu_restart,
.dt_compat = armada_38x_dt_compat,
diff --git a/arch/arm/mach-nomadik/cpu-8815.c b/arch/arm/mach-nomadik/cpu-8815.c
index 4a1065e41e9c..9116ca476d7c 100644
--- a/arch/arm/mach-nomadik/cpu-8815.c
+++ b/arch/arm/mach-nomadik/cpu-8815.c
@@ -143,23 +143,16 @@ static int __init cpu8815_mmcsd_init(void)
}
device_initcall(cpu8815_mmcsd_init);
-static void __init cpu8815_init_of(void)
-{
-#ifdef CONFIG_CACHE_L2X0
- /* At full speed latency must be >=2, so 0x249 in low bits */
- l2x0_of_init(0x00730249, 0xfe000fff);
-#endif
- of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL);
-}
-
static const char * cpu8815_board_compat[] = {
"calaosystems,usb-s8815",
NULL,
};
DT_MACHINE_START(NOMADIK_DT, "Nomadik STn8815")
+ /* At full speed latency must be >=2, so 0x249 in low bits */
+ .l2c_aux_val = 0x00700249,
+ .l2c_aux_mask = 0xfe0fefff,
.map_io = cpu8815_map_io,
- .init_machine = cpu8815_init_of,
.restart = cpu8815_restart,
.dt_compat = cpu8815_board_compat,
MACHINE_END
diff --git a/arch/arm/mach-omap2/Kconfig b/arch/arm/mach-omap2/Kconfig
index cb31d4390d52..0ba482638ebf 100644
--- a/arch/arm/mach-omap2/Kconfig
+++ b/arch/arm/mach-omap2/Kconfig
@@ -65,6 +65,7 @@ config SOC_AM43XX
select ARCH_HAS_OPP
select ARM_GIC
select MACH_OMAP_GENERIC
+ select MIGHT_HAVE_CACHE_L2X0
config SOC_DRA7XX
bool "TI DRA7XX"
diff --git a/arch/arm/mach-omap2/common.h b/arch/arm/mach-omap2/common.h
index d88aff7baff8..ff029737c8f0 100644
--- a/arch/arm/mach-omap2/common.h
+++ b/arch/arm/mach-omap2/common.h
@@ -91,6 +91,7 @@ extern void omap3_sync32k_timer_init(void);
extern void omap3_secure_sync32k_timer_init(void);
extern void omap3_gptimer_timer_init(void);
extern void omap4_local_timer_init(void);
+int omap_l2_cache_init(void);
extern void omap5_realtime_timer_init(void);
void omap2420_init_early(void);
diff --git a/arch/arm/mach-omap2/io.c b/arch/arm/mach-omap2/io.c
index 4ec3b4a93843..8f559450c876 100644
--- a/arch/arm/mach-omap2/io.c
+++ b/arch/arm/mach-omap2/io.c
@@ -609,6 +609,7 @@ void __init am43xx_init_early(void)
am43xx_clockdomains_init();
am43xx_hwmod_init();
omap_hwmod_init_postsetup();
+ omap_l2_cache_init();
omap_clk_soc_init = am43xx_dt_clk_init;
}
@@ -640,6 +641,7 @@ void __init omap4430_init_early(void)
omap44xx_clockdomains_init();
omap44xx_hwmod_init();
omap_hwmod_init_postsetup();
+ omap_l2_cache_init();
omap_clk_soc_init = omap4xxx_dt_clk_init;
}
diff --git a/arch/arm/mach-omap2/omap-mpuss-lowpower.c b/arch/arm/mach-omap2/omap-mpuss-lowpower.c
index eb76e47091ad..4001325f90fb 100644
--- a/arch/arm/mach-omap2/omap-mpuss-lowpower.c
+++ b/arch/arm/mach-omap2/omap-mpuss-lowpower.c
@@ -187,19 +187,15 @@ static void l2x0_pwrst_prepare(unsigned int cpu_id, unsigned int save_state)
* in every restore MPUSS OFF path.
*/
#ifdef CONFIG_CACHE_L2X0
-static void save_l2x0_context(void)
+static void __init save_l2x0_context(void)
{
- u32 val;
- void __iomem *l2x0_base = omap4_get_l2cache_base();
- if (l2x0_base) {
- val = readl_relaxed(l2x0_base + L2X0_AUX_CTRL);
- writel_relaxed(val, sar_base + L2X0_AUXCTRL_OFFSET);
- val = readl_relaxed(l2x0_base + L2X0_PREFETCH_CTRL);
- writel_relaxed(val, sar_base + L2X0_PREFETCH_CTRL_OFFSET);
- }
+ writel_relaxed(l2x0_saved_regs.aux_ctrl,
+ sar_base + L2X0_AUXCTRL_OFFSET);
+ writel_relaxed(l2x0_saved_regs.prefetch_ctrl,
+ sar_base + L2X0_PREFETCH_CTRL_OFFSET);
}
#else
-static void save_l2x0_context(void)
+static void __init save_l2x0_context(void)
{}
#endif
diff --git a/arch/arm/mach-omap2/omap4-common.c b/arch/arm/mach-omap2/omap4-common.c
index 99b0154493a4..326cd982a3cb 100644
--- a/arch/arm/mach-omap2/omap4-common.c
+++ b/arch/arm/mach-omap2/omap4-common.c
@@ -167,75 +167,57 @@ void __iomem *omap4_get_l2cache_base(void)
return l2cache_base;
}
-static void omap4_l2x0_disable(void)
+static void omap4_l2c310_write_sec(unsigned long val, unsigned reg)
{
- outer_flush_all();
- /* Disable PL310 L2 Cache controller */
- omap_smc1(0x102, 0x0);
-}
+ unsigned smc_op;
-static void omap4_l2x0_set_debug(unsigned long val)
-{
- /* Program PL310 L2 Cache controller debug register */
- omap_smc1(0x100, val);
+ switch (reg) {
+ case L2X0_CTRL:
+ smc_op = OMAP4_MON_L2X0_CTRL_INDEX;
+ break;
+
+ case L2X0_AUX_CTRL:
+ smc_op = OMAP4_MON_L2X0_AUXCTRL_INDEX;
+ break;
+
+ case L2X0_DEBUG_CTRL:
+ smc_op = OMAP4_MON_L2X0_DBG_CTRL_INDEX;
+ break;
+
+ case L310_PREFETCH_CTRL:
+ smc_op = OMAP4_MON_L2X0_PREFETCH_INDEX;
+ break;
+
+ default:
+ WARN_ONCE(1, "OMAP L2C310: ignoring write to reg 0x%x\n", reg);
+ return;
+ }
+
+ omap_smc1(smc_op, val);
}
-static int __init omap_l2_cache_init(void)
+int __init omap_l2_cache_init(void)
{
- u32 aux_ctrl = 0;
-
- /*
- * To avoid code running on other OMAPs in
- * multi-omap builds
- */
- if (!cpu_is_omap44xx())
- return -ENODEV;
+ u32 aux_ctrl;
/* Static mapping, never released */
l2cache_base = ioremap(OMAP44XX_L2CACHE_BASE, SZ_4K);
if (WARN_ON(!l2cache_base))
return -ENOMEM;
- /*
- * 16-way associativity, parity disabled
- * Way size - 32KB (es1.0)
- * Way size - 64KB (es2.0 +)
- */
- aux_ctrl = ((1 << L2X0_AUX_CTRL_ASSOCIATIVITY_SHIFT) |
- (0x1 << 25) |
- (0x1 << L2X0_AUX_CTRL_NS_LOCKDOWN_SHIFT) |
- (0x1 << L2X0_AUX_CTRL_NS_INT_CTRL_SHIFT));
-
- if (omap_rev() == OMAP4430_REV_ES1_0) {
- aux_ctrl |= 0x2 << L2X0_AUX_CTRL_WAY_SIZE_SHIFT;
- } else {
- aux_ctrl |= ((0x3 << L2X0_AUX_CTRL_WAY_SIZE_SHIFT) |
- (1 << L2X0_AUX_CTRL_SHARE_OVERRIDE_SHIFT) |
- (1 << L2X0_AUX_CTRL_DATA_PREFETCH_SHIFT) |
- (1 << L2X0_AUX_CTRL_INSTR_PREFETCH_SHIFT) |
- (1 << L2X0_AUX_CTRL_EARLY_BRESP_SHIFT));
- }
- if (omap_rev() != OMAP4430_REV_ES1_0)
- omap_smc1(0x109, aux_ctrl);
-
- /* Enable PL310 L2 Cache controller */
- omap_smc1(0x102, 0x1);
+ /* 16-way associativity, parity disabled, way size - 64KB (es2.0 +) */
+ aux_ctrl = L2C_AUX_CTRL_SHARED_OVERRIDE |
+ L310_AUX_CTRL_DATA_PREFETCH |
+ L310_AUX_CTRL_INSTR_PREFETCH;
+ outer_cache.write_sec = omap4_l2c310_write_sec;
if (of_have_populated_dt())
- l2x0_of_init(aux_ctrl, L2X0_AUX_CTRL_MASK);
+ l2x0_of_init(aux_ctrl, 0xcf9fffff);
else
- l2x0_init(l2cache_base, aux_ctrl, L2X0_AUX_CTRL_MASK);
-
- /*
- * Override default outer_cache.disable with a OMAP4
- * specific one
- */
- outer_cache.disable = omap4_l2x0_disable;
- outer_cache.set_debug = omap4_l2x0_set_debug;
+ l2x0_init(l2cache_base, aux_ctrl, 0xcf9fffff);
return 0;
}
-omap_early_initcall(omap_l2_cache_init);
#endif
void __iomem *omap4_get_sar_ram_base(void)
diff --git a/arch/arm/mach-orion5x/common.c b/arch/arm/mach-orion5x/common.c
index 3f1de1111e0f..6bbb7b55c6d1 100644
--- a/arch/arm/mach-orion5x/common.c
+++ b/arch/arm/mach-orion5x/common.c
@@ -365,8 +365,7 @@ void orion5x_restart(enum reboot_mode mode, const char *cmd)
* Many orion-based systems have buggy bootloader implementations.
* This is a common fixup for bogus memory tags.
*/
-void __init tag_fixup_mem32(struct tag *t, char **from,
- struct meminfo *meminfo)
+void __init tag_fixup_mem32(struct tag *t, char **from)
{
for (; t->hdr.size; t = tag_next(t))
if (t->hdr.tag == ATAG_MEM &&
diff --git a/arch/arm/mach-orion5x/common.h b/arch/arm/mach-orion5x/common.h
index 26d6f34b6027..cd0389c6e822 100644
--- a/arch/arm/mach-orion5x/common.h
+++ b/arch/arm/mach-orion5x/common.h
@@ -64,9 +64,8 @@ int orion5x_pci_sys_setup(int nr, struct pci_sys_data *sys);
struct pci_bus *orion5x_pci_sys_scan_bus(int nr, struct pci_sys_data *sys);
int orion5x_pci_map_irq(const struct pci_dev *dev, u8 slot, u8 pin);
-struct meminfo;
struct tag;
-extern void __init tag_fixup_mem32(struct tag *, char **, struct meminfo *);
+extern void __init tag_fixup_mem32(struct tag *, char **);
#ifdef CONFIG_MACH_MSS2_DT
extern void mss2_init(void);
diff --git a/arch/arm/mach-prima2/Makefile b/arch/arm/mach-prima2/Makefile
index 7a6b4a323125..8846e7d87ea5 100644
--- a/arch/arm/mach-prima2/Makefile
+++ b/arch/arm/mach-prima2/Makefile
@@ -2,7 +2,6 @@ obj-y += rstc.o
obj-y += common.o
obj-y += rtciobrg.o
obj-$(CONFIG_DEBUG_LL) += lluart.o
-obj-$(CONFIG_CACHE_L2X0) += l2x0.o
obj-$(CONFIG_SUSPEND) += pm.o sleep.o
obj-$(CONFIG_SMP) += platsmp.o headsmp.o
obj-$(CONFIG_HOTPLUG_CPU) += hotplug.o
diff --git a/arch/arm/mach-prima2/common.c b/arch/arm/mach-prima2/common.c
index 47c7819edb9b..a860ea27e8ae 100644
--- a/arch/arm/mach-prima2/common.c
+++ b/arch/arm/mach-prima2/common.c
@@ -34,6 +34,8 @@ static const char *atlas6_dt_match[] __initconst = {
DT_MACHINE_START(ATLAS6_DT, "Generic ATLAS6 (Flattened Device Tree)")
/* Maintainer: Barry Song <baohua.song@csr.com> */
+ .l2c_aux_val = 0,
+ .l2c_aux_mask = ~0,
.map_io = sirfsoc_map_io,
.init_late = sirfsoc_init_late,
.dt_compat = atlas6_dt_match,
@@ -48,6 +50,8 @@ static const char *prima2_dt_match[] __initconst = {
DT_MACHINE_START(PRIMA2_DT, "Generic PRIMA2 (Flattened Device Tree)")
/* Maintainer: Barry Song <baohua.song@csr.com> */
+ .l2c_aux_val = 0,
+ .l2c_aux_mask = ~0,
.map_io = sirfsoc_map_io,
.dma_zone_size = SZ_256M,
.init_late = sirfsoc_init_late,
@@ -63,6 +67,8 @@ static const char *marco_dt_match[] __initconst = {
DT_MACHINE_START(MARCO_DT, "Generic MARCO (Flattened Device Tree)")
/* Maintainer: Barry Song <baohua.song@csr.com> */
+ .l2c_aux_val = 0,
+ .l2c_aux_mask = ~0,
.smp = smp_ops(sirfsoc_smp_ops),
.map_io = sirfsoc_map_io,
.init_late = sirfsoc_init_late,
diff --git a/arch/arm/mach-prima2/l2x0.c b/arch/arm/mach-prima2/l2x0.c
deleted file mode 100644
index c7102539c0b0..000000000000
--- a/arch/arm/mach-prima2/l2x0.c
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * l2 cache initialization for CSR SiRFprimaII
- *
- * Copyright (c) 2011 Cambridge Silicon Radio Limited, a CSR plc group company.
- *
- * Licensed under GPLv2 or later.
- */
-
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/of.h>
-#include <asm/hardware/cache-l2x0.h>
-
-struct l2x0_aux {
- u32 val;
- u32 mask;
-};
-
-static const struct l2x0_aux prima2_l2x0_aux __initconst = {
- .val = 2 << L2X0_AUX_CTRL_WAY_SIZE_SHIFT,
- .mask = 0,
-};
-
-static const struct l2x0_aux marco_l2x0_aux __initconst = {
- .val = (2 << L2X0_AUX_CTRL_WAY_SIZE_SHIFT) |
- (1 << L2X0_AUX_CTRL_ASSOCIATIVITY_SHIFT),
- .mask = L2X0_AUX_CTRL_MASK,
-};
-
-static const struct of_device_id sirf_l2x0_ids[] __initconst = {
- { .compatible = "sirf,prima2-pl310-cache", .data = &prima2_l2x0_aux, },
- { .compatible = "sirf,marco-pl310-cache", .data = &marco_l2x0_aux, },
- {},
-};
-
-static int __init sirfsoc_l2x0_init(void)
-{
- struct device_node *np;
- const struct l2x0_aux *aux;
-
- np = of_find_matching_node(NULL, sirf_l2x0_ids);
- if (np) {
- aux = of_match_node(sirf_l2x0_ids, np)->data;
- return l2x0_of_init(aux->val, aux->mask);
- }
-
- return 0;
-}
-early_initcall(sirfsoc_l2x0_init);
diff --git a/arch/arm/mach-prima2/pm.c b/arch/arm/mach-prima2/pm.c
index c4525a88e5da..96e9bc102117 100644
--- a/arch/arm/mach-prima2/pm.c
+++ b/arch/arm/mach-prima2/pm.c
@@ -71,7 +71,6 @@ static int sirfsoc_pm_enter(suspend_state_t state)
case PM_SUSPEND_MEM:
sirfsoc_pre_suspend_power_off();
- outer_flush_all();
outer_disable();
/* go zzz */
cpu_suspend(0, sirfsoc_finish_suspend);
diff --git a/arch/arm/mach-pxa/cm-x300.c b/arch/arm/mach-pxa/cm-x300.c
index 584439bfa59f..4d3588d26c2a 100644
--- a/arch/arm/mach-pxa/cm-x300.c
+++ b/arch/arm/mach-pxa/cm-x300.c
@@ -837,8 +837,7 @@ static void __init cm_x300_init(void)
cm_x300_init_bl();
}
-static void __init cm_x300_fixup(struct tag *tags, char **cmdline,
- struct meminfo *mi)
+static void __init cm_x300_fixup(struct tag *tags, char **cmdline)
{
/* Make sure that mi->bank[0].start = PHYS_ADDR */
for (; tags->hdr.size; tags = tag_next(tags))
diff --git a/arch/arm/mach-pxa/corgi.c b/arch/arm/mach-pxa/corgi.c
index 57d60542f982..91dd1c7cdbcd 100644
--- a/arch/arm/mach-pxa/corgi.c
+++ b/arch/arm/mach-pxa/corgi.c
@@ -34,6 +34,7 @@
#include <linux/input/matrix_keypad.h>
#include <linux/gpio_keys.h>
#include <linux/module.h>
+#include <linux/memblock.h>
#include <video/w100fb.h>
#include <asm/setup.h>
@@ -753,16 +754,13 @@ static void __init corgi_init(void)
platform_add_devices(devices, ARRAY_SIZE(devices));
}
-static void __init fixup_corgi(struct tag *tags, char **cmdline,
- struct meminfo *mi)
+static void __init fixup_corgi(struct tag *tags, char **cmdline)
{
sharpsl_save_param();
- mi->nr_banks=1;
- mi->bank[0].start = 0xa0000000;
if (machine_is_corgi())
- mi->bank[0].size = (32*1024*1024);
+ memblock_add(0xa0000000, SZ_32M);
else
- mi->bank[0].size = (64*1024*1024);
+ memblock_add(0xa0000000, SZ_64M);
}
#ifdef CONFIG_MACH_CORGI
diff --git a/arch/arm/mach-pxa/eseries.c b/arch/arm/mach-pxa/eseries.c
index 8280ebcaab9f..cfb864173ce3 100644
--- a/arch/arm/mach-pxa/eseries.c
+++ b/arch/arm/mach-pxa/eseries.c
@@ -21,6 +21,7 @@
#include <linux/mtd/nand.h>
#include <linux/mtd/partitions.h>
#include <linux/usb/gpio_vbus.h>
+#include <linux/memblock.h>
#include <video/w100fb.h>
@@ -41,14 +42,12 @@
#include "clock.h"
/* Only e800 has 128MB RAM */
-void __init eseries_fixup(struct tag *tags, char **cmdline, struct meminfo *mi)
+void __init eseries_fixup(struct tag *tags, char **cmdline)
{
- mi->nr_banks=1;
- mi->bank[0].start = 0xa0000000;
if (machine_is_e800())
- mi->bank[0].size = (128*1024*1024);
+ memblock_add(0xa0000000, SZ_128M);
else
- mi->bank[0].size = (64*1024*1024);
+ memblock_add(0xa0000000, SZ_64M);
}
struct gpio_vbus_mach_info e7xx_udc_info = {
diff --git a/arch/arm/mach-pxa/poodle.c b/arch/arm/mach-pxa/poodle.c
index aedf053a1de5..131991629116 100644
--- a/arch/arm/mach-pxa/poodle.c
+++ b/arch/arm/mach-pxa/poodle.c
@@ -29,6 +29,7 @@
#include <linux/spi/ads7846.h>
#include <linux/spi/pxa2xx_spi.h>
#include <linux/mtd/sharpsl.h>
+#include <linux/memblock.h>
#include <mach/hardware.h>
#include <asm/mach-types.h>
@@ -456,13 +457,10 @@ static void __init poodle_init(void)
poodle_init_spi();
}
-static void __init fixup_poodle(struct tag *tags, char **cmdline,
- struct meminfo *mi)
+static void __init fixup_poodle(struct tag *tags, char **cmdline)
{
sharpsl_save_param();
- mi->nr_banks=1;
- mi->bank[0].start = 0xa0000000;
- mi->bank[0].size = (32*1024*1024);
+ memblock_add(0xa0000000, SZ_32M);
}
MACHINE_START(POODLE, "SHARP Poodle")
diff --git a/arch/arm/mach-pxa/spitz.c b/arch/arm/mach-pxa/spitz.c
index 0b11c1af51c4..840c3a48e720 100644
--- a/arch/arm/mach-pxa/spitz.c
+++ b/arch/arm/mach-pxa/spitz.c
@@ -32,6 +32,7 @@
#include <linux/io.h>
#include <linux/module.h>
#include <linux/reboot.h>
+#include <linux/memblock.h>
#include <asm/setup.h>
#include <asm/mach-types.h>
@@ -971,13 +972,10 @@ static void __init spitz_init(void)
spitz_i2c_init();
}
-static void __init spitz_fixup(struct tag *tags, char **cmdline,
- struct meminfo *mi)
+static void __init spitz_fixup(struct tag *tags, char **cmdline)
{
sharpsl_save_param();
- mi->nr_banks = 1;
- mi->bank[0].start = 0xa0000000;
- mi->bank[0].size = (64*1024*1024);
+ memblock_add(0xa0000000, SZ_64M);
}
#ifdef CONFIG_MACH_SPITZ
diff --git a/arch/arm/mach-pxa/tosa.c b/arch/arm/mach-pxa/tosa.c
index ef5557b807ed..c158a6e3e0aa 100644
--- a/arch/arm/mach-pxa/tosa.c
+++ b/arch/arm/mach-pxa/tosa.c
@@ -37,6 +37,7 @@
#include <linux/i2c/pxa-i2c.h>
#include <linux/usb/gpio_vbus.h>
#include <linux/reboot.h>
+#include <linux/memblock.h>
#include <asm/setup.h>
#include <asm/mach-types.h>
@@ -960,13 +961,10 @@ static void __init tosa_init(void)
platform_add_devices(devices, ARRAY_SIZE(devices));
}
-static void __init fixup_tosa(struct tag *tags, char **cmdline,
- struct meminfo *mi)
+static void __init fixup_tosa(struct tag *tags, char **cmdline)
{
sharpsl_save_param();
- mi->nr_banks=1;
- mi->bank[0].start = 0xa0000000;
- mi->bank[0].size = (64*1024*1024);
+ memblock_add(0xa0000000, SZ_64M);
}
MACHINE_START(TOSA, "SHARP Tosa")
diff --git a/arch/arm/mach-realview/core.c b/arch/arm/mach-realview/core.c
index 960b8dd78c44..8c1b39a0caa0 100644
--- a/arch/arm/mach-realview/core.c
+++ b/arch/arm/mach-realview/core.c
@@ -31,6 +31,7 @@
#include <linux/amba/mmci.h>
#include <linux/gfp.h>
#include <linux/mtd/physmap.h>
+#include <linux/memblock.h>
#include <mach/hardware.h>
#include <asm/irq.h>
@@ -385,19 +386,15 @@ void __init realview_timer_init(unsigned int timer_irq)
/*
* Setup the memory banks.
*/
-void realview_fixup(struct tag *tags, char **from, struct meminfo *meminfo)
+void realview_fixup(struct tag *tags, char **from)
{
/*
* Most RealView platforms have 512MB contiguous RAM at 0x70000000.
* Half of this is mirrored at 0.
*/
#ifdef CONFIG_REALVIEW_HIGH_PHYS_OFFSET
- meminfo->bank[0].start = 0x70000000;
- meminfo->bank[0].size = SZ_512M;
- meminfo->nr_banks = 1;
+ memblock_add(0x70000000, SZ_512M);
#else
- meminfo->bank[0].start = 0;
- meminfo->bank[0].size = SZ_256M;
- meminfo->nr_banks = 1;
+ memblock_add(0, SZ_256M);
#endif
}
diff --git a/arch/arm/mach-realview/core.h b/arch/arm/mach-realview/core.h
index 13dc830ef469..868ece221978 100644
--- a/arch/arm/mach-realview/core.h
+++ b/arch/arm/mach-realview/core.h
@@ -52,8 +52,7 @@ extern int realview_flash_register(struct resource *res, u32 num);
extern int realview_eth_register(const char *name, struct resource *res);
extern int realview_usb_register(struct resource *res);
extern void realview_init_early(void);
-extern void realview_fixup(struct tag *tags, char **from,
- struct meminfo *meminfo);
+extern void realview_fixup(struct tag *tags, char **from);
extern struct smp_operations realview_smp_ops;
extern void realview_cpu_die(unsigned int cpu);
diff --git a/arch/arm/mach-realview/realview_eb.c b/arch/arm/mach-realview/realview_eb.c
index 6bb070e80128..739d4f113097 100644
--- a/arch/arm/mach-realview/realview_eb.c
+++ b/arch/arm/mach-realview/realview_eb.c
@@ -442,8 +442,13 @@ static void __init realview_eb_init(void)
realview_eb11mp_fixup();
#ifdef CONFIG_CACHE_L2X0
- /* 1MB (128KB/way), 8-way associativity, evmon/parity/share enabled
- * Bits: .... ...0 0111 1001 0000 .... .... .... */
+ /*
+ * The PL220 needs to be manually configured as the hardware
+ * doesn't report the correct sizes.
+ * 1MB (128KB/way), 8-way associativity, event monitor and
+ * parity enabled, ignore share bit, no force write allocate
+ * Bits: .... ...0 0111 1001 0000 .... .... ....
+ */
l2x0_init(__io_address(REALVIEW_EB11MP_L220_BASE), 0x00790000, 0xfe000fff);
#endif
platform_device_register(&pmu_device);
diff --git a/arch/arm/mach-realview/realview_pb1176.c b/arch/arm/mach-realview/realview_pb1176.c
index 173f2c15de49..b0e0dcaed944 100644
--- a/arch/arm/mach-realview/realview_pb1176.c
+++ b/arch/arm/mach-realview/realview_pb1176.c
@@ -32,6 +32,7 @@
#include <linux/irqchip/arm-gic.h>
#include <linux/platform_data/clk-realview.h>
#include <linux/reboot.h>
+#include <linux/memblock.h>
#include <mach/hardware.h>
#include <asm/irq.h>
@@ -339,15 +340,12 @@ static void realview_pb1176_restart(enum reboot_mode mode, const char *cmd)
dsb();
}
-static void realview_pb1176_fixup(struct tag *tags, char **from,
- struct meminfo *meminfo)
+static void realview_pb1176_fixup(struct tag *tags, char **from)
{
/*
* RealView PB1176 only has 128MB of RAM mapped at 0.
*/
- meminfo->bank[0].start = 0;
- meminfo->bank[0].size = SZ_128M;
- meminfo->nr_banks = 1;
+ memblock_add(0, SZ_128M);
}
static void __init realview_pb1176_init(void)
@@ -355,7 +353,13 @@ static void __init realview_pb1176_init(void)
int i;
#ifdef CONFIG_CACHE_L2X0
- /* 128Kb (16Kb/way) 8-way associativity. evmon/parity/share enabled. */
+ /*
+ * The PL220 needs to be manually configured as the hardware
+ * doesn't report the correct sizes.
+ * 128kB (16kB/way), 8-way associativity, event monitor and
+ * parity enabled, ignore share bit, no force write allocate
+ * Bits: .... ...0 0111 0011 0000 .... .... ....
+ */
l2x0_init(__io_address(REALVIEW_PB1176_L220_BASE), 0x00730000, 0xfe000fff);
#endif
diff --git a/arch/arm/mach-realview/realview_pb11mp.c b/arch/arm/mach-realview/realview_pb11mp.c
index bde7e6b1fd44..47bf55fdbf27 100644
--- a/arch/arm/mach-realview/realview_pb11mp.c
+++ b/arch/arm/mach-realview/realview_pb11mp.c
@@ -337,8 +337,13 @@ static void __init realview_pb11mp_init(void)
int i;
#ifdef CONFIG_CACHE_L2X0
- /* 1MB (128KB/way), 8-way associativity, evmon/parity/share enabled
- * Bits: .... ...0 0111 1001 0000 .... .... .... */
+ /*
+ * The PL220 needs to be manually configured as the hardware
+ * doesn't report the correct sizes.
+ * 1MB (128KB/way), 8-way associativity, event monitor and
+ * parity enabled, ignore share bit, no force write allocate
+ * Bits: .... ...0 0111 1001 0000 .... .... ....
+ */
l2x0_init(__io_address(REALVIEW_TC11MP_L220_BASE), 0x00790000, 0xfe000fff);
#endif
diff --git a/arch/arm/mach-realview/realview_pbx.c b/arch/arm/mach-realview/realview_pbx.c
index 72c96caebefa..d89eb4023467 100644
--- a/arch/arm/mach-realview/realview_pbx.c
+++ b/arch/arm/mach-realview/realview_pbx.c
@@ -29,6 +29,7 @@
#include <linux/irqchip/arm-gic.h>
#include <linux/platform_data/clk-realview.h>
#include <linux/reboot.h>
+#include <linux/memblock.h>
#include <asm/irq.h>
#include <asm/mach-types.h>
@@ -325,23 +326,19 @@ static void __init realview_pbx_timer_init(void)
realview_pbx_twd_init();
}
-static void realview_pbx_fixup(struct tag *tags, char **from,
- struct meminfo *meminfo)
+static void realview_pbx_fixup(struct tag *tags, char **from)
{
#ifdef CONFIG_SPARSEMEM
/*
* Memory configuration with SPARSEMEM enabled on RealView PBX (see
* asm/mach/memory.h for more information).
*/
- meminfo->bank[0].start = 0;
- meminfo->bank[0].size = SZ_256M;
- meminfo->bank[1].start = 0x20000000;
- meminfo->bank[1].size = SZ_512M;
- meminfo->bank[2].start = 0x80000000;
- meminfo->bank[2].size = SZ_256M;
- meminfo->nr_banks = 3;
+
+ memblock_add(0, SZ_256M);
+ memblock_add(0x20000000, SZ_512M);
+ memblock_add(0x80000000, SZ_256M);
#else
- realview_fixup(tags, from, meminfo);
+ realview_fixup(tags, from);
#endif
}
@@ -370,8 +367,8 @@ static void __init realview_pbx_init(void)
__io_address(REALVIEW_PBX_TILE_L220_BASE);
/* set RAM latencies to 1 cycle for eASIC */
- writel(0, l2x0_base + L2X0_TAG_LATENCY_CTRL);
- writel(0, l2x0_base + L2X0_DATA_LATENCY_CTRL);
+ writel(0, l2x0_base + L310_TAG_LATENCY_CTRL);
+ writel(0, l2x0_base + L310_DATA_LATENCY_CTRL);
/* 16KB way size, 8-way associativity, parity disabled
* Bits: .. 0 0 0 0 1 00 1 0 1 001 0 000 0 .... .... .... */
diff --git a/arch/arm/mach-rockchip/rockchip.c b/arch/arm/mach-rockchip/rockchip.c
index 4499b0a31a27..968cc348e624 100644
--- a/arch/arm/mach-rockchip/rockchip.c
+++ b/arch/arm/mach-rockchip/rockchip.c
@@ -24,12 +24,6 @@
#include <asm/hardware/cache-l2x0.h>
#include "core.h"
-static void __init rockchip_dt_init(void)
-{
- l2x0_of_init(0, ~0UL);
- of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL);
-}
-
static const char * const rockchip_board_dt_compat[] = {
"rockchip,rk2928",
"rockchip,rk3066a",
@@ -39,6 +33,7 @@ static const char * const rockchip_board_dt_compat[] = {
};
DT_MACHINE_START(ROCKCHIP_DT, "Rockchip Cortex-A9 (Device Tree)")
- .init_machine = rockchip_dt_init,
+ .l2c_aux_val = 0,
+ .l2c_aux_mask = ~0,
.dt_compat = rockchip_board_dt_compat,
MACHINE_END
diff --git a/arch/arm/mach-s3c24xx/mach-smdk2413.c b/arch/arm/mach-s3c24xx/mach-smdk2413.c
index a38f8a049e22..fb3b80e44595 100644
--- a/arch/arm/mach-s3c24xx/mach-smdk2413.c
+++ b/arch/arm/mach-s3c24xx/mach-smdk2413.c
@@ -22,6 +22,7 @@
#include <linux/serial_s3c.h>
#include <linux/platform_device.h>
#include <linux/io.h>
+#include <linux/memblock.h>
#include <asm/mach/arch.h>
#include <asm/mach/map.h>
@@ -93,13 +94,10 @@ static struct platform_device *smdk2413_devices[] __initdata = {
&s3c2412_device_dma,
};
-static void __init smdk2413_fixup(struct tag *tags, char **cmdline,
- struct meminfo *mi)
+static void __init smdk2413_fixup(struct tag *tags, char **cmdline)
{
if (tags != phys_to_virt(S3C2410_SDRAM_PA + 0x100)) {
- mi->nr_banks=1;
- mi->bank[0].start = 0x30000000;
- mi->bank[0].size = SZ_64M;
+ memblock_add(0x30000000, SZ_64M);
}
}
diff --git a/arch/arm/mach-s3c24xx/mach-vstms.c b/arch/arm/mach-s3c24xx/mach-vstms.c
index 6b706c915387..9104c2be36c9 100644
--- a/arch/arm/mach-s3c24xx/mach-vstms.c
+++ b/arch/arm/mach-s3c24xx/mach-vstms.c
@@ -23,6 +23,7 @@
#include <linux/mtd/nand.h>
#include <linux/mtd/nand_ecc.h>
#include <linux/mtd/partitions.h>
+#include <linux/memblock.h>
#include <asm/mach/arch.h>
#include <asm/mach/map.h>
@@ -129,13 +130,10 @@ static struct platform_device *vstms_devices[] __initdata = {
&s3c2412_device_dma,
};
-static void __init vstms_fixup(struct tag *tags, char **cmdline,
- struct meminfo *mi)
+static void __init vstms_fixup(struct tag *tags, char **cmdline)
{
if (tags != phys_to_virt(S3C2410_SDRAM_PA + 0x100)) {
- mi->nr_banks=1;
- mi->bank[0].start = 0x30000000;
- mi->bank[0].size = SZ_64M;
+ memblock_add(0x30000000, SZ_64M);
}
}
diff --git a/arch/arm/mach-sa1100/assabet.c b/arch/arm/mach-sa1100/assabet.c
index 8443a27bca2f..7dd894ece9ae 100644
--- a/arch/arm/mach-sa1100/assabet.c
+++ b/arch/arm/mach-sa1100/assabet.c
@@ -531,7 +531,7 @@ static void __init get_assabet_scr(void)
}
static void __init
-fixup_assabet(struct tag *tags, char **cmdline, struct meminfo *mi)
+fixup_assabet(struct tag *tags, char **cmdline)
{
/* This must be done before any call to machine_has_neponset() */
map_sa1100_gpio_regs();
diff --git a/arch/arm/mach-shmobile/board-armadillo800eva-reference.c b/arch/arm/mach-shmobile/board-armadillo800eva-reference.c
index 57d246eb8813..f660fbb96e0b 100644
--- a/arch/arm/mach-shmobile/board-armadillo800eva-reference.c
+++ b/arch/arm/mach-shmobile/board-armadillo800eva-reference.c
@@ -164,8 +164,8 @@ static void __init eva_init(void)
r8a7740_meram_workaround();
#ifdef CONFIG_CACHE_L2X0
- /* Early BRESP enable, Shared attribute override enable, 32K*8way */
- l2x0_init(IOMEM(0xf0002000), 0x40440000, 0x82000fff);
+ /* Shared attribute override enable, 32K*8way */
+ l2x0_init(IOMEM(0xf0002000), 0x00400000, 0xc20f0fff);
#endif
r8a7740_add_standard_devices_dt();
diff --git a/arch/arm/mach-shmobile/board-armadillo800eva.c b/arch/arm/mach-shmobile/board-armadillo800eva.c
index bc2cf7a89534..01f81100c330 100644
--- a/arch/arm/mach-shmobile/board-armadillo800eva.c
+++ b/arch/arm/mach-shmobile/board-armadillo800eva.c
@@ -1271,8 +1271,8 @@ static void __init eva_init(void)
#ifdef CONFIG_CACHE_L2X0
- /* Early BRESP enable, Shared attribute override enable, 32K*8way */
- l2x0_init(IOMEM(0xf0002000), 0x40440000, 0x82000fff);
+ /* Shared attribute override enable, 32K*8way */
+ l2x0_init(IOMEM(0xf0002000), 0x00400000, 0xc20f0fff);
#endif
i2c_register_board_info(0, i2c0_devices, ARRAY_SIZE(i2c0_devices));
diff --git a/arch/arm/mach-shmobile/board-kzm9g-reference.c b/arch/arm/mach-shmobile/board-kzm9g-reference.c
index 598e32488410..a735a1d80c28 100644
--- a/arch/arm/mach-shmobile/board-kzm9g-reference.c
+++ b/arch/arm/mach-shmobile/board-kzm9g-reference.c
@@ -36,8 +36,8 @@ static void __init kzm_init(void)
sh73a0_add_standard_devices_dt();
#ifdef CONFIG_CACHE_L2X0
- /* Early BRESP enable, Shared attribute override enable, 64K*8way */
- l2x0_init(IOMEM(0xf0100000), 0x40460000, 0x82000fff);
+ /* Shared attribute override enable, 64K*8way */
+ l2x0_init(IOMEM(0xf0100000), 0x00400000, 0xc20f0fff);
#endif
}
diff --git a/arch/arm/mach-shmobile/board-kzm9g.c b/arch/arm/mach-shmobile/board-kzm9g.c
index 03dc3ac84502..f94ec8ca42c1 100644
--- a/arch/arm/mach-shmobile/board-kzm9g.c
+++ b/arch/arm/mach-shmobile/board-kzm9g.c
@@ -876,8 +876,8 @@ static void __init kzm_init(void)
gpio_request_one(223, GPIOF_IN, NULL); /* IRQ8 */
#ifdef CONFIG_CACHE_L2X0
- /* Early BRESP enable, Shared attribute override enable, 64K*8way */
- l2x0_init(IOMEM(0xf0100000), 0x40460000, 0x82000fff);
+ /* Shared attribute override enable, 64K*8way */
+ l2x0_init(IOMEM(0xf0100000), 0x00400000, 0xc20f0fff);
#endif
i2c_register_board_info(0, i2c0_devices, ARRAY_SIZE(i2c0_devices));
diff --git a/arch/arm/mach-shmobile/setup-r8a7778.c b/arch/arm/mach-shmobile/setup-r8a7778.c
index 8c02e24f2483..d311ef903b39 100644
--- a/arch/arm/mach-shmobile/setup-r8a7778.c
+++ b/arch/arm/mach-shmobile/setup-r8a7778.c
@@ -285,10 +285,10 @@ void __init r8a7778_add_dt_devices(void)
void __iomem *base = ioremap_nocache(0xf0100000, 0x1000);
if (base) {
/*
- * Early BRESP enable, Shared attribute override enable, 64K*16way
+ * Shared attribute override enable, 64K*16way
* don't call iounmap(base)
*/
- l2x0_init(base, 0x40470000, 0x82000fff);
+ l2x0_init(base, 0x00400000, 0xc20f0fff);
}
#endif
diff --git a/arch/arm/mach-shmobile/setup-r8a7779.c b/arch/arm/mach-shmobile/setup-r8a7779.c
index d197b5adc886..aba4ed652d54 100644
--- a/arch/arm/mach-shmobile/setup-r8a7779.c
+++ b/arch/arm/mach-shmobile/setup-r8a7779.c
@@ -660,8 +660,8 @@ static struct platform_device *r8a7779_standard_devices[] __initdata = {
void __init r8a7779_add_standard_devices(void)
{
#ifdef CONFIG_CACHE_L2X0
- /* Early BRESP enable, Shared attribute override enable, 64K*16way */
- l2x0_init(IOMEM(0xf0100000), 0x40470000, 0x82000fff);
+ /* Shared attribute override enable, 64K*16way */
+ l2x0_init(IOMEM(0xf0100000), 0x00400000, 0xc20f0fff);
#endif
r8a7779_pm_init();
diff --git a/arch/arm/mach-socfpga/socfpga.c b/arch/arm/mach-socfpga/socfpga.c
index d86231e11b34..adbf38314ca8 100644
--- a/arch/arm/mach-socfpga/socfpga.c
+++ b/arch/arm/mach-socfpga/socfpga.c
@@ -98,22 +98,17 @@ static void socfpga_cyclone5_restart(enum reboot_mode mode, const char *cmd)
writel(temp, rst_manager_base_addr + SOCFPGA_RSTMGR_CTRL);
}
-static void __init socfpga_cyclone5_init(void)
-{
- l2x0_of_init(0, ~0UL);
- of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL);
-}
-
static const char *altera_dt_match[] = {
"altr,socfpga",
NULL
};
DT_MACHINE_START(SOCFPGA, "Altera SOCFPGA")
+ .l2c_aux_val = 0,
+ .l2c_aux_mask = ~0,
.smp = smp_ops(socfpga_smp_ops),
.map_io = socfpga_map_io,
.init_irq = socfpga_init_irq,
- .init_machine = socfpga_cyclone5_init,
.restart = socfpga_cyclone5_restart,
.dt_compat = altera_dt_match,
MACHINE_END
diff --git a/arch/arm/mach-spear/platsmp.c b/arch/arm/mach-spear/platsmp.c
index c19751fff2c6..fd4297713d67 100644
--- a/arch/arm/mach-spear/platsmp.c
+++ b/arch/arm/mach-spear/platsmp.c
@@ -20,6 +20,18 @@
#include <mach/spear.h>
#include "generic.h"
+/*
+ * Write pen_release in a way that is guaranteed to be visible to all
+ * observers, irrespective of whether they're taking part in coherency
+ * or not. This is necessary for the hotplug code to work reliably.
+ */
+static void write_pen_release(int val)
+{
+ pen_release = val;
+ smp_wmb();
+ sync_cache_w(&pen_release);
+}
+
static DEFINE_SPINLOCK(boot_lock);
static void __iomem *scu_base = IOMEM(VA_SCU_BASE);
@@ -30,8 +42,7 @@ static void spear13xx_secondary_init(unsigned int cpu)
* let the primary processor know we're out of the
* pen, then head off into the C entry point
*/
- pen_release = -1;
- smp_wmb();
+ write_pen_release(-1);
/*
* Synchronise with the boot thread.
@@ -58,9 +69,7 @@ static int spear13xx_boot_secondary(unsigned int cpu, struct task_struct *idle)
* Note that "pen_release" is the hardware CPU ID, whereas
* "cpu" is Linux's internal ID.
*/
- pen_release = cpu;
- flush_cache_all();
- outer_flush_all();
+ write_pen_release(cpu);
timeout = jiffies + (1 * HZ);
while (time_before(jiffies, timeout)) {
diff --git a/arch/arm/mach-spear/spear13xx.c b/arch/arm/mach-spear/spear13xx.c
index 7aa6e8cf830f..c9897ea38980 100644
--- a/arch/arm/mach-spear/spear13xx.c
+++ b/arch/arm/mach-spear/spear13xx.c
@@ -38,15 +38,15 @@ void __init spear13xx_l2x0_init(void)
if (!IS_ENABLED(CONFIG_CACHE_L2X0))
return;
- writel_relaxed(0x06, VA_L2CC_BASE + L2X0_PREFETCH_CTRL);
+ writel_relaxed(0x06, VA_L2CC_BASE + L310_PREFETCH_CTRL);
/*
* Program following latencies in order to make
* SPEAr1340 work at 600 MHz
*/
- writel_relaxed(0x221, VA_L2CC_BASE + L2X0_TAG_LATENCY_CTRL);
- writel_relaxed(0x441, VA_L2CC_BASE + L2X0_DATA_LATENCY_CTRL);
- l2x0_init(VA_L2CC_BASE, 0x70A60001, 0xfe00ffff);
+ writel_relaxed(0x221, VA_L2CC_BASE + L310_TAG_LATENCY_CTRL);
+ writel_relaxed(0x441, VA_L2CC_BASE + L310_DATA_LATENCY_CTRL);
+ l2x0_init(VA_L2CC_BASE, 0x30a00001, 0xfe0fffff);
}
/*
diff --git a/arch/arm/mach-sti/board-dt.c b/arch/arm/mach-sti/board-dt.c
index df731f2322fa..3cf6ef8d4317 100644
--- a/arch/arm/mach-sti/board-dt.c
+++ b/arch/arm/mach-sti/board-dt.c
@@ -14,25 +14,6 @@
#include "smp.h"
-void __init stih41x_l2x0_init(void)
-{
- u32 way_size = 0x4;
- u32 aux_ctrl;
- /* may be this can be encoded in macros like BIT*() */
- aux_ctrl = (0x1 << L2X0_AUX_CTRL_SHARE_OVERRIDE_SHIFT) |
- (0x1 << L2X0_AUX_CTRL_DATA_PREFETCH_SHIFT) |
- (0x1 << L2X0_AUX_CTRL_INSTR_PREFETCH_SHIFT) |
- (way_size << L2X0_AUX_CTRL_WAY_SIZE_SHIFT);
-
- l2x0_of_init(aux_ctrl, L2X0_AUX_CTRL_MASK);
-}
-
-static void __init stih41x_machine_init(void)
-{
- stih41x_l2x0_init();
- of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL);
-}
-
static const char *stih41x_dt_match[] __initdata = {
"st,stih415",
"st,stih416",
@@ -41,7 +22,11 @@ static const char *stih41x_dt_match[] __initdata = {
};
DT_MACHINE_START(STM, "STiH415/416 SoC with Flattened Device Tree")
- .init_machine = stih41x_machine_init,
- .smp = smp_ops(sti_smp_ops),
.dt_compat = stih41x_dt_match,
+ .l2c_aux_val = L2C_AUX_CTRL_SHARED_OVERRIDE |
+ L310_AUX_CTRL_DATA_PREFETCH |
+ L310_AUX_CTRL_INSTR_PREFETCH |
+ L2C_AUX_CTRL_WAY_SIZE(4),
+ .l2c_aux_mask = 0xc0000fff,
+ .smp = smp_ops(sti_smp_ops),
MACHINE_END
diff --git a/arch/arm/mach-tegra/pm.h b/arch/arm/mach-tegra/pm.h
index 6e92a7c2ecbd..f4a89698e5b0 100644
--- a/arch/arm/mach-tegra/pm.h
+++ b/arch/arm/mach-tegra/pm.h
@@ -35,8 +35,6 @@ void tegra20_sleep_core_init(void);
void tegra30_lp1_iram_hook(void);
void tegra30_sleep_core_init(void);
-extern unsigned long l2x0_saved_regs_addr;
-
void tegra_clear_cpu_in_lp2(void);
bool tegra_set_cpu_in_lp2(void);
diff --git a/arch/arm/mach-tegra/reset-handler.S b/arch/arm/mach-tegra/reset-handler.S
index 8c1ba4fea384..578d4d1ad648 100644
--- a/arch/arm/mach-tegra/reset-handler.S
+++ b/arch/arm/mach-tegra/reset-handler.S
@@ -19,7 +19,6 @@
#include <asm/cache.h>
#include <asm/asm-offsets.h>
-#include <asm/hardware/cache-l2x0.h>
#include "flowctrl.h"
#include "fuse.h"
@@ -78,8 +77,10 @@ ENTRY(tegra_resume)
str r1, [r0]
#endif
+#ifdef CONFIG_CACHE_L2X0
/* L2 cache resume & re-enable */
- l2_cache_resume r0, r1, r2, l2x0_saved_regs_addr
+ bl l2c310_early_resume
+#endif
end_ca9_scu_l2_resume:
mov32 r9, 0xc0f
cmp r8, r9
@@ -89,12 +90,6 @@ end_ca9_scu_l2_resume:
ENDPROC(tegra_resume)
#endif
-#ifdef CONFIG_CACHE_L2X0
- .globl l2x0_saved_regs_addr
-l2x0_saved_regs_addr:
- .long 0
-#endif
-
.align L1_CACHE_SHIFT
ENTRY(__tegra_cpu_reset_handler_start)
diff --git a/arch/arm/mach-tegra/sleep.h b/arch/arm/mach-tegra/sleep.h
index a4edbb3abd3d..339fe42cd6fb 100644
--- a/arch/arm/mach-tegra/sleep.h
+++ b/arch/arm/mach-tegra/sleep.h
@@ -120,37 +120,6 @@
mov \tmp1, \tmp1, lsr #8
.endm
-/* Macro to resume & re-enable L2 cache */
-#ifndef L2X0_CTRL_EN
-#define L2X0_CTRL_EN 1
-#endif
-
-#ifdef CONFIG_CACHE_L2X0
-.macro l2_cache_resume, tmp1, tmp2, tmp3, phys_l2x0_saved_regs
- W(adr) \tmp1, \phys_l2x0_saved_regs
- ldr \tmp1, [\tmp1]
- ldr \tmp2, [\tmp1, #L2X0_R_PHY_BASE]
- ldr \tmp3, [\tmp2, #L2X0_CTRL]
- tst \tmp3, #L2X0_CTRL_EN
- bne exit_l2_resume
- ldr \tmp3, [\tmp1, #L2X0_R_TAG_LATENCY]
- str \tmp3, [\tmp2, #L2X0_TAG_LATENCY_CTRL]
- ldr \tmp3, [\tmp1, #L2X0_R_DATA_LATENCY]
- str \tmp3, [\tmp2, #L2X0_DATA_LATENCY_CTRL]
- ldr \tmp3, [\tmp1, #L2X0_R_PREFETCH_CTRL]
- str \tmp3, [\tmp2, #L2X0_PREFETCH_CTRL]
- ldr \tmp3, [\tmp1, #L2X0_R_PWR_CTRL]
- str \tmp3, [\tmp2, #L2X0_POWER_CTRL]
- ldr \tmp3, [\tmp1, #L2X0_R_AUX_CTRL]
- str \tmp3, [\tmp2, #L2X0_AUX_CTRL]
- mov \tmp3, #L2X0_CTRL_EN
- str \tmp3, [\tmp2, #L2X0_CTRL]
-exit_l2_resume:
-.endm
-#else /* CONFIG_CACHE_L2X0 */
-.macro l2_cache_resume, tmp1, tmp2, tmp3, phys_l2x0_saved_regs
-.endm
-#endif /* CONFIG_CACHE_L2X0 */
#else
void tegra_pen_lock(void);
void tegra_pen_unlock(void);
diff --git a/arch/arm/mach-tegra/tegra.c b/arch/arm/mach-tegra/tegra.c
index 6191603379e1..15ac9fcc96b1 100644
--- a/arch/arm/mach-tegra/tegra.c
+++ b/arch/arm/mach-tegra/tegra.c
@@ -70,40 +70,12 @@ u32 tegra_uart_config[3] = {
0,
};
-static void __init tegra_init_cache(void)
-{
-#ifdef CONFIG_CACHE_L2X0
- static const struct of_device_id pl310_ids[] __initconst = {
- { .compatible = "arm,pl310-cache", },
- {}
- };
-
- struct device_node *np;
- int ret;
- void __iomem *p = IO_ADDRESS(TEGRA_ARM_PERIF_BASE) + 0x3000;
- u32 aux_ctrl, cache_type;
-
- np = of_find_matching_node(NULL, pl310_ids);
- if (!np)
- return;
-
- cache_type = readl(p + L2X0_CACHE_TYPE);
- aux_ctrl = (cache_type & 0x700) << (17-8);
- aux_ctrl |= 0x7C400001;
-
- ret = l2x0_of_init(aux_ctrl, 0x8200c3fe);
- if (!ret)
- l2x0_saved_regs_addr = virt_to_phys(&l2x0_saved_regs);
-#endif
-}
-
static void __init tegra_init_early(void)
{
of_register_trusted_foundations();
tegra_apb_io_init();
tegra_init_fuse();
tegra_cpu_reset_handler_init();
- tegra_init_cache();
tegra_powergate_init();
tegra_hotplug_init();
}
@@ -191,8 +163,10 @@ static const char * const tegra_dt_board_compat[] = {
};
DT_MACHINE_START(TEGRA_DT, "NVIDIA Tegra SoC (Flattened Device Tree)")
- .map_io = tegra_map_common_io,
+ .l2c_aux_val = 0x3c400001,
+ .l2c_aux_mask = 0xc20fc3fe,
.smp = smp_ops(tegra_smp_ops),
+ .map_io = tegra_map_common_io,
.init_early = tegra_init_early,
.init_irq = tegra_dt_init_irq,
.init_machine = tegra_dt_init,
diff --git a/arch/arm/mach-ux500/cache-l2x0.c b/arch/arm/mach-ux500/cache-l2x0.c
index 264f894c0e3d..842ebedbdd1c 100644
--- a/arch/arm/mach-ux500/cache-l2x0.c
+++ b/arch/arm/mach-ux500/cache-l2x0.c
@@ -35,10 +35,16 @@ static int __init ux500_l2x0_unlock(void)
return 0;
}
-static int __init ux500_l2x0_init(void)
+static void ux500_l2c310_write_sec(unsigned long val, unsigned reg)
{
- u32 aux_val = 0x3e000000;
+ /*
+ * We can't write to secure registers as we are in non-secure
+ * mode, until we have some SMI service available.
+ */
+}
+static int __init ux500_l2x0_init(void)
+{
if (cpu_is_u8500_family() || cpu_is_ux540_family())
l2x0_base = __io_address(U8500_L2CC_BASE);
else
@@ -48,28 +54,12 @@ static int __init ux500_l2x0_init(void)
/* Unlock before init */
ux500_l2x0_unlock();
- /* DBx540's L2 has 128KB way size */
- if (cpu_is_ux540_family())
- /* 128KB way size */
- aux_val |= (0x4 << L2X0_AUX_CTRL_WAY_SIZE_SHIFT);
- else
- /* 64KB way size */
- aux_val |= (0x3 << L2X0_AUX_CTRL_WAY_SIZE_SHIFT);
+ outer_cache.write_sec = ux500_l2c310_write_sec;
- /* 64KB way size, 8 way associativity, force WA */
if (of_have_populated_dt())
- l2x0_of_init(aux_val, 0xc0000fff);
+ l2x0_of_init(0, ~0);
else
- l2x0_init(l2x0_base, aux_val, 0xc0000fff);
-
- /*
- * We can't disable l2 as we are in non secure mode, currently
- * this seems be called only during kexec path. So let's
- * override outer.disable with nasty assignment until we have
- * some SMI service available.
- */
- outer_cache.disable = NULL;
- outer_cache.set_debug = NULL;
+ l2x0_init(l2x0_base, 0, ~0);
return 0;
}
diff --git a/arch/arm/mach-vexpress/ct-ca9x4.c b/arch/arm/mach-vexpress/ct-ca9x4.c
index 494d70bfddad..86150d7a2e7d 100644
--- a/arch/arm/mach-vexpress/ct-ca9x4.c
+++ b/arch/arm/mach-vexpress/ct-ca9x4.c
@@ -45,6 +45,23 @@ static void __init ct_ca9x4_map_io(void)
iotable_init(ct_ca9x4_io_desc, ARRAY_SIZE(ct_ca9x4_io_desc));
}
+static void __init ca9x4_l2_init(void)
+{
+#ifdef CONFIG_CACHE_L2X0
+ void __iomem *l2x0_base = ioremap(CT_CA9X4_L2CC, SZ_4K);
+
+ if (l2x0_base) {
+ /* set RAM latencies to 1 cycle for this core tile. */
+ writel(0, l2x0_base + L310_TAG_LATENCY_CTRL);
+ writel(0, l2x0_base + L310_DATA_LATENCY_CTRL);
+
+ l2x0_init(l2x0_base, 0x00400000, 0xfe0fffff);
+ } else {
+ pr_err("L2C: unable to map L2 cache controller\n");
+ }
+#endif
+}
+
#ifdef CONFIG_HAVE_ARM_TWD
static DEFINE_TWD_LOCAL_TIMER(twd_local_timer, A9_MPCORE_TWD, IRQ_LOCALTIMER);
@@ -63,6 +80,7 @@ static void __init ct_ca9x4_init_irq(void)
gic_init(0, 29, ioremap(A9_MPCORE_GIC_DIST, SZ_4K),
ioremap(A9_MPCORE_GIC_CPU, SZ_256));
ca9x4_twd_init();
+ ca9x4_l2_init();
}
static int ct_ca9x4_clcd_setup(struct clcd_fb *fb)
@@ -146,16 +164,6 @@ static void __init ct_ca9x4_init(void)
{
int i;
-#ifdef CONFIG_CACHE_L2X0
- void __iomem *l2x0_base = ioremap(CT_CA9X4_L2CC, SZ_4K);
-
- /* set RAM latencies to 1 cycle for this core tile. */
- writel(0, l2x0_base + L2X0_TAG_LATENCY_CTRL);
- writel(0, l2x0_base + L2X0_DATA_LATENCY_CTRL);
-
- l2x0_init(l2x0_base, 0x00400000, 0xfe0fffff);
-#endif
-
for (i = 0; i < ARRAY_SIZE(ct_ca9x4_amba_devs); i++)
amba_device_register(ct_ca9x4_amba_devs[i], &iomem_resource);
diff --git a/arch/arm/mach-vexpress/tc2_pm.c b/arch/arm/mach-vexpress/tc2_pm.c
index 29e7785a54bc..b743a0ae02ce 100644
--- a/arch/arm/mach-vexpress/tc2_pm.c
+++ b/arch/arm/mach-vexpress/tc2_pm.c
@@ -209,7 +209,7 @@ static int tc2_core_in_reset(unsigned int cpu, unsigned int cluster)
#define POLL_MSEC 10
#define TIMEOUT_MSEC 1000
-static int tc2_pm_power_down_finish(unsigned int cpu, unsigned int cluster)
+static int tc2_pm_wait_for_powerdown(unsigned int cpu, unsigned int cluster)
{
unsigned tries;
@@ -290,7 +290,7 @@ static void tc2_pm_powered_up(void)
static const struct mcpm_platform_ops tc2_pm_power_ops = {
.power_up = tc2_pm_power_up,
.power_down = tc2_pm_power_down,
- .power_down_finish = tc2_pm_power_down_finish,
+ .wait_for_powerdown = tc2_pm_wait_for_powerdown,
.suspend = tc2_pm_suspend,
.powered_up = tc2_pm_powered_up,
};
diff --git a/arch/arm/mach-vexpress/v2m.c b/arch/arm/mach-vexpress/v2m.c
index 38f4f6f37770..6ff681a24ba7 100644
--- a/arch/arm/mach-vexpress/v2m.c
+++ b/arch/arm/mach-vexpress/v2m.c
@@ -372,7 +372,6 @@ MACHINE_END
static void __init v2m_dt_init(void)
{
- l2x0_of_init(0x00400000, 0xfe0fffff);
of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL);
}
@@ -383,6 +382,8 @@ static const char * const v2m_dt_match[] __initconst = {
DT_MACHINE_START(VEXPRESS_DT, "ARM-Versatile Express")
.dt_compat = v2m_dt_match,
+ .l2c_aux_val = 0x00400000,
+ .l2c_aux_mask = 0xfe0fffff,
.smp = smp_ops(vexpress_smp_dt_ops),
.smp_init = smp_init_ops(vexpress_smp_init_ops),
.init_machine = v2m_dt_init,
diff --git a/arch/arm/mach-zynq/common.c b/arch/arm/mach-zynq/common.c
index edbd9d83f407..31a6fa40ba37 100644
--- a/arch/arm/mach-zynq/common.c
+++ b/arch/arm/mach-zynq/common.c
@@ -109,11 +109,6 @@ static void __init zynq_init_machine(void)
struct soc_device *soc_dev;
struct device *parent = NULL;
- /*
- * 64KB way size, 8-way associativity, parity disabled
- */
- l2x0_of_init(0x02060000, 0xF0F0FFFF);
-
soc_dev_attr = kzalloc(sizeof(*soc_dev_attr), GFP_KERNEL);
if (!soc_dev_attr)
goto out;
@@ -202,6 +197,9 @@ static const char * const zynq_dt_match[] = {
};
DT_MACHINE_START(XILINX_EP107, "Xilinx Zynq Platform")
+ /* 64KB way size, 8-way associativity, parity disabled */
+ .l2c_aux_val = 0x02000000,
+ .l2c_aux_mask = 0xf0ffffff,
.smp = smp_ops(zynq_smp_ops),
.map_io = zynq_map_io,
.init_irq = zynq_irq_init,
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index 5bf7c3c3b301..eda0dd0ab97b 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -897,6 +897,57 @@ config CACHE_PL310
This option enables optimisations for the PL310 cache
controller.
+config PL310_ERRATA_588369
+ bool "PL310 errata: Clean & Invalidate maintenance operations do not invalidate clean lines"
+ depends on CACHE_L2X0
+ help
+ The PL310 L2 cache controller implements three types of Clean &
+ Invalidate maintenance operations: by Physical Address
+ (offset 0x7F0), by Index/Way (0x7F8) and by Way (0x7FC).
+ They are architecturally defined to behave as the execution of a
+ clean operation followed immediately by an invalidate operation,
+ both performing to the same memory location. This functionality
+ is not correctly implemented in PL310 as clean lines are not
+ invalidated as a result of these operations.
+
+config PL310_ERRATA_727915
+ bool "PL310 errata: Background Clean & Invalidate by Way operation can cause data corruption"
+ depends on CACHE_L2X0
+ help
+ PL310 implements the Clean & Invalidate by Way L2 cache maintenance
+ operation (offset 0x7FC). This operation runs in background so that
+ PL310 can handle normal accesses while it is in progress. Under very
+ rare circumstances, due to this erratum, write data can be lost when
+ PL310 treats a cacheable write transaction during a Clean &
+ Invalidate by Way operation.
+
+config PL310_ERRATA_753970
+ bool "PL310 errata: cache sync operation may be faulty"
+ depends on CACHE_PL310
+ help
+ This option enables the workaround for the 753970 PL310 (r3p0) erratum.
+
+ Under some condition the effect of cache sync operation on
+ the store buffer still remains when the operation completes.
+ This means that the store buffer is always asked to drain and
+ this prevents it from merging any further writes. The workaround
+ is to replace the normal offset of cache sync operation (0x730)
+ by another offset targeting an unmapped PL310 register 0x740.
+ This has the same effect as the cache sync operation: store buffer
+ drain and waiting for all buffers empty.
+
+config PL310_ERRATA_769419
+ bool "PL310 errata: no automatic Store Buffer drain"
+ depends on CACHE_L2X0
+ help
+ On revisions of the PL310 prior to r3p2, the Store Buffer does
+ not automatically drain. This can cause normal, non-cacheable
+ writes to be retained when the memory system is idle, leading
+ to suboptimal I/O performance for drivers using coherent DMA.
+ This option adds a write barrier to the cpu_idle loop so that,
+ on systems with an outer cache, the store buffer is drained
+ explicitly.
+
config CACHE_TAUROS2
bool "Enable the Tauros2 L2 cache controller"
depends on (ARCH_DOVE || ARCH_MMP || CPU_PJ4)
diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile
index 7f39ce2f841f..91da64de440f 100644
--- a/arch/arm/mm/Makefile
+++ b/arch/arm/mm/Makefile
@@ -95,7 +95,8 @@ obj-$(CONFIG_CPU_V7M) += proc-v7m.o
AFLAGS_proc-v6.o :=-Wa,-march=armv6
AFLAGS_proc-v7.o :=-Wa,-march=armv7-a
+obj-$(CONFIG_OUTER_CACHE) += l2c-common.o
obj-$(CONFIG_CACHE_FEROCEON_L2) += cache-feroceon-l2.o
-obj-$(CONFIG_CACHE_L2X0) += cache-l2x0.o
+obj-$(CONFIG_CACHE_L2X0) += cache-l2x0.o l2c-l2x0-resume.o
obj-$(CONFIG_CACHE_XSC3L2) += cache-xsc3l2.o
obj-$(CONFIG_CACHE_TAUROS2) += cache-tauros2.o
diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c
index 924036473b16..b8cb1a2688a0 100644
--- a/arch/arm/mm/alignment.c
+++ b/arch/arm/mm/alignment.c
@@ -28,6 +28,7 @@
#include <asm/opcodes.h>
#include "fault.h"
+#include "mm.h"
/*
* 32-bit misaligned trap handler (c) 1998 San Mehat (CCC) -July 1998
@@ -81,6 +82,7 @@ static unsigned long ai_word;
static unsigned long ai_dword;
static unsigned long ai_multi;
static int ai_usermode;
+static unsigned long cr_no_alignment;
core_param(alignment, ai_usermode, int, 0600);
@@ -91,7 +93,7 @@ core_param(alignment, ai_usermode, int, 0600);
/* Return true if and only if the ARMv6 unaligned access model is in use. */
static bool cpu_is_v6_unaligned(void)
{
- return cpu_architecture() >= CPU_ARCH_ARMv6 && (cr_alignment & CR_U);
+ return cpu_architecture() >= CPU_ARCH_ARMv6 && get_cr() & CR_U;
}
static int safe_usermode(int new_usermode, bool warn)
@@ -949,6 +951,13 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
return 0;
}
+static int __init noalign_setup(char *__unused)
+{
+ set_cr(__clear_cr(CR_A));
+ return 1;
+}
+__setup("noalign", noalign_setup);
+
/*
* This needs to be done after sysctl_init, otherwise sys/ will be
* overwritten. Actually, this shouldn't be in sys/ at all since
@@ -966,14 +975,12 @@ static int __init alignment_init(void)
return -ENOMEM;
#endif
-#ifdef CONFIG_CPU_CP15
if (cpu_is_v6_unaligned()) {
- cr_alignment &= ~CR_A;
- cr_no_alignment &= ~CR_A;
- set_cr(cr_alignment);
+ set_cr(__clear_cr(CR_A));
ai_usermode = safe_usermode(ai_usermode, false);
}
-#endif
+
+ cr_no_alignment = get_cr() & ~CR_A;
hook_fault_code(FAULT_CODE_ALIGNMENT, do_alignment, SIGBUS, BUS_ADRALN,
"alignment exception");
diff --git a/arch/arm/mm/cache-feroceon-l2.c b/arch/arm/mm/cache-feroceon-l2.c
index dc814a548056..e028a7f2ebcc 100644
--- a/arch/arm/mm/cache-feroceon-l2.c
+++ b/arch/arm/mm/cache-feroceon-l2.c
@@ -350,7 +350,6 @@ void __init feroceon_l2_init(int __l2_wt_override)
outer_cache.inv_range = feroceon_l2_inv_range;
outer_cache.clean_range = feroceon_l2_clean_range;
outer_cache.flush_range = feroceon_l2_flush_range;
- outer_cache.inv_all = l2_inv_all;
enable_l2();
diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
index 7abde2ce8973..efc5cabf70e0 100644
--- a/arch/arm/mm/cache-l2x0.c
+++ b/arch/arm/mm/cache-l2x0.c
@@ -16,18 +16,33 @@
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
+#include <linux/cpu.h>
#include <linux/err.h>
#include <linux/init.h>
+#include <linux/smp.h>
#include <linux/spinlock.h>
#include <linux/io.h>
#include <linux/of.h>
#include <linux/of_address.h>
#include <asm/cacheflush.h>
+#include <asm/cp15.h>
+#include <asm/cputype.h>
#include <asm/hardware/cache-l2x0.h>
#include "cache-tauros3.h"
#include "cache-aurora-l2.h"
+struct l2c_init_data {
+ const char *type;
+ unsigned way_size_0;
+ unsigned num_lock;
+ void (*of_parse)(const struct device_node *, u32 *, u32 *);
+ void (*enable)(void __iomem *, u32, unsigned);
+ void (*fixup)(void __iomem *, u32, struct outer_cache_fns *);
+ void (*save)(void __iomem *);
+ struct outer_cache_fns outer_cache;
+};
+
#define CACHE_LINE_SIZE 32
static void __iomem *l2x0_base;
@@ -36,96 +51,116 @@ static u32 l2x0_way_mask; /* Bitmask of active ways */
static u32 l2x0_size;
static unsigned long sync_reg_offset = L2X0_CACHE_SYNC;
-/* Aurora don't have the cache ID register available, so we have to
- * pass it though the device tree */
-static u32 cache_id_part_number_from_dt;
-
struct l2x0_regs l2x0_saved_regs;
-struct l2x0_of_data {
- void (*setup)(const struct device_node *, u32 *, u32 *);
- void (*save)(void);
- struct outer_cache_fns outer_cache;
-};
-
-static bool of_init = false;
-
-static inline void cache_wait_way(void __iomem *reg, unsigned long mask)
+/*
+ * Common code for all cache controllers.
+ */
+static inline void l2c_wait_mask(void __iomem *reg, unsigned long mask)
{
/* wait for cache operation by line or way to complete */
while (readl_relaxed(reg) & mask)
cpu_relax();
}
-#ifdef CONFIG_CACHE_PL310
-static inline void cache_wait(void __iomem *reg, unsigned long mask)
+/*
+ * By default, we write directly to secure registers. Platforms must
+ * override this if they are running non-secure.
+ */
+static void l2c_write_sec(unsigned long val, void __iomem *base, unsigned reg)
{
- /* cache operations by line are atomic on PL310 */
+ if (val == readl_relaxed(base + reg))
+ return;
+ if (outer_cache.write_sec)
+ outer_cache.write_sec(val, reg);
+ else
+ writel_relaxed(val, base + reg);
}
-#else
-#define cache_wait cache_wait_way
-#endif
-static inline void cache_sync(void)
+/*
+ * This should only be called when we have a requirement that the
+ * register be written due to a work-around, as platforms running
+ * in non-secure mode may not be able to access this register.
+ */
+static inline void l2c_set_debug(void __iomem *base, unsigned long val)
{
- void __iomem *base = l2x0_base;
-
- writel_relaxed(0, base + sync_reg_offset);
- cache_wait(base + L2X0_CACHE_SYNC, 1);
+ l2c_write_sec(val, base, L2X0_DEBUG_CTRL);
}
-static inline void l2x0_clean_line(unsigned long addr)
+static void __l2c_op_way(void __iomem *reg)
{
- void __iomem *base = l2x0_base;
- cache_wait(base + L2X0_CLEAN_LINE_PA, 1);
- writel_relaxed(addr, base + L2X0_CLEAN_LINE_PA);
+ writel_relaxed(l2x0_way_mask, reg);
+ l2c_wait_mask(reg, l2x0_way_mask);
}
-static inline void l2x0_inv_line(unsigned long addr)
+static inline void l2c_unlock(void __iomem *base, unsigned num)
{
- void __iomem *base = l2x0_base;
- cache_wait(base + L2X0_INV_LINE_PA, 1);
- writel_relaxed(addr, base + L2X0_INV_LINE_PA);
+ unsigned i;
+
+ for (i = 0; i < num; i++) {
+ writel_relaxed(0, base + L2X0_LOCKDOWN_WAY_D_BASE +
+ i * L2X0_LOCKDOWN_STRIDE);
+ writel_relaxed(0, base + L2X0_LOCKDOWN_WAY_I_BASE +
+ i * L2X0_LOCKDOWN_STRIDE);
+ }
}
-#if defined(CONFIG_PL310_ERRATA_588369) || defined(CONFIG_PL310_ERRATA_727915)
-static inline void debug_writel(unsigned long val)
+/*
+ * Enable the L2 cache controller. This function must only be
+ * called when the cache controller is known to be disabled.
+ */
+static void l2c_enable(void __iomem *base, u32 aux, unsigned num_lock)
{
- if (outer_cache.set_debug)
- outer_cache.set_debug(val);
+ unsigned long flags;
+
+ l2c_write_sec(aux, base, L2X0_AUX_CTRL);
+
+ l2c_unlock(base, num_lock);
+
+ local_irq_save(flags);
+ __l2c_op_way(base + L2X0_INV_WAY);
+ writel_relaxed(0, base + sync_reg_offset);
+ l2c_wait_mask(base + sync_reg_offset, 1);
+ local_irq_restore(flags);
+
+ l2c_write_sec(L2X0_CTRL_EN, base, L2X0_CTRL);
}
-static void pl310_set_debug(unsigned long val)
+static void l2c_disable(void)
{
- writel_relaxed(val, l2x0_base + L2X0_DEBUG_CTRL);
+ void __iomem *base = l2x0_base;
+
+ outer_cache.flush_all();
+ l2c_write_sec(0, base, L2X0_CTRL);
+ dsb(st);
}
-#else
-/* Optimised out for non-errata case */
-static inline void debug_writel(unsigned long val)
+
+#ifdef CONFIG_CACHE_PL310
+static inline void cache_wait(void __iomem *reg, unsigned long mask)
{
+ /* cache operations by line are atomic on PL310 */
}
-
-#define pl310_set_debug NULL
+#else
+#define cache_wait l2c_wait_mask
#endif
-#ifdef CONFIG_PL310_ERRATA_588369
-static inline void l2x0_flush_line(unsigned long addr)
+static inline void cache_sync(void)
{
void __iomem *base = l2x0_base;
- /* Clean by PA followed by Invalidate by PA */
- cache_wait(base + L2X0_CLEAN_LINE_PA, 1);
- writel_relaxed(addr, base + L2X0_CLEAN_LINE_PA);
- cache_wait(base + L2X0_INV_LINE_PA, 1);
- writel_relaxed(addr, base + L2X0_INV_LINE_PA);
+ writel_relaxed(0, base + sync_reg_offset);
+ cache_wait(base + L2X0_CACHE_SYNC, 1);
}
-#else
-static inline void l2x0_flush_line(unsigned long addr)
+#if defined(CONFIG_PL310_ERRATA_588369) || defined(CONFIG_PL310_ERRATA_727915)
+static inline void debug_writel(unsigned long val)
+{
+ l2c_set_debug(l2x0_base, val);
+}
+#else
+/* Optimised out for non-errata case */
+static inline void debug_writel(unsigned long val)
{
- void __iomem *base = l2x0_base;
- cache_wait(base + L2X0_CLEAN_INV_LINE_PA, 1);
- writel_relaxed(addr, base + L2X0_CLEAN_INV_LINE_PA);
}
#endif
@@ -141,8 +176,7 @@ static void l2x0_cache_sync(void)
static void __l2x0_flush_all(void)
{
debug_writel(0x03);
- writel_relaxed(l2x0_way_mask, l2x0_base + L2X0_CLEAN_INV_WAY);
- cache_wait_way(l2x0_base + L2X0_CLEAN_INV_WAY, l2x0_way_mask);
+ __l2c_op_way(l2x0_base + L2X0_CLEAN_INV_WAY);
cache_sync();
debug_writel(0x00);
}
@@ -157,275 +191,883 @@ static void l2x0_flush_all(void)
raw_spin_unlock_irqrestore(&l2x0_lock, flags);
}
-static void l2x0_clean_all(void)
+static void l2x0_disable(void)
{
unsigned long flags;
- /* clean all ways */
raw_spin_lock_irqsave(&l2x0_lock, flags);
- writel_relaxed(l2x0_way_mask, l2x0_base + L2X0_CLEAN_WAY);
- cache_wait_way(l2x0_base + L2X0_CLEAN_WAY, l2x0_way_mask);
- cache_sync();
+ __l2x0_flush_all();
+ l2c_write_sec(0, l2x0_base, L2X0_CTRL);
+ dsb(st);
raw_spin_unlock_irqrestore(&l2x0_lock, flags);
}
-static void l2x0_inv_all(void)
+static void l2c_save(void __iomem *base)
{
- unsigned long flags;
+ l2x0_saved_regs.aux_ctrl = readl_relaxed(l2x0_base + L2X0_AUX_CTRL);
+}
- /* invalidate all ways */
- raw_spin_lock_irqsave(&l2x0_lock, flags);
- /* Invalidating when L2 is enabled is a nono */
- BUG_ON(readl(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN);
- writel_relaxed(l2x0_way_mask, l2x0_base + L2X0_INV_WAY);
- cache_wait_way(l2x0_base + L2X0_INV_WAY, l2x0_way_mask);
- cache_sync();
- raw_spin_unlock_irqrestore(&l2x0_lock, flags);
+/*
+ * L2C-210 specific code.
+ *
+ * The L2C-2x0 PA, set/way and sync operations are atomic, but we must
+ * ensure that no background operation is running. The way operations
+ * are all background tasks.
+ *
+ * While a background operation is in progress, any new operation is
+ * ignored (unspecified whether this causes an error.) Thankfully, not
+ * used on SMP.
+ *
+ * Never has a different sync register other than L2X0_CACHE_SYNC, but
+ * we use sync_reg_offset here so we can share some of this with L2C-310.
+ */
+static void __l2c210_cache_sync(void __iomem *base)
+{
+ writel_relaxed(0, base + sync_reg_offset);
}
-static void l2x0_inv_range(unsigned long start, unsigned long end)
+static void __l2c210_op_pa_range(void __iomem *reg, unsigned long start,
+ unsigned long end)
+{
+ while (start < end) {
+ writel_relaxed(start, reg);
+ start += CACHE_LINE_SIZE;
+ }
+}
+
+static void l2c210_inv_range(unsigned long start, unsigned long end)
{
void __iomem *base = l2x0_base;
- unsigned long flags;
- raw_spin_lock_irqsave(&l2x0_lock, flags);
if (start & (CACHE_LINE_SIZE - 1)) {
start &= ~(CACHE_LINE_SIZE - 1);
- debug_writel(0x03);
- l2x0_flush_line(start);
- debug_writel(0x00);
+ writel_relaxed(start, base + L2X0_CLEAN_INV_LINE_PA);
start += CACHE_LINE_SIZE;
}
if (end & (CACHE_LINE_SIZE - 1)) {
end &= ~(CACHE_LINE_SIZE - 1);
- debug_writel(0x03);
- l2x0_flush_line(end);
- debug_writel(0x00);
+ writel_relaxed(end, base + L2X0_CLEAN_INV_LINE_PA);
}
+ __l2c210_op_pa_range(base + L2X0_INV_LINE_PA, start, end);
+ __l2c210_cache_sync(base);
+}
+
+static void l2c210_clean_range(unsigned long start, unsigned long end)
+{
+ void __iomem *base = l2x0_base;
+
+ start &= ~(CACHE_LINE_SIZE - 1);
+ __l2c210_op_pa_range(base + L2X0_CLEAN_LINE_PA, start, end);
+ __l2c210_cache_sync(base);
+}
+
+static void l2c210_flush_range(unsigned long start, unsigned long end)
+{
+ void __iomem *base = l2x0_base;
+
+ start &= ~(CACHE_LINE_SIZE - 1);
+ __l2c210_op_pa_range(base + L2X0_CLEAN_INV_LINE_PA, start, end);
+ __l2c210_cache_sync(base);
+}
+
+static void l2c210_flush_all(void)
+{
+ void __iomem *base = l2x0_base;
+
+ BUG_ON(!irqs_disabled());
+
+ __l2c_op_way(base + L2X0_CLEAN_INV_WAY);
+ __l2c210_cache_sync(base);
+}
+
+static void l2c210_sync(void)
+{
+ __l2c210_cache_sync(l2x0_base);
+}
+
+static void l2c210_resume(void)
+{
+ void __iomem *base = l2x0_base;
+
+ if (!(readl_relaxed(base + L2X0_CTRL) & L2X0_CTRL_EN))
+ l2c_enable(base, l2x0_saved_regs.aux_ctrl, 1);
+}
+
+static const struct l2c_init_data l2c210_data __initconst = {
+ .type = "L2C-210",
+ .way_size_0 = SZ_8K,
+ .num_lock = 1,
+ .enable = l2c_enable,
+ .save = l2c_save,
+ .outer_cache = {
+ .inv_range = l2c210_inv_range,
+ .clean_range = l2c210_clean_range,
+ .flush_range = l2c210_flush_range,
+ .flush_all = l2c210_flush_all,
+ .disable = l2c_disable,
+ .sync = l2c210_sync,
+ .resume = l2c210_resume,
+ },
+};
+
+/*
+ * L2C-220 specific code.
+ *
+ * All operations are background operations: they have to be waited for.
+ * Conflicting requests generate a slave error (which will cause an
+ * imprecise abort.) Never uses sync_reg_offset, so we hard-code the
+ * sync register here.
+ *
+ * However, we can re-use the l2c210_resume call.
+ */
+static inline void __l2c220_cache_sync(void __iomem *base)
+{
+ writel_relaxed(0, base + L2X0_CACHE_SYNC);
+ l2c_wait_mask(base + L2X0_CACHE_SYNC, 1);
+}
+
+static void l2c220_op_way(void __iomem *base, unsigned reg)
+{
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&l2x0_lock, flags);
+ __l2c_op_way(base + reg);
+ __l2c220_cache_sync(base);
+ raw_spin_unlock_irqrestore(&l2x0_lock, flags);
+}
+
+static unsigned long l2c220_op_pa_range(void __iomem *reg, unsigned long start,
+ unsigned long end, unsigned long flags)
+{
+ raw_spinlock_t *lock = &l2x0_lock;
+
while (start < end) {
unsigned long blk_end = start + min(end - start, 4096UL);
while (start < blk_end) {
- l2x0_inv_line(start);
+ l2c_wait_mask(reg, 1);
+ writel_relaxed(start, reg);
start += CACHE_LINE_SIZE;
}
if (blk_end < end) {
- raw_spin_unlock_irqrestore(&l2x0_lock, flags);
- raw_spin_lock_irqsave(&l2x0_lock, flags);
+ raw_spin_unlock_irqrestore(lock, flags);
+ raw_spin_lock_irqsave(lock, flags);
}
}
- cache_wait(base + L2X0_INV_LINE_PA, 1);
- cache_sync();
- raw_spin_unlock_irqrestore(&l2x0_lock, flags);
+
+ return flags;
}
-static void l2x0_clean_range(unsigned long start, unsigned long end)
+static void l2c220_inv_range(unsigned long start, unsigned long end)
{
void __iomem *base = l2x0_base;
unsigned long flags;
- if ((end - start) >= l2x0_size) {
- l2x0_clean_all();
- return;
- }
-
raw_spin_lock_irqsave(&l2x0_lock, flags);
- start &= ~(CACHE_LINE_SIZE - 1);
- while (start < end) {
- unsigned long blk_end = start + min(end - start, 4096UL);
-
- while (start < blk_end) {
- l2x0_clean_line(start);
+ if ((start | end) & (CACHE_LINE_SIZE - 1)) {
+ if (start & (CACHE_LINE_SIZE - 1)) {
+ start &= ~(CACHE_LINE_SIZE - 1);
+ writel_relaxed(start, base + L2X0_CLEAN_INV_LINE_PA);
start += CACHE_LINE_SIZE;
}
- if (blk_end < end) {
- raw_spin_unlock_irqrestore(&l2x0_lock, flags);
- raw_spin_lock_irqsave(&l2x0_lock, flags);
+ if (end & (CACHE_LINE_SIZE - 1)) {
+ end &= ~(CACHE_LINE_SIZE - 1);
+ l2c_wait_mask(base + L2X0_CLEAN_INV_LINE_PA, 1);
+ writel_relaxed(end, base + L2X0_CLEAN_INV_LINE_PA);
}
}
- cache_wait(base + L2X0_CLEAN_LINE_PA, 1);
- cache_sync();
+
+ flags = l2c220_op_pa_range(base + L2X0_INV_LINE_PA,
+ start, end, flags);
+ l2c_wait_mask(base + L2X0_INV_LINE_PA, 1);
+ __l2c220_cache_sync(base);
raw_spin_unlock_irqrestore(&l2x0_lock, flags);
}
-static void l2x0_flush_range(unsigned long start, unsigned long end)
+static void l2c220_clean_range(unsigned long start, unsigned long end)
{
void __iomem *base = l2x0_base;
unsigned long flags;
+ start &= ~(CACHE_LINE_SIZE - 1);
if ((end - start) >= l2x0_size) {
- l2x0_flush_all();
+ l2c220_op_way(base, L2X0_CLEAN_WAY);
return;
}
raw_spin_lock_irqsave(&l2x0_lock, flags);
+ flags = l2c220_op_pa_range(base + L2X0_CLEAN_LINE_PA,
+ start, end, flags);
+ l2c_wait_mask(base + L2X0_CLEAN_INV_LINE_PA, 1);
+ __l2c220_cache_sync(base);
+ raw_spin_unlock_irqrestore(&l2x0_lock, flags);
+}
+
+static void l2c220_flush_range(unsigned long start, unsigned long end)
+{
+ void __iomem *base = l2x0_base;
+ unsigned long flags;
+
start &= ~(CACHE_LINE_SIZE - 1);
+ if ((end - start) >= l2x0_size) {
+ l2c220_op_way(base, L2X0_CLEAN_INV_WAY);
+ return;
+ }
+
+ raw_spin_lock_irqsave(&l2x0_lock, flags);
+ flags = l2c220_op_pa_range(base + L2X0_CLEAN_INV_LINE_PA,
+ start, end, flags);
+ l2c_wait_mask(base + L2X0_CLEAN_INV_LINE_PA, 1);
+ __l2c220_cache_sync(base);
+ raw_spin_unlock_irqrestore(&l2x0_lock, flags);
+}
+
+static void l2c220_flush_all(void)
+{
+ l2c220_op_way(l2x0_base, L2X0_CLEAN_INV_WAY);
+}
+
+static void l2c220_sync(void)
+{
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&l2x0_lock, flags);
+ __l2c220_cache_sync(l2x0_base);
+ raw_spin_unlock_irqrestore(&l2x0_lock, flags);
+}
+
+static void l2c220_enable(void __iomem *base, u32 aux, unsigned num_lock)
+{
+ /*
+ * Always enable non-secure access to the lockdown registers -
+ * we write to them as part of the L2C enable sequence so they
+ * need to be accessible.
+ */
+ aux |= L220_AUX_CTRL_NS_LOCKDOWN;
+
+ l2c_enable(base, aux, num_lock);
+}
+
+static const struct l2c_init_data l2c220_data = {
+ .type = "L2C-220",
+ .way_size_0 = SZ_8K,
+ .num_lock = 1,
+ .enable = l2c220_enable,
+ .save = l2c_save,
+ .outer_cache = {
+ .inv_range = l2c220_inv_range,
+ .clean_range = l2c220_clean_range,
+ .flush_range = l2c220_flush_range,
+ .flush_all = l2c220_flush_all,
+ .disable = l2c_disable,
+ .sync = l2c220_sync,
+ .resume = l2c210_resume,
+ },
+};
+
+/*
+ * L2C-310 specific code.
+ *
+ * Very similar to L2C-210, the PA, set/way and sync operations are atomic,
+ * and the way operations are all background tasks. However, issuing an
+ * operation while a background operation is in progress results in a
+ * SLVERR response. We can reuse:
+ *
+ * __l2c210_cache_sync (using sync_reg_offset)
+ * l2c210_sync
+ * l2c210_inv_range (if 588369 is not applicable)
+ * l2c210_clean_range
+ * l2c210_flush_range (if 588369 is not applicable)
+ * l2c210_flush_all (if 727915 is not applicable)
+ *
+ * Errata:
+ * 588369: PL310 R0P0->R1P0, fixed R2P0.
+ * Affects: all clean+invalidate operations
+ * clean and invalidate skips the invalidate step, so we need to issue
+ * separate operations. We also require the above debug workaround
+ * enclosing this code fragment on affected parts. On unaffected parts,
+ * we must not use this workaround without the debug register writes
+ * to avoid exposing a problem similar to 727915.
+ *
+ * 727915: PL310 R2P0->R3P0, fixed R3P1.
+ * Affects: clean+invalidate by way
+ * clean and invalidate by way runs in the background, and a store can
+ * hit the line between the clean operation and invalidate operation,
+ * resulting in the store being lost.
+ *
+ * 752271: PL310 R3P0->R3P1-50REL0, fixed R3P2.
+ * Affects: 8x64-bit (double fill) line fetches
+ * double fill line fetches can fail to cause dirty data to be evicted
+ * from the cache before the new data overwrites the second line.
+ *
+ * 753970: PL310 R3P0, fixed R3P1.
+ * Affects: sync
+ * prevents merging writes after the sync operation, until another L2C
+ * operation is performed (or a number of other conditions.)
+ *
+ * 769419: PL310 R0P0->R3P1, fixed R3P2.
+ * Affects: store buffer
+ * store buffer is not automatically drained.
+ */
+static void l2c310_inv_range_erratum(unsigned long start, unsigned long end)
+{
+ void __iomem *base = l2x0_base;
+
+ if ((start | end) & (CACHE_LINE_SIZE - 1)) {
+ unsigned long flags;
+
+ /* Erratum 588369 for both clean+invalidate operations */
+ raw_spin_lock_irqsave(&l2x0_lock, flags);
+ l2c_set_debug(base, 0x03);
+
+ if (start & (CACHE_LINE_SIZE - 1)) {
+ start &= ~(CACHE_LINE_SIZE - 1);
+ writel_relaxed(start, base + L2X0_CLEAN_LINE_PA);
+ writel_relaxed(start, base + L2X0_INV_LINE_PA);
+ start += CACHE_LINE_SIZE;
+ }
+
+ if (end & (CACHE_LINE_SIZE - 1)) {
+ end &= ~(CACHE_LINE_SIZE - 1);
+ writel_relaxed(end, base + L2X0_CLEAN_LINE_PA);
+ writel_relaxed(end, base + L2X0_INV_LINE_PA);
+ }
+
+ l2c_set_debug(base, 0x00);
+ raw_spin_unlock_irqrestore(&l2x0_lock, flags);
+ }
+
+ __l2c210_op_pa_range(base + L2X0_INV_LINE_PA, start, end);
+ __l2c210_cache_sync(base);
+}
+
+static void l2c310_flush_range_erratum(unsigned long start, unsigned long end)
+{
+ raw_spinlock_t *lock = &l2x0_lock;
+ unsigned long flags;
+ void __iomem *base = l2x0_base;
+
+ raw_spin_lock_irqsave(lock, flags);
while (start < end) {
unsigned long blk_end = start + min(end - start, 4096UL);
- debug_writel(0x03);
+ l2c_set_debug(base, 0x03);
while (start < blk_end) {
- l2x0_flush_line(start);
+ writel_relaxed(start, base + L2X0_CLEAN_LINE_PA);
+ writel_relaxed(start, base + L2X0_INV_LINE_PA);
start += CACHE_LINE_SIZE;
}
- debug_writel(0x00);
+ l2c_set_debug(base, 0x00);
if (blk_end < end) {
- raw_spin_unlock_irqrestore(&l2x0_lock, flags);
- raw_spin_lock_irqsave(&l2x0_lock, flags);
+ raw_spin_unlock_irqrestore(lock, flags);
+ raw_spin_lock_irqsave(lock, flags);
}
}
- cache_wait(base + L2X0_CLEAN_INV_LINE_PA, 1);
- cache_sync();
- raw_spin_unlock_irqrestore(&l2x0_lock, flags);
+ raw_spin_unlock_irqrestore(lock, flags);
+ __l2c210_cache_sync(base);
}
-static void l2x0_disable(void)
+static void l2c310_flush_all_erratum(void)
{
+ void __iomem *base = l2x0_base;
unsigned long flags;
raw_spin_lock_irqsave(&l2x0_lock, flags);
- __l2x0_flush_all();
- writel_relaxed(0, l2x0_base + L2X0_CTRL);
- dsb(st);
+ l2c_set_debug(base, 0x03);
+ __l2c_op_way(base + L2X0_CLEAN_INV_WAY);
+ l2c_set_debug(base, 0x00);
+ __l2c210_cache_sync(base);
raw_spin_unlock_irqrestore(&l2x0_lock, flags);
}
-static void l2x0_unlock(u32 cache_id)
+static void __init l2c310_save(void __iomem *base)
{
- int lockregs;
- int i;
+ unsigned revision;
- switch (cache_id & L2X0_CACHE_ID_PART_MASK) {
- case L2X0_CACHE_ID_PART_L310:
- lockregs = 8;
- break;
- case AURORA_CACHE_ID:
- lockregs = 4;
+ l2c_save(base);
+
+ l2x0_saved_regs.tag_latency = readl_relaxed(base +
+ L310_TAG_LATENCY_CTRL);
+ l2x0_saved_regs.data_latency = readl_relaxed(base +
+ L310_DATA_LATENCY_CTRL);
+ l2x0_saved_regs.filter_end = readl_relaxed(base +
+ L310_ADDR_FILTER_END);
+ l2x0_saved_regs.filter_start = readl_relaxed(base +
+ L310_ADDR_FILTER_START);
+
+ revision = readl_relaxed(base + L2X0_CACHE_ID) &
+ L2X0_CACHE_ID_RTL_MASK;
+
+ /* From r2p0, there is Prefetch offset/control register */
+ if (revision >= L310_CACHE_ID_RTL_R2P0)
+ l2x0_saved_regs.prefetch_ctrl = readl_relaxed(base +
+ L310_PREFETCH_CTRL);
+
+ /* From r3p0, there is Power control register */
+ if (revision >= L310_CACHE_ID_RTL_R3P0)
+ l2x0_saved_regs.pwr_ctrl = readl_relaxed(base +
+ L310_POWER_CTRL);
+}
+
+static void l2c310_resume(void)
+{
+ void __iomem *base = l2x0_base;
+
+ if (!(readl_relaxed(base + L2X0_CTRL) & L2X0_CTRL_EN)) {
+ unsigned revision;
+
+ /* restore pl310 setup */
+ writel_relaxed(l2x0_saved_regs.tag_latency,
+ base + L310_TAG_LATENCY_CTRL);
+ writel_relaxed(l2x0_saved_regs.data_latency,
+ base + L310_DATA_LATENCY_CTRL);
+ writel_relaxed(l2x0_saved_regs.filter_end,
+ base + L310_ADDR_FILTER_END);
+ writel_relaxed(l2x0_saved_regs.filter_start,
+ base + L310_ADDR_FILTER_START);
+
+ revision = readl_relaxed(base + L2X0_CACHE_ID) &
+ L2X0_CACHE_ID_RTL_MASK;
+
+ if (revision >= L310_CACHE_ID_RTL_R2P0)
+ l2c_write_sec(l2x0_saved_regs.prefetch_ctrl, base,
+ L310_PREFETCH_CTRL);
+ if (revision >= L310_CACHE_ID_RTL_R3P0)
+ l2c_write_sec(l2x0_saved_regs.pwr_ctrl, base,
+ L310_POWER_CTRL);
+
+ l2c_enable(base, l2x0_saved_regs.aux_ctrl, 8);
+
+ /* Re-enable full-line-of-zeros for Cortex-A9 */
+ if (l2x0_saved_regs.aux_ctrl & L310_AUX_CTRL_FULL_LINE_ZERO)
+ set_auxcr(get_auxcr() | BIT(3) | BIT(2) | BIT(1));
+ }
+}
+
+static int l2c310_cpu_enable_flz(struct notifier_block *nb, unsigned long act, void *data)
+{
+ switch (act & ~CPU_TASKS_FROZEN) {
+ case CPU_STARTING:
+ set_auxcr(get_auxcr() | BIT(3) | BIT(2) | BIT(1));
break;
- default:
- /* L210 and unknown types */
- lockregs = 1;
+ case CPU_DYING:
+ set_auxcr(get_auxcr() & ~(BIT(3) | BIT(2) | BIT(1)));
break;
}
+ return NOTIFY_OK;
+}
- for (i = 0; i < lockregs; i++) {
- writel_relaxed(0x0, l2x0_base + L2X0_LOCKDOWN_WAY_D_BASE +
- i * L2X0_LOCKDOWN_STRIDE);
- writel_relaxed(0x0, l2x0_base + L2X0_LOCKDOWN_WAY_I_BASE +
- i * L2X0_LOCKDOWN_STRIDE);
+static void __init l2c310_enable(void __iomem *base, u32 aux, unsigned num_lock)
+{
+ unsigned rev = readl_relaxed(base + L2X0_CACHE_ID) & L2X0_CACHE_ID_PART_MASK;
+ bool cortex_a9 = read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A9;
+
+ if (rev >= L310_CACHE_ID_RTL_R2P0) {
+ if (cortex_a9) {
+ aux |= L310_AUX_CTRL_EARLY_BRESP;
+ pr_info("L2C-310 enabling early BRESP for Cortex-A9\n");
+ } else if (aux & L310_AUX_CTRL_EARLY_BRESP) {
+ pr_warn("L2C-310 early BRESP only supported with Cortex-A9\n");
+ aux &= ~L310_AUX_CTRL_EARLY_BRESP;
+ }
+ }
+
+ if (cortex_a9) {
+ u32 aux_cur = readl_relaxed(base + L2X0_AUX_CTRL);
+ u32 acr = get_auxcr();
+
+ pr_debug("Cortex-A9 ACR=0x%08x\n", acr);
+
+ if (acr & BIT(3) && !(aux_cur & L310_AUX_CTRL_FULL_LINE_ZERO))
+ pr_err("L2C-310: full line of zeros enabled in Cortex-A9 but not L2C-310 - invalid\n");
+
+ if (aux & L310_AUX_CTRL_FULL_LINE_ZERO && !(acr & BIT(3)))
+ pr_err("L2C-310: enabling full line of zeros but not enabled in Cortex-A9\n");
+
+ if (!(aux & L310_AUX_CTRL_FULL_LINE_ZERO) && !outer_cache.write_sec) {
+ aux |= L310_AUX_CTRL_FULL_LINE_ZERO;
+ pr_info("L2C-310 full line of zeros enabled for Cortex-A9\n");
+ }
+ } else if (aux & (L310_AUX_CTRL_FULL_LINE_ZERO | L310_AUX_CTRL_EARLY_BRESP)) {
+ pr_err("L2C-310: disabling Cortex-A9 specific feature bits\n");
+ aux &= ~(L310_AUX_CTRL_FULL_LINE_ZERO | L310_AUX_CTRL_EARLY_BRESP);
+ }
+
+ if (aux & (L310_AUX_CTRL_DATA_PREFETCH | L310_AUX_CTRL_INSTR_PREFETCH)) {
+ u32 prefetch = readl_relaxed(base + L310_PREFETCH_CTRL);
+
+ pr_info("L2C-310 %s%s prefetch enabled, offset %u lines\n",
+ aux & L310_AUX_CTRL_INSTR_PREFETCH ? "I" : "",
+ aux & L310_AUX_CTRL_DATA_PREFETCH ? "D" : "",
+ 1 + (prefetch & L310_PREFETCH_CTRL_OFFSET_MASK));
+ }
+
+ /* r3p0 or later has power control register */
+ if (rev >= L310_CACHE_ID_RTL_R3P0) {
+ u32 power_ctrl;
+
+ l2c_write_sec(L310_DYNAMIC_CLK_GATING_EN | L310_STNDBY_MODE_EN,
+ base, L310_POWER_CTRL);
+ power_ctrl = readl_relaxed(base + L310_POWER_CTRL);
+ pr_info("L2C-310 dynamic clock gating %sabled, standby mode %sabled\n",
+ power_ctrl & L310_DYNAMIC_CLK_GATING_EN ? "en" : "dis",
+ power_ctrl & L310_STNDBY_MODE_EN ? "en" : "dis");
+ }
+
+ /*
+ * Always enable non-secure access to the lockdown registers -
+ * we write to them as part of the L2C enable sequence so they
+ * need to be accessible.
+ */
+ aux |= L310_AUX_CTRL_NS_LOCKDOWN;
+
+ l2c_enable(base, aux, num_lock);
+
+ if (aux & L310_AUX_CTRL_FULL_LINE_ZERO) {
+ set_auxcr(get_auxcr() | BIT(3) | BIT(2) | BIT(1));
+ cpu_notifier(l2c310_cpu_enable_flz, 0);
}
}
-void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask)
+static void __init l2c310_fixup(void __iomem *base, u32 cache_id,
+ struct outer_cache_fns *fns)
{
- u32 aux;
- u32 cache_id;
- u32 way_size = 0;
- int ways;
- int way_size_shift = L2X0_WAY_SIZE_SHIFT;
- const char *type;
+ unsigned revision = cache_id & L2X0_CACHE_ID_RTL_MASK;
+ const char *errata[8];
+ unsigned n = 0;
+
+ if (IS_ENABLED(CONFIG_PL310_ERRATA_588369) &&
+ revision < L310_CACHE_ID_RTL_R2P0 &&
+ /* For bcm compatibility */
+ fns->inv_range == l2c210_inv_range) {
+ fns->inv_range = l2c310_inv_range_erratum;
+ fns->flush_range = l2c310_flush_range_erratum;
+ errata[n++] = "588369";
+ }
- l2x0_base = base;
- if (cache_id_part_number_from_dt)
- cache_id = cache_id_part_number_from_dt;
- else
- cache_id = readl_relaxed(l2x0_base + L2X0_CACHE_ID);
- aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL);
+ if (IS_ENABLED(CONFIG_PL310_ERRATA_727915) &&
+ revision >= L310_CACHE_ID_RTL_R2P0 &&
+ revision < L310_CACHE_ID_RTL_R3P1) {
+ fns->flush_all = l2c310_flush_all_erratum;
+ errata[n++] = "727915";
+ }
+
+ if (revision >= L310_CACHE_ID_RTL_R3P0 &&
+ revision < L310_CACHE_ID_RTL_R3P2) {
+ u32 val = readl_relaxed(base + L310_PREFETCH_CTRL);
+ /* I don't think bit23 is required here... but iMX6 does so */
+ if (val & (BIT(30) | BIT(23))) {
+ val &= ~(BIT(30) | BIT(23));
+ l2c_write_sec(val, base, L310_PREFETCH_CTRL);
+ errata[n++] = "752271";
+ }
+ }
+
+ if (IS_ENABLED(CONFIG_PL310_ERRATA_753970) &&
+ revision == L310_CACHE_ID_RTL_R3P0) {
+ sync_reg_offset = L2X0_DUMMY_REG;
+ errata[n++] = "753970";
+ }
+
+ if (IS_ENABLED(CONFIG_PL310_ERRATA_769419))
+ errata[n++] = "769419";
+
+ if (n) {
+ unsigned i;
+ pr_info("L2C-310 errat%s", n > 1 ? "a" : "um");
+ for (i = 0; i < n; i++)
+ pr_cont(" %s", errata[i]);
+ pr_cont(" enabled\n");
+ }
+}
+
+static void l2c310_disable(void)
+{
+ /*
+ * If full-line-of-zeros is enabled, we must first disable it in the
+ * Cortex-A9 auxiliary control register before disabling the L2 cache.
+ */
+ if (l2x0_saved_regs.aux_ctrl & L310_AUX_CTRL_FULL_LINE_ZERO)
+ set_auxcr(get_auxcr() & ~(BIT(3) | BIT(2) | BIT(1)));
+
+ l2c_disable();
+}
+
+static const struct l2c_init_data l2c310_init_fns __initconst = {
+ .type = "L2C-310",
+ .way_size_0 = SZ_8K,
+ .num_lock = 8,
+ .enable = l2c310_enable,
+ .fixup = l2c310_fixup,
+ .save = l2c310_save,
+ .outer_cache = {
+ .inv_range = l2c210_inv_range,
+ .clean_range = l2c210_clean_range,
+ .flush_range = l2c210_flush_range,
+ .flush_all = l2c210_flush_all,
+ .disable = l2c310_disable,
+ .sync = l2c210_sync,
+ .resume = l2c310_resume,
+ },
+};
+
+static void __init __l2c_init(const struct l2c_init_data *data,
+ u32 aux_val, u32 aux_mask, u32 cache_id)
+{
+ struct outer_cache_fns fns;
+ unsigned way_size_bits, ways;
+ u32 aux, old_aux;
+
+ /*
+ * Sanity check the aux values. aux_mask is the bits we preserve
+ * from reading the hardware register, and aux_val is the bits we
+ * set.
+ */
+ if (aux_val & aux_mask)
+ pr_alert("L2C: platform provided aux values permit register corruption.\n");
+
+ old_aux = aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL);
aux &= aux_mask;
aux |= aux_val;
+ if (old_aux != aux)
+ pr_warn("L2C: DT/platform modifies aux control register: 0x%08x -> 0x%08x\n",
+ old_aux, aux);
+
/* Determine the number of ways */
switch (cache_id & L2X0_CACHE_ID_PART_MASK) {
case L2X0_CACHE_ID_PART_L310:
+ if ((aux_val | ~aux_mask) & (L2C_AUX_CTRL_WAY_SIZE_MASK | L310_AUX_CTRL_ASSOCIATIVITY_16))
+ pr_warn("L2C: DT/platform tries to modify or specify cache size\n");
if (aux & (1 << 16))
ways = 16;
else
ways = 8;
- type = "L310";
-#ifdef CONFIG_PL310_ERRATA_753970
- /* Unmapped register. */
- sync_reg_offset = L2X0_DUMMY_REG;
-#endif
- if ((cache_id & L2X0_CACHE_ID_RTL_MASK) <= L2X0_CACHE_ID_RTL_R3P0)
- outer_cache.set_debug = pl310_set_debug;
break;
+
case L2X0_CACHE_ID_PART_L210:
+ case L2X0_CACHE_ID_PART_L220:
ways = (aux >> 13) & 0xf;
- type = "L210";
break;
case AURORA_CACHE_ID:
- sync_reg_offset = AURORA_SYNC_REG;
ways = (aux >> 13) & 0xf;
ways = 2 << ((ways + 1) >> 2);
- way_size_shift = AURORA_WAY_SIZE_SHIFT;
- type = "Aurora";
break;
+
default:
/* Assume unknown chips have 8 ways */
ways = 8;
- type = "L2x0 series";
break;
}
l2x0_way_mask = (1 << ways) - 1;
/*
- * L2 cache Size = Way size * Number of ways
+ * way_size_0 is the size that a way_size value of zero would be
+ * given the calculation: way_size = way_size_0 << way_size_bits.
+ * So, if way_size_bits=0 is reserved, but way_size_bits=1 is 16k,
+ * then way_size_0 would be 8k.
+ *
+ * L2 cache size = number of ways * way size.
*/
- way_size = (aux & L2X0_AUX_CTRL_WAY_SIZE_MASK) >> 17;
- way_size = 1 << (way_size + way_size_shift);
+ way_size_bits = (aux & L2C_AUX_CTRL_WAY_SIZE_MASK) >>
+ L2C_AUX_CTRL_WAY_SIZE_SHIFT;
+ l2x0_size = ways * (data->way_size_0 << way_size_bits);
- l2x0_size = ways * way_size * SZ_1K;
+ fns = data->outer_cache;
+ fns.write_sec = outer_cache.write_sec;
+ if (data->fixup)
+ data->fixup(l2x0_base, cache_id, &fns);
/*
- * Check if l2x0 controller is already enabled.
- * If you are booting from non-secure mode
- * accessing the below registers will fault.
+ * Check if l2x0 controller is already enabled. If we are booting
+ * in non-secure mode accessing the below registers will fault.
*/
- if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) {
- /* Make sure that I&D is not locked down when starting */
- l2x0_unlock(cache_id);
+ if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN))
+ data->enable(l2x0_base, aux, data->num_lock);
- /* l2x0 controller is disabled */
- writel_relaxed(aux, l2x0_base + L2X0_AUX_CTRL);
+ outer_cache = fns;
- l2x0_inv_all();
-
- /* enable L2X0 */
- writel_relaxed(L2X0_CTRL_EN, l2x0_base + L2X0_CTRL);
- }
+ /*
+ * It is strange to save the register state before initialisation,
+ * but hey, this is what the DT implementations decided to do.
+ */
+ if (data->save)
+ data->save(l2x0_base);
/* Re-read it in case some bits are reserved. */
aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL);
- /* Save the value for resuming. */
- l2x0_saved_regs.aux_ctrl = aux;
+ pr_info("%s cache controller enabled, %d ways, %d kB\n",
+ data->type, ways, l2x0_size >> 10);
+ pr_info("%s: CACHE_ID 0x%08x, AUX_CTRL 0x%08x\n",
+ data->type, cache_id, aux);
+}
+
+void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask)
+{
+ const struct l2c_init_data *data;
+ u32 cache_id;
+
+ l2x0_base = base;
+
+ cache_id = readl_relaxed(base + L2X0_CACHE_ID);
+
+ switch (cache_id & L2X0_CACHE_ID_PART_MASK) {
+ default:
+ case L2X0_CACHE_ID_PART_L210:
+ data = &l2c210_data;
+ break;
- if (!of_init) {
- outer_cache.inv_range = l2x0_inv_range;
- outer_cache.clean_range = l2x0_clean_range;
- outer_cache.flush_range = l2x0_flush_range;
- outer_cache.sync = l2x0_cache_sync;
- outer_cache.flush_all = l2x0_flush_all;
- outer_cache.inv_all = l2x0_inv_all;
- outer_cache.disable = l2x0_disable;
+ case L2X0_CACHE_ID_PART_L220:
+ data = &l2c220_data;
+ break;
+
+ case L2X0_CACHE_ID_PART_L310:
+ data = &l2c310_init_fns;
+ break;
}
- pr_info("%s cache controller enabled\n", type);
- pr_info("l2x0: %d ways, CACHE_ID 0x%08x, AUX_CTRL 0x%08x, Cache size: %d kB\n",
- ways, cache_id, aux, l2x0_size >> 10);
+ __l2c_init(data, aux_val, aux_mask, cache_id);
}
#ifdef CONFIG_OF
static int l2_wt_override;
+/* Aurora don't have the cache ID register available, so we have to
+ * pass it though the device tree */
+static u32 cache_id_part_number_from_dt;
+
+static void __init l2x0_of_parse(const struct device_node *np,
+ u32 *aux_val, u32 *aux_mask)
+{
+ u32 data[2] = { 0, 0 };
+ u32 tag = 0;
+ u32 dirty = 0;
+ u32 val = 0, mask = 0;
+
+ of_property_read_u32(np, "arm,tag-latency", &tag);
+ if (tag) {
+ mask |= L2X0_AUX_CTRL_TAG_LATENCY_MASK;
+ val |= (tag - 1) << L2X0_AUX_CTRL_TAG_LATENCY_SHIFT;
+ }
+
+ of_property_read_u32_array(np, "arm,data-latency",
+ data, ARRAY_SIZE(data));
+ if (data[0] && data[1]) {
+ mask |= L2X0_AUX_CTRL_DATA_RD_LATENCY_MASK |
+ L2X0_AUX_CTRL_DATA_WR_LATENCY_MASK;
+ val |= ((data[0] - 1) << L2X0_AUX_CTRL_DATA_RD_LATENCY_SHIFT) |
+ ((data[1] - 1) << L2X0_AUX_CTRL_DATA_WR_LATENCY_SHIFT);
+ }
+
+ of_property_read_u32(np, "arm,dirty-latency", &dirty);
+ if (dirty) {
+ mask |= L2X0_AUX_CTRL_DIRTY_LATENCY_MASK;
+ val |= (dirty - 1) << L2X0_AUX_CTRL_DIRTY_LATENCY_SHIFT;
+ }
+
+ *aux_val &= ~mask;
+ *aux_val |= val;
+ *aux_mask &= ~mask;
+}
+
+static const struct l2c_init_data of_l2c210_data __initconst = {
+ .type = "L2C-210",
+ .way_size_0 = SZ_8K,
+ .num_lock = 1,
+ .of_parse = l2x0_of_parse,
+ .enable = l2c_enable,
+ .save = l2c_save,
+ .outer_cache = {
+ .inv_range = l2c210_inv_range,
+ .clean_range = l2c210_clean_range,
+ .flush_range = l2c210_flush_range,
+ .flush_all = l2c210_flush_all,
+ .disable = l2c_disable,
+ .sync = l2c210_sync,
+ .resume = l2c210_resume,
+ },
+};
+
+static const struct l2c_init_data of_l2c220_data __initconst = {
+ .type = "L2C-220",
+ .way_size_0 = SZ_8K,
+ .num_lock = 1,
+ .of_parse = l2x0_of_parse,
+ .enable = l2c220_enable,
+ .save = l2c_save,
+ .outer_cache = {
+ .inv_range = l2c220_inv_range,
+ .clean_range = l2c220_clean_range,
+ .flush_range = l2c220_flush_range,
+ .flush_all = l2c220_flush_all,
+ .disable = l2c_disable,
+ .sync = l2c220_sync,
+ .resume = l2c210_resume,
+ },
+};
+
+static void __init l2c310_of_parse(const struct device_node *np,
+ u32 *aux_val, u32 *aux_mask)
+{
+ u32 data[3] = { 0, 0, 0 };
+ u32 tag[3] = { 0, 0, 0 };
+ u32 filter[2] = { 0, 0 };
+
+ of_property_read_u32_array(np, "arm,tag-latency", tag, ARRAY_SIZE(tag));
+ if (tag[0] && tag[1] && tag[2])
+ writel_relaxed(
+ L310_LATENCY_CTRL_RD(tag[0] - 1) |
+ L310_LATENCY_CTRL_WR(tag[1] - 1) |
+ L310_LATENCY_CTRL_SETUP(tag[2] - 1),
+ l2x0_base + L310_TAG_LATENCY_CTRL);
+
+ of_property_read_u32_array(np, "arm,data-latency",
+ data, ARRAY_SIZE(data));
+ if (data[0] && data[1] && data[2])
+ writel_relaxed(
+ L310_LATENCY_CTRL_RD(data[0] - 1) |
+ L310_LATENCY_CTRL_WR(data[1] - 1) |
+ L310_LATENCY_CTRL_SETUP(data[2] - 1),
+ l2x0_base + L310_DATA_LATENCY_CTRL);
+
+ of_property_read_u32_array(np, "arm,filter-ranges",
+ filter, ARRAY_SIZE(filter));
+ if (filter[1]) {
+ writel_relaxed(ALIGN(filter[0] + filter[1], SZ_1M),
+ l2x0_base + L310_ADDR_FILTER_END);
+ writel_relaxed((filter[0] & ~(SZ_1M - 1)) | L310_ADDR_FILTER_EN,
+ l2x0_base + L310_ADDR_FILTER_START);
+ }
+}
+
+static const struct l2c_init_data of_l2c310_data __initconst = {
+ .type = "L2C-310",
+ .way_size_0 = SZ_8K,
+ .num_lock = 8,
+ .of_parse = l2c310_of_parse,
+ .enable = l2c310_enable,
+ .fixup = l2c310_fixup,
+ .save = l2c310_save,
+ .outer_cache = {
+ .inv_range = l2c210_inv_range,
+ .clean_range = l2c210_clean_range,
+ .flush_range = l2c210_flush_range,
+ .flush_all = l2c210_flush_all,
+ .disable = l2c310_disable,
+ .sync = l2c210_sync,
+ .resume = l2c310_resume,
+ },
+};
+
/*
* Note that the end addresses passed to Linux primitives are
* noninclusive, while the hardware cache range operations use
@@ -524,6 +1166,100 @@ static void aurora_flush_range(unsigned long start, unsigned long end)
}
}
+static void aurora_save(void __iomem *base)
+{
+ l2x0_saved_regs.ctrl = readl_relaxed(base + L2X0_CTRL);
+ l2x0_saved_regs.aux_ctrl = readl_relaxed(base + L2X0_AUX_CTRL);
+}
+
+static void aurora_resume(void)
+{
+ void __iomem *base = l2x0_base;
+
+ if (!(readl(base + L2X0_CTRL) & L2X0_CTRL_EN)) {
+ writel_relaxed(l2x0_saved_regs.aux_ctrl, base + L2X0_AUX_CTRL);
+ writel_relaxed(l2x0_saved_regs.ctrl, base + L2X0_CTRL);
+ }
+}
+
+/*
+ * For Aurora cache in no outer mode, enable via the CP15 coprocessor
+ * broadcasting of cache commands to L2.
+ */
+static void __init aurora_enable_no_outer(void __iomem *base, u32 aux,
+ unsigned num_lock)
+{
+ u32 u;
+
+ asm volatile("mrc p15, 1, %0, c15, c2, 0" : "=r" (u));
+ u |= AURORA_CTRL_FW; /* Set the FW bit */
+ asm volatile("mcr p15, 1, %0, c15, c2, 0" : : "r" (u));
+
+ isb();
+
+ l2c_enable(base, aux, num_lock);
+}
+
+static void __init aurora_fixup(void __iomem *base, u32 cache_id,
+ struct outer_cache_fns *fns)
+{
+ sync_reg_offset = AURORA_SYNC_REG;
+}
+
+static void __init aurora_of_parse(const struct device_node *np,
+ u32 *aux_val, u32 *aux_mask)
+{
+ u32 val = AURORA_ACR_REPLACEMENT_TYPE_SEMIPLRU;
+ u32 mask = AURORA_ACR_REPLACEMENT_MASK;
+
+ of_property_read_u32(np, "cache-id-part",
+ &cache_id_part_number_from_dt);
+
+ /* Determine and save the write policy */
+ l2_wt_override = of_property_read_bool(np, "wt-override");
+
+ if (l2_wt_override) {
+ val |= AURORA_ACR_FORCE_WRITE_THRO_POLICY;
+ mask |= AURORA_ACR_FORCE_WRITE_POLICY_MASK;
+ }
+
+ *aux_val &= ~mask;
+ *aux_val |= val;
+ *aux_mask &= ~mask;
+}
+
+static const struct l2c_init_data of_aurora_with_outer_data __initconst = {
+ .type = "Aurora",
+ .way_size_0 = SZ_4K,
+ .num_lock = 4,
+ .of_parse = aurora_of_parse,
+ .enable = l2c_enable,
+ .fixup = aurora_fixup,
+ .save = aurora_save,
+ .outer_cache = {
+ .inv_range = aurora_inv_range,
+ .clean_range = aurora_clean_range,
+ .flush_range = aurora_flush_range,
+ .flush_all = l2x0_flush_all,
+ .disable = l2x0_disable,
+ .sync = l2x0_cache_sync,
+ .resume = aurora_resume,
+ },
+};
+
+static const struct l2c_init_data of_aurora_no_outer_data __initconst = {
+ .type = "Aurora",
+ .way_size_0 = SZ_4K,
+ .num_lock = 4,
+ .of_parse = aurora_of_parse,
+ .enable = aurora_enable_no_outer,
+ .fixup = aurora_fixup,
+ .save = aurora_save,
+ .outer_cache = {
+ .resume = aurora_resume,
+ },
+};
+
/*
* For certain Broadcom SoCs, depending on the address range, different offsets
* need to be added to the address before passing it to L2 for
@@ -588,16 +1324,16 @@ static void bcm_inv_range(unsigned long start, unsigned long end)
/* normal case, no cross section between start and end */
if (likely(bcm_addr_is_sys_emi(end) || !bcm_addr_is_sys_emi(start))) {
- l2x0_inv_range(new_start, new_end);
+ l2c210_inv_range(new_start, new_end);
return;
}
/* They cross sections, so it can only be a cross from section
* 2 to section 3
*/
- l2x0_inv_range(new_start,
+ l2c210_inv_range(new_start,
bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR-1));
- l2x0_inv_range(bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR),
+ l2c210_inv_range(bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR),
new_end);
}
@@ -610,26 +1346,21 @@ static void bcm_clean_range(unsigned long start, unsigned long end)
if (unlikely(end <= start))
return;
- if ((end - start) >= l2x0_size) {
- l2x0_clean_all();
- return;
- }
-
new_start = bcm_l2_phys_addr(start);
new_end = bcm_l2_phys_addr(end);
/* normal case, no cross section between start and end */
if (likely(bcm_addr_is_sys_emi(end) || !bcm_addr_is_sys_emi(start))) {
- l2x0_clean_range(new_start, new_end);
+ l2c210_clean_range(new_start, new_end);
return;
}
/* They cross sections, so it can only be a cross from section
* 2 to section 3
*/
- l2x0_clean_range(new_start,
+ l2c210_clean_range(new_start,
bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR-1));
- l2x0_clean_range(bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR),
+ l2c210_clean_range(bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR),
new_end);
}
@@ -643,7 +1374,7 @@ static void bcm_flush_range(unsigned long start, unsigned long end)
return;
if ((end - start) >= l2x0_size) {
- l2x0_flush_all();
+ outer_cache.flush_all();
return;
}
@@ -652,283 +1383,67 @@ static void bcm_flush_range(unsigned long start, unsigned long end)
/* normal case, no cross section between start and end */
if (likely(bcm_addr_is_sys_emi(end) || !bcm_addr_is_sys_emi(start))) {
- l2x0_flush_range(new_start, new_end);
+ l2c210_flush_range(new_start, new_end);
return;
}
/* They cross sections, so it can only be a cross from section
* 2 to section 3
*/
- l2x0_flush_range(new_start,
+ l2c210_flush_range(new_start,
bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR-1));
- l2x0_flush_range(bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR),
+ l2c210_flush_range(bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR),
new_end);
}
-static void __init l2x0_of_setup(const struct device_node *np,
- u32 *aux_val, u32 *aux_mask)
-{
- u32 data[2] = { 0, 0 };
- u32 tag = 0;
- u32 dirty = 0;
- u32 val = 0, mask = 0;
-
- of_property_read_u32(np, "arm,tag-latency", &tag);
- if (tag) {
- mask |= L2X0_AUX_CTRL_TAG_LATENCY_MASK;
- val |= (tag - 1) << L2X0_AUX_CTRL_TAG_LATENCY_SHIFT;
- }
-
- of_property_read_u32_array(np, "arm,data-latency",
- data, ARRAY_SIZE(data));
- if (data[0] && data[1]) {
- mask |= L2X0_AUX_CTRL_DATA_RD_LATENCY_MASK |
- L2X0_AUX_CTRL_DATA_WR_LATENCY_MASK;
- val |= ((data[0] - 1) << L2X0_AUX_CTRL_DATA_RD_LATENCY_SHIFT) |
- ((data[1] - 1) << L2X0_AUX_CTRL_DATA_WR_LATENCY_SHIFT);
- }
-
- of_property_read_u32(np, "arm,dirty-latency", &dirty);
- if (dirty) {
- mask |= L2X0_AUX_CTRL_DIRTY_LATENCY_MASK;
- val |= (dirty - 1) << L2X0_AUX_CTRL_DIRTY_LATENCY_SHIFT;
- }
-
- *aux_val &= ~mask;
- *aux_val |= val;
- *aux_mask &= ~mask;
-}
-
-static void __init pl310_of_setup(const struct device_node *np,
- u32 *aux_val, u32 *aux_mask)
-{
- u32 data[3] = { 0, 0, 0 };
- u32 tag[3] = { 0, 0, 0 };
- u32 filter[2] = { 0, 0 };
-
- of_property_read_u32_array(np, "arm,tag-latency", tag, ARRAY_SIZE(tag));
- if (tag[0] && tag[1] && tag[2])
- writel_relaxed(
- ((tag[0] - 1) << L2X0_LATENCY_CTRL_RD_SHIFT) |
- ((tag[1] - 1) << L2X0_LATENCY_CTRL_WR_SHIFT) |
- ((tag[2] - 1) << L2X0_LATENCY_CTRL_SETUP_SHIFT),
- l2x0_base + L2X0_TAG_LATENCY_CTRL);
-
- of_property_read_u32_array(np, "arm,data-latency",
- data, ARRAY_SIZE(data));
- if (data[0] && data[1] && data[2])
- writel_relaxed(
- ((data[0] - 1) << L2X0_LATENCY_CTRL_RD_SHIFT) |
- ((data[1] - 1) << L2X0_LATENCY_CTRL_WR_SHIFT) |
- ((data[2] - 1) << L2X0_LATENCY_CTRL_SETUP_SHIFT),
- l2x0_base + L2X0_DATA_LATENCY_CTRL);
-
- of_property_read_u32_array(np, "arm,filter-ranges",
- filter, ARRAY_SIZE(filter));
- if (filter[1]) {
- writel_relaxed(ALIGN(filter[0] + filter[1], SZ_1M),
- l2x0_base + L2X0_ADDR_FILTER_END);
- writel_relaxed((filter[0] & ~(SZ_1M - 1)) | L2X0_ADDR_FILTER_EN,
- l2x0_base + L2X0_ADDR_FILTER_START);
- }
-}
-
-static void __init pl310_save(void)
-{
- u32 l2x0_revision = readl_relaxed(l2x0_base + L2X0_CACHE_ID) &
- L2X0_CACHE_ID_RTL_MASK;
-
- l2x0_saved_regs.tag_latency = readl_relaxed(l2x0_base +
- L2X0_TAG_LATENCY_CTRL);
- l2x0_saved_regs.data_latency = readl_relaxed(l2x0_base +
- L2X0_DATA_LATENCY_CTRL);
- l2x0_saved_regs.filter_end = readl_relaxed(l2x0_base +
- L2X0_ADDR_FILTER_END);
- l2x0_saved_regs.filter_start = readl_relaxed(l2x0_base +
- L2X0_ADDR_FILTER_START);
-
- if (l2x0_revision >= L2X0_CACHE_ID_RTL_R2P0) {
- /*
- * From r2p0, there is Prefetch offset/control register
- */
- l2x0_saved_regs.prefetch_ctrl = readl_relaxed(l2x0_base +
- L2X0_PREFETCH_CTRL);
- /*
- * From r3p0, there is Power control register
- */
- if (l2x0_revision >= L2X0_CACHE_ID_RTL_R3P0)
- l2x0_saved_regs.pwr_ctrl = readl_relaxed(l2x0_base +
- L2X0_POWER_CTRL);
- }
-}
+/* Broadcom L2C-310 start from ARMs R3P2 or later, and require no fixups */
+static const struct l2c_init_data of_bcm_l2x0_data __initconst = {
+ .type = "BCM-L2C-310",
+ .way_size_0 = SZ_8K,
+ .num_lock = 8,
+ .of_parse = l2c310_of_parse,
+ .enable = l2c310_enable,
+ .save = l2c310_save,
+ .outer_cache = {
+ .inv_range = bcm_inv_range,
+ .clean_range = bcm_clean_range,
+ .flush_range = bcm_flush_range,
+ .flush_all = l2c210_flush_all,
+ .disable = l2c310_disable,
+ .sync = l2c210_sync,
+ .resume = l2c310_resume,
+ },
+};
-static void aurora_save(void)
+static void __init tauros3_save(void __iomem *base)
{
- l2x0_saved_regs.ctrl = readl_relaxed(l2x0_base + L2X0_CTRL);
- l2x0_saved_regs.aux_ctrl = readl_relaxed(l2x0_base + L2X0_AUX_CTRL);
-}
+ l2c_save(base);
-static void __init tauros3_save(void)
-{
l2x0_saved_regs.aux2_ctrl =
- readl_relaxed(l2x0_base + TAUROS3_AUX2_CTRL);
+ readl_relaxed(base + TAUROS3_AUX2_CTRL);
l2x0_saved_regs.prefetch_ctrl =
- readl_relaxed(l2x0_base + L2X0_PREFETCH_CTRL);
-}
-
-static void l2x0_resume(void)
-{
- if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) {
- /* restore aux ctrl and enable l2 */
- l2x0_unlock(readl_relaxed(l2x0_base + L2X0_CACHE_ID));
-
- writel_relaxed(l2x0_saved_regs.aux_ctrl, l2x0_base +
- L2X0_AUX_CTRL);
-
- l2x0_inv_all();
-
- writel_relaxed(L2X0_CTRL_EN, l2x0_base + L2X0_CTRL);
- }
-}
-
-static void pl310_resume(void)
-{
- u32 l2x0_revision;
-
- if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) {
- /* restore pl310 setup */
- writel_relaxed(l2x0_saved_regs.tag_latency,
- l2x0_base + L2X0_TAG_LATENCY_CTRL);
- writel_relaxed(l2x0_saved_regs.data_latency,
- l2x0_base + L2X0_DATA_LATENCY_CTRL);
- writel_relaxed(l2x0_saved_regs.filter_end,
- l2x0_base + L2X0_ADDR_FILTER_END);
- writel_relaxed(l2x0_saved_regs.filter_start,
- l2x0_base + L2X0_ADDR_FILTER_START);
-
- l2x0_revision = readl_relaxed(l2x0_base + L2X0_CACHE_ID) &
- L2X0_CACHE_ID_RTL_MASK;
-
- if (l2x0_revision >= L2X0_CACHE_ID_RTL_R2P0) {
- writel_relaxed(l2x0_saved_regs.prefetch_ctrl,
- l2x0_base + L2X0_PREFETCH_CTRL);
- if (l2x0_revision >= L2X0_CACHE_ID_RTL_R3P0)
- writel_relaxed(l2x0_saved_regs.pwr_ctrl,
- l2x0_base + L2X0_POWER_CTRL);
- }
- }
-
- l2x0_resume();
-}
-
-static void aurora_resume(void)
-{
- if (!(readl(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) {
- writel_relaxed(l2x0_saved_regs.aux_ctrl,
- l2x0_base + L2X0_AUX_CTRL);
- writel_relaxed(l2x0_saved_regs.ctrl, l2x0_base + L2X0_CTRL);
- }
+ readl_relaxed(base + L310_PREFETCH_CTRL);
}
static void tauros3_resume(void)
{
- if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) {
+ void __iomem *base = l2x0_base;
+
+ if (!(readl_relaxed(base + L2X0_CTRL) & L2X0_CTRL_EN)) {
writel_relaxed(l2x0_saved_regs.aux2_ctrl,
- l2x0_base + TAUROS3_AUX2_CTRL);
+ base + TAUROS3_AUX2_CTRL);
writel_relaxed(l2x0_saved_regs.prefetch_ctrl,
- l2x0_base + L2X0_PREFETCH_CTRL);
- }
+ base + L310_PREFETCH_CTRL);
- l2x0_resume();
-}
-
-static void __init aurora_broadcast_l2_commands(void)
-{
- __u32 u;
- /* Enable Broadcasting of cache commands to L2*/
- __asm__ __volatile__("mrc p15, 1, %0, c15, c2, 0" : "=r"(u));
- u |= AURORA_CTRL_FW; /* Set the FW bit */
- __asm__ __volatile__("mcr p15, 1, %0, c15, c2, 0\n" : : "r"(u));
- isb();
-}
-
-static void __init aurora_of_setup(const struct device_node *np,
- u32 *aux_val, u32 *aux_mask)
-{
- u32 val = AURORA_ACR_REPLACEMENT_TYPE_SEMIPLRU;
- u32 mask = AURORA_ACR_REPLACEMENT_MASK;
-
- of_property_read_u32(np, "cache-id-part",
- &cache_id_part_number_from_dt);
-
- /* Determine and save the write policy */
- l2_wt_override = of_property_read_bool(np, "wt-override");
-
- if (l2_wt_override) {
- val |= AURORA_ACR_FORCE_WRITE_THRO_POLICY;
- mask |= AURORA_ACR_FORCE_WRITE_POLICY_MASK;
+ l2c_enable(base, l2x0_saved_regs.aux_ctrl, 8);
}
-
- *aux_val &= ~mask;
- *aux_val |= val;
- *aux_mask &= ~mask;
}
-static const struct l2x0_of_data pl310_data = {
- .setup = pl310_of_setup,
- .save = pl310_save,
- .outer_cache = {
- .resume = pl310_resume,
- .inv_range = l2x0_inv_range,
- .clean_range = l2x0_clean_range,
- .flush_range = l2x0_flush_range,
- .sync = l2x0_cache_sync,
- .flush_all = l2x0_flush_all,
- .inv_all = l2x0_inv_all,
- .disable = l2x0_disable,
- },
-};
-
-static const struct l2x0_of_data l2x0_data = {
- .setup = l2x0_of_setup,
- .save = NULL,
- .outer_cache = {
- .resume = l2x0_resume,
- .inv_range = l2x0_inv_range,
- .clean_range = l2x0_clean_range,
- .flush_range = l2x0_flush_range,
- .sync = l2x0_cache_sync,
- .flush_all = l2x0_flush_all,
- .inv_all = l2x0_inv_all,
- .disable = l2x0_disable,
- },
-};
-
-static const struct l2x0_of_data aurora_with_outer_data = {
- .setup = aurora_of_setup,
- .save = aurora_save,
- .outer_cache = {
- .resume = aurora_resume,
- .inv_range = aurora_inv_range,
- .clean_range = aurora_clean_range,
- .flush_range = aurora_flush_range,
- .sync = l2x0_cache_sync,
- .flush_all = l2x0_flush_all,
- .inv_all = l2x0_inv_all,
- .disable = l2x0_disable,
- },
-};
-
-static const struct l2x0_of_data aurora_no_outer_data = {
- .setup = aurora_of_setup,
- .save = aurora_save,
- .outer_cache = {
- .resume = aurora_resume,
- },
-};
-
-static const struct l2x0_of_data tauros3_data = {
- .setup = NULL,
+static const struct l2c_init_data of_tauros3_data __initconst = {
+ .type = "Tauros3",
+ .way_size_0 = SZ_8K,
+ .num_lock = 8,
+ .enable = l2c_enable,
.save = tauros3_save,
/* Tauros3 broadcasts L1 cache operations to L2 */
.outer_cache = {
@@ -936,43 +1451,26 @@ static const struct l2x0_of_data tauros3_data = {
},
};
-static const struct l2x0_of_data bcm_l2x0_data = {
- .setup = pl310_of_setup,
- .save = pl310_save,
- .outer_cache = {
- .resume = pl310_resume,
- .inv_range = bcm_inv_range,
- .clean_range = bcm_clean_range,
- .flush_range = bcm_flush_range,
- .sync = l2x0_cache_sync,
- .flush_all = l2x0_flush_all,
- .inv_all = l2x0_inv_all,
- .disable = l2x0_disable,
- },
-};
-
+#define L2C_ID(name, fns) { .compatible = name, .data = (void *)&fns }
static const struct of_device_id l2x0_ids[] __initconst = {
- { .compatible = "arm,l210-cache", .data = (void *)&l2x0_data },
- { .compatible = "arm,l220-cache", .data = (void *)&l2x0_data },
- { .compatible = "arm,pl310-cache", .data = (void *)&pl310_data },
- { .compatible = "bcm,bcm11351-a2-pl310-cache", /* deprecated name */
- .data = (void *)&bcm_l2x0_data},
- { .compatible = "brcm,bcm11351-a2-pl310-cache",
- .data = (void *)&bcm_l2x0_data},
- { .compatible = "marvell,aurora-outer-cache",
- .data = (void *)&aurora_with_outer_data},
- { .compatible = "marvell,aurora-system-cache",
- .data = (void *)&aurora_no_outer_data},
- { .compatible = "marvell,tauros3-cache",
- .data = (void *)&tauros3_data },
+ L2C_ID("arm,l210-cache", of_l2c210_data),
+ L2C_ID("arm,l220-cache", of_l2c220_data),
+ L2C_ID("arm,pl310-cache", of_l2c310_data),
+ L2C_ID("brcm,bcm11351-a2-pl310-cache", of_bcm_l2x0_data),
+ L2C_ID("marvell,aurora-outer-cache", of_aurora_with_outer_data),
+ L2C_ID("marvell,aurora-system-cache", of_aurora_no_outer_data),
+ L2C_ID("marvell,tauros3-cache", of_tauros3_data),
+ /* Deprecated IDs */
+ L2C_ID("bcm,bcm11351-a2-pl310-cache", of_bcm_l2x0_data),
{}
};
int __init l2x0_of_init(u32 aux_val, u32 aux_mask)
{
+ const struct l2c_init_data *data;
struct device_node *np;
- const struct l2x0_of_data *data;
struct resource res;
+ u32 cache_id, old_aux;
np = of_find_matching_node(NULL, l2x0_ids);
if (!np)
@@ -989,23 +1487,29 @@ int __init l2x0_of_init(u32 aux_val, u32 aux_mask)
data = of_match_node(l2x0_ids, np)->data;
- /* L2 configuration can only be changed if the cache is disabled */
- if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) {
- if (data->setup)
- data->setup(np, &aux_val, &aux_mask);
-
- /* For aurora cache in no outer mode select the
- * correct mode using the coprocessor*/
- if (data == &aurora_no_outer_data)
- aurora_broadcast_l2_commands();
+ old_aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL);
+ if (old_aux != ((old_aux & aux_mask) | aux_val)) {
+ pr_warn("L2C: platform modifies aux control register: 0x%08x -> 0x%08x\n",
+ old_aux, (old_aux & aux_mask) | aux_val);
+ } else if (aux_mask != ~0U && aux_val != 0) {
+ pr_alert("L2C: platform provided aux values match the hardware, so have no effect. Please remove them.\n");
}
- if (data->save)
- data->save();
+ /* All L2 caches are unified, so this property should be specified */
+ if (!of_property_read_bool(np, "cache-unified"))
+ pr_err("L2C: device tree omits to specify unified cache\n");
+
+ /* L2 configuration can only be changed if the cache is disabled */
+ if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN))
+ if (data->of_parse)
+ data->of_parse(np, &aux_val, &aux_mask);
+
+ if (cache_id_part_number_from_dt)
+ cache_id = cache_id_part_number_from_dt;
+ else
+ cache_id = readl_relaxed(l2x0_base + L2X0_CACHE_ID);
- of_init = true;
- memcpy(&outer_cache, &data->outer_cache, sizeof(outer_cache));
- l2x0_init(l2x0_base, aux_val, aux_mask);
+ __l2c_init(data, aux_val, aux_mask, cache_id);
return 0;
}
diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S
index 778bcf88ee79..615c99e38ba1 100644
--- a/arch/arm/mm/cache-v7.S
+++ b/arch/arm/mm/cache-v7.S
@@ -59,7 +59,7 @@ ENTRY(v7_invalidate_l1)
bgt 2b
cmp r2, #0
bgt 1b
- dsb
+ dsb st
isb
mov pc, lr
ENDPROC(v7_invalidate_l1)
@@ -166,7 +166,7 @@ skip:
finished:
mov r10, #0 @ swith back to cache level 0
mcr p15, 2, r10, c0, c0, 0 @ select current cache level in cssr
- dsb
+ dsb st
isb
mov pc, lr
ENDPROC(v7_flush_dcache_all)
@@ -335,7 +335,7 @@ ENTRY(v7_flush_kern_dcache_area)
add r0, r0, r2
cmp r0, r1
blo 1b
- dsb
+ dsb st
mov pc, lr
ENDPROC(v7_flush_kern_dcache_area)
@@ -368,7 +368,7 @@ v7_dma_inv_range:
add r0, r0, r2
cmp r0, r1
blo 1b
- dsb
+ dsb st
mov pc, lr
ENDPROC(v7_dma_inv_range)
@@ -390,7 +390,7 @@ v7_dma_clean_range:
add r0, r0, r2
cmp r0, r1
blo 1b
- dsb
+ dsb st
mov pc, lr
ENDPROC(v7_dma_clean_range)
@@ -412,7 +412,7 @@ ENTRY(v7_dma_flush_range)
add r0, r0, r2
cmp r0, r1
blo 1b
- dsb
+ dsb st
mov pc, lr
ENDPROC(v7_dma_flush_range)
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 5bef858568e6..4c88935654ca 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -885,7 +885,7 @@ static void dma_cache_maint_page(struct page *page, unsigned long offset,
static void __dma_page_cpu_to_dev(struct page *page, unsigned long off,
size_t size, enum dma_data_direction dir)
{
- unsigned long paddr;
+ phys_addr_t paddr;
dma_cache_maint_page(page, off, size, dir, dmac_map_area);
@@ -901,14 +901,15 @@ static void __dma_page_cpu_to_dev(struct page *page, unsigned long off,
static void __dma_page_dev_to_cpu(struct page *page, unsigned long off,
size_t size, enum dma_data_direction dir)
{
- unsigned long paddr = page_to_phys(page) + off;
+ phys_addr_t paddr = page_to_phys(page) + off;
/* FIXME: non-speculating: not required */
- /* don't bother invalidating if DMA to device */
- if (dir != DMA_TO_DEVICE)
+ /* in any case, don't bother invalidating if DMA to device */
+ if (dir != DMA_TO_DEVICE) {
outer_inv_range(paddr, paddr + size);
- dma_cache_maint_page(page, off, size, dir, dmac_unmap_area);
+ dma_cache_maint_page(page, off, size, dir, dmac_unmap_area);
+ }
/*
* Mark the D-cache clean for these pages to avoid extra flushing.
diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c
index 3387e60e4ea3..43d54f5b26b9 100644
--- a/arch/arm/mm/flush.c
+++ b/arch/arm/mm/flush.c
@@ -104,17 +104,20 @@ void flush_cache_page(struct vm_area_struct *vma, unsigned long user_addr, unsig
#define flush_icache_alias(pfn,vaddr,len) do { } while (0)
#endif
+#define FLAG_PA_IS_EXEC 1
+#define FLAG_PA_CORE_IN_MM 2
+
static void flush_ptrace_access_other(void *args)
{
__flush_icache_all();
}
-static
-void flush_ptrace_access(struct vm_area_struct *vma, struct page *page,
- unsigned long uaddr, void *kaddr, unsigned long len)
+static inline
+void __flush_ptrace_access(struct page *page, unsigned long uaddr, void *kaddr,
+ unsigned long len, unsigned int flags)
{
if (cache_is_vivt()) {
- if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(vma->vm_mm))) {
+ if (flags & FLAG_PA_CORE_IN_MM) {
unsigned long addr = (unsigned long)kaddr;
__cpuc_coherent_kern_range(addr, addr + len);
}
@@ -128,7 +131,7 @@ void flush_ptrace_access(struct vm_area_struct *vma, struct page *page,
}
/* VIPT non-aliasing D-cache */
- if (vma->vm_flags & VM_EXEC) {
+ if (flags & FLAG_PA_IS_EXEC) {
unsigned long addr = (unsigned long)kaddr;
if (icache_is_vipt_aliasing())
flush_icache_alias(page_to_pfn(page), uaddr, len);
@@ -140,6 +143,26 @@ void flush_ptrace_access(struct vm_area_struct *vma, struct page *page,
}
}
+static
+void flush_ptrace_access(struct vm_area_struct *vma, struct page *page,
+ unsigned long uaddr, void *kaddr, unsigned long len)
+{
+ unsigned int flags = 0;
+ if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(vma->vm_mm)))
+ flags |= FLAG_PA_CORE_IN_MM;
+ if (vma->vm_flags & VM_EXEC)
+ flags |= FLAG_PA_IS_EXEC;
+ __flush_ptrace_access(page, uaddr, kaddr, len, flags);
+}
+
+void flush_uprobe_xol_access(struct page *page, unsigned long uaddr,
+ void *kaddr, unsigned long len)
+{
+ unsigned int flags = FLAG_PA_CORE_IN_MM|FLAG_PA_IS_EXEC;
+
+ __flush_ptrace_access(page, uaddr, kaddr, len, flags);
+}
+
/*
* Copy user data from/to a page which is mapped into a different
* processes address space. Really, we want to allow our "user
diff --git a/arch/arm/mm/highmem.c b/arch/arm/mm/highmem.c
index 21b9e1bf9b77..45aeaaca9052 100644
--- a/arch/arm/mm/highmem.c
+++ b/arch/arm/mm/highmem.c
@@ -18,6 +18,21 @@
#include <asm/tlbflush.h>
#include "mm.h"
+pte_t *fixmap_page_table;
+
+static inline void set_fixmap_pte(int idx, pte_t pte)
+{
+ unsigned long vaddr = __fix_to_virt(idx);
+ set_pte_ext(fixmap_page_table + idx, pte, 0);
+ local_flush_tlb_kernel_page(vaddr);
+}
+
+static inline pte_t get_fixmap_pte(unsigned long vaddr)
+{
+ unsigned long idx = __virt_to_fix(vaddr);
+ return *(fixmap_page_table + idx);
+}
+
void *kmap(struct page *page)
{
might_sleep();
@@ -63,20 +78,20 @@ void *kmap_atomic(struct page *page)
type = kmap_atomic_idx_push();
idx = type + KM_TYPE_NR * smp_processor_id();
- vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
+ vaddr = __fix_to_virt(idx);
#ifdef CONFIG_DEBUG_HIGHMEM
/*
* With debugging enabled, kunmap_atomic forces that entry to 0.
* Make sure it was indeed properly unmapped.
*/
- BUG_ON(!pte_none(get_top_pte(vaddr)));
+ BUG_ON(!pte_none(*(fixmap_page_table + idx)));
#endif
/*
* When debugging is off, kunmap_atomic leaves the previous mapping
* in place, so the contained TLB flush ensures the TLB is updated
* with the new mapping.
*/
- set_top_pte(vaddr, mk_pte(page, kmap_prot));
+ set_fixmap_pte(idx, mk_pte(page, kmap_prot));
return (void *)vaddr;
}
@@ -94,8 +109,8 @@ void __kunmap_atomic(void *kvaddr)
if (cache_is_vivt())
__cpuc_flush_dcache_area((void *)vaddr, PAGE_SIZE);
#ifdef CONFIG_DEBUG_HIGHMEM
- BUG_ON(vaddr != __fix_to_virt(FIX_KMAP_BEGIN + idx));
- set_top_pte(vaddr, __pte(0));
+ BUG_ON(vaddr != __fix_to_virt(idx));
+ set_fixmap_pte(idx, __pte(0));
#else
(void) idx; /* to kill a warning */
#endif
@@ -117,11 +132,11 @@ void *kmap_atomic_pfn(unsigned long pfn)
type = kmap_atomic_idx_push();
idx = type + KM_TYPE_NR * smp_processor_id();
- vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
+ vaddr = __fix_to_virt(idx);
#ifdef CONFIG_DEBUG_HIGHMEM
- BUG_ON(!pte_none(get_top_pte(vaddr)));
+ BUG_ON(!pte_none(*(fixmap_page_table + idx)));
#endif
- set_top_pte(vaddr, pfn_pte(pfn, kmap_prot));
+ set_fixmap_pte(idx, pfn_pte(pfn, kmap_prot));
return (void *)vaddr;
}
@@ -133,5 +148,5 @@ struct page *kmap_atomic_to_page(const void *ptr)
if (vaddr < FIXADDR_START)
return virt_to_page(ptr);
- return pte_page(get_top_pte(vaddr));
+ return pte_page(get_fixmap_pte(vaddr));
}
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index 928d596d9ab4..659c75d808dc 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -23,6 +23,7 @@
#include <linux/dma-contiguous.h>
#include <linux/sizes.h>
+#include <asm/cp15.h>
#include <asm/mach-types.h>
#include <asm/memblock.h>
#include <asm/prom.h>
@@ -36,6 +37,14 @@
#include "mm.h"
+#ifdef CONFIG_CPU_CP15_MMU
+unsigned long __init __clear_cr(unsigned long mask)
+{
+ cr_alignment = cr_alignment & ~mask;
+ return cr_alignment;
+}
+#endif
+
static phys_addr_t phys_initrd_start __initdata = 0;
static unsigned long phys_initrd_size __initdata = 0;
@@ -81,24 +90,21 @@ __tagtable(ATAG_INITRD2, parse_tag_initrd2);
* initialization functions, as well as show_mem() for the skipping
* of holes in the memory map. It is populated by arm_add_memory().
*/
-struct meminfo meminfo;
-
void show_mem(unsigned int filter)
{
int free = 0, total = 0, reserved = 0;
- int shared = 0, cached = 0, slab = 0, i;
- struct meminfo * mi = &meminfo;
+ int shared = 0, cached = 0, slab = 0;
+ struct memblock_region *reg;
printk("Mem-info:\n");
show_free_areas(filter);
- for_each_bank (i, mi) {
- struct membank *bank = &mi->bank[i];
+ for_each_memblock (memory, reg) {
unsigned int pfn1, pfn2;
struct page *page, *end;
- pfn1 = bank_pfn_start(bank);
- pfn2 = bank_pfn_end(bank);
+ pfn1 = memblock_region_memory_base_pfn(reg);
+ pfn2 = memblock_region_memory_end_pfn(reg);
page = pfn_to_page(pfn1);
end = pfn_to_page(pfn2 - 1) + 1;
@@ -115,8 +121,9 @@ void show_mem(unsigned int filter)
free++;
else
shared += page_count(page) - 1;
- page++;
- } while (page < end);
+ pfn1++;
+ page = pfn_to_page(pfn1);
+ } while (pfn1 < pfn2);
}
printk("%d pages of RAM\n", total);
@@ -130,16 +137,9 @@ void show_mem(unsigned int filter)
static void __init find_limits(unsigned long *min, unsigned long *max_low,
unsigned long *max_high)
{
- struct meminfo *mi = &meminfo;
- int i;
-
- /* This assumes the meminfo array is properly sorted */
- *min = bank_pfn_start(&mi->bank[0]);
- for_each_bank (i, mi)
- if (mi->bank[i].highmem)
- break;
- *max_low = bank_pfn_end(&mi->bank[i - 1]);
- *max_high = bank_pfn_end(&mi->bank[mi->nr_banks - 1]);
+ *max_low = PFN_DOWN(memblock_get_current_limit());
+ *min = PFN_UP(memblock_start_of_DRAM());
+ *max_high = PFN_DOWN(memblock_end_of_DRAM());
}
#ifdef CONFIG_ZONE_DMA
@@ -274,14 +274,8 @@ phys_addr_t __init arm_memblock_steal(phys_addr_t size, phys_addr_t align)
return phys;
}
-void __init arm_memblock_init(struct meminfo *mi,
- const struct machine_desc *mdesc)
+void __init arm_memblock_init(const struct machine_desc *mdesc)
{
- int i;
-
- for (i = 0; i < mi->nr_banks; i++)
- memblock_add(mi->bank[i].start, mi->bank[i].size);
-
/* Register the kernel text, kernel data and initrd with memblock. */
#ifdef CONFIG_XIP_KERNEL
memblock_reserve(__pa(_sdata), _end - _sdata);
@@ -412,54 +406,53 @@ free_memmap(unsigned long start_pfn, unsigned long end_pfn)
/*
* The mem_map array can get very big. Free the unused area of the memory map.
*/
-static void __init free_unused_memmap(struct meminfo *mi)
+static void __init free_unused_memmap(void)
{
- unsigned long bank_start, prev_bank_end = 0;
- unsigned int i;
+ unsigned long start, prev_end = 0;
+ struct memblock_region *reg;
/*
* This relies on each bank being in address order.
* The banks are sorted previously in bootmem_init().
*/
- for_each_bank(i, mi) {
- struct membank *bank = &mi->bank[i];
-
- bank_start = bank_pfn_start(bank);
+ for_each_memblock(memory, reg) {
+ start = memblock_region_memory_base_pfn(reg);
#ifdef CONFIG_SPARSEMEM
/*
* Take care not to free memmap entries that don't exist
* due to SPARSEMEM sections which aren't present.
*/
- bank_start = min(bank_start,
- ALIGN(prev_bank_end, PAGES_PER_SECTION));
+ start = min(start,
+ ALIGN(prev_end, PAGES_PER_SECTION));
#else
/*
* Align down here since the VM subsystem insists that the
* memmap entries are valid from the bank start aligned to
* MAX_ORDER_NR_PAGES.
*/
- bank_start = round_down(bank_start, MAX_ORDER_NR_PAGES);
+ start = round_down(start, MAX_ORDER_NR_PAGES);
#endif
/*
* If we had a previous bank, and there is a space
* between the current bank and the previous, free it.
*/
- if (prev_bank_end && prev_bank_end < bank_start)
- free_memmap(prev_bank_end, bank_start);
+ if (prev_end && prev_end < start)
+ free_memmap(prev_end, start);
/*
* Align up here since the VM subsystem insists that the
* memmap entries are valid from the bank end aligned to
* MAX_ORDER_NR_PAGES.
*/
- prev_bank_end = ALIGN(bank_pfn_end(bank), MAX_ORDER_NR_PAGES);
+ prev_end = ALIGN(memblock_region_memory_end_pfn(reg),
+ MAX_ORDER_NR_PAGES);
}
#ifdef CONFIG_SPARSEMEM
- if (!IS_ALIGNED(prev_bank_end, PAGES_PER_SECTION))
- free_memmap(prev_bank_end,
- ALIGN(prev_bank_end, PAGES_PER_SECTION));
+ if (!IS_ALIGNED(prev_end, PAGES_PER_SECTION))
+ free_memmap(prev_end,
+ ALIGN(prev_end, PAGES_PER_SECTION));
#endif
}
@@ -535,7 +528,7 @@ void __init mem_init(void)
set_max_mapnr(pfn_to_page(max_pfn) - mem_map);
/* this will put all unused low memory onto the freelists */
- free_unused_memmap(&meminfo);
+ free_unused_memmap();
free_all_bootmem();
#ifdef CONFIG_SA1111
diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c
index f9c32ba73544..d1e5ad7ab3bc 100644
--- a/arch/arm/mm/ioremap.c
+++ b/arch/arm/mm/ioremap.c
@@ -438,6 +438,13 @@ void __arm_iounmap(volatile void __iomem *io_addr)
EXPORT_SYMBOL(__arm_iounmap);
#ifdef CONFIG_PCI
+static int pci_ioremap_mem_type = MT_DEVICE;
+
+void pci_ioremap_set_mem_type(int mem_type)
+{
+ pci_ioremap_mem_type = mem_type;
+}
+
int pci_ioremap_io(unsigned int offset, phys_addr_t phys_addr)
{
BUG_ON(offset + SZ_64K > IO_SPACE_LIMIT);
@@ -445,7 +452,7 @@ int pci_ioremap_io(unsigned int offset, phys_addr_t phys_addr)
return ioremap_page_range(PCI_IO_VIRT_BASE + offset,
PCI_IO_VIRT_BASE + offset + SZ_64K,
phys_addr,
- __pgprot(get_mem_type(MT_DEVICE)->prot_pte));
+ __pgprot(get_mem_type(pci_ioremap_mem_type)->prot_pte));
}
EXPORT_SYMBOL_GPL(pci_ioremap_io);
#endif
diff --git a/arch/arm/mm/l2c-common.c b/arch/arm/mm/l2c-common.c
new file mode 100644
index 000000000000..10a3cf28c362
--- /dev/null
+++ b/arch/arm/mm/l2c-common.c
@@ -0,0 +1,20 @@
+/*
+ * Copyright (C) 2010 ARM Ltd.
+ * Written by Catalin Marinas <catalin.marinas@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/bug.h>
+#include <linux/smp.h>
+#include <asm/outercache.h>
+
+void outer_disable(void)
+{
+ WARN_ON(!irqs_disabled());
+ WARN_ON(num_online_cpus() > 1);
+
+ if (outer_cache.disable)
+ outer_cache.disable();
+}
diff --git a/arch/arm/mm/l2c-l2x0-resume.S b/arch/arm/mm/l2c-l2x0-resume.S
new file mode 100644
index 000000000000..99b05f21a59a
--- /dev/null
+++ b/arch/arm/mm/l2c-l2x0-resume.S
@@ -0,0 +1,58 @@
+/*
+ * L2C-310 early resume code. This can be used by platforms to restore
+ * the settings of their L2 cache controller before restoring the
+ * processor state.
+ *
+ * This code can only be used to if you are running in the secure world.
+ */
+#include <linux/linkage.h>
+#include <asm/hardware/cache-l2x0.h>
+
+ .text
+
+ENTRY(l2c310_early_resume)
+ adr r0, 1f
+ ldr r2, [r0]
+ add r0, r2, r0
+
+ ldmia r0, {r1, r2, r3, r4, r5, r6, r7, r8}
+ @ r1 = phys address of L2C-310 controller
+ @ r2 = aux_ctrl
+ @ r3 = tag_latency
+ @ r4 = data_latency
+ @ r5 = filter_start
+ @ r6 = filter_end
+ @ r7 = prefetch_ctrl
+ @ r8 = pwr_ctrl
+
+ @ Check that the address has been initialised
+ teq r1, #0
+ moveq pc, lr
+
+ @ The prefetch and power control registers are revision dependent
+ @ and can be written whether or not the L2 cache is enabled
+ ldr r0, [r1, #L2X0_CACHE_ID]
+ and r0, r0, #L2X0_CACHE_ID_RTL_MASK
+ cmp r0, #L310_CACHE_ID_RTL_R2P0
+ strcs r7, [r1, #L310_PREFETCH_CTRL]
+ cmp r0, #L310_CACHE_ID_RTL_R3P0
+ strcs r8, [r1, #L310_POWER_CTRL]
+
+ @ Don't setup the L2 cache if it is already enabled
+ ldr r0, [r1, #L2X0_CTRL]
+ tst r0, #L2X0_CTRL_EN
+ movne pc, lr
+
+ str r3, [r1, #L310_TAG_LATENCY_CTRL]
+ str r4, [r1, #L310_DATA_LATENCY_CTRL]
+ str r6, [r1, #L310_ADDR_FILTER_END]
+ str r5, [r1, #L310_ADDR_FILTER_START]
+
+ str r2, [r1, #L2X0_AUX_CTRL]
+ mov r9, #L2X0_CTRL_EN
+ str r9, [r1, #L2X0_CTRL]
+ mov pc, lr
+ENDPROC(l2c310_early_resume)
+
+ .align
+1: .long l2x0_saved_regs - .
diff --git a/arch/arm/mm/mm.h b/arch/arm/mm/mm.h
index 7ea641b7aa7d..ce727d47275c 100644
--- a/arch/arm/mm/mm.h
+++ b/arch/arm/mm/mm.h
@@ -2,6 +2,8 @@
#include <linux/list.h>
#include <linux/vmalloc.h>
+#include <asm/pgtable.h>
+
/* the upper-most page table pointer */
extern pmd_t *top_pmd;
@@ -93,3 +95,5 @@ extern phys_addr_t arm_lowmem_limit;
void __init bootmem_init(void);
void arm_mm_memblock_reserve(void);
void dma_contiguous_remap(void);
+
+unsigned long __clear_cr(unsigned long mask);
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index b68c6b22e1c8..ab14b79b03f0 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -35,6 +35,7 @@
#include <asm/mach/arch.h>
#include <asm/mach/map.h>
#include <asm/mach/pci.h>
+#include <asm/fixmap.h>
#include "mm.h"
#include "tcm.h"
@@ -117,28 +118,54 @@ static struct cachepolicy cache_policies[] __initdata = {
};
#ifdef CONFIG_CPU_CP15
+static unsigned long initial_pmd_value __initdata = 0;
+
/*
- * These are useful for identifying cache coherency
- * problems by allowing the cache or the cache and
- * writebuffer to be turned off. (Note: the write
- * buffer should not be on and the cache off).
+ * Initialise the cache_policy variable with the initial state specified
+ * via the "pmd" value. This is used to ensure that on ARMv6 and later,
+ * the C code sets the page tables up with the same policy as the head
+ * assembly code, which avoids an illegal state where the TLBs can get
+ * confused. See comments in early_cachepolicy() for more information.
*/
-static int __init early_cachepolicy(char *p)
+void __init init_default_cache_policy(unsigned long pmd)
{
int i;
+ initial_pmd_value = pmd;
+
+ pmd &= PMD_SECT_TEX(1) | PMD_SECT_BUFFERABLE | PMD_SECT_CACHEABLE;
+
+ for (i = 0; i < ARRAY_SIZE(cache_policies); i++)
+ if (cache_policies[i].pmd == pmd) {
+ cachepolicy = i;
+ break;
+ }
+
+ if (i == ARRAY_SIZE(cache_policies))
+ pr_err("ERROR: could not find cache policy\n");
+}
+
+/*
+ * These are useful for identifying cache coherency problems by allowing
+ * the cache or the cache and writebuffer to be turned off. (Note: the
+ * write buffer should not be on and the cache off).
+ */
+static int __init early_cachepolicy(char *p)
+{
+ int i, selected = -1;
+
for (i = 0; i < ARRAY_SIZE(cache_policies); i++) {
int len = strlen(cache_policies[i].policy);
if (memcmp(p, cache_policies[i].policy, len) == 0) {
- cachepolicy = i;
- cr_alignment &= ~cache_policies[i].cr_mask;
- cr_no_alignment &= ~cache_policies[i].cr_mask;
+ selected = i;
break;
}
}
- if (i == ARRAY_SIZE(cache_policies))
- printk(KERN_ERR "ERROR: unknown or unsupported cache policy\n");
+
+ if (selected == -1)
+ pr_err("ERROR: unknown or unsupported cache policy\n");
+
/*
* This restriction is partly to do with the way we boot; it is
* unpredictable to have memory mapped using two different sets of
@@ -146,12 +173,18 @@ static int __init early_cachepolicy(char *p)
* change these attributes once the initial assembly has setup the
* page tables.
*/
- if (cpu_architecture() >= CPU_ARCH_ARMv6) {
- printk(KERN_WARNING "Only cachepolicy=writeback supported on ARMv6 and later\n");
- cachepolicy = CPOLICY_WRITEBACK;
+ if (cpu_architecture() >= CPU_ARCH_ARMv6 && selected != cachepolicy) {
+ pr_warn("Only cachepolicy=%s supported on ARMv6 and later\n",
+ cache_policies[cachepolicy].policy);
+ return 0;
+ }
+
+ if (selected != cachepolicy) {
+ unsigned long cr = __clear_cr(cache_policies[selected].cr_mask);
+ cachepolicy = selected;
+ flush_cache_all();
+ set_cr(cr);
}
- flush_cache_all();
- set_cr(cr_alignment);
return 0;
}
early_param("cachepolicy", early_cachepolicy);
@@ -186,35 +219,6 @@ static int __init early_ecc(char *p)
early_param("ecc", early_ecc);
#endif
-static int __init noalign_setup(char *__unused)
-{
- cr_alignment &= ~CR_A;
- cr_no_alignment &= ~CR_A;
- set_cr(cr_alignment);
- return 1;
-}
-__setup("noalign", noalign_setup);
-
-#ifndef CONFIG_SMP
-void adjust_cr(unsigned long mask, unsigned long set)
-{
- unsigned long flags;
-
- mask &= ~CR_A;
-
- set &= mask;
-
- local_irq_save(flags);
-
- cr_no_alignment = (cr_no_alignment & ~mask) | set;
- cr_alignment = (cr_alignment & ~mask) | set;
-
- set_cr((get_cr() & ~mask) | set);
-
- local_irq_restore(flags);
-}
-#endif
-
#else /* ifdef CONFIG_CPU_CP15 */
static int __init early_cachepolicy(char *p)
@@ -414,8 +418,17 @@ static void __init build_mem_type_table(void)
cachepolicy = CPOLICY_WRITEBACK;
ecc_mask = 0;
}
- if (is_smp())
- cachepolicy = CPOLICY_WRITEALLOC;
+
+ if (is_smp()) {
+ if (cachepolicy != CPOLICY_WRITEALLOC) {
+ pr_warn("Forcing write-allocate cache policy for SMP\n");
+ cachepolicy = CPOLICY_WRITEALLOC;
+ }
+ if (!(initial_pmd_value & PMD_SECT_S)) {
+ pr_warn("Forcing shared mappings for SMP\n");
+ initial_pmd_value |= PMD_SECT_S;
+ }
+ }
/*
* Strip out features not present on earlier architectures.
@@ -539,11 +552,12 @@ static void __init build_mem_type_table(void)
mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
#endif
- if (is_smp()) {
- /*
- * Mark memory with the "shared" attribute
- * for SMP systems
- */
+ /*
+ * If the initial page tables were created with the S bit
+ * set, then we need to do the same here for the same
+ * reasons given in early_cachepolicy().
+ */
+ if (initial_pmd_value & PMD_SECT_S) {
user_pgprot |= L_PTE_SHARED;
kern_pgprot |= L_PTE_SHARED;
vecs_pgprot |= L_PTE_SHARED;
@@ -1061,74 +1075,47 @@ phys_addr_t arm_lowmem_limit __initdata = 0;
void __init sanity_check_meminfo(void)
{
phys_addr_t memblock_limit = 0;
- int i, j, highmem = 0;
+ int highmem = 0;
phys_addr_t vmalloc_limit = __pa(vmalloc_min - 1) + 1;
+ struct memblock_region *reg;
- for (i = 0, j = 0; i < meminfo.nr_banks; i++) {
- struct membank *bank = &meminfo.bank[j];
- phys_addr_t size_limit;
-
- *bank = meminfo.bank[i];
- size_limit = bank->size;
+ for_each_memblock(memory, reg) {
+ phys_addr_t block_start = reg->base;
+ phys_addr_t block_end = reg->base + reg->size;
+ phys_addr_t size_limit = reg->size;
- if (bank->start >= vmalloc_limit)
+ if (reg->base >= vmalloc_limit)
highmem = 1;
else
- size_limit = vmalloc_limit - bank->start;
+ size_limit = vmalloc_limit - reg->base;
- bank->highmem = highmem;
-#ifdef CONFIG_HIGHMEM
- /*
- * Split those memory banks which are partially overlapping
- * the vmalloc area greatly simplifying things later.
- */
- if (!highmem && bank->size > size_limit) {
- if (meminfo.nr_banks >= NR_BANKS) {
- printk(KERN_CRIT "NR_BANKS too low, "
- "ignoring high memory\n");
- } else {
- memmove(bank + 1, bank,
- (meminfo.nr_banks - i) * sizeof(*bank));
- meminfo.nr_banks++;
- i++;
- bank[1].size -= size_limit;
- bank[1].start = vmalloc_limit;
- bank[1].highmem = highmem = 1;
- j++;
+ if (!IS_ENABLED(CONFIG_HIGHMEM) || cache_is_vipt_aliasing()) {
+
+ if (highmem) {
+ pr_notice("Ignoring RAM at %pa-%pa (!CONFIG_HIGHMEM)\n",
+ &block_start, &block_end);
+ memblock_remove(reg->base, reg->size);
+ continue;
}
- bank->size = size_limit;
- }
-#else
- /*
- * Highmem banks not allowed with !CONFIG_HIGHMEM.
- */
- if (highmem) {
- printk(KERN_NOTICE "Ignoring RAM at %.8llx-%.8llx "
- "(!CONFIG_HIGHMEM).\n",
- (unsigned long long)bank->start,
- (unsigned long long)bank->start + bank->size - 1);
- continue;
- }
- /*
- * Check whether this memory bank would partially overlap
- * the vmalloc area.
- */
- if (bank->size > size_limit) {
- printk(KERN_NOTICE "Truncating RAM at %.8llx-%.8llx "
- "to -%.8llx (vmalloc region overlap).\n",
- (unsigned long long)bank->start,
- (unsigned long long)bank->start + bank->size - 1,
- (unsigned long long)bank->start + size_limit - 1);
- bank->size = size_limit;
+ if (reg->size > size_limit) {
+ phys_addr_t overlap_size = reg->size - size_limit;
+
+ pr_notice("Truncating RAM at %pa-%pa to -%pa",
+ &block_start, &block_end, &vmalloc_limit);
+ memblock_remove(vmalloc_limit, overlap_size);
+ block_end = vmalloc_limit;
+ }
}
-#endif
- if (!bank->highmem) {
- phys_addr_t bank_end = bank->start + bank->size;
- if (bank_end > arm_lowmem_limit)
- arm_lowmem_limit = bank_end;
+ if (!highmem) {
+ if (block_end > arm_lowmem_limit) {
+ if (reg->size > size_limit)
+ arm_lowmem_limit = vmalloc_limit;
+ else
+ arm_lowmem_limit = block_end;
+ }
/*
* Find the first non-section-aligned page, and point
@@ -1144,35 +1131,15 @@ void __init sanity_check_meminfo(void)
* occurs before any free memory is mapped.
*/
if (!memblock_limit) {
- if (!IS_ALIGNED(bank->start, SECTION_SIZE))
- memblock_limit = bank->start;
- else if (!IS_ALIGNED(bank_end, SECTION_SIZE))
- memblock_limit = bank_end;
+ if (!IS_ALIGNED(block_start, SECTION_SIZE))
+ memblock_limit = block_start;
+ else if (!IS_ALIGNED(block_end, SECTION_SIZE))
+ memblock_limit = arm_lowmem_limit;
}
- }
- j++;
- }
-#ifdef CONFIG_HIGHMEM
- if (highmem) {
- const char *reason = NULL;
- if (cache_is_vipt_aliasing()) {
- /*
- * Interactions between kmap and other mappings
- * make highmem support with aliasing VIPT caches
- * rather difficult.
- */
- reason = "with VIPT aliasing cache";
- }
- if (reason) {
- printk(KERN_CRIT "HIGHMEM is not supported %s, ignoring high memory\n",
- reason);
- while (j > 0 && meminfo.bank[j - 1].highmem)
- j--;
}
}
-#endif
- meminfo.nr_banks = j;
+
high_memory = __va(arm_lowmem_limit - 1) + 1;
/*
@@ -1359,6 +1326,9 @@ static void __init kmap_init(void)
#ifdef CONFIG_HIGHMEM
pkmap_page_table = early_pte_alloc(pmd_off_k(PKMAP_BASE),
PKMAP_BASE, _PAGE_KERNEL_TABLE);
+
+ fixmap_page_table = early_pte_alloc(pmd_off_k(FIXADDR_START),
+ FIXADDR_START, _PAGE_KERNEL_TABLE);
#endif
}
@@ -1461,7 +1431,7 @@ void __init early_paging_init(const struct machine_desc *mdesc,
* just complicate the code.
*/
flush_cache_louis();
- dsb();
+ dsb(ishst);
isb();
/* remap level 1 table */
diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c
index 55764a7ef1f0..da1874f9f8cf 100644
--- a/arch/arm/mm/nommu.c
+++ b/arch/arm/mm/nommu.c
@@ -88,30 +88,35 @@ static unsigned long irbar_read(void)
void __init sanity_check_meminfo_mpu(void)
{
int i;
- struct membank *bank = meminfo.bank;
phys_addr_t phys_offset = PHYS_OFFSET;
phys_addr_t aligned_region_size, specified_mem_size, rounded_mem_size;
-
- /* Initially only use memory continuous from PHYS_OFFSET */
- if (bank_phys_start(&bank[0]) != phys_offset)
- panic("First memory bank must be contiguous from PHYS_OFFSET");
-
- /* Banks have already been sorted by start address */
- for (i = 1; i < meminfo.nr_banks; i++) {
- if (bank[i].start <= bank_phys_end(&bank[0]) &&
- bank_phys_end(&bank[i]) > bank_phys_end(&bank[0])) {
- bank[0].size = bank_phys_end(&bank[i]) - bank[0].start;
+ struct memblock_region *reg;
+ bool first = true;
+ phys_addr_t mem_start;
+ phys_addr_t mem_end;
+
+ for_each_memblock(memory, reg) {
+ if (first) {
+ /*
+ * Initially only use memory continuous from
+ * PHYS_OFFSET */
+ if (reg->base != phys_offset)
+ panic("First memory bank must be contiguous from PHYS_OFFSET");
+
+ mem_start = reg->base;
+ mem_end = reg->base + reg->size;
+ specified_mem_size = reg->size;
+ first = false;
} else {
- pr_notice("Ignoring RAM after 0x%.8lx. "
- "First non-contiguous (ignored) bank start: 0x%.8lx\n",
- (unsigned long)bank_phys_end(&bank[0]),
- (unsigned long)bank_phys_start(&bank[i]));
- break;
+ /*
+ * memblock auto merges contiguous blocks, remove
+ * all blocks afterwards
+ */
+ pr_notice("Ignoring RAM after %pa, memory at %pa ignored\n",
+ &mem_start, &reg->base);
+ memblock_remove(reg->base, reg->size);
}
}
- /* All contiguous banks are now merged in to the first bank */
- meminfo.nr_banks = 1;
- specified_mem_size = bank[0].size;
/*
* MPU has curious alignment requirements: Size must be power of 2, and
@@ -128,23 +133,24 @@ void __init sanity_check_meminfo_mpu(void)
*/
aligned_region_size = (phys_offset - 1) ^ (phys_offset);
/* Find the max power-of-two sized region that fits inside our bank */
- rounded_mem_size = (1 << __fls(bank[0].size)) - 1;
+ rounded_mem_size = (1 << __fls(specified_mem_size)) - 1;
/* The actual region size is the smaller of the two */
aligned_region_size = aligned_region_size < rounded_mem_size
? aligned_region_size + 1
: rounded_mem_size + 1;
- if (aligned_region_size != specified_mem_size)
- pr_warn("Truncating memory from 0x%.8lx to 0x%.8lx (MPU region constraints)",
- (unsigned long)specified_mem_size,
- (unsigned long)aligned_region_size);
+ if (aligned_region_size != specified_mem_size) {
+ pr_warn("Truncating memory from %pa to %pa (MPU region constraints)",
+ &specified_mem_size, &aligned_region_size);
+ memblock_remove(mem_start + aligned_region_size,
+ specified_mem_size - aligned_round_size);
+
+ mem_end = mem_start + aligned_region_size;
+ }
- meminfo.bank[0].size = aligned_region_size;
- pr_debug("MPU Region from 0x%.8lx size 0x%.8lx (end 0x%.8lx))\n",
- (unsigned long)phys_offset,
- (unsigned long)aligned_region_size,
- (unsigned long)bank_phys_end(&bank[0]));
+ pr_debug("MPU Region from %pa size %pa (end %pa))\n",
+ &phys_offset, &aligned_region_size, &mem_end);
}
@@ -292,7 +298,7 @@ void __init sanity_check_meminfo(void)
{
phys_addr_t end;
sanity_check_meminfo_mpu();
- end = bank_phys_end(&meminfo.bank[meminfo.nr_banks - 1]);
+ end = memblock_end_of_DRAM();
high_memory = __va(end - 1) + 1;
}
diff --git a/arch/arm/mm/proc-v7-3level.S b/arch/arm/mm/proc-v7-3level.S
index 01a719e18bb0..22e3ad63500c 100644
--- a/arch/arm/mm/proc-v7-3level.S
+++ b/arch/arm/mm/proc-v7-3level.S
@@ -64,6 +64,14 @@ ENTRY(cpu_v7_switch_mm)
mov pc, lr
ENDPROC(cpu_v7_switch_mm)
+#ifdef __ARMEB__
+#define rl r3
+#define rh r2
+#else
+#define rl r2
+#define rh r3
+#endif
+
/*
* cpu_v7_set_pte_ext(ptep, pte)
*
@@ -73,13 +81,13 @@ ENDPROC(cpu_v7_switch_mm)
*/
ENTRY(cpu_v7_set_pte_ext)
#ifdef CONFIG_MMU
- tst r2, #L_PTE_VALID
+ tst rl, #L_PTE_VALID
beq 1f
- tst r3, #1 << (57 - 32) @ L_PTE_NONE
- bicne r2, #L_PTE_VALID
+ tst rh, #1 << (57 - 32) @ L_PTE_NONE
+ bicne rl, #L_PTE_VALID
bne 1f
- tst r3, #1 << (55 - 32) @ L_PTE_DIRTY
- orreq r2, #L_PTE_RDONLY
+ tst rh, #1 << (55 - 32) @ L_PTE_DIRTY
+ orreq rl, #L_PTE_RDONLY
1: strd r2, r3, [r0]
ALT_SMP(W(nop))
ALT_UP (mcr p15, 0, r0, c7, c10, 1) @ flush_pte
diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index 195731d3813b..3db2c2f04a30 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -169,9 +169,31 @@ ENDPROC(cpu_pj4b_do_idle)
globl_equ cpu_pj4b_do_idle, cpu_v7_do_idle
#endif
globl_equ cpu_pj4b_dcache_clean_area, cpu_v7_dcache_clean_area
- globl_equ cpu_pj4b_do_suspend, cpu_v7_do_suspend
- globl_equ cpu_pj4b_do_resume, cpu_v7_do_resume
- globl_equ cpu_pj4b_suspend_size, cpu_v7_suspend_size
+#ifdef CONFIG_ARM_CPU_SUSPEND
+ENTRY(cpu_pj4b_do_suspend)
+ stmfd sp!, {r6 - r10}
+ mrc p15, 1, r6, c15, c1, 0 @ save CP15 - extra features
+ mrc p15, 1, r7, c15, c2, 0 @ save CP15 - Aux Func Modes Ctrl 0
+ mrc p15, 1, r8, c15, c1, 2 @ save CP15 - Aux Debug Modes Ctrl 2
+ mrc p15, 1, r9, c15, c1, 1 @ save CP15 - Aux Debug Modes Ctrl 1
+ mrc p15, 0, r10, c9, c14, 0 @ save CP15 - PMC
+ stmia r0!, {r6 - r10}
+ ldmfd sp!, {r6 - r10}
+ b cpu_v7_do_suspend
+ENDPROC(cpu_pj4b_do_suspend)
+
+ENTRY(cpu_pj4b_do_resume)
+ ldmia r0!, {r6 - r10}
+ mcr p15, 1, r6, c15, c1, 0 @ save CP15 - extra features
+ mcr p15, 1, r7, c15, c2, 0 @ save CP15 - Aux Func Modes Ctrl 0
+ mcr p15, 1, r8, c15, c1, 2 @ save CP15 - Aux Debug Modes Ctrl 2
+ mcr p15, 1, r9, c15, c1, 1 @ save CP15 - Aux Debug Modes Ctrl 1
+ mcr p15, 0, r10, c9, c14, 0 @ save CP15 - PMC
+ b cpu_v7_do_resume
+ENDPROC(cpu_pj4b_do_resume)
+#endif
+.globl cpu_pj4b_suspend_size
+.equ cpu_pj4b_suspend_size, 4 * 14
#endif
@@ -194,6 +216,7 @@ __v7_cr7mp_setup:
__v7_ca7mp_setup:
__v7_ca12mp_setup:
__v7_ca15mp_setup:
+__v7_ca17mp_setup:
mov r10, #0
1:
#ifdef CONFIG_SMP
@@ -505,6 +528,16 @@ __v7_ca15mp_proc_info:
.size __v7_ca15mp_proc_info, . - __v7_ca15mp_proc_info
/*
+ * ARM Ltd. Cortex A17 processor.
+ */
+ .type __v7_ca17mp_proc_info, #object
+__v7_ca17mp_proc_info:
+ .long 0x410fc0e0
+ .long 0xff0ffff0
+ __v7_proc __v7_ca17mp_setup
+ .size __v7_ca17mp_proc_info, . - __v7_ca17mp_proc_info
+
+ /*
* Qualcomm Inc. Krait processors.
*/
.type __krait_proc_info, #object
diff --git a/arch/arm/plat-samsung/s5p-sleep.S b/arch/arm/plat-samsung/s5p-sleep.S
index c5001659bdf8..25c68ceb9e2b 100644
--- a/arch/arm/plat-samsung/s5p-sleep.S
+++ b/arch/arm/plat-samsung/s5p-sleep.S
@@ -22,7 +22,6 @@
*/
#include <linux/linkage.h>
-#include <asm/asm-offsets.h>
.data
.align
diff --git a/arch/arm/vfp/entry.S b/arch/arm/vfp/entry.S
index f0759e70fb86..fe6ca574d093 100644
--- a/arch/arm/vfp/entry.S
+++ b/arch/arm/vfp/entry.S
@@ -22,11 +22,10 @@
@ r9 = normal "successful" return address
@ r10 = this threads thread_info structure
@ lr = unrecognised instruction return address
-@ IRQs disabled.
+@ IRQs enabled.
@
ENTRY(do_vfp)
inc_preempt_count r10, r4
- enable_irq
ldr r4, .LCvfp
ldr r11, [r10, #TI_CPU] @ CPU number
add r10, r10, #TI_VFPSTATE @ r10 = workspace
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index f5f63b715d91..7295419165e1 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -30,12 +30,17 @@ config ARM64
select HAVE_ARCH_JUMP_LABEL
select HAVE_ARCH_KGDB
select HAVE_ARCH_TRACEHOOK
+ select HAVE_C_RECORDMCOUNT
select HAVE_DEBUG_BUGVERBOSE
select HAVE_DEBUG_KMEMLEAK
select HAVE_DMA_API_DEBUG
select HAVE_DMA_ATTRS
select HAVE_DMA_CONTIGUOUS
+ select HAVE_DYNAMIC_FTRACE
select HAVE_EFFICIENT_UNALIGNED_ACCESS
+ select HAVE_FTRACE_MCOUNT_RECORD
+ select HAVE_FUNCTION_TRACER
+ select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_GENERIC_DMA_COHERENT
select HAVE_HW_BREAKPOINT if PERF_EVENTS
select HAVE_MEMBLOCK
@@ -43,6 +48,7 @@ config ARM64
select HAVE_PERF_EVENTS
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
+ select HAVE_SYSCALL_TRACEPOINTS
select IRQ_DOMAIN
select MODULES_USE_ELF_RELA
select NO_BOOTMEM
@@ -245,6 +251,9 @@ config ARCH_WANT_HUGE_PMD_SHARE
config HAVE_ARCH_TRANSPARENT_HUGEPAGE
def_bool y
+config ARCH_HAS_CACHE_LINE_SIZE
+ def_bool y
+
source "mm/Kconfig"
config XEN_DOM0
@@ -283,6 +292,20 @@ config CMDLINE_FORCE
This is useful if you cannot or don't want to change the
command-line options your boot loader passes to the kernel.
+config EFI
+ bool "UEFI runtime support"
+ depends on OF && !CPU_BIG_ENDIAN
+ select LIBFDT
+ select UCS2_STRING
+ select EFI_PARAMS_FROM_FDT
+ default y
+ help
+ This option provides support for runtime services provided
+ by UEFI firmware (such as non-volatile variables, realtime
+ clock, and platform reset). A UEFI stub is also provided to
+ allow the kernel to be booted as an EFI application. This
+ is only useful on systems that have UEFI firmware.
+
endmenu
menu "Userspace binary formats"
@@ -334,6 +357,8 @@ source "net/Kconfig"
source "drivers/Kconfig"
+source "drivers/firmware/Kconfig"
+
source "fs/Kconfig"
source "arch/arm64/kvm/Kconfig"
@@ -343,5 +368,8 @@ source "arch/arm64/Kconfig.debug"
source "security/Kconfig"
source "crypto/Kconfig"
+if CRYPTO
+source "arch/arm64/crypto/Kconfig"
+endif
source "lib/Kconfig"
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index 2fceb71ac3b7..8185a913c5ed 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -45,6 +45,7 @@ export TEXT_OFFSET GZFLAGS
core-y += arch/arm64/kernel/ arch/arm64/mm/
core-$(CONFIG_KVM) += arch/arm64/kvm/
core-$(CONFIG_XEN) += arch/arm64/xen/
+core-$(CONFIG_CRYPTO) += arch/arm64/crypto/
libs-y := arch/arm64/lib/ $(libs-y)
libs-y += $(LIBGCC)
diff --git a/arch/arm64/boot/dts/apm-storm.dtsi b/arch/arm64/boot/dts/apm-storm.dtsi
index f8c40a66e65d..c5f0a47a1375 100644
--- a/arch/arm64/boot/dts/apm-storm.dtsi
+++ b/arch/arm64/boot/dts/apm-storm.dtsi
@@ -257,6 +257,19 @@
enable-offset = <0x0>;
enable-mask = <0x39>;
};
+
+ rtcclk: rtcclk@17000000 {
+ compatible = "apm,xgene-device-clock";
+ #clock-cells = <1>;
+ clocks = <&socplldiv2 0>;
+ reg = <0x0 0x17000000 0x0 0x2000>;
+ reg-names = "csr-reg";
+ csr-offset = <0xc>;
+ csr-mask = <0x2>;
+ enable-offset = <0x10>;
+ enable-mask = <0x2>;
+ clock-output-names = "rtcclk";
+ };
};
serial0: serial@1c020000 {
@@ -342,5 +355,13 @@
phys = <&phy3 0>;
phy-names = "sata-phy";
};
+
+ rtc: rtc@10510000 {
+ compatible = "apm,xgene-rtc";
+ reg = <0x0 0x10510000 0x0 0x400>;
+ interrupts = <0x0 0x46 0x4>;
+ #clock-cells = <1>;
+ clocks = <&rtcclk 0>;
+ };
};
};
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index 7959dd0ca5d5..157e1d8d9a47 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -1,11 +1,11 @@
# CONFIG_LOCALVERSION_AUTO is not set
-# CONFIG_SWAP is not set
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
+CONFIG_AUDIT=y
+CONFIG_NO_HZ_IDLE=y
+CONFIG_HIGH_RES_TIMERS=y
CONFIG_BSD_PROCESS_ACCT=y
CONFIG_BSD_PROCESS_ACCT_V3=y
-CONFIG_NO_HZ=y
-CONFIG_HIGH_RES_TIMERS=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
CONFIG_LOG_BUF_SHIFT=14
@@ -27,6 +27,7 @@ CONFIG_ARCH_VEXPRESS=y
CONFIG_ARCH_XGENE=y
CONFIG_SMP=y
CONFIG_PREEMPT=y
+CONFIG_TRANSPARENT_HUGEPAGE=y
CONFIG_CMA=y
CONFIG_CMDLINE="console=ttyAMA0"
# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
@@ -44,7 +45,7 @@ CONFIG_IP_PNP_BOOTP=y
CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_DEVTMPFS=y
CONFIG_DMA_CMA=y
-CONFIG_SCSI=y
+CONFIG_VIRTIO_BLK=y
# CONFIG_SCSI_PROC_FS is not set
CONFIG_BLK_DEV_SD=y
# CONFIG_SCSI_LOWLEVEL is not set
@@ -56,20 +57,18 @@ CONFIG_SMC91X=y
CONFIG_SMSC911X=y
# CONFIG_WLAN is not set
CONFIG_INPUT_EVDEV=y
-# CONFIG_SERIO_I8042 is not set
# CONFIG_SERIO_SERPORT is not set
CONFIG_LEGACY_PTY_COUNT=16
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
-CONFIG_SERIAL_OF_PLATFORM=y
CONFIG_SERIAL_AMBA_PL011=y
CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
+CONFIG_SERIAL_OF_PLATFORM=y
# CONFIG_HW_RANDOM is not set
# CONFIG_HWMON is not set
CONFIG_REGULATOR=y
CONFIG_REGULATOR_FIXED_VOLTAGE=y
CONFIG_FB=y
-# CONFIG_VGA_CONSOLE is not set
CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_LOGO=y
# CONFIG_LOGO_LINUX_MONO is not set
@@ -79,27 +78,38 @@ CONFIG_USB_ISP1760_HCD=y
CONFIG_USB_STORAGE=y
CONFIG_MMC=y
CONFIG_MMC_ARMMMCI=y
+CONFIG_VIRTIO_MMIO=y
# CONFIG_IOMMU_SUPPORT is not set
CONFIG_EXT2_FS=y
CONFIG_EXT3_FS=y
-CONFIG_EXT4_FS=y
# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
# CONFIG_EXT3_FS_XATTR is not set
+CONFIG_EXT4_FS=y
CONFIG_FUSE_FS=y
CONFIG_CUSE=y
CONFIG_VFAT_FS=y
CONFIG_TMPFS=y
+CONFIG_HUGETLBFS=y
# CONFIG_MISC_FILESYSTEMS is not set
CONFIG_NFS_FS=y
CONFIG_ROOT_NFS=y
CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_ISO8859_1=y
-CONFIG_MAGIC_SYSRQ=y
+CONFIG_VIRTUALIZATION=y
+CONFIG_KVM=y
+CONFIG_DEBUG_INFO=y
CONFIG_DEBUG_FS=y
+CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_KERNEL=y
+CONFIG_LOCKUP_DETECTOR=y
# CONFIG_SCHED_DEBUG is not set
-CONFIG_DEBUG_INFO=y
# CONFIG_FTRACE is not set
-CONFIG_ATOMIC64_SELFTEST=y
-CONFIG_VIRTIO_MMIO=y
-CONFIG_VIRTIO_BLK=y
+CONFIG_CRYPTO_ANSI_CPRNG=y
+CONFIG_ARM64_CRYPTO=y
+CONFIG_CRYPTO_SHA1_ARM64_CE=y
+CONFIG_CRYPTO_SHA2_ARM64_CE=y
+CONFIG_CRYPTO_GHASH_ARM64_CE=y
+CONFIG_CRYPTO_AES_ARM64_CE=y
+CONFIG_CRYPTO_AES_ARM64_CE_CCM=y
+CONFIG_CRYPTO_AES_ARM64_CE_BLK=y
+CONFIG_CRYPTO_AES_ARM64_NEON_BLK=y
diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
new file mode 100644
index 000000000000..5562652c5316
--- /dev/null
+++ b/arch/arm64/crypto/Kconfig
@@ -0,0 +1,53 @@
+
+menuconfig ARM64_CRYPTO
+ bool "ARM64 Accelerated Cryptographic Algorithms"
+ depends on ARM64
+ help
+ Say Y here to choose from a selection of cryptographic algorithms
+ implemented using ARM64 specific CPU features or instructions.
+
+if ARM64_CRYPTO
+
+config CRYPTO_SHA1_ARM64_CE
+ tristate "SHA-1 digest algorithm (ARMv8 Crypto Extensions)"
+ depends on ARM64 && KERNEL_MODE_NEON
+ select CRYPTO_HASH
+
+config CRYPTO_SHA2_ARM64_CE
+ tristate "SHA-224/SHA-256 digest algorithm (ARMv8 Crypto Extensions)"
+ depends on ARM64 && KERNEL_MODE_NEON
+ select CRYPTO_HASH
+
+config CRYPTO_GHASH_ARM64_CE
+ tristate "GHASH (for GCM chaining mode) using ARMv8 Crypto Extensions"
+ depends on ARM64 && KERNEL_MODE_NEON
+ select CRYPTO_HASH
+
+config CRYPTO_AES_ARM64_CE
+ tristate "AES core cipher using ARMv8 Crypto Extensions"
+ depends on ARM64 && KERNEL_MODE_NEON
+ select CRYPTO_ALGAPI
+ select CRYPTO_AES
+
+config CRYPTO_AES_ARM64_CE_CCM
+ tristate "AES in CCM mode using ARMv8 Crypto Extensions"
+ depends on ARM64 && KERNEL_MODE_NEON
+ select CRYPTO_ALGAPI
+ select CRYPTO_AES
+ select CRYPTO_AEAD
+
+config CRYPTO_AES_ARM64_CE_BLK
+ tristate "AES in ECB/CBC/CTR/XTS modes using ARMv8 Crypto Extensions"
+ depends on ARM64 && KERNEL_MODE_NEON
+ select CRYPTO_BLKCIPHER
+ select CRYPTO_AES
+ select CRYPTO_ABLK_HELPER
+
+config CRYPTO_AES_ARM64_NEON_BLK
+ tristate "AES in ECB/CBC/CTR/XTS modes using NEON instructions"
+ depends on ARM64 && KERNEL_MODE_NEON
+ select CRYPTO_BLKCIPHER
+ select CRYPTO_AES
+ select CRYPTO_ABLK_HELPER
+
+endif
diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile
new file mode 100644
index 000000000000..2070a56ecc46
--- /dev/null
+++ b/arch/arm64/crypto/Makefile
@@ -0,0 +1,38 @@
+#
+# linux/arch/arm64/crypto/Makefile
+#
+# Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+#
+
+obj-$(CONFIG_CRYPTO_SHA1_ARM64_CE) += sha1-ce.o
+sha1-ce-y := sha1-ce-glue.o sha1-ce-core.o
+
+obj-$(CONFIG_CRYPTO_SHA2_ARM64_CE) += sha2-ce.o
+sha2-ce-y := sha2-ce-glue.o sha2-ce-core.o
+
+obj-$(CONFIG_CRYPTO_GHASH_ARM64_CE) += ghash-ce.o
+ghash-ce-y := ghash-ce-glue.o ghash-ce-core.o
+
+obj-$(CONFIG_CRYPTO_AES_ARM64_CE) += aes-ce-cipher.o
+CFLAGS_aes-ce-cipher.o += -march=armv8-a+crypto
+
+obj-$(CONFIG_CRYPTO_AES_ARM64_CE_CCM) += aes-ce-ccm.o
+aes-ce-ccm-y := aes-ce-ccm-glue.o aes-ce-ccm-core.o
+
+obj-$(CONFIG_CRYPTO_AES_ARM64_CE_BLK) += aes-ce-blk.o
+aes-ce-blk-y := aes-glue-ce.o aes-ce.o
+
+obj-$(CONFIG_CRYPTO_AES_ARM64_NEON_BLK) += aes-neon-blk.o
+aes-neon-blk-y := aes-glue-neon.o aes-neon.o
+
+AFLAGS_aes-ce.o := -DINTERLEAVE=2 -DINTERLEAVE_INLINE
+AFLAGS_aes-neon.o := -DINTERLEAVE=4
+
+CFLAGS_aes-glue-ce.o := -DUSE_V8_CRYPTO_EXTENSIONS
+
+$(obj)/aes-glue-%.o: $(src)/aes-glue.c FORCE
+ $(call if_changed_dep,cc_o_c)
diff --git a/arch/arm64/crypto/aes-ce-ccm-core.S b/arch/arm64/crypto/aes-ce-ccm-core.S
new file mode 100644
index 000000000000..432e4841cd81
--- /dev/null
+++ b/arch/arm64/crypto/aes-ce-ccm-core.S
@@ -0,0 +1,222 @@
+/*
+ * aesce-ccm-core.S - AES-CCM transform for ARMv8 with Crypto Extensions
+ *
+ * Copyright (C) 2013 - 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+
+ .text
+ .arch armv8-a+crypto
+
+ /*
+ * void ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes,
+ * u32 *macp, u8 const rk[], u32 rounds);
+ */
+ENTRY(ce_aes_ccm_auth_data)
+ ldr w8, [x3] /* leftover from prev round? */
+ ld1 {v0.2d}, [x0] /* load mac */
+ cbz w8, 1f
+ sub w8, w8, #16
+ eor v1.16b, v1.16b, v1.16b
+0: ldrb w7, [x1], #1 /* get 1 byte of input */
+ subs w2, w2, #1
+ add w8, w8, #1
+ ins v1.b[0], w7
+ ext v1.16b, v1.16b, v1.16b, #1 /* rotate in the input bytes */
+ beq 8f /* out of input? */
+ cbnz w8, 0b
+ eor v0.16b, v0.16b, v1.16b
+1: ld1 {v3.2d}, [x4] /* load first round key */
+ prfm pldl1strm, [x1]
+ cmp w5, #12 /* which key size? */
+ add x6, x4, #16
+ sub w7, w5, #2 /* modified # of rounds */
+ bmi 2f
+ bne 5f
+ mov v5.16b, v3.16b
+ b 4f
+2: mov v4.16b, v3.16b
+ ld1 {v5.2d}, [x6], #16 /* load 2nd round key */
+3: aese v0.16b, v4.16b
+ aesmc v0.16b, v0.16b
+4: ld1 {v3.2d}, [x6], #16 /* load next round key */
+ aese v0.16b, v5.16b
+ aesmc v0.16b, v0.16b
+5: ld1 {v4.2d}, [x6], #16 /* load next round key */
+ subs w7, w7, #3
+ aese v0.16b, v3.16b
+ aesmc v0.16b, v0.16b
+ ld1 {v5.2d}, [x6], #16 /* load next round key */
+ bpl 3b
+ aese v0.16b, v4.16b
+ subs w2, w2, #16 /* last data? */
+ eor v0.16b, v0.16b, v5.16b /* final round */
+ bmi 6f
+ ld1 {v1.16b}, [x1], #16 /* load next input block */
+ eor v0.16b, v0.16b, v1.16b /* xor with mac */
+ bne 1b
+6: st1 {v0.2d}, [x0] /* store mac */
+ beq 10f
+ adds w2, w2, #16
+ beq 10f
+ mov w8, w2
+7: ldrb w7, [x1], #1
+ umov w6, v0.b[0]
+ eor w6, w6, w7
+ strb w6, [x0], #1
+ subs w2, w2, #1
+ beq 10f
+ ext v0.16b, v0.16b, v0.16b, #1 /* rotate out the mac bytes */
+ b 7b
+8: mov w7, w8
+ add w8, w8, #16
+9: ext v1.16b, v1.16b, v1.16b, #1
+ adds w7, w7, #1
+ bne 9b
+ eor v0.16b, v0.16b, v1.16b
+ st1 {v0.2d}, [x0]
+10: str w8, [x3]
+ ret
+ENDPROC(ce_aes_ccm_auth_data)
+
+ /*
+ * void ce_aes_ccm_final(u8 mac[], u8 const ctr[], u8 const rk[],
+ * u32 rounds);
+ */
+ENTRY(ce_aes_ccm_final)
+ ld1 {v3.2d}, [x2], #16 /* load first round key */
+ ld1 {v0.2d}, [x0] /* load mac */
+ cmp w3, #12 /* which key size? */
+ sub w3, w3, #2 /* modified # of rounds */
+ ld1 {v1.2d}, [x1] /* load 1st ctriv */
+ bmi 0f
+ bne 3f
+ mov v5.16b, v3.16b
+ b 2f
+0: mov v4.16b, v3.16b
+1: ld1 {v5.2d}, [x2], #16 /* load next round key */
+ aese v0.16b, v4.16b
+ aese v1.16b, v4.16b
+ aesmc v0.16b, v0.16b
+ aesmc v1.16b, v1.16b
+2: ld1 {v3.2d}, [x2], #16 /* load next round key */
+ aese v0.16b, v5.16b
+ aese v1.16b, v5.16b
+ aesmc v0.16b, v0.16b
+ aesmc v1.16b, v1.16b
+3: ld1 {v4.2d}, [x2], #16 /* load next round key */
+ subs w3, w3, #3
+ aese v0.16b, v3.16b
+ aese v1.16b, v3.16b
+ aesmc v0.16b, v0.16b
+ aesmc v1.16b, v1.16b
+ bpl 1b
+ aese v0.16b, v4.16b
+ aese v1.16b, v4.16b
+ /* final round key cancels out */
+ eor v0.16b, v0.16b, v1.16b /* en-/decrypt the mac */
+ st1 {v0.2d}, [x0] /* store result */
+ ret
+ENDPROC(ce_aes_ccm_final)
+
+ .macro aes_ccm_do_crypt,enc
+ ldr x8, [x6, #8] /* load lower ctr */
+ ld1 {v0.2d}, [x5] /* load mac */
+ rev x8, x8 /* keep swabbed ctr in reg */
+0: /* outer loop */
+ ld1 {v1.1d}, [x6] /* load upper ctr */
+ prfm pldl1strm, [x1]
+ add x8, x8, #1
+ rev x9, x8
+ cmp w4, #12 /* which key size? */
+ sub w7, w4, #2 /* get modified # of rounds */
+ ins v1.d[1], x9 /* no carry in lower ctr */
+ ld1 {v3.2d}, [x3] /* load first round key */
+ add x10, x3, #16
+ bmi 1f
+ bne 4f
+ mov v5.16b, v3.16b
+ b 3f
+1: mov v4.16b, v3.16b
+ ld1 {v5.2d}, [x10], #16 /* load 2nd round key */
+2: /* inner loop: 3 rounds, 2x interleaved */
+ aese v0.16b, v4.16b
+ aese v1.16b, v4.16b
+ aesmc v0.16b, v0.16b
+ aesmc v1.16b, v1.16b
+3: ld1 {v3.2d}, [x10], #16 /* load next round key */
+ aese v0.16b, v5.16b
+ aese v1.16b, v5.16b
+ aesmc v0.16b, v0.16b
+ aesmc v1.16b, v1.16b
+4: ld1 {v4.2d}, [x10], #16 /* load next round key */
+ subs w7, w7, #3
+ aese v0.16b, v3.16b
+ aese v1.16b, v3.16b
+ aesmc v0.16b, v0.16b
+ aesmc v1.16b, v1.16b
+ ld1 {v5.2d}, [x10], #16 /* load next round key */
+ bpl 2b
+ aese v0.16b, v4.16b
+ aese v1.16b, v4.16b
+ subs w2, w2, #16
+ bmi 6f /* partial block? */
+ ld1 {v2.16b}, [x1], #16 /* load next input block */
+ .if \enc == 1
+ eor v2.16b, v2.16b, v5.16b /* final round enc+mac */
+ eor v1.16b, v1.16b, v2.16b /* xor with crypted ctr */
+ .else
+ eor v2.16b, v2.16b, v1.16b /* xor with crypted ctr */
+ eor v1.16b, v2.16b, v5.16b /* final round enc */
+ .endif
+ eor v0.16b, v0.16b, v2.16b /* xor mac with pt ^ rk[last] */
+ st1 {v1.16b}, [x0], #16 /* write output block */
+ bne 0b
+ rev x8, x8
+ st1 {v0.2d}, [x5] /* store mac */
+ str x8, [x6, #8] /* store lsb end of ctr (BE) */
+5: ret
+
+6: eor v0.16b, v0.16b, v5.16b /* final round mac */
+ eor v1.16b, v1.16b, v5.16b /* final round enc */
+ st1 {v0.2d}, [x5] /* store mac */
+ add w2, w2, #16 /* process partial tail block */
+7: ldrb w9, [x1], #1 /* get 1 byte of input */
+ umov w6, v1.b[0] /* get top crypted ctr byte */
+ umov w7, v0.b[0] /* get top mac byte */
+ .if \enc == 1
+ eor w7, w7, w9
+ eor w9, w9, w6
+ .else
+ eor w9, w9, w6
+ eor w7, w7, w9
+ .endif
+ strb w9, [x0], #1 /* store out byte */
+ strb w7, [x5], #1 /* store mac byte */
+ subs w2, w2, #1
+ beq 5b
+ ext v0.16b, v0.16b, v0.16b, #1 /* shift out mac byte */
+ ext v1.16b, v1.16b, v1.16b, #1 /* shift out ctr byte */
+ b 7b
+ .endm
+
+ /*
+ * void ce_aes_ccm_encrypt(u8 out[], u8 const in[], u32 cbytes,
+ * u8 const rk[], u32 rounds, u8 mac[],
+ * u8 ctr[]);
+ * void ce_aes_ccm_decrypt(u8 out[], u8 const in[], u32 cbytes,
+ * u8 const rk[], u32 rounds, u8 mac[],
+ * u8 ctr[]);
+ */
+ENTRY(ce_aes_ccm_encrypt)
+ aes_ccm_do_crypt 1
+ENDPROC(ce_aes_ccm_encrypt)
+
+ENTRY(ce_aes_ccm_decrypt)
+ aes_ccm_do_crypt 0
+ENDPROC(ce_aes_ccm_decrypt)
diff --git a/arch/arm64/crypto/aes-ce-ccm-glue.c b/arch/arm64/crypto/aes-ce-ccm-glue.c
new file mode 100644
index 000000000000..9e6cdde9b43d
--- /dev/null
+++ b/arch/arm64/crypto/aes-ce-ccm-glue.c
@@ -0,0 +1,297 @@
+/*
+ * aes-ccm-glue.c - AES-CCM transform for ARMv8 with Crypto Extensions
+ *
+ * Copyright (C) 2013 - 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <asm/neon.h>
+#include <asm/unaligned.h>
+#include <crypto/aes.h>
+#include <crypto/algapi.h>
+#include <crypto/scatterwalk.h>
+#include <linux/crypto.h>
+#include <linux/module.h>
+
+static int num_rounds(struct crypto_aes_ctx *ctx)
+{
+ /*
+ * # of rounds specified by AES:
+ * 128 bit key 10 rounds
+ * 192 bit key 12 rounds
+ * 256 bit key 14 rounds
+ * => n byte key => 6 + (n/4) rounds
+ */
+ return 6 + ctx->key_length / 4;
+}
+
+asmlinkage void ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes,
+ u32 *macp, u32 const rk[], u32 rounds);
+
+asmlinkage void ce_aes_ccm_encrypt(u8 out[], u8 const in[], u32 cbytes,
+ u32 const rk[], u32 rounds, u8 mac[],
+ u8 ctr[]);
+
+asmlinkage void ce_aes_ccm_decrypt(u8 out[], u8 const in[], u32 cbytes,
+ u32 const rk[], u32 rounds, u8 mac[],
+ u8 ctr[]);
+
+asmlinkage void ce_aes_ccm_final(u8 mac[], u8 const ctr[], u32 const rk[],
+ u32 rounds);
+
+static int ccm_setkey(struct crypto_aead *tfm, const u8 *in_key,
+ unsigned int key_len)
+{
+ struct crypto_aes_ctx *ctx = crypto_aead_ctx(tfm);
+ int ret;
+
+ ret = crypto_aes_expand_key(ctx, in_key, key_len);
+ if (!ret)
+ return 0;
+
+ tfm->base.crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+ return -EINVAL;
+}
+
+static int ccm_setauthsize(struct crypto_aead *tfm, unsigned int authsize)
+{
+ if ((authsize & 1) || authsize < 4)
+ return -EINVAL;
+ return 0;
+}
+
+static int ccm_init_mac(struct aead_request *req, u8 maciv[], u32 msglen)
+{
+ struct crypto_aead *aead = crypto_aead_reqtfm(req);
+ __be32 *n = (__be32 *)&maciv[AES_BLOCK_SIZE - 8];
+ u32 l = req->iv[0] + 1;
+
+ /* verify that CCM dimension 'L' is set correctly in the IV */
+ if (l < 2 || l > 8)
+ return -EINVAL;
+
+ /* verify that msglen can in fact be represented in L bytes */
+ if (l < 4 && msglen >> (8 * l))
+ return -EOVERFLOW;
+
+ /*
+ * Even if the CCM spec allows L values of up to 8, the Linux cryptoapi
+ * uses a u32 type to represent msglen so the top 4 bytes are always 0.
+ */
+ n[0] = 0;
+ n[1] = cpu_to_be32(msglen);
+
+ memcpy(maciv, req->iv, AES_BLOCK_SIZE - l);
+
+ /*
+ * Meaning of byte 0 according to CCM spec (RFC 3610/NIST 800-38C)
+ * - bits 0..2 : max # of bytes required to represent msglen, minus 1
+ * (already set by caller)
+ * - bits 3..5 : size of auth tag (1 => 4 bytes, 2 => 6 bytes, etc)
+ * - bit 6 : indicates presence of authenticate-only data
+ */
+ maciv[0] |= (crypto_aead_authsize(aead) - 2) << 2;
+ if (req->assoclen)
+ maciv[0] |= 0x40;
+
+ memset(&req->iv[AES_BLOCK_SIZE - l], 0, l);
+ return 0;
+}
+
+static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[])
+{
+ struct crypto_aead *aead = crypto_aead_reqtfm(req);
+ struct crypto_aes_ctx *ctx = crypto_aead_ctx(aead);
+ struct __packed { __be16 l; __be32 h; u16 len; } ltag;
+ struct scatter_walk walk;
+ u32 len = req->assoclen;
+ u32 macp = 0;
+
+ /* prepend the AAD with a length tag */
+ if (len < 0xff00) {
+ ltag.l = cpu_to_be16(len);
+ ltag.len = 2;
+ } else {
+ ltag.l = cpu_to_be16(0xfffe);
+ put_unaligned_be32(len, &ltag.h);
+ ltag.len = 6;
+ }
+
+ ce_aes_ccm_auth_data(mac, (u8 *)&ltag, ltag.len, &macp, ctx->key_enc,
+ num_rounds(ctx));
+ scatterwalk_start(&walk, req->assoc);
+
+ do {
+ u32 n = scatterwalk_clamp(&walk, len);
+ u8 *p;
+
+ if (!n) {
+ scatterwalk_start(&walk, sg_next(walk.sg));
+ n = scatterwalk_clamp(&walk, len);
+ }
+ p = scatterwalk_map(&walk);
+ ce_aes_ccm_auth_data(mac, p, n, &macp, ctx->key_enc,
+ num_rounds(ctx));
+ len -= n;
+
+ scatterwalk_unmap(p);
+ scatterwalk_advance(&walk, n);
+ scatterwalk_done(&walk, 0, len);
+ } while (len);
+}
+
+static int ccm_encrypt(struct aead_request *req)
+{
+ struct crypto_aead *aead = crypto_aead_reqtfm(req);
+ struct crypto_aes_ctx *ctx = crypto_aead_ctx(aead);
+ struct blkcipher_desc desc = { .info = req->iv };
+ struct blkcipher_walk walk;
+ u8 __aligned(8) mac[AES_BLOCK_SIZE];
+ u8 buf[AES_BLOCK_SIZE];
+ u32 len = req->cryptlen;
+ int err;
+
+ err = ccm_init_mac(req, mac, len);
+ if (err)
+ return err;
+
+ kernel_neon_begin_partial(6);
+
+ if (req->assoclen)
+ ccm_calculate_auth_mac(req, mac);
+
+ /* preserve the original iv for the final round */
+ memcpy(buf, req->iv, AES_BLOCK_SIZE);
+
+ blkcipher_walk_init(&walk, req->dst, req->src, len);
+ err = blkcipher_aead_walk_virt_block(&desc, &walk, aead,
+ AES_BLOCK_SIZE);
+
+ while (walk.nbytes) {
+ u32 tail = walk.nbytes % AES_BLOCK_SIZE;
+
+ if (walk.nbytes == len)
+ tail = 0;
+
+ ce_aes_ccm_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ walk.nbytes - tail, ctx->key_enc,
+ num_rounds(ctx), mac, walk.iv);
+
+ len -= walk.nbytes - tail;
+ err = blkcipher_walk_done(&desc, &walk, tail);
+ }
+ if (!err)
+ ce_aes_ccm_final(mac, buf, ctx->key_enc, num_rounds(ctx));
+
+ kernel_neon_end();
+
+ if (err)
+ return err;
+
+ /* copy authtag to end of dst */
+ scatterwalk_map_and_copy(mac, req->dst, req->cryptlen,
+ crypto_aead_authsize(aead), 1);
+
+ return 0;
+}
+
+static int ccm_decrypt(struct aead_request *req)
+{
+ struct crypto_aead *aead = crypto_aead_reqtfm(req);
+ struct crypto_aes_ctx *ctx = crypto_aead_ctx(aead);
+ unsigned int authsize = crypto_aead_authsize(aead);
+ struct blkcipher_desc desc = { .info = req->iv };
+ struct blkcipher_walk walk;
+ u8 __aligned(8) mac[AES_BLOCK_SIZE];
+ u8 buf[AES_BLOCK_SIZE];
+ u32 len = req->cryptlen - authsize;
+ int err;
+
+ err = ccm_init_mac(req, mac, len);
+ if (err)
+ return err;
+
+ kernel_neon_begin_partial(6);
+
+ if (req->assoclen)
+ ccm_calculate_auth_mac(req, mac);
+
+ /* preserve the original iv for the final round */
+ memcpy(buf, req->iv, AES_BLOCK_SIZE);
+
+ blkcipher_walk_init(&walk, req->dst, req->src, len);
+ err = blkcipher_aead_walk_virt_block(&desc, &walk, aead,
+ AES_BLOCK_SIZE);
+
+ while (walk.nbytes) {
+ u32 tail = walk.nbytes % AES_BLOCK_SIZE;
+
+ if (walk.nbytes == len)
+ tail = 0;
+
+ ce_aes_ccm_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ walk.nbytes - tail, ctx->key_enc,
+ num_rounds(ctx), mac, walk.iv);
+
+ len -= walk.nbytes - tail;
+ err = blkcipher_walk_done(&desc, &walk, tail);
+ }
+ if (!err)
+ ce_aes_ccm_final(mac, buf, ctx->key_enc, num_rounds(ctx));
+
+ kernel_neon_end();
+
+ if (err)
+ return err;
+
+ /* compare calculated auth tag with the stored one */
+ scatterwalk_map_and_copy(buf, req->src, req->cryptlen - authsize,
+ authsize, 0);
+
+ if (memcmp(mac, buf, authsize))
+ return -EBADMSG;
+ return 0;
+}
+
+static struct crypto_alg ccm_aes_alg = {
+ .cra_name = "ccm(aes)",
+ .cra_driver_name = "ccm-aes-ce",
+ .cra_priority = 300,
+ .cra_flags = CRYPTO_ALG_TYPE_AEAD,
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct crypto_aes_ctx),
+ .cra_alignmask = 7,
+ .cra_type = &crypto_aead_type,
+ .cra_module = THIS_MODULE,
+ .cra_aead = {
+ .ivsize = AES_BLOCK_SIZE,
+ .maxauthsize = AES_BLOCK_SIZE,
+ .setkey = ccm_setkey,
+ .setauthsize = ccm_setauthsize,
+ .encrypt = ccm_encrypt,
+ .decrypt = ccm_decrypt,
+ }
+};
+
+static int __init aes_mod_init(void)
+{
+ if (!(elf_hwcap & HWCAP_AES))
+ return -ENODEV;
+ return crypto_register_alg(&ccm_aes_alg);
+}
+
+static void __exit aes_mod_exit(void)
+{
+ crypto_unregister_alg(&ccm_aes_alg);
+}
+
+module_init(aes_mod_init);
+module_exit(aes_mod_exit);
+
+MODULE_DESCRIPTION("Synchronous AES in CCM mode using ARMv8 Crypto Extensions");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("ccm(aes)");
diff --git a/arch/arm64/crypto/aes-ce-cipher.c b/arch/arm64/crypto/aes-ce-cipher.c
new file mode 100644
index 000000000000..2075e1acae6b
--- /dev/null
+++ b/arch/arm64/crypto/aes-ce-cipher.c
@@ -0,0 +1,155 @@
+/*
+ * aes-ce-cipher.c - core AES cipher using ARMv8 Crypto Extensions
+ *
+ * Copyright (C) 2013 - 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <asm/neon.h>
+#include <crypto/aes.h>
+#include <linux/cpufeature.h>
+#include <linux/crypto.h>
+#include <linux/module.h>
+
+MODULE_DESCRIPTION("Synchronous AES cipher using ARMv8 Crypto Extensions");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+
+struct aes_block {
+ u8 b[AES_BLOCK_SIZE];
+};
+
+static int num_rounds(struct crypto_aes_ctx *ctx)
+{
+ /*
+ * # of rounds specified by AES:
+ * 128 bit key 10 rounds
+ * 192 bit key 12 rounds
+ * 256 bit key 14 rounds
+ * => n byte key => 6 + (n/4) rounds
+ */
+ return 6 + ctx->key_length / 4;
+}
+
+static void aes_cipher_encrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[])
+{
+ struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+ struct aes_block *out = (struct aes_block *)dst;
+ struct aes_block const *in = (struct aes_block *)src;
+ void *dummy0;
+ int dummy1;
+
+ kernel_neon_begin_partial(4);
+
+ __asm__(" ld1 {v0.16b}, %[in] ;"
+ " ld1 {v1.2d}, [%[key]], #16 ;"
+ " cmp %w[rounds], #10 ;"
+ " bmi 0f ;"
+ " bne 3f ;"
+ " mov v3.16b, v1.16b ;"
+ " b 2f ;"
+ "0: mov v2.16b, v1.16b ;"
+ " ld1 {v3.2d}, [%[key]], #16 ;"
+ "1: aese v0.16b, v2.16b ;"
+ " aesmc v0.16b, v0.16b ;"
+ "2: ld1 {v1.2d}, [%[key]], #16 ;"
+ " aese v0.16b, v3.16b ;"
+ " aesmc v0.16b, v0.16b ;"
+ "3: ld1 {v2.2d}, [%[key]], #16 ;"
+ " subs %w[rounds], %w[rounds], #3 ;"
+ " aese v0.16b, v1.16b ;"
+ " aesmc v0.16b, v0.16b ;"
+ " ld1 {v3.2d}, [%[key]], #16 ;"
+ " bpl 1b ;"
+ " aese v0.16b, v2.16b ;"
+ " eor v0.16b, v0.16b, v3.16b ;"
+ " st1 {v0.16b}, %[out] ;"
+
+ : [out] "=Q"(*out),
+ [key] "=r"(dummy0),
+ [rounds] "=r"(dummy1)
+ : [in] "Q"(*in),
+ "1"(ctx->key_enc),
+ "2"(num_rounds(ctx) - 2)
+ : "cc");
+
+ kernel_neon_end();
+}
+
+static void aes_cipher_decrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[])
+{
+ struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+ struct aes_block *out = (struct aes_block *)dst;
+ struct aes_block const *in = (struct aes_block *)src;
+ void *dummy0;
+ int dummy1;
+
+ kernel_neon_begin_partial(4);
+
+ __asm__(" ld1 {v0.16b}, %[in] ;"
+ " ld1 {v1.2d}, [%[key]], #16 ;"
+ " cmp %w[rounds], #10 ;"
+ " bmi 0f ;"
+ " bne 3f ;"
+ " mov v3.16b, v1.16b ;"
+ " b 2f ;"
+ "0: mov v2.16b, v1.16b ;"
+ " ld1 {v3.2d}, [%[key]], #16 ;"
+ "1: aesd v0.16b, v2.16b ;"
+ " aesimc v0.16b, v0.16b ;"
+ "2: ld1 {v1.2d}, [%[key]], #16 ;"
+ " aesd v0.16b, v3.16b ;"
+ " aesimc v0.16b, v0.16b ;"
+ "3: ld1 {v2.2d}, [%[key]], #16 ;"
+ " subs %w[rounds], %w[rounds], #3 ;"
+ " aesd v0.16b, v1.16b ;"
+ " aesimc v0.16b, v0.16b ;"
+ " ld1 {v3.2d}, [%[key]], #16 ;"
+ " bpl 1b ;"
+ " aesd v0.16b, v2.16b ;"
+ " eor v0.16b, v0.16b, v3.16b ;"
+ " st1 {v0.16b}, %[out] ;"
+
+ : [out] "=Q"(*out),
+ [key] "=r"(dummy0),
+ [rounds] "=r"(dummy1)
+ : [in] "Q"(*in),
+ "1"(ctx->key_dec),
+ "2"(num_rounds(ctx) - 2)
+ : "cc");
+
+ kernel_neon_end();
+}
+
+static struct crypto_alg aes_alg = {
+ .cra_name = "aes",
+ .cra_driver_name = "aes-ce",
+ .cra_priority = 300,
+ .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct crypto_aes_ctx),
+ .cra_module = THIS_MODULE,
+ .cra_cipher = {
+ .cia_min_keysize = AES_MIN_KEY_SIZE,
+ .cia_max_keysize = AES_MAX_KEY_SIZE,
+ .cia_setkey = crypto_aes_set_key,
+ .cia_encrypt = aes_cipher_encrypt,
+ .cia_decrypt = aes_cipher_decrypt
+ }
+};
+
+static int __init aes_mod_init(void)
+{
+ return crypto_register_alg(&aes_alg);
+}
+
+static void __exit aes_mod_exit(void)
+{
+ crypto_unregister_alg(&aes_alg);
+}
+
+module_cpu_feature_match(AES, aes_mod_init);
+module_exit(aes_mod_exit);
diff --git a/arch/arm64/crypto/aes-ce.S b/arch/arm64/crypto/aes-ce.S
new file mode 100644
index 000000000000..685a18f731eb
--- /dev/null
+++ b/arch/arm64/crypto/aes-ce.S
@@ -0,0 +1,133 @@
+/*
+ * linux/arch/arm64/crypto/aes-ce.S - AES cipher for ARMv8 with
+ * Crypto Extensions
+ *
+ * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+
+#define AES_ENTRY(func) ENTRY(ce_ ## func)
+#define AES_ENDPROC(func) ENDPROC(ce_ ## func)
+
+ .arch armv8-a+crypto
+
+ /* preload all round keys */
+ .macro load_round_keys, rounds, rk
+ cmp \rounds, #12
+ blo 2222f /* 128 bits */
+ beq 1111f /* 192 bits */
+ ld1 {v17.16b-v18.16b}, [\rk], #32
+1111: ld1 {v19.16b-v20.16b}, [\rk], #32
+2222: ld1 {v21.16b-v24.16b}, [\rk], #64
+ ld1 {v25.16b-v28.16b}, [\rk], #64
+ ld1 {v29.16b-v31.16b}, [\rk]
+ .endm
+
+ /* prepare for encryption with key in rk[] */
+ .macro enc_prepare, rounds, rk, ignore
+ load_round_keys \rounds, \rk
+ .endm
+
+ /* prepare for encryption (again) but with new key in rk[] */
+ .macro enc_switch_key, rounds, rk, ignore
+ load_round_keys \rounds, \rk
+ .endm
+
+ /* prepare for decryption with key in rk[] */
+ .macro dec_prepare, rounds, rk, ignore
+ load_round_keys \rounds, \rk
+ .endm
+
+ .macro do_enc_Nx, de, mc, k, i0, i1, i2, i3
+ aes\de \i0\().16b, \k\().16b
+ .ifnb \i1
+ aes\de \i1\().16b, \k\().16b
+ .ifnb \i3
+ aes\de \i2\().16b, \k\().16b
+ aes\de \i3\().16b, \k\().16b
+ .endif
+ .endif
+ aes\mc \i0\().16b, \i0\().16b
+ .ifnb \i1
+ aes\mc \i1\().16b, \i1\().16b
+ .ifnb \i3
+ aes\mc \i2\().16b, \i2\().16b
+ aes\mc \i3\().16b, \i3\().16b
+ .endif
+ .endif
+ .endm
+
+ /* up to 4 interleaved encryption rounds with the same round key */
+ .macro round_Nx, enc, k, i0, i1, i2, i3
+ .ifc \enc, e
+ do_enc_Nx e, mc, \k, \i0, \i1, \i2, \i3
+ .else
+ do_enc_Nx d, imc, \k, \i0, \i1, \i2, \i3
+ .endif
+ .endm
+
+ /* up to 4 interleaved final rounds */
+ .macro fin_round_Nx, de, k, k2, i0, i1, i2, i3
+ aes\de \i0\().16b, \k\().16b
+ .ifnb \i1
+ aes\de \i1\().16b, \k\().16b
+ .ifnb \i3
+ aes\de \i2\().16b, \k\().16b
+ aes\de \i3\().16b, \k\().16b
+ .endif
+ .endif
+ eor \i0\().16b, \i0\().16b, \k2\().16b
+ .ifnb \i1
+ eor \i1\().16b, \i1\().16b, \k2\().16b
+ .ifnb \i3
+ eor \i2\().16b, \i2\().16b, \k2\().16b
+ eor \i3\().16b, \i3\().16b, \k2\().16b
+ .endif
+ .endif
+ .endm
+
+ /* up to 4 interleaved blocks */
+ .macro do_block_Nx, enc, rounds, i0, i1, i2, i3
+ cmp \rounds, #12
+ blo 2222f /* 128 bits */
+ beq 1111f /* 192 bits */
+ round_Nx \enc, v17, \i0, \i1, \i2, \i3
+ round_Nx \enc, v18, \i0, \i1, \i2, \i3
+1111: round_Nx \enc, v19, \i0, \i1, \i2, \i3
+ round_Nx \enc, v20, \i0, \i1, \i2, \i3
+2222: .irp key, v21, v22, v23, v24, v25, v26, v27, v28, v29
+ round_Nx \enc, \key, \i0, \i1, \i2, \i3
+ .endr
+ fin_round_Nx \enc, v30, v31, \i0, \i1, \i2, \i3
+ .endm
+
+ .macro encrypt_block, in, rounds, t0, t1, t2
+ do_block_Nx e, \rounds, \in
+ .endm
+
+ .macro encrypt_block2x, i0, i1, rounds, t0, t1, t2
+ do_block_Nx e, \rounds, \i0, \i1
+ .endm
+
+ .macro encrypt_block4x, i0, i1, i2, i3, rounds, t0, t1, t2
+ do_block_Nx e, \rounds, \i0, \i1, \i2, \i3
+ .endm
+
+ .macro decrypt_block, in, rounds, t0, t1, t2
+ do_block_Nx d, \rounds, \in
+ .endm
+
+ .macro decrypt_block2x, i0, i1, rounds, t0, t1, t2
+ do_block_Nx d, \rounds, \i0, \i1
+ .endm
+
+ .macro decrypt_block4x, i0, i1, i2, i3, rounds, t0, t1, t2
+ do_block_Nx d, \rounds, \i0, \i1, \i2, \i3
+ .endm
+
+#include "aes-modes.S"
diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c
new file mode 100644
index 000000000000..60f2f4c12256
--- /dev/null
+++ b/arch/arm64/crypto/aes-glue.c
@@ -0,0 +1,446 @@
+/*
+ * linux/arch/arm64/crypto/aes-glue.c - wrapper code for ARMv8 AES
+ *
+ * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <asm/neon.h>
+#include <asm/hwcap.h>
+#include <crypto/aes.h>
+#include <crypto/ablk_helper.h>
+#include <crypto/algapi.h>
+#include <linux/module.h>
+#include <linux/cpufeature.h>
+
+#ifdef USE_V8_CRYPTO_EXTENSIONS
+#define MODE "ce"
+#define PRIO 300
+#define aes_ecb_encrypt ce_aes_ecb_encrypt
+#define aes_ecb_decrypt ce_aes_ecb_decrypt
+#define aes_cbc_encrypt ce_aes_cbc_encrypt
+#define aes_cbc_decrypt ce_aes_cbc_decrypt
+#define aes_ctr_encrypt ce_aes_ctr_encrypt
+#define aes_xts_encrypt ce_aes_xts_encrypt
+#define aes_xts_decrypt ce_aes_xts_decrypt
+MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 Crypto Extensions");
+#else
+#define MODE "neon"
+#define PRIO 200
+#define aes_ecb_encrypt neon_aes_ecb_encrypt
+#define aes_ecb_decrypt neon_aes_ecb_decrypt
+#define aes_cbc_encrypt neon_aes_cbc_encrypt
+#define aes_cbc_decrypt neon_aes_cbc_decrypt
+#define aes_ctr_encrypt neon_aes_ctr_encrypt
+#define aes_xts_encrypt neon_aes_xts_encrypt
+#define aes_xts_decrypt neon_aes_xts_decrypt
+MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 NEON");
+MODULE_ALIAS("ecb(aes)");
+MODULE_ALIAS("cbc(aes)");
+MODULE_ALIAS("ctr(aes)");
+MODULE_ALIAS("xts(aes)");
+#endif
+
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+
+/* defined in aes-modes.S */
+asmlinkage void aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[],
+ int rounds, int blocks, int first);
+asmlinkage void aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[],
+ int rounds, int blocks, int first);
+
+asmlinkage void aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[],
+ int rounds, int blocks, u8 iv[], int first);
+asmlinkage void aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[],
+ int rounds, int blocks, u8 iv[], int first);
+
+asmlinkage void aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
+ int rounds, int blocks, u8 ctr[], int first);
+
+asmlinkage void aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[],
+ int rounds, int blocks, u8 const rk2[], u8 iv[],
+ int first);
+asmlinkage void aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[],
+ int rounds, int blocks, u8 const rk2[], u8 iv[],
+ int first);
+
+struct crypto_aes_xts_ctx {
+ struct crypto_aes_ctx key1;
+ struct crypto_aes_ctx __aligned(8) key2;
+};
+
+static int xts_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+ unsigned int key_len)
+{
+ struct crypto_aes_xts_ctx *ctx = crypto_tfm_ctx(tfm);
+ int ret;
+
+ ret = crypto_aes_expand_key(&ctx->key1, in_key, key_len / 2);
+ if (!ret)
+ ret = crypto_aes_expand_key(&ctx->key2, &in_key[key_len / 2],
+ key_len / 2);
+ if (!ret)
+ return 0;
+
+ tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+ return -EINVAL;
+}
+
+static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ int err, first, rounds = 6 + ctx->key_length / 4;
+ struct blkcipher_walk walk;
+ unsigned int blocks;
+
+ desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ err = blkcipher_walk_virt(desc, &walk);
+
+ kernel_neon_begin();
+ for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
+ aes_ecb_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ (u8 *)ctx->key_enc, rounds, blocks, first);
+ err = blkcipher_walk_done(desc, &walk, 0);
+ }
+ kernel_neon_end();
+ return err;
+}
+
+static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ int err, first, rounds = 6 + ctx->key_length / 4;
+ struct blkcipher_walk walk;
+ unsigned int blocks;
+
+ desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ err = blkcipher_walk_virt(desc, &walk);
+
+ kernel_neon_begin();
+ for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
+ aes_ecb_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ (u8 *)ctx->key_dec, rounds, blocks, first);
+ err = blkcipher_walk_done(desc, &walk, 0);
+ }
+ kernel_neon_end();
+ return err;
+}
+
+static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ int err, first, rounds = 6 + ctx->key_length / 4;
+ struct blkcipher_walk walk;
+ unsigned int blocks;
+
+ desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ err = blkcipher_walk_virt(desc, &walk);
+
+ kernel_neon_begin();
+ for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
+ aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ (u8 *)ctx->key_enc, rounds, blocks, walk.iv,
+ first);
+ err = blkcipher_walk_done(desc, &walk, 0);
+ }
+ kernel_neon_end();
+ return err;
+}
+
+static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ int err, first, rounds = 6 + ctx->key_length / 4;
+ struct blkcipher_walk walk;
+ unsigned int blocks;
+
+ desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ err = blkcipher_walk_virt(desc, &walk);
+
+ kernel_neon_begin();
+ for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
+ aes_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ (u8 *)ctx->key_dec, rounds, blocks, walk.iv,
+ first);
+ err = blkcipher_walk_done(desc, &walk, 0);
+ }
+ kernel_neon_end();
+ return err;
+}
+
+static int ctr_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ int err, first, rounds = 6 + ctx->key_length / 4;
+ struct blkcipher_walk walk;
+ int blocks;
+
+ desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE);
+
+ first = 1;
+ kernel_neon_begin();
+ while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
+ aes_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ (u8 *)ctx->key_enc, rounds, blocks, walk.iv,
+ first);
+ first = 0;
+ nbytes -= blocks * AES_BLOCK_SIZE;
+ if (nbytes && nbytes == walk.nbytes % AES_BLOCK_SIZE)
+ break;
+ err = blkcipher_walk_done(desc, &walk,
+ walk.nbytes % AES_BLOCK_SIZE);
+ }
+ if (nbytes) {
+ u8 *tdst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE;
+ u8 *tsrc = walk.src.virt.addr + blocks * AES_BLOCK_SIZE;
+ u8 __aligned(8) tail[AES_BLOCK_SIZE];
+
+ /*
+ * Minimum alignment is 8 bytes, so if nbytes is <= 8, we need
+ * to tell aes_ctr_encrypt() to only read half a block.
+ */
+ blocks = (nbytes <= 8) ? -1 : 1;
+
+ aes_ctr_encrypt(tail, tsrc, (u8 *)ctx->key_enc, rounds,
+ blocks, walk.iv, first);
+ memcpy(tdst, tail, nbytes);
+ err = blkcipher_walk_done(desc, &walk, 0);
+ }
+ kernel_neon_end();
+
+ return err;
+}
+
+static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ struct crypto_aes_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ int err, first, rounds = 6 + ctx->key1.key_length / 4;
+ struct blkcipher_walk walk;
+ unsigned int blocks;
+
+ desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ err = blkcipher_walk_virt(desc, &walk);
+
+ kernel_neon_begin();
+ for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
+ aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ (u8 *)ctx->key1.key_enc, rounds, blocks,
+ (u8 *)ctx->key2.key_enc, walk.iv, first);
+ err = blkcipher_walk_done(desc, &walk, 0);
+ }
+ kernel_neon_end();
+
+ return err;
+}
+
+static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ struct crypto_aes_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ int err, first, rounds = 6 + ctx->key1.key_length / 4;
+ struct blkcipher_walk walk;
+ unsigned int blocks;
+
+ desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ err = blkcipher_walk_virt(desc, &walk);
+
+ kernel_neon_begin();
+ for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
+ aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ (u8 *)ctx->key1.key_dec, rounds, blocks,
+ (u8 *)ctx->key2.key_enc, walk.iv, first);
+ err = blkcipher_walk_done(desc, &walk, 0);
+ }
+ kernel_neon_end();
+
+ return err;
+}
+
+static struct crypto_alg aes_algs[] = { {
+ .cra_name = "__ecb-aes-" MODE,
+ .cra_driver_name = "__driver-ecb-aes-" MODE,
+ .cra_priority = 0,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct crypto_aes_ctx),
+ .cra_alignmask = 7,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_blkcipher = {
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = crypto_aes_set_key,
+ .encrypt = ecb_encrypt,
+ .decrypt = ecb_decrypt,
+ },
+}, {
+ .cra_name = "__cbc-aes-" MODE,
+ .cra_driver_name = "__driver-cbc-aes-" MODE,
+ .cra_priority = 0,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct crypto_aes_ctx),
+ .cra_alignmask = 7,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_blkcipher = {
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = crypto_aes_set_key,
+ .encrypt = cbc_encrypt,
+ .decrypt = cbc_decrypt,
+ },
+}, {
+ .cra_name = "__ctr-aes-" MODE,
+ .cra_driver_name = "__driver-ctr-aes-" MODE,
+ .cra_priority = 0,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct crypto_aes_ctx),
+ .cra_alignmask = 7,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_blkcipher = {
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = crypto_aes_set_key,
+ .encrypt = ctr_encrypt,
+ .decrypt = ctr_encrypt,
+ },
+}, {
+ .cra_name = "__xts-aes-" MODE,
+ .cra_driver_name = "__driver-xts-aes-" MODE,
+ .cra_priority = 0,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct crypto_aes_xts_ctx),
+ .cra_alignmask = 7,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_blkcipher = {
+ .min_keysize = 2 * AES_MIN_KEY_SIZE,
+ .max_keysize = 2 * AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = xts_set_key,
+ .encrypt = xts_encrypt,
+ .decrypt = xts_decrypt,
+ },
+}, {
+ .cra_name = "ecb(aes)",
+ .cra_driver_name = "ecb-aes-" MODE,
+ .cra_priority = PRIO,
+ .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct async_helper_ctx),
+ .cra_alignmask = 7,
+ .cra_type = &crypto_ablkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_init = ablk_init,
+ .cra_exit = ablk_exit,
+ .cra_ablkcipher = {
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = ablk_set_key,
+ .encrypt = ablk_encrypt,
+ .decrypt = ablk_decrypt,
+ }
+}, {
+ .cra_name = "cbc(aes)",
+ .cra_driver_name = "cbc-aes-" MODE,
+ .cra_priority = PRIO,
+ .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct async_helper_ctx),
+ .cra_alignmask = 7,
+ .cra_type = &crypto_ablkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_init = ablk_init,
+ .cra_exit = ablk_exit,
+ .cra_ablkcipher = {
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = ablk_set_key,
+ .encrypt = ablk_encrypt,
+ .decrypt = ablk_decrypt,
+ }
+}, {
+ .cra_name = "ctr(aes)",
+ .cra_driver_name = "ctr-aes-" MODE,
+ .cra_priority = PRIO,
+ .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct async_helper_ctx),
+ .cra_alignmask = 7,
+ .cra_type = &crypto_ablkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_init = ablk_init,
+ .cra_exit = ablk_exit,
+ .cra_ablkcipher = {
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = ablk_set_key,
+ .encrypt = ablk_encrypt,
+ .decrypt = ablk_decrypt,
+ }
+}, {
+ .cra_name = "xts(aes)",
+ .cra_driver_name = "xts-aes-" MODE,
+ .cra_priority = PRIO,
+ .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct async_helper_ctx),
+ .cra_alignmask = 7,
+ .cra_type = &crypto_ablkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_init = ablk_init,
+ .cra_exit = ablk_exit,
+ .cra_ablkcipher = {
+ .min_keysize = 2 * AES_MIN_KEY_SIZE,
+ .max_keysize = 2 * AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = ablk_set_key,
+ .encrypt = ablk_encrypt,
+ .decrypt = ablk_decrypt,
+ }
+} };
+
+static int __init aes_init(void)
+{
+ return crypto_register_algs(aes_algs, ARRAY_SIZE(aes_algs));
+}
+
+static void __exit aes_exit(void)
+{
+ crypto_unregister_algs(aes_algs, ARRAY_SIZE(aes_algs));
+}
+
+#ifdef USE_V8_CRYPTO_EXTENSIONS
+module_cpu_feature_match(AES, aes_init);
+#else
+module_init(aes_init);
+#endif
+module_exit(aes_exit);
diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S
new file mode 100644
index 000000000000..f6e372c528eb
--- /dev/null
+++ b/arch/arm64/crypto/aes-modes.S
@@ -0,0 +1,532 @@
+/*
+ * linux/arch/arm64/crypto/aes-modes.S - chaining mode wrappers for AES
+ *
+ * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/* included by aes-ce.S and aes-neon.S */
+
+ .text
+ .align 4
+
+/*
+ * There are several ways to instantiate this code:
+ * - no interleave, all inline
+ * - 2-way interleave, 2x calls out of line (-DINTERLEAVE=2)
+ * - 2-way interleave, all inline (-DINTERLEAVE=2 -DINTERLEAVE_INLINE)
+ * - 4-way interleave, 4x calls out of line (-DINTERLEAVE=4)
+ * - 4-way interleave, all inline (-DINTERLEAVE=4 -DINTERLEAVE_INLINE)
+ *
+ * Macros imported by this code:
+ * - enc_prepare - setup NEON registers for encryption
+ * - dec_prepare - setup NEON registers for decryption
+ * - enc_switch_key - change to new key after having prepared for encryption
+ * - encrypt_block - encrypt a single block
+ * - decrypt block - decrypt a single block
+ * - encrypt_block2x - encrypt 2 blocks in parallel (if INTERLEAVE == 2)
+ * - decrypt_block2x - decrypt 2 blocks in parallel (if INTERLEAVE == 2)
+ * - encrypt_block4x - encrypt 4 blocks in parallel (if INTERLEAVE == 4)
+ * - decrypt_block4x - decrypt 4 blocks in parallel (if INTERLEAVE == 4)
+ */
+
+#if defined(INTERLEAVE) && !defined(INTERLEAVE_INLINE)
+#define FRAME_PUSH stp x29, x30, [sp,#-16]! ; mov x29, sp
+#define FRAME_POP ldp x29, x30, [sp],#16
+
+#if INTERLEAVE == 2
+
+aes_encrypt_block2x:
+ encrypt_block2x v0, v1, w3, x2, x6, w7
+ ret
+ENDPROC(aes_encrypt_block2x)
+
+aes_decrypt_block2x:
+ decrypt_block2x v0, v1, w3, x2, x6, w7
+ ret
+ENDPROC(aes_decrypt_block2x)
+
+#elif INTERLEAVE == 4
+
+aes_encrypt_block4x:
+ encrypt_block4x v0, v1, v2, v3, w3, x2, x6, w7
+ ret
+ENDPROC(aes_encrypt_block4x)
+
+aes_decrypt_block4x:
+ decrypt_block4x v0, v1, v2, v3, w3, x2, x6, w7
+ ret
+ENDPROC(aes_decrypt_block4x)
+
+#else
+#error INTERLEAVE should equal 2 or 4
+#endif
+
+ .macro do_encrypt_block2x
+ bl aes_encrypt_block2x
+ .endm
+
+ .macro do_decrypt_block2x
+ bl aes_decrypt_block2x
+ .endm
+
+ .macro do_encrypt_block4x
+ bl aes_encrypt_block4x
+ .endm
+
+ .macro do_decrypt_block4x
+ bl aes_decrypt_block4x
+ .endm
+
+#else
+#define FRAME_PUSH
+#define FRAME_POP
+
+ .macro do_encrypt_block2x
+ encrypt_block2x v0, v1, w3, x2, x6, w7
+ .endm
+
+ .macro do_decrypt_block2x
+ decrypt_block2x v0, v1, w3, x2, x6, w7
+ .endm
+
+ .macro do_encrypt_block4x
+ encrypt_block4x v0, v1, v2, v3, w3, x2, x6, w7
+ .endm
+
+ .macro do_decrypt_block4x
+ decrypt_block4x v0, v1, v2, v3, w3, x2, x6, w7
+ .endm
+
+#endif
+
+ /*
+ * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
+ * int blocks, int first)
+ * aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
+ * int blocks, int first)
+ */
+
+AES_ENTRY(aes_ecb_encrypt)
+ FRAME_PUSH
+ cbz w5, .LecbencloopNx
+
+ enc_prepare w3, x2, x5
+
+.LecbencloopNx:
+#if INTERLEAVE >= 2
+ subs w4, w4, #INTERLEAVE
+ bmi .Lecbenc1x
+#if INTERLEAVE == 2
+ ld1 {v0.16b-v1.16b}, [x1], #32 /* get 2 pt blocks */
+ do_encrypt_block2x
+ st1 {v0.16b-v1.16b}, [x0], #32
+#else
+ ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */
+ do_encrypt_block4x
+ st1 {v0.16b-v3.16b}, [x0], #64
+#endif
+ b .LecbencloopNx
+.Lecbenc1x:
+ adds w4, w4, #INTERLEAVE
+ beq .Lecbencout
+#endif
+.Lecbencloop:
+ ld1 {v0.16b}, [x1], #16 /* get next pt block */
+ encrypt_block v0, w3, x2, x5, w6
+ st1 {v0.16b}, [x0], #16
+ subs w4, w4, #1
+ bne .Lecbencloop
+.Lecbencout:
+ FRAME_POP
+ ret
+AES_ENDPROC(aes_ecb_encrypt)
+
+
+AES_ENTRY(aes_ecb_decrypt)
+ FRAME_PUSH
+ cbz w5, .LecbdecloopNx
+
+ dec_prepare w3, x2, x5
+
+.LecbdecloopNx:
+#if INTERLEAVE >= 2
+ subs w4, w4, #INTERLEAVE
+ bmi .Lecbdec1x
+#if INTERLEAVE == 2
+ ld1 {v0.16b-v1.16b}, [x1], #32 /* get 2 ct blocks */
+ do_decrypt_block2x
+ st1 {v0.16b-v1.16b}, [x0], #32
+#else
+ ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */
+ do_decrypt_block4x
+ st1 {v0.16b-v3.16b}, [x0], #64
+#endif
+ b .LecbdecloopNx
+.Lecbdec1x:
+ adds w4, w4, #INTERLEAVE
+ beq .Lecbdecout
+#endif
+.Lecbdecloop:
+ ld1 {v0.16b}, [x1], #16 /* get next ct block */
+ decrypt_block v0, w3, x2, x5, w6
+ st1 {v0.16b}, [x0], #16
+ subs w4, w4, #1
+ bne .Lecbdecloop
+.Lecbdecout:
+ FRAME_POP
+ ret
+AES_ENDPROC(aes_ecb_decrypt)
+
+
+ /*
+ * aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
+ * int blocks, u8 iv[], int first)
+ * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
+ * int blocks, u8 iv[], int first)
+ */
+
+AES_ENTRY(aes_cbc_encrypt)
+ cbz w6, .Lcbcencloop
+
+ ld1 {v0.16b}, [x5] /* get iv */
+ enc_prepare w3, x2, x5
+
+.Lcbcencloop:
+ ld1 {v1.16b}, [x1], #16 /* get next pt block */
+ eor v0.16b, v0.16b, v1.16b /* ..and xor with iv */
+ encrypt_block v0, w3, x2, x5, w6
+ st1 {v0.16b}, [x0], #16
+ subs w4, w4, #1
+ bne .Lcbcencloop
+ ret
+AES_ENDPROC(aes_cbc_encrypt)
+
+
+AES_ENTRY(aes_cbc_decrypt)
+ FRAME_PUSH
+ cbz w6, .LcbcdecloopNx
+
+ ld1 {v7.16b}, [x5] /* get iv */
+ dec_prepare w3, x2, x5
+
+.LcbcdecloopNx:
+#if INTERLEAVE >= 2
+ subs w4, w4, #INTERLEAVE
+ bmi .Lcbcdec1x
+#if INTERLEAVE == 2
+ ld1 {v0.16b-v1.16b}, [x1], #32 /* get 2 ct blocks */
+ mov v2.16b, v0.16b
+ mov v3.16b, v1.16b
+ do_decrypt_block2x
+ eor v0.16b, v0.16b, v7.16b
+ eor v1.16b, v1.16b, v2.16b
+ mov v7.16b, v3.16b
+ st1 {v0.16b-v1.16b}, [x0], #32
+#else
+ ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */
+ mov v4.16b, v0.16b
+ mov v5.16b, v1.16b
+ mov v6.16b, v2.16b
+ do_decrypt_block4x
+ sub x1, x1, #16
+ eor v0.16b, v0.16b, v7.16b
+ eor v1.16b, v1.16b, v4.16b
+ ld1 {v7.16b}, [x1], #16 /* reload 1 ct block */
+ eor v2.16b, v2.16b, v5.16b
+ eor v3.16b, v3.16b, v6.16b
+ st1 {v0.16b-v3.16b}, [x0], #64
+#endif
+ b .LcbcdecloopNx
+.Lcbcdec1x:
+ adds w4, w4, #INTERLEAVE
+ beq .Lcbcdecout
+#endif
+.Lcbcdecloop:
+ ld1 {v1.16b}, [x1], #16 /* get next ct block */
+ mov v0.16b, v1.16b /* ...and copy to v0 */
+ decrypt_block v0, w3, x2, x5, w6
+ eor v0.16b, v0.16b, v7.16b /* xor with iv => pt */
+ mov v7.16b, v1.16b /* ct is next iv */
+ st1 {v0.16b}, [x0], #16
+ subs w4, w4, #1
+ bne .Lcbcdecloop
+.Lcbcdecout:
+ FRAME_POP
+ ret
+AES_ENDPROC(aes_cbc_decrypt)
+
+
+ /*
+ * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
+ * int blocks, u8 ctr[], int first)
+ */
+
+AES_ENTRY(aes_ctr_encrypt)
+ FRAME_PUSH
+ cbnz w6, .Lctrfirst /* 1st time around? */
+ umov x5, v4.d[1] /* keep swabbed ctr in reg */
+ rev x5, x5
+#if INTERLEAVE >= 2
+ cmn w5, w4 /* 32 bit overflow? */
+ bcs .Lctrinc
+ add x5, x5, #1 /* increment BE ctr */
+ b .LctrincNx
+#else
+ b .Lctrinc
+#endif
+.Lctrfirst:
+ enc_prepare w3, x2, x6
+ ld1 {v4.16b}, [x5]
+ umov x5, v4.d[1] /* keep swabbed ctr in reg */
+ rev x5, x5
+#if INTERLEAVE >= 2
+ cmn w5, w4 /* 32 bit overflow? */
+ bcs .Lctrloop
+.LctrloopNx:
+ subs w4, w4, #INTERLEAVE
+ bmi .Lctr1x
+#if INTERLEAVE == 2
+ mov v0.8b, v4.8b
+ mov v1.8b, v4.8b
+ rev x7, x5
+ add x5, x5, #1
+ ins v0.d[1], x7
+ rev x7, x5
+ add x5, x5, #1
+ ins v1.d[1], x7
+ ld1 {v2.16b-v3.16b}, [x1], #32 /* get 2 input blocks */
+ do_encrypt_block2x
+ eor v0.16b, v0.16b, v2.16b
+ eor v1.16b, v1.16b, v3.16b
+ st1 {v0.16b-v1.16b}, [x0], #32
+#else
+ ldr q8, =0x30000000200000001 /* addends 1,2,3[,0] */
+ dup v7.4s, w5
+ mov v0.16b, v4.16b
+ add v7.4s, v7.4s, v8.4s
+ mov v1.16b, v4.16b
+ rev32 v8.16b, v7.16b
+ mov v2.16b, v4.16b
+ mov v3.16b, v4.16b
+ mov v1.s[3], v8.s[0]
+ mov v2.s[3], v8.s[1]
+ mov v3.s[3], v8.s[2]
+ ld1 {v5.16b-v7.16b}, [x1], #48 /* get 3 input blocks */
+ do_encrypt_block4x
+ eor v0.16b, v5.16b, v0.16b
+ ld1 {v5.16b}, [x1], #16 /* get 1 input block */
+ eor v1.16b, v6.16b, v1.16b
+ eor v2.16b, v7.16b, v2.16b
+ eor v3.16b, v5.16b, v3.16b
+ st1 {v0.16b-v3.16b}, [x0], #64
+ add x5, x5, #INTERLEAVE
+#endif
+ cbz w4, .LctroutNx
+.LctrincNx:
+ rev x7, x5
+ ins v4.d[1], x7
+ b .LctrloopNx
+.LctroutNx:
+ sub x5, x5, #1
+ rev x7, x5
+ ins v4.d[1], x7
+ b .Lctrout
+.Lctr1x:
+ adds w4, w4, #INTERLEAVE
+ beq .Lctrout
+#endif
+.Lctrloop:
+ mov v0.16b, v4.16b
+ encrypt_block v0, w3, x2, x6, w7
+ subs w4, w4, #1
+ bmi .Lctrhalfblock /* blocks < 0 means 1/2 block */
+ ld1 {v3.16b}, [x1], #16
+ eor v3.16b, v0.16b, v3.16b
+ st1 {v3.16b}, [x0], #16
+ beq .Lctrout
+.Lctrinc:
+ adds x5, x5, #1 /* increment BE ctr */
+ rev x7, x5
+ ins v4.d[1], x7
+ bcc .Lctrloop /* no overflow? */
+ umov x7, v4.d[0] /* load upper word of ctr */
+ rev x7, x7 /* ... to handle the carry */
+ add x7, x7, #1
+ rev x7, x7
+ ins v4.d[0], x7
+ b .Lctrloop
+.Lctrhalfblock:
+ ld1 {v3.8b}, [x1]
+ eor v3.8b, v0.8b, v3.8b
+ st1 {v3.8b}, [x0]
+.Lctrout:
+ FRAME_POP
+ ret
+AES_ENDPROC(aes_ctr_encrypt)
+ .ltorg
+
+
+ /*
+ * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
+ * int blocks, u8 const rk2[], u8 iv[], int first)
+ * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
+ * int blocks, u8 const rk2[], u8 iv[], int first)
+ */
+
+ .macro next_tweak, out, in, const, tmp
+ sshr \tmp\().2d, \in\().2d, #63
+ and \tmp\().16b, \tmp\().16b, \const\().16b
+ add \out\().2d, \in\().2d, \in\().2d
+ ext \tmp\().16b, \tmp\().16b, \tmp\().16b, #8
+ eor \out\().16b, \out\().16b, \tmp\().16b
+ .endm
+
+.Lxts_mul_x:
+ .word 1, 0, 0x87, 0
+
+AES_ENTRY(aes_xts_encrypt)
+ FRAME_PUSH
+ cbz w7, .LxtsencloopNx
+
+ ld1 {v4.16b}, [x6]
+ enc_prepare w3, x5, x6
+ encrypt_block v4, w3, x5, x6, w7 /* first tweak */
+ enc_switch_key w3, x2, x6
+ ldr q7, .Lxts_mul_x
+ b .LxtsencNx
+
+.LxtsencloopNx:
+ ldr q7, .Lxts_mul_x
+ next_tweak v4, v4, v7, v8
+.LxtsencNx:
+#if INTERLEAVE >= 2
+ subs w4, w4, #INTERLEAVE
+ bmi .Lxtsenc1x
+#if INTERLEAVE == 2
+ ld1 {v0.16b-v1.16b}, [x1], #32 /* get 2 pt blocks */
+ next_tweak v5, v4, v7, v8
+ eor v0.16b, v0.16b, v4.16b
+ eor v1.16b, v1.16b, v5.16b
+ do_encrypt_block2x
+ eor v0.16b, v0.16b, v4.16b
+ eor v1.16b, v1.16b, v5.16b
+ st1 {v0.16b-v1.16b}, [x0], #32
+ cbz w4, .LxtsencoutNx
+ next_tweak v4, v5, v7, v8
+ b .LxtsencNx
+.LxtsencoutNx:
+ mov v4.16b, v5.16b
+ b .Lxtsencout
+#else
+ ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */
+ next_tweak v5, v4, v7, v8
+ eor v0.16b, v0.16b, v4.16b
+ next_tweak v6, v5, v7, v8
+ eor v1.16b, v1.16b, v5.16b
+ eor v2.16b, v2.16b, v6.16b
+ next_tweak v7, v6, v7, v8
+ eor v3.16b, v3.16b, v7.16b
+ do_encrypt_block4x
+ eor v3.16b, v3.16b, v7.16b
+ eor v0.16b, v0.16b, v4.16b
+ eor v1.16b, v1.16b, v5.16b
+ eor v2.16b, v2.16b, v6.16b
+ st1 {v0.16b-v3.16b}, [x0], #64
+ mov v4.16b, v7.16b
+ cbz w4, .Lxtsencout
+ b .LxtsencloopNx
+#endif
+.Lxtsenc1x:
+ adds w4, w4, #INTERLEAVE
+ beq .Lxtsencout
+#endif
+.Lxtsencloop:
+ ld1 {v1.16b}, [x1], #16
+ eor v0.16b, v1.16b, v4.16b
+ encrypt_block v0, w3, x2, x6, w7
+ eor v0.16b, v0.16b, v4.16b
+ st1 {v0.16b}, [x0], #16
+ subs w4, w4, #1
+ beq .Lxtsencout
+ next_tweak v4, v4, v7, v8
+ b .Lxtsencloop
+.Lxtsencout:
+ FRAME_POP
+ ret
+AES_ENDPROC(aes_xts_encrypt)
+
+
+AES_ENTRY(aes_xts_decrypt)
+ FRAME_PUSH
+ cbz w7, .LxtsdecloopNx
+
+ ld1 {v4.16b}, [x6]
+ enc_prepare w3, x5, x6
+ encrypt_block v4, w3, x5, x6, w7 /* first tweak */
+ dec_prepare w3, x2, x6
+ ldr q7, .Lxts_mul_x
+ b .LxtsdecNx
+
+.LxtsdecloopNx:
+ ldr q7, .Lxts_mul_x
+ next_tweak v4, v4, v7, v8
+.LxtsdecNx:
+#if INTERLEAVE >= 2
+ subs w4, w4, #INTERLEAVE
+ bmi .Lxtsdec1x
+#if INTERLEAVE == 2
+ ld1 {v0.16b-v1.16b}, [x1], #32 /* get 2 ct blocks */
+ next_tweak v5, v4, v7, v8
+ eor v0.16b, v0.16b, v4.16b
+ eor v1.16b, v1.16b, v5.16b
+ do_decrypt_block2x
+ eor v0.16b, v0.16b, v4.16b
+ eor v1.16b, v1.16b, v5.16b
+ st1 {v0.16b-v1.16b}, [x0], #32
+ cbz w4, .LxtsdecoutNx
+ next_tweak v4, v5, v7, v8
+ b .LxtsdecNx
+.LxtsdecoutNx:
+ mov v4.16b, v5.16b
+ b .Lxtsdecout
+#else
+ ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */
+ next_tweak v5, v4, v7, v8
+ eor v0.16b, v0.16b, v4.16b
+ next_tweak v6, v5, v7, v8
+ eor v1.16b, v1.16b, v5.16b
+ eor v2.16b, v2.16b, v6.16b
+ next_tweak v7, v6, v7, v8
+ eor v3.16b, v3.16b, v7.16b
+ do_decrypt_block4x
+ eor v3.16b, v3.16b, v7.16b
+ eor v0.16b, v0.16b, v4.16b
+ eor v1.16b, v1.16b, v5.16b
+ eor v2.16b, v2.16b, v6.16b
+ st1 {v0.16b-v3.16b}, [x0], #64
+ mov v4.16b, v7.16b
+ cbz w4, .Lxtsdecout
+ b .LxtsdecloopNx
+#endif
+.Lxtsdec1x:
+ adds w4, w4, #INTERLEAVE
+ beq .Lxtsdecout
+#endif
+.Lxtsdecloop:
+ ld1 {v1.16b}, [x1], #16
+ eor v0.16b, v1.16b, v4.16b
+ decrypt_block v0, w3, x2, x6, w7
+ eor v0.16b, v0.16b, v4.16b
+ st1 {v0.16b}, [x0], #16
+ subs w4, w4, #1
+ beq .Lxtsdecout
+ next_tweak v4, v4, v7, v8
+ b .Lxtsdecloop
+.Lxtsdecout:
+ FRAME_POP
+ ret
+AES_ENDPROC(aes_xts_decrypt)
diff --git a/arch/arm64/crypto/aes-neon.S b/arch/arm64/crypto/aes-neon.S
new file mode 100644
index 000000000000..b93170e1cc93
--- /dev/null
+++ b/arch/arm64/crypto/aes-neon.S
@@ -0,0 +1,382 @@
+/*
+ * linux/arch/arm64/crypto/aes-neon.S - AES cipher for ARMv8 NEON
+ *
+ * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+
+#define AES_ENTRY(func) ENTRY(neon_ ## func)
+#define AES_ENDPROC(func) ENDPROC(neon_ ## func)
+
+ /* multiply by polynomial 'x' in GF(2^8) */
+ .macro mul_by_x, out, in, temp, const
+ sshr \temp, \in, #7
+ add \out, \in, \in
+ and \temp, \temp, \const
+ eor \out, \out, \temp
+ .endm
+
+ /* preload the entire Sbox */
+ .macro prepare, sbox, shiftrows, temp
+ adr \temp, \sbox
+ movi v12.16b, #0x40
+ ldr q13, \shiftrows
+ movi v14.16b, #0x1b
+ ld1 {v16.16b-v19.16b}, [\temp], #64
+ ld1 {v20.16b-v23.16b}, [\temp], #64
+ ld1 {v24.16b-v27.16b}, [\temp], #64
+ ld1 {v28.16b-v31.16b}, [\temp]
+ .endm
+
+ /* do preload for encryption */
+ .macro enc_prepare, ignore0, ignore1, temp
+ prepare .LForward_Sbox, .LForward_ShiftRows, \temp
+ .endm
+
+ .macro enc_switch_key, ignore0, ignore1, temp
+ /* do nothing */
+ .endm
+
+ /* do preload for decryption */
+ .macro dec_prepare, ignore0, ignore1, temp
+ prepare .LReverse_Sbox, .LReverse_ShiftRows, \temp
+ .endm
+
+ /* apply SubBytes transformation using the the preloaded Sbox */
+ .macro sub_bytes, in
+ sub v9.16b, \in\().16b, v12.16b
+ tbl \in\().16b, {v16.16b-v19.16b}, \in\().16b
+ sub v10.16b, v9.16b, v12.16b
+ tbx \in\().16b, {v20.16b-v23.16b}, v9.16b
+ sub v11.16b, v10.16b, v12.16b
+ tbx \in\().16b, {v24.16b-v27.16b}, v10.16b
+ tbx \in\().16b, {v28.16b-v31.16b}, v11.16b
+ .endm
+
+ /* apply MixColumns transformation */
+ .macro mix_columns, in
+ mul_by_x v10.16b, \in\().16b, v9.16b, v14.16b
+ rev32 v8.8h, \in\().8h
+ eor \in\().16b, v10.16b, \in\().16b
+ shl v9.4s, v8.4s, #24
+ shl v11.4s, \in\().4s, #24
+ sri v9.4s, v8.4s, #8
+ sri v11.4s, \in\().4s, #8
+ eor v9.16b, v9.16b, v8.16b
+ eor v10.16b, v10.16b, v9.16b
+ eor \in\().16b, v10.16b, v11.16b
+ .endm
+
+ /* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */
+ .macro inv_mix_columns, in
+ mul_by_x v11.16b, \in\().16b, v10.16b, v14.16b
+ mul_by_x v11.16b, v11.16b, v10.16b, v14.16b
+ eor \in\().16b, \in\().16b, v11.16b
+ rev32 v11.8h, v11.8h
+ eor \in\().16b, \in\().16b, v11.16b
+ mix_columns \in
+ .endm
+
+ .macro do_block, enc, in, rounds, rk, rkp, i
+ ld1 {v15.16b}, [\rk]
+ add \rkp, \rk, #16
+ mov \i, \rounds
+1111: eor \in\().16b, \in\().16b, v15.16b /* ^round key */
+ tbl \in\().16b, {\in\().16b}, v13.16b /* ShiftRows */
+ sub_bytes \in
+ ld1 {v15.16b}, [\rkp], #16
+ subs \i, \i, #1
+ beq 2222f
+ .if \enc == 1
+ mix_columns \in
+ .else
+ inv_mix_columns \in
+ .endif
+ b 1111b
+2222: eor \in\().16b, \in\().16b, v15.16b /* ^round key */
+ .endm
+
+ .macro encrypt_block, in, rounds, rk, rkp, i
+ do_block 1, \in, \rounds, \rk, \rkp, \i
+ .endm
+
+ .macro decrypt_block, in, rounds, rk, rkp, i
+ do_block 0, \in, \rounds, \rk, \rkp, \i
+ .endm
+
+ /*
+ * Interleaved versions: functionally equivalent to the
+ * ones above, but applied to 2 or 4 AES states in parallel.
+ */
+
+ .macro sub_bytes_2x, in0, in1
+ sub v8.16b, \in0\().16b, v12.16b
+ sub v9.16b, \in1\().16b, v12.16b
+ tbl \in0\().16b, {v16.16b-v19.16b}, \in0\().16b
+ tbl \in1\().16b, {v16.16b-v19.16b}, \in1\().16b
+ sub v10.16b, v8.16b, v12.16b
+ sub v11.16b, v9.16b, v12.16b
+ tbx \in0\().16b, {v20.16b-v23.16b}, v8.16b
+ tbx \in1\().16b, {v20.16b-v23.16b}, v9.16b
+ sub v8.16b, v10.16b, v12.16b
+ sub v9.16b, v11.16b, v12.16b
+ tbx \in0\().16b, {v24.16b-v27.16b}, v10.16b
+ tbx \in1\().16b, {v24.16b-v27.16b}, v11.16b
+ tbx \in0\().16b, {v28.16b-v31.16b}, v8.16b
+ tbx \in1\().16b, {v28.16b-v31.16b}, v9.16b
+ .endm
+
+ .macro sub_bytes_4x, in0, in1, in2, in3
+ sub v8.16b, \in0\().16b, v12.16b
+ tbl \in0\().16b, {v16.16b-v19.16b}, \in0\().16b
+ sub v9.16b, \in1\().16b, v12.16b
+ tbl \in1\().16b, {v16.16b-v19.16b}, \in1\().16b
+ sub v10.16b, \in2\().16b, v12.16b
+ tbl \in2\().16b, {v16.16b-v19.16b}, \in2\().16b
+ sub v11.16b, \in3\().16b, v12.16b
+ tbl \in3\().16b, {v16.16b-v19.16b}, \in3\().16b
+ tbx \in0\().16b, {v20.16b-v23.16b}, v8.16b
+ tbx \in1\().16b, {v20.16b-v23.16b}, v9.16b
+ sub v8.16b, v8.16b, v12.16b
+ tbx \in2\().16b, {v20.16b-v23.16b}, v10.16b
+ sub v9.16b, v9.16b, v12.16b
+ tbx \in3\().16b, {v20.16b-v23.16b}, v11.16b
+ sub v10.16b, v10.16b, v12.16b
+ tbx \in0\().16b, {v24.16b-v27.16b}, v8.16b
+ sub v11.16b, v11.16b, v12.16b
+ tbx \in1\().16b, {v24.16b-v27.16b}, v9.16b
+ sub v8.16b, v8.16b, v12.16b
+ tbx \in2\().16b, {v24.16b-v27.16b}, v10.16b
+ sub v9.16b, v9.16b, v12.16b
+ tbx \in3\().16b, {v24.16b-v27.16b}, v11.16b
+ sub v10.16b, v10.16b, v12.16b
+ tbx \in0\().16b, {v28.16b-v31.16b}, v8.16b
+ sub v11.16b, v11.16b, v12.16b
+ tbx \in1\().16b, {v28.16b-v31.16b}, v9.16b
+ tbx \in2\().16b, {v28.16b-v31.16b}, v10.16b
+ tbx \in3\().16b, {v28.16b-v31.16b}, v11.16b
+ .endm
+
+ .macro mul_by_x_2x, out0, out1, in0, in1, tmp0, tmp1, const
+ sshr \tmp0\().16b, \in0\().16b, #7
+ add \out0\().16b, \in0\().16b, \in0\().16b
+ sshr \tmp1\().16b, \in1\().16b, #7
+ and \tmp0\().16b, \tmp0\().16b, \const\().16b
+ add \out1\().16b, \in1\().16b, \in1\().16b
+ and \tmp1\().16b, \tmp1\().16b, \const\().16b
+ eor \out0\().16b, \out0\().16b, \tmp0\().16b
+ eor \out1\().16b, \out1\().16b, \tmp1\().16b
+ .endm
+
+ .macro mix_columns_2x, in0, in1
+ mul_by_x_2x v8, v9, \in0, \in1, v10, v11, v14
+ rev32 v10.8h, \in0\().8h
+ rev32 v11.8h, \in1\().8h
+ eor \in0\().16b, v8.16b, \in0\().16b
+ eor \in1\().16b, v9.16b, \in1\().16b
+ shl v12.4s, v10.4s, #24
+ shl v13.4s, v11.4s, #24
+ eor v8.16b, v8.16b, v10.16b
+ sri v12.4s, v10.4s, #8
+ shl v10.4s, \in0\().4s, #24
+ eor v9.16b, v9.16b, v11.16b
+ sri v13.4s, v11.4s, #8
+ shl v11.4s, \in1\().4s, #24
+ sri v10.4s, \in0\().4s, #8
+ eor \in0\().16b, v8.16b, v12.16b
+ sri v11.4s, \in1\().4s, #8
+ eor \in1\().16b, v9.16b, v13.16b
+ eor \in0\().16b, v10.16b, \in0\().16b
+ eor \in1\().16b, v11.16b, \in1\().16b
+ .endm
+
+ .macro inv_mix_cols_2x, in0, in1
+ mul_by_x_2x v8, v9, \in0, \in1, v10, v11, v14
+ mul_by_x_2x v8, v9, v8, v9, v10, v11, v14
+ eor \in0\().16b, \in0\().16b, v8.16b
+ eor \in1\().16b, \in1\().16b, v9.16b
+ rev32 v8.8h, v8.8h
+ rev32 v9.8h, v9.8h
+ eor \in0\().16b, \in0\().16b, v8.16b
+ eor \in1\().16b, \in1\().16b, v9.16b
+ mix_columns_2x \in0, \in1
+ .endm
+
+ .macro inv_mix_cols_4x, in0, in1, in2, in3
+ mul_by_x_2x v8, v9, \in0, \in1, v10, v11, v14
+ mul_by_x_2x v10, v11, \in2, \in3, v12, v13, v14
+ mul_by_x_2x v8, v9, v8, v9, v12, v13, v14
+ mul_by_x_2x v10, v11, v10, v11, v12, v13, v14
+ eor \in0\().16b, \in0\().16b, v8.16b
+ eor \in1\().16b, \in1\().16b, v9.16b
+ eor \in2\().16b, \in2\().16b, v10.16b
+ eor \in3\().16b, \in3\().16b, v11.16b
+ rev32 v8.8h, v8.8h
+ rev32 v9.8h, v9.8h
+ rev32 v10.8h, v10.8h
+ rev32 v11.8h, v11.8h
+ eor \in0\().16b, \in0\().16b, v8.16b
+ eor \in1\().16b, \in1\().16b, v9.16b
+ eor \in2\().16b, \in2\().16b, v10.16b
+ eor \in3\().16b, \in3\().16b, v11.16b
+ mix_columns_2x \in0, \in1
+ mix_columns_2x \in2, \in3
+ .endm
+
+ .macro do_block_2x, enc, in0, in1 rounds, rk, rkp, i
+ ld1 {v15.16b}, [\rk]
+ add \rkp, \rk, #16
+ mov \i, \rounds
+1111: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */
+ eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */
+ sub_bytes_2x \in0, \in1
+ tbl \in0\().16b, {\in0\().16b}, v13.16b /* ShiftRows */
+ tbl \in1\().16b, {\in1\().16b}, v13.16b /* ShiftRows */
+ ld1 {v15.16b}, [\rkp], #16
+ subs \i, \i, #1
+ beq 2222f
+ .if \enc == 1
+ mix_columns_2x \in0, \in1
+ ldr q13, .LForward_ShiftRows
+ .else
+ inv_mix_cols_2x \in0, \in1
+ ldr q13, .LReverse_ShiftRows
+ .endif
+ movi v12.16b, #0x40
+ b 1111b
+2222: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */
+ eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */
+ .endm
+
+ .macro do_block_4x, enc, in0, in1, in2, in3, rounds, rk, rkp, i
+ ld1 {v15.16b}, [\rk]
+ add \rkp, \rk, #16
+ mov \i, \rounds
+1111: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */
+ eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */
+ eor \in2\().16b, \in2\().16b, v15.16b /* ^round key */
+ eor \in3\().16b, \in3\().16b, v15.16b /* ^round key */
+ sub_bytes_4x \in0, \in1, \in2, \in3
+ tbl \in0\().16b, {\in0\().16b}, v13.16b /* ShiftRows */
+ tbl \in1\().16b, {\in1\().16b}, v13.16b /* ShiftRows */
+ tbl \in2\().16b, {\in2\().16b}, v13.16b /* ShiftRows */
+ tbl \in3\().16b, {\in3\().16b}, v13.16b /* ShiftRows */
+ ld1 {v15.16b}, [\rkp], #16
+ subs \i, \i, #1
+ beq 2222f
+ .if \enc == 1
+ mix_columns_2x \in0, \in1
+ mix_columns_2x \in2, \in3
+ ldr q13, .LForward_ShiftRows
+ .else
+ inv_mix_cols_4x \in0, \in1, \in2, \in3
+ ldr q13, .LReverse_ShiftRows
+ .endif
+ movi v12.16b, #0x40
+ b 1111b
+2222: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */
+ eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */
+ eor \in2\().16b, \in2\().16b, v15.16b /* ^round key */
+ eor \in3\().16b, \in3\().16b, v15.16b /* ^round key */
+ .endm
+
+ .macro encrypt_block2x, in0, in1, rounds, rk, rkp, i
+ do_block_2x 1, \in0, \in1, \rounds, \rk, \rkp, \i
+ .endm
+
+ .macro decrypt_block2x, in0, in1, rounds, rk, rkp, i
+ do_block_2x 0, \in0, \in1, \rounds, \rk, \rkp, \i
+ .endm
+
+ .macro encrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
+ do_block_4x 1, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
+ .endm
+
+ .macro decrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
+ do_block_4x 0, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
+ .endm
+
+#include "aes-modes.S"
+
+ .text
+ .align 4
+.LForward_ShiftRows:
+ .byte 0x0, 0x5, 0xa, 0xf, 0x4, 0x9, 0xe, 0x3
+ .byte 0x8, 0xd, 0x2, 0x7, 0xc, 0x1, 0x6, 0xb
+
+.LReverse_ShiftRows:
+ .byte 0x0, 0xd, 0xa, 0x7, 0x4, 0x1, 0xe, 0xb
+ .byte 0x8, 0x5, 0x2, 0xf, 0xc, 0x9, 0x6, 0x3
+
+.LForward_Sbox:
+ .byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
+ .byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
+ .byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
+ .byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
+ .byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
+ .byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
+ .byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
+ .byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
+ .byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
+ .byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
+ .byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
+ .byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
+ .byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
+ .byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
+ .byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
+ .byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
+ .byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
+ .byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
+ .byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
+ .byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
+ .byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
+ .byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
+ .byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
+ .byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
+ .byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
+ .byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
+ .byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
+ .byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
+ .byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
+ .byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
+ .byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
+ .byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
+
+.LReverse_Sbox:
+ .byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
+ .byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
+ .byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
+ .byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
+ .byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
+ .byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
+ .byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
+ .byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
+ .byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
+ .byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
+ .byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
+ .byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
+ .byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
+ .byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
+ .byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
+ .byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
+ .byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
+ .byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
+ .byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
+ .byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
+ .byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
+ .byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
+ .byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
+ .byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
+ .byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
+ .byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
+ .byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
+ .byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
+ .byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
+ .byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
+ .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
+ .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
diff --git a/arch/arm64/crypto/ghash-ce-core.S b/arch/arm64/crypto/ghash-ce-core.S
new file mode 100644
index 000000000000..b9e6eaf41c9b
--- /dev/null
+++ b/arch/arm64/crypto/ghash-ce-core.S
@@ -0,0 +1,95 @@
+/*
+ * Accelerated GHASH implementation with ARMv8 PMULL instructions.
+ *
+ * Copyright (C) 2014 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ *
+ * Based on arch/x86/crypto/ghash-pmullni-intel_asm.S
+ *
+ * Copyright (c) 2009 Intel Corp.
+ * Author: Huang Ying <ying.huang@intel.com>
+ * Vinodh Gopal
+ * Erdinc Ozturk
+ * Deniz Karakoyunlu
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+ DATA .req v0
+ SHASH .req v1
+ IN1 .req v2
+ T1 .req v2
+ T2 .req v3
+ T3 .req v4
+ VZR .req v5
+
+ .text
+ .arch armv8-a+crypto
+
+ /*
+ * void pmull_ghash_update(int blocks, u64 dg[], const char *src,
+ * struct ghash_key const *k, const char *head)
+ */
+ENTRY(pmull_ghash_update)
+ ld1 {DATA.16b}, [x1]
+ ld1 {SHASH.16b}, [x3]
+ eor VZR.16b, VZR.16b, VZR.16b
+
+ /* do the head block first, if supplied */
+ cbz x4, 0f
+ ld1 {IN1.2d}, [x4]
+ b 1f
+
+0: ld1 {IN1.2d}, [x2], #16
+ sub w0, w0, #1
+1: ext IN1.16b, IN1.16b, IN1.16b, #8
+CPU_LE( rev64 IN1.16b, IN1.16b )
+ eor DATA.16b, DATA.16b, IN1.16b
+
+ /* multiply DATA by SHASH in GF(2^128) */
+ ext T2.16b, DATA.16b, DATA.16b, #8
+ ext T3.16b, SHASH.16b, SHASH.16b, #8
+ eor T2.16b, T2.16b, DATA.16b
+ eor T3.16b, T3.16b, SHASH.16b
+
+ pmull2 T1.1q, SHASH.2d, DATA.2d // a1 * b1
+ pmull DATA.1q, SHASH.1d, DATA.1d // a0 * b0
+ pmull T2.1q, T2.1d, T3.1d // (a1 + a0)(b1 + b0)
+ eor T2.16b, T2.16b, T1.16b // (a0 * b1) + (a1 * b0)
+ eor T2.16b, T2.16b, DATA.16b
+
+ ext T3.16b, VZR.16b, T2.16b, #8
+ ext T2.16b, T2.16b, VZR.16b, #8
+ eor DATA.16b, DATA.16b, T3.16b
+ eor T1.16b, T1.16b, T2.16b // <T1:DATA> is result of
+ // carry-less multiplication
+
+ /* first phase of the reduction */
+ shl T3.2d, DATA.2d, #1
+ eor T3.16b, T3.16b, DATA.16b
+ shl T3.2d, T3.2d, #5
+ eor T3.16b, T3.16b, DATA.16b
+ shl T3.2d, T3.2d, #57
+ ext T2.16b, VZR.16b, T3.16b, #8
+ ext T3.16b, T3.16b, VZR.16b, #8
+ eor DATA.16b, DATA.16b, T2.16b
+ eor T1.16b, T1.16b, T3.16b
+
+ /* second phase of the reduction */
+ ushr T2.2d, DATA.2d, #5
+ eor T2.16b, T2.16b, DATA.16b
+ ushr T2.2d, T2.2d, #1
+ eor T2.16b, T2.16b, DATA.16b
+ ushr T2.2d, T2.2d, #1
+ eor T1.16b, T1.16b, T2.16b
+ eor DATA.16b, DATA.16b, T1.16b
+
+ cbnz w0, 0b
+
+ st1 {DATA.16b}, [x1]
+ ret
+ENDPROC(pmull_ghash_update)
diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c
new file mode 100644
index 000000000000..b92baf3f68c7
--- /dev/null
+++ b/arch/arm64/crypto/ghash-ce-glue.c
@@ -0,0 +1,155 @@
+/*
+ * Accelerated GHASH implementation with ARMv8 PMULL instructions.
+ *
+ * Copyright (C) 2014 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#include <asm/neon.h>
+#include <asm/unaligned.h>
+#include <crypto/internal/hash.h>
+#include <linux/cpufeature.h>
+#include <linux/crypto.h>
+#include <linux/module.h>
+
+MODULE_DESCRIPTION("GHASH secure hash using ARMv8 Crypto Extensions");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+
+#define GHASH_BLOCK_SIZE 16
+#define GHASH_DIGEST_SIZE 16
+
+struct ghash_key {
+ u64 a;
+ u64 b;
+};
+
+struct ghash_desc_ctx {
+ u64 digest[GHASH_DIGEST_SIZE/sizeof(u64)];
+ u8 buf[GHASH_BLOCK_SIZE];
+ u32 count;
+};
+
+asmlinkage void pmull_ghash_update(int blocks, u64 dg[], const char *src,
+ struct ghash_key const *k, const char *head);
+
+static int ghash_init(struct shash_desc *desc)
+{
+ struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
+
+ *ctx = (struct ghash_desc_ctx){};
+ return 0;
+}
+
+static int ghash_update(struct shash_desc *desc, const u8 *src,
+ unsigned int len)
+{
+ struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
+ unsigned int partial = ctx->count % GHASH_BLOCK_SIZE;
+
+ ctx->count += len;
+
+ if ((partial + len) >= GHASH_BLOCK_SIZE) {
+ struct ghash_key *key = crypto_shash_ctx(desc->tfm);
+ int blocks;
+
+ if (partial) {
+ int p = GHASH_BLOCK_SIZE - partial;
+
+ memcpy(ctx->buf + partial, src, p);
+ src += p;
+ len -= p;
+ }
+
+ blocks = len / GHASH_BLOCK_SIZE;
+ len %= GHASH_BLOCK_SIZE;
+
+ kernel_neon_begin_partial(6);
+ pmull_ghash_update(blocks, ctx->digest, src, key,
+ partial ? ctx->buf : NULL);
+ kernel_neon_end();
+ src += blocks * GHASH_BLOCK_SIZE;
+ }
+ if (len)
+ memcpy(ctx->buf + partial, src, len);
+ return 0;
+}
+
+static int ghash_final(struct shash_desc *desc, u8 *dst)
+{
+ struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
+ unsigned int partial = ctx->count % GHASH_BLOCK_SIZE;
+
+ if (partial) {
+ struct ghash_key *key = crypto_shash_ctx(desc->tfm);
+
+ memset(ctx->buf + partial, 0, GHASH_BLOCK_SIZE - partial);
+
+ kernel_neon_begin_partial(6);
+ pmull_ghash_update(1, ctx->digest, ctx->buf, key, NULL);
+ kernel_neon_end();
+ }
+ put_unaligned_be64(ctx->digest[1], dst);
+ put_unaligned_be64(ctx->digest[0], dst + 8);
+
+ *ctx = (struct ghash_desc_ctx){};
+ return 0;
+}
+
+static int ghash_setkey(struct crypto_shash *tfm,
+ const u8 *inkey, unsigned int keylen)
+{
+ struct ghash_key *key = crypto_shash_ctx(tfm);
+ u64 a, b;
+
+ if (keylen != GHASH_BLOCK_SIZE) {
+ crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+ return -EINVAL;
+ }
+
+ /* perform multiplication by 'x' in GF(2^128) */
+ b = get_unaligned_be64(inkey);
+ a = get_unaligned_be64(inkey + 8);
+
+ key->a = (a << 1) | (b >> 63);
+ key->b = (b << 1) | (a >> 63);
+
+ if (b >> 63)
+ key->b ^= 0xc200000000000000UL;
+
+ return 0;
+}
+
+static struct shash_alg ghash_alg = {
+ .digestsize = GHASH_DIGEST_SIZE,
+ .init = ghash_init,
+ .update = ghash_update,
+ .final = ghash_final,
+ .setkey = ghash_setkey,
+ .descsize = sizeof(struct ghash_desc_ctx),
+ .base = {
+ .cra_name = "ghash",
+ .cra_driver_name = "ghash-ce",
+ .cra_priority = 200,
+ .cra_flags = CRYPTO_ALG_TYPE_SHASH,
+ .cra_blocksize = GHASH_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct ghash_key),
+ .cra_module = THIS_MODULE,
+ },
+};
+
+static int __init ghash_ce_mod_init(void)
+{
+ return crypto_register_shash(&ghash_alg);
+}
+
+static void __exit ghash_ce_mod_exit(void)
+{
+ crypto_unregister_shash(&ghash_alg);
+}
+
+module_cpu_feature_match(PMULL, ghash_ce_mod_init);
+module_exit(ghash_ce_mod_exit);
diff --git a/arch/arm64/crypto/sha1-ce-core.S b/arch/arm64/crypto/sha1-ce-core.S
new file mode 100644
index 000000000000..09d57d98609c
--- /dev/null
+++ b/arch/arm64/crypto/sha1-ce-core.S
@@ -0,0 +1,153 @@
+/*
+ * sha1-ce-core.S - SHA-1 secure hash using ARMv8 Crypto Extensions
+ *
+ * Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+ .text
+ .arch armv8-a+crypto
+
+ k0 .req v0
+ k1 .req v1
+ k2 .req v2
+ k3 .req v3
+
+ t0 .req v4
+ t1 .req v5
+
+ dga .req q6
+ dgav .req v6
+ dgb .req s7
+ dgbv .req v7
+
+ dg0q .req q12
+ dg0s .req s12
+ dg0v .req v12
+ dg1s .req s13
+ dg1v .req v13
+ dg2s .req s14
+
+ .macro add_only, op, ev, rc, s0, dg1
+ .ifc \ev, ev
+ add t1.4s, v\s0\().4s, \rc\().4s
+ sha1h dg2s, dg0s
+ .ifnb \dg1
+ sha1\op dg0q, \dg1, t0.4s
+ .else
+ sha1\op dg0q, dg1s, t0.4s
+ .endif
+ .else
+ .ifnb \s0
+ add t0.4s, v\s0\().4s, \rc\().4s
+ .endif
+ sha1h dg1s, dg0s
+ sha1\op dg0q, dg2s, t1.4s
+ .endif
+ .endm
+
+ .macro add_update, op, ev, rc, s0, s1, s2, s3, dg1
+ sha1su0 v\s0\().4s, v\s1\().4s, v\s2\().4s
+ add_only \op, \ev, \rc, \s1, \dg1
+ sha1su1 v\s0\().4s, v\s3\().4s
+ .endm
+
+ /*
+ * The SHA1 round constants
+ */
+ .align 4
+.Lsha1_rcon:
+ .word 0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xca62c1d6
+
+ /*
+ * void sha1_ce_transform(int blocks, u8 const *src, u32 *state,
+ * u8 *head, long bytes)
+ */
+ENTRY(sha1_ce_transform)
+ /* load round constants */
+ adr x6, .Lsha1_rcon
+ ld1r {k0.4s}, [x6], #4
+ ld1r {k1.4s}, [x6], #4
+ ld1r {k2.4s}, [x6], #4
+ ld1r {k3.4s}, [x6]
+
+ /* load state */
+ ldr dga, [x2]
+ ldr dgb, [x2, #16]
+
+ /* load partial state (if supplied) */
+ cbz x3, 0f
+ ld1 {v8.4s-v11.4s}, [x3]
+ b 1f
+
+ /* load input */
+0: ld1 {v8.4s-v11.4s}, [x1], #64
+ sub w0, w0, #1
+
+1:
+CPU_LE( rev32 v8.16b, v8.16b )
+CPU_LE( rev32 v9.16b, v9.16b )
+CPU_LE( rev32 v10.16b, v10.16b )
+CPU_LE( rev32 v11.16b, v11.16b )
+
+2: add t0.4s, v8.4s, k0.4s
+ mov dg0v.16b, dgav.16b
+
+ add_update c, ev, k0, 8, 9, 10, 11, dgb
+ add_update c, od, k0, 9, 10, 11, 8
+ add_update c, ev, k0, 10, 11, 8, 9
+ add_update c, od, k0, 11, 8, 9, 10
+ add_update c, ev, k1, 8, 9, 10, 11
+
+ add_update p, od, k1, 9, 10, 11, 8
+ add_update p, ev, k1, 10, 11, 8, 9
+ add_update p, od, k1, 11, 8, 9, 10
+ add_update p, ev, k1, 8, 9, 10, 11
+ add_update p, od, k2, 9, 10, 11, 8
+
+ add_update m, ev, k2, 10, 11, 8, 9
+ add_update m, od, k2, 11, 8, 9, 10
+ add_update m, ev, k2, 8, 9, 10, 11
+ add_update m, od, k2, 9, 10, 11, 8
+ add_update m, ev, k3, 10, 11, 8, 9
+
+ add_update p, od, k3, 11, 8, 9, 10
+ add_only p, ev, k3, 9
+ add_only p, od, k3, 10
+ add_only p, ev, k3, 11
+ add_only p, od
+
+ /* update state */
+ add dgbv.2s, dgbv.2s, dg1v.2s
+ add dgav.4s, dgav.4s, dg0v.4s
+
+ cbnz w0, 0b
+
+ /*
+ * Final block: add padding and total bit count.
+ * Skip if we have no total byte count in x4. In that case, the input
+ * size was not a round multiple of the block size, and the padding is
+ * handled by the C code.
+ */
+ cbz x4, 3f
+ movi v9.2d, #0
+ mov x8, #0x80000000
+ movi v10.2d, #0
+ ror x7, x4, #29 // ror(lsl(x4, 3), 32)
+ fmov d8, x8
+ mov x4, #0
+ mov v11.d[0], xzr
+ mov v11.d[1], x7
+ b 2b
+
+ /* store new state */
+3: str dga, [x2]
+ str dgb, [x2, #16]
+ ret
+ENDPROC(sha1_ce_transform)
diff --git a/arch/arm64/crypto/sha1-ce-glue.c b/arch/arm64/crypto/sha1-ce-glue.c
new file mode 100644
index 000000000000..6fe83f37a750
--- /dev/null
+++ b/arch/arm64/crypto/sha1-ce-glue.c
@@ -0,0 +1,174 @@
+/*
+ * sha1-ce-glue.c - SHA-1 secure hash using ARMv8 Crypto Extensions
+ *
+ * Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <asm/neon.h>
+#include <asm/unaligned.h>
+#include <crypto/internal/hash.h>
+#include <crypto/sha.h>
+#include <linux/cpufeature.h>
+#include <linux/crypto.h>
+#include <linux/module.h>
+
+MODULE_DESCRIPTION("SHA1 secure hash using ARMv8 Crypto Extensions");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+
+asmlinkage void sha1_ce_transform(int blocks, u8 const *src, u32 *state,
+ u8 *head, long bytes);
+
+static int sha1_init(struct shash_desc *desc)
+{
+ struct sha1_state *sctx = shash_desc_ctx(desc);
+
+ *sctx = (struct sha1_state){
+ .state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 },
+ };
+ return 0;
+}
+
+static int sha1_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len)
+{
+ struct sha1_state *sctx = shash_desc_ctx(desc);
+ unsigned int partial = sctx->count % SHA1_BLOCK_SIZE;
+
+ sctx->count += len;
+
+ if ((partial + len) >= SHA1_BLOCK_SIZE) {
+ int blocks;
+
+ if (partial) {
+ int p = SHA1_BLOCK_SIZE - partial;
+
+ memcpy(sctx->buffer + partial, data, p);
+ data += p;
+ len -= p;
+ }
+
+ blocks = len / SHA1_BLOCK_SIZE;
+ len %= SHA1_BLOCK_SIZE;
+
+ kernel_neon_begin_partial(16);
+ sha1_ce_transform(blocks, data, sctx->state,
+ partial ? sctx->buffer : NULL, 0);
+ kernel_neon_end();
+
+ data += blocks * SHA1_BLOCK_SIZE;
+ partial = 0;
+ }
+ if (len)
+ memcpy(sctx->buffer + partial, data, len);
+ return 0;
+}
+
+static int sha1_final(struct shash_desc *desc, u8 *out)
+{
+ static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, };
+
+ struct sha1_state *sctx = shash_desc_ctx(desc);
+ __be64 bits = cpu_to_be64(sctx->count << 3);
+ __be32 *dst = (__be32 *)out;
+ int i;
+
+ u32 padlen = SHA1_BLOCK_SIZE
+ - ((sctx->count + sizeof(bits)) % SHA1_BLOCK_SIZE);
+
+ sha1_update(desc, padding, padlen);
+ sha1_update(desc, (const u8 *)&bits, sizeof(bits));
+
+ for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(__be32); i++)
+ put_unaligned_be32(sctx->state[i], dst++);
+
+ *sctx = (struct sha1_state){};
+ return 0;
+}
+
+static int sha1_finup(struct shash_desc *desc, const u8 *data,
+ unsigned int len, u8 *out)
+{
+ struct sha1_state *sctx = shash_desc_ctx(desc);
+ __be32 *dst = (__be32 *)out;
+ int blocks;
+ int i;
+
+ if (sctx->count || !len || (len % SHA1_BLOCK_SIZE)) {
+ sha1_update(desc, data, len);
+ return sha1_final(desc, out);
+ }
+
+ /*
+ * Use a fast path if the input is a multiple of 64 bytes. In
+ * this case, there is no need to copy data around, and we can
+ * perform the entire digest calculation in a single invocation
+ * of sha1_ce_transform()
+ */
+ blocks = len / SHA1_BLOCK_SIZE;
+
+ kernel_neon_begin_partial(16);
+ sha1_ce_transform(blocks, data, sctx->state, NULL, len);
+ kernel_neon_end();
+
+ for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(__be32); i++)
+ put_unaligned_be32(sctx->state[i], dst++);
+
+ *sctx = (struct sha1_state){};
+ return 0;
+}
+
+static int sha1_export(struct shash_desc *desc, void *out)
+{
+ struct sha1_state *sctx = shash_desc_ctx(desc);
+ struct sha1_state *dst = out;
+
+ *dst = *sctx;
+ return 0;
+}
+
+static int sha1_import(struct shash_desc *desc, const void *in)
+{
+ struct sha1_state *sctx = shash_desc_ctx(desc);
+ struct sha1_state const *src = in;
+
+ *sctx = *src;
+ return 0;
+}
+
+static struct shash_alg alg = {
+ .init = sha1_init,
+ .update = sha1_update,
+ .final = sha1_final,
+ .finup = sha1_finup,
+ .export = sha1_export,
+ .import = sha1_import,
+ .descsize = sizeof(struct sha1_state),
+ .digestsize = SHA1_DIGEST_SIZE,
+ .statesize = sizeof(struct sha1_state),
+ .base = {
+ .cra_name = "sha1",
+ .cra_driver_name = "sha1-ce",
+ .cra_priority = 200,
+ .cra_flags = CRYPTO_ALG_TYPE_SHASH,
+ .cra_blocksize = SHA1_BLOCK_SIZE,
+ .cra_module = THIS_MODULE,
+ }
+};
+
+static int __init sha1_ce_mod_init(void)
+{
+ return crypto_register_shash(&alg);
+}
+
+static void __exit sha1_ce_mod_fini(void)
+{
+ crypto_unregister_shash(&alg);
+}
+
+module_cpu_feature_match(SHA1, sha1_ce_mod_init);
+module_exit(sha1_ce_mod_fini);
diff --git a/arch/arm64/crypto/sha2-ce-core.S b/arch/arm64/crypto/sha2-ce-core.S
new file mode 100644
index 000000000000..7f29fc031ea8
--- /dev/null
+++ b/arch/arm64/crypto/sha2-ce-core.S
@@ -0,0 +1,156 @@
+/*
+ * sha2-ce-core.S - core SHA-224/SHA-256 transform using v8 Crypto Extensions
+ *
+ * Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+ .text
+ .arch armv8-a+crypto
+
+ dga .req q20
+ dgav .req v20
+ dgb .req q21
+ dgbv .req v21
+
+ t0 .req v22
+ t1 .req v23
+
+ dg0q .req q24
+ dg0v .req v24
+ dg1q .req q25
+ dg1v .req v25
+ dg2q .req q26
+ dg2v .req v26
+
+ .macro add_only, ev, rc, s0
+ mov dg2v.16b, dg0v.16b
+ .ifeq \ev
+ add t1.4s, v\s0\().4s, \rc\().4s
+ sha256h dg0q, dg1q, t0.4s
+ sha256h2 dg1q, dg2q, t0.4s
+ .else
+ .ifnb \s0
+ add t0.4s, v\s0\().4s, \rc\().4s
+ .endif
+ sha256h dg0q, dg1q, t1.4s
+ sha256h2 dg1q, dg2q, t1.4s
+ .endif
+ .endm
+
+ .macro add_update, ev, rc, s0, s1, s2, s3
+ sha256su0 v\s0\().4s, v\s1\().4s
+ add_only \ev, \rc, \s1
+ sha256su1 v\s0\().4s, v\s2\().4s, v\s3\().4s
+ .endm
+
+ /*
+ * The SHA-256 round constants
+ */
+ .align 4
+.Lsha2_rcon:
+ .word 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5
+ .word 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5
+ .word 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3
+ .word 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174
+ .word 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc
+ .word 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da
+ .word 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7
+ .word 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967
+ .word 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13
+ .word 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85
+ .word 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3
+ .word 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070
+ .word 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5
+ .word 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3
+ .word 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208
+ .word 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
+
+ /*
+ * void sha2_ce_transform(int blocks, u8 const *src, u32 *state,
+ * u8 *head, long bytes)
+ */
+ENTRY(sha2_ce_transform)
+ /* load round constants */
+ adr x8, .Lsha2_rcon
+ ld1 { v0.4s- v3.4s}, [x8], #64
+ ld1 { v4.4s- v7.4s}, [x8], #64
+ ld1 { v8.4s-v11.4s}, [x8], #64
+ ld1 {v12.4s-v15.4s}, [x8]
+
+ /* load state */
+ ldp dga, dgb, [x2]
+
+ /* load partial input (if supplied) */
+ cbz x3, 0f
+ ld1 {v16.4s-v19.4s}, [x3]
+ b 1f
+
+ /* load input */
+0: ld1 {v16.4s-v19.4s}, [x1], #64
+ sub w0, w0, #1
+
+1:
+CPU_LE( rev32 v16.16b, v16.16b )
+CPU_LE( rev32 v17.16b, v17.16b )
+CPU_LE( rev32 v18.16b, v18.16b )
+CPU_LE( rev32 v19.16b, v19.16b )
+
+2: add t0.4s, v16.4s, v0.4s
+ mov dg0v.16b, dgav.16b
+ mov dg1v.16b, dgbv.16b
+
+ add_update 0, v1, 16, 17, 18, 19
+ add_update 1, v2, 17, 18, 19, 16
+ add_update 0, v3, 18, 19, 16, 17
+ add_update 1, v4, 19, 16, 17, 18
+
+ add_update 0, v5, 16, 17, 18, 19
+ add_update 1, v6, 17, 18, 19, 16
+ add_update 0, v7, 18, 19, 16, 17
+ add_update 1, v8, 19, 16, 17, 18
+
+ add_update 0, v9, 16, 17, 18, 19
+ add_update 1, v10, 17, 18, 19, 16
+ add_update 0, v11, 18, 19, 16, 17
+ add_update 1, v12, 19, 16, 17, 18
+
+ add_only 0, v13, 17
+ add_only 1, v14, 18
+ add_only 0, v15, 19
+ add_only 1
+
+ /* update state */
+ add dgav.4s, dgav.4s, dg0v.4s
+ add dgbv.4s, dgbv.4s, dg1v.4s
+
+ /* handled all input blocks? */
+ cbnz w0, 0b
+
+ /*
+ * Final block: add padding and total bit count.
+ * Skip if we have no total byte count in x4. In that case, the input
+ * size was not a round multiple of the block size, and the padding is
+ * handled by the C code.
+ */
+ cbz x4, 3f
+ movi v17.2d, #0
+ mov x8, #0x80000000
+ movi v18.2d, #0
+ ror x7, x4, #29 // ror(lsl(x4, 3), 32)
+ fmov d16, x8
+ mov x4, #0
+ mov v19.d[0], xzr
+ mov v19.d[1], x7
+ b 2b
+
+ /* store new state */
+3: stp dga, dgb, [x2]
+ ret
+ENDPROC(sha2_ce_transform)
diff --git a/arch/arm64/crypto/sha2-ce-glue.c b/arch/arm64/crypto/sha2-ce-glue.c
new file mode 100644
index 000000000000..c294e67d3925
--- /dev/null
+++ b/arch/arm64/crypto/sha2-ce-glue.c
@@ -0,0 +1,255 @@
+/*
+ * sha2-ce-glue.c - SHA-224/SHA-256 using ARMv8 Crypto Extensions
+ *
+ * Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <asm/neon.h>
+#include <asm/unaligned.h>
+#include <crypto/internal/hash.h>
+#include <crypto/sha.h>
+#include <linux/cpufeature.h>
+#include <linux/crypto.h>
+#include <linux/module.h>
+
+MODULE_DESCRIPTION("SHA-224/SHA-256 secure hash using ARMv8 Crypto Extensions");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+
+asmlinkage int sha2_ce_transform(int blocks, u8 const *src, u32 *state,
+ u8 *head, long bytes);
+
+static int sha224_init(struct shash_desc *desc)
+{
+ struct sha256_state *sctx = shash_desc_ctx(desc);
+
+ *sctx = (struct sha256_state){
+ .state = {
+ SHA224_H0, SHA224_H1, SHA224_H2, SHA224_H3,
+ SHA224_H4, SHA224_H5, SHA224_H6, SHA224_H7,
+ }
+ };
+ return 0;
+}
+
+static int sha256_init(struct shash_desc *desc)
+{
+ struct sha256_state *sctx = shash_desc_ctx(desc);
+
+ *sctx = (struct sha256_state){
+ .state = {
+ SHA256_H0, SHA256_H1, SHA256_H2, SHA256_H3,
+ SHA256_H4, SHA256_H5, SHA256_H6, SHA256_H7,
+ }
+ };
+ return 0;
+}
+
+static int sha2_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len)
+{
+ struct sha256_state *sctx = shash_desc_ctx(desc);
+ unsigned int partial = sctx->count % SHA256_BLOCK_SIZE;
+
+ sctx->count += len;
+
+ if ((partial + len) >= SHA256_BLOCK_SIZE) {
+ int blocks;
+
+ if (partial) {
+ int p = SHA256_BLOCK_SIZE - partial;
+
+ memcpy(sctx->buf + partial, data, p);
+ data += p;
+ len -= p;
+ }
+
+ blocks = len / SHA256_BLOCK_SIZE;
+ len %= SHA256_BLOCK_SIZE;
+
+ kernel_neon_begin_partial(28);
+ sha2_ce_transform(blocks, data, sctx->state,
+ partial ? sctx->buf : NULL, 0);
+ kernel_neon_end();
+
+ data += blocks * SHA256_BLOCK_SIZE;
+ partial = 0;
+ }
+ if (len)
+ memcpy(sctx->buf + partial, data, len);
+ return 0;
+}
+
+static void sha2_final(struct shash_desc *desc)
+{
+ static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, };
+
+ struct sha256_state *sctx = shash_desc_ctx(desc);
+ __be64 bits = cpu_to_be64(sctx->count << 3);
+ u32 padlen = SHA256_BLOCK_SIZE
+ - ((sctx->count + sizeof(bits)) % SHA256_BLOCK_SIZE);
+
+ sha2_update(desc, padding, padlen);
+ sha2_update(desc, (const u8 *)&bits, sizeof(bits));
+}
+
+static int sha224_final(struct shash_desc *desc, u8 *out)
+{
+ struct sha256_state *sctx = shash_desc_ctx(desc);
+ __be32 *dst = (__be32 *)out;
+ int i;
+
+ sha2_final(desc);
+
+ for (i = 0; i < SHA224_DIGEST_SIZE / sizeof(__be32); i++)
+ put_unaligned_be32(sctx->state[i], dst++);
+
+ *sctx = (struct sha256_state){};
+ return 0;
+}
+
+static int sha256_final(struct shash_desc *desc, u8 *out)
+{
+ struct sha256_state *sctx = shash_desc_ctx(desc);
+ __be32 *dst = (__be32 *)out;
+ int i;
+
+ sha2_final(desc);
+
+ for (i = 0; i < SHA256_DIGEST_SIZE / sizeof(__be32); i++)
+ put_unaligned_be32(sctx->state[i], dst++);
+
+ *sctx = (struct sha256_state){};
+ return 0;
+}
+
+static void sha2_finup(struct shash_desc *desc, const u8 *data,
+ unsigned int len)
+{
+ struct sha256_state *sctx = shash_desc_ctx(desc);
+ int blocks;
+
+ if (sctx->count || !len || (len % SHA256_BLOCK_SIZE)) {
+ sha2_update(desc, data, len);
+ sha2_final(desc);
+ return;
+ }
+
+ /*
+ * Use a fast path if the input is a multiple of 64 bytes. In
+ * this case, there is no need to copy data around, and we can
+ * perform the entire digest calculation in a single invocation
+ * of sha2_ce_transform()
+ */
+ blocks = len / SHA256_BLOCK_SIZE;
+
+ kernel_neon_begin_partial(28);
+ sha2_ce_transform(blocks, data, sctx->state, NULL, len);
+ kernel_neon_end();
+ data += blocks * SHA256_BLOCK_SIZE;
+}
+
+static int sha224_finup(struct shash_desc *desc, const u8 *data,
+ unsigned int len, u8 *out)
+{
+ struct sha256_state *sctx = shash_desc_ctx(desc);
+ __be32 *dst = (__be32 *)out;
+ int i;
+
+ sha2_finup(desc, data, len);
+
+ for (i = 0; i < SHA224_DIGEST_SIZE / sizeof(__be32); i++)
+ put_unaligned_be32(sctx->state[i], dst++);
+
+ *sctx = (struct sha256_state){};
+ return 0;
+}
+
+static int sha256_finup(struct shash_desc *desc, const u8 *data,
+ unsigned int len, u8 *out)
+{
+ struct sha256_state *sctx = shash_desc_ctx(desc);
+ __be32 *dst = (__be32 *)out;
+ int i;
+
+ sha2_finup(desc, data, len);
+
+ for (i = 0; i < SHA256_DIGEST_SIZE / sizeof(__be32); i++)
+ put_unaligned_be32(sctx->state[i], dst++);
+
+ *sctx = (struct sha256_state){};
+ return 0;
+}
+
+static int sha2_export(struct shash_desc *desc, void *out)
+{
+ struct sha256_state *sctx = shash_desc_ctx(desc);
+ struct sha256_state *dst = out;
+
+ *dst = *sctx;
+ return 0;
+}
+
+static int sha2_import(struct shash_desc *desc, const void *in)
+{
+ struct sha256_state *sctx = shash_desc_ctx(desc);
+ struct sha256_state const *src = in;
+
+ *sctx = *src;
+ return 0;
+}
+
+static struct shash_alg algs[] = { {
+ .init = sha224_init,
+ .update = sha2_update,
+ .final = sha224_final,
+ .finup = sha224_finup,
+ .export = sha2_export,
+ .import = sha2_import,
+ .descsize = sizeof(struct sha256_state),
+ .digestsize = SHA224_DIGEST_SIZE,
+ .statesize = sizeof(struct sha256_state),
+ .base = {
+ .cra_name = "sha224",
+ .cra_driver_name = "sha224-ce",
+ .cra_priority = 200,
+ .cra_flags = CRYPTO_ALG_TYPE_SHASH,
+ .cra_blocksize = SHA256_BLOCK_SIZE,
+ .cra_module = THIS_MODULE,
+ }
+}, {
+ .init = sha256_init,
+ .update = sha2_update,
+ .final = sha256_final,
+ .finup = sha256_finup,
+ .export = sha2_export,
+ .import = sha2_import,
+ .descsize = sizeof(struct sha256_state),
+ .digestsize = SHA256_DIGEST_SIZE,
+ .statesize = sizeof(struct sha256_state),
+ .base = {
+ .cra_name = "sha256",
+ .cra_driver_name = "sha256-ce",
+ .cra_priority = 200,
+ .cra_flags = CRYPTO_ALG_TYPE_SHASH,
+ .cra_blocksize = SHA256_BLOCK_SIZE,
+ .cra_module = THIS_MODULE,
+ }
+} };
+
+static int __init sha2_ce_mod_init(void)
+{
+ return crypto_register_shashes(algs, ARRAY_SIZE(algs));
+}
+
+static void __exit sha2_ce_mod_fini(void)
+{
+ crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
+}
+
+module_cpu_feature_match(SHA2, sha2_ce_mod_init);
+module_exit(sha2_ce_mod_fini);
diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild
index 83f71b3004a8..42c7eecd2bb6 100644
--- a/arch/arm64/include/asm/Kbuild
+++ b/arch/arm64/include/asm/Kbuild
@@ -40,6 +40,7 @@ generic-y += segment.h
generic-y += sembuf.h
generic-y += serial.h
generic-y += shmbuf.h
+generic-y += simd.h
generic-y += sizes.h
generic-y += socket.h
generic-y += sockios.h
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index fd3e3924041b..5901480bfdca 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -21,6 +21,7 @@
#endif
#include <asm/ptrace.h>
+#include <asm/thread_info.h>
/*
* Stack pushing/popping (register pairs only). Equivalent to store decrement
@@ -68,23 +69,31 @@
msr daifclr, #8
.endm
- .macro disable_step, tmp
+ .macro disable_step_tsk, flgs, tmp
+ tbz \flgs, #TIF_SINGLESTEP, 9990f
mrs \tmp, mdscr_el1
bic \tmp, \tmp, #1
msr mdscr_el1, \tmp
+ isb // Synchronise with enable_dbg
+9990:
.endm
- .macro enable_step, tmp
+ .macro enable_step_tsk, flgs, tmp
+ tbz \flgs, #TIF_SINGLESTEP, 9990f
+ disable_dbg
mrs \tmp, mdscr_el1
orr \tmp, \tmp, #1
msr mdscr_el1, \tmp
+9990:
.endm
- .macro enable_dbg_if_not_stepping, tmp
- mrs \tmp, mdscr_el1
- tbnz \tmp, #0, 9990f
- enable_dbg
-9990:
+/*
+ * Enable both debug exceptions and interrupts. This is likely to be
+ * faster than two daifclr operations, since writes to this register
+ * are self-synchronising.
+ */
+ .macro enable_dbg_and_irq
+ msr daifclr, #(8 | 2)
.endm
/*
diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h
index 57e8cb49824c..65f1569ac96e 100644
--- a/arch/arm64/include/asm/atomic.h
+++ b/arch/arm64/include/asm/atomic.h
@@ -157,7 +157,7 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u)
*/
#define ATOMIC64_INIT(i) { (i) }
-#define atomic64_read(v) (*(volatile long long *)&(v)->counter)
+#define atomic64_read(v) (*(volatile long *)&(v)->counter)
#define atomic64_set(v,i) (((v)->counter) = (i))
static inline void atomic64_add(u64 i, atomic64_t *v)
diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
index 48b9e704af7c..6389d60574d9 100644
--- a/arch/arm64/include/asm/barrier.h
+++ b/arch/arm64/include/asm/barrier.h
@@ -25,12 +25,12 @@
#define wfi() asm volatile("wfi" : : : "memory")
#define isb() asm volatile("isb" : : : "memory")
-#define dmb(opt) asm volatile("dmb sy" : : : "memory")
-#define dsb(opt) asm volatile("dsb sy" : : : "memory")
+#define dmb(opt) asm volatile("dmb " #opt : : : "memory")
+#define dsb(opt) asm volatile("dsb " #opt : : : "memory")
-#define mb() dsb()
-#define rmb() asm volatile("dsb ld" : : : "memory")
-#define wmb() asm volatile("dsb st" : : : "memory")
+#define mb() dsb(sy)
+#define rmb() dsb(ld)
+#define wmb() dsb(st)
#ifndef CONFIG_SMP
#define smp_mb() barrier()
@@ -40,7 +40,7 @@
#define smp_store_release(p, v) \
do { \
compiletime_assert_atomic_type(*p); \
- smp_mb(); \
+ barrier(); \
ACCESS_ONCE(*p) = (v); \
} while (0)
@@ -48,15 +48,15 @@ do { \
({ \
typeof(*p) ___p1 = ACCESS_ONCE(*p); \
compiletime_assert_atomic_type(*p); \
- smp_mb(); \
+ barrier(); \
___p1; \
})
#else
-#define smp_mb() asm volatile("dmb ish" : : : "memory")
-#define smp_rmb() asm volatile("dmb ishld" : : : "memory")
-#define smp_wmb() asm volatile("dmb ishst" : : : "memory")
+#define smp_mb() dmb(ish)
+#define smp_rmb() dmb(ishld)
+#define smp_wmb() dmb(ishst)
#define smp_store_release(p, v) \
do { \
diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h
index 390308a67f0d..88cc05b5f3ac 100644
--- a/arch/arm64/include/asm/cache.h
+++ b/arch/arm64/include/asm/cache.h
@@ -16,6 +16,8 @@
#ifndef __ASM_CACHE_H
#define __ASM_CACHE_H
+#include <asm/cachetype.h>
+
#define L1_CACHE_SHIFT 6
#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT)
@@ -27,6 +29,15 @@
* the CPU.
*/
#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
-#define ARCH_SLAB_MINALIGN 8
+
+#ifndef __ASSEMBLY__
+
+static inline int cache_line_size(void)
+{
+ u32 cwg = cache_type_cwg();
+ return cwg ? 4 << cwg : L1_CACHE_BYTES;
+}
+
+#endif /* __ASSEMBLY__ */
#endif
diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
index 4c60e64a801c..a5176cf32dad 100644
--- a/arch/arm64/include/asm/cacheflush.h
+++ b/arch/arm64/include/asm/cacheflush.h
@@ -123,7 +123,7 @@ extern void flush_dcache_page(struct page *);
static inline void __flush_icache_all(void)
{
asm("ic ialluis");
- dsb();
+ dsb(ish);
}
#define flush_dcache_mmap_lock(mapping) \
@@ -150,7 +150,7 @@ static inline void flush_cache_vmap(unsigned long start, unsigned long end)
* set_pte_at() called from vmap_pte_range() does not
* have a DSB after cleaning the cache line.
*/
- dsb();
+ dsb(ish);
}
static inline void flush_cache_vunmap(unsigned long start, unsigned long end)
diff --git a/arch/arm64/include/asm/cachetype.h b/arch/arm64/include/asm/cachetype.h
index 85f5f511352a..4b23e758d5e0 100644
--- a/arch/arm64/include/asm/cachetype.h
+++ b/arch/arm64/include/asm/cachetype.h
@@ -20,12 +20,16 @@
#define CTR_L1IP_SHIFT 14
#define CTR_L1IP_MASK 3
+#define CTR_CWG_SHIFT 24
+#define CTR_CWG_MASK 15
#define ICACHE_POLICY_RESERVED 0
#define ICACHE_POLICY_AIVIVT 1
#define ICACHE_POLICY_VIPT 2
#define ICACHE_POLICY_PIPT 3
+#ifndef __ASSEMBLY__
+
static inline u32 icache_policy(void)
{
return (read_cpuid_cachetype() >> CTR_L1IP_SHIFT) & CTR_L1IP_MASK;
@@ -45,4 +49,11 @@ static inline int icache_is_aivivt(void)
return icache_policy() == ICACHE_POLICY_AIVIVT;
}
+static inline u32 cache_type_cwg(void)
+{
+ return (read_cpuid_cachetype() >> CTR_CWG_SHIFT) & CTR_CWG_MASK;
+}
+
+#endif /* __ASSEMBLY__ */
+
#endif /* __ASM_CACHETYPE_H */
diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h
index 57c0fa7bf711..ddb9d7830558 100644
--- a/arch/arm64/include/asm/cmpxchg.h
+++ b/arch/arm64/include/asm/cmpxchg.h
@@ -72,7 +72,12 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size
}
#define xchg(ptr,x) \
- ((__typeof__(*(ptr)))__xchg((unsigned long)(x),(ptr),sizeof(*(ptr))))
+({ \
+ __typeof__(*(ptr)) __ret; \
+ __ret = (__typeof__(*(ptr))) \
+ __xchg((unsigned long)(x), (ptr), sizeof(*(ptr))); \
+ __ret; \
+})
static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
unsigned long new, int size)
diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h
index e71f81fe127a..253e33bc94fb 100644
--- a/arch/arm64/include/asm/compat.h
+++ b/arch/arm64/include/asm/compat.h
@@ -305,11 +305,6 @@ static inline int is_compat_thread(struct thread_info *thread)
#else /* !CONFIG_COMPAT */
-static inline int is_compat_task(void)
-{
- return 0;
-}
-
static inline int is_compat_thread(struct thread_info *thread)
{
return 0;
diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h
new file mode 100644
index 000000000000..5a46c4e7f539
--- /dev/null
+++ b/arch/arm64/include/asm/efi.h
@@ -0,0 +1,14 @@
+#ifndef _ASM_EFI_H
+#define _ASM_EFI_H
+
+#include <asm/io.h>
+
+#ifdef CONFIG_EFI
+extern void efi_init(void);
+extern void efi_idmap_init(void);
+#else
+#define efi_init()
+#define efi_idmap_init()
+#endif
+
+#endif /* _ASM_EFI_H */
diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h
index c4a7f940b387..72674f4c3871 100644
--- a/arch/arm64/include/asm/esr.h
+++ b/arch/arm64/include/asm/esr.h
@@ -18,9 +18,11 @@
#ifndef __ASM_ESR_H
#define __ASM_ESR_H
-#define ESR_EL1_EC_SHIFT (26)
-#define ESR_EL1_IL (1U << 25)
+#define ESR_EL1_WRITE (1 << 6)
+#define ESR_EL1_CM (1 << 8)
+#define ESR_EL1_IL (1 << 25)
+#define ESR_EL1_EC_SHIFT (26)
#define ESR_EL1_EC_UNKNOWN (0x00)
#define ESR_EL1_EC_WFI (0x01)
#define ESR_EL1_EC_CP15_32 (0x03)
diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
index c43b4ac13008..50f559f574fe 100644
--- a/arch/arm64/include/asm/fpsimd.h
+++ b/arch/arm64/include/asm/fpsimd.h
@@ -37,8 +37,21 @@ struct fpsimd_state {
u32 fpcr;
};
};
+ /* the id of the last cpu to have restored this state */
+ unsigned int cpu;
};
+/*
+ * Struct for stacking the bottom 'n' FP/SIMD registers.
+ */
+struct fpsimd_partial_state {
+ u32 fpsr;
+ u32 fpcr;
+ u32 num_regs;
+ __uint128_t vregs[32];
+};
+
+
#if defined(__KERNEL__) && defined(CONFIG_COMPAT)
/* Masks for extracting the FPSR and FPCR from the FPSCR */
#define VFP_FPSCR_STAT_MASK 0xf800009f
@@ -58,6 +71,16 @@ extern void fpsimd_load_state(struct fpsimd_state *state);
extern void fpsimd_thread_switch(struct task_struct *next);
extern void fpsimd_flush_thread(void);
+extern void fpsimd_preserve_current_state(void);
+extern void fpsimd_restore_current_state(void);
+extern void fpsimd_update_current_state(struct fpsimd_state *state);
+
+extern void fpsimd_flush_task_state(struct task_struct *target);
+
+extern void fpsimd_save_partial_state(struct fpsimd_partial_state *state,
+ u32 num_regs);
+extern void fpsimd_load_partial_state(struct fpsimd_partial_state *state);
+
#endif
#endif
diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h
index bbec599c96bd..768414d55e64 100644
--- a/arch/arm64/include/asm/fpsimdmacros.h
+++ b/arch/arm64/include/asm/fpsimdmacros.h
@@ -62,3 +62,38 @@
ldr w\tmpnr, [\state, #16 * 2 + 4]
msr fpcr, x\tmpnr
.endm
+
+.altmacro
+.macro fpsimd_save_partial state, numnr, tmpnr1, tmpnr2
+ mrs x\tmpnr1, fpsr
+ str w\numnr, [\state, #8]
+ mrs x\tmpnr2, fpcr
+ stp w\tmpnr1, w\tmpnr2, [\state]
+ adr x\tmpnr1, 0f
+ add \state, \state, x\numnr, lsl #4
+ sub x\tmpnr1, x\tmpnr1, x\numnr, lsl #1
+ br x\tmpnr1
+ .irp qa, 30, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, 0
+ .irp qb, %(qa + 1)
+ stp q\qa, q\qb, [\state, # -16 * \qa - 16]
+ .endr
+ .endr
+0:
+.endm
+
+.macro fpsimd_restore_partial state, tmpnr1, tmpnr2
+ ldp w\tmpnr1, w\tmpnr2, [\state]
+ msr fpsr, x\tmpnr1
+ msr fpcr, x\tmpnr2
+ adr x\tmpnr1, 0f
+ ldr w\tmpnr2, [\state, #8]
+ add \state, \state, x\tmpnr2, lsl #4
+ sub x\tmpnr1, x\tmpnr1, x\tmpnr2, lsl #1
+ br x\tmpnr1
+ .irp qa, 30, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, 0
+ .irp qb, %(qa + 1)
+ ldp q\qa, q\qb, [\state, # -16 * \qa - 16]
+ .endr
+ .endr
+0:
+.endm
diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h
new file mode 100644
index 000000000000..c5534facf941
--- /dev/null
+++ b/arch/arm64/include/asm/ftrace.h
@@ -0,0 +1,59 @@
+/*
+ * arch/arm64/include/asm/ftrace.h
+ *
+ * Copyright (C) 2013 Linaro Limited
+ * Author: AKASHI Takahiro <takahiro.akashi@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_FTRACE_H
+#define __ASM_FTRACE_H
+
+#include <asm/insn.h>
+
+#define MCOUNT_ADDR ((unsigned long)_mcount)
+#define MCOUNT_INSN_SIZE AARCH64_INSN_SIZE
+
+#ifndef __ASSEMBLY__
+#include <linux/compat.h>
+
+extern void _mcount(unsigned long);
+extern void *return_address(unsigned int);
+
+struct dyn_arch_ftrace {
+ /* No extra data needed for arm64 */
+};
+
+extern unsigned long ftrace_graph_call;
+
+static inline unsigned long ftrace_call_adjust(unsigned long addr)
+{
+ /*
+ * addr is the address of the mcount call instruction.
+ * recordmcount does the necessary offset calculation.
+ */
+ return addr;
+}
+
+#define ftrace_return_address(n) return_address(n)
+
+/*
+ * Because AArch32 mode does not share the same syscall table with AArch64,
+ * tracing compat syscalls may result in reporting bogus syscalls or even
+ * hang-up, so just do not trace them.
+ * See kernel/trace/trace_syscalls.c
+ *
+ * x86 code says:
+ * If the user realy wants these, then they should use the
+ * raw syscall tracepoints with filtering.
+ */
+#define ARCH_TRACE_IGNORE_COMPAT_SYSCALLS
+static inline bool arch_trace_is_compat_syscall(struct pt_regs *regs)
+{
+ return is_compat_task();
+}
+#endif /* ifndef __ASSEMBLY__ */
+
+#endif /* __ASM_FTRACE_H */
diff --git a/arch/arm64/include/asm/hardirq.h b/arch/arm64/include/asm/hardirq.h
index ae4801d77514..0be67821f9ce 100644
--- a/arch/arm64/include/asm/hardirq.h
+++ b/arch/arm64/include/asm/hardirq.h
@@ -20,7 +20,7 @@
#include <linux/threads.h>
#include <asm/irq.h>
-#define NR_IPI 5
+#define NR_IPI 6
typedef struct {
unsigned int __softirq_pending;
diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h
index c44ad39ed310..dc1f73b13e74 100644
--- a/arch/arm64/include/asm/insn.h
+++ b/arch/arm64/include/asm/insn.h
@@ -21,6 +21,7 @@
/* A64 instructions are always 32 bits. */
#define AARCH64_INSN_SIZE 4
+#ifndef __ASSEMBLY__
/*
* ARM Architecture Reference Manual for ARMv8 Profile-A, Issue A.a
* Section C3.1 "A64 instruction index by encoding":
@@ -104,5 +105,6 @@ bool aarch64_insn_hotpatch_safe(u32 old_insn, u32 new_insn);
int aarch64_insn_patch_text_nosync(void *addr, u32 insn);
int aarch64_insn_patch_text_sync(void *addrs[], u32 insns[], int cnt);
int aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt);
+#endif /* __ASSEMBLY__ */
#endif /* __ASM_INSN_H */
diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h
index a1bef78f0303..e0ecdcf6632d 100644
--- a/arch/arm64/include/asm/io.h
+++ b/arch/arm64/include/asm/io.h
@@ -230,19 +230,11 @@ extern void __iomem *__ioremap(phys_addr_t phys_addr, size_t size, pgprot_t prot
extern void __iounmap(volatile void __iomem *addr);
extern void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size);
-#define PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_DIRTY)
-#define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_DEVICE_nGnRE))
-#define PROT_NORMAL_NC (PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL_NC))
-#define PROT_NORMAL (PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL))
-
#define ioremap(addr, size) __ioremap((addr), (size), __pgprot(PROT_DEVICE_nGnRE))
#define ioremap_nocache(addr, size) __ioremap((addr), (size), __pgprot(PROT_DEVICE_nGnRE))
#define ioremap_wc(addr, size) __ioremap((addr), (size), __pgprot(PROT_NORMAL_NC))
#define iounmap __iounmap
-#define PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF)
-#define PROT_SECT_DEVICE_nGnRE (PROT_SECT_DEFAULT | PTE_PXN | PTE_UXN | PMD_ATTRINDX(MT_DEVICE_nGnRE))
-
#define ARCH_HAS_IOREMAP_WC
#include <asm-generic/iomap.h>
diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h
index aff0292c8f4d..c2f006c48bdb 100644
--- a/arch/arm64/include/asm/mmu.h
+++ b/arch/arm64/include/asm/mmu.h
@@ -31,5 +31,7 @@ extern void paging_init(void);
extern void setup_mm_for_reboot(void);
extern void __iomem *early_io_map(phys_addr_t phys, unsigned long virt);
extern void init_mem_pgprot(void);
+/* create an identity mapping for memory (or io if map_io is true) */
+extern void create_id_mapping(phys_addr_t addr, phys_addr_t size, int map_io);
#endif
diff --git a/arch/arm64/include/asm/neon.h b/arch/arm64/include/asm/neon.h
index b0cc58a97780..13ce4cc18e26 100644
--- a/arch/arm64/include/asm/neon.h
+++ b/arch/arm64/include/asm/neon.h
@@ -8,7 +8,11 @@
* published by the Free Software Foundation.
*/
+#include <linux/types.h>
+
#define cpu_has_neon() (1)
-void kernel_neon_begin(void);
+#define kernel_neon_begin() kernel_neon_begin_partial(32)
+
+void kernel_neon_begin_partial(u32 num_regs);
void kernel_neon_end(void);
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index 5fc8a66c3924..955e8c5f0afb 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -29,6 +29,8 @@
*/
#define PUD_TABLE_BIT (_AT(pgdval_t, 1) << 1)
+#define PUD_TYPE_MASK (_AT(pgdval_t, 3) << 0)
+#define PUD_TYPE_SECT (_AT(pgdval_t, 1) << 0)
/*
* Level 2 descriptor (PMD).
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index e2f96748859b..598cc384fc1c 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -52,66 +52,59 @@ extern void __pgd_error(const char *file, int line, unsigned long val);
#endif
#define pgd_ERROR(pgd) __pgd_error(__FILE__, __LINE__, pgd_val(pgd))
-/*
- * The pgprot_* and protection_map entries will be fixed up at runtime to
- * include the cachable and bufferable bits based on memory policy, as well as
- * any architecture dependent bits like global/ASID and SMP shared mapping
- * bits.
- */
-#define _PAGE_DEFAULT PTE_TYPE_PAGE | PTE_AF
+#ifdef CONFIG_SMP
+#define PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
+#define PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
+#else
+#define PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF)
+#define PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF)
+#endif
-extern pgprot_t pgprot_default;
+#define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_DEVICE_nGnRE))
+#define PROT_NORMAL_NC (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_NORMAL_NC))
+#define PROT_NORMAL (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_NORMAL))
-#define __pgprot_modify(prot,mask,bits) \
- __pgprot((pgprot_val(prot) & ~(mask)) | (bits))
+#define PROT_SECT_DEVICE_nGnRE (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_DEVICE_nGnRE))
+#define PROT_SECT_NORMAL (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL))
+#define PROT_SECT_NORMAL_EXEC (PROT_SECT_DEFAULT | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL))
-#define _MOD_PROT(p, b) __pgprot_modify(p, 0, b)
+#define _PAGE_DEFAULT (PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL))
-#define PAGE_NONE __pgprot_modify(pgprot_default, PTE_TYPE_MASK, PTE_PROT_NONE | PTE_PXN | PTE_UXN)
-#define PAGE_SHARED _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE)
-#define PAGE_SHARED_EXEC _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_WRITE)
-#define PAGE_COPY _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_UXN)
-#define PAGE_COPY_EXEC _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN)
-#define PAGE_READONLY _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_UXN)
-#define PAGE_READONLY_EXEC _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN)
-#define PAGE_KERNEL _MOD_PROT(pgprot_default, PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE)
-#define PAGE_KERNEL_EXEC _MOD_PROT(pgprot_default, PTE_UXN | PTE_DIRTY | PTE_WRITE)
+#define PAGE_KERNEL __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE)
+#define PAGE_KERNEL_EXEC __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE)
-#define PAGE_HYP _MOD_PROT(pgprot_default, PTE_HYP)
+#define PAGE_HYP __pgprot(_PAGE_DEFAULT | PTE_HYP)
#define PAGE_HYP_DEVICE __pgprot(PROT_DEVICE_nGnRE | PTE_HYP)
-#define PAGE_S2 __pgprot_modify(pgprot_default, PTE_S2_MEMATTR_MASK, PTE_S2_MEMATTR(MT_S2_NORMAL) | PTE_S2_RDONLY)
+#define PAGE_S2 __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_NORMAL) | PTE_S2_RDONLY)
#define PAGE_S2_DEVICE __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_DEVICE_nGnRE) | PTE_S2_RDWR | PTE_UXN)
-#define __PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_TYPE_MASK) | PTE_PROT_NONE | PTE_PXN | PTE_UXN)
-#define __PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE)
-#define __PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_WRITE)
-#define __PAGE_COPY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN)
-#define __PAGE_COPY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN)
-#define __PAGE_READONLY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN)
-#define __PAGE_READONLY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN)
-
-#endif /* __ASSEMBLY__ */
-
-#define __P000 __PAGE_NONE
-#define __P001 __PAGE_READONLY
-#define __P010 __PAGE_COPY
-#define __P011 __PAGE_COPY
-#define __P100 __PAGE_READONLY_EXEC
-#define __P101 __PAGE_READONLY_EXEC
-#define __P110 __PAGE_COPY_EXEC
-#define __P111 __PAGE_COPY_EXEC
-
-#define __S000 __PAGE_NONE
-#define __S001 __PAGE_READONLY
-#define __S010 __PAGE_SHARED
-#define __S011 __PAGE_SHARED
-#define __S100 __PAGE_READONLY_EXEC
-#define __S101 __PAGE_READONLY_EXEC
-#define __S110 __PAGE_SHARED_EXEC
-#define __S111 __PAGE_SHARED_EXEC
+#define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_TYPE_MASK) | PTE_PROT_NONE | PTE_PXN | PTE_UXN)
+#define PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE)
+#define PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_WRITE)
+#define PAGE_COPY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN)
+#define PAGE_COPY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN)
+#define PAGE_READONLY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN)
+#define PAGE_READONLY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN)
+
+#define __P000 PAGE_NONE
+#define __P001 PAGE_READONLY
+#define __P010 PAGE_COPY
+#define __P011 PAGE_COPY
+#define __P100 PAGE_READONLY_EXEC
+#define __P101 PAGE_READONLY_EXEC
+#define __P110 PAGE_COPY_EXEC
+#define __P111 PAGE_COPY_EXEC
+
+#define __S000 PAGE_NONE
+#define __S001 PAGE_READONLY
+#define __S010 PAGE_SHARED
+#define __S011 PAGE_SHARED
+#define __S100 PAGE_READONLY_EXEC
+#define __S101 PAGE_READONLY_EXEC
+#define __S110 PAGE_SHARED_EXEC
+#define __S111 PAGE_SHARED_EXEC
-#ifndef __ASSEMBLY__
/*
* ZERO_PAGE is a global shared page that is always zero: used
* for zero-mapped memory areas etc..
@@ -265,6 +258,7 @@ static inline pmd_t pte_pmd(pte_t pte)
#define mk_pmd(page,prot) pfn_pmd(page_to_pfn(page),prot)
#define pmd_page(pmd) pfn_to_page(__phys_to_pfn(pmd_val(pmd) & PHYS_MASK))
+#define pud_pfn(pud) (((pud_val(pud) & PUD_MASK) & PHYS_MASK) >> PAGE_SHIFT)
#define set_pmd_at(mm, addr, pmdp, pmd) set_pte_at(mm, addr, (pte_t *)pmdp, pmd_pte(pmd))
@@ -273,6 +267,9 @@ static inline int has_transparent_hugepage(void)
return 1;
}
+#define __pgprot_modify(prot,mask,bits) \
+ __pgprot((pgprot_val(prot) & ~(mask)) | (bits))
+
/*
* Mark the prot value as uncacheable and unbufferable.
*/
@@ -295,11 +292,17 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
#define pmd_sect(pmd) ((pmd_val(pmd) & PMD_TYPE_MASK) == \
PMD_TYPE_SECT)
+#ifdef ARM64_64K_PAGES
+#define pud_sect(pud) (0)
+#else
+#define pud_sect(pud) ((pud_val(pud) & PUD_TYPE_MASK) == \
+ PUD_TYPE_SECT)
+#endif
static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
{
*pmdp = pmd;
- dsb();
+ dsb(ishst);
}
static inline void pmd_clear(pmd_t *pmdp)
@@ -329,7 +332,7 @@ static inline pte_t *pmd_page_vaddr(pmd_t pmd)
static inline void set_pud(pud_t *pudp, pud_t pud)
{
*pudp = pud;
- dsb();
+ dsb(ishst);
}
static inline void pud_clear(pud_t *pudp)
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 45b20cd6cbca..34de2a8f7d93 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -79,6 +79,7 @@ struct thread_struct {
unsigned long tp_value;
struct fpsimd_state fpsimd_state;
unsigned long fault_address; /* fault info */
+ unsigned long fault_code; /* ESR_EL1 value */
struct debug_info debug; /* debugging */
};
diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h
index c7ba261dd4b3..a429b5940be2 100644
--- a/arch/arm64/include/asm/ptrace.h
+++ b/arch/arm64/include/asm/ptrace.h
@@ -135,6 +135,11 @@ struct pt_regs {
#define user_stack_pointer(regs) \
(!compat_user_mode(regs)) ? ((regs)->sp) : ((regs)->compat_sp)
+static inline unsigned long regs_return_value(struct pt_regs *regs)
+{
+ return regs->regs[0];
+}
+
/*
* Are the current registers suitable for user mode? (used to maintain
* security in signal handlers)
diff --git a/arch/arm64/include/asm/sigcontext.h b/arch/arm64/include/asm/sigcontext.h
deleted file mode 100644
index dca1094acc74..000000000000
--- a/arch/arm64/include/asm/sigcontext.h
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-#ifndef __ASM_SIGCONTEXT_H
-#define __ASM_SIGCONTEXT_H
-
-#include <uapi/asm/sigcontext.h>
-
-/*
- * Auxiliary context saved in the sigcontext.__reserved array. Not exported to
- * user space as it will change with the addition of new context. User space
- * should check the magic/size information.
- */
-struct aux_context {
- struct fpsimd_context fpsimd;
- /* additional context to be added before "end" */
- struct _aarch64_ctx end;
-};
-#endif
diff --git a/arch/arm64/include/asm/string.h b/arch/arm64/include/asm/string.h
index 3ee8b303d9a9..64d2d4884a9d 100644
--- a/arch/arm64/include/asm/string.h
+++ b/arch/arm64/include/asm/string.h
@@ -22,6 +22,18 @@ extern char *strrchr(const char *, int c);
#define __HAVE_ARCH_STRCHR
extern char *strchr(const char *, int c);
+#define __HAVE_ARCH_STRCMP
+extern int strcmp(const char *, const char *);
+
+#define __HAVE_ARCH_STRNCMP
+extern int strncmp(const char *, const char *, __kernel_size_t);
+
+#define __HAVE_ARCH_STRLEN
+extern __kernel_size_t strlen(const char *);
+
+#define __HAVE_ARCH_STRNLEN
+extern __kernel_size_t strnlen(const char *, __kernel_size_t);
+
#define __HAVE_ARCH_MEMCPY
extern void *memcpy(void *, const void *, __kernel_size_t);
@@ -34,4 +46,7 @@ extern void *memchr(const void *, int, __kernel_size_t);
#define __HAVE_ARCH_MEMSET
extern void *memset(void *, int, __kernel_size_t);
+#define __HAVE_ARCH_MEMCMP
+extern int memcmp(const void *, const void *, size_t);
+
#endif
diff --git a/arch/arm64/include/asm/syscall.h b/arch/arm64/include/asm/syscall.h
index 70ba9d4ee978..383771eb0b87 100644
--- a/arch/arm64/include/asm/syscall.h
+++ b/arch/arm64/include/asm/syscall.h
@@ -18,6 +18,7 @@
#include <linux/err.h>
+extern const void *sys_call_table[];
static inline int syscall_get_nr(struct task_struct *task,
struct pt_regs *regs)
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index 7b8e3a2a00fb..e40b6d06d515 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -91,6 +91,9 @@ static inline struct thread_info *current_thread_info(void)
/*
* thread information flags:
* TIF_SYSCALL_TRACE - syscall trace active
+ * TIF_SYSCALL_TRACEPOINT - syscall tracepoint for ftrace
+ * TIF_SYSCALL_AUDIT - syscall auditing
+ * TIF_SECOMP - syscall secure computing
* TIF_SIGPENDING - signal pending
* TIF_NEED_RESCHED - rescheduling necessary
* TIF_NOTIFY_RESUME - callback before returning to user
@@ -99,7 +102,11 @@ static inline struct thread_info *current_thread_info(void)
#define TIF_SIGPENDING 0
#define TIF_NEED_RESCHED 1
#define TIF_NOTIFY_RESUME 2 /* callback before returning to user */
+#define TIF_FOREIGN_FPSTATE 3 /* CPU's FP state is not current's */
#define TIF_SYSCALL_TRACE 8
+#define TIF_SYSCALL_AUDIT 9
+#define TIF_SYSCALL_TRACEPOINT 10
+#define TIF_SECCOMP 11
#define TIF_MEMDIE 18 /* is terminating due to OOM killer */
#define TIF_FREEZE 19
#define TIF_RESTORE_SIGMASK 20
@@ -110,10 +117,18 @@ static inline struct thread_info *current_thread_info(void)
#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
+#define _TIF_FOREIGN_FPSTATE (1 << TIF_FOREIGN_FPSTATE)
+#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
+#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
+#define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT)
+#define _TIF_SECCOMP (1 << TIF_SECCOMP)
#define _TIF_32BIT (1 << TIF_32BIT)
#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
- _TIF_NOTIFY_RESUME)
+ _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE)
+
+#define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
+ _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP)
#endif /* __KERNEL__ */
#endif /* __ASM_THREAD_INFO_H */
diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index 8b482035cfc2..b9349c4513ea 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -72,9 +72,9 @@ extern struct cpu_tlb_fns cpu_tlb;
*/
static inline void flush_tlb_all(void)
{
- dsb();
+ dsb(ishst);
asm("tlbi vmalle1is");
- dsb();
+ dsb(ish);
isb();
}
@@ -82,9 +82,9 @@ static inline void flush_tlb_mm(struct mm_struct *mm)
{
unsigned long asid = (unsigned long)ASID(mm) << 48;
- dsb();
+ dsb(ishst);
asm("tlbi aside1is, %0" : : "r" (asid));
- dsb();
+ dsb(ish);
}
static inline void flush_tlb_page(struct vm_area_struct *vma,
@@ -93,16 +93,36 @@ static inline void flush_tlb_page(struct vm_area_struct *vma,
unsigned long addr = uaddr >> 12 |
((unsigned long)ASID(vma->vm_mm) << 48);
- dsb();
+ dsb(ishst);
asm("tlbi vae1is, %0" : : "r" (addr));
- dsb();
+ dsb(ish);
}
-/*
- * Convert calls to our calling convention.
- */
-#define flush_tlb_range(vma,start,end) __cpu_flush_user_tlb_range(start,end,vma)
-#define flush_tlb_kernel_range(s,e) __cpu_flush_kern_tlb_range(s,e)
+static inline void flush_tlb_range(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end)
+{
+ unsigned long asid = (unsigned long)ASID(vma->vm_mm) << 48;
+ unsigned long addr;
+ start = asid | (start >> 12);
+ end = asid | (end >> 12);
+
+ dsb(ishst);
+ for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12))
+ asm("tlbi vae1is, %0" : : "r"(addr));
+ dsb(ish);
+}
+
+static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end)
+{
+ unsigned long addr;
+ start >>= 12;
+ end >>= 12;
+
+ dsb(ishst);
+ for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12))
+ asm("tlbi vaae1is, %0" : : "r"(addr));
+ dsb(ish);
+}
/*
* On AArch64, the cache coherency is handled via the set_pte_at() function.
@@ -114,7 +134,7 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
* set_pte() does not have a DSB, so make sure that the page table
* write is visible.
*/
- dsb();
+ dsb(ishst);
}
#define update_mmu_cache_pmd(vma, address, pmd) do { } while (0)
diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h
index 0172e6d76bf3..7ebcd31ce51c 100644
--- a/arch/arm64/include/asm/topology.h
+++ b/arch/arm64/include/asm/topology.h
@@ -20,9 +20,6 @@ extern struct cpu_topology cpu_topology[NR_CPUS];
#define topology_core_cpumask(cpu) (&cpu_topology[cpu].core_sibling)
#define topology_thread_cpumask(cpu) (&cpu_topology[cpu].thread_sibling)
-#define mc_capable() (cpu_topology[0].cluster_id != -1)
-#define smt_capable() (cpu_topology[0].thread_id != -1)
-
void init_cpu_topology(void);
void store_cpu_topology(unsigned int cpuid);
const struct cpumask *cpu_coregroup_mask(int cpu);
diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h
index a4654c656a1e..e5f47df00c24 100644
--- a/arch/arm64/include/asm/unistd.h
+++ b/arch/arm64/include/asm/unistd.h
@@ -29,3 +29,5 @@
#endif
#define __ARCH_WANT_SYS_CLONE
#include <uapi/asm/unistd.h>
+
+#define NR_syscalls (__NR_syscalls)
diff --git a/arch/arm64/include/uapi/asm/sigcontext.h b/arch/arm64/include/uapi/asm/sigcontext.h
index 690ad51cc901..b72cf405b3fe 100644
--- a/arch/arm64/include/uapi/asm/sigcontext.h
+++ b/arch/arm64/include/uapi/asm/sigcontext.h
@@ -53,5 +53,12 @@ struct fpsimd_context {
__uint128_t vregs[32];
};
+/* ESR_EL1 context */
+#define ESR_MAGIC 0x45535201
+
+struct esr_context {
+ struct _aarch64_ctx head;
+ u64 esr;
+};
#endif /* _UAPI__ASM_SIGCONTEXT_H */
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 7a6fce5167e9..cdaedad3afe5 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -4,15 +4,22 @@
CPPFLAGS_vmlinux.lds := -DTEXT_OFFSET=$(TEXT_OFFSET)
AFLAGS_head.o := -DTEXT_OFFSET=$(TEXT_OFFSET)
+CFLAGS_efi-stub.o := -DTEXT_OFFSET=$(TEXT_OFFSET) \
+ -I$(src)/../../../scripts/dtc/libfdt
+
+CFLAGS_REMOVE_ftrace.o = -pg
+CFLAGS_REMOVE_insn.o = -pg
+CFLAGS_REMOVE_return_address.o = -pg
# Object file lists.
arm64-obj-y := cputable.o debug-monitors.o entry.o irq.o fpsimd.o \
entry-fpsimd.o process.o ptrace.o setup.o signal.o \
sys.o stacktrace.o time.o traps.o io.o vdso.o \
- hyp-stub.o psci.o cpu_ops.o insn.o
+ hyp-stub.o psci.o cpu_ops.o insn.o return_address.o
arm64-obj-$(CONFIG_COMPAT) += sys32.o kuser32.o signal32.o \
sys_compat.o
+arm64-obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o entry-ftrace.o
arm64-obj-$(CONFIG_MODULES) += arm64ksyms.o module.o
arm64-obj-$(CONFIG_SMP) += smp.o smp_spin_table.o topology.o
arm64-obj-$(CONFIG_PERF_EVENTS) += perf_regs.o
@@ -21,6 +28,7 @@ arm64-obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
arm64-obj-$(CONFIG_ARM64_CPU_SUSPEND) += sleep.o suspend.o
arm64-obj-$(CONFIG_JUMP_LABEL) += jump_label.o
arm64-obj-$(CONFIG_KGDB) += kgdb.o
+arm64-obj-$(CONFIG_EFI) += efi.o efi-stub.o efi-entry.o
obj-y += $(arm64-obj-y) vdso/
obj-m += $(arm64-obj-m)
diff --git a/arch/arm64/kernel/arm64ksyms.c b/arch/arm64/kernel/arm64ksyms.c
index 338b568cd8ae..a85843ddbde8 100644
--- a/arch/arm64/kernel/arm64ksyms.c
+++ b/arch/arm64/kernel/arm64ksyms.c
@@ -44,10 +44,15 @@ EXPORT_SYMBOL(memstart_addr);
/* string / mem functions */
EXPORT_SYMBOL(strchr);
EXPORT_SYMBOL(strrchr);
+EXPORT_SYMBOL(strcmp);
+EXPORT_SYMBOL(strncmp);
+EXPORT_SYMBOL(strlen);
+EXPORT_SYMBOL(strnlen);
EXPORT_SYMBOL(memset);
EXPORT_SYMBOL(memcpy);
EXPORT_SYMBOL(memmove);
EXPORT_SYMBOL(memchr);
+EXPORT_SYMBOL(memcmp);
/* atomic bitops */
EXPORT_SYMBOL(set_bit);
@@ -56,3 +61,7 @@ EXPORT_SYMBOL(clear_bit);
EXPORT_SYMBOL(test_and_clear_bit);
EXPORT_SYMBOL(change_bit);
EXPORT_SYMBOL(test_and_change_bit);
+
+#ifdef CONFIG_FUNCTION_TRACER
+EXPORT_SYMBOL(_mcount);
+#endif
diff --git a/arch/arm64/kernel/efi-entry.S b/arch/arm64/kernel/efi-entry.S
new file mode 100644
index 000000000000..66716c9b9e5f
--- /dev/null
+++ b/arch/arm64/kernel/efi-entry.S
@@ -0,0 +1,109 @@
+/*
+ * EFI entry point.
+ *
+ * Copyright (C) 2013, 2014 Red Hat, Inc.
+ * Author: Mark Salter <msalter@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+#include <linux/linkage.h>
+#include <linux/init.h>
+
+#include <asm/assembler.h>
+
+#define EFI_LOAD_ERROR 0x8000000000000001
+
+ __INIT
+
+ /*
+ * We arrive here from the EFI boot manager with:
+ *
+ * * CPU in little-endian mode
+ * * MMU on with identity-mapped RAM
+ * * Icache and Dcache on
+ *
+ * We will most likely be running from some place other than where
+ * we want to be. The kernel image wants to be placed at TEXT_OFFSET
+ * from start of RAM.
+ */
+ENTRY(efi_stub_entry)
+ /*
+ * Create a stack frame to save FP/LR with extra space
+ * for image_addr variable passed to efi_entry().
+ */
+ stp x29, x30, [sp, #-32]!
+
+ /*
+ * Call efi_entry to do the real work.
+ * x0 and x1 are already set up by firmware. Current runtime
+ * address of image is calculated and passed via *image_addr.
+ *
+ * unsigned long efi_entry(void *handle,
+ * efi_system_table_t *sys_table,
+ * unsigned long *image_addr) ;
+ */
+ adrp x8, _text
+ add x8, x8, #:lo12:_text
+ add x2, sp, 16
+ str x8, [x2]
+ bl efi_entry
+ cmn x0, #1
+ b.eq efi_load_fail
+
+ /*
+ * efi_entry() will have relocated the kernel image if necessary
+ * and we return here with device tree address in x0 and the kernel
+ * entry point stored at *image_addr. Save those values in registers
+ * which are callee preserved.
+ */
+ mov x20, x0 // DTB address
+ ldr x0, [sp, #16] // relocated _text address
+ mov x21, x0
+
+ /*
+ * Flush dcache covering current runtime addresses
+ * of kernel text/data. Then flush all of icache.
+ */
+ adrp x1, _text
+ add x1, x1, #:lo12:_text
+ adrp x2, _edata
+ add x2, x2, #:lo12:_edata
+ sub x1, x2, x1
+
+ bl __flush_dcache_area
+ ic ialluis
+
+ /* Turn off Dcache and MMU */
+ mrs x0, CurrentEL
+ cmp x0, #PSR_MODE_EL2t
+ ccmp x0, #PSR_MODE_EL2h, #0x4, ne
+ b.ne 1f
+ mrs x0, sctlr_el2
+ bic x0, x0, #1 << 0 // clear SCTLR.M
+ bic x0, x0, #1 << 2 // clear SCTLR.C
+ msr sctlr_el2, x0
+ isb
+ b 2f
+1:
+ mrs x0, sctlr_el1
+ bic x0, x0, #1 << 0 // clear SCTLR.M
+ bic x0, x0, #1 << 2 // clear SCTLR.C
+ msr sctlr_el1, x0
+ isb
+2:
+ /* Jump to kernel entry point */
+ mov x0, x20
+ mov x1, xzr
+ mov x2, xzr
+ mov x3, xzr
+ br x21
+
+efi_load_fail:
+ mov x0, #EFI_LOAD_ERROR
+ ldp x29, x30, [sp], #32
+ ret
+
+ENDPROC(efi_stub_entry)
diff --git a/arch/arm64/kernel/efi-stub.c b/arch/arm64/kernel/efi-stub.c
new file mode 100644
index 000000000000..60e98a639ac5
--- /dev/null
+++ b/arch/arm64/kernel/efi-stub.c
@@ -0,0 +1,81 @@
+/*
+ * Copyright (C) 2013, 2014 Linaro Ltd; <roy.franz@linaro.org>
+ *
+ * This file implements the EFI boot stub for the arm64 kernel.
+ * Adapted from ARM version by Mark Salter <msalter@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+#include <linux/efi.h>
+#include <linux/libfdt.h>
+#include <asm/sections.h>
+#include <generated/compile.h>
+#include <generated/utsrelease.h>
+
+/*
+ * AArch64 requires the DTB to be 8-byte aligned in the first 512MiB from
+ * start of kernel and may not cross a 2MiB boundary. We set alignment to
+ * 2MiB so we know it won't cross a 2MiB boundary.
+ */
+#define EFI_FDT_ALIGN SZ_2M /* used by allocate_new_fdt_and_exit_boot() */
+#define MAX_FDT_OFFSET SZ_512M
+
+#define efi_call_early(f, ...) sys_table_arg->boottime->f(__VA_ARGS__)
+
+static void efi_char16_printk(efi_system_table_t *sys_table_arg,
+ efi_char16_t *str);
+
+static efi_status_t efi_open_volume(efi_system_table_t *sys_table,
+ void *__image, void **__fh);
+static efi_status_t efi_file_close(void *handle);
+
+static efi_status_t
+efi_file_read(void *handle, unsigned long *size, void *addr);
+
+static efi_status_t
+efi_file_size(efi_system_table_t *sys_table, void *__fh,
+ efi_char16_t *filename_16, void **handle, u64 *file_sz);
+
+/* Include shared EFI stub code */
+#include "../../../drivers/firmware/efi/efi-stub-helper.c"
+#include "../../../drivers/firmware/efi/fdt.c"
+#include "../../../drivers/firmware/efi/arm-stub.c"
+
+
+static efi_status_t handle_kernel_image(efi_system_table_t *sys_table,
+ unsigned long *image_addr,
+ unsigned long *image_size,
+ unsigned long *reserve_addr,
+ unsigned long *reserve_size,
+ unsigned long dram_base,
+ efi_loaded_image_t *image)
+{
+ efi_status_t status;
+ unsigned long kernel_size, kernel_memsize = 0;
+
+ /* Relocate the image, if required. */
+ kernel_size = _edata - _text;
+ if (*image_addr != (dram_base + TEXT_OFFSET)) {
+ kernel_memsize = kernel_size + (_end - _edata);
+ status = efi_relocate_kernel(sys_table, image_addr,
+ kernel_size, kernel_memsize,
+ dram_base + TEXT_OFFSET,
+ PAGE_SIZE);
+ if (status != EFI_SUCCESS) {
+ pr_efi_err(sys_table, "Failed to relocate kernel\n");
+ return status;
+ }
+ if (*image_addr != (dram_base + TEXT_OFFSET)) {
+ pr_efi_err(sys_table, "Failed to alloc kernel memory\n");
+ efi_free(sys_table, kernel_memsize, *image_addr);
+ return EFI_ERROR;
+ }
+ *image_size = kernel_memsize;
+ }
+
+
+ return EFI_SUCCESS;
+}
diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c
new file mode 100644
index 000000000000..14db1f6e8d7f
--- /dev/null
+++ b/arch/arm64/kernel/efi.c
@@ -0,0 +1,469 @@
+/*
+ * Extensible Firmware Interface
+ *
+ * Based on Extensible Firmware Interface Specification version 2.4
+ *
+ * Copyright (C) 2013, 2014 Linaro Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/efi.h>
+#include <linux/export.h>
+#include <linux/memblock.h>
+#include <linux/bootmem.h>
+#include <linux/of.h>
+#include <linux/of_fdt.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+
+#include <asm/cacheflush.h>
+#include <asm/efi.h>
+#include <asm/tlbflush.h>
+#include <asm/mmu_context.h>
+
+struct efi_memory_map memmap;
+
+static efi_runtime_services_t *runtime;
+
+static u64 efi_system_table;
+
+static int uefi_debug __initdata;
+static int __init uefi_debug_setup(char *str)
+{
+ uefi_debug = 1;
+
+ return 0;
+}
+early_param("uefi_debug", uefi_debug_setup);
+
+static int __init is_normal_ram(efi_memory_desc_t *md)
+{
+ if (md->attribute & EFI_MEMORY_WB)
+ return 1;
+ return 0;
+}
+
+static void __init efi_setup_idmap(void)
+{
+ struct memblock_region *r;
+ efi_memory_desc_t *md;
+ u64 paddr, npages, size;
+
+ for_each_memblock(memory, r)
+ create_id_mapping(r->base, r->size, 0);
+
+ /* map runtime io spaces */
+ for_each_efi_memory_desc(&memmap, md) {
+ if (!(md->attribute & EFI_MEMORY_RUNTIME) || is_normal_ram(md))
+ continue;
+ paddr = md->phys_addr;
+ npages = md->num_pages;
+ memrange_efi_to_native(&paddr, &npages);
+ size = npages << PAGE_SHIFT;
+ create_id_mapping(paddr, size, 1);
+ }
+}
+
+static int __init uefi_init(void)
+{
+ efi_char16_t *c16;
+ char vendor[100] = "unknown";
+ int i, retval;
+
+ efi.systab = early_memremap(efi_system_table,
+ sizeof(efi_system_table_t));
+ if (efi.systab == NULL) {
+ pr_warn("Unable to map EFI system table.\n");
+ return -ENOMEM;
+ }
+
+ set_bit(EFI_BOOT, &efi.flags);
+ set_bit(EFI_64BIT, &efi.flags);
+
+ /*
+ * Verify the EFI Table
+ */
+ if (efi.systab->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) {
+ pr_err("System table signature incorrect\n");
+ return -EINVAL;
+ }
+ if ((efi.systab->hdr.revision >> 16) < 2)
+ pr_warn("Warning: EFI system table version %d.%02d, expected 2.00 or greater\n",
+ efi.systab->hdr.revision >> 16,
+ efi.systab->hdr.revision & 0xffff);
+
+ /* Show what we know for posterity */
+ c16 = early_memremap(efi.systab->fw_vendor,
+ sizeof(vendor));
+ if (c16) {
+ for (i = 0; i < (int) sizeof(vendor) - 1 && *c16; ++i)
+ vendor[i] = c16[i];
+ vendor[i] = '\0';
+ }
+
+ pr_info("EFI v%u.%.02u by %s\n",
+ efi.systab->hdr.revision >> 16,
+ efi.systab->hdr.revision & 0xffff, vendor);
+
+ retval = efi_config_init(NULL);
+ if (retval == 0)
+ set_bit(EFI_CONFIG_TABLES, &efi.flags);
+
+ early_memunmap(c16, sizeof(vendor));
+ early_memunmap(efi.systab, sizeof(efi_system_table_t));
+
+ return retval;
+}
+
+static __initdata char memory_type_name[][32] = {
+ {"Reserved"},
+ {"Loader Code"},
+ {"Loader Data"},
+ {"Boot Code"},
+ {"Boot Data"},
+ {"Runtime Code"},
+ {"Runtime Data"},
+ {"Conventional Memory"},
+ {"Unusable Memory"},
+ {"ACPI Reclaim Memory"},
+ {"ACPI Memory NVS"},
+ {"Memory Mapped I/O"},
+ {"MMIO Port Space"},
+ {"PAL Code"},
+};
+
+/*
+ * Return true for RAM regions we want to permanently reserve.
+ */
+static __init int is_reserve_region(efi_memory_desc_t *md)
+{
+ if (!is_normal_ram(md))
+ return 0;
+
+ if (md->attribute & EFI_MEMORY_RUNTIME)
+ return 1;
+
+ if (md->type == EFI_ACPI_RECLAIM_MEMORY ||
+ md->type == EFI_RESERVED_TYPE)
+ return 1;
+
+ return 0;
+}
+
+static __init void reserve_regions(void)
+{
+ efi_memory_desc_t *md;
+ u64 paddr, npages, size;
+
+ if (uefi_debug)
+ pr_info("Processing EFI memory map:\n");
+
+ for_each_efi_memory_desc(&memmap, md) {
+ paddr = md->phys_addr;
+ npages = md->num_pages;
+
+ if (uefi_debug)
+ pr_info(" 0x%012llx-0x%012llx [%s]",
+ paddr, paddr + (npages << EFI_PAGE_SHIFT) - 1,
+ memory_type_name[md->type]);
+
+ memrange_efi_to_native(&paddr, &npages);
+ size = npages << PAGE_SHIFT;
+
+ if (is_normal_ram(md))
+ early_init_dt_add_memory_arch(paddr, size);
+
+ if (is_reserve_region(md) ||
+ md->type == EFI_BOOT_SERVICES_CODE ||
+ md->type == EFI_BOOT_SERVICES_DATA) {
+ memblock_reserve(paddr, size);
+ if (uefi_debug)
+ pr_cont("*");
+ }
+
+ if (uefi_debug)
+ pr_cont("\n");
+ }
+}
+
+
+static u64 __init free_one_region(u64 start, u64 end)
+{
+ u64 size = end - start;
+
+ if (uefi_debug)
+ pr_info(" EFI freeing: 0x%012llx-0x%012llx\n", start, end - 1);
+
+ free_bootmem_late(start, size);
+ return size;
+}
+
+static u64 __init free_region(u64 start, u64 end)
+{
+ u64 map_start, map_end, total = 0;
+
+ if (end <= start)
+ return total;
+
+ map_start = (u64)memmap.phys_map;
+ map_end = PAGE_ALIGN(map_start + (memmap.map_end - memmap.map));
+ map_start &= PAGE_MASK;
+
+ if (start < map_end && end > map_start) {
+ /* region overlaps UEFI memmap */
+ if (start < map_start)
+ total += free_one_region(start, map_start);
+
+ if (map_end < end)
+ total += free_one_region(map_end, end);
+ } else
+ total += free_one_region(start, end);
+
+ return total;
+}
+
+static void __init free_boot_services(void)
+{
+ u64 total_freed = 0;
+ u64 keep_end, free_start, free_end;
+ efi_memory_desc_t *md;
+
+ /*
+ * If kernel uses larger pages than UEFI, we have to be careful
+ * not to inadvertantly free memory we want to keep if there is
+ * overlap at the kernel page size alignment. We do not want to
+ * free is_reserve_region() memory nor the UEFI memmap itself.
+ *
+ * The memory map is sorted, so we keep track of the end of
+ * any previous region we want to keep, remember any region
+ * we want to free and defer freeing it until we encounter
+ * the next region we want to keep. This way, before freeing
+ * it, we can clip it as needed to avoid freeing memory we
+ * want to keep for UEFI.
+ */
+
+ keep_end = 0;
+ free_start = 0;
+
+ for_each_efi_memory_desc(&memmap, md) {
+ u64 paddr, npages, size;
+
+ if (is_reserve_region(md)) {
+ /*
+ * We don't want to free any memory from this region.
+ */
+ if (free_start) {
+ /* adjust free_end then free region */
+ if (free_end > md->phys_addr)
+ free_end -= PAGE_SIZE;
+ total_freed += free_region(free_start, free_end);
+ free_start = 0;
+ }
+ keep_end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT);
+ continue;
+ }
+
+ if (md->type != EFI_BOOT_SERVICES_CODE &&
+ md->type != EFI_BOOT_SERVICES_DATA) {
+ /* no need to free this region */
+ continue;
+ }
+
+ /*
+ * We want to free memory from this region.
+ */
+ paddr = md->phys_addr;
+ npages = md->num_pages;
+ memrange_efi_to_native(&paddr, &npages);
+ size = npages << PAGE_SHIFT;
+
+ if (free_start) {
+ if (paddr <= free_end)
+ free_end = paddr + size;
+ else {
+ total_freed += free_region(free_start, free_end);
+ free_start = paddr;
+ free_end = paddr + size;
+ }
+ } else {
+ free_start = paddr;
+ free_end = paddr + size;
+ }
+ if (free_start < keep_end) {
+ free_start += PAGE_SIZE;
+ if (free_start >= free_end)
+ free_start = 0;
+ }
+ }
+ if (free_start)
+ total_freed += free_region(free_start, free_end);
+
+ if (total_freed)
+ pr_info("Freed 0x%llx bytes of EFI boot services memory",
+ total_freed);
+}
+
+void __init efi_init(void)
+{
+ struct efi_fdt_params params;
+
+ /* Grab UEFI information placed in FDT by stub */
+ if (!efi_get_fdt_params(&params, uefi_debug))
+ return;
+
+ efi_system_table = params.system_table;
+
+ memblock_reserve(params.mmap & PAGE_MASK,
+ PAGE_ALIGN(params.mmap_size + (params.mmap & ~PAGE_MASK)));
+ memmap.phys_map = (void *)params.mmap;
+ memmap.map = early_memremap(params.mmap, params.mmap_size);
+ memmap.map_end = memmap.map + params.mmap_size;
+ memmap.desc_size = params.desc_size;
+ memmap.desc_version = params.desc_ver;
+
+ if (uefi_init() < 0)
+ return;
+
+ reserve_regions();
+}
+
+void __init efi_idmap_init(void)
+{
+ if (!efi_enabled(EFI_BOOT))
+ return;
+
+ /* boot time idmap_pg_dir is incomplete, so fill in missing parts */
+ efi_setup_idmap();
+}
+
+static int __init remap_region(efi_memory_desc_t *md, void **new)
+{
+ u64 paddr, vaddr, npages, size;
+
+ paddr = md->phys_addr;
+ npages = md->num_pages;
+ memrange_efi_to_native(&paddr, &npages);
+ size = npages << PAGE_SHIFT;
+
+ if (is_normal_ram(md))
+ vaddr = (__force u64)ioremap_cache(paddr, size);
+ else
+ vaddr = (__force u64)ioremap(paddr, size);
+
+ if (!vaddr) {
+ pr_err("Unable to remap 0x%llx pages @ %p\n",
+ npages, (void *)paddr);
+ return 0;
+ }
+
+ /* adjust for any rounding when EFI and system pagesize differs */
+ md->virt_addr = vaddr + (md->phys_addr - paddr);
+
+ if (uefi_debug)
+ pr_info(" EFI remap 0x%012llx => %p\n",
+ md->phys_addr, (void *)md->virt_addr);
+
+ memcpy(*new, md, memmap.desc_size);
+ *new += memmap.desc_size;
+
+ return 1;
+}
+
+/*
+ * Switch UEFI from an identity map to a kernel virtual map
+ */
+static int __init arm64_enter_virtual_mode(void)
+{
+ efi_memory_desc_t *md;
+ phys_addr_t virtmap_phys;
+ void *virtmap, *virt_md;
+ efi_status_t status;
+ u64 mapsize;
+ int count = 0;
+ unsigned long flags;
+
+ if (!efi_enabled(EFI_BOOT)) {
+ pr_info("EFI services will not be available.\n");
+ return -1;
+ }
+
+ pr_info("Remapping and enabling EFI services.\n");
+
+ /* replace early memmap mapping with permanent mapping */
+ mapsize = memmap.map_end - memmap.map;
+ early_memunmap(memmap.map, mapsize);
+ memmap.map = (__force void *)ioremap_cache((phys_addr_t)memmap.phys_map,
+ mapsize);
+ memmap.map_end = memmap.map + mapsize;
+
+ efi.memmap = &memmap;
+
+ /* Map the runtime regions */
+ virtmap = kmalloc(mapsize, GFP_KERNEL);
+ if (!virtmap) {
+ pr_err("Failed to allocate EFI virtual memmap\n");
+ return -1;
+ }
+ virtmap_phys = virt_to_phys(virtmap);
+ virt_md = virtmap;
+
+ for_each_efi_memory_desc(&memmap, md) {
+ if (!(md->attribute & EFI_MEMORY_RUNTIME))
+ continue;
+ if (remap_region(md, &virt_md))
+ ++count;
+ }
+
+ efi.systab = (__force void *)efi_lookup_mapped_addr(efi_system_table);
+ if (efi.systab)
+ set_bit(EFI_SYSTEM_TABLES, &efi.flags);
+
+ local_irq_save(flags);
+ cpu_switch_mm(idmap_pg_dir, &init_mm);
+
+ /* Call SetVirtualAddressMap with the physical address of the map */
+ runtime = efi.systab->runtime;
+ efi.set_virtual_address_map = runtime->set_virtual_address_map;
+
+ status = efi.set_virtual_address_map(count * memmap.desc_size,
+ memmap.desc_size,
+ memmap.desc_version,
+ (efi_memory_desc_t *)virtmap_phys);
+ cpu_set_reserved_ttbr0();
+ flush_tlb_all();
+ local_irq_restore(flags);
+
+ kfree(virtmap);
+
+ free_boot_services();
+
+ if (status != EFI_SUCCESS) {
+ pr_err("Failed to set EFI virtual address map! [%lx]\n",
+ status);
+ return -1;
+ }
+
+ /* Set up runtime services function pointers */
+ runtime = efi.systab->runtime;
+ efi.get_time = runtime->get_time;
+ efi.set_time = runtime->set_time;
+ efi.get_wakeup_time = runtime->get_wakeup_time;
+ efi.set_wakeup_time = runtime->set_wakeup_time;
+ efi.get_variable = runtime->get_variable;
+ efi.get_next_variable = runtime->get_next_variable;
+ efi.set_variable = runtime->set_variable;
+ efi.query_variable_info = runtime->query_variable_info;
+ efi.update_capsule = runtime->update_capsule;
+ efi.query_capsule_caps = runtime->query_capsule_caps;
+ efi.get_next_high_mono_count = runtime->get_next_high_mono_count;
+ efi.reset_system = runtime->reset_system;
+
+ set_bit(EFI_RUNTIME_SERVICES, &efi.flags);
+
+ return 0;
+}
+early_initcall(arm64_enter_virtual_mode);
diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S
index 6a27cd6dbfa6..d358ccacfc00 100644
--- a/arch/arm64/kernel/entry-fpsimd.S
+++ b/arch/arm64/kernel/entry-fpsimd.S
@@ -41,3 +41,27 @@ ENTRY(fpsimd_load_state)
fpsimd_restore x0, 8
ret
ENDPROC(fpsimd_load_state)
+
+#ifdef CONFIG_KERNEL_MODE_NEON
+
+/*
+ * Save the bottom n FP registers.
+ *
+ * x0 - pointer to struct fpsimd_partial_state
+ */
+ENTRY(fpsimd_save_partial_state)
+ fpsimd_save_partial x0, 1, 8, 9
+ ret
+ENDPROC(fpsimd_load_partial_state)
+
+/*
+ * Load the bottom n FP registers.
+ *
+ * x0 - pointer to struct fpsimd_partial_state
+ */
+ENTRY(fpsimd_load_partial_state)
+ fpsimd_restore_partial x0, 8, 9
+ ret
+ENDPROC(fpsimd_load_partial_state)
+
+#endif
diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S
new file mode 100644
index 000000000000..b051871f2965
--- /dev/null
+++ b/arch/arm64/kernel/entry-ftrace.S
@@ -0,0 +1,218 @@
+/*
+ * arch/arm64/kernel/entry-ftrace.S
+ *
+ * Copyright (C) 2013 Linaro Limited
+ * Author: AKASHI Takahiro <takahiro.akashi@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <asm/ftrace.h>
+#include <asm/insn.h>
+
+/*
+ * Gcc with -pg will put the following code in the beginning of each function:
+ * mov x0, x30
+ * bl _mcount
+ * [function's body ...]
+ * "bl _mcount" may be replaced to "bl ftrace_caller" or NOP if dynamic
+ * ftrace is enabled.
+ *
+ * Please note that x0 as an argument will not be used here because we can
+ * get lr(x30) of instrumented function at any time by winding up call stack
+ * as long as the kernel is compiled without -fomit-frame-pointer.
+ * (or CONFIG_FRAME_POINTER, this is forced on arm64)
+ *
+ * stack layout after mcount_enter in _mcount():
+ *
+ * current sp/fp => 0:+-----+
+ * in _mcount() | x29 | -> instrumented function's fp
+ * +-----+
+ * | x30 | -> _mcount()'s lr (= instrumented function's pc)
+ * old sp => +16:+-----+
+ * when instrumented | |
+ * function calls | ... |
+ * _mcount() | |
+ * | |
+ * instrumented => +xx:+-----+
+ * function's fp | x29 | -> parent's fp
+ * +-----+
+ * | x30 | -> instrumented function's lr (= parent's pc)
+ * +-----+
+ * | ... |
+ */
+
+ .macro mcount_enter
+ stp x29, x30, [sp, #-16]!
+ mov x29, sp
+ .endm
+
+ .macro mcount_exit
+ ldp x29, x30, [sp], #16
+ ret
+ .endm
+
+ .macro mcount_adjust_addr rd, rn
+ sub \rd, \rn, #AARCH64_INSN_SIZE
+ .endm
+
+ /* for instrumented function's parent */
+ .macro mcount_get_parent_fp reg
+ ldr \reg, [x29]
+ ldr \reg, [\reg]
+ .endm
+
+ /* for instrumented function */
+ .macro mcount_get_pc0 reg
+ mcount_adjust_addr \reg, x30
+ .endm
+
+ .macro mcount_get_pc reg
+ ldr \reg, [x29, #8]
+ mcount_adjust_addr \reg, \reg
+ .endm
+
+ .macro mcount_get_lr reg
+ ldr \reg, [x29]
+ ldr \reg, [\reg, #8]
+ mcount_adjust_addr \reg, \reg
+ .endm
+
+ .macro mcount_get_lr_addr reg
+ ldr \reg, [x29]
+ add \reg, \reg, #8
+ .endm
+
+#ifndef CONFIG_DYNAMIC_FTRACE
+/*
+ * void _mcount(unsigned long return_address)
+ * @return_address: return address to instrumented function
+ *
+ * This function makes calls, if enabled, to:
+ * - tracer function to probe instrumented function's entry,
+ * - ftrace_graph_caller to set up an exit hook
+ */
+ENTRY(_mcount)
+#ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
+ ldr x0, =ftrace_trace_stop
+ ldr x0, [x0] // if ftrace_trace_stop
+ ret // return;
+#endif
+ mcount_enter
+
+ ldr x0, =ftrace_trace_function
+ ldr x2, [x0]
+ adr x0, ftrace_stub
+ cmp x0, x2 // if (ftrace_trace_function
+ b.eq skip_ftrace_call // != ftrace_stub) {
+
+ mcount_get_pc x0 // function's pc
+ mcount_get_lr x1 // function's lr (= parent's pc)
+ blr x2 // (*ftrace_trace_function)(pc, lr);
+
+#ifndef CONFIG_FUNCTION_GRAPH_TRACER
+skip_ftrace_call: // return;
+ mcount_exit // }
+#else
+ mcount_exit // return;
+ // }
+skip_ftrace_call:
+ ldr x1, =ftrace_graph_return
+ ldr x2, [x1] // if ((ftrace_graph_return
+ cmp x0, x2 // != ftrace_stub)
+ b.ne ftrace_graph_caller
+
+ ldr x1, =ftrace_graph_entry // || (ftrace_graph_entry
+ ldr x2, [x1] // != ftrace_graph_entry_stub))
+ ldr x0, =ftrace_graph_entry_stub
+ cmp x0, x2
+ b.ne ftrace_graph_caller // ftrace_graph_caller();
+
+ mcount_exit
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+ENDPROC(_mcount)
+
+#else /* CONFIG_DYNAMIC_FTRACE */
+/*
+ * _mcount() is used to build the kernel with -pg option, but all the branch
+ * instructions to _mcount() are replaced to NOP initially at kernel start up,
+ * and later on, NOP to branch to ftrace_caller() when enabled or branch to
+ * NOP when disabled per-function base.
+ */
+ENTRY(_mcount)
+ ret
+ENDPROC(_mcount)
+
+/*
+ * void ftrace_caller(unsigned long return_address)
+ * @return_address: return address to instrumented function
+ *
+ * This function is a counterpart of _mcount() in 'static' ftrace, and
+ * makes calls to:
+ * - tracer function to probe instrumented function's entry,
+ * - ftrace_graph_caller to set up an exit hook
+ */
+ENTRY(ftrace_caller)
+ mcount_enter
+
+ mcount_get_pc0 x0 // function's pc
+ mcount_get_lr x1 // function's lr
+
+ .global ftrace_call
+ftrace_call: // tracer(pc, lr);
+ nop // This will be replaced with "bl xxx"
+ // where xxx can be any kind of tracer.
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ .global ftrace_graph_call
+ftrace_graph_call: // ftrace_graph_caller();
+ nop // If enabled, this will be replaced
+ // "b ftrace_graph_caller"
+#endif
+
+ mcount_exit
+ENDPROC(ftrace_caller)
+#endif /* CONFIG_DYNAMIC_FTRACE */
+
+ENTRY(ftrace_stub)
+ ret
+ENDPROC(ftrace_stub)
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+/*
+ * void ftrace_graph_caller(void)
+ *
+ * Called from _mcount() or ftrace_caller() when function_graph tracer is
+ * selected.
+ * This function w/ prepare_ftrace_return() fakes link register's value on
+ * the call stack in order to intercept instrumented function's return path
+ * and run return_to_handler() later on its exit.
+ */
+ENTRY(ftrace_graph_caller)
+ mcount_get_lr_addr x0 // pointer to function's saved lr
+ mcount_get_pc x1 // function's pc
+ mcount_get_parent_fp x2 // parent's fp
+ bl prepare_ftrace_return // prepare_ftrace_return(&lr, pc, fp)
+
+ mcount_exit
+ENDPROC(ftrace_graph_caller)
+
+/*
+ * void return_to_handler(void)
+ *
+ * Run ftrace_return_to_handler() before going back to parent.
+ * @fp is checked against the value passed by ftrace_graph_caller()
+ * only when CONFIG_FUNCTION_GRAPH_FP_TEST is enabled.
+ */
+ENTRY(return_to_handler)
+ str x0, [sp, #-16]!
+ mov x0, x29 // parent's fp
+ bl ftrace_return_to_handler// addr = ftrace_return_to_hander(fp);
+ mov x30, x0 // restore the original return address
+ ldr x0, [sp], #16
+ ret
+END(return_to_handler)
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 39ac630d83de..bf017f4ffb4f 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -60,6 +60,9 @@
push x0, x1
.if \el == 0
mrs x21, sp_el0
+ get_thread_info tsk // Ensure MDSCR_EL1.SS is clear,
+ ldr x19, [tsk, #TI_FLAGS] // since we can unmask debug
+ disable_step_tsk x19, x20 // exceptions when scheduling.
.else
add x21, sp, #S_FRAME_SIZE
.endif
@@ -259,7 +262,7 @@ el1_da:
* Data abort handling
*/
mrs x0, far_el1
- enable_dbg_if_not_stepping x2
+ enable_dbg
// re-enable interrupts if they were enabled in the aborted context
tbnz x23, #7, 1f // PSR_I_BIT
enable_irq
@@ -275,6 +278,7 @@ el1_sp_pc:
* Stack or PC alignment exception handling
*/
mrs x0, far_el1
+ enable_dbg
mov x1, x25
mov x2, sp
b do_sp_pc_abort
@@ -282,6 +286,7 @@ el1_undef:
/*
* Undefined instruction
*/
+ enable_dbg
mov x0, sp
b do_undefinstr
el1_dbg:
@@ -294,10 +299,11 @@ el1_dbg:
mrs x0, far_el1
mov x2, sp // struct pt_regs
bl do_debug_exception
-
+ enable_dbg
kernel_exit 1
el1_inv:
// TODO: add support for undefined instructions in kernel mode
+ enable_dbg
mov x0, sp
mov x1, #BAD_SYNC
mrs x2, esr_el1
@@ -307,7 +313,7 @@ ENDPROC(el1_sync)
.align 6
el1_irq:
kernel_entry 1
- enable_dbg_if_not_stepping x0
+ enable_dbg
#ifdef CONFIG_TRACE_IRQFLAGS
bl trace_hardirqs_off
#endif
@@ -332,8 +338,7 @@ ENDPROC(el1_irq)
#ifdef CONFIG_PREEMPT
el1_preempt:
mov x24, lr
-1: enable_dbg
- bl preempt_schedule_irq // irq en/disable is done inside
+1: bl preempt_schedule_irq // irq en/disable is done inside
ldr x0, [tsk, #TI_FLAGS] // get new tasks TI_FLAGS
tbnz x0, #TIF_NEED_RESCHED, 1b // needs rescheduling?
ret x24
@@ -349,7 +354,7 @@ el0_sync:
lsr x24, x25, #ESR_EL1_EC_SHIFT // exception class
cmp x24, #ESR_EL1_EC_SVC64 // SVC in 64-bit state
b.eq el0_svc
- adr lr, ret_from_exception
+ adr lr, ret_to_user
cmp x24, #ESR_EL1_EC_DABT_EL0 // data abort in EL0
b.eq el0_da
cmp x24, #ESR_EL1_EC_IABT_EL0 // instruction abort in EL0
@@ -378,7 +383,7 @@ el0_sync_compat:
lsr x24, x25, #ESR_EL1_EC_SHIFT // exception class
cmp x24, #ESR_EL1_EC_SVC32 // SVC in 32-bit state
b.eq el0_svc_compat
- adr lr, ret_from_exception
+ adr lr, ret_to_user
cmp x24, #ESR_EL1_EC_DABT_EL0 // data abort in EL0
b.eq el0_da
cmp x24, #ESR_EL1_EC_IABT_EL0 // instruction abort in EL0
@@ -423,11 +428,8 @@ el0_da:
*/
mrs x0, far_el1
bic x0, x0, #(0xff << 56)
- disable_step x1
- isb
- enable_dbg
// enable interrupts before calling the main handler
- enable_irq
+ enable_dbg_and_irq
mov x1, x25
mov x2, sp
b do_mem_abort
@@ -436,11 +438,8 @@ el0_ia:
* Instruction abort handling
*/
mrs x0, far_el1
- disable_step x1
- isb
- enable_dbg
// enable interrupts before calling the main handler
- enable_irq
+ enable_dbg_and_irq
orr x1, x25, #1 << 24 // use reserved ISS bit for instruction aborts
mov x2, sp
b do_mem_abort
@@ -448,6 +447,7 @@ el0_fpsimd_acc:
/*
* Floating Point or Advanced SIMD access
*/
+ enable_dbg
mov x0, x25
mov x1, sp
b do_fpsimd_acc
@@ -455,6 +455,7 @@ el0_fpsimd_exc:
/*
* Floating Point or Advanced SIMD exception
*/
+ enable_dbg
mov x0, x25
mov x1, sp
b do_fpsimd_exc
@@ -463,11 +464,8 @@ el0_sp_pc:
* Stack or PC alignment exception handling
*/
mrs x0, far_el1
- disable_step x1
- isb
- enable_dbg
// enable interrupts before calling the main handler
- enable_irq
+ enable_dbg_and_irq
mov x1, x25
mov x2, sp
b do_sp_pc_abort
@@ -475,9 +473,9 @@ el0_undef:
/*
* Undefined instruction
*/
- mov x0, sp
// enable interrupts before calling the main handler
- enable_irq
+ enable_dbg_and_irq
+ mov x0, sp
b do_undefinstr
el0_dbg:
/*
@@ -485,11 +483,13 @@ el0_dbg:
*/
tbnz x24, #0, el0_inv // EL0 only
mrs x0, far_el1
- disable_step x1
mov x1, x25
mov x2, sp
- b do_debug_exception
+ bl do_debug_exception
+ enable_dbg
+ b ret_to_user
el0_inv:
+ enable_dbg
mov x0, sp
mov x1, #BAD_SYNC
mrs x2, esr_el1
@@ -500,15 +500,12 @@ ENDPROC(el0_sync)
el0_irq:
kernel_entry 0
el0_irq_naked:
- disable_step x1
- isb
enable_dbg
#ifdef CONFIG_TRACE_IRQFLAGS
bl trace_hardirqs_off
#endif
irq_handler
- get_thread_info tsk
#ifdef CONFIG_TRACE_IRQFLAGS
bl trace_hardirqs_on
@@ -517,14 +514,6 @@ el0_irq_naked:
ENDPROC(el0_irq)
/*
- * This is the return code to user mode for abort handlers
- */
-ret_from_exception:
- get_thread_info tsk
- b ret_to_user
-ENDPROC(ret_from_exception)
-
-/*
* Register switch for AArch64. The callee-saved registers need to be saved
* and restored. On entry:
* x0 = previous task_struct (must be preserved across the switch)
@@ -563,10 +552,7 @@ ret_fast_syscall:
ldr x1, [tsk, #TI_FLAGS]
and x2, x1, #_TIF_WORK_MASK
cbnz x2, fast_work_pending
- tbz x1, #TIF_SINGLESTEP, fast_exit
- disable_dbg
- enable_step x2
-fast_exit:
+ enable_step_tsk x1, x2
kernel_exit 0, ret = 1
/*
@@ -576,7 +562,7 @@ fast_work_pending:
str x0, [sp, #S_X0] // returned x0
work_pending:
tbnz x1, #TIF_NEED_RESCHED, work_resched
- /* TIF_SIGPENDING or TIF_NOTIFY_RESUME case */
+ /* TIF_SIGPENDING, TIF_NOTIFY_RESUME or TIF_FOREIGN_FPSTATE case */
ldr x2, [sp, #S_PSTATE]
mov x0, sp // 'regs'
tst x2, #PSR_MODE_MASK // user mode regs?
@@ -585,7 +571,6 @@ work_pending:
bl do_notify_resume
b ret_to_user
work_resched:
- enable_dbg
bl schedule
/*
@@ -596,9 +581,7 @@ ret_to_user:
ldr x1, [tsk, #TI_FLAGS]
and x2, x1, #_TIF_WORK_MASK
cbnz x2, work_pending
- tbz x1, #TIF_SINGLESTEP, no_work_pending
- disable_dbg
- enable_step x2
+ enable_step_tsk x1, x2
no_work_pending:
kernel_exit 0, ret = 0
ENDPROC(ret_to_user)
@@ -625,14 +608,11 @@ el0_svc:
mov sc_nr, #__NR_syscalls
el0_svc_naked: // compat entry point
stp x0, scno, [sp, #S_ORIG_X0] // save the original x0 and syscall number
- disable_step x16
- isb
- enable_dbg
- enable_irq
+ enable_dbg_and_irq
- get_thread_info tsk
- ldr x16, [tsk, #TI_FLAGS] // check for syscall tracing
- tbnz x16, #TIF_SYSCALL_TRACE, __sys_trace // are we tracing syscalls?
+ ldr x16, [tsk, #TI_FLAGS] // check for syscall hooks
+ tst x16, #_TIF_SYSCALL_WORK
+ b.ne __sys_trace
adr lr, ret_fast_syscall // return address
cmp scno, sc_nr // check upper syscall limit
b.hs ni_sys
@@ -648,9 +628,8 @@ ENDPROC(el0_svc)
* switches, and waiting for our parent to respond.
*/
__sys_trace:
- mov x1, sp
- mov w0, #0 // trace entry
- bl syscall_trace
+ mov x0, sp
+ bl syscall_trace_enter
adr lr, __sys_trace_return // return address
uxtw scno, w0 // syscall number (possibly new)
mov x1, sp // pointer to regs
@@ -665,9 +644,8 @@ __sys_trace:
__sys_trace_return:
str x0, [sp] // save returned x0
- mov x1, sp
- mov w0, #1 // trace exit
- bl syscall_trace
+ mov x0, sp
+ bl syscall_trace_exit
b ret_to_user
/*
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 4aef42a04bdc..ad8aebb1cdef 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -35,6 +35,60 @@
#define FPEXC_IDF (1 << 7)
/*
+ * In order to reduce the number of times the FPSIMD state is needlessly saved
+ * and restored, we need to keep track of two things:
+ * (a) for each task, we need to remember which CPU was the last one to have
+ * the task's FPSIMD state loaded into its FPSIMD registers;
+ * (b) for each CPU, we need to remember which task's userland FPSIMD state has
+ * been loaded into its FPSIMD registers most recently, or whether it has
+ * been used to perform kernel mode NEON in the meantime.
+ *
+ * For (a), we add a 'cpu' field to struct fpsimd_state, which gets updated to
+ * the id of the current CPU everytime the state is loaded onto a CPU. For (b),
+ * we add the per-cpu variable 'fpsimd_last_state' (below), which contains the
+ * address of the userland FPSIMD state of the task that was loaded onto the CPU
+ * the most recently, or NULL if kernel mode NEON has been performed after that.
+ *
+ * With this in place, we no longer have to restore the next FPSIMD state right
+ * when switching between tasks. Instead, we can defer this check to userland
+ * resume, at which time we verify whether the CPU's fpsimd_last_state and the
+ * task's fpsimd_state.cpu are still mutually in sync. If this is the case, we
+ * can omit the FPSIMD restore.
+ *
+ * As an optimization, we use the thread_info flag TIF_FOREIGN_FPSTATE to
+ * indicate whether or not the userland FPSIMD state of the current task is
+ * present in the registers. The flag is set unless the FPSIMD registers of this
+ * CPU currently contain the most recent userland FPSIMD state of the current
+ * task.
+ *
+ * For a certain task, the sequence may look something like this:
+ * - the task gets scheduled in; if both the task's fpsimd_state.cpu field
+ * contains the id of the current CPU, and the CPU's fpsimd_last_state per-cpu
+ * variable points to the task's fpsimd_state, the TIF_FOREIGN_FPSTATE flag is
+ * cleared, otherwise it is set;
+ *
+ * - the task returns to userland; if TIF_FOREIGN_FPSTATE is set, the task's
+ * userland FPSIMD state is copied from memory to the registers, the task's
+ * fpsimd_state.cpu field is set to the id of the current CPU, the current
+ * CPU's fpsimd_last_state pointer is set to this task's fpsimd_state and the
+ * TIF_FOREIGN_FPSTATE flag is cleared;
+ *
+ * - the task executes an ordinary syscall; upon return to userland, the
+ * TIF_FOREIGN_FPSTATE flag will still be cleared, so no FPSIMD state is
+ * restored;
+ *
+ * - the task executes a syscall which executes some NEON instructions; this is
+ * preceded by a call to kernel_neon_begin(), which copies the task's FPSIMD
+ * register contents to memory, clears the fpsimd_last_state per-cpu variable
+ * and sets the TIF_FOREIGN_FPSTATE flag;
+ *
+ * - the task gets preempted after kernel_neon_end() is called; as we have not
+ * returned from the 2nd syscall yet, TIF_FOREIGN_FPSTATE is still set so
+ * whatever is in the FPSIMD registers is not saved to memory, but discarded.
+ */
+static DEFINE_PER_CPU(struct fpsimd_state *, fpsimd_last_state);
+
+/*
* Trapped FP/ASIMD access.
*/
void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs)
@@ -72,43 +126,137 @@ void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs)
void fpsimd_thread_switch(struct task_struct *next)
{
- /* check if not kernel threads */
- if (current->mm)
+ /*
+ * Save the current FPSIMD state to memory, but only if whatever is in
+ * the registers is in fact the most recent userland FPSIMD state of
+ * 'current'.
+ */
+ if (current->mm && !test_thread_flag(TIF_FOREIGN_FPSTATE))
fpsimd_save_state(&current->thread.fpsimd_state);
- if (next->mm)
- fpsimd_load_state(&next->thread.fpsimd_state);
+
+ if (next->mm) {
+ /*
+ * If we are switching to a task whose most recent userland
+ * FPSIMD state is already in the registers of *this* cpu,
+ * we can skip loading the state from memory. Otherwise, set
+ * the TIF_FOREIGN_FPSTATE flag so the state will be loaded
+ * upon the next return to userland.
+ */
+ struct fpsimd_state *st = &next->thread.fpsimd_state;
+
+ if (__this_cpu_read(fpsimd_last_state) == st
+ && st->cpu == smp_processor_id())
+ clear_ti_thread_flag(task_thread_info(next),
+ TIF_FOREIGN_FPSTATE);
+ else
+ set_ti_thread_flag(task_thread_info(next),
+ TIF_FOREIGN_FPSTATE);
+ }
}
void fpsimd_flush_thread(void)
{
- preempt_disable();
memset(&current->thread.fpsimd_state, 0, sizeof(struct fpsimd_state));
- fpsimd_load_state(&current->thread.fpsimd_state);
+ set_thread_flag(TIF_FOREIGN_FPSTATE);
+}
+
+/*
+ * Save the userland FPSIMD state of 'current' to memory, but only if the state
+ * currently held in the registers does in fact belong to 'current'
+ */
+void fpsimd_preserve_current_state(void)
+{
+ preempt_disable();
+ if (!test_thread_flag(TIF_FOREIGN_FPSTATE))
+ fpsimd_save_state(&current->thread.fpsimd_state);
+ preempt_enable();
+}
+
+/*
+ * Load the userland FPSIMD state of 'current' from memory, but only if the
+ * FPSIMD state already held in the registers is /not/ the most recent FPSIMD
+ * state of 'current'
+ */
+void fpsimd_restore_current_state(void)
+{
+ preempt_disable();
+ if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
+ struct fpsimd_state *st = &current->thread.fpsimd_state;
+
+ fpsimd_load_state(st);
+ this_cpu_write(fpsimd_last_state, st);
+ st->cpu = smp_processor_id();
+ }
+ preempt_enable();
+}
+
+/*
+ * Load an updated userland FPSIMD state for 'current' from memory and set the
+ * flag that indicates that the FPSIMD register contents are the most recent
+ * FPSIMD state of 'current'
+ */
+void fpsimd_update_current_state(struct fpsimd_state *state)
+{
+ preempt_disable();
+ fpsimd_load_state(state);
+ if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
+ struct fpsimd_state *st = &current->thread.fpsimd_state;
+
+ this_cpu_write(fpsimd_last_state, st);
+ st->cpu = smp_processor_id();
+ }
preempt_enable();
}
+/*
+ * Invalidate live CPU copies of task t's FPSIMD state
+ */
+void fpsimd_flush_task_state(struct task_struct *t)
+{
+ t->thread.fpsimd_state.cpu = NR_CPUS;
+}
+
#ifdef CONFIG_KERNEL_MODE_NEON
+static DEFINE_PER_CPU(struct fpsimd_partial_state, hardirq_fpsimdstate);
+static DEFINE_PER_CPU(struct fpsimd_partial_state, softirq_fpsimdstate);
+
/*
* Kernel-side NEON support functions
*/
-void kernel_neon_begin(void)
+void kernel_neon_begin_partial(u32 num_regs)
{
- /* Avoid using the NEON in interrupt context */
- BUG_ON(in_interrupt());
- preempt_disable();
+ if (in_interrupt()) {
+ struct fpsimd_partial_state *s = this_cpu_ptr(
+ in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate);
- if (current->mm)
- fpsimd_save_state(&current->thread.fpsimd_state);
+ BUG_ON(num_regs > 32);
+ fpsimd_save_partial_state(s, roundup(num_regs, 2));
+ } else {
+ /*
+ * Save the userland FPSIMD state if we have one and if we
+ * haven't done so already. Clear fpsimd_last_state to indicate
+ * that there is no longer userland FPSIMD state in the
+ * registers.
+ */
+ preempt_disable();
+ if (current->mm &&
+ !test_and_set_thread_flag(TIF_FOREIGN_FPSTATE))
+ fpsimd_save_state(&current->thread.fpsimd_state);
+ this_cpu_write(fpsimd_last_state, NULL);
+ }
}
-EXPORT_SYMBOL(kernel_neon_begin);
+EXPORT_SYMBOL(kernel_neon_begin_partial);
void kernel_neon_end(void)
{
- if (current->mm)
- fpsimd_load_state(&current->thread.fpsimd_state);
-
- preempt_enable();
+ if (in_interrupt()) {
+ struct fpsimd_partial_state *s = this_cpu_ptr(
+ in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate);
+ fpsimd_load_partial_state(s);
+ } else {
+ preempt_enable();
+ }
}
EXPORT_SYMBOL(kernel_neon_end);
@@ -120,12 +268,12 @@ static int fpsimd_cpu_pm_notifier(struct notifier_block *self,
{
switch (cmd) {
case CPU_PM_ENTER:
- if (current->mm)
+ if (current->mm && !test_thread_flag(TIF_FOREIGN_FPSTATE))
fpsimd_save_state(&current->thread.fpsimd_state);
break;
case CPU_PM_EXIT:
if (current->mm)
- fpsimd_load_state(&current->thread.fpsimd_state);
+ set_thread_flag(TIF_FOREIGN_FPSTATE);
break;
case CPU_PM_ENTER_FAILED:
default:
diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c
new file mode 100644
index 000000000000..7924d73b6476
--- /dev/null
+++ b/arch/arm64/kernel/ftrace.c
@@ -0,0 +1,176 @@
+/*
+ * arch/arm64/kernel/ftrace.c
+ *
+ * Copyright (C) 2013 Linaro Limited
+ * Author: AKASHI Takahiro <takahiro.akashi@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/ftrace.h>
+#include <linux/swab.h>
+#include <linux/uaccess.h>
+
+#include <asm/cacheflush.h>
+#include <asm/ftrace.h>
+#include <asm/insn.h>
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+/*
+ * Replace a single instruction, which may be a branch or NOP.
+ * If @validate == true, a replaced instruction is checked against 'old'.
+ */
+static int ftrace_modify_code(unsigned long pc, u32 old, u32 new,
+ bool validate)
+{
+ u32 replaced;
+
+ /*
+ * Note:
+ * Due to modules and __init, code can disappear and change,
+ * we need to protect against faulting as well as code changing.
+ * We do this by aarch64_insn_*() which use the probe_kernel_*().
+ *
+ * No lock is held here because all the modifications are run
+ * through stop_machine().
+ */
+ if (validate) {
+ if (aarch64_insn_read((void *)pc, &replaced))
+ return -EFAULT;
+
+ if (replaced != old)
+ return -EINVAL;
+ }
+ if (aarch64_insn_patch_text_nosync((void *)pc, new))
+ return -EPERM;
+
+ return 0;
+}
+
+/*
+ * Replace tracer function in ftrace_caller()
+ */
+int ftrace_update_ftrace_func(ftrace_func_t func)
+{
+ unsigned long pc;
+ u32 new;
+
+ pc = (unsigned long)&ftrace_call;
+ new = aarch64_insn_gen_branch_imm(pc, (unsigned long)func, true);
+
+ return ftrace_modify_code(pc, 0, new, false);
+}
+
+/*
+ * Turn on the call to ftrace_caller() in instrumented function
+ */
+int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+ unsigned long pc = rec->ip;
+ u32 old, new;
+
+ old = aarch64_insn_gen_nop();
+ new = aarch64_insn_gen_branch_imm(pc, addr, true);
+
+ return ftrace_modify_code(pc, old, new, true);
+}
+
+/*
+ * Turn off the call to ftrace_caller() in instrumented function
+ */
+int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
+ unsigned long addr)
+{
+ unsigned long pc = rec->ip;
+ u32 old, new;
+
+ old = aarch64_insn_gen_branch_imm(pc, addr, true);
+ new = aarch64_insn_gen_nop();
+
+ return ftrace_modify_code(pc, old, new, true);
+}
+
+int __init ftrace_dyn_arch_init(void)
+{
+ return 0;
+}
+#endif /* CONFIG_DYNAMIC_FTRACE */
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+/*
+ * function_graph tracer expects ftrace_return_to_handler() to be called
+ * on the way back to parent. For this purpose, this function is called
+ * in _mcount() or ftrace_caller() to replace return address (*parent) on
+ * the call stack to return_to_handler.
+ *
+ * Note that @frame_pointer is used only for sanity check later.
+ */
+void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
+ unsigned long frame_pointer)
+{
+ unsigned long return_hooker = (unsigned long)&return_to_handler;
+ unsigned long old;
+ struct ftrace_graph_ent trace;
+ int err;
+
+ if (unlikely(atomic_read(&current->tracing_graph_pause)))
+ return;
+
+ /*
+ * Note:
+ * No protection against faulting at *parent, which may be seen
+ * on other archs. It's unlikely on AArch64.
+ */
+ old = *parent;
+ *parent = return_hooker;
+
+ trace.func = self_addr;
+ trace.depth = current->curr_ret_stack + 1;
+
+ /* Only trace if the calling function expects to */
+ if (!ftrace_graph_entry(&trace)) {
+ *parent = old;
+ return;
+ }
+
+ err = ftrace_push_return_trace(old, self_addr, &trace.depth,
+ frame_pointer);
+ if (err == -EBUSY) {
+ *parent = old;
+ return;
+ }
+}
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+/*
+ * Turn on/off the call to ftrace_graph_caller() in ftrace_caller()
+ * depending on @enable.
+ */
+static int ftrace_modify_graph_caller(bool enable)
+{
+ unsigned long pc = (unsigned long)&ftrace_graph_call;
+ u32 branch, nop;
+
+ branch = aarch64_insn_gen_branch_imm(pc,
+ (unsigned long)ftrace_graph_caller, false);
+ nop = aarch64_insn_gen_nop();
+
+ if (enable)
+ return ftrace_modify_code(pc, nop, branch, true);
+ else
+ return ftrace_modify_code(pc, branch, nop, true);
+}
+
+int ftrace_enable_ftrace_graph_caller(void)
+{
+ return ftrace_modify_graph_caller(true);
+}
+
+int ftrace_disable_ftrace_graph_caller(void)
+{
+ return ftrace_modify_graph_caller(false);
+}
+#endif /* CONFIG_DYNAMIC_FTRACE */
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 0fd565000772..a96d3a6a63f6 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -108,8 +108,18 @@
/*
* DO NOT MODIFY. Image header expected by Linux boot-loaders.
*/
+#ifdef CONFIG_EFI
+efi_head:
+ /*
+ * This add instruction has no meaningful effect except that
+ * its opcode forms the magic "MZ" signature required by UEFI.
+ */
+ add x13, x18, #0x16
+ b stext
+#else
b stext // branch to kernel start, magic
.long 0 // reserved
+#endif
.quad TEXT_OFFSET // Image load offset from start of RAM
.quad 0 // reserved
.quad 0 // reserved
@@ -120,7 +130,109 @@
.byte 0x52
.byte 0x4d
.byte 0x64
+#ifdef CONFIG_EFI
+ .long pe_header - efi_head // Offset to the PE header.
+#else
.word 0 // reserved
+#endif
+
+#ifdef CONFIG_EFI
+ .align 3
+pe_header:
+ .ascii "PE"
+ .short 0
+coff_header:
+ .short 0xaa64 // AArch64
+ .short 2 // nr_sections
+ .long 0 // TimeDateStamp
+ .long 0 // PointerToSymbolTable
+ .long 1 // NumberOfSymbols
+ .short section_table - optional_header // SizeOfOptionalHeader
+ .short 0x206 // Characteristics.
+ // IMAGE_FILE_DEBUG_STRIPPED |
+ // IMAGE_FILE_EXECUTABLE_IMAGE |
+ // IMAGE_FILE_LINE_NUMS_STRIPPED
+optional_header:
+ .short 0x20b // PE32+ format
+ .byte 0x02 // MajorLinkerVersion
+ .byte 0x14 // MinorLinkerVersion
+ .long _edata - stext // SizeOfCode
+ .long 0 // SizeOfInitializedData
+ .long 0 // SizeOfUninitializedData
+ .long efi_stub_entry - efi_head // AddressOfEntryPoint
+ .long stext - efi_head // BaseOfCode
+
+extra_header_fields:
+ .quad 0 // ImageBase
+ .long 0x20 // SectionAlignment
+ .long 0x8 // FileAlignment
+ .short 0 // MajorOperatingSystemVersion
+ .short 0 // MinorOperatingSystemVersion
+ .short 0 // MajorImageVersion
+ .short 0 // MinorImageVersion
+ .short 0 // MajorSubsystemVersion
+ .short 0 // MinorSubsystemVersion
+ .long 0 // Win32VersionValue
+
+ .long _edata - efi_head // SizeOfImage
+
+ // Everything before the kernel image is considered part of the header
+ .long stext - efi_head // SizeOfHeaders
+ .long 0 // CheckSum
+ .short 0xa // Subsystem (EFI application)
+ .short 0 // DllCharacteristics
+ .quad 0 // SizeOfStackReserve
+ .quad 0 // SizeOfStackCommit
+ .quad 0 // SizeOfHeapReserve
+ .quad 0 // SizeOfHeapCommit
+ .long 0 // LoaderFlags
+ .long 0x6 // NumberOfRvaAndSizes
+
+ .quad 0 // ExportTable
+ .quad 0 // ImportTable
+ .quad 0 // ResourceTable
+ .quad 0 // ExceptionTable
+ .quad 0 // CertificationTable
+ .quad 0 // BaseRelocationTable
+
+ // Section table
+section_table:
+
+ /*
+ * The EFI application loader requires a relocation section
+ * because EFI applications must be relocatable. This is a
+ * dummy section as far as we are concerned.
+ */
+ .ascii ".reloc"
+ .byte 0
+ .byte 0 // end of 0 padding of section name
+ .long 0
+ .long 0
+ .long 0 // SizeOfRawData
+ .long 0 // PointerToRawData
+ .long 0 // PointerToRelocations
+ .long 0 // PointerToLineNumbers
+ .short 0 // NumberOfRelocations
+ .short 0 // NumberOfLineNumbers
+ .long 0x42100040 // Characteristics (section flags)
+
+
+ .ascii ".text"
+ .byte 0
+ .byte 0
+ .byte 0 // end of 0 padding of section name
+ .long _edata - stext // VirtualSize
+ .long stext - efi_head // VirtualAddress
+ .long _edata - stext // SizeOfRawData
+ .long stext - efi_head // PointerToRawData
+
+ .long 0 // PointerToRelocations (0 for executables)
+ .long 0 // PointerToLineNumbers (0 for executables)
+ .short 0 // NumberOfRelocations (0 for executables)
+ .short 0 // NumberOfLineNumbers (0 for executables)
+ .long 0xe0500020 // Characteristics (section flags)
+ .align 5
+#endif
ENTRY(stext)
mov x21, x0 // x21=FDT
@@ -230,11 +342,9 @@ ENTRY(set_cpu_boot_mode_flag)
cmp w20, #BOOT_CPU_MODE_EL2
b.ne 1f
add x1, x1, #4
-1: dc cvac, x1 // Clean potentially dirty cache line
- dsb sy
- str w20, [x1] // This CPU has booted in EL1
- dc civac, x1 // Clean&invalidate potentially stale cache line
- dsb sy
+1: str w20, [x1] // This CPU has booted in EL1
+ dmb sy
+ dc ivac, x1 // Invalidate potentially stale cache line
ret
ENDPROC(set_cpu_boot_mode_flag)
diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c
index bee789757806..df1cf15377b4 100644
--- a/arch/arm64/kernel/hw_breakpoint.c
+++ b/arch/arm64/kernel/hw_breakpoint.c
@@ -20,6 +20,7 @@
#define pr_fmt(fmt) "hw-breakpoint: " fmt
+#include <linux/compat.h>
#include <linux/cpu_pm.h>
#include <linux/errno.h>
#include <linux/hw_breakpoint.h>
@@ -27,7 +28,6 @@
#include <linux/ptrace.h>
#include <linux/smp.h>
-#include <asm/compat.h>
#include <asm/current.h>
#include <asm/debug-monitors.h>
#include <asm/hw_breakpoint.h>
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 6391485f342d..43b7c34f92cb 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -20,6 +20,7 @@
#include <stdarg.h>
+#include <linux/compat.h>
#include <linux/export.h>
#include <linux/sched.h>
#include <linux/kernel.h>
@@ -113,32 +114,62 @@ void arch_cpu_idle_dead(void)
}
#endif
+/*
+ * Called by kexec, immediately prior to machine_kexec().
+ *
+ * This must completely disable all secondary CPUs; simply causing those CPUs
+ * to execute e.g. a RAM-based pin loop is not sufficient. This allows the
+ * kexec'd kernel to use any and all RAM as it sees fit, without having to
+ * avoid any code or data used by any SW CPU pin loop. The CPU hotplug
+ * functionality embodied in disable_nonboot_cpus() to achieve this.
+ */
void machine_shutdown(void)
{
-#ifdef CONFIG_SMP
- smp_send_stop();
-#endif
+ disable_nonboot_cpus();
}
+/*
+ * Halting simply requires that the secondary CPUs stop performing any
+ * activity (executing tasks, handling interrupts). smp_send_stop()
+ * achieves this.
+ */
void machine_halt(void)
{
- machine_shutdown();
+ local_irq_disable();
+ smp_send_stop();
while (1);
}
+/*
+ * Power-off simply requires that the secondary CPUs stop performing any
+ * activity (executing tasks, handling interrupts). smp_send_stop()
+ * achieves this. When the system power is turned off, it will take all CPUs
+ * with it.
+ */
void machine_power_off(void)
{
- machine_shutdown();
+ local_irq_disable();
+ smp_send_stop();
if (pm_power_off)
pm_power_off();
}
+/*
+ * Restart requires that the secondary CPUs stop performing any activity
+ * while the primary CPU resets the system. Systems with a single CPU can
+ * use soft_restart() as their machine descriptor's .restart hook, since that
+ * will cause the only available CPU to reset. Systems with multiple CPUs must
+ * provide a HW restart implementation, to ensure that all CPUs reset at once.
+ * This is required so that any code running after reset on the primary CPU
+ * doesn't have to co-ordinate with other CPUs to ensure they aren't still
+ * executing pre-reset code, and using RAM that the primary CPU's code wishes
+ * to use. Implementing such co-ordination would be essentially impossible.
+ */
void machine_restart(char *cmd)
{
- machine_shutdown();
-
/* Disable interrupts first */
local_irq_disable();
+ smp_send_stop();
/* Now call the architecture specific reboot code. */
if (arm_pm_restart)
@@ -205,7 +236,7 @@ void release_thread(struct task_struct *dead_task)
int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
{
- fpsimd_save_state(&current->thread.fpsimd_state);
+ fpsimd_preserve_current_state();
*dst = *src;
return 0;
}
@@ -300,7 +331,7 @@ struct task_struct *__switch_to(struct task_struct *prev,
* Complete any pending TLB or cache maintenance on this CPU in case
* the thread migrates to a different CPU.
*/
- dsb();
+ dsb(ish);
/* the actual thread switch */
last = cpu_switch_to(prev, next);
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index 6a8928bba03c..3e926b9c0641 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -19,6 +19,7 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+#include <linux/compat.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/mm.h>
@@ -41,6 +42,9 @@
#include <asm/traps.h>
#include <asm/system_misc.h>
+#define CREATE_TRACE_POINTS
+#include <trace/events/syscalls.h>
+
/*
* TODO: does not yet catch signals sent when the child dies.
* in exit.c or in signal.c.
@@ -517,6 +521,7 @@ static int fpr_set(struct task_struct *target, const struct user_regset *regset,
return ret;
target->thread.fpsimd_state.user_fpsimd = newstate;
+ fpsimd_flush_task_state(target);
return ret;
}
@@ -764,6 +769,7 @@ static int compat_vfp_set(struct task_struct *target,
uregs->fpcr = fpscr & VFP_FPSCR_CTRL_MASK;
}
+ fpsimd_flush_task_state(target);
return ret;
}
@@ -1058,35 +1064,49 @@ long arch_ptrace(struct task_struct *child, long request,
return ptrace_request(child, request, addr, data);
}
-asmlinkage int syscall_trace(int dir, struct pt_regs *regs)
+enum ptrace_syscall_dir {
+ PTRACE_SYSCALL_ENTER = 0,
+ PTRACE_SYSCALL_EXIT,
+};
+
+static void tracehook_report_syscall(struct pt_regs *regs,
+ enum ptrace_syscall_dir dir)
{
+ int regno;
unsigned long saved_reg;
- if (!test_thread_flag(TIF_SYSCALL_TRACE))
- return regs->syscallno;
-
- if (is_compat_task()) {
- /* AArch32 uses ip (r12) for scratch */
- saved_reg = regs->regs[12];
- regs->regs[12] = dir;
- } else {
- /*
- * Save X7. X7 is used to denote syscall entry/exit:
- * X7 = 0 -> entry, = 1 -> exit
- */
- saved_reg = regs->regs[7];
- regs->regs[7] = dir;
- }
+ /*
+ * A scratch register (ip(r12) on AArch32, x7 on AArch64) is
+ * used to denote syscall entry/exit:
+ */
+ regno = (is_compat_task() ? 12 : 7);
+ saved_reg = regs->regs[regno];
+ regs->regs[regno] = dir;
- if (dir)
+ if (dir == PTRACE_SYSCALL_EXIT)
tracehook_report_syscall_exit(regs, 0);
else if (tracehook_report_syscall_entry(regs))
regs->syscallno = ~0UL;
- if (is_compat_task())
- regs->regs[12] = saved_reg;
- else
- regs->regs[7] = saved_reg;
+ regs->regs[regno] = saved_reg;
+}
+
+asmlinkage int syscall_trace_enter(struct pt_regs *regs)
+{
+ if (test_thread_flag(TIF_SYSCALL_TRACE))
+ tracehook_report_syscall(regs, PTRACE_SYSCALL_ENTER);
+
+ if (test_thread_flag(TIF_SYSCALL_TRACEPOINT))
+ trace_sys_enter(regs, regs->syscallno);
return regs->syscallno;
}
+
+asmlinkage void syscall_trace_exit(struct pt_regs *regs)
+{
+ if (test_thread_flag(TIF_SYSCALL_TRACEPOINT))
+ trace_sys_exit(regs, regs_return_value(regs));
+
+ if (test_thread_flag(TIF_SYSCALL_TRACE))
+ tracehook_report_syscall(regs, PTRACE_SYSCALL_EXIT);
+}
diff --git a/arch/arm64/kernel/return_address.c b/arch/arm64/kernel/return_address.c
new file mode 100644
index 000000000000..89102a6ffad5
--- /dev/null
+++ b/arch/arm64/kernel/return_address.c
@@ -0,0 +1,55 @@
+/*
+ * arch/arm64/kernel/return_address.c
+ *
+ * Copyright (C) 2013 Linaro Limited
+ * Author: AKASHI Takahiro <takahiro.akashi@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/export.h>
+#include <linux/ftrace.h>
+
+#include <asm/stacktrace.h>
+
+struct return_address_data {
+ unsigned int level;
+ void *addr;
+};
+
+static int save_return_addr(struct stackframe *frame, void *d)
+{
+ struct return_address_data *data = d;
+
+ if (!data->level) {
+ data->addr = (void *)frame->pc;
+ return 1;
+ } else {
+ --data->level;
+ return 0;
+ }
+}
+
+void *return_address(unsigned int level)
+{
+ struct return_address_data data;
+ struct stackframe frame;
+ register unsigned long current_sp asm ("sp");
+
+ data.level = level + 2;
+ data.addr = NULL;
+
+ frame.fp = (unsigned long)__builtin_frame_address(0);
+ frame.sp = current_sp;
+ frame.pc = (unsigned long)return_address; /* dummy */
+
+ walk_stackframe(&frame, save_return_addr, &data);
+
+ if (!data.level)
+ return data.addr;
+ else
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(return_address);
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 7ec784653b29..46d1125571f6 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -25,6 +25,7 @@
#include <linux/utsname.h>
#include <linux/initrd.h>
#include <linux/console.h>
+#include <linux/cache.h>
#include <linux/bootmem.h>
#include <linux/seq_file.h>
#include <linux/screen_info.h>
@@ -41,6 +42,7 @@
#include <linux/memblock.h>
#include <linux/of_fdt.h>
#include <linux/of_platform.h>
+#include <linux/efi.h>
#include <asm/fixmap.h>
#include <asm/cputype.h>
@@ -55,6 +57,7 @@
#include <asm/traps.h>
#include <asm/memblock.h>
#include <asm/psci.h>
+#include <asm/efi.h>
unsigned int processor_id;
EXPORT_SYMBOL(processor_id);
@@ -198,6 +201,8 @@ static void __init setup_processor(void)
{
struct cpu_info *cpu_info;
u64 features, block;
+ u32 cwg;
+ int cls;
cpu_info = lookup_processor_type(read_cpuid_id());
if (!cpu_info) {
@@ -215,6 +220,18 @@ static void __init setup_processor(void)
elf_hwcap = 0;
/*
+ * Check for sane CTR_EL0.CWG value.
+ */
+ cwg = cache_type_cwg();
+ cls = cache_line_size();
+ if (!cwg)
+ pr_warn("No Cache Writeback Granule information, assuming cache line size %d\n",
+ cls);
+ if (L1_CACHE_BYTES < cls)
+ pr_warn("L1_CACHE_BYTES smaller than the Cache Writeback Granule (%d < %d)\n",
+ L1_CACHE_BYTES, cls);
+
+ /*
* ID_AA64ISAR0_EL1 contains 4-bit wide signed feature blocks.
* The blocks we test below represent incremental functionality
* for non-negative values. Negative values are reserved.
@@ -361,16 +378,18 @@ void __init setup_arch(char **cmdline_p)
*cmdline_p = boot_command_line;
- init_mem_pgprot();
early_ioremap_init();
parse_early_param();
+ efi_init();
arm64_memblock_init();
paging_init();
request_standard_resources();
+ efi_idmap_init();
+
unflatten_device_tree();
psci_init();
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index 890a591f75dd..6357b9c6c90e 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -17,6 +17,7 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+#include <linux/compat.h>
#include <linux/errno.h>
#include <linux/signal.h>
#include <linux/personality.h>
@@ -25,7 +26,6 @@
#include <linux/tracehook.h>
#include <linux/ratelimit.h>
-#include <asm/compat.h>
#include <asm/debug-monitors.h>
#include <asm/elf.h>
#include <asm/cacheflush.h>
@@ -51,7 +51,7 @@ static int preserve_fpsimd_context(struct fpsimd_context __user *ctx)
int err;
/* dump the hardware registers to the fpsimd_state structure */
- fpsimd_save_state(fpsimd);
+ fpsimd_preserve_current_state();
/* copy the FP and status/control registers */
err = __copy_to_user(ctx->vregs, fpsimd->vregs, sizeof(fpsimd->vregs));
@@ -86,11 +86,8 @@ static int restore_fpsimd_context(struct fpsimd_context __user *ctx)
__get_user_error(fpsimd.fpcr, &ctx->fpcr, err);
/* load the hardware registers from the fpsimd_state structure */
- if (!err) {
- preempt_disable();
- fpsimd_load_state(&fpsimd);
- preempt_enable();
- }
+ if (!err)
+ fpsimd_update_current_state(&fpsimd);
return err ? -EFAULT : 0;
}
@@ -100,8 +97,7 @@ static int restore_sigframe(struct pt_regs *regs,
{
sigset_t set;
int i, err;
- struct aux_context __user *aux =
- (struct aux_context __user *)sf->uc.uc_mcontext.__reserved;
+ void *aux = sf->uc.uc_mcontext.__reserved;
err = __copy_from_user(&set, &sf->uc.uc_sigmask, sizeof(set));
if (err == 0)
@@ -121,8 +117,11 @@ static int restore_sigframe(struct pt_regs *regs,
err |= !valid_user_regs(&regs->user_regs);
- if (err == 0)
- err |= restore_fpsimd_context(&aux->fpsimd);
+ if (err == 0) {
+ struct fpsimd_context *fpsimd_ctx =
+ container_of(aux, struct fpsimd_context, head);
+ err |= restore_fpsimd_context(fpsimd_ctx);
+ }
return err;
}
@@ -167,8 +166,8 @@ static int setup_sigframe(struct rt_sigframe __user *sf,
struct pt_regs *regs, sigset_t *set)
{
int i, err = 0;
- struct aux_context __user *aux =
- (struct aux_context __user *)sf->uc.uc_mcontext.__reserved;
+ void *aux = sf->uc.uc_mcontext.__reserved;
+ struct _aarch64_ctx *end;
/* set up the stack frame for unwinding */
__put_user_error(regs->regs[29], &sf->fp, err);
@@ -185,12 +184,27 @@ static int setup_sigframe(struct rt_sigframe __user *sf,
err |= __copy_to_user(&sf->uc.uc_sigmask, set, sizeof(*set));
- if (err == 0)
- err |= preserve_fpsimd_context(&aux->fpsimd);
+ if (err == 0) {
+ struct fpsimd_context *fpsimd_ctx =
+ container_of(aux, struct fpsimd_context, head);
+ err |= preserve_fpsimd_context(fpsimd_ctx);
+ aux += sizeof(*fpsimd_ctx);
+ }
+
+ /* fault information, if valid */
+ if (current->thread.fault_code) {
+ struct esr_context *esr_ctx =
+ container_of(aux, struct esr_context, head);
+ __put_user_error(ESR_MAGIC, &esr_ctx->head.magic, err);
+ __put_user_error(sizeof(*esr_ctx), &esr_ctx->head.size, err);
+ __put_user_error(current->thread.fault_code, &esr_ctx->esr, err);
+ aux += sizeof(*esr_ctx);
+ }
/* set the "end" magic */
- __put_user_error(0, &aux->end.magic, err);
- __put_user_error(0, &aux->end.size, err);
+ end = aux;
+ __put_user_error(0, &end->magic, err);
+ __put_user_error(0, &end->size, err);
return err;
}
@@ -416,4 +430,8 @@ asmlinkage void do_notify_resume(struct pt_regs *regs,
clear_thread_flag(TIF_NOTIFY_RESUME);
tracehook_notify_resume(regs);
}
+
+ if (thread_flags & _TIF_FOREIGN_FPSTATE)
+ fpsimd_restore_current_state();
+
}
diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c
index b3fc9f5ec6d3..3491c638f172 100644
--- a/arch/arm64/kernel/signal32.c
+++ b/arch/arm64/kernel/signal32.c
@@ -23,6 +23,7 @@
#include <linux/syscalls.h>
#include <linux/ratelimit.h>
+#include <asm/esr.h>
#include <asm/fpsimd.h>
#include <asm/signal32.h>
#include <asm/uaccess.h>
@@ -81,6 +82,8 @@ struct compat_vfp_sigframe {
#define VFP_MAGIC 0x56465001
#define VFP_STORAGE_SIZE sizeof(struct compat_vfp_sigframe)
+#define FSR_WRITE_SHIFT (11)
+
struct compat_aux_sigframe {
struct compat_vfp_sigframe vfp;
@@ -219,7 +222,7 @@ static int compat_preserve_vfp_context(struct compat_vfp_sigframe __user *frame)
* Note that this also saves V16-31, which aren't visible
* in AArch32.
*/
- fpsimd_save_state(fpsimd);
+ fpsimd_preserve_current_state();
/* Place structure header on the stack */
__put_user_error(magic, &frame->magic, err);
@@ -282,11 +285,8 @@ static int compat_restore_vfp_context(struct compat_vfp_sigframe __user *frame)
* We don't need to touch the exception register, so
* reload the hardware state.
*/
- if (!err) {
- preempt_disable();
- fpsimd_load_state(&fpsimd);
- preempt_enable();
- }
+ if (!err)
+ fpsimd_update_current_state(&fpsimd);
return err ? -EFAULT : 0;
}
@@ -500,7 +500,9 @@ static int compat_setup_sigframe(struct compat_sigframe __user *sf,
__put_user_error(regs->pstate, &sf->uc.uc_mcontext.arm_cpsr, err);
__put_user_error((compat_ulong_t)0, &sf->uc.uc_mcontext.trap_no, err);
- __put_user_error((compat_ulong_t)0, &sf->uc.uc_mcontext.error_code, err);
+ /* set the compat FSR WnR */
+ __put_user_error(!!(current->thread.fault_code & ESR_EL1_WRITE) <<
+ FSR_WRITE_SHIFT, &sf->uc.uc_mcontext.error_code, err);
__put_user_error(current->thread.fault_address, &sf->uc.uc_mcontext.fault_address, err);
__put_user_error(set->sig[0], &sf->uc.uc_mcontext.oldmask, err);
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index c3cb160edc69..40f38f46c8e0 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -35,6 +35,7 @@
#include <linux/clockchips.h>
#include <linux/completion.h>
#include <linux/of.h>
+#include <linux/irq_work.h>
#include <asm/atomic.h>
#include <asm/cacheflush.h>
@@ -62,6 +63,7 @@ enum ipi_msg_type {
IPI_CALL_FUNC_SINGLE,
IPI_CPU_STOP,
IPI_TIMER,
+ IPI_IRQ_WORK,
};
/*
@@ -477,6 +479,14 @@ void arch_send_call_function_single_ipi(int cpu)
smp_cross_call(cpumask_of(cpu), IPI_CALL_FUNC_SINGLE);
}
+#ifdef CONFIG_IRQ_WORK
+void arch_irq_work_raise(void)
+{
+ if (smp_cross_call)
+ smp_cross_call(cpumask_of(smp_processor_id()), IPI_IRQ_WORK);
+}
+#endif
+
static const char *ipi_types[NR_IPI] = {
#define S(x,s) [x - IPI_RESCHEDULE] = s
S(IPI_RESCHEDULE, "Rescheduling interrupts"),
@@ -484,6 +494,7 @@ static const char *ipi_types[NR_IPI] = {
S(IPI_CALL_FUNC_SINGLE, "Single function call interrupts"),
S(IPI_CPU_STOP, "CPU stop interrupts"),
S(IPI_TIMER, "Timer broadcast interrupts"),
+ S(IPI_IRQ_WORK, "IRQ work interrupts"),
};
void show_ipi_list(struct seq_file *p, int prec)
@@ -576,6 +587,14 @@ void handle_IPI(int ipinr, struct pt_regs *regs)
break;
#endif
+#ifdef CONFIG_IRQ_WORK
+ case IPI_IRQ_WORK:
+ irq_enter();
+ irq_work_run();
+ irq_exit();
+ break;
+#endif
+
default:
pr_crit("CPU%u: Unknown IPI message 0x%x\n", cpu, ipinr);
break;
diff --git a/arch/arm64/kernel/smp_spin_table.c b/arch/arm64/kernel/smp_spin_table.c
index 7a530d2cc807..0347d38eea29 100644
--- a/arch/arm64/kernel/smp_spin_table.c
+++ b/arch/arm64/kernel/smp_spin_table.c
@@ -30,7 +30,6 @@ extern void secondary_holding_pen(void);
volatile unsigned long secondary_holding_pen_release = INVALID_HWID;
static phys_addr_t cpu_release_addr[NR_CPUS];
-static DEFINE_RAW_SPINLOCK(boot_lock);
/*
* Write secondary_holding_pen_release in a way that is guaranteed to be
@@ -94,14 +93,6 @@ static int smp_spin_table_cpu_prepare(unsigned int cpu)
static int smp_spin_table_cpu_boot(unsigned int cpu)
{
- unsigned long timeout;
-
- /*
- * Set synchronisation state between this boot processor
- * and the secondary one
- */
- raw_spin_lock(&boot_lock);
-
/*
* Update the pen release flag.
*/
@@ -112,34 +103,7 @@ static int smp_spin_table_cpu_boot(unsigned int cpu)
*/
sev();
- timeout = jiffies + (1 * HZ);
- while (time_before(jiffies, timeout)) {
- if (secondary_holding_pen_release == INVALID_HWID)
- break;
- udelay(10);
- }
-
- /*
- * Now the secondary core is starting up let it run its
- * calibrations, then wait for it to finish
- */
- raw_spin_unlock(&boot_lock);
-
- return secondary_holding_pen_release != INVALID_HWID ? -ENOSYS : 0;
-}
-
-static void smp_spin_table_cpu_postboot(void)
-{
- /*
- * Let the primary processor know we're out of the pen.
- */
- write_pen_release(INVALID_HWID);
-
- /*
- * Synchronise with the boot thread.
- */
- raw_spin_lock(&boot_lock);
- raw_spin_unlock(&boot_lock);
+ return 0;
}
const struct cpu_operations smp_spin_table_ops = {
@@ -147,5 +111,4 @@ const struct cpu_operations smp_spin_table_ops = {
.cpu_init = smp_spin_table_cpu_init,
.cpu_prepare = smp_spin_table_cpu_prepare,
.cpu_boot = smp_spin_table_cpu_boot,
- .cpu_postboot = smp_spin_table_cpu_postboot,
};
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index 38f0558f0c0a..55437ba1f5a4 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -35,7 +35,7 @@
* ldp x29, x30, [sp]
* add sp, sp, #0x10
*/
-int unwind_frame(struct stackframe *frame)
+int notrace unwind_frame(struct stackframe *frame)
{
unsigned long high, low;
unsigned long fp = frame->fp;
diff --git a/arch/arm64/kernel/time.c b/arch/arm64/kernel/time.c
index 6815987b50f8..1a7125c3099b 100644
--- a/arch/arm64/kernel/time.c
+++ b/arch/arm64/kernel/time.c
@@ -18,6 +18,7 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+#include <linux/clockchips.h>
#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/interrupt.h>
@@ -69,6 +70,8 @@ void __init time_init(void)
of_clk_init(NULL);
clocksource_of_init();
+ tick_setup_hrtimer_broadcast();
+
arch_timer_rate = arch_timer_get_rate();
if (!arch_timer_rate)
panic("Unable to initialise architected timer.\n");
diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
index 3e06b0be4ec8..43514f905916 100644
--- a/arch/arm64/kernel/topology.c
+++ b/arch/arm64/kernel/topology.c
@@ -17,10 +17,192 @@
#include <linux/percpu.h>
#include <linux/node.h>
#include <linux/nodemask.h>
+#include <linux/of.h>
#include <linux/sched.h>
#include <asm/topology.h>
+static int __init get_cpu_for_node(struct device_node *node)
+{
+ struct device_node *cpu_node;
+ int cpu;
+
+ cpu_node = of_parse_phandle(node, "cpu", 0);
+ if (!cpu_node)
+ return -1;
+
+ for_each_possible_cpu(cpu) {
+ if (of_get_cpu_node(cpu, NULL) == cpu_node) {
+ of_node_put(cpu_node);
+ return cpu;
+ }
+ }
+
+ pr_crit("Unable to find CPU node for %s\n", cpu_node->full_name);
+
+ of_node_put(cpu_node);
+ return -1;
+}
+
+static int __init parse_core(struct device_node *core, int cluster_id,
+ int core_id)
+{
+ char name[10];
+ bool leaf = true;
+ int i = 0;
+ int cpu;
+ struct device_node *t;
+
+ do {
+ snprintf(name, sizeof(name), "thread%d", i);
+ t = of_get_child_by_name(core, name);
+ if (t) {
+ leaf = false;
+ cpu = get_cpu_for_node(t);
+ if (cpu >= 0) {
+ cpu_topology[cpu].cluster_id = cluster_id;
+ cpu_topology[cpu].core_id = core_id;
+ cpu_topology[cpu].thread_id = i;
+ } else {
+ pr_err("%s: Can't get CPU for thread\n",
+ t->full_name);
+ of_node_put(t);
+ return -EINVAL;
+ }
+ of_node_put(t);
+ }
+ i++;
+ } while (t);
+
+ cpu = get_cpu_for_node(core);
+ if (cpu >= 0) {
+ if (!leaf) {
+ pr_err("%s: Core has both threads and CPU\n",
+ core->full_name);
+ return -EINVAL;
+ }
+
+ cpu_topology[cpu].cluster_id = cluster_id;
+ cpu_topology[cpu].core_id = core_id;
+ } else if (leaf) {
+ pr_err("%s: Can't get CPU for leaf core\n", core->full_name);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int __init parse_cluster(struct device_node *cluster, int depth)
+{
+ char name[10];
+ bool leaf = true;
+ bool has_cores = false;
+ struct device_node *c;
+ static int cluster_id __initdata;
+ int core_id = 0;
+ int i, ret;
+
+ /*
+ * First check for child clusters; we currently ignore any
+ * information about the nesting of clusters and present the
+ * scheduler with a flat list of them.
+ */
+ i = 0;
+ do {
+ snprintf(name, sizeof(name), "cluster%d", i);
+ c = of_get_child_by_name(cluster, name);
+ if (c) {
+ leaf = false;
+ ret = parse_cluster(c, depth + 1);
+ of_node_put(c);
+ if (ret != 0)
+ return ret;
+ }
+ i++;
+ } while (c);
+
+ /* Now check for cores */
+ i = 0;
+ do {
+ snprintf(name, sizeof(name), "core%d", i);
+ c = of_get_child_by_name(cluster, name);
+ if (c) {
+ has_cores = true;
+
+ if (depth == 0) {
+ pr_err("%s: cpu-map children should be clusters\n",
+ c->full_name);
+ of_node_put(c);
+ return -EINVAL;
+ }
+
+ if (leaf) {
+ ret = parse_core(c, cluster_id, core_id++);
+ } else {
+ pr_err("%s: Non-leaf cluster with core %s\n",
+ cluster->full_name, name);
+ ret = -EINVAL;
+ }
+
+ of_node_put(c);
+ if (ret != 0)
+ return ret;
+ }
+ i++;
+ } while (c);
+
+ if (leaf && !has_cores)
+ pr_warn("%s: empty cluster\n", cluster->full_name);
+
+ if (leaf)
+ cluster_id++;
+
+ return 0;
+}
+
+static int __init parse_dt_topology(void)
+{
+ struct device_node *cn, *map;
+ int ret = 0;
+ int cpu;
+
+ cn = of_find_node_by_path("/cpus");
+ if (!cn) {
+ pr_err("No CPU information found in DT\n");
+ return 0;
+ }
+
+ /*
+ * When topology is provided cpu-map is essentially a root
+ * cluster with restricted subnodes.
+ */
+ map = of_get_child_by_name(cn, "cpu-map");
+ if (!map)
+ goto out;
+
+ ret = parse_cluster(map, 0);
+ if (ret != 0)
+ goto out_map;
+
+ /*
+ * Check that all cores are in the topology; the SMP code will
+ * only mark cores described in the DT as possible.
+ */
+ for_each_possible_cpu(cpu) {
+ if (cpu_topology[cpu].cluster_id == -1) {
+ pr_err("CPU%d: No topology information specified\n",
+ cpu);
+ ret = -EINVAL;
+ }
+ }
+
+out_map:
+ of_node_put(map);
+out:
+ of_node_put(cn);
+ return ret;
+}
+
/*
* cpu topology table
*/
@@ -39,13 +221,9 @@ static void update_siblings_masks(unsigned int cpuid)
if (cpuid_topo->cluster_id == -1) {
/*
- * DT does not contain topology information for this cpu
- * reset it to default behaviour
+ * DT does not contain topology information for this cpu.
*/
pr_debug("CPU%u: No topology information configured\n", cpuid);
- cpuid_topo->core_id = 0;
- cpumask_set_cpu(cpuid, &cpuid_topo->core_sibling);
- cpumask_set_cpu(cpuid, &cpuid_topo->thread_sibling);
return;
}
@@ -74,22 +252,32 @@ void store_cpu_topology(unsigned int cpuid)
update_siblings_masks(cpuid);
}
-/*
- * init_cpu_topology is called at boot when only one cpu is running
- * which prevent simultaneous write access to cpu_topology array
- */
-void __init init_cpu_topology(void)
+static void __init reset_cpu_topology(void)
{
unsigned int cpu;
- /* init core mask and power*/
for_each_possible_cpu(cpu) {
struct cpu_topology *cpu_topo = &cpu_topology[cpu];
cpu_topo->thread_id = -1;
- cpu_topo->core_id = -1;
+ cpu_topo->core_id = 0;
cpu_topo->cluster_id = -1;
+
cpumask_clear(&cpu_topo->core_sibling);
+ cpumask_set_cpu(cpu, &cpu_topo->core_sibling);
cpumask_clear(&cpu_topo->thread_sibling);
+ cpumask_set_cpu(cpu, &cpu_topo->thread_sibling);
}
}
+
+void __init init_cpu_topology(void)
+{
+ reset_cpu_topology();
+
+ /*
+ * Discard anything that was parsed if we hit an error so we
+ * don't use partial information.
+ */
+ if (parse_dt_topology())
+ reset_cpu_topology();
+}
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 7ffadddb645d..c43cfa9b8304 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -251,10 +251,13 @@ void die(const char *str, struct pt_regs *regs, int err)
void arm64_notify_die(const char *str, struct pt_regs *regs,
struct siginfo *info, int err)
{
- if (user_mode(regs))
+ if (user_mode(regs)) {
+ current->thread.fault_address = 0;
+ current->thread.fault_code = err;
force_sig_info(info->si_signo, info, current);
- else
+ } else {
die(str, regs, err);
+ }
}
asmlinkage void __exception do_undefinstr(struct pt_regs *regs)
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 4ba7a55b49c7..f1e6d5c032e1 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -13,7 +13,7 @@
#define ARM_EXIT_DISCARD(x) x
OUTPUT_ARCH(aarch64)
-ENTRY(stext)
+ENTRY(_text)
jiffies = jiffies_64;
diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index 2c56012cb2d2..b0d1512acf08 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -630,9 +630,15 @@ ENTRY(__kvm_tlb_flush_vmid_ipa)
* whole of Stage-1. Weep...
*/
tlbi ipas2e1is, x1
- dsb sy
+ /*
+ * We have to ensure completion of the invalidation at Stage-2,
+ * since a table walk on another CPU could refill a TLB with a
+ * complete (S1 + S2) walk based on the old Stage-2 mapping if
+ * the Stage-1 invalidation happened first.
+ */
+ dsb ish
tlbi vmalle1is
- dsb sy
+ dsb ish
isb
msr vttbr_el2, xzr
@@ -643,7 +649,7 @@ ENTRY(__kvm_flush_vm_context)
dsb ishst
tlbi alle1is
ic ialluis
- dsb sy
+ dsb ish
ret
ENDPROC(__kvm_flush_vm_context)
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 03244582bc55..c59a1bdab5eb 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -71,13 +71,13 @@ static u32 get_ccsidr(u32 csselr)
static void do_dc_cisw(u32 val)
{
asm volatile("dc cisw, %x0" : : "r" (val));
- dsb();
+ dsb(ish);
}
static void do_dc_csw(u32 val)
{
asm volatile("dc csw, %x0" : : "r" (val));
- dsb();
+ dsb(ish);
}
/* See note at ARM ARM B1.14.4 */
diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile
index 328ce1a99daa..d98d3e39879e 100644
--- a/arch/arm64/lib/Makefile
+++ b/arch/arm64/lib/Makefile
@@ -1,4 +1,5 @@
lib-y := bitops.o clear_user.o delay.o copy_from_user.o \
copy_to_user.o copy_in_user.o copy_page.o \
clear_page.o memchr.o memcpy.o memmove.o memset.o \
+ memcmp.o strcmp.o strncmp.o strlen.o strnlen.o \
strchr.o strrchr.o
diff --git a/arch/arm64/lib/memcmp.S b/arch/arm64/lib/memcmp.S
new file mode 100644
index 000000000000..6ea0776ba6de
--- /dev/null
+++ b/arch/arm64/lib/memcmp.S
@@ -0,0 +1,258 @@
+/*
+ * Copyright (C) 2013 ARM Ltd.
+ * Copyright (C) 2013 Linaro.
+ *
+ * This code is based on glibc cortex strings work originally authored by Linaro
+ * and re-licensed under GPLv2 for the Linux kernel. The original code can
+ * be found @
+ *
+ * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
+ * files/head:/src/aarch64/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+/*
+* compare memory areas(when two memory areas' offset are different,
+* alignment handled by the hardware)
+*
+* Parameters:
+* x0 - const memory area 1 pointer
+* x1 - const memory area 2 pointer
+* x2 - the maximal compare byte length
+* Returns:
+* x0 - a compare result, maybe less than, equal to, or greater than ZERO
+*/
+
+/* Parameters and result. */
+src1 .req x0
+src2 .req x1
+limit .req x2
+result .req x0
+
+/* Internal variables. */
+data1 .req x3
+data1w .req w3
+data2 .req x4
+data2w .req w4
+has_nul .req x5
+diff .req x6
+endloop .req x7
+tmp1 .req x8
+tmp2 .req x9
+tmp3 .req x10
+pos .req x11
+limit_wd .req x12
+mask .req x13
+
+ENTRY(memcmp)
+ cbz limit, .Lret0
+ eor tmp1, src1, src2
+ tst tmp1, #7
+ b.ne .Lmisaligned8
+ ands tmp1, src1, #7
+ b.ne .Lmutual_align
+ sub limit_wd, limit, #1 /* limit != 0, so no underflow. */
+ lsr limit_wd, limit_wd, #3 /* Convert to Dwords. */
+ /*
+ * The input source addresses are at alignment boundary.
+ * Directly compare eight bytes each time.
+ */
+.Lloop_aligned:
+ ldr data1, [src1], #8
+ ldr data2, [src2], #8
+.Lstart_realigned:
+ subs limit_wd, limit_wd, #1
+ eor diff, data1, data2 /* Non-zero if differences found. */
+ csinv endloop, diff, xzr, cs /* Last Dword or differences. */
+ cbz endloop, .Lloop_aligned
+
+ /* Not reached the limit, must have found a diff. */
+ tbz limit_wd, #63, .Lnot_limit
+
+ /* Limit % 8 == 0 => the diff is in the last 8 bytes. */
+ ands limit, limit, #7
+ b.eq .Lnot_limit
+ /*
+ * The remained bytes less than 8. It is needed to extract valid data
+ * from last eight bytes of the intended memory range.
+ */
+ lsl limit, limit, #3 /* bytes-> bits. */
+ mov mask, #~0
+CPU_BE( lsr mask, mask, limit )
+CPU_LE( lsl mask, mask, limit )
+ bic data1, data1, mask
+ bic data2, data2, mask
+
+ orr diff, diff, mask
+ b .Lnot_limit
+
+.Lmutual_align:
+ /*
+ * Sources are mutually aligned, but are not currently at an
+ * alignment boundary. Round down the addresses and then mask off
+ * the bytes that precede the start point.
+ */
+ bic src1, src1, #7
+ bic src2, src2, #7
+ ldr data1, [src1], #8
+ ldr data2, [src2], #8
+ /*
+ * We can not add limit with alignment offset(tmp1) here. Since the
+ * addition probably make the limit overflown.
+ */
+ sub limit_wd, limit, #1/*limit != 0, so no underflow.*/
+ and tmp3, limit_wd, #7
+ lsr limit_wd, limit_wd, #3
+ add tmp3, tmp3, tmp1
+ add limit_wd, limit_wd, tmp3, lsr #3
+ add limit, limit, tmp1/* Adjust the limit for the extra. */
+
+ lsl tmp1, tmp1, #3/* Bytes beyond alignment -> bits.*/
+ neg tmp1, tmp1/* Bits to alignment -64. */
+ mov tmp2, #~0
+ /*mask off the non-intended bytes before the start address.*/
+CPU_BE( lsl tmp2, tmp2, tmp1 )/*Big-endian.Early bytes are at MSB*/
+ /* Little-endian. Early bytes are at LSB. */
+CPU_LE( lsr tmp2, tmp2, tmp1 )
+
+ orr data1, data1, tmp2
+ orr data2, data2, tmp2
+ b .Lstart_realigned
+
+ /*src1 and src2 have different alignment offset.*/
+.Lmisaligned8:
+ cmp limit, #8
+ b.lo .Ltiny8proc /*limit < 8: compare byte by byte*/
+
+ and tmp1, src1, #7
+ neg tmp1, tmp1
+ add tmp1, tmp1, #8/*valid length in the first 8 bytes of src1*/
+ and tmp2, src2, #7
+ neg tmp2, tmp2
+ add tmp2, tmp2, #8/*valid length in the first 8 bytes of src2*/
+ subs tmp3, tmp1, tmp2
+ csel pos, tmp1, tmp2, hi /*Choose the maximum.*/
+
+ sub limit, limit, pos
+ /*compare the proceeding bytes in the first 8 byte segment.*/
+.Ltinycmp:
+ ldrb data1w, [src1], #1
+ ldrb data2w, [src2], #1
+ subs pos, pos, #1
+ ccmp data1w, data2w, #0, ne /* NZCV = 0b0000. */
+ b.eq .Ltinycmp
+ cbnz pos, 1f /*diff occurred before the last byte.*/
+ cmp data1w, data2w
+ b.eq .Lstart_align
+1:
+ sub result, data1, data2
+ ret
+
+.Lstart_align:
+ lsr limit_wd, limit, #3
+ cbz limit_wd, .Lremain8
+
+ ands xzr, src1, #7
+ b.eq .Lrecal_offset
+ /*process more leading bytes to make src1 aligned...*/
+ add src1, src1, tmp3 /*backwards src1 to alignment boundary*/
+ add src2, src2, tmp3
+ sub limit, limit, tmp3
+ lsr limit_wd, limit, #3
+ cbz limit_wd, .Lremain8
+ /*load 8 bytes from aligned SRC1..*/
+ ldr data1, [src1], #8
+ ldr data2, [src2], #8
+
+ subs limit_wd, limit_wd, #1
+ eor diff, data1, data2 /*Non-zero if differences found.*/
+ csinv endloop, diff, xzr, ne
+ cbnz endloop, .Lunequal_proc
+ /*How far is the current SRC2 from the alignment boundary...*/
+ and tmp3, tmp3, #7
+
+.Lrecal_offset:/*src1 is aligned now..*/
+ neg pos, tmp3
+.Lloopcmp_proc:
+ /*
+ * Divide the eight bytes into two parts. First,backwards the src2
+ * to an alignment boundary,load eight bytes and compare from
+ * the SRC2 alignment boundary. If all 8 bytes are equal,then start
+ * the second part's comparison. Otherwise finish the comparison.
+ * This special handle can garantee all the accesses are in the
+ * thread/task space in avoid to overrange access.
+ */
+ ldr data1, [src1,pos]
+ ldr data2, [src2,pos]
+ eor diff, data1, data2 /* Non-zero if differences found. */
+ cbnz diff, .Lnot_limit
+
+ /*The second part process*/
+ ldr data1, [src1], #8
+ ldr data2, [src2], #8
+ eor diff, data1, data2 /* Non-zero if differences found. */
+ subs limit_wd, limit_wd, #1
+ csinv endloop, diff, xzr, ne/*if limit_wd is 0,will finish the cmp*/
+ cbz endloop, .Lloopcmp_proc
+.Lunequal_proc:
+ cbz diff, .Lremain8
+
+/*There is differnence occured in the latest comparison.*/
+.Lnot_limit:
+/*
+* For little endian,reverse the low significant equal bits into MSB,then
+* following CLZ can find how many equal bits exist.
+*/
+CPU_LE( rev diff, diff )
+CPU_LE( rev data1, data1 )
+CPU_LE( rev data2, data2 )
+
+ /*
+ * The MS-non-zero bit of DIFF marks either the first bit
+ * that is different, or the end of the significant data.
+ * Shifting left now will bring the critical information into the
+ * top bits.
+ */
+ clz pos, diff
+ lsl data1, data1, pos
+ lsl data2, data2, pos
+ /*
+ * We need to zero-extend (char is unsigned) the value and then
+ * perform a signed subtraction.
+ */
+ lsr data1, data1, #56
+ sub result, data1, data2, lsr #56
+ ret
+
+.Lremain8:
+ /* Limit % 8 == 0 =>. all data are equal.*/
+ ands limit, limit, #7
+ b.eq .Lret0
+
+.Ltiny8proc:
+ ldrb data1w, [src1], #1
+ ldrb data2w, [src2], #1
+ subs limit, limit, #1
+
+ ccmp data1w, data2w, #0, ne /* NZCV = 0b0000. */
+ b.eq .Ltiny8proc
+ sub result, data1, data2
+ ret
+.Lret0:
+ mov result, #0
+ ret
+ENDPROC(memcmp)
diff --git a/arch/arm64/lib/memcpy.S b/arch/arm64/lib/memcpy.S
index 27b5003609b6..8a9a96d3ddae 100644
--- a/arch/arm64/lib/memcpy.S
+++ b/arch/arm64/lib/memcpy.S
@@ -1,5 +1,13 @@
/*
* Copyright (C) 2013 ARM Ltd.
+ * Copyright (C) 2013 Linaro.
+ *
+ * This code is based on glibc cortex strings work originally authored by Linaro
+ * and re-licensed under GPLv2 for the Linux kernel. The original code can
+ * be found @
+ *
+ * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
+ * files/head:/src/aarch64/
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -16,6 +24,7 @@
#include <linux/linkage.h>
#include <asm/assembler.h>
+#include <asm/cache.h>
/*
* Copy a buffer from src to dest (alignment handled by the hardware)
@@ -27,27 +36,166 @@
* Returns:
* x0 - dest
*/
+dstin .req x0
+src .req x1
+count .req x2
+tmp1 .req x3
+tmp1w .req w3
+tmp2 .req x4
+tmp2w .req w4
+tmp3 .req x5
+tmp3w .req w5
+dst .req x6
+
+A_l .req x7
+A_h .req x8
+B_l .req x9
+B_h .req x10
+C_l .req x11
+C_h .req x12
+D_l .req x13
+D_h .req x14
+
ENTRY(memcpy)
- mov x4, x0
- subs x2, x2, #8
- b.mi 2f
-1: ldr x3, [x1], #8
- subs x2, x2, #8
- str x3, [x4], #8
- b.pl 1b
-2: adds x2, x2, #4
- b.mi 3f
- ldr w3, [x1], #4
- sub x2, x2, #4
- str w3, [x4], #4
-3: adds x2, x2, #2
- b.mi 4f
- ldrh w3, [x1], #2
- sub x2, x2, #2
- strh w3, [x4], #2
-4: adds x2, x2, #1
- b.mi 5f
- ldrb w3, [x1]
- strb w3, [x4]
-5: ret
+ mov dst, dstin
+ cmp count, #16
+ /*When memory length is less than 16, the accessed are not aligned.*/
+ b.lo .Ltiny15
+
+ neg tmp2, src
+ ands tmp2, tmp2, #15/* Bytes to reach alignment. */
+ b.eq .LSrcAligned
+ sub count, count, tmp2
+ /*
+ * Copy the leading memory data from src to dst in an increasing
+ * address order.By this way,the risk of overwritting the source
+ * memory data is eliminated when the distance between src and
+ * dst is less than 16. The memory accesses here are alignment.
+ */
+ tbz tmp2, #0, 1f
+ ldrb tmp1w, [src], #1
+ strb tmp1w, [dst], #1
+1:
+ tbz tmp2, #1, 2f
+ ldrh tmp1w, [src], #2
+ strh tmp1w, [dst], #2
+2:
+ tbz tmp2, #2, 3f
+ ldr tmp1w, [src], #4
+ str tmp1w, [dst], #4
+3:
+ tbz tmp2, #3, .LSrcAligned
+ ldr tmp1, [src],#8
+ str tmp1, [dst],#8
+
+.LSrcAligned:
+ cmp count, #64
+ b.ge .Lcpy_over64
+ /*
+ * Deal with small copies quickly by dropping straight into the
+ * exit block.
+ */
+.Ltail63:
+ /*
+ * Copy up to 48 bytes of data. At this point we only need the
+ * bottom 6 bits of count to be accurate.
+ */
+ ands tmp1, count, #0x30
+ b.eq .Ltiny15
+ cmp tmp1w, #0x20
+ b.eq 1f
+ b.lt 2f
+ ldp A_l, A_h, [src], #16
+ stp A_l, A_h, [dst], #16
+1:
+ ldp A_l, A_h, [src], #16
+ stp A_l, A_h, [dst], #16
+2:
+ ldp A_l, A_h, [src], #16
+ stp A_l, A_h, [dst], #16
+.Ltiny15:
+ /*
+ * Prefer to break one ldp/stp into several load/store to access
+ * memory in an increasing address order,rather than to load/store 16
+ * bytes from (src-16) to (dst-16) and to backward the src to aligned
+ * address,which way is used in original cortex memcpy. If keeping
+ * the original memcpy process here, memmove need to satisfy the
+ * precondition that src address is at least 16 bytes bigger than dst
+ * address,otherwise some source data will be overwritten when memove
+ * call memcpy directly. To make memmove simpler and decouple the
+ * memcpy's dependency on memmove, withdrew the original process.
+ */
+ tbz count, #3, 1f
+ ldr tmp1, [src], #8
+ str tmp1, [dst], #8
+1:
+ tbz count, #2, 2f
+ ldr tmp1w, [src], #4
+ str tmp1w, [dst], #4
+2:
+ tbz count, #1, 3f
+ ldrh tmp1w, [src], #2
+ strh tmp1w, [dst], #2
+3:
+ tbz count, #0, .Lexitfunc
+ ldrb tmp1w, [src]
+ strb tmp1w, [dst]
+
+.Lexitfunc:
+ ret
+
+.Lcpy_over64:
+ subs count, count, #128
+ b.ge .Lcpy_body_large
+ /*
+ * Less than 128 bytes to copy, so handle 64 here and then jump
+ * to the tail.
+ */
+ ldp A_l, A_h, [src],#16
+ stp A_l, A_h, [dst],#16
+ ldp B_l, B_h, [src],#16
+ ldp C_l, C_h, [src],#16
+ stp B_l, B_h, [dst],#16
+ stp C_l, C_h, [dst],#16
+ ldp D_l, D_h, [src],#16
+ stp D_l, D_h, [dst],#16
+
+ tst count, #0x3f
+ b.ne .Ltail63
+ ret
+
+ /*
+ * Critical loop. Start at a new cache line boundary. Assuming
+ * 64 bytes per line this ensures the entire loop is in one line.
+ */
+ .p2align L1_CACHE_SHIFT
+.Lcpy_body_large:
+ /* pre-get 64 bytes data. */
+ ldp A_l, A_h, [src],#16
+ ldp B_l, B_h, [src],#16
+ ldp C_l, C_h, [src],#16
+ ldp D_l, D_h, [src],#16
+1:
+ /*
+ * interlace the load of next 64 bytes data block with store of the last
+ * loaded 64 bytes data.
+ */
+ stp A_l, A_h, [dst],#16
+ ldp A_l, A_h, [src],#16
+ stp B_l, B_h, [dst],#16
+ ldp B_l, B_h, [src],#16
+ stp C_l, C_h, [dst],#16
+ ldp C_l, C_h, [src],#16
+ stp D_l, D_h, [dst],#16
+ ldp D_l, D_h, [src],#16
+ subs count, count, #64
+ b.ge 1b
+ stp A_l, A_h, [dst],#16
+ stp B_l, B_h, [dst],#16
+ stp C_l, C_h, [dst],#16
+ stp D_l, D_h, [dst],#16
+
+ tst count, #0x3f
+ b.ne .Ltail63
+ ret
ENDPROC(memcpy)
diff --git a/arch/arm64/lib/memmove.S b/arch/arm64/lib/memmove.S
index b79fdfa42d39..57b19ea2dad4 100644
--- a/arch/arm64/lib/memmove.S
+++ b/arch/arm64/lib/memmove.S
@@ -1,5 +1,13 @@
/*
* Copyright (C) 2013 ARM Ltd.
+ * Copyright (C) 2013 Linaro.
+ *
+ * This code is based on glibc cortex strings work originally authored by Linaro
+ * and re-licensed under GPLv2 for the Linux kernel. The original code can
+ * be found @
+ *
+ * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
+ * files/head:/src/aarch64/
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -16,6 +24,7 @@
#include <linux/linkage.h>
#include <asm/assembler.h>
+#include <asm/cache.h>
/*
* Move a buffer from src to test (alignment handled by the hardware).
@@ -28,30 +37,161 @@
* Returns:
* x0 - dest
*/
+dstin .req x0
+src .req x1
+count .req x2
+tmp1 .req x3
+tmp1w .req w3
+tmp2 .req x4
+tmp2w .req w4
+tmp3 .req x5
+tmp3w .req w5
+dst .req x6
+
+A_l .req x7
+A_h .req x8
+B_l .req x9
+B_h .req x10
+C_l .req x11
+C_h .req x12
+D_l .req x13
+D_h .req x14
+
ENTRY(memmove)
- cmp x0, x1
- b.ls memcpy
- add x4, x0, x2
- add x1, x1, x2
- subs x2, x2, #8
- b.mi 2f
-1: ldr x3, [x1, #-8]!
- subs x2, x2, #8
- str x3, [x4, #-8]!
- b.pl 1b
-2: adds x2, x2, #4
- b.mi 3f
- ldr w3, [x1, #-4]!
- sub x2, x2, #4
- str w3, [x4, #-4]!
-3: adds x2, x2, #2
- b.mi 4f
- ldrh w3, [x1, #-2]!
- sub x2, x2, #2
- strh w3, [x4, #-2]!
-4: adds x2, x2, #1
- b.mi 5f
- ldrb w3, [x1, #-1]
- strb w3, [x4, #-1]
-5: ret
+ cmp dstin, src
+ b.lo memcpy
+ add tmp1, src, count
+ cmp dstin, tmp1
+ b.hs memcpy /* No overlap. */
+
+ add dst, dstin, count
+ add src, src, count
+ cmp count, #16
+ b.lo .Ltail15 /*probably non-alignment accesses.*/
+
+ ands tmp2, src, #15 /* Bytes to reach alignment. */
+ b.eq .LSrcAligned
+ sub count, count, tmp2
+ /*
+ * process the aligned offset length to make the src aligned firstly.
+ * those extra instructions' cost is acceptable. It also make the
+ * coming accesses are based on aligned address.
+ */
+ tbz tmp2, #0, 1f
+ ldrb tmp1w, [src, #-1]!
+ strb tmp1w, [dst, #-1]!
+1:
+ tbz tmp2, #1, 2f
+ ldrh tmp1w, [src, #-2]!
+ strh tmp1w, [dst, #-2]!
+2:
+ tbz tmp2, #2, 3f
+ ldr tmp1w, [src, #-4]!
+ str tmp1w, [dst, #-4]!
+3:
+ tbz tmp2, #3, .LSrcAligned
+ ldr tmp1, [src, #-8]!
+ str tmp1, [dst, #-8]!
+
+.LSrcAligned:
+ cmp count, #64
+ b.ge .Lcpy_over64
+
+ /*
+ * Deal with small copies quickly by dropping straight into the
+ * exit block.
+ */
+.Ltail63:
+ /*
+ * Copy up to 48 bytes of data. At this point we only need the
+ * bottom 6 bits of count to be accurate.
+ */
+ ands tmp1, count, #0x30
+ b.eq .Ltail15
+ cmp tmp1w, #0x20
+ b.eq 1f
+ b.lt 2f
+ ldp A_l, A_h, [src, #-16]!
+ stp A_l, A_h, [dst, #-16]!
+1:
+ ldp A_l, A_h, [src, #-16]!
+ stp A_l, A_h, [dst, #-16]!
+2:
+ ldp A_l, A_h, [src, #-16]!
+ stp A_l, A_h, [dst, #-16]!
+
+.Ltail15:
+ tbz count, #3, 1f
+ ldr tmp1, [src, #-8]!
+ str tmp1, [dst, #-8]!
+1:
+ tbz count, #2, 2f
+ ldr tmp1w, [src, #-4]!
+ str tmp1w, [dst, #-4]!
+2:
+ tbz count, #1, 3f
+ ldrh tmp1w, [src, #-2]!
+ strh tmp1w, [dst, #-2]!
+3:
+ tbz count, #0, .Lexitfunc
+ ldrb tmp1w, [src, #-1]
+ strb tmp1w, [dst, #-1]
+
+.Lexitfunc:
+ ret
+
+.Lcpy_over64:
+ subs count, count, #128
+ b.ge .Lcpy_body_large
+ /*
+ * Less than 128 bytes to copy, so handle 64 bytes here and then jump
+ * to the tail.
+ */
+ ldp A_l, A_h, [src, #-16]
+ stp A_l, A_h, [dst, #-16]
+ ldp B_l, B_h, [src, #-32]
+ ldp C_l, C_h, [src, #-48]
+ stp B_l, B_h, [dst, #-32]
+ stp C_l, C_h, [dst, #-48]
+ ldp D_l, D_h, [src, #-64]!
+ stp D_l, D_h, [dst, #-64]!
+
+ tst count, #0x3f
+ b.ne .Ltail63
+ ret
+
+ /*
+ * Critical loop. Start at a new cache line boundary. Assuming
+ * 64 bytes per line this ensures the entire loop is in one line.
+ */
+ .p2align L1_CACHE_SHIFT
+.Lcpy_body_large:
+ /* pre-load 64 bytes data. */
+ ldp A_l, A_h, [src, #-16]
+ ldp B_l, B_h, [src, #-32]
+ ldp C_l, C_h, [src, #-48]
+ ldp D_l, D_h, [src, #-64]!
+1:
+ /*
+ * interlace the load of next 64 bytes data block with store of the last
+ * loaded 64 bytes data.
+ */
+ stp A_l, A_h, [dst, #-16]
+ ldp A_l, A_h, [src, #-16]
+ stp B_l, B_h, [dst, #-32]
+ ldp B_l, B_h, [src, #-32]
+ stp C_l, C_h, [dst, #-48]
+ ldp C_l, C_h, [src, #-48]
+ stp D_l, D_h, [dst, #-64]!
+ ldp D_l, D_h, [src, #-64]!
+ subs count, count, #64
+ b.ge 1b
+ stp A_l, A_h, [dst, #-16]
+ stp B_l, B_h, [dst, #-32]
+ stp C_l, C_h, [dst, #-48]
+ stp D_l, D_h, [dst, #-64]!
+
+ tst count, #0x3f
+ b.ne .Ltail63
+ ret
ENDPROC(memmove)
diff --git a/arch/arm64/lib/memset.S b/arch/arm64/lib/memset.S
index 87e4a68fbbbc..7c72dfd36b63 100644
--- a/arch/arm64/lib/memset.S
+++ b/arch/arm64/lib/memset.S
@@ -1,5 +1,13 @@
/*
* Copyright (C) 2013 ARM Ltd.
+ * Copyright (C) 2013 Linaro.
+ *
+ * This code is based on glibc cortex strings work originally authored by Linaro
+ * and re-licensed under GPLv2 for the Linux kernel. The original code can
+ * be found @
+ *
+ * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
+ * files/head:/src/aarch64/
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -16,6 +24,7 @@
#include <linux/linkage.h>
#include <asm/assembler.h>
+#include <asm/cache.h>
/*
* Fill in the buffer with character c (alignment handled by the hardware)
@@ -27,27 +36,181 @@
* Returns:
* x0 - buf
*/
+
+dstin .req x0
+val .req w1
+count .req x2
+tmp1 .req x3
+tmp1w .req w3
+tmp2 .req x4
+tmp2w .req w4
+zva_len_x .req x5
+zva_len .req w5
+zva_bits_x .req x6
+
+A_l .req x7
+A_lw .req w7
+dst .req x8
+tmp3w .req w9
+tmp3 .req x9
+
ENTRY(memset)
- mov x4, x0
- and w1, w1, #0xff
- orr w1, w1, w1, lsl #8
- orr w1, w1, w1, lsl #16
- orr x1, x1, x1, lsl #32
- subs x2, x2, #8
- b.mi 2f
-1: str x1, [x4], #8
- subs x2, x2, #8
- b.pl 1b
-2: adds x2, x2, #4
- b.mi 3f
- sub x2, x2, #4
- str w1, [x4], #4
-3: adds x2, x2, #2
- b.mi 4f
- sub x2, x2, #2
- strh w1, [x4], #2
-4: adds x2, x2, #1
- b.mi 5f
- strb w1, [x4]
-5: ret
+ mov dst, dstin /* Preserve return value. */
+ and A_lw, val, #255
+ orr A_lw, A_lw, A_lw, lsl #8
+ orr A_lw, A_lw, A_lw, lsl #16
+ orr A_l, A_l, A_l, lsl #32
+
+ cmp count, #15
+ b.hi .Lover16_proc
+ /*All store maybe are non-aligned..*/
+ tbz count, #3, 1f
+ str A_l, [dst], #8
+1:
+ tbz count, #2, 2f
+ str A_lw, [dst], #4
+2:
+ tbz count, #1, 3f
+ strh A_lw, [dst], #2
+3:
+ tbz count, #0, 4f
+ strb A_lw, [dst]
+4:
+ ret
+
+.Lover16_proc:
+ /*Whether the start address is aligned with 16.*/
+ neg tmp2, dst
+ ands tmp2, tmp2, #15
+ b.eq .Laligned
+/*
+* The count is not less than 16, we can use stp to store the start 16 bytes,
+* then adjust the dst aligned with 16.This process will make the current
+* memory address at alignment boundary.
+*/
+ stp A_l, A_l, [dst] /*non-aligned store..*/
+ /*make the dst aligned..*/
+ sub count, count, tmp2
+ add dst, dst, tmp2
+
+.Laligned:
+ cbz A_l, .Lzero_mem
+
+.Ltail_maybe_long:
+ cmp count, #64
+ b.ge .Lnot_short
+.Ltail63:
+ ands tmp1, count, #0x30
+ b.eq 3f
+ cmp tmp1w, #0x20
+ b.eq 1f
+ b.lt 2f
+ stp A_l, A_l, [dst], #16
+1:
+ stp A_l, A_l, [dst], #16
+2:
+ stp A_l, A_l, [dst], #16
+/*
+* The last store length is less than 16,use stp to write last 16 bytes.
+* It will lead some bytes written twice and the access is non-aligned.
+*/
+3:
+ ands count, count, #15
+ cbz count, 4f
+ add dst, dst, count
+ stp A_l, A_l, [dst, #-16] /* Repeat some/all of last store. */
+4:
+ ret
+
+ /*
+ * Critical loop. Start at a new cache line boundary. Assuming
+ * 64 bytes per line, this ensures the entire loop is in one line.
+ */
+ .p2align L1_CACHE_SHIFT
+.Lnot_short:
+ sub dst, dst, #16/* Pre-bias. */
+ sub count, count, #64
+1:
+ stp A_l, A_l, [dst, #16]
+ stp A_l, A_l, [dst, #32]
+ stp A_l, A_l, [dst, #48]
+ stp A_l, A_l, [dst, #64]!
+ subs count, count, #64
+ b.ge 1b
+ tst count, #0x3f
+ add dst, dst, #16
+ b.ne .Ltail63
+.Lexitfunc:
+ ret
+
+ /*
+ * For zeroing memory, check to see if we can use the ZVA feature to
+ * zero entire 'cache' lines.
+ */
+.Lzero_mem:
+ cmp count, #63
+ b.le .Ltail63
+ /*
+ * For zeroing small amounts of memory, it's not worth setting up
+ * the line-clear code.
+ */
+ cmp count, #128
+ b.lt .Lnot_short /*count is at least 128 bytes*/
+
+ mrs tmp1, dczid_el0
+ tbnz tmp1, #4, .Lnot_short
+ mov tmp3w, #4
+ and zva_len, tmp1w, #15 /* Safety: other bits reserved. */
+ lsl zva_len, tmp3w, zva_len
+
+ ands tmp3w, zva_len, #63
+ /*
+ * ensure the zva_len is not less than 64.
+ * It is not meaningful to use ZVA if the block size is less than 64.
+ */
+ b.ne .Lnot_short
+.Lzero_by_line:
+ /*
+ * Compute how far we need to go to become suitably aligned. We're
+ * already at quad-word alignment.
+ */
+ cmp count, zva_len_x
+ b.lt .Lnot_short /* Not enough to reach alignment. */
+ sub zva_bits_x, zva_len_x, #1
+ neg tmp2, dst
+ ands tmp2, tmp2, zva_bits_x
+ b.eq 2f /* Already aligned. */
+ /* Not aligned, check that there's enough to copy after alignment.*/
+ sub tmp1, count, tmp2
+ /*
+ * grantee the remain length to be ZVA is bigger than 64,
+ * avoid to make the 2f's process over mem range.*/
+ cmp tmp1, #64
+ ccmp tmp1, zva_len_x, #8, ge /* NZCV=0b1000 */
+ b.lt .Lnot_short
+ /*
+ * We know that there's at least 64 bytes to zero and that it's safe
+ * to overrun by 64 bytes.
+ */
+ mov count, tmp1
+1:
+ stp A_l, A_l, [dst]
+ stp A_l, A_l, [dst, #16]
+ stp A_l, A_l, [dst, #32]
+ subs tmp2, tmp2, #64
+ stp A_l, A_l, [dst, #48]
+ add dst, dst, #64
+ b.ge 1b
+ /* We've overrun a bit, so adjust dst downwards.*/
+ add dst, dst, tmp2
+2:
+ sub count, count, zva_len_x
+3:
+ dc zva, dst
+ add dst, dst, zva_len_x
+ subs count, count, zva_len_x
+ b.ge 3b
+ ands count, count, zva_bits_x
+ b.ne .Ltail_maybe_long
+ ret
ENDPROC(memset)
diff --git a/arch/arm64/lib/strcmp.S b/arch/arm64/lib/strcmp.S
new file mode 100644
index 000000000000..42f828b06c59
--- /dev/null
+++ b/arch/arm64/lib/strcmp.S
@@ -0,0 +1,234 @@
+/*
+ * Copyright (C) 2013 ARM Ltd.
+ * Copyright (C) 2013 Linaro.
+ *
+ * This code is based on glibc cortex strings work originally authored by Linaro
+ * and re-licensed under GPLv2 for the Linux kernel. The original code can
+ * be found @
+ *
+ * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
+ * files/head:/src/aarch64/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+/*
+ * compare two strings
+ *
+ * Parameters:
+ * x0 - const string 1 pointer
+ * x1 - const string 2 pointer
+ * Returns:
+ * x0 - an integer less than, equal to, or greater than zero
+ * if s1 is found, respectively, to be less than, to match,
+ * or be greater than s2.
+ */
+
+#define REP8_01 0x0101010101010101
+#define REP8_7f 0x7f7f7f7f7f7f7f7f
+#define REP8_80 0x8080808080808080
+
+/* Parameters and result. */
+src1 .req x0
+src2 .req x1
+result .req x0
+
+/* Internal variables. */
+data1 .req x2
+data1w .req w2
+data2 .req x3
+data2w .req w3
+has_nul .req x4
+diff .req x5
+syndrome .req x6
+tmp1 .req x7
+tmp2 .req x8
+tmp3 .req x9
+zeroones .req x10
+pos .req x11
+
+ENTRY(strcmp)
+ eor tmp1, src1, src2
+ mov zeroones, #REP8_01
+ tst tmp1, #7
+ b.ne .Lmisaligned8
+ ands tmp1, src1, #7
+ b.ne .Lmutual_align
+
+ /*
+ * NUL detection works on the principle that (X - 1) & (~X) & 0x80
+ * (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
+ * can be done in parallel across the entire word.
+ */
+.Lloop_aligned:
+ ldr data1, [src1], #8
+ ldr data2, [src2], #8
+.Lstart_realigned:
+ sub tmp1, data1, zeroones
+ orr tmp2, data1, #REP8_7f
+ eor diff, data1, data2 /* Non-zero if differences found. */
+ bic has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */
+ orr syndrome, diff, has_nul
+ cbz syndrome, .Lloop_aligned
+ b .Lcal_cmpresult
+
+.Lmutual_align:
+ /*
+ * Sources are mutually aligned, but are not currently at an
+ * alignment boundary. Round down the addresses and then mask off
+ * the bytes that preceed the start point.
+ */
+ bic src1, src1, #7
+ bic src2, src2, #7
+ lsl tmp1, tmp1, #3 /* Bytes beyond alignment -> bits. */
+ ldr data1, [src1], #8
+ neg tmp1, tmp1 /* Bits to alignment -64. */
+ ldr data2, [src2], #8
+ mov tmp2, #~0
+ /* Big-endian. Early bytes are at MSB. */
+CPU_BE( lsl tmp2, tmp2, tmp1 ) /* Shift (tmp1 & 63). */
+ /* Little-endian. Early bytes are at LSB. */
+CPU_LE( lsr tmp2, tmp2, tmp1 ) /* Shift (tmp1 & 63). */
+
+ orr data1, data1, tmp2
+ orr data2, data2, tmp2
+ b .Lstart_realigned
+
+.Lmisaligned8:
+ /*
+ * Get the align offset length to compare per byte first.
+ * After this process, one string's address will be aligned.
+ */
+ and tmp1, src1, #7
+ neg tmp1, tmp1
+ add tmp1, tmp1, #8
+ and tmp2, src2, #7
+ neg tmp2, tmp2
+ add tmp2, tmp2, #8
+ subs tmp3, tmp1, tmp2
+ csel pos, tmp1, tmp2, hi /*Choose the maximum. */
+.Ltinycmp:
+ ldrb data1w, [src1], #1
+ ldrb data2w, [src2], #1
+ subs pos, pos, #1
+ ccmp data1w, #1, #0, ne /* NZCV = 0b0000. */
+ ccmp data1w, data2w, #0, cs /* NZCV = 0b0000. */
+ b.eq .Ltinycmp
+ cbnz pos, 1f /*find the null or unequal...*/
+ cmp data1w, #1
+ ccmp data1w, data2w, #0, cs
+ b.eq .Lstart_align /*the last bytes are equal....*/
+1:
+ sub result, data1, data2
+ ret
+
+.Lstart_align:
+ ands xzr, src1, #7
+ b.eq .Lrecal_offset
+ /*process more leading bytes to make str1 aligned...*/
+ add src1, src1, tmp3
+ add src2, src2, tmp3
+ /*load 8 bytes from aligned str1 and non-aligned str2..*/
+ ldr data1, [src1], #8
+ ldr data2, [src2], #8
+
+ sub tmp1, data1, zeroones
+ orr tmp2, data1, #REP8_7f
+ bic has_nul, tmp1, tmp2
+ eor diff, data1, data2 /* Non-zero if differences found. */
+ orr syndrome, diff, has_nul
+ cbnz syndrome, .Lcal_cmpresult
+ /*How far is the current str2 from the alignment boundary...*/
+ and tmp3, tmp3, #7
+.Lrecal_offset:
+ neg pos, tmp3
+.Lloopcmp_proc:
+ /*
+ * Divide the eight bytes into two parts. First,backwards the src2
+ * to an alignment boundary,load eight bytes from the SRC2 alignment
+ * boundary,then compare with the relative bytes from SRC1.
+ * If all 8 bytes are equal,then start the second part's comparison.
+ * Otherwise finish the comparison.
+ * This special handle can garantee all the accesses are in the
+ * thread/task space in avoid to overrange access.
+ */
+ ldr data1, [src1,pos]
+ ldr data2, [src2,pos]
+ sub tmp1, data1, zeroones
+ orr tmp2, data1, #REP8_7f
+ bic has_nul, tmp1, tmp2
+ eor diff, data1, data2 /* Non-zero if differences found. */
+ orr syndrome, diff, has_nul
+ cbnz syndrome, .Lcal_cmpresult
+
+ /*The second part process*/
+ ldr data1, [src1], #8
+ ldr data2, [src2], #8
+ sub tmp1, data1, zeroones
+ orr tmp2, data1, #REP8_7f
+ bic has_nul, tmp1, tmp2
+ eor diff, data1, data2 /* Non-zero if differences found. */
+ orr syndrome, diff, has_nul
+ cbz syndrome, .Lloopcmp_proc
+
+.Lcal_cmpresult:
+ /*
+ * reversed the byte-order as big-endian,then CLZ can find the most
+ * significant zero bits.
+ */
+CPU_LE( rev syndrome, syndrome )
+CPU_LE( rev data1, data1 )
+CPU_LE( rev data2, data2 )
+
+ /*
+ * For big-endian we cannot use the trick with the syndrome value
+ * as carry-propagation can corrupt the upper bits if the trailing
+ * bytes in the string contain 0x01.
+ * However, if there is no NUL byte in the dword, we can generate
+ * the result directly. We ca not just subtract the bytes as the
+ * MSB might be significant.
+ */
+CPU_BE( cbnz has_nul, 1f )
+CPU_BE( cmp data1, data2 )
+CPU_BE( cset result, ne )
+CPU_BE( cneg result, result, lo )
+CPU_BE( ret )
+CPU_BE( 1: )
+ /*Re-compute the NUL-byte detection, using a byte-reversed value. */
+CPU_BE( rev tmp3, data1 )
+CPU_BE( sub tmp1, tmp3, zeroones )
+CPU_BE( orr tmp2, tmp3, #REP8_7f )
+CPU_BE( bic has_nul, tmp1, tmp2 )
+CPU_BE( rev has_nul, has_nul )
+CPU_BE( orr syndrome, diff, has_nul )
+
+ clz pos, syndrome
+ /*
+ * The MS-non-zero bit of the syndrome marks either the first bit
+ * that is different, or the top bit of the first zero byte.
+ * Shifting left now will bring the critical information into the
+ * top bits.
+ */
+ lsl data1, data1, pos
+ lsl data2, data2, pos
+ /*
+ * But we need to zero-extend (char is unsigned) the value and then
+ * perform a signed 32-bit subtraction.
+ */
+ lsr data1, data1, #56
+ sub result, data1, data2, lsr #56
+ ret
+ENDPROC(strcmp)
diff --git a/arch/arm64/lib/strlen.S b/arch/arm64/lib/strlen.S
new file mode 100644
index 000000000000..987b68b9ce44
--- /dev/null
+++ b/arch/arm64/lib/strlen.S
@@ -0,0 +1,126 @@
+/*
+ * Copyright (C) 2013 ARM Ltd.
+ * Copyright (C) 2013 Linaro.
+ *
+ * This code is based on glibc cortex strings work originally authored by Linaro
+ * and re-licensed under GPLv2 for the Linux kernel. The original code can
+ * be found @
+ *
+ * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
+ * files/head:/src/aarch64/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+/*
+ * calculate the length of a string
+ *
+ * Parameters:
+ * x0 - const string pointer
+ * Returns:
+ * x0 - the return length of specific string
+ */
+
+/* Arguments and results. */
+srcin .req x0
+len .req x0
+
+/* Locals and temporaries. */
+src .req x1
+data1 .req x2
+data2 .req x3
+data2a .req x4
+has_nul1 .req x5
+has_nul2 .req x6
+tmp1 .req x7
+tmp2 .req x8
+tmp3 .req x9
+tmp4 .req x10
+zeroones .req x11
+pos .req x12
+
+#define REP8_01 0x0101010101010101
+#define REP8_7f 0x7f7f7f7f7f7f7f7f
+#define REP8_80 0x8080808080808080
+
+ENTRY(strlen)
+ mov zeroones, #REP8_01
+ bic src, srcin, #15
+ ands tmp1, srcin, #15
+ b.ne .Lmisaligned
+ /*
+ * NUL detection works on the principle that (X - 1) & (~X) & 0x80
+ * (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
+ * can be done in parallel across the entire word.
+ */
+ /*
+ * The inner loop deals with two Dwords at a time. This has a
+ * slightly higher start-up cost, but we should win quite quickly,
+ * especially on cores with a high number of issue slots per
+ * cycle, as we get much better parallelism out of the operations.
+ */
+.Lloop:
+ ldp data1, data2, [src], #16
+.Lrealigned:
+ sub tmp1, data1, zeroones
+ orr tmp2, data1, #REP8_7f
+ sub tmp3, data2, zeroones
+ orr tmp4, data2, #REP8_7f
+ bic has_nul1, tmp1, tmp2
+ bics has_nul2, tmp3, tmp4
+ ccmp has_nul1, #0, #0, eq /* NZCV = 0000 */
+ b.eq .Lloop
+
+ sub len, src, srcin
+ cbz has_nul1, .Lnul_in_data2
+CPU_BE( mov data2, data1 ) /*prepare data to re-calculate the syndrome*/
+ sub len, len, #8
+ mov has_nul2, has_nul1
+.Lnul_in_data2:
+ /*
+ * For big-endian, carry propagation (if the final byte in the
+ * string is 0x01) means we cannot use has_nul directly. The
+ * easiest way to get the correct byte is to byte-swap the data
+ * and calculate the syndrome a second time.
+ */
+CPU_BE( rev data2, data2 )
+CPU_BE( sub tmp1, data2, zeroones )
+CPU_BE( orr tmp2, data2, #REP8_7f )
+CPU_BE( bic has_nul2, tmp1, tmp2 )
+
+ sub len, len, #8
+ rev has_nul2, has_nul2
+ clz pos, has_nul2
+ add len, len, pos, lsr #3 /* Bits to bytes. */
+ ret
+
+.Lmisaligned:
+ cmp tmp1, #8
+ neg tmp1, tmp1
+ ldp data1, data2, [src], #16
+ lsl tmp1, tmp1, #3 /* Bytes beyond alignment -> bits. */
+ mov tmp2, #~0
+ /* Big-endian. Early bytes are at MSB. */
+CPU_BE( lsl tmp2, tmp2, tmp1 ) /* Shift (tmp1 & 63). */
+ /* Little-endian. Early bytes are at LSB. */
+CPU_LE( lsr tmp2, tmp2, tmp1 ) /* Shift (tmp1 & 63). */
+
+ orr data1, data1, tmp2
+ orr data2a, data2, tmp2
+ csinv data1, data1, xzr, le
+ csel data2, data2, data2a, le
+ b .Lrealigned
+ENDPROC(strlen)
diff --git a/arch/arm64/lib/strncmp.S b/arch/arm64/lib/strncmp.S
new file mode 100644
index 000000000000..0224cf5a5533
--- /dev/null
+++ b/arch/arm64/lib/strncmp.S
@@ -0,0 +1,310 @@
+/*
+ * Copyright (C) 2013 ARM Ltd.
+ * Copyright (C) 2013 Linaro.
+ *
+ * This code is based on glibc cortex strings work originally authored by Linaro
+ * and re-licensed under GPLv2 for the Linux kernel. The original code can
+ * be found @
+ *
+ * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
+ * files/head:/src/aarch64/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+/*
+ * compare two strings
+ *
+ * Parameters:
+ * x0 - const string 1 pointer
+ * x1 - const string 2 pointer
+ * x2 - the maximal length to be compared
+ * Returns:
+ * x0 - an integer less than, equal to, or greater than zero if s1 is found,
+ * respectively, to be less than, to match, or be greater than s2.
+ */
+
+#define REP8_01 0x0101010101010101
+#define REP8_7f 0x7f7f7f7f7f7f7f7f
+#define REP8_80 0x8080808080808080
+
+/* Parameters and result. */
+src1 .req x0
+src2 .req x1
+limit .req x2
+result .req x0
+
+/* Internal variables. */
+data1 .req x3
+data1w .req w3
+data2 .req x4
+data2w .req w4
+has_nul .req x5
+diff .req x6
+syndrome .req x7
+tmp1 .req x8
+tmp2 .req x9
+tmp3 .req x10
+zeroones .req x11
+pos .req x12
+limit_wd .req x13
+mask .req x14
+endloop .req x15
+
+ENTRY(strncmp)
+ cbz limit, .Lret0
+ eor tmp1, src1, src2
+ mov zeroones, #REP8_01
+ tst tmp1, #7
+ b.ne .Lmisaligned8
+ ands tmp1, src1, #7
+ b.ne .Lmutual_align
+ /* Calculate the number of full and partial words -1. */
+ /*
+ * when limit is mulitply of 8, if not sub 1,
+ * the judgement of last dword will wrong.
+ */
+ sub limit_wd, limit, #1 /* limit != 0, so no underflow. */
+ lsr limit_wd, limit_wd, #3 /* Convert to Dwords. */
+
+ /*
+ * NUL detection works on the principle that (X - 1) & (~X) & 0x80
+ * (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
+ * can be done in parallel across the entire word.
+ */
+.Lloop_aligned:
+ ldr data1, [src1], #8
+ ldr data2, [src2], #8
+.Lstart_realigned:
+ subs limit_wd, limit_wd, #1
+ sub tmp1, data1, zeroones
+ orr tmp2, data1, #REP8_7f
+ eor diff, data1, data2 /* Non-zero if differences found. */
+ csinv endloop, diff, xzr, pl /* Last Dword or differences.*/
+ bics has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */
+ ccmp endloop, #0, #0, eq
+ b.eq .Lloop_aligned
+
+ /*Not reached the limit, must have found the end or a diff. */
+ tbz limit_wd, #63, .Lnot_limit
+
+ /* Limit % 8 == 0 => all bytes significant. */
+ ands limit, limit, #7
+ b.eq .Lnot_limit
+
+ lsl limit, limit, #3 /* Bits -> bytes. */
+ mov mask, #~0
+CPU_BE( lsr mask, mask, limit )
+CPU_LE( lsl mask, mask, limit )
+ bic data1, data1, mask
+ bic data2, data2, mask
+
+ /* Make sure that the NUL byte is marked in the syndrome. */
+ orr has_nul, has_nul, mask
+
+.Lnot_limit:
+ orr syndrome, diff, has_nul
+ b .Lcal_cmpresult
+
+.Lmutual_align:
+ /*
+ * Sources are mutually aligned, but are not currently at an
+ * alignment boundary. Round down the addresses and then mask off
+ * the bytes that precede the start point.
+ * We also need to adjust the limit calculations, but without
+ * overflowing if the limit is near ULONG_MAX.
+ */
+ bic src1, src1, #7
+ bic src2, src2, #7
+ ldr data1, [src1], #8
+ neg tmp3, tmp1, lsl #3 /* 64 - bits(bytes beyond align). */
+ ldr data2, [src2], #8
+ mov tmp2, #~0
+ sub limit_wd, limit, #1 /* limit != 0, so no underflow. */
+ /* Big-endian. Early bytes are at MSB. */
+CPU_BE( lsl tmp2, tmp2, tmp3 ) /* Shift (tmp1 & 63). */
+ /* Little-endian. Early bytes are at LSB. */
+CPU_LE( lsr tmp2, tmp2, tmp3 ) /* Shift (tmp1 & 63). */
+
+ and tmp3, limit_wd, #7
+ lsr limit_wd, limit_wd, #3
+ /* Adjust the limit. Only low 3 bits used, so overflow irrelevant.*/
+ add limit, limit, tmp1
+ add tmp3, tmp3, tmp1
+ orr data1, data1, tmp2
+ orr data2, data2, tmp2
+ add limit_wd, limit_wd, tmp3, lsr #3
+ b .Lstart_realigned
+
+/*when src1 offset is not equal to src2 offset...*/
+.Lmisaligned8:
+ cmp limit, #8
+ b.lo .Ltiny8proc /*limit < 8... */
+ /*
+ * Get the align offset length to compare per byte first.
+ * After this process, one string's address will be aligned.*/
+ and tmp1, src1, #7
+ neg tmp1, tmp1
+ add tmp1, tmp1, #8
+ and tmp2, src2, #7
+ neg tmp2, tmp2
+ add tmp2, tmp2, #8
+ subs tmp3, tmp1, tmp2
+ csel pos, tmp1, tmp2, hi /*Choose the maximum. */
+ /*
+ * Here, limit is not less than 8, so directly run .Ltinycmp
+ * without checking the limit.*/
+ sub limit, limit, pos
+.Ltinycmp:
+ ldrb data1w, [src1], #1
+ ldrb data2w, [src2], #1
+ subs pos, pos, #1
+ ccmp data1w, #1, #0, ne /* NZCV = 0b0000. */
+ ccmp data1w, data2w, #0, cs /* NZCV = 0b0000. */
+ b.eq .Ltinycmp
+ cbnz pos, 1f /*find the null or unequal...*/
+ cmp data1w, #1
+ ccmp data1w, data2w, #0, cs
+ b.eq .Lstart_align /*the last bytes are equal....*/
+1:
+ sub result, data1, data2
+ ret
+
+.Lstart_align:
+ lsr limit_wd, limit, #3
+ cbz limit_wd, .Lremain8
+ /*process more leading bytes to make str1 aligned...*/
+ ands xzr, src1, #7
+ b.eq .Lrecal_offset
+ add src1, src1, tmp3 /*tmp3 is positive in this branch.*/
+ add src2, src2, tmp3
+ ldr data1, [src1], #8
+ ldr data2, [src2], #8
+
+ sub limit, limit, tmp3
+ lsr limit_wd, limit, #3
+ subs limit_wd, limit_wd, #1
+
+ sub tmp1, data1, zeroones
+ orr tmp2, data1, #REP8_7f
+ eor diff, data1, data2 /* Non-zero if differences found. */
+ csinv endloop, diff, xzr, ne/*if limit_wd is 0,will finish the cmp*/
+ bics has_nul, tmp1, tmp2
+ ccmp endloop, #0, #0, eq /*has_null is ZERO: no null byte*/
+ b.ne .Lunequal_proc
+ /*How far is the current str2 from the alignment boundary...*/
+ and tmp3, tmp3, #7
+.Lrecal_offset:
+ neg pos, tmp3
+.Lloopcmp_proc:
+ /*
+ * Divide the eight bytes into two parts. First,backwards the src2
+ * to an alignment boundary,load eight bytes from the SRC2 alignment
+ * boundary,then compare with the relative bytes from SRC1.
+ * If all 8 bytes are equal,then start the second part's comparison.
+ * Otherwise finish the comparison.
+ * This special handle can garantee all the accesses are in the
+ * thread/task space in avoid to overrange access.
+ */
+ ldr data1, [src1,pos]
+ ldr data2, [src2,pos]
+ sub tmp1, data1, zeroones
+ orr tmp2, data1, #REP8_7f
+ bics has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */
+ eor diff, data1, data2 /* Non-zero if differences found. */
+ csinv endloop, diff, xzr, eq
+ cbnz endloop, .Lunequal_proc
+
+ /*The second part process*/
+ ldr data1, [src1], #8
+ ldr data2, [src2], #8
+ subs limit_wd, limit_wd, #1
+ sub tmp1, data1, zeroones
+ orr tmp2, data1, #REP8_7f
+ eor diff, data1, data2 /* Non-zero if differences found. */
+ csinv endloop, diff, xzr, ne/*if limit_wd is 0,will finish the cmp*/
+ bics has_nul, tmp1, tmp2
+ ccmp endloop, #0, #0, eq /*has_null is ZERO: no null byte*/
+ b.eq .Lloopcmp_proc
+
+.Lunequal_proc:
+ orr syndrome, diff, has_nul
+ cbz syndrome, .Lremain8
+.Lcal_cmpresult:
+ /*
+ * reversed the byte-order as big-endian,then CLZ can find the most
+ * significant zero bits.
+ */
+CPU_LE( rev syndrome, syndrome )
+CPU_LE( rev data1, data1 )
+CPU_LE( rev data2, data2 )
+ /*
+ * For big-endian we cannot use the trick with the syndrome value
+ * as carry-propagation can corrupt the upper bits if the trailing
+ * bytes in the string contain 0x01.
+ * However, if there is no NUL byte in the dword, we can generate
+ * the result directly. We can't just subtract the bytes as the
+ * MSB might be significant.
+ */
+CPU_BE( cbnz has_nul, 1f )
+CPU_BE( cmp data1, data2 )
+CPU_BE( cset result, ne )
+CPU_BE( cneg result, result, lo )
+CPU_BE( ret )
+CPU_BE( 1: )
+ /* Re-compute the NUL-byte detection, using a byte-reversed value.*/
+CPU_BE( rev tmp3, data1 )
+CPU_BE( sub tmp1, tmp3, zeroones )
+CPU_BE( orr tmp2, tmp3, #REP8_7f )
+CPU_BE( bic has_nul, tmp1, tmp2 )
+CPU_BE( rev has_nul, has_nul )
+CPU_BE( orr syndrome, diff, has_nul )
+ /*
+ * The MS-non-zero bit of the syndrome marks either the first bit
+ * that is different, or the top bit of the first zero byte.
+ * Shifting left now will bring the critical information into the
+ * top bits.
+ */
+ clz pos, syndrome
+ lsl data1, data1, pos
+ lsl data2, data2, pos
+ /*
+ * But we need to zero-extend (char is unsigned) the value and then
+ * perform a signed 32-bit subtraction.
+ */
+ lsr data1, data1, #56
+ sub result, data1, data2, lsr #56
+ ret
+
+.Lremain8:
+ /* Limit % 8 == 0 => all bytes significant. */
+ ands limit, limit, #7
+ b.eq .Lret0
+.Ltiny8proc:
+ ldrb data1w, [src1], #1
+ ldrb data2w, [src2], #1
+ subs limit, limit, #1
+
+ ccmp data1w, #1, #0, ne /* NZCV = 0b0000. */
+ ccmp data1w, data2w, #0, cs /* NZCV = 0b0000. */
+ b.eq .Ltiny8proc
+ sub result, data1, data2
+ ret
+
+.Lret0:
+ mov result, #0
+ ret
+ENDPROC(strncmp)
diff --git a/arch/arm64/lib/strnlen.S b/arch/arm64/lib/strnlen.S
new file mode 100644
index 000000000000..2ca665711bf2
--- /dev/null
+++ b/arch/arm64/lib/strnlen.S
@@ -0,0 +1,171 @@
+/*
+ * Copyright (C) 2013 ARM Ltd.
+ * Copyright (C) 2013 Linaro.
+ *
+ * This code is based on glibc cortex strings work originally authored by Linaro
+ * and re-licensed under GPLv2 for the Linux kernel. The original code can
+ * be found @
+ *
+ * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
+ * files/head:/src/aarch64/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+/*
+ * determine the length of a fixed-size string
+ *
+ * Parameters:
+ * x0 - const string pointer
+ * x1 - maximal string length
+ * Returns:
+ * x0 - the return length of specific string
+ */
+
+/* Arguments and results. */
+srcin .req x0
+len .req x0
+limit .req x1
+
+/* Locals and temporaries. */
+src .req x2
+data1 .req x3
+data2 .req x4
+data2a .req x5
+has_nul1 .req x6
+has_nul2 .req x7
+tmp1 .req x8
+tmp2 .req x9
+tmp3 .req x10
+tmp4 .req x11
+zeroones .req x12
+pos .req x13
+limit_wd .req x14
+
+#define REP8_01 0x0101010101010101
+#define REP8_7f 0x7f7f7f7f7f7f7f7f
+#define REP8_80 0x8080808080808080
+
+ENTRY(strnlen)
+ cbz limit, .Lhit_limit
+ mov zeroones, #REP8_01
+ bic src, srcin, #15
+ ands tmp1, srcin, #15
+ b.ne .Lmisaligned
+ /* Calculate the number of full and partial words -1. */
+ sub limit_wd, limit, #1 /* Limit != 0, so no underflow. */
+ lsr limit_wd, limit_wd, #4 /* Convert to Qwords. */
+
+ /*
+ * NUL detection works on the principle that (X - 1) & (~X) & 0x80
+ * (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
+ * can be done in parallel across the entire word.
+ */
+ /*
+ * The inner loop deals with two Dwords at a time. This has a
+ * slightly higher start-up cost, but we should win quite quickly,
+ * especially on cores with a high number of issue slots per
+ * cycle, as we get much better parallelism out of the operations.
+ */
+.Lloop:
+ ldp data1, data2, [src], #16
+.Lrealigned:
+ sub tmp1, data1, zeroones
+ orr tmp2, data1, #REP8_7f
+ sub tmp3, data2, zeroones
+ orr tmp4, data2, #REP8_7f
+ bic has_nul1, tmp1, tmp2
+ bic has_nul2, tmp3, tmp4
+ subs limit_wd, limit_wd, #1
+ orr tmp1, has_nul1, has_nul2
+ ccmp tmp1, #0, #0, pl /* NZCV = 0000 */
+ b.eq .Lloop
+
+ cbz tmp1, .Lhit_limit /* No null in final Qword. */
+
+ /*
+ * We know there's a null in the final Qword. The easiest thing
+ * to do now is work out the length of the string and return
+ * MIN (len, limit).
+ */
+ sub len, src, srcin
+ cbz has_nul1, .Lnul_in_data2
+CPU_BE( mov data2, data1 ) /*perpare data to re-calculate the syndrome*/
+
+ sub len, len, #8
+ mov has_nul2, has_nul1
+.Lnul_in_data2:
+ /*
+ * For big-endian, carry propagation (if the final byte in the
+ * string is 0x01) means we cannot use has_nul directly. The
+ * easiest way to get the correct byte is to byte-swap the data
+ * and calculate the syndrome a second time.
+ */
+CPU_BE( rev data2, data2 )
+CPU_BE( sub tmp1, data2, zeroones )
+CPU_BE( orr tmp2, data2, #REP8_7f )
+CPU_BE( bic has_nul2, tmp1, tmp2 )
+
+ sub len, len, #8
+ rev has_nul2, has_nul2
+ clz pos, has_nul2
+ add len, len, pos, lsr #3 /* Bits to bytes. */
+ cmp len, limit
+ csel len, len, limit, ls /* Return the lower value. */
+ ret
+
+.Lmisaligned:
+ /*
+ * Deal with a partial first word.
+ * We're doing two things in parallel here;
+ * 1) Calculate the number of words (but avoiding overflow if
+ * limit is near ULONG_MAX) - to do this we need to work out
+ * limit + tmp1 - 1 as a 65-bit value before shifting it;
+ * 2) Load and mask the initial data words - we force the bytes
+ * before the ones we are interested in to 0xff - this ensures
+ * early bytes will not hit any zero detection.
+ */
+ ldp data1, data2, [src], #16
+
+ sub limit_wd, limit, #1
+ and tmp3, limit_wd, #15
+ lsr limit_wd, limit_wd, #4
+
+ add tmp3, tmp3, tmp1
+ add limit_wd, limit_wd, tmp3, lsr #4
+
+ neg tmp4, tmp1
+ lsl tmp4, tmp4, #3 /* Bytes beyond alignment -> bits. */
+
+ mov tmp2, #~0
+ /* Big-endian. Early bytes are at MSB. */
+CPU_BE( lsl tmp2, tmp2, tmp4 ) /* Shift (tmp1 & 63). */
+ /* Little-endian. Early bytes are at LSB. */
+CPU_LE( lsr tmp2, tmp2, tmp4 ) /* Shift (tmp1 & 63). */
+
+ cmp tmp1, #8
+
+ orr data1, data1, tmp2
+ orr data2a, data2, tmp2
+
+ csinv data1, data1, xzr, le
+ csel data2, data2, data2a, le
+ b .Lrealigned
+
+.Lhit_limit:
+ mov len, limit
+ ret
+ENDPROC(strnlen)
diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile
index b51d36401d83..3ecb56c624d3 100644
--- a/arch/arm64/mm/Makefile
+++ b/arch/arm64/mm/Makefile
@@ -1,5 +1,5 @@
obj-y := dma-mapping.o extable.o fault.o init.o \
cache.o copypage.o flush.o \
ioremap.o mmap.o pgd.o mmu.o \
- context.o tlb.o proc.o
+ context.o proc.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
index fda756875fa6..23663837acff 100644
--- a/arch/arm64/mm/cache.S
+++ b/arch/arm64/mm/cache.S
@@ -31,7 +31,7 @@
* Corrupted registers: x0-x7, x9-x11
*/
__flush_dcache_all:
- dsb sy // ensure ordering with previous memory accesses
+ dmb sy // ensure ordering with previous memory accesses
mrs x0, clidr_el1 // read clidr
and x3, x0, #0x7000000 // extract loc from clidr
lsr x3, x3, #23 // left align loc bit field
@@ -128,7 +128,7 @@ USER(9f, dc cvau, x4 ) // clean D line to PoU
add x4, x4, x2
cmp x4, x1
b.lo 1b
- dsb sy
+ dsb ish
icache_line_size x2, x3
sub x3, x2, #1
@@ -139,7 +139,7 @@ USER(9f, ic ivau, x4 ) // invalidate I line PoU
cmp x4, x1
b.lo 1b
9: // ignore any faulting cache operation
- dsb sy
+ dsb ish
isb
ret
ENDPROC(flush_icache_range)
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index c851eb44dc50..4164c5ace9f8 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -115,7 +115,7 @@ static void *__dma_alloc_noncoherent(struct device *dev, size_t size,
for (i = 0; i < (size >> PAGE_SHIFT); i++)
map[i] = page + i;
coherent_ptr = vmap(map, size >> PAGE_SHIFT, VM_MAP,
- __get_dma_pgprot(attrs, pgprot_default, false));
+ __get_dma_pgprot(attrs, __pgprot(PROT_NORMAL_NC), false));
kfree(map);
if (!coherent_ptr)
goto no_map;
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index c23751b06120..bcc965e2cce1 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -32,6 +32,7 @@
#include <asm/exception.h>
#include <asm/debug-monitors.h>
+#include <asm/esr.h>
#include <asm/system_misc.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
@@ -123,6 +124,7 @@ static void __do_user_fault(struct task_struct *tsk, unsigned long addr,
}
tsk->thread.fault_address = addr;
+ tsk->thread.fault_code = esr;
si.si_signo = sig;
si.si_errno = 0;
si.si_code = code;
@@ -148,8 +150,6 @@ static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *re
#define VM_FAULT_BADMAP 0x010000
#define VM_FAULT_BADACCESS 0x020000
-#define ESR_WRITE (1 << 6)
-#define ESR_CM (1 << 8)
#define ESR_LNX_EXEC (1 << 24)
static int __do_page_fault(struct mm_struct *mm, unsigned long addr,
@@ -218,7 +218,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
if (esr & ESR_LNX_EXEC) {
vm_flags = VM_EXEC;
- } else if ((esr & ESR_WRITE) && !(esr & ESR_CM)) {
+ } else if ((esr & ESR_EL1_WRITE) && !(esr & ESR_EL1_CM)) {
vm_flags = VM_WRITE;
mm_flags |= FAULT_FLAG_WRITE;
}
@@ -525,7 +525,7 @@ asmlinkage int __exception do_debug_exception(unsigned long addr,
info.si_errno = 0;
info.si_code = inf->code;
info.si_addr = (void __user *)addr;
- arm64_notify_die("", regs, &info, esr);
+ arm64_notify_die("", regs, &info, 0);
return 0;
}
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 0a472c41a67f..c43f1dd19489 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -43,11 +43,6 @@
struct page *empty_zero_page;
EXPORT_SYMBOL(empty_zero_page);
-pgprot_t pgprot_default;
-EXPORT_SYMBOL(pgprot_default);
-
-static pmdval_t prot_sect_kernel;
-
struct cachepolicy {
const char policy[16];
u64 mair;
@@ -122,33 +117,6 @@ static int __init early_cachepolicy(char *p)
}
early_param("cachepolicy", early_cachepolicy);
-/*
- * Adjust the PMD section entries according to the CPU in use.
- */
-void __init init_mem_pgprot(void)
-{
- pteval_t default_pgprot;
- int i;
-
- default_pgprot = PTE_ATTRINDX(MT_NORMAL);
- prot_sect_kernel = PMD_TYPE_SECT | PMD_SECT_AF | PMD_ATTRINDX(MT_NORMAL);
-
-#ifdef CONFIG_SMP
- /*
- * Mark memory with the "shared" attribute for SMP systems
- */
- default_pgprot |= PTE_SHARED;
- prot_sect_kernel |= PMD_SECT_S;
-#endif
-
- for (i = 0; i < 16; i++) {
- unsigned long v = pgprot_val(protection_map[i]);
- protection_map[i] = __pgprot(v | default_pgprot);
- }
-
- pgprot_default = __pgprot(PTE_TYPE_PAGE | PTE_AF | default_pgprot);
-}
-
pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
unsigned long size, pgprot_t vma_prot)
{
@@ -168,7 +136,8 @@ static void __init *early_alloc(unsigned long sz)
}
static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr,
- unsigned long end, unsigned long pfn)
+ unsigned long end, unsigned long pfn,
+ pgprot_t prot)
{
pte_t *pte;
@@ -180,16 +149,27 @@ static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr,
pte = pte_offset_kernel(pmd, addr);
do {
- set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC));
+ set_pte(pte, pfn_pte(pfn, prot));
pfn++;
} while (pte++, addr += PAGE_SIZE, addr != end);
}
static void __init alloc_init_pmd(pud_t *pud, unsigned long addr,
- unsigned long end, phys_addr_t phys)
+ unsigned long end, phys_addr_t phys,
+ int map_io)
{
pmd_t *pmd;
unsigned long next;
+ pmdval_t prot_sect;
+ pgprot_t prot_pte;
+
+ if (map_io) {
+ prot_sect = PROT_SECT_DEVICE_nGnRE;
+ prot_pte = __pgprot(PROT_DEVICE_nGnRE);
+ } else {
+ prot_sect = PROT_SECT_NORMAL_EXEC;
+ prot_pte = PAGE_KERNEL_EXEC;
+ }
/*
* Check for initial section mappings in the pgd/pud and remove them.
@@ -205,7 +185,7 @@ static void __init alloc_init_pmd(pud_t *pud, unsigned long addr,
/* try section mapping first */
if (((addr | next | phys) & ~SECTION_MASK) == 0) {
pmd_t old_pmd =*pmd;
- set_pmd(pmd, __pmd(phys | prot_sect_kernel));
+ set_pmd(pmd, __pmd(phys | prot_sect));
/*
* Check for previous table entries created during
* boot (__create_page_tables) and flush them.
@@ -213,21 +193,46 @@ static void __init alloc_init_pmd(pud_t *pud, unsigned long addr,
if (!pmd_none(old_pmd))
flush_tlb_all();
} else {
- alloc_init_pte(pmd, addr, next, __phys_to_pfn(phys));
+ alloc_init_pte(pmd, addr, next, __phys_to_pfn(phys),
+ prot_pte);
}
phys += next - addr;
} while (pmd++, addr = next, addr != end);
}
static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr,
- unsigned long end, unsigned long phys)
+ unsigned long end, unsigned long phys,
+ int map_io)
{
pud_t *pud = pud_offset(pgd, addr);
unsigned long next;
do {
next = pud_addr_end(addr, end);
- alloc_init_pmd(pud, addr, next, phys);
+
+ /*
+ * For 4K granule only, attempt to put down a 1GB block
+ */
+ if (!map_io && (PAGE_SHIFT == 12) &&
+ ((addr | next | phys) & ~PUD_MASK) == 0) {
+ pud_t old_pud = *pud;
+ set_pud(pud, __pud(phys | PROT_SECT_NORMAL_EXEC));
+
+ /*
+ * If we have an old value for a pud, it will
+ * be pointing to a pmd table that we no longer
+ * need (from swapper_pg_dir).
+ *
+ * Look up the old pmd table and free it.
+ */
+ if (!pud_none(old_pud)) {
+ phys_addr_t table = __pa(pmd_offset(&old_pud, 0));
+ memblock_free(table, PAGE_SIZE);
+ flush_tlb_all();
+ }
+ } else {
+ alloc_init_pmd(pud, addr, next, phys, map_io);
+ }
phys += next - addr;
} while (pud++, addr = next, addr != end);
}
@@ -236,30 +241,44 @@ static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr,
* Create the page directory entries and any necessary page tables for the
* mapping specified by 'md'.
*/
-static void __init create_mapping(phys_addr_t phys, unsigned long virt,
- phys_addr_t size)
+static void __init __create_mapping(pgd_t *pgd, phys_addr_t phys,
+ unsigned long virt, phys_addr_t size,
+ int map_io)
{
unsigned long addr, length, end, next;
- pgd_t *pgd;
-
- if (virt < VMALLOC_START) {
- pr_warning("BUG: not creating mapping for 0x%016llx at 0x%016lx - outside kernel range\n",
- phys, virt);
- return;
- }
addr = virt & PAGE_MASK;
length = PAGE_ALIGN(size + (virt & ~PAGE_MASK));
- pgd = pgd_offset_k(addr);
end = addr + length;
do {
next = pgd_addr_end(addr, end);
- alloc_init_pud(pgd, addr, next, phys);
+ alloc_init_pud(pgd, addr, next, phys, map_io);
phys += next - addr;
} while (pgd++, addr = next, addr != end);
}
+static void __init create_mapping(phys_addr_t phys, unsigned long virt,
+ phys_addr_t size)
+{
+ if (virt < VMALLOC_START) {
+ pr_warn("BUG: not creating mapping for %pa at 0x%016lx - outside kernel range\n",
+ &phys, virt);
+ return;
+ }
+ __create_mapping(pgd_offset_k(virt & PAGE_MASK), phys, virt, size, 0);
+}
+
+void __init create_id_mapping(phys_addr_t addr, phys_addr_t size, int map_io)
+{
+ if ((addr >> PGDIR_SHIFT) >= ARRAY_SIZE(idmap_pg_dir)) {
+ pr_warn("BUG: not creating id mapping for %pa\n", &addr);
+ return;
+ }
+ __create_mapping(&idmap_pg_dir[pgd_index(addr)],
+ addr, addr, size, map_io);
+}
+
static void __init map_mem(void)
{
struct memblock_region *reg;
@@ -370,6 +389,9 @@ int kern_addr_valid(unsigned long addr)
if (pud_none(*pud))
return 0;
+ if (pud_sect(*pud))
+ return pfn_valid(pud_pfn(*pud));
+
pmd = pmd_offset(pud, addr);
if (pmd_none(*pmd))
return 0;
@@ -417,7 +439,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
if (!p)
return -ENOMEM;
- set_pmd(pmd, __pmd(__pa(p) | prot_sect_kernel));
+ set_pmd(pmd, __pmd(__pa(p) | PROT_SECT_NORMAL));
} else
vmemmap_verify((pte_t *)pmd, node, addr, next);
} while (addr = next, addr != end);
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 9042aff5e9e3..7736779c9809 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -182,7 +182,7 @@ ENDPROC(cpu_do_switch_mm)
ENTRY(__cpu_setup)
ic iallu // I+BTB cache invalidate
tlbi vmalle1is // invalidate I + D TLBs
- dsb sy
+ dsb ish
mov x0, #3 << 20
msr cpacr_el1, x0 // Enable FP/ASIMD
diff --git a/arch/arm64/mm/tlb.S b/arch/arm64/mm/tlb.S
deleted file mode 100644
index 19da91e0cd27..000000000000
--- a/arch/arm64/mm/tlb.S
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * Based on arch/arm/mm/tlb.S
- *
- * Copyright (C) 1997-2002 Russell King
- * Copyright (C) 2012 ARM Ltd.
- * Written by Catalin Marinas <catalin.marinas@arm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-#include <linux/linkage.h>
-#include <asm/assembler.h>
-#include <asm/asm-offsets.h>
-#include <asm/page.h>
-#include <asm/tlbflush.h>
-#include "proc-macros.S"
-
-/*
- * __cpu_flush_user_tlb_range(start, end, vma)
- *
- * Invalidate a range of TLB entries in the specified address space.
- *
- * - start - start address (may not be aligned)
- * - end - end address (exclusive, may not be aligned)
- * - vma - vma_struct describing address range
- */
-ENTRY(__cpu_flush_user_tlb_range)
- vma_vm_mm x3, x2 // get vma->vm_mm
- mmid w3, x3 // get vm_mm->context.id
- dsb sy
- lsr x0, x0, #12 // align address
- lsr x1, x1, #12
- bfi x0, x3, #48, #16 // start VA and ASID
- bfi x1, x3, #48, #16 // end VA and ASID
-1: tlbi vae1is, x0 // TLB invalidate by address and ASID
- add x0, x0, #1
- cmp x0, x1
- b.lo 1b
- dsb sy
- ret
-ENDPROC(__cpu_flush_user_tlb_range)
-
-/*
- * __cpu_flush_kern_tlb_range(start,end)
- *
- * Invalidate a range of kernel TLB entries.
- *
- * - start - start address (may not be aligned)
- * - end - end address (exclusive, may not be aligned)
- */
-ENTRY(__cpu_flush_kern_tlb_range)
- dsb sy
- lsr x0, x0, #12 // align address
- lsr x1, x1, #12
-1: tlbi vaae1is, x0 // TLB invalidate by address
- add x0, x0, #1
- cmp x0, x1
- b.lo 1b
- dsb sy
- isb
- ret
-ENDPROC(__cpu_flush_kern_tlb_range)
diff --git a/arch/blackfin/include/asm/ftrace.h b/arch/blackfin/include/asm/ftrace.h
index 8a029505d7b7..2f1c3c2657ad 100644
--- a/arch/blackfin/include/asm/ftrace.h
+++ b/arch/blackfin/include/asm/ftrace.h
@@ -66,16 +66,7 @@ extern inline void *return_address(unsigned int level)
#endif /* CONFIG_FRAME_POINTER */
-#define HAVE_ARCH_CALLER_ADDR
-
-/* inline function or macro may lead to unexpected result */
-#define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0))
-#define CALLER_ADDR1 ((unsigned long)return_address(1))
-#define CALLER_ADDR2 ((unsigned long)return_address(2))
-#define CALLER_ADDR3 ((unsigned long)return_address(3))
-#define CALLER_ADDR4 ((unsigned long)return_address(4))
-#define CALLER_ADDR5 ((unsigned long)return_address(5))
-#define CALLER_ADDR6 ((unsigned long)return_address(6))
+#define ftrace_return_address(n) return_address(n)
#endif /* __ASSEMBLY__ */
diff --git a/arch/blackfin/kernel/ptrace.c b/arch/blackfin/kernel/ptrace.c
index e1f88e028cfe..8b8fe671b1a6 100644
--- a/arch/blackfin/kernel/ptrace.c
+++ b/arch/blackfin/kernel/ptrace.c
@@ -117,6 +117,7 @@ put_reg(struct task_struct *task, unsigned long regno, unsigned long data)
int
is_user_addr_valid(struct task_struct *child, unsigned long start, unsigned long len)
{
+ bool valid;
struct vm_area_struct *vma;
struct sram_list_struct *sraml;
@@ -124,9 +125,12 @@ is_user_addr_valid(struct task_struct *child, unsigned long start, unsigned long
if (start + len < start)
return -EIO;
+ down_read(&child->mm->mmap_sem);
vma = find_vma(child->mm, start);
- if (vma && start >= vma->vm_start && start + len <= vma->vm_end)
- return 0;
+ valid = vma && start >= vma->vm_start && start + len <= vma->vm_end;
+ up_read(&child->mm->mmap_sem);
+ if (valid)
+ return 0;
for (sraml = child->mm->context.sram_list; sraml; sraml = sraml->next)
if (start >= (unsigned long)sraml->addr
diff --git a/arch/cris/arch-v10/drivers/gpio.c b/arch/cris/arch-v10/drivers/gpio.c
index f4374bae4fb4..64285e0d3481 100644
--- a/arch/cris/arch-v10/drivers/gpio.c
+++ b/arch/cris/arch-v10/drivers/gpio.c
@@ -833,8 +833,8 @@ static int __init gpio_init(void)
printk(KERN_INFO "ETRAX 100LX GPIO driver v2.5, (c) 2001-2008 "
"Axis Communications AB\n");
/* We call etrax_gpio_wake_up_check() from timer interrupt and
- * from cpu_idle() in kernel/process.c
- * The check in cpu_idle() reduces latency from ~15 ms to ~6 ms
+ * from default_idle() in kernel/process.c
+ * The check in default_idle() reduces latency from ~15 ms to ~6 ms
* in some tests.
*/
res = request_irq(TIMER0_IRQ_NBR, gpio_poll_timer_interrupt,
diff --git a/arch/cris/arch-v32/drivers/mach-fs/gpio.c b/arch/cris/arch-v32/drivers/mach-fs/gpio.c
index 9e54273af0ca..009f4ee1bd09 100644
--- a/arch/cris/arch-v32/drivers/mach-fs/gpio.c
+++ b/arch/cris/arch-v32/drivers/mach-fs/gpio.c
@@ -958,11 +958,7 @@ gpio_init(void)
printk(KERN_INFO "ETRAX FS GPIO driver v2.5, (c) 2003-2007 "
"Axis Communications AB\n");
- /* We call etrax_gpio_wake_up_check() from timer interrupt and
- * from cpu_idle() in kernel/process.c
- * The check in cpu_idle() reduces latency from ~15 ms to ~6 ms
- * in some tests.
- */
+ /* We call etrax_gpio_wake_up_check() from timer interrupt */
if (request_irq(TIMER0_INTR_VECT, gpio_poll_timer_interrupt,
IRQF_SHARED, "gpio poll", &alarmlist))
printk(KERN_ERR "timer0 irq for gpio\n");
diff --git a/arch/ia64/kernel/crash.c b/arch/ia64/kernel/crash.c
index b942f4032d7a..2955f359e2a7 100644
--- a/arch/ia64/kernel/crash.c
+++ b/arch/ia64/kernel/crash.c
@@ -237,7 +237,7 @@ kdump_init_notifier(struct notifier_block *self, unsigned long val, void *data)
}
#ifdef CONFIG_SYSCTL
-static ctl_table kdump_ctl_table[] = {
+static struct ctl_table kdump_ctl_table[] = {
{
.procname = "kdump_on_init",
.data = &kdump_on_init,
@@ -255,7 +255,7 @@ static ctl_table kdump_ctl_table[] = {
{ }
};
-static ctl_table sys_table[] = {
+static struct ctl_table sys_table[] = {
{
.procname = "kernel",
.mode = 0555,
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index d841c4bd6864..5845ffea67c3 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -521,7 +521,7 @@ static pmu_config_t *pmu_conf;
pfm_sysctl_t pfm_sysctl;
EXPORT_SYMBOL(pfm_sysctl);
-static ctl_table pfm_ctl_table[]={
+static struct ctl_table pfm_ctl_table[] = {
{
.procname = "debug",
.data = &pfm_sysctl.debug,
@@ -552,7 +552,7 @@ static ctl_table pfm_ctl_table[]={
},
{}
};
-static ctl_table pfm_sysctl_dir[] = {
+static struct ctl_table pfm_sysctl_dir[] = {
{
.procname = "perfmon",
.mode = 0555,
@@ -560,7 +560,7 @@ static ctl_table pfm_sysctl_dir[] = {
},
{}
};
-static ctl_table pfm_sysctl_root[] = {
+static struct ctl_table pfm_sysctl_root[] = {
{
.procname = "kernel",
.mode = 0555,
diff --git a/arch/m68k/include/asm/signal.h b/arch/m68k/include/asm/signal.h
index 214320b50384..8c8ce5e1ee0e 100644
--- a/arch/m68k/include/asm/signal.h
+++ b/arch/m68k/include/asm/signal.h
@@ -60,15 +60,6 @@ static inline int __gen_sigismember(sigset_t *set, int _sig)
__const_sigismember(set,sig) : \
__gen_sigismember(set,sig))
-static inline int sigfindinword(unsigned long word)
-{
- asm ("bfffo %1{#0,#0},%0"
- : "=d" (word)
- : "d" (word & -word)
- : "cc");
- return word ^ 31;
-}
-
#endif /* !CONFIG_CPU_HAS_NO_BITFIELDS */
#ifndef __uClinux__
diff --git a/arch/microblaze/configs/mmu_defconfig b/arch/microblaze/configs/mmu_defconfig
index deaf45ab6429..e2f6543b91e7 100644
--- a/arch/microblaze/configs/mmu_defconfig
+++ b/arch/microblaze/configs/mmu_defconfig
@@ -49,6 +49,7 @@ CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_SERIAL_UARTLITE=y
CONFIG_SERIAL_UARTLITE_CONSOLE=y
+CONFIG_SERIAL_OF_PLATFORM=y
# CONFIG_HW_RANDOM is not set
CONFIG_XILINX_HWICAP=y
CONFIG_I2C=y
diff --git a/arch/microblaze/configs/nommu_defconfig b/arch/microblaze/configs/nommu_defconfig
index 10b5172283d7..a29ebd4a9fcb 100644
--- a/arch/microblaze/configs/nommu_defconfig
+++ b/arch/microblaze/configs/nommu_defconfig
@@ -58,6 +58,7 @@ CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_SERIAL_UARTLITE=y
CONFIG_SERIAL_UARTLITE_CONSOLE=y
+CONFIG_SERIAL_OF_PLATFORM=y
# CONFIG_HW_RANDOM is not set
CONFIG_XILINX_HWICAP=y
CONFIG_I2C=y
diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild
index c98ed95c0541..35b3ecaf25d5 100644
--- a/arch/microblaze/include/asm/Kbuild
+++ b/arch/microblaze/include/asm/Kbuild
@@ -2,6 +2,7 @@
generic-y += barrier.h
generic-y += clkdev.h
generic-y += cputime.h
+generic-y += device.h
generic-y += exec.h
generic-y += hash.h
generic-y += mcs_spinlock.h
diff --git a/arch/microblaze/include/asm/device.h b/arch/microblaze/include/asm/device.h
deleted file mode 100644
index 123b2fe72d01..000000000000
--- a/arch/microblaze/include/asm/device.h
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- * Arch specific extensions to struct device
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License v2. See the file "COPYING" in the main directory of this archive
- * for more details.
- */
-
-#ifndef _ASM_MICROBLAZE_DEVICE_H
-#define _ASM_MICROBLAZE_DEVICE_H
-
-struct device_node;
-
-struct dev_archdata {
- /* DMA operations on that device */
- struct dma_map_ops *dma_ops;
- void *dma_data;
-};
-
-struct pdev_archdata {
- u64 dma_mask;
-};
-
-#endif /* _ASM_MICROBLAZE_DEVICE_H */
-
-
diff --git a/arch/microblaze/include/asm/dma-mapping.h b/arch/microblaze/include/asm/dma-mapping.h
index 46460f1c49c4..ab353723076a 100644
--- a/arch/microblaze/include/asm/dma-mapping.h
+++ b/arch/microblaze/include/asm/dma-mapping.h
@@ -35,16 +35,6 @@
#define __dma_alloc_coherent(dev, gfp, size, handle) NULL
#define __dma_free_coherent(size, addr) ((void)0)
-static inline unsigned long device_to_mask(struct device *dev)
-{
- if (dev->dma_mask && *dev->dma_mask)
- return *dev->dma_mask;
- /* Assume devices without mask can take 32 bit addresses */
- return 0xfffffffful;
-}
-
-extern struct dma_map_ops *dma_ops;
-
/*
* Available generic sets of operations
*/
@@ -52,20 +42,7 @@ extern struct dma_map_ops dma_direct_ops;
static inline struct dma_map_ops *get_dma_ops(struct device *dev)
{
- /* We don't handle the NULL dev case for ISA for now. We could
- * do it via an out of line call but it is not needed for now. The
- * only ISA DMA device we support is the floppy and we have a hack
- * in the floppy driver directly to get a device for us.
- */
- if (unlikely(!dev) || !dev->archdata.dma_ops)
- return NULL;
-
- return dev->archdata.dma_ops;
-}
-
-static inline void set_dma_ops(struct device *dev, struct dma_map_ops *ops)
-{
- dev->archdata.dma_ops = ops;
+ return &dma_direct_ops;
}
static inline int dma_supported(struct device *dev, u64 mask)
diff --git a/arch/microblaze/include/asm/io.h b/arch/microblaze/include/asm/io.h
index 1e4c3329f62e..433751b2a003 100644
--- a/arch/microblaze/include/asm/io.h
+++ b/arch/microblaze/include/asm/io.h
@@ -19,17 +19,14 @@
#ifndef CONFIG_PCI
#define _IO_BASE 0
#define _ISA_MEM_BASE 0
-#define PCI_DRAM_OFFSET 0
#else
#define _IO_BASE isa_io_base
#define _ISA_MEM_BASE isa_mem_base
-#define PCI_DRAM_OFFSET pci_dram_offset
struct pci_dev;
extern void pci_iounmap(struct pci_dev *dev, void __iomem *);
#define pci_iounmap pci_iounmap
extern unsigned long isa_io_base;
-extern unsigned long pci_dram_offset;
extern resource_size_t isa_mem_base;
#endif
diff --git a/arch/microblaze/include/asm/pci.h b/arch/microblaze/include/asm/pci.h
index 335524040fff..468aca8cec0d 100644
--- a/arch/microblaze/include/asm/pci.h
+++ b/arch/microblaze/include/asm/pci.h
@@ -45,14 +45,6 @@ struct pci_dev;
#define pcibios_assign_all_busses() 0
#ifdef CONFIG_PCI
-extern void set_pci_dma_ops(struct dma_map_ops *dma_ops);
-extern struct dma_map_ops *get_pci_dma_ops(void);
-#else /* CONFIG_PCI */
-#define set_pci_dma_ops(d)
-#define get_pci_dma_ops() NULL
-#endif
-
-#ifdef CONFIG_PCI
static inline void pci_dma_burst_advice(struct pci_dev *pdev,
enum pci_dma_burst_strategy *strat,
unsigned long *strategy_parameter)
diff --git a/arch/microblaze/kernel/dma.c b/arch/microblaze/kernel/dma.c
index da68d00fd087..4633c36c1b32 100644
--- a/arch/microblaze/kernel/dma.c
+++ b/arch/microblaze/kernel/dma.c
@@ -13,23 +13,6 @@
#include <linux/export.h>
#include <linux/bug.h>
-/*
- * Generic direct DMA implementation
- *
- * This implementation supports a per-device offset that can be applied if
- * the address at which memory is visible to devices is not 0. Platform code
- * can set archdata.dma_data to an unsigned long holding the offset. By
- * default the offset is PCI_DRAM_OFFSET.
- */
-
-static unsigned long get_dma_direct_offset(struct device *dev)
-{
- if (likely(dev))
- return (unsigned long)dev->archdata.dma_data;
-
- return PCI_DRAM_OFFSET; /* FIXME Not sure if is correct */
-}
-
#define NOT_COHERENT_CACHE
static void *dma_direct_alloc_coherent(struct device *dev, size_t size,
@@ -51,7 +34,7 @@ static void *dma_direct_alloc_coherent(struct device *dev, size_t size,
return NULL;
ret = page_address(page);
memset(ret, 0, size);
- *dma_handle = virt_to_phys(ret) + get_dma_direct_offset(dev);
+ *dma_handle = virt_to_phys(ret);
return ret;
#endif
@@ -77,7 +60,7 @@ static int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl,
/* FIXME this part of code is untested */
for_each_sg(sgl, sg, nents, i) {
- sg->dma_address = sg_phys(sg) + get_dma_direct_offset(dev);
+ sg->dma_address = sg_phys(sg);
__dma_sync(page_to_phys(sg_page(sg)) + sg->offset,
sg->length, direction);
}
@@ -85,12 +68,6 @@ static int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl,
return nents;
}
-static void dma_direct_unmap_sg(struct device *dev, struct scatterlist *sg,
- int nents, enum dma_data_direction direction,
- struct dma_attrs *attrs)
-{
-}
-
static int dma_direct_dma_supported(struct device *dev, u64 mask)
{
return 1;
@@ -104,7 +81,7 @@ static inline dma_addr_t dma_direct_map_page(struct device *dev,
struct dma_attrs *attrs)
{
__dma_sync(page_to_phys(page) + offset, size, direction);
- return page_to_phys(page) + offset + get_dma_direct_offset(dev);
+ return page_to_phys(page) + offset;
}
static inline void dma_direct_unmap_page(struct device *dev,
@@ -181,7 +158,6 @@ struct dma_map_ops dma_direct_ops = {
.alloc = dma_direct_alloc_coherent,
.free = dma_direct_free_coherent,
.map_sg = dma_direct_map_sg,
- .unmap_sg = dma_direct_unmap_sg,
.dma_supported = dma_direct_dma_supported,
.map_page = dma_direct_map_page,
.unmap_page = dma_direct_unmap_page,
diff --git a/arch/microblaze/kernel/head.S b/arch/microblaze/kernel/head.S
index 17645b2e2f07..4655ff342c64 100644
--- a/arch/microblaze/kernel/head.S
+++ b/arch/microblaze/kernel/head.S
@@ -205,7 +205,7 @@ GT4: /* r11 contains the rest - will be either 1 or 4 */
GT16: /* TLB0 is 16MB */
addik r9, r0, 0x1000000 /* means TLB0 is 16MB */
TLB1:
- /* must be used r2 because of substract if failed */
+ /* must be used r2 because of subtract if failed */
addik r2, r11, -0x0400000
bgei r2, GT20 /* size is greater than 16MB */
/* size is >16MB and <20MB */
diff --git a/arch/microblaze/kernel/setup.c b/arch/microblaze/kernel/setup.c
index 67cc4b282cc1..ab5b488e1fde 100644
--- a/arch/microblaze/kernel/setup.c
+++ b/arch/microblaze/kernel/setup.c
@@ -71,13 +71,9 @@ void __init setup_arch(char **cmdline_p)
xilinx_pci_init();
-#ifdef CONFIG_VT
-#if defined(CONFIG_XILINX_CONSOLE)
- conswitchp = &xil_con;
-#elif defined(CONFIG_DUMMY_CONSOLE)
+#if defined(CONFIG_DUMMY_CONSOLE)
conswitchp = &dummy_con;
#endif
-#endif
}
#ifdef CONFIG_MTD_UCLINUX
@@ -229,31 +225,3 @@ static int __init debugfs_tlb(void)
device_initcall(debugfs_tlb);
# endif
#endif
-
-static int dflt_bus_notify(struct notifier_block *nb,
- unsigned long action, void *data)
-{
- struct device *dev = data;
-
- /* We are only intereted in device addition */
- if (action != BUS_NOTIFY_ADD_DEVICE)
- return 0;
-
- set_dma_ops(dev, &dma_direct_ops);
-
- return NOTIFY_DONE;
-}
-
-static struct notifier_block dflt_plat_bus_notifier = {
- .notifier_call = dflt_bus_notify,
- .priority = INT_MAX,
-};
-
-static int __init setup_bus_notifier(void)
-{
- bus_register_notifier(&platform_bus_type, &dflt_plat_bus_notifier);
-
- return 0;
-}
-
-arch_initcall(setup_bus_notifier);
diff --git a/arch/microblaze/pci/pci-common.c b/arch/microblaze/pci/pci-common.c
index a59de1bc1ce0..9037914f6985 100644
--- a/arch/microblaze/pci/pci-common.c
+++ b/arch/microblaze/pci/pci-common.c
@@ -47,24 +47,9 @@ static int global_phb_number; /* Global phb counter */
/* ISA Memory physical address */
resource_size_t isa_mem_base;
-static struct dma_map_ops *pci_dma_ops = &dma_direct_ops;
-
unsigned long isa_io_base;
-unsigned long pci_dram_offset;
static int pci_bus_count;
-
-void set_pci_dma_ops(struct dma_map_ops *dma_ops)
-{
- pci_dma_ops = dma_ops;
-}
-
-struct dma_map_ops *get_pci_dma_ops(void)
-{
- return pci_dma_ops;
-}
-EXPORT_SYMBOL(get_pci_dma_ops);
-
struct pci_controller *pcibios_alloc_controller(struct device_node *dev)
{
struct pci_controller *phb;
@@ -866,10 +851,6 @@ void pcibios_setup_bus_devices(struct pci_bus *bus)
*/
set_dev_node(&dev->dev, pcibus_to_node(dev->bus));
- /* Hook up default DMA ops */
- set_dma_ops(&dev->dev, pci_dma_ops);
- dev->dev.archdata.dma_data = (void *)PCI_DRAM_OFFSET;
-
/* Read default IRQs and fixup if necessary */
dev->irq = of_irq_parse_and_map_pci(dev, 0, 0);
}
diff --git a/arch/mips/dec/Makefile b/arch/mips/dec/Makefile
index 3d5d2c56de8d..bd74e05c90b0 100644
--- a/arch/mips/dec/Makefile
+++ b/arch/mips/dec/Makefile
@@ -3,7 +3,7 @@
#
obj-y := ecc-berr.o int-handler.o ioasic-irq.o kn01-berr.o \
- kn02-irq.o kn02xa-berr.o reset.o setup.o time.o
+ kn02-irq.o kn02xa-berr.o platform.o reset.o setup.o time.o
obj-$(CONFIG_TC) += tc.o
obj-$(CONFIG_CPU_HAS_WB) += wbflush.o
diff --git a/arch/mips/dec/platform.c b/arch/mips/dec/platform.c
new file mode 100644
index 000000000000..c7ac86af847a
--- /dev/null
+++ b/arch/mips/dec/platform.c
@@ -0,0 +1,44 @@
+/*
+ * DEC platform devices.
+ *
+ * Copyright (c) 2014 Maciej W. Rozycki
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/ioport.h>
+#include <linux/kernel.h>
+#include <linux/mc146818rtc.h>
+#include <linux/platform_device.h>
+
+static struct resource dec_rtc_resources[] = {
+ {
+ .name = "rtc",
+ .flags = IORESOURCE_MEM,
+ },
+};
+
+static struct cmos_rtc_board_info dec_rtc_info = {
+ .flags = CMOS_RTC_FLAGS_NOFREQ,
+ .address_space = 64,
+};
+
+static struct platform_device dec_rtc_device = {
+ .name = "rtc_cmos",
+ .id = PLATFORM_DEVID_NONE,
+ .dev.platform_data = &dec_rtc_info,
+ .resource = dec_rtc_resources,
+ .num_resources = ARRAY_SIZE(dec_rtc_resources),
+};
+
+static int __init dec_add_devices(void)
+{
+ dec_rtc_resources[0].start = RTC_PORT(0);
+ dec_rtc_resources[0].end = RTC_PORT(0) + dec_kn_slot_size - 1;
+ return platform_device_register(&dec_rtc_device);
+}
+
+device_initcall(dec_add_devices);
diff --git a/arch/parisc/include/asm/ftrace.h b/arch/parisc/include/asm/ftrace.h
index 72c0fafaa039..544ed8ef87eb 100644
--- a/arch/parisc/include/asm/ftrace.h
+++ b/arch/parisc/include/asm/ftrace.h
@@ -24,15 +24,7 @@ extern void return_to_handler(void);
extern unsigned long return_address(unsigned int);
-#define HAVE_ARCH_CALLER_ADDR
-
-#define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0))
-#define CALLER_ADDR1 return_address(1)
-#define CALLER_ADDR2 return_address(2)
-#define CALLER_ADDR3 return_address(3)
-#define CALLER_ADDR4 return_address(4)
-#define CALLER_ADDR5 return_address(5)
-#define CALLER_ADDR6 return_address(6)
+#define ftrace_return_address(n) return_address(n)
#endif /* __ASSEMBLY__ */
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index ca1cd7459c4a..248ee7e5bebd 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -304,7 +304,7 @@ void notrace restore_interrupts(void)
* being re-enabled and generally sanitized the lazy irq state,
* and in the latter case it will leave with interrupts hard
* disabled and marked as such, so the local_irq_enable() call
- * in cpu_idle() will properly re-enable everything.
+ * in arch_cpu_idle() will properly re-enable everything.
*/
bool prep_irq_for_idle(void)
{
diff --git a/arch/sh/include/asm/ftrace.h b/arch/sh/include/asm/ftrace.h
index 13e9966464c2..e79fb6ebaa42 100644
--- a/arch/sh/include/asm/ftrace.h
+++ b/arch/sh/include/asm/ftrace.h
@@ -40,15 +40,7 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr)
/* arch/sh/kernel/return_address.c */
extern void *return_address(unsigned int);
-#define HAVE_ARCH_CALLER_ADDR
-
-#define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0))
-#define CALLER_ADDR1 ((unsigned long)return_address(1))
-#define CALLER_ADDR2 ((unsigned long)return_address(2))
-#define CALLER_ADDR3 ((unsigned long)return_address(3))
-#define CALLER_ADDR4 ((unsigned long)return_address(4))
-#define CALLER_ADDR5 ((unsigned long)return_address(5))
-#define CALLER_ADDR6 ((unsigned long)return_address(6))
+#define ftrace_return_address(n) return_address(n)
#endif /* __ASSEMBLY__ */
diff --git a/arch/tile/kernel/proc.c b/arch/tile/kernel/proc.c
index 681100c59fda..6829a9508649 100644
--- a/arch/tile/kernel/proc.c
+++ b/arch/tile/kernel/proc.c
@@ -113,7 +113,7 @@ arch_initcall(proc_tile_init);
* Support /proc/sys/tile directory
*/
-static ctl_table unaligned_subtable[] = {
+static struct ctl_table unaligned_subtable[] = {
{
.procname = "enabled",
.data = &unaligned_fixup,
@@ -138,7 +138,7 @@ static ctl_table unaligned_subtable[] = {
{}
};
-static ctl_table unaligned_table[] = {
+static struct ctl_table unaligned_table[] = {
{
.procname = "unaligned_fixup",
.mode = 0555,
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index 4703a6c4b8e3..0331d765c2bb 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -1087,8 +1087,7 @@ struct boot_params *make_boot_params(struct efi_config *c)
hdr->type_of_loader = 0x21;
/* Convert unicode cmdline to ascii */
- cmdline_ptr = efi_convert_cmdline_to_ascii(sys_table, image,
- &options_size);
+ cmdline_ptr = efi_convert_cmdline(sys_table, image, &options_size);
if (!cmdline_ptr)
goto fail;
hdr->cmd_line_ptr = (unsigned long)cmdline_ptr;
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 0d558ee899ae..2884e0c3e8a5 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -452,7 +452,7 @@ efi32_config:
.global efi64_config
efi64_config:
.fill 11,8,0
- .quad efi_call6
+ .quad efi_call
.byte 1
#endif /* CONFIG_EFI_STUB */
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index 0ca9a5c362bc..84c223479e3c 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -375,8 +375,7 @@ xloadflags:
# define XLF0 0
#endif
-#if defined(CONFIG_RELOCATABLE) && defined(CONFIG_X86_64) && \
- !defined(CONFIG_EFI_MIXED)
+#if defined(CONFIG_RELOCATABLE) && defined(CONFIG_X86_64)
/* kernel/boot_param/ramdisk could be loaded above 4g */
# define XLF1 XLF_CAN_BE_LOADED_ABOVE_4G
#else
diff --git a/arch/x86/crypto/ghash-clmulni-intel_asm.S b/arch/x86/crypto/ghash-clmulni-intel_asm.S
index 185fad49d86f..5d1e0075ac24 100644
--- a/arch/x86/crypto/ghash-clmulni-intel_asm.S
+++ b/arch/x86/crypto/ghash-clmulni-intel_asm.S
@@ -92,7 +92,7 @@ __clmul_gf128mul_ble:
ret
ENDPROC(__clmul_gf128mul_ble)
-/* void clmul_ghash_mul(char *dst, const be128 *shash) */
+/* void clmul_ghash_mul(char *dst, const u128 *shash) */
ENTRY(clmul_ghash_mul)
movups (%rdi), DATA
movups (%rsi), SHASH
@@ -106,7 +106,7 @@ ENDPROC(clmul_ghash_mul)
/*
* void clmul_ghash_update(char *dst, const char *src, unsigned int srclen,
- * const be128 *shash);
+ * const u128 *shash);
*/
ENTRY(clmul_ghash_update)
cmp $16, %rdx
diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c
index d785cf2c529c..88bb7ba8b175 100644
--- a/arch/x86/crypto/ghash-clmulni-intel_glue.c
+++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c
@@ -25,17 +25,17 @@
#define GHASH_BLOCK_SIZE 16
#define GHASH_DIGEST_SIZE 16
-void clmul_ghash_mul(char *dst, const be128 *shash);
+void clmul_ghash_mul(char *dst, const u128 *shash);
void clmul_ghash_update(char *dst, const char *src, unsigned int srclen,
- const be128 *shash);
+ const u128 *shash);
struct ghash_async_ctx {
struct cryptd_ahash *cryptd_tfm;
};
struct ghash_ctx {
- be128 shash;
+ u128 shash;
};
struct ghash_desc_ctx {
@@ -68,11 +68,11 @@ static int ghash_setkey(struct crypto_shash *tfm,
a = be64_to_cpu(x->a);
b = be64_to_cpu(x->b);
- ctx->shash.a = (__be64)((b << 1) | (a >> 63));
- ctx->shash.b = (__be64)((a << 1) | (b >> 63));
+ ctx->shash.a = (b << 1) | (a >> 63);
+ ctx->shash.b = (a << 1) | (b >> 63);
if (a >> 63)
- ctx->shash.b ^= cpu_to_be64(0xc2);
+ ctx->shash.b ^= ((u64)0xc2) << 56;
return 0;
}
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 0869434eaf72..1eb5f6433ad8 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -1,6 +1,7 @@
#ifndef _ASM_X86_EFI_H
#define _ASM_X86_EFI_H
+#include <asm/i387.h>
/*
* We map the EFI regions needed for runtime services non-contiguously,
* with preserved alignment on virtual addresses starting from -4G down
@@ -27,91 +28,58 @@
extern unsigned long asmlinkage efi_call_phys(void *, ...);
-#define efi_call_phys0(f) efi_call_phys(f)
-#define efi_call_phys1(f, a1) efi_call_phys(f, a1)
-#define efi_call_phys2(f, a1, a2) efi_call_phys(f, a1, a2)
-#define efi_call_phys3(f, a1, a2, a3) efi_call_phys(f, a1, a2, a3)
-#define efi_call_phys4(f, a1, a2, a3, a4) \
- efi_call_phys(f, a1, a2, a3, a4)
-#define efi_call_phys5(f, a1, a2, a3, a4, a5) \
- efi_call_phys(f, a1, a2, a3, a4, a5)
-#define efi_call_phys6(f, a1, a2, a3, a4, a5, a6) \
- efi_call_phys(f, a1, a2, a3, a4, a5, a6)
/*
* Wrap all the virtual calls in a way that forces the parameters on the stack.
*/
+/* Use this macro if your virtual returns a non-void value */
#define efi_call_virt(f, args...) \
- ((efi_##f##_t __attribute__((regparm(0)))*)efi.systab->runtime->f)(args)
-
-#define efi_call_virt0(f) efi_call_virt(f)
-#define efi_call_virt1(f, a1) efi_call_virt(f, a1)
-#define efi_call_virt2(f, a1, a2) efi_call_virt(f, a1, a2)
-#define efi_call_virt3(f, a1, a2, a3) efi_call_virt(f, a1, a2, a3)
-#define efi_call_virt4(f, a1, a2, a3, a4) \
- efi_call_virt(f, a1, a2, a3, a4)
-#define efi_call_virt5(f, a1, a2, a3, a4, a5) \
- efi_call_virt(f, a1, a2, a3, a4, a5)
-#define efi_call_virt6(f, a1, a2, a3, a4, a5, a6) \
- efi_call_virt(f, a1, a2, a3, a4, a5, a6)
+({ \
+ efi_status_t __s; \
+ kernel_fpu_begin(); \
+ __s = ((efi_##f##_t __attribute__((regparm(0)))*) \
+ efi.systab->runtime->f)(args); \
+ kernel_fpu_end(); \
+ __s; \
+})
+
+/* Use this macro if your virtual call does not return any value */
+#define __efi_call_virt(f, args...) \
+({ \
+ kernel_fpu_begin(); \
+ ((efi_##f##_t __attribute__((regparm(0)))*) \
+ efi.systab->runtime->f)(args); \
+ kernel_fpu_end(); \
+})
#define efi_ioremap(addr, size, type, attr) ioremap_cache(addr, size)
#else /* !CONFIG_X86_32 */
-extern u64 efi_call0(void *fp);
-extern u64 efi_call1(void *fp, u64 arg1);
-extern u64 efi_call2(void *fp, u64 arg1, u64 arg2);
-extern u64 efi_call3(void *fp, u64 arg1, u64 arg2, u64 arg3);
-extern u64 efi_call4(void *fp, u64 arg1, u64 arg2, u64 arg3, u64 arg4);
-extern u64 efi_call5(void *fp, u64 arg1, u64 arg2, u64 arg3,
- u64 arg4, u64 arg5);
-extern u64 efi_call6(void *fp, u64 arg1, u64 arg2, u64 arg3,
- u64 arg4, u64 arg5, u64 arg6);
-
-#define efi_call_phys0(f) \
- efi_call0((f))
-#define efi_call_phys1(f, a1) \
- efi_call1((f), (u64)(a1))
-#define efi_call_phys2(f, a1, a2) \
- efi_call2((f), (u64)(a1), (u64)(a2))
-#define efi_call_phys3(f, a1, a2, a3) \
- efi_call3((f), (u64)(a1), (u64)(a2), (u64)(a3))
-#define efi_call_phys4(f, a1, a2, a3, a4) \
- efi_call4((f), (u64)(a1), (u64)(a2), (u64)(a3), \
- (u64)(a4))
-#define efi_call_phys5(f, a1, a2, a3, a4, a5) \
- efi_call5((f), (u64)(a1), (u64)(a2), (u64)(a3), \
- (u64)(a4), (u64)(a5))
-#define efi_call_phys6(f, a1, a2, a3, a4, a5, a6) \
- efi_call6((f), (u64)(a1), (u64)(a2), (u64)(a3), \
- (u64)(a4), (u64)(a5), (u64)(a6))
-
-#define _efi_call_virtX(x, f, ...) \
+#define EFI_LOADER_SIGNATURE "EL64"
+
+extern u64 asmlinkage efi_call(void *fp, ...);
+
+#define efi_call_phys(f, args...) efi_call((f), args)
+
+#define efi_call_virt(f, ...) \
({ \
efi_status_t __s; \
\
efi_sync_low_kernel_mappings(); \
preempt_disable(); \
- __s = efi_call##x((void *)efi.systab->runtime->f, __VA_ARGS__); \
+ __kernel_fpu_begin(); \
+ __s = efi_call((void *)efi.systab->runtime->f, __VA_ARGS__); \
+ __kernel_fpu_end(); \
preempt_enable(); \
__s; \
})
-#define efi_call_virt0(f) \
- _efi_call_virtX(0, f)
-#define efi_call_virt1(f, a1) \
- _efi_call_virtX(1, f, (u64)(a1))
-#define efi_call_virt2(f, a1, a2) \
- _efi_call_virtX(2, f, (u64)(a1), (u64)(a2))
-#define efi_call_virt3(f, a1, a2, a3) \
- _efi_call_virtX(3, f, (u64)(a1), (u64)(a2), (u64)(a3))
-#define efi_call_virt4(f, a1, a2, a3, a4) \
- _efi_call_virtX(4, f, (u64)(a1), (u64)(a2), (u64)(a3), (u64)(a4))
-#define efi_call_virt5(f, a1, a2, a3, a4, a5) \
- _efi_call_virtX(5, f, (u64)(a1), (u64)(a2), (u64)(a3), (u64)(a4), (u64)(a5))
-#define efi_call_virt6(f, a1, a2, a3, a4, a5, a6) \
- _efi_call_virtX(6, f, (u64)(a1), (u64)(a2), (u64)(a3), (u64)(a4), (u64)(a5), (u64)(a6))
+/*
+ * All X86_64 virt calls return non-void values. Thus, use non-void call for
+ * virt calls that would be void on X86_32.
+ */
+#define __efi_call_virt(f, args...) efi_call_virt(f, args)
extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size,
u32 type, u64 attribute);
diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h
index cea1c76d49bf..115e3689cd53 100644
--- a/arch/x86/include/asm/fpu-internal.h
+++ b/arch/x86/include/asm/fpu-internal.h
@@ -87,22 +87,22 @@ static inline int is_x32_frame(void)
static __always_inline __pure bool use_eager_fpu(void)
{
- return static_cpu_has(X86_FEATURE_EAGER_FPU);
+ return static_cpu_has_safe(X86_FEATURE_EAGER_FPU);
}
static __always_inline __pure bool use_xsaveopt(void)
{
- return static_cpu_has(X86_FEATURE_XSAVEOPT);
+ return static_cpu_has_safe(X86_FEATURE_XSAVEOPT);
}
static __always_inline __pure bool use_xsave(void)
{
- return static_cpu_has(X86_FEATURE_XSAVE);
+ return static_cpu_has_safe(X86_FEATURE_XSAVE);
}
static __always_inline __pure bool use_fxsr(void)
{
- return static_cpu_has(X86_FEATURE_FXSR);
+ return static_cpu_has_safe(X86_FEATURE_FXSR);
}
static inline void fx_finit(struct i387_fxsave_struct *fx)
@@ -293,7 +293,7 @@ static inline int restore_fpu_checking(struct task_struct *tsk)
/* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception
is pending. Clear the x87 state here by setting it to fixed
values. "m" is a random variable that should be in L1 */
- if (unlikely(static_cpu_has(X86_FEATURE_FXSAVE_LEAK))) {
+ if (unlikely(static_cpu_has_safe(X86_FEATURE_FXSAVE_LEAK))) {
asm volatile(
"fnclex\n\t"
"emms\n\t"
diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h
index 35e67a457182..31eab867e6d3 100644
--- a/arch/x86/include/asm/signal.h
+++ b/arch/x86/include/asm/signal.h
@@ -92,12 +92,6 @@ static inline int __gen_sigismember(sigset_t *set, int _sig)
? __const_sigismember((set), (sig)) \
: __gen_sigismember((set), (sig)))
-static inline int sigfindinword(unsigned long word)
-{
- asm("bsfl %1,%0" : "=r"(word) : "rm"(word) : "cc");
- return word;
-}
-
struct pt_regs;
#else /* __i386__ */
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 283a76a9cc40..11ccfb0a63e7 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -17,6 +17,7 @@
#include <asm/idle.h>
#include <asm/mce.h>
#include <asm/hw_irq.h>
+#include <asm/desc.h>
#define CREATE_TRACE_POINTS
#include <asm/trace/irq_vectors.h>
@@ -334,10 +335,17 @@ int check_irq_vectors_for_cpu_disable(void)
for_each_online_cpu(cpu) {
if (cpu == this_cpu)
continue;
- for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS;
- vector++) {
- if (per_cpu(vector_irq, cpu)[vector] < 0)
- count++;
+ /*
+ * We scan from FIRST_EXTERNAL_VECTOR to first system
+ * vector. If the vector is marked in the used vectors
+ * bitmap or an irq is assigned to it, we don't count
+ * it as available.
+ */
+ for (vector = FIRST_EXTERNAL_VECTOR;
+ vector < first_system_vector; vector++) {
+ if (!test_bit(vector, used_vectors) &&
+ per_cpu(vector_irq, cpu)[vector] < 0)
+ count++;
}
}
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 5d93ac1b72db..5492798930ef 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -866,9 +866,6 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
/* was set by cpu_init() */
cpumask_clear_cpu(cpu, cpu_initialized_mask);
-
- set_cpu_present(cpu, false);
- per_cpu(x86_cpu_to_apicid, cpu) = BAD_APICID;
}
/* mark "stuck" area as not stuck */
@@ -928,7 +925,7 @@ int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
err = do_boot_cpu(apicid, cpu, tidle);
if (err) {
- pr_debug("do_boot_cpu failed %d\n", err);
+ pr_err("do_boot_cpu failed(%d) to wakeup CPU#%u\n", err, cpu);
return -EIO;
}
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 3781dd39e8bd..87fc96bcc13c 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -110,7 +110,7 @@ static efi_status_t virt_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc)
efi_status_t status;
spin_lock_irqsave(&rtc_lock, flags);
- status = efi_call_virt2(get_time, tm, tc);
+ status = efi_call_virt(get_time, tm, tc);
spin_unlock_irqrestore(&rtc_lock, flags);
return status;
}
@@ -121,7 +121,7 @@ static efi_status_t virt_efi_set_time(efi_time_t *tm)
efi_status_t status;
spin_lock_irqsave(&rtc_lock, flags);
- status = efi_call_virt1(set_time, tm);
+ status = efi_call_virt(set_time, tm);
spin_unlock_irqrestore(&rtc_lock, flags);
return status;
}
@@ -134,8 +134,7 @@ static efi_status_t virt_efi_get_wakeup_time(efi_bool_t *enabled,
efi_status_t status;
spin_lock_irqsave(&rtc_lock, flags);
- status = efi_call_virt3(get_wakeup_time,
- enabled, pending, tm);
+ status = efi_call_virt(get_wakeup_time, enabled, pending, tm);
spin_unlock_irqrestore(&rtc_lock, flags);
return status;
}
@@ -146,8 +145,7 @@ static efi_status_t virt_efi_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm)
efi_status_t status;
spin_lock_irqsave(&rtc_lock, flags);
- status = efi_call_virt2(set_wakeup_time,
- enabled, tm);
+ status = efi_call_virt(set_wakeup_time, enabled, tm);
spin_unlock_irqrestore(&rtc_lock, flags);
return status;
}
@@ -158,17 +156,17 @@ static efi_status_t virt_efi_get_variable(efi_char16_t *name,
unsigned long *data_size,
void *data)
{
- return efi_call_virt5(get_variable,
- name, vendor, attr,
- data_size, data);
+ return efi_call_virt(get_variable,
+ name, vendor, attr,
+ data_size, data);
}
static efi_status_t virt_efi_get_next_variable(unsigned long *name_size,
efi_char16_t *name,
efi_guid_t *vendor)
{
- return efi_call_virt3(get_next_variable,
- name_size, name, vendor);
+ return efi_call_virt(get_next_variable,
+ name_size, name, vendor);
}
static efi_status_t virt_efi_set_variable(efi_char16_t *name,
@@ -177,9 +175,9 @@ static efi_status_t virt_efi_set_variable(efi_char16_t *name,
unsigned long data_size,
void *data)
{
- return efi_call_virt5(set_variable,
- name, vendor, attr,
- data_size, data);
+ return efi_call_virt(set_variable,
+ name, vendor, attr,
+ data_size, data);
}
static efi_status_t virt_efi_query_variable_info(u32 attr,
@@ -190,13 +188,13 @@ static efi_status_t virt_efi_query_variable_info(u32 attr,
if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION)
return EFI_UNSUPPORTED;
- return efi_call_virt4(query_variable_info, attr, storage_space,
- remaining_space, max_variable_size);
+ return efi_call_virt(query_variable_info, attr, storage_space,
+ remaining_space, max_variable_size);
}
static efi_status_t virt_efi_get_next_high_mono_count(u32 *count)
{
- return efi_call_virt1(get_next_high_mono_count, count);
+ return efi_call_virt(get_next_high_mono_count, count);
}
static void virt_efi_reset_system(int reset_type,
@@ -204,8 +202,8 @@ static void virt_efi_reset_system(int reset_type,
unsigned long data_size,
efi_char16_t *data)
{
- efi_call_virt4(reset_system, reset_type, status,
- data_size, data);
+ __efi_call_virt(reset_system, reset_type, status,
+ data_size, data);
}
static efi_status_t virt_efi_update_capsule(efi_capsule_header_t **capsules,
@@ -215,7 +213,7 @@ static efi_status_t virt_efi_update_capsule(efi_capsule_header_t **capsules,
if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION)
return EFI_UNSUPPORTED;
- return efi_call_virt3(update_capsule, capsules, count, sg_list);
+ return efi_call_virt(update_capsule, capsules, count, sg_list);
}
static efi_status_t virt_efi_query_capsule_caps(efi_capsule_header_t **capsules,
@@ -226,8 +224,8 @@ static efi_status_t virt_efi_query_capsule_caps(efi_capsule_header_t **capsules,
if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION)
return EFI_UNSUPPORTED;
- return efi_call_virt4(query_capsule_caps, capsules, count, max_size,
- reset_type);
+ return efi_call_virt(query_capsule_caps, capsules, count, max_size,
+ reset_type);
}
static efi_status_t __init phys_efi_set_virtual_address_map(
@@ -239,9 +237,9 @@ static efi_status_t __init phys_efi_set_virtual_address_map(
efi_status_t status;
efi_call_phys_prelog();
- status = efi_call_phys4(efi_phys.set_virtual_address_map,
- memory_map_size, descriptor_size,
- descriptor_version, virtual_map);
+ status = efi_call_phys(efi_phys.set_virtual_address_map,
+ memory_map_size, descriptor_size,
+ descriptor_version, virtual_map);
efi_call_phys_epilog();
return status;
}
@@ -919,6 +917,9 @@ static void __init save_runtime_map(void)
void *tmp, *p, *q = NULL;
int count = 0;
+ if (efi_enabled(EFI_OLD_MEMMAP))
+ return;
+
for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
md = p;
diff --git a/arch/x86/platform/efi/efi_stub_64.S b/arch/x86/platform/efi/efi_stub_64.S
index e0984ef0374b..5fcda7272550 100644
--- a/arch/x86/platform/efi/efi_stub_64.S
+++ b/arch/x86/platform/efi/efi_stub_64.S
@@ -73,84 +73,7 @@
2:
.endm
-ENTRY(efi_call0)
- SAVE_XMM
- subq $32, %rsp
- SWITCH_PGT
- call *%rdi
- RESTORE_PGT
- addq $32, %rsp
- RESTORE_XMM
- ret
-ENDPROC(efi_call0)
-
-ENTRY(efi_call1)
- SAVE_XMM
- subq $32, %rsp
- mov %rsi, %rcx
- SWITCH_PGT
- call *%rdi
- RESTORE_PGT
- addq $32, %rsp
- RESTORE_XMM
- ret
-ENDPROC(efi_call1)
-
-ENTRY(efi_call2)
- SAVE_XMM
- subq $32, %rsp
- mov %rsi, %rcx
- SWITCH_PGT
- call *%rdi
- RESTORE_PGT
- addq $32, %rsp
- RESTORE_XMM
- ret
-ENDPROC(efi_call2)
-
-ENTRY(efi_call3)
- SAVE_XMM
- subq $32, %rsp
- mov %rcx, %r8
- mov %rsi, %rcx
- SWITCH_PGT
- call *%rdi
- RESTORE_PGT
- addq $32, %rsp
- RESTORE_XMM
- ret
-ENDPROC(efi_call3)
-
-ENTRY(efi_call4)
- SAVE_XMM
- subq $32, %rsp
- mov %r8, %r9
- mov %rcx, %r8
- mov %rsi, %rcx
- SWITCH_PGT
- call *%rdi
- RESTORE_PGT
- addq $32, %rsp
- RESTORE_XMM
- ret
-ENDPROC(efi_call4)
-
-ENTRY(efi_call5)
- SAVE_XMM
- subq $48, %rsp
- mov %r9, 32(%rsp)
- mov %r8, %r9
- mov %rcx, %r8
- mov %rsi, %rcx
- SWITCH_PGT
- call *%rdi
- RESTORE_PGT
- addq $48, %rsp
- RESTORE_XMM
- ret
-ENDPROC(efi_call5)
-
-ENTRY(efi_call6)
+ENTRY(efi_call)
SAVE_XMM
mov (%rsp), %rax
mov 8(%rax), %rax
@@ -166,7 +89,7 @@ ENTRY(efi_call6)
addq $48, %rsp
RESTORE_XMM
ret
-ENDPROC(efi_call6)
+ENDPROC(efi_call)
#ifdef CONFIG_EFI_MIXED
diff --git a/arch/x86/platform/uv/bios_uv.c b/arch/x86/platform/uv/bios_uv.c
index 766612137a62..1584cbed0dce 100644
--- a/arch/x86/platform/uv/bios_uv.c
+++ b/arch/x86/platform/uv/bios_uv.c
@@ -39,7 +39,7 @@ s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5)
*/
return BIOS_STATUS_UNIMPLEMENTED;
- ret = efi_call6((void *)__va(tab->function), (u64)which,
+ ret = efi_call((void *)__va(tab->function), (u64)which,
a1, a2, a3, a4, a5);
return ret;
}
diff --git a/arch/xtensa/include/asm/ftrace.h b/arch/xtensa/include/asm/ftrace.h
index 736b9d214d80..6c6d9a9f185f 100644
--- a/arch/xtensa/include/asm/ftrace.h
+++ b/arch/xtensa/include/asm/ftrace.h
@@ -12,24 +12,18 @@
#include <asm/processor.h>
-#define HAVE_ARCH_CALLER_ADDR
#ifndef __ASSEMBLY__
-#define CALLER_ADDR0 ({ unsigned long a0, a1; \
+#define ftrace_return_address0 ({ unsigned long a0, a1; \
__asm__ __volatile__ ( \
"mov %0, a0\n" \
"mov %1, a1\n" \
: "=r"(a0), "=r"(a1)); \
MAKE_PC_FROM_RA(a0, a1); })
+
#ifdef CONFIG_FRAME_POINTER
extern unsigned long return_address(unsigned level);
-#define CALLER_ADDR1 return_address(1)
-#define CALLER_ADDR2 return_address(2)
-#define CALLER_ADDR3 return_address(3)
-#else /* CONFIG_FRAME_POINTER */
-#define CALLER_ADDR1 (0)
-#define CALLER_ADDR2 (0)
-#define CALLER_ADDR3 (0)
-#endif /* CONFIG_FRAME_POINTER */
+#define ftrace_return_address(n) return_address(n)
+#endif
#endif /* __ASSEMBLY__ */
#ifdef CONFIG_FUNCTION_TRACER