From b9a348cb12f3925a27fcf0a38a146b40978588d0 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 9 Aug 2012 10:43:29 +0100 Subject: ARM: opcodes: add __ERET/__MSR_ELR_HYP instruction encoding Enabling boot from HYP mode requires the use of some more virt-specific instructions ("eret" and "msr elr_hyp, reg"). Add the necessary encoding to asm/opcode-virt.h. Signed-off-by: Marc Zyngier --- arch/arm/include/asm/opcodes-virt.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/arm/include/asm/opcodes-virt.h b/arch/arm/include/asm/opcodes-virt.h index b85665a96f8e..efcfdf92d9d5 100644 --- a/arch/arm/include/asm/opcodes-virt.h +++ b/arch/arm/include/asm/opcodes-virt.h @@ -26,4 +26,14 @@ 0xF7E08000 | (((imm16) & 0xF000) << 4) | ((imm16) & 0x0FFF) \ ) +#define __ERET __inst_arm_thumb32( \ + 0xE160006E, \ + 0xF3DE8F00 \ +) + +#define __MSR_ELR_HYP(regnum) __inst_arm_thumb32( \ + 0xE12EF300 | regnum, \ + 0xF3808E30 | (regnum << 16) \ +) + #endif /* ! __ASM_ARM_OPCODES_VIRT_H */ -- cgit v1.2.3 From 80c59dafb1a9a86fa996e6e34d06b60567c925ca Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Thu, 9 Feb 2012 08:47:17 -0800 Subject: ARM: virt: allow the kernel to be entered in HYP mode This patch does two things: * Ensure that asynchronous aborts are masked at kernel entry. The bootloader should be masking these anyway, but this reduces the damage window just in case it doesn't. * Enter svc mode via exception return to ensure that CPU state is properly serialised. This does not matter when switching from an ordinary privileged mode ("PL1" modes in ARMv7-AR rev C parlance), but it potentially does matter when switching from a another privileged mode such as hyp mode. This should allow the kernel to boot safely either from svc mode or hyp mode, even if no support for use of the ARM Virtualization Extensions is built into the kernel. Signed-off-by: Dave Martin Signed-off-by: Marc Zyngier --- arch/arm/include/asm/assembler.h | 28 ++++++ arch/arm/include/asm/ptrace.h | 1 + arch/arm/include/asm/virt.h | 52 +++++++++++ arch/arm/kernel/Makefile | 2 + arch/arm/kernel/head.S | 14 ++- arch/arm/kernel/hyp-stub.S | 192 +++++++++++++++++++++++++++++++++++++++ 6 files changed, 286 insertions(+), 3 deletions(-) create mode 100644 arch/arm/include/asm/virt.h create mode 100644 arch/arm/kernel/hyp-stub.S diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index 03fb93621d0d..658a15dbc87e 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -22,6 +22,7 @@ #include #include +#include #define IOMEM(x) (x) @@ -239,6 +240,33 @@ .endm #endif +/* + * Helper macro to enter SVC mode cleanly and mask interrupts. reg is + * a scratch register for the macro to overwrite. + * + * This macro is intended for forcing the CPU into SVC mode at boot time. + * you cannot return to the original mode. + * + * Beware, it also clobers LR. + */ +.macro safe_svcmode_maskall reg:req + mrs \reg , cpsr + mov lr , \reg + and lr , lr , #MODE_MASK + cmp lr , #HYP_MODE + orr \reg , \reg , #PSR_A_BIT | PSR_I_BIT | PSR_F_BIT + bic \reg , \reg , #MODE_MASK + orr \reg , \reg , #SVC_MODE +THUMB( orr \reg , \reg , #PSR_T_BIT ) + msr spsr_cxsf, \reg + adr lr, BSYM(2f) + bne 1f + __MSR_ELR_HYP(14) + __ERET +1: movs pc, lr +2: +.endm + /* * STRT/LDRT access macros with ARM and Thumb-2 variants */ diff --git a/arch/arm/include/asm/ptrace.h b/arch/arm/include/asm/ptrace.h index 355ece523f41..91ef6c231c47 100644 --- a/arch/arm/include/asm/ptrace.h +++ b/arch/arm/include/asm/ptrace.h @@ -44,6 +44,7 @@ #define IRQ_MODE 0x00000012 #define SVC_MODE 0x00000013 #define ABT_MODE 0x00000017 +#define HYP_MODE 0x0000001a #define UND_MODE 0x0000001b #define SYSTEM_MODE 0x0000001f #define MODE32_BIT 0x00000010 diff --git a/arch/arm/include/asm/virt.h b/arch/arm/include/asm/virt.h new file mode 100644 index 000000000000..0a99723edbf2 --- /dev/null +++ b/arch/arm/include/asm/virt.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2012 Linaro Limited. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#ifndef VIRT_H +#define VIRT_H + +#include + +/* + * Flag indicating that the kernel was not entered in the same mode on every + * CPU. The zImage loader stashes this value in an SPSR, so we need an + * architecturally defined flag bit here (the N flag, as it happens) + */ +#define BOOT_CPU_MODE_MISMATCH (1<<31) + +#ifndef __ASSEMBLY__ + +#ifdef CONFIG_ARM_VIRT_EXT +/* + * __boot_cpu_mode records what mode the primary CPU was booted in. + * A correctly-implemented bootloader must start all CPUs in the same mode: + * if it fails to do this, the flag BOOT_CPU_MODE_MISMATCH is set to indicate + * that some CPU(s) were booted in a different mode. + * + * This allows the kernel to flag an error when the secondaries have come up. + */ +extern int __boot_cpu_mode; + +void __hyp_set_vectors(unsigned long phys_vector_base); +unsigned long __hyp_get_vectors(void); +#else +#define __boot_cpu_mode (SVC_MODE) +#endif + +#endif /* __ASSEMBLY__ */ + +#endif /* ! VIRT_H */ diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index 7ad2d5cf7008..49b61a3f5b29 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -82,4 +82,6 @@ head-y := head$(MMUEXT).o obj-$(CONFIG_DEBUG_LL) += debug.o obj-$(CONFIG_EARLY_PRINTK) += early_printk.o +obj-$(CONFIG_ARM_VIRT_EXT) += hyp-stub.o + extra-y := $(head-y) vmlinux.lds diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S index 3db960e20cb8..27093e4feef8 100644 --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S @@ -83,8 +83,12 @@ ENTRY(stext) THUMB( .thumb ) @ switch to Thumb now. THUMB(1: ) - setmode PSR_F_BIT | PSR_I_BIT | SVC_MODE, r9 @ ensure svc mode - @ and irqs disabled +#ifdef CONFIG_ARM_VIRT_EXT + bl __hyp_stub_install +#endif + @ ensure svc mode and all interrupts masked + safe_svcmode_maskall r9 + mrc p15, 0, r9, c0, c0 @ get processor id bl __lookup_processor_type @ r5=procinfo r9=cpuid movs r10, r5 @ invalid processor (r5=0)? @@ -326,7 +330,11 @@ ENTRY(secondary_startup) * the processor type - there is no need to check the machine type * as it has already been validated by the primary processor. */ - setmode PSR_F_BIT | PSR_I_BIT | SVC_MODE, r9 +#ifdef CONFIG_ARM_VIRT_EXT + bl __hyp_stub_install +#endif + safe_svcmode_maskall r9 + mrc p15, 0, r9, c0, c0 @ get processor id bl __lookup_processor_type movs r10, r5 @ invalid processor? diff --git a/arch/arm/kernel/hyp-stub.S b/arch/arm/kernel/hyp-stub.S new file mode 100644 index 000000000000..b03e9244e5ad --- /dev/null +++ b/arch/arm/kernel/hyp-stub.S @@ -0,0 +1,192 @@ +/* + * Copyright (c) 2012 Linaro Limited. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include +#include +#include +#include + +/* + * For the kernel proper, we need to find out the CPU boot mode long after + * boot, so we need to store it in a writable variable. + * + * This is not in .bss, because we set it sufficiently early that the boot-time + * zeroing of .bss would clobber it. + */ +.data +ENTRY(__boot_cpu_mode) + .long 0 +.text + + /* + * Save the primary CPU boot mode. Requires 3 scratch registers. + */ + .macro store_primary_cpu_mode reg1, reg2, reg3 + mrs \reg1, cpsr + and \reg1, \reg1, #MODE_MASK + adr \reg2, .L__boot_cpu_mode_offset + ldr \reg3, [\reg2] + str \reg1, [\reg2, \reg3] + .endm + + /* + * Compare the current mode with the one saved on the primary CPU. + * If they don't match, record that fact. The Z bit indicates + * if there's a match or not. + * Requires 3 additionnal scratch registers. + */ + .macro compare_cpu_mode_with_primary mode, reg1, reg2, reg3 + adr \reg2, .L__boot_cpu_mode_offset + ldr \reg3, [\reg2] + ldr \reg1, [\reg2, \reg3] + cmp \mode, \reg1 @ matches primary CPU boot mode? + orrne r7, r7, #BOOT_CPU_MODE_MISMATCH + strne r7, [r5, r6] @ record what happened and give up + .endm + +/* + * Hypervisor stub installation functions. + * + * These must be called with the MMU and D-cache off. + * They are not ABI compliant and are only intended to be called from the kernel + * entry points in head.S. + */ +@ Call this from the primary CPU +ENTRY(__hyp_stub_install) + store_primary_cpu_mode r4, r5, r6 +ENDPROC(__hyp_stub_install) + + @ fall through... + +@ Secondary CPUs should call here +ENTRY(__hyp_stub_install_secondary) + mrs r4, cpsr + and r4, r4, #MODE_MASK + + /* + * If the secondary has booted with a different mode, give up + * immediately. + */ + compare_cpu_mode_with_primary r4, r5, r6, r7 + bxne lr + + /* + * Once we have given up on one CPU, we do not try to install the + * stub hypervisor on the remaining ones: because the saved boot mode + * is modified, it can't compare equal to the CPSR mode field any + * more. + * + * Otherwise... + */ + + cmp r4, #HYP_MODE + bxne lr @ give up if the CPU is not in HYP mode + +/* + * Configure HSCTLR to set correct exception endianness/instruction set + * state etc. + * Turn off all traps + * Eventually, CPU-specific code might be needed -- assume not for now + * + * This code relies on the "eret" instruction to synchronize the + * various coprocessor accesses. + */ + @ Now install the hypervisor stub: + adr r7, __hyp_stub_vectors + mcr p15, 4, r7, c12, c0, 0 @ set hypervisor vector base (HVBAR) + + @ Disable all traps, so we don't get any nasty surprise + mov r7, #0 + mcr p15, 4, r7, c1, c1, 0 @ HCR + mcr p15, 4, r7, c1, c1, 2 @ HCPTR + mcr p15, 4, r7, c1, c1, 3 @ HSTR + +THUMB( orr r7, #(1 << 30) ) @ HSCTLR.TE +#ifdef CONFIG_CPU_BIG_ENDIAN + orr r7, #(1 << 9) @ HSCTLR.EE +#endif + mcr p15, 4, r7, c1, c0, 0 @ HSCTLR + + mrc p15, 4, r7, c1, c1, 1 @ HDCR + and r7, #0x1f @ Preserve HPMN + mcr p15, 4, r7, c1, c1, 1 @ HDCR + + bic r7, r4, #MODE_MASK + orr r7, r7, #SVC_MODE +THUMB( orr r7, r7, #PSR_T_BIT ) + msr spsr_cxsf, r7 @ This is SPSR_hyp. + + __MSR_ELR_HYP(14) @ msr elr_hyp, lr + __ERET @ return, switching to SVC mode + @ The boot CPU mode is left in r4. +ENDPROC(__hyp_stub_install_secondary) + +__hyp_stub_do_trap: + cmp r0, #-1 + mrceq p15, 4, r0, c12, c0, 0 @ get HVBAR + mcrne p15, 4, r0, c12, c0, 0 @ set HVBAR + __ERET +ENDPROC(__hyp_stub_do_trap) + +/* + * __hyp_set_vectors: Call this after boot to set the initial hypervisor + * vectors as part of hypervisor installation. On an SMP system, this should + * be called on each CPU. + * + * r0 must be the physical address of the new vector table (which must lie in + * the bottom 4GB of physical address space. + * + * r0 must be 32-byte aligned. + * + * Before calling this, you must check that the stub hypervisor is installed + * everywhere, by waiting for any secondary CPUs to be brought up and then + * checking that BOOT_CPU_MODE_HAVE_HYP(__boot_cpu_mode) is true. + * + * If not, there is a pre-existing hypervisor, some CPUs failed to boot, or + * something else went wrong... in such cases, trying to install a new + * hypervisor is unlikely to work as desired. + * + * When you call into your shiny new hypervisor, sp_hyp will contain junk, + * so you will need to set that to something sensible at the new hypervisor's + * initialisation entry point. + */ +ENTRY(__hyp_get_vectors) + mov r0, #-1 +ENDPROC(__hyp_get_vectors) + @ fall through +ENTRY(__hyp_set_vectors) + __HVC(0) + bx lr +ENDPROC(__hyp_set_vectors) + +.align 2 +.L__boot_cpu_mode_offset: + .long __boot_cpu_mode - . + +.align 5 +__hyp_stub_vectors: +__hyp_stub_reset: W(b) . +__hyp_stub_und: W(b) . +__hyp_stub_svc: W(b) . +__hyp_stub_pabort: W(b) . +__hyp_stub_dabort: W(b) . +__hyp_stub_trap: W(b) __hyp_stub_do_trap +__hyp_stub_irq: W(b) . +__hyp_stub_fiq: W(b) . +ENDPROC(__hyp_stub_vectors) + -- cgit v1.2.3 From 424e5994e63326a42012f003f1174f3c363c7b62 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Fri, 10 Feb 2012 18:07:07 -0800 Subject: ARM: zImage/virt: hyp mode entry support for the zImage loader The zImage loader needs to turn on the MMU in order to take advantage of caching while decompressing the zImage. Running this in hyp mode would require the LPAE pagetable format to be supported; to avoid this complexity, this patch switches out of hyp mode, and returns back to hyp mode just before booting the kernel. This implementation assumes that the Hyp mode view of memory and the PL1 view of memory are coherent, providing that the MMU and caches are off in both, as required by the boot protocol. The zImage decompression code must drain the write buffer on completion anyway, and entry into Hyp mode should flush any prefetch buffer, avoiding hazards associated with local write buffers and the pipeline. Signed-off-by: Dave Martin Signed-off-by: Marc Zyngier --- arch/arm/boot/compressed/.gitignore | 1 + arch/arm/boot/compressed/Makefile | 9 ++++- arch/arm/boot/compressed/head.S | 71 +++++++++++++++++++++++++++++++++---- arch/arm/kernel/hyp-stub.S | 18 ++++++++++ 4 files changed, 91 insertions(+), 8 deletions(-) diff --git a/arch/arm/boot/compressed/.gitignore b/arch/arm/boot/compressed/.gitignore index d0d441c429ae..f79a08efe000 100644 --- a/arch/arm/boot/compressed/.gitignore +++ b/arch/arm/boot/compressed/.gitignore @@ -1,6 +1,7 @@ ashldi3.S font.c lib1funcs.S +hyp-stub.S piggy.gzip piggy.lzo piggy.lzma diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile index bb267562e7ed..a517153a13ea 100644 --- a/arch/arm/boot/compressed/Makefile +++ b/arch/arm/boot/compressed/Makefile @@ -30,6 +30,10 @@ FONTC = $(srctree)/drivers/video/console/font_acorn_8x8.c OBJS += string.o CFLAGS_string.o := -Os +ifeq ($(CONFIG_ARM_VIRT_EXT),y) +OBJS += hyp-stub.o +endif + # # Architecture dependencies # @@ -126,7 +130,7 @@ KBUILD_CFLAGS = $(subst -pg, , $(ORIG_CFLAGS)) endif ccflags-y := -fpic -fno-builtin -I$(obj) -asflags-y := -Wa,-march=all +asflags-y := -Wa,-march=all -DZIMAGE # Supply kernel BSS size to the decompressor via a linker symbol. KBSS_SZ = $(shell $(CROSS_COMPILE)size $(obj)/../../../../vmlinux | \ @@ -198,3 +202,6 @@ $(obj)/font.c: $(FONTC) $(obj)/vmlinux.lds: $(obj)/vmlinux.lds.in arch/arm/boot/Makefile $(KCONFIG_CONFIG) @sed "$(SEDFLAGS)" < $< > $@ + +$(obj)/hyp-stub.S: $(srctree)/arch/$(SRCARCH)/kernel/hyp-stub.S + $(call cmd,shipped) diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index b8c64b80bafc..0749a6184abf 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -9,6 +9,7 @@ * published by the Free Software Foundation. */ #include +#include /* * Debugging stuff @@ -132,7 +133,12 @@ start: .word start @ absolute load/run zImage address .word _edata @ zImage end address THUMB( .thumb ) -1: mov r7, r1 @ save architecture ID +1: + mrs r9, cpsr +#ifdef CONFIG_ARM_VIRT_EXT + bl __hyp_stub_install @ get into SVC mode, reversibly +#endif + mov r7, r1 @ save architecture ID mov r8, r2 @ save atags pointer #ifndef __ARM_ARCH_2__ @@ -148,9 +154,9 @@ start: ARM( swi 0x123456 ) @ angel_SWI_ARM THUMB( svc 0xab ) @ angel_SWI_THUMB not_angel: - mrs r2, cpsr @ turn off interrupts to - orr r2, r2, #0xc0 @ prevent angel from running - msr cpsr_c, r2 + safe_svcmode_maskall r0 + msr spsr_cxsf, r9 @ Save the CPU boot mode in + @ SPSR #else teqp pc, #0x0c000003 @ turn off interrupts #endif @@ -350,6 +356,20 @@ dtb_check_done: adr r5, restart bic r5, r5, #31 +/* Relocate the hyp vector base if necessary */ +#ifdef CONFIG_ARM_VIRT_EXT + mrs r0, spsr + and r0, r0, #MODE_MASK + cmp r0, #HYP_MODE + bne 1f + + bl __hyp_get_vectors + sub r0, r0, r5 + add r0, r0, r10 + bl __hyp_set_vectors +1: +#endif + sub r9, r6, r5 @ size to copy add r9, r9, #31 @ rounded up to a multiple bic r9, r9, #31 @ ... of 32 bytes @@ -458,11 +478,29 @@ not_relocated: mov r0, #0 bl decompress_kernel bl cache_clean_flush bl cache_off - mov r0, #0 @ must be zero mov r1, r7 @ restore architecture number mov r2, r8 @ restore atags pointer - ARM( mov pc, r4 ) @ call kernel - THUMB( bx r4 ) @ entry point is always ARM + +#ifdef CONFIG_ARM_VIRT_EXT + mrs r0, spsr @ Get saved CPU boot mode + and r0, r0, #MODE_MASK + cmp r0, #HYP_MODE @ if not booted in HYP mode... + bne __enter_kernel @ boot kernel directly + + adr r12, .L__hyp_reentry_vectors_offset + ldr r0, [r12] + add r0, r0, r12 + + bl __hyp_set_vectors + __HVC(0) @ otherwise bounce to hyp mode + + b . @ should never be reached + + .align 2 +.L__hyp_reentry_vectors_offset: .long __hyp_reentry_vectors - . +#else + b __enter_kernel +#endif .align 2 .type LC0, #object @@ -1191,6 +1229,25 @@ memdump: mov r12, r0 #endif .ltorg + +#ifdef CONFIG_ARM_VIRT_EXT +.align 5 +__hyp_reentry_vectors: + W(b) . @ reset + W(b) . @ undef + W(b) . @ svc + W(b) . @ pabort + W(b) . @ dabort + W(b) __enter_kernel @ hyp + W(b) . @ irq + W(b) . @ fiq +#endif /* CONFIG_ARM_VIRT_EXT */ + +__enter_kernel: + mov r0, #0 @ must be 0 + ARM( mov pc, r4 ) @ call kernel + THUMB( bx r4 ) @ entry point is always ARM + reloc_code_end: .align diff --git a/arch/arm/kernel/hyp-stub.S b/arch/arm/kernel/hyp-stub.S index b03e9244e5ad..70609417deaf 100644 --- a/arch/arm/kernel/hyp-stub.S +++ b/arch/arm/kernel/hyp-stub.S @@ -21,6 +21,7 @@ #include #include +#ifndef ZIMAGE /* * For the kernel proper, we need to find out the CPU boot mode long after * boot, so we need to store it in a writable variable. @@ -59,6 +60,21 @@ ENTRY(__boot_cpu_mode) strne r7, [r5, r6] @ record what happened and give up .endm +#else /* ZIMAGE */ + + .macro store_primary_cpu_mode reg1:req, reg2:req, reg3:req + .endm + +/* + * The zImage loader only runs on one CPU, so we don't bother with mult-CPU + * consistency checking: + */ + .macro compare_cpu_mode_with_primary mode, reg1, reg2, reg3 + cmp \mode, \mode + .endm + +#endif /* ZIMAGE */ + /* * Hypervisor stub installation functions. * @@ -174,9 +190,11 @@ ENTRY(__hyp_set_vectors) bx lr ENDPROC(__hyp_set_vectors) +#ifndef ZIMAGE .align 2 .L__boot_cpu_mode_offset: .long __boot_cpu_mode - . +#endif .align 5 __hyp_stub_vectors: -- cgit v1.2.3 From 6a6d55c38c8b4ee77b50a33f03ea09e75b18bf82 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Fri, 10 Feb 2012 18:07:07 -0800 Subject: ARM: virt: Update documentation for hyp mode entry support Document the possibility of the kernel being entered in HYP mode. Signed-off-by: Dave Martin Signed-off-by: Marc Zyngier --- Documentation/arm/Booting | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/Documentation/arm/Booting b/Documentation/arm/Booting index a341d87d276e..0c1f475fdf36 100644 --- a/Documentation/arm/Booting +++ b/Documentation/arm/Booting @@ -154,13 +154,33 @@ In either case, the following conditions must be met: - CPU mode All forms of interrupts must be disabled (IRQs and FIQs) - The CPU must be in SVC mode. (A special exception exists for Angel) + + For CPUs which do not include the ARM virtualization extensions, the + CPU must be in SVC mode. (A special exception exists for Angel) + + CPUs which include support for the virtualization extensions can be + entered in HYP mode in order to enable the kernel to make full use of + these extensions. This is the recommended boot method for such CPUs, + unless the virtualisations are already in use by a pre-installed + hypervisor. + + If the kernel is not entered in HYP mode for any reason, it must be + entered in SVC mode. - Caches, MMUs The MMU must be off. Instruction cache may be on or off. Data cache must be off. + If the kernel is entered in HYP mode, the above requirements apply to + the HYP mode configuration in addition to the ordinary PL1 (privileged + kernel modes) configuration. In addition, all traps into the + hypervisor must be disabled, and PL1 access must be granted for all + peripherals and CPU resources for which this is architecturally + possible. Except for entering in HYP mode, the system configuration + should be such that a kernel which does not include support for the + virtualization extensions can boot correctly without extra help. + - The boot loader is expected to call the kernel image by jumping directly to the first instruction of the kernel image. -- cgit v1.2.3 From 4588c34daabb5aebee9cbe90f0ca6ab11412f207 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Fri, 17 Feb 2012 16:54:28 +0000 Subject: ARM: virt: Add boot-time diagnostics In order to easily detect pathological cases, print some diagnostics when the kernel boots. This also provides helpers to detect that HYP mode is actually available, which can be used by other subsystems to enable HYP specific features. Signed-off-by: Dave Martin Signed-off-by: Marc Zyngier --- arch/arm/include/asm/virt.h | 17 +++++++++++++++++ arch/arm/kernel/setup.c | 20 ++++++++++++++++++++ arch/arm/kernel/smp.c | 3 +++ 3 files changed, 40 insertions(+) diff --git a/arch/arm/include/asm/virt.h b/arch/arm/include/asm/virt.h index 0a99723edbf2..86164df86cb4 100644 --- a/arch/arm/include/asm/virt.h +++ b/arch/arm/include/asm/virt.h @@ -47,6 +47,23 @@ unsigned long __hyp_get_vectors(void); #define __boot_cpu_mode (SVC_MODE) #endif +#ifndef ZIMAGE +void hyp_mode_check(void); + +/* Reports the availability of HYP mode */ +static inline bool is_hyp_mode_available(void) +{ + return ((__boot_cpu_mode & MODE_MASK) == HYP_MODE && + !(__boot_cpu_mode & BOOT_CPU_MODE_MISMATCH)); +} + +/* Check if the bootloader has booted CPUs in different modes */ +static inline bool is_hyp_mode_mismatched(void) +{ + return !!(__boot_cpu_mode & BOOT_CPU_MODE_MISMATCH); +} +#endif + #endif /* __ASSEMBLY__ */ #endif /* ! VIRT_H */ diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index a81dcecc7343..04fd01feea86 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -55,6 +55,7 @@ #include #include #include +#include #if defined(CONFIG_DEPRECATED_PARAM_STRUCT) #include "compat.h" @@ -937,6 +938,21 @@ static int __init meminfo_cmp(const void *_a, const void *_b) return cmp < 0 ? -1 : cmp > 0 ? 1 : 0; } +void __init hyp_mode_check(void) +{ +#ifdef CONFIG_ARM_VIRT_EXT + if (is_hyp_mode_available()) { + pr_info("CPU: All CPU(s) started in HYP mode.\n"); + pr_info("CPU: Virtualization extensions available.\n"); + } else if (is_hyp_mode_mismatched()) { + pr_warn("CPU: WARNING: CPU(s) started in wrong/inconsistent modes (primary CPU mode 0x%x)\n", + __boot_cpu_mode & MODE_MASK); + pr_warn("CPU: This may indicate a broken bootloader or firmware.\n"); + } else + pr_info("CPU: All CPU(s) started in SVC mode.\n"); +#endif +} + void __init setup_arch(char **cmdline_p) { struct machine_desc *mdesc; @@ -980,6 +996,10 @@ void __init setup_arch(char **cmdline_p) if (is_smp()) smp_init_cpus(); #endif + + if (!is_smp()) + hyp_mode_check(); + reserve_crashkernel(); tcm_init(); diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index ebd8ad274d76..adf226e718d2 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -42,6 +42,7 @@ #include #include #include +#include /* * as from 2.5, kernels no longer have an init_tasks structure @@ -287,6 +288,8 @@ void __init smp_cpus_done(unsigned int max_cpus) num_online_cpus(), bogosum / (500000/HZ), (bogosum / (5000/HZ)) % 100); + + hyp_mode_check(); } void __init smp_prepare_boot_cpu(void) -- cgit v1.2.3 From 5b6728d4189d14c19f384d5ec6087276e7c196d8 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Fri, 17 Feb 2012 16:54:28 +0000 Subject: ARM: virt: Add CONFIG_ARM_VIRT_EXT option It is now possible to enable the virtualization extention support. Signed-off-by: Dave Martin Signed-off-by: Marc Zyngier --- arch/arm/mm/Kconfig | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index 101b9681c08c..c9a4963b5c3d 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -624,6 +624,23 @@ config ARM_THUMBEE Say Y here if you have a CPU with the ThumbEE extension and code to make use of it. Say N for code that can run on CPUs without ThumbEE. +config ARM_VIRT_EXT + bool "Native support for the ARM Virtualization Extensions" + depends on MMU && CPU_V7 + help + Enable the kernel to make use of the ARM Virtualization + Extensions to install hypervisors without run-time firmware + assistance. + + A compliant bootloader is required in order to make maximum + use of this feature. Refer to Documentation/arm/Booting for + details. + + It is safe to enable this option even if the kernel may not be + booted in HYP mode, may not have support for the + virtualization extensions, or may be booted with a + non-compliant bootloader. + config SWP_EMULATE bool "Emulate SWP/SWPB instructions" depends on !CPU_USE_DOMAINS && CPU_V7 -- cgit v1.2.3 From 8ec58be9f3ff2ad4a4d7bde8f48b4a4c406768e7 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 1 Aug 2012 14:46:41 +0100 Subject: ARM: virt: arch_timers: enable access to physical timers If booting in HYP mode, it makes sense to enable the use of the physical timers, so the kernel can use them directly. Signed-off-by: Marc Zyngier --- arch/arm/kernel/hyp-stub.S | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/arch/arm/kernel/hyp-stub.S b/arch/arm/kernel/hyp-stub.S index 70609417deaf..65b2417aebce 100644 --- a/arch/arm/kernel/hyp-stub.S +++ b/arch/arm/kernel/hyp-stub.S @@ -142,6 +142,19 @@ THUMB( orr r7, #(1 << 30) ) @ HSCTLR.TE and r7, #0x1f @ Preserve HPMN mcr p15, 4, r7, c1, c1, 1 @ HDCR +#if !defined(ZIMAGE) && defined(CONFIG_ARM_ARCH_TIMER) + @ make CNTP_* and CNTPCT accessible from PL1 + mrc p15, 0, r7, c0, c1, 1 @ ID_PFR1 + lsr r7, #16 + and r7, #0xf + cmp r7, #1 + bne 1f + mrc p15, 4, r7, c14, c1, 0 @ CNTHCTL + orr r7, r7, #3 @ PL1PCEN | PL1PCTEN + mcr p15, 4, r7, c14, c1, 0 @ CNTHCTL +1: +#endif + bic r7, r4, #MODE_MASK orr r7, r7, #SVC_MODE THUMB( orr r7, r7, #PSR_T_BIT ) -- cgit v1.2.3 From 031bd879f79d59d2f4fccd44377adf24fb977b5a Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Thu, 6 Sep 2012 18:35:13 +0530 Subject: ARM: mm: implement LoUIS API for cache maintenance ops ARM v7 architecture introduced the concept of cache levels and related control registers. New processors like A7 and A15 embed an L2 unified cache controller that becomes part of the cache level hierarchy. Some operations in the kernel like cpu_suspend and __cpu_disable do not require a flush of the entire cache hierarchy to DRAM but just the cache levels belonging to the Level of Unification Inner Shareable (LoUIS), which in most of ARM v7 systems correspond to L1. The current cache flushing API used in cpu_suspend and __cpu_disable, flush_cache_all(), ends up flushing the whole cache hierarchy since for v7 it cleans and invalidates all cache levels up to Level of Coherency (LoC) which cripples system performance when used in hot paths like hotplug and cpuidle. Therefore a new kernel cache maintenance API must be added to cope with latest ARM system requirements. This patch adds flush_cache_louis() to the ARM kernel cache maintenance API. This function cleans and invalidates all data cache levels up to the Level of Unification Inner Shareable (LoUIS) and invalidates the instruction cache for processors that support it (> v7). This patch also creates an alias of the cache LoUIS function to flush_kern_all for all processor versions prior to v7, so that the current cache flushing behaviour is unchanged for those processors. v7 cache maintenance code implements a cache LoUIS function that cleans and invalidates the D-cache up to LoUIS and invalidates the I-cache, according to the new API. Reviewed-by: Santosh Shilimkar Reviewed-by: Nicolas Pitre Signed-off-by: Lorenzo Pieralisi Tested-by: Shawn Guo --- arch/arm/include/asm/cacheflush.h | 15 +++++++++++++++ arch/arm/include/asm/glue-cache.h | 1 + arch/arm/mm/cache-fa.S | 3 +++ arch/arm/mm/cache-v3.S | 3 +++ arch/arm/mm/cache-v4.S | 3 +++ arch/arm/mm/cache-v4wb.S | 3 +++ arch/arm/mm/cache-v4wt.S | 3 +++ arch/arm/mm/cache-v6.S | 3 +++ arch/arm/mm/cache-v7.S | 36 ++++++++++++++++++++++++++++++++++++ arch/arm/mm/proc-arm1020.S | 3 +++ arch/arm/mm/proc-arm1020e.S | 3 +++ arch/arm/mm/proc-arm1022.S | 3 +++ arch/arm/mm/proc-arm1026.S | 3 +++ arch/arm/mm/proc-arm920.S | 3 +++ arch/arm/mm/proc-arm922.S | 3 +++ arch/arm/mm/proc-arm925.S | 3 +++ arch/arm/mm/proc-arm926.S | 3 +++ arch/arm/mm/proc-arm940.S | 3 +++ arch/arm/mm/proc-arm946.S | 3 +++ arch/arm/mm/proc-feroceon.S | 3 +++ arch/arm/mm/proc-macros.S | 1 + arch/arm/mm/proc-mohawk.S | 3 +++ arch/arm/mm/proc-xsc3.S | 3 +++ arch/arm/mm/proc-xscale.S | 3 +++ 24 files changed, 113 insertions(+) diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h index e4448e16046d..e1489c54cd12 100644 --- a/arch/arm/include/asm/cacheflush.h +++ b/arch/arm/include/asm/cacheflush.h @@ -49,6 +49,13 @@ * * Unconditionally clean and invalidate the entire cache. * + * flush_kern_louis() + * + * Flush data cache levels up to the level of unification + * inner shareable and invalidate the I-cache. + * Only needed from v7 onwards, falls back to flush_cache_all() + * for all other processor versions. + * * flush_user_all() * * Clean and invalidate all user space cache entries @@ -97,6 +104,7 @@ struct cpu_cache_fns { void (*flush_icache_all)(void); void (*flush_kern_all)(void); + void (*flush_kern_louis)(void); void (*flush_user_all)(void); void (*flush_user_range)(unsigned long, unsigned long, unsigned int); @@ -119,6 +127,7 @@ extern struct cpu_cache_fns cpu_cache; #define __cpuc_flush_icache_all cpu_cache.flush_icache_all #define __cpuc_flush_kern_all cpu_cache.flush_kern_all +#define __cpuc_flush_kern_louis cpu_cache.flush_kern_louis #define __cpuc_flush_user_all cpu_cache.flush_user_all #define __cpuc_flush_user_range cpu_cache.flush_user_range #define __cpuc_coherent_kern_range cpu_cache.coherent_kern_range @@ -139,6 +148,7 @@ extern struct cpu_cache_fns cpu_cache; extern void __cpuc_flush_icache_all(void); extern void __cpuc_flush_kern_all(void); +extern void __cpuc_flush_kern_louis(void); extern void __cpuc_flush_user_all(void); extern void __cpuc_flush_user_range(unsigned long, unsigned long, unsigned int); extern void __cpuc_coherent_kern_range(unsigned long, unsigned long); @@ -204,6 +214,11 @@ static inline void __flush_icache_all(void) __flush_icache_preferred(); } +/* + * Flush caches up to Level of Unification Inner Shareable + */ +#define flush_cache_louis() __cpuc_flush_kern_louis() + #define flush_cache_all() __cpuc_flush_kern_all() static inline void vivt_flush_cache_mm(struct mm_struct *mm) diff --git a/arch/arm/include/asm/glue-cache.h b/arch/arm/include/asm/glue-cache.h index 7e30874377e6..2d6a7de87a88 100644 --- a/arch/arm/include/asm/glue-cache.h +++ b/arch/arm/include/asm/glue-cache.h @@ -132,6 +132,7 @@ #ifndef MULTI_CACHE #define __cpuc_flush_icache_all __glue(_CACHE,_flush_icache_all) #define __cpuc_flush_kern_all __glue(_CACHE,_flush_kern_cache_all) +#define __cpuc_flush_kern_louis __glue(_CACHE,_flush_kern_cache_louis) #define __cpuc_flush_user_all __glue(_CACHE,_flush_user_cache_all) #define __cpuc_flush_user_range __glue(_CACHE,_flush_user_cache_range) #define __cpuc_coherent_kern_range __glue(_CACHE,_coherent_kern_range) diff --git a/arch/arm/mm/cache-fa.S b/arch/arm/mm/cache-fa.S index 072016371093..e505befe51b5 100644 --- a/arch/arm/mm/cache-fa.S +++ b/arch/arm/mm/cache-fa.S @@ -240,6 +240,9 @@ ENTRY(fa_dma_unmap_area) mov pc, lr ENDPROC(fa_dma_unmap_area) + .globl fa_flush_kern_cache_louis + .equ fa_flush_kern_cache_louis, fa_flush_kern_cache_all + __INITDATA @ define struct cpu_cache_fns (see and proc-macros.S) diff --git a/arch/arm/mm/cache-v3.S b/arch/arm/mm/cache-v3.S index 52e35f32eefb..8a3fadece8d3 100644 --- a/arch/arm/mm/cache-v3.S +++ b/arch/arm/mm/cache-v3.S @@ -128,6 +128,9 @@ ENTRY(v3_dma_map_area) ENDPROC(v3_dma_unmap_area) ENDPROC(v3_dma_map_area) + .globl v3_flush_kern_cache_louis + .equ v3_flush_kern_cache_louis, v3_flush_kern_cache_all + __INITDATA @ define struct cpu_cache_fns (see and proc-macros.S) diff --git a/arch/arm/mm/cache-v4.S b/arch/arm/mm/cache-v4.S index 022135d2b7e4..43e5d77be677 100644 --- a/arch/arm/mm/cache-v4.S +++ b/arch/arm/mm/cache-v4.S @@ -140,6 +140,9 @@ ENTRY(v4_dma_map_area) ENDPROC(v4_dma_unmap_area) ENDPROC(v4_dma_map_area) + .globl v4_flush_kern_cache_louis + .equ v4_flush_kern_cache_louis, v4_flush_kern_cache_all + __INITDATA @ define struct cpu_cache_fns (see and proc-macros.S) diff --git a/arch/arm/mm/cache-v4wb.S b/arch/arm/mm/cache-v4wb.S index 8f1eeae340c8..cd4945321407 100644 --- a/arch/arm/mm/cache-v4wb.S +++ b/arch/arm/mm/cache-v4wb.S @@ -251,6 +251,9 @@ ENTRY(v4wb_dma_unmap_area) mov pc, lr ENDPROC(v4wb_dma_unmap_area) + .globl v4wb_flush_kern_cache_louis + .equ v4wb_flush_kern_cache_louis, v4wb_flush_kern_cache_all + __INITDATA @ define struct cpu_cache_fns (see and proc-macros.S) diff --git a/arch/arm/mm/cache-v4wt.S b/arch/arm/mm/cache-v4wt.S index b34a5f908a82..11e5e5838bc5 100644 --- a/arch/arm/mm/cache-v4wt.S +++ b/arch/arm/mm/cache-v4wt.S @@ -196,6 +196,9 @@ ENTRY(v4wt_dma_map_area) ENDPROC(v4wt_dma_unmap_area) ENDPROC(v4wt_dma_map_area) + .globl v4wt_flush_kern_cache_louis + .equ v4wt_flush_kern_cache_louis, v4wt_flush_kern_cache_all + __INITDATA @ define struct cpu_cache_fns (see and proc-macros.S) diff --git a/arch/arm/mm/cache-v6.S b/arch/arm/mm/cache-v6.S index 4b10760c56d6..d8fd4d4bd3d4 100644 --- a/arch/arm/mm/cache-v6.S +++ b/arch/arm/mm/cache-v6.S @@ -326,6 +326,9 @@ ENTRY(v6_dma_unmap_area) mov pc, lr ENDPROC(v6_dma_unmap_area) + .globl v6_flush_kern_cache_louis + .equ v6_flush_kern_cache_louis, v6_flush_kern_cache_all + __INITDATA @ define struct cpu_cache_fns (see and proc-macros.S) diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S index 39e3fb3db801..d1fa2f66d8c0 100644 --- a/arch/arm/mm/cache-v7.S +++ b/arch/arm/mm/cache-v7.S @@ -33,6 +33,24 @@ ENTRY(v7_flush_icache_all) mov pc, lr ENDPROC(v7_flush_icache_all) + /* + * v7_flush_dcache_louis() + * + * Flush the D-cache up to the Level of Unification Inner Shareable + * + * Corrupted registers: r0-r7, r9-r11 (r6 only in Thumb mode) + */ + +ENTRY(v7_flush_dcache_louis) + dmb @ ensure ordering with previous memory accesses + mrc p15, 1, r0, c0, c0, 1 @ read clidr, r0 = clidr + ands r3, r0, #0xe00000 @ extract LoUIS from clidr + mov r3, r3, lsr #20 @ r3 = LoUIS * 2 + moveq pc, lr @ return if level == 0 + mov r10, #0 @ r10 (starting level) = 0 + b loop1 @ start flushing cache levels +ENDPROC(v7_flush_dcache_louis) + /* * v7_flush_dcache_all() * @@ -120,6 +138,24 @@ ENTRY(v7_flush_kern_cache_all) mov pc, lr ENDPROC(v7_flush_kern_cache_all) + /* + * v7_flush_kern_cache_louis(void) + * + * Flush the data cache up to Level of Unification Inner Shareable. + * Invalidate the I-cache to the point of unification. + */ +ENTRY(v7_flush_kern_cache_louis) + ARM( stmfd sp!, {r4-r5, r7, r9-r11, lr} ) + THUMB( stmfd sp!, {r4-r7, r9-r11, lr} ) + bl v7_flush_dcache_louis + mov r0, #0 + ALT_SMP(mcr p15, 0, r0, c7, c1, 0) @ invalidate I-cache inner shareable + ALT_UP(mcr p15, 0, r0, c7, c5, 0) @ I+BTB cache invalidate + ARM( ldmfd sp!, {r4-r5, r7, r9-r11, lr} ) + THUMB( ldmfd sp!, {r4-r7, r9-r11, lr} ) + mov pc, lr +ENDPROC(v7_flush_kern_cache_louis) + /* * v7_flush_cache_all() * diff --git a/arch/arm/mm/proc-arm1020.S b/arch/arm/mm/proc-arm1020.S index 0650bb87c1e3..2bb61e703d6c 100644 --- a/arch/arm/mm/proc-arm1020.S +++ b/arch/arm/mm/proc-arm1020.S @@ -368,6 +368,9 @@ ENTRY(arm1020_dma_unmap_area) mov pc, lr ENDPROC(arm1020_dma_unmap_area) + .globl arm1020_flush_kern_cache_louis + .equ arm1020_flush_kern_cache_louis, arm1020_flush_kern_cache_all + @ define struct cpu_cache_fns (see and proc-macros.S) define_cache_functions arm1020 diff --git a/arch/arm/mm/proc-arm1020e.S b/arch/arm/mm/proc-arm1020e.S index 4188478325a6..8f96aa40f510 100644 --- a/arch/arm/mm/proc-arm1020e.S +++ b/arch/arm/mm/proc-arm1020e.S @@ -354,6 +354,9 @@ ENTRY(arm1020e_dma_unmap_area) mov pc, lr ENDPROC(arm1020e_dma_unmap_area) + .globl arm1020e_flush_kern_cache_louis + .equ arm1020e_flush_kern_cache_louis, arm1020e_flush_kern_cache_all + @ define struct cpu_cache_fns (see and proc-macros.S) define_cache_functions arm1020e diff --git a/arch/arm/mm/proc-arm1022.S b/arch/arm/mm/proc-arm1022.S index 33c68824bff0..8ebe4a469a22 100644 --- a/arch/arm/mm/proc-arm1022.S +++ b/arch/arm/mm/proc-arm1022.S @@ -343,6 +343,9 @@ ENTRY(arm1022_dma_unmap_area) mov pc, lr ENDPROC(arm1022_dma_unmap_area) + .globl arm1022_flush_kern_cache_louis + .equ arm1022_flush_kern_cache_louis, arm1022_flush_kern_cache_all + @ define struct cpu_cache_fns (see and proc-macros.S) define_cache_functions arm1022 diff --git a/arch/arm/mm/proc-arm1026.S b/arch/arm/mm/proc-arm1026.S index fbc1d5fc24dc..093fc7e520c3 100644 --- a/arch/arm/mm/proc-arm1026.S +++ b/arch/arm/mm/proc-arm1026.S @@ -337,6 +337,9 @@ ENTRY(arm1026_dma_unmap_area) mov pc, lr ENDPROC(arm1026_dma_unmap_area) + .globl arm1026_flush_kern_cache_louis + .equ arm1026_flush_kern_cache_louis, arm1026_flush_kern_cache_all + @ define struct cpu_cache_fns (see and proc-macros.S) define_cache_functions arm1026 diff --git a/arch/arm/mm/proc-arm920.S b/arch/arm/mm/proc-arm920.S index 1a8c138eb897..2c3b9421ab5e 100644 --- a/arch/arm/mm/proc-arm920.S +++ b/arch/arm/mm/proc-arm920.S @@ -319,6 +319,9 @@ ENTRY(arm920_dma_unmap_area) mov pc, lr ENDPROC(arm920_dma_unmap_area) + .globl arm920_flush_kern_cache_louis + .equ arm920_flush_kern_cache_louis, arm920_flush_kern_cache_all + @ define struct cpu_cache_fns (see and proc-macros.S) define_cache_functions arm920 #endif diff --git a/arch/arm/mm/proc-arm922.S b/arch/arm/mm/proc-arm922.S index 4c44d7e1c3ca..4464c49d7449 100644 --- a/arch/arm/mm/proc-arm922.S +++ b/arch/arm/mm/proc-arm922.S @@ -321,6 +321,9 @@ ENTRY(arm922_dma_unmap_area) mov pc, lr ENDPROC(arm922_dma_unmap_area) + .globl arm922_flush_kern_cache_louis + .equ arm922_flush_kern_cache_louis, arm922_flush_kern_cache_all + @ define struct cpu_cache_fns (see and proc-macros.S) define_cache_functions arm922 #endif diff --git a/arch/arm/mm/proc-arm925.S b/arch/arm/mm/proc-arm925.S index ec5b1180994f..281eb9b9c1d6 100644 --- a/arch/arm/mm/proc-arm925.S +++ b/arch/arm/mm/proc-arm925.S @@ -376,6 +376,9 @@ ENTRY(arm925_dma_unmap_area) mov pc, lr ENDPROC(arm925_dma_unmap_area) + .globl arm925_flush_kern_cache_louis + .equ arm925_flush_kern_cache_louis, arm925_flush_kern_cache_all + @ define struct cpu_cache_fns (see and proc-macros.S) define_cache_functions arm925 diff --git a/arch/arm/mm/proc-arm926.S b/arch/arm/mm/proc-arm926.S index c31e62c606c0..f1803f7e2972 100644 --- a/arch/arm/mm/proc-arm926.S +++ b/arch/arm/mm/proc-arm926.S @@ -339,6 +339,9 @@ ENTRY(arm926_dma_unmap_area) mov pc, lr ENDPROC(arm926_dma_unmap_area) + .globl arm926_flush_kern_cache_louis + .equ arm926_flush_kern_cache_louis, arm926_flush_kern_cache_all + @ define struct cpu_cache_fns (see and proc-macros.S) define_cache_functions arm926 diff --git a/arch/arm/mm/proc-arm940.S b/arch/arm/mm/proc-arm940.S index a613a7dd7146..8da189d4a402 100644 --- a/arch/arm/mm/proc-arm940.S +++ b/arch/arm/mm/proc-arm940.S @@ -267,6 +267,9 @@ ENTRY(arm940_dma_unmap_area) mov pc, lr ENDPROC(arm940_dma_unmap_area) + .globl arm940_flush_kern_cache_louis + .equ arm940_flush_kern_cache_louis, arm940_flush_kern_cache_all + @ define struct cpu_cache_fns (see and proc-macros.S) define_cache_functions arm940 diff --git a/arch/arm/mm/proc-arm946.S b/arch/arm/mm/proc-arm946.S index 9f4f2999fdd0..f666cf34075a 100644 --- a/arch/arm/mm/proc-arm946.S +++ b/arch/arm/mm/proc-arm946.S @@ -310,6 +310,9 @@ ENTRY(arm946_dma_unmap_area) mov pc, lr ENDPROC(arm946_dma_unmap_area) + .globl arm946_flush_kern_cache_louis + .equ arm946_flush_kern_cache_louis, arm946_flush_kern_cache_all + @ define struct cpu_cache_fns (see and proc-macros.S) define_cache_functions arm946 diff --git a/arch/arm/mm/proc-feroceon.S b/arch/arm/mm/proc-feroceon.S index 23a8e4c7f2bd..85e5e3baa3d4 100644 --- a/arch/arm/mm/proc-feroceon.S +++ b/arch/arm/mm/proc-feroceon.S @@ -415,6 +415,9 @@ ENTRY(feroceon_dma_unmap_area) mov pc, lr ENDPROC(feroceon_dma_unmap_area) + .globl feroceon_flush_kern_cache_louis + .equ feroceon_flush_kern_cache_louis, feroceon_flush_kern_cache_all + @ define struct cpu_cache_fns (see and proc-macros.S) define_cache_functions feroceon diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S index 2d8ff3ad86d3..b29a2265af01 100644 --- a/arch/arm/mm/proc-macros.S +++ b/arch/arm/mm/proc-macros.S @@ -299,6 +299,7 @@ ENTRY(\name\()_processor_functions) ENTRY(\name\()_cache_fns) .long \name\()_flush_icache_all .long \name\()_flush_kern_cache_all + .long \name\()_flush_kern_cache_louis .long \name\()_flush_user_cache_all .long \name\()_flush_user_cache_range .long \name\()_coherent_kern_range diff --git a/arch/arm/mm/proc-mohawk.S b/arch/arm/mm/proc-mohawk.S index fbb2124a547d..82f9cdc751d6 100644 --- a/arch/arm/mm/proc-mohawk.S +++ b/arch/arm/mm/proc-mohawk.S @@ -303,6 +303,9 @@ ENTRY(mohawk_dma_unmap_area) mov pc, lr ENDPROC(mohawk_dma_unmap_area) + .globl mohawk_flush_kern_cache_louis + .equ mohawk_flush_kern_cache_louis, mohawk_flush_kern_cache_all + @ define struct cpu_cache_fns (see and proc-macros.S) define_cache_functions mohawk diff --git a/arch/arm/mm/proc-xsc3.S b/arch/arm/mm/proc-xsc3.S index b0d57869da2d..eb93d6487f35 100644 --- a/arch/arm/mm/proc-xsc3.S +++ b/arch/arm/mm/proc-xsc3.S @@ -337,6 +337,9 @@ ENTRY(xsc3_dma_unmap_area) mov pc, lr ENDPROC(xsc3_dma_unmap_area) + .globl xsc3_flush_kern_cache_louis + .equ xsc3_flush_kern_cache_louis, xsc3_flush_kern_cache_all + @ define struct cpu_cache_fns (see and proc-macros.S) define_cache_functions xsc3 diff --git a/arch/arm/mm/proc-xscale.S b/arch/arm/mm/proc-xscale.S index 4ffebaa595ee..b5ea31d6daac 100644 --- a/arch/arm/mm/proc-xscale.S +++ b/arch/arm/mm/proc-xscale.S @@ -410,6 +410,9 @@ ENTRY(xscale_dma_unmap_area) mov pc, lr ENDPROC(xscale_dma_unmap_area) + .globl xscale_flush_kern_cache_louis + .equ xscale_flush_kern_cache_louis, xscale_flush_kern_cache_all + @ define struct cpu_cache_fns (see and proc-macros.S) define_cache_functions xscale -- cgit v1.2.3 From 3287be8c4e2bd91211b3947ba726d95e8a1092b5 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Tue, 18 Sep 2012 16:29:44 +0100 Subject: ARM: mm: rename jump labels in v7_flush_dcache_all function This patch renames jump labels in v7_flush_dcache_all in order to define a specific flush cache levels entry point. Acked-by: Nicolas Pitre Signed-off-by: Lorenzo Pieralisi Tested-by: Shawn Guo --- arch/arm/mm/cache-v7.S | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S index d1fa2f66d8c0..140b294bbd9b 100644 --- a/arch/arm/mm/cache-v7.S +++ b/arch/arm/mm/cache-v7.S @@ -48,7 +48,7 @@ ENTRY(v7_flush_dcache_louis) mov r3, r3, lsr #20 @ r3 = LoUIS * 2 moveq pc, lr @ return if level == 0 mov r10, #0 @ r10 (starting level) = 0 - b loop1 @ start flushing cache levels + b flush_levels @ start flushing cache levels ENDPROC(v7_flush_dcache_louis) /* @@ -67,7 +67,7 @@ ENTRY(v7_flush_dcache_all) mov r3, r3, lsr #23 @ left align loc bit field beq finished @ if loc is 0, then no need to clean mov r10, #0 @ start clean at cache level 0 -loop1: +flush_levels: add r2, r10, r10, lsr #1 @ work out 3x current cache level mov r1, r0, lsr r2 @ extract cache type bits from clidr and r1, r1, #7 @ mask of the bits for current cache only @@ -89,9 +89,9 @@ loop1: clz r5, r4 @ find bit position of way size increment ldr r7, =0x7fff ands r7, r7, r1, lsr #13 @ extract max number of the index size -loop2: +loop1: mov r9, r4 @ create working copy of max way size -loop3: +loop2: ARM( orr r11, r10, r9, lsl r5 ) @ factor way and cache number into r11 THUMB( lsl r6, r9, r5 ) THUMB( orr r11, r10, r6 ) @ factor way and cache number into r11 @@ -100,13 +100,13 @@ loop3: THUMB( orr r11, r11, r6 ) @ factor index number into r11 mcr p15, 0, r11, c7, c14, 2 @ clean & invalidate by set/way subs r9, r9, #1 @ decrement the way - bge loop3 - subs r7, r7, #1 @ decrement the index bge loop2 + subs r7, r7, #1 @ decrement the index + bge loop1 skip: add r10, r10, #2 @ increment cache number cmp r3, r10 - bgt loop1 + bgt flush_levels finished: mov r10, #0 @ swith back to cache level 0 mcr p15, 2, r10, c0, c0, 0 @ select current cache level in cssr -- cgit v1.2.3 From dbee0c6fb4c1269b2dfc8b0b7a29907ea7fed560 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Fri, 7 Sep 2012 11:06:57 +0530 Subject: ARM: kernel: update cpu_suspend code to use cache LoUIS operations In processors like A15/A7 L2 cache is unified and integrated within the processor cache hierarchy, so that it is not considered an outer cache anymore. For processors like A15/A7 flush_cache_all() ends up cleaning all cache levels up to Level of Coherency (LoC) that includes the L2 unified cache. When a single CPU is suspended (CPU idle) a complete L2 clean is not required, so generic cpu_suspend code must clean the data cache using the newly introduced cache LoUIS function. The context and stack pointer (context pointer) are cleaned to main memory using cache area functions that operate on MVA and guarantee that the data is written back to main memory (perform cache cleaning up to the Point of Coherency - PoC) so that the processor can fetch the context when the MMU is off in the cpu_resume code path. outer_cache management remains unchanged. Reviewed-by: Santosh Shilimkar Reviewed-by: Nicolas Pitre Signed-off-by: Lorenzo Pieralisi Tested-by: Shawn Guo --- arch/arm/kernel/suspend.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/arch/arm/kernel/suspend.c b/arch/arm/kernel/suspend.c index 1794cc3b0f18..358bca3a995e 100644 --- a/arch/arm/kernel/suspend.c +++ b/arch/arm/kernel/suspend.c @@ -17,6 +17,8 @@ extern void cpu_resume_mmu(void); */ void __cpu_suspend_save(u32 *ptr, u32 ptrsz, u32 sp, u32 *save_ptr) { + u32 *ctx = ptr; + *save_ptr = virt_to_phys(ptr); /* This must correspond to the LDM in cpu_resume() assembly */ @@ -26,7 +28,20 @@ void __cpu_suspend_save(u32 *ptr, u32 ptrsz, u32 sp, u32 *save_ptr) cpu_do_suspend(ptr); - flush_cache_all(); + flush_cache_louis(); + + /* + * flush_cache_louis does not guarantee that + * save_ptr and ptr are cleaned to main memory, + * just up to the Level of Unification Inner Shareable. + * Since the context pointer and context itself + * are to be retrieved with the MMU off that + * data must be cleaned from all cache levels + * to main memory using "area" cache primitives. + */ + __cpuc_flush_dcache_area(ctx, ptrsz); + __cpuc_flush_dcache_area(save_ptr, sizeof(*save_ptr)); + outer_clean_range(*save_ptr, *save_ptr + ptrsz); outer_clean_range(virt_to_phys(save_ptr), virt_to_phys(save_ptr) + sizeof(*save_ptr)); -- cgit v1.2.3 From e6b866e954a7f0d0144a951c158f3922dac1e6b9 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Fri, 7 Sep 2012 11:09:15 +0530 Subject: ARM: kernel: update __cpu_disable to use cache LoUIS maintenance API When a CPU is hotplugged out caches that reside in its power domain lose their contents and so must be cleaned to the next memory level. Currently, __cpu_disable calls flush_cache_all() that for new generation processor like A15/A7 ends up cleaning and invalidating all cache levels up to Level of Coherency, which includes the unified L2. This ends up being a waste of cycles since the L2 cache contents are not lost on power down. This patch updates __cpu_disable to use the new LoUIS API cache operations. Acked-by: Nicolas Pitre Reviewed-by: Santosh Shilimkar Signed-off-by: Lorenzo Pieralisi Tested-by: Shawn Guo --- arch/arm/kernel/smp.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index ebd8ad274d76..199558b9462e 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -134,8 +134,11 @@ int __cpu_disable(void) /* * Flush user cache and TLB mappings, and then remove this CPU * from the vm mask set of all processes. + * + * Caches are flushed to the Level of Unification Inner Shareable + * to write-back dirty lines to unified caches shared by all CPUs. */ - flush_cache_all(); + flush_cache_louis(); local_flush_tlb_all(); clear_tasks_mm_cpumask(cpu); -- cgit v1.2.3 From 6323fa2256baa73d6a960ee57ec086b66aeecd0b Mon Sep 17 00:00:00 2001 From: Santosh Shilimkar Date: Mon, 10 Sep 2012 15:07:26 +0530 Subject: ARM: mm: update __v7_setup() to the new LoUIS cache maintenance API The ARMv7 processor setup function __v7_setup() cleans and invalidates the CPU cache before enabling MMU to start the CPU with a clean CPU local cache. But on ARMv7 architectures like Cortex-[A15/A8], this code will end up flushing the L2 caches(up to level of Coherency) which is undesirable and expensive. The setup functions are used in the CPU hotplug scenario too and hence flushing all cache levels should be avoided. This patch replaces the cache flushing call with the newly introduced v7 dcache LoUIS API where only cache levels up to LoUIS are cleaned and invalidated when a processors executes __v7_setup which is the expected behavior. For processors like A9 and A5 where the L2 cache is an outer one the behavior should be unchanged. Reviewed-by: Nicolas Pitre Signed-off-by: Santosh Shilimkar Signed-off-by: Lorenzo Pieralisi Tested-by: Shawn Guo --- arch/arm/mm/proc-v7.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S index c2e2b66f72b5..846d279f3176 100644 --- a/arch/arm/mm/proc-v7.S +++ b/arch/arm/mm/proc-v7.S @@ -172,7 +172,7 @@ __v7_ca15mp_setup: __v7_setup: adr r12, __v7_setup_stack @ the local stack stmia r12, {r0-r5, r7, r9, r11, lr} - bl v7_flush_dcache_all + bl v7_flush_dcache_louis ldmia r12, {r0-r5, r7, r9, r11, lr} mrc p15, 0, r0, c0, c0, 0 @ read main ID register -- cgit v1.2.3 From 8ee777fd915b0e36f35a430225729007a1df6441 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Fri, 28 Sep 2012 15:09:59 +0100 Subject: ARM: 7542/1: mm: fix cache LoUIS API for xscale and feroceon Some architectures like xscale and feroceon have cache API variants that map cache flushing functions as aliases to the base architecture. This patch adds the required aliases to complete the implementation of cache flushing LoUIS API. Signed-off-by: Lorenzo Pieralisi Signed-off-by: Russell King --- arch/arm/mm/proc-feroceon.S | 1 + arch/arm/mm/proc-xscale.S | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/arm/mm/proc-feroceon.S b/arch/arm/mm/proc-feroceon.S index 85e5e3baa3d4..4106b09e0c29 100644 --- a/arch/arm/mm/proc-feroceon.S +++ b/arch/arm/mm/proc-feroceon.S @@ -434,6 +434,7 @@ ENDPROC(feroceon_dma_unmap_area) range_alias flush_icache_all range_alias flush_user_cache_all range_alias flush_kern_cache_all + range_alias flush_kern_cache_louis range_alias flush_user_cache_range range_alias coherent_kern_range range_alias coherent_user_range diff --git a/arch/arm/mm/proc-xscale.S b/arch/arm/mm/proc-xscale.S index b5ea31d6daac..25510361aa18 100644 --- a/arch/arm/mm/proc-xscale.S +++ b/arch/arm/mm/proc-xscale.S @@ -442,6 +442,7 @@ ENDPROC(xscale_dma_unmap_area) a0_alias flush_icache_all a0_alias flush_user_cache_all a0_alias flush_kern_cache_all + a0_alias flush_kern_cache_louis a0_alias flush_user_cache_range a0_alias coherent_kern_range a0_alias coherent_user_range -- cgit v1.2.3 From 2a552d5e63d7fa602c9a9a0717008737f55625a6 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sat, 6 Oct 2012 17:03:17 +0100 Subject: ARM: 7549/1: HYP: fix boot on some ARM1136 cores It appears that performing a "movs pc, lr" to force the kernel into SVC mode on the OMAP2420 (ARM1136) prevents the platform from booting correctly (change introduced in 80c59da [ARM: virt: allow the kernel to be entered in HYP mode]). While the reason it fails is not understood yet (the same code runs fine on the OMAP2430, ARM1136 as well), partially revert that change for platforms that do not enter in HYP mode, preserving the new feature and restoring a working kernel on the OMAP2420. Reported-by: Tony Lindgren Acked-by: Nicolas Pitre Tested-by: Tony Lindgren Signed-off-by: Marc Zyngier Signed-off-by: Russell King --- arch/arm/include/asm/assembler.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index 683a1e6b6020..2ef95813fce0 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -254,16 +254,17 @@ mov lr , \reg and lr , lr , #MODE_MASK cmp lr , #HYP_MODE - orr \reg , \reg , #PSR_A_BIT | PSR_I_BIT | PSR_F_BIT + orr \reg , \reg , #PSR_I_BIT | PSR_F_BIT bic \reg , \reg , #MODE_MASK orr \reg , \reg , #SVC_MODE THUMB( orr \reg , \reg , #PSR_T_BIT ) - msr spsr_cxsf, \reg - adr lr, BSYM(2f) bne 1f + orr \reg, \reg, #PSR_A_BIT + adr lr, BSYM(2f) + msr spsr_cxsf, \reg __MSR_ELR_HYP(14) __ERET -1: movs pc, lr +1: msr cpsr_c, \reg 2: .endm -- cgit v1.2.3 From 846a136881b8f73c1f74250bf6acfaa309cab1f2 Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 9 Oct 2012 11:13:26 +0100 Subject: ARM: vfp: fix saving d16-d31 vfp registers on v6+ kernels Michael Olbrich reported that his test program fails when built with -O2 -mcpu=cortex-a8 -mfpu=neon, and a kernel which supports v6 and v7 CPUs: volatile int x = 2; volatile int64_t y = 2; int main() { volatile int a = 0; volatile int64_t b = 0; while (1) { a = (a + x) % (1 << 30); b = (b + y) % (1 << 30); assert(a == b); } } and two instances are run. When built for just v7 CPUs, this program works fine. It uses the "vadd.i64 d19, d18, d16" VFP instruction. It appears that we do not save the high-16 double VFP registers across context switches when the kernel is built for v6 CPUs. Fix that. Cc: Tested-By: Michael Olbrich Signed-off-by: Russell King --- arch/arm/include/asm/vfpmacros.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/include/asm/vfpmacros.h b/arch/arm/include/asm/vfpmacros.h index a7aadbd9a6dd..6a6f1e485f41 100644 --- a/arch/arm/include/asm/vfpmacros.h +++ b/arch/arm/include/asm/vfpmacros.h @@ -28,7 +28,7 @@ ldr \tmp, =elf_hwcap @ may not have MVFR regs ldr \tmp, [\tmp, #0] tst \tmp, #HWCAP_VFPv3D16 - ldceq p11, cr0, [\base],#32*4 @ FLDMIAD \base!, {d16-d31} + ldceql p11, cr0, [\base],#32*4 @ FLDMIAD \base!, {d16-d31} addne \base, \base, #32*4 @ step over unused register space #else VFPFMRX \tmp, MVFR0 @ Media and VFP Feature Register 0 @@ -52,7 +52,7 @@ ldr \tmp, =elf_hwcap @ may not have MVFR regs ldr \tmp, [\tmp, #0] tst \tmp, #HWCAP_VFPv3D16 - stceq p11, cr0, [\base],#32*4 @ FSTMIAD \base!, {d16-d31} + stceql p11, cr0, [\base],#32*4 @ FSTMIAD \base!, {d16-d31} addne \base, \base, #32*4 @ step over unused register space #else VFPFMRX \tmp, MVFR0 @ Media and VFP Feature Register 0 -- cgit v1.2.3