diff options
Diffstat (limited to 'arch/arm64/kernel')
39 files changed, 2612 insertions, 559 deletions
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 7d811d9522bc..df7ef8768fc2 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -4,24 +4,31 @@ CPPFLAGS_vmlinux.lds := -DTEXT_OFFSET=$(TEXT_OFFSET) AFLAGS_head.o := -DTEXT_OFFSET=$(TEXT_OFFSET) +CFLAGS_efi-stub.o := -DTEXT_OFFSET=$(TEXT_OFFSET) + +CFLAGS_REMOVE_ftrace.o = -pg +CFLAGS_REMOVE_insn.o = -pg +CFLAGS_REMOVE_return_address.o = -pg # Object file lists. arm64-obj-y := cputable.o debug-monitors.o entry.o irq.o fpsimd.o \ entry-fpsimd.o process.o ptrace.o setup.o signal.o \ sys.o stacktrace.o time.o traps.o io.o vdso.o \ - hyp-stub.o psci.o cpu_ops.o insn.o + hyp-stub.o psci.o cpu_ops.o insn.o return_address.o \ + cpuinfo.o arm64-obj-$(CONFIG_COMPAT) += sys32.o kuser32.o signal32.o \ sys_compat.o +arm64-obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o entry-ftrace.o arm64-obj-$(CONFIG_MODULES) += arm64ksyms.o module.o arm64-obj-$(CONFIG_SMP) += smp.o smp_spin_table.o topology.o arm64-obj-$(CONFIG_PERF_EVENTS) += perf_regs.o arm64-obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o arm64-obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o -arm64-obj-$(CONFIG_EARLY_PRINTK) += early_printk.o arm64-obj-$(CONFIG_ARM64_CPU_SUSPEND) += sleep.o suspend.o arm64-obj-$(CONFIG_JUMP_LABEL) += jump_label.o arm64-obj-$(CONFIG_KGDB) += kgdb.o +arm64-obj-$(CONFIG_EFI) += efi.o efi-stub.o efi-entry.o obj-y += $(arm64-obj-y) vdso/ obj-m += $(arm64-obj-m) diff --git a/arch/arm64/kernel/arm64ksyms.c b/arch/arm64/kernel/arm64ksyms.c index 338b568cd8ae..a85843ddbde8 100644 --- a/arch/arm64/kernel/arm64ksyms.c +++ b/arch/arm64/kernel/arm64ksyms.c @@ -44,10 +44,15 @@ EXPORT_SYMBOL(memstart_addr); /* string / mem functions */ EXPORT_SYMBOL(strchr); EXPORT_SYMBOL(strrchr); +EXPORT_SYMBOL(strcmp); +EXPORT_SYMBOL(strncmp); +EXPORT_SYMBOL(strlen); +EXPORT_SYMBOL(strnlen); EXPORT_SYMBOL(memset); EXPORT_SYMBOL(memcpy); EXPORT_SYMBOL(memmove); EXPORT_SYMBOL(memchr); +EXPORT_SYMBOL(memcmp); /* atomic bitops */ EXPORT_SYMBOL(set_bit); @@ -56,3 +61,7 @@ EXPORT_SYMBOL(clear_bit); EXPORT_SYMBOL(test_and_clear_bit); EXPORT_SYMBOL(change_bit); EXPORT_SYMBOL(test_and_change_bit); + +#ifdef CONFIG_FUNCTION_TRACER +EXPORT_SYMBOL(_mcount); +#endif diff --git a/arch/arm64/kernel/cpu_ops.c b/arch/arm64/kernel/cpu_ops.c index d62d12fb36c8..cce952440c64 100644 --- a/arch/arm64/kernel/cpu_ops.c +++ b/arch/arm64/kernel/cpu_ops.c @@ -30,8 +30,8 @@ const struct cpu_operations *cpu_ops[NR_CPUS]; static const struct cpu_operations *supported_cpu_ops[] __initconst = { #ifdef CONFIG_SMP &smp_spin_table_ops, - &cpu_psci_ops, #endif + &cpu_psci_ops, NULL, }; diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c new file mode 100644 index 000000000000..f798f66634af --- /dev/null +++ b/arch/arm64/kernel/cpuinfo.c @@ -0,0 +1,192 @@ +/* + * Record and handle CPU attributes. + * + * Copyright (C) 2014 ARM Ltd. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#include <asm/arch_timer.h> +#include <asm/cachetype.h> +#include <asm/cpu.h> +#include <asm/cputype.h> + +#include <linux/bitops.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/printk.h> +#include <linux/smp.h> + +/* + * In case the boot CPU is hotpluggable, we record its initial state and + * current state separately. Certain system registers may contain different + * values depending on configuration at or after reset. + */ +DEFINE_PER_CPU(struct cpuinfo_arm64, cpu_data); +static struct cpuinfo_arm64 boot_cpu_data; + +static char *icache_policy_str[] = { + [ICACHE_POLICY_RESERVED] = "RESERVED/UNKNOWN", + [ICACHE_POLICY_AIVIVT] = "AIVIVT", + [ICACHE_POLICY_VIPT] = "VIPT", + [ICACHE_POLICY_PIPT] = "PIPT", +}; + +unsigned long __icache_flags; + +static void cpuinfo_detect_icache_policy(struct cpuinfo_arm64 *info) +{ + unsigned int cpu = smp_processor_id(); + u32 l1ip = CTR_L1IP(info->reg_ctr); + + if (l1ip != ICACHE_POLICY_PIPT) + set_bit(ICACHEF_ALIASING, &__icache_flags); + if (l1ip == ICACHE_POLICY_AIVIVT); + set_bit(ICACHEF_AIVIVT, &__icache_flags); + + pr_info("Detected %s I-cache on CPU%d\n", icache_policy_str[l1ip], cpu); +} + +static int check_reg_mask(char *name, u64 mask, u64 boot, u64 cur, int cpu) +{ + if ((boot & mask) == (cur & mask)) + return 0; + + pr_warn("SANITY CHECK: Unexpected variation in %s. Boot CPU: %#016lx, CPU%d: %#016lx\n", + name, (unsigned long)boot, cpu, (unsigned long)cur); + + return 1; +} + +#define CHECK_MASK(field, mask, boot, cur, cpu) \ + check_reg_mask(#field, mask, (boot)->reg_ ## field, (cur)->reg_ ## field, cpu) + +#define CHECK(field, boot, cur, cpu) \ + CHECK_MASK(field, ~0ULL, boot, cur, cpu) + +/* + * Verify that CPUs don't have unexpected differences that will cause problems. + */ +static void cpuinfo_sanity_check(struct cpuinfo_arm64 *cur) +{ + unsigned int cpu = smp_processor_id(); + struct cpuinfo_arm64 *boot = &boot_cpu_data; + unsigned int diff = 0; + + /* + * The kernel can handle differing I-cache policies, but otherwise + * caches should look identical. Userspace JITs will make use of + * *minLine. + */ + diff |= CHECK_MASK(ctr, 0xffff3fff, boot, cur, cpu); + + /* + * Userspace may perform DC ZVA instructions. Mismatched block sizes + * could result in too much or too little memory being zeroed if a + * process is preempted and migrated between CPUs. + */ + diff |= CHECK(dczid, boot, cur, cpu); + + /* If different, timekeeping will be broken (especially with KVM) */ + diff |= CHECK(cntfrq, boot, cur, cpu); + + /* + * Even in big.LITTLE, processors should be identical instruction-set + * wise. + */ + diff |= CHECK(id_aa64isar0, boot, cur, cpu); + diff |= CHECK(id_aa64isar1, boot, cur, cpu); + + /* + * Differing PARange support is fine as long as all peripherals and + * memory are mapped within the minimum PARange of all CPUs. + * Linux should not care about secure memory. + * ID_AA64MMFR1 is currently RES0. + */ + diff |= CHECK_MASK(id_aa64mmfr0, 0xffffffffffff0ff0, boot, cur, cpu); + diff |= CHECK(id_aa64mmfr1, boot, cur, cpu); + + /* + * EL3 is not our concern. + * ID_AA64PFR1 is currently RES0. + */ + diff |= CHECK_MASK(id_aa64pfr0, 0xffffffffffff0fff, boot, cur, cpu); + diff |= CHECK(id_aa64pfr1, boot, cur, cpu); + + /* + * If we have AArch32, we care about 32-bit features for compat. These + * registers should be RES0 otherwise. + */ + diff |= CHECK(id_isar0, boot, cur, cpu); + diff |= CHECK(id_isar1, boot, cur, cpu); + diff |= CHECK(id_isar2, boot, cur, cpu); + diff |= CHECK(id_isar3, boot, cur, cpu); + diff |= CHECK(id_isar4, boot, cur, cpu); + diff |= CHECK(id_isar5, boot, cur, cpu); + diff |= CHECK(id_mmfr0, boot, cur, cpu); + diff |= CHECK(id_mmfr1, boot, cur, cpu); + diff |= CHECK(id_mmfr2, boot, cur, cpu); + diff |= CHECK(id_mmfr3, boot, cur, cpu); + diff |= CHECK(id_pfr0, boot, cur, cpu); + diff |= CHECK(id_pfr1, boot, cur, cpu); + + /* + * Mismatched CPU features are a recipe for disaster. Don't even + * pretend to support them. + */ + WARN_TAINT_ONCE(diff, TAINT_CPU_OUT_OF_SPEC, + "Unsupported CPU feature variation."); +} + +static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) +{ + info->reg_cntfrq = arch_timer_get_cntfrq(); + info->reg_ctr = read_cpuid_cachetype(); + info->reg_dczid = read_cpuid(DCZID_EL0); + info->reg_midr = read_cpuid_id(); + + info->reg_id_aa64isar0 = read_cpuid(ID_AA64ISAR0_EL1); + info->reg_id_aa64isar1 = read_cpuid(ID_AA64ISAR1_EL1); + info->reg_id_aa64mmfr0 = read_cpuid(ID_AA64MMFR0_EL1); + info->reg_id_aa64mmfr1 = read_cpuid(ID_AA64MMFR1_EL1); + info->reg_id_aa64pfr0 = read_cpuid(ID_AA64PFR0_EL1); + info->reg_id_aa64pfr1 = read_cpuid(ID_AA64PFR1_EL1); + + info->reg_id_isar0 = read_cpuid(ID_ISAR0_EL1); + info->reg_id_isar1 = read_cpuid(ID_ISAR1_EL1); + info->reg_id_isar2 = read_cpuid(ID_ISAR2_EL1); + info->reg_id_isar3 = read_cpuid(ID_ISAR3_EL1); + info->reg_id_isar4 = read_cpuid(ID_ISAR4_EL1); + info->reg_id_isar5 = read_cpuid(ID_ISAR5_EL1); + info->reg_id_mmfr0 = read_cpuid(ID_MMFR0_EL1); + info->reg_id_mmfr1 = read_cpuid(ID_MMFR1_EL1); + info->reg_id_mmfr2 = read_cpuid(ID_MMFR2_EL1); + info->reg_id_mmfr3 = read_cpuid(ID_MMFR3_EL1); + info->reg_id_pfr0 = read_cpuid(ID_PFR0_EL1); + info->reg_id_pfr1 = read_cpuid(ID_PFR1_EL1); + + cpuinfo_detect_icache_policy(info); +} + +void cpuinfo_store_cpu(void) +{ + struct cpuinfo_arm64 *info = this_cpu_ptr(&cpu_data); + __cpuinfo_store_cpu(info); + cpuinfo_sanity_check(info); +} + +void __init cpuinfo_store_boot_cpu(void) +{ + struct cpuinfo_arm64 *info = &per_cpu(cpu_data, 0); + __cpuinfo_store_cpu(info); + + boot_cpu_data = *info; +} diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index a7fb874b595e..fe5b94078d82 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -315,20 +315,20 @@ static int brk_handler(unsigned long addr, unsigned int esr, { siginfo_t info; - if (call_break_hook(regs, esr) == DBG_HOOK_HANDLED) - return 0; + if (user_mode(regs)) { + info = (siginfo_t) { + .si_signo = SIGTRAP, + .si_errno = 0, + .si_code = TRAP_BRKPT, + .si_addr = (void __user *)instruction_pointer(regs), + }; - if (!user_mode(regs)) + force_sig_info(SIGTRAP, &info, current); + } else if (call_break_hook(regs, esr) != DBG_HOOK_HANDLED) { + pr_warning("Unexpected kernel BRK exception at EL1\n"); return -EFAULT; + } - info = (siginfo_t) { - .si_signo = SIGTRAP, - .si_errno = 0, - .si_code = TRAP_BRKPT, - .si_addr = (void __user *)instruction_pointer(regs), - }; - - force_sig_info(SIGTRAP, &info, current); return 0; } diff --git a/arch/arm64/kernel/early_printk.c b/arch/arm64/kernel/early_printk.c deleted file mode 100644 index ffbbdde7aba1..000000000000 --- a/arch/arm64/kernel/early_printk.c +++ /dev/null @@ -1,158 +0,0 @@ -/* - * Earlyprintk support. - * - * Copyright (C) 2012 ARM Ltd. - * Author: Catalin Marinas <catalin.marinas@arm.com> - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ -#include <linux/kernel.h> -#include <linux/console.h> -#include <linux/init.h> -#include <linux/string.h> -#include <linux/mm.h> -#include <linux/io.h> - -#include <linux/amba/serial.h> -#include <linux/serial_reg.h> - -#include <asm/fixmap.h> - -static void __iomem *early_base; -static void (*printch)(char ch); - -/* - * PL011 single character TX. - */ -static void pl011_printch(char ch) -{ - while (readl_relaxed(early_base + UART01x_FR) & UART01x_FR_TXFF) - ; - writeb_relaxed(ch, early_base + UART01x_DR); - while (readl_relaxed(early_base + UART01x_FR) & UART01x_FR_BUSY) - ; -} - -/* - * Semihosting-based debug console - */ -static void smh_printch(char ch) -{ - asm volatile("mov x1, %0\n" - "mov x0, #3\n" - "hlt 0xf000\n" - : : "r" (&ch) : "x0", "x1", "memory"); -} - -/* - * 8250/16550 (8-bit aligned registers) single character TX. - */ -static void uart8250_8bit_printch(char ch) -{ - while (!(readb_relaxed(early_base + UART_LSR) & UART_LSR_THRE)) - ; - writeb_relaxed(ch, early_base + UART_TX); -} - -/* - * 8250/16550 (32-bit aligned registers) single character TX. - */ -static void uart8250_32bit_printch(char ch) -{ - while (!(readl_relaxed(early_base + (UART_LSR << 2)) & UART_LSR_THRE)) - ; - writel_relaxed(ch, early_base + (UART_TX << 2)); -} - -struct earlycon_match { - const char *name; - void (*printch)(char ch); -}; - -static const struct earlycon_match earlycon_match[] __initconst = { - { .name = "pl011", .printch = pl011_printch, }, - { .name = "smh", .printch = smh_printch, }, - { .name = "uart8250-8bit", .printch = uart8250_8bit_printch, }, - { .name = "uart8250-32bit", .printch = uart8250_32bit_printch, }, - {} -}; - -static void early_write(struct console *con, const char *s, unsigned n) -{ - while (n-- > 0) { - if (*s == '\n') - printch('\r'); - printch(*s); - s++; - } -} - -static struct console early_console_dev = { - .name = "earlycon", - .write = early_write, - .flags = CON_PRINTBUFFER | CON_BOOT, - .index = -1, -}; - -/* - * Parse earlyprintk=... parameter in the format: - * - * <name>[,<addr>][,<options>] - * - * and register the early console. It is assumed that the UART has been - * initialised by the bootloader already. - */ -static int __init setup_early_printk(char *buf) -{ - const struct earlycon_match *match = earlycon_match; - phys_addr_t paddr = 0; - - if (!buf) { - pr_warning("No earlyprintk arguments passed.\n"); - return 0; - } - - while (match->name) { - size_t len = strlen(match->name); - if (!strncmp(buf, match->name, len)) { - buf += len; - break; - } - match++; - } - if (!match->name) { - pr_warning("Unknown earlyprintk arguments: %s\n", buf); - return 0; - } - - /* I/O address */ - if (!strncmp(buf, ",0x", 3)) { - char *e; - paddr = simple_strtoul(buf + 1, &e, 16); - buf = e; - } - /* no options parsing yet */ - - if (paddr) { - set_fixmap_io(FIX_EARLYCON_MEM_BASE, paddr); - early_base = (void __iomem *)fix_to_virt(FIX_EARLYCON_MEM_BASE); - } - - printch = match->printch; - early_console = &early_console_dev; - register_console(&early_console_dev); - - return 0; -} - -early_param("earlyprintk", setup_early_printk); diff --git a/arch/arm64/kernel/efi-entry.S b/arch/arm64/kernel/efi-entry.S new file mode 100644 index 000000000000..619b1dd7bcde --- /dev/null +++ b/arch/arm64/kernel/efi-entry.S @@ -0,0 +1,108 @@ +/* + * EFI entry point. + * + * Copyright (C) 2013, 2014 Red Hat, Inc. + * Author: Mark Salter <msalter@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ +#include <linux/linkage.h> +#include <linux/init.h> + +#include <asm/assembler.h> + +#define EFI_LOAD_ERROR 0x8000000000000001 + + __INIT + + /* + * We arrive here from the EFI boot manager with: + * + * * CPU in little-endian mode + * * MMU on with identity-mapped RAM + * * Icache and Dcache on + * + * We will most likely be running from some place other than where + * we want to be. The kernel image wants to be placed at TEXT_OFFSET + * from start of RAM. + */ +ENTRY(efi_stub_entry) + /* + * Create a stack frame to save FP/LR with extra space + * for image_addr variable passed to efi_entry(). + */ + stp x29, x30, [sp, #-32]! + + /* + * Call efi_entry to do the real work. + * x0 and x1 are already set up by firmware. Current runtime + * address of image is calculated and passed via *image_addr. + * + * unsigned long efi_entry(void *handle, + * efi_system_table_t *sys_table, + * unsigned long *image_addr) ; + */ + adrp x8, _text + add x8, x8, #:lo12:_text + add x2, sp, 16 + str x8, [x2] + bl efi_entry + cmn x0, #1 + b.eq efi_load_fail + + /* + * efi_entry() will have relocated the kernel image if necessary + * and we return here with device tree address in x0 and the kernel + * entry point stored at *image_addr. Save those values in registers + * which are callee preserved. + */ + mov x20, x0 // DTB address + ldr x0, [sp, #16] // relocated _text address + mov x21, x0 + + /* + * Flush dcache covering current runtime addresses + * of kernel text/data. Then flush all of icache. + */ + adrp x1, _text + add x1, x1, #:lo12:_text + adrp x2, _edata + add x2, x2, #:lo12:_edata + sub x1, x2, x1 + + bl __flush_dcache_area + ic ialluis + + /* Turn off Dcache and MMU */ + mrs x0, CurrentEL + cmp x0, #CurrentEL_EL2 + b.ne 1f + mrs x0, sctlr_el2 + bic x0, x0, #1 << 0 // clear SCTLR.M + bic x0, x0, #1 << 2 // clear SCTLR.C + msr sctlr_el2, x0 + isb + b 2f +1: + mrs x0, sctlr_el1 + bic x0, x0, #1 << 0 // clear SCTLR.M + bic x0, x0, #1 << 2 // clear SCTLR.C + msr sctlr_el1, x0 + isb +2: + /* Jump to kernel entry point */ + mov x0, x20 + mov x1, xzr + mov x2, xzr + mov x3, xzr + br x21 + +efi_load_fail: + mov x0, #EFI_LOAD_ERROR + ldp x29, x30, [sp], #32 + ret + +ENDPROC(efi_stub_entry) diff --git a/arch/arm64/kernel/efi-stub.c b/arch/arm64/kernel/efi-stub.c new file mode 100644 index 000000000000..1317fef8dde9 --- /dev/null +++ b/arch/arm64/kernel/efi-stub.c @@ -0,0 +1,49 @@ +/* + * Copyright (C) 2013, 2014 Linaro Ltd; <roy.franz@linaro.org> + * + * This file implements the EFI boot stub for the arm64 kernel. + * Adapted from ARM version by Mark Salter <msalter@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ +#include <linux/efi.h> +#include <asm/efi.h> +#include <asm/sections.h> + +efi_status_t handle_kernel_image(efi_system_table_t *sys_table, + unsigned long *image_addr, + unsigned long *image_size, + unsigned long *reserve_addr, + unsigned long *reserve_size, + unsigned long dram_base, + efi_loaded_image_t *image) +{ + efi_status_t status; + unsigned long kernel_size, kernel_memsize = 0; + + /* Relocate the image, if required. */ + kernel_size = _edata - _text; + if (*image_addr != (dram_base + TEXT_OFFSET)) { + kernel_memsize = kernel_size + (_end - _edata); + status = efi_relocate_kernel(sys_table, image_addr, + kernel_size, kernel_memsize, + dram_base + TEXT_OFFSET, + PAGE_SIZE); + if (status != EFI_SUCCESS) { + pr_efi_err(sys_table, "Failed to relocate kernel\n"); + return status; + } + if (*image_addr != (dram_base + TEXT_OFFSET)) { + pr_efi_err(sys_table, "Failed to alloc kernel memory\n"); + efi_free(sys_table, kernel_memsize, *image_addr); + return EFI_LOAD_ERROR; + } + *image_size = kernel_memsize; + } + + + return EFI_SUCCESS; +} diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c new file mode 100644 index 000000000000..e72f3100958f --- /dev/null +++ b/arch/arm64/kernel/efi.c @@ -0,0 +1,477 @@ +/* + * Extensible Firmware Interface + * + * Based on Extensible Firmware Interface Specification version 2.4 + * + * Copyright (C) 2013, 2014 Linaro Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include <linux/efi.h> +#include <linux/export.h> +#include <linux/memblock.h> +#include <linux/bootmem.h> +#include <linux/of.h> +#include <linux/of_fdt.h> +#include <linux/sched.h> +#include <linux/slab.h> + +#include <asm/cacheflush.h> +#include <asm/efi.h> +#include <asm/tlbflush.h> +#include <asm/mmu_context.h> + +struct efi_memory_map memmap; + +static efi_runtime_services_t *runtime; + +static u64 efi_system_table; + +static int uefi_debug __initdata; +static int __init uefi_debug_setup(char *str) +{ + uefi_debug = 1; + + return 0; +} +early_param("uefi_debug", uefi_debug_setup); + +static int __init is_normal_ram(efi_memory_desc_t *md) +{ + if (md->attribute & EFI_MEMORY_WB) + return 1; + return 0; +} + +static void __init efi_setup_idmap(void) +{ + struct memblock_region *r; + efi_memory_desc_t *md; + u64 paddr, npages, size; + + for_each_memblock(memory, r) + create_id_mapping(r->base, r->size, 0); + + /* map runtime io spaces */ + for_each_efi_memory_desc(&memmap, md) { + if (!(md->attribute & EFI_MEMORY_RUNTIME) || is_normal_ram(md)) + continue; + paddr = md->phys_addr; + npages = md->num_pages; + memrange_efi_to_native(&paddr, &npages); + size = npages << PAGE_SHIFT; + create_id_mapping(paddr, size, 1); + } +} + +static int __init uefi_init(void) +{ + efi_char16_t *c16; + char vendor[100] = "unknown"; + int i, retval; + + efi.systab = early_memremap(efi_system_table, + sizeof(efi_system_table_t)); + if (efi.systab == NULL) { + pr_warn("Unable to map EFI system table.\n"); + return -ENOMEM; + } + + set_bit(EFI_BOOT, &efi.flags); + set_bit(EFI_64BIT, &efi.flags); + + /* + * Verify the EFI Table + */ + if (efi.systab->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) { + pr_err("System table signature incorrect\n"); + return -EINVAL; + } + if ((efi.systab->hdr.revision >> 16) < 2) + pr_warn("Warning: EFI system table version %d.%02d, expected 2.00 or greater\n", + efi.systab->hdr.revision >> 16, + efi.systab->hdr.revision & 0xffff); + + /* Show what we know for posterity */ + c16 = early_memremap(efi.systab->fw_vendor, + sizeof(vendor)); + if (c16) { + for (i = 0; i < (int) sizeof(vendor) - 1 && *c16; ++i) + vendor[i] = c16[i]; + vendor[i] = '\0'; + } + + pr_info("EFI v%u.%.02u by %s\n", + efi.systab->hdr.revision >> 16, + efi.systab->hdr.revision & 0xffff, vendor); + + retval = efi_config_init(NULL); + if (retval == 0) + set_bit(EFI_CONFIG_TABLES, &efi.flags); + + early_memunmap(c16, sizeof(vendor)); + early_memunmap(efi.systab, sizeof(efi_system_table_t)); + + return retval; +} + +static __initdata char memory_type_name[][32] = { + {"Reserved"}, + {"Loader Code"}, + {"Loader Data"}, + {"Boot Code"}, + {"Boot Data"}, + {"Runtime Code"}, + {"Runtime Data"}, + {"Conventional Memory"}, + {"Unusable Memory"}, + {"ACPI Reclaim Memory"}, + {"ACPI Memory NVS"}, + {"Memory Mapped I/O"}, + {"MMIO Port Space"}, + {"PAL Code"}, +}; + +/* + * Return true for RAM regions we want to permanently reserve. + */ +static __init int is_reserve_region(efi_memory_desc_t *md) +{ + if (!is_normal_ram(md)) + return 0; + + if (md->attribute & EFI_MEMORY_RUNTIME) + return 1; + + if (md->type == EFI_ACPI_RECLAIM_MEMORY || + md->type == EFI_RESERVED_TYPE) + return 1; + + return 0; +} + +static __init void reserve_regions(void) +{ + efi_memory_desc_t *md; + u64 paddr, npages, size; + + if (uefi_debug) + pr_info("Processing EFI memory map:\n"); + + for_each_efi_memory_desc(&memmap, md) { + paddr = md->phys_addr; + npages = md->num_pages; + + if (uefi_debug) + pr_info(" 0x%012llx-0x%012llx [%s]", + paddr, paddr + (npages << EFI_PAGE_SHIFT) - 1, + memory_type_name[md->type]); + + memrange_efi_to_native(&paddr, &npages); + size = npages << PAGE_SHIFT; + + if (is_normal_ram(md)) + early_init_dt_add_memory_arch(paddr, size); + + if (is_reserve_region(md) || + md->type == EFI_BOOT_SERVICES_CODE || + md->type == EFI_BOOT_SERVICES_DATA) { + memblock_reserve(paddr, size); + if (uefi_debug) + pr_cont("*"); + } + + if (uefi_debug) + pr_cont("\n"); + } +} + + +static u64 __init free_one_region(u64 start, u64 end) +{ + u64 size = end - start; + + if (uefi_debug) + pr_info(" EFI freeing: 0x%012llx-0x%012llx\n", start, end - 1); + + free_bootmem_late(start, size); + return size; +} + +static u64 __init free_region(u64 start, u64 end) +{ + u64 map_start, map_end, total = 0; + + if (end <= start) + return total; + + map_start = (u64)memmap.phys_map; + map_end = PAGE_ALIGN(map_start + (memmap.map_end - memmap.map)); + map_start &= PAGE_MASK; + + if (start < map_end && end > map_start) { + /* region overlaps UEFI memmap */ + if (start < map_start) + total += free_one_region(start, map_start); + + if (map_end < end) + total += free_one_region(map_end, end); + } else + total += free_one_region(start, end); + + return total; +} + +static void __init free_boot_services(void) +{ + u64 total_freed = 0; + u64 keep_end, free_start, free_end; + efi_memory_desc_t *md; + + /* + * If kernel uses larger pages than UEFI, we have to be careful + * not to inadvertantly free memory we want to keep if there is + * overlap at the kernel page size alignment. We do not want to + * free is_reserve_region() memory nor the UEFI memmap itself. + * + * The memory map is sorted, so we keep track of the end of + * any previous region we want to keep, remember any region + * we want to free and defer freeing it until we encounter + * the next region we want to keep. This way, before freeing + * it, we can clip it as needed to avoid freeing memory we + * want to keep for UEFI. + */ + + keep_end = 0; + free_start = 0; + + for_each_efi_memory_desc(&memmap, md) { + u64 paddr, npages, size; + + if (is_reserve_region(md)) { + /* + * We don't want to free any memory from this region. + */ + if (free_start) { + /* adjust free_end then free region */ + if (free_end > md->phys_addr) + free_end -= PAGE_SIZE; + total_freed += free_region(free_start, free_end); + free_start = 0; + } + keep_end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT); + continue; + } + + if (md->type != EFI_BOOT_SERVICES_CODE && + md->type != EFI_BOOT_SERVICES_DATA) { + /* no need to free this region */ + continue; + } + + /* + * We want to free memory from this region. + */ + paddr = md->phys_addr; + npages = md->num_pages; + memrange_efi_to_native(&paddr, &npages); + size = npages << PAGE_SHIFT; + + if (free_start) { + if (paddr <= free_end) + free_end = paddr + size; + else { + total_freed += free_region(free_start, free_end); + free_start = paddr; + free_end = paddr + size; + } + } else { + free_start = paddr; + free_end = paddr + size; + } + if (free_start < keep_end) { + free_start += PAGE_SIZE; + if (free_start >= free_end) + free_start = 0; + } + } + if (free_start) + total_freed += free_region(free_start, free_end); + + if (total_freed) + pr_info("Freed 0x%llx bytes of EFI boot services memory", + total_freed); +} + +void __init efi_init(void) +{ + struct efi_fdt_params params; + + /* Grab UEFI information placed in FDT by stub */ + if (!efi_get_fdt_params(¶ms, uefi_debug)) + return; + + efi_system_table = params.system_table; + + memblock_reserve(params.mmap & PAGE_MASK, + PAGE_ALIGN(params.mmap_size + (params.mmap & ~PAGE_MASK))); + memmap.phys_map = (void *)params.mmap; + memmap.map = early_memremap(params.mmap, params.mmap_size); + memmap.map_end = memmap.map + params.mmap_size; + memmap.desc_size = params.desc_size; + memmap.desc_version = params.desc_ver; + + if (uefi_init() < 0) + return; + + reserve_regions(); +} + +void __init efi_idmap_init(void) +{ + if (!efi_enabled(EFI_BOOT)) + return; + + /* boot time idmap_pg_dir is incomplete, so fill in missing parts */ + efi_setup_idmap(); +} + +static int __init remap_region(efi_memory_desc_t *md, void **new) +{ + u64 paddr, vaddr, npages, size; + + paddr = md->phys_addr; + npages = md->num_pages; + memrange_efi_to_native(&paddr, &npages); + size = npages << PAGE_SHIFT; + + if (is_normal_ram(md)) + vaddr = (__force u64)ioremap_cache(paddr, size); + else + vaddr = (__force u64)ioremap(paddr, size); + + if (!vaddr) { + pr_err("Unable to remap 0x%llx pages @ %p\n", + npages, (void *)paddr); + return 0; + } + + /* adjust for any rounding when EFI and system pagesize differs */ + md->virt_addr = vaddr + (md->phys_addr - paddr); + + if (uefi_debug) + pr_info(" EFI remap 0x%012llx => %p\n", + md->phys_addr, (void *)md->virt_addr); + + memcpy(*new, md, memmap.desc_size); + *new += memmap.desc_size; + + return 1; +} + +/* + * Switch UEFI from an identity map to a kernel virtual map + */ +static int __init arm64_enter_virtual_mode(void) +{ + efi_memory_desc_t *md; + phys_addr_t virtmap_phys; + void *virtmap, *virt_md; + efi_status_t status; + u64 mapsize; + int count = 0; + unsigned long flags; + + if (!efi_enabled(EFI_BOOT)) { + pr_info("EFI services will not be available.\n"); + return -1; + } + + pr_info("Remapping and enabling EFI services.\n"); + + /* replace early memmap mapping with permanent mapping */ + mapsize = memmap.map_end - memmap.map; + early_memunmap(memmap.map, mapsize); + memmap.map = (__force void *)ioremap_cache((phys_addr_t)memmap.phys_map, + mapsize); + memmap.map_end = memmap.map + mapsize; + + efi.memmap = &memmap; + + /* Map the runtime regions */ + virtmap = kmalloc(mapsize, GFP_KERNEL); + if (!virtmap) { + pr_err("Failed to allocate EFI virtual memmap\n"); + return -1; + } + virtmap_phys = virt_to_phys(virtmap); + virt_md = virtmap; + + for_each_efi_memory_desc(&memmap, md) { + if (!(md->attribute & EFI_MEMORY_RUNTIME)) + continue; + if (!remap_region(md, &virt_md)) + goto err_unmap; + ++count; + } + + efi.systab = (__force void *)efi_lookup_mapped_addr(efi_system_table); + if (!efi.systab) { + /* + * If we have no virtual mapping for the System Table at this + * point, the memory map doesn't cover the physical offset where + * it resides. This means the System Table will be inaccessible + * to Runtime Services themselves once the virtual mapping is + * installed. + */ + pr_err("Failed to remap EFI System Table -- buggy firmware?\n"); + goto err_unmap; + } + set_bit(EFI_SYSTEM_TABLES, &efi.flags); + + local_irq_save(flags); + cpu_switch_mm(idmap_pg_dir, &init_mm); + + /* Call SetVirtualAddressMap with the physical address of the map */ + runtime = efi.systab->runtime; + efi.set_virtual_address_map = runtime->set_virtual_address_map; + + status = efi.set_virtual_address_map(count * memmap.desc_size, + memmap.desc_size, + memmap.desc_version, + (efi_memory_desc_t *)virtmap_phys); + cpu_set_reserved_ttbr0(); + flush_tlb_all(); + local_irq_restore(flags); + + kfree(virtmap); + + free_boot_services(); + + if (status != EFI_SUCCESS) { + pr_err("Failed to set EFI virtual address map! [%lx]\n", + status); + return -1; + } + + /* Set up runtime services function pointers */ + runtime = efi.systab->runtime; + efi_native_runtime_setup(); + set_bit(EFI_RUNTIME_SERVICES, &efi.flags); + + return 0; + +err_unmap: + /* unmap all mappings that succeeded: there are 'count' of those */ + for (virt_md = virtmap; count--; virt_md += memmap.desc_size) { + md = virt_md; + iounmap((__force void __iomem *)md->virt_addr); + } + kfree(virtmap); + return -1; +} +early_initcall(arm64_enter_virtual_mode); diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S index 6a27cd6dbfa6..c44a82f146b1 100644 --- a/arch/arm64/kernel/entry-fpsimd.S +++ b/arch/arm64/kernel/entry-fpsimd.S @@ -41,3 +41,27 @@ ENTRY(fpsimd_load_state) fpsimd_restore x0, 8 ret ENDPROC(fpsimd_load_state) + +#ifdef CONFIG_KERNEL_MODE_NEON + +/* + * Save the bottom n FP registers. + * + * x0 - pointer to struct fpsimd_partial_state + */ +ENTRY(fpsimd_save_partial_state) + fpsimd_save_partial x0, 1, 8, 9 + ret +ENDPROC(fpsimd_save_partial_state) + +/* + * Load the bottom n FP registers. + * + * x0 - pointer to struct fpsimd_partial_state + */ +ENTRY(fpsimd_load_partial_state) + fpsimd_restore_partial x0, 8, 9 + ret +ENDPROC(fpsimd_load_partial_state) + +#endif diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S new file mode 100644 index 000000000000..38e704e597f7 --- /dev/null +++ b/arch/arm64/kernel/entry-ftrace.S @@ -0,0 +1,213 @@ +/* + * arch/arm64/kernel/entry-ftrace.S + * + * Copyright (C) 2013 Linaro Limited + * Author: AKASHI Takahiro <takahiro.akashi@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/linkage.h> +#include <asm/ftrace.h> +#include <asm/insn.h> + +/* + * Gcc with -pg will put the following code in the beginning of each function: + * mov x0, x30 + * bl _mcount + * [function's body ...] + * "bl _mcount" may be replaced to "bl ftrace_caller" or NOP if dynamic + * ftrace is enabled. + * + * Please note that x0 as an argument will not be used here because we can + * get lr(x30) of instrumented function at any time by winding up call stack + * as long as the kernel is compiled without -fomit-frame-pointer. + * (or CONFIG_FRAME_POINTER, this is forced on arm64) + * + * stack layout after mcount_enter in _mcount(): + * + * current sp/fp => 0:+-----+ + * in _mcount() | x29 | -> instrumented function's fp + * +-----+ + * | x30 | -> _mcount()'s lr (= instrumented function's pc) + * old sp => +16:+-----+ + * when instrumented | | + * function calls | ... | + * _mcount() | | + * | | + * instrumented => +xx:+-----+ + * function's fp | x29 | -> parent's fp + * +-----+ + * | x30 | -> instrumented function's lr (= parent's pc) + * +-----+ + * | ... | + */ + + .macro mcount_enter + stp x29, x30, [sp, #-16]! + mov x29, sp + .endm + + .macro mcount_exit + ldp x29, x30, [sp], #16 + ret + .endm + + .macro mcount_adjust_addr rd, rn + sub \rd, \rn, #AARCH64_INSN_SIZE + .endm + + /* for instrumented function's parent */ + .macro mcount_get_parent_fp reg + ldr \reg, [x29] + ldr \reg, [\reg] + .endm + + /* for instrumented function */ + .macro mcount_get_pc0 reg + mcount_adjust_addr \reg, x30 + .endm + + .macro mcount_get_pc reg + ldr \reg, [x29, #8] + mcount_adjust_addr \reg, \reg + .endm + + .macro mcount_get_lr reg + ldr \reg, [x29] + ldr \reg, [\reg, #8] + mcount_adjust_addr \reg, \reg + .endm + + .macro mcount_get_lr_addr reg + ldr \reg, [x29] + add \reg, \reg, #8 + .endm + +#ifndef CONFIG_DYNAMIC_FTRACE +/* + * void _mcount(unsigned long return_address) + * @return_address: return address to instrumented function + * + * This function makes calls, if enabled, to: + * - tracer function to probe instrumented function's entry, + * - ftrace_graph_caller to set up an exit hook + */ +ENTRY(_mcount) + mcount_enter + + ldr x0, =ftrace_trace_function + ldr x2, [x0] + adr x0, ftrace_stub + cmp x0, x2 // if (ftrace_trace_function + b.eq skip_ftrace_call // != ftrace_stub) { + + mcount_get_pc x0 // function's pc + mcount_get_lr x1 // function's lr (= parent's pc) + blr x2 // (*ftrace_trace_function)(pc, lr); + +#ifndef CONFIG_FUNCTION_GRAPH_TRACER +skip_ftrace_call: // return; + mcount_exit // } +#else + mcount_exit // return; + // } +skip_ftrace_call: + ldr x1, =ftrace_graph_return + ldr x2, [x1] // if ((ftrace_graph_return + cmp x0, x2 // != ftrace_stub) + b.ne ftrace_graph_caller + + ldr x1, =ftrace_graph_entry // || (ftrace_graph_entry + ldr x2, [x1] // != ftrace_graph_entry_stub)) + ldr x0, =ftrace_graph_entry_stub + cmp x0, x2 + b.ne ftrace_graph_caller // ftrace_graph_caller(); + + mcount_exit +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ +ENDPROC(_mcount) + +#else /* CONFIG_DYNAMIC_FTRACE */ +/* + * _mcount() is used to build the kernel with -pg option, but all the branch + * instructions to _mcount() are replaced to NOP initially at kernel start up, + * and later on, NOP to branch to ftrace_caller() when enabled or branch to + * NOP when disabled per-function base. + */ +ENTRY(_mcount) + ret +ENDPROC(_mcount) + +/* + * void ftrace_caller(unsigned long return_address) + * @return_address: return address to instrumented function + * + * This function is a counterpart of _mcount() in 'static' ftrace, and + * makes calls to: + * - tracer function to probe instrumented function's entry, + * - ftrace_graph_caller to set up an exit hook + */ +ENTRY(ftrace_caller) + mcount_enter + + mcount_get_pc0 x0 // function's pc + mcount_get_lr x1 // function's lr + + .global ftrace_call +ftrace_call: // tracer(pc, lr); + nop // This will be replaced with "bl xxx" + // where xxx can be any kind of tracer. + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + .global ftrace_graph_call +ftrace_graph_call: // ftrace_graph_caller(); + nop // If enabled, this will be replaced + // "b ftrace_graph_caller" +#endif + + mcount_exit +ENDPROC(ftrace_caller) +#endif /* CONFIG_DYNAMIC_FTRACE */ + +ENTRY(ftrace_stub) + ret +ENDPROC(ftrace_stub) + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +/* + * void ftrace_graph_caller(void) + * + * Called from _mcount() or ftrace_caller() when function_graph tracer is + * selected. + * This function w/ prepare_ftrace_return() fakes link register's value on + * the call stack in order to intercept instrumented function's return path + * and run return_to_handler() later on its exit. + */ +ENTRY(ftrace_graph_caller) + mcount_get_lr_addr x0 // pointer to function's saved lr + mcount_get_pc x1 // function's pc + mcount_get_parent_fp x2 // parent's fp + bl prepare_ftrace_return // prepare_ftrace_return(&lr, pc, fp) + + mcount_exit +ENDPROC(ftrace_graph_caller) + +/* + * void return_to_handler(void) + * + * Run ftrace_return_to_handler() before going back to parent. + * @fp is checked against the value passed by ftrace_graph_caller() + * only when CONFIG_HAVE_FUNCTION_GRAPH_FP_TEST is enabled. + */ +ENTRY(return_to_handler) + str x0, [sp, #-16]! + mov x0, x29 // parent's fp + bl ftrace_return_to_handler// addr = ftrace_return_to_hander(fp); + mov x30, x0 // restore the original return address + ldr x0, [sp], #16 + ret +END(return_to_handler) +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 39ac630d83de..f0b5e5120a87 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -27,7 +27,32 @@ #include <asm/esr.h> #include <asm/thread_info.h> #include <asm/unistd.h> -#include <asm/unistd32.h> + +/* + * Context tracking subsystem. Used to instrument transitions + * between user and kernel mode. + */ + .macro ct_user_exit, syscall = 0 +#ifdef CONFIG_CONTEXT_TRACKING + bl context_tracking_user_exit + .if \syscall == 1 + /* + * Save/restore needed during syscalls. Restore syscall arguments from + * the values already saved on stack during kernel_entry. + */ + ldp x0, x1, [sp] + ldp x2, x3, [sp, #S_X2] + ldp x4, x5, [sp, #S_X4] + ldp x6, x7, [sp, #S_X6] + .endif +#endif + .endm + + .macro ct_user_enter +#ifdef CONFIG_CONTEXT_TRACKING + bl context_tracking_user_enter +#endif + .endm /* * Bad Abort numbers @@ -60,6 +85,9 @@ push x0, x1 .if \el == 0 mrs x21, sp_el0 + get_thread_info tsk // Ensure MDSCR_EL1.SS is clear, + ldr x19, [tsk, #TI_FLAGS] // since we can unmask debug + disable_step_tsk x19, x20 // exceptions when scheduling. .else add x21, sp, #S_FRAME_SIZE .endif @@ -88,6 +116,7 @@ .macro kernel_exit, el, ret = 0 ldp x21, x22, [sp, #S_PC] // load ELR, SPSR .if \el == 0 + ct_user_enter ldr x23, [sp, #S_SP] // load return stack pointer .endif .if \ret @@ -259,7 +288,7 @@ el1_da: * Data abort handling */ mrs x0, far_el1 - enable_dbg_if_not_stepping x2 + enable_dbg // re-enable interrupts if they were enabled in the aborted context tbnz x23, #7, 1f // PSR_I_BIT enable_irq @@ -275,13 +304,14 @@ el1_sp_pc: * Stack or PC alignment exception handling */ mrs x0, far_el1 - mov x1, x25 + enable_dbg mov x2, sp b do_sp_pc_abort el1_undef: /* * Undefined instruction */ + enable_dbg mov x0, sp b do_undefinstr el1_dbg: @@ -294,10 +324,11 @@ el1_dbg: mrs x0, far_el1 mov x2, sp // struct pt_regs bl do_debug_exception - + enable_dbg kernel_exit 1 el1_inv: // TODO: add support for undefined instructions in kernel mode + enable_dbg mov x0, sp mov x1, #BAD_SYNC mrs x2, esr_el1 @@ -307,7 +338,7 @@ ENDPROC(el1_sync) .align 6 el1_irq: kernel_entry 1 - enable_dbg_if_not_stepping x0 + enable_dbg #ifdef CONFIG_TRACE_IRQFLAGS bl trace_hardirqs_off #endif @@ -332,8 +363,7 @@ ENDPROC(el1_irq) #ifdef CONFIG_PREEMPT el1_preempt: mov x24, lr -1: enable_dbg - bl preempt_schedule_irq // irq en/disable is done inside +1: bl preempt_schedule_irq // irq en/disable is done inside ldr x0, [tsk, #TI_FLAGS] // get new tasks TI_FLAGS tbnz x0, #TIF_NEED_RESCHED, 1b // needs rescheduling? ret x24 @@ -349,7 +379,6 @@ el0_sync: lsr x24, x25, #ESR_EL1_EC_SHIFT // exception class cmp x24, #ESR_EL1_EC_SVC64 // SVC in 64-bit state b.eq el0_svc - adr lr, ret_from_exception cmp x24, #ESR_EL1_EC_DABT_EL0 // data abort in EL0 b.eq el0_da cmp x24, #ESR_EL1_EC_IABT_EL0 // instruction abort in EL0 @@ -378,7 +407,6 @@ el0_sync_compat: lsr x24, x25, #ESR_EL1_EC_SHIFT // exception class cmp x24, #ESR_EL1_EC_SVC32 // SVC in 32-bit state b.eq el0_svc_compat - adr lr, ret_from_exception cmp x24, #ESR_EL1_EC_DABT_EL0 // data abort in EL0 b.eq el0_da cmp x24, #ESR_EL1_EC_IABT_EL0 // instruction abort in EL0 @@ -421,63 +449,69 @@ el0_da: /* * Data abort handling */ - mrs x0, far_el1 - bic x0, x0, #(0xff << 56) - disable_step x1 - isb - enable_dbg + mrs x26, far_el1 // enable interrupts before calling the main handler - enable_irq + enable_dbg_and_irq + ct_user_exit + bic x0, x26, #(0xff << 56) mov x1, x25 mov x2, sp + adr lr, ret_to_user b do_mem_abort el0_ia: /* * Instruction abort handling */ - mrs x0, far_el1 - disable_step x1 - isb - enable_dbg + mrs x26, far_el1 // enable interrupts before calling the main handler - enable_irq + enable_dbg_and_irq + ct_user_exit + mov x0, x26 orr x1, x25, #1 << 24 // use reserved ISS bit for instruction aborts mov x2, sp + adr lr, ret_to_user b do_mem_abort el0_fpsimd_acc: /* * Floating Point or Advanced SIMD access */ + enable_dbg + ct_user_exit mov x0, x25 mov x1, sp + adr lr, ret_to_user b do_fpsimd_acc el0_fpsimd_exc: /* * Floating Point or Advanced SIMD exception */ + enable_dbg + ct_user_exit mov x0, x25 mov x1, sp + adr lr, ret_to_user b do_fpsimd_exc el0_sp_pc: /* * Stack or PC alignment exception handling */ - mrs x0, far_el1 - disable_step x1 - isb - enable_dbg + mrs x26, far_el1 // enable interrupts before calling the main handler - enable_irq + enable_dbg_and_irq + mov x0, x26 mov x1, x25 mov x2, sp + adr lr, ret_to_user b do_sp_pc_abort el0_undef: /* * Undefined instruction */ - mov x0, sp // enable interrupts before calling the main handler - enable_irq + enable_dbg_and_irq + ct_user_exit + mov x0, sp + adr lr, ret_to_user b do_undefinstr el0_dbg: /* @@ -485,14 +519,19 @@ el0_dbg: */ tbnz x24, #0, el0_inv // EL0 only mrs x0, far_el1 - disable_step x1 mov x1, x25 mov x2, sp - b do_debug_exception + bl do_debug_exception + enable_dbg + ct_user_exit + b ret_to_user el0_inv: + enable_dbg + ct_user_exit mov x0, sp mov x1, #BAD_SYNC mrs x2, esr_el1 + adr lr, ret_to_user b bad_mode ENDPROC(el0_sync) @@ -500,15 +539,13 @@ ENDPROC(el0_sync) el0_irq: kernel_entry 0 el0_irq_naked: - disable_step x1 - isb enable_dbg #ifdef CONFIG_TRACE_IRQFLAGS bl trace_hardirqs_off #endif + ct_user_exit irq_handler - get_thread_info tsk #ifdef CONFIG_TRACE_IRQFLAGS bl trace_hardirqs_on @@ -517,14 +554,6 @@ el0_irq_naked: ENDPROC(el0_irq) /* - * This is the return code to user mode for abort handlers - */ -ret_from_exception: - get_thread_info tsk - b ret_to_user -ENDPROC(ret_from_exception) - -/* * Register switch for AArch64. The callee-saved registers need to be saved * and restored. On entry: * x0 = previous task_struct (must be preserved across the switch) @@ -563,10 +592,7 @@ ret_fast_syscall: ldr x1, [tsk, #TI_FLAGS] and x2, x1, #_TIF_WORK_MASK cbnz x2, fast_work_pending - tbz x1, #TIF_SINGLESTEP, fast_exit - disable_dbg - enable_step x2 -fast_exit: + enable_step_tsk x1, x2 kernel_exit 0, ret = 1 /* @@ -576,7 +602,7 @@ fast_work_pending: str x0, [sp, #S_X0] // returned x0 work_pending: tbnz x1, #TIF_NEED_RESCHED, work_resched - /* TIF_SIGPENDING or TIF_NOTIFY_RESUME case */ + /* TIF_SIGPENDING, TIF_NOTIFY_RESUME or TIF_FOREIGN_FPSTATE case */ ldr x2, [sp, #S_PSTATE] mov x0, sp // 'regs' tst x2, #PSR_MODE_MASK // user mode regs? @@ -585,7 +611,6 @@ work_pending: bl do_notify_resume b ret_to_user work_resched: - enable_dbg bl schedule /* @@ -596,9 +621,7 @@ ret_to_user: ldr x1, [tsk, #TI_FLAGS] and x2, x1, #_TIF_WORK_MASK cbnz x2, work_pending - tbz x1, #TIF_SINGLESTEP, no_work_pending - disable_dbg - enable_step x2 + enable_step_tsk x1, x2 no_work_pending: kernel_exit 0, ret = 0 ENDPROC(ret_to_user) @@ -625,14 +648,12 @@ el0_svc: mov sc_nr, #__NR_syscalls el0_svc_naked: // compat entry point stp x0, scno, [sp, #S_ORIG_X0] // save the original x0 and syscall number - disable_step x16 - isb - enable_dbg - enable_irq + enable_dbg_and_irq + ct_user_exit 1 - get_thread_info tsk - ldr x16, [tsk, #TI_FLAGS] // check for syscall tracing - tbnz x16, #TIF_SYSCALL_TRACE, __sys_trace // are we tracing syscalls? + ldr x16, [tsk, #TI_FLAGS] // check for syscall hooks + tst x16, #_TIF_SYSCALL_WORK + b.ne __sys_trace adr lr, ret_fast_syscall // return address cmp scno, sc_nr // check upper syscall limit b.hs ni_sys @@ -648,9 +669,8 @@ ENDPROC(el0_svc) * switches, and waiting for our parent to respond. */ __sys_trace: - mov x1, sp - mov w0, #0 // trace entry - bl syscall_trace + mov x0, sp + bl syscall_trace_enter adr lr, __sys_trace_return // return address uxtw scno, w0 // syscall number (possibly new) mov x1, sp // pointer to regs @@ -665,9 +685,8 @@ __sys_trace: __sys_trace_return: str x0, [sp] // save returned x0 - mov x1, sp - mov w0, #1 // trace exit - bl syscall_trace + mov x0, sp + bl syscall_trace_exit b ret_to_user /* diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 4aef42a04bdc..ad8aebb1cdef 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -35,6 +35,60 @@ #define FPEXC_IDF (1 << 7) /* + * In order to reduce the number of times the FPSIMD state is needlessly saved + * and restored, we need to keep track of two things: + * (a) for each task, we need to remember which CPU was the last one to have + * the task's FPSIMD state loaded into its FPSIMD registers; + * (b) for each CPU, we need to remember which task's userland FPSIMD state has + * been loaded into its FPSIMD registers most recently, or whether it has + * been used to perform kernel mode NEON in the meantime. + * + * For (a), we add a 'cpu' field to struct fpsimd_state, which gets updated to + * the id of the current CPU everytime the state is loaded onto a CPU. For (b), + * we add the per-cpu variable 'fpsimd_last_state' (below), which contains the + * address of the userland FPSIMD state of the task that was loaded onto the CPU + * the most recently, or NULL if kernel mode NEON has been performed after that. + * + * With this in place, we no longer have to restore the next FPSIMD state right + * when switching between tasks. Instead, we can defer this check to userland + * resume, at which time we verify whether the CPU's fpsimd_last_state and the + * task's fpsimd_state.cpu are still mutually in sync. If this is the case, we + * can omit the FPSIMD restore. + * + * As an optimization, we use the thread_info flag TIF_FOREIGN_FPSTATE to + * indicate whether or not the userland FPSIMD state of the current task is + * present in the registers. The flag is set unless the FPSIMD registers of this + * CPU currently contain the most recent userland FPSIMD state of the current + * task. + * + * For a certain task, the sequence may look something like this: + * - the task gets scheduled in; if both the task's fpsimd_state.cpu field + * contains the id of the current CPU, and the CPU's fpsimd_last_state per-cpu + * variable points to the task's fpsimd_state, the TIF_FOREIGN_FPSTATE flag is + * cleared, otherwise it is set; + * + * - the task returns to userland; if TIF_FOREIGN_FPSTATE is set, the task's + * userland FPSIMD state is copied from memory to the registers, the task's + * fpsimd_state.cpu field is set to the id of the current CPU, the current + * CPU's fpsimd_last_state pointer is set to this task's fpsimd_state and the + * TIF_FOREIGN_FPSTATE flag is cleared; + * + * - the task executes an ordinary syscall; upon return to userland, the + * TIF_FOREIGN_FPSTATE flag will still be cleared, so no FPSIMD state is + * restored; + * + * - the task executes a syscall which executes some NEON instructions; this is + * preceded by a call to kernel_neon_begin(), which copies the task's FPSIMD + * register contents to memory, clears the fpsimd_last_state per-cpu variable + * and sets the TIF_FOREIGN_FPSTATE flag; + * + * - the task gets preempted after kernel_neon_end() is called; as we have not + * returned from the 2nd syscall yet, TIF_FOREIGN_FPSTATE is still set so + * whatever is in the FPSIMD registers is not saved to memory, but discarded. + */ +static DEFINE_PER_CPU(struct fpsimd_state *, fpsimd_last_state); + +/* * Trapped FP/ASIMD access. */ void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs) @@ -72,43 +126,137 @@ void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs) void fpsimd_thread_switch(struct task_struct *next) { - /* check if not kernel threads */ - if (current->mm) + /* + * Save the current FPSIMD state to memory, but only if whatever is in + * the registers is in fact the most recent userland FPSIMD state of + * 'current'. + */ + if (current->mm && !test_thread_flag(TIF_FOREIGN_FPSTATE)) fpsimd_save_state(¤t->thread.fpsimd_state); - if (next->mm) - fpsimd_load_state(&next->thread.fpsimd_state); + + if (next->mm) { + /* + * If we are switching to a task whose most recent userland + * FPSIMD state is already in the registers of *this* cpu, + * we can skip loading the state from memory. Otherwise, set + * the TIF_FOREIGN_FPSTATE flag so the state will be loaded + * upon the next return to userland. + */ + struct fpsimd_state *st = &next->thread.fpsimd_state; + + if (__this_cpu_read(fpsimd_last_state) == st + && st->cpu == smp_processor_id()) + clear_ti_thread_flag(task_thread_info(next), + TIF_FOREIGN_FPSTATE); + else + set_ti_thread_flag(task_thread_info(next), + TIF_FOREIGN_FPSTATE); + } } void fpsimd_flush_thread(void) { - preempt_disable(); memset(¤t->thread.fpsimd_state, 0, sizeof(struct fpsimd_state)); - fpsimd_load_state(¤t->thread.fpsimd_state); + set_thread_flag(TIF_FOREIGN_FPSTATE); +} + +/* + * Save the userland FPSIMD state of 'current' to memory, but only if the state + * currently held in the registers does in fact belong to 'current' + */ +void fpsimd_preserve_current_state(void) +{ + preempt_disable(); + if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) + fpsimd_save_state(¤t->thread.fpsimd_state); + preempt_enable(); +} + +/* + * Load the userland FPSIMD state of 'current' from memory, but only if the + * FPSIMD state already held in the registers is /not/ the most recent FPSIMD + * state of 'current' + */ +void fpsimd_restore_current_state(void) +{ + preempt_disable(); + if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) { + struct fpsimd_state *st = ¤t->thread.fpsimd_state; + + fpsimd_load_state(st); + this_cpu_write(fpsimd_last_state, st); + st->cpu = smp_processor_id(); + } + preempt_enable(); +} + +/* + * Load an updated userland FPSIMD state for 'current' from memory and set the + * flag that indicates that the FPSIMD register contents are the most recent + * FPSIMD state of 'current' + */ +void fpsimd_update_current_state(struct fpsimd_state *state) +{ + preempt_disable(); + fpsimd_load_state(state); + if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) { + struct fpsimd_state *st = ¤t->thread.fpsimd_state; + + this_cpu_write(fpsimd_last_state, st); + st->cpu = smp_processor_id(); + } preempt_enable(); } +/* + * Invalidate live CPU copies of task t's FPSIMD state + */ +void fpsimd_flush_task_state(struct task_struct *t) +{ + t->thread.fpsimd_state.cpu = NR_CPUS; +} + #ifdef CONFIG_KERNEL_MODE_NEON +static DEFINE_PER_CPU(struct fpsimd_partial_state, hardirq_fpsimdstate); +static DEFINE_PER_CPU(struct fpsimd_partial_state, softirq_fpsimdstate); + /* * Kernel-side NEON support functions */ -void kernel_neon_begin(void) +void kernel_neon_begin_partial(u32 num_regs) { - /* Avoid using the NEON in interrupt context */ - BUG_ON(in_interrupt()); - preempt_disable(); + if (in_interrupt()) { + struct fpsimd_partial_state *s = this_cpu_ptr( + in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate); - if (current->mm) - fpsimd_save_state(¤t->thread.fpsimd_state); + BUG_ON(num_regs > 32); + fpsimd_save_partial_state(s, roundup(num_regs, 2)); + } else { + /* + * Save the userland FPSIMD state if we have one and if we + * haven't done so already. Clear fpsimd_last_state to indicate + * that there is no longer userland FPSIMD state in the + * registers. + */ + preempt_disable(); + if (current->mm && + !test_and_set_thread_flag(TIF_FOREIGN_FPSTATE)) + fpsimd_save_state(¤t->thread.fpsimd_state); + this_cpu_write(fpsimd_last_state, NULL); + } } -EXPORT_SYMBOL(kernel_neon_begin); +EXPORT_SYMBOL(kernel_neon_begin_partial); void kernel_neon_end(void) { - if (current->mm) - fpsimd_load_state(¤t->thread.fpsimd_state); - - preempt_enable(); + if (in_interrupt()) { + struct fpsimd_partial_state *s = this_cpu_ptr( + in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate); + fpsimd_load_partial_state(s); + } else { + preempt_enable(); + } } EXPORT_SYMBOL(kernel_neon_end); @@ -120,12 +268,12 @@ static int fpsimd_cpu_pm_notifier(struct notifier_block *self, { switch (cmd) { case CPU_PM_ENTER: - if (current->mm) + if (current->mm && !test_thread_flag(TIF_FOREIGN_FPSTATE)) fpsimd_save_state(¤t->thread.fpsimd_state); break; case CPU_PM_EXIT: if (current->mm) - fpsimd_load_state(¤t->thread.fpsimd_state); + set_thread_flag(TIF_FOREIGN_FPSTATE); break; case CPU_PM_ENTER_FAILED: default: diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c new file mode 100644 index 000000000000..7924d73b6476 --- /dev/null +++ b/arch/arm64/kernel/ftrace.c @@ -0,0 +1,176 @@ +/* + * arch/arm64/kernel/ftrace.c + * + * Copyright (C) 2013 Linaro Limited + * Author: AKASHI Takahiro <takahiro.akashi@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/ftrace.h> +#include <linux/swab.h> +#include <linux/uaccess.h> + +#include <asm/cacheflush.h> +#include <asm/ftrace.h> +#include <asm/insn.h> + +#ifdef CONFIG_DYNAMIC_FTRACE +/* + * Replace a single instruction, which may be a branch or NOP. + * If @validate == true, a replaced instruction is checked against 'old'. + */ +static int ftrace_modify_code(unsigned long pc, u32 old, u32 new, + bool validate) +{ + u32 replaced; + + /* + * Note: + * Due to modules and __init, code can disappear and change, + * we need to protect against faulting as well as code changing. + * We do this by aarch64_insn_*() which use the probe_kernel_*(). + * + * No lock is held here because all the modifications are run + * through stop_machine(). + */ + if (validate) { + if (aarch64_insn_read((void *)pc, &replaced)) + return -EFAULT; + + if (replaced != old) + return -EINVAL; + } + if (aarch64_insn_patch_text_nosync((void *)pc, new)) + return -EPERM; + + return 0; +} + +/* + * Replace tracer function in ftrace_caller() + */ +int ftrace_update_ftrace_func(ftrace_func_t func) +{ + unsigned long pc; + u32 new; + + pc = (unsigned long)&ftrace_call; + new = aarch64_insn_gen_branch_imm(pc, (unsigned long)func, true); + + return ftrace_modify_code(pc, 0, new, false); +} + +/* + * Turn on the call to ftrace_caller() in instrumented function + */ +int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) +{ + unsigned long pc = rec->ip; + u32 old, new; + + old = aarch64_insn_gen_nop(); + new = aarch64_insn_gen_branch_imm(pc, addr, true); + + return ftrace_modify_code(pc, old, new, true); +} + +/* + * Turn off the call to ftrace_caller() in instrumented function + */ +int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, + unsigned long addr) +{ + unsigned long pc = rec->ip; + u32 old, new; + + old = aarch64_insn_gen_branch_imm(pc, addr, true); + new = aarch64_insn_gen_nop(); + + return ftrace_modify_code(pc, old, new, true); +} + +int __init ftrace_dyn_arch_init(void) +{ + return 0; +} +#endif /* CONFIG_DYNAMIC_FTRACE */ + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +/* + * function_graph tracer expects ftrace_return_to_handler() to be called + * on the way back to parent. For this purpose, this function is called + * in _mcount() or ftrace_caller() to replace return address (*parent) on + * the call stack to return_to_handler. + * + * Note that @frame_pointer is used only for sanity check later. + */ +void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, + unsigned long frame_pointer) +{ + unsigned long return_hooker = (unsigned long)&return_to_handler; + unsigned long old; + struct ftrace_graph_ent trace; + int err; + + if (unlikely(atomic_read(¤t->tracing_graph_pause))) + return; + + /* + * Note: + * No protection against faulting at *parent, which may be seen + * on other archs. It's unlikely on AArch64. + */ + old = *parent; + *parent = return_hooker; + + trace.func = self_addr; + trace.depth = current->curr_ret_stack + 1; + + /* Only trace if the calling function expects to */ + if (!ftrace_graph_entry(&trace)) { + *parent = old; + return; + } + + err = ftrace_push_return_trace(old, self_addr, &trace.depth, + frame_pointer); + if (err == -EBUSY) { + *parent = old; + return; + } +} + +#ifdef CONFIG_DYNAMIC_FTRACE +/* + * Turn on/off the call to ftrace_graph_caller() in ftrace_caller() + * depending on @enable. + */ +static int ftrace_modify_graph_caller(bool enable) +{ + unsigned long pc = (unsigned long)&ftrace_graph_call; + u32 branch, nop; + + branch = aarch64_insn_gen_branch_imm(pc, + (unsigned long)ftrace_graph_caller, false); + nop = aarch64_insn_gen_nop(); + + if (enable) + return ftrace_modify_code(pc, nop, branch, true); + else + return ftrace_modify_code(pc, branch, nop, true); +} + +int ftrace_enable_ftrace_graph_caller(void) +{ + return ftrace_modify_graph_caller(true); +} + +int ftrace_disable_ftrace_graph_caller(void) +{ + return ftrace_modify_graph_caller(false); +} +#endif /* CONFIG_DYNAMIC_FTRACE */ +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 0fd565000772..144f10567f82 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -22,6 +22,7 @@ #include <linux/linkage.h> #include <linux/init.h> +#include <linux/irqchip/arm-gic-v3.h> #include <asm/assembler.h> #include <asm/ptrace.h> @@ -35,37 +36,31 @@ #include <asm/page.h> #include <asm/virt.h> -/* - * swapper_pg_dir is the virtual address of the initial page table. We place - * the page tables 3 * PAGE_SIZE below KERNEL_RAM_VADDR. The idmap_pg_dir has - * 2 pages and is placed below swapper_pg_dir. - */ #define KERNEL_RAM_VADDR (PAGE_OFFSET + TEXT_OFFSET) -#if (KERNEL_RAM_VADDR & 0xfffff) != 0x80000 -#error KERNEL_RAM_VADDR must start at 0xXXX80000 +#if (TEXT_OFFSET & 0xf) != 0 +#error TEXT_OFFSET must be at least 16B aligned +#elif (PAGE_OFFSET & 0xfffff) != 0 +#error PAGE_OFFSET must be at least 2MB aligned +#elif TEXT_OFFSET > 0xfffff +#error TEXT_OFFSET must be less than 2MB #endif -#define SWAPPER_DIR_SIZE (3 * PAGE_SIZE) -#define IDMAP_DIR_SIZE (2 * PAGE_SIZE) - - .globl swapper_pg_dir - .equ swapper_pg_dir, KERNEL_RAM_VADDR - SWAPPER_DIR_SIZE - - .globl idmap_pg_dir - .equ idmap_pg_dir, swapper_pg_dir - IDMAP_DIR_SIZE - - .macro pgtbl, ttb0, ttb1, phys - add \ttb1, \phys, #TEXT_OFFSET - SWAPPER_DIR_SIZE - sub \ttb0, \ttb1, #IDMAP_DIR_SIZE + .macro pgtbl, ttb0, ttb1, virt_to_phys + ldr \ttb1, =swapper_pg_dir + ldr \ttb0, =idmap_pg_dir + add \ttb1, \ttb1, \virt_to_phys + add \ttb0, \ttb0, \virt_to_phys .endm #ifdef CONFIG_ARM64_64K_PAGES #define BLOCK_SHIFT PAGE_SHIFT #define BLOCK_SIZE PAGE_SIZE +#define TABLE_SHIFT PMD_SHIFT #else #define BLOCK_SHIFT SECTION_SHIFT #define BLOCK_SIZE SECTION_SIZE +#define TABLE_SHIFT PUD_SHIFT #endif #define KERNEL_START KERNEL_RAM_VADDR @@ -108,11 +103,21 @@ /* * DO NOT MODIFY. Image header expected by Linux boot-loaders. */ +#ifdef CONFIG_EFI +efi_head: + /* + * This add instruction has no meaningful effect except that + * its opcode forms the magic "MZ" signature required by UEFI. + */ + add x13, x18, #0x16 + b stext +#else b stext // branch to kernel start, magic .long 0 // reserved - .quad TEXT_OFFSET // Image load offset from start of RAM - .quad 0 // reserved - .quad 0 // reserved +#endif + .quad _kernel_offset_le // Image load offset from start of RAM, little-endian + .quad _kernel_size_le // Effective size of kernel image, little-endian + .quad _kernel_flags_le // Informative flags, little-endian .quad 0 // reserved .quad 0 // reserved .quad 0 // reserved @@ -120,7 +125,109 @@ .byte 0x52 .byte 0x4d .byte 0x64 +#ifdef CONFIG_EFI + .long pe_header - efi_head // Offset to the PE header. +#else .word 0 // reserved +#endif + +#ifdef CONFIG_EFI + .align 3 +pe_header: + .ascii "PE" + .short 0 +coff_header: + .short 0xaa64 // AArch64 + .short 2 // nr_sections + .long 0 // TimeDateStamp + .long 0 // PointerToSymbolTable + .long 1 // NumberOfSymbols + .short section_table - optional_header // SizeOfOptionalHeader + .short 0x206 // Characteristics. + // IMAGE_FILE_DEBUG_STRIPPED | + // IMAGE_FILE_EXECUTABLE_IMAGE | + // IMAGE_FILE_LINE_NUMS_STRIPPED +optional_header: + .short 0x20b // PE32+ format + .byte 0x02 // MajorLinkerVersion + .byte 0x14 // MinorLinkerVersion + .long _edata - stext // SizeOfCode + .long 0 // SizeOfInitializedData + .long 0 // SizeOfUninitializedData + .long efi_stub_entry - efi_head // AddressOfEntryPoint + .long stext - efi_head // BaseOfCode + +extra_header_fields: + .quad 0 // ImageBase + .long 0x20 // SectionAlignment + .long 0x8 // FileAlignment + .short 0 // MajorOperatingSystemVersion + .short 0 // MinorOperatingSystemVersion + .short 0 // MajorImageVersion + .short 0 // MinorImageVersion + .short 0 // MajorSubsystemVersion + .short 0 // MinorSubsystemVersion + .long 0 // Win32VersionValue + + .long _edata - efi_head // SizeOfImage + + // Everything before the kernel image is considered part of the header + .long stext - efi_head // SizeOfHeaders + .long 0 // CheckSum + .short 0xa // Subsystem (EFI application) + .short 0 // DllCharacteristics + .quad 0 // SizeOfStackReserve + .quad 0 // SizeOfStackCommit + .quad 0 // SizeOfHeapReserve + .quad 0 // SizeOfHeapCommit + .long 0 // LoaderFlags + .long 0x6 // NumberOfRvaAndSizes + + .quad 0 // ExportTable + .quad 0 // ImportTable + .quad 0 // ResourceTable + .quad 0 // ExceptionTable + .quad 0 // CertificationTable + .quad 0 // BaseRelocationTable + + // Section table +section_table: + + /* + * The EFI application loader requires a relocation section + * because EFI applications must be relocatable. This is a + * dummy section as far as we are concerned. + */ + .ascii ".reloc" + .byte 0 + .byte 0 // end of 0 padding of section name + .long 0 + .long 0 + .long 0 // SizeOfRawData + .long 0 // PointerToRawData + .long 0 // PointerToRelocations + .long 0 // PointerToLineNumbers + .short 0 // NumberOfRelocations + .short 0 // NumberOfLineNumbers + .long 0x42100040 // Characteristics (section flags) + + + .ascii ".text" + .byte 0 + .byte 0 + .byte 0 // end of 0 padding of section name + .long _edata - stext // VirtualSize + .long stext - efi_head // VirtualAddress + .long _edata - stext // SizeOfRawData + .long stext - efi_head // PointerToRawData + + .long 0 // PointerToRelocations (0 for executables) + .long 0 // PointerToLineNumbers (0 for executables) + .short 0 // NumberOfRelocations (0 for executables) + .short 0 // NumberOfLineNumbers (0 for executables) + .long 0xe0500020 // Characteristics (section flags) + .align 5 +#endif ENTRY(stext) mov x21, x0 // x21=FDT @@ -158,8 +265,7 @@ ENDPROC(stext) */ ENTRY(el2_setup) mrs x0, CurrentEL - cmp x0, #PSR_MODE_EL2t - ccmp x0, #PSR_MODE_EL2h, #0x4, ne + cmp x0, #CurrentEL_EL2 b.ne 1f mrs x0, sctlr_el2 CPU_BE( orr x0, x0, #(1 << 25) ) // Set the EE bit for EL2 @@ -184,6 +290,23 @@ CPU_LE( bic x0, x0, #(3 << 24) ) // Clear the EE and E0E bits for EL1 msr cnthctl_el2, x0 msr cntvoff_el2, xzr // Clear virtual offset +#ifdef CONFIG_ARM_GIC_V3 + /* GICv3 system register access */ + mrs x0, id_aa64pfr0_el1 + ubfx x0, x0, #24, #4 + cmp x0, #1 + b.ne 3f + + mrs_s x0, ICC_SRE_EL2 + orr x0, x0, #ICC_SRE_EL2_SRE // Set ICC_SRE_EL2.SRE==1 + orr x0, x0, #ICC_SRE_EL2_ENABLE // Set ICC_SRE_EL2.Enable==1 + msr_s ICC_SRE_EL2, x0 + isb // Make sure SRE is now set + msr_s ICH_HCR_EL2, xzr // Reset ICC_HCR_EL2 to defaults + +3: +#endif + /* Populate ID registers. */ mrs x0, midr_el1 mrs x1, mpidr_el1 @@ -230,11 +353,9 @@ ENTRY(set_cpu_boot_mode_flag) cmp w20, #BOOT_CPU_MODE_EL2 b.ne 1f add x1, x1, #4 -1: dc cvac, x1 // Clean potentially dirty cache line - dsb sy - str w20, [x1] // This CPU has booted in EL1 - dc civac, x1 // Clean&invalidate potentially stale cache line - dsb sy +1: str w20, [x1] // This CPU has booted in EL1 + dmb sy + dc ivac, x1 // Invalidate potentially stale cache line ret ENDPROC(set_cpu_boot_mode_flag) @@ -304,7 +425,7 @@ ENTRY(secondary_startup) mov x23, x0 // x23=current cpu_table cbz x23, __error_p // invalid processor (x23=0)? - pgtbl x25, x26, x24 // x25=TTBR0, x26=TTBR1 + pgtbl x25, x26, x28 // x25=TTBR0, x26=TTBR1 ldr x12, [x23, #CPU_INFO_SETUP] add x12, x12, x28 // __virt_to_phys blr x12 // initialise processor @@ -346,8 +467,13 @@ ENDPROC(__enable_mmu) * x27 = *virtual* address to jump to upon completion * * other registers depend on the function called upon completion + * + * We align the entire function to the smallest power of two larger than it to + * ensure it fits within a single block map entry. Otherwise were PHYS_OFFSET + * close to the end of a 512MB or 1GB block we might require an additional + * table to map the entire function. */ - .align 6 + .align 4 __turn_mmu_on: msr sctlr_el1, x0 isb @@ -370,17 +496,38 @@ ENDPROC(__calc_phys_offset) .quad PAGE_OFFSET /* - * Macro to populate the PGD for the corresponding block entry in the next - * level (tbl) for the given virtual address. + * Macro to create a table entry to the next page. * - * Preserves: pgd, tbl, virt + * tbl: page table address + * virt: virtual address + * shift: #imm page table shift + * ptrs: #imm pointers per table page + * + * Preserves: virt * Corrupts: tmp1, tmp2 + * Returns: tbl -> next level table page address */ - .macro create_pgd_entry, pgd, tbl, virt, tmp1, tmp2 - lsr \tmp1, \virt, #PGDIR_SHIFT - and \tmp1, \tmp1, #PTRS_PER_PGD - 1 // PGD index - orr \tmp2, \tbl, #3 // PGD entry table type - str \tmp2, [\pgd, \tmp1, lsl #3] + .macro create_table_entry, tbl, virt, shift, ptrs, tmp1, tmp2 + lsr \tmp1, \virt, #\shift + and \tmp1, \tmp1, #\ptrs - 1 // table index + add \tmp2, \tbl, #PAGE_SIZE + orr \tmp2, \tmp2, #PMD_TYPE_TABLE // address of next table and entry type + str \tmp2, [\tbl, \tmp1, lsl #3] + add \tbl, \tbl, #PAGE_SIZE // next level table page + .endm + +/* + * Macro to populate the PGD (and possibily PUD) for the corresponding + * block entry in the next level (tbl) for the given virtual address. + * + * Preserves: tbl, next, virt + * Corrupts: tmp1, tmp2 + */ + .macro create_pgd_entry, tbl, virt, tmp1, tmp2 + create_table_entry \tbl, \virt, PGDIR_SHIFT, PTRS_PER_PGD, \tmp1, \tmp2 +#if SWAPPER_PGTABLE_LEVELS == 3 + create_table_entry \tbl, \virt, TABLE_SHIFT, PTRS_PER_PTE, \tmp1, \tmp2 +#endif .endm /* @@ -413,7 +560,7 @@ ENDPROC(__calc_phys_offset) * - pgd entry for fixed mappings (TTBR1) */ __create_page_tables: - pgtbl x25, x26, x24 // idmap_pg_dir and swapper_pg_dir addresses + pgtbl x25, x26, x28 // idmap_pg_dir and swapper_pg_dir addresses mov x27, lr /* @@ -441,10 +588,10 @@ __create_page_tables: /* * Create the identity mapping. */ - add x0, x25, #PAGE_SIZE // section table address + mov x0, x25 // idmap_pg_dir ldr x3, =KERNEL_START add x3, x3, x28 // __pa(KERNEL_START) - create_pgd_entry x25, x0, x3, x5, x6 + create_pgd_entry x0, x3, x5, x6 ldr x6, =KERNEL_END mov x5, x3 // __pa(KERNEL_START) add x6, x6, x28 // __pa(KERNEL_END) @@ -453,9 +600,9 @@ __create_page_tables: /* * Map the kernel image (starting with PHYS_OFFSET). */ - add x0, x26, #PAGE_SIZE // section table address + mov x0, x26 // swapper_pg_dir mov x5, #PAGE_OFFSET - create_pgd_entry x26, x0, x5, x3, x6 + create_pgd_entry x0, x5, x3, x6 ldr x6, =KERNEL_END mov x3, x24 // phys offset create_block_map x0, x7, x3, x5, x6 @@ -477,13 +624,6 @@ __create_page_tables: create_block_map x0, x7, x3, x5, x6 1: /* - * Create the pgd entry for the fixed mappings. - */ - ldr x5, =FIXADDR_TOP // Fixed mapping virtual address - add x0, x26, #2 * PAGE_SIZE // section table address - create_pgd_entry x26, x0, x5, x6, x7 - - /* * Since the page tables have been populated with non-cacheable * accesses (MMU disabled), invalidate the idmap and swapper page * tables again to remove any speculatively loaded cache lines. @@ -502,7 +642,7 @@ ENDPROC(__create_page_tables) __switch_data: .quad __mmap_switched .quad __bss_start // x6 - .quad _end // x7 + .quad __bss_stop // x7 .quad processor_id // x4 .quad __fdt_pointer // x5 .quad memstart_addr // x6 diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index bee789757806..df1cf15377b4 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -20,6 +20,7 @@ #define pr_fmt(fmt) "hw-breakpoint: " fmt +#include <linux/compat.h> #include <linux/cpu_pm.h> #include <linux/errno.h> #include <linux/hw_breakpoint.h> @@ -27,7 +28,6 @@ #include <linux/ptrace.h> #include <linux/smp.h> -#include <asm/compat.h> #include <asm/current.h> #include <asm/debug-monitors.h> #include <asm/hw_breakpoint.h> diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S index 0959611d9ff1..a272f335c289 100644 --- a/arch/arm64/kernel/hyp-stub.S +++ b/arch/arm64/kernel/hyp-stub.S @@ -19,6 +19,7 @@ #include <linux/init.h> #include <linux/linkage.h> +#include <linux/irqchip/arm-gic-v3.h> #include <asm/assembler.h> #include <asm/ptrace.h> diff --git a/arch/arm64/kernel/image.h b/arch/arm64/kernel/image.h new file mode 100644 index 000000000000..8fae0756e175 --- /dev/null +++ b/arch/arm64/kernel/image.h @@ -0,0 +1,62 @@ +/* + * Linker script macros to generate Image header fields. + * + * Copyright (C) 2014 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_IMAGE_H +#define __ASM_IMAGE_H + +#ifndef LINKER_SCRIPT +#error This file should only be included in vmlinux.lds.S +#endif + +/* + * There aren't any ELF relocations we can use to endian-swap values known only + * at link time (e.g. the subtraction of two symbol addresses), so we must get + * the linker to endian-swap certain values before emitting them. + */ +#ifdef CONFIG_CPU_BIG_ENDIAN +#define DATA_LE64(data) \ + ((((data) & 0x00000000000000ff) << 56) | \ + (((data) & 0x000000000000ff00) << 40) | \ + (((data) & 0x0000000000ff0000) << 24) | \ + (((data) & 0x00000000ff000000) << 8) | \ + (((data) & 0x000000ff00000000) >> 8) | \ + (((data) & 0x0000ff0000000000) >> 24) | \ + (((data) & 0x00ff000000000000) >> 40) | \ + (((data) & 0xff00000000000000) >> 56)) +#else +#define DATA_LE64(data) ((data) & 0xffffffffffffffff) +#endif + +#ifdef CONFIG_CPU_BIG_ENDIAN +#define __HEAD_FLAG_BE 1 +#else +#define __HEAD_FLAG_BE 0 +#endif + +#define __HEAD_FLAGS (__HEAD_FLAG_BE << 0) + +/* + * These will output as part of the Image header, which should be little-endian + * regardless of the endianness of the kernel. While constant values could be + * endian swapped in head.S, all are done here for consistency. + */ +#define HEAD_SYMBOLS \ + _kernel_size_le = DATA_LE64(_end - _text); \ + _kernel_offset_le = DATA_LE64(TEXT_OFFSET); \ + _kernel_flags_le = DATA_LE64(__HEAD_FLAGS); + +#endif /* __ASM_IMAGE_H */ diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c index 473e5dbf8f39..0f08dfd69ebc 100644 --- a/arch/arm64/kernel/irq.c +++ b/arch/arm64/kernel/irq.c @@ -97,11 +97,15 @@ static bool migrate_one_irq(struct irq_desc *desc) if (irqd_is_per_cpu(d) || !cpumask_test_cpu(smp_processor_id(), affinity)) return false; - if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { - affinity = cpu_online_mask; + if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) ret = true; - } + /* + * when using forced irq_set_affinity we must ensure that the cpu + * being offlined is not present in the affinity mask, it may be + * selected as the target CPU otherwise + */ + affinity = cpu_online_mask; c = irq_data_get_irq_chip(d); if (!c->irq_set_affinity) pr_debug("IRQ%u: unable to set affinity\n", d->irq); diff --git a/arch/arm64/kernel/kuser32.S b/arch/arm64/kernel/kuser32.S index 7787208e8cc6..997e6b27ff6a 100644 --- a/arch/arm64/kernel/kuser32.S +++ b/arch/arm64/kernel/kuser32.S @@ -28,7 +28,7 @@ * See Documentation/arm/kernel_user_helpers.txt for formal definitions. */ -#include <asm/unistd32.h> +#include <asm/unistd.h> .align 5 .globl __kuser_helper_start diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 6391485f342d..1309d64aa926 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -20,6 +20,7 @@ #include <stdarg.h> +#include <linux/compat.h> #include <linux/export.h> #include <linux/sched.h> #include <linux/kernel.h> @@ -50,6 +51,12 @@ #include <asm/processor.h> #include <asm/stacktrace.h> +#ifdef CONFIG_CC_STACKPROTECTOR +#include <linux/stackprotector.h> +unsigned long __stack_chk_guard __read_mostly; +EXPORT_SYMBOL(__stack_chk_guard); +#endif + static void setup_restart(void) { /* @@ -113,32 +120,62 @@ void arch_cpu_idle_dead(void) } #endif +/* + * Called by kexec, immediately prior to machine_kexec(). + * + * This must completely disable all secondary CPUs; simply causing those CPUs + * to execute e.g. a RAM-based pin loop is not sufficient. This allows the + * kexec'd kernel to use any and all RAM as it sees fit, without having to + * avoid any code or data used by any SW CPU pin loop. The CPU hotplug + * functionality embodied in disable_nonboot_cpus() to achieve this. + */ void machine_shutdown(void) { -#ifdef CONFIG_SMP - smp_send_stop(); -#endif + disable_nonboot_cpus(); } +/* + * Halting simply requires that the secondary CPUs stop performing any + * activity (executing tasks, handling interrupts). smp_send_stop() + * achieves this. + */ void machine_halt(void) { - machine_shutdown(); + local_irq_disable(); + smp_send_stop(); while (1); } +/* + * Power-off simply requires that the secondary CPUs stop performing any + * activity (executing tasks, handling interrupts). smp_send_stop() + * achieves this. When the system power is turned off, it will take all CPUs + * with it. + */ void machine_power_off(void) { - machine_shutdown(); + local_irq_disable(); + smp_send_stop(); if (pm_power_off) pm_power_off(); } +/* + * Restart requires that the secondary CPUs stop performing any activity + * while the primary CPU resets the system. Systems with a single CPU can + * use soft_restart() as their machine descriptor's .restart hook, since that + * will cause the only available CPU to reset. Systems with multiple CPUs must + * provide a HW restart implementation, to ensure that all CPUs reset at once. + * This is required so that any code running after reset on the primary CPU + * doesn't have to co-ordinate with other CPUs to ensure they aren't still + * executing pre-reset code, and using RAM that the primary CPU's code wishes + * to use. Implementing such co-ordination would be essentially impossible. + */ void machine_restart(char *cmd) { - machine_shutdown(); - /* Disable interrupts first */ local_irq_disable(); + smp_send_stop(); /* Now call the architecture specific reboot code. */ if (arm_pm_restart) @@ -205,7 +242,7 @@ void release_thread(struct task_struct *dead_task) int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) { - fpsimd_save_state(¤t->thread.fpsimd_state); + fpsimd_preserve_current_state(); *dst = *src; return 0; } @@ -300,7 +337,7 @@ struct task_struct *__switch_to(struct task_struct *prev, * Complete any pending TLB or cache maintenance on this CPU in case * the thread migrates to a different CPU. */ - dsb(); + dsb(ish); /* the actual thread switch */ last = cpu_switch_to(prev, next); diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c index ea4828a4aa96..553954771a67 100644 --- a/arch/arm64/kernel/psci.c +++ b/arch/arm64/kernel/psci.c @@ -18,12 +18,17 @@ #include <linux/init.h> #include <linux/of.h> #include <linux/smp.h> +#include <linux/reboot.h> +#include <linux/pm.h> +#include <linux/delay.h> +#include <uapi/linux/psci.h> #include <asm/compiler.h> #include <asm/cpu_ops.h> #include <asm/errno.h> #include <asm/psci.h> #include <asm/smp_plat.h> +#include <asm/system_misc.h> #define PSCI_POWER_STATE_TYPE_STANDBY 0 #define PSCI_POWER_STATE_TYPE_POWER_DOWN 1 @@ -40,58 +45,52 @@ struct psci_operations { int (*cpu_off)(struct psci_power_state state); int (*cpu_on)(unsigned long cpuid, unsigned long entry_point); int (*migrate)(unsigned long cpuid); + int (*affinity_info)(unsigned long target_affinity, + unsigned long lowest_affinity_level); + int (*migrate_info_type)(void); }; static struct psci_operations psci_ops; static int (*invoke_psci_fn)(u64, u64, u64, u64); +typedef int (*psci_initcall_t)(const struct device_node *); enum psci_function { PSCI_FN_CPU_SUSPEND, PSCI_FN_CPU_ON, PSCI_FN_CPU_OFF, PSCI_FN_MIGRATE, + PSCI_FN_AFFINITY_INFO, + PSCI_FN_MIGRATE_INFO_TYPE, PSCI_FN_MAX, }; static u32 psci_function_id[PSCI_FN_MAX]; -#define PSCI_RET_SUCCESS 0 -#define PSCI_RET_EOPNOTSUPP -1 -#define PSCI_RET_EINVAL -2 -#define PSCI_RET_EPERM -3 - static int psci_to_linux_errno(int errno) { switch (errno) { case PSCI_RET_SUCCESS: return 0; - case PSCI_RET_EOPNOTSUPP: + case PSCI_RET_NOT_SUPPORTED: return -EOPNOTSUPP; - case PSCI_RET_EINVAL: + case PSCI_RET_INVALID_PARAMS: return -EINVAL; - case PSCI_RET_EPERM: + case PSCI_RET_DENIED: return -EPERM; }; return -EINVAL; } -#define PSCI_POWER_STATE_ID_MASK 0xffff -#define PSCI_POWER_STATE_ID_SHIFT 0 -#define PSCI_POWER_STATE_TYPE_MASK 0x1 -#define PSCI_POWER_STATE_TYPE_SHIFT 16 -#define PSCI_POWER_STATE_AFFL_MASK 0x3 -#define PSCI_POWER_STATE_AFFL_SHIFT 24 - static u32 psci_power_state_pack(struct psci_power_state state) { - return ((state.id & PSCI_POWER_STATE_ID_MASK) - << PSCI_POWER_STATE_ID_SHIFT) | - ((state.type & PSCI_POWER_STATE_TYPE_MASK) - << PSCI_POWER_STATE_TYPE_SHIFT) | - ((state.affinity_level & PSCI_POWER_STATE_AFFL_MASK) - << PSCI_POWER_STATE_AFFL_SHIFT); + return ((state.id << PSCI_0_2_POWER_STATE_ID_SHIFT) + & PSCI_0_2_POWER_STATE_ID_MASK) | + ((state.type << PSCI_0_2_POWER_STATE_TYPE_SHIFT) + & PSCI_0_2_POWER_STATE_TYPE_MASK) | + ((state.affinity_level << PSCI_0_2_POWER_STATE_AFFL_SHIFT) + & PSCI_0_2_POWER_STATE_AFFL_MASK); } /* @@ -128,6 +127,14 @@ static noinline int __invoke_psci_fn_smc(u64 function_id, u64 arg0, u64 arg1, return function_id; } +static int psci_get_version(void) +{ + int err; + + err = invoke_psci_fn(PSCI_0_2_FN_PSCI_VERSION, 0, 0, 0); + return err; +} + static int psci_cpu_suspend(struct psci_power_state state, unsigned long entry_point) { @@ -171,26 +178,36 @@ static int psci_migrate(unsigned long cpuid) return psci_to_linux_errno(err); } -static const struct of_device_id psci_of_match[] __initconst = { - { .compatible = "arm,psci", }, - {}, -}; +static int psci_affinity_info(unsigned long target_affinity, + unsigned long lowest_affinity_level) +{ + int err; + u32 fn; + + fn = psci_function_id[PSCI_FN_AFFINITY_INFO]; + err = invoke_psci_fn(fn, target_affinity, lowest_affinity_level, 0); + return err; +} -void __init psci_init(void) +static int psci_migrate_info_type(void) { - struct device_node *np; - const char *method; - u32 id; + int err; + u32 fn; - np = of_find_matching_node(NULL, psci_of_match); - if (!np) - return; + fn = psci_function_id[PSCI_FN_MIGRATE_INFO_TYPE]; + err = invoke_psci_fn(fn, 0, 0, 0); + return err; +} + +static int get_set_conduit_method(struct device_node *np) +{ + const char *method; - pr_info("probing function IDs from device-tree\n"); + pr_info("probing for conduit method from DT.\n"); if (of_property_read_string(np, "method", &method)) { - pr_warning("missing \"method\" property\n"); - goto out_put_node; + pr_warn("missing \"method\" property\n"); + return -ENXIO; } if (!strcmp("hvc", method)) { @@ -198,10 +215,99 @@ void __init psci_init(void) } else if (!strcmp("smc", method)) { invoke_psci_fn = __invoke_psci_fn_smc; } else { - pr_warning("invalid \"method\" property: %s\n", method); + pr_warn("invalid \"method\" property: %s\n", method); + return -EINVAL; + } + return 0; +} + +static void psci_sys_reset(enum reboot_mode reboot_mode, const char *cmd) +{ + invoke_psci_fn(PSCI_0_2_FN_SYSTEM_RESET, 0, 0, 0); +} + +static void psci_sys_poweroff(void) +{ + invoke_psci_fn(PSCI_0_2_FN_SYSTEM_OFF, 0, 0, 0); +} + +/* + * PSCI Function IDs for v0.2+ are well defined so use + * standard values. + */ +static int __init psci_0_2_init(struct device_node *np) +{ + int err, ver; + + err = get_set_conduit_method(np); + + if (err) goto out_put_node; + + ver = psci_get_version(); + + if (ver == PSCI_RET_NOT_SUPPORTED) { + /* PSCI v0.2 mandates implementation of PSCI_ID_VERSION. */ + pr_err("PSCI firmware does not comply with the v0.2 spec.\n"); + err = -EOPNOTSUPP; + goto out_put_node; + } else { + pr_info("PSCIv%d.%d detected in firmware.\n", + PSCI_VERSION_MAJOR(ver), + PSCI_VERSION_MINOR(ver)); + + if (PSCI_VERSION_MAJOR(ver) == 0 && + PSCI_VERSION_MINOR(ver) < 2) { + err = -EINVAL; + pr_err("Conflicting PSCI version detected.\n"); + goto out_put_node; + } } + pr_info("Using standard PSCI v0.2 function IDs\n"); + psci_function_id[PSCI_FN_CPU_SUSPEND] = PSCI_0_2_FN64_CPU_SUSPEND; + psci_ops.cpu_suspend = psci_cpu_suspend; + + psci_function_id[PSCI_FN_CPU_OFF] = PSCI_0_2_FN_CPU_OFF; + psci_ops.cpu_off = psci_cpu_off; + + psci_function_id[PSCI_FN_CPU_ON] = PSCI_0_2_FN64_CPU_ON; + psci_ops.cpu_on = psci_cpu_on; + + psci_function_id[PSCI_FN_MIGRATE] = PSCI_0_2_FN64_MIGRATE; + psci_ops.migrate = psci_migrate; + + psci_function_id[PSCI_FN_AFFINITY_INFO] = PSCI_0_2_FN64_AFFINITY_INFO; + psci_ops.affinity_info = psci_affinity_info; + + psci_function_id[PSCI_FN_MIGRATE_INFO_TYPE] = + PSCI_0_2_FN_MIGRATE_INFO_TYPE; + psci_ops.migrate_info_type = psci_migrate_info_type; + + arm_pm_restart = psci_sys_reset; + + pm_power_off = psci_sys_poweroff; + +out_put_node: + of_node_put(np); + return err; +} + +/* + * PSCI < v0.2 get PSCI Function IDs via DT. + */ +static int __init psci_0_1_init(struct device_node *np) +{ + u32 id; + int err; + + err = get_set_conduit_method(np); + + if (err) + goto out_put_node; + + pr_info("Using PSCI v0.1 Function IDs from DT\n"); + if (!of_property_read_u32(np, "cpu_suspend", &id)) { psci_function_id[PSCI_FN_CPU_SUSPEND] = id; psci_ops.cpu_suspend = psci_cpu_suspend; @@ -224,7 +330,28 @@ void __init psci_init(void) out_put_node: of_node_put(np); - return; + return err; +} + +static const struct of_device_id psci_of_match[] __initconst = { + { .compatible = "arm,psci", .data = psci_0_1_init}, + { .compatible = "arm,psci-0.2", .data = psci_0_2_init}, + {}, +}; + +int __init psci_init(void) +{ + struct device_node *np; + const struct of_device_id *matched_np; + psci_initcall_t init_fn; + + np = of_find_matching_node_and_match(NULL, psci_of_match, &matched_np); + + if (!np) + return -ENODEV; + + init_fn = (psci_initcall_t)matched_np->data; + return init_fn(np); } #ifdef CONFIG_SMP @@ -277,17 +404,49 @@ static void cpu_psci_cpu_die(unsigned int cpu) pr_crit("unable to power off CPU%u (%d)\n", cpu, ret); } + +static int cpu_psci_cpu_kill(unsigned int cpu) +{ + int err, i; + + if (!psci_ops.affinity_info) + return 1; + /* + * cpu_kill could race with cpu_die and we can + * potentially end up declaring this cpu undead + * while it is dying. So, try again a few times. + */ + + for (i = 0; i < 10; i++) { + err = psci_ops.affinity_info(cpu_logical_map(cpu), 0); + if (err == PSCI_0_2_AFFINITY_LEVEL_OFF) { + pr_info("CPU%d killed.\n", cpu); + return 1; + } + + msleep(10); + pr_info("Retrying again to check for CPU kill\n"); + } + + pr_warn("CPU%d may not have shut down cleanly (AFFINITY_INFO reports %d)\n", + cpu, err); + /* Make op_cpu_kill() fail. */ + return 0; +} +#endif #endif const struct cpu_operations cpu_psci_ops = { .name = "psci", +#ifdef CONFIG_SMP .cpu_init = cpu_psci_cpu_init, .cpu_prepare = cpu_psci_cpu_prepare, .cpu_boot = cpu_psci_cpu_boot, #ifdef CONFIG_HOTPLUG_CPU .cpu_disable = cpu_psci_cpu_disable, .cpu_die = cpu_psci_cpu_die, + .cpu_kill = cpu_psci_cpu_kill, +#endif #endif }; -#endif diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 6a8928bba03c..0310811bd77d 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -19,6 +19,8 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ +#include <linux/audit.h> +#include <linux/compat.h> #include <linux/kernel.h> #include <linux/sched.h> #include <linux/mm.h> @@ -38,9 +40,13 @@ #include <asm/compat.h> #include <asm/debug-monitors.h> #include <asm/pgtable.h> +#include <asm/syscall.h> #include <asm/traps.h> #include <asm/system_misc.h> +#define CREATE_TRACE_POINTS +#include <trace/events/syscalls.h> + /* * TODO: does not yet catch signals sent when the child dies. * in exit.c or in signal.c. @@ -517,6 +523,7 @@ static int fpr_set(struct task_struct *target, const struct user_regset *regset, return ret; target->thread.fpsimd_state.user_fpsimd = newstate; + fpsimd_flush_task_state(target); return ret; } @@ -650,11 +657,16 @@ static int compat_gpr_get(struct task_struct *target, reg = task_pt_regs(target)->regs[idx]; } - ret = copy_to_user(ubuf, ®, sizeof(reg)); - if (ret) - break; + if (kbuf) { + memcpy(kbuf, ®, sizeof(reg)); + kbuf += sizeof(reg); + } else { + ret = copy_to_user(ubuf, ®, sizeof(reg)); + if (ret) + break; - ubuf += sizeof(reg); + ubuf += sizeof(reg); + } } return ret; @@ -684,11 +696,16 @@ static int compat_gpr_set(struct task_struct *target, unsigned int idx = start + i; compat_ulong_t reg; - ret = copy_from_user(®, ubuf, sizeof(reg)); - if (ret) - return ret; + if (kbuf) { + memcpy(®, kbuf, sizeof(reg)); + kbuf += sizeof(reg); + } else { + ret = copy_from_user(®, ubuf, sizeof(reg)); + if (ret) + return ret; - ubuf += sizeof(reg); + ubuf += sizeof(reg); + } switch (idx) { case 15: @@ -764,6 +781,7 @@ static int compat_vfp_set(struct task_struct *target, uregs->fpcr = fpscr & VFP_FPSCR_CTRL_MASK; } + fpsimd_flush_task_state(target); return ret; } @@ -821,6 +839,7 @@ static int compat_ptrace_write_user(struct task_struct *tsk, compat_ulong_t off, compat_ulong_t val) { int ret; + mm_segment_t old_fs = get_fs(); if (off & 3 || off >= COMPAT_USER_SZ) return -EIO; @@ -828,10 +847,13 @@ static int compat_ptrace_write_user(struct task_struct *tsk, compat_ulong_t off, if (off >= sizeof(compat_elf_gregset_t)) return 0; + set_fs(KERNEL_DS); ret = copy_regset_from_user(tsk, &user_aarch32_view, REGSET_COMPAT_GPR, off, sizeof(compat_ulong_t), &val); + set_fs(old_fs); + return ret; } @@ -1058,35 +1080,58 @@ long arch_ptrace(struct task_struct *child, long request, return ptrace_request(child, request, addr, data); } -asmlinkage int syscall_trace(int dir, struct pt_regs *regs) +enum ptrace_syscall_dir { + PTRACE_SYSCALL_ENTER = 0, + PTRACE_SYSCALL_EXIT, +}; + +static void tracehook_report_syscall(struct pt_regs *regs, + enum ptrace_syscall_dir dir) { + int regno; unsigned long saved_reg; - if (!test_thread_flag(TIF_SYSCALL_TRACE)) - return regs->syscallno; - - if (is_compat_task()) { - /* AArch32 uses ip (r12) for scratch */ - saved_reg = regs->regs[12]; - regs->regs[12] = dir; - } else { - /* - * Save X7. X7 is used to denote syscall entry/exit: - * X7 = 0 -> entry, = 1 -> exit - */ - saved_reg = regs->regs[7]; - regs->regs[7] = dir; - } + /* + * A scratch register (ip(r12) on AArch32, x7 on AArch64) is + * used to denote syscall entry/exit: + */ + regno = (is_compat_task() ? 12 : 7); + saved_reg = regs->regs[regno]; + regs->regs[regno] = dir; - if (dir) + if (dir == PTRACE_SYSCALL_EXIT) tracehook_report_syscall_exit(regs, 0); else if (tracehook_report_syscall_entry(regs)) regs->syscallno = ~0UL; - if (is_compat_task()) - regs->regs[12] = saved_reg; - else - regs->regs[7] = saved_reg; + regs->regs[regno] = saved_reg; +} + +asmlinkage int syscall_trace_enter(struct pt_regs *regs) +{ + if (test_thread_flag(TIF_SYSCALL_TRACE)) + tracehook_report_syscall(regs, PTRACE_SYSCALL_ENTER); + + if (test_thread_flag(TIF_SYSCALL_TRACEPOINT)) + trace_sys_enter(regs, regs->syscallno); + +#ifdef CONFIG_AUDITSYSCALL + audit_syscall_entry(syscall_get_arch(), regs->syscallno, + regs->orig_x0, regs->regs[1], regs->regs[2], regs->regs[3]); +#endif return regs->syscallno; } + +asmlinkage void syscall_trace_exit(struct pt_regs *regs) +{ +#ifdef CONFIG_AUDITSYSCALL + audit_syscall_exit(regs); +#endif + + if (test_thread_flag(TIF_SYSCALL_TRACEPOINT)) + trace_sys_exit(regs, regs_return_value(regs)); + + if (test_thread_flag(TIF_SYSCALL_TRACE)) + tracehook_report_syscall(regs, PTRACE_SYSCALL_EXIT); +} diff --git a/arch/arm64/kernel/return_address.c b/arch/arm64/kernel/return_address.c new file mode 100644 index 000000000000..89102a6ffad5 --- /dev/null +++ b/arch/arm64/kernel/return_address.c @@ -0,0 +1,55 @@ +/* + * arch/arm64/kernel/return_address.c + * + * Copyright (C) 2013 Linaro Limited + * Author: AKASHI Takahiro <takahiro.akashi@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/export.h> +#include <linux/ftrace.h> + +#include <asm/stacktrace.h> + +struct return_address_data { + unsigned int level; + void *addr; +}; + +static int save_return_addr(struct stackframe *frame, void *d) +{ + struct return_address_data *data = d; + + if (!data->level) { + data->addr = (void *)frame->pc; + return 1; + } else { + --data->level; + return 0; + } +} + +void *return_address(unsigned int level) +{ + struct return_address_data data; + struct stackframe frame; + register unsigned long current_sp asm ("sp"); + + data.level = level + 2; + data.addr = NULL; + + frame.fp = (unsigned long)__builtin_frame_address(0); + frame.sp = current_sp; + frame.pc = (unsigned long)return_address; /* dummy */ + + walk_stackframe(&frame, save_return_addr, &data); + + if (!data.level) + return data.addr; + else + return NULL; +} +EXPORT_SYMBOL_GPL(return_address); diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 93e7df8968fe..f6f0ccf35ae6 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -25,6 +25,7 @@ #include <linux/utsname.h> #include <linux/initrd.h> #include <linux/console.h> +#include <linux/cache.h> #include <linux/bootmem.h> #include <linux/seq_file.h> #include <linux/screen_info.h> @@ -41,8 +42,10 @@ #include <linux/memblock.h> #include <linux/of_fdt.h> #include <linux/of_platform.h> +#include <linux/efi.h> #include <asm/fixmap.h> +#include <asm/cpu.h> #include <asm/cputype.h> #include <asm/elf.h> #include <asm/cputable.h> @@ -55,6 +58,7 @@ #include <asm/traps.h> #include <asm/memblock.h> #include <asm/psci.h> +#include <asm/efi.h> unsigned int processor_id; EXPORT_SYMBOL(processor_id); @@ -74,7 +78,6 @@ unsigned int compat_elf_hwcap2 __read_mostly; #endif static const char *cpu_name; -static const char *machine_name; phys_addr_t __fdt_pointer __initdata; /* @@ -198,6 +201,8 @@ static void __init setup_processor(void) { struct cpu_info *cpu_info; u64 features, block; + u32 cwg; + int cls; cpu_info = lookup_processor_type(read_cpuid_id()); if (!cpu_info) { @@ -214,6 +219,20 @@ static void __init setup_processor(void) sprintf(init_utsname()->machine, ELF_PLATFORM); elf_hwcap = 0; + cpuinfo_store_boot_cpu(); + + /* + * Check for sane CTR_EL0.CWG value. + */ + cwg = cache_type_cwg(); + cls = cache_line_size(); + if (!cwg) + pr_warn("No Cache Writeback Granule information, assuming cache line size %d\n", + cls); + if (L1_CACHE_BYTES < cls) + pr_warn("L1_CACHE_BYTES smaller than the Cache Writeback Granule (%d < %d)\n", + L1_CACHE_BYTES, cls); + /* * ID_AA64ISAR0_EL1 contains 4-bit wide signed feature blocks. * The blocks we test below represent incremental functionality @@ -290,8 +309,6 @@ static void __init setup_machine_fdt(phys_addr_t dt_phys) while (true) cpu_relax(); } - - machine_name = of_flat_dt_get_machine_name(); } /* @@ -361,16 +378,18 @@ void __init setup_arch(char **cmdline_p) *cmdline_p = boot_command_line; - init_mem_pgprot(); early_ioremap_init(); parse_early_param(); + efi_init(); arm64_memblock_init(); paging_init(); request_standard_resources(); + efi_idmap_init(); + unflatten_device_tree(); psci_init(); @@ -396,16 +415,14 @@ static int __init arm64_device_init(void) of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL); return 0; } -arch_initcall(arm64_device_init); - -static DEFINE_PER_CPU(struct cpu, cpu_data); +arch_initcall_sync(arm64_device_init); static int __init topology_init(void) { int i; for_each_possible_cpu(i) { - struct cpu *cpu = &per_cpu(cpu_data, i); + struct cpu *cpu = &per_cpu(cpu_data.cpu, i); cpu->hotpluggable = 1; register_cpu(cpu, i); } @@ -430,10 +447,21 @@ static int c_show(struct seq_file *m, void *v) { int i; - seq_printf(m, "Processor\t: %s rev %d (%s)\n", - cpu_name, read_cpuid_id() & 15, ELF_PLATFORM); + /* + * Dump out the common processor features in a single line. Userspace + * should read the hwcaps with getauxval(AT_HWCAP) rather than + * attempting to parse this. + */ + seq_puts(m, "features\t:"); + for (i = 0; hwcap_str[i]; i++) + if (elf_hwcap & (1 << i)) + seq_printf(m, " %s", hwcap_str[i]); + seq_puts(m, "\n\n"); for_each_online_cpu(i) { + struct cpuinfo_arm64 *cpuinfo = &per_cpu(cpu_data, i); + u32 midr = cpuinfo->reg_midr; + /* * glibc reads /proc/cpuinfo to determine the number of * online processors, looking for lines beginning with @@ -442,25 +470,13 @@ static int c_show(struct seq_file *m, void *v) #ifdef CONFIG_SMP seq_printf(m, "processor\t: %d\n", i); #endif + seq_printf(m, "implementer\t: 0x%02x\n", + MIDR_IMPLEMENTOR(midr)); + seq_printf(m, "variant\t\t: 0x%x\n", MIDR_VARIANT(midr)); + seq_printf(m, "partnum\t\t: 0x%03x\n", MIDR_PARTNUM(midr)); + seq_printf(m, "revision\t: 0x%x\n\n", MIDR_REVISION(midr)); } - /* dump out the processor features */ - seq_puts(m, "Features\t: "); - - for (i = 0; hwcap_str[i]; i++) - if (elf_hwcap & (1 << i)) - seq_printf(m, "%s ", hwcap_str[i]); - - seq_printf(m, "\nCPU implementer\t: 0x%02x\n", read_cpuid_id() >> 24); - seq_printf(m, "CPU architecture: AArch64\n"); - seq_printf(m, "CPU variant\t: 0x%x\n", (read_cpuid_id() >> 20) & 15); - seq_printf(m, "CPU part\t: 0x%03x\n", (read_cpuid_id() >> 4) & 0xfff); - seq_printf(m, "CPU revision\t: %d\n", read_cpuid_id() & 15); - - seq_puts(m, "\n"); - - seq_printf(m, "Hardware\t: %s\n", machine_name); - return 0; } diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 890a591f75dd..6357b9c6c90e 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -17,6 +17,7 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ +#include <linux/compat.h> #include <linux/errno.h> #include <linux/signal.h> #include <linux/personality.h> @@ -25,7 +26,6 @@ #include <linux/tracehook.h> #include <linux/ratelimit.h> -#include <asm/compat.h> #include <asm/debug-monitors.h> #include <asm/elf.h> #include <asm/cacheflush.h> @@ -51,7 +51,7 @@ static int preserve_fpsimd_context(struct fpsimd_context __user *ctx) int err; /* dump the hardware registers to the fpsimd_state structure */ - fpsimd_save_state(fpsimd); + fpsimd_preserve_current_state(); /* copy the FP and status/control registers */ err = __copy_to_user(ctx->vregs, fpsimd->vregs, sizeof(fpsimd->vregs)); @@ -86,11 +86,8 @@ static int restore_fpsimd_context(struct fpsimd_context __user *ctx) __get_user_error(fpsimd.fpcr, &ctx->fpcr, err); /* load the hardware registers from the fpsimd_state structure */ - if (!err) { - preempt_disable(); - fpsimd_load_state(&fpsimd); - preempt_enable(); - } + if (!err) + fpsimd_update_current_state(&fpsimd); return err ? -EFAULT : 0; } @@ -100,8 +97,7 @@ static int restore_sigframe(struct pt_regs *regs, { sigset_t set; int i, err; - struct aux_context __user *aux = - (struct aux_context __user *)sf->uc.uc_mcontext.__reserved; + void *aux = sf->uc.uc_mcontext.__reserved; err = __copy_from_user(&set, &sf->uc.uc_sigmask, sizeof(set)); if (err == 0) @@ -121,8 +117,11 @@ static int restore_sigframe(struct pt_regs *regs, err |= !valid_user_regs(®s->user_regs); - if (err == 0) - err |= restore_fpsimd_context(&aux->fpsimd); + if (err == 0) { + struct fpsimd_context *fpsimd_ctx = + container_of(aux, struct fpsimd_context, head); + err |= restore_fpsimd_context(fpsimd_ctx); + } return err; } @@ -167,8 +166,8 @@ static int setup_sigframe(struct rt_sigframe __user *sf, struct pt_regs *regs, sigset_t *set) { int i, err = 0; - struct aux_context __user *aux = - (struct aux_context __user *)sf->uc.uc_mcontext.__reserved; + void *aux = sf->uc.uc_mcontext.__reserved; + struct _aarch64_ctx *end; /* set up the stack frame for unwinding */ __put_user_error(regs->regs[29], &sf->fp, err); @@ -185,12 +184,27 @@ static int setup_sigframe(struct rt_sigframe __user *sf, err |= __copy_to_user(&sf->uc.uc_sigmask, set, sizeof(*set)); - if (err == 0) - err |= preserve_fpsimd_context(&aux->fpsimd); + if (err == 0) { + struct fpsimd_context *fpsimd_ctx = + container_of(aux, struct fpsimd_context, head); + err |= preserve_fpsimd_context(fpsimd_ctx); + aux += sizeof(*fpsimd_ctx); + } + + /* fault information, if valid */ + if (current->thread.fault_code) { + struct esr_context *esr_ctx = + container_of(aux, struct esr_context, head); + __put_user_error(ESR_MAGIC, &esr_ctx->head.magic, err); + __put_user_error(sizeof(*esr_ctx), &esr_ctx->head.size, err); + __put_user_error(current->thread.fault_code, &esr_ctx->esr, err); + aux += sizeof(*esr_ctx); + } /* set the "end" magic */ - __put_user_error(0, &aux->end.magic, err); - __put_user_error(0, &aux->end.size, err); + end = aux; + __put_user_error(0, &end->magic, err); + __put_user_error(0, &end->size, err); return err; } @@ -416,4 +430,8 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, clear_thread_flag(TIF_NOTIFY_RESUME); tracehook_notify_resume(regs); } + + if (thread_flags & _TIF_FOREIGN_FPSTATE) + fpsimd_restore_current_state(); + } diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c index b3fc9f5ec6d3..c5ee208321c3 100644 --- a/arch/arm64/kernel/signal32.c +++ b/arch/arm64/kernel/signal32.c @@ -23,10 +23,11 @@ #include <linux/syscalls.h> #include <linux/ratelimit.h> +#include <asm/esr.h> #include <asm/fpsimd.h> #include <asm/signal32.h> #include <asm/uaccess.h> -#include <asm/unistd32.h> +#include <asm/unistd.h> struct compat_sigcontext { /* We always set these two fields to 0 */ @@ -81,6 +82,8 @@ struct compat_vfp_sigframe { #define VFP_MAGIC 0x56465001 #define VFP_STORAGE_SIZE sizeof(struct compat_vfp_sigframe) +#define FSR_WRITE_SHIFT (11) + struct compat_aux_sigframe { struct compat_vfp_sigframe vfp; @@ -219,7 +222,7 @@ static int compat_preserve_vfp_context(struct compat_vfp_sigframe __user *frame) * Note that this also saves V16-31, which aren't visible * in AArch32. */ - fpsimd_save_state(fpsimd); + fpsimd_preserve_current_state(); /* Place structure header on the stack */ __put_user_error(magic, &frame->magic, err); @@ -282,11 +285,8 @@ static int compat_restore_vfp_context(struct compat_vfp_sigframe __user *frame) * We don't need to touch the exception register, so * reload the hardware state. */ - if (!err) { - preempt_disable(); - fpsimd_load_state(&fpsimd); - preempt_enable(); - } + if (!err) + fpsimd_update_current_state(&fpsimd); return err ? -EFAULT : 0; } @@ -500,7 +500,9 @@ static int compat_setup_sigframe(struct compat_sigframe __user *sf, __put_user_error(regs->pstate, &sf->uc.uc_mcontext.arm_cpsr, err); __put_user_error((compat_ulong_t)0, &sf->uc.uc_mcontext.trap_no, err); - __put_user_error((compat_ulong_t)0, &sf->uc.uc_mcontext.error_code, err); + /* set the compat FSR WnR */ + __put_user_error(!!(current->thread.fault_code & ESR_EL1_WRITE) << + FSR_WRITE_SHIFT, &sf->uc.uc_mcontext.error_code, err); __put_user_error(current->thread.fault_address, &sf->uc.uc_mcontext.fault_address, err); __put_user_error(set->sig[0], &sf->uc.uc_mcontext.oldmask, err); diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index f0a141dd5655..3e2f5ebbf63e 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -35,9 +35,11 @@ #include <linux/clockchips.h> #include <linux/completion.h> #include <linux/of.h> +#include <linux/irq_work.h> #include <asm/atomic.h> #include <asm/cacheflush.h> +#include <asm/cpu.h> #include <asm/cputype.h> #include <asm/cpu_ops.h> #include <asm/mmu_context.h> @@ -62,6 +64,7 @@ enum ipi_msg_type { IPI_CALL_FUNC_SINGLE, IPI_CPU_STOP, IPI_TIMER, + IPI_IRQ_WORK, }; /* @@ -153,6 +156,11 @@ asmlinkage void secondary_start_kernel(void) cpu_ops[cpu]->cpu_postboot(); /* + * Log the CPU info before it is marked online and might get read. + */ + cpuinfo_store_cpu(); + + /* * Enable GIC and timers. */ notify_cpu_starting(cpu); @@ -228,6 +236,19 @@ int __cpu_disable(void) return 0; } +static int op_cpu_kill(unsigned int cpu) +{ + /* + * If we have no means of synchronising with the dying CPU, then assume + * that it is really dead. We can only wait for an arbitrary length of + * time and hope that it's dead, so let's skip the wait and just hope. + */ + if (!cpu_ops[cpu]->cpu_kill) + return 1; + + return cpu_ops[cpu]->cpu_kill(cpu); +} + static DECLARE_COMPLETION(cpu_died); /* @@ -241,6 +262,15 @@ void __cpu_die(unsigned int cpu) return; } pr_notice("CPU%u: shutdown\n", cpu); + + /* + * Now that the dying CPU is beyond the point of no return w.r.t. + * in-kernel synchronisation, try to get the firwmare to help us to + * verify that it has really left the kernel before we consider + * clobbering anything it might still be using. + */ + if (!op_cpu_kill(cpu)) + pr_warn("CPU%d may not have shut down cleanly\n", cpu); } /* @@ -455,6 +485,14 @@ void arch_send_call_function_single_ipi(int cpu) smp_cross_call(cpumask_of(cpu), IPI_CALL_FUNC_SINGLE); } +#ifdef CONFIG_IRQ_WORK +void arch_irq_work_raise(void) +{ + if (smp_cross_call) + smp_cross_call(cpumask_of(smp_processor_id()), IPI_IRQ_WORK); +} +#endif + static const char *ipi_types[NR_IPI] = { #define S(x,s) [x - IPI_RESCHEDULE] = s S(IPI_RESCHEDULE, "Rescheduling interrupts"), @@ -462,6 +500,7 @@ static const char *ipi_types[NR_IPI] = { S(IPI_CALL_FUNC_SINGLE, "Single function call interrupts"), S(IPI_CPU_STOP, "CPU stop interrupts"), S(IPI_TIMER, "Timer broadcast interrupts"), + S(IPI_IRQ_WORK, "IRQ work interrupts"), }; void show_ipi_list(struct seq_file *p, int prec) @@ -554,6 +593,14 @@ void handle_IPI(int ipinr, struct pt_regs *regs) break; #endif +#ifdef CONFIG_IRQ_WORK + case IPI_IRQ_WORK: + irq_enter(); + irq_work_run(); + irq_exit(); + break; +#endif + default: pr_crit("CPU%u: Unknown IPI message 0x%x\n", cpu, ipinr); break; diff --git a/arch/arm64/kernel/smp_spin_table.c b/arch/arm64/kernel/smp_spin_table.c index 7a530d2cc807..0347d38eea29 100644 --- a/arch/arm64/kernel/smp_spin_table.c +++ b/arch/arm64/kernel/smp_spin_table.c @@ -30,7 +30,6 @@ extern void secondary_holding_pen(void); volatile unsigned long secondary_holding_pen_release = INVALID_HWID; static phys_addr_t cpu_release_addr[NR_CPUS]; -static DEFINE_RAW_SPINLOCK(boot_lock); /* * Write secondary_holding_pen_release in a way that is guaranteed to be @@ -94,14 +93,6 @@ static int smp_spin_table_cpu_prepare(unsigned int cpu) static int smp_spin_table_cpu_boot(unsigned int cpu) { - unsigned long timeout; - - /* - * Set synchronisation state between this boot processor - * and the secondary one - */ - raw_spin_lock(&boot_lock); - /* * Update the pen release flag. */ @@ -112,34 +103,7 @@ static int smp_spin_table_cpu_boot(unsigned int cpu) */ sev(); - timeout = jiffies + (1 * HZ); - while (time_before(jiffies, timeout)) { - if (secondary_holding_pen_release == INVALID_HWID) - break; - udelay(10); - } - - /* - * Now the secondary core is starting up let it run its - * calibrations, then wait for it to finish - */ - raw_spin_unlock(&boot_lock); - - return secondary_holding_pen_release != INVALID_HWID ? -ENOSYS : 0; -} - -static void smp_spin_table_cpu_postboot(void) -{ - /* - * Let the primary processor know we're out of the pen. - */ - write_pen_release(INVALID_HWID); - - /* - * Synchronise with the boot thread. - */ - raw_spin_lock(&boot_lock); - raw_spin_unlock(&boot_lock); + return 0; } const struct cpu_operations smp_spin_table_ops = { @@ -147,5 +111,4 @@ const struct cpu_operations smp_spin_table_ops = { .cpu_init = smp_spin_table_cpu_init, .cpu_prepare = smp_spin_table_cpu_prepare, .cpu_boot = smp_spin_table_cpu_boot, - .cpu_postboot = smp_spin_table_cpu_postboot, }; diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index 38f0558f0c0a..55437ba1f5a4 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -35,7 +35,7 @@ * ldp x29, x30, [sp] * add sp, sp, #0x10 */ -int unwind_frame(struct stackframe *frame) +int notrace unwind_frame(struct stackframe *frame) { unsigned long high, low; unsigned long fp = frame->fp; diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c index 1fa9ce4afd8f..55a99b9a97e0 100644 --- a/arch/arm64/kernel/suspend.c +++ b/arch/arm64/kernel/suspend.c @@ -119,7 +119,7 @@ int cpu_suspend(unsigned long arg) extern struct sleep_save_sp sleep_save_sp; extern phys_addr_t sleep_idmap_phys; -static int cpu_suspend_init(void) +static int __init cpu_suspend_init(void) { void *ctx_ptr; diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c index 26e9c4eeaba8..de2b0226e06d 100644 --- a/arch/arm64/kernel/sys_compat.c +++ b/arch/arm64/kernel/sys_compat.c @@ -26,7 +26,7 @@ #include <linux/uaccess.h> #include <asm/cacheflush.h> -#include <asm/unistd32.h> +#include <asm/unistd.h> static inline void do_compat_cache_op(unsigned long start, unsigned long end, int flags) diff --git a/arch/arm64/kernel/time.c b/arch/arm64/kernel/time.c index 6815987b50f8..1a7125c3099b 100644 --- a/arch/arm64/kernel/time.c +++ b/arch/arm64/kernel/time.c @@ -18,6 +18,7 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ +#include <linux/clockchips.h> #include <linux/export.h> #include <linux/kernel.h> #include <linux/interrupt.h> @@ -69,6 +70,8 @@ void __init time_init(void) of_clk_init(NULL); clocksource_of_init(); + tick_setup_hrtimer_broadcast(); + arch_timer_rate = arch_timer_get_rate(); if (!arch_timer_rate) panic("Unable to initialise architected timer.\n"); diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index 3e06b0be4ec8..b6ee26b0939a 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -17,10 +17,189 @@ #include <linux/percpu.h> #include <linux/node.h> #include <linux/nodemask.h> +#include <linux/of.h> #include <linux/sched.h> +#include <asm/cputype.h> #include <asm/topology.h> +static int __init get_cpu_for_node(struct device_node *node) +{ + struct device_node *cpu_node; + int cpu; + + cpu_node = of_parse_phandle(node, "cpu", 0); + if (!cpu_node) + return -1; + + for_each_possible_cpu(cpu) { + if (of_get_cpu_node(cpu, NULL) == cpu_node) { + of_node_put(cpu_node); + return cpu; + } + } + + pr_crit("Unable to find CPU node for %s\n", cpu_node->full_name); + + of_node_put(cpu_node); + return -1; +} + +static int __init parse_core(struct device_node *core, int cluster_id, + int core_id) +{ + char name[10]; + bool leaf = true; + int i = 0; + int cpu; + struct device_node *t; + + do { + snprintf(name, sizeof(name), "thread%d", i); + t = of_get_child_by_name(core, name); + if (t) { + leaf = false; + cpu = get_cpu_for_node(t); + if (cpu >= 0) { + cpu_topology[cpu].cluster_id = cluster_id; + cpu_topology[cpu].core_id = core_id; + cpu_topology[cpu].thread_id = i; + } else { + pr_err("%s: Can't get CPU for thread\n", + t->full_name); + of_node_put(t); + return -EINVAL; + } + of_node_put(t); + } + i++; + } while (t); + + cpu = get_cpu_for_node(core); + if (cpu >= 0) { + if (!leaf) { + pr_err("%s: Core has both threads and CPU\n", + core->full_name); + return -EINVAL; + } + + cpu_topology[cpu].cluster_id = cluster_id; + cpu_topology[cpu].core_id = core_id; + } else if (leaf) { + pr_err("%s: Can't get CPU for leaf core\n", core->full_name); + return -EINVAL; + } + + return 0; +} + +static int __init parse_cluster(struct device_node *cluster, int depth) +{ + char name[10]; + bool leaf = true; + bool has_cores = false; + struct device_node *c; + static int cluster_id __initdata; + int core_id = 0; + int i, ret; + + /* + * First check for child clusters; we currently ignore any + * information about the nesting of clusters and present the + * scheduler with a flat list of them. + */ + i = 0; + do { + snprintf(name, sizeof(name), "cluster%d", i); + c = of_get_child_by_name(cluster, name); + if (c) { + leaf = false; + ret = parse_cluster(c, depth + 1); + of_node_put(c); + if (ret != 0) + return ret; + } + i++; + } while (c); + + /* Now check for cores */ + i = 0; + do { + snprintf(name, sizeof(name), "core%d", i); + c = of_get_child_by_name(cluster, name); + if (c) { + has_cores = true; + + if (depth == 0) { + pr_err("%s: cpu-map children should be clusters\n", + c->full_name); + of_node_put(c); + return -EINVAL; + } + + if (leaf) { + ret = parse_core(c, cluster_id, core_id++); + } else { + pr_err("%s: Non-leaf cluster with core %s\n", + cluster->full_name, name); + ret = -EINVAL; + } + + of_node_put(c); + if (ret != 0) + return ret; + } + i++; + } while (c); + + if (leaf && !has_cores) + pr_warn("%s: empty cluster\n", cluster->full_name); + + if (leaf) + cluster_id++; + + return 0; +} + +static int __init parse_dt_topology(void) +{ + struct device_node *cn, *map; + int ret = 0; + int cpu; + + cn = of_find_node_by_path("/cpus"); + if (!cn) { + pr_err("No CPU information found in DT\n"); + return 0; + } + + /* + * When topology is provided cpu-map is essentially a root + * cluster with restricted subnodes. + */ + map = of_get_child_by_name(cn, "cpu-map"); + if (!map) + goto out; + + ret = parse_cluster(map, 0); + if (ret != 0) + goto out_map; + + /* + * Check that all cores are in the topology; the SMP code will + * only mark cores described in the DT as possible. + */ + for_each_possible_cpu(cpu) + if (cpu_topology[cpu].cluster_id == -1) + ret = -EINVAL; + +out_map: + of_node_put(map); +out: + of_node_put(cn); + return ret; +} + /* * cpu topology table */ @@ -37,18 +216,6 @@ static void update_siblings_masks(unsigned int cpuid) struct cpu_topology *cpu_topo, *cpuid_topo = &cpu_topology[cpuid]; int cpu; - if (cpuid_topo->cluster_id == -1) { - /* - * DT does not contain topology information for this cpu - * reset it to default behaviour - */ - pr_debug("CPU%u: No topology information configured\n", cpuid); - cpuid_topo->core_id = 0; - cpumask_set_cpu(cpuid, &cpuid_topo->core_sibling); - cpumask_set_cpu(cpuid, &cpuid_topo->thread_sibling); - return; - } - /* update core and thread sibling masks */ for_each_possible_cpu(cpu) { cpu_topo = &cpu_topology[cpu]; @@ -71,25 +238,65 @@ static void update_siblings_masks(unsigned int cpuid) void store_cpu_topology(unsigned int cpuid) { + struct cpu_topology *cpuid_topo = &cpu_topology[cpuid]; + u64 mpidr; + + if (cpuid_topo->cluster_id != -1) + goto topology_populated; + + mpidr = read_cpuid_mpidr(); + + /* Uniprocessor systems can rely on default topology values */ + if (mpidr & MPIDR_UP_BITMASK) + return; + + /* Create cpu topology mapping based on MPIDR. */ + if (mpidr & MPIDR_MT_BITMASK) { + /* Multiprocessor system : Multi-threads per core */ + cpuid_topo->thread_id = MPIDR_AFFINITY_LEVEL(mpidr, 0); + cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL(mpidr, 1); + cpuid_topo->cluster_id = MPIDR_AFFINITY_LEVEL(mpidr, 2); + } else { + /* Multiprocessor system : Single-thread per core */ + cpuid_topo->thread_id = -1; + cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL(mpidr, 0); + cpuid_topo->cluster_id = MPIDR_AFFINITY_LEVEL(mpidr, 1); + } + + pr_debug("CPU%u: cluster %d core %d thread %d mpidr %#016llx\n", + cpuid, cpuid_topo->cluster_id, cpuid_topo->core_id, + cpuid_topo->thread_id, mpidr); + +topology_populated: update_siblings_masks(cpuid); } -/* - * init_cpu_topology is called at boot when only one cpu is running - * which prevent simultaneous write access to cpu_topology array - */ -void __init init_cpu_topology(void) +static void __init reset_cpu_topology(void) { unsigned int cpu; - /* init core mask and power*/ for_each_possible_cpu(cpu) { struct cpu_topology *cpu_topo = &cpu_topology[cpu]; cpu_topo->thread_id = -1; - cpu_topo->core_id = -1; + cpu_topo->core_id = 0; cpu_topo->cluster_id = -1; + cpumask_clear(&cpu_topo->core_sibling); + cpumask_set_cpu(cpu, &cpu_topo->core_sibling); cpumask_clear(&cpu_topo->thread_sibling); + cpumask_set_cpu(cpu, &cpu_topo->thread_sibling); } } + +void __init init_cpu_topology(void) +{ + reset_cpu_topology(); + + /* + * Discard anything that was parsed if we hit an error so we + * don't use partial information. + */ + if (parse_dt_topology()) + reset_cpu_topology(); +} diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 7ffadddb645d..02cd3f023e9a 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -156,7 +156,7 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) frame.pc = thread_saved_pc(tsk); } - printk("Call trace:\n"); + pr_emerg("Call trace:\n"); while (1) { unsigned long where = frame.pc; int ret; @@ -251,10 +251,13 @@ void die(const char *str, struct pt_regs *regs, int err) void arm64_notify_die(const char *str, struct pt_regs *regs, struct siginfo *info, int err) { - if (user_mode(regs)) + if (user_mode(regs)) { + current->thread.fault_address = 0; + current->thread.fault_code = err; force_sig_info(info->si_signo, info, current); - else + } else { die(str, regs, err); + } } asmlinkage void __exception do_undefinstr(struct pt_regs *regs) @@ -328,17 +331,22 @@ asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr) void __pte_error(const char *file, int line, unsigned long val) { - printk("%s:%d: bad pte %016lx.\n", file, line, val); + pr_crit("%s:%d: bad pte %016lx.\n", file, line, val); } void __pmd_error(const char *file, int line, unsigned long val) { - printk("%s:%d: bad pmd %016lx.\n", file, line, val); + pr_crit("%s:%d: bad pmd %016lx.\n", file, line, val); +} + +void __pud_error(const char *file, int line, unsigned long val) +{ + pr_crit("%s:%d: bad pud %016lx.\n", file, line, val); } void __pgd_error(const char *file, int line, unsigned long val) { - printk("%s:%d: bad pgd %016lx.\n", file, line, val); + pr_crit("%s:%d: bad pgd %016lx.\n", file, line, val); } void __init trap_init(void) diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index 50384fec56c4..a81a446a5786 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -88,22 +88,29 @@ int aarch32_setup_vectors_page(struct linux_binprm *bprm, int uses_interp) { struct mm_struct *mm = current->mm; unsigned long addr = AARCH32_VECTORS_BASE; - int ret; + static struct vm_special_mapping spec = { + .name = "[vectors]", + .pages = vectors_page, + + }; + void *ret; down_write(&mm->mmap_sem); current->mm->context.vdso = (void *)addr; /* Map vectors page at the high address. */ - ret = install_special_mapping(mm, addr, PAGE_SIZE, - VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC, - vectors_page); + ret = _install_special_mapping(mm, addr, PAGE_SIZE, + VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC, + &spec); up_write(&mm->mmap_sem); - return ret; + return PTR_ERR_OR_ZERO(ret); } #endif /* CONFIG_COMPAT */ +static struct vm_special_mapping vdso_spec[2]; + static int __init vdso_init(void) { int i; @@ -114,8 +121,8 @@ static int __init vdso_init(void) } vdso_pages = (&vdso_end - &vdso_start) >> PAGE_SHIFT; - pr_info("vdso: %ld pages (%ld code, %ld data) at base %p\n", - vdso_pages + 1, vdso_pages, 1L, &vdso_start); + pr_info("vdso: %ld pages (%ld code @ %p, %ld data @ %p)\n", + vdso_pages + 1, vdso_pages, &vdso_start, 1L, vdso_data); /* Allocate the vDSO pagelist, plus a page for the data. */ vdso_pagelist = kcalloc(vdso_pages + 1, sizeof(struct page *), @@ -123,12 +130,23 @@ static int __init vdso_init(void) if (vdso_pagelist == NULL) return -ENOMEM; + /* Grab the vDSO data page. */ + vdso_pagelist[0] = virt_to_page(vdso_data); + /* Grab the vDSO code pages. */ for (i = 0; i < vdso_pages; i++) - vdso_pagelist[i] = virt_to_page(&vdso_start + i * PAGE_SIZE); + vdso_pagelist[i + 1] = virt_to_page(&vdso_start + i * PAGE_SIZE); - /* Grab the vDSO data page. */ - vdso_pagelist[i] = virt_to_page(vdso_data); + /* Populate the special mapping structures */ + vdso_spec[0] = (struct vm_special_mapping) { + .name = "[vvar]", + .pages = vdso_pagelist, + }; + + vdso_spec[1] = (struct vm_special_mapping) { + .name = "[vdso]", + .pages = &vdso_pagelist[1], + }; return 0; } @@ -138,52 +156,42 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) { struct mm_struct *mm = current->mm; - unsigned long vdso_base, vdso_mapping_len; - int ret; + unsigned long vdso_base, vdso_text_len, vdso_mapping_len; + void *ret; + vdso_text_len = vdso_pages << PAGE_SHIFT; /* Be sure to map the data page */ - vdso_mapping_len = (vdso_pages + 1) << PAGE_SHIFT; + vdso_mapping_len = vdso_text_len + PAGE_SIZE; down_write(&mm->mmap_sem); vdso_base = get_unmapped_area(NULL, 0, vdso_mapping_len, 0, 0); if (IS_ERR_VALUE(vdso_base)) { - ret = vdso_base; + ret = ERR_PTR(vdso_base); goto up_fail; } - mm->context.vdso = (void *)vdso_base; - - ret = install_special_mapping(mm, vdso_base, vdso_mapping_len, - VM_READ|VM_EXEC| - VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, - vdso_pagelist); - if (ret) { - mm->context.vdso = NULL; + ret = _install_special_mapping(mm, vdso_base, PAGE_SIZE, + VM_READ|VM_MAYREAD, + &vdso_spec[0]); + if (IS_ERR(ret)) goto up_fail; - } -up_fail: - up_write(&mm->mmap_sem); + vdso_base += PAGE_SIZE; + mm->context.vdso = (void *)vdso_base; + ret = _install_special_mapping(mm, vdso_base, vdso_text_len, + VM_READ|VM_EXEC| + VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, + &vdso_spec[1]); + if (IS_ERR(ret)) + goto up_fail; - return ret; -} -const char *arch_vma_name(struct vm_area_struct *vma) -{ - /* - * We can re-use the vdso pointer in mm_context_t for identifying - * the vectors page for compat applications. The vDSO will always - * sit above TASK_UNMAPPED_BASE and so we don't need to worry about - * it conflicting with the vectors base. - */ - if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso) { -#ifdef CONFIG_COMPAT - if (vma->vm_start == AARCH32_VECTORS_BASE) - return "[vectors]"; -#endif - return "[vdso]"; - } + up_write(&mm->mmap_sem); + return 0; - return NULL; +up_fail: + mm->context.vdso = NULL; + up_write(&mm->mmap_sem); + return PTR_ERR(ret); } /* @@ -211,7 +219,7 @@ struct vm_area_struct *get_gate_vma(struct mm_struct *mm) void update_vsyscall(struct timekeeper *tk) { struct timespec xtime_coarse; - u32 use_syscall = strcmp(tk->clock->name, "arch_sys_counter"); + u32 use_syscall = strcmp(tk->tkr.clock->name, "arch_sys_counter"); ++vdso_data->tb_seq_count; smp_wmb(); @@ -224,11 +232,11 @@ void update_vsyscall(struct timekeeper *tk) vdso_data->wtm_clock_nsec = tk->wall_to_monotonic.tv_nsec; if (!use_syscall) { - vdso_data->cs_cycle_last = tk->clock->cycle_last; + vdso_data->cs_cycle_last = tk->tkr.cycle_last; vdso_data->xtime_clock_sec = tk->xtime_sec; - vdso_data->xtime_clock_nsec = tk->xtime_nsec; - vdso_data->cs_mult = tk->mult; - vdso_data->cs_shift = tk->shift; + vdso_data->xtime_clock_nsec = tk->tkr.xtime_nsec; + vdso_data->cs_mult = tk->tkr.mult; + vdso_data->cs_shift = tk->tkr.shift; } smp_wmb(); diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index 6d20b7d162d8..ff3bddea482d 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -43,13 +43,13 @@ $(obj)/vdso-offsets.h: $(obj)/vdso.so.dbg FORCE $(call if_changed,vdsosym) # Assembly rules for the .S files -$(obj-vdso): %.o: %.S +$(obj-vdso): %.o: %.S FORCE $(call if_changed_dep,vdsoas) # Actual build commands -quiet_cmd_vdsold = VDSOL $@ +quiet_cmd_vdsold = VDSOL $@ cmd_vdsold = $(CC) $(c_flags) -Wl,-n -Wl,-T $^ -o $@ -quiet_cmd_vdsoas = VDSOA $@ +quiet_cmd_vdsoas = VDSOA $@ cmd_vdsoas = $(CC) $(a_flags) -c -o $@ $< # Install commands for the unstripped file diff --git a/arch/arm64/kernel/vdso/vdso.lds.S b/arch/arm64/kernel/vdso/vdso.lds.S index 8154b8d1c826..beca249bc2f3 100644 --- a/arch/arm64/kernel/vdso/vdso.lds.S +++ b/arch/arm64/kernel/vdso/vdso.lds.S @@ -28,6 +28,7 @@ OUTPUT_ARCH(aarch64) SECTIONS { + PROVIDE(_vdso_data = . - PAGE_SIZE); . = VDSO_LBASE + SIZEOF_HEADERS; .hash : { *(.hash) } :text @@ -57,9 +58,6 @@ SECTIONS _end = .; PROVIDE(end = .); - . = ALIGN(PAGE_SIZE); - PROVIDE(_vdso_data = .); - /DISCARD/ : { *(.note.GNU-stack) *(.data .data.* .gnu.linkonce.d.* .sdata*) diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 4ba7a55b49c7..97f0c0429dfa 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -9,11 +9,13 @@ #include <asm/memory.h> #include <asm/page.h> +#include "image.h" + #define ARM_EXIT_KEEP(x) #define ARM_EXIT_DISCARD(x) x OUTPUT_ARCH(aarch64) -ENTRY(stext) +ENTRY(_text) jiffies = jiffies_64; @@ -104,9 +106,18 @@ SECTIONS _edata = .; BSS_SECTION(0, 0, 0) + + . = ALIGN(PAGE_SIZE); + idmap_pg_dir = .; + . += IDMAP_DIR_SIZE; + swapper_pg_dir = .; + . += SWAPPER_DIR_SIZE; + _end = .; STABS_DEBUG + + HEAD_SYMBOLS } /* @@ -114,3 +125,8 @@ SECTIONS */ ASSERT(((__hyp_idmap_text_start + PAGE_SIZE) > __hyp_idmap_text_end), "HYP init code too big") + +/* + * If padding is applied before .head.text, virt<->phys conversions will fail. + */ +ASSERT(_text == (PAGE_OFFSET + TEXT_OFFSET), "HEAD is misaligned") |
