diff options
Diffstat (limited to 'arch')
28 files changed, 195 insertions, 132 deletions
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 2c2ac3f3ff80..6312f607932f 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -445,6 +445,7 @@ config LINUX_LINK_BASE However some customers have peripherals mapped at this addr, so Linux needs to be scooted a bit. If you don't know what the above means, leave this setting alone. + This needs to match memory start address specified in Device Tree config HIGHMEM bool "High Memory Support" diff --git a/arch/arc/boot/dts/axs10x_mb.dtsi b/arch/arc/boot/dts/axs10x_mb.dtsi index f3db32154973..44a578c10732 100644 --- a/arch/arc/boot/dts/axs10x_mb.dtsi +++ b/arch/arc/boot/dts/axs10x_mb.dtsi @@ -46,6 +46,7 @@ snps,pbl = < 32 >; clocks = <&apbclk>; clock-names = "stmmaceth"; + max-speed = <100>; }; ehci@0x40000 { diff --git a/arch/arc/boot/dts/nsim_hs.dts b/arch/arc/boot/dts/nsim_hs.dts index b0eb0e7fe21d..fc81879bc1f5 100644 --- a/arch/arc/boot/dts/nsim_hs.dts +++ b/arch/arc/boot/dts/nsim_hs.dts @@ -17,7 +17,8 @@ memory { device_type = "memory"; - reg = <0x0 0x80000000 0x0 0x40000000 /* 1 GB low mem */ + /* CONFIG_LINUX_LINK_BASE needs to match low mem start */ + reg = <0x0 0x80000000 0x0 0x20000000 /* 512 MB low mem */ 0x1 0x00000000 0x0 0x40000000>; /* 1 GB highmem */ }; diff --git a/arch/arc/include/asm/mach_desc.h b/arch/arc/include/asm/mach_desc.h index 6ff657a904b6..c28e6c347b49 100644 --- a/arch/arc/include/asm/mach_desc.h +++ b/arch/arc/include/asm/mach_desc.h @@ -23,7 +23,7 @@ * @dt_compat: Array of device tree 'compatible' strings * (XXX: although only 1st entry is looked at) * @init_early: Very early callback [called from setup_arch()] - * @init_cpu_smp: for each CPU as it is coming up (SMP as well as UP) + * @init_per_cpu: for each CPU as it is coming up (SMP as well as UP) * [(M):init_IRQ(), (o):start_kernel_secondary()] * @init_machine: arch initcall level callback (e.g. 
populate static * platform devices or parse Devicetree) @@ -35,7 +35,7 @@ struct machine_desc { const char **dt_compat; void (*init_early)(void); #ifdef CONFIG_SMP - void (*init_cpu_smp)(unsigned int); + void (*init_per_cpu)(unsigned int); #endif void (*init_machine)(void); void (*init_late)(void); diff --git a/arch/arc/include/asm/smp.h b/arch/arc/include/asm/smp.h index 133c867d15af..991380438d6b 100644 --- a/arch/arc/include/asm/smp.h +++ b/arch/arc/include/asm/smp.h @@ -48,7 +48,7 @@ extern int smp_ipi_irq_setup(int cpu, int irq); * @init_early_smp: A SMP specific h/w block can init itself * Could be common across platforms so not covered by * mach_desc->init_early() - * @init_irq_cpu: Called for each core so SMP h/w block driver can do + * @init_per_cpu: Called for each core so SMP h/w block driver can do * any needed setup per cpu (e.g. IPI request) * @cpu_kick: For Master to kickstart a cpu (optionally at a PC) * @ipi_send: To send IPI to a @cpu @@ -57,7 +57,7 @@ extern int smp_ipi_irq_setup(int cpu, int irq); struct plat_smp_ops { const char *info; void (*init_early_smp)(void); - void (*init_irq_cpu)(int cpu); + void (*init_per_cpu)(int cpu); void (*cpu_kick)(int cpu, unsigned long pc); void (*ipi_send)(int cpu); void (*ipi_clear)(int irq); diff --git a/arch/arc/include/asm/unwind.h b/arch/arc/include/asm/unwind.h index 7ca628b6ee2a..c11a25bb8158 100644 --- a/arch/arc/include/asm/unwind.h +++ b/arch/arc/include/asm/unwind.h @@ -112,7 +112,6 @@ struct unwind_frame_info { extern int arc_unwind(struct unwind_frame_info *frame); extern void arc_unwind_init(void); -extern void arc_unwind_setup(void); extern void *unwind_add_table(struct module *module, const void *table_start, unsigned long table_size); extern void unwind_remove_table(void *handle, int init_only); @@ -152,9 +151,6 @@ static inline void arc_unwind_init(void) { } -static inline void arc_unwind_setup(void) -{ -} #define unwind_add_table(a, b, c) #define unwind_remove_table(a, b) diff --git 
a/arch/arc/kernel/intc-arcv2.c b/arch/arc/kernel/intc-arcv2.c index 26c156827479..0394f9f61b46 100644 --- a/arch/arc/kernel/intc-arcv2.c +++ b/arch/arc/kernel/intc-arcv2.c @@ -106,10 +106,21 @@ static struct irq_chip arcv2_irq_chip = { static int arcv2_irq_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hw) { - if (irq == TIMER0_IRQ || irq == IPI_IRQ) + /* + * core intc IRQs [16, 23]: + * Statically assigned always private-per-core (Timers, WDT, IPI, PCT) + */ + if (hw < 24) { + /* + * A subsequent request_percpu_irq() fails if percpu_devid is + * not set. That in turns sets NOAUTOEN, meaning each core needs + * to call enable_percpu_irq() + */ + irq_set_percpu_devid(irq); irq_set_chip_and_handler(irq, &arcv2_irq_chip, handle_percpu_irq); - else + } else { irq_set_chip_and_handler(irq, &arcv2_irq_chip, handle_level_irq); + } return 0; } diff --git a/arch/arc/kernel/irq.c b/arch/arc/kernel/irq.c index 2ee226546c6a..ba17f85285cf 100644 --- a/arch/arc/kernel/irq.c +++ b/arch/arc/kernel/irq.c @@ -29,11 +29,11 @@ void __init init_IRQ(void) #ifdef CONFIG_SMP /* a SMP H/w block could do IPI IRQ request here */ - if (plat_smp_ops.init_irq_cpu) - plat_smp_ops.init_irq_cpu(smp_processor_id()); + if (plat_smp_ops.init_per_cpu) + plat_smp_ops.init_per_cpu(smp_processor_id()); - if (machine_desc->init_cpu_smp) - machine_desc->init_cpu_smp(smp_processor_id()); + if (machine_desc->init_per_cpu) + machine_desc->init_per_cpu(smp_processor_id()); #endif } @@ -51,6 +51,18 @@ void arch_do_IRQ(unsigned int irq, struct pt_regs *regs) set_irq_regs(old_regs); } +/* + * API called for requesting percpu interrupts - called by each CPU + * - For boot CPU, actually request the IRQ with genirq core + enables + * - For subsequent callers only enable called locally + * + * Relies on being called by boot cpu first (i.e. request called ahead) of + * any enable as expected by genirq. Hence Suitable only for TIMER, IPI + * which are guaranteed to be setup on boot core first. 
+ * Late probed peripherals such as perf can't use this as there no guarantee + * of being called on boot CPU first. + */ + void arc_request_percpu_irq(int irq, int cpu, irqreturn_t (*isr)(int irq, void *dev), const char *irq_nm, @@ -60,14 +72,17 @@ void arc_request_percpu_irq(int irq, int cpu, if (!cpu) { int rc; +#ifdef CONFIG_ISA_ARCOMPACT /* - * These 2 calls are essential to making percpu IRQ APIs work - * Ideally these details could be hidden in irq chip map function - * but the issue is IPIs IRQs being static (non-DT) and platform - * specific, so we can't identify them there. + * A subsequent request_percpu_irq() fails if percpu_devid is + * not set. That in turns sets NOAUTOEN, meaning each core needs + * to call enable_percpu_irq() + * + * For ARCv2, this is done in irq map function since we know + * which irqs are strictly per cpu */ irq_set_percpu_devid(irq); - irq_modify_status(irq, IRQ_NOAUTOEN, 0); /* @irq, @clr, @set */ +#endif rc = request_percpu_irq(irq, isr, irq_nm, percpu_dev); if (rc) diff --git a/arch/arc/kernel/mcip.c b/arch/arc/kernel/mcip.c index 74a9b074ac3e..bd237acdf4f2 100644 --- a/arch/arc/kernel/mcip.c +++ b/arch/arc/kernel/mcip.c @@ -132,7 +132,7 @@ static void mcip_probe_n_setup(void) struct plat_smp_ops plat_smp_ops = { .info = smp_cpuinfo_buf, .init_early_smp = mcip_probe_n_setup, - .init_irq_cpu = mcip_setup_per_cpu, + .init_per_cpu = mcip_setup_per_cpu, .ipi_send = mcip_ipi_send, .ipi_clear = mcip_ipi_clear, }; diff --git a/arch/arc/kernel/perf_event.c b/arch/arc/kernel/perf_event.c index 0c08bb1ce15a..8b134cfe5e1f 100644 --- a/arch/arc/kernel/perf_event.c +++ b/arch/arc/kernel/perf_event.c @@ -428,12 +428,11 @@ static irqreturn_t arc_pmu_intr(int irq, void *dev) #endif /* CONFIG_ISA_ARCV2 */ -void arc_cpu_pmu_irq_init(void) +static void arc_cpu_pmu_irq_init(void *data) { - struct arc_pmu_cpu *pmu_cpu = this_cpu_ptr(&arc_pmu_cpu); + int irq = *(int *)data; - arc_request_percpu_irq(arc_pmu->irq, smp_processor_id(), arc_pmu_intr, 
- "ARC perf counters", pmu_cpu); + enable_percpu_irq(irq, IRQ_TYPE_NONE); /* Clear all pending interrupt flags */ write_aux_reg(ARC_REG_PCT_INT_ACT, 0xffffffff); @@ -515,7 +514,6 @@ static int arc_pmu_device_probe(struct platform_device *pdev) if (has_interrupts) { int irq = platform_get_irq(pdev, 0); - unsigned long flags; if (irq < 0) { pr_err("Cannot get IRQ number for the platform\n"); @@ -524,24 +522,12 @@ static int arc_pmu_device_probe(struct platform_device *pdev) arc_pmu->irq = irq; - /* - * arc_cpu_pmu_irq_init() needs to be called on all cores for - * their respective local PMU. - * However we use opencoded on_each_cpu() to ensure it is called - * on core0 first, so that arc_request_percpu_irq() sets up - * AUTOEN etc. Otherwise enable_percpu_irq() fails to enable - * perf IRQ on non master cores. - * see arc_request_percpu_irq() - */ - preempt_disable(); - local_irq_save(flags); - arc_cpu_pmu_irq_init(); - local_irq_restore(flags); - smp_call_function((smp_call_func_t)arc_cpu_pmu_irq_init, 0, 1); - preempt_enable(); - - /* Clean all pending interrupt flags */ - write_aux_reg(ARC_REG_PCT_INT_ACT, 0xffffffff); + /* intc map function ensures irq_set_percpu_devid() called */ + request_percpu_irq(irq, arc_pmu_intr, "ARC perf counters", + this_cpu_ptr(&arc_pmu_cpu)); + + on_each_cpu(arc_cpu_pmu_irq_init, &irq, 1); + } else arc_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index c33e77c0ad3e..e1b87444ea9a 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -429,7 +429,6 @@ void __init setup_arch(char **cmdline_p) #endif arc_unwind_init(); - arc_unwind_setup(); } static int __init customize_machine(void) diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c index 580587805fa3..ef6e9e15b82a 100644 --- a/arch/arc/kernel/smp.c +++ b/arch/arc/kernel/smp.c @@ -132,11 +132,11 @@ void start_kernel_secondary(void) pr_info("## CPU%u LIVE ##: Executing Code...\n", cpu); /* Some 
SMP H/w setup - for each cpu */ - if (plat_smp_ops.init_irq_cpu) - plat_smp_ops.init_irq_cpu(cpu); + if (plat_smp_ops.init_per_cpu) + plat_smp_ops.init_per_cpu(cpu); - if (machine_desc->init_cpu_smp) - machine_desc->init_cpu_smp(cpu); + if (machine_desc->init_per_cpu) + machine_desc->init_per_cpu(cpu); arc_local_timer_setup(); diff --git a/arch/arc/kernel/unwind.c b/arch/arc/kernel/unwind.c index 7352475451f6..cf2828ab0905 100644 --- a/arch/arc/kernel/unwind.c +++ b/arch/arc/kernel/unwind.c @@ -170,6 +170,23 @@ static struct unwind_table *find_table(unsigned long pc) static unsigned long read_pointer(const u8 **pLoc, const void *end, signed ptrType); +static void init_unwind_hdr(struct unwind_table *table, + void *(*alloc) (unsigned long)); + +/* + * wrappers for header alloc (vs. calling one vs. other at call site) + * to elide section mismatches warnings + */ +static void *__init unw_hdr_alloc_early(unsigned long sz) +{ + return __alloc_bootmem_nopanic(sz, sizeof(unsigned int), + MAX_DMA_ADDRESS); +} + +static void *unw_hdr_alloc(unsigned long sz) +{ + return kmalloc(sz, GFP_KERNEL); +} static void init_unwind_table(struct unwind_table *table, const char *name, const void *core_start, unsigned long core_size, @@ -209,6 +226,8 @@ void __init arc_unwind_init(void) __start_unwind, __end_unwind - __start_unwind, NULL, 0); /*__start_unwind_hdr, __end_unwind_hdr - __start_unwind_hdr);*/ + + init_unwind_hdr(&root_table, unw_hdr_alloc_early); } static const u32 bad_cie, not_fde; @@ -241,8 +260,8 @@ static void swap_eh_frame_hdr_table_entries(void *p1, void *p2, int size) e2->fde = v; } -static void __init setup_unwind_table(struct unwind_table *table, - void *(*alloc) (unsigned long)) +static void init_unwind_hdr(struct unwind_table *table, + void *(*alloc) (unsigned long)) { const u8 *ptr; unsigned long tableSize = table->size, hdrSize; @@ -274,13 +293,13 @@ static void __init setup_unwind_table(struct unwind_table *table, const u32 *cie = cie_for_fde(fde, table); 
signed ptrType; - if (cie == &not_fde) + if (cie == &not_fde) /* only process FDE here */ continue; if (cie == NULL || cie == &bad_cie) - return; + continue; /* say FDE->CIE.version != 1 */ ptrType = fde_pointer_type(cie); if (ptrType < 0) - return; + continue; ptr = (const u8 *)(fde + 2); if (!read_pointer(&ptr, (const u8 *)(fde + 1) + *fde, @@ -300,9 +319,11 @@ static void __init setup_unwind_table(struct unwind_table *table, hdrSize = 4 + sizeof(unsigned long) + sizeof(unsigned int) + 2 * n * sizeof(unsigned long); + header = alloc(hdrSize); if (!header) return; + header->version = 1; header->eh_frame_ptr_enc = DW_EH_PE_abs | DW_EH_PE_native; header->fde_count_enc = DW_EH_PE_abs | DW_EH_PE_data4; @@ -322,6 +343,10 @@ static void __init setup_unwind_table(struct unwind_table *table, if (fde[1] == 0xffffffff) continue; /* this is a CIE */ + + if (*(u8 *)(cie + 2) != 1) + continue; /* FDE->CIE.version not supported */ + ptr = (const u8 *)(fde + 2); header->table[n].start = read_pointer(&ptr, (const u8 *)(fde + 1) + @@ -342,18 +367,6 @@ static void __init setup_unwind_table(struct unwind_table *table, table->header = (const void *)header; } -static void *__init balloc(unsigned long sz) -{ - return __alloc_bootmem_nopanic(sz, - sizeof(unsigned int), - __pa(MAX_DMA_ADDRESS)); -} - -void __init arc_unwind_setup(void) -{ - setup_unwind_table(&root_table, balloc); -} - #ifdef CONFIG_MODULES static struct unwind_table *last_table; @@ -377,6 +390,8 @@ void *unwind_add_table(struct module *module, const void *table_start, table_start, table_size, NULL, 0); + init_unwind_hdr(table, unw_hdr_alloc); + #ifdef UNWIND_DEBUG unw_debug("Table added for [%s] %lx %lx\n", module->name, table->core.pc, table->core.range); @@ -439,6 +454,7 @@ void unwind_remove_table(void *handle, int init_only) info.init_only = init_only; unlink_table(&info); /* XXX: SMP */ + kfree(table->header); kfree(table); } @@ -507,7 +523,8 @@ static const u32 *cie_for_fde(const u32 *fde, const struct unwind_table
*table) if (*cie <= sizeof(*cie) + 4 || *cie >= fde[1] - sizeof(*fde) || (*cie & (sizeof(*cie) - 1)) - || (cie[1] != 0xffffffff)) + || (cie[1] != 0xffffffff) + || ( *(u8 *)(cie + 2) != 1)) /* version 1 supported */ return NULL; /* this is not a (valid) CIE */ return cie; } diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c index a9305b5a2cd4..7d2c4fbf4f22 100644 --- a/arch/arc/mm/init.c +++ b/arch/arc/mm/init.c @@ -51,7 +51,9 @@ void __init early_init_dt_add_memory_arch(u64 base, u64 size) int in_use = 0; if (!low_mem_sz) { - BUG_ON(base != low_mem_start); + if (base != low_mem_start) + panic("CONFIG_LINUX_LINK_BASE != DT memory { }"); + low_mem_sz = size; in_use = 1; } else { diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h index f2b0b1b0c72a..5654ece02c0d 100644 --- a/arch/powerpc/include/asm/systbl.h +++ b/arch/powerpc/include/asm/systbl.h @@ -370,16 +370,16 @@ COMPAT_SYS(execveat) PPC64ONLY(switch_endian) SYSCALL_SPU(userfaultfd) SYSCALL_SPU(membarrier) -SYSCALL(semop) -SYSCALL(semget) -COMPAT_SYS(semctl) -COMPAT_SYS(semtimedop) -COMPAT_SYS(msgsnd) -COMPAT_SYS(msgrcv) -SYSCALL(msgget) -COMPAT_SYS(msgctl) -COMPAT_SYS(shmat) -SYSCALL(shmdt) -SYSCALL(shmget) -COMPAT_SYS(shmctl) +SYSCALL(ni_syscall) +SYSCALL(ni_syscall) +SYSCALL(ni_syscall) +SYSCALL(ni_syscall) +SYSCALL(ni_syscall) +SYSCALL(ni_syscall) +SYSCALL(ni_syscall) +SYSCALL(ni_syscall) +SYSCALL(ni_syscall) +SYSCALL(ni_syscall) +SYSCALL(ni_syscall) +SYSCALL(ni_syscall) SYSCALL(mlock2) diff --git a/arch/powerpc/include/uapi/asm/unistd.h b/arch/powerpc/include/uapi/asm/unistd.h index 1effea5193d6..12a05652377a 100644 --- a/arch/powerpc/include/uapi/asm/unistd.h +++ b/arch/powerpc/include/uapi/asm/unistd.h @@ -388,18 +388,6 @@ #define __NR_switch_endian 363 #define __NR_userfaultfd 364 #define __NR_membarrier 365 -#define __NR_semop 366 -#define __NR_semget 367 -#define __NR_semctl 368 -#define __NR_semtimedop 369 -#define __NR_msgsnd 370 -#define __NR_msgrcv 371 -#define 
__NR_msgget 372 -#define __NR_msgctl 373 -#define __NR_shmat 374 -#define __NR_shmdt 375 -#define __NR_shmget 376 -#define __NR_shmctl 377 #define __NR_mlock2 378 #endif /* _UAPI_ASM_POWERPC_UNISTD_H_ */ diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 54b45b73195f..a7352b59e6f9 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -224,6 +224,12 @@ static void kvmppc_core_vcpu_put_hv(struct kvm_vcpu *vcpu) static void kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 msr) { + /* + * Check for illegal transactional state bit combination + * and if we find it, force the TS field to a safe state. + */ + if ((msr & MSR_TS_MASK) == MSR_TS_MASK) + msr &= ~MSR_TS_MASK; vcpu->arch.shregs.msr = msr; kvmppc_end_cede(vcpu); } diff --git a/arch/powerpc/platforms/powernv/opal-irqchip.c b/arch/powerpc/platforms/powernv/opal-irqchip.c index 0a00e2aed393..e505223b4ec5 100644 --- a/arch/powerpc/platforms/powernv/opal-irqchip.c +++ b/arch/powerpc/platforms/powernv/opal-irqchip.c @@ -83,7 +83,19 @@ static void opal_event_unmask(struct irq_data *d) set_bit(d->hwirq, &opal_event_irqchip.mask); opal_poll_events(&events); - opal_handle_events(be64_to_cpu(events)); + last_outstanding_events = be64_to_cpu(events); + + /* + * We can't just handle the events now with opal_handle_events(). + * If we did we would deadlock when opal_event_unmask() is called from + * handle_level_irq() with the irq descriptor lock held, because + * calling opal_handle_events() would call generic_handle_irq() and + * then handle_level_irq() which would try to take the descriptor lock + * again. Instead queue the events for later. 
+ */ + if (last_outstanding_events & opal_event_irqchip.mask) + /* Need to retrigger the interrupt */ + irq_work_queue(&opal_event_irq_work); } static int opal_event_set_type(struct irq_data *d, unsigned int flow_type) diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 4296d55e88f3..57cffb80bc36 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -278,7 +278,7 @@ static void opal_handle_message(void) /* Sanity check */ if (type >= OPAL_MSG_TYPE_MAX) { - pr_warning("%s: Unknown message type: %u\n", __func__, type); + pr_warn_once("%s: Unknown message type: %u\n", __func__, type); return; } opal_message_do_notify(type, (void *)&msg); diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c index 8140d10c6785..6e72961608f0 100644 --- a/arch/s390/kernel/dis.c +++ b/arch/s390/kernel/dis.c @@ -1920,16 +1920,23 @@ static int print_insn(char *buffer, unsigned char *code, unsigned long addr) } if (separator) ptr += sprintf(ptr, "%c", separator); + /* + * Use four '%' characters below because of the + * following two conversions: + * + * 1) sprintf: %%%%r -> %%r + * 2) printk : %%r -> %r + */ if (operand->flags & OPERAND_GPR) - ptr += sprintf(ptr, "%%r%i", value); + ptr += sprintf(ptr, "%%%%r%i", value); else if (operand->flags & OPERAND_FPR) - ptr += sprintf(ptr, "%%f%i", value); + ptr += sprintf(ptr, "%%%%f%i", value); else if (operand->flags & OPERAND_AR) - ptr += sprintf(ptr, "%%a%i", value); + ptr += sprintf(ptr, "%%%%a%i", value); else if (operand->flags & OPERAND_CR) - ptr += sprintf(ptr, "%%c%i", value); + ptr += sprintf(ptr, "%%%%c%i", value); else if (operand->flags & OPERAND_VR) - ptr += sprintf(ptr, "%%v%i", value); + ptr += sprintf(ptr, "%%%%v%i", value); else if (operand->flags & OPERAND_PCREL) ptr += sprintf(ptr, "%lx", (signed int) value + addr); diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index 06332cb7e7d1..3f5c48ddba45 100644 --- 
a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -38,6 +38,14 @@ static inline bool guest_cpuid_has_xsave(struct kvm_vcpu *vcpu) return best && (best->ecx & bit(X86_FEATURE_XSAVE)); } +static inline bool guest_cpuid_has_mtrr(struct kvm_vcpu *vcpu) +{ + struct kvm_cpuid_entry2 *best; + + best = kvm_find_cpuid_entry(vcpu, 1, 0); + return best && (best->edx & bit(X86_FEATURE_MTRR)); +} + static inline bool guest_cpuid_has_tsc_adjust(struct kvm_vcpu *vcpu) { struct kvm_cpuid_entry2 *best; diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c index 9e8bf13572e6..3f8c732117ec 100644 --- a/arch/x86/kvm/mtrr.c +++ b/arch/x86/kvm/mtrr.c @@ -120,14 +120,22 @@ static u8 mtrr_default_type(struct kvm_mtrr *mtrr_state) return mtrr_state->deftype & IA32_MTRR_DEF_TYPE_TYPE_MASK; } -static u8 mtrr_disabled_type(void) +static u8 mtrr_disabled_type(struct kvm_vcpu *vcpu) { /* * Intel SDM 11.11.2.2: all MTRRs are disabled when * IA32_MTRR_DEF_TYPE.E bit is cleared, and the UC * memory type is applied to all of physical memory. + * + * However, virtual machines can be run with CPUID such that + * there are no MTRRs. In that case, the firmware will never + * enable MTRRs and it is obviously undesirable to run the + * guest entirely with UC memory and we use WB. */ - return MTRR_TYPE_UNCACHABLE; + if (guest_cpuid_has_mtrr(vcpu)) + return MTRR_TYPE_UNCACHABLE; + else + return MTRR_TYPE_WRBACK; } /* @@ -267,7 +275,7 @@ static int fixed_mtrr_addr_to_seg(u64 addr) for (seg = 0; seg < seg_num; seg++) { mtrr_seg = &fixed_seg_table[seg]; - if (mtrr_seg->start >= addr && addr < mtrr_seg->end) + if (mtrr_seg->start <= addr && addr < mtrr_seg->end) return seg; } @@ -300,7 +308,6 @@ static void var_mtrr_range(struct kvm_mtrr_range *range, u64 *start, u64 *end) *start = range->base & PAGE_MASK; mask = range->mask & PAGE_MASK; - mask |= ~0ULL << boot_cpu_data.x86_phys_bits; /* This cannot overflow because writing to the reserved bits of * variable MTRRs causes a #GP. 
@@ -356,10 +363,14 @@ static void set_var_mtrr_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data) if (var_mtrr_range_is_valid(cur)) list_del(&mtrr_state->var_ranges[index].node); + /* Extend the mask with all 1 bits to the left, since those + * bits must implicitly be 0. The bits are then cleared + * when reading them. + */ if (!is_mtrr_mask) cur->base = data; else - cur->mask = data; + cur->mask = data | (-1LL << cpuid_maxphyaddr(vcpu)); /* add it to the list if it's enabled. */ if (var_mtrr_range_is_valid(cur)) { @@ -426,6 +437,8 @@ int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) *pdata = vcpu->arch.mtrr_state.var_ranges[index].base; else *pdata = vcpu->arch.mtrr_state.var_ranges[index].mask; + + *pdata &= (1ULL << cpuid_maxphyaddr(vcpu)) - 1; } return 0; @@ -670,7 +683,7 @@ u8 kvm_mtrr_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn) } if (iter.mtrr_disabled) - return mtrr_disabled_type(); + return mtrr_disabled_type(vcpu); /* not contained in any MTRRs. */ if (type == -1) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 83a1c643f9a5..899c40f826dd 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -3422,6 +3422,8 @@ static int handle_exit(struct kvm_vcpu *vcpu) struct kvm_run *kvm_run = vcpu->run; u32 exit_code = svm->vmcb->control.exit_code; + trace_kvm_exit(exit_code, vcpu, KVM_ISA_SVM); + if (!is_cr_intercept(svm, INTERCEPT_CR0_WRITE)) vcpu->arch.cr0 = svm->vmcb->save.cr0; if (npt_enabled) @@ -3892,8 +3894,6 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp; vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip; - trace_kvm_exit(svm->vmcb->control.exit_code, vcpu, KVM_ISA_SVM); - if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI)) kvm_before_handle_nmi(&svm->vcpu); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index af823a388c19..44976a596fa6 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2803,7 +2803,7 @@ static int vmx_get_msr(struct 
kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data = vcpu->arch.ia32_xss; break; case MSR_TSC_AUX: - if (!guest_cpuid_has_rdtscp(vcpu)) + if (!guest_cpuid_has_rdtscp(vcpu) && !msr_info->host_initiated) return 1; /* Otherwise falls through */ default: @@ -2909,7 +2909,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) clear_atomic_switch_msr(vmx, MSR_IA32_XSS); break; case MSR_TSC_AUX: - if (!guest_cpuid_has_rdtscp(vcpu)) + if (!guest_cpuid_has_rdtscp(vcpu) && !msr_info->host_initiated) return 1; /* Check reserved bit, higher 32 bits should be zero */ if ((data >> 32) != 0) @@ -8042,6 +8042,8 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) u32 exit_reason = vmx->exit_reason; u32 vectoring_info = vmx->idt_vectoring_info; + trace_kvm_exit(exit_reason, vcpu, KVM_ISA_VMX); + /* * Flush logged GPAs PML buffer, this will make dirty_bitmap more * updated. Another good is, in kvm_vm_ioctl_get_dirty_log, before @@ -8668,7 +8670,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) vmx->loaded_vmcs->launched = 1; vmx->exit_reason = vmcs_read32(VM_EXIT_REASON); - trace_kvm_exit(vmx->exit_reason, vcpu, KVM_ISA_VMX); /* * the KVM_REQ_EVENT optimization bit is only on for one entry, and if diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index eed32283d22c..7ffc224bbe41 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3572,9 +3572,11 @@ static int kvm_vm_ioctl_get_pit(struct kvm *kvm, struct kvm_pit_state *ps) static int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps) { + int i; mutex_lock(&kvm->arch.vpit->pit_state.lock); memcpy(&kvm->arch.vpit->pit_state, ps, sizeof(struct kvm_pit_state)); - kvm_pit_load_count(kvm, 0, ps->channels[0].count, 0); + for (i = 0; i < 3; i++) + kvm_pit_load_count(kvm, i, ps->channels[i].count, 0); mutex_unlock(&kvm->arch.vpit->pit_state.lock); return 0; } @@ -3593,6 +3595,7 @@ static int kvm_vm_ioctl_get_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps) static int 
kvm_vm_ioctl_set_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps) { int start = 0; + int i; u32 prev_legacy, cur_legacy; mutex_lock(&kvm->arch.vpit->pit_state.lock); prev_legacy = kvm->arch.vpit->pit_state.flags & KVM_PIT_FLAGS_HPET_LEGACY; @@ -3602,7 +3605,8 @@ static int kvm_vm_ioctl_set_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps) memcpy(&kvm->arch.vpit->pit_state.channels, &ps->channels, sizeof(kvm->arch.vpit->pit_state.channels)); kvm->arch.vpit->pit_state.flags = ps->flags; - kvm_pit_load_count(kvm, 0, kvm->arch.vpit->pit_state.channels[0].count, start); + for (i = 0; i < 3; i++) + kvm_pit_load_count(kvm, i, kvm->arch.vpit->pit_state.channels[i].count, start); mutex_unlock(&kvm->arch.vpit->pit_state.lock); return 0; } @@ -6515,6 +6519,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) if (req_immediate_exit) smp_send_reschedule(vcpu->cpu); + trace_kvm_entry(vcpu->vcpu_id); + wait_lapic_expire(vcpu); __kvm_guest_enter(); if (unlikely(vcpu->arch.switch_db_regs)) { @@ -6527,8 +6533,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD; } - trace_kvm_entry(vcpu->vcpu_id); - wait_lapic_expire(vcpu); kvm_x86_ops->run(vcpu); /* diff --git a/arch/x86/um/signal.c b/arch/x86/um/signal.c index e5f854ce2d72..14fcd01ed992 100644 --- a/arch/x86/um/signal.c +++ b/arch/x86/um/signal.c @@ -470,7 +470,7 @@ long sys_sigreturn(void) struct sigcontext __user *sc = &frame->sc; int sig_size = (_NSIG_WORDS - 1) * sizeof(unsigned long); - if (copy_from_user(&set.sig[0], (void *)sc->oldmask, sizeof(set.sig[0])) || + if (copy_from_user(&set.sig[0], &sc->oldmask, sizeof(set.sig[0])) || copy_from_user(&set.sig[1], frame->extramask, sig_size)) goto segfault; diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index ac161db63388..cb5e266a8bf7 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -2495,14 +2495,9 @@ void __init xen_init_mmu_ops(void) { x86_init.paging.pagetable_init = xen_pagetable_init; - /* Optimization - 
we can use the HVM one but it has no idea which - * VCPUs are descheduled - which means that it will needlessly IPI - * them. Xen knows so let it do the job. - */ - if (xen_feature(XENFEAT_auto_translated_physmap)) { - pv_mmu_ops.flush_tlb_others = xen_flush_tlb_others; + if (xen_feature(XENFEAT_auto_translated_physmap)) return; - } + pv_mmu_ops = xen_mmu_ops; memset(dummy_mapping, 0xff, PAGE_SIZE); diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c index feddabdab448..3705eabd7e22 100644 --- a/arch/x86/xen/suspend.c +++ b/arch/x86/xen/suspend.c @@ -68,26 +68,16 @@ static void xen_pv_post_suspend(int suspend_cancelled) void xen_arch_pre_suspend(void) { - int cpu; - - for_each_online_cpu(cpu) - xen_pmu_finish(cpu); - if (xen_pv_domain()) xen_pv_pre_suspend(); } void xen_arch_post_suspend(int cancelled) { - int cpu; - if (xen_pv_domain()) xen_pv_post_suspend(cancelled); else xen_hvm_post_suspend(cancelled); - - for_each_online_cpu(cpu) - xen_pmu_init(cpu); } static void xen_vcpu_notify_restore(void *data) @@ -106,10 +96,20 @@ static void xen_vcpu_notify_suspend(void *data) void xen_arch_resume(void) { + int cpu; + on_each_cpu(xen_vcpu_notify_restore, NULL, 1); + + for_each_online_cpu(cpu) + xen_pmu_init(cpu); } void xen_arch_suspend(void) { + int cpu; + + for_each_online_cpu(cpu) + xen_pmu_finish(cpu); + on_each_cpu(xen_vcpu_notify_suspend, NULL, 1); } |