Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/Kconfig                       |   6
-rw-r--r--  arch/x86/crypto/aesni-intel_glue.c     |  24
-rw-r--r--  arch/x86/include/asm/acpi.h            |   4
-rw-r--r--  arch/x86/include/asm/page_64_types.h   |  21
-rw-r--r--  arch/x86/include/asm/signal.h          |  13
-rw-r--r--  arch/x86/include/asm/stackprotector.h  |  10
-rw-r--r--  arch/x86/kernel/apic/io_apic.c         |   3
-rw-r--r--  arch/x86/kernel/cpu/common.c           |   2
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce.c       |  49
-rw-r--r--  arch/x86/kernel/dumpstack_64.c         |   4
-rw-r--r--  arch/x86/kernel/early_printk.c         |  21
-rw-r--r--  arch/x86/kernel/entry_64.S             |   2
-rw-r--r--  arch/x86/kernel/hpet.c                 |  27
-rw-r--r--  arch/x86/kernel/irq_32.c               |   2
-rw-r--r--  arch/x86/kernel/irq_64.c               |   3
-rw-r--r--  arch/x86/kernel/irq_work.c             |   2
-rw-r--r--  arch/x86/kernel/kprobes.c              |   1
-rw-r--r--  arch/x86/kernel/process_32.c           |  36
-rw-r--r--  arch/x86/kernel/signal.c               |   9
-rw-r--r--  arch/x86/kernel/traps.c                |  32
-rw-r--r--  arch/x86/kvm/x86.c                     |   7
-rw-r--r--  arch/x86/mm/fault.c                    |   2
-rw-r--r--  arch/x86/mm/highmem_32.c               |   2
23 files changed, 200 insertions, 82 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 1d14cc6b79ad..70a97c33163b 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -155,10 +155,10 @@ config ARCH_MAY_HAVE_PC_FDC
 	def_bool ISA_DMA_API
 
 config RWSEM_GENERIC_SPINLOCK
-	def_bool !X86_XADD
+	def_bool !X86_XADD || PREEMPT_RT_FULL
 
 config RWSEM_XCHGADD_ALGORITHM
-	def_bool X86_XADD
+	def_bool X86_XADD && !RWSEM_GENERIC_SPINLOCK && !PREEMPT_RT_FULL
 
 config ARCH_HAS_CPU_IDLE_WAIT
 	def_bool y
@@ -750,7 +750,7 @@ config IOMMU_HELPER
 config MAXSMP
 	bool "Enable Maximum number of SMP Processors and NUMA Nodes"
 	depends on X86_64 && SMP && DEBUG_KERNEL && EXPERIMENTAL
-	select CPUMASK_OFFSTACK
+	select CPUMASK_OFFSTACK if !PREEMPT_RT_FULL
 	---help---
 	  Enable maximum number of CPUS and NUMA Nodes for this architecture.
 	  If unsure, say N.
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index c799352e24fc..0efa5144a640 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -290,14 +290,14 @@ static int ecb_encrypt(struct blkcipher_desc *desc,
 	err = blkcipher_walk_virt(desc, &walk);
 	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
 
-	kernel_fpu_begin();
 	while ((nbytes = walk.nbytes)) {
+		kernel_fpu_begin();
 		aesni_ecb_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr,
-			      nbytes & AES_BLOCK_MASK);
+				nbytes & AES_BLOCK_MASK);
+		kernel_fpu_end();
 		nbytes &= AES_BLOCK_SIZE - 1;
 		err = blkcipher_walk_done(desc, &walk, nbytes);
 	}
-	kernel_fpu_end();
 
 	return err;
 }
@@ -314,14 +314,14 @@ static int ecb_decrypt(struct blkcipher_desc *desc,
 	err = blkcipher_walk_virt(desc, &walk);
 	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
 
-	kernel_fpu_begin();
 	while ((nbytes = walk.nbytes)) {
+		kernel_fpu_begin();
 		aesni_ecb_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr,
 			      nbytes & AES_BLOCK_MASK);
+		kernel_fpu_end();
 		nbytes &= AES_BLOCK_SIZE - 1;
 		err = blkcipher_walk_done(desc, &walk, nbytes);
 	}
-	kernel_fpu_end();
 
 	return err;
 }
@@ -360,14 +360,14 @@ static int cbc_encrypt(struct blkcipher_desc *desc,
 	err = blkcipher_walk_virt(desc, &walk);
 	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
 
-	kernel_fpu_begin();
 	while ((nbytes = walk.nbytes)) {
+		kernel_fpu_begin();
 		aesni_cbc_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr,
 			      nbytes & AES_BLOCK_MASK, walk.iv);
+		kernel_fpu_end();
 		nbytes &= AES_BLOCK_SIZE - 1;
 		err = blkcipher_walk_done(desc, &walk, nbytes);
 	}
-	kernel_fpu_end();
 
 	return err;
 }
@@ -384,14 +384,14 @@ static int cbc_decrypt(struct blkcipher_desc *desc,
 	err = blkcipher_walk_virt(desc, &walk);
 	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
 
-	kernel_fpu_begin();
 	while ((nbytes = walk.nbytes)) {
+		kernel_fpu_begin();
 		aesni_cbc_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr,
 			      nbytes & AES_BLOCK_MASK, walk.iv);
+		kernel_fpu_end();
 		nbytes &= AES_BLOCK_SIZE - 1;
 		err = blkcipher_walk_done(desc, &walk, nbytes);
 	}
-	kernel_fpu_end();
 
 	return err;
 }
@@ -446,18 +446,20 @@ static int ctr_crypt(struct blkcipher_desc *desc,
 	err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE);
 	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
 
-	kernel_fpu_begin();
 	while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) {
+		kernel_fpu_begin();
 		aesni_ctr_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr,
 			      nbytes & AES_BLOCK_MASK, walk.iv);
+		kernel_fpu_end();
 		nbytes &= AES_BLOCK_SIZE - 1;
 		err = blkcipher_walk_done(desc, &walk, nbytes);
 	}
 	if (walk.nbytes) {
+		kernel_fpu_begin();
 		ctr_crypt_final(ctx, &walk);
+		kernel_fpu_end();
 		err = blkcipher_walk_done(desc, &walk, 0);
 	}
-	kernel_fpu_end();
 
 	return err;
 }
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 610001d385dd..c1c23d256ac6 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -51,8 +51,8 @@
 
 #define ACPI_ASM_MACROS
 #define BREAKPOINT3
-#define ACPI_DISABLE_IRQS() local_irq_disable()
-#define ACPI_ENABLE_IRQS()  local_irq_enable()
+#define ACPI_DISABLE_IRQS() local_irq_disable_nort()
+#define ACPI_ENABLE_IRQS()  local_irq_enable_nort()
 #define ACPI_FLUSH_CPU_CACHE()	wbinvd()
 
 int __acpi_acquire_global_lock(unsigned int *lock);
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
index 7639dbf5d223..0883ecdbc905 100644
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -14,12 +14,21 @@
 #define IRQ_STACK_ORDER 2
 #define IRQ_STACK_SIZE (PAGE_SIZE << IRQ_STACK_ORDER)
 
-#define STACKFAULT_STACK 1
-#define DOUBLEFAULT_STACK 2
-#define NMI_STACK 3
-#define DEBUG_STACK 4
-#define MCE_STACK 5
-#define N_EXCEPTION_STACKS 5  /* hw limit: 7 */
+#ifdef CONFIG_PREEMPT_RT_FULL
+# define STACKFAULT_STACK 0
+# define DOUBLEFAULT_STACK 1
+# define NMI_STACK 2
+# define DEBUG_STACK 0
+# define MCE_STACK 3
+# define N_EXCEPTION_STACKS 3  /* hw limit: 7 */
+#else
+# define STACKFAULT_STACK 1
+# define DOUBLEFAULT_STACK 2
+# define NMI_STACK 3
+# define DEBUG_STACK 4
+# define MCE_STACK 5
+# define N_EXCEPTION_STACKS 5  /* hw limit: 7 */
+#endif
 
 #define PUD_PAGE_SIZE	(_AC(1, UL) << PUD_SHIFT)
 #define PUD_PAGE_MASK	(~(PUD_PAGE_SIZE-1))
diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h
index 598457cbd0f8..1213ebd7bee7 100644
--- a/arch/x86/include/asm/signal.h
+++ b/arch/x86/include/asm/signal.h
@@ -31,6 +31,19 @@ typedef struct {
 	unsigned long sig[_NSIG_WORDS];
 } sigset_t;
 
+/*
+ * Because some traps use the IST stack, we must keep
+ * preemption disabled while calling do_trap(), but do_trap()
+ * may call force_sig_info() which will grab the signal spin_locks
+ * for the task, which in PREEMPT_RT_FULL are mutexes.
+ * By defining ARCH_RT_DELAYS_SIGNAL_SEND the force_sig_info() will
+ * set TIF_NOTIFY_RESUME and set up the signal to be sent on exit
+ * of the trap.
+ */
+#if defined(CONFIG_PREEMPT_RT_FULL) && defined(CONFIG_X86_64)
+#define ARCH_RT_DELAYS_SIGNAL_SEND
+#endif
+
 #else
 /* Here we must cater to libcs that poke about in kernel headers.  */
 
diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h
index b5d9533d2c38..0f3d7b1cf9fb 100644
--- a/arch/x86/include/asm/stackprotector.h
+++ b/arch/x86/include/asm/stackprotector.h
@@ -57,7 +57,7 @@
  */
 static __always_inline void boot_init_stack_canary(void)
 {
-	u64 canary;
+	u64 uninitialized_var(canary);
 	u64 tsc;
 
 #ifdef CONFIG_X86_64
@@ -68,8 +68,16 @@ static __always_inline void boot_init_stack_canary(void)
 	 * of randomness. The TSC only matters for very early init,
 	 * there it already has some randomness on most systems. Later
 	 * on during the bootup the random pool has true entropy too.
+	 *
+	 * For preempt-rt we need to weaken the randomness a bit, as
+	 * we can't call into the random generator from atomic context
+	 * due to locking constraints. We just leave canary
+	 * uninitialized and use the TSC based randomness on top of
+	 * it.
 	 */
+#ifndef CONFIG_PREEMPT_RT_FULL
 	get_random_bytes(&canary, sizeof(canary));
+#endif
 	tsc = __native_read_tsc();
 	canary += tsc + (tsc << 32UL);
 
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index e88300d8e80a..01c76a0f2845 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -2555,7 +2555,8 @@ atomic_t irq_mis_count;
 static inline bool ioapic_irqd_mask(struct irq_data *data, struct irq_cfg *cfg)
 {
 	/* If we are moving the irq we need to mask it */
-	if (unlikely(irqd_is_setaffinity_pending(data))) {
+	if (unlikely(irqd_is_setaffinity_pending(data) &&
+		     !irqd_irq_inprogress(data))) {
 		mask_ioapic(cfg);
 		return true;
 	}
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 67e258362a3d..97256335ad73 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1056,7 +1056,9 @@ DEFINE_PER_CPU(struct task_struct *, fpu_owner_task);
  */
 static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
 	  [0 ... N_EXCEPTION_STACKS - 1]	= EXCEPTION_STKSZ,
+#if DEBUG_STACK > 0
 	  [DEBUG_STACK - 1]			= DEBUG_STKSZ
+#endif
 };
 
 static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index d086a09c087d..bb761c950c5a 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -38,6 +38,7 @@
 #include <linux/debugfs.h>
 #include <linux/irq_work.h>
 #include <linux/export.h>
+#include <linux/jiffies.h>
 
 #include <asm/processor.h>
 #include <asm/mce.h>
@@ -1229,17 +1230,14 @@ void mce_log_therm_throt_event(__u64 status)
  * poller finds an MCE, poll 2x faster.  When the poller finds no more
  * errors, poll 2x slower (up to check_interval seconds).
  */
-static int check_interval = 5 * 60; /* 5 minutes */
+static unsigned long check_interval = 5 * 60; /* 5 minutes */
 
-static DEFINE_PER_CPU(int, mce_next_interval); /* in jiffies */
-static DEFINE_PER_CPU(struct timer_list, mce_timer);
+static DEFINE_PER_CPU(unsigned long, mce_next_interval); /* in jiffies */
+static DEFINE_PER_CPU(struct hrtimer, mce_timer);
 
-static void mce_start_timer(unsigned long data)
+static enum hrtimer_restart mce_start_timer(struct hrtimer *timer)
 {
-	struct timer_list *t = &per_cpu(mce_timer, data);
-	int *n;
-
-	WARN_ON(smp_processor_id() != data);
+	unsigned long *n;
 
 	if (mce_available(__this_cpu_ptr(&cpu_info))) {
 		machine_check_poll(MCP_TIMESTAMP,
@@ -1252,21 +1250,22 @@ static void mce_start_timer(unsigned long data)
 	 */
 	n = &__get_cpu_var(mce_next_interval);
 	if (mce_notify_irq())
-		*n = max(*n/2, HZ/100);
+		*n = max(*n/2, HZ/100UL);
 	else
-		*n = min(*n*2, (int)round_jiffies_relative(check_interval*HZ));
+		*n = min(*n*2, round_jiffies_relative(check_interval*HZ));
 
-	t->expires = jiffies + *n;
-	add_timer_on(t, smp_processor_id());
+	hrtimer_forward(timer, timer->base->get_time(),
+			ns_to_ktime(jiffies_to_usecs(*n) * 1000));
+	return HRTIMER_RESTART;
 }
 
-/* Must not be called in IRQ context where del_timer_sync() can deadlock */
+/* Must not be called in IRQ context where hrtimer_cancel() can deadlock */
 static void mce_timer_delete_all(void)
 {
 	int cpu;
 
 	for_each_online_cpu(cpu)
-		del_timer_sync(&per_cpu(mce_timer, cpu));
+		hrtimer_cancel(&per_cpu(mce_timer, cpu));
 }
 
 static void mce_do_trigger(struct work_struct *work)
@@ -1496,10 +1495,11 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
 
 static void __mcheck_cpu_init_timer(void)
 {
-	struct timer_list *t = &__get_cpu_var(mce_timer);
-	int *n = &__get_cpu_var(mce_next_interval);
+	struct hrtimer *t = &__get_cpu_var(mce_timer);
+	unsigned long *n = &__get_cpu_var(mce_next_interval);
 
-	setup_timer(t, mce_start_timer, smp_processor_id());
+	hrtimer_init(t, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	t->function = mce_start_timer;
 
 	if (mce_ignore_ce)
 		return;
@@ -1507,8 +1507,9 @@ static void __mcheck_cpu_init_timer(void)
 	*n = check_interval * HZ;
 	if (!*n)
 		return;
-	t->expires = round_jiffies(jiffies + *n);
-	add_timer_on(t, smp_processor_id());
+
+	hrtimer_start_range_ns(t, ns_to_ktime(jiffies_to_usecs(*n) * 1000),
+			       0 , HRTIMER_MODE_REL_PINNED);
 }
 
 /* Handle unconfigured int18 (should never happen) */
@@ -2160,6 +2161,8 @@ static void __cpuinit mce_disable_cpu(void *h)
 	if (!mce_available(__this_cpu_ptr(&cpu_info)))
 		return;
 
+	hrtimer_cancel(&__get_cpu_var(mce_timer));
+
 	if (!(action & CPU_TASKS_FROZEN))
 		cmci_clear();
 	for (i = 0; i < banks; i++) {
@@ -2186,6 +2189,7 @@ static void __cpuinit mce_reenable_cpu(void *h)
 		if (b->init)
 			wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl);
 	}
+	__mcheck_cpu_init_timer();
 }
 
 /* Get notified when a cpu comes on/off. Be hotplug friendly. */
@@ -2193,7 +2197,6 @@ static int __cpuinit
 mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
 {
 	unsigned int cpu = (unsigned long)hcpu;
-	struct timer_list *t = &per_cpu(mce_timer, cpu);
 
 	switch (action) {
 	case CPU_ONLINE:
@@ -2210,16 +2213,10 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
 		break;
 	case CPU_DOWN_PREPARE:
 	case CPU_DOWN_PREPARE_FROZEN:
-		del_timer_sync(t);
 		smp_call_function_single(cpu, mce_disable_cpu, &action, 1);
 		break;
 	case CPU_DOWN_FAILED:
 	case CPU_DOWN_FAILED_FROZEN:
-		if (!mce_ignore_ce && check_interval) {
-			t->expires = round_jiffies(jiffies +
-					__get_cpu_var(mce_next_interval));
-			add_timer_on(t, cpu);
-		}
 		smp_call_function_single(cpu, mce_reenable_cpu, &action, 1);
 		break;
 	case CPU_POST_DEAD:
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 17107bd6e1f0..9d50b3012dab 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -21,10 +21,14 @@
 		(N_EXCEPTION_STACKS + DEBUG_STKSZ/EXCEPTION_STKSZ - 2)
 
 static char x86_stack_ids[][8] = {
+#if DEBUG_STACK > 0
 		[ DEBUG_STACK-1			]	= "#DB",
+#endif
 		[ NMI_STACK-1			]	= "NMI",
 		[ DOUBLEFAULT_STACK-1		]	= "#DF",
+#if STACKFAULT_STACK > 0
 		[ STACKFAULT_STACK-1		]	= "#SS",
+#endif
 		[ MCE_STACK-1			]	= "#MC",
 #if DEBUG_STKSZ > EXCEPTION_STKSZ
 		[ N_EXCEPTION_STACKS ...
diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c
index 9b9f18b49918..d15f575a861b 100644
--- a/arch/x86/kernel/early_printk.c
+++ b/arch/x86/kernel/early_printk.c
@@ -169,25 +169,9 @@ static struct console early_serial_console = {
 	.index =	-1,
 };
 
-/* Direct interface for emergencies */
-static struct console *early_console = &early_vga_console;
-static int __initdata early_console_initialized;
-
-asmlinkage void early_printk(const char *fmt, ...)
-{
-	char buf[512];
-	int n;
-	va_list ap;
-
-	va_start(ap, fmt);
-	n = vscnprintf(buf, sizeof(buf), fmt, ap);
-	early_console->write(early_console, buf, n);
-	va_end(ap);
-}
-
 static inline void early_console_register(struct console *con, int keep_early)
 {
-	if (early_console->index != -1) {
+	if (con->index != -1) {
 		printk(KERN_CRIT "ERROR: earlyprintk= %s already used\n",
 		       con->name);
 		return;
@@ -207,9 +191,8 @@ static int __init setup_early_printk(char *buf)
 	if (!buf)
 		return 0;
 
-	if (early_console_initialized)
+	if (early_console)
 		return 0;
-	early_console_initialized = 1;
 
 	keep = (strstr(buf, "keep") != NULL);
 
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index cdc79b5cfcd9..1bfb07bc643c 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1240,6 +1240,7 @@ ENTRY(kernel_execve)
 	CFI_ENDPROC
 END(kernel_execve)
 
+#ifndef CONFIG_PREEMPT_RT_FULL
 /* Call softirq on interrupt stack. Interrupts are off. */
 ENTRY(call_softirq)
 	CFI_STARTPROC
@@ -1259,6 +1260,7 @@ ENTRY(call_softirq)
 	ret
 	CFI_ENDPROC
 END(call_softirq)
+#endif
 
 #ifdef CONFIG_XEN
 zeroentry xen_hypervisor_callback xen_do_hypervisor_callback
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index ad0de0c2714e..230a200763aa 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -8,6 +8,7 @@
 #include <linux/slab.h>
 #include <linux/hpet.h>
 #include <linux/init.h>
+#include <linux/dmi.h>
 #include <linux/cpu.h>
 #include <linux/pm.h>
 #include <linux/io.h>
@@ -570,6 +571,30 @@ static void init_one_hpet_msi_clockevent(struct hpet_dev *hdev, int cpu)
 #define RESERVE_TIMERS 0
 #endif
 
+static int __init dmi_disable_hpet_msi(const struct dmi_system_id *d)
+{
+	hpet_msi_disable = 1;
+	return 0;
+}
+
+static struct dmi_system_id __initdata dmi_hpet_table[] = {
+	/*
+	 * MSI based per cpu timers lose interrupts when intel_idle()
+	 * is enabled - independent of the c-state. With idle=poll the
+	 * problem cannot be observed. We have no idea yet, whether
+	 * this is a W510 specific issue or a general chipset oddity.
+	 */
+	{
+	 .callback = dmi_disable_hpet_msi,
+	 .ident = "Lenovo W510",
+	 .matches = {
+		     DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+		     DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad W510"),
+		     },
+	 },
+	{}
+};
+
 static void hpet_msi_capability_lookup(unsigned int start_timer)
 {
 	unsigned int id;
@@ -577,6 +602,8 @@ static void hpet_msi_capability_lookup(unsigned int start_timer)
 	unsigned int num_timers_used = 0;
 	int i;
 
+	dmi_check_system(dmi_hpet_table);
+
 	if (hpet_msi_disable)
 		return;
 
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 58b7f27cb3e9..a7b4e1b2e6eb 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -149,6 +149,7 @@ void __cpuinit irq_ctx_init(int cpu)
 	       cpu, per_cpu(hardirq_ctx, cpu),  per_cpu(softirq_ctx, cpu));
 }
 
+#ifndef CONFIG_PREEMPT_RT_FULL
 asmlinkage void do_softirq(void)
 {
 	unsigned long flags;
@@ -179,6 +180,7 @@ asmlinkage void do_softirq(void)
 
 	local_irq_restore(flags);
 }
+#endif
 
 bool handle_irq(unsigned irq, struct pt_regs *regs)
 {
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index d04d3ecded62..831f247b5798 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -88,7 +88,7 @@ bool handle_irq(unsigned irq, struct pt_regs *regs)
 	return true;
 }
 
-
+#ifndef CONFIG_PREEMPT_RT_FULL
 extern void call_softirq(void);
 
 asmlinkage void do_softirq(void)
@@ -108,3 +108,4 @@ asmlinkage void do_softirq(void)
 	}
 	local_irq_restore(flags);
 }
+#endif
diff --git a/arch/x86/kernel/irq_work.c b/arch/x86/kernel/irq_work.c
index ca8f703a1e70..129b8bb73de2 100644
--- a/arch/x86/kernel/irq_work.c
+++ b/arch/x86/kernel/irq_work.c
@@ -18,6 +18,7 @@ void smp_irq_work_interrupt(struct pt_regs *regs)
 	irq_exit();
 }
 
+#ifndef CONFIG_PREEMPT_RT_FULL
 void arch_irq_work_raise(void)
 {
 #ifdef CONFIG_X86_LOCAL_APIC
@@ -28,3 +29,4 @@ void arch_irq_work_raise(void)
 	apic_wait_icr_idle();
 #endif
 }
+#endif
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index e213fc8408d2..b9edf1a32cf8 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -486,7 +486,6 @@ setup_singlestep(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *k
 		 * stepping.
 		 */
 		regs->ip = (unsigned long)p->ainsn.insn;
-		preempt_enable_no_resched();
 		return;
 	}
 #endif
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index ae6847303e26..2b0882add416 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -36,6 +36,7 @@
 #include <linux/uaccess.h>
 #include <linux/io.h>
 #include <linux/kdebug.h>
+#include <linux/highmem.h>
 
 #include <asm/pgtable.h>
 #include <asm/ldt.h>
@@ -285,6 +286,41 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 		     task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT))
 		__switch_to_xtra(prev_p, next_p, tss);
 
+#if defined CONFIG_PREEMPT_RT_FULL && defined CONFIG_HIGHMEM
+	/*
+	 * Save @prev's kmap_atomic stack
+	 */
+	prev_p->kmap_idx = __this_cpu_read(__kmap_atomic_idx);
+	if (unlikely(prev_p->kmap_idx)) {
+		int i;
+
+		for (i = 0; i < prev_p->kmap_idx; i++) {
+			int idx = i + KM_TYPE_NR * smp_processor_id();
+
+			pte_t *ptep = kmap_pte - idx;
+			prev_p->kmap_pte[i] = *ptep;
+			kpte_clear_flush(ptep, __fix_to_virt(FIX_KMAP_BEGIN + idx));
+		}
+
+		__this_cpu_write(__kmap_atomic_idx, 0);
+	}
+
+	/*
+	 * Restore @next_p's kmap_atomic stack
+	 */
+	if (unlikely(next_p->kmap_idx)) {
+		int i;
+
+		__this_cpu_write(__kmap_atomic_idx, next_p->kmap_idx);
+
+		for (i = 0; i < next_p->kmap_idx; i++) {
+			int idx = i + KM_TYPE_NR * smp_processor_id();
+
+			set_pte(kmap_pte - idx, next_p->kmap_pte[i]);
+		}
+	}
+#endif
+
 	/*
 	 * Leave lazy mode, flushing any hypercalls made here.
 	 * This must be done before restoring TLS segments so
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 115eac431483..c67d83fe0d00 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -824,6 +824,15 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
 		mce_notify_process();
 #endif /* CONFIG_X86_64 && CONFIG_X86_MCE */
 
+#ifdef ARCH_RT_DELAYS_SIGNAL_SEND
+	if (unlikely(current->forced_info.si_signo)) {
+		struct task_struct *t = current;
+		force_sig_info(t->forced_info.si_signo,
+			       &t->forced_info, t);
+		t->forced_info.si_signo = 0;
+	}
+#endif
+
 	/* deal with pending signal delivery */
 	if (thread_info_flags & _TIF_SIGPENDING)
 		do_signal(regs);
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index ff9281f16029..0b019777d8f3 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -87,9 +87,21 @@ static inline void conditional_sti(struct pt_regs *regs)
 		local_irq_enable();
 }
 
-static inline void preempt_conditional_sti(struct pt_regs *regs)
+static inline void conditional_sti_ist(struct pt_regs *regs)
 {
+#ifdef CONFIG_X86_64
+	/*
+	 * X86_64 uses a per CPU stack on the IST for certain traps
+	 * like int3. The task can not be preempted when using one
+	 * of these stacks, thus preemption must be disabled, otherwise
+	 * the stack can be corrupted if the task is scheduled out,
+	 * and another task comes in and uses this stack.
+	 *
+	 * On x86_32 the task keeps its own stack and it is OK if the
+	 * task schedules out.
+	 */
 	inc_preempt_count();
+#endif
 	if (regs->flags & X86_EFLAGS_IF)
 		local_irq_enable();
 }
@@ -100,11 +112,13 @@ static inline void conditional_cli(struct pt_regs *regs)
 		local_irq_disable();
 }
 
-static inline void preempt_conditional_cli(struct pt_regs *regs)
+static inline void conditional_cli_ist(struct pt_regs *regs)
 {
 	if (regs->flags & X86_EFLAGS_IF)
 		local_irq_disable();
+#ifdef CONFIG_X86_64
 	dec_preempt_count();
+#endif
 }
 
 static void __kprobes
@@ -226,9 +240,9 @@ dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code)
 	if (notify_die(DIE_TRAP, "stack segment", regs, error_code,
 		       X86_TRAP_SS, SIGBUS) == NOTIFY_STOP)
 		return;
-	preempt_conditional_sti(regs);
+	conditional_sti_ist(regs);
 	do_trap(X86_TRAP_SS, SIGBUS, "stack segment", regs, error_code, NULL);
-	preempt_conditional_cli(regs);
+	conditional_cli_ist(regs);
 }
 
 dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
@@ -320,9 +334,9 @@ dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code)
 	 * as we may switch to the interrupt stack.
 	 */
 	debug_stack_usage_inc();
-	preempt_conditional_sti(regs);
+	conditional_sti_ist(regs);
 	do_trap(X86_TRAP_BP, SIGTRAP, "int3", regs, error_code, NULL);
-	preempt_conditional_cli(regs);
+	conditional_cli_ist(regs);
 	debug_stack_usage_dec();
 }
 
@@ -423,12 +437,12 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
 	debug_stack_usage_inc();
 
 	/* It's safe to allow irq's after DR6 has been saved */
-	preempt_conditional_sti(regs);
+	conditional_sti_ist(regs);
 
 	if (regs->flags & X86_VM_MASK) {
 		handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code,
 				 X86_TRAP_DB);
-		preempt_conditional_cli(regs);
+		conditional_cli_ist(regs);
 		debug_stack_usage_dec();
 		return;
 	}
@@ -448,7 +462,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
 	si_code = get_si_code(tsk->thread.debugreg6);
 	if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp)
 		send_sigtrap(tsk, regs, error_code, si_code);
-	preempt_conditional_cli(regs);
+	conditional_cli_ist(regs);
 	debug_stack_usage_dec();
 
 	return;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 4044ce0bf7c1..47337cf39b3f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4882,6 +4882,13 @@ int kvm_arch_init(void *opaque)
 		goto out;
 	}
 
+#ifdef CONFIG_PREEMPT_RT_FULL
+	if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
+		printk(KERN_ERR "RT requires X86_FEATURE_CONSTANT_TSC\n");
+		return -EOPNOTSUPP;
+	}
+#endif
+
 	r = kvm_mmu_module_init();
 	if (r)
 		goto out;
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 3ecfd1aaf214..9d57357c2e1d 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1094,7 +1094,7 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code)
 	 * If we're in an interrupt, have no user context or are running
 	 * in an atomic region then we must not take the fault:
 	 */
-	if (unlikely(in_atomic() || !mm)) {
+	if (unlikely(!mm || pagefault_disabled())) {
 		bad_area_nosemaphore(regs, error_code, address);
 		return;
 	}
diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c
index 6f31ee56c008..ab8683ade5c8 100644
--- a/arch/x86/mm/highmem_32.c
+++ b/arch/x86/mm/highmem_32.c
@@ -43,7 +43,7 @@ void *kmap_atomic_prot(struct page *page, pgprot_t prot)
 	type = kmap_atomic_idx_push();
 	idx = type + KM_TYPE_NR*smp_processor_id();
 	vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
-	BUG_ON(!pte_none(*(kmap_pte-idx)));
+	WARN_ON(!pte_none(*(kmap_pte-idx)));
 	set_pte(kmap_pte-idx, mk_pte(page, prot));
 	arch_flush_lazy_mmu_mode();
 