diff options
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r-- | arch/x86/kernel/apic/apic.c | 6 | ||||
-rw-r--r-- | arch/x86/kernel/apic/io_apic.c | 5 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/common.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce.c | 49 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/therm_throt.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/threshold.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/dumpstack_64.c | 4 | ||||
-rw-r--r-- | arch/x86/kernel/early_printk.c | 21 | ||||
-rw-r--r-- | arch/x86/kernel/entry_32.S | 8 | ||||
-rw-r--r-- | arch/x86/kernel/entry_64.S | 2 | ||||
-rw-r--r-- | arch/x86/kernel/hpet.c | 27 | ||||
-rw-r--r-- | arch/x86/kernel/irq.c | 6 | ||||
-rw-r--r-- | arch/x86/kernel/irq_32.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/irq_64.c | 3 | ||||
-rw-r--r-- | arch/x86/kernel/irq_work.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/kprobes.c | 1 | ||||
-rw-r--r-- | arch/x86/kernel/process_32.c | 45 | ||||
-rw-r--r-- | arch/x86/kernel/process_64.c | 4 | ||||
-rw-r--r-- | arch/x86/kernel/signal.c | 9 | ||||
-rw-r--r-- | arch/x86/kernel/traps.c | 32 | ||||
-rw-r--r-- | arch/x86/kernel/vsyscall_64.c | 10 |
21 files changed, 165 insertions, 77 deletions
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index f98d84caf94c..2cd2d93643dc 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -876,8 +876,8 @@ void __irq_entry smp_apic_timer_interrupt(struct pt_regs *regs) * Besides, if we don't timer interrupts ignore the global * interrupt lock, which is the WrongThing (tm) to do. */ - exit_idle(); irq_enter(); + exit_idle(); local_apic_timer_interrupt(); irq_exit(); @@ -1809,8 +1809,8 @@ void smp_spurious_interrupt(struct pt_regs *regs) { u32 v; - exit_idle(); irq_enter(); + exit_idle(); /* * Check if this really is a spurious interrupt and ACK it * if it is a vectored one. Just in case... @@ -1846,8 +1846,8 @@ void smp_error_interrupt(struct pt_regs *regs) "Illegal register address", /* APIC Error Bit 7 */ }; - exit_idle(); irq_enter(); + exit_idle(); /* First tickle the hardware, only then report what went on. -- REW */ v0 = apic_read(APIC_ESR); apic_write(APIC_ESR, 0); diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 6d939d7847e2..91527bc8a621 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2421,8 +2421,8 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void) unsigned vector, me; ack_APIC_irq(); - exit_idle(); irq_enter(); + exit_idle(); me = smp_processor_id(); for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { @@ -2521,7 +2521,8 @@ static void ack_apic_level(struct irq_data *data) irq_complete_move(cfg); #ifdef CONFIG_GENERIC_PENDING_IRQ /* If we are moving the irq we need to mask it */ - if (unlikely(irqd_is_setaffinity_pending(data))) { + if (unlikely(irqd_is_setaffinity_pending(data) && + !irqd_irq_inprogress(data))) { do_unmask_irq = 1; mask_ioapic(cfg); } diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index aa003b13a831..d39b525fd321 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1055,7 +1055,9 @@ 
DEFINE_PER_CPU(unsigned int, irq_count) = -1; */ static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = { [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ, +#if DEBUG_STACK > 0 [DEBUG_STACK - 1] = DEBUG_STKSZ +#endif }; static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 2af127d4c3d1..f1f1cb1cdad9 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -38,6 +38,7 @@ #include <linux/debugfs.h> #include <linux/irq_work.h> #include <linux/export.h> +#include <linux/jiffies.h> #include <asm/processor.h> #include <asm/mce.h> @@ -1106,17 +1107,14 @@ void mce_log_therm_throt_event(__u64 status) * poller finds an MCE, poll 2x faster. When the poller finds no more * errors, poll 2x slower (up to check_interval seconds). */ -static int check_interval = 5 * 60; /* 5 minutes */ +static unsigned long check_interval = 5 * 60; /* 5 minutes */ -static DEFINE_PER_CPU(int, mce_next_interval); /* in jiffies */ -static DEFINE_PER_CPU(struct timer_list, mce_timer); +static DEFINE_PER_CPU(unsigned long, mce_next_interval); /* in jiffies */ +static DEFINE_PER_CPU(struct hrtimer, mce_timer); -static void mce_start_timer(unsigned long data) +static enum hrtimer_restart mce_start_timer(struct hrtimer *timer) { - struct timer_list *t = &per_cpu(mce_timer, data); - int *n; - - WARN_ON(smp_processor_id() != data); + unsigned long *n; if (mce_available(__this_cpu_ptr(&cpu_info))) { machine_check_poll(MCP_TIMESTAMP, @@ -1129,21 +1127,22 @@ static void mce_start_timer(unsigned long data) */ n = &__get_cpu_var(mce_next_interval); if (mce_notify_irq()) - *n = max(*n/2, HZ/100); + *n = max(*n/2, HZ/100UL); else - *n = min(*n*2, (int)round_jiffies_relative(check_interval*HZ)); + *n = min(*n*2, round_jiffies_relative(check_interval*HZ)); - t->expires = jiffies + *n; - add_timer_on(t, smp_processor_id()); + hrtimer_forward(timer, timer->base->get_time(), + 
ns_to_ktime(jiffies_to_usecs(*n) * 1000ULL)); + return HRTIMER_RESTART; } -/* Must not be called in IRQ context where del_timer_sync() can deadlock */ +/* Must not be called in IRQ context where hrtimer_cancel() can deadlock */ static void mce_timer_delete_all(void) { int cpu; for_each_online_cpu(cpu) - del_timer_sync(&per_cpu(mce_timer, cpu)); + hrtimer_cancel(&per_cpu(mce_timer, cpu)); } static void mce_do_trigger(struct work_struct *work) @@ -1375,10 +1374,11 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c) static void __mcheck_cpu_init_timer(void) { - struct timer_list *t = &__get_cpu_var(mce_timer); - int *n = &__get_cpu_var(mce_next_interval); + struct hrtimer *t = &__get_cpu_var(mce_timer); + unsigned long *n = &__get_cpu_var(mce_next_interval); - setup_timer(t, mce_start_timer, smp_processor_id()); + hrtimer_init(t, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + t->function = mce_start_timer; if (mce_ignore_ce) return; @@ -1386,8 +1386,9 @@ static void __mcheck_cpu_init_timer(void) *n = check_interval * HZ; if (!*n) return; - t->expires = round_jiffies(jiffies + *n); - add_timer_on(t, smp_processor_id()); + + hrtimer_start_range_ns(t, ns_to_ktime(jiffies_to_usecs(*n) * 1000ULL), + 0 , HRTIMER_MODE_REL_PINNED); } /* Handle unconfigured int18 (should never happen) */ @@ -2023,6 +2024,8 @@ static void __cpuinit mce_disable_cpu(void *h) if (!mce_available(__this_cpu_ptr(&cpu_info))) return; + hrtimer_cancel(&__get_cpu_var(mce_timer)); + if (!(action & CPU_TASKS_FROZEN)) cmci_clear(); for (i = 0; i < banks; i++) { @@ -2049,6 +2052,7 @@ static void __cpuinit mce_reenable_cpu(void *h) if (b->init) wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl); } + __mcheck_cpu_init_timer(); } /* Get notified when a cpu comes on/off. Be hotplug friendly. 
*/ @@ -2056,7 +2060,6 @@ static int __cpuinit mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { unsigned int cpu = (unsigned long)hcpu; - struct timer_list *t = &per_cpu(mce_timer, cpu); switch (action) { case CPU_ONLINE: @@ -2073,16 +2076,10 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) break; case CPU_DOWN_PREPARE: case CPU_DOWN_PREPARE_FROZEN: - del_timer_sync(t); smp_call_function_single(cpu, mce_disable_cpu, &action, 1); break; case CPU_DOWN_FAILED: case CPU_DOWN_FAILED_FROZEN: - if (!mce_ignore_ce && check_interval) { - t->expires = round_jiffies(jiffies + - __get_cpu_var(mce_next_interval)); - add_timer_on(t, cpu); - } smp_call_function_single(cpu, mce_reenable_cpu, &action, 1); break; case CPU_POST_DEAD: diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 787e06c84ea6..ce215616d5b9 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -397,8 +397,8 @@ static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt; asmlinkage void smp_thermal_interrupt(struct pt_regs *regs) { - exit_idle(); irq_enter(); + exit_idle(); inc_irq_stat(irq_thermal_count); smp_thermal_vector(); irq_exit(); diff --git a/arch/x86/kernel/cpu/mcheck/threshold.c b/arch/x86/kernel/cpu/mcheck/threshold.c index d746df2909c9..aa578cadb940 100644 --- a/arch/x86/kernel/cpu/mcheck/threshold.c +++ b/arch/x86/kernel/cpu/mcheck/threshold.c @@ -19,8 +19,8 @@ void (*mce_threshold_vector)(void) = default_threshold_interrupt; asmlinkage void smp_threshold_interrupt(void) { - exit_idle(); irq_enter(); + exit_idle(); inc_irq_stat(irq_threshold_count); mce_threshold_vector(); irq_exit(); diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 6d728d9284bd..352beb75ef78 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -21,10 +21,14 @@ (N_EXCEPTION_STACKS + 
DEBUG_STKSZ/EXCEPTION_STKSZ - 2) static char x86_stack_ids[][8] = { +#if DEBUG_STACK > 0 [ DEBUG_STACK-1 ] = "#DB", +#endif [ NMI_STACK-1 ] = "NMI", [ DOUBLEFAULT_STACK-1 ] = "#DF", +#if STACKFAULT_STACK > 0 [ STACKFAULT_STACK-1 ] = "#SS", +#endif [ MCE_STACK-1 ] = "#MC", #if DEBUG_STKSZ > EXCEPTION_STKSZ [ N_EXCEPTION_STACKS ... diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c index cd28a350f7f9..5f3d9c5e89c7 100644 --- a/arch/x86/kernel/early_printk.c +++ b/arch/x86/kernel/early_printk.c @@ -169,25 +169,9 @@ static struct console early_serial_console = { .index = -1, }; -/* Direct interface for emergencies */ -static struct console *early_console = &early_vga_console; -static int __initdata early_console_initialized; - -asmlinkage void early_printk(const char *fmt, ...) -{ - char buf[512]; - int n; - va_list ap; - - va_start(ap, fmt); - n = vscnprintf(buf, sizeof(buf), fmt, ap); - early_console->write(early_console, buf, n); - va_end(ap); -} - static inline void early_console_register(struct console *con, int keep_early) { - if (early_console->index != -1) { + if (con->index != -1) { printk(KERN_CRIT "ERROR: earlyprintk= %s already used\n", con->name); return; @@ -207,9 +191,8 @@ static int __init setup_early_printk(char *buf) if (!buf) return 0; - if (early_console_initialized) + if (early_console) return 0; - early_console_initialized = 1; keep = (strstr(buf, "keep") != NULL); diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index f3f6f5344001..3e660d2cebb6 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -626,7 +626,11 @@ work_notifysig: # deal with pending signals and jne work_notifysig_v86 # returning to kernel-space or # vm86-space xorl %edx, %edx + TRACE_IRQS_ON + ENABLE_INTERRUPTS(CLBR_NONE) call do_notify_resume + DISABLE_INTERRUPTS(CLBR_ANY) + TRACE_IRQS_OFF jmp resume_userspace_sig ALIGN @@ -639,7 +643,11 @@ work_notifysig_v86: movl %esp, %eax #endif xorl %edx, %edx + 
TRACE_IRQS_ON + ENABLE_INTERRUPTS(CLBR_NONE) call do_notify_resume + DISABLE_INTERRUPTS(CLBR_ANY) + TRACE_IRQS_OFF jmp resume_userspace_sig END(work_pending) diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index faf8d5e74b0b..fb0f57824bd8 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1192,6 +1192,7 @@ ENTRY(kernel_execve) CFI_ENDPROC END(kernel_execve) +#ifndef CONFIG_PREEMPT_RT_FULL /* Call softirq on interrupt stack. Interrupts are off. */ ENTRY(call_softirq) CFI_STARTPROC @@ -1211,6 +1212,7 @@ ENTRY(call_softirq) ret CFI_ENDPROC END(call_softirq) +#endif #ifdef CONFIG_XEN zeroentry xen_hypervisor_callback xen_do_hypervisor_callback diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 1bb0bf4d92cd..d86b82190b80 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -9,6 +9,7 @@ #include <linux/slab.h> #include <linux/hpet.h> #include <linux/init.h> +#include <linux/dmi.h> #include <linux/cpu.h> #include <linux/pm.h> #include <linux/io.h> @@ -568,6 +569,30 @@ static void init_one_hpet_msi_clockevent(struct hpet_dev *hdev, int cpu) #define RESERVE_TIMERS 0 #endif +static int __init dmi_disable_hpet_msi(const struct dmi_system_id *d) +{ + hpet_msi_disable = 1; + return 0; +} + +static struct dmi_system_id __initdata dmi_hpet_table[] = { + /* + * MSI based per cpu timers lose interrupts when intel_idle() + * is enabled - independent of the c-state. With idle=poll the + * problem cannot be observed. We have no idea yet, whether + * this is a W510 specific issue or a general chipset oddity. 
+ */ + { + .callback = dmi_disable_hpet_msi, + .ident = "Lenovo W510", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad W510"), + }, + }, + {} +}; + static void hpet_msi_capability_lookup(unsigned int start_timer) { unsigned int id; @@ -575,6 +600,8 @@ static void hpet_msi_capability_lookup(unsigned int start_timer) unsigned int num_timers_used = 0; int i; + dmi_check_system(dmi_hpet_table); + if (hpet_msi_disable) return; diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 429e0c92924e..5d31e5bdbf85 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -181,8 +181,8 @@ unsigned int __irq_entry do_IRQ(struct pt_regs *regs) unsigned vector = ~regs->orig_ax; unsigned irq; - exit_idle(); irq_enter(); + exit_idle(); irq = __this_cpu_read(vector_irq[vector]); @@ -209,10 +209,10 @@ void smp_x86_platform_ipi(struct pt_regs *regs) ack_APIC_irq(); - exit_idle(); - irq_enter(); + exit_idle(); + inc_irq_stat(x86_platform_ipis); if (x86_platform_ipi_callback) diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 72090705a656..84417a251c3f 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -149,6 +149,7 @@ void __cpuinit irq_ctx_init(int cpu) cpu, per_cpu(hardirq_ctx, cpu), per_cpu(softirq_ctx, cpu)); } +#ifndef CONFIG_PREEMPT_RT_FULL asmlinkage void do_softirq(void) { unsigned long flags; @@ -179,6 +180,7 @@ asmlinkage void do_softirq(void) local_irq_restore(flags); } +#endif bool handle_irq(unsigned irq, struct pt_regs *regs) { diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 69bca468c47a..3fbc07df72e1 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -65,7 +65,7 @@ bool handle_irq(unsigned irq, struct pt_regs *regs) return true; } - +#ifndef CONFIG_PREEMPT_RT_FULL extern void call_softirq(void); asmlinkage void do_softirq(void) @@ -85,3 +85,4 @@ asmlinkage void do_softirq(void) } local_irq_restore(flags); } +#endif diff 
--git a/arch/x86/kernel/irq_work.c b/arch/x86/kernel/irq_work.c index ca8f703a1e70..129b8bb73de2 100644 --- a/arch/x86/kernel/irq_work.c +++ b/arch/x86/kernel/irq_work.c @@ -18,6 +18,7 @@ void smp_irq_work_interrupt(struct pt_regs *regs) irq_exit(); } +#ifndef CONFIG_PREEMPT_RT_FULL void arch_irq_work_raise(void) { #ifdef CONFIG_X86_LOCAL_APIC @@ -28,3 +29,4 @@ void arch_irq_work_raise(void) apic_wait_icr_idle(); #endif } +#endif diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 7da647d8b64c..5604455ebc75 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -478,7 +478,6 @@ static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs, * stepping. */ regs->ip = (unsigned long)p->ainsn.insn; - preempt_enable_no_resched(); return; } #endif diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 859829614085..631bba6e5788 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -39,6 +39,7 @@ #include <linux/io.h> #include <linux/kdebug.h> #include <linux/cpuidle.h> +#include <linux/highmem.h> #include <asm/pgtable.h> #include <asm/system.h> @@ -117,9 +118,7 @@ void cpu_idle(void) start_critical_timings(); } tick_nohz_restart_sched_tick(); - preempt_enable_no_resched(); - schedule(); - preempt_disable(); + schedule_preempt_disabled(); } } @@ -341,6 +340,46 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT)) __switch_to_xtra(prev_p, next_p, tss); +#if defined CONFIG_PREEMPT_RT_FULL && defined CONFIG_HIGHMEM + /* + * Save @prev's kmap_atomic stack + */ + prev_p->kmap_idx = __this_cpu_read(__kmap_atomic_idx); + if (unlikely(prev_p->kmap_idx)) { + int i; + + for (i = 0; i < prev_p->kmap_idx; i++) { + int idx = i + KM_TYPE_NR * smp_processor_id(); + + pte_t *ptep = kmap_pte - idx; + prev_p->kmap_pte[i] = *ptep; + kpte_clear_flush(ptep, __fix_to_virt(FIX_KMAP_BEGIN + idx)); + } + + 
__this_cpu_write(__kmap_atomic_idx, 0); + } + + /* + * Restore @next_p's kmap_atomic stack + */ + if (unlikely(next_p->kmap_idx)) { + int i; + + __this_cpu_write(__kmap_atomic_idx, next_p->kmap_idx); + + for (i = 0; i < next_p->kmap_idx; i++) { + int idx = i + KM_TYPE_NR * smp_processor_id(); + + set_pte(kmap_pte - idx, next_p->kmap_pte[i]); + } + } +#endif + + /* If we're going to preload the fpu context, make sure clts + is run while we're batching the cpu state updates. */ + if (preload_fpu) + clts(); + /* * Leave lazy mode, flushing any hypercalls made here. * This must be done before restoring TLS segments so diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 6a364a67e2b7..08840ab61a2e 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -150,9 +150,7 @@ void cpu_idle(void) } tick_nohz_restart_sched_tick(); - preempt_enable_no_resched(); - schedule(); - preempt_disable(); + schedule_preempt_disabled(); } } diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 54ddaeb221c1..12c4d53e09a1 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -820,6 +820,15 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) mce_notify_process(); #endif /* CONFIG_X86_64 && CONFIG_X86_MCE */ +#ifdef ARCH_RT_DELAYS_SIGNAL_SEND + if (unlikely(current->forced_info.si_signo)) { + struct task_struct *t = current; + force_sig_info(t->forced_info.si_signo, + &t->forced_info, t); + t->forced_info.si_signo = 0; + } +#endif + /* deal with pending signal delivery */ if (thread_info_flags & _TIF_SIGPENDING) do_signal(regs); diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 31d9d0f04c2b..cc88aec09888 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -87,9 +87,21 @@ static inline void conditional_sti(struct pt_regs *regs) local_irq_enable(); } -static inline void preempt_conditional_sti(struct pt_regs *regs) +static inline void 
conditional_sti_ist(struct pt_regs *regs) { +#ifdef CONFIG_X86_64 + /* + * X86_64 uses a per CPU stack on the IST for certain traps + * like int3. The task can not be preempted when using one + * of these stacks, thus preemption must be disabled, otherwise + * the stack can be corrupted if the task is scheduled out, + * and another task comes in and uses this stack. + * + * On x86_32 the task keeps its own stack and it is OK if the + * task schedules out. + */ inc_preempt_count(); +#endif if (regs->flags & X86_EFLAGS_IF) local_irq_enable(); } @@ -100,11 +112,13 @@ static inline void conditional_cli(struct pt_regs *regs) local_irq_disable(); } -static inline void preempt_conditional_cli(struct pt_regs *regs) +static inline void conditional_cli_ist(struct pt_regs *regs) { if (regs->flags & X86_EFLAGS_IF) local_irq_disable(); +#ifdef CONFIG_X86_64 dec_preempt_count(); +#endif } static void __kprobes @@ -222,9 +236,9 @@ dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code) if (notify_die(DIE_TRAP, "stack segment", regs, error_code, 12, SIGBUS) == NOTIFY_STOP) return; - preempt_conditional_sti(regs); + conditional_sti_ist(regs); do_trap(12, SIGBUS, "stack segment", regs, error_code, NULL); - preempt_conditional_cli(regs); + conditional_cli_ist(regs); } dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) @@ -316,9 +330,9 @@ dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code) return; #endif - preempt_conditional_sti(regs); + conditional_sti_ist(regs); do_trap(3, SIGTRAP, "int3", regs, error_code, NULL); - preempt_conditional_cli(regs); + conditional_cli_ist(regs); } #ifdef CONFIG_X86_64 @@ -412,12 +426,12 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) return; /* It's safe to allow irq's after DR6 has been saved */ - preempt_conditional_sti(regs); + conditional_sti_ist(regs); if (regs->flags & X86_VM_MASK) { handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1); 
- preempt_conditional_cli(regs); + conditional_cli_ist(regs); return; } @@ -436,7 +450,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) si_code = get_si_code(tsk->thread.debugreg6); if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp) send_sigtrap(tsk, regs, error_code, si_code); - preempt_conditional_cli(regs); + conditional_cli_ist(regs); return; } diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index e4d4a22e8b94..aad7aaf319e1 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c @@ -54,7 +54,7 @@ DEFINE_VVAR(int, vgetcpu_mode); DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data) = { - .lock = __SEQLOCK_UNLOCKED(__vsyscall_gtod_data.lock), + .lock = __RAW_SEQLOCK_UNLOCKED(__vsyscall_gtod_data.lock), }; static enum { EMULATE, NATIVE, NONE } vsyscall_mode = NATIVE; @@ -82,10 +82,10 @@ void update_vsyscall_tz(void) { unsigned long flags; - write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags); + raw_write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags); /* sys_tz has changed */ vsyscall_gtod_data.sys_tz = sys_tz; - write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); + raw_write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); } void update_vsyscall(struct timespec *wall_time, struct timespec *wtm, @@ -93,7 +93,7 @@ void update_vsyscall(struct timespec *wall_time, struct timespec *wtm, { unsigned long flags; - write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags); + raw_write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags); /* copy vsyscall data */ vsyscall_gtod_data.clock.vclock_mode = clock->archdata.vclock_mode; @@ -106,7 +106,7 @@ void update_vsyscall(struct timespec *wall_time, struct timespec *wtm, vsyscall_gtod_data.wall_to_monotonic = *wtm; vsyscall_gtod_data.wall_time_coarse = __current_kernel_time(); - write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); + raw_write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); } 
static void warn_bad_vsyscall(const char *level, struct pt_regs *regs, |