Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/Kconfig                          |  6
-rw-r--r--  arch/x86/crypto/aesni-intel_glue.c        | 24
-rw-r--r--  arch/x86/include/asm/acpi.h               |  4
-rw-r--r--  arch/x86/include/asm/page_64_types.h      | 21
-rw-r--r--  arch/x86/include/asm/signal.h             | 13
-rw-r--r--  arch/x86/include/asm/stackprotector.h     | 10
-rw-r--r--  arch/x86/include/asm/vgtod.h              |  2
-rw-r--r--  arch/x86/kernel/apic/apic.c               |  6
-rw-r--r--  arch/x86/kernel/apic/io_apic.c            |  5
-rw-r--r--  arch/x86/kernel/cpu/common.c              |  2
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce.c          | 49
-rw-r--r--  arch/x86/kernel/cpu/mcheck/therm_throt.c  |  2
-rw-r--r--  arch/x86/kernel/cpu/mcheck/threshold.c    |  2
-rw-r--r--  arch/x86/kernel/dumpstack_64.c            |  4
-rw-r--r--  arch/x86/kernel/early_printk.c            | 21
-rw-r--r--  arch/x86/kernel/entry_32.S                |  8
-rw-r--r--  arch/x86/kernel/entry_64.S                |  2
-rw-r--r--  arch/x86/kernel/hpet.c                    | 27
-rw-r--r--  arch/x86/kernel/irq.c                     |  6
-rw-r--r--  arch/x86/kernel/irq_32.c                  |  2
-rw-r--r--  arch/x86/kernel/irq_64.c                  |  3
-rw-r--r--  arch/x86/kernel/irq_work.c                |  2
-rw-r--r--  arch/x86/kernel/kprobes.c                 |  1
-rw-r--r--  arch/x86/kernel/process_32.c              | 40
-rw-r--r--  arch/x86/kernel/process_64.c              |  4
-rw-r--r--  arch/x86/kernel/signal.c                  |  9
-rw-r--r--  arch/x86/kernel/traps.c                   | 32
-rw-r--r--  arch/x86/kernel/vsyscall_64.c             | 16
-rw-r--r--  arch/x86/kvm/x86.c                        |  7
-rw-r--r--  arch/x86/mm/fault.c                       |  2
-rw-r--r--  arch/x86/mm/highmem_32.c                  |  2
-rw-r--r--  arch/x86/vdso/vclock_gettime.c            | 16

32 files changed, 231 insertions, 119 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index efb42949cc09..c42146edf618 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -161,10 +161,10 @@ config ARCH_MAY_HAVE_PC_FDC
def_bool ISA_DMA_API
config RWSEM_GENERIC_SPINLOCK
- def_bool !X86_XADD
+ def_bool !X86_XADD || PREEMPT_RT_FULL
config RWSEM_XCHGADD_ALGORITHM
- def_bool X86_XADD
+ def_bool X86_XADD && !RWSEM_GENERIC_SPINLOCK && !PREEMPT_RT_FULL
config ARCH_HAS_CPU_IDLE_WAIT
def_bool y
@@ -730,7 +730,7 @@ config IOMMU_HELPER
config MAXSMP
bool "Enable Maximum number of SMP Processors and NUMA Nodes"
depends on X86_64 && SMP && DEBUG_KERNEL && EXPERIMENTAL
- select CPUMASK_OFFSTACK
+ select CPUMASK_OFFSTACK if !PREEMPT_RT_FULL
---help---
Enable maximum number of CPUS and NUMA Nodes for this architecture.
If unsure, say N.
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 545d0ce59818..0c9eaf1ca0a1 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -289,14 +289,14 @@ static int ecb_encrypt(struct blkcipher_desc *desc,
err = blkcipher_walk_virt(desc, &walk);
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
- kernel_fpu_begin();
while ((nbytes = walk.nbytes)) {
+ kernel_fpu_begin();
aesni_ecb_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr,
- nbytes & AES_BLOCK_MASK);
+ nbytes & AES_BLOCK_MASK);
+ kernel_fpu_end();
nbytes &= AES_BLOCK_SIZE - 1;
err = blkcipher_walk_done(desc, &walk, nbytes);
}
- kernel_fpu_end();
return err;
}
@@ -313,14 +313,14 @@ static int ecb_decrypt(struct blkcipher_desc *desc,
err = blkcipher_walk_virt(desc, &walk);
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
- kernel_fpu_begin();
while ((nbytes = walk.nbytes)) {
+ kernel_fpu_begin();
aesni_ecb_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr,
nbytes & AES_BLOCK_MASK);
+ kernel_fpu_end();
nbytes &= AES_BLOCK_SIZE - 1;
err = blkcipher_walk_done(desc, &walk, nbytes);
}
- kernel_fpu_end();
return err;
}
@@ -359,14 +359,14 @@ static int cbc_encrypt(struct blkcipher_desc *desc,
err = blkcipher_walk_virt(desc, &walk);
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
- kernel_fpu_begin();
while ((nbytes = walk.nbytes)) {
+ kernel_fpu_begin();
aesni_cbc_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr,
nbytes & AES_BLOCK_MASK, walk.iv);
+ kernel_fpu_end();
nbytes &= AES_BLOCK_SIZE - 1;
err = blkcipher_walk_done(desc, &walk, nbytes);
}
- kernel_fpu_end();
return err;
}
@@ -383,14 +383,14 @@ static int cbc_decrypt(struct blkcipher_desc *desc,
err = blkcipher_walk_virt(desc, &walk);
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
- kernel_fpu_begin();
while ((nbytes = walk.nbytes)) {
+ kernel_fpu_begin();
aesni_cbc_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr,
nbytes & AES_BLOCK_MASK, walk.iv);
+ kernel_fpu_end();
nbytes &= AES_BLOCK_SIZE - 1;
err = blkcipher_walk_done(desc, &walk, nbytes);
}
- kernel_fpu_end();
return err;
}
@@ -445,18 +445,20 @@ static int ctr_crypt(struct blkcipher_desc *desc,
err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE);
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
- kernel_fpu_begin();
while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) {
+ kernel_fpu_begin();
aesni_ctr_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr,
nbytes & AES_BLOCK_MASK, walk.iv);
+ kernel_fpu_end();
nbytes &= AES_BLOCK_SIZE - 1;
err = blkcipher_walk_done(desc, &walk, nbytes);
}
if (walk.nbytes) {
+ kernel_fpu_begin();
ctr_crypt_final(ctx, &walk);
+ kernel_fpu_end();
err = blkcipher_walk_done(desc, &walk, 0);
}
- kernel_fpu_end();
return err;
}
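
The pattern repeated across the ecb/cbc/ctr hunks above: kernel_fpu_begin()/
kernel_fpu_end() move from around the whole walk loop to around each SIMD call,
because kernel_fpu_begin() disables preemption while the bookkeeping in
blkcipher_walk_done() (kfree() and friends) may take sleeping locks on -rt; it
also keeps the non-preemptible region short. Consolidated sketch of the
resulting per-block scoping, using the same 3.x blkcipher API as the hunks
(illustrative function, not a drop-in for any single one of them):

    /* Illustrative only: per-block FPU scoping as used by the hunks above. */
    static int ecb_encrypt_rt(struct blkcipher_desc *desc,
                              struct scatterlist *dst, struct scatterlist *src,
                              unsigned int nbytes)
    {
            struct crypto_aes_ctx *ctx = aes_ctx(crypto_blkcipher_ctx(desc->tfm));
            struct blkcipher_walk walk;
            int err;

            blkcipher_walk_init(&walk, dst, src, nbytes);
            err = blkcipher_walk_virt(desc, &walk);
            desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;

            while ((nbytes = walk.nbytes)) {
                    kernel_fpu_begin();     /* preemption off only around the SIMD call */
                    aesni_ecb_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr,
                                  nbytes & AES_BLOCK_MASK);
                    kernel_fpu_end();       /* back on before blkcipher_walk_done() */
                    nbytes &= AES_BLOCK_SIZE - 1;
                    err = blkcipher_walk_done(desc, &walk, nbytes);
            }
            return err;
    }
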
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 610001d385dd..c1c23d256ac6 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -51,8 +51,8 @@
#define ACPI_ASM_MACROS
#define BREAKPOINT3
-#define ACPI_DISABLE_IRQS() local_irq_disable()
-#define ACPI_ENABLE_IRQS() local_irq_enable()
+#define ACPI_DISABLE_IRQS() local_irq_disable_nort()
+#define ACPI_ENABLE_IRQS() local_irq_enable_nort()
#define ACPI_FLUSH_CPU_CACHE() wbinvd()
int __acpi_acquire_global_lock(unsigned int *lock);
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
index 7639dbf5d223..0883ecdbc905 100644
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -14,12 +14,21 @@
#define IRQ_STACK_ORDER 2
#define IRQ_STACK_SIZE (PAGE_SIZE << IRQ_STACK_ORDER)
-#define STACKFAULT_STACK 1
-#define DOUBLEFAULT_STACK 2
-#define NMI_STACK 3
-#define DEBUG_STACK 4
-#define MCE_STACK 5
-#define N_EXCEPTION_STACKS 5 /* hw limit: 7 */
+#ifdef CONFIG_PREEMPT_RT_FULL
+# define STACKFAULT_STACK 0
+# define DOUBLEFAULT_STACK 1
+# define NMI_STACK 2
+# define DEBUG_STACK 0
+# define MCE_STACK 3
+# define N_EXCEPTION_STACKS 3 /* hw limit: 7 */
+#else
+# define STACKFAULT_STACK 1
+# define DOUBLEFAULT_STACK 2
+# define NMI_STACK 3
+# define DEBUG_STACK 4
+# define MCE_STACK 5
+# define N_EXCEPTION_STACKS 5 /* hw limit: 7 */
+#endif
#define PUD_PAGE_SIZE (_AC(1, UL) << PUD_SHIFT)
#define PUD_PAGE_MASK (~(PUD_PAGE_SIZE-1))
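
Background for the stack renumbering: an IST field of 0 in an IDT gate means
"no stack switch", so with DEBUG_STACK and STACKFAULT_STACK defined as 0 the
#DB and #SS handlers stay on the normal per-task kernel stack under
PREEMPT_RT_FULL, where they may block on -rt's sleeping spinlocks (a shared
per-CPU IST stack would be clobbered by the next exception if its handler
slept). NMI, #DF and #MC keep dedicated IST stacks. The values are consumed by
the gate setup in trap_init(); roughly, from the 3.x tree (shown for context,
not part of this diff):

    /* trap_init() wiring, 3.x era -- context only, not part of this diff */
    set_intr_gate_ist(1, &debug, DEBUG_STACK);               /* 0 on RT: no IST */
    set_intr_gate_ist(2, &nmi, NMI_STACK);
    set_system_intr_gate_ist(3, &int3, DEBUG_STACK);         /* 0 on RT: no IST */
    set_intr_gate_ist(8, &double_fault, DOUBLEFAULT_STACK);
    set_intr_gate_ist(12, &stack_segment, STACKFAULT_STACK); /* 0 on RT: no IST */
    set_intr_gate_ist(18, &machine_check, MCE_STACK);
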
diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h
index 598457cbd0f8..1213ebd7bee7 100644
--- a/arch/x86/include/asm/signal.h
+++ b/arch/x86/include/asm/signal.h
@@ -31,6 +31,19 @@ typedef struct {
unsigned long sig[_NSIG_WORDS];
} sigset_t;
+/*
+ * Because some traps use the IST stack, we must keep
+ * preemption disabled while calling do_trap(), but do_trap()
+ * may call force_sig_info() which will grab the signal spin_locks
+ * for the task, which in PREEMPT_RT_FULL are mutexes.
+ * By defining ARCH_RT_DELAYS_SIGNAL_SEND the force_sig_info() will
+ * set TIF_NOTIFY_RESUME and set up the signal to be sent on exit
+ * of the trap.
+ */
+#if defined(CONFIG_PREEMPT_RT_FULL) && defined(CONFIG_X86_64)
+#define ARCH_RT_DELAYS_SIGNAL_SEND
+#endif
+
#else
/* Here we must cater to libcs that poke about in kernel headers. */
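
The producer side of this mechanism lives in the generic force_sig_info() path
of the RT series rather than in this arch diff. Presumably it amounts to the
following (hedged sketch with a hypothetical helper name; forced_info is the
per-task field that the arch/x86/kernel/signal.c hunk below drains): when the
sender is the current task and cannot take the now-sleeping siglock, park the
siginfo and let do_notify_resume() deliver it on the way back to user space.

    /* Assumed shape of the delayed-send producer; not a hunk of this patch. */
    static int rt_delay_forced_sig(struct task_struct *t, struct siginfo *info)
    {
    #ifdef ARCH_RT_DELAYS_SIGNAL_SEND
            if (in_atomic() && t == current) {
                    t->forced_info = *info;                    /* park the siginfo */
                    set_tsk_thread_flag(t, TIF_NOTIFY_RESUME); /* deliver on exit  */
                    return 1;                                  /* deferred         */
            }
    #endif
            return 0;                                          /* send immediately */
    }
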
diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h
index 157517763565..ac0703bcfe17 100644
--- a/arch/x86/include/asm/stackprotector.h
+++ b/arch/x86/include/asm/stackprotector.h
@@ -58,7 +58,7 @@
*/
static __always_inline void boot_init_stack_canary(void)
{
- u64 canary;
+ u64 uninitialized_var(canary);
u64 tsc;
#ifdef CONFIG_X86_64
@@ -69,8 +69,16 @@ static __always_inline void boot_init_stack_canary(void)
* of randomness. The TSC only matters for very early init,
* there it already has some randomness on most systems. Later
* on during the bootup the random pool has true entropy too.
+ *
+ * For preempt-rt we need to weaken the randomness a bit, as
+ * we can't call into the random generator from atomic context
+ * due to locking constraints. We just leave canary
+ * uninitialized and use the TSC based randomness on top of
+ * it.
*/
+#ifndef CONFIG_PREEMPT_RT_FULL
get_random_bytes(&canary, sizeof(canary));
+#endif
tsc = __native_read_tsc();
canary += tsc + (tsc << 32UL);
diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h
index 815285bcaceb..1f007178c813 100644
--- a/arch/x86/include/asm/vgtod.h
+++ b/arch/x86/include/asm/vgtod.h
@@ -5,7 +5,7 @@
#include <linux/clocksource.h>
struct vsyscall_gtod_data {
- seqlock_t lock;
+ seqcount_t seq;
/* open coded 'struct timespec' */
time_t wall_time_sec;
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index f98d84caf94c..2cd2d93643dc 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -876,8 +876,8 @@ void __irq_entry smp_apic_timer_interrupt(struct pt_regs *regs)
* Besides, if we don't timer interrupts ignore the global
* interrupt lock, which is the WrongThing (tm) to do.
*/
- exit_idle();
irq_enter();
+ exit_idle();
local_apic_timer_interrupt();
irq_exit();
@@ -1809,8 +1809,8 @@ void smp_spurious_interrupt(struct pt_regs *regs)
{
u32 v;
- exit_idle();
irq_enter();
+ exit_idle();
/*
* Check if this really is a spurious interrupt and ACK it
* if it is a vectored one. Just in case...
@@ -1846,8 +1846,8 @@ void smp_error_interrupt(struct pt_regs *regs)
"Illegal register address", /* APIC Error Bit 7 */
};
- exit_idle();
irq_enter();
+ exit_idle();
/* First tickle the hardware, only then report what went on. -- REW */
v0 = apic_read(APIC_ESR);
apic_write(APIC_ESR, 0);
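
This irq_enter()-before-exit_idle() reordering recurs throughout the series
(io_apic.c, therm_throt.c, threshold.c and irq.c below): the idle-exit notifier
chain uses RCU, and RCU only leaves its extended quiescent (idle) state inside
irq_enter(), so exit_idle() has to come second. The canonical entry shape after
the change, as an illustrative handler (name and body are placeholders):

    /* Placeholder handler: shows only the entry/exit ordering. */
    asmlinkage void smp_example_interrupt(struct pt_regs *regs)
    {
            ack_APIC_irq();

            irq_enter();            /* hard-irq accounting; pulls RCU out of idle */
            exit_idle();            /* idle-exit notifiers may now use RCU safely */

            /* ... vector-specific handling ... */

            irq_exit();             /* run pending softirqs, re-arm nohz */
    }
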
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index a25e276bf41e..e568a3c9d810 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -2421,8 +2421,8 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
unsigned vector, me;
ack_APIC_irq();
- exit_idle();
irq_enter();
+ exit_idle();
me = smp_processor_id();
for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
@@ -2521,7 +2521,8 @@ static void ack_apic_level(struct irq_data *data)
irq_complete_move(cfg);
#ifdef CONFIG_GENERIC_PENDING_IRQ
/* If we are moving the irq we need to mask it */
- if (unlikely(irqd_is_setaffinity_pending(data))) {
+ if (unlikely(irqd_is_setaffinity_pending(data) &&
+ !irqd_irq_inprogress(data))) {
do_unmask_irq = 1;
mask_ioapic(cfg);
}
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index aa003b13a831..d39b525fd321 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1055,7 +1055,9 @@ DEFINE_PER_CPU(unsigned int, irq_count) = -1;
*/
static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
[0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ,
+#if DEBUG_STACK > 0
[DEBUG_STACK - 1] = DEBUG_STKSZ
+#endif
};
static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 2af127d4c3d1..f1f1cb1cdad9 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -38,6 +38,7 @@
#include <linux/debugfs.h>
#include <linux/irq_work.h>
#include <linux/export.h>
+#include <linux/jiffies.h>
#include <asm/processor.h>
#include <asm/mce.h>
@@ -1106,17 +1107,14 @@ void mce_log_therm_throt_event(__u64 status)
* poller finds an MCE, poll 2x faster. When the poller finds no more
* errors, poll 2x slower (up to check_interval seconds).
*/
-static int check_interval = 5 * 60; /* 5 minutes */
+static unsigned long check_interval = 5 * 60; /* 5 minutes */
-static DEFINE_PER_CPU(int, mce_next_interval); /* in jiffies */
-static DEFINE_PER_CPU(struct timer_list, mce_timer);
+static DEFINE_PER_CPU(unsigned long, mce_next_interval); /* in jiffies */
+static DEFINE_PER_CPU(struct hrtimer, mce_timer);
-static void mce_start_timer(unsigned long data)
+static enum hrtimer_restart mce_start_timer(struct hrtimer *timer)
{
- struct timer_list *t = &per_cpu(mce_timer, data);
- int *n;
-
- WARN_ON(smp_processor_id() != data);
+ unsigned long *n;
if (mce_available(__this_cpu_ptr(&cpu_info))) {
machine_check_poll(MCP_TIMESTAMP,
@@ -1129,21 +1127,22 @@ static void mce_start_timer(unsigned long data)
*/
n = &__get_cpu_var(mce_next_interval);
if (mce_notify_irq())
- *n = max(*n/2, HZ/100);
+ *n = max(*n/2, HZ/100UL);
else
- *n = min(*n*2, (int)round_jiffies_relative(check_interval*HZ));
+ *n = min(*n*2, round_jiffies_relative(check_interval*HZ));
- t->expires = jiffies + *n;
- add_timer_on(t, smp_processor_id());
+ hrtimer_forward(timer, timer->base->get_time(),
+ ns_to_ktime(jiffies_to_usecs(*n) * 1000));
+ return HRTIMER_RESTART;
}
-/* Must not be called in IRQ context where del_timer_sync() can deadlock */
+/* Must not be called in IRQ context where hrtimer_cancel() can deadlock */
static void mce_timer_delete_all(void)
{
int cpu;
for_each_online_cpu(cpu)
- del_timer_sync(&per_cpu(mce_timer, cpu));
+ hrtimer_cancel(&per_cpu(mce_timer, cpu));
}
static void mce_do_trigger(struct work_struct *work)
@@ -1375,10 +1374,11 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
static void __mcheck_cpu_init_timer(void)
{
- struct timer_list *t = &__get_cpu_var(mce_timer);
- int *n = &__get_cpu_var(mce_next_interval);
+ struct hrtimer *t = &__get_cpu_var(mce_timer);
+ unsigned long *n = &__get_cpu_var(mce_next_interval);
- setup_timer(t, mce_start_timer, smp_processor_id());
+ hrtimer_init(t, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ t->function = mce_start_timer;
if (mce_ignore_ce)
return;
@@ -1386,8 +1386,9 @@ static void __mcheck_cpu_init_timer(void)
*n = check_interval * HZ;
if (!*n)
return;
- t->expires = round_jiffies(jiffies + *n);
- add_timer_on(t, smp_processor_id());
+
+ hrtimer_start_range_ns(t, ns_to_ktime(jiffies_to_usecs(*n) * 1000),
+ 0 , HRTIMER_MODE_REL_PINNED);
}
/* Handle unconfigured int18 (should never happen) */
@@ -2023,6 +2024,8 @@ static void __cpuinit mce_disable_cpu(void *h)
if (!mce_available(__this_cpu_ptr(&cpu_info)))
return;
+ hrtimer_cancel(&__get_cpu_var(mce_timer));
+
if (!(action & CPU_TASKS_FROZEN))
cmci_clear();
for (i = 0; i < banks; i++) {
@@ -2049,6 +2052,7 @@ static void __cpuinit mce_reenable_cpu(void *h)
if (b->init)
wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl);
}
+ __mcheck_cpu_init_timer();
}
/* Get notified when a cpu comes on/off. Be hotplug friendly. */
@@ -2056,7 +2060,6 @@ static int __cpuinit
mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
{
unsigned int cpu = (unsigned long)hcpu;
- struct timer_list *t = &per_cpu(mce_timer, cpu);
switch (action) {
case CPU_ONLINE:
@@ -2073,16 +2076,10 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
break;
case CPU_DOWN_PREPARE:
case CPU_DOWN_PREPARE_FROZEN:
- del_timer_sync(t);
smp_call_function_single(cpu, mce_disable_cpu, &action, 1);
break;
case CPU_DOWN_FAILED:
case CPU_DOWN_FAILED_FROZEN:
- if (!mce_ignore_ce && check_interval) {
- t->expires = round_jiffies(jiffies +
- __get_cpu_var(mce_next_interval));
- add_timer_on(t, cpu);
- }
smp_call_function_single(cpu, mce_reenable_cpu, &action, 1);
break;
case CPU_POST_DEAD:
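
The mce.c conversion replaces the self-rearming timer_list (add_timer_on() from
its own callback) with a per-CPU hrtimer: the callback re-arms itself via
hrtimer_forward() and HRTIMER_RESTART, starting moves into
__mcheck_cpu_init_timer(), stopping into mce_disable_cpu(), and the hotplug
notifier no longer touches the timer directly. The underlying self-rearming
hrtimer pattern looks roughly like this (standalone sketch with placeholder
names; the adaptive-interval logic of the real poller is omitted):

    #include <linux/hrtimer.h>
    #include <linux/ktime.h>
    #include <linux/percpu.h>

    static DEFINE_PER_CPU(struct hrtimer, poll_timer);
    static unsigned long poll_interval = 5 * 60 * HZ;       /* jiffies */

    /* Callback re-arms itself; runs per CPU. */
    static enum hrtimer_restart poll_fn(struct hrtimer *timer)
    {
            /* ... periodic work goes here ... */

            hrtimer_forward(timer, timer->base->get_time(),
                            ns_to_ktime(jiffies_to_usecs(poll_interval) * 1000ULL));
            return HRTIMER_RESTART;                          /* keep it running */
    }

    static void poll_timer_start(void)
    {
            struct hrtimer *t = &__get_cpu_var(poll_timer);

            hrtimer_init(t, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
            t->function = poll_fn;
            hrtimer_start_range_ns(t,
                    ns_to_ktime(jiffies_to_usecs(poll_interval) * 1000ULL),
                    0, HRTIMER_MODE_REL_PINNED);             /* stay on this CPU */
    }

As the updated comment notes, hrtimer_cancel() can block waiting for a running
callback, so mce_timer_delete_all() must not be called from IRQ context.
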
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 787e06c84ea6..ce215616d5b9 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -397,8 +397,8 @@ static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt;
asmlinkage void smp_thermal_interrupt(struct pt_regs *regs)
{
- exit_idle();
irq_enter();
+ exit_idle();
inc_irq_stat(irq_thermal_count);
smp_thermal_vector();
irq_exit();
diff --git a/arch/x86/kernel/cpu/mcheck/threshold.c b/arch/x86/kernel/cpu/mcheck/threshold.c
index d746df2909c9..aa578cadb940 100644
--- a/arch/x86/kernel/cpu/mcheck/threshold.c
+++ b/arch/x86/kernel/cpu/mcheck/threshold.c
@@ -19,8 +19,8 @@ void (*mce_threshold_vector)(void) = default_threshold_interrupt;
asmlinkage void smp_threshold_interrupt(void)
{
- exit_idle();
irq_enter();
+ exit_idle();
inc_irq_stat(irq_threshold_count);
mce_threshold_vector();
irq_exit();
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 6d728d9284bd..352beb75ef78 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -21,10 +21,14 @@
(N_EXCEPTION_STACKS + DEBUG_STKSZ/EXCEPTION_STKSZ - 2)
static char x86_stack_ids[][8] = {
+#if DEBUG_STACK > 0
[ DEBUG_STACK-1 ] = "#DB",
+#endif
[ NMI_STACK-1 ] = "NMI",
[ DOUBLEFAULT_STACK-1 ] = "#DF",
+#if STACKFAULT_STACK > 0
[ STACKFAULT_STACK-1 ] = "#SS",
+#endif
[ MCE_STACK-1 ] = "#MC",
#if DEBUG_STKSZ > EXCEPTION_STKSZ
[ N_EXCEPTION_STACKS ...
diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c
index cd28a350f7f9..5f3d9c5e89c7 100644
--- a/arch/x86/kernel/early_printk.c
+++ b/arch/x86/kernel/early_printk.c
@@ -169,25 +169,9 @@ static struct console early_serial_console = {
.index = -1,
};
-/* Direct interface for emergencies */
-static struct console *early_console = &early_vga_console;
-static int __initdata early_console_initialized;
-
-asmlinkage void early_printk(const char *fmt, ...)
-{
- char buf[512];
- int n;
- va_list ap;
-
- va_start(ap, fmt);
- n = vscnprintf(buf, sizeof(buf), fmt, ap);
- early_console->write(early_console, buf, n);
- va_end(ap);
-}
-
static inline void early_console_register(struct console *con, int keep_early)
{
- if (early_console->index != -1) {
+ if (con->index != -1) {
printk(KERN_CRIT "ERROR: earlyprintk= %s already used\n",
con->name);
return;
@@ -207,9 +191,8 @@ static int __init setup_early_printk(char *buf)
if (!buf)
return 0;
- if (early_console_initialized)
+ if (early_console)
return 0;
- early_console_initialized = 1;
keep = (strstr(buf, "keep") != NULL);
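
The arch-local early_printk()/early_console pair deleted above is replaced by a
single generic implementation shared across architectures (added elsewhere in
this series, in kernel/printk.c, so it is not visible in this arch diff). The
generic early_console pointer starts out NULL, which is why the "already used"
check switches to con->index and setup_early_printk() now tests early_console
instead of the removed early_console_initialized flag. Presumably the generic
side keeps essentially the deleted body, along these lines:

    /* Assumed shape of the consolidated generic version (kernel/printk.c). */
    struct console *early_console;

    asmlinkage void early_printk(const char *fmt, ...)
    {
            char buf[512];
            int n;
            va_list ap;

            if (!early_console)
                    return;

            va_start(ap, fmt);
            n = vscnprintf(buf, sizeof(buf), fmt, ap);
            early_console->write(early_console, buf, n);
            va_end(ap);
    }
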
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index bcda8163bb00..426cf512fdef 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -629,7 +629,11 @@ work_notifysig: # deal with pending signals and
jne work_notifysig_v86 # returning to kernel-space or
# vm86-space
xorl %edx, %edx
+ TRACE_IRQS_ON
+ ENABLE_INTERRUPTS(CLBR_NONE)
call do_notify_resume
+ DISABLE_INTERRUPTS(CLBR_ANY)
+ TRACE_IRQS_OFF
jmp resume_userspace_sig
ALIGN
@@ -642,7 +646,11 @@ work_notifysig_v86:
movl %esp, %eax
#endif
xorl %edx, %edx
+ TRACE_IRQS_ON
+ ENABLE_INTERRUPTS(CLBR_NONE)
call do_notify_resume
+ DISABLE_INTERRUPTS(CLBR_ANY)
+ TRACE_IRQS_OFF
jmp resume_userspace_sig
END(work_pending)
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index faf8d5e74b0b..fb0f57824bd8 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1192,6 +1192,7 @@ ENTRY(kernel_execve)
CFI_ENDPROC
END(kernel_execve)
+#ifndef CONFIG_PREEMPT_RT_FULL
/* Call softirq on interrupt stack. Interrupts are off. */
ENTRY(call_softirq)
CFI_STARTPROC
@@ -1211,6 +1212,7 @@ ENTRY(call_softirq)
ret
CFI_ENDPROC
END(call_softirq)
+#endif
#ifdef CONFIG_XEN
zeroentry xen_hypervisor_callback xen_do_hypervisor_callback
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 1bb0bf4d92cd..d86b82190b80 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -9,6 +9,7 @@
#include <linux/slab.h>
#include <linux/hpet.h>
#include <linux/init.h>
+#include <linux/dmi.h>
#include <linux/cpu.h>
#include <linux/pm.h>
#include <linux/io.h>
@@ -568,6 +569,30 @@ static void init_one_hpet_msi_clockevent(struct hpet_dev *hdev, int cpu)
#define RESERVE_TIMERS 0
#endif
+static int __init dmi_disable_hpet_msi(const struct dmi_system_id *d)
+{
+ hpet_msi_disable = 1;
+ return 0;
+}
+
+static struct dmi_system_id __initdata dmi_hpet_table[] = {
+ /*
+ * MSI based per cpu timers lose interrupts when intel_idle()
+ * is enabled - independent of the c-state. With idle=poll the
+ * problem cannot be observed. We have no idea yet, whether
+ * this is a W510 specific issue or a general chipset oddity.
+ */
+ {
+ .callback = dmi_disable_hpet_msi,
+ .ident = "Lenovo W510",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+ DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad W510"),
+ },
+ },
+ {}
+};
+
static void hpet_msi_capability_lookup(unsigned int start_timer)
{
unsigned int id;
@@ -575,6 +600,8 @@ static void hpet_msi_capability_lookup(unsigned int start_timer)
unsigned int num_timers_used = 0;
int i;
+ dmi_check_system(dmi_hpet_table);
+
if (hpet_msi_disable)
return;
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 429e0c92924e..5d31e5bdbf85 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -181,8 +181,8 @@ unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
unsigned vector = ~regs->orig_ax;
unsigned irq;
- exit_idle();
irq_enter();
+ exit_idle();
irq = __this_cpu_read(vector_irq[vector]);
@@ -209,10 +209,10 @@ void smp_x86_platform_ipi(struct pt_regs *regs)
ack_APIC_irq();
- exit_idle();
-
irq_enter();
+ exit_idle();
+
inc_irq_stat(x86_platform_ipis);
if (x86_platform_ipi_callback)
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 72090705a656..84417a251c3f 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -149,6 +149,7 @@ void __cpuinit irq_ctx_init(int cpu)
cpu, per_cpu(hardirq_ctx, cpu), per_cpu(softirq_ctx, cpu));
}
+#ifndef CONFIG_PREEMPT_RT_FULL
asmlinkage void do_softirq(void)
{
unsigned long flags;
@@ -179,6 +180,7 @@ asmlinkage void do_softirq(void)
local_irq_restore(flags);
}
+#endif
bool handle_irq(unsigned irq, struct pt_regs *regs)
{
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 69bca468c47a..3fbc07df72e1 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -65,7 +65,7 @@ bool handle_irq(unsigned irq, struct pt_regs *regs)
return true;
}
-
+#ifndef CONFIG_PREEMPT_RT_FULL
extern void call_softirq(void);
asmlinkage void do_softirq(void)
@@ -85,3 +85,4 @@ asmlinkage void do_softirq(void)
}
local_irq_restore(flags);
}
+#endif
diff --git a/arch/x86/kernel/irq_work.c b/arch/x86/kernel/irq_work.c
index ca8f703a1e70..129b8bb73de2 100644
--- a/arch/x86/kernel/irq_work.c
+++ b/arch/x86/kernel/irq_work.c
@@ -18,6 +18,7 @@ void smp_irq_work_interrupt(struct pt_regs *regs)
irq_exit();
}
+#ifndef CONFIG_PREEMPT_RT_FULL
void arch_irq_work_raise(void)
{
#ifdef CONFIG_X86_LOCAL_APIC
@@ -28,3 +29,4 @@ void arch_irq_work_raise(void)
apic_wait_icr_idle();
#endif
}
+#endif
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 7da647d8b64c..5604455ebc75 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -478,7 +478,6 @@ static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs,
* stepping.
*/
regs->ip = (unsigned long)p->ainsn.insn;
- preempt_enable_no_resched();
return;
}
#endif
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 859829614085..20f15731ec90 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -39,6 +39,7 @@
#include <linux/io.h>
#include <linux/kdebug.h>
#include <linux/cpuidle.h>
+#include <linux/highmem.h>
#include <asm/pgtable.h>
#include <asm/system.h>
@@ -117,9 +118,7 @@ void cpu_idle(void)
start_critical_timings();
}
tick_nohz_restart_sched_tick();
- preempt_enable_no_resched();
- schedule();
- preempt_disable();
+ schedule_preempt_disabled();
}
}
@@ -341,6 +340,41 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT))
__switch_to_xtra(prev_p, next_p, tss);
+#if defined CONFIG_PREEMPT_RT_FULL && defined CONFIG_HIGHMEM
+ /*
+ * Save @prev's kmap_atomic stack
+ */
+ prev_p->kmap_idx = __this_cpu_read(__kmap_atomic_idx);
+ if (unlikely(prev_p->kmap_idx)) {
+ int i;
+
+ for (i = 0; i < prev_p->kmap_idx; i++) {
+ int idx = i + KM_TYPE_NR * smp_processor_id();
+
+ pte_t *ptep = kmap_pte - idx;
+ prev_p->kmap_pte[i] = *ptep;
+ kpte_clear_flush(ptep, __fix_to_virt(FIX_KMAP_BEGIN + idx));
+ }
+
+ __this_cpu_write(__kmap_atomic_idx, 0);
+ }
+
+ /*
+ * Restore @next_p's kmap_atomic stack
+ */
+ if (unlikely(next_p->kmap_idx)) {
+ int i;
+
+ __this_cpu_write(__kmap_atomic_idx, next_p->kmap_idx);
+
+ for (i = 0; i < next_p->kmap_idx; i++) {
+ int idx = i + KM_TYPE_NR * smp_processor_id();
+
+ set_pte(kmap_pte - idx, next_p->kmap_pte[i]);
+ }
+ }
+#endif
+
/*
* Leave lazy mode, flushing any hypercalls made here.
* This must be done before restoring TLS segments so
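
The kmap_idx/kmap_pte[] fields used above are per-task state introduced by the
generic part of the RT series (not shown in this arch diff): with
PREEMPT_RT_FULL a task can be preempted between kmap_atomic() and
kunmap_atomic(), so its atomic-kmap slots have to be saved on switch-out and
replayed on switch-in, which is what this __switch_to() hunk does. Their
assumed declaration in struct task_struct is roughly:

    /* Assumed task_struct additions from the generic RT patches, not this hunk. */
    struct task_struct {
            /* ... */
    #if defined(CONFIG_PREEMPT_RT_FULL) && defined(CONFIG_HIGHMEM)
            int     kmap_idx;               /* depth of the task's kmap_atomic stack */
            pte_t   kmap_pte[KM_TYPE_NR];   /* saved PTEs, replayed on switch-in     */
    #endif
            /* ... */
    };
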
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 6a364a67e2b7..08840ab61a2e 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -150,9 +150,7 @@ void cpu_idle(void)
}
tick_nohz_restart_sched_tick();
- preempt_enable_no_resched();
- schedule();
- preempt_disable();
+ schedule_preempt_disabled();
}
}
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 54ddaeb221c1..12c4d53e09a1 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -820,6 +820,15 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
mce_notify_process();
#endif /* CONFIG_X86_64 && CONFIG_X86_MCE */
+#ifdef ARCH_RT_DELAYS_SIGNAL_SEND
+ if (unlikely(current->forced_info.si_signo)) {
+ struct task_struct *t = current;
+ force_sig_info(t->forced_info.si_signo,
+ &t->forced_info, t);
+ t->forced_info.si_signo = 0;
+ }
+#endif
+
/* deal with pending signal delivery */
if (thread_info_flags & _TIF_SIGPENDING)
do_signal(regs);
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 31d9d0f04c2b..cc88aec09888 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -87,9 +87,21 @@ static inline void conditional_sti(struct pt_regs *regs)
local_irq_enable();
}
-static inline void preempt_conditional_sti(struct pt_regs *regs)
+static inline void conditional_sti_ist(struct pt_regs *regs)
{
+#ifdef CONFIG_X86_64
+ /*
+ * X86_64 uses a per CPU stack on the IST for certain traps
+ * like int3. The task can not be preempted when using one
+ * of these stacks, thus preemption must be disabled, otherwise
+ * the stack can be corrupted if the task is scheduled out,
+ * and another task comes in and uses this stack.
+ *
+ * On x86_32 the task keeps its own stack and it is OK if the
+ * task schedules out.
+ */
inc_preempt_count();
+#endif
if (regs->flags & X86_EFLAGS_IF)
local_irq_enable();
}
@@ -100,11 +112,13 @@ static inline void conditional_cli(struct pt_regs *regs)
local_irq_disable();
}
-static inline void preempt_conditional_cli(struct pt_regs *regs)
+static inline void conditional_cli_ist(struct pt_regs *regs)
{
if (regs->flags & X86_EFLAGS_IF)
local_irq_disable();
+#ifdef CONFIG_X86_64
dec_preempt_count();
+#endif
}
static void __kprobes
@@ -222,9 +236,9 @@ dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code)
if (notify_die(DIE_TRAP, "stack segment", regs, error_code,
12, SIGBUS) == NOTIFY_STOP)
return;
- preempt_conditional_sti(regs);
+ conditional_sti_ist(regs);
do_trap(12, SIGBUS, "stack segment", regs, error_code, NULL);
- preempt_conditional_cli(regs);
+ conditional_cli_ist(regs);
}
dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
@@ -316,9 +330,9 @@ dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code)
return;
#endif
- preempt_conditional_sti(regs);
+ conditional_sti_ist(regs);
do_trap(3, SIGTRAP, "int3", regs, error_code, NULL);
- preempt_conditional_cli(regs);
+ conditional_cli_ist(regs);
}
#ifdef CONFIG_X86_64
@@ -412,12 +426,12 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
return;
/* It's safe to allow irq's after DR6 has been saved */
- preempt_conditional_sti(regs);
+ conditional_sti_ist(regs);
if (regs->flags & X86_VM_MASK) {
handle_vm86_trap((struct kernel_vm86_regs *) regs,
error_code, 1);
- preempt_conditional_cli(regs);
+ conditional_cli_ist(regs);
return;
}
@@ -436,7 +450,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
si_code = get_si_code(tsk->thread.debugreg6);
if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp)
send_sigtrap(tsk, regs, error_code, si_code);
- preempt_conditional_cli(regs);
+ conditional_cli_ist(regs);
return;
}
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index e4d4a22e8b94..50392ee9a626 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -52,10 +52,7 @@
#include "vsyscall_trace.h"
DEFINE_VVAR(int, vgetcpu_mode);
-DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data) =
-{
- .lock = __SEQLOCK_UNLOCKED(__vsyscall_gtod_data.lock),
-};
+DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data);
static enum { EMULATE, NATIVE, NONE } vsyscall_mode = NATIVE;
@@ -80,20 +77,13 @@ early_param("vsyscall", vsyscall_setup);
void update_vsyscall_tz(void)
{
- unsigned long flags;
-
- write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags);
- /* sys_tz has changed */
vsyscall_gtod_data.sys_tz = sys_tz;
- write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);
}
void update_vsyscall(struct timespec *wall_time, struct timespec *wtm,
struct clocksource *clock, u32 mult)
{
- unsigned long flags;
-
- write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags);
+ write_seqcount_begin(&vsyscall_gtod_data.seq);
/* copy vsyscall data */
vsyscall_gtod_data.clock.vclock_mode = clock->archdata.vclock_mode;
@@ -106,7 +96,7 @@ void update_vsyscall(struct timespec *wall_time, struct timespec *wtm,
vsyscall_gtod_data.wall_to_monotonic = *wtm;
vsyscall_gtod_data.wall_time_coarse = __current_kernel_time();
- write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);
+ write_seqcount_end(&vsyscall_gtod_data.seq);
}
static void warn_bad_vsyscall(const char *level, struct pt_regs *regs,
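
Switching vsyscall_gtod_data from a seqlock_t to a bare seqcount_t works
because there is a single writer (the timekeeping update, already serialized by
the timekeeper's own locking) and the readers are the lock-free vsyscall/vDSO
paths, which could never take the embedded spinlock anyway (and on -rt that
spinlock would become a sleeping lock). The reader/writer protocol, reduced to
its core with placeholder data (the sample_* names are stand-ins for the gtod
fields):

    #include <linux/seqlock.h>

    static seqcount_t sample_seq;           /* zero-initialized: valid idle state */
    static u64 sample_sec, sample_nsec;     /* stand-ins for the gtod fields      */

    /* Writer: single, externally serialized (as update_vsyscall() is). */
    static void sample_update(u64 sec, u64 nsec)
    {
            write_seqcount_begin(&sample_seq);  /* sequence goes odd: readers retry */
            sample_sec  = sec;
            sample_nsec = nsec;
            write_seqcount_end(&sample_seq);    /* sequence goes even again */
    }

    /* Reader: lockless; loops if it raced with the writer. */
    static void sample_read(u64 *sec, u64 *nsec)
    {
            unsigned seq;

            do {
                    seq   = read_seqcount_begin(&sample_seq);
                    *sec  = sample_sec;
                    *nsec = sample_nsec;
            } while (read_seqcount_retry(&sample_seq, seq));
    }

The vclock_gettime.c hunks further down are exactly this read side.
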
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e04cae195e10..66d2844fd705 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5207,6 +5207,13 @@ int kvm_arch_init(void *opaque)
goto out;
}
+#ifdef CONFIG_PREEMPT_RT_FULL
+ if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
+ printk(KERN_ERR "RT requires X86_FEATURE_CONSTANT_TSC\n");
+ return -EOPNOTSUPP;
+ }
+#endif
+
r = kvm_mmu_module_init();
if (r)
goto out;
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 5db0490deb07..b567837c8374 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1084,7 +1084,7 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code)
* If we're in an interrupt, have no user context or are running
* in an atomic region then we must not take the fault:
*/
- if (unlikely(in_atomic() || !mm)) {
+ if (unlikely(!mm || pagefault_disabled())) {
bad_area_nosemaphore(regs, error_code, address);
return;
}
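
On PREEMPT_RT most formerly-atomic sections become preemptible, so in_atomic()
no longer answers the question the fault handler actually asks ("may I sleep
and take mmap_sem here?"). The generic RT patches track pagefault_disable()
explicitly per task instead, and this hunk switches the x86 fault path to that
predicate. An assumed sketch of the generic side (field and helper names may
differ slightly in the actual series):

    /* Assumed per-task pagefault-disable accounting (generic RT code, not this hunk). */
    static inline void pagefault_disable(void)
    {
            current->pagefault_disabled++;  /* assumed task_struct counter */
            barrier();                      /* order against the faulting access */
    }

    static inline void pagefault_enable(void)
    {
            barrier();
            current->pagefault_disabled--;
    }

    static inline int pagefault_disabled(void)
    {
            return current->pagefault_disabled;
    }
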
diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c
index f4f29b19fac5..71bd7d64e63c 100644
--- a/arch/x86/mm/highmem_32.c
+++ b/arch/x86/mm/highmem_32.c
@@ -43,7 +43,7 @@ void *kmap_atomic_prot(struct page *page, pgprot_t prot)
type = kmap_atomic_idx_push();
idx = type + KM_TYPE_NR*smp_processor_id();
vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
- BUG_ON(!pte_none(*(kmap_pte-idx)));
+ WARN_ON(!pte_none(*(kmap_pte-idx)));
set_pte(kmap_pte-idx, mk_pte(page, prot));
arch_flush_lazy_mmu_mode();
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index 6bc0e723b6e8..d8511fb90c64 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -86,11 +86,11 @@ notrace static noinline int do_realtime(struct timespec *ts)
{
unsigned long seq, ns;
do {
- seq = read_seqbegin(&gtod->lock);
+ seq = read_seqcount_begin(&gtod->seq);
ts->tv_sec = gtod->wall_time_sec;
ts->tv_nsec = gtod->wall_time_nsec;
ns = vgetns();
- } while (unlikely(read_seqretry(&gtod->lock, seq)));
+ } while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
timespec_add_ns(ts, ns);
return 0;
}
@@ -99,12 +99,12 @@ notrace static noinline int do_monotonic(struct timespec *ts)
{
unsigned long seq, ns, secs;
do {
- seq = read_seqbegin(&gtod->lock);
+ seq = read_seqcount_begin(&gtod->seq);
secs = gtod->wall_time_sec;
ns = gtod->wall_time_nsec + vgetns();
secs += gtod->wall_to_monotonic.tv_sec;
ns += gtod->wall_to_monotonic.tv_nsec;
- } while (unlikely(read_seqretry(&gtod->lock, seq)));
+ } while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
/* wall_time_nsec, vgetns(), and wall_to_monotonic.tv_nsec
* are all guaranteed to be nonnegative.
@@ -123,10 +123,10 @@ notrace static noinline int do_realtime_coarse(struct timespec *ts)
{
unsigned long seq;
do {
- seq = read_seqbegin(&gtod->lock);
+ seq = read_seqcount_begin(&gtod->seq);
ts->tv_sec = gtod->wall_time_coarse.tv_sec;
ts->tv_nsec = gtod->wall_time_coarse.tv_nsec;
- } while (unlikely(read_seqretry(&gtod->lock, seq)));
+ } while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
return 0;
}
@@ -134,12 +134,12 @@ notrace static noinline int do_monotonic_coarse(struct timespec *ts)
{
unsigned long seq, ns, secs;
do {
- seq = read_seqbegin(&gtod->lock);
+ seq = read_seqcount_begin(&gtod->seq);
secs = gtod->wall_time_coarse.tv_sec;
ns = gtod->wall_time_coarse.tv_nsec;
secs += gtod->wall_to_monotonic.tv_sec;
ns += gtod->wall_to_monotonic.tv_nsec;
- } while (unlikely(read_seqretry(&gtod->lock, seq)));
+ } while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
/* wall_time_nsec and wall_to_monotonic.tv_nsec are
* guaranteed to be between 0 and NSEC_PER_SEC.