Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--  arch/x86/kernel/acpi/boot.c           | 25
-rw-r--r--  arch/x86/kernel/apic/apic_numachip.c  | 22
-rw-r--r--  arch/x86/kernel/cpu/common.c          |  6
-rw-r--r--  arch/x86/kernel/cpu/intel.c           |  4
-rw-r--r--  arch/x86/kernel/entry_64.S            | 31
-rw-r--r--  arch/x86/kernel/kprobes/core.c        | 54
-rw-r--r--  arch/x86/kernel/kprobes/opt.c         |  2
-rw-r--r--  arch/x86/kernel/module.c              | 10
-rw-r--r--  arch/x86/kernel/setup.c               | 22
-rw-r--r--  arch/x86/kernel/traps.c               |  4
-rw-r--r--  arch/x86/kernel/xsave.c               |  7
11 files changed, 132 insertions(+), 55 deletions(-)
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 3d525c6124f6..803b684676ff 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -1338,6 +1338,26 @@ static int __init dmi_ignore_irq0_timer_override(const struct dmi_system_id *d)
}
/*
+ * ACPI offers an alternative platform interface model that removes
+ * ACPI hardware requirements for platforms that do not implement
+ * the PC Architecture.
+ *
+ * We initialize the Hardware-reduced ACPI model here:
+ */
+static void __init acpi_reduced_hw_init(void)
+{
+ if (acpi_gbl_reduced_hardware) {
+ /*
+ * Override x86_init functions and bypass legacy pic
+ * in Hardware-reduced ACPI mode
+ */
+ x86_init.timers.timer_init = x86_init_noop;
+ x86_init.irqs.pre_vector_init = x86_init_noop;
+ legacy_pic = &null_legacy_pic;
+ }
+}
+
+/*
* If your system is blacklisted here, but you find that acpi=force
* works for you, please contact linux-acpi@vger.kernel.org
*/
@@ -1536,6 +1556,11 @@ int __init early_acpi_boot_init(void)
*/
early_acpi_process_madt();
+ /*
+ * Hardware-reduced ACPI mode initialization:
+ */
+ acpi_reduced_hw_init();
+
return 0;
}
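
As a rough illustration of what the acpi/boot.c hunks do, here is a minimal standalone sketch of the "swap the default init hooks for no-ops up front" pattern; the names are made up for the example and are not the kernel's x86_init API:

  #include <stdbool.h>
  #include <stdio.h>

  struct init_ops {
          void (*timer_init)(void);
          void (*pre_vector_init)(void);
  };

  static void default_timer_init(void) { puts("program the legacy PIT"); }
  static void default_irq_init(void)   { puts("program the legacy PIC"); }
  static void init_noop(void)          { /* nothing to do */ }

  static struct init_ops ops = { default_timer_init, default_irq_init };

  static void reduced_hw_init(bool reduced_hw)
  {
          /* Mirrors acpi_reduced_hw_init(): replace the hooks once, up front. */
          if (reduced_hw) {
                  ops.timer_init      = init_noop;
                  ops.pre_vector_init = init_noop;
          }
  }

  int main(void)
  {
          reduced_hw_init(true);
          ops.pre_vector_init();   /* silently does nothing on reduced hardware */
          ops.timer_init();
          return 0;
  }

The point of the pattern is that later boot code keeps calling the same hooks unconditionally; only the early decision differs.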
diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c
index c2fd21fed002..017149cded07 100644
--- a/arch/x86/kernel/apic/apic_numachip.c
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -37,10 +37,12 @@ static const struct apic apic_numachip;
static unsigned int get_apic_id(unsigned long x)
{
unsigned long value;
- unsigned int id;
+ unsigned int id = (x >> 24) & 0xff;
- rdmsrl(MSR_FAM10H_NODE_ID, value);
- id = ((x >> 24) & 0xffU) | ((value << 2) & 0xff00U);
+ if (static_cpu_has_safe(X86_FEATURE_NODEID_MSR)) {
+ rdmsrl(MSR_FAM10H_NODE_ID, value);
+ id |= (value << 2) & 0xff00;
+ }
return id;
}
@@ -155,10 +157,18 @@ static int __init numachip_probe(void)
static void fixup_cpu_id(struct cpuinfo_x86 *c, int node)
{
- if (c->phys_proc_id != node) {
- c->phys_proc_id = node;
- per_cpu(cpu_llc_id, smp_processor_id()) = node;
+ u64 val;
+ u32 nodes = 1;
+
+ this_cpu_write(cpu_llc_id, node);
+
+ /* Account for nodes per socket in multi-core-module processors */
+ if (static_cpu_has_safe(X86_FEATURE_NODEID_MSR)) {
+ rdmsrl(MSR_FAM10H_NODE_ID, val);
+ nodes = ((val >> 3) & 7) + 1;
}
+
+ c->phys_proc_id = node / nodes;
}
static int __init numachip_system_init(void)
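
For illustration, a standalone sketch of the bit arithmetic the two apic_numachip.c hunks rely on; the function names are hypothetical and the MSR value is faked rather than read with rdmsrl():

  #include <stdint.h>
  #include <stdio.h>

  static unsigned int numachip_apic_id_sketch(uint32_t apic_reg, uint64_t node_msr,
                                              int have_nodeid_msr)
  {
          /* Low byte comes from bits 31..24 of the APIC-ID register value. */
          unsigned int id = (apic_reg >> 24) & 0xff;

          /* High byte is folded in from the NODE_ID MSR, as in get_apic_id(). */
          if (have_nodeid_msr)
                  id |= (node_msr << 2) & 0xff00;
          return id;
  }

  static unsigned int nodes_per_socket_sketch(uint64_t node_msr)
  {
          /* Same extraction as fixup_cpu_id(): bits 5..3 hold nodes-per-socket minus one. */
          return ((node_msr >> 3) & 7) + 1;
  }

  int main(void)
  {
          uint64_t fake_msr = 0x48;   /* made-up value, not a real MSR read */

          printf("apic id:          %#x\n",
                 numachip_apic_id_sketch(0x05000000, fake_msr, 1));
          printf("nodes per socket: %u\n", nodes_per_socket_sketch(fake_msr));
          return 0;
  }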
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index b5c8ff5e9dfc..2346c95c6ab1 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1396,6 +1396,12 @@ void cpu_init(void)
wait_for_master_cpu(cpu);
+ /*
+ * Initialize the CR4 shadow before doing anything that could
+ * try to read it.
+ */
+ cr4_init_shadow();
+
show_ucode_info_early();
printk(KERN_INFO "Initializing CPU#%d\n", cpu);
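
The common.c hunk only adds an early cr4_init_shadow() call, but the ordering constraint it enforces is the usual register-shadow pattern. A minimal single-threaded sketch with hypothetical names (the real kernel shadow is per-CPU and more involved):

  #include <stdint.h>
  #include <stdio.h>

  static uint64_t hw_cr4;        /* stand-in for the real control register     */
  static uint64_t cr4_shadow;    /* cached copy that is cheap and safe to read */

  static uint64_t hw_read_cr4(void)        { return hw_cr4; }
  static void     hw_write_cr4(uint64_t v) { hw_cr4 = v; }

  static void shadow_init(void)            { cr4_shadow = hw_read_cr4(); }
  static uint64_t shadow_read(void)        { return cr4_shadow; }

  static void shadow_set_bits(uint64_t mask)
  {
          uint64_t v = cr4_shadow | mask;

          if (v != cr4_shadow) {
                  cr4_shadow = v;          /* keep the cache...            */
                  hw_write_cr4(v);         /* ...and the register in sync  */
          }
  }

  int main(void)
  {
          hw_cr4 = 0x10;          /* pretend earlier boot code already set bits */
          shadow_init();          /* must run before anyone calls shadow_read() */
          shadow_set_bits(0x2);
          printf("cr4 (via shadow) = %#llx\n", (unsigned long long)shadow_read());
          return 0;
  }

Readers see only the cached copy, which is why the shadow has to be primed before any code that might consult it, exactly as the added comment in cpu_init() says.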
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 94d7dcb12145..50163fa9034f 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -565,8 +565,8 @@ static const struct _tlb_table intel_tlb_table[] = {
{ 0xb2, TLB_INST_4K, 64, " TLB_INST 4KByte pages, 4-way set associative" },
{ 0xb3, TLB_DATA_4K, 128, " TLB_DATA 4 KByte pages, 4-way set associative" },
{ 0xb4, TLB_DATA_4K, 256, " TLB_DATA 4 KByte pages, 4-way associative" },
- { 0xb5, TLB_INST_4K, 64, " TLB_INST 4 KByte pages, 8-way set ssociative" },
- { 0xb6, TLB_INST_4K, 128, " TLB_INST 4 KByte pages, 8-way set ssociative" },
+ { 0xb5, TLB_INST_4K, 64, " TLB_INST 4 KByte pages, 8-way set associative" },
+ { 0xb6, TLB_INST_4K, 128, " TLB_INST 4 KByte pages, 8-way set associative" },
{ 0xba, TLB_DATA_4K, 64, " TLB_DATA 4 KByte pages, 4-way associative" },
{ 0xc0, TLB_DATA_4K_4M, 8, " TLB_DATA 4 KByte and 4 MByte pages, 4-way associative" },
{ 0xc1, STLB_4K_2M, 1024, " STLB 4 KByte and 2 MByte pages, 8-way associative" },
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 10074ad9ebf8..2babb393915e 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -269,11 +269,14 @@ ENTRY(ret_from_fork)
testl $3, CS-ARGOFFSET(%rsp) # from kernel_thread?
jz 1f
- testl $_TIF_IA32, TI_flags(%rcx) # 32-bit compat task needs IRET
- jnz int_ret_from_sys_call
-
- RESTORE_TOP_OF_STACK %rdi, -ARGOFFSET
- jmp ret_from_sys_call # go to the SYSRET fastpath
+ /*
+ * By the time we get here, we have no idea whether our pt_regs,
+ * ti flags, and ti status came from the 64-bit SYSCALL fast path,
+ * the slow path, or one of the ia32entry paths.
+ * Use int_ret_from_sys_call to return, since it can safely handle
+ * all of the above.
+ */
+ jmp int_ret_from_sys_call
1:
subq $REST_SKIP, %rsp # leave space for volatiles
@@ -361,12 +364,21 @@ system_call_fastpath:
* Has incomplete stack frame and undefined top of stack.
*/
ret_from_sys_call:
- testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
- jnz int_ret_from_sys_call_fixup /* Go the the slow path */
-
LOCKDEP_SYS_EXIT
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
+
+ /*
+ * We must check ti flags with interrupts (or at least preemption)
+ * off because we must *never* return to userspace without
+ * processing exit work that is enqueued if we're preempted here.
+ * In particular, returning to userspace with any of the one-shot
+ * flags (TIF_NOTIFY_RESUME, TIF_USER_RETURN_NOTIFY, etc) set is
+ * very bad.
+ */
+ testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+	jnz int_ret_from_sys_call_fixup	/* Go to the slow path */
+
CFI_REMEMBER_STATE
/*
* sysretq will re-enable interrupts:
@@ -383,7 +395,7 @@ ret_from_sys_call:
int_ret_from_sys_call_fixup:
FIXUP_TOP_OF_STACK %r11, -ARGOFFSET
- jmp int_ret_from_sys_call
+ jmp int_ret_from_sys_call_irqs_off
/* Do syscall tracing */
tracesys:
@@ -429,6 +441,7 @@ tracesys_phase2:
GLOBAL(int_ret_from_sys_call)
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
+int_ret_from_sys_call_irqs_off:
movl $_TIF_ALLWORK_MASK,%edi
/* edi: mask to check */
GLOBAL(int_with_check)
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 6a1146ea4d4d..4e3d5a9621fe 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -223,27 +223,48 @@ static unsigned long
__recover_probed_insn(kprobe_opcode_t *buf, unsigned long addr)
{
struct kprobe *kp;
+ unsigned long faddr;
kp = get_kprobe((void *)addr);
- /* There is no probe, return original address */
- if (!kp)
+ faddr = ftrace_location(addr);
+ /*
+ * Addresses inside the ftrace location are refused by
+ * arch_check_ftrace_location(). Something went terribly wrong
+ * if such an address is checked here.
+ */
+ if (WARN_ON(faddr && faddr != addr))
+ return 0UL;
+ /*
+ * Use the current code if it is not modified by Kprobe
+ * and it cannot be modified by ftrace.
+ */
+ if (!kp && !faddr)
return addr;
/*
- * Basically, kp->ainsn.insn has an original instruction.
- * However, RIP-relative instruction can not do single-stepping
- * at different place, __copy_instruction() tweaks the displacement of
- * that instruction. In that case, we can't recover the instruction
- * from the kp->ainsn.insn.
+ * Basically, kp->ainsn.insn has an original instruction.
+ * However, RIP-relative instruction can not do single-stepping
+ * at different place, __copy_instruction() tweaks the displacement of
+ * that instruction. In that case, we can't recover the instruction
+ * from the kp->ainsn.insn.
*
- * On the other hand, kp->opcode has a copy of the first byte of
- * the probed instruction, which is overwritten by int3. And
- * the instruction at kp->addr is not modified by kprobes except
- * for the first byte, we can recover the original instruction
- * from it and kp->opcode.
+	 * On the other hand, in case of a normal Kprobe, kp->opcode has a copy
+	 * of the first byte of the probed instruction, which is overwritten
+	 * by int3. And since the instruction at kp->addr is not modified by
+	 * kprobes except for the first byte, we can recover the original
+	 * instruction from it and kp->opcode.
+ *
+ * In case of Kprobes using ftrace, we do not have a copy of
+ * the original instruction. In fact, the ftrace location might
+	 * be modified at any time and could even be in an inconsistent state.
+ * Fortunately, we know that the original code is the ideal 5-byte
+ * long NOP.
*/
- memcpy(buf, kp->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
- buf[0] = kp->opcode;
+ memcpy(buf, (void *)addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
+ if (faddr)
+ memcpy(buf, ideal_nops[NOP_ATOMIC5], 5);
+ else
+ buf[0] = kp->opcode;
return (unsigned long)buf;
}
@@ -251,6 +272,7 @@ __recover_probed_insn(kprobe_opcode_t *buf, unsigned long addr)
* Recover the probed instruction at addr for further analysis.
* Caller must lock kprobes by kprobe_mutex, or disable preemption
* for preventing to release referencing kprobes.
+ * Returns zero if the instruction cannot be recovered.
*/
unsigned long recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr)
{
@@ -285,6 +307,8 @@ static int can_probe(unsigned long paddr)
* normally used, we just go through if there is no kprobe.
*/
__addr = recover_probed_instruction(buf, addr);
+ if (!__addr)
+ return 0;
kernel_insn_init(&insn, (void *)__addr, MAX_INSN_SIZE);
insn_get_length(&insn);
@@ -333,6 +357,8 @@ int __copy_instruction(u8 *dest, u8 *src)
unsigned long recovered_insn =
recover_probed_instruction(buf, (unsigned long)src);
+ if (!recovered_insn)
+ return 0;
kernel_insn_init(&insn, (void *)recovered_insn, MAX_INSN_SIZE);
insn_get_length(&insn);
/* Another subsystem puts a breakpoint, failed to recover */
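
To make the recovery policy in the long kprobes/core.c comment concrete, here is a userspace sketch with hypothetical types and fixed-size buffers (not the kernel code): an int3-patched kprobe site keeps its original first byte in kp->opcode, an ftrace-managed site is assumed to really be the ideal 5-byte NOP, and anything else is refused.

  #include <stdint.h>
  #include <stdio.h>
  #include <string.h>

  #define MAX_INSN 16

  static const uint8_t ideal_nop5[5] = { 0x0f, 0x1f, 0x44, 0x00, 0x00 };

  struct fake_kprobe {
          uint8_t opcode;         /* saved original first byte of the probed insn */
  };

  /* Returns a pointer to trustworthy instruction bytes, or NULL to refuse. */
  static const uint8_t *recover_insn_sketch(uint8_t *buf, const uint8_t *addr,
                                            const struct fake_kprobe *kp,
                                            const uint8_t *ftrace_site)
  {
          /* An address inside, but not at, an ftrace site cannot be recovered. */
          if (ftrace_site && ftrace_site != addr)
                  return NULL;

          /* Unmodified text: the bytes at addr can be decoded directly. */
          if (!kp && !ftrace_site)
                  return addr;

          memcpy(buf, addr, MAX_INSN);
          if (ftrace_site)
                  memcpy(buf, ideal_nop5, sizeof(ideal_nop5)); /* original was the 5-byte NOP */
          else
                  buf[0] = kp->opcode;   /* undo the int3 byte written by the kprobe */
          return buf;
  }

  int main(void)
  {
          uint8_t text[MAX_INSN] = { 0xcc, 0x89, 0xe5 };  /* int3 over patched text      */
          struct fake_kprobe kp = { 0x55 };               /* 0x55 = original "push %rbp" */
          uint8_t buf[MAX_INSN];
          const uint8_t *insn = recover_insn_sketch(buf, text, &kp, NULL);

          printf("recovered first byte: %#x\n", insn ? insn[0] : 0u);
          return 0;
  }

The NULL return corresponds to the new 0UL return in __recover_probed_insn(), which is why the callers in can_probe(), __copy_instruction() and can_optimize() now bail out on zero.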
diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c
index 0dd8d089c315..7b3b9d15c47a 100644
--- a/arch/x86/kernel/kprobes/opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -259,6 +259,8 @@ static int can_optimize(unsigned long paddr)
*/
return 0;
recovered_insn = recover_probed_instruction(buf, addr);
+ if (!recovered_insn)
+ return 0;
kernel_insn_init(&insn, (void *)recovered_insn, MAX_INSN_SIZE);
insn_get_length(&insn);
/* Another subsystem puts a breakpoint */
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index 9bbb9b35c144..d1ac80b72c72 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -47,13 +47,21 @@ do { \
#ifdef CONFIG_RANDOMIZE_BASE
static unsigned long module_load_offset;
+static int randomize_modules = 1;
/* Mutex protects the module_load_offset. */
static DEFINE_MUTEX(module_kaslr_mutex);
+static int __init parse_nokaslr(char *p)
+{
+ randomize_modules = 0;
+ return 0;
+}
+early_param("nokaslr", parse_nokaslr);
+
static unsigned long int get_module_load_offset(void)
{
- if (kaslr_enabled) {
+ if (randomize_modules) {
mutex_lock(&module_kaslr_mutex);
/*
* Calculate the module_load_offset the first time this
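
The module.c hunk switches the gate from the setup_data-derived kaslr_enabled to a locally parsed "nokaslr" flag. A standalone sketch of that behaviour, with hypothetical names and no kernel APIs: a flag parsed early decides whether module loads get a randomized offset, and the offset is chosen once and then reused.

  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>

  static int randomize_modules = 1;

  /* Stand-in for the early_param("nokaslr", ...) hook added in the hunk above. */
  static void parse_early_options_sketch(const char *cmdline)
  {
          if (strstr(cmdline, "nokaslr"))
                  randomize_modules = 0;
  }

  static unsigned long module_load_offset_sketch(void)
  {
          static unsigned long offset;
          static int chosen;

          if (!randomize_modules)
                  return 0;
          if (!chosen) {                  /* pick the offset once, then reuse it */
                  offset = ((unsigned long)(rand() % 1024) + 1) * 4096;
                  chosen = 1;
          }
          return offset;
  }

  int main(void)
  {
          parse_early_options_sketch("root=/dev/sda1 quiet");   /* no nokaslr here */
          printf("module load offset: %#lx\n", module_load_offset_sketch());
          return 0;
  }

Passing a command line containing "nokaslr" would make the sketch return 0, matching the patch's intent of disabling module randomization from the command line rather than from boot-loader setup_data.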
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 98dc9317286e..0a2421cca01f 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -122,8 +122,6 @@
unsigned long max_low_pfn_mapped;
unsigned long max_pfn_mapped;
-bool __read_mostly kaslr_enabled = false;
-
#ifdef CONFIG_DMI
RESERVE_BRK(dmi_alloc, 65536);
#endif
@@ -427,11 +425,6 @@ static void __init reserve_initrd(void)
}
#endif /* CONFIG_BLK_DEV_INITRD */
-static void __init parse_kaslr_setup(u64 pa_data, u32 data_len)
-{
- kaslr_enabled = (bool)(pa_data + sizeof(struct setup_data));
-}
-
static void __init parse_setup_data(void)
{
struct setup_data *data;
@@ -457,9 +450,6 @@ static void __init parse_setup_data(void)
case SETUP_EFI:
parse_efi_setup(pa_data, data_len);
break;
- case SETUP_KASLR:
- parse_kaslr_setup(pa_data, data_len);
- break;
default:
break;
}
@@ -842,14 +832,10 @@ static void __init trim_low_memory_range(void)
static int
dump_kernel_offset(struct notifier_block *self, unsigned long v, void *p)
{
- if (kaslr_enabled)
- pr_emerg("Kernel Offset: 0x%lx from 0x%lx (relocation range: 0x%lx-0x%lx)\n",
- (unsigned long)&_text - __START_KERNEL,
- __START_KERNEL,
- __START_KERNEL_map,
- MODULES_VADDR-1);
- else
- pr_emerg("Kernel Offset: disabled\n");
+ pr_emerg("Kernel Offset: 0x%lx from 0x%lx "
+ "(relocation range: 0x%lx-0x%lx)\n",
+ (unsigned long)&_text - __START_KERNEL, __START_KERNEL,
+ __START_KERNEL_map, MODULES_VADDR-1);
return 0;
}
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 9d2073e2ecc9..4ff5d162ff9f 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -384,7 +384,7 @@ dotraplinkage void do_bounds(struct pt_regs *regs, long error_code)
goto exit;
conditional_sti(regs);
- if (!user_mode(regs))
+ if (!user_mode_vm(regs))
die("bounds", regs, error_code);
if (!cpu_feature_enabled(X86_FEATURE_MPX)) {
@@ -637,7 +637,7 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
* then it's very likely the result of an icebp/int01 trap.
* User wants a sigtrap for that.
*/
- if (!dr6 && user_mode(regs))
+ if (!dr6 && user_mode_vm(regs))
user_icebp = 1;
/* Catch kmemcheck conditions first of all! */
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index 34f66e58a896..cdc6cf903078 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -379,7 +379,7 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size)
* thread's fpu state, reconstruct fxstate from the fsave
* header. Sanitize the copied state etc.
*/
- struct xsave_struct *xsave = &tsk->thread.fpu.state->xsave;
+ struct fpu *fpu = &tsk->thread.fpu;
struct user_i387_ia32_struct env;
int err = 0;
@@ -393,14 +393,15 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size)
*/
drop_fpu(tsk);
- if (__copy_from_user(xsave, buf_fx, state_size) ||
+ if (__copy_from_user(&fpu->state->xsave, buf_fx, state_size) ||
__copy_from_user(&env, buf, sizeof(env))) {
+ fpu_finit(fpu);
err = -1;
} else {
sanitize_restored_xstate(tsk, &env, xstate_bv, fx_only);
- set_used_math();
}
+ set_used_math();
if (use_eager_fpu()) {
preempt_disable();
math_state_restore();