summaryrefslogtreecommitdiff
path: root/arch/x86/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--arch/x86/kernel/apic/apic.c27
-rw-r--r--arch/x86/kernel/dumpstack_64.c3
-rw-r--r--arch/x86/kernel/kvm.c158
-rw-r--r--arch/x86/kernel/smpboot.c47
-rw-r--r--arch/x86/kernel/traps.c2
-rw-r--r--arch/x86/kernel/unwind_frame.c3
-rw-r--r--arch/x86/kernel/unwind_orc.c113
-rw-r--r--arch/x86/kernel/x86_init.c1
8 files changed, 204 insertions, 150 deletions
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 81b9c63dae1b..e53dda210cd7 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -352,8 +352,6 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
* According to Intel, MFENCE can do the serialization here.
*/
asm volatile("mfence" : : : "memory");
-
- printk_once(KERN_DEBUG "TSC deadline timer enabled\n");
return;
}
@@ -546,7 +544,7 @@ static struct clock_event_device lapic_clockevent = {
};
static DEFINE_PER_CPU(struct clock_event_device, lapic_events);
-static u32 hsx_deadline_rev(void)
+static __init u32 hsx_deadline_rev(void)
{
switch (boot_cpu_data.x86_stepping) {
case 0x02: return 0x3a; /* EP */
@@ -556,7 +554,7 @@ static u32 hsx_deadline_rev(void)
return ~0U;
}
-static u32 bdx_deadline_rev(void)
+static __init u32 bdx_deadline_rev(void)
{
switch (boot_cpu_data.x86_stepping) {
case 0x02: return 0x00000011;
@@ -568,7 +566,7 @@ static u32 bdx_deadline_rev(void)
return ~0U;
}
-static u32 skx_deadline_rev(void)
+static __init u32 skx_deadline_rev(void)
{
switch (boot_cpu_data.x86_stepping) {
case 0x03: return 0x01000136;
@@ -581,7 +579,7 @@ static u32 skx_deadline_rev(void)
return ~0U;
}
-static const struct x86_cpu_id deadline_match[] = {
+static const struct x86_cpu_id deadline_match[] __initconst = {
X86_MATCH_INTEL_FAM6_MODEL( HASWELL_X, &hsx_deadline_rev),
X86_MATCH_INTEL_FAM6_MODEL( BROADWELL_X, 0x0b000020),
X86_MATCH_INTEL_FAM6_MODEL( BROADWELL_D, &bdx_deadline_rev),
@@ -603,18 +601,19 @@ static const struct x86_cpu_id deadline_match[] = {
{},
};
-static void apic_check_deadline_errata(void)
+static __init bool apic_validate_deadline_timer(void)
{
const struct x86_cpu_id *m;
u32 rev;
- if (!boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER) ||
- boot_cpu_has(X86_FEATURE_HYPERVISOR))
- return;
+ if (!boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
+ return false;
+ if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
+ return true;
m = x86_match_cpu(deadline_match);
if (!m)
- return;
+ return true;
/*
* Function pointers will have the MSB set due to address layout,
@@ -626,11 +625,12 @@ static void apic_check_deadline_errata(void)
rev = (u32)m->driver_data;
if (boot_cpu_data.microcode >= rev)
- return;
+ return true;
setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
pr_err(FW_BUG "TSC_DEADLINE disabled due to Errata; "
"please update microcode to version: 0x%x (or later)\n", rev);
+ return false;
}
/*
@@ -2092,7 +2092,8 @@ void __init init_apic_mappings(void)
{
unsigned int new_apicid;
- apic_check_deadline_errata();
+ if (apic_validate_deadline_timer())
+ pr_debug("TSC deadline timer available\n");
if (x2apic_mode) {
boot_cpu_physical_apicid = read_apic_id();
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 87b97897a881..460ae7f66818 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -183,7 +183,8 @@ recursion_check:
*/
if (visit_mask) {
if (*visit_mask & (1UL << info->type)) {
- printk_deferred_once(KERN_WARNING "WARNING: stack recursion on stack type %d\n", info->type);
+ if (task == current)
+ printk_deferred_once(KERN_WARNING "WARNING: stack recursion on stack type %d\n", info->type);
goto unknown;
}
*visit_mask |= 1UL << info->type;
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 6efe0410fb72..b3d9b0d7a37d 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -35,6 +35,8 @@
#include <asm/tlb.h>
#include <asm/cpuidle_haltpoll.h>
+DEFINE_STATIC_KEY_FALSE(kvm_async_pf_enabled);
+
static int kvmapf = 1;
static int __init parse_no_kvmapf(char *arg)
@@ -73,7 +75,6 @@ struct kvm_task_sleep_node {
struct swait_queue_head wq;
u32 token;
int cpu;
- bool halted;
};
static struct kvm_task_sleep_head {
@@ -96,77 +97,64 @@ static struct kvm_task_sleep_node *_find_apf_task(struct kvm_task_sleep_head *b,
return NULL;
}
-/*
- * @interrupt_kernel: Is this called from a routine which interrupts the kernel
- * (other than user space)?
- */
-void kvm_async_pf_task_wait(u32 token, int interrupt_kernel)
+static bool kvm_async_pf_queue_task(u32 token, struct kvm_task_sleep_node *n)
{
u32 key = hash_32(token, KVM_TASK_SLEEP_HASHBITS);
struct kvm_task_sleep_head *b = &async_pf_sleepers[key];
- struct kvm_task_sleep_node n, *e;
- DECLARE_SWAITQUEUE(wait);
-
- rcu_irq_enter();
+ struct kvm_task_sleep_node *e;
raw_spin_lock(&b->lock);
e = _find_apf_task(b, token);
if (e) {
/* dummy entry exist -> wake up was delivered ahead of PF */
hlist_del(&e->link);
- kfree(e);
raw_spin_unlock(&b->lock);
-
- rcu_irq_exit();
- return;
+ kfree(e);
+ return false;
}
- n.token = token;
- n.cpu = smp_processor_id();
- n.halted = is_idle_task(current) ||
- (IS_ENABLED(CONFIG_PREEMPT_COUNT)
- ? preempt_count() > 1 || rcu_preempt_depth()
- : interrupt_kernel);
- init_swait_queue_head(&n.wq);
- hlist_add_head(&n.link, &b->list);
+ n->token = token;
+ n->cpu = smp_processor_id();
+ init_swait_queue_head(&n->wq);
+ hlist_add_head(&n->link, &b->list);
raw_spin_unlock(&b->lock);
+ return true;
+}
+
+/*
+ * kvm_async_pf_task_wait_schedule - Wait for pagefault to be handled
+ * @token: Token to identify the sleep node entry
+ *
+ * Invoked from the async pagefault handling code or from the VM exit page
+ * fault handler. In both cases RCU is watching.
+ */
+void kvm_async_pf_task_wait_schedule(u32 token)
+{
+ struct kvm_task_sleep_node n;
+ DECLARE_SWAITQUEUE(wait);
+
+ lockdep_assert_irqs_disabled();
+
+ if (!kvm_async_pf_queue_task(token, &n))
+ return;
for (;;) {
- if (!n.halted)
- prepare_to_swait_exclusive(&n.wq, &wait, TASK_UNINTERRUPTIBLE);
+ prepare_to_swait_exclusive(&n.wq, &wait, TASK_UNINTERRUPTIBLE);
if (hlist_unhashed(&n.link))
break;
- rcu_irq_exit();
-
- if (!n.halted) {
- local_irq_enable();
- schedule();
- local_irq_disable();
- } else {
- /*
- * We cannot reschedule. So halt.
- */
- native_safe_halt();
- local_irq_disable();
- }
-
- rcu_irq_enter();
+ local_irq_enable();
+ schedule();
+ local_irq_disable();
}
- if (!n.halted)
- finish_swait(&n.wq, &wait);
-
- rcu_irq_exit();
- return;
+ finish_swait(&n.wq, &wait);
}
-EXPORT_SYMBOL_GPL(kvm_async_pf_task_wait);
+EXPORT_SYMBOL_GPL(kvm_async_pf_task_wait_schedule);
static void apf_task_wake_one(struct kvm_task_sleep_node *n)
{
hlist_del_init(&n->link);
- if (n->halted)
- smp_send_reschedule(n->cpu);
- else if (swq_has_sleeper(&n->wq))
+ if (swq_has_sleeper(&n->wq))
swake_up_one(&n->wq);
}
@@ -175,12 +163,13 @@ static void apf_task_wake_all(void)
int i;
for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++) {
- struct hlist_node *p, *next;
struct kvm_task_sleep_head *b = &async_pf_sleepers[i];
+ struct kvm_task_sleep_node *n;
+ struct hlist_node *p, *next;
+
raw_spin_lock(&b->lock);
hlist_for_each_safe(p, next, &b->list) {
- struct kvm_task_sleep_node *n =
- hlist_entry(p, typeof(*n), link);
+ n = hlist_entry(p, typeof(*n), link);
if (n->cpu == smp_processor_id())
apf_task_wake_one(n);
}
@@ -221,8 +210,9 @@ again:
n->cpu = smp_processor_id();
init_swait_queue_head(&n->wq);
hlist_add_head(&n->link, &b->list);
- } else
+ } else {
apf_task_wake_one(n);
+ }
raw_spin_unlock(&b->lock);
return;
}
@@ -242,25 +232,39 @@ u32 kvm_read_and_reset_pf_reason(void)
EXPORT_SYMBOL_GPL(kvm_read_and_reset_pf_reason);
NOKPROBE_SYMBOL(kvm_read_and_reset_pf_reason);
-dotraplinkage void
-do_async_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address)
+bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token)
{
- switch (kvm_read_and_reset_pf_reason()) {
- default:
- do_page_fault(regs, error_code, address);
- break;
+ u32 reason = kvm_read_and_reset_pf_reason();
+
+ switch (reason) {
case KVM_PV_REASON_PAGE_NOT_PRESENT:
- /* page is swapped out by the host. */
- kvm_async_pf_task_wait((u32)address, !user_mode(regs));
- break;
case KVM_PV_REASON_PAGE_READY:
+ break;
+ default:
+ return false;
+ }
+
+ /*
+ * If the host managed to inject an async #PF into an interrupt
+ * disabled region, then die hard as this is not going to end well
+ * and the host side is seriously broken.
+ */
+ if (unlikely(!(regs->flags & X86_EFLAGS_IF)))
+ panic("Host injected async #PF in interrupt disabled region\n");
+
+ if (reason == KVM_PV_REASON_PAGE_NOT_PRESENT) {
+ if (unlikely(!(user_mode(regs))))
+ panic("Host injected async #PF in kernel mode\n");
+ /* Page is swapped out by the host. */
+ kvm_async_pf_task_wait_schedule(token);
+ } else {
rcu_irq_enter();
- kvm_async_pf_task_wake((u32)address);
+ kvm_async_pf_task_wake(token);
rcu_irq_exit();
- break;
}
+ return true;
}
-NOKPROBE_SYMBOL(do_async_page_fault);
+NOKPROBE_SYMBOL(__kvm_handle_async_pf);
static void __init paravirt_ops_setup(void)
{
@@ -306,11 +310,11 @@ static notrace void kvm_guest_apic_eoi_write(u32 reg, u32 val)
static void kvm_guest_cpu_init(void)
{
if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF) && kvmapf) {
- u64 pa = slow_virt_to_phys(this_cpu_ptr(&apf_reason));
+ u64 pa;
-#ifdef CONFIG_PREEMPTION
- pa |= KVM_ASYNC_PF_SEND_ALWAYS;
-#endif
+ WARN_ON_ONCE(!static_branch_likely(&kvm_async_pf_enabled));
+
+ pa = slow_virt_to_phys(this_cpu_ptr(&apf_reason));
pa |= KVM_ASYNC_PF_ENABLED;
if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_VMEXIT))
@@ -318,12 +322,12 @@ static void kvm_guest_cpu_init(void)
wrmsrl(MSR_KVM_ASYNC_PF_EN, pa);
__this_cpu_write(apf_reason.enabled, 1);
- printk(KERN_INFO"KVM setup async PF for cpu %d\n",
- smp_processor_id());
+ pr_info("KVM setup async PF for cpu %d\n", smp_processor_id());
}
if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) {
unsigned long pa;
+
/* Size alignment is implied but just to make it explicit. */
BUILD_BUG_ON(__alignof__(kvm_apic_eoi) < 4);
__this_cpu_write(kvm_apic_eoi, 0);
@@ -344,8 +348,7 @@ static void kvm_pv_disable_apf(void)
wrmsrl(MSR_KVM_ASYNC_PF_EN, 0);
__this_cpu_write(apf_reason.enabled, 0);
- printk(KERN_INFO"Unregister pv shared memory for cpu %d\n",
- smp_processor_id());
+ pr_info("Unregister pv shared memory for cpu %d\n", smp_processor_id());
}
static void kvm_pv_guest_cpu_reboot(void *unused)
@@ -592,12 +595,6 @@ static int kvm_cpu_down_prepare(unsigned int cpu)
}
#endif
-static void __init kvm_apf_trap_init(void)
-{
- update_intr_gate(X86_TRAP_PF, async_page_fault);
-}
-
-
static void kvm_flush_tlb_others(const struct cpumask *cpumask,
const struct flush_tlb_info *info)
{
@@ -632,8 +629,6 @@ static void __init kvm_guest_init(void)
register_reboot_notifier(&kvm_pv_reboot_nb);
for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++)
raw_spin_lock_init(&async_pf_sleepers[i].lock);
- if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF))
- x86_init.irqs.trap_init = kvm_apf_trap_init;
if (kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) {
has_steal_clock = 1;
@@ -649,6 +644,9 @@ static void __init kvm_guest_init(void)
if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
apic_set_eoi_write(kvm_guest_apic_eoi_write);
+ if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF) && kvmapf)
+ static_branch_enable(&kvm_async_pf_enabled);
+
#ifdef CONFIG_SMP
smp_ops.smp_prepare_cpus = kvm_smp_prepare_cpus;
smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index fe3ab9632f3b..8c89e4d9ad28 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -147,7 +147,7 @@ static inline void smpboot_restore_warm_reset_vector(void)
*((volatile u32 *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = 0;
}
-static void init_freq_invariance(void);
+static void init_freq_invariance(bool secondary);
/*
* Report back to the Boot Processor during boot time or to the caller processor
@@ -185,7 +185,7 @@ static void smp_callin(void)
*/
set_cpu_sibling_map(raw_smp_processor_id());
- init_freq_invariance();
+ init_freq_invariance(true);
/*
* Get our bogomips.
@@ -1341,7 +1341,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
set_sched_topology(x86_topology);
set_cpu_sibling_map(0);
- init_freq_invariance();
+ init_freq_invariance(false);
smp_sanity_check();
switch (apic_intr_mode) {
@@ -1877,9 +1877,6 @@ static bool knl_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq,
int err, i;
u64 msr;
- if (!x86_match_cpu(has_knl_turbo_ratio_limits))
- return false;
-
err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq);
if (err)
return false;
@@ -1945,18 +1942,23 @@ static bool skx_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq, int size)
static bool core_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq)
{
+ u64 msr;
int err;
err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq);
if (err)
return false;
- err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, turbo_freq);
+ err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &msr);
if (err)
return false;
- *base_freq = (*base_freq >> 8) & 0xFF; /* max P state */
- *turbo_freq = (*turbo_freq >> 24) & 0xFF; /* 4C turbo */
+ *base_freq = (*base_freq >> 8) & 0xFF; /* max P state */
+ *turbo_freq = (msr >> 24) & 0xFF; /* 4C turbo */
+
+ /* The CPU may have less than 4 cores */
+ if (!*turbo_freq)
+ *turbo_freq = msr & 0xFF; /* 1C turbo */
return true;
}
@@ -1972,7 +1974,8 @@ static bool intel_set_max_freq_ratio(void)
skx_set_max_freq_ratio(&base_freq, &turbo_freq, 1))
goto out;
- if (knl_set_max_freq_ratio(&base_freq, &turbo_freq, 1))
+ if (x86_match_cpu(has_knl_turbo_ratio_limits) &&
+ knl_set_max_freq_ratio(&base_freq, &turbo_freq, 1))
goto out;
if (x86_match_cpu(has_skx_turbo_ratio_limits) &&
@@ -1985,13 +1988,22 @@ static bool intel_set_max_freq_ratio(void)
return false;
out:
+ /*
+ * Some hypervisors advertise X86_FEATURE_APERFMPERF
+ * but then fill all MSR's with zeroes.
+ */
+ if (!base_freq) {
+ pr_debug("Couldn't determine cpu base frequency, necessary for scale-invariant accounting.\n");
+ return false;
+ }
+
arch_turbo_freq_ratio = div_u64(turbo_freq * SCHED_CAPACITY_SCALE,
base_freq);
arch_set_max_freq_ratio(turbo_disabled());
return true;
}
-static void init_counter_refs(void *arg)
+static void init_counter_refs(void)
{
u64 aperf, mperf;
@@ -2002,18 +2014,25 @@ static void init_counter_refs(void *arg)
this_cpu_write(arch_prev_mperf, mperf);
}
-static void init_freq_invariance(void)
+static void init_freq_invariance(bool secondary)
{
bool ret = false;
- if (smp_processor_id() != 0 || !boot_cpu_has(X86_FEATURE_APERFMPERF))
+ if (!boot_cpu_has(X86_FEATURE_APERFMPERF))
return;
+ if (secondary) {
+ if (static_branch_likely(&arch_scale_freq_key)) {
+ init_counter_refs();
+ }
+ return;
+ }
+
if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
ret = intel_set_max_freq_ratio();
if (ret) {
- on_each_cpu(init_counter_refs, NULL, 1);
+ init_counter_refs();
static_branch_enable(&arch_scale_freq_key);
} else {
pr_debug("Couldn't determine max cpu frequency, necessary for scale-invariant accounting.\n");
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index d54cffdc7cac..821fac47eef6 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -983,7 +983,5 @@ void __init trap_init(void)
idt_setup_ist_traps();
- x86_init.irqs.trap_init();
-
idt_setup_debugidt_traps();
}
diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c
index a224b5ab103f..54226110bc7f 100644
--- a/arch/x86/kernel/unwind_frame.c
+++ b/arch/x86/kernel/unwind_frame.c
@@ -344,6 +344,9 @@ bad_address:
if (IS_ENABLED(CONFIG_X86_32))
goto the_end;
+ if (state->task != current)
+ goto the_end;
+
if (state->regs) {
printk_deferred_once(KERN_WARNING
"WARNING: kernel stack regs at %p in %s:%d has bad 'bp' value %p\n",
diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
index e9cc182aa97e..5b0bd8581fe6 100644
--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -8,19 +8,21 @@
#include <asm/orc_lookup.h>
#define orc_warn(fmt, ...) \
- printk_deferred_once(KERN_WARNING pr_fmt("WARNING: " fmt), ##__VA_ARGS__)
+ printk_deferred_once(KERN_WARNING "WARNING: " fmt, ##__VA_ARGS__)
+
+#define orc_warn_current(args...) \
+({ \
+ if (state->task == current) \
+ orc_warn(args); \
+})
extern int __start_orc_unwind_ip[];
extern int __stop_orc_unwind_ip[];
extern struct orc_entry __start_orc_unwind[];
extern struct orc_entry __stop_orc_unwind[];
-static DEFINE_MUTEX(sort_mutex);
-int *cur_orc_ip_table = __start_orc_unwind_ip;
-struct orc_entry *cur_orc_table = __start_orc_unwind;
-
-unsigned int lookup_num_blocks;
-bool orc_init;
+static bool orc_init __ro_after_init;
+static unsigned int lookup_num_blocks __ro_after_init;
static inline unsigned long orc_ip(const int *ip)
{
@@ -142,9 +144,6 @@ static struct orc_entry *orc_find(unsigned long ip)
{
static struct orc_entry *orc;
- if (!orc_init)
- return NULL;
-
if (ip == 0)
return &null_orc_entry;
@@ -189,6 +188,10 @@ static struct orc_entry *orc_find(unsigned long ip)
#ifdef CONFIG_MODULES
+static DEFINE_MUTEX(sort_mutex);
+static int *cur_orc_ip_table = __start_orc_unwind_ip;
+static struct orc_entry *cur_orc_table = __start_orc_unwind;
+
static void orc_sort_swap(void *_a, void *_b, int size)
{
struct orc_entry *orc_a, *orc_b;
@@ -381,9 +384,38 @@ static bool deref_stack_iret_regs(struct unwind_state *state, unsigned long addr
return true;
}
+/*
+ * If state->regs is non-NULL, and points to a full pt_regs, just get the reg
+ * value from state->regs.
+ *
+ * Otherwise, if state->regs just points to IRET regs, and the previous frame
+ * had full regs, it's safe to get the value from the previous regs. This can
+ * happen when early/late IRQ entry code gets interrupted by an NMI.
+ */
+static bool get_reg(struct unwind_state *state, unsigned int reg_off,
+ unsigned long *val)
+{
+ unsigned int reg = reg_off/8;
+
+ if (!state->regs)
+ return false;
+
+ if (state->full_regs) {
+ *val = ((unsigned long *)state->regs)[reg];
+ return true;
+ }
+
+ if (state->prev_regs) {
+ *val = ((unsigned long *)state->prev_regs)[reg];
+ return true;
+ }
+
+ return false;
+}
+
bool unwind_next_frame(struct unwind_state *state)
{
- unsigned long ip_p, sp, orig_ip = state->ip, prev_sp = state->sp;
+ unsigned long ip_p, sp, tmp, orig_ip = state->ip, prev_sp = state->sp;
enum stack_type prev_type = state->stack_info.type;
struct orc_entry *orc;
bool indirect = false;
@@ -445,43 +477,39 @@ bool unwind_next_frame(struct unwind_state *state)
break;
case ORC_REG_R10:
- if (!state->regs || !state->full_regs) {
- orc_warn("missing regs for base reg R10 at ip %pB\n",
- (void *)state->ip);
+ if (!get_reg(state, offsetof(struct pt_regs, r10), &sp)) {
+ orc_warn_current("missing R10 value at %pB\n",
+ (void *)state->ip);
goto err;
}
- sp = state->regs->r10;
break;
case ORC_REG_R13:
- if (!state->regs || !state->full_regs) {
- orc_warn("missing regs for base reg R13 at ip %pB\n",
- (void *)state->ip);
+ if (!get_reg(state, offsetof(struct pt_regs, r13), &sp)) {
+ orc_warn_current("missing R13 value at %pB\n",
+ (void *)state->ip);
goto err;
}
- sp = state->regs->r13;
break;
case ORC_REG_DI:
- if (!state->regs || !state->full_regs) {
- orc_warn("missing regs for base reg DI at ip %pB\n",
- (void *)state->ip);
+ if (!get_reg(state, offsetof(struct pt_regs, di), &sp)) {
+ orc_warn_current("missing RDI value at %pB\n",
+ (void *)state->ip);
goto err;
}
- sp = state->regs->di;
break;
case ORC_REG_DX:
- if (!state->regs || !state->full_regs) {
- orc_warn("missing regs for base reg DX at ip %pB\n",
- (void *)state->ip);
+ if (!get_reg(state, offsetof(struct pt_regs, dx), &sp)) {
+ orc_warn_current("missing DX value at %pB\n",
+ (void *)state->ip);
goto err;
}
- sp = state->regs->dx;
break;
default:
- orc_warn("unknown SP base reg %d for ip %pB\n",
+ orc_warn("unknown SP base reg %d at %pB\n",
orc->sp_reg, (void *)state->ip);
goto err;
}
@@ -504,44 +532,48 @@ bool unwind_next_frame(struct unwind_state *state)
state->sp = sp;
state->regs = NULL;
+ state->prev_regs = NULL;
state->signal = false;
break;
case ORC_TYPE_REGS:
if (!deref_stack_regs(state, sp, &state->ip, &state->sp)) {
- orc_warn("can't dereference registers at %p for ip %pB\n",
- (void *)sp, (void *)orig_ip);
+ orc_warn_current("can't access registers at %pB\n",
+ (void *)orig_ip);
goto err;
}
state->regs = (struct pt_regs *)sp;
+ state->prev_regs = NULL;
state->full_regs = true;
state->signal = true;
break;
case ORC_TYPE_REGS_IRET:
if (!deref_stack_iret_regs(state, sp, &state->ip, &state->sp)) {
- orc_warn("can't dereference iret registers at %p for ip %pB\n",
- (void *)sp, (void *)orig_ip);
+ orc_warn_current("can't access iret registers at %pB\n",
+ (void *)orig_ip);
goto err;
}
+ if (state->full_regs)
+ state->prev_regs = state->regs;
state->regs = (void *)sp - IRET_FRAME_OFFSET;
state->full_regs = false;
state->signal = true;
break;
default:
- orc_warn("unknown .orc_unwind entry type %d for ip %pB\n",
+ orc_warn("unknown .orc_unwind entry type %d at %pB\n",
orc->type, (void *)orig_ip);
- break;
+ goto err;
}
/* Find BP: */
switch (orc->bp_reg) {
case ORC_REG_UNDEFINED:
- if (state->regs && state->full_regs)
- state->bp = state->regs->bp;
+ if (get_reg(state, offsetof(struct pt_regs, bp), &tmp))
+ state->bp = tmp;
break;
case ORC_REG_PREV_SP:
@@ -564,8 +596,8 @@ bool unwind_next_frame(struct unwind_state *state)
if (state->stack_info.type == prev_type &&
on_stack(&state->stack_info, (void *)state->sp, sizeof(long)) &&
state->sp <= prev_sp) {
- orc_warn("stack going in the wrong direction? ip=%pB\n",
- (void *)orig_ip);
+ orc_warn_current("stack going in the wrong direction? at %pB\n",
+ (void *)orig_ip);
goto err;
}
@@ -585,6 +617,9 @@ EXPORT_SYMBOL_GPL(unwind_next_frame);
void __unwind_start(struct unwind_state *state, struct task_struct *task,
struct pt_regs *regs, unsigned long *first_frame)
{
+ if (!orc_init)
+ goto done;
+
memset(state, 0, sizeof(*state));
state->task = task;
@@ -651,7 +686,7 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
/* Otherwise, skip ahead to the user-specified starting frame: */
while (!unwind_done(state) &&
(!on_stack(&state->stack_info, first_frame, sizeof(long)) ||
- state->sp <= (unsigned long)first_frame))
+ state->sp < (unsigned long)first_frame))
unwind_next_frame(state);
return;
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index 85f1a90c55cd..123f1c1f1788 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -79,7 +79,6 @@ struct x86_init_ops x86_init __initdata = {
.irqs = {
.pre_vector_init = init_ISA_irqs,
.intr_init = native_init_IRQ,
- .trap_init = x86_init_noop,
.intr_mode_select = apic_intr_mode_select,
.intr_mode_init = apic_intr_mode_init
},