summaryrefslogtreecommitdiff
path: root/arch/x86/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--arch/x86/kernel/Makefile3
-rw-r--r--arch/x86/kernel/apb_timer.c8
-rw-r--r--arch/x86/kernel/apic/apic.c28
-rw-r--r--arch/x86/kernel/apic/io_apic.c457
-rw-r--r--arch/x86/kernel/apic/ipi.c2
-rw-r--r--arch/x86/kernel/apic/x2apic_phys.c21
-rw-r--r--arch/x86/kernel/apm_32.c11
-rw-r--r--arch/x86/kernel/cpu/hypervisor.c7
-rw-r--r--arch/x86/kernel/cpu/intel_cacheinfo.c7
-rw-r--r--arch/x86/kernel/cpu/perf_event.c15
-rw-r--r--arch/x86/kernel/cpu/perf_event.h25
-rw-r--r--arch/x86/kernel/cpu/perf_event_amd.c322
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel.c6
-rw-r--r--arch/x86/kernel/cpu/perf_event_p6.c2
-rw-r--r--arch/x86/kernel/cpu/vmware.c13
-rw-r--r--arch/x86/kernel/entry_64.S7
-rw-r--r--arch/x86/kernel/head32.c3
-rw-r--r--arch/x86/kernel/head64.c2
-rw-r--r--arch/x86/kernel/head_32.S102
-rw-r--r--arch/x86/kernel/hpet.c2
-rw-r--r--arch/x86/kernel/kprobes/Makefile7
-rw-r--r--arch/x86/kernel/kprobes/common.h (renamed from arch/x86/kernel/kprobes-common.h)11
-rw-r--r--arch/x86/kernel/kprobes/core.c (renamed from arch/x86/kernel/kprobes.c)76
-rw-r--r--arch/x86/kernel/kprobes/ftrace.c93
-rw-r--r--arch/x86/kernel/kprobes/opt.c (renamed from arch/x86/kernel/kprobes-opt.c)2
-rw-r--r--arch/x86/kernel/kvm.c1
-rw-r--r--arch/x86/kernel/msr.c3
-rw-r--r--arch/x86/kernel/pci-dma.c2
-rw-r--r--arch/x86/kernel/ptrace.c2
-rw-r--r--arch/x86/kernel/reboot.c2
-rw-r--r--arch/x86/kernel/rtc.c1
-rw-r--r--arch/x86/kernel/setup.c28
-rw-r--r--arch/x86/kernel/sys_x86_64.c2
-rw-r--r--arch/x86/kernel/tsc.c3
-rw-r--r--arch/x86/kernel/uprobes.c4
-rw-r--r--arch/x86/kernel/x86_init.c24
36 files changed, 733 insertions, 571 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 34e923a53762..ac3b3d002833 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -65,8 +65,7 @@ obj-$(CONFIG_X86_TSC) += trace_clock.o
obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o
obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o
-obj-$(CONFIG_KPROBES) += kprobes.o
-obj-$(CONFIG_OPTPROBES) += kprobes-opt.o
+obj-y += kprobes/
obj-$(CONFIG_MODULES) += module.o
obj-$(CONFIG_DOUBLEFAULT) += doublefault_32.o
obj-$(CONFIG_KGDB) += kgdb.o
diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c
index afdc3f756dea..cc74fd0c90f2 100644
--- a/arch/x86/kernel/apb_timer.c
+++ b/arch/x86/kernel/apb_timer.c
@@ -311,7 +311,6 @@ void __init apbt_time_init(void)
#ifdef CONFIG_SMP
int i;
struct sfi_timer_table_entry *p_mtmr;
- unsigned int percpu_timer;
struct apbt_dev *adev;
#endif
@@ -346,13 +345,10 @@ void __init apbt_time_init(void)
return;
}
pr_debug("%s: %d CPUs online\n", __func__, num_online_cpus());
- if (num_possible_cpus() <= sfi_mtimer_num) {
- percpu_timer = 1;
+ if (num_possible_cpus() <= sfi_mtimer_num)
apbt_num_timers_used = num_possible_cpus();
- } else {
- percpu_timer = 0;
+ else
apbt_num_timers_used = 1;
- }
pr_debug("%s: %d APB timers used\n", __func__, apbt_num_timers_used);
/* here we set up per CPU timer data structure */
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index b994cc84aa7e..a5b4dce1b7ac 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1477,8 +1477,7 @@ void __init bsp_end_local_APIC_setup(void)
* Now that local APIC setup is completed for BP, configure the fault
* handling for interrupt remapping.
*/
- if (irq_remapping_enabled)
- irq_remap_enable_fault_handling();
+ irq_remap_enable_fault_handling();
}
@@ -2251,8 +2250,7 @@ static int lapic_suspend(void)
local_irq_save(flags);
disable_local_APIC();
- if (irq_remapping_enabled)
- irq_remapping_disable();
+ irq_remapping_disable();
local_irq_restore(flags);
return 0;
@@ -2268,16 +2266,15 @@ static void lapic_resume(void)
return;
local_irq_save(flags);
- if (irq_remapping_enabled) {
- /*
- * IO-APIC and PIC have their own resume routines.
- * We just mask them here to make sure the interrupt
- * subsystem is completely quiet while we enable x2apic
- * and interrupt-remapping.
- */
- mask_ioapic_entries();
- legacy_pic->mask_all();
- }
+
+ /*
+ * IO-APIC and PIC have their own resume routines.
+ * We just mask them here to make sure the interrupt
+ * subsystem is completely quiet while we enable x2apic
+ * and interrupt-remapping.
+ */
+ mask_ioapic_entries();
+ legacy_pic->mask_all();
if (x2apic_mode)
enable_x2apic();
@@ -2320,8 +2317,7 @@ static void lapic_resume(void)
apic_write(APIC_ESR, 0);
apic_read(APIC_ESR);
- if (irq_remapping_enabled)
- irq_remapping_reenable(x2apic_mode);
+ irq_remapping_reenable(x2apic_mode);
local_irq_restore(flags);
}
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index b739d398bb29..9ed796ccc32c 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -68,22 +68,6 @@
#define for_each_irq_pin(entry, head) \
for (entry = head; entry; entry = entry->next)
-#ifdef CONFIG_IRQ_REMAP
-static void irq_remap_modify_chip_defaults(struct irq_chip *chip);
-static inline bool irq_remapped(struct irq_cfg *cfg)
-{
- return cfg->irq_2_iommu.iommu != NULL;
-}
-#else
-static inline bool irq_remapped(struct irq_cfg *cfg)
-{
- return false;
-}
-static inline void irq_remap_modify_chip_defaults(struct irq_chip *chip)
-{
-}
-#endif
-
/*
* Is the SiS APIC rmw bug present ?
* -1 = don't know, 0 = no, 1 = yes
@@ -300,9 +284,9 @@ static struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node)
return cfg;
}
-static int alloc_irq_from(unsigned int from, int node)
+static int alloc_irqs_from(unsigned int from, unsigned int count, int node)
{
- return irq_alloc_desc_from(from, node);
+ return irq_alloc_descs_from(from, count, node);
}
static void free_irq_at(unsigned int at, struct irq_cfg *cfg)
@@ -326,7 +310,7 @@ static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
+ (mpc_ioapic_addr(idx) & ~PAGE_MASK);
}
-static inline void io_apic_eoi(unsigned int apic, unsigned int vector)
+void io_apic_eoi(unsigned int apic, unsigned int vector)
{
struct io_apic __iomem *io_apic = io_apic_base(apic);
writel(vector, &io_apic->eoi);
@@ -573,19 +557,10 @@ static void unmask_ioapic_irq(struct irq_data *data)
* Otherwise, we simulate the EOI message manually by changing the trigger
* mode to edge and then back to level, with RTE being masked during this.
*/
-static void __eoi_ioapic_pin(int apic, int pin, int vector, struct irq_cfg *cfg)
+void native_eoi_ioapic_pin(int apic, int pin, int vector)
{
if (mpc_ioapic_ver(apic) >= 0x20) {
- /*
- * Intr-remapping uses pin number as the virtual vector
- * in the RTE. Actual vector is programmed in
- * intr-remapping table entry. Hence for the io-apic
- * EOI we use the pin number.
- */
- if (cfg && irq_remapped(cfg))
- io_apic_eoi(apic, pin);
- else
- io_apic_eoi(apic, vector);
+ io_apic_eoi(apic, vector);
} else {
struct IO_APIC_route_entry entry, entry1;
@@ -606,14 +581,15 @@ static void __eoi_ioapic_pin(int apic, int pin, int vector, struct irq_cfg *cfg)
}
}
-static void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
+void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
{
struct irq_pin_list *entry;
unsigned long flags;
raw_spin_lock_irqsave(&ioapic_lock, flags);
for_each_irq_pin(entry, cfg->irq_2_pin)
- __eoi_ioapic_pin(entry->apic, entry->pin, cfg->vector, cfg);
+ x86_io_apic_ops.eoi_ioapic_pin(entry->apic, entry->pin,
+ cfg->vector);
raw_spin_unlock_irqrestore(&ioapic_lock, flags);
}
@@ -650,7 +626,7 @@ static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
}
raw_spin_lock_irqsave(&ioapic_lock, flags);
- __eoi_ioapic_pin(apic, pin, entry.vector, NULL);
+ x86_io_apic_ops.eoi_ioapic_pin(apic, pin, entry.vector);
raw_spin_unlock_irqrestore(&ioapic_lock, flags);
}
@@ -1304,25 +1280,18 @@ static void ioapic_register_intr(unsigned int irq, struct irq_cfg *cfg,
fasteoi = false;
}
- if (irq_remapped(cfg)) {
- irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
- irq_remap_modify_chip_defaults(chip);
+ if (setup_remapped_irq(irq, cfg, chip))
fasteoi = trigger != 0;
- }
hdl = fasteoi ? handle_fasteoi_irq : handle_edge_irq;
irq_set_chip_and_handler_name(irq, chip, hdl,
fasteoi ? "fasteoi" : "edge");
}
-static int setup_ioapic_entry(int irq, struct IO_APIC_route_entry *entry,
- unsigned int destination, int vector,
- struct io_apic_irq_attr *attr)
+int native_setup_ioapic_entry(int irq, struct IO_APIC_route_entry *entry,
+ unsigned int destination, int vector,
+ struct io_apic_irq_attr *attr)
{
- if (irq_remapping_enabled)
- return setup_ioapic_remapped_entry(irq, entry, destination,
- vector, attr);
-
memset(entry, 0, sizeof(*entry));
entry->delivery_mode = apic->irq_delivery_mode;
@@ -1370,8 +1339,8 @@ static void setup_ioapic_irq(unsigned int irq, struct irq_cfg *cfg,
attr->ioapic, mpc_ioapic_id(attr->ioapic), attr->ioapic_pin,
cfg->vector, irq, attr->trigger, attr->polarity, dest);
- if (setup_ioapic_entry(irq, &entry, dest, cfg->vector, attr)) {
- pr_warn("Failed to setup ioapic entry for ioapic %d, pin %d\n",
+ if (x86_io_apic_ops.setup_entry(irq, &entry, dest, cfg->vector, attr)) {
+ pr_warn("Failed to setup ioapic entry for ioapic %d, pin %d\n",
mpc_ioapic_id(attr->ioapic), attr->ioapic_pin);
__clear_irq_vector(irq, cfg);
@@ -1479,9 +1448,6 @@ static void __init setup_timer_IRQ0_pin(unsigned int ioapic_idx,
struct IO_APIC_route_entry entry;
unsigned int dest;
- if (irq_remapping_enabled)
- return;
-
memset(&entry, 0, sizeof(entry));
/*
@@ -1513,9 +1479,63 @@ static void __init setup_timer_IRQ0_pin(unsigned int ioapic_idx,
ioapic_write_entry(ioapic_idx, pin, entry);
}
-__apicdebuginit(void) print_IO_APIC(int ioapic_idx)
+void native_io_apic_print_entries(unsigned int apic, unsigned int nr_entries)
{
int i;
+
+ pr_debug(" NR Dst Mask Trig IRR Pol Stat Dmod Deli Vect:\n");
+
+ for (i = 0; i <= nr_entries; i++) {
+ struct IO_APIC_route_entry entry;
+
+ entry = ioapic_read_entry(apic, i);
+
+ pr_debug(" %02x %02X ", i, entry.dest);
+ pr_cont("%1d %1d %1d %1d %1d "
+ "%1d %1d %02X\n",
+ entry.mask,
+ entry.trigger,
+ entry.irr,
+ entry.polarity,
+ entry.delivery_status,
+ entry.dest_mode,
+ entry.delivery_mode,
+ entry.vector);
+ }
+}
+
+void intel_ir_io_apic_print_entries(unsigned int apic,
+ unsigned int nr_entries)
+{
+ int i;
+
+ pr_debug(" NR Indx Fmt Mask Trig IRR Pol Stat Indx2 Zero Vect:\n");
+
+ for (i = 0; i <= nr_entries; i++) {
+ struct IR_IO_APIC_route_entry *ir_entry;
+ struct IO_APIC_route_entry entry;
+
+ entry = ioapic_read_entry(apic, i);
+
+ ir_entry = (struct IR_IO_APIC_route_entry *)&entry;
+
+ pr_debug(" %02x %04X ", i, ir_entry->index);
+ pr_cont("%1d %1d %1d %1d %1d "
+ "%1d %1d %X %02X\n",
+ ir_entry->format,
+ ir_entry->mask,
+ ir_entry->trigger,
+ ir_entry->irr,
+ ir_entry->polarity,
+ ir_entry->delivery_status,
+ ir_entry->index2,
+ ir_entry->zero,
+ ir_entry->vector);
+ }
+}
+
+__apicdebuginit(void) print_IO_APIC(int ioapic_idx)
+{
union IO_APIC_reg_00 reg_00;
union IO_APIC_reg_01 reg_01;
union IO_APIC_reg_02 reg_02;
@@ -1568,58 +1588,7 @@ __apicdebuginit(void) print_IO_APIC(int ioapic_idx)
printk(KERN_DEBUG ".... IRQ redirection table:\n");
- if (irq_remapping_enabled) {
- printk(KERN_DEBUG " NR Indx Fmt Mask Trig IRR"
- " Pol Stat Indx2 Zero Vect:\n");
- } else {
- printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol"
- " Stat Dmod Deli Vect:\n");
- }
-
- for (i = 0; i <= reg_01.bits.entries; i++) {
- if (irq_remapping_enabled) {
- struct IO_APIC_route_entry entry;
- struct IR_IO_APIC_route_entry *ir_entry;
-
- entry = ioapic_read_entry(ioapic_idx, i);
- ir_entry = (struct IR_IO_APIC_route_entry *) &entry;
- printk(KERN_DEBUG " %02x %04X ",
- i,
- ir_entry->index
- );
- pr_cont("%1d %1d %1d %1d %1d "
- "%1d %1d %X %02X\n",
- ir_entry->format,
- ir_entry->mask,
- ir_entry->trigger,
- ir_entry->irr,
- ir_entry->polarity,
- ir_entry->delivery_status,
- ir_entry->index2,
- ir_entry->zero,
- ir_entry->vector
- );
- } else {
- struct IO_APIC_route_entry entry;
-
- entry = ioapic_read_entry(ioapic_idx, i);
- printk(KERN_DEBUG " %02x %02X ",
- i,
- entry.dest
- );
- pr_cont("%1d %1d %1d %1d %1d "
- "%1d %1d %02X\n",
- entry.mask,
- entry.trigger,
- entry.irr,
- entry.polarity,
- entry.delivery_status,
- entry.dest_mode,
- entry.delivery_mode,
- entry.vector
- );
- }
- }
+ x86_io_apic_ops.print_entries(ioapic_idx, reg_01.bits.entries);
}
__apicdebuginit(void) print_IO_APICs(void)
@@ -1921,30 +1890,14 @@ void __init enable_IO_APIC(void)
clear_IO_APIC();
}
-/*
- * Not an __init, needed by the reboot code
- */
-void disable_IO_APIC(void)
+void native_disable_io_apic(void)
{
/*
- * Clear the IO-APIC before rebooting:
- */
- clear_IO_APIC();
-
- if (!legacy_pic->nr_legacy_irqs)
- return;
-
- /*
* If the i8259 is routed through an IOAPIC
* Put that IOAPIC in virtual wire mode
* so legacy interrupts can be delivered.
- *
- * With interrupt-remapping, for now we will use virtual wire A mode,
- * as virtual wire B is little complex (need to configure both
- * IOAPIC RTE as well as interrupt-remapping table entry).
- * As this gets called during crash dump, keep this simple for now.
*/
- if (ioapic_i8259.pin != -1 && !irq_remapping_enabled) {
+ if (ioapic_i8259.pin != -1) {
struct IO_APIC_route_entry entry;
memset(&entry, 0, sizeof(entry));
@@ -1964,12 +1917,25 @@ void disable_IO_APIC(void)
ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry);
}
+ if (cpu_has_apic || apic_from_smp_config())
+ disconnect_bsp_APIC(ioapic_i8259.pin != -1);
+
+}
+
+/*
+ * Not an __init, needed by the reboot code
+ */
+void disable_IO_APIC(void)
+{
/*
- * Use virtual wire A mode when interrupt remapping is enabled.
+ * Clear the IO-APIC before rebooting:
*/
- if (cpu_has_apic || apic_from_smp_config())
- disconnect_bsp_APIC(!irq_remapping_enabled &&
- ioapic_i8259.pin != -1);
+ clear_IO_APIC();
+
+ if (!legacy_pic->nr_legacy_irqs)
+ return;
+
+ x86_io_apic_ops.disable();
}
#ifdef CONFIG_X86_32
@@ -2322,12 +2288,8 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq
apic = entry->apic;
pin = entry->pin;
- /*
- * With interrupt-remapping, destination information comes
- * from interrupt-remapping table entry.
- */
- if (!irq_remapped(cfg))
- io_apic_write(apic, 0x11 + pin*2, dest);
+
+ io_apic_write(apic, 0x11 + pin*2, dest);
reg = io_apic_read(apic, 0x10 + pin*2);
reg &= ~IO_APIC_REDIR_VECTOR_MASK;
reg |= vector;
@@ -2369,9 +2331,10 @@ int __ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
return 0;
}
-static int
-ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
- bool force)
+
+int native_ioapic_set_affinity(struct irq_data *data,
+ const struct cpumask *mask,
+ bool force)
{
unsigned int dest, irq = data->irq;
unsigned long flags;
@@ -2548,33 +2511,6 @@ static void ack_apic_level(struct irq_data *data)
ioapic_irqd_unmask(data, cfg, masked);
}
-#ifdef CONFIG_IRQ_REMAP
-static void ir_ack_apic_edge(struct irq_data *data)
-{
- ack_APIC_irq();
-}
-
-static void ir_ack_apic_level(struct irq_data *data)
-{
- ack_APIC_irq();
- eoi_ioapic_irq(data->irq, data->chip_data);
-}
-
-static void ir_print_prefix(struct irq_data *data, struct seq_file *p)
-{
- seq_printf(p, " IR-%s", data->chip->name);
-}
-
-static void irq_remap_modify_chip_defaults(struct irq_chip *chip)
-{
- chip->irq_print_chip = ir_print_prefix;
- chip->irq_ack = ir_ack_apic_edge;
- chip->irq_eoi = ir_ack_apic_level;
-
- chip->irq_set_affinity = set_remapped_irq_affinity;
-}
-#endif /* CONFIG_IRQ_REMAP */
-
static struct irq_chip ioapic_chip __read_mostly = {
.name = "IO-APIC",
.irq_startup = startup_ioapic_irq,
@@ -2582,7 +2518,7 @@ static struct irq_chip ioapic_chip __read_mostly = {
.irq_unmask = unmask_ioapic_irq,
.irq_ack = ack_apic_edge,
.irq_eoi = ack_apic_level,
- .irq_set_affinity = ioapic_set_affinity,
+ .irq_set_affinity = native_ioapic_set_affinity,
.irq_retrigger = ioapic_retrigger_irq,
};
@@ -2781,8 +2717,7 @@ static inline void __init check_timer(void)
* 8259A.
*/
if (pin1 == -1) {
- if (irq_remapping_enabled)
- panic("BIOS bug: timer not connected to IO-APIC");
+ panic_if_irq_remap("BIOS bug: timer not connected to IO-APIC");
pin1 = pin2;
apic1 = apic2;
no_pin1 = 1;
@@ -2814,8 +2749,7 @@ static inline void __init check_timer(void)
clear_IO_APIC_pin(0, pin1);
goto out;
}
- if (irq_remapping_enabled)
- panic("timer doesn't work through Interrupt-remapped IO-APIC");
+ panic_if_irq_remap("timer doesn't work through Interrupt-remapped IO-APIC");
local_irq_disable();
clear_IO_APIC_pin(apic1, pin1);
if (!no_pin1)
@@ -2982,37 +2916,58 @@ device_initcall(ioapic_init_ops);
/*
* Dynamic irq allocate and deallocation
*/
-unsigned int create_irq_nr(unsigned int from, int node)
+unsigned int __create_irqs(unsigned int from, unsigned int count, int node)
{
- struct irq_cfg *cfg;
+ struct irq_cfg **cfg;
unsigned long flags;
- unsigned int ret = 0;
- int irq;
+ int irq, i;
if (from < nr_irqs_gsi)
from = nr_irqs_gsi;
- irq = alloc_irq_from(from, node);
- if (irq < 0)
- return 0;
- cfg = alloc_irq_cfg(irq, node);
- if (!cfg) {
- free_irq_at(irq, NULL);
+ cfg = kzalloc_node(count * sizeof(cfg[0]), GFP_KERNEL, node);
+ if (!cfg)
return 0;
+
+ irq = alloc_irqs_from(from, count, node);
+ if (irq < 0)
+ goto out_cfgs;
+
+ for (i = 0; i < count; i++) {
+ cfg[i] = alloc_irq_cfg(irq + i, node);
+ if (!cfg[i])
+ goto out_irqs;
}
raw_spin_lock_irqsave(&vector_lock, flags);
- if (!__assign_irq_vector(irq, cfg, apic->target_cpus()))
- ret = irq;
+ for (i = 0; i < count; i++)
+ if (__assign_irq_vector(irq + i, cfg[i], apic->target_cpus()))
+ goto out_vecs;
raw_spin_unlock_irqrestore(&vector_lock, flags);
- if (ret) {
- irq_set_chip_data(irq, cfg);
- irq_clear_status_flags(irq, IRQ_NOREQUEST);
- } else {
- free_irq_at(irq, cfg);
+ for (i = 0; i < count; i++) {
+ irq_set_chip_data(irq + i, cfg[i]);
+ irq_clear_status_flags(irq + i, IRQ_NOREQUEST);
}
- return ret;
+
+ kfree(cfg);
+ return irq;
+
+out_vecs:
+ for (i--; i >= 0; i--)
+ __clear_irq_vector(irq + i, cfg[i]);
+ raw_spin_unlock_irqrestore(&vector_lock, flags);
+out_irqs:
+ for (i = 0; i < count; i++)
+ free_irq_at(irq + i, cfg[i]);
+out_cfgs:
+ kfree(cfg);
+ return 0;
+}
+
+unsigned int create_irq_nr(unsigned int from, int node)
+{
+ return __create_irqs(from, 1, node);
}
int create_irq(void)
@@ -3037,48 +2992,35 @@ void destroy_irq(unsigned int irq)
irq_set_status_flags(irq, IRQ_NOREQUEST|IRQ_NOPROBE);
- if (irq_remapped(cfg))
- free_remapped_irq(irq);
+ free_remapped_irq(irq);
+
raw_spin_lock_irqsave(&vector_lock, flags);
__clear_irq_vector(irq, cfg);
raw_spin_unlock_irqrestore(&vector_lock, flags);
free_irq_at(irq, cfg);
}
+void destroy_irqs(unsigned int irq, unsigned int count)
+{
+ unsigned int i;
+
+ for (i = 0; i < count; i++)
+ destroy_irq(irq + i);
+}
+
/*
* MSI message composition
*/
-#ifdef CONFIG_PCI_MSI
-static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq,
- struct msi_msg *msg, u8 hpet_id)
+void native_compose_msi_msg(struct pci_dev *pdev,
+ unsigned int irq, unsigned int dest,
+ struct msi_msg *msg, u8 hpet_id)
{
- struct irq_cfg *cfg;
- int err;
- unsigned dest;
-
- if (disable_apic)
- return -ENXIO;
-
- cfg = irq_cfg(irq);
- err = assign_irq_vector(irq, cfg, apic->target_cpus());
- if (err)
- return err;
+ struct irq_cfg *cfg = irq_cfg(irq);
- err = apic->cpu_mask_to_apicid_and(cfg->domain,
- apic->target_cpus(), &dest);
- if (err)
- return err;
-
- if (irq_remapped(cfg)) {
- compose_remapped_msi_msg(pdev, irq, dest, msg, hpet_id);
- return err;
- }
+ msg->address_hi = MSI_ADDR_BASE_HI;
if (x2apic_enabled())
- msg->address_hi = MSI_ADDR_BASE_HI |
- MSI_ADDR_EXT_DEST_ID(dest);
- else
- msg->address_hi = MSI_ADDR_BASE_HI;
+ msg->address_hi |= MSI_ADDR_EXT_DEST_ID(dest);
msg->address_lo =
MSI_ADDR_BASE_LO |
@@ -3097,8 +3039,32 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq,
MSI_DATA_DELIVERY_FIXED:
MSI_DATA_DELIVERY_LOWPRI) |
MSI_DATA_VECTOR(cfg->vector);
+}
- return err;
+#ifdef CONFIG_PCI_MSI
+static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq,
+ struct msi_msg *msg, u8 hpet_id)
+{
+ struct irq_cfg *cfg;
+ int err;
+ unsigned dest;
+
+ if (disable_apic)
+ return -ENXIO;
+
+ cfg = irq_cfg(irq);
+ err = assign_irq_vector(irq, cfg, apic->target_cpus());
+ if (err)
+ return err;
+
+ err = apic->cpu_mask_to_apicid_and(cfg->domain,
+ apic->target_cpus(), &dest);
+ if (err)
+ return err;
+
+ x86_msi.compose_msi_msg(pdev, irq, dest, msg, hpet_id);
+
+ return 0;
}
static int
@@ -3136,23 +3102,28 @@ static struct irq_chip msi_chip = {
.irq_retrigger = ioapic_retrigger_irq,
};
-static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
+int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc,
+ unsigned int irq_base, unsigned int irq_offset)
{
struct irq_chip *chip = &msi_chip;
struct msi_msg msg;
+ unsigned int irq = irq_base + irq_offset;
int ret;
ret = msi_compose_msg(dev, irq, &msg, -1);
if (ret < 0)
return ret;
- irq_set_msi_desc(irq, msidesc);
- write_msi_msg(irq, &msg);
+ irq_set_msi_desc_off(irq_base, irq_offset, msidesc);
- if (irq_remapped(irq_get_chip_data(irq))) {
- irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
- irq_remap_modify_chip_defaults(chip);
- }
+ /*
+ * MSI-X message is written per-IRQ, the offset is always 0.
+ * MSI message denotes a contiguous group of IRQs, written for 0th IRQ.
+ */
+ if (!irq_offset)
+ write_msi_msg(irq, &msg);
+
+ setup_remapped_irq(irq, irq_get_chip_data(irq), chip);
irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge");
@@ -3163,46 +3134,26 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
{
- int node, ret, sub_handle, index = 0;
unsigned int irq, irq_want;
struct msi_desc *msidesc;
+ int node, ret;
- /* x86 doesn't support multiple MSI yet */
+ /* Multiple MSI vectors only supported with interrupt remapping */
if (type == PCI_CAP_ID_MSI && nvec > 1)
return 1;
node = dev_to_node(&dev->dev);
irq_want = nr_irqs_gsi;
- sub_handle = 0;
list_for_each_entry(msidesc, &dev->msi_list, list) {
irq = create_irq_nr(irq_want, node);
if (irq == 0)
- return -1;
+ return -ENOSPC;
+
irq_want = irq + 1;
- if (!irq_remapping_enabled)
- goto no_ir;
- if (!sub_handle) {
- /*
- * allocate the consecutive block of IRTE's
- * for 'nvec'
- */
- index = msi_alloc_remapped_irq(dev, irq, nvec);
- if (index < 0) {
- ret = index;
- goto error;
- }
- } else {
- ret = msi_setup_remapped_irq(dev, irq, index,
- sub_handle);
- if (ret < 0)
- goto error;
- }
-no_ir:
- ret = setup_msi_irq(dev, msidesc, irq);
+ ret = setup_msi_irq(dev, msidesc, irq, 0);
if (ret < 0)
goto error;
- sub_handle++;
}
return 0;
@@ -3298,26 +3249,19 @@ static struct irq_chip hpet_msi_type = {
.irq_retrigger = ioapic_retrigger_irq,
};
-int arch_setup_hpet_msi(unsigned int irq, unsigned int id)
+int default_setup_hpet_msi(unsigned int irq, unsigned int id)
{
struct irq_chip *chip = &hpet_msi_type;
struct msi_msg msg;
int ret;
- if (irq_remapping_enabled) {
- ret = setup_hpet_msi_remapped(irq, id);
- if (ret)
- return ret;
- }
-
ret = msi_compose_msg(NULL, irq, &msg, id);
if (ret < 0)
return ret;
hpet_msi_write(irq_get_handler_data(irq), &msg);
irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
- if (irq_remapped(irq_get_chip_data(irq)))
- irq_remap_modify_chip_defaults(chip);
+ setup_remapped_irq(irq, irq_get_chip_data(irq), chip);
irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge");
return 0;
@@ -3683,10 +3627,7 @@ void __init setup_ioapic_dest(void)
else
mask = apic->target_cpus();
- if (irq_remapping_enabled)
- set_remapped_irq_affinity(idata, mask, false);
- else
- ioapic_set_affinity(idata, mask, false);
+ x86_io_apic_ops.set_affinity(idata, mask, false);
}
}
diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c
index cce91bf26676..7434d8556d09 100644
--- a/arch/x86/kernel/apic/ipi.c
+++ b/arch/x86/kernel/apic/ipi.c
@@ -106,7 +106,7 @@ void default_send_IPI_mask_logical(const struct cpumask *cpumask, int vector)
unsigned long mask = cpumask_bits(cpumask)[0];
unsigned long flags;
- if (WARN_ONCE(!mask, "empty IPI mask"))
+ if (!mask)
return;
local_irq_save(flags);
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index e03a1e180e81..562a76d433c8 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -20,18 +20,19 @@ static int set_x2apic_phys_mode(char *arg)
}
early_param("x2apic_phys", set_x2apic_phys_mode);
-static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+static bool x2apic_fadt_phys(void)
{
- if (x2apic_phys)
- return x2apic_enabled();
- else if ((acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID) &&
- (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL) &&
- x2apic_enabled()) {
+ if ((acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID) &&
+ (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL)) {
printk(KERN_DEBUG "System requires x2apic physical mode\n");
- return 1;
+ return true;
}
- else
- return 0;
+ return false;
+}
+
+static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+{
+ return x2apic_enabled() && (x2apic_phys || x2apic_fadt_phys());
}
static void
@@ -82,7 +83,7 @@ static void init_x2apic_ldr(void)
static int x2apic_phys_probe(void)
{
- if (x2apic_mode && x2apic_phys)
+ if (x2apic_mode && (x2apic_phys || x2apic_fadt_phys()))
return 1;
return apic == &apic_x2apic_phys;
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index d65464e43503..8d7012b7f402 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -899,6 +899,7 @@ static void apm_cpu_idle(void)
static int use_apm_idle; /* = 0 */
static unsigned int last_jiffies; /* = 0 */
static unsigned int last_stime; /* = 0 */
+ cputime_t stime;
int apm_idle_done = 0;
unsigned int jiffies_since_last_check = jiffies - last_jiffies;
@@ -906,23 +907,23 @@ static void apm_cpu_idle(void)
WARN_ONCE(1, "deprecated apm_cpu_idle will be deleted in 2012");
recalc:
+ task_cputime(current, NULL, &stime);
if (jiffies_since_last_check > IDLE_CALC_LIMIT) {
use_apm_idle = 0;
- last_jiffies = jiffies;
- last_stime = current->stime;
} else if (jiffies_since_last_check > idle_period) {
unsigned int idle_percentage;
- idle_percentage = current->stime - last_stime;
+ idle_percentage = stime - last_stime;
idle_percentage *= 100;
idle_percentage /= jiffies_since_last_check;
use_apm_idle = (idle_percentage > idle_threshold);
if (apm_info.forbid_idle)
use_apm_idle = 0;
- last_jiffies = jiffies;
- last_stime = current->stime;
}
+ last_jiffies = jiffies;
+ last_stime = stime;
+
bucket = IDLE_LEAKY_MAX;
while (!need_resched()) {
diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c
index a8f8fa9769d6..1e7e84a02eba 100644
--- a/arch/x86/kernel/cpu/hypervisor.c
+++ b/arch/x86/kernel/cpu/hypervisor.c
@@ -79,3 +79,10 @@ void __init init_hypervisor_platform(void)
if (x86_hyper->init_platform)
x86_hyper->init_platform();
}
+
+bool __init hypervisor_x2apic_available(void)
+{
+ return x86_hyper &&
+ x86_hyper->x2apic_available &&
+ x86_hyper->x2apic_available();
+}
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 0e462404d6f1..7c6f7d548c0f 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -298,8 +298,7 @@ struct _cache_attr {
unsigned int);
};
-#ifdef CONFIG_AMD_NB
-
+#if defined(CONFIG_AMD_NB) && defined(CONFIG_SYSFS)
/*
* L3 cache descriptors
*/
@@ -524,9 +523,9 @@ store_subcaches(struct _cpuid4_info *this_leaf, const char *buf, size_t count,
static struct _cache_attr subcaches =
__ATTR(subcaches, 0644, show_subcaches, store_subcaches);
-#else /* CONFIG_AMD_NB */
+#else
#define amd_init_l3_cache(x, y)
-#endif /* CONFIG_AMD_NB */
+#endif /* CONFIG_AMD_NB && CONFIG_SYSFS */
static int
__cpuinit cpuid4_cache_lookup_regs(int index,
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 6774c17a5576..bf0f01aea994 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -829,7 +829,7 @@ static inline void x86_assign_hw_event(struct perf_event *event,
} else {
hwc->config_base = x86_pmu_config_addr(hwc->idx);
hwc->event_base = x86_pmu_event_addr(hwc->idx);
- hwc->event_base_rdpmc = hwc->idx;
+ hwc->event_base_rdpmc = x86_pmu_rdpmc_index(hwc->idx);
}
}
@@ -1310,11 +1310,6 @@ static struct attribute_group x86_pmu_format_group = {
.attrs = NULL,
};
-struct perf_pmu_events_attr {
- struct device_attribute attr;
- u64 id;
-};
-
/*
* Remove all undefined events (x86_pmu.event_map(id) == 0)
* out of events_attr attributes.
@@ -1348,11 +1343,9 @@ static ssize_t events_sysfs_show(struct device *dev, struct device_attribute *at
#define EVENT_VAR(_id) event_attr_##_id
#define EVENT_PTR(_id) &event_attr_##_id.attr.attr
-#define EVENT_ATTR(_name, _id) \
-static struct perf_pmu_events_attr EVENT_VAR(_id) = { \
- .attr = __ATTR(_name, 0444, events_sysfs_show, NULL), \
- .id = PERF_COUNT_HW_##_id, \
-};
+#define EVENT_ATTR(_name, _id) \
+ PMU_EVENT_ATTR(_name, EVENT_VAR(_id), PERF_COUNT_HW_##_id, \
+ events_sysfs_show)
EVENT_ATTR(cpu-cycles, CPU_CYCLES );
EVENT_ATTR(instructions, INSTRUCTIONS );
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 115c1ea97746..7f5c75c2afdd 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -325,6 +325,8 @@ struct x86_pmu {
int (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign);
unsigned eventsel;
unsigned perfctr;
+ int (*addr_offset)(int index, bool eventsel);
+ int (*rdpmc_index)(int index);
u64 (*event_map)(int);
int max_events;
int num_counters;
@@ -446,28 +448,21 @@ extern u64 __read_mostly hw_cache_extra_regs
u64 x86_perf_event_update(struct perf_event *event);
-static inline int x86_pmu_addr_offset(int index)
+static inline unsigned int x86_pmu_config_addr(int index)
{
- int offset;
-
- /* offset = X86_FEATURE_PERFCTR_CORE ? index << 1 : index */
- alternative_io(ASM_NOP2,
- "shll $1, %%eax",
- X86_FEATURE_PERFCTR_CORE,
- "=a" (offset),
- "a" (index));
-
- return offset;
+ return x86_pmu.eventsel + (x86_pmu.addr_offset ?
+ x86_pmu.addr_offset(index, true) : index);
}
-static inline unsigned int x86_pmu_config_addr(int index)
+static inline unsigned int x86_pmu_event_addr(int index)
{
- return x86_pmu.eventsel + x86_pmu_addr_offset(index);
+ return x86_pmu.perfctr + (x86_pmu.addr_offset ?
+ x86_pmu.addr_offset(index, false) : index);
}
-static inline unsigned int x86_pmu_event_addr(int index)
+static inline int x86_pmu_rdpmc_index(int index)
{
- return x86_pmu.perfctr + x86_pmu_addr_offset(index);
+ return x86_pmu.rdpmc_index ? x86_pmu.rdpmc_index(index) : index;
}
int x86_setup_perfctr(struct perf_event *event);
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index c93bc4e813a0..dfdab42aed27 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -132,21 +132,102 @@ static u64 amd_pmu_event_map(int hw_event)
return amd_perfmon_event_map[hw_event];
}
-static int amd_pmu_hw_config(struct perf_event *event)
+static struct event_constraint *amd_nb_event_constraint;
+
+/*
+ * Previously calculated offsets
+ */
+static unsigned int event_offsets[X86_PMC_IDX_MAX] __read_mostly;
+static unsigned int count_offsets[X86_PMC_IDX_MAX] __read_mostly;
+static unsigned int rdpmc_indexes[X86_PMC_IDX_MAX] __read_mostly;
+
+/*
+ * Legacy CPUs:
+ * 4 counters starting at 0xc0010000 each offset by 1
+ *
+ * CPUs with core performance counter extensions:
+ * 6 counters starting at 0xc0010200 each offset by 2
+ *
+ * CPUs with north bridge performance counter extensions:
+ * 4 additional counters starting at 0xc0010240 each offset by 2
+ * (indexed right above either one of the above core counters)
+ */
+static inline int amd_pmu_addr_offset(int index, bool eventsel)
{
- int ret;
+ int offset, first, base;
- /* pass precise event sampling to ibs: */
- if (event->attr.precise_ip && get_ibs_caps())
- return -ENOENT;
+ if (!index)
+ return index;
+
+ if (eventsel)
+ offset = event_offsets[index];
+ else
+ offset = count_offsets[index];
+
+ if (offset)
+ return offset;
+
+ if (amd_nb_event_constraint &&
+ test_bit(index, amd_nb_event_constraint->idxmsk)) {
+ /*
+ * calculate the offset of NB counters with respect to
+ * base eventsel or perfctr
+ */
+
+ first = find_first_bit(amd_nb_event_constraint->idxmsk,
+ X86_PMC_IDX_MAX);
+
+ if (eventsel)
+ base = MSR_F15H_NB_PERF_CTL - x86_pmu.eventsel;
+ else
+ base = MSR_F15H_NB_PERF_CTR - x86_pmu.perfctr;
+
+ offset = base + ((index - first) << 1);
+ } else if (!cpu_has_perfctr_core)
+ offset = index;
+ else
+ offset = index << 1;
+
+ if (eventsel)
+ event_offsets[index] = offset;
+ else
+ count_offsets[index] = offset;
+
+ return offset;
+}
+
+static inline int amd_pmu_rdpmc_index(int index)
+{
+ int ret, first;
+
+ if (!index)
+ return index;
+
+ ret = rdpmc_indexes[index];
- ret = x86_pmu_hw_config(event);
if (ret)
return ret;
- if (has_branch_stack(event))
- return -EOPNOTSUPP;
+ if (amd_nb_event_constraint &&
+ test_bit(index, amd_nb_event_constraint->idxmsk)) {
+ /*
+ * according to the mnual, ECX value of the NB counters is
+ * the index of the NB counter (0, 1, 2 or 3) plus 6
+ */
+
+ first = find_first_bit(amd_nb_event_constraint->idxmsk,
+ X86_PMC_IDX_MAX);
+ ret = index - first + 6;
+ } else
+ ret = index;
+
+ rdpmc_indexes[index] = ret;
+
+ return ret;
+}
+static int amd_core_hw_config(struct perf_event *event)
+{
if (event->attr.exclude_host && event->attr.exclude_guest)
/*
* When HO == GO == 1 the hardware treats that as GO == HO == 0
@@ -156,14 +237,37 @@ static int amd_pmu_hw_config(struct perf_event *event)
event->hw.config &= ~(ARCH_PERFMON_EVENTSEL_USR |
ARCH_PERFMON_EVENTSEL_OS);
else if (event->attr.exclude_host)
- event->hw.config |= AMD_PERFMON_EVENTSEL_GUESTONLY;
+ event->hw.config |= AMD64_EVENTSEL_GUESTONLY;
else if (event->attr.exclude_guest)
- event->hw.config |= AMD_PERFMON_EVENTSEL_HOSTONLY;
+ event->hw.config |= AMD64_EVENTSEL_HOSTONLY;
+
+ return 0;
+}
+
+/*
+ * NB counters do not support the following event select bits:
+ * Host/Guest only
+ * Counter mask
+ * Invert counter mask
+ * Edge detect
+ * OS/User mode
+ */
+static int amd_nb_hw_config(struct perf_event *event)
+{
+ /* for NB, we only allow system wide counting mode */
+ if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
+ return -EINVAL;
+
+ if (event->attr.exclude_user || event->attr.exclude_kernel ||
+ event->attr.exclude_host || event->attr.exclude_guest)
+ return -EINVAL;
- if (event->attr.type != PERF_TYPE_RAW)
- return 0;
+ event->hw.config &= ~(ARCH_PERFMON_EVENTSEL_USR |
+ ARCH_PERFMON_EVENTSEL_OS);
- event->hw.config |= event->attr.config & AMD64_RAW_EVENT_MASK;
+ if (event->hw.config & ~(AMD64_RAW_EVENT_MASK_NB |
+ ARCH_PERFMON_EVENTSEL_INT))
+ return -EINVAL;
return 0;
}
@@ -181,6 +285,11 @@ static inline int amd_is_nb_event(struct hw_perf_event *hwc)
return (hwc->config & 0xe0) == 0xe0;
}
+static inline int amd_is_perfctr_nb_event(struct hw_perf_event *hwc)
+{
+ return amd_nb_event_constraint && amd_is_nb_event(hwc);
+}
+
static inline int amd_has_nb(struct cpu_hw_events *cpuc)
{
struct amd_nb *nb = cpuc->amd_nb;
@@ -188,20 +297,37 @@ static inline int amd_has_nb(struct cpu_hw_events *cpuc)
return nb && nb->nb_id != -1;
}
-static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
- struct perf_event *event)
+static int amd_pmu_hw_config(struct perf_event *event)
+{
+ int ret;
+
+ /* pass precise event sampling to ibs: */
+ if (event->attr.precise_ip && get_ibs_caps())
+ return -ENOENT;
+
+ if (has_branch_stack(event))
+ return -EOPNOTSUPP;
+
+ ret = x86_pmu_hw_config(event);
+ if (ret)
+ return ret;
+
+ if (event->attr.type == PERF_TYPE_RAW)
+ event->hw.config |= event->attr.config & AMD64_RAW_EVENT_MASK;
+
+ if (amd_is_perfctr_nb_event(&event->hw))
+ return amd_nb_hw_config(event);
+
+ return amd_core_hw_config(event);
+}
+
+static void __amd_put_nb_event_constraints(struct cpu_hw_events *cpuc,
+ struct perf_event *event)
{
- struct hw_perf_event *hwc = &event->hw;
struct amd_nb *nb = cpuc->amd_nb;
int i;
/*
- * only care about NB events
- */
- if (!(amd_has_nb(cpuc) && amd_is_nb_event(hwc)))
- return;
-
- /*
* need to scan whole list because event may not have
* been assigned during scheduling
*
@@ -215,6 +341,19 @@ static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
}
}
+static void amd_nb_interrupt_hw_config(struct hw_perf_event *hwc)
+{
+ int core_id = cpu_data(smp_processor_id()).cpu_core_id;
+
+ /* deliver interrupts only to this core */
+ if (hwc->config & ARCH_PERFMON_EVENTSEL_INT) {
+ hwc->config |= AMD64_EVENTSEL_INT_CORE_ENABLE;
+ hwc->config &= ~AMD64_EVENTSEL_INT_CORE_SEL_MASK;
+ hwc->config |= (u64)(core_id) <<
+ AMD64_EVENTSEL_INT_CORE_SEL_SHIFT;
+ }
+}
+
/*
* AMD64 NorthBridge events need special treatment because
* counter access needs to be synchronized across all cores
@@ -247,24 +386,24 @@ static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
*
* Given that resources are allocated (cmpxchg), they must be
* eventually freed for others to use. This is accomplished by
- * calling amd_put_event_constraints().
+ * calling __amd_put_nb_event_constraints()
*
* Non NB events are not impacted by this restriction.
*/
static struct event_constraint *
-amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
+__amd_get_nb_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
+ struct event_constraint *c)
{
struct hw_perf_event *hwc = &event->hw;
struct amd_nb *nb = cpuc->amd_nb;
- struct perf_event *old = NULL;
- int max = x86_pmu.num_counters;
- int i, j, k = -1;
+ struct perf_event *old;
+ int idx, new = -1;
- /*
- * if not NB event or no NB, then no constraints
- */
- if (!(amd_has_nb(cpuc) && amd_is_nb_event(hwc)))
- return &unconstrained;
+ if (!c)
+ c = &unconstrained;
+
+ if (cpuc->is_fake)
+ return c;
/*
* detect if already present, if so reuse
@@ -276,48 +415,36 @@ amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
* because of successive calls to x86_schedule_events() from
* hw_perf_group_sched_in() without hw_perf_enable()
*/
- for (i = 0; i < max; i++) {
- /*
- * keep track of first free slot
- */
- if (k == -1 && !nb->owners[i])
- k = i;
+ for_each_set_bit(idx, c->idxmsk, x86_pmu.num_counters) {
+ if (new == -1 || hwc->idx == idx)
+ /* assign free slot, prefer hwc->idx */
+ old = cmpxchg(nb->owners + idx, NULL, event);
+ else if (nb->owners[idx] == event)
+ /* event already present */
+ old = event;
+ else
+ continue;
+
+ if (old && old != event)
+ continue;
+
+ /* reassign to this slot */
+ if (new != -1)
+ cmpxchg(nb->owners + new, event, NULL);
+ new = idx;
/* already present, reuse */
- if (nb->owners[i] == event)
- goto done;
- }
- /*
- * not present, so grab a new slot
- * starting either at:
- */
- if (hwc->idx != -1) {
- /* previous assignment */
- i = hwc->idx;
- } else if (k != -1) {
- /* start from free slot found */
- i = k;
- } else {
- /*
- * event not found, no slot found in
- * first pass, try again from the
- * beginning
- */
- i = 0;
- }
- j = i;
- do {
- old = cmpxchg(nb->owners+i, NULL, event);
- if (!old)
+ if (old == event)
break;
- if (++i == max)
- i = 0;
- } while (i != j);
-done:
- if (!old)
- return &nb->event_constraints[i];
-
- return &emptyconstraint;
+ }
+
+ if (new == -1)
+ return &emptyconstraint;
+
+ if (amd_is_perfctr_nb_event(hwc))
+ amd_nb_interrupt_hw_config(hwc);
+
+ return &nb->event_constraints[new];
}
static struct amd_nb *amd_alloc_nb(int cpu)
@@ -364,7 +491,7 @@ static void amd_pmu_cpu_starting(int cpu)
struct amd_nb *nb;
int i, nb_id;
- cpuc->perf_ctr_virt_mask = AMD_PERFMON_EVENTSEL_HOSTONLY;
+ cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY;
if (boot_cpu_data.x86_max_cores < 2)
return;
@@ -407,6 +534,26 @@ static void amd_pmu_cpu_dead(int cpu)
}
}
+static struct event_constraint *
+amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
+{
+ /*
+ * if not NB event or no NB, then no constraints
+ */
+ if (!(amd_has_nb(cpuc) && amd_is_nb_event(&event->hw)))
+ return &unconstrained;
+
+ return __amd_get_nb_event_constraints(cpuc, event,
+ amd_nb_event_constraint);
+}
+
+static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
+ struct perf_event *event)
+{
+ if (amd_has_nb(cpuc) && amd_is_nb_event(&event->hw))
+ __amd_put_nb_event_constraints(cpuc, event);
+}
+
PMU_FORMAT_ATTR(event, "config:0-7,32-35");
PMU_FORMAT_ATTR(umask, "config:8-15" );
PMU_FORMAT_ATTR(edge, "config:18" );
@@ -496,6 +643,9 @@ static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT_OVERLAP(0, 0x09,
static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0);
static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0);
+static struct event_constraint amd_NBPMC96 = EVENT_CONSTRAINT(0, 0x3C0, 0);
+static struct event_constraint amd_NBPMC74 = EVENT_CONSTRAINT(0, 0xF0, 0);
+
static struct event_constraint *
amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *event)
{
@@ -561,8 +711,8 @@ amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *ev
return &amd_f15_PMC20;
}
case AMD_EVENT_NB:
- /* not yet implemented */
- return &emptyconstraint;
+ return __amd_get_nb_event_constraints(cpuc, event,
+ amd_nb_event_constraint);
default:
return &emptyconstraint;
}
@@ -587,6 +737,8 @@ static __initconst const struct x86_pmu amd_pmu = {
.schedule_events = x86_schedule_events,
.eventsel = MSR_K7_EVNTSEL0,
.perfctr = MSR_K7_PERFCTR0,
+ .addr_offset = amd_pmu_addr_offset,
+ .rdpmc_index = amd_pmu_rdpmc_index,
.event_map = amd_pmu_event_map,
.max_events = ARRAY_SIZE(amd_perfmon_event_map),
.num_counters = AMD64_NUM_COUNTERS,
@@ -608,7 +760,7 @@ static __initconst const struct x86_pmu amd_pmu = {
static int setup_event_constraints(void)
{
- if (boot_cpu_data.x86 >= 0x15)
+ if (boot_cpu_data.x86 == 0x15)
x86_pmu.get_event_constraints = amd_get_event_constraints_f15h;
return 0;
}
@@ -638,6 +790,23 @@ static int setup_perfctr_core(void)
return 0;
}
+static int setup_perfctr_nb(void)
+{
+ if (!cpu_has_perfctr_nb)
+ return -ENODEV;
+
+ x86_pmu.num_counters += AMD64_NUM_COUNTERS_NB;
+
+ if (cpu_has_perfctr_core)
+ amd_nb_event_constraint = &amd_NBPMC96;
+ else
+ amd_nb_event_constraint = &amd_NBPMC74;
+
+ printk(KERN_INFO "perf: AMD northbridge performance counters detected\n");
+
+ return 0;
+}
+
__init int amd_pmu_init(void)
{
/* Performance-monitoring supported from K7 and later: */
@@ -648,6 +817,7 @@ __init int amd_pmu_init(void)
setup_event_constraints();
setup_perfctr_core();
+ setup_perfctr_nb();
/* Events are common for all AMDs */
memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
@@ -678,7 +848,7 @@ void amd_pmu_disable_virt(void)
* SVM is disabled the Guest-only bits still gets set and the counter
* will not count anything.
*/
- cpuc->perf_ctr_virt_mask = AMD_PERFMON_EVENTSEL_HOSTONLY;
+ cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY;
/* Reload all events */
x86_pmu_disable_all();
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 93b9e1181f83..4914e94ad6e8 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -2019,7 +2019,10 @@ __init int intel_pmu_init(void)
break;
case 28: /* Atom */
- case 54: /* Cedariew */
+ case 38: /* Lincroft */
+ case 39: /* Penwell */
+ case 53: /* Cloverview */
+ case 54: /* Cedarview */
memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
@@ -2084,6 +2087,7 @@ __init int intel_pmu_init(void)
pr_cont("SandyBridge events, ");
break;
case 58: /* IvyBridge */
+ case 62: /* IvyBridge EP */
memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs,
diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c
index f2af39f5dc3d..4820c232a0b9 100644
--- a/arch/x86/kernel/cpu/perf_event_p6.c
+++ b/arch/x86/kernel/cpu/perf_event_p6.c
@@ -19,7 +19,7 @@ static const u64 p6_perfmon_event_map[] =
};
-static __initconst u64 p6_hw_cache_event_ids
+static u64 p6_hw_cache_event_ids
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] =
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index d22d0c4edcfd..03a36321ec54 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -33,6 +33,9 @@
#define VMWARE_PORT_CMD_GETVERSION 10
#define VMWARE_PORT_CMD_GETHZ 45
+#define VMWARE_PORT_CMD_GETVCPU_INFO 68
+#define VMWARE_PORT_CMD_LEGACY_X2APIC 3
+#define VMWARE_PORT_CMD_VCPU_RESERVED 31
#define VMWARE_PORT(cmd, eax, ebx, ecx, edx) \
__asm__("inl (%%dx)" : \
@@ -125,10 +128,20 @@ static void __cpuinit vmware_set_cpu_features(struct cpuinfo_x86 *c)
set_cpu_cap(c, X86_FEATURE_TSC_RELIABLE);
}
+/* Checks if hypervisor supports x2apic without VT-D interrupt remapping. */
+static bool __init vmware_legacy_x2apic_available(void)
+{
+ uint32_t eax, ebx, ecx, edx;
+ VMWARE_PORT(GETVCPU_INFO, eax, ebx, ecx, edx);
+ return (eax & (1 << VMWARE_PORT_CMD_VCPU_RESERVED)) == 0 &&
+ (eax & (1 << VMWARE_PORT_CMD_LEGACY_X2APIC)) != 0;
+}
+
const __refconst struct hypervisor_x86 x86_hyper_vmware = {
.name = "VMware",
.detect = vmware_platform,
.set_cpu_features = vmware_set_cpu_features,
.init_platform = vmware_platform_setup,
+ .x2apic_available = vmware_legacy_x2apic_available,
};
EXPORT_SYMBOL(x86_hyper_vmware);
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 07a7a04529bc..cb3c591339aa 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1781,6 +1781,7 @@ first_nmi:
* Leave room for the "copied" frame
*/
subq $(5*8), %rsp
+ CFI_ADJUST_CFA_OFFSET 5*8
/* Copy the stack frame to the Saved frame */
.rept 5
@@ -1863,10 +1864,8 @@ end_repeat_nmi:
nmi_swapgs:
SWAPGS_UNSAFE_STACK
nmi_restore:
- RESTORE_ALL 8
-
- /* Pop the extra iret frame */
- addq $(5*8), %rsp
+ /* Pop the extra iret frame at once */
+ RESTORE_ALL 6*8
/* Clear the NMI executing stack variable */
movq $0, 5*8(%rsp)
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index c18f59d10101..6773c918b8cc 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -18,6 +18,7 @@
#include <asm/io_apic.h>
#include <asm/bios_ebda.h>
#include <asm/tlbflush.h>
+#include <asm/bootparam_utils.h>
static void __init i386_default_early_setup(void)
{
@@ -30,6 +31,8 @@ static void __init i386_default_early_setup(void)
void __init i386_start_kernel(void)
{
+ sanitize_boot_params(&boot_params);
+
memblock_reserve(__pa_symbol(&_text),
__pa_symbol(&__bss_stop) - __pa_symbol(&_text));
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 037df57a99ac..849fc9e63c2f 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -25,6 +25,7 @@
#include <asm/kdebug.h>
#include <asm/e820.h>
#include <asm/bios_ebda.h>
+#include <asm/bootparam_utils.h>
static void __init zap_identity_mappings(void)
{
@@ -46,6 +47,7 @@ static void __init copy_bootdata(char *real_mode_data)
char * command_line;
memcpy(&boot_params, real_mode_data, sizeof boot_params);
+ sanitize_boot_params(&boot_params);
if (boot_params.hdr.cmd_line_ptr) {
command_line = __va(boot_params.hdr.cmd_line_ptr);
memcpy(boot_command_line, command_line, COMMAND_LINE_SIZE);
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 8e7f6556028f..3c3f58a0808f 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -300,37 +300,52 @@ ENTRY(startup_32_smp)
leal -__PAGE_OFFSET(%ecx),%esp
default_entry:
+#define CR0_STATE (X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \
+ X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \
+ X86_CR0_PG)
+ movl $(CR0_STATE & ~X86_CR0_PG),%eax
+ movl %eax,%cr0
+
+/*
+ * We want to start out with EFLAGS unambiguously cleared. Some BIOSes leave
+ * bits like NT set. This would confuse the debugger if this code is traced. So
+ * initialize them properly now before switching to protected mode. That means
+ * DF in particular (even though we have cleared it earlier after copying the
+ * command line) because GCC expects it.
+ */
+ pushl $0
+ popfl
+
/*
- * New page tables may be in 4Mbyte page mode and may
- * be using the global pages.
+ * New page tables may be in 4Mbyte page mode and may be using the global pages.
*
- * NOTE! If we are on a 486 we may have no cr4 at all!
- * Specifically, cr4 exists if and only if CPUID exists
- * and has flags other than the FPU flag set.
+ * NOTE! If we are on a 486 we may have no cr4 at all! Specifically, cr4 exists
+ * if and only if CPUID exists and has flags other than the FPU flag set.
*/
+ movl $-1,pa(X86_CPUID) # preset CPUID level
movl $X86_EFLAGS_ID,%ecx
pushl %ecx
- popfl
+ popfl # set EFLAGS=ID
pushfl
- popl %eax
- pushl $0
- popfl
- pushfl
- popl %edx
- xorl %edx,%eax
- testl %ecx,%eax
- jz 6f # No ID flag = no CPUID = no CR4
+ popl %eax # get EFLAGS
+ testl $X86_EFLAGS_ID,%eax # did EFLAGS.ID remained set?
+ jz enable_paging # hw disallowed setting of ID bit
+ # which means no CPUID and no CR4
+
+ xorl %eax,%eax
+ cpuid
+ movl %eax,pa(X86_CPUID) # save largest std CPUID function
movl $1,%eax
cpuid
- andl $~1,%edx # Ignore CPUID.FPU
- jz 6f # No flags or only CPUID.FPU = no CR4
+ andl $~1,%edx # Ignore CPUID.FPU
+ jz enable_paging # No flags or only CPUID.FPU = no CR4
movl pa(mmu_cr4_features),%eax
movl %eax,%cr4
testb $X86_CR4_PAE, %al # check if PAE is enabled
- jz 6f
+ jz enable_paging
/* Check if extended functions are implemented */
movl $0x80000000, %eax
@@ -338,7 +353,7 @@ default_entry:
/* Value must be in the range 0x80000001 to 0x8000ffff */
subl $0x80000001, %eax
cmpl $(0x8000ffff-0x80000001), %eax
- ja 6f
+ ja enable_paging
/* Clear bogus XD_DISABLE bits */
call verify_cpu
@@ -347,7 +362,7 @@ default_entry:
cpuid
/* Execute Disable bit supported? */
btl $(X86_FEATURE_NX & 31), %edx
- jnc 6f
+ jnc enable_paging
/* Setup EFER (Extended Feature Enable Register) */
movl $MSR_EFER, %ecx
@@ -357,15 +372,14 @@ default_entry:
/* Make changes effective */
wrmsr
-6:
+enable_paging:
/*
* Enable paging
*/
movl $pa(initial_page_table), %eax
movl %eax,%cr3 /* set the page table pointer.. */
- movl %cr0,%eax
- orl $X86_CR0_PG,%eax
+ movl $CR0_STATE,%eax
movl %eax,%cr0 /* ..and set paging (PG) bit */
ljmp $__BOOT_CS,$1f /* Clear prefetch and normalize %eip */
1:
@@ -373,14 +387,6 @@ default_entry:
addl $__PAGE_OFFSET, %esp
/*
- * Initialize eflags. Some BIOS's leave bits like NT set. This would
- * confuse the debugger if this code is traced.
- * XXX - best to initialize before switching to protected mode.
- */
- pushl $0
- popfl
-
-/*
* start system 32-bit setup. We need to re-do some of the things done
* in 16-bit mode for the "real" operations.
*/
@@ -389,31 +395,11 @@ default_entry:
jz 1f # Did we do this already?
call *%eax
1:
-
-/* check if it is 486 or 386. */
+
/*
- * XXX - this does a lot of unnecessary setup. Alignment checks don't
- * apply at our cpl of 0 and the stack ought to be aligned already, and
- * we don't need to preserve eflags.
+ * Check if it is 486
*/
- movl $-1,X86_CPUID # -1 for no CPUID initially
- movb $3,X86 # at least 386
- pushfl # push EFLAGS
- popl %eax # get EFLAGS
- movl %eax,%ecx # save original EFLAGS
- xorl $0x240000,%eax # flip AC and ID bits in EFLAGS
- pushl %eax # copy to EFLAGS
- popfl # set EFLAGS
- pushfl # get new EFLAGS
- popl %eax # put it in eax
- xorl %ecx,%eax # change in flags
- pushl %ecx # restore original EFLAGS
- popfl
- testl $0x40000,%eax # check if AC bit changed
- je is386
-
- movb $4,X86 # at least 486
- testl $0x200000,%eax # check if ID bit changed
+ cmpl $-1,X86_CPUID
je is486
/* get vendor info */
@@ -439,11 +425,10 @@ default_entry:
movb %cl,X86_MASK
movl %edx,X86_CAPABILITY
-is486: movl $0x50022,%ecx # set AM, WP, NE and MP
- jmp 2f
-
-is386: movl $2,%ecx # set MP
-2: movl %cr0,%eax
+is486:
+ movb $4,X86
+ movl $0x50022,%ecx # set AM, WP, NE and MP
+ movl %cr0,%eax
andl $0x80000011,%eax # Save PG,PE,ET
orl %ecx,%eax
movl %eax,%cr0
@@ -468,7 +453,6 @@ is386: movl $2,%ecx # set MP
xorl %eax,%eax # Clear LDT
lldt %ax
- cld # gcc2 wants the direction flag cleared at all times
pushl $0 # fake return address for unwinder
jmp *(initial_code)
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index e28670f9a589..da85a8e830a1 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -478,7 +478,7 @@ static int hpet_msi_next_event(unsigned long delta,
static int hpet_setup_msi_irq(unsigned int irq)
{
- if (arch_setup_hpet_msi(irq, hpet_blockid)) {
+ if (x86_msi.setup_hpet_msi(irq, hpet_blockid)) {
destroy_irq(irq);
return -EINVAL;
}
diff --git a/arch/x86/kernel/kprobes/Makefile b/arch/x86/kernel/kprobes/Makefile
new file mode 100644
index 000000000000..0d33169cc1a2
--- /dev/null
+++ b/arch/x86/kernel/kprobes/Makefile
@@ -0,0 +1,7 @@
+#
+# Makefile for kernel probes
+#
+
+obj-$(CONFIG_KPROBES) += core.o
+obj-$(CONFIG_OPTPROBES) += opt.o
+obj-$(CONFIG_KPROBES_ON_FTRACE) += ftrace.o
diff --git a/arch/x86/kernel/kprobes-common.h b/arch/x86/kernel/kprobes/common.h
index 3230b68ef29a..2e9d4b5af036 100644
--- a/arch/x86/kernel/kprobes-common.h
+++ b/arch/x86/kernel/kprobes/common.h
@@ -99,4 +99,15 @@ static inline unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsig
return addr;
}
#endif
+
+#ifdef CONFIG_KPROBES_ON_FTRACE
+extern int skip_singlestep(struct kprobe *p, struct pt_regs *regs,
+ struct kprobe_ctlblk *kcb);
+#else
+static inline int skip_singlestep(struct kprobe *p, struct pt_regs *regs,
+ struct kprobe_ctlblk *kcb)
+{
+ return 0;
+}
+#endif
#endif
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes/core.c
index 57916c0d3cf6..e124554598ee 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -58,7 +58,7 @@
#include <asm/insn.h>
#include <asm/debugreg.h>
-#include "kprobes-common.h"
+#include "common.h"
void jprobe_return_end(void);
@@ -78,7 +78,7 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
* Groups, and some special opcodes can not boost.
* This is non-const and volatile to keep gcc from statically
* optimizing it out, as variable_test_bit makes gcc think only
- * *(unsigned long*) is used.
+ * *(unsigned long*) is used.
*/
static volatile u32 twobyte_is_boostable[256 / 32] = {
/* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
@@ -117,7 +117,7 @@ static void __kprobes __synthesize_relative_insn(void *from, void *to, u8 op)
struct __arch_relative_insn {
u8 op;
s32 raddr;
- } __attribute__((packed)) *insn;
+ } __packed *insn;
insn = (struct __arch_relative_insn *)from;
insn->raddr = (s32)((long)(to) - ((long)(from) + 5));
@@ -541,23 +541,6 @@ reenter_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb
return 1;
}
-#ifdef KPROBES_CAN_USE_FTRACE
-static void __kprobes skip_singlestep(struct kprobe *p, struct pt_regs *regs,
- struct kprobe_ctlblk *kcb)
-{
- /*
- * Emulate singlestep (and also recover regs->ip)
- * as if there is a 5byte nop
- */
- regs->ip = (unsigned long)p->addr + MCOUNT_INSN_SIZE;
- if (unlikely(p->post_handler)) {
- kcb->kprobe_status = KPROBE_HIT_SSDONE;
- p->post_handler(p, regs, 0);
- }
- __this_cpu_write(current_kprobe, NULL);
-}
-#endif
-
/*
* Interrupts are disabled on entry as trap3 is an interrupt gate and they
* remain disabled throughout this function.
@@ -616,13 +599,8 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
} else if (kprobe_running()) {
p = __this_cpu_read(current_kprobe);
if (p->break_handler && p->break_handler(p, regs)) {
-#ifdef KPROBES_CAN_USE_FTRACE
- if (kprobe_ftrace(p)) {
- skip_singlestep(p, regs, kcb);
- return 1;
- }
-#endif
- setup_singlestep(p, regs, kcb, 0);
+ if (!skip_singlestep(p, regs, kcb))
+ setup_singlestep(p, regs, kcb, 0);
return 1;
}
} /* else: not a kprobe fault; let the kernel handle it */
@@ -1075,50 +1053,6 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
return 0;
}
-#ifdef KPROBES_CAN_USE_FTRACE
-/* Ftrace callback handler for kprobes */
-void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
- struct ftrace_ops *ops, struct pt_regs *regs)
-{
- struct kprobe *p;
- struct kprobe_ctlblk *kcb;
- unsigned long flags;
-
- /* Disable irq for emulating a breakpoint and avoiding preempt */
- local_irq_save(flags);
-
- p = get_kprobe((kprobe_opcode_t *)ip);
- if (unlikely(!p) || kprobe_disabled(p))
- goto end;
-
- kcb = get_kprobe_ctlblk();
- if (kprobe_running()) {
- kprobes_inc_nmissed_count(p);
- } else {
- /* Kprobe handler expects regs->ip = ip + 1 as breakpoint hit */
- regs->ip = ip + sizeof(kprobe_opcode_t);
-
- __this_cpu_write(current_kprobe, p);
- kcb->kprobe_status = KPROBE_HIT_ACTIVE;
- if (!p->pre_handler || !p->pre_handler(p, regs))
- skip_singlestep(p, regs, kcb);
- /*
- * If pre_handler returns !0, it sets regs->ip and
- * resets current kprobe.
- */
- }
-end:
- local_irq_restore(flags);
-}
-
-int __kprobes arch_prepare_kprobe_ftrace(struct kprobe *p)
-{
- p->ainsn.insn = NULL;
- p->ainsn.boostable = -1;
- return 0;
-}
-#endif
-
int __init arch_init_kprobes(void)
{
return arch_init_optprobes();
diff --git a/arch/x86/kernel/kprobes/ftrace.c b/arch/x86/kernel/kprobes/ftrace.c
new file mode 100644
index 000000000000..23ef5c556f06
--- /dev/null
+++ b/arch/x86/kernel/kprobes/ftrace.c
@@ -0,0 +1,93 @@
+/*
+ * Dynamic Ftrace based Kprobes Optimization
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) Hitachi Ltd., 2012
+ */
+#include <linux/kprobes.h>
+#include <linux/ptrace.h>
+#include <linux/hardirq.h>
+#include <linux/preempt.h>
+#include <linux/ftrace.h>
+
+#include "common.h"
+
+static int __skip_singlestep(struct kprobe *p, struct pt_regs *regs,
+ struct kprobe_ctlblk *kcb)
+{
+ /*
+ * Emulate singlestep (and also recover regs->ip)
+ * as if there is a 5byte nop
+ */
+ regs->ip = (unsigned long)p->addr + MCOUNT_INSN_SIZE;
+ if (unlikely(p->post_handler)) {
+ kcb->kprobe_status = KPROBE_HIT_SSDONE;
+ p->post_handler(p, regs, 0);
+ }
+ __this_cpu_write(current_kprobe, NULL);
+ return 1;
+}
+
+int __kprobes skip_singlestep(struct kprobe *p, struct pt_regs *regs,
+ struct kprobe_ctlblk *kcb)
+{
+ if (kprobe_ftrace(p))
+ return __skip_singlestep(p, regs, kcb);
+ else
+ return 0;
+}
+
+/* Ftrace callback handler for kprobes */
+void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
+ struct ftrace_ops *ops, struct pt_regs *regs)
+{
+ struct kprobe *p;
+ struct kprobe_ctlblk *kcb;
+ unsigned long flags;
+
+ /* Disable irq for emulating a breakpoint and avoiding preempt */
+ local_irq_save(flags);
+
+ p = get_kprobe((kprobe_opcode_t *)ip);
+ if (unlikely(!p) || kprobe_disabled(p))
+ goto end;
+
+ kcb = get_kprobe_ctlblk();
+ if (kprobe_running()) {
+ kprobes_inc_nmissed_count(p);
+ } else {
+ /* Kprobe handler expects regs->ip = ip + 1 as breakpoint hit */
+ regs->ip = ip + sizeof(kprobe_opcode_t);
+
+ __this_cpu_write(current_kprobe, p);
+ kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+ if (!p->pre_handler || !p->pre_handler(p, regs))
+ __skip_singlestep(p, regs, kcb);
+ /*
+ * If pre_handler returns !0, it sets regs->ip and
+ * resets current kprobe.
+ */
+ }
+end:
+ local_irq_restore(flags);
+}
+
+int __kprobes arch_prepare_kprobe_ftrace(struct kprobe *p)
+{
+ p->ainsn.insn = NULL;
+ p->ainsn.boostable = -1;
+ return 0;
+}
diff --git a/arch/x86/kernel/kprobes-opt.c b/arch/x86/kernel/kprobes/opt.c
index c5e410eed403..76dc6f095724 100644
--- a/arch/x86/kernel/kprobes-opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -37,7 +37,7 @@
#include <asm/insn.h>
#include <asm/debugreg.h>
-#include "kprobes-common.h"
+#include "common.h"
unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr)
{
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 9c2bd8bd4b4c..2b44ea5f269d 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -505,6 +505,7 @@ static bool __init kvm_detect(void)
const struct hypervisor_x86 x86_hyper_kvm __refconst = {
.name = "KVM",
.detect = kvm_detect,
+ .x2apic_available = kvm_para_available,
};
EXPORT_SYMBOL_GPL(x86_hyper_kvm);
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index a7c5661f8496..4929502c1372 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -174,6 +174,9 @@ static int msr_open(struct inode *inode, struct file *file)
unsigned int cpu;
struct cpuinfo_x86 *c;
+ if (!capable(CAP_SYS_RAWIO))
+ return -EPERM;
+
cpu = iminor(file->f_path.dentry->d_inode);
if (cpu >= nr_cpu_ids || !cpu_online(cpu))
return -ENXIO; /* No such CPU */
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 0f5dec5c80e0..872079a67e4d 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -56,7 +56,7 @@ struct device x86_dma_fallback_dev = {
EXPORT_SYMBOL(x86_dma_fallback_dev);
/* Number of entries preallocated for DMA-API debugging */
-#define PREALLOC_DMA_DEBUG_ENTRIES 32768
+#define PREALLOC_DMA_DEBUG_ENTRIES 65536
int dma_set_mask(struct device *dev, u64 mask)
{
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index b629bbe0d9bd..29a8120e6fe8 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -22,7 +22,7 @@
#include <linux/perf_event.h>
#include <linux/hw_breakpoint.h>
#include <linux/rcupdate.h>
-#include <linux/module.h>
+#include <linux/export.h>
#include <linux/context_tracking.h>
#include <asm/uaccess.h>
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 4e8ba39eaf0f..76fa1e9a2b39 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -584,7 +584,7 @@ static void native_machine_emergency_restart(void)
break;
case BOOT_EFI:
- if (efi_enabled)
+ if (efi_enabled(EFI_RUNTIME_SERVICES))
efi.reset_system(reboot_mode ?
EFI_RESET_WARM :
EFI_RESET_COLD,
diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c
index 801602b5d745..2e8f3d3b5641 100644
--- a/arch/x86/kernel/rtc.c
+++ b/arch/x86/kernel/rtc.c
@@ -149,7 +149,6 @@ unsigned long mach_get_cmos_time(void)
if (century) {
century = bcd2bin(century);
year += century * 100;
- printk(KERN_INFO "Extended CMOS year: %d\n", century * 100);
} else
year += CMOS_YEARS_OFFS;
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 00f6c1472b85..8b24289cc10c 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -807,15 +807,15 @@ void __init setup_arch(char **cmdline_p)
#ifdef CONFIG_EFI
if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature,
"EL32", 4)) {
- efi_enabled = 1;
- efi_64bit = false;
+ set_bit(EFI_BOOT, &x86_efi_facility);
} else if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature,
"EL64", 4)) {
- efi_enabled = 1;
- efi_64bit = true;
+ set_bit(EFI_BOOT, &x86_efi_facility);
+ set_bit(EFI_64BIT, &x86_efi_facility);
}
- if (efi_enabled && efi_memblock_x86_reserve_range())
- efi_enabled = 0;
+
+ if (efi_enabled(EFI_BOOT))
+ efi_memblock_x86_reserve_range();
#endif
x86_init.oem.arch_setup();
@@ -888,7 +888,7 @@ void __init setup_arch(char **cmdline_p)
finish_e820_parsing();
- if (efi_enabled)
+ if (efi_enabled(EFI_BOOT))
efi_init();
dmi_scan_machine();
@@ -971,7 +971,7 @@ void __init setup_arch(char **cmdline_p)
* The EFI specification says that boot service code won't be called
* after ExitBootServices(). This is, in fact, a lie.
*/
- if (efi_enabled)
+ if (efi_enabled(EFI_MEMMAP))
efi_reserve_boot_services();
/* preallocate 4k for mptable mpc */
@@ -1114,7 +1114,7 @@ void __init setup_arch(char **cmdline_p)
#ifdef CONFIG_VT
#if defined(CONFIG_VGA_CONSOLE)
- if (!efi_enabled || (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY))
+ if (!efi_enabled(EFI_BOOT) || (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY))
conswitchp = &vga_con;
#elif defined(CONFIG_DUMMY_CONSOLE)
conswitchp = &dummy_con;
@@ -1131,14 +1131,14 @@ void __init setup_arch(char **cmdline_p)
register_refined_jiffies(CLOCK_TICK_RATE);
#ifdef CONFIG_EFI
- /* Once setup is done above, disable efi_enabled on mismatched
- * firmware/kernel archtectures since there is no support for
- * runtime services.
+ /* Once setup is done above, unmap the EFI memory map on
+ * mismatched firmware/kernel archtectures since there is no
+ * support for runtime services.
*/
- if (efi_enabled && IS_ENABLED(CONFIG_X86_64) != efi_64bit) {
+ if (efi_enabled(EFI_BOOT) &&
+ IS_ENABLED(CONFIG_X86_64) != efi_enabled(EFI_64BIT)) {
pr_info("efi: Setup done, disabling due to 32/64-bit mismatch\n");
efi_unmap_memmap();
- efi_enabled = 0;
}
#endif
}
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index 97ef74b88e0f..dbded5aedb81 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -157,7 +157,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
if (flags & MAP_FIXED)
return addr;
- /* for MAP_32BIT mappings we force the legact mmap base */
+ /* for MAP_32BIT mappings we force the legacy mmap base */
if (!test_thread_flag(TIF_ADDR32) && (flags & MAP_32BIT))
goto bottomup;
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 06ccb5073a3f..4b9ea101fe3b 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -623,7 +623,8 @@ static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
ns_now = __cycles_2_ns(tsc_now);
if (cpu_khz) {
- *scale = (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR)/cpu_khz;
+ *scale = ((NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR) +
+ cpu_khz / 2) / cpu_khz;
*offset = ns_now - mult_frac(tsc_now, *scale,
(1UL << CYC2NS_SCALE_FACTOR));
}
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index c71025b67462..0ba4cfb4f412 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -680,8 +680,10 @@ static bool __skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
if (auprobe->insn[i] == 0x66)
continue;
- if (auprobe->insn[i] == 0x90)
+ if (auprobe->insn[i] == 0x90) {
+ regs->ip += i + 1;
return true;
+ }
break;
}
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index 7a3d075a814a..d065d67c2672 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -19,6 +19,7 @@
#include <asm/time.h>
#include <asm/irq.h>
#include <asm/io_apic.h>
+#include <asm/hpet.h>
#include <asm/pat.h>
#include <asm/tsc.h>
#include <asm/iommu.h>
@@ -111,15 +112,22 @@ struct x86_platform_ops x86_platform = {
EXPORT_SYMBOL_GPL(x86_platform);
struct x86_msi_ops x86_msi = {
- .setup_msi_irqs = native_setup_msi_irqs,
- .teardown_msi_irq = native_teardown_msi_irq,
- .teardown_msi_irqs = default_teardown_msi_irqs,
- .restore_msi_irqs = default_restore_msi_irqs,
+ .setup_msi_irqs = native_setup_msi_irqs,
+ .compose_msi_msg = native_compose_msi_msg,
+ .teardown_msi_irq = native_teardown_msi_irq,
+ .teardown_msi_irqs = default_teardown_msi_irqs,
+ .restore_msi_irqs = default_restore_msi_irqs,
+ .setup_hpet_msi = default_setup_hpet_msi,
};
struct x86_io_apic_ops x86_io_apic_ops = {
- .init = native_io_apic_init_mappings,
- .read = native_io_apic_read,
- .write = native_io_apic_write,
- .modify = native_io_apic_modify,
+ .init = native_io_apic_init_mappings,
+ .read = native_io_apic_read,
+ .write = native_io_apic_write,
+ .modify = native_io_apic_modify,
+ .disable = native_disable_io_apic,
+ .print_entries = native_io_apic_print_entries,
+ .set_affinity = native_ioapic_set_affinity,
+ .setup_entry = native_setup_ioapic_entry,
+ .eoi_ioapic_pin = native_eoi_ioapic_pin,
};