summaryrefslogtreecommitdiff
path: root/arch/x86/xen/smp.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/xen/smp.c')
-rw-r--r--arch/x86/xen/smp.c419
1 files changed, 296 insertions, 123 deletions
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 94e69000f982..d8faf79a0a1d 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -15,6 +15,7 @@
* This does not handle HOTPLUG_CPU yet.
*/
#include <linux/sched.h>
+#include <linux/kernel_stat.h>
#include <linux/err.h>
#include <linux/smp.h>
@@ -35,28 +36,17 @@
#include "xen-ops.h"
#include "mmu.h"
-static cpumask_t xen_cpu_initialized_map;
-static DEFINE_PER_CPU(int, resched_irq) = -1;
-static DEFINE_PER_CPU(int, callfunc_irq) = -1;
-static DEFINE_PER_CPU(int, debug_irq) = -1;
+static void __cpuinit xen_init_lock_cpu(int cpu);
-/*
- * Structure and data for smp_call_function(). This is designed to minimise
- * static memory requirements. It also looks cleaner.
- */
-static DEFINE_SPINLOCK(call_lock);
-
-struct call_data_struct {
- void (*func) (void *info);
- void *info;
- atomic_t started;
- atomic_t finished;
- int wait;
-};
+cpumask_t xen_cpu_initialized_map;
-static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id);
+static DEFINE_PER_CPU(int, resched_irq);
+static DEFINE_PER_CPU(int, callfunc_irq);
+static DEFINE_PER_CPU(int, callfuncsingle_irq);
+static DEFINE_PER_CPU(int, debug_irq) = -1;
-static struct call_data_struct *call_data;
+static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id);
+static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id);
/*
* Reschedule call back. Nothing to do,
@@ -65,6 +55,12 @@ static struct call_data_struct *call_data;
*/
static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id)
{
+#ifdef CONFIG_X86_32
+ __get_cpu_var(irq_stat).irq_resched_count++;
+#else
+ add_pda(irq_resched_count, 1);
+#endif
+
return IRQ_HANDLED;
}
@@ -73,13 +69,22 @@ static __cpuinit void cpu_bringup_and_idle(void)
int cpu = smp_processor_id();
cpu_init();
+ preempt_disable();
+
xen_enable_sysenter();
+ xen_enable_syscall();
- preempt_disable();
- per_cpu(cpu_state, cpu) = CPU_ONLINE;
+ cpu = smp_processor_id();
+ smp_store_cpu_info(cpu);
+ cpu_data(cpu).x86_max_cores = 1;
+ set_cpu_sibling_map(cpu);
xen_setup_cpu_clockevents();
+ cpu_set(cpu, cpu_online_map);
+ x86_write_percpu(cpu_state, CPU_ONLINE);
+ wmb();
+
/* We can take interrupts now: we're officially "up". */
local_irq_enable();
@@ -122,6 +127,17 @@ static int xen_smp_intr_init(unsigned int cpu)
goto fail;
per_cpu(debug_irq, cpu) = rc;
+ callfunc_name = kasprintf(GFP_KERNEL, "callfuncsingle%d", cpu);
+ rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_SINGLE_VECTOR,
+ cpu,
+ xen_call_function_single_interrupt,
+ IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING,
+ callfunc_name,
+ NULL);
+ if (rc < 0)
+ goto fail;
+ per_cpu(callfuncsingle_irq, cpu) = rc;
+
return 0;
fail:
@@ -131,59 +147,45 @@ static int xen_smp_intr_init(unsigned int cpu)
unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL);
if (per_cpu(debug_irq, cpu) >= 0)
unbind_from_irqhandler(per_cpu(debug_irq, cpu), NULL);
+ if (per_cpu(callfuncsingle_irq, cpu) >= 0)
+ unbind_from_irqhandler(per_cpu(callfuncsingle_irq, cpu), NULL);
+
return rc;
}
-void __init xen_fill_possible_map(void)
+static void __init xen_fill_possible_map(void)
{
int i, rc;
for (i = 0; i < NR_CPUS; i++) {
rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL);
- if (rc >= 0)
+ if (rc >= 0) {
+ num_processors++;
cpu_set(i, cpu_possible_map);
+ }
}
}
-void __init xen_smp_prepare_boot_cpu(void)
+static void __init xen_smp_prepare_boot_cpu(void)
{
- int cpu;
-
BUG_ON(smp_processor_id() != 0);
native_smp_prepare_boot_cpu();
/* We've switched to the "real" per-cpu gdt, so make sure the
old memory can be recycled */
- make_lowmem_page_readwrite(&per_cpu__gdt_page);
-
- for_each_possible_cpu(cpu) {
- cpus_clear(per_cpu(cpu_sibling_map, cpu));
- /*
- * cpu_core_map lives in a per cpu area that is cleared
- * when the per cpu array is allocated.
- *
- * cpus_clear(per_cpu(cpu_core_map, cpu));
- */
- }
+ make_lowmem_page_readwrite(&per_cpu_var(gdt_page));
xen_setup_vcpu_info_placement();
}
-void __init xen_smp_prepare_cpus(unsigned int max_cpus)
+static void __init xen_smp_prepare_cpus(unsigned int max_cpus)
{
unsigned cpu;
- for_each_possible_cpu(cpu) {
- cpus_clear(per_cpu(cpu_sibling_map, cpu));
- /*
- * cpu_core_ map will be zeroed when the per
- * cpu area is allocated.
- *
- * cpus_clear(per_cpu(cpu_core_map, cpu));
- */
- }
+ xen_init_lock_cpu(0);
smp_store_cpu_info(0);
+ cpu_data(0).x86_max_cores = 1;
set_cpu_sibling_map(0);
if (xen_smp_intr_init(0))
@@ -218,7 +220,7 @@ static __cpuinit int
cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
{
struct vcpu_guest_context *ctxt;
- struct gdt_page *gdt = &per_cpu(gdt_page, cpu);
+ struct desc_struct *gdt;
if (cpu_test_and_set(cpu, xen_cpu_initialized_map))
return 0;
@@ -227,12 +229,15 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
if (ctxt == NULL)
return -ENOMEM;
+ gdt = get_cpu_gdt_table(cpu);
+
ctxt->flags = VGCF_IN_KERNEL;
ctxt->user_regs.ds = __USER_DS;
ctxt->user_regs.es = __USER_DS;
- ctxt->user_regs.fs = __KERNEL_PERCPU;
- ctxt->user_regs.gs = 0;
ctxt->user_regs.ss = __KERNEL_DS;
+#ifdef CONFIG_X86_32
+ ctxt->user_regs.fs = __KERNEL_PERCPU;
+#endif
ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;
ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */
@@ -242,11 +247,11 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
ctxt->ldt_ents = 0;
- BUG_ON((unsigned long)gdt->gdt & ~PAGE_MASK);
- make_lowmem_page_readonly(gdt->gdt);
+ BUG_ON((unsigned long)gdt & ~PAGE_MASK);
+ make_lowmem_page_readonly(gdt);
- ctxt->gdt_frames[0] = virt_to_mfn(gdt->gdt);
- ctxt->gdt_ents = ARRAY_SIZE(gdt->gdt);
+ ctxt->gdt_frames[0] = virt_to_mfn(gdt);
+ ctxt->gdt_ents = GDT_ENTRIES;
ctxt->user_regs.cs = __KERNEL_CS;
ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs);
@@ -254,9 +259,11 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
ctxt->kernel_ss = __KERNEL_DS;
ctxt->kernel_sp = idle->thread.sp0;
+#ifdef CONFIG_X86_32
ctxt->event_callback_cs = __KERNEL_CS;
- ctxt->event_callback_eip = (unsigned long)xen_hypervisor_callback;
ctxt->failsafe_callback_cs = __KERNEL_CS;
+#endif
+ ctxt->event_callback_eip = (unsigned long)xen_hypervisor_callback;
ctxt->failsafe_callback_eip = (unsigned long)xen_failsafe_callback;
per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir);
@@ -269,7 +276,7 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
return 0;
}
-int __cpuinit xen_cpu_up(unsigned int cpu)
+static int __cpuinit xen_cpu_up(unsigned int cpu)
{
struct task_struct *idle = idle_task(cpu);
int rc;
@@ -280,10 +287,28 @@ int __cpuinit xen_cpu_up(unsigned int cpu)
return rc;
#endif
+#ifdef CONFIG_X86_64
+ /* Allocate node local memory for AP pdas */
+ WARN_ON(cpu == 0);
+ if (cpu > 0) {
+ rc = get_local_pda(cpu);
+ if (rc)
+ return rc;
+ }
+#endif
+
+#ifdef CONFIG_X86_32
init_gdt(cpu);
per_cpu(current_task, cpu) = idle;
irq_ctx_init(cpu);
+#else
+ cpu_pda(cpu)->pcurrent = idle;
+ clear_tsk_thread_flag(idle, TIF_FORK);
+#endif
xen_setup_timer(cpu);
+ xen_init_lock_cpu(cpu);
+
+ per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
/* make sure interrupts start blocked */
per_cpu(xen_vcpu, cpu)->evtchn_upcall_mask = 1;
@@ -299,20 +324,18 @@ int __cpuinit xen_cpu_up(unsigned int cpu)
if (rc)
return rc;
- smp_store_cpu_info(cpu);
- set_cpu_sibling_map(cpu);
- /* This must be done before setting cpu_online_map */
- wmb();
-
- cpu_set(cpu, cpu_online_map);
-
rc = HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL);
BUG_ON(rc);
+ while(per_cpu(cpu_state, cpu) != CPU_ONLINE) {
+ HYPERVISOR_sched_op(SCHEDOP_yield, 0);
+ barrier();
+ }
+
return 0;
}
-void xen_smp_cpus_done(unsigned int max_cpus)
+static void xen_smp_cpus_done(unsigned int max_cpus)
{
}
@@ -328,104 +351,254 @@ static void stop_self(void *v)
BUG();
}
-void xen_smp_send_stop(void)
+static void xen_smp_send_stop(void)
{
- smp_call_function(stop_self, NULL, 0, 0);
+ smp_call_function(stop_self, NULL, 0);
}
-void xen_smp_send_reschedule(int cpu)
+static void xen_smp_send_reschedule(int cpu)
{
xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR);
}
-
static void xen_send_IPI_mask(cpumask_t mask, enum ipi_vector vector)
{
unsigned cpu;
cpus_and(mask, mask, cpu_online_map);
- for_each_cpu_mask(cpu, mask)
+ for_each_cpu_mask_nr(cpu, mask)
xen_send_IPI_one(cpu, vector);
}
+static void xen_smp_send_call_function_ipi(cpumask_t mask)
+{
+ int cpu;
+
+ xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR);
+
+ /* Make sure other vcpus get a chance to run if they need to. */
+ for_each_cpu_mask_nr(cpu, mask) {
+ if (xen_vcpu_stolen(cpu)) {
+ HYPERVISOR_sched_op(SCHEDOP_yield, 0);
+ break;
+ }
+ }
+}
+
+static void xen_smp_send_call_function_single_ipi(int cpu)
+{
+ xen_send_IPI_mask(cpumask_of_cpu(cpu), XEN_CALL_FUNCTION_SINGLE_VECTOR);
+}
+
static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id)
{
- void (*func) (void *info) = call_data->func;
- void *info = call_data->info;
- int wait = call_data->wait;
-
- /*
- * Notify initiating CPU that I've grabbed the data and am
- * about to execute the function
- */
- mb();
- atomic_inc(&call_data->started);
- /*
- * At this point the info structure may be out of scope unless wait==1
- */
irq_enter();
- (*func)(info);
+ generic_smp_call_function_interrupt();
+#ifdef CONFIG_X86_32
__get_cpu_var(irq_stat).irq_call_count++;
+#else
+ add_pda(irq_call_count, 1);
+#endif
irq_exit();
- if (wait) {
- mb(); /* commit everything before setting finished */
- atomic_inc(&call_data->finished);
- }
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id)
+{
+ irq_enter();
+ generic_smp_call_function_single_interrupt();
+#ifdef CONFIG_X86_32
+ __get_cpu_var(irq_stat).irq_call_count++;
+#else
+ add_pda(irq_call_count, 1);
+#endif
+ irq_exit();
return IRQ_HANDLED;
}
-int xen_smp_call_function_mask(cpumask_t mask, void (*func)(void *),
- void *info, int wait)
+struct xen_spinlock {
+ unsigned char lock; /* 0 -> free; 1 -> locked */
+ unsigned short spinners; /* count of waiting cpus */
+};
+
+static int xen_spin_is_locked(struct raw_spinlock *lock)
{
- struct call_data_struct data;
- int cpus, cpu;
- bool yield;
+ struct xen_spinlock *xl = (struct xen_spinlock *)lock;
- /* Holding any lock stops cpus from going down. */
- spin_lock(&call_lock);
+ return xl->lock != 0;
+}
- cpu_clear(smp_processor_id(), mask);
+static int xen_spin_is_contended(struct raw_spinlock *lock)
+{
+ struct xen_spinlock *xl = (struct xen_spinlock *)lock;
- cpus = cpus_weight(mask);
- if (!cpus) {
- spin_unlock(&call_lock);
+ /* Not strictly true; this is only the count of contended
+ lock-takers entering the slow path. */
+ return xl->spinners != 0;
+}
+
+static int xen_spin_trylock(struct raw_spinlock *lock)
+{
+ struct xen_spinlock *xl = (struct xen_spinlock *)lock;
+ u8 old = 1;
+
+ asm("xchgb %b0,%1"
+ : "+q" (old), "+m" (xl->lock) : : "memory");
+
+ return old == 0;
+}
+
+static DEFINE_PER_CPU(int, lock_kicker_irq) = -1;
+static DEFINE_PER_CPU(struct xen_spinlock *, lock_spinners);
+
+static inline void spinning_lock(struct xen_spinlock *xl)
+{
+ __get_cpu_var(lock_spinners) = xl;
+ wmb(); /* set lock of interest before count */
+ asm(LOCK_PREFIX " incw %0"
+ : "+m" (xl->spinners) : : "memory");
+}
+
+static inline void unspinning_lock(struct xen_spinlock *xl)
+{
+ asm(LOCK_PREFIX " decw %0"
+ : "+m" (xl->spinners) : : "memory");
+ wmb(); /* decrement count before clearing lock */
+ __get_cpu_var(lock_spinners) = NULL;
+}
+
+static noinline int xen_spin_lock_slow(struct raw_spinlock *lock)
+{
+ struct xen_spinlock *xl = (struct xen_spinlock *)lock;
+ int irq = __get_cpu_var(lock_kicker_irq);
+ int ret;
+
+ /* If kicker interrupts not initialized yet, just spin */
+ if (irq == -1)
return 0;
+
+ /* announce we're spinning */
+ spinning_lock(xl);
+
+ /* clear pending */
+ xen_clear_irq_pending(irq);
+
+ /* check again make sure it didn't become free while
+ we weren't looking */
+ ret = xen_spin_trylock(lock);
+ if (ret)
+ goto out;
+
+ /* block until irq becomes pending */
+ xen_poll_irq(irq);
+ kstat_this_cpu.irqs[irq]++;
+
+out:
+ unspinning_lock(xl);
+ return ret;
+}
+
+static void xen_spin_lock(struct raw_spinlock *lock)
+{
+ struct xen_spinlock *xl = (struct xen_spinlock *)lock;
+ int timeout;
+ u8 oldval;
+
+ do {
+ timeout = 1 << 10;
+
+ asm("1: xchgb %1,%0\n"
+ " testb %1,%1\n"
+ " jz 3f\n"
+ "2: rep;nop\n"
+ " cmpb $0,%0\n"
+ " je 1b\n"
+ " dec %2\n"
+ " jnz 2b\n"
+ "3:\n"
+ : "+m" (xl->lock), "=q" (oldval), "+r" (timeout)
+ : "1" (1)
+ : "memory");
+
+ } while (unlikely(oldval != 0 && !xen_spin_lock_slow(lock)));
+}
+
+static noinline void xen_spin_unlock_slow(struct xen_spinlock *xl)
+{
+ int cpu;
+
+ for_each_online_cpu(cpu) {
+ /* XXX should mix up next cpu selection */
+ if (per_cpu(lock_spinners, cpu) == xl) {
+ xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR);
+ break;
+ }
}
+}
- /* Can deadlock when called with interrupts disabled */
- WARN_ON(irqs_disabled());
+static void xen_spin_unlock(struct raw_spinlock *lock)
+{
+ struct xen_spinlock *xl = (struct xen_spinlock *)lock;
- data.func = func;
- data.info = info;
- atomic_set(&data.started, 0);
- data.wait = wait;
- if (wait)
- atomic_set(&data.finished, 0);
+ smp_wmb(); /* make sure no writes get moved after unlock */
+ xl->lock = 0; /* release lock */
- call_data = &data;
- mb(); /* write everything before IPI */
+ /* make sure unlock happens before kick */
+ barrier();
- /* Send a message to other CPUs and wait for them to respond */
- xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR);
+ if (unlikely(xl->spinners))
+ xen_spin_unlock_slow(xl);
+}
- /* Make sure other vcpus get a chance to run if they need to. */
- yield = false;
- for_each_cpu_mask(cpu, mask)
- if (xen_vcpu_stolen(cpu))
- yield = true;
+static __cpuinit void xen_init_lock_cpu(int cpu)
+{
+ int irq;
+ const char *name;
+
+ name = kasprintf(GFP_KERNEL, "spinlock%d", cpu);
+ irq = bind_ipi_to_irqhandler(XEN_SPIN_UNLOCK_VECTOR,
+ cpu,
+ xen_reschedule_interrupt,
+ IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING,
+ name,
+ NULL);
+
+ if (irq >= 0) {
+ disable_irq(irq); /* make sure it's never delivered */
+ per_cpu(lock_kicker_irq, cpu) = irq;
+ }
- if (yield)
- HYPERVISOR_sched_op(SCHEDOP_yield, 0);
+ printk("cpu %d spinlock event irq %d\n", cpu, irq);
+}
- /* Wait for response */
- while (atomic_read(&data.started) != cpus ||
- (wait && atomic_read(&data.finished) != cpus))
- cpu_relax();
+static void __init xen_init_spinlocks(void)
+{
+ pv_lock_ops.spin_is_locked = xen_spin_is_locked;
+ pv_lock_ops.spin_is_contended = xen_spin_is_contended;
+ pv_lock_ops.spin_lock = xen_spin_lock;
+ pv_lock_ops.spin_trylock = xen_spin_trylock;
+ pv_lock_ops.spin_unlock = xen_spin_unlock;
+}
- spin_unlock(&call_lock);
+static const struct smp_ops xen_smp_ops __initdata = {
+ .smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu,
+ .smp_prepare_cpus = xen_smp_prepare_cpus,
+ .cpu_up = xen_cpu_up,
+ .smp_cpus_done = xen_smp_cpus_done,
- return 0;
+ .smp_send_stop = xen_smp_send_stop,
+ .smp_send_reschedule = xen_smp_send_reschedule,
+
+ .send_call_func_ipi = xen_smp_send_call_function_ipi,
+ .send_call_func_single_ipi = xen_smp_send_call_function_single_ipi,
+};
+
+void __init xen_smp_init(void)
+{
+ smp_ops = xen_smp_ops;
+ xen_fill_possible_map();
+ xen_init_spinlocks();
}