summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- include/asm-arm/system.h | 30
-rw-r--r-- include/asm-ia64/system.h | 10
-rw-r--r-- include/asm-mips/system.h | 10
-rw-r--r-- include/asm-s390/system.h | 17
-rw-r--r-- include/asm-sparc/system.h | 4
-rw-r--r-- include/asm-sparc64/system.h | 14
-rw-r--r-- include/linux/init_task.h | 1
-rw-r--r-- include/linux/sched.h | 10
-rw-r--r-- kernel/sched.c | 132
9 files changed, 131 insertions, 97 deletions
diff --git a/include/asm-arm/system.h b/include/asm-arm/system.h
index 39dd7008013c..3d0d2860b6db 100644
--- a/include/asm-arm/system.h
+++ b/include/asm-arm/system.h
@@ -145,34 +145,12 @@ extern unsigned int user_debug;
#define set_wmb(var, value) do { var = value; wmb(); } while (0)
#define nop() __asm__ __volatile__("mov\tr0,r0\t@ nop\n\t");
-#ifdef CONFIG_SMP
/*
- * Define our own context switch locking. This allows us to enable
- * interrupts over the context switch, otherwise we end up with high
- * interrupt latency. The real problem area is switch_mm() which may
- * do a full cache flush.
+ * switch_mm() may do a full cache flush during the context switch,
+ * so enable interrupts over the context switch to avoid high
+ * interrupt latency.
*/
-#define prepare_arch_switch(rq,next) \
-do { \
- spin_lock(&(next)->switch_lock); \
- spin_unlock_irq(&(rq)->lock); \
-} while (0)
-
-#define finish_arch_switch(rq,prev) \
- spin_unlock(&(prev)->switch_lock)
-
-#define task_running(rq,p) \
- ((rq)->curr == (p) || spin_is_locked(&(p)->switch_lock))
-#else
-/*
- * Our UP-case is more simple, but we assume knowledge of how
- * spin_unlock_irq() and friends are implemented. This avoids
- * us needlessly decrementing and incrementing the preempt count.
- */
-#define prepare_arch_switch(rq,next) local_irq_enable()
-#define finish_arch_switch(rq,prev) spin_unlock(&(rq)->lock)
-#define task_running(rq,p) ((rq)->curr == (p))
-#endif
+#define __ARCH_WANT_INTERRUPTS_ON_CTXSW
/*
* switch_to(prev, next) should switch from task `prev' to `next'
diff --git a/include/asm-ia64/system.h b/include/asm-ia64/system.h
index 6f516e76d1f0..cd2cf76b2db1 100644
--- a/include/asm-ia64/system.h
+++ b/include/asm-ia64/system.h
@@ -183,8 +183,6 @@ do { \
#ifdef __KERNEL__
-#define prepare_to_switch() do { } while(0)
-
#ifdef CONFIG_IA32_SUPPORT
# define IS_IA32_PROCESS(regs) (ia64_psr(regs)->is != 0)
#else
@@ -274,13 +272,7 @@ extern void ia64_load_extra (struct task_struct *task);
* of that CPU which will not be released, because there we wait for the
* tasklist_lock to become available.
*/
-#define prepare_arch_switch(rq, next) \
-do { \
- spin_lock(&(next)->switch_lock); \
- spin_unlock(&(rq)->lock); \
-} while (0)
-#define finish_arch_switch(rq, prev) spin_unlock_irq(&(prev)->switch_lock)
-#define task_running(rq, p) ((rq)->curr == (p) || spin_is_locked(&(p)->switch_lock))
+#define __ARCH_WANT_UNLOCKED_CTXSW
#define ia64_platform_is(x) (strcmp(x, platform_name) == 0)
diff --git a/include/asm-mips/system.h b/include/asm-mips/system.h
index 888fd8908467..169f3d4265b1 100644
--- a/include/asm-mips/system.h
+++ b/include/asm-mips/system.h
@@ -422,16 +422,10 @@ extern void __die_if_kernel(const char *, struct pt_regs *, const char *file,
extern int stop_a_enabled;
/*
- * Taken from include/asm-ia64/system.h; prevents deadlock on SMP
+ * See include/asm-ia64/system.h; prevents deadlock on SMP
* systems.
*/
-#define prepare_arch_switch(rq, next) \
-do { \
- spin_lock(&(next)->switch_lock); \
- spin_unlock(&(rq)->lock); \
-} while (0)
-#define finish_arch_switch(rq, prev) spin_unlock_irq(&(prev)->switch_lock)
-#define task_running(rq, p) ((rq)->curr == (p) || spin_is_locked(&(p)->switch_lock))
+#define __ARCH_WANT_UNLOCKED_CTXSW
#define arch_align_stack(x) (x)
diff --git a/include/asm-s390/system.h b/include/asm-s390/system.h
index e3cb3ce1d24a..b4a9f05a93d6 100644
--- a/include/asm-s390/system.h
+++ b/include/asm-s390/system.h
@@ -104,29 +104,18 @@ static inline void restore_access_regs(unsigned int *acrs)
prev = __switch_to(prev,next); \
} while (0)
-#define prepare_arch_switch(rq, next) do { } while(0)
-#define task_running(rq, p) ((rq)->curr == (p))
-
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
extern void account_user_vtime(struct task_struct *);
extern void account_system_vtime(struct task_struct *);
-
-#define finish_arch_switch(rq, prev) do { \
- set_fs(current->thread.mm_segment); \
- spin_unlock(&(rq)->lock); \
- account_system_vtime(prev); \
- local_irq_enable(); \
-} while (0)
-
#else
+#define account_system_vtime(prev) do { } while (0)
+#endif
#define finish_arch_switch(rq, prev) do { \
set_fs(current->thread.mm_segment); \
- spin_unlock_irq(&(rq)->lock); \
+ account_system_vtime(prev); \
} while (0)
-#endif
-
#define nop() __asm__ __volatile__ ("nop")
#define xchg(ptr,x) \
diff --git a/include/asm-sparc/system.h b/include/asm-sparc/system.h
index 80cf20cfaee1..898562ebe94c 100644
--- a/include/asm-sparc/system.h
+++ b/include/asm-sparc/system.h
@@ -101,7 +101,7 @@ extern void fpsave(unsigned long *fpregs, unsigned long *fsr,
* SWITCH_ENTER and SWITH_DO_LAZY_FPU do not work yet (e.g. SMP does not work)
* XXX WTF is the above comment? Found in late teen 2.4.x.
*/
-#define prepare_arch_switch(rq, next) do { \
+#define prepare_arch_switch(next) do { \
__asm__ __volatile__( \
".globl\tflush_patch_switch\nflush_patch_switch:\n\t" \
"save %sp, -0x40, %sp; save %sp, -0x40, %sp; save %sp, -0x40, %sp\n\t" \
@@ -109,8 +109,6 @@ extern void fpsave(unsigned long *fpregs, unsigned long *fsr,
"save %sp, -0x40, %sp\n\t" \
"restore; restore; restore; restore; restore; restore; restore"); \
} while(0)
-#define finish_arch_switch(rq, next) spin_unlock_irq(&(rq)->lock)
-#define task_running(rq, p) ((rq)->curr == (p))
/* Much care has gone into this code, do not touch it.
*
diff --git a/include/asm-sparc64/system.h b/include/asm-sparc64/system.h
index fd12ca386f48..f9be2c5b4dc9 100644
--- a/include/asm-sparc64/system.h
+++ b/include/asm-sparc64/system.h
@@ -139,19 +139,13 @@ extern void __flushw_user(void);
#define flush_user_windows flushw_user
#define flush_register_windows flushw_all
-#define prepare_arch_switch(rq, next) \
-do { spin_lock(&(next)->switch_lock); \
- spin_unlock(&(rq)->lock); \
+/* Don't hold the runqueue lock over context switch */
+#define __ARCH_WANT_UNLOCKED_CTXSW
+#define prepare_arch_switch(next) \
+do { \
flushw_all(); \
} while (0)
-#define finish_arch_switch(rq, prev) \
-do { spin_unlock_irq(&(prev)->switch_lock); \
-} while (0)
-
-#define task_running(rq, p) \
- ((rq)->curr == (p) || spin_is_locked(&(p)->switch_lock))
-
/* See what happens when you design the chip correctly?
*
* We tell gcc we clobber all non-fixed-usage registers except
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index a6a8c1a38d5e..03206a425d7a 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -108,7 +108,6 @@ extern struct group_info init_groups;
.blocked = {{0}}, \
.alloc_lock = SPIN_LOCK_UNLOCKED, \
.proc_lock = SPIN_LOCK_UNLOCKED, \
- .switch_lock = SPIN_LOCK_UNLOCKED, \
.journal_info = NULL, \
.cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers), \
}
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 36a10781c3f3..d27be9337425 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -368,6 +368,11 @@ struct signal_struct {
#endif
};
+/* Context switch must be unlocked if interrupts are to be enabled */
+#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
+# define __ARCH_WANT_UNLOCKED_CTXSW
+#endif
+
/*
* Bits in flags field of signal_struct.
*/
@@ -594,6 +599,9 @@ struct task_struct {
int lock_depth; /* BKL lock depth */
+#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW)
+ int oncpu;
+#endif
int prio, static_prio;
struct list_head run_list;
prio_array_t *array;
@@ -716,8 +724,6 @@ struct task_struct {
spinlock_t alloc_lock;
/* Protection of proc_dentry: nesting proc_lock, dcache_lock, write_lock_irq(&tasklist_lock); */
spinlock_t proc_lock;
-/* context-switch lock */
- spinlock_t switch_lock;
/* journalling filesystem info */
void *journal_info;
diff --git a/kernel/sched.c b/kernel/sched.c
index 98bf1c091da5..b1410577f9a8 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -268,14 +268,71 @@ static DEFINE_PER_CPU(struct runqueue, runqueues);
#define task_rq(p) cpu_rq(task_cpu(p))
#define cpu_curr(cpu) (cpu_rq(cpu)->curr)
-/*
- * Default context-switch locking:
- */
#ifndef prepare_arch_switch
-# define prepare_arch_switch(rq, next) do { } while (0)
-# define finish_arch_switch(rq, next) spin_unlock_irq(&(rq)->lock)
-# define task_running(rq, p) ((rq)->curr == (p))
+# define prepare_arch_switch(next) do { } while (0)
+#endif
+#ifndef finish_arch_switch
+# define finish_arch_switch(prev) do { } while (0)
+#endif
+
+#ifndef __ARCH_WANT_UNLOCKED_CTXSW
+static inline int task_running(runqueue_t *rq, task_t *p)
+{
+ return rq->curr == p;
+}
+
+static inline void prepare_lock_switch(runqueue_t *rq, task_t *next)
+{
+}
+
+static inline void finish_lock_switch(runqueue_t *rq, task_t *prev)
+{
+ spin_unlock_irq(&rq->lock);
+}
+
+#else /* __ARCH_WANT_UNLOCKED_CTXSW */
+static inline int task_running(runqueue_t *rq, task_t *p)
+{
+#ifdef CONFIG_SMP
+ return p->oncpu;
+#else
+ return rq->curr == p;
+#endif
+}
+
+static inline void prepare_lock_switch(runqueue_t *rq, task_t *next)
+{
+#ifdef CONFIG_SMP
+ /*
+ * Setting ->oncpu can be optimised out completely for !SMP,
+ * because SMP rebalancing from interrupt context is the only
+ * thing that cares about it.
+ */
+ next->oncpu = 1;
+#endif
+#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
+ spin_unlock_irq(&rq->lock);
+#else
+ spin_unlock(&rq->lock);
#endif
+}
+
+static inline void finish_lock_switch(runqueue_t *rq, task_t *prev)
+{
+#ifdef CONFIG_SMP
+ /*
+ * After ->oncpu is cleared, the task can be moved to a different CPU.
+ * We must ensure this doesn't happen until the switch is completely
+ * finished.
+ */
+ smp_wmb();
+ prev->oncpu = 0;
+#endif
+#ifndef __ARCH_WANT_INTERRUPTS_ON_CTXSW
+ local_irq_enable();
+#endif
+}
+#endif /* __ARCH_WANT_UNLOCKED_CTXSW */
/*
* task_rq_lock - lock the runqueue a given task resides on and disable
@@ -1196,17 +1253,14 @@ void fastcall sched_fork(task_t *p)
p->state = TASK_RUNNING;
INIT_LIST_HEAD(&p->run_list);
p->array = NULL;
- spin_lock_init(&p->switch_lock);
#ifdef CONFIG_SCHEDSTATS
memset(&p->sched_info, 0, sizeof(p->sched_info));
#endif
+#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW)
+ p->oncpu = 0;
+#endif
#ifdef CONFIG_PREEMPT
- /*
- * During context-switch we hold precisely one spinlock, which
- * schedule_tail drops. (in the common case it's this_rq()->lock,
- * but it also can be p->switch_lock.) So we compensate with a count
- * of 1. Also, we want to start with kernel preemption disabled.
- */
+ /* Want to start with kernel preemption disabled. */
p->thread_info->preempt_count = 1;
#endif
/*
@@ -1388,22 +1442,40 @@ void fastcall sched_exit(task_t * p)
}
/**
+ * prepare_task_switch - prepare to switch tasks
+ * @rq: the runqueue preparing to switch
+ * @next: the task we are going to switch to.
+ *
+ * This is called with the rq lock held and interrupts off. It must
+ * be paired with a subsequent finish_task_switch after the context
+ * switch.
+ *
+ * prepare_task_switch sets up locking and calls architecture specific
+ * hooks.
+ */
+static inline void prepare_task_switch(runqueue_t *rq, task_t *next)
+{
+ prepare_lock_switch(rq, next);
+ prepare_arch_switch(next);
+}
+
+/**
* finish_task_switch - clean up after a task-switch
* @prev: the thread we just switched away from.
*
- * We enter this with the runqueue still locked, and finish_arch_switch()
- * will unlock it along with doing any other architecture-specific cleanup
- * actions.
+ * finish_task_switch must be called after the context switch, paired
+ * with a prepare_task_switch call before the context switch.
+ * finish_task_switch will reconcile locking set up by prepare_task_switch,
+ * and do any other architecture-specific cleanup actions.
*
* Note that we may have delayed dropping an mm in context_switch(). If
* so, we finish that here outside of the runqueue lock. (Doing it
* with the lock held can cause deadlocks; see schedule() for
* details.)
*/
-static inline void finish_task_switch(task_t *prev)
+static inline void finish_task_switch(runqueue_t *rq, task_t *prev)
__releases(rq->lock)
{
- runqueue_t *rq = this_rq();
struct mm_struct *mm = rq->prev_mm;
unsigned long prev_task_flags;
@@ -1421,7 +1493,8 @@ static inline void finish_task_switch(task_t *prev)
* Manfred Spraul <manfred@colorfullife.com>
*/
prev_task_flags = prev->flags;
- finish_arch_switch(rq, prev);
+ finish_arch_switch(prev);
+ finish_lock_switch(rq, prev);
if (mm)
mmdrop(mm);
if (unlikely(prev_task_flags & PF_DEAD))
@@ -1435,8 +1508,12 @@ static inline void finish_task_switch(task_t *prev)
asmlinkage void schedule_tail(task_t *prev)
__releases(rq->lock)
{
- finish_task_switch(prev);
-
+ runqueue_t *rq = this_rq();
+ finish_task_switch(rq, prev);
+#ifdef __ARCH_WANT_UNLOCKED_CTXSW
+ /* In this case, finish_task_switch does not reenable preemption */
+ preempt_enable();
+#endif
if (current->set_child_tid)
put_user(current->pid, current->set_child_tid);
}
@@ -2816,11 +2893,15 @@ switch_tasks:
rq->curr = next;
++*switch_count;
- prepare_arch_switch(rq, next);
+ prepare_task_switch(rq, next);
prev = context_switch(rq, prev, next);
barrier();
-
- finish_task_switch(prev);
+ /*
+ * this_rq must be evaluated again because prev may have moved
+ * CPUs since it called schedule(), thus the 'rq' on its stack
+ * frame will be invalid.
+ */
+ finish_task_switch(this_rq(), prev);
} else
spin_unlock_irq(&rq->lock);
@@ -4085,6 +4166,9 @@ void __devinit init_idle(task_t *idle, int cpu)
spin_lock_irqsave(&rq->lock, flags);
rq->curr = rq->idle = idle;
+#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW)
+ idle->oncpu = 1;
+#endif
set_tsk_need_resched(idle);
spin_unlock_irqrestore(&rq->lock, flags);