diff options
-rw-r--r-- | include/asm-arm/system.h | 30 | ||||
-rw-r--r-- | include/asm-ia64/system.h | 10 | ||||
-rw-r--r-- | include/asm-mips/system.h | 10 | ||||
-rw-r--r-- | include/asm-s390/system.h | 17 | ||||
-rw-r--r-- | include/asm-sparc/system.h | 4 | ||||
-rw-r--r-- | include/asm-sparc64/system.h | 14 | ||||
-rw-r--r-- | include/linux/init_task.h | 1 | ||||
-rw-r--r-- | include/linux/sched.h | 10 | ||||
-rw-r--r-- | kernel/sched.c | 132 |
9 files changed, 131 insertions, 97 deletions
diff --git a/include/asm-arm/system.h b/include/asm-arm/system.h index 39dd7008013c..3d0d2860b6db 100644 --- a/include/asm-arm/system.h +++ b/include/asm-arm/system.h @@ -145,34 +145,12 @@ extern unsigned int user_debug; #define set_wmb(var, value) do { var = value; wmb(); } while (0) #define nop() __asm__ __volatile__("mov\tr0,r0\t@ nop\n\t"); -#ifdef CONFIG_SMP /* - * Define our own context switch locking. This allows us to enable - * interrupts over the context switch, otherwise we end up with high - * interrupt latency. The real problem area is switch_mm() which may - * do a full cache flush. + * switch_mm() may do a full cache flush over the context switch, + * so enable interrupts over the context switch to avoid high + * latency. */ -#define prepare_arch_switch(rq,next) \ -do { \ - spin_lock(&(next)->switch_lock); \ - spin_unlock_irq(&(rq)->lock); \ -} while (0) - -#define finish_arch_switch(rq,prev) \ - spin_unlock(&(prev)->switch_lock) - -#define task_running(rq,p) \ - ((rq)->curr == (p) || spin_is_locked(&(p)->switch_lock)) -#else -/* - * Our UP-case is more simple, but we assume knowledge of how - * spin_unlock_irq() and friends are implemented. This avoids - * us needlessly decrementing and incrementing the preempt count. - */ -#define prepare_arch_switch(rq,next) local_irq_enable() -#define finish_arch_switch(rq,prev) spin_unlock(&(rq)->lock) -#define task_running(rq,p) ((rq)->curr == (p)) -#endif +#define __ARCH_WANT_INTERRUPTS_ON_CTXSW /* * switch_to(prev, next) should switch from task `prev' to `next' diff --git a/include/asm-ia64/system.h b/include/asm-ia64/system.h index 6f516e76d1f0..cd2cf76b2db1 100644 --- a/include/asm-ia64/system.h +++ b/include/asm-ia64/system.h @@ -183,8 +183,6 @@ do { \ #ifdef __KERNEL__ -#define prepare_to_switch() do { } while(0) - #ifdef CONFIG_IA32_SUPPORT # define IS_IA32_PROCESS(regs) (ia64_psr(regs)->is != 0) #else @@ -274,13 +272,7 @@ extern void ia64_load_extra (struct task_struct *task); * of that CPU which will not be released, because there we wait for the * tasklist_lock to become available. */ -#define prepare_arch_switch(rq, next) \ -do { \ - spin_lock(&(next)->switch_lock); \ - spin_unlock(&(rq)->lock); \ -} while (0) -#define finish_arch_switch(rq, prev) spin_unlock_irq(&(prev)->switch_lock) -#define task_running(rq, p) ((rq)->curr == (p) || spin_is_locked(&(p)->switch_lock)) +#define __ARCH_WANT_UNLOCKED_CTXSW #define ia64_platform_is(x) (strcmp(x, platform_name) == 0) diff --git a/include/asm-mips/system.h b/include/asm-mips/system.h index 888fd8908467..169f3d4265b1 100644 --- a/include/asm-mips/system.h +++ b/include/asm-mips/system.h @@ -422,16 +422,10 @@ extern void __die_if_kernel(const char *, struct pt_regs *, const char *file, extern int stop_a_enabled; /* - * Taken from include/asm-ia64/system.h; prevents deadlock on SMP + * See include/asm-ia64/system.h; prevents deadlock on SMP * systems. */ -#define prepare_arch_switch(rq, next) \ -do { \ - spin_lock(&(next)->switch_lock); \ - spin_unlock(&(rq)->lock); \ -} while (0) -#define finish_arch_switch(rq, prev) spin_unlock_irq(&(prev)->switch_lock) -#define task_running(rq, p) ((rq)->curr == (p) || spin_is_locked(&(p)->switch_lock)) +#define __ARCH_WANT_UNLOCKED_CTXSW #define arch_align_stack(x) (x) diff --git a/include/asm-s390/system.h b/include/asm-s390/system.h index e3cb3ce1d24a..b4a9f05a93d6 100644 --- a/include/asm-s390/system.h +++ b/include/asm-s390/system.h @@ -104,29 +104,18 @@ static inline void restore_access_regs(unsigned int *acrs) prev = __switch_to(prev,next); \ } while (0) -#define prepare_arch_switch(rq, next) do { } while(0) -#define task_running(rq, p) ((rq)->curr == (p)) - #ifdef CONFIG_VIRT_CPU_ACCOUNTING extern void account_user_vtime(struct task_struct *); extern void account_system_vtime(struct task_struct *); - -#define finish_arch_switch(rq, prev) do { \ - set_fs(current->thread.mm_segment); \ - spin_unlock(&(rq)->lock); \ - account_system_vtime(prev); \ - local_irq_enable(); \ -} while (0) - #else +#define account_system_vtime(prev) do { } while (0) +#endif #define finish_arch_switch(rq, prev) do { \ set_fs(current->thread.mm_segment); \ - spin_unlock_irq(&(rq)->lock); \ + account_system_vtime(prev); \ } while (0) -#endif - #define nop() __asm__ __volatile__ ("nop") #define xchg(ptr,x) \ diff --git a/include/asm-sparc/system.h b/include/asm-sparc/system.h index 80cf20cfaee1..898562ebe94c 100644 --- a/include/asm-sparc/system.h +++ b/include/asm-sparc/system.h @@ -101,7 +101,7 @@ extern void fpsave(unsigned long *fpregs, unsigned long *fsr, * SWITCH_ENTER and SWITH_DO_LAZY_FPU do not work yet (e.g. SMP does not work) * XXX WTF is the above comment? Found in late teen 2.4.x. */ -#define prepare_arch_switch(rq, next) do { \ +#define prepare_arch_switch(next) do { \ __asm__ __volatile__( \ ".globl\tflush_patch_switch\nflush_patch_switch:\n\t" \ "save %sp, -0x40, %sp; save %sp, -0x40, %sp; save %sp, -0x40, %sp\n\t" \ @@ -109,8 +109,6 @@ extern void fpsave(unsigned long *fpregs, unsigned long *fsr, "save %sp, -0x40, %sp\n\t" \ "restore; restore; restore; restore; restore; restore; restore"); \ } while(0) -#define finish_arch_switch(rq, next) spin_unlock_irq(&(rq)->lock) -#define task_running(rq, p) ((rq)->curr == (p)) /* Much care has gone into this code, do not touch it. * diff --git a/include/asm-sparc64/system.h b/include/asm-sparc64/system.h index fd12ca386f48..f9be2c5b4dc9 100644 --- a/include/asm-sparc64/system.h +++ b/include/asm-sparc64/system.h @@ -139,19 +139,13 @@ extern void __flushw_user(void); #define flush_user_windows flushw_user #define flush_register_windows flushw_all -#define prepare_arch_switch(rq, next) \ -do { spin_lock(&(next)->switch_lock); \ - spin_unlock(&(rq)->lock); \ +/* Don't hold the runqueue lock over context switch */ +#define __ARCH_WANT_UNLOCKED_CTXSW +#define prepare_arch_switch(next) \ +do { \ flushw_all(); \ } while (0) -#define finish_arch_switch(rq, prev) \ -do { spin_unlock_irq(&(prev)->switch_lock); \ -} while (0) - -#define task_running(rq, p) \ - ((rq)->curr == (p) || spin_is_locked(&(p)->switch_lock)) - /* See what happens when you design the chip correctly? * * We tell gcc we clobber all non-fixed-usage registers except diff --git a/include/linux/init_task.h b/include/linux/init_task.h index a6a8c1a38d5e..03206a425d7a 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -108,7 +108,6 @@ extern struct group_info init_groups; .blocked = {{0}}, \ .alloc_lock = SPIN_LOCK_UNLOCKED, \ .proc_lock = SPIN_LOCK_UNLOCKED, \ - .switch_lock = SPIN_LOCK_UNLOCKED, \ .journal_info = NULL, \ .cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers), \ } diff --git a/include/linux/sched.h b/include/linux/sched.h index 36a10781c3f3..d27be9337425 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -368,6 +368,11 @@ struct signal_struct { #endif }; +/* Context switch must be unlocked if interrupts are to be enabled */ +#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW +# define __ARCH_WANT_UNLOCKED_CTXSW +#endif + /* * Bits in flags field of signal_struct. */ @@ -594,6 +599,9 @@ struct task_struct { int lock_depth; /* BKL lock depth */ +#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW) + int oncpu; +#endif int prio, static_prio; struct list_head run_list; prio_array_t *array; @@ -716,8 +724,6 @@ struct task_struct { spinlock_t alloc_lock; /* Protection of proc_dentry: nesting proc_lock, dcache_lock, write_lock_irq(&tasklist_lock); */ spinlock_t proc_lock; -/* context-switch lock */ - spinlock_t switch_lock; /* journalling filesystem info */ void *journal_info; diff --git a/kernel/sched.c b/kernel/sched.c index 98bf1c091da5..b1410577f9a8 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -268,14 +268,71 @@ static DEFINE_PER_CPU(struct runqueue, runqueues); #define task_rq(p) cpu_rq(task_cpu(p)) #define cpu_curr(cpu) (cpu_rq(cpu)->curr) -/* - * Default context-switch locking: - */ #ifndef prepare_arch_switch -# define prepare_arch_switch(rq, next) do { } while (0) -# define finish_arch_switch(rq, next) spin_unlock_irq(&(rq)->lock) -# define task_running(rq, p) ((rq)->curr == (p)) +# define prepare_arch_switch(next) do { } while (0) +#endif +#ifndef finish_arch_switch +# define finish_arch_switch(prev) do { } while (0) +#endif + +#ifndef __ARCH_WANT_UNLOCKED_CTXSW +static inline int task_running(runqueue_t *rq, task_t *p) +{ + return rq->curr == p; +} + +static inline void prepare_lock_switch(runqueue_t *rq, task_t *next) +{ +} + +static inline void finish_lock_switch(runqueue_t *rq, task_t *prev) +{ + spin_unlock_irq(&rq->lock); +} + +#else /* __ARCH_WANT_UNLOCKED_CTXSW */ +static inline int task_running(runqueue_t *rq, task_t *p) +{ +#ifdef CONFIG_SMP + return p->oncpu; +#else + return rq->curr == p; +#endif +} + +static inline void prepare_lock_switch(runqueue_t *rq, task_t *next) +{ +#ifdef CONFIG_SMP + /* + * We can optimise this out completely for !SMP, because the + * SMP rebalancing from interrupt is the only thing that cares + * here. + */ + next->oncpu = 1; +#endif +#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW + spin_unlock_irq(&rq->lock); +#else + spin_unlock(&rq->lock); #endif +} + +static inline void finish_lock_switch(runqueue_t *rq, task_t *prev) +{ +#ifdef CONFIG_SMP + /* + * After ->oncpu is cleared, the task can be moved to a different CPU. + * We must ensure this doesn't happen until the switch is completely + * finished. + */ + smp_wmb(); + prev->oncpu = 0; +#endif +#ifndef __ARCH_WANT_INTERRUPTS_ON_CTXSW + local_irq_enable(); +#endif +} +#endif /* __ARCH_WANT_UNLOCKED_CTXSW */ /* * task_rq_lock - lock the runqueue a given task resides on and disable @@ -1196,17 +1253,14 @@ void fastcall sched_fork(task_t *p) p->state = TASK_RUNNING; INIT_LIST_HEAD(&p->run_list); p->array = NULL; - spin_lock_init(&p->switch_lock); #ifdef CONFIG_SCHEDSTATS memset(&p->sched_info, 0, sizeof(p->sched_info)); #endif +#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW) + p->oncpu = 0; +#endif #ifdef CONFIG_PREEMPT - /* - * During context-switch we hold precisely one spinlock, which - * schedule_tail drops. (in the common case it's this_rq()->lock, - * but it also can be p->switch_lock.) So we compensate with a count - * of 1. Also, we want to start with kernel preemption disabled. - */ + /* Want to start with kernel preemption disabled. */ p->thread_info->preempt_count = 1; #endif /* @@ -1388,22 +1442,40 @@ void fastcall sched_exit(task_t * p) } /** + * prepare_task_switch - prepare to switch tasks + * @rq: the runqueue preparing to switch + * @next: the task we are going to switch to. + * + * This is called with the rq lock held and interrupts off. It must + * be paired with a subsequent finish_task_switch after the context + * switch. + * + * prepare_task_switch sets up locking and calls architecture specific + * hooks. + */ +static inline void prepare_task_switch(runqueue_t *rq, task_t *next) +{ + prepare_lock_switch(rq, next); + prepare_arch_switch(next); +} + +/** * finish_task_switch - clean up after a task-switch * @prev: the thread we just switched away from. * - * We enter this with the runqueue still locked, and finish_arch_switch() - * will unlock it along with doing any other architecture-specific cleanup - * actions. + * finish_task_switch must be called after the context switch, paired + * with a prepare_task_switch call before the context switch. + * finish_task_switch will reconcile locking set up by prepare_task_switch, + * and do any other architecture-specific cleanup actions. * * Note that we may have delayed dropping an mm in context_switch(). If * so, we finish that here outside of the runqueue lock. (Doing it * with the lock held can cause deadlocks; see schedule() for * details.) */ -static inline void finish_task_switch(task_t *prev) +static inline void finish_task_switch(runqueue_t *rq, task_t *prev) __releases(rq->lock) { - runqueue_t *rq = this_rq(); struct mm_struct *mm = rq->prev_mm; unsigned long prev_task_flags; @@ -1421,7 +1493,8 @@ static inline void finish_task_switch(task_t *prev) * Manfred Spraul <manfred@colorfullife.com> */ prev_task_flags = prev->flags; - finish_arch_switch(rq, prev); + finish_arch_switch(prev); + finish_lock_switch(rq, prev); if (mm) mmdrop(mm); if (unlikely(prev_task_flags & PF_DEAD)) @@ -1435,8 +1508,12 @@ static inline void finish_task_switch(task_t *prev) asmlinkage void schedule_tail(task_t *prev) __releases(rq->lock) { - finish_task_switch(prev); - + runqueue_t *rq = this_rq(); + finish_task_switch(rq, prev); +#ifdef __ARCH_WANT_UNLOCKED_CTXSW + /* In this case, finish_task_switch does not reenable preemption */ + preempt_enable(); +#endif if (current->set_child_tid) put_user(current->pid, current->set_child_tid); } @@ -2816,11 +2893,15 @@ switch_tasks: rq->curr = next; ++*switch_count; - prepare_arch_switch(rq, next); + prepare_task_switch(rq, next); prev = context_switch(rq, prev, next); barrier(); - - finish_task_switch(prev); + /* + * this_rq must be evaluated again because prev may have moved + * CPUs since it called schedule(), thus the 'rq' on its stack + * frame will be invalid. + */ + finish_task_switch(this_rq(), prev); } else spin_unlock_irq(&rq->lock); @@ -4085,6 +4166,9 @@ void __devinit init_idle(task_t *idle, int cpu) spin_lock_irqsave(&rq->lock, flags); rq->curr = rq->idle = idle; +#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW) + idle->oncpu = 1; +#endif set_tsk_need_resched(idle); spin_unlock_irqrestore(&rq->lock, flags); |