From 39be350127ec60a078edffe5b4915dafba4ba514 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Thu, 26 Jan 2012 12:44:34 +0100
Subject: sched, block: Unify cache detection

The block layer has some code trying to determine if two CPUs share a
cache; the scheduler has a similar function. Expose the function used
by the scheduler and make the block layer use it, thereby removing the
block layer's usage of CONFIG_SCHED* and topology bits.

Signed-off-by: Peter Zijlstra
Acked-by: Jens Axboe
Link: http://lkml.kernel.org/r/1327579450.2446.95.camel@twins
---
 include/linux/sched.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 513f52459872..0e1959568836 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1052,6 +1052,8 @@ static inline int test_sd_parent(struct sched_domain *sd, int flag)
 unsigned long default_scale_freq_power(struct sched_domain *sd, int cpu);
 unsigned long default_scale_smt_power(struct sched_domain *sd, int cpu);
 
+bool cpus_share_cache(int this_cpu, int that_cpu);
+
 #else /* CONFIG_SMP */
 
 struct sched_domain_attr;
@@ -1061,6 +1063,12 @@ partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
 			struct sched_domain_attr *dattr_new)
 {
 }
+
+static inline bool cpus_share_cache(int this_cpu, int that_cpu)
+{
+	return true;
+}
+
 #endif /* !CONFIG_SMP */
--
cgit v1.2.3


From 4ec4412e1e91f44a3dcb97b6c9172a13fc78bac9 Mon Sep 17 00:00:00 2001
From: Vincent Guittot
Date: Mon, 12 Dec 2011 20:21:08 +0100
Subject: sched: Ensure cpu_power periodic update

With a lot of small tasks, the scheduler softirq is almost never raised
when no_hz is enabled. In this case load_balance() is mainly called
with the newly_idle mode, which doesn't update the cpu_power.

Add a next_update field which ensures a maximum update period even when
there is only short activity. Having stale cpu_power information can
skew the load-balancing decisions; the guaranteed update cures this.

Signed-off-by: Vincent Guittot
Signed-off-by: Peter Zijlstra
Link: http://lkml.kernel.org/r/1323717668-2143-1-git-send-email-vincent.guittot@linaro.org
---
 include/linux/sched.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 0e1959568836..92313a3f6f77 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -905,6 +905,7 @@ struct sched_group_power {
 	 * single CPU.
 	 */
 	unsigned int power, power_orig;
+	unsigned long next_update;
 	/*
 	 * Number of busy cpus in this group.
 	 */
--
cgit v1.2.3


From 9388dc3047a88bedfd867e9ba3e1980c815ac524 Mon Sep 17 00:00:00 2001
From: Anton Vorontsov
Date: Thu, 9 Feb 2012 20:45:19 +0400
Subject: sched: Turn lock_task_sighand() into a static inline

It appears that the sparse tool understands static inline functions
for context-balance checking, so let's turn the macro into an inline
function. This makes the code a little more robust.

Suggested-by: Oleg Nesterov
Signed-off-by: Anton Vorontsov
Cc: KOSAKI Motohiro
Cc: Greg KH
Cc: Arve
Cc: San Mehat
Cc: Colin Cross
Cc: Eric W. Biederman
Cc: Paul E. McKenney
Cc: kernel-team@android.com
Cc: linaro-kernel@lists.linaro.org
Cc: Peter Zijlstra
Link: http://lkml.kernel.org/r/20120209164519.GA10266@oksana.dev.rtsoft.ru
Signed-off-by: Ingo Molnar
---
 include/linux/sched.h | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 92313a3f6f77..5dba2ad52431 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2393,12 +2393,15 @@ static inline void task_unlock(struct task_struct *p)
 extern struct sighand_struct *__lock_task_sighand(struct task_struct *tsk,
 							unsigned long *flags);
 
-#define lock_task_sighand(tsk, flags)					\
-({	struct sighand_struct *__ss;					\
-	__cond_lock(&(tsk)->sighand->siglock,				\
-		    (__ss = __lock_task_sighand(tsk, flags)));		\
-	__ss;								\
-})									\
+static inline struct sighand_struct *lock_task_sighand(struct task_struct *tsk,
+						       unsigned long *flags)
+{
+	struct sighand_struct *ret;
+
+	ret = __lock_task_sighand(tsk, flags);
+	(void)__cond_lock(&tsk->sighand->siglock, ret);
+	return ret;
+}
 
 static inline void unlock_task_sighand(struct task_struct *tsk,
 						unsigned long *flags)
--
cgit v1.2.3


From de5bdff7a72acc281219be2b8edeeca1fd81c542 Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto
Date: Thu, 16 Feb 2012 14:52:21 +0900
Subject: sched: Make initial SCHED_RR timeslice DEF_TIMESLICE

Currently the initial SCHED_RR timeslice of init_task is HZ, i.e. one
second, which is not the same as the default SCHED_RR timeslice
DEF_TIMESLICE. Change the initial timeslice to DEF_TIMESLICE.

Signed-off-by: Hiroshi Shimamoto
[ s/DEF_TIMESLICE/RR_TIMESLICE/g ]
Signed-off-by: Peter Zijlstra
Link: http://lkml.kernel.org/r/4F3C9995.3010800@ct.jp.nec.com
Signed-off-by: Ingo Molnar
---
 include/linux/init_task.h | 2 +-
 include/linux/sched.h     | 6 ++++++
 2 files changed, 7 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 9c66b1ada9d7..f994d51f70f2 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -149,7 +149,7 @@ extern struct cred init_cred;
 	},								\
 	.rt		= {						\
 		.run_list	= LIST_HEAD_INIT(tsk.rt.run_list),	\
-		.time_slice	= HZ,					\
+		.time_slice	= RR_TIMESLICE,				\
 		.nr_cpus_allowed = NR_CPUS,				\
 	},								\
 	.tasks		= LIST_HEAD_INIT(tsk.tasks),			\
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5dba2ad52431..eb5de466f099 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1234,6 +1234,12 @@ struct sched_rt_entity {
 #endif
 };
 
+/*
+ * default timeslice is 100 msecs (used only for SCHED_RR tasks).
+ * Timeslices get refilled after they expire.
+ */
+#define RR_TIMESLICE		(100 * HZ / 1000)
+
 struct rcu_node;
 
 enum perf_event_task_context {
--
cgit v1.2.3


From c5491ea779793f977d282754db478157cc409d82 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Mon, 21 Mar 2011 12:09:35 +0100
Subject: sched/rt: Add schedule_preempt_disabled()

Add a helper to get rid of the ever-repeating pattern:

	preempt_enable_no_resched();
	schedule();
	preempt_disable();
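The helper presumably just wraps that sequence; a minimal sketch of
what the out-of-line definition might look like (the real body lives in
the scheduler core, this patch only adds the declaration to sched.h):

	/* Sketch only: assumes the definition simply wraps the old pattern. */
	void schedule_preempt_disabled(void)
	{
		preempt_enable_no_resched();
		schedule();
		preempt_disable();
	}

Callers that sit in a loop with preemption disabled, such as idle
loops, can then replace the three-line dance with a single call.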
Signed-off-by: Thomas Gleixner
Acked-by: Peter Zijlstra
Link: http://lkml.kernel.org/n/tip-wxx7btox7coby6ifv5vzhzgp@git.kernel.org
Signed-off-by: Ingo Molnar
---
 include/linux/sched.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 1a6398424fab..03dd224d0667 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -361,6 +361,7 @@ extern signed long schedule_timeout_interruptible(signed long timeout);
 extern signed long schedule_timeout_killable(signed long timeout);
 extern signed long schedule_timeout_uninterruptible(signed long timeout);
 asmlinkage void schedule(void);
+extern void schedule_preempt_disabled(void);
 extern int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner);
 
 struct nsproxy;
--
cgit v1.2.3


From ba74c1448f127649046615ec017bded7b2a76f29 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Mon, 21 Mar 2011 13:32:17 +0100
Subject: sched/rt: Document scheduler-related skip-resched-check sites

Create a distinction between scheduler-related
preempt_enable_no_resched() calls and the nearly one hundred other
places in the kernel that do not want to reschedule, for one reason or
another.

This distinction matters for -rt, where the scheduler and the
non-scheduler preempt models (and checks) are different. For upstream
it's purely a documentation change.

Signed-off-by: Thomas Gleixner
Link: http://lkml.kernel.org/n/tip-gs88fvx2mdv5psnzxnv575ke@git.kernel.org
Signed-off-by: Ingo Molnar
---
 include/linux/preempt.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index 58969b2a8a82..5a710b9c578e 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -48,12 +48,14 @@ do { \
 	barrier(); \
 } while (0)
 
-#define preempt_enable_no_resched() \
+#define sched_preempt_enable_no_resched() \
 do { \
 	barrier(); \
 	dec_preempt_count(); \
 } while (0)
 
+#define preempt_enable_no_resched()	sched_preempt_enable_no_resched()
+
 #define preempt_enable() \
 do { \
 	preempt_enable_no_resched(); \
@@ -92,6 +94,7 @@ do { \
 #else /* !CONFIG_PREEMPT_COUNT */
 
 #define preempt_disable()		do { } while (0)
+#define sched_preempt_enable_no_resched()	do { } while (0)
 #define preempt_enable_no_resched()	do { } while (0)
 #define preempt_enable()		do { } while (0)
--
cgit v1.2.3


From 63b2001169e75cd71e917ec953fdab572e3f944a Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Thu, 1 Dec 2011 00:04:00 +0100
Subject: sched/wait: Add __wake_up_all_locked() API

For code which protects the waitqueue itself with another lock, it
makes no sense to acquire the waitqueue lock for a wake-up-all.
Provide __wake_up_all_locked().

This is an optimization on the vanilla kernel (to be used by the PCI
code) and an important semantic distinction on -rt.
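A minimal usage sketch, with hypothetical names: the caller's own
spinlock already serializes all access to the waitqueue, so the locked
variant can skip taking q->lock:

	/* dev_lock, not the waitqueue lock, protects dev_wait here. */
	static DEFINE_SPINLOCK(dev_lock);
	static DECLARE_WAIT_QUEUE_HEAD(dev_wait);
	static bool dev_ready;

	static void dev_complete(void)
	{
		spin_lock_irq(&dev_lock);
		dev_ready = true;
		wake_up_all_locked(&dev_wait);	/* no nested q->lock acquisition */
		spin_unlock_irq(&dev_lock);
	}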
Signed-off-by: Thomas Gleixner
Acked-by: Peter Zijlstra
Link: http://lkml.kernel.org/n/tip-ux6m4b8jonb9inx8xafh77ds@git.kernel.org
Signed-off-by: Ingo Molnar
---
 include/linux/wait.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/wait.h b/include/linux/wait.h
index a9ce45e8501c..7d9a9e990ce6 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -157,7 +157,7 @@ void __wake_up(wait_queue_head_t *q, unsigned int mode, int nr, void *key);
 void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key);
 void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode, int nr, void *key);
-void __wake_up_locked(wait_queue_head_t *q, unsigned int mode);
+void __wake_up_locked(wait_queue_head_t *q, unsigned int mode, int nr);
 void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr);
 void __wake_up_bit(wait_queue_head_t *, void *, int);
 int __wait_on_bit(wait_queue_head_t *, struct wait_bit_queue *, int (*)(void *), unsigned);
@@ -170,7 +170,8 @@ wait_queue_head_t *bit_waitqueue(void *, int);
 #define wake_up(x)			__wake_up(x, TASK_NORMAL, 1, NULL)
 #define wake_up_nr(x, nr)		__wake_up(x, TASK_NORMAL, nr, NULL)
 #define wake_up_all(x)			__wake_up(x, TASK_NORMAL, 0, NULL)
-#define wake_up_locked(x)		__wake_up_locked((x), TASK_NORMAL)
+#define wake_up_locked(x)		__wake_up_locked((x), TASK_NORMAL, 1)
+#define wake_up_all_locked(x)		__wake_up_locked((x), TASK_NORMAL, 0)
 
 #define wake_up_interruptible(x)	__wake_up(x, TASK_INTERRUPTIBLE, 1, NULL)
 #define wake_up_interruptible_nr(x, nr) __wake_up(x, TASK_INTERRUPTIBLE, nr, NULL)
--
cgit v1.2.3


From 3c7d51843b03a6839e9ec7cda724e54d2319a63a Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Sun, 17 Jul 2011 20:46:52 +0200
Subject: sched/rt: Do not submit new work when PI-blocked

When we are PI-blocked, we want to get things done ASAP.

Signed-off-by: Thomas Gleixner
Acked-by: Peter Zijlstra
Link: http://lkml.kernel.org/n/tip-vw8et3445km5b8mpihf4trae@git.kernel.org
Signed-off-by: Ingo Molnar
---
 include/linux/sched.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 03dd224d0667..c628a9151437 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2082,12 +2082,20 @@ extern unsigned int sysctl_sched_cfs_bandwidth_slice;
 extern int rt_mutex_getprio(struct task_struct *p);
 extern void rt_mutex_setprio(struct task_struct *p, int prio);
 extern void rt_mutex_adjust_pi(struct task_struct *p);
+static inline bool tsk_is_pi_blocked(struct task_struct *tsk)
+{
+	return tsk->pi_blocked_on != NULL;
+}
 #else
 static inline int rt_mutex_getprio(struct task_struct *p)
 {
 	return p->normal_prio;
 }
 # define rt_mutex_adjust_pi(p)		do { } while (0)
+static inline bool tsk_is_pi_blocked(struct task_struct *tsk)
+{
+	return false;
+}
 #endif
 
 extern bool yield_to(struct task_struct *p, bool preempt);
--
cgit v1.2.3


From 2e5b5b3a1b7768c89fbfeca18e75f8ee377e924c Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto
Date: Thu, 23 Feb 2012 17:41:27 +0900
Subject: sched: Clean up parameter passing of proc_sched_autogroup_set_nice()

Pass nice by value to proc_sched_autogroup_set_nice() instead of by
pointer. No side effect is expected, and the caller's err variable is
simply overwritten with the return value.
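A sketch of the intended call-site shape after the change (a
hypothetical handler modelled on the /proc write path; all names here
are illustrative):

	static ssize_t autogroup_nice_store(struct task_struct *p,
					    int nice, size_t count)
	{
		int err;

		/* nice is passed by value; the error arrives as the return value */
		err = proc_sched_autogroup_set_nice(p, nice);

		return err ? err : count;
	}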
Signed-off-by: Hiroshi Shimamoto
Signed-off-by: Peter Zijlstra
Link: http://lkml.kernel.org/r/4F45FBB7.5090607@ct.jp.nec.com
Signed-off-by: Ingo Molnar
---
 include/linux/sched.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index c628a9151437..c298fb9cf5ad 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2065,7 +2065,7 @@ extern void sched_autogroup_fork(struct signal_struct *sig);
 extern void sched_autogroup_exit(struct signal_struct *sig);
 #ifdef CONFIG_PROC_FS
 extern void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m);
-extern int proc_sched_autogroup_set_nice(struct task_struct *p, int *nice);
+extern int proc_sched_autogroup_set_nice(struct task_struct *p, int nice);
 #endif
 #else
 static inline void sched_autogroup_create_attach(struct task_struct *p) { }
--
cgit v1.2.3


From 3ccf3e8306156a28213adc720aba807e9a901ad5 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Mon, 27 Feb 2012 10:47:00 +0100
Subject: printk/sched: Introduce special printk_sched() for those awkward moments

There are a few awkward printk()s inside the scheduler guts that people
prefer to keep, but which really are rather deadlock-prone. Fudge around
it by storing the text in a per-cpu buffer and polling it from the
existing printk_tick() handler.

This will drop output when it's more frequent than once a tick; however,
only the affinity message could possibly go that fast, and for that a
single line should suffice to notify the admin that he's done something
silly.

Signed-off-by: Peter Zijlstra
Cc: Linus Torvalds
Cc: Andrew Morton
Link: http://lkml.kernel.org/n/tip-wua3lmkt3dg8nfts66o6brne@git.kernel.org
Signed-off-by: Ingo Molnar
---
 include/linux/printk.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include')

diff --git a/include/linux/printk.h b/include/linux/printk.h
index f0e22f75143f..1f77a4174ee0 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -100,6 +100,11 @@ int vprintk(const char *fmt, va_list args);
 asmlinkage __printf(1, 2) __cold
 int printk(const char *fmt, ...);
 
+/*
+ * Special printk facility for scheduler use only, _DO_NOT_USE_ !
+ */
+__printf(1, 2) __cold int printk_sched(const char *fmt, ...);
+
 /*
  * Please don't use printk_ratelimit(), because it shares ratelimiting state
  * with all other unrelated printk_ratelimit() callsites. Instead use
@@ -127,6 +132,11 @@ int printk(const char *s, ...)
 {
 	return 0;
 }
+static inline __printf(1, 2) __cold
+int printk_sched(const char *s, ...)
+{
+	return 0;
+}
 static inline int printk_ratelimit(void)
 {
 	return 0;
--
cgit v1.2.3


From 9993bc635d01a6ee7f6b833b4ee65ce7c06350b1 Mon Sep 17 00:00:00 2001
From: Salman Qazi
Date: Fri, 9 Mar 2012 16:41:01 -0800
Subject: sched/x86: Fix overflow in cyc2ns_offset

When a machine boots up, the TSC generally gets reset. However, when
kexec is used to boot into a kernel, the TSC value is carried over
from the previous kernel. The computation of cyc2ns_offset in
set_cyc2ns_scale() is prone to overflow if the machine has been up
more than 208 days prior to the kexec.

The overflow happens when we multiply by *scale, even though there is
enough room to store the final answer. We fix this issue by decomposing
tsc_now into the quotient and remainder of division by
CYC2NS_SCALE_FACTOR and then performing the multiplication separately
on the two components.

The code is also refactored to share the calculation with the previous
fix in __cycles_2_ns().
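A sketch of the shape of the fix in set_cyc2ns_scale(), assuming the
usual CYC2NS_SCALE_FACTOR fixed-point convention (mult_frac() is the
helper added by this patch; see the kernel.h hunk below):

	/* Before (sketch): the intermediate tsc_now * *scale can exceed
	 * 64 bits once tsc_now is large enough (~208 days of uptime
	 * carried over a kexec).
	 */
	*offset = ns_now - ((tsc_now * *scale) >> CYC2NS_SCALE_FACTOR);

	/* After (sketch): quotient and remainder are scaled separately,
	 * so no intermediate product overflows, yet the result is the same.
	 */
	*offset = ns_now - mult_frac(tsc_now, *scale,
				     (1UL << CYC2NS_SCALE_FACTOR));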
Signed-off-by: Salman Qazi
Acked-by: John Stultz
Acked-by: Peter Zijlstra
Cc: Paul Turner
Cc: john stultz
Link: http://lkml.kernel.org/r/20120310004027.19291.88460.stgit@dungbeetle.mtv.corp.google.com
Signed-off-by: Ingo Molnar
---
 include/linux/kernel.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index e8343422240a..d801acb5e680 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -85,6 +85,19 @@
 	}							\
 )
 
+/*
+ * Multiplies an integer by a fraction, while avoiding unnecessary
+ * overflow or loss of precision.
+ */
+#define mult_frac(x, numer, denom)(			\
+{							\
+	typeof(x) quot = (x) / (denom);			\
+	typeof(x) rem = (x) % (denom);			\
+	(quot * (numer)) + ((rem * (numer)) / (denom));	\
+}							\
+)
+
+
 #define _RET_IP_		(unsigned long)__builtin_return_address(0)
 #define _THIS_IP_  ({ __label__ __here; __here: (unsigned long)&&__here; })
--
cgit v1.2.3
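To see why the decomposition in mult_frac() above avoids the overflow,
here is a small stand-alone demonstration (plain GNU C; the cycle count
and scale are hypothetical values roughly matching a 3 GHz TSC after
about 212 days of uptime):

	#include <stdint.h>
	#include <stdio.h>

	#define mult_frac(x, numer, denom)(			\
	{							\
		typeof(x) quot = (x) / (denom);			\
		typeof(x) rem = (x) % (denom);			\
		(quot * (numer)) + ((rem * (numer)) / (denom));	\
	}							\
	)

	int main(void)
	{
		uint64_t cycles = 55000000000000000ULL;	/* ~212 days at 3 GHz */
		uint64_t scale = 341;			/* ~3 GHz cyc2ns scale */

		/* Naive: cycles * scale is about 1.9e19, which is larger
		 * than 2^64 - 1, so the intermediate product wraps around.
		 */
		uint64_t naive = (cycles * scale) >> 10;

		/* Decomposed: rem < denom, so both partial products stay
		 * comfortably within 64 bits.
		 */
		uint64_t safe = mult_frac(cycles, scale, 1ULL << 10);

		printf("naive: %llu\nsafe:  %llu\n",
		       (unsigned long long)naive, (unsigned long long)safe);
		return 0;
	}

The identity is x * numer / denom == (x / denom) * numer
+ ((x % denom) * numer) / denom: the first term divides x down before
multiplying, the second multiplies only the small remainder, and with
truncating integer division the two sides agree exactly.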