From 39be350127ec60a078edffe5b4915dafba4ba514 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Thu, 26 Jan 2012 12:44:34 +0100
Subject: sched, block: Unify cache detection

The block layer has some code trying to determine if two CPUs share a
cache; the scheduler has a similar function. Expose the function used
by the scheduler and make the block layer use it, thereby removing the
block layer's usage of CONFIG_SCHED* and topology bits.

Signed-off-by: Peter Zijlstra
Acked-by: Jens Axboe
Link: http://lkml.kernel.org/r/1327579450.2446.95.camel@twins
---
 include/linux/sched.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 513f52459872..0e1959568836 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1052,6 +1052,8 @@ static inline int test_sd_parent(struct sched_domain *sd, int flag)
 unsigned long default_scale_freq_power(struct sched_domain *sd, int cpu);
 unsigned long default_scale_smt_power(struct sched_domain *sd, int cpu);
 
+bool cpus_share_cache(int this_cpu, int that_cpu);
+
 #else /* CONFIG_SMP */
 
 struct sched_domain_attr;
@@ -1061,6 +1063,12 @@ partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
 			struct sched_domain_attr *dattr_new)
 {
 }
+
+static inline bool cpus_share_cache(int this_cpu, int that_cpu)
+{
+	return true;
+}
+
 #endif /* !CONFIG_SMP */
--
cgit v1.2.3


From 4ec4412e1e91f44a3dcb97b6c9172a13fc78bac9 Mon Sep 17 00:00:00 2001
From: Vincent Guittot
Date: Mon, 12 Dec 2011 20:21:08 +0100
Subject: sched: Ensure cpu_power periodic update

With a lot of small tasks, the scheduler softirq is almost never raised
when no_hz is enabled. In this case load_balance() is mainly called
with the newly_idle mode, which doesn't update the cpu_power.

Add a next_update field which ensures a maximum update period even when
there is only short activity. Having stale cpu_power information can
skew the load-balancing decisions; the guaranteed update cures this.

Signed-off-by: Vincent Guittot
Signed-off-by: Peter Zijlstra
Link: http://lkml.kernel.org/r/1323717668-2143-1-git-send-email-vincent.guittot@linaro.org
---
 include/linux/sched.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 0e1959568836..92313a3f6f77 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -905,6 +905,7 @@ struct sched_group_power {
 	 * single CPU.
 	 */
 	unsigned int power, power_orig;
+	unsigned long next_update;
 	/*
 	 * Number of busy cpus in this group.
 	 */
--
cgit v1.2.3


From 9388dc3047a88bedfd867e9ba3e1980c815ac524 Mon Sep 17 00:00:00 2001
From: Anton Vorontsov
Date: Thu, 9 Feb 2012 20:45:19 +0400
Subject: sched: Turn lock_task_sighand() into a static inline

It appears that the sparse tool understands static inline functions
for context-balance checking, so let's turn the macro into an inline
function. This makes the code a little more robust.

Suggested-by: Oleg Nesterov
Signed-off-by: Anton Vorontsov
Cc: KOSAKI Motohiro
Cc: Greg KH
Cc: Arve
Cc: San Mehat
Cc: Colin Cross
Cc: Eric W. Biederman
Cc: Paul E. McKenney
Cc: kernel-team@android.com
Cc: linaro-kernel@lists.linaro.org
Cc: Peter Zijlstra
Link: http://lkml.kernel.org/r/20120209164519.GA10266@oksana.dev.rtsoft.ru
Signed-off-by: Ingo Molnar
---
 include/linux/sched.h | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 92313a3f6f77..5dba2ad52431 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2393,12 +2393,15 @@ static inline void task_unlock(struct task_struct *p)
 extern struct sighand_struct *__lock_task_sighand(struct task_struct *tsk,
 							unsigned long *flags);
 
-#define lock_task_sighand(tsk, flags)					\
-({	struct sighand_struct *__ss;					\
-	__cond_lock(&(tsk)->sighand->siglock,				\
-		    (__ss = __lock_task_sighand(tsk, flags)));		\
-	__ss;								\
-})									\
+static inline struct sighand_struct *lock_task_sighand(struct task_struct *tsk,
+						       unsigned long *flags)
+{
+	struct sighand_struct *ret;
+
+	ret = __lock_task_sighand(tsk, flags);
+	(void)__cond_lock(&tsk->sighand->siglock, ret);
+	return ret;
+}
 
 static inline void unlock_task_sighand(struct task_struct *tsk,
 						unsigned long *flags)
--
cgit v1.2.3


From de5bdff7a72acc281219be2b8edeeca1fd81c542 Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto
Date: Thu, 16 Feb 2012 14:52:21 +0900
Subject: sched: Make initial SCHED_RR timeslice DEF_TIMESLICE

Currently the initial SCHED_RR timeslice of init_task is HZ, i.e. one
second, which is not the same as the default SCHED_RR timeslice
DEF_TIMESLICE. Change the initial timeslice to DEF_TIMESLICE.

Signed-off-by: Hiroshi Shimamoto
[ s/DEF_TIMESLICE/RR_TIMESLICE/g ]
Signed-off-by: Peter Zijlstra
Link: http://lkml.kernel.org/r/4F3C9995.3010800@ct.jp.nec.com
Signed-off-by: Ingo Molnar
---
 include/linux/init_task.h | 2 +-
 include/linux/sched.h     | 6 ++++++
 2 files changed, 7 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 9c66b1ada9d7..f994d51f70f2 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -149,7 +149,7 @@ extern struct cred init_cred;
 	},								\
 	.rt		= {						\
 		.run_list	= LIST_HEAD_INIT(tsk.rt.run_list),	\
-		.time_slice	= HZ,					\
+		.time_slice	= RR_TIMESLICE,				\
 		.nr_cpus_allowed = NR_CPUS,				\
 	},								\
 	.tasks		= LIST_HEAD_INIT(tsk.tasks),			\
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5dba2ad52431..eb5de466f099 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1234,6 +1234,12 @@ struct sched_rt_entity {
 #endif
 };
 
+/*
+ * default timeslice is 100 msecs (used only for SCHED_RR tasks).
+ * Timeslices get refilled after they expire.
+ */
+#define RR_TIMESLICE		(100 * HZ / 1000)
+
 struct rcu_node;
 
 enum perf_event_task_context {
--
cgit v1.2.3


From c5491ea779793f977d282754db478157cc409d82 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Mon, 21 Mar 2011 12:09:35 +0100
Subject: sched/rt: Add schedule_preempt_disabled()

Add a helper to get rid of the ever-repeating pattern:

	preempt_enable_no_resched();
	schedule();
	preempt_disable();
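The helper presumably just wraps that sequence; a minimal sketch of
what the out-of-line definition might look like (the real body lives in
the scheduler core, this patch only adds the declaration to sched.h):

	/* Sketch only: assumes the definition simply wraps the old pattern. */
	void schedule_preempt_disabled(void)
	{
		preempt_enable_no_resched();
		schedule();
		preempt_disable();
	}

Callers that sit in a loop with preemption disabled, such as idle
loops, can then replace the three-line dance with a single call.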
Signed-off-by: Thomas Gleixner
Acked-by: Peter Zijlstra
Link: http://lkml.kernel.org/n/tip-wxx7btox7coby6ifv5vzhzgp@git.kernel.org
Signed-off-by: Ingo Molnar
---
 include/linux/sched.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 1a6398424fab..03dd224d0667 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -361,6 +361,7 @@ extern signed long schedule_timeout_interruptible(signed long timeout);
 extern signed long schedule_timeout_killable(signed long timeout);
 extern signed long schedule_timeout_uninterruptible(signed long timeout);
 asmlinkage void schedule(void);
+extern void schedule_preempt_disabled(void);
 extern int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner);
 
 struct nsproxy;
--
cgit v1.2.3


From ba74c1448f127649046615ec017bded7b2a76f29 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Mon, 21 Mar 2011 13:32:17 +0100
Subject: sched/rt: Document scheduler-related skip-resched-check sites

Create a distinction between scheduler-related
preempt_enable_no_resched() calls and the nearly one hundred other
places in the kernel that do not want to reschedule, for one reason or
another.

This distinction matters for -rt, where the scheduler and the
non-scheduler preempt models (and checks) are different. For upstream
it's purely a documentation change.

Signed-off-by: Thomas Gleixner
Link: http://lkml.kernel.org/n/tip-gs88fvx2mdv5psnzxnv575ke@git.kernel.org
Signed-off-by: Ingo Molnar
---
 include/linux/preempt.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index 58969b2a8a82..5a710b9c578e 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -48,12 +48,14 @@ do { \
 	barrier(); \
 } while (0)
 
-#define preempt_enable_no_resched() \
+#define sched_preempt_enable_no_resched() \
 do { \
 	barrier(); \
 	dec_preempt_count(); \
 } while (0)
 
+#define preempt_enable_no_resched()	sched_preempt_enable_no_resched()
+
 #define preempt_enable() \
 do { \
 	preempt_enable_no_resched(); \
@@ -92,6 +94,7 @@ do { \
 #else /* !CONFIG_PREEMPT_COUNT */
 
 #define preempt_disable()		do { } while (0)
+#define sched_preempt_enable_no_resched()	do { } while (0)
 #define preempt_enable_no_resched()	do { } while (0)
 #define preempt_enable()		do { } while (0)
--
cgit v1.2.3


From 63b2001169e75cd71e917ec953fdab572e3f944a Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Thu, 1 Dec 2011 00:04:00 +0100
Subject: sched/wait: Add __wake_up_all_locked() API

For code which protects the waitqueue itself with another lock, it
makes no sense to acquire the waitqueue lock for a wake-up-all.
Provide __wake_up_all_locked().

This is an optimization on the vanilla kernel (to be used by the PCI
code) and an important semantic distinction on -rt.
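A minimal usage sketch, with hypothetical names: the caller's own
spinlock already serializes all access to the waitqueue, so the locked
variant can skip taking q->lock:

	/* dev_lock, not the waitqueue lock, protects dev_wait here. */
	static DEFINE_SPINLOCK(dev_lock);
	static DECLARE_WAIT_QUEUE_HEAD(dev_wait);
	static bool dev_ready;

	static void dev_complete(void)
	{
		spin_lock_irq(&dev_lock);
		dev_ready = true;
		wake_up_all_locked(&dev_wait);	/* no nested q->lock acquisition */
		spin_unlock_irq(&dev_lock);
	}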
Signed-off-by: Thomas Gleixner
Acked-by: Peter Zijlstra
Link: http://lkml.kernel.org/n/tip-ux6m4b8jonb9inx8xafh77ds@git.kernel.org
Signed-off-by: Ingo Molnar
---
 include/linux/wait.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/wait.h b/include/linux/wait.h
index a9ce45e8501c..7d9a9e990ce6 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -157,7 +157,7 @@ void __wake_up(wait_queue_head_t *q, unsigned int mode, int nr, void *key);
 void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key);
 void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode, int nr, void *key);
-void __wake_up_locked(wait_queue_head_t *q, unsigned int mode);
+void __wake_up_locked(wait_queue_head_t *q, unsigned int mode, int nr);
 void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr);
 void __wake_up_bit(wait_queue_head_t *, void *, int);
 int __wait_on_bit(wait_queue_head_t *, struct wait_bit_queue *, int (*)(void *), unsigned);
@@ -170,7 +170,8 @@ wait_queue_head_t *bit_waitqueue(void *, int);
 #define wake_up(x)			__wake_up(x, TASK_NORMAL, 1, NULL)
 #define wake_up_nr(x, nr)		__wake_up(x, TASK_NORMAL, nr, NULL)
 #define wake_up_all(x)			__wake_up(x, TASK_NORMAL, 0, NULL)
-#define wake_up_locked(x)		__wake_up_locked((x), TASK_NORMAL)
+#define wake_up_locked(x)		__wake_up_locked((x), TASK_NORMAL, 1)
+#define wake_up_all_locked(x)		__wake_up_locked((x), TASK_NORMAL, 0)
 
 #define wake_up_interruptible(x)	__wake_up(x, TASK_INTERRUPTIBLE, 1, NULL)
 #define wake_up_interruptible_nr(x, nr) __wake_up(x, TASK_INTERRUPTIBLE, nr, NULL)
--
cgit v1.2.3


From 3c7d51843b03a6839e9ec7cda724e54d2319a63a Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Sun, 17 Jul 2011 20:46:52 +0200
Subject: sched/rt: Do not submit new work when PI-blocked

When we are PI-blocked, we want to get things done ASAP.

Signed-off-by: Thomas Gleixner
Acked-by: Peter Zijlstra
Link: http://lkml.kernel.org/n/tip-vw8et3445km5b8mpihf4trae@git.kernel.org
Signed-off-by: Ingo Molnar
---
 include/linux/sched.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 03dd224d0667..c628a9151437 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2082,12 +2082,20 @@ extern unsigned int sysctl_sched_cfs_bandwidth_slice;
 extern int rt_mutex_getprio(struct task_struct *p);
 extern void rt_mutex_setprio(struct task_struct *p, int prio);
 extern void rt_mutex_adjust_pi(struct task_struct *p);
+static inline bool tsk_is_pi_blocked(struct task_struct *tsk)
+{
+	return tsk->pi_blocked_on != NULL;
+}
 #else
 static inline int rt_mutex_getprio(struct task_struct *p)
 {
 	return p->normal_prio;
 }
 # define rt_mutex_adjust_pi(p)		do { } while (0)
+static inline bool tsk_is_pi_blocked(struct task_struct *tsk)
+{
+	return false;
+}
 #endif
 
 extern bool yield_to(struct task_struct *p, bool preempt);
--
cgit v1.2.3


From 2e5b5b3a1b7768c89fbfeca18e75f8ee377e924c Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto
Date: Thu, 23 Feb 2012 17:41:27 +0900
Subject: sched: Clean up parameter passing of proc_sched_autogroup_set_nice()

Pass nice by value to proc_sched_autogroup_set_nice() instead of by
pointer. No side effect is expected, and the caller's err variable is
simply overwritten with the return value.
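A sketch of the intended call-site shape after the change (a
hypothetical handler modelled on the /proc write path; all names here
are illustrative):

	static ssize_t autogroup_nice_store(struct task_struct *p,
					    int nice, size_t count)
	{
		int err;

		/* nice is passed by value; the error arrives as the return value */
		err = proc_sched_autogroup_set_nice(p, nice);

		return err ? err : count;
	}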
Signed-off-by: Hiroshi Shimamoto
Signed-off-by: Peter Zijlstra
Link: http://lkml.kernel.org/r/4F45FBB7.5090607@ct.jp.nec.com
Signed-off-by: Ingo Molnar
---
 include/linux/sched.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index c628a9151437..c298fb9cf5ad 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2065,7 +2065,7 @@ extern void sched_autogroup_fork(struct signal_struct *sig);
 extern void sched_autogroup_exit(struct signal_struct *sig);
 #ifdef CONFIG_PROC_FS
 extern void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m);
-extern int proc_sched_autogroup_set_nice(struct task_struct *p, int *nice);
+extern int proc_sched_autogroup_set_nice(struct task_struct *p, int nice);
 #endif
 #else
 static inline void sched_autogroup_create_attach(struct task_struct *p) { }
--
cgit v1.2.3


From 3ccf3e8306156a28213adc720aba807e9a901ad5 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Mon, 27 Feb 2012 10:47:00 +0100
Subject: printk/sched: Introduce special printk_sched() for those awkward moments

There are a few awkward printk()s inside the scheduler guts that people
prefer to keep, but which really are rather deadlock-prone. Fudge around
it by storing the text in a per-cpu buffer and polling it from the
existing printk_tick() handler.

This will drop output when it's more frequent than once a tick; however,
only the affinity message could possibly go that fast, and for that a
single line should suffice to notify the admin that he's done something
silly.

Signed-off-by: Peter Zijlstra
Cc: Linus Torvalds
Cc: Andrew Morton
Link: http://lkml.kernel.org/n/tip-wua3lmkt3dg8nfts66o6brne@git.kernel.org
Signed-off-by: Ingo Molnar
---
 include/linux/printk.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include')

diff --git a/include/linux/printk.h b/include/linux/printk.h
index f0e22f75143f..1f77a4174ee0 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -100,6 +100,11 @@ int vprintk(const char *fmt, va_list args);
 asmlinkage __printf(1, 2) __cold
 int printk(const char *fmt, ...);
 
+/*
+ * Special printk facility for scheduler use only, _DO_NOT_USE_ !
+ */
+__printf(1, 2) __cold int printk_sched(const char *fmt, ...);
+
 /*
  * Please don't use printk_ratelimit(), because it shares ratelimiting state
  * with all other unrelated printk_ratelimit() callsites. Instead use
@@ -127,6 +132,11 @@ int printk(const char *s, ...)
 {
 	return 0;
 }
+static inline __printf(1, 2) __cold
+int printk_sched(const char *s, ...)
+{
+	return 0;
+}
 static inline int printk_ratelimit(void)
 {
 	return 0;
--
cgit v1.2.3


From 9993bc635d01a6ee7f6b833b4ee65ce7c06350b1 Mon Sep 17 00:00:00 2001
From: Salman Qazi
Date: Fri, 9 Mar 2012 16:41:01 -0800
Subject: sched/x86: Fix overflow in cyc2ns_offset

When a machine boots up, the TSC generally gets reset. However, when
kexec is used to boot into a kernel, the TSC value is carried over
from the previous kernel. The computation of cyc2ns_offset in
set_cyc2ns_scale() is prone to overflow if the machine has been up
more than 208 days prior to the kexec.

The overflow happens when we multiply by *scale, even though there is
enough room to store the final answer. We fix this issue by decomposing
tsc_now into the quotient and remainder of division by
CYC2NS_SCALE_FACTOR and then performing the multiplication separately
on the two components.

The code is also refactored to share the calculation with the previous
fix in __cycles_2_ns().
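A sketch of the shape of the fix in set_cyc2ns_scale(), assuming the
usual CYC2NS_SCALE_FACTOR fixed-point convention (mult_frac() is the
helper added by this patch; see the kernel.h hunk below):

	/* Before (sketch): the intermediate tsc_now * *scale can exceed
	 * 64 bits once tsc_now is large enough (~208 days of uptime
	 * carried over a kexec).
	 */
	*offset = ns_now - ((tsc_now * *scale) >> CYC2NS_SCALE_FACTOR);

	/* After (sketch): quotient and remainder are scaled separately,
	 * so no intermediate product overflows, yet the result is the same.
	 */
	*offset = ns_now - mult_frac(tsc_now, *scale,
				     (1UL << CYC2NS_SCALE_FACTOR));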
Signed-off-by: Salman Qazi
Acked-by: John Stultz
Acked-by: Peter Zijlstra
Cc: Paul Turner
Cc: john stultz
Link: http://lkml.kernel.org/r/20120310004027.19291.88460.stgit@dungbeetle.mtv.corp.google.com
Signed-off-by: Ingo Molnar
---
 include/linux/kernel.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index e8343422240a..d801acb5e680 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -85,6 +85,19 @@
 	}							\
 )
 
+/*
+ * Multiplies an integer by a fraction, while avoiding unnecessary
+ * overflow or loss of precision.
+ */
+#define mult_frac(x, numer, denom)(			\
+{							\
+	typeof(x) quot = (x) / (denom);			\
+	typeof(x) rem = (x) % (denom);			\
+	(quot * (numer)) + ((rem * (numer)) / (denom));	\
+}							\
+)
+
+
 #define _RET_IP_		(unsigned long)__builtin_return_address(0)
 #define _THIS_IP_  ({ __label__ __here; __here: (unsigned long)&&__here; })
--
cgit v1.2.3
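To see why the decomposition in mult_frac() above avoids the overflow,
here is a small stand-alone demonstration (plain GNU C; the cycle count
and scale are hypothetical values roughly matching a 3 GHz TSC after
about 212 days of uptime):

	#include <stdint.h>
	#include <stdio.h>

	#define mult_frac(x, numer, denom)(			\
	{							\
		typeof(x) quot = (x) / (denom);			\
		typeof(x) rem = (x) % (denom);			\
		(quot * (numer)) + ((rem * (numer)) / (denom));	\
	}							\
	)

	int main(void)
	{
		uint64_t cycles = 55000000000000000ULL;	/* ~212 days at 3 GHz */
		uint64_t scale = 341;			/* ~3 GHz cyc2ns scale */

		/* Naive: cycles * scale is about 1.9e19, which is larger
		 * than 2^64 - 1, so the intermediate product wraps around.
		 */
		uint64_t naive = (cycles * scale) >> 10;

		/* Decomposed: rem < denom, so both partial products stay
		 * comfortably within 64 bits.
		 */
		uint64_t safe = mult_frac(cycles, scale, 1ULL << 10);

		printf("naive: %llu\nsafe:  %llu\n",
		       (unsigned long long)naive, (unsigned long long)safe);
		return 0;
	}

The identity is x * numer / denom == (x / denom) * numer
+ ((x % denom) * numer) / denom: the first term divides x down before
multiplying, the second multiplies only the small remainder, and with
truncating integer division the two sides agree exactly.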