5 files changed, 84 insertions, 31 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index 8e63ffb6ed05..dfae1bf6d5b2 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -223,7 +223,7 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
 {
 	ktime_t now;
 
-	if (rt_bandwidth_enabled() && rt_b->rt_runtime == RUNTIME_INF)
+	if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF)
 		return;
 
 	if (hrtimer_active(&rt_b->rt_period_timer))
@@ -9319,6 +9319,16 @@ static int sched_rt_global_constraints(void)
 
 	return ret;
 }
+
+int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk)
+{
+	/* Don't accept realtime tasks when there is no way for them to run */
+	if (rt_task(tsk) && tg->rt_bandwidth.rt_runtime == 0)
+		return 0;
+
+	return 1;
+}
+
 #else /* !CONFIG_RT_GROUP_SCHED */
 static int sched_rt_global_constraints(void)
 {
@@ -9412,8 +9422,7 @@ cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
 		      struct task_struct *tsk)
 {
 #ifdef CONFIG_RT_GROUP_SCHED
-	/* Don't accept realtime tasks when there is no way for them to run */
-	if (rt_task(tsk) && cgroup_tg(cgrp)->rt_bandwidth.rt_runtime == 0)
+	if (!sched_rt_can_attach(cgroup_tg(cgrp), tsk))
 		return -EINVAL;
 #else
 	/* We don't support RT-tasks being in separate groups */
diff --git a/kernel/sched_clock.c b/kernel/sched_clock.c
index a0b0852414cc..390f33234bd0 100644
--- a/kernel/sched_clock.c
+++ b/kernel/sched_clock.c
@@ -24,11 +24,11 @@
  * The clock: sched_clock_cpu() is monotonic per cpu, and should be somewhat
  * consistent between cpus (never more than 2 jiffies difference).
  */
-#include <linux/sched.h>
-#include <linux/percpu.h>
 #include <linux/spinlock.h>
-#include <linux/ktime.h>
 #include <linux/module.h>
+#include <linux/percpu.h>
+#include <linux/ktime.h>
+#include <linux/sched.h>
 
 /*
  * Scheduler clock - returns current time in nanosec units.
@@ -43,6 +43,7 @@ unsigned long long __attribute__((weak)) sched_clock(void)
 static __read_mostly int sched_clock_running;
 
 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
+__read_mostly int sched_clock_stable;
 
 struct sched_clock_data {
 	/*
@@ -87,7 +88,7 @@ void sched_clock_init(void)
 }
 
 /*
- * min,max except they take wrapping into account
+ * min, max except they take wrapping into account
  */
 
 static inline u64 wrap_min(u64 x, u64 y)
@@ -111,15 +112,13 @@ static u64 __update_sched_clock(struct sched_clock_data *scd, u64 now)
 	s64 delta = now - scd->tick_raw;
 	u64 clock, min_clock, max_clock;
 
-	WARN_ON_ONCE(!irqs_disabled());
-
 	if (unlikely(delta < 0))
 		delta = 0;
 
 	/*
 	 * scd->clock = clamp(scd->tick_gtod + delta,
-	 * 		      max(scd->tick_gtod, scd->clock),
-	 * 		      scd->tick_gtod + TICK_NSEC);
+	 *		      max(scd->tick_gtod, scd->clock),
+	 *		      scd->tick_gtod + TICK_NSEC);
 	 */
 
 	clock = scd->tick_gtod + delta;
@@ -148,12 +147,13 @@ static void lock_double_clock(struct sched_clock_data *data1,
 
 u64 sched_clock_cpu(int cpu)
 {
-	struct sched_clock_data *scd = cpu_sdc(cpu);
 	u64 now, clock, this_clock, remote_clock;
+	struct sched_clock_data *scd;
 
-	if (unlikely(!sched_clock_running))
-		return 0ull;
+	if (sched_clock_stable)
+		return sched_clock();
 
+	scd = cpu_sdc(cpu);
 	WARN_ON_ONCE(!irqs_disabled());
 	now = sched_clock();
 
@@ -195,14 +195,18 @@ u64 sched_clock_cpu(int cpu)
 
 void sched_clock_tick(void)
 {
-	struct sched_clock_data *scd = this_scd();
+	struct sched_clock_data *scd;
 	u64 now, now_gtod;
 
+	if (sched_clock_stable)
+		return;
+
 	if (unlikely(!sched_clock_running))
 		return;
 
 	WARN_ON_ONCE(!irqs_disabled());
 
+	scd = this_scd();
 	now_gtod = ktime_to_ns(ktime_get());
 	now = sched_clock();
 
@@ -250,7 +254,7 @@ u64 sched_clock_cpu(int cpu)
 	return sched_clock();
 }
 
-#endif
+#endif /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
 
 unsigned long long cpu_clock(int cpu)
 {
diff --git a/kernel/sys.c b/kernel/sys.c
index f145c415bc16..37f458e6882a 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -559,7 +559,7 @@ error:
 	abort_creds(new);
 	return retval;
 }
-  
+
 /*
  * change the user struct in a credentials set to match the new UID
  */
@@ -571,6 +571,11 @@ static int set_user(struct cred *new)
 	if (!new_user)
 		return -EAGAIN;
 
+	if (!task_can_switch_user(new_user, current)) {
+		free_uid(new_user);
+		return -EINVAL;
+	}
+
 	if (atomic_read(&new_user->processes) >=
 				current->signal->rlim[RLIMIT_NPROC].rlim_cur &&
 			new_user != INIT_USER) {
@@ -631,10 +636,11 @@ SYSCALL_DEFINE2(setreuid, uid_t, ruid, uid_t, euid)
 			goto error;
 	}
 
-	retval = -EAGAIN;
-	if (new->uid != old->uid && set_user(new) < 0)
-		goto error;
-
+	if (new->uid != old->uid) {
+		retval = set_user(new);
+		if (retval < 0)
+			goto error;
+	}
 	if (ruid != (uid_t) -1 ||
 	    (euid != (uid_t) -1 && euid != old->uid))
 		new->suid = new->euid;
@@ -680,9 +686,10 @@ SYSCALL_DEFINE1(setuid, uid_t, uid)
 	retval = -EPERM;
 	if (capable(CAP_SETUID)) {
 		new->suid = new->uid = uid;
-		if (uid != old->uid && set_user(new) < 0) {
-			retval = -EAGAIN;
-			goto error;
+		if (uid != old->uid) {
+			retval = set_user(new);
+			if (retval < 0)
+				goto error;
 		}
 	} else if (uid != old->uid && uid != new->suid) {
 		goto error;
@@ -734,11 +741,13 @@ SYSCALL_DEFINE3(setresuid, uid_t, ruid, uid_t, euid, uid_t, suid)
 			goto error;
 	}
 
-	retval = -EAGAIN;
 	if (ruid != (uid_t) -1) {
 		new->uid = ruid;
-		if (ruid != old->uid && set_user(new) < 0)
-			goto error;
+		if (ruid != old->uid) {
+			retval = set_user(new);
+			if (retval < 0)
+				goto error;
+		}
 	}
 	if (euid != (uid_t) -1)
 		new->euid = euid;
diff --git a/kernel/user.c b/kernel/user.c
index 3551ac742395..6a9b696128c8 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -362,6 +362,24 @@ static void free_user(struct user_struct *up, unsigned long flags)
 
 #endif
 
+#if defined(CONFIG_RT_GROUP_SCHED) && defined(CONFIG_USER_SCHED)
+/*
+ * We need to check if a setuid can take place. This function should be called
+ * before successfully completing the setuid.
+ */
+int task_can_switch_user(struct user_struct *up, struct task_struct *tsk)
+{
+
+	return sched_rt_can_attach(up->tg, tsk);
+
+}
+#else
+int task_can_switch_user(struct user_struct *up, struct task_struct *tsk)
+{
+	return 1;
+}
+#endif
+
 /*
  * Locate the user_struct for the passed UID.  If found, take a ref on it.  The
  * caller must undo that ref with free_uid().
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 79084311ee57..076c7c8215b0 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -60,12 +60,25 @@ int create_user_ns(struct cred *new)
 	return 0;
 }
 
-void free_user_ns(struct kref *kref)
+/*
+ * Deferred destructor for a user namespace.  This is required because
+ * free_user_ns() may be called with uidhash_lock held, but we need to call
+ * back to free_uid() which will want to take the lock again.
+ */
+static void free_user_ns_work(struct work_struct *work)
 {
-	struct user_namespace *ns;
-
-	ns = container_of(kref, struct user_namespace, kref);
+	struct user_namespace *ns =
+		container_of(work, struct user_namespace, destroyer);
 	free_uid(ns->creator);
 	kfree(ns);
 }
+
+void free_user_ns(struct kref *kref)
+{
+	struct user_namespace *ns =
+		container_of(kref, struct user_namespace, kref);
+
+	INIT_WORK(&ns->destroyer, free_user_ns_work);
+	schedule_work(&ns->destroyer);
+}
 EXPORT_SYMBOL(free_user_ns);