Merge tag 'v4.1.38' into 4.1-2.0.x-imx

Linux 4.1.38

* tag 'v4.1.38': (109 commits)
  Linux 4.1.38
  gro: Allow tunnel stacking in the case of FOU/GUE
  tunnels: Don't apply GRO to multiple layers of encapsulation.
  net: ipv4: Convert IP network timestamps to be y2038 safe
  ipip: Properly mark ipip GRO packets as encapsulated.
  sg_write()/bsg_write() is not fit to be called under KERNEL_DS
  fs: exec: apply CLOEXEC before changing dumpable task flags
  IB/cma: Fix a race condition in iboe_addr_get_sgid()
  Revert "ALSA: usb-audio: Fix race at stopping the stream"
  kvm: nVMX: Allow L1 to intercept software exceptions (#BP and #OF)
  drivers/gpu/drm/ast: Fix infinite loop if read fails
  target/user: Fix use-after-free of tcmu_cmds if they are expired
  kernel/debug/debug_core.c: more properly delay for secondary CPUs
  scsi: avoid a permanent stop of the scsi device's request queue
  IB/multicast: Check ib_find_pkey() return value
  IPoIB: Avoid reading an uninitialized member variable
  block_dev: don't test bdev->bd_contains when it is not stable
  btrfs: limit async_work allocation and worker func duration
  mm/vmscan.c: set correct defer count for shrinker
  Input: drv260x - fix input device's parent assignment
  ...
author: Otavio Salvador <otavio@ossystems.com.br> 2017-01-23 14:03:26 -0200
committer: Otavio Salvador <otavio@ossystems.com.br> 2017-01-23 14:03:26 -0200
commit: 739f0c753a6df735d66278b4452eb960c0ff6dd1 (patch)
tree: 0fb311aec7d6210c3c3d95743db208d179c87501 /kernel
parent: 80e3b3c3c85a3a8b70ef6403bc806901628c7446 (diff)
parent: f40b3cc69de8c97bbcdb74e3cffda06ffcad2cd7 (diff)
6 files changed, 69 insertions, 15 deletions
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 388fc6f78c6f..71403502411b 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -323,8 +323,7 @@ static struct file_system_type cpuset_fs_type = {
 /*
  * Return in pmask the portion of a cpusets's cpus_allowed that
  * are online.  If none are online, walk up the cpuset hierarchy
- * until we find one that does have some online cpus.  The top
- * cpuset always has some cpus online.
+ * until we find one that does have some online cpus.
  *
  * One way or another, we guarantee to return some non-empty subset
  * of cpu_online_mask.
@@ -333,8 +332,20 @@ static struct file_system_type cpuset_fs_type = {
  */
 static void guarantee_online_cpus(struct cpuset *cs, struct cpumask *pmask)
 {
-	while (!cpumask_intersects(cs->effective_cpus, cpu_online_mask))
+	while (!cpumask_intersects(cs->effective_cpus, cpu_online_mask)) {
 		cs = parent_cs(cs);
+		if (unlikely(!cs)) {
+			/*
+			 * The top cpuset doesn't have any online cpu as a
+			 * consequence of a race between cpuset_hotplug_work
+			 * and cpu hotplug notifier.  But we know the top
+			 * cpuset's effective_cpus is on its way to be
+			 * identical to cpu_online_mask.
+			 */
+			cpumask_copy(pmask, cpu_online_mask);
+			return;
+		}
+	}
 	cpumask_and(pmask, cs->effective_cpus, cpu_online_mask);
 }
 
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
index 0874e2edd275..79517e5549f1 100644
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
@@ -598,11 +598,11 @@ return_normal:
 	/*
 	 * Wait for the other CPUs to be notified and be waiting for us:
 	 */
-	time_left = loops_per_jiffy * HZ;
+	time_left = MSEC_PER_SEC;
 	while (kgdb_do_roundup && --time_left &&
 	       (atomic_read(&masters_in_kgdb) + atomic_read(&slaves_in_kgdb)) !=
 		   online_cpus)
-		cpu_relax();
+		udelay(1000);
 	if (!time_left)
 		pr_crit("Timed out waiting for secondary CPUs.\n");
 
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 261ee21e62db..9650e7aee267 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -20,6 +20,7 @@
 #include <linux/uio.h>
 #include <linux/audit.h>
 #include <linux/pid_namespace.h>
+#include <linux/user_namespace.h>
 #include <linux/syscalls.h>
 #include <linux/uaccess.h>
 #include <linux/regset.h>
@@ -207,12 +208,34 @@ static int ptrace_check_attach(struct task_struct *child, bool ignore_state)
 	return ret;
 }
 
-static int ptrace_has_cap(struct user_namespace *ns, unsigned int mode)
+static bool ptrace_has_cap(const struct cred *tcred, unsigned int mode)
 {
+	struct user_namespace *tns = tcred->user_ns;
+
+	/* When a root-owned process enters a user namespace created by a
+	 * malicious user, the user shouldn't be able to execute code under
+	 * uid 0 by attaching to the root-owned process via ptrace.
+	 * Therefore, similar to the capable_wrt_inode_uidgid() check,
+	 * verify that all the uids and gids of the target process are
+	 * mapped into a namespace below the current one in which the caller
+	 * is capable.
+	 * No fsuid/fsgid check because __ptrace_may_access doesn't do it
+	 * either.
+	 */
+	while (
+	    !kuid_has_mapping(tns, tcred->euid) ||
+	    !kuid_has_mapping(tns, tcred->suid) ||
+	    !kuid_has_mapping(tns, tcred->uid)  ||
+	    !kgid_has_mapping(tns, tcred->egid) ||
+	    !kgid_has_mapping(tns, tcred->sgid) ||
+	    !kgid_has_mapping(tns, tcred->gid)) {
+		tns = tns->parent;
+	}
+
 	if (mode & PTRACE_MODE_NOAUDIT)
-		return has_ns_capability_noaudit(current, ns, CAP_SYS_PTRACE);
+		return has_ns_capability_noaudit(current, tns, CAP_SYS_PTRACE);
 	else
-		return has_ns_capability(current, ns, CAP_SYS_PTRACE);
+		return has_ns_capability(current, tns, CAP_SYS_PTRACE);
 }
 
 /* Returns 0 on success, -errno on denial. */
@@ -264,7 +287,7 @@ static int __ptrace_may_access(struct task_struct *task, unsigned int mode)
 	    gid_eq(caller_gid, tcred->sgid) &&
 	    gid_eq(caller_gid, tcred->gid))
 		goto ok;
-	if (ptrace_has_cap(tcred->user_ns, mode))
+	if (ptrace_has_cap(tcred, mode))
 		goto ok;
 	rcu_read_unlock();
 	return -EPERM;
@@ -275,7 +298,7 @@ ok:
 		dumpable = get_dumpable(task->mm);
 	rcu_read_lock();
 	if (dumpable != SUID_DUMP_USER &&
-	    !ptrace_has_cap(__task_cred(task)->user_ns, mode)) {
+	    !ptrace_has_cap(__task_cred(task), mode)) {
 		rcu_read_unlock();
 		return -EPERM;
 	}
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 7d4900404c94..cebbff5f34fe 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -64,6 +64,7 @@
 #include <linux/binfmts.h>
 #include <linux/sched/sysctl.h>
 #include <linux/kexec.h>
+#include <linux/mount.h>
 
 #include <asm/uaccess.h>
 #include <asm/processor.h>
@@ -1709,6 +1710,14 @@ static struct ctl_table fs_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_doulongvec_minmax,
 	},
+	{
+		.procname	= "mount-max",
+		.data		= &sysctl_mount_max,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &one,
+	},
 	{ }
 };
 
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index d9f112bd42a7..d296b904685b 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -309,10 +309,10 @@ u32 (*arch_gettimeoffset)(void) = default_arch_gettimeoffset;
 static inline u32 arch_gettimeoffset(void) { return 0; }
 #endif
 
-static inline s64 timekeeping_delta_to_ns(struct tk_read_base *tkr,
+static inline u64 timekeeping_delta_to_ns(struct tk_read_base *tkr,
 					  cycle_t delta)
 {
-	s64 nsec;
+	u64 nsec;
 
 	nsec = delta * tkr->mult + tkr->xtime_nsec;
 	nsec >>= tkr->shift;
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index a51e79688455..972ce5b596f4 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -779,6 +779,10 @@ print_graph_entry_leaf(struct trace_iterator *iter,
 
 		cpu_data = per_cpu_ptr(data->cpu_data, cpu);
 
+		/* If a graph tracer ignored set_graph_notrace */
+		if (call->depth < -1)
+			call->depth += FTRACE_NOTRACE_DEPTH;
+
 		/*
 		 * Comments display at + 1 to depth. Since
 		 * this is a leaf function, keep the comments
@@ -787,7 +791,8 @@ print_graph_entry_leaf(struct trace_iterator *iter,
 		cpu_data->depth = call->depth - 1;
 
 		/* No need to keep this function around for this depth */
-		if (call->depth < FTRACE_RETFUNC_DEPTH)
+		if (call->depth < FTRACE_RETFUNC_DEPTH &&
+		    !WARN_ON_ONCE(call->depth < 0))
 			cpu_data->enter_funcs[call->depth] = 0;
 	}
 
@@ -816,11 +821,16 @@ print_graph_entry_nested(struct trace_iterator *iter,
 		struct fgraph_cpu_data *cpu_data;
 		int cpu = iter->cpu;
 
+		/* If a graph tracer ignored set_graph_notrace */
+		if (call->depth < -1)
+			call->depth += FTRACE_NOTRACE_DEPTH;
+
 		cpu_data = per_cpu_ptr(data->cpu_data, cpu);
 		cpu_data->depth = call->depth;
 
 		/* Save this function pointer to see if the exit matches */
-		if (call->depth < FTRACE_RETFUNC_DEPTH)
+		if (call->depth < FTRACE_RETFUNC_DEPTH &&
+		    !WARN_ON_ONCE(call->depth < 0))
 			cpu_data->enter_funcs[call->depth] = call->func;
 	}
 
@@ -1048,7 +1058,8 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
 		 */
 		cpu_data->depth = trace->depth - 1;
 
-		if (trace->depth < FTRACE_RETFUNC_DEPTH) {
+		if (trace->depth < FTRACE_RETFUNC_DEPTH &&
+		    !WARN_ON_ONCE(trace->depth < 0)) {
 			if (cpu_data->enter_funcs[trace->depth] != trace->func)
 				func_match = 0;
 			cpu_data->enter_funcs[trace->depth] = 0;
author	Otavio Salvador <otavio@ossystems.com.br>	2017-01-23 14:03:26 -0200
committer	Otavio Salvador <otavio@ossystems.com.br>	2017-01-23 14:03:26 -0200
commit	739f0c753a6df735d66278b4452eb960c0ff6dd1 (patch)
tree	0fb311aec7d6210c3c3d95743db208d179c87501 /kernel
parent	80e3b3c3c85a3a8b70ef6403bc806901628c7446 (diff)
parent	f40b3cc69de8c97bbcdb74e3cffda06ffcad2cd7 (diff)