From 0c740d0afc3bff0a097ad03a1c8df92757516f5c Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 21 Jan 2014 15:49:56 -0800 Subject: introduce for_each_thread() to replace the buggy while_each_thread() while_each_thread() and next_thread() should die, almost every lockless usage is wrong. 1. Unless g == current, the lockless while_each_thread() is not safe. while_each_thread(g, t) can loop forever if g exits, next_thread() can't reach the unhashed thread in this case. Note that this can happen even if g is the group leader, it can exec. 2. Even if while_each_thread() itself was correct, people often use it wrongly. It was never safe to just take rcu_read_lock() and loop unless you verify that pid_alive(g) == T, even the first next_thread() can point to the already freed/reused memory. This patch adds signal_struct->thread_head and task->thread_node to create the normal rcu-safe list with the stable head. The new for_each_thread(g, t) helper is always safe under rcu_read_lock() as long as this task_struct can't go away. Note: of course it is ugly to have both task_struct->thread_node and the old task_struct->thread_group, we will kill it later, after we change the users of while_each_thread() to use for_each_thread(). Perhaps we can kill it even before we convert all users, we can reimplement next_thread(t) using the new thread_head/thread_node. But we can't do this right now because this will lead to subtle behavioural changes. For example, do/while_each_thread() always sees at least one task, while for_each_thread() can do nothing if the whole thread group has died. Or thread_group_empty(), currently its semantics is not clear unless thread_group_leader(p) and we need to audit the callers before we can change it. So this patch adds the new interface which has to coexist with the old one for some time, hopefully the next changes will be more or less straightforward and the old one will go away soon. Signed-off-by: Oleg Nesterov Reviewed-by: Sergey Dyasly Tested-by: Sergey Dyasly Reviewed-by: Sameer Nanda Acked-by: David Rientjes Cc: "Eric W. Biederman" Cc: Frederic Weisbecker Cc: Mandeep Singh Baines Cc: "Ma, Xindong" Cc: Michal Hocko Cc: "Tu, Xiaobing" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index ffccdad050b5..485234d2fd42 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -549,6 +549,7 @@ struct signal_struct { atomic_t sigcnt; atomic_t live; int nr_threads; + struct list_head thread_head; wait_queue_head_t wait_chldexit; /* for wait4() */ @@ -1271,6 +1272,7 @@ struct task_struct { /* PID/PID hash table linkage. */ struct pid_link pids[PIDTYPE_MAX]; struct list_head thread_group; + struct list_head thread_node; struct completion *vfork_done; /* for vfork() */ int __user *set_child_tid; /* CLONE_CHILD_SETTID */ @@ -2341,6 +2343,16 @@ extern bool current_is_single_threaded(void); #define while_each_thread(g, t) \ while ((t = next_thread(t)) != g) +#define __for_each_thread(signal, t) \ + list_for_each_entry_rcu(t, &(signal)->thread_head, thread_node) + +#define for_each_thread(p, t) \ + __for_each_thread((p)->signal, t) + +/* Careful: this is a double loop, 'break' won't work as expected. */ +#define for_each_process_thread(p, t) \ + for_each_process(p) for_each_thread(p, t) + static inline int get_nr_threads(struct task_struct *tsk) { return tsk->signal->nr_threads; -- cgit v1.2.3 From 7288e1187ba935996232246916418c64bb88da30 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 23 Jan 2014 15:55:32 -0800 Subject: coredump: kill MMF_DUMPABLE and MMF_DUMP_SECURELY Nobody actually needs MMF_DUMPABLE/MMF_DUMP_SECURELY, they are only used to enforce the encoding of SUID_DUMP_* enum in mm->flags & MMF_DUMPABLE_MASK. Now that set_dumpable() updates both bits atomically we can kill them and simply store the value "as is" in 2 lower bits. Signed-off-by: Oleg Nesterov Acked-by: Kees Cook Cc: Alex Kelly Cc: "Eric W. Biederman" Cc: Josh Triplett Cc: Petr Matousek Cc: Vasily Kulikov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index 485234d2fd42..124430ba569b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -400,10 +400,8 @@ extern int get_dumpable(struct mm_struct *mm); #define SUID_DUMP_ROOT 2 /* Dump as root */ /* mm flags */ -/* dumpable bits */ -#define MMF_DUMPABLE 0 /* core dump is permitted */ -#define MMF_DUMP_SECURELY 1 /* core file is readable only by root */ +/* for SUID_DUMP_* above */ #define MMF_DUMPABLE_BITS 2 #define MMF_DUMPABLE_MASK ((1 << MMF_DUMPABLE_BITS) - 1) -- cgit v1.2.3 From 942be3875a1931c379bbc37053829dd6847e0f3f Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 23 Jan 2014 15:55:34 -0800 Subject: coredump: make __get_dumpable/get_dumpable inline, kill fs/coredump.h 1. Remove fs/coredump.h. It is not clear why do we need it, it only declares __get_dumpable(), signal.c includes it for no reason. 2. Now that get_dumpable() and __get_dumpable() are really trivial make them inline in linux/sched.h. Signed-off-by: Oleg Nesterov Acked-by: Kees Cook Cc: Alex Kelly Cc: "Eric W. Biederman" Cc: Josh Triplett Cc: Petr Matousek Cc: Vasily Kulikov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index 124430ba569b..cf9e414dbb9e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -391,10 +391,6 @@ arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr, static inline void arch_pick_mmap_layout(struct mm_struct *mm) {} #endif - -extern void set_dumpable(struct mm_struct *mm, int value); -extern int get_dumpable(struct mm_struct *mm); - #define SUID_DUMP_DISABLE 0 /* No setuid dumping */ #define SUID_DUMP_USER 1 /* Dump as user of process */ #define SUID_DUMP_ROOT 2 /* Dump as root */ @@ -405,6 +401,23 @@ extern int get_dumpable(struct mm_struct *mm); #define MMF_DUMPABLE_BITS 2 #define MMF_DUMPABLE_MASK ((1 << MMF_DUMPABLE_BITS) - 1) +extern void set_dumpable(struct mm_struct *mm, int value); +/* + * This returns the actual value of the suid_dumpable flag. For things + * that are using this for checking for privilege transitions, it must + * test against SUID_DUMP_USER rather than treating it as a boolean + * value. + */ +static inline int __get_dumpable(unsigned long mm_flags) +{ + return mm_flags & MMF_DUMPABLE_MASK; +} + +static inline int get_dumpable(struct mm_struct *mm) +{ + return __get_dumpable(mm->flags); +} + /* coredump filter bits */ #define MMF_DUMP_ANON_PRIVATE 2 #define MMF_DUMP_ANON_SHARED 3 -- cgit v1.2.3 From 74e37200de8e9c4e09b70c21c3f13c2071e77457 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 23 Jan 2014 15:55:35 -0800 Subject: proc: cleanup/simplify get_task_state/task_state_array get_task_state() and task_state_array[] look confusing and suboptimal, it is not clear what it can actually report to user-space and task_state_array[] blows .data for no reason. 1. state = (tsk->state & TASK_REPORT) | tsk->exit_state is not clear. TASK_REPORT is self-documenting but it is not clear what ->exit_state can add. Move the potential exit_state's (EXIT_ZOMBIE and EXIT_DEAD) into TASK_REPORT and use it to calculate the final result. 2. With the change above it is obvious that task_state_array[] has the unused entries just to make BUILD_BUG_ON() happy. Change this BUILD_BUG_ON() to use TASK_REPORT rather than TASK_STATE_MAX and shrink task_state_array[]. 3. Turn the "while (state)" loop into fls(state). Signed-off-by: Oleg Nesterov Cc: Peter Zijlstra Cc: David Laight Cc: Geert Uytterhoeven Cc: Ingo Molnar Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index cf9e414dbb9e..33e4e9e1f621 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -229,7 +229,7 @@ extern char ___assert_task_state[1 - 2*!!( /* get_task_state() */ #define TASK_REPORT (TASK_RUNNING | TASK_INTERRUPTIBLE | \ TASK_UNINTERRUPTIBLE | __TASK_STOPPED | \ - __TASK_TRACED) + __TASK_TRACED | EXIT_ZOMBIE | EXIT_DEAD) #define task_is_traced(task) ((task->state & __TASK_TRACED) != 0) #define task_is_stopped(task) ((task->state & __TASK_STOPPED) != 0) -- cgit v1.2.3 From ff252c1fc537b0c9e40f62da0a9d11bf0737b7db Mon Sep 17 00:00:00 2001 From: DaeSeok Youn Date: Thu, 23 Jan 2014 15:55:46 -0800 Subject: kernel/fork.c: make dup_mm() static dup_mm() is used only in kernel/fork.c Signed-off-by: Daeseok Youn Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index 33e4e9e1f621..66a17ad55bcb 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2295,8 +2295,6 @@ extern struct mm_struct *get_task_mm(struct task_struct *task); extern struct mm_struct *mm_access(struct task_struct *task, unsigned int mode); /* Remove the current tasks stale references to the old mm_struct */ extern void mm_release(struct task_struct *, struct mm_struct *); -/* Allocate a new mm structure and copy contents from tsk->mm */ -extern struct mm_struct *dup_mm(struct task_struct *tsk); extern int copy_thread(unsigned long, unsigned long, unsigned long, struct task_struct *); -- cgit v1.2.3 From 98611e4e6a2b4a03fd2d4750cce8e4455a995c8d Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 23 Jan 2014 15:55:52 -0800 Subject: exec: kill task_struct->did_exec We can kill either task->did_exec or PF_FORKNOEXEC, they are mutually exclusive. The patch kills ->did_exec because it has a single user. Signed-off-by: Oleg Nesterov Acked-by: KOSAKI Motohiro Cc: Al Viro Cc: Kees Cook Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index 66a17ad55bcb..68a0e84463a0 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1239,7 +1239,6 @@ struct task_struct { /* Used for emulating ABI behavior of previous Linux versions */ unsigned int personality; - unsigned did_exec:1; unsigned in_execve:1; /* Tell the LSMs that the process is doing an * execve */ unsigned in_iowait:1; -- cgit v1.2.3