From 42b7772812d15b86543a23b82bd6070eef9a08b1 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 23 Jul 2008 21:27:10 -0700 Subject: mm: remove double indirection on tlb parameter to free_pgd_range() & Co The double indirection here is not needed anywhere and hence (at least) confusing. Signed-off-by: Jan Beulich Cc: Hugh Dickins Cc: Nick Piggin Cc: Christoph Lameter Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: "Luck, Tony" Cc: Paul Mundt Cc: "David S. Miller" Acked-by: Jeremy Fitzhardinge Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/exec.c') diff --git a/fs/exec.c b/fs/exec.c index fd9234379e8d..190ed1f92774 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -541,7 +541,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift) /* * when the old and new regions overlap clear from new_end. */ - free_pgd_range(&tlb, new_end, old_end, new_end, + free_pgd_range(tlb, new_end, old_end, new_end, vma->vm_next ? vma->vm_next->vm_start : 0); } else { /* @@ -550,7 +550,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift) * have constraints on va-space that make this illegal (IA64) - * for the others its just a little faster. */ - free_pgd_range(&tlb, old_start, old_end, new_end, + free_pgd_range(tlb, old_start, old_end, new_end, vma->vm_next ? vma->vm_next->vm_start : 0); } tlb_finish_mmu(tlb, new_end, old_end); -- cgit v1.2.3 From ba92a43dbaee339cf5915ef766d3d3ffbaaf103c Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Fri, 25 Jul 2008 01:45:43 -0700 Subject: exec: remove some includes fs/exec.c used to need mman.h pagemap.h swap.h and rmap.h when it did mm-ish stuff in install_arg_page(); but no need for them after 2.6.22. 
[akpm@linux-foundation.org: unbreak arm] Signed-off-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'fs/exec.c') diff --git a/fs/exec.c b/fs/exec.c index 190ed1f92774..e41aef0fb351 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -25,19 +25,18 @@ #include #include #include -#include +#include #include #include #include +#include #include #include -#include #include #include #include #include #include -#include #include #include #include @@ -47,7 +46,6 @@ #include #include #include -#include #include #include #include -- cgit v1.2.3 From e4901f92a8dbe843e76651a50f7a2a6dd3d53474 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 25 Jul 2008 01:47:31 -0700 Subject: coredump: zap_threads: comments && use while_each_thread() No changes in fs/exec.o The for_each_process() loop in zap_threads() is very subtle, it is not clear why we don't race with fork/exit/exec. Add the fat comment. Also, change the code to use while_each_thread(). Signed-off-by: Oleg Nesterov Acked-by: Roland McGrath Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 39 ++++++++++++++++++++++++++++++++------- 1 file changed, 32 insertions(+), 7 deletions(-) (limited to 'fs/exec.c') diff --git a/fs/exec.c b/fs/exec.c index e41aef0fb351..af249af4ccab 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1517,7 +1517,7 @@ static void zap_process(struct task_struct *start) sigaddset(&t->pending.signal, SIGKILL); signal_wake_up(t, 1); } - } while ((t = next_thread(t)) != start); + } while_each_thread(start, t); } static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm, @@ -1539,7 +1539,36 @@ static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm, if (atomic_read(&mm->mm_users) == mm->core_waiters + 1) goto done; - + /* + * We should find and kill all tasks which use this mm, and we should + * count them correctly into mm->core_waiters. 
We don't take tasklist + * lock, but this is safe wrt: + * + * fork: + * None of sub-threads can fork after zap_process(leader). All + * processes which were created before this point should be + * visible to zap_threads() because copy_process() adds the new + * process to the tail of init_task.tasks list, and lock/unlock + * of ->siglock provides a memory barrier. + * + * do_exit: + * The caller holds mm->mmap_sem. This means that the task which + * uses this mm can't pass exit_mm(), so it can't exit or clear + * its ->mm. + * + * de_thread: + * It does list_replace_rcu(&leader->tasks, &current->tasks), + * we must see either old or new leader, this does not matter. + * However, it can change p->sighand, so lock_task_sighand(p) + * must be used. Since p->mm != NULL and we hold ->mmap_sem + * it can't fail. + * + * Note also that "g" can be the old leader with ->mm == NULL + * and already unhashed and thus removed from ->thread_group. + * This is OK, __unhash_process()->list_del_rcu() does not + * clear the ->next pointer, we will find the new leader via + * next_thread(). + */ rcu_read_lock(); for_each_process(g) { if (g == tsk->group_leader) @@ -1549,17 +1578,13 @@ static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm, do { if (p->mm) { if (p->mm == mm) { - /* - * p->sighand can't disappear, but - * may be changed by de_thread() - */ lock_task_sighand(p, &flags); zap_process(p); unlock_task_sighand(p, &flags); } break; } - } while ((p = next_thread(p)) != g); + } while_each_thread(g, p); } rcu_read_unlock(); done: -- cgit v1.2.3 From 7b34e4283c685f5cc6ba6d30e939906eee0d4bcf Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 25 Jul 2008 01:47:37 -0700 Subject: introduce PF_KTHREAD flag Introduce the new PF_KTHREAD flag to mark the kernel threads. It is set by INIT_TASK() and copied to the forked children (we could set it in kthreadd() along with PF_NOFREEZE instead). daemonize() was changed as well. 
In that case testing of PF_KTHREAD is racy, but daemonize() is hopeless anyway. This flag is cleared in do_execve(), before search_binary_handler(). Probably not the best place, we can do this in exec_mmap() or in start_thread(), or clear it along with PF_FORKNOEXEC. But I think this doesn't matter in practice, and if do_execve() fails kthread should die soon. Signed-off-by: Oleg Nesterov Cc: Roland McGrath Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/exec.c') diff --git a/fs/exec.c b/fs/exec.c index af249af4ccab..cd2e8c9b1249 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1326,6 +1326,7 @@ int do_execve(char * filename, if (retval < 0) goto out; + current->flags &= ~PF_KTHREAD; retval = search_binary_handler(bprm,regs); if (retval >= 0) { /* execve success */ -- cgit v1.2.3 From 15b9f360c0316c06d37c09b02d85565edbaf9dd3 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 25 Jul 2008 01:47:39 -0700 Subject: coredump: zap_threads() must skip kernel threads The main loop in zap_threads() must skip kthreads which may use the same mm. Otherwise we "kill" this thread erroneously (for example, it can not fork or exec after that), and the coredumping task gets stuck in the TASK_UNINTERRUPTIBLE state forever because of the wrong ->core_waiters count. 
Signed-off-by: Oleg Nesterov Cc: Roland McGrath Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs/exec.c') diff --git a/fs/exec.c b/fs/exec.c index cd2e8c9b1249..e347e6ed1617 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1574,11 +1574,12 @@ static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm, for_each_process(g) { if (g == tsk->group_leader) continue; - + if (g->flags & PF_KTHREAD) + continue; p = g; do { if (p->mm) { - if (p->mm == mm) { + if (unlikely(p->mm == mm)) { lock_task_sighand(p, &flags); zap_process(p); unlock_task_sighand(p, &flags); -- cgit v1.2.3 From 32ecb1f26dd50eeaac4e3f4dea4541c97848e459 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 25 Jul 2008 01:47:41 -0700 Subject: coredump: turn mm->core_startup_done into the pointer to struct core_state mm->core_startup_done points to "struct completion startup_done" allocated on the coredump_wait()'s stack. Introduce the new structure, core_state, which holds this "struct completion". This way we can add more info visible to the threads participating in coredump without enlarging mm_struct. No changes in affected .o files. 
Signed-off-by: Oleg Nesterov Cc: Roland McGrath Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs/exec.c') diff --git a/fs/exec.c b/fs/exec.c index e347e6ed1617..71734568f018 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1597,13 +1597,13 @@ static int coredump_wait(int exit_code) { struct task_struct *tsk = current; struct mm_struct *mm = tsk->mm; - struct completion startup_done; + struct core_state core_state; struct completion *vfork_done; int core_waiters; init_completion(&mm->core_done); - init_completion(&startup_done); - mm->core_startup_done = &startup_done; + init_completion(&core_state.startup); + mm->core_state = &core_state; core_waiters = zap_threads(tsk, mm, exit_code); up_write(&mm->mmap_sem); @@ -1622,7 +1622,7 @@ static int coredump_wait(int exit_code) } if (core_waiters) - wait_for_completion(&startup_done); + wait_for_completion(&core_state.startup); fail: BUG_ON(mm->core_waiters); return core_waiters; -- cgit v1.2.3 From 999d9fc1670bc082928b93b11d1f2e0e417d973c Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 25 Jul 2008 01:47:41 -0700 Subject: coredump: move mm->core_waiters into struct core_state Move mm->core_waiters into "struct core_state" allocated on stack. This shrinks mm_struct a little bit and allows further changes. This patch mostly does s/core_waiters/core_state. The only essential change is that coredump_wait() must clear mm->core_state before return. The coredump_wait()'s path is uglified and .text grows by 30 bytes, this is fixed by the next patch. 
Signed-off-by: Oleg Nesterov Cc: Roland McGrath Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) (limited to 'fs/exec.c') diff --git a/fs/exec.c b/fs/exec.c index 71734568f018..50de3aaff4d0 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -722,12 +722,10 @@ static int exec_mmap(struct mm_struct *mm) * Make sure that if there is a core dump in progress * for the old mm, we get out and die instead of going * through with the exec. We must hold mmap_sem around - * checking core_waiters and changing tsk->mm. The - * core-inducing thread will increment core_waiters for - * each thread whose ->mm == old_mm. + * checking core_state and changing tsk->mm. */ down_read(&old_mm->mmap_sem); - if (unlikely(old_mm->core_waiters)) { + if (unlikely(old_mm->core_state)) { up_read(&old_mm->mmap_sem); return -EINTR; } @@ -1514,7 +1512,7 @@ static void zap_process(struct task_struct *start) t = start; do { if (t != current && t->mm) { - t->mm->core_waiters++; + t->mm->core_state->nr_threads++; sigaddset(&t->pending.signal, SIGKILL); signal_wake_up(t, 1); } @@ -1538,11 +1536,11 @@ static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm, if (err) return err; - if (atomic_read(&mm->mm_users) == mm->core_waiters + 1) + if (atomic_read(&mm->mm_users) == mm->core_state->nr_threads + 1) goto done; /* * We should find and kill all tasks which use this mm, and we should - * count them correctly into mm->core_waiters. We don't take tasklist + * count them correctly into ->nr_threads. 
We don't take tasklist * lock, but this is safe wrt: * * fork: @@ -1590,7 +1588,7 @@ static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm, } rcu_read_unlock(); done: - return mm->core_waiters; + return mm->core_state->nr_threads; } static int coredump_wait(int exit_code) @@ -1603,9 +1601,12 @@ static int coredump_wait(int exit_code) init_completion(&mm->core_done); init_completion(&core_state.startup); + core_state.nr_threads = 0; mm->core_state = &core_state; core_waiters = zap_threads(tsk, mm, exit_code); + if (core_waiters < 0) + mm->core_state = NULL; up_write(&mm->mmap_sem); if (unlikely(core_waiters < 0)) @@ -1623,8 +1624,8 @@ static int coredump_wait(int exit_code) if (core_waiters) wait_for_completion(&core_state.startup); + mm->core_state = NULL; fail: - BUG_ON(mm->core_waiters); return core_waiters; } @@ -1702,7 +1703,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs) /* * If another thread got here first, or we are not dumpable, bail out. */ - if (mm->core_waiters || !get_dumpable(mm)) { + if (mm->core_state || !get_dumpable(mm)) { up_write(&mm->mmap_sem); goto fail; } -- cgit v1.2.3 From 8cd9c249128a59e8e833d454a784b0cbd338d468 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 25 Jul 2008 01:47:42 -0700 Subject: coredump: simplify core_state->nr_threads calculation Change zap_process() to return int instead of incrementing mm->core_state->nr_threads directly. Change zap_threads() to set mm->core_state only on success. This patch restores the original size of .text, and more importantly now ->nr_threads is used in two places only. 
Signed-off-by: Oleg Nesterov Cc: Roland McGrath Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) (limited to 'fs/exec.c') diff --git a/fs/exec.c b/fs/exec.c index 50de3aaff4d0..c74bb34eeeff 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1502,9 +1502,10 @@ out: return ispipe; } -static void zap_process(struct task_struct *start) +static int zap_process(struct task_struct *start) { struct task_struct *t; + int nr = 0; start->signal->flags = SIGNAL_GROUP_EXIT; start->signal->group_stop_count = 0; @@ -1512,31 +1513,33 @@ static void zap_process(struct task_struct *start) t = start; do { if (t != current && t->mm) { - t->mm->core_state->nr_threads++; sigaddset(&t->pending.signal, SIGKILL); signal_wake_up(t, 1); + nr++; } } while_each_thread(start, t); + + return nr; } static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm, - int exit_code) + struct core_state *core_state, int exit_code) { struct task_struct *g, *p; unsigned long flags; - int err = -EAGAIN; + int nr = -EAGAIN; spin_lock_irq(&tsk->sighand->siglock); if (!signal_group_exit(tsk->signal)) { + mm->core_state = core_state; tsk->signal->group_exit_code = exit_code; - zap_process(tsk); - err = 0; + nr = zap_process(tsk); } spin_unlock_irq(&tsk->sighand->siglock); - if (err) - return err; + if (unlikely(nr < 0)) + return nr; - if (atomic_read(&mm->mm_users) == mm->core_state->nr_threads + 1) + if (atomic_read(&mm->mm_users) == nr + 1) goto done; /* * We should find and kill all tasks which use this mm, and we should @@ -1579,7 +1582,7 @@ static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm, if (p->mm) { if (unlikely(p->mm == mm)) { lock_task_sighand(p, &flags); - zap_process(p); + nr += zap_process(p); unlock_task_sighand(p, &flags); } break; @@ -1588,7 +1591,8 @@ static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm, } rcu_read_unlock(); 
done: - return mm->core_state->nr_threads; + core_state->nr_threads = nr; + return nr; } static int coredump_wait(int exit_code) @@ -1601,12 +1605,7 @@ static int coredump_wait(int exit_code) init_completion(&mm->core_done); init_completion(&core_state.startup); - core_state.nr_threads = 0; - mm->core_state = &core_state; - - core_waiters = zap_threads(tsk, mm, exit_code); - if (core_waiters < 0) - mm->core_state = NULL; + core_waiters = zap_threads(tsk, mm, &core_state, exit_code); up_write(&mm->mmap_sem); if (unlikely(core_waiters < 0)) -- cgit v1.2.3 From c5f1cc8c1828486a61ab3e575da6e2c62b34d399 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 25 Jul 2008 01:47:42 -0700 Subject: coredump: turn core_state->nr_threads into atomic_t Turn core_state->nr_threads into atomic_t and kill now unneeded down_write(&mm->mmap_sem) in exit_mm(). Signed-off-by: Oleg Nesterov Cc: Roland McGrath Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/exec.c') diff --git a/fs/exec.c b/fs/exec.c index c74bb34eeeff..15d493fe8aa3 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1591,7 +1591,7 @@ static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm, } rcu_read_unlock(); done: - core_state->nr_threads = nr; + atomic_set(&core_state->nr_threads, nr); return nr; } -- cgit v1.2.3 From 9d5b327bf198d2720666de958dcc2ae219d86952 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 25 Jul 2008 01:47:43 -0700 Subject: coredump: make mm->core_state visible to ->core_dump() Move the "struct core_state core_state" from coredump_wait() to do_coredump(), this makes mm->core_state visible to binfmt->core_dump(). 
Signed-off-by: Oleg Nesterov Acked-by: Roland McGrath Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'fs/exec.c') diff --git a/fs/exec.c b/fs/exec.c index 15d493fe8aa3..b8ee842d93cd 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1595,17 +1595,16 @@ done: return nr; } -static int coredump_wait(int exit_code) +static int coredump_wait(int exit_code, struct core_state *core_state) { struct task_struct *tsk = current; struct mm_struct *mm = tsk->mm; - struct core_state core_state; struct completion *vfork_done; int core_waiters; init_completion(&mm->core_done); - init_completion(&core_state.startup); - core_waiters = zap_threads(tsk, mm, &core_state, exit_code); + init_completion(&core_state->startup); + core_waiters = zap_threads(tsk, mm, core_state, exit_code); up_write(&mm->mmap_sem); if (unlikely(core_waiters < 0)) @@ -1622,8 +1621,7 @@ static int coredump_wait(int exit_code) } if (core_waiters) - wait_for_completion(&core_state.startup); - mm->core_state = NULL; + wait_for_completion(&core_state->startup); fail: return core_waiters; } @@ -1679,6 +1677,7 @@ int get_dumpable(struct mm_struct *mm) int do_coredump(long signr, int exit_code, struct pt_regs * regs) { + struct core_state core_state; char corename[CORENAME_MAX_SIZE + 1]; struct mm_struct *mm = current->mm; struct linux_binfmt * binfmt; @@ -1717,7 +1716,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs) current->fsuid = 0; /* Dump root private */ } - retval = coredump_wait(exit_code); + retval = coredump_wait(exit_code, &core_state); if (retval < 0) goto fail; @@ -1812,6 +1811,7 @@ fail_unlock: current->fsuid = fsuid; complete_all(&mm->core_done); + mm->core_state = NULL; fail: return retval; } -- cgit v1.2.3 From b564daf806d492dd4f7afe9b6c83b8d35d137669 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 25 Jul 2008 01:47:44 -0700 Subject: coredump: construct the list of 
coredumping threads at startup time binfmt->core_dump() has to iterate over the all threads in system in order to find the coredumping threads and construct the list using the GFP_ATOMIC allocations. With this patch each thread allocates the list node on exit_mm()'s stack and adds itself to the list. This allows us to do further changes: - simplify ->core_dump() - change exit_mm() to clear ->mm first, then wait for ->core_done. this makes the coredumping process visible to oom_kill - kill mm->core_done Signed-off-by: Oleg Nesterov Acked-by: Roland McGrath Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/exec.c') diff --git a/fs/exec.c b/fs/exec.c index b8ee842d93cd..fe2873b8037f 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1604,6 +1604,8 @@ static int coredump_wait(int exit_code, struct core_state *core_state) init_completion(&mm->core_done); init_completion(&core_state->startup); + core_state->dumper.task = tsk; + core_state->dumper.next = NULL; core_waiters = zap_threads(tsk, mm, core_state, exit_code); up_write(&mm->mmap_sem); -- cgit v1.2.3 From a94e2d408eaedbd85aae259621d46fafc10479a2 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 25 Jul 2008 01:47:46 -0700 Subject: coredump: kill mm->core_done Now that we have core_state->dumper list we can use it to wake up the sub-threads waiting for the coredump completion. This uglifies the code and .text grows by 47 bytes, but otoh mm_struct lessens by sizeof(struct completion). Also, with this change we can decouple exit_mm() from the coredumping code. 
Signed-off-by: Oleg Nesterov Cc: Roland McGrath Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) (limited to 'fs/exec.c') diff --git a/fs/exec.c b/fs/exec.c index fe2873b8037f..bff43aeb235e 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1602,7 +1602,6 @@ static int coredump_wait(int exit_code, struct core_state *core_state) struct completion *vfork_done; int core_waiters; - init_completion(&mm->core_done); init_completion(&core_state->startup); core_state->dumper.task = tsk; core_state->dumper.next = NULL; @@ -1628,6 +1627,27 @@ fail: return core_waiters; } +static void coredump_finish(struct mm_struct *mm) +{ + struct core_thread *curr, *next; + struct task_struct *task; + + next = mm->core_state->dumper.next; + while ((curr = next) != NULL) { + next = curr->next; + task = curr->task; + /* + * see exit_mm(), curr->task must not see + * ->task == NULL before we read ->next. + */ + smp_mb(); + curr->task = NULL; + wake_up_process(task); + } + + mm->core_state = NULL; +} + /* * set_dumpable converts traditional three-value dumpable to two flags and * stores them into mm->flags. It modifies lower two bits of mm->flags, but @@ -1812,8 +1832,7 @@ fail_unlock: argv_free(helper_argv); current->fsuid = fsuid; - complete_all(&mm->core_done); - mm->core_state = NULL; + coredump_finish(mm); fail: return retval; } -- cgit v1.2.3 From 565b9b14e7f48131bca58840aa404bbef058fa89 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 25 Jul 2008 01:47:47 -0700 Subject: coredump: format_corename: fix the "core_uses_pid" logic I don't understand why the multi-thread coredump implies the core_uses_pid behaviour, but we shouldn't use mm->mm_users for that. This counter can be incremented by get_task_mm(). Use the value returned by coredump_wait() instead. Also, remove the "const char *pattern" argument, format_corename() can use core_pattern directly. 
[akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Oleg Nesterov Cc: Roland McGrath Cc: Alan Cox Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) (limited to 'fs/exec.c') diff --git a/fs/exec.c b/fs/exec.c index bff43aeb235e..5e559013e303 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1379,17 +1379,14 @@ EXPORT_SYMBOL(set_binfmt); * name into corename, which must have space for at least * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator. */ -static int format_corename(char *corename, const char *pattern, long signr) +static int format_corename(char *corename, int nr_threads, long signr) { - const char *pat_ptr = pattern; + const char *pat_ptr = core_pattern; + int ispipe = (*pat_ptr == '|'); char *out_ptr = corename; char *const out_end = corename + CORENAME_MAX_SIZE; int rc; int pid_in_pattern = 0; - int ispipe = 0; - - if (*pattern == '|') - ispipe = 1; /* Repeat as long as we have more pattern to process and more output space */ @@ -1490,7 +1487,7 @@ static int format_corename(char *corename, const char *pattern, long signr) * and core_uses_pid is set, then .%pid will be appended to * the filename. Do not do this for piped commands. 
 */ if (!ispipe && !pid_in_pattern - && (core_uses_pid || atomic_read(&current->mm->mm_users) != 1)) { + && (core_uses_pid || nr_threads)) { rc = snprintf(out_ptr, out_end - out_ptr, ".%d", task_tgid_vnr(current)); if (rc > out_end - out_ptr) @@ -1753,7 +1750,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs) * uses lock_kernel() */ lock_kernel(); - ispipe = format_corename(corename, core_pattern, signr); + ispipe = format_corename(corename, retval, signr); unlock_kernel(); /* * Don't bother to check the RLIMIT_CORE value if core_pattern points -- cgit v1.2.3 From 6341c393fcc37d58727865f1ee2f65e632e9d4f0 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Fri, 25 Jul 2008 19:45:44 -0700 Subject: tracehook: exec This moves all the ptrace hooks related to exec into tracehook.h inlines. This also lifts the calls for tracing out of the binfmt load_binary hooks into search_binary_handler() after it calls into the binfmt module. This change has no effect, since all the binfmt modules' load_binary functions did the call at the end on success, and now search_binary_handler() does it immediately after return if successful. We consolidate the repeated code, and binfmt modules no longer need to import ptrace_notify(). 
Signed-off-by: Roland McGrath Cc: Oleg Nesterov Reviewed-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'fs/exec.c') diff --git a/fs/exec.c b/fs/exec.c index 5e559013e303..b8792a131533 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -42,13 +42,13 @@ #include #include #include -#include #include #include #include #include #include #include +#include #include #include @@ -1071,13 +1071,8 @@ EXPORT_SYMBOL(prepare_binprm); static int unsafe_exec(struct task_struct *p) { - int unsafe = 0; - if (p->ptrace & PT_PTRACED) { - if (p->ptrace & PT_PTRACE_CAP) - unsafe |= LSM_UNSAFE_PTRACE_CAP; - else - unsafe |= LSM_UNSAFE_PTRACE; - } + int unsafe = tracehook_unsafe_exec(p); + if (atomic_read(&p->fs->count) > 1 || atomic_read(&p->files->count) > 1 || atomic_read(&p->sighand->count) > 1) @@ -1214,6 +1209,7 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) read_unlock(&binfmt_lock); retval = fn(bprm, regs); if (retval >= 0) { + tracehook_report_exec(fmt, bprm, regs); put_binfmt(fmt); allow_write_access(bprm->file); if (bprm->file) -- cgit v1.2.3 From b77b0646ef4efe31a7449bb3d9360fd00f95433d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 17 Jul 2008 09:37:02 -0400 Subject: [PATCH] pass MAY_OPEN to vfs_permission() explicitly ... and get rid of the last "let's deduce mask from nameidata->flags" bit. 
Signed-off-by: Al Viro --- fs/exec.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/exec.c') diff --git a/fs/exec.c b/fs/exec.c index b8792a131533..0ba5d355c5a1 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -118,7 +118,7 @@ asmlinkage long sys_uselib(const char __user * library) if (!S_ISREG(nd.path.dentry->d_inode->i_mode)) goto exit; - error = vfs_permission(&nd, MAY_READ | MAY_EXEC); + error = vfs_permission(&nd, MAY_READ | MAY_EXEC | MAY_OPEN); if (error) goto exit; @@ -666,7 +666,7 @@ struct file *open_exec(const char *name) struct inode *inode = nd.path.dentry->d_inode; file = ERR_PTR(-EACCES); if (S_ISREG(inode->i_mode)) { - int err = vfs_permission(&nd, MAY_EXEC); + int err = vfs_permission(&nd, MAY_EXEC | MAY_OPEN); file = ERR_PTR(err); if (!err) { file = nameidata_to_filp(&nd, -- cgit v1.2.3 From e56b6a5dda1a36ffaa532df6f975ea324298fa4d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 19 May 2008 07:53:34 +0200 Subject: Re: [PATCH 3/6] vfs: open_exec cleanup On Mon, May 19, 2008 at 12:01:49AM +0200, Marcin Slusarz wrote: > open_exec is needlessly indented, calls ERR_PTR with 0 argument > (which is not valid errno) and jumps into middle of function > just to return value. > So clean it up a bit. Still looks rather messy. See below for a better version. 
Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/exec.c | 58 ++++++++++++++++++++++++++++++---------------------------- 1 file changed, 30 insertions(+), 28 deletions(-) (limited to 'fs/exec.c') diff --git a/fs/exec.c b/fs/exec.c index 0ba5d355c5a1..346e3f69c6e0 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -656,38 +656,40 @@ EXPORT_SYMBOL(setup_arg_pages); struct file *open_exec(const char *name) { struct nameidata nd; - int err; struct file *file; + int err; - err = path_lookup_open(AT_FDCWD, name, LOOKUP_FOLLOW, &nd, FMODE_READ|FMODE_EXEC); - file = ERR_PTR(err); - - if (!err) { - struct inode *inode = nd.path.dentry->d_inode; - file = ERR_PTR(-EACCES); - if (S_ISREG(inode->i_mode)) { - int err = vfs_permission(&nd, MAY_EXEC | MAY_OPEN); - file = ERR_PTR(err); - if (!err) { - file = nameidata_to_filp(&nd, - O_RDONLY|O_LARGEFILE); - if (!IS_ERR(file)) { - err = deny_write_access(file); - if (err) { - fput(file); - file = ERR_PTR(err); - } - } -out: - return file; - } - } - release_open_intent(&nd); - path_put(&nd.path); + err = path_lookup_open(AT_FDCWD, name, LOOKUP_FOLLOW, &nd, + FMODE_READ|FMODE_EXEC); + if (err) + goto out; + + err = -EACCES; + if (!S_ISREG(nd.path.dentry->d_inode->i_mode)) + goto out_path_put; + + err = vfs_permission(&nd, MAY_EXEC | MAY_OPEN); + if (err) + goto out_path_put; + + file = nameidata_to_filp(&nd, O_RDONLY|O_LARGEFILE); + if (IS_ERR(file)) + return file; + + err = deny_write_access(file); + if (err) { + fput(file); + goto out; } - goto out; -} + return file; + + out_path_put: + release_open_intent(&nd); + path_put(&nd.path); + out: + return ERR_PTR(err); +} EXPORT_SYMBOL(open_exec); int kernel_read(struct file *file, unsigned long offset, -- cgit v1.2.3 From 30524472c2f728c20d6bf35191042a5d455c0a64 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 22 Jul 2008 00:02:33 -0400 Subject: [PATCH] take noexec checks to very few callers that care Signed-off-by: Al Viro --- fs/exec.c | 7 +++++++ 1 file changed, 7 insertions(+) 
(limited to 'fs/exec.c') diff --git a/fs/exec.c b/fs/exec.c index 346e3f69c6e0..eca58c29eded 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -118,6 +118,10 @@ asmlinkage long sys_uselib(const char __user * library) if (!S_ISREG(nd.path.dentry->d_inode->i_mode)) goto exit; + error = -EACCES; + if (nd.path.mnt->mnt_flags & MNT_NOEXEC) + goto exit; + error = vfs_permission(&nd, MAY_READ | MAY_EXEC | MAY_OPEN); if (error) goto exit; @@ -668,6 +672,9 @@ struct file *open_exec(const char *name) if (!S_ISREG(nd.path.dentry->d_inode->i_mode)) goto out_path_put; + if (nd.path.mnt->mnt_flags & MNT_NOEXEC) + goto out_path_put; + err = vfs_permission(&nd, MAY_EXEC | MAY_OPEN); if (err) goto out_path_put; -- cgit v1.2.3 From 964bd183624c03680796b63b4ab97ee3905a806a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 26 Jul 2008 03:33:14 -0400 Subject: [PATCH] get rid of __user_path_lookup_open Signed-off-by: Al Viro --- fs/exec.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'fs/exec.c') diff --git a/fs/exec.c b/fs/exec.c index eca58c29eded..9696bbf0f0b1 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -106,11 +106,17 @@ static inline void put_binfmt(struct linux_binfmt * fmt) */ asmlinkage long sys_uselib(const char __user * library) { - struct file * file; + struct file *file; struct nameidata nd; - int error; - - error = __user_path_lookup_open(library, LOOKUP_FOLLOW, &nd, FMODE_READ|FMODE_EXEC); + char *tmp = getname(library); + int error = PTR_ERR(tmp); + + if (!IS_ERR(tmp)) { + error = path_lookup_open(AT_FDCWD, tmp, + LOOKUP_FOLLOW, &nd, + FMODE_READ|FMODE_EXEC); + putname(tmp); + } if (error) goto out; -- cgit v1.2.3