From 39efa3ef3a376a4e53de2f82fc91182459d34200 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 23 Mar 2011 10:37:00 +0100 Subject: signal: Use GROUP_STOP_PENDING to stop once for a single group stop Currently task->signal->group_stop_count is used to decide whether to stop for group stop. However, if there is a task in the group which is taking a long time to stop, other tasks which are continued by ptrace would repeatedly stop for the same group stop until the group stop is complete. Conversely, if a ptraced task is in TASK_TRACED state, the debugger won't get notified of group stops which is inconsistent compared to the ptraced task in any other state. This patch introduces GROUP_STOP_PENDING which tracks whether a task is yet to stop for the group stop in progress. The flag is set when a group stop starts and cleared when the task stops the first time for the group stop, and consulted whenever whether the task should participate in a group stop needs to be determined. Note that now tasks in TASK_TRACED also participate in group stop. This results in the following behavior changes. * For a single group stop, a ptracer would see at most one stop reported. * A ptracee in TASK_TRACED now also participates in group stop and the tracer would get the notification. However, as a ptraced task could be in TASK_STOPPED state or any ptrace trap could consume group stop, the notification may still be missing. These will be addressed with further patches. * A ptracee may start a group stop while one is still in progress if the tracer let it continue with stop signal delivery. Group stop code handles this correctly. Oleg: * Spotted that a task might skip signal check even when its GROUP_STOP_PENDING is set. Fixed by updating recalc_sigpending_tsk() to check GROUP_STOP_PENDING instead of group_stop_count. * Pointed out that task->group_stop should be cleared whenever task->signal->group_stop_count is cleared. Fixed accordingly. * Pointed out the behavior inconsistency between TASK_TRACED and RUNNING and the last behavior change. Signed-off-by: Tejun Heo Acked-by: Oleg Nesterov Cc: Roland McGrath --- fs/exec.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/exec.c') diff --git a/fs/exec.c b/fs/exec.c index 5e62d26a4fec..8328beb9016f 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1659,6 +1659,7 @@ static int zap_process(struct task_struct *start, int exit_code) t = start; do { + task_clear_group_stop_pending(t); if (t != current && t->mm) { sigaddset(&t->pending.signal, SIGKILL); signal_wake_up(t, 1); -- cgit v1.2.3 From 1d1dbf8135ab2f3603cc72e39e0f68784f453c39 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 6 Mar 2011 18:02:21 +0100 Subject: exec: introduce get_user_arg_ptr() helper Introduce get_user_arg_ptr() helper, convert count() and copy_strings() to use it. No functional changes, preparation. This helper is trivial, it just reads the pointer from argv/envp user-space array. Signed-off-by: Oleg Nesterov Reviewed-by: KOSAKI Motohiro Tested-by: KOSAKI Motohiro --- fs/exec.c | 36 +++++++++++++++++++++++++----------- 1 file changed, 25 insertions(+), 11 deletions(-) (limited to 'fs/exec.c') diff --git a/fs/exec.c b/fs/exec.c index 5e62d26a4fec..b12e24fe1c5e 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -398,6 +398,17 @@ err: return err; } +static const char __user * +get_user_arg_ptr(const char __user * const __user *argv, int nr) +{ + const char __user *ptr; + + if (get_user(ptr, argv + nr)) + return ERR_PTR(-EFAULT); + + return ptr; +} + /* * count() counts the number of strings in array ARGV. */ @@ -407,13 +418,14 @@ static int count(const char __user * const __user * argv, int max) if (argv != NULL) { for (;;) { - const char __user * p; + const char __user *p = get_user_arg_ptr(argv, i); - if (get_user(p, argv)) - return -EFAULT; if (!p) break; - argv++; + + if (IS_ERR(p)) + return -EFAULT; + if (i++ >= max) return -E2BIG; @@ -443,16 +455,18 @@ static int copy_strings(int argc, const char __user *const __user *argv, int len; unsigned long pos; - if (get_user(str, argv+argc) || - !(len = strnlen_user(str, MAX_ARG_STRLEN))) { - ret = -EFAULT; + ret = -EFAULT; + str = get_user_arg_ptr(argv, argc); + if (IS_ERR(str)) goto out; - } - if (!valid_arg_len(bprm, len)) { - ret = -E2BIG; + len = strnlen_user(str, MAX_ARG_STRLEN); + if (!len) + goto out; + + ret = -E2BIG; + if (!valid_arg_len(bprm, len)) goto out; - } /* We're going to work our way backwords. */ pos = bprm->p; -- cgit v1.2.3 From ba2d01629d0d167598cfea85adc7926822bbfc45 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 6 Mar 2011 18:02:37 +0100 Subject: exec: introduce struct user_arg_ptr No functional changes, preparation. Introduce struct user_arg_ptr, change do_execve() paths to use it instead of "char __user * const __user *argv". This makes the argv/envp arguments opaque, we are ready to handle the compat case which needs argv pointing to compat_uptr_t. Suggested-by: Linus Torvalds Signed-off-by: Oleg Nesterov Reviewed-by: KOSAKI Motohiro Tested-by: KOSAKI Motohiro --- fs/exec.c | 42 ++++++++++++++++++++++++++++++------------ 1 file changed, 30 insertions(+), 12 deletions(-) (limited to 'fs/exec.c') diff --git a/fs/exec.c b/fs/exec.c index b12e24fe1c5e..526a0399d963 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -398,12 +398,15 @@ err: return err; } -static const char __user * -get_user_arg_ptr(const char __user * const __user *argv, int nr) +struct user_arg_ptr { + const char __user *const __user *native; +}; + +static const char __user *get_user_arg_ptr(struct user_arg_ptr argv, int nr) { const char __user *ptr; - if (get_user(ptr, argv + nr)) + if (get_user(ptr, argv.native + nr)) return ERR_PTR(-EFAULT); return ptr; @@ -412,11 +415,11 @@ get_user_arg_ptr(const char __user * const __user *argv, int nr) /* * count() counts the number of strings in array ARGV. */ -static int count(const char __user * const __user * argv, int max) +static int count(struct user_arg_ptr argv, int max) { int i = 0; - if (argv != NULL) { + if (argv.native != NULL) { for (;;) { const char __user *p = get_user_arg_ptr(argv, i); @@ -442,7 +445,7 @@ static int count(const char __user * const __user * argv, int max) * processes's memory to the new process's stack. The call to get_user_pages() * ensures the destination page is created and not swapped out. */ -static int copy_strings(int argc, const char __user *const __user *argv, +static int copy_strings(int argc, struct user_arg_ptr argv, struct linux_binprm *bprm) { struct page *kmapped_page = NULL; @@ -533,14 +536,19 @@ out: /* * Like copy_strings, but get argv and its values from kernel memory. */ -int copy_strings_kernel(int argc, const char *const *argv, +int copy_strings_kernel(int argc, const char *const *__argv, struct linux_binprm *bprm) { int r; mm_segment_t oldfs = get_fs(); + struct user_arg_ptr argv = { + .native = (const char __user *const __user *)__argv, + }; + set_fs(KERNEL_DS); - r = copy_strings(argc, (const char __user *const __user *)argv, bprm); + r = copy_strings(argc, argv, bprm); set_fs(oldfs); + return r; } EXPORT_SYMBOL(copy_strings_kernel); @@ -1393,10 +1401,10 @@ EXPORT_SYMBOL(search_binary_handler); /* * sys_execve() executes a new program. */ -int do_execve(const char * filename, - const char __user *const __user *argv, - const char __user *const __user *envp, - struct pt_regs * regs) +static int do_execve_common(const char *filename, + struct user_arg_ptr argv, + struct user_arg_ptr envp, + struct pt_regs *regs) { struct linux_binprm *bprm; struct file *file; @@ -1503,6 +1511,16 @@ out_ret: return retval; } +int do_execve(const char *filename, + const char __user *const __user *__argv, + const char __user *const __user *__envp, + struct pt_regs *regs) +{ + struct user_arg_ptr argv = { .native = __argv }; + struct user_arg_ptr envp = { .native = __envp }; + return do_execve_common(filename, argv, envp, regs); +} + void set_binfmt(struct linux_binfmt *new) { struct mm_struct *mm = current->mm; -- cgit v1.2.3 From 0e028465d18b7c6797fcbdea632299d16097c5cd Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 6 Mar 2011 18:02:54 +0100 Subject: exec: unify do_execve/compat_do_execve code Add the appropriate members into struct user_arg_ptr and teach get_user_arg_ptr() to handle is_compat = T case correctly. This allows us to remove the compat_do_execve() code from fs/compat.c and reimplement compat_do_execve() as the trivial wrapper on top of do_execve_common(is_compat => true). In fact, this fixes another (minor) bug. "compat_uptr_t str" can overflow after "str += len" in compat_copy_strings() if a 64bit application execs via sys32_execve(). Unexport acct_arg_size() and get_arg_page(), fs/compat.c doesn't need them any longer. Signed-off-by: Oleg Nesterov Reviewed-by: KOSAKI Motohiro Tested-by: KOSAKI Motohiro --- fs/exec.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 50 insertions(+), 12 deletions(-) (limited to 'fs/exec.c') diff --git a/fs/exec.c b/fs/exec.c index 526a0399d963..89d788ca7829 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -55,6 +55,7 @@ #include #include #include +#include #include #include @@ -167,7 +168,7 @@ out: #ifdef CONFIG_MMU -void acct_arg_size(struct linux_binprm *bprm, unsigned long pages) +static void acct_arg_size(struct linux_binprm *bprm, unsigned long pages) { struct mm_struct *mm = current->mm; long diff = (long)(pages - bprm->vma_pages); @@ -186,7 +187,7 @@ void acct_arg_size(struct linux_binprm *bprm, unsigned long pages) #endif } -struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, +static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, int write) { struct page *page; @@ -305,11 +306,11 @@ static bool valid_arg_len(struct linux_binprm *bprm, long len) #else -void acct_arg_size(struct linux_binprm *bprm, unsigned long pages) +static inline void acct_arg_size(struct linux_binprm *bprm, unsigned long pages) { } -struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, +static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, int write) { struct page *page; @@ -399,17 +400,36 @@ err: } struct user_arg_ptr { - const char __user *const __user *native; +#ifdef CONFIG_COMPAT + bool is_compat; +#endif + union { + const char __user *const __user *native; +#ifdef CONFIG_COMPAT + compat_uptr_t __user *compat; +#endif + } ptr; }; static const char __user *get_user_arg_ptr(struct user_arg_ptr argv, int nr) { - const char __user *ptr; + const char __user *native; + +#ifdef CONFIG_COMPAT + if (unlikely(argv.is_compat)) { + compat_uptr_t compat; + + if (get_user(compat, argv.ptr.compat + nr)) + return ERR_PTR(-EFAULT); - if (get_user(ptr, argv.native + nr)) + return compat_ptr(compat); + } +#endif + + if (get_user(native, argv.ptr.native + nr)) return ERR_PTR(-EFAULT); - return ptr; + return native; } /* @@ -419,7 +439,7 @@ static int count(struct user_arg_ptr argv, int max) { int i = 0; - if (argv.native != NULL) { + if (argv.ptr.native != NULL) { for (;;) { const char __user *p = get_user_arg_ptr(argv, i); @@ -542,7 +562,7 @@ int copy_strings_kernel(int argc, const char *const *__argv, int r; mm_segment_t oldfs = get_fs(); struct user_arg_ptr argv = { - .native = (const char __user *const __user *)__argv, + .ptr.native = (const char __user *const __user *)__argv, }; set_fs(KERNEL_DS); @@ -1516,10 +1536,28 @@ int do_execve(const char *filename, const char __user *const __user *__envp, struct pt_regs *regs) { - struct user_arg_ptr argv = { .native = __argv }; - struct user_arg_ptr envp = { .native = __envp }; + struct user_arg_ptr argv = { .ptr.native = __argv }; + struct user_arg_ptr envp = { .ptr.native = __envp }; + return do_execve_common(filename, argv, envp, regs); +} + +#ifdef CONFIG_COMPAT +int compat_do_execve(char *filename, + compat_uptr_t __user *__argv, + compat_uptr_t __user *__envp, + struct pt_regs *regs) +{ + struct user_arg_ptr argv = { + .is_compat = true, + .ptr.compat = __argv, + }; + struct user_arg_ptr envp = { + .is_compat = true, + .ptr.compat = __envp, + }; return do_execve_common(filename, argv, envp, regs); } +#endif void set_binfmt(struct linux_binfmt *new) { -- cgit v1.2.3 From ae6b585eeb74670a2dec1fe4394bdfbdb9395cc2 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 6 Mar 2011 18:03:11 +0100 Subject: exec: document acct_arg_size() Add the comment to explain acct_arg_size(). Signed-off-by: Oleg Nesterov Reviewed-by: KOSAKI Motohiro --- fs/exec.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'fs/exec.c') diff --git a/fs/exec.c b/fs/exec.c index 89d788ca7829..5cb53f0232b1 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -167,7 +167,12 @@ out: } #ifdef CONFIG_MMU - +/* + * The nascent bprm->mm is not visible until exec_mmap() but it can + * use a lot of memory, account these pages in current->mm temporary + * for oom_badness()->get_mm_rss(). Once exec succeeds or fails, we + * change the counter back via acct_arg_size(0). + */ static void acct_arg_size(struct linux_binprm *bprm, unsigned long pages) { struct mm_struct *mm = current->mm; -- cgit v1.2.3 From 7d74f492e4dd0034a61458eb80f70b1d2862ed07 Mon Sep 17 00:00:00 2001 From: J Freyensee Date: Fri, 6 May 2011 16:56:47 -0700 Subject: export kernel call get_task_comm(). This allows drivers who call this function to be compiled modularly. Otherwise, a driver who is interested in this type of functionality has to implement their own get_task_comm() call, causing code duplication in the Linux source tree. Signed-off-by: J Freyensee Acked-by: David Rientjes Signed-off-by: Greg Kroah-Hartman --- fs/exec.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/exec.c') diff --git a/fs/exec.c b/fs/exec.c index 5e62d26a4fec..7de94950a5b4 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1004,6 +1004,7 @@ char *get_task_comm(char *buf, struct task_struct *tsk) task_unlock(tsk); return buf; } +EXPORT_SYMBOL_GPL(get_task_comm); void set_task_comm(struct task_struct *tsk, char *buf) { -- cgit v1.2.3 From d05f3169c0fbca16132ec7c2be71685c6de638b5 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Tue, 24 May 2011 17:11:44 -0700 Subject: mm: make expand_downwards() symmetrical with expand_upwards() Currently we have expand_upwards exported while expand_downwards is accessible only via expand_stack or expand_stack_downwards. check_stack_guard_page is a nice example of the asymmetry. It uses expand_stack for VM_GROWSDOWN while expand_upwards is called for VM_GROWSUP case. Let's clean this up by exporting both functions and make those names consistent. Let's use expand_{upwards,downwards} because expanding doesn't always involve stack manipulation (an example is ia64_do_page_fault which uses expand_upwards for registers backing store expansion). expand_downwards has to be defined for both CONFIG_STACK_GROWS{UP,DOWN} because get_arg_page calls the downwards version in the early process initialization phase for growsup configuration. Signed-off-by: Michal Hocko Acked-by: Hugh Dickins Cc: James Bottomley Cc: "Luck, Tony" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/exec.c') diff --git a/fs/exec.c b/fs/exec.c index c1cf372f17a7..e276d5e0abb9 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -200,7 +200,7 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, #ifdef CONFIG_STACK_GROWSUP if (write) { - ret = expand_stack_downwards(bprm->vma, pos); + ret = expand_downwards(bprm->vma, pos); if (ret < 0) return NULL; } -- cgit v1.2.3 From d16dfc550f5326a4000f3322582a7c05dec91d7a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 24 May 2011 17:11:45 -0700 Subject: mm: mmu_gather rework Rework the existing mmu_gather infrastructure. The direct purpose of these patches was to allow preemptible mmu_gather, but even without that I think these patches provide an improvement to the status quo. The first 9 patches rework the mmu_gather infrastructure. For review purpose I've split them into generic and per-arch patches with the last of those a generic cleanup. The next patch provides generic RCU page-table freeing, and the followup is a patch converting s390 to use this. I've also got 4 patches from DaveM lined up (not included in this series) that uses this to implement gup_fast() for sparc64. Then there is one patch that extends the generic mmu_gather batching. After that follow the mm preemptibility patches, these make part of the mm a lot more preemptible. It converts i_mmap_lock and anon_vma->lock to mutexes which together with the mmu_gather rework makes mmu_gather preemptible as well. Making i_mmap_lock a mutex also enables a clean-up of the truncate code. This also allows for preemptible mmu_notifiers, something that XPMEM I think wants. Furthermore, it removes the new and universially detested unmap_mutex. This patch: Remove the first obstacle towards a fully preemptible mmu_gather. The current scheme assumes mmu_gather is always done with preemption disabled and uses per-cpu storage for the page batches. Change this to try and allocate a page for batching and in case of failure, use a small on-stack array to make some progress. Preemptible mmu_gather is desired in general and usable once i_mmap_lock becomes a mutex. Doing it before the mutex conversion saves us from having to rework the code by moving the mmu_gather bits inside the pte_lock. Also avoid flushing the tlb batches from under the pte lock, this is useful even without the i_mmap_lock conversion as it significantly reduces pte lock hold times. [akpm@linux-foundation.org: fix comment tpyo] Signed-off-by: Peter Zijlstra Cc: Benjamin Herrenschmidt Cc: David Miller Cc: Martin Schwidefsky Cc: Russell King Cc: Paul Mundt Cc: Jeff Dike Cc: Richard Weinberger Cc: Tony Luck Reviewed-by: KAMEZAWA Hiroyuki Acked-by: Hugh Dickins Acked-by: Mel Gorman Cc: KOSAKI Motohiro Cc: Nick Piggin Cc: Namhyung Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'fs/exec.c') diff --git a/fs/exec.c b/fs/exec.c index e276d5e0abb9..936f5776655c 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -600,7 +600,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift) unsigned long length = old_end - old_start; unsigned long new_start = old_start - shift; unsigned long new_end = old_end - shift; - struct mmu_gather *tlb; + struct mmu_gather tlb; BUG_ON(new_start > new_end); @@ -626,12 +626,12 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift) return -ENOMEM; lru_add_drain(); - tlb = tlb_gather_mmu(mm, 0); + tlb_gather_mmu(&tlb, mm, 0); if (new_end > old_start) { /* * when the old and new regions overlap clear from new_end. */ - free_pgd_range(tlb, new_end, old_end, new_end, + free_pgd_range(&tlb, new_end, old_end, new_end, vma->vm_next ? vma->vm_next->vm_start : 0); } else { /* @@ -640,10 +640,10 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift) * have constraints on va-space that make this illegal (IA64) - * for the others its just a little faster. */ - free_pgd_range(tlb, old_start, old_end, new_end, + free_pgd_range(&tlb, old_start, old_end, new_end, vma->vm_next ? vma->vm_next->vm_start : 0); } - tlb_finish_mmu(tlb, new_end, old_end); + tlb_finish_mmu(&tlb, new_end, old_end); /* * Shrink the vma to just the new range. Always succeeds. -- cgit v1.2.3 From 3864601387cf4196371e3c1897fdffa5228296f9 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Thu, 26 May 2011 16:25:46 -0700 Subject: mm: extract exe_file handling from procfs Setup and cleanup of mm_struct->exe_file is currently done in fs/proc/. This was because exe_file was needed only for /proc//exe. Since we will need the exe_file functionality also for core dumps (so core name can contain full binary path), built this functionality always into the kernel. To achieve that move that out of proc FS to the kernel/ where in fact it should belong. By doing that we can make dup_mm_exe_file static. Also we can drop linux/proc_fs.h inclusion in fs/exec.c and kernel/fork.c. Signed-off-by: Jiri Slaby Cc: Alexander Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs/exec.c') diff --git a/fs/exec.c b/fs/exec.c index 936f5776655c..88a16c572282 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -42,7 +42,6 @@ #include #include #include -#include #include #include #include -- cgit v1.2.3 From 57cc083ad9e1bfeeb4a0ee831e7bb008c8865bf0 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Thu, 26 May 2011 16:25:46 -0700 Subject: coredump: add support for exe_file in core name Now, exe_file is not proc FS dependent, so we can use it to name core file. So we add %E pattern for core file name cration which extract path from mm_struct->exe_file. Then it converts slashes to exclamation marks and pastes the result to the core file name itself. This is useful for environments where binary names are longer than 16 character (the current->comm limitation). Also where there are binaries with same name but in a different path. Further in case the binery itself changes its current->comm after exec. So by doing (s/$/#/ -- # is treated as git comment): $ sysctl kernel.core_pattern='core.%p.%e.%E' $ ln /bin/cat cat45678901234567890 $ ./cat45678901234567890 ^Z $ rm cat45678901234567890 $ fg ^\Quit (core dumped) $ ls core* we now get: core.2434.cat456789012345.!root!cat45678901234567890 (deleted) Signed-off-by: Jiri Slaby Cc: Al Viro Cc: Alan Cox Reviewed-by: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) (limited to 'fs/exec.c') diff --git a/fs/exec.c b/fs/exec.c index 88a16c572282..ea5f748906a8 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1623,6 +1623,41 @@ expand_fail: return ret; } +static int cn_print_exe_file(struct core_name *cn) +{ + struct file *exe_file; + char *pathbuf, *path, *p; + int ret; + + exe_file = get_mm_exe_file(current->mm); + if (!exe_file) + return cn_printf(cn, "(unknown)"); + + pathbuf = kmalloc(PATH_MAX, GFP_TEMPORARY); + if (!pathbuf) { + ret = -ENOMEM; + goto put_exe_file; + } + + path = d_path(&exe_file->f_path, pathbuf, PATH_MAX); + if (IS_ERR(path)) { + ret = PTR_ERR(path); + goto free_buf; + } + + for (p = path; *p; p++) + if (*p == '/') + *p = '!'; + + ret = cn_printf(cn, "%s", path); + +free_buf: + kfree(pathbuf); +put_exe_file: + fput(exe_file); + return ret; +} + /* format_corename will inspect the pattern parameter, and output a * name into corename, which must have space for at least * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator. @@ -1694,6 +1729,9 @@ static int format_corename(struct core_name *cn, long signr) case 'e': err = cn_printf(cn, "%s", current->comm); break; + case 'E': + err = cn_print_exe_file(cn); + break; /* core limit size */ case 'c': err = cn_printf(cn, "%lu", -- cgit v1.2.3 From dac853ae89043f1b7752875300faf614de43c74b Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Thu, 9 Jun 2011 20:05:18 +0200 Subject: exec: delay address limit change until point of no return Unconditionally changing the address limit to USER_DS and not restoring it to its old value in the error path is wrong because it prevents us using kernel memory on repeated calls to this function. This, in fact, breaks the fallback of hard coded paths to the init program from being ever successful if the first candidate fails to load. With this patch applied switching to USER_DS is delayed until the point of no return is reached which makes it possible to have a multi-arch rootfs with one arch specific init binary for each of the (hard coded) probed paths. Since the address limit is already set to USER_DS when start_thread() will be invoked, this redundancy can be safely removed. Signed-off-by: Mathias Krause Cc: Al Viro Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- fs/exec.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'fs/exec.c') diff --git a/fs/exec.c b/fs/exec.c index ea5f748906a8..97e0d52d72fd 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1093,6 +1093,7 @@ int flush_old_exec(struct linux_binprm * bprm) bprm->mm = NULL; /* We're using it now */ + set_fs(USER_DS); current->flags &= ~(PF_RANDOMIZE | PF_KTHREAD); flush_thread(); current->personality &= ~bprm->per_clear; @@ -1357,10 +1358,6 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) if (retval) return retval; - /* kernel module loader fixup */ - /* so we don't try to load run modprobe in kernel space. */ - set_fs(USER_DS); - retval = audit_bprm(bprm); if (retval) return retval; -- cgit v1.2.3 From 7f81c8890c15a10f5220bebae3b6dfae4961962a Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Wed, 15 Jun 2011 15:08:11 -0700 Subject: fs/exec.c: use BUILD_BUG_ON for VM_STACK_FLAGS & VM_STACK_INCOMPLETE_SETUP Commit a8bef8ff6ea1 ("mm: migration: avoid race between shift_arg_pages() and rmap_walk() during migration by not migrating temporary stacks") introduced a BUG_ON() to ensure that VM_STACK_FLAGS and VM_STACK_INCOMPLETE_SETUP do not overlap. The check is a compile time one, so BUILD_BUG_ON is more appropriate. Signed-off-by: Michal Hocko Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/exec.c') diff --git a/fs/exec.c b/fs/exec.c index 97e0d52d72fd..b54f74f3cd80 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -277,7 +277,7 @@ static int __bprm_mm_init(struct linux_binprm *bprm) * use STACK_TOP because that can depend on attributes which aren't * configured yet. */ - BUG_ON(VM_STACK_FLAGS & VM_STACK_INCOMPLETE_SETUP); + BUILD_BUG_ON(VM_STACK_FLAGS & VM_STACK_INCOMPLETE_SETUP); vma->vm_end = STACK_TOP_MAX; vma->vm_start = vma->vm_end - PAGE_SIZE; vma->vm_flags = VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP; -- cgit v1.2.3 From 13fca640bb8ab611a50e0ba120b186faa2994d6c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 15 Jun 2011 21:53:52 -0700 Subject: Revert "fs/exec.c: use BUILD_BUG_ON for VM_STACK_FLAGS & VM_STACK_INCOMPLETE_SETUP" This reverts commit 7f81c8890c15a10f5220bebae3b6dfae4961962a. It turns out that it's not actually a build-time check on x86-64 UML, which does some seriously crazy stuff with VM_STACK_FLAGS. The VM_STACK_FLAGS define depends on the arch-supplied VM_STACK_DEFAULT_FLAGS value, and on x86-64 UML we have arch/um/sys-x86_64/shared/sysdep/vm-flags.h: #define VM_STACK_DEFAULT_FLAGS \ (test_thread_flag(TIF_IA32) ? vm_stack_flags32 : vm_stack_flags) #define VM_STACK_DEFAULT_FLAGS vm_stack_flags (yes, seriously: two different #define's for that thing, with the first one being inside an "#ifdef TIF_IA32") It's possible that it is UML that should just be fixed in this area, but for now let's just undo the (very small) optimization. Reported-by: Randy Dunlap Acked-by: Andrew Morton Cc: Michal Hocko Cc: Richard Weinberger Signed-off-by: Linus Torvalds --- fs/exec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/exec.c') diff --git a/fs/exec.c b/fs/exec.c index b54f74f3cd80..97e0d52d72fd 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -277,7 +277,7 @@ static int __bprm_mm_init(struct linux_binprm *bprm) * use STACK_TOP because that can depend on attributes which aren't * configured yet. */ - BUILD_BUG_ON(VM_STACK_FLAGS & VM_STACK_INCOMPLETE_SETUP); + BUG_ON(VM_STACK_FLAGS & VM_STACK_INCOMPLETE_SETUP); vma->vm_end = STACK_TOP_MAX; vma->vm_start = vma->vm_end - PAGE_SIZE; vma->vm_flags = VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP; -- cgit v1.2.3 From 879669961b11e7f40b518784863a259f735a72bf Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 17 Jun 2011 11:25:59 +0100 Subject: KEYS/DNS: Fix ____call_usermodehelper() to not lose the session keyring ____call_usermodehelper() now erases any credentials set by the subprocess_inf::init() function. The problem is that commit 17f60a7da150 ("capabilites: allow the application of capability limits to usermode helpers") creates and commits new credentials with prepare_kernel_cred() after the call to the init() function. This wipes all keyrings after umh_keys_init() is called. The best way to deal with this is to put the init() call just prior to the commit_creds() call, and pass the cred pointer to init(). That means that umh_keys_init() and suchlike can modify the credentials _before_ they are published and potentially in use by the rest of the system. This prevents request_key() from working as it is prevented from passing the session keyring it set up with the authorisation token to /sbin/request-key, and so the latter can't assume the authority to instantiate the key. This causes the in-kernel DNS resolver to fail with ENOKEY unconditionally. Signed-off-by: David Howells Acked-by: Eric Paris Tested-by: Jeff Layton Signed-off-by: Linus Torvalds --- fs/exec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/exec.c') diff --git a/fs/exec.c b/fs/exec.c index 97e0d52d72fd..6075a1e727ae 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1996,7 +1996,7 @@ static void wait_for_dump_helpers(struct file *file) * is a special value that we use to trap recursive * core dumps */ -static int umh_pipe_setup(struct subprocess_info *info) +static int umh_pipe_setup(struct subprocess_info *info, struct cred *new) { struct file *rp, *wp; struct fdtable *fdt; -- cgit v1.2.3