diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2011-03-23 20:51:42 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-03-23 20:51:42 -0700 |
commit | b81a618dcd3ea99de292dbe624f41ca68f464376 (patch) | |
tree | c5fbe44f944da9d7dc0c224116be77094d379c8a | |
parent | 2f284c846331fa44be1300a3c2c3e85800268a00 (diff) | |
parent | a9712bc12c40c172e393f85a9b2ba8db4bf59509 (diff) |
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs-2.6
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs-2.6:
deal with races in /proc/*/{syscall,stack,personality}
proc: enable writing to /proc/pid/mem
proc: make check_mem_permission() return an mm_struct on success
proc: hold cred_guard_mutex in check_mem_permission()
proc: disable mem_write after exec
mm: implement access_remote_vm
mm: factor out main logic of access_process_vm
mm: use mm_struct to resolve gate vma's in __get_user_pages
mm: arch: rename in_gate_area_no_task to in_gate_area_no_mm
mm: arch: make in_gate_area take an mm_struct instead of a task_struct
mm: arch: make get_gate_vma take an mm_struct instead of a task_struct
x86: mark associated mm when running a task in 32 bit compatibility mode
x86: add context tag to mark mm when running a task in 32-bit compatibility mode
auxv: require the target to be tracable (or yourself)
close race in /proc/*/environ
report errors in /proc/*/*map* sanely
pagemap: close races with suid execve
make sessionid permissions in /proc/*/task/* match those in /proc/*
fix leaks in path_lookupat()
Fix up trivial conflicts in fs/proc/base.c
-rw-r--r-- | arch/powerpc/kernel/vdso.c | 6 | ||||
-rw-r--r-- | arch/s390/kernel/vdso.c | 6 | ||||
-rw-r--r-- | arch/sh/kernel/vsyscall/vsyscall.c | 6 | ||||
-rw-r--r-- | arch/x86/ia32/ia32_aout.c | 1 | ||||
-rw-r--r-- | arch/x86/include/asm/mmu.h | 6 | ||||
-rw-r--r-- | arch/x86/kernel/process_64.c | 8 | ||||
-rw-r--r-- | arch/x86/mm/init_64.c | 16 | ||||
-rw-r--r-- | arch/x86/vdso/vdso32-setup.c | 15 | ||||
-rw-r--r-- | fs/binfmt_elf.c | 2 | ||||
-rw-r--r-- | fs/namei.c | 7 | ||||
-rw-r--r-- | fs/proc/base.c | 181 | ||||
-rw-r--r-- | fs/proc/task_mmu.c | 23 | ||||
-rw-r--r-- | fs/proc/task_nommu.c | 6 | ||||
-rw-r--r-- | include/linux/mm.h | 12 | ||||
-rw-r--r-- | kernel/kallsyms.c | 4 | ||||
-rw-r--r-- | mm/memory.c | 73 | ||||
-rw-r--r-- | mm/mlock.c | 4 | ||||
-rw-r--r-- | mm/nommu.c | 2 |
18 files changed, 244 insertions, 134 deletions
diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index fd8728729abc..142ab1008c3b 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -820,17 +820,17 @@ static int __init vdso_init(void) } arch_initcall(vdso_init); -int in_gate_area_no_task(unsigned long addr) +int in_gate_area_no_mm(unsigned long addr) { return 0; } -int in_gate_area(struct task_struct *task, unsigned long addr) +int in_gate_area(struct mm_struct *mm, unsigned long addr) { return 0; } -struct vm_area_struct *get_gate_vma(struct task_struct *tsk) +struct vm_area_struct *get_gate_vma(struct mm_struct *mm) { return NULL; } diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index f438d74dedbd..d73630b4fe1d 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -337,17 +337,17 @@ static int __init vdso_init(void) } arch_initcall(vdso_init); -int in_gate_area_no_task(unsigned long addr) +int in_gate_area_no_mm(unsigned long addr) { return 0; } -int in_gate_area(struct task_struct *task, unsigned long addr) +int in_gate_area(struct mm_struct *mm, unsigned long addr) { return 0; } -struct vm_area_struct *get_gate_vma(struct task_struct *tsk) +struct vm_area_struct *get_gate_vma(struct mm_struct *mm) { return NULL; } diff --git a/arch/sh/kernel/vsyscall/vsyscall.c b/arch/sh/kernel/vsyscall/vsyscall.c index 242117cbad67..1d6d51a1ce79 100644 --- a/arch/sh/kernel/vsyscall/vsyscall.c +++ b/arch/sh/kernel/vsyscall/vsyscall.c @@ -94,17 +94,17 @@ const char *arch_vma_name(struct vm_area_struct *vma) return NULL; } -struct vm_area_struct *get_gate_vma(struct task_struct *task) +struct vm_area_struct *get_gate_vma(struct mm_struct *mm) { return NULL; } -int in_gate_area(struct task_struct *task, unsigned long address) +int in_gate_area(struct mm_struct *mm, unsigned long address) { return 0; } -int in_gate_area_no_task(unsigned long address) +int in_gate_area_no_mm(unsigned long address) { return 0; } diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c index 2d93bdbc9ac0..fd843877e841 100644 --- a/arch/x86/ia32/ia32_aout.c +++ b/arch/x86/ia32/ia32_aout.c @@ -298,6 +298,7 @@ static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs) /* OK, This is the point of no return */ set_personality(PER_LINUX); set_thread_flag(TIF_IA32); + current->mm->context.ia32_compat = 1; setup_new_exec(bprm); diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h index 80a1dee5bea5..aeff3e89b222 100644 --- a/arch/x86/include/asm/mmu.h +++ b/arch/x86/include/asm/mmu.h @@ -13,6 +13,12 @@ typedef struct { int size; struct mutex lock; void *vdso; + +#ifdef CONFIG_X86_64 + /* True if mm supports a task running in 32 bit compatibility mode. */ + unsigned short ia32_compat; +#endif + } mm_context_t; #ifdef CONFIG_SMP diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index bd387e8f73b4..6c9dd922ac0d 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -501,6 +501,10 @@ void set_personality_64bit(void) /* Make sure to be in 64bit mode */ clear_thread_flag(TIF_IA32); + /* Ensure the corresponding mm is not marked. */ + if (current->mm) + current->mm->context.ia32_compat = 0; + /* TBD: overwrites user setup. Should have two bits. But 64bit processes have always behaved this way, so it's not too bad. The main problem is just that @@ -516,6 +520,10 @@ void set_personality_ia32(void) set_thread_flag(TIF_IA32); current->personality |= force_personality32; + /* Mark the associated mm as containing 32-bit tasks. */ + if (current->mm) + current->mm->context.ia32_compat = 1; + /* Prepare the first "return" to user space */ current_thread_info()->status |= TS_COMPAT; } diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 2362b646178e..794233587287 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -862,18 +862,18 @@ static struct vm_area_struct gate_vma = { .vm_flags = VM_READ | VM_EXEC }; -struct vm_area_struct *get_gate_vma(struct task_struct *tsk) +struct vm_area_struct *get_gate_vma(struct mm_struct *mm) { #ifdef CONFIG_IA32_EMULATION - if (test_tsk_thread_flag(tsk, TIF_IA32)) + if (!mm || mm->context.ia32_compat) return NULL; #endif return &gate_vma; } -int in_gate_area(struct task_struct *task, unsigned long addr) +int in_gate_area(struct mm_struct *mm, unsigned long addr) { - struct vm_area_struct *vma = get_gate_vma(task); + struct vm_area_struct *vma = get_gate_vma(mm); if (!vma) return 0; @@ -882,11 +882,11 @@ int in_gate_area(struct task_struct *task, unsigned long addr) } /* - * Use this when you have no reliable task/vma, typically from interrupt - * context. It is less reliable than using the task's vma and may give - * false positives: + * Use this when you have no reliable mm, typically from interrupt + * context. It is less reliable than using a task's mm and may give + * false positives. */ -int in_gate_area_no_task(unsigned long addr) +int in_gate_area_no_mm(unsigned long addr) { return (addr >= VSYSCALL_START) && (addr < VSYSCALL_END); } diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c index 36df991985b2..468d591dde31 100644 --- a/arch/x86/vdso/vdso32-setup.c +++ b/arch/x86/vdso/vdso32-setup.c @@ -417,24 +417,25 @@ const char *arch_vma_name(struct vm_area_struct *vma) return NULL; } -struct vm_area_struct *get_gate_vma(struct task_struct *tsk) +struct vm_area_struct *get_gate_vma(struct mm_struct *mm) { - struct mm_struct *mm = tsk->mm; - - /* Check to see if this task was created in compat vdso mode */ + /* + * Check to see if the corresponding task was created in compat vdso + * mode. + */ if (mm && mm->context.vdso == (void *)VDSO_HIGH_BASE) return &gate_vma; return NULL; } -int in_gate_area(struct task_struct *task, unsigned long addr) +int in_gate_area(struct mm_struct *mm, unsigned long addr) { - const struct vm_area_struct *vma = get_gate_vma(task); + const struct vm_area_struct *vma = get_gate_vma(mm); return vma && addr >= vma->vm_start && addr < vma->vm_end; } -int in_gate_area_no_task(unsigned long addr) +int in_gate_area_no_mm(unsigned long addr) { return 0; } diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index b2fae009a4b7..f34078d702d3 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1906,7 +1906,7 @@ static int elf_core_dump(struct coredump_params *cprm) segs = current->mm->map_count; segs += elf_core_extra_phdrs(); - gate_vma = get_gate_vma(current); + gate_vma = get_gate_vma(current->mm); if (gate_vma != NULL) segs++; diff --git a/fs/namei.c b/fs/namei.c index fc858b1124c2..d0066e17d45d 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1650,13 +1650,16 @@ static int path_lookupat(int dfd, const char *name, err = -ECHILD; } - if (!err) + if (!err) { err = handle_reval_path(nd); + if (err) + path_put(&nd->path); + } if (!err && nd->flags & LOOKUP_DIRECTORY) { if (!nd->inode->i_op->lookup) { path_put(&nd->path); - return -ENOTDIR; + err = -ENOTDIR; } } diff --git a/fs/proc/base.c b/fs/proc/base.c index daba13653256..5a670c11aeac 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -191,17 +191,20 @@ static int proc_root_link(struct inode *inode, struct path *path) return result; } -/* - * Return zero if current may access user memory in @task, -error if not. - */ -static int check_mem_permission(struct task_struct *task) +static struct mm_struct *__check_mem_permission(struct task_struct *task) { + struct mm_struct *mm; + + mm = get_task_mm(task); + if (!mm) + return ERR_PTR(-EINVAL); + /* * A task can always look at itself, in case it chooses * to use system calls instead of load instructions. */ if (task == current) - return 0; + return mm; /* * If current is actively ptrace'ing, and would also be @@ -213,27 +216,53 @@ static int check_mem_permission(struct task_struct *task) match = (tracehook_tracer_task(task) == current); rcu_read_unlock(); if (match && ptrace_may_access(task, PTRACE_MODE_ATTACH)) - return 0; + return mm; } /* * Noone else is allowed. */ - return -EPERM; + mmput(mm); + return ERR_PTR(-EPERM); +} + +/* + * If current may access user memory in @task return a reference to the + * corresponding mm, otherwise ERR_PTR. + */ +static struct mm_struct *check_mem_permission(struct task_struct *task) +{ + struct mm_struct *mm; + int err; + + /* + * Avoid racing if task exec's as we might get a new mm but validate + * against old credentials. + */ + err = mutex_lock_killable(&task->signal->cred_guard_mutex); + if (err) + return ERR_PTR(err); + + mm = __check_mem_permission(task); + mutex_unlock(&task->signal->cred_guard_mutex); + + return mm; } struct mm_struct *mm_for_maps(struct task_struct *task) { struct mm_struct *mm; + int err; - if (mutex_lock_killable(&task->signal->cred_guard_mutex)) - return NULL; + err = mutex_lock_killable(&task->signal->cred_guard_mutex); + if (err) + return ERR_PTR(err); mm = get_task_mm(task); if (mm && mm != current->mm && !ptrace_may_access(task, PTRACE_MODE_READ)) { mmput(mm); - mm = NULL; + mm = ERR_PTR(-EACCES); } mutex_unlock(&task->signal->cred_guard_mutex); @@ -279,9 +308,9 @@ out: static int proc_pid_auxv(struct task_struct *task, char *buffer) { - int res = 0; - struct mm_struct *mm = get_task_mm(task); - if (mm) { + struct mm_struct *mm = mm_for_maps(task); + int res = PTR_ERR(mm); + if (mm && !IS_ERR(mm)) { unsigned int nwords = 0; do { nwords += 2; @@ -318,6 +347,23 @@ static int proc_pid_wchan(struct task_struct *task, char *buffer) } #endif /* CONFIG_KALLSYMS */ +static int lock_trace(struct task_struct *task) +{ + int err = mutex_lock_killable(&task->signal->cred_guard_mutex); + if (err) + return err; + if (!ptrace_may_access(task, PTRACE_MODE_ATTACH)) { + mutex_unlock(&task->signal->cred_guard_mutex); + return -EPERM; + } + return 0; +} + +static void unlock_trace(struct task_struct *task) +{ + mutex_unlock(&task->signal->cred_guard_mutex); +} + #ifdef CONFIG_STACKTRACE #define MAX_STACK_TRACE_DEPTH 64 @@ -327,6 +373,7 @@ static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns, { struct stack_trace trace; unsigned long *entries; + int err; int i; entries = kmalloc(MAX_STACK_TRACE_DEPTH * sizeof(*entries), GFP_KERNEL); @@ -337,15 +384,20 @@ static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns, trace.max_entries = MAX_STACK_TRACE_DEPTH; trace.entries = entries; trace.skip = 0; - save_stack_trace_tsk(task, &trace); - for (i = 0; i < trace.nr_entries; i++) { - seq_printf(m, "[<%pK>] %pS\n", - (void *)entries[i], (void *)entries[i]); + err = lock_trace(task); + if (!err) { + save_stack_trace_tsk(task, &trace); + + for (i = 0; i < trace.nr_entries; i++) { + seq_printf(m, "[<%pK>] %pS\n", + (void *)entries[i], (void *)entries[i]); + } + unlock_trace(task); } kfree(entries); - return 0; + return err; } #endif @@ -508,18 +560,22 @@ static int proc_pid_syscall(struct task_struct *task, char *buffer) { long nr; unsigned long args[6], sp, pc; + int res = lock_trace(task); + if (res) + return res; if (task_current_syscall(task, &nr, args, 6, &sp, &pc)) - return sprintf(buffer, "running\n"); - - if (nr < 0) - return sprintf(buffer, "%ld 0x%lx 0x%lx\n", nr, sp, pc); - - return sprintf(buffer, + res = sprintf(buffer, "running\n"); + else if (nr < 0) + res = sprintf(buffer, "%ld 0x%lx 0x%lx\n", nr, sp, pc); + else + res = sprintf(buffer, "%ld 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n", nr, args[0], args[1], args[2], args[3], args[4], args[5], sp, pc); + unlock_trace(task); + return res; } #endif /* CONFIG_HAVE_ARCH_TRACEHOOK */ @@ -775,19 +831,14 @@ static ssize_t mem_read(struct file * file, char __user * buf, if (!task) goto out_no_task; - ret = check_mem_permission(task); - if (ret) - goto out; - ret = -ENOMEM; page = (char *)__get_free_page(GFP_TEMPORARY); if (!page) goto out; - ret = 0; - - mm = get_task_mm(task); - if (!mm) + mm = check_mem_permission(task); + ret = PTR_ERR(mm); + if (IS_ERR(mm)) goto out_free; ret = -EIO; @@ -801,8 +852,8 @@ static ssize_t mem_read(struct file * file, char __user * buf, int this_len, retval; this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count; - retval = access_process_vm(task, src, page, this_len, 0); - if (!retval || check_mem_permission(task)) { + retval = access_remote_vm(mm, src, page, this_len, 0); + if (!retval) { if (!ret) ret = -EIO; break; @@ -830,10 +881,6 @@ out_no_task: return ret; } -#define mem_write NULL - -#ifndef mem_write -/* This is a security hazard */ static ssize_t mem_write(struct file * file, const char __user *buf, size_t count, loff_t *ppos) { @@ -841,19 +888,25 @@ static ssize_t mem_write(struct file * file, const char __user *buf, char *page; struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); unsigned long dst = *ppos; + struct mm_struct *mm; copied = -ESRCH; if (!task) goto out_no_task; - copied = check_mem_permission(task); - if (copied) - goto out; + mm = check_mem_permission(task); + copied = PTR_ERR(mm); + if (IS_ERR(mm)) + goto out_task; + + copied = -EIO; + if (file->private_data != (void *)((long)current->self_exec_id)) + goto out_mm; copied = -ENOMEM; page = (char *)__get_free_page(GFP_TEMPORARY); if (!page) - goto out; + goto out_mm; copied = 0; while (count > 0) { @@ -864,7 +917,7 @@ static ssize_t mem_write(struct file * file, const char __user *buf, copied = -EFAULT; break; } - retval = access_process_vm(task, dst, page, this_len, 1); + retval = access_remote_vm(mm, dst, page, this_len, 1); if (!retval) { if (!copied) copied = -EIO; @@ -877,12 +930,13 @@ static ssize_t mem_write(struct file * file, const char __user *buf, } *ppos = dst; free_page((unsigned long) page); -out: +out_mm: + mmput(mm); +out_task: put_task_struct(task); out_no_task: return copied; } -#endif loff_t mem_lseek(struct file *file, loff_t offset, int orig) { @@ -919,21 +973,18 @@ static ssize_t environ_read(struct file *file, char __user *buf, if (!task) goto out_no_task; - ret = -EPERM; - if (!ptrace_may_access(task, PTRACE_MODE_READ)) - goto out; - ret = -ENOMEM; page = (char *)__get_free_page(GFP_TEMPORARY); if (!page) goto out; - ret = 0; - mm = get_task_mm(task); - if (!mm) + mm = mm_for_maps(task); + ret = PTR_ERR(mm); + if (!mm || IS_ERR(mm)) goto out_free; + ret = 0; while (count > 0) { int this_len, retval, max_len; @@ -2751,8 +2802,12 @@ static int proc_tgid_io_accounting(struct task_struct *task, char *buffer) static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { - seq_printf(m, "%08x\n", task->personality); - return 0; + int err = lock_trace(task); + if (!err) { + seq_printf(m, "%08x\n", task->personality); + unlock_trace(task); + } + return err; } /* @@ -2771,7 +2826,7 @@ static const struct pid_entry tgid_base_stuff[] = { REG("environ", S_IRUSR, proc_environ_operations), INF("auxv", S_IRUSR, proc_pid_auxv), ONE("status", S_IRUGO, proc_pid_status), - ONE("personality", S_IRUSR, proc_pid_personality), + ONE("personality", S_IRUGO, proc_pid_personality), INF("limits", S_IRUGO, proc_pid_limits), #ifdef CONFIG_SCHED_DEBUG REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), @@ -2781,7 +2836,7 @@ static const struct pid_entry tgid_base_stuff[] = { #endif REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations), #ifdef CONFIG_HAVE_ARCH_TRACEHOOK - INF("syscall", S_IRUSR, proc_pid_syscall), + INF("syscall", S_IRUGO, proc_pid_syscall), #endif INF("cmdline", S_IRUGO, proc_pid_cmdline), ONE("stat", S_IRUGO, proc_tgid_stat), @@ -2800,7 +2855,7 @@ static const struct pid_entry tgid_base_stuff[] = { #ifdef CONFIG_PROC_PAGE_MONITOR REG("clear_refs", S_IWUSR, proc_clear_refs_operations), REG("smaps", S_IRUGO, proc_smaps_operations), - REG("pagemap", S_IRUSR, proc_pagemap_operations), + REG("pagemap", S_IRUGO, proc_pagemap_operations), #endif #ifdef CONFIG_SECURITY DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations), @@ -2809,7 +2864,7 @@ static const struct pid_entry tgid_base_stuff[] = { INF("wchan", S_IRUGO, proc_pid_wchan), #endif #ifdef CONFIG_STACKTRACE - ONE("stack", S_IRUSR, proc_pid_stack), + ONE("stack", S_IRUGO, proc_pid_stack), #endif #ifdef CONFIG_SCHEDSTATS INF("schedstat", S_IRUGO, proc_pid_schedstat), @@ -3111,14 +3166,14 @@ static const struct pid_entry tid_base_stuff[] = { REG("environ", S_IRUSR, proc_environ_operations), INF("auxv", S_IRUSR, proc_pid_auxv), ONE("status", S_IRUGO, proc_pid_status), - ONE("personality", S_IRUSR, proc_pid_personality), + ONE("personality", S_IRUGO, proc_pid_personality), INF("limits", S_IRUGO, proc_pid_limits), #ifdef CONFIG_SCHED_DEBUG REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), #endif REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations), #ifdef CONFIG_HAVE_ARCH_TRACEHOOK - INF("syscall", S_IRUSR, proc_pid_syscall), + INF("syscall", S_IRUGO, proc_pid_syscall), #endif INF("cmdline", S_IRUGO, proc_pid_cmdline), ONE("stat", S_IRUGO, proc_tid_stat), @@ -3136,7 +3191,7 @@ static const struct pid_entry tid_base_stuff[] = { #ifdef CONFIG_PROC_PAGE_MONITOR REG("clear_refs", S_IWUSR, proc_clear_refs_operations), REG("smaps", S_IRUGO, proc_smaps_operations), - REG("pagemap", S_IRUSR, proc_pagemap_operations), + REG("pagemap", S_IRUGO, proc_pagemap_operations), #endif #ifdef CONFIG_SECURITY DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations), @@ -3145,7 +3200,7 @@ static const struct pid_entry tid_base_stuff[] = { INF("wchan", S_IRUGO, proc_pid_wchan), #endif #ifdef CONFIG_STACKTRACE - ONE("stack", S_IRUSR, proc_pid_stack), + ONE("stack", S_IRUGO, proc_pid_stack), #endif #ifdef CONFIG_SCHEDSTATS INF("schedstat", S_IRUGO, proc_pid_schedstat), @@ -3164,7 +3219,7 @@ static const struct pid_entry tid_base_stuff[] = { REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), #ifdef CONFIG_AUDITSYSCALL REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), - REG("sessionid", S_IRUSR, proc_sessionid_operations), + REG("sessionid", S_IRUGO, proc_sessionid_operations), #endif #ifdef CONFIG_FAULT_INJECTION REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations), diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 636f1a1fdf87..7c708a418acc 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -121,14 +121,14 @@ static void *m_start(struct seq_file *m, loff_t *pos) priv->task = get_pid_task(priv->pid, PIDTYPE_PID); if (!priv->task) - return NULL; + return ERR_PTR(-ESRCH); mm = mm_for_maps(priv->task); - if (!mm) - return NULL; + if (!mm || IS_ERR(mm)) + return mm; down_read(&mm->mmap_sem); - tail_vma = get_gate_vma(priv->task); + tail_vma = get_gate_vma(priv->task->mm); priv->tail_vma = tail_vma; /* Start with last addr hint */ @@ -279,7 +279,8 @@ static int show_map(struct seq_file *m, void *v) show_map_vma(m, vma); if (m->count < m->size) /* vma is copied successfully */ - m->version = (vma != get_gate_vma(task))? vma->vm_start: 0; + m->version = (vma != get_gate_vma(task->mm)) + ? vma->vm_start : 0; return 0; } @@ -468,7 +469,8 @@ static int show_smap(struct seq_file *m, void *v) (unsigned long)(mss.pss >> (10 + PSS_SHIFT)) : 0); if (m->count < m->size) /* vma is copied successfully */ - m->version = (vma != get_gate_vma(task)) ? vma->vm_start : 0; + m->version = (vma != get_gate_vma(task->mm)) + ? vma->vm_start : 0; return 0; } @@ -764,8 +766,9 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, if (!task) goto out; - ret = -EACCES; - if (!ptrace_may_access(task, PTRACE_MODE_READ)) + mm = mm_for_maps(task); + ret = PTR_ERR(mm); + if (!mm || IS_ERR(mm)) goto out_task; ret = -EINVAL; @@ -778,10 +781,6 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, if (!count) goto out_task; - mm = get_task_mm(task); - if (!mm) - goto out_task; - pm.len = PM_ENTRY_BYTES * (PAGEMAP_WALK_SIZE >> PAGE_SHIFT); pm.buffer = kmalloc(pm.len, GFP_TEMPORARY); ret = -ENOMEM; diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index b535d3e5d5f1..980de547c070 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c @@ -199,13 +199,13 @@ static void *m_start(struct seq_file *m, loff_t *pos) /* pin the task and mm whilst we play with them */ priv->task = get_pid_task(priv->pid, PIDTYPE_PID); if (!priv->task) - return NULL; + return ERR_PTR(-ESRCH); mm = mm_for_maps(priv->task); - if (!mm) { + if (!mm || IS_ERR(mm)) { put_task_struct(priv->task); priv->task = NULL; - return NULL; + return mm; } down_read(&mm->mmap_sem); diff --git a/include/linux/mm.h b/include/linux/mm.h index 294104e0891d..f9535b2c9558 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -982,6 +982,8 @@ static inline int handle_mm_fault(struct mm_struct *mm, extern int make_pages_present(unsigned long addr, unsigned long end); extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write); +extern int access_remote_vm(struct mm_struct *mm, unsigned long addr, + void *buf, int len, int write); int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, int len, unsigned int foll_flags, @@ -1592,13 +1594,13 @@ static inline bool kernel_page_present(struct page *page) { return true; } #endif /* CONFIG_HIBERNATION */ #endif -extern struct vm_area_struct *get_gate_vma(struct task_struct *tsk); +extern struct vm_area_struct *get_gate_vma(struct mm_struct *mm); #ifdef __HAVE_ARCH_GATE_AREA -int in_gate_area_no_task(unsigned long addr); -int in_gate_area(struct task_struct *task, unsigned long addr); +int in_gate_area_no_mm(unsigned long addr); +int in_gate_area(struct mm_struct *mm, unsigned long addr); #else -int in_gate_area_no_task(unsigned long addr); -#define in_gate_area(task, addr) ({(void)task; in_gate_area_no_task(addr);}) +int in_gate_area_no_mm(unsigned long addr); +#define in_gate_area(mm, addr) ({(void)mm; in_gate_area_no_mm(addr);}) #endif /* __HAVE_ARCH_GATE_AREA */ int drop_caches_sysctl_handler(struct ctl_table *, int, diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index 75dcca37d61a..a56aa58b9cb0 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -64,14 +64,14 @@ static inline int is_kernel_text(unsigned long addr) if ((addr >= (unsigned long)_stext && addr <= (unsigned long)_etext) || arch_is_kernel_text(addr)) return 1; - return in_gate_area_no_task(addr); + return in_gate_area_no_mm(addr); } static inline int is_kernel(unsigned long addr) { if (addr >= (unsigned long)_stext && addr <= (unsigned long)_end) return 1; - return in_gate_area_no_task(addr); + return in_gate_area_no_mm(addr); } static int is_ksym_addr(unsigned long addr) diff --git a/mm/memory.c b/mm/memory.c index 20d5f7499ce2..51a5c23704af 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1486,9 +1486,9 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, struct vm_area_struct *vma; vma = find_extend_vma(mm, start); - if (!vma && in_gate_area(tsk, start)) { + if (!vma && in_gate_area(mm, start)) { unsigned long pg = start & PAGE_MASK; - struct vm_area_struct *gate_vma = get_gate_vma(tsk); + struct vm_area_struct *gate_vma = get_gate_vma(mm); pgd_t *pgd; pud_t *pud; pmd_t *pmd; @@ -1591,10 +1591,13 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, return i ? i : -EFAULT; BUG(); } - if (ret & VM_FAULT_MAJOR) - tsk->maj_flt++; - else - tsk->min_flt++; + + if (tsk) { + if (ret & VM_FAULT_MAJOR) + tsk->maj_flt++; + else + tsk->min_flt++; + } if (ret & VM_FAULT_RETRY) { if (nonblocking) @@ -1641,7 +1644,8 @@ EXPORT_SYMBOL(__get_user_pages); /** * get_user_pages() - pin user pages in memory - * @tsk: task_struct of target task + * @tsk: the task_struct to use for page fault accounting, or + * NULL if faults are not to be recorded. * @mm: mm_struct of target mm * @start: starting user address * @nr_pages: number of pages from start to pin @@ -3499,7 +3503,7 @@ static int __init gate_vma_init(void) __initcall(gate_vma_init); #endif -struct vm_area_struct *get_gate_vma(struct task_struct *tsk) +struct vm_area_struct *get_gate_vma(struct mm_struct *mm) { #ifdef AT_SYSINFO_EHDR return &gate_vma; @@ -3508,7 +3512,7 @@ struct vm_area_struct *get_gate_vma(struct task_struct *tsk) #endif } -int in_gate_area_no_task(unsigned long addr) +int in_gate_area_no_mm(unsigned long addr) { #ifdef AT_SYSINFO_EHDR if ((addr >= FIXADDR_USER_START) && (addr < FIXADDR_USER_END)) @@ -3649,20 +3653,15 @@ int generic_access_phys(struct vm_area_struct *vma, unsigned long addr, #endif /* - * Access another process' address space. - * Source/target buffer must be kernel space, - * Do not walk the page table directly, use get_user_pages + * Access another process' address space as given in mm. If non-NULL, use the + * given task for page fault accounting. */ -int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write) +static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, + unsigned long addr, void *buf, int len, int write) { - struct mm_struct *mm; struct vm_area_struct *vma; void *old_buf = buf; - mm = get_task_mm(tsk); - if (!mm) - return 0; - down_read(&mm->mmap_sem); /* ignore errors, just check how much was successfully transferred */ while (len) { @@ -3711,11 +3710,47 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, in addr += bytes; } up_read(&mm->mmap_sem); - mmput(mm); return buf - old_buf; } +/** + * @access_remote_vm - access another process' address space + * @mm: the mm_struct of the target address space + * @addr: start address to access + * @buf: source or destination buffer + * @len: number of bytes to transfer + * @write: whether the access is a write + * + * The caller must hold a reference on @mm. + */ +int access_remote_vm(struct mm_struct *mm, unsigned long addr, + void *buf, int len, int write) +{ + return __access_remote_vm(NULL, mm, addr, buf, len, write); +} + +/* + * Access another process' address space. + * Source/target buffer must be kernel space, + * Do not walk the page table directly, use get_user_pages + */ +int access_process_vm(struct task_struct *tsk, unsigned long addr, + void *buf, int len, int write) +{ + struct mm_struct *mm; + int ret; + + mm = get_task_mm(tsk); + if (!mm) + return 0; + + ret = __access_remote_vm(tsk, mm, addr, buf, len, write); + mmput(mm); + + return ret; +} + /* * Print the name of a VMA. */ diff --git a/mm/mlock.c b/mm/mlock.c index c3924c7f00be..2689a08c79af 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -237,7 +237,7 @@ long mlock_vma_pages_range(struct vm_area_struct *vma, if (!((vma->vm_flags & (VM_DONTEXPAND | VM_RESERVED)) || is_vm_hugetlb_page(vma) || - vma == get_gate_vma(current))) { + vma == get_gate_vma(current->mm))) { __mlock_vma_pages_range(vma, start, end, NULL); @@ -332,7 +332,7 @@ static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev, int lock = newflags & VM_LOCKED; if (newflags == vma->vm_flags || (vma->vm_flags & VM_SPECIAL) || - is_vm_hugetlb_page(vma) || vma == get_gate_vma(current)) + is_vm_hugetlb_page(vma) || vma == get_gate_vma(current->mm)) goto out; /* don't set VM_LOCKED, don't count */ pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT); diff --git a/mm/nommu.c b/mm/nommu.c index f59e1424d3db..e629143f9440 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1963,7 +1963,7 @@ error: return -ENOMEM; } -int in_gate_area_no_task(unsigned long addr) +int in_gate_area_no_mm(unsigned long addr) { return 0; } |