diff options
Diffstat (limited to 'fs/proc')
| -rw-r--r-- | fs/proc/Kconfig | 2 | ||||
| -rw-r--r-- | fs/proc/array.c | 1 | ||||
| -rw-r--r-- | fs/proc/base.c | 24 | ||||
| -rw-r--r-- | fs/proc/inode.c | 2 | ||||
| -rw-r--r-- | fs/proc/meminfo.c | 1 | ||||
| -rw-r--r-- | fs/proc/nommu.c | 1 | ||||
| -rw-r--r-- | fs/proc/proc_sysctl.c | 149 | ||||
| -rw-r--r-- | fs/proc/root.c | 10 | ||||
| -rw-r--r-- | fs/proc/self.c | 2 | ||||
| -rw-r--r-- | fs/proc/task_mmu.c | 34 | ||||
| -rw-r--r-- | fs/proc/task_nommu.c | 18 | ||||
| -rw-r--r-- | fs/proc/thread_self.c | 2 | ||||
| -rw-r--r-- | fs/proc/vmcore.c | 1 |
13 files changed, 197 insertions, 50 deletions
diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig index 971a42f6357d..c930001056f9 100644 --- a/fs/proc/Kconfig +++ b/fs/proc/Kconfig @@ -66,7 +66,7 @@ config PROC_SYSCTL depends on PROC_FS select SYSCTL default y - ---help--- + help The sysctl interface provides a means of dynamically changing certain kernel parameters and variables on the fly without requiring a recompile of the kernel or reboot of the system. The primary diff --git a/fs/proc/array.c b/fs/proc/array.c index 713ffac59bbb..55ecbeb3a721 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -92,7 +92,6 @@ #include <linux/user_namespace.h> #include <linux/fs_struct.h> -#include <asm/pgtable.h> #include <asm/processor.h> #include "internal.h" diff --git a/fs/proc/base.c b/fs/proc/base.c index 066d9c0f4664..d86c0afc8a85 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -2112,11 +2112,11 @@ static int map_files_d_revalidate(struct dentry *dentry, unsigned int flags) goto out; if (!dname_to_vma_addr(dentry, &vm_start, &vm_end)) { - status = down_read_killable(&mm->mmap_sem); + status = mmap_read_lock_killable(mm); if (!status) { exact_vma_exists = !!find_exact_vma(mm, vm_start, vm_end); - up_read(&mm->mmap_sem); + mmap_read_unlock(mm); } } @@ -2163,7 +2163,7 @@ static int map_files_get_link(struct dentry *dentry, struct path *path) if (rc) goto out_mmput; - rc = down_read_killable(&mm->mmap_sem); + rc = mmap_read_lock_killable(mm); if (rc) goto out_mmput; @@ -2174,7 +2174,7 @@ static int map_files_get_link(struct dentry *dentry, struct path *path) path_get(path); rc = 0; } - up_read(&mm->mmap_sem); + mmap_read_unlock(mm); out_mmput: mmput(mm); @@ -2264,7 +2264,7 @@ static struct dentry *proc_map_files_lookup(struct inode *dir, goto out_put_task; result = ERR_PTR(-EINTR); - if (down_read_killable(&mm->mmap_sem)) + if (mmap_read_lock_killable(mm)) goto out_put_mm; result = ERR_PTR(-ENOENT); @@ -2277,7 +2277,7 @@ static struct dentry *proc_map_files_lookup(struct inode *dir, (void *)(unsigned long)vma->vm_file->f_mode); out_no_vma: - up_read(&mm->mmap_sem); + mmap_read_unlock(mm); out_put_mm: mmput(mm); out_put_task: @@ -2322,7 +2322,7 @@ proc_map_files_readdir(struct file *file, struct dir_context *ctx) if (!mm) goto out_put_task; - ret = down_read_killable(&mm->mmap_sem); + ret = mmap_read_lock_killable(mm); if (ret) { mmput(mm); goto out_put_task; @@ -2333,11 +2333,11 @@ proc_map_files_readdir(struct file *file, struct dir_context *ctx) /* * We need two passes here: * - * 1) Collect vmas of mapped files with mmap_sem taken - * 2) Release mmap_sem and instantiate entries + * 1) Collect vmas of mapped files with mmap_lock taken + * 2) Release mmap_lock and instantiate entries * * otherwise we get lockdep complained, since filldir() - * routine might require mmap_sem taken in might_fault(). + * routine might require mmap_lock taken in might_fault(). */ for (vma = mm->mmap, pos = 2; vma; vma = vma->vm_next) { @@ -2349,7 +2349,7 @@ proc_map_files_readdir(struct file *file, struct dir_context *ctx) p = genradix_ptr_alloc(&fa, nr_files++, GFP_KERNEL); if (!p) { ret = -ENOMEM; - up_read(&mm->mmap_sem); + mmap_read_unlock(mm); mmput(mm); goto out_put_task; } @@ -2358,7 +2358,7 @@ proc_map_files_readdir(struct file *file, struct dir_context *ctx) p->end = vma->vm_end; p->mode = vma->vm_file->f_mode; } - up_read(&mm->mmap_sem); + mmap_read_unlock(mm); mmput(mm); for (i = 0; i < nr_files; i++) { diff --git a/fs/proc/inode.c b/fs/proc/inode.c index f40c2532c057..28d6105e908e 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -617,7 +617,7 @@ const struct inode_operations proc_link_inode_operations = { struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de) { - struct inode *inode = new_inode_pseudo(sb); + struct inode *inode = new_inode(sb); if (inode) { inode->i_ino = de->low_ino; diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index ecc63ce01be7..e9a6841fc25b 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c @@ -17,7 +17,6 @@ #include <linux/cma.h> #endif #include <asm/page.h> -#include <asm/pgtable.h> #include "internal.h" void __attribute__((weak)) arch_report_meminfo(struct seq_file *m) diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c index 14c2badb8fd9..13452b32e2bd 100644 --- a/fs/proc/nommu.c +++ b/fs/proc/nommu.c @@ -22,7 +22,6 @@ #include <linux/hugetlb.h> #include <linux/vmalloc.h> #include <linux/uaccess.h> -#include <asm/pgtable.h> #include <asm/tlb.h> #include <asm/div64.h> #include "internal.h" diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index df2143e05c57..42c5128c7d1c 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -14,6 +14,7 @@ #include <linux/mm.h> #include <linux/module.h> #include <linux/bpf-cgroup.h> +#include <linux/mount.h> #include "internal.h" static const struct dentry_operations proc_sys_dentry_operations; @@ -564,6 +565,10 @@ static ssize_t proc_sys_call_handler(struct file *filp, void __user *ubuf, if (!table->proc_handler) goto out; + /* don't even try if the size is too large */ + if (count > KMALLOC_MAX_SIZE) + return -ENOMEM; + if (write) { kbuf = memdup_user_nul(ubuf, count); if (IS_ERR(kbuf)) { @@ -1703,3 +1708,147 @@ int __init proc_sys_init(void) return sysctl_init(); } + +struct sysctl_alias { + const char *kernel_param; + const char *sysctl_param; +}; + +/* + * Historically some settings had both sysctl and a command line parameter. + * With the generic sysctl. parameter support, we can handle them at a single + * place and only keep the historical name for compatibility. This is not meant + * to add brand new aliases. When adding existing aliases, consider whether + * the possibly different moment of changing the value (e.g. from early_param + * to the moment do_sysctl_args() is called) is an issue for the specific + * parameter. + */ +static const struct sysctl_alias sysctl_aliases[] = { + {"hardlockup_all_cpu_backtrace", "kernel.hardlockup_all_cpu_backtrace" }, + {"hung_task_panic", "kernel.hung_task_panic" }, + {"numa_zonelist_order", "vm.numa_zonelist_order" }, + {"softlockup_all_cpu_backtrace", "kernel.softlockup_all_cpu_backtrace" }, + {"softlockup_panic", "kernel.softlockup_panic" }, + { } +}; + +static const char *sysctl_find_alias(char *param) +{ + const struct sysctl_alias *alias; + + for (alias = &sysctl_aliases[0]; alias->kernel_param != NULL; alias++) { + if (strcmp(alias->kernel_param, param) == 0) + return alias->sysctl_param; + } + + return NULL; +} + +/* Set sysctl value passed on kernel command line. */ +static int process_sysctl_arg(char *param, char *val, + const char *unused, void *arg) +{ + char *path; + struct vfsmount **proc_mnt = arg; + struct file_system_type *proc_fs_type; + struct file *file; + int len; + int err; + loff_t pos = 0; + ssize_t wret; + + if (strncmp(param, "sysctl", sizeof("sysctl") - 1) == 0) { + param += sizeof("sysctl") - 1; + + if (param[0] != '/' && param[0] != '.') + return 0; + + param++; + } else { + param = (char *) sysctl_find_alias(param); + if (!param) + return 0; + } + + /* + * To set sysctl options, we use a temporary mount of proc, look up the + * respective sys/ file and write to it. To avoid mounting it when no + * options were given, we mount it only when the first sysctl option is + * found. Why not a persistent mount? There are problems with a + * persistent mount of proc in that it forces userspace not to use any + * proc mount options. + */ + if (!*proc_mnt) { + proc_fs_type = get_fs_type("proc"); + if (!proc_fs_type) { + pr_err("Failed to find procfs to set sysctl from command line\n"); + return 0; + } + *proc_mnt = kern_mount(proc_fs_type); + put_filesystem(proc_fs_type); + if (IS_ERR(*proc_mnt)) { + pr_err("Failed to mount procfs to set sysctl from command line\n"); + return 0; + } + } + + path = kasprintf(GFP_KERNEL, "sys/%s", param); + if (!path) + panic("%s: Failed to allocate path for %s\n", __func__, param); + strreplace(path, '.', '/'); + + file = file_open_root((*proc_mnt)->mnt_root, *proc_mnt, path, O_WRONLY, 0); + if (IS_ERR(file)) { + err = PTR_ERR(file); + if (err == -ENOENT) + pr_err("Failed to set sysctl parameter '%s=%s': parameter not found\n", + param, val); + else if (err == -EACCES) + pr_err("Failed to set sysctl parameter '%s=%s': permission denied (read-only?)\n", + param, val); + else + pr_err("Error %pe opening proc file to set sysctl parameter '%s=%s'\n", + file, param, val); + goto out; + } + len = strlen(val); + wret = kernel_write(file, val, len, &pos); + if (wret < 0) { + err = wret; + if (err == -EINVAL) + pr_err("Failed to set sysctl parameter '%s=%s': invalid value\n", + param, val); + else + pr_err("Error %pe writing to proc file to set sysctl parameter '%s=%s'\n", + ERR_PTR(err), param, val); + } else if (wret != len) { + pr_err("Wrote only %zd bytes of %d writing to proc file %s to set sysctl parameter '%s=%s\n", + wret, len, path, param, val); + } + + err = filp_close(file, NULL); + if (err) + pr_err("Error %pe closing proc file to set sysctl parameter '%s=%s\n", + ERR_PTR(err), param, val); +out: + kfree(path); + return 0; +} + +void do_sysctl_args(void) +{ + char *command_line; + struct vfsmount *proc_mnt = NULL; + + command_line = kstrdup(saved_command_line, GFP_KERNEL); + if (!command_line) + panic("%s: Failed to allocate copy of command line\n", __func__); + + parse_args("Setting sysctl args", command_line, + NULL, 0, -1, -1, &proc_mnt, process_sysctl_arg); + + if (proc_mnt) + kern_unmount(proc_mnt); + + kfree(command_line); +} diff --git a/fs/proc/root.c b/fs/proc/root.c index ffebed1999e5..5e444d4f9717 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -264,11 +264,13 @@ static void proc_kill_sb(struct super_block *sb) { struct proc_fs_info *fs_info = proc_sb_info(sb); - if (fs_info->proc_self) - dput(fs_info->proc_self); + if (!fs_info) { + kill_anon_super(sb); + return; + } - if (fs_info->proc_thread_self) - dput(fs_info->proc_thread_self); + dput(fs_info->proc_self); + dput(fs_info->proc_thread_self); kill_anon_super(sb); put_pid_ns(fs_info->pid_ns); diff --git a/fs/proc/self.c b/fs/proc/self.c index ca5158fa561c..72cd69bcaf4a 100644 --- a/fs/proc/self.c +++ b/fs/proc/self.c @@ -43,7 +43,7 @@ int proc_setup_self(struct super_block *s) inode_lock(root_inode); self = d_alloc_name(s->s_root, "self"); if (self) { - struct inode *inode = new_inode_pseudo(s); + struct inode *inode = new_inode(s); if (inode) { inode->i_ino = self_inum; inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 6ad407d5efe2..dbda4499a859 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -145,7 +145,7 @@ static void *m_start(struct seq_file *m, loff_t *ppos) return NULL; } - if (down_read_killable(&mm->mmap_sem)) { + if (mmap_read_lock_killable(mm)) { mmput(mm); put_task_struct(priv->task); priv->task = NULL; @@ -188,7 +188,7 @@ static void m_stop(struct seq_file *m, void *v) return; release_task_mempolicy(priv); - up_read(&mm->mmap_sem); + mmap_read_unlock(mm); mmput(mm); put_task_struct(priv->task); priv->task = NULL; @@ -593,7 +593,7 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, if (pmd_trans_unstable(pmd)) goto out; /* - * The mmap_sem held all the way back in m_start() is what + * The mmap_lock held all the way back in m_start() is what * keeps khugepaged out of here and from collapsing things * in here. */ @@ -752,7 +752,7 @@ static void smap_gather_stats(struct vm_area_struct *vma, } } #endif - /* mmap_sem is held in m_start */ + /* mmap_lock is held in m_start */ walk_page_vma(vma, &smaps_walk_ops, mss); } @@ -847,7 +847,7 @@ static int show_smaps_rollup(struct seq_file *m, void *v) memset(&mss, 0, sizeof(mss)); - ret = down_read_killable(&mm->mmap_sem); + ret = mmap_read_lock_killable(mm); if (ret) goto out_put_mm; @@ -866,7 +866,7 @@ static int show_smaps_rollup(struct seq_file *m, void *v) __show_smap(m, &mss, true); release_task_mempolicy(priv); - up_read(&mm->mmap_sem); + mmap_read_unlock(mm); out_put_mm: mmput(mm); @@ -1140,7 +1140,7 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, }; if (type == CLEAR_REFS_MM_HIWATER_RSS) { - if (down_write_killable(&mm->mmap_sem)) { + if (mmap_write_lock_killable(mm)) { count = -EINTR; goto out_mm; } @@ -1150,11 +1150,11 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, * resident set size to this mm's current rss value. */ reset_mm_hiwater_rss(mm); - up_write(&mm->mmap_sem); + mmap_write_unlock(mm); goto out_mm; } - if (down_read_killable(&mm->mmap_sem)) { + if (mmap_read_lock_killable(mm)) { count = -EINTR; goto out_mm; } @@ -1163,8 +1163,8 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, for (vma = mm->mmap; vma; vma = vma->vm_next) { if (!(vma->vm_flags & VM_SOFTDIRTY)) continue; - up_read(&mm->mmap_sem); - if (down_write_killable(&mm->mmap_sem)) { + mmap_read_unlock(mm); + if (mmap_write_lock_killable(mm)) { count = -EINTR; goto out_mm; } @@ -1183,14 +1183,14 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, * failed like if * get_proc_task() fails? */ - up_write(&mm->mmap_sem); + mmap_write_unlock(mm); goto out_mm; } for (vma = mm->mmap; vma; vma = vma->vm_next) { vma->vm_flags &= ~VM_SOFTDIRTY; vma_set_page_prot(vma); } - downgrade_write(&mm->mmap_sem); + mmap_write_downgrade(mm); break; } @@ -1203,7 +1203,7 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, if (type == CLEAR_REFS_SOFT_DIRTY) mmu_notifier_invalidate_range_end(&range); tlb_finish_mmu(&tlb, 0, -1); - up_read(&mm->mmap_sem); + mmap_read_unlock(mm); out_mm: mmput(mm); } @@ -1564,11 +1564,11 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, /* overflow ? */ if (end < start_vaddr || end > end_vaddr) end = end_vaddr; - ret = down_read_killable(&mm->mmap_sem); + ret = mmap_read_lock_killable(mm); if (ret) goto out_free; ret = walk_page_range(mm, start_vaddr, end, &pagemap_ops, &pm); - up_read(&mm->mmap_sem); + mmap_read_unlock(mm); start_vaddr = end; len = min(count, PM_ENTRY_BYTES * pm.pos); @@ -1827,7 +1827,7 @@ static int show_numa_map(struct seq_file *m, void *v) if (is_vm_hugetlb_page(vma)) seq_puts(m, " huge"); - /* mmap_sem is held by m_start */ + /* mmap_lock is held by m_start */ walk_page_vma(vma, &show_numa_ops, md); if (!md->pages) diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index 7907e6419e57..a6d21fc0033c 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c @@ -25,7 +25,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm) struct rb_node *p; unsigned long bytes = 0, sbytes = 0, slack = 0, size; - down_read(&mm->mmap_sem); + mmap_read_lock(mm); for (p = rb_first(&mm->mm_rb); p; p = rb_next(p)) { vma = rb_entry(p, struct vm_area_struct, vm_rb); @@ -77,7 +77,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm) "Shared:\t%8lu bytes\n", bytes, slack, sbytes); - up_read(&mm->mmap_sem); + mmap_read_unlock(mm); } unsigned long task_vsize(struct mm_struct *mm) @@ -86,12 +86,12 @@ unsigned long task_vsize(struct mm_struct *mm) struct rb_node *p; unsigned long vsize = 0; - down_read(&mm->mmap_sem); + mmap_read_lock(mm); for (p = rb_first(&mm->mm_rb); p; p = rb_next(p)) { vma = rb_entry(p, struct vm_area_struct, vm_rb); vsize += vma->vm_end - vma->vm_start; } - up_read(&mm->mmap_sem); + mmap_read_unlock(mm); return vsize; } @@ -104,7 +104,7 @@ unsigned long task_statm(struct mm_struct *mm, struct rb_node *p; unsigned long size = kobjsize(mm); - down_read(&mm->mmap_sem); + mmap_read_lock(mm); for (p = rb_first(&mm->mm_rb); p; p = rb_next(p)) { vma = rb_entry(p, struct vm_area_struct, vm_rb); size += kobjsize(vma); @@ -119,7 +119,7 @@ unsigned long task_statm(struct mm_struct *mm, >> PAGE_SHIFT; *data = (PAGE_ALIGN(mm->start_stack) - (mm->start_data & PAGE_MASK)) >> PAGE_SHIFT; - up_read(&mm->mmap_sem); + mmap_read_unlock(mm); size >>= PAGE_SHIFT; size += *text + *data; *resident = size; @@ -211,7 +211,7 @@ static void *m_start(struct seq_file *m, loff_t *pos) if (!mm || !mmget_not_zero(mm)) return NULL; - if (down_read_killable(&mm->mmap_sem)) { + if (mmap_read_lock_killable(mm)) { mmput(mm); return ERR_PTR(-EINTR); } @@ -221,7 +221,7 @@ static void *m_start(struct seq_file *m, loff_t *pos) if (n-- == 0) return p; - up_read(&mm->mmap_sem); + mmap_read_unlock(mm); mmput(mm); return NULL; } @@ -231,7 +231,7 @@ static void m_stop(struct seq_file *m, void *_vml) struct proc_maps_private *priv = m->private; if (!IS_ERR_OR_NULL(_vml)) { - up_read(&priv->mm->mmap_sem); + mmap_read_unlock(priv->mm); mmput(priv->mm); } if (priv->task) { diff --git a/fs/proc/thread_self.c b/fs/proc/thread_self.c index ac284f409568..a553273fbd41 100644 --- a/fs/proc/thread_self.c +++ b/fs/proc/thread_self.c @@ -43,7 +43,7 @@ int proc_setup_thread_self(struct super_block *s) inode_lock(root_inode); thread_self = d_alloc_name(s->s_root, "thread-self"); if (thread_self) { - struct inode *inode = new_inode_pseudo(s); + struct inode *inode = new_inode(s); if (inode) { inode->i_ino = thread_self_inum; inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index c663202da8de..c3a345c28a93 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -27,7 +27,6 @@ #include <linux/pagemap.h> #include <linux/uaccess.h> #include <linux/mem_encrypt.h> -#include <asm/pgtable.h> #include <asm/io.h> #include "internal.h" |
