From 1f702603e7125a390b5cdf5ce00539781cfcc86a Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 20 Nov 2020 17:14:19 -0600 Subject: exec: Simplify unshare_files Now that exec no longer needs to return the unshared files to their previous value there is no reason to return displaced. Instead when unshare_fd creates a copy of the file table, call put_files_struct before returning from unshare_files. Acked-by: Christian Brauner v1: https://lkml.kernel.org/r/20200817220425.9389-2-ebiederm@xmission.com Link: https://lkml.kernel.org/r/20201120231441.29911-2-ebiederm@xmission.com Signed-off-by: Eric W. Biederman --- kernel/fork.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'kernel') diff --git a/kernel/fork.c b/kernel/fork.c index 32083db7a2a2..837b546528c8 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -3023,21 +3023,21 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) * the exec layer of the kernel. */ -int unshare_files(struct files_struct **displaced) +int unshare_files(void) { struct task_struct *task = current; - struct files_struct *copy = NULL; + struct files_struct *old, *copy = NULL; int error; error = unshare_fd(CLONE_FILES, NR_OPEN_MAX, ©); - if (error || !copy) { - *displaced = NULL; + if (error || !copy) return error; - } - *displaced = task->files; + + old = task->files; task_lock(task); task->files = copy; task_unlock(task); + put_files_struct(old); return 0; } -- cgit v1.2.3 From f43c283a89a7dc531a47d4b1e001503cf3dc3234 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 20 Nov 2020 17:14:21 -0600 Subject: kcmp: In kcmp_epoll_target use fget_task Use the helper fget_task and simplify the code. As well as simplifying the code this removes one unnecessary increment of struct files_struct. This unnecessary increment of files_struct.count can result in exec unnecessarily unsharing files_struct and breaking posix locks, and it can result in fget_light having to fallback to fget reducing performance. Suggested-by: Oleg Nesterov Reviewed-by: Cyrill Gorcunov v1: https://lkml.kernel.org/r/20200817220425.9389-4-ebiederm@xmission.com Link: https://lkml.kernel.org/r/20201120231441.29911-4-ebiederm@xmission.com Signed-off-by: Eric W. Biederman --- kernel/kcmp.c | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) (limited to 'kernel') diff --git a/kernel/kcmp.c b/kernel/kcmp.c index b3ff9288c6cc..87c48c0104ad 100644 --- a/kernel/kcmp.c +++ b/kernel/kcmp.c @@ -107,7 +107,6 @@ static int kcmp_epoll_target(struct task_struct *task1, { struct file *filp, *filp_epoll, *filp_tgt; struct kcmp_epoll_slot slot; - struct files_struct *files; if (copy_from_user(&slot, uslot, sizeof(slot))) return -EFAULT; @@ -116,23 +115,12 @@ static int kcmp_epoll_target(struct task_struct *task1, if (!filp) return -EBADF; - files = get_files_struct(task2); - if (!files) + filp_epoll = fget_task(task2, slot.efd); + if (!filp_epoll) return -EBADF; - spin_lock(&files->file_lock); - filp_epoll = fcheck_files(files, slot.efd); - if (filp_epoll) - get_file(filp_epoll); - else - filp_tgt = ERR_PTR(-EBADF); - spin_unlock(&files->file_lock); - put_files_struct(files); - - if (filp_epoll) { - filp_tgt = get_epoll_tfile_raw_ptr(filp_epoll, slot.tfd, slot.toff); - fput(filp_epoll); - } + filp_tgt = get_epoll_tfile_raw_ptr(filp_epoll, slot.tfd, slot.toff); + fput(filp_epoll); if (IS_ERR(filp_tgt)) return PTR_ERR(filp_tgt); -- cgit v1.2.3 From b48845af0152d790a54b8ab78cc2b7c07485fc98 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 20 Nov 2020 17:14:22 -0600 Subject: bpf: In bpf_task_fd_query use fget_task Use the helper fget_task to simplify bpf_task_fd_query. As well as simplifying the code this removes one unnecessary increment of struct files_struct. This unnecessary increment of files_struct.count can result in exec unnecessarily unsharing files_struct and breaking posix locks, and it can result in fget_light having to fallback to fget reducing performance. This simplification comes from the observation that none of the callers of get_files_struct actually need to call get_files_struct that was made when discussing[1] exec and posix file locks. [1] https://lkml.kernel.org/r/20180915160423.GA31461@redhat.com Suggested-by: Oleg Nesterov v1: https://lkml.kernel.org/r/20200817220425.9389-5-ebiederm@xmission.com Link: https://lkml.kernel.org/r/20201120231441.29911-5-ebiederm@xmission.com Signed-off-by: Eric W. Biederman --- kernel/bpf/syscall.c | 20 +++----------------- 1 file changed, 3 insertions(+), 17 deletions(-) (limited to 'kernel') diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 8f50c9c19f1b..6d49c2e1634c 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -3878,7 +3878,6 @@ static int bpf_task_fd_query(const union bpf_attr *attr, pid_t pid = attr->task_fd_query.pid; u32 fd = attr->task_fd_query.fd; const struct perf_event *event; - struct files_struct *files; struct task_struct *task; struct file *file; int err; @@ -3896,23 +3895,11 @@ static int bpf_task_fd_query(const union bpf_attr *attr, if (!task) return -ENOENT; - files = get_files_struct(task); - put_task_struct(task); - if (!files) - return -ENOENT; - err = 0; - spin_lock(&files->file_lock); - file = fcheck_files(files, fd); + file = fget_task(task, fd); + put_task_struct(task); if (!file) - err = -EBADF; - else - get_file(file); - spin_unlock(&files->file_lock); - put_files_struct(files); - - if (err) - goto out; + return -EBADF; if (file->f_op == &bpf_link_fops) { struct bpf_link *link = file->private_data; @@ -3952,7 +3939,6 @@ out_not_supp: err = -ENOTSUPP; put_file: fput(file); -out: return err; } -- cgit v1.2.3 From f36c2943274199cb8aef32ac96531ffb7c4b43d0 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 20 Nov 2020 17:14:26 -0600 Subject: file: Replace fcheck_files with files_lookup_fd_rcu This change renames fcheck_files to files_lookup_fd_rcu. All of the remaining callers take the rcu_read_lock before calling this function so the _rcu suffix is appropriate. This change also tightens up the debug check to verify that all callers hold the rcu_read_lock. All callers that used to call files_check with the files->file_lock held have now been changed to call files_lookup_fd_locked. This change of name has helped remind me of which locks and which guarantees are in place helping me to catch bugs later in the patchset. The need for better names became apparent in the last round of discussion of this set of changes[1]. [1] https://lkml.kernel.org/r/CAHk-=wj8BQbgJFLa+J0e=iT-1qpmCRTbPAJ8gd6MJQ=kbRPqyQ@mail.gmail.com Link: https://lkml.kernel.org/r/20201120231441.29911-9-ebiederm@xmission.com Signed-off-by: Eric W. Biederman --- kernel/bpf/task_iter.c | 2 +- kernel/kcmp.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c index 5b6af30bfbcd..5ab2ccfb96cb 100644 --- a/kernel/bpf/task_iter.c +++ b/kernel/bpf/task_iter.c @@ -183,7 +183,7 @@ again: for (; curr_fd < max_fds; curr_fd++) { struct file *f; - f = fcheck_files(curr_files, curr_fd); + f = files_lookup_fd_rcu(curr_files, curr_fd); if (!f) continue; if (!get_file_rcu(f)) diff --git a/kernel/kcmp.c b/kernel/kcmp.c index 87c48c0104ad..990717c1aed3 100644 --- a/kernel/kcmp.c +++ b/kernel/kcmp.c @@ -67,7 +67,7 @@ get_file_raw_ptr(struct task_struct *task, unsigned int idx) rcu_read_lock(); if (task->files) - file = fcheck_files(task->files, idx); + file = files_lookup_fd_rcu(task->files, idx); rcu_read_unlock(); task_unlock(task); -- cgit v1.2.3 From ed77e80e14a3cd55c73848b9e8043020e717ce12 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 20 Nov 2020 17:14:30 -0600 Subject: kcmp: In get_file_raw_ptr use task_lookup_fd_rcu Modify get_file_raw_ptr to use task_lookup_fd_rcu. The helper task_lookup_fd_rcu does the work of taking the task lock and verifying that task->files != NULL and then calls files_lookup_fd_rcu. So let use the helper to make a simpler implementation of get_file_raw_ptr. Acked-by: Cyrill Gorcunov Link: https://lkml.kernel.org/r/20201120231441.29911-13-ebiederm@xmission.com Signed-off-by: Eric W. Biederman --- kernel/kcmp.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'kernel') diff --git a/kernel/kcmp.c b/kernel/kcmp.c index 990717c1aed3..36e58eb5a11d 100644 --- a/kernel/kcmp.c +++ b/kernel/kcmp.c @@ -61,16 +61,11 @@ static int kcmp_ptr(void *v1, void *v2, enum kcmp_type type) static struct file * get_file_raw_ptr(struct task_struct *task, unsigned int idx) { - struct file *file = NULL; + struct file *file; - task_lock(task); rcu_read_lock(); - - if (task->files) - file = files_lookup_fd_rcu(task->files, idx); - + file = task_lookup_fd_rcu(task, idx); rcu_read_unlock(); - task_unlock(task); return file; } -- cgit v1.2.3 From 66ed594409a10b1cc6fa1e8d22bc8aed2a080d0c Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 20 Nov 2020 17:14:33 -0600 Subject: bpf/task_iter: In task_file_seq_get_next use task_lookup_next_fd_rcu When discussing[1] exec and posix file locks it was realized that none of the callers of get_files_struct fundamentally needed to call get_files_struct, and that by switching them to helper functions instead it will both simplify their code and remove unnecessary increments of files_struct.count. Those unnecessary increments can result in exec unnecessarily unsharing files_struct which breaking posix locks, and it can result in fget_light having to fallback to fget reducing system performance. Using task_lookup_next_fd_rcu simplifies task_file_seq_get_next, by moving the checking for the maximum file descritor into the generic code, and by remvoing the need for capturing and releasing a reference on files_struct. As the reference count of files_struct no longer needs to be maintained bpf_iter_seq_task_file_info can have it's files member removed and task_file_seq_get_next no longer needs it's fstruct argument. The curr_fd local variable does need to become unsigned to be used with fnext_task. As curr_fd is assigned from and assigned a u32 making curr_fd an unsigned int won't cause problems and might prevent them. [1] https://lkml.kernel.org/r/20180915160423.GA31461@redhat.com Suggested-by: Oleg Nesterov v1: https://lkml.kernel.org/r/20200817220425.9389-11-ebiederm@xmission.com Link: https://lkml.kernel.org/r/20201120231441.29911-16-ebiederm@xmission.com Signed-off-by: Eric W. Biederman --- kernel/bpf/task_iter.c | 44 ++++++++++---------------------------------- 1 file changed, 10 insertions(+), 34 deletions(-) (limited to 'kernel') diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c index 5ab2ccfb96cb..4ec63170c741 100644 --- a/kernel/bpf/task_iter.c +++ b/kernel/bpf/task_iter.c @@ -130,45 +130,33 @@ struct bpf_iter_seq_task_file_info { */ struct bpf_iter_seq_task_common common; struct task_struct *task; - struct files_struct *files; u32 tid; u32 fd; }; static struct file * task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info, - struct task_struct **task, struct files_struct **fstruct) + struct task_struct **task) { struct pid_namespace *ns = info->common.ns; - u32 curr_tid = info->tid, max_fds; - struct files_struct *curr_files; + u32 curr_tid = info->tid; struct task_struct *curr_task; - int curr_fd = info->fd; + unsigned int curr_fd = info->fd; /* If this function returns a non-NULL file object, - * it held a reference to the task/files_struct/file. + * it held a reference to the task/file. * Otherwise, it does not hold any reference. */ again: if (*task) { curr_task = *task; - curr_files = *fstruct; curr_fd = info->fd; } else { curr_task = task_seq_get_next(ns, &curr_tid, true); if (!curr_task) return NULL; - curr_files = get_files_struct(curr_task); - if (!curr_files) { - put_task_struct(curr_task); - curr_tid = ++(info->tid); - info->fd = 0; - goto again; - } - - /* set *fstruct, *task and info->tid */ - *fstruct = curr_files; + /* set *task and info->tid */ *task = curr_task; if (curr_tid == info->tid) { curr_fd = info->fd; @@ -179,13 +167,11 @@ again: } rcu_read_lock(); - max_fds = files_fdtable(curr_files)->max_fds; - for (; curr_fd < max_fds; curr_fd++) { + for (;; curr_fd++) { struct file *f; - - f = files_lookup_fd_rcu(curr_files, curr_fd); + f = task_lookup_next_fd_rcu(curr_task, &curr_fd); if (!f) - continue; + break; if (!get_file_rcu(f)) continue; @@ -197,10 +183,8 @@ again: /* the current task is done, go to the next task */ rcu_read_unlock(); - put_files_struct(curr_files); put_task_struct(curr_task); *task = NULL; - *fstruct = NULL; info->fd = 0; curr_tid = ++(info->tid); goto again; @@ -209,13 +193,11 @@ again: static void *task_file_seq_start(struct seq_file *seq, loff_t *pos) { struct bpf_iter_seq_task_file_info *info = seq->private; - struct files_struct *files = NULL; struct task_struct *task = NULL; struct file *file; - file = task_file_seq_get_next(info, &task, &files); + file = task_file_seq_get_next(info, &task); if (!file) { - info->files = NULL; info->task = NULL; return NULL; } @@ -223,7 +205,6 @@ static void *task_file_seq_start(struct seq_file *seq, loff_t *pos) if (*pos == 0) ++*pos; info->task = task; - info->files = files; return file; } @@ -231,22 +212,19 @@ static void *task_file_seq_start(struct seq_file *seq, loff_t *pos) static void *task_file_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct bpf_iter_seq_task_file_info *info = seq->private; - struct files_struct *files = info->files; struct task_struct *task = info->task; struct file *file; ++*pos; ++info->fd; fput((struct file *)v); - file = task_file_seq_get_next(info, &task, &files); + file = task_file_seq_get_next(info, &task); if (!file) { - info->files = NULL; info->task = NULL; return NULL; } info->task = task; - info->files = files; return file; } @@ -295,9 +273,7 @@ static void task_file_seq_stop(struct seq_file *seq, void *v) (void)__task_file_seq_show(seq, v, true); } else { fput((struct file *)v); - put_files_struct(info->files); put_task_struct(info->task); - info->files = NULL; info->task = NULL; } } -- cgit v1.2.3