diff options
| author | Dmitry Torokhov <dmitry.torokhov@gmail.com> | 2022-02-28 23:12:55 -0800 |
|---|---|---|
| committer | Dmitry Torokhov <dmitry.torokhov@gmail.com> | 2022-02-28 23:12:55 -0800 |
| commit | 1136fa0c07de570dc17858745af8be169d1440ba (patch) | |
| tree | 3221b003517dd3cb13df5ba4b85637cd9ed82692 /kernel/cgroup/cgroup.c | |
| parent | ba115adf61b36b8c167126425a62b0efc23f72c0 (diff) | |
| parent | 754e0b0e35608ed5206d6a67a791563c631cec07 (diff) | |
Merge tag 'v5.17-rc4' into for-linus
Merge with mainline to get the Intel ASoC generic helpers header and
other changes.
Diffstat (limited to 'kernel/cgroup/cgroup.c')
| -rw-r--r-- | kernel/cgroup/cgroup.c | 222 |
1 files changed, 126 insertions, 96 deletions
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index ea08f01d0111..9d05c3ca2d5e 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -30,6 +30,7 @@ #include "cgroup-internal.h" +#include <linux/bpf-cgroup.h> #include <linux/cred.h> #include <linux/errno.h> #include <linux/init_task.h> @@ -1740,6 +1741,7 @@ int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask) struct cgroup *dcgrp = &dst_root->cgrp; struct cgroup_subsys *ss; int ssid, i, ret; + u16 dfl_disable_ss_mask = 0; lockdep_assert_held(&cgroup_mutex); @@ -1756,8 +1758,28 @@ int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask) /* can't move between two non-dummy roots either */ if (ss->root != &cgrp_dfl_root && dst_root != &cgrp_dfl_root) return -EBUSY; + + /* + * Collect ssid's that need to be disabled from default + * hierarchy. + */ + if (ss->root == &cgrp_dfl_root) + dfl_disable_ss_mask |= 1 << ssid; + } while_each_subsys_mask(); + if (dfl_disable_ss_mask) { + struct cgroup *scgrp = &cgrp_dfl_root.cgrp; + + /* + * Controllers from default hierarchy that need to be rebound + * are all disabled together in one go. + */ + cgrp_dfl_root.subsys_mask &= ~dfl_disable_ss_mask; + WARN_ON(cgroup_apply_control(scgrp)); + cgroup_finalize_control(scgrp, 0); + } + do_each_subsys_mask(ss, ssid, ss_mask) { struct cgroup_root *src_root = ss->root; struct cgroup *scgrp = &src_root->cgrp; @@ -1766,10 +1788,12 @@ int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask) WARN_ON(!css || cgroup_css(dcgrp, ss)); - /* disable from the source */ - src_root->subsys_mask &= ~(1 << ssid); - WARN_ON(cgroup_apply_control(scgrp)); - cgroup_finalize_control(scgrp, 0); + if (src_root != &cgrp_dfl_root) { + /* disable from the source */ + src_root->subsys_mask &= ~(1 << ssid); + WARN_ON(cgroup_apply_control(scgrp)); + cgroup_finalize_control(scgrp, 0); + } /* rebind */ RCU_INIT_POINTER(scgrp->subsys[ssid], NULL); @@ -2627,11 +2651,11 @@ void cgroup_migrate_add_src(struct css_set *src_cset, if (src_cset->dead) return; - src_cgrp = cset_cgroup_from_root(src_cset, dst_cgrp->root); - if (!list_empty(&src_cset->mg_preload_node)) return; + src_cgrp = cset_cgroup_from_root(src_cset, dst_cgrp->root); + WARN_ON(src_cset->mg_src_cgrp); WARN_ON(src_cset->mg_dst_cgrp); WARN_ON(!list_empty(&src_cset->mg_tasks)); @@ -3607,6 +3631,7 @@ static int cgroup_cpu_pressure_show(struct seq_file *seq, void *v) static ssize_t cgroup_pressure_write(struct kernfs_open_file *of, char *buf, size_t nbytes, enum psi_res res) { + struct cgroup_file_ctx *ctx = of->priv; struct psi_trigger *new; struct cgroup *cgrp; struct psi_group *psi; @@ -3618,6 +3643,12 @@ static ssize_t cgroup_pressure_write(struct kernfs_open_file *of, char *buf, cgroup_get(cgrp); cgroup_kn_unlock(of->kn); + /* Allow only one trigger per file descriptor */ + if (ctx->psi.trigger) { + cgroup_put(cgrp); + return -EBUSY; + } + psi = cgroup_ino(cgrp) == 1 ? &psi_system : &cgrp->psi; new = psi_trigger_create(psi, buf, nbytes, res); if (IS_ERR(new)) { @@ -3625,8 +3656,7 @@ static ssize_t cgroup_pressure_write(struct kernfs_open_file *of, char *buf, return PTR_ERR(new); } - psi_trigger_replace(&of->priv, new); - + smp_store_release(&ctx->psi.trigger, new); cgroup_put(cgrp); return nbytes; @@ -3656,12 +3686,16 @@ static ssize_t cgroup_cpu_pressure_write(struct kernfs_open_file *of, static __poll_t cgroup_pressure_poll(struct kernfs_open_file *of, poll_table *pt) { - return psi_trigger_poll(&of->priv, of->file, pt); + struct cgroup_file_ctx *ctx = of->priv; + + return psi_trigger_poll(&ctx->psi.trigger, of->file, pt); } static void cgroup_pressure_release(struct kernfs_open_file *of) { - psi_trigger_replace(&of->priv, NULL); + struct cgroup_file_ctx *ctx = of->priv; + + psi_trigger_destroy(ctx->psi.trigger); } bool cgroup_psi_enabled(void) @@ -3788,24 +3822,43 @@ static ssize_t cgroup_kill_write(struct kernfs_open_file *of, char *buf, static int cgroup_file_open(struct kernfs_open_file *of) { struct cftype *cft = of_cft(of); + struct cgroup_file_ctx *ctx; + int ret; - if (cft->open) - return cft->open(of); - return 0; + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + ctx->ns = current->nsproxy->cgroup_ns; + get_cgroup_ns(ctx->ns); + of->priv = ctx; + + if (!cft->open) + return 0; + + ret = cft->open(of); + if (ret) { + put_cgroup_ns(ctx->ns); + kfree(ctx); + } + return ret; } static void cgroup_file_release(struct kernfs_open_file *of) { struct cftype *cft = of_cft(of); + struct cgroup_file_ctx *ctx = of->priv; if (cft->release) cft->release(of); + put_cgroup_ns(ctx->ns); + kfree(ctx); } static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { - struct cgroup_namespace *ns = current->nsproxy->cgroup_ns; + struct cgroup_file_ctx *ctx = of->priv; struct cgroup *cgrp = of->kn->parent->priv; struct cftype *cft = of_cft(of); struct cgroup_subsys_state *css; @@ -3822,7 +3875,7 @@ static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf, */ if ((cgrp->root->flags & CGRP_ROOT_NS_DELEGATE) && !(cft->flags & CFTYPE_NS_DELEGATABLE) && - ns != &init_cgroup_ns && ns->root_cset->dfl_cgrp == cgrp) + ctx->ns != &init_cgroup_ns && ctx->ns->root_cset->dfl_cgrp == cgrp) return -EPERM; if (cft->write) @@ -4728,21 +4781,21 @@ void css_task_iter_end(struct css_task_iter *it) static void cgroup_procs_release(struct kernfs_open_file *of) { - if (of->priv) { - css_task_iter_end(of->priv); - kfree(of->priv); - } + struct cgroup_file_ctx *ctx = of->priv; + + if (ctx->procs.started) + css_task_iter_end(&ctx->procs.iter); } static void *cgroup_procs_next(struct seq_file *s, void *v, loff_t *pos) { struct kernfs_open_file *of = s->private; - struct css_task_iter *it = of->priv; + struct cgroup_file_ctx *ctx = of->priv; if (pos) (*pos)++; - return css_task_iter_next(it); + return css_task_iter_next(&ctx->procs.iter); } static void *__cgroup_procs_start(struct seq_file *s, loff_t *pos, @@ -4750,21 +4803,18 @@ static void *__cgroup_procs_start(struct seq_file *s, loff_t *pos, { struct kernfs_open_file *of = s->private; struct cgroup *cgrp = seq_css(s)->cgroup; - struct css_task_iter *it = of->priv; + struct cgroup_file_ctx *ctx = of->priv; + struct css_task_iter *it = &ctx->procs.iter; /* * When a seq_file is seeked, it's always traversed sequentially * from position 0, so we can simply keep iterating on !0 *pos. */ - if (!it) { + if (!ctx->procs.started) { if (WARN_ON_ONCE((*pos))) return ERR_PTR(-EINVAL); - - it = kzalloc(sizeof(*it), GFP_KERNEL); - if (!it) - return ERR_PTR(-ENOMEM); - of->priv = it; css_task_iter_start(&cgrp->self, iter_flags, it); + ctx->procs.started = true; } else if (!(*pos)) { css_task_iter_end(it); css_task_iter_start(&cgrp->self, iter_flags, it); @@ -4815,9 +4865,9 @@ static int cgroup_may_write(const struct cgroup *cgrp, struct super_block *sb) static int cgroup_procs_write_permission(struct cgroup *src_cgrp, struct cgroup *dst_cgrp, - struct super_block *sb) + struct super_block *sb, + struct cgroup_namespace *ns) { - struct cgroup_namespace *ns = current->nsproxy->cgroup_ns; struct cgroup *com_cgrp = src_cgrp; int ret; @@ -4846,11 +4896,12 @@ static int cgroup_procs_write_permission(struct cgroup *src_cgrp, static int cgroup_attach_permissions(struct cgroup *src_cgrp, struct cgroup *dst_cgrp, - struct super_block *sb, bool threadgroup) + struct super_block *sb, bool threadgroup, + struct cgroup_namespace *ns) { int ret = 0; - ret = cgroup_procs_write_permission(src_cgrp, dst_cgrp, sb); + ret = cgroup_procs_write_permission(src_cgrp, dst_cgrp, sb, ns); if (ret) return ret; @@ -4867,8 +4918,10 @@ static int cgroup_attach_permissions(struct cgroup *src_cgrp, static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf, bool threadgroup) { + struct cgroup_file_ctx *ctx = of->priv; struct cgroup *src_cgrp, *dst_cgrp; struct task_struct *task; + const struct cred *saved_cred; ssize_t ret; bool locked; @@ -4886,9 +4939,16 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf, src_cgrp = task_cgroup_from_root(task, &cgrp_dfl_root); spin_unlock_irq(&css_set_lock); - /* process and thread migrations follow same delegation rule */ + /* + * Process and thread migrations follow same delegation rule. Check + * permissions using the credentials from file open to protect against + * inherited fd attacks. + */ + saved_cred = override_creds(of->file->f_cred); ret = cgroup_attach_permissions(src_cgrp, dst_cgrp, - of->file->f_path.dentry->d_sb, threadgroup); + of->file->f_path.dentry->d_sb, + threadgroup, ctx->ns); + revert_creds(saved_cred); if (ret) goto out_finish; @@ -5688,7 +5748,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss, bool early) /* Create the root cgroup state for this subsystem */ ss->root = &cgrp_dfl_root; - css = ss->css_alloc(cgroup_css(&cgrp_dfl_root.cgrp, ss)); + css = ss->css_alloc(NULL); /* We don't handle early failures gracefully */ BUG_ON(IS_ERR(css)); init_and_link_css(css, ss, &cgrp_dfl_root.cgrp); @@ -5911,17 +5971,20 @@ struct cgroup *cgroup_get_from_id(u64 id) struct kernfs_node *kn; struct cgroup *cgrp = NULL; - mutex_lock(&cgroup_mutex); kn = kernfs_find_and_get_node_by_id(cgrp_dfl_root.kf_root, id); if (!kn) - goto out_unlock; + goto out; - cgrp = kn->priv; - if (cgroup_is_dead(cgrp) || !cgroup_tryget(cgrp)) + rcu_read_lock(); + + cgrp = rcu_dereference(*(void __rcu __force **)&kn->priv); + if (cgrp && !cgroup_tryget(cgrp)) cgrp = NULL; + + rcu_read_unlock(); + kernfs_put(kn); -out_unlock: - mutex_unlock(&cgroup_mutex); +out: return cgrp; } EXPORT_SYMBOL_GPL(cgroup_get_from_id); @@ -6104,7 +6167,8 @@ static int cgroup_css_set_fork(struct kernel_clone_args *kargs) goto err; ret = cgroup_attach_permissions(cset->dfl_cgrp, dst_cgrp, sb, - !(kargs->flags & CLONE_THREAD)); + !(kargs->flags & CLONE_THREAD), + current->nsproxy->cgroup_ns); if (ret) goto err; @@ -6474,30 +6538,34 @@ struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss) * * Find the cgroup at @path on the default hierarchy, increment its * reference count and return it. Returns pointer to the found cgroup on - * success, ERR_PTR(-ENOENT) if @path doesn't exist and ERR_PTR(-ENOTDIR) - * if @path points to a non-directory. + * success, ERR_PTR(-ENOENT) if @path doesn't exist or if the cgroup has already + * been released and ERR_PTR(-ENOTDIR) if @path points to a non-directory. */ struct cgroup *cgroup_get_from_path(const char *path) { struct kernfs_node *kn; - struct cgroup *cgrp; - - mutex_lock(&cgroup_mutex); + struct cgroup *cgrp = ERR_PTR(-ENOENT); kn = kernfs_walk_and_get(cgrp_dfl_root.cgrp.kn, path); - if (kn) { - if (kernfs_type(kn) == KERNFS_DIR) { - cgrp = kn->priv; - cgroup_get_live(cgrp); - } else { - cgrp = ERR_PTR(-ENOTDIR); - } - kernfs_put(kn); - } else { - cgrp = ERR_PTR(-ENOENT); + if (!kn) + goto out; + + if (kernfs_type(kn) != KERNFS_DIR) { + cgrp = ERR_PTR(-ENOTDIR); + goto out_kernfs; } - mutex_unlock(&cgroup_mutex); + rcu_read_lock(); + + cgrp = rcu_dereference(*(void __rcu __force **)&kn->priv); + if (!cgrp || !cgroup_tryget(cgrp)) + cgrp = ERR_PTR(-ENOENT); + + rcu_read_unlock(); + +out_kernfs: + kernfs_put(kn); +out: return cgrp; } EXPORT_SYMBOL_GPL(cgroup_get_from_path); @@ -6625,44 +6693,6 @@ void cgroup_sk_free(struct sock_cgroup_data *skcd) #endif /* CONFIG_SOCK_CGROUP_DATA */ -#ifdef CONFIG_CGROUP_BPF -int cgroup_bpf_attach(struct cgroup *cgrp, - struct bpf_prog *prog, struct bpf_prog *replace_prog, - struct bpf_cgroup_link *link, - enum bpf_attach_type type, - u32 flags) -{ - int ret; - - mutex_lock(&cgroup_mutex); - ret = __cgroup_bpf_attach(cgrp, prog, replace_prog, link, type, flags); - mutex_unlock(&cgroup_mutex); - return ret; -} - -int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, - enum bpf_attach_type type) -{ - int ret; - - mutex_lock(&cgroup_mutex); - ret = __cgroup_bpf_detach(cgrp, prog, NULL, type); - mutex_unlock(&cgroup_mutex); - return ret; -} - -int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, - union bpf_attr __user *uattr) -{ - int ret; - - mutex_lock(&cgroup_mutex); - ret = __cgroup_bpf_query(cgrp, attr, uattr); - mutex_unlock(&cgroup_mutex); - return ret; -} -#endif /* CONFIG_CGROUP_BPF */ - #ifdef CONFIG_SYSFS static ssize_t show_delegatable_files(struct cftype *files, char *buf, ssize_t size, const char *prefix) |
