From 2fd752ed77ab9880da927257b73294f29a199f1a Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Tue, 11 Jan 2022 15:23:09 -0800 Subject: psi: Fix uaf issue when psi trigger is destroyed while being polled commit a06247c6804f1a7c86a2e5398a4c1f1db1471848 upstream. With write operation on psi files replacing old trigger with a new one, the lifetime of its waitqueue is totally arbitrary. Overwriting an existing trigger causes its waitqueue to be freed and pending poll() will stumble on trigger->event_wait which was destroyed. Fix this by disallowing to redefine an existing psi trigger. If a write operation is used on a file descriptor with an already existing psi trigger, the operation will fail with EBUSY error. Also bypass a check for psi_disabled in the psi_trigger_destroy as the flag can be flipped after the trigger is created, leading to a memory leak. Fixes: 0e94682b73bf ("psi: introduce psi monitor") Reported-by: syzbot+cdb5dd11c97cc532efad@syzkaller.appspotmail.com Suggested-by: Linus Torvalds Analyzed-by: Eric Biggers Signed-off-by: Suren Baghdasaryan Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Eric Biggers Acked-by: Johannes Weiner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20220111232309.1786347-1-surenb@google.com [surenb: backported to 5.4 kernel] CC: stable@vger.kernel.org # 5.4 Signed-off-by: Suren Baghdasaryan Signed-off-by: Greg Kroah-Hartman --- kernel/cgroup/cgroup.c | 11 ++++++--- kernel/sched/psi.c | 66 ++++++++++++++++++++++---------------------------- 2 files changed, 37 insertions(+), 40 deletions(-) (limited to 'kernel') diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 1904ffcee0f1..ce1745ac7b8c 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -3659,6 +3659,12 @@ static ssize_t cgroup_pressure_write(struct kernfs_open_file *of, char *buf, cgroup_get(cgrp); cgroup_kn_unlock(of->kn); + /* Allow only one trigger per file descriptor */ + if (of->priv) { + cgroup_put(cgrp); + return -EBUSY; + } + psi = cgroup_ino(cgrp) == 1 ? &psi_system : &cgrp->psi; new = psi_trigger_create(psi, buf, nbytes, res); if (IS_ERR(new)) { @@ -3666,8 +3672,7 @@ static ssize_t cgroup_pressure_write(struct kernfs_open_file *of, char *buf, return PTR_ERR(new); } - psi_trigger_replace(&of->priv, new); - + smp_store_release(&of->priv, new); cgroup_put(cgrp); return nbytes; @@ -3702,7 +3707,7 @@ static __poll_t cgroup_pressure_poll(struct kernfs_open_file *of, static void cgroup_pressure_release(struct kernfs_open_file *of) { - psi_trigger_replace(&of->priv, NULL); + psi_trigger_destroy(of->priv); } #endif /* CONFIG_PSI */ diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c index 9154e745f097..9dd83eb74a9d 100644 --- a/kernel/sched/psi.c +++ b/kernel/sched/psi.c @@ -1046,7 +1046,6 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group, t->event = 0; t->last_event_time = 0; init_waitqueue_head(&t->event_wait); - kref_init(&t->refcount); mutex_lock(&group->trigger_lock); @@ -1079,15 +1078,19 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group, return t; } -static void psi_trigger_destroy(struct kref *ref) +void psi_trigger_destroy(struct psi_trigger *t) { - struct psi_trigger *t = container_of(ref, struct psi_trigger, refcount); - struct psi_group *group = t->group; + struct psi_group *group; struct kthread_worker *kworker_to_destroy = NULL; - if (static_branch_likely(&psi_disabled)) + /* + * We do not check psi_disabled since it might have been disabled after + * the trigger got created. + */ + if (!t) return; + group = t->group; /* * Wakeup waiters to stop polling. Can happen if cgroup is deleted * from under a polling process. @@ -1122,9 +1125,9 @@ static void psi_trigger_destroy(struct kref *ref) mutex_unlock(&group->trigger_lock); /* - * Wait for both *trigger_ptr from psi_trigger_replace and - * poll_kworker RCUs to complete their read-side critical sections - * before destroying the trigger and optionally the poll_kworker + * Wait for psi_schedule_poll_work RCU to complete its read-side + * critical section before destroying the trigger and optionally the + * poll_task. */ synchronize_rcu(); /* @@ -1146,18 +1149,6 @@ static void psi_trigger_destroy(struct kref *ref) kfree(t); } -void psi_trigger_replace(void **trigger_ptr, struct psi_trigger *new) -{ - struct psi_trigger *old = *trigger_ptr; - - if (static_branch_likely(&psi_disabled)) - return; - - rcu_assign_pointer(*trigger_ptr, new); - if (old) - kref_put(&old->refcount, psi_trigger_destroy); -} - __poll_t psi_trigger_poll(void **trigger_ptr, struct file *file, poll_table *wait) { @@ -1167,24 +1158,15 @@ __poll_t psi_trigger_poll(void **trigger_ptr, if (static_branch_likely(&psi_disabled)) return DEFAULT_POLLMASK | EPOLLERR | EPOLLPRI; - rcu_read_lock(); - - t = rcu_dereference(*(void __rcu __force **)trigger_ptr); - if (!t) { - rcu_read_unlock(); + t = smp_load_acquire(trigger_ptr); + if (!t) return DEFAULT_POLLMASK | EPOLLERR | EPOLLPRI; - } - kref_get(&t->refcount); - - rcu_read_unlock(); poll_wait(file, &t->event_wait, wait); if (cmpxchg(&t->event, 1, 0) == 1) ret |= EPOLLPRI; - kref_put(&t->refcount, psi_trigger_destroy); - return ret; } @@ -1208,14 +1190,24 @@ static ssize_t psi_write(struct file *file, const char __user *user_buf, buf[buf_size - 1] = '\0'; - new = psi_trigger_create(&psi_system, buf, nbytes, res); - if (IS_ERR(new)) - return PTR_ERR(new); - seq = file->private_data; + /* Take seq->lock to protect seq->private from concurrent writes */ mutex_lock(&seq->lock); - psi_trigger_replace(&seq->private, new); + + /* Allow only one trigger per file descriptor */ + if (seq->private) { + mutex_unlock(&seq->lock); + return -EBUSY; + } + + new = psi_trigger_create(&psi_system, buf, nbytes, res); + if (IS_ERR(new)) { + mutex_unlock(&seq->lock); + return PTR_ERR(new); + } + + smp_store_release(&seq->private, new); mutex_unlock(&seq->lock); return nbytes; @@ -1250,7 +1242,7 @@ static int psi_fop_release(struct inode *inode, struct file *file) { struct seq_file *seq = file->private_data; - psi_trigger_replace(&seq->private, NULL); + psi_trigger_destroy(seq->private); return single_release(inode, file); } -- cgit v1.2.3