From c02b872a7ca7842e4cdbbf621f77607d0a655f83 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Sun, 26 Jun 2022 10:10:56 +0100 Subject: Documentation: update watch_queue.rst references Changeset f5461124d59b ("Documentation: move watch_queue to core-api") renamed: Documentation/watch_queue.rst to: Documentation/core-api/watch_queue.rst. Update the cross-references accordingly. Fixes: f5461124d59b ("Documentation: move watch_queue to core-api") Reviewed-by: Randy Dunlap Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/1c220de9c58f35e815a3df9458ac2bea323c8bfb.1656234456.git.mchehab@kernel.org Signed-off-by: Jonathan Corbet --- kernel/watch_queue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/watch_queue.c') diff --git a/kernel/watch_queue.c b/kernel/watch_queue.c index 230038d4f908..869fea4fe26b 100644 --- a/kernel/watch_queue.c +++ b/kernel/watch_queue.c @@ -4,7 +4,7 @@ * Copyright (C) 2020 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * - * See Documentation/watch_queue.rst + * See Documentation/core-api/watch_queue.rst */ #define pr_fmt(fmt) "watchq: " fmt -- cgit v1.2.3 From 353f7988dd8413c47718f7ca79c030b6fb62cfe5 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 19 Jul 2022 11:09:01 -0700 Subject: watchqueue: make sure to serialize 'wqueue->defunct' properly When the pipe is closed, we mark the associated watchqueue defunct by calling watch_queue_clear(). However, while that is protected by the watchqueue lock, new watchqueue entries aren't actually added under that lock at all: they use the pipe->rd_wait.lock instead, and looking up that pipe happens without any locking. The watchqueue code uses the RCU read-side section to make sure that the wqueue entry itself hasn't disappeared, but that does not protect the pipe_info in any way. So make sure to actually hold the wqueue lock when posting watch events, properly serializing against the pipe being torn down. Reported-by: Noam Rathaus Cc: Greg KH Cc: David Howells Signed-off-by: Linus Torvalds --- kernel/watch_queue.c | 53 ++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 37 insertions(+), 16 deletions(-) (limited to 'kernel/watch_queue.c') diff --git a/kernel/watch_queue.c b/kernel/watch_queue.c index 230038d4f908..8b28fad1319b 100644 --- a/kernel/watch_queue.c +++ b/kernel/watch_queue.c @@ -34,6 +34,27 @@ MODULE_LICENSE("GPL"); #define WATCH_QUEUE_NOTE_SIZE 128 #define WATCH_QUEUE_NOTES_PER_PAGE (PAGE_SIZE / WATCH_QUEUE_NOTE_SIZE) +/* + * This must be called under the RCU read-lock, which makes + * sure that the wqueue still exists. It can then take the lock, + * and check that the wqueue hasn't been destroyed, which in + * turn makes sure that the notification pipe still exists. + */ +static inline bool lock_wqueue(struct watch_queue *wqueue) +{ + spin_lock_bh(&wqueue->lock); + if (unlikely(wqueue->defunct)) { + spin_unlock_bh(&wqueue->lock); + return false; + } + return true; +} + +static inline void unlock_wqueue(struct watch_queue *wqueue) +{ + spin_unlock_bh(&wqueue->lock); +} + static void watch_queue_pipe_buf_release(struct pipe_inode_info *pipe, struct pipe_buffer *buf) { @@ -69,6 +90,10 @@ static const struct pipe_buf_operations watch_queue_pipe_buf_ops = { /* * Post a notification to a watch queue. + * + * Must be called with the RCU lock for reading, and the + * watch_queue lock held, which guarantees that the pipe + * hasn't been released. */ static bool post_one_notification(struct watch_queue *wqueue, struct watch_notification *n) @@ -85,9 +110,6 @@ static bool post_one_notification(struct watch_queue *wqueue, spin_lock_irq(&pipe->rd_wait.lock); - if (wqueue->defunct) - goto out; - mask = pipe->ring_size - 1; head = pipe->head; tail = pipe->tail; @@ -203,7 +225,10 @@ void __post_watch_notification(struct watch_list *wlist, if (security_post_notification(watch->cred, cred, n) < 0) continue; - post_one_notification(wqueue, n); + if (lock_wqueue(wqueue)) { + post_one_notification(wqueue, n); + unlock_wqueue(wqueue);; + } } rcu_read_unlock(); @@ -462,11 +487,12 @@ int add_watch_to_object(struct watch *watch, struct watch_list *wlist) return -EAGAIN; } - spin_lock_bh(&wqueue->lock); - kref_get(&wqueue->usage); - kref_get(&watch->usage); - hlist_add_head(&watch->queue_node, &wqueue->watches); - spin_unlock_bh(&wqueue->lock); + if (lock_wqueue(wqueue)) { + kref_get(&wqueue->usage); + kref_get(&watch->usage); + hlist_add_head(&watch->queue_node, &wqueue->watches); + unlock_wqueue(wqueue); + } hlist_add_head(&watch->list_node, &wlist->watchers); return 0; @@ -520,20 +546,15 @@ found: wqueue = rcu_dereference(watch->queue); - /* We don't need the watch list lock for the next bit as RCU is - * protecting *wqueue from deallocation. - */ - if (wqueue) { + if (lock_wqueue(wqueue)) { post_one_notification(wqueue, &n.watch); - spin_lock_bh(&wqueue->lock); - if (!hlist_unhashed(&watch->queue_node)) { hlist_del_init_rcu(&watch->queue_node); put_watch(watch); } - spin_unlock_bh(&wqueue->lock); + unlock_wqueue(wqueue); } if (wlist->release_watch) { -- cgit v1.2.3 From 44e29e64cf1ac0cffb152e0532227ea6d002aa28 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 21 Jul 2022 10:30:14 -0700 Subject: watch-queue: remove spurious double semicolon Sedat Dilek noticed that I had an extraneous semicolon at the end of a line in the previous patch. It's harmless, but unintentional, and while compilers just treat it as an extra empty statement, for all I know some other tooling might warn about it. So clean it up before other people notice too ;) Fixes: 353f7988dd84 ("watchqueue: make sure to serialize 'wqueue->defunct' properly") Reported-by: Sedat Dilek Signed-off-by: Linus Torvalds Reported-by: Sedat Dilek --- kernel/watch_queue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/watch_queue.c') diff --git a/kernel/watch_queue.c b/kernel/watch_queue.c index 8b28fad1319b..bb9962b33f95 100644 --- a/kernel/watch_queue.c +++ b/kernel/watch_queue.c @@ -227,7 +227,7 @@ void __post_watch_notification(struct watch_list *wlist, if (lock_wqueue(wqueue)) { post_one_notification(wqueue, n); - unlock_wqueue(wqueue);; + unlock_wqueue(wqueue); } } -- cgit v1.2.3 From e0339f036ef4beb9b20f0b6532a1e0ece7f594c6 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 28 Jul 2022 10:31:06 +0100 Subject: watch_queue: Fix missing rcu annotation Since __post_watch_notification() walks wlist->watchers with only the RCU read lock held, we need to use RCU methods to add to the list (we already use RCU methods to remove from the list). Fix add_watch_to_object() to use hlist_add_head_rcu() instead of hlist_add_head() for that list. Fixes: c73be61cede5 ("pipe: Add general notification queue support") Signed-off-by: David Howells Signed-off-by: Linus Torvalds --- kernel/watch_queue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/watch_queue.c') diff --git a/kernel/watch_queue.c b/kernel/watch_queue.c index bb9962b33f95..2c351765c409 100644 --- a/kernel/watch_queue.c +++ b/kernel/watch_queue.c @@ -494,7 +494,7 @@ int add_watch_to_object(struct watch *watch, struct watch_list *wlist) unlock_wqueue(wqueue); } - hlist_add_head(&watch->list_node, &wlist->watchers); + hlist_add_head_rcu(&watch->list_node, &wlist->watchers); return 0; } EXPORT_SYMBOL(add_watch_to_object); -- cgit v1.2.3 From e64ab2dbd882933b65cd82ff6235d705ad65dbb6 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 28 Jul 2022 10:31:12 +0100 Subject: watch_queue: Fix missing locking in add_watch_to_object() If a watch is being added to a queue, it needs to guard against interference from addition of a new watch, manual removal of a watch and removal of a watch due to some other queue being destroyed. KEYCTL_WATCH_KEY guards against this for the same {key,queue} pair by holding the key->sem writelocked and by holding refs on both the key and the queue - but that doesn't prevent interaction from other {key,queue} pairs. While add_watch_to_object() does take the spinlock on the event queue, it doesn't take the lock on the source's watch list. The assumption was that the caller would prevent that (say by taking key->sem) - but that doesn't prevent interference from the destruction of another queue. Fix this by locking the watcher list in add_watch_to_object(). Fixes: c73be61cede5 ("pipe: Add general notification queue support") Reported-by: syzbot+03d7b43290037d1f87ca@syzkaller.appspotmail.com Signed-off-by: David Howells cc: keyrings@vger.kernel.org Signed-off-by: Linus Torvalds --- kernel/watch_queue.c | 58 ++++++++++++++++++++++++++++++++-------------------- 1 file changed, 36 insertions(+), 22 deletions(-) (limited to 'kernel/watch_queue.c') diff --git a/kernel/watch_queue.c b/kernel/watch_queue.c index 2c351765c409..59ddb00d6944 100644 --- a/kernel/watch_queue.c +++ b/kernel/watch_queue.c @@ -454,6 +454,33 @@ void init_watch(struct watch *watch, struct watch_queue *wqueue) rcu_assign_pointer(watch->queue, wqueue); } +static int add_one_watch(struct watch *watch, struct watch_list *wlist, struct watch_queue *wqueue) +{ + const struct cred *cred; + struct watch *w; + + hlist_for_each_entry(w, &wlist->watchers, list_node) { + struct watch_queue *wq = rcu_access_pointer(w->queue); + if (wqueue == wq && watch->id == w->id) + return -EBUSY; + } + + cred = current_cred(); + if (atomic_inc_return(&cred->user->nr_watches) > task_rlimit(current, RLIMIT_NOFILE)) { + atomic_dec(&cred->user->nr_watches); + return -EAGAIN; + } + + watch->cred = get_cred(cred); + rcu_assign_pointer(watch->watch_list, wlist); + + kref_get(&wqueue->usage); + kref_get(&watch->usage); + hlist_add_head(&watch->queue_node, &wqueue->watches); + hlist_add_head_rcu(&watch->list_node, &wlist->watchers); + return 0; +} + /** * add_watch_to_object - Add a watch on an object to a watch list * @watch: The watch to add @@ -468,34 +495,21 @@ void init_watch(struct watch *watch, struct watch_queue *wqueue) */ int add_watch_to_object(struct watch *watch, struct watch_list *wlist) { - struct watch_queue *wqueue = rcu_access_pointer(watch->queue); - struct watch *w; - - hlist_for_each_entry(w, &wlist->watchers, list_node) { - struct watch_queue *wq = rcu_access_pointer(w->queue); - if (wqueue == wq && watch->id == w->id) - return -EBUSY; - } - - watch->cred = get_current_cred(); - rcu_assign_pointer(watch->watch_list, wlist); + struct watch_queue *wqueue; + int ret = -ENOENT; - if (atomic_inc_return(&watch->cred->user->nr_watches) > - task_rlimit(current, RLIMIT_NOFILE)) { - atomic_dec(&watch->cred->user->nr_watches); - put_cred(watch->cred); - return -EAGAIN; - } + rcu_read_lock(); + wqueue = rcu_access_pointer(watch->queue); if (lock_wqueue(wqueue)) { - kref_get(&wqueue->usage); - kref_get(&watch->usage); - hlist_add_head(&watch->queue_node, &wqueue->watches); + spin_lock(&wlist->lock); + ret = add_one_watch(watch, wlist, wqueue); + spin_unlock(&wlist->lock); unlock_wqueue(wqueue); } - hlist_add_head_rcu(&watch->list_node, &wlist->watchers); - return 0; + rcu_read_unlock(); + return ret; } EXPORT_SYMBOL(add_watch_to_object); -- cgit v1.2.3