Diffstat (limited to 'fs/eventpoll.c')
-rw-r--r--  fs/eventpoll.c  104
1 file changed, 86 insertions, 18 deletions
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index b1c8e23ddf65..adbe328b957c 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -600,8 +600,13 @@ static void ep_remove_wait_queue(struct eppoll_entry *pwq)
wait_queue_head_t *whead;
rcu_read_lock();
- /* If it is cleared by POLLFREE, it should be rcu-safe */
- whead = rcu_dereference(pwq->whead);
+ /*
+ * If it is cleared by POLLFREE, it should be rcu-safe.
+ * If we read NULL we need a barrier paired with
+ * smp_store_release() in ep_poll_callback(), otherwise
+ * we rely on whead->lock.
+ */
+ whead = smp_load_acquire(&pwq->whead);
if (whead)
remove_wait_queue(whead, &pwq->wait);
rcu_read_unlock();
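
The pairing works like this: ep_poll_callback() unlinks the wait entry first and only then publishes NULL with smp_store_release(), so a reader that observes NULL via smp_load_acquire() is guaranteed to also see the completed unlink; if it still sees a non-NULL whead, whead->lock serializes it against the callback instead. Below is a minimal userspace analogue of that protocol, a sketch only: C11 atomics stand in for the kernel's smp_store_release()/smp_load_acquire(), a plain int stands in for the list unlink, and none of the names come from the patch.

#include <stdatomic.h>
#include <stdio.h>
#include <pthread.h>

static int dummy_queue;                       /* stands in for the wait queue head */
static int entry_removed;                     /* stands in for list_del_init()     */
static _Atomic(void *) whead = &dummy_queue;  /* non-NULL while the queue is live  */

/* ep_poll_callback() side: unlink first, then publish NULL with release. */
static void *callback_side(void *arg)
{
	entry_removed = 1;
	atomic_store_explicit(&whead, NULL, memory_order_release);
	return NULL;
}

/* ep_remove_wait_queue() side: acquire pairs with the release above. */
static void *remove_side(void *arg)
{
	void *w = atomic_load_explicit(&whead, memory_order_acquire);

	if (!w)   /* NULL seen: the unlink is guaranteed to be visible too */
		printf("NULL seen, entry_removed=%d (always 1)\n", entry_removed);
	else      /* still live: the real code would take whead->lock */
		printf("queue still live\n");
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, callback_side, NULL);
	pthread_create(&b, NULL, remove_side, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	return 0;
}
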
@@ -960,10 +965,14 @@ static void ep_show_fdinfo(struct seq_file *m, struct file *f)
mutex_lock(&ep->mtx);
for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) {
struct epitem *epi = rb_entry(rbp, struct epitem, rbn);
+ struct inode *inode = file_inode(epi->ffd.file);
- seq_printf(m, "tfd: %8d events: %8x data: %16llx\n",
+ seq_printf(m, "tfd: %8d events: %8x data: %16llx "
+ " pos:%lli ino:%lx sdev:%x\n",
epi->ffd.fd, epi->event.events,
- (long long)epi->event.data);
+ (long long)epi->event.data,
+ (long long)epi->ffd.file->f_pos,
+ inode->i_ino, inode->i_sb->s_dev);
if (seq_has_overflowed(m))
break;
}
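
With the extra fields, each registered target file can be identified from userspace via /proc/<pid>/fdinfo/<epoll-fd> on a kernel carrying this change. A minimal sketch follows; the data value is arbitrary and the output shown in the comment is illustrative.

#include <stdio.h>
#include <unistd.h>
#include <sys/epoll.h>

int main(void)
{
	struct epoll_event ev = { .events = EPOLLIN, .data.u64 = 0xdeadbeef };
	char path[64], line[256];
	FILE *f;
	int epfd = epoll_create1(0);

	if (epfd < 0 || epoll_ctl(epfd, EPOLL_CTL_ADD, STDIN_FILENO, &ev) < 0)
		return 1;

	/* prints something like: tfd: 0 events: 19 data: deadbeef  pos:0 ino:... sdev:... */
	snprintf(path, sizeof(path), "/proc/self/fdinfo/%d", epfd);
	f = fopen(path, "r");
	if (!f)
		return 1;
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);
	fclose(f);
	close(epfd);
	return 0;
}
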
@@ -1073,6 +1082,50 @@ static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd)
return epir;
}
+#ifdef CONFIG_CHECKPOINT_RESTORE
+static struct epitem *ep_find_tfd(struct eventpoll *ep, int tfd, unsigned long toff)
+{
+ struct rb_node *rbp;
+ struct epitem *epi;
+
+ for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) {
+ epi = rb_entry(rbp, struct epitem, rbn);
+ if (epi->ffd.fd == tfd) {
+ if (toff == 0)
+ return epi;
+ else
+ toff--;
+ }
+ cond_resched();
+ }
+
+ return NULL;
+}
+
+struct file *get_epoll_tfile_raw_ptr(struct file *file, int tfd,
+ unsigned long toff)
+{
+ struct file *file_raw;
+ struct eventpoll *ep;
+ struct epitem *epi;
+
+ if (!is_file_epoll(file))
+ return ERR_PTR(-EINVAL);
+
+ ep = file->private_data;
+
+ mutex_lock(&ep->mtx);
+ epi = ep_find_tfd(ep, tfd, toff);
+ if (epi)
+ file_raw = epi->ffd.file;
+ else
+ file_raw = ERR_PTR(-ENOENT);
+ mutex_unlock(&ep->mtx);
+
+ return file_raw;
+}
+#endif /* CONFIG_CHECKPOINT_RESTORE */
+
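
get_epoll_tfile_raw_ptr() is consumed by the KCMP_EPOLL_TFD mode of the kcmp() syscall added in the same series, letting a checkpoint/restore tool such as CRIU check whether one of its own file descriptors refers to the same struct file as a target sitting inside another task's epoll instance. The sketch below is hedged: it assumes the companion kcmp patch and the <linux/kcmp.h> definitions are available, the fd numbers are placeholders, and the call is subject to kcmp()'s usual ptrace access checks.

#include <stdio.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/syscall.h>
#include <linux/kcmp.h>		/* KCMP_EPOLL_TFD, struct kcmp_epoll_slot */

/*
 * Compare the file behind fd1 in pid1 with the epoll target described by
 * slot (epoll fd, target fd number, duplicate offset) in pid2.  Returns 0
 * when both refer to the same struct file.  No glibc wrapper exists for
 * kcmp(), hence syscall().
 */
static long epoll_target_matches(pid_t pid1, int fd1, pid_t pid2,
				 struct kcmp_epoll_slot *slot)
{
	return syscall(SYS_kcmp, pid1, pid2, KCMP_EPOLL_TFD,
		       (unsigned long)fd1, (unsigned long)slot);
}

int main(void)
{
	struct kcmp_epoll_slot slot = {
		.efd  = 4,	/* epoll fd in the observed task (placeholder) */
		.tfd  = 5,	/* "tfd:" value as shown in fdinfo             */
		.toff = 0,	/* n-th duplicate of that tfd, usually 0       */
	};

	printf("match = %ld\n",
	       epoll_target_matches(getpid(), 5, getpid(), &slot));
	return 0;
}
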
/*
* This is the callback that is passed to the wait queue wakeup
* mechanism. It is called by the stored file descriptors when they
@@ -1086,17 +1139,6 @@ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, v
struct eventpoll *ep = epi->ep;
int ewake = 0;
- if ((unsigned long)key & POLLFREE) {
- ep_pwq_from_wait(wait)->whead = NULL;
- /*
- * whead = NULL above can race with ep_remove_wait_queue()
- * which can do another remove_wait_queue() after us, so we
- * can't use __remove_wait_queue(). whead->lock is held by
- * the caller.
- */
- list_del_init(&wait->entry);
- }
-
spin_lock_irqsave(&ep->lock, flags);
ep_set_busy_poll_napi_id(epi);
@@ -1180,10 +1222,26 @@ out_unlock:
if (pwake)
ep_poll_safewake(&ep->poll_wait);
- if (epi->event.events & EPOLLEXCLUSIVE)
- return ewake;
+ if (!(epi->event.events & EPOLLEXCLUSIVE))
+ ewake = 1;
+
+ if ((unsigned long)key & POLLFREE) {
+ /*
+ * If we race with ep_remove_wait_queue() it can miss
+ * ->whead = NULL and do another remove_wait_queue() after
+ * us, so we can't use __remove_wait_queue().
+ */
+ list_del_init(&wait->entry);
+ /*
+ * ->whead != NULL protects us from the race with ep_free()
+ * or ep_remove(), ep_remove_wait_queue() takes whead->lock
+ * held by the caller. Once we nullify it, nothing protects
+ * ep/epi or even wait.
+ */
+ smp_store_release(&ep_pwq_from_wait(wait)->whead, NULL);
+ }
- return 1;
+ return ewake;
}
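
The ewake value matters only for entries registered with EPOLLEXCLUSIVE: a zero return tells __wake_up_common() that this exclusive waiter did not take the wakeup, so the next waiter on the queue is tried. A minimal userspace sketch of the behaviour this serves, using an eventfd as an illustrative wakeup source: two epoll instances add the same eventfd with EPOLLEXCLUSIVE, and a single write typically wakes only one of the two epoll_wait() callers.

#include <stdio.h>
#include <stdint.h>
#include <unistd.h>
#include <pthread.h>
#include <sys/epoll.h>
#include <sys/eventfd.h>

static int efd;

static void *waiter(void *arg)
{
	struct epoll_event ev = { .events = EPOLLIN | EPOLLEXCLUSIVE };
	struct epoll_event out;
	int epfd = epoll_create1(0);

	epoll_ctl(epfd, EPOLL_CTL_ADD, efd, &ev);
	if (epoll_wait(epfd, &out, 1, 2000) > 0)	/* 2 second timeout */
		printf("waiter %ld woke up\n", (long)arg);
	else
		printf("waiter %ld timed out\n", (long)arg);
	close(epfd);
	return NULL;
}

int main(void)
{
	pthread_t t1, t2;
	uint64_t one = 1;

	efd = eventfd(0, 0);
	pthread_create(&t1, NULL, waiter, (void *)1L);
	pthread_create(&t2, NULL, waiter, (void *)2L);
	sleep(1);			/* let both waiters block first    */
	write(efd, &one, sizeof(one));	/* one event, one exclusive wakeup */
	pthread_join(t1, NULL);
	pthread_join(t2, NULL);
	return 0;
}
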
/*
@@ -1748,6 +1806,16 @@ fetch_events:
* to TASK_INTERRUPTIBLE before doing the checks.
*/
set_current_state(TASK_INTERRUPTIBLE);
+ /*
+ * Always short-circuit for fatal signals to allow
+ * threads to make a timely exit without the chance of
+ * finding more events available and fetching
+ * repeatedly.
+ */
+ if (fatal_signal_pending(current)) {
+ res = -EINTR;
+ break;
+ }
if (ep_events_available(ep) || timed_out)
break;
if (signal_pending(current)) {