author	Linus Torvalds <torvalds@linux-foundation.org>	2015-03-09 17:00:54 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	2015-03-09 17:00:54 -0700
commit	b695f31f4efd91c7cab97324ccbcb33201ebaaa2 (patch)
tree	0cb3111136ba025dfa08a5908cbd241bbc37d011
parent	9eccca0843205f87c00404b663188b88eb248051 (diff)
parent	8603e1b30027f943cc9c1eef2b291d42c3347af1 (diff)
Merge branch 'for-4.0-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq
Pull workqueue fix from Tejun Heo:
 "One fix patch for a subtle livelock condition which can happen on PREEMPT_NONE kernels involving two racing cancel_work calls. Whoever comes in second has to wait for the previous one to finish. This was implemented by making the later one block on the same condition the former would (work item completion) and then loop and retest; unfortunately, depending on the wake-up order, the later one could lock the former one out of finishing by busy-looping on the cpu.

  This is fixed by implementing an explicit wait mechanism. The work item might not belong anywhere at this point and there's a remote possibility of a thundering herd problem. I originally tried to use bit_waitqueue but it didn't work for static work items in modules. It's currently using a single wait queue with a filtering wake-up function and exclusive wakeup. If this ever becomes a problem, which is not very likely, we can try to figure out a way to piggyback on bit_waitqueue"

* 'for-4.0-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq:
  workqueue: fix hang involving racing cancel[_delayed]_work_sync()'s for PREEMPT_NONE
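For illustration only (this sketch is not part of the patch; demo_work, demo_fn and demo_cancel are made-up names), the race described above amounts to two tasks concurrently calling cancel_work_sync() on the same work item:

#include <linux/workqueue.h>

/* Hypothetical work item, used only to illustrate the race. */
static void demo_fn(struct work_struct *work)
{
	/* long-running work body */
}
static DECLARE_WORK(demo_work, demo_fn);

/*
 * Tasks A and B both run this concurrently; both end up in
 * __cancel_work_timer().  Before this fix, on PREEMPT_NONE, the second
 * canceller could busy-loop (flush_work() returns false immediately,
 * try_to_grab_pending() keeps returning -ENOENT) and thereby hog the
 * CPU that the first canceller needs in order to clear CANCELING.
 */
static void demo_cancel(void)
{
	cancel_work_sync(&demo_work);
}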
-rw-r--r--	include/linux/workqueue.h	|  3
-rw-r--r--	kernel/workqueue.c		| 56
2 files changed, 54 insertions(+), 5 deletions(-)
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 74db135f9957..f597846ff605 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -70,7 +70,8 @@ enum {
/* data contains off-queue information when !WORK_STRUCT_PWQ */
WORK_OFFQ_FLAG_BASE = WORK_STRUCT_COLOR_SHIFT,
- WORK_OFFQ_CANCELING = (1 << WORK_OFFQ_FLAG_BASE),
+ __WORK_OFFQ_CANCELING = WORK_OFFQ_FLAG_BASE,
+ WORK_OFFQ_CANCELING = (1 << __WORK_OFFQ_CANCELING),
/*
* When a work item is off queue, its high bits point to the last
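For context only (this helper is not part of the diff), the CANCELING bit above is what work_is_canceling(), used in the kernel/workqueue.c hunk below, tests; in kernels of this era it reads roughly:

static bool work_is_canceling(struct work_struct *work)
{
	unsigned long data = atomic_long_read(&work->data);

	return !(data & WORK_STRUCT_PWQ) && (data & WORK_OFFQ_CANCELING);
}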
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index f28849394791..41ff75b478c6 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -2728,19 +2728,57 @@ bool flush_work(struct work_struct *work)
}
EXPORT_SYMBOL_GPL(flush_work);
+struct cwt_wait {
+ wait_queue_t wait;
+ struct work_struct *work;
+};
+
+static int cwt_wakefn(wait_queue_t *wait, unsigned mode, int sync, void *key)
+{
+ struct cwt_wait *cwait = container_of(wait, struct cwt_wait, wait);
+
+ if (cwait->work != key)
+ return 0;
+ return autoremove_wake_function(wait, mode, sync, key);
+}
+
static bool __cancel_work_timer(struct work_struct *work, bool is_dwork)
{
+ static DECLARE_WAIT_QUEUE_HEAD(cancel_waitq);
unsigned long flags;
int ret;
do {
ret = try_to_grab_pending(work, is_dwork, &flags);
/*
- * If someone else is canceling, wait for the same event it
- * would be waiting for before retrying.
+ * If someone else is already canceling, wait for it to
+ * finish. flush_work() doesn't work for PREEMPT_NONE
+ * because we may get scheduled between @work's completion
+ * and the other canceling task resuming and clearing
+ * CANCELING - flush_work() will return false immediately
+ * as @work is no longer busy, try_to_grab_pending() will
+ * return -ENOENT as @work is still being canceled and the
+ * other canceling task won't be able to clear CANCELING as
+ * we're hogging the CPU.
+ *
+ * Let's wait for completion using a waitqueue. As this
+ * may lead to the thundering herd problem, use a custom
+ * wake function which matches @work along with exclusive
+ * wait and wakeup.
*/
- if (unlikely(ret == -ENOENT))
- flush_work(work);
+ if (unlikely(ret == -ENOENT)) {
+ struct cwt_wait cwait;
+
+ init_wait(&cwait.wait);
+ cwait.wait.func = cwt_wakefn;
+ cwait.work = work;
+
+ prepare_to_wait_exclusive(&cancel_waitq, &cwait.wait,
+ TASK_UNINTERRUPTIBLE);
+ if (work_is_canceling(work))
+ schedule();
+ finish_wait(&cancel_waitq, &cwait.wait);
+ }
} while (unlikely(ret < 0));
/* tell other tasks trying to grab @work to back off */
@@ -2749,6 +2787,16 @@ static bool __cancel_work_timer(struct work_struct *work, bool is_dwork)
flush_work(work);
clear_work_data(work);
+
+ /*
+ * Paired with prepare_to_wait() above so that either
+ * waitqueue_active() is visible here or !work_is_canceling() is
+ * visible there.
+ */
+ smp_mb();
+ if (waitqueue_active(&cancel_waitq))
+ __wake_up(&cancel_waitq, TASK_NORMAL, 1, work);
+
return ret;
}
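The wait/wake pattern the patch adds (one shared waitqueue, exclusive waiters, and a wake function that filters on a key) also works in isolation. Below is a minimal standalone sketch of the same mechanism; my_waitq, my_wait, my_wakefn, my_wait_on, my_wake and my_obj_is_busy are hypothetical names, not kernel APIs:

#include <linux/wait.h>
#include <linux/sched.h>

static DECLARE_WAIT_QUEUE_HEAD(my_waitq);

struct my_wait {
	wait_queue_t	wait;
	void		*obj;	/* the object this waiter cares about */
};

/* Wake only waiters whose object matches the key passed to __wake_up(). */
static int my_wakefn(wait_queue_t *wait, unsigned mode, int sync, void *key)
{
	struct my_wait *w = container_of(wait, struct my_wait, wait);

	if (w->obj != key)
		return 0;	/* not ours: leave it queued, keep scanning */
	return autoremove_wake_function(wait, mode, sync, key);
}

/* Sleep until my_wake() is called for @obj (my_obj_is_busy() is hypothetical). */
static void my_wait_on(void *obj)
{
	struct my_wait w;

	init_wait(&w.wait);
	w.wait.func = my_wakefn;
	w.obj = obj;

	prepare_to_wait_exclusive(&my_waitq, &w.wait, TASK_UNINTERRUPTIBLE);
	if (my_obj_is_busy(obj))	/* recheck the condition before sleeping */
		schedule();
	finish_wait(&my_waitq, &w.wait);
}

static void my_wake(void *obj)
{
	smp_mb();	/* pair with prepare_to_wait_exclusive() in my_wait_on() */
	if (waitqueue_active(&my_waitq))
		__wake_up(&my_waitq, TASK_NORMAL, 1, obj);	/* wake one matching waiter */
}

Because non-matching waiters return 0 from the wake function, they do not consume the single exclusive slot, so at most one waiter for the given object is woken per my_wake() call; that is the property the pull message relies on to keep the thundering-herd risk low.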