From 3b87487ac5008072f138953b07505a7e3493327f Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 30 Dec 2011 13:24:40 -0800 Subject: Revert "clockevents: Set noop handler in clockevents_exchange_device()" This reverts commit de28f25e8244c7353abed8de0c7792f5f883588c. It results in resume problems for various people. See for example http://thread.gmane.org/gmane.linux.kernel/1233033 http://thread.gmane.org/gmane.linux.kernel/1233389 http://thread.gmane.org/gmane.linux.kernel/1233159 http://thread.gmane.org/gmane.linux.kernel/1227868/focus=1230877 and the fedora and ubuntu bug reports https://bugzilla.redhat.com/show_bug.cgi?id=767248 https://bugs.launchpad.net/ubuntu/+source/linux/+bug/904569 which got bisected down to the stable version of this commit. Reported-by: Jonathan Nieder Reported-by: Phil Miller Reported-by: Philip Langdale Reported-by: Tim Gardner Cc: Thomas Gleixner Cc: Greg KH Cc: stable@kernel.org # for stable kernels that applied the original Signed-off-by: Linus Torvalds --- kernel/time/clockevents.c | 1 - 1 file changed, 1 deletion(-) (limited to 'kernel') diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index c4eb71c8b2ea..1ecd6ba36d6c 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -387,7 +387,6 @@ void clockevents_exchange_device(struct clock_event_device *old, * released list and do a notify add later. */ if (old) { - old->event_handler = clockevents_handle_noop; clockevents_set_mode(old, CLOCK_EVT_MODE_UNUSED); list_del(&old->list); list_add(&old->list, &clockevents_released); -- cgit v1.2.3 From e6780f7243eddb133cc20ec37fa69317c218b709 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Sat, 31 Dec 2011 11:44:01 -0800 Subject: futex: Fix uninterruptible loop due to gate_area It was found (by Sasha) that if you use a futex located in the gate area we get stuck in an uninterruptible infinite loop, much like the ZERO_PAGE issue. While looking at this problem, PeterZ realized you'll get into similar trouble when hitting any install_special_pages() mapping. And are there still drivers setting up their own special mmaps without page->mapping, and without special VM or pte flags to make get_user_pages fail? In most cases, if page->mapping is NULL, we do not need to retry at all: Linus points out that even /proc/sys/vm/drop_caches poses no problem, because it ends up using remove_mapping(), which takes care not to interfere when the page reference count is raised. But there is still one case which does need a retry: if memory pressure called shmem_writepage in between get_user_pages_fast dropping page table lock and our acquiring page lock, then the page gets switched from filecache to swapcache (and ->mapping set to NULL) whatever the refcount. Fault it back in to get the page->mapping needed for key->shared.inode. Reported-by: Sasha Levin Signed-off-by: Hugh Dickins Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds --- kernel/futex.c | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) (limited to 'kernel') diff --git a/kernel/futex.c b/kernel/futex.c index ea87f4d2f455..1614be20173d 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -314,17 +314,29 @@ again: #endif lock_page(page_head); + + /* + * If page_head->mapping is NULL, then it cannot be a PageAnon + * page; but it might be the ZERO_PAGE or in the gate area or + * in a special mapping (all cases which we are happy to fail); + * or it may have been a good file page when get_user_pages_fast + * found it, but truncated or holepunched or subjected to + * invalidate_complete_page2 before we got the page lock (also + * cases which we are happy to fail). And we hold a reference, + * so refcount care in invalidate_complete_page's remove_mapping + * prevents drop_caches from setting mapping to NULL beneath us. + * + * The case we do have to guard against is when memory pressure made + * shmem_writepage move it from filecache to swapcache beneath us: + * an unlikely race, but we do need to retry for page_head->mapping. + */ if (!page_head->mapping) { + int shmem_swizzled = PageSwapCache(page_head); unlock_page(page_head); put_page(page_head); - /* - * ZERO_PAGE pages don't have a mapping. Avoid a busy loop - * trying to find one. RW mapping would have COW'd (and thus - * have a mapping) so this page is RO and won't ever change. - */ - if ((page_head == ZERO_PAGE(address))) - return -EFAULT; - goto again; + if (shmem_swizzled) + goto again; + return -EFAULT; } /* -- cgit v1.2.3 From f9fab10bbd768b0e5254e53a4a8477a94bfc4b96 Mon Sep 17 00:00:00 2001 From: Mandeep Singh Baines Date: Tue, 3 Jan 2012 14:41:13 -0800 Subject: hung_task: fix false positive during vfork vfork parent uninterruptibly and unkillably waits for its child to exec/exit. This wait is of unbounded length. Ignore such waits in the hung_task detector. Signed-off-by: Mandeep Singh Baines Reported-by: Sasha Levin LKML-Reference: <1325344394.28904.43.camel@lappy> Cc: Linus Torvalds Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Andrew Morton Cc: John Kacur Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- kernel/hung_task.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/hung_task.c b/kernel/hung_task.c index 8b1748d0172c..2e48ec0c2e91 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c @@ -74,11 +74,17 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout) /* * Ensure the task is not frozen. - * Also, when a freshly created task is scheduled once, changes - * its state to TASK_UNINTERRUPTIBLE without having ever been - * switched out once, it musn't be checked. + * Also, skip vfork and any other user process that freezer should skip. */ - if (unlikely(t->flags & PF_FROZEN || !switch_count)) + if (unlikely(t->flags & (PF_FROZEN | PF_FREEZER_SKIP))) + return; + + /* + * When a freshly created task is scheduled once, changes its state to + * TASK_UNINTERRUPTIBLE without having ever been switched out once, it + * musn't be checked. + */ + if (unlikely(!switch_count)) return; if (switch_count != t->last_switch_count) { -- cgit v1.2.3 From 50b8d257486a45cba7b65ca978986ed216bbcc10 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 4 Jan 2012 17:29:02 +0100 Subject: ptrace: partially fix the do_wait(WEXITED) vs EXIT_DEAD->EXIT_ZOMBIE race Test-case: int main(void) { int pid, status; pid = fork(); if (!pid) { for (;;) { if (!fork()) return 0; if (waitpid(-1, &status, 0) < 0) { printf("ERR!! wait: %m\n"); return 0; } } } assert(ptrace(PTRACE_ATTACH, pid, 0,0) == 0); assert(waitpid(-1, NULL, 0) == pid); assert(ptrace(PTRACE_SETOPTIONS, pid, 0, PTRACE_O_TRACEFORK) == 0); do { ptrace(PTRACE_CONT, pid, 0, 0); pid = waitpid(-1, NULL, 0); } while (pid > 0); return 1; } It fails because ->real_parent sees its child in EXIT_DEAD state while the tracer is going to change the state back to EXIT_ZOMBIE in wait_task_zombie(). The offending commit is 823b018e which moved the EXIT_DEAD check, but in fact we should not blame it. The original code was not correct as well because it didn't take ptrace_reparented() into account and because we can't really trust ->ptrace. This patch adds the additional check to close this particular race but it doesn't solve the whole problem. We simply can't rely on ->ptrace in this case, it can be cleared if the tracer is multithreaded by the exiting ->parent. I think we should kill EXIT_DEAD altogether, we should always remove the soon-to-be-reaped child from ->children or at least we should never do the DEAD->ZOMBIE transition. But this is too complex for 3.2. Reported-and-tested-by: Denys Vlasenko Tested-by: Lukasz Michalik Acked-by: Tejun Heo Cc: [3.0+] Signed-off-by: Oleg Nesterov Signed-off-by: Linus Torvalds --- kernel/exit.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/exit.c b/kernel/exit.c index d0b7d988f873..e6e01b959a0e 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1540,8 +1540,15 @@ static int wait_consider_task(struct wait_opts *wo, int ptrace, } /* dead body doesn't have much to contribute */ - if (p->exit_state == EXIT_DEAD) + if (unlikely(p->exit_state == EXIT_DEAD)) { + /* + * But do not ignore this task until the tracer does + * wait_task_zombie()->do_notify_parent(). + */ + if (likely(!ptrace) && unlikely(ptrace_reparented(p))) + wo->notask_error = 0; return 0; + } /* slay zombie? */ if (p->exit_state == EXIT_ZOMBIE) { -- cgit v1.2.3 From 8a88951b5878dc475dcd841cefc767e36397d14e Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 4 Jan 2012 17:29:20 +0100 Subject: ptrace: ensure JOBCTL_STOP_SIGMASK is not zero after detach This is the temporary simple fix for 3.2, we need more changes in this area. 1. do_signal_stop() assumes that the running untraced thread in the stopped thread group is not possible. This was our goal but it is not yet achieved: a stopped-but-resumed tracee can clone the running thread which can initiate another group-stop. Remove WARN_ON_ONCE(!current->ptrace). 2. A new thread always starts with ->jobctl = 0. If it is auto-attached and this group is stopped, __ptrace_unlink() sets JOBCTL_STOP_PENDING but JOBCTL_STOP_SIGMASK part is zero, this triggers WANR_ON(!signr) in do_jobctl_trap() if another debugger attaches. Change __ptrace_unlink() to set the artificial SIGSTOP for report. Alternatively we could change ptrace_init_task() to copy signr from current, but this means we can copy it for no reason and hide the possible similar problems. Acked-by: Tejun Heo Cc: [3.1] Signed-off-by: Oleg Nesterov Signed-off-by: Linus Torvalds --- kernel/ptrace.c | 13 ++++++++++++- kernel/signal.c | 2 -- 2 files changed, 12 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 24d04477b257..78ab24a7b0e4 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -96,9 +96,20 @@ void __ptrace_unlink(struct task_struct *child) */ if (!(child->flags & PF_EXITING) && (child->signal->flags & SIGNAL_STOP_STOPPED || - child->signal->group_stop_count)) + child->signal->group_stop_count)) { child->jobctl |= JOBCTL_STOP_PENDING; + /* + * This is only possible if this thread was cloned by the + * traced task running in the stopped group, set the signal + * for the future reports. + * FIXME: we should change ptrace_init_task() to handle this + * case. + */ + if (!(child->jobctl & JOBCTL_STOP_SIGMASK)) + child->jobctl |= SIGSTOP; + } + /* * If transition to TASK_STOPPED is pending or in TASK_TRACED, kick * @child in the butt. Note that @resume should be used iff @child diff --git a/kernel/signal.c b/kernel/signal.c index b3f78d09a105..206551563cce 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1994,8 +1994,6 @@ static bool do_signal_stop(int signr) */ if (!(sig->flags & SIGNAL_STOP_STOPPED)) sig->group_exit_code = signr; - else - WARN_ON_ONCE(!current->ptrace); sig->group_stop_count = 0; -- cgit v1.2.3