From edf2ed153bcae52de70db00a98b0e81a5668e563 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 23 Mar 2011 10:37:00 +0100 Subject: ptrace: Kill tracehook_notify_jctl() tracehook_notify_jctl() aids in determining whether and what to report to the parent when a task is stopped or continued. The function also adds an extra requirement that siglock may be released across it, which is currently unused and quite difficult to satisfy in well-defined manner. As job control and the notifications are about to receive major overhaul, remove the tracehook and open code it. If ever necessary, let's factor it out after the overhaul. * Oleg spotted incorrect CLD_CONTINUED/STOPPED selection when ptraced. Fixed. Signed-off-by: Tejun Heo Cc: Oleg Nesterov Cc: Roland McGrath --- include/linux/tracehook.h | 27 --------------------------- 1 file changed, 27 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index 3a2e66d88a32..b073f3c8adc3 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -468,33 +468,6 @@ static inline int tracehook_get_signal(struct task_struct *task, return 0; } -/** - * tracehook_notify_jctl - report about job control stop/continue - * @notify: zero, %CLD_STOPPED or %CLD_CONTINUED - * @why: %CLD_STOPPED or %CLD_CONTINUED - * - * This is called when we might call do_notify_parent_cldstop(). - * - * @notify is zero if we would not ordinarily send a %SIGCHLD, - * or is the %CLD_STOPPED or %CLD_CONTINUED .si_code for %SIGCHLD. - * - * @why is %CLD_STOPPED when about to stop for job control; - * we are already in %TASK_STOPPED state, about to call schedule(). - * It might also be that we have just exited (check %PF_EXITING), - * but need to report that a group-wide stop is complete. - * - * @why is %CLD_CONTINUED when waking up after job control stop and - * ready to make a delayed @notify report. - * - * Return the %CLD_* value for %SIGCHLD, or zero to generate no signal. 
- * - * Called with the siglock held. - */ -static inline int tracehook_notify_jctl(int notify, int why) -{ - return notify ?: (current->ptrace & PT_PTRACED) ? why : 0; -} - /** * tracehook_finish_jctl - report about return from job control stop * -- cgit v1.2.3 From e5c1902e9260a0075ea52cb5ef627a8d9aaede89 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 23 Mar 2011 10:37:00 +0100 Subject: signal: Fix premature completion of group stop when interfered by ptrace task->signal->group_stop_count is used to track the progress of group stop. It's initialized to the number of tasks which need to stop for group stop to finish and each stopping or trapping task decrements. However, each task doesn't keep track of whether it decremented the counter or not and if woken up before the group stop is complete and stops again, it can decrement the counter multiple times. Please consider the following example code. static void *worker(void *arg) { while (1) ; return NULL; } int main(void) { pthread_t thread; pid_t pid; int i; pid = fork(); if (!pid) { for (i = 0; i < 5; i++) pthread_create(&thread, NULL, worker, NULL); while (1) ; return 0; } ptrace(PTRACE_ATTACH, pid, NULL, NULL); while (1) { waitid(P_PID, pid, NULL, WSTOPPED); ptrace(PTRACE_SINGLESTEP, pid, NULL, (void *)(long)SIGSTOP); } return 0; } The child creates five threads and the parent continuously traps the first thread and whenever the child gets a signal, SIGSTOP is delivered. If an external process sends SIGSTOP to the child, all other threads in the process should reliably stop. However, due to the above bug, the first thread will often end up consuming group_stop_count multiple times and SIGSTOP often ends up stopping none or part of the other four threads. This patch adds a new field task->group_stop which is protected by siglock and uses GROUP_STOP_CONSUME flag to track which task is still to consume group_stop_count to fix this bug. 
task_clear_group_stop_pending() and task_participate_group_stop() are added to help manipulating group stop states. As ptrace_stop() now also uses task_participate_group_stop(), it will set SIGNAL_STOP_STOPPED if it completes a group stop. There still are many issues regarding the interaction between group stop and ptrace. Patches to address them will follow. - Oleg spotted duplicate GROUP_STOP_CONSUME. Dropped. Signed-off-by: Tejun Heo Acked-by: Oleg Nesterov Cc: Roland McGrath --- include/linux/sched.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 4b601be3dace..85f51042c2b8 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1260,6 +1260,7 @@ struct task_struct { int exit_state; int exit_code, exit_signal; int pdeath_signal; /* The signal sent when the parent dies */ + unsigned int group_stop; /* GROUP_STOP_*, siglock protected */ /* ??? */ unsigned int personality; unsigned did_exec:1; @@ -1771,6 +1772,11 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t * #define tsk_used_math(p) ((p)->flags & PF_USED_MATH) #define used_math() tsk_used_math(current) +/* + * task->group_stop flags + */ +#define GROUP_STOP_CONSUME (1 << 17) /* consume group stop count */ + #ifdef CONFIG_PREEMPT_RCU #define RCU_READ_UNLOCK_BLOCKED (1 << 0) /* blocked while in RCU read-side. */ -- cgit v1.2.3 From 39efa3ef3a376a4e53de2f82fc91182459d34200 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 23 Mar 2011 10:37:00 +0100 Subject: signal: Use GROUP_STOP_PENDING to stop once for a single group stop Currently task->signal->group_stop_count is used to decide whether to stop for group stop. However, if there is a task in the group which is taking a long time to stop, other tasks which are continued by ptrace would repeatedly stop for the same group stop until the group stop is complete. 
Conversely, if a ptraced task is in TASK_TRACED state, the debugger won't get notified of group stops which is inconsistent compared to the ptraced task in any other state. This patch introduces GROUP_STOP_PENDING which tracks whether a task is yet to stop for the group stop in progress. The flag is set when a group stop starts and cleared when the task stops the first time for the group stop, and consulted whenever whether the task should participate in a group stop needs to be determined. Note that now tasks in TASK_TRACED also participate in group stop. This results in the following behavior changes. * For a single group stop, a ptracer would see at most one stop reported. * A ptracee in TASK_TRACED now also participates in group stop and the tracer would get the notification. However, as a ptraced task could be in TASK_STOPPED state or any ptrace trap could consume group stop, the notification may still be missing. These will be addressed with further patches. * A ptracee may start a group stop while one is still in progress if the tracer let it continue with stop signal delivery. Group stop code handles this correctly. Oleg: * Spotted that a task might skip signal check even when its GROUP_STOP_PENDING is set. Fixed by updating recalc_sigpending_tsk() to check GROUP_STOP_PENDING instead of group_stop_count. * Pointed out that task->group_stop should be cleared whenever task->signal->group_stop_count is cleared. Fixed accordingly. * Pointed out the behavior inconsistency between TASK_TRACED and RUNNING and the last behavior change. 
Signed-off-by: Tejun Heo Acked-by: Oleg Nesterov Cc: Roland McGrath --- include/linux/sched.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 85f51042c2b8..b2a17dfbdbad 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1775,8 +1775,11 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t * /* * task->group_stop flags */ +#define GROUP_STOP_PENDING (1 << 16) /* task should stop for group stop */ #define GROUP_STOP_CONSUME (1 << 17) /* consume group stop count */ +extern void task_clear_group_stop_pending(struct task_struct *task); + #ifdef CONFIG_PREEMPT_RCU #define RCU_READ_UNLOCK_BLOCKED (1 << 0) /* blocked while in RCU read-side. */ -- cgit v1.2.3 From d79fdd6d96f46fabb779d86332e3677c6f5c2a4f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 23 Mar 2011 10:37:00 +0100 Subject: ptrace: Clean transitions between TASK_STOPPED and TRACED Currently, if the task is STOPPED on ptrace attach, it's left alone and the state is silently changed to TRACED on the next ptrace call. The behavior breaks the assumption that arch_ptrace_stop() is called before any task is poked by ptrace and is ugly in that a task manipulates the state of another task directly. With GROUP_STOP_PENDING, the transitions between TASK_STOPPED and TRACED can be made clean. The tracer can use the flag to tell the tracee to retry stop on attach and detach. On retry, the tracee will enter the desired state in the correct way. The lower 16bits of task->group_stop is used to remember the signal number which caused the last group stop. This is used while retrying for ptrace attach as the original group_exit_code could have been consumed with wait(2) by then. As the real parent may wait(2) and consume the group_exit_code anytime, the group_exit_code needs to be saved separately so that it can be used when switching from regular sleep to ptrace_stop(). 
This is recorded in the lower 16bits of task->group_stop. If a task is already stopped and there's no intervening SIGCONT, a ptrace request immediately following a successful PTRACE_ATTACH should always succeed even if the tracer doesn't wait(2) for attach completion; however, with this change, the tracee might still be TASK_RUNNING trying to enter TASK_TRACED which would cause the following request to fail with -ESRCH. This intermediate state is hidden from the ptracer by setting GROUP_STOP_TRAPPING on attach and making ptrace_check_attach() wait for it to clear on its signal->wait_chldexit. Completing the transition or getting killed clears TRAPPING and wakes up the tracer. Note that the STOPPED -> RUNNING -> TRACED transition is still visible to other threads which are in the same group as the ptracer and the reverse transition is visible to all. Please read the comments for details. Oleg: * Spotted a race condition where a task may retry group stop without proper bookkeeping. Fixed by redoing bookkeeping on retry. * Spotted that the transition is visible to userland in several different ways. Most are fixed with GROUP_STOP_TRAPPING. Unhandled corner case is documented. * Pointed out not setting GROUP_STOP_SIGMASK on an already stopped task would result in more consistent behavior. * Pointed out that calling ptrace_stop() from do_signal_stop() in TASK_STOPPED can race with group stop start logic and then confuse the TRAPPING wait in ptrace_check_attach(). ptrace_stop() is now called with TASK_RUNNING. * Suggested using signal->wait_chldexit instead of bit wait. * Spotted a race condition between TRACED transition and clearing of TRAPPING. 
Signed-off-by: Tejun Heo Acked-by: Oleg Nesterov Cc: Roland McGrath Cc: Jan Kratochvil --- include/linux/sched.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index b2a17dfbdbad..456d80ed3b78 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1775,8 +1775,10 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t * /* * task->group_stop flags */ +#define GROUP_STOP_SIGMASK 0xffff /* signr of the last group stop */ #define GROUP_STOP_PENDING (1 << 16) /* task should stop for group stop */ #define GROUP_STOP_CONSUME (1 << 17) /* consume group stop count */ +#define GROUP_STOP_TRAPPING (1 << 18) /* switching from STOPPED to TRACED */ extern void task_clear_group_stop_pending(struct task_struct *task); -- cgit v1.2.3 From ee77f075921730b2b465880f9fd4367003bdab39 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 1 Apr 2011 20:12:38 +0200 Subject: signal: Turn SIGNAL_STOP_DEQUEUED into GROUP_STOP_DEQUEUED This patch moves SIGNAL_STOP_DEQUEUED from signal_struct->flags to task_struct->group_stop, and thus makes it per-thread. Like SIGNAL_STOP_DEQUEUED, GROUP_STOP_DEQUEUED can be false-positive after return from get_signal_to_deliver(), this is fine. The only purpose of this bit is: we can drop ->siglock after __dequeue_signal() returns the sig_kernel_stop() signal and before we call do_signal_stop(), in this case we must not miss SIGCONT if it comes in between. But, unlike SIGNAL_STOP_DEQUEUED, GROUP_STOP_DEQUEUED can not be false-positive in do_signal_stop() if multiple threads dequeue the sig_kernel_stop() signal at the same time. Consider two threads T1 and T2, SIGTTIN has a handler. - T1 dequeues SIGTSTP and sets SIGNAL_STOP_DEQUEUED, then it drops ->siglock - SIGCONT comes and clears SIGNAL_STOP_DEQUEUED, SIGTSTP should be cancelled. - T2 dequeues SIGTTIN and sets SIGNAL_STOP_DEQUEUED again.
Since we have a handler we should not stop, T2 returns to usermode to run the handler. - T1 continues, calls do_signal_stop() and wrongly starts the group stop because SIGNAL_STOP_DEQUEUED was restored in between. With or without this change: - we need to do something with ptrace_signal() which can return SIGSTOP, but this needs another discussion - SIGSTOP can be lost if it races with the mt exec, will be fixed later. Signed-off-by: Oleg Nesterov Signed-off-by: Tejun Heo --- include/linux/sched.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 456d80ed3b78..8cef82d4cf77 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -652,9 +652,8 @@ struct signal_struct { * Bits in flags field of signal_struct. */ #define SIGNAL_STOP_STOPPED 0x00000001 /* job control stop in effect */ -#define SIGNAL_STOP_DEQUEUED 0x00000002 /* stop signal dequeued */ -#define SIGNAL_STOP_CONTINUED 0x00000004 /* SIGCONT since WCONTINUED reap */ -#define SIGNAL_GROUP_EXIT 0x00000008 /* group exit in progress */ +#define SIGNAL_STOP_CONTINUED 0x00000002 /* SIGCONT since WCONTINUED reap */ +#define SIGNAL_GROUP_EXIT 0x00000004 /* group exit in progress */ /* * Pending notifications to parent. 
*/ @@ -1779,6 +1778,7 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t * #define GROUP_STOP_PENDING (1 << 16) /* task should stop for group stop */ #define GROUP_STOP_CONSUME (1 << 17) /* consume group stop count */ #define GROUP_STOP_TRAPPING (1 << 18) /* switching from STOPPED to TRACED */ +#define GROUP_STOP_DEQUEUED (1 << 19) /* stop signal dequeued */ extern void task_clear_group_stop_pending(struct task_struct *task); -- cgit v1.2.3 From e6fa16ab9c1e9b344428e6fea4d29e3cc4b28fb0 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 27 Apr 2011 20:59:41 +0200 Subject: signal: sigprocmask() should do retarget_shared_pending() In short, almost every change of current->blocked is wrong, or at least can lead to unexpected results. For example. Two threads T1 and T2, T1 sleeps in sigtimedwait/pause/etc. kill(tgid, SIG) can pick T2 for TIF_SIGPENDING. If T2 calls sigprocmask() and blocks SIG before it notices the pending signal, nobody else can handle this pending shared signal. I am not sure this is a bug, but at least this looks strange imho. T1 should not sleep forever, there is a signal which should wake it up. This patch moves the code which actually changes ->blocked into the new helper, set_current_blocked() and changes this code to call retarget_shared_pending() as exit_signals() does. We should only care about the signals we just blocked, we use "newset & ~current->blocked" as a mask. We do not check !sigisemptyset(newblocked), retarget_shared_pending() is cheap unless mask & shared_pending. Note: for this particular case we could simply change sigprocmask() to return -EINTR if signal_pending(), but then we should change other callers and, more importantly, if we need this fix then set_current_blocked() will have more callers and some of them can't restart. See the next patch as a random example.
Signed-off-by: Oleg Nesterov Reviewed-by: Matt Fleming Acked-by: Tejun Heo --- include/linux/signal.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/signal.h b/include/linux/signal.h index fcd2b14b1932..ba009c167275 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -243,6 +243,7 @@ extern long do_rt_tgsigqueueinfo(pid_t tgid, pid_t pid, int sig, siginfo_t *info); extern long do_sigpending(void __user *, unsigned long); extern int sigprocmask(int, sigset_t *, sigset_t *); +extern void set_current_blocked(const sigset_t *); extern int show_unhandled_signals; struct pt_regs; -- cgit v1.2.3 From 943df1485a8ff0e600729e082e568ece04d4de9e Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 27 Apr 2011 21:44:14 +0200 Subject: signal: introduce do_sigtimedwait() to factor out compat/native code Factor out the common code in sys_rt_sigtimedwait/compat_sys_rt_sigtimedwait to the new helper, do_sigtimedwait(). Add the comment to document the extra tick we add to timespec_to_jiffies(ts), thanks to Linus who explained this to me. Perhaps it would be better to move compat_sys_rt_sigtimedwait() into signal.c under CONFIG_COMPAT, then we can make do_sigtimedwait() static. 
Signed-off-by: Oleg Nesterov Acked-by: Tejun Heo Reviewed-by: Matt Fleming --- include/linux/signal.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/signal.h b/include/linux/signal.h index ba009c167275..782546d661ba 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -242,6 +242,8 @@ extern int __group_send_sig_info(int, struct siginfo *, struct task_struct *); extern long do_rt_tgsigqueueinfo(pid_t tgid, pid_t pid, int sig, siginfo_t *info); extern long do_sigpending(void __user *, unsigned long); +extern int do_sigtimedwait(const sigset_t *, siginfo_t *, + const struct timespec *); extern int sigprocmask(int, sigset_t *, sigset_t *); extern void set_current_blocked(const sigset_t *); extern int show_unhandled_signals; -- cgit v1.2.3 From 702a5073fdb71eb29cd4912575289fb5044c1894 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 27 Apr 2011 22:01:27 +0200 Subject: signal: rename signandsets() to sigandnsets() As Tejun and Linus pointed out, "nand" is the wrong name for "x & ~y", it should be "andn". Rename signandsets() as suggested. 
Suggested-by: Tejun Heo Signed-off-by: Oleg Nesterov Acked-by: Tejun Heo --- include/linux/signal.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/signal.h b/include/linux/signal.h index 782546d661ba..7e2526374fd7 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -123,13 +123,13 @@ _SIG_SET_BINOP(sigorsets, _sig_or) #define _sig_and(x,y) ((x) & (y)) _SIG_SET_BINOP(sigandsets, _sig_and) -#define _sig_nand(x,y) ((x) & ~(y)) -_SIG_SET_BINOP(signandsets, _sig_nand) +#define _sig_andn(x,y) ((x) & ~(y)) +_SIG_SET_BINOP(sigandnsets, _sig_andn) #undef _SIG_SET_BINOP #undef _sig_or #undef _sig_and -#undef _sig_nand +#undef _sig_andn #define _SIG_SET_OP(name, op) \ static inline void name(sigset_t *set) \ -- cgit v1.2.3 From b2b07e4fdbc51383cfc0ba5618c2ddf5c9d038f2 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 18 May 2011 15:08:03 +0200 Subject: signal: trivial, fix the "timespec declared inside parameter list" warning Fix the compile warning, do_sigtimedwait(struct timespec *) in signal.h needs the forward declaration of timespec. Reported-and-acked-by: Mike Frysinger Signed-off-by: Oleg Nesterov --- include/linux/signal.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/signal.h b/include/linux/signal.h index 7e2526374fd7..a44e7f062238 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -234,6 +234,9 @@ static inline int valid_signal(unsigned long sig) return sig <= _NSIG ? 
1 : 0; } +struct timespec; +struct pt_regs; + extern int next_signal(struct sigpending *pending, sigset_t *mask); extern int do_send_sig_info(int sig, struct siginfo *info, struct task_struct *p, bool group); @@ -248,7 +251,6 @@ extern int sigprocmask(int, sigset_t *, sigset_t *); extern void set_current_blocked(const sigset_t *); extern int show_unhandled_signals; -struct pt_regs; extern int get_signal_to_deliver(siginfo_t *info, struct k_sigaction *return_ka, struct pt_regs *regs, void *cookie); extern void exit_signals(struct task_struct *tsk); -- cgit v1.2.3