summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2008-07-17 23:57:20 +0200
committerIngo Molnar <mingo@elte.hu>2008-07-17 23:57:20 +0200
commit393d81aa026e19b6ede6f5f11955c97ee62e5df5 (patch)
treea1d9511e488e19d41089ff0a736f6ce52a81c6e5 /kernel
parent93a0886e2368eafb9df5e2021fb185195cee88b2 (diff)
parent5b664cb235e97afbf34db9c4d77f08ebd725335e (diff)
Merge branch 'linus' into xen-64bit
Diffstat (limited to 'kernel')
-rw-r--r--kernel/Makefile3
-rw-r--r--kernel/cpu.c1
-rw-r--r--kernel/exit.c451
-rw-r--r--kernel/fork.c6
-rw-r--r--kernel/kthread.c2
-rw-r--r--kernel/power/disk.c50
-rw-r--r--kernel/power/main.c16
-rw-r--r--kernel/power/process.c97
-rw-r--r--kernel/power/user.c71
-rw-r--r--kernel/ptrace.c37
10 files changed, 442 insertions, 292 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 0a7ed838984b..985ddb7da4d0 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -11,8 +11,6 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
notifier.o ksysfs.o pm_qos_params.o sched_clock.o
-CFLAGS_REMOVE_sched.o = -mno-spe
-
ifdef CONFIG_FTRACE
# Do not trace debug files and internal ftrace files
CFLAGS_REMOVE_lockdep.o = -pg
@@ -21,6 +19,7 @@ CFLAGS_REMOVE_mutex-debug.o = -pg
CFLAGS_REMOVE_rtmutex-debug.o = -pg
CFLAGS_REMOVE_cgroup-debug.o = -pg
CFLAGS_REMOVE_sched_clock.o = -pg
+CFLAGS_REMOVE_sched.o = -mno-spe -pg
endif
obj-$(CONFIG_SYSCTL_SYSCALL_CHECK) += sysctl_check.o
diff --git a/kernel/cpu.c b/kernel/cpu.c
index b11f06dc149a..cfb1d43ab801 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -299,6 +299,7 @@ int __ref cpu_down(unsigned int cpu)
cpu_maps_update_done();
return err;
}
+EXPORT_SYMBOL(cpu_down);
#endif /*CONFIG_HOTPLUG_CPU*/
/* Requires cpu_add_remove_lock to be held */
diff --git a/kernel/exit.c b/kernel/exit.c
index ceb258782835..93d2711b9381 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -71,7 +71,7 @@ static void __unhash_process(struct task_struct *p)
__get_cpu_var(process_counts)--;
}
list_del_rcu(&p->thread_group);
- remove_parent(p);
+ list_del_init(&p->sibling);
}
/*
@@ -152,6 +152,18 @@ static void delayed_put_task_struct(struct rcu_head *rhp)
put_task_struct(container_of(rhp, struct task_struct, rcu));
}
+/*
+ * Do final ptrace-related cleanup of a zombie being reaped.
+ *
+ * Called with write_lock(&tasklist_lock) held.
+ */
+static void ptrace_release_task(struct task_struct *p)
+{
+ BUG_ON(!list_empty(&p->ptraced));
+ ptrace_unlink(p);
+ BUG_ON(!list_empty(&p->ptrace_entry));
+}
+
void release_task(struct task_struct * p)
{
struct task_struct *leader;
@@ -160,8 +172,7 @@ repeat:
atomic_dec(&p->user->processes);
proc_flush_task(p);
write_lock_irq(&tasklist_lock);
- ptrace_unlink(p);
- BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children));
+ ptrace_release_task(p);
__exit_signal(p);
/*
@@ -315,9 +326,8 @@ static void reparent_to_kthreadd(void)
ptrace_unlink(current);
/* Reparent to init */
- remove_parent(current);
current->real_parent = current->parent = kthreadd_task;
- add_parent(current);
+ list_move_tail(&current->sibling, &current->real_parent->children);
/* Set the exit signal to SIGCHLD so we signal init on exit */
current->exit_signal = SIGCHLD;
@@ -692,37 +702,97 @@ static void exit_mm(struct task_struct * tsk)
mmput(mm);
}
-static void
-reparent_thread(struct task_struct *p, struct task_struct *father, int traced)
+/*
+ * Return nonzero if @parent's children should reap themselves.
+ *
+ * Called with write_lock_irq(&tasklist_lock) held.
+ */
+static int ignoring_children(struct task_struct *parent)
{
- if (p->pdeath_signal)
- /* We already hold the tasklist_lock here. */
- group_send_sig_info(p->pdeath_signal, SEND_SIG_NOINFO, p);
+ int ret;
+ struct sighand_struct *psig = parent->sighand;
+ unsigned long flags;
+ spin_lock_irqsave(&psig->siglock, flags);
+ ret = (psig->action[SIGCHLD-1].sa.sa_handler == SIG_IGN ||
+ (psig->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDWAIT));
+ spin_unlock_irqrestore(&psig->siglock, flags);
+ return ret;
+}
- /* Move the child from its dying parent to the new one. */
- if (unlikely(traced)) {
- /* Preserve ptrace links if someone else is tracing this child. */
- list_del_init(&p->ptrace_list);
- if (ptrace_reparented(p))
- list_add(&p->ptrace_list, &p->real_parent->ptrace_children);
- } else {
- /* If this child is being traced, then we're the one tracing it
- * anyway, so let go of it.
+/*
+ * Detach all tasks we were using ptrace on.
+ * Any that need to be release_task'd are put on the @dead list.
+ *
+ * Called with write_lock(&tasklist_lock) held.
+ */
+static void ptrace_exit(struct task_struct *parent, struct list_head *dead)
+{
+ struct task_struct *p, *n;
+ int ign = -1;
+
+ list_for_each_entry_safe(p, n, &parent->ptraced, ptrace_entry) {
+ __ptrace_unlink(p);
+
+ if (p->exit_state != EXIT_ZOMBIE)
+ continue;
+
+ /*
+ * If it's a zombie, our attachedness prevented normal
+ * parent notification or self-reaping. Do notification
+ * now if it would have happened earlier. If it should
+ * reap itself, add it to the @dead list. We can't call
+ * release_task() here because we already hold tasklist_lock.
+ *
+ * If it's our own child, there is no notification to do.
+ * But if our normal children self-reap, then this child
+ * was prevented by ptrace and we must reap it now.
*/
- p->ptrace = 0;
- remove_parent(p);
- p->parent = p->real_parent;
- add_parent(p);
+ if (!task_detached(p) && thread_group_empty(p)) {
+ if (!same_thread_group(p->real_parent, parent))
+ do_notify_parent(p, p->exit_signal);
+ else {
+ if (ign < 0)
+ ign = ignoring_children(parent);
+ if (ign)
+ p->exit_signal = -1;
+ }
+ }
- if (task_is_traced(p)) {
+ if (task_detached(p)) {
/*
- * If it was at a trace stop, turn it into
- * a normal stop since it's no longer being
- * traced.
+ * Mark it as in the process of being reaped.
*/
- ptrace_untrace(p);
+ p->exit_state = EXIT_DEAD;
+ list_add(&p->ptrace_entry, dead);
}
}
+}
+
+/*
+ * Finish up exit-time ptrace cleanup.
+ *
+ * Called without locks.
+ */
+static void ptrace_exit_finish(struct task_struct *parent,
+ struct list_head *dead)
+{
+ struct task_struct *p, *n;
+
+ BUG_ON(!list_empty(&parent->ptraced));
+
+ list_for_each_entry_safe(p, n, dead, ptrace_entry) {
+ list_del_init(&p->ptrace_entry);
+ release_task(p);
+ }
+}
+
+static void reparent_thread(struct task_struct *p, struct task_struct *father)
+{
+ if (p->pdeath_signal)
+ /* We already hold the tasklist_lock here. */
+ group_send_sig_info(p->pdeath_signal, SEND_SIG_NOINFO, p);
+
+ list_move_tail(&p->sibling, &p->real_parent->children);
/* If this is a threaded reparent there is no need to
* notify anyone anything has happened.
@@ -737,7 +807,8 @@ reparent_thread(struct task_struct *p, struct task_struct *father, int traced)
/* If we'd notified the old parent about this child's death,
* also notify the new parent.
*/
- if (!traced && p->exit_state == EXIT_ZOMBIE &&
+ if (!ptrace_reparented(p) &&
+ p->exit_state == EXIT_ZOMBIE &&
!task_detached(p) && thread_group_empty(p))
do_notify_parent(p, p->exit_signal);
@@ -754,12 +825,15 @@ reparent_thread(struct task_struct *p, struct task_struct *father, int traced)
static void forget_original_parent(struct task_struct *father)
{
struct task_struct *p, *n, *reaper = father;
- struct list_head ptrace_dead;
-
- INIT_LIST_HEAD(&ptrace_dead);
+ LIST_HEAD(ptrace_dead);
write_lock_irq(&tasklist_lock);
+ /*
+ * First clean up ptrace if we were using it.
+ */
+ ptrace_exit(father, &ptrace_dead);
+
do {
reaper = next_thread(reaper);
if (reaper == father) {
@@ -768,58 +842,19 @@ static void forget_original_parent(struct task_struct *father)
}
} while (reaper->flags & PF_EXITING);
- /*
- * There are only two places where our children can be:
- *
- * - in our child list
- * - in our ptraced child list
- *
- * Search them and reparent children.
- */
list_for_each_entry_safe(p, n, &father->children, sibling) {
- int ptrace;
-
- ptrace = p->ptrace;
-
- /* if father isn't the real parent, then ptrace must be enabled */
- BUG_ON(father != p->real_parent && !ptrace);
-
- if (father == p->real_parent) {
- /* reparent with a reaper, real father it's us */
- p->real_parent = reaper;
- reparent_thread(p, father, 0);
- } else {
- /* reparent ptraced task to its real parent */
- __ptrace_unlink (p);
- if (p->exit_state == EXIT_ZOMBIE && !task_detached(p) &&
- thread_group_empty(p))
- do_notify_parent(p, p->exit_signal);
- }
-
- /*
- * if the ptraced child is a detached zombie we must collect
- * it before we exit, or it will remain zombie forever since
- * we prevented it from self-reap itself while it was being
- * traced by us, to be able to see it in wait4.
- */
- if (unlikely(ptrace && p->exit_state == EXIT_ZOMBIE && task_detached(p)))
- list_add(&p->ptrace_list, &ptrace_dead);
- }
-
- list_for_each_entry_safe(p, n, &father->ptrace_children, ptrace_list) {
p->real_parent = reaper;
- reparent_thread(p, father, 1);
+ if (p->parent == father) {
+ BUG_ON(p->ptrace);
+ p->parent = p->real_parent;
+ }
+ reparent_thread(p, father);
}
write_unlock_irq(&tasklist_lock);
BUG_ON(!list_empty(&father->children));
- BUG_ON(!list_empty(&father->ptrace_children));
-
- list_for_each_entry_safe(p, n, &ptrace_dead, ptrace_list) {
- list_del_init(&p->ptrace_list);
- release_task(p);
- }
+ ptrace_exit_finish(father, &ptrace_dead);
}
/*
@@ -1180,13 +1215,6 @@ static int eligible_child(enum pid_type type, struct pid *pid, int options,
return 0;
}
- /*
- * Do not consider detached threads that are
- * not ptraced:
- */
- if (task_detached(p) && !p->ptrace)
- return 0;
-
/* Wait for all children (clone and not) if __WALL is set;
* otherwise, wait for clone children *only* if __WCLONE is
* set; otherwise, wait for non-clone children *only*. (Note:
@@ -1197,14 +1225,10 @@ static int eligible_child(enum pid_type type, struct pid *pid, int options,
return 0;
err = security_task_wait(p);
- if (likely(!err))
- return 1;
+ if (err)
+ return err;
- if (type != PIDTYPE_PID)
- return 0;
- /* This child was explicitly requested, abort */
- read_unlock(&tasklist_lock);
- return err;
+ return 1;
}
static int wait_noreap_copyout(struct task_struct *p, pid_t pid, uid_t uid,
@@ -1238,7 +1262,7 @@ static int wait_noreap_copyout(struct task_struct *p, pid_t pid, uid_t uid,
* the lock and this task is uninteresting. If we return nonzero, we have
* released the lock and the system call should return.
*/
-static int wait_task_zombie(struct task_struct *p, int noreap,
+static int wait_task_zombie(struct task_struct *p, int options,
struct siginfo __user *infop,
int __user *stat_addr, struct rusage __user *ru)
{
@@ -1246,7 +1270,10 @@ static int wait_task_zombie(struct task_struct *p, int noreap,
int retval, status, traced;
pid_t pid = task_pid_vnr(p);
- if (unlikely(noreap)) {
+ if (!likely(options & WEXITED))
+ return 0;
+
+ if (unlikely(options & WNOWAIT)) {
uid_t uid = p->uid;
int exit_code = p->exit_code;
int why, status;
@@ -1396,21 +1423,24 @@ static int wait_task_zombie(struct task_struct *p, int noreap,
* the lock and this task is uninteresting. If we return nonzero, we have
* released the lock and the system call should return.
*/
-static int wait_task_stopped(struct task_struct *p,
- int noreap, struct siginfo __user *infop,
+static int wait_task_stopped(int ptrace, struct task_struct *p,
+ int options, struct siginfo __user *infop,
int __user *stat_addr, struct rusage __user *ru)
{
int retval, exit_code, why;
uid_t uid = 0; /* unneeded, required by compiler */
pid_t pid;
+ if (!(options & WUNTRACED))
+ return 0;
+
exit_code = 0;
spin_lock_irq(&p->sighand->siglock);
if (unlikely(!task_is_stopped_or_traced(p)))
goto unlock_sig;
- if (!(p->ptrace & PT_PTRACED) && p->signal->group_stop_count > 0)
+ if (!ptrace && p->signal->group_stop_count > 0)
/*
* A group stop is in progress and this is the group leader.
* We won't report until all threads have stopped.
@@ -1421,7 +1451,7 @@ static int wait_task_stopped(struct task_struct *p,
if (!exit_code)
goto unlock_sig;
- if (!noreap)
+ if (!unlikely(options & WNOWAIT))
p->exit_code = 0;
uid = p->uid;
@@ -1439,10 +1469,10 @@ unlock_sig:
*/
get_task_struct(p);
pid = task_pid_vnr(p);
- why = (p->ptrace & PT_PTRACED) ? CLD_TRAPPED : CLD_STOPPED;
+ why = ptrace ? CLD_TRAPPED : CLD_STOPPED;
read_unlock(&tasklist_lock);
- if (unlikely(noreap))
+ if (unlikely(options & WNOWAIT))
return wait_noreap_copyout(p, pid, uid,
why, exit_code,
infop, ru);
@@ -1476,7 +1506,7 @@ unlock_sig:
* the lock and this task is uninteresting. If we return nonzero, we have
* released the lock and the system call should return.
*/
-static int wait_task_continued(struct task_struct *p, int noreap,
+static int wait_task_continued(struct task_struct *p, int options,
struct siginfo __user *infop,
int __user *stat_addr, struct rusage __user *ru)
{
@@ -1484,6 +1514,9 @@ static int wait_task_continued(struct task_struct *p, int noreap,
pid_t pid;
uid_t uid;
+ if (!unlikely(options & WCONTINUED))
+ return 0;
+
if (!(p->signal->flags & SIGNAL_STOP_CONTINUED))
return 0;
@@ -1493,7 +1526,7 @@ static int wait_task_continued(struct task_struct *p, int noreap,
spin_unlock_irq(&p->sighand->siglock);
return 0;
}
- if (!noreap)
+ if (!unlikely(options & WNOWAIT))
p->signal->flags &= ~SIGNAL_STOP_CONTINUED;
spin_unlock_irq(&p->sighand->siglock);
@@ -1519,89 +1552,161 @@ static int wait_task_continued(struct task_struct *p, int noreap,
return retval;
}
+/*
+ * Consider @p for a wait by @parent.
+ *
+ * -ECHILD should be in *@notask_error before the first call.
+ * Returns nonzero for a final return, when we have unlocked tasklist_lock.
+ * Returns zero if the search for a child should continue;
+ * then *@notask_error is 0 if @p is an eligible child,
+ * or another error from security_task_wait(), or still -ECHILD.
+ */
+static int wait_consider_task(struct task_struct *parent, int ptrace,
+ struct task_struct *p, int *notask_error,
+ enum pid_type type, struct pid *pid, int options,
+ struct siginfo __user *infop,
+ int __user *stat_addr, struct rusage __user *ru)
+{
+ int ret = eligible_child(type, pid, options, p);
+ if (!ret)
+ return ret;
+
+ if (unlikely(ret < 0)) {
+ /*
+ * If we have not yet seen any eligible child,
+ * then let this error code replace -ECHILD.
+ * A permission error will give the user a clue
+ * to look for security policy problems, rather
+ * than for mysterious wait bugs.
+ */
+ if (*notask_error)
+ *notask_error = ret;
+ }
+
+ if (likely(!ptrace) && unlikely(p->ptrace)) {
+ /*
+ * This child is hidden by ptrace.
+ * We aren't allowed to see it now, but eventually we will.
+ */
+ *notask_error = 0;
+ return 0;
+ }
+
+ if (p->exit_state == EXIT_DEAD)
+ return 0;
+
+ /*
+ * We don't reap group leaders with subthreads.
+ */
+ if (p->exit_state == EXIT_ZOMBIE && !delay_group_leader(p))
+ return wait_task_zombie(p, options, infop, stat_addr, ru);
+
+ /*
+ * It's stopped or running now, so it might
+ * later continue, exit, or stop again.
+ */
+ *notask_error = 0;
+
+ if (task_is_stopped_or_traced(p))
+ return wait_task_stopped(ptrace, p, options,
+ infop, stat_addr, ru);
+
+ return wait_task_continued(p, options, infop, stat_addr, ru);
+}
+
+/*
+ * Do the work of do_wait() for one thread in the group, @tsk.
+ *
+ * -ECHILD should be in *@notask_error before the first call.
+ * Returns nonzero for a final return, when we have unlocked tasklist_lock.
+ * Returns zero if the search for a child should continue; then
+ * *@notask_error is 0 if there were any eligible children,
+ * or another error from security_task_wait(), or still -ECHILD.
+ */
+static int do_wait_thread(struct task_struct *tsk, int *notask_error,
+ enum pid_type type, struct pid *pid, int options,
+ struct siginfo __user *infop, int __user *stat_addr,
+ struct rusage __user *ru)
+{
+ struct task_struct *p;
+
+ list_for_each_entry(p, &tsk->children, sibling) {
+ /*
+ * Do not consider detached threads.
+ */
+ if (!task_detached(p)) {
+ int ret = wait_consider_task(tsk, 0, p, notask_error,
+ type, pid, options,
+ infop, stat_addr, ru);
+ if (ret)
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+static int ptrace_do_wait(struct task_struct *tsk, int *notask_error,
+ enum pid_type type, struct pid *pid, int options,
+ struct siginfo __user *infop, int __user *stat_addr,
+ struct rusage __user *ru)
+{
+ struct task_struct *p;
+
+ /*
+ * Traditionally we see ptrace'd stopped tasks regardless of options.
+ */
+ options |= WUNTRACED;
+
+ list_for_each_entry(p, &tsk->ptraced, ptrace_entry) {
+ int ret = wait_consider_task(tsk, 1, p, notask_error,
+ type, pid, options,
+ infop, stat_addr, ru);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
static long do_wait(enum pid_type type, struct pid *pid, int options,
struct siginfo __user *infop, int __user *stat_addr,
struct rusage __user *ru)
{
DECLARE_WAITQUEUE(wait, current);
struct task_struct *tsk;
- int flag, retval;
+ int retval;
add_wait_queue(&current->signal->wait_chldexit,&wait);
repeat:
- /* If there is nothing that can match our critier just get out */
+ /*
+ * If there is nothing that can match our critiera just get out.
+ * We will clear @retval to zero if we see any child that might later
+ * match our criteria, even if we are not able to reap it yet.
+ */
retval = -ECHILD;
if ((type < PIDTYPE_MAX) && (!pid || hlist_empty(&pid->tasks[type])))
goto end;
- /*
- * We will set this flag if we see any child that might later
- * match our criteria, even if we are not able to reap it yet.
- */
- flag = retval = 0;
current->state = TASK_INTERRUPTIBLE;
read_lock(&tasklist_lock);
tsk = current;
do {
- struct task_struct *p;
-
- list_for_each_entry(p, &tsk->children, sibling) {
- int ret = eligible_child(type, pid, options, p);
- if (!ret)
- continue;
-
- if (unlikely(ret < 0)) {
- retval = ret;
- } else if (task_is_stopped_or_traced(p)) {
- /*
- * It's stopped now, so it might later
- * continue, exit, or stop again.
- */
- flag = 1;
- if (!(p->ptrace & PT_PTRACED) &&
- !(options & WUNTRACED))
- continue;
-
- retval = wait_task_stopped(p,
- (options & WNOWAIT), infop,
- stat_addr, ru);
- } else if (p->exit_state == EXIT_ZOMBIE &&
- !delay_group_leader(p)) {
- /*
- * We don't reap group leaders with subthreads.
- */
- if (!likely(options & WEXITED))
- continue;
- retval = wait_task_zombie(p,
- (options & WNOWAIT), infop,
- stat_addr, ru);
- } else if (p->exit_state != EXIT_DEAD) {
- /*
- * It's running now, so it might later
- * exit, stop, or stop and then continue.
- */
- flag = 1;
- if (!unlikely(options & WCONTINUED))
- continue;
- retval = wait_task_continued(p,
- (options & WNOWAIT), infop,
- stat_addr, ru);
- }
- if (retval != 0) /* tasklist_lock released */
- goto end;
- }
- if (!flag) {
- list_for_each_entry(p, &tsk->ptrace_children,
- ptrace_list) {
- flag = eligible_child(type, pid, options, p);
- if (!flag)
- continue;
- if (likely(flag > 0))
- break;
- retval = flag;
- goto end;
- }
+ int tsk_result = do_wait_thread(tsk, &retval,
+ type, pid, options,
+ infop, stat_addr, ru);
+ if (!tsk_result)
+ tsk_result = ptrace_do_wait(tsk, &retval,
+ type, pid, options,
+ infop, stat_addr, ru);
+ if (tsk_result) {
+ /*
+ * tasklist_lock is unlocked and we have a final result.
+ */
+ retval = tsk_result;
+ goto end;
}
+
if (options & __WNOTHREAD)
break;
tsk = next_thread(tsk);
@@ -1609,16 +1714,14 @@ repeat:
} while (tsk != current);
read_unlock(&tasklist_lock);
- if (flag) {
- if (options & WNOHANG)
- goto end;
+ if (!retval && !(options & WNOHANG)) {
retval = -ERESTARTSYS;
- if (signal_pending(current))
- goto end;
- schedule();
- goto repeat;
+ if (!signal_pending(current)) {
+ schedule();
+ goto repeat;
+ }
}
- retval = -ECHILD;
+
end:
current->state = TASK_RUNNING;
remove_wait_queue(&current->signal->wait_chldexit,&wait);
diff --git a/kernel/fork.c b/kernel/fork.c
index 4bd2f516401f..adefc1131f27 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1125,8 +1125,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
*/
p->group_leader = p;
INIT_LIST_HEAD(&p->thread_group);
- INIT_LIST_HEAD(&p->ptrace_children);
- INIT_LIST_HEAD(&p->ptrace_list);
+ INIT_LIST_HEAD(&p->ptrace_entry);
+ INIT_LIST_HEAD(&p->ptraced);
/* Now that the task is set up, run cgroup callbacks if
* necessary. We need to run them before the task is visible
@@ -1198,7 +1198,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
}
if (likely(p->pid)) {
- add_parent(p);
+ list_add_tail(&p->sibling, &p->real_parent->children);
if (unlikely(p->ptrace & PT_PTRACED))
__ptrace_link(p, current->parent);
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 97747cdd37c9..ac3fb7326641 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -235,7 +235,7 @@ int kthreadd(void *unused)
set_user_nice(tsk, KTHREAD_NICE_LEVEL);
set_cpus_allowed(tsk, CPU_MASK_ALL);
- current->flags |= PF_NOFREEZE;
+ current->flags |= PF_NOFREEZE | PF_FREEZER_NOSIG;
for (;;) {
set_current_state(TASK_INTERRUPTIBLE);
diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index 14a656cdc652..f011e0870b52 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -180,6 +180,17 @@ static void platform_restore_cleanup(int platform_mode)
}
/**
+ * platform_recover - recover the platform from a failure to suspend
+ * devices.
+ */
+
+static void platform_recover(int platform_mode)
+{
+ if (platform_mode && hibernation_ops && hibernation_ops->recover)
+ hibernation_ops->recover();
+}
+
+/**
* create_image - freeze devices that need to be frozen with interrupts
* off, create the hibernation image and thaw those devices. Control
* reappears in this routine after a restore.
@@ -193,6 +204,7 @@ static int create_image(int platform_mode)
if (error)
return error;
+ device_pm_lock();
local_irq_disable();
/* At this point, device_suspend() has been called, but *not*
* device_power_down(). We *must* call device_power_down() now.
@@ -224,9 +236,11 @@ static int create_image(int platform_mode)
/* NOTE: device_power_up() is just a resume() for devices
* that suspended with irqs off ... no overall powerup.
*/
- device_power_up();
+ device_power_up(in_suspend ?
+ (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE);
Enable_irqs:
local_irq_enable();
+ device_pm_unlock();
return error;
}
@@ -255,10 +269,10 @@ int hibernation_snapshot(int platform_mode)
suspend_console();
error = device_suspend(PMSG_FREEZE);
if (error)
- goto Resume_console;
+ goto Recover_platform;
if (hibernation_test(TEST_DEVICES))
- goto Resume_devices;
+ goto Recover_platform;
error = platform_pre_snapshot(platform_mode);
if (error || hibernation_test(TEST_PLATFORM))
@@ -280,12 +294,16 @@ int hibernation_snapshot(int platform_mode)
Finish:
platform_finish(platform_mode);
Resume_devices:
- device_resume();
- Resume_console:
+ device_resume(in_suspend ?
+ (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE);
resume_console();
Close:
platform_end(platform_mode);
return error;
+
+ Recover_platform:
+ platform_recover(platform_mode);
+ goto Resume_devices;
}
/**
@@ -300,8 +318,9 @@ static int resume_target_kernel(void)
{
int error;
+ device_pm_lock();
local_irq_disable();
- error = device_power_down(PMSG_PRETHAW);
+ error = device_power_down(PMSG_QUIESCE);
if (error) {
printk(KERN_ERR "PM: Some devices failed to power down, "
"aborting resume\n");
@@ -329,9 +348,10 @@ static int resume_target_kernel(void)
swsusp_free();
restore_processor_state();
touch_softlockup_watchdog();
- device_power_up();
+ device_power_up(PMSG_RECOVER);
Enable_irqs:
local_irq_enable();
+ device_pm_unlock();
return error;
}
@@ -350,7 +370,7 @@ int hibernation_restore(int platform_mode)
pm_prepare_console();
suspend_console();
- error = device_suspend(PMSG_PRETHAW);
+ error = device_suspend(PMSG_QUIESCE);
if (error)
goto Finish;
@@ -362,7 +382,7 @@ int hibernation_restore(int platform_mode)
enable_nonboot_cpus();
}
platform_restore_cleanup(platform_mode);
- device_resume();
+ device_resume(PMSG_RECOVER);
Finish:
resume_console();
pm_restore_console();
@@ -392,8 +412,11 @@ int hibernation_platform_enter(void)
suspend_console();
error = device_suspend(PMSG_HIBERNATE);
- if (error)
- goto Resume_console;
+ if (error) {
+ if (hibernation_ops->recover)
+ hibernation_ops->recover();
+ goto Resume_devices;
+ }
error = hibernation_ops->prepare();
if (error)
@@ -403,6 +426,7 @@ int hibernation_platform_enter(void)
if (error)
goto Finish;
+ device_pm_lock();
local_irq_disable();
error = device_power_down(PMSG_HIBERNATE);
if (!error) {
@@ -411,6 +435,7 @@ int hibernation_platform_enter(void)
while (1);
}
local_irq_enable();
+ device_pm_unlock();
/*
* We don't need to reenable the nonboot CPUs or resume consoles, since
@@ -419,8 +444,7 @@ int hibernation_platform_enter(void)
Finish:
hibernation_ops->finish();
Resume_devices:
- device_resume();
- Resume_console:
+ device_resume(PMSG_RESTORE);
resume_console();
Close:
hibernation_ops->end();
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 6a6d5eb3524e..3398f4651aa1 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -228,6 +228,7 @@ static int suspend_enter(suspend_state_t state)
{
int error = 0;
+ device_pm_lock();
arch_suspend_disable_irqs();
BUG_ON(!irqs_disabled());
@@ -239,10 +240,11 @@ static int suspend_enter(suspend_state_t state)
if (!suspend_test(TEST_CORE))
error = suspend_ops->enter(state);
- device_power_up();
+ device_power_up(PMSG_RESUME);
Done:
arch_suspend_enable_irqs();
BUG_ON(irqs_disabled());
+ device_pm_unlock();
return error;
}
@@ -267,11 +269,11 @@ int suspend_devices_and_enter(suspend_state_t state)
error = device_suspend(PMSG_SUSPEND);
if (error) {
printk(KERN_ERR "PM: Some devices failed to suspend\n");
- goto Resume_console;
+ goto Recover_platform;
}
if (suspend_test(TEST_DEVICES))
- goto Resume_devices;
+ goto Recover_platform;
if (suspend_ops->prepare) {
error = suspend_ops->prepare();
@@ -291,13 +293,17 @@ int suspend_devices_and_enter(suspend_state_t state)
if (suspend_ops->finish)
suspend_ops->finish();
Resume_devices:
- device_resume();
- Resume_console:
+ device_resume(PMSG_RESUME);
resume_console();
Close:
if (suspend_ops->end)
suspend_ops->end();
return error;
+
+ Recover_platform:
+ if (suspend_ops->recover)
+ suspend_ops->recover();
+ goto Resume_devices;
}
/**
diff --git a/kernel/power/process.c b/kernel/power/process.c
index f1d0b345c9ba..5fb87652f214 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -19,9 +19,6 @@
*/
#define TIMEOUT (20 * HZ)
-#define FREEZER_KERNEL_THREADS 0
-#define FREEZER_USER_SPACE 1
-
static inline int freezeable(struct task_struct * p)
{
if ((p == current) ||
@@ -84,63 +81,53 @@ static void fake_signal_wake_up(struct task_struct *p)
spin_unlock_irqrestore(&p->sighand->siglock, flags);
}
-static int has_mm(struct task_struct *p)
+static inline bool should_send_signal(struct task_struct *p)
{
- return (p->mm && !(p->flags & PF_BORROWED_MM));
+ return !(p->flags & PF_FREEZER_NOSIG);
}
/**
* freeze_task - send a freeze request to given task
* @p: task to send the request to
- * @with_mm_only: if set, the request will only be sent if the task has its
- * own mm
- * Return value: 0, if @with_mm_only is set and the task has no mm of its
- * own or the task is frozen, 1, otherwise
+ * @sig_only: if set, the request will only be sent if the task has the
+ * PF_FREEZER_NOSIG flag unset
+ * Return value: 'false', if @sig_only is set and the task has
+ * PF_FREEZER_NOSIG set or the task is frozen, 'true', otherwise
*
- * The freeze request is sent by seting the tasks's TIF_FREEZE flag and
+ * The freeze request is sent by setting the tasks's TIF_FREEZE flag and
* either sending a fake signal to it or waking it up, depending on whether
- * or not it has its own mm (ie. it is a user land task). If @with_mm_only
- * is set and the task has no mm of its own (ie. it is a kernel thread),
- * its TIF_FREEZE flag should not be set.
- *
- * The task_lock() is necessary to prevent races with exit_mm() or
- * use_mm()/unuse_mm() from occuring.
+ * or not it has PF_FREEZER_NOSIG set. If @sig_only is set and the task
+ * has PF_FREEZER_NOSIG set (ie. it is a typical kernel thread), its
+ * TIF_FREEZE flag will not be set.
*/
-static int freeze_task(struct task_struct *p, int with_mm_only)
+static bool freeze_task(struct task_struct *p, bool sig_only)
{
- int ret = 1;
+ /*
+ * We first check if the task is freezing and next if it has already
+ * been frozen to avoid the race with frozen_process() which first marks
+ * the task as frozen and next clears its TIF_FREEZE.
+ */
+ if (!freezing(p)) {
+ rmb();
+ if (frozen(p))
+ return false;
- task_lock(p);
- if (freezing(p)) {
- if (has_mm(p)) {
- if (!signal_pending(p))
- fake_signal_wake_up(p);
- } else {
- if (with_mm_only)
- ret = 0;
- else
- wake_up_state(p, TASK_INTERRUPTIBLE);
- }
+ if (!sig_only || should_send_signal(p))
+ set_freeze_flag(p);
+ else
+ return false;
+ }
+
+ if (should_send_signal(p)) {
+ if (!signal_pending(p))
+ fake_signal_wake_up(p);
+ } else if (sig_only) {
+ return false;
} else {
- rmb();
- if (frozen(p)) {
- ret = 0;
- } else {
- if (has_mm(p)) {
- set_freeze_flag(p);
- fake_signal_wake_up(p);
- } else {
- if (with_mm_only) {
- ret = 0;
- } else {
- set_freeze_flag(p);
- wake_up_state(p, TASK_INTERRUPTIBLE);
- }
- }
- }
+ wake_up_state(p, TASK_INTERRUPTIBLE);
}
- task_unlock(p);
- return ret;
+
+ return true;
}
static void cancel_freezing(struct task_struct *p)
@@ -156,7 +143,7 @@ static void cancel_freezing(struct task_struct *p)
}
}
-static int try_to_freeze_tasks(int freeze_user_space)
+static int try_to_freeze_tasks(bool sig_only)
{
struct task_struct *g, *p;
unsigned long end_time;
@@ -175,7 +162,7 @@ static int try_to_freeze_tasks(int freeze_user_space)
if (frozen(p) || !freezeable(p))
continue;
- if (!freeze_task(p, freeze_user_space))
+ if (!freeze_task(p, sig_only))
continue;
/*
@@ -235,13 +222,13 @@ int freeze_processes(void)
int error;
printk("Freezing user space processes ... ");
- error = try_to_freeze_tasks(FREEZER_USER_SPACE);
+ error = try_to_freeze_tasks(true);
if (error)
goto Exit;
printk("done.\n");
printk("Freezing remaining freezable tasks ... ");
- error = try_to_freeze_tasks(FREEZER_KERNEL_THREADS);
+ error = try_to_freeze_tasks(false);
if (error)
goto Exit;
printk("done.");
@@ -251,7 +238,7 @@ int freeze_processes(void)
return error;
}
-static void thaw_tasks(int thaw_user_space)
+static void thaw_tasks(bool nosig_only)
{
struct task_struct *g, *p;
@@ -260,7 +247,7 @@ static void thaw_tasks(int thaw_user_space)
if (!freezeable(p))
continue;
- if (!p->mm == thaw_user_space)
+ if (nosig_only && should_send_signal(p))
continue;
thaw_process(p);
@@ -271,8 +258,8 @@ static void thaw_tasks(int thaw_user_space)
void thaw_processes(void)
{
printk("Restarting tasks ... ");
- thaw_tasks(FREEZER_KERNEL_THREADS);
- thaw_tasks(FREEZER_USER_SPACE);
+ thaw_tasks(true);
+ thaw_tasks(false);
schedule();
printk("done.\n");
}
diff --git a/kernel/power/user.c b/kernel/power/user.c
index f5512cb3aa86..a6332a313262 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -23,6 +23,7 @@
#include <linux/console.h>
#include <linux/cpu.h>
#include <linux/freezer.h>
+#include <linux/smp_lock.h>
#include <asm/uaccess.h>
@@ -69,16 +70,22 @@ static int snapshot_open(struct inode *inode, struct file *filp)
struct snapshot_data *data;
int error;
- if (!atomic_add_unless(&snapshot_device_available, -1, 0))
- return -EBUSY;
+ mutex_lock(&pm_mutex);
+
+ if (!atomic_add_unless(&snapshot_device_available, -1, 0)) {
+ error = -EBUSY;
+ goto Unlock;
+ }
if ((filp->f_flags & O_ACCMODE) == O_RDWR) {
atomic_inc(&snapshot_device_available);
- return -ENOSYS;
+ error = -ENOSYS;
+ goto Unlock;
}
if(create_basic_memory_bitmaps()) {
atomic_inc(&snapshot_device_available);
- return -ENOMEM;
+ error = -ENOMEM;
+ goto Unlock;
}
nonseekable_open(inode, filp);
data = &snapshot_state;
@@ -98,33 +105,36 @@ static int snapshot_open(struct inode *inode, struct file *filp)
if (error)
pm_notifier_call_chain(PM_POST_HIBERNATION);
}
- if (error) {
+ if (error)
atomic_inc(&snapshot_device_available);
- return error;
- }
data->frozen = 0;
data->ready = 0;
data->platform_support = 0;
- return 0;
+ Unlock:
+ mutex_unlock(&pm_mutex);
+
+ return error;
}
static int snapshot_release(struct inode *inode, struct file *filp)
{
struct snapshot_data *data;
+ mutex_lock(&pm_mutex);
+
swsusp_free();
free_basic_memory_bitmaps();
data = filp->private_data;
free_all_swap_pages(data->swap);
- if (data->frozen) {
- mutex_lock(&pm_mutex);
+ if (data->frozen)
thaw_processes();
- mutex_unlock(&pm_mutex);
- }
pm_notifier_call_chain(data->mode == O_WRONLY ?
PM_POST_HIBERNATION : PM_POST_RESTORE);
atomic_inc(&snapshot_device_available);
+
+ mutex_unlock(&pm_mutex);
+
return 0;
}
@@ -134,9 +144,13 @@ static ssize_t snapshot_read(struct file *filp, char __user *buf,
struct snapshot_data *data;
ssize_t res;
+ mutex_lock(&pm_mutex);
+
data = filp->private_data;
- if (!data->ready)
- return -ENODATA;
+ if (!data->ready) {
+ res = -ENODATA;
+ goto Unlock;
+ }
res = snapshot_read_next(&data->handle, count);
if (res > 0) {
if (copy_to_user(buf, data_of(data->handle), res))
@@ -144,6 +158,10 @@ static ssize_t snapshot_read(struct file *filp, char __user *buf,
else
*offp = data->handle.offset;
}
+
+ Unlock:
+ mutex_unlock(&pm_mutex);
+
return res;
}
@@ -153,6 +171,8 @@ static ssize_t snapshot_write(struct file *filp, const char __user *buf,
struct snapshot_data *data;
ssize_t res;
+ mutex_lock(&pm_mutex);
+
data = filp->private_data;
res = snapshot_write_next(&data->handle, count);
if (res > 0) {
@@ -161,11 +181,14 @@ static ssize_t snapshot_write(struct file *filp, const char __user *buf,
else
*offp = data->handle.offset;
}
+
+ mutex_unlock(&pm_mutex);
+
return res;
}
-static int snapshot_ioctl(struct inode *inode, struct file *filp,
- unsigned int cmd, unsigned long arg)
+static long snapshot_ioctl(struct file *filp, unsigned int cmd,
+ unsigned long arg)
{
int error = 0;
struct snapshot_data *data;
@@ -179,6 +202,9 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
+ if (!mutex_trylock(&pm_mutex))
+ return -EBUSY;
+
data = filp->private_data;
switch (cmd) {
@@ -186,7 +212,6 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
case SNAPSHOT_FREEZE:
if (data->frozen)
break;
- mutex_lock(&pm_mutex);
printk("Syncing filesystems ... ");
sys_sync();
printk("done.\n");
@@ -194,7 +219,6 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
error = freeze_processes();
if (error)
thaw_processes();
- mutex_unlock(&pm_mutex);
if (!error)
data->frozen = 1;
break;
@@ -202,9 +226,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
case SNAPSHOT_UNFREEZE:
if (!data->frozen || data->ready)
break;
- mutex_lock(&pm_mutex);
thaw_processes();
- mutex_unlock(&pm_mutex);
data->frozen = 0;
break;
@@ -307,16 +329,11 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
error = -EPERM;
break;
}
- if (!mutex_trylock(&pm_mutex)) {
- error = -EBUSY;
- break;
- }
/*
* Tasks are frozen and the notifiers have been called with
* PM_HIBERNATION_PREPARE
*/
error = suspend_devices_and_enter(PM_SUSPEND_MEM);
- mutex_unlock(&pm_mutex);
break;
case SNAPSHOT_PLATFORM_SUPPORT:
@@ -390,6 +407,8 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
}
+ mutex_unlock(&pm_mutex);
+
return error;
}
@@ -399,7 +418,7 @@ static const struct file_operations snapshot_fops = {
.read = snapshot_read,
.write = snapshot_write,
.llseek = no_llseek,
- .ioctl = snapshot_ioctl,
+ .unlocked_ioctl = snapshot_ioctl,
};
static struct miscdevice snapshot_device = {
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index e337390fce01..8392a9da6450 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -33,13 +33,9 @@
*/
void __ptrace_link(struct task_struct *child, struct task_struct *new_parent)
{
- BUG_ON(!list_empty(&child->ptrace_list));
- if (child->parent == new_parent)
- return;
- list_add(&child->ptrace_list, &child->parent->ptrace_children);
- remove_parent(child);
+ BUG_ON(!list_empty(&child->ptrace_entry));
+ list_add(&child->ptrace_entry, &new_parent->ptraced);
child->parent = new_parent;
- add_parent(child);
}
/*
@@ -73,12 +69,8 @@ void __ptrace_unlink(struct task_struct *child)
BUG_ON(!child->ptrace);
child->ptrace = 0;
- if (ptrace_reparented(child)) {
- list_del_init(&child->ptrace_list);
- remove_parent(child);
- child->parent = child->real_parent;
- add_parent(child);
- }
+ child->parent = child->real_parent;
+ list_del_init(&child->ptrace_entry);
if (task_is_traced(child))
ptrace_untrace(child);
@@ -492,15 +484,34 @@ int ptrace_traceme(void)
/*
* Are we already being traced?
*/
+repeat:
task_lock(current);
if (!(current->ptrace & PT_PTRACED)) {
+ /*
+ * See ptrace_attach() comments about the locking here.
+ */
+ unsigned long flags;
+ if (!write_trylock_irqsave(&tasklist_lock, flags)) {
+ task_unlock(current);
+ do {
+ cpu_relax();
+ } while (!write_can_lock(&tasklist_lock));
+ goto repeat;
+ }
+
ret = security_ptrace(current->parent, current,
PTRACE_MODE_ATTACH);
+
/*
* Set the ptrace bit in the process ptrace flags.
+ * Then link us on our parent's ptraced list.
*/
- if (!ret)
+ if (!ret) {
current->ptrace |= PT_PTRACED;
+ __ptrace_link(current, current->real_parent);
+ }
+
+ write_unlock_irqrestore(&tasklist_lock, flags);
}
task_unlock(current);
return ret;