From fca178c0c6e8d52a1875be36b070f30884ebfae9 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sat, 28 Oct 2006 10:38:49 -0700 Subject: [PATCH] fill_tgid: fix task_struct leak and possible oops 1. fill_tgid() forgets to do put_task_struct(first). 2. release_task(first) can happen after fill_tgid() drops tasklist_lock, it is unsafe to dereference first->signal. This is a temporary fix, imho the locking should be reworked. Signed-off-by: Oleg Nesterov Cc: Shailabh Nagar Cc: Balbir Singh Cc: Jay Lan Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/taskstats.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'kernel/taskstats.c') diff --git a/kernel/taskstats.c b/kernel/taskstats.c index 5d6a8c54ee85..9aeee511a463 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -237,14 +237,17 @@ static int fill_tgid(pid_t tgid, struct task_struct *tgidtsk, } else get_task_struct(first); - /* Start with stats from dead tasks */ - spin_lock_irqsave(&first->signal->stats_lock, flags); - if (first->signal->stats) - memcpy(stats, first->signal->stats, sizeof(*stats)); - spin_unlock_irqrestore(&first->signal->stats_lock, flags); tsk = first; read_lock(&tasklist_lock); + /* Start with stats from dead tasks */ + if (first->signal) { + spin_lock_irqsave(&first->signal->stats_lock, flags); + if (first->signal->stats) + memcpy(stats, first->signal->stats, sizeof(*stats)); + spin_unlock_irqrestore(&first->signal->stats_lock, flags); + } + do { if (tsk->exit_state == EXIT_ZOMBIE && thread_group_leader(tsk)) continue; @@ -264,7 +267,7 @@ static int fill_tgid(pid_t tgid, struct task_struct *tgidtsk, * Accounting subsytems can also add calls here to modify * fields of taskstats. */ - + put_task_struct(first); return 0; } -- cgit v1.2.3 From b8534d7bd89df0cd41cd47bcd6733a05ea9a691a Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sat, 28 Oct 2006 10:38:53 -0700 Subject: [PATCH] taskstats: kill ->taskstats_lock in favor of ->siglock signal_struct is (mostly) protected by ->sighand->siglock, I think we don't need ->taskstats_lock to protect ->stats. This also allows us to simplify the locking in fill_tgid(). Signed-off-by: Oleg Nesterov Cc: Shailabh Nagar Cc: Balbir Singh Cc: Jay Lan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/taskstats.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) (limited to 'kernel/taskstats.c') diff --git a/kernel/taskstats.c b/kernel/taskstats.c index 9aeee511a463..b2efda94615a 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -241,11 +241,11 @@ static int fill_tgid(pid_t tgid, struct task_struct *tgidtsk, tsk = first; read_lock(&tasklist_lock); /* Start with stats from dead tasks */ - if (first->signal) { - spin_lock_irqsave(&first->signal->stats_lock, flags); + if (first->sighand) { + spin_lock_irqsave(&first->sighand->siglock, flags); if (first->signal->stats) memcpy(stats, first->signal->stats, sizeof(*stats)); - spin_unlock_irqrestore(&first->signal->stats_lock, flags); + spin_unlock_irqrestore(&first->sighand->siglock, flags); } do { @@ -276,7 +276,7 @@ static void fill_tgid_exit(struct task_struct *tsk) { unsigned long flags; - spin_lock_irqsave(&tsk->signal->stats_lock, flags); + spin_lock_irqsave(&tsk->sighand->siglock, flags); if (!tsk->signal->stats) goto ret; @@ -288,7 +288,7 @@ static void fill_tgid_exit(struct task_struct *tsk) */ delayacct_add_tsk(tsk->signal->stats, tsk); ret: - spin_unlock_irqrestore(&tsk->signal->stats_lock, flags); + spin_unlock_irqrestore(&tsk->sighand->siglock, flags); return; } @@ -464,15 +464,10 @@ void taskstats_exit_send(struct task_struct *tsk, struct taskstats *tidstats, size_t size; int is_thread_group; struct nlattr *na; - unsigned long flags; if (!family_registered || !tidstats) return; - spin_lock_irqsave(&tsk->signal->stats_lock, flags); - is_thread_group = tsk->signal->stats ? 1 : 0; - spin_unlock_irqrestore(&tsk->signal->stats_lock, flags); - rc = 0; /* * Size includes space for nested attributes @@ -480,6 +475,7 @@ void taskstats_exit_send(struct task_struct *tsk, struct taskstats *tidstats, size = nla_total_size(sizeof(u32)) + nla_total_size(sizeof(struct taskstats)) + nla_total_size(0); + is_thread_group = (tsk->signal->stats != NULL); if (is_thread_group) size = 2 * size; /* PID + STATS + TGID + STATS */ -- cgit v1.2.3 From a98b6094261c0112e9c455c96995972181bff049 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sat, 28 Oct 2006 10:38:54 -0700 Subject: [PATCH] taskstats: don't use tasklist_lock Remove tasklist_lock from taskstats.c. find_task_by_pid() is rcu-safe. ->siglock allows us to traverse subthread without tasklist. Q: delay accounting looks wrong to me. If sub-thread has already called taskstats_exit_send() but didn't call release_task(self) yet it will be accounted twice. The window is big. No? Signed-off-by: Oleg Nesterov Cc: Shailabh Nagar Cc: Balbir Singh Cc: Jay Lan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/taskstats.c | 59 ++++++++++++++++++++++-------------------------------- 1 file changed, 24 insertions(+), 35 deletions(-) (limited to 'kernel/taskstats.c') diff --git a/kernel/taskstats.c b/kernel/taskstats.c index b2efda94615a..b724aeea5443 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -174,21 +174,19 @@ static void send_cpu_listeners(struct sk_buff *skb, unsigned int cpu) up_write(&listeners->sem); } -static int fill_pid(pid_t pid, struct task_struct *pidtsk, +static int fill_pid(pid_t pid, struct task_struct *tsk, struct taskstats *stats) { int rc = 0; - struct task_struct *tsk = pidtsk; - if (!pidtsk) { - read_lock(&tasklist_lock); + if (!tsk) { + rcu_read_lock(); tsk = find_task_by_pid(pid); - if (!tsk) { - read_unlock(&tasklist_lock); + if (tsk) + get_task_struct(tsk); + rcu_read_unlock(); + if (!tsk) return -ESRCH; - } - get_task_struct(tsk); - read_unlock(&tasklist_lock); } else get_task_struct(tsk); @@ -214,40 +212,28 @@ static int fill_pid(pid_t pid, struct task_struct *pidtsk, } -static int fill_tgid(pid_t tgid, struct task_struct *tgidtsk, +static int fill_tgid(pid_t tgid, struct task_struct *first, struct taskstats *stats) { - struct task_struct *tsk, *first; + struct task_struct *tsk; unsigned long flags; + int rc = -ESRCH; /* * Add additional stats from live tasks except zombie thread group * leaders who are already counted with the dead tasks */ - first = tgidtsk; - if (!first) { - read_lock(&tasklist_lock); + rcu_read_lock(); + if (!first) first = find_task_by_pid(tgid); - if (!first) { - read_unlock(&tasklist_lock); - return -ESRCH; - } - get_task_struct(first); - read_unlock(&tasklist_lock); - } else - get_task_struct(first); + if (!first || !lock_task_sighand(first, &flags)) + goto out; - tsk = first; - read_lock(&tasklist_lock); - /* Start with stats from dead tasks */ - if (first->sighand) { - spin_lock_irqsave(&first->sighand->siglock, flags); - if (first->signal->stats) - memcpy(stats, first->signal->stats, sizeof(*stats)); - spin_unlock_irqrestore(&first->sighand->siglock, flags); - } + if (first->signal->stats) + memcpy(stats, first->signal->stats, sizeof(*stats)); + tsk = first; do { if (tsk->exit_state == EXIT_ZOMBIE && thread_group_leader(tsk)) continue; @@ -260,15 +246,18 @@ static int fill_tgid(pid_t tgid, struct task_struct *tgidtsk, delayacct_add_tsk(stats, tsk); } while_each_thread(first, tsk); - read_unlock(&tasklist_lock); - stats->version = TASKSTATS_VERSION; + unlock_task_sighand(first, &flags); + rc = 0; +out: + rcu_read_unlock(); + + stats->version = TASKSTATS_VERSION; /* * Accounting subsytems can also add calls here to modify * fields of taskstats. */ - put_task_struct(first); - return 0; + return rc; } -- cgit v1.2.3 From d7c3f5f231c60d7e6ada5770b536df2b3ec1bd08 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sat, 28 Oct 2006 10:38:54 -0700 Subject: [PATCH] fill_tgid: cleanup delays accounting fill_tgid() should skip not only an already exited group leader. If the task has ->exit_state != 0 it already did exit_notify(), so it also did fill_tgid_exit()->delayacct_add_tsk(->signal->stats) and we should skip it to avoid a double accounting. This patch doesn't close the race completely, but it cleanups the code. Signed-off-by: Oleg Nesterov Cc: Shailabh Nagar Cc: Balbir Singh Cc: Jay Lan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/taskstats.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/taskstats.c') diff --git a/kernel/taskstats.c b/kernel/taskstats.c index b724aeea5443..8adfb8069c6d 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -235,7 +235,7 @@ static int fill_tgid(pid_t tgid, struct task_struct *first, tsk = first; do { - if (tsk->exit_state == EXIT_ZOMBIE && thread_group_leader(tsk)) + if (tsk->exit_state) continue; /* * Accounting subsystem can call its functions here to -- cgit v1.2.3 From d46a3d0d07ba539aea5b0e1ad30e568f0cb03576 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 29 Oct 2006 16:45:58 +0300 Subject: [PATCH] taskstats: fix sk_buff leak 'return genlmsg_cancel()' in taskstats_user_cmd/taskstats_exit_send potentially leaks a skb. Unless we pass 'rep_skb' to the netlink layer we own sk_buff. This means we should always do kfree_skb() on failure. [ Thomas acked and pointed out missing return value in original version ] Signed-off-by: Oleg Nesterov Acked-by: Thomas Graf Cc: Andrew Morton Cc: Shailabh Nagar Cc: Balbir Singh Cc: Jay Lan Signed-off-by: Linus Torvalds --- kernel/taskstats.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'kernel/taskstats.c') diff --git a/kernel/taskstats.c b/kernel/taskstats.c index 8adfb8069c6d..f3c3e9d43d2c 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -411,7 +411,7 @@ static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info) return send_reply(rep_skb, info->snd_pid); nla_put_failure: - return genlmsg_cancel(rep_skb, reply); + rc = genlmsg_cancel(rep_skb, reply); err: nlmsg_free(rep_skb); return rc; @@ -507,7 +507,6 @@ send: nla_put_failure: genlmsg_cancel(rep_skb, reply); - goto ret; err_skb: nlmsg_free(rep_skb); ret: -- cgit v1.2.3 From 3d8334def5cf831d2ed438aae021696a2faa4ddd Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 29 Oct 2006 18:57:16 +0300 Subject: [PATCH] taskstats: fix sk_buff size calculation prepare_reply() adds GENL_HDRLEN to the payload (genlmsg_total_size()), but then it does genlmsg_put()->nlmsg_put(). This means we forget to reserve a room for 'struct nlmsghdr'. Signed-off-by: Oleg Nesterov Cc: Thomas Graf Cc: Andrew Morton Cc: Shailabh Nagar Cc: Balbir Singh Cc: Jay Lan Signed-off-by: Linus Torvalds --- kernel/taskstats.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel/taskstats.c') diff --git a/kernel/taskstats.c b/kernel/taskstats.c index f3c3e9d43d2c..2039585ec5e1 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -77,7 +77,8 @@ static int prepare_reply(struct genl_info *info, u8 cmd, struct sk_buff **skbp, /* * If new attributes are added, please revisit this allocation */ - skb = nlmsg_new(genlmsg_total_size(size), GFP_KERNEL); + size = nlmsg_total_size(genlmsg_total_size(size)); + skb = nlmsg_new(size, GFP_KERNEL); if (!skb) return -ENOMEM; -- cgit v1.2.3 From 4a279ff1ea1cf325775ada983035123fcdc8e986 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 30 Oct 2006 22:07:15 -0800 Subject: [PATCH] taskstats: fix sub-threads accounting If there are no listeners, taskstats_exit_send() just returns because taskstats_exit_alloc() didn't allocate *tidstats. This is wrong, each sub-thread should do fill_tgid_exit() on exit, otherwise its ->delays is not recorded in ->signal->stats and lost. Q: We don't send TASKSTATS_TYPE_AGGR_TGID when single-threaded process exits. Is it good? How can the listener figure out that it was actually a process exit, not sub-thread? Signed-off-by: Oleg Nesterov Cc: Balbir Singh Acked-by: Shailabh Nagar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/taskstats.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'kernel/taskstats.c') diff --git a/kernel/taskstats.c b/kernel/taskstats.c index 2039585ec5e1..f45c5e70773c 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -455,10 +455,9 @@ void taskstats_exit_send(struct task_struct *tsk, struct taskstats *tidstats, int is_thread_group; struct nlattr *na; - if (!family_registered || !tidstats) + if (!family_registered) return; - rc = 0; /* * Size includes space for nested attributes */ @@ -466,8 +465,15 @@ void taskstats_exit_send(struct task_struct *tsk, struct taskstats *tidstats, nla_total_size(sizeof(struct taskstats)) + nla_total_size(0); is_thread_group = (tsk->signal->stats != NULL); - if (is_thread_group) - size = 2 * size; /* PID + STATS + TGID + STATS */ + if (is_thread_group) { + /* PID + STATS + TGID + STATS */ + size = 2 * size; + /* fill the tsk->signal->stats structure */ + fill_tgid_exit(tsk); + } + + if (!tidstats) + return; rc = prepare_reply(NULL, TASKSTATS_CMD_NEW, &rep_skb, &reply, size); if (rc < 0) @@ -487,11 +493,8 @@ void taskstats_exit_send(struct task_struct *tsk, struct taskstats *tidstats, goto send; /* - * tsk has/had a thread group so fill the tsk->signal->stats structure * Doesn't matter if tsk is the leader or the last group member leaving */ - - fill_tgid_exit(tsk); if (!group_dead) goto send; -- cgit v1.2.3