From 365e9c87a982c03d0af3886e29d877f581b59611 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Sat, 29 Oct 2005 18:16:18 -0700 Subject: [PATCH] mm: update_hiwaters just in time update_mem_hiwater has attracted various criticisms, in particular from those concerned with mm scalability. Originally it was called whenever rss or total_vm got raised. Then many of those callsites were replaced by a timer tick call from account_system_time. Now Frank van Maarseveen reports that to be found inadequate. How about this? Works for Frank. Replace update_mem_hiwater, a poor combination of two unrelated ops, by macros update_hiwater_rss and update_hiwater_vm. Don't attempt to keep mm->hiwater_rss up to date at timer tick, nor every time we raise rss (usually by 1): those are hot paths. Do the opposite, update only when about to lower rss (usually by many), or just before final accounting in do_exit. Handle mm->hiwater_vm in the same way, though it's much less of an issue. Demand that whoever collects these hiwater statistics do the work of taking the maximum with rss or total_vm. And there has been no collector of these hiwater statistics in the tree. The new convention needs an example, so match Frank's usage by adding a VmPeak line above VmSize to /proc//status, and also a VmHWM line above VmRSS (High-Water-Mark or High-Water-Memory). There was a particular anomaly during mremap move, that hiwater_vm might be captured too high. A fleeting such anomaly remains, but it's quickly corrected now, whereas before it would stick. What locking? None: if the app is racy then these statistics will be racy, it's not worth any overhead to make them exact. But whenever it suits, hiwater_vm is updated under exclusive mmap_sem, and hiwater_rss under page_table_lock (for now) or with preemption disabled (later on): without going to any trouble, minimize the time between reading current values and updating, to minimize those occasions when a racing thread bumps a count up and back down in between. Signed-off-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/exit.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'kernel/exit.c') diff --git a/kernel/exit.c b/kernel/exit.c index 3b25b182d2be..79f52b85d6ed 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -839,7 +839,10 @@ fastcall NORET_TYPE void do_exit(long code) preempt_count()); acct_update_integrals(tsk); - update_mem_hiwater(tsk); + if (tsk->mm) { + update_hiwater_rss(tsk->mm); + update_hiwater_vm(tsk->mm); + } group_dead = atomic_dec_and_test(&tsk->signal->live); if (group_dead) { del_timer_sync(&tsk->signal->real_timer); -- cgit v1.2.3 From 7407251a0e2ed099e4b12b742b635503e981507c Mon Sep 17 00:00:00 2001 From: Coywolf Qi Hunt Date: Sun, 30 Oct 2005 15:02:47 -0800 Subject: [PATCH] PF_DEAD cleanup The PF_DEAD setting doesn't belong to exit_notify(), move it to a proper place. Signed-off-by: Coywolf Qi Hunt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/exit.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'kernel/exit.c') diff --git a/kernel/exit.c b/kernel/exit.c index 79f52b85d6ed..6ef8f7356a74 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -783,10 +783,6 @@ static void exit_notify(struct task_struct *tsk) /* If the process is dead, release it - nobody will wait for it */ if (state == EXIT_DEAD) release_task(tsk); - - /* PF_DEAD causes final put_task_struct after we schedule. */ - preempt_disable(); - tsk->flags |= PF_DEAD; } fastcall NORET_TYPE void do_exit(long code) @@ -873,7 +869,11 @@ fastcall NORET_TYPE void do_exit(long code) tsk->mempolicy = NULL; #endif - BUG_ON(!(current->flags & PF_DEAD)); + /* PF_DEAD causes final put_task_struct after we schedule. */ + preempt_disable(); + BUG_ON(tsk->flags & PF_DEAD); + tsk->flags |= PF_DEAD; + schedule(); BUG(); /* Avoid "noreturn function does return". */ -- cgit v1.2.3 From 7f2a52555998c699a7e89f24636c909d6fc08a60 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Sun, 30 Oct 2005 15:02:50 -0800 Subject: [PATCH] wait4 PTRACE_ATTACH race fix Back about a year ago when I last fiddled heavily with the do_wait code, I was thinking too hard about the wrong thing and I now think I introduced a bug whose inverse thought I was fixing. Apparently noone was looking too hard over much shoulder, so as to cite my bogus reasoning at the time. In the race condition when PTRACE_ATTACH is about to steal a child and then the child hits a tracing event (what my_ptrace_child checks for), the real parent does need to set its flag noting it has some eligible live children. Otherwise a spurious ECHILD error is possible, since the child in question is not yet on the ptrace_children list. Signed-off-by: Roland McGrath Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/exit.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'kernel/exit.c') diff --git a/kernel/exit.c b/kernel/exit.c index 6ef8f7356a74..2d39ccc367e6 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1383,6 +1383,15 @@ repeat: switch (p->state) { case TASK_TRACED: + /* + * When we hit the race with PTRACE_ATTACH, + * we will not report this child. But the + * race means it has not yet been moved to + * our ptrace_children list, so we need to + * set the flag here to avoid a spurious ECHILD + * when the race happens with the only child. + */ + flag = 1; if (!my_ptrace_child(p)) continue; /*FALLTHROUGH*/ -- cgit v1.2.3 From b67a1b9e4bf878aa5d4b6b44cb5a251a2f425f0d Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 30 Oct 2005 15:03:44 -0800 Subject: [PATCH] remove hardcoded SEND_SIG_xxx constants This patch replaces hardcoded SEND_SIG_xxx constants with their symbolic names. No changes in affected .o files. Signed-off-by: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/exit.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'kernel/exit.c') diff --git a/kernel/exit.c b/kernel/exit.c index 2d39ccc367e6..537394b25e8d 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -547,7 +547,7 @@ static inline void reparent_thread(task_t *p, task_t *father, int traced) if (p->pdeath_signal) /* We already hold the tasklist_lock here. */ - group_send_sig_info(p->pdeath_signal, (void *) 0, p); + group_send_sig_info(p->pdeath_signal, SEND_SIG_NOINFO, p); /* Move the child from its dying parent to the new one. */ if (unlikely(traced)) { @@ -591,8 +591,8 @@ static inline void reparent_thread(task_t *p, task_t *father, int traced) int pgrp = process_group(p); if (will_become_orphaned_pgrp(pgrp, NULL) && has_stopped_jobs(pgrp)) { - __kill_pg_info(SIGHUP, (void *)1, pgrp); - __kill_pg_info(SIGCONT, (void *)1, pgrp); + __kill_pg_info(SIGHUP, SEND_SIG_PRIV, pgrp); + __kill_pg_info(SIGCONT, SEND_SIG_PRIV, pgrp); } } } @@ -727,8 +727,8 @@ static void exit_notify(struct task_struct *tsk) (t->signal->session == tsk->signal->session) && will_become_orphaned_pgrp(process_group(tsk), tsk) && has_stopped_jobs(process_group(tsk))) { - __kill_pg_info(SIGHUP, (void *)1, process_group(tsk)); - __kill_pg_info(SIGCONT, (void *)1, process_group(tsk)); + __kill_pg_info(SIGHUP, SEND_SIG_PRIV, process_group(tsk)); + __kill_pg_info(SIGCONT, SEND_SIG_PRIV, process_group(tsk)); } /* Let father know we died -- cgit v1.2.3