From 3de463c7d9d58f8cf3395268230cb20a4c15bffa Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 24 Oct 2005 14:34:03 +0400 Subject: [PATCH] posix-timers: remove false BUG_ON() from run_posix_cpu_timers() do_exit() clears ->it_##clock##_expires, but nothing prevents another cpu from attaching the timer to the exiting process after that. After exit_notify() does 'write_unlock_irq(&tasklist_lock)' and before do_exit() calls 'schedule()', a local timer interrupt can find tsk->exit_state != 0. If that state was EXIT_DEAD (or another cpu does sys_wait4) the interrupted task has ->signal == NULL. At this moment the exiting task has no pending cpu timers, they were cleaned up in __exit_signal()->posix_cpu_timers_exit{,_group}(), so we can just return from irq. Signed-off-by: Oleg Nesterov Signed-off-by: Linus Torvalds --- kernel/exit.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'kernel/exit.c') diff --git a/kernel/exit.c b/kernel/exit.c index 3b25b182d2be..4897977a1f4b 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -825,14 +825,6 @@ fastcall NORET_TYPE void do_exit(long code) tsk->flags |= PF_EXITING; - /* - * Make sure we don't try to process any timer firings - * while we are already exiting. - */ - tsk->it_virt_expires = cputime_zero; - tsk->it_prof_expires = cputime_zero; - tsk->it_sched_expires = 0; - if (unlikely(in_atomic())) printk(KERN_INFO "note: %s[%d] exited with preempt_count %d\n", current->comm, current->pid, -- cgit v1.2.3 From a362f463a6d316d14daed0f817e151835ce97ff7 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 27 Oct 2005 09:07:33 -0700 Subject: Revert "remove false BUG_ON() from run_posix_cpu_timers()" This reverts commit 3de463c7d9d58f8cf3395268230cb20a4c15bffa. Roland has another patch that allows us to leave the BUG_ON() in place by just making sure that the condition it tests for really is always true. That goes in next. 
Signed-off-by: Linus Torvalds --- kernel/exit.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'kernel/exit.c') diff --git a/kernel/exit.c b/kernel/exit.c index 4897977a1f4b..3b25b182d2be 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -825,6 +825,14 @@ fastcall NORET_TYPE void do_exit(long code) tsk->flags |= PF_EXITING; + /* + * Make sure we don't try to process any timer firings + * while we are already exiting. + */ + tsk->it_virt_expires = cputime_zero; + tsk->it_prof_expires = cputime_zero; + tsk->it_sched_expires = 0; + if (unlikely(in_atomic())) printk(KERN_INFO "note: %s[%d] exited with preempt_count %d\n", current->comm, current->pid, -- cgit v1.2.3 From 365e9c87a982c03d0af3886e29d877f581b59611 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Sat, 29 Oct 2005 18:16:18 -0700 Subject: [PATCH] mm: update_hiwaters just in time update_mem_hiwater has attracted various criticisms, in particular from those concerned with mm scalability. Originally it was called whenever rss or total_vm got raised. Then many of those callsites were replaced by a timer tick call from account_system_time. Now Frank van Maarseveen reports that to be found inadequate. How about this? Works for Frank. Replace update_mem_hiwater, a poor combination of two unrelated ops, by macros update_hiwater_rss and update_hiwater_vm. Don't attempt to keep mm->hiwater_rss up to date at timer tick, nor every time we raise rss (usually by 1): those are hot paths. Do the opposite, update only when about to lower rss (usually by many), or just before final accounting in do_exit. Handle mm->hiwater_vm in the same way, though it's much less of an issue. Demand that whoever collects these hiwater statistics do the work of taking the maximum with rss or total_vm. And there has been no collector of these hiwater statistics in the tree. 
The new convention needs an example, so match Frank's usage by adding a VmPeak line above VmSize to /proc/<pid>/status, and also a VmHWM line above VmRSS (High-Water-Mark or High-Water-Memory). There was a particular anomaly during mremap move, that hiwater_vm might be captured too high. A fleeting such anomaly remains, but it's quickly corrected now, whereas before it would stick. What locking? None: if the app is racy then these statistics will be racy, it's not worth any overhead to make them exact. But whenever it suits, hiwater_vm is updated under exclusive mmap_sem, and hiwater_rss under page_table_lock (for now) or with preemption disabled (later on): without going to any trouble, minimize the time between reading current values and updating, to minimize those occasions when a racing thread bumps a count up and back down in between. Signed-off-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/exit.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'kernel/exit.c') diff --git a/kernel/exit.c b/kernel/exit.c index 3b25b182d2be..79f52b85d6ed 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -839,7 +839,10 @@ fastcall NORET_TYPE void do_exit(long code) preempt_count()); acct_update_integrals(tsk); - update_mem_hiwater(tsk); + if (tsk->mm) { + update_hiwater_rss(tsk->mm); + update_hiwater_vm(tsk->mm); + } group_dead = atomic_dec_and_test(&tsk->signal->live); if (group_dead) { del_timer_sync(&tsk->signal->real_timer); -- cgit v1.2.3