From 6ec3cfeca04622e3d80c9270191cd7f5f88214af Mon Sep 17 00:00:00 2001 From: "Pallipadi, Venkatesh" Date: Mon, 13 Apr 2009 15:20:58 -0700 Subject: x86, irq: Remove IRQ_DISABLED check in process context IRQ move As discussed in the thread here: http://marc.info/?l=linux-kernel&m=123964468521142&w=2 Eric W. Biederman observed: > It looks like some additional bugs have slipped in since last I looked. > > set_irq_affinity does this: > ifdef CONFIG_GENERIC_PENDING_IRQ > if (desc->status & IRQ_MOVE_PCNTXT || desc->status & IRQ_DISABLED) { > cpumask_copy(desc->affinity, cpumask); > desc->chip->set_affinity(irq, cpumask); > } else { > desc->status |= IRQ_MOVE_PENDING; > cpumask_copy(desc->pending_mask, cpumask); > } > #else > > That IRQ_DISABLED case is a software state and as such it has nothing to > do with how safe it is to move an irq in process context. [...] > > The only reason we migrate MSIs in interrupt context today is that there > wasn't infrastructure for support migration both in interrupt context > and outside of it. Yes. The idea here was to force the MSI migration to happen in process context. One of the patches in the series did disable_irq(dev->irq); irq_set_affinity(dev->irq, cpumask_of(dev->cpu)); enable_irq(dev->irq); with the above patch adding irq/manage code check for interrupt disabled and moving the interrupt in process context. IIRC, there was no IRQ_MOVE_PCNTXT when we were developing this HPET code and we ended up having this ugly hack. IRQ_MOVE_PCNTXT was there when we eventually submitted the patch upstream. But, looks like I did a blind rebasing instead of using IRQ_MOVE_PCNTXT in hpet MSI code. Below patch fixes this. i.e., revert commit 932775a4ab622e3c99bd59f14cc and add PCNTXT to HPET MSI setup. Also removes copying of desc->affinity in generic code as set_affinity routines are doing it internally. Reported-by: "Eric W. Biederman" Signed-off-by: Venkatesh Pallipadi Acked-by: "Eric W. Biederman" Cc: "Li Shaohua" Cc: Gary Hade Cc: "lcm@us.ibm.com" Cc: suresh.b.siddha@intel.com LKML-Reference: <20090413222058.GB8211@linux-os.sc.intel.com> Signed-off-by: Ingo Molnar --- kernel/irq/manage.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 7e2e7dd4cd2f..2734eca59243 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -109,10 +109,9 @@ int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask) spin_lock_irqsave(&desc->lock, flags); #ifdef CONFIG_GENERIC_PENDING_IRQ - if (desc->status & IRQ_MOVE_PCNTXT || desc->status & IRQ_DISABLED) { - cpumask_copy(desc->affinity, cpumask); + if (desc->status & IRQ_MOVE_PCNTXT) desc->chip->set_affinity(irq, cpumask); - } else { + else { desc->status |= IRQ_MOVE_PENDING; cpumask_copy(desc->pending_mask, cpumask); } -- cgit v1.2.3 From c8a250058656495be02c00de61e26b017c86ef00 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 17 Apr 2009 09:40:49 +0200 Subject: lockdep: more robust lockdep_map init sequence Steven Rostedt reported: > OK, I think I figured this bug out. This is a lockdep issue with respect > to tracepoints. > > The trace points in lockdep are called all the time. Outside the lockdep > logic. But if lockdep were to trigger an error / warning (which this run > did) we might be in trouble. For new locks, like the dentry->d_lock, that > are created, they will not get a name: > > void lockdep_init_map(struct lockdep_map *lock, const char *name, > struct lock_class_key *key, int subclass) > { > if (unlikely(!debug_locks)) > return; > > When a problem is found by lockdep, debug_locks becomes false. Thus we > stop allocating names for locks. This dentry->d_lock I had, now has no > name. Worse yet, I have CONFIG_DEBUG_VM set, that scrambles non > initialized memory. Thus, when the trace point was hit, it had junk for > the lock->name, and the machine crashed. Ah, nice catch. I think we should put at least the name in regardless. Ensure we at least initialize the trivial entries of the depmap so that they can be relied upon, even when lockdep itself decided to pack up and go home. [ Impact: fix lock tracing after lockdep warnings. ] Reported-by: Steven Rostedt Signed-off-by: Peter Zijlstra Acked-by: Steven Rostedt Cc: Andrew Morton Cc: Frederic Weisbecker LKML-Reference: <1239954049.23397.4156.camel@laptop> Signed-off-by: Ingo Molnar --- kernel/lockdep.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) (limited to 'kernel') diff --git a/kernel/lockdep.c b/kernel/lockdep.c index b0f011866969..accb40cdb12a 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -2490,13 +2490,20 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this, void lockdep_init_map(struct lockdep_map *lock, const char *name, struct lock_class_key *key, int subclass) { - if (unlikely(!debug_locks)) + lock->class_cache = NULL; +#ifdef CONFIG_LOCK_STAT + lock->cpu = raw_smp_processor_id(); +#endif + + if (DEBUG_LOCKS_WARN_ON(!name)) { + lock->name = "NULL"; return; + } + + lock->name = name; if (DEBUG_LOCKS_WARN_ON(!key)) return; - if (DEBUG_LOCKS_WARN_ON(!name)) - return; /* * Sanity check, the lock-class key must be persistent: */ @@ -2505,12 +2512,11 @@ void lockdep_init_map(struct lockdep_map *lock, const char *name, DEBUG_LOCKS_WARN_ON(1); return; } - lock->name = name; lock->key = key; - lock->class_cache = NULL; -#ifdef CONFIG_LOCK_STAT - lock->cpu = raw_smp_processor_id(); -#endif + + if (unlikely(!debug_locks)) + return; + if (subclass) register_lock_class(lock, subclass, 1); } -- cgit v1.2.3 From b48ccb095a0c9257241261ec2bd1cbb1bdabc48b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 23 Apr 2009 09:36:52 +0200 Subject: locking: clarify kernel-taint warning message Andi Kleen reported this message triggering on non-lockdep kernels: Disabling lockdep due to kernel taint Clarify the message to say 'lock debugging' - debug_locks_off() turns off all things lock debugging, not just lockdep. [ Impact: change kernel warning message text ] Reported-by: Andi Kleen Cc: Peter Zijlstra Cc: Andrew Morton Signed-off-by: Ingo Molnar --- kernel/panic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/panic.c b/kernel/panic.c index 934fb377f4b3..3dcaa1661357 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -221,7 +221,7 @@ void add_taint(unsigned flag) * post-warning case. */ if (flag != TAINT_CRAP && flag != TAINT_WARN && __debug_locks_off()) - printk(KERN_WARNING "Disabling lockdep due to kernel taint\n"); + printk(KERN_WARNING "Disabling lock debugging due to kernel taint\n"); set_bit(flag, &tainted_mask); } -- cgit v1.2.3 From 418df63c2d94f238ac7e1d1d53be35dd6b7a7252 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Wed, 22 Apr 2009 12:01:49 +0100 Subject: Delete slow-work timers properly Slow-work appears to delete its timer as soon as the first user unregisters, even though other users could be active. At the same time, it never seems to delete slow_work_oom_timer. Arrange for both to happen in the shutdown path. Signed-off-by: Jonathan Corbet Signed-off-by: David Howells Signed-off-by: Linus Torvalds --- kernel/slow-work.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/slow-work.c b/kernel/slow-work.c index cf2bc01186ef..b28d19135f43 100644 --- a/kernel/slow-work.c +++ b/kernel/slow-work.c @@ -609,14 +609,14 @@ void slow_work_unregister_user(void) if (slow_work_user_count == 0) { printk(KERN_NOTICE "Slow work thread pool: Shutting down\n"); slow_work_threads_should_exit = true; + del_timer_sync(&slow_work_cull_timer); + del_timer_sync(&slow_work_oom_timer); wake_up_all(&slow_work_thread_wq); wait_for_completion(&slow_work_last_thread_exited); printk(KERN_NOTICE "Slow work thread pool:" " Shut down complete\n"); } - del_timer_sync(&slow_work_cull_timer); - mutex_unlock(&slow_work_user_lock); } EXPORT_SYMBOL(slow_work_unregister_user); -- cgit v1.2.3 From 0c8454f56623505a99463405fd7d5664adfbb094 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 25 Apr 2009 00:16:06 +0200 Subject: PM/Hibernate: Fix waiting for image device to appear on resume Commit c751085943362143f84346d274e0011419c84202 ("PM/Hibernate: Wait for SCSI devices scan to complete during resume") added a call to scsi_complete_async_scans() to software_resume(), so that it waited for the SCSI scanning to complete, but the call was added at a wrong place. Namely, it should have been added after wait_for_device_probe(), which is called only if the image partition hasn't been specified yet. Also, it's reasonable to check if the image partition is present and only wait for the device probing and SCSI scanning to complete if it is not the case. Additionally, since noresume is checked right at the beginning of software_resume() and the function returns immediately if it's set, it doesn't make sense to check it once again later. Signed-off-by: Rafael J. Wysocki Signed-off-by: Linus Torvalds --- kernel/power/disk.c | 51 +++++++++++++++++++++++++++------------------------ 1 file changed, 27 insertions(+), 24 deletions(-) (limited to 'kernel') diff --git a/kernel/power/disk.c b/kernel/power/disk.c index 0854770b63b9..e71ca9cd81b2 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c @@ -645,13 +645,6 @@ static int software_resume(void) if (noresume) return 0; - /* - * We can't depend on SCSI devices being available after loading one of - * their modules if scsi_complete_async_scans() is not called and the - * resume device usually is a SCSI one. - */ - scsi_complete_async_scans(); - /* * name_to_dev_t() below takes a sysfs buffer mutex when sysfs * is configured into the kernel. Since the regular hibernate @@ -663,32 +656,42 @@ static int software_resume(void) * here to avoid lockdep complaining. */ mutex_lock_nested(&pm_mutex, SINGLE_DEPTH_NESTING); + + if (swsusp_resume_device) + goto Check_image; + + if (!strlen(resume_file)) { + error = -ENOENT; + goto Unlock; + } + + pr_debug("PM: Checking image partition %s\n", resume_file); + + /* Check if the device is there */ + swsusp_resume_device = name_to_dev_t(resume_file); if (!swsusp_resume_device) { - if (!strlen(resume_file)) { - mutex_unlock(&pm_mutex); - return -ENOENT; - } /* * Some device discovery might still be in progress; we need * to wait for this to finish. */ wait_for_device_probe(); + /* + * We can't depend on SCSI devices being available after loading + * one of their modules until scsi_complete_async_scans() is + * called and the resume device usually is a SCSI one. + */ + scsi_complete_async_scans(); + swsusp_resume_device = name_to_dev_t(resume_file); - pr_debug("PM: Resume from partition %s\n", resume_file); - } else { - pr_debug("PM: Resume from partition %d:%d\n", - MAJOR(swsusp_resume_device), - MINOR(swsusp_resume_device)); + if (!swsusp_resume_device) { + error = -ENODEV; + goto Unlock; + } } - if (noresume) { - /** - * FIXME: If noresume is specified, we need to find the - * partition and reset it back to normal swap space. - */ - mutex_unlock(&pm_mutex); - return 0; - } + Check_image: + pr_debug("PM: Resume from partition %d:%d\n", + MAJOR(swsusp_resume_device), MINOR(swsusp_resume_device)); pr_debug("PM: Checking hibernation image.\n"); error = swsusp_check(); -- cgit v1.2.3 From cad81bc2529ab8c62b6fdc83a1c0c7f4a87209eb Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 27 Apr 2009 01:41:34 +0200 Subject: ptrace: ptrace_attach: fix the usage of ->cred_exec_mutex ptrace_attach() needs task->cred_exec_mutex, not current->cred_exec_mutex. Signed-off-by: Oleg Nesterov Acked-by: Roland McGrath Acked-by: David Howells Signed-off-by: James Morris --- kernel/ptrace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/ptrace.c b/kernel/ptrace.c index dfcd83ceee3b..0692ab5a0d67 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -188,7 +188,7 @@ int ptrace_attach(struct task_struct *task) /* Protect exec's credential calculations against our interference; * SUID, SGID and LSM creds get determined differently under ptrace. */ - retval = mutex_lock_interruptible(¤t->cred_exec_mutex); + retval = mutex_lock_interruptible(&task->cred_exec_mutex); if (retval < 0) goto out; @@ -232,7 +232,7 @@ repeat: bad: write_unlock_irqrestore(&tasklist_lock, flags); task_unlock(task); - mutex_unlock(¤t->cred_exec_mutex); + mutex_unlock(&task->cred_exec_mutex); out: return retval; } -- cgit v1.2.3 From 7267fa6819467669f5cc2ba81a615dcc88158b4b Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 29 Apr 2009 00:16:21 -0400 Subject: tracing: fix ref count in splice pages The pages allocated for the splice binary buffer did not initialize the ref count correctly. This caused pages not to be freed and causes a drastic memory leak. Thanks to logdev I was able to trace the tracer to find where the leak was. [ Impact: stop memory leak when using splice ] Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 1ce5dc6372b8..a884c09006c4 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3448,6 +3448,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, if (!ref) break; + ref->ref = 1; ref->buffer = info->tr->buffer; ref->page = ring_buffer_alloc_read_page(ref->buffer); if (!ref->page) { -- cgit v1.2.3 From f5f293a4e3d0a0c52cec31de6762c95050156516 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 29 Apr 2009 14:44:49 +0200 Subject: sched: account system time properly Andrew Gallatin reported that IRQ and SOFTIRQ times were sometime not reported correctly on recent kernels, and even bisected to commit 457533a7d3402d1d91fbc125c8bd1bd16dcd3cd4 ([PATCH] fix scaled & unscaled cputime accounting) as the first bad commit. Further analysis pointed that commit 79741dd35713ff4f6fd0eafd59fa94e8a4ba922d ([PATCH] idle cputime accounting) was the real cause of the problem. account_process_tick() was not taking into account timer IRQ interrupting the idle task servicing a hard or soft irq. On mostly idle cpu, irqs were thus not accounted and top or mpstat could tell user/admin that cpu was 100 % idle, 0.00 % irq, 0.00 % softirq, while it was not. [ Impact: fix occasionally incorrect CPU statistics in top/mpstat ] Reported-by: Andrew Gallatin Re-reported-by: Andrew Morton Signed-off-by: Eric Dumazet Acked-by: Martin Schwidefsky Cc: rick.jones2@hp.com Cc: brice@myri.com Cc: Paul Mackerras Cc: Benjamin Herrenschmidt LKML-Reference: <49F84BC1.7080602@cosmosbay.com> Signed-off-by: Ingo Molnar --- kernel/sched.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/sched.c b/kernel/sched.c index b902e587a3a0..26efa475bdc1 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -4732,7 +4732,7 @@ void account_process_tick(struct task_struct *p, int user_tick) if (user_tick) account_user_time(p, one_jiffy, one_jiffy_scaled); - else if (p != rq->idle) + else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET)) account_system_time(p, HARDIRQ_OFFSET, one_jiffy, one_jiffy_scaled); else -- cgit v1.2.3 From 6e85c5ba73c07b990798087e9b858c065db2b234 Mon Sep 17 00:00:00 2001 From: H Hartley Sweeten Date: Wed, 29 Apr 2009 19:14:32 -0400 Subject: kernel/posix-cpu-timers.c: fix sparse warning Sparse reports the following in kernel/posix-cpu-timers.c: warning: symbol 'firing' shadows an earlier one Signed-off-by: H Hartley Sweeten Cc: Subrata Modak LKML-Reference: Signed-off-by: Ingo Molnar --- kernel/posix-cpu-timers.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index c9dcf98b4463..bece7c0b67b2 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -1420,19 +1420,19 @@ void run_posix_cpu_timers(struct task_struct *tsk) * timer call will interfere. */ list_for_each_entry_safe(timer, next, &firing, it.cpu.entry) { - int firing; + int cpu_firing; + spin_lock(&timer->it_lock); list_del_init(&timer->it.cpu.entry); - firing = timer->it.cpu.firing; + cpu_firing = timer->it.cpu.firing; timer->it.cpu.firing = 0; /* * The firing flag is -1 if we collided with a reset * of the timer, which already reported this * almost-firing as an overrun. So don't generate an event. */ - if (likely(firing >= 0)) { + if (likely(cpu_firing >= 0)) cpu_timer_fire(timer); - } spin_unlock(&timer->it_lock); } } -- cgit v1.2.3 From d7226fb6ec5d4f325e4e7fd905894e2ea3eb3ae0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 1 May 2009 15:16:04 +0200 Subject: Revert "genirq: assert that irq handlers are indeed running in hardirq context" This reverts commit 044d408409cc4e1bc75c886e27ca85c270db104c. The commit added a warning when handle_IRQ_event() is called outside of hard interrupt context. This breaks the generic tasklet based interrupt resend mechanism which is used when the hardware has no way to retrigger the interrupt. So we get a warning for a use case which is correct and worked for years. Remove it. Signed-off-by: Thomas Gleixner --- kernel/irq/handle.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'kernel') diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index d82142be8dd2..26e08754744f 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -363,8 +363,6 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action) irqreturn_t ret, retval = IRQ_NONE; unsigned int status = 0; - WARN_ONCE(!in_irq(), "BUG: IRQ handler called from non-hardirq context!"); - if (!(action->flags & IRQF_DISABLED)) local_irq_enable_in_hardirq(); -- cgit v1.2.3 From 74a03b69d1b5ce00a568e142ca97e76b7f5239c6 Mon Sep 17 00:00:00 2001 From: john stultz Date: Fri, 1 May 2009 13:10:25 -0700 Subject: clockevents: prevent endless loop in tick_handle_periodic() tick_handle_periodic() can lock up hard when a one shot clock event device is used in combination with jiffies clocksource. Avoid an endless loop issue by requiring that a highres valid clocksource be installed before we call tick_periodic() in a loop when using ONESHOT mode. The result is we will only increment jiffies once per interrupt until a continuous hardware clocksource is available. Without this, we can run into a endless loop, where each cycle through the loop, jiffies is updated which increments time by tick_period or more (due to clock steering), which can cause the event programming to think the next event was before the newly incremented time and fail causing tick_periodic() to be called again and the whole process loops forever. [ Impact: prevent hard lock up ] Signed-off-by: John Stultz Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner Cc: stable@kernel.org --- kernel/time/tick-common.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index 21a5ca849514..83c4417b6a3c 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -93,7 +93,17 @@ void tick_handle_periodic(struct clock_event_device *dev) for (;;) { if (!clockevents_program_event(dev, next, ktime_get())) return; - tick_periodic(cpu); + /* + * Have to be careful here. If we're in oneshot mode, + * before we call tick_periodic() in a loop, we need + * to be sure we're using a real hardware clocksource. + * Otherwise we could get trapped in an infinite + * loop, as the tick_periodic() increments jiffies, + * when then will increment time, posibly causing + * the loop to trigger again and again. + */ + if (timekeeping_valid_for_hres()) + tick_periodic(cpu); next = ktime_add(next, tick_period); } } -- cgit v1.2.3 From 9e4a5bda89034502fb144331e71a0efdfd5fae97 Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Thu, 30 Apr 2009 15:08:57 -0700 Subject: mm: prevent divide error for small values of vm_dirty_bytes Avoid setting less than two pages for vm_dirty_bytes: this is necessary to avoid potential division by 0 (like the following) in get_dirty_limits(). [ 49.951610] divide error: 0000 [#1] PREEMPT SMP [ 49.952195] last sysfs file: /sys/devices/pci0000:00/0000:00:01.1/host0/target0:0:0/0:0:0:0/block/sda/uevent [ 49.952195] CPU 1 [ 49.952195] Modules linked in: pcspkr [ 49.952195] Pid: 3064, comm: dd Not tainted 2.6.30-rc3 #1 [ 49.952195] RIP: 0010:[] [] get_dirty_limits+0xe9/0x2c0 [ 49.952195] RSP: 0018:ffff88001de03a98 EFLAGS: 00010202 [ 49.952195] RAX: 00000000000000c0 RBX: ffff88001de03b80 RCX: 28f5c28f5c28f5c3 [ 49.952195] RDX: 0000000000000000 RSI: 00000000000000c0 RDI: 0000000000000000 [ 49.952195] RBP: ffff88001de03ae8 R08: 0000000000000000 R09: 0000000000000000 [ 49.952195] R10: ffff88001ddda9a0 R11: 0000000000000001 R12: 0000000000000001 [ 49.952195] R13: ffff88001fbc8218 R14: ffff88001de03b70 R15: ffff88001de03b78 [ 49.952195] FS: 00007fe9a435b6f0(0000) GS:ffff8800025d9000(0000) knlGS:0000000000000000 [ 49.952195] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 49.952195] CR2: 00007fe9a39ab000 CR3: 000000001de38000 CR4: 00000000000006e0 [ 49.952195] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 49.952195] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 [ 49.952195] Process dd (pid: 3064, threadinfo ffff88001de02000, task ffff88001ddda250) [ 49.952195] Stack: [ 49.952195] ffff88001fa0de00 ffff88001f2dbd70 ffff88001f9fe800 000080b900000000 [ 49.952195] 00000000000000c0 ffff8800027a6100 0000000000000400 ffff88001fbc8218 [ 49.952195] 0000000000000000 0000000000000600 ffff88001de03bb8 ffffffff802d3ed7 [ 49.952195] Call Trace: [ 49.952195] [] balance_dirty_pages_ratelimited_nr+0x1d7/0x3f0 [ 49.952195] [] ? ext3_writeback_write_end+0x9e/0x120 [ 49.952195] [] generic_file_buffered_write+0x12f/0x330 [ 49.952195] [] __generic_file_aio_write_nolock+0x26d/0x460 [ 49.952195] [] ? generic_file_aio_write+0x52/0xd0 [ 49.952195] [] generic_file_aio_write+0x69/0xd0 [ 49.952195] [] ext3_file_write+0x26/0xc0 [ 49.952195] [] do_sync_write+0xf1/0x140 [ 49.952195] [] ? get_lock_stats+0x2a/0x60 [ 49.952195] [] ? autoremove_wake_function+0x0/0x40 [ 49.952195] [] vfs_write+0xcb/0x190 [ 49.952195] [] sys_write+0x50/0x90 [ 49.952195] [] system_call_fastpath+0x16/0x1b [ 49.952195] Code: 00 00 00 2b 05 09 1c 17 01 48 89 c6 49 0f af f4 48 c1 ee 02 48 89 f0 48 f7 e1 48 89 d6 31 d2 48 c1 ee 02 48 0f af 75 d0 48 89 f0 <48> f7 f7 41 8b 95 ac 01 00 00 48 89 c7 49 0f af d4 48 c1 ea 02 [ 49.952195] RIP [] get_dirty_limits+0xe9/0x2c0 [ 49.952195] RSP [ 50.096523] ---[ end trace 008d7aa02f244d7b ]--- Signed-off-by: Andrea Righi Cc: Peter Zijlstra Cc: David Rientjes Cc: Dave Chinner Cc: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sysctl.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/sysctl.c b/kernel/sysctl.c index e3d2c7dd59b9..ea78fa101ad6 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -103,6 +103,9 @@ static unsigned long one_ul = 1; static int one_hundred = 100; static int one_thousand = 1000; +/* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */ +static unsigned long dirty_bytes_min = 2 * PAGE_SIZE; + /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */ static int maxolduid = 65535; static int minolduid; @@ -1006,7 +1009,7 @@ static struct ctl_table vm_table[] = { .mode = 0644, .proc_handler = &dirty_bytes_handler, .strategy = &sysctl_intvec, - .extra1 = &one_ul, + .extra1 = &dirty_bytes_min, }, { .procname = "dirty_writeback_centisecs", -- cgit v1.2.3 From 381a80e6df396eaabef2c00f85974a4579ac1c70 Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Wed, 6 May 2009 16:02:50 -0700 Subject: inotify: use GFP_NOFS in kernel_event() to work around a lockdep false-positive There is what we believe to be a false positive reported by lockdep. inotify_inode_queue_event() => take inotify_mutex => kernel_event() => kmalloc() => SLOB => alloc_pages_node() => page reclaim => slab reclaim => dcache reclaim => inotify_inode_is_dead => take inotify_mutex => deadlock The plan is to fix this via lockdep annotation, but that is proving to be quite involved. The patch flips the allocation over to GFP_NFS to shut the warning up, for the 2.6.30 release. Hopefully we will fix this for real in 2.6.31. I'll queue a patch in -mm to switch it back to GFP_KERNEL so we don't forget. ================================= [ INFO: inconsistent lock state ] 2.6.30-rc2-next-20090417 #203 --------------------------------- inconsistent {RECLAIM_FS-ON-W} -> {IN-RECLAIM_FS-W} usage. kswapd0/380 [HC0[0]:SC0[0]:HE1:SE1] takes: (&inode->inotify_mutex){+.+.?.}, at: [] inotify_inode_is_dead+0x35/0xb0 {RECLAIM_FS-ON-W} state was registered at: [] mark_held_locks+0x68/0x90 [] lockdep_trace_alloc+0xf5/0x100 [] __kmalloc_node+0x31/0x1e0 [] kernel_event+0xe2/0x190 [] inotify_dev_queue_event+0x126/0x230 [] inotify_inode_queue_event+0xc6/0x110 [] vfs_create+0xcd/0x140 [] do_filp_open+0x88d/0xa20 [] do_sys_open+0x98/0x140 [] sys_open+0x20/0x30 [] system_call_fastpath+0x16/0x1b [] 0xffffffffffffffff irq event stamp: 690455 hardirqs last enabled at (690455): [] _spin_unlock_irqrestore+0x44/0x80 hardirqs last disabled at (690454): [] _spin_lock_irqsave+0x32/0xa0 softirqs last enabled at (690178): [] __do_softirq+0x202/0x220 softirqs last disabled at (690157): [] call_softirq+0x1c/0x50 other info that might help us debug this: 2 locks held by kswapd0/380: #0: (shrinker_rwsem){++++..}, at: [] shrink_slab+0x37/0x180 #1: (&type->s_umount_key#17){++++..}, at: [] shrink_dcache_memory+0x11f/0x1e0 stack backtrace: Pid: 380, comm: kswapd0 Not tainted 2.6.30-rc2-next-20090417 #203 Call Trace: [] print_usage_bug+0x19f/0x200 [] ? save_stack_trace+0x2f/0x50 [] mark_lock+0x4bb/0x6d0 [] ? check_usage_forwards+0x0/0xc0 [] __lock_acquire+0xc62/0x1ae0 [] ? slob_free+0x10c/0x370 [] lock_acquire+0xe1/0x120 [] ? inotify_inode_is_dead+0x35/0xb0 [] mutex_lock_nested+0x63/0x420 [] ? inotify_inode_is_dead+0x35/0xb0 [] ? inotify_inode_is_dead+0x35/0xb0 [] ? sched_clock+0x9/0x10 [] ? lock_release_holdtime+0x35/0x1c0 [] inotify_inode_is_dead+0x35/0xb0 [] dentry_iput+0xbc/0xe0 [] d_kill+0x33/0x60 [] __shrink_dcache_sb+0x2d3/0x350 [] shrink_dcache_memory+0x15a/0x1e0 [] shrink_slab+0x125/0x180 [] kswapd+0x560/0x7a0 [] ? isolate_pages_global+0x0/0x2c0 [] ? autoremove_wake_function+0x0/0x40 [] ? trace_hardirqs_on+0xd/0x10 [] ? kswapd+0x0/0x7a0 [] kthread+0x5b/0xa0 [] child_rip+0xa/0x20 [] ? restore_args+0x0/0x30 [] ? kthread+0x0/0xa0 [] ? child_rip+0x0/0x20 [eparis@redhat.com: fix audit too] Cc: Al Viro Cc: Matt Mackall Cc: Christoph Lameter Signed-off-by: Wu Fengguang Signed-off-by: Eric Paris Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/auditfilter.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index a6fe71fd5d1b..713098ee5a02 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -1028,7 +1028,7 @@ static void audit_update_watch(struct audit_parent *parent, if (audit_enabled) { struct audit_buffer *ab; - ab = audit_log_start(NULL, GFP_KERNEL, + ab = audit_log_start(NULL, GFP_NOFS, AUDIT_CONFIG_CHANGE); audit_log_format(ab, "auid=%u ses=%u", audit_get_loginuid(current), @@ -1067,7 +1067,7 @@ static void audit_remove_parent_watches(struct audit_parent *parent) e = container_of(r, struct audit_entry, rule); if (audit_enabled) { struct audit_buffer *ab; - ab = audit_log_start(NULL, GFP_KERNEL, + ab = audit_log_start(NULL, GFP_NOFS, AUDIT_CONFIG_CHANGE); audit_log_format(ab, "auid=%u ses=%u", audit_get_loginuid(current), -- cgit v1.2.3 From 57adc4d2dbf968fdbe516359688094eef4d46581 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 6 May 2009 16:02:53 -0700 Subject: Eliminate thousands of warnings with gcc 3.2 build When building with gcc 3.2 I get thousands of warnings such as include/linux/gfp.h: In function `allocflags_to_migratetype': include/linux/gfp.h:105: warning: null format string due to passing a NULL format string to warn_slowpath() in #define __WARN() warn_slowpath(__FILE__, __LINE__, NULL) Split this case out into a separate call. This also shrinks the kernel slightly: text data bss dec hex filename 4802274 707668 712704 6222646 5ef336 vmlinux text data bss dec hex filename 4799027 703572 712704 6215303 5ed687 vmlinux due to removeing one argument from the commonly-called __WARN(). [akpm@linux-foundation.org: reduce scope of `empty'] Acked-by: Jesper Nilsson Acked-by: Johannes Weiner Acked-by: Arjan van de Ven Signed-off-by: Andi Kleen Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/panic.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/panic.c b/kernel/panic.c index 3dcaa1661357..874ecf1307ae 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -340,7 +340,7 @@ void oops_exit(void) } #ifdef WANT_WARN_ON_SLOWPATH -void warn_slowpath(const char *file, int line, const char *fmt, ...) +void warn_slowpath_fmt(const char *file, int line, const char *fmt, ...) { va_list args; char function[KSYM_SYMBOL_LEN]; @@ -356,7 +356,7 @@ void warn_slowpath(const char *file, int line, const char *fmt, ...) if (board) printk(KERN_WARNING "Hardware name: %s\n", board); - if (fmt) { + if (*fmt) { va_start(args, fmt); vprintk(fmt, args); va_end(args); @@ -367,7 +367,14 @@ void warn_slowpath(const char *file, int line, const char *fmt, ...) print_oops_end_marker(); add_taint(TAINT_WARN); } -EXPORT_SYMBOL(warn_slowpath); +EXPORT_SYMBOL(warn_slowpath_fmt); + +void warn_slowpath_null(const char *file, int line) +{ + static const char *empty = ""; + warn_slowpath_fmt(file, line, empty); +} +EXPORT_SYMBOL(warn_slowpath_null); #endif #ifdef CONFIG_CC_STACKPROTECTOR -- cgit v1.2.3 From 201517a7f3ec497fff545a7659c6c876f89f9054 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 7 May 2009 16:31:26 -0400 Subject: kprobes: fix to use text_mutex around arm/disarm kprobe Fix kprobes to lock text_mutex around some arch_arm/disarm_kprobe() which are newly added by commit de5bd88d5a5cce3cacea904d3503e5ebdb3852a2. Signed-off-by: Masami Hiramatsu Acked-by: Ananth N Mavinakayanahalli Cc: Mathieu Desnoyers Cc: Jim Keniston Cc: Ingo Molnar Signed-off-by: Linus Torvalds --- kernel/kprobes.c | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) (limited to 'kernel') diff --git a/kernel/kprobes.c b/kernel/kprobes.c index a5e74ddee0e2..c0fa54b276d9 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -319,6 +319,22 @@ struct kprobe __kprobes *get_kprobe(void *addr) return NULL; } +/* Arm a kprobe with text_mutex */ +static void __kprobes arm_kprobe(struct kprobe *kp) +{ + mutex_lock(&text_mutex); + arch_arm_kprobe(kp); + mutex_unlock(&text_mutex); +} + +/* Disarm a kprobe with text_mutex */ +static void __kprobes disarm_kprobe(struct kprobe *kp) +{ + mutex_lock(&text_mutex); + arch_disarm_kprobe(kp); + mutex_unlock(&text_mutex); +} + /* * Aggregate handlers for multiple kprobes support - these handlers * take care of invoking the individual kprobe handlers on p->list @@ -538,7 +554,7 @@ static int __kprobes add_new_kprobe(struct kprobe *ap, struct kprobe *p) ap->flags &= ~KPROBE_FLAG_DISABLED; if (!kprobes_all_disarmed) /* Arm the breakpoint again. */ - arch_arm_kprobe(ap); + arm_kprobe(ap); } return 0; } @@ -789,11 +805,8 @@ static int __kprobes __unregister_kprobe_top(struct kprobe *p) * enabled and not gone - otherwise, the breakpoint would * already have been removed. We save on flushing icache. */ - if (!kprobes_all_disarmed && !kprobe_disabled(old_p)) { - mutex_lock(&text_mutex); - arch_disarm_kprobe(p); - mutex_unlock(&text_mutex); - } + if (!kprobes_all_disarmed && !kprobe_disabled(old_p)) + disarm_kprobe(p); hlist_del_rcu(&old_p->hlist); } else { if (p->break_handler && !kprobe_gone(p)) @@ -810,7 +823,7 @@ noclean: if (!kprobe_disabled(old_p)) { try_to_disable_aggr_kprobe(old_p); if (!kprobes_all_disarmed && kprobe_disabled(old_p)) - arch_disarm_kprobe(old_p); + disarm_kprobe(old_p); } } return 0; @@ -1364,7 +1377,7 @@ int __kprobes disable_kprobe(struct kprobe *kp) try_to_disable_aggr_kprobe(p); if (!kprobes_all_disarmed && kprobe_disabled(p)) - arch_disarm_kprobe(p); + disarm_kprobe(p); out: mutex_unlock(&kprobe_mutex); return ret; @@ -1393,7 +1406,7 @@ int __kprobes enable_kprobe(struct kprobe *kp) } if (!kprobes_all_disarmed && kprobe_disabled(p)) - arch_arm_kprobe(p); + arm_kprobe(p); p->flags &= ~KPROBE_FLAG_DISABLED; if (p != kp) -- cgit v1.2.3 From 92d23f703c608fcb2c8edd74a3fd0f4031e18606 Mon Sep 17 00:00:00 2001 From: Ron Date: Fri, 8 May 2009 22:54:49 +0930 Subject: sched: Fix fallback sched_clock()'s offset when using jiffies Account for the initial offset to the jiffy count. [ Impact: fix printk timestamps on architectures using fallback sched_clock() ] Signed-off-by: Ron Lee Cc: Andrew Morton Signed-off-by: Ingo Molnar --- kernel/sched_clock.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/sched_clock.c b/kernel/sched_clock.c index 819f17ac796e..e1d16c9a7680 100644 --- a/kernel/sched_clock.c +++ b/kernel/sched_clock.c @@ -38,7 +38,8 @@ */ unsigned long long __attribute__((weak)) sched_clock(void) { - return (unsigned long long)jiffies * (NSEC_PER_SEC / HZ); + return (unsigned long long)(jiffies - INITIAL_JIFFIES) + * (NSEC_PER_SEC / HZ); } static __read_mostly int sched_clock_running; -- cgit v1.2.3 From 6f5bbff9a1b7d6864a495763448a363bbfa96324 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 6 May 2009 01:34:22 -0400 Subject: Convert obvious places to deactivate_locked_super() Signed-off-by: Al Viro --- kernel/cgroup.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 382109b5baeb..a7267bfd3765 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1133,8 +1133,7 @@ static int cgroup_get_sb(struct file_system_type *fs_type, free_cg_links: free_cg_links(&tmp_cg_links); drop_new_super: - up_write(&sb->s_umount); - deactivate_super(sb); + deactivate_locked_super(sb); return ret; } -- cgit v1.2.3 From d80c19df5fcceb8c741e96f09f275c2da719efef Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 12 May 2009 16:29:13 +0200 Subject: lockdep: increase MAX_LOCKDEP_ENTRIES and MAX_LOCKDEP_CHAINS Now that lockdep coverage has increased it has become easier to run out of entries: [ 21.401387] BUG: MAX_LOCKDEP_ENTRIES too low! [ 21.402007] turning off the locking correctness validator. [ 21.402007] Pid: 1555, comm: S99local Not tainted 2.6.30-rc5-tip #2 [ 21.402007] Call Trace: [ 21.402007] [] add_lock_to_list+0x53/0xba [ 21.402007] [] ? lookup_mnt+0x19/0x53 [ 21.402007] [] check_prev_add+0x14b/0x1c7 [ 21.402007] [] validate_chain+0x474/0x52a [ 21.402007] [] __lock_acquire+0x342/0x3c7 [ 21.402007] [] lock_acquire+0xc1/0xe5 [ 21.402007] [] ? lookup_mnt+0x19/0x53 [ 21.402007] [] _spin_lock+0x31/0x66 Double the size - as we've done in the past. [ Impact: allow lockdep to cover more locks ] Acked-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar --- kernel/lockdep_internals.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/lockdep_internals.h b/kernel/lockdep_internals.h index a2cc7e9a6e84..699a2ac3a0d7 100644 --- a/kernel/lockdep_internals.h +++ b/kernel/lockdep_internals.h @@ -54,9 +54,9 @@ enum { * table (if it's not there yet), and we check it for lock order * conflicts and deadlocks. */ -#define MAX_LOCKDEP_ENTRIES 8192UL +#define MAX_LOCKDEP_ENTRIES 16384UL -#define MAX_LOCKDEP_CHAINS_BITS 14 +#define MAX_LOCKDEP_CHAINS_BITS 15 #define MAX_LOCKDEP_CHAINS (1UL << MAX_LOCKDEP_CHAINS_BITS) #define MAX_LOCKDEP_CHAIN_HLOCKS (MAX_LOCKDEP_CHAINS*5) -- cgit v1.2.3 From cd17cbfda004fe5f406c01b318c6378d9895896f Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 15 May 2009 11:32:24 +0200 Subject: Revert "mm: add /proc controls for pdflush threads" This reverts commit fafd688e4c0c34da0f3de909881117d374e4c7af. Work is progressing to switch away from pdflush as the process backing for flushing out dirty data. So it seems pointless to add more knobs to control pdflush threads. The original author of the patch did not have any specific use cases for adding the knobs, so we can easily revert this before 2.6.30 to avoid having to maintain this API forever. Signed-off-by: Jens Axboe --- kernel/sysctl.c | 23 ----------------------- 1 file changed, 23 deletions(-) (limited to 'kernel') diff --git a/kernel/sysctl.c b/kernel/sysctl.c index ea78fa101ad6..b2970d56fb76 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -101,7 +101,6 @@ static int __maybe_unused one = 1; static int __maybe_unused two = 2; static unsigned long one_ul = 1; static int one_hundred = 100; -static int one_thousand = 1000; /* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */ static unsigned long dirty_bytes_min = 2 * PAGE_SIZE; @@ -1033,28 +1032,6 @@ static struct ctl_table vm_table[] = { .mode = 0444 /* read-only*/, .proc_handler = &proc_dointvec, }, - { - .ctl_name = CTL_UNNUMBERED, - .procname = "nr_pdflush_threads_min", - .data = &nr_pdflush_threads_min, - .maxlen = sizeof nr_pdflush_threads_min, - .mode = 0644 /* read-write */, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, - .extra1 = &one, - .extra2 = &nr_pdflush_threads_max, - }, - { - .ctl_name = CTL_UNNUMBERED, - .procname = "nr_pdflush_threads_max", - .data = &nr_pdflush_threads_max, - .maxlen = sizeof nr_pdflush_threads_max, - .mode = 0644 /* read-write */, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, - .extra1 = &nr_pdflush_threads_min, - .extra2 = &one_thousand, - }, { .ctl_name = VM_SWAPPINESS, .procname = "swappiness", -- cgit v1.2.3 From 364b5b7b1d793a7f98be55b6b154716dcae78dfc Mon Sep 17 00:00:00 2001 From: Jason Wessel Date: Wed, 13 May 2009 21:56:59 -0500 Subject: sysrq, intel_fb: fix sysrq g collision Commit 79e539453b34e35f39299a899d263b0a1f1670bd introduced a regression where you cannot use sysrq 'g' to enter kgdb. The solution is to move the intel fb sysrq over to V for video instead of G for graphics. The SMP VOYAGER code to register for the sysrq-v is not anywhere to be found in the mainline kernel, so the comments in the code were cleaned up as well. This patch also cleans up the sysrq definitions for kgdb to make it generic for the kernel debugger, such that the sysrq 'g' can be used in the future to enter a gdbstub or another kernel debugger. Signed-off-by: Jason Wessel Acked-by: Jesse Barnes Acked-by: Randy Dunlap Signed-off-by: Andrew Morton --- kernel/kgdb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/kgdb.c b/kernel/kgdb.c index e4dcfb2272a4..9147a3190c9d 100644 --- a/kernel/kgdb.c +++ b/kernel/kgdb.c @@ -1583,8 +1583,8 @@ static void sysrq_handle_gdb(int key, struct tty_struct *tty) static struct sysrq_key_op sysrq_gdb_op = { .handler = sysrq_handle_gdb, - .help_msg = "Gdb", - .action_msg = "GDB", + .help_msg = "debug(G)", + .action_msg = "DEBUG", }; #endif -- cgit v1.2.3 From 88fc86c283d9c3854e67e4155808027bc2519eb6 Mon Sep 17 00:00:00 2001 From: GeunSik Lim Date: Thu, 14 May 2009 17:23:38 +0900 Subject: tracing: Append prompt in /debug/tracing/README file append prompt in /debug/tracing/README file. This is trivial issue. Fix typo Mini Howto file(README) for ftrace. [ Impact: cleanup ] Signed-off-by: GeunSik Lim Acked-by: Steven Rostedt Cc: williams LKML-Reference: <1242289418.31161.45.camel@centos51> Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index a884c09006c4..cda81ec58d9f 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2380,7 +2380,7 @@ static const char readme_msg[] = "# echo print-parent > /debug/tracing/trace_options\n" "# echo 1 > /debug/tracing/tracing_enabled\n" "# cat /debug/tracing/trace > /tmp/trace.txt\n" - "echo 0 > /debug/tracing/tracing_enabled\n" + "# echo 0 > /debug/tracing/tracing_enabled\n" ; static ssize_t -- cgit v1.2.3 From 4484079d517c2b6521621be0b1ea246ccc55c7d7 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 15 May 2009 23:30:50 +0200 Subject: PM: check sysdev_suspend(PMSG_FREEZE) return value Check the return value of sysdev_suspend(). I think this was a typo. Without this change, the following "if" check is always false. I also changed the error message so it's distinguishable from the similar message a few lines above. Signed-off-by: Bjorn Helgaas Acked-by: Pavel Machek Signed-off-by: Rafael J. Wysocki --- kernel/power/disk.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/power/disk.c b/kernel/power/disk.c index e71ca9cd81b2..b0dc9e7a0d17 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c @@ -241,9 +241,9 @@ static int create_image(int platform_mode) local_irq_disable(); - sysdev_suspend(PMSG_FREEZE); + error = sysdev_suspend(PMSG_FREEZE); if (error) { - printk(KERN_ERR "PM: Some devices failed to power down, " + printk(KERN_ERR "PM: Some system devices failed to power down, " "aborting hibernation\n"); goto Enable_irqs; } -- cgit v1.2.3 From 0f6f49a8cd0163fdb1723ed29f01fc65177108dc Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 16 May 2009 13:41:28 -0700 Subject: Fix caller information for warn_slowpath_null Ian Campbell noticed that since "Eliminate thousands of warnings with gcc 3.2 build" (commit 57adc4d2dbf968fdbe516359688094eef4d46581) all WARN_ON()'s currently appear to come from warn_slowpath_null(), eg: WARNING: at kernel/softirq.c:143 warn_slowpath_null+0x1c/0x20() because now that warn_slowpath_null() is in the call path, the __builtin_return_address(0) returns that, rather than the place that caused the warning. Fix this by splitting up the warn_slowpath_null/fmt cases differently, using a common helper function, and getting the return address in the right place. This also happens to avoid the unnecessary stack usage for the non-stdargs case, and just generally cleans things up. Make the function name printout use %pS while at it. Cc: Ian Campbell Cc: Jesper Nilsson Cc: Johannes Weiner Cc: Arjan van de Ven Cc: Andi Kleen Cc: Hugh Dickins Cc: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/panic.c | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) (limited to 'kernel') diff --git a/kernel/panic.c b/kernel/panic.c index 874ecf1307ae..984b3ecbd72c 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -340,39 +340,44 @@ void oops_exit(void) } #ifdef WANT_WARN_ON_SLOWPATH -void warn_slowpath_fmt(const char *file, int line, const char *fmt, ...) -{ +struct slowpath_args { + const char *fmt; va_list args; - char function[KSYM_SYMBOL_LEN]; - unsigned long caller = (unsigned long)__builtin_return_address(0); - const char *board; +}; - sprint_symbol(function, caller); +static void warn_slowpath_common(const char *file, int line, void *caller, struct slowpath_args *args) +{ + const char *board; printk(KERN_WARNING "------------[ cut here ]------------\n"); - printk(KERN_WARNING "WARNING: at %s:%d %s()\n", file, - line, function); + printk(KERN_WARNING "WARNING: at %s:%d %pS()\n", file, line, caller); board = dmi_get_system_info(DMI_PRODUCT_NAME); if (board) printk(KERN_WARNING "Hardware name: %s\n", board); - if (*fmt) { - va_start(args, fmt); - vprintk(fmt, args); - va_end(args); - } + if (args) + vprintk(args->fmt, args->args); print_modules(); dump_stack(); print_oops_end_marker(); add_taint(TAINT_WARN); } + +void warn_slowpath_fmt(const char *file, int line, const char *fmt, ...) +{ + struct slowpath_args args; + + args.fmt = fmt; + va_start(args.args, fmt); + warn_slowpath_common(file, line, __builtin_return_address(0), &args); + va_end(args.args); +} EXPORT_SYMBOL(warn_slowpath_fmt); void warn_slowpath_null(const char *file, int line) { - static const char *empty = ""; - warn_slowpath_fmt(file, line, empty); + warn_slowpath_common(file, line, __builtin_return_address(0), NULL); } EXPORT_SYMBOL(warn_slowpath_null); #endif -- cgit v1.2.3 From 64d1304a64477629cb16b75491a77bafe6f86963 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 18 May 2009 21:20:10 +0200 Subject: futex: setup writeable mapping for futex ops which modify user space data The futex code installs a read only mapping via get_user_pages_fast() even if the futex op function has to modify user space data. The eventual fault was fixed up by futex_handle_fault() which walked the VMA with mmap_sem held. After the cleanup patches which removed the mmap_sem dependency of the futex code commit 4dc5b7a36a49eff97050894cf1b3a9a02523717 (futex: clean up fault logic) removed the private VMA walk logic from the futex code. This change results in a stale RO mapping which is not fixed up. Instead of reintroducing the previous fault logic we set up the mapping in get_user_pages_fast() read/write for all operations which modify user space data. Also handle private futexes in the same way and make the current unconditional access_ok(VERIFY_WRITE) depend on the futex op. Reported-by: Andreas Schwab Signed-off-by: Thomas Gleixner CC: stable@kernel.org --- kernel/futex.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) (limited to 'kernel') diff --git a/kernel/futex.c b/kernel/futex.c index eef8cd26b5e5..d546b2d53a62 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -193,6 +193,7 @@ static void drop_futex_key_refs(union futex_key *key) * @uaddr: virtual address of the futex * @fshared: 0 for a PROCESS_PRIVATE futex, 1 for PROCESS_SHARED * @key: address where result is stored. + * @rw: mapping needs to be read/write (values: VERIFY_READ, VERIFY_WRITE) * * Returns a negative error code or 0 * The key words are stored in *key on success. @@ -203,7 +204,8 @@ static void drop_futex_key_refs(union futex_key *key) * * lock_page() might sleep, the caller should not hold a spinlock. */ -static int get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key) +static int +get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw) { unsigned long address = (unsigned long)uaddr; struct mm_struct *mm = current->mm; @@ -226,7 +228,7 @@ static int get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key) * but access_ok() should be faster than find_vma() */ if (!fshared) { - if (unlikely(!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))) + if (unlikely(!access_ok(rw, uaddr, sizeof(u32)))) return -EFAULT; key->private.mm = mm; key->private.address = address; @@ -235,7 +237,7 @@ static int get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key) } again: - err = get_user_pages_fast(address, 1, 0, &page); + err = get_user_pages_fast(address, 1, rw == VERIFY_WRITE, &page); if (err < 0) return err; @@ -677,7 +679,7 @@ static int futex_wake(u32 __user *uaddr, int fshared, int nr_wake, u32 bitset) if (!bitset) return -EINVAL; - ret = get_futex_key(uaddr, fshared, &key); + ret = get_futex_key(uaddr, fshared, &key, VERIFY_READ); if (unlikely(ret != 0)) goto out; @@ -723,10 +725,10 @@ futex_wake_op(u32 __user *uaddr1, int fshared, u32 __user *uaddr2, int ret, op_ret; retry: - ret = get_futex_key(uaddr1, fshared, &key1); + ret = get_futex_key(uaddr1, fshared, &key1, VERIFY_READ); if (unlikely(ret != 0)) goto out; - ret = get_futex_key(uaddr2, fshared, &key2); + ret = get_futex_key(uaddr2, fshared, &key2, VERIFY_WRITE); if (unlikely(ret != 0)) goto out_put_key1; @@ -814,10 +816,10 @@ static int futex_requeue(u32 __user *uaddr1, int fshared, u32 __user *uaddr2, int ret, drop_count = 0; retry: - ret = get_futex_key(uaddr1, fshared, &key1); + ret = get_futex_key(uaddr1, fshared, &key1, VERIFY_READ); if (unlikely(ret != 0)) goto out; - ret = get_futex_key(uaddr2, fshared, &key2); + ret = get_futex_key(uaddr2, fshared, &key2, VERIFY_READ); if (unlikely(ret != 0)) goto out_put_key1; @@ -1140,7 +1142,7 @@ static int futex_wait(u32 __user *uaddr, int fshared, q.bitset = bitset; retry: q.key = FUTEX_KEY_INIT; - ret = get_futex_key(uaddr, fshared, &q.key); + ret = get_futex_key(uaddr, fshared, &q.key, VERIFY_READ); if (unlikely(ret != 0)) goto out; @@ -1330,7 +1332,7 @@ static int futex_lock_pi(u32 __user *uaddr, int fshared, q.pi_state = NULL; retry: q.key = FUTEX_KEY_INIT; - ret = get_futex_key(uaddr, fshared, &q.key); + ret = get_futex_key(uaddr, fshared, &q.key, VERIFY_WRITE); if (unlikely(ret != 0)) goto out; @@ -1594,7 +1596,7 @@ retry: if ((uval & FUTEX_TID_MASK) != task_pid_vnr(current)) return -EPERM; - ret = get_futex_key(uaddr, fshared, &key); + ret = get_futex_key(uaddr, fshared, &key, VERIFY_WRITE); if (unlikely(ret != 0)) goto out; -- cgit v1.2.3 From 32bdfac5462d777f35b00838893c4f87baf23efe Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 24 May 2009 21:15:07 +0200 Subject: PM: Do not hold dpm_list_mtx while disabling/enabling nonboot CPUs We shouldn't hold dpm_list_mtx while executing [disable|enable]_nonboot_cpus(), because theoretically this may lead to a deadlock as shown by the following example (provided by Johannes Berg): CPU 3 CPU 2 CPU 1 suspend/hibernate something: rtnl_lock() device_pm_lock() -> mutex_lock(&dpm_list_mtx) mutex_lock(&dpm_list_mtx) linkwatch_work -> rtnl_lock() disable_nonboot_cpus() -> flush CPU 3 workqueue Fortunately, device drivers are supposed to stop any activities that might lead to the registration of new device objects way before disable_nonboot_cpus() is called, so it shouldn't be necessary to hold dpm_list_mtx over the entire late part of device suspend and early part of device resume. Thus, during the late suspend and the early resume of devices acquire dpm_list_mtx only when dpm_list is going to be traversed and release it right after that. This patch is reported to fix the regressions tracked as http://bugzilla.kernel.org/show_bug.cgi?id=13245. Signed-off-by: Rafael J. Wysocki Acked-by: Alan Stern Reported-by: Miles Lane Tested-by: Ming Lei --- kernel/kexec.c | 2 -- kernel/power/disk.c | 21 +++------------------ kernel/power/main.c | 7 +------ 3 files changed, 4 insertions(+), 26 deletions(-) (limited to 'kernel') diff --git a/kernel/kexec.c b/kernel/kexec.c index 5a758c6e4950..e4983770913b 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -1451,7 +1451,6 @@ int kernel_kexec(void) error = device_suspend(PMSG_FREEZE); if (error) goto Resume_console; - device_pm_lock(); /* At this point, device_suspend() has been called, * but *not* device_power_down(). We *must* * device_power_down() now. Otherwise, drivers for @@ -1489,7 +1488,6 @@ int kernel_kexec(void) enable_nonboot_cpus(); device_power_up(PMSG_RESTORE); Resume_devices: - device_pm_unlock(); device_resume(PMSG_RESTORE); Resume_console: resume_console(); diff --git a/kernel/power/disk.c b/kernel/power/disk.c index b0dc9e7a0d17..5cb080e7eebd 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c @@ -215,8 +215,6 @@ static int create_image(int platform_mode) if (error) return error; - device_pm_lock(); - /* At this point, device_suspend() has been called, but *not* * device_power_down(). We *must* call device_power_down() now. * Otherwise, drivers for some devices (e.g. interrupt controllers) @@ -227,7 +225,7 @@ static int create_image(int platform_mode) if (error) { printk(KERN_ERR "PM: Some devices failed to power down, " "aborting hibernation\n"); - goto Unlock; + return error; } error = platform_pre_snapshot(platform_mode); @@ -280,9 +278,6 @@ static int create_image(int platform_mode) device_power_up(in_suspend ? (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE); - Unlock: - device_pm_unlock(); - return error; } @@ -344,13 +339,11 @@ static int resume_target_kernel(bool platform_mode) { int error; - device_pm_lock(); - error = device_power_down(PMSG_QUIESCE); if (error) { printk(KERN_ERR "PM: Some devices failed to power down, " "aborting resume\n"); - goto Unlock; + return error; } error = platform_pre_restore(platform_mode); @@ -403,9 +396,6 @@ static int resume_target_kernel(bool platform_mode) device_power_up(PMSG_RECOVER); - Unlock: - device_pm_unlock(); - return error; } @@ -464,11 +454,9 @@ int hibernation_platform_enter(void) goto Resume_devices; } - device_pm_lock(); - error = device_power_down(PMSG_HIBERNATE); if (error) - goto Unlock; + goto Resume_devices; error = hibernation_ops->prepare(); if (error) @@ -493,9 +481,6 @@ int hibernation_platform_enter(void) device_power_up(PMSG_RESTORE); - Unlock: - device_pm_unlock(); - Resume_devices: entering_platform_hibernation = false; device_resume(PMSG_RESTORE); diff --git a/kernel/power/main.c b/kernel/power/main.c index f99ed6a75eac..868028280d13 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -289,12 +289,10 @@ static int suspend_enter(suspend_state_t state) { int error; - device_pm_lock(); - if (suspend_ops->prepare) { error = suspend_ops->prepare(); if (error) - goto Done; + return error; } error = device_power_down(PMSG_SUSPEND); @@ -343,9 +341,6 @@ static int suspend_enter(suspend_state_t state) if (suspend_ops->finish) suspend_ops->finish(); - Done: - device_pm_unlock(); - return error; } -- cgit v1.2.3 From d5a877e8dd409d8c702986d06485c374b705d340 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Sun, 24 May 2009 13:03:43 -0700 Subject: async: make sure independent async domains can't accidentally entangle The problem occurs when async_synchronize_full_domain() is called when the async_pending list is not empty. This will cause lowest_running() to return the cookie of the first entry on the async_pending list, which might be nothing at all to do with the domain being asked for and thus cause the domain synchronization to wait for an unrelated domain. This can cause a deadlock if domain synchronization is used from one domain to wait for another. Fix by running over the async_pending list to see if any pending items actually belong to our domain (and return their cookies if they do). Signed-off-by: James Bottomley Signed-off-by: Arjan van de Ven Signed-off-by: Linus Torvalds --- kernel/async.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) (limited to 'kernel') diff --git a/kernel/async.c b/kernel/async.c index 968ef9457d4e..50540301ed0f 100644 --- a/kernel/async.c +++ b/kernel/async.c @@ -92,19 +92,23 @@ extern int initcall_debug; static async_cookie_t __lowest_in_progress(struct list_head *running) { struct async_entry *entry; + async_cookie_t ret = next_cookie; /* begin with "infinity" value */ + if (!list_empty(running)) { entry = list_first_entry(running, struct async_entry, list); - return entry->cookie; - } else if (!list_empty(&async_pending)) { - entry = list_first_entry(&async_pending, - struct async_entry, list); - return entry->cookie; - } else { - /* nothing in progress... next_cookie is "infinity" */ - return next_cookie; + ret = entry->cookie; } + if (!list_empty(&async_pending)) { + list_for_each_entry(entry, &async_pending, list) + if (entry->running == running) { + ret = entry->cookie; + break; + } + } + + return ret; } static async_cookie_t lowest_in_progress(struct list_head *running) -- cgit v1.2.3 From ab2b7ebaad16226c9a5e85c5f384d19fa58a7459 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Tue, 26 May 2009 09:11:03 +0100 Subject: kmod: Release sub_info on cred allocation failure. call_usermodehelper_setup() forgot to kfree(sub_info) when prepare_usermodehelper_creds() failed. Signed-off-by: Tetsuo Handa Signed-off-by: David Howells Signed-off-by: Linus Torvalds --- kernel/kmod.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/kmod.c b/kernel/kmod.c index b750675251e5..7e95bedb2bfc 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -370,8 +370,10 @@ struct subprocess_info *call_usermodehelper_setup(char *path, char **argv, sub_info->argv = argv; sub_info->envp = envp; sub_info->cred = prepare_usermodehelper_creds(); - if (!sub_info->cred) + if (!sub_info->cred) { + kfree(sub_info); return NULL; + } out: return sub_info; -- cgit v1.2.3 From 087eb437051b3de817720f9c80c440fc9e7dcce8 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 4 Jun 2009 16:29:07 -0700 Subject: ptrace: tracehook_report_clone: fix false positives The "trace || CLONE_PTRACE" check in tracehook_report_clone() is not right, - If the untraced task does clone(CLONE_PTRACE) the new child is not traced, we must not queue SIGSTOP. - If we forked the traced task, but the tracer exits and untraces both the forking task and the new child (after copy_process() drops tasklist_lock), we should not queue SIGSTOP too. Change the code to check task_ptrace() != 0 instead. This is still racy, but the race is harmless. We can race with another tracer attaching to this child, or the tracer can exit and detach in parallel. But giwen that we didn't do wake_up_new_task() yet, the child must have the pending SIGSTOP anyway. Signed-off-by: Oleg Nesterov Acked-by: Roland McGrath Cc: Christoph Hellwig Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/fork.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/fork.c b/kernel/fork.c index b9e2edd00726..875ffbdd96d0 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1409,7 +1409,7 @@ long do_fork(unsigned long clone_flags, } audit_finish_fork(p); - tracehook_report_clone(trace, regs, clone_flags, nr, p); + tracehook_report_clone(regs, clone_flags, nr, p); /* * We set PF_STARTING at creation in case tracing wants to -- cgit v1.2.3 From edaba2c5334492f82d39ec35637c6dea5176a977 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 4 Jun 2009 16:29:09 -0700 Subject: ptrace: revert "ptrace_detach: the wrong wakeup breaks the ERESTARTxxx logic" Commit 95a3540da9c81a5987be810e1d9a83640a366bd5 ("ptrace_detach: the wrong wakeup breaks the ERESTARTxxx logic") removed the "extra" wake_up_process() from ptrace_detach(), but as Jan pointed out this breaks the compatibility. I believe the changelog is right and this wake_up() is wrong in many ways, but GDB assumes that ptrace(PTRACE_DETACH, child, 0, 0) always wakes up the tracee. Despite the fact this breaks SIGNAL_STOP_STOPPED/group_stop_count logic, and despite the fact this wake_up_process() can break another assumption: PTRACE_DETACH with SIGSTOP should leave the tracee in TASK_STOPPED case. Because the untraced child can dequeue SIGSTOP and call do_signal_stop() before ptrace_detach() calls wake_up_process(). Revert this change for now. We need some fixes even if we we want to keep the current behaviour, but these fixes are not for 2.6.30. Signed-off-by: Oleg Nesterov Acked-by: Roland McGrath Cc: Jan Kratochvil Cc: Denys Vlasenko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/ptrace.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel') diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 0692ab5a0d67..42c317874cfa 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -304,6 +304,8 @@ int ptrace_detach(struct task_struct *child, unsigned int data) if (child->ptrace) { child->exit_code = data; dead = __ptrace_detach(current, child); + if (!child->exit_state) + wake_up_process(child); } write_unlock_irq(&tasklist_lock); -- cgit v1.2.3 From 3af968e066d593bc4dacc021715f3e95ddf0996f Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 8 Jun 2009 12:31:53 -0700 Subject: async: Fix lack of boot-time console due to insufficient synchronization Our async work synchronization was broken by "async: make sure independent async domains can't accidentally entangle" (commit d5a877e8dd409d8c702986d06485c374b705d340), because it would report the wrong lowest active async ID when there was both running and pending async work. This caused things like no being able to read the root filesystem, resulting in missing console devices and inability to run 'init', causing a boot-time panic. This fixes it by properly returning the lowest pending async ID: if there is any running async work, that will have a lower ID than any pending work, and we should _not_ look at the pending work list. There were alternative patches from Jaswinder and James, but this one also cleans up the code by removing the pointless 'ret' variable and the unnecesary testing for an empty list around 'for_each_entry()' (if the list is empty, the for_each_entry() thing just won't execute). Fixes-bug: http://bugzilla.kernel.org/show_bug.cgi?id=13474 Reported-and-tested-by: Chris Clayton Cc: Jaswinder Singh Rajput Cc: James Bottomley Cc: Arjan van de Ven Signed-off-by: Linus Torvalds --- kernel/async.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) (limited to 'kernel') diff --git a/kernel/async.c b/kernel/async.c index 50540301ed0f..27235f5de198 100644 --- a/kernel/async.c +++ b/kernel/async.c @@ -92,23 +92,18 @@ extern int initcall_debug; static async_cookie_t __lowest_in_progress(struct list_head *running) { struct async_entry *entry; - async_cookie_t ret = next_cookie; /* begin with "infinity" value */ if (!list_empty(running)) { entry = list_first_entry(running, struct async_entry, list); - ret = entry->cookie; + return entry->cookie; } - if (!list_empty(&async_pending)) { - list_for_each_entry(entry, &async_pending, list) - if (entry->running == running) { - ret = entry->cookie; - break; - } - } + list_for_each_entry(entry, &async_pending, list) + if (entry->running == running) + return entry->cookie; - return ret; + return next_cookie; /* "infinity" value */ } static async_cookie_t lowest_in_progress(struct list_head *running) -- cgit v1.2.3 From eaa958402ea40851097d051f52ba1bb7a885efe9 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 6 Jun 2009 14:51:36 -0700 Subject: cpumask: alloc zeroed cpumask for static cpumask_var_ts These are defined as static cpumask_var_t so if MAXSMP is not used, they are cleared already. Avoid surprises when MAXSMP is enabled. Signed-off-by: Yinghai Lu Signed-off-by: Rusty Russell --- kernel/sched_cpupri.c | 2 +- kernel/sched_rt.c | 2 +- kernel/smp.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/sched_cpupri.c b/kernel/sched_cpupri.c index cdd3c89574cd..344712a5e3ed 100644 --- a/kernel/sched_cpupri.c +++ b/kernel/sched_cpupri.c @@ -165,7 +165,7 @@ int __init_refok cpupri_init(struct cpupri *cp, bool bootmem) vec->count = 0; if (bootmem) alloc_bootmem_cpumask_var(&vec->mask); - else if (!alloc_cpumask_var(&vec->mask, GFP_KERNEL)) + else if (!zalloc_cpumask_var(&vec->mask, GFP_KERNEL)) goto cleanup; } diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index f2c66f8f9712..9bf0d2a73045 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -1591,7 +1591,7 @@ static inline void init_sched_rt_class(void) unsigned int i; for_each_possible_cpu(i) - alloc_cpumask_var_node(&per_cpu(local_cpu_mask, i), + zalloc_cpumask_var_node(&per_cpu(local_cpu_mask, i), GFP_KERNEL, cpu_to_node(i)); } #endif /* CONFIG_SMP */ diff --git a/kernel/smp.c b/kernel/smp.c index 858baac568ee..ad63d8501207 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -52,7 +52,7 @@ hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu) switch (action) { case CPU_UP_PREPARE: case CPU_UP_PREPARE_FROZEN: - if (!alloc_cpumask_var_node(&cfd->cpumask, GFP_KERNEL, + if (!zalloc_cpumask_var_node(&cfd->cpumask, GFP_KERNEL, cpu_to_node(cpu))) return NOTIFY_BAD; break; -- cgit v1.2.3