From 81c98869faa5f3a9457c93efef908ef476326b31 Mon Sep 17 00:00:00 2001 From: Nishanth Aravamudan Date: Thu, 3 Apr 2014 14:46:25 -0700 Subject: kthread: ensure locality of task_struct allocations In the presence of memoryless nodes, numa_node_id() will return the current CPU's NUMA node, but that may not be where we expect to allocate from memory from. Instead, we should rely on the fallback code in the memory allocator itself, by using NUMA_NO_NODE. Also, when calling kthread_create_on_node(), use the nearest node with memory to the cpu in question, rather than the node it is running on. Signed-off-by: Nishanth Aravamudan Reviewed-by: Christoph Lameter Acked-by: David Rientjes Cc: Anton Blanchard Cc: Tejun Heo Cc: Oleg Nesterov Cc: Jan Kara Cc: Thomas Gleixner Cc: Tetsuo Handa Cc: Wanpeng Li Cc: Joonsoo Kim Cc: Ben Herrenschmidt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/kthread.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/kthread.c b/kernel/kthread.c index b5ae3ee860a9..9a130ec06f7a 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -217,7 +217,7 @@ int tsk_fork_get_node(struct task_struct *tsk) if (tsk == kthreadd_task) return tsk->pref_node_fork; #endif - return numa_node_id(); + return NUMA_NO_NODE; } static void create_kthread(struct kthread_create_info *create) @@ -369,7 +369,7 @@ struct task_struct *kthread_create_on_cpu(int (*threadfn)(void *data), { struct task_struct *p; - p = kthread_create_on_node(threadfn, data, cpu_to_node(cpu), namefmt, + p = kthread_create_on_node(threadfn, data, cpu_to_mem(cpu), namefmt, cpu); if (IS_ERR(p)) return p; -- cgit v1.2.3 From 62572e29bc530b38921ef6059088b4788a9832a5 Mon Sep 17 00:00:00 2001 From: Ben Zhang Date: Thu, 3 Apr 2014 14:47:18 -0700 Subject: kernel/watchdog.c: touch_nmi_watchdog should only touch local cpu not every one I ran into a scenario where while one cpu was stuck and should have panic'd because of the NMI watchdog, it didn't. The reason was another cpu was spewing stack dumps on to the console. Upon investigation, I noticed that when writing to the console and also when dumping the stack, the watchdog is touched. This causes all the cpus to reset their NMI watchdog flags and the 'stuck' cpu just spins forever. This change causes the semantics of touch_nmi_watchdog to be changed slightly. Previously, I accidentally changed the semantics and we noticed there was a codepath in which touch_nmi_watchdog could be touched from a preemtible area. That caused a BUG() to happen when CONFIG_DEBUG_PREEMPT was enabled. I believe it was the acpi code. My attempt here re-introduces the change to have the touch_nmi_watchdog() code only touch the local cpu instead of all of the cpus. But instead of using __get_cpu_var(), I use the __raw_get_cpu_var() version. This avoids the preemption problem. However my reasoning wasn't because I was trying to be lazy. Instead I rationalized it as, well if preemption is enabled then interrupts should be enabled to and the NMI watchdog will have no reason to trigger. So it won't matter if the wrong cpu is touched because the percpu interrupt counters the NMI watchdog uses should still be incrementing. Don said: : I'm ok with this patch, though it does alter the behaviour of how : touch_nmi_watchdog works. For the most part I don't think most callers : need to touch all of the watchdogs (on each cpu). Perhaps a corner case : will pop up (the scheduler?? to mimic touch_all_softlockup_watchdogs() ). : : But this does address an issue where if a system is locked up and one cpu : is spewing out useful debug messages (or error messages), the hard lockup : will fail to go off. We have seen this on RHEL also. Signed-off-by: Don Zickus Signed-off-by: Ben Zhang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/watchdog.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'kernel') diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 01c6f979486f..e90089fd78e0 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -158,14 +158,14 @@ void touch_all_softlockup_watchdogs(void) #ifdef CONFIG_HARDLOCKUP_DETECTOR void touch_nmi_watchdog(void) { - if (watchdog_user_enabled) { - unsigned cpu; - - for_each_present_cpu(cpu) { - if (per_cpu(watchdog_nmi_touch, cpu) != true) - per_cpu(watchdog_nmi_touch, cpu) = true; - } - } + /* + * Using __raw here because some code paths have + * preemption enabled. If preemption is enabled + * then interrupts should be enabled too, in which + * case we shouldn't have to worry about the watchdog + * going off. + */ + __raw_get_cpu_var(watchdog_nmi_touch) = true; touch_softlockup_watchdog(); } EXPORT_SYMBOL(touch_nmi_watchdog); -- cgit v1.2.3 From d26914d11751b23ca2e8747725f2cae10c2f2c1b Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Thu, 3 Apr 2014 14:47:24 -0700 Subject: mm: optimize put_mems_allowed() usage Since put_mems_allowed() is strictly optional, its a seqcount retry, we don't need to evaluate the function if the allocation was in fact successful, saving a smp_rmb some loads and comparisons on some relative fast-paths. Since the naming, get/put_mems_allowed() does suggest a mandatory pairing, rename the interface, as suggested by Mel, to resemble the seqcount interface. This gives us: read_mems_allowed_begin() and read_mems_allowed_retry(), where it is important to note that the return value of the latter call is inverted from its previous incarnation. Signed-off-by: Peter Zijlstra Signed-off-by: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/cpuset.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/cpuset.c b/kernel/cpuset.c index e6b1b66afe52..f6fc7475f1a1 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -1022,7 +1022,7 @@ static void cpuset_change_task_nodemask(struct task_struct *tsk, task_lock(tsk); /* * Determine if a loop is necessary if another thread is doing - * get_mems_allowed(). If at least one node remains unchanged and + * read_mems_allowed_begin(). If at least one node remains unchanged and * tsk does not have a mempolicy, then an empty nodemask will not be * possible when mems_allowed is larger than a word. */ -- cgit v1.2.3 From 5509a5d27b971a90b940e148ca9ca53312e4fa7a Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Thu, 3 Apr 2014 14:48:19 -0700 Subject: drop_caches: add some documentation and info message There is plenty of anecdotal evidence and a load of blog posts suggesting that using "drop_caches" periodically keeps your system running in "tip top shape". Perhaps adding some kernel documentation will increase the amount of accurate data on its use. If we are not shrinking caches effectively, then we have real bugs. Using drop_caches will simply mask the bugs and make them harder to find, but certainly does not fix them, nor is it an appropriate "workaround" to limit the size of the caches. On the contrary, there have been bug reports on issues that turned out to be misguided use of cache dropping. Dropping caches is a very drastic and disruptive operation that is good for debugging and running tests, but if it creates bug reports from production use, kernel developers should be aware of its use. Add a bit more documentation about it, a syslog message to track down abusers, and vmstat drop counters to help analyze problem reports. [akpm@linux-foundation.org: checkpatch fixes] [hannes@cmpxchg.org: add runtime suppression control] Signed-off-by: Dave Hansen Signed-off-by: Michal Hocko Acked-by: KOSAKI Motohiro Acked-by: KAMEZAWA Hiroyuki Signed-off-by: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sysctl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 09d2e2413605..5c14b547882e 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -123,7 +123,7 @@ static int __maybe_unused neg_one = -1; static int zero; static int __maybe_unused one = 1; static int __maybe_unused two = 2; -static int __maybe_unused three = 3; +static int __maybe_unused four = 4; static unsigned long one_ul = 1; static int one_hundred = 100; #ifdef CONFIG_PRINTK @@ -1264,7 +1264,7 @@ static struct ctl_table vm_table[] = { .mode = 0644, .proc_handler = drop_caches_sysctl_handler, .extra1 = &one, - .extra2 = &three, + .extra2 = &four, }, #ifdef CONFIG_COMPACTION { -- cgit v1.2.3 From 6af9f7bf3c399e0ab1eee048e13572c6d4e15fe9 Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Thu, 3 Apr 2014 14:48:25 -0700 Subject: sys_sysfs: Add CONFIG_SYSFS_SYSCALL sys_sysfs is an obsolete system call no longer supported by libc. - This patch adds a default CONFIG_SYSFS_SYSCALL=y - Option can be turned off in expert mode. - cond_syscall added to kernel/sys_ni.c [akpm@linux-foundation.org: tweak Kconfig help text] Signed-off-by: Fabian Frederick Cc: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sys_ni.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel') diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 7078052284fd..74395a95b7e9 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -146,6 +146,7 @@ cond_syscall(sys_io_destroy); cond_syscall(sys_io_submit); cond_syscall(sys_io_cancel); cond_syscall(sys_io_getevents); +cond_syscall(sys_sysfs); cond_syscall(sys_syslog); cond_syscall(sys_process_vm_readv); cond_syscall(sys_process_vm_writev); -- cgit v1.2.3 From 8f6c5ffc8987f4f5b5a3e9d557d94bbf3a9bf216 Mon Sep 17 00:00:00 2001 From: Wang YanQing Date: Thu, 3 Apr 2014 14:48:26 -0700 Subject: kernel/groups.c: remove return value of set_groups After commit 6307f8fee295 ("security: remove dead hook task_setgroups"), set_groups will always return zero, so we could just remove return value of set_groups. This patch reduces code size, and simplfies code to use set_groups, because we don't need to check its return value any more. [akpm@linux-foundation.org: remove obsolete claims from set_groups() comment] Signed-off-by: Wang YanQing Cc: "Eric W. Biederman" Cc: Serge Hallyn Cc: Eric Paris Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/groups.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) (limited to 'kernel') diff --git a/kernel/groups.c b/kernel/groups.c index 90cf1c38c8ea..451698f86cfa 100644 --- a/kernel/groups.c +++ b/kernel/groups.c @@ -157,17 +157,13 @@ int groups_search(const struct group_info *group_info, kgid_t grp) * set_groups - Change a group subscription in a set of credentials * @new: The newly prepared set of credentials to alter * @group_info: The group list to install - * - * Validate a group subscription and, if valid, insert it into a set - * of credentials. */ -int set_groups(struct cred *new, struct group_info *group_info) +void set_groups(struct cred *new, struct group_info *group_info) { put_group_info(new->group_info); groups_sort(group_info); get_group_info(group_info); new->group_info = group_info; - return 0; } EXPORT_SYMBOL(set_groups); @@ -182,18 +178,12 @@ EXPORT_SYMBOL(set_groups); int set_current_groups(struct group_info *group_info) { struct cred *new; - int ret; new = prepare_creds(); if (!new) return -ENOMEM; - ret = set_groups(new, group_info); - if (ret < 0) { - abort_creds(new); - return ret; - } - + set_groups(new, group_info); return commit_creds(new); } -- cgit v1.2.3 From 69369a7003735d0d8ef22097e27a55a8bad9557a Mon Sep 17 00:00:00 2001 From: Josh Triplett Date: Thu, 3 Apr 2014 14:48:27 -0700 Subject: fs, kernel: permit disabling the uselib syscall uselib hasn't been used since libc5; glibc does not use it. Support turning it off. When disabled, also omit the load_elf_library implementation from binfmt_elf.c, which only uselib invokes. bloat-o-meter: add/remove: 0/4 grow/shrink: 0/1 up/down: 0/-785 (-785) function old new delta padzero 39 36 -3 uselib_flags 20 - -20 sys_uselib 168 - -168 SyS_uselib 168 - -168 load_elf_library 426 - -426 The new CONFIG_USELIB defaults to `y'. Signed-off-by: Josh Triplett Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sys_ni.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel') diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 74395a95b7e9..bc8d1b74a6b9 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -152,6 +152,7 @@ cond_syscall(sys_process_vm_readv); cond_syscall(sys_process_vm_writev); cond_syscall(compat_sys_process_vm_readv); cond_syscall(compat_sys_process_vm_writev); +cond_syscall(sys_uselib); /* arch-specific weak syscall entries */ cond_syscall(sys_pciconfig_read); -- cgit v1.2.3 From c96d6660dc65b0a90aea9834bfd8be1d5656da18 Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Thu, 3 Apr 2014 14:48:35 -0700 Subject: kernel: audit/fix non-modular users of module_init in core code Code that is obj-y (always built-in) or dependent on a bool Kconfig (built-in or absent) can never be modular. So using module_init as an alias for __initcall can be somewhat misleading. Fix these up now, so that we can relocate module_init from init.h into module.h in the future. If we don't do this, we'd have to add module.h to obviously non-modular code, and that would be a worse thing. The audit targets the following module_init users for change: kernel/user.c obj-y kernel/kexec.c bool KEXEC (one instance per arch) kernel/profile.c bool PROFILING kernel/hung_task.c bool DETECT_HUNG_TASK kernel/sched/stats.c bool SCHEDSTATS kernel/user_namespace.c bool USER_NS Note that direct use of __initcall is discouraged, vs. one of the priority categorized subgroups. As __initcall gets mapped onto device_initcall, our use of subsys_initcall (which makes sense for these files) will thus change this registration from level 6-device to level 4-subsys (i.e. slightly earlier). However no observable impact of that difference has been observed during testing. Also, two instances of missing ";" at EOL are fixed in kexec. Signed-off-by: Paul Gortmaker Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Eric Biederman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/hung_task.c | 3 +-- kernel/kexec.c | 4 ++-- kernel/profile.c | 2 +- kernel/sched/stats.c | 2 +- kernel/user.c | 3 +-- kernel/user_namespace.c | 2 +- 6 files changed, 7 insertions(+), 9 deletions(-) (limited to 'kernel') diff --git a/kernel/hung_task.c b/kernel/hung_task.c index 0b9c169d577f..06bb1417b063 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c @@ -246,5 +246,4 @@ static int __init hung_task_init(void) return 0; } - -module_init(hung_task_init); +subsys_initcall(hung_task_init); diff --git a/kernel/kexec.c b/kernel/kexec.c index 45601cf41bee..c0d261c7db7b 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -1235,7 +1235,7 @@ static int __init crash_notes_memory_init(void) } return 0; } -module_init(crash_notes_memory_init) +subsys_initcall(crash_notes_memory_init); /* @@ -1629,7 +1629,7 @@ static int __init crash_save_vmcoreinfo_init(void) return 0; } -module_init(crash_save_vmcoreinfo_init) +subsys_initcall(crash_save_vmcoreinfo_init); /* * Move into place and start executing a preloaded standalone diff --git a/kernel/profile.c b/kernel/profile.c index ebdd9c1a86b4..1b266dbe755a 100644 --- a/kernel/profile.c +++ b/kernel/profile.c @@ -604,5 +604,5 @@ int __ref create_proc_profile(void) /* false positive from hotcpu_notifier */ hotcpu_notifier(profile_cpu_callback, 0); return 0; } -module_init(create_proc_profile); +subsys_initcall(create_proc_profile); #endif /* CONFIG_PROC_FS */ diff --git a/kernel/sched/stats.c b/kernel/sched/stats.c index da98af347e8b..a476bea17fbc 100644 --- a/kernel/sched/stats.c +++ b/kernel/sched/stats.c @@ -142,4 +142,4 @@ static int __init proc_schedstat_init(void) proc_create("schedstat", 0, NULL, &proc_schedstat_operations); return 0; } -module_init(proc_schedstat_init); +subsys_initcall(proc_schedstat_init); diff --git a/kernel/user.c b/kernel/user.c index c006131beb77..294fc6a94168 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -222,5 +222,4 @@ static int __init uid_cache_init(void) return 0; } - -module_init(uid_cache_init); +subsys_initcall(uid_cache_init); diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index dd06439b9c84..0d8f6023fd8d 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -902,4 +902,4 @@ static __init int user_namespaces_init(void) user_ns_cachep = KMEM_CACHE(user_namespace, SLAB_PANIC); return 0; } -module_init(user_namespaces_init); +subsys_initcall(user_namespaces_init); -- cgit v1.2.3 From 28ab49ff7f3dcaf4df8d2bd0d4099b8c08285ed7 Mon Sep 17 00:00:00 2001 From: Daeseok Youn Date: Thu, 3 Apr 2014 14:48:36 -0700 Subject: kernel/resource.c: make reallocate_resource() static sparse says: kernel/resource.c:518:5: warning: symbol 'reallocate_resource' was not declared. Should it be static? Signed-off-by: Daeseok Youn Reviewed-by: Yasuaki Ishimatsu Acked-by: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/resource.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/resource.c b/kernel/resource.c index 673061c06da1..8957d686e29b 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -511,7 +511,7 @@ static int find_resource(struct resource *root, struct resource *new, * @newsize: new size of the resource descriptor * @constraint: the size and alignment constraints to be met. */ -int reallocate_resource(struct resource *root, struct resource *old, +static int reallocate_resource(struct resource *root, struct resource *old, resource_size_t newsize, struct resource_constraint *constraint) { -- cgit v1.2.3 From c64730b26f08cccfbc8fcbf169c304b4bd71dcac Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Thu, 3 Apr 2014 14:48:39 -0700 Subject: printk: remove obsolete check for log level "c" The kernel log level "c" was removed in commit 61e99ab8e35a ("printk: remove the now unnecessary "C" annotation for KERN_CONT"). It is no longer detected in printk_get_level(). Hence we do not need to check it in vprintk_emit. Signed-off-by: Petr Mladek Cc: Steven Rostedt Cc: Frederic Weisbecker Cc: Jan Kara Cc: Michal Hocko Cc: Kay Sievers Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/printk/printk.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'kernel') diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 4dae9cbe9259..db7a02e05241 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -1560,8 +1560,6 @@ asmlinkage int vprintk_emit(int facility, int level, level = kern_level - '0'; case 'd': /* KERN_DEFAULT */ lflags |= LOG_PREFIX; - case 'c': /* KERN_CONT */ - break; } text_len -= end_of_header - text; text = (char *)end_of_header; -- cgit v1.2.3 From e8c42d36ab86cf45f88c3a0e344233b1032fbf3d Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Thu, 3 Apr 2014 14:48:41 -0700 Subject: printk: add comment about tricky check for text buffer size There is no check for potential "text_len" overflow. It is not needed because only valid level is detected. It took me some time to understand why. It would deserve a comment ;-) Signed-off-by: Petr Mladek Cc: Steven Rostedt Cc: Frederic Weisbecker Cc: Jan Kara Cc: Michal Hocko Cc: Kay Sievers Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/printk/printk.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'kernel') diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index db7a02e05241..012f3e40671d 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -1561,6 +1561,11 @@ asmlinkage int vprintk_emit(int facility, int level, case 'd': /* KERN_DEFAULT */ lflags |= LOG_PREFIX; } + /* + * No need to check length here because vscnprintf + * put '\0' at the end of the string. Only valid and + * newly printed level is detected. + */ text_len -= end_of_header - text; text = (char *)end_of_header; } -- cgit v1.2.3 From 39b25109b400ea397e64c417d8b965a53e2ee0f0 Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Thu, 3 Apr 2014 14:48:42 -0700 Subject: printk: use also the last bytes in the ring buffer It seems that we have newer used the last byte in the ring buffer. In fact, we have newer used the last 4 bytes because of padding. First problem is in the check for free space. The exact number of free bytes is enough to store the length of data. Second problem is in the check where the ring buffer is rotated. The left side counts the first unused index. It is unused, so it might be the same as the size of the buffer. Note that the first problem has to be fixed together with the second one. Otherwise, the buffer is rotated even when there is enough space on the end of the buffer. Then the beginning of the buffer is rewritten and valid entries get corrupted. Signed-off-by: Petr Mladek Cc: Steven Rostedt Cc: Frederic Weisbecker Cc: Jan Kara Cc: Michal Hocko Cc: Kay Sievers Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/printk/printk.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 012f3e40671d..b3a1790f9e05 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -319,7 +319,7 @@ static void log_store(int facility, int level, else free = log_first_idx - log_next_idx; - if (free > size + sizeof(struct printk_log)) + if (free >= size + sizeof(struct printk_log)) break; /* drop old messages until we have enough contiuous space */ @@ -327,7 +327,7 @@ static void log_store(int facility, int level, log_first_seq++; } - if (log_next_idx + size + sizeof(struct printk_log) >= log_buf_len) { + if (log_next_idx + size + sizeof(struct printk_log) > log_buf_len) { /* * This message + an additional empty header does not fit * at the end of the buffer. Add an empty header with len == 0 -- cgit v1.2.3 From fce6e0338abe910ba6d4db0657ae8adc6aa1a72b Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Thu, 3 Apr 2014 14:48:43 -0700 Subject: printk: do not compute the size of the message twice This is just a tiny optimization. It removes duplicate computation of the message size. Signed-off-by: Petr Mladek Cc: Steven Rostedt Cc: Frederic Weisbecker Cc: Jan Kara Cc: Michal Hocko Cc: Kay Sievers Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/printk/printk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index b3a1790f9e05..ff9faf4e3cd5 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -351,7 +351,7 @@ static void log_store(int facility, int level, else msg->ts_nsec = local_clock(); memset(log_dict(msg) + dict_len, 0, pad_len); - msg->len = sizeof(struct printk_log) + text_len + dict_len + pad_len; + msg->len = size; /* insert message */ log_next_idx += msg->len; -- cgit v1.2.3 From 72581487a61f6304a7cc32e189eb65fb1c920a53 Mon Sep 17 00:00:00 2001 From: Jane Li Date: Thu, 3 Apr 2014 14:48:45 -0700 Subject: printk: fix one circular lockdep warning about console_lock Fix a warning about possible circular locking dependency. If do in following sequence: enter suspend -> resume -> plug-out CPUx (echo 0 > cpux/online) lockdep will show warning as following: ====================================================== [ INFO: possible circular locking dependency detected ] 3.10.0 #2 Tainted: G O ------------------------------------------------------- sh/1271 is trying to acquire lock: (console_lock){+.+.+.}, at: console_cpu_notify+0x20/0x2c but task is already holding lock: (cpu_hotplug.lock){+.+.+.}, at: cpu_hotplug_begin+0x2c/0x58 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #2 (cpu_hotplug.lock){+.+.+.}: lock_acquire+0x98/0x12c mutex_lock_nested+0x50/0x3d8 cpu_hotplug_begin+0x2c/0x58 _cpu_up+0x24/0x154 cpu_up+0x64/0x84 smp_init+0x9c/0xd4 kernel_init_freeable+0x78/0x1c8 kernel_init+0x8/0xe4 ret_from_fork+0x14/0x2c -> #1 (cpu_add_remove_lock){+.+.+.}: lock_acquire+0x98/0x12c mutex_lock_nested+0x50/0x3d8 disable_nonboot_cpus+0x8/0xe8 suspend_devices_and_enter+0x214/0x448 pm_suspend+0x1e4/0x284 try_to_suspend+0xa4/0xbc process_one_work+0x1c4/0x4fc worker_thread+0x138/0x37c kthread+0xa4/0xb0 ret_from_fork+0x14/0x2c -> #0 (console_lock){+.+.+.}: __lock_acquire+0x1b38/0x1b80 lock_acquire+0x98/0x12c console_lock+0x54/0x68 console_cpu_notify+0x20/0x2c notifier_call_chain+0x44/0x84 __cpu_notify+0x2c/0x48 cpu_notify_nofail+0x8/0x14 _cpu_down+0xf4/0x258 cpu_down+0x24/0x40 store_online+0x30/0x74 dev_attr_store+0x18/0x24 sysfs_write_file+0x16c/0x19c vfs_write+0xb4/0x190 SyS_write+0x3c/0x70 ret_fast_syscall+0x0/0x48 Chain exists of: console_lock --> cpu_add_remove_lock --> cpu_hotplug.lock Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(cpu_hotplug.lock); lock(cpu_add_remove_lock); lock(cpu_hotplug.lock); lock(console_lock); *** DEADLOCK *** There are three locks involved in two sequence: a) pm suspend: console_lock (@suspend_console()) cpu_add_remove_lock (@disable_nonboot_cpus()) cpu_hotplug.lock (@_cpu_down()) b) Plug-out CPUx: cpu_add_remove_lock (@(cpu_down()) cpu_hotplug.lock (@_cpu_down()) console_lock (@console_cpu_notify()) => Lockdeps prints warning log. There should be not real deadlock, as flag of console_suspended can protect this. Although console_suspend() releases console_sem, it doesn't tell lockdep about it. That results in the lockdep warning about circular locking when doing the following: enter suspend -> resume -> plug-out CPUx (echo 0 > cpux/online) Fix the problem by telling lockdep we actually released the semaphore in console_suspend() and acquired it again in console_resume(). Signed-off-by: Jane Li Reviewed-by: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/printk/printk.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel') diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index ff9faf4e3cd5..a45b50962295 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -1883,6 +1883,7 @@ void suspend_console(void) console_lock(); console_suspended = 1; up(&console_sem); + mutex_release(&console_lock_dep_map, 1, _RET_IP_); } void resume_console(void) @@ -1890,6 +1891,7 @@ void resume_console(void) if (!console_suspend_enabled) return; down(&console_sem); + mutex_acquire(&console_lock_dep_map, 0, 0, _RET_IP_); console_suspended = 0; console_unlock(); } -- cgit v1.2.3