From 61e96496d3c949701a48b908f99f4ed891cd1101 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 2 Aug 2016 14:03:44 -0700 Subject: task_work: use READ_ONCE/lockless_dereference, avoid pi_lock if !task_works Change task_work_cancel() to use lockless_dereference(), this is what the code really wants but we didn't have this helper when it was written. Also add the fast-path task->task_works == NULL check, in the likely case this task has no pending works and we can avoid spin_lock(task->pi_lock). While at it, change other users of ACCESS_ONCE() to use READ_ONCE(). Link: http://lkml.kernel.org/r/20160610150042.GA13868@redhat.com Signed-off-by: Oleg Nesterov Cc: Andrea Parri Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/task_work.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/task_work.c b/kernel/task_work.c index 6ab4842b00e8..d513051fcca2 100644 --- a/kernel/task_work.c +++ b/kernel/task_work.c @@ -29,7 +29,7 @@ task_work_add(struct task_struct *task, struct callback_head *work, bool notify) struct callback_head *head; do { - head = ACCESS_ONCE(task->task_works); + head = READ_ONCE(task->task_works); if (unlikely(head == &work_exited)) return -ESRCH; work->next = head; @@ -57,6 +57,9 @@ task_work_cancel(struct task_struct *task, task_work_func_t func) struct callback_head **pprev = &task->task_works; struct callback_head *work; unsigned long flags; + + if (likely(!task->task_works)) + return NULL; /* * If cmpxchg() fails we continue without updating pprev. * Either we raced with task_work_add() which added the @@ -64,8 +67,7 @@ task_work_cancel(struct task_struct *task, task_work_func_t func) * we raced with task_work_run(), *pprev == NULL/exited. */ raw_spin_lock_irqsave(&task->pi_lock, flags); - while ((work = ACCESS_ONCE(*pprev))) { - smp_read_barrier_depends(); + while ((work = lockless_dereference(*pprev))) { if (work->func != func) pprev = &work->next; else if (cmpxchg(pprev, work, work->next) == work) @@ -95,7 +97,7 @@ void task_work_run(void) * work_exited unless the list is empty. */ do { - work = ACCESS_ONCE(task->task_works); + work = READ_ONCE(task->task_works); head = !work && (task->flags & PF_EXITING) ? &work_exited : NULL; } while (cmpxchg(&task->task_works, work, head) != work); -- cgit v1.2.3 From 9d5059c959ac739dbf837cec14586e58e7a67292 Mon Sep 17 00:00:00 2001 From: Luis de Bethencourt Date: Tue, 2 Aug 2016 14:03:47 -0700 Subject: dynamic_debug: only add header when used kernel.h header doesn't directly use dynamic debug, instead we can include it in module.c (which used it via kernel.h). printk.h only uses it if CONFIG_DYNAMIC_DEBUG is on, changing the inclusion to only happen in that case. Link: http://lkml.kernel.org/r/1468429793-16917-1-git-send-email-luisbg@osg.samsung.com [luisbg@osg.samsung.com: include dynamic_debug.h in drb_int.h] Link: http://lkml.kernel.org/r/1468447828-18558-2-git-send-email-luisbg@osg.samsung.com Signed-off-by: Luis de Bethencourt Cc: Rusty Russell Cc: Hidehiro Kawai Cc: Borislav Petkov Cc: Michal Nazarewicz Cc: Rasmus Villemoes Cc: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/module.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel') diff --git a/kernel/module.c b/kernel/module.c index 5f71aa63ed2a..a0f48b8b00da 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -60,6 +60,7 @@ #include #include #include +#include #include #include "module-internal.h" -- cgit v1.2.3 From bebca05281d039e4144e1c51f402fd1d5f54b5e2 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Tue, 2 Aug 2016 14:03:50 -0700 Subject: printk: do not include interrupt.h A trivial cosmetic change: interrupt.h header is redundant since commit 6b898c07cb1d ("console: use might_sleep in console_lock"). Link: http://lkml.kernel.org/r/20160620132847.21930-1-sergey.senozhatsky@gmail.com Signed-off-by: Sergey Senozhatsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/printk/printk.c | 1 - 1 file changed, 1 deletion(-) (limited to 'kernel') diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index d4de33934dac..09af62e71fee 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -26,7 +26,6 @@ #include #include #include -#include /* For in_interrupt() */ #include #include #include -- cgit v1.2.3 From 874f9c7da9a4acbc1b9e12ca722579fb50e4d142 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 2 Aug 2016 14:03:53 -0700 Subject: printk: create pr_ functions Using functions instead of macros can reduce overall code size by eliminating unnecessary "KERN_SOH" prefixes from format strings. defconfig x86-64: $ size vmlinux* text data bss dec hex filename 10193570 4331464 1105920 15630954 ee826a vmlinux.new 10192623 4335560 1105920 15634103 ee8eb7 vmlinux.old As the return value are unimportant and unused in the kernel tree, these new functions return void. Miscellanea: - change pr_ macros to call new __pr_ functions - change vprintk_nmi and vprintk_default to add LOGLEVEL_ argument [akpm@linux-foundation.org: fix LOGLEVEL_INFO, per Joe] Link: http://lkml.kernel.org/r/e16cc34479dfefcae37c98b481e6646f0f69efc3.1466718827.git.joe@perches.com Signed-off-by: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/printk/internal.h | 16 ++++++++++------ kernel/printk/nmi.c | 13 +++++++++++-- kernel/printk/printk.c | 27 ++++++++++++++++++++++++--- 3 files changed, 45 insertions(+), 11 deletions(-) (limited to 'kernel') diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h index 7fd2838fa417..5d4505f30083 100644 --- a/kernel/printk/internal.h +++ b/kernel/printk/internal.h @@ -16,9 +16,11 @@ */ #include -typedef __printf(1, 0) int (*printk_func_t)(const char *fmt, va_list args); +typedef __printf(2, 0) int (*printk_func_t)(int level, const char *fmt, + va_list args); -int __printf(1, 0) vprintk_default(const char *fmt, va_list args); +__printf(2, 0) +int vprintk_default(int level, const char *fmt, va_list args); #ifdef CONFIG_PRINTK_NMI @@ -31,9 +33,10 @@ extern raw_spinlock_t logbuf_lock; * via per-CPU variable. */ DECLARE_PER_CPU(printk_func_t, printk_func); -static inline __printf(1, 0) int vprintk_func(const char *fmt, va_list args) +__printf(2, 0) +static inline int vprintk_func(int level, const char *fmt, va_list args) { - return this_cpu_read(printk_func)(fmt, args); + return this_cpu_read(printk_func)(level, fmt, args); } extern atomic_t nmi_message_lost; @@ -44,9 +47,10 @@ static inline int get_nmi_message_lost(void) #else /* CONFIG_PRINTK_NMI */ -static inline __printf(1, 0) int vprintk_func(const char *fmt, va_list args) +__printf(2, 0) +static inline int vprintk_func(int level, const char *fmt, va_list args) { - return vprintk_default(fmt, args); + return vprintk_default(level, fmt, args); } static inline int get_nmi_message_lost(void) diff --git a/kernel/printk/nmi.c b/kernel/printk/nmi.c index b69eb8a2876f..bc3eeb1ae6da 100644 --- a/kernel/printk/nmi.c +++ b/kernel/printk/nmi.c @@ -58,7 +58,7 @@ static DEFINE_PER_CPU(struct nmi_seq_buf, nmi_print_seq); * one writer running. But the buffer might get flushed from another * CPU, so we need to be careful. */ -static int vprintk_nmi(const char *fmt, va_list args) +static int vprintk_nmi(int level, const char *fmt, va_list args) { struct nmi_seq_buf *s = this_cpu_ptr(&nmi_print_seq); int add = 0; @@ -79,7 +79,16 @@ again: if (!len) smp_rmb(); - add = vsnprintf(s->buffer + len, sizeof(s->buffer) - len, fmt, args); + if (level != LOGLEVEL_DEFAULT) { + add = snprintf(s->buffer + len, sizeof(s->buffer) - len, + KERN_SOH "%c", '0' + level); + add += vsnprintf(s->buffer + len + add, + sizeof(s->buffer) - len - add, + fmt, args); + } else { + add = vsnprintf(s->buffer + len, sizeof(s->buffer) - len, + fmt, args); + } /* * Do it once again if the buffer has been flushed in the meantime. diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 09af62e71fee..d2accf2f4448 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -1801,7 +1801,28 @@ asmlinkage int printk_emit(int facility, int level, } EXPORT_SYMBOL(printk_emit); -int vprintk_default(const char *fmt, va_list args) +#ifdef CONFIG_PRINTK +#define define_pr_level(func, loglevel) \ +asmlinkage __visible void func(const char *fmt, ...) \ +{ \ + va_list args; \ + \ + va_start(args, fmt); \ + vprintk_default(loglevel, fmt, args); \ + va_end(args); \ +} \ +EXPORT_SYMBOL(func) + +define_pr_level(__pr_emerg, LOGLEVEL_EMERG); +define_pr_level(__pr_alert, LOGLEVEL_ALERT); +define_pr_level(__pr_crit, LOGLEVEL_CRIT); +define_pr_level(__pr_err, LOGLEVEL_ERR); +define_pr_level(__pr_warn, LOGLEVEL_WARNING); +define_pr_level(__pr_notice, LOGLEVEL_NOTICE); +define_pr_level(__pr_info, LOGLEVEL_INFO); +#endif + +int vprintk_default(int level, const char *fmt, va_list args) { int r; @@ -1811,7 +1832,7 @@ int vprintk_default(const char *fmt, va_list args) return r; } #endif - r = vprintk_emit(0, LOGLEVEL_DEFAULT, NULL, 0, fmt, args); + r = vprintk_emit(0, level, NULL, 0, fmt, args); return r; } @@ -1844,7 +1865,7 @@ asmlinkage __visible int printk(const char *fmt, ...) int r; va_start(args, fmt); - r = vprintk_func(fmt, args); + r = vprintk_func(LOGLEVEL_DEFAULT, fmt, args); va_end(args); return r; -- cgit v1.2.3 From cf7754441c563230ed74096fcd4b8cca49910550 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Tue, 2 Aug 2016 14:03:56 -0700 Subject: printk: introduce suppress_message_printing() Messages' levels and console log level are inspected when the actual printing occurs, which may provoke console_unlock() and console_cont_flush() to waste CPU cycles on every message that has loglevel above the current console_loglevel. Schematically, console_unlock() does the following: console_unlock() { ... for (;;) { ... raw_spin_lock_irqsave(&logbuf_lock, flags); skip: msg = log_from_idx(console_idx); if (msg->flags & LOG_NOCONS) { ... goto skip; } level = msg->level; len += msg_print_text(); >> sprintfs memcpy, etc. if (nr_ext_console_drivers) { ext_len = msg_print_ext_header(); >> scnprintf ext_len += msg_print_ext_body(); >> scnprintfs etc. } ... raw_spin_unlock(&logbuf_lock); call_console_drivers(level, ext_text, ext_len, text, len) { if (level >= console_loglevel && >> drop the message !ignore_loglevel) return; console->write(...); } local_irq_restore(flags); } ... } The thing here is this deferred `level >= console_loglevel' check. We are wasting CPU cycles on sprintfs/memcpy/etc. preparing the messages that we will eventually drop. This can be huge when we register a new CON_PRINTBUFFER console, for instance. For every such a console register_console() resets the console_seq, console_idx, console_prev and sets a `exclusive console' pointer to replay the log buffer to that just-registered console. And there can be a lot of messages to replay, in the worst case most of which can be dropped after console_loglevel test. We know messages' levels long before we call msg_print_text() and friends, so we can just move console_loglevel check out of call_console_drivers() and format a new message only if we are sure that it won't be dropped. The patch factors out loglevel check into suppress_message_printing() function and tests message->level and console_loglevel before formatting functions in console_unlock() and console_cont_flush() are getting executed. This improves things not only for exclusive CON_PRINTBUFFER consoles, but for every console_unlock() that attempts to print a message of level above the console_loglevel. Link: http://lkml.kernel.org/r/20160627135012.8229-1-sergey.senozhatsky@gmail.com Signed-off-by: Sergey Senozhatsky Reviewed-by: Petr Mladek Cc: Tejun Heo Cc: Jan Kara Cc: Calvin Owens Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/printk/printk.c | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) (limited to 'kernel') diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index d2accf2f4448..8bdce14254f4 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -985,6 +985,11 @@ module_param(ignore_loglevel, bool, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(ignore_loglevel, "ignore loglevel setting (prints all kernel messages to the console)"); +static bool suppress_message_printing(int level) +{ + return (level >= console_loglevel && !ignore_loglevel); +} + #ifdef CONFIG_BOOT_PRINTK_DELAY static int boot_delay; /* msecs delay after each printk during bootup */ @@ -1014,7 +1019,7 @@ static void boot_delay_msec(int level) unsigned long timeout; if ((boot_delay == 0 || system_state != SYSTEM_BOOTING) - || (level >= console_loglevel && !ignore_loglevel)) { + || suppress_message_printing(level)) { return; } @@ -1438,8 +1443,6 @@ static void call_console_drivers(int level, trace_console(text, len); - if (level >= console_loglevel && !ignore_loglevel) - return; if (!console_drivers) return; @@ -1908,6 +1911,7 @@ static void call_console_drivers(int level, static size_t msg_print_text(const struct printk_log *msg, enum log_flags prev, bool syslog, char *buf, size_t size) { return 0; } static size_t cont_print_text(char *text, size_t size) { return 0; } +static bool suppress_message_printing(int level) { return false; } /* Still needs to be defined for users */ DEFINE_PER_CPU(printk_func_t, printk_func); @@ -2187,6 +2191,13 @@ static void console_cont_flush(char *text, size_t size) if (!cont.len) goto out; + if (suppress_message_printing(cont.level)) { + cont.cons = cont.len; + if (cont.flushed) + cont.len = 0; + goto out; + } + /* * We still queue earlier records, likely because the console was * busy. The earlier ones need to be printed before this one, we @@ -2290,10 +2301,13 @@ skip: break; msg = log_from_idx(console_idx); - if (msg->flags & LOG_NOCONS) { + level = msg->level; + if ((msg->flags & LOG_NOCONS) || + suppress_message_printing(level)) { /* * Skip record we have buffered and already printed - * directly to the console when we received it. + * directly to the console when we received it, and + * record that has level above the console loglevel. */ console_idx = log_next(console_idx); console_seq++; @@ -2307,7 +2321,6 @@ skip: goto skip; } - level = msg->level; len += msg_print_text(msg, console_prev, false, text + len, sizeof(text) - len); if (nr_ext_console_drivers) { -- cgit v1.2.3 From 40a7d9f5f90681c6d7890b6a07f230bb4afe7e39 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 2 Aug 2016 14:03:59 -0700 Subject: printk: include instead of asm-generic headers are generic implementations for architecture specific code and should not be included by common code. Thus use the asm/ version of sections.h to get at the linker sections. Link: http://lkml.kernel.org/r/1468285008-7331-1-git-send-email-hch@lst.de Signed-off-by: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/printk/printk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 8bdce14254f4..70c66c5ba212 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -47,7 +47,7 @@ #include #include -#include +#include #define CREATE_TRACE_POINTS #include -- cgit v1.2.3 From 750afe7babd117daabebf4855da18e4418ea845e Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 2 Aug 2016 14:04:07 -0700 Subject: printk: add kernel parameter to control writes to /dev/kmsg MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a "printk.devkmsg" kernel command line parameter which controls how userspace writes into /dev/kmsg. It has three options: * ratelimit - ratelimit logging from userspace. * on - unlimited logging from userspace * off - logging from userspace gets ignored The default setting is to ratelimit the messages written to it. This changes the kernel default setting of "on" to "ratelimit" and we do that because we want to keep userspace spamming /dev/kmsg to sane levels. This is especially moot when a small kernel log buffer wraps around and messages get lost. So the ratelimiting setting should be a sane setting where kernel messages should have a bit higher chance of survival from all the spamming. It additionally does not limit logging to /dev/kmsg while the system is booting if we haven't disabled it on the command line. Furthermore, we can control the logging from a lower priority sysctl interface - kernel.printk_devkmsg. That interface will succeed only if printk.devkmsg *hasn't* been supplied on the command line. If it has, then printk.devkmsg is a one-time setting which remains for the duration of the system lifetime. This "locking" of the setting is to prevent userspace from changing the logging on us through sysctl(2). This patch is based on previous patches from Linus and Steven. [bp@suse.de: fixes] Link: http://lkml.kernel.org/r/20160719072344.GC25563@nazgul.tnic Link: http://lkml.kernel.org/r/20160716061745.15795-3-bp@alien8.de Signed-off-by: Borislav Petkov Cc: Dave Young Cc: Franck Bui Cc: Greg Kroah-Hartman Cc: Ingo Molnar Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Uwe Kleine-König Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/printk/printk.c | 142 ++++++++++++++++++++++++++++++++++++++++++++++--- kernel/sysctl.c | 7 +++ 2 files changed, 141 insertions(+), 8 deletions(-) (limited to 'kernel') diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 70c66c5ba212..a5ef95ca18c9 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -85,6 +85,111 @@ static struct lockdep_map console_lock_dep_map = { }; #endif +enum devkmsg_log_bits { + __DEVKMSG_LOG_BIT_ON = 0, + __DEVKMSG_LOG_BIT_OFF, + __DEVKMSG_LOG_BIT_LOCK, +}; + +enum devkmsg_log_masks { + DEVKMSG_LOG_MASK_ON = BIT(__DEVKMSG_LOG_BIT_ON), + DEVKMSG_LOG_MASK_OFF = BIT(__DEVKMSG_LOG_BIT_OFF), + DEVKMSG_LOG_MASK_LOCK = BIT(__DEVKMSG_LOG_BIT_LOCK), +}; + +/* Keep both the 'on' and 'off' bits clear, i.e. ratelimit by default: */ +#define DEVKMSG_LOG_MASK_DEFAULT 0 + +static unsigned int __read_mostly devkmsg_log = DEVKMSG_LOG_MASK_DEFAULT; + +static int __control_devkmsg(char *str) +{ + if (!str) + return -EINVAL; + + if (!strncmp(str, "on", 2)) { + devkmsg_log = DEVKMSG_LOG_MASK_ON; + return 2; + } else if (!strncmp(str, "off", 3)) { + devkmsg_log = DEVKMSG_LOG_MASK_OFF; + return 3; + } else if (!strncmp(str, "ratelimit", 9)) { + devkmsg_log = DEVKMSG_LOG_MASK_DEFAULT; + return 9; + } + return -EINVAL; +} + +static int __init control_devkmsg(char *str) +{ + if (__control_devkmsg(str) < 0) + return 1; + + /* + * Set sysctl string accordingly: + */ + if (devkmsg_log == DEVKMSG_LOG_MASK_ON) { + memset(devkmsg_log_str, 0, DEVKMSG_STR_MAX_SIZE); + strncpy(devkmsg_log_str, "on", 2); + } else if (devkmsg_log == DEVKMSG_LOG_MASK_OFF) { + memset(devkmsg_log_str, 0, DEVKMSG_STR_MAX_SIZE); + strncpy(devkmsg_log_str, "off", 3); + } + /* else "ratelimit" which is set by default. */ + + /* + * Sysctl cannot change it anymore. The kernel command line setting of + * this parameter is to force the setting to be permanent throughout the + * runtime of the system. This is a precation measure against userspace + * trying to be a smarta** and attempting to change it up on us. + */ + devkmsg_log |= DEVKMSG_LOG_MASK_LOCK; + + return 0; +} +__setup("printk.devkmsg=", control_devkmsg); + +char devkmsg_log_str[DEVKMSG_STR_MAX_SIZE] = "ratelimit"; + +int devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + char old_str[DEVKMSG_STR_MAX_SIZE]; + unsigned int old; + int err; + + if (write) { + if (devkmsg_log & DEVKMSG_LOG_MASK_LOCK) + return -EINVAL; + + old = devkmsg_log; + strncpy(old_str, devkmsg_log_str, DEVKMSG_STR_MAX_SIZE); + } + + err = proc_dostring(table, write, buffer, lenp, ppos); + if (err) + return err; + + if (write) { + err = __control_devkmsg(devkmsg_log_str); + + /* + * Do not accept an unknown string OR a known string with + * trailing crap... + */ + if (err < 0 || (err + 1 != *lenp)) { + + /* ... and restore old setting. */ + devkmsg_log = old; + strncpy(devkmsg_log_str, old_str, DEVKMSG_STR_MAX_SIZE); + + return -EINVAL; + } + } + + return 0; +} + /* * Number of registered extended console drivers. * @@ -613,6 +718,7 @@ struct devkmsg_user { u64 seq; u32 idx; enum log_flags prev; + struct ratelimit_state rs; struct mutex lock; char buf[CONSOLE_EXT_LOG_MAX]; }; @@ -622,11 +728,24 @@ static ssize_t devkmsg_write(struct kiocb *iocb, struct iov_iter *from) char *buf, *line; int level = default_message_loglevel; int facility = 1; /* LOG_USER */ + struct file *file = iocb->ki_filp; + struct devkmsg_user *user = file->private_data; size_t len = iov_iter_count(from); ssize_t ret = len; - if (len > LOG_LINE_MAX) + if (!user || len > LOG_LINE_MAX) return -EINVAL; + + /* Ignore when user logging is disabled. */ + if (devkmsg_log & DEVKMSG_LOG_MASK_OFF) + return len; + + /* Ratelimit when not explicitly enabled. */ + if (!(devkmsg_log & DEVKMSG_LOG_MASK_ON)) { + if (!___ratelimit(&user->rs, current->comm)) + return ret; + } + buf = kmalloc(len+1, GFP_KERNEL); if (buf == NULL) return -ENOMEM; @@ -799,19 +918,24 @@ static int devkmsg_open(struct inode *inode, struct file *file) struct devkmsg_user *user; int err; - /* write-only does not need any file context */ - if ((file->f_flags & O_ACCMODE) == O_WRONLY) - return 0; + if (devkmsg_log & DEVKMSG_LOG_MASK_OFF) + return -EPERM; - err = check_syslog_permissions(SYSLOG_ACTION_READ_ALL, - SYSLOG_FROM_READER); - if (err) - return err; + /* write-only does not need any file context */ + if ((file->f_flags & O_ACCMODE) != O_WRONLY) { + err = check_syslog_permissions(SYSLOG_ACTION_READ_ALL, + SYSLOG_FROM_READER); + if (err) + return err; + } user = kmalloc(sizeof(struct devkmsg_user), GFP_KERNEL); if (!user) return -ENOMEM; + ratelimit_default_init(&user->rs); + ratelimit_set_flags(&user->rs, RATELIMIT_MSG_ON_RELEASE); + mutex_init(&user->lock); raw_spin_lock_irq(&logbuf_lock); @@ -830,6 +954,8 @@ static int devkmsg_release(struct inode *inode, struct file *file) if (!user) return 0; + ratelimit_state_exit(&user->rs); + mutex_destroy(&user->lock); kfree(user); return 0; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 53954631a4e1..b43d0b27c1fe 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -813,6 +813,13 @@ static struct ctl_table kern_table[] = { .extra1 = &zero, .extra2 = &ten_thousand, }, + { + .procname = "printk_devkmsg", + .data = devkmsg_log_str, + .maxlen = DEVKMSG_STR_MAX_SIZE, + .mode = 0644, + .proc_handler = devkmsg_sysctl_set_loglvl, + }, { .procname = "dmesg_restrict", .data = &dmesg_restrict, -- cgit v1.2.3 From 627393d44860386e948bb63a8e5b53f2cc44d070 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Tue, 2 Aug 2016 14:05:40 -0700 Subject: kernel/exit.c: quieten greatest stack depth printk Many targets enable CONFIG_DEBUG_STACK_USAGE, and while the information is useful, it isn't worthy of pr_warn(). Reduce it to pr_info(). Link: http://lkml.kernel.org/r/1466982072-29836-1-git-send-email-anton@ozlabs.org Signed-off-by: Anton Blanchard Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/exit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/exit.c b/kernel/exit.c index 84ae830234f8..2f974ae042a6 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -715,7 +715,7 @@ static void check_stack_usage(void) spin_lock(&low_water_lock); if (free < lowest_to_date) { - pr_warn("%s (%d) used greatest stack depth: %lu bytes left\n", + pr_info("%s (%d) used greatest stack depth: %lu bytes left\n", current->comm, task_pid_nr(current), free); lowest_to_date = free; } -- cgit v1.2.3 From 4caf9615247aceab56e91df6c0e11892ea55f4f0 Mon Sep 17 00:00:00 2001 From: Minfei Huang Date: Tue, 2 Aug 2016 14:05:45 -0700 Subject: kexec: return error number directly This is a cleanup patch to make kexec more clear to return error number directly. The variable result is useless, because there is no other function's return value assignes to it. So remove it. Link: http://lkml.kernel.org/r/1464179273-57668-1-git-send-email-mnghuan@gmail.com Signed-off-by: Minfei Huang Cc: Dave Young Cc: Baoquan He Cc: Borislav Petkov Cc: Xunlei Pang Cc: Atsushi Kumagai Cc: Vivek Goyal Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/kexec_core.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) (limited to 'kernel') diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c index 56b3ed0927b0..23311c803b1b 100644 --- a/kernel/kexec_core.c +++ b/kernel/kexec_core.c @@ -147,7 +147,7 @@ static struct page *kimage_alloc_page(struct kimage *image, int sanity_check_segment_list(struct kimage *image) { - int result, i; + int i; unsigned long nr_segments = image->nr_segments; /* @@ -163,16 +163,15 @@ int sanity_check_segment_list(struct kimage *image) * simply because addresses are changed to page size * granularity. */ - result = -EADDRNOTAVAIL; for (i = 0; i < nr_segments; i++) { unsigned long mstart, mend; mstart = image->segment[i].mem; mend = mstart + image->segment[i].memsz; if ((mstart & ~PAGE_MASK) || (mend & ~PAGE_MASK)) - return result; + return -EADDRNOTAVAIL; if (mend >= KEXEC_DESTINATION_MEMORY_LIMIT) - return result; + return -EADDRNOTAVAIL; } /* Verify our destination addresses do not overlap. @@ -180,7 +179,6 @@ int sanity_check_segment_list(struct kimage *image) * through very weird things can happen with no * easy explanation as one segment stops on another. */ - result = -EINVAL; for (i = 0; i < nr_segments; i++) { unsigned long mstart, mend; unsigned long j; @@ -194,7 +192,7 @@ int sanity_check_segment_list(struct kimage *image) pend = pstart + image->segment[j].memsz; /* Do the segments overlap ? */ if ((mend > pstart) && (mstart < pend)) - return result; + return -EINVAL; } } @@ -203,10 +201,9 @@ int sanity_check_segment_list(struct kimage *image) * and it is easier to check up front than to be surprised * later on. */ - result = -EINVAL; for (i = 0; i < nr_segments; i++) { if (image->segment[i].bufsz > image->segment[i].memsz) - return result; + return -EINVAL; } /* @@ -220,7 +217,6 @@ int sanity_check_segment_list(struct kimage *image) */ if (image->type == KEXEC_TYPE_CRASH) { - result = -EADDRNOTAVAIL; for (i = 0; i < nr_segments; i++) { unsigned long mstart, mend; @@ -229,7 +225,7 @@ int sanity_check_segment_list(struct kimage *image) /* Ensure we are within the crash kernel limits */ if ((mstart < crashk_res.start) || (mend > crashk_res.end)) - return result; + return -EADDRNOTAVAIL; } } -- cgit v1.2.3 From 465d377701dfe6a08a9f361a3fd926dea7f89c74 Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 2 Aug 2016 14:05:57 -0700 Subject: kexec: ensure user memory sizes do not wrap Ensure that user memory sizes do not wrap around when validating the user input, which can lead to the following input validation working incorrectly. [akpm@linux-foundation.org: fix it for kexec-return-error-number-directly.patch] Link: http://lkml.kernel.org/r/E1b8koF-0004HM-5x@rmk-PC.armlinux.org.uk Signed-off-by: Russell King Reviewed-by: Pratyush Anand Acked-by: Baoquan He Cc: Keerthy Cc: Vitaly Andrianov Cc: Eric Biederman Cc: Dave Young Cc: Vivek Goyal Cc: Simon Horman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/kexec_core.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel') diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c index 23311c803b1b..5a83b2a9d584 100644 --- a/kernel/kexec_core.c +++ b/kernel/kexec_core.c @@ -168,6 +168,8 @@ int sanity_check_segment_list(struct kimage *image) mstart = image->segment[i].mem; mend = mstart + image->segment[i].memsz; + if (mstart > mend) + return -EADDRNOTAVAIL; if ((mstart & ~PAGE_MASK) || (mend & ~PAGE_MASK)) return -EADDRNOTAVAIL; if (mend >= KEXEC_DESTINATION_MEMORY_LIMIT) -- cgit v1.2.3 From dae28018f56645b61f5beb84d5831346d3c5e457 Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 2 Aug 2016 14:06:00 -0700 Subject: kdump: arrange for paddr_vmcoreinfo_note() to return phys_addr_t On PAE systems (eg, ARM LPAE) the vmcore note may be located above 4GB physical on 32-bit architectures, so we need a wider type than "unsigned long" here. Arrange for paddr_vmcoreinfo_note() to return a phys_addr_t, thereby allowing it to be located above 4GB. This makes no difference for kexec-tools, as they already assume a 64-bit type when reading from this file. Link: http://lkml.kernel.org/r/E1b8koK-0004HS-K9@rmk-PC.armlinux.org.uk Signed-off-by: Russell King Reviewed-by: Pratyush Anand Acked-by: Baoquan He Cc: Keerthy Cc: Vitaly Andrianov Cc: Eric Biederman Cc: Dave Young Cc: Vivek Goyal Cc: Simon Horman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/kexec_core.c | 2 +- kernel/ksysfs.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c index 5a83b2a9d584..dab03f17be25 100644 --- a/kernel/kexec_core.c +++ b/kernel/kexec_core.c @@ -1372,7 +1372,7 @@ void vmcoreinfo_append_str(const char *fmt, ...) void __weak arch_crash_save_vmcoreinfo(void) {} -unsigned long __weak paddr_vmcoreinfo_note(void) +phys_addr_t __weak paddr_vmcoreinfo_note(void) { return __pa((unsigned long)(char *)&vmcoreinfo_note); } diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c index 152da4a48867..9f1920d2d0c6 100644 --- a/kernel/ksysfs.c +++ b/kernel/ksysfs.c @@ -128,8 +128,8 @@ KERNEL_ATTR_RW(kexec_crash_size); static ssize_t vmcoreinfo_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { - return sprintf(buf, "%lx %x\n", - paddr_vmcoreinfo_note(), + phys_addr_t vmcore_base = paddr_vmcoreinfo_note(); + return sprintf(buf, "%pa %x\n", &vmcore_base, (unsigned int)sizeof(vmcoreinfo_note)); } KERNEL_ATTR_RO(vmcoreinfo); -- cgit v1.2.3 From 43546d8669d62d75fa69ca9a45d2f586665f56bd Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 2 Aug 2016 14:06:04 -0700 Subject: kexec: allow architectures to override boot mapping kexec physical addresses are the boot-time view of the system. For certain ARM systems (such as Keystone 2), the boot view of the system does not match the kernel's view of the system: the boot view uses a special alias in the lower 4GB of the physical address space. To cater for these kinds of setups, we need to translate between the boot view physical addresses and the normal kernel view physical addresses. This patch extracts the current transation points into linux/kexec.h, and allows an architecture to override the functions. Due to the translations required, we unfortunately end up with six translation functions, which are reduced down to four that the architecture can override. [akpm@linux-foundation.org: kexec.h needs asm/io.h for phys_to_virt()] Link: http://lkml.kernel.org/r/E1b8koP-0004HZ-Vf@rmk-PC.armlinux.org.uk Signed-off-by: Russell King Cc: Keerthy Cc: Pratyush Anand Cc: Vitaly Andrianov Cc: Eric Biederman Cc: Dave Young Cc: Baoquan He Cc: Vivek Goyal Cc: Simon Horman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/kexec.c | 3 ++- kernel/kexec_core.c | 26 +++++++++++++------------- 2 files changed, 15 insertions(+), 14 deletions(-) (limited to 'kernel') diff --git a/kernel/kexec.c b/kernel/kexec.c index 4384672d3245..980936a90ee6 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -48,7 +48,8 @@ static int kimage_alloc_init(struct kimage **rimage, unsigned long entry, if (kexec_on_panic) { /* Verify we have a valid entry point */ - if ((entry < crashk_res.start) || (entry > crashk_res.end)) + if ((entry < phys_to_boot_phys(crashk_res.start)) || + (entry > phys_to_boot_phys(crashk_res.end))) return -EADDRNOTAVAIL; } diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c index dab03f17be25..73d4c5f57dd8 100644 --- a/kernel/kexec_core.c +++ b/kernel/kexec_core.c @@ -225,8 +225,8 @@ int sanity_check_segment_list(struct kimage *image) mstart = image->segment[i].mem; mend = mstart + image->segment[i].memsz - 1; /* Ensure we are within the crash kernel limits */ - if ((mstart < crashk_res.start) || - (mend > crashk_res.end)) + if ((mstart < phys_to_boot_phys(crashk_res.start)) || + (mend > phys_to_boot_phys(crashk_res.end))) return -EADDRNOTAVAIL; } } @@ -350,7 +350,7 @@ static struct page *kimage_alloc_normal_control_pages(struct kimage *image, pages = kimage_alloc_pages(KEXEC_CONTROL_MEMORY_GFP, order); if (!pages) break; - pfn = page_to_pfn(pages); + pfn = page_to_boot_pfn(pages); epfn = pfn + count; addr = pfn << PAGE_SHIFT; eaddr = epfn << PAGE_SHIFT; @@ -476,7 +476,7 @@ static int kimage_add_entry(struct kimage *image, kimage_entry_t entry) return -ENOMEM; ind_page = page_address(page); - *image->entry = virt_to_phys(ind_page) | IND_INDIRECTION; + *image->entry = virt_to_boot_phys(ind_page) | IND_INDIRECTION; image->entry = ind_page; image->last_entry = ind_page + ((PAGE_SIZE/sizeof(kimage_entry_t)) - 1); @@ -531,13 +531,13 @@ void kimage_terminate(struct kimage *image) #define for_each_kimage_entry(image, ptr, entry) \ for (ptr = &image->head; (entry = *ptr) && !(entry & IND_DONE); \ ptr = (entry & IND_INDIRECTION) ? \ - phys_to_virt((entry & PAGE_MASK)) : ptr + 1) + boot_phys_to_virt((entry & PAGE_MASK)) : ptr + 1) static void kimage_free_entry(kimage_entry_t entry) { struct page *page; - page = pfn_to_page(entry >> PAGE_SHIFT); + page = boot_pfn_to_page(entry >> PAGE_SHIFT); kimage_free_pages(page); } @@ -631,7 +631,7 @@ static struct page *kimage_alloc_page(struct kimage *image, * have a match. */ list_for_each_entry(page, &image->dest_pages, lru) { - addr = page_to_pfn(page) << PAGE_SHIFT; + addr = page_to_boot_pfn(page) << PAGE_SHIFT; if (addr == destination) { list_del(&page->lru); return page; @@ -646,12 +646,12 @@ static struct page *kimage_alloc_page(struct kimage *image, if (!page) return NULL; /* If the page cannot be used file it away */ - if (page_to_pfn(page) > + if (page_to_boot_pfn(page) > (KEXEC_SOURCE_MEMORY_LIMIT >> PAGE_SHIFT)) { list_add(&page->lru, &image->unusable_pages); continue; } - addr = page_to_pfn(page) << PAGE_SHIFT; + addr = page_to_boot_pfn(page) << PAGE_SHIFT; /* If it is the destination page we want use it */ if (addr == destination) @@ -674,7 +674,7 @@ static struct page *kimage_alloc_page(struct kimage *image, struct page *old_page; old_addr = *old & PAGE_MASK; - old_page = pfn_to_page(old_addr >> PAGE_SHIFT); + old_page = boot_pfn_to_page(old_addr >> PAGE_SHIFT); copy_highpage(page, old_page); *old = addr | (*old & ~PAGE_MASK); @@ -730,7 +730,7 @@ static int kimage_load_normal_segment(struct kimage *image, result = -ENOMEM; goto out; } - result = kimage_add_page(image, page_to_pfn(page) + result = kimage_add_page(image, page_to_boot_pfn(page) << PAGE_SHIFT); if (result < 0) goto out; @@ -791,7 +791,7 @@ static int kimage_load_crash_segment(struct kimage *image, char *ptr; size_t uchunk, mchunk; - page = pfn_to_page(maddr >> PAGE_SHIFT); + page = boot_pfn_to_page(maddr >> PAGE_SHIFT); if (!page) { result = -ENOMEM; goto out; @@ -919,7 +919,7 @@ void __weak crash_free_reserved_phys_range(unsigned long begin, unsigned long addr; for (addr = begin; addr < end; addr += PAGE_SIZE) - free_reserved_page(pfn_to_page(addr >> PAGE_SHIFT)); + free_reserved_page(boot_pfn_to_page(addr >> PAGE_SHIFT)); } int crash_shrink_memory(unsigned long new_size) -- cgit v1.2.3 From b26e27ddfd2a986dc53e259aba572f3aac182eb8 Mon Sep 17 00:00:00 2001 From: Hidehiro Kawai Date: Tue, 2 Aug 2016 14:06:13 -0700 Subject: kexec: use core_param for crash_kexec_post_notifiers boot option crash_kexec_post_notifiers ia a boot option which controls whether the 1st kernel calls panic notifiers or not before booting the 2nd kernel. However, there is no need to limit it to being modifiable only at boot time. So, use core_param instead of early_param. Link: http://lkml.kernel.org/r/20160705113327.5864.43139.stgit@softrs Signed-off-by: Hidehiro Kawai Cc: Dave Young Cc: Baoquan He Cc: Vivek Goyal Cc: Eric Biederman Cc: Masami Hiramatsu Cc: Borislav Petkov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/panic.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) (limited to 'kernel') diff --git a/kernel/panic.c b/kernel/panic.c index 8aa74497cc5a..ca8cea1ef673 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -108,6 +108,7 @@ void panic(const char *fmt, ...) long i, i_next = 0; int state = 0; int old_cpu, this_cpu; + bool _crash_kexec_post_notifiers = crash_kexec_post_notifiers; /* * Disable local interrupts. This will prevent panic_smp_self_stop @@ -160,7 +161,7 @@ void panic(const char *fmt, ...) * * Bypass the panic_cpu check and call __crash_kexec directly. */ - if (!crash_kexec_post_notifiers) { + if (!_crash_kexec_post_notifiers) { printk_nmi_flush_on_panic(); __crash_kexec(NULL); } @@ -191,7 +192,7 @@ void panic(const char *fmt, ...) * * Bypass the panic_cpu check and call __crash_kexec directly. */ - if (crash_kexec_post_notifiers) + if (_crash_kexec_post_notifiers) __crash_kexec(NULL); bust_spinlocks(0); @@ -571,13 +572,7 @@ EXPORT_SYMBOL(__stack_chk_fail); core_param(panic, panic_timeout, int, 0644); core_param(pause_on_oops, pause_on_oops, int, 0644); core_param(panic_on_warn, panic_on_warn, int, 0644); - -static int __init setup_crash_kexec_post_notifiers(char *s) -{ - crash_kexec_post_notifiers = true; - return 0; -} -early_param("crash_kexec_post_notifiers", setup_crash_kexec_post_notifiers); +core_param(crash_kexec_post_notifiers, crash_kexec_post_notifiers, bool, 0644); static int __init oops_setup(char *s) { -- cgit v1.2.3 From 21db79e8bb054d0351a6b1b464f1c9c47a2e6e8d Mon Sep 17 00:00:00 2001 From: Petr Tesarik Date: Tue, 2 Aug 2016 14:06:16 -0700 Subject: kexec: add a kexec_crash_loaded() function Provide a wrapper function to be used by kernel code to check whether a crash kernel is loaded. It returns the same value that can be seen in /sys/kernel/kexec_crash_loaded by userspace programs. I'm exporting the function, because it will be used by Xen, and it is possible to compile Xen modules separately to enable the use of PV drivers with unmodified bare-metal kernels. Link: http://lkml.kernel.org/r/20160713121955.14969.69080.stgit@hananiah.suse.cz Signed-off-by: Petr Tesarik Cc: Juergen Gross Cc: Josh Triplett Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Eric Biederman Cc: "H. Peter Anvin" Cc: Boris Ostrovsky Cc: "Paul E. McKenney" Cc: Dave Young Cc: David Vrabel Cc: Vivek Goyal Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/kexec_core.c | 6 ++++++ kernel/ksysfs.c | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c index 73d4c5f57dd8..704534029a00 100644 --- a/kernel/kexec_core.c +++ b/kernel/kexec_core.c @@ -95,6 +95,12 @@ int kexec_should_crash(struct task_struct *p) return 0; } +int kexec_crash_loaded(void) +{ + return !!kexec_crash_image; +} +EXPORT_SYMBOL_GPL(kexec_crash_loaded); + /* * When kexec transitions to the new kernel there is a one-to-one * mapping between physical and virtual addresses. On processors diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c index 9f1920d2d0c6..ee1bc1bb8feb 100644 --- a/kernel/ksysfs.c +++ b/kernel/ksysfs.c @@ -101,7 +101,7 @@ KERNEL_ATTR_RO(kexec_loaded); static ssize_t kexec_crash_loaded_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { - return sprintf(buf, "%d\n", !!kexec_crash_image); + return sprintf(buf, "%d\n", kexec_crash_loaded()); } KERNEL_ATTR_RO(kexec_crash_loaded); -- cgit v1.2.3 From 1730f146604ea426e54938cdbcf87df1047ef0dc Mon Sep 17 00:00:00 2001 From: zhong jiang Date: Tue, 2 Aug 2016 14:06:22 -0700 Subject: kexec: add restriction on kexec_load() segment sizes I hit the following issue when run trinity in my system. The kernel is 3.4 version, but mainline has the same issue. The root cause is that the segment size is too large so the kerenl spends too long trying to allocate a page. Other cases will block until the test case quits. Also, OOM conditions will occur. Call Trace: __alloc_pages_nodemask+0x14c/0x8f0 alloc_pages_current+0xaf/0x120 kimage_alloc_pages+0x10/0x60 kimage_alloc_control_pages+0x5d/0x270 machine_kexec_prepare+0xe5/0x6c0 ? kimage_free_page_list+0x52/0x70 sys_kexec_load+0x141/0x600 ? vfs_write+0x100/0x180 system_call_fastpath+0x16/0x1b The patch changes sanity_check_segment_list() to verify that the usage by all segments does not exceed half of memory. [akpm@linux-foundation.org: fix for kexec-return-error-number-directly.patch, update comment] Link: http://lkml.kernel.org/r/1469625474-53904-1-git-send-email-zhongjiang@huawei.com Signed-off-by: zhong jiang Suggested-by: Eric W. Biederman Cc: Vivek Goyal Cc: Dave Young Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/kexec_core.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'kernel') diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c index 704534029a00..561675589511 100644 --- a/kernel/kexec_core.c +++ b/kernel/kexec_core.c @@ -146,6 +146,7 @@ EXPORT_SYMBOL_GPL(kexec_crash_loaded); * allocating pages whose destination address we do not care about. */ #define KIMAGE_NO_DEST (-1UL) +#define PAGE_COUNT(x) (((x) + PAGE_SIZE - 1) >> PAGE_SHIFT) static struct page *kimage_alloc_page(struct kimage *image, gfp_t gfp_mask, @@ -155,6 +156,7 @@ int sanity_check_segment_list(struct kimage *image) { int i; unsigned long nr_segments = image->nr_segments; + unsigned long total_pages = 0; /* * Verify we have good destination addresses. The caller is @@ -214,6 +216,21 @@ int sanity_check_segment_list(struct kimage *image) return -EINVAL; } + /* + * Verify that no more than half of memory will be consumed. If the + * request from userspace is too large, a large amount of time will be + * wasted allocating pages, which can cause a soft lockup. + */ + for (i = 0; i < nr_segments; i++) { + if (PAGE_COUNT(image->segment[i].memsz) > totalram_pages / 2) + return -EINVAL; + + total_pages += PAGE_COUNT(image->segment[i].memsz); + } + + if (total_pages > totalram_pages / 2) + return -EINVAL; + /* * Verify we have good destination addresses. Normally * the caller is responsible for making certain we don't -- cgit v1.2.3 From 59dbb2a06fc2bcb752b964e036884fe9bb9dbbe0 Mon Sep 17 00:00:00 2001 From: Akash Goel Date: Tue, 2 Aug 2016 14:07:18 -0700 Subject: relay: add global mode support for buffer-only channels Commit 20d8b67c06fa ("relay: add buffer-only channels; useful for early logging") added support to use channels with no associated files. This is useful when the exact location of relay file is not known or the the parent directory of relay file is not available, while creating the channel and the logging has to start right from the boot. But there was no provision to use global mode with buffer-only channels, which is added by this patch, without modifying the interface where initially there will be a dummy invocation of create_buf_file callback through which kernel client can convey the need of a global buffer. For the use case where drivers/kernel clients want a simple interface for the userspace, which enables them to capture data/logs from relay file inorder & without any post processing, support of Global buffer mode is warranted. Modules, like i915, using relay_open() in early init would have to later register their buffer-only relays, once debugfs is available, by calling relay_late_setup_files(). Hence relay_late_setup_files() symbol also needs to be exported. Link: http://lkml.kernel.org/r/1468404563-11653-1-git-send-email-akash.goel@intel.com Signed-off-by: Akash Goel Cc: Eduard - Gabriel Munteanu Cc: Tom Zanussi Cc: Chris Wilson Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/relay.c | 34 ++++++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/relay.c b/kernel/relay.c index 04d7cf3ef8cf..d797502140b9 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -451,6 +451,13 @@ static struct rchan_buf *relay_open_buf(struct rchan *chan, unsigned int cpu) if (!dentry) goto free_buf; relay_set_buf_dentry(buf, dentry); + } else { + /* Only retrieve global info, nothing more, nothing less */ + dentry = chan->cb->create_buf_file(NULL, NULL, + S_IRUSR, buf, + &chan->is_global); + if (WARN_ON(dentry)) + goto free_buf; } buf->cpu = cpu; @@ -562,6 +569,10 @@ static int relay_hotcpu_callback(struct notifier_block *nb, * attributes specified. The created channel buffer files * will be named base_filename0...base_filenameN-1. File * permissions will be %S_IRUSR. + * + * If opening a buffer (@parent = NULL) that you later wish to register + * in a filesystem, call relay_late_setup_files() once the @parent dentry + * is available. */ struct rchan *relay_open(const char *base_filename, struct dentry *parent, @@ -640,8 +651,12 @@ static void __relay_set_buf_dentry(void *info) * * Returns 0 if successful, non-zero otherwise. * - * Use to setup files for a previously buffer-only channel. - * Useful to do early tracing in kernel, before VFS is up, for example. + * Use to setup files for a previously buffer-only channel created + * by relay_open() with a NULL parent dentry. + * + * For example, this is useful for perfomring early tracing in kernel, + * before VFS is up and then exposing the early results once the dentry + * is available. */ int relay_late_setup_files(struct rchan *chan, const char *base_filename, @@ -666,6 +681,20 @@ int relay_late_setup_files(struct rchan *chan, } chan->has_base_filename = 1; chan->parent = parent; + + if (chan->is_global) { + err = -EINVAL; + if (!WARN_ON_ONCE(!chan->buf[0])) { + dentry = relay_create_buf_file(chan, chan->buf[0], 0); + if (dentry && !WARN_ON_ONCE(!chan->is_global)) { + relay_set_buf_dentry(chan->buf[0], dentry); + err = 0; + } + } + mutex_unlock(&relay_channels_mutex); + return err; + } + curr_cpu = get_cpu(); /* * The CPU hotplug notifier ran before us and created buffers with @@ -706,6 +735,7 @@ int relay_late_setup_files(struct rchan *chan, return err; } +EXPORT_SYMBOL_GPL(relay_late_setup_files); /** * relay_switch_subbuf - switch to a new sub-buffer -- cgit v1.2.3 From 27eb6622ab67bad75814c9b7b08096cfb16be63a Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Tue, 2 Aug 2016 14:07:24 -0700 Subject: config: add android config fragments Copy the config fragments from the AOSP common kernel android-4.4 branch. It is becoming possible to run mainline kernels with Android, but the kernel defconfigs don't work as-is and debugging missing config options is a pain. Adding the config fragments into the kernel tree, makes configuring a mainline kernel as simple as: make ARCH=arm multi_v7_defconfig android-base.config android-recommended.config The following non-upstream config options were removed: CONFIG_NETFILTER_XT_MATCH_QTAGUID CONFIG_NETFILTER_XT_MATCH_QUOTA2 CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG CONFIG_PPPOLAC CONFIG_PPPOPNS CONFIG_SECURITY_PERF_EVENTS_RESTRICT CONFIG_USB_CONFIGFS_F_MTP CONFIG_USB_CONFIGFS_F_PTP CONFIG_USB_CONFIGFS_F_ACC CONFIG_USB_CONFIGFS_F_AUDIO_SRC CONFIG_USB_CONFIGFS_UEVENT CONFIG_INPUT_KEYCHORD CONFIG_INPUT_KEYRESET Link: http://lkml.kernel.org/r/1466708235-28593-1-git-send-email-robh@kernel.org Signed-off-by: Rob Herring Cc: Amit Pundir Cc: John Stultz Cc: Dmitry Shmidt Cc: Rom Lemarchand Cc: Greg Kroah-Hartman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/configs/android-base.config | 152 ++++++++++++++++++++++++++++++ kernel/configs/android-recommended.config | 121 ++++++++++++++++++++++++ 2 files changed, 273 insertions(+) create mode 100644 kernel/configs/android-base.config create mode 100644 kernel/configs/android-recommended.config (limited to 'kernel') diff --git a/kernel/configs/android-base.config b/kernel/configs/android-base.config new file mode 100644 index 000000000000..9f748ed7bea8 --- /dev/null +++ b/kernel/configs/android-base.config @@ -0,0 +1,152 @@ +# KEEP ALPHABETICALLY SORTED +# CONFIG_DEVKMEM is not set +# CONFIG_DEVMEM is not set +# CONFIG_INET_LRO is not set +# CONFIG_MODULES is not set +# CONFIG_OABI_COMPAT is not set +# CONFIG_SYSVIPC is not set +CONFIG_ANDROID=y +CONFIG_ANDROID_BINDER_IPC=y +CONFIG_ANDROID_LOW_MEMORY_KILLER=y +CONFIG_ARMV8_DEPRECATED=y +CONFIG_ASHMEM=y +CONFIG_AUDIT=y +CONFIG_BLK_DEV_DM=y +CONFIG_BLK_DEV_INITRD=y +CONFIG_CGROUPS=y +CONFIG_CGROUP_CPUACCT=y +CONFIG_CGROUP_DEBUG=y +CONFIG_CGROUP_FREEZER=y +CONFIG_CGROUP_SCHED=y +CONFIG_CP15_BARRIER_EMULATION=y +CONFIG_DM_CRYPT=y +CONFIG_DM_VERITY=y +CONFIG_DM_VERITY_FEC=y +CONFIG_EMBEDDED=y +CONFIG_FB=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_INET6_AH=y +CONFIG_INET6_ESP=y +CONFIG_INET6_IPCOMP=y +CONFIG_INET=y +CONFIG_INET_DIAG_DESTROY=y +CONFIG_INET_ESP=y +CONFIG_INET_XFRM_MODE_TUNNEL=y +CONFIG_IP6_NF_FILTER=y +CONFIG_IP6_NF_IPTABLES=y +CONFIG_IP6_NF_MANGLE=y +CONFIG_IP6_NF_RAW=y +CONFIG_IP6_NF_TARGET_REJECT=y +CONFIG_IPV6=y +CONFIG_IPV6_MIP6=y +CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_IPV6_OPTIMISTIC_DAD=y +CONFIG_IPV6_PRIVACY=y +CONFIG_IPV6_ROUTER_PREF=y +CONFIG_IPV6_ROUTE_INFO=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_NF_ARPFILTER=y +CONFIG_IP_NF_ARPTABLES=y +CONFIG_IP_NF_ARP_MANGLE=y +CONFIG_IP_NF_FILTER=y +CONFIG_IP_NF_IPTABLES=y +CONFIG_IP_NF_MANGLE=y +CONFIG_IP_NF_MATCH_AH=y +CONFIG_IP_NF_MATCH_ECN=y +CONFIG_IP_NF_MATCH_TTL=y +CONFIG_IP_NF_NAT=y +CONFIG_IP_NF_RAW=y +CONFIG_IP_NF_SECURITY=y +CONFIG_IP_NF_TARGET_MASQUERADE=y +CONFIG_IP_NF_TARGET_NETMAP=y +CONFIG_IP_NF_TARGET_REDIRECT=y +CONFIG_IP_NF_TARGET_REJECT=y +CONFIG_NET=y +CONFIG_NETDEVICES=y +CONFIG_NETFILTER=y +CONFIG_NETFILTER_TPROXY=y +CONFIG_NETFILTER_XT_MATCH_COMMENT=y +CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=y +CONFIG_NETFILTER_XT_MATCH_CONNMARK=y +CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y +CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=y +CONFIG_NETFILTER_XT_MATCH_HELPER=y +CONFIG_NETFILTER_XT_MATCH_IPRANGE=y +CONFIG_NETFILTER_XT_MATCH_LENGTH=y +CONFIG_NETFILTER_XT_MATCH_LIMIT=y +CONFIG_NETFILTER_XT_MATCH_MAC=y +CONFIG_NETFILTER_XT_MATCH_MARK=y +CONFIG_NETFILTER_XT_MATCH_PKTTYPE=y +CONFIG_NETFILTER_XT_MATCH_POLICY=y +CONFIG_NETFILTER_XT_MATCH_QUOTA=y +CONFIG_NETFILTER_XT_MATCH_SOCKET=y +CONFIG_NETFILTER_XT_MATCH_STATE=y +CONFIG_NETFILTER_XT_MATCH_STATISTIC=y +CONFIG_NETFILTER_XT_MATCH_STRING=y +CONFIG_NETFILTER_XT_MATCH_TIME=y +CONFIG_NETFILTER_XT_MATCH_U32=y +CONFIG_NETFILTER_XT_TARGET_CLASSIFY=y +CONFIG_NETFILTER_XT_TARGET_CONNMARK=y +CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=y +CONFIG_NETFILTER_XT_TARGET_IDLETIMER=y +CONFIG_NETFILTER_XT_TARGET_MARK=y +CONFIG_NETFILTER_XT_TARGET_NFLOG=y +CONFIG_NETFILTER_XT_TARGET_NFQUEUE=y +CONFIG_NETFILTER_XT_TARGET_SECMARK=y +CONFIG_NETFILTER_XT_TARGET_TCPMSS=y +CONFIG_NETFILTER_XT_TARGET_TPROXY=y +CONFIG_NETFILTER_XT_TARGET_TRACE=y +CONFIG_NET_CLS_ACT=y +CONFIG_NET_CLS_U32=y +CONFIG_NET_EMATCH=y +CONFIG_NET_EMATCH_U32=y +CONFIG_NET_KEY=y +CONFIG_NET_SCHED=y +CONFIG_NET_SCH_HTB=y +CONFIG_NF_CONNTRACK=y +CONFIG_NF_CONNTRACK_AMANDA=y +CONFIG_NF_CONNTRACK_EVENTS=y +CONFIG_NF_CONNTRACK_FTP=y +CONFIG_NF_CONNTRACK_H323=y +CONFIG_NF_CONNTRACK_IPV4=y +CONFIG_NF_CONNTRACK_IPV6=y +CONFIG_NF_CONNTRACK_IRC=y +CONFIG_NF_CONNTRACK_NETBIOS_NS=y +CONFIG_NF_CONNTRACK_PPTP=y +CONFIG_NF_CONNTRACK_SANE=y +CONFIG_NF_CONNTRACK_SECMARK=y +CONFIG_NF_CONNTRACK_TFTP=y +CONFIG_NF_CT_NETLINK=y +CONFIG_NF_CT_PROTO_DCCP=y +CONFIG_NF_CT_PROTO_SCTP=y +CONFIG_NF_CT_PROTO_UDPLITE=y +CONFIG_NF_NAT=y +CONFIG_NO_HZ=y +CONFIG_PACKET=y +CONFIG_PM_AUTOSLEEP=y +CONFIG_PM_WAKELOCKS=y +CONFIG_PPP=y +CONFIG_PPP_BSDCOMP=y +CONFIG_PPP_DEFLATE=y +CONFIG_PPP_MPPE=y +CONFIG_PREEMPT=y +CONFIG_QUOTA=y +CONFIG_RTC_CLASS=y +CONFIG_RT_GROUP_SCHED=y +CONFIG_SECURITY=y +CONFIG_SECURITY_NETWORK=y +CONFIG_SECURITY_SELINUX=y +CONFIG_SETEND_EMULATION=y +CONFIG_STAGING=y +CONFIG_SWP_EMULATION=y +CONFIG_SYNC=y +CONFIG_TUN=y +CONFIG_UNIX=y +CONFIG_USB_GADGET=y +CONFIG_USB_CONFIGFS=y +CONFIG_USB_CONFIGFS_F_FS=y +CONFIG_USB_CONFIGFS_F_MIDI=y +CONFIG_USB_OTG_WAKELOCK=y +CONFIG_XFRM_USER=y diff --git a/kernel/configs/android-recommended.config b/kernel/configs/android-recommended.config new file mode 100644 index 000000000000..e3b953e966d2 --- /dev/null +++ b/kernel/configs/android-recommended.config @@ -0,0 +1,121 @@ +# KEEP ALPHABETICALLY SORTED +# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set +# CONFIG_INPUT_MOUSE is not set +# CONFIG_LEGACY_PTYS is not set +# CONFIG_NF_CONNTRACK_SIP is not set +# CONFIG_PM_WAKELOCKS_GC is not set +# CONFIG_VT is not set +CONFIG_BACKLIGHT_LCD_SUPPORT=y +CONFIG_BLK_DEV_LOOP=y +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_SIZE=8192 +CONFIG_COMPACTION=y +CONFIG_DEBUG_RODATA=y +CONFIG_DM_UEVENT=y +CONFIG_DRAGONRISE_FF=y +CONFIG_ENABLE_DEFAULT_TRACERS=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_SECURITY=y +CONFIG_FUSE_FS=y +CONFIG_GREENASIA_FF=y +CONFIG_HIDRAW=y +CONFIG_HID_A4TECH=y +CONFIG_HID_ACRUX=y +CONFIG_HID_ACRUX_FF=y +CONFIG_HID_APPLE=y +CONFIG_HID_BELKIN=y +CONFIG_HID_CHERRY=y +CONFIG_HID_CHICONY=y +CONFIG_HID_CYPRESS=y +CONFIG_HID_DRAGONRISE=y +CONFIG_HID_ELECOM=y +CONFIG_HID_EMS_FF=y +CONFIG_HID_EZKEY=y +CONFIG_HID_GREENASIA=y +CONFIG_HID_GYRATION=y +CONFIG_HID_HOLTEK=y +CONFIG_HID_KENSINGTON=y +CONFIG_HID_KEYTOUCH=y +CONFIG_HID_KYE=y +CONFIG_HID_LCPOWER=y +CONFIG_HID_LOGITECH=y +CONFIG_HID_LOGITECH_DJ=y +CONFIG_HID_MAGICMOUSE=y +CONFIG_HID_MICROSOFT=y +CONFIG_HID_MONTEREY=y +CONFIG_HID_MULTITOUCH=y +CONFIG_HID_NTRIG=y +CONFIG_HID_ORTEK=y +CONFIG_HID_PANTHERLORD=y +CONFIG_HID_PETALYNX=y +CONFIG_HID_PICOLCD=y +CONFIG_HID_PRIMAX=y +CONFIG_HID_PRODIKEYS=y +CONFIG_HID_ROCCAT=y +CONFIG_HID_SAITEK=y +CONFIG_HID_SAMSUNG=y +CONFIG_HID_SMARTJOYPLUS=y +CONFIG_HID_SONY=y +CONFIG_HID_SPEEDLINK=y +CONFIG_HID_SUNPLUS=y +CONFIG_HID_THRUSTMASTER=y +CONFIG_HID_TIVO=y +CONFIG_HID_TOPSEED=y +CONFIG_HID_TWINHAN=y +CONFIG_HID_UCLOGIC=y +CONFIG_HID_WACOM=y +CONFIG_HID_WALTOP=y +CONFIG_HID_WIIMOTE=y +CONFIG_HID_ZEROPLUS=y +CONFIG_HID_ZYDACRON=y +CONFIG_INPUT_EVDEV=y +CONFIG_INPUT_GPIO=y +CONFIG_INPUT_JOYSTICK=y +CONFIG_INPUT_MISC=y +CONFIG_INPUT_TABLET=y +CONFIG_INPUT_UINPUT=y +CONFIG_ION=y +CONFIG_JOYSTICK_XPAD=y +CONFIG_JOYSTICK_XPAD_FF=y +CONFIG_JOYSTICK_XPAD_LEDS=y +CONFIG_KALLSYMS_ALL=y +CONFIG_KSM=y +CONFIG_LOGIG940_FF=y +CONFIG_LOGIRUMBLEPAD2_FF=y +CONFIG_LOGITECH_FF=y +CONFIG_MD=y +CONFIG_MEDIA_SUPPORT=y +CONFIG_MSDOS_FS=y +CONFIG_PANIC_TIMEOUT=5 +CONFIG_PANTHERLORD_FF=y +CONFIG_PERF_EVENTS=y +CONFIG_PM_DEBUG=y +CONFIG_PM_RUNTIME=y +CONFIG_PM_WAKELOCKS_LIMIT=0 +CONFIG_POWER_SUPPLY=y +CONFIG_PSTORE=y +CONFIG_PSTORE_CONSOLE=y +CONFIG_PSTORE_RAM=y +CONFIG_SCHEDSTATS=y +CONFIG_SMARTJOYPLUS_FF=y +CONFIG_SND=y +CONFIG_SOUND=y +CONFIG_SUSPEND_TIME=y +CONFIG_TABLET_USB_ACECAD=y +CONFIG_TABLET_USB_AIPTEK=y +CONFIG_TABLET_USB_GTCO=y +CONFIG_TABLET_USB_HANWANG=y +CONFIG_TABLET_USB_KBTAB=y +CONFIG_TASKSTATS=y +CONFIG_TASK_DELAY_ACCT=y +CONFIG_TASK_IO_ACCOUNTING=y +CONFIG_TASK_XACCT=y +CONFIG_TIMER_STATS=y +CONFIG_TMPFS=y +CONFIG_TMPFS_POSIX_ACL=y +CONFIG_UHID=y +CONFIG_USB_ANNOUNCE_NEW_DEVICES=y +CONFIG_USB_EHCI_HCD=y +CONFIG_USB_HIDDEV=y +CONFIG_USB_USBNET=y +CONFIG_VFAT_FS=y -- cgit v1.2.3