From a2e5d188aad31f7177cbd6d9ddaf8cc9aa4affe0 Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Thu, 6 Nov 2014 14:36:40 +0000 Subject: kdb: Remove currently unused kdbtab_t->cmd_flags The struct member is never used in the code, so we can remove it. We will introduce real flags soon by renaming cmd_repeat to cmd_flags. Signed-off-by: Anton Vorontsov Signed-off-by: John Stultz Signed-off-by: Daniel Thompson Cc: Jason Wessel Signed-off-by: Jason Wessel --- kernel/debug/kdb/kdb_main.c | 1 - kernel/debug/kdb/kdb_private.h | 1 - 2 files changed, 2 deletions(-) (limited to 'kernel') diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index 379650b984f8..cc02aa205668 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -2694,7 +2694,6 @@ int kdb_register_repeat(char *cmd, kp->cmd_func = func; kp->cmd_usage = usage; kp->cmd_help = help; - kp->cmd_flags = 0; kp->cmd_minlen = minlen; kp->cmd_repeat = repeat; diff --git a/kernel/debug/kdb/kdb_private.h b/kernel/debug/kdb/kdb_private.h index 7afd3c8c41d5..c4c46c7b26fd 100644 --- a/kernel/debug/kdb/kdb_private.h +++ b/kernel/debug/kdb/kdb_private.h @@ -172,7 +172,6 @@ typedef struct _kdbtab { kdb_func_t cmd_func; /* Function to execute command */ char *cmd_usage; /* Usage String for this command */ char *cmd_help; /* Help message for this command */ - short cmd_flags; /* Parsing flags */ short cmd_minlen; /* Minimum legal # command * chars required */ kdb_repeat_t cmd_repeat; /* Does command auto repeat on enter? */ -- cgit v1.2.3 From 15a42a9bc9ffcff4315a7154313db08c6bf9ef11 Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Thu, 6 Nov 2014 14:36:41 +0000 Subject: kdb: Rename kdb_repeat_t to kdb_cmdflags_t, cmd_repeat to cmd_flags We're about to add more options for command behaviour, so let's expand the meaning of kdb_repeat_t. So far we just do various renames, there should be no functional changes. Signed-off-by: Anton Vorontsov Signed-off-by: John Stultz Signed-off-by: Daniel Thompson Cc: Jason Wessel Signed-off-by: Jason Wessel --- kernel/debug/kdb/kdb_main.c | 6 +++--- kernel/debug/kdb/kdb_private.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index cc02aa205668..41966b5f86b7 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -1008,7 +1008,7 @@ int kdb_parse(const char *cmdstr) if (result && ignore_errors && result > KDB_CMD_GO) result = 0; KDB_STATE_CLEAR(CMD); - switch (tp->cmd_repeat) { + switch (tp->cmd_flags) { case KDB_REPEAT_NONE: argc = 0; if (argv[0]) @@ -2646,7 +2646,7 @@ int kdb_register_repeat(char *cmd, char *usage, char *help, short minlen, - kdb_repeat_t repeat) + kdb_cmdflags_t flags) { int i; kdbtab_t *kp; @@ -2695,7 +2695,7 @@ int kdb_register_repeat(char *cmd, kp->cmd_usage = usage; kp->cmd_help = help; kp->cmd_minlen = minlen; - kp->cmd_repeat = repeat; + kp->cmd_flags = flags; return 0; } diff --git a/kernel/debug/kdb/kdb_private.h b/kernel/debug/kdb/kdb_private.h index c4c46c7b26fd..eaacd1693954 100644 --- a/kernel/debug/kdb/kdb_private.h +++ b/kernel/debug/kdb/kdb_private.h @@ -174,7 +174,7 @@ typedef struct _kdbtab { char *cmd_help; /* Help message for this command */ short cmd_minlen; /* Minimum legal # command * chars required */ - kdb_repeat_t cmd_repeat; /* Does command auto repeat on enter? */ + kdb_cmdflags_t cmd_flags; /* Command behaviour flags */ } kdbtab_t; extern int kdb_bt(int, const char **); /* KDB display back trace */ -- cgit v1.2.3 From 42c884c10b775ce04f8aabe488820134625c893e Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Thu, 6 Nov 2014 14:36:42 +0000 Subject: kdb: Rename kdb_register_repeat() to kdb_register_flags() We're about to add more options for commands behaviour, so let's give a more generic name to the low-level kdb command registration function. There are just various renames, no functional changes. Signed-off-by: Anton Vorontsov Signed-off-by: John Stultz Signed-off-by: Daniel Thompson Cc: Jason Wessel Signed-off-by: Jason Wessel --- kernel/debug/kdb/kdb_bp.c | 14 ++++---- kernel/debug/kdb/kdb_main.c | 86 ++++++++++++++++++++++----------------------- kernel/trace/trace_kdb.c | 2 +- 3 files changed, 51 insertions(+), 51 deletions(-) (limited to 'kernel') diff --git a/kernel/debug/kdb/kdb_bp.c b/kernel/debug/kdb/kdb_bp.c index b20d544f20c2..59536661c7b9 100644 --- a/kernel/debug/kdb/kdb_bp.c +++ b/kernel/debug/kdb/kdb_bp.c @@ -531,21 +531,21 @@ void __init kdb_initbptab(void) for (i = 0, bp = kdb_breakpoints; i < KDB_MAXBPT; i++, bp++) bp->bp_free = 1; - kdb_register_repeat("bp", kdb_bp, "[]", + kdb_register_flags("bp", kdb_bp, "[]", "Set/Display breakpoints", 0, KDB_REPEAT_NO_ARGS); - kdb_register_repeat("bl", kdb_bp, "[]", + kdb_register_flags("bl", kdb_bp, "[]", "Display breakpoints", 0, KDB_REPEAT_NO_ARGS); if (arch_kgdb_ops.flags & KGDB_HW_BREAKPOINT) - kdb_register_repeat("bph", kdb_bp, "[]", + kdb_register_flags("bph", kdb_bp, "[]", "[datar [length]|dataw [length]] Set hw brk", 0, KDB_REPEAT_NO_ARGS); - kdb_register_repeat("bc", kdb_bc, "", + kdb_register_flags("bc", kdb_bc, "", "Clear Breakpoint", 0, KDB_REPEAT_NONE); - kdb_register_repeat("be", kdb_bc, "", + kdb_register_flags("be", kdb_bc, "", "Enable Breakpoint", 0, KDB_REPEAT_NONE); - kdb_register_repeat("bd", kdb_bc, "", + kdb_register_flags("bd", kdb_bc, "", "Disable Breakpoint", 0, KDB_REPEAT_NONE); - kdb_register_repeat("ss", kdb_ss, "", + kdb_register_flags("ss", kdb_ss, "", "Single Step", 1, KDB_REPEAT_NO_ARGS); /* * Architecture dependent initialization. diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index 41966b5f86b7..070f1ff358d2 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -2629,7 +2629,7 @@ static int kdb_grep_help(int argc, const char **argv) } /* - * kdb_register_repeat - This function is used to register a kernel + * kdb_register_flags - This function is used to register a kernel * debugger command. * Inputs: * cmd Command name @@ -2641,12 +2641,12 @@ static int kdb_grep_help(int argc, const char **argv) * zero for success, one if a duplicate command. */ #define kdb_command_extend 50 /* arbitrary */ -int kdb_register_repeat(char *cmd, - kdb_func_t func, - char *usage, - char *help, - short minlen, - kdb_cmdflags_t flags) +int kdb_register_flags(char *cmd, + kdb_func_t func, + char *usage, + char *help, + short minlen, + kdb_cmdflags_t flags) { int i; kdbtab_t *kp; @@ -2699,13 +2699,13 @@ int kdb_register_repeat(char *cmd, return 0; } -EXPORT_SYMBOL_GPL(kdb_register_repeat); +EXPORT_SYMBOL_GPL(kdb_register_flags); /* * kdb_register - Compatibility register function for commands that do * not need to specify a repeat state. Equivalent to - * kdb_register_repeat with KDB_REPEAT_NONE. + * kdb_register_flags with KDB_REPEAT_NONE. * Inputs: * cmd Command name * func Function to execute the command @@ -2720,8 +2720,8 @@ int kdb_register(char *cmd, char *help, short minlen) { - return kdb_register_repeat(cmd, func, usage, help, minlen, - KDB_REPEAT_NONE); + return kdb_register_flags(cmd, func, usage, help, minlen, + KDB_REPEAT_NONE); } EXPORT_SYMBOL_GPL(kdb_register); @@ -2763,79 +2763,79 @@ static void __init kdb_inittab(void) for_each_kdbcmd(kp, i) kp->cmd_name = NULL; - kdb_register_repeat("md", kdb_md, "", + kdb_register_flags("md", kdb_md, "", "Display Memory Contents, also mdWcN, e.g. md8c1", 1, KDB_REPEAT_NO_ARGS); - kdb_register_repeat("mdr", kdb_md, " ", + kdb_register_flags("mdr", kdb_md, " ", "Display Raw Memory", 0, KDB_REPEAT_NO_ARGS); - kdb_register_repeat("mdp", kdb_md, " ", + kdb_register_flags("mdp", kdb_md, " ", "Display Physical Memory", 0, KDB_REPEAT_NO_ARGS); - kdb_register_repeat("mds", kdb_md, "", + kdb_register_flags("mds", kdb_md, "", "Display Memory Symbolically", 0, KDB_REPEAT_NO_ARGS); - kdb_register_repeat("mm", kdb_mm, " ", + kdb_register_flags("mm", kdb_mm, " ", "Modify Memory Contents", 0, KDB_REPEAT_NO_ARGS); - kdb_register_repeat("go", kdb_go, "[]", + kdb_register_flags("go", kdb_go, "[]", "Continue Execution", 1, KDB_REPEAT_NONE); - kdb_register_repeat("rd", kdb_rd, "", + kdb_register_flags("rd", kdb_rd, "", "Display Registers", 0, KDB_REPEAT_NONE); - kdb_register_repeat("rm", kdb_rm, " ", + kdb_register_flags("rm", kdb_rm, " ", "Modify Registers", 0, KDB_REPEAT_NONE); - kdb_register_repeat("ef", kdb_ef, "", + kdb_register_flags("ef", kdb_ef, "", "Display exception frame", 0, KDB_REPEAT_NONE); - kdb_register_repeat("bt", kdb_bt, "[]", + kdb_register_flags("bt", kdb_bt, "[]", "Stack traceback", 1, KDB_REPEAT_NONE); - kdb_register_repeat("btp", kdb_bt, "", + kdb_register_flags("btp", kdb_bt, "", "Display stack for process ", 0, KDB_REPEAT_NONE); - kdb_register_repeat("bta", kdb_bt, "[D|R|S|T|C|Z|E|U|I|M|A]", + kdb_register_flags("bta", kdb_bt, "[D|R|S|T|C|Z|E|U|I|M|A]", "Backtrace all processes matching state flag", 0, KDB_REPEAT_NONE); - kdb_register_repeat("btc", kdb_bt, "", + kdb_register_flags("btc", kdb_bt, "", "Backtrace current process on each cpu", 0, KDB_REPEAT_NONE); - kdb_register_repeat("btt", kdb_bt, "", + kdb_register_flags("btt", kdb_bt, "", "Backtrace process given its struct task address", 0, KDB_REPEAT_NONE); - kdb_register_repeat("env", kdb_env, "", + kdb_register_flags("env", kdb_env, "", "Show environment variables", 0, KDB_REPEAT_NONE); - kdb_register_repeat("set", kdb_set, "", + kdb_register_flags("set", kdb_set, "", "Set environment variables", 0, KDB_REPEAT_NONE); - kdb_register_repeat("help", kdb_help, "", + kdb_register_flags("help", kdb_help, "", "Display Help Message", 1, KDB_REPEAT_NONE); - kdb_register_repeat("?", kdb_help, "", + kdb_register_flags("?", kdb_help, "", "Display Help Message", 0, KDB_REPEAT_NONE); - kdb_register_repeat("cpu", kdb_cpu, "", + kdb_register_flags("cpu", kdb_cpu, "", "Switch to new cpu", 0, KDB_REPEAT_NONE); - kdb_register_repeat("kgdb", kdb_kgdb, "", + kdb_register_flags("kgdb", kdb_kgdb, "", "Enter kgdb mode", 0, KDB_REPEAT_NONE); - kdb_register_repeat("ps", kdb_ps, "[|A]", + kdb_register_flags("ps", kdb_ps, "[|A]", "Display active task list", 0, KDB_REPEAT_NONE); - kdb_register_repeat("pid", kdb_pid, "", + kdb_register_flags("pid", kdb_pid, "", "Switch to another task", 0, KDB_REPEAT_NONE); - kdb_register_repeat("reboot", kdb_reboot, "", + kdb_register_flags("reboot", kdb_reboot, "", "Reboot the machine immediately", 0, KDB_REPEAT_NONE); #if defined(CONFIG_MODULES) - kdb_register_repeat("lsmod", kdb_lsmod, "", + kdb_register_flags("lsmod", kdb_lsmod, "", "List loaded kernel modules", 0, KDB_REPEAT_NONE); #endif #if defined(CONFIG_MAGIC_SYSRQ) - kdb_register_repeat("sr", kdb_sr, "", + kdb_register_flags("sr", kdb_sr, "", "Magic SysRq key", 0, KDB_REPEAT_NONE); #endif #if defined(CONFIG_PRINTK) - kdb_register_repeat("dmesg", kdb_dmesg, "[lines]", + kdb_register_flags("dmesg", kdb_dmesg, "[lines]", "Display syslog buffer", 0, KDB_REPEAT_NONE); #endif if (arch_kgdb_ops.enable_nmi) { - kdb_register_repeat("disable_nmi", kdb_disable_nmi, "", + kdb_register_flags("disable_nmi", kdb_disable_nmi, "", "Disable NMI entry to KDB", 0, KDB_REPEAT_NONE); } - kdb_register_repeat("defcmd", kdb_defcmd, "name \"usage\" \"help\"", + kdb_register_flags("defcmd", kdb_defcmd, "name \"usage\" \"help\"", "Define a set of commands, down to endefcmd", 0, KDB_REPEAT_NONE); - kdb_register_repeat("kill", kdb_kill, "<-signal> ", + kdb_register_flags("kill", kdb_kill, "<-signal> ", "Send a signal to a process", 0, KDB_REPEAT_NONE); - kdb_register_repeat("summary", kdb_summary, "", + kdb_register_flags("summary", kdb_summary, "", "Summarize the system", 4, KDB_REPEAT_NONE); - kdb_register_repeat("per_cpu", kdb_per_cpu, " [] []", + kdb_register_flags("per_cpu", kdb_per_cpu, " [] []", "Display per_cpu variables", 3, KDB_REPEAT_NONE); - kdb_register_repeat("grephelp", kdb_grep_help, "", + kdb_register_flags("grephelp", kdb_grep_help, "", "Display help on | grep", 0, KDB_REPEAT_NONE); } diff --git a/kernel/trace/trace_kdb.c b/kernel/trace/trace_kdb.c index bd90e1b06088..1e3b36c75048 100644 --- a/kernel/trace/trace_kdb.c +++ b/kernel/trace/trace_kdb.c @@ -127,7 +127,7 @@ static int kdb_ftdump(int argc, const char **argv) static __init int kdb_ftrace_register(void) { - kdb_register_repeat("ftdump", kdb_ftdump, "[skip_#lines] [cpu]", + kdb_register_flags("ftdump", kdb_ftdump, "[skip_#lines] [cpu]", "Dump ftrace log", 0, KDB_REPEAT_NONE); return 0; } -- cgit v1.2.3 From 04bb171e7aa99dee0c92e772e4f66f8d5c1b4081 Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Thu, 6 Nov 2014 14:36:43 +0000 Subject: kdb: Use KDB_REPEAT_* values as flags The actual values of KDB_REPEAT_* enum values and overall logic stayed the same, but we now treat the values as flags. This makes it possible to add other flags and combine them, plus makes the code a lot simpler and shorter. But functionality-wise, there should be no changes. Signed-off-by: Anton Vorontsov Signed-off-by: John Stultz Signed-off-by: Daniel Thompson Cc: Jason Wessel Signed-off-by: Jason Wessel --- kernel/debug/kdb/kdb_main.c | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) (limited to 'kernel') diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index 070f1ff358d2..cbacae24a55a 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -1008,20 +1008,13 @@ int kdb_parse(const char *cmdstr) if (result && ignore_errors && result > KDB_CMD_GO) result = 0; KDB_STATE_CLEAR(CMD); - switch (tp->cmd_flags) { - case KDB_REPEAT_NONE: - argc = 0; - if (argv[0]) - *(argv[0]) = '\0'; - break; - case KDB_REPEAT_NO_ARGS: - argc = 1; - if (argv[1]) - *(argv[1]) = '\0'; - break; - case KDB_REPEAT_WITH_ARGS: - break; - } + + if (tp->cmd_flags & KDB_REPEAT_WITH_ARGS) + return result; + + argc = tp->cmd_flags & KDB_REPEAT_NO_ARGS ? 1 : 0; + if (argv[argc]) + *(argv[argc]) = '\0'; return result; } -- cgit v1.2.3 From e8ab24d9b0173ada3eeed31d7d7f982228efc2c5 Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Thu, 6 Nov 2014 14:36:44 +0000 Subject: kdb: Remove KDB_REPEAT_NONE flag Since we now treat KDB_REPEAT_* as flags, there is no need to pass KDB_REPEAT_NONE. It's just the default behaviour when no flags are specified. Signed-off-by: Anton Vorontsov Signed-off-by: John Stultz Signed-off-by: Daniel Thompson Cc: Jason Wessel Signed-off-by: Jason Wessel --- kernel/debug/kdb/kdb_bp.c | 6 ++--- kernel/debug/kdb/kdb_main.c | 59 ++++++++++++++++++++++----------------------- kernel/trace/trace_kdb.c | 2 +- 3 files changed, 33 insertions(+), 34 deletions(-) (limited to 'kernel') diff --git a/kernel/debug/kdb/kdb_bp.c b/kernel/debug/kdb/kdb_bp.c index 59536661c7b9..f8844fb55311 100644 --- a/kernel/debug/kdb/kdb_bp.c +++ b/kernel/debug/kdb/kdb_bp.c @@ -539,11 +539,11 @@ void __init kdb_initbptab(void) kdb_register_flags("bph", kdb_bp, "[]", "[datar [length]|dataw [length]] Set hw brk", 0, KDB_REPEAT_NO_ARGS); kdb_register_flags("bc", kdb_bc, "", - "Clear Breakpoint", 0, KDB_REPEAT_NONE); + "Clear Breakpoint", 0, 0); kdb_register_flags("be", kdb_bc, "", - "Enable Breakpoint", 0, KDB_REPEAT_NONE); + "Enable Breakpoint", 0, 0); kdb_register_flags("bd", kdb_bc, "", - "Disable Breakpoint", 0, KDB_REPEAT_NONE); + "Disable Breakpoint", 0, 0); kdb_register_flags("ss", kdb_ss, "", "Single Step", 1, KDB_REPEAT_NO_ARGS); diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index cbacae24a55a..538bf1dce26a 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -2698,7 +2698,7 @@ EXPORT_SYMBOL_GPL(kdb_register_flags); /* * kdb_register - Compatibility register function for commands that do * not need to specify a repeat state. Equivalent to - * kdb_register_flags with KDB_REPEAT_NONE. + * kdb_register_flags with flags set to 0. * Inputs: * cmd Command name * func Function to execute the command @@ -2713,8 +2713,7 @@ int kdb_register(char *cmd, char *help, short minlen) { - return kdb_register_flags(cmd, func, usage, help, minlen, - KDB_REPEAT_NONE); + return kdb_register_flags(cmd, func, usage, help, minlen, 0); } EXPORT_SYMBOL_GPL(kdb_register); @@ -2768,68 +2767,68 @@ static void __init kdb_inittab(void) kdb_register_flags("mm", kdb_mm, " ", "Modify Memory Contents", 0, KDB_REPEAT_NO_ARGS); kdb_register_flags("go", kdb_go, "[]", - "Continue Execution", 1, KDB_REPEAT_NONE); + "Continue Execution", 1, 0); kdb_register_flags("rd", kdb_rd, "", - "Display Registers", 0, KDB_REPEAT_NONE); + "Display Registers", 0, 0); kdb_register_flags("rm", kdb_rm, " ", - "Modify Registers", 0, KDB_REPEAT_NONE); + "Modify Registers", 0, 0); kdb_register_flags("ef", kdb_ef, "", - "Display exception frame", 0, KDB_REPEAT_NONE); + "Display exception frame", 0, 0); kdb_register_flags("bt", kdb_bt, "[]", - "Stack traceback", 1, KDB_REPEAT_NONE); + "Stack traceback", 1, 0); kdb_register_flags("btp", kdb_bt, "", - "Display stack for process ", 0, KDB_REPEAT_NONE); + "Display stack for process ", 0, 0); kdb_register_flags("bta", kdb_bt, "[D|R|S|T|C|Z|E|U|I|M|A]", - "Backtrace all processes matching state flag", 0, KDB_REPEAT_NONE); + "Backtrace all processes matching state flag", 0, 0); kdb_register_flags("btc", kdb_bt, "", - "Backtrace current process on each cpu", 0, KDB_REPEAT_NONE); + "Backtrace current process on each cpu", 0, 0); kdb_register_flags("btt", kdb_bt, "", "Backtrace process given its struct task address", 0, - KDB_REPEAT_NONE); + 0); kdb_register_flags("env", kdb_env, "", - "Show environment variables", 0, KDB_REPEAT_NONE); + "Show environment variables", 0, 0); kdb_register_flags("set", kdb_set, "", - "Set environment variables", 0, KDB_REPEAT_NONE); + "Set environment variables", 0, 0); kdb_register_flags("help", kdb_help, "", - "Display Help Message", 1, KDB_REPEAT_NONE); + "Display Help Message", 1, 0); kdb_register_flags("?", kdb_help, "", - "Display Help Message", 0, KDB_REPEAT_NONE); + "Display Help Message", 0, 0); kdb_register_flags("cpu", kdb_cpu, "", - "Switch to new cpu", 0, KDB_REPEAT_NONE); + "Switch to new cpu", 0, 0); kdb_register_flags("kgdb", kdb_kgdb, "", - "Enter kgdb mode", 0, KDB_REPEAT_NONE); + "Enter kgdb mode", 0, 0); kdb_register_flags("ps", kdb_ps, "[|A]", - "Display active task list", 0, KDB_REPEAT_NONE); + "Display active task list", 0, 0); kdb_register_flags("pid", kdb_pid, "", - "Switch to another task", 0, KDB_REPEAT_NONE); + "Switch to another task", 0, 0); kdb_register_flags("reboot", kdb_reboot, "", - "Reboot the machine immediately", 0, KDB_REPEAT_NONE); + "Reboot the machine immediately", 0, 0); #if defined(CONFIG_MODULES) kdb_register_flags("lsmod", kdb_lsmod, "", - "List loaded kernel modules", 0, KDB_REPEAT_NONE); + "List loaded kernel modules", 0, 0); #endif #if defined(CONFIG_MAGIC_SYSRQ) kdb_register_flags("sr", kdb_sr, "", - "Magic SysRq key", 0, KDB_REPEAT_NONE); + "Magic SysRq key", 0, 0); #endif #if defined(CONFIG_PRINTK) kdb_register_flags("dmesg", kdb_dmesg, "[lines]", - "Display syslog buffer", 0, KDB_REPEAT_NONE); + "Display syslog buffer", 0, 0); #endif if (arch_kgdb_ops.enable_nmi) { kdb_register_flags("disable_nmi", kdb_disable_nmi, "", - "Disable NMI entry to KDB", 0, KDB_REPEAT_NONE); + "Disable NMI entry to KDB", 0, 0); } kdb_register_flags("defcmd", kdb_defcmd, "name \"usage\" \"help\"", - "Define a set of commands, down to endefcmd", 0, KDB_REPEAT_NONE); + "Define a set of commands, down to endefcmd", 0, 0); kdb_register_flags("kill", kdb_kill, "<-signal> ", - "Send a signal to a process", 0, KDB_REPEAT_NONE); + "Send a signal to a process", 0, 0); kdb_register_flags("summary", kdb_summary, "", - "Summarize the system", 4, KDB_REPEAT_NONE); + "Summarize the system", 4, 0); kdb_register_flags("per_cpu", kdb_per_cpu, " [] []", - "Display per_cpu variables", 3, KDB_REPEAT_NONE); + "Display per_cpu variables", 3, 0); kdb_register_flags("grephelp", kdb_grep_help, "", - "Display help on | grep", 0, KDB_REPEAT_NONE); + "Display help on | grep", 0, 0); } /* Execute any commands defined in kdb_cmds. */ diff --git a/kernel/trace/trace_kdb.c b/kernel/trace/trace_kdb.c index 1e3b36c75048..3da7e3043596 100644 --- a/kernel/trace/trace_kdb.c +++ b/kernel/trace/trace_kdb.c @@ -128,7 +128,7 @@ static int kdb_ftdump(int argc, const char **argv) static __init int kdb_ftrace_register(void) { kdb_register_flags("ftdump", kdb_ftdump, "[skip_#lines] [cpu]", - "Dump ftrace log", 0, KDB_REPEAT_NONE); + "Dump ftrace log", 0, 0); return 0; } -- cgit v1.2.3 From 9452e977ac17caf9f98a91b33d5e3c3357258c64 Mon Sep 17 00:00:00 2001 From: Daniel Thompson Date: Thu, 6 Nov 2014 14:36:45 +0000 Subject: kdb: Categorize kdb commands (similar to SysRq categorization) This patch introduces several new flags to collect kdb commands into groups (later allowing them to be optionally disabled). This follows similar prior art to enable/disable magic sysrq commands. The commands have been categorized as follows: Always on: go (w/o args), env, set, help, ?, cpu (w/o args), sr, dmesg, disable_nmi, defcmd, summary, grephelp Mem read: md, mdr, mdp, mds, ef, bt (with args), per_cpu Mem write: mm Reg read: rd Reg write: go (with args), rm Inspect: bt (w/o args), btp, bta, btc, btt, ps, pid, lsmod Flow ctrl: bp, bl, bph, bc, be, bd, ss Signal: kill Reboot: reboot All: cpu, kgdb, (and all of the above), nmi_console Signed-off-by: Daniel Thompson Cc: Jason Wessel Signed-off-by: Jason Wessel --- kernel/debug/kdb/kdb_bp.c | 21 +++++--- kernel/debug/kdb/kdb_main.c | 120 ++++++++++++++++++++++++++++++++------------ kernel/trace/trace_kdb.c | 2 +- 3 files changed, 102 insertions(+), 41 deletions(-) (limited to 'kernel') diff --git a/kernel/debug/kdb/kdb_bp.c b/kernel/debug/kdb/kdb_bp.c index f8844fb55311..e1dbf4a2c69e 100644 --- a/kernel/debug/kdb/kdb_bp.c +++ b/kernel/debug/kdb/kdb_bp.c @@ -532,21 +532,28 @@ void __init kdb_initbptab(void) bp->bp_free = 1; kdb_register_flags("bp", kdb_bp, "[]", - "Set/Display breakpoints", 0, KDB_REPEAT_NO_ARGS); + "Set/Display breakpoints", 0, + KDB_ENABLE_FLOW_CTRL | KDB_REPEAT_NO_ARGS); kdb_register_flags("bl", kdb_bp, "[]", - "Display breakpoints", 0, KDB_REPEAT_NO_ARGS); + "Display breakpoints", 0, + KDB_ENABLE_FLOW_CTRL | KDB_REPEAT_NO_ARGS); if (arch_kgdb_ops.flags & KGDB_HW_BREAKPOINT) kdb_register_flags("bph", kdb_bp, "[]", - "[datar [length]|dataw [length]] Set hw brk", 0, KDB_REPEAT_NO_ARGS); + "[datar [length]|dataw [length]] Set hw brk", 0, + KDB_ENABLE_FLOW_CTRL | KDB_REPEAT_NO_ARGS); kdb_register_flags("bc", kdb_bc, "", - "Clear Breakpoint", 0, 0); + "Clear Breakpoint", 0, + KDB_ENABLE_FLOW_CTRL); kdb_register_flags("be", kdb_bc, "", - "Enable Breakpoint", 0, 0); + "Enable Breakpoint", 0, + KDB_ENABLE_FLOW_CTRL); kdb_register_flags("bd", kdb_bc, "", - "Disable Breakpoint", 0, 0); + "Disable Breakpoint", 0, + KDB_ENABLE_FLOW_CTRL); kdb_register_flags("ss", kdb_ss, "", - "Single Step", 1, KDB_REPEAT_NO_ARGS); + "Single Step", 1, + KDB_ENABLE_FLOW_CTRL | KDB_REPEAT_NO_ARGS); /* * Architecture dependent initialization. */ diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index 538bf1dce26a..fae1fc3962f8 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -187,6 +187,26 @@ struct task_struct *kdb_curr_task(int cpu) return p; } +/* + * Check whether the flags of the current command and the permissions + * of the kdb console has allow a command to be run. + */ +static inline bool kdb_check_flags(kdb_cmdflags_t flags, int permissions, + bool no_args) +{ + /* permissions comes from userspace so needs massaging slightly */ + permissions &= KDB_ENABLE_MASK; + permissions |= KDB_ENABLE_ALWAYS_SAFE; + + /* some commands change group when launched with no arguments */ + if (no_args) + permissions |= permissions << KDB_ENABLE_NO_ARGS_SHIFT; + + flags |= KDB_ENABLE_ALL; + + return permissions & flags; +} + /* * kdbgetenv - This function will return the character string value of * an environment variable. @@ -641,8 +661,13 @@ static int kdb_defcmd2(const char *cmdstr, const char *argv0) if (!s->count) s->usable = 0; if (s->usable) - kdb_register(s->name, kdb_exec_defcmd, - s->usage, s->help, 0); + /* macros are always safe because when executed each + * internal command re-enters kdb_parse() and is + * safety checked individually. + */ + kdb_register_flags(s->name, kdb_exec_defcmd, s->usage, + s->help, 0, + KDB_ENABLE_ALWAYS_SAFE); return 0; } if (!s->usable) @@ -2757,78 +2782,107 @@ static void __init kdb_inittab(void) kdb_register_flags("md", kdb_md, "", "Display Memory Contents, also mdWcN, e.g. md8c1", 1, - KDB_REPEAT_NO_ARGS); + KDB_ENABLE_MEM_READ | KDB_REPEAT_NO_ARGS); kdb_register_flags("mdr", kdb_md, " ", - "Display Raw Memory", 0, KDB_REPEAT_NO_ARGS); + "Display Raw Memory", 0, + KDB_ENABLE_MEM_READ | KDB_REPEAT_NO_ARGS); kdb_register_flags("mdp", kdb_md, " ", - "Display Physical Memory", 0, KDB_REPEAT_NO_ARGS); + "Display Physical Memory", 0, + KDB_ENABLE_MEM_READ | KDB_REPEAT_NO_ARGS); kdb_register_flags("mds", kdb_md, "", - "Display Memory Symbolically", 0, KDB_REPEAT_NO_ARGS); + "Display Memory Symbolically", 0, + KDB_ENABLE_MEM_READ | KDB_REPEAT_NO_ARGS); kdb_register_flags("mm", kdb_mm, " ", - "Modify Memory Contents", 0, KDB_REPEAT_NO_ARGS); + "Modify Memory Contents", 0, + KDB_ENABLE_MEM_WRITE | KDB_REPEAT_NO_ARGS); kdb_register_flags("go", kdb_go, "[]", - "Continue Execution", 1, 0); + "Continue Execution", 1, + KDB_ENABLE_REG_WRITE | KDB_ENABLE_ALWAYS_SAFE_NO_ARGS); kdb_register_flags("rd", kdb_rd, "", - "Display Registers", 0, 0); + "Display Registers", 0, + KDB_ENABLE_REG_READ); kdb_register_flags("rm", kdb_rm, " ", - "Modify Registers", 0, 0); + "Modify Registers", 0, + KDB_ENABLE_REG_WRITE); kdb_register_flags("ef", kdb_ef, "", - "Display exception frame", 0, 0); + "Display exception frame", 0, + KDB_ENABLE_MEM_READ); kdb_register_flags("bt", kdb_bt, "[]", - "Stack traceback", 1, 0); + "Stack traceback", 1, + KDB_ENABLE_MEM_READ | KDB_ENABLE_INSPECT_NO_ARGS); kdb_register_flags("btp", kdb_bt, "", - "Display stack for process ", 0, 0); + "Display stack for process ", 0, + KDB_ENABLE_INSPECT); kdb_register_flags("bta", kdb_bt, "[D|R|S|T|C|Z|E|U|I|M|A]", - "Backtrace all processes matching state flag", 0, 0); + "Backtrace all processes matching state flag", 0, + KDB_ENABLE_INSPECT); kdb_register_flags("btc", kdb_bt, "", - "Backtrace current process on each cpu", 0, 0); + "Backtrace current process on each cpu", 0, + KDB_ENABLE_INSPECT); kdb_register_flags("btt", kdb_bt, "", "Backtrace process given its struct task address", 0, - 0); + KDB_ENABLE_MEM_READ | KDB_ENABLE_INSPECT_NO_ARGS); kdb_register_flags("env", kdb_env, "", - "Show environment variables", 0, 0); + "Show environment variables", 0, + KDB_ENABLE_ALWAYS_SAFE); kdb_register_flags("set", kdb_set, "", - "Set environment variables", 0, 0); + "Set environment variables", 0, + KDB_ENABLE_ALWAYS_SAFE); kdb_register_flags("help", kdb_help, "", - "Display Help Message", 1, 0); + "Display Help Message", 1, + KDB_ENABLE_ALWAYS_SAFE); kdb_register_flags("?", kdb_help, "", - "Display Help Message", 0, 0); + "Display Help Message", 0, + KDB_ENABLE_ALWAYS_SAFE); kdb_register_flags("cpu", kdb_cpu, "", - "Switch to new cpu", 0, 0); + "Switch to new cpu", 0, + KDB_ENABLE_ALWAYS_SAFE_NO_ARGS); kdb_register_flags("kgdb", kdb_kgdb, "", "Enter kgdb mode", 0, 0); kdb_register_flags("ps", kdb_ps, "[|A]", - "Display active task list", 0, 0); + "Display active task list", 0, + KDB_ENABLE_INSPECT); kdb_register_flags("pid", kdb_pid, "", - "Switch to another task", 0, 0); + "Switch to another task", 0, + KDB_ENABLE_INSPECT); kdb_register_flags("reboot", kdb_reboot, "", - "Reboot the machine immediately", 0, 0); + "Reboot the machine immediately", 0, + KDB_ENABLE_REBOOT); #if defined(CONFIG_MODULES) kdb_register_flags("lsmod", kdb_lsmod, "", - "List loaded kernel modules", 0, 0); + "List loaded kernel modules", 0, + KDB_ENABLE_INSPECT); #endif #if defined(CONFIG_MAGIC_SYSRQ) kdb_register_flags("sr", kdb_sr, "", - "Magic SysRq key", 0, 0); + "Magic SysRq key", 0, + KDB_ENABLE_ALWAYS_SAFE); #endif #if defined(CONFIG_PRINTK) kdb_register_flags("dmesg", kdb_dmesg, "[lines]", - "Display syslog buffer", 0, 0); + "Display syslog buffer", 0, + KDB_ENABLE_ALWAYS_SAFE); #endif if (arch_kgdb_ops.enable_nmi) { kdb_register_flags("disable_nmi", kdb_disable_nmi, "", - "Disable NMI entry to KDB", 0, 0); + "Disable NMI entry to KDB", 0, + KDB_ENABLE_ALWAYS_SAFE); } kdb_register_flags("defcmd", kdb_defcmd, "name \"usage\" \"help\"", - "Define a set of commands, down to endefcmd", 0, 0); + "Define a set of commands, down to endefcmd", 0, + KDB_ENABLE_ALWAYS_SAFE); kdb_register_flags("kill", kdb_kill, "<-signal> ", - "Send a signal to a process", 0, 0); + "Send a signal to a process", 0, + KDB_ENABLE_SIGNAL); kdb_register_flags("summary", kdb_summary, "", - "Summarize the system", 4, 0); + "Summarize the system", 4, + KDB_ENABLE_ALWAYS_SAFE); kdb_register_flags("per_cpu", kdb_per_cpu, " [] []", - "Display per_cpu variables", 3, 0); + "Display per_cpu variables", 3, + KDB_ENABLE_MEM_READ); kdb_register_flags("grephelp", kdb_grep_help, "", - "Display help on | grep", 0, 0); + "Display help on | grep", 0, + KDB_ENABLE_ALWAYS_SAFE); } /* Execute any commands defined in kdb_cmds. */ diff --git a/kernel/trace/trace_kdb.c b/kernel/trace/trace_kdb.c index 3da7e3043596..1058f6bd8399 100644 --- a/kernel/trace/trace_kdb.c +++ b/kernel/trace/trace_kdb.c @@ -128,7 +128,7 @@ static int kdb_ftdump(int argc, const char **argv) static __init int kdb_ftrace_register(void) { kdb_register_flags("ftdump", kdb_ftdump, "[skip_#lines] [cpu]", - "Dump ftrace log", 0, 0); + "Dump ftrace log", 0, KDB_ENABLE_ALWAYS_SAFE); return 0; } -- cgit v1.2.3 From 420c2b1b0df84f5956036b5185cc1e11d247817d Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Thu, 6 Nov 2014 14:36:46 +0000 Subject: kdb: Add enable mask for groups of commands Currently all kdb commands are enabled whenever kdb is deployed. This makes it difficult to deploy kdb to help debug certain types of systems. Android phones provide one example; the FIQ debugger found on some Android devices has a deliberately weak set of commands to allow the debugger to enabled very late in the production cycle. Certain kiosk environments offer another interesting case where an engineer might wish to probe the system state using passive inspection commands without providing sufficient power for a passer by to root it. Without any restrictions, obtaining the root rights via KDB is a matter of a few commands, and works everywhere. For example, log in as a normal user: cbou:~$ id uid=1001(cbou) gid=1001(cbou) groups=1001(cbou) Now enter KDB (for example via sysrq): Entering kdb (current=0xffff8800065bc740, pid 920) due to Keyboard Entry kdb> ps 23 sleeping system daemon (state M) processes suppressed, use 'ps A' to see all. Task Addr Pid Parent [*] cpu State Thread Command 0xffff8800065bc740 920 919 1 0 R 0xffff8800065bca20 *bash 0xffff880007078000 1 0 0 0 S 0xffff8800070782e0 init [...snip...] 0xffff8800065be3c0 918 1 0 0 S 0xffff8800065be6a0 getty 0xffff8800065b9c80 919 1 0 0 S 0xffff8800065b9f60 login 0xffff8800065bc740 920 919 1 0 R 0xffff8800065bca20 *bash All we need is the offset of cred pointers. We can look up the offset in the distro's kernel source, but it is unnecessary. We can just start dumping init's task_struct, until we see the process name: kdb> md 0xffff880007078000 0xffff880007078000 0000000000000001 ffff88000703c000 ................ 0xffff880007078010 0040210000000002 0000000000000000 .....!@......... [...snip...] 0xffff8800070782b0 ffff8800073e0580 ffff8800073e0580 ..>.......>..... 0xffff8800070782c0 0000000074696e69 0000000000000000 init............ ^ Here, 'init'. Creds are just above it, so the offset is 0x02b0. Now we set up init's creds for our non-privileged shell: kdb> mm 0xffff8800065bc740+0x02b0 0xffff8800073e0580 0xffff8800065bc9f0 = 0xffff8800073e0580 kdb> mm 0xffff8800065bc740+0x02b8 0xffff8800073e0580 0xffff8800065bc9f8 = 0xffff8800073e0580 And thus gaining the root: kdb> go cbou:~$ id uid=0(root) gid=0(root) groups=0(root) cbou:~$ bash root:~# p.s. No distro enables kdb by default (although, with a nice KDB-over-KMS feature availability, I would expect at least some would enable it), so it's not actually some kind of a major issue. Signed-off-by: Anton Vorontsov Signed-off-by: John Stultz Signed-off-by: Daniel Thompson Cc: Jason Wessel Signed-off-by: Jason Wessel --- kernel/debug/kdb/kdb_main.c | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index fae1fc3962f8..fe1ac56b62e9 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -12,6 +12,7 @@ */ #include +#include #include #include #include @@ -23,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -42,6 +44,12 @@ #include #include "kdb_private.h" +#undef MODULE_PARAM_PREFIX +#define MODULE_PARAM_PREFIX "kdb." + +static int kdb_cmd_enabled; +module_param_named(cmd_enable, kdb_cmd_enabled, int, 0600); + #define GREP_LEN 256 char kdb_grep_string[GREP_LEN]; int kdb_grepping_flag; @@ -121,6 +129,7 @@ static kdbmsg_t kdbmsgs[] = { KDBMSG(BADLENGTH, "Invalid length field"), KDBMSG(NOBP, "No Breakpoint exists"), KDBMSG(BADADDR, "Invalid address"), + KDBMSG(NOPERM, "Permission denied"), }; #undef KDBMSG @@ -495,6 +504,15 @@ int kdbgetaddrarg(int argc, const char **argv, int *nextarg, char *cp; kdb_symtab_t symtab; + /* + * If the enable flags prohibit both arbitrary memory access + * and flow control then there are no reasonable grounds to + * provide symbol lookup. + */ + if (!kdb_check_flags(KDB_ENABLE_MEM_READ | KDB_ENABLE_FLOW_CTRL, + kdb_cmd_enabled, false)) + return KDB_NOPERM; + /* * Process arguments which follow the following syntax: * @@ -1028,6 +1046,10 @@ int kdb_parse(const char *cmdstr) if (i < kdb_max_commands) { int result; + + if (!kdb_check_flags(tp->cmd_flags, kdb_cmd_enabled, argc <= 1)) + return KDB_NOPERM; + KDB_STATE_SET(CMD); result = (*tp->cmd_func)(argc-1, (const char **)argv); if (result && ignore_errors && result > KDB_CMD_GO) @@ -1939,10 +1961,14 @@ static int kdb_rm(int argc, const char **argv) */ static int kdb_sr(int argc, const char **argv) { + bool check_mask = + !kdb_check_flags(KDB_ENABLE_ALL, kdb_cmd_enabled, false); + if (argc != 1) return KDB_ARGCOUNT; + kdb_trap_printk++; - __handle_sysrq(*argv[1], false); + __handle_sysrq(*argv[1], check_mask); kdb_trap_printk--; return 0; @@ -2393,6 +2419,8 @@ static int kdb_help(int argc, const char **argv) return 0; if (!kt->cmd_name) continue; + if (!kdb_check_flags(kt->cmd_flags, kdb_cmd_enabled, true)) + continue; if (strlen(kt->cmd_usage) > 20) space = "\n "; kdb_printf("%-15.15s %-20s%s%s\n", kt->cmd_name, -- cgit v1.2.3 From b8017177cdfd46b0222b3b74b206780f52f22f3d Mon Sep 17 00:00:00 2001 From: Daniel Thompson Date: Thu, 6 Nov 2014 14:36:47 +0000 Subject: kdb: Allow access to sensitive commands to be restricted by default Currently kiosk mode must be explicitly requested by the bootloader or userspace. It is convenient to be able to change the default value in a similar manner to CONFIG_MAGIC_SYSRQ_DEFAULT_MASK. Signed-off-by: Daniel Thompson Cc: Jason Wessel Signed-off-by: Jason Wessel --- kernel/debug/kdb/kdb_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index fe1ac56b62e9..8d84979cbe05 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -47,7 +47,7 @@ #undef MODULE_PARAM_PREFIX #define MODULE_PARAM_PREFIX "kdb." -static int kdb_cmd_enabled; +static int kdb_cmd_enabled = CONFIG_KDB_DEFAULT_ENABLE; module_param_named(cmd_enable, kdb_cmd_enabled, int, 0600); #define GREP_LEN 256 -- cgit v1.2.3 From a1465d2f396e416a0049332b20fca5977384b9f5 Mon Sep 17 00:00:00 2001 From: Daniel Thompson Date: Tue, 11 Nov 2014 09:31:53 -0600 Subject: kgdb: timeout if secondary CPUs ignore the roundup Currently if an active CPU fails to respond to a roundup request the CPU that requested the roundup will become stuck. This needlessly reduces the robustness of the debugger. This patch introduces a timeout allowing the system state to be examined even when the system contains unresponsive processors. It also modifies kdb's cpu command to make it censor attempts to switch to unresponsive processors and to report their state as (D)ead. Signed-off-by: Daniel Thompson Cc: Jason Wessel Signed-off-by: Andrew Morton Signed-off-by: Jason Wessel --- kernel/debug/debug_core.c | 9 +++++++-- kernel/debug/kdb/kdb_debugger.c | 4 ++++ kernel/debug/kdb/kdb_main.c | 4 +++- 3 files changed, 14 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c index 1adf62b39b96..acd749736822 100644 --- a/kernel/debug/debug_core.c +++ b/kernel/debug/debug_core.c @@ -471,6 +471,7 @@ static int kgdb_cpu_enter(struct kgdb_state *ks, struct pt_regs *regs, int cpu; int trace_on = 0; int online_cpus = num_online_cpus(); + u64 time_left; kgdb_info[ks->cpu].enter_kgdb++; kgdb_info[ks->cpu].exception_state |= exception_state; @@ -595,9 +596,13 @@ return_normal: /* * Wait for the other CPUs to be notified and be waiting for us: */ - while (kgdb_do_roundup && (atomic_read(&masters_in_kgdb) + - atomic_read(&slaves_in_kgdb)) != online_cpus) + time_left = loops_per_jiffy * HZ; + while (kgdb_do_roundup && --time_left && + (atomic_read(&masters_in_kgdb) + atomic_read(&slaves_in_kgdb)) != + online_cpus) cpu_relax(); + if (!time_left) + pr_crit("KGDB: Timed out waiting for secondary CPUs.\n"); /* * At this point the primary processor is completely diff --git a/kernel/debug/kdb/kdb_debugger.c b/kernel/debug/kdb/kdb_debugger.c index 8859ca34dcfe..15e1a7af5dd0 100644 --- a/kernel/debug/kdb/kdb_debugger.c +++ b/kernel/debug/kdb/kdb_debugger.c @@ -129,6 +129,10 @@ int kdb_stub(struct kgdb_state *ks) ks->pass_exception = 1; KDB_FLAG_SET(CATASTROPHIC); } + /* set CATASTROPHIC if the system contains unresponsive processors */ + for_each_online_cpu(i) + if (!kgdb_info[i].enter_kgdb) + KDB_FLAG_SET(CATASTROPHIC); if (KDB_STATE(SSBPT) && reason == KDB_REASON_SSTEP) { KDB_STATE_CLEAR(SSBPT); KDB_STATE_CLEAR(DOING_SS); diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index 8d84979cbe05..f191bddf64b8 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -2201,6 +2201,8 @@ static void kdb_cpu_status(void) for (start_cpu = -1, i = 0; i < NR_CPUS; i++) { if (!cpu_online(i)) { state = 'F'; /* cpu is offline */ + } else if (!kgdb_info[i].enter_kgdb) { + state = 'D'; /* cpu is online but unresponsive */ } else { state = ' '; /* cpu is responding to kdb */ if (kdb_task_state_char(KDB_TSK(i)) == 'I') @@ -2254,7 +2256,7 @@ static int kdb_cpu(int argc, const char **argv) /* * Validate cpunum */ - if ((cpunum > NR_CPUS) || !cpu_online(cpunum)) + if ((cpunum > NR_CPUS) || !kgdb_info[cpunum].enter_kgdb) return KDB_BADCPUNUM; dbg_switch_cpu = cpunum; -- cgit v1.2.3 From 0f16996cf2ed7c368dd95b4c517ce572b96a10f5 Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Thu, 12 Jun 2014 21:30:11 +0200 Subject: kernel/debug/debug_core.c: Logging clean-up -Convert printk( to pr_foo() -Add pr_fmt -Coalesce formats Cc: Jason Wessel Cc: Andrew Morton Cc: Joe Perches Signed-off-by: Fabian Frederick Signed-off-by: Jason Wessel --- kernel/debug/debug_core.c | 43 +++++++++++++++++++++---------------------- 1 file changed, 21 insertions(+), 22 deletions(-) (limited to 'kernel') diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c index acd749736822..07ce18ca71e0 100644 --- a/kernel/debug/debug_core.c +++ b/kernel/debug/debug_core.c @@ -27,6 +27,9 @@ * version 2. This program is licensed "as is" without any warranty of any * kind, whether express or implied. */ + +#define pr_fmt(fmt) "KGDB: " fmt + #include #include #include @@ -196,8 +199,8 @@ int __weak kgdb_validate_break_address(unsigned long addr) return err; err = kgdb_arch_remove_breakpoint(&tmp); if (err) - printk(KERN_ERR "KGDB: Critical breakpoint error, kernel " - "memory destroyed at: %lx", addr); + pr_err("Critical breakpoint error, kernel memory destroyed at: %lx\n", + addr); return err; } @@ -256,8 +259,8 @@ int dbg_activate_sw_breakpoints(void) error = kgdb_arch_set_breakpoint(&kgdb_break[i]); if (error) { ret = error; - printk(KERN_INFO "KGDB: BP install failed: %lx", - kgdb_break[i].bpt_addr); + pr_info("BP install failed: %lx\n", + kgdb_break[i].bpt_addr); continue; } @@ -319,8 +322,8 @@ int dbg_deactivate_sw_breakpoints(void) continue; error = kgdb_arch_remove_breakpoint(&kgdb_break[i]); if (error) { - printk(KERN_INFO "KGDB: BP remove failed: %lx\n", - kgdb_break[i].bpt_addr); + pr_info("BP remove failed: %lx\n", + kgdb_break[i].bpt_addr); ret = error; } @@ -367,7 +370,7 @@ int dbg_remove_all_break(void) goto setundefined; error = kgdb_arch_remove_breakpoint(&kgdb_break[i]); if (error) - printk(KERN_ERR "KGDB: breakpoint remove failed: %lx\n", + pr_err("breakpoint remove failed: %lx\n", kgdb_break[i].bpt_addr); setundefined: kgdb_break[i].state = BP_UNDEFINED; @@ -400,9 +403,9 @@ static int kgdb_io_ready(int print_wait) if (print_wait) { #ifdef CONFIG_KGDB_KDB if (!dbg_kdb_mode) - printk(KERN_CRIT "KGDB: waiting... or $3#33 for KDB\n"); + pr_crit("waiting... or $3#33 for KDB\n"); #else - printk(KERN_CRIT "KGDB: Waiting for remote debugger\n"); + pr_crit("Waiting for remote debugger\n"); #endif } return 1; @@ -430,8 +433,7 @@ static int kgdb_reenter_check(struct kgdb_state *ks) exception_level = 0; kgdb_skipexception(ks->ex_vector, ks->linux_regs); dbg_activate_sw_breakpoints(); - printk(KERN_CRIT "KGDB: re-enter error: breakpoint removed %lx\n", - addr); + pr_crit("re-enter error: breakpoint removed %lx\n", addr); WARN_ON_ONCE(1); return 1; @@ -444,7 +446,7 @@ static int kgdb_reenter_check(struct kgdb_state *ks) panic("Recursive entry to debugger"); } - printk(KERN_CRIT "KGDB: re-enter exception: ALL breakpoints killed\n"); + pr_crit("re-enter exception: ALL breakpoints killed\n"); #ifdef CONFIG_KGDB_KDB /* Allow kdb to debug itself one level */ return 0; @@ -800,15 +802,15 @@ static struct console kgdbcons = { static void sysrq_handle_dbg(int key) { if (!dbg_io_ops) { - printk(KERN_CRIT "ERROR: No KGDB I/O module available\n"); + pr_crit("ERROR: No KGDB I/O module available\n"); return; } if (!kgdb_connected) { #ifdef CONFIG_KGDB_KDB if (!dbg_kdb_mode) - printk(KERN_CRIT "KGDB or $3#33 for KDB\n"); + pr_crit("KGDB or $3#33 for KDB\n"); #else - printk(KERN_CRIT "Entering KGDB\n"); + pr_crit("Entering KGDB\n"); #endif } @@ -950,7 +952,7 @@ static void kgdb_initial_breakpoint(void) { kgdb_break_asap = 0; - printk(KERN_CRIT "kgdb: Waiting for connection from remote gdb...\n"); + pr_crit("Waiting for connection from remote gdb...\n"); kgdb_breakpoint(); } @@ -969,8 +971,7 @@ int kgdb_register_io_module(struct kgdb_io *new_dbg_io_ops) if (dbg_io_ops) { spin_unlock(&kgdb_registration_lock); - printk(KERN_ERR "kgdb: Another I/O driver is already " - "registered with KGDB.\n"); + pr_err("Another I/O driver is already registered with KGDB\n"); return -EBUSY; } @@ -986,8 +987,7 @@ int kgdb_register_io_module(struct kgdb_io *new_dbg_io_ops) spin_unlock(&kgdb_registration_lock); - printk(KERN_INFO "kgdb: Registered I/O driver %s.\n", - new_dbg_io_ops->name); + pr_info("Registered I/O driver %s\n", new_dbg_io_ops->name); /* Arm KGDB now. */ kgdb_register_callbacks(); @@ -1022,8 +1022,7 @@ void kgdb_unregister_io_module(struct kgdb_io *old_dbg_io_ops) spin_unlock(&kgdb_registration_lock); - printk(KERN_INFO - "kgdb: Unregistered I/O driver %s, debugger disabled.\n", + pr_info("Unregistered I/O driver %s, debugger disabled\n", old_dbg_io_ops->name); } EXPORT_SYMBOL_GPL(kgdb_unregister_io_module); -- cgit v1.2.3 From 3640dcfa4fd00cd91d88bb86250bdb496f7070c0 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Fri, 19 Dec 2014 18:35:53 -0500 Subject: audit: don't attempt to lookup PIDs when changing PID filtering audit rules Commit f1dc4867 ("audit: anchor all pid references in the initial pid namespace") introduced a find_vpid() call when adding/removing audit rules with PID/PPID filters; unfortunately this is problematic as find_vpid() only works if there is a task with the associated PID alive on the system. The following commands demonstrate a simple reproducer. # auditctl -D # auditctl -l # autrace /bin/true # auditctl -l This patch resolves the problem by simply using the PID provided by the user without any additional validation, e.g. no calls to check to see if the task/PID exists. Cc: stable@vger.kernel.org # 3.15 Cc: Richard Guy Briggs Signed-off-by: Paul Moore Acked-by: Eric Paris Reviewed-by: Richard Guy Briggs --- kernel/auditfilter.c | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'kernel') diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index d214cd073a58..c0d148bd7a5c 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -444,19 +444,6 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data, f->val = 0; } - if ((f->type == AUDIT_PID) || (f->type == AUDIT_PPID)) { - struct pid *pid; - rcu_read_lock(); - pid = find_vpid(f->val); - if (!pid) { - rcu_read_unlock(); - err = -ESRCH; - goto exit_free; - } - f->val = pid_nr(pid); - rcu_read_unlock(); - } - err = audit_field_valid(entry, f); if (err) goto exit_free; -- cgit v1.2.3 From 54dc77d974a50147d6639dac6f59cb2c29207161 Mon Sep 17 00:00:00 2001 From: Richard Guy Briggs Date: Thu, 18 Dec 2014 23:09:27 -0500 Subject: audit: use supplied gfp_mask from audit_buffer in kauditd_send_multicast_skb Eric Paris explains: Since kauditd_send_multicast_skb() gets called in audit_log_end(), which can come from any context (aka even a sleeping context) GFP_KERNEL can't be used. Since the audit_buffer knows what context it should use, pass that down and use that. See: https://lkml.org/lkml/2014/12/16/542 BUG: sleeping function called from invalid context at mm/slab.c:2849 in_atomic(): 1, irqs_disabled(): 0, pid: 885, name: sulogin 2 locks held by sulogin/885: #0: (&sig->cred_guard_mutex){+.+.+.}, at: [] prepare_bprm_creds+0x28/0x8b #1: (tty_files_lock){+.+.+.}, at: [] selinux_bprm_committing_creds+0x55/0x22b CPU: 1 PID: 885 Comm: sulogin Not tainted 3.18.0-next-20141216 #30 Hardware name: Dell Inc. Latitude E6530/07Y85M, BIOS A15 06/20/2014 ffff880223744f10 ffff88022410f9b8 ffffffff916ba529 0000000000000375 ffff880223744f10 ffff88022410f9e8 ffffffff91063185 0000000000000006 0000000000000000 0000000000000000 0000000000000000 ffff88022410fa38 Call Trace: [] dump_stack+0x50/0xa8 [] ___might_sleep+0x1b6/0x1be [] __might_sleep+0x119/0x128 [] cache_alloc_debugcheck_before.isra.45+0x1d/0x1f [] kmem_cache_alloc+0x43/0x1c9 [] __alloc_skb+0x42/0x1a3 [] skb_copy+0x3e/0xa3 [] audit_log_end+0x83/0x100 [] ? avc_audit_pre_callback+0x103/0x103 [] common_lsm_audit+0x441/0x450 [] slow_avc_audit+0x63/0x67 [] avc_has_perm+0xca/0xe3 [] inode_has_perm+0x5a/0x65 [] selinux_bprm_committing_creds+0x98/0x22b [] security_bprm_committing_creds+0xe/0x10 [] install_exec_creds+0xe/0x79 [] load_elf_binary+0xe36/0x10d7 [] search_binary_handler+0x81/0x18c [] do_execveat_common.isra.31+0x4e3/0x7b7 [] do_execve+0x1f/0x21 [] SyS_execve+0x25/0x29 [] stub_execve+0x69/0xa0 Cc: stable@vger.kernel.org #v3.16-rc1 Reported-by: Valdis Kletnieks Signed-off-by: Richard Guy Briggs Tested-by: Valdis Kletnieks Signed-off-by: Paul Moore --- kernel/audit.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/audit.c b/kernel/audit.c index f3a981db91ff..c7e097a0d7af 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -429,7 +429,7 @@ static void kauditd_send_skb(struct sk_buff *skb) * This function doesn't consume an skb as might be expected since it has to * copy it anyways. */ -static void kauditd_send_multicast_skb(struct sk_buff *skb) +static void kauditd_send_multicast_skb(struct sk_buff *skb, gfp_t gfp_mask) { struct sk_buff *copy; struct audit_net *aunet = net_generic(&init_net, audit_net_id); @@ -448,11 +448,11 @@ static void kauditd_send_multicast_skb(struct sk_buff *skb) * no reason for new multicast clients to continue with this * non-compliance. */ - copy = skb_copy(skb, GFP_KERNEL); + copy = skb_copy(skb, gfp_mask); if (!copy) return; - nlmsg_multicast(sock, copy, 0, AUDIT_NLGRP_READLOG, GFP_KERNEL); + nlmsg_multicast(sock, copy, 0, AUDIT_NLGRP_READLOG, gfp_mask); } /* @@ -1949,7 +1949,7 @@ void audit_log_end(struct audit_buffer *ab) struct nlmsghdr *nlh = nlmsg_hdr(ab->skb); nlh->nlmsg_len = ab->skb->len; - kauditd_send_multicast_skb(ab->skb); + kauditd_send_multicast_skb(ab->skb, ab->gfp_mask); /* * The original kaudit unicast socket sends up messages with -- cgit v1.2.3 From 4a92843601ad0f5067f441d2f0dca55bbe18c076 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Mon, 22 Dec 2014 12:27:39 -0500 Subject: audit: correctly record file names with different path name types There is a problem with the audit system when multiple audit records are created for the same path, each with a different path name type. The root cause of the problem is in __audit_inode() when an exact match (both the path name and path name type) is not found for a path name record; the existing code creates a new path name record, but it never sets the path name in this record, leaving it NULL. This patch corrects this problem by assigning the path name to these newly created records. There are many ways to reproduce this problem, but one of the easiest is the following (assuming auditd is running): # mkdir /root/tmp/test # touch /root/tmp/test/567 # auditctl -a always,exit -F dir=/root/tmp/test # touch /root/tmp/test/567 Afterwards, or while the commands above are running, check the audit log and pay special attention to the PATH records. A faulty kernel will display something like the following for the file creation: type=SYSCALL msg=audit(1416957442.025:93): arch=c000003e syscall=2 success=yes exit=3 ... comm="touch" exe="/usr/bin/touch" type=CWD msg=audit(1416957442.025:93): cwd="/root/tmp" type=PATH msg=audit(1416957442.025:93): item=0 name="test/" inode=401409 ... nametype=PARENT type=PATH msg=audit(1416957442.025:93): item=1 name=(null) inode=393804 ... nametype=NORMAL type=PATH msg=audit(1416957442.025:93): item=2 name=(null) inode=393804 ... nametype=NORMAL While a patched kernel will show the following: type=SYSCALL msg=audit(1416955786.566:89): arch=c000003e syscall=2 success=yes exit=3 ... comm="touch" exe="/usr/bin/touch" type=CWD msg=audit(1416955786.566:89): cwd="/root/tmp" type=PATH msg=audit(1416955786.566:89): item=0 name="test/" inode=401409 ... nametype=PARENT type=PATH msg=audit(1416955786.566:89): item=1 name="test/567" inode=393804 ... nametype=NORMAL This issue was brought up by a number of people, but special credit should go to hujianyang@huawei.com for reporting the problem along with an explanation of the problem and a patch. While the original patch did have some problems (see the archive link below), it did demonstrate the problem and helped kickstart the fix presented here. * https://lkml.org/lkml/2014/9/5/66 Reported-by: hujianyang Signed-off-by: Paul Moore Acked-by: Richard Guy Briggs --- kernel/auditsc.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 89335723fb2a..287b3d381174 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -1877,12 +1877,18 @@ void __audit_inode(struct filename *name, const struct dentry *dentry, } out_alloc: - /* unable to find the name from a previous getname(). Allocate a new - * anonymous entry. - */ - n = audit_alloc_name(context, AUDIT_TYPE_NORMAL); + /* unable to find an entry with both a matching name and type */ + n = audit_alloc_name(context, AUDIT_TYPE_UNKNOWN); if (!n) return; + if (name) + /* since name is not NULL we know there is already a matching + * name record, see audit_getname(), so there must be a type + * mismatch; reuse the string path since the original name + * record will keep the string valid until we free it in + * audit_free_names() */ + n->name = name; + out: if (parent) { n->name_len = n->name ? parent_len(n->name->name) : AUDIT_NAME_FULL; -- cgit v1.2.3 From 574732c73d155320f9358d9ee5d84beb0f4ecee2 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 23 Dec 2014 15:05:36 +1030 Subject: param: initialize store function to NULL if not available. I rebased Kees' 'param: do not set store func without write perm' on top of my 'params: cleanup sysfs allocation'. However, my patch uses krealloc which doesn't zero memory, leaving .store unset. Reported-by: Sasha Levin Cc: Kees Cook Signed-off-by: Rusty Russell --- kernel/params.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel') diff --git a/kernel/params.c b/kernel/params.c index 0af9b2c4e56c..bd65d136a470 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -648,6 +648,8 @@ static __modinit int add_sysfs_param(struct module_kobject *mk, /* Do not allow runtime DAC changes to make param writable. */ if ((kp->perm & (S_IWUSR | S_IWGRP | S_IWOTH)) != 0) mk->mp->attrs[mk->mp->num].mattr.store = param_attr_store; + else + mk->mp->attrs[mk->mp->num].mattr.store = NULL; mk->mp->attrs[mk->mp->num].mattr.attr.name = (char *)name; mk->mp->attrs[mk->mp->num].mattr.attr.mode = kp->perm; mk->mp->num++; -- cgit v1.2.3 From b74e6278fd6db5848163ccdc6e9d8eb6efdee9bd Mon Sep 17 00:00:00 2001 From: Alex Thorlton Date: Thu, 18 Dec 2014 12:44:30 -0600 Subject: sched: Fix KMALLOC_MAX_SIZE overflow during cpumask allocation When allocating space for load_balance_mask, in sched_init, when CPUMASK_OFFSTACK is set, we've managed to spill over KMALLOC_MAX_SIZE on our 6144 core machine. The patch below breaks up the allocations so that they don't overflow the max alloc size. It also allocates the masks on the the node from which they'll most commonly be accessed, to minimize remote accesses on NUMA machines. Suggested-by: George Beshers Signed-off-by: Alex Thorlton Cc: George Beshers Cc: Russ Anderson Cc: Peter Zijlstra Cc: Linus Torvalds Link: http://lkml.kernel.org/r/1418928270-148543-1-git-send-email-athorlton@sgi.com Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) (limited to 'kernel') diff --git a/kernel/sched/core.c b/kernel/sched/core.c index b5797b78add6..c0accc00566e 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -7112,9 +7112,6 @@ void __init sched_init(void) #endif #ifdef CONFIG_RT_GROUP_SCHED alloc_size += 2 * nr_cpu_ids * sizeof(void **); -#endif -#ifdef CONFIG_CPUMASK_OFFSTACK - alloc_size += num_possible_cpus() * cpumask_size(); #endif if (alloc_size) { ptr = (unsigned long)kzalloc(alloc_size, GFP_NOWAIT); @@ -7135,13 +7132,13 @@ void __init sched_init(void) ptr += nr_cpu_ids * sizeof(void **); #endif /* CONFIG_RT_GROUP_SCHED */ + } #ifdef CONFIG_CPUMASK_OFFSTACK - for_each_possible_cpu(i) { - per_cpu(load_balance_mask, i) = (void *)ptr; - ptr += cpumask_size(); - } -#endif /* CONFIG_CPUMASK_OFFSTACK */ + for_each_possible_cpu(i) { + per_cpu(load_balance_mask, i) = (cpumask_var_t)kzalloc_node( + cpumask_size(), GFP_KERNEL, cpu_to_node(i)); } +#endif /* CONFIG_CPUMASK_OFFSTACK */ init_rt_bandwidth(&def_rt_bandwidth, global_rt_period(), global_rt_runtime()); -- cgit v1.2.3 From 041d7b98ffe59c59fdd639931dea7d74f9aa9a59 Mon Sep 17 00:00:00 2001 From: Richard Guy Briggs Date: Tue, 23 Dec 2014 13:02:04 -0500 Subject: audit: restore AUDIT_LOGINUID unset ABI A regression was caused by commit 780a7654cee8: audit: Make testing for a valid loginuid explicit. (which in turn attempted to fix a regression caused by e1760bd) When audit_krule_to_data() fills in the rules to get a listing, there was a missing clause to convert back from AUDIT_LOGINUID_SET to AUDIT_LOGINUID. This broke userspace by not returning the same information that was sent and expected. The rule: auditctl -a exit,never -F auid=-1 gives: auditctl -l LIST_RULES: exit,never f24=0 syscall=all when it should give: LIST_RULES: exit,never auid=-1 (0xffffffff) syscall=all Tag it so that it is reported the same way it was set. Create a new private flags audit_krule field (pflags) to store it that won't interact with the public one from the API. Cc: stable@vger.kernel.org # v3.10-rc1+ Signed-off-by: Richard Guy Briggs Signed-off-by: Paul Moore --- kernel/auditfilter.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'kernel') diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index c0d148bd7a5c..103586e239a2 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -442,6 +442,7 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data, if ((f->type == AUDIT_LOGINUID) && (f->val == AUDIT_UID_UNSET)) { f->type = AUDIT_LOGINUID_SET; f->val = 0; + entry->rule.pflags |= AUDIT_LOGINUID_LEGACY; } err = audit_field_valid(entry, f); @@ -617,6 +618,13 @@ static struct audit_rule_data *audit_krule_to_data(struct audit_krule *krule) data->buflen += data->values[i] = audit_pack_string(&bufp, krule->filterkey); break; + case AUDIT_LOGINUID_SET: + if (krule->pflags & AUDIT_LOGINUID_LEGACY && !f->val) { + data->fields[i] = AUDIT_LOGINUID; + data->values[i] = AUDIT_UID_UNSET; + break; + } + /* fallthrough if set */ default: data->values[i] = f->val; } @@ -633,6 +641,7 @@ static int audit_compare_rule(struct audit_krule *a, struct audit_krule *b) int i; if (a->flags != b->flags || + a->pflags != b->pflags || a->listnr != b->listnr || a->action != b->action || a->field_count != b->field_count) @@ -751,6 +760,7 @@ struct audit_entry *audit_dupe_rule(struct audit_krule *old) new = &entry->rule; new->vers_ops = old->vers_ops; new->flags = old->flags; + new->pflags = old->pflags; new->listnr = old->listnr; new->action = old->action; for (i = 0; i < AUDIT_BITMASK_SIZE; i++) -- cgit v1.2.3 From 023e2cfa36c31b0ad28c159a1bb0d61ff57334c8 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 23 Dec 2014 21:00:06 +0100 Subject: netlink/genetlink: pass network namespace to bind/unbind Netlink families can exist in multiple namespaces, and for the most part multicast subscriptions are per network namespace. Thus it only makes sense to have bind/unbind notifications per network namespace. To achieve this, pass the network namespace of a given client socket to the bind/unbind functions. Also do this in generic netlink, and there also make sure that any bind for multicast groups that only exist in init_net is rejected. This isn't really a problem if it is accepted since a client in a different namespace will never receive any notifications from such a group, but it can confuse the family if not rejected (it's also possible to silently (without telling the family) accept it, but it would also have to be ignored on unbind so families that take any kind of action on bind/unbind won't do unnecessary work for invalid clients like that. Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- kernel/audit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/audit.c b/kernel/audit.c index f8f203e8018c..aba9d9fadf0c 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -1100,7 +1100,7 @@ static void audit_receive(struct sk_buff *skb) } /* Run custom bind function on netlink socket group connect or bind requests. */ -static int audit_bind(int group) +static int audit_bind(struct net *net, int group) { if (!capable(CAP_AUDIT_READ)) return -EPERM; -- cgit v1.2.3 From fcf22d8267ad2601fe9b6c549d1be96401c23e0b Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Tue, 30 Dec 2014 09:26:21 -0500 Subject: audit: create private file name copies when auditing inodes Unfortunately, while commit 4a928436 ("audit: correctly record file names with different path name types") fixed a problem where we were not recording filenames, it created a new problem by attempting to use these file names after they had been freed. This patch resolves the issue by creating a copy of the filename which the audit subsystem frees after it is done with the string. At some point it would be nice to resolve this issue with refcounts, or something similar, instead of having to allocate/copy strings, but that is almost surely beyond the scope of a -rcX patch so we'll defer that for later. On the plus side, only audit users should be impacted by the string copying. Reported-by: Toralf Foerster Signed-off-by: Paul Moore --- kernel/auditsc.c | 49 ++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 40 insertions(+), 9 deletions(-) (limited to 'kernel') diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 287b3d381174..793e9e98f7f8 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -72,6 +72,8 @@ #include #include #include +#include +#include #include "audit.h" @@ -1861,8 +1863,7 @@ void __audit_inode(struct filename *name, const struct dentry *dentry, } list_for_each_entry_reverse(n, &context->names_list, list) { - /* does the name pointer match? */ - if (!n->name || n->name->name != name->name) + if (!n->name || strcmp(n->name->name, name->name)) continue; /* match the correct record type */ @@ -1881,14 +1882,44 @@ out_alloc: n = audit_alloc_name(context, AUDIT_TYPE_UNKNOWN); if (!n) return; - if (name) - /* since name is not NULL we know there is already a matching - * name record, see audit_getname(), so there must be a type - * mismatch; reuse the string path since the original name - * record will keep the string valid until we free it in - * audit_free_names() */ - n->name = name; + /* unfortunately, while we may have a path name to record with the + * inode, we can't always rely on the string lasting until the end of + * the syscall so we need to create our own copy, it may fail due to + * memory allocation issues, but we do our best */ + if (name) { + /* we can't use getname_kernel() due to size limits */ + size_t len = strlen(name->name) + 1; + struct filename *new = __getname(); + + if (unlikely(!new)) + goto out; + + if (len <= (PATH_MAX - sizeof(*new))) { + new->name = (char *)(new) + sizeof(*new); + new->separate = false; + } else if (len <= PATH_MAX) { + /* this looks odd, but is due to final_putname() */ + struct filename *new2; + new2 = kmalloc(sizeof(*new2), GFP_KERNEL); + if (unlikely(!new2)) { + __putname(new); + goto out; + } + new2->name = (char *)new; + new2->separate = true; + new = new2; + } else { + /* we should never get here, but let's be safe */ + __putname(new); + goto out; + } + strlcpy((char *)new->name, name->name, len); + new->uptr = NULL; + new->aname = n; + n->name = new; + n->name_put = true; + } out: if (parent) { n->name_len = n->name ? parent_len(n->name->name) : AUDIT_NAME_FULL; -- cgit v1.2.3 From 6ada1fc0e1c4775de0e043e1bd3ae9d065491aa5 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Wed, 3 Dec 2014 19:22:48 -0500 Subject: time: settimeofday: Validate the values of tv from user An unvalidated user input is multiplied by a constant, which can result in an undefined behaviour for large values. While this is validated later, we should avoid triggering undefined behaviour. Cc: Thomas Gleixner Cc: Ingo Molnar Cc: stable Signed-off-by: Sasha Levin [jstultz: include trivial milisecond->microsecond correction noticed by Andy] Signed-off-by: John Stultz --- kernel/time/time.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'kernel') diff --git a/kernel/time/time.c b/kernel/time/time.c index a9ae20fb0b11..22d5d3b73970 100644 --- a/kernel/time/time.c +++ b/kernel/time/time.c @@ -196,6 +196,10 @@ SYSCALL_DEFINE2(settimeofday, struct timeval __user *, tv, if (tv) { if (copy_from_user(&user_tv, tv, sizeof(*tv))) return -EFAULT; + + if (!timeval_valid(&user_tv)) + return -EINVAL; + new_ts.tv_sec = user_tv.tv_sec; new_ts.tv_nsec = user_tv.tv_usec * NSEC_PER_USEC; } -- cgit v1.2.3 From 5e5aeb4367b450a28f447f6d5ab57d8f2ab16a5f Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Wed, 3 Dec 2014 19:25:05 -0500 Subject: time: adjtimex: Validate the ADJ_FREQUENCY values Verify that the frequency value from userspace is valid and makes sense. Unverified values can cause overflows later on. Cc: Thomas Gleixner Cc: Ingo Molnar Cc: stable Signed-off-by: Sasha Levin [jstultz: Fix up bug for negative values and drop redunent cap check] Signed-off-by: John Stultz --- kernel/time/ntp.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'kernel') diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 87a346fd6d61..28bf91c60a0b 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -633,6 +633,13 @@ int ntp_validate_timex(struct timex *txc) if ((txc->modes & ADJ_SETOFFSET) && (!capable(CAP_SYS_TIME))) return -EPERM; + if (txc->modes & ADJ_FREQUENCY) { + if (LONG_MIN / PPM_SCALE > txc->freq) + return -EINVAL; + if (LONG_MAX / PPM_SCALE < txc->freq) + return -EINVAL; + } + return 0; } -- cgit v1.2.3 From 3245d6acab981a2388ffb877c7ecc97e763c59d4 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 8 Jan 2015 14:32:12 -0800 Subject: exit: fix race between wait_consider_task() and wait_task_zombie() wait_consider_task() checks EXIT_ZOMBIE after EXIT_DEAD/EXIT_TRACE and both checks can fail if we race with EXIT_ZOMBIE -> EXIT_DEAD/EXIT_TRACE change in between, gcc needs to reload p->exit_state after security_task_wait(). In this case ->notask_error will be wrongly cleared and do_wait() can hang forever if it was the last eligible child. Many thanks to Arne who carefully investigated the problem. Note: this bug is very old but it was pure theoretical until commit b3ab03160dfa ("wait: completely ignore the EXIT_DEAD tasks"). Before this commit "-O2" was probably enough to guarantee that compiler won't read ->exit_state twice. Signed-off-by: Oleg Nesterov Reported-by: Arne Goedeke Tested-by: Arne Goedeke Cc: [3.15+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/exit.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/exit.c b/kernel/exit.c index 1ea4369890a3..6806c55475ee 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1287,9 +1287,15 @@ static int wait_task_continued(struct wait_opts *wo, struct task_struct *p) static int wait_consider_task(struct wait_opts *wo, int ptrace, struct task_struct *p) { + /* + * We can race with wait_task_zombie() from another thread. + * Ensure that EXIT_ZOMBIE -> EXIT_DEAD/EXIT_TRACE transition + * can't confuse the checks below. + */ + int exit_state = ACCESS_ONCE(p->exit_state); int ret; - if (unlikely(p->exit_state == EXIT_DEAD)) + if (unlikely(exit_state == EXIT_DEAD)) return 0; ret = eligible_child(wo, p); @@ -1310,7 +1316,7 @@ static int wait_consider_task(struct wait_opts *wo, int ptrace, return 0; } - if (unlikely(p->exit_state == EXIT_TRACE)) { + if (unlikely(exit_state == EXIT_TRACE)) { /* * ptrace == 0 means we are the natural parent. In this case * we should clear notask_error, debugger will notify us. @@ -1337,7 +1343,7 @@ static int wait_consider_task(struct wait_opts *wo, int ptrace, } /* slay zombie? */ - if (p->exit_state == EXIT_ZOMBIE) { + if (exit_state == EXIT_ZOMBIE) { /* we don't reap group leaders with subthreads */ if (!delay_group_leader(p)) { /* -- cgit v1.2.3 From 88a7c26af8dab2f2d69f5a6067eb670694ec38c0 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sun, 4 Jan 2015 10:36:19 -0800 Subject: perf: Move task_pt_regs sampling into arch code On x86_64, at least, task_pt_regs may be only partially initialized in many contexts, so x86_64 should not use it without extra care from interrupt context, let alone NMI context. This will allow x86_64 to override the logic and will supply some scratch space to use to make a cleaner copy of user regs. Tested-by: Jiri Olsa Signed-off-by: Andy Lutomirski Signed-off-by: Peter Zijlstra (Intel) Cc: Stephane Eranian Cc: chenggang.qcg@taobao.com Cc: Wu Fengguang Cc: Namhyung Kim Cc: Mike Galbraith Cc: Arjan van de Ven Cc: David Ahern Cc: Arnaldo Carvalho de Melo Cc: Catalin Marinas Cc: Jean Pihet Cc: Linus Torvalds Cc: Mark Salter Cc: Russell King Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/e431cd4c18c2e1c44c774f10758527fb2d1025c4.1420396372.git.luto@amacapital.net Signed-off-by: Ingo Molnar --- kernel/events/core.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) (limited to 'kernel') diff --git a/kernel/events/core.c b/kernel/events/core.c index 4c1ee7f2bebc..882f835a0d85 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -4461,18 +4461,14 @@ perf_output_sample_regs(struct perf_output_handle *handle, } static void perf_sample_regs_user(struct perf_regs *regs_user, - struct pt_regs *regs) + struct pt_regs *regs, + struct pt_regs *regs_user_copy) { - if (!user_mode(regs)) { - if (current->mm) - regs = task_pt_regs(current); - else - regs = NULL; - } - - if (regs) { - regs_user->abi = perf_reg_abi(current); + if (user_mode(regs)) { + regs_user->abi = perf_reg_abi(current); regs_user->regs = regs; + } else if (current->mm) { + perf_get_regs_user(regs_user, regs, regs_user_copy); } else { regs_user->abi = PERF_SAMPLE_REGS_ABI_NONE; regs_user->regs = NULL; @@ -4951,7 +4947,8 @@ void perf_prepare_sample(struct perf_event_header *header, } if (sample_type & (PERF_SAMPLE_REGS_USER | PERF_SAMPLE_STACK_USER)) - perf_sample_regs_user(&data->regs_user, regs); + perf_sample_regs_user(&data->regs_user, regs, + &data->regs_user_copy); if (sample_type & PERF_SAMPLE_REGS_USER) { /* regs dump ABI info */ -- cgit v1.2.3 From 32a8df4e0b33fccc9715213b382160415b5c4008 Mon Sep 17 00:00:00 2001 From: Yuyang Du Date: Fri, 19 Dec 2014 08:29:56 +0800 Subject: sched: Fix odd values in effective_load() calculations In effective_load, we have (long w * unsigned long tg->shares) / long W, when w is negative, it is cast to unsigned long and hence the product is insanely large. Fix this by casting tg->shares to long. Reported-by: Sasha Levin Signed-off-by: Yuyang Du Signed-off-by: Peter Zijlstra (Intel) Cc: Dave Jones Cc: Andrey Ryabinin Cc: Linus Torvalds Link: http://lkml.kernel.org/r/20141219002956.GA25405@intel.com Signed-off-by: Ingo Molnar --- kernel/sched/fair.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index df2cdf77f899..6b99659cbeec 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -4424,7 +4424,7 @@ static long effective_load(struct task_group *tg, int cpu, long wl, long wg) * wl = S * s'_i; see (2) */ if (W > 0 && w < W) - wl = (w * tg->shares) / W; + wl = (w * (long)tg->shares) / W; else wl = tg->shares; -- cgit v1.2.3 From 6a503c3be937d275113b702e0421e5b0720abe8a Mon Sep 17 00:00:00 2001 From: Luca Abeni Date: Wed, 17 Dec 2014 11:50:31 +0100 Subject: sched/deadline: Fix migration of SCHED_DEADLINE tasks According to global EDF, tasks should be migrated between runqueues without checking if their scheduling deadlines and runtimes are valid. However, SCHED_DEADLINE currently performs such a check: a migration happens doing: deactivate_task(rq, next_task, 0); set_task_cpu(next_task, later_rq->cpu); activate_task(later_rq, next_task, 0); which ends up calling dequeue_task_dl(), setting the new CPU, and then calling enqueue_task_dl(). enqueue_task_dl() then calls enqueue_dl_entity(), which calls update_dl_entity(), which can modify scheduling deadline and runtime, breaking global EDF scheduling. As a result, some of the properties of global EDF are not respected: for example, a taskset {(30, 80), (40, 80), (120, 170)} scheduled on two cores can have unbounded response times for the third task even if 30/80+40/80+120/170 = 1.5809 < 2 This can be fixed by invoking update_dl_entity() only in case of wakeup, or if this is a new SCHED_DEADLINE task. Signed-off-by: Luca Abeni Signed-off-by: Peter Zijlstra (Intel) Acked-by: Juri Lelli Cc: Cc: Dario Faggioli Cc: Linus Torvalds Link: http://lkml.kernel.org/r/1418813432-20797-2-git-send-email-luca.abeni@unitn.it Signed-off-by: Ingo Molnar --- kernel/sched/deadline.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index e5db8c6feebd..55af498d3c8c 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -826,10 +826,10 @@ enqueue_dl_entity(struct sched_dl_entity *dl_se, * parameters of the task might need updating. Otherwise, * we want a replenishment of its runtime. */ - if (!dl_se->dl_new && flags & ENQUEUE_REPLENISH) - replenish_dl_entity(dl_se, pi_se); - else + if (dl_se->dl_new || flags & ENQUEUE_WAKEUP) update_dl_entity(dl_se, pi_se); + else if (flags & ENQUEUE_REPLENISH) + replenish_dl_entity(dl_se, pi_se); __enqueue_dl_entity(dl_se); } -- cgit v1.2.3 From 269ad8015a6b2bb1cf9e684da4921eb6fa0a0c88 Mon Sep 17 00:00:00 2001 From: Luca Abeni Date: Wed, 17 Dec 2014 11:50:32 +0100 Subject: sched/deadline: Avoid double-accounting in case of missed deadlines The dl_runtime_exceeded() function is supposed to ckeck if a SCHED_DEADLINE task must be throttled, by checking if its current runtime is <= 0. However, it also checks if the scheduling deadline has been missed (the current time is larger than the current scheduling deadline), further decreasing the runtime if this happens. This "double accounting" is wrong: - In case of partitioned scheduling (or single CPU), this happens if task_tick_dl() has been called later than expected (due to small HZ values). In this case, the current runtime is also negative, and replenish_dl_entity() can take care of the deadline miss by recharging the current runtime to a value smaller than dl_runtime - In case of global scheduling on multiple CPUs, scheduling deadlines can be missed even if the task did not consume more runtime than expected, hence penalizing the task is wrong This patch fix this problem by throttling a SCHED_DEADLINE task only when its runtime becomes negative, and not modifying the runtime Signed-off-by: Luca Abeni Signed-off-by: Peter Zijlstra (Intel) Acked-by: Juri Lelli Cc: Cc: Dario Faggioli Cc: Linus Torvalds Link: http://lkml.kernel.org/r/1418813432-20797-3-git-send-email-luca.abeni@unitn.it Signed-off-by: Ingo Molnar --- kernel/sched/deadline.c | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) (limited to 'kernel') diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 55af498d3c8c..b52092f2636d 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -570,24 +570,7 @@ void init_dl_task_timer(struct sched_dl_entity *dl_se) static int dl_runtime_exceeded(struct rq *rq, struct sched_dl_entity *dl_se) { - int dmiss = dl_time_before(dl_se->deadline, rq_clock(rq)); - int rorun = dl_se->runtime <= 0; - - if (!rorun && !dmiss) - return 0; - - /* - * If we are beyond our current deadline and we are still - * executing, then we have already used some of the runtime of - * the next instance. Thus, if we do not account that, we are - * stealing bandwidth from the system at each deadline miss! - */ - if (dmiss) { - dl_se->runtime = rorun ? dl_se->runtime : 0; - dl_se->runtime -= rq_clock(rq) - dl_se->deadline; - } - - return 1; + return (dl_se->runtime <= 0); } extern bool sched_rt_bandwidth_account(struct rt_rq *rt_rq); -- cgit v1.2.3 From 7f1a169b88f513e32a432ca0f85bfd282d117bd6 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Thu, 25 Dec 2014 15:51:21 +0900 Subject: sched/fair: Fix RCU stall upon -ENOMEM in sched_create_group() When alloc_fair_sched_group() in sched_create_group() fails, free_sched_group() is called, and free_fair_sched_group() is called by free_sched_group(). Since destroy_cfs_bandwidth() is called by free_fair_sched_group() without calling init_cfs_bandwidth(), RCU stall occurs at hrtimer_cancel(): INFO: rcu_sched self-detected stall on CPU { 1} (t=60000 jiffies g=13074 c=13073 q=0) Task dump for CPU 1: (fprintd) R running task 0 6249 1 0x00000088 ... Call Trace: [] sched_show_task+0xa8/0x110 [] dump_cpu_task+0x3d/0x50 [] rcu_dump_cpu_stacks+0x90/0xd0 [] rcu_check_callbacks+0x491/0x700 [] update_process_times+0x4b/0x80 [] tick_sched_handle.isra.20+0x36/0x50 [] tick_sched_timer+0x42/0x70 [] __run_hrtimer+0x69/0x1a0 [] ? tick_sched_handle.isra.20+0x50/0x50 [] hrtimer_interrupt+0xef/0x230 [] local_apic_timer_interrupt+0x3b/0x70 [] smp_apic_timer_interrupt+0x45/0x60 [] apic_timer_interrupt+0x6d/0x80 [] ? lock_hrtimer_base.isra.23+0x18/0x50 [] ? __kmalloc+0x211/0x230 [] hrtimer_try_to_cancel+0x22/0xd0 [] ? __kmalloc+0x211/0x230 [] hrtimer_cancel+0x22/0x30 [] free_fair_sched_group+0x25/0xd0 [] free_sched_group+0x16/0x40 [] sched_create_group+0x4b/0x80 [] sched_autogroup_create_attach+0x43/0x1c0 [] sys_setsid+0x7c/0x110 [] system_call_fastpath+0x12/0x17 Check whether init_cfs_bandwidth() was called before calling destroy_cfs_bandwidth(). Signed-off-by: Tetsuo Handa [ Move the check into destroy_cfs_bandwidth() to aid compilability. ] Signed-off-by: Peter Zijlstra (Intel) Cc: Paul Turner Cc: Ben Segall Cc: Linus Torvalds Link: http://lkml.kernel.org/r/201412252210.GCC30204.SOMVFFOtQJFLOH@I-love.SAKURA.ne.jp Signed-off-by: Ingo Molnar --- kernel/sched/fair.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'kernel') diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 6b99659cbeec..40667cbf371b 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -4005,6 +4005,10 @@ void __start_cfs_bandwidth(struct cfs_bandwidth *cfs_b, bool force) static void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b) { + /* init_cfs_bandwidth() was not called */ + if (!cfs_b->throttled_cfs_rq.next) + return; + hrtimer_cancel(&cfs_b->period_timer); hrtimer_cancel(&cfs_b->slack_timer); } -- cgit v1.2.3 From a63b03e2d2477586440741677ecac45bcf28d7b1 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 6 Jan 2015 10:29:35 +0000 Subject: mutex: Always clear owner field upon mutex_unlock() Currently if DEBUG_MUTEXES is enabled, the mutex->owner field is only cleared iff debug_locks is active. This exposes a race to other users of the field where the mutex->owner may be still set to a stale value, potentially upsetting mutex_spin_on_owner() among others. References: https://bugs.freedesktop.org/show_bug.cgi?id=87955 Signed-off-by: Chris Wilson Signed-off-by: Peter Zijlstra (Intel) Acked-by: Davidlohr Bueso Cc: Daniel Vetter Cc: Linus Torvalds Link: http://lkml.kernel.org/r/1420540175-30204-1-git-send-email-chris@chris-wilson.co.uk Signed-off-by: Ingo Molnar --- kernel/locking/mutex-debug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/locking/mutex-debug.c b/kernel/locking/mutex-debug.c index 5cf6731b98e9..3ef3736002d8 100644 --- a/kernel/locking/mutex-debug.c +++ b/kernel/locking/mutex-debug.c @@ -80,13 +80,13 @@ void debug_mutex_unlock(struct mutex *lock) DEBUG_LOCKS_WARN_ON(lock->owner != current); DEBUG_LOCKS_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next); - mutex_clear_owner(lock); } /* * __mutex_slowpath_needs_to_unlock() is explicitly 0 for debug * mutexes so that we can do it here after we've verified state. */ + mutex_clear_owner(lock); atomic_set(&lock->count, 1); } -- cgit v1.2.3 From 8f86f83709c585742dea5dd7f0d2b79c43f992ec Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 13 Jan 2015 11:20:43 -0500 Subject: ftrace: Fix updating of filters for shared global_ops filters As the set_ftrace_filter affects both the function tracer as well as the function graph tracer, the ops that represent each have a shared ftrace_ops_hash structure. This allows both to be updated when the filter files are updated. But if function graph is enabled and the global_ops (function tracing) ops is not, then it is possible that the filter could be changed without the update happening for the function graph ops. This will cause the changes to not take place and may even cause a ftrace_bug to occur as it could mess with the trampoline accounting. The solution is to check if the ops uses the shared global_ops filter and if the ops itself is not enabled, to check if there's another ops that is enabled and also shares the global_ops filter. In that case, the modification still needs to be executed. Link: http://lkml.kernel.org/r/20150114154329.055980438@goodmis.org Cc: stable@vger.kernel.org # 3.17+ Reviewed-by: Masami Hiramatsu Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 929a733d302e..2b35d0ba578d 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -4008,8 +4008,32 @@ ftrace_match_addr(struct ftrace_hash *hash, unsigned long ip, int remove) static void ftrace_ops_update_code(struct ftrace_ops *ops, struct ftrace_hash *old_hash) { - if (ops->flags & FTRACE_OPS_FL_ENABLED && ftrace_enabled) + struct ftrace_ops *op; + + if (!ftrace_enabled) + return; + + if (ops->flags & FTRACE_OPS_FL_ENABLED) { ftrace_run_modify_code(ops, FTRACE_UPDATE_CALLS, old_hash); + return; + } + + /* + * If this is the shared global_ops filter, then we need to + * check if there is another ops that shares it, is enabled. + * If so, we still need to run the modify code. + */ + if (ops->func_hash != &global_ops.local_hash) + return; + + do_for_each_ftrace_op(op, ftrace_ops_list) { + if (op->func_hash == &global_ops.local_hash && + op->flags & FTRACE_OPS_FL_ENABLED) { + ftrace_run_modify_code(op, FTRACE_UPDATE_CALLS, old_hash); + /* Only need to do this once */ + return; + } + } while_for_each_ftrace_op(op); } static int -- cgit v1.2.3 From 7485058eea40783ac142a60c3e799fc66ce72583 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 13 Jan 2015 14:03:38 -0500 Subject: ftrace: Check both notrace and filter for old hash Using just the filter for checking for trampolines or regs is not enough when updating the code against the records that represent all functions. Both the filter hash and the notrace hash need to be checked. To trigger this bug (using trace-cmd and perf): # perf probe -a do_fork # trace-cmd start -B foo -e probe # trace-cmd record -p function_graph -n do_fork sleep 1 The trace-cmd record at the end clears the filter before it disables function_graph tracing and then that causes the accounting of the ftrace function records to become incorrect and causes ftrace to bug. Link: http://lkml.kernel.org/r/20150114154329.358378039@goodmis.org Cc: stable@vger.kernel.org [ still need to switch old_hash_ops to old_ops_hash ] Reviewed-by: Masami Hiramatsu Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 2b35d0ba578d..224e768bdc73 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -2497,12 +2497,14 @@ static void ftrace_run_update_code(int command) } static void ftrace_run_modify_code(struct ftrace_ops *ops, int command, - struct ftrace_hash *old_hash) + struct ftrace_ops_hash *old_hash) { ops->flags |= FTRACE_OPS_FL_MODIFYING; - ops->old_hash.filter_hash = old_hash; + ops->old_hash.filter_hash = old_hash->filter_hash; + ops->old_hash.notrace_hash = old_hash->notrace_hash; ftrace_run_update_code(command); ops->old_hash.filter_hash = NULL; + ops->old_hash.notrace_hash = NULL; ops->flags &= ~FTRACE_OPS_FL_MODIFYING; } @@ -3579,7 +3581,7 @@ static struct ftrace_ops trace_probe_ops __read_mostly = static int ftrace_probe_registered; -static void __enable_ftrace_function_probe(struct ftrace_hash *old_hash) +static void __enable_ftrace_function_probe(struct ftrace_ops_hash *old_hash) { int ret; int i; @@ -3637,6 +3639,7 @@ int register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, void *data) { + struct ftrace_ops_hash old_hash_ops; struct ftrace_func_probe *entry; struct ftrace_hash **orig_hash = &trace_probe_ops.func_hash->filter_hash; struct ftrace_hash *old_hash = *orig_hash; @@ -3658,6 +3661,10 @@ register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, mutex_lock(&trace_probe_ops.func_hash->regex_lock); + old_hash_ops.filter_hash = old_hash; + /* Probes only have filters */ + old_hash_ops.notrace_hash = NULL; + hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, old_hash); if (!hash) { count = -ENOMEM; @@ -3718,7 +3725,7 @@ register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, ret = ftrace_hash_move(&trace_probe_ops, 1, orig_hash, hash); - __enable_ftrace_function_probe(old_hash); + __enable_ftrace_function_probe(&old_hash_ops); if (!ret) free_ftrace_hash_rcu(old_hash); @@ -4006,7 +4013,7 @@ ftrace_match_addr(struct ftrace_hash *hash, unsigned long ip, int remove) } static void ftrace_ops_update_code(struct ftrace_ops *ops, - struct ftrace_hash *old_hash) + struct ftrace_ops_hash *old_hash) { struct ftrace_ops *op; @@ -4041,6 +4048,7 @@ ftrace_set_hash(struct ftrace_ops *ops, unsigned char *buf, int len, unsigned long ip, int remove, int reset, int enable) { struct ftrace_hash **orig_hash; + struct ftrace_ops_hash old_hash_ops; struct ftrace_hash *old_hash; struct ftrace_hash *hash; int ret; @@ -4077,9 +4085,11 @@ ftrace_set_hash(struct ftrace_ops *ops, unsigned char *buf, int len, mutex_lock(&ftrace_lock); old_hash = *orig_hash; + old_hash_ops.filter_hash = ops->func_hash->filter_hash; + old_hash_ops.notrace_hash = ops->func_hash->notrace_hash; ret = ftrace_hash_move(ops, enable, orig_hash, hash); if (!ret) { - ftrace_ops_update_code(ops, old_hash); + ftrace_ops_update_code(ops, &old_hash_ops); free_ftrace_hash_rcu(old_hash); } mutex_unlock(&ftrace_lock); @@ -4291,6 +4301,7 @@ static void __init set_ftrace_early_filters(void) int ftrace_regex_release(struct inode *inode, struct file *file) { struct seq_file *m = (struct seq_file *)file->private_data; + struct ftrace_ops_hash old_hash_ops; struct ftrace_iterator *iter; struct ftrace_hash **orig_hash; struct ftrace_hash *old_hash; @@ -4324,10 +4335,12 @@ int ftrace_regex_release(struct inode *inode, struct file *file) mutex_lock(&ftrace_lock); old_hash = *orig_hash; + old_hash_ops.filter_hash = iter->ops->func_hash->filter_hash; + old_hash_ops.notrace_hash = iter->ops->func_hash->notrace_hash; ret = ftrace_hash_move(iter->ops, filter_hash, orig_hash, iter->hash); if (!ret) { - ftrace_ops_update_code(iter->ops, old_hash); + ftrace_ops_update_code(iter->ops, &old_hash_ops); free_ftrace_hash_rcu(old_hash); } mutex_unlock(&ftrace_lock); -- cgit v1.2.3 From 83829b74f54186a154ae6b586c05cdb838c2ebc6 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Wed, 14 Jan 2015 12:24:37 -0500 Subject: tracing: Remove extra call to init_ftrace_syscalls() trace_init() calls init_ftrace_syscalls() and then calls trace_event_init() which also calls init_ftrace_syscalls(). It makes more sense to only call it from trace_event_init(). Calling it twice wastes memory, as it allocates the syscall events twice, and loses the first copy of it. Link: http://lkml.kernel.org/r/54AF53BD.5070303@huawei.com Link: http://lkml.kernel.org/r/20150115040505.930398632@goodmis.org Reported-by: Wang Nan Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 1 - 1 file changed, 1 deletion(-) (limited to 'kernel') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 2e767972e99c..4a9079b9f082 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -6918,7 +6918,6 @@ void __init trace_init(void) tracepoint_printk = 0; } tracer_alloc_buffers(); - init_ftrace_syscalls(); trace_event_init(); } -- cgit v1.2.3 From ce1039bd3a89e99e4f624e75fb1777fc92d76eb3 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Wed, 14 Jan 2015 12:53:45 -0500 Subject: tracing: Fix enabling of syscall events on the command line Commit 5f893b2639b2 "tracing: Move enabling tracepoints to just after rcu_init()" broke the enabling of system call events from the command line. The reason was that the enabling of command line trace events was moved before PID 1 started, and the syscall tracepoints require that all tasks have the TIF_SYSCALL_TRACEPOINT flag set. But the swapper task (pid 0) is not part of that. Since the swapper task is the only task that is running at this early in boot, no task gets the flag set, and the tracepoint never gets reached. Instead of setting the swapper task flag (there should be no reason to do that), re-enabled trace events again after the init thread (PID 1) has been started. It requires disabling all command line events and re-enabling them, as just enabling them again will not reset the logic to set the TIF_SYSCALL_TRACEPOINT flag, as the syscall tracepoint will be fooled into thinking that it was already set, and wont try setting it again. For this reason, we must first disable it and re-enable it. Link: http://lkml.kernel.org/r/1421188517-18312-1-git-send-email-mpe@ellerman.id.au Link: http://lkml.kernel.org/r/20150115040506.216066449@goodmis.org Reported-by: Michael Ellerman Signed-off-by: Steven Rostedt --- kernel/trace/trace_events.c | 69 ++++++++++++++++++++++++++++++++++++--------- 1 file changed, 55 insertions(+), 14 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 366a78a3e61e..b03a0ea77b99 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -2429,12 +2429,39 @@ static __init int event_trace_memsetup(void) return 0; } +static __init void +early_enable_events(struct trace_array *tr, bool disable_first) +{ + char *buf = bootup_event_buf; + char *token; + int ret; + + while (true) { + token = strsep(&buf, ","); + + if (!token) + break; + if (!*token) + continue; + + /* Restarting syscalls requires that we stop them first */ + if (disable_first) + ftrace_set_clr_event(tr, token, 0); + + ret = ftrace_set_clr_event(tr, token, 1); + if (ret) + pr_warn("Failed to enable trace event: %s\n", token); + + /* Put back the comma to allow this to be called again */ + if (buf) + *(buf - 1) = ','; + } +} + static __init int event_trace_enable(void) { struct trace_array *tr = top_trace_array(); struct ftrace_event_call **iter, *call; - char *buf = bootup_event_buf; - char *token; int ret; if (!tr) @@ -2456,18 +2483,7 @@ static __init int event_trace_enable(void) */ __trace_early_add_events(tr); - while (true) { - token = strsep(&buf, ","); - - if (!token) - break; - if (!*token) - continue; - - ret = ftrace_set_clr_event(tr, token, 1); - if (ret) - pr_warn("Failed to enable trace event: %s\n", token); - } + early_enable_events(tr, false); trace_printk_start_comm(); @@ -2478,6 +2494,31 @@ static __init int event_trace_enable(void) return 0; } +/* + * event_trace_enable() is called from trace_event_init() first to + * initialize events and perhaps start any events that are on the + * command line. Unfortunately, there are some events that will not + * start this early, like the system call tracepoints that need + * to set the TIF_SYSCALL_TRACEPOINT flag of pid 1. But event_trace_enable() + * is called before pid 1 starts, and this flag is never set, making + * the syscall tracepoint never get reached, but the event is enabled + * regardless (and not doing anything). + */ +static __init int event_trace_enable_again(void) +{ + struct trace_array *tr; + + tr = top_trace_array(); + if (!tr) + return -ENODEV; + + early_enable_events(tr, true); + + return 0; +} + +early_initcall(event_trace_enable_again); + static __init int event_trace_init(void) { struct trace_array *tr; -- cgit v1.2.3 From 29187a9eeaf362d8422e62e17a22a6e115277a49 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 16 Jan 2015 14:21:16 -0500 Subject: workqueue: fix subtle pool management issue which can stall whole worker_pool A worker_pool's forward progress is guaranteed by the fact that the last idle worker assumes the manager role to create more workers and summon the rescuers if creating workers doesn't succeed in timely manner before proceeding to execute work items. This manager role is implemented in manage_workers(), which indicates whether the worker may proceed to work item execution with its return value. This is necessary because multiple workers may contend for the manager role, and, if there already is a manager, others should proceed to work item execution. Unfortunately, the function also indicates that the worker may proceed to work item execution if need_to_create_worker() is false at the head of the function. need_to_create_worker() tests the following conditions. pending work items && !nr_running && !nr_idle The first and third conditions are protected by pool->lock and thus won't change while holding pool->lock; however, nr_running can change asynchronously as other workers block and resume and while it's likely to be zero, as someone woke this worker up in the first place, some other workers could have become runnable inbetween making it non-zero. If this happens, manage_worker() could return false even with zero nr_idle making the worker, the last idle one, proceed to execute work items. If then all workers of the pool end up blocking on a resource which can only be released by a work item which is pending on that pool, the whole pool can deadlock as there's no one to create more workers or summon the rescuers. This patch fixes the problem by removing the early exit condition from maybe_create_worker() and making manage_workers() return false iff there's already another manager, which ensures that the last worker doesn't start executing work items. We can leave the early exit condition alone and just ignore the return value but the only reason it was put there is because the manage_workers() used to perform both creations and destructions of workers and thus the function may be invoked while the pool is trying to reduce the number of workers. Now that manage_workers() is called only when more workers are needed, the only case this early exit condition is triggered is rare race conditions rendering it pointless. Tested with simulated workload and modified workqueue code which trigger the pool deadlock reliably without this patch. Signed-off-by: Tejun Heo Reported-by: Eric Sandeen Link: http://lkml.kernel.org/g/54B019F4.8030009@sandeen.net Cc: Dave Chinner Cc: Lai Jiangshan Cc: stable@vger.kernel.org --- kernel/workqueue.c | 25 ++++++++----------------- 1 file changed, 8 insertions(+), 17 deletions(-) (limited to 'kernel') diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 6202b08f1933..beeeac9e0e3e 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1841,17 +1841,11 @@ static void pool_mayday_timeout(unsigned long __pool) * spin_lock_irq(pool->lock) which may be released and regrabbed * multiple times. Does GFP_KERNEL allocations. Called only from * manager. - * - * Return: - * %false if no action was taken and pool->lock stayed locked, %true - * otherwise. */ -static bool maybe_create_worker(struct worker_pool *pool) +static void maybe_create_worker(struct worker_pool *pool) __releases(&pool->lock) __acquires(&pool->lock) { - if (!need_to_create_worker(pool)) - return false; restart: spin_unlock_irq(&pool->lock); @@ -1877,7 +1871,6 @@ restart: */ if (need_to_create_worker(pool)) goto restart; - return true; } /** @@ -1897,16 +1890,14 @@ restart: * multiple times. Does GFP_KERNEL allocations. * * Return: - * %false if the pool don't need management and the caller can safely start - * processing works, %true indicates that the function released pool->lock - * and reacquired it to perform some management function and that the - * conditions that the caller verified while holding the lock before - * calling the function might no longer be true. + * %false if the pool doesn't need management and the caller can safely + * start processing works, %true if management function was performed and + * the conditions that the caller verified before calling the function may + * no longer be true. */ static bool manage_workers(struct worker *worker) { struct worker_pool *pool = worker->pool; - bool ret = false; /* * Anyone who successfully grabs manager_arb wins the arbitration @@ -1919,12 +1910,12 @@ static bool manage_workers(struct worker *worker) * actual management, the pool may stall indefinitely. */ if (!mutex_trylock(&pool->manager_arb)) - return ret; + return false; - ret |= maybe_create_worker(pool); + maybe_create_worker(pool); mutex_unlock(&pool->manager_arb); - return ret; + return true; } /** -- cgit v1.2.3 From fc7f0dd381720ea5ee5818645f7d0e9dece41cb0 Mon Sep 17 00:00:00 2001 From: Louis Langholtz Date: Thu, 15 Jan 2015 22:04:46 -0700 Subject: kernel: avoid overflow in cmp_range Avoid overflow possibility. [ The overflow is purely theoretical, since this is used for memory ranges that aren't even close to using the full 64 bits, but this is the right thing to do regardless. - Linus ] Signed-off-by: Louis Langholtz Cc: Yinghai Lu Cc: Peter Anvin Cc: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/range.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/range.c b/kernel/range.c index 322ea8e93e4b..82cfc285b046 100644 --- a/kernel/range.c +++ b/kernel/range.c @@ -113,12 +113,12 @@ static int cmp_range(const void *x1, const void *x2) { const struct range *r1 = x1; const struct range *r2 = x2; - s64 start1, start2; - start1 = r1->start; - start2 = r2->start; - - return start1 - start2; + if (r1->start < r2->start) + return -1; + if (r1->start > r2->start) + return 1; + return 0; } int clean_sort_range(struct range *range, int az) -- cgit v1.2.3 From c772be52319de9756fd82f36d37a6d3e003441e3 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 20 Jan 2015 09:07:04 +1030 Subject: param: fix uninitialized read with CONFIG_DEBUG_LOCK_ALLOC ignore_lockdep is uninitialized, and sysfs_attr_init() doesn't initialize it, so memset to 0. Reported-by: Huang Ying Cc: Eric W. Biederman Signed-off-by: Rusty Russell --- kernel/params.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel') diff --git a/kernel/params.c b/kernel/params.c index bd65d136a470..728e05b167de 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -642,6 +642,7 @@ static __modinit int add_sysfs_param(struct module_kobject *mk, mk->mp->grp.attrs = new_attrs; /* Tack new one on the end. */ + memset(&mk->mp->attrs[mk->mp->num], 0, sizeof(mk->mp->attrs[0])); sysfs_attr_init(&mk->mp->attrs[mk->mp->num].mattr.attr); mk->mp->attrs[mk->mp->num].param = kp; mk->mp->attrs[mk->mp->num].mattr.show = param_attr_show; -- cgit v1.2.3 From d453cded05ee219b77815ea194dc36efa5398bca Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 20 Jan 2015 09:07:04 +1030 Subject: module_arch_freeing_init(): new hook for archs before module->module_init freed. Archs have been abusing module_free() to clean up their arch-specific allocations. Since module_free() is also (ab)used by BPF and trace code, let's keep it to simple allocations, and provide a hook called before that. This means that avr32, ia64, parisc and s390 no longer need to implement their own module_free() at all. avr32 doesn't need module_finalize() either. Signed-off-by: Rusty Russell Cc: Chris Metcalf Cc: Haavard Skinnemoen Cc: Hans-Christian Egtvedt Cc: Tony Luck Cc: Fenghua Yu Cc: "James E.J. Bottomley" Cc: Helge Deller Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: linux-kernel@vger.kernel.org Cc: linux-ia64@vger.kernel.org Cc: linux-parisc@vger.kernel.org Cc: linux-s390@vger.kernel.org --- kernel/module.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'kernel') diff --git a/kernel/module.c b/kernel/module.c index 3965511ae133..68be0b1f9e7f 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1804,6 +1804,10 @@ void __weak module_arch_cleanup(struct module *mod) { } +void __weak module_arch_freeing_init(struct module *mod) +{ +} + /* Free a module, remove from lists, etc. */ static void free_module(struct module *mod) { @@ -1841,6 +1845,7 @@ static void free_module(struct module *mod) /* This may be NULL, but that's OK */ unset_module_init_ro_nx(mod); + module_arch_freeing_init(mod); module_free(mod, mod->module_init); kfree(mod->args); percpu_modfree(mod); @@ -2930,6 +2935,7 @@ static struct module *layout_and_allocate(struct load_info *info, int flags) static void module_deallocate(struct module *mod, struct load_info *info) { percpu_modfree(mod); + module_arch_freeing_init(mod); module_free(mod, mod->module_init); module_free(mod, mod->module_core); } @@ -3055,6 +3061,7 @@ static int do_init_module(struct module *mod) mod->strtab = mod->core_strtab; #endif unset_module_init_ro_nx(mod); + module_arch_freeing_init(mod); module_free(mod, mod->module_init); mod->module_init = NULL; mod->init_size = 0; -- cgit v1.2.3 From be1f221c0445a4157d177197c236f888d3581914 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 20 Jan 2015 09:07:05 +1030 Subject: module: remove mod arg from module_free, rename module_memfree(). Nothing needs the module pointer any more, and the next patch will call it from RCU, where the module itself might no longer exist. Removing the arg is the safest approach. This just codifies the use of the module_alloc/module_free pattern which ftrace and bpf use. Signed-off-by: Rusty Russell Acked-by: Alexei Starovoitov Cc: Mikael Starvik Cc: Jesper Nilsson Cc: Ralf Baechle Cc: Ley Foon Tan Cc: Benjamin Herrenschmidt Cc: Chris Metcalf Cc: Steven Rostedt Cc: x86@kernel.org Cc: Ananth N Mavinakayanahalli Cc: Anil S Keshavamurthy Cc: Masami Hiramatsu Cc: linux-cris-kernel@axis.com Cc: linux-kernel@vger.kernel.org Cc: linux-mips@linux-mips.org Cc: nios2-dev@lists.rocketboards.org Cc: linuxppc-dev@lists.ozlabs.org Cc: sparclinux@vger.kernel.org Cc: netdev@vger.kernel.org --- kernel/bpf/core.c | 2 +- kernel/kprobes.c | 2 +- kernel/module.c | 14 +++++++------- 3 files changed, 9 insertions(+), 9 deletions(-) (limited to 'kernel') diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index d6594e457a25..a64e7a207d2b 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -163,7 +163,7 @@ bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr, void bpf_jit_binary_free(struct bpf_binary_header *hdr) { - module_free(NULL, hdr); + module_memfree(hdr); } #endif /* CONFIG_BPF_JIT */ diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 06f58309fed2..ee619929cf90 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -127,7 +127,7 @@ static void *alloc_insn_page(void) static void free_insn_page(void *page) { - module_free(NULL, page); + module_memfree(page); } struct kprobe_insn_cache kprobe_insn_slots = { diff --git a/kernel/module.c b/kernel/module.c index 68be0b1f9e7f..1f85fd5c89d3 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1795,7 +1795,7 @@ static void unset_module_core_ro_nx(struct module *mod) { } static void unset_module_init_ro_nx(struct module *mod) { } #endif -void __weak module_free(struct module *mod, void *module_region) +void __weak module_memfree(void *module_region) { vfree(module_region); } @@ -1846,7 +1846,7 @@ static void free_module(struct module *mod) /* This may be NULL, but that's OK */ unset_module_init_ro_nx(mod); module_arch_freeing_init(mod); - module_free(mod, mod->module_init); + module_memfree(mod->module_init); kfree(mod->args); percpu_modfree(mod); @@ -1855,7 +1855,7 @@ static void free_module(struct module *mod) /* Finally, free the core (containing the module structure) */ unset_module_core_ro_nx(mod); - module_free(mod, mod->module_core); + module_memfree(mod->module_core); #ifdef CONFIG_MPU update_protections(current->mm); @@ -2790,7 +2790,7 @@ static int move_module(struct module *mod, struct load_info *info) */ kmemleak_ignore(ptr); if (!ptr) { - module_free(mod, mod->module_core); + module_memfree(mod->module_core); return -ENOMEM; } memset(ptr, 0, mod->init_size); @@ -2936,8 +2936,8 @@ static void module_deallocate(struct module *mod, struct load_info *info) { percpu_modfree(mod); module_arch_freeing_init(mod); - module_free(mod, mod->module_init); - module_free(mod, mod->module_core); + module_memfree(mod->module_init); + module_memfree(mod->module_core); } int __weak module_finalize(const Elf_Ehdr *hdr, @@ -3062,7 +3062,7 @@ static int do_init_module(struct module *mod) #endif unset_module_init_ro_nx(mod); module_arch_freeing_init(mod); - module_free(mod, mod->module_init); + module_memfree(mod->module_init); mod->module_init = NULL; mod->init_size = 0; mod->init_ro_size = 0; -- cgit v1.2.3 From c749637909eea5d4090c6f50b89c2c20b534a280 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 20 Jan 2015 09:07:05 +1030 Subject: module: fix race in kallsyms resolution during module load success. The kallsyms routines (module_symbol_name, lookup_module_* etc) disable preemption to walk the modules rather than taking the module_mutex: this is because they are used for symbol resolution during oopses. This works because there are synchronize_sched() and synchronize_rcu() in the unload and failure paths. However, there's one case which doesn't have that: the normal case where module loading succeeds, and we free the init section. We don't want a synchronize_rcu() there, because it would slow down module loading: this bug was introduced in 2009 to speed module loading in the first place. Thus, we want to do the free in an RCU callback. We do this in the simplest possible way by allocating a new rcu_head: if we put it in the module structure we'd have to worry about that getting freed. Reported-by: Rui Xiang Signed-off-by: Rusty Russell --- kernel/module.c | 55 ++++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 42 insertions(+), 13 deletions(-) (limited to 'kernel') diff --git a/kernel/module.c b/kernel/module.c index 1f85fd5c89d3..ed4ec9c30bd2 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2989,10 +2989,31 @@ static void do_mod_ctors(struct module *mod) #endif } +/* For freeing module_init on success, in case kallsyms traversing */ +struct mod_initfree { + struct rcu_head rcu; + void *module_init; +}; + +static void do_free_init(struct rcu_head *head) +{ + struct mod_initfree *m = container_of(head, struct mod_initfree, rcu); + module_memfree(m->module_init); + kfree(m); +} + /* This is where the real work happens */ static int do_init_module(struct module *mod) { int ret = 0; + struct mod_initfree *freeinit; + + freeinit = kmalloc(sizeof(*freeinit), GFP_KERNEL); + if (!freeinit) { + ret = -ENOMEM; + goto fail; + } + freeinit->module_init = mod->module_init; /* * We want to find out whether @mod uses async during init. Clear @@ -3005,18 +3026,7 @@ static int do_init_module(struct module *mod) if (mod->init != NULL) ret = do_one_initcall(mod->init); if (ret < 0) { - /* - * Init routine failed: abort. Try to protect us from - * buggy refcounters. - */ - mod->state = MODULE_STATE_GOING; - synchronize_sched(); - module_put(mod); - blocking_notifier_call_chain(&module_notify_list, - MODULE_STATE_GOING, mod); - free_module(mod); - wake_up_all(&module_wq); - return ret; + goto fail_free_freeinit; } if (ret > 0) { pr_warn("%s: '%s'->init suspiciously returned %d, it should " @@ -3062,15 +3072,34 @@ static int do_init_module(struct module *mod) #endif unset_module_init_ro_nx(mod); module_arch_freeing_init(mod); - module_memfree(mod->module_init); mod->module_init = NULL; mod->init_size = 0; mod->init_ro_size = 0; mod->init_text_size = 0; + /* + * We want to free module_init, but be aware that kallsyms may be + * walking this with preempt disabled. In all the failure paths, + * we call synchronize_rcu/synchronize_sched, but we don't want + * to slow down the success path, so use actual RCU here. + */ + call_rcu(&freeinit->rcu, do_free_init); mutex_unlock(&module_mutex); wake_up_all(&module_wq); return 0; + +fail_free_freeinit: + kfree(freeinit); +fail: + /* Try to protect us from buggy refcounters. */ + mod->state = MODULE_STATE_GOING; + synchronize_sched(); + module_put(mod); + blocking_notifier_call_chain(&module_notify_list, + MODULE_STATE_GOING, mod); + free_module(mod); + wake_up_all(&module_wq); + return ret; } static int may_init_module(void) -- cgit v1.2.3 From d5db139ab3764640e0882a1746e7b9fdee33fd87 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 22 Jan 2015 11:13:14 +1030 Subject: module: make module_refcount() a signed integer. James Bottomley points out that it will be -1 during unload. It's only used for diagnostics, so let's not hide that as it could be a clue as to what's gone wrong. Cc: Jason Wessel Acked-and-documention-added-by: James Bottomley Reviewed-by: Masami Hiramatsu Signed-off-by: Rusty Russell --- kernel/debug/kdb/kdb_main.c | 2 +- kernel/module.c | 17 +++++++++++++---- 2 files changed, 14 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index 379650b984f8..2934889f2cce 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -1979,7 +1979,7 @@ static int kdb_lsmod(int argc, const char **argv) kdb_printf("%-20s%8u 0x%p ", mod->name, mod->core_size, (void *)mod); #ifdef CONFIG_MODULE_UNLOAD - kdb_printf("%4ld ", module_refcount(mod)); + kdb_printf("%4d ", module_refcount(mod)); #endif if (mod->state == MODULE_STATE_GOING) kdb_printf(" (Unloading)"); diff --git a/kernel/module.c b/kernel/module.c index ed4ec9c30bd2..d856e96a3cce 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -772,9 +772,18 @@ static int try_stop_module(struct module *mod, int flags, int *forced) return 0; } -unsigned long module_refcount(struct module *mod) +/** + * module_refcount - return the refcount or -1 if unloading + * + * @mod: the module we're checking + * + * Returns: + * -1 if the module is in the process of unloading + * otherwise the number of references in the kernel to the module + */ +int module_refcount(struct module *mod) { - return (unsigned long)atomic_read(&mod->refcnt) - MODULE_REF_BASE; + return atomic_read(&mod->refcnt) - MODULE_REF_BASE; } EXPORT_SYMBOL(module_refcount); @@ -856,7 +865,7 @@ static inline void print_unload_info(struct seq_file *m, struct module *mod) struct module_use *use; int printed_something = 0; - seq_printf(m, " %lu ", module_refcount(mod)); + seq_printf(m, " %i ", module_refcount(mod)); /* * Always include a trailing , so userspace can differentiate @@ -908,7 +917,7 @@ EXPORT_SYMBOL_GPL(symbol_put_addr); static ssize_t show_refcnt(struct module_attribute *mattr, struct module_kobject *mk, char *buffer) { - return sprintf(buffer, "%lu\n", module_refcount(mk->mod)); + return sprintf(buffer, "%i\n", module_refcount(mk->mod)); } static struct module_attribute modinfo_refcnt = -- cgit v1.2.3 From e9d1b4f3c60997fe197bf0243cb4a41a44387a88 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Thu, 8 Jan 2015 14:30:22 -0800 Subject: x86, mpx: Strictly enforce empty prctl() args Description from Michael Kerrisk. He suggested an identical patch to one I had already coded up and tested. commit fe3d197f8431 "x86, mpx: On-demand kernel allocation of bounds tables" added two new prctl() operations, PR_MPX_ENABLE_MANAGEMENT and PR_MPX_DISABLE_MANAGEMENT. However, no checks were included to ensure that unused arguments are zero, as is done in many existing prctl()s and as should be done for all new prctl()s. This patch adds the required checks. Suggested-by: Andy Lutomirski Suggested-by: Michael Kerrisk Signed-off-by: Dave Hansen Cc: Dave Hansen Link: http://lkml.kernel.org/r/20150108223022.7F56FD13@viggo.jf.intel.com Signed-off-by: Thomas Gleixner --- kernel/sys.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'kernel') diff --git a/kernel/sys.c b/kernel/sys.c index a8c9f5a7dda6..ea9c88109894 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -2210,9 +2210,13 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, up_write(&me->mm->mmap_sem); break; case PR_MPX_ENABLE_MANAGEMENT: + if (arg2 || arg3 || arg4 || arg5) + return -EINVAL; error = MPX_ENABLE_MANAGEMENT(me); break; case PR_MPX_DISABLE_MANAGEMENT: + if (arg2 || arg3 || arg4 || arg5) + return -EINVAL; error = MPX_DISABLE_MANAGEMENT(me); break; default: -- cgit v1.2.3