From ba14a194a434ccc8f733e263ad2ce941e35e5787 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 11 Aug 2016 02:35:21 -0700 Subject: fork: Add generic vmalloced stack support If CONFIG_VMAP_STACK=y is selected, kernel stacks are allocated with __vmalloc_node_range(). Grsecurity has had a similar feature (called GRKERNSEC_KSTACKOVERFLOW=y) for a long time. Signed-off-by: Andy Lutomirski Acked-by: Michal Hocko Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Dmitry Vyukov Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/14c07d4fd173a5b117f51e8b939f9f4323e39899.1470907718.git.luto@kernel.org [ Minor edits. ] Signed-off-by: Ingo Molnar --- include/linux/sched.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 62c68e513e39..20f9f47bcfd0 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1923,6 +1923,9 @@ struct task_struct { #ifdef CONFIG_MMU struct task_struct *oom_reaper_list; #endif +#ifdef CONFIG_VMAP_STACK + struct vm_struct *stack_vm_area; +#endif /* CPU-specific state of this task */ struct thread_struct thread; /* @@ -1939,6 +1942,18 @@ extern int arch_task_struct_size __read_mostly; # define arch_task_struct_size (sizeof(struct task_struct)) #endif +#ifdef CONFIG_VMAP_STACK +static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t) +{ + return t->stack_vm_area; +} +#else +static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t) +{ + return NULL; +} +#endif + /* Future-safe accessor for struct task_struct's cpus_allowed. */ #define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed) -- cgit v1.2.3 From daa460a88c09b26b68e8b017de589c217e901afb Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Fri, 19 Aug 2016 06:52:56 -0500 Subject: ftrace: Only allocate the ret_stack 'fp' field when needed This saves some memory when HAVE_FUNCTION_GRAPH_FP_TEST isn't defined. On x86_64 with newer versions of gcc which have -mfentry, it saves 400 bytes per task. Signed-off-by: Josh Poimboeuf Acked-by: Steven Rostedt Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Byungchul Park Cc: Denys Vlasenko Cc: Frederic Weisbecker Cc: H. Peter Anvin Cc: Kees Cook Cc: Linus Torvalds Cc: Nilay Vaish Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/5c7747d9ea7b5cb47ef0a8ce8a6cea6bf7aa94bf.1471607358.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 7d565afe35d2..4ad9ccc60e38 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -795,7 +795,9 @@ struct ftrace_ret_stack { unsigned long func; unsigned long long calltime; unsigned long long subtime; +#ifdef HAVE_FUNCTION_GRAPH_FP_TEST unsigned long fp; +#endif }; /* -- cgit v1.2.3 From 9a7c348ba6a46f6270d4fe49577649dad5664fe7 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Fri, 19 Aug 2016 06:52:57 -0500 Subject: ftrace: Add return address pointer to ftrace_ret_stack Storing this value will help prevent unwinders from getting out of sync with the function graph tracer ret_stack. Now instead of needing a stateful iterator, they can compare the return address pointer to find the right ret_stack entry. Note that an array of 50 ftrace_ret_stack structs is allocated for every task. So when an arch implements this, it will add either 200 or 400 bytes of memory usage per task (depending on whether it's a 32-bit or 64-bit platform). Signed-off-by: Josh Poimboeuf Acked-by: Steven Rostedt Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Byungchul Park Cc: Denys Vlasenko Cc: Frederic Weisbecker Cc: H. Peter Anvin Cc: Kees Cook Cc: Linus Torvalds Cc: Nilay Vaish Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/a95cfcc39e8f26b89a430c56926af0bb217bc0a1.1471607358.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 4ad9ccc60e38..483e02a50d37 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -798,6 +798,9 @@ struct ftrace_ret_stack { #ifdef HAVE_FUNCTION_GRAPH_FP_TEST unsigned long fp; #endif +#ifdef HAVE_FUNCTION_GRAPH_RET_ADDR_PTR + unsigned long *retp; +#endif }; /* @@ -809,7 +812,7 @@ extern void return_to_handler(void); extern int ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth, - unsigned long frame_pointer); + unsigned long frame_pointer, unsigned long *retp); /* * Sometimes we don't want to trace a function with the function -- cgit v1.2.3 From 223918e32a87c79ac55ca4aa513ba405ba4d57cd Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Fri, 19 Aug 2016 06:52:58 -0500 Subject: ftrace: Add ftrace_graph_ret_addr() stack unwinding helpers When function graph tracing is enabled for a function, ftrace modifies the stack by replacing the original return address with the address of a hook function (return_to_handler). Stack unwinders need a way to get the original return address. Add an arch-independent helper function for that named ftrace_graph_ret_addr(). This adds two variations of the function: one depends on HAVE_FUNCTION_GRAPH_RET_ADDR_PTR, and the other relies on an index state variable. The former is recommended because, in some cases, the latter can cause problems when the unwinder skips stack frames. It can get out of sync with the ret_stack index and wrong addresses can be reported for the stack trace. Once all arches have been ported to use HAVE_FUNCTION_GRAPH_RET_ADDR_PTR, we can get rid of the distinction. Signed-off-by: Josh Poimboeuf Acked-by: Steven Rostedt Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Byungchul Park Cc: Denys Vlasenko Cc: Frederic Weisbecker Cc: H. Peter Anvin Cc: Kees Cook Cc: Linus Torvalds Cc: Nilay Vaish Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/36bd90f762fc5e5af3929e3797a68a64906421cf.1471607358.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 483e02a50d37..6f93ac46e7f0 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -814,6 +814,9 @@ extern int ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth, unsigned long frame_pointer, unsigned long *retp); +unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx, + unsigned long ret, unsigned long *retp); + /* * Sometimes we don't want to trace a function with the function * graph tracer but we want them to keep traced by the usual function @@ -875,6 +878,13 @@ static inline int task_curr_ret_stack(struct task_struct *tsk) return -1; } +static inline unsigned long +ftrace_graph_ret_addr(struct task_struct *task, int *idx, unsigned long ret, + unsigned long *retp) +{ + return ret; +} + static inline void pause_graph_tracing(void) { } static inline void unpause_graph_tracing(void) { } #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ -- cgit v1.2.3 From c65eacbe290b8141554c71b2c94489e73ade8c8d Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Tue, 13 Sep 2016 14:29:24 -0700 Subject: sched/core: Allow putting thread_info into task_struct If an arch opts in by setting CONFIG_THREAD_INFO_IN_TASK_STRUCT, then thread_info is defined as a single 'u32 flags' and is the first entry of task_struct. thread_info::task is removed (it serves no purpose if thread_info is embedded in task_struct), and thread_info::cpu gets its own slot in task_struct. This is heavily based on a patch written by Linus. Originally-from: Linus Torvalds Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Jann Horn Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/a0898196f0476195ca02713691a5037a14f2aac5.1473801993.git.luto@kernel.org Signed-off-by: Ingo Molnar --- include/linux/init_task.h | 9 +++++++++ include/linux/sched.h | 36 ++++++++++++++++++++++++++++++++++-- include/linux/thread_info.h | 15 +++++++++++++++ 3 files changed, 58 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index f8834f820ec2..9c04d44eeb3c 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -15,6 +15,8 @@ #include #include +#include + #ifdef CONFIG_SMP # define INIT_PUSHABLE_TASKS(tsk) \ .pushable_tasks = PLIST_NODE_INIT(tsk.pushable_tasks, MAX_PRIO), @@ -183,12 +185,19 @@ extern struct task_group root_task_group; # define INIT_KASAN(tsk) #endif +#ifdef CONFIG_THREAD_INFO_IN_TASK +# define INIT_TASK_TI(tsk) .thread_info = INIT_THREAD_INFO(tsk), +#else +# define INIT_TASK_TI(tsk) +#endif + /* * INIT_TASK is used to set up the first task table, touch at * your own risk!. Base=0, limit=0x1fffff (=2MB) */ #define INIT_TASK(tsk) \ { \ + INIT_TASK_TI(tsk) \ .state = 0, \ .stack = init_stack, \ .usage = ATOMIC_INIT(2), \ diff --git a/include/linux/sched.h b/include/linux/sched.h index 20f9f47bcfd0..a287e8b13549 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1458,6 +1458,13 @@ struct tlbflush_unmap_batch { }; struct task_struct { +#ifdef CONFIG_THREAD_INFO_IN_TASK + /* + * For reasons of header soup (see current_thread_info()), this + * must be the first element of task_struct. + */ + struct thread_info thread_info; +#endif volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ void *stack; atomic_t usage; @@ -1467,6 +1474,9 @@ struct task_struct { #ifdef CONFIG_SMP struct llist_node wake_entry; int on_cpu; +#ifdef CONFIG_THREAD_INFO_IN_TASK + unsigned int cpu; /* current CPU */ +#endif unsigned int wakee_flips; unsigned long wakee_flip_decay_ts; struct task_struct *last_wakee; @@ -2588,7 +2598,9 @@ extern void set_curr_task(int cpu, struct task_struct *p); void yield(void); union thread_union { +#ifndef CONFIG_THREAD_INFO_IN_TASK struct thread_info thread_info; +#endif unsigned long stack[THREAD_SIZE/sizeof(long)]; }; @@ -3076,10 +3088,26 @@ static inline void threadgroup_change_end(struct task_struct *tsk) cgroup_threadgroup_change_end(tsk); } -#ifndef __HAVE_THREAD_FUNCTIONS +#ifdef CONFIG_THREAD_INFO_IN_TASK + +static inline struct thread_info *task_thread_info(struct task_struct *task) +{ + return &task->thread_info; +} +static inline void *task_stack_page(const struct task_struct *task) +{ + return task->stack; +} +#define setup_thread_stack(new,old) do { } while(0) +static inline unsigned long *end_of_stack(const struct task_struct *task) +{ + return task->stack; +} + +#elif !defined(__HAVE_THREAD_FUNCTIONS) #define task_thread_info(task) ((struct thread_info *)(task)->stack) -#define task_stack_page(task) ((task)->stack) +#define task_stack_page(task) ((void *)(task)->stack) static inline void setup_thread_stack(struct task_struct *p, struct task_struct *org) { @@ -3379,7 +3407,11 @@ static inline void ptrace_signal_wake_up(struct task_struct *t, bool resume) static inline unsigned int task_cpu(const struct task_struct *p) { +#ifdef CONFIG_THREAD_INFO_IN_TASK + return p->cpu; +#else return task_thread_info(p)->cpu; +#endif } static inline int task_node(const struct task_struct *p) diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index 2b5b10eed74f..e2d0fd81b1ba 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -13,6 +13,21 @@ struct timespec; struct compat_timespec; +#ifdef CONFIG_THREAD_INFO_IN_TASK +struct thread_info { + u32 flags; /* low level flags */ +}; + +#define INIT_THREAD_INFO(tsk) \ +{ \ + .flags = 0, \ +} +#endif + +#ifdef CONFIG_THREAD_INFO_IN_TASK +#define current_thread_info() ((struct thread_info *)current) +#endif + /* * System call restart block. */ -- cgit v1.2.3 From c6c314a613cd7d03fb97713e0d642b493de42e69 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 15 Sep 2016 22:45:43 -0700 Subject: sched/core: Add try_get_task_stack() and put_task_stack() There are a few places in the kernel that access stack memory belonging to a different task. Before we can start freeing task stacks before the task_struct is freed, we need a way for those code paths to pin the stack. Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Jann Horn Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/17a434f50ad3d77000104f21666575e10a9c1fbd.1474003868.git.luto@kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index a287e8b13549..a95867267e9f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -3094,11 +3094,19 @@ static inline struct thread_info *task_thread_info(struct task_struct *task) { return &task->thread_info; } + +/* + * When accessing the stack of a non-current task that might exit, use + * try_get_task_stack() instead. task_stack_page will return a pointer + * that could get freed out from under you. + */ static inline void *task_stack_page(const struct task_struct *task) { return task->stack; } + #define setup_thread_stack(new,old) do { } while(0) + static inline unsigned long *end_of_stack(const struct task_struct *task) { return task->stack; @@ -3134,6 +3142,14 @@ static inline unsigned long *end_of_stack(struct task_struct *p) } #endif + +static inline void *try_get_task_stack(struct task_struct *tsk) +{ + return task_stack_page(tsk); +} + +static inline void put_task_stack(struct task_struct *tsk) {} + #define task_stack_end_corrupted(task) \ (*(end_of_stack(task)) != STACK_END_MAGIC) -- cgit v1.2.3 From 68f24b08ee892d47bdef925d676e1ae1ccc316f8 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 15 Sep 2016 22:45:48 -0700 Subject: sched/core: Free the stack early if CONFIG_THREAD_INFO_IN_TASK We currently keep every task's stack around until the task_struct itself is freed. This means that we keep the stack allocation alive for longer than necessary and that, under load, we free stacks in big batches whenever RCU drops the last task reference. Neither of these is good for reuse of cache-hot memory, and freeing in batches prevents us from usefully caching small numbers of vmalloced stacks. On architectures that have thread_info on the stack, we can't easily change this, but on architectures that set THREAD_INFO_IN_TASK, we can free it as soon as the task is dead. Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Jann Horn Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/08ca06cde00ebed0046c5d26cbbf3fbb7ef5b812.1474003868.git.luto@kernel.org Signed-off-by: Ingo Molnar --- include/linux/init_task.h | 4 +++- include/linux/sched.h | 14 ++++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 9c04d44eeb3c..325f649d77ff 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -186,7 +186,9 @@ extern struct task_group root_task_group; #endif #ifdef CONFIG_THREAD_INFO_IN_TASK -# define INIT_TASK_TI(tsk) .thread_info = INIT_THREAD_INFO(tsk), +# define INIT_TASK_TI(tsk) \ + .thread_info = INIT_THREAD_INFO(tsk), \ + .stack_refcount = ATOMIC_INIT(1), #else # define INIT_TASK_TI(tsk) #endif diff --git a/include/linux/sched.h b/include/linux/sched.h index a95867267e9f..abb795afc823 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1936,6 +1936,10 @@ struct task_struct { #ifdef CONFIG_VMAP_STACK struct vm_struct *stack_vm_area; #endif +#ifdef CONFIG_THREAD_INFO_IN_TASK + /* A live task holds one reference. */ + atomic_t stack_refcount; +#endif /* CPU-specific state of this task */ struct thread_struct thread; /* @@ -3143,12 +3147,22 @@ static inline unsigned long *end_of_stack(struct task_struct *p) #endif +#ifdef CONFIG_THREAD_INFO_IN_TASK +static inline void *try_get_task_stack(struct task_struct *tsk) +{ + return atomic_inc_not_zero(&tsk->stack_refcount) ? + task_stack_page(tsk) : NULL; +} + +extern void put_task_stack(struct task_struct *tsk); +#else static inline void *try_get_task_stack(struct task_struct *tsk) { return task_stack_page(tsk); } static inline void put_task_stack(struct task_struct *tsk) {} +#endif #define task_stack_end_corrupted(task) \ (*(end_of_stack(task)) != STACK_END_MAGIC) -- cgit v1.2.3 From 907241dccb4ce5d9413cf3c030b32b0cfc184914 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 23 Sep 2016 18:24:07 +0100 Subject: thread_info: Use unsigned long for flags The generic THREAD_INFO_IN_TASK definition of thread_info::flags is a u32, matching x86 prior to the introduction of THREAD_INFO_IN_TASK. However, common helpers like test_ti_thread_flag() implicitly assume that thread_info::flags has at least the size and alignment of unsigned long, and relying on padding and alignment provided by other elements of task_struct is somewhat fragile. Additionally, some architectures use more that 32 bits for thread_info::flags, and others may need to in future. With THREAD_INFO_IN_TASK, task struct follows thread_info with a long field, and thus we no longer save any space as we did back in commit: affa219b60a11b32 ("x86: change thread_info's flag field back to 32 bits") Given all this, it makes more sense for the generic thread_info::flags to be an unsigned long. In fact given contains/uses the helpers mentioned above, BE arches *must* use unsigned long (or something of the same size) today, or they wouldn't work. Make it so. Signed-off-by: Mark Rutland Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Kees Cook Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1474651447-30447-1-git-send-email-mark.rutland@arm.com Signed-off-by: Ingo Molnar --- include/linux/thread_info.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index e2d0fd81b1ba..45f004e9cc59 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -15,7 +15,7 @@ struct compat_timespec; #ifdef CONFIG_THREAD_INFO_IN_TASK struct thread_info { - u32 flags; /* low level flags */ + unsigned long flags; /* low level flags */ }; #define INIT_THREAD_INFO(tsk) \ -- cgit v1.2.3