From 1a7dc0db7181ebc8b9ec17e5a15ad4c766c7d3d4 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Apr 2015 11:33:52 +0200 Subject: x86/fpu: Rename fpu_detect() to fpu__detect() Use the fpu__*() namespace to organize FPU ops better. Also document fpu__detect() a bit. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/include/asm/processor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/include/asm/processor.h') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 23ba6765b718..2dc08c231a9a 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -166,7 +166,7 @@ extern const struct seq_operations cpuinfo_op; #define cache_line_size() (boot_cpu_data.x86_cache_alignment) extern void cpu_detect(struct cpuinfo_x86 *c); -extern void fpu_detect(struct cpuinfo_x86 *c); +extern void fpu__detect(struct cpuinfo_x86 *c); extern void early_cpu_init(void); extern void identify_boot_cpu(void); -- cgit v1.2.3 From c0c2803dee21bef08ef5aacdf96fe2f1759ccc62 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 22 Apr 2015 09:52:56 +0200 Subject: x86/fpu: Move thread_info::fpu_counter into thread_info::fpu.counter This field is kept separate from the main FPU state structure for no good reason. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/include/asm/processor.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'arch/x86/include/asm/processor.h') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 2dc08c231a9a..64d6b5d97ce9 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -433,6 +433,15 @@ struct fpu { unsigned int last_cpu; unsigned int has_fpu; union thread_xstate *state; + /* + * This counter contains the number of consecutive context switches + * that the FPU is used. If this is over a threshold, the lazy fpu + * saving becomes unlazy to save the trap. This is an unsigned char + * so that after 256 times the counter wraps and the behavior turns + * lazy again; this to deal with bursty apps that only use FPU for + * a short time + */ + unsigned char counter; }; #ifdef CONFIG_X86_64 @@ -535,15 +544,6 @@ struct thread_struct { unsigned long iopl; /* Max allowed port in the bitmap, in bytes: */ unsigned io_bitmap_max; - /* - * fpu_counter contains the number of consecutive context switches - * that the FPU is used. If this is over a threshold, the lazy fpu - * saving becomes unlazy to save the trap. This is an unsigned char - * so that after 256 times the counter wraps and the behavior turns - * lazy again; this to deal with bursty apps that only use FPU for - * a short time - */ - unsigned char fpu_counter; }; /* -- cgit v1.2.3 From 126009993faa7a750835e67f3ccb90cee124ffa7 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 22 Apr 2015 09:57:24 +0200 Subject: x86/fpu: Improve the comment for the fpu::counter field This was pretty hard to read, improve it. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/include/asm/processor.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/x86/include/asm/processor.h') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 64d6b5d97ce9..28df85561730 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -435,11 +435,11 @@ struct fpu { union thread_xstate *state; /* * This counter contains the number of consecutive context switches - * that the FPU is used. If this is over a threshold, the lazy fpu - * saving becomes unlazy to save the trap. This is an unsigned char - * so that after 256 times the counter wraps and the behavior turns - * lazy again; this to deal with bursty apps that only use FPU for - * a short time + * during which the FPU stays used. If this is over a threshold, the + * lazy fpu saving logic becomes unlazy, to save the trap overhead. + * This is an unsigned char so that after 256 iterations the counter + * wraps and the context switch behavior turns lazy again; this is to + * deal with bursty apps that only use the FPU for a short time: */ unsigned char counter; }; -- cgit v1.2.3 From 14b9675ae9c83c764c0c1fdf4b33f0e9156a4e4f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 22 Apr 2015 09:57:24 +0200 Subject: x86/fpu: Move FPU data structures to asm/fpu_types.h Move the FPU details to asm/fpu_types.h, to further factor out the FPU code. ( As an added bonus, the 'struct orig_ist' definition now moves next to its other data types - the FPU definitions were slapped in the middle of them for some mysterious reason. ) No code changed. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/include/asm/processor.h | 132 +-------------------------------------- 1 file changed, 1 insertion(+), 131 deletions(-) (limited to 'arch/x86/include/asm/processor.h') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 28df85561730..6b75c4b927ec 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -21,6 +21,7 @@ struct mm_struct; #include #include #include +#include #include #include @@ -313,137 +314,6 @@ struct orig_ist { unsigned long ist[7]; }; -#define MXCSR_DEFAULT 0x1f80 - -struct i387_fsave_struct { - u32 cwd; /* FPU Control Word */ - u32 swd; /* FPU Status Word */ - u32 twd; /* FPU Tag Word */ - u32 fip; /* FPU IP Offset */ - u32 fcs; /* FPU IP Selector */ - u32 foo; /* FPU Operand Pointer Offset */ - u32 fos; /* FPU Operand Pointer Selector */ - - /* 8*10 bytes for each FP-reg = 80 bytes: */ - u32 st_space[20]; - - /* Software status information [not touched by FSAVE ]: */ - u32 status; -}; - -struct i387_fxsave_struct { - u16 cwd; /* Control Word */ - u16 swd; /* Status Word */ - u16 twd; /* Tag Word */ - u16 fop; /* Last Instruction Opcode */ - union { - struct { - u64 rip; /* Instruction Pointer */ - u64 rdp; /* Data Pointer */ - }; - struct { - u32 fip; /* FPU IP Offset */ - u32 fcs; /* FPU IP Selector */ - u32 foo; /* FPU Operand Offset */ - u32 fos; /* FPU Operand Selector */ - }; - }; - u32 mxcsr; /* MXCSR Register State */ - u32 mxcsr_mask; /* MXCSR Mask */ - - /* 8*16 bytes for each FP-reg = 128 bytes: */ - u32 st_space[32]; - - /* 16*16 bytes for each XMM-reg = 256 bytes: */ - u32 xmm_space[64]; - - u32 padding[12]; - - union { - u32 padding1[12]; - u32 sw_reserved[12]; - }; - -} __attribute__((aligned(16))); - -struct i387_soft_struct { - u32 cwd; - u32 swd; - u32 twd; - u32 fip; - u32 fcs; - u32 foo; - u32 fos; - /* 8*10 bytes for each FP-reg = 80 
bytes: */ - u32 st_space[20]; - u8 ftop; - u8 changed; - u8 lookahead; - u8 no_update; - u8 rm; - u8 alimit; - struct math_emu_info *info; - u32 entry_eip; -}; - -struct ymmh_struct { - /* 16 * 16 bytes for each YMMH-reg = 256 bytes */ - u32 ymmh_space[64]; -}; - -/* We don't support LWP yet: */ -struct lwp_struct { - u8 reserved[128]; -}; - -struct bndreg { - u64 lower_bound; - u64 upper_bound; -} __packed; - -struct bndcsr { - u64 bndcfgu; - u64 bndstatus; -} __packed; - -struct xsave_hdr_struct { - u64 xstate_bv; - u64 xcomp_bv; - u64 reserved[6]; -} __attribute__((packed)); - -struct xsave_struct { - struct i387_fxsave_struct i387; - struct xsave_hdr_struct xsave_hdr; - struct ymmh_struct ymmh; - struct lwp_struct lwp; - struct bndreg bndreg[4]; - struct bndcsr bndcsr; - /* new processor state extensions will go here */ -} __attribute__ ((packed, aligned (64))); - -union thread_xstate { - struct i387_fsave_struct fsave; - struct i387_fxsave_struct fxsave; - struct i387_soft_struct soft; - struct xsave_struct xsave; -}; - -struct fpu { - unsigned int last_cpu; - unsigned int has_fpu; - union thread_xstate *state; - /* - * This counter contains the number of consecutive context switches - * during which the FPU stays used. If this is over a threshold, the - * lazy fpu saving logic becomes unlazy, to save the trap overhead. - * This is an unsigned char so that after 256 iterations the counter - * wraps and the context switch behavior turns lazy again; this is to - * deal with bursty apps that only use the FPU for a short time: - */ - unsigned char counter; -}; - #ifdef CONFIG_X86_64 DECLARE_PER_CPU(struct orig_ist, orig_ist); -- cgit v1.2.3 From 11ad19277e025f914518bc2943a240cdd37cf844 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 22 Apr 2015 11:44:46 +0200 Subject: x86/fpu: Remove the free_thread_xstate() complication Use fpstate_free() directly to manage FPU state. 
Only process.c was using this method, so this is a speedup as well, as it removes the extra function call and related clobbers. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/include/asm/processor.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/include/asm/processor.h') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 6b75c4b927ec..fef8db024ece 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -362,7 +362,6 @@ DECLARE_PER_CPU(struct irq_stack *, softirq_stack); #endif /* X86_64 */ extern unsigned int xstate_size; -extern void free_thread_xstate(struct task_struct *); extern struct kmem_cache *task_xstate_cachep; struct perf_event; -- cgit v1.2.3 From f55f88e25e9b5232054a82d47de7aaf67179b78b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 22 Apr 2015 16:02:33 +0200 Subject: x86/fpu: Make task_xstate_cachep static It's now local to fpu/core.c, make it static. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/include/asm/processor.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/include/asm/processor.h') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index fef8db024ece..d50cc7f61559 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -362,7 +362,6 @@ DECLARE_PER_CPU(struct irq_stack *, softirq_stack); #endif /* X86_64 */ extern unsigned int xstate_size; -extern struct kmem_cache *task_xstate_cachep; struct perf_event; -- cgit v1.2.3 From c5bedc6847c3be6efe0e671a6155c9a25fd468bf Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 23 Apr 2015 12:49:20 +0200 Subject: x86/fpu: Get rid of PF_USED_MATH usage, convert it to fpu->fpstate_active Introduce a simple fpu->fpstate_active flag in the fpu context data structure and use that instead of PF_USED_MATH in task->flags. Testing for this flag byte should be slightly more efficient than testing a bit in a bitmask, but the main advantage is that most FPU functions can now be performed on a 'struct fpu' alone, they don't need access to 'struct task_struct' anymore. There's a slight linecount increase, mostly due to the 'fpu' local variables and due to extra comments. The local variables will go away once we move most of the FPU methods to pure 'struct fpu' parameters. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/include/asm/processor.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/x86/include/asm/processor.h') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index d50cc7f61559..0f4add462697 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -385,6 +385,10 @@ struct thread_struct { unsigned long fs; #endif unsigned long gs; + + /* Floating point and extended processor state */ + struct fpu fpu; + /* Save middle states of ptrace breakpoints */ struct perf_event *ptrace_bps[HBP_NUM]; /* Debug status used for traps, single steps, etc... */ @@ -395,8 +399,6 @@ struct thread_struct { unsigned long cr2; unsigned long trap_nr; unsigned long error_code; - /* floating point and extended processor state */ - struct fpu fpu; #ifdef CONFIG_X86_32 /* Virtual 86 mode info */ struct vm86_struct __user *vm86_info; -- cgit v1.2.3 From c66e3f28237199629358e9e5a76973c400a54041 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 26 Apr 2015 15:12:44 +0200 Subject: x86/fpu: Remove the extra fpu__detect() layer Now that fpu__detect() has become an empty layer around fpu__init_system(), eliminate it and make fpu__init_system() the main system initialization routine. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/include/asm/processor.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/include/asm/processor.h') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 0f4add462697..b9e487499ae2 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -167,7 +167,6 @@ extern const struct seq_operations cpuinfo_op; #define cache_line_size() (boot_cpu_data.x86_cache_alignment) extern void cpu_detect(struct cpuinfo_x86 *c); -extern void fpu__detect(struct cpuinfo_x86 *c); extern void early_cpu_init(void); extern void identify_boot_cpu(void); -- cgit v1.2.3 From b8c1b8ea7b219a7ba6d58d97bfdf1403b741f8d5 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 24 May 2015 09:58:12 +0200 Subject: x86/fpu: Fix FPU state save area alignment bug On most configs task_struct is cache line aligned, which makes the XSAVE area's 64-byte required alignment work out fine. But on some .config's task_struct is aligned only to 16 bytes (enforced by ARCH_MIN_TASKALIGN), which makes things like fpu__copy() (that XSAVEOPT uses) not work so well. I broke this in: 7366ed771f6e ("x86/fpu: Simplify FPU handling by embedding the fpstate in task_struct (again)") which embedded the fpstate in the task_struct. The alignment requirements of the FPU code were originally present in ARCH_MIN_TASKALIGN, which still has a value of 16, which was the alignment requirement of the FPU state area prior to XSAVE. But this link was not documented (and not required) and the link got lost when the FPU state area was made dynamic years ago. With XSAVEOPT the minimum alignment requirement went up to 64 bytes, and the embedding of the FPU state area in task_struct exposed it again - and '16' was not increased to '64'. 
So fix this bug, but also try to address the underlying lost link of information that made it easier to happen: - document ARCH_MIN_TASKALIGN a bit better - use alignof() to recover the current alignment requirements. This would work in the future as well, should the alignment requirements go up to 128 bytes with things like AVX512. ( We should probably also use the vSMP alignment rules for all of x86, but that's for another patch. ) Reported-by: Peter Zijlstra Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/include/asm/processor.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'arch/x86/include/asm/processor.h') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index b9e487499ae2..8e04f51d6bea 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -53,11 +53,16 @@ static inline void *current_text_addr(void) return pc; } +/* + * These alignment constraints are for performance in the vSMP case, + * but in the task_struct case we must also meet hardware imposed + * alignment requirements of the FPU state: + */ #ifdef CONFIG_X86_VSMP # define ARCH_MIN_TASKALIGN (1 << INTERNODE_CACHE_SHIFT) # define ARCH_MIN_MMSTRUCT_ALIGN (1 << INTERNODE_CACHE_SHIFT) #else -# define ARCH_MIN_TASKALIGN 16 +# define ARCH_MIN_TASKALIGN __alignof__(union fpregs_state) # define ARCH_MIN_MMSTRUCT_ALIGN 0 #endif -- cgit v1.2.3 From 46a6e0cf1c6665a8e867d8f7798d7a3538633f03 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Sun, 7 Jun 2015 11:37:02 -0700 Subject: x86/mpx: Clean up the code by not passing a task pointer around when unnecessary The MPX code can only work on the current task. You can not, for instance, enable MPX management in another process or thread. You can also not handle a fault for another process or thread. 
Despite this, we pass a task_struct around prolifically. This patch removes all of the task struct passing for code paths where the code can not deal with another task (which turns out to be all of them). This has no functional changes. It's just a cleanup. Signed-off-by: Dave Hansen Reviewed-by: Thomas Gleixner Cc: Andrew Morton Cc: Dave Hansen Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: bp@alien8.de Link: http://lkml.kernel.org/r/20150607183702.6A81DA2C@viggo.jf.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/processor.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/x86/include/asm/processor.h') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 8e04f51d6bea..53dbd2b4f1d8 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -802,18 +802,18 @@ extern int get_tsc_mode(unsigned long adr); extern int set_tsc_mode(unsigned int val); /* Register/unregister a process' MPX related resource */ -#define MPX_ENABLE_MANAGEMENT(tsk) mpx_enable_management((tsk)) -#define MPX_DISABLE_MANAGEMENT(tsk) mpx_disable_management((tsk)) +#define MPX_ENABLE_MANAGEMENT() mpx_enable_management() +#define MPX_DISABLE_MANAGEMENT() mpx_disable_management() #ifdef CONFIG_X86_INTEL_MPX -extern int mpx_enable_management(struct task_struct *tsk); -extern int mpx_disable_management(struct task_struct *tsk); +extern int mpx_enable_management(void); +extern int mpx_disable_management(void); #else -static inline int mpx_enable_management(struct task_struct *tsk) +static inline int mpx_enable_management(void) { return -EINVAL; } -static inline int mpx_disable_management(struct task_struct *tsk) +static inline int mpx_disable_management(void) { return -EINVAL; } -- cgit v1.2.3