From 483dbf6a35907610597fdc304bd32ecba40cdff0 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 12 May 2021 16:11:29 +0100 Subject: arm64/sve: Split _sve_flush macro into separate Z and predicate flushes Trivial refactoring to support further work; no change to generated code. Signed-off-by: Mark Brown Reviewed-by: Dave Martin Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/20210512151131.27877-2-broonie@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/fpsimdmacros.h | 4 +++- arch/arm64/kernel/entry-fpsimd.S | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h index a2563992d2dc..059204477ce6 100644 --- a/arch/arm64/include/asm/fpsimdmacros.h +++ b/arch/arm64/include/asm/fpsimdmacros.h @@ -213,8 +213,10 @@ mov v\nz\().16b, v\nz\().16b .endm -.macro sve_flush +.macro sve_flush_z _for n, 0, 31, _sve_flush_z \n +.endm +.macro sve_flush_p_ffr _for n, 0, 15, _sve_pfalse \n _sve_wrffr 0 .endm diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S index 3ecec60d3295..7921d58427c2 100644 --- a/arch/arm64/kernel/entry-fpsimd.S +++ b/arch/arm64/kernel/entry-fpsimd.S @@ -72,7 +72,8 @@ SYM_FUNC_END(sve_load_from_fpsimd_state) /* Zero all SVE registers but the first 128-bits of each vector */ SYM_FUNC_START(sve_flush_live) - sve_flush + sve_flush_z + sve_flush_p_ffr ret SYM_FUNC_END(sve_flush_live) -- cgit v1.2.3 From c9f6890bca111a879a8af1f2390ac49cf05b11df Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 12 May 2021 16:11:30 +0100 Subject: arm64/sve: Use the sve_flush macros in sve_load_from_fpsimd_state() This makes the code a bit clearer and, as a result, we can also make the indentation more normal. There is no change to the generated code.
Signed-off-by: Mark Brown Reviewed-by: Dave Martin Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/20210512151131.27877-3-broonie@kernel.org Signed-off-by: Will Deacon --- arch/arm64/kernel/entry-fpsimd.S | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S index 7921d58427c2..dd8382e5ce82 100644 --- a/arch/arm64/kernel/entry-fpsimd.S +++ b/arch/arm64/kernel/entry-fpsimd.S @@ -63,11 +63,10 @@ SYM_FUNC_END(sve_set_vq) * and the rest zeroed. All the other SVE registers will be zeroed. */ SYM_FUNC_START(sve_load_from_fpsimd_state) - sve_load_vq x1, x2, x3 - fpsimd_restore x0, 8 - _for n, 0, 15, _sve_pfalse \n - _sve_wrffr 0 - ret + sve_load_vq x1, x2, x3 + fpsimd_restore x0, 8 + sve_flush_p_ffr + ret SYM_FUNC_END(sve_load_from_fpsimd_state) /* Zero all SVE registers but the first 128-bits of each vector */ -- cgit v1.2.3 From ad4711f962e08eff8d6e9b03f9670b1af6ea9395 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 12 May 2021 16:11:31 +0100 Subject: arm64/sve: Skip flushing Z registers with 128 bit vectors When the SVE vector length is 128 bits, there are no bits in the Z registers which are not shared with the V registers, so we can skip them when zeroing state not shared with FPSIMD. This results in a minor performance improvement.
Signed-off-by: Mark Brown Reviewed-by: Dave Martin Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/20210512151131.27877-4-broonie@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/fpsimd.h | 2 +- arch/arm64/kernel/entry-fpsimd.S | 12 ++++++++++-- arch/arm64/kernel/fpsimd.c | 6 ++++-- 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index 2599504674b5..c072161d5c65 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -69,7 +69,7 @@ static inline void *sve_pffr(struct thread_struct *thread) extern void sve_save_state(void *state, u32 *pfpsr); extern void sve_load_state(void const *state, u32 const *pfpsr, unsigned long vq_minus_1); -extern void sve_flush_live(void); +extern void sve_flush_live(unsigned long vq_minus_1); extern void sve_load_from_fpsimd_state(struct user_fpsimd_state const *state, unsigned long vq_minus_1); extern unsigned int sve_get_vl(void); diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S index dd8382e5ce82..0a7a64753878 100644 --- a/arch/arm64/kernel/entry-fpsimd.S +++ b/arch/arm64/kernel/entry-fpsimd.S @@ -69,10 +69,18 @@ SYM_FUNC_START(sve_load_from_fpsimd_state) ret SYM_FUNC_END(sve_load_from_fpsimd_state) -/* Zero all SVE registers but the first 128-bits of each vector */ +/* + * Zero all SVE registers but the first 128-bits of each vector + * + * VQ must already be configured by caller, any further updates of VQ + * will need to ensure that the register state remains valid. 
+ * + * x0 = VQ - 1 + */ SYM_FUNC_START(sve_flush_live) + cbz x0, 1f // A VQ-1 of 0 is 128 bits so no extra Z state sve_flush_z - sve_flush_p_ffr +1: sve_flush_p_ffr ret SYM_FUNC_END(sve_flush_live) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index ad3dd34a83cf..e57b23f95284 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -957,8 +957,10 @@ void do_sve_acc(unsigned int esr, struct pt_regs *regs) * disabling the trap, otherwise update our in-memory copy. */ if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) { - sve_set_vq(sve_vq_from_vl(current->thread.sve_vl) - 1); - sve_flush_live(); + unsigned long vq_minus_one = + sve_vq_from_vl(current->thread.sve_vl) - 1; + sve_set_vq(vq_minus_one); + sve_flush_live(vq_minus_one); fpsimd_bind_task_to_cpu(); } else { fpsimd_to_sve(current); -- cgit v1.2.3