author    | Linus Torvalds <torvalds@linux-foundation.org> | 2015-04-13 13:16:36 -0700
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-04-13 13:16:36 -0700
commit    | 60f898eeaaa1c5d0162a4240bacf33a6c87ecef6 (patch)
tree      | 23eeac4b1e9a616779d22c104dbc8bd45dfeefd1 /arch/x86/lib/memset_64.S
parent    | 977e1ba50893c15121557b39de586901fe3f75cf (diff)
parent    | 3b75232d55680ca166dffa274d0587d5faf0a016 (diff)
Merge branch 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 asm changes from Ingo Molnar:
"There were lots of changes in this development cycle:
- over 100 separate cleanups, restructuring changes, speedups and
fixes in the x86 system call, irq, trap and other entry code, part
of a heroic effort to deobfuscate a decade old spaghetti asm code
and its C code dependencies (Denys Vlasenko, Andy Lutomirski)
- alternatives code fixes and enhancements (Borislav Petkov)
- simplifications and cleanups to the compat code (Brian Gerst)
- signal handling fixes and new x86 testcases (Andy Lutomirski)
- various other fixes and cleanups
By their nature many of these changes are risky - we tried to test
them well on many different x86 systems (there are no known
regressions), and they are split up finely to help bisection - but
there's still a fair bit of residual risk left so caveat emptor"
* 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (148 commits)
perf/x86/64: Report regs_user->ax too in get_regs_user()
perf/x86/64: Simplify regs_user->abi setting code in get_regs_user()
perf/x86/64: Do report user_regs->cx while we are in syscall, in get_regs_user()
perf/x86/64: Do not guess user_regs->cs, ss, sp in get_regs_user()
x86/asm/entry/32: Tidy up JNZ instructions after TESTs
x86/asm/entry/64: Reduce padding in execve stubs
x86/asm/entry/64: Remove GET_THREAD_INFO() in ret_from_fork
x86/asm/entry/64: Simplify jumps in ret_from_fork
x86/asm/entry/64: Remove a redundant jump
x86/asm/entry/64: Optimize [v]fork/clone stubs
x86/asm/entry: Zero EXTRA_REGS for stub32_execve() too
x86/asm/entry/64: Move stub_x32_execve() closer to stub_execveat()
x86/asm/entry/64: Use common code for rt_sigreturn() epilogue
x86/asm/entry/64: Add forgotten CFI annotation
x86/asm/entry/irq: Simplify interrupt dispatch table (IDT) layout
x86/asm/entry/64: Move opportunistic sysret code to syscall code path
x86, selftests: Add sigreturn selftest
x86/alternatives: Guard NOPs optimization
x86/asm/entry: Clear EXTRA_REGS for all executable formats
x86/signal: Remove pax argument from restore_sigcontext
...
Diffstat (limited to 'arch/x86/lib/memset_64.S')
-rw-r--r-- | arch/x86/lib/memset_64.S | 61 |
1 file changed, 24 insertions, 37 deletions
diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S
index 6f44935c6a60..93118fb23976 100644
--- a/arch/x86/lib/memset_64.S
+++ b/arch/x86/lib/memset_64.S
@@ -5,19 +5,30 @@
 #include <asm/cpufeature.h>
 #include <asm/alternative-asm.h>
 
+.weak memset
+
 /*
  * ISO C memset - set a memory block to a byte value. This function uses fast
  * string to get better performance than the original function. The code is
  * simpler and shorter than the orignal function as well.
- *
+ *
  * rdi   destination
- * rsi   value (char)
- * rdx   count (bytes)
- *
+ * rsi   value (char)
+ * rdx   count (bytes)
+ *
  * rax   original destination
- */
-	.section .altinstr_replacement, "ax", @progbits
-.Lmemset_c:
+ */
+ENTRY(memset)
+ENTRY(__memset)
+	/*
+	 * Some CPUs support enhanced REP MOVSB/STOSB feature. It is recommended
+	 * to use it when possible. If not available, use fast string instructions.
+	 *
+	 * Otherwise, use original memset function.
+	 */
+	ALTERNATIVE_2 "jmp memset_orig", "", X86_FEATURE_REP_GOOD, \
+		      "jmp memset_erms", X86_FEATURE_ERMS
+
 	movq %rdi,%r9
 	movq %rdx,%rcx
 	andl $7,%edx
@@ -31,8 +42,8 @@
 	rep stosb
 	movq %r9,%rax
 	ret
-.Lmemset_e:
-	.previous
+ENDPROC(memset)
+ENDPROC(__memset)
 
 /*
  * ISO C memset - set a memory block to a byte value. This function uses
@@ -45,21 +56,16 @@
  *
  * rax   original destination
  */
-	.section .altinstr_replacement, "ax", @progbits
-.Lmemset_c_e:
+ENTRY(memset_erms)
 	movq %rdi,%r9
 	movb %sil,%al
 	movq %rdx,%rcx
 	rep stosb
 	movq %r9,%rax
 	ret
-.Lmemset_e_e:
-	.previous
-
-.weak memset
+ENDPROC(memset_erms)
 
-ENTRY(memset)
-ENTRY(__memset)
+ENTRY(memset_orig)
 	CFI_STARTPROC
 	movq %rdi,%r10
 
@@ -134,23 +140,4 @@ ENTRY(__memset)
 	jmp .Lafter_bad_alignment
 .Lfinal:
 	CFI_ENDPROC
-ENDPROC(memset)
-ENDPROC(__memset)
-
-	/* Some CPUs support enhanced REP MOVSB/STOSB feature.
-	 * It is recommended to use this when possible.
-	 *
-	 * If enhanced REP MOVSB/STOSB feature is not available, use fast string
-	 * instructions.
-	 *
-	 * Otherwise, use original memset function.
-	 *
-	 * In .altinstructions section, ERMS feature is placed after REG_GOOD
-	 * feature to implement the right patch order.
-	 */
-	.section .altinstructions,"a"
-	altinstruction_entry __memset,.Lmemset_c,X86_FEATURE_REP_GOOD,\
-			     .Lfinal-__memset,.Lmemset_e-.Lmemset_c
-	altinstruction_entry __memset,.Lmemset_c_e,X86_FEATURE_ERMS, \
-			     .Lfinal-__memset,.Lmemset_e_e-.Lmemset_c_e
-	.previous
+ENDPROC(memset_orig)
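The patch replaces the old .altinstructions bookkeeping with a single ALTERNATIVE_2 site at the top of memset, which the alternatives machinery patches once at boot: CPUs with ERMS jump to memset_erms, CPUs with only REP_GOOD fall through to the fast-string body, and everything else jumps to memset_orig. Below is a minimal C sketch of that selection order, for illustration only: the *_sim helpers, the feature-flag booleans and memset_dispatch are invented for this example, and the kernel patches the jump in place rather than branching on every call.

```c
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

static bool cpu_has_erms;      /* stands in for X86_FEATURE_ERMS     */
static bool cpu_has_rep_good;  /* stands in for X86_FEATURE_REP_GOOD */

/*
 * All three variants are plain byte loops here; in memset_64.S they are
 * the "rep stosb" body (memset_erms), the "rep stosq + tail" body
 * (__memset) and the unrolled memset_orig, respectively.
 */
static void *memset_erms_sim(void *dst, int c, size_t n)
{
	unsigned char *p = dst;

	while (n--)
		*p++ = (unsigned char)c;
	return dst;
}
#define memset_fast_string_sim memset_erms_sim
#define memset_orig_sim        memset_erms_sim

/* The selection the patched memset entry point ends up encoding. */
static void *memset_dispatch(void *dst, int c, size_t n)
{
	if (cpu_has_erms)                  /* second ALTERNATIVE_2 arm wins */
		return memset_erms_sim(dst, c, n);
	if (cpu_has_rep_good)              /* fall through to fast strings  */
		return memset_fast_string_sim(dst, c, n);
	return memset_orig_sim(dst, c, n); /* default: "jmp memset_orig"    */
}

int main(void)
{
	char buf[16];

	cpu_has_erms = true;               /* pretend the CPU advertises ERMS */
	memset_dispatch(buf, 'x', sizeof(buf) - 1);
	buf[sizeof(buf) - 1] = '\0';
	puts(buf);                         /* prints 15 'x' characters */
	return 0;
}
```

Listing ERMS after REP_GOOD in the ALTERNATIVE_2 arguments is what gives ERMS precedence when both features are present; the sketch expresses the same priority by checking cpu_has_erms first.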