summaryrefslogtreecommitdiff
path: root/arch/x86/lib/copy_user_64.S
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2011-05-19 17:36:08 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2011-05-19 17:36:08 -0700
commitdf48d8716eab9608fe93924e4ae06ff110e8674f (patch)
tree0fe10733a414b3651e1dae29518b7960a4da0aa4 /arch/x86/lib/copy_user_64.S
parentacd30250d7d0f495685d1c7c6184636a22fcdf7f (diff)
parent29510ec3b626c86de9707bb8904ff940d430289b (diff)
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (107 commits) perf stat: Add more cache-miss percentage printouts perf stat: Add -d -d and -d -d -d options to show more CPU events ftrace/kbuild: Add recordmcount files to force full build ftrace: Add self-tests for multiple function trace users ftrace: Modify ftrace_set_filter/notrace to take ops ftrace: Allow dynamically allocated function tracers ftrace: Implement separate user function filtering ftrace: Free hash with call_rcu_sched() ftrace: Have global_ops store the functions that are to be traced ftrace: Add ops parameter to ftrace_startup/shutdown functions ftrace: Add enabled_functions file ftrace: Use counters to enable functions to trace ftrace: Separate hash allocation and assignment ftrace: Create a global_ops to hold the filter and notrace hashes ftrace: Use hash instead for FTRACE_FL_FILTER ftrace: Replace FTRACE_FL_NOTRACE flag with a hash of ignored functions perf bench, x86: Add alternatives-asm.h wrapper x86, 64-bit: Fix copy_[to/from]_user() checks for the userspace address limit x86, mem: memset_64.S: Optimize memset by enhanced REP MOVSB/STOSB x86, mem: memmove_64.S: Optimize memmove by enhanced REP MOVSB/STOSB ...
Diffstat (limited to 'arch/x86/lib/copy_user_64.S')
-rw-r--r--arch/x86/lib/copy_user_64.S69
1 files changed, 57 insertions, 12 deletions
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index 99e482615195..024840266ba0 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -15,23 +15,30 @@
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/cpufeature.h>
+#include <asm/alternative-asm.h>
- .macro ALTERNATIVE_JUMP feature,orig,alt
+/*
+ * By placing feature2 after feature1 in altinstructions section, we logically
+ * implement:
+ * If CPU has feature2, jmp to alt2 is used
+ * else if CPU has feature1, jmp to alt1 is used
+ * else jmp to orig is used.
+ */
+ .macro ALTERNATIVE_JUMP feature1,feature2,orig,alt1,alt2
0:
.byte 0xe9 /* 32bit jump */
.long \orig-1f /* by default jump to orig */
1:
.section .altinstr_replacement,"ax"
2: .byte 0xe9 /* near jump with 32bit immediate */
- .long \alt-1b /* offset */ /* or alternatively to alt */
+ .long \alt1-1b /* offset */ /* or alternatively to alt1 */
+3: .byte 0xe9 /* near jump with 32bit immediate */
+ .long \alt2-1b /* offset */ /* or alternatively to alt2 */
.previous
+
.section .altinstructions,"a"
- .align 8
- .quad 0b
- .quad 2b
- .word \feature /* when feature is set */
- .byte 5
- .byte 5
+ altinstruction_entry 0b,2b,\feature1,5,5
+ altinstruction_entry 0b,3b,\feature2,5,5
.previous
.endm
@@ -72,8 +79,10 @@ ENTRY(_copy_to_user)
addq %rdx,%rcx
jc bad_to_user
cmpq TI_addr_limit(%rax),%rcx
- jae bad_to_user
- ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
+ ja bad_to_user
+ ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS, \
+ copy_user_generic_unrolled,copy_user_generic_string, \
+ copy_user_enhanced_fast_string
CFI_ENDPROC
ENDPROC(_copy_to_user)
@@ -85,8 +94,10 @@ ENTRY(_copy_from_user)
addq %rdx,%rcx
jc bad_from_user
cmpq TI_addr_limit(%rax),%rcx
- jae bad_from_user
- ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
+ ja bad_from_user
+ ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS, \
+ copy_user_generic_unrolled,copy_user_generic_string, \
+ copy_user_enhanced_fast_string
CFI_ENDPROC
ENDPROC(_copy_from_user)
@@ -255,3 +266,37 @@ ENTRY(copy_user_generic_string)
.previous
CFI_ENDPROC
ENDPROC(copy_user_generic_string)
+
+/*
+ * Some CPUs are adding enhanced REP MOVSB/STOSB instructions.
+ * It's recommended to use enhanced REP MOVSB/STOSB if it's enabled.
+ *
+ * Input:
+ * rdi destination
+ * rsi source
+ * rdx count
+ *
+ * Output:
+ * eax uncopied bytes or 0 if successful.
+ */
+ENTRY(copy_user_enhanced_fast_string)
+ CFI_STARTPROC
+ andl %edx,%edx
+ jz 2f
+ movl %edx,%ecx
+1: rep
+ movsb
+2: xorl %eax,%eax
+ ret
+
+ .section .fixup,"ax"
+12: movl %ecx,%edx /* ecx is zerorest also */
+ jmp copy_user_handle_tail
+ .previous
+
+ .section __ex_table,"a"
+ .align 8
+ .quad 1b,12b
+ .previous
+ CFI_ENDPROC
+ENDPROC(copy_user_enhanced_fast_string)