diff options
author | Al Viro <viro@zeniv.linux.org.uk> | 2008-08-17 21:05:42 -0400 |
---|---|---|
committer | H. Peter Anvin <hpa@zytor.com> | 2008-10-22 22:55:20 -0700 |
commit | bb8985586b7a906e116db835c64773b7a7d51663 (patch) | |
tree | de93ae58e88cc563d95cc124a73f3930594c6100 /arch/x86/include/asm/string_32.h | |
parent | 8ede0bdb63305d3353efd97e9af6210afb05734e (diff) |
x86, um: ... and asm-x86 move
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Diffstat (limited to 'arch/x86/include/asm/string_32.h')
-rw-r--r-- | arch/x86/include/asm/string_32.h | 326 |
1 files changed, 326 insertions, 0 deletions
diff --git a/arch/x86/include/asm/string_32.h b/arch/x86/include/asm/string_32.h
new file mode 100644
index 000000000000..487843ed245a
--- /dev/null
+++ b/arch/x86/include/asm/string_32.h
@@ -0,0 +1,326 @@
+#ifndef ASM_X86__STRING_32_H
+#define ASM_X86__STRING_32_H
+
+#ifdef __KERNEL__
+
+/* Let gcc decide whether to inline or use the out of line functions */
+/*
+ * Each __HAVE_ARCH_<NAME> define in this header sits next to a prototype
+ * (or an inline/macro definition) of the matching routine.  The extern
+ * ones are only declared here; their bodies are not part of this header.
+ * NOTE(review): presumably the defines tell the generic string code not to
+ * build its own version - confirm against lib/string.c.
+ */
+#define __HAVE_ARCH_STRCPY
+extern char *strcpy(char *dest, const char *src);
+
+#define __HAVE_ARCH_STRNCPY
+extern char *strncpy(char *dest, const char *src, size_t count);
+
+#define __HAVE_ARCH_STRCAT
+extern char *strcat(char *dest, const char *src);
+
+#define __HAVE_ARCH_STRNCAT
+extern char *strncat(char *dest, const char *src, size_t count);
+
+#define __HAVE_ARCH_STRCMP
+extern int strcmp(const char *cs, const char *ct);
+
+#define __HAVE_ARCH_STRNCMP
+extern int strncmp(const char *cs, const char *ct, size_t count);
+
+#define __HAVE_ARCH_STRCHR
+extern char *strchr(const char *s, int c);
+
+#define __HAVE_ARCH_STRLEN
+extern size_t strlen(const char *s);
+/*
+ * __memcpy - copy n bytes from "from" to "to" with x86 string moves.
+ *
+ * The asm first runs "rep ; movsl" with the count register preloaded with
+ * n / 4, which moves the whole 32-bit words.  It then reloads %ecx with n
+ * (operand %4), masks it with 3 and, unless the result is zero, runs
+ * "rep ; movsb" for the remaining 1-3 bytes.
+ *
+ * d0, d1 and d2 are dummy outputs bound to the "c", "D" and "S" register
+ * constraints.  The inputs n / 4, to and from are tied to them through the
+ * matching constraints "0", "1" and "2", so the compiler knows that those
+ * three registers are overwritten.  The "memory" clobber accounts for the
+ * loads and stores the asm performs on its own.
+ *
+ * Returns to.
+ */
+static __always_inline void *__memcpy(void *to, const void *from, size_t n)
+{
+	int d0, d1, d2;
+	asm volatile("rep ; movsl\n\t"		/* n / 4 dwords */
+		     "movl %4,%%ecx\n\t"	/* reload the full count n */
+		     "andl $3,%%ecx\n\t"	/* keep only n % 4 */
+		     "jz 1f\n\t"		/* no tail bytes: skip */
+		     "rep ; movsb\n\t"		/* copy the 1-3 tail bytes */
+		     "1:"
+		     : "=&c" (d0), "=&D" (d1), "=&S" (d2)
+		     : "0" (n / 4), "g" (n), "1" ((long)to), "2" ((long)from)
+		     : "memory");
+	return to;
+}
+
+/*
+ * This looks ugly, but the compiler can optimize it totally,
+ * as the count is constant.
+ */
+static __always_inline void *__constant_memcpy(void *to, const void *from,
+					       size_t n)
+{	/*
+	 * Copy n bytes from "from" to "to" and return to.  The memcpy()
+	 * macros further down select this routine when
+	 * __builtin_constant_p() reports n as a compile-time constant.
+	 */
+	long esi, edi;	/* running source / destination addresses for the asm */
+	if (!n)
+		return to;	/* nothing to copy */
+	/*
+	 * Sizes 1-6 and 8 are done with one or two plain C assignments.
+	 * Size 7 and everything larger has no case here and falls through
+	 * to the string-instruction path after the switch.
+	 */
+	switch (n) {
+	case 1:
+		*(char *)to = *(char *)from;
+		return to;
+	case 2:
+		*(short *)to = *(short *)from;
+		return to;
+	case 4:
+		*(int *)to = *(int *)from;
+		return to;
+	/* the remaining sizes take two assignments each */
+	case 3:
+		*(short *)to = *(short *)from;
+		*((char *)to + 2) = *((char *)from + 2);
+		return to;
+	case 5:
+		*(int *)to = *(int *)from;
+		*((char *)to + 4) = *((char *)from + 4);
+		return to;
+	case 6:
+		*(int *)to = *(int *)from;
+		*((short *)to + 2) = *((short *)from + 2);
+		return to;
+	case 8:
+		*(int *)to = *(int *)from;
+		*((int *)to + 1) = *((int *)from + 1);
+		return to;
+	}
+	/*
+	 * General path: move the n / 4 whole dwords first, then the n % 4
+	 * tail.  esi and edi are both inputs and outputs of every asm
+	 * statement below, so each statement continues from the addresses
+	 * the previous one left behind.
+	 */
+	esi = (long)from;
+	edi = (long)to;
+	if (n >= 5 * 4) {	/* 20 bytes or more */
+		/* large block: use rep prefix */
+		int ecx;	/* dummy output: marks the count register as modified */
+		asm volatile("rep ; movsl"
+			     : "=&c" (ecx), "=&D" (edi), "=&S" (esi)
+			     : "0" (n / 4), "1" (edi), "2" (esi)
+			     : "memory"
+		);
+	} else {
+		/*
+		 * small block: don't clobber ecx + smaller code
+		 *
+		 * The tests below are cumulative, not exclusive: a count of
+		 * 16-19 executes all four movsl, 12-15 three of them, 8-11
+		 * two and 4-7 one.
+		 */
+		if (n >= 4 * 4)
+			asm volatile("movsl"
+				     : "=&D"(edi), "=&S"(esi)
+				     : "0"(edi), "1"(esi)
+				     : "memory");
+		if (n >= 3 * 4)
+			asm volatile("movsl"
+				     : "=&D"(edi), "=&S"(esi)
+				     : "0"(edi), "1"(esi)
+				     : "memory");
+		if (n >= 2 * 4)
+			asm volatile("movsl"
+				     : "=&D"(edi), "=&S"(esi)
+				     : "0"(edi), "1"(esi)
+				     : "memory");
+		if (n >= 1 * 4)
+			asm volatile("movsl"
+				     : "=&D"(edi), "=&S"(esi)
+				     : "0"(edi), "1"(esi)
+				     : "memory");
+	}
+	switch (n % 4) {
+		/* tail */
+	case 0:		/* count was a multiple of four: done */
+		return to;
+	case 1:		/* one byte left */
+		asm volatile("movsb"
+			     : "=&D"(edi), "=&S"(esi)
+			     : "0"(edi), "1"(esi)
+			     : "memory");
+		return to;
+	case 2:		/* one 16-bit word left */
+		asm volatile("movsw"
+			     : "=&D"(edi), "=&S"(esi)
+			     : "0"(edi), "1"(esi)
+			     : "memory");
+		return to;
+	default:	/* n % 4 == 3: a word followed by a byte */
+		asm volatile("movsw\n\tmovsb"
+			     : "=&D"(edi), "=&S"(esi)
+			     : "0"(edi), "1"(esi)
+			     : "memory");
+		return to;
+	}
+}
+
+#define __HAVE_ARCH_MEMCPY
+
+#ifdef CONFIG_X86_USE_3DNOW
+
+#include <asm/mmx.h>
+
+/*
+ *	This CPU favours 3DNow strongly (eg AMD Athlon)
+ */
+/*
+ * __constant_memcpy3d - copy used by the memcpy() macro below when the
+ * length is a compile-time constant.  Lengths under 512 bytes go to
+ * __constant_memcpy(); everything else is handed to _mmx_memcpy().
+ * NOTE(review): _mmx_memcpy() is not declared in this header - presumably
+ * it comes from <asm/mmx.h>; confirm.
+ */
+static inline void *__constant_memcpy3d(void *to, const void *from, size_t len)
+{
+	if (len < 512)
+		return __constant_memcpy(to, from, len);
+	return _mmx_memcpy(to, from, len);
+}
+/*
+ * __memcpy3d - same split as __constant_memcpy3d(), for lengths that are
+ * not compile-time constants: under 512 bytes use __memcpy(), otherwise
+ * _mmx_memcpy().
+ */
+static inline void *__memcpy3d(void *to, const void *from, size_t len)
+{
+	if (len < 512)
+		return __memcpy(to, from, len);
+	return _mmx_memcpy(to, from, len);
+}
+/*
+ * memcpy() for CONFIG_X86_USE_3DNOW builds: __builtin_constant_p((n))
+ * picks the constant-length helper when n is known at compile time and
+ * the run-time helper otherwise.
+ */
+#define memcpy(t, f, n)				\
+	(__builtin_constant_p((n))		\
+	 ? __constant_memcpy3d((t), (f), (n))	\
+	 : __memcpy3d((t), (f), (n)))
+
+#else
+
+/*
+ *	No 3D Now!
+ */
+/*
+ * Same compile-time dispatch as the 3DNow variant, but going directly to
+ * __constant_memcpy() or __memcpy() with no 512-byte cut-over.
+ */
+#define memcpy(t, f, n)				\
+	(__builtin_constant_p((n))		\
+	 ? __constant_memcpy((t), (f), (n))	\
+	 : __memcpy((t), (f), (n)))
+
+#endif
+
+#define __HAVE_ARCH_MEMMOVE
+void *memmove(void *dest, const void *src, size_t n);	/* prototype only; body is not in this header */
+
+#define memcmp __builtin_memcmp	/* memcmp maps straight onto the compiler builtin */
+
+#define __HAVE_ARCH_MEMCHR
+extern void *memchr(const void *cs, int c, size_t count);
+/*
+ * __memset_generic - fill count bytes starting at s with the byte c.
+ *
+ * One "rep stosb": the count register is preloaded with count, the
+ * destination register with s and the "a" register with c.  d0 and d1 are
+ * dummy outputs that mark the count and destination registers as
+ * modified.  Returns s.
+ */
+static inline void *__memset_generic(void *s, char c, size_t count)
+{
+	int d0, d1;
+	asm volatile("rep\n\t"
+		     "stosb"
+		     : "=&c" (d0), "=&D" (d1)
+		     : "a" (c), "1" (s), "0" (count)
+		     : "memory");
+	return s;
+}
+
+/*
+ * we might want to write optimized versions of these later
+ *
+ * For now the constant-count variant is a plain alias that forwards to
+ * __memset_generic().
+ */
+#define __constant_count_memset(s, c, count) __memset_generic((s), (c), (count))
+
+/*
+ * memset(x, 0, y) is a reasonably common thing to do, so we want to fill
+ * things 32 bits at a time even when we don't know the size of the
+ * area at compile-time..
+ */
+static __always_inline
+void *__constant_c_memset(void *s, unsigned long c, size_t count)
+{	/*
+	 * Fill count bytes at s and return s; count need not be a
+	 * compile-time constant.  The memset() macro at the end of this
+	 * header passes c as the fill byte multiplied by 0x01010101UL,
+	 * i.e. already replicated into every byte of a 32-bit word.
+	 *
+	 * Operand %3 is count (constraint "q"); %b3 addresses its low
+	 * byte, whose two lowest bits decide which tail stores run.
+	 */
+	int d0, d1;	/* dummy outputs: mark count and destination registers as modified */
+	asm volatile("rep ; stosl\n\t"		/* count / 4 dwords of c */
+		     "testb $2,%b3\n\t"		/* bit 1 of count set? */
+		     "je 1f\n\t"
+		     "stosw\n"			/* yes: one 16-bit store */
+		     "1:\ttestb $1,%b3\n\t"	/* bit 0 of count set? */
+		     "je 2f\n\t"
+		     "stosb\n"			/* yes: one final byte */
+		     "2:"
+		     : "=&c" (d0), "=&D" (d1)
+		     : "a" (c), "q" (count), "0" (count/4), "1" ((long)s)
+		     : "memory");
+	return s;
+}
+
+/* Added by Gertjan van Wingerde to make minix and sysv module work */
+#define __HAVE_ARCH_STRNLEN
+extern size_t strnlen(const char *s, size_t count);
+/* end of additional stuff */
+
+#define __HAVE_ARCH_STRSTR
+extern char *strstr(const char *cs, const char *ct);
+
+/*
+ * This looks horribly ugly, but the compiler can optimize it totally,
+ * as we by now know that both pattern and count is constant..
+ *
+ * Counts 0-4 are handled with plain stores.  Larger counts use
+ * "rep ; stosl" for count / 4 dwords of pattern, followed by stosw and/or
+ * stosb for the count % 4 tail bytes.  Returns s.
+ */
+static __always_inline
+void *__constant_c_and_count_memset(void *s, unsigned long pattern,
+				    size_t count)
+{
+	switch (count) {
+	case 0:
+		return s;
+	case 1:
+		*(unsigned char *)s = pattern & 0xff;
+		return s;
+	case 2:
+		*(unsigned short *)s = pattern & 0xffff;
+		return s;
+	case 3:
+		*(unsigned short *)s = pattern & 0xffff;
+		*((unsigned char *)s + 2) = pattern & 0xff;
+		return s;
+	case 4:		/* NOTE(review): a 4-byte store only where unsigned long is 32 bits wide */
+		*(unsigned long *)s = pattern;
+		return s;
+	}
+/*
+ * COMMON(x): "rep ; stosl" for count / 4 dwords, with the string x appended
+ * as extra trailing instructions.  It relies on d0, d1 and eax from the
+ * block that follows and is removed again with #undef before the function
+ * ends.
+ */
+#define COMMON(x)							\
+	asm volatile("rep ; stosl"					\
+		     x							\
+		     : "=&c" (d0), "=&D" (d1)				\
+		     : "a" (eax), "0" (count/4), "1" ((long)s)	\
+		     : "memory")
+
+	{
+		int d0, d1;	/* dummy outputs used by COMMON() */
+#if __GNUC__ == 4 && __GNUC_MINOR__ == 0
+		/* Workaround for broken gcc 4.0 */
+		register unsigned long eax asm("%eax") = pattern;	/* variable pinned to %eax */
+#else
+		unsigned long eax = pattern;
+#endif
+
+		switch (count % 4) {
+		case 0:		/* dwords only, no tail */
+			COMMON("");
+			return s;
+		case 1:		/* one tail byte */
+			COMMON("\n\tstosb");
+			return s;
+		case 2:		/* one tail word */
+			COMMON("\n\tstosw");
+			return s;
+		default:	/* count % 4 == 3: a word followed by a byte */
+			COMMON("\n\tstosw\n\tstosb");
+			return s;
+		}
+	}
+
+#undef COMMON
+}
+/*
+ * Fill word c is already known at compile time here.  Use the variant
+ * that also needs a constant count when __builtin_constant_p(count) holds
+ * and the run-time-count variant otherwise.
+ */
+#define __constant_c_x_memset(s, c, count)			\
+	(__builtin_constant_p(count)				\
+	 ? __constant_c_and_count_memset((s), (c), (count))	\
+	 : __constant_c_memset((s), (c), (count)))
+/*
+ * Fill byte not known at compile time.  Both arms currently end up in
+ * __memset_generic(), because __constant_count_memset() is defined as an
+ * alias for it earlier in this header.
+ */
+#define __memset(s, c, count)				\
+	(__builtin_constant_p(count)			\
+	 ? __constant_count_memset((s), (c), (count))	\
+	 : __memset_generic((s), (c), (count)))
+/*
+ * memset(): when c is a compile-time constant its low byte is replicated
+ * into all four bytes of a 32-bit word (0x01010101UL * (unsigned char)(c))
+ * and the dword-at-a-time helpers are used; otherwise fall back to
+ * __memset().
+ */
+#define __HAVE_ARCH_MEMSET
+#define memset(s, c, count)						\
+	(__builtin_constant_p(c)					\
+	 ? __constant_c_x_memset((s), (0x01010101UL * (unsigned char)(c)), \
+				 (count))				\
+	 : __memset((s), (c), (count)))
+
+/*
+ * find the first occurrence of byte 'c', or 1 past the area if none
+ */
+#define __HAVE_ARCH_MEMSCAN
+extern void *memscan(void *addr, int c, size_t size);
+
+#endif /* __KERNEL__ */
+
+#endif /* ASM_X86__STRING_32_H */ |