summaryrefslogtreecommitdiff
path: root/include/asm-ia64/gcc_intrin.h
diff options
context:
space:
mode:
authorDavid Mosberger-Tang <davidm@hpl.hp.com>2005-04-21 11:07:59 -0700
committerTony Luck <tony.luck@intel.com>2005-04-21 11:07:59 -0700
commit821376bf15e692941f9235f13a14987009fd0b10 (patch)
tree2179380ee3eb38fb393719e6ce32b15e934c4a44 /include/asm-ia64/gcc_intrin.h
parentd8470b7c13e11c18cf14a7e3180f0b00e715e4f0 (diff)
[IA64] fix fls()
The ia64-version of fls() never worked as intended (the bitnumbering was off by 1 and fls(0) was undefined). This patch fixes the problem by using a popcnt-based fls(), which on McKinley-derived cores is slightly faster than both ia64_fls() and generic_fls(). The resulting code, however, is bigger (7-8 bundles instead of about 3 bundles). Also switch ia64_popcnt() to __builtin_popcountl() for GCC v3.4 or newer since the compiler can predicate that and schedule it better. Thanks to Simon Derr and Matt Mackall for tracking down this bug. Signed-off-by: David Mosberger-Tang <davidm@hpl.hp.com> Signed-off-by: Tony Luck <tony.luck@intel.com>
Diffstat (limited to 'include/asm-ia64/gcc_intrin.h')
-rw-r--r--include/asm-ia64/gcc_intrin.h10
1 files changed, 7 insertions, 3 deletions
diff --git a/include/asm-ia64/gcc_intrin.h b/include/asm-ia64/gcc_intrin.h
index 7c357dfbae50..4fb4e439b05c 100644
--- a/include/asm-ia64/gcc_intrin.h
+++ b/include/asm-ia64/gcc_intrin.h
@@ -133,13 +133,17 @@ register unsigned long ia64_r13 asm ("r13") __attribute_used__;
ia64_intri_res; \
})
-#define ia64_popcnt(x) \
-({ \
+#if __GNUC__ >= 4 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
+# define ia64_popcnt(x) __builtin_popcountl(x)
+#else
+# define ia64_popcnt(x) \
+ ({ \
__u64 ia64_intri_res; \
asm ("popcnt %0=%1" : "=r" (ia64_intri_res) : "r" (x)); \
\
ia64_intri_res; \
-})
+ })
+#endif
#define ia64_getf_exp(x) \
({ \