diff options
| field | value | date |
|---|---|---|
| author | Nicolas Pitre <nico@fluxnic.net> | 2019-08-20 23:05:42 -0400 |
| committer | Arnd Bergmann <arnd@arndb.de> | 2019-09-01 23:53:25 +0200 |
| commit | 602828c1aade576ac5f3fbd59b4eb014c5fc2414 (patch) | |
| tree | 1f3041f8c1f2ad40d56ee35213f63f927cc6fa74 /include/asm-generic/div64.h | |
| parent | 3940ba8eea8c6c89f547a1bd153977cece5fecd2 (diff) | |
__div64_const32(): improve the generic C version
Let's rework that code to avoid large immediate values and convert some
64-bit variables to 32-bit ones when possible. This allows gcc to
produce smaller and better code. This even produces optimal code on
RISC-V.
Signed-off-by: Nicolas Pitre <nico@fluxnic.net>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Diffstat (limited to 'include/asm-generic/div64.h')
-rw-r--r-- | include/asm-generic/div64.h | 16 |
1 files changed, 10 insertions, 6 deletions
```diff
diff --git a/include/asm-generic/div64.h b/include/asm-generic/div64.h
index dc9726fdac8f..33358245b4fa 100644
--- a/include/asm-generic/div64.h
+++ b/include/asm-generic/div64.h
@@ -178,7 +178,8 @@ static inline uint64_t __arch_xprod_64(const uint64_t m, uint64_t n, bool bias)
 	uint32_t m_hi = m >> 32;
 	uint32_t n_lo = n;
 	uint32_t n_hi = n >> 32;
-	uint64_t res, tmp;
+	uint64_t res;
+	uint32_t res_lo, res_hi, tmp;
 
 	if (!bias) {
 		res = ((uint64_t)m_lo * n_lo) >> 32;
@@ -187,8 +188,9 @@ static inline uint64_t __arch_xprod_64(const uint64_t m, uint64_t n, bool bias)
 		res = (m + (uint64_t)m_lo * n_lo) >> 32;
 	} else {
 		res = m + (uint64_t)m_lo * n_lo;
-		tmp = (res < m) ? (1ULL << 32) : 0;
-		res = (res >> 32) + tmp;
+		res_lo = res >> 32;
+		res_hi = (res_lo < m_hi);
+		res = res_lo | ((uint64_t)res_hi << 32);
 	}
 
 	if (!(m & ((1ULL << 63) | (1ULL << 31)))) {
@@ -197,10 +199,12 @@ static inline uint64_t __arch_xprod_64(const uint64_t m, uint64_t n, bool bias)
 		res += (uint64_t)m_hi * n_lo;
 		res >>= 32;
 	} else {
-		tmp = res += (uint64_t)m_lo * n_hi;
+		res += (uint64_t)m_lo * n_hi;
+		tmp = res >> 32;
 		res += (uint64_t)m_hi * n_lo;
-		tmp = (res < tmp) ? (1ULL << 32) : 0;
-		res = (res >> 32) + tmp;
+		res_lo = res >> 32;
+		res_hi = (res_lo < tmp);
+		res = res_lo | ((uint64_t)res_hi << 32);
 	}
 
 	res += (uint64_t)m_hi * n_hi;
```