diff options
author | Robin Murphy <robin.murphy@arm.com> | 2020-01-20 18:52:29 +0000 |
---|---|---|
committer | Catalin Marinas <catalin.marinas@arm.com> | 2020-03-09 18:08:25 +0000 |
commit | e9c7ddbf8b4b6a291bf3b5bfa7c883235164d9be (patch) | |
tree | aa52e43a553143c92273950d372f427607c06c2c /arch/arm64/lib | |
parent | 27afb236fe5adaa3911e47c91057ba783549226f (diff) |
arm64: csum: Optimise IPv6 header checksum
Throwing our __uint128_t idioms at csum_ipv6_magic() makes it
about 1.3x-2x faster across a range of microarchitecture/compiler
combinations. Not much in absolute terms, but every little helps.
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Diffstat (limited to 'arch/arm64/lib')
-rw-r--r-- | arch/arm64/lib/csum.c | 27 |
1 files changed, 27 insertions, 0 deletions
```diff
diff --git a/arch/arm64/lib/csum.c b/arch/arm64/lib/csum.c
index 1f82c66b32ea..60eccae2abad 100644
--- a/arch/arm64/lib/csum.c
+++ b/arch/arm64/lib/csum.c
@@ -124,3 +124,30 @@ unsigned int do_csum(const unsigned char *buff, int len)
 
 	return sum >> 16;
 }
+
+__sum16 csum_ipv6_magic(const struct in6_addr *saddr,
+			const struct in6_addr *daddr,
+			__u32 len, __u8 proto, __wsum csum)
+{
+	__uint128_t src, dst;
+	u64 sum = (__force u64)csum;
+
+	src = *(const __uint128_t *)saddr->s6_addr;
+	dst = *(const __uint128_t *)daddr->s6_addr;
+
+	sum += (__force u32)htonl(len);
+#ifdef __LITTLE_ENDIAN
+	sum += (u32)proto << 24;
+#else
+	sum += proto;
+#endif
+	src += (src >> 64) | (src << 64);
+	dst += (dst >> 64) | (dst << 64);
+
+	sum = accumulate(sum, src >> 64);
+	sum = accumulate(sum, dst >> 64);
+
+	sum += ((sum >> 32) | (sum << 32));
+	return csum_fold((__force __wsum)(sum >> 32));
+}
+EXPORT_SYMBOL(csum_ipv6_magic);
```