1 /* SPDX-License-Identifier: GPL-2.0 */
3 * NH - ε-almost-universal hash function, ARM64 NEON accelerated version
5 * Copyright 2018 Google LLC
7 * Author: Eric Biggers <ebiggers@google.com>
10 #include <linux/linkage.h>
11 #include <linux/cfi_types.h>
// _nh_stride k0, k1, k2, k3
//
// Process one 16-byte stride of the message against four key strides.
// k0..k3 are substituted as vector register names. k0, k1 and k2 are only
// read; k3 is overwritten with the next 16 bytes loaded from KEY. Both the
// MESSAGE and KEY pointers are post-incremented by 16.
//
// NOTE(review): this listing appears to omit some lines of the macro (for
// example whatever writes T4-T7, and the closing .endm) -- confirm against
// the full file before relying on these comments.
35 .macro _nh_stride k0, k1, k2, k3
37 // Load next message stride
38 ld1 {T3.16b}, [MESSAGE], #16
40 // Load next key stride
41 ld1 {\k3\().4s}, [KEY], #16
43 // Add message words to key words
// The same four message words (in T3) are added lane-wise to each of the
// four key registers, one sum per pass. T3 is the source of every sum, so
// it must be overwritten last.
44 add T0.4s, T3.4s, \k0\().4s
45 add T1.4s, T3.4s, \k1\().4s
46 add T2.4s, T3.4s, \k2\().4s
47 add T3.4s, T3.4s, \k3\().4s
49 // Multiply 32x32 => 64 and accumulate
// Each umlal multiplies the two low 32-bit lanes of its first source by the
// two 32-bit lanes of its second source (unsigned) and adds the two 64-bit
// products into the corresponding pass accumulator.
// NOTE(review): T4-T7 are not written in any line visible here; presumably
// they are derived from T0-T3 immediately before this point -- confirm.
54 umlal PASS0_SUMS.2d, T0.2s, T4.2s
55 umlal PASS1_SUMS.2d, T1.2s, T5.2s
56 umlal PASS2_SUMS.2d, T2.2s, T6.2s
57 umlal PASS3_SUMS.2d, T3.2s, T7.2s
61 * void nh_neon(const u32 *key, const u8 *message, size_t message_len,
62 * __le64 hash[NH_NUM_PASSES])
64 * It's guaranteed that message_len % 16 == 0.
// Entry point of nh_neon. Accumulates four passes over the message into
// PASS0_SUMS..PASS3_SUMS and stores four 64-bit sums (32 bytes) to HASH.
//
// NOTE(review): no labels, branches or return instruction are visible in
// this listing, so the control flow described below is inferred from the
// flag-setting instructions -- confirm against the full file.
66 SYM_TYPED_FUNC_START(nh_neon)
// Preload the first three 16-byte key strides into K0-K2 (KEY advances by
// 48 bytes in total) and zero the four per-pass accumulators. The fourth
// key register is filled by the first _nh_stride expansion, which loads
// into its last argument.
68 ld1 {K0.4s,K1.4s}, [KEY], #32
69 movi PASS0_SUMS.2d, #0
70 movi PASS1_SUMS.2d, #0
71 ld1 {K2.4s}, [KEY], #16
72 movi PASS2_SUMS.2d, #0
73 movi PASS3_SUMS.2d, #0
// Subtract one 64-byte group up front; subs also sets the condition flags.
// Presumably a conditional branch (not visible) skips the group below when
// fewer than 64 bytes remain.
75 subs MESSAGE_LEN, MESSAGE_LEN, #64
// Four strides = 64 message bytes. The register arguments rotate by one
// position per stride: each expansion loads fresh key words into its last
// argument, which is the register holding the oldest key stride. After
// four strides the assignment is back to K0, K1, K2, K3.
78 _nh_stride K0, K1, K2, K3
79 _nh_stride K1, K2, K3, K0
80 _nh_stride K2, K3, K0, K1
81 _nh_stride K3, K0, K1, K2
// Account for the next 64-byte group; presumably followed by a loop branch
// (not visible).
82 subs MESSAGE_LEN, MESSAGE_LEN, #64
// Keep only the low six bits of the length, i.e. the bytes left over after
// whole 64-byte groups. With the length a multiple of 16 this is 0, 16, 32
// or 48, so at most the three strides below are needed. ands sets the
// condition flags; the exits between the strides are not visible here.
86 ands MESSAGE_LEN, MESSAGE_LEN, #63
88 _nh_stride K0, K1, K2, K3
90 subs MESSAGE_LEN, MESSAGE_LEN, #16
92 _nh_stride K1, K2, K3, K0
94 subs MESSAGE_LEN, MESSAGE_LEN, #16
96 _nh_stride K2, K3, K0, K1
99 // Sum the accumulators for each pass, then store the sums to 'hash'
// addp adds adjacent 64-bit lanes: T0 = { sum(PASS0), sum(PASS1) } and
// T1 = { sum(PASS2), sum(PASS3) }. The st1 then writes the four sums as
// 32 contiguous bytes starting at HASH.
100 addp T0.2d, PASS0_SUMS.2d, PASS1_SUMS.2d
101 addp T1.2d, PASS2_SUMS.2d, PASS3_SUMS.2d
102 st1 {T0.16b,T1.16b}, [HASH]
104 SYM_FUNC_END(nh_neon)