Linux 6.7-rc7
[linux-modified.git] / arch / arm64 / crypto / sm4-ce-ccm-core.S
1 /* SPDX-License-Identifier: GPL-2.0-or-later */
2 /*
3  * SM4-CCM AEAD Algorithm using ARMv8 Crypto Extensions
4  * as specified in rfc8998
5  * https://datatracker.ietf.org/doc/html/rfc8998
6  *
7  * Copyright (C) 2022 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
8  */
9
10 #include <linux/linkage.h>
11 #include <linux/cfi_types.h>
12 #include <asm/assembler.h>
13 #include "sm4-ce-asm.h"
14
.arch	armv8-a+crypto

/*
 * Assemble-time lookup table for the sm4e macro below: for every register
 * number listed, define a local symbol named ".Lv<num>.4s" whose value is
 * <num>.  Only registers that appear in this list can be named as sm4e
 * operands; any other operand references an undefined symbol.
 */
.irp	regnum, 0, 1, 8, 9, 10, 11, 12, 13, 14, 15, 16, 24, 25, 26, 27, 28, 29, 30, 31
	.set	.Lv\regnum\().4s, \regnum
.endr

/*
 * sm4e vd, vn - emit an SM4E instruction as a raw 32-bit opcode word.
 *
 * Operands are written as "v<num>.4s".  Each one is mapped to its register
 * number through the .Lv<num>.4s symbols defined above; vd lands in bits
 * [4:0] and vn in bits [9:5] of the base opcode 0xcec08400.
 * (Presumably spelled with .inst so that assemblers lacking the SM4
 * mnemonics can still build this file - confirm against the build
 * requirements.)
 */
.macro	sm4e, vd, vn
	.inst	0xcec08400 | .L\vd | (.L\vn << 5)
.endm
24
/* Register macros */

/* Vector register that carries the running CBC-MAC state. */
#define RMAC	v16

/* Helper macros. */

/*
 * inc_le128(vctr) - build the current counter block in vctr, then step the
 * counter.
 *
 * The 128-bit counter lives in x7:x8 as two native 64-bit integers, x7 being
 * the upper half and x8 the lower half (callers in this file load them with
 * ldp and byte-swap each with rev).  The macro:
 *   - copies the *current* value into vctr (x7 -> d[0], x8 -> d[1]) and
 *     byte-reverses each 64-bit lane with rev64, giving the big-endian
 *     block layout;
 *   - adds one to x7:x8, propagating the carry from x8 into x7.
 *
 * Clobbers the condition flags (adds/adc).
 */
#define inc_le128(vctr)					\
		mov		vctr.d[0], x7;		\
		mov		vctr.d[1], x8;		\
		rev64		vctr.16b, vctr.16b;	\
		adds		x8, x8, #1;		\
		adc		x7, x7, xzr;
37
38
.align 3
SYM_FUNC_START(sm4_ce_cbcmac_update)
	/*
	 * Fold nblocks 16-byte blocks from src into the CBC-MAC state at mac.
	 *
	 * For every block the state is first passed through SM4_CRYPT_BLK
	 * and then XORed with the block, so the value written back to mac
	 * is the state *before* its next SM4_CRYPT_BLK pass.
	 *
	 * input:
	 *   x0: round key array, CTX
	 *   x1: mac (16 bytes, read on entry and written back on exit)
	 *   x2: src
	 *   w3: nblocks (unsigned count of 16-byte blocks; 0 is a no-op)
	 *
	 * Registers written here: x2, w3, v0-v3, RMAC (v16), flags, plus
	 * whatever SM4_PREPARE/SM4_CRYPT_BLK use (see sm4-ce-asm.h).
	 */

	/*
	 * Nothing to do for an empty input.  Without this check the
	 * single-block loop would decrement w3 from 0 to 0xffffffff and
	 * keep reading far beyond the end of src.
	 */
	cbz		w3, .Lcbcmac_ret

	SM4_PREPARE(x0)

	ld1		{RMAC.16b}, [x1]

.Lcbcmac_loop_4x:
	/* w3 is an unsigned count, so compare unsigned */
	cmp		w3, #4
	b.lo		.Lcbcmac_loop_1x

	sub		w3, w3, #4

	ld1		{v0.16b-v3.16b}, [x2], #64

	/* the four blocks are chained strictly in order */
	SM4_CRYPT_BLK(RMAC)
	eor		RMAC.16b, RMAC.16b, v0.16b
	SM4_CRYPT_BLK(RMAC)
	eor		RMAC.16b, RMAC.16b, v1.16b
	SM4_CRYPT_BLK(RMAC)
	eor		RMAC.16b, RMAC.16b, v2.16b
	SM4_CRYPT_BLK(RMAC)
	eor		RMAC.16b, RMAC.16b, v3.16b

	cbz		w3, .Lcbcmac_end
	b		.Lcbcmac_loop_4x

.Lcbcmac_loop_1x:
	/* invariant: 1 <= w3 <= 3 on every entry to this loop */
	sub		w3, w3, #1

	ld1		{v0.16b}, [x2], #16

	SM4_CRYPT_BLK(RMAC)
	eor		RMAC.16b, RMAC.16b, v0.16b

	cbnz		w3, .Lcbcmac_loop_1x

.Lcbcmac_end:
	/* store the updated MAC state */
	st1		{RMAC.16b}, [x1]

.Lcbcmac_ret:
	ret
SYM_FUNC_END(sm4_ce_cbcmac_update)
85
.align 3
SYM_FUNC_START(sm4_ce_ccm_final)
	/*
	 * Produce the final tag in place: pass both the MAC state and the
	 * ctr0 block through SM4_CRYPT_BLK2, XOR the two results and write
	 * the 16-byte value back to mac.
	 *
	 * input:
	 *   x0: round key array, CTX
	 *   x1: ctr0 (big endian, 128 bit)
	 *   x2: mac
	 */
	SM4_PREPARE(x0)

	ld1		{v0.16b}, [x1]			/* v0   = ctr0 block */
	ld1		{RMAC.16b}, [x2]		/* RMAC = MAC state */

	SM4_CRYPT_BLK2(RMAC, v0)

	/* mask the MAC with the processed ctr0 block (same op for enc/dec) */
	eor		RMAC.16b, v0.16b, RMAC.16b
	st1		{RMAC.16b}, [x2]

	ret
SYM_FUNC_END(sm4_ce_ccm_final)
106
.align 3
SYM_TYPED_FUNC_START(sm4_ce_ccm_enc)
	/*
	 * CCM encrypt: CTR-mode encrypt nbytes from src to dst while
	 * folding the *input* (plaintext) bytes into the CBC-MAC state.
	 *
	 * input:
	 *   x0: round key array, CTX
	 *   x1: dst
	 *   x2: src
	 *   x3: ctr (big endian, 128 bit)
	 *   w4: nbytes (unsigned; 0 is a no-op)
	 *   x5: mac
	 *
	 * When nbytes is a multiple of 16 the advanced counter is written
	 * back to [x3] and the MAC state to [x5].  When there is a partial
	 * trailing block the tail path updates [x5] but returns WITHOUT
	 * writing the counter back, so a call with a partial block is
	 * presumably expected to be the last one for the request - verify
	 * against the caller.
	 *
	 * Registers written here: w0, x1, x2, w4, x5, w6, x7, x8, w9,
	 * v0-v3, v8-v11, RMAC (v16), flags, plus whatever the SM4_* macros
	 * use (see sm4-ce-asm.h).
	 */

	/*
	 * Empty request: leave counter and MAC untouched.  Falling through
	 * with w4 == 0 would reach the tail path, which re-encrypts and
	 * overwrites the MAC and then loops on a wrapped-around byte count.
	 */
	cbz		w4, .Lccm_enc_ret

	SM4_PREPARE(x0)

	/* x7:x8 = counter as native integers (x7 upper, x8 lower half) */
	ldp		x7, x8, [x3]
	rev		x7, x7
	rev		x8, x8

	ld1		{RMAC.16b}, [x5]

.Lccm_enc_loop_4x:
	/*
	 * nbytes is unsigned: a signed compare would send lengths
	 * >= 2^31 straight to the byte-wise tail path.
	 */
	cmp		w4, #(4 * 16)
	b.lo		.Lccm_enc_loop_1x

	sub		w4, w4, #(4 * 16)

	/* construct CTRs */
	inc_le128(v8)			/* +0 */
	inc_le128(v9)			/* +1 */
	inc_le128(v10)			/* +2 */
	inc_le128(v11)			/* +3 */

	ld1		{v0.16b-v3.16b}, [x2], #64

	/*
	 * Each step processes one counter block together with the MAC
	 * state, then XORs the input block into both: v8..v11 become
	 * the output blocks, RMAC absorbs the input.  The MAC chain
	 * forces the four steps to run in order.
	 */
	SM4_CRYPT_BLK2(v8, RMAC)
	eor		v8.16b, v8.16b, v0.16b
	eor		RMAC.16b, RMAC.16b, v0.16b
	SM4_CRYPT_BLK2(v9, RMAC)
	eor		v9.16b, v9.16b, v1.16b
	eor		RMAC.16b, RMAC.16b, v1.16b
	SM4_CRYPT_BLK2(v10, RMAC)
	eor		v10.16b, v10.16b, v2.16b
	eor		RMAC.16b, RMAC.16b, v2.16b
	SM4_CRYPT_BLK2(v11, RMAC)
	eor		v11.16b, v11.16b, v3.16b
	eor		RMAC.16b, RMAC.16b, v3.16b

	st1		{v8.16b-v11.16b}, [x1], #64

	cbz		w4, .Lccm_enc_end
	b		.Lccm_enc_loop_4x

.Lccm_enc_loop_1x:
	/* here 1 <= w4 < 64 */
	cmp		w4, #16
	b.lo		.Lccm_enc_tail

	sub		w4, w4, #16

	/* construct CTRs */
	inc_le128(v8)

	ld1		{v0.16b}, [x2], #16

	SM4_CRYPT_BLK2(v8, RMAC)
	eor		v8.16b, v8.16b, v0.16b
	eor		RMAC.16b, RMAC.16b, v0.16b

	st1		{v8.16b}, [x1], #16

	cbz		w4, .Lccm_enc_end
	b		.Lccm_enc_loop_1x

.Lccm_enc_tail:
	/* partial final block, 1 <= w4 <= 15 */

	/* construct CTRs */
	inc_le128(v8)

	SM4_CRYPT_BLK2(RMAC, v8)

	/*
	 * store new MAC: all 16 bytes first, then the loop below
	 * overwrites the leading w4 bytes one at a time
	 */
	st1		{RMAC.16b}, [x5]

.Lccm_enc_tail_loop:
	ldrb		w0, [x2], #1		/* get 1 byte from input */
	umov		w9, v8.b[0]		/* get top crypted CTR byte */
	umov		w6, RMAC.b[0]		/* get top MAC byte */

	eor		w9, w9, w0		/* w9 = CTR ^ input */
	eor		w6, w6, w0		/* w6 = MAC ^ input */

	strb		w9, [x1], #1		/* store out byte */
	strb		w6, [x5], #1		/* store MAC byte */

	subs		w4, w4, #1
	beq		.Lccm_enc_ret

	/* shift out one byte */
	ext		RMAC.16b, RMAC.16b, RMAC.16b, #1
	ext		v8.16b, v8.16b, v8.16b, #1

	b		.Lccm_enc_tail_loop

.Lccm_enc_end:
	/* store new MAC */
	st1		{RMAC.16b}, [x5]

	/* store new CTR */
	rev		x7, x7
	rev		x8, x8
	stp		x7, x8, [x3]

.Lccm_enc_ret:
	ret
SYM_FUNC_END(sm4_ce_ccm_enc)
218
.align 3
SYM_TYPED_FUNC_START(sm4_ce_ccm_dec)
	/*
	 * CCM decrypt: CTR-mode decrypt nbytes from src to dst while
	 * folding the *output* (decrypted) bytes into the CBC-MAC state.
	 * This is the only difference from the encrypt path, which folds
	 * the input bytes instead.
	 *
	 * input:
	 *   x0: round key array, CTX
	 *   x1: dst
	 *   x2: src
	 *   x3: ctr (big endian, 128 bit)
	 *   w4: nbytes (unsigned; 0 is a no-op)
	 *   x5: mac
	 *
	 * When nbytes is a multiple of 16 the advanced counter is written
	 * back to [x3] and the MAC state to [x5].  When there is a partial
	 * trailing block the tail path updates [x5] but returns WITHOUT
	 * writing the counter back, so a call with a partial block is
	 * presumably expected to be the last one for the request - verify
	 * against the caller.
	 *
	 * Registers written here: w0, x1, x2, w4, x5, w6, x7, x8, w9,
	 * v0-v3, v8-v11, RMAC (v16), flags, plus whatever the SM4_* macros
	 * use (see sm4-ce-asm.h).
	 */

	/*
	 * Empty request: leave counter and MAC untouched.  Falling through
	 * with w4 == 0 would reach the tail path, which re-encrypts and
	 * overwrites the MAC and then loops on a wrapped-around byte count.
	 */
	cbz		w4, .Lccm_dec_ret

	SM4_PREPARE(x0)

	/* x7:x8 = counter as native integers (x7 upper, x8 lower half) */
	ldp		x7, x8, [x3]
	rev		x7, x7
	rev		x8, x8

	ld1		{RMAC.16b}, [x5]

.Lccm_dec_loop_4x:
	/*
	 * nbytes is unsigned: a signed compare would send lengths
	 * >= 2^31 straight to the byte-wise tail path.
	 */
	cmp		w4, #(4 * 16)
	b.lo		.Lccm_dec_loop_1x

	sub		w4, w4, #(4 * 16)

	/* construct CTRs */
	inc_le128(v8)			/* +0 */
	inc_le128(v9)			/* +1 */
	inc_le128(v10)			/* +2 */
	inc_le128(v11)			/* +3 */

	ld1		{v0.16b-v3.16b}, [x2], #64

	/*
	 * Each step processes one counter block together with the MAC
	 * state, XORs the input block into the counter block to get the
	 * output block, then folds that OUTPUT block into RMAC.  The
	 * MAC chain forces the four steps to run in order.
	 */
	SM4_CRYPT_BLK2(v8, RMAC)
	eor		v8.16b, v8.16b, v0.16b
	eor		RMAC.16b, RMAC.16b, v8.16b
	SM4_CRYPT_BLK2(v9, RMAC)
	eor		v9.16b, v9.16b, v1.16b
	eor		RMAC.16b, RMAC.16b, v9.16b
	SM4_CRYPT_BLK2(v10, RMAC)
	eor		v10.16b, v10.16b, v2.16b
	eor		RMAC.16b, RMAC.16b, v10.16b
	SM4_CRYPT_BLK2(v11, RMAC)
	eor		v11.16b, v11.16b, v3.16b
	eor		RMAC.16b, RMAC.16b, v11.16b

	st1		{v8.16b-v11.16b}, [x1], #64

	cbz		w4, .Lccm_dec_end
	b		.Lccm_dec_loop_4x

.Lccm_dec_loop_1x:
	/* here 1 <= w4 < 64 */
	cmp		w4, #16
	b.lo		.Lccm_dec_tail

	sub		w4, w4, #16

	/* construct CTRs */
	inc_le128(v8)

	ld1		{v0.16b}, [x2], #16

	SM4_CRYPT_BLK2(v8, RMAC)
	eor		v8.16b, v8.16b, v0.16b
	eor		RMAC.16b, RMAC.16b, v8.16b

	st1		{v8.16b}, [x1], #16

	cbz		w4, .Lccm_dec_end
	b		.Lccm_dec_loop_1x

.Lccm_dec_tail:
	/* partial final block, 1 <= w4 <= 15 */

	/* construct CTRs */
	inc_le128(v8)

	SM4_CRYPT_BLK2(RMAC, v8)

	/*
	 * store new MAC: all 16 bytes first, then the loop below
	 * overwrites the leading w4 bytes one at a time
	 */
	st1		{RMAC.16b}, [x5]

.Lccm_dec_tail_loop:
	ldrb		w0, [x2], #1		/* get 1 byte from input */
	umov		w9, v8.b[0]		/* get top crypted CTR byte */
	umov		w6, RMAC.b[0]		/* get top MAC byte */

	eor		w9, w9, w0		/* w9 = CTR ^ input */
	eor		w6, w6, w9		/* w6 = MAC ^ output */

	strb		w9, [x1], #1		/* store out byte */
	strb		w6, [x5], #1		/* store MAC byte */

	subs		w4, w4, #1
	beq		.Lccm_dec_ret

	/* shift out one byte */
	ext		RMAC.16b, RMAC.16b, RMAC.16b, #1
	ext		v8.16b, v8.16b, v8.16b, #1

	b		.Lccm_dec_tail_loop

.Lccm_dec_end:
	/* store new MAC */
	st1		{RMAC.16b}, [x5]

	/* store new CTR */
	rev		x7, x7
	rev		x8, x8
	stp		x7, x8, [x3]

.Lccm_dec_ret:
	ret
SYM_FUNC_END(sm4_ce_ccm_dec)