GNU Linux-libre 4.19.304-gnu1
[releases.git] / arch / arm64 / crypto / aes-modes.S
1 /*
2  * linux/arch/arm64/crypto/aes-modes.S - chaining mode wrappers for AES
3  *
4  * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 as
8  * published by the Free Software Foundation.
9  */
10
11 /* included by aes-ce.S and aes-neon.S */
12
13         .text
14         .align          4
15
16 aes_encrypt_block4x:
           /*
            * Local helper reached with "bl" from the mode routines below, so
            * callers must have saved x30 beforehand.  It expands the
            * encrypt_block4x macro on v0-v3, passing w22 and x21 (the registers
            * the callers load with the round count and round-key pointer) and
            * x8/w7 as the remaining operands.
            * NOTE(review): encrypt_block4x is supplied by the including file
            * (aes-ce.S or aes-neon.S); x8/w7 are presumably scratch - confirm
            * the exact clobbers there.
            */
17         encrypt_block4x v0, v1, v2, v3, w22, x21, x8, w7
18         ret
19 ENDPROC(aes_encrypt_block4x)
20
21 aes_decrypt_block4x:
           /*
            * Decryption counterpart of aes_encrypt_block4x: expands the
            * decrypt_block4x macro on v0-v3 with w22/x21 (round count and
            * round-key pointer as set up by the callers) and x8/w7 as the
            * remaining operands.  Reached with "bl", so callers must have
            * saved x30.
            * NOTE(review): decrypt_block4x is supplied by the including file;
            * x8/w7 are presumably scratch - confirm the exact clobbers there.
            */
22         decrypt_block4x v0, v1, v2, v3, w22, x21, x8, w7
23         ret
24 ENDPROC(aes_decrypt_block4x)
25
26         /*
27          * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
28          *                 int blocks)
29          * aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
30          *                 int blocks)
31          */
32
33 AES_ENTRY(aes_ecb_encrypt)
           /*
            * ECB encryption: every 16-byte block is encrypted on its own, four
            * at a time while at least four remain, then one at a time.
            *
            * Register use after the moves below:
            *   x19 = out, x20 = in, x21 = rk, w22 = rounds, w23 = blocks left
            *
            * NOTE(review): frame_push/frame_pop are defined outside this file;
            * the argument presumably is the number of x19.. registers to
            * preserve (five are used here) - confirm.
            */
34         frame_push      5
35
36         mov             x19, x0
37         mov             x20, x1
38         mov             x21, x2
39         mov             x22, x3
40         mov             x23, x4
41
           /*
            * Also the label handed to cond_yield_neon below.
            * NOTE(review): cond_yield_neon is defined elsewhere; presumably
            * NEON state may be lost across a yield, which would be why the key
            * setup is repeated from here - confirm.
            */
42 .Lecbencrestart:
43         enc_prepare     w22, x21, x5
44
45 .LecbencloopNx:
           /* fewer than 4 blocks left => negative => single-block path */
46         subs            w23, w23, #4
47         bmi             .Lecbenc1x
48         ld1             {v0.16b-v3.16b}, [x20], #64     /* get 4 pt blocks */
49         bl              aes_encrypt_block4x
50         st1             {v0.16b-v3.16b}, [x19], #64
51         cond_yield_neon .Lecbencrestart
52         b               .LecbencloopNx
53 .Lecbenc1x:
           /* undo the speculative subtraction; zero means nothing is left */
54         adds            w23, w23, #4
55         beq             .Lecbencout
56 .Lecbencloop:
57         ld1             {v0.16b}, [x20], #16            /* get next pt block */
58         encrypt_block   v0, w22, x21, x5, w6
59         st1             {v0.16b}, [x19], #16
60         subs            w23, w23, #1
61         bne             .Lecbencloop
62 .Lecbencout:
63         frame_pop
64         ret
65 AES_ENDPROC(aes_ecb_encrypt)
66
67
68 AES_ENTRY(aes_ecb_decrypt)
           /*
            * ECB decryption: mirror image of aes_ecb_encrypt, using the
            * decryption macros and helper.
            *
            * Register use after the moves below:
            *   x19 = out, x20 = in, x21 = rk, w22 = rounds, w23 = blocks left
            *
            * NOTE(review): frame_push/frame_pop are defined outside this file;
            * the argument presumably is the number of x19.. registers to
            * preserve - confirm.
            */
69         frame_push      5
70
71         mov             x19, x0
72         mov             x20, x1
73         mov             x21, x2
74         mov             x22, x3
75         mov             x23, x4
76
           /*
            * Also the label handed to cond_yield_neon below.
            * NOTE(review): presumably NEON state may be lost across a yield,
            * hence the key setup is repeated from here - confirm against the
            * cond_yield_neon definition.
            */
77 .Lecbdecrestart:
78         dec_prepare     w22, x21, x5
79
80 .LecbdecloopNx:
           /* fewer than 4 blocks left => negative => single-block path */
81         subs            w23, w23, #4
82         bmi             .Lecbdec1x
83         ld1             {v0.16b-v3.16b}, [x20], #64     /* get 4 ct blocks */
84         bl              aes_decrypt_block4x
85         st1             {v0.16b-v3.16b}, [x19], #64
86         cond_yield_neon .Lecbdecrestart
87         b               .LecbdecloopNx
88 .Lecbdec1x:
           /* undo the speculative subtraction; zero means nothing is left */
89         adds            w23, w23, #4
90         beq             .Lecbdecout
91 .Lecbdecloop:
92         ld1             {v0.16b}, [x20], #16            /* get next ct block */
93         decrypt_block   v0, w22, x21, x5, w6
94         st1             {v0.16b}, [x19], #16
95         subs            w23, w23, #1
96         bne             .Lecbdecloop
97 .Lecbdecout:
98         frame_pop
99         ret
100 AES_ENDPROC(aes_ecb_decrypt)
101
102
103         /*
104          * aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
105          *                 int blocks, u8 iv[])
106          * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
107          *                 int blocks, u8 iv[])
108          */
109
110 AES_ENTRY(aes_cbc_encrypt)
            /*
             * CBC encryption.  v4 carries the chaining value: the IV at first,
             * afterwards the most recent ciphertext block.
             *
             * Register use after the moves below:
             *   x19 = out, x20 = in, x21 = rk, w22 = rounds,
             *   w23 = blocks left, x24 = iv
             *
             * NOTE(review): frame_push/frame_pop are defined outside this
             * file; the argument presumably is the number of x19.. registers
             * to preserve - confirm.
             */
111         frame_push      6
112
113         mov             x19, x0
114         mov             x20, x1
115         mov             x21, x2
116         mov             x22, x3
117         mov             x23, x4
118         mov             x24, x5
119
            /*
             * Also the label handed to cond_yield_neon: the IV is re-read from
             * memory and the key setup repeated from here.
             */
120 .Lcbcencrestart:
121         ld1             {v4.16b}, [x24]                 /* get iv */
122         enc_prepare     w22, x21, x6
123
124 .Lcbcencloop4x:
            /* fewer than 4 blocks left => negative => single-block path */
125         subs            w23, w23, #4
126         bmi             .Lcbcenc1x
127         ld1             {v0.16b-v3.16b}, [x20], #64     /* get 4 pt blocks */
            /*
             * Each block is xored with the previous ciphertext before being
             * encrypted, so the four encryptions run one after another rather
             * than through the 4-way helper.
             */
128         eor             v0.16b, v0.16b, v4.16b          /* ..and xor with iv */
129         encrypt_block   v0, w22, x21, x6, w7
130         eor             v1.16b, v1.16b, v0.16b
131         encrypt_block   v1, w22, x21, x6, w7
132         eor             v2.16b, v2.16b, v1.16b
133         encrypt_block   v2, w22, x21, x6, w7
134         eor             v3.16b, v3.16b, v2.16b
135         encrypt_block   v3, w22, x21, x6, w7
136         st1             {v0.16b-v3.16b}, [x19], #64
            /*
             * The last ciphertext block is the next chaining value.  It is
             * written back before cond_yield_neon so that the restart path can
             * reload it from memory.
             */
137         mov             v4.16b, v3.16b
138         st1             {v4.16b}, [x24]                 /* return iv */
139         cond_yield_neon .Lcbcencrestart
140         b               .Lcbcencloop4x
141 .Lcbcenc1x:
            /* undo the speculative subtraction; zero means nothing is left */
142         adds            w23, w23, #4
143         beq             .Lcbcencout
144 .Lcbcencloop:
145         ld1             {v0.16b}, [x20], #16            /* get next pt block */
146         eor             v4.16b, v4.16b, v0.16b          /* ..and xor with iv */
            /* encrypting in place leaves the new chaining value in v4 */
147         encrypt_block   v4, w22, x21, x6, w7
148         st1             {v4.16b}, [x19], #16
149         subs            w23, w23, #1
150         bne             .Lcbcencloop
151 .Lcbcencout:
152         st1             {v4.16b}, [x24]                 /* return iv */
153         frame_pop
154         ret
155 AES_ENDPROC(aes_cbc_encrypt)
156
157
158 AES_ENTRY(aes_cbc_decrypt)
            /*
             * CBC decryption.  v7 carries the chaining value: the IV at first,
             * afterwards the most recent ciphertext block.
             *
             * Register use after the moves below:
             *   x19 = out, x20 = in, x21 = rk, w22 = rounds,
             *   w23 = blocks left, x24 = iv
             *
             * NOTE(review): frame_push/frame_pop are defined outside this
             * file; the argument presumably is the number of x19.. registers
             * to preserve - confirm.
             */
159         frame_push      6
160
161         mov             x19, x0
162         mov             x20, x1
163         mov             x21, x2
164         mov             x22, x3
165         mov             x23, x4
166         mov             x24, x5
167
            /*
             * Also the label handed to cond_yield_neon: the IV is re-read from
             * memory and the key setup repeated from here.
             */
168 .Lcbcdecrestart:
169         ld1             {v7.16b}, [x24]                 /* get iv */
170         dec_prepare     w22, x21, x6
171
172 .LcbcdecloopNx:
            /* fewer than 4 blocks left => negative => single-block path */
173         subs            w23, w23, #4
174         bmi             .Lcbcdec1x
175         ld1             {v0.16b-v3.16b}, [x20], #64     /* get 4 ct blocks */
            /*
             * Keep copies of ciphertext blocks 0-2: the helper overwrites
             * v0-v3, and each plaintext needs the preceding ciphertext.
             */
176         mov             v4.16b, v0.16b
177         mov             v5.16b, v1.16b
178         mov             v6.16b, v2.16b
179         bl              aes_decrypt_block4x
            /*
             * Step the input pointer back to the 4th ciphertext block.  It is
             * reloaded into v7 below, after the old v7 has been consumed, and
             * becomes the chaining value for the next iteration.
             */
180         sub             x20, x20, #16
181         eor             v0.16b, v0.16b, v7.16b
182         eor             v1.16b, v1.16b, v4.16b
183         ld1             {v7.16b}, [x20], #16            /* reload 1 ct block */
184         eor             v2.16b, v2.16b, v5.16b
185         eor             v3.16b, v3.16b, v6.16b
186         st1             {v0.16b-v3.16b}, [x19], #64
            /* written back before cond_yield_neon so the restart can reload it */
187         st1             {v7.16b}, [x24]                 /* return iv */
188         cond_yield_neon .Lcbcdecrestart
189         b               .LcbcdecloopNx
190 .Lcbcdec1x:
            /* undo the speculative subtraction; zero means nothing is left */
191         adds            w23, w23, #4
192         beq             .Lcbcdecout
193 .Lcbcdecloop:
194         ld1             {v1.16b}, [x20], #16            /* get next ct block */
195         mov             v0.16b, v1.16b                  /* ...and copy to v0 */
196         decrypt_block   v0, w22, x21, x6, w7
197         eor             v0.16b, v0.16b, v7.16b          /* xor with iv => pt */
198         mov             v7.16b, v1.16b                  /* ct is next iv */
199         st1             {v0.16b}, [x19], #16
200         subs            w23, w23, #1
201         bne             .Lcbcdecloop
202 .Lcbcdecout:
203         st1             {v7.16b}, [x24]                 /* return iv */
204         frame_pop
205         ret
206 AES_ENDPROC(aes_cbc_decrypt)
207
208
209         /*
210          * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
211          *                 int blocks, u8 ctr[])
212          */
213
214 AES_ENTRY(aes_ctr_encrypt)
            /*
             * CTR mode: encrypt successive counter values and xor the result
             * into the input.
             *
             * Register use after the moves below:
             *   x19 = out, x20 = in, x21 = rk, w22 = rounds,
             *   w23 = blocks left, x24 = ctr
             *   v4  = current counter block
             *   x6  = byte-reversed copy of v4.d[1] (bytes 8-15 of the
             *         counter block), so it can be incremented with plain adds
             *
             * A negative block count selects the tail path: one keystream
             * block is written to out as-is and the input is not read.
             * On every exit path the incremented counter is written back to
             * ctr[], including after such a tail block.
             *
             * NOTE(review): frame_push/frame_pop and cond_yield_neon are
             * defined outside this file; frame_push's argument presumably is
             * the number of x19.. registers to preserve - confirm.
             */
215         frame_push      6
216
217         mov             x19, x0
218         mov             x20, x1
219         mov             x21, x2
220         mov             x22, x3
221         mov             x23, x4
222         mov             x24, x5
223
            /*
             * Also the label handed to cond_yield_neon: the key setup is
             * repeated and the counter re-read from memory from here.
             */
224 .Lctrrestart:
225         enc_prepare     w22, x21, x6
226         ld1             {v4.16b}, [x24]
227
228         umov            x6, v4.d[1]             /* keep swabbed ctr in reg */
229         rev             x6, x6
230 .LctrloopNx:
            /* fewer than 4 blocks left => negative => single-block path */
231         subs            w23, w23, #4
232         bmi             .Lctr1x
            /*
             * The 4-way path only patches the last 32-bit lane of each
             * counter copy, so it is skipped when adding 4 would carry out of
             * the low 32 bits; the single-block path handles the carry.
             */
233         cmn             w6, #4                  /* 32 bit overflow? */
234         bcs             .Lctr1x
            /* v0 = ctr, v1..v3 = ctr+1..ctr+3 (w7-w9 hold the new low words) */
235         add             w7, w6, #1
236         mov             v0.16b, v4.16b
237         add             w8, w6, #2
238         mov             v1.16b, v4.16b
239         add             w9, w6, #3
240         mov             v2.16b, v4.16b
241         rev             w7, w7
242         mov             v3.16b, v4.16b
243         rev             w8, w8
244         mov             v1.s[3], w7
245         rev             w9, w9
246         mov             v2.s[3], w8
247         mov             v3.s[3], w9
            /*
             * Only v5-v7 are used for input here (v4 holds the counter), so
             * the 4th input block is loaded into v5 once v5 has been consumed.
             */
248         ld1             {v5.16b-v7.16b}, [x20], #48     /* get 3 input blocks */
249         bl              aes_encrypt_block4x
250         eor             v0.16b, v5.16b, v0.16b
251         ld1             {v5.16b}, [x20], #16            /* get 1 input block  */
252         eor             v1.16b, v6.16b, v1.16b
253         eor             v2.16b, v7.16b, v2.16b
254         eor             v3.16b, v5.16b, v3.16b
255         st1             {v0.16b-v3.16b}, [x19], #64
            /* advance the counter by 4 and rebuild its in-vector form */
256         add             x6, x6, #4
257         rev             x7, x6
258         ins             v4.d[1], x7
259         cbz             w23, .Lctrout
            /* written back before cond_yield_neon so the restart can reload it */
260         st1             {v4.16b}, [x24]         /* return next CTR value */
261         cond_yield_neon .Lctrrestart
262         b               .LctrloopNx
263 .Lctr1x:
            /* undo the speculative subtraction; zero means nothing is left */
264         adds            w23, w23, #4
265         beq             .Lctrout
266 .Lctrloop:
267         mov             v0.16b, v4.16b
268         encrypt_block   v0, w22, x21, x8, w7
269
            /*
             * rev and ins leave the flags alone, so the carry tested by bcs
             * is still the one produced by adds.
             */
270         adds            x6, x6, #1              /* increment BE ctr */
271         rev             x7, x6
272         ins             v4.d[1], x7
273         bcs             .Lctrcarry              /* overflow? */
274
275 .Lctrcarrydone:
276         subs            w23, w23, #1
277         bmi             .Lctrtailblock          /* blocks <0 means tail block */
278         ld1             {v3.16b}, [x20], #16
279         eor             v3.16b, v0.16b, v3.16b
280         st1             {v3.16b}, [x19], #16
            /* flags are still those of the subs above */
281         bne             .Lctrloop
282
283 .Lctrout:
284         st1             {v4.16b}, [x24]         /* return next CTR value */
286         frame_pop
287         ret
288
289 .Lctrtailblock:
            /*
             * Store the raw keystream block; the input is not read on this
             * path (presumably the caller xors the partial block itself).
             * Leave through .Lctrout so that the counter, which has already
             * been incremented in v4, is written back to ctr[] like on every
             * other exit path.
             */
290         st1             {v0.16b}, [x19]
291         b               .Lctrout
292
293 .Lctrcarry:
            /* the low 64 bits wrapped: propagate the carry into bytes 0-7 */
294         umov            x7, v4.d[0]             /* load upper word of ctr  */
295         rev             x7, x7                  /* ... to handle the carry */
296         add             x7, x7, #1
297         rev             x7, x7
298         ins             v4.d[0], x7
299         b               .Lctrcarrydone
300 AES_ENDPROC(aes_ctr_encrypt)
301         .ltorg
302
303
304         /*
305          * aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
306          *                 int blocks, u8 const rk2[], u8 iv[], int first)
307          * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
308          *                 int blocks, u8 const rk2[], u8 iv[], int first)
309          */
310
311         .macro          next_tweak, out, in, const, tmp
            /*
             * out = in doubled as a 128-bit value, with the bit shifted out of
             * the top folded back in as 0x87 (the "multiply by x" step that
             * .Lxts_mul_x is named after).  \const must hold .Lxts_mul_x.
             *
             *  sshr: each 64-bit lane becomes all-ones if its top bit was set
             *  and:  keep the matching constant (1 or 0x87) for those lanes
             *  add:  in + in shifts every lane left by one bit
             *  ext:  swap the two halves of tmp so each correction lands in
             *        the other lane (carry into the high lane, 0x87 into the
             *        low lane)
             *  eor:  apply the corrections
             *
             * \const is last read by the "and", before \out is written, so
             * \out may be the same register as \const (as it is at some call
             * sites); \out may also equal \in.  \tmp is clobbered.
             */
312         sshr            \tmp\().2d,  \in\().2d,   #63
313         and             \tmp\().16b, \tmp\().16b, \const\().16b
314         add             \out\().2d,  \in\().2d,   \in\().2d
315         ext             \tmp\().16b, \tmp\().16b, \tmp\().16b, #8
316         eor             \out\().16b, \out\().16b, \tmp\().16b
317         .endm
318
319 .Lxts_mul_x:
            /*
             * Constant passed to next_tweak as \const: the values 1 and 0x87,
             * one per 64-bit lane.  It is loaded with "ldr q7" by the XTS
             * routines.
             * NOTE(review): CPU_LE/CPU_BE are defined elsewhere; presumably
             * they emit the line matching the build's endianness so that the
             * lane order seen after the load is the same - confirm.
             */
320 CPU_LE( .quad           1, 0x87         )
321 CPU_BE( .quad           0x87, 1         )
322
323 AES_ENTRY(aes_xts_encrypt)
            /*
             * XTS encryption.  Each block is xored with its tweak before and
             * after the block cipher; successive tweaks come from next_tweak.
             *
             * Arguments on entry: x0 = out, x1 = in, x2 = rk1, w3 = rounds,
             * w4 = blocks, x5 = rk2, x6 = iv, w7 = first.
             * Register use after the moves below:
             *   x19 = out, x20 = in, x21 = rk1, w22 = rounds,
             *   w23 = blocks left, x24 = iv
             *   v4  = current tweak, v7 = .Lxts_mul_x (except where noted)
             *
             * The value stored to iv[] is the tweak of the last block that was
             * processed; the non-first and restart paths step it forward once
             * before using it.
             *
             * NOTE(review): frame_push/frame_pop, cond_yield_neon and
             * enc_switch_key are defined outside this file; frame_push's
             * argument presumably is the number of x19.. registers to
             * preserve - confirm.
             */
324         frame_push      6
325
326         mov             x19, x0
327         mov             x20, x1
328         mov             x21, x2
329         mov             x22, x3
330         mov             x23, x4
331         mov             x24, x6
332
333         ld1             {v4.16b}, [x24]
334         cbz             w7, .Lxtsencnotfirst
335
            /*
             * First call: encrypt the IV with rk2 to obtain the first tweak,
             * then switch to rk1 for the data.  The original argument
             * registers (w3, x5, x2) are still intact at this point.
             */
336         enc_prepare     w3, x5, x8
337         encrypt_block   v4, w3, x5, x8, w7              /* first tweak */
338         enc_switch_key  w3, x2, x8
339         ldr             q7, .Lxts_mul_x
340         b               .LxtsencNx
341
            /* label handed to cond_yield_neon: reload the saved tweak */
342 .Lxtsencrestart:
343         ld1             {v4.16b}, [x24]
344 .Lxtsencnotfirst:
345         enc_prepare     w22, x21, x8
346 .LxtsencloopNx:
            /*
             * v7 was overwritten with the 4th tweak on the previous pass (or
             * is not yet loaded), so fetch the constant again, then step v4
             * from the last used tweak to the next one.
             */
347         ldr             q7, .Lxts_mul_x
348         next_tweak      v4, v4, v7, v8
349 .LxtsencNx:
            /* fewer than 4 blocks left => negative => single-block path */
350         subs            w23, w23, #4
351         bmi             .Lxtsenc1x
352         ld1             {v0.16b-v3.16b}, [x20], #64     /* get 4 pt blocks */
            /* tweaks for the four blocks end up in v4, v5, v6, v7 */
353         next_tweak      v5, v4, v7, v8
354         eor             v0.16b, v0.16b, v4.16b
355         next_tweak      v6, v5, v7, v8
356         eor             v1.16b, v1.16b, v5.16b
357         eor             v2.16b, v2.16b, v6.16b
            /* from here on v7 holds the 4th tweak instead of the constant */
358         next_tweak      v7, v6, v7, v8
359         eor             v3.16b, v3.16b, v7.16b
360         bl              aes_encrypt_block4x
361         eor             v3.16b, v3.16b, v7.16b
362         eor             v0.16b, v0.16b, v4.16b
363         eor             v1.16b, v1.16b, v5.16b
364         eor             v2.16b, v2.16b, v6.16b
365         st1             {v0.16b-v3.16b}, [x19], #64
            /* v4 = tweak of the last processed block */
366         mov             v4.16b, v7.16b
367         cbz             w23, .Lxtsencout
            /* written back before cond_yield_neon so the restart can reload it */
368         st1             {v4.16b}, [x24]
369         cond_yield_neon .Lxtsencrestart
370         b               .LxtsencloopNx
371 .Lxtsenc1x:
            /* undo the speculative subtraction; zero means nothing is left */
372         adds            w23, w23, #4
373         beq             .Lxtsencout
374 .Lxtsencloop:
375         ld1             {v1.16b}, [x20], #16
376         eor             v0.16b, v1.16b, v4.16b
377         encrypt_block   v0, w22, x21, x8, w7
378         eor             v0.16b, v0.16b, v4.16b
379         st1             {v0.16b}, [x19], #16
380         subs            w23, w23, #1
381         beq             .Lxtsencout
            /* more blocks follow: advance the tweak (v7 is the constant here) */
382         next_tweak      v4, v4, v7, v8
383         b               .Lxtsencloop
384 .Lxtsencout:
385         st1             {v4.16b}, [x24]
386         frame_pop
387         ret
388 AES_ENDPROC(aes_xts_encrypt)
389
390
391 AES_ENTRY(aes_xts_decrypt)
            /*
             * XTS decryption.  Same structure as aes_xts_encrypt, with the
             * data blocks going through the decryption macros and helper.
             *
             * Arguments on entry: x0 = out, x1 = in, x2 = rk1, w3 = rounds,
             * w4 = blocks, x5 = rk2, x6 = iv, w7 = first.
             * Register use after the moves below:
             *   x19 = out, x20 = in, x21 = rk1, w22 = rounds,
             *   w23 = blocks left, x24 = iv
             *   v4  = current tweak, v7 = .Lxts_mul_x (except where noted)
             *
             * The value stored to iv[] is the tweak of the last block that was
             * processed; the non-first and restart paths step it forward once
             * before using it.
             *
             * NOTE(review): frame_push/frame_pop and cond_yield_neon are
             * defined outside this file; frame_push's argument presumably is
             * the number of x19.. registers to preserve - confirm.
             */
392         frame_push      6
393
394         mov             x19, x0
395         mov             x20, x1
396         mov             x21, x2
397         mov             x22, x3
398         mov             x23, x4
399         mov             x24, x6
400
401         ld1             {v4.16b}, [x24]
402         cbz             w7, .Lxtsdecnotfirst
403
            /*
             * First call: the first tweak is obtained by ENcrypting the IV
             * with rk2, even though the data is decrypted; afterwards the
             * decryption key rk1 is set up.  The original argument registers
             * (w3, x5, x2) are still intact at this point.
             */
404         enc_prepare     w3, x5, x8
405         encrypt_block   v4, w3, x5, x8, w7              /* first tweak */
406         dec_prepare     w3, x2, x8
407         ldr             q7, .Lxts_mul_x
408         b               .LxtsdecNx
409
            /* label handed to cond_yield_neon: reload the saved tweak */
410 .Lxtsdecrestart:
411         ld1             {v4.16b}, [x24]
412 .Lxtsdecnotfirst:
413         dec_prepare     w22, x21, x8
414 .LxtsdecloopNx:
            /*
             * v7 was overwritten with the 4th tweak on the previous pass (or
             * is not yet loaded), so fetch the constant again, then step v4
             * from the last used tweak to the next one.
             */
415         ldr             q7, .Lxts_mul_x
416         next_tweak      v4, v4, v7, v8
417 .LxtsdecNx:
            /* fewer than 4 blocks left => negative => single-block path */
418         subs            w23, w23, #4
419         bmi             .Lxtsdec1x
420         ld1             {v0.16b-v3.16b}, [x20], #64     /* get 4 ct blocks */
            /* tweaks for the four blocks end up in v4, v5, v6, v7 */
421         next_tweak      v5, v4, v7, v8
422         eor             v0.16b, v0.16b, v4.16b
423         next_tweak      v6, v5, v7, v8
424         eor             v1.16b, v1.16b, v5.16b
425         eor             v2.16b, v2.16b, v6.16b
            /* from here on v7 holds the 4th tweak instead of the constant */
426         next_tweak      v7, v6, v7, v8
427         eor             v3.16b, v3.16b, v7.16b
428         bl              aes_decrypt_block4x
429         eor             v3.16b, v3.16b, v7.16b
430         eor             v0.16b, v0.16b, v4.16b
431         eor             v1.16b, v1.16b, v5.16b
432         eor             v2.16b, v2.16b, v6.16b
433         st1             {v0.16b-v3.16b}, [x19], #64
            /* v4 = tweak of the last processed block */
434         mov             v4.16b, v7.16b
435         cbz             w23, .Lxtsdecout
            /* written back before cond_yield_neon so the restart can reload it */
436         st1             {v4.16b}, [x24]
437         cond_yield_neon .Lxtsdecrestart
438         b               .LxtsdecloopNx
439 .Lxtsdec1x:
            /* undo the speculative subtraction; zero means nothing is left */
440         adds            w23, w23, #4
441         beq             .Lxtsdecout
442 .Lxtsdecloop:
443         ld1             {v1.16b}, [x20], #16
444         eor             v0.16b, v1.16b, v4.16b
445         decrypt_block   v0, w22, x21, x8, w7
446         eor             v0.16b, v0.16b, v4.16b
447         st1             {v0.16b}, [x19], #16
448         subs            w23, w23, #1
449         beq             .Lxtsdecout
            /* more blocks follow: advance the tweak (v7 is the constant here) */
450         next_tweak      v4, v4, v7, v8
451         b               .Lxtsdecloop
452 .Lxtsdecout:
453         st1             {v4.16b}, [x24]
454         frame_pop
455         ret
456 AES_ENDPROC(aes_xts_decrypt)
457
458         /*
459          * aes_mac_update(u8 const in[], u32 const rk[], int rounds,
460          *                int blocks, u8 dg[], int enc_before, int enc_after)
461          */
462 AES_ENTRY(aes_mac_update)
            /*
             * Fold input blocks into the digest dg[]: each block is xored into
             * the digest (v0), which is then encrypted.  The encryption that
             * would follow the very last block is skipped when enc_after is
             * zero, and dg[] is encrypted once up front when enc_before is
             * non-zero.
             *
             * Arguments on entry: x0 = in, x1 = rk, w2 = rounds, w3 = blocks,
             * x4 = dg, w5 = enc_before, w6 = enc_after.
             * Register use after the moves below:
             *   x19 = in, x20 = rk, w21 = rounds, w22 = blocks left,
             *   x23 = dg, x24 = enc_after
             *
             * NOTE(review): frame_push/frame_pop and cond_yield_neon are
             * defined outside this file; frame_push's argument presumably is
             * the number of x19.. registers to preserve - confirm.
             */
463         frame_push      6
464
465         mov             x19, x0
466         mov             x20, x1
467         mov             x21, x2
468         mov             x22, x3
469         mov             x23, x4
470         mov             x24, x6
471
472         ld1             {v0.16b}, [x23]                 /* get dg */
            /* the original argument registers (w2, x1) are still intact here */
473         enc_prepare     w2, x1, x7
474         cbz             w5, .Lmacloop4x
475
            /* enc_before != 0: encrypt the incoming digest first */
476         encrypt_block   v0, w2, x1, x7, w8
477
478 .Lmacloop4x:
            /* fewer than 4 blocks left => negative => single-block path */
479         subs            w22, w22, #4
480         bmi             .Lmac1x
481         ld1             {v1.16b-v4.16b}, [x19], #64     /* get next pt block */
482         eor             v0.16b, v0.16b, v1.16b          /* ..and xor with dg */
483         encrypt_block   v0, w21, x20, x7, w8
484         eor             v0.16b, v0.16b, v2.16b
485         encrypt_block   v0, w21, x20, x7, w8
486         eor             v0.16b, v0.16b, v3.16b
487         encrypt_block   v0, w21, x20, x7, w8
488         eor             v0.16b, v0.16b, v4.16b
            /*
             * x5 = enc_after if no blocks remain, otherwise all-ones.  The
             * cbz therefore skips the final encryption only when this was the
             * last block and enc_after is zero.
             */
489         cmp             w22, wzr
490         csinv           x5, x24, xzr, eq
491         cbz             w5, .Lmacout
492         encrypt_block   v0, w21, x20, x7, w8
            /* written back before cond_yield_neon so the restart can reload it */
493         st1             {v0.16b}, [x23]                 /* return dg */
494         cond_yield_neon .Lmacrestart
495         b               .Lmacloop4x
496 .Lmac1x:
            /* undo the speculative subtraction */
497         add             w22, w22, #4
498 .Lmacloop:
499         cbz             w22, .Lmacout
500         ld1             {v1.16b}, [x19], #16            /* get next pt block */
501         eor             v0.16b, v0.16b, v1.16b          /* ..and xor with dg */
502
            /* same last-block / enc_after test as in the 4-block loop */
503         subs            w22, w22, #1
504         csinv           x5, x24, xzr, eq
505         cbz             w5, .Lmacout
506
507 .Lmacenc:
508         encrypt_block   v0, w21, x20, x7, w8
509         b               .Lmacloop
510
511 .Lmacout:
512         st1             {v0.16b}, [x23]                 /* return dg */
513         frame_pop
514         ret
515
            /*
             * Label handed to cond_yield_neon: reload the digest and redo the
             * key setup.  x0 is passed as the third enc_prepare operand; its
             * original value (in) was copied to x19 at entry.
             */
516 .Lmacrestart:
517         ld1             {v0.16b}, [x23]                 /* get dg */
518         enc_prepare     w21, x20, x0
519         b               .Lmacloop4x
520 AES_ENDPROC(aes_mac_update)