/*
 * linux/arch/arm64/crypto/aes-modes.S - chaining mode wrappers for AES
 *
 * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

/* included by aes-ce.S and aes-neon.S */

        .text
        .align          4

/*
 * There are several ways to instantiate this code:
 * - no interleave, all inline
 * - 2-way interleave, 2x calls out of line (-DINTERLEAVE=2)
 * - 2-way interleave, all inline (-DINTERLEAVE=2 -DINTERLEAVE_INLINE)
 * - 4-way interleave, 4x calls out of line (-DINTERLEAVE=4)
 * - 4-way interleave, all inline (-DINTERLEAVE=4 -DINTERLEAVE_INLINE)
 *
 * Macros imported by this code:
 * - enc_prepare        - setup NEON registers for encryption
 * - dec_prepare        - setup NEON registers for decryption
 * - enc_switch_key     - change to new key after having prepared for encryption
 * - encrypt_block      - encrypt a single block
 * - decrypt_block      - decrypt a single block
 * - encrypt_block2x    - encrypt 2 blocks in parallel (if INTERLEAVE == 2)
 * - decrypt_block2x    - decrypt 2 blocks in parallel (if INTERLEAVE == 2)
 * - encrypt_block4x    - encrypt 4 blocks in parallel (if INTERLEAVE == 4)
 * - decrypt_block4x    - decrypt 4 blocks in parallel (if INTERLEAVE == 4)
 */
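
/*
 * Instantiation sketch (illustrative; the wrapper shown is hypothetical):
 * aes-ce.S and aes-neon.S define the macros listed above and then pull in
 * this file, with INTERLEAVE set either in the wrapper or on the assembler
 * command line, along the lines of:
 *
 *     // aes-foo.S (hypothetical wrapper)
 *     #define INTERLEAVE      4
 *     .macro enc_prepare, rounds, rk, tmp
 *             ...
 *     .endm
 *     // ... remaining imported macros ...
 *     #include "aes-modes.S"
 */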

#if defined(INTERLEAVE) && !defined(INTERLEAVE_INLINE)
#define FRAME_PUSH      stp x29, x30, [sp,#-16]! ; mov x29, sp
#define FRAME_POP       ldp x29, x30, [sp],#16

#if INTERLEAVE == 2

aes_encrypt_block2x:
        encrypt_block2x v0, v1, w3, x2, x6, w7
        ret
ENDPROC(aes_encrypt_block2x)

aes_decrypt_block2x:
        decrypt_block2x v0, v1, w3, x2, x6, w7
        ret
ENDPROC(aes_decrypt_block2x)

#elif INTERLEAVE == 4

aes_encrypt_block4x:
        encrypt_block4x v0, v1, v2, v3, w3, x2, x6, w7
        ret
ENDPROC(aes_encrypt_block4x)

aes_decrypt_block4x:
        decrypt_block4x v0, v1, v2, v3, w3, x2, x6, w7
        ret
ENDPROC(aes_decrypt_block4x)

#else
#error INTERLEAVE should equal 2 or 4
#endif

        .macro          do_encrypt_block2x
        bl              aes_encrypt_block2x
        .endm

        .macro          do_decrypt_block2x
        bl              aes_decrypt_block2x
        .endm

        .macro          do_encrypt_block4x
        bl              aes_encrypt_block4x
        .endm

        .macro          do_decrypt_block4x
        bl              aes_decrypt_block4x
        .endm

#else
#define FRAME_PUSH
#define FRAME_POP

        .macro          do_encrypt_block2x
        encrypt_block2x v0, v1, w3, x2, x6, w7
        .endm

        .macro          do_decrypt_block2x
        decrypt_block2x v0, v1, w3, x2, x6, w7
        .endm

        .macro          do_encrypt_block4x
        encrypt_block4x v0, v1, v2, v3, w3, x2, x6, w7
        .endm

        .macro          do_decrypt_block4x
        decrypt_block4x v0, v1, v2, v3, w3, x2, x6, w7
        .endm

#endif

        /*
         * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
         *                 int blocks, int first)
         * aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
         *                 int blocks, int first)
         */
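        /*
         * Per the AAPCS64 calling convention the arguments arrive as x0=out,
         * x1=in, x2=rk, w3=rounds, w4=blocks, w5=first; a nonzero 'first'
         * triggers the enc_prepare/dec_prepare setup below, subsequent calls
         * reuse the NEON state already set up.
         */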

AES_ENTRY(aes_ecb_encrypt)
        FRAME_PUSH
        cbz             w5, .LecbencloopNx

        enc_prepare     w3, x2, x5

.LecbencloopNx:
#if INTERLEAVE >= 2
        subs            w4, w4, #INTERLEAVE
        bmi             .Lecbenc1x
#if INTERLEAVE == 2
        ld1             {v0.16b-v1.16b}, [x1], #32      /* get 2 pt blocks */
        do_encrypt_block2x
        st1             {v0.16b-v1.16b}, [x0], #32
#else
        ld1             {v0.16b-v3.16b}, [x1], #64      /* get 4 pt blocks */
        do_encrypt_block4x
        st1             {v0.16b-v3.16b}, [x0], #64
#endif
        b               .LecbencloopNx
.Lecbenc1x:
        adds            w4, w4, #INTERLEAVE
        beq             .Lecbencout
#endif
.Lecbencloop:
        ld1             {v0.16b}, [x1], #16             /* get next pt block */
        encrypt_block   v0, w3, x2, x5, w6
        st1             {v0.16b}, [x0], #16
        subs            w4, w4, #1
        bne             .Lecbencloop
.Lecbencout:
        FRAME_POP
        ret
AES_ENDPROC(aes_ecb_encrypt)


AES_ENTRY(aes_ecb_decrypt)
        FRAME_PUSH
        cbz             w5, .LecbdecloopNx

        dec_prepare     w3, x2, x5

.LecbdecloopNx:
#if INTERLEAVE >= 2
        subs            w4, w4, #INTERLEAVE
        bmi             .Lecbdec1x
#if INTERLEAVE == 2
        ld1             {v0.16b-v1.16b}, [x1], #32      /* get 2 ct blocks */
        do_decrypt_block2x
        st1             {v0.16b-v1.16b}, [x0], #32
#else
        ld1             {v0.16b-v3.16b}, [x1], #64      /* get 4 ct blocks */
        do_decrypt_block4x
        st1             {v0.16b-v3.16b}, [x0], #64
#endif
        b               .LecbdecloopNx
.Lecbdec1x:
        adds            w4, w4, #INTERLEAVE
        beq             .Lecbdecout
#endif
.Lecbdecloop:
        ld1             {v0.16b}, [x1], #16             /* get next ct block */
        decrypt_block   v0, w3, x2, x5, w6
        st1             {v0.16b}, [x0], #16
        subs            w4, w4, #1
        bne             .Lecbdecloop
.Lecbdecout:
        FRAME_POP
        ret
AES_ENDPROC(aes_ecb_decrypt)


        /*
         * aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
         *                 int blocks, u8 iv[], int first)
         * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
         *                 int blocks, u8 iv[], int first)
         */
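        /*
         * Arguments: x0=out, x1=in, x2=rk, w3=rounds, w4=blocks, x5=iv,
         * w6=first. CBC encryption is inherently serial (each block is xored
         * with the previous ciphertext before encryption), so only the
         * decryption path below uses the interleaved helpers.
         */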

AES_ENTRY(aes_cbc_encrypt)
        cbz             w6, .Lcbcencloop

        ld1             {v0.16b}, [x5]                  /* get iv */
        enc_prepare     w3, x2, x6

.Lcbcencloop:
        ld1             {v1.16b}, [x1], #16             /* get next pt block */
        eor             v0.16b, v0.16b, v1.16b          /* ..and xor with iv */
        encrypt_block   v0, w3, x2, x6, w7
        st1             {v0.16b}, [x0], #16
        subs            w4, w4, #1
        bne             .Lcbcencloop
        st1             {v0.16b}, [x5]                  /* return iv */
        ret
AES_ENDPROC(aes_cbc_encrypt)


AES_ENTRY(aes_cbc_decrypt)
        FRAME_PUSH
        cbz             w6, .LcbcdecloopNx

        ld1             {v7.16b}, [x5]                  /* get iv */
        dec_prepare     w3, x2, x6

.LcbcdecloopNx:
#if INTERLEAVE >= 2
        subs            w4, w4, #INTERLEAVE
        bmi             .Lcbcdec1x
#if INTERLEAVE == 2
        ld1             {v0.16b-v1.16b}, [x1], #32      /* get 2 ct blocks */
        mov             v2.16b, v0.16b
        mov             v3.16b, v1.16b
        do_decrypt_block2x
        eor             v0.16b, v0.16b, v7.16b
        eor             v1.16b, v1.16b, v2.16b
        mov             v7.16b, v3.16b
        st1             {v0.16b-v1.16b}, [x0], #32
#else
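        /*
         * 4-way path: ciphertext copies are kept in v4-v6 because each
         * decrypted block must be xored with the preceding ciphertext
         * (v7 holds the one carried over from the previous iteration).
         * There is no spare register for the fourth ciphertext block, so it
         * is reloaded from memory below to become the chaining value for the
         * next iteration.
         */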
        ld1             {v0.16b-v3.16b}, [x1], #64      /* get 4 ct blocks */
        mov             v4.16b, v0.16b
        mov             v5.16b, v1.16b
        mov             v6.16b, v2.16b
        do_decrypt_block4x
        sub             x1, x1, #16
        eor             v0.16b, v0.16b, v7.16b
        eor             v1.16b, v1.16b, v4.16b
        ld1             {v7.16b}, [x1], #16             /* reload 1 ct block */
        eor             v2.16b, v2.16b, v5.16b
        eor             v3.16b, v3.16b, v6.16b
        st1             {v0.16b-v3.16b}, [x0], #64
#endif
        b               .LcbcdecloopNx
.Lcbcdec1x:
        adds            w4, w4, #INTERLEAVE
        beq             .Lcbcdecout
#endif
.Lcbcdecloop:
        ld1             {v1.16b}, [x1], #16             /* get next ct block */
        mov             v0.16b, v1.16b                  /* ...and copy to v0 */
        decrypt_block   v0, w3, x2, x6, w7
        eor             v0.16b, v0.16b, v7.16b          /* xor with iv => pt */
        mov             v7.16b, v1.16b                  /* ct is next iv */
        st1             {v0.16b}, [x0], #16
        subs            w4, w4, #1
        bne             .Lcbcdecloop
.Lcbcdecout:
        FRAME_POP
        st1             {v7.16b}, [x5]                  /* return iv */
        ret
AES_ENDPROC(aes_cbc_decrypt)


        /*
         * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
         *                 int blocks, u8 ctr[], int first)
         */
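        /*
         * Arguments: x0=out, x1=in, x2=rk, w3=rounds, w4=blocks, x5=ctr,
         * w6=first. The counter block at x5 is big endian; its low-order
         * 64 bits are byte swapped into x8 so they can be incremented with
         * ordinary integer arithmetic, and swapped back into v4 before each
         * encryption.
         */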

AES_ENTRY(aes_ctr_encrypt)
        FRAME_PUSH
        cbz             w6, .Lctrnotfirst       /* 1st time around? */
        enc_prepare     w3, x2, x6
        ld1             {v4.16b}, [x5]

.Lctrnotfirst:
        umov            x8, v4.d[1]             /* keep swabbed ctr in reg */
        rev             x8, x8
#if INTERLEAVE >= 2
        cmn             w8, w4                  /* 32 bit overflow? */
        bcs             .Lctrloop
.LctrloopNx:
        subs            w4, w4, #INTERLEAVE
        bmi             .Lctr1x
#if INTERLEAVE == 2
        mov             v0.8b, v4.8b
        mov             v1.8b, v4.8b
        rev             x7, x8
        add             x8, x8, #1
        ins             v0.d[1], x7
        rev             x7, x8
        add             x8, x8, #1
        ins             v1.d[1], x7
        ld1             {v2.16b-v3.16b}, [x1], #32      /* get 2 input blocks */
        do_encrypt_block2x
        eor             v0.16b, v0.16b, v2.16b
        eor             v1.16b, v1.16b, v3.16b
        st1             {v0.16b-v1.16b}, [x0], #32
#else
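        /*
         * 4-way path: the 'cmn w8, w4' check above guarantees the low
         * 32 bits of the counter cannot wrap while processing this request,
         * so only the bottom word needs incrementing here. Build ctr+1,
         * ctr+2 and ctr+3 by duplicating the counter word, adding the packed
         * addends, byte swapping back to big endian and patching the last
         * word of the counter block copies in v1-v3 (v0 keeps ctr+0).
         */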
        ldr             q8, =0x30000000200000001        /* addends 1,2,3[,0] */
        dup             v7.4s, w8
        mov             v0.16b, v4.16b
        add             v7.4s, v7.4s, v8.4s
        mov             v1.16b, v4.16b
        rev32           v8.16b, v7.16b
        mov             v2.16b, v4.16b
        mov             v3.16b, v4.16b
        mov             v1.s[3], v8.s[0]
        mov             v2.s[3], v8.s[1]
        mov             v3.s[3], v8.s[2]
        ld1             {v5.16b-v7.16b}, [x1], #48      /* get 3 input blocks */
        do_encrypt_block4x
        eor             v0.16b, v5.16b, v0.16b
        ld1             {v5.16b}, [x1], #16             /* get 1 input block  */
        eor             v1.16b, v6.16b, v1.16b
        eor             v2.16b, v7.16b, v2.16b
        eor             v3.16b, v5.16b, v3.16b
        st1             {v0.16b-v3.16b}, [x0], #64
        add             x8, x8, #INTERLEAVE
#endif
        rev             x7, x8
        ins             v4.d[1], x7
        cbz             w4, .Lctrout
        b               .LctrloopNx
.Lctr1x:
        adds            w4, w4, #INTERLEAVE
        beq             .Lctrout
#endif
.Lctrloop:
        mov             v0.16b, v4.16b
        encrypt_block   v0, w3, x2, x6, w7

        adds            x8, x8, #1              /* increment BE ctr */
        rev             x7, x8
        ins             v4.d[1], x7
        bcs             .Lctrcarry              /* overflow? */

.Lctrcarrydone:
        subs            w4, w4, #1
        bmi             .Lctrtailblock          /* blocks <0 means tail block */
        ld1             {v3.16b}, [x1], #16
        eor             v3.16b, v0.16b, v3.16b
        st1             {v3.16b}, [x0], #16
        bne             .Lctrloop

.Lctrout:
        st1             {v4.16b}, [x5]          /* return next CTR value */
        FRAME_POP
        ret

.Lctrtailblock:
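        /*
         * Only the raw keystream block is written here; the C glue is
         * assumed to pass a negative block count plus a scratch output
         * buffer for the final partial block, and to xor the keystream with
         * the remaining bytes itself (see the CTR path in aes-glue.c).
         */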
        st1             {v0.16b}, [x0]
        FRAME_POP
        ret

.Lctrcarry:
        umov            x7, v4.d[0]             /* load upper word of ctr  */
        rev             x7, x7                  /* ... to handle the carry */
        add             x7, x7, #1
        rev             x7, x7
        ins             v4.d[0], x7
        b               .Lctrcarrydone
AES_ENDPROC(aes_ctr_encrypt)
        .ltorg


        /*
         * aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
         *                 int blocks, u8 const rk2[], u8 iv[], int first)
         * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
         *                 int blocks, u8 const rk2[], u8 iv[], int first)
         */
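        /*
         * Arguments: x0=out, x1=in, x2=rk1 (data key), w3=rounds, w4=blocks,
         * x5=rk2 (tweak key), x6=iv, w7=first. On the first call the initial
         * tweak is derived by encrypting the IV with rk2, after which the
         * NEON state is switched to rk1 for the data blocks.
         */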

        .macro          next_tweak, out, in, const, tmp
        sshr            \tmp\().2d,  \in\().2d,   #63
        and             \tmp\().16b, \tmp\().16b, \const\().16b
        add             \out\().2d,  \in\().2d,   \in\().2d
        ext             \tmp\().16b, \tmp\().16b, \tmp\().16b, #8
        eor             \out\().16b, \out\().16b, \tmp\().16b
        .endm
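        /*
         * next_tweak computes \out = \in * x in GF(2^128), reduced modulo
         * the XTS polynomial x^128 + x^7 + x^2 + x + 1:
         *  - sshr/and turn the top bit of each 64-bit lane into a mask and
         *    select the per-lane carry value (1 or 0x87) from \const,
         *  - add doubles each lane, i.e. shifts it left by one bit,
         *  - ext swaps the two lanes of the mask so that the bit shifted out
         *    of the low half lands in bit 0 of the high half, while the bit
         *    shifted out of the high half folds the constant 0x87 back into
         *    the low half (the reduction),
         *  - eor applies both corrections.
         * .Lxts_mul_x below provides the {1, 0x87} constant in the right
         * byte order for either endianness.
         */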

.Lxts_mul_x:
CPU_LE( .quad           1, 0x87         )
CPU_BE( .quad           0x87, 1         )

AES_ENTRY(aes_xts_encrypt)
        FRAME_PUSH
        cbz             w7, .LxtsencloopNx

        ld1             {v4.16b}, [x6]
        enc_prepare     w3, x5, x6
        encrypt_block   v4, w3, x5, x6, w7              /* first tweak */
        enc_switch_key  w3, x2, x6
        ldr             q7, .Lxts_mul_x
        b               .LxtsencNx

.LxtsencloopNx:
        ldr             q7, .Lxts_mul_x
        next_tweak      v4, v4, v7, v8
.LxtsencNx:
#if INTERLEAVE >= 2
        subs            w4, w4, #INTERLEAVE
        bmi             .Lxtsenc1x
#if INTERLEAVE == 2
        ld1             {v0.16b-v1.16b}, [x1], #32      /* get 2 pt blocks */
        next_tweak      v5, v4, v7, v8
        eor             v0.16b, v0.16b, v4.16b
        eor             v1.16b, v1.16b, v5.16b
        do_encrypt_block2x
        eor             v0.16b, v0.16b, v4.16b
        eor             v1.16b, v1.16b, v5.16b
        st1             {v0.16b-v1.16b}, [x0], #32
        cbz             w4, .LxtsencoutNx
        next_tweak      v4, v5, v7, v8
        b               .LxtsencNx
.LxtsencoutNx:
        mov             v4.16b, v5.16b
        b               .Lxtsencout
#else
        ld1             {v0.16b-v3.16b}, [x1], #64      /* get 4 pt blocks */
        next_tweak      v5, v4, v7, v8
        eor             v0.16b, v0.16b, v4.16b
        next_tweak      v6, v5, v7, v8
        eor             v1.16b, v1.16b, v5.16b
        eor             v2.16b, v2.16b, v6.16b
        next_tweak      v7, v6, v7, v8
        eor             v3.16b, v3.16b, v7.16b
        do_encrypt_block4x
        eor             v3.16b, v3.16b, v7.16b
        eor             v0.16b, v0.16b, v4.16b
        eor             v1.16b, v1.16b, v5.16b
        eor             v2.16b, v2.16b, v6.16b
        st1             {v0.16b-v3.16b}, [x0], #64
        mov             v4.16b, v7.16b
        cbz             w4, .Lxtsencout
        b               .LxtsencloopNx
#endif
.Lxtsenc1x:
        adds            w4, w4, #INTERLEAVE
        beq             .Lxtsencout
#endif
.Lxtsencloop:
        ld1             {v1.16b}, [x1], #16
        eor             v0.16b, v1.16b, v4.16b
        encrypt_block   v0, w3, x2, x6, w7
        eor             v0.16b, v0.16b, v4.16b
        st1             {v0.16b}, [x0], #16
        subs            w4, w4, #1
        beq             .Lxtsencout
        next_tweak      v4, v4, v7, v8
        b               .Lxtsencloop
.Lxtsencout:
        FRAME_POP
        ret
AES_ENDPROC(aes_xts_encrypt)


AES_ENTRY(aes_xts_decrypt)
        FRAME_PUSH
        cbz             w7, .LxtsdecloopNx

        ld1             {v4.16b}, [x6]
        enc_prepare     w3, x5, x6
        encrypt_block   v4, w3, x5, x6, w7              /* first tweak */
        dec_prepare     w3, x2, x6
        ldr             q7, .Lxts_mul_x
        b               .LxtsdecNx

.LxtsdecloopNx:
        ldr             q7, .Lxts_mul_x
        next_tweak      v4, v4, v7, v8
.LxtsdecNx:
#if INTERLEAVE >= 2
        subs            w4, w4, #INTERLEAVE
        bmi             .Lxtsdec1x
#if INTERLEAVE == 2
        ld1             {v0.16b-v1.16b}, [x1], #32      /* get 2 ct blocks */
        next_tweak      v5, v4, v7, v8
        eor             v0.16b, v0.16b, v4.16b
        eor             v1.16b, v1.16b, v5.16b
        do_decrypt_block2x
        eor             v0.16b, v0.16b, v4.16b
        eor             v1.16b, v1.16b, v5.16b
        st1             {v0.16b-v1.16b}, [x0], #32
        cbz             w4, .LxtsdecoutNx
        next_tweak      v4, v5, v7, v8
        b               .LxtsdecNx
.LxtsdecoutNx:
        mov             v4.16b, v5.16b
        b               .Lxtsdecout
#else
        ld1             {v0.16b-v3.16b}, [x1], #64      /* get 4 ct blocks */
        next_tweak      v5, v4, v7, v8
        eor             v0.16b, v0.16b, v4.16b
        next_tweak      v6, v5, v7, v8
        eor             v1.16b, v1.16b, v5.16b
        eor             v2.16b, v2.16b, v6.16b
        next_tweak      v7, v6, v7, v8
        eor             v3.16b, v3.16b, v7.16b
        do_decrypt_block4x
        eor             v3.16b, v3.16b, v7.16b
        eor             v0.16b, v0.16b, v4.16b
        eor             v1.16b, v1.16b, v5.16b
        eor             v2.16b, v2.16b, v6.16b
        st1             {v0.16b-v3.16b}, [x0], #64
        mov             v4.16b, v7.16b
        cbz             w4, .Lxtsdecout
        b               .LxtsdecloopNx
#endif
.Lxtsdec1x:
        adds            w4, w4, #INTERLEAVE
        beq             .Lxtsdecout
#endif
.Lxtsdecloop:
        ld1             {v1.16b}, [x1], #16
        eor             v0.16b, v1.16b, v4.16b
        decrypt_block   v0, w3, x2, x6, w7
        eor             v0.16b, v0.16b, v4.16b
        st1             {v0.16b}, [x0], #16
        subs            w4, w4, #1
        beq             .Lxtsdecout
        next_tweak      v4, v4, v7, v8
        b               .Lxtsdecloop
.Lxtsdecout:
        FRAME_POP
        ret
AES_ENDPROC(aes_xts_decrypt)

        /*
         * aes_mac_update(u8 const in[], u32 const rk[], int rounds,
         *                int blocks, u8 dg[], int enc_before, int enc_after)
         */
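        /*
         * Arguments: x0=in, x1=rk, w2=rounds, w3=blocks, x4=dg,
         * w5=enc_before, w6=enc_after. Each input block is xored into the
         * running digest and the result encrypted, except that the
         * encryption after the final block is skipped when enc_after is
         * zero; a nonzero enc_before encrypts the digest once before the
         * first block (used by the CMAC/XCBC glue in aes-glue.c).
         */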
AES_ENTRY(aes_mac_update)
        ld1             {v0.16b}, [x4]                  /* get dg */
        enc_prepare     w2, x1, x7
        cbnz            w5, .Lmacenc

.Lmacloop:
        cbz             w3, .Lmacout
        ld1             {v1.16b}, [x0], #16             /* get next pt block */
        eor             v0.16b, v0.16b, v1.16b          /* ..and xor with dg */

        subs            w3, w3, #1
        csinv           x5, x6, xzr, eq
        cbz             w5, .Lmacout

.Lmacenc:
        encrypt_block   v0, w2, x1, x7, w8
        b               .Lmacloop

.Lmacout:
        st1             {v0.16b}, [x4]                  /* return dg */
        ret
AES_ENDPROC(aes_mac_update)