/*
 * linux/arch/arm64/crypto/aes-modes.S - chaining mode wrappers for AES
 *
 * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

/* included by aes-ce.S and aes-neon.S */
/*
 * Internal helpers: en-/decrypt four AES blocks (v0..v3) in one call via
 * the {en,de}crypt_block4x macros supplied by the including file
 * (aes-ce.S or aes-neon.S).  w22 = round count, x21 = round key pointer,
 * x8/w7 = scratch -- presumably; confirm against the pristine source.
 * NOTE(review): the AES_ENTRY lines and the stray leading numerals
 * indicate this excerpt is a partial paste of the original file.
 */
17 encrypt_block4x v0, v1, v2, v3, w22, x21, x8, w7
19 ENDPROC(aes_encrypt_block4x)
22 decrypt_block4x v0, v1, v2, v3, w22, x21, x8, w7
24 ENDPROC(aes_decrypt_block4x)
/*
 * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 *		   int blocks)
 * aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 *		   int blocks)
 */
/*
 * ECB encryption: 4 blocks per iteration via aes_encrypt_block4x, then a
 * single-block tail path.  Visible register roles: x19 = out, x20 = in,
 * x21 = round keys, w22 = rounds.  NOTE(review): loop labels, branches
 * and frame setup are missing from this excerpt, and the stray leading
 * numerals look like pasted line numbers -- confirm against pristine source.
 */
33 AES_ENTRY(aes_ecb_encrypt)
43 enc_prepare w22, x21, x5 /* prepare round keys; x5 = scratch */
48 ld1 {v0.16b-v3.16b}, [x20], #64 /* get 4 pt blocks */
49 bl aes_encrypt_block4x /* encrypt v0..v3 in parallel */
50 st1 {v0.16b-v3.16b}, [x19], #64 /* store 4 ct blocks */
51 cond_yield_neon .Lecbencrestart /* voluntary preemption point; restart target not visible here */
57 ld1 {v0.16b}, [x20], #16 /* get next pt block */
58 encrypt_block v0, w22, x21, x5, w6 /* single-block tail */
59 st1 {v0.16b}, [x19], #16
65 AES_ENDPROC(aes_ecb_encrypt)
/*
 * ECB decryption: mirror image of aes_ecb_encrypt -- 4 blocks per
 * iteration via aes_decrypt_block4x, then a single-block tail path.
 * Visible register roles: x19 = out, x20 = in, x21 = round keys,
 * w22 = rounds.  NOTE(review): loop labels/branches are missing from
 * this excerpt -- confirm against the pristine source.
 */
68 AES_ENTRY(aes_ecb_decrypt)
78 dec_prepare w22, x21, x5 /* prepare decryption round keys; x5 = scratch */
83 ld1 {v0.16b-v3.16b}, [x20], #64 /* get 4 ct blocks */
84 bl aes_decrypt_block4x /* decrypt v0..v3 in parallel */
85 st1 {v0.16b-v3.16b}, [x19], #64 /* store 4 pt blocks */
86 cond_yield_neon .Lecbdecrestart /* voluntary preemption point */
92 ld1 {v0.16b}, [x20], #16 /* get next ct block */
93 decrypt_block v0, w22, x21, x5, w6 /* single-block tail */
94 st1 {v0.16b}, [x19], #16
100 AES_ENDPROC(aes_ecb_decrypt)
/*
 * aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 *		   int blocks, u8 iv[])
 * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 *		   int blocks, u8 iv[])
 */
/*
 * CBC encryption is inherently serial: each plaintext block is XORed
 * with the previous ciphertext before encryption (v4 starts as the IV,
 * read from x24).  The copy of the last ciphertext of the 4-block batch
 * back into v4 is presumably on a line missing from this excerpt -- TODO
 * confirm against the pristine source.  x19 = out, x20 = in.
 */
110 AES_ENTRY(aes_cbc_encrypt)
121 ld1 {v4.16b}, [x24] /* get iv */
122 enc_prepare w22, x21, x6
127 ld1 {v0.16b-v3.16b}, [x20], #64 /* get 4 pt blocks */
128 eor v0.16b, v0.16b, v4.16b /* ..and xor with iv */
129 encrypt_block v0, w22, x21, x6, w7
130 eor v1.16b, v1.16b, v0.16b /* chain: pt1 ^ ct0 */
131 encrypt_block v1, w22, x21, x6, w7
132 eor v2.16b, v2.16b, v1.16b /* chain: pt2 ^ ct1 */
133 encrypt_block v2, w22, x21, x6, w7
134 eor v3.16b, v3.16b, v2.16b /* chain: pt3 ^ ct2 */
135 encrypt_block v3, w22, x21, x6, w7
136 st1 {v0.16b-v3.16b}, [x19], #64
138 st1 {v4.16b}, [x24] /* return iv */
139 cond_yield_neon .Lcbcencrestart /* voluntary preemption point */
145 ld1 {v0.16b}, [x20], #16 /* get next pt block */
146 eor v4.16b, v4.16b, v0.16b /* ..and xor with iv */
147 encrypt_block v4, w22, x21, x6, w7 /* ct stays in v4 as next iv */
148 st1 {v4.16b}, [x19], #16
152 st1 {v4.16b}, [x24] /* return iv */
155 AES_ENDPROC(aes_cbc_encrypt)
/*
 * CBC decryption: 4 ct blocks are decrypted in parallel, then each
 * result is XORed with the preceding ciphertext.  v7 = iv / previous ct;
 * v4..v6 presumably hold saved copies of ct blocks 0..2, loaded on lines
 * missing from this excerpt -- TODO confirm against the pristine source.
 * The single-block tail keeps the running iv in v7.
 */
158 AES_ENTRY(aes_cbc_decrypt)
169 ld1 {v7.16b}, [x24] /* get iv */
170 dec_prepare w22, x21, x6
175 ld1 {v0.16b-v3.16b}, [x20], #64 /* get 4 ct blocks */
179 bl aes_decrypt_block4x
181 eor v0.16b, v0.16b, v7.16b /* xor with iv / prev ct */
182 eor v1.16b, v1.16b, v4.16b
183 ld1 {v7.16b}, [x20], #16 /* reload 1 ct block */
184 eor v2.16b, v2.16b, v5.16b
185 eor v3.16b, v3.16b, v6.16b
186 st1 {v0.16b-v3.16b}, [x19], #64
187 st1 {v7.16b}, [x24] /* return iv */
188 cond_yield_neon .Lcbcdecrestart /* voluntary preemption point */
194 ld1 {v1.16b}, [x20], #16 /* get next ct block */
195 mov v0.16b, v1.16b /* ...and copy to v0 */
196 decrypt_block v0, w22, x21, x6, w7
197 eor v0.16b, v0.16b, v7.16b /* xor with iv => pt */
198 mov v7.16b, v1.16b /* ct is next iv */
199 st1 {v0.16b}, [x19], #16
203 st1 {v7.16b}, [x24] /* return iv */
206 AES_ENDPROC(aes_cbc_decrypt)
/*
 * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 *		   int blocks, u8 ctr[])
 */
/*
 * CTR mode: v4 holds the big-endian 128-bit counter (returned via x24);
 * its low 64 bits are cached byte-swapped in x6.  The 4-way path XORs
 * keystream blocks v0..v3 with the input; `cmn w6, #4` checks whether a
 * 32-bit counter overflow would occur within the batch.  A carry into
 * the upper counter word is handled at .Lctrcarry.  NOTE(review): many
 * labels/branches between these lines are missing from this excerpt --
 * confirm the control flow against the pristine source.
 */
214 AES_ENTRY(aes_ctr_encrypt)
225 enc_prepare w22, x21, x6
228 umov x6, v4.d[1] /* keep swabbed ctr in reg */
233 cmn w6, #4 /* 32 bit overflow? */
248 ld1 {v5.16b-v7.16b}, [x20], #48 /* get 3 input blocks */
249 bl aes_encrypt_block4x
250 eor v0.16b, v5.16b, v0.16b /* xor keystream with input */
251 ld1 {v5.16b}, [x20], #16 /* get 1 input block */
252 eor v1.16b, v6.16b, v1.16b
253 eor v2.16b, v7.16b, v2.16b
254 eor v3.16b, v5.16b, v3.16b
255 st1 {v0.16b-v3.16b}, [x19], #64
260 st1 {v4.16b}, [x24] /* return next CTR value */
261 cond_yield_neon .Lctrrestart /* voluntary preemption point */
268 encrypt_block v0, w22, x21, x8, w7 /* single-block keystream */
270 adds x6, x6, #1 /* increment BE ctr */
273 bcs .Lctrcarry /* overflow? */
277 bmi .Lctrtailblock /* blocks <0 means tail block */
278 ld1 {v3.16b}, [x20], #16
279 eor v3.16b, v0.16b, v3.16b /* xor keystream with input */
280 st1 {v3.16b}, [x19], #16
284 st1 {v4.16b}, [x24] /* return next CTR value */
294 umov x7, v4.d[0] /* load upper word of ctr */
295 rev x7, x7 /* ... to handle the carry */
300 AES_ENDPROC(aes_ctr_encrypt)
/*
 * aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
 *		   int blocks, u8 const rk2[], u8 iv[], int first)
 * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
 *		   int blocks, u8 const rk2[], u8 iv[], int first)
 */
/*
 * next_tweak: multiply the 128-bit XTS tweak \in by x in GF(2^128):
 * shift left by one bit; if the top bit was set, fold in the reduction
 * polynomial (0x87).  \const must hold the endian-corrected {1, 0x87}
 * quad pair below; \tmp is clobbered.  NOTE(review): the .endm and the
 * label for the constant are missing from this excerpt.
 */
311 .macro next_tweak, out, in, const, tmp
312 sshr \tmp\().2d, \in\().2d, #63 /* replicate top bit of each 64-bit half */
313 and \tmp\().16b, \tmp\().16b, \const\().16b /* select carry / reduction terms */
314 add \out\().2d, \in\().2d, \in\().2d /* shift both halves left by 1 */
315 ext \tmp\().16b, \tmp\().16b, \tmp\().16b, #8 /* swap halves so terms land correctly */
316 eor \out\().16b, \out\().16b, \tmp\().16b
320 CPU_LE( .quad 1, 0x87 ) /* xts_mul_x constant, endian-corrected */
321 CPU_BE( .quad 0x87, 1 )
/*
 * XTS encryption: when 'first' (w7) is set, the initial tweak is produced
 * by encrypting the iv with rk2, then the data key rk1 is switched in.
 * v4 holds the current tweak, v7 the tweak-doubling constant.  The 4-way
 * path derives tweaks v5..v7 on the fly; the 4th next_tweak deliberately
 * overwrites the constant in v7, which is presumably reloaded on the
 * loop-back path (not visible in this excerpt) -- TODO confirm.
 */
323 AES_ENTRY(aes_xts_encrypt)
334 cbz w7, .Lxtsencnotfirst /* skip tweak setup unless 'first' */
336 enc_prepare w3, x5, x8 /* prepare rk2 (tweak key) */
337 encrypt_block v4, w3, x5, x8, w7 /* first tweak */
338 enc_switch_key w3, x2, x8 /* switch to rk1 for the data */
345 enc_prepare w22, x21, x8
348 next_tweak v4, v4, v7, v8 /* advance tweak for next iteration */
352 ld1 {v0.16b-v3.16b}, [x20], #64 /* get 4 pt blocks */
353 next_tweak v5, v4, v7, v8
354 eor v0.16b, v0.16b, v4.16b /* pre-whiten with tweaks */
355 next_tweak v6, v5, v7, v8
356 eor v1.16b, v1.16b, v5.16b
357 eor v2.16b, v2.16b, v6.16b
358 next_tweak v7, v6, v7, v8 /* 4th tweak clobbers the constant */
359 eor v3.16b, v3.16b, v7.16b
360 bl aes_encrypt_block4x
361 eor v3.16b, v3.16b, v7.16b /* post-whiten with tweaks */
362 eor v0.16b, v0.16b, v4.16b
363 eor v1.16b, v1.16b, v5.16b
364 eor v2.16b, v2.16b, v6.16b
365 st1 {v0.16b-v3.16b}, [x19], #64
369 cond_yield_neon .Lxtsencrestart /* voluntary preemption point */
375 ld1 {v1.16b}, [x20], #16 /* single-block path */
376 eor v0.16b, v1.16b, v4.16b /* pre-whiten */
377 encrypt_block v0, w22, x21, x8, w7
378 eor v0.16b, v0.16b, v4.16b /* post-whiten */
379 st1 {v0.16b}, [x19], #16
382 next_tweak v4, v4, v7, v8
388 AES_ENDPROC(aes_xts_encrypt)
/*
 * XTS decryption: mirror of aes_xts_encrypt.  Note the first tweak is
 * still produced with encrypt_block (the tweak is always *encrypted*
 * with rk2), only the data path uses the decryption keys (dec_prepare).
 * v4 = current tweak, v7 = tweak-doubling constant; as on the encrypt
 * side, the 4th next_tweak overwrites v7 and it is presumably reloaded
 * on the loop-back path (not visible in this excerpt) -- TODO confirm.
 */
391 AES_ENTRY(aes_xts_decrypt)
402 cbz w7, .Lxtsdecnotfirst /* skip tweak setup unless 'first' */
404 enc_prepare w3, x5, x8 /* prepare rk2 (tweak key) */
405 encrypt_block v4, w3, x5, x8, w7 /* first tweak */
406 dec_prepare w3, x2, x8 /* switch to rk1 decryption keys */
413 dec_prepare w22, x21, x8
416 next_tweak v4, v4, v7, v8 /* advance tweak for next iteration */
420 ld1 {v0.16b-v3.16b}, [x20], #64 /* get 4 ct blocks */
421 next_tweak v5, v4, v7, v8
422 eor v0.16b, v0.16b, v4.16b /* pre-whiten with tweaks */
423 next_tweak v6, v5, v7, v8
424 eor v1.16b, v1.16b, v5.16b
425 eor v2.16b, v2.16b, v6.16b
426 next_tweak v7, v6, v7, v8 /* 4th tweak clobbers the constant */
427 eor v3.16b, v3.16b, v7.16b
428 bl aes_decrypt_block4x
429 eor v3.16b, v3.16b, v7.16b /* post-whiten with tweaks */
430 eor v0.16b, v0.16b, v4.16b
431 eor v1.16b, v1.16b, v5.16b
432 eor v2.16b, v2.16b, v6.16b
433 st1 {v0.16b-v3.16b}, [x19], #64
437 cond_yield_neon .Lxtsdecrestart /* voluntary preemption point */
443 ld1 {v1.16b}, [x20], #16 /* single-block path */
444 eor v0.16b, v1.16b, v4.16b /* pre-whiten */
445 decrypt_block v0, w22, x21, x8, w7
446 eor v0.16b, v0.16b, v4.16b /* post-whiten */
447 st1 {v0.16b}, [x19], #16
450 next_tweak v4, v4, v7, v8
456 AES_ENDPROC(aes_xts_decrypt)
/*
 * aes_mac_update(u8 const in[], u32 const rk[], int rounds,
 *		  int blocks, u8 dg[], int enc_before, int enc_after)
 */
/*
 * MAC update (CBC-MAC/CMAC style): XOR each input block into the running
 * digest v0 and encrypt, four blocks per iteration.  x23 = dg pointer,
 * x19 = in pointer.  The mix of w2/x1 vs w21/x20 as rounds/rk suggests
 * registers are re-shuffled between entry and the yield-restart path --
 * the connecting lines are missing from this excerpt; TODO confirm.
 * csinv derives the deferred "encrypt after" decision from x24
 * (presumably enc_after) and the remaining-blocks test.
 */
462 AES_ENTRY(aes_mac_update)
472 ld1 {v0.16b}, [x23] /* get dg */
473 enc_prepare w2, x1, x7
476 encrypt_block v0, w2, x1, x7, w8 /* enc_before: encrypt dg up front */
481 ld1 {v1.16b-v4.16b}, [x19], #64 /* get next pt block */
482 eor v0.16b, v0.16b, v1.16b /* ..and xor with dg */
483 encrypt_block v0, w21, x20, x7, w8
484 eor v0.16b, v0.16b, v2.16b
485 encrypt_block v0, w21, x20, x7, w8
486 eor v0.16b, v0.16b, v3.16b
487 encrypt_block v0, w21, x20, x7, w8
488 eor v0.16b, v0.16b, v4.16b
490 csinv x5, x24, xzr, eq /* x5 = eq ? x24 : ~xzr (non-zero) */
492 encrypt_block v0, w21, x20, x7, w8
493 st1 {v0.16b}, [x23] /* return dg */
494 cond_yield_neon .Lmacrestart /* voluntary preemption point */
500 ld1 {v1.16b}, [x19], #16 /* get next pt block */
501 eor v0.16b, v0.16b, v1.16b /* ..and xor with dg */
504 csinv x5, x24, xzr, eq /* same enc_after selection as above */
508 encrypt_block v0, w21, x20, x7, w8
512 st1 {v0.16b}, [x23] /* return dg */
517 ld1 {v0.16b}, [x23] /* restart path: reload dg */
518 enc_prepare w21, x20, x0 /* ..and re-prepare keys after yield */
520 AES_ENDPROC(aes_mac_update)