2 * linux/arch/arm64/crypto/aes-neon.S - AES cipher for ARMv8 NEON
4 * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
11 #include <linux/linkage.h>
12 #include <asm/assembler.h>
/*
 * Symbol wrappers: glue a neon_ prefix onto func with the ## operator and
 * hand the resulting name to ENTRY() / ENDPROC().
 * NOTE(review): ENTRY and ENDPROC are not defined in this file; presumably
 * they are provided by <linux/linkage.h> - confirm.
 */
14 #define AES_ENTRY(func) ENTRY(neon_ ## func)
15 #define AES_ENDPROC(func) ENDPROC(neon_ ## func)
17 /* multiply by polynomial 'x' in GF(2^8) */
/*
 * mul_by_x out, in, temp, const
 *
 *   out, in - named as the result and source operands
 *   temp    - scratch operand, overwritten
 *   const   - operand that is ANDed into temp; every caller in this file
 *             passes v14.16b
 *
 * NOTE(review): only the masking step (temp &= const) is visible in this
 * copy of the file.  The instructions that derive temp from the source and
 * that produce the result, as well as the closing .endm, are missing.
 */
18 .macro mul_by_x, out, in, temp, const
21 and \temp, \temp, \const
25 /* preload the entire Sbox */
/*
 * prepare sbox, shiftrows, temp
 *
 * Fills v16-v31 with 256 consecutive bytes read through the pointer in
 * temp, 64 bytes per ld1.  The first three loads post-increment temp by
 * 64; the last one leaves temp pointing at the final 64-byte quarter.
 *
 * NOTE(review): the lines that point temp at the sbox argument and that
 * consume the shiftrows argument are not visible in this copy of the file
 * (embedded line numbers 27-30 are skipped), and neither is .endm.
 */
26 .macro prepare, sbox, shiftrows, temp
31 ld1 {v16.16b-v19.16b}, [\temp], #64
32 ld1 {v20.16b-v23.16b}, [\temp], #64
33 ld1 {v24.16b-v27.16b}, [\temp], #64
34 ld1 {v28.16b-v31.16b}, [\temp]
37 /* do preload for encryption */
/*
 * enc_prepare ignore0, ignore1, temp
 *
 * Expands to prepare with the forward Sbox and forward ShiftRows labels.
 * The first two arguments are accepted but never referenced; temp is
 * passed straight through as the scratch pointer.
 */
38 .macro enc_prepare, ignore0, ignore1, temp
39 prepare .LForward_Sbox, .LForward_ShiftRows, \temp
/*
 * enc_switch_key ignore0, ignore1, temp
 *
 * NOTE(review): no body lines and no .endm are visible for this macro in
 * this copy of the file, so its effect (if any) cannot be confirmed here.
 */
42 .macro enc_switch_key, ignore0, ignore1, temp
46 /* do preload for decryption */
/*
 * dec_prepare ignore0, ignore1, temp
 *
 * Expands to prepare with the reverse Sbox and reverse ShiftRows labels.
 * The first two arguments are accepted but never referenced; temp is
 * passed straight through as the scratch pointer.
 */
47 .macro dec_prepare, ignore0, ignore1, temp
48 prepare .LReverse_Sbox, .LReverse_ShiftRows, \temp
51 /* apply SubBytes transformation using the preloaded Sbox */
/*
 * NOTE(review): the .macro header and .endm for this body are missing from
 * this copy of the file; the body uses a single parameter named in.
 *
 * Replaces every byte of the state with the table entry it indexes.  The
 * 256-byte table lives in v16-v31 and is consulted 64 entries at a time:
 * tbl covers v16-v19 (and yields 0 for indexes outside that quarter), then
 * each tbx covers the next four registers using the index reduced by v12
 * once more.  tbx leaves a byte alone when its index is out of range, so
 * only the matching quarter supplies each result byte.
 * Presumably v12 holds 0x40 in every byte - TODO confirm against the code
 * that initialises it.
 * Overwrites v9, v10 and v11.
 */
53 sub v9.16b, \in\().16b, v12.16b
54 tbl \in\().16b, {v16.16b-v19.16b}, \in\().16b
55 sub v10.16b, v9.16b, v12.16b
56 tbx \in\().16b, {v20.16b-v23.16b}, v9.16b
57 sub v11.16b, v10.16b, v12.16b
58 tbx \in\().16b, {v24.16b-v27.16b}, v10.16b
59 tbx \in\().16b, {v28.16b-v31.16b}, v11.16b
62 /* apply MixColumns transformation */
/*
 * mix_columns in
 *
 * Rewrites the state register in place.  v8, v9, v10 and v11 are used as
 * scratch and v14 is handed to mul_by_x as its constant.
 *
 * NOTE(review): the embedded line numbers skip 67 and 69, so the
 * instructions that prepare v9 for the eor on line 71 are not visible in
 * this copy of the file; the closing .endm is missing as well.
 */
63 .macro mix_columns, in
64 mul_by_x v10.16b, \in\().16b, v9.16b, v14.16b
65 rev32 v8.8h, \in\().8h /* v8 = state with the halves of each word swapped */
66 eor \in\().16b, v10.16b, \in\().16b
68 shl v11.4s, \in\().4s, #24
70 sri v11.4s, \in\().4s, #8 /* v11 = each word rotated right by 8 bits */
71 eor v9.16b, v9.16b, v8.16b
72 eor v10.16b, v10.16b, v9.16b
73 eor \in\().16b, v10.16b, v11.16b
76 /* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */
/*
 * inv_mix_columns in
 *
 * Applies mul_by_x twice to the state (result in v11, scratch v10,
 * constant v14) and XORs v11 into the state.  Overwrites v10 and v11.
 *
 * NOTE(review): as shown, the two eor lines are identical and would cancel
 * each other.  The embedded line numbers jump from 80 to 82 and stop at
 * 82, so an instruction that changes v11 between them, whatever follows
 * the second eor, and the closing .endm appear to be missing from this
 * copy of the file.
 */
77 .macro inv_mix_columns, in
78 mul_by_x v11.16b, \in\().16b, v10.16b, v14.16b
79 mul_by_x v11.16b, v11.16b, v10.16b, v14.16b
80 eor \in\().16b, \in\().16b, v11.16b
82 eor \in\().16b, \in\().16b, v11.16b
/*
 * do_block enc, in, rounds, rk, rkp, i
 *
 *   enc           - 1 when expanded by encrypt_block, 0 by decrypt_block
 *   in            - state register, transformed in place
 *   rkp           - pointer the next round key is loaded from; advanced by
 *                   16 bytes per load
 *   rounds, rk, i - not referenced by the lines visible here
 *
 * Visible steps: at 1111 the round key held in v15 is XORed into the state
 * and the state bytes are permuted through the index vector in v13; the
 * next round key is then loaded into v15; at 2222 one more round-key XOR
 * is applied.
 *
 * NOTE(review): the embedded line numbers jump (86 to 90, 91 to 93, 93 to
 * 102), so the key setup, the remaining round steps, the loop control and
 * the closing .endm are not visible in this copy of the file.
 */
86 .macro do_block, enc, in, rounds, rk, rkp, i
90 1111: eor \in\().16b, \in\().16b, v15.16b /* ^round key */
91 tbl \in\().16b, {\in\().16b}, v13.16b /* ShiftRows */
93 ld1 {v15.4s}, [\rkp], #16
102 2222: eor \in\().16b, \in\().16b, v15.16b /* ^round key */
/*
 * encrypt_block in, rounds, rk, rkp, i
 *
 * Expands to do_block with enc fixed to 1; all other arguments are passed
 * through unchanged.
 */
105 .macro encrypt_block, in, rounds, rk, rkp, i
106 do_block 1, \in, \rounds, \rk, \rkp, \i
/*
 * decrypt_block in, rounds, rk, rkp, i
 *
 * Expands to do_block with enc fixed to 0; all other arguments are passed
 * through unchanged.
 */
109 .macro decrypt_block, in, rounds, rk, rkp, i
110 do_block 0, \in, \rounds, \rk, \rkp, \i
114 * Interleaved versions: functionally equivalent to the
115 * ones above, but applied to 2 or 4 AES states in parallel.
/*
 * sub_bytes_2x in0, in1
 *
 * Table lookup of every byte of two state registers, interleaved so the
 * two dependency chains overlap.  The 256-byte table in v16-v31 is
 * consulted one 64-entry quarter at a time: tbl handles v16-v19, and each
 * following tbx handles the next four registers with the index reduced by
 * v12 once more (tbx keeps the old byte when the index is out of range).
 * Presumably v12 holds 0x40 in every byte - TODO confirm against the code
 * that initialises it.
 * Overwrites v8-v11: v8/v10 carry the running indexes for in0, v9/v11
 * those for in1.
 * NOTE(review): the closing .endm is not visible in this copy of the file.
 */
118 .macro sub_bytes_2x, in0, in1
119 sub v8.16b, \in0\().16b, v12.16b
120 sub v9.16b, \in1\().16b, v12.16b
121 tbl \in0\().16b, {v16.16b-v19.16b}, \in0\().16b
122 tbl \in1\().16b, {v16.16b-v19.16b}, \in1\().16b
123 sub v10.16b, v8.16b, v12.16b
124 sub v11.16b, v9.16b, v12.16b
125 tbx \in0\().16b, {v20.16b-v23.16b}, v8.16b
126 tbx \in1\().16b, {v20.16b-v23.16b}, v9.16b
127 sub v8.16b, v10.16b, v12.16b
128 sub v9.16b, v11.16b, v12.16b
129 tbx \in0\().16b, {v24.16b-v27.16b}, v10.16b
130 tbx \in1\().16b, {v24.16b-v27.16b}, v11.16b
131 tbx \in0\().16b, {v28.16b-v31.16b}, v8.16b
132 tbx \in1\().16b, {v28.16b-v31.16b}, v9.16b
/*
 * sub_bytes_4x in0, in1, in2, in3
 *
 * Table lookup of every byte of four state registers, with the four
 * dependency chains interleaved.  v8, v9, v10 and v11 hold the running
 * index for in0, in1, in2 and in3 respectively; each is reduced by v12
 * before every tbx so that successive tbx instructions address the table
 * quarters in v20-v23, v24-v27 and v28-v31.  The initial tbl covers
 * v16-v19; tbx keeps the old byte when the index is out of range.
 * Presumably v12 holds 0x40 in every byte - TODO confirm against the code
 * that initialises it.
 * Overwrites v8-v11.
 * NOTE(review): the closing .endm is not visible in this copy of the file.
 */
135 .macro sub_bytes_4x, in0, in1, in2, in3
136 sub v8.16b, \in0\().16b, v12.16b
137 tbl \in0\().16b, {v16.16b-v19.16b}, \in0\().16b
138 sub v9.16b, \in1\().16b, v12.16b
139 tbl \in1\().16b, {v16.16b-v19.16b}, \in1\().16b
140 sub v10.16b, \in2\().16b, v12.16b
141 tbl \in2\().16b, {v16.16b-v19.16b}, \in2\().16b
142 sub v11.16b, \in3\().16b, v12.16b
143 tbl \in3\().16b, {v16.16b-v19.16b}, \in3\().16b
144 tbx \in0\().16b, {v20.16b-v23.16b}, v8.16b
145 tbx \in1\().16b, {v20.16b-v23.16b}, v9.16b
146 sub v8.16b, v8.16b, v12.16b
147 tbx \in2\().16b, {v20.16b-v23.16b}, v10.16b
148 sub v9.16b, v9.16b, v12.16b
149 tbx \in3\().16b, {v20.16b-v23.16b}, v11.16b
150 sub v10.16b, v10.16b, v12.16b
151 tbx \in0\().16b, {v24.16b-v27.16b}, v8.16b
152 sub v11.16b, v11.16b, v12.16b
153 tbx \in1\().16b, {v24.16b-v27.16b}, v9.16b
154 sub v8.16b, v8.16b, v12.16b
155 tbx \in2\().16b, {v24.16b-v27.16b}, v10.16b
156 sub v9.16b, v9.16b, v12.16b
157 tbx \in3\().16b, {v24.16b-v27.16b}, v11.16b
158 sub v10.16b, v10.16b, v12.16b
159 tbx \in0\().16b, {v28.16b-v31.16b}, v8.16b
160 sub v11.16b, v11.16b, v12.16b
161 tbx \in1\().16b, {v28.16b-v31.16b}, v9.16b
162 tbx \in2\().16b, {v28.16b-v31.16b}, v10.16b
163 tbx \in3\().16b, {v28.16b-v31.16b}, v11.16b
/*
 * mul_by_x_2x out0, out1, in0, in1, tmp0, tmp1, const
 *
 * Two-register, interleaved form of the multiply-by-x step.  All operands
 * are bare register names; the .16b arrangement is appended here.
 * Per byte: out = (in + in) ^ (const if the top bit of in is set, else 0).
 *
 *   out0, out1 - results; may name the same registers as in0, in1 (each
 *                input is last read by the add that writes its output)
 *   tmp0, tmp1 - scratch, overwritten
 *   const      - value folded in on overflow; every caller in this file
 *                passes v14
 * NOTE(review): the closing .endm is not visible in this copy of the file.
 */
166 .macro mul_by_x_2x, out0, out1, in0, in1, tmp0, tmp1, const
167 sshr \tmp0\().16b, \in0\().16b, #7 /* 0xff where the top bit is set, else 0x00 */
168 add \out0\().16b, \in0\().16b, \in0\().16b /* shift each byte left by one */
169 sshr \tmp1\().16b, \in1\().16b, #7
170 and \tmp0\().16b, \tmp0\().16b, \const\().16b /* keep const only where a bit was shifted out */
171 add \out1\().16b, \in1\().16b, \in1\().16b
172 and \tmp1\().16b, \tmp1\().16b, \const\().16b
173 eor \out0\().16b, \out0\().16b, \tmp0\().16b
174 eor \out1\().16b, \out1\().16b, \tmp1\().16b
/*
 * mix_columns_2x in0, in1
 *
 * Rewrites two state registers in place, with both chains interleaved.
 * Writing a for the incoming state and xa for the mul_by_x_2x result, each
 * state ends up as
 *
 *     xa ^ ror8(xa ^ a) ^ ror16(a) ^ ror24(a)
 *
 * where rorN rotates every 32-bit word right by N bits (rev32 on
 * halfwords gives the 16-bit rotation, each shl #24 / sri #8 pair gives a
 * further 8-bit rotation).
 *
 * Overwrites v8-v13; v14 is passed to mul_by_x_2x as the constant.  Note
 * that v12 and v13 are destroyed here although sub_bytes_2x/4x read v12
 * and the ShiftRows tbl reads v13, so callers have to restore them.
 * NOTE(review): the closing .endm is not visible in this copy of the file.
 */
177 .macro mix_columns_2x, in0, in1
178 mul_by_x_2x v8, v9, \in0, \in1, v10, v11, v14
179 rev32 v10.8h, \in0\().8h
180 rev32 v11.8h, \in1\().8h
181 eor \in0\().16b, v8.16b, \in0\().16b
182 eor \in1\().16b, v9.16b, \in1\().16b
183 shl v12.4s, v10.4s, #24
184 shl v13.4s, v11.4s, #24
185 eor v8.16b, v8.16b, v10.16b
186 sri v12.4s, v10.4s, #8
187 shl v10.4s, \in0\().4s, #24
188 eor v9.16b, v9.16b, v11.16b
189 sri v13.4s, v11.4s, #8
190 shl v11.4s, \in1\().4s, #24
191 sri v10.4s, \in0\().4s, #8
192 eor \in0\().16b, v8.16b, v12.16b
193 sri v11.4s, \in1\().4s, #8
194 eor \in1\().16b, v9.16b, v13.16b
195 eor \in0\().16b, v10.16b, \in0\().16b
196 eor \in1\().16b, v11.16b, \in1\().16b
/*
 * inv_mix_cols_2x in0, in1
 *
 * Two-state inverse MixColumns: v8/v9 receive each state run through
 * mul_by_x_2x twice, they are XORed into the states, and the result is
 * finished with mix_columns_2x.  Overwrites v8-v13 (v12/v13 through
 * mix_columns_2x).
 *
 * NOTE(review): as shown, the two pairs of eor lines are identical and
 * would cancel each other.  The embedded line numbers jump from 203 to
 * 206, so the instructions that change v8/v9 between them appear to be
 * missing from this copy of the file, as is the closing .endm.
 */
199 .macro inv_mix_cols_2x, in0, in1
200 mul_by_x_2x v8, v9, \in0, \in1, v10, v11, v14
201 mul_by_x_2x v8, v9, v8, v9, v10, v11, v14
202 eor \in0\().16b, \in0\().16b, v8.16b
203 eor \in1\().16b, \in1\().16b, v9.16b
206 eor \in0\().16b, \in0\().16b, v8.16b
207 eor \in1\().16b, \in1\().16b, v9.16b
208 mix_columns_2x \in0, \in1
/*
 * inv_mix_cols_4x in0, in1, in2, in3
 *
 * Four-state inverse MixColumns: v8-v11 receive each state run through
 * mul_by_x_2x twice (v10-v13 serve as its scratch registers), they are
 * XORed into the states, and the states are finished two at a time with
 * mix_columns_2x.  Overwrites v8-v13.
 *
 * NOTE(review): as shown, the two groups of four eor lines are identical
 * and would cancel each other.  The embedded line numbers jump from 219 to
 * 224, so the instructions that change v8-v11 between them appear to be
 * missing from this copy of the file, as is the closing .endm.
 */
211 .macro inv_mix_cols_4x, in0, in1, in2, in3
212 mul_by_x_2x v8, v9, \in0, \in1, v10, v11, v14
213 mul_by_x_2x v10, v11, \in2, \in3, v12, v13, v14
214 mul_by_x_2x v8, v9, v8, v9, v12, v13, v14
215 mul_by_x_2x v10, v11, v10, v11, v12, v13, v14
216 eor \in0\().16b, \in0\().16b, v8.16b
217 eor \in1\().16b, \in1\().16b, v9.16b
218 eor \in2\().16b, \in2\().16b, v10.16b
219 eor \in3\().16b, \in3\().16b, v11.16b
224 eor \in0\().16b, \in0\().16b, v8.16b
225 eor \in1\().16b, \in1\().16b, v9.16b
226 eor \in2\().16b, \in2\().16b, v10.16b
227 eor \in3\().16b, \in3\().16b, v11.16b
228 mix_columns_2x \in0, \in1
229 mix_columns_2x \in2, \in3
/*
 * do_block_2x enc, in0, in1, rounds, rk, rkp, i
 *
 *   enc           - 1 when expanded by encrypt_block2x, 0 by decrypt_block2x
 *   in0, in1      - the two state registers, transformed in place
 *   rkp           - pointer the next round key is loaded from; advanced by
 *                   16 bytes per load
 *   rounds, rk, i - not referenced by the lines visible here
 *
 * Visible steps: at 1111 the round key in v15 is XORed into both states,
 * sub_bytes_2x is applied, and both states are permuted through the index
 * vector in v13; the next round key is then loaded into v15.  q13 is
 * reloaded after mix_columns_2x / inv_mix_cols_2x because mix_columns_2x
 * overwrites v13.  At 2222 one more round-key XOR is applied.
 *
 * NOTE(review): the embedded line numbers jump (232 to 236, 241 to 245,
 * 246 to 248, 249 to 253), so the key setup, the loop control, whatever
 * selects between the forward and inverse paths, and the closing .endm
 * are not visible in this copy of the file.
 */
232 .macro do_block_2x, enc, in0, in1, rounds, rk, rkp, i
236 1111: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */
237 eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */
238 sub_bytes_2x \in0, \in1
239 tbl \in0\().16b, {\in0\().16b}, v13.16b /* ShiftRows */
240 tbl \in1\().16b, {\in1\().16b}, v13.16b /* ShiftRows */
241 ld1 {v15.4s}, [\rkp], #16
245 mix_columns_2x \in0, \in1
246 ldr q13, .LForward_ShiftRows
248 inv_mix_cols_2x \in0, \in1
249 ldr q13, .LReverse_ShiftRows
253 2222: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */
254 eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */
/*
 * do_block_4x enc, in0, in1, in2, in3, rounds, rk, rkp, i
 *
 *   enc           - 1 when expanded by encrypt_block4x, 0 by decrypt_block4x
 *   in0-in3       - the four state registers, transformed in place
 *   rkp           - pointer the next round key is loaded from; advanced by
 *                   16 bytes per load
 *   rounds, rk, i - not referenced by the lines visible here
 *
 * Visible steps: at 1111 the round key in v15 is XORed into all four
 * states, sub_bytes_4x is applied, and every state is permuted through the
 * index vector in v13; the next round key is then loaded into v15.  q13 is
 * reloaded after the MixColumns macros because mix_columns_2x overwrites
 * v13.  At 2222 one more round-key XOR is applied.
 *
 * NOTE(review): the embedded line numbers jump (257 to 261, 270 to 274,
 * 276 to 278, 279 to 283), so the key setup, the loop control, whatever
 * selects between the forward and inverse paths, and the closing .endm
 * are not visible in this copy of the file.
 */
257 .macro do_block_4x, enc, in0, in1, in2, in3, rounds, rk, rkp, i
261 1111: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */
262 eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */
263 eor \in2\().16b, \in2\().16b, v15.16b /* ^round key */
264 eor \in3\().16b, \in3\().16b, v15.16b /* ^round key */
265 sub_bytes_4x \in0, \in1, \in2, \in3
266 tbl \in0\().16b, {\in0\().16b}, v13.16b /* ShiftRows */
267 tbl \in1\().16b, {\in1\().16b}, v13.16b /* ShiftRows */
268 tbl \in2\().16b, {\in2\().16b}, v13.16b /* ShiftRows */
269 tbl \in3\().16b, {\in3\().16b}, v13.16b /* ShiftRows */
270 ld1 {v15.4s}, [\rkp], #16
274 mix_columns_2x \in0, \in1
275 mix_columns_2x \in2, \in3
276 ldr q13, .LForward_ShiftRows
278 inv_mix_cols_4x \in0, \in1, \in2, \in3
279 ldr q13, .LReverse_ShiftRows
283 2222: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */
284 eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */
285 eor \in2\().16b, \in2\().16b, v15.16b /* ^round key */
286 eor \in3\().16b, \in3\().16b, v15.16b /* ^round key */
/*
 * encrypt_block2x in0, in1, rounds, rk, rkp, i
 *
 * Expands to do_block_2x with enc fixed to 1; all other arguments are
 * passed through unchanged.
 */
289 .macro encrypt_block2x, in0, in1, rounds, rk, rkp, i
290 do_block_2x 1, \in0, \in1, \rounds, \rk, \rkp, \i
/*
 * decrypt_block2x in0, in1, rounds, rk, rkp, i
 *
 * Expands to do_block_2x with enc fixed to 0; all other arguments are
 * passed through unchanged.
 */
293 .macro decrypt_block2x, in0, in1, rounds, rk, rkp, i
294 do_block_2x 0, \in0, \in1, \rounds, \rk, \rkp, \i
/*
 * encrypt_block4x in0, in1, in2, in3, rounds, rk, rkp, i
 *
 * Expands to do_block_4x with enc fixed to 1; all other arguments are
 * passed through unchanged.
 */
297 .macro encrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
298 do_block_4x 1, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
/*
 * decrypt_block4x in0, in1, in2, in3, rounds, rk, rkp, i
 *
 * Expands to do_block_4x with enc fixed to 0; all other arguments are
 * passed through unchanged.
 */
301 .macro decrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
302 do_block_4x 0, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
305 #include "aes-modes.S"
/*
 * 16-byte index vector, given once per byte order: the CPU_BE rows list
 * the same sixteen bytes as the CPU_LE rows in reverse.
 * NOTE(review): the label for this data is not visible in this copy of the
 * file; presumably it is .LForward_ShiftRows, which do_block_2x and
 * do_block_4x load into q13 - confirm.  CPU_LE and CPU_BE are defined
 * outside this file.
 */
310 CPU_LE( .byte 0x0, 0x5, 0xa, 0xf, 0x4, 0x9, 0xe, 0x3 )
311 CPU_LE( .byte 0x8, 0xd, 0x2, 0x7, 0xc, 0x1, 0x6, 0xb )
312 CPU_BE( .byte 0xb, 0x6, 0x1, 0xc, 0x7, 0x2, 0xd, 0x8 )
313 CPU_BE( .byte 0x3, 0xe, 0x9, 0x4, 0xf, 0xa, 0x5, 0x0 )
/*
 * 16-byte index vector, given once per byte order: the CPU_BE rows list
 * the same sixteen bytes as the CPU_LE rows in reverse.
 * NOTE(review): the label for this data is not visible in this copy of the
 * file; presumably it is .LReverse_ShiftRows, which do_block_2x and
 * do_block_4x load into q13 - confirm.  CPU_LE and CPU_BE are defined
 * outside this file.
 */
316 CPU_LE( .byte 0x0, 0xd, 0xa, 0x7, 0x4, 0x1, 0xe, 0xb )
317 CPU_LE( .byte 0x8, 0x5, 0x2, 0xf, 0xc, 0x9, 0x6, 0x3 )
318 CPU_BE( .byte 0x3, 0x6, 0x9, 0xc, 0xf, 0x2, 0x5, 0x8 )
319 CPU_BE( .byte 0xb, 0xe, 0x1, 0x4, 0x7, 0xa, 0xd, 0x0 )
/*
 * 256-entry byte table, 8 entries per row, indexed by the input byte.  The
 * values are those of the AES forward S-box as published in FIPS-197.
 * NOTE(review): the label for this table is not visible in this copy of
 * the file; presumably it is .LForward_Sbox, which enc_prepare passes to
 * prepare - confirm.
 */
322 .byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
323 .byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
324 .byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
325 .byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
326 .byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
327 .byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
328 .byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
329 .byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
330 .byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
331 .byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
332 .byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
333 .byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
334 .byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
335 .byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
336 .byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
337 .byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
338 .byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
339 .byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
340 .byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
341 .byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
342 .byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
343 .byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
344 .byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
345 .byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
346 .byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
347 .byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
348 .byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
349 .byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
350 .byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
351 .byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
352 .byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
353 .byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
/*
 * 256-entry byte table, 8 entries per row, indexed by the input byte.  The
 * values are those of the AES inverse S-box as published in FIPS-197.
 * NOTE(review): the label for this table is not visible in this copy of
 * the file; presumably it is .LReverse_Sbox, which dec_prepare passes to
 * prepare - confirm.
 */
356 .byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
357 .byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
358 .byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
359 .byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
360 .byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
361 .byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
362 .byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
363 .byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
364 .byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
365 .byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
366 .byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
367 .byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
368 .byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
369 .byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
370 .byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
371 .byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
372 .byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
373 .byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
374 .byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
375 .byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
376 .byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
377 .byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
378 .byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
379 .byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
380 .byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
381 .byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
382 .byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
383 .byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
384 .byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
385 .byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
386 .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
387 .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d