1 // -------------------------------------------------------------------------
2 // Copyright (c) 2001, Dr Brian Gladman < >, Worcester, UK.
3 // All rights reserved.
7 // The free distribution and use of this software in both source and binary
8 // form is allowed (with or without changes) provided that:
10 // 1. distributions of this source code include the above copyright
11 // notice, this list of conditions and the following disclaimer//
13 // 2. distributions in binary form include the above copyright
14 // notice, this list of conditions and the following disclaimer
15 // in the documentation and/or other associated materials//
17 // 3. the copyright holder's name is not used to endorse products
18 // built using this software without specific written permission.
21 // ALTERNATIVELY, provided that this notice is retained in full, this product
22 // may be distributed under the terms of the GNU General Public License (GPL),
23 // in which case the provisions of the GPL apply INSTEAD OF those given above.
25 // Copyright (c) 2004 Linus Torvalds <torvalds@osdl.org>
26 // Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com>
30 // This software is provided 'as is' with no explicit or implied warranties
31 // in respect of its properties including, but not limited to, correctness
32 // and fitness for purpose.
33 // -------------------------------------------------------------------------
34 // Issue Date: 29/07/2002
36 .file "aes-i586-asm.S"
39 #include <linux/linkage.h>
40 #include <asm/asm-offsets.h>
42 #define tlen 1024 // byte length of each of the 4 'xor' tables (256 32-bit words of 4 bytes)
44 /* offsets to parameters with one register pushed onto stack */
49 /* offsets in crypto_aes_ctx structure */
54 // register mapping for encrypt and decrypt subroutines
/*
 * h(reg) / l(reg): build a new token by appending 'h' or 'l' to a register
 * name. The pasting itself is done by the inner _h()/_l() macros; going
 * through the outer h()/l() wrappers means an argument that is itself a
 * macro is expanded before the suffix is pasted on.
 * NOTE(review): presumably the pasted names (<reg>h, <reg>l) are mapped to
 * the high/low byte sub-registers by defines elsewhere in this file - confirm.
 */
72 #define _h(reg) reg##h
73 #define h(reg) _h(reg)
75 #define _l(reg) reg##l
76 #define l(reg) _l(reg)
78 // do_col: takes a 32-bit word representing a column and uses
79 // each of its four bytes to index into four tables of 256 32-bit
80 // words to obtain values that are then xored into the appropriate
81 // output registers a1..a4 (chosen by the caller).
//
// Parameters:
84 // table   base address of the first table; the others follow at +tlen, +2*tlen, +3*tlen
// a1..a4  output registers; one table entry is xored into each
89 // idx     input register for the round (destroyed)
90 // tmp     scratch register for the round
93 #define do_col(table, a1,a2,a3,a4, idx, tmp) \
95 xor table(,%tmp,4),%a1; /* a1 ^= table 0, 32-bit entry tmp */ \
98 xor table+tlen(,%tmp,4),%a2; /* a2 ^= table 1, entry tmp */ \
100 movzx %h(idx),%idx; /* idx = zero-extended h(idx); the old idx value is lost */ \
101 xor table+2*tlen(,%tmp,4),%a3; /* a3 ^= table 2, entry tmp */ \
102 xor table+3*tlen(,%idx,4),%a4; /* a4 ^= table 3, entry idx */
// do_fcol: column step that fwd_rnd1 and fwd_rnd2 invoke first.
// Same table layout as do_col: four tables of 32-bit words, tlen bytes apart.
104 // initialise output registers from the key schedule
105 // NB1: original value of a3 is in idx on exit
106 // NB2: original values of a1,a2,a4 aren't used
107 #define do_fcol(table, a1,a2,a3,a4, idx, tmp, sched) \
109 movzx %l(idx),%tmp; /* tmp = zero-extended l(idx) */ \
111 xor table(,%tmp,4),%a1; /* a1 ^= table 0, 32-bit entry tmp */ \
113 movzx %h(idx),%tmp; /* tmp = zero-extended h(idx) */ \
115 xor table+tlen(,%tmp,4),%a2; /* a2 ^= table 1, entry tmp */ \
116 movzx %l(idx),%tmp; /* tmp = zero-extended l(idx) */ \
117 movzx %h(idx),%idx; /* idx = zero-extended h(idx) */ \
118 xor table+3*tlen(,%idx,4),%a4; /* a4 ^= table 3, entry idx */ \
121 xor table+2*tlen(,%tmp,4),%a3; /* a3 ^= table 2, entry tmp */
// do_icol: column step that inv_rnd1 and inv_rnd2 invoke first.
// Same table layout as do_col: four tables of 32-bit words, tlen bytes apart.
123 // initialise output registers from the key schedule
124 // NB1: original value of a3 is in idx on exit
125 // NB2: original values of a1,a2,a4 aren't used
126 #define do_icol(table, a1,a2,a3,a4, idx, tmp, sched) \
128 movzx %l(idx),%tmp; /* tmp = zero-extended l(idx) */ \
130 xor table(,%tmp,4),%a1; /* a1 ^= table 0, 32-bit entry tmp */ \
132 movzx %h(idx),%tmp; /* tmp = zero-extended h(idx) */ \
134 xor table+tlen(,%tmp,4),%a2; /* a2 ^= table 1, entry tmp */ \
135 movzx %l(idx),%tmp; /* tmp = zero-extended l(idx) */ \
136 movzx %h(idx),%idx; /* idx = zero-extended h(idx) */ \
137 xor table+3*tlen(,%idx,4),%a4; /* a4 ^= table 3, entry idx */ \
140 xor table+2*tlen(,%tmp,4),%a3; /* a3 ^= table 2, entry tmp */
143 // The original Gladman code had conditional saves to MMX registers.
// NOTE(review): save()/restore() presumably spill to and reload from the
// stack space reserved by the callers ("space for register saves on stack")
// - confirm against the macro bodies.
144 #define save(a1, a2) \
147 #define restore(a1, a2) \
150 // These macros perform a forward encryption cycle. As a pair (fwd_rnd1
151 // followed by fwd_rnd2) they are entered with the previous round's column
152 // values in r0,r1,r4,r5 and exit with the new values in the same
153 // registers, using the stack for temporary storage.
//
// fwd_rnd1: one forward round.
//   arg    round key operand, passed on to do_fcol as 'sched'
//   table  lookup table base, passed on to do_fcol and do_col
155 // round column values
156 // on entry: r0,r1,r4,r5
157 // on exit: r2,r1,r4,r5
158 #define fwd_rnd1(arg, table) \
162 /* compute new column values */ \
163 do_fcol(table, r2,r5,r4,r1, r0,r3, arg); /* idx=r0 */ \
164 do_col (table, r4,r1,r2,r5, r0,r3); /* idx=r4 */ \
166 do_col (table, r1,r2,r5,r4, r0,r3); /* idx=r1 */ \
168 do_col (table, r5,r4,r1,r2, r0,r3); /* idx=r5 */
// fwd_rnd2: one forward round; the same sequence as fwd_rnd1 with the
// roles of r0 and r2 swapped, so the column that fwd_rnd1 leaves in r2
// ends up back in r0.
//   arg    round key operand, passed on to do_fcol as 'sched'
//   table  lookup table base, passed on to do_fcol and do_col
170 // round column values
171 // on entry: r2,r1,r4,r5
172 // on exit: r0,r1,r4,r5
173 #define fwd_rnd2(arg, table) \
177 /* compute new column values */ \
178 do_fcol(table, r0,r5,r4,r1, r2,r3, arg); /* idx=r2 */ \
179 do_col (table, r4,r1,r0,r5, r2,r3); /* idx=r4 */ \
181 do_col (table, r1,r0,r5,r4, r2,r3); /* idx=r1 */ \
183 do_col (table, r5,r4,r1,r0, r2,r3); /* idx=r5 */
185 // These macros perform an inverse encryption cycle. As a pair (inv_rnd1
186 // followed by inv_rnd2) they are entered with the previous round's column
187 // values in r0,r1,r4,r5 and exit with the new values in the same
188 // registers, using the stack for temporary storage.
//
// inv_rnd1: one inverse round.
//   arg    round key operand, passed on to do_icol as 'sched'
//   table  lookup table base, passed on to do_icol and do_col
190 // round column values
191 // on entry: r0,r1,r4,r5
192 // on exit: r2,r1,r4,r5
193 #define inv_rnd1(arg, table) \
197 /* compute new column values */ \
198 do_icol(table, r2,r1,r4,r5, r0,r3, arg); /* idx=r0 */ \
199 do_col (table, r4,r5,r2,r1, r0,r3); /* idx=r4 */ \
201 do_col (table, r1,r4,r5,r2, r0,r3); /* idx=r1 */ \
203 do_col (table, r5,r2,r1,r4, r0,r3); /* idx=r5 */
// inv_rnd2: one inverse round; the same sequence as inv_rnd1 with the
// roles of r0 and r2 swapped, so the column that inv_rnd1 leaves in r2
// ends up back in r0.
//   arg    round key operand, passed on to do_icol as 'sched'
//   table  lookup table base, passed on to do_icol and do_col
205 // round column values
206 // on entry: r2,r1,r4,r5
207 // on exit: r0,r1,r4,r5
208 #define inv_rnd2(arg, table) \
212 /* compute new column values */ \
213 do_icol(table, r0,r1,r4,r5, r2,r3, arg); /* idx=r2 */ \
214 do_col (table, r4,r5,r0,r1, r2,r3); /* idx=r4 */ \
216 do_col (table, r1,r4,r5,r0, r2,r3); /* idx=r1 */ \
218 do_col (table, r5,r0,r1,r4, r2,r3); /* idx=r5 */
220 // AES (Rijndael) Encryption Subroutine
221 /* void aes_enc_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */
// Uses crypto_ft_tab for every round except the last, which uses crypto_fl_tab.
223 .extern crypto_ft_tab
224 .extern crypto_fl_tab
230 // CAUTION: the order and the values used in these assigns
231 // rely on the register mappings
234 mov in_blk+4(%esp),%r2 // r2 = in_blk argument, loaded from the stack
236 mov klen(%ebp),%r3 // key size
239 lea ekey(%ebp),%ebp // key pointer
242 // input four columns and xor in first round key
253 sub $8,%esp // space for register saves on stack
254 add $16,%ebp // increment to next round key
256 jb 4f // 10 rounds for 128-bit key
258 je 3f // 12 rounds for 192-bit key
// The rounds are fully unrolled and the labels are entry points into one
// fall-through sequence: entering at 2: runs all 14 rounds, 3: skips the
// first two (12 rounds) and 4: skips the first four (10 rounds). Round key
// offsets are relative to %ebp, 16 bytes per round.
261 2: fwd_rnd1( -64(%ebp), crypto_ft_tab) // 14 rounds for 256-bit key
262 fwd_rnd2( -48(%ebp), crypto_ft_tab)
263 3: fwd_rnd1( -32(%ebp), crypto_ft_tab) // 12 rounds for 192-bit key
264 fwd_rnd2( -16(%ebp), crypto_ft_tab)
265 4: fwd_rnd1( (%ebp), crypto_ft_tab) // 10 rounds for 128-bit key
266 fwd_rnd2( +16(%ebp), crypto_ft_tab)
267 fwd_rnd1( +32(%ebp), crypto_ft_tab)
268 fwd_rnd2( +48(%ebp), crypto_ft_tab)
269 fwd_rnd1( +64(%ebp), crypto_ft_tab)
270 fwd_rnd2( +80(%ebp), crypto_ft_tab)
271 fwd_rnd1( +96(%ebp), crypto_ft_tab)
272 fwd_rnd2(+112(%ebp), crypto_ft_tab)
273 fwd_rnd1(+128(%ebp), crypto_ft_tab)
274 fwd_rnd2(+144(%ebp), crypto_fl_tab) // last round uses a different table
276 // move final values to the output array. CAUTION: the
277 // order of these assigns relies on the register mappings
280 mov out_blk+12(%esp),%ebp // ebp = out_blk argument, loaded from the stack
292 // AES (Rijndael) Decryption Subroutine
293 /* void aes_dec_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */
// Uses crypto_it_tab for every round except the last, which uses crypto_il_tab.
295 .extern crypto_it_tab
296 .extern crypto_il_tab
302 // CAUTION: the order and the values used in these assigns
303 // rely on the register mappings
306 mov in_blk+4(%esp),%r2 // r2 = in_blk argument, loaded from the stack
308 mov klen(%ebp),%r3 // key size
311 lea dkey(%ebp),%ebp // key pointer
314 // input four columns and xor in first round key
325 sub $8,%esp // space for register saves on stack
326 add $16,%ebp // increment to next round key
328 jb 4f // 10 rounds for 128-bit key
330 je 3f // 12 rounds for 192-bit key
// The rounds are fully unrolled and the labels are entry points into one
// fall-through sequence: entering at 2: runs all 14 rounds, 3: skips the
// first two (12 rounds) and 4: skips the first four (10 rounds). Round key
// offsets are relative to %ebp, 16 bytes per round.
333 2: inv_rnd1( -64(%ebp), crypto_it_tab) // 14 rounds for 256-bit key
334 inv_rnd2( -48(%ebp), crypto_it_tab)
335 3: inv_rnd1( -32(%ebp), crypto_it_tab) // 12 rounds for 192-bit key
336 inv_rnd2( -16(%ebp), crypto_it_tab)
337 4: inv_rnd1( (%ebp), crypto_it_tab) // 10 rounds for 128-bit key
338 inv_rnd2( +16(%ebp), crypto_it_tab)
339 inv_rnd1( +32(%ebp), crypto_it_tab)
340 inv_rnd2( +48(%ebp), crypto_it_tab)
341 inv_rnd1( +64(%ebp), crypto_it_tab)
342 inv_rnd2( +80(%ebp), crypto_it_tab)
343 inv_rnd1( +96(%ebp), crypto_it_tab)
344 inv_rnd2(+112(%ebp), crypto_it_tab)
345 inv_rnd1(+128(%ebp), crypto_it_tab)
346 inv_rnd2(+144(%ebp), crypto_il_tab) // last round uses a different table
348 // move final values to the output array. CAUTION: the
349 // order of these assigns relies on the register mappings
352 mov out_blk+12(%esp),%ebp // ebp = out_blk argument, loaded from the stack