1 /* SPDX-License-Identifier: GPL-2.0-or-later */
3 * Blowfish Cipher Algorithm (x86_64)
5 * Copyright (C) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
8 #include <linux/linkage.h>
9 #include <linux/cfi_types.h>
11 .file "blowfish-x86_64-asm.S"
14 /* structure of crypto context */
/*
 * Byte offsets from the start of the context, used in this file as the
 * displacement of sN(CTX,index,4) operands, i.e. as bases of tables of
 * 4-byte entries. Each offset lies 256 entries (1024 bytes) after the
 * previous one; s0 starts (16 + 2) 4-byte words into the context.
 * NOTE(review): presumably these are the four 256-entry S-boxes that follow
 * an 18-word subkey array -- confirm against the C-side context structure.
 */
16 #define s0 ((16 + 2) * 4)			/* byte offset   72 */
17 #define s1 ((16 + 2 + (1 * 256)) * 4)	/* byte offset 1096 */
18 #define s2 ((16 + 2 + (2 * 256)) * 4)	/* byte offset 2120 */
19 #define s3 ((16 + 2 + (3 * 256)) * 4)	/* byte offset 3144 */
57 /***********************************************************************
59 ***********************************************************************/
65 movl s0(CTX,RT0,4), RT0d; \
66 addl s1(CTX,RT1,4), RT0d; \
70 xorl s2(CTX,RT1,4), RT0d; \
71 addl s3(CTX,RT2,4), RT0d; \
/*
 * add_roundkey_enc(n): XOR the 64-bit quadword at byte offset p + 4*(n) from
 * CTX into RX0. Because the load is 8 bytes wide while entries are addressed
 * with a 4-byte stride, one invocation covers the 4-byte entries n and n + 1.
 * p, CTX and RX0 are defined elsewhere in the file.
 */
74 #define add_roundkey_enc(n) \
75 xorq p+4*(n)(CTX), RX0;
77 #define round_enc(n) \
78 add_roundkey_enc(n); \
83 #define add_roundkey_dec(n) \
84 movq p+4*(n-1)(CTX), RT0; \
88 #define round_dec(n) \
89 add_roundkey_dec(n); \
94 #define read_block() \
99 #define write_block() \
103 #define xor_block() \
107 SYM_FUNC_START(__blowfish_enc_blk)
112 * %rcx: bool, if true: xor output
130 add_roundkey_enc(16);
143 SYM_FUNC_END(__blowfish_enc_blk)
145 SYM_TYPED_FUNC_START(blowfish_dec_blk)
175 SYM_FUNC_END(blowfish_dec_blk)
177 /**********************************************************************
178 4-way blowfish, four blocks in parallel
179 **********************************************************************/
181 /* F() for 4-way. Slower when used alone/1-way, but faster when used
182 * in parallel/4-way (tested on AMD Phenom II & Intel Xeon E7330).
185 movzbl x ## bh, RT1d; \
186 movzbl x ## bl, RT3d; \
188 movzbl x ## bh, RT0d; \
189 movzbl x ## bl, RT2d; \
191 movl s0(CTX,RT0,4), RT0d; \
192 addl s1(CTX,RT2,4), RT0d; \
193 xorl s2(CTX,RT1,4), RT0d; \
194 addl s3(CTX,RT3,4), RT0d; \
197 #define add_preloaded_roundkey4() \
/*
 * preload_roundkey_enc(n): load the 64-bit quadword at byte offset p + 4*(n)
 * from CTX into RKEY (the 4-byte entries n and n + 1 in a single load).
 * The argument is parenthesised, so expressions such as "n + 2" are safe.
 * p, CTX and RKEY are defined elsewhere in the file.
 */
203 #define preload_roundkey_enc(n) \
204 movq p+4*(n)(CTX), RKEY;
/*
 * add_roundkey_enc4(n): first expand add_preloaded_roundkey4(), then reload
 * RKEY via preload_roundkey_enc() from entry n + 2 so it is ready for the
 * next use. The order matters: RKEY is overwritten by the preload, so it must
 * come after add_preloaded_roundkey4().
 * NOTE(review): add_preloaded_roundkey4() is assumed to consume the current
 * RKEY value; its body is not shown here -- confirm.
 */
206 #define add_roundkey_enc4(n) \
207 add_preloaded_roundkey4(); \
208 preload_roundkey_enc(n + 2);
210 #define round_enc4(n) \
211 add_roundkey_enc4(n); \
223 #define preload_roundkey_dec(n) \
224 movq p+4*((n)-1)(CTX), RKEY; \
/*
 * add_roundkey_dec4(n): decryption counterpart of add_roundkey_enc4(). It
 * expands add_preloaded_roundkey4(), then calls preload_roundkey_dec() with
 * n - 2, stepping through the entries in descending order. The order
 * matters: the preload must come after add_preloaded_roundkey4().
 * NOTE(review): add_preloaded_roundkey4() is assumed to consume the current
 * RKEY value; its body is not shown here -- confirm.
 */
227 #define add_roundkey_dec4(n) \
228 add_preloaded_roundkey4(); \
229 preload_roundkey_dec(n - 2);
231 #define round_dec4(n) \
232 add_roundkey_dec4(n); \
244 #define read_block4() \
261 #define write_block4() \
274 #define xor_block4() \
287 SYM_FUNC_START(__blowfish_enc_blk_4way)
292 * %rcx: bool, if true: xor output
302 preload_roundkey_enc(0);
314 add_preloaded_roundkey4();
334 SYM_FUNC_END(__blowfish_enc_blk_4way)
336 SYM_TYPED_FUNC_START(blowfish_dec_blk_4way)
349 preload_roundkey_dec(17);
360 add_preloaded_roundkey4();
369 SYM_FUNC_END(blowfish_dec_blk_4way)