2 * Intel SHA Extensions optimized implementation of a SHA-256 update function
4 * This file is provided under a dual BSD/GPLv2 license. When using or
5 * redistributing this file, you may do so under either license.
9 * Copyright(c) 2015 Intel Corporation.
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of version 2 of the GNU General Public License as
13 * published by the Free Software Foundation.
15 * This program is distributed in the hope that it will be useful, but
16 * WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * General Public License for more details.
20 * Contact Information:
21 * Sean Gulley <sean.m.gulley@intel.com>
22 * Tim Chen <tim.c.chen@linux.intel.com>
26 * Copyright(c) 2015 Intel Corporation.
28 * Redistribution and use in source and binary forms, with or without
29 * modification, are permitted provided that the following conditions
32 * * Redistributions of source code must retain the above copyright
33 * notice, this list of conditions and the following disclaimer.
34 * * Redistributions in binary form must reproduce the above copyright
35 * notice, this list of conditions and the following disclaimer in
36 * the documentation and/or other materials provided with the
38 * * Neither the name of Intel Corporation nor the names of its
39 * contributors may be used to endorse or promote products derived
40 * from this software without specific prior written permission.
42 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
43 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
44 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
45 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
46 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
47 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
48 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
49 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
50 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
51 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
52 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
56 #include <linux/linkage.h>
57 #include <linux/cfi_types.h>
/* Register aliases used by sha256_ni_transform. */
/* NOTE(review): MSG, STATE0, STATE1 and MSGTMP0-MSGTMP4 are used by the code but their definitions are not visible in this view - confirm against the complete file. */
59 #define DIGEST_PTR %rdi /* 1st arg: hash state, read and written back as two 16-byte vectors */
60 #define DATA_PTR %rsi /* 2nd arg: input data, read 16 bytes at a time */
61 #define NUM_BLKS %rdx /* 3rd arg: block count on entry; turned into a byte count, then an end-of-data pointer */
63 #define SHA256CONSTANTS %rax /* base address of the K256 table (set with lea) */
74 #define SHUF_MASK %xmm8 /* pshufb control vector loaded from PSHUFFLE_BYTE_FLIP_MASK */
76 #define ABEF_SAVE %xmm9 /* copy of STATE0 taken before the rounds, added back afterwards */
77 #define CDGH_SAVE %xmm10 /* copy of STATE1 taken before the rounds, added back afterwards */
80 * Intel SHA Extensions optimized implementation of a SHA-256 update function
82 * The function takes a pointer to the current hash values, a pointer to the
83 * input data, and a number of 64 byte blocks to process. Once all blocks have
84 * been processed, the digest buffer is updated with the resulting hash value.
85 * The function only processes complete blocks; there is no functionality to
86 * store partial blocks. All message padding and hash value initialization must
87 * be done outside the update function.
89 * The indented lines in the loop are instructions related to rounds processing.
90 * The non-indented lines are instructions related to the message schedule.
92 * void sha256_ni_transform(uint32_t *digest, const void *data,
94 * digest : pointer to digest
95 * data: pointer to input data
96 * numBlocks: Number of blocks to process
/* sha256_ni_transform: run the SHA-256 rounds over 64-byte blocks read from DATA_PTR, */
/* starting from the eight 32-bit hash words at DIGEST_PTR and storing the result back there. */
/* In:  DIGEST_PTR = hash state (32 bytes), DATA_PTR = input, NUM_BLKS = number of 64-byte blocks. */
/* Uses SHA256CONSTANTS, SHUF_MASK, ABEF_SAVE, CDGH_SAVE, MSG, STATE0/1 and MSGTMP0-4 as scratch. */
/* NOTE(review): several instructions the surrounding comments imply (branches, labels, register */
/* copies, return) are not visible in this view; each spot is marked below - confirm against the complete file. */
101 SYM_TYPED_FUNC_START(sha256_ni_transform)
/* NUM_BLKS <<= 6: one block is 64 bytes. */
103 shl $6, NUM_BLKS /* convert to bytes */
/* NOTE(review): a zero block count leaves ZF set by the shift, but no conditional branch is visible before the first load - confirm. */
/* NUM_BLKS now holds DATA_PTR + byte count; it is compared against DATA_PTR after the rounds. */
105 add DATA_PTR, NUM_BLKS /* pointer to end of data */
108 * load initial hash values
109 * Need to reorder these appropriately
110 * DCBA, HGFE -> ABEF, CDGH
/* Unaligned loads: no alignment is assumed for the digest buffer. */
112 movdqu 0*16(DIGEST_PTR), STATE0
113 movdqu 1*16(DIGEST_PTR), STATE1
/* 0xB1 swaps the dwords within each 64-bit half; 0x1B reverses all four dwords. */
115 pshufd $0xB1, STATE0, STATE0 /* CDAB */
116 pshufd $0x1B, STATE1, STATE1 /* EFGH */
/* MSGTMP4 keeps CDAB because palignr overwrites STATE0 before pblendw needs it. */
117 movdqa STATE0, MSGTMP4
118 palignr $8, STATE1, STATE0 /* ABEF */
119 pblendw $0xF0, MSGTMP4, STATE1 /* CDGH */
/* movdqa from memory requires the mask to be 16-byte aligned. */
121 movdqa PSHUFFLE_BYTE_FLIP_MASK(%rip), SHUF_MASK
/* RIP-relative address of the round-constant table; indexed below as k*16(SHA256CONSTANTS). */
122 lea K256(%rip), SHA256CONSTANTS
125 /* Save hash values for addition after rounds */
126 movdqa STATE0, ABEF_SAVE
127 movdqa STATE1, CDGH_SAVE
/* Rounds 0-3: load 16 message bytes, byte-swap each 32-bit word, add the round constants. */
/* Each sha256rnds2 performs two rounds; pshufd $0x0E moves the upper two dwords of MSG */
/* into the low half so the second sha256rnds2 consumes them. */
/* NOTE(review): sha256rnds2 reads its message+constant input from an implicit XMM0; presumably MSG is %xmm0 - confirm. */
130 movdqu 0*16(DATA_PTR), MSG
131 pshufb SHUF_MASK, MSG
/* NOTE(review): MSGTMP0-MSGTMP3 are consumed by the message schedule below, but no visible instruction copies the byte-swapped MSG into them in rounds 0-15 - confirm. */
133 paddd 0*16(SHA256CONSTANTS), MSG
134 sha256rnds2 STATE0, STATE1
135 pshufd $0x0E, MSG, MSG
136 sha256rnds2 STATE1, STATE0
/* Rounds 4-7 */
139 movdqu 1*16(DATA_PTR), MSG
140 pshufb SHUF_MASK, MSG
142 paddd 1*16(SHA256CONSTANTS), MSG
143 sha256rnds2 STATE0, STATE1
144 pshufd $0x0E, MSG, MSG
145 sha256rnds2 STATE1, STATE0
/* Message schedule: sha256msg1 starts the computation of the next four message words in its destination. */
146 sha256msg1 MSGTMP1, MSGTMP0
/* Rounds 8-11 */
149 movdqu 2*16(DATA_PTR), MSG
150 pshufb SHUF_MASK, MSG
152 paddd 2*16(SHA256CONSTANTS), MSG
153 sha256rnds2 STATE0, STATE1
154 pshufd $0x0E, MSG, MSG
155 sha256rnds2 STATE1, STATE0
156 sha256msg1 MSGTMP2, MSGTMP1
/* Rounds 12-15 */
159 movdqu 3*16(DATA_PTR), MSG
160 pshufb SHUF_MASK, MSG
162 paddd 3*16(SHA256CONSTANTS), MSG
163 sha256rnds2 STATE0, STATE1
/* Schedule step: MSGTMP4 = dwords taken across the MSGTMP3:MSGTMP2 pair shifted right by 4 bytes, */
/* added into the sha256msg1 partial result, then sha256msg2 completes the next four message words. */
/* The same movdqa/palignr/paddd/sha256msg2 pattern repeats with rotating registers in later groups. */
164 movdqa MSGTMP3, MSGTMP4
165 palignr $4, MSGTMP2, MSGTMP4
166 paddd MSGTMP4, MSGTMP0
167 sha256msg2 MSGTMP3, MSGTMP0
168 pshufd $0x0E, MSG, MSG
169 sha256rnds2 STATE1, STATE0
170 sha256msg1 MSGTMP3, MSGTMP2
/* Rounds 16-19 */
/* NOTE(review): from this group on, no load of MSG is visible before the constants are added; */
/* presumably a copy from the freshly scheduled MSGTMPn register precedes each paddd - confirm. */
174 paddd 4*16(SHA256CONSTANTS), MSG
175 sha256rnds2 STATE0, STATE1
176 movdqa MSGTMP0, MSGTMP4
177 palignr $4, MSGTMP3, MSGTMP4
178 paddd MSGTMP4, MSGTMP1
179 sha256msg2 MSGTMP0, MSGTMP1
180 pshufd $0x0E, MSG, MSG
181 sha256rnds2 STATE1, STATE0
182 sha256msg1 MSGTMP0, MSGTMP3
/* Rounds 20-23 */
186 paddd 5*16(SHA256CONSTANTS), MSG
187 sha256rnds2 STATE0, STATE1
188 movdqa MSGTMP1, MSGTMP4
189 palignr $4, MSGTMP0, MSGTMP4
190 paddd MSGTMP4, MSGTMP2
191 sha256msg2 MSGTMP1, MSGTMP2
192 pshufd $0x0E, MSG, MSG
193 sha256rnds2 STATE1, STATE0
194 sha256msg1 MSGTMP1, MSGTMP0
/* Rounds 24-27 */
198 paddd 6*16(SHA256CONSTANTS), MSG
199 sha256rnds2 STATE0, STATE1
200 movdqa MSGTMP2, MSGTMP4
201 palignr $4, MSGTMP1, MSGTMP4
202 paddd MSGTMP4, MSGTMP3
203 sha256msg2 MSGTMP2, MSGTMP3
204 pshufd $0x0E, MSG, MSG
205 sha256rnds2 STATE1, STATE0
206 sha256msg1 MSGTMP2, MSGTMP1
/* Rounds 28-31 */
210 paddd 7*16(SHA256CONSTANTS), MSG
211 sha256rnds2 STATE0, STATE1
212 movdqa MSGTMP3, MSGTMP4
213 palignr $4, MSGTMP2, MSGTMP4
214 paddd MSGTMP4, MSGTMP0
215 sha256msg2 MSGTMP3, MSGTMP0
216 pshufd $0x0E, MSG, MSG
217 sha256rnds2 STATE1, STATE0
218 sha256msg1 MSGTMP3, MSGTMP2
/* Rounds 32-35 */
222 paddd 8*16(SHA256CONSTANTS), MSG
223 sha256rnds2 STATE0, STATE1
224 movdqa MSGTMP0, MSGTMP4
225 palignr $4, MSGTMP3, MSGTMP4
226 paddd MSGTMP4, MSGTMP1
227 sha256msg2 MSGTMP0, MSGTMP1
228 pshufd $0x0E, MSG, MSG
229 sha256rnds2 STATE1, STATE0
230 sha256msg1 MSGTMP0, MSGTMP3
/* Rounds 36-39 */
234 paddd 9*16(SHA256CONSTANTS), MSG
235 sha256rnds2 STATE0, STATE1
236 movdqa MSGTMP1, MSGTMP4
237 palignr $4, MSGTMP0, MSGTMP4
238 paddd MSGTMP4, MSGTMP2
239 sha256msg2 MSGTMP1, MSGTMP2
240 pshufd $0x0E, MSG, MSG
241 sha256rnds2 STATE1, STATE0
242 sha256msg1 MSGTMP1, MSGTMP0
/* Rounds 40-43 */
246 paddd 10*16(SHA256CONSTANTS), MSG
247 sha256rnds2 STATE0, STATE1
248 movdqa MSGTMP2, MSGTMP4
249 palignr $4, MSGTMP1, MSGTMP4
250 paddd MSGTMP4, MSGTMP3
251 sha256msg2 MSGTMP2, MSGTMP3
252 pshufd $0x0E, MSG, MSG
253 sha256rnds2 STATE1, STATE0
254 sha256msg1 MSGTMP2, MSGTMP1
/* Rounds 44-47 */
258 paddd 11*16(SHA256CONSTANTS), MSG
259 sha256rnds2 STATE0, STATE1
260 movdqa MSGTMP3, MSGTMP4
261 palignr $4, MSGTMP2, MSGTMP4
262 paddd MSGTMP4, MSGTMP0
263 sha256msg2 MSGTMP3, MSGTMP0
264 pshufd $0x0E, MSG, MSG
265 sha256rnds2 STATE1, STATE0
266 sha256msg1 MSGTMP3, MSGTMP2
/* Rounds 48-51 */
270 paddd 12*16(SHA256CONSTANTS), MSG
271 sha256rnds2 STATE0, STATE1
272 movdqa MSGTMP0, MSGTMP4
273 palignr $4, MSGTMP3, MSGTMP4
274 paddd MSGTMP4, MSGTMP1
275 sha256msg2 MSGTMP0, MSGTMP1
276 pshufd $0x0E, MSG, MSG
277 sha256rnds2 STATE1, STATE0
278 sha256msg1 MSGTMP0, MSGTMP3
/* Rounds 52-55: the last sha256msg1 was issued in the previous group; only the finishing schedule steps remain. */
282 paddd 13*16(SHA256CONSTANTS), MSG
283 sha256rnds2 STATE0, STATE1
284 movdqa MSGTMP1, MSGTMP4
285 palignr $4, MSGTMP0, MSGTMP4
286 paddd MSGTMP4, MSGTMP2
287 sha256msg2 MSGTMP1, MSGTMP2
288 pshufd $0x0E, MSG, MSG
289 sha256rnds2 STATE1, STATE0
/* Rounds 56-59 */
293 paddd 14*16(SHA256CONSTANTS), MSG
294 sha256rnds2 STATE0, STATE1
295 movdqa MSGTMP2, MSGTMP4
296 palignr $4, MSGTMP1, MSGTMP4
297 paddd MSGTMP4, MSGTMP3
298 sha256msg2 MSGTMP2, MSGTMP3
299 pshufd $0x0E, MSG, MSG
300 sha256rnds2 STATE1, STATE0
/* Rounds 60-63: no further message words are needed, so there is no schedule work here. */
304 paddd 15*16(SHA256CONSTANTS), MSG
305 sha256rnds2 STATE0, STATE1
306 pshufd $0x0E, MSG, MSG
307 sha256rnds2 STATE1, STATE0
309 /* Add current hash values with previously saved */
310 paddd ABEF_SAVE, STATE0
311 paddd CDGH_SAVE, STATE1
313 /* Increment data pointer and loop if more to process */
/* NOTE(review): neither the DATA_PTR increment, the conditional jump, nor a loop label is visible in this view; */
/* only the comparison against the end-of-data pointer (held in NUM_BLKS) remains - confirm. */
315 cmp NUM_BLKS, DATA_PTR
318 /* Write hash values back in the correct order */
/* Inverse of the entry shuffle: ABEF/CDGH back to the DCBA/HGFE layout that was loaded. */
319 pshufd $0x1B, STATE0, STATE0 /* FEBA */
320 pshufd $0xB1, STATE1, STATE1 /* DCHG */
/* MSGTMP4 keeps FEBA because pblendw overwrites STATE0 before palignr needs it. */
321 movdqa STATE0, MSGTMP4
322 pblendw $0xF0, STATE1, STATE0 /* DCBA */
323 palignr $8, MSGTMP4, STATE1 /* HGFE */
325 movdqu STATE0, 0*16(DIGEST_PTR)
326 movdqu STATE1, 1*16(DIGEST_PTR)
/* NOTE(review): no return instruction is visible before the end-of-function marker - confirm. */
331 SYM_FUNC_END(sha256_ni_transform)
/* SHA-256 round constants: 64 32-bit words laid out four per row, so row k is the */
/* 16-byte vector the transform adds to the message words at offset k*16. */
/* The table is 16 rows * 16 bytes = 256 bytes, matching the mergeable-section entity size. */
/* NOTE(review): the K256 label and any alignment directive are not visible in this view; the */
/* table is used as a paddd memory operand, which requires 16-byte alignment - confirm. */
333 .section .rodata.cst256.K256, "aM", @progbits, 256
336 .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
337 .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
338 .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
339 .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
340 .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
341 .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
342 .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
343 .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
344 .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
345 .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
346 .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
347 .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
348 .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
349 .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
350 .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
351 .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
/* pshufb control vector: in memory order the bytes are 03 02 01 00, 07 06 05 04, ..., so */
/* shuffling with it reverses the byte order inside each 32-bit word of a 16-byte vector. */
/* The transform applies it to every 16-byte chunk of input right after loading it. */
/* NOTE(review): the mask is loaded with movdqa, which needs a 16-byte-aligned address; no */
/* alignment directive is visible in this view - confirm. */
353 .section .rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16
355 PSHUFFLE_BYTE_FLIP_MASK:
356 .octa 0x0c0d0e0f08090a0b0405060700010203