2 @ ====================================================================
3 @ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
4 @ project. The module is, however, dual licensed under OpenSSL and
5 @ CRYPTOGAMS licenses depending on where you obtain it. For further
6 @ details see http://www.openssl.org/~appro/cryptogams/.
8 @ Permission to use under GPL terms is granted.
9 @ ====================================================================
11 @ SHA256 block procedure for ARMv4. May 2007.
13 @ Performance is ~2x better than gcc 3.4 generated code and in "abso-
14 @ lute" terms is ~2250 cycles per 64-byte block or ~35 cycles per
15 @ byte [on single-issue Xscale PXA250 core].
19 @ Rescheduling for dual-issue pipeline resulted in 22% improvement on
20 @ Cortex A8 core and ~20 cycles per processed byte.
24 @ Profiler-assisted and platform-specific optimization resulted in 16%
25 @ improvement on Cortex A8 core and ~15.4 cycles per processed byte.
29 @ Add NEON implementation. On Cortex A8 it was measured to process one
30 @ byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon
31 @ S4 does it in 12.5 cycles too, but it's 50% faster than integer-only
32 @ code (meaning that latter performs sub-optimally, nothing was done
37 @ Add ARMv8 code path performing at 2.0 cpb on Apple A7.
40 # include "arm_arch.h"
42 # define __ARM_ARCH__ __LINUX_ARM_ARCH__
43 # define __ARM_MAX_ARCH__ 7
@ K256: the 64 32-bit SHA-256 round constants (FIPS 180-4, section 4.2.2) --
@ the first 32 bits of the fractional parts of the cube roots of the first
@ 64 primes. The round code walks this table sequentially through r14
@ ("*K256++"); the low byte of the last constant (0xc67178f2 -> 0xf2) is
@ used as the end-of-rounds sentinel in the "cmp ...,#0xf2" tests below.
62 .word 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
63 .word 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
64 .word 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
65 .word 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
66 .word 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
67 .word 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
68 .word 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
69 .word 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
70 .word 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
71 .word 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
72 .word 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
73 .word 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
74 .word 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
75 .word 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
76 .word 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
77 .word 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
80 #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
@ Link-time PC-relative offset from this routine to OPENSSL_armcap_P.
@ The entry code adds it to the runtime address of sha256_block_data_order
@ (held in r3) to locate the CPU-capability word without absolute relocs,
@ keeping the module position-independent.
82 .word OPENSSL_armcap_P-sha256_block_data_order
@ ------------------------------------------------------------------
@ void sha256_block_data_order(u32 ctx[8], const u8 *inp, size_t num)
@   r0 = ctx : eight 32-bit chaining values a..h (loaded into r4..r11)
@   r1 = inp : input message blocks (alignment handled by the round code)
@   r2 = num : number of 64-byte blocks to process
@ Integer-only ARMv4+ path, rounds fully unrolled by the CRYPTOGAMS
@ generator. Register roles throughout the rounds:
@   r4..r11 = working variables a..h (roles rotate each round)
@   r0,r2,r3,r12 = scratch (Sigma/sigma temporaries, X[i], Maj carry-over)
@   r14 = K256 pointer, advanced 4 bytes per round
@ NOTE(review): the visible text is a sampled subset of the generated
@ file (embedded original line numbers jump); instructions are annotated
@ here, not altered -- elided lines are assumed, TODO confirm against the
@ full sha256-armv4 output before relying on round-level comments.
@ ------------------------------------------------------------------
86 .global sha256_block_data_order
87 .type sha256_block_data_order,%function
88 sha256_block_data_order:
89 .Lsha256_block_data_order:
@ Two alternative ways (selected by elided #if/#else on __ARM_ARCH__) to
@ take this code's own address: legacy pre-v7 "pc-8" arithmetic vs. adr.
91 sub r3,pc,#8 @ sha256_block_data_order
93 adr r3,.Lsha256_block_data_order
95 #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
96 ldr r12,.LOPENSSL_armcap
97 ldr r12,[r3,r12] @ OPENSSL_armcap_P
@ r12 now holds the capability flags; elided lines presumably branch to
@ the NEON/ARMv8 paths when available -- TODO confirm in full source.
103 add r2,r1,r2,lsl#6 @ len to point at the end of inp
@ Frame layout after the two instructions below and the alloca:
@   sp+0 .. sp+15*4 : X[16] message-schedule scratch
@   sp+16*4 = saved ctx (r0), sp+17*4 = inp (r1), sp+18*4 = inp end (r2)
104 stmdb sp!,{r0,r1,r2,r4-r11,lr}
105 ldmia r0,{r4,r5,r6,r7,r8,r9,r10,r11}
106 sub r14,r3,#256+32 @ K256
107 sub sp,sp,#16*4 @ alloca(X[16])
@ --- rounds 0..15 (BODY_00_15): per round, elided lines assemble X[i]
@ from the (possibly unaligned) input; visible lines compute
@ T = h + Sigma1(e) + Ch(e,f,g) + K[i] + X[i], then Sigma0(a) and Maj,
@ with Maj deferred ("from the past") into the next round's h. ---
119 str r1,[sp,#17*4] @ make room for r1
122 add r4,r4,r12 @ h+=Maj(a,b,c) from the past
123 eor r0,r0,r8,ror#19 @ Sigma1(e)
128 @ ldrb r2,[r1,#3] @ 0
129 add r4,r4,r12 @ h+=Maj(a,b,c) from the past
136 str r1,[sp,#17*4] @ make room for r1
140 eor r0,r0,r8,ror#19 @ Sigma1(e)
142 ldr r12,[r14],#4 @ *K256++
143 add r11,r11,r2 @ h+=X[i]
146 add r11,r11,r0,ror#6 @ h+=Sigma1(e)
148 add r11,r11,r12 @ h+=K256[i]
149 eor r2,r2,r10 @ Ch(e,f,g)
151 add r11,r11,r2 @ h+=Ch(e,f,g)
@ 0xf2 = low byte of the final constant 0xc67178f2: cheap end-of-rounds test
154 cmp r12,#0xf2 @ done?
158 ldr r2,[r1],#4 @ prefetch
162 eor r12,r4,r5 @ a^b, b^c in next round
164 ldr r2,[sp,#2*4] @ from future BODY_16_xx
165 eor r12,r4,r5 @ a^b, b^c in next round
166 ldr r1,[sp,#15*4] @ from future BODY_16_xx
168 eor r0,r0,r4,ror#20 @ Sigma0(a)
169 and r3,r3,r12 @ (b^c)&=(a^b)
171 eor r3,r3,r5 @ Maj(a,b,c)
172 add r11,r11,r0,ror#2 @ h+=Sigma0(a)
173 @ add r11,r11,r3 @ h+=Maj(a,b,c)
177 str r1,[sp,#17*4] @ make room for r1
180 add r11,r11,r3 @ h+=Maj(a,b,c) from the past
181 eor r0,r0,r7,ror#19 @ Sigma1(e)
186 @ ldrb r2,[r1,#3] @ 1
187 add r11,r11,r3 @ h+=Maj(a,b,c) from the past
194 str r1,[sp,#17*4] @ make room for r1
198 eor r0,r0,r7,ror#19 @ Sigma1(e)
200 ldr r3,[r14],#4 @ *K256++
201 add r10,r10,r2 @ h+=X[i]
204 add r10,r10,r0,ror#6 @ h+=Sigma1(e)
206 add r10,r10,r3 @ h+=K256[i]
207 eor r2,r2,r9 @ Ch(e,f,g)
208 eor r0,r11,r11,ror#11
209 add r10,r10,r2 @ h+=Ch(e,f,g)
216 ldr r2,[r1],#4 @ prefetch
220 eor r3,r11,r4 @ a^b, b^c in next round
222 ldr r2,[sp,#3*4] @ from future BODY_16_xx
223 eor r3,r11,r4 @ a^b, b^c in next round
224 ldr r1,[sp,#0*4] @ from future BODY_16_xx
226 eor r0,r0,r11,ror#20 @ Sigma0(a)
227 and r12,r12,r3 @ (b^c)&=(a^b)
229 eor r12,r12,r4 @ Maj(a,b,c)
230 add r10,r10,r0,ror#2 @ h+=Sigma0(a)
231 @ add r10,r10,r12 @ h+=Maj(a,b,c)
235 str r1,[sp,#17*4] @ make room for r1
238 add r10,r10,r12 @ h+=Maj(a,b,c) from the past
239 eor r0,r0,r6,ror#19 @ Sigma1(e)
244 @ ldrb r2,[r1,#3] @ 2
245 add r10,r10,r12 @ h+=Maj(a,b,c) from the past
252 str r1,[sp,#17*4] @ make room for r1
256 eor r0,r0,r6,ror#19 @ Sigma1(e)
258 ldr r12,[r14],#4 @ *K256++
259 add r9,r9,r2 @ h+=X[i]
262 add r9,r9,r0,ror#6 @ h+=Sigma1(e)
264 add r9,r9,r12 @ h+=K256[i]
265 eor r2,r2,r8 @ Ch(e,f,g)
266 eor r0,r10,r10,ror#11
267 add r9,r9,r2 @ h+=Ch(e,f,g)
270 cmp r12,#0xf2 @ done?
274 ldr r2,[r1],#4 @ prefetch
278 eor r12,r10,r11 @ a^b, b^c in next round
280 ldr r2,[sp,#4*4] @ from future BODY_16_xx
281 eor r12,r10,r11 @ a^b, b^c in next round
282 ldr r1,[sp,#1*4] @ from future BODY_16_xx
284 eor r0,r0,r10,ror#20 @ Sigma0(a)
285 and r3,r3,r12 @ (b^c)&=(a^b)
287 eor r3,r3,r11 @ Maj(a,b,c)
288 add r9,r9,r0,ror#2 @ h+=Sigma0(a)
289 @ add r9,r9,r3 @ h+=Maj(a,b,c)
293 str r1,[sp,#17*4] @ make room for r1
296 add r9,r9,r3 @ h+=Maj(a,b,c) from the past
297 eor r0,r0,r5,ror#19 @ Sigma1(e)
302 @ ldrb r2,[r1,#3] @ 3
303 add r9,r9,r3 @ h+=Maj(a,b,c) from the past
310 str r1,[sp,#17*4] @ make room for r1
314 eor r0,r0,r5,ror#19 @ Sigma1(e)
316 ldr r3,[r14],#4 @ *K256++
317 add r8,r8,r2 @ h+=X[i]
320 add r8,r8,r0,ror#6 @ h+=Sigma1(e)
322 add r8,r8,r3 @ h+=K256[i]
323 eor r2,r2,r7 @ Ch(e,f,g)
325 add r8,r8,r2 @ h+=Ch(e,f,g)
332 ldr r2,[r1],#4 @ prefetch
336 eor r3,r9,r10 @ a^b, b^c in next round
338 ldr r2,[sp,#5*4] @ from future BODY_16_xx
339 eor r3,r9,r10 @ a^b, b^c in next round
340 ldr r1,[sp,#2*4] @ from future BODY_16_xx
342 eor r0,r0,r9,ror#20 @ Sigma0(a)
343 and r12,r12,r3 @ (b^c)&=(a^b)
345 eor r12,r12,r10 @ Maj(a,b,c)
346 add r8,r8,r0,ror#2 @ h+=Sigma0(a)
347 @ add r8,r8,r12 @ h+=Maj(a,b,c)
351 str r1,[sp,#17*4] @ make room for r1
354 add r8,r8,r12 @ h+=Maj(a,b,c) from the past
355 eor r0,r0,r4,ror#19 @ Sigma1(e)
360 @ ldrb r2,[r1,#3] @ 4
361 add r8,r8,r12 @ h+=Maj(a,b,c) from the past
368 str r1,[sp,#17*4] @ make room for r1
372 eor r0,r0,r4,ror#19 @ Sigma1(e)
374 ldr r12,[r14],#4 @ *K256++
375 add r7,r7,r2 @ h+=X[i]
378 add r7,r7,r0,ror#6 @ h+=Sigma1(e)
380 add r7,r7,r12 @ h+=K256[i]
381 eor r2,r2,r6 @ Ch(e,f,g)
383 add r7,r7,r2 @ h+=Ch(e,f,g)
386 cmp r12,#0xf2 @ done?
390 ldr r2,[r1],#4 @ prefetch
394 eor r12,r8,r9 @ a^b, b^c in next round
396 ldr r2,[sp,#6*4] @ from future BODY_16_xx
397 eor r12,r8,r9 @ a^b, b^c in next round
398 ldr r1,[sp,#3*4] @ from future BODY_16_xx
400 eor r0,r0,r8,ror#20 @ Sigma0(a)
401 and r3,r3,r12 @ (b^c)&=(a^b)
402 add r11,r11,r7 @ d+=h
403 eor r3,r3,r9 @ Maj(a,b,c)
404 add r7,r7,r0,ror#2 @ h+=Sigma0(a)
405 @ add r7,r7,r3 @ h+=Maj(a,b,c)
409 str r1,[sp,#17*4] @ make room for r1
412 add r7,r7,r3 @ h+=Maj(a,b,c) from the past
413 eor r0,r0,r11,ror#19 @ Sigma1(e)
418 @ ldrb r2,[r1,#3] @ 5
419 add r7,r7,r3 @ h+=Maj(a,b,c) from the past
426 str r1,[sp,#17*4] @ make room for r1
430 eor r0,r0,r11,ror#19 @ Sigma1(e)
432 ldr r3,[r14],#4 @ *K256++
433 add r6,r6,r2 @ h+=X[i]
436 add r6,r6,r0,ror#6 @ h+=Sigma1(e)
438 add r6,r6,r3 @ h+=K256[i]
439 eor r2,r2,r5 @ Ch(e,f,g)
441 add r6,r6,r2 @ h+=Ch(e,f,g)
448 ldr r2,[r1],#4 @ prefetch
452 eor r3,r7,r8 @ a^b, b^c in next round
454 ldr r2,[sp,#7*4] @ from future BODY_16_xx
455 eor r3,r7,r8 @ a^b, b^c in next round
456 ldr r1,[sp,#4*4] @ from future BODY_16_xx
458 eor r0,r0,r7,ror#20 @ Sigma0(a)
459 and r12,r12,r3 @ (b^c)&=(a^b)
460 add r10,r10,r6 @ d+=h
461 eor r12,r12,r8 @ Maj(a,b,c)
462 add r6,r6,r0,ror#2 @ h+=Sigma0(a)
463 @ add r6,r6,r12 @ h+=Maj(a,b,c)
467 str r1,[sp,#17*4] @ make room for r1
470 add r6,r6,r12 @ h+=Maj(a,b,c) from the past
471 eor r0,r0,r10,ror#19 @ Sigma1(e)
476 @ ldrb r2,[r1,#3] @ 6
477 add r6,r6,r12 @ h+=Maj(a,b,c) from the past
484 str r1,[sp,#17*4] @ make room for r1
488 eor r0,r0,r10,ror#19 @ Sigma1(e)
490 ldr r12,[r14],#4 @ *K256++
491 add r5,r5,r2 @ h+=X[i]
494 add r5,r5,r0,ror#6 @ h+=Sigma1(e)
496 add r5,r5,r12 @ h+=K256[i]
497 eor r2,r2,r4 @ Ch(e,f,g)
499 add r5,r5,r2 @ h+=Ch(e,f,g)
502 cmp r12,#0xf2 @ done?
506 ldr r2,[r1],#4 @ prefetch
510 eor r12,r6,r7 @ a^b, b^c in next round
512 ldr r2,[sp,#8*4] @ from future BODY_16_xx
513 eor r12,r6,r7 @ a^b, b^c in next round
514 ldr r1,[sp,#5*4] @ from future BODY_16_xx
516 eor r0,r0,r6,ror#20 @ Sigma0(a)
517 and r3,r3,r12 @ (b^c)&=(a^b)
519 eor r3,r3,r7 @ Maj(a,b,c)
520 add r5,r5,r0,ror#2 @ h+=Sigma0(a)
521 @ add r5,r5,r3 @ h+=Maj(a,b,c)
525 str r1,[sp,#17*4] @ make room for r1
528 add r5,r5,r3 @ h+=Maj(a,b,c) from the past
529 eor r0,r0,r9,ror#19 @ Sigma1(e)
534 @ ldrb r2,[r1,#3] @ 7
535 add r5,r5,r3 @ h+=Maj(a,b,c) from the past
542 str r1,[sp,#17*4] @ make room for r1
546 eor r0,r0,r9,ror#19 @ Sigma1(e)
548 ldr r3,[r14],#4 @ *K256++
549 add r4,r4,r2 @ h+=X[i]
552 add r4,r4,r0,ror#6 @ h+=Sigma1(e)
554 add r4,r4,r3 @ h+=K256[i]
555 eor r2,r2,r11 @ Ch(e,f,g)
557 add r4,r4,r2 @ h+=Ch(e,f,g)
564 ldr r2,[r1],#4 @ prefetch
568 eor r3,r5,r6 @ a^b, b^c in next round
570 ldr r2,[sp,#9*4] @ from future BODY_16_xx
571 eor r3,r5,r6 @ a^b, b^c in next round
572 ldr r1,[sp,#6*4] @ from future BODY_16_xx
574 eor r0,r0,r5,ror#20 @ Sigma0(a)
575 and r12,r12,r3 @ (b^c)&=(a^b)
577 eor r12,r12,r6 @ Maj(a,b,c)
578 add r4,r4,r0,ror#2 @ h+=Sigma0(a)
579 @ add r4,r4,r12 @ h+=Maj(a,b,c)
583 str r1,[sp,#17*4] @ make room for r1
586 add r4,r4,r12 @ h+=Maj(a,b,c) from the past
587 eor r0,r0,r8,ror#19 @ Sigma1(e)
592 @ ldrb r2,[r1,#3] @ 8
593 add r4,r4,r12 @ h+=Maj(a,b,c) from the past
600 str r1,[sp,#17*4] @ make room for r1
604 eor r0,r0,r8,ror#19 @ Sigma1(e)
606 ldr r12,[r14],#4 @ *K256++
607 add r11,r11,r2 @ h+=X[i]
610 add r11,r11,r0,ror#6 @ h+=Sigma1(e)
612 add r11,r11,r12 @ h+=K256[i]
613 eor r2,r2,r10 @ Ch(e,f,g)
615 add r11,r11,r2 @ h+=Ch(e,f,g)
618 cmp r12,#0xf2 @ done?
622 ldr r2,[r1],#4 @ prefetch
626 eor r12,r4,r5 @ a^b, b^c in next round
628 ldr r2,[sp,#10*4] @ from future BODY_16_xx
629 eor r12,r4,r5 @ a^b, b^c in next round
630 ldr r1,[sp,#7*4] @ from future BODY_16_xx
632 eor r0,r0,r4,ror#20 @ Sigma0(a)
633 and r3,r3,r12 @ (b^c)&=(a^b)
635 eor r3,r3,r5 @ Maj(a,b,c)
636 add r11,r11,r0,ror#2 @ h+=Sigma0(a)
637 @ add r11,r11,r3 @ h+=Maj(a,b,c)
641 str r1,[sp,#17*4] @ make room for r1
644 add r11,r11,r3 @ h+=Maj(a,b,c) from the past
645 eor r0,r0,r7,ror#19 @ Sigma1(e)
650 @ ldrb r2,[r1,#3] @ 9
651 add r11,r11,r3 @ h+=Maj(a,b,c) from the past
658 str r1,[sp,#17*4] @ make room for r1
662 eor r0,r0,r7,ror#19 @ Sigma1(e)
664 ldr r3,[r14],#4 @ *K256++
665 add r10,r10,r2 @ h+=X[i]
668 add r10,r10,r0,ror#6 @ h+=Sigma1(e)
670 add r10,r10,r3 @ h+=K256[i]
671 eor r2,r2,r9 @ Ch(e,f,g)
672 eor r0,r11,r11,ror#11
673 add r10,r10,r2 @ h+=Ch(e,f,g)
680 ldr r2,[r1],#4 @ prefetch
684 eor r3,r11,r4 @ a^b, b^c in next round
686 ldr r2,[sp,#11*4] @ from future BODY_16_xx
687 eor r3,r11,r4 @ a^b, b^c in next round
688 ldr r1,[sp,#8*4] @ from future BODY_16_xx
690 eor r0,r0,r11,ror#20 @ Sigma0(a)
691 and r12,r12,r3 @ (b^c)&=(a^b)
693 eor r12,r12,r4 @ Maj(a,b,c)
694 add r10,r10,r0,ror#2 @ h+=Sigma0(a)
695 @ add r10,r10,r12 @ h+=Maj(a,b,c)
697 @ ldr r2,[r1],#4 @ 10
699 str r1,[sp,#17*4] @ make room for r1
702 add r10,r10,r12 @ h+=Maj(a,b,c) from the past
703 eor r0,r0,r6,ror#19 @ Sigma1(e)
708 @ ldrb r2,[r1,#3] @ 10
709 add r10,r10,r12 @ h+=Maj(a,b,c) from the past
716 str r1,[sp,#17*4] @ make room for r1
720 eor r0,r0,r6,ror#19 @ Sigma1(e)
722 ldr r12,[r14],#4 @ *K256++
723 add r9,r9,r2 @ h+=X[i]
726 add r9,r9,r0,ror#6 @ h+=Sigma1(e)
728 add r9,r9,r12 @ h+=K256[i]
729 eor r2,r2,r8 @ Ch(e,f,g)
730 eor r0,r10,r10,ror#11
731 add r9,r9,r2 @ h+=Ch(e,f,g)
734 cmp r12,#0xf2 @ done?
738 ldr r2,[r1],#4 @ prefetch
742 eor r12,r10,r11 @ a^b, b^c in next round
744 ldr r2,[sp,#12*4] @ from future BODY_16_xx
745 eor r12,r10,r11 @ a^b, b^c in next round
746 ldr r1,[sp,#9*4] @ from future BODY_16_xx
748 eor r0,r0,r10,ror#20 @ Sigma0(a)
749 and r3,r3,r12 @ (b^c)&=(a^b)
751 eor r3,r3,r11 @ Maj(a,b,c)
752 add r9,r9,r0,ror#2 @ h+=Sigma0(a)
753 @ add r9,r9,r3 @ h+=Maj(a,b,c)
755 @ ldr r2,[r1],#4 @ 11
757 str r1,[sp,#17*4] @ make room for r1
760 add r9,r9,r3 @ h+=Maj(a,b,c) from the past
761 eor r0,r0,r5,ror#19 @ Sigma1(e)
766 @ ldrb r2,[r1,#3] @ 11
767 add r9,r9,r3 @ h+=Maj(a,b,c) from the past
774 str r1,[sp,#17*4] @ make room for r1
778 eor r0,r0,r5,ror#19 @ Sigma1(e)
780 ldr r3,[r14],#4 @ *K256++
781 add r8,r8,r2 @ h+=X[i]
784 add r8,r8,r0,ror#6 @ h+=Sigma1(e)
786 add r8,r8,r3 @ h+=K256[i]
787 eor r2,r2,r7 @ Ch(e,f,g)
789 add r8,r8,r2 @ h+=Ch(e,f,g)
796 ldr r2,[r1],#4 @ prefetch
800 eor r3,r9,r10 @ a^b, b^c in next round
802 ldr r2,[sp,#13*4] @ from future BODY_16_xx
803 eor r3,r9,r10 @ a^b, b^c in next round
804 ldr r1,[sp,#10*4] @ from future BODY_16_xx
806 eor r0,r0,r9,ror#20 @ Sigma0(a)
807 and r12,r12,r3 @ (b^c)&=(a^b)
809 eor r12,r12,r10 @ Maj(a,b,c)
810 add r8,r8,r0,ror#2 @ h+=Sigma0(a)
811 @ add r8,r8,r12 @ h+=Maj(a,b,c)
813 @ ldr r2,[r1],#4 @ 12
815 str r1,[sp,#17*4] @ make room for r1
818 add r8,r8,r12 @ h+=Maj(a,b,c) from the past
819 eor r0,r0,r4,ror#19 @ Sigma1(e)
824 @ ldrb r2,[r1,#3] @ 12
825 add r8,r8,r12 @ h+=Maj(a,b,c) from the past
832 str r1,[sp,#17*4] @ make room for r1
836 eor r0,r0,r4,ror#19 @ Sigma1(e)
838 ldr r12,[r14],#4 @ *K256++
839 add r7,r7,r2 @ h+=X[i]
842 add r7,r7,r0,ror#6 @ h+=Sigma1(e)
844 add r7,r7,r12 @ h+=K256[i]
845 eor r2,r2,r6 @ Ch(e,f,g)
847 add r7,r7,r2 @ h+=Ch(e,f,g)
850 cmp r12,#0xf2 @ done?
854 ldr r2,[r1],#4 @ prefetch
858 eor r12,r8,r9 @ a^b, b^c in next round
860 ldr r2,[sp,#14*4] @ from future BODY_16_xx
861 eor r12,r8,r9 @ a^b, b^c in next round
862 ldr r1,[sp,#11*4] @ from future BODY_16_xx
864 eor r0,r0,r8,ror#20 @ Sigma0(a)
865 and r3,r3,r12 @ (b^c)&=(a^b)
866 add r11,r11,r7 @ d+=h
867 eor r3,r3,r9 @ Maj(a,b,c)
868 add r7,r7,r0,ror#2 @ h+=Sigma0(a)
869 @ add r7,r7,r3 @ h+=Maj(a,b,c)
871 @ ldr r2,[r1],#4 @ 13
873 str r1,[sp,#17*4] @ make room for r1
876 add r7,r7,r3 @ h+=Maj(a,b,c) from the past
877 eor r0,r0,r11,ror#19 @ Sigma1(e)
882 @ ldrb r2,[r1,#3] @ 13
883 add r7,r7,r3 @ h+=Maj(a,b,c) from the past
890 str r1,[sp,#17*4] @ make room for r1
894 eor r0,r0,r11,ror#19 @ Sigma1(e)
896 ldr r3,[r14],#4 @ *K256++
897 add r6,r6,r2 @ h+=X[i]
900 add r6,r6,r0,ror#6 @ h+=Sigma1(e)
902 add r6,r6,r3 @ h+=K256[i]
903 eor r2,r2,r5 @ Ch(e,f,g)
905 add r6,r6,r2 @ h+=Ch(e,f,g)
912 ldr r2,[r1],#4 @ prefetch
916 eor r3,r7,r8 @ a^b, b^c in next round
918 ldr r2,[sp,#15*4] @ from future BODY_16_xx
919 eor r3,r7,r8 @ a^b, b^c in next round
920 ldr r1,[sp,#12*4] @ from future BODY_16_xx
922 eor r0,r0,r7,ror#20 @ Sigma0(a)
923 and r12,r12,r3 @ (b^c)&=(a^b)
924 add r10,r10,r6 @ d+=h
925 eor r12,r12,r8 @ Maj(a,b,c)
926 add r6,r6,r0,ror#2 @ h+=Sigma0(a)
927 @ add r6,r6,r12 @ h+=Maj(a,b,c)
929 @ ldr r2,[r1],#4 @ 14
931 str r1,[sp,#17*4] @ make room for r1
934 add r6,r6,r12 @ h+=Maj(a,b,c) from the past
935 eor r0,r0,r10,ror#19 @ Sigma1(e)
940 @ ldrb r2,[r1,#3] @ 14
941 add r6,r6,r12 @ h+=Maj(a,b,c) from the past
948 str r1,[sp,#17*4] @ make room for r1
952 eor r0,r0,r10,ror#19 @ Sigma1(e)
954 ldr r12,[r14],#4 @ *K256++
955 add r5,r5,r2 @ h+=X[i]
958 add r5,r5,r0,ror#6 @ h+=Sigma1(e)
960 add r5,r5,r12 @ h+=K256[i]
961 eor r2,r2,r4 @ Ch(e,f,g)
963 add r5,r5,r2 @ h+=Ch(e,f,g)
966 cmp r12,#0xf2 @ done?
970 ldr r2,[r1],#4 @ prefetch
974 eor r12,r6,r7 @ a^b, b^c in next round
976 ldr r2,[sp,#0*4] @ from future BODY_16_xx
977 eor r12,r6,r7 @ a^b, b^c in next round
978 ldr r1,[sp,#13*4] @ from future BODY_16_xx
980 eor r0,r0,r6,ror#20 @ Sigma0(a)
981 and r3,r3,r12 @ (b^c)&=(a^b)
983 eor r3,r3,r7 @ Maj(a,b,c)
984 add r5,r5,r0,ror#2 @ h+=Sigma0(a)
985 @ add r5,r5,r3 @ h+=Maj(a,b,c)
987 @ ldr r2,[r1],#4 @ 15
989 str r1,[sp,#17*4] @ make room for r1
992 add r5,r5,r3 @ h+=Maj(a,b,c) from the past
993 eor r0,r0,r9,ror#19 @ Sigma1(e)
998 @ ldrb r2,[r1,#3] @ 15
999 add r5,r5,r3 @ h+=Maj(a,b,c) from the past
1006 str r1,[sp,#17*4] @ make room for r1
1010 eor r0,r0,r9,ror#19 @ Sigma1(e)
1012 ldr r3,[r14],#4 @ *K256++
1013 add r4,r4,r2 @ h+=X[i]
1016 add r4,r4,r0,ror#6 @ h+=Sigma1(e)
1018 add r4,r4,r3 @ h+=K256[i]
1019 eor r2,r2,r11 @ Ch(e,f,g)
1021 add r4,r4,r2 @ h+=Ch(e,f,g)
1024 cmp r3,#0xf2 @ done?
1027 # if __ARM_ARCH__>=7
1028 ldr r2,[r1],#4 @ prefetch
1032 eor r3,r5,r6 @ a^b, b^c in next round
1034 ldr r2,[sp,#1*4] @ from future BODY_16_xx
1035 eor r3,r5,r6 @ a^b, b^c in next round
1036 ldr r1,[sp,#14*4] @ from future BODY_16_xx
1038 eor r0,r0,r5,ror#20 @ Sigma0(a)
1039 and r12,r12,r3 @ (b^c)&=(a^b)
1041 eor r12,r12,r6 @ Maj(a,b,c)
1042 add r4,r4,r0,ror#2 @ h+=Sigma0(a)
1043 @ add r4,r4,r12 @ h+=Maj(a,b,c)
@ --- rounds 16+ (BODY_16_XX): message schedule computed in place --
@ X[i] = sigma1(X[i-2]) + X[i-7] + sigma0(X[i-15]) + X[i-16], where
@ sigma0(x) = ror(x,7)^ror(x,18)^(x>>3) and
@ sigma1(x) = ror(x,17)^ror(x,19)^(x>>10); visible eor/lsr lines below
@ are fragments of those expansions (rotations partly in elided lines).
1045 @ ldr r2,[sp,#1*4] @ 16
1048 add r4,r4,r12 @ h+=Maj(a,b,c) from the past
1051 eor r12,r12,r1,ror#19
1052 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1054 eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
1058 eor r0,r8,r8,ror#5 @ from BODY_00_15
1060 eor r0,r0,r8,ror#19 @ Sigma1(e)
1062 ldr r12,[r14],#4 @ *K256++
1063 add r11,r11,r2 @ h+=X[i]
1066 add r11,r11,r0,ror#6 @ h+=Sigma1(e)
1068 add r11,r11,r12 @ h+=K256[i]
1069 eor r2,r2,r10 @ Ch(e,f,g)
1071 add r11,r11,r2 @ h+=Ch(e,f,g)
1074 cmp r12,#0xf2 @ done?
1077 # if __ARM_ARCH__>=7
1078 ldr r2,[r1],#4 @ prefetch
1082 eor r12,r4,r5 @ a^b, b^c in next round
1084 ldr r2,[sp,#2*4] @ from future BODY_16_xx
1085 eor r12,r4,r5 @ a^b, b^c in next round
1086 ldr r1,[sp,#15*4] @ from future BODY_16_xx
1088 eor r0,r0,r4,ror#20 @ Sigma0(a)
1089 and r3,r3,r12 @ (b^c)&=(a^b)
1090 add r7,r7,r11 @ d+=h
1091 eor r3,r3,r5 @ Maj(a,b,c)
1092 add r11,r11,r0,ror#2 @ h+=Sigma0(a)
1093 @ add r11,r11,r3 @ h+=Maj(a,b,c)
1094 @ ldr r2,[sp,#2*4] @ 17
1097 add r11,r11,r3 @ h+=Maj(a,b,c) from the past
1101 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1103 eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1107 eor r0,r7,r7,ror#5 @ from BODY_00_15
1109 eor r0,r0,r7,ror#19 @ Sigma1(e)
1111 ldr r3,[r14],#4 @ *K256++
1112 add r10,r10,r2 @ h+=X[i]
1115 add r10,r10,r0,ror#6 @ h+=Sigma1(e)
1117 add r10,r10,r3 @ h+=K256[i]
1118 eor r2,r2,r9 @ Ch(e,f,g)
1119 eor r0,r11,r11,ror#11
1120 add r10,r10,r2 @ h+=Ch(e,f,g)
1123 cmp r3,#0xf2 @ done?
1126 # if __ARM_ARCH__>=7
1127 ldr r2,[r1],#4 @ prefetch
1131 eor r3,r11,r4 @ a^b, b^c in next round
1133 ldr r2,[sp,#3*4] @ from future BODY_16_xx
1134 eor r3,r11,r4 @ a^b, b^c in next round
1135 ldr r1,[sp,#0*4] @ from future BODY_16_xx
1137 eor r0,r0,r11,ror#20 @ Sigma0(a)
1138 and r12,r12,r3 @ (b^c)&=(a^b)
1139 add r6,r6,r10 @ d+=h
1140 eor r12,r12,r4 @ Maj(a,b,c)
1141 add r10,r10,r0,ror#2 @ h+=Sigma0(a)
1142 @ add r10,r10,r12 @ h+=Maj(a,b,c)
1143 @ ldr r2,[sp,#3*4] @ 18
1146 add r10,r10,r12 @ h+=Maj(a,b,c) from the past
1149 eor r12,r12,r1,ror#19
1150 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1152 eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
1156 eor r0,r6,r6,ror#5 @ from BODY_00_15
1158 eor r0,r0,r6,ror#19 @ Sigma1(e)
1160 ldr r12,[r14],#4 @ *K256++
1161 add r9,r9,r2 @ h+=X[i]
1164 add r9,r9,r0,ror#6 @ h+=Sigma1(e)
1166 add r9,r9,r12 @ h+=K256[i]
1167 eor r2,r2,r8 @ Ch(e,f,g)
1168 eor r0,r10,r10,ror#11
1169 add r9,r9,r2 @ h+=Ch(e,f,g)
1172 cmp r12,#0xf2 @ done?
1175 # if __ARM_ARCH__>=7
1176 ldr r2,[r1],#4 @ prefetch
1180 eor r12,r10,r11 @ a^b, b^c in next round
1182 ldr r2,[sp,#4*4] @ from future BODY_16_xx
1183 eor r12,r10,r11 @ a^b, b^c in next round
1184 ldr r1,[sp,#1*4] @ from future BODY_16_xx
1186 eor r0,r0,r10,ror#20 @ Sigma0(a)
1187 and r3,r3,r12 @ (b^c)&=(a^b)
1189 eor r3,r3,r11 @ Maj(a,b,c)
1190 add r9,r9,r0,ror#2 @ h+=Sigma0(a)
1191 @ add r9,r9,r3 @ h+=Maj(a,b,c)
1192 @ ldr r2,[sp,#4*4] @ 19
1195 add r9,r9,r3 @ h+=Maj(a,b,c) from the past
1199 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1201 eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1205 eor r0,r5,r5,ror#5 @ from BODY_00_15
1207 eor r0,r0,r5,ror#19 @ Sigma1(e)
1209 ldr r3,[r14],#4 @ *K256++
1210 add r8,r8,r2 @ h+=X[i]
1213 add r8,r8,r0,ror#6 @ h+=Sigma1(e)
1215 add r8,r8,r3 @ h+=K256[i]
1216 eor r2,r2,r7 @ Ch(e,f,g)
1218 add r8,r8,r2 @ h+=Ch(e,f,g)
1221 cmp r3,#0xf2 @ done?
1224 # if __ARM_ARCH__>=7
1225 ldr r2,[r1],#4 @ prefetch
1229 eor r3,r9,r10 @ a^b, b^c in next round
1231 ldr r2,[sp,#5*4] @ from future BODY_16_xx
1232 eor r3,r9,r10 @ a^b, b^c in next round
1233 ldr r1,[sp,#2*4] @ from future BODY_16_xx
1235 eor r0,r0,r9,ror#20 @ Sigma0(a)
1236 and r12,r12,r3 @ (b^c)&=(a^b)
1238 eor r12,r12,r10 @ Maj(a,b,c)
1239 add r8,r8,r0,ror#2 @ h+=Sigma0(a)
1240 @ add r8,r8,r12 @ h+=Maj(a,b,c)
1241 @ ldr r2,[sp,#5*4] @ 20
1244 add r8,r8,r12 @ h+=Maj(a,b,c) from the past
1247 eor r12,r12,r1,ror#19
1248 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1250 eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
1254 eor r0,r4,r4,ror#5 @ from BODY_00_15
1256 eor r0,r0,r4,ror#19 @ Sigma1(e)
1258 ldr r12,[r14],#4 @ *K256++
1259 add r7,r7,r2 @ h+=X[i]
1262 add r7,r7,r0,ror#6 @ h+=Sigma1(e)
1264 add r7,r7,r12 @ h+=K256[i]
1265 eor r2,r2,r6 @ Ch(e,f,g)
1267 add r7,r7,r2 @ h+=Ch(e,f,g)
1270 cmp r12,#0xf2 @ done?
1273 # if __ARM_ARCH__>=7
1274 ldr r2,[r1],#4 @ prefetch
1278 eor r12,r8,r9 @ a^b, b^c in next round
1280 ldr r2,[sp,#6*4] @ from future BODY_16_xx
1281 eor r12,r8,r9 @ a^b, b^c in next round
1282 ldr r1,[sp,#3*4] @ from future BODY_16_xx
1284 eor r0,r0,r8,ror#20 @ Sigma0(a)
1285 and r3,r3,r12 @ (b^c)&=(a^b)
1286 add r11,r11,r7 @ d+=h
1287 eor r3,r3,r9 @ Maj(a,b,c)
1288 add r7,r7,r0,ror#2 @ h+=Sigma0(a)
1289 @ add r7,r7,r3 @ h+=Maj(a,b,c)
1290 @ ldr r2,[sp,#6*4] @ 21
1293 add r7,r7,r3 @ h+=Maj(a,b,c) from the past
1297 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1299 eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1303 eor r0,r11,r11,ror#5 @ from BODY_00_15
1305 eor r0,r0,r11,ror#19 @ Sigma1(e)
1307 ldr r3,[r14],#4 @ *K256++
1308 add r6,r6,r2 @ h+=X[i]
1311 add r6,r6,r0,ror#6 @ h+=Sigma1(e)
1313 add r6,r6,r3 @ h+=K256[i]
1314 eor r2,r2,r5 @ Ch(e,f,g)
1316 add r6,r6,r2 @ h+=Ch(e,f,g)
1319 cmp r3,#0xf2 @ done?
1322 # if __ARM_ARCH__>=7
1323 ldr r2,[r1],#4 @ prefetch
1327 eor r3,r7,r8 @ a^b, b^c in next round
1329 ldr r2,[sp,#7*4] @ from future BODY_16_xx
1330 eor r3,r7,r8 @ a^b, b^c in next round
1331 ldr r1,[sp,#4*4] @ from future BODY_16_xx
1333 eor r0,r0,r7,ror#20 @ Sigma0(a)
1334 and r12,r12,r3 @ (b^c)&=(a^b)
1335 add r10,r10,r6 @ d+=h
1336 eor r12,r12,r8 @ Maj(a,b,c)
1337 add r6,r6,r0,ror#2 @ h+=Sigma0(a)
1338 @ add r6,r6,r12 @ h+=Maj(a,b,c)
1339 @ ldr r2,[sp,#7*4] @ 22
1342 add r6,r6,r12 @ h+=Maj(a,b,c) from the past
1345 eor r12,r12,r1,ror#19
1346 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1348 eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
1352 eor r0,r10,r10,ror#5 @ from BODY_00_15
1354 eor r0,r0,r10,ror#19 @ Sigma1(e)
1356 ldr r12,[r14],#4 @ *K256++
1357 add r5,r5,r2 @ h+=X[i]
1360 add r5,r5,r0,ror#6 @ h+=Sigma1(e)
1362 add r5,r5,r12 @ h+=K256[i]
1363 eor r2,r2,r4 @ Ch(e,f,g)
1365 add r5,r5,r2 @ h+=Ch(e,f,g)
1368 cmp r12,#0xf2 @ done?
1371 # if __ARM_ARCH__>=7
1372 ldr r2,[r1],#4 @ prefetch
1376 eor r12,r6,r7 @ a^b, b^c in next round
1378 ldr r2,[sp,#8*4] @ from future BODY_16_xx
1379 eor r12,r6,r7 @ a^b, b^c in next round
1380 ldr r1,[sp,#5*4] @ from future BODY_16_xx
1382 eor r0,r0,r6,ror#20 @ Sigma0(a)
1383 and r3,r3,r12 @ (b^c)&=(a^b)
1385 eor r3,r3,r7 @ Maj(a,b,c)
1386 add r5,r5,r0,ror#2 @ h+=Sigma0(a)
1387 @ add r5,r5,r3 @ h+=Maj(a,b,c)
1388 @ ldr r2,[sp,#8*4] @ 23
1391 add r5,r5,r3 @ h+=Maj(a,b,c) from the past
1395 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1397 eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1401 eor r0,r9,r9,ror#5 @ from BODY_00_15
1403 eor r0,r0,r9,ror#19 @ Sigma1(e)
1405 ldr r3,[r14],#4 @ *K256++
1406 add r4,r4,r2 @ h+=X[i]
1409 add r4,r4,r0,ror#6 @ h+=Sigma1(e)
1411 add r4,r4,r3 @ h+=K256[i]
1412 eor r2,r2,r11 @ Ch(e,f,g)
1414 add r4,r4,r2 @ h+=Ch(e,f,g)
1417 cmp r3,#0xf2 @ done?
1420 # if __ARM_ARCH__>=7
1421 ldr r2,[r1],#4 @ prefetch
1425 eor r3,r5,r6 @ a^b, b^c in next round
1427 ldr r2,[sp,#9*4] @ from future BODY_16_xx
1428 eor r3,r5,r6 @ a^b, b^c in next round
1429 ldr r1,[sp,#6*4] @ from future BODY_16_xx
1431 eor r0,r0,r5,ror#20 @ Sigma0(a)
1432 and r12,r12,r3 @ (b^c)&=(a^b)
1434 eor r12,r12,r6 @ Maj(a,b,c)
1435 add r4,r4,r0,ror#2 @ h+=Sigma0(a)
1436 @ add r4,r4,r12 @ h+=Maj(a,b,c)
1437 @ ldr r2,[sp,#9*4] @ 24
1440 add r4,r4,r12 @ h+=Maj(a,b,c) from the past
1443 eor r12,r12,r1,ror#19
1444 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1446 eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
1450 eor r0,r8,r8,ror#5 @ from BODY_00_15
1452 eor r0,r0,r8,ror#19 @ Sigma1(e)
1454 ldr r12,[r14],#4 @ *K256++
1455 add r11,r11,r2 @ h+=X[i]
1458 add r11,r11,r0,ror#6 @ h+=Sigma1(e)
1460 add r11,r11,r12 @ h+=K256[i]
1461 eor r2,r2,r10 @ Ch(e,f,g)
1463 add r11,r11,r2 @ h+=Ch(e,f,g)
1466 cmp r12,#0xf2 @ done?
1469 # if __ARM_ARCH__>=7
1470 ldr r2,[r1],#4 @ prefetch
1474 eor r12,r4,r5 @ a^b, b^c in next round
1476 ldr r2,[sp,#10*4] @ from future BODY_16_xx
1477 eor r12,r4,r5 @ a^b, b^c in next round
1478 ldr r1,[sp,#7*4] @ from future BODY_16_xx
1480 eor r0,r0,r4,ror#20 @ Sigma0(a)
1481 and r3,r3,r12 @ (b^c)&=(a^b)
1482 add r7,r7,r11 @ d+=h
1483 eor r3,r3,r5 @ Maj(a,b,c)
1484 add r11,r11,r0,ror#2 @ h+=Sigma0(a)
1485 @ add r11,r11,r3 @ h+=Maj(a,b,c)
1486 @ ldr r2,[sp,#10*4] @ 25
1489 add r11,r11,r3 @ h+=Maj(a,b,c) from the past
1493 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1495 eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1499 eor r0,r7,r7,ror#5 @ from BODY_00_15
1501 eor r0,r0,r7,ror#19 @ Sigma1(e)
1503 ldr r3,[r14],#4 @ *K256++
1504 add r10,r10,r2 @ h+=X[i]
1507 add r10,r10,r0,ror#6 @ h+=Sigma1(e)
1509 add r10,r10,r3 @ h+=K256[i]
1510 eor r2,r2,r9 @ Ch(e,f,g)
1511 eor r0,r11,r11,ror#11
1512 add r10,r10,r2 @ h+=Ch(e,f,g)
1515 cmp r3,#0xf2 @ done?
1518 # if __ARM_ARCH__>=7
1519 ldr r2,[r1],#4 @ prefetch
1523 eor r3,r11,r4 @ a^b, b^c in next round
1525 ldr r2,[sp,#11*4] @ from future BODY_16_xx
1526 eor r3,r11,r4 @ a^b, b^c in next round
1527 ldr r1,[sp,#8*4] @ from future BODY_16_xx
1529 eor r0,r0,r11,ror#20 @ Sigma0(a)
1530 and r12,r12,r3 @ (b^c)&=(a^b)
1531 add r6,r6,r10 @ d+=h
1532 eor r12,r12,r4 @ Maj(a,b,c)
1533 add r10,r10,r0,ror#2 @ h+=Sigma0(a)
1534 @ add r10,r10,r12 @ h+=Maj(a,b,c)
1535 @ ldr r2,[sp,#11*4] @ 26
1538 add r10,r10,r12 @ h+=Maj(a,b,c) from the past
1541 eor r12,r12,r1,ror#19
1542 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1544 eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
1548 eor r0,r6,r6,ror#5 @ from BODY_00_15
1550 eor r0,r0,r6,ror#19 @ Sigma1(e)
1552 ldr r12,[r14],#4 @ *K256++
1553 add r9,r9,r2 @ h+=X[i]
1556 add r9,r9,r0,ror#6 @ h+=Sigma1(e)
1558 add r9,r9,r12 @ h+=K256[i]
1559 eor r2,r2,r8 @ Ch(e,f,g)
1560 eor r0,r10,r10,ror#11
1561 add r9,r9,r2 @ h+=Ch(e,f,g)
1564 cmp r12,#0xf2 @ done?
1567 # if __ARM_ARCH__>=7
1568 ldr r2,[r1],#4 @ prefetch
1572 eor r12,r10,r11 @ a^b, b^c in next round
1574 ldr r2,[sp,#12*4] @ from future BODY_16_xx
1575 eor r12,r10,r11 @ a^b, b^c in next round
1576 ldr r1,[sp,#9*4] @ from future BODY_16_xx
1578 eor r0,r0,r10,ror#20 @ Sigma0(a)
1579 and r3,r3,r12 @ (b^c)&=(a^b)
1581 eor r3,r3,r11 @ Maj(a,b,c)
1582 add r9,r9,r0,ror#2 @ h+=Sigma0(a)
1583 @ add r9,r9,r3 @ h+=Maj(a,b,c)
1584 @ ldr r2,[sp,#12*4] @ 27
1587 add r9,r9,r3 @ h+=Maj(a,b,c) from the past
1591 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1593 eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1597 eor r0,r5,r5,ror#5 @ from BODY_00_15
1599 eor r0,r0,r5,ror#19 @ Sigma1(e)
1601 ldr r3,[r14],#4 @ *K256++
1602 add r8,r8,r2 @ h+=X[i]
1605 add r8,r8,r0,ror#6 @ h+=Sigma1(e)
1607 add r8,r8,r3 @ h+=K256[i]
1608 eor r2,r2,r7 @ Ch(e,f,g)
1610 add r8,r8,r2 @ h+=Ch(e,f,g)
1613 cmp r3,#0xf2 @ done?
1616 # if __ARM_ARCH__>=7
1617 ldr r2,[r1],#4 @ prefetch
1621 eor r3,r9,r10 @ a^b, b^c in next round
1623 ldr r2,[sp,#13*4] @ from future BODY_16_xx
1624 eor r3,r9,r10 @ a^b, b^c in next round
1625 ldr r1,[sp,#10*4] @ from future BODY_16_xx
1627 eor r0,r0,r9,ror#20 @ Sigma0(a)
1628 and r12,r12,r3 @ (b^c)&=(a^b)
1630 eor r12,r12,r10 @ Maj(a,b,c)
1631 add r8,r8,r0,ror#2 @ h+=Sigma0(a)
1632 @ add r8,r8,r12 @ h+=Maj(a,b,c)
1633 @ ldr r2,[sp,#13*4] @ 28
1636 add r8,r8,r12 @ h+=Maj(a,b,c) from the past
1639 eor r12,r12,r1,ror#19
1640 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1642 eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
1646 eor r0,r4,r4,ror#5 @ from BODY_00_15
1648 eor r0,r0,r4,ror#19 @ Sigma1(e)
1650 ldr r12,[r14],#4 @ *K256++
1651 add r7,r7,r2 @ h+=X[i]
1654 add r7,r7,r0,ror#6 @ h+=Sigma1(e)
1656 add r7,r7,r12 @ h+=K256[i]
1657 eor r2,r2,r6 @ Ch(e,f,g)
1659 add r7,r7,r2 @ h+=Ch(e,f,g)
1662 cmp r12,#0xf2 @ done?
1665 # if __ARM_ARCH__>=7
1666 ldr r2,[r1],#4 @ prefetch
1670 eor r12,r8,r9 @ a^b, b^c in next round
1672 ldr r2,[sp,#14*4] @ from future BODY_16_xx
1673 eor r12,r8,r9 @ a^b, b^c in next round
1674 ldr r1,[sp,#11*4] @ from future BODY_16_xx
1676 eor r0,r0,r8,ror#20 @ Sigma0(a)
1677 and r3,r3,r12 @ (b^c)&=(a^b)
1678 add r11,r11,r7 @ d+=h
1679 eor r3,r3,r9 @ Maj(a,b,c)
1680 add r7,r7,r0,ror#2 @ h+=Sigma0(a)
1681 @ add r7,r7,r3 @ h+=Maj(a,b,c)
1682 @ ldr r2,[sp,#14*4] @ 29
1685 add r7,r7,r3 @ h+=Maj(a,b,c) from the past
1689 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1691 eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1695 eor r0,r11,r11,ror#5 @ from BODY_00_15
1697 eor r0,r0,r11,ror#19 @ Sigma1(e)
1699 ldr r3,[r14],#4 @ *K256++
1700 add r6,r6,r2 @ h+=X[i]
1703 add r6,r6,r0,ror#6 @ h+=Sigma1(e)
1705 add r6,r6,r3 @ h+=K256[i]
1706 eor r2,r2,r5 @ Ch(e,f,g)
1708 add r6,r6,r2 @ h+=Ch(e,f,g)
1711 cmp r3,#0xf2 @ done?
1714 # if __ARM_ARCH__>=7
1715 ldr r2,[r1],#4 @ prefetch
1719 eor r3,r7,r8 @ a^b, b^c in next round
1721 ldr r2,[sp,#15*4] @ from future BODY_16_xx
1722 eor r3,r7,r8 @ a^b, b^c in next round
1723 ldr r1,[sp,#12*4] @ from future BODY_16_xx
1725 eor r0,r0,r7,ror#20 @ Sigma0(a)
1726 and r12,r12,r3 @ (b^c)&=(a^b)
1727 add r10,r10,r6 @ d+=h
1728 eor r12,r12,r8 @ Maj(a,b,c)
1729 add r6,r6,r0,ror#2 @ h+=Sigma0(a)
1730 @ add r6,r6,r12 @ h+=Maj(a,b,c)
1731 @ ldr r2,[sp,#15*4] @ 30
1734 add r6,r6,r12 @ h+=Maj(a,b,c) from the past
1737 eor r12,r12,r1,ror#19
1738 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1740 eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
1744 eor r0,r10,r10,ror#5 @ from BODY_00_15
1746 eor r0,r0,r10,ror#19 @ Sigma1(e)
1748 ldr r12,[r14],#4 @ *K256++
1749 add r5,r5,r2 @ h+=X[i]
1752 add r5,r5,r0,ror#6 @ h+=Sigma1(e)
1754 add r5,r5,r12 @ h+=K256[i]
1755 eor r2,r2,r4 @ Ch(e,f,g)
1757 add r5,r5,r2 @ h+=Ch(e,f,g)
1760 cmp r12,#0xf2 @ done?
1763 # if __ARM_ARCH__>=7
1764 ldr r2,[r1],#4 @ prefetch
1768 eor r12,r6,r7 @ a^b, b^c in next round
1770 ldr r2,[sp,#0*4] @ from future BODY_16_xx
1771 eor r12,r6,r7 @ a^b, b^c in next round
1772 ldr r1,[sp,#13*4] @ from future BODY_16_xx
1774 eor r0,r0,r6,ror#20 @ Sigma0(a)
1775 and r3,r3,r12 @ (b^c)&=(a^b)
1777 eor r3,r3,r7 @ Maj(a,b,c)
1778 add r5,r5,r0,ror#2 @ h+=Sigma0(a)
1779 @ add r5,r5,r3 @ h+=Maj(a,b,c)
1780 @ ldr r2,[sp,#0*4] @ 31
1783 add r5,r5,r3 @ h+=Maj(a,b,c) from the past
1787 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1789 eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1793 eor r0,r9,r9,ror#5 @ from BODY_00_15
1795 eor r0,r0,r9,ror#19 @ Sigma1(e)
1797 ldr r3,[r14],#4 @ *K256++
1798 add r4,r4,r2 @ h+=X[i]
1801 add r4,r4,r0,ror#6 @ h+=Sigma1(e)
1803 add r4,r4,r3 @ h+=K256[i]
1804 eor r2,r2,r11 @ Ch(e,f,g)
1806 add r4,r4,r2 @ h+=Ch(e,f,g)
1809 cmp r3,#0xf2 @ done?
1812 # if __ARM_ARCH__>=7
1813 ldr r2,[r1],#4 @ prefetch
1817 eor r3,r5,r6 @ a^b, b^c in next round
1819 ldr r2,[sp,#1*4] @ from future BODY_16_xx
1820 eor r3,r5,r6 @ a^b, b^c in next round
1821 ldr r1,[sp,#14*4] @ from future BODY_16_xx
1823 eor r0,r0,r5,ror#20 @ Sigma0(a)
1824 and r12,r12,r3 @ (b^c)&=(a^b)
1826 eor r12,r12,r6 @ Maj(a,b,c)
1827 add r4,r4,r0,ror#2 @ h+=Sigma0(a)
1828 @ add r4,r4,r12 @ h+=Maj(a,b,c)
@ --- end of unrolled rounds: the preceding "cmp ...,#0xf2" set EQ when
@ the last K256 constant was consumed; reload ctx conditionally and
@ (in elided lines) either fall through to the feed-forward or loop. ---
1830 ite eq @ Thumb2 thing, sanity check in ARM
1832 ldreq r3,[sp,#16*4] @ pull ctx
1835 add r4,r4,r12 @ h+=Maj(a,b,c) from the past
@ Feed-forward and next-block dispatch: elided lines add the saved ctx
@ words into r4..r11; below, inp/inp-end are reloaded to decide whether
@ another 64-byte block remains.
1850 ldr r1,[sp,#17*4] @ pull inp
1851 ldr r12,[sp,#18*4] @ pull inp+len
1854 stmia r3,{r4,r5,r6,r7,r8,r9,r10,r11}
1856 sub r14,r14,#256 @ rewind Ktbl
1859 add sp,sp,#19*4 @ destroy frame
@ Two return sequences (selected by elided #if on __ARM_ARCH__):
@ ARMv5+ pops pc directly; the ARMv4 path pops lr and returns via a
@ hand-encoded "bx lr" (0xe12fff1e) so the binary stays v4-compatible
@ while still interworking with Thumb callers.
1861 ldmia sp!,{r4-r11,pc}
1863 ldmia sp!,{r4-r11,lr}
1865 moveq pc,lr @ be binary compatible with V4, yet
1866 .word 0xe12fff1e @ interoperable with Thumb ISA:-)
1868 .size sha256_block_data_order,.-sha256_block_data_order
1869 #if __ARM_MAX_ARCH__>=7
@ ---------------------------------------------------------------------
@ sha256_block_data_order_neon -- NEON path of the SHA-256 block
@ transform (per the header: ~12.5 cycles/byte on Cortex-A8/Snapdragon S4).
@ NOTE(review): register contract inferred from the visible code only --
@ r1 = input pointer, r2 = number of 64-byte blocks (scaled by lsl#6
@ below), r14 = K256 table pointer -- TODO confirm against the full file.
@ NOTE(review): this extract is non-contiguous (the embedded original
@ line numbers jump), so instructions are missing between many of the
@ lines below; comments describe only what is visible.
@ ---------------------------------------------------------------------
1873 .global sha256_block_data_order_neon
1874 .type sha256_block_data_order_neon,%function
1876 sha256_block_data_order_neon:
1878 stmdb sp!,{r4-r12,lr}
@ Frame is carved out with 16-byte alignment so vst1.32 ...,:128 below
@ can use aligned stores.
1882 bic r11,r11,#15 @ align for 128-bit stores
1885 add r2,r1,r2,lsl#6 @ len to point at the end of inp
@ Preload the first four K256 quadwords (r14 advances through the table).
1891 vld1.32 {q8},[r14,:128]!
1892 vld1.32 {q9},[r14,:128]!
1893 vld1.32 {q10},[r14,:128]!
1894 vld1.32 {q11},[r14,:128]!
@ Byte-swap the 16 message words to the big-endian order SHA-256 uses.
1895 vrev32.8 q0,q0 @ yes, even on
1897 vrev32.8 q1,q1 @ big-endian
1903 str r12,[sp,#76] @ save original sp
@ Stash X[i]+K256[i] values on the stack for the scalar round code.
1906 vst1.32 {q8},[r1,:128]!
1908 vst1.32 {q9},[r1,:128]!
1910 vst1.32 {q10},[r1,:128]!
1911 vst1.32 {q11},[r1,:128]!
@ The following are sampled fragments of the interleaved scalar rounds:
@ eor/add chains with ror#6/ror#11/ror#20/ror#19 compute pieces of
@ Sigma1(e) and Sigma0(a) while NEON schedules the next message words.
1929 eor r12,r0,r8,ror#19
1934 add r11,r11,r12,ror#6
1944 add r11,r11,r0,ror#2
1956 eor r0,r11,r11,ror#11
1959 add r10,r10,r3,ror#6
1962 eor r0,r0,r11,ror#20
1970 add r10,r10,r0,ror#2
1980 eor r12,r0,r6,ror#19
1982 eor r0,r10,r10,ror#11
1988 eor r0,r0,r10,ror#20
@ Next K256 quadword in, X+K quadword out (pattern repeats per 4 rounds).
1997 vld1.32 {q8},[r14,:128]!
2017 vst1.32 {q8},[r1,:128]!
2027 eor r12,r0,r4,ror#19
2048 eor r0,r11,r11,ror#5
2053 eor r3,r0,r11,ror#19
2074 eor r0,r10,r10,ror#5
2078 eor r12,r0,r10,ror#19
2095 vld1.32 {q8},[r14,:128]!
2115 vst1.32 {q8},[r1,:128]!
2125 eor r12,r0,r8,ror#19
2130 add r11,r11,r12,ror#6
2140 add r11,r11,r0,ror#2
2152 eor r0,r11,r11,ror#11
2155 add r10,r10,r3,ror#6
2158 eor r0,r0,r11,ror#20
2166 add r10,r10,r0,ror#2
2176 eor r12,r0,r6,ror#19
2178 eor r0,r10,r10,ror#11
2184 eor r0,r0,r10,ror#20
2193 vld1.32 {q8},[r14,:128]!
2213 vst1.32 {q8},[r1,:128]!
2223 eor r12,r0,r4,ror#19
2244 eor r0,r11,r11,ror#5
2249 eor r3,r0,r11,ror#19
2270 eor r0,r10,r10,ror#5
2274 eor r12,r0,r10,ror#19
2291 vld1.32 {q8},[r14,:128]!
2311 vst1.32 {q8},[r1,:128]!
@ End-of-table sentinel reached: decide whether another input block
@ follows, rewind the K256 pointer, and (re)load input.
2314 teq r2,#0 @ check for K256 terminator
2321 sub r14,r14,#256 @ rewind r14
@ On the last block, back the input pointer up so the speculative load
@ below stays inside the buffer.
2324 subeq r1,r1,#64 @ avoid SEGV
2325 vld1.8 {q0},[r1]! @ load next input block
@ Remaining 16..63 rounds: same scalar Sigma fragments, K256 reloads and
@ X+K stores as above (heavily sampled here).
2336 vld1.32 {q8},[r14,:128]!
2338 eor r12,r0,r8,ror#19
2342 add r11,r11,r12,ror#6
2350 add r11,r11,r0,ror#2
2358 eor r0,r11,r11,ror#11
2360 add r10,r10,r3,ror#6
2362 eor r0,r0,r11,ror#20
2367 add r10,r10,r0,ror#2
2374 eor r12,r0,r6,ror#19
2375 eor r0,r10,r10,ror#11
2379 eor r0,r0,r10,ror#20
2403 vst1.32 {q8},[r1,:128]!
2408 vld1.32 {q8},[r14,:128]!
2410 eor r12,r0,r4,ror#19
2426 eor r0,r11,r11,ror#5
2429 eor r3,r0,r11,ror#19
2443 eor r0,r10,r10,ror#5
2446 eor r12,r0,r10,ror#19
2475 vst1.32 {q8},[r1,:128]!
2480 vld1.32 {q8},[r14,:128]!
2482 eor r12,r0,r8,ror#19
2486 add r11,r11,r12,ror#6
2494 add r11,r11,r0,ror#2
2502 eor r0,r11,r11,ror#11
2504 add r10,r10,r3,ror#6
2506 eor r0,r0,r11,ror#20
2511 add r10,r10,r0,ror#2
2518 eor r12,r0,r6,ror#19
2519 eor r0,r10,r10,ror#11
2523 eor r0,r0,r10,ror#20
2547 vst1.32 {q8},[r1,:128]!
2552 vld1.32 {q8},[r14,:128]!
2554 eor r12,r0,r4,ror#19
2570 eor r0,r11,r11,ror#5
2573 eor r3,r0,r11,ror#19
2587 eor r0,r10,r10,ror#5
2590 eor r12,r0,r10,ror#19
2619 vst1.32 {q8},[r1,:128]!
@ Fold in the Maj() term deferred from the previous round, then add the
@ working variables back into the hash state.
2621 add r4,r4,r12 @ h+=Maj(a,b,c) from the past
2625 add r4,r4,r0 @ accumulate
@ Epilogue: restore the caller's sp saved at [sp,#76] and return
@ (pc popped directly, so this also works from Thumb callers on >=v5).
2647 ldreq sp,[sp,#76] @ restore original sp
2652 ldmia sp!,{r4-r12,pc}
2653 .size sha256_block_data_order_neon,.-sha256_block_data_order_neon
@ The ARMv8-crypto path below predates assembler support for the SHA-256
@ instructions, so each one is emitted as raw bytes through INST().
@ NOTE(review): two alternative INST() definitions appear here; the
@ #if/#else scaffolding selecting between them (presumably Thumb-2 vs
@ ARM encoding -- the first swizzles the byte order and ORs in 0xc) is
@ elided by this extract -- TODO confirm against the full file.
2655 #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
2658 # define INST(a,b,c,d) .byte c,d|0xc,a,b
2660 # define INST(a,b,c,d) .byte a,b,c,d
@ ---------------------------------------------------------------------
@ sha256_block_data_order_armv8 -- SHA-256 block transform using the
@ ARMv8 Crypto Extensions (per the header: 2.0 cycles/byte on Apple A7).
@ The sha256h/sha256h2/sha256su0/sha256su1 opcodes are emitted as raw
@ bytes via INST() (see the preceding #define) with the intended
@ mnemonic given in each trailing comment.
@ NOTE(review): register contract inferred from visible code -- r0 = hash
@ state (q0,q1 = H[0..7]), r1 = input, r2 = block count, r3 = K256
@ pointer -- TODO confirm against the full file. This extract is
@ non-contiguous (embedded original line numbers jump), so instructions
@ are missing between many of the lines below.
@ ---------------------------------------------------------------------
2663 .type sha256_block_data_order_armv8,%function
2665 sha256_block_data_order_armv8:
@ Load the current hash state H[0..7] into q0/q1.
2667 vld1.32 {q0,q1},[r0]
@ Rewind r3 from the .LARMv8 anchor back to the start of the K256 table.
2670 sub r3,r3,#.LARMv8-K256
2674 add r2,r1,r2,lsl#6 @ len to point at the end of inp
@ Load one 64-byte message block into q8-q11.
2677 vld1.8 {q8-q9},[r1]!
2678 vld1.8 {q10-q11},[r1]!
@ Keep a copy of the incoming state to accumulate into afterwards.
2684 vmov q14,q0 @ offload
@ Rounds 0..47, four at a time: sha256su0/su1 schedule the next message
@ quadword while sha256h/sha256h2 run four rounds with the current K+W.
2689 INST(0xe2,0x03,0xfa,0xf3) @ sha256su0 q8,q9
2691 INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
2692 INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
2693 INST(0xe6,0x0c,0x64,0xf3) @ sha256su1 q8,q10,q11
2696 INST(0xe4,0x23,0xfa,0xf3) @ sha256su0 q9,q10
2698 INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
2699 INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
2700 INST(0xe0,0x2c,0x66,0xf3) @ sha256su1 q9,q11,q8
@ q12/q13 hold K256[i]+W[i] for the next quad-round (vadd.i32 below).
2702 vadd.i32 q12,q12,q10
2703 INST(0xe6,0x43,0xfa,0xf3) @ sha256su0 q10,q11
2705 INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
2706 INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
2707 INST(0xe2,0x4c,0x60,0xf3) @ sha256su1 q10,q8,q9
2709 vadd.i32 q13,q13,q11
2710 INST(0xe0,0x63,0xfa,0xf3) @ sha256su0 q11,q8
2712 INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
2713 INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
2714 INST(0xe4,0x6c,0x62,0xf3) @ sha256su1 q11,q9,q10
2717 INST(0xe2,0x03,0xfa,0xf3) @ sha256su0 q8,q9
2719 INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
2720 INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
2721 INST(0xe6,0x0c,0x64,0xf3) @ sha256su1 q8,q10,q11
2724 INST(0xe4,0x23,0xfa,0xf3) @ sha256su0 q9,q10
2726 INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
2727 INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
2728 INST(0xe0,0x2c,0x66,0xf3) @ sha256su1 q9,q11,q8
2730 vadd.i32 q12,q12,q10
2731 INST(0xe6,0x43,0xfa,0xf3) @ sha256su0 q10,q11
2733 INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
2734 INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
2735 INST(0xe2,0x4c,0x60,0xf3) @ sha256su1 q10,q8,q9
2737 vadd.i32 q13,q13,q11
2738 INST(0xe0,0x63,0xfa,0xf3) @ sha256su0 q11,q8
2740 INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
2741 INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
2742 INST(0xe4,0x6c,0x62,0xf3) @ sha256su1 q11,q9,q10
2745 INST(0xe2,0x03,0xfa,0xf3) @ sha256su0 q8,q9
2747 INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
2748 INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
2749 INST(0xe6,0x0c,0x64,0xf3) @ sha256su1 q8,q10,q11
2752 INST(0xe4,0x23,0xfa,0xf3) @ sha256su0 q9,q10
2754 INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
2755 INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
2756 INST(0xe0,0x2c,0x66,0xf3) @ sha256su1 q9,q11,q8
2758 vadd.i32 q12,q12,q10
2759 INST(0xe6,0x43,0xfa,0xf3) @ sha256su0 q10,q11
2761 INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
2762 INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
2763 INST(0xe2,0x4c,0x60,0xf3) @ sha256su1 q10,q8,q9
2765 vadd.i32 q13,q13,q11
2766 INST(0xe0,0x63,0xfa,0xf3) @ sha256su0 q11,q8
2768 INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
2769 INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
2770 INST(0xe4,0x6c,0x62,0xf3) @ sha256su1 q11,q9,q10
@ Rounds 48..63: no further message scheduling, only the round updates.
2774 INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
2775 INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
2780 INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
2781 INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
2784 vadd.i32 q12,q12,q10
@ Rewind r3 to the start of K256 (table minus the 16 bytes already
@ consumed) for the next input block.
2785 sub r3,r3,#256-16 @ rewind
2787 INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
2788 INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
2790 vadd.i32 q13,q13,q11
2792 INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
2793 INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
@ Write the updated hash state back to the context.
2800 vst1.32 {q0,q1},[r0]
2803 .size sha256_block_data_order_armv8,.-sha256_block_data_order_armv8
2805 .asciz "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro@openssl.org>"
2807 #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
2808 .comm OPENSSL_armcap_P,4,4