@ ====================================================================
@ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@ project. The module is, however, dual licensed under OpenSSL and
@ CRYPTOGAMS licenses depending on where you obtain it. For further
@ details see http://www.openssl.org/~appro/cryptogams/.
@
@ Permission to use under GPL terms is granted.
@ ====================================================================

@ SHA256 block procedure for ARMv4. May 2007.

@ Performance is ~2x better than gcc 3.4 generated code and in
@ "absolute" terms is ~2250 cycles per 64-byte block or ~35 cycles
@ per byte [on single-issue Xscale PXA250 core].

@ July 2010.
@
@ Rescheduling for dual-issue pipeline resulted in 22% improvement on
@ Cortex A8 core and ~20 cycles per processed byte.

@ February 2011.
@
@ Profiler-assisted and platform-specific optimization resulted in 16%
@ improvement on Cortex A8 core and ~15.4 cycles per processed byte.

@ September 2013.
@
@ Add NEON implementation. On Cortex A8 it was measured to process one
@ byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon
@ S4 does it in 12.5 cycles too, but it's 50% faster than integer-only
@ code (meaning that the latter performs sub-optimally; nothing was
@ done about it).

@ May 2014.
@
@ Add ARMv8 code path performing at 2.0 cpb on Apple A7.

#ifndef __KERNEL__
# include "arm_arch.h"
#else
# define __ARM_ARCH__ __LINUX_ARM_ARCH__
# define __ARM_MAX_ARCH__ 7
#endif
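
@ For reference (FIPS 180-4): each of the 64 rounds below computes
@   T1 = h + Sigma1(e) + Ch(e,f,g) + K256[i] + X[i]
@   T2 = Sigma0(a) + Maj(a,b,c)
@   h=g; g=f; f=e; e=d+T1; d=c; c=b; b=a; a=T1+T2
@ with Ch(x,y,z)=(x&y)^(~x&z), Maj(x,y,z)=(x&y)^(x&z)^(y&z),
@ Sigma0(x)=ror(x,2)^ror(x,13)^ror(x,22) and
@ Sigma1(x)=ror(x,6)^ror(x,11)^ror(x,25). The unrolled code keeps all
@ eight state words in registers and rotates the roles of the
@ registers instead of moving data, which is why consecutive rounds
@ differ only in register names.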

.text
#if __ARM_ARCH__<7
.code   32
#else
.syntax unified
# ifdef __thumb2__
#  define adrl adr
.thumb
# else
.code   32
# endif
#endif

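@ K256 holds the round constants: the first 32 bits of the fractional
@ parts of the cube roots of the first sixty-four primes (FIPS 180-4,
@ section 4.2.2).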
.type   K256,%object
.align  5
K256:
.word   0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
.word   0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
.word   0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
.word   0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
.word   0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
.word   0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
.word   0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
.word   0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
.word   0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
.word   0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
.word   0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
.word   0xd192e819,0xd6990624,0xf40e3585,0x106aa070
.word   0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
.word   0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
.word   0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
.word   0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
.size   K256,.-K256
.word   0                               @ terminator
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
.LOPENSSL_armcap:
.word   OPENSSL_armcap_P-sha256_block_data_order
#endif
.align  5

.global sha256_block_data_order
.type   sha256_block_data_order,%function
sha256_block_data_order:
.Lsha256_block_data_order:
#if __ARM_ARCH__<7
        sub     r3,pc,#8                @ sha256_block_data_order
#else
        adr     r3,.Lsha256_block_data_order
#endif
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
        ldr     r12,.LOPENSSL_armcap
        ldr     r12,[r3,r12]            @ OPENSSL_armcap_P
        tst     r12,#ARMV8_SHA256
        bne     .LARMv8
        tst     r12,#ARMV7_NEON
        bne     .LNEON
#endif
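@ Neither ARMv8 SHA256 instructions nor NEON were detected (or this
@ is a kernel build, where the runtime dispatch above is compiled
@ out): fall through to the integer-only ARMv4 code path.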
        add     r2,r1,r2,lsl#6  @ len to point at the end of inp
        stmdb   sp!,{r0,r1,r2,r4-r11,lr}
        ldmia   r0,{r4,r5,r6,r7,r8,r9,r10,r11}
        sub     r14,r3,#256+32  @ K256
        sub     sp,sp,#16*4             @ alloca(X[16])
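@ Register allocation for the scalar loop: r4-r11 hold the state
@ words a,b,c,d,e,f,g,h in that order, r1 walks the input, r14 walks
@ K256, and the 16-word X[] window lives on the stack. r0, r2, r3 and
@ r12 are scratch; r3 and r12 alternate as carriers of the a^b value
@ shared between consecutive Maj() computations.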
.Loop:
# if __ARM_ARCH__>=7
        ldr     r2,[r1],#4
# else
        ldrb    r2,[r1,#3]
# endif
        eor     r3,r5,r6                @ magic
        eor     r12,r12,r12
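@ r3 = b^c and r12 = 0 prime the deferred-Maj trick: each round adds
@ the previous round's Maj(a,b,c) ("from the past"), so the very
@ first round must add zero.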
#if __ARM_ARCH__>=7
        @ ldr   r2,[r1],#4                      @ 0
# if 0==15
        str     r1,[sp,#17*4]                   @ make room for r1
# endif
        eor     r0,r8,r8,ror#5
        add     r4,r4,r12                       @ h+=Maj(a,b,c) from the past
        eor     r0,r0,r8,ror#19 @ Sigma1(e)
# ifndef __ARMEB__
        rev     r2,r2
# endif
#else
        @ ldrb  r2,[r1,#3]                      @ 0
        add     r4,r4,r12                       @ h+=Maj(a,b,c) from the past
        ldrb    r12,[r1,#2]
        ldrb    r0,[r1,#1]
        orr     r2,r2,r12,lsl#8
        ldrb    r12,[r1],#4
        orr     r2,r2,r0,lsl#16
# if 0==15
        str     r1,[sp,#17*4]                   @ make room for r1
# endif
        eor     r0,r8,r8,ror#5
        orr     r2,r2,r12,lsl#24
        eor     r0,r0,r8,ror#19 @ Sigma1(e)
#endif
        ldr     r12,[r14],#4                    @ *K256++
        add     r11,r11,r2                      @ h+=X[i]
        str     r2,[sp,#0*4]
        eor     r2,r9,r10
        add     r11,r11,r0,ror#6        @ h+=Sigma1(e)
        and     r2,r2,r8
        add     r11,r11,r12                     @ h+=K256[i]
        eor     r2,r2,r10                       @ Ch(e,f,g)
        eor     r0,r4,r4,ror#11
        add     r11,r11,r2                      @ h+=Ch(e,f,g)
#if 0==31
        and     r12,r12,#0xff
        cmp     r12,#0xf2                       @ done?
#endif
#if 0<15
# if __ARM_ARCH__>=7
        ldr     r2,[r1],#4                      @ prefetch
# else
        ldrb    r2,[r1,#3]
# endif
        eor     r12,r4,r5                       @ a^b, b^c in next round
#else
        ldr     r2,[sp,#2*4]            @ from future BODY_16_xx
        eor     r12,r4,r5                       @ a^b, b^c in next round
        ldr     r1,[sp,#15*4]   @ from future BODY_16_xx
#endif
        eor     r0,r0,r4,ror#20 @ Sigma0(a)
        and     r3,r3,r12                       @ (b^c)&=(a^b)
        add     r7,r7,r11                       @ d+=h
        eor     r3,r3,r5                        @ Maj(a,b,c)
        add     r11,r11,r0,ror#2        @ h+=Sigma0(a)
        @ add   r11,r11,r3                      @ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
        @ ldr   r2,[r1],#4                      @ 1
# if 1==15
        str     r1,[sp,#17*4]                   @ make room for r1
# endif
        eor     r0,r7,r7,ror#5
        add     r11,r11,r3                      @ h+=Maj(a,b,c) from the past
        eor     r0,r0,r7,ror#19 @ Sigma1(e)
# ifndef __ARMEB__
        rev     r2,r2
# endif
#else
        @ ldrb  r2,[r1,#3]                      @ 1
        add     r11,r11,r3                      @ h+=Maj(a,b,c) from the past
        ldrb    r3,[r1,#2]
        ldrb    r0,[r1,#1]
        orr     r2,r2,r3,lsl#8
        ldrb    r3,[r1],#4
        orr     r2,r2,r0,lsl#16
# if 1==15
        str     r1,[sp,#17*4]                   @ make room for r1
# endif
        eor     r0,r7,r7,ror#5
        orr     r2,r2,r3,lsl#24
        eor     r0,r0,r7,ror#19 @ Sigma1(e)
#endif
        ldr     r3,[r14],#4                     @ *K256++
        add     r10,r10,r2                      @ h+=X[i]
        str     r2,[sp,#1*4]
        eor     r2,r8,r9
        add     r10,r10,r0,ror#6        @ h+=Sigma1(e)
        and     r2,r2,r7
        add     r10,r10,r3                      @ h+=K256[i]
        eor     r2,r2,r9                        @ Ch(e,f,g)
        eor     r0,r11,r11,ror#11
        add     r10,r10,r2                      @ h+=Ch(e,f,g)
#if 1==31
        and     r3,r3,#0xff
        cmp     r3,#0xf2                        @ done?
#endif
#if 1<15
# if __ARM_ARCH__>=7
        ldr     r2,[r1],#4                      @ prefetch
# else
        ldrb    r2,[r1,#3]
# endif
        eor     r3,r11,r4                       @ a^b, b^c in next round
#else
        ldr     r2,[sp,#3*4]            @ from future BODY_16_xx
        eor     r3,r11,r4                       @ a^b, b^c in next round
        ldr     r1,[sp,#0*4]    @ from future BODY_16_xx
#endif
        eor     r0,r0,r11,ror#20        @ Sigma0(a)
        and     r12,r12,r3                      @ (b^c)&=(a^b)
        add     r6,r6,r10                       @ d+=h
        eor     r12,r12,r4                      @ Maj(a,b,c)
        add     r10,r10,r0,ror#2        @ h+=Sigma0(a)
        @ add   r10,r10,r12                     @ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
        @ ldr   r2,[r1],#4                      @ 2
# if 2==15
        str     r1,[sp,#17*4]                   @ make room for r1
# endif
        eor     r0,r6,r6,ror#5
        add     r10,r10,r12                     @ h+=Maj(a,b,c) from the past
        eor     r0,r0,r6,ror#19 @ Sigma1(e)
# ifndef __ARMEB__
        rev     r2,r2
# endif
#else
        @ ldrb  r2,[r1,#3]                      @ 2
        add     r10,r10,r12                     @ h+=Maj(a,b,c) from the past
        ldrb    r12,[r1,#2]
        ldrb    r0,[r1,#1]
        orr     r2,r2,r12,lsl#8
        ldrb    r12,[r1],#4
        orr     r2,r2,r0,lsl#16
# if 2==15
        str     r1,[sp,#17*4]                   @ make room for r1
# endif
        eor     r0,r6,r6,ror#5
        orr     r2,r2,r12,lsl#24
        eor     r0,r0,r6,ror#19 @ Sigma1(e)
#endif
        ldr     r12,[r14],#4                    @ *K256++
        add     r9,r9,r2                        @ h+=X[i]
        str     r2,[sp,#2*4]
        eor     r2,r7,r8
        add     r9,r9,r0,ror#6  @ h+=Sigma1(e)
        and     r2,r2,r6
        add     r9,r9,r12                       @ h+=K256[i]
        eor     r2,r2,r8                        @ Ch(e,f,g)
        eor     r0,r10,r10,ror#11
        add     r9,r9,r2                        @ h+=Ch(e,f,g)
#if 2==31
        and     r12,r12,#0xff
        cmp     r12,#0xf2                       @ done?
#endif
#if 2<15
# if __ARM_ARCH__>=7
        ldr     r2,[r1],#4                      @ prefetch
# else
        ldrb    r2,[r1,#3]
# endif
        eor     r12,r10,r11                     @ a^b, b^c in next round
#else
        ldr     r2,[sp,#4*4]            @ from future BODY_16_xx
        eor     r12,r10,r11                     @ a^b, b^c in next round
        ldr     r1,[sp,#1*4]    @ from future BODY_16_xx
#endif
        eor     r0,r0,r10,ror#20        @ Sigma0(a)
        and     r3,r3,r12                       @ (b^c)&=(a^b)
        add     r5,r5,r9                        @ d+=h
        eor     r3,r3,r11                       @ Maj(a,b,c)
        add     r9,r9,r0,ror#2  @ h+=Sigma0(a)
        @ add   r9,r9,r3                        @ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
        @ ldr   r2,[r1],#4                      @ 3
# if 3==15
        str     r1,[sp,#17*4]                   @ make room for r1
# endif
        eor     r0,r5,r5,ror#5
        add     r9,r9,r3                        @ h+=Maj(a,b,c) from the past
        eor     r0,r0,r5,ror#19 @ Sigma1(e)
# ifndef __ARMEB__
        rev     r2,r2
# endif
#else
        @ ldrb  r2,[r1,#3]                      @ 3
        add     r9,r9,r3                        @ h+=Maj(a,b,c) from the past
        ldrb    r3,[r1,#2]
        ldrb    r0,[r1,#1]
        orr     r2,r2,r3,lsl#8
        ldrb    r3,[r1],#4
        orr     r2,r2,r0,lsl#16
# if 3==15
        str     r1,[sp,#17*4]                   @ make room for r1
# endif
        eor     r0,r5,r5,ror#5
        orr     r2,r2,r3,lsl#24
        eor     r0,r0,r5,ror#19 @ Sigma1(e)
#endif
        ldr     r3,[r14],#4                     @ *K256++
        add     r8,r8,r2                        @ h+=X[i]
        str     r2,[sp,#3*4]
        eor     r2,r6,r7
        add     r8,r8,r0,ror#6  @ h+=Sigma1(e)
        and     r2,r2,r5
        add     r8,r8,r3                        @ h+=K256[i]
        eor     r2,r2,r7                        @ Ch(e,f,g)
        eor     r0,r9,r9,ror#11
        add     r8,r8,r2                        @ h+=Ch(e,f,g)
#if 3==31
        and     r3,r3,#0xff
        cmp     r3,#0xf2                        @ done?
#endif
#if 3<15
# if __ARM_ARCH__>=7
        ldr     r2,[r1],#4                      @ prefetch
# else
        ldrb    r2,[r1,#3]
# endif
        eor     r3,r9,r10                       @ a^b, b^c in next round
#else
        ldr     r2,[sp,#5*4]            @ from future BODY_16_xx
        eor     r3,r9,r10                       @ a^b, b^c in next round
        ldr     r1,[sp,#2*4]    @ from future BODY_16_xx
#endif
        eor     r0,r0,r9,ror#20 @ Sigma0(a)
        and     r12,r12,r3                      @ (b^c)&=(a^b)
        add     r4,r4,r8                        @ d+=h
        eor     r12,r12,r10                     @ Maj(a,b,c)
        add     r8,r8,r0,ror#2  @ h+=Sigma0(a)
        @ add   r8,r8,r12                       @ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
        @ ldr   r2,[r1],#4                      @ 4
# if 4==15
        str     r1,[sp,#17*4]                   @ make room for r1
# endif
        eor     r0,r4,r4,ror#5
        add     r8,r8,r12                       @ h+=Maj(a,b,c) from the past
        eor     r0,r0,r4,ror#19 @ Sigma1(e)
# ifndef __ARMEB__
        rev     r2,r2
# endif
#else
        @ ldrb  r2,[r1,#3]                      @ 4
        add     r8,r8,r12                       @ h+=Maj(a,b,c) from the past
        ldrb    r12,[r1,#2]
        ldrb    r0,[r1,#1]
        orr     r2,r2,r12,lsl#8
        ldrb    r12,[r1],#4
        orr     r2,r2,r0,lsl#16
# if 4==15
        str     r1,[sp,#17*4]                   @ make room for r1
# endif
        eor     r0,r4,r4,ror#5
        orr     r2,r2,r12,lsl#24
        eor     r0,r0,r4,ror#19 @ Sigma1(e)
#endif
        ldr     r12,[r14],#4                    @ *K256++
        add     r7,r7,r2                        @ h+=X[i]
        str     r2,[sp,#4*4]
        eor     r2,r5,r6
        add     r7,r7,r0,ror#6  @ h+=Sigma1(e)
        and     r2,r2,r4
        add     r7,r7,r12                       @ h+=K256[i]
        eor     r2,r2,r6                        @ Ch(e,f,g)
        eor     r0,r8,r8,ror#11
        add     r7,r7,r2                        @ h+=Ch(e,f,g)
#if 4==31
        and     r12,r12,#0xff
        cmp     r12,#0xf2                       @ done?
#endif
#if 4<15
# if __ARM_ARCH__>=7
        ldr     r2,[r1],#4                      @ prefetch
# else
        ldrb    r2,[r1,#3]
# endif
        eor     r12,r8,r9                       @ a^b, b^c in next round
#else
        ldr     r2,[sp,#6*4]            @ from future BODY_16_xx
        eor     r12,r8,r9                       @ a^b, b^c in next round
        ldr     r1,[sp,#3*4]    @ from future BODY_16_xx
#endif
        eor     r0,r0,r8,ror#20 @ Sigma0(a)
        and     r3,r3,r12                       @ (b^c)&=(a^b)
        add     r11,r11,r7                      @ d+=h
        eor     r3,r3,r9                        @ Maj(a,b,c)
        add     r7,r7,r0,ror#2  @ h+=Sigma0(a)
        @ add   r7,r7,r3                        @ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
        @ ldr   r2,[r1],#4                      @ 5
# if 5==15
        str     r1,[sp,#17*4]                   @ make room for r1
# endif
        eor     r0,r11,r11,ror#5
        add     r7,r7,r3                        @ h+=Maj(a,b,c) from the past
        eor     r0,r0,r11,ror#19        @ Sigma1(e)
# ifndef __ARMEB__
        rev     r2,r2
# endif
#else
        @ ldrb  r2,[r1,#3]                      @ 5
        add     r7,r7,r3                        @ h+=Maj(a,b,c) from the past
        ldrb    r3,[r1,#2]
        ldrb    r0,[r1,#1]
        orr     r2,r2,r3,lsl#8
        ldrb    r3,[r1],#4
        orr     r2,r2,r0,lsl#16
# if 5==15
        str     r1,[sp,#17*4]                   @ make room for r1
# endif
        eor     r0,r11,r11,ror#5
        orr     r2,r2,r3,lsl#24
        eor     r0,r0,r11,ror#19        @ Sigma1(e)
#endif
        ldr     r3,[r14],#4                     @ *K256++
        add     r6,r6,r2                        @ h+=X[i]
        str     r2,[sp,#5*4]
        eor     r2,r4,r5
        add     r6,r6,r0,ror#6  @ h+=Sigma1(e)
        and     r2,r2,r11
        add     r6,r6,r3                        @ h+=K256[i]
        eor     r2,r2,r5                        @ Ch(e,f,g)
        eor     r0,r7,r7,ror#11
        add     r6,r6,r2                        @ h+=Ch(e,f,g)
#if 5==31
        and     r3,r3,#0xff
        cmp     r3,#0xf2                        @ done?
#endif
#if 5<15
# if __ARM_ARCH__>=7
        ldr     r2,[r1],#4                      @ prefetch
# else
        ldrb    r2,[r1,#3]
# endif
        eor     r3,r7,r8                        @ a^b, b^c in next round
#else
        ldr     r2,[sp,#7*4]            @ from future BODY_16_xx
        eor     r3,r7,r8                        @ a^b, b^c in next round
        ldr     r1,[sp,#4*4]    @ from future BODY_16_xx
#endif
        eor     r0,r0,r7,ror#20 @ Sigma0(a)
        and     r12,r12,r3                      @ (b^c)&=(a^b)
        add     r10,r10,r6                      @ d+=h
        eor     r12,r12,r8                      @ Maj(a,b,c)
        add     r6,r6,r0,ror#2  @ h+=Sigma0(a)
        @ add   r6,r6,r12                       @ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
        @ ldr   r2,[r1],#4                      @ 6
# if 6==15
        str     r1,[sp,#17*4]                   @ make room for r1
# endif
        eor     r0,r10,r10,ror#5
        add     r6,r6,r12                       @ h+=Maj(a,b,c) from the past
        eor     r0,r0,r10,ror#19        @ Sigma1(e)
# ifndef __ARMEB__
        rev     r2,r2
# endif
#else
        @ ldrb  r2,[r1,#3]                      @ 6
        add     r6,r6,r12                       @ h+=Maj(a,b,c) from the past
        ldrb    r12,[r1,#2]
        ldrb    r0,[r1,#1]
        orr     r2,r2,r12,lsl#8
        ldrb    r12,[r1],#4
        orr     r2,r2,r0,lsl#16
# if 6==15
        str     r1,[sp,#17*4]                   @ make room for r1
# endif
        eor     r0,r10,r10,ror#5
        orr     r2,r2,r12,lsl#24
        eor     r0,r0,r10,ror#19        @ Sigma1(e)
#endif
        ldr     r12,[r14],#4                    @ *K256++
        add     r5,r5,r2                        @ h+=X[i]
        str     r2,[sp,#6*4]
        eor     r2,r11,r4
        add     r5,r5,r0,ror#6  @ h+=Sigma1(e)
        and     r2,r2,r10
        add     r5,r5,r12                       @ h+=K256[i]
        eor     r2,r2,r4                        @ Ch(e,f,g)
        eor     r0,r6,r6,ror#11
        add     r5,r5,r2                        @ h+=Ch(e,f,g)
#if 6==31
        and     r12,r12,#0xff
        cmp     r12,#0xf2                       @ done?
#endif
#if 6<15
# if __ARM_ARCH__>=7
        ldr     r2,[r1],#4                      @ prefetch
# else
        ldrb    r2,[r1,#3]
# endif
        eor     r12,r6,r7                       @ a^b, b^c in next round
#else
        ldr     r2,[sp,#8*4]            @ from future BODY_16_xx
        eor     r12,r6,r7                       @ a^b, b^c in next round
        ldr     r1,[sp,#5*4]    @ from future BODY_16_xx
#endif
        eor     r0,r0,r6,ror#20 @ Sigma0(a)
        and     r3,r3,r12                       @ (b^c)&=(a^b)
        add     r9,r9,r5                        @ d+=h
        eor     r3,r3,r7                        @ Maj(a,b,c)
        add     r5,r5,r0,ror#2  @ h+=Sigma0(a)
        @ add   r5,r5,r3                        @ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
        @ ldr   r2,[r1],#4                      @ 7
# if 7==15
        str     r1,[sp,#17*4]                   @ make room for r1
# endif
        eor     r0,r9,r9,ror#5
        add     r5,r5,r3                        @ h+=Maj(a,b,c) from the past
        eor     r0,r0,r9,ror#19 @ Sigma1(e)
# ifndef __ARMEB__
        rev     r2,r2
# endif
#else
        @ ldrb  r2,[r1,#3]                      @ 7
        add     r5,r5,r3                        @ h+=Maj(a,b,c) from the past
        ldrb    r3,[r1,#2]
        ldrb    r0,[r1,#1]
        orr     r2,r2,r3,lsl#8
        ldrb    r3,[r1],#4
        orr     r2,r2,r0,lsl#16
# if 7==15
        str     r1,[sp,#17*4]                   @ make room for r1
# endif
        eor     r0,r9,r9,ror#5
        orr     r2,r2,r3,lsl#24
        eor     r0,r0,r9,ror#19 @ Sigma1(e)
#endif
        ldr     r3,[r14],#4                     @ *K256++
        add     r4,r4,r2                        @ h+=X[i]
        str     r2,[sp,#7*4]
        eor     r2,r10,r11
        add     r4,r4,r0,ror#6  @ h+=Sigma1(e)
        and     r2,r2,r9
        add     r4,r4,r3                        @ h+=K256[i]
        eor     r2,r2,r11                       @ Ch(e,f,g)
        eor     r0,r5,r5,ror#11
        add     r4,r4,r2                        @ h+=Ch(e,f,g)
#if 7==31
        and     r3,r3,#0xff
        cmp     r3,#0xf2                        @ done?
#endif
#if 7<15
# if __ARM_ARCH__>=7
        ldr     r2,[r1],#4                      @ prefetch
# else
        ldrb    r2,[r1,#3]
# endif
        eor     r3,r5,r6                        @ a^b, b^c in next round
#else
        ldr     r2,[sp,#9*4]            @ from future BODY_16_xx
        eor     r3,r5,r6                        @ a^b, b^c in next round
        ldr     r1,[sp,#6*4]    @ from future BODY_16_xx
#endif
        eor     r0,r0,r5,ror#20 @ Sigma0(a)
        and     r12,r12,r3                      @ (b^c)&=(a^b)
        add     r8,r8,r4                        @ d+=h
        eor     r12,r12,r6                      @ Maj(a,b,c)
        add     r4,r4,r0,ror#2  @ h+=Sigma0(a)
        @ add   r4,r4,r12                       @ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
        @ ldr   r2,[r1],#4                      @ 8
# if 8==15
        str     r1,[sp,#17*4]                   @ make room for r1
# endif
        eor     r0,r8,r8,ror#5
        add     r4,r4,r12                       @ h+=Maj(a,b,c) from the past
        eor     r0,r0,r8,ror#19 @ Sigma1(e)
# ifndef __ARMEB__
        rev     r2,r2
# endif
#else
        @ ldrb  r2,[r1,#3]                      @ 8
        add     r4,r4,r12                       @ h+=Maj(a,b,c) from the past
        ldrb    r12,[r1,#2]
        ldrb    r0,[r1,#1]
        orr     r2,r2,r12,lsl#8
        ldrb    r12,[r1],#4
        orr     r2,r2,r0,lsl#16
# if 8==15
        str     r1,[sp,#17*4]                   @ make room for r1
# endif
        eor     r0,r8,r8,ror#5
        orr     r2,r2,r12,lsl#24
        eor     r0,r0,r8,ror#19 @ Sigma1(e)
#endif
        ldr     r12,[r14],#4                    @ *K256++
        add     r11,r11,r2                      @ h+=X[i]
        str     r2,[sp,#8*4]
        eor     r2,r9,r10
        add     r11,r11,r0,ror#6        @ h+=Sigma1(e)
        and     r2,r2,r8
        add     r11,r11,r12                     @ h+=K256[i]
        eor     r2,r2,r10                       @ Ch(e,f,g)
        eor     r0,r4,r4,ror#11
        add     r11,r11,r2                      @ h+=Ch(e,f,g)
#if 8==31
        and     r12,r12,#0xff
        cmp     r12,#0xf2                       @ done?
#endif
#if 8<15
# if __ARM_ARCH__>=7
        ldr     r2,[r1],#4                      @ prefetch
# else
        ldrb    r2,[r1,#3]
# endif
        eor     r12,r4,r5                       @ a^b, b^c in next round
#else
        ldr     r2,[sp,#10*4]           @ from future BODY_16_xx
        eor     r12,r4,r5                       @ a^b, b^c in next round
        ldr     r1,[sp,#7*4]    @ from future BODY_16_xx
#endif
        eor     r0,r0,r4,ror#20 @ Sigma0(a)
        and     r3,r3,r12                       @ (b^c)&=(a^b)
        add     r7,r7,r11                       @ d+=h
        eor     r3,r3,r5                        @ Maj(a,b,c)
        add     r11,r11,r0,ror#2        @ h+=Sigma0(a)
        @ add   r11,r11,r3                      @ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
        @ ldr   r2,[r1],#4                      @ 9
# if 9==15
        str     r1,[sp,#17*4]                   @ make room for r1
# endif
        eor     r0,r7,r7,ror#5
        add     r11,r11,r3                      @ h+=Maj(a,b,c) from the past
        eor     r0,r0,r7,ror#19 @ Sigma1(e)
# ifndef __ARMEB__
        rev     r2,r2
# endif
#else
        @ ldrb  r2,[r1,#3]                      @ 9
        add     r11,r11,r3                      @ h+=Maj(a,b,c) from the past
        ldrb    r3,[r1,#2]
        ldrb    r0,[r1,#1]
        orr     r2,r2,r3,lsl#8
        ldrb    r3,[r1],#4
        orr     r2,r2,r0,lsl#16
# if 9==15
        str     r1,[sp,#17*4]                   @ make room for r1
# endif
        eor     r0,r7,r7,ror#5
        orr     r2,r2,r3,lsl#24
        eor     r0,r0,r7,ror#19 @ Sigma1(e)
#endif
        ldr     r3,[r14],#4                     @ *K256++
        add     r10,r10,r2                      @ h+=X[i]
        str     r2,[sp,#9*4]
        eor     r2,r8,r9
        add     r10,r10,r0,ror#6        @ h+=Sigma1(e)
        and     r2,r2,r7
        add     r10,r10,r3                      @ h+=K256[i]
        eor     r2,r2,r9                        @ Ch(e,f,g)
        eor     r0,r11,r11,ror#11
        add     r10,r10,r2                      @ h+=Ch(e,f,g)
#if 9==31
        and     r3,r3,#0xff
        cmp     r3,#0xf2                        @ done?
#endif
#if 9<15
# if __ARM_ARCH__>=7
        ldr     r2,[r1],#4                      @ prefetch
# else
        ldrb    r2,[r1,#3]
# endif
        eor     r3,r11,r4                       @ a^b, b^c in next round
#else
        ldr     r2,[sp,#11*4]           @ from future BODY_16_xx
        eor     r3,r11,r4                       @ a^b, b^c in next round
        ldr     r1,[sp,#8*4]    @ from future BODY_16_xx
#endif
        eor     r0,r0,r11,ror#20        @ Sigma0(a)
        and     r12,r12,r3                      @ (b^c)&=(a^b)
        add     r6,r6,r10                       @ d+=h
        eor     r12,r12,r4                      @ Maj(a,b,c)
        add     r10,r10,r0,ror#2        @ h+=Sigma0(a)
        @ add   r10,r10,r12                     @ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
        @ ldr   r2,[r1],#4                      @ 10
# if 10==15
        str     r1,[sp,#17*4]                   @ make room for r1
# endif
        eor     r0,r6,r6,ror#5
        add     r10,r10,r12                     @ h+=Maj(a,b,c) from the past
        eor     r0,r0,r6,ror#19 @ Sigma1(e)
# ifndef __ARMEB__
        rev     r2,r2
# endif
#else
        @ ldrb  r2,[r1,#3]                      @ 10
        add     r10,r10,r12                     @ h+=Maj(a,b,c) from the past
        ldrb    r12,[r1,#2]
        ldrb    r0,[r1,#1]
        orr     r2,r2,r12,lsl#8
        ldrb    r12,[r1],#4
        orr     r2,r2,r0,lsl#16
# if 10==15
        str     r1,[sp,#17*4]                   @ make room for r1
# endif
        eor     r0,r6,r6,ror#5
        orr     r2,r2,r12,lsl#24
        eor     r0,r0,r6,ror#19 @ Sigma1(e)
#endif
        ldr     r12,[r14],#4                    @ *K256++
        add     r9,r9,r2                        @ h+=X[i]
        str     r2,[sp,#10*4]
        eor     r2,r7,r8
        add     r9,r9,r0,ror#6  @ h+=Sigma1(e)
        and     r2,r2,r6
        add     r9,r9,r12                       @ h+=K256[i]
        eor     r2,r2,r8                        @ Ch(e,f,g)
        eor     r0,r10,r10,ror#11
        add     r9,r9,r2                        @ h+=Ch(e,f,g)
#if 10==31
        and     r12,r12,#0xff
        cmp     r12,#0xf2                       @ done?
#endif
#if 10<15
# if __ARM_ARCH__>=7
        ldr     r2,[r1],#4                      @ prefetch
# else
        ldrb    r2,[r1,#3]
# endif
        eor     r12,r10,r11                     @ a^b, b^c in next round
#else
        ldr     r2,[sp,#12*4]           @ from future BODY_16_xx
        eor     r12,r10,r11                     @ a^b, b^c in next round
        ldr     r1,[sp,#9*4]    @ from future BODY_16_xx
#endif
        eor     r0,r0,r10,ror#20        @ Sigma0(a)
        and     r3,r3,r12                       @ (b^c)&=(a^b)
        add     r5,r5,r9                        @ d+=h
        eor     r3,r3,r11                       @ Maj(a,b,c)
        add     r9,r9,r0,ror#2  @ h+=Sigma0(a)
        @ add   r9,r9,r3                        @ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
        @ ldr   r2,[r1],#4                      @ 11
# if 11==15
        str     r1,[sp,#17*4]                   @ make room for r1
# endif
        eor     r0,r5,r5,ror#5
        add     r9,r9,r3                        @ h+=Maj(a,b,c) from the past
        eor     r0,r0,r5,ror#19 @ Sigma1(e)
# ifndef __ARMEB__
        rev     r2,r2
# endif
#else
        @ ldrb  r2,[r1,#3]                      @ 11
        add     r9,r9,r3                        @ h+=Maj(a,b,c) from the past
        ldrb    r3,[r1,#2]
        ldrb    r0,[r1,#1]
        orr     r2,r2,r3,lsl#8
        ldrb    r3,[r1],#4
        orr     r2,r2,r0,lsl#16
# if 11==15
        str     r1,[sp,#17*4]                   @ make room for r1
# endif
        eor     r0,r5,r5,ror#5
        orr     r2,r2,r3,lsl#24
        eor     r0,r0,r5,ror#19 @ Sigma1(e)
#endif
        ldr     r3,[r14],#4                     @ *K256++
        add     r8,r8,r2                        @ h+=X[i]
        str     r2,[sp,#11*4]
        eor     r2,r6,r7
        add     r8,r8,r0,ror#6  @ h+=Sigma1(e)
        and     r2,r2,r5
        add     r8,r8,r3                        @ h+=K256[i]
        eor     r2,r2,r7                        @ Ch(e,f,g)
        eor     r0,r9,r9,ror#11
        add     r8,r8,r2                        @ h+=Ch(e,f,g)
#if 11==31
        and     r3,r3,#0xff
        cmp     r3,#0xf2                        @ done?
#endif
#if 11<15
# if __ARM_ARCH__>=7
        ldr     r2,[r1],#4                      @ prefetch
# else
        ldrb    r2,[r1,#3]
# endif
        eor     r3,r9,r10                       @ a^b, b^c in next round
#else
        ldr     r2,[sp,#13*4]           @ from future BODY_16_xx
        eor     r3,r9,r10                       @ a^b, b^c in next round
        ldr     r1,[sp,#10*4]   @ from future BODY_16_xx
#endif
        eor     r0,r0,r9,ror#20 @ Sigma0(a)
        and     r12,r12,r3                      @ (b^c)&=(a^b)
        add     r4,r4,r8                        @ d+=h
        eor     r12,r12,r10                     @ Maj(a,b,c)
        add     r8,r8,r0,ror#2  @ h+=Sigma0(a)
        @ add   r8,r8,r12                       @ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
        @ ldr   r2,[r1],#4                      @ 12
# if 12==15
        str     r1,[sp,#17*4]                   @ make room for r1
# endif
        eor     r0,r4,r4,ror#5
        add     r8,r8,r12                       @ h+=Maj(a,b,c) from the past
        eor     r0,r0,r4,ror#19 @ Sigma1(e)
# ifndef __ARMEB__
        rev     r2,r2
# endif
#else
        @ ldrb  r2,[r1,#3]                      @ 12
        add     r8,r8,r12                       @ h+=Maj(a,b,c) from the past
        ldrb    r12,[r1,#2]
        ldrb    r0,[r1,#1]
        orr     r2,r2,r12,lsl#8
        ldrb    r12,[r1],#4
        orr     r2,r2,r0,lsl#16
# if 12==15
        str     r1,[sp,#17*4]                   @ make room for r1
# endif
        eor     r0,r4,r4,ror#5
        orr     r2,r2,r12,lsl#24
        eor     r0,r0,r4,ror#19 @ Sigma1(e)
#endif
        ldr     r12,[r14],#4                    @ *K256++
        add     r7,r7,r2                        @ h+=X[i]
        str     r2,[sp,#12*4]
        eor     r2,r5,r6
        add     r7,r7,r0,ror#6  @ h+=Sigma1(e)
        and     r2,r2,r4
        add     r7,r7,r12                       @ h+=K256[i]
        eor     r2,r2,r6                        @ Ch(e,f,g)
        eor     r0,r8,r8,ror#11
        add     r7,r7,r2                        @ h+=Ch(e,f,g)
#if 12==31
        and     r12,r12,#0xff
        cmp     r12,#0xf2                       @ done?
#endif
#if 12<15
# if __ARM_ARCH__>=7
        ldr     r2,[r1],#4                      @ prefetch
# else
        ldrb    r2,[r1,#3]
# endif
        eor     r12,r8,r9                       @ a^b, b^c in next round
#else
        ldr     r2,[sp,#14*4]           @ from future BODY_16_xx
        eor     r12,r8,r9                       @ a^b, b^c in next round
        ldr     r1,[sp,#11*4]   @ from future BODY_16_xx
#endif
        eor     r0,r0,r8,ror#20 @ Sigma0(a)
        and     r3,r3,r12                       @ (b^c)&=(a^b)
        add     r11,r11,r7                      @ d+=h
        eor     r3,r3,r9                        @ Maj(a,b,c)
        add     r7,r7,r0,ror#2  @ h+=Sigma0(a)
        @ add   r7,r7,r3                        @ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
        @ ldr   r2,[r1],#4                      @ 13
# if 13==15
        str     r1,[sp,#17*4]                   @ make room for r1
# endif
        eor     r0,r11,r11,ror#5
        add     r7,r7,r3                        @ h+=Maj(a,b,c) from the past
        eor     r0,r0,r11,ror#19        @ Sigma1(e)
# ifndef __ARMEB__
        rev     r2,r2
# endif
#else
        @ ldrb  r2,[r1,#3]                      @ 13
        add     r7,r7,r3                        @ h+=Maj(a,b,c) from the past
        ldrb    r3,[r1,#2]
        ldrb    r0,[r1,#1]
        orr     r2,r2,r3,lsl#8
        ldrb    r3,[r1],#4
        orr     r2,r2,r0,lsl#16
# if 13==15
        str     r1,[sp,#17*4]                   @ make room for r1
# endif
        eor     r0,r11,r11,ror#5
        orr     r2,r2,r3,lsl#24
        eor     r0,r0,r11,ror#19        @ Sigma1(e)
#endif
        ldr     r3,[r14],#4                     @ *K256++
        add     r6,r6,r2                        @ h+=X[i]
        str     r2,[sp,#13*4]
        eor     r2,r4,r5
        add     r6,r6,r0,ror#6  @ h+=Sigma1(e)
        and     r2,r2,r11
        add     r6,r6,r3                        @ h+=K256[i]
        eor     r2,r2,r5                        @ Ch(e,f,g)
        eor     r0,r7,r7,ror#11
        add     r6,r6,r2                        @ h+=Ch(e,f,g)
#if 13==31
        and     r3,r3,#0xff
        cmp     r3,#0xf2                        @ done?
#endif
#if 13<15
# if __ARM_ARCH__>=7
        ldr     r2,[r1],#4                      @ prefetch
# else
        ldrb    r2,[r1,#3]
# endif
        eor     r3,r7,r8                        @ a^b, b^c in next round
#else
        ldr     r2,[sp,#15*4]           @ from future BODY_16_xx
        eor     r3,r7,r8                        @ a^b, b^c in next round
        ldr     r1,[sp,#12*4]   @ from future BODY_16_xx
#endif
        eor     r0,r0,r7,ror#20 @ Sigma0(a)
        and     r12,r12,r3                      @ (b^c)&=(a^b)
        add     r10,r10,r6                      @ d+=h
        eor     r12,r12,r8                      @ Maj(a,b,c)
        add     r6,r6,r0,ror#2  @ h+=Sigma0(a)
        @ add   r6,r6,r12                       @ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
        @ ldr   r2,[r1],#4                      @ 14
# if 14==15
        str     r1,[sp,#17*4]                   @ make room for r1
# endif
        eor     r0,r10,r10,ror#5
        add     r6,r6,r12                       @ h+=Maj(a,b,c) from the past
        eor     r0,r0,r10,ror#19        @ Sigma1(e)
# ifndef __ARMEB__
        rev     r2,r2
# endif
#else
        @ ldrb  r2,[r1,#3]                      @ 14
        add     r6,r6,r12                       @ h+=Maj(a,b,c) from the past
        ldrb    r12,[r1,#2]
        ldrb    r0,[r1,#1]
        orr     r2,r2,r12,lsl#8
        ldrb    r12,[r1],#4
        orr     r2,r2,r0,lsl#16
# if 14==15
        str     r1,[sp,#17*4]                   @ make room for r1
# endif
        eor     r0,r10,r10,ror#5
        orr     r2,r2,r12,lsl#24
        eor     r0,r0,r10,ror#19        @ Sigma1(e)
#endif
        ldr     r12,[r14],#4                    @ *K256++
        add     r5,r5,r2                        @ h+=X[i]
        str     r2,[sp,#14*4]
        eor     r2,r11,r4
        add     r5,r5,r0,ror#6  @ h+=Sigma1(e)
        and     r2,r2,r10
        add     r5,r5,r12                       @ h+=K256[i]
        eor     r2,r2,r4                        @ Ch(e,f,g)
        eor     r0,r6,r6,ror#11
        add     r5,r5,r2                        @ h+=Ch(e,f,g)
#if 14==31
        and     r12,r12,#0xff
        cmp     r12,#0xf2                       @ done?
#endif
#if 14<15
# if __ARM_ARCH__>=7
        ldr     r2,[r1],#4                      @ prefetch
# else
        ldrb    r2,[r1,#3]
# endif
        eor     r12,r6,r7                       @ a^b, b^c in next round
#else
        ldr     r2,[sp,#0*4]            @ from future BODY_16_xx
        eor     r12,r6,r7                       @ a^b, b^c in next round
        ldr     r1,[sp,#13*4]   @ from future BODY_16_xx
#endif
        eor     r0,r0,r6,ror#20 @ Sigma0(a)
        and     r3,r3,r12                       @ (b^c)&=(a^b)
        add     r9,r9,r5                        @ d+=h
        eor     r3,r3,r7                        @ Maj(a,b,c)
        add     r5,r5,r0,ror#2  @ h+=Sigma0(a)
        @ add   r5,r5,r3                        @ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
        @ ldr   r2,[r1],#4                      @ 15
# if 15==15
        str     r1,[sp,#17*4]                   @ make room for r1
# endif
        eor     r0,r9,r9,ror#5
        add     r5,r5,r3                        @ h+=Maj(a,b,c) from the past
        eor     r0,r0,r9,ror#19 @ Sigma1(e)
# ifndef __ARMEB__
        rev     r2,r2
# endif
#else
        @ ldrb  r2,[r1,#3]                      @ 15
        add     r5,r5,r3                        @ h+=Maj(a,b,c) from the past
        ldrb    r3,[r1,#2]
        ldrb    r0,[r1,#1]
        orr     r2,r2,r3,lsl#8
        ldrb    r3,[r1],#4
        orr     r2,r2,r0,lsl#16
# if 15==15
        str     r1,[sp,#17*4]                   @ make room for r1
# endif
        eor     r0,r9,r9,ror#5
        orr     r2,r2,r3,lsl#24
        eor     r0,r0,r9,ror#19 @ Sigma1(e)
#endif
        ldr     r3,[r14],#4                     @ *K256++
        add     r4,r4,r2                        @ h+=X[i]
        str     r2,[sp,#15*4]
        eor     r2,r10,r11
        add     r4,r4,r0,ror#6  @ h+=Sigma1(e)
        and     r2,r2,r9
        add     r4,r4,r3                        @ h+=K256[i]
        eor     r2,r2,r11                       @ Ch(e,f,g)
        eor     r0,r5,r5,ror#11
        add     r4,r4,r2                        @ h+=Ch(e,f,g)
#if 15==31
        and     r3,r3,#0xff
        cmp     r3,#0xf2                        @ done?
#endif
#if 15<15
# if __ARM_ARCH__>=7
        ldr     r2,[r1],#4                      @ prefetch
# else
        ldrb    r2,[r1,#3]
# endif
        eor     r3,r5,r6                        @ a^b, b^c in next round
#else
        ldr     r2,[sp,#1*4]            @ from future BODY_16_xx
        eor     r3,r5,r6                        @ a^b, b^c in next round
        ldr     r1,[sp,#14*4]   @ from future BODY_16_xx
#endif
        eor     r0,r0,r5,ror#20 @ Sigma0(a)
        and     r12,r12,r3                      @ (b^c)&=(a^b)
        add     r8,r8,r4                        @ d+=h
        eor     r12,r12,r6                      @ Maj(a,b,c)
        add     r4,r4,r0,ror#2  @ h+=Sigma0(a)
        @ add   r4,r4,r12                       @ h+=Maj(a,b,c)
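
@ Rounds 16..63 extend the message schedule in place:
@   X[i&15] += sigma1(X[(i+14)&15]) + X[(i+9)&15] + sigma0(X[(i+1)&15])
@ with sigma0(x)=ror(x,7)^ror(x,18)^(x>>3) and
@ sigma1(x)=ror(x,17)^ror(x,19)^(x>>10), matching the ror/lsr pairs
@ below; otherwise each round body is the same as in rounds 0..15.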
.Lrounds_16_xx:
        @ ldr   r2,[sp,#1*4]            @ 16
        @ ldr   r1,[sp,#14*4]
        mov     r0,r2,ror#7
        add     r4,r4,r12                       @ h+=Maj(a,b,c) from the past
        mov     r12,r1,ror#17
        eor     r0,r0,r2,ror#18
        eor     r12,r12,r1,ror#19
        eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
        ldr     r2,[sp,#0*4]
        eor     r12,r12,r1,lsr#10       @ sigma1(X[i+14])
        ldr     r1,[sp,#9*4]

        add     r12,r12,r0
        eor     r0,r8,r8,ror#5  @ from BODY_00_15
        add     r2,r2,r12
        eor     r0,r0,r8,ror#19 @ Sigma1(e)
        add     r2,r2,r1                        @ X[i]
        ldr     r12,[r14],#4                    @ *K256++
        add     r11,r11,r2                      @ h+=X[i]
        str     r2,[sp,#0*4]
        eor     r2,r9,r10
        add     r11,r11,r0,ror#6        @ h+=Sigma1(e)
        and     r2,r2,r8
        add     r11,r11,r12                     @ h+=K256[i]
        eor     r2,r2,r10                       @ Ch(e,f,g)
        eor     r0,r4,r4,ror#11
        add     r11,r11,r2                      @ h+=Ch(e,f,g)
#if 16==31
        and     r12,r12,#0xff
        cmp     r12,#0xf2                       @ done?
#endif
#if 16<15
# if __ARM_ARCH__>=7
        ldr     r2,[r1],#4                      @ prefetch
# else
        ldrb    r2,[r1,#3]
# endif
        eor     r12,r4,r5                       @ a^b, b^c in next round
#else
        ldr     r2,[sp,#2*4]            @ from future BODY_16_xx
        eor     r12,r4,r5                       @ a^b, b^c in next round
        ldr     r1,[sp,#15*4]   @ from future BODY_16_xx
#endif
        eor     r0,r0,r4,ror#20 @ Sigma0(a)
        and     r3,r3,r12                       @ (b^c)&=(a^b)
        add     r7,r7,r11                       @ d+=h
        eor     r3,r3,r5                        @ Maj(a,b,c)
        add     r11,r11,r0,ror#2        @ h+=Sigma0(a)
        @ add   r11,r11,r3                      @ h+=Maj(a,b,c)
        @ ldr   r2,[sp,#2*4]            @ 17
        @ ldr   r1,[sp,#15*4]
        mov     r0,r2,ror#7
        add     r11,r11,r3                      @ h+=Maj(a,b,c) from the past
        mov     r3,r1,ror#17
        eor     r0,r0,r2,ror#18
        eor     r3,r3,r1,ror#19
        eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
        ldr     r2,[sp,#1*4]
        eor     r3,r3,r1,lsr#10 @ sigma1(X[i+14])
        ldr     r1,[sp,#10*4]

        add     r3,r3,r0
        eor     r0,r7,r7,ror#5  @ from BODY_00_15
        add     r2,r2,r3
        eor     r0,r0,r7,ror#19 @ Sigma1(e)
        add     r2,r2,r1                        @ X[i]
        ldr     r3,[r14],#4                     @ *K256++
        add     r10,r10,r2                      @ h+=X[i]
        str     r2,[sp,#1*4]
        eor     r2,r8,r9
        add     r10,r10,r0,ror#6        @ h+=Sigma1(e)
        and     r2,r2,r7
        add     r10,r10,r3                      @ h+=K256[i]
        eor     r2,r2,r9                        @ Ch(e,f,g)
        eor     r0,r11,r11,ror#11
        add     r10,r10,r2                      @ h+=Ch(e,f,g)
#if 17==31
        and     r3,r3,#0xff
        cmp     r3,#0xf2                        @ done?
#endif
#if 17<15
# if __ARM_ARCH__>=7
        ldr     r2,[r1],#4                      @ prefetch
# else
        ldrb    r2,[r1,#3]
# endif
        eor     r3,r11,r4                       @ a^b, b^c in next round
#else
        ldr     r2,[sp,#3*4]            @ from future BODY_16_xx
        eor     r3,r11,r4                       @ a^b, b^c in next round
        ldr     r1,[sp,#0*4]    @ from future BODY_16_xx
#endif
        eor     r0,r0,r11,ror#20        @ Sigma0(a)
        and     r12,r12,r3                      @ (b^c)&=(a^b)
        add     r6,r6,r10                       @ d+=h
        eor     r12,r12,r4                      @ Maj(a,b,c)
        add     r10,r10,r0,ror#2        @ h+=Sigma0(a)
        @ add   r10,r10,r12                     @ h+=Maj(a,b,c)
        @ ldr   r2,[sp,#3*4]            @ 18
        @ ldr   r1,[sp,#0*4]
        mov     r0,r2,ror#7
        add     r10,r10,r12                     @ h+=Maj(a,b,c) from the past
        mov     r12,r1,ror#17
        eor     r0,r0,r2,ror#18
        eor     r12,r12,r1,ror#19
        eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
        ldr     r2,[sp,#2*4]
        eor     r12,r12,r1,lsr#10       @ sigma1(X[i+14])
        ldr     r1,[sp,#11*4]

        add     r12,r12,r0
        eor     r0,r6,r6,ror#5  @ from BODY_00_15
        add     r2,r2,r12
        eor     r0,r0,r6,ror#19 @ Sigma1(e)
        add     r2,r2,r1                        @ X[i]
        ldr     r12,[r14],#4                    @ *K256++
        add     r9,r9,r2                        @ h+=X[i]
        str     r2,[sp,#2*4]
        eor     r2,r7,r8
        add     r9,r9,r0,ror#6  @ h+=Sigma1(e)
        and     r2,r2,r6
        add     r9,r9,r12                       @ h+=K256[i]
        eor     r2,r2,r8                        @ Ch(e,f,g)
        eor     r0,r10,r10,ror#11
        add     r9,r9,r2                        @ h+=Ch(e,f,g)
#if 18==31
        and     r12,r12,#0xff
        cmp     r12,#0xf2                       @ done?
#endif
#if 18<15
# if __ARM_ARCH__>=7
        ldr     r2,[r1],#4                      @ prefetch
# else
        ldrb    r2,[r1,#3]
# endif
        eor     r12,r10,r11                     @ a^b, b^c in next round
#else
        ldr     r2,[sp,#4*4]            @ from future BODY_16_xx
        eor     r12,r10,r11                     @ a^b, b^c in next round
        ldr     r1,[sp,#1*4]    @ from future BODY_16_xx
#endif
        eor     r0,r0,r10,ror#20        @ Sigma0(a)
        and     r3,r3,r12                       @ (b^c)&=(a^b)
        add     r5,r5,r9                        @ d+=h
        eor     r3,r3,r11                       @ Maj(a,b,c)
        add     r9,r9,r0,ror#2  @ h+=Sigma0(a)
        @ add   r9,r9,r3                        @ h+=Maj(a,b,c)
        @ ldr   r2,[sp,#4*4]            @ 19
        @ ldr   r1,[sp,#1*4]
        mov     r0,r2,ror#7
        add     r9,r9,r3                        @ h+=Maj(a,b,c) from the past
        mov     r3,r1,ror#17
        eor     r0,r0,r2,ror#18
        eor     r3,r3,r1,ror#19
        eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
        ldr     r2,[sp,#3*4]
        eor     r3,r3,r1,lsr#10 @ sigma1(X[i+14])
        ldr     r1,[sp,#12*4]

        add     r3,r3,r0
        eor     r0,r5,r5,ror#5  @ from BODY_00_15
        add     r2,r2,r3
        eor     r0,r0,r5,ror#19 @ Sigma1(e)
        add     r2,r2,r1                        @ X[i]
        ldr     r3,[r14],#4                     @ *K256++
        add     r8,r8,r2                        @ h+=X[i]
        str     r2,[sp,#3*4]
        eor     r2,r6,r7
        add     r8,r8,r0,ror#6  @ h+=Sigma1(e)
        and     r2,r2,r5
        add     r8,r8,r3                        @ h+=K256[i]
        eor     r2,r2,r7                        @ Ch(e,f,g)
        eor     r0,r9,r9,ror#11
        add     r8,r8,r2                        @ h+=Ch(e,f,g)
#if 19==31
        and     r3,r3,#0xff
        cmp     r3,#0xf2                        @ done?
#endif
#if 19<15
# if __ARM_ARCH__>=7
        ldr     r2,[r1],#4                      @ prefetch
# else
        ldrb    r2,[r1,#3]
# endif
        eor     r3,r9,r10                       @ a^b, b^c in next round
#else
        ldr     r2,[sp,#5*4]            @ from future BODY_16_xx
        eor     r3,r9,r10                       @ a^b, b^c in next round
        ldr     r1,[sp,#2*4]    @ from future BODY_16_xx
#endif
        eor     r0,r0,r9,ror#20 @ Sigma0(a)
        and     r12,r12,r3                      @ (b^c)&=(a^b)
        add     r4,r4,r8                        @ d+=h
        eor     r12,r12,r10                     @ Maj(a,b,c)
        add     r8,r8,r0,ror#2  @ h+=Sigma0(a)
        @ add   r8,r8,r12                       @ h+=Maj(a,b,c)
        @ ldr   r2,[sp,#5*4]            @ 20
        @ ldr   r1,[sp,#2*4]
        mov     r0,r2,ror#7
        add     r8,r8,r12                       @ h+=Maj(a,b,c) from the past
        mov     r12,r1,ror#17
        eor     r0,r0,r2,ror#18
        eor     r12,r12,r1,ror#19
        eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
        ldr     r2,[sp,#4*4]
        eor     r12,r12,r1,lsr#10       @ sigma1(X[i+14])
        ldr     r1,[sp,#13*4]

        add     r12,r12,r0
        eor     r0,r4,r4,ror#5  @ from BODY_00_15
        add     r2,r2,r12
        eor     r0,r0,r4,ror#19 @ Sigma1(e)
        add     r2,r2,r1                        @ X[i]
        ldr     r12,[r14],#4                    @ *K256++
        add     r7,r7,r2                        @ h+=X[i]
        str     r2,[sp,#4*4]
        eor     r2,r5,r6
        add     r7,r7,r0,ror#6  @ h+=Sigma1(e)
        and     r2,r2,r4
        add     r7,r7,r12                       @ h+=K256[i]
        eor     r2,r2,r6                        @ Ch(e,f,g)
        eor     r0,r8,r8,ror#11
        add     r7,r7,r2                        @ h+=Ch(e,f,g)
#if 20==31
        and     r12,r12,#0xff
        cmp     r12,#0xf2                       @ done?
#endif
#if 20<15
# if __ARM_ARCH__>=7
        ldr     r2,[r1],#4                      @ prefetch
# else
        ldrb    r2,[r1,#3]
# endif
        eor     r12,r8,r9                       @ a^b, b^c in next round
#else
        ldr     r2,[sp,#6*4]            @ from future BODY_16_xx
        eor     r12,r8,r9                       @ a^b, b^c in next round
        ldr     r1,[sp,#3*4]    @ from future BODY_16_xx
#endif
        eor     r0,r0,r8,ror#20 @ Sigma0(a)
        and     r3,r3,r12                       @ (b^c)&=(a^b)
        add     r11,r11,r7                      @ d+=h
        eor     r3,r3,r9                        @ Maj(a,b,c)
        add     r7,r7,r0,ror#2  @ h+=Sigma0(a)
        @ add   r7,r7,r3                        @ h+=Maj(a,b,c)
        @ ldr   r2,[sp,#6*4]            @ 21
        @ ldr   r1,[sp,#3*4]
        mov     r0,r2,ror#7
        add     r7,r7,r3                        @ h+=Maj(a,b,c) from the past
        mov     r3,r1,ror#17
        eor     r0,r0,r2,ror#18
        eor     r3,r3,r1,ror#19
        eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
        ldr     r2,[sp,#5*4]
        eor     r3,r3,r1,lsr#10 @ sigma1(X[i+14])
        ldr     r1,[sp,#14*4]

        add     r3,r3,r0
        eor     r0,r11,r11,ror#5        @ from BODY_00_15
        add     r2,r2,r3
        eor     r0,r0,r11,ror#19        @ Sigma1(e)
        add     r2,r2,r1                        @ X[i]
        ldr     r3,[r14],#4                     @ *K256++
        add     r6,r6,r2                        @ h+=X[i]
        str     r2,[sp,#5*4]
        eor     r2,r4,r5
        add     r6,r6,r0,ror#6  @ h+=Sigma1(e)
        and     r2,r2,r11
        add     r6,r6,r3                        @ h+=K256[i]
        eor     r2,r2,r5                        @ Ch(e,f,g)
        eor     r0,r7,r7,ror#11
        add     r6,r6,r2                        @ h+=Ch(e,f,g)
#if 21==31
        and     r3,r3,#0xff
        cmp     r3,#0xf2                        @ done?
#endif
#if 21<15
# if __ARM_ARCH__>=7
        ldr     r2,[r1],#4                      @ prefetch
# else
        ldrb    r2,[r1,#3]
# endif
        eor     r3,r7,r8                        @ a^b, b^c in next round
#else
        ldr     r2,[sp,#7*4]            @ from future BODY_16_xx
        eor     r3,r7,r8                        @ a^b, b^c in next round
        ldr     r1,[sp,#4*4]    @ from future BODY_16_xx
#endif
        eor     r0,r0,r7,ror#20 @ Sigma0(a)
        and     r12,r12,r3                      @ (b^c)&=(a^b)
        add     r10,r10,r6                      @ d+=h
        eor     r12,r12,r8                      @ Maj(a,b,c)
        add     r6,r6,r0,ror#2  @ h+=Sigma0(a)
        @ add   r6,r6,r12                       @ h+=Maj(a,b,c)
        @ ldr   r2,[sp,#7*4]            @ 22
        @ ldr   r1,[sp,#4*4]
        mov     r0,r2,ror#7
        add     r6,r6,r12                       @ h+=Maj(a,b,c) from the past
        mov     r12,r1,ror#17
        eor     r0,r0,r2,ror#18
        eor     r12,r12,r1,ror#19
        eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
        ldr     r2,[sp,#6*4]
        eor     r12,r12,r1,lsr#10       @ sigma1(X[i+14])
        ldr     r1,[sp,#15*4]

        add     r12,r12,r0
        eor     r0,r10,r10,ror#5        @ from BODY_00_15
        add     r2,r2,r12
        eor     r0,r0,r10,ror#19        @ Sigma1(e)
        add     r2,r2,r1                        @ X[i]
        ldr     r12,[r14],#4                    @ *K256++
        add     r5,r5,r2                        @ h+=X[i]
        str     r2,[sp,#6*4]
        eor     r2,r11,r4
        add     r5,r5,r0,ror#6  @ h+=Sigma1(e)
        and     r2,r2,r10
        add     r5,r5,r12                       @ h+=K256[i]
        eor     r2,r2,r4                        @ Ch(e,f,g)
        eor     r0,r6,r6,ror#11
        add     r5,r5,r2                        @ h+=Ch(e,f,g)
#if 22==31
        and     r12,r12,#0xff
        cmp     r12,#0xf2                       @ done?
#endif
#if 22<15
# if __ARM_ARCH__>=7
        ldr     r2,[r1],#4                      @ prefetch
# else
        ldrb    r2,[r1,#3]
# endif
        eor     r12,r6,r7                       @ a^b, b^c in next round
#else
        ldr     r2,[sp,#8*4]            @ from future BODY_16_xx
        eor     r12,r6,r7                       @ a^b, b^c in next round
        ldr     r1,[sp,#5*4]    @ from future BODY_16_xx
#endif
        eor     r0,r0,r6,ror#20 @ Sigma0(a)
        and     r3,r3,r12                       @ (b^c)&=(a^b)
        add     r9,r9,r5                        @ d+=h
        eor     r3,r3,r7                        @ Maj(a,b,c)
        add     r5,r5,r0,ror#2  @ h+=Sigma0(a)
        @ add   r5,r5,r3                        @ h+=Maj(a,b,c)
        @ ldr   r2,[sp,#8*4]            @ 23
        @ ldr   r1,[sp,#5*4]
        mov     r0,r2,ror#7
        add     r5,r5,r3                        @ h+=Maj(a,b,c) from the past
        mov     r3,r1,ror#17
        eor     r0,r0,r2,ror#18
        eor     r3,r3,r1,ror#19
        eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
        ldr     r2,[sp,#7*4]
        eor     r3,r3,r1,lsr#10 @ sigma1(X[i+14])
        ldr     r1,[sp,#0*4]

        add     r3,r3,r0
        eor     r0,r9,r9,ror#5  @ from BODY_00_15
        add     r2,r2,r3
        eor     r0,r0,r9,ror#19 @ Sigma1(e)
        add     r2,r2,r1                        @ X[i]
        ldr     r3,[r14],#4                     @ *K256++
        add     r4,r4,r2                        @ h+=X[i]
        str     r2,[sp,#7*4]
        eor     r2,r10,r11
        add     r4,r4,r0,ror#6  @ h+=Sigma1(e)
        and     r2,r2,r9
        add     r4,r4,r3                        @ h+=K256[i]
        eor     r2,r2,r11                       @ Ch(e,f,g)
        eor     r0,r5,r5,ror#11
        add     r4,r4,r2                        @ h+=Ch(e,f,g)
#if 23==31
        and     r3,r3,#0xff
        cmp     r3,#0xf2                        @ done?
#endif
#if 23<15
# if __ARM_ARCH__>=7
        ldr     r2,[r1],#4                      @ prefetch
# else
        ldrb    r2,[r1,#3]
# endif
        eor     r3,r5,r6                        @ a^b, b^c in next round
#else
        ldr     r2,[sp,#9*4]            @ from future BODY_16_xx
        eor     r3,r5,r6                        @ a^b, b^c in next round
        ldr     r1,[sp,#6*4]    @ from future BODY_16_xx
#endif
        eor     r0,r0,r5,ror#20 @ Sigma0(a)
        and     r12,r12,r3                      @ (b^c)&=(a^b)
        add     r8,r8,r4                        @ d+=h
        eor     r12,r12,r6                      @ Maj(a,b,c)
        add     r4,r4,r0,ror#2  @ h+=Sigma0(a)
        @ add   r4,r4,r12                       @ h+=Maj(a,b,c)
        @ ldr   r2,[sp,#9*4]            @ 24
        @ ldr   r1,[sp,#6*4]
        mov     r0,r2,ror#7
        add     r4,r4,r12                       @ h+=Maj(a,b,c) from the past
        mov     r12,r1,ror#17
        eor     r0,r0,r2,ror#18
        eor     r12,r12,r1,ror#19
        eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
        ldr     r2,[sp,#8*4]
        eor     r12,r12,r1,lsr#10       @ sigma1(X[i+14])
1447         ldr     r1,[sp,#1*4]
1448
1449         add     r12,r12,r0
1450         eor     r0,r8,r8,ror#5  @ from BODY_00_15
1451         add     r2,r2,r12
1452         eor     r0,r0,r8,ror#19 @ Sigma1(e)
1453         add     r2,r2,r1                        @ X[i]
1454         ldr     r12,[r14],#4                    @ *K256++
1455         add     r11,r11,r2                      @ h+=X[i]
1456         str     r2,[sp,#8*4]
1457         eor     r2,r9,r10
1458         add     r11,r11,r0,ror#6        @ h+=Sigma1(e)
1459         and     r2,r2,r8
1460         add     r11,r11,r12                     @ h+=K256[i]
1461         eor     r2,r2,r10                       @ Ch(e,f,g)
1462         eor     r0,r4,r4,ror#11
1463         add     r11,r11,r2                      @ h+=Ch(e,f,g)
1464 #if 24==31
1465         and     r12,r12,#0xff
1466         cmp     r12,#0xf2                       @ done?
1467 #endif
1468 #if 24<15
1469 # if __ARM_ARCH__>=7
1470         ldr     r2,[r1],#4                      @ prefetch
1471 # else
1472         ldrb    r2,[r1,#3]
1473 # endif
1474         eor     r12,r4,r5                       @ a^b, b^c in next round
1475 #else
1476         ldr     r2,[sp,#10*4]           @ from future BODY_16_xx
1477         eor     r12,r4,r5                       @ a^b, b^c in next round
1478         ldr     r1,[sp,#7*4]    @ from future BODY_16_xx
1479 #endif
1480         eor     r0,r0,r4,ror#20 @ Sigma0(a)
1481         and     r3,r3,r12                       @ (b^c)&=(a^b)
1482         add     r7,r7,r11                       @ d+=h
1483         eor     r3,r3,r5                        @ Maj(a,b,c)
1484         add     r11,r11,r0,ror#2        @ h+=Sigma0(a)
1485         @ add   r11,r11,r3                      @ h+=Maj(a,b,c)
1486         @ ldr   r2,[sp,#10*4]           @ 25
1487         @ ldr   r1,[sp,#7*4]
1488         mov     r0,r2,ror#7
1489         add     r11,r11,r3                      @ h+=Maj(a,b,c) from the past
1490         mov     r3,r1,ror#17
1491         eor     r0,r0,r2,ror#18
1492         eor     r3,r3,r1,ror#19
1493         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1494         ldr     r2,[sp,#9*4]
1495         eor     r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1496         ldr     r1,[sp,#2*4]
1497
1498         add     r3,r3,r0
1499         eor     r0,r7,r7,ror#5  @ from BODY_00_15
1500         add     r2,r2,r3
1501         eor     r0,r0,r7,ror#19 @ Sigma1(e)
1502         add     r2,r2,r1                        @ X[i]
1503         ldr     r3,[r14],#4                     @ *K256++
1504         add     r10,r10,r2                      @ h+=X[i]
1505         str     r2,[sp,#9*4]
1506         eor     r2,r8,r9
1507         add     r10,r10,r0,ror#6        @ h+=Sigma1(e)
1508         and     r2,r2,r7
1509         add     r10,r10,r3                      @ h+=K256[i]
1510         eor     r2,r2,r9                        @ Ch(e,f,g)
1511         eor     r0,r11,r11,ror#11
1512         add     r10,r10,r2                      @ h+=Ch(e,f,g)
1513 #if 25==31
1514         and     r3,r3,#0xff
1515         cmp     r3,#0xf2                        @ done?
1516 #endif
1517 #if 25<15
1518 # if __ARM_ARCH__>=7
1519         ldr     r2,[r1],#4                      @ prefetch
1520 # else
1521         ldrb    r2,[r1,#3]
1522 # endif
1523         eor     r3,r11,r4                       @ a^b, b^c in next round
1524 #else
1525         ldr     r2,[sp,#11*4]           @ from future BODY_16_xx
1526         eor     r3,r11,r4                       @ a^b, b^c in next round
1527         ldr     r1,[sp,#8*4]    @ from future BODY_16_xx
1528 #endif
1529         eor     r0,r0,r11,ror#20        @ Sigma0(a)
1530         and     r12,r12,r3                      @ (b^c)&=(a^b)
1531         add     r6,r6,r10                       @ d+=h
1532         eor     r12,r12,r4                      @ Maj(a,b,c)
1533         add     r10,r10,r0,ror#2        @ h+=Sigma0(a)
1534         @ add   r10,r10,r12                     @ h+=Maj(a,b,c)
1535         @ ldr   r2,[sp,#11*4]           @ 26
1536         @ ldr   r1,[sp,#8*4]
1537         mov     r0,r2,ror#7
1538         add     r10,r10,r12                     @ h+=Maj(a,b,c) from the past
1539         mov     r12,r1,ror#17
1540         eor     r0,r0,r2,ror#18
1541         eor     r12,r12,r1,ror#19
1542         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1543         ldr     r2,[sp,#10*4]
1544         eor     r12,r12,r1,lsr#10       @ sigma1(X[i+14])
1545         ldr     r1,[sp,#3*4]
1546
1547         add     r12,r12,r0
1548         eor     r0,r6,r6,ror#5  @ from BODY_00_15
1549         add     r2,r2,r12
1550         eor     r0,r0,r6,ror#19 @ Sigma1(e)
1551         add     r2,r2,r1                        @ X[i]
1552         ldr     r12,[r14],#4                    @ *K256++
1553         add     r9,r9,r2                        @ h+=X[i]
1554         str     r2,[sp,#10*4]
1555         eor     r2,r7,r8
1556         add     r9,r9,r0,ror#6  @ h+=Sigma1(e)
1557         and     r2,r2,r6
1558         add     r9,r9,r12                       @ h+=K256[i]
1559         eor     r2,r2,r8                        @ Ch(e,f,g)
1560         eor     r0,r10,r10,ror#11
1561         add     r9,r9,r2                        @ h+=Ch(e,f,g)
1562 #if 26==31
1563         and     r12,r12,#0xff
1564         cmp     r12,#0xf2                       @ done?
1565 #endif
1566 #if 26<15
1567 # if __ARM_ARCH__>=7
1568         ldr     r2,[r1],#4                      @ prefetch
1569 # else
1570         ldrb    r2,[r1,#3]
1571 # endif
1572         eor     r12,r10,r11                     @ a^b, b^c in next round
1573 #else
1574         ldr     r2,[sp,#12*4]           @ from future BODY_16_xx
1575         eor     r12,r10,r11                     @ a^b, b^c in next round
1576         ldr     r1,[sp,#9*4]    @ from future BODY_16_xx
1577 #endif
1578         eor     r0,r0,r10,ror#20        @ Sigma0(a)
1579         and     r3,r3,r12                       @ (b^c)&=(a^b)
1580         add     r5,r5,r9                        @ d+=h
1581         eor     r3,r3,r11                       @ Maj(a,b,c)
1582         add     r9,r9,r0,ror#2  @ h+=Sigma0(a)
1583         @ add   r9,r9,r3                        @ h+=Maj(a,b,c)
1584         @ ldr   r2,[sp,#12*4]           @ 27
1585         @ ldr   r1,[sp,#9*4]
1586         mov     r0,r2,ror#7
1587         add     r9,r9,r3                        @ h+=Maj(a,b,c) from the past
1588         mov     r3,r1,ror#17
1589         eor     r0,r0,r2,ror#18
1590         eor     r3,r3,r1,ror#19
1591         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1592         ldr     r2,[sp,#11*4]
1593         eor     r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1594         ldr     r1,[sp,#4*4]
1595
1596         add     r3,r3,r0
1597         eor     r0,r5,r5,ror#5  @ from BODY_00_15
1598         add     r2,r2,r3
1599         eor     r0,r0,r5,ror#19 @ Sigma1(e)
1600         add     r2,r2,r1                        @ X[i]
1601         ldr     r3,[r14],#4                     @ *K256++
1602         add     r8,r8,r2                        @ h+=X[i]
1603         str     r2,[sp,#11*4]
1604         eor     r2,r6,r7
1605         add     r8,r8,r0,ror#6  @ h+=Sigma1(e)
1606         and     r2,r2,r5
1607         add     r8,r8,r3                        @ h+=K256[i]
1608         eor     r2,r2,r7                        @ Ch(e,f,g)
1609         eor     r0,r9,r9,ror#11
1610         add     r8,r8,r2                        @ h+=Ch(e,f,g)
1611 #if 27==31
1612         and     r3,r3,#0xff
1613         cmp     r3,#0xf2                        @ done?
1614 #endif
1615 #if 27<15
1616 # if __ARM_ARCH__>=7
1617         ldr     r2,[r1],#4                      @ prefetch
1618 # else
1619         ldrb    r2,[r1,#3]
1620 # endif
1621         eor     r3,r9,r10                       @ a^b, b^c in next round
1622 #else
1623         ldr     r2,[sp,#13*4]           @ from future BODY_16_xx
1624         eor     r3,r9,r10                       @ a^b, b^c in next round
1625         ldr     r1,[sp,#10*4]   @ from future BODY_16_xx
1626 #endif
1627         eor     r0,r0,r9,ror#20 @ Sigma0(a)
1628         and     r12,r12,r3                      @ (b^c)&=(a^b)
1629         add     r4,r4,r8                        @ d+=h
1630         eor     r12,r12,r10                     @ Maj(a,b,c)
1631         add     r8,r8,r0,ror#2  @ h+=Sigma0(a)
1632         @ add   r8,r8,r12                       @ h+=Maj(a,b,c)
1633         @ ldr   r2,[sp,#13*4]           @ 28
1634         @ ldr   r1,[sp,#10*4]
1635         mov     r0,r2,ror#7
1636         add     r8,r8,r12                       @ h+=Maj(a,b,c) from the past
1637         mov     r12,r1,ror#17
1638         eor     r0,r0,r2,ror#18
1639         eor     r12,r12,r1,ror#19
1640         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1641         ldr     r2,[sp,#12*4]
1642         eor     r12,r12,r1,lsr#10       @ sigma1(X[i+14])
1643         ldr     r1,[sp,#5*4]
1644
1645         add     r12,r12,r0
1646         eor     r0,r4,r4,ror#5  @ from BODY_00_15
1647         add     r2,r2,r12
1648         eor     r0,r0,r4,ror#19 @ Sigma1(e)
1649         add     r2,r2,r1                        @ X[i]
1650         ldr     r12,[r14],#4                    @ *K256++
1651         add     r7,r7,r2                        @ h+=X[i]
1652         str     r2,[sp,#12*4]
1653         eor     r2,r5,r6
1654         add     r7,r7,r0,ror#6  @ h+=Sigma1(e)
1655         and     r2,r2,r4
1656         add     r7,r7,r12                       @ h+=K256[i]
1657         eor     r2,r2,r6                        @ Ch(e,f,g)
1658         eor     r0,r8,r8,ror#11
1659         add     r7,r7,r2                        @ h+=Ch(e,f,g)
1660 #if 28==31
1661         and     r12,r12,#0xff
1662         cmp     r12,#0xf2                       @ done?
1663 #endif
1664 #if 28<15
1665 # if __ARM_ARCH__>=7
1666         ldr     r2,[r1],#4                      @ prefetch
1667 # else
1668         ldrb    r2,[r1,#3]
1669 # endif
1670         eor     r12,r8,r9                       @ a^b, b^c in next round
1671 #else
1672         ldr     r2,[sp,#14*4]           @ from future BODY_16_xx
1673         eor     r12,r8,r9                       @ a^b, b^c in next round
1674         ldr     r1,[sp,#11*4]   @ from future BODY_16_xx
1675 #endif
1676         eor     r0,r0,r8,ror#20 @ Sigma0(a)
1677         and     r3,r3,r12                       @ (b^c)&=(a^b)
1678         add     r11,r11,r7                      @ d+=h
1679         eor     r3,r3,r9                        @ Maj(a,b,c)
1680         add     r7,r7,r0,ror#2  @ h+=Sigma0(a)
1681         @ add   r7,r7,r3                        @ h+=Maj(a,b,c)
1682         @ ldr   r2,[sp,#14*4]           @ 29
1683         @ ldr   r1,[sp,#11*4]
1684         mov     r0,r2,ror#7
1685         add     r7,r7,r3                        @ h+=Maj(a,b,c) from the past
1686         mov     r3,r1,ror#17
1687         eor     r0,r0,r2,ror#18
1688         eor     r3,r3,r1,ror#19
1689         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1690         ldr     r2,[sp,#13*4]
1691         eor     r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1692         ldr     r1,[sp,#6*4]
1693
1694         add     r3,r3,r0
1695         eor     r0,r11,r11,ror#5        @ from BODY_00_15
1696         add     r2,r2,r3
1697         eor     r0,r0,r11,ror#19        @ Sigma1(e)
1698         add     r2,r2,r1                        @ X[i]
1699         ldr     r3,[r14],#4                     @ *K256++
1700         add     r6,r6,r2                        @ h+=X[i]
1701         str     r2,[sp,#13*4]
1702         eor     r2,r4,r5
1703         add     r6,r6,r0,ror#6  @ h+=Sigma1(e)
1704         and     r2,r2,r11
1705         add     r6,r6,r3                        @ h+=K256[i]
1706         eor     r2,r2,r5                        @ Ch(e,f,g)
1707         eor     r0,r7,r7,ror#11
1708         add     r6,r6,r2                        @ h+=Ch(e,f,g)
1709 #if 29==31
1710         and     r3,r3,#0xff
1711         cmp     r3,#0xf2                        @ done?
1712 #endif
1713 #if 29<15
1714 # if __ARM_ARCH__>=7
1715         ldr     r2,[r1],#4                      @ prefetch
1716 # else
1717         ldrb    r2,[r1,#3]
1718 # endif
1719         eor     r3,r7,r8                        @ a^b, b^c in next round
1720 #else
1721         ldr     r2,[sp,#15*4]           @ from future BODY_16_xx
1722         eor     r3,r7,r8                        @ a^b, b^c in next round
1723         ldr     r1,[sp,#12*4]   @ from future BODY_16_xx
1724 #endif
1725         eor     r0,r0,r7,ror#20 @ Sigma0(a)
1726         and     r12,r12,r3                      @ (b^c)&=(a^b)
1727         add     r10,r10,r6                      @ d+=h
1728         eor     r12,r12,r8                      @ Maj(a,b,c)
1729         add     r6,r6,r0,ror#2  @ h+=Sigma0(a)
1730         @ add   r6,r6,r12                       @ h+=Maj(a,b,c)
1731         @ ldr   r2,[sp,#15*4]           @ 30
1732         @ ldr   r1,[sp,#12*4]
1733         mov     r0,r2,ror#7
1734         add     r6,r6,r12                       @ h+=Maj(a,b,c) from the past
1735         mov     r12,r1,ror#17
1736         eor     r0,r0,r2,ror#18
1737         eor     r12,r12,r1,ror#19
1738         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1739         ldr     r2,[sp,#14*4]
1740         eor     r12,r12,r1,lsr#10       @ sigma1(X[i+14])
1741         ldr     r1,[sp,#7*4]
1742
1743         add     r12,r12,r0
1744         eor     r0,r10,r10,ror#5        @ from BODY_00_15
1745         add     r2,r2,r12
1746         eor     r0,r0,r10,ror#19        @ Sigma1(e)
1747         add     r2,r2,r1                        @ X[i]
1748         ldr     r12,[r14],#4                    @ *K256++
1749         add     r5,r5,r2                        @ h+=X[i]
1750         str     r2,[sp,#14*4]
1751         eor     r2,r11,r4
1752         add     r5,r5,r0,ror#6  @ h+=Sigma1(e)
1753         and     r2,r2,r10
1754         add     r5,r5,r12                       @ h+=K256[i]
1755         eor     r2,r2,r4                        @ Ch(e,f,g)
1756         eor     r0,r6,r6,ror#11
1757         add     r5,r5,r2                        @ h+=Ch(e,f,g)
1758 #if 30==31
1759         and     r12,r12,#0xff
1760         cmp     r12,#0xf2                       @ done?
1761 #endif
1762 #if 30<15
1763 # if __ARM_ARCH__>=7
1764         ldr     r2,[r1],#4                      @ prefetch
1765 # else
1766         ldrb    r2,[r1,#3]
1767 # endif
1768         eor     r12,r6,r7                       @ a^b, b^c in next round
1769 #else
1770         ldr     r2,[sp,#0*4]            @ from future BODY_16_xx
1771         eor     r12,r6,r7                       @ a^b, b^c in next round
1772         ldr     r1,[sp,#13*4]   @ from future BODY_16_xx
1773 #endif
1774         eor     r0,r0,r6,ror#20 @ Sigma0(a)
1775         and     r3,r3,r12                       @ (b^c)&=(a^b)
1776         add     r9,r9,r5                        @ d+=h
1777         eor     r3,r3,r7                        @ Maj(a,b,c)
1778         add     r5,r5,r0,ror#2  @ h+=Sigma0(a)
1779         @ add   r5,r5,r3                        @ h+=Maj(a,b,c)
1780         @ ldr   r2,[sp,#0*4]            @ 31
1781         @ ldr   r1,[sp,#13*4]
1782         mov     r0,r2,ror#7
1783         add     r5,r5,r3                        @ h+=Maj(a,b,c) from the past
1784         mov     r3,r1,ror#17
1785         eor     r0,r0,r2,ror#18
1786         eor     r3,r3,r1,ror#19
1787         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1788         ldr     r2,[sp,#15*4]
1789         eor     r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1790         ldr     r1,[sp,#8*4]
1791
1792         add     r3,r3,r0
1793         eor     r0,r9,r9,ror#5  @ from BODY_00_15
1794         add     r2,r2,r3
1795         eor     r0,r0,r9,ror#19 @ Sigma1(e)
1796         add     r2,r2,r1                        @ X[i]
1797         ldr     r3,[r14],#4                     @ *K256++
1798         add     r4,r4,r2                        @ h+=X[i]
1799         str     r2,[sp,#15*4]
1800         eor     r2,r10,r11
1801         add     r4,r4,r0,ror#6  @ h+=Sigma1(e)
1802         and     r2,r2,r9
1803         add     r4,r4,r3                        @ h+=K256[i]
1804         eor     r2,r2,r11                       @ Ch(e,f,g)
1805         eor     r0,r5,r5,ror#11
1806         add     r4,r4,r2                        @ h+=Ch(e,f,g)
1807 #if 31==31
1808         and     r3,r3,#0xff
1809         cmp     r3,#0xf2                        @ done?
1810 #endif
1811 #if 31<15
1812 # if __ARM_ARCH__>=7
1813         ldr     r2,[r1],#4                      @ prefetch
1814 # else
1815         ldrb    r2,[r1,#3]
1816 # endif
1817         eor     r3,r5,r6                        @ a^b, b^c in next round
1818 #else
1819         ldr     r2,[sp,#1*4]            @ from future BODY_16_xx
1820         eor     r3,r5,r6                        @ a^b, b^c in next round
1821         ldr     r1,[sp,#14*4]   @ from future BODY_16_xx
1822 #endif
1823         eor     r0,r0,r5,ror#20 @ Sigma0(a)
1824         and     r12,r12,r3                      @ (b^c)&=(a^b)
1825         add     r8,r8,r4                        @ d+=h
1826         eor     r12,r12,r6                      @ Maj(a,b,c)
1827         add     r4,r4,r0,ror#2  @ h+=Sigma0(a)
1828         @ add   r4,r4,r12                       @ h+=Maj(a,b,c)
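@ Round 31 closes an iteration of .Lrounds_16_xx.  r3 still holds
@ the K256 word fetched this round, and only the last constant,
@ 0xc67178f2, has low byte 0xf2, so the cmp above leaves EQ set
@ exactly once all 64 rounds have run: ldreq then pulls the ctx
@ pointer while bne loops for the next 16 rounds.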
1829 #if __ARM_ARCH__>=7
1830         ite     eq                      @ Thumb2 thing, sanity check in ARM
1831 #endif
1832         ldreq   r3,[sp,#16*4]           @ pull ctx
1833         bne     .Lrounds_16_xx
1834
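@ All 64 rounds done: fold in the last deferred Maj, then accumulate
@ the working registers into the state words at ctx (r3).  The frame
@ keeps ctx at sp+16*4 and inp/inp+len at sp+17*4/sp+18*4; while
@ input remains, rewind the K256 pointer and take .Loop again.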
1835         add     r4,r4,r12               @ h+=Maj(a,b,c) from the past
1836         ldr     r0,[r3,#0]
1837         ldr     r2,[r3,#4]
1838         ldr     r12,[r3,#8]
1839         add     r4,r4,r0
1840         ldr     r0,[r3,#12]
1841         add     r5,r5,r2
1842         ldr     r2,[r3,#16]
1843         add     r6,r6,r12
1844         ldr     r12,[r3,#20]
1845         add     r7,r7,r0
1846         ldr     r0,[r3,#24]
1847         add     r8,r8,r2
1848         ldr     r2,[r3,#28]
1849         add     r9,r9,r12
1850         ldr     r1,[sp,#17*4]           @ pull inp
1851         ldr     r12,[sp,#18*4]          @ pull inp+len
1852         add     r10,r10,r0
1853         add     r11,r11,r2
1854         stmia   r3,{r4,r5,r6,r7,r8,r9,r10,r11}
1855         cmp     r1,r12
1856         sub     r14,r14,#256    @ rewind Ktbl
1857         bne     .Loop
1858
1859         add     sp,sp,#19*4     @ destroy frame
1860 #if __ARM_ARCH__>=5
1861         ldmia   sp!,{r4-r11,pc}
1862 #else
1863         ldmia   sp!,{r4-r11,lr}
1864         tst     lr,#1
1865         moveq   pc,lr                   @ be binary compatible with V4, yet
1866         .word   0xe12fff1e                      @ interoperable with Thumb ISA:-)
1867 #endif
1868 .size   sha256_block_data_order,.-sha256_block_data_order
1869 #if __ARM_MAX_ARCH__>=7
1870 .arch   armv7-a
1871 .fpu    neon
1872
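@ NEON code path: the vector unit expands the message schedule four
@ words at a time (pre-adding K256) while the scalar ALU runs the
@ rounds, so the two pipelines overlap.  Judging by the register
@ usage below, the contract matches the integer version, roughly
@   void sha256_block_data_order_neon(u32 state[8], const void *inp,
@                                     size_t num_blocks);
@ with r0=state, r1=inp, r2=block count -- an inferred prototype,
@ not something this file declares.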
1873 .global sha256_block_data_order_neon
1874 .type   sha256_block_data_order_neon,%function
1875 .align  4
1876 sha256_block_data_order_neon:
1877 .LNEON:
1878         stmdb   sp!,{r4-r12,lr}
1879
1880         sub     r11,sp,#16*4+16
1881         adrl    r14,K256
1882         bic     r11,r11,#15             @ align for 128-bit stores
1883         mov     r12,sp
1884         mov     sp,r11                  @ alloca
1885         add     r2,r1,r2,lsl#6  @ r2 = inp+64*num, end of input
1886
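@ Load the first 64-byte block into q0-q3 and the first 16 K256
@ words into q8-q11, byte-swapping the input on the way.  The
@ aligned frame stages the 16 X[i]+K[i] words at sp+0..63, then
@ holds ctx at sp+64, inp at sp+68, the input end at sp+72 and the
@ caller's sp at sp+76.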
1887         vld1.8          {q0},[r1]!
1888         vld1.8          {q1},[r1]!
1889         vld1.8          {q2},[r1]!
1890         vld1.8          {q3},[r1]!
1891         vld1.32         {q8},[r14,:128]!
1892         vld1.32         {q9},[r14,:128]!
1893         vld1.32         {q10},[r14,:128]!
1894         vld1.32         {q11},[r14,:128]!
1895         vrev32.8        q0,q0           @ yes, even on
1896         str             r0,[sp,#64]
1897         vrev32.8        q1,q1           @ big-endian
1898         str             r1,[sp,#68]
1899         mov             r1,sp
1900         vrev32.8        q2,q2
1901         str             r2,[sp,#72]
1902         vrev32.8        q3,q3
1903         str             r12,[sp,#76]            @ save original sp
1904         vadd.i32        q8,q8,q0
1905         vadd.i32        q9,q9,q1
1906         vst1.32         {q8},[r1,:128]!
1907         vadd.i32        q10,q10,q2
1908         vst1.32         {q9},[r1,:128]!
1909         vadd.i32        q11,q11,q3
1910         vst1.32         {q10},[r1,:128]!
1911         vst1.32         {q11},[r1,:128]!
1912
1913         ldmia           r0,{r4-r11}
1914         sub             r1,r1,#64
1915         ldr             r2,[sp,#0]
1916         eor             r12,r12,r12
1917         eor             r3,r5,r6
1918         b               .L_00_48
1919
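@ .L_00_48 interleaves sixteen scalar rounds with the NEON expansion
@ of the next sixteen schedule words:
@   sigma0(x) = (x ror 7)  ^ (x ror 18) ^ (x >> 3)
@   sigma1(x) = (x ror 17) ^ (x ror 19) ^ (x >> 10)
@   X[i] = X[i-16] + sigma0(X[i-15]) + X[i-7] + sigma1(X[i-2])
@ NEON has no rotate, so each rotation is a vshr.u32 paired with a
@ vsli.32 of the complementary amount (#7 with #25, etc.).  The
@ sigma1 half runs two lanes at a time (d24/d25): X[i-2] for the
@ last two words of a quad are the first two words of that same
@ quad, so they only exist after a half-step.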
1920 .align  4
1921 .L_00_48:
1922         vext.8  q8,q0,q1,#4
1923         add     r11,r11,r2
1924         eor     r2,r9,r10
1925         eor     r0,r8,r8,ror#5
1926         vext.8  q9,q2,q3,#4
1927         add     r4,r4,r12
1928         and     r2,r2,r8
1929         eor     r12,r0,r8,ror#19
1930         vshr.u32        q10,q8,#7
1931         eor     r0,r4,r4,ror#11
1932         eor     r2,r2,r10
1933         vadd.i32        q0,q0,q9
1934         add     r11,r11,r12,ror#6
1935         eor     r12,r4,r5
1936         vshr.u32        q9,q8,#3
1937         eor     r0,r0,r4,ror#20
1938         add     r11,r11,r2
1939         vsli.32 q10,q8,#25
1940         ldr     r2,[sp,#4]
1941         and     r3,r3,r12
1942         vshr.u32        q11,q8,#18
1943         add     r7,r7,r11
1944         add     r11,r11,r0,ror#2
1945         eor     r3,r3,r5
1946         veor    q9,q9,q10
1947         add     r10,r10,r2
1948         vsli.32 q11,q8,#14
1949         eor     r2,r8,r9
1950         eor     r0,r7,r7,ror#5
1951         vshr.u32        d24,d7,#17
1952         add     r11,r11,r3
1953         and     r2,r2,r7
1954         veor    q9,q9,q11
1955         eor     r3,r0,r7,ror#19
1956         eor     r0,r11,r11,ror#11
1957         vsli.32 d24,d7,#15
1958         eor     r2,r2,r9
1959         add     r10,r10,r3,ror#6
1960         vshr.u32        d25,d7,#10
1961         eor     r3,r11,r4
1962         eor     r0,r0,r11,ror#20
1963         vadd.i32        q0,q0,q9
1964         add     r10,r10,r2
1965         ldr     r2,[sp,#8]
1966         veor    d25,d25,d24
1967         and     r12,r12,r3
1968         add     r6,r6,r10
1969         vshr.u32        d24,d7,#19
1970         add     r10,r10,r0,ror#2
1971         eor     r12,r12,r4
1972         vsli.32 d24,d7,#13
1973         add     r9,r9,r2
1974         eor     r2,r7,r8
1975         veor    d25,d25,d24
1976         eor     r0,r6,r6,ror#5
1977         add     r10,r10,r12
1978         vadd.i32        d0,d0,d25
1979         and     r2,r2,r6
1980         eor     r12,r0,r6,ror#19
1981         vshr.u32        d24,d0,#17
1982         eor     r0,r10,r10,ror#11
1983         eor     r2,r2,r8
1984         vsli.32 d24,d0,#15
1985         add     r9,r9,r12,ror#6
1986         eor     r12,r10,r11
1987         vshr.u32        d25,d0,#10
1988         eor     r0,r0,r10,ror#20
1989         add     r9,r9,r2
1990         veor    d25,d25,d24
1991         ldr     r2,[sp,#12]
1992         and     r3,r3,r12
1993         vshr.u32        d24,d0,#19
1994         add     r5,r5,r9
1995         add     r9,r9,r0,ror#2
1996         eor     r3,r3,r11
1997         vld1.32 {q8},[r14,:128]!
1998         add     r8,r8,r2
1999         vsli.32 d24,d0,#13
2000         eor     r2,r6,r7
2001         eor     r0,r5,r5,ror#5
2002         veor    d25,d25,d24
2003         add     r9,r9,r3
2004         and     r2,r2,r5
2005         vadd.i32        d1,d1,d25
2006         eor     r3,r0,r5,ror#19
2007         eor     r0,r9,r9,ror#11
2008         vadd.i32        q8,q8,q0
2009         eor     r2,r2,r7
2010         add     r8,r8,r3,ror#6
2011         eor     r3,r9,r10
2012         eor     r0,r0,r9,ror#20
2013         add     r8,r8,r2
2014         ldr     r2,[sp,#16]
2015         and     r12,r12,r3
2016         add     r4,r4,r8
2017         vst1.32 {q8},[r1,:128]!
2018         add     r8,r8,r0,ror#2
2019         eor     r12,r12,r10
2020         vext.8  q8,q1,q2,#4
2021         add     r7,r7,r2
2022         eor     r2,r5,r6
2023         eor     r0,r4,r4,ror#5
2024         vext.8  q9,q3,q0,#4
2025         add     r8,r8,r12
2026         and     r2,r2,r4
2027         eor     r12,r0,r4,ror#19
2028         vshr.u32        q10,q8,#7
2029         eor     r0,r8,r8,ror#11
2030         eor     r2,r2,r6
2031         vadd.i32        q1,q1,q9
2032         add     r7,r7,r12,ror#6
2033         eor     r12,r8,r9
2034         vshr.u32        q9,q8,#3
2035         eor     r0,r0,r8,ror#20
2036         add     r7,r7,r2
2037         vsli.32 q10,q8,#25
2038         ldr     r2,[sp,#20]
2039         and     r3,r3,r12
2040         vshr.u32        q11,q8,#18
2041         add     r11,r11,r7
2042         add     r7,r7,r0,ror#2
2043         eor     r3,r3,r9
2044         veor    q9,q9,q10
2045         add     r6,r6,r2
2046         vsli.32 q11,q8,#14
2047         eor     r2,r4,r5
2048         eor     r0,r11,r11,ror#5
2049         vshr.u32        d24,d1,#17
2050         add     r7,r7,r3
2051         and     r2,r2,r11
2052         veor    q9,q9,q11
2053         eor     r3,r0,r11,ror#19
2054         eor     r0,r7,r7,ror#11
2055         vsli.32 d24,d1,#15
2056         eor     r2,r2,r5
2057         add     r6,r6,r3,ror#6
2058         vshr.u32        d25,d1,#10
2059         eor     r3,r7,r8
2060         eor     r0,r0,r7,ror#20
2061         vadd.i32        q1,q1,q9
2062         add     r6,r6,r2
2063         ldr     r2,[sp,#24]
2064         veor    d25,d25,d24
2065         and     r12,r12,r3
2066         add     r10,r10,r6
2067         vshr.u32        d24,d1,#19
2068         add     r6,r6,r0,ror#2
2069         eor     r12,r12,r8
2070         vsli.32 d24,d1,#13
2071         add     r5,r5,r2
2072         eor     r2,r11,r4
2073         veor    d25,d25,d24
2074         eor     r0,r10,r10,ror#5
2075         add     r6,r6,r12
2076         vadd.i32        d2,d2,d25
2077         and     r2,r2,r10
2078         eor     r12,r0,r10,ror#19
2079         vshr.u32        d24,d2,#17
2080         eor     r0,r6,r6,ror#11
2081         eor     r2,r2,r4
2082         vsli.32 d24,d2,#15
2083         add     r5,r5,r12,ror#6
2084         eor     r12,r6,r7
2085         vshr.u32        d25,d2,#10
2086         eor     r0,r0,r6,ror#20
2087         add     r5,r5,r2
2088         veor    d25,d25,d24
2089         ldr     r2,[sp,#28]
2090         and     r3,r3,r12
2091         vshr.u32        d24,d2,#19
2092         add     r9,r9,r5
2093         add     r5,r5,r0,ror#2
2094         eor     r3,r3,r7
2095         vld1.32 {q8},[r14,:128]!
2096         add     r4,r4,r2
2097         vsli.32 d24,d2,#13
2098         eor     r2,r10,r11
2099         eor     r0,r9,r9,ror#5
2100         veor    d25,d25,d24
2101         add     r5,r5,r3
2102         and     r2,r2,r9
2103         vadd.i32        d3,d3,d25
2104         eor     r3,r0,r9,ror#19
2105         eor     r0,r5,r5,ror#11
2106         vadd.i32        q8,q8,q1
2107         eor     r2,r2,r11
2108         add     r4,r4,r3,ror#6
2109         eor     r3,r5,r6
2110         eor     r0,r0,r5,ror#20
2111         add     r4,r4,r2
2112         ldr     r2,[sp,#32]
2113         and     r12,r12,r3
2114         add     r8,r8,r4
2115         vst1.32 {q8},[r1,:128]!
2116         add     r4,r4,r0,ror#2
2117         eor     r12,r12,r6
2118         vext.8  q8,q2,q3,#4
2119         add     r11,r11,r2
2120         eor     r2,r9,r10
2121         eor     r0,r8,r8,ror#5
2122         vext.8  q9,q0,q1,#4
2123         add     r4,r4,r12
2124         and     r2,r2,r8
2125         eor     r12,r0,r8,ror#19
2126         vshr.u32        q10,q8,#7
2127         eor     r0,r4,r4,ror#11
2128         eor     r2,r2,r10
2129         vadd.i32        q2,q2,q9
2130         add     r11,r11,r12,ror#6
2131         eor     r12,r4,r5
2132         vshr.u32        q9,q8,#3
2133         eor     r0,r0,r4,ror#20
2134         add     r11,r11,r2
2135         vsli.32 q10,q8,#25
2136         ldr     r2,[sp,#36]
2137         and     r3,r3,r12
2138         vshr.u32        q11,q8,#18
2139         add     r7,r7,r11
2140         add     r11,r11,r0,ror#2
2141         eor     r3,r3,r5
2142         veor    q9,q9,q10
2143         add     r10,r10,r2
2144         vsli.32 q11,q8,#14
2145         eor     r2,r8,r9
2146         eor     r0,r7,r7,ror#5
2147         vshr.u32        d24,d3,#17
2148         add     r11,r11,r3
2149         and     r2,r2,r7
2150         veor    q9,q9,q11
2151         eor     r3,r0,r7,ror#19
2152         eor     r0,r11,r11,ror#11
2153         vsli.32 d24,d3,#15
2154         eor     r2,r2,r9
2155         add     r10,r10,r3,ror#6
2156         vshr.u32        d25,d3,#10
2157         eor     r3,r11,r4
2158         eor     r0,r0,r11,ror#20
2159         vadd.i32        q2,q2,q9
2160         add     r10,r10,r2
2161         ldr     r2,[sp,#40]
2162         veor    d25,d25,d24
2163         and     r12,r12,r3
2164         add     r6,r6,r10
2165         vshr.u32        d24,d3,#19
2166         add     r10,r10,r0,ror#2
2167         eor     r12,r12,r4
2168         vsli.32 d24,d3,#13
2169         add     r9,r9,r2
2170         eor     r2,r7,r8
2171         veor    d25,d25,d24
2172         eor     r0,r6,r6,ror#5
2173         add     r10,r10,r12
2174         vadd.i32        d4,d4,d25
2175         and     r2,r2,r6
2176         eor     r12,r0,r6,ror#19
2177         vshr.u32        d24,d4,#17
2178         eor     r0,r10,r10,ror#11
2179         eor     r2,r2,r8
2180         vsli.32 d24,d4,#15
2181         add     r9,r9,r12,ror#6
2182         eor     r12,r10,r11
2183         vshr.u32        d25,d4,#10
2184         eor     r0,r0,r10,ror#20
2185         add     r9,r9,r2
2186         veor    d25,d25,d24
2187         ldr     r2,[sp,#44]
2188         and     r3,r3,r12
2189         vshr.u32        d24,d4,#19
2190         add     r5,r5,r9
2191         add     r9,r9,r0,ror#2
2192         eor     r3,r3,r11
2193         vld1.32 {q8},[r14,:128]!
2194         add     r8,r8,r2
2195         vsli.32 d24,d4,#13
2196         eor     r2,r6,r7
2197         eor     r0,r5,r5,ror#5
2198         veor    d25,d25,d24
2199         add     r9,r9,r3
2200         and     r2,r2,r5
2201         vadd.i32        d5,d5,d25
2202         eor     r3,r0,r5,ror#19
2203         eor     r0,r9,r9,ror#11
2204         vadd.i32        q8,q8,q2
2205         eor     r2,r2,r7
2206         add     r8,r8,r3,ror#6
2207         eor     r3,r9,r10
2208         eor     r0,r0,r9,ror#20
2209         add     r8,r8,r2
2210         ldr     r2,[sp,#48]
2211         and     r12,r12,r3
2212         add     r4,r4,r8
2213         vst1.32 {q8},[r1,:128]!
2214         add     r8,r8,r0,ror#2
2215         eor     r12,r12,r10
2216         vext.8  q8,q3,q0,#4
2217         add     r7,r7,r2
2218         eor     r2,r5,r6
2219         eor     r0,r4,r4,ror#5
2220         vext.8  q9,q1,q2,#4
2221         add     r8,r8,r12
2222         and     r2,r2,r4
2223         eor     r12,r0,r4,ror#19
2224         vshr.u32        q10,q8,#7
2225         eor     r0,r8,r8,ror#11
2226         eor     r2,r2,r6
2227         vadd.i32        q3,q3,q9
2228         add     r7,r7,r12,ror#6
2229         eor     r12,r8,r9
2230         vshr.u32        q9,q8,#3
2231         eor     r0,r0,r8,ror#20
2232         add     r7,r7,r2
2233         vsli.32 q10,q8,#25
2234         ldr     r2,[sp,#52]
2235         and     r3,r3,r12
2236         vshr.u32        q11,q8,#18
2237         add     r11,r11,r7
2238         add     r7,r7,r0,ror#2
2239         eor     r3,r3,r9
2240         veor    q9,q9,q10
2241         add     r6,r6,r2
2242         vsli.32 q11,q8,#14
2243         eor     r2,r4,r5
2244         eor     r0,r11,r11,ror#5
2245         vshr.u32        d24,d5,#17
2246         add     r7,r7,r3
2247         and     r2,r2,r11
2248         veor    q9,q9,q11
2249         eor     r3,r0,r11,ror#19
2250         eor     r0,r7,r7,ror#11
2251         vsli.32 d24,d5,#15
2252         eor     r2,r2,r5
2253         add     r6,r6,r3,ror#6
2254         vshr.u32        d25,d5,#10
2255         eor     r3,r7,r8
2256         eor     r0,r0,r7,ror#20
2257         vadd.i32        q3,q3,q9
2258         add     r6,r6,r2
2259         ldr     r2,[sp,#56]
2260         veor    d25,d25,d24
2261         and     r12,r12,r3
2262         add     r10,r10,r6
2263         vshr.u32        d24,d5,#19
2264         add     r6,r6,r0,ror#2
2265         eor     r12,r12,r8
2266         vsli.32 d24,d5,#13
2267         add     r5,r5,r2
2268         eor     r2,r11,r4
2269         veor    d25,d25,d24
2270         eor     r0,r10,r10,ror#5
2271         add     r6,r6,r12
2272         vadd.i32        d6,d6,d25
2273         and     r2,r2,r10
2274         eor     r12,r0,r10,ror#19
2275         vshr.u32        d24,d6,#17
2276         eor     r0,r6,r6,ror#11
2277         eor     r2,r2,r4
2278         vsli.32 d24,d6,#15
2279         add     r5,r5,r12,ror#6
2280         eor     r12,r6,r7
2281         vshr.u32        d25,d6,#10
2282         eor     r0,r0,r6,ror#20
2283         add     r5,r5,r2
2284         veor    d25,d25,d24
2285         ldr     r2,[sp,#60]
2286         and     r3,r3,r12
2287         vshr.u32        d24,d6,#19
2288         add     r9,r9,r5
2289         add     r5,r5,r0,ror#2
2290         eor     r3,r3,r7
2291         vld1.32 {q8},[r14,:128]!
2292         add     r4,r4,r2
2293         vsli.32 d24,d6,#13
2294         eor     r2,r10,r11
2295         eor     r0,r9,r9,ror#5
2296         veor    d25,d25,d24
2297         add     r5,r5,r3
2298         and     r2,r2,r9
2299         vadd.i32        d7,d7,d25
2300         eor     r3,r0,r9,ror#19
2301         eor     r0,r5,r5,ror#11
2302         vadd.i32        q8,q8,q3
2303         eor     r2,r2,r11
2304         add     r4,r4,r3,ror#6
2305         eor     r3,r5,r6
2306         eor     r0,r0,r5,ror#20
2307         add     r4,r4,r2
2308         ldr     r2,[r14]
2309         and     r12,r12,r3
2310         add     r8,r8,r4
2311         vst1.32 {q8},[r1,:128]!
2312         add     r4,r4,r0,ror#2
2313         eor     r12,r12,r6
2314         teq     r2,#0                           @ check for K256 terminator
2315         ldr     r2,[sp,#0]
2316         sub     r1,r1,#64
2317         bne     .L_00_48
2318
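@ Reading the zero word that terminates K256 drops us out of
@ .L_00_48 with the final sixteen X+K words still staged on the
@ stack.  The rounds below consume them while the NEON unit already
@ loads and stages the next block; if the block just finished was
@ the last one, r1 is backed up by 64 (subeq) so those eager loads
@ re-read it harmlessly instead of running off the input, and the
@ advanced pointer is only stored back (strne) when input remains.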
2319         ldr             r1,[sp,#68]
2320         ldr             r0,[sp,#72]
2321         sub             r14,r14,#256    @ rewind r14
2322         teq             r1,r0
2323         it              eq
2324         subeq           r1,r1,#64               @ avoid SEGV
2325         vld1.8          {q0},[r1]!              @ load next input block
2326         vld1.8          {q1},[r1]!
2327         vld1.8          {q2},[r1]!
2328         vld1.8          {q3},[r1]!
2329         it              ne
2330         strne           r1,[sp,#68]
2331         mov             r1,sp
2332         add     r11,r11,r2
2333         eor     r2,r9,r10
2334         eor     r0,r8,r8,ror#5
2335         add     r4,r4,r12
2336         vld1.32 {q8},[r14,:128]!
2337         and     r2,r2,r8
2338         eor     r12,r0,r8,ror#19
2339         eor     r0,r4,r4,ror#11
2340         eor     r2,r2,r10
2341         vrev32.8        q0,q0
2342         add     r11,r11,r12,ror#6
2343         eor     r12,r4,r5
2344         eor     r0,r0,r4,ror#20
2345         add     r11,r11,r2
2346         vadd.i32        q8,q8,q0
2347         ldr     r2,[sp,#4]
2348         and     r3,r3,r12
2349         add     r7,r7,r11
2350         add     r11,r11,r0,ror#2
2351         eor     r3,r3,r5
2352         add     r10,r10,r2
2353         eor     r2,r8,r9
2354         eor     r0,r7,r7,ror#5
2355         add     r11,r11,r3
2356         and     r2,r2,r7
2357         eor     r3,r0,r7,ror#19
2358         eor     r0,r11,r11,ror#11
2359         eor     r2,r2,r9
2360         add     r10,r10,r3,ror#6
2361         eor     r3,r11,r4
2362         eor     r0,r0,r11,ror#20
2363         add     r10,r10,r2
2364         ldr     r2,[sp,#8]
2365         and     r12,r12,r3
2366         add     r6,r6,r10
2367         add     r10,r10,r0,ror#2
2368         eor     r12,r12,r4
2369         add     r9,r9,r2
2370         eor     r2,r7,r8
2371         eor     r0,r6,r6,ror#5
2372         add     r10,r10,r12
2373         and     r2,r2,r6
2374         eor     r12,r0,r6,ror#19
2375         eor     r0,r10,r10,ror#11
2376         eor     r2,r2,r8
2377         add     r9,r9,r12,ror#6
2378         eor     r12,r10,r11
2379         eor     r0,r0,r10,ror#20
2380         add     r9,r9,r2
2381         ldr     r2,[sp,#12]
2382         and     r3,r3,r12
2383         add     r5,r5,r9
2384         add     r9,r9,r0,ror#2
2385         eor     r3,r3,r11
2386         add     r8,r8,r2
2387         eor     r2,r6,r7
2388         eor     r0,r5,r5,ror#5
2389         add     r9,r9,r3
2390         and     r2,r2,r5
2391         eor     r3,r0,r5,ror#19
2392         eor     r0,r9,r9,ror#11
2393         eor     r2,r2,r7
2394         add     r8,r8,r3,ror#6
2395         eor     r3,r9,r10
2396         eor     r0,r0,r9,ror#20
2397         add     r8,r8,r2
2398         ldr     r2,[sp,#16]
2399         and     r12,r12,r3
2400         add     r4,r4,r8
2401         add     r8,r8,r0,ror#2
2402         eor     r12,r12,r10
2403         vst1.32 {q8},[r1,:128]!
2404         add     r7,r7,r2
2405         eor     r2,r5,r6
2406         eor     r0,r4,r4,ror#5
2407         add     r8,r8,r12
2408         vld1.32 {q8},[r14,:128]!
2409         and     r2,r2,r4
2410         eor     r12,r0,r4,ror#19
2411         eor     r0,r8,r8,ror#11
2412         eor     r2,r2,r6
2413         vrev32.8        q1,q1
2414         add     r7,r7,r12,ror#6
2415         eor     r12,r8,r9
2416         eor     r0,r0,r8,ror#20
2417         add     r7,r7,r2
2418         vadd.i32        q8,q8,q1
2419         ldr     r2,[sp,#20]
2420         and     r3,r3,r12
2421         add     r11,r11,r7
2422         add     r7,r7,r0,ror#2
2423         eor     r3,r3,r9
2424         add     r6,r6,r2
2425         eor     r2,r4,r5
2426         eor     r0,r11,r11,ror#5
2427         add     r7,r7,r3
2428         and     r2,r2,r11
2429         eor     r3,r0,r11,ror#19
2430         eor     r0,r7,r7,ror#11
2431         eor     r2,r2,r5
2432         add     r6,r6,r3,ror#6
2433         eor     r3,r7,r8
2434         eor     r0,r0,r7,ror#20
2435         add     r6,r6,r2
2436         ldr     r2,[sp,#24]
2437         and     r12,r12,r3
2438         add     r10,r10,r6
2439         add     r6,r6,r0,ror#2
2440         eor     r12,r12,r8
2441         add     r5,r5,r2
2442         eor     r2,r11,r4
2443         eor     r0,r10,r10,ror#5
2444         add     r6,r6,r12
2445         and     r2,r2,r10
2446         eor     r12,r0,r10,ror#19
2447         eor     r0,r6,r6,ror#11
2448         eor     r2,r2,r4
2449         add     r5,r5,r12,ror#6
2450         eor     r12,r6,r7
2451         eor     r0,r0,r6,ror#20
2452         add     r5,r5,r2
2453         ldr     r2,[sp,#28]
2454         and     r3,r3,r12
2455         add     r9,r9,r5
2456         add     r5,r5,r0,ror#2
2457         eor     r3,r3,r7
2458         add     r4,r4,r2
2459         eor     r2,r10,r11
2460         eor     r0,r9,r9,ror#5
2461         add     r5,r5,r3
2462         and     r2,r2,r9
2463         eor     r3,r0,r9,ror#19
2464         eor     r0,r5,r5,ror#11
2465         eor     r2,r2,r11
2466         add     r4,r4,r3,ror#6
2467         eor     r3,r5,r6
2468         eor     r0,r0,r5,ror#20
2469         add     r4,r4,r2
2470         ldr     r2,[sp,#32]
2471         and     r12,r12,r3
2472         add     r8,r8,r4
2473         add     r4,r4,r0,ror#2
2474         eor     r12,r12,r6
2475         vst1.32 {q8},[r1,:128]!
2476         add     r11,r11,r2
2477         eor     r2,r9,r10
2478         eor     r0,r8,r8,ror#5
2479         add     r4,r4,r12
2480         vld1.32 {q8},[r14,:128]!
2481         and     r2,r2,r8
2482         eor     r12,r0,r8,ror#19
2483         eor     r0,r4,r4,ror#11
2484         eor     r2,r2,r10
2485         vrev32.8        q2,q2
2486         add     r11,r11,r12,ror#6
2487         eor     r12,r4,r5
2488         eor     r0,r0,r4,ror#20
2489         add     r11,r11,r2
2490         vadd.i32        q8,q8,q2
2491         ldr     r2,[sp,#36]
2492         and     r3,r3,r12
2493         add     r7,r7,r11
2494         add     r11,r11,r0,ror#2
2495         eor     r3,r3,r5
2496         add     r10,r10,r2
2497         eor     r2,r8,r9
2498         eor     r0,r7,r7,ror#5
2499         add     r11,r11,r3
2500         and     r2,r2,r7
2501         eor     r3,r0,r7,ror#19
2502         eor     r0,r11,r11,ror#11
2503         eor     r2,r2,r9
2504         add     r10,r10,r3,ror#6
2505         eor     r3,r11,r4
2506         eor     r0,r0,r11,ror#20
2507         add     r10,r10,r2
2508         ldr     r2,[sp,#40]
2509         and     r12,r12,r3
2510         add     r6,r6,r10
2511         add     r10,r10,r0,ror#2
2512         eor     r12,r12,r4
2513         add     r9,r9,r2
2514         eor     r2,r7,r8
2515         eor     r0,r6,r6,ror#5
2516         add     r10,r10,r12
2517         and     r2,r2,r6
2518         eor     r12,r0,r6,ror#19
2519         eor     r0,r10,r10,ror#11
2520         eor     r2,r2,r8
2521         add     r9,r9,r12,ror#6
2522         eor     r12,r10,r11
2523         eor     r0,r0,r10,ror#20
2524         add     r9,r9,r2
2525         ldr     r2,[sp,#44]
2526         and     r3,r3,r12
2527         add     r5,r5,r9
2528         add     r9,r9,r0,ror#2
2529         eor     r3,r3,r11
2530         add     r8,r8,r2
2531         eor     r2,r6,r7
2532         eor     r0,r5,r5,ror#5
2533         add     r9,r9,r3
2534         and     r2,r2,r5
2535         eor     r3,r0,r5,ror#19
2536         eor     r0,r9,r9,ror#11
2537         eor     r2,r2,r7
2538         add     r8,r8,r3,ror#6
2539         eor     r3,r9,r10
2540         eor     r0,r0,r9,ror#20
2541         add     r8,r8,r2
2542         ldr     r2,[sp,#48]
2543         and     r12,r12,r3
2544         add     r4,r4,r8
2545         add     r8,r8,r0,ror#2
2546         eor     r12,r12,r10
2547         vst1.32 {q8},[r1,:128]!
2548         add     r7,r7,r2
2549         eor     r2,r5,r6
2550         eor     r0,r4,r4,ror#5
2551         add     r8,r8,r12
2552         vld1.32 {q8},[r14,:128]!
2553         and     r2,r2,r4
2554         eor     r12,r0,r4,ror#19
2555         eor     r0,r8,r8,ror#11
2556         eor     r2,r2,r6
2557         vrev32.8        q3,q3
2558         add     r7,r7,r12,ror#6
2559         eor     r12,r8,r9
2560         eor     r0,r0,r8,ror#20
2561         add     r7,r7,r2
2562         vadd.i32        q8,q8,q3
2563         ldr     r2,[sp,#52]
2564         and     r3,r3,r12
2565         add     r11,r11,r7
2566         add     r7,r7,r0,ror#2
2567         eor     r3,r3,r9
2568         add     r6,r6,r2
2569         eor     r2,r4,r5
2570         eor     r0,r11,r11,ror#5
2571         add     r7,r7,r3
2572         and     r2,r2,r11
2573         eor     r3,r0,r11,ror#19
2574         eor     r0,r7,r7,ror#11
2575         eor     r2,r2,r5
2576         add     r6,r6,r3,ror#6
2577         eor     r3,r7,r8
2578         eor     r0,r0,r7,ror#20
2579         add     r6,r6,r2
2580         ldr     r2,[sp,#56]
2581         and     r12,r12,r3
2582         add     r10,r10,r6
2583         add     r6,r6,r0,ror#2
2584         eor     r12,r12,r8
2585         add     r5,r5,r2
2586         eor     r2,r11,r4
2587         eor     r0,r10,r10,ror#5
2588         add     r6,r6,r12
2589         and     r2,r2,r10
2590         eor     r12,r0,r10,ror#19
2591         eor     r0,r6,r6,ror#11
2592         eor     r2,r2,r4
2593         add     r5,r5,r12,ror#6
2594         eor     r12,r6,r7
2595         eor     r0,r0,r6,ror#20
2596         add     r5,r5,r2
2597         ldr     r2,[sp,#60]
2598         and     r3,r3,r12
2599         add     r9,r9,r5
2600         add     r5,r5,r0,ror#2
2601         eor     r3,r3,r7
2602         add     r4,r4,r2
2603         eor     r2,r10,r11
2604         eor     r0,r9,r9,ror#5
2605         add     r5,r5,r3
2606         and     r2,r2,r9
2607         eor     r3,r0,r9,ror#19
2608         eor     r0,r5,r5,ror#11
2609         eor     r2,r2,r11
2610         add     r4,r4,r3,ror#6
2611         eor     r3,r5,r6
2612         eor     r0,r0,r5,ror#20
2613         add     r4,r4,r2
2614         ldr     r2,[sp,#64]
2615         and     r12,r12,r3
2616         add     r8,r8,r4
2617         add     r4,r4,r0,ror#2
2618         eor     r12,r12,r6
2619         vst1.32 {q8},[r1,:128]!
2620         ldr     r0,[r2,#0]
2621         add     r4,r4,r12                       @ h+=Maj(a,b,c) from the past
2622         ldr     r12,[r2,#4]
2623         ldr     r3,[r2,#8]
2624         ldr     r1,[r2,#12]
2625         add     r4,r4,r0                        @ accumulate
2626         ldr     r0,[r2,#16]
2627         add     r5,r5,r12
2628         ldr     r12,[r2,#20]
2629         add     r6,r6,r3
2630         ldr     r3,[r2,#24]
2631         add     r7,r7,r1
2632         ldr     r1,[r2,#28]
2633         add     r8,r8,r0
2634         str     r4,[r2],#4
2635         add     r9,r9,r12
2636         str     r5,[r2],#4
2637         add     r10,r10,r3
2638         str     r6,[r2],#4
2639         add     r11,r11,r1
2640         str     r7,[r2],#4
2641         stmia   r2,{r8-r11}
2642
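@ The flags still hold the inp-vs-end teq from above (nothing in
@ between sets them): with input left, reset the staging pointer and
@ round state and re-enter .L_00_48, otherwise restore the caller's
@ sp and return.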
2643         ittte   ne
2644         movne   r1,sp
2645         ldrne   r2,[sp,#0]
2646         eorne   r12,r12,r12
2647         ldreq   sp,[sp,#76]                     @ restore original sp
2648         itt     ne
2649         eorne   r3,r5,r6
2650         bne     .L_00_48
2651
2652         ldmia   sp!,{r4-r12,pc}
2653 .size   sha256_block_data_order_neon,.-sha256_block_data_order_neon
2654 #endif
2655 #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
2656
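@ The ARMv8 SHA-256 instructions postdate the assemblers this file
@ must still build with, so they are emitted as raw bytes via INST.
@ In ARM mode the four bytes go out in order; in Thumb-2 the two
@ halfwords of the 32-bit encoding are swapped and the leading
@ nibble widens from 0xf3 to 0xff, which is what d|0xc produces.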
2657 # ifdef __thumb2__
2658 #  define INST(a,b,c,d) .byte   c,d|0xc,a,b
2659 # else
2660 #  define INST(a,b,c,d) .byte   a,b,c,d
2661 # endif
2662
2663 .type   sha256_block_data_order_armv8,%function
2664 .align  5
2665 sha256_block_data_order_armv8:
2666 .LARMv8:
2667         vld1.32 {q0,q1},[r0]
2668 # ifdef __thumb2__
2669         adr     r3,.LARMv8
2670         sub     r3,r3,#.LARMv8-K256
2671 # else
2672         adrl    r3,K256
2673 # endif
2674         add     r2,r1,r2,lsl#6  @ r2 = inp+64*num, end of input
2675
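@ q0:q1 carry the eight state words (ABCD in q0, EFGH in q1).  Each
@ cluster below adds a K256 quad to four schedule words, then
@ sha256h/sha256h2 advance the two state halves by four rounds while
@ sha256su0/sha256su1 extend the schedule for the quads to come.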
2676 .Loop_v8:
2677         vld1.8          {q8-q9},[r1]!
2678         vld1.8          {q10-q11},[r1]!
2679         vld1.32         {q12},[r3]!
2680         vrev32.8        q8,q8
2681         vrev32.8        q9,q9
2682         vrev32.8        q10,q10
2683         vrev32.8        q11,q11
2684         vmov            q14,q0  @ offload
2685         vmov            q15,q1
2686         teq             r1,r2
2687         vld1.32         {q13},[r3]!
2688         vadd.i32        q12,q12,q8
2689         INST(0xe2,0x03,0xfa,0xf3)       @ sha256su0 q8,q9
2690         vmov            q2,q0
2691         INST(0x68,0x0c,0x02,0xf3)       @ sha256h q0,q1,q12
2692         INST(0x68,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q12
2693         INST(0xe6,0x0c,0x64,0xf3)       @ sha256su1 q8,q10,q11
2694         vld1.32         {q12},[r3]!
2695         vadd.i32        q13,q13,q9
2696         INST(0xe4,0x23,0xfa,0xf3)       @ sha256su0 q9,q10
2697         vmov            q2,q0
2698         INST(0x6a,0x0c,0x02,0xf3)       @ sha256h q0,q1,q13
2699         INST(0x6a,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q13
2700         INST(0xe0,0x2c,0x66,0xf3)       @ sha256su1 q9,q11,q8
2701         vld1.32         {q13},[r3]!
2702         vadd.i32        q12,q12,q10
2703         INST(0xe6,0x43,0xfa,0xf3)       @ sha256su0 q10,q11
2704         vmov            q2,q0
2705         INST(0x68,0x0c,0x02,0xf3)       @ sha256h q0,q1,q12
2706         INST(0x68,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q12
2707         INST(0xe2,0x4c,0x60,0xf3)       @ sha256su1 q10,q8,q9
2708         vld1.32         {q12},[r3]!
2709         vadd.i32        q13,q13,q11
2710         INST(0xe0,0x63,0xfa,0xf3)       @ sha256su0 q11,q8
2711         vmov            q2,q0
2712         INST(0x6a,0x0c,0x02,0xf3)       @ sha256h q0,q1,q13
2713         INST(0x6a,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q13
2714         INST(0xe4,0x6c,0x62,0xf3)       @ sha256su1 q11,q9,q10
2715         vld1.32         {q13},[r3]!
2716         vadd.i32        q12,q12,q8
2717         INST(0xe2,0x03,0xfa,0xf3)       @ sha256su0 q8,q9
2718         vmov            q2,q0
2719         INST(0x68,0x0c,0x02,0xf3)       @ sha256h q0,q1,q12
2720         INST(0x68,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q12
2721         INST(0xe6,0x0c,0x64,0xf3)       @ sha256su1 q8,q10,q11
2722         vld1.32         {q12},[r3]!
2723         vadd.i32        q13,q13,q9
2724         INST(0xe4,0x23,0xfa,0xf3)       @ sha256su0 q9,q10
2725         vmov            q2,q0
2726         INST(0x6a,0x0c,0x02,0xf3)       @ sha256h q0,q1,q13
2727         INST(0x6a,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q13
2728         INST(0xe0,0x2c,0x66,0xf3)       @ sha256su1 q9,q11,q8
2729         vld1.32         {q13},[r3]!
2730         vadd.i32        q12,q12,q10
2731         INST(0xe6,0x43,0xfa,0xf3)       @ sha256su0 q10,q11
2732         vmov            q2,q0
2733         INST(0x68,0x0c,0x02,0xf3)       @ sha256h q0,q1,q12
2734         INST(0x68,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q12
2735         INST(0xe2,0x4c,0x60,0xf3)       @ sha256su1 q10,q8,q9
2736         vld1.32         {q12},[r3]!
2737         vadd.i32        q13,q13,q11
2738         INST(0xe0,0x63,0xfa,0xf3)       @ sha256su0 q11,q8
2739         vmov            q2,q0
2740         INST(0x6a,0x0c,0x02,0xf3)       @ sha256h q0,q1,q13
2741         INST(0x6a,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q13
2742         INST(0xe4,0x6c,0x62,0xf3)       @ sha256su1 q11,q9,q10
2743         vld1.32         {q13},[r3]!
2744         vadd.i32        q12,q12,q8
2745         INST(0xe2,0x03,0xfa,0xf3)       @ sha256su0 q8,q9
2746         vmov            q2,q0
2747         INST(0x68,0x0c,0x02,0xf3)       @ sha256h q0,q1,q12
2748         INST(0x68,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q12
2749         INST(0xe6,0x0c,0x64,0xf3)       @ sha256su1 q8,q10,q11
2750         vld1.32         {q12},[r3]!
2751         vadd.i32        q13,q13,q9
2752         INST(0xe4,0x23,0xfa,0xf3)       @ sha256su0 q9,q10
2753         vmov            q2,q0
2754         INST(0x6a,0x0c,0x02,0xf3)       @ sha256h q0,q1,q13
2755         INST(0x6a,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q13
2756         INST(0xe0,0x2c,0x66,0xf3)       @ sha256su1 q9,q11,q8
2757         vld1.32         {q13},[r3]!
2758         vadd.i32        q12,q12,q10
2759         INST(0xe6,0x43,0xfa,0xf3)       @ sha256su0 q10,q11
2760         vmov            q2,q0
2761         INST(0x68,0x0c,0x02,0xf3)       @ sha256h q0,q1,q12
2762         INST(0x68,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q12
2763         INST(0xe2,0x4c,0x60,0xf3)       @ sha256su1 q10,q8,q9
2764         vld1.32         {q12},[r3]!
2765         vadd.i32        q13,q13,q11
2766         INST(0xe0,0x63,0xfa,0xf3)       @ sha256su0 q11,q8
2767         vmov            q2,q0
2768         INST(0x6a,0x0c,0x02,0xf3)       @ sha256h q0,q1,q13
2769         INST(0x6a,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q13
2770         INST(0xe4,0x6c,0x62,0xf3)       @ sha256su1 q11,q9,q10
2771         vld1.32         {q13},[r3]!
2772         vadd.i32        q12,q12,q8
2773         vmov            q2,q0
2774         INST(0x68,0x0c,0x02,0xf3)       @ sha256h q0,q1,q12
2775         INST(0x68,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q12
2776
2777         vld1.32         {q12},[r3]!
2778         vadd.i32        q13,q13,q9
2779         vmov            q2,q0
2780         INST(0x6a,0x0c,0x02,0xf3)       @ sha256h q0,q1,q13
2781         INST(0x6a,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q13
2782
2783         vld1.32         {q13},[r3]
2784         vadd.i32        q12,q12,q10
2785         sub             r3,r3,#256-16   @ rewind
2786         vmov            q2,q0
2787         INST(0x68,0x0c,0x02,0xf3)       @ sha256h q0,q1,q12
2788         INST(0x68,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q12
2789
2790         vadd.i32        q13,q13,q11
2791         vmov            q2,q0
2792         INST(0x6a,0x0c,0x02,0xf3)       @ sha256h q0,q1,q13
2793         INST(0x6a,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q13
2794
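@ block done: fold the result into the state offloaded to q14/q15
@ and loop while the teq above says there is more input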
2795         vadd.i32        q0,q0,q14
2796         vadd.i32        q1,q1,q15
2797         it              ne
2798         bne             .Loop_v8
2799
2800         vst1.32         {q0,q1},[r0]
2801
2802         bx      lr              @ return
2803 .size   sha256_block_data_order_armv8,.-sha256_block_data_order_armv8
2804 #endif
2805 .asciz  "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro@openssl.org>"
2806 .align  2
2807 #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
2808 .comm   OPENSSL_armcap_P,4,4
2809 #endif