GNU Linux-libre 4.9.287-gnu1
[releases.git] / arch / powerpc / crypto / sha1-powerpc-asm.S
1 /*
2  * SHA-1 implementation for PowerPC.
3  *
4  * Copyright (C) 2005 Paul Mackerras <paulus@samba.org>
5  */
6
7 #include <asm/ppc_asm.h>
8 #include <asm/asm-offsets.h>
9
/*
 * LWZ(rt, d, ra): load the 32-bit word at byte offset d from the address
 * in ra into rt.
 *
 * When __BIG_ENDIAN__ is defined this is a single lwz.  Otherwise it
 * expands to two instructions: li places d in rt, then lwbrx (load word
 * byte-reversed, indexed) loads from ra + rt with the bytes swapped, so
 * the value that ends up in rt has the same big-endian interpretation in
 * both builds.
 *
 * In the two-instruction form rt is overwritten with d before ra is
 * read, so rt and ra must be different registers and d must be usable as
 * an li immediate.  rt must also not be r0: the PowerPC ISA reads r0 in
 * the base-register slot of an indexed load as the constant 0.
 */
10 #ifdef __BIG_ENDIAN__
11 #define LWZ(rt, d, ra)  \
12         lwz     rt,d(ra)
13 #else
14 #define LWZ(rt, d, ra)  \
15         li      rt,d;   \
16         lwbrx   rt,rt,ra
17 #endif
18
19 /*
20  * We roll the registers for T, A, B, C, D, E around on each
21  * iteration; T on iteration t is A on iteration t+1, and so on.
22  * We use registers 7 - 12 for this.
23  */
/*
 * Each macro evaluates to a register number in the range 7..12.  For a
 * fixed t the six macros name six different registers, and the mapping
 * repeats every 6 rounds.  Stepping t by one shifts every role along:
 *
 *   RT(t) == RA(t+1), RA(t) == RB(t+1), RB(t) == RC(t+1),
 *   RC(t) == RD(t+1), RD(t) == RE(t+1), RE(t) == RT(t+1)
 *
 * so no register-to-register moves are needed between rounds; the
 * register that held E in round t is simply overwritten with T in
 * round t+1.
 */
24 #define RT(t)   ((((t)+5)%6)+7)
25 #define RA(t)   ((((t)+4)%6)+7)
26 #define RB(t)   ((((t)+3)%6)+7)
27 #define RC(t)   ((((t)+2)%6)+7)
28 #define RD(t)   ((((t)+1)%6)+7)
29 #define RE(t)   ((((t)+0)%6)+7)
30
31 /* We use registers 16 - 31 for the W values */
/*
 * W(t) evaluates to register number (t mod 16) + 16, so the sixteen
 * registers form a circular buffer: W(t) and W(t+16) name the same
 * register.  The *_UPDATE step macros rely on this when they overwrite
 * W(t+4-16) with the newly computed W(t+4).  Every argument passed in
 * this file is non-negative.
 */
32 #define W(t)    (((t)%16)+16)
33
/*
 * LOADW(t): load message word t, i.e. the 32-bit word at byte offset
 * t*4 from the address in r4, into register W(t) using LWZ (so it is
 * byte-reversed when __BIG_ENDIAN__ is not defined).
 * NOTE(review): r4 is presumably the input-block pointer passed by the
 * C caller - confirm against the prototype.
 */
34 #define LOADW(t)                                \
35         LWZ(W(t),(t)*4,r4)
36
/*
 * STEPD0_LOAD(t): one round with f = (B & C) | (D & ~B), interleaved
 * with the load of message word t+4.
 *
 *   RT(t)  = rotlwi(RA(t), 5) + f + RE(t) + r15 + W(t)
 *   RB(t)  = rotlwi(RB(t), 30)
 *   W(t+4) = word at byte offset (t+4)*4 from r4, loaded via LWZ
 *
 * where A..E are the values in RA(t)..RE(t) on entry and r15 holds the
 * round constant set up by the caller.  W(t) is consumed (into r14)
 * before the LWZ is issued.  r0, r6 and r14 are clobbered as
 * temporaries.
 */
37 #define STEPD0_LOAD(t)                          \
38         andc    r0,RD(t),RB(t);         \
39         and     r6,RB(t),RC(t);         \
40         rotlwi  RT(t),RA(t),5;                  \
41         or      r6,r6,r0;                       \
42         add     r0,RE(t),r15;                   \
43         add     RT(t),RT(t),r6;         \
44         add     r14,r0,W(t);                    \
45         LWZ(W((t)+4),((t)+4)*4,r4);     \
46         rotlwi  RB(t),RB(t),30;                 \
47         add     RT(t),RT(t),r14
48
/*
 * STEPD0_UPDATE(t): one round with f = (B & C) | (D & ~B), interleaved
 * with the computation of schedule word t+4.
 *
 *   RT(t)  = rotlwi(RA(t), 5) + f + RE(t) + r15 + W(t)
 *   RB(t)  = rotlwi(RB(t), 30)
 *   W(t+4) = rotlwi(W(t+1) ^ W(t-4) ^ W(t-12) ^ W(t-10), 1)
 *
 * Both ANDs read RB(t) before it is rotated.  W(t+4) and W(t-12) are the
 * same register (see W()), so the oldest schedule word is replaced in
 * place.  r15 holds the round constant.  r0, r5 and r6 are clobbered as
 * temporaries.
 */
49 #define STEPD0_UPDATE(t)                        \
50         and     r6,RB(t),RC(t);         \
51         andc    r0,RD(t),RB(t);         \
52         rotlwi  RT(t),RA(t),5;                  \
53         rotlwi  RB(t),RB(t),30;                 \
54         or      r6,r6,r0;                       \
55         add     r0,RE(t),r15;                   \
56         xor     r5,W((t)+4-3),W((t)+4-8);               \
57         add     RT(t),RT(t),r6;         \
58         xor     W((t)+4),W((t)+4-16),W((t)+4-14);       \
59         add     r0,r0,W(t);                     \
60         xor     W((t)+4),W((t)+4),r5;                   \
61         add     RT(t),RT(t),r0;         \
62         rotlwi  W((t)+4),W((t)+4),1
63
/*
 * STEPD1(t): one round with f = B ^ C ^ D and no message-schedule work.
 *
 *   RT(t) = rotlwi(RA(t), 5) + f + RE(t) + r15 + W(t)
 *   RB(t) = rotlwi(RB(t), 30)
 *
 * The first XOR reads RB(t) before it is rotated.  r15 holds the round
 * constant.  r0 and r6 are clobbered as temporaries.  In this file it is
 * used only for rounds 76-79, where no further W values are required.
 */
64 #define STEPD1(t)                               \
65         xor     r6,RB(t),RC(t);         \
66         rotlwi  RT(t),RA(t),5;                  \
67         rotlwi  RB(t),RB(t),30;                 \
68         xor     r6,r6,RD(t);                    \
69         add     r0,RE(t),r15;                   \
70         add     RT(t),RT(t),r6;         \
71         add     r0,r0,W(t);                     \
72         add     RT(t),RT(t),r0
73
/*
 * STEPD1_UPDATE(t): one round with f = B ^ C ^ D, interleaved with the
 * computation of schedule word t+4.
 *
 *   RT(t)  = rotlwi(RA(t), 5) + f + RE(t) + r15 + W(t)
 *   RB(t)  = rotlwi(RB(t), 30)
 *   W(t+4) = rotlwi(W(t+1) ^ W(t-4) ^ W(t-12) ^ W(t-10), 1)
 *
 * W(t+4) and W(t-12) are the same register (see W()), so the oldest
 * schedule word is replaced in place.  r15 holds the round constant.
 * r0, r5 and r6 are clobbered as temporaries.
 */
74 #define STEPD1_UPDATE(t)                                \
75         xor     r6,RB(t),RC(t);         \
76         rotlwi  RT(t),RA(t),5;                  \
77         rotlwi  RB(t),RB(t),30;                 \
78         xor     r6,r6,RD(t);                    \
79         add     r0,RE(t),r15;                   \
80         xor     r5,W((t)+4-3),W((t)+4-8);               \
81         add     RT(t),RT(t),r6;         \
82         xor     W((t)+4),W((t)+4-16),W((t)+4-14);       \
83         add     r0,r0,W(t);                     \
84         xor     W((t)+4),W((t)+4),r5;                   \
85         add     RT(t),RT(t),r0;         \
86         rotlwi  W((t)+4),W((t)+4),1
87
/*
 * STEPD2_UPDATE(t): one round with the majority function
 * f = (B & C) | (B & D) | (C & D), interleaved with the computation of
 * schedule word t+4.
 *
 *   RT(t)  = rotlwi(RA(t), 5) + f + RE(t) + r15 + W(t)
 *   RB(t)  = rotlwi(RB(t), 30)
 *   W(t+4) = rotlwi(W(t+1) ^ W(t-4) ^ W(t-12) ^ W(t-10), 1)
 *
 * The two ANDs that involve RB(t) are issued before RB(t) is rotated;
 * the third AND, issued after the rotate, reads only RC(t) and RD(t).
 * W(t+4) and W(t-12) are the same register (see W()).  r15 holds the
 * round constant.  r0, r5 and r6 are clobbered as temporaries.
 */
88 #define STEPD2_UPDATE(t)                        \
89         and     r6,RB(t),RC(t);         \
90         and     r0,RB(t),RD(t);         \
91         rotlwi  RT(t),RA(t),5;                  \
92         or      r6,r6,r0;                       \
93         rotlwi  RB(t),RB(t),30;                 \
94         and     r0,RC(t),RD(t);         \
95         xor     r5,W((t)+4-3),W((t)+4-8);       \
96         or      r6,r6,r0;                       \
97         xor     W((t)+4),W((t)+4-16),W((t)+4-14);       \
98         add     r0,RE(t),r15;                   \
99         add     RT(t),RT(t),r6;         \
100         add     r0,r0,W(t);                     \
101         xor     W((t)+4),W((t)+4),r5;           \
102         add     RT(t),RT(t),r0;         \
103         rotlwi  W((t)+4),W((t)+4),1
104
/*
 * STEP0LD4(t): four consecutive STEPD0_LOAD rounds, t through t+3.
 * As a side effect of those rounds, message words t+4 through t+7 are
 * loaded from r4.
 */
105 #define STEP0LD4(t)                             \
106         STEPD0_LOAD(t);                         \
107         STEPD0_LOAD((t)+1);                     \
108         STEPD0_LOAD((t)+2);                     \
109         STEPD0_LOAD((t)+3)
110
/*
 * STEPUP4(t, fn): four consecutive rounds, t through t+3, using the
 * macro STEP<fn>_UPDATE.  fn is token-pasted into the macro name; the
 * values used in this file are D0, D1 and D2.
 */
111 #define STEPUP4(t, fn)                          \
112         STEP##fn##_UPDATE(t);                   \
113         STEP##fn##_UPDATE((t)+1);               \
114         STEP##fn##_UPDATE((t)+2);               \
115         STEP##fn##_UPDATE((t)+3)
116
/*
 * STEPUP20(t, fn): twenty consecutive rounds, t through t+19, emitted as
 * five STEPUP4 groups that all use the same STEP<fn>_UPDATE macro.
 */
117 #define STEPUP20(t, fn)                         \
118         STEPUP4(t, fn);                         \
119         STEPUP4((t)+4, fn);                     \
120         STEPUP4((t)+8, fn);                     \
121         STEPUP4((t)+12, fn);                    \
122         STEPUP4((t)+16, fn)
123
/*
 * powerpc_sha_transform: run the 80 SHA-1 rounds over one block of
 * sixteen message words and add the result into the five-word state.
 *
 * Registers on entry, as used by the code below:
 *   r3 - address of the five 32-bit state words A..E; they are read
 *        with lwz at offsets 0..16 and rewritten in place at the end
 *   r4 - address of the message words; sixteen words at byte offsets
 *        0..60 are read through LWZ/LOADW
 * NOTE(review): r3/r4 being the first two C arguments is assumed from
 * the PowerPC calling convention; the C prototype is not in this file.
 *
 * Register use inside the routine:
 *   r7-r12          - rotating T/A/B/C/D/E set (RT() .. RE())
 *   r16-r31         - 16-entry circular message schedule (W())
 *   r15             - current round constant K
 *   r0, r5, r6, r14 - scratch used by the step macros
 *
 * A frame of INT_FRAME_SIZE bytes is pushed on entry and popped on exit.
 * SAVE_8GPRS/SAVE_10GPRS and REST_8GPRS/REST_10GPRS come from
 * asm/ppc_asm.h; presumably they spill and reload r14-r21 and r22-r31
 * in that frame - confirm against the header.
 */
124 _GLOBAL(powerpc_sha_transform)
125         PPC_STLU r1,-INT_FRAME_SIZE(r1)
126         SAVE_8GPRS(14, r1)
127         SAVE_10GPRS(22, r1)
128
129         /* Load up A - E */
130         lwz     RA(0),0(r3)     /* A */
131         lwz     RB(0),4(r3)     /* B */
132         lwz     RC(0),8(r3)     /* C */
133         lwz     RD(0),12(r3)    /* D */
134         lwz     RE(0),16(r3)    /* E */
135
            /*
             * Preload W[0..3]; each STEPD0_LOAD round t then fetches
             * W[t+4] itself.
             */
136         LOADW(0)
137         LOADW(1)
138         LOADW(2)
139         LOADW(3)
140
141         lis     r15,0x5a82      /* K0-19 */
142         ori     r15,r15,0x7999
            /* Rounds 0-11: also load the remaining message words W[4..15]. */
143         STEP0LD4(0)
144         STEP0LD4(4)
145         STEP0LD4(8)
            /* Rounds 12-19: same round function, now computing W[16..23]. */
146         STEPUP4(12, D0)
147         STEPUP4(16, D0)
148
149         lis     r15,0x6ed9      /* K20-39 */
150         ori     r15,r15,0xeba1
151         STEPUP20(20, D1)
152
153         lis     r15,0x8f1b      /* K40-59 */
154         ori     r15,r15,0xbcdc
155         STEPUP20(40, D2)
156
157         lis     r15,0xca62      /* K60-79 */
158         ori     r15,r15,0xc1d6
            /* Rounds 60-75 still extend the schedule, up to W[79]. */
159         STEPUP4(60, D1)
160         STEPUP4(64, D1)
161         STEPUP4(68, D1)
162         STEPUP4(72, D1)
            /*
             * Rounds 76-79 read only W(76)..W(79) (r28-r31) and compute
             * no new schedule words, so r16-r20 are free.  The previous
             * state E, D, C, B, A is reloaded into them between rounds.
             */
163         lwz     r20,16(r3)
164         STEPD1(76)
165         lwz     r19,12(r3)
166         STEPD1(77)
167         lwz     r18,8(r3)
168         STEPD1(78)
169         lwz     r17,4(r3)
170         STEPD1(79)
171
172         lwz     r16,0(r3)
            /*
             * After round 79 the working values sit in RA(80)..RE(80),
             * i.e. r7, r12, r11, r10, r9.  They are added to the old
             * state and placed in RA(0)..RE(0), i.e. r11, r10, r9, r8,
             * r7.  E is summed into r20 and moved last because RE(0)
             * (r7) is also RA(80), which is read by the final add.
             * Every other destination is either unused at this point
             * (r8) or was already consumed as a source by an earlier
             * add in this sequence.
             */
173         add     r20,RE(80),r20
174         add     RD(0),RD(80),r19
175         add     RC(0),RC(80),r18
176         add     RB(0),RB(80),r17
177         add     RA(0),RA(80),r16
178         mr      RE(0),r20
            /* Write the updated state back over the words at r3. */
179         stw     RA(0),0(r3)
180         stw     RB(0),4(r3)
181         stw     RC(0),8(r3)
182         stw     RD(0),12(r3)
183         stw     RE(0),16(r3)
184
185         REST_8GPRS(14, r1)
186         REST_10GPRS(22, r1)
187         addi    r1,r1,INT_FRAME_SIZE
188         blr