GNU Linux-libre 4.19.281-gnu1
[releases.git] / arch / powerpc / crypto / sha1-powerpc-asm.S
1 /* SPDX-License-Identifier: GPL-2.0 */
2 /*
3  * SHA-1 implementation for PowerPC.
4  *
5  * Copyright (C) 2005 Paul Mackerras <paulus@samba.org>
6  */
7
8 #include <asm/ppc_asm.h>
9 #include <asm/asm-offsets.h>
10 #include <asm/asm-compat.h>
11
/*
 * LWZ(rt, d, ra): load the 32-bit word at byte offset d from the address
 * in ra into rt.
 *
 * When __BIG_ENDIAN__ is defined this is a plain lwz.  Otherwise the offset
 * is first placed in rt and the word is fetched with lwbrx (load word
 * byte-reversed, indexed), so rt serves as both index and destination and
 * no separate scratch register is needed.
 *
 * NOTE(review): in the lwbrx form rt occupies the base-register operand,
 * where r0 would be read as the value 0 rather than the register contents.
 * Every use visible in this file passes a W() register (r16-r31), so that
 * case does not arise here.
 */
12 #ifdef __BIG_ENDIAN__
13 #define LWZ(rt, d, ra)  \
14         lwz     rt,d(ra)
15 #else
16 #define LWZ(rt, d, ra)  \
17         li      rt,d;   /* rt = byte offset */ \
18         lwbrx   rt,rt,ra        /* rt = byte-reversed word at ra + rt */
19 #endif
20
21 /*
22  * We roll the registers for T, A, B, C, D, E around on each
23  * iteration; T on iteration t is A on iteration t+1, and so on.
24  * We use registers 7 - 12 for this.
25  */
/*
 * Each macro expands to an expression that evaluates to a register number
 * in the range 7..12.  For t = 0 the assignment is
 *   E = 7, D = 8, C = 9, B = 10, A = 11, T = 12
 * and the mapping repeats every 6 iterations.  Because Rx(t+1) names the
 * register that held the previous role at step t (e.g. RA(t+1) == RT(t),
 * RC(t+1) == RB(t)), no register moves are needed between rounds.
 */
26 #define RT(t)   ((((t)+5)%6)+7)
27 #define RA(t)   ((((t)+4)%6)+7)
28 #define RB(t)   ((((t)+3)%6)+7)
29 #define RC(t)   ((((t)+2)%6)+7)
30 #define RD(t)   ((((t)+1)%6)+7)
31 #define RE(t)   ((((t)+0)%6)+7)
32
33 /* We use registers 16 - 31 for the W values */
/*
 * W(t) maps message-schedule word t onto a 16-entry ring of registers:
 * W(t) and W(t-16) are the same register, so a newly computed word
 * overwrites the one from 16 steps earlier.
 */
34 #define W(t)    (((t)%16)+16)
35
/*
 * LOADW(t): load message word t (byte offset t*4 from the block pointer
 * in r4) into its W register, using the LWZ macro.
 */
36 #define LOADW(t)                                \
37         LWZ(W(t),(t)*4,r4)
38
/*
 * STEPD0_LOAD(t): one round using f = (B & C) | (~B & D), with the load of
 * message word t+4 from the block at r4 folded into the instruction stream.
 *
 *   T = rotl(A, 5) + f + E + r15 + W(t)
 *   B = rotl(B, 30)     (this register is C for the next round)
 *
 * r15 must hold the round constant on entry.  Clobbers r0, r6 and r14.
 * The instruction order is interleaved by hand; keep it as is.
 */
39 #define STEPD0_LOAD(t)                          \
40         andc    r0,RD(t),RB(t);         /* r0 = D & ~B */ \
41         and     r6,RB(t),RC(t);         /* r6 = B & C */ \
42         rotlwi  RT(t),RA(t),5;                  /* T = rotl(A, 5) */ \
43         or      r6,r6,r0;                       /* r6 = f(B, C, D) */ \
44         add     r0,RE(t),r15;                   /* r0 = E + K */ \
45         add     RT(t),RT(t),r6;         /* T += f */ \
46         add     r14,r0,W(t);                    /* r14 = E + K + W(t) */ \
47         LWZ(W((t)+4),((t)+4)*4,r4);     /* fetch message word t+4 */ \
48         rotlwi  RB(t),RB(t),30;                 /* B = rotl(B, 30) */ \
49         add     RT(t),RT(t),r14 /* T += E + K + W(t) */
50
/*
 * STEPD0_UPDATE(t): one round using f = (B & C) | (~B & D), interleaved
 * with the computation of schedule word t+4.
 *
 *   T = rotl(A, 5) + f + E + r15 + W(t)
 *   B = rotl(B, 30)
 *   W(t+4) = rotl(W(t+1) ^ W(t-4) ^ W(t-10) ^ W(t-12), 1)
 *
 * With s = t+4 the last line is the recurrence on W(s-3), W(s-8), W(s-14)
 * and W(s-16); W(s) and W(s-16) share a register, so the result replaces
 * the oldest word.  r15 must hold the round constant.
 * Clobbers r0, r5 and r6.
 */
51 #define STEPD0_UPDATE(t)                        \
52         and     r6,RB(t),RC(t);         /* r6 = B & C */ \
53         andc    r0,RD(t),RB(t);         /* r0 = D & ~B */ \
54         rotlwi  RT(t),RA(t),5;                  /* T = rotl(A, 5) */ \
55         rotlwi  RB(t),RB(t),30;                 /* B = rotl(B, 30) */ \
56         or      r6,r6,r0;                       /* r6 = f(B, C, D) */ \
57         add     r0,RE(t),r15;                   /* r0 = E + K */ \
58         xor     r5,W((t)+4-3),W((t)+4-8);               /* r5 = W(s-3) ^ W(s-8) */ \
59         add     RT(t),RT(t),r6;         /* T += f */ \
60         xor     W((t)+4),W((t)+4-16),W((t)+4-14);       /* W(s) = W(s-16) ^ W(s-14) */ \
61         add     r0,r0,W(t);                     /* r0 = E + K + W(t) */ \
62         xor     W((t)+4),W((t)+4),r5;                   /* fold in r5 */ \
63         add     RT(t),RT(t),r0;         /* T += E + K + W(t) */ \
64         rotlwi  W((t)+4),W((t)+4),1     /* W(s) = rotl(W(s), 1) */
65
/*
 * STEPD1(t): one round using f = B ^ C ^ D, with no schedule update.
 *
 *   T = rotl(A, 5) + f + E + r15 + W(t)
 *   B = rotl(B, 30)
 *
 * r15 must hold the round constant.  Clobbers r0 and r6.
 */
66 #define STEPD1(t)                               \
67         xor     r6,RB(t),RC(t);         /* r6 = B ^ C */ \
68         rotlwi  RT(t),RA(t),5;                  /* T = rotl(A, 5) */ \
69         rotlwi  RB(t),RB(t),30;                 /* B = rotl(B, 30) */ \
70         xor     r6,r6,RD(t);                    /* r6 = B ^ C ^ D */ \
71         add     r0,RE(t),r15;                   /* r0 = E + K */ \
72         add     RT(t),RT(t),r6;         /* T += f */ \
73         add     r0,r0,W(t);                     /* r0 = E + K + W(t) */ \
74         add     RT(t),RT(t),r0  /* T += E + K + W(t) */
75
/*
 * STEPD1_UPDATE(t): one round using f = B ^ C ^ D, interleaved with the
 * computation of schedule word t+4.
 *
 *   T = rotl(A, 5) + f + E + r15 + W(t)
 *   B = rotl(B, 30)
 *   W(t+4) = rotl(W(t+1) ^ W(t-4) ^ W(t-10) ^ W(t-12), 1)
 *
 * r15 must hold the round constant.  Clobbers r0, r5 and r6.
 */
76 #define STEPD1_UPDATE(t)                                \
77         xor     r6,RB(t),RC(t);         /* r6 = B ^ C */ \
78         rotlwi  RT(t),RA(t),5;                  /* T = rotl(A, 5) */ \
79         rotlwi  RB(t),RB(t),30;                 /* B = rotl(B, 30) */ \
80         xor     r6,r6,RD(t);                    /* r6 = B ^ C ^ D */ \
81         add     r0,RE(t),r15;                   /* r0 = E + K */ \
82         xor     r5,W((t)+4-3),W((t)+4-8);               /* r5 = W(s-3) ^ W(s-8), s = t+4 */ \
83         add     RT(t),RT(t),r6;         /* T += f */ \
84         xor     W((t)+4),W((t)+4-16),W((t)+4-14);       /* W(s) = W(s-16) ^ W(s-14) */ \
85         add     r0,r0,W(t);                     /* r0 = E + K + W(t) */ \
86         xor     W((t)+4),W((t)+4),r5;                   /* fold in r5 */ \
87         add     RT(t),RT(t),r0;         /* T += E + K + W(t) */ \
88         rotlwi  W((t)+4),W((t)+4),1     /* W(s) = rotl(W(s), 1) */
89
/*
 * STEPD2_UPDATE(t): one round using the majority function
 * f = (B & C) | (B & D) | (C & D), interleaved with the computation of
 * schedule word t+4.
 *
 *   T = rotl(A, 5) + f + E + r15 + W(t)
 *   B = rotl(B, 30)
 *   W(t+4) = rotl(W(t+1) ^ W(t-4) ^ W(t-10) ^ W(t-12), 1)
 *
 * B is rotated only after both terms that read it (B & C, B & D) have been
 * formed.  r15 must hold the round constant.  Clobbers r0, r5 and r6.
 */
90 #define STEPD2_UPDATE(t)                        \
91         and     r6,RB(t),RC(t);         /* r6 = B & C */ \
92         and     r0,RB(t),RD(t);         /* r0 = B & D */ \
93         rotlwi  RT(t),RA(t),5;                  /* T = rotl(A, 5) */ \
94         or      r6,r6,r0;                       /* r6 = (B & C) | (B & D) */ \
95         rotlwi  RB(t),RB(t),30;                 /* B = rotl(B, 30) */ \
96         and     r0,RC(t),RD(t);         /* r0 = C & D */ \
97         xor     r5,W((t)+4-3),W((t)+4-8);       /* r5 = W(s-3) ^ W(s-8), s = t+4 */ \
98         or      r6,r6,r0;                       /* r6 = f(B, C, D) */ \
99         xor     W((t)+4),W((t)+4-16),W((t)+4-14);       /* W(s) = W(s-16) ^ W(s-14) */ \
100         add     r0,RE(t),r15;                   /* r0 = E + K */ \
101         add     RT(t),RT(t),r6;         /* T += f */ \
102         add     r0,r0,W(t);                     /* r0 = E + K + W(t) */ \
103         xor     W((t)+4),W((t)+4),r5;           /* fold in r5 */ \
104         add     RT(t),RT(t),r0;         /* T += E + K + W(t) */ \
105         rotlwi  W((t)+4),W((t)+4),1     /* W(s) = rotl(W(s), 1) */
106
/*
 * STEP0LD4(t): four consecutive STEPD0_LOAD rounds, t .. t+3.  Each round
 * also loads the message word four positions ahead, so this fetches
 * words t+4 .. t+7.
 */
107 #define STEP0LD4(t)                             \
108         STEPD0_LOAD(t);                         \
109         STEPD0_LOAD((t)+1);                     \
110         STEPD0_LOAD((t)+2);                     \
111         STEPD0_LOAD((t)+3)
112
/*
 * STEPUP4(t, fn): four consecutive rounds t .. t+3 of the "update" variant
 * selected by fn.  fn is pasted between STEP and _UPDATE, so passing D0,
 * D1 or D2 selects STEPD0_UPDATE, STEPD1_UPDATE or STEPD2_UPDATE.
 */
113 #define STEPUP4(t, fn)                          \
114         STEP##fn##_UPDATE(t);                   \
115         STEP##fn##_UPDATE((t)+1);               \
116         STEP##fn##_UPDATE((t)+2);               \
117         STEP##fn##_UPDATE((t)+3)
118
/*
 * STEPUP20(t, fn): twenty consecutive rounds t .. t+19 of the "update"
 * variant selected by fn, built from five STEPUP4 groups.
 */
119 #define STEPUP20(t, fn)                         \
120         STEPUP4(t, fn);                         \
121         STEPUP4((t)+4, fn);                     \
122         STEPUP4((t)+8, fn);                     \
123         STEPUP4((t)+12, fn);                    \
124         STEPUP4((t)+16, fn)
125
/*
 * powerpc_sha_transform: run the 80 SHA-1 rounds over one 64-byte block
 * and add the result into the caller's state.
 *
 * In:   r3 = pointer to the five 32-bit state words (A at offset 0 through
 *            E at offset 16); read on entry, overwritten with the new state
 *       r4 = pointer to the 16 input words (byte offsets 0..60), read
 *            through the LWZ macro
 * Uses: r0, r5, r6   scratch
 *       r7-r12       rotating T/A/B/C/D/E set (see RT()..RE())
 *       r14          extra scratch in the load rounds
 *       r15          current round constant
 *       r16-r31      message schedule ring (see W())
 *
 * NOTE(review): SAVE_8GPRS/SAVE_10GPRS and their REST_ counterparts are
 * defined outside this file; presumably they spill and reload r14-r21 and
 * r22-r31 in the frame -- confirm against their definitions.
 *
 * All rounds are unrolled with no loop, so one call handles one block.
 */
126 _GLOBAL(powerpc_sha_transform)
127         PPC_STLU r1,-INT_FRAME_SIZE(r1) /* push an INT_FRAME_SIZE-byte frame */
128         SAVE_8GPRS(14, r1)
129         SAVE_10GPRS(22, r1)
130
131         /* Load up A - E */
132         lwz     RA(0),0(r3)     /* A */
133         lwz     RB(0),4(r3)     /* B */
134         lwz     RC(0),8(r3)     /* C */
135         lwz     RD(0),12(r3)    /* D */
136         lwz     RE(0),16(r3)    /* E */
137
        /* Preload W0-W3; each load round below fetches word t+4 itself */
138         LOADW(0)
139         LOADW(1)
140         LOADW(2)
141         LOADW(3)
142
143         lis     r15,0x5a82      /* K0-19 */
144         ori     r15,r15,0x7999
145         STEP0LD4(0)     /* rounds 0-3, loads W4-W7 */
146         STEP0LD4(4)     /* rounds 4-7, loads W8-W11 */
147         STEP0LD4(8)     /* rounds 8-11, loads W12-W15 */
148         STEPUP4(12, D0) /* rounds 12-15, computes W16-W19 */
149         STEPUP4(16, D0) /* rounds 16-19, computes W20-W23 */
150
151         lis     r15,0x6ed9      /* K20-39 */
152         ori     r15,r15,0xeba1
153         STEPUP20(20, D1)        /* rounds 20-39, computes W24-W43 */
154
155         lis     r15,0x8f1b      /* K40-59 */
156         ori     r15,r15,0xbcdc
157         STEPUP20(40, D2)        /* rounds 40-59, computes W44-W63 */
158
159         lis     r15,0xca62      /* K60-79 */
160         ori     r15,r15,0xc1d6
161         STEPUP4(60, D1) /* rounds 60-75 compute W64-W79 */
162         STEPUP4(64, D1)
163         STEPUP4(68, D1)
164         STEPUP4(72, D1)
        /*
         * Rounds 76-79 need no further schedule words and read only
         * W(76)..W(79) (r28-r31), so r16-r20 are free to receive the
         * original state, reloaded in between the last rounds.
         */
165         lwz     r20,16(r3)      /* old E */
166         STEPD1(76)
167         lwz     r19,12(r3)      /* old D */
168         STEPD1(77)
169         lwz     r18,8(r3)       /* old C */
170         STEPD1(78)
171         lwz     r17,4(r3)       /* old B */
172         STEPD1(79)
173
174         lwz     r16,0(r3)       /* old A */
        /*
         * Add the old state to the round-80 values and move the sums into
         * the round-0 register assignment.  RE(0) is the same register as
         * RA(80), so the E sum is parked in r20 until A has been consumed;
         * the other destinations only overwrite values already used by an
         * earlier add in this sequence.
         */
175         add     r20,RE(80),r20
176         add     RD(0),RD(80),r19
177         add     RC(0),RC(80),r18
178         add     RB(0),RB(80),r17
179         add     RA(0),RA(80),r16
180         mr      RE(0),r20
        /* Write the updated state back through r3 */
181         stw     RA(0),0(r3)
182         stw     RB(0),4(r3)
183         stw     RC(0),8(r3)
184         stw     RD(0),12(r3)
185         stw     RE(0),16(r3)
186
187         REST_8GPRS(14, r1)
188         REST_10GPRS(22, r1)
189         addi    r1,r1,INT_FRAME_SIZE    /* pop the frame */
190         blr