1 // SPDX-License-Identifier: GPL-2.0
3 * Routines to emulate some Altivec/VMX instructions, specifically
4 * those that can trap when given denormalized operands in Java mode.
6 #include <linux/kernel.h>
7 #include <linux/errno.h>
8 #include <linux/sched.h>
9 #include <asm/ptrace.h>
10 #include <asm/processor.h>
11 #include <linux/uaccess.h>
13 /* Functions in vector.S */
/*
 * Whole-vector helpers implemented outside this file.  emulate_altivec()
 * calls them with pointers into the vector register array of the current
 * thread: dst receives the result, a/b/c (or src) are the source operands.
 */
/* two-source operations */
14 extern void vaddfp(vector128 *dst, vector128 *a, vector128 *b);
15 extern void vsubfp(vector128 *dst, vector128 *a, vector128 *b);
/* three-source operations */
16 extern void vmaddfp(vector128 *dst, vector128 *a, vector128 *b, vector128 *c);
17 extern void vnmsubfp(vector128 *dst, vector128 *a, vector128 *b, vector128 *c);
/* single-source estimate operations */
18 extern void vrefp(vector128 *dst, vector128 *src);
19 extern void vrsqrtefp(vector128 *dst, vector128 *src);
/*
 * NOTE(review): no call to vexptep is visible in this file; the vexptefp
 * case in emulate_altivec() goes through eexp2() element by element.
 * Confirm whether this declaration (and its spelling) is still needed.
 */
20 extern void vexptep(vector128 *dst, vector128 *src);
/*
 * Eight-entry mantissa table used by eexp2(), which indexes it with the
 * top three fraction bits of its fixed-point argument, (pwr >> 20) & 7.
 * NOTE(review): presumably entry i holds the mantissa of 2^(i/8) - confirm
 * against the initializer values.
 */
22 static unsigned int exp2s[8] = {
34 * Computes an estimate of 2^x. The `s' argument is the 32-bit
35 * single-precision floating-point representation of x.
/*
 * eexp2 - estimate 2^x for one vector element.
 * @s: bit pattern of the single-precision input x
 *
 * Returns the bit pattern of the estimate.  A NaN input comes back as a
 * quiet NaN.  On the out-of-range path a negative input yields 0 and a
 * positive input yields +Inf.  Otherwise the result is built from a table
 * lookup on the top fraction bits plus a linear interpolation term.
 */
37 static unsigned int eexp2(unsigned int s)
40 unsigned int mant, frac;
42 /* extract exponent field from input */
/* subtracting the bias of 127 gives the unbiased exponent */
43 exp = ((s >> 23) & 0xff) - 127;
45 /* check for NaN input */
/* unbiased 128 means the exponent field is all ones; nonzero fraction is NaN */
46 if (exp == 128 && (s & 0x7fffff) != 0)
/* bit 22 is the top fraction bit; setting it makes the NaN quiet */
47 return s | 0x400000; /* return QNaN */
48 /* 2^-big = 0, 2^+big = +Inf */
/* the sign bit of the input selects between the two limits */
49 return (s & 0x80000000)? 0: 0x7f800000; /* 0 or +Inf */
52 return 0x3f800000; /* 1.0 */
54 /* convert to fixed point integer in 9.23 representation */
/* keep the 23 fraction bits and put the implicit leading one back at bit 23 */
55 pwr = (s & 0x7fffff) | 0x800000;
63 /* extract integer part, which becomes exponent part of result */
/*
 * NOTE(review): 126 rather than the bias 127 - presumably the table
 * mantissa carries its leading one in bit 23, which supplies the last
 * unit when mant and exp << 23 are added below.  Confirm against exp2s.
 */
64 exp = (pwr >> 23) + 126;
70 /* table lookup on top 3 bits of fraction to get mantissa */
71 mant = exp2s[(pwr >> 20) & 7];
73 /* linear interpolation using remaining 20 bits of fraction */
/*
 * pwr << 12 moves the low 20 fraction bits to the top of the word.
 * mulhwu keeps the high 32 bits of the unsigned 64-bit product, so the
 * two multiplies scale those bits first by the constant and then by the
 * table mantissa.
 */
74 asm("mulhwu %0,%1,%2" : "=r" (frac)
75 : "r" (pwr << 12), "r" (0x172b83ff));
76 asm("mulhwu %0,%1,%2" : "=r" (frac) : "r" (frac), "r" (mant));
/* normal result: place the exponent at bit 23 and add the mantissa */
80 return mant + (exp << 23);
82 /* denormalized result */
/*
 * NOTE(review): presumably adds half of the amount about to be shifted
 * out so that the denormal mantissa is rounded - confirm with the shift
 * that follows.
 */
84 mant += 1 << (exp - 1);
89 * Computes an estimate of log_2(x). The `s' argument is the 32-bit
90 * single-precision floating-point representation of x.
/*
 * elog2 - estimate log_2(x) for one vector element.
 * @s: bit pattern of the single-precision input x
 *
 * Returns the bit pattern of the estimate.  Either zero input returns
 * -Inf (0xff800000), and a NaN input is turned into a quiet NaN.  For
 * other inputs the mantissa is reduced step by step: each step divides
 * out 2^0.5, 2^0.25 or 2^0.125 and records the matching fraction bit in
 * exp.  The remainder is handled by a linear term.
 */
92 static unsigned int elog2(unsigned int s)
94 int exp, mant, lz, frac;
/*
 * The comparison against 0x7f800000 shows that exp holds the exponent
 * field still in place (bits 30..23), not shifted down.
 */
98 if (exp == 0x7f800000) { /* Inf or NaN */
100 s |= 0x400000; /* turn NaN into QNaN */
103 if ((exp | mant) == 0) /* +0 or -0 */
104 return 0xff800000; /* return -Inf */
/*
 * cntlzw counts the leading zero bits of mant.
 * NOTE(review): this looks like the normalization of a denormal input,
 * with the exponent derived from the leading zero count - confirm.
 */
108 asm("cntlzw %0,%1" : "=r" (lz) : "r" (mant));
110 exp = (-118 - lz) << 23;
/*
 * Each step below compares mant with a power of two scaled by 2^23 and,
 * when mant is at least that large, sets the matching fraction bit in
 * exp and multiplies mant by the reciprocal (scaled by 2^32, high word
 * of the product kept by mulhwu).
 */
116 if (mant >= 0xb504f3) { /* 2^0.5 * 2^23 */
117 exp |= 0x400000; /* 0.5 * 2^23 */
118 asm("mulhwu %0,%1,%2" : "=r" (mant)
119 : "r" (mant), "r" (0xb504f334)); /* 2^-0.5 * 2^32 */
121 if (mant >= 0x9837f0) { /* 2^0.25 * 2^23 */
122 exp |= 0x200000; /* 0.25 * 2^23 */
123 asm("mulhwu %0,%1,%2" : "=r" (mant)
124 : "r" (mant), "r" (0xd744fccb)); /* 2^-0.25 * 2^32 */
126 if (mant >= 0x8b95c2) { /* 2^0.125 * 2^23 */
127 exp |= 0x100000; /* 0.125 * 2^23 */
128 asm("mulhwu %0,%1,%2" : "=r" (mant)
129 : "r" (mant), "r" (0xeac0c6e8)); /* 2^-0.125 * 2^32 */
/* whatever is left above 1.0 is approximated linearly */
131 if (mant > 0x800000) { /* 1.0 * 2^23 */
132 /* calculate (mant - 1) * 1.381097463 */
133 /* 1.381097463 == 0.125 / (2^0.125 - 1) */
134 asm("mulhwu %0,%1,%2" : "=r" (frac)
135 : "r" ((mant - 0x800000) << 1), "r" (0xb0c7cd3a));
/* the sign of the fixed-point logarithm becomes the sign of the result */
138 s = exp & 0x80000000;
/* leading zero count of the fixed-point value, used to normalize it */
142 asm("cntlzw %0,%1" : "=r" (lz) : "r" (exp));
/*
 * Assemble the float: an exponent term derived from lz is placed at
 * bit 23 and the normalized value is added in.
 * NOTE(review): read together with the normalization steps between the
 * cntlzw and this line before changing any constant.
 */
148 s += ((lz + 126) << 23) + exp;
/*
 * ctsxs - convert one single-precision element to a signed 32-bit
 * integer, after scaling by 2^scale, rounding towards zero.
 * @x:     bit pattern of the input
 * @scale: power of two applied to the input before conversion
 * @vscrp: pointer supplied by the caller (emulate_altivec passes the
 *         address of vscr.u[3]).  NOTE(review): presumably the saturation
 *         flag is recorded through it - confirm.
 *
 * NaN converts to 0.  Out-of-range magnitudes saturate to 0x80000000 for
 * negative inputs and 0x7fffffff for positive inputs.
 */
155 static int ctsxs(unsigned int x, int scale, unsigned int *vscrp)
159 exp = (x >> 23) & 0xff;
/* exponent field all ones with a nonzero mantissa is a NaN */
161 if (exp == 255 && mant != 0)
162 return 0; /* NaN -> 0 */
/* remove the bias and apply the scale to get the power of two of the result */
163 exp = exp - 127 + scale;
165 return 0; /* round towards zero */
167 /* saturate, unless the result would be -2^31 */
/*
 * Adding scale << 23 to x bumps its exponent field by scale;
 * 0xcf000000 is the bit pattern of -2^31, which fits without saturating.
 */
168 if (x + (scale << 23) != 0xcf000000)
170 return (x & 0x80000000)? 0x80000000: 0x7fffffff;
/*
 * Assuming mant holds the 24-bit significand (leading one at bit 23),
 * the left shift moves it up to bit 30 and the right shift then drops
 * the bits below the binary point.
 */
173 mant = (mant << 7) >> (30 - exp);
/* apply the sign of the input to the magnitude */
174 return (x & 0x80000000)? -mant: mant;
/*
 * ctuxs - convert one single-precision element to an unsigned 32-bit
 * integer, after scaling by 2^scale, rounding towards zero.
 * @x:     bit pattern of the input
 * @scale: power of two applied to the input before conversion
 * @vscrp: pointer supplied by the caller (emulate_altivec passes the
 *         address of vscr.u[3]).  NOTE(review): presumably the saturation
 *         flag is recorded through it - confirm.
 *
 * NaN converts to 0 and negative inputs saturate to 0.
 */
177 static unsigned int ctuxs(unsigned int x, int scale, unsigned int *vscrp)
182 exp = (x >> 23) & 0xff;
/* exponent field all ones with a nonzero mantissa is a NaN */
184 if (exp == 255 && mant != 0)
185 return 0; /* NaN -> 0 */
/* remove the bias and apply the scale to get the power of two of the result */
186 exp = exp - 127 + scale;
188 return 0; /* round towards zero */
189 if (x & 0x80000000) {
190 /* negative => saturate to 0 */
/*
 * Assuming mant holds the 24-bit significand (leading one at bit 23),
 * the left shift moves it up to bit 31 and the right shift then drops
 * the bits below the binary point.
 */
200 mant = (mant << 8) >> (31 - exp);
204 /* Round to floating integer, towards 0 */
/*
 * @x: bit pattern of the single-precision input
 *
 * Returns the bit pattern of x truncated to an integral value.  NaN
 * inputs are returned as quiet NaNs and the sign of x is always kept.
 */
205 static unsigned int rfiz(unsigned int x)
/* unbiased exponent of the input */
209 exp = ((x >> 23) & 0xff) - 127;
210 if (exp == 128 && (x & 0x7fffff) != 0)
211 return x | 0x400000; /* NaN -> make it a QNaN */
213 return x; /* it's an integer already (or Inf) */
/* only the sign bit survives, giving +0 or -0 */
215 return x & 0x80000000; /* |x| < 1.0 rounds to 0 */
/* 0x7fffff >> exp covers the fraction bits worth less than 1; clear them */
216 return x & ~(0x7fffff >> exp);
219 /* Round to floating integer, towards +/- Inf */
/*
 * @x: bit pattern of the single-precision input
 *
 * Returns the bit pattern of x rounded away from zero to an integral
 * value.  NaN inputs are returned as quiet NaNs, and zeros are returned
 * unchanged.
 */
220 static unsigned int rfii(unsigned int x)
/* unbiased exponent of the input */
224 exp = ((x >> 23) & 0xff) - 127;
225 if (exp == 128 && (x & 0x7fffff) != 0)
226 return x | 0x400000; /* NaN -> make it a QNaN */
228 return x; /* it's an integer already (or Inf) */
/* everything but the sign bit is zero */
229 if ((x & 0x7fffffff) == 0)
230 return x; /* +/-0 -> +/-0 */
232 /* 0 < |x| < 1.0 rounds to +/- 1.0 */
/* 0x3f800000 is 1.0; the sign bit of x is carried over */
233 return (x & 0x80000000) | 0x3f800000;
/* mask covers the fraction bits worth less than 1 */
234 mask = 0x7fffff >> exp;
235 /* mantissa overflows into exponent - that's OK,
236 it can't overflow into the sign bit */
/* adding mask bumps any value with fraction bits set past the next integer */
237 return (x + mask) & ~mask;
240 /* Round to floating integer, to nearest */
/*
 * @x: bit pattern of the single-precision input
 *
 * Returns the bit pattern of x rounded to the nearest integral value.
 * NaN inputs are returned as quiet NaNs.  On the general path half is
 * added to the magnitude before the fraction is cut off, so exact halves
 * move away from zero.
 */
241 static unsigned int rfin(unsigned int x)
/* unbiased exponent of the input */
245 exp = ((x >> 23) & 0xff) - 127;
246 if (exp == 128 && (x & 0x7fffff) != 0)
247 return x | 0x400000; /* NaN -> make it a QNaN */
249 return x; /* it's an integer already (or Inf) */
/* only the sign bit survives, giving +0 or -0 */
251 return x & 0x80000000; /* |x| < 0.5 -> +/-0 */
253 /* 0.5 <= |x| < 1.0 rounds to +/- 1.0 */
/* 0x3f800000 is 1.0; the sign bit of x is carried over */
254 return (x & 0x80000000) | 0x3f800000;
/* 0x400000 >> exp is the fraction bit worth exactly 0.5 */
255 half = 0x400000 >> exp;
256 /* add 0.5 to the magnitude and chop off the fraction bits */
257 return (x + half) & ~(0x7fffff >> exp);
/*
 * emulate_altivec - emulate one AltiVec instruction for the current task.
 * @regs: saved user registers; regs->nip is the user address from which
 *        the instruction word is fetched
 *
 * The instruction is decoded into four 5-bit register fields and a 6-bit
 * function code.  The operation is then carried out on the vector
 * register array of the current thread, either through a whole-vector
 * helper or element by element over the four 32-bit words of a register.
 * Returns -EINVAL when the primary opcode is not 4.
 */
260 int emulate_altivec(struct pt_regs *regs)
262 unsigned int instr, i;
263 unsigned int va, vb, vc, vd;
/* fetch the faulting instruction word from user memory */
266 if (get_user(instr, (unsigned int __user *) regs->nip))
/* the primary opcode is the top 6 bits of the word */
268 if ((instr >> 26) != 4)
269 return -EINVAL; /* not an altivec instruction */
/* 5-bit register fields: destination, then sources a, b and c */
270 vd = (instr >> 21) & 0x1f;
271 va = (instr >> 16) & 0x1f;
272 vb = (instr >> 11) & 0x1f;
273 vc = (instr >> 6) & 0x1f;
/* all operations below read and write this register array in place */
275 vrs = current->thread.vr_state.vr;
/* dispatch on the low 6 bits of the instruction */
276 switch (instr & 0x3f) {
280 vaddfp(&vrs[vd], &vrs[va], &vrs[vb]);
283 vsubfp(&vrs[vd], &vrs[va], &vrs[vb]);
286 vrefp(&vrs[vd], &vrs[vb]);
288 case 5: /* vrsqrtefp */
289 vrsqrtefp(&vrs[vd], &vrs[vb]);
291 case 6: /* vexptefp */
/* computed per element in C rather than by a vector.S helper */
292 for (i = 0; i < 4; ++i)
293 vrs[vd].u[i] = eexp2(vrs[vb].u[i]);
295 case 7: /* vlogefp */
296 for (i = 0; i < 4; ++i)
297 vrs[vd].u[i] = elog2(vrs[vb].u[i]);
/* round each element to nearest */
300 for (i = 0; i < 4; ++i)
301 vrs[vd].u[i] = rfin(vrs[vb].u[i]);
/* round each element towards zero */
304 for (i = 0; i < 4; ++i)
305 vrs[vd].u[i] = rfiz(vrs[vb].u[i]);
/*
 * Round towards +infinity: negative elements are truncated, all others
 * are rounded away from zero.
 */
308 for (i = 0; i < 4; ++i) {
309 u32 x = vrs[vb].u[i];
310 x = (x & 0x80000000)? rfiz(x): rfii(x);
/*
 * Round towards -infinity: negative elements are rounded away from
 * zero, all others are truncated.
 */
315 for (i = 0; i < 4; ++i) {
316 u32 x = vrs[vb].u[i];
317 x = (x & 0x80000000)? rfii(x): rfiz(x);
321 case 14: /* vctuxs */
/*
 * For the conversions the va field is not a register number; it is
 * passed straight through as the scale argument.
 * NOTE(review): the last argument below looks like a mis-encoded
 * &current->thread.vr_state.vscr.u[3] (an HTML entity mangling of
 * the ampersand and the start of the identifier) - confirm and restore.
 */
322 for (i = 0; i < 4; ++i)
323 vrs[vd].u[i] = ctuxs(vrs[vb].u[i], va,
324 ¤t->thread.vr_state.vscr.u[3]);
326 case 15: /* vctsxs */
/* NOTE(review): same suspected mis-encoding of &current as in case 14 */
327 for (i = 0; i < 4; ++i)
328 vrs[vd].u[i] = ctsxs(vrs[vb].u[i], va,
329 ¤t->thread.vr_state.vscr.u[3]);
335 case 46: /* vmaddfp */
/* the three-source forms also use the vc field */
336 vmaddfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]);
338 case 47: /* vnmsubfp */
339 vnmsubfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]);