2 * IEEE754 floating point arithmetic
3 * single precision: MADDF.f (Fused Multiply Add)
4 * MADDF.fmt: FPR[fd] = FPR[fd] + (FPR[fs] x FPR[ft])
6 * MIPS floating point support
7 * Copyright (C) 2015 Imagination Technologies, Ltd.
8 * Author: Markos Chandras <markos.chandras@imgtec.com>
10 * This program is free software; you can distribute it and/or modify it
11 * under the terms of the GNU General Public License as published by the
12 * Free Software Foundation; version 2 of the License.
15 #include "ieee754sp.h"
17 union ieee754sp ieee754sp_maddf(union ieee754sp z, union ieee754sp x,
35 u32 zm; int ze; int zs __maybe_unused; int zc;
39 EXPLODESP(z, zc, zs, ze, zm)
43 FLUSHSP(z, zc, zs, ze, zm);
48 case IEEE754_CLASS_SNAN:
49 ieee754_setcx(IEEE754_INVALID_OPERATION);
50 return ieee754sp_nanxcpt(z);
51 case IEEE754_CLASS_DNORM:
53 /* QNAN is handled separately below */
56 switch (CLPAIR(xc, yc)) {
57 case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN):
58 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN):
59 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN):
60 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN):
61 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN):
62 return ieee754sp_nanxcpt(y);
64 case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN):
65 case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN):
66 case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO):
67 case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM):
68 case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM):
69 case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF):
70 return ieee754sp_nanxcpt(x);
72 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN):
73 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN):
74 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN):
75 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN):
78 case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN):
79 case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO):
80 case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM):
81 case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM):
82 case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_INF):
88 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO):
89 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF):
90 if (zc == IEEE754_CLASS_QNAN)
92 ieee754_setcx(IEEE754_INVALID_OPERATION);
93 return ieee754sp_indef();
95 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF):
96 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF):
97 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM):
98 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM):
99 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
100 if (zc == IEEE754_CLASS_QNAN)
102 return ieee754sp_inf(xs ^ ys);
104 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
105 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM):
106 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM):
107 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO):
108 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO):
109 if (zc == IEEE754_CLASS_INF)
110 return ieee754sp_inf(zs);
111 /* Multiplication is 0 so just return z */
114 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
117 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
118 if (zc == IEEE754_CLASS_QNAN)
120 else if (zc == IEEE754_CLASS_INF)
121 return ieee754sp_inf(zs);
125 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM):
126 if (zc == IEEE754_CLASS_QNAN)
128 else if (zc == IEEE754_CLASS_INF)
129 return ieee754sp_inf(zs);
133 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM):
134 if (zc == IEEE754_CLASS_QNAN)
136 else if (zc == IEEE754_CLASS_INF)
137 return ieee754sp_inf(zs);
138 /* fall through to real computations */
141 /* Finally get to do some computation */
144 * Do the multiplication bit first
146 * rm = xm * ym, re = xe + ye basically
148 * At this point xm and ym should have been normalized.
151 /* rm = xm * ym, re = xe+ye basically */
152 assert(xm & SP_HIDDEN_BIT);
153 assert(ym & SP_HIDDEN_BIT);
158 /* shunt to top of word */
159 xm <<= 32 - (SP_FBITS + 1);
160 ym <<= 32 - (SP_FBITS + 1);
163 * Multiply 32 bits xm, ym to give high 32 bits rm with stickness.
170 lrm = lxm * lym; /* 16 * 16 => 32 */
171 hrm = hxm * hym; /* 16 * 16 => 32 */
173 t = lxm * hym; /* 16 * 16 => 32 */
174 at = lrm + (t << 16);
177 hrm = hrm + (t >> 16);
179 t = hxm * lym; /* 16 * 16 => 32 */
180 at = lrm + (t << 16);
183 hrm = hrm + (t >> 16);
185 rm = hrm | (lrm != 0);
188 * Sticky shift down to normal rounding precision.
191 rm = (rm >> (32 - (SP_FBITS + 1 + 3))) |
192 ((rm << (SP_FBITS + 1 + 3)) != 0);
195 rm = (rm >> (32 - (SP_FBITS + 1 + 3 + 1))) |
196 ((rm << (SP_FBITS + 1 + 3 + 1)) != 0);
198 assert(rm & (SP_HIDDEN_BIT << 3));
200 /* And now the addition */
202 assert(zm & SP_HIDDEN_BIT);
205 * Provide guard,round and stick bit space.
211 * Have to shift y fraction right to align.
215 } else if (re > ze) {
217 * Have to shift x fraction right to align.
223 assert(ze <= SP_EMAX);
227 * Generate 28 bit result of adding two 27 bit numbers
228 * leaving result in zm, zs and ze.
232 if (zm >> (SP_FBITS + 1 + 3)) { /* carry out */
243 return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD);
246 * Normalize in extended single precision
248 while ((zm >> (SP_MBITS + 3)) == 0) {
254 return ieee754sp_format(zs, ze, zm);