3 ===============================================================================
5 This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
6 Arithmetic Package, Release 2.
8 Written by John R. Hauser. This work was made possible in part by the
9 International Computer Science Institute, located at Suite 600, 1947 Center
10 Street, Berkeley, California 94704. Funding was partially provided by the
11 National Science Foundation under grant MIP-9311980. The original version
12 of this code was written as part of a project to build a fixed-point vector
13 processor in collaboration with the University of California at Berkeley,
14 overseen by Profs. Nelson Morgan and John Wawrzynek. More information
15 is available through the web page
16 http://www.jhauser.us/arithmetic/SoftFloat-2b/SoftFloat-source.txt
18 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
19 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
20 TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
21 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
22 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
24 Derivative works are acceptable, even for commercial purposes, so long as
25 (1) they include prominent notice that the work is derivative, and (2) they
26 include prominent notice akin to these three paragraphs for those parts of
27 this code that are retained.
29 ===============================================================================
33 -------------------------------------------------------------------------------
34 Shifts `a' right by the number of bits given in `count'. If any nonzero
35 bits are shifted off, they are ``jammed'' into the least significant bit of
36 the result by setting the least significant bit to 1. The value of `count'
37 can be arbitrarily large; in particular, if `count' is greater than 32, the
38 result will be either 0 or 1, depending on whether `a' is zero or nonzero.
39 The result is stored in the location pointed to by `zPtr'.
40 -------------------------------------------------------------------------------
42 INLINE void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr )
48 else if ( count < 32 ) {
49 z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 );
58 -------------------------------------------------------------------------------
59 Shifts `a' right by the number of bits given in `count'. If any nonzero
60 bits are shifted off, they are ``jammed'' into the least significant bit of
61 the result by setting the least significant bit to 1. The value of `count'
62 can be arbitrarily large; in particular, if `count' is greater than 64, the
63 result will be either 0 or 1, depending on whether `a' is zero or nonzero.
64 The result is stored in the location pointed to by `zPtr'.
65 -------------------------------------------------------------------------------
67 INLINE void shift64RightJamming( bits64 a, int16 count, bits64 *zPtr )
71 __asm__("@shift64RightJamming -- start");
75 else if ( count < 64 ) {
76 z = ( a>>count ) | ( ( a<<( ( - count ) & 63 ) ) != 0 );
81 __asm__("@shift64RightJamming -- end");
86 -------------------------------------------------------------------------------
87 Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64
88 _plus_ the number of bits given in `count'. The shifted result is at most
89 64 nonzero bits; this is stored at the location pointed to by `z0Ptr'. The
90 bits shifted off form a second 64-bit result as follows: The _last_ bit
91 shifted off is the most-significant bit of the extra result, and the other
92 63 bits of the extra result are all zero if and only if _all_but_the_last_
93 bits shifted off were all zero. This extra result is stored in the location
94 pointed to by `z1Ptr'. The value of `count' can be arbitrarily large.
95 (This routine makes more sense if `a0' and `a1' are considered to form a
96 fixed-point value with binary point between `a0' and `a1'. This fixed-point
97 value is shifted right by the number of bits given in `count', and the
98 integer part of the result is returned at the location pointed to by
99 `z0Ptr'. The fractional part of the result may be slightly corrupted as
100 described above, and is returned at the location pointed to by `z1Ptr'.)
101 -------------------------------------------------------------------------------
104 shift64ExtraRightJamming(
105 bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
108 int8 negCount = ( - count ) & 63;
114 else if ( count < 64 ) {
115 z1 = ( a0<<negCount ) | ( a1 != 0 );
120 z1 = a0 | ( a1 != 0 );
123 z1 = ( ( a0 | a1 ) != 0 );
133 -------------------------------------------------------------------------------
134 Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
135 number of bits given in `count'. Any bits shifted off are lost. The value
136 of `count' can be arbitrarily large; in particular, if `count' is greater
137 than 128, the result will be 0. The result is broken into two 64-bit pieces
138 which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
139 -------------------------------------------------------------------------------
143 bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
146 int8 negCount = ( - count ) & 63;
152 else if ( count < 64 ) {
153 z1 = ( a0<<negCount ) | ( a1>>count );
157 z1 = ( count < 64 ) ? ( a0>>( count & 63 ) ) : 0;
166 -------------------------------------------------------------------------------
167 Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
168 number of bits given in `count'. If any nonzero bits are shifted off, they
169 are ``jammed'' into the least significant bit of the result by setting the
170 least significant bit to 1. The value of `count' can be arbitrarily large;
171 in particular, if `count' is greater than 128, the result will be either 0
172 or 1, depending on whether the concatenation of `a0' and `a1' is zero or
173 nonzero. The result is broken into two 64-bit pieces which are stored at
174 the locations pointed to by `z0Ptr' and `z1Ptr'.
175 -------------------------------------------------------------------------------
178 shift128RightJamming(
179 bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
182 int8 negCount = ( - count ) & 63;
188 else if ( count < 64 ) {
189 z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 );
194 z1 = a0 | ( a1 != 0 );
196 else if ( count < 128 ) {
197 z1 = ( a0>>( count & 63 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 );
200 z1 = ( ( a0 | a1 ) != 0 );
210 -------------------------------------------------------------------------------
211 Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right
212 by 64 _plus_ the number of bits given in `count'. The shifted result is
213 at most 128 nonzero bits; these are broken into two 64-bit pieces which are
214 stored at the locations pointed to by `z0Ptr' and `z1Ptr'. The bits shifted
215 off form a third 64-bit result as follows: The _last_ bit shifted off is
216 the most-significant bit of the extra result, and the other 63 bits of the
217 extra result are all zero if and only if _all_but_the_last_ bits shifted off
218 were all zero. This extra result is stored in the location pointed to by
219 `z2Ptr'. The value of `count' can be arbitrarily large.
220 (This routine makes more sense if `a0', `a1', and `a2' are considered
221 to form a fixed-point value with binary point between `a1' and `a2'. This
222 fixed-point value is shifted right by the number of bits given in `count',
223 and the integer part of the result is returned at the locations pointed to
224 by `z0Ptr' and `z1Ptr'. The fractional part of the result may be slightly
225 corrupted as described above, and is returned at the location pointed to by
227 -------------------------------------------------------------------------------
230 shift128ExtraRightJamming(
241 int8 negCount = ( - count ) & 63;
251 z1 = ( a0<<negCount ) | ( a1>>count );
263 z1 = a0>>( count & 63 );
266 z2 = ( count == 128 ) ? a0 : ( a0 != 0 );
281 -------------------------------------------------------------------------------
282 Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the
283 number of bits given in `count'. Any bits shifted off are lost. The value
284 of `count' must be less than 64. The result is broken into two 64-bit
285 pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
286 -------------------------------------------------------------------------------
290 bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
295 ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 63 ) );
300 -------------------------------------------------------------------------------
301 Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left
302 by the number of bits given in `count'. Any bits shifted off are lost.
303 The value of `count' must be less than 64. The result is broken into three
304 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
305 `z1Ptr', and `z2Ptr'.
306 -------------------------------------------------------------------------------
326 negCount = ( ( - count ) & 63 );
337 -------------------------------------------------------------------------------
338 Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit
339 value formed by concatenating `b0' and `b1'. Addition is modulo 2^128, so
340 any carry out is lost. The result is broken into two 64-bit pieces which
341 are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
342 -------------------------------------------------------------------------------
346 bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
352 *z0Ptr = a0 + b0 + ( z1 < a1 );
357 -------------------------------------------------------------------------------
358 Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the
359 192-bit value formed by concatenating `b0', `b1', and `b2'. Addition is
360 modulo 2^192, so any carry out is lost. The result is broken into three
361 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
362 `z1Ptr', and `z2Ptr'.
363 -------------------------------------------------------------------------------
382 carry1 = ( z2 < a2 );
384 carry0 = ( z1 < a1 );
387 z0 += ( z1 < carry1 );
396 -------------------------------------------------------------------------------
397 Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the
398 128-bit value formed by concatenating `a0' and `a1'. Subtraction is modulo
399 2^128, so any borrow out (carry out) is lost. The result is broken into two
400 64-bit pieces which are stored at the locations pointed to by `z0Ptr' and
402 -------------------------------------------------------------------------------
406 bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
410 *z0Ptr = a0 - b0 - ( a1 < b1 );
415 -------------------------------------------------------------------------------
416 Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2'
417 from the 192-bit value formed by concatenating `a0', `a1', and `a2'.
418 Subtraction is modulo 2^192, so any borrow out (carry out) is lost. The
419 result is broken into three 64-bit pieces which are stored at the locations
420 pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
421 -------------------------------------------------------------------------------
437 int8 borrow0, borrow1;
440 borrow1 = ( a2 < b2 );
442 borrow0 = ( a1 < b1 );
444 z0 -= ( z1 < borrow1 );
454 -------------------------------------------------------------------------------
455 Multiplies `a' by `b' to obtain a 128-bit product. The product is broken
456 into two 64-bit pieces which are stored at the locations pointed to by
458 -------------------------------------------------------------------------------
460 INLINE void mul64To128( bits64 a, bits64 b, bits64 *z0Ptr, bits64 *z1Ptr )
462 bits32 aHigh, aLow, bHigh, bLow;
463 bits64 z0, zMiddleA, zMiddleB, z1;
469 z1 = ( (bits64) aLow ) * bLow;
470 zMiddleA = ( (bits64) aLow ) * bHigh;
471 zMiddleB = ( (bits64) aHigh ) * bLow;
472 z0 = ( (bits64) aHigh ) * bHigh;
473 zMiddleA += zMiddleB;
474 z0 += ( ( (bits64) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 );
477 z0 += ( z1 < zMiddleA );
484 -------------------------------------------------------------------------------
485 Multiplies the 128-bit value formed by concatenating `a0' and `a1' by `b' to
486 obtain a 192-bit product. The product is broken into three 64-bit pieces
487 which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
489 -------------------------------------------------------------------------------
501 bits64 z0, z1, z2, more1;
503 mul64To128( a1, b, &z1, &z2 );
504 mul64To128( a0, b, &z0, &more1 );
505 add128( z0, more1, 0, z1, &z0, &z1 );
513 -------------------------------------------------------------------------------
514 Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the
515 128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit
516 product. The product is broken into four 64-bit pieces which are stored at
517 the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
518 -------------------------------------------------------------------------------
532 bits64 z0, z1, z2, z3;
535 mul64To128( a1, b1, &z2, &z3 );
536 mul64To128( a1, b0, &z1, &more2 );
537 add128( z1, more2, 0, z2, &z1, &z2 );
538 mul64To128( a0, b0, &z0, &more1 );
539 add128( z0, more1, 0, z1, &z0, &z1 );
540 mul64To128( a0, b1, &more1, &more2 );
541 add128( more1, more2, 0, z2, &more1, &z2 );
542 add128( z0, z1, 0, more1, &z0, &z1 );
551 -------------------------------------------------------------------------------
552 Returns an approximation to the 64-bit integer quotient obtained by dividing
553 `b' into the 128-bit value formed by concatenating `a0' and `a1'. The
554 divisor `b' must be at least 2^63. If q is the exact quotient truncated
555 toward zero, the approximation returned lies between q and q + 2 inclusive.
556 If the exact quotient q is larger than 64 bits, the maximum positive 64-bit
557 unsigned integer is returned.
558 -------------------------------------------------------------------------------
560 static bits64 estimateDiv128To64( bits64 a0, bits64 a1, bits64 b )
563 bits64 rem0, rem1, term0, term1;
565 if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF );
566 b0 = b>>32; /* hence b0 is 32 bits wide now */
567 if ( b0<<32 <= a0 ) {
568 z = LIT64( 0xFFFFFFFF00000000 );
574 mul64To128( b, z, &term0, &term1 );
575 sub128( a0, a1, term0, term1, &rem0, &rem1 );
576 while ( ( (sbits64) rem0 ) < 0 ) {
577 z -= LIT64( 0x100000000 );
579 add128( rem0, rem1, b0, b1, &rem0, &rem1 );
581 rem0 = ( rem0<<32 ) | ( rem1>>32 );
582 if ( b0<<32 <= rem0 ) {
593 -------------------------------------------------------------------------------
594 Returns an approximation to the square root of the 32-bit significand given
595 by `a'. Considered as an integer, `a' must be at least 2^31. If bit 0 of
596 `aExp' (the least significant bit) is 1, the integer returned approximates
597 2^31*sqrt(`a'/2^31), where `a' is considered an integer. If bit 0 of `aExp'
598 is 0, the integer returned approximates 2^31*sqrt(`a'/2^30). In either
599 case, the approximation returned lies strictly within +/-2 of the exact
601 -------------------------------------------------------------------------------
603 static bits32 estimateSqrt32( int16 aExp, bits32 a )
605 static const bits16 sqrtOddAdjustments[] = {
606 0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
607 0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
609 static const bits16 sqrtEvenAdjustments[] = {
610 0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
611 0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
617 index = ( a>>27 ) & 15;
619 z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ index ];
620 z = ( ( a / z )<<14 ) + ( z<<15 );
624 z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ index ];
626 z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 );
627 if ( z <= a ) return (bits32) ( ( (sbits32) a )>>1 );
629 A = ( (bits64) a )<<31;
631 return ( (bits32) A ) + ( z>>1 );
636 -------------------------------------------------------------------------------
637 Returns the number of leading 0 bits before the most-significant 1 bit
638 of `a'. If `a' is zero, 32 is returned.
639 -------------------------------------------------------------------------------
641 static int8 countLeadingZeros32( bits32 a )
643 static const int8 countLeadingZerosHigh[] = {
644 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
645 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
646 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
647 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
648 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
649 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
650 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
651 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
652 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
653 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
654 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
655 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
656 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
657 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
658 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
659 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
668 if ( a < 0x1000000 ) {
672 shiftCount += countLeadingZerosHigh[ a>>24 ];
678 -------------------------------------------------------------------------------
679 Returns the number of leading 0 bits before the most-significant 1 bit
680 of `a'. If `a' is zero, 64 is returned.
681 -------------------------------------------------------------------------------
683 static int8 countLeadingZeros64( bits64 a )
688 if ( a < ( (bits64) 1 )<<32 ) {
694 shiftCount += countLeadingZeros32( a );
700 -------------------------------------------------------------------------------
701 Returns 1 if the 128-bit value formed by concatenating `a0' and `a1'
702 is equal to the 128-bit value formed by concatenating `b0' and `b1'.
703 Otherwise, returns 0.
704 -------------------------------------------------------------------------------
706 INLINE flag eq128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
709 return ( a0 == b0 ) && ( a1 == b1 );
714 -------------------------------------------------------------------------------
715 Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
716 than or equal to the 128-bit value formed by concatenating `b0' and `b1'.
717 Otherwise, returns 0.
718 -------------------------------------------------------------------------------
720 INLINE flag le128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
723 return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) );
728 -------------------------------------------------------------------------------
729 Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
730 than the 128-bit value formed by concatenating `b0' and `b1'. Otherwise,
732 -------------------------------------------------------------------------------
734 INLINE flag lt128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
737 return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) );
742 -------------------------------------------------------------------------------
743 Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is
744 not equal to the 128-bit value formed by concatenating `b0' and `b1'.
745 Otherwise, returns 0.
746 -------------------------------------------------------------------------------
748 INLINE flag ne128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
751 return ( a0 != b0 ) || ( a1 != b1 );