You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
3262 lines
99 KiB
3262 lines
99 KiB
Partial backports of: |
|
===================== |
|
|
|
commit c5d5d574cbfa96d0f6c1db24d1e072c472627e41 |
|
Author: Ondřej Bílka <neleai@seznam.cz> |
|
Date: Thu Oct 17 16:03:24 2013 +0200 |
|
|
|
Format floating routines. |
|
|
|
commit da08f647d58d674db08cdb3e61c8826c89470e2e |
|
Author: Siddhesh Poyarekar <siddhesh@redhat.com> |
|
Date: Fri Dec 21 09:15:10 2012 +0530 |
|
|
|
Move more constants into static variables |
|
|
|
Code cleanup. |
|
|
|
commit f93a8d15699ee699282465dc1e03e033f3fabb52 |
|
Author: Siddhesh Poyarekar <siddhesh@redhat.com> |
|
Date: Wed Jan 16 16:06:48 2013 +0530 |
|
|
|
Consolidate constant defines into mpa.h |
|
|
|
commit caa99d06e7f1403887294442af520b0f8c6f3de0 |
|
Author: Siddhesh Poyarekar <siddhesh@redhat.com> |
|
Date: Fri Jan 18 11:18:13 2013 +0530 |
|
|
|
Simplify calculation of 2^-m in __mpexp |
|
|
|
commit 107a5bf085f5c4ef8c28266a34d476724cfc3475 |
|
Author: Joseph Myers <joseph@codesourcery.com> |
|
Date: Tue Nov 18 15:40:56 2014 +0000 |
|
|
|
Fix libm mpone, mptwo namespace (bug 17616). |
|
|
|
To provided __mptwo for __inv. |
|
|
|
Full backports of the following: |
|
================================ |
|
|
|
commit 44e0d4c20ce5bf3825897e5d4b7caae94016214d |
|
Author: Siddhesh Poyarekar <siddhesh@redhat.com> |
|
Date: Wed Jan 2 11:44:13 2013 +0530 |
|
|
|
Split mantissa calculation loop and add branch prediction |
|
|
|
commit f8af25d218202ff2f5d167b8e44e4b79f91d147f |
|
Author: Siddhesh Poyarekar <siddhesh@redhat.com> |
|
Date: Fri Jan 4 15:09:33 2013 +0530 |
|
|
|
Remove commented declarations |
|
|
|
commit a9e48ab40e230c7fe34e4892bec8af4f3f975a20 |
|
Author: Siddhesh Poyarekar <siddhesh@redhat.com> |
|
Date: Fri Jan 4 15:42:09 2013 +0530 |
|
|
|
Clean up comment for MP_NO |
|
|
|
commit fffb407f4668b40b3df1eb8ee3ae3bc64ee79e20 |
|
Author: Siddhesh Poyarekar <siddhesh@redhat.com> |
|
Date: Fri Jan 4 22:52:12 2013 +0530 |
|
|
|
Remove unused __cr and __cpymn |
|
|
|
commit 950c99ca9094e7dc6394e90395f51e12093393aa |
|
Author: Siddhesh Poyarekar <siddhesh@redhat.com> |
|
Date: Wed Jan 9 19:07:15 2013 +0530 |
|
|
|
Update comments in mpa.c |
|
|
|
Fixed comment style and clearer wording in some cases. |
|
|
|
commit 1066a53440d2744566e97c59bcd0d422186b3e90 |
|
Author: Siddhesh Poyarekar <siddhesh@redhat.com> |
|
Date: Mon Jan 14 21:31:25 2013 +0530 |
|
|
|
Fix code formatting in mpa.c |
|
|
|
This includes the overridden mpa.c in power4. |
|
|
|
commit 2a91b5735ac1bc65ce5c2a3646d75ba7208e26e9 |
|
Author: Siddhesh Poyarekar <siddhesh@redhat.com> |
|
Date: Mon Jan 14 21:36:58 2013 +0530 |
|
|
|
Minor tweak to mp multiplication |
|
|
|
Add a local variable to remove extra copies to/from memory in the Z |
|
array. |
|
|
|
ommit 45f058844c33f670475bd02f266942746bcb332b |
|
Author: Siddhesh Poyarekar <siddhesh@redhat.com> |
|
Date: Tue Feb 26 21:28:16 2013 +0530 |
|
|
|
Another tweak to the multiplication algorithm |
|
|
|
Reduce the formula to calculate mantissa so that we reduce the net |
|
number of multiplications performed. |
|
|
|
commit bab8a695ee79a5a6e9b2b699938952b006fcbbec |
|
Author: Siddhesh Poyarekar <siddhesh@redhat.com> |
|
Date: Thu Feb 21 14:29:18 2013 +0530 |
|
|
|
Fix whitespace differences between generic and powerpc mpa.c |
|
|
|
|
|
commit 2d0e0f29f85036d1189231cb7c1f19f27ba14a89 |
|
Author: Siddhesh Poyarekar <siddhesh@redhat.com> |
|
Date: Fri Feb 15 23:56:20 2013 +0530 |
|
|
|
Fix determination of lower precision in __mul |
|
|
|
commit 909279a5cfa938c989c9b01c8f48a0276291ec45 |
|
Author: Siddhesh Poyarekar <siddhesh@redhat.com> |
|
Date: Wed Feb 13 14:16:23 2013 +0530 |
|
|
|
Optimized mp multiplication |
|
|
|
Don't bother multiplying zeroes since that only wastes cycles. |
|
|
|
commit bdf028142eb77d6ae59500db875068fa5d7b059d |
|
Author: Siddhesh Poyarekar <siddhesh@redhat.com> |
|
Date: Wed Feb 13 13:55:29 2013 +0530 |
|
|
|
Clean up add_magnitudes and sub_magnitudes |
|
|
|
commit d6752ccd696c71d23cd3df8fb9cc60b61c32e65a |
|
Author: Siddhesh Poyarekar <siddhesh@redhat.com> |
|
Date: Thu Feb 14 10:31:09 2013 +0530 |
|
|
|
New __sqr function as a faster special case of __mul |
|
|
|
commit 4709fe7602b56e9f6ee1ab6afb4067409a784f29 |
|
Author: Siddhesh Poyarekar <siddhesh@redhat.com> |
|
Date: Sat Feb 16 00:09:29 2013 +0530 |
|
|
|
Use intermediate variable to compute exponent in __mul |
|
|
|
commit 20cd7fb3ae63795ae7c9a464abf5ed19b364ade0 |
|
Author: Siddhesh Poyarekar <siddhesh@redhat.com> |
|
Date: Wed Feb 20 18:56:20 2013 +0530 |
|
|
|
Copy comment about inner loop from powerpc mpa.c to the default one |
|
|
|
commit e69804d14e43f14c3c65dc570afdbfb822c9838b |
|
Author: Siddhesh Poyarekar <siddhesh@redhat.com> |
|
Date: Mon Feb 25 16:43:02 2013 +0530 |
|
|
|
Use long wherever possible in mpa.c |
|
|
|
Using long throughout like powerpc does is beneficial since it reduces |
|
the need to switch to 32-bit instructions. It gives a very minor |
|
performance improvement. |
|
|
|
commit 82a9811d29c00161c7c8ea7f70e2cc30988e192e |
|
Author: Siddhesh Poyarekar <siddhesh@redhat.com> |
|
Date: Thu Mar 7 12:23:29 2013 +0530 |
|
|
|
Use generic mpa.c code for everything except __mul and __sqr |
|
|
|
commit 6f2e90e78f151bab153c2b38492505ae2012db06 |
|
Author: Siddhesh Poyarekar <siddhesh@redhat.com> |
|
Date: Tue Mar 26 19:28:50 2013 +0530 |
|
|
|
Make mantissa type of mp_no configurable |
|
|
|
The mantissa of mp_no is intended to take only integral values. This |
|
is a relatively good choice for powerpc due to its 4 fpus, but not for |
|
other architectures, which suffer due to this choice. This change |
|
makes the default mantissa a long integer and allows powerpc to |
|
override it. Additionally, some operations have been optimized for |
|
integer manipulation, resulting in a significant improvement in |
|
performance. |
|
|
|
commit 5739f705eed5cf58e7b439e5983542e06d7fc2da |
|
Author: Siddhesh Poyarekar <siddhesh@redhat.com> |
|
Date: Tue Mar 26 20:24:04 2013 +0530 |
|
|
|
Use integral constants |
|
|
|
The compiler is smart enough to convert those into double for powerpc, |
|
but if we put them as doubles, it adds overhead by performing those |
|
operations in floating point mode. |
|
|
|
commit 89f3b6e18c6e7833438789746fcfc2e7189f7cac |
|
Author: Joseph Myers <joseph@codesourcery.com> |
|
Date: Thu May 21 23:05:45 2015 +0000 |
|
|
|
Fix sysdeps/ieee754/dbl-64/mpa.c for -Wuninitialized. |
|
|
|
If you remove the "override CFLAGS += -Wno-uninitialized" in |
|
math/Makefile, one of the errors you get is: |
|
|
|
../sysdeps/ieee754/dbl-64/mpa.c: In function '__mp_dbl.part.0': |
|
../sysdeps/ieee754/dbl-64/mpa.c:183:5: error: 'c' may be used uninitialized in this function [-Werror=maybe-uninitialized] |
|
c *= X[0]; |
|
|
|
The problem is that the p < 5 case initializes c if p is 1, 2, 3 or 4 |
|
but not otherwise, and in fact p is positive for all calls to this |
|
function so the uninitialized case can't actually occur. This patch |
|
replaces the "if (p == 4)" last case with a comment so the compiler |
|
can see that all paths do initialize c. |
|
|
|
Tested for x86_64. |
|
|
|
* sysdeps/ieee754/dbl-64/mpa.c (norm): Remove if condition on |
|
(p == 4) case. |
|
|
|
Index: glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mpa.c |
|
=================================================================== |
|
--- glibc-2.17-c758a686.orig/sysdeps/ieee754/dbl-64/mpa.c |
|
+++ glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mpa.c |
|
@@ -1,7 +1,7 @@ |
|
/* |
|
* IBM Accurate Mathematical Library |
|
* written by International Business Machines Corp. |
|
- * Copyright (C) 2001, 2011 Free Software Foundation |
|
+ * Copyright (C) 2001-2017 Free Software Foundation, Inc. |
|
* |
|
* This program is free software; you can redistribute it and/or modify |
|
* it under the terms of the GNU Lesser General Public License as published by |
|
@@ -22,9 +22,7 @@ |
|
/* FUNCTIONS: */ |
|
/* mcr */ |
|
/* acr */ |
|
-/* cr */ |
|
/* cpy */ |
|
-/* cpymn */ |
|
/* norm */ |
|
/* denorm */ |
|
/* mp_dbl */ |
|
@@ -44,479 +42,868 @@ |
|
|
|
#include "endian.h" |
|
#include "mpa.h" |
|
-#include "mpa2.h" |
|
-#include <sys/param.h> /* For MIN() */ |
|
+#include <sys/param.h> |
|
+#include <alloca.h> |
|
|
|
#ifndef SECTION |
|
# define SECTION |
|
#endif |
|
|
|
+#ifndef NO__CONST |
|
+/* TODO: With only a partial backport of the constant cleanup from |
|
+ upstream we limit the definition of these two constants to |
|
+ this file. */ |
|
+static const mp_no __mpone = { 1, { 1.0, 1.0 } }; |
|
+static const mp_no __mptwo = { 1, { 1.0, 2.0 } }; |
|
+#endif |
|
+ |
|
#ifndef NO___ACR |
|
-/* mcr() compares the sizes of the mantissas of two multiple precision */ |
|
-/* numbers. Mantissas are compared regardless of the signs of the */ |
|
-/* numbers, even if x->d[0] or y->d[0] are zero. Exponents are also */ |
|
-/* disregarded. */ |
|
+/* Compare mantissa of two multiple precision numbers regardless of the sign |
|
+ and exponent of the numbers. */ |
|
static int |
|
-mcr(const mp_no *x, const mp_no *y, int p) { |
|
- int i; |
|
- for (i=1; i<=p; i++) { |
|
- if (X[i] == Y[i]) continue; |
|
- else if (X[i] > Y[i]) return 1; |
|
- else return -1; } |
|
+mcr (const mp_no *x, const mp_no *y, int p) |
|
+{ |
|
+ long i; |
|
+ long p2 = p; |
|
+ for (i = 1; i <= p2; i++) |
|
+ { |
|
+ if (X[i] == Y[i]) |
|
+ continue; |
|
+ else if (X[i] > Y[i]) |
|
+ return 1; |
|
+ else |
|
+ return -1; |
|
+ } |
|
return 0; |
|
} |
|
|
|
- |
|
-/* acr() compares the absolute values of two multiple precision numbers */ |
|
+/* Compare the absolute values of two multiple precision numbers. */ |
|
int |
|
-__acr(const mp_no *x, const mp_no *y, int p) { |
|
- int i; |
|
- |
|
- if (X[0] == ZERO) { |
|
- if (Y[0] == ZERO) i= 0; |
|
- else i=-1; |
|
- } |
|
- else if (Y[0] == ZERO) i= 1; |
|
- else { |
|
- if (EX > EY) i= 1; |
|
- else if (EX < EY) i=-1; |
|
- else i= mcr(x,y,p); |
|
- } |
|
- |
|
- return i; |
|
-} |
|
-#endif |
|
- |
|
+__acr (const mp_no *x, const mp_no *y, int p) |
|
+{ |
|
+ long i; |
|
|
|
-#if 0 |
|
-/* cr() compares the values of two multiple precision numbers */ |
|
-static int __cr(const mp_no *x, const mp_no *y, int p) { |
|
- int i; |
|
- |
|
- if (X[0] > Y[0]) i= 1; |
|
- else if (X[0] < Y[0]) i=-1; |
|
- else if (X[0] < ZERO ) i= __acr(y,x,p); |
|
- else i= __acr(x,y,p); |
|
+ if (X[0] == 0) |
|
+ { |
|
+ if (Y[0] == 0) |
|
+ i = 0; |
|
+ else |
|
+ i = -1; |
|
+ } |
|
+ else if (Y[0] == 0) |
|
+ i = 1; |
|
+ else |
|
+ { |
|
+ if (EX > EY) |
|
+ i = 1; |
|
+ else if (EX < EY) |
|
+ i = -1; |
|
+ else |
|
+ i = mcr (x, y, p); |
|
+ } |
|
|
|
return i; |
|
} |
|
#endif |
|
|
|
- |
|
#ifndef NO___CPY |
|
-/* Copy a multiple precision number. Set *y=*x. x=y is permissible. */ |
|
-void __cpy(const mp_no *x, mp_no *y, int p) { |
|
+/* Copy multiple precision number X into Y. They could be the same |
|
+ number. */ |
|
+void |
|
+__cpy (const mp_no *x, mp_no *y, int p) |
|
+{ |
|
+ long i; |
|
+ |
|
EY = EX; |
|
- for (int i=0; i <= p; i++) Y[i] = X[i]; |
|
+ for (i = 0; i <= p; i++) |
|
+ Y[i] = X[i]; |
|
} |
|
#endif |
|
|
|
+#ifndef NO___MP_DBL |
|
+/* Convert a multiple precision number *X into a double precision |
|
+ number *Y, normalized case (|x| >= 2**(-1022))). X has precision |
|
+ P, which is positive. */ |
|
+static void |
|
+norm (const mp_no *x, double *y, int p) |
|
+{ |
|
+# define R RADIXI |
|
+ long i; |
|
+ double c; |
|
+ mantissa_t a, u, v, z[5]; |
|
+ if (p < 5) |
|
+ { |
|
+ if (p == 1) |
|
+ c = X[1]; |
|
+ else if (p == 2) |
|
+ c = X[1] + R * X[2]; |
|
+ else if (p == 3) |
|
+ c = X[1] + R * (X[2] + R * X[3]); |
|
+ else /* p == 4. */ |
|
+ c = (X[1] + R * X[2]) + R * R * (X[3] + R * X[4]); |
|
+ } |
|
+ else |
|
+ { |
|
+ for (a = 1, z[1] = X[1]; z[1] < TWO23; ) |
|
+ { |
|
+ a *= 2; |
|
+ z[1] *= 2; |
|
+ } |
|
|
|
-#if 0 |
|
-/* Copy a multiple precision number x of precision m into a */ |
|
-/* multiple precision number y of precision n. In case n>m, */ |
|
-/* the digits of y beyond the m'th are set to zero. In case */ |
|
-/* n<m, the digits of x beyond the n'th are ignored. */ |
|
-/* x=y is permissible. */ |
|
- |
|
-static void __cpymn(const mp_no *x, int m, mp_no *y, int n) { |
|
- |
|
- int i,k; |
|
- |
|
- EY = EX; k=MIN(m,n); |
|
- for (i=0; i <= k; i++) Y[i] = X[i]; |
|
- for ( ; i <= n; i++) Y[i] = ZERO; |
|
-} |
|
-#endif |
|
+ for (i = 2; i < 5; i++) |
|
+ { |
|
+ mantissa_store_t d, r; |
|
+ d = X[i] * (mantissa_store_t) a; |
|
+ DIV_RADIX (d, r); |
|
+ z[i] = r; |
|
+ z[i - 1] += d; |
|
+ } |
|
|
|
+ u = ALIGN_DOWN_TO (z[3], TWO19); |
|
+ v = z[3] - u; |
|
|
|
-#ifndef NO___MP_DBL |
|
-/* Convert a multiple precision number *x into a double precision */ |
|
-/* number *y, normalized case (|x| >= 2**(-1022))) */ |
|
-static void norm(const mp_no *x, double *y, int p) |
|
-{ |
|
- #define R radixi.d |
|
- int i; |
|
-#if 0 |
|
- int k; |
|
-#endif |
|
- double a,c,u,v,z[5]; |
|
- if (p<5) { |
|
- if (p==1) c = X[1]; |
|
- else if (p==2) c = X[1] + R* X[2]; |
|
- else if (p==3) c = X[1] + R*(X[2] + R* X[3]); |
|
- else if (p==4) c =(X[1] + R* X[2]) + R*R*(X[3] + R*X[4]); |
|
- } |
|
- else { |
|
- for (a=ONE, z[1]=X[1]; z[1] < TWO23; ) |
|
- {a *= TWO; z[1] *= TWO; } |
|
- |
|
- for (i=2; i<5; i++) { |
|
- z[i] = X[i]*a; |
|
- u = (z[i] + CUTTER)-CUTTER; |
|
- if (u > z[i]) u -= RADIX; |
|
- z[i] -= u; |
|
- z[i-1] += u*RADIXI; |
|
- } |
|
- |
|
- u = (z[3] + TWO71) - TWO71; |
|
- if (u > z[3]) u -= TWO19; |
|
- v = z[3]-u; |
|
- |
|
- if (v == TWO18) { |
|
- if (z[4] == ZERO) { |
|
- for (i=5; i <= p; i++) { |
|
- if (X[i] == ZERO) continue; |
|
- else {z[3] += ONE; break; } |
|
+ if (v == TWO18) |
|
+ { |
|
+ if (z[4] == 0) |
|
+ { |
|
+ for (i = 5; i <= p; i++) |
|
+ { |
|
+ if (X[i] == 0) |
|
+ continue; |
|
+ else |
|
+ { |
|
+ z[3] += 1; |
|
+ break; |
|
+ } |
|
+ } |
|
+ } |
|
+ else |
|
+ z[3] += 1; |
|
} |
|
- } |
|
- else z[3] += ONE; |
|
- } |
|
|
|
- c = (z[1] + R *(z[2] + R * z[3]))/a; |
|
- } |
|
+ c = (z[1] + R * (z[2] + R * z[3])) / a; |
|
+ } |
|
|
|
c *= X[0]; |
|
|
|
- for (i=1; i<EX; i++) c *= RADIX; |
|
- for (i=1; i>EX; i--) c *= RADIXI; |
|
+ for (i = 1; i < EX; i++) |
|
+ c *= RADIX; |
|
+ for (i = 1; i > EX; i--) |
|
+ c *= RADIXI; |
|
|
|
*y = c; |
|
-#undef R |
|
+# undef R |
|
} |
|
|
|
-/* Convert a multiple precision number *x into a double precision */ |
|
-/* number *y, denormalized case (|x| < 2**(-1022))) */ |
|
-static void denorm(const mp_no *x, double *y, int p) |
|
+/* Convert a multiple precision number *X into a double precision |
|
+ number *Y, Denormal case (|x| < 2**(-1022))). */ |
|
+static void |
|
+denorm (const mp_no *x, double *y, int p) |
|
{ |
|
- int i,k; |
|
- double c,u,z[5]; |
|
-#if 0 |
|
- double a,v; |
|
-#endif |
|
+ long i, k; |
|
+ long p2 = p; |
|
+ double c; |
|
+ mantissa_t u, z[5]; |
|
+ |
|
+# define R RADIXI |
|
+ if (EX < -44 || (EX == -44 && X[1] < TWO5)) |
|
+ { |
|
+ *y = 0; |
|
+ return; |
|
+ } |
|
|
|
-#define R radixi.d |
|
- if (EX<-44 || (EX==-44 && X[1]<TWO5)) |
|
- { *y=ZERO; return; } |
|
- |
|
- if (p==1) { |
|
- if (EX==-42) {z[1]=X[1]+TWO10; z[2]=ZERO; z[3]=ZERO; k=3;} |
|
- else if (EX==-43) {z[1]= TWO10; z[2]=X[1]; z[3]=ZERO; k=2;} |
|
- else {z[1]= TWO10; z[2]=ZERO; z[3]=X[1]; k=1;} |
|
- } |
|
- else if (p==2) { |
|
- if (EX==-42) {z[1]=X[1]+TWO10; z[2]=X[2]; z[3]=ZERO; k=3;} |
|
- else if (EX==-43) {z[1]= TWO10; z[2]=X[1]; z[3]=X[2]; k=2;} |
|
- else {z[1]= TWO10; z[2]=ZERO; z[3]=X[1]; k=1;} |
|
- } |
|
- else { |
|
- if (EX==-42) {z[1]=X[1]+TWO10; z[2]=X[2]; k=3;} |
|
- else if (EX==-43) {z[1]= TWO10; z[2]=X[1]; k=2;} |
|
- else {z[1]= TWO10; z[2]=ZERO; k=1;} |
|
- z[3] = X[k]; |
|
- } |
|
- |
|
- u = (z[3] + TWO57) - TWO57; |
|
- if (u > z[3]) u -= TWO5; |
|
- |
|
- if (u==z[3]) { |
|
- for (i=k+1; i <= p; i++) { |
|
- if (X[i] == ZERO) continue; |
|
- else {z[3] += ONE; break; } |
|
- } |
|
- } |
|
- |
|
- c = X[0]*((z[1] + R*(z[2] + R*z[3])) - TWO10); |
|
- |
|
- *y = c*TWOM1032; |
|
-#undef R |
|
-} |
|
- |
|
-/* Convert a multiple precision number *x into a double precision number *y. */ |
|
-/* The result is correctly rounded to the nearest/even. *x is left unchanged */ |
|
- |
|
-void __mp_dbl(const mp_no *x, double *y, int p) { |
|
-#if 0 |
|
- int i,k; |
|
- double a,c,u,v,z[5]; |
|
-#endif |
|
+ if (p2 == 1) |
|
+ { |
|
+ if (EX == -42) |
|
+ { |
|
+ z[1] = X[1] + TWO10; |
|
+ z[2] = 0; |
|
+ z[3] = 0; |
|
+ k = 3; |
|
+ } |
|
+ else if (EX == -43) |
|
+ { |
|
+ z[1] = TWO10; |
|
+ z[2] = X[1]; |
|
+ z[3] = 0; |
|
+ k = 2; |
|
+ } |
|
+ else |
|
+ { |
|
+ z[1] = TWO10; |
|
+ z[2] = 0; |
|
+ z[3] = X[1]; |
|
+ k = 1; |
|
+ } |
|
+ } |
|
+ else if (p2 == 2) |
|
+ { |
|
+ if (EX == -42) |
|
+ { |
|
+ z[1] = X[1] + TWO10; |
|
+ z[2] = X[2]; |
|
+ z[3] = 0; |
|
+ k = 3; |
|
+ } |
|
+ else if (EX == -43) |
|
+ { |
|
+ z[1] = TWO10; |
|
+ z[2] = X[1]; |
|
+ z[3] = X[2]; |
|
+ k = 2; |
|
+ } |
|
+ else |
|
+ { |
|
+ z[1] = TWO10; |
|
+ z[2] = 0; |
|
+ z[3] = X[1]; |
|
+ k = 1; |
|
+ } |
|
+ } |
|
+ else |
|
+ { |
|
+ if (EX == -42) |
|
+ { |
|
+ z[1] = X[1] + TWO10; |
|
+ z[2] = X[2]; |
|
+ k = 3; |
|
+ } |
|
+ else if (EX == -43) |
|
+ { |
|
+ z[1] = TWO10; |
|
+ z[2] = X[1]; |
|
+ k = 2; |
|
+ } |
|
+ else |
|
+ { |
|
+ z[1] = TWO10; |
|
+ z[2] = 0; |
|
+ k = 1; |
|
+ } |
|
+ z[3] = X[k]; |
|
+ } |
|
|
|
- if (X[0] == ZERO) {*y = ZERO; return; } |
|
+ u = ALIGN_DOWN_TO (z[3], TWO5); |
|
|
|
- if (EX> -42) norm(x,y,p); |
|
- else if (EX==-42 && X[1]>=TWO10) norm(x,y,p); |
|
- else denorm(x,y,p); |
|
+ if (u == z[3]) |
|
+ { |
|
+ for (i = k + 1; i <= p2; i++) |
|
+ { |
|
+ if (X[i] == 0) |
|
+ continue; |
|
+ else |
|
+ { |
|
+ z[3] += 1; |
|
+ break; |
|
+ } |
|
+ } |
|
+ } |
|
+ |
|
+ c = X[0] * ((z[1] + R * (z[2] + R * z[3])) - TWO10); |
|
+ |
|
+ *y = c * TWOM1032; |
|
+# undef R |
|
} |
|
-#endif |
|
|
|
+/* Convert multiple precision number *X into double precision number *Y. The |
|
+ result is correctly rounded to the nearest/even. */ |
|
+void |
|
+__mp_dbl (const mp_no *x, double *y, int p) |
|
+{ |
|
+ if (X[0] == 0) |
|
+ { |
|
+ *y = 0; |
|
+ return; |
|
+ } |
|
|
|
-/* dbl_mp() converts a double precision number x into a multiple precision */ |
|
-/* number *y. If the precision p is too small the result is truncated. x is */ |
|
-/* left unchanged. */ |
|
+ if (__glibc_likely (EX > -42 || (EX == -42 && X[1] >= TWO10))) |
|
+ norm (x, y, p); |
|
+ else |
|
+ denorm (x, y, p); |
|
+} |
|
+#endif |
|
|
|
+/* Get the multiple precision equivalent of X into *Y. If the precision is too |
|
+ small, the result is truncated. */ |
|
void |
|
SECTION |
|
-__dbl_mp(double x, mp_no *y, int p) { |
|
+__dbl_mp (double x, mp_no *y, int p) |
|
+{ |
|
+ long i, n; |
|
+ long p2 = p; |
|
|
|
- int i,n; |
|
- double u; |
|
+ /* Sign. */ |
|
+ if (x == 0) |
|
+ { |
|
+ Y[0] = 0; |
|
+ return; |
|
+ } |
|
+ else if (x > 0) |
|
+ Y[0] = 1; |
|
+ else |
|
+ { |
|
+ Y[0] = -1; |
|
+ x = -x; |
|
+ } |
|
|
|
- /* Sign */ |
|
- if (x == ZERO) {Y[0] = ZERO; return; } |
|
- else if (x > ZERO) Y[0] = ONE; |
|
- else {Y[0] = MONE; x=-x; } |
|
- |
|
- /* Exponent */ |
|
- for (EY=ONE; x >= RADIX; EY += ONE) x *= RADIXI; |
|
- for ( ; x < ONE; EY -= ONE) x *= RADIX; |
|
- |
|
- /* Digits */ |
|
- n=MIN(p,4); |
|
- for (i=1; i<=n; i++) { |
|
- u = (x + TWO52) - TWO52; |
|
- if (u>x) u -= ONE; |
|
- Y[i] = u; x -= u; x *= RADIX; } |
|
- for ( ; i<=p; i++) Y[i] = ZERO; |
|
+ /* Exponent. */ |
|
+ for (EY = 1; x >= RADIX; EY += 1) |
|
+ x *= RADIXI; |
|
+ for (; x < 1; EY -= 1) |
|
+ x *= RADIX; |
|
+ |
|
+ /* Digits. */ |
|
+ n = MIN (p2, 4); |
|
+ for (i = 1; i <= n; i++) |
|
+ { |
|
+ INTEGER_OF (x, Y[i]); |
|
+ x *= RADIX; |
|
+ } |
|
+ for (; i <= p2; i++) |
|
+ Y[i] = 0; |
|
} |
|
|
|
- |
|
-/* add_magnitudes() adds the magnitudes of *x & *y assuming that */ |
|
-/* abs(*x) >= abs(*y) > 0. */ |
|
-/* The sign of the sum *z is undefined. x&y may overlap but not x&z or y&z. */ |
|
-/* No guard digit is used. The result equals the exact sum, truncated. */ |
|
-/* *x & *y are left unchanged. */ |
|
- |
|
+/* Add magnitudes of *X and *Y assuming that abs (*X) >= abs (*Y) > 0. The |
|
+ sign of the sum *Z is not changed. X and Y may overlap but not X and Z or |
|
+ Y and Z. No guard digit is used. The result equals the exact sum, |
|
+ truncated. */ |
|
static void |
|
SECTION |
|
-add_magnitudes(const mp_no *x, const mp_no *y, mp_no *z, int p) { |
|
- |
|
- int i,j,k; |
|
+add_magnitudes (const mp_no *x, const mp_no *y, mp_no *z, int p) |
|
+{ |
|
+ long i, j, k; |
|
+ long p2 = p; |
|
+ mantissa_t zk; |
|
|
|
EZ = EX; |
|
|
|
- i=p; j=p+ EY - EX; k=p+1; |
|
+ i = p2; |
|
+ j = p2 + EY - EX; |
|
+ k = p2 + 1; |
|
+ |
|
+ if (__glibc_unlikely (j < 1)) |
|
+ { |
|
+ __cpy (x, z, p); |
|
+ return; |
|
+ } |
|
|
|
- if (j<1) |
|
- {__cpy(x,z,p); return; } |
|
- else Z[k] = ZERO; |
|
- |
|
- for (; j>0; i--,j--) { |
|
- Z[k] += X[i] + Y[j]; |
|
- if (Z[k] >= RADIX) { |
|
- Z[k] -= RADIX; |
|
- Z[--k] = ONE; } |
|
- else |
|
- Z[--k] = ZERO; |
|
- } |
|
- |
|
- for (; i>0; i--) { |
|
- Z[k] += X[i]; |
|
- if (Z[k] >= RADIX) { |
|
- Z[k] -= RADIX; |
|
- Z[--k] = ONE; } |
|
- else |
|
- Z[--k] = ZERO; |
|
- } |
|
- |
|
- if (Z[1] == ZERO) { |
|
- for (i=1; i<=p; i++) Z[i] = Z[i+1]; } |
|
- else EZ += ONE; |
|
-} |
|
+ zk = 0; |
|
|
|
+ for (; j > 0; i--, j--) |
|
+ { |
|
+ zk += X[i] + Y[j]; |
|
+ if (zk >= RADIX) |
|
+ { |
|
+ Z[k--] = zk - RADIX; |
|
+ zk = 1; |
|
+ } |
|
+ else |
|
+ { |
|
+ Z[k--] = zk; |
|
+ zk = 0; |
|
+ } |
|
+ } |
|
|
|
-/* sub_magnitudes() subtracts the magnitudes of *x & *y assuming that */ |
|
-/* abs(*x) > abs(*y) > 0. */ |
|
-/* The sign of the difference *z is undefined. x&y may overlap but not x&z */ |
|
-/* or y&z. One guard digit is used. The error is less than one ulp. */ |
|
-/* *x & *y are left unchanged. */ |
|
+ for (; i > 0; i--) |
|
+ { |
|
+ zk += X[i]; |
|
+ if (zk >= RADIX) |
|
+ { |
|
+ Z[k--] = zk - RADIX; |
|
+ zk = 1; |
|
+ } |
|
+ else |
|
+ { |
|
+ Z[k--] = zk; |
|
+ zk = 0; |
|
+ } |
|
+ } |
|
|
|
+ if (zk == 0) |
|
+ { |
|
+ for (i = 1; i <= p2; i++) |
|
+ Z[i] = Z[i + 1]; |
|
+ } |
|
+ else |
|
+ { |
|
+ Z[1] = zk; |
|
+ EZ += 1; |
|
+ } |
|
+} |
|
+ |
|
+/* Subtract the magnitudes of *X and *Y assuming that abs (*x) > abs (*y) > 0. |
|
+ The sign of the difference *Z is not changed. X and Y may overlap but not X |
|
+ and Z or Y and Z. One guard digit is used. The error is less than one |
|
+ ULP. */ |
|
static void |
|
SECTION |
|
-sub_magnitudes(const mp_no *x, const mp_no *y, mp_no *z, int p) { |
|
- |
|
- int i,j,k; |
|
+sub_magnitudes (const mp_no *x, const mp_no *y, mp_no *z, int p) |
|
+{ |
|
+ long i, j, k; |
|
+ long p2 = p; |
|
+ mantissa_t zk; |
|
|
|
EZ = EX; |
|
+ i = p2; |
|
+ j = p2 + EY - EX; |
|
+ k = p2; |
|
+ |
|
+ /* Y is too small compared to X, copy X over to the result. */ |
|
+ if (__glibc_unlikely (j < 1)) |
|
+ { |
|
+ __cpy (x, z, p); |
|
+ return; |
|
+ } |
|
+ |
|
+ /* The relevant least significant digit in Y is non-zero, so we factor it in |
|
+ to enhance accuracy. */ |
|
+ if (j < p2 && Y[j + 1] > 0) |
|
+ { |
|
+ Z[k + 1] = RADIX - Y[j + 1]; |
|
+ zk = -1; |
|
+ } |
|
+ else |
|
+ zk = Z[k + 1] = 0; |
|
|
|
- if (EX == EY) { |
|
- i=j=k=p; |
|
- Z[k] = Z[k+1] = ZERO; } |
|
- else { |
|
- j= EX - EY; |
|
- if (j > p) {__cpy(x,z,p); return; } |
|
- else { |
|
- i=p; j=p+1-j; k=p; |
|
- if (Y[j] > ZERO) { |
|
- Z[k+1] = RADIX - Y[j--]; |
|
- Z[k] = MONE; } |
|
- else { |
|
- Z[k+1] = ZERO; |
|
- Z[k] = ZERO; j--;} |
|
- } |
|
- } |
|
- |
|
- for (; j>0; i--,j--) { |
|
- Z[k] += (X[i] - Y[j]); |
|
- if (Z[k] < ZERO) { |
|
- Z[k] += RADIX; |
|
- Z[--k] = MONE; } |
|
- else |
|
- Z[--k] = ZERO; |
|
- } |
|
- |
|
- for (; i>0; i--) { |
|
- Z[k] += X[i]; |
|
- if (Z[k] < ZERO) { |
|
- Z[k] += RADIX; |
|
- Z[--k] = MONE; } |
|
- else |
|
- Z[--k] = ZERO; |
|
- } |
|
+ /* Subtract and borrow. */ |
|
+ for (; j > 0; i--, j--) |
|
+ { |
|
+ zk += (X[i] - Y[j]); |
|
+ if (zk < 0) |
|
+ { |
|
+ Z[k--] = zk + RADIX; |
|
+ zk = -1; |
|
+ } |
|
+ else |
|
+ { |
|
+ Z[k--] = zk; |
|
+ zk = 0; |
|
+ } |
|
+ } |
|
|
|
- for (i=1; Z[i] == ZERO; i++) ; |
|
+ /* We're done with digits from Y, so it's just digits in X. */ |
|
+ for (; i > 0; i--) |
|
+ { |
|
+ zk += X[i]; |
|
+ if (zk < 0) |
|
+ { |
|
+ Z[k--] = zk + RADIX; |
|
+ zk = -1; |
|
+ } |
|
+ else |
|
+ { |
|
+ Z[k--] = zk; |
|
+ zk = 0; |
|
+ } |
|
+ } |
|
+ |
|
+ /* Normalize. */ |
|
+ for (i = 1; Z[i] == 0; i++) |
|
+ ; |
|
EZ = EZ - i + 1; |
|
- for (k=1; i <= p+1; ) |
|
+ for (k = 1; i <= p2 + 1; ) |
|
Z[k++] = Z[i++]; |
|
- for (; k <= p; ) |
|
- Z[k++] = ZERO; |
|
+ for (; k <= p2; ) |
|
+ Z[k++] = 0; |
|
} |
|
|
|
- |
|
-/* Add two multiple precision numbers. Set *z = *x + *y. x&y may overlap */ |
|
-/* but not x&z or y&z. One guard digit is used. The error is less than */ |
|
-/* one ulp. *x & *y are left unchanged. */ |
|
- |
|
+/* Add *X and *Y and store the result in *Z. X and Y may overlap, but not X |
|
+ and Z or Y and Z. One guard digit is used. The error is less than one |
|
+ ULP. */ |
|
void |
|
SECTION |
|
-__add(const mp_no *x, const mp_no *y, mp_no *z, int p) { |
|
- |
|
+__add (const mp_no *x, const mp_no *y, mp_no *z, int p) |
|
+{ |
|
int n; |
|
|
|
- if (X[0] == ZERO) {__cpy(y,z,p); return; } |
|
- else if (Y[0] == ZERO) {__cpy(x,z,p); return; } |
|
+ if (X[0] == 0) |
|
+ { |
|
+ __cpy (y, z, p); |
|
+ return; |
|
+ } |
|
+ else if (Y[0] == 0) |
|
+ { |
|
+ __cpy (x, z, p); |
|
+ return; |
|
+ } |
|
|
|
- if (X[0] == Y[0]) { |
|
- if (__acr(x,y,p) > 0) {add_magnitudes(x,y,z,p); Z[0] = X[0]; } |
|
- else {add_magnitudes(y,x,z,p); Z[0] = Y[0]; } |
|
- } |
|
- else { |
|
- if ((n=__acr(x,y,p)) == 1) {sub_magnitudes(x,y,z,p); Z[0] = X[0]; } |
|
- else if (n == -1) {sub_magnitudes(y,x,z,p); Z[0] = Y[0]; } |
|
- else Z[0] = ZERO; |
|
- } |
|
+ if (X[0] == Y[0]) |
|
+ { |
|
+ if (__acr (x, y, p) > 0) |
|
+ { |
|
+ add_magnitudes (x, y, z, p); |
|
+ Z[0] = X[0]; |
|
+ } |
|
+ else |
|
+ { |
|
+ add_magnitudes (y, x, z, p); |
|
+ Z[0] = Y[0]; |
|
+ } |
|
+ } |
|
+ else |
|
+ { |
|
+ if ((n = __acr (x, y, p)) == 1) |
|
+ { |
|
+ sub_magnitudes (x, y, z, p); |
|
+ Z[0] = X[0]; |
|
+ } |
|
+ else if (n == -1) |
|
+ { |
|
+ sub_magnitudes (y, x, z, p); |
|
+ Z[0] = Y[0]; |
|
+ } |
|
+ else |
|
+ Z[0] = 0; |
|
+ } |
|
} |
|
|
|
+/* Subtract *Y from *X and return the result in *Z. X and Y may overlap but |
|
+ not X and Z or Y and Z. One guard digit is used. The error is less than |
|
+ one ULP. */ |
|
+void |
|
+SECTION |
|
+__sub (const mp_no *x, const mp_no *y, mp_no *z, int p) |
|
+{ |
|
+ int n; |
|
+ |
|
+ if (X[0] == 0) |
|
+ { |
|
+ __cpy (y, z, p); |
|
+ Z[0] = -Z[0]; |
|
+ return; |
|
+ } |
|
+ else if (Y[0] == 0) |
|
+ { |
|
+ __cpy (x, z, p); |
|
+ return; |
|
+ } |
|
|
|
-/* Subtract two multiple precision numbers. *z is set to *x - *y. x&y may */ |
|
-/* overlap but not x&z or y&z. One guard digit is used. The error is */ |
|
-/* less than one ulp. *x & *y are left unchanged. */ |
|
+ if (X[0] != Y[0]) |
|
+ { |
|
+ if (__acr (x, y, p) > 0) |
|
+ { |
|
+ add_magnitudes (x, y, z, p); |
|
+ Z[0] = X[0]; |
|
+ } |
|
+ else |
|
+ { |
|
+ add_magnitudes (y, x, z, p); |
|
+ Z[0] = -Y[0]; |
|
+ } |
|
+ } |
|
+ else |
|
+ { |
|
+ if ((n = __acr (x, y, p)) == 1) |
|
+ { |
|
+ sub_magnitudes (x, y, z, p); |
|
+ Z[0] = X[0]; |
|
+ } |
|
+ else if (n == -1) |
|
+ { |
|
+ sub_magnitudes (y, x, z, p); |
|
+ Z[0] = -Y[0]; |
|
+ } |
|
+ else |
|
+ Z[0] = 0; |
|
+ } |
|
+} |
|
|
|
+#ifndef NO__MUL |
|
+/* Multiply *X and *Y and store result in *Z. X and Y may overlap but not X |
|
+ and Z or Y and Z. For P in [1, 2, 3], the exact result is truncated to P |
|
+ digits. In case P > 3 the error is bounded by 1.001 ULP. */ |
|
void |
|
SECTION |
|
-__sub(const mp_no *x, const mp_no *y, mp_no *z, int p) { |
|
+__mul (const mp_no *x, const mp_no *y, mp_no *z, int p) |
|
+{ |
|
+ long i, j, k, ip, ip2; |
|
+ long p2 = p; |
|
+ mantissa_store_t zk; |
|
+ const mp_no *a; |
|
+ mantissa_store_t *diag; |
|
+ |
|
+ /* Is z=0? */ |
|
+ if (__glibc_unlikely (X[0] * Y[0] == 0)) |
|
+ { |
|
+ Z[0] = 0; |
|
+ return; |
|
+ } |
|
|
|
- int n; |
|
+ /* We need not iterate through all X's and Y's since it's pointless to |
|
+ multiply zeroes. Here, both are zero... */ |
|
+ for (ip2 = p2; ip2 > 0; ip2--) |
|
+ if (X[ip2] != 0 || Y[ip2] != 0) |
|
+ break; |
|
+ |
|
+ a = X[ip2] != 0 ? y : x; |
|
+ |
|
+ /* ... and here, at least one of them is still zero. */ |
|
+ for (ip = ip2; ip > 0; ip--) |
|
+ if (a->d[ip] != 0) |
|
+ break; |
|
+ |
|
+ /* The product looks like this for p = 3 (as an example): |
|
+ |
|
+ |
|
+ a1 a2 a3 |
|
+ x b1 b2 b3 |
|
+ ----------------------------- |
|
+ a1*b3 a2*b3 a3*b3 |
|
+ a1*b2 a2*b2 a3*b2 |
|
+ a1*b1 a2*b1 a3*b1 |
|
+ |
|
+ So our K needs to ideally be P*2, but we're limiting ourselves to P + 3 |
|
+ for P >= 3. We compute the above digits in two parts; the last P-1 |
|
+ digits and then the first P digits. The last P-1 digits are a sum of |
|
+ products of the input digits from P to P-k where K is 0 for the least |
|
+ significant digit and increases as we go towards the left. The product |
|
+ term is of the form X[k]*X[P-k] as can be seen in the above example. |
|
+ |
|
+ The first P digits are also a sum of products with the same product term, |
|
+ except that the sum is from 1 to k. This is also evident from the above |
|
+ example. |
|
+ |
|
+ Another thing that becomes evident is that only the most significant |
|
+ ip+ip2 digits of the result are non-zero, where ip and ip2 are the |
|
+ 'internal precision' of the input numbers, i.e. digits after ip and ip2 |
|
+ are all 0. */ |
|
+ |
|
+ k = (__glibc_unlikely (p2 < 3)) ? p2 + p2 : p2 + 3; |
|
+ |
|
+ while (k > ip + ip2 + 1) |
|
+ Z[k--] = 0; |
|
+ |
|
+ zk = 0; |
|
+ |
|
+ /* Precompute sums of diagonal elements so that we can directly use them |
|
+ later. See the next comment to know we why need them. */ |
|
+ diag = alloca (k * sizeof (mantissa_store_t)); |
|
+ mantissa_store_t d = 0; |
|
+ for (i = 1; i <= ip; i++) |
|
+ { |
|
+ d += X[i] * (mantissa_store_t) Y[i]; |
|
+ diag[i] = d; |
|
+ } |
|
+ while (i < k) |
|
+ diag[i++] = d; |
|
|
|
- if (X[0] == ZERO) {__cpy(y,z,p); Z[0] = -Z[0]; return; } |
|
- else if (Y[0] == ZERO) {__cpy(x,z,p); return; } |
|
+ while (k > p2) |
|
+ { |
|
+ long lim = k / 2; |
|
|
|
- if (X[0] != Y[0]) { |
|
- if (__acr(x,y,p) > 0) {add_magnitudes(x,y,z,p); Z[0] = X[0]; } |
|
- else {add_magnitudes(y,x,z,p); Z[0] = -Y[0]; } |
|
- } |
|
- else { |
|
- if ((n=__acr(x,y,p)) == 1) {sub_magnitudes(x,y,z,p); Z[0] = X[0]; } |
|
- else if (n == -1) {sub_magnitudes(y,x,z,p); Z[0] = -Y[0]; } |
|
- else Z[0] = ZERO; |
|
- } |
|
-} |
|
+ if (k % 2 == 0) |
|
+ /* We want to add this only once, but since we subtract it in the sum |
|
+ of products above, we add twice. */ |
|
+ zk += 2 * X[lim] * (mantissa_store_t) Y[lim]; |
|
|
|
+ for (i = k - p2, j = p2; i < j; i++, j--) |
|
+ zk += (X[i] + X[j]) * (mantissa_store_t) (Y[i] + Y[j]); |
|
|
|
-/* Multiply two multiple precision numbers. *z is set to *x * *y. x&y */ |
|
-/* may overlap but not x&z or y&z. In case p=1,2,3 the exact result is */ |
|
-/* truncated to p digits. In case p>3 the error is bounded by 1.001 ulp. */ |
|
-/* *x & *y are left unchanged. */ |
|
+ zk -= diag[k - 1]; |
|
|
|
-void |
|
-SECTION |
|
-__mul(const mp_no *x, const mp_no *y, mp_no *z, int p) { |
|
+ DIV_RADIX (zk, Z[k]); |
|
+ k--; |
|
+ } |
|
|
|
- int i, i1, i2, j, k, k2; |
|
- double u; |
|
+ /* The real deal. Mantissa digit Z[k] is the sum of all X[i] * Y[j] where i |
|
+ goes from 1 -> k - 1 and j goes the same range in reverse. To reduce the |
|
+ number of multiplications, we halve the range and if k is an even number, |
|
+ add the diagonal element X[k/2]Y[k/2]. Through the half range, we compute |
|
+ X[i] * Y[j] as (X[i] + X[j]) * (Y[i] + Y[j]) - X[i] * Y[i] - X[j] * Y[j]. |
|
+ |
|
+ This reduction tells us that we're summing two things, the first term |
|
+ through the half range and the negative of the sum of the product of all |
|
+ terms of X and Y in the full range. i.e. |
|
+ |
|
+ SUM(X[i] * Y[i]) for k terms. This is precalculated above for each k in |
|
+ a single loop so that it completes in O(n) time and can hence be directly |
|
+ used in the loop below. */ |
|
+ while (k > 1) |
|
+ { |
|
+ long lim = k / 2; |
|
+ |
|
+ if (k % 2 == 0) |
|
+ /* We want to add this only once, but since we subtract it in the sum |
|
+ of products above, we add twice. */ |
|
+ zk += 2 * X[lim] * (mantissa_store_t) Y[lim]; |
|
|
|
- /* Is z=0? */ |
|
- if (X[0]*Y[0]==ZERO) |
|
- { Z[0]=ZERO; return; } |
|
- |
|
- /* Multiply, add and carry */ |
|
- k2 = (p<3) ? p+p : p+3; |
|
- Z[k2]=ZERO; |
|
- for (k=k2; k>1; ) { |
|
- if (k > p) {i1=k-p; i2=p+1; } |
|
- else {i1=1; i2=k; } |
|
- for (i=i1,j=i2-1; i<i2; i++,j--) Z[k] += X[i]*Y[j]; |
|
- |
|
- u = (Z[k] + CUTTER)-CUTTER; |
|
- if (u > Z[k]) u -= RADIX; |
|
- Z[k] -= u; |
|
- Z[--k] = u*RADIXI; |
|
- } |
|
- |
|
- /* Is there a carry beyond the most significant digit? */ |
|
- if (Z[1] == ZERO) { |
|
- for (i=1; i<=p; i++) Z[i]=Z[i+1]; |
|
- EZ = EX + EY - 1; } |
|
- else |
|
- EZ = EX + EY; |
|
+ for (i = 1, j = k - 1; i < j; i++, j--) |
|
+ zk += (X[i] + X[j]) * (mantissa_store_t) (Y[i] + Y[j]); |
|
+ |
|
+ zk -= diag[k - 1]; |
|
+ |
|
+ DIV_RADIX (zk, Z[k]); |
|
+ k--; |
|
+ } |
|
+ Z[k] = zk; |
|
+ |
|
+ /* Get the exponent sum into an intermediate variable. This is a subtle |
|
+ optimization, where given enough registers, all operations on the exponent |
|
+ happen in registers and the result is written out only once into EZ. */ |
|
+ int e = EX + EY; |
|
+ |
|
+ /* Is there a carry beyond the most significant digit? */ |
|
+ if (__glibc_unlikely (Z[1] == 0)) |
|
+ { |
|
+ for (i = 1; i <= p2; i++) |
|
+ Z[i] = Z[i + 1]; |
|
+ e--; |
|
+ } |
|
|
|
+ EZ = e; |
|
Z[0] = X[0] * Y[0]; |
|
} |
|
+#endif |
|
+ |
|
+#ifndef NO__SQR |
|
+/* Square *X and store result in *Y. X and Y may not overlap. For P in |
|
+ [1, 2, 3], the exact result is truncated to P digits. In case P > 3 the |
|
+ error is bounded by 1.001 ULP. This is a faster special case of |
|
+ multiplication. */ |
|
+void |
|
+SECTION |
|
+__sqr (const mp_no *x, mp_no *y, int p) |
|
+{ |
|
+ long i, j, k, ip; |
|
+ mantissa_store_t yk; |
|
|
|
+ /* Is z=0? */ |
|
+ if (__glibc_unlikely (X[0] == 0)) |
|
+ { |
|
+ Y[0] = 0; |
|
+ return; |
|
+ } |
|
|
|
-/* Invert a multiple precision number. Set *y = 1 / *x. */ |
|
-/* Relative error bound = 1.001*r**(1-p) for p=2, 1.063*r**(1-p) for p=3, */ |
|
-/* 2.001*r**(1-p) for p>3. */ |
|
-/* *x=0 is not permissible. *x is left unchanged. */ |
|
+ /* We need not iterate through all X's since it's pointless to |
|
+ multiply zeroes. */ |
|
+ for (ip = p; ip > 0; ip--) |
|
+ if (X[ip] != 0) |
|
+ break; |
|
|
|
-static |
|
-SECTION |
|
-void __inv(const mp_no *x, mp_no *y, int p) { |
|
- int i; |
|
-#if 0 |
|
- int l; |
|
+ k = (__glibc_unlikely (p < 3)) ? p + p : p + 3; |
|
+ |
|
+ while (k > 2 * ip + 1) |
|
+ Y[k--] = 0; |
|
+ |
|
+ yk = 0; |
|
+ |
|
+ while (k > p) |
|
+ { |
|
+ mantissa_store_t yk2 = 0; |
|
+ long lim = k / 2; |
|
+ |
|
+ if (k % 2 == 0) |
|
+ yk += X[lim] * (mantissa_store_t) X[lim]; |
|
+ |
|
+ /* In __mul, this loop (and the one within the next while loop) run |
|
+ between a range to calculate the mantissa as follows: |
|
+ |
|
+ Z[k] = X[k] * Y[n] + X[k+1] * Y[n-1] ... + X[n-1] * Y[k+1] |
|
+ + X[n] * Y[k] |
|
+ |
|
+ For X == Y, we can get away with summing halfway and doubling the |
|
+ result. For cases where the range size is even, the mid-point needs |
|
+ to be added separately (above). */ |
|
+ for (i = k - p, j = p; i < j; i++, j--) |
|
+ yk2 += X[i] * (mantissa_store_t) X[j]; |
|
+ |
|
+ yk += 2 * yk2; |
|
+ |
|
+ DIV_RADIX (yk, Y[k]); |
|
+ k--; |
|
+ } |
|
+ |
|
+ while (k > 1) |
|
+ { |
|
+ mantissa_store_t yk2 = 0; |
|
+ long lim = k / 2; |
|
+ |
|
+ if (k % 2 == 0) |
|
+ yk += X[lim] * (mantissa_store_t) X[lim]; |
|
+ |
|
+ /* Likewise for this loop. */ |
|
+ for (i = 1, j = k - 1; i < j; i++, j--) |
|
+ yk2 += X[i] * (mantissa_store_t) X[j]; |
|
+ |
|
+ yk += 2 * yk2; |
|
+ |
|
+ DIV_RADIX (yk, Y[k]); |
|
+ k--; |
|
+ } |
|
+ Y[k] = yk; |
|
+ |
|
+ /* Squares are always positive. */ |
|
+ Y[0] = 1; |
|
+ |
|
+ /* Get the exponent sum into an intermediate variable. This is a subtle |
|
+ optimization, where given enough registers, all operations on the exponent |
|
+ happen in registers and the result is written out only once into EZ. */ |
|
+ int e = EX * 2; |
|
+ |
|
+ /* Is there a carry beyond the most significant digit? */ |
|
+ if (__glibc_unlikely (Y[1] == 0)) |
|
+ { |
|
+ for (i = 1; i <= p; i++) |
|
+ Y[i] = Y[i + 1]; |
|
+ e--; |
|
+ } |
|
+ |
|
+ EY = e; |
|
+} |
|
#endif |
|
+ |
|
+/* Invert *X and store in *Y. Relative error bound: |
|
+ - For P = 2: 1.001 * R ^ (1 - P) |
|
+ - For P = 3: 1.063 * R ^ (1 - P) |
|
+ - For P > 3: 2.001 * R ^ (1 - P) |
|
+ |
|
+ *X = 0 is not permissible. */ |
|
+static void |
|
+SECTION |
|
+__inv (const mp_no *x, mp_no *y, int p) |
|
+{ |
|
+ long i; |
|
double t; |
|
- mp_no z,w; |
|
- static const int np1[] = {0,0,0,0,1,2,2,2,2,3,3,3,3,3,3,3,3,3, |
|
- 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4}; |
|
- const mp_no mptwo = {1,{1.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0, |
|
- 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0, |
|
- 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0, |
|
- 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0}}; |
|
- |
|
- __cpy(x,&z,p); z.e=0; __mp_dbl(&z,&t,p); |
|
- t=ONE/t; __dbl_mp(t,y,p); EY -= EX; |
|
- |
|
- for (i=0; i<np1[p]; i++) { |
|
- __cpy(y,&w,p); |
|
- __mul(x,&w,y,p); |
|
- __sub(&mptwo,y,&z,p); |
|
- __mul(&w,&z,y,p); |
|
- } |
|
+ mp_no z, w; |
|
+ static const int np1[] = |
|
+ { 0, 0, 0, 0, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, |
|
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 |
|
+ }; |
|
+ |
|
+ __cpy (x, &z, p); |
|
+ z.e = 0; |
|
+ __mp_dbl (&z, &t, p); |
|
+ t = 1 / t; |
|
+ __dbl_mp (t, y, p); |
|
+ EY -= EX; |
|
+ |
|
+ for (i = 0; i < np1[p]; i++) |
|
+ { |
|
+ __cpy (y, &w, p); |
|
+ __mul (x, &w, y, p); |
|
+ __sub (&__mptwo, y, &z, p); |
|
+ __mul (&w, &z, y, p); |
|
+ } |
|
} |
|
|
|
+/* Divide *X by *Y and store result in *Z. X and Y may overlap but not X and Z |
|
+ or Y and Z. Relative error bound: |
|
+ - For P = 2: 2.001 * R ^ (1 - P) |
|
+ - For P = 3: 2.063 * R ^ (1 - P) |
|
+ - For P > 3: 3.001 * R ^ (1 - P) |
|
|
|
-/* Divide one multiple precision number by another.Set *z = *x / *y. *x & *y */ |
|
-/* are left unchanged. x&y may overlap but not x&z or y&z. */ |
|
-/* Relative error bound = 2.001*r**(1-p) for p=2, 2.063*r**(1-p) for p=3 */ |
|
-/* and 3.001*r**(1-p) for p>3. *y=0 is not permissible. */ |
|
- |
|
+ *X = 0 is not permissible. */ |
|
void |
|
SECTION |
|
-__dvd(const mp_no *x, const mp_no *y, mp_no *z, int p) { |
|
- |
|
+__dvd (const mp_no *x, const mp_no *y, mp_no *z, int p) |
|
+{ |
|
mp_no w; |
|
|
|
- if (X[0] == ZERO) Z[0] = ZERO; |
|
- else {__inv(y,&w,p); __mul(x,&w,z,p);} |
|
+ if (X[0] == 0) |
|
+ Z[0] = 0; |
|
+ else |
|
+ { |
|
+ __inv (y, &w, p); |
|
+ __mul (x, &w, z, p); |
|
+ } |
|
} |
|
Index: glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mpa.h |
|
=================================================================== |
|
--- glibc-2.17-c758a686.orig/sysdeps/ieee754/dbl-64/mpa.h |
|
+++ glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mpa.h |
|
@@ -1,7 +1,7 @@ |
|
/* |
|
* IBM Accurate Mathematical Library |
|
* Written by International Business Machines Corp. |
|
- * Copyright (C) 2001, 2011 Free Software Foundation, Inc. |
|
+ * Copyright (C) 2001-2017 Free Software Foundation, Inc. |
|
* |
|
* This program is free software; you can redistribute it and/or modify |
|
* it under the terms of the GNU Lesser General Public License as published by |
|
@@ -23,36 +23,58 @@ |
|
/* FUNCTIONS: */ |
|
/* mcr */ |
|
/* acr */ |
|
-/* cr */ |
|
/* cpy */ |
|
-/* cpymn */ |
|
/* mp_dbl */ |
|
/* dbl_mp */ |
|
/* add */ |
|
/* sub */ |
|
/* mul */ |
|
-/* inv */ |
|
/* dvd */ |
|
/* */ |
|
/* Arithmetic functions for multiple precision numbers. */ |
|
/* Common types and definition */ |
|
/************************************************************************/ |
|
|
|
+#include <mpa-arch.h> |
|
|
|
-typedef struct {/* This structure holds the details of a multi-precision */ |
|
- int e; /* floating point number, x: d[0] holds its sign (-1,0 or 1) */ |
|
- double d[40]; /* e holds its exponent (...,-2,-1,0,1,2,...) and */ |
|
-} mp_no; /* d[1]...d[p] hold its mantissa digits. The value of x is, */ |
|
- /* x = d[1]*r**(e-1) + d[2]*r**(e-2) + ... + d[p]*r**(e-p). */ |
|
- /* Here r = 2**24, 0 <= d[i] < r and 1 <= p <= 32. */ |
|
- /* p is a global variable. A multi-precision number is */ |
|
- /* always normalized. Namely, d[1] > 0. An exception is */ |
|
- /* a zero which is characterized by d[0] = 0. The terms */ |
|
- /* d[p+1], d[p+2], ... of a none zero number have no */ |
|
- /* significance and so are the terms e, d[1],d[2],... */ |
|
- /* of a zero. */ |
|
+/* The mp_no structure holds the details of a multi-precision floating point |
|
+ number. |
|
|
|
-typedef union { int i[2]; double d; } number; |
|
+ - The radix of the number (R) is 2 ^ 24. |
|
+ |
|
+ - E: The exponent of the number. |
|
+ |
|
+ - D[0]: The sign (-1, 1) or 0 if the value is 0. In the latter case, the |
|
+ values of the remaining members of the structure are ignored. |
|
+ |
|
+ - D[1] - D[p]: The mantissa of the number where: |
|
+ |
|
+ 0 <= D[i] < R and |
|
+ P is the precision of the number and 1 <= p <= 32 |
|
+ |
|
+ D[p+1] ... D[39] have no significance. |
|
+ |
|
+ - The value of the number is: |
|
+ |
|
+ D[1] * R ^ (E - 1) + D[2] * R ^ (E - 2) ... D[p] * R ^ (E - p) |
|
+ |
|
+ */ |
|
+typedef struct |
|
+{ |
|
+ int e; |
|
+ mantissa_t d[40]; |
|
+} mp_no; |
|
+ |
|
+typedef union |
|
+{ |
|
+ int i[2]; |
|
+ double d; |
|
+} number; |
|
+ |
|
+/* TODO: With only a partial backport of the constant cleanup we don't |
|
+ define __mpone or __mptwo here for other code to use. */ |
|
+/* extern const mp_no __mpone; |
|
+extern const mp_no __mptwo; */ |
|
|
|
#define X x->d |
|
#define Y y->d |
|
@@ -63,21 +85,73 @@ typedef union { int i[2]; double d; } nu |
|
|
|
#define ABS(x) ((x) < 0 ? -(x) : (x)) |
|
|
|
-int __acr(const mp_no *, const mp_no *, int); |
|
-// int __cr(const mp_no *, const mp_no *, int); |
|
-void __cpy(const mp_no *, mp_no *, int); |
|
-// void __cpymn(const mp_no *, int, mp_no *, int); |
|
-void __mp_dbl(const mp_no *, double *, int); |
|
-void __dbl_mp(double, mp_no *, int); |
|
-void __add(const mp_no *, const mp_no *, mp_no *, int); |
|
-void __sub(const mp_no *, const mp_no *, mp_no *, int); |
|
-void __mul(const mp_no *, const mp_no *, mp_no *, int); |
|
-// void __inv(const mp_no *, mp_no *, int); |
|
-void __dvd(const mp_no *, const mp_no *, mp_no *, int); |
|
+#ifndef RADIXI |
|
+# define RADIXI 0x1.0p-24 /* 2^-24 */ |
|
+#endif |
|
+ |
|
+#ifndef TWO52 |
|
+# define TWO52 0x1.0p52 /* 2^52 */ |
|
+#endif |
|
+ |
|
+#define TWO5 TWOPOW (5) /* 2^5 */ |
|
+#define TWO8 TWOPOW (8) /* 2^52 */ |
|
+#define TWO10 TWOPOW (10) /* 2^10 */ |
|
+#define TWO18 TWOPOW (18) /* 2^18 */ |
|
+#define TWO19 TWOPOW (19) /* 2^19 */ |
|
+#define TWO23 TWOPOW (23) /* 2^23 */ |
|
+ |
|
+#define TWO57 0x1.0p57 /* 2^57 */ |
|
+#define TWO71 0x1.0p71 /* 2^71 */ |
|
+#define TWOM1032 0x1.0p-1032 /* 2^-1032 */ |
|
+#define TWOM1022 0x1.0p-1022 /* 2^-1022 */ |
|
+ |
|
+#define HALF 0x1.0p-1 /* 1/2 */ |
|
+#define MHALF -0x1.0p-1 /* -1/2 */ |
|
+#define HALFRAD 0x1.0p23 /* 2^23 */ |
|
+ |
|
+int __acr (const mp_no *, const mp_no *, int); |
|
+void __cpy (const mp_no *, mp_no *, int); |
|
+void __mp_dbl (const mp_no *, double *, int); |
|
+void __dbl_mp (double, mp_no *, int); |
|
+void __add (const mp_no *, const mp_no *, mp_no *, int); |
|
+void __sub (const mp_no *, const mp_no *, mp_no *, int); |
|
+void __mul (const mp_no *, const mp_no *, mp_no *, int); |
|
+void __sqr (const mp_no *, mp_no *, int); |
|
+void __dvd (const mp_no *, const mp_no *, mp_no *, int); |
|
|
|
extern void __mpatan (mp_no *, mp_no *, int); |
|
extern void __mpatan2 (mp_no *, mp_no *, mp_no *, int); |
|
extern void __mpsqrt (mp_no *, mp_no *, int); |
|
-extern void __mpexp (mp_no *, mp_no *__y, int); |
|
+extern void __mpexp (mp_no *, mp_no *, int); |
|
extern void __c32 (mp_no *, mp_no *, mp_no *, int); |
|
extern int __mpranred (double, mp_no *, int); |
|
+ |
|
+/* Given a power POW, build a multiprecision number 2^POW. */ |
|
+static inline void |
|
+__pow_mp (int pow, mp_no *y, int p) |
|
+{ |
|
+ int i, rem; |
|
+ |
|
+ /* The exponent is E such that E is a factor of 2^24. The remainder (of the |
|
+ form 2^x) goes entirely into the first digit of the mantissa as it is |
|
+ always less than 2^24. */ |
|
+ EY = pow / 24; |
|
+ rem = pow - EY * 24; |
|
+ EY++; |
|
+ |
|
+ /* If the remainder is negative, it means that POW was negative since |
|
+ |EY * 24| <= |pow|. Adjust so that REM is positive and still less than |
|
+ 24 because of which, the mantissa digit is less than 2^24. */ |
|
+ if (rem < 0) |
|
+ { |
|
+ EY--; |
|
+ rem += 24; |
|
+ } |
|
+ /* The sign of any 2^x is always positive. */ |
|
+ Y[0] = 1; |
|
+ Y[1] = 1 << rem; |
|
+ |
|
+ /* Everything else is 0. */ |
|
+ for (i = 2; i <= p; i++) |
|
+ Y[i] = 0; |
|
+} |
|
Index: glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mpexp.c |
|
=================================================================== |
|
--- glibc-2.17-c758a686.orig/sysdeps/ieee754/dbl-64/mpexp.c |
|
+++ glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mpexp.c |
|
@@ -69,13 +69,13 @@ __mpexp(mp_no *x, mp_no *y, int p) { |
|
for (i=0; i<EX; i++) a *= RADIXI; |
|
for ( ; i>EX; i--) a *= RADIX; |
|
b = X[1]*RADIXI; m2 = 24*EX; |
|
- for (; b<HALF; m2--) { a *= TWO; b *= TWO; } |
|
+ for (; b<HALF; m2--) { a *= 2; b *= 2; } |
|
if (b == HALF) { |
|
- for (i=2; i<=p; i++) { if (X[i]!=ZERO) break; } |
|
- if (i==p+1) { m2--; a *= TWO; } |
|
+ for (i=2; i<=p; i++) { if (X[i]!=0) break; } |
|
+ if (i==p+1) { m2--; a *= 2; } |
|
} |
|
if ((m=m1+m2) <= 0) { |
|
- m=0; a=ONE; |
|
+ m=0; a=1; |
|
for (i=n-1; i>0; i--,n--) { if (m1np[i][p]+m2>0) break; } |
|
} |
|
|
|
@@ -84,8 +84,8 @@ __mpexp(mp_no *x, mp_no *y, int p) { |
|
__mul(x,&mpt1,&mps,p); |
|
|
|
/* Evaluate the polynomial. Put result in mpt2 */ |
|
- mpone.e=1; mpone.d[0]=ONE; mpone.d[1]=ONE; |
|
- mpk.e = 1; mpk.d[0] = ONE; mpk.d[1]=__mpexp_nn[n].d; |
|
+ mpone.e=1; mpone.d[0]=1; mpone.d[1]=1; |
|
+ mpk.e = 1; mpk.d[0] = 1; mpk.d[1]=__mpexp_nn[n].d; |
|
__dvd(&mps,&mpk,&mpt1,p); |
|
__add(&mpone,&mpt1,&mpak,p); |
|
for (k=n-1; k>1; k--) { |
|
@@ -99,9 +99,9 @@ __mpexp(mp_no *x, mp_no *y, int p) { |
|
|
|
/* Raise polynomial value to the power of 2**m. Put result in y */ |
|
for (k=0,j=0; k<m; ) { |
|
- __mul(&mpt2,&mpt2,&mpt1,p); k++; |
|
+ __sqr (&mpt2, &mpt1, p); k++; |
|
if (k==m) { j=1; break; } |
|
- __mul(&mpt1,&mpt1,&mpt2,p); k++; |
|
+ __sqr (&mpt1, &mpt2, p); k++; |
|
} |
|
if (j) __cpy(&mpt1,y,p); |
|
else __cpy(&mpt2,y,p); |
|
Index: glibc-2.17-c758a686/sysdeps/x86_64/fpu/multiarch/mpa-avx.c |
|
=================================================================== |
|
--- glibc-2.17-c758a686.orig/sysdeps/x86_64/fpu/multiarch/mpa-avx.c |
|
+++ glibc-2.17-c758a686/sysdeps/x86_64/fpu/multiarch/mpa-avx.c |
|
@@ -1,5 +1,6 @@ |
|
#define __add __add_avx |
|
#define __mul __mul_avx |
|
+#define __sqr __sqr_avx |
|
#define __sub __sub_avx |
|
#define __dbl_mp __dbl_mp_avx |
|
#define __dvd __dvd_avx |
|
Index: glibc-2.17-c758a686/sysdeps/x86_64/fpu/multiarch/mpa-fma4.c |
|
=================================================================== |
|
--- glibc-2.17-c758a686.orig/sysdeps/x86_64/fpu/multiarch/mpa-fma4.c |
|
+++ glibc-2.17-c758a686/sysdeps/x86_64/fpu/multiarch/mpa-fma4.c |
|
@@ -1,5 +1,6 @@ |
|
#define __add __add_fma4 |
|
#define __mul __mul_fma4 |
|
+#define __sqr __sqr_fma4 |
|
#define __sub __sub_fma4 |
|
#define __dbl_mp __dbl_mp_fma4 |
|
#define __dvd __dvd_fma4 |
|
Index: glibc-2.17-c758a686/sysdeps/powerpc/power4/fpu/mpa.c |
|
=================================================================== |
|
--- glibc-2.17-c758a686.orig/sysdeps/powerpc/power4/fpu/mpa.c |
|
+++ glibc-2.17-c758a686/sysdeps/powerpc/power4/fpu/mpa.c |
|
@@ -2,7 +2,7 @@ |
|
/* |
|
* IBM Accurate Mathematical Library |
|
* written by International Business Machines Corp. |
|
- * Copyright (C) 2001, 2006 Free Software Foundation |
|
+ * Copyright (C) 2001-2017 Free Software Foundation, Inc. |
|
* |
|
* This program is free software; you can redistribute it and/or modify |
|
* it under the terms of the GNU Lesser General Public License as published by |
|
@@ -17,532 +17,198 @@ |
|
* You should have received a copy of the GNU Lesser General Public License |
|
* along with this program; if not, see <http://www.gnu.org/licenses/>. |
|
*/ |
|
-/************************************************************************/ |
|
-/* MODULE_NAME: mpa.c */ |
|
-/* */ |
|
-/* FUNCTIONS: */ |
|
-/* mcr */ |
|
-/* acr */ |
|
-/* cr */ |
|
-/* cpy */ |
|
-/* cpymn */ |
|
-/* norm */ |
|
-/* denorm */ |
|
-/* mp_dbl */ |
|
-/* dbl_mp */ |
|
-/* add_magnitudes */ |
|
-/* sub_magnitudes */ |
|
-/* add */ |
|
-/* sub */ |
|
-/* mul */ |
|
-/* inv */ |
|
-/* dvd */ |
|
-/* */ |
|
-/* Arithmetic functions for multiple precision numbers. */ |
|
-/* Relative errors are bounded */ |
|
-/************************************************************************/ |
|
- |
|
- |
|
-#include "endian.h" |
|
-#include "mpa.h" |
|
-#include "mpa2.h" |
|
-#include <sys/param.h> /* For MIN() */ |
|
-/* mcr() compares the sizes of the mantissas of two multiple precision */ |
|
-/* numbers. Mantissas are compared regardless of the signs of the */ |
|
-/* numbers, even if x->d[0] or y->d[0] are zero. Exponents are also */ |
|
-/* disregarded. */ |
|
-static int mcr(const mp_no *x, const mp_no *y, int p) { |
|
- long i; |
|
- long p2 = p; |
|
- for (i=1; i<=p2; i++) { |
|
- if (X[i] == Y[i]) continue; |
|
- else if (X[i] > Y[i]) return 1; |
|
- else return -1; } |
|
- return 0; |
|
-} |
|
- |
|
- |
|
- |
|
-/* acr() compares the absolute values of two multiple precision numbers */ |
|
-int __acr(const mp_no *x, const mp_no *y, int p) { |
|
- long i; |
|
- |
|
- if (X[0] == ZERO) { |
|
- if (Y[0] == ZERO) i= 0; |
|
- else i=-1; |
|
- } |
|
- else if (Y[0] == ZERO) i= 1; |
|
- else { |
|
- if (EX > EY) i= 1; |
|
- else if (EX < EY) i=-1; |
|
- else i= mcr(x,y,p); |
|
- } |
|
- |
|
- return i; |
|
-} |
|
- |
|
- |
|
-/* cr90 compares the values of two multiple precision numbers */ |
|
-int __cr(const mp_no *x, const mp_no *y, int p) { |
|
- int i; |
|
- |
|
- if (X[0] > Y[0]) i= 1; |
|
- else if (X[0] < Y[0]) i=-1; |
|
- else if (X[0] < ZERO ) i= __acr(y,x,p); |
|
- else i= __acr(x,y,p); |
|
- |
|
- return i; |
|
-} |
|
- |
|
- |
|
-/* Copy a multiple precision number. Set *y=*x. x=y is permissible. */ |
|
-void __cpy(const mp_no *x, mp_no *y, int p) { |
|
- long i; |
|
- |
|
- EY = EX; |
|
- for (i=0; i <= p; i++) Y[i] = X[i]; |
|
- |
|
- return; |
|
-} |
|
- |
|
- |
|
-/* Copy a multiple precision number x of precision m into a */ |
|
-/* multiple precision number y of precision n. In case n>m, */ |
|
-/* the digits of y beyond the m'th are set to zero. In case */ |
|
-/* n<m, the digits of x beyond the n'th are ignored. */ |
|
-/* x=y is permissible. */ |
|
- |
|
-void __cpymn(const mp_no *x, int m, mp_no *y, int n) { |
|
- |
|
- long i,k; |
|
- long n2 = n; |
|
- long m2 = m; |
|
- |
|
- EY = EX; k=MIN(m2,n2); |
|
- for (i=0; i <= k; i++) Y[i] = X[i]; |
|
- for ( ; i <= n2; i++) Y[i] = ZERO; |
|
- |
|
- return; |
|
-} |
|
- |
|
-/* Convert a multiple precision number *x into a double precision */ |
|
-/* number *y, normalized case (|x| >= 2**(-1022))) */ |
|
-static void norm(const mp_no *x, double *y, int p) |
|
-{ |
|
- #define R radixi.d |
|
- long i; |
|
-#if 0 |
|
- int k; |
|
-#endif |
|
- double a,c,u,v,z[5]; |
|
- if (p<5) { |
|
- if (p==1) c = X[1]; |
|
- else if (p==2) c = X[1] + R* X[2]; |
|
- else if (p==3) c = X[1] + R*(X[2] + R* X[3]); |
|
- else if (p==4) c =(X[1] + R* X[2]) + R*R*(X[3] + R*X[4]); |
|
- } |
|
- else { |
|
- for (a=ONE, z[1]=X[1]; z[1] < TWO23; ) |
|
- {a *= TWO; z[1] *= TWO; } |
|
- |
|
- for (i=2; i<5; i++) { |
|
- z[i] = X[i]*a; |
|
- u = (z[i] + CUTTER)-CUTTER; |
|
- if (u > z[i]) u -= RADIX; |
|
- z[i] -= u; |
|
- z[i-1] += u*RADIXI; |
|
- } |
|
- |
|
- u = (z[3] + TWO71) - TWO71; |
|
- if (u > z[3]) u -= TWO19; |
|
- v = z[3]-u; |
|
- |
|
- if (v == TWO18) { |
|
- if (z[4] == ZERO) { |
|
- for (i=5; i <= p; i++) { |
|
- if (X[i] == ZERO) continue; |
|
- else {z[3] += ONE; break; } |
|
- } |
|
- } |
|
- else z[3] += ONE; |
|
- } |
|
- |
|
- c = (z[1] + R *(z[2] + R * z[3]))/a; |
|
- } |
|
|
|
- c *= X[0]; |
|
- |
|
- for (i=1; i<EX; i++) c *= RADIX; |
|
- for (i=1; i>EX; i--) c *= RADIXI; |
|
- |
|
- *y = c; |
|
- return; |
|
-#undef R |
|
-} |
|
- |
|
-/* Convert a multiple precision number *x into a double precision */ |
|
-/* number *y, denormalized case (|x| < 2**(-1022))) */ |
|
-static void denorm(const mp_no *x, double *y, int p) |
|
+/* Define __mul and __sqr and use the rest from generic code. */ |
|
+#define NO__MUL |
|
+#define NO__SQR |
|
+ |
|
+#include <sysdeps/ieee754/dbl-64/mpa.c> |
|
+ |
|
+/* Multiply *X and *Y and store result in *Z. X and Y may overlap but not X |
|
+ and Z or Y and Z. For P in [1, 2, 3], the exact result is truncated to P |
|
+ digits. In case P > 3 the error is bounded by 1.001 ULP. */ |
|
+void |
|
+__mul (const mp_no *x, const mp_no *y, mp_no *z, int p) |
|
{ |
|
- long i,k; |
|
+ long i, i1, i2, j, k, k2; |
|
long p2 = p; |
|
- double c,u,z[5]; |
|
-#if 0 |
|
- double a,v; |
|
-#endif |
|
+ double u, zk, zk2; |
|
|
|
-#define R radixi.d |
|
- if (EX<-44 || (EX==-44 && X[1]<TWO5)) |
|
- { *y=ZERO; return; } |
|
- |
|
- if (p2==1) { |
|
- if (EX==-42) {z[1]=X[1]+TWO10; z[2]=ZERO; z[3]=ZERO; k=3;} |
|
- else if (EX==-43) {z[1]= TWO10; z[2]=X[1]; z[3]=ZERO; k=2;} |
|
- else {z[1]= TWO10; z[2]=ZERO; z[3]=X[1]; k=1;} |
|
- } |
|
- else if (p2==2) { |
|
- if (EX==-42) {z[1]=X[1]+TWO10; z[2]=X[2]; z[3]=ZERO; k=3;} |
|
- else if (EX==-43) {z[1]= TWO10; z[2]=X[1]; z[3]=X[2]; k=2;} |
|
- else {z[1]= TWO10; z[2]=ZERO; z[3]=X[1]; k=1;} |
|
- } |
|
- else { |
|
- if (EX==-42) {z[1]=X[1]+TWO10; z[2]=X[2]; k=3;} |
|
- else if (EX==-43) {z[1]= TWO10; z[2]=X[1]; k=2;} |
|
- else {z[1]= TWO10; z[2]=ZERO; k=1;} |
|
- z[3] = X[k]; |
|
- } |
|
- |
|
- u = (z[3] + TWO57) - TWO57; |
|
- if (u > z[3]) u -= TWO5; |
|
- |
|
- if (u==z[3]) { |
|
- for (i=k+1; i <= p2; i++) { |
|
- if (X[i] == ZERO) continue; |
|
- else {z[3] += ONE; break; } |
|
+ /* Is z=0? */ |
|
+ if (__glibc_unlikely (X[0] * Y[0] == 0)) |
|
+ { |
|
+ Z[0] = 0; |
|
+ return; |
|
} |
|
- } |
|
- |
|
- c = X[0]*((z[1] + R*(z[2] + R*z[3])) - TWO10); |
|
|
|
- *y = c*TWOM1032; |
|
- return; |
|
- |
|
-#undef R |
|
-} |
|
- |
|
-/* Convert a multiple precision number *x into a double precision number *y. */ |
|
-/* The result is correctly rounded to the nearest/even. *x is left unchanged */ |
|
- |
|
-void __mp_dbl(const mp_no *x, double *y, int p) { |
|
-#if 0 |
|
- int i,k; |
|
- double a,c,u,v,z[5]; |
|
+ /* Multiply, add and carry */ |
|
+ k2 = (p2 < 3) ? p2 + p2 : p2 + 3; |
|
+ zk = Z[k2] = 0; |
|
+ for (k = k2; k > 1;) |
|
+ { |
|
+ if (k > p2) |
|
+ { |
|
+ i1 = k - p2; |
|
+ i2 = p2 + 1; |
|
+ } |
|
+ else |
|
+ { |
|
+ i1 = 1; |
|
+ i2 = k; |
|
+ } |
|
+#if 1 |
|
+ /* Rearrange this inner loop to allow the fmadd instructions to be |
|
+ independent and execute in parallel on processors that have |
|
+ dual symmetrical FP pipelines. */ |
|
+ if (i1 < (i2 - 1)) |
|
+ { |
|
+ /* Make sure we have at least 2 iterations. */ |
|
+ if (((i2 - i1) & 1L) == 1L) |
|
+ { |
|
+ /* Handle the odd iterations case. */ |
|
+ zk2 = x->d[i2 - 1] * y->d[i1]; |
|
+ } |
|
+ else |
|
+ zk2 = 0.0; |
|
+ /* Do two multiply/adds per loop iteration, using independent |
|
+ accumulators; zk and zk2. */ |
|
+ for (i = i1, j = i2 - 1; i < i2 - 1; i += 2, j -= 2) |
|
+ { |
|
+ zk += x->d[i] * y->d[j]; |
|
+ zk2 += x->d[i + 1] * y->d[j - 1]; |
|
+ } |
|
+ zk += zk2; /* Final sum. */ |
|
+ } |
|
+ else |
|
+ { |
|
+ /* Special case when iterations is 1. */ |
|
+ zk += x->d[i1] * y->d[i1]; |
|
+ } |
|
+#else |
|
+ /* The original code. */ |
|
+ for (i = i1, j = i2 - 1; i < i2; i++, j--) |
|
+ zk += X[i] * Y[j]; |
|
#endif |
|
|
|
- if (X[0] == ZERO) {*y = ZERO; return; } |
|
- |
|
- if (EX> -42) norm(x,y,p); |
|
- else if (EX==-42 && X[1]>=TWO10) norm(x,y,p); |
|
- else denorm(x,y,p); |
|
-} |
|
- |
|
- |
|
-/* dbl_mp() converts a double precision number x into a multiple precision */ |
|
-/* number *y. If the precision p is too small the result is truncated. x is */ |
|
-/* left unchanged. */ |
|
- |
|
-void __dbl_mp(double x, mp_no *y, int p) { |
|
+ u = (zk + CUTTER) - CUTTER; |
|
+ if (u > zk) |
|
+ u -= RADIX; |
|
+ Z[k] = zk - u; |
|
+ zk = u * RADIXI; |
|
+ --k; |
|
+ } |
|
+ Z[k] = zk; |
|
|
|
- long i,n; |
|
- long p2 = p; |
|
- double u; |
|
+ int e = EX + EY; |
|
+ /* Is there a carry beyond the most significant digit? */ |
|
+ if (Z[1] == 0) |
|
+ { |
|
+ for (i = 1; i <= p2; i++) |
|
+ Z[i] = Z[i + 1]; |
|
+ e--; |
|
+ } |
|
|
|
- /* Sign */ |
|
- if (x == ZERO) {Y[0] = ZERO; return; } |
|
- else if (x > ZERO) Y[0] = ONE; |
|
- else {Y[0] = MONE; x=-x; } |
|
- |
|
- /* Exponent */ |
|
- for (EY=ONE; x >= RADIX; EY += ONE) x *= RADIXI; |
|
- for ( ; x < ONE; EY -= ONE) x *= RADIX; |
|
- |
|
- /* Digits */ |
|
- n=MIN(p2,4); |
|
- for (i=1; i<=n; i++) { |
|
- u = (x + TWO52) - TWO52; |
|
- if (u>x) u -= ONE; |
|
- Y[i] = u; x -= u; x *= RADIX; } |
|
- for ( ; i<=p2; i++) Y[i] = ZERO; |
|
- return; |
|
+ EZ = e; |
|
+ Z[0] = X[0] * Y[0]; |
|
} |
|
|
|
+/* Square *X and store result in *Y. X and Y may not overlap. For P in |
|
+ [1, 2, 3], the exact result is truncated to P digits. In case P > 3 the |
|
+ error is bounded by 1.001 ULP. This is a faster special case of |
|
+ multiplication. */ |
|
+void |
|
+__sqr (const mp_no *x, mp_no *y, int p) |
|
+{ |
|
+ long i, j, k, ip; |
|
+ double u, yk; |
|
|
|
-/* add_magnitudes() adds the magnitudes of *x & *y assuming that */ |
|
-/* abs(*x) >= abs(*y) > 0. */ |
|
-/* The sign of the sum *z is undefined. x&y may overlap but not x&z or y&z. */ |
|
-/* No guard digit is used. The result equals the exact sum, truncated. */ |
|
-/* *x & *y are left unchanged. */ |
|
+ /* Is z=0? */ |
|
+ if (__glibc_unlikely (X[0] == 0)) |
|
+ { |
|
+ Y[0] = 0; |
|
+ return; |
|
+ } |
|
|
|
-static void add_magnitudes(const mp_no *x, const mp_no *y, mp_no *z, int p) { |
|
+ /* We need not iterate through all X's since it's pointless to |
|
+ multiply zeroes. */ |
|
+ for (ip = p; ip > 0; ip--) |
|
+ if (X[ip] != 0) |
|
+ break; |
|
|
|
- long i,j,k; |
|
- long p2 = p; |
|
+ k = (__glibc_unlikely (p < 3)) ? p + p : p + 3; |
|
|
|
- EZ = EX; |
|
+ while (k > 2 * ip + 1) |
|
+ Y[k--] = 0; |
|
|
|
- i=p2; j=p2+ EY - EX; k=p2+1; |
|
+ yk = 0; |
|
|
|
- if (j<1) |
|
- {__cpy(x,z,p); return; } |
|
- else Z[k] = ZERO; |
|
- |
|
- for (; j>0; i--,j--) { |
|
- Z[k] += X[i] + Y[j]; |
|
- if (Z[k] >= RADIX) { |
|
- Z[k] -= RADIX; |
|
- Z[--k] = ONE; } |
|
- else |
|
- Z[--k] = ZERO; |
|
- } |
|
- |
|
- for (; i>0; i--) { |
|
- Z[k] += X[i]; |
|
- if (Z[k] >= RADIX) { |
|
- Z[k] -= RADIX; |
|
- Z[--k] = ONE; } |
|
- else |
|
- Z[--k] = ZERO; |
|
- } |
|
- |
|
- if (Z[1] == ZERO) { |
|
- for (i=1; i<=p2; i++) Z[i] = Z[i+1]; } |
|
- else EZ += ONE; |
|
-} |
|
+ while (k > p) |
|
+ { |
|
+ double yk2 = 0.0; |
|
+ long lim = k / 2; |
|
|
|
+ if (k % 2 == 0) |
|
+ { |
|
+ yk += X[lim] * X[lim]; |
|
+ lim--; |
|
+ } |
|
|
|
-/* sub_magnitudes() subtracts the magnitudes of *x & *y assuming that */ |
|
-/* abs(*x) > abs(*y) > 0. */ |
|
-/* The sign of the difference *z is undefined. x&y may overlap but not x&z */ |
|
-/* or y&z. One guard digit is used. The error is less than one ulp. */ |
|
-/* *x & *y are left unchanged. */ |
|
+ /* In __mul, this loop (and the one within the next while loop) run |
|
+ between a range to calculate the mantissa as follows: |
|
|
|
-static void sub_magnitudes(const mp_no *x, const mp_no *y, mp_no *z, int p) { |
|
+ Z[k] = X[k] * Y[n] + X[k+1] * Y[n-1] ... + X[n-1] * Y[k+1] |
|
+ + X[n] * Y[k] |
|
|
|
- long i,j,k; |
|
- long p2 = p; |
|
+ For X == Y, we can get away with summing halfway and doubling the |
|
+ result. For cases where the range size is even, the mid-point needs |
|
+ to be added separately (above). */ |
|
+ for (i = k - p, j = p; i <= lim; i++, j--) |
|
+ yk2 += X[i] * X[j]; |
|
|
|
- EZ = EX; |
|
+ yk += 2.0 * yk2; |
|
|
|
- if (EX == EY) { |
|
- i=j=k=p2; |
|
- Z[k] = Z[k+1] = ZERO; } |
|
- else { |
|
- j= EX - EY; |
|
- if (j > p2) {__cpy(x,z,p); return; } |
|
- else { |
|
- i=p2; j=p2+1-j; k=p2; |
|
- if (Y[j] > ZERO) { |
|
- Z[k+1] = RADIX - Y[j--]; |
|
- Z[k] = MONE; } |
|
- else { |
|
- Z[k+1] = ZERO; |
|
- Z[k] = ZERO; j--;} |
|
+ u = (yk + CUTTER) - CUTTER; |
|
+ if (u > yk) |
|
+ u -= RADIX; |
|
+ Y[k--] = yk - u; |
|
+ yk = u * RADIXI; |
|
} |
|
- } |
|
- |
|
- for (; j>0; i--,j--) { |
|
- Z[k] += (X[i] - Y[j]); |
|
- if (Z[k] < ZERO) { |
|
- Z[k] += RADIX; |
|
- Z[--k] = MONE; } |
|
- else |
|
- Z[--k] = ZERO; |
|
- } |
|
- |
|
- for (; i>0; i--) { |
|
- Z[k] += X[i]; |
|
- if (Z[k] < ZERO) { |
|
- Z[k] += RADIX; |
|
- Z[--k] = MONE; } |
|
- else |
|
- Z[--k] = ZERO; |
|
- } |
|
- |
|
- for (i=1; Z[i] == ZERO; i++) ; |
|
- EZ = EZ - i + 1; |
|
- for (k=1; i <= p2+1; ) |
|
- Z[k++] = Z[i++]; |
|
- for (; k <= p2; ) |
|
- Z[k++] = ZERO; |
|
- |
|
- return; |
|
-} |
|
- |
|
- |
|
-/* Add two multiple precision numbers. Set *z = *x + *y. x&y may overlap */ |
|
-/* but not x&z or y&z. One guard digit is used. The error is less than */ |
|
-/* one ulp. *x & *y are left unchanged. */ |
|
- |
|
-void __add(const mp_no *x, const mp_no *y, mp_no *z, int p) { |
|
- |
|
- int n; |
|
- |
|
- if (X[0] == ZERO) {__cpy(y,z,p); return; } |
|
- else if (Y[0] == ZERO) {__cpy(x,z,p); return; } |
|
- |
|
- if (X[0] == Y[0]) { |
|
- if (__acr(x,y,p) > 0) {add_magnitudes(x,y,z,p); Z[0] = X[0]; } |
|
- else {add_magnitudes(y,x,z,p); Z[0] = Y[0]; } |
|
- } |
|
- else { |
|
- if ((n=__acr(x,y,p)) == 1) {sub_magnitudes(x,y,z,p); Z[0] = X[0]; } |
|
- else if (n == -1) {sub_magnitudes(y,x,z,p); Z[0] = Y[0]; } |
|
- else Z[0] = ZERO; |
|
- } |
|
- return; |
|
-} |
|
- |
|
- |
|
-/* Subtract two multiple precision numbers. *z is set to *x - *y. x&y may */ |
|
-/* overlap but not x&z or y&z. One guard digit is used. The error is */ |
|
-/* less than one ulp. *x & *y are left unchanged. */ |
|
- |
|
-void __sub(const mp_no *x, const mp_no *y, mp_no *z, int p) { |
|
- |
|
- int n; |
|
- |
|
- if (X[0] == ZERO) {__cpy(y,z,p); Z[0] = -Z[0]; return; } |
|
- else if (Y[0] == ZERO) {__cpy(x,z,p); return; } |
|
- |
|
- if (X[0] != Y[0]) { |
|
- if (__acr(x,y,p) > 0) {add_magnitudes(x,y,z,p); Z[0] = X[0]; } |
|
- else {add_magnitudes(y,x,z,p); Z[0] = -Y[0]; } |
|
- } |
|
- else { |
|
- if ((n=__acr(x,y,p)) == 1) {sub_magnitudes(x,y,z,p); Z[0] = X[0]; } |
|
- else if (n == -1) {sub_magnitudes(y,x,z,p); Z[0] = -Y[0]; } |
|
- else Z[0] = ZERO; |
|
- } |
|
- return; |
|
-} |
|
- |
|
- |
|
-/* Multiply two multiple precision numbers. *z is set to *x * *y. x&y */ |
|
-/* may overlap but not x&z or y&z. In case p=1,2,3 the exact result is */ |
|
-/* truncated to p digits. In case p>3 the error is bounded by 1.001 ulp. */ |
|
-/* *x & *y are left unchanged. */ |
|
|
|
-void __mul(const mp_no *x, const mp_no *y, mp_no *z, int p) { |
|
- |
|
- long i, i1, i2, j, k, k2; |
|
- long p2 = p; |
|
- double u, zk, zk2; |
|
- |
|
- /* Is z=0? */ |
|
- if (X[0]*Y[0]==ZERO) |
|
- { Z[0]=ZERO; return; } |
|
- |
|
- /* Multiply, add and carry */ |
|
- k2 = (p2<3) ? p2+p2 : p2+3; |
|
- zk = Z[k2]=ZERO; |
|
- for (k=k2; k>1; ) { |
|
- if (k > p2) {i1=k-p2; i2=p2+1; } |
|
- else {i1=1; i2=k; } |
|
-#if 1 |
|
- /* rearange this inner loop to allow the fmadd instructions to be |
|
- independent and execute in parallel on processors that have |
|
- dual symetrical FP pipelines. */ |
|
- if (i1 < (i2-1)) |
|
+ while (k > 1) |
|
{ |
|
- /* make sure we have at least 2 iterations */ |
|
- if (((i2 - i1) & 1L) == 1L) |
|
- { |
|
- /* Handle the odd iterations case. */ |
|
- zk2 = x->d[i2-1]*y->d[i1]; |
|
- } |
|
- else |
|
- zk2 = zero.d; |
|
- /* Do two multiply/adds per loop iteration, using independent |
|
- accumulators; zk and zk2. */ |
|
- for (i=i1,j=i2-1; i<i2-1; i+=2,j-=2) |
|
- { |
|
- zk += x->d[i]*y->d[j]; |
|
- zk2 += x->d[i+1]*y->d[j-1]; |
|
+ double yk2 = 0.0; |
|
+ long lim = k / 2; |
|
+ |
|
+ if (k % 2 == 0) |
|
+ { |
|
+ yk += X[lim] * X[lim]; |
|
+ lim--; |
|
} |
|
- zk += zk2; /* final sum. */ |
|
- } |
|
- else |
|
+ |
|
+ /* Likewise for this loop. */ |
|
+ for (i = 1, j = k - 1; i <= lim; i++, j--) |
|
+ yk2 += X[i] * X[j]; |
|
+ |
|
+ yk += 2.0 * yk2; |
|
+ |
|
+ u = (yk + CUTTER) - CUTTER; |
|
+ if (u > yk) |
|
+ u -= RADIX; |
|
+ Y[k--] = yk - u; |
|
+ yk = u * RADIXI; |
|
+ } |
|
+ Y[k] = yk; |
|
+ |
|
+ /* Squares are always positive. */ |
|
+ Y[0] = 1.0; |
|
+ |
|
+ int e = EX * 2; |
|
+ /* Is there a carry beyond the most significant digit? */ |
|
+ if (__glibc_unlikely (Y[1] == 0)) |
|
{ |
|
- /* Special case when iterations is 1. */ |
|
- zk += x->d[i1]*y->d[i1]; |
|
+ for (i = 1; i <= p; i++) |
|
+ Y[i] = Y[i + 1]; |
|
+ e--; |
|
} |
|
-#else |
|
- /* The orginal code. */ |
|
- for (i=i1,j=i2-1; i<i2; i++,j--) zk += X[i]*Y[j]; |
|
-#endif |
|
- |
|
- u = (zk + CUTTER)-CUTTER; |
|
- if (u > zk) u -= RADIX; |
|
- Z[k] = zk - u; |
|
- zk = u*RADIXI; |
|
- --k; |
|
- } |
|
- Z[k] = zk; |
|
- |
|
- /* Is there a carry beyond the most significant digit? */ |
|
- if (Z[1] == ZERO) { |
|
- for (i=1; i<=p2; i++) Z[i]=Z[i+1]; |
|
- EZ = EX + EY - 1; } |
|
- else |
|
- EZ = EX + EY; |
|
- |
|
- Z[0] = X[0] * Y[0]; |
|
- return; |
|
-} |
|
- |
|
- |
|
-/* Invert a multiple precision number. Set *y = 1 / *x. */ |
|
-/* Relative error bound = 1.001*r**(1-p) for p=2, 1.063*r**(1-p) for p=3, */ |
|
-/* 2.001*r**(1-p) for p>3. */ |
|
-/* *x=0 is not permissible. *x is left unchanged. */ |
|
- |
|
-void __inv(const mp_no *x, mp_no *y, int p) { |
|
- long i; |
|
-#if 0 |
|
- int l; |
|
-#endif |
|
- double t; |
|
- mp_no z,w; |
|
- static const int np1[] = {0,0,0,0,1,2,2,2,2,3,3,3,3,3,3,3,3,3, |
|
- 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4}; |
|
- const mp_no mptwo = {1,{1.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0, |
|
- 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0, |
|
- 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0, |
|
- 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0}}; |
|
- |
|
- __cpy(x,&z,p); z.e=0; __mp_dbl(&z,&t,p); |
|
- t=ONE/t; __dbl_mp(t,y,p); EY -= EX; |
|
- |
|
- for (i=0; i<np1[p]; i++) { |
|
- __cpy(y,&w,p); |
|
- __mul(x,&w,y,p); |
|
- __sub(&mptwo,y,&z,p); |
|
- __mul(&w,&z,y,p); |
|
- } |
|
- return; |
|
-} |
|
- |
|
- |
|
-/* Divide one multiple precision number by another.Set *z = *x / *y. *x & *y */ |
|
-/* are left unchanged. x&y may overlap but not x&z or y&z. */ |
|
-/* Relative error bound = 2.001*r**(1-p) for p=2, 2.063*r**(1-p) for p=3 */ |
|
-/* and 3.001*r**(1-p) for p>3. *y=0 is not permissible. */ |
|
- |
|
-void __dvd(const mp_no *x, const mp_no *y, mp_no *z, int p) { |
|
- |
|
- mp_no w; |
|
- |
|
- if (X[0] == ZERO) Z[0] = ZERO; |
|
- else {__inv(y,&w,p); __mul(x,&w,z,p);} |
|
- return; |
|
+ EY = e; |
|
} |
|
Index: glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/atnat.h |
|
=================================================================== |
|
--- glibc-2.17-c758a686.orig/sysdeps/ieee754/dbl-64/atnat.h |
|
+++ glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/atnat.h |
|
@@ -138,8 +138,6 @@ |
|
#endif |
|
#endif |
|
|
|
-#define ZERO zero.d |
|
-#define ONE one.d |
|
#define A a.d |
|
#define B b.d |
|
#define C c.d |
|
@@ -160,7 +158,5 @@ |
|
#define U6 u6.d |
|
#define U7 u7.d |
|
#define U8 u8.d |
|
-#define TWO8 two8.d |
|
-#define TWO52 two52.d |
|
|
|
#endif |
|
Index: glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/atnat2.h |
|
=================================================================== |
|
--- glibc-2.17-c758a686.orig/sysdeps/ieee754/dbl-64/atnat2.h |
|
+++ glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/atnat2.h |
|
@@ -174,11 +174,4 @@ |
|
#endif |
|
#endif |
|
|
|
-#define ZERO zero.d |
|
-#define MZERO mzero.d |
|
-#define ONE one.d |
|
-#define TWO8 two8.d |
|
-#define TWO52 two52.d |
|
-#define TWOM1022 twom1022.d |
|
- |
|
#endif |
|
Index: glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mpa2.h |
|
=================================================================== |
|
--- glibc-2.17-c758a686.orig/sysdeps/ieee754/dbl-64/mpa2.h |
|
+++ /dev/null |
|
@@ -1,94 +0,0 @@ |
|
- |
|
-/* |
|
- * IBM Accurate Mathematical Library |
|
- * Written by International Business Machines Corp. |
|
- * Copyright (C) 2001 Free Software Foundation, Inc. |
|
- * |
|
- * This program is free software; you can redistribute it and/or modify |
|
- * it under the terms of the GNU Lesser General Public License as published by |
|
- * the Free Software Foundation; either version 2.1 of the License, or |
|
- * (at your option) any later version. |
|
- * |
|
- * This program is distributed in the hope that it will be useful, |
|
- * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
- * GNU Lesser General Public License for more details. |
|
- * |
|
- * You should have received a copy of the GNU Lesser General Public License |
|
- * along with this program; if not, see <http://www.gnu.org/licenses/>. |
|
- */ |
|
- |
|
-/**************************************************************************/ |
|
-/* */ |
|
-/* MODULE_NAME:mpa2.h */ |
|
-/* */ |
|
-/* */ |
|
-/* variables prototype and definition according to type of processor */ |
|
-/* types definition */ |
|
-/**************************************************************************/ |
|
- |
|
-#ifndef MPA2_H |
|
-#define MPA2_H |
|
- |
|
- |
|
-#ifdef BIG_ENDI |
|
-static const number |
|
-/**/ radix = {{0x41700000, 0x00000000} }, /* 2**24 */ |
|
-/**/ radixi = {{0x3e700000, 0x00000000} }, /* 2**-24 */ |
|
-/**/ cutter = {{0x44b00000, 0x00000000} }, /* 2**76 */ |
|
-/**/ zero = {{0x00000000, 0x00000000} }, /* 0 */ |
|
-/**/ one = {{0x3ff00000, 0x00000000} }, /* 1 */ |
|
-/**/ mone = {{0xbff00000, 0x00000000} }, /* -1 */ |
|
-/**/ two = {{0x40000000, 0x00000000} }, /* 2 */ |
|
-/**/ two5 = {{0x40400000, 0x00000000} }, /* 2**5 */ |
|
-/**/ two10 = {{0x40900000, 0x00000000} }, /* 2**10 */ |
|
-/**/ two18 = {{0x41100000, 0x00000000} }, /* 2**18 */ |
|
-/**/ two19 = {{0x41200000, 0x00000000} }, /* 2**19 */ |
|
-/**/ two23 = {{0x41600000, 0x00000000} }, /* 2**23 */ |
|
-/**/ two52 = {{0x43300000, 0x00000000} }, /* 2**52 */ |
|
-/**/ two57 = {{0x43800000, 0x00000000} }, /* 2**57 */ |
|
-/**/ two71 = {{0x44600000, 0x00000000} }, /* 2**71 */ |
|
-/**/ twom1032 = {{0x00000400, 0x00000000} }; /* 2**-1032 */ |
|
- |
|
-#else |
|
-#ifdef LITTLE_ENDI |
|
-static const number |
|
-/**/ radix = {{0x00000000, 0x41700000} }, /* 2**24 */ |
|
-/**/ radixi = {{0x00000000, 0x3e700000} }, /* 2**-24 */ |
|
-/**/ cutter = {{0x00000000, 0x44b00000} }, /* 2**76 */ |
|
-/**/ zero = {{0x00000000, 0x00000000} }, /* 0 */ |
|
-/**/ one = {{0x00000000, 0x3ff00000} }, /* 1 */ |
|
-/**/ mone = {{0x00000000, 0xbff00000} }, /* -1 */ |
|
-/**/ two = {{0x00000000, 0x40000000} }, /* 2 */ |
|
-/**/ two5 = {{0x00000000, 0x40400000} }, /* 2**5 */ |
|
-/**/ two10 = {{0x00000000, 0x40900000} }, /* 2**10 */ |
|
-/**/ two18 = {{0x00000000, 0x41100000} }, /* 2**18 */ |
|
-/**/ two19 = {{0x00000000, 0x41200000} }, /* 2**19 */ |
|
-/**/ two23 = {{0x00000000, 0x41600000} }, /* 2**23 */ |
|
-/**/ two52 = {{0x00000000, 0x43300000} }, /* 2**52 */ |
|
-/**/ two57 = {{0x00000000, 0x43800000} }, /* 2**57 */ |
|
-/**/ two71 = {{0x00000000, 0x44600000} }, /* 2**71 */ |
|
-/**/ twom1032 = {{0x00000000, 0x00000400} }; /* 2**-1032 */ |
|
- |
|
-#endif |
|
-#endif |
|
- |
|
-#define RADIX radix.d |
|
-#define RADIXI radixi.d |
|
-#define CUTTER cutter.d |
|
-#define ZERO zero.d |
|
-#define ONE one.d |
|
-#define MONE mone.d |
|
-#define TWO two.d |
|
-#define TWO5 two5.d |
|
-#define TWO10 two10.d |
|
-#define TWO18 two18.d |
|
-#define TWO19 two19.d |
|
-#define TWO23 two23.d |
|
-#define TWO52 two52.d |
|
-#define TWO57 two57.d |
|
-#define TWO71 two71.d |
|
-#define TWOM1032 twom1032.d |
|
- |
|
- |
|
-#endif |
|
Index: glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mpatan.h |
|
=================================================================== |
|
--- glibc-2.17-c758a686.orig/sysdeps/ieee754/dbl-64/mpatan.h |
|
+++ glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mpatan.h |
|
@@ -177,6 +177,3 @@ __atan_twonm1[33] = { |
|
|
|
#endif |
|
#endif |
|
- |
|
-#define ONE __atan_one.d |
|
-#define TWO __atan_two.d |
|
Index: glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mpatan2.c |
|
=================================================================== |
|
--- glibc-2.17-c758a686.orig/sysdeps/ieee754/dbl-64/mpatan2.c |
|
+++ glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mpatan2.c |
|
@@ -49,18 +49,16 @@ void |
|
SECTION |
|
__mpatan2(mp_no *y, mp_no *x, mp_no *z, int p) { |
|
|
|
- static const double ZERO = 0.0, ONE = 1.0; |
|
- |
|
mp_no mpone = {0,{0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0, |
|
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0, |
|
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0}}; |
|
mp_no mpt1,mpt2,mpt3; |
|
|
|
|
|
- if (X[0] <= ZERO) { |
|
- mpone.e = 1; mpone.d[0] = mpone.d[1] = ONE; |
|
+ if (X[0] <= 0) { |
|
+ mpone.e = 1; mpone.d[0] = mpone.d[1] = 1; |
|
__dvd(x,y,&mpt1,p); __mul(&mpt1,&mpt1,&mpt2,p); |
|
- if (mpt1.d[0] != ZERO) mpt1.d[0] = ONE; |
|
+ if (mpt1.d[0] != 0) mpt1.d[0] = 1; |
|
__add(&mpt2,&mpone,&mpt3,p); __mpsqrt(&mpt3,&mpt2,p); |
|
__add(&mpt1,&mpt2,&mpt3,p); mpt3.d[0]=Y[0]; |
|
__mpatan(&mpt3,&mpt1,p); __add(&mpt1,&mpt1,z,p); |
|
Index: glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mpexp.h |
|
=================================================================== |
|
--- glibc-2.17-c758a686.orig/sysdeps/ieee754/dbl-64/mpexp.h |
|
+++ glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mpexp.h |
|
@@ -159,11 +159,4 @@ extern const number __mpexp_half attribu |
|
#endif |
|
#endif |
|
|
|
-#define RADIX __mpexp_radix.d |
|
-#define RADIXI __mpexp_radixi.d |
|
-#define ZERO __mpexp_zero.d |
|
-#define ONE __mpexp_one.d |
|
-#define TWO __mpexp_two.d |
|
-#define HALF __mpexp_half.d |
|
- |
|
#endif |
|
Index: glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mpsqrt.h |
|
=================================================================== |
|
--- glibc-2.17-c758a686.orig/sysdeps/ieee754/dbl-64/mpsqrt.h |
|
+++ glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mpsqrt.h |
|
@@ -51,7 +51,4 @@ extern const int __mpsqrt_mp[33] attribu |
|
4,4,4,4,4,4,4,4,4}; |
|
#endif |
|
|
|
-#define ONE __mpsqrt_one.d |
|
-#define HALFRAD __mpsqrt_halfrad.d |
|
- |
|
#endif |
|
Index: glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mptan.c |
|
=================================================================== |
|
--- glibc-2.17-c758a686.orig/sysdeps/ieee754/dbl-64/mptan.c |
|
+++ glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mptan.c |
|
@@ -47,8 +47,6 @@ void |
|
SECTION |
|
__mptan(double x, mp_no *mpy, int p) { |
|
|
|
- static const double MONE = -1.0; |
|
- |
|
int n; |
|
mp_no mpw, mpc, mps; |
|
|
|
@@ -56,7 +54,7 @@ __mptan(double x, mp_no *mpy, int p) { |
|
__c32(&mpw, &mpc, &mps, p); /* computing sin(x) and cos(x) */ |
|
if (n) /* second or fourth quarter of unit circle */ |
|
{ __dvd(&mpc,&mps,mpy,p); |
|
- mpy->d[0] *= MONE; |
|
+ mpy->d[0] *= -1; |
|
} /* tan is negative in this area */ |
|
else __dvd(&mps,&mpc,mpy,p); |
|
|
|
Index: glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/ulog.h |
|
=================================================================== |
|
--- glibc-2.17-c758a686.orig/sysdeps/ieee754/dbl-64/ulog.h |
|
+++ glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/ulog.h |
|
@@ -181,10 +181,6 @@ |
|
#endif |
|
#endif |
|
|
|
-#define ZERO zero.d |
|
-#define ONE one.d |
|
-#define HALF half.d |
|
-#define MHALF mhalf.d |
|
#define SQRT_2 sqrt_2.d |
|
#define DEL_U delu.d |
|
#define DEL_V delv.d |
|
Index: glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/utan.h |
|
=================================================================== |
|
--- glibc-2.17-c758a686.orig/sysdeps/ieee754/dbl-64/utan.h |
|
+++ glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/utan.h |
|
@@ -270,10 +270,4 @@ |
|
#endif |
|
#endif |
|
|
|
- |
|
-#define ZERO zero.d |
|
-#define ONE one.d |
|
-#define MONE mone.d |
|
-#define TWO8 two8.d |
|
- |
|
#endif |
|
Index: glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mpa-arch.h |
|
=================================================================== |
|
--- /dev/null |
|
+++ glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mpa-arch.h |
|
@@ -0,0 +1,47 @@ |
|
+/* Overridable constants and operations. |
|
+ Copyright (C) 2013 Free Software Foundation, Inc. |
|
+ |
|
+ This program is free software; you can redistribute it and/or modify |
|
+ it under the terms of the GNU Lesser General Public License as published by |
|
+ the Free Software Foundation; either version 2.1 of the License, or |
|
+ (at your option) any later version. |
|
+ |
|
+ This program is distributed in the hope that it will be useful, |
|
+ but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
+ GNU Lesser General Public License for more details. |
|
+ |
|
+ You should have received a copy of the GNU Lesser General Public License |
|
+ along with this program; if not, see <http://www.gnu.org/licenses/>. */ |
|
+ |
|
+#include <stdint.h> |
|
+ |
|
+typedef long mantissa_t; |
|
+typedef int64_t mantissa_store_t; |
|
+ |
|
+#define TWOPOW(i) (1L << i) |
|
+ |
|
+#define RADIX_EXP 24 |
|
+#define RADIX TWOPOW (RADIX_EXP) /* 2^24 */ |
|
+ |
|
+/* Divide D by RADIX and put the remainder in R. D must be a non-negative |
|
+ integral value. */ |
|
+#define DIV_RADIX(d, r) \ |
|
+ ({ \ |
|
+ r = d & (RADIX - 1); \ |
|
+ d >>= RADIX_EXP; \ |
|
+ }) |
|
+ |
|
+/* Put the integer component of a double X in R and retain the fraction in |
|
+ X. This is used in extracting mantissa digits for MP_NO by using the |
|
+ integer portion of the current value of the number as the current mantissa |
|
+ digit and then scaling by RADIX to get the next mantissa digit in the same |
|
+ manner. */ |
|
+#define INTEGER_OF(x, i) \ |
|
+ ({ \ |
|
+ i = (mantissa_t) x; \ |
|
+ x -= i; \ |
|
+ }) |
|
+ |
|
+/* Align IN down to F. The code assumes that F is a power of two. */ |
|
+#define ALIGN_DOWN_TO(in, f) ((in) & -(f)) |
|
Index: glibc-2.17-c758a686/sysdeps/powerpc/power4/fpu/mpa-arch.h |
|
=================================================================== |
|
--- /dev/null |
|
+++ glibc-2.17-c758a686/sysdeps/powerpc/power4/fpu/mpa-arch.h |
|
@@ -0,0 +1,56 @@ |
|
+/* Overridable constants and operations. |
|
+ Copyright (C) 2013-2017 Free Software Foundation, Inc. |
|
+ |
|
+ This program is free software; you can redistribute it and/or modify |
|
+ it under the terms of the GNU Lesser General Public License as published by |
|
+ the Free Software Foundation; either version 2.1 of the License, or |
|
+ (at your option) any later version. |
|
+ |
|
+ This program is distributed in the hope that it will be useful, |
|
+ but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
+ GNU Lesser General Public License for more details. |
|
+ |
|
+ You should have received a copy of the GNU Lesser General Public License |
|
+ along with this program; if not, see <http://www.gnu.org/licenses/>. */ |
|
+ |
|
+typedef double mantissa_t; |
|
+typedef double mantissa_store_t; |
|
+ |
|
+#define TWOPOW(i) (0x1.0p##i) |
|
+ |
|
+#define RADIX TWOPOW (24) /* 2^24 */ |
|
+#define CUTTER TWOPOW (76) /* 2^76 */ |
|
+#define RADIXI 0x1.0p-24 /* 2^-24 */ |
|
+#define TWO52 TWOPOW (52) /* 2^52 */ |
|
+ |
|
+/* Divide D by RADIX and put the remainder in R. */ |
|
+#define DIV_RADIX(d,r) \ |
|
+ ({ \ |
|
+ double u = ((d) + CUTTER) - CUTTER; \ |
|
+ if (u > (d)) \ |
|
+ u -= RADIX; \ |
|
+ r = (d) - u; \ |
|
+ (d) = u * RADIXI; \ |
|
+ }) |
|
+ |
|
+/* Put the integer component of a double X in R and retain the fraction in |
|
+ X. */ |
|
+#define INTEGER_OF(x, r) \ |
|
+ ({ \ |
|
+ double u = ((x) + TWO52) - TWO52; \ |
|
+ if (u > (x)) \ |
|
+ u -= 1; \ |
|
+ (r) = u; \ |
|
+ (x) -= u; \ |
|
+ }) |
|
+ |
|
+/* Align IN down to a multiple of F, where F is a power of two. */ |
|
+#define ALIGN_DOWN_TO(in, f) \ |
|
+ ({ \ |
|
+ double factor = f * TWO52; \ |
|
+ double u = (in + factor) - factor; \ |
|
+ if (u > in) \ |
|
+ u -= f; \ |
|
+ u; \ |
|
+ }) |
|
Index: glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/e_atan2.c |
|
=================================================================== |
|
--- glibc-2.17-c758a686.orig/sysdeps/ieee754/dbl-64/e_atan2.c |
|
+++ glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/e_atan2.c |
|
@@ -98,15 +98,15 @@ __ieee754_atan2(double y,double x) { |
|
/* y=+-0 */ |
|
if (uy==0x00000000) { |
|
if (dy==0x00000000) { |
|
- if ((ux&0x80000000)==0x00000000) return ZERO; |
|
+ if ((ux&0x80000000)==0x00000000) return 0; |
|
else return opi.d; } } |
|
else if (uy==0x80000000) { |
|
if (dy==0x00000000) { |
|
- if ((ux&0x80000000)==0x00000000) return MZERO; |
|
+ if ((ux&0x80000000)==0x00000000) return -0.0; |
|
else return mopi.d;} } |
|
|
|
/* x=+-0 */ |
|
- if (x==ZERO) { |
|
+ if (x==0) { |
|
if ((uy&0x80000000)==0x00000000) return hpi.d; |
|
else return mhpi.d; } |
|
|
|
@@ -118,8 +118,8 @@ __ieee754_atan2(double y,double x) { |
|
else if (uy==0xfff00000) { |
|
if (dy==0x00000000) return mqpi.d; } |
|
else { |
|
- if ((uy&0x80000000)==0x00000000) return ZERO; |
|
- else return MZERO; } |
|
+ if ((uy&0x80000000)==0x00000000) return 0; |
|
+ else return -0.0; } |
|
} |
|
} |
|
else if (ux==0xfff00000) { |
|
@@ -141,14 +141,14 @@ __ieee754_atan2(double y,double x) { |
|
if (dy==0x00000000) return mhpi.d; } |
|
|
|
/* either x/y or y/x is very close to zero */ |
|
- ax = (x<ZERO) ? -x : x; ay = (y<ZERO) ? -y : y; |
|
+ ax = (x<0) ? -x : x; ay = (y<0) ? -y : y; |
|
de = (uy & 0x7ff00000) - (ux & 0x7ff00000); |
|
- if (de>=ep) { return ((y>ZERO) ? hpi.d : mhpi.d); } |
|
+ if (de>=ep) { return ((y>0) ? hpi.d : mhpi.d); } |
|
else if (de<=em) { |
|
- if (x>ZERO) { |
|
+ if (x>0) { |
|
if ((z=ay/ax)<TWOM1022) return normalized(ax,ay,y,z); |
|
else return signArctan2(y,z); } |
|
- else { return ((y>ZERO) ? opi.d : mopi.d); } } |
|
+ else { return ((y>0) ? opi.d : mopi.d); } } |
|
|
|
/* if either x or y is extremely close to zero, scale abs(x), abs(y). */ |
|
if (ax<twom500.d || ay<twom500.d) { ax*=two500.d; ay*=two500.d; } |
|
@@ -170,7 +170,7 @@ __ieee754_atan2(double y,double x) { |
|
EMULV(ay,u,v,vv,t1,t2,t3,t4,t5) |
|
du=((ax-v)-vv)/ay; } |
|
|
|
- if (x>ZERO) { |
|
+ if (x>0) { |
|
|
|
/* (i) x>0, abs(y)< abs(x): atan(ay/ax) */ |
|
if (ay<ax) { |
|
@@ -180,7 +180,7 @@ __ieee754_atan2(double y,double x) { |
|
|
|
MUL2(u,du,u,du,v,vv,t1,t2,t3,t4,t5,t6,t7,t8) |
|
s1=v*(f11.d+v*(f13.d+v*(f15.d+v*(f17.d+v*f19.d)))); |
|
- ADD2(f9.d,ff9.d,s1,ZERO,s2,ss2,t1,t2) |
|
+ ADD2(f9.d,ff9.d,s1,0,s2,ss2,t1,t2) |
|
MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8) |
|
ADD2(f7.d,ff7.d,s1,ss1,s2,ss2,t1,t2) |
|
MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8) |
|
@@ -212,7 +212,7 @@ __ieee754_atan2(double y,double x) { |
|
EADD(t1,du,v,vv) |
|
s1=v*(hij[i][11].d+v*(hij[i][12].d+v*(hij[i][13].d+ |
|
v*(hij[i][14].d+v* hij[i][15].d)))); |
|
- ADD2(hij[i][9].d,hij[i][10].d,s1,ZERO,s2,ss2,t1,t2) |
|
+ ADD2(hij[i][9].d,hij[i][10].d,s1,0,s2,ss2,t1,t2) |
|
MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8) |
|
ADD2(hij[i][7].d,hij[i][8].d,s1,ss1,s2,ss2,t1,t2) |
|
MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8) |
|
@@ -237,7 +237,7 @@ __ieee754_atan2(double y,double x) { |
|
|
|
MUL2(u,du,u,du,v,vv,t1,t2,t3,t4,t5,t6,t7,t8) |
|
s1=v*(f11.d+v*(f13.d+v*(f15.d+v*(f17.d+v*f19.d)))); |
|
- ADD2(f9.d,ff9.d,s1,ZERO,s2,ss2,t1,t2) |
|
+ ADD2(f9.d,ff9.d,s1,0,s2,ss2,t1,t2) |
|
MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8) |
|
ADD2(f7.d,ff7.d,s1,ss1,s2,ss2,t1,t2) |
|
MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8) |
|
@@ -265,7 +265,7 @@ __ieee754_atan2(double y,double x) { |
|
EADD(t1,du,v,vv) |
|
s1=v*(hij[i][11].d+v*(hij[i][12].d+v*(hij[i][13].d+ |
|
v*(hij[i][14].d+v* hij[i][15].d)))); |
|
- ADD2(hij[i][9].d,hij[i][10].d,s1,ZERO,s2,ss2,t1,t2) |
|
+ ADD2(hij[i][9].d,hij[i][10].d,s1,0,s2,ss2,t1,t2) |
|
MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8) |
|
ADD2(hij[i][7].d,hij[i][8].d,s1,ss1,s2,ss2,t1,t2) |
|
MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8) |
|
@@ -293,7 +293,7 @@ __ieee754_atan2(double y,double x) { |
|
|
|
MUL2(u,du,u,du,v,vv,t1,t2,t3,t4,t5,t6,t7,t8) |
|
s1=v*(f11.d+v*(f13.d+v*(f15.d+v*(f17.d+v*f19.d)))); |
|
- ADD2(f9.d,ff9.d,s1,ZERO,s2,ss2,t1,t2) |
|
+ ADD2(f9.d,ff9.d,s1,0,s2,ss2,t1,t2) |
|
MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8) |
|
ADD2(f7.d,ff7.d,s1,ss1,s2,ss2,t1,t2) |
|
MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8) |
|
@@ -321,7 +321,7 @@ __ieee754_atan2(double y,double x) { |
|
EADD(t1,du,v,vv) |
|
s1=v*(hij[i][11].d+v*(hij[i][12].d+v*(hij[i][13].d+ |
|
v*(hij[i][14].d+v* hij[i][15].d)))); |
|
- ADD2(hij[i][9].d,hij[i][10].d,s1,ZERO,s2,ss2,t1,t2) |
|
+ ADD2(hij[i][9].d,hij[i][10].d,s1,0,s2,ss2,t1,t2) |
|
MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8) |
|
ADD2(hij[i][7].d,hij[i][8].d,s1,ss1,s2,ss2,t1,t2) |
|
MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8) |
|
@@ -347,7 +347,7 @@ __ieee754_atan2(double y,double x) { |
|
|
|
MUL2(u,du,u,du,v,vv,t1,t2,t3,t4,t5,t6,t7,t8) |
|
s1=v*(f11.d+v*(f13.d+v*(f15.d+v*(f17.d+v*f19.d)))); |
|
- ADD2(f9.d,ff9.d,s1,ZERO,s2,ss2,t1,t2) |
|
+ ADD2(f9.d,ff9.d,s1,0,s2,ss2,t1,t2) |
|
MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8) |
|
ADD2(f7.d,ff7.d,s1,ss1,s2,ss2,t1,t2) |
|
MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8) |
|
@@ -375,7 +375,7 @@ __ieee754_atan2(double y,double x) { |
|
EADD(t1,du,v,vv) |
|
s1=v*(hij[i][11].d+v*(hij[i][12].d+v*(hij[i][13].d+ |
|
v*(hij[i][14].d+v* hij[i][15].d)))); |
|
- ADD2(hij[i][9].d,hij[i][10].d,s1,ZERO,s2,ss2,t1,t2) |
|
+ ADD2(hij[i][9].d,hij[i][10].d,s1,0,s2,ss2,t1,t2) |
|
MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8) |
|
ADD2(hij[i][7].d,hij[i][8].d,s1,ss1,s2,ss2,t1,t2) |
|
MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8) |
|
Index: glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/e_log.c |
|
=================================================================== |
|
--- glibc-2.17-c758a686.orig/sysdeps/ieee754/dbl-64/e_log.c |
|
+++ glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/e_log.c |
|
@@ -78,9 +78,9 @@ __ieee754_log(double x) { |
|
n=0; |
|
if (__builtin_expect(ux < 0x00100000, 0)) { |
|
if (__builtin_expect(((ux & 0x7fffffff) | dx) == 0, 0)) |
|
- return MHALF/ZERO; /* return -INF */ |
|
+ return MHALF/0; /* return -INF */ |
|
if (__builtin_expect(ux < 0, 0)) |
|
- return (x-x)/ZERO; /* return NaN */ |
|
+ return (x-x)/0; /* return NaN */ |
|
n -= 54; x *= two54.d; /* scale x */ |
|
num.d = x; |
|
} |
|
@@ -89,7 +89,7 @@ __ieee754_log(double x) { |
|
|
|
/* Regular values of x */ |
|
|
|
- w = x-ONE; |
|
+ w = x-1; |
|
if (__builtin_expect(ABS(w) > U03, 1)) { goto case_03; } |
|
|
|
|
|
@@ -113,25 +113,25 @@ __ieee754_log(double x) { |
|
w*(d17.d+w*(d18.d+w*(d19.d+w*d20.d)))))))); |
|
EMULV(w,a,s2,ss2,t1,t2,t3,t4,t5) |
|
ADD2(d10.d,dd10.d,s2,ss2,s3,ss3,t1,t2) |
|
- MUL2(w,ZERO,s3,ss3,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8) |
|
+ MUL2(w,0,s3,ss3,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8) |
|
ADD2(d9.d,dd9.d,s2,ss2,s3,ss3,t1,t2) |
|
- MUL2(w,ZERO,s3,ss3,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8) |
|
+ MUL2(w,0,s3,ss3,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8) |
|
ADD2(d8.d,dd8.d,s2,ss2,s3,ss3,t1,t2) |
|
- MUL2(w,ZERO,s3,ss3,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8) |
|
+ MUL2(w,0,s3,ss3,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8) |
|
ADD2(d7.d,dd7.d,s2,ss2,s3,ss3,t1,t2) |
|
- MUL2(w,ZERO,s3,ss3,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8) |
|
+ MUL2(w,0,s3,ss3,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8) |
|
ADD2(d6.d,dd6.d,s2,ss2,s3,ss3,t1,t2) |
|
- MUL2(w,ZERO,s3,ss3,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8) |
|
+ MUL2(w,0,s3,ss3,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8) |
|
ADD2(d5.d,dd5.d,s2,ss2,s3,ss3,t1,t2) |
|
- MUL2(w,ZERO,s3,ss3,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8) |
|
+ MUL2(w,0,s3,ss3,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8) |
|
ADD2(d4.d,dd4.d,s2,ss2,s3,ss3,t1,t2) |
|
- MUL2(w,ZERO,s3,ss3,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8) |
|
+ MUL2(w,0,s3,ss3,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8) |
|
ADD2(d3.d,dd3.d,s2,ss2,s3,ss3,t1,t2) |
|
- MUL2(w,ZERO,s3,ss3,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8) |
|
+ MUL2(w,0,s3,ss3,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8) |
|
ADD2(d2.d,dd2.d,s2,ss2,s3,ss3,t1,t2) |
|
- MUL2(w,ZERO,s3,ss3,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8) |
|
- MUL2(w,ZERO,s2,ss2,s3,ss3,t1,t2,t3,t4,t5,t6,t7,t8) |
|
- ADD2(w,ZERO, s3,ss3, b, bb,t1,t2) |
|
+ MUL2(w,0,s3,ss3,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8) |
|
+ MUL2(w,0,s2,ss2,s3,ss3,t1,t2,t3,t4,t5,t6,t7,t8) |
|
+ ADD2(w,0, s3,ss3, b, bb,t1,t2) |
|
|
|
/* End stage II, case abs(x-1) < 0.03 */ |
|
if ((y=b+(bb+b*E4)) == b+(bb-b*E4)) return y; |
|
@@ -155,7 +155,7 @@ __ieee754_log(double x) { |
|
j = (num.i[HIGH_HALF] & 0x000fffff) >> 4; |
|
|
|
/* Compute w=(u-ui*vj)/(ui*vj) */ |
|
- p0=(ONE+(i-75)*DEL_U)*(ONE+(j-180)*DEL_V); |
|
+ p0=(1+(i-75)*DEL_U)*(1+(j-180)*DEL_V); |
|
q=u-p0; r0=Iu[i].d*Iv[j].d; w=q*r0; |
|
|
|
/* Evaluate polynomial I */ |
|
@@ -178,11 +178,11 @@ __ieee754_log(double x) { |
|
|
|
/* Improve the accuracy of r0 */ |
|
EMULV(p0,r0,sa,sb,t1,t2,t3,t4,t5) |
|
- t=r0*((ONE-sa)-sb); |
|
+ t=r0*((1-sa)-sb); |
|
EADD(r0,t,ra,rb) |
|
|
|
/* Compute w */ |
|
- MUL2(q,ZERO,ra,rb,w,ww,t1,t2,t3,t4,t5,t6,t7,t8) |
|
+ MUL2(q,0,ra,rb,w,ww,t1,t2,t3,t4,t5,t6,t7,t8) |
|
|
|
EADD(A,B0,a0,aa0) |
|
|
|
Index: glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/s_atan.c |
|
=================================================================== |
|
--- glibc-2.17-c758a686.orig/sysdeps/ieee754/dbl-64/s_atan.c |
|
+++ glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/s_atan.c |
|
@@ -82,7 +82,7 @@ double atan(double x) { |
|
return x+x; |
|
|
|
/* Regular values of x, including denormals +-0 and +-INF */ |
|
- u = (x<ZERO) ? -x : x; |
|
+ u = (x<0) ? -x : x; |
|
if (u<C) { |
|
if (u<B) { |
|
if (u<A) { /* u < A */ |
|
@@ -93,7 +93,7 @@ double atan(double x) { |
|
|
|
EMULV(x,x,v,vv,t1,t2,t3,t4,t5) /* v+vv=x^2 */ |
|
s1=v*(f11.d+v*(f13.d+v*(f15.d+v*(f17.d+v*f19.d)))); |
|
- ADD2(f9.d,ff9.d,s1,ZERO,s2,ss2,t1,t2) |
|
+ ADD2(f9.d,ff9.d,s1,0,s2,ss2,t1,t2) |
|
MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8) |
|
ADD2(f7.d,ff7.d,s1,ss1,s2,ss2,t1,t2) |
|
MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8) |
|
@@ -101,8 +101,8 @@ double atan(double x) { |
|
MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8) |
|
ADD2(f3.d,ff3.d,s1,ss1,s2,ss2,t1,t2) |
|
MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8) |
|
- MUL2(x,ZERO,s1,ss1,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8) |
|
- ADD2(x,ZERO,s2,ss2,s1,ss1,t1,t2) |
|
+ MUL2(x,0,s1,ss1,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8) |
|
+ ADD2(x,0,s2,ss2,s1,ss1,t1,t2) |
|
if ((y=s1+(ss1-U5*s1)) == s1+(ss1+U5*s1)) return y; |
|
|
|
return atanMp(x,pr); |
|
@@ -124,14 +124,14 @@ double atan(double x) { |
|
z=u-hij[i][0].d; |
|
s1=z*(hij[i][11].d+z*(hij[i][12].d+z*(hij[i][13].d+ |
|
z*(hij[i][14].d+z* hij[i][15].d)))); |
|
- ADD2(hij[i][9].d,hij[i][10].d,s1,ZERO,s2,ss2,t1,t2) |
|
- MUL2(z,ZERO,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8) |
|
+ ADD2(hij[i][9].d,hij[i][10].d,s1,0,s2,ss2,t1,t2) |
|
+ MUL2(z,0,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8) |
|
ADD2(hij[i][7].d,hij[i][8].d,s1,ss1,s2,ss2,t1,t2) |
|
- MUL2(z,ZERO,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8) |
|
+ MUL2(z,0,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8) |
|
ADD2(hij[i][5].d,hij[i][6].d,s1,ss1,s2,ss2,t1,t2) |
|
- MUL2(z,ZERO,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8) |
|
+ MUL2(z,0,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8) |
|
ADD2(hij[i][3].d,hij[i][4].d,s1,ss1,s2,ss2,t1,t2) |
|
- MUL2(z,ZERO,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8) |
|
+ MUL2(z,0,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8) |
|
ADD2(hij[i][1].d,hij[i][2].d,s1,ss1,s2,ss2,t1,t2) |
|
if ((y=s2+(ss2-U6*s2)) == s2+(ss2+U6*s2)) return __signArctan(x,y); |
|
|
|
@@ -140,9 +140,9 @@ double atan(double x) { |
|
} |
|
else { |
|
if (u<D) { /* C <= u < D */ |
|
- w=ONE/u; |
|
+ w=1/u; |
|
EMULV(w,u,t1,t2,t3,t4,t5,t6,t7) |
|
- ww=w*((ONE-t1)-t2); |
|
+ ww=w*((1-t1)-t2); |
|
i=(TWO52+TWO8*w)-TWO52; i-=16; |
|
z=(w-cij[i][0].d)+ww; |
|
yy=HPI1-z*(cij[i][2].d+z*(cij[i][3].d+z*(cij[i][4].d+ |
|
@@ -152,12 +152,12 @@ double atan(double x) { |
|
else u3=U32; /* w >= 1/2 */ |
|
if ((y=t1+(yy-u3)) == t1+(yy+u3)) return __signArctan(x,y); |
|
|
|
- DIV2(ONE,ZERO,u,ZERO,w,ww,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10) |
|
+ DIV2(1,0,u,0,w,ww,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10) |
|
t1=w-hij[i][0].d; |
|
EADD(t1,ww,z,zz) |
|
s1=z*(hij[i][11].d+z*(hij[i][12].d+z*(hij[i][13].d+ |
|
z*(hij[i][14].d+z* hij[i][15].d)))); |
|
- ADD2(hij[i][9].d,hij[i][10].d,s1,ZERO,s2,ss2,t1,t2) |
|
+ ADD2(hij[i][9].d,hij[i][10].d,s1,0,s2,ss2,t1,t2) |
|
MUL2(z,zz,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8) |
|
ADD2(hij[i][7].d,hij[i][8].d,s1,ss1,s2,ss2,t1,t2) |
|
MUL2(z,zz,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8) |
|
@@ -173,18 +173,18 @@ double atan(double x) { |
|
} |
|
else { |
|
if (u<E) { /* D <= u < E */ |
|
- w=ONE/u; v=w*w; |
|
+ w=1/u; v=w*w; |
|
EMULV(w,u,t1,t2,t3,t4,t5,t6,t7) |
|
yy=w*v*(d3.d+v*(d5.d+v*(d7.d+v*(d9.d+v*(d11.d+v*d13.d))))); |
|
- ww=w*((ONE-t1)-t2); |
|
+ ww=w*((1-t1)-t2); |
|
ESUB(HPI,w,t3,cor) |
|
yy=((HPI1+cor)-ww)-yy; |
|
if ((y=t3+(yy-U4)) == t3+(yy+U4)) return __signArctan(x,y); |
|
|
|
- DIV2(ONE,ZERO,u,ZERO,w,ww,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10) |
|
+ DIV2(1,0,u,0,w,ww,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10) |
|
MUL2(w,ww,w,ww,v,vv,t1,t2,t3,t4,t5,t6,t7,t8) |
|
s1=v*(f11.d+v*(f13.d+v*(f15.d+v*(f17.d+v*f19.d)))); |
|
- ADD2(f9.d,ff9.d,s1,ZERO,s2,ss2,t1,t2) |
|
+ ADD2(f9.d,ff9.d,s1,0,s2,ss2,t1,t2) |
|
MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8) |
|
ADD2(f7.d,ff7.d,s1,ss1,s2,ss2,t1,t2) |
|
MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8) |
|
Index: glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/s_tan.c |
|
=================================================================== |
|
--- glibc-2.17-c758a686.orig/sysdeps/ieee754/dbl-64/s_tan.c |
|
+++ glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/s_tan.c |
|
@@ -84,7 +84,7 @@ tan(double x) { |
|
goto ret; |
|
} |
|
|
|
- w=(x<ZERO) ? -x : x; |
|
+ w=(x<0) ? -x : x; |
|
|
|
/* (I) The case abs(x) <= 1.259e-8 */ |
|
if (w<=g1.d) { retval = x; goto ret; } |
|
@@ -125,11 +125,11 @@ tan(double x) { |
|
|
|
/* First stage */ |
|
i = ((int) (mfftnhf.d+TWO8*w)); |
|
- z = w-xfg[i][0].d; z2 = z*z; s = (x<ZERO) ? MONE : ONE; |
|
+ z = w-xfg[i][0].d; z2 = z*z; s = (x<0) ? -1 : 1; |
|
pz = z+z*z2*(e0.d+z2*e1.d); |
|
fi = xfg[i][1].d; gi = xfg[i][2].d; t2 = pz*(gi+fi)/(gi-pz); |
|
if ((y=fi+(t2-fi*u3.d))==fi+(t2+fi*u3.d)) { retval = (s*y); goto ret; } |
|
- t3 = (t2<ZERO) ? -t2 : t2; |
|
+ t3 = (t2<0) ? -t2 : t2; |
|
t4 = fi*ua3.d+t3*ub3.d; |
|
if ((y=fi+(t2-t4))==fi+(t2+t4)) { retval = (s*y); goto ret; } |
|
|
|
@@ -165,8 +165,8 @@ tan(double x) { |
|
da = xn*mp3.d; |
|
a=t1-da; |
|
da = (t1-a)-da; |
|
- if (a<ZERO) {ya=-a; yya=-da; sy=MONE;} |
|
- else {ya= a; yya= da; sy= ONE;} |
|
+ if (a<0) {ya=-a; yya=-da; sy=-1;} |
|
+ else {ya= a; yya= da; sy= 1;} |
|
|
|
/* (IV),(V) The case 0.787 < abs(x) <= 25, abs(y) <= 1e-7 */ |
|
if (ya<=gy1.d) { retval = tanMp(x); goto ret; } |
|
@@ -240,14 +240,14 @@ tan(double x) { |
|
/* -cot */ |
|
t2 = pz*(fi+gi)/(fi+pz); |
|
if ((y=gi-(t2-gi*u10.d))==gi-(t2+gi*u10.d)) { retval = (-sy*y); goto ret; } |
|
- t3 = (t2<ZERO) ? -t2 : t2; |
|
+ t3 = (t2<0) ? -t2 : t2; |
|
t4 = gi*ua10.d+t3*ub10.d; |
|
if ((y=gi-(t2-t4))==gi-(t2+t4)) { retval = (-sy*y); goto ret; } } |
|
else { |
|
/* tan */ |
|
t2 = pz*(gi+fi)/(gi-pz); |
|
if ((y=fi+(t2-fi*u9.d))==fi+(t2+fi*u9.d)) { retval = (sy*y); goto ret; } |
|
- t3 = (t2<ZERO) ? -t2 : t2; |
|
+ t3 = (t2<0) ? -t2 : t2; |
|
t4 = fi*ua9.d+t3*ub9.d; |
|
if ((y=fi+(t2-t4))==fi+(t2+t4)) { retval = (sy*y); goto ret; } } |
|
|
|
@@ -295,8 +295,8 @@ tan(double x) { |
|
a = t - t1; |
|
da = ((t-a)-t1)+da; |
|
EADD(a,da,t1,t2) a=t1; da=t2; |
|
- if (a<ZERO) {ya=-a; yya=-da; sy=MONE;} |
|
- else {ya= a; yya= da; sy= ONE;} |
|
+ if (a<0) {ya=-a; yya=-da; sy=-1;} |
|
+ else {ya= a; yya= da; sy= 1;} |
|
|
|
/* (+++) The case 25 < abs(x) <= 1e8, abs(y) <= 1e-7 */ |
|
if (ya<=gy1.d) { retval = tanMp(x); goto ret; } |
|
@@ -355,14 +355,14 @@ tan(double x) { |
|
/* -cot */ |
|
t2 = pz*(fi+gi)/(fi+pz); |
|
if ((y=gi-(t2-gi*u18.d))==gi-(t2+gi*u18.d)) { retval = (-sy*y); goto ret; } |
|
- t3 = (t2<ZERO) ? -t2 : t2; |
|
+ t3 = (t2<0) ? -t2 : t2; |
|
t4 = gi*ua18.d+t3*ub18.d; |
|
if ((y=gi-(t2-t4))==gi-(t2+t4)) { retval = (-sy*y); goto ret; } } |
|
else { |
|
/* tan */ |
|
t2 = pz*(gi+fi)/(gi-pz); |
|
if ((y=fi+(t2-fi*u17.d))==fi+(t2+fi*u17.d)) { retval = (sy*y); goto ret; } |
|
- t3 = (t2<ZERO) ? -t2 : t2; |
|
+ t3 = (t2<0) ? -t2 : t2; |
|
t4 = fi*ua17.d+t3*ub17.d; |
|
if ((y=fi+(t2-t4))==fi+(t2+t4)) { retval = (sy*y); goto ret; } } |
|
|
|
@@ -398,8 +398,8 @@ tan(double x) { |
|
/* Range reduction by algorithm iii */ |
|
n = (__branred(x,&a,&da)) & 0x00000001; |
|
EADD(a,da,t1,t2) a=t1; da=t2; |
|
- if (a<ZERO) {ya=-a; yya=-da; sy=MONE;} |
|
- else {ya= a; yya= da; sy= ONE;} |
|
+ if (a<0) {ya=-a; yya=-da; sy=-1;} |
|
+ else {ya= a; yya= da; sy= 1;} |
|
|
|
/* (+++) The case 1e8 < abs(x) < 2**1024, abs(y) <= 1e-7 */ |
|
if (ya<=gy1.d) { retval = tanMp(x); goto ret; } |
|
@@ -463,14 +463,14 @@ tan(double x) { |
|
/* -cot */ |
|
t2 = pz*(fi+gi)/(fi+pz); |
|
if ((y=gi-(t2-gi*u26.d))==gi-(t2+gi*u26.d)) { retval = (-sy*y); goto ret; } |
|
- t3 = (t2<ZERO) ? -t2 : t2; |
|
+ t3 = (t2<0) ? -t2 : t2; |
|
t4 = gi*ua26.d+t3*ub26.d; |
|
if ((y=gi-(t2-t4))==gi-(t2+t4)) { retval = (-sy*y); goto ret; } } |
|
else { |
|
/* tan */ |
|
t2 = pz*(gi+fi)/(gi-pz); |
|
if ((y=fi+(t2-fi*u25.d))==fi+(t2+fi*u25.d)) { retval = (sy*y); goto ret; } |
|
- t3 = (t2<ZERO) ? -t2 : t2; |
|
+ t3 = (t2<0) ? -t2 : t2; |
|
t4 = fi*ua25.d+t3*ub25.d; |
|
if ((y=fi+(t2-t4))==fi+(t2+t4)) { retval = (sy*y); goto ret; } } |
|
|
|
Index: glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mpatan.c |
|
=================================================================== |
|
--- glibc-2.17-c758a686.orig/sysdeps/ieee754/dbl-64/mpatan.c |
|
+++ glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mpatan.c |
|
@@ -69,8 +69,8 @@ __mpatan(mp_no *x, mp_no *y, int p) { |
|
{if (dx>__atan_xm[m].d) break;} |
|
} |
|
mpone.e = mptwo.e = mptwoim1.e = 1; |
|
- mpone.d[0] = mpone.d[1] = mptwo.d[0] = mptwoim1.d[0] = ONE; |
|
- mptwo.d[1] = TWO; |
|
+ mpone.d[0] = mpone.d[1] = mptwo.d[0] = mptwoim1.d[0] = 1; |
|
+ mptwo.d[1] = 2; |
|
|
|
/* Reduce x m times */ |
|
__mul(x,x,&mpsm,p); |
|
@@ -92,7 +92,7 @@ __mpatan(mp_no *x, mp_no *y, int p) { |
|
n=__atan_np[p]; mptwoim1.d[1] = __atan_twonm1[p].d; |
|
__dvd(&mpsm,&mptwoim1,&mpt,p); |
|
for (i=n-1; i>1; i--) { |
|
- mptwoim1.d[1] -= TWO; |
|
+ mptwoim1.d[1] -= 2; |
|
__dvd(&mpsm,&mptwoim1,&mpt1,p); |
|
__mul(&mpsm,&mpt,&mpt2,p); |
|
__sub(&mpt1,&mpt2,&mpt,p); |
|
Index: glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mpsqrt.c |
|
=================================================================== |
|
--- glibc-2.17-c758a686.orig/sysdeps/ieee754/dbl-64/mpsqrt.c |
|
+++ glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mpsqrt.c |
|
@@ -62,8 +62,8 @@ __mpsqrt(mp_no *x, mp_no *y, int p) { |
|
mp_no mpxn,mpz,mpu,mpt1,mpt2; |
|
|
|
/* Prepare multi-precision 1/2 and 3/2 */ |
|
- mphalf.e =0; mphalf.d[0] =ONE; mphalf.d[1] =HALFRAD; |
|
- mp3halfs.e=1; mp3halfs.d[0]=ONE; mp3halfs.d[1]=ONE; mp3halfs.d[2]=HALFRAD; |
|
+ mphalf.e =0; mphalf.d[0] =1; mphalf.d[1] =HALFRAD; |
|
+ mp3halfs.e=1; mp3halfs.d[0]=1; mp3halfs.d[1]=1; mp3halfs.d[2]=HALFRAD; |
|
|
|
ey=EX/2; __cpy(x,&mpxn,p); mpxn.e -= (ey+ey); |
|
__mp_dbl(&mpxn,&dx,p); dy=fastiroot(dx); __dbl_mp(dy,&mpu,p);
|
|
|