You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
113 lines
4.2 KiB
113 lines
4.2 KiB
From 987322bc0b170570a7bd539480252453fcc7a6f5 Mon Sep 17 00:00:00 2001 |
|
From: Adhemerval Zanella <azanella@linux.vnet.ibm.com> |
|
Date: Fri, 29 Mar 2013 18:15:28 -0500 |
|
Subject: [PATCH 23/42] PowerPC: remove branch prediction from rint |
|
implementation |
|
|
|
The branch prediction hints actually hurt performance in this case. |
|
The assembly implementation makes two assumptions: 1. 'fabs (x) < 2^52' |
|
is unlikely and 2. 'x > 0.0' is unlikely (if 1. is true). Since it is a |
|
general floating point function, expected input is not bounded and then |
|
it is better to let the hardware handle the branches. |
|
|
|
(backported from commit 60c414c346a1d5ef0510ffbdc0ab75f288ee4d3f) |
|
|
|
This backport does not include the benchmark tests from the original |
|
commit. |
|
--- |
|
sysdeps/powerpc/powerpc32/fpu/s_rint.S | 6 +++--- |
|
sysdeps/powerpc/powerpc32/fpu/s_rintf.S | 6 +++--- |
|
sysdeps/powerpc/powerpc64/fpu/s_rint.S | 6 +++--- |
|
sysdeps/powerpc/powerpc64/fpu/s_rintf.S | 6 +++--- |
|
4 files changed, 12 insertions(+), 12 deletions(-) |
|
|
|
diff --git glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/s_rint.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/s_rint.S |
|
index 0ab9e6c..c28e7f6 100644 |
|
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/s_rint.S |
|
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/s_rint.S |
|
@@ -45,14 +45,14 @@ ENTRY (__rint) |
|
fsub fp12,fp13,fp13 /* generate 0.0 */ |
|
fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO52) */ |
|
fcmpu cr6,fp1,fp12 /* if (x > 0.0) */ |
|
- bnllr- cr7 |
|
- bng- cr6,.L4 |
|
+ bnllr cr7 |
|
+ bng cr6,.L4 |
|
fadd fp1,fp1,fp13 /* x+= TWO52; */ |
|
fsub fp1,fp1,fp13 /* x-= TWO52; */ |
|
fabs fp1,fp1 /* if (x == 0.0) */ |
|
blr /* x = 0.0; */ |
|
.L4: |
|
- bnllr- cr6 /* if (x < 0.0) */ |
|
+ bnllr cr6 /* if (x < 0.0) */ |
|
fsub fp1,fp1,fp13 /* x-= TWO52; */ |
|
fadd fp1,fp1,fp13 /* x+= TWO52; */ |
|
fnabs fp1,fp1 /* if (x == 0.0) */ |
|
diff --git glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/s_rintf.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/s_rintf.S |
|
index ddb47db..69aed9c 100644 |
|
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/s_rintf.S |
|
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/s_rintf.S |
|
@@ -41,14 +41,14 @@ ENTRY (__rintf) |
|
fsubs fp12,fp13,fp13 /* generate 0.0 */ |
|
fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO23) */ |
|
fcmpu cr6,fp1,fp12 /* if (x > 0.0) */ |
|
- bnllr- cr7 |
|
- bng- cr6,.L4 |
|
+ bnllr cr7 |
|
+ bng cr6,.L4 |
|
fadds fp1,fp1,fp13 /* x+= TWO23; */ |
|
fsubs fp1,fp1,fp13 /* x-= TWO23; */ |
|
fabs fp1,fp1 /* if (x == 0.0) */ |
|
blr /* x = 0.0; */ |
|
.L4: |
|
- bnllr- cr6 /* if (x < 0.0) */ |
|
+ bnllr cr6 /* if (x < 0.0) */ |
|
fsubs fp1,fp1,fp13 /* x-= TWO23; */ |
|
fadds fp1,fp1,fp13 /* x+= TWO23; */ |
|
fnabs fp1,fp1 /* if (x == 0.0) */ |
|
diff --git glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/fpu/s_rint.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/fpu/s_rint.S |
|
index db62405..560905a 100644 |
|
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/fpu/s_rint.S |
|
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/fpu/s_rint.S |
|
@@ -34,14 +34,14 @@ EALIGN (__rint, 4, 0) |
|
fsub fp12,fp13,fp13 /* generate 0.0 */ |
|
fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO52) */ |
|
fcmpu cr6,fp1,fp12 /* if (x > 0.0) */ |
|
- bnllr- cr7 |
|
- bng- cr6,.L4 |
|
+ bnllr cr7 |
|
+ bng cr6,.L4 |
|
fadd fp1,fp1,fp13 /* x+= TWO52; */ |
|
fsub fp1,fp1,fp13 /* x-= TWO52; */ |
|
fabs fp1,fp1 /* if (x == 0.0) */ |
|
blr /* x = 0.0; */ |
|
.L4: |
|
- bnllr- cr6 /* if (x < 0.0) */ |
|
+ bnllr cr6 /* if (x < 0.0) */ |
|
fsub fp1,fp1,fp13 /* x-= TWO52; */ |
|
fadd fp1,fp1,fp13 /* x+= TWO52; */ |
|
fnabs fp1,fp1 /* if (x == 0.0) */ |
|
diff --git glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/fpu/s_rintf.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/fpu/s_rintf.S |
|
index 248649d..c120d91 100644 |
|
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/fpu/s_rintf.S |
|
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/fpu/s_rintf.S |
|
@@ -30,14 +30,14 @@ EALIGN (__rintf, 4, 0) |
|
fsubs fp12,fp13,fp13 /* generate 0.0 */ |
|
fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO23) */ |
|
fcmpu cr6,fp1,fp12 /* if (x > 0.0) */ |
|
- bnllr- cr7 |
|
- bng- cr6,.L4 |
|
+ bnllr cr7 |
|
+ bng cr6,.L4 |
|
fadds fp1,fp1,fp13 /* x+= TWO23; */ |
|
fsubs fp1,fp1,fp13 /* x-= TWO23; */ |
|
fabs fp1,fp1 /* if (x == 0.0) */ |
|
blr /* x = 0.0; */ |
|
.L4: |
|
- bnllr- cr6 /* if (x < 0.0) */ |
|
+ bnllr cr6 /* if (x < 0.0) */ |
|
fsubs fp1,fp1,fp13 /* x-= TWO23; */ |
|
fadds fp1,fp1,fp13 /* x+= TWO23; */ |
|
fnabs fp1,fp1 /* if (x == 0.0) */ |
|
-- |
|
1.7.11.7
|
|
|