You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
114 lines
4.2 KiB
114 lines
4.2 KiB
7 years ago
|
From 987322bc0b170570a7bd539480252453fcc7a6f5 Mon Sep 17 00:00:00 2001
|
||
|
From: Adhemerval Zanella <azanella@linux.vnet.ibm.com>
|
||
|
Date: Fri, 29 Mar 2013 18:15:28 -0500
|
||
|
Subject: [PATCH 23/42] PowerPC: remove branch prediction from rint
|
||
|
implementation
|
||
|
|
||
|
The branch prediction hints actually hurt performance in this case.
|
||
|
The assembly implementation makes two assumptions: 1. 'fabs (x) < 2^52'
|
||
|
is unlikely and 2. 'x > 0.0' is unlikely (if 1. is true). Since it is a
|
||
|
general floating point function, expected input is not bounded and then
|
||
|
it is better to let the hardware handle the branches.
|
||
|
|
||
|
(backported from commit 60c414c346a1d5ef0510ffbdc0ab75f288ee4d3f)
|
||
|
|
||
|
This backport does not include the benchmark tests from the original
|
||
|
commit.
|
||
|
---
|
||
|
sysdeps/powerpc/powerpc32/fpu/s_rint.S | 6 +++---
|
||
|
sysdeps/powerpc/powerpc32/fpu/s_rintf.S | 6 +++---
|
||
|
sysdeps/powerpc/powerpc64/fpu/s_rint.S | 6 +++---
|
||
|
sysdeps/powerpc/powerpc64/fpu/s_rintf.S | 6 +++---
|
||
|
4 files changed, 12 insertions(+), 12 deletions(-)
|
||
|
|
||
|
diff --git glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/s_rint.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/s_rint.S
|
||
|
index 0ab9e6c..c28e7f6 100644
|
||
|
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/s_rint.S
|
||
|
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/s_rint.S
|
||
|
@@ -45,14 +45,14 @@ ENTRY (__rint)
|
||
|
fsub fp12,fp13,fp13 /* generate 0.0 */
|
||
|
fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO52) */
|
||
|
fcmpu cr6,fp1,fp12 /* if (x > 0.0) */
|
||
|
- bnllr- cr7
|
||
|
- bng- cr6,.L4
|
||
|
+ bnllr cr7
|
||
|
+ bng cr6,.L4
|
||
|
fadd fp1,fp1,fp13 /* x+= TWO52; */
|
||
|
fsub fp1,fp1,fp13 /* x-= TWO52; */
|
||
|
fabs fp1,fp1 /* if (x == 0.0) */
|
||
|
blr /* x = 0.0; */
|
||
|
.L4:
|
||
|
- bnllr- cr6 /* if (x < 0.0) */
|
||
|
+ bnllr cr6 /* if (x < 0.0) */
|
||
|
fsub fp1,fp1,fp13 /* x-= TWO52; */
|
||
|
fadd fp1,fp1,fp13 /* x+= TWO52; */
|
||
|
fnabs fp1,fp1 /* if (x == 0.0) */
|
||
|
diff --git glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/s_rintf.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/s_rintf.S
|
||
|
index ddb47db..69aed9c 100644
|
||
|
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/s_rintf.S
|
||
|
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/s_rintf.S
|
||
|
@@ -41,14 +41,14 @@ ENTRY (__rintf)
|
||
|
fsubs fp12,fp13,fp13 /* generate 0.0 */
|
||
|
fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO23) */
|
||
|
fcmpu cr6,fp1,fp12 /* if (x > 0.0) */
|
||
|
- bnllr- cr7
|
||
|
- bng- cr6,.L4
|
||
|
+ bnllr cr7
|
||
|
+ bng cr6,.L4
|
||
|
fadds fp1,fp1,fp13 /* x+= TWO23; */
|
||
|
fsubs fp1,fp1,fp13 /* x-= TWO23; */
|
||
|
fabs fp1,fp1 /* if (x == 0.0) */
|
||
|
blr /* x = 0.0; */
|
||
|
.L4:
|
||
|
- bnllr- cr6 /* if (x < 0.0) */
|
||
|
+ bnllr cr6 /* if (x < 0.0) */
|
||
|
fsubs fp1,fp1,fp13 /* x-= TWO23; */
|
||
|
fadds fp1,fp1,fp13 /* x+= TWO23; */
|
||
|
fnabs fp1,fp1 /* if (x == 0.0) */
|
||
|
diff --git glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/fpu/s_rint.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/fpu/s_rint.S
|
||
|
index db62405..560905a 100644
|
||
|
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/fpu/s_rint.S
|
||
|
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/fpu/s_rint.S
|
||
|
@@ -34,14 +34,14 @@ EALIGN (__rint, 4, 0)
|
||
|
fsub fp12,fp13,fp13 /* generate 0.0 */
|
||
|
fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO52) */
|
||
|
fcmpu cr6,fp1,fp12 /* if (x > 0.0) */
|
||
|
- bnllr- cr7
|
||
|
- bng- cr6,.L4
|
||
|
+ bnllr cr7
|
||
|
+ bng cr6,.L4
|
||
|
fadd fp1,fp1,fp13 /* x+= TWO52; */
|
||
|
fsub fp1,fp1,fp13 /* x-= TWO52; */
|
||
|
fabs fp1,fp1 /* if (x == 0.0) */
|
||
|
blr /* x = 0.0; */
|
||
|
.L4:
|
||
|
- bnllr- cr6 /* if (x < 0.0) */
|
||
|
+ bnllr cr6 /* if (x < 0.0) */
|
||
|
fsub fp1,fp1,fp13 /* x-= TWO52; */
|
||
|
fadd fp1,fp1,fp13 /* x+= TWO52; */
|
||
|
fnabs fp1,fp1 /* if (x == 0.0) */
|
||
|
diff --git glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/fpu/s_rintf.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/fpu/s_rintf.S
|
||
|
index 248649d..c120d91 100644
|
||
|
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/fpu/s_rintf.S
|
||
|
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/fpu/s_rintf.S
|
||
|
@@ -30,14 +30,14 @@ EALIGN (__rintf, 4, 0)
|
||
|
fsubs fp12,fp13,fp13 /* generate 0.0 */
|
||
|
fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO23) */
|
||
|
fcmpu cr6,fp1,fp12 /* if (x > 0.0) */
|
||
|
- bnllr- cr7
|
||
|
- bng- cr6,.L4
|
||
|
+ bnllr cr7
|
||
|
+ bng cr6,.L4
|
||
|
fadds fp1,fp1,fp13 /* x+= TWO23; */
|
||
|
fsubs fp1,fp1,fp13 /* x-= TWO23; */
|
||
|
fabs fp1,fp1 /* if (x == 0.0) */
|
||
|
blr /* x = 0.0; */
|
||
|
.L4:
|
||
|
- bnllr- cr6 /* if (x < 0.0) */
|
||
|
+ bnllr cr6 /* if (x < 0.0) */
|
||
|
fsubs fp1,fp1,fp13 /* x-= TWO23; */
|
||
|
fadds fp1,fp1,fp13 /* x+= TWO23; */
|
||
|
fnabs fp1,fp1 /* if (x == 0.0) */
|
||
|
--
|
||
|
1.7.11.7
|