You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
205 lines
7.9 KiB
205 lines
7.9 KiB
commit e6d90d675d4cae810be76a5ff41b8ae8bd6bc914 |
|
Author: Wilco Dijkstra <wdijkstr@arm.com> |
|
Date: Mon Jun 23 17:15:41 2014 +0100 |
|
|
|
Add generic HAVE_RM_CTX implementation |
|
|
|
This patch adds a generic implementation of HAVE_RM_CTX using standard |
|
fenv calls. As a result math functions using SET_RESTORE_ROUND* macros |
|
do not suffer from a large slowdown on targets which do not implement |
|
optimized libc_fe*_ctx inline functions. Most of the libc_fe* inline |
|
functions are now unused and could be removed in the future (there are |
|
a few math functions left which use a mixture of standard fenv calls |
|
and libc_fe* inline functions - they could be updated to use |
|
SET_RESTORE_ROUND or improved to avoid expensive fenv manipulations |
|
across just a few FP instructions). |
|
|
|
libc_feholdsetround*_noex_ctx is added to enable better optimization of |
|
SET_RESTORE_ROUND_NOEX* implementations. |
|
|
|
Performance measurements on ARM and x86 of sin() show significant gains |
|
over the current default, fairly close to a highly optimized fenv_private: |
|
|
|
                        ARM     x86 |
|
no fenv_private     :  100%    100% |
|
generic HAVE_RM_CTX :  250%    350% |
|
fenv_private (CTX)  :  250%    450% |
|
|
|
2014-06-23 Will Newton <will.newton@linaro.org> |
|
Wilco Dijkstra <wdijkstr@arm.com> |
|
|
|
* sysdeps/generic/math_private.h: Add generic HAVE_RM_CTX |
|
implementation. Include get-rounding-mode.h. |
|
[!HAVE_RM_CTX]: Define HAVE_RM_CTX to zero. |
|
[!libc_feholdsetround_noex_ctx]: Define |
|
libc_feholdsetround_noex_ctx. |
|
[!libc_feholdsetround_noexf_ctx]: Define |
|
libc_feholdsetround_noexf_ctx. |
|
[!libc_feholdsetround_noexl_ctx]: Define |
|
libc_feholdsetround_noexl_ctx. |
|
(libc_feholdsetround_ctx): New function. |
|
(libc_feresetround_ctx): New function. |
|
(libc_feholdsetround_noex_ctx): New function. |
|
(libc_feresetround_noex_ctx): New function. |
|
|
|
diff --git a/sysdeps/generic/math_private.h b/sysdeps/generic/math_private.h |
|
index 9b881a3..94c1e4a 100644 |
|
--- a/sysdeps/generic/math_private.h |
|
+++ b/sysdeps/generic/math_private.h |
|
@@ -20,6 +20,7 @@ |
|
#include <stdint.h> |
|
#include <sys/types.h> |
|
#include <fenv.h> |
|
+#include <get-rounding-mode.h> |
|
|
|
/* The original fdlibm code used statements like: |
|
n0 = ((*(int*)&one)>>29)^1; * index of high word * |
|
@@ -551,12 +552,26 @@ default_libc_feupdateenv_test (fenv_t *e, int ex) |
|
# define libc_feresetround_noexl libc_fesetenvl |
|
#endif |
|
|
|
+#ifndef HAVE_RM_CTX |
|
+# define HAVE_RM_CTX 0 |
|
+#endif |
|
+ |
|
#if HAVE_RM_CTX |
|
/* Set/Restore Rounding Modes only when necessary. If defined, these functions |
|
set/restore floating point state only if the state needed within the lexical |
|
block is different from the current state. This saves a lot of time when |
|
the floating point unit is much slower than the fixed point units. */ |
|
|
|
+# ifndef libc_feholdsetround_noex_ctx |
|
+# define libc_feholdsetround_noex_ctx libc_feholdsetround_ctx |
|
+# endif |
|
+# ifndef libc_feholdsetround_noexf_ctx |
|
+# define libc_feholdsetround_noexf_ctx libc_feholdsetroundf_ctx |
|
+# endif |
|
+# ifndef libc_feholdsetround_noexl_ctx |
|
+# define libc_feholdsetround_noexl_ctx libc_feholdsetroundl_ctx |
|
+# endif |
|
+ |
|
# ifndef libc_feresetround_noex_ctx |
|
# define libc_feresetround_noex_ctx libc_fesetenv_ctx |
|
# endif |
|
@@ -567,24 +582,80 @@ default_libc_feupdateenv_test (fenv_t *e, int ex) |
|
# define libc_feresetround_noexl_ctx libc_fesetenvl_ctx |
|
# endif |
|
|
|
-# ifndef libc_feholdsetround_53bit_ctx |
|
-# define libc_feholdsetround_53bit_ctx libc_feholdsetround_ctx |
|
-# endif |
|
+#else |
|
|
|
-# ifndef libc_feresetround_53bit_ctx |
|
-# define libc_feresetround_53bit_ctx libc_feresetround_ctx |
|
-# endif |
|
+/* Default implementation using standard fenv functions. |
|
+ Avoid unnecessary rounding mode changes by first checking the |
|
+ current rounding mode. Note the use of __glibc_unlikely is |
|
+ important for performance. */ |
|
|
|
-# define SET_RESTORE_ROUND_GENERIC(RM,ROUNDFUNC,CLEANUPFUNC) \ |
|
- struct rm_ctx ctx __attribute__((cleanup(CLEANUPFUNC ## _ctx))); \ |
|
- ROUNDFUNC ## _ctx (&ctx, (RM)) |
|
-#else |
|
-# define SET_RESTORE_ROUND_GENERIC(RM, ROUNDFUNC, CLEANUPFUNC) \ |
|
- fenv_t __libc_save_rm __attribute__((cleanup(CLEANUPFUNC))); \ |
|
- ROUNDFUNC (&__libc_save_rm, (RM)) |
|
+static __always_inline void |
|
+libc_feholdsetround_ctx (struct rm_ctx *ctx, int round) |
|
+{ |
|
+ ctx->updated_status = false; |
|
+ |
|
+ /* Update rounding mode only if different. */ |
|
+ if (__glibc_unlikely (round != get_rounding_mode ())) |
|
+ { |
|
+ ctx->updated_status = true; |
|
+ fegetenv (&ctx->env); |
|
+ fesetround (round); |
|
+ } |
|
+} |
|
+ |
|
+static __always_inline void |
|
+libc_feresetround_ctx (struct rm_ctx *ctx) |
|
+{ |
|
+ /* Restore the rounding mode if updated. */ |
|
+ if (__glibc_unlikely (ctx->updated_status)) |
|
+ feupdateenv (&ctx->env); |
|
+} |
|
+ |
|
+static __always_inline void |
|
+libc_feholdsetround_noex_ctx (struct rm_ctx *ctx, int round) |
|
+{ |
|
+ /* Save exception flags and rounding mode. */ |
|
+ fegetenv (&ctx->env); |
|
+ |
|
+ /* Update rounding mode only if different. */ |
|
+ if (__glibc_unlikely (round != get_rounding_mode ())) |
|
+ fesetround (round); |
|
+} |
|
+ |
|
+static __always_inline void |
|
+libc_feresetround_noex_ctx (struct rm_ctx *ctx) |
|
+{ |
|
+ /* Restore exception flags and rounding mode. */ |
|
+ fesetenv (&ctx->env); |
|
+} |
|
+ |
|
+# define libc_feholdsetroundf_ctx libc_feholdsetround_ctx |
|
+# define libc_feholdsetroundl_ctx libc_feholdsetround_ctx |
|
+# define libc_feresetroundf_ctx libc_feresetround_ctx |
|
+# define libc_feresetroundl_ctx libc_feresetround_ctx |
|
+ |
|
+# define libc_feholdsetround_noexf_ctx libc_feholdsetround_noex_ctx |
|
+# define libc_feholdsetround_noexl_ctx libc_feholdsetround_noex_ctx |
|
+# define libc_feresetround_noexf_ctx libc_feresetround_noex_ctx |
|
+# define libc_feresetround_noexl_ctx libc_feresetround_noex_ctx |
|
+ |
|
+#endif |
|
+ |
|
+#ifndef libc_feholdsetround_53bit_ctx |
|
+# define libc_feholdsetround_53bit_ctx libc_feholdsetround_ctx |
|
+#endif |
|
+#ifndef libc_feresetround_53bit_ctx |
|
+# define libc_feresetround_53bit_ctx libc_feresetround_ctx |
|
#endif |
|
|
|
-/* Save and restore the rounding mode within a lexical block. */ |
|
+#define SET_RESTORE_ROUND_GENERIC(RM,ROUNDFUNC,CLEANUPFUNC) \ |
|
+ struct rm_ctx ctx __attribute__((cleanup (CLEANUPFUNC ## _ctx))); \ |
|
+ ROUNDFUNC ## _ctx (&ctx, (RM)) |
|
+ |
|
+/* Set the rounding mode within a lexical block. Restore the rounding mode to |
|
+ the value at the start of the block. The exception mode must be preserved. |
|
+ Exceptions raised within the block must be set in the exception flags. |
|
+ Non-stop mode may be enabled inside the block. */ |
|
|
|
#define SET_RESTORE_ROUND(RM) \ |
|
SET_RESTORE_ROUND_GENERIC (RM, libc_feholdsetround, libc_feresetround) |
|
@@ -593,15 +664,21 @@ default_libc_feupdateenv_test (fenv_t *e, int ex) |
|
#define SET_RESTORE_ROUNDL(RM) \ |
|
SET_RESTORE_ROUND_GENERIC (RM, libc_feholdsetroundl, libc_feresetroundl) |
|
|
|
-/* Save and restore the rounding mode within a lexical block, and also |
|
- the set of exceptions raised within the block may be discarded. */ |
|
+/* Set the rounding mode within a lexical block. Restore the rounding mode to |
|
+ the value at the start of the block. The exception mode must be preserved. |
|
+ Exceptions raised within the block must be discarded, and exception flags |
|
+ are restored to the value at the start of the block. |
|
+ Non-stop mode may be enabled inside the block. */ |
|
|
|
#define SET_RESTORE_ROUND_NOEX(RM) \ |
|
- SET_RESTORE_ROUND_GENERIC (RM, libc_feholdsetround, libc_feresetround_noex) |
|
+ SET_RESTORE_ROUND_GENERIC (RM, libc_feholdsetround_noex, \ |
|
+ libc_feresetround_noex) |
|
#define SET_RESTORE_ROUND_NOEXF(RM) \ |
|
- SET_RESTORE_ROUND_GENERIC (RM, libc_feholdsetroundf, libc_feresetround_noexf) |
|
+ SET_RESTORE_ROUND_GENERIC (RM, libc_feholdsetround_noexf, \ |
|
+ libc_feresetround_noexf) |
|
#define SET_RESTORE_ROUND_NOEXL(RM) \ |
|
- SET_RESTORE_ROUND_GENERIC (RM, libc_feholdsetroundl, libc_feresetround_noexl) |
|
+ SET_RESTORE_ROUND_GENERIC (RM, libc_feholdsetround_noexl, \ |
|
+ libc_feresetround_noexl) |
|
|
|
/* Like SET_RESTORE_ROUND, but also set rounding precision to 53 bits. */ |
|
#define SET_RESTORE_ROUND_53BIT(RM) \
|
|
|