You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
56 lines
2.5 KiB
56 lines
2.5 KiB
commit 6e008c884dad5a25f91085c68d044bb5e2d63761 |
|
Author: Noah Goldstein <goldstein.w.n@gmail.com> |
|
Date: Tue Jun 14 13:50:11 2022 -0700 |
|
|
|
x86: Fix misordered logic for setting `rep_movsb_stop_threshold` |
|
|
|
Move the setting of `rep_movsb_stop_threshold` to after the tunables |
|
have been collected so that the `rep_movsb_stop_threshold` (which |
|
is used to redirect control flow to the non_temporal case) will |
|
use any user value for `non_temporal_threshold` (set using |
|
glibc.cpu.x86_non_temporal_threshold). |
|
|
|
(cherry picked from commit 035591551400cfc810b07244a015c9411e8bff7c) |
|
|
|
diff --git a/sysdeps/x86/dl-cacheinfo.h b/sysdeps/x86/dl-cacheinfo.h |
|
index 2e43e67e4f4037d3..560bf260e8fbd7bf 100644 |
|
--- a/sysdeps/x86/dl-cacheinfo.h |
|
+++ b/sysdeps/x86/dl-cacheinfo.h |
|
@@ -898,18 +898,6 @@ dl_init_cacheinfo (struct cpu_features *cpu_features) |
|
if (CPU_FEATURE_USABLE_P (cpu_features, FSRM)) |
|
rep_movsb_threshold = 2112; |
|
|
|
- unsigned long int rep_movsb_stop_threshold; |
|
- /* ERMS feature is implemented from AMD Zen3 architecture and it is |
|
- performing poorly for data above L2 cache size. Henceforth, adding |
|
- an upper bound threshold parameter to limit the usage of Enhanced |
|
- REP MOVSB operations and setting its value to L2 cache size. */ |
|
- if (cpu_features->basic.kind == arch_kind_amd) |
|
- rep_movsb_stop_threshold = core; |
|
- /* Setting the upper bound of ERMS to the computed value of |
|
- non-temporal threshold for architectures other than AMD. */ |
|
- else |
|
- rep_movsb_stop_threshold = non_temporal_threshold; |
|
- |
|
/* The default threshold to use Enhanced REP STOSB. */ |
|
unsigned long int rep_stosb_threshold = 2048; |
|
|
|
@@ -951,6 +939,18 @@ dl_init_cacheinfo (struct cpu_features *cpu_features) |
|
SIZE_MAX); |
|
#endif |
|
|
|
+ unsigned long int rep_movsb_stop_threshold; |
|
+ /* ERMS feature is implemented from AMD Zen3 architecture and it is |
|
+ performing poorly for data above L2 cache size. Henceforth, adding |
|
+ an upper bound threshold parameter to limit the usage of Enhanced |
|
+ REP MOVSB operations and setting its value to L2 cache size. */ |
|
+ if (cpu_features->basic.kind == arch_kind_amd) |
|
+ rep_movsb_stop_threshold = core; |
|
+ /* Setting the upper bound of ERMS to the computed value of |
|
+ non-temporal threshold for architectures other than AMD. */ |
|
+ else |
|
+ rep_movsb_stop_threshold = non_temporal_threshold; |
|
+ |
|
cpu_features->data_cache_size = data; |
|
cpu_features->shared_cache_size = shared; |
|
cpu_features->non_temporal_threshold = non_temporal_threshold;
|
|
|