You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
476 lines
13 KiB
476 lines
13 KiB
7 months ago
|
From f193ea20eddc6cef84cba54cf1a647204ee6a86b Mon Sep 17 00:00:00 2001
|
||
|
From: Noah Goldstein <goldstein.w.n@gmail.com>
|
||
|
Date: Wed, 7 Jun 2023 13:18:02 -0500
|
||
|
Subject: [PATCH] x86: Refactor Intel `init_cpu_features`
|
||
|
Content-type: text/plain; charset=UTF-8
|
||
|
|
||
|
This patch should have no effect on existing functionality.
|
||
|
|
||
|
The current code, which has a single switch for model detection and
|
||
|
setting preferred features, is difficult to follow/extend. The cases
|
||
|
use magic numbers and many microarchitectures are missing. This makes
|
||
|
it difficult to reason about what is implemented so far and/or
|
||
|
how/where to add support for new features.
|
||
|
|
||
|
This patch splits the model detection and preference setting stages so
|
||
|
that CPU preferences can be set based on a complete list of available
|
||
|
microarchitectures, rather than based on model magic numbers.
|
||
|
Reviewed-by: DJ Delorie <dj@redhat.com>
|
||
|
---
|
||
|
sysdeps/x86/cpu-features.c | 390 +++++++++++++++++++++++++++++--------
|
||
|
1 file changed, 309 insertions(+), 81 deletions(-)
|
||
|
|
||
|
diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
|
||
|
index 0a99efdb28..d52a718e92 100644
|
||
|
--- a/sysdeps/x86/cpu-features.c
|
||
|
+++ b/sysdeps/x86/cpu-features.c
|
||
|
@@ -417,6 +417,216 @@ _Static_assert (((index_arch_Fast_Unaligned_Load
|
||
|
== index_arch_Fast_Copy_Backward)),
|
||
|
"Incorrect index_arch_Fast_Unaligned_Load");
|
||
|
|
||
|
+
|
||
|
+/* Intel Family-6 microarch list. */
|
||
|
+enum
|
||
|
+{
|
||
|
+ /* Atom processors. */
|
||
|
+ INTEL_ATOM_BONNELL,
|
||
|
+ INTEL_ATOM_SILVERMONT,
|
||
|
+ INTEL_ATOM_AIRMONT,
|
||
|
+ INTEL_ATOM_GOLDMONT,
|
||
|
+ INTEL_ATOM_GOLDMONT_PLUS,
|
||
|
+ INTEL_ATOM_SIERRAFOREST,
|
||
|
+ INTEL_ATOM_GRANDRIDGE,
|
||
|
+ INTEL_ATOM_TREMONT,
|
||
|
+
|
||
|
+ /* Bigcore processors. */
|
||
|
+ INTEL_BIGCORE_MEROM,
|
||
|
+ INTEL_BIGCORE_PENRYN,
|
||
|
+ INTEL_BIGCORE_DUNNINGTON,
|
||
|
+ INTEL_BIGCORE_NEHALEM,
|
||
|
+ INTEL_BIGCORE_WESTMERE,
|
||
|
+ INTEL_BIGCORE_SANDYBRIDGE,
|
||
|
+ INTEL_BIGCORE_IVYBRIDGE,
|
||
|
+ INTEL_BIGCORE_HASWELL,
|
||
|
+ INTEL_BIGCORE_BROADWELL,
|
||
|
+ INTEL_BIGCORE_SKYLAKE,
|
||
|
+ INTEL_BIGCORE_KABYLAKE,
|
||
|
+ INTEL_BIGCORE_COMETLAKE,
|
||
|
+ INTEL_BIGCORE_SKYLAKE_AVX512,
|
||
|
+ INTEL_BIGCORE_CANNONLAKE,
|
||
|
+ INTEL_BIGCORE_ICELAKE,
|
||
|
+ INTEL_BIGCORE_TIGERLAKE,
|
||
|
+ INTEL_BIGCORE_ROCKETLAKE,
|
||
|
+ INTEL_BIGCORE_SAPPHIRERAPIDS,
|
||
|
+ INTEL_BIGCORE_RAPTORLAKE,
|
||
|
+ INTEL_BIGCORE_EMERALDRAPIDS,
|
||
|
+ INTEL_BIGCORE_METEORLAKE,
|
||
|
+ INTEL_BIGCORE_LUNARLAKE,
|
||
|
+ INTEL_BIGCORE_ARROWLAKE,
|
||
|
+ INTEL_BIGCORE_GRANITERAPIDS,
|
||
|
+
|
||
|
+ /* Mixed (bigcore + atom SOC). */
|
||
|
+ INTEL_MIXED_LAKEFIELD,
|
||
|
+ INTEL_MIXED_ALDERLAKE,
|
||
|
+
|
||
|
+ /* KNL. */
|
||
|
+ INTEL_KNIGHTS_MILL,
|
||
|
+ INTEL_KNIGHTS_LANDING,
|
||
|
+
|
||
|
+ /* Unknown. */
|
||
|
+ INTEL_UNKNOWN,
|
||
|
+};
|
||
|
+
|
||
|
+static unsigned int
|
||
|
+intel_get_fam6_microarch (unsigned int model,
|
||
|
+ __attribute__ ((unused)) unsigned int stepping)
|
||
|
+{
|
||
|
+ switch (model)
|
||
|
+ {
|
||
|
+ case 0x1C:
|
||
|
+ case 0x26:
|
||
|
+ return INTEL_ATOM_BONNELL;
|
||
|
+ case 0x27:
|
||
|
+ case 0x35:
|
||
|
+ case 0x36:
|
||
|
+ /* Really Saltwell, but Saltwell is just a die shrink of Bonnell
|
||
|
+ (microarchitecturally identical). */
|
||
|
+ return INTEL_ATOM_BONNELL;
|
||
|
+ case 0x37:
|
||
|
+ case 0x4A:
|
||
|
+ case 0x4D:
|
||
|
+ case 0x5D:
|
||
|
+ return INTEL_ATOM_SILVERMONT;
|
||
|
+ case 0x4C:
|
||
|
+ case 0x5A:
|
||
|
+ case 0x75:
|
||
|
+ return INTEL_ATOM_AIRMONT;
|
||
|
+ case 0x5C:
|
||
|
+ case 0x5F:
|
||
|
+ return INTEL_ATOM_GOLDMONT;
|
||
|
+ case 0x7A:
|
||
|
+ return INTEL_ATOM_GOLDMONT_PLUS;
|
||
|
+ case 0xAF:
|
||
|
+ return INTEL_ATOM_SIERRAFOREST;
|
||
|
+ case 0xB6:
|
||
|
+ return INTEL_ATOM_GRANDRIDGE;
|
||
|
+ case 0x86:
|
||
|
+ case 0x96:
|
||
|
+ case 0x9C:
|
||
|
+ return INTEL_ATOM_TREMONT;
|
||
|
+ case 0x0F:
|
||
|
+ case 0x16:
|
||
|
+ return INTEL_BIGCORE_MEROM;
|
||
|
+ case 0x17:
|
||
|
+ return INTEL_BIGCORE_PENRYN;
|
||
|
+ case 0x1D:
|
||
|
+ return INTEL_BIGCORE_DUNNINGTON;
|
||
|
+ case 0x1A:
|
||
|
+ case 0x1E:
|
||
|
+ case 0x1F:
|
||
|
+ case 0x2E:
|
||
|
+ return INTEL_BIGCORE_NEHALEM;
|
||
|
+ case 0x25:
|
||
|
+ case 0x2C:
|
||
|
+ case 0x2F:
|
||
|
+ return INTEL_BIGCORE_WESTMERE;
|
||
|
+ case 0x2A:
|
||
|
+ case 0x2D:
|
||
|
+ return INTEL_BIGCORE_SANDYBRIDGE;
|
||
|
+ case 0x3A:
|
||
|
+ case 0x3E:
|
||
|
+ return INTEL_BIGCORE_IVYBRIDGE;
|
||
|
+ case 0x3C:
|
||
|
+ case 0x3F:
|
||
|
+ case 0x45:
|
||
|
+ case 0x46:
|
||
|
+ return INTEL_BIGCORE_HASWELL;
|
||
|
+ case 0x3D:
|
||
|
+ case 0x47:
|
||
|
+ case 0x4F:
|
||
|
+ case 0x56:
|
||
|
+ return INTEL_BIGCORE_BROADWELL;
|
||
|
+ case 0x4E:
|
||
|
+ case 0x5E:
|
||
|
+ return INTEL_BIGCORE_SKYLAKE;
|
||
|
+ case 0x8E:
|
||
|
+ /*
|
||
|
+ Stepping = {9}
|
||
|
+ -> Amberlake
|
||
|
+ Stepping = {10}
|
||
|
+ -> Coffeelake
|
||
|
+ Stepping = {11, 12}
|
||
|
+ -> Whiskeylake
|
||
|
+ else
|
||
|
+ -> Kabylake
|
||
|
+
|
||
|
+ All of these are derivatives of Kabylake (Skylake client).
|
||
|
+ */
|
||
|
+ return INTEL_BIGCORE_KABYLAKE;
|
||
|
+ case 0x9E:
|
||
|
+ /*
|
||
|
+ Stepping = {10, 11, 12, 13}
|
||
|
+ -> Coffeelake
|
||
|
+ else
|
||
|
+ -> Kabylake
|
||
|
+
|
||
|
+ Coffeelake is a derivative of Kabylake (Skylake client).
|
||
|
+ */
|
||
|
+ return INTEL_BIGCORE_KABYLAKE;
|
||
|
+ case 0xA5:
|
||
|
+ case 0xA6:
|
||
|
+ return INTEL_BIGCORE_COMETLAKE;
|
||
|
+ case 0x66:
|
||
|
+ return INTEL_BIGCORE_CANNONLAKE;
|
||
|
+ case 0x55:
|
||
|
+ /*
|
||
|
+ Stepping = {6, 7}
|
||
|
+ -> Cascadelake
|
||
|
+ Stepping = {11}
|
||
|
+ -> Cooperlake
|
||
|
+ else
|
||
|
+ -> Skylake-avx512
|
||
|
+
|
||
|
+ These are all microarchitecturally identical, so use
|
||
|
+ Skylake-avx512 for all of them.
|
||
|
+ */
|
||
|
+ return INTEL_BIGCORE_SKYLAKE_AVX512;
|
||
|
+ case 0x6A:
|
||
|
+ case 0x6C:
|
||
|
+ case 0x7D:
|
||
|
+ case 0x7E:
|
||
|
+ case 0x9D:
|
||
|
+ return INTEL_BIGCORE_ICELAKE;
|
||
|
+ case 0x8C:
|
||
|
+ case 0x8D:
|
||
|
+ return INTEL_BIGCORE_TIGERLAKE;
|
||
|
+ case 0xA7:
|
||
|
+ return INTEL_BIGCORE_ROCKETLAKE;
|
||
|
+ case 0x8F:
|
||
|
+ return INTEL_BIGCORE_SAPPHIRERAPIDS;
|
||
|
+ case 0xB7:
|
||
|
+ case 0xBA:
|
||
|
+ case 0xBF:
|
||
|
+ return INTEL_BIGCORE_RAPTORLAKE;
|
||
|
+ case 0xCF:
|
||
|
+ return INTEL_BIGCORE_EMERALDRAPIDS;
|
||
|
+ case 0xAA:
|
||
|
+ case 0xAC:
|
||
|
+ return INTEL_BIGCORE_METEORLAKE;
|
||
|
+ case 0xbd:
|
||
|
+ return INTEL_BIGCORE_LUNARLAKE;
|
||
|
+ case 0xc6:
|
||
|
+ return INTEL_BIGCORE_ARROWLAKE;
|
||
|
+ case 0xAD:
|
||
|
+ case 0xAE:
|
||
|
+ return INTEL_BIGCORE_GRANITERAPIDS;
|
||
|
+ case 0x8A:
|
||
|
+ return INTEL_MIXED_LAKEFIELD;
|
||
|
+ case 0x97:
|
||
|
+ case 0x9A:
|
||
|
+ case 0xBE:
|
||
|
+ return INTEL_MIXED_ALDERLAKE;
|
||
|
+ case 0x85:
|
||
|
+ return INTEL_KNIGHTS_MILL;
|
||
|
+ case 0x57:
|
||
|
+ return INTEL_KNIGHTS_LANDING;
|
||
|
+ default:
|
||
|
+ return INTEL_UNKNOWN;
|
||
|
+ }
|
||
|
+}
|
||
|
+
|
||
|
static inline void
|
||
|
init_cpu_features (struct cpu_features *cpu_features)
|
||
|
{
|
||
|
@@ -453,129 +663,147 @@ init_cpu_features (struct cpu_features *cpu_features)
|
||
|
if (family == 0x06)
|
||
|
{
|
||
|
model += extended_model;
|
||
|
- switch (model)
|
||
|
+ unsigned int microarch
|
||
|
+ = intel_get_fam6_microarch (model, stepping);
|
||
|
+
|
||
|
+ switch (microarch)
|
||
|
{
|
||
|
- case 0x1c:
|
||
|
- case 0x26:
|
||
|
- /* BSF is slow on Atom. */
|
||
|
+ /* Atom / KNL tuning. */
|
||
|
+ case INTEL_ATOM_BONNELL:
|
||
|
+ /* BSF is slow on Bonnell. */
|
||
|
cpu_features->preferred[index_arch_Slow_BSF]
|
||
|
- |= bit_arch_Slow_BSF;
|
||
|
+ |= bit_arch_Slow_BSF;
|
||
|
break;
|
||
|
|
||
|
- case 0x57:
|
||
|
- /* Knights Landing. Enable Silvermont optimizations. */
|
||
|
-
|
||
|
- case 0x7a:
|
||
|
- /* Unaligned load versions are faster than SSSE3
|
||
|
- on Goldmont Plus. */
|
||
|
-
|
||
|
- case 0x5c:
|
||
|
- case 0x5f:
|
||
|
/* Unaligned load versions are faster than SSSE3
|
||
|
- on Goldmont. */
|
||
|
+ on Airmont, Silvermont, Goldmont, and Goldmont Plus. */
|
||
|
+ case INTEL_ATOM_AIRMONT:
|
||
|
+ case INTEL_ATOM_SILVERMONT:
|
||
|
+ case INTEL_ATOM_GOLDMONT:
|
||
|
+ case INTEL_ATOM_GOLDMONT_PLUS:
|
||
|
|
||
|
- case 0x4c:
|
||
|
- case 0x5a:
|
||
|
- case 0x75:
|
||
|
- /* Airmont is a die shrink of Silvermont. */
|
||
|
+ /* Knights Landing. Enable Silvermont optimizations. */
|
||
|
+ case INTEL_KNIGHTS_LANDING:
|
||
|
|
||
|
- case 0x37:
|
||
|
- case 0x4a:
|
||
|
- case 0x4d:
|
||
|
- case 0x5d:
|
||
|
- /* Unaligned load versions are faster than SSSE3
|
||
|
- on Silvermont. */
|
||
|
cpu_features->preferred[index_arch_Fast_Unaligned_Load]
|
||
|
- |= (bit_arch_Fast_Unaligned_Load
|
||
|
- | bit_arch_Fast_Unaligned_Copy
|
||
|
- | bit_arch_Prefer_PMINUB_for_stringop
|
||
|
- | bit_arch_Slow_SSE4_2);
|
||
|
+ |= (bit_arch_Fast_Unaligned_Load
|
||
|
+ | bit_arch_Fast_Unaligned_Copy
|
||
|
+ | bit_arch_Prefer_PMINUB_for_stringop
|
||
|
+ | bit_arch_Slow_SSE4_2);
|
||
|
break;
|
||
|
|
||
|
- case 0x86:
|
||
|
- case 0x96:
|
||
|
- case 0x9c:
|
||
|
+ case INTEL_ATOM_TREMONT:
|
||
|
/* Enable rep string instructions, unaligned load, unaligned
|
||
|
- copy, pminub and avoid SSE 4.2 on Tremont. */
|
||
|
+ copy, pminub and avoid SSE 4.2 on Tremont. */
|
||
|
cpu_features->preferred[index_arch_Fast_Rep_String]
|
||
|
- |= (bit_arch_Fast_Rep_String
|
||
|
- | bit_arch_Fast_Unaligned_Load
|
||
|
- | bit_arch_Fast_Unaligned_Copy
|
||
|
- | bit_arch_Prefer_PMINUB_for_stringop
|
||
|
- | bit_arch_Slow_SSE4_2);
|
||
|
+ |= (bit_arch_Fast_Rep_String
|
||
|
+ | bit_arch_Fast_Unaligned_Load
|
||
|
+ | bit_arch_Fast_Unaligned_Copy
|
||
|
+ | bit_arch_Prefer_PMINUB_for_stringop
|
||
|
+ | bit_arch_Slow_SSE4_2);
|
||
|
break;
|
||
|
|
||
|
+ /*
|
||
|
+ Default tuned Knights microarch.
|
||
|
+ case INTEL_KNIGHTS_MILL:
|
||
|
+ */
|
||
|
+
|
||
|
+ /*
|
||
|
+ Default tuned atom microarch.
|
||
|
+ case INTEL_ATOM_SIERRAFOREST:
|
||
|
+ case INTEL_ATOM_GRANDRIDGE:
|
||
|
+ */
|
||
|
+
|
||
|
+ /* Bigcore/Default Tuning. */
|
||
|
default:
|
||
|
/* Unknown family 0x06 processors. Assuming this is one
|
||
|
of Core i3/i5/i7 processors if AVX is available. */
|
||
|
if (!CPU_FEATURES_CPU_P (cpu_features, AVX))
|
||
|
break;
|
||
|
/* Fall through. */
|
||
|
-
|
||
|
- case 0x1a:
|
||
|
- case 0x1e:
|
||
|
- case 0x1f:
|
||
|
- case 0x25:
|
||
|
- case 0x2c:
|
||
|
- case 0x2e:
|
||
|
- case 0x2f:
|
||
|
+ case INTEL_BIGCORE_NEHALEM:
|
||
|
+ case INTEL_BIGCORE_WESTMERE:
|
||
|
/* Rep string instructions, unaligned load, unaligned copy,
|
||
|
and pminub are fast on Intel Core i3, i5 and i7. */
|
||
|
cpu_features->preferred[index_arch_Fast_Rep_String]
|
||
|
- |= (bit_arch_Fast_Rep_String
|
||
|
- | bit_arch_Fast_Unaligned_Load
|
||
|
- | bit_arch_Fast_Unaligned_Copy
|
||
|
- | bit_arch_Prefer_PMINUB_for_stringop);
|
||
|
+ |= (bit_arch_Fast_Rep_String
|
||
|
+ | bit_arch_Fast_Unaligned_Load
|
||
|
+ | bit_arch_Fast_Unaligned_Copy
|
||
|
+ | bit_arch_Prefer_PMINUB_for_stringop);
|
||
|
break;
|
||
|
+
|
||
|
+ /*
|
||
|
+ Default tuned Bigcore microarch.
|
||
|
+ case INTEL_BIGCORE_SANDYBRIDGE:
|
||
|
+ case INTEL_BIGCORE_IVYBRIDGE:
|
||
|
+ case INTEL_BIGCORE_HASWELL:
|
||
|
+ case INTEL_BIGCORE_BROADWELL:
|
||
|
+ case INTEL_BIGCORE_SKYLAKE:
|
||
|
+ case INTEL_BIGCORE_KABYLAKE:
|
||
|
+ case INTEL_BIGCORE_COMETLAKE:
|
||
|
+ case INTEL_BIGCORE_SKYLAKE_AVX512:
|
||
|
+ case INTEL_BIGCORE_CANNONLAKE:
|
||
|
+ case INTEL_BIGCORE_ICELAKE:
|
||
|
+ case INTEL_BIGCORE_TIGERLAKE:
|
||
|
+ case INTEL_BIGCORE_ROCKETLAKE:
|
||
|
+ case INTEL_BIGCORE_RAPTORLAKE:
|
||
|
+ case INTEL_BIGCORE_METEORLAKE:
|
||
|
+ case INTEL_BIGCORE_LUNARLAKE:
|
||
|
+ case INTEL_BIGCORE_ARROWLAKE:
|
||
|
+ case INTEL_BIGCORE_SAPPHIRERAPIDS:
|
||
|
+ case INTEL_BIGCORE_EMERALDRAPIDS:
|
||
|
+ case INTEL_BIGCORE_GRANITERAPIDS:
|
||
|
+ */
|
||
|
+
|
||
|
+ /*
|
||
|
+ Default tuned Mixed (bigcore + atom SOC).
|
||
|
+ case INTEL_MIXED_LAKEFIELD:
|
||
|
+ case INTEL_MIXED_ALDERLAKE:
|
||
|
+ */
|
||
|
}
|
||
|
|
||
|
- /* Disable TSX on some processors to avoid TSX on kernels that
|
||
|
- weren't updated with the latest microcode package (which
|
||
|
- disables broken feature by default). */
|
||
|
- switch (model)
|
||
|
+ /* Disable TSX on some processors to avoid TSX on kernels that
|
||
|
+ weren't updated with the latest microcode package (which
|
||
|
+ disables broken feature by default). */
|
||
|
+ switch (microarch)
|
||
|
{
|
||
|
- case 0x55:
|
||
|
+ case INTEL_BIGCORE_SKYLAKE_AVX512:
|
||
|
+ /* 0x55 (Skylake-avx512) && stepping <= 5 disable TSX. */
|
||
|
if (stepping <= 5)
|
||
|
goto disable_tsx;
|
||
|
break;
|
||
|
- case 0x8e:
|
||
|
- /* NB: Although the errata documents that for model == 0x8e,
|
||
|
- only 0xb stepping or lower are impacted, the intention of
|
||
|
- the errata was to disable TSX on all client processors on
|
||
|
- all steppings. Include 0xc stepping which is an Intel
|
||
|
- Core i7-8665U, a client mobile processor. */
|
||
|
- case 0x9e:
|
||
|
+
|
||
|
+ case INTEL_BIGCORE_KABYLAKE:
|
||
|
+ /* NB: Although the errata documents that for model == 0x8e
|
||
|
+ (kabylake skylake client), only 0xb stepping or lower are
|
||
|
+ impacted, the intention of the errata was to disable TSX on
|
||
|
+ all client processors on all steppings. Include 0xc
|
||
|
+ stepping which is an Intel Core i7-8665U, a client mobile
|
||
|
+ processor. */
|
||
|
if (stepping > 0xc)
|
||
|
break;
|
||
|
/* Fall through. */
|
||
|
- case 0x4e:
|
||
|
- case 0x5e:
|
||
|
- {
|
||
|
+ case INTEL_BIGCORE_SKYLAKE:
|
||
|
/* Disable Intel TSX and enable RTM_ALWAYS_ABORT for
|
||
|
processors listed in:
|
||
|
|
||
|
https://www.intel.com/content/www/us/en/support/articles/000059422/processors.html
|
||
|
*/
|
||
|
-disable_tsx:
|
||
|
+ disable_tsx:
|
||
|
CPU_FEATURE_UNSET (cpu_features, HLE);
|
||
|
CPU_FEATURE_UNSET (cpu_features, RTM);
|
||
|
CPU_FEATURE_SET (cpu_features, RTM_ALWAYS_ABORT);
|
||
|
- }
|
||
|
- break;
|
||
|
- case 0x3f:
|
||
|
- /* Xeon E7 v3 with stepping >= 4 has working TSX. */
|
||
|
- if (stepping >= 4)
|
||
|
break;
|
||
|
- /* Fall through. */
|
||
|
- case 0x3c:
|
||
|
- case 0x45:
|
||
|
- case 0x46:
|
||
|
- /* Disable Intel TSX on Haswell processors (except Xeon E7 v3
|
||
|
- with stepping >= 4) to avoid TSX on kernels that weren't
|
||
|
- updated with the latest microcode package (which disables
|
||
|
- broken feature by default). */
|
||
|
- CPU_FEATURE_UNSET (cpu_features, RTM);
|
||
|
- break;
|
||
|
+
|
||
|
+ case INTEL_BIGCORE_HASWELL:
|
||
|
+ /* Xeon E7 v3 (model == 0x3f) with stepping >= 4 has working
|
||
|
+ TSX. Haswell also includes other model numbers that lack
|
||
|
+ working TSX. */
|
||
|
+ if (model == 0x3f && stepping >= 4)
|
||
|
+ break;
|
||
|
+
|
||
|
+ CPU_FEATURE_UNSET (cpu_features, RTM);
|
||
|
+ break;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
--
|
||
|
2.39.3
|
||
|
|