You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
178 lines
6.2 KiB
178 lines
6.2 KiB
From c5806d668f84a86e9e6a522f84b8aa6cb4cdaae9 Mon Sep 17 00:00:00 2001 |
|
From: Ali Saidi <alisaidi@amazon.com> |
|
Date: Wed, 5 Aug 2020 20:46:28 -0500 |
|
Subject: [PATCH 1/3] Enable unaligned accesses on arm64 |
|
|
|
64-bit Arm platforms support unaligned accesses. |
|
|
|
On the string benchmarks, this change improves performance |
|
by an average of 1.04x (min 0.96x, max 1.21x, median 1.01x). |
|
--- |
|
include/ruby/defines.h | 2 +- |
|
regint.h | 2 +- |
|
siphash.c | 2 +- |
|
st.c | 2 +- |
|
4 files changed, 4 insertions(+), 4 deletions(-) |
|
|
|
diff --git a/include/ruby/defines.h b/include/ruby/defines.h |
|
index 49f673ef936a..0193275e8b78 100644 |
|
--- a/include/ruby/defines.h |
|
+++ b/include/ruby/defines.h |
|
@@ -485,7 +485,7 @@ |
|
#ifndef UNALIGNED_WORD_ACCESS |
|
# if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \ |
|
defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || \ |
|
- defined(__powerpc64__) || \ |
|
+ defined(__powerpc64__) || defined(__aarch64__) || \ |
|
defined(__mc68020__) |
|
# define UNALIGNED_WORD_ACCESS 1 |
|
# else |
|
diff --git a/regint.h b/regint.h |
|
index a2f5bbba1d1f..0740429688bc 100644 |
|
--- a/regint.h |
|
+++ b/regint.h |
|
@@ -52,7 +52,7 @@ |
|
#ifndef UNALIGNED_WORD_ACCESS |
|
# if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \ |
|
defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || \ |
|
- defined(__powerpc64__) || \ |
|
+ defined(__powerpc64__) || defined(__aarch64__) || \ |
|
defined(__mc68020__) |
|
# define UNALIGNED_WORD_ACCESS 1 |
|
# else |
|
diff --git a/siphash.c b/siphash.c |
|
index 153d2c690ab9..ddf8ee245d81 100644 |
|
--- a/siphash.c |
|
+++ b/siphash.c |
|
@@ -30,7 +30,7 @@ |
|
#ifndef UNALIGNED_WORD_ACCESS |
|
# if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \ |
|
defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || \ |
|
- defined(__powerpc64__) || \ |
|
+ defined(__powerpc64__) || defined(__aarch64__) || \ |
|
defined(__mc68020__) |
|
# define UNALIGNED_WORD_ACCESS 1 |
|
# endif |
|
diff --git a/st.c b/st.c |
|
index c11535ef9779..8be466bf733f 100644 |
|
--- a/st.c |
|
+++ b/st.c |
|
@@ -1815,7 +1815,7 @@ st_values_check(st_table *tab, st_data_t *values, st_index_t size, |
|
#ifndef UNALIGNED_WORD_ACCESS |
|
# if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \ |
|
defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || \ |
|
- defined(__powerpc64__) || \ |
|
+ defined(__powerpc64__) || defined(__aarch64__) || \ |
|
defined(__mc68020__) |
|
# define UNALIGNED_WORD_ACCESS 1 |
|
# endif |
|
|
|
From 79b7b9143fda0f33fc9375980cecc61eb42c6f66 Mon Sep 17 00:00:00 2001 |
|
From: Ali Saidi <alisaidi@amazon.com> |
|
Date: Wed, 5 Aug 2020 21:04:37 -0500 |
|
Subject: [PATCH 2/3] arm64 enable gc optimizations |
|
|
|
Similar to x86 and powerpc optimizations. |
|
|
|
| |compare-ruby|built-ruby| |
|
|:------|-----------:|---------:| |
|
|hash1 | 0.225| 0.237| |
|
| | -| 1.05x| |
|
|hash2 | 0.110| 0.110| |
|
| | 1.00x| -| |
|
--- |
|
gc.c | 13 +++++++++++++ |
|
gc.h | 2 ++ |
|
2 files changed, 15 insertions(+) |
|
|
|
diff --git a/gc.c b/gc.c |
|
index 22972dfc806c..788f06f1586e 100644 |
|
--- a/gc.c |
|
+++ b/gc.c |
|
@@ -1153,6 +1153,19 @@ tick(void) |
|
return val; |
|
} |
|
|
|
+#elif defined(__aarch64__) && defined(__GNUC__) |
|
+typedef unsigned long tick_t; |
|
+#define PRItick "lu" |
|
+ |
|
+static __inline__ tick_t |
|
+tick(void) |
|
+{ |
|
+ unsigned long val; |
|
+ __asm__ __volatile__ ("mrs %0, cntvct_el0" : "=r" (val)); /* no comma before ':'; reads cntvct_el0 into val */ |
|
+ return val; |
|
+} |
|
+ |
|
+ |
|
#elif defined(_WIN32) && defined(_MSC_VER) |
|
#include <intrin.h> |
|
typedef unsigned __int64 tick_t; |
|
diff --git a/gc.h b/gc.h |
|
index 6568079c54e5..47a4ca19a0c5 100644 |
|
--- a/gc.h |
|
+++ b/gc.h |
|
@@ -8,6 +8,8 @@ |
|
#define SET_MACHINE_STACK_END(p) __asm__ __volatile__ ("movl\t%%esp, %0" : "=r" (*(p))) |
|
#elif defined(__powerpc64__) && defined(__GNUC__) |
|
#define SET_MACHINE_STACK_END(p) __asm__ __volatile__ ("mr\t%0, %%r1" : "=r" (*(p))) |
|
+#elif defined(__aarch64__) && defined(__GNUC__) |
|
+#define SET_MACHINE_STACK_END(p) __asm__ __volatile__ ("mov\t%0, sp" : "=r" (*(p))) |
|
#else |
|
NOINLINE(void rb_gc_set_stack_end(VALUE **stack_end_p)); |
|
#define SET_MACHINE_STACK_END(p) rb_gc_set_stack_end(p) |
|
|
|
From c985b8c6868a380e44e285368af4a4f414ce3309 Mon Sep 17 00:00:00 2001 |
|
From: Ali Saidi <alisaidi@amazon.com> |
|
Date: Wed, 5 Aug 2020 21:15:55 -0500 |
|
Subject: [PATCH 3/3] vm_exec.c: improve performance for arm64 |
|
|
|
| |compare-ruby|built-ruby| |
|
|:------------------------------|-----------:|---------:| |
|
|vm_array | 26.501M| 27.959M| |
|
| | -| 1.06x| |
|
|vm_attr_ivar | 21.606M| 31.429M| |
|
| | -| 1.45x| |
|
|vm_attr_ivar_set | 21.178M| 26.113M| |
|
| | -| 1.23x| |
|
|vm_backtrace | 6.621| 6.668| |
|
| | -| 1.01x| |
|
|vm_bigarray | 26.205M| 29.958M| |
|
| | -| 1.14x| |
|
|vm_bighash | 504.155k| 479.306k| |
|
| | 1.05x| -| |
|
|vm_block | 16.692M| 21.315M| |
|
| | -| 1.28x| |
|
|block_handler_type_iseq | 5.083| 7.004| |
|
| | -| 1.38x| |
|
--- |
|
vm_exec.c | 8 ++++++++ |
|
1 file changed, 8 insertions(+) |
|
|
|
diff --git a/vm_exec.c b/vm_exec.c |
|
index ce2e053ee745..7aa56f6ad620 100644 |
|
--- a/vm_exec.c |
|
+++ b/vm_exec.c |
|
@@ -27,6 +27,9 @@ static void vm_insns_counter_count_insn(int insn) {} |
|
#elif defined(__GNUC__) && defined(__powerpc64__) |
|
#define DECL_SC_REG(type, r, reg) register type reg_##r __asm__("r" reg) |
|
|
|
+#elif defined(__GNUC__) && defined(__aarch64__) |
|
+#define DECL_SC_REG(type, r, reg) register type reg_##r __asm__("x" reg) |
|
+ |
|
#else |
|
#define DECL_SC_REG(type, r, reg) register type reg_##r |
|
#endif |
|
@@ -74,6 +77,11 @@ vm_exec_core(rb_execution_context_t *ec, VALUE initial) |
|
DECL_SC_REG(rb_control_frame_t *, cfp, "15"); |
|
#define USE_MACHINE_REGS 1 |
|
|
|
+#elif defined(__GNUC__) && defined(__aarch64__) |
|
+ DECL_SC_REG(const VALUE *, pc, "19"); |
|
+ DECL_SC_REG(rb_control_frame_t *, cfp, "20"); |
|
+#define USE_MACHINE_REGS 1 |
|
+ |
|
#else |
|
register rb_control_frame_t *reg_cfp; |
|
const VALUE *reg_pc;
|
|
|