You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
804 lines
26 KiB
804 lines
26 KiB
From 7dd60718b327b3eb6112ec3900750007b0259189 Mon Sep 17 00:00:00 2001 |
|
From: raji <raji@oc4354787705.ibm.com> |
|
Date: Tue, 14 Jun 2016 14:51:16 +0530 |
|
Subject: [PATCH] powerpc: strcasecmp/strncasecmp optimization for power8 |
|
|
|
This implementation utilizes vectors to improve performance |
|
compared to the current byte-by-byte implementation for POWER7. |
|
The performance improvement is up to 4x. This patch is tested |
|
on powerpc64 and powerpc64le. |
|
|
|
(cherry picked from commit c8376f3e07602aaef9cb843bb73cb5f2b860634a) |
|
|
|
Conflicts: |
|
sysdeps/powerpc/powerpc64/multiarch/strcasecmp-power7.S |
|
sysdeps/powerpc/powerpc64/multiarch/strcasecmp.c |
|
--- |
|
ChangeLog | 22 + |
|
sysdeps/powerpc/powerpc64/multiarch/Makefile | 4 +- |
|
.../powerpc/powerpc64/multiarch/ifunc-impl-list.c | 6 + |
|
.../powerpc64/multiarch/strcasecmp-power7.S | 20 +- |
|
.../powerpc64/multiarch/strcasecmp-power8.S | 28 ++ |
|
.../powerpc/powerpc64/multiarch/strcasecmp-ppc64.c | 21 + |
|
sysdeps/powerpc/powerpc64/multiarch/strcasecmp.c | 32 +- |
|
.../powerpc/powerpc64/multiarch/strncase-power8.S | 28 ++ |
|
.../powerpc/powerpc64/multiarch/strncase-ppc64.c | 21 + |
|
sysdeps/powerpc/powerpc64/multiarch/strncase.c | 25 +- |
|
sysdeps/powerpc/powerpc64/power8/strcasecmp.S | 446 +++++++++++++++++++++ |
|
sysdeps/powerpc/powerpc64/power8/strncase.S | 20 + |
|
12 files changed, 622 insertions(+), 51 deletions(-) |
|
create mode 100644 sysdeps/powerpc/powerpc64/multiarch/strcasecmp-power8.S |
|
create mode 100644 sysdeps/powerpc/powerpc64/multiarch/strcasecmp-ppc64.c |
|
create mode 100644 sysdeps/powerpc/powerpc64/multiarch/strncase-power8.S |
|
create mode 100644 sysdeps/powerpc/powerpc64/multiarch/strncase-ppc64.c |
|
create mode 100644 sysdeps/powerpc/powerpc64/power8/strcasecmp.S |
|
create mode 100644 sysdeps/powerpc/powerpc64/power8/strncase.S |
|
|
|
diff --git a/ChangeLog b/ChangeLog |
|
index c01d1a0..9385bd0 100644 |
|
diff --git a/sysdeps/powerpc/powerpc64/multiarch/Makefile b/sysdeps/powerpc/powerpc64/multiarch/Makefile |
|
index 9ee9bc2..e3ac285 100644 |
|
--- a/sysdeps/powerpc/powerpc64/multiarch/Makefile |
|
+++ b/sysdeps/powerpc/powerpc64/multiarch/Makefile |
|
@@ -21,6 +21,8 @@ sysdep_routines += memcpy-power7 memcpy-a2 memcpy-power6 memcpy-cell \ |
|
mempcpy-power7 mempcpy-ppc64 memchr-power7 memchr-ppc64 \ |
|
memrchr-power7 memrchr-ppc64 rawmemchr-power7 \ |
|
stpcpy-power8 stpcpy-power7 stpcpy-ppc64 \ |
|
+ strcasecmp-ppc64 strcasecmp-power8 \ |
|
+ strncase-ppc64 strncase-power8 \ |
|
strcasestr-power8 strcasestr-ppc64 \ |
|
strcat-power8 strcat-power7 strcat-ppc64 \ |
|
strcmp-power8 strcmp-power7 strcmp-ppc64 \ |
|
diff --git a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c |
|
index 228891f..aabd7bc 100644 |
|
--- a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c |
|
+++ b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c |
|
@@ -204,6 +204,9 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, |
|
/* Support sysdeps/powerpc/powerpc64/multiarch/strcasecmp.c. */ |
|
IFUNC_IMPL (i, name, strcasecmp, |
|
IFUNC_IMPL_ADD (array, i, strcasecmp, |
|
+ hwcap2 & PPC_FEATURE2_ARCH_2_07, |
|
+ __strcasecmp_power8) |
|
+ IFUNC_IMPL_ADD (array, i, strcasecmp, |
|
hwcap & PPC_FEATURE_HAS_VSX, |
|
__strcasecmp_power7) |
|
IFUNC_IMPL_ADD (array, i, strcasecmp, 1, __strcasecmp_ppc)) |
|
@@ -219,6 +222,9 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, |
|
/* Support sysdeps/powerpc/powerpc64/multiarch/strncase.c. */ |
|
IFUNC_IMPL (i, name, strncasecmp, |
|
IFUNC_IMPL_ADD (array, i, strncasecmp, |
|
+ hwcap2 & PPC_FEATURE2_ARCH_2_07, |
|
+ __strncasecmp_power8) |
|
+ IFUNC_IMPL_ADD (array, i, strncasecmp, |
|
hwcap & PPC_FEATURE_HAS_VSX, |
|
__strncasecmp_power7) |
|
IFUNC_IMPL_ADD (array, i, strncasecmp, 1, __strncasecmp_ppc)) |
|
diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-power7.S b/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-power7.S |
|
index 56eed9a..99cd7bd 100644 |
|
--- a/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-power7.S |
|
+++ b/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-power7.S |
|
@@ -1,5 +1,5 @@ |
|
-/* Optimized strcasecmp implementation foOWER7. |
|
- Copyright (C) 2013-2014 Free Software Foundation, Inc. |
|
+/* Optimized strcasecmp implementation for POWER7. |
|
+ Copyright (C) 2013-2016 Free Software Foundation, Inc. |
|
This file is part of the GNU C Library. |
|
|
|
The GNU C Library is free software; you can redistribute it and/or |
|
@@ -18,21 +18,7 @@ |
|
|
|
#include <sysdep.h> |
|
|
|
-#undef ENTRY |
|
-#define ENTRY(name) \ |
|
- .section ".text"; \ |
|
- ENTRY_2(__strcasecmp_power7) \ |
|
- .align ALIGNARG(2); \ |
|
- BODY_LABEL(__strcasecmp_power7): \ |
|
- cfi_startproc; \ |
|
- LOCALENTRY(__strcasecmp_power7) |
|
- |
|
-#undef END |
|
-#define END(name) \ |
|
- cfi_endproc; \ |
|
- TRACEBACK(__strcasecmp_power7) \ |
|
- END_2(__strcasecmp_power7) |
|
- |
|
+#define __strcasecmp __strcasecmp_power7 |
|
#undef weak_alias |
|
#define weak_alias(name, alias) |
|
|
|
diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-power8.S b/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-power8.S |
|
new file mode 100644 |
|
index 0000000..492047a |
|
--- /dev/null |
|
+++ b/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-power8.S |
|
@@ -0,0 +1,28 @@ |
|
+/* Optimized strcasecmp implementation for POWER8. |
|
+ Copyright (C) 2016 Free Software Foundation, Inc. |
|
+ This file is part of the GNU C Library. |
|
+ |
|
+ The GNU C Library is free software; you can redistribute it and/or |
|
+ modify it under the terms of the GNU Lesser General Public |
|
+ License as published by the Free Software Foundation; either |
|
+ version 2.1 of the License, or (at your option) any later version. |
|
+ |
|
+ The GNU C Library is distributed in the hope that it will be useful, |
|
+ but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
+ Lesser General Public License for more details. |
|
+ |
|
+ You should have received a copy of the GNU Lesser General Public |
|
+ License along with the GNU C Library; if not, see |
|
+ <http://www.gnu.org/licenses/>. */ |
|
+ |
|
+#include <sysdep.h> |
|
+ |
|
+#define __strcasecmp __strcasecmp_power8 |
|
+#undef weak_alias |
|
+#define weak_alias(name, alias) |
|
+ |
|
+#undef libc_hidden_builtin_def |
|
+#define libc_hidden_builtin_def(name) |
|
+ |
|
+#include <sysdeps/powerpc/powerpc64/power8/strcasecmp.S> |
|
diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-ppc64.c b/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-ppc64.c |
|
new file mode 100644 |
|
index 0000000..6318b4a |
|
--- /dev/null |
|
+++ b/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-ppc64.c |
|
@@ -0,0 +1,21 @@ |
|
+/* Multiarch strcasecmp for PPC64. |
|
+ Copyright (C) 2016 Free Software Foundation, Inc. |
|
+ This file is part of the GNU C Library. |
|
+ |
|
+ The GNU C Library is free software; you can redistribute it and/or |
|
+ modify it under the terms of the GNU Lesser General Public |
|
+ License as published by the Free Software Foundation; either |
|
+ version 2.1 of the License, or (at your option) any later version. |
|
+ |
|
+ The GNU C Library is distributed in the hope that it will be useful, |
|
+ but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
+ Lesser General Public License for more details. |
|
+ |
|
+ You should have received a copy of the GNU Lesser General Public |
|
+ License along with the GNU C Library; if not, see |
|
+ <http://www.gnu.org/licenses/>. */ |
|
+ |
|
+#define strcasecmp __strcasecmp_ppc |
|
+ |
|
+#include <string/strcasecmp.c> |
|
diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcasecmp.c b/sysdeps/powerpc/powerpc64/multiarch/strcasecmp.c |
|
index 979e9f1..5ec6885 100644 |
|
--- a/sysdeps/powerpc/powerpc64/multiarch/strcasecmp.c |
|
+++ b/sysdeps/powerpc/powerpc64/multiarch/strcasecmp.c |
|
@@ -1,5 +1,5 @@ |
|
-/* Multiple versions of strcasecmp. |
|
- Copyright (C) 2013-2014 Free Software Foundation, Inc. |
|
+/* Multiple versions of strcasecmp. |
|
+ Copyright (C) 2013-2016 Free Software Foundation, Inc. |
|
This file is part of the GNU C Library. |
|
|
|
The GNU C Library is free software; you can redistribute it and/or |
|
@@ -16,25 +16,21 @@ |
|
License along with the GNU C Library; if not, see |
|
<http://www.gnu.org/licenses/>. */ |
|
|
|
-#if IS_IN (libc) |
|
-# include <string.h> |
|
-# define strcasecmp __strcasecmp_ppc |
|
-extern __typeof (__strcasecmp) __strcasecmp_ppc attribute_hidden; |
|
-extern __typeof (__strcasecmp) __strcasecmp_power7 attribute_hidden; |
|
-#endif |
|
+#include <string.h> |
|
+#include <shlib-compat.h> |
|
+#include "init-arch.h" |
|
|
|
-#include <string/strcasecmp.c> |
|
-#undef strcasecmp |
|
+extern __typeof (__strcasecmp) __libc_strcasecmp; |
|
|
|
-#if IS_IN (libc) |
|
-# include <shlib-compat.h> |
|
-# include "init-arch.h" |
|
+extern __typeof (__strcasecmp) __strcasecmp_ppc attribute_hidden; |
|
+extern __typeof (__strcasecmp) __strcasecmp_power7 attribute_hidden; |
|
+extern __typeof (__strcasecmp) __strcasecmp_power8 attribute_hidden; |
|
|
|
-extern __typeof (__strcasecmp) __libc_strcasecmp; |
|
libc_ifunc (__libc_strcasecmp, |
|
- (hwcap & PPC_FEATURE_HAS_VSX) |
|
- ? __strcasecmp_power7 |
|
- : __strcasecmp_ppc); |
|
+ (hwcap2 & PPC_FEATURE2_ARCH_2_07) |
|
+ ? __strcasecmp_power8: |
|
+ (hwcap & PPC_FEATURE_HAS_VSX) |
|
+ ? __strcasecmp_power7 |
|
+ : __strcasecmp_ppc); |
|
|
|
weak_alias (__libc_strcasecmp, strcasecmp) |
|
-#endif |
|
diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncase-power8.S b/sysdeps/powerpc/powerpc64/multiarch/strncase-power8.S |
|
new file mode 100644 |
|
index 0000000..01a63b5 |
|
--- /dev/null |
|
+++ b/sysdeps/powerpc/powerpc64/multiarch/strncase-power8.S |
|
@@ -0,0 +1,28 @@ |
|
+/* Optimized strncasecmp implementation for POWER8. |
|
+ Copyright (C) 2016 Free Software Foundation, Inc. |
|
+ This file is part of the GNU C Library. |
|
+ |
|
+ The GNU C Library is free software; you can redistribute it and/or |
|
+ modify it under the terms of the GNU Lesser General Public |
|
+ License as published by the Free Software Foundation; either |
|
+ version 2.1 of the License, or (at your option) any later version. |
|
+ |
|
+ The GNU C Library is distributed in the hope that it will be useful, |
|
+ but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
+ Lesser General Public License for more details. |
|
+ |
|
+ You should have received a copy of the GNU Lesser General Public |
|
+ License along with the GNU C Library; if not, see |
|
+ <http://www.gnu.org/licenses/>. */ |
|
+ |
|
+#include <sysdep.h> |
|
+ |
|
+#define __strncasecmp __strncasecmp_power8 |
|
+#undef weak_alias |
|
+#define weak_alias(name, alias) |
|
+ |
|
+#undef libc_hidden_builtin_def |
|
+#define libc_hidden_builtin_def(name) |
|
+ |
|
+#include <sysdeps/powerpc/powerpc64/power8/strncase.S> |
|
diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncase-ppc64.c b/sysdeps/powerpc/powerpc64/multiarch/strncase-ppc64.c |
|
new file mode 100644 |
|
index 0000000..c245d77 |
|
--- /dev/null |
|
+++ b/sysdeps/powerpc/powerpc64/multiarch/strncase-ppc64.c |
|
@@ -0,0 +1,21 @@ |
|
+/* Multiarch strncasecmp for PPC64. |
|
+ Copyright (C) 2016 Free Software Foundation, Inc. |
|
+ This file is part of the GNU C Library. |
|
+ |
|
+ The GNU C Library is free software; you can redistribute it and/or |
|
+ modify it under the terms of the GNU Lesser General Public |
|
+ License as published by the Free Software Foundation; either |
|
+ version 2.1 of the License, or (at your option) any later version. |
|
+ |
|
+ The GNU C Library is distributed in the hope that it will be useful, |
|
+ but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
+ Lesser General Public License for more details. |
|
+ |
|
+ You should have received a copy of the GNU Lesser General Public |
|
+ License along with the GNU C Library; if not, see |
|
+ <http://www.gnu.org/licenses/>. */ |
|
+ |
|
+#define strncasecmp __strncasecmp_ppc |
|
+ |
|
+#include <string/strncase.c> |
|
diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncase.c b/sysdeps/powerpc/powerpc64/multiarch/strncase.c |
|
index 4339f3a..5bfaf65 100644 |
|
--- a/sysdeps/powerpc/powerpc64/multiarch/strncase.c |
|
+++ b/sysdeps/powerpc/powerpc64/multiarch/strncase.c |
|
@@ -16,26 +16,21 @@ |
|
License along with the GNU C Library; if not, see |
|
<http://www.gnu.org/licenses/>. */ |
|
|
|
-#if IS_IN (libc) |
|
-# include <string.h> |
|
-# define strncasecmp __strncasecmp_ppc |
|
-extern __typeof (__strncasecmp) __strncasecmp_ppc attribute_hidden; |
|
-extern __typeof (__strncasecmp) __strncasecmp_power7 attribute_hidden; |
|
-#endif |
|
+#include <string.h> |
|
+#include <shlib-compat.h> |
|
+#include "init-arch.h" |
|
|
|
-#include <string/strncase.c> |
|
-#undef strncasecmp |
|
+extern __typeof (__strncasecmp) __libc_strncasecmp; |
|
|
|
-#if IS_IN (libc) |
|
-# include <shlib-compat.h> |
|
-# include "init-arch.h" |
|
+extern __typeof (__strncasecmp) __strncasecmp_ppc attribute_hidden; |
|
+extern __typeof (__strncasecmp) __strncasecmp_power7 attribute_hidden; |
|
+extern __typeof (__strncasecmp) __strncasecmp_power8 attribute_hidden; |
|
|
|
-/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle |
|
- ifunc symbol properly. */ |
|
-extern __typeof (__strncasecmp) __libc_strncasecmp; |
|
libc_ifunc (__libc_strncasecmp, |
|
+ (hwcap2 & PPC_FEATURE2_ARCH_2_07) |
|
+ ? __strncasecmp_power8: |
|
(hwcap & PPC_FEATURE_HAS_VSX) |
|
? __strncasecmp_power7 |
|
: __strncasecmp_ppc); |
|
+ |
|
weak_alias (__libc_strncasecmp, strncasecmp) |
|
-#endif |
|
diff --git a/sysdeps/powerpc/powerpc64/power8/strcasecmp.S b/sysdeps/powerpc/powerpc64/power8/strcasecmp.S |
|
new file mode 100644 |
|
index 0000000..63f6217 |
|
--- /dev/null |
|
+++ b/sysdeps/powerpc/powerpc64/power8/strcasecmp.S |
|
@@ -0,0 +1,446 @@ |
|
+/* Optimized strcasecmp implementation for PowerPC64. |
|
+ Copyright (C) 2016 Free Software Foundation, Inc. |
|
+ This file is part of the GNU C Library. |
|
+ |
|
+ The GNU C Library is free software; you can redistribute it and/or |
|
+ modify it under the terms of the GNU Lesser General Public |
|
+ License as published by the Free Software Foundation; either |
|
+ version 2.1 of the License, or (at your option) any later version. |
|
+ |
|
+ The GNU C Library is distributed in the hope that it will be useful, |
|
+ but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
+ Lesser General Public License for more details. |
|
+ |
|
+ You should have received a copy of the GNU Lesser General Public |
|
+ License along with the GNU C Library; if not, see |
|
+ <http://www.gnu.org/licenses/>. */ |
|
+ |
|
+#include <sysdep.h> |
|
+#include <locale-defines.h> |
|
+ |
|
+/* int [r3] strcasecmp (const char *s1 [r3], const char *s2 [r4] ) */ |
|
+ |
|
+#ifndef USE_AS_STRNCASECMP |
|
+# define __STRCASECMP __strcasecmp |
|
+# define STRCASECMP strcasecmp |
|
+#else |
|
+# define __STRCASECMP __strncasecmp |
|
+# define STRCASECMP strncasecmp |
|
+#endif |
|
+/* Convert 16 bytes to lowercase and compare */ |
|
+#define TOLOWER() \ |
|
+ vaddubm v8, v4, v1; \ |
|
+ vaddubm v7, v4, v3; \ |
|
+ vcmpgtub v8, v8, v2; \ |
|
+ vsel v4, v7, v4, v8; \ |
|
+ vaddubm v8, v5, v1; \ |
|
+ vaddubm v7, v5, v3; \ |
|
+ vcmpgtub v8, v8, v2; \ |
|
+ vsel v5, v7, v5, v8; \ |
|
+ vcmpequb. v7, v5, v4; |
|
+ |
|
+/* Get 16 bytes for unaligned case. */ |
|
+#ifdef __LITTLE_ENDIAN__ |
|
+#define GET16BYTES(reg1, reg2, reg3) \ |
|
+ lvx reg1, 0, reg2; \ |
|
+ vcmpequb. v8, v0, reg1; \ |
|
+ beq cr6, 1f; \ |
|
+ vspltisb v9, 0; \ |
|
+ b 2f; \ |
|
+ .align 4; \ |
|
+1: \ |
|
+ addi r6, reg2, 16; \ |
|
+ lvx v9, 0, r6; \ |
|
+2: \ |
|
+ vperm reg1, v9, reg1, reg3; |
|
+#else |
|
+#define GET16BYTES(reg1, reg2, reg3) \ |
|
+ lvx reg1, 0, reg2; \ |
|
+ vcmpequb. v8, v0, reg1; \ |
|
+ beq cr6, 1f; \ |
|
+ vspltisb v9, 0; \ |
|
+ b 2f; \ |
|
+ .align 4; \ |
|
+1: \ |
|
+ addi r6, reg2, 16; \ |
|
+ lvx v9, 0, r6; \ |
|
+2: \ |
|
+ vperm reg1, reg1, v9, reg3; |
|
+#endif |
|
+ |
|
+/* Check null in v4, v5 and convert to lower. */ |
|
+#define CHECKNULLANDCONVERT() \ |
|
+ vcmpequb. v7, v0, v5; \ |
|
+ beq cr6, 3f; \ |
|
+ vcmpequb. v7, v0, v4; \ |
|
+ beq cr6, 3f; \ |
|
+ b L(null_found); \ |
|
+ .align 4; \ |
|
+3: \ |
|
+ TOLOWER() |
|
+ |
|
+#ifdef _ARCH_PWR8 |
|
+# define VCLZD_V8_v7 vclzd v8, v7; |
|
+# define MFVRD_R3_V1 mfvrd r3, v1; |
|
+# define VSUBUDM_V9_V8 vsubudm v9, v9, v8; |
|
+# define VPOPCNTD_V8_V8 vpopcntd v8, v8; |
|
+# define VADDUQM_V7_V8 vadduqm v9, v7, v8; |
|
+#else |
|
+# define VCLZD_V8_v7 .long 0x11003fc2 |
|
+# define MFVRD_R3_V1 .long 0x7c230067 |
|
+# define VSUBUDM_V9_V8 .long 0x112944c0 |
|
+# define VPOPCNTD_V8_V8 .long 0x110047c3 |
|
+# define VADDUQM_V7_V8 .long 0x11274100 |
|
+#endif |
|
+ |
|
+ .machine power7 |
|
+ |
|
+ENTRY (__STRCASECMP) |
|
+#ifdef USE_AS_STRNCASECMP |
|
+ CALL_MCOUNT 3 |
|
+#else |
|
+ CALL_MCOUNT 2 |
|
+#endif |
|
+#define rRTN r3 /* Return value */ |
|
+#define rSTR1 r10 /* 1st string */ |
|
+#define rSTR2 r4 /* 2nd string */ |
|
+#define rCHAR1 r6 /* Byte read from 1st string */ |
|
+#define rCHAR2 r7 /* Byte read from 2nd string */ |
|
+#define rADDR1 r8 /* Address of tolower(rCHAR1) */ |
|
+#define rADDR2 r12 /* Address of tolower(rCHAR2) */ |
|
+#define rLWR1 r8 /* Word tolower(rCHAR1) */ |
|
+#define rLWR2 r12 /* Word tolower(rCHAR2) */ |
|
+#define rTMP r9 |
|
+#define rLOC r11 /* Default locale address */ |
|
+ |
|
+ cmpd cr7, rRTN, rSTR2 |
|
+ |
|
+ /* Get locale address. */ |
|
+ ld rTMP, __libc_tsd_LOCALE@got@tprel(r2) |
|
+ add rLOC, rTMP, __libc_tsd_LOCALE@tls |
|
+ ld rLOC, 0(rLOC) |
|
+ |
|
+ mr rSTR1, rRTN |
|
+ li rRTN, 0 |
|
+ beqlr cr7 |
|
+#ifdef USE_AS_STRNCASECMP |
|
+ cmpdi cr7, r5, 0 |
|
+ beq cr7, L(retnull) |
|
+ cmpdi cr7, r5, 16 |
|
+ blt cr7, L(bytebybyte) |
|
+#endif |
|
+ vspltisb v0, 0 |
|
+ vspltisb v8, -1 |
|
+ /* Check for null in initial characters. |
|
+ Check max of 16 char depending on the alignment. |
|
+ If null is present, proceed byte by byte. */ |
|
+ lvx v4, 0, rSTR1 |
|
+#ifdef __LITTLE_ENDIAN__ |
|
+ lvsr v10, 0, rSTR1 /* Compute mask. */ |
|
+ vperm v9, v8, v4, v10 /* Mask bits that are not part of string. */ |
|
+#else |
|
+ lvsl v10, 0, rSTR1 |
|
+ vperm v9, v4, v8, v10 |
|
+#endif |
|
+ vcmpequb. v9, v0, v9 /* Check for null bytes. */ |
|
+ bne cr6, L(bytebybyte) |
|
+ lvx v5, 0, rSTR2 |
|
+ /* Calculate alignment. */ |
|
+#ifdef __LITTLE_ENDIAN__ |
|
+ lvsr v6, 0, rSTR2 |
|
+ vperm v9, v8, v5, v6 /* Mask bits that are not part of string. */ |
|
+#else |
|
+ lvsl v6, 0, rSTR2 |
|
+ vperm v9, v5, v8, v6 |
|
+#endif |
|
+ vcmpequb. v9, v0, v9 /* Check for null bytes. */ |
|
+ bne cr6, L(bytebybyte) |
|
+ /* Check if locale has non ascii characters. */ |
|
+ ld rTMP, 0(rLOC) |
|
+ addi r6, rTMP,LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES |
|
+ lwz rTMP, 0(r6) |
|
+ cmpdi cr7, rTMP, 1 |
|
+ beq cr7, L(bytebybyte) |
|
+ |
|
+ /* Load vector registers with values used for TOLOWER. */ |
|
+ /* Load v1 = 0xbf, v2 = 0x19, v3 = 0x20 in each byte. */ |
|
+ vspltisb v3, 2 |
|
+ vspltisb v9, 4 |
|
+ vsl v3, v3, v9 |
|
+ vaddubm v1, v3, v3 |
|
+ vnor v1, v1, v1 |
|
+ vspltisb v2, 7 |
|
+ vsububm v2, v3, v2 |
|
+ |
|
+ andi. rADDR1, rSTR1, 0xF |
|
+ beq cr0, L(align) |
|
+ addi r6, rSTR1, 16 |
|
+ lvx v9, 0, r6 |
|
+ /* Compute 16 bytes from previous two loads. */ |
|
+#ifdef __LITTLE_ENDIAN__ |
|
+ vperm v4, v9, v4, v10 |
|
+#else |
|
+ vperm v4, v4, v9, v10 |
|
+#endif |
|
+L(align): |
|
+ andi. rADDR2, rSTR2, 0xF |
|
+ beq cr0, L(align1) |
|
+ addi r6, rSTR2, 16 |
|
+ lvx v9, 0, r6 |
|
+ /* Compute 16 bytes from previous two loads. */ |
|
+#ifdef __LITTLE_ENDIAN__ |
|
+ vperm v5, v9, v5, v6 |
|
+#else |
|
+ vperm v5, v5, v9, v6 |
|
+#endif |
|
+L(align1): |
|
+ CHECKNULLANDCONVERT() |
|
+ blt cr6, L(match) |
|
+ b L(different) |
|
+ .align 4 |
|
+L(match): |
|
+ clrldi r6, rSTR1, 60 |
|
+ subfic r7, r6, 16 |
|
+#ifdef USE_AS_STRNCASECMP |
|
+ sub r5, r5, r7 |
|
+#endif |
|
+ add rSTR1, rSTR1, r7 |
|
+ add rSTR2, rSTR2, r7 |
|
+ andi. rADDR2, rSTR2, 0xF |
|
+ addi rSTR1, rSTR1, -16 |
|
+ addi rSTR2, rSTR2, -16 |
|
+ beq cr0, L(aligned) |
|
+#ifdef __LITTLE_ENDIAN__ |
|
+ lvsr v6, 0, rSTR2 |
|
+#else |
|
+ lvsl v6, 0, rSTR2 |
|
+#endif |
|
+ /* There are 2 loops depending on the input alignment. |
|
+ Each loop gets 16 bytes from s1 and s2, check for null, |
|
+ convert to lowercase and compare. Loop till difference |
|
+ or null occurs. */ |
|
+L(s1_align): |
|
+ addi rSTR1, rSTR1, 16 |
|
+ addi rSTR2, rSTR2, 16 |
|
+#ifdef USE_AS_STRNCASECMP |
|
+ cmpdi cr7, r5, 16 |
|
+ blt cr7, L(bytebybyte) |
|
+ addi r5, r5, -16 |
|
+#endif |
|
+ lvx v4, 0, rSTR1 |
|
+ GET16BYTES(v5, rSTR2, v6) |
|
+ CHECKNULLANDCONVERT() |
|
+ blt cr6, L(s1_align) |
|
+ b L(different) |
|
+ .align 4 |
|
+L(aligned): |
|
+ addi rSTR1, rSTR1, 16 |
|
+ addi rSTR2, rSTR2, 16 |
|
+#ifdef USE_AS_STRNCASECMP |
|
+ cmpdi cr7, r5, 16 |
|
+ blt cr7, L(bytebybyte) |
|
+ addi r5, r5, -16 |
|
+#endif |
|
+ lvx v4, 0, rSTR1 |
|
+ lvx v5, 0, rSTR2 |
|
+ CHECKNULLANDCONVERT() |
|
+ blt cr6, L(aligned) |
|
+ |
|
+ /* Calculate and return the difference. */ |
|
+L(different): |
|
+ vaddubm v1, v3, v3 |
|
+ vcmpequb v7, v0, v7 |
|
+#ifdef __LITTLE_ENDIAN__ |
|
+ /* Count trailing zero. */ |
|
+ vspltisb v8, -1 |
|
+ VADDUQM_V7_V8 |
|
+ vandc v8, v9, v7 |
|
+ VPOPCNTD_V8_V8 |
|
+ vspltb v6, v8, 15 |
|
+ vcmpequb. v6, v6, v1 |
|
+ blt cr6, L(shift8) |
|
+#else |
|
+ /* Count leading zero. */ |
|
+ VCLZD_V8_v7 |
|
+ vspltb v6, v8, 7 |
|
+ vcmpequb. v6, v6, v1 |
|
+ blt cr6, L(shift8) |
|
+ vsro v8, v8, v1 |
|
+#endif |
|
+ b L(skipsum) |
|
+ .align 4 |
|
+L(shift8): |
|
+ vsumsws v8, v8, v0 |
|
+L(skipsum): |
|
+#ifdef __LITTLE_ENDIAN__ |
|
+ /* Shift registers based on trailing zero count. */ |
|
+ vsro v6, v5, v8 |
|
+ vsro v7, v4, v8 |
|
+ /* Merge and move to GPR. */ |
|
+ vmrglb v6, v6, v7 |
|
+ vslo v1, v6, v1 |
|
+ MFVRD_R3_V1 |
|
+ /* Place the characters that are different in first position. */ |
|
+ sldi rSTR2, rRTN, 56 |
|
+ srdi rSTR2, rSTR2, 56 |
|
+ sldi rSTR1, rRTN, 48 |
|
+ srdi rSTR1, rSTR1, 56 |
|
+#else |
|
+ vslo v6, v5, v8 |
|
+ vslo v7, v4, v8 |
|
+ vmrghb v1, v6, v7 |
|
+ MFVRD_R3_V1 |
|
+ srdi rSTR2, rRTN, 48 |
|
+ sldi rSTR2, rSTR2, 56 |
|
+ srdi rSTR2, rSTR2, 56 |
|
+ srdi rSTR1, rRTN, 56 |
|
+#endif |
|
+ subf rRTN, rSTR1, rSTR2 |
|
+ extsw rRTN, rRTN |
|
+ blr |
|
+ |
|
+ .align 4 |
|
+ /* OK. We've hit the end of the string. We need to be careful that |
|
+ we don't compare two strings as different because of junk beyond |
|
+ the end of the strings... */ |
|
+L(null_found): |
|
+ vaddubm v10, v3, v3 |
|
+#ifdef __LITTLE_ENDIAN__ |
|
+ /* Count trailing zero. */ |
|
+ vspltisb v8, -1 |
|
+ VADDUQM_V7_V8 |
|
+ vandc v8, v9, v7 |
|
+ VPOPCNTD_V8_V8 |
|
+ vspltb v6, v8, 15 |
|
+ vcmpequb. v6, v6, v10 |
|
+ blt cr6, L(shift_8) |
|
+#else |
|
+ /* Count leading zero. */ |
|
+ VCLZD_V8_v7 |
|
+ vspltb v6, v8, 7 |
|
+ vcmpequb. v6, v6, v10 |
|
+ blt cr6, L(shift_8) |
|
+ vsro v8, v8, v10 |
|
+#endif |
|
+ b L(skipsum1) |
|
+ .align 4 |
|
+L(shift_8): |
|
+ vsumsws v8, v8, v0 |
|
+L(skipsum1): |
|
+ /* Calculate shift count based on count of zero. */ |
|
+ vspltisb v10, 7 |
|
+ vslb v10, v10, v10 |
|
+ vsldoi v9, v0, v10, 1 |
|
+ VSUBUDM_V9_V8 |
|
+ vspltisb v8, 8 |
|
+ vsldoi v8, v0, v8, 1 |
|
+ VSUBUDM_V9_V8 |
|
+ /* Shift and remove junk after null character. */ |
|
+#ifdef __LITTLE_ENDIAN__ |
|
+ vslo v5, v5, v9 |
|
+ vslo v4, v4, v9 |
|
+#else |
|
+ vsro v5, v5, v9 |
|
+ vsro v4, v4, v9 |
|
+#endif |
|
+ /* Convert and compare 16 bytes. */ |
|
+ TOLOWER() |
|
+ blt cr6, L(retnull) |
|
+ b L(different) |
|
+ .align 4 |
|
+L(retnull): |
|
+ li rRTN, 0 |
|
+ blr |
|
+ .align 4 |
|
+L(bytebybyte): |
|
+ /* Unrolling loop for POWER: loads are done with 'lbz' plus |
|
+ offset and string descriptors are only updated in the end |
|
+ of loop unrolling. */ |
|
+ ld rLOC, LOCALE_CTYPE_TOLOWER(rLOC) |
|
+ lbz rCHAR1, 0(rSTR1) /* Load char from s1 */ |
|
+ lbz rCHAR2, 0(rSTR2) /* Load char from s2 */ |
|
+#ifdef USE_AS_STRNCASECMP |
|
+ rldicl rTMP, r5, 62, 2 |
|
+ cmpdi cr7, rTMP, 0 |
|
+ beq cr7, L(lessthan4) |
|
+ mtctr rTMP |
|
+#endif |
|
+L(loop): |
|
+ cmpdi rCHAR1, 0 /* *s1 == '\0' ? */ |
|
+ sldi rADDR1, rCHAR1, 2 /* Calculate address for tolower(*s1) */ |
|
+ sldi rADDR2, rCHAR2, 2 /* Calculate address for tolower(*s2) */ |
|
+ lwzx rLWR1, rLOC, rADDR1 /* Load tolower(*s1) */ |
|
+ lwzx rLWR2, rLOC, rADDR2 /* Load tolower(*s2) */ |
|
+ cmpw cr1, rLWR1, rLWR2 /* r = tolower(*s1) == tolower(*s2) ? */ |
|
+ crorc 4*cr1+eq,eq,4*cr1+eq /* (*s1 == '\0') || (r == 0) */ |
|
+ beq cr1, L(done) |
|
+ lbz rCHAR1, 1(rSTR1) |
|
+ lbz rCHAR2, 1(rSTR2) |
|
+ cmpdi rCHAR1, 0 |
|
+ sldi rADDR1, rCHAR1, 2 |
|
+ sldi rADDR2, rCHAR2, 2 |
|
+ lwzx rLWR1, rLOC, rADDR1 |
|
+ lwzx rLWR2, rLOC, rADDR2 |
|
+ cmpw cr1, rLWR1, rLWR2 |
|
+ crorc 4*cr1+eq,eq,4*cr1+eq |
|
+ beq cr1, L(done) |
|
+ lbz rCHAR1, 2(rSTR1) |
|
+ lbz rCHAR2, 2(rSTR2) |
|
+ cmpdi rCHAR1, 0 |
|
+ sldi rADDR1, rCHAR1, 2 |
|
+ sldi rADDR2, rCHAR2, 2 |
|
+ lwzx rLWR1, rLOC, rADDR1 |
|
+ lwzx rLWR2, rLOC, rADDR2 |
|
+ cmpw cr1, rLWR1, rLWR2 |
|
+ crorc 4*cr1+eq,eq,4*cr1+eq |
|
+ beq cr1, L(done) |
|
+ lbz rCHAR1, 3(rSTR1) |
|
+ lbz rCHAR2, 3(rSTR2) |
|
+ cmpdi rCHAR1, 0 |
|
+ /* Increment both string descriptors */ |
|
+ addi rSTR1, rSTR1, 4 |
|
+ addi rSTR2, rSTR2, 4 |
|
+ sldi rADDR1, rCHAR1, 2 |
|
+ sldi rADDR2, rCHAR2, 2 |
|
+ lwzx rLWR1, rLOC, rADDR1 |
|
+ lwzx rLWR2, rLOC, rADDR2 |
|
+ cmpw cr1, rLWR1, rLWR2 |
|
+ crorc 4*cr1+eq,eq,4*cr1+eq |
|
+ beq cr1, L(done) |
|
+ lbz rCHAR1, 0(rSTR1) /* Load char from s1 */ |
|
+ lbz rCHAR2, 0(rSTR2) /* Load char from s2 */ |
|
+#ifdef USE_AS_STRNCASECMP |
|
+ bdnz L(loop) |
|
+#else |
|
+ b L(loop) |
|
+#endif |
|
+#ifdef USE_AS_STRNCASECMP |
|
+L(lessthan4): |
|
+ clrldi r5, r5, 62 |
|
+ cmpdi cr7, r5, 0 |
|
+ beq cr7, L(retnull) |
|
+ mtctr r5 |
|
+L(loop1): |
|
+ cmpdi rCHAR1, 0 |
|
+ sldi rADDR1, rCHAR1, 2 |
|
+ sldi rADDR2, rCHAR2, 2 |
|
+ lwzx rLWR1, rLOC, rADDR1 |
|
+ lwzx rLWR2, rLOC, rADDR2 |
|
+ cmpw cr1, rLWR1, rLWR2 |
|
+ crorc 4*cr1+eq,eq,4*cr1+eq |
|
+ beq cr1, L(done) |
|
+ addi rSTR1, rSTR1, 1 |
|
+ addi rSTR2, rSTR2, 1 |
|
+ lbz rCHAR1, 0(rSTR1) |
|
+ lbz rCHAR2, 0(rSTR2) |
|
+ bdnz L(loop1) |
|
+#endif |
|
+L(done): |
|
+ subf r0, rLWR2, rLWR1 |
|
+ extsw rRTN, r0 |
|
+ blr |
|
+END (__STRCASECMP) |
|
+ |
|
+weak_alias (__STRCASECMP, STRCASECMP) |
|
+libc_hidden_builtin_def (__STRCASECMP) |
|
diff --git a/sysdeps/powerpc/powerpc64/power8/strncase.S b/sysdeps/powerpc/powerpc64/power8/strncase.S |
|
new file mode 100644 |
|
index 0000000..7ce2ed0 |
|
--- /dev/null |
|
+++ b/sysdeps/powerpc/powerpc64/power8/strncase.S |
|
@@ -0,0 +1,20 @@ |
|
+/* Optimized strncasecmp implementation for POWER8. |
|
+ Copyright (C) 2016 Free Software Foundation, Inc. |
|
+ This file is part of the GNU C Library. |
|
+ |
|
+ The GNU C Library is free software; you can redistribute it and/or |
|
+ modify it under the terms of the GNU Lesser General Public |
|
+ License as published by the Free Software Foundation; either |
|
+ version 2.1 of the License, or (at your option) any later version. |
|
+ |
|
+ The GNU C Library is distributed in the hope that it will be useful, |
|
+ but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
+ Lesser General Public License for more details. |
|
+ |
|
+ You should have received a copy of the GNU Lesser General Public |
|
+ License along with the GNU C Library; if not, see |
|
+ <http://www.gnu.org/licenses/>. */ |
|
+ |
|
+#define USE_AS_STRNCASECMP 1 |
|
+#include <sysdeps/powerpc/powerpc64/power8/strcasecmp.S> |
|
-- |
|
2.1.0 |
|
|
|
|