You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
804 lines
26 KiB
804 lines
26 KiB
7 years ago
|
From 7dd60718b327b3eb6112ec3900750007b0259189 Mon Sep 17 00:00:00 2001
|
||
|
From: raji <raji@oc4354787705.ibm.com>
|
||
|
Date: Tue, 14 Jun 2016 14:51:16 +0530
|
||
|
Subject: [PATCH] powerpc: strcasecmp/strncasecmp optimization for power8
|
||
|
|
||
|
This implementation utilizes vectors to improve performance
|
||
|
compared to current byte by byte implementation for POWER7.
|
||
|
The performance improvement is up to 4x. This patch is tested
|
||
|
on powerpc64 and powerpc64le.
|
||
|
|
||
|
(cherry picked from commit c8376f3e07602aaef9cb843bb73cb5f2b860634a)
|
||
|
|
||
|
Conflicts:
|
||
|
sysdeps/powerpc/powerpc64/multiarch/strcasecmp-power7.S
|
||
|
sysdeps/powerpc/powerpc64/multiarch/strcasecmp.c
|
||
|
---
|
||
|
ChangeLog | 22 +
|
||
|
sysdeps/powerpc/powerpc64/multiarch/Makefile | 4 +-
|
||
|
.../powerpc/powerpc64/multiarch/ifunc-impl-list.c | 6 +
|
||
|
.../powerpc64/multiarch/strcasecmp-power7.S | 20 +-
|
||
|
.../powerpc64/multiarch/strcasecmp-power8.S | 28 ++
|
||
|
.../powerpc/powerpc64/multiarch/strcasecmp-ppc64.c | 21 +
|
||
|
sysdeps/powerpc/powerpc64/multiarch/strcasecmp.c | 32 +-
|
||
|
.../powerpc/powerpc64/multiarch/strncase-power8.S | 28 ++
|
||
|
.../powerpc/powerpc64/multiarch/strncase-ppc64.c | 21 +
|
||
|
sysdeps/powerpc/powerpc64/multiarch/strncase.c | 25 +-
|
||
|
sysdeps/powerpc/powerpc64/power8/strcasecmp.S | 446 +++++++++++++++++++++
|
||
|
sysdeps/powerpc/powerpc64/power8/strncase.S | 20 +
|
||
|
12 files changed, 622 insertions(+), 51 deletions(-)
|
||
|
create mode 100644 sysdeps/powerpc/powerpc64/multiarch/strcasecmp-power8.S
|
||
|
create mode 100644 sysdeps/powerpc/powerpc64/multiarch/strcasecmp-ppc64.c
|
||
|
create mode 100644 sysdeps/powerpc/powerpc64/multiarch/strncase-power8.S
|
||
|
create mode 100644 sysdeps/powerpc/powerpc64/multiarch/strncase-ppc64.c
|
||
|
create mode 100644 sysdeps/powerpc/powerpc64/power8/strcasecmp.S
|
||
|
create mode 100644 sysdeps/powerpc/powerpc64/power8/strncase.S
|
||
|
|
||
|
diff --git a/ChangeLog b/ChangeLog
|
||
|
index c01d1a0..9385bd0 100644
|
||
|
diff --git a/sysdeps/powerpc/powerpc64/multiarch/Makefile b/sysdeps/powerpc/powerpc64/multiarch/Makefile
|
||
|
index 9ee9bc2..e3ac285 100644
|
||
|
--- a/sysdeps/powerpc/powerpc64/multiarch/Makefile
|
||
|
+++ b/sysdeps/powerpc/powerpc64/multiarch/Makefile
|
||
|
@@ -21,6 +21,8 @@ sysdep_routines += memcpy-power7 memcpy-a2 memcpy-power6 memcpy-cell \
|
||
|
mempcpy-power7 mempcpy-ppc64 memchr-power7 memchr-ppc64 \
|
||
|
memrchr-power7 memrchr-ppc64 rawmemchr-power7 \
|
||
|
stpcpy-power8 stpcpy-power7 stpcpy-ppc64 \
|
||
|
+ strcasecmp-ppc64 strcasecmp-power8 \
|
||
|
+ strncase-ppc64 strncase-power8 \
|
||
|
strcasestr-power8 strcasestr-ppc64 \
|
||
|
strcat-power8 strcat-power7 strcat-ppc64 \
|
||
|
strcmp-power8 strcmp-power7 strcmp-ppc64 \
|
||
|
diff --git a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
|
||
|
index 228891f..aabd7bc 100644
|
||
|
--- a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
|
||
|
+++ b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
|
||
|
@@ -204,6 +204,9 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
||
|
/* Support sysdeps/powerpc/powerpc64/multiarch/strcasecmp.c. */
|
||
|
IFUNC_IMPL (i, name, strcasecmp,
|
||
|
IFUNC_IMPL_ADD (array, i, strcasecmp,
|
||
|
+ hwcap2 & PPC_FEATURE2_ARCH_2_07,
|
||
|
+ __strcasecmp_power8)
|
||
|
+ IFUNC_IMPL_ADD (array, i, strcasecmp,
|
||
|
hwcap & PPC_FEATURE_HAS_VSX,
|
||
|
__strcasecmp_power7)
|
||
|
IFUNC_IMPL_ADD (array, i, strcasecmp, 1, __strcasecmp_ppc))
|
||
|
@@ -219,6 +222,9 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
||
|
/* Support sysdeps/powerpc/powerpc64/multiarch/strncase.c. */
|
||
|
IFUNC_IMPL (i, name, strncasecmp,
|
||
|
IFUNC_IMPL_ADD (array, i, strncasecmp,
|
||
|
+ hwcap2 & PPC_FEATURE2_ARCH_2_07,
|
||
|
+ __strncasecmp_power8)
|
||
|
+ IFUNC_IMPL_ADD (array, i, strncasecmp,
|
||
|
hwcap & PPC_FEATURE_HAS_VSX,
|
||
|
__strncasecmp_power7)
|
||
|
IFUNC_IMPL_ADD (array, i, strncasecmp, 1, __strncasecmp_ppc))
|
||
|
diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-power7.S b/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-power7.S
|
||
|
index 56eed9a..99cd7bd 100644
|
||
|
--- a/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-power7.S
|
||
|
+++ b/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-power7.S
|
||
|
@@ -1,5 +1,5 @@
|
||
|
-/* Optimized strcasecmp implementation foOWER7.
|
||
|
- Copyright (C) 2013-2014 Free Software Foundation, Inc.
|
||
|
+/* Optimized strcasecmp implementation for POWER7.
|
||
|
+ Copyright (C) 2013-2016 Free Software Foundation, Inc.
|
||
|
This file is part of the GNU C Library.
|
||
|
|
||
|
The GNU C Library is free software; you can redistribute it and/or
|
||
|
@@ -18,21 +18,7 @@
|
||
|
|
||
|
#include <sysdep.h>
|
||
|
|
||
|
-#undef ENTRY
|
||
|
-#define ENTRY(name) \
|
||
|
- .section ".text"; \
|
||
|
- ENTRY_2(__strcasecmp_power7) \
|
||
|
- .align ALIGNARG(2); \
|
||
|
- BODY_LABEL(__strcasecmp_power7): \
|
||
|
- cfi_startproc; \
|
||
|
- LOCALENTRY(__strcasecmp_power7)
|
||
|
-
|
||
|
-#undef END
|
||
|
-#define END(name) \
|
||
|
- cfi_endproc; \
|
||
|
- TRACEBACK(__strcasecmp_power7) \
|
||
|
- END_2(__strcasecmp_power7)
|
||
|
-
|
||
|
+#define __strcasecmp __strcasecmp_power7
|
||
|
#undef weak_alias
|
||
|
#define weak_alias(name, alias)
|
||
|
|
||
|
diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-power8.S b/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-power8.S
|
||
|
new file mode 100644
|
||
|
index 0000000..492047a
|
||
|
--- /dev/null
|
||
|
+++ b/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-power8.S
|
||
|
@@ -0,0 +1,28 @@
|
||
|
+/* Optimized strcasecmp implementation for POWER8.
|
||
|
+ Copyright (C) 2016 Free Software Foundation, Inc.
|
||
|
+ This file is part of the GNU C Library.
|
||
|
+
|
||
|
+ The GNU C Library is free software; you can redistribute it and/or
|
||
|
+ modify it under the terms of the GNU Lesser General Public
|
||
|
+ License as published by the Free Software Foundation; either
|
||
|
+ version 2.1 of the License, or (at your option) any later version.
|
||
|
+
|
||
|
+ The GNU C Library is distributed in the hope that it will be useful,
|
||
|
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
|
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||
|
+ Lesser General Public License for more details.
|
||
|
+
|
||
|
+ You should have received a copy of the GNU Lesser General Public
|
||
|
+ License along with the GNU C Library; if not, see
|
||
|
+ <http://www.gnu.org/licenses/>. */
|
||
|
+
|
||
|
+#include <sysdep.h>
|
||
|
+
|
||
|
+#define __strcasecmp __strcasecmp_power8
|
||
|
+#undef weak_alias
|
||
|
+#define weak_alias(name, alias)
|
||
|
+
|
||
|
+#undef libc_hidden_builtin_def
|
||
|
+#define libc_hidden_builtin_def(name)
|
||
|
+
|
||
|
+#include <sysdeps/powerpc/powerpc64/power8/strcasecmp.S>
|
||
|
diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-ppc64.c b/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-ppc64.c
|
||
|
new file mode 100644
|
||
|
index 0000000..6318b4a
|
||
|
--- /dev/null
|
||
|
+++ b/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-ppc64.c
|
||
|
@@ -0,0 +1,21 @@
|
||
|
+/* Multiarch strcasecmp for PPC64.
|
||
|
+ Copyright (C) 2016 Free Software Foundation, Inc.
|
||
|
+ This file is part of the GNU C Library.
|
||
|
+
|
||
|
+ The GNU C Library is free software; you can redistribute it and/or
|
||
|
+ modify it under the terms of the GNU Lesser General Public
|
||
|
+ License as published by the Free Software Foundation; either
|
||
|
+ version 2.1 of the License, or (at your option) any later version.
|
||
|
+
|
||
|
+ The GNU C Library is distributed in the hope that it will be useful,
|
||
|
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
|
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||
|
+ Lesser General Public License for more details.
|
||
|
+
|
||
|
+ You should have received a copy of the GNU Lesser General Public
|
||
|
+ License along with the GNU C Library; if not, see
|
||
|
+ <http://www.gnu.org/licenses/>. */
|
||
|
+
|
||
|
+#define strcasecmp __strcasecmp_ppc
|
||
|
+
|
||
|
+#include <string/strcasecmp.c>
|
||
|
diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcasecmp.c b/sysdeps/powerpc/powerpc64/multiarch/strcasecmp.c
|
||
|
index 979e9f1..5ec6885 100644
|
||
|
--- a/sysdeps/powerpc/powerpc64/multiarch/strcasecmp.c
|
||
|
+++ b/sysdeps/powerpc/powerpc64/multiarch/strcasecmp.c
|
||
|
@@ -1,5 +1,5 @@
|
||
|
-/* Multiple versions of strcasecmp.
|
||
|
- Copyright (C) 2013-2014 Free Software Foundation, Inc.
|
||
|
+/* Multiple versions of strcasecmp
|
||
|
+ Copyright (C) 2013-2016 Free Software Foundation, Inc.
|
||
|
This file is part of the GNU C Library.
|
||
|
|
||
|
The GNU C Library is free software; you can redistribute it and/or
|
||
|
@@ -16,25 +16,21 @@
|
||
|
License along with the GNU C Library; if not, see
|
||
|
<http://www.gnu.org/licenses/>. */
|
||
|
|
||
|
-#if IS_IN (libc)
|
||
|
-# include <string.h>
|
||
|
-# define strcasecmp __strcasecmp_ppc
|
||
|
-extern __typeof (__strcasecmp) __strcasecmp_ppc attribute_hidden;
|
||
|
-extern __typeof (__strcasecmp) __strcasecmp_power7 attribute_hidden;
|
||
|
-#endif
|
||
|
+#include <string.h>
|
||
|
+#include <shlib-compat.h>
|
||
|
+#include "init-arch.h"
|
||
|
|
||
|
-#include <string/strcasecmp.c>
|
||
|
-#undef strcasecmp
|
||
|
+extern __typeof (__strcasecmp) __libc_strcasecmp;
|
||
|
|
||
|
-#if IS_IN (libc)
|
||
|
-# include <shlib-compat.h>
|
||
|
-# include "init-arch.h"
|
||
|
+extern __typeof (__strcasecmp) __strcasecmp_ppc attribute_hidden;
|
||
|
+extern __typeof (__strcasecmp) __strcasecmp_power7 attribute_hidden;
|
||
|
+extern __typeof (__strcasecmp) __strcasecmp_power8 attribute_hidden;
|
||
|
|
||
|
-extern __typeof (__strcasecmp) __libc_strcasecmp;
|
||
|
libc_ifunc (__libc_strcasecmp,
|
||
|
- (hwcap & PPC_FEATURE_HAS_VSX)
|
||
|
- ? __strcasecmp_power7
|
||
|
- : __strcasecmp_ppc);
|
||
|
+ (hwcap2 & PPC_FEATURE2_ARCH_2_07)
|
||
|
+ ? __strcasecmp_power8:
|
||
|
+ (hwcap & PPC_FEATURE_HAS_VSX)
|
||
|
+ ? __strcasecmp_power7
|
||
|
+ : __strcasecmp_ppc);
|
||
|
|
||
|
weak_alias (__libc_strcasecmp, strcasecmp)
|
||
|
-#endif
|
||
|
diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncase-power8.S b/sysdeps/powerpc/powerpc64/multiarch/strncase-power8.S
|
||
|
new file mode 100644
|
||
|
index 0000000..01a63b5
|
||
|
--- /dev/null
|
||
|
+++ b/sysdeps/powerpc/powerpc64/multiarch/strncase-power8.S
|
||
|
@@ -0,0 +1,28 @@
|
||
|
+/* Optimized strncasecmp implementation for POWER8.
|
||
|
+ Copyright (C) 2016 Free Software Foundation, Inc.
|
||
|
+ This file is part of the GNU C Library.
|
||
|
+
|
||
|
+ The GNU C Library is free software; you can redistribute it and/or
|
||
|
+ modify it under the terms of the GNU Lesser General Public
|
||
|
+ License as published by the Free Software Foundation; either
|
||
|
+ version 2.1 of the License, or (at your option) any later version.
|
||
|
+
|
||
|
+ The GNU C Library is distributed in the hope that it will be useful,
|
||
|
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
|
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||
|
+ Lesser General Public License for more details.
|
||
|
+
|
||
|
+ You should have received a copy of the GNU Lesser General Public
|
||
|
+ License along with the GNU C Library; if not, see
|
||
|
+ <http://www.gnu.org/licenses/>. */
|
||
|
+
|
||
|
+#include <sysdep.h>
|
||
|
+
|
||
|
+#define __strncasecmp __strncasecmp_power8
|
||
|
+#undef weak_alias
|
||
|
+#define weak_alias(name, alias)
|
||
|
+
|
||
|
+#undef libc_hidden_builtin_def
|
||
|
+#define libc_hidden_builtin_def(name)
|
||
|
+
|
||
|
+#include <sysdeps/powerpc/powerpc64/power8/strncase.S>
|
||
|
diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncase-ppc64.c b/sysdeps/powerpc/powerpc64/multiarch/strncase-ppc64.c
|
||
|
new file mode 100644
|
||
|
index 0000000..c245d77
|
||
|
--- /dev/null
|
||
|
+++ b/sysdeps/powerpc/powerpc64/multiarch/strncase-ppc64.c
|
||
|
@@ -0,0 +1,21 @@
|
||
|
+/* Multiarch strncasecmp for PPC64.
|
||
|
+ Copyright (C) 2016 Free Software Foundation, Inc.
|
||
|
+ This file is part of the GNU C Library.
|
||
|
+
|
||
|
+ The GNU C Library is free software; you can redistribute it and/or
|
||
|
+ modify it under the terms of the GNU Lesser General Public
|
||
|
+ License as published by the Free Software Foundation; either
|
||
|
+ version 2.1 of the License, or (at your option) any later version.
|
||
|
+
|
||
|
+ The GNU C Library is distributed in the hope that it will be useful,
|
||
|
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
|
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||
|
+ Lesser General Public License for more details.
|
||
|
+
|
||
|
+ You should have received a copy of the GNU Lesser General Public
|
||
|
+ License along with the GNU C Library; if not, see
|
||
|
+ <http://www.gnu.org/licenses/>. */
|
||
|
+
|
||
|
+#define strncasecmp __strncasecmp_ppc
|
||
|
+
|
||
|
+#include <string/strncase.c>
|
||
|
diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncase.c b/sysdeps/powerpc/powerpc64/multiarch/strncase.c
|
||
|
index 4339f3a..5bfaf65 100644
|
||
|
--- a/sysdeps/powerpc/powerpc64/multiarch/strncase.c
|
||
|
+++ b/sysdeps/powerpc/powerpc64/multiarch/strncase.c
|
||
|
@@ -16,26 +16,21 @@
|
||
|
License along with the GNU C Library; if not, see
|
||
|
<http://www.gnu.org/licenses/>. */
|
||
|
|
||
|
-#if IS_IN (libc)
|
||
|
-# include <string.h>
|
||
|
-# define strncasecmp __strncasecmp_ppc
|
||
|
-extern __typeof (__strncasecmp) __strncasecmp_ppc attribute_hidden;
|
||
|
-extern __typeof (__strncasecmp) __strncasecmp_power7 attribute_hidden;
|
||
|
-#endif
|
||
|
+#include <string.h>
|
||
|
+#include <shlib-compat.h>
|
||
|
+#include "init-arch.h"
|
||
|
|
||
|
-#include <string/strncase.c>
|
||
|
-#undef strncasecmp
|
||
|
+extern __typeof (__strncasecmp) __libc_strncasecmp;
|
||
|
|
||
|
-#if IS_IN (libc)
|
||
|
-# include <shlib-compat.h>
|
||
|
-# include "init-arch.h"
|
||
|
+extern __typeof (__strncasecmp) __strncasecmp_ppc attribute_hidden;
|
||
|
+extern __typeof (__strncasecmp) __strncasecmp_power7 attribute_hidden;
|
||
|
+extern __typeof (__strncasecmp) __strncasecmp_power8 attribute_hidden;
|
||
|
|
||
|
-/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle
|
||
|
- ifunc symbol properly. */
|
||
|
-extern __typeof (__strncasecmp) __libc_strncasecmp;
|
||
|
libc_ifunc (__libc_strncasecmp,
|
||
|
+ (hwcap2 & PPC_FEATURE2_ARCH_2_07)
|
||
|
+ ? __strncasecmp_power8:
|
||
|
(hwcap & PPC_FEATURE_HAS_VSX)
|
||
|
? __strncasecmp_power7
|
||
|
: __strncasecmp_ppc);
|
||
|
+
|
||
|
weak_alias (__libc_strncasecmp, strncasecmp)
|
||
|
-#endif
|
||
|
diff --git a/sysdeps/powerpc/powerpc64/power8/strcasecmp.S b/sysdeps/powerpc/powerpc64/power8/strcasecmp.S
|
||
|
new file mode 100644
|
||
|
index 0000000..63f6217
|
||
|
--- /dev/null
|
||
|
+++ b/sysdeps/powerpc/powerpc64/power8/strcasecmp.S
|
||
|
@@ -0,0 +1,446 @@
|
||
|
+/* Optimized strcasecmp implementation for PowerPC64.
|
||
|
+ Copyright (C) 2016 Free Software Foundation, Inc.
|
||
|
+ This file is part of the GNU C Library.
|
||
|
+
|
||
|
+ The GNU C Library is free software; you can redistribute it and/or
|
||
|
+ modify it under the terms of the GNU Lesser General Public
|
||
|
+ License as published by the Free Software Foundation; either
|
||
|
+ version 2.1 of the License, or (at your option) any later version.
|
||
|
+
|
||
|
+ The GNU C Library is distributed in the hope that it will be useful,
|
||
|
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
|
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||
|
+ Lesser General Public License for more details.
|
||
|
+
|
||
|
+ You should have received a copy of the GNU Lesser General Public
|
||
|
+ License along with the GNU C Library; if not, see
|
||
|
+ <http://www.gnu.org/licenses/>. */
|
||
|
+
|
||
|
+#include <sysdep.h>
|
||
|
+#include <locale-defines.h>
|
||
|
+
|
||
|
+/* int [r3] strcasecmp (const char *s1 [r3], const char *s2 [r4] ) */
|
||
|
+
|
||
|
+#ifndef USE_AS_STRNCASECMP
|
||
|
+# define __STRCASECMP __strcasecmp
|
||
|
+# define STRCASECMP strcasecmp
|
||
|
+#else
|
||
|
+# define __STRCASECMP __strncasecmp
|
||
|
+# define STRCASECMP strncasecmp
|
||
|
+#endif
|
||
|
+/* Convert 16 bytes to lowercase and compare */
|
||
|
+#define TOLOWER() \
|
||
|
+ vaddubm v8, v4, v1; \
|
||
|
+ vaddubm v7, v4, v3; \
|
||
|
+ vcmpgtub v8, v8, v2; \
|
||
|
+ vsel v4, v7, v4, v8; \
|
||
|
+ vaddubm v8, v5, v1; \
|
||
|
+ vaddubm v7, v5, v3; \
|
||
|
+ vcmpgtub v8, v8, v2; \
|
||
|
+ vsel v5, v7, v5, v8; \
|
||
|
+ vcmpequb. v7, v5, v4;
|
||
|
+
|
||
|
+/* Get 16 bytes for unaligned case. */
|
||
|
+#ifdef __LITTLE_ENDIAN__
|
||
|
+#define GET16BYTES(reg1, reg2, reg3) \
|
||
|
+ lvx reg1, 0, reg2; \
|
||
|
+ vcmpequb. v8, v0, reg1; \
|
||
|
+ beq cr6, 1f; \
|
||
|
+ vspltisb v9, 0; \
|
||
|
+ b 2f; \
|
||
|
+ .align 4; \
|
||
|
+1: \
|
||
|
+ addi r6, reg2, 16; \
|
||
|
+ lvx v9, 0, r6; \
|
||
|
+2: \
|
||
|
+ vperm reg1, v9, reg1, reg3;
|
||
|
+#else
|
||
|
+#define GET16BYTES(reg1, reg2, reg3) \
|
||
|
+ lvx reg1, 0, reg2; \
|
||
|
+ vcmpequb. v8, v0, reg1; \
|
||
|
+ beq cr6, 1f; \
|
||
|
+ vspltisb v9, 0; \
|
||
|
+ b 2f; \
|
||
|
+ .align 4; \
|
||
|
+1: \
|
||
|
+ addi r6, reg2, 16; \
|
||
|
+ lvx v9, 0, r6; \
|
||
|
+2: \
|
||
|
+ vperm reg1, reg1, v9, reg3;
|
||
|
+#endif
|
||
|
+
|
||
|
+/* Check null in v4, v5 and convert to lower. */
|
||
|
+#define CHECKNULLANDCONVERT() \
|
||
|
+ vcmpequb. v7, v0, v5; \
|
||
|
+ beq cr6, 3f; \
|
||
|
+ vcmpequb. v7, v0, v4; \
|
||
|
+ beq cr6, 3f; \
|
||
|
+ b L(null_found); \
|
||
|
+ .align 4; \
|
||
|
+3: \
|
||
|
+ TOLOWER()
|
||
|
+
|
||
|
+#ifdef _ARCH_PWR8
|
||
|
+# define VCLZD_V8_v7 vclzd v8, v7;
|
||
|
+# define MFVRD_R3_V1 mfvrd r3, v1;
|
||
|
+# define VSUBUDM_V9_V8 vsubudm v9, v9, v8;
|
||
|
+# define VPOPCNTD_V8_V8 vpopcntd v8, v8;
|
||
|
+# define VADDUQM_V7_V8 vadduqm v9, v7, v8;
|
||
|
+#else
|
||
|
+# define VCLZD_V8_v7 .long 0x11003fc2
|
||
|
+# define MFVRD_R3_V1 .long 0x7c230067
|
||
|
+# define VSUBUDM_V9_V8 .long 0x112944c0
|
||
|
+# define VPOPCNTD_V8_V8 .long 0x110047c3
|
||
|
+# define VADDUQM_V7_V8 .long 0x11274100
|
||
|
+#endif
|
||
|
+
|
||
|
+ .machine power7
|
||
|
+
|
||
|
+ENTRY (__STRCASECMP)
|
||
|
+#ifdef USE_AS_STRNCASECMP
|
||
|
+ CALL_MCOUNT 3
|
||
|
+#else
|
||
|
+ CALL_MCOUNT 2
|
||
|
+#endif
|
||
|
+#define rRTN r3 /* Return value */
|
||
|
+#define rSTR1 r10 /* 1st string */
|
||
|
+#define rSTR2 r4 /* 2nd string */
|
||
|
+#define rCHAR1 r6 /* Byte read from 1st string */
|
||
|
+#define rCHAR2 r7 /* Byte read from 2nd string */
|
||
|
+#define rADDR1 r8 /* Address of tolower(rCHAR1) */
|
||
|
+#define rADDR2 r12 /* Address of tolower(rCHAR2) */
|
||
|
+#define rLWR1 r8 /* Word tolower(rCHAR1) */
|
||
|
+#define rLWR2 r12 /* Word tolower(rCHAR2) */
|
||
|
+#define rTMP r9
|
||
|
+#define rLOC r11 /* Default locale address */
|
||
|
+
|
||
|
+ cmpd cr7, rRTN, rSTR2
|
||
|
+
|
||
|
+ /* Get locale address. */
|
||
|
+ ld rTMP, __libc_tsd_LOCALE@got@tprel(r2)
|
||
|
+ add rLOC, rTMP, __libc_tsd_LOCALE@tls
|
||
|
+ ld rLOC, 0(rLOC)
|
||
|
+
|
||
|
+ mr rSTR1, rRTN
|
||
|
+ li rRTN, 0
|
||
|
+ beqlr cr7
|
||
|
+#ifdef USE_AS_STRNCASECMP
|
||
|
+ cmpdi cr7, r5, 0
|
||
|
+ beq cr7, L(retnull)
|
||
|
+ cmpdi cr7, r5, 16
|
||
|
+ blt cr7, L(bytebybyte)
|
||
|
+#endif
|
||
|
+ vspltisb v0, 0
|
||
|
+ vspltisb v8, -1
|
||
|
+ /* Check for null in initial characters.
|
||
|
+ Check max of 16 char depending on the alignment.
|
||
|
+ If null is present, proceed byte by byte. */
|
||
|
+ lvx v4, 0, rSTR1
|
||
|
+#ifdef __LITTLE_ENDIAN__
|
||
|
+ lvsr v10, 0, rSTR1 /* Compute mask. */
|
||
|
+ vperm v9, v8, v4, v10 /* Mask bits that are not part of string. */
|
||
|
+#else
|
||
|
+ lvsl v10, 0, rSTR1
|
||
|
+ vperm v9, v4, v8, v10
|
||
|
+#endif
|
||
|
+ vcmpequb. v9, v0, v9 /* Check for null bytes. */
|
||
|
+ bne cr6, L(bytebybyte)
|
||
|
+ lvx v5, 0, rSTR2
|
||
|
+ /* Calculate alignment. */
|
||
|
+#ifdef __LITTLE_ENDIAN__
|
||
|
+ lvsr v6, 0, rSTR2
|
||
|
+ vperm v9, v8, v5, v6 /* Mask bits that are not part of string. */
|
||
|
+#else
|
||
|
+ lvsl v6, 0, rSTR2
|
||
|
+ vperm v9, v5, v8, v6
|
||
|
+#endif
|
||
|
+ vcmpequb. v9, v0, v9 /* Check for null bytes. */
|
||
|
+ bne cr6, L(bytebybyte)
|
||
|
+ /* Check if locale has non ascii characters. */
|
||
|
+ ld rTMP, 0(rLOC)
|
||
|
+ addi r6, rTMP,LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES
|
||
|
+ lwz rTMP, 0(r6)
|
||
|
+ cmpdi cr7, rTMP, 1
|
||
|
+ beq cr7, L(bytebybyte)
|
||
|
+
|
||
|
+ /* Load vector registers with values used for TOLOWER. */
|
||
|
+ /* Load v1 = 0xbf, v2 = 0x19 v3 = 0x20 in each byte. */
|
||
|
+ vspltisb v3, 2
|
||
|
+ vspltisb v9, 4
|
||
|
+ vsl v3, v3, v9
|
||
|
+ vaddubm v1, v3, v3
|
||
|
+ vnor v1, v1, v1
|
||
|
+ vspltisb v2, 7
|
||
|
+ vsububm v2, v3, v2
|
||
|
+
|
||
|
+ andi. rADDR1, rSTR1, 0xF
|
||
|
+ beq cr0, L(align)
|
||
|
+ addi r6, rSTR1, 16
|
||
|
+ lvx v9, 0, r6
|
||
|
+ /* Compute 16 bytes from previous two loads. */
|
||
|
+#ifdef __LITTLE_ENDIAN__
|
||
|
+ vperm v4, v9, v4, v10
|
||
|
+#else
|
||
|
+ vperm v4, v4, v9, v10
|
||
|
+#endif
|
||
|
+L(align):
|
||
|
+ andi. rADDR2, rSTR2, 0xF
|
||
|
+ beq cr0, L(align1)
|
||
|
+ addi r6, rSTR2, 16
|
||
|
+ lvx v9, 0, r6
|
||
|
+ /* Compute 16 bytes from previous two loads. */
|
||
|
+#ifdef __LITTLE_ENDIAN__
|
||
|
+ vperm v5, v9, v5, v6
|
||
|
+#else
|
||
|
+ vperm v5, v5, v9, v6
|
||
|
+#endif
|
||
|
+L(align1):
|
||
|
+ CHECKNULLANDCONVERT()
|
||
|
+ blt cr6, L(match)
|
||
|
+ b L(different)
|
||
|
+ .align 4
|
||
|
+L(match):
|
||
|
+ clrldi r6, rSTR1, 60
|
||
|
+ subfic r7, r6, 16
|
||
|
+#ifdef USE_AS_STRNCASECMP
|
||
|
+ sub r5, r5, r7
|
||
|
+#endif
|
||
|
+ add rSTR1, rSTR1, r7
|
||
|
+ add rSTR2, rSTR2, r7
|
||
|
+ andi. rADDR2, rSTR2, 0xF
|
||
|
+ addi rSTR1, rSTR1, -16
|
||
|
+ addi rSTR2, rSTR2, -16
|
||
|
+ beq cr0, L(aligned)
|
||
|
+#ifdef __LITTLE_ENDIAN__
|
||
|
+ lvsr v6, 0, rSTR2
|
||
|
+#else
|
||
|
+ lvsl v6, 0, rSTR2
|
||
|
+#endif
|
||
|
+ /* There are 2 loops depending on the input alignment.
|
||
|
+ Each loop gets 16 bytes from s1 and s2, check for null,
|
||
|
+ convert to lowercase and compare. Loop till difference
|
||
|
+ or null occurs. */
|
||
|
+L(s1_align):
|
||
|
+ addi rSTR1, rSTR1, 16
|
||
|
+ addi rSTR2, rSTR2, 16
|
||
|
+#ifdef USE_AS_STRNCASECMP
|
||
|
+ cmpdi cr7, r5, 16
|
||
|
+ blt cr7, L(bytebybyte)
|
||
|
+ addi r5, r5, -16
|
||
|
+#endif
|
||
|
+ lvx v4, 0, rSTR1
|
||
|
+ GET16BYTES(v5, rSTR2, v6)
|
||
|
+ CHECKNULLANDCONVERT()
|
||
|
+ blt cr6, L(s1_align)
|
||
|
+ b L(different)
|
||
|
+ .align 4
|
||
|
+L(aligned):
|
||
|
+ addi rSTR1, rSTR1, 16
|
||
|
+ addi rSTR2, rSTR2, 16
|
||
|
+#ifdef USE_AS_STRNCASECMP
|
||
|
+ cmpdi cr7, r5, 16
|
||
|
+ blt cr7, L(bytebybyte)
|
||
|
+ addi r5, r5, -16
|
||
|
+#endif
|
||
|
+ lvx v4, 0, rSTR1
|
||
|
+ lvx v5, 0, rSTR2
|
||
|
+ CHECKNULLANDCONVERT()
|
||
|
+ blt cr6, L(aligned)
|
||
|
+
|
||
|
+ /* Calculate and return the difference. */
|
||
|
+L(different):
|
||
|
+ vaddubm v1, v3, v3
|
||
|
+ vcmpequb v7, v0, v7
|
||
|
+#ifdef __LITTLE_ENDIAN__
|
||
|
+ /* Count trailing zero. */
|
||
|
+ vspltisb v8, -1
|
||
|
+ VADDUQM_V7_V8
|
||
|
+ vandc v8, v9, v7
|
||
|
+ VPOPCNTD_V8_V8
|
||
|
+ vspltb v6, v8, 15
|
||
|
+ vcmpequb. v6, v6, v1
|
||
|
+ blt cr6, L(shift8)
|
||
|
+#else
|
||
|
+ /* Count leading zero. */
|
||
|
+ VCLZD_V8_v7
|
||
|
+ vspltb v6, v8, 7
|
||
|
+ vcmpequb. v6, v6, v1
|
||
|
+ blt cr6, L(shift8)
|
||
|
+ vsro v8, v8, v1
|
||
|
+#endif
|
||
|
+ b L(skipsum)
|
||
|
+ .align 4
|
||
|
+L(shift8):
|
||
|
+ vsumsws v8, v8, v0
|
||
|
+L(skipsum):
|
||
|
+#ifdef __LITTLE_ENDIAN__
|
||
|
+ /* Shift registers based on trailing zero count. */
|
||
|
+ vsro v6, v5, v8
|
||
|
+ vsro v7, v4, v8
|
||
|
+ /* Merge and move to GPR. */
|
||
|
+ vmrglb v6, v6, v7
|
||
|
+ vslo v1, v6, v1
|
||
|
+ MFVRD_R3_V1
|
||
|
+ /* Place the characters that are different in first position. */
|
||
|
+ sldi rSTR2, rRTN, 56
|
||
|
+ srdi rSTR2, rSTR2, 56
|
||
|
+ sldi rSTR1, rRTN, 48
|
||
|
+ srdi rSTR1, rSTR1, 56
|
||
|
+#else
|
||
|
+ vslo v6, v5, v8
|
||
|
+ vslo v7, v4, v8
|
||
|
+ vmrghb v1, v6, v7
|
||
|
+ MFVRD_R3_V1
|
||
|
+ srdi rSTR2, rRTN, 48
|
||
|
+ sldi rSTR2, rSTR2, 56
|
||
|
+ srdi rSTR2, rSTR2, 56
|
||
|
+ srdi rSTR1, rRTN, 56
|
||
|
+#endif
|
||
|
+ subf rRTN, rSTR1, rSTR2
|
||
|
+ extsw rRTN, rRTN
|
||
|
+ blr
|
||
|
+
|
||
|
+ .align 4
|
||
|
+ /* OK. We've hit the end of the string. We need to be careful that
|
||
|
+ we don't compare two strings as different because of junk beyond
|
||
|
+ the end of the strings... */
|
||
|
+L(null_found):
|
||
|
+ vaddubm v10, v3, v3
|
||
|
+#ifdef __LITTLE_ENDIAN__
|
||
|
+ /* Count trailing zero. */
|
||
|
+ vspltisb v8, -1
|
||
|
+ VADDUQM_V7_V8
|
||
|
+ vandc v8, v9, v7
|
||
|
+ VPOPCNTD_V8_V8
|
||
|
+ vspltb v6, v8, 15
|
||
|
+ vcmpequb. v6, v6, v10
|
||
|
+ blt cr6, L(shift_8)
|
||
|
+#else
|
||
|
+ /* Count leading zero. */
|
||
|
+ VCLZD_V8_v7
|
||
|
+ vspltb v6, v8, 7
|
||
|
+ vcmpequb. v6, v6, v10
|
||
|
+ blt cr6, L(shift_8)
|
||
|
+ vsro v8, v8, v10
|
||
|
+#endif
|
||
|
+ b L(skipsum1)
|
||
|
+ .align 4
|
||
|
+L(shift_8):
|
||
|
+ vsumsws v8, v8, v0
|
||
|
+L(skipsum1):
|
||
|
+ /* Calculate shift count based on count of zero. */
|
||
|
+ vspltisb v10, 7
|
||
|
+ vslb v10, v10, v10
|
||
|
+ vsldoi v9, v0, v10, 1
|
||
|
+ VSUBUDM_V9_V8
|
||
|
+ vspltisb v8, 8
|
||
|
+ vsldoi v8, v0, v8, 1
|
||
|
+ VSUBUDM_V9_V8
|
||
|
+ /* Shift and remove junk after null character. */
|
||
|
+#ifdef __LITTLE_ENDIAN__
|
||
|
+ vslo v5, v5, v9
|
||
|
+ vslo v4, v4, v9
|
||
|
+#else
|
||
|
+ vsro v5, v5, v9
|
||
|
+ vsro v4, v4, v9
|
||
|
+#endif
|
||
|
+ /* Convert and compare 16 bytes. */
|
||
|
+ TOLOWER()
|
||
|
+ blt cr6, L(retnull)
|
||
|
+ b L(different)
|
||
|
+ .align 4
|
||
|
+L(retnull):
|
||
|
+ li rRTN, 0
|
||
|
+ blr
|
||
|
+ .align 4
|
||
|
+L(bytebybyte):
|
||
|
+ /* Unrolling loop for POWER: loads are done with 'lbz' plus
|
||
|
+ offset and string descriptors are only updated in the end
|
||
|
+ of loop unrolling. */
|
||
|
+ ld rLOC, LOCALE_CTYPE_TOLOWER(rLOC)
|
||
|
+ lbz rCHAR1, 0(rSTR1) /* Load char from s1 */
|
||
|
+ lbz rCHAR2, 0(rSTR2) /* Load char from s2 */
|
||
|
+#ifdef USE_AS_STRNCASECMP
|
||
|
+ rldicl rTMP, r5, 62, 2
|
||
|
+ cmpdi cr7, rTMP, 0
|
||
|
+ beq cr7, L(lessthan4)
|
||
|
+ mtctr rTMP
|
||
|
+#endif
|
||
|
+L(loop):
|
||
|
+ cmpdi rCHAR1, 0 /* *s1 == '\0' ? */
|
||
|
+ sldi rADDR1, rCHAR1, 2 /* Calculate address for tolower(*s1) */
|
||
|
+ sldi rADDR2, rCHAR2, 2 /* Calculate address for tolower(*s2) */
|
||
|
+ lwzx rLWR1, rLOC, rADDR1 /* Load tolower(*s1) */
|
||
|
+ lwzx rLWR2, rLOC, rADDR2 /* Load tolower(*s2) */
|
||
|
+ cmpw cr1, rLWR1, rLWR2 /* r = tolower(*s1) == tolower(*s2) ? */
|
||
|
+ crorc 4*cr1+eq,eq,4*cr1+eq /* (*s1 == '\0') || (r == 0) */
|
||
|
+ beq cr1, L(done)
|
||
|
+ lbz rCHAR1, 1(rSTR1)
|
||
|
+ lbz rCHAR2, 1(rSTR2)
|
||
|
+ cmpdi rCHAR1, 0
|
||
|
+ sldi rADDR1, rCHAR1, 2
|
||
|
+ sldi rADDR2, rCHAR2, 2
|
||
|
+ lwzx rLWR1, rLOC, rADDR1
|
||
|
+ lwzx rLWR2, rLOC, rADDR2
|
||
|
+ cmpw cr1, rLWR1, rLWR2
|
||
|
+ crorc 4*cr1+eq,eq,4*cr1+eq
|
||
|
+ beq cr1, L(done)
|
||
|
+ lbz rCHAR1, 2(rSTR1)
|
||
|
+ lbz rCHAR2, 2(rSTR2)
|
||
|
+ cmpdi rCHAR1, 0
|
||
|
+ sldi rADDR1, rCHAR1, 2
|
||
|
+ sldi rADDR2, rCHAR2, 2
|
||
|
+ lwzx rLWR1, rLOC, rADDR1
|
||
|
+ lwzx rLWR2, rLOC, rADDR2
|
||
|
+ cmpw cr1, rLWR1, rLWR2
|
||
|
+ crorc 4*cr1+eq,eq,4*cr1+eq
|
||
|
+ beq cr1, L(done)
|
||
|
+ lbz rCHAR1, 3(rSTR1)
|
||
|
+ lbz rCHAR2, 3(rSTR2)
|
||
|
+ cmpdi rCHAR1, 0
|
||
|
+ /* Increment both string descriptors */
|
||
|
+ addi rSTR1, rSTR1, 4
|
||
|
+ addi rSTR2, rSTR2, 4
|
||
|
+ sldi rADDR1, rCHAR1, 2
|
||
|
+ sldi rADDR2, rCHAR2, 2
|
||
|
+ lwzx rLWR1, rLOC, rADDR1
|
||
|
+ lwzx rLWR2, rLOC, rADDR2
|
||
|
+ cmpw cr1, rLWR1, rLWR2
|
||
|
+ crorc 4*cr1+eq,eq,4*cr1+eq
|
||
|
+ beq cr1, L(done)
|
||
|
+ lbz rCHAR1, 0(rSTR1) /* Load char from s1 */
|
||
|
+ lbz rCHAR2, 0(rSTR2) /* Load char from s2 */
|
||
|
+#ifdef USE_AS_STRNCASECMP
|
||
|
+ bdnz L(loop)
|
||
|
+#else
|
||
|
+ b L(loop)
|
||
|
+#endif
|
||
|
+#ifdef USE_AS_STRNCASECMP
|
||
|
+L(lessthan4):
|
||
|
+ clrldi r5, r5, 62
|
||
|
+ cmpdi cr7, r5, 0
|
||
|
+ beq cr7, L(retnull)
|
||
|
+ mtctr r5
|
||
|
+L(loop1):
|
||
|
+ cmpdi rCHAR1, 0
|
||
|
+ sldi rADDR1, rCHAR1, 2
|
||
|
+ sldi rADDR2, rCHAR2, 2
|
||
|
+ lwzx rLWR1, rLOC, rADDR1
|
||
|
+ lwzx rLWR2, rLOC, rADDR2
|
||
|
+ cmpw cr1, rLWR1, rLWR2
|
||
|
+ crorc 4*cr1+eq,eq,4*cr1+eq
|
||
|
+ beq cr1, L(done)
|
||
|
+ addi rSTR1, rSTR1, 1
|
||
|
+ addi rSTR2, rSTR2, 1
|
||
|
+ lbz rCHAR1, 0(rSTR1)
|
||
|
+ lbz rCHAR2, 0(rSTR2)
|
||
|
+ bdnz L(loop1)
|
||
|
+#endif
|
||
|
+L(done):
|
||
|
+ subf r0, rLWR2, rLWR1
|
||
|
+ extsw rRTN, r0
|
||
|
+ blr
|
||
|
+END (__STRCASECMP)
|
||
|
+
|
||
|
+weak_alias (__STRCASECMP, STRCASECMP)
|
||
|
+libc_hidden_builtin_def (__STRCASECMP)
|
||
|
diff --git a/sysdeps/powerpc/powerpc64/power8/strncase.S b/sysdeps/powerpc/powerpc64/power8/strncase.S
|
||
|
new file mode 100644
|
||
|
index 0000000..7ce2ed0
|
||
|
--- /dev/null
|
||
|
+++ b/sysdeps/powerpc/powerpc64/power8/strncase.S
|
||
|
@@ -0,0 +1,20 @@
|
||
|
+/* Optimized strncasecmp implementation for POWER8.
|
||
|
+ Copyright (C) 2016 Free Software Foundation, Inc.
|
||
|
+ This file is part of the GNU C Library.
|
||
|
+
|
||
|
+ The GNU C Library is free software; you can redistribute it and/or
|
||
|
+ modify it under the terms of the GNU Lesser General Public
|
||
|
+ License as published by the Free Software Foundation; either
|
||
|
+ version 2.1 of the License, or (at your option) any later version.
|
||
|
+
|
||
|
+ The GNU C Library is distributed in the hope that it will be useful,
|
||
|
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
|
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||
|
+ Lesser General Public License for more details.
|
||
|
+
|
||
|
+ You should have received a copy of the GNU Lesser General Public
|
||
|
+ License along with the GNU C Library; if not, see
|
||
|
+ <http://www.gnu.org/licenses/>. */
|
||
|
+
|
||
|
+#define USE_AS_STRNCASECMP 1
|
||
|
+#include <sysdeps/powerpc/powerpc64/power8/strcasecmp.S>
|
||
|
--
|
||
|
2.1.0
|