You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
323 lines
11 KiB
323 lines
11 KiB
From f34499b3a506359f84fcb63a125e44d4a8e4ee68 Mon Sep 17 00:00:00 2001 |
|
From: Stefan Liebler <stli@linux.vnet.ibm.com> |
|
Date: Thu, 8 Oct 2015 13:23:50 +0200 |
|
Subject: [PATCH 29/30] S390: Optimize wmemcmp. |
|
|
|
upstream-commit-id: f21216015b7395c535abc01b9585a1ae3ceaa132 |
|
https://www.sourceware.org/ml/libc-alpha/2015-07/msg00086.html |
|
|
|
This patch provides an optimized version of wmemcmp with the z13 vector |
|
instructions. |
|
|
|
ChangeLog: |
|
|
|
* sysdeps/s390/multiarch/wmemcmp-c.c: New File. |
|
* sysdeps/s390/multiarch/wmemcmp-vx.S: Likewise. |
|
* sysdeps/s390/multiarch/wmemcmp.c: Likewise. |
|
* sysdeps/s390/multiarch/Makefile |
|
(sysdep_routines): Add wmemcmp functions. |
|
* sysdeps/s390/multiarch/ifunc-impl-list.c |
|
(__libc_ifunc_impl_list): Add ifunc test for wmemcmp. |
|
* benchtests/bench-wmemcmp.c: New File. |
|
* benchtests/Makefile (wcsmbs-bench): Add wmemcmp. |
|
--- |
|
benchtests/Makefile | 2 +- |
|
benchtests/bench-wmemcmp.c | 20 +++++ |
|
sysdeps/s390/multiarch/Makefile | 3 +- |
|
sysdeps/s390/multiarch/ifunc-impl-list.c | 2 + |
|
sysdeps/s390/multiarch/wmemcmp-c.c | 26 ++++++ |
|
sysdeps/s390/multiarch/wmemcmp-vx.S | 149 +++++++++++++++++++++++++++++++ |
|
sysdeps/s390/multiarch/wmemcmp.c | 27 ++++++ |
|
7 files changed, 227 insertions(+), 2 deletions(-) |
|
create mode 100644 benchtests/bench-wmemcmp.c |
|
create mode 100644 sysdeps/s390/multiarch/wmemcmp-c.c |
|
create mode 100644 sysdeps/s390/multiarch/wmemcmp-vx.S |
|
create mode 100644 sysdeps/s390/multiarch/wmemcmp.c |
|
|
|
diff --git a/benchtests/Makefile b/benchtests/Makefile |
|
index b5edfdd..911b6df 100644 |
|
--- a/benchtests/Makefile |
|
+++ b/benchtests/Makefile |
|
@@ -40,7 +40,7 @@ string-bench := bcopy bzero memccpy memchr memcmp memcpy memmem memmove \ |
|
strspn strstr strcpy_chk stpcpy_chk memrchr strsep strtok |
|
wcsmbs-bench := wcslen wcsnlen wcscpy wcpcpy wcsncpy wcpncpy wcscat wcsncat \ |
|
wcsncmp wcsncmp wcschr wcschrnul wcsrchr wcsspn wcspbrk wcscspn \ |
|
- wmemchr wmemset |
|
+ wmemchr wmemset wmemcmp |
|
string-bench-all := $(string-bench) ${wcsmbs-bench} |
|
|
|
stdlib-bench := strtod |
|
diff --git a/benchtests/bench-wmemcmp.c b/benchtests/bench-wmemcmp.c |
|
new file mode 100644 |
|
index 0000000..8b33f89 |
|
--- /dev/null |
|
+++ b/benchtests/bench-wmemcmp.c |
|
@@ -0,0 +1,20 @@ |
|
+/* Measure wmemcmp functions. |
|
+ Copyright (C) 2015 Free Software Foundation, Inc. |
|
+ This file is part of the GNU C Library. |
|
+ |
|
+ The GNU C Library is free software; you can redistribute it and/or |
|
+ modify it under the terms of the GNU Lesser General Public |
|
+ License as published by the Free Software Foundation; either |
|
+ version 2.1 of the License, or (at your option) any later version. |
|
+ |
|
+ The GNU C Library is distributed in the hope that it will be useful, |
|
+ but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
+ Lesser General Public License for more details. |
|
+ |
|
+ You should have received a copy of the GNU Lesser General Public |
|
+ License along with the GNU C Library; if not, see |
|
+ <http://www.gnu.org/licenses/>. */ |
|
+ |
|
+#define WIDE 1 |
|
+#include "bench-memcmp.c" |
|
diff --git a/sysdeps/s390/multiarch/Makefile b/sysdeps/s390/multiarch/Makefile |
|
index eac88e0..929a545 100644 |
|
--- a/sysdeps/s390/multiarch/Makefile |
|
+++ b/sysdeps/s390/multiarch/Makefile |
|
@@ -38,5 +38,6 @@ sysdep_routines += wcslen wcslen-vx wcslen-c \ |
|
wcspbrk wcspbrk-vx wcspbrk-c \ |
|
wcscspn wcscspn-vx wcscspn-c \ |
|
wmemchr wmemchr-vx wmemchr-c \ |
|
- wmemset wmemset-vx wmemset-c |
|
+ wmemset wmemset-vx wmemset-c \ |
|
+ wmemcmp wmemcmp-vx wmemcmp-c |
|
endif |
|
diff --git a/sysdeps/s390/multiarch/ifunc-impl-list.c b/sysdeps/s390/multiarch/ifunc-impl-list.c |
|
index 44d534b..5ea258b 100644 |
|
--- a/sysdeps/s390/multiarch/ifunc-impl-list.c |
|
+++ b/sysdeps/s390/multiarch/ifunc-impl-list.c |
|
@@ -135,6 +135,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, |
|
|
|
IFUNC_VX_IMPL (wmemset); |
|
|
|
+ IFUNC_VX_IMPL (wmemcmp); |
|
+ |
|
#endif /* HAVE_S390_VX_ASM_SUPPORT */ |
|
|
|
return i; |
|
diff --git a/sysdeps/s390/multiarch/wmemcmp-c.c b/sysdeps/s390/multiarch/wmemcmp-c.c |
|
new file mode 100644 |
|
index 0000000..b43e5d4 |
|
--- /dev/null |
|
+++ b/sysdeps/s390/multiarch/wmemcmp-c.c |
|
@@ -0,0 +1,26 @@ |
|
+/* Default wmemcmp implementation for S/390. |
|
+ Copyright (C) 2015 Free Software Foundation, Inc. |
|
+ This file is part of the GNU C Library. |
|
+ |
|
+ The GNU C Library is free software; you can redistribute it and/or |
|
+ modify it under the terms of the GNU Lesser General Public |
|
+ License as published by the Free Software Foundation; either |
|
+ version 2.1 of the License, or (at your option) any later version. |
|
+ |
|
+ The GNU C Library is distributed in the hope that it will be useful, |
|
+ but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
+ Lesser General Public License for more details. |
|
+ |
|
+ You should have received a copy of the GNU Lesser General Public |
|
+ License along with the GNU C Library; if not, see |
|
+ <http://www.gnu.org/licenses/>. */ |
|
+ |
|
+#if defined HAVE_S390_VX_ASM_SUPPORT && !defined NOT_IN_libc |
|
+# define WMEMCMP __wmemcmp_c |
|
+ |
|
+# include <wchar.h> |
|
+extern __typeof (wmemcmp) __wmemcmp_c; |
|
+ |
|
+# include <wcsmbs/wmemcmp.c> |
|
+#endif |
|
diff --git a/sysdeps/s390/multiarch/wmemcmp-vx.S b/sysdeps/s390/multiarch/wmemcmp-vx.S |
|
new file mode 100644 |
|
index 0000000..b509bef |
|
--- /dev/null |
|
+++ b/sysdeps/s390/multiarch/wmemcmp-vx.S |
|
@@ -0,0 +1,149 @@ |
|
+/* Vector Optimized 32/64 bit S/390 version of wmemcmp. |
|
+ Copyright (C) 2015 Free Software Foundation, Inc. |
|
+ This file is part of the GNU C Library. |
|
+ |
|
+ The GNU C Library is free software; you can redistribute it and/or |
|
+ modify it under the terms of the GNU Lesser General Public |
|
+ License as published by the Free Software Foundation; either |
|
+ version 2.1 of the License, or (at your option) any later version. |
|
+ |
|
+ The GNU C Library is distributed in the hope that it will be useful, |
|
+ but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
+ Lesser General Public License for more details. |
|
+ |
|
+ You should have received a copy of the GNU Lesser General Public |
|
+ License along with the GNU C Library; if not, see |
|
+ <http://www.gnu.org/licenses/>. */ |
|
+ |
|
+#if defined HAVE_S390_VX_ASM_SUPPORT && !defined NOT_IN_libc |
|
+ |
|
+# include "sysdep.h" |
|
+# include "asm-syntax.h" |
|
+ |
|
+ .text |
|
+ |
|
+/* int wmemcmp (const wchar_t *s1, const wchar_t *s2, size_t n) |
|
+ Compare at most n characters of two wchar_t-arrays. |
|
+ |
|
+ Register usage: |
|
+ -r0=tmp |
|
+ -r1=number of blocks |
|
+ -r2=s1 |
|
+ -r3=s2 |
|
+ -r4=n |
|
+ -r5=current_len |
|
+ -v16=part of s1 |
|
+ -v17=part of s2 |
|
+ -v18=index of unequal |
|
+*/ |
|
+ENTRY(__wmemcmp_vx) |
|
+ .machine "z13" |
|
+ .machinemode "zarch_nohighgprs" |
|
+ |
|
+# if !defined __s390x__ |
|
+ llgfr %r4,%r4 |
|
+# endif /* !defined __s390x__ */ |
|
+ clgije %r4,0,.Lend_equal /* Nothing to do if n == 0. */ |
|
+ |
|
+ /* Check range of maxlen and convert to byte-count. */ |
|
+# ifdef __s390x__ |
|
+ tmhh %r4,49152 /* Test bit 0 or 1 of maxlen. */ |
|
+ lghi %r1,-4 /* Max byte-count is 18446744073709551612. */ |
|
+# else |
|
+ tmlh %r4,49152 /* Test bit 0 or 1 of maxlen. */ |
|
+ llilf %r1,4294967292 /* Max byte-count is 4294967292. */ |
|
+# endif /* !__s390x__ */ |
|
+ sllg %r4,%r4,2 /* Convert character-count to byte-count. */ |
|
+ locgrne %r4,%r1 /* Use max byte-count, if bit 0/1 was one. */ |
|
+ |
|
+ lghi %r5,0 /* current_len = 0. */ |
|
+ |
|
+ clgijh %r4,16,.Lgt16 |
|
+ |
|
+.Lremaining: |
|
+ aghi %r4,-1 /* vll needs highest index. */ |
|
+ vll %v16,%r4,0(%r2) |
|
+ vll %v17,%r4,0(%r3) |
|
+ vfenef %v18,%v16,%v17 /* Compare not equal. */ |
|
+ vlgvb %r1,%v18,7 /* Load unequal index or 16 if not found. */ |
|
+ clrj %r1,%r4,12,.Lfound2 /* r1 <= r4 -> unequal within loaded |
|
+ bytes. */ |
|
+ |
|
+.Lend_equal: |
|
+ lghi %r2,0 |
|
+ br %r14 |
|
+ |
|
+.Lfound: |
|
+ /* vfenefs found an unequal element. |
|
+ This instruction compares unsigned words, but wchar_t is signed. |
|
+ Thus we have to compare the found element again. */ |
|
+ vlgvb %r1,%v18,7 /* Extract not equal byte-index. */ |
|
+.Lfound2: |
|
+ srl %r1,2 /* And convert it to character-index. */ |
|
+ vlgvf %r0,%v16,0(%r1) /* Load character-values. */ |
|
+ vlgvf %r1,%v17,0(%r1) |
|
+ cr %r0,%r1 |
|
+ je .Lend_equal |
|
+ lghi %r2,1 |
|
+ lghi %r1,-1 |
|
+ locgrl %r2,%r1 |
|
+ br %r14 |
|
+ |
|
+.Lgt16: |
|
+ clgijh %r4,64,.Lpreloop64 |
|
+ |
|
+.Lpreloop16: |
|
+ srlg %r1,%r4,4 /* Split into 16byte blocks */ |
|
+.Lloop16: |
|
+ vl %v16,0(%r5,%r2) |
|
+ vl %v17,0(%r5,%r3) |
|
+ aghi %r5,16 |
|
+ vfenefs %v18,%v16,%v17 /* Compare not equal. */ |
|
+ jno .Lfound |
|
+ brctg %r1,.Lloop16 /* Loop until all blocks are processed. */ |
|
+ |
|
+ llgfr %r4,%r4 |
|
+ nilf %r4,15 /* Get remaining bytes */ |
|
+ locgre %r2,%r4 |
|
+ ber %r14 |
|
+ la %r2,0(%r5,%r2) |
|
+ la %r3,0(%r5,%r3) |
|
+ j .Lremaining |
|
+ |
|
+.Lpreloop64: |
|
+ srlg %r1,%r4,6 /* Split into 64byte blocks */ |
|
+.Lloop64: |
|
+ vl %v16,0(%r5,%r2) |
|
+ vl %v17,0(%r5,%r3) |
|
+ vfenefs %v18,%v16,%v17 /* Compare not equal. */ |
|
+ jno .Lfound |
|
+ |
|
+ vl %v16,16(%r5,%r2) |
|
+ vl %v17,16(%r5,%r3) |
|
+ vfenefs %v18,%v16,%v17 |
|
+ jno .Lfound |
|
+ |
|
+ vl %v16,32(%r5,%r2) |
|
+ vl %v17,32(%r5,%r3) |
|
+ vfenefs %v18,%v16,%v17 |
|
+ jno .Lfound |
|
+ |
|
+ vl %v16,48(%r5,%r2) |
|
+ vl %v17,48(%r5,%r3) |
|
+ aghi %r5,64 |
|
+ vfenefs %v18,%v16,%v17 |
|
+ jno .Lfound |
|
+ |
|
+ brctg %r1,.Lloop64 /* Loop until all blocks are processed. */ |
|
+ |
|
+ llgfr %r4,%r4 |
|
+ nilf %r4,63 /* Get remaining bytes */ |
|
+ locgre %r2,%r4 |
|
+ ber %r14 |
|
+ clgijh %r4,16,.Lpreloop16 |
|
+ la %r2,0(%r5,%r2) |
|
+ la %r3,0(%r5,%r3) |
|
+ j .Lremaining |
|
+END(__wmemcmp_vx) |
|
+#endif /* HAVE_S390_VX_ASM_SUPPORT && !defined NOT_IN_libc */ |
|
diff --git a/sysdeps/s390/multiarch/wmemcmp.c b/sysdeps/s390/multiarch/wmemcmp.c |
|
new file mode 100644 |
|
index 0000000..24a57e9 |
|
--- /dev/null |
|
+++ b/sysdeps/s390/multiarch/wmemcmp.c |
|
@@ -0,0 +1,27 @@ |
|
+/* Multiple versions of wmemcmp. |
|
+ Copyright (C) 2015 Free Software Foundation, Inc. |
|
+ This file is part of the GNU C Library. |
|
+ |
|
+ The GNU C Library is free software; you can redistribute it and/or |
|
+ modify it under the terms of the GNU Lesser General Public |
|
+ License as published by the Free Software Foundation; either |
|
+ version 2.1 of the License, or (at your option) any later version. |
|
+ |
|
+ The GNU C Library is distributed in the hope that it will be useful, |
|
+ but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
+ Lesser General Public License for more details. |
|
+ |
|
+ You should have received a copy of the GNU Lesser General Public |
|
+ License along with the GNU C Library; if not, see |
|
+ <http://www.gnu.org/licenses/>. */ |
|
+ |
|
+#if defined HAVE_S390_VX_ASM_SUPPORT && !defined NOT_IN_libc |
|
+# include <wchar.h> |
|
+# include <ifunc-resolve.h> |
|
+ |
|
+s390_vx_libc_ifunc2 (__wmemcmp, wmemcmp) |
|
+ |
|
+#else |
|
+# include <wcsmbs/wmemcmp.c> |
|
+#endif /* !(defined HAVE_S390_VX_ASM_SUPPORT && !defined NOT_IN_libc) */ |
|
-- |
|
2.3.0 |
|
|
|
|