You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
745 lines
28 KiB
745 lines
28 KiB
7 years ago
|
This patch file contains a backport of the following upstream commits,
|
||
|
which prevent segmentation faults due to unaligned accesses by sse4.2
|
||
|
instructions.
|
||
|
|
||
|
commit 584b18eb4df61ccd447db2dfe8c8a7901f8c8598
|
||
|
Author: Ondřej Bílka <neleai@seznam.cz>
|
||
|
Date: Sat Dec 14 19:33:56 2013
|
||
|
|
||
|
Add strstr with unaligned loads. Fixes bug 12100.
|
||
|
|
||
|
A sse42 version of strstr used pcmpistr instruction which is quite
|
||
|
inefficient. A faster way is to look for pairs of characters, which uses
|
||
|
sse2, is faster than pcmpistr, and for real strings the pairs we look for
|
||
|
are relatively rare.
|
||
|
|
||
|
For linear time complexity we use a buy-or-rent technique, which switches
|
||
|
to two-way algorithm when superlinear behaviour is detected.
|
||
|
|
||
|
commit 1818483b15d22016b0eae41d37ee91cc87b37510
|
||
|
Author: Andreas Schwab <schwab@suse.de>
|
||
|
Date: Wed Dec 18 11:53:27 2013
|
||
|
|
||
|
Remove use of SSE4.2 functions for strstr on i686
|
||
|
|
||
|
The SSE4.2 versions have been removed from x86_64 by commit 584b18eb. This patch
|
||
|
fixes the build on i686, which attempts to use the removed files.
|
||
|
|
||
|
diff -N -u -r glibc-2.17-c758a686/sysdeps/i386/i686/multiarch/ifunc-impl-list.c glibc-2.17-c758a686-hacked/sysdeps/i386/i686/multiarch/ifunc-impl-list.c
|
||
|
--- glibc-2.17-c758a686/sysdeps/i386/i686/multiarch/ifunc-impl-list.c 2012-12-24 22:02:13.000000000 -0500
|
||
|
+++ glibc-2.17-c758a686-hacked/sysdeps/i386/i686/multiarch/ifunc-impl-list.c 2014-07-01 05:38:25.138749032 -0400
|
||
|
@@ -141,12 +141,6 @@
|
||
|
IFUNC_IMPL_ADD (array, i, strcasecmp_l, 1,
|
||
|
__strcasecmp_l_ia32))
|
||
|
|
||
|
- /* Support sysdeps/i386/i686/multiarch/strcasestr.c. */
|
||
|
- IFUNC_IMPL (i, name, strcasestr,
|
||
|
- IFUNC_IMPL_ADD (array, i, strcasestr, HAS_SSE4_2,
|
||
|
- __strcasestr_sse42)
|
||
|
- IFUNC_IMPL_ADD (array, i, strcasestr, 1, __strcasestr_ia32))
|
||
|
-
|
||
|
/* Support sysdeps/i386/i686/multiarch/strcat.S. */
|
||
|
IFUNC_IMPL (i, name, strcat,
|
||
|
IFUNC_IMPL_ADD (array, i, strcat, HAS_SSSE3, __strcat_ssse3)
|
||
|
@@ -234,11 +228,6 @@
|
||
|
IFUNC_IMPL_ADD (array, i, strspn, HAS_SSE4_2, __strspn_sse42)
|
||
|
IFUNC_IMPL_ADD (array, i, strspn, 1, __strspn_ia32))
|
||
|
|
||
|
- /* Support sysdeps/i386/i686/multiarch/strstr-c.c. */
|
||
|
- IFUNC_IMPL (i, name, strstr,
|
||
|
- IFUNC_IMPL_ADD (array, i, strstr, HAS_SSE4_2, __strstr_sse42)
|
||
|
- IFUNC_IMPL_ADD (array, i, strstr, 1, __strstr_ia32))
|
||
|
-
|
||
|
/* Support sysdeps/i386/i686/multiarch/wcschr.S. */
|
||
|
IFUNC_IMPL (i, name, wcschr,
|
||
|
IFUNC_IMPL_ADD (array, i, wcschr, HAS_SSE2, __wcschr_sse2)
|
||
|
diff -N -u -r glibc-2.17-c758a686/sysdeps/i386/i686/multiarch/Makefile glibc-2.17-c758a686-hacked/sysdeps/i386/i686/multiarch/Makefile
|
||
|
--- glibc-2.17-c758a686/sysdeps/i386/i686/multiarch/Makefile 2012-12-24 22:02:13.000000000 -0500
|
||
|
+++ glibc-2.17-c758a686-hacked/sysdeps/i386/i686/multiarch/Makefile 2014-07-01 05:37:15.985617738 -0400
|
||
|
@@ -11,7 +11,7 @@
|
||
|
memmove-ssse3-rep bcopy-ssse3 bcopy-ssse3-rep \
|
||
|
memset-sse2-rep bzero-sse2-rep strcmp-ssse3 \
|
||
|
strcmp-sse4 strncmp-c strncmp-ssse3 strncmp-sse4 \
|
||
|
- memcmp-ssse3 memcmp-sse4 strcasestr-nonascii varshift \
|
||
|
+ memcmp-ssse3 memcmp-sse4 varshift \
|
||
|
strlen-sse2 strlen-sse2-bsf strncpy-c strcpy-ssse3 \
|
||
|
strncpy-ssse3 stpcpy-ssse3 stpncpy-ssse3 strcpy-sse2 \
|
||
|
strncpy-sse2 stpcpy-sse2 stpncpy-sse2 strcat-ssse3 \
|
||
|
@@ -25,14 +25,11 @@
|
||
|
strncase_l-c strncase-c strncase_l-ssse3 \
|
||
|
strcasecmp_l-sse4 strncase_l-sse4
|
||
|
ifeq (yes,$(config-cflags-sse4))
|
||
|
-sysdep_routines += strcspn-c strpbrk-c strspn-c strstr-c strcasestr-c
|
||
|
+sysdep_routines += strcspn-c strpbrk-c strspn-c
|
||
|
CFLAGS-varshift.c += -msse4
|
||
|
CFLAGS-strcspn-c.c += -msse4
|
||
|
CFLAGS-strpbrk-c.c += -msse4
|
||
|
CFLAGS-strspn-c.c += -msse4
|
||
|
-CFLAGS-strstr.c += -msse4
|
||
|
-CFLAGS-strcasestr.c += -msse4
|
||
|
-CFLAGS-strcasestr-nonascii.c += -msse4
|
||
|
endif
|
||
|
endif
|
||
|
|
||
|
diff -N -u -r glibc-2.17-c758a686/sysdeps/i386/i686/multiarch/strcasestr.c glibc-2.17-c758a686-hacked/sysdeps/i386/i686/multiarch/strcasestr.c
|
||
|
--- glibc-2.17-c758a686/sysdeps/i386/i686/multiarch/strcasestr.c 2012-12-24 22:02:13.000000000 -0500
|
||
|
+++ glibc-2.17-c758a686-hacked/sysdeps/i386/i686/multiarch/strcasestr.c 1969-12-31 19:00:00.000000000 -0500
|
||
|
@@ -1 +0,0 @@
|
||
|
-#include <sysdeps/x86_64/multiarch/strcasestr.c>
|
||
|
diff -N -u -r glibc-2.17-c758a686/sysdeps/i386/i686/multiarch/strcasestr-nonascii.c glibc-2.17-c758a686-hacked/sysdeps/i386/i686/multiarch/strcasestr-nonascii.c
|
||
|
--- glibc-2.17-c758a686/sysdeps/i386/i686/multiarch/strcasestr-nonascii.c 2012-12-24 22:02:13.000000000 -0500
|
||
|
+++ glibc-2.17-c758a686-hacked/sysdeps/i386/i686/multiarch/strcasestr-nonascii.c 1969-12-31 19:00:00.000000000 -0500
|
||
|
@@ -1,2 +0,0 @@
|
||
|
-#include <nmmintrin.h>
|
||
|
-#include <sysdeps/x86_64/multiarch/strcasestr-nonascii.c>
|
||
|
diff -N -u -r glibc-2.17-c758a686/sysdeps/i386/i686/multiarch/strstr.c glibc-2.17-c758a686-hacked/sysdeps/i386/i686/multiarch/strstr.c
|
||
|
--- glibc-2.17-c758a686/sysdeps/i386/i686/multiarch/strstr.c 2012-12-24 22:02:13.000000000 -0500
|
||
|
+++ glibc-2.17-c758a686-hacked/sysdeps/i386/i686/multiarch/strstr.c 1969-12-31 19:00:00.000000000 -0500
|
||
|
@@ -1 +0,0 @@
|
||
|
-#include <sysdeps/x86_64/multiarch/strstr.c>
|
||
|
diff -N -u -r glibc-2.17-c758a686/sysdeps/i386/i686/multiarch/strstr-c.c glibc-2.17-c758a686-hacked/sysdeps/i386/i686/multiarch/strstr-c.c
|
||
|
--- glibc-2.17-c758a686/sysdeps/i386/i686/multiarch/strstr-c.c 2012-12-24 22:02:13.000000000 -0500
|
||
|
+++ glibc-2.17-c758a686-hacked/sysdeps/i386/i686/multiarch/strstr-c.c 1969-12-31 19:00:00.000000000 -0500
|
||
|
@@ -1,31 +0,0 @@
|
||
|
-/* Multiple versions of strstr
|
||
|
- All versions must be listed in ifunc-impl-list.c. */
|
||
|
-
|
||
|
-#define STRSTR __strstr_ia32
|
||
|
-#if defined SHARED && defined DO_VERSIONING && !defined NO_HIDDEN
|
||
|
-#undef libc_hidden_builtin_def
|
||
|
-#define libc_hidden_builtin_def(name) \
|
||
|
- __hidden_ver1 (__strstr_ia32, __GI_strstr, __strstr_ia32);
|
||
|
-#endif
|
||
|
-
|
||
|
-/* Redefine strstr so that the compiler won't complain about the type
|
||
|
- mismatch with the IFUNC selector in strong_alias, below. */
|
||
|
-#undef strstr
|
||
|
-#define strstr __redirect_strstr
|
||
|
-
|
||
|
-#include "string/strstr.c"
|
||
|
-
|
||
|
-#include "init-arch.h"
|
||
|
-
|
||
|
-extern __typeof (__redirect_strstr) __strstr_sse42 attribute_hidden;
|
||
|
-extern __typeof (__redirect_strstr) __strstr_ia32 attribute_hidden;
|
||
|
-
|
||
|
-/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle
|
||
|
- ifunc symbol properly. */
|
||
|
-extern __typeof (__redirect_strstr) __libc_strstr;
|
||
|
-libc_ifunc (__libc_strstr,
|
||
|
- HAS_SSE4_2 && !use_unaligned_strstr () ?
|
||
|
- __strstr_sse42 : __strstr_ia32)
|
||
|
-
|
||
|
-#undef strstr
|
||
|
-strong_alias (__libc_strstr, strstr)
|
||
|
diff -N -u -r glibc-2.17-c758a686/sysdeps/x86_64/multiarch/ifunc-impl-list.c glibc-2.17-c758a686-hacked/sysdeps/x86_64/multiarch/ifunc-impl-list.c
|
||
|
--- glibc-2.17-c758a686/sysdeps/x86_64/multiarch/ifunc-impl-list.c 2012-12-24 22:02:13.000000000 -0500
|
||
|
+++ glibc-2.17-c758a686-hacked/sysdeps/x86_64/multiarch/ifunc-impl-list.c 2014-06-30 10:58:16.637073138 -0400
|
||
|
@@ -115,8 +115,6 @@
|
||
|
|
||
|
/* Support sysdeps/x86_64/multiarch/strcasestr.c. */
|
||
|
IFUNC_IMPL (i, name, strcasestr,
|
||
|
- IFUNC_IMPL_ADD (array, i, strcasestr, HAS_SSE4_2,
|
||
|
- __strcasestr_sse42)
|
||
|
IFUNC_IMPL_ADD (array, i, strcasestr, 1, __strcasestr_sse2))
|
||
|
|
||
|
/* Support sysdeps/x86_64/multiarch/strcat.S. */
|
||
|
@@ -210,9 +210,7 @@
|
||
|
|
||
|
/* Support sysdeps/x86_64/multiarch/strstr-c.c. */
|
||
|
IFUNC_IMPL (i, name, strstr,
|
||
|
- IFUNC_IMPL_ADD (array, i, strstr, use_unaligned_strstr (),
|
||
|
- __strstr_sse2_unaligned)
|
||
|
- IFUNC_IMPL_ADD (array, i, strstr, HAS_SSE4_2, __strstr_sse42)
|
||
|
+ IFUNC_IMPL_ADD (array, i, strstr, 1, __strstr_sse2_unaligned)
|
||
|
IFUNC_IMPL_ADD (array, i, strstr, 1, __strstr_sse2))
|
||
|
|
||
|
/* Support sysdeps/x86_64/multiarch/wcscpy.S. */
|
||
|
diff -N -u -r glibc-2.17-c758a686/sysdeps/x86_64/multiarch/Makefile glibc-2.17-c758a686-hacked/sysdeps/x86_64/multiarch/Makefile
|
||
|
--- glibc-2.17-c758a686/sysdeps/x86_64/multiarch/Makefile 2012-12-24 22:02:13.000000000 -0500
|
||
|
+++ glibc-2.17-c758a686-hacked/sysdeps/x86_64/multiarch/Makefile 2014-06-30 10:55:08.343714449 -0400
|
||
|
@@ -9,7 +9,7 @@
|
||
|
sysdep_routines += strncat-c stpncpy-c strncpy-c strcmp-ssse3 strncmp-ssse3 \
|
||
|
strend-sse4 memcmp-sse4 memcpy-ssse3 mempcpy-ssse3 \
|
||
|
memmove-ssse3 memcpy-ssse3-back mempcpy-ssse3-back \
|
||
|
- memmove-ssse3-back strcasestr-nonascii strcasecmp_l-ssse3 \
|
||
|
+ memmove-ssse3-back strcasecmp_l-ssse3 \
|
||
|
strncase_l-ssse3 strlen-sse4 strlen-sse2-no-bsf memset-x86-64 \
|
||
|
strcpy-ssse3 strncpy-ssse3 stpcpy-ssse3 stpncpy-ssse3 \
|
||
|
strcpy-sse2-unaligned strncpy-sse2-unaligned \
|
||
|
@@ -17,14 +17,11 @@
|
||
|
strnlen-sse2-no-bsf strrchr-sse2-no-bsf strchr-sse2-no-bsf \
|
||
|
memcmp-ssse3 strstr-sse2-unaligned
|
||
|
ifeq (yes,$(config-cflags-sse4))
|
||
|
-sysdep_routines += strcspn-c strpbrk-c strspn-c strstr-c strcasestr-c varshift
|
||
|
+sysdep_routines += strcspn-c strpbrk-c strspn-c varshift
|
||
|
CFLAGS-varshift.c += -msse4
|
||
|
CFLAGS-strcspn-c.c += -msse4
|
||
|
CFLAGS-strpbrk-c.c += -msse4
|
||
|
CFLAGS-strspn-c.c += -msse4
|
||
|
-CFLAGS-strstr.c += -msse4
|
||
|
-CFLAGS-strcasestr.c += -msse4
|
||
|
-CFLAGS-strcasestr-nonascii.c += -msse4
|
||
|
endif
|
||
|
endif
|
||
|
|
||
|
diff -N -u -r glibc-2.17-c758a686/sysdeps/x86_64/multiarch/strcasestr.c glibc-2.17-c758a686-hacked/sysdeps/x86_64/multiarch/strcasestr.c
|
||
|
--- glibc-2.17-c758a686/sysdeps/x86_64/multiarch/strcasestr.c 2012-12-24 22:02:13.000000000 -0500
|
||
|
+++ glibc-2.17-c758a686-hacked/sysdeps/x86_64/multiarch/strcasestr.c 2014-06-30 11:01:34.394453519 -0400
|
||
|
@@ -1,7 +1,13 @@
|
||
|
-extern char *__strcasestr_sse42_nonascii (const unsigned char *s1,
|
||
|
- const unsigned char *s2)
|
||
|
- attribute_hidden;
|
||
|
+/* Multiple versions of strcasestr
|
||
|
+ All versions must be listed in ifunc-impl-list.c. */
|
||
|
|
||
|
-#define USE_AS_STRCASESTR
|
||
|
-#define STRSTR_SSE42 __strcasestr_sse42
|
||
|
-#include "strstr.c"
|
||
|
+#include "init-arch.h"
|
||
|
+
|
||
|
+#define STRCASESTR __strcasestr_sse2
|
||
|
+
|
||
|
+#include "string/strcasestr.c"
|
||
|
+
|
||
|
+extern __typeof (__strcasestr_sse2) __strcasestr_sse2 attribute_hidden;
|
||
|
+
|
||
|
+libc_ifunc (__strcasestr,
|
||
|
+ __strcasestr_sse2);
|
||
|
diff -N -u -r glibc-2.17-c758a686/sysdeps/x86_64/multiarch/strcasestr-c.c glibc-2.17-c758a686-hacked/sysdeps/x86_64/multiarch/strcasestr-c.c
|
||
|
--- glibc-2.17-c758a686/sysdeps/x86_64/multiarch/strcasestr-c.c 2012-12-24 22:02:13.000000000 -0500
|
||
|
+++ glibc-2.17-c758a686-hacked/sysdeps/x86_64/multiarch/strcasestr-c.c 1969-12-31 19:00:00.000000000 -0500
|
||
|
@@ -1,20 +0,0 @@
|
||
|
-/* Multiple versions of strcasestr
|
||
|
- All versions must be listed in ifunc-impl-list.c. */
|
||
|
-
|
||
|
-#include "init-arch.h"
|
||
|
-
|
||
|
-#define STRCASESTR __strcasestr_sse2
|
||
|
-
|
||
|
-#include "string/strcasestr.c"
|
||
|
-
|
||
|
-extern char *__strcasestr_sse42 (const char *, const char *) attribute_hidden;
|
||
|
-extern __typeof (__strcasestr_sse2) __strcasestr_sse2 attribute_hidden;
|
||
|
-
|
||
|
-#if 1
|
||
|
-libc_ifunc (__strcasestr,
|
||
|
- HAS_SSE4_2 && !use_unaligned_strstr () ? __strcasestr_sse42 :
|
||
|
- __strcasestr_sse2);
|
||
|
-#else
|
||
|
-libc_ifunc (__strcasestr,
|
||
|
- 0 ? __strcasestr_sse42 : __strcasestr_sse2);
|
||
|
-#endif
|
||
|
diff -N -u -r glibc-2.17-c758a686/sysdeps/x86_64/multiarch/strcasestr-nonascii.c glibc-2.17-c758a686-hacked/sysdeps/x86_64/multiarch/strcasestr-nonascii.c
|
||
|
--- glibc-2.17-c758a686/sysdeps/x86_64/multiarch/strcasestr-nonascii.c 2012-12-24 22:02:13.000000000 -0500
|
||
|
+++ glibc-2.17-c758a686-hacked/sysdeps/x86_64/multiarch/strcasestr-nonascii.c 1969-12-31 19:00:00.000000000 -0500
|
||
|
@@ -1,50 +0,0 @@
|
||
|
-/* strstr with SSE4.2 intrinsics
|
||
|
- Copyright (C) 2010 Free Software Foundation, Inc.
|
||
|
- This file is part of the GNU C Library.
|
||
|
-
|
||
|
- The GNU C Library is free software; you can redistribute it and/or
|
||
|
- modify it under the terms of the GNU Lesser General Public
|
||
|
- License as published by the Free Software Foundation; either
|
||
|
- version 2.1 of the License, or (at your option) any later version.
|
||
|
-
|
||
|
- The GNU C Library is distributed in the hope that it will be useful,
|
||
|
- but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
|
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||
|
- Lesser General Public License for more details.
|
||
|
-
|
||
|
- You should have received a copy of the GNU Lesser General Public
|
||
|
- License along with the GNU C Library; if not, see
|
||
|
- <http://www.gnu.org/licenses/>. */
|
||
|
-
|
||
|
-#include <ctype.h>
|
||
|
-#include <xmmintrin.h>
|
||
|
-
|
||
|
-
|
||
|
-/* Similar to __m128i_strloadu. Convert to lower case for none-POSIX/C
|
||
|
- locale. */
|
||
|
-static inline __m128i
|
||
|
-__m128i_strloadu_tolower (const unsigned char *p)
|
||
|
-{
|
||
|
- union
|
||
|
- {
|
||
|
- char b[16];
|
||
|
- __m128i x;
|
||
|
- } u;
|
||
|
-
|
||
|
- for (int i = 0; i < 16; ++i)
|
||
|
- if (p[i] == 0)
|
||
|
- {
|
||
|
- u.b[i] = 0;
|
||
|
- break;
|
||
|
- }
|
||
|
- else
|
||
|
- u.b[i] = tolower (p[i]);
|
||
|
-
|
||
|
- return u.x;
|
||
|
-}
|
||
|
-
|
||
|
-
|
||
|
-#define STRCASESTR_NONASCII
|
||
|
-#define USE_AS_STRCASESTR
|
||
|
-#define STRSTR_SSE42 __strcasestr_sse42_nonascii
|
||
|
-#include "strstr.c"
|
||
|
diff -N -u -r glibc-2.17-c758a686/sysdeps/x86_64/multiarch/strstr.c glibc-2.17-c758a686-hacked/sysdeps/x86_64/multiarch/strstr.c
|
||
|
--- glibc-2.17-c758a686/sysdeps/x86_64/multiarch/strstr.c 2012-12-24 22:02:13.000000000 -0500
|
||
|
+++ glibc-2.17-c758a686-hacked/sysdeps/x86_64/multiarch/strstr.c 2014-06-30 11:26:55.025294484 -0400
|
||
|
@@ -1,6 +1,6 @@
|
||
|
-/* strstr with SSE4.2 intrinsics
|
||
|
- Copyright (C) 2009, 2010, 2011 Free Software Foundation, Inc.
|
||
|
- Contributed by Intel Corporation.
|
||
|
+/* Multiple versions of strstr.
|
||
|
+ All versions must be listed in ifunc-impl-list.c.
|
||
|
+ Copyright (C) 2012 Free Software Foundation, Inc.
|
||
|
This file is part of the GNU C Library.
|
||
|
|
||
|
The GNU C Library is free software; you can redistribute it and/or
|
||
|
@@ -17,368 +17,32 @@
|
||
|
License along with the GNU C Library; if not, see
|
||
|
<http://www.gnu.org/licenses/>. */
|
||
|
|
||
|
-#include <nmmintrin.h>
|
||
|
-#include "varshift.h"
|
||
|
-
|
||
|
-#ifndef STRSTR_SSE42
|
||
|
-# define STRSTR_SSE42 __strstr_sse42
|
||
|
-#endif
|
||
|
-
|
||
|
-#ifdef USE_AS_STRCASESTR
|
||
|
-# include <ctype.h>
|
||
|
-# include <locale/localeinfo.h>
|
||
|
-
|
||
|
-# define LOADBYTE(C) tolower (C)
|
||
|
-# define CMPBYTE(C1, C2) (tolower (C1) == tolower (C2))
|
||
|
-#else
|
||
|
-# define LOADBYTE(C) (C)
|
||
|
-# define CMPBYTE(C1, C2) ((C1) == (C2))
|
||
|
+/* Redefine strstr so that the compiler won't complain about the type
|
||
|
+ mismatch with the IFUNC selector in strong_alias, below. */
|
||
|
+#undef strstr
|
||
|
+#define strstr __redirect_strstr
|
||
|
+#include <string.h>
|
||
|
+#undef strstr
|
||
|
+
|
||
|
+#define STRSTR __strstr_sse2
|
||
|
+#ifdef SHARED
|
||
|
+# undef libc_hidden_builtin_def
|
||
|
+# define libc_hidden_builtin_def(name) \
|
||
|
+ __hidden_ver1 (__strstr_sse2, __GI_strstr, __strstr_sse2);
|
||
|
#endif
|
||
|
|
||
|
-/* We use 0xe ordered-compare:
|
||
|
- _SIDD_SBYTE_OPS
|
||
|
- | _SIDD_CMP_EQUAL_ORDER
|
||
|
- | _SIDD_LEAST_SIGNIFICANT
|
||
|
- on pcmpistri to do the scanning and string comparsion requirements of
|
||
|
- sub-string match. In the scanning phase, we process Cflag and ECX
|
||
|
- index to locate the first fragment match; once the first fragment
|
||
|
- match position has been identified, we do comparison of subsequent
|
||
|
- string fragments until we can conclude false or true match; whe
|
||
|
- n concluding a false match, we may need to repeat scanning process
|
||
|
- from next relevant offset in the target string.
|
||
|
-
|
||
|
- In the scanning phase we have 4 cases:
|
||
|
- case ECX CFlag ZFlag SFlag
|
||
|
- 1 16 0 0 0
|
||
|
- 2a 16 0 0 1
|
||
|
- 2b 16 0 1 0
|
||
|
- 2c 16 0 1 1
|
||
|
-
|
||
|
- 1. No ordered-comparison match, both 16B fragments are valid, so
|
||
|
- continue to next fragment.
|
||
|
- 2. No ordered-comparison match, there is EOS in either fragment,
|
||
|
- 2a. Zflg = 0, Sflg = 1, we continue
|
||
|
- 2b. Zflg = 1, Sflg = 0, we conclude no match and return.
|
||
|
- 2c. Zflg = 1, sflg = 1, lenth determine match or no match
|
||
|
-
|
||
|
- In the string comparison phase, the 1st fragment match is fixed up
|
||
|
- to produce ECX = 0. Subsequent fragment compare of nonzero index
|
||
|
- and no match conclude a false match.
|
||
|
-
|
||
|
- case ECX CFlag ZFlag SFlag
|
||
|
- 3 X 1 0 0/1
|
||
|
- 4a 0 1 0 0
|
||
|
- 4b 0 1 0 1
|
||
|
- 4c 0 < X 1 0 0/1
|
||
|
- 5 16 0 1 0
|
||
|
-
|
||
|
- 3. An initial ordered-comparison fragment match, we fix up to do
|
||
|
- subsequent string comparison
|
||
|
- 4a. Continuation of fragment comparison of a string compare.
|
||
|
- 4b. EOS reached in the reference string, we conclude true match and
|
||
|
- return
|
||
|
- 4c. String compare failed if index is nonzero, we need to go back to
|
||
|
- scanning
|
||
|
- 5. failed string compare, go back to scanning
|
||
|
- */
|
||
|
-
|
||
|
-/* Simple replacement of movdqu to address 4KB boundary cross issue.
|
||
|
- If EOS occurs within less than 16B before 4KB boundary, we don't
|
||
|
- cross to next page. */
|
||
|
-
|
||
|
-static inline __m128i
|
||
|
-__m128i_strloadu (const unsigned char * p, __m128i zero)
|
||
|
-{
|
||
|
- if (__builtin_expect ((int) ((size_t) p & 0xfff) > 0xff0, 0))
|
||
|
- {
|
||
|
- size_t offset = ((size_t) p & (16 - 1));
|
||
|
- __m128i a = _mm_load_si128 ((__m128i *) (p - offset));
|
||
|
- int bmsk = _mm_movemask_epi8 (_mm_cmpeq_epi8 (a, zero));
|
||
|
- if ((bmsk >> offset) != 0)
|
||
|
- return __m128i_shift_right (a, offset);
|
||
|
- }
|
||
|
- return _mm_loadu_si128 ((__m128i *) p);
|
||
|
-}
|
||
|
-
|
||
|
-#if defined USE_AS_STRCASESTR && !defined STRCASESTR_NONASCII
|
||
|
-
|
||
|
-/* Similar to __m128i_strloadu. Convert to lower case for POSIX/C
|
||
|
- locale and other which have single-byte letters only in the ASCII
|
||
|
- range. */
|
||
|
-static inline __m128i
|
||
|
-__m128i_strloadu_tolower (const unsigned char *p, __m128i zero, __m128i uclow,
|
||
|
- __m128i uchigh, __m128i lcqword)
|
||
|
-{
|
||
|
- __m128i frag = __m128i_strloadu (p, zero);
|
||
|
-
|
||
|
- /* Compare if 'Z' > bytes. Inverted way to get a mask for byte <= 'Z'. */
|
||
|
- __m128i r2 = _mm_cmpgt_epi8 (uchigh, frag);
|
||
|
- /* Compare if bytes are > 'A' - 1. */
|
||
|
- __m128i r1 = _mm_cmpgt_epi8 (frag, uclow);
|
||
|
- /* Mask byte == ff if byte(r2) <= 'Z' and byte(r1) > 'A' - 1. */
|
||
|
- __m128i mask = _mm_and_si128 (r2, r1);
|
||
|
- /* Apply lowercase bit 6 mask for above mask bytes == ff. */
|
||
|
- return _mm_or_si128 (frag, _mm_and_si128 (mask, lcqword));
|
||
|
-}
|
||
|
+#include "string/strstr.c"
|
||
|
|
||
|
-#endif
|
||
|
+extern __typeof (__redirect_strstr) __strstr_sse2_unaligned attribute_hidden;
|
||
|
+extern __typeof (__redirect_strstr) __strstr_sse2 attribute_hidden;
|
||
|
|
||
|
-/* Calculate Knuth-Morris-Pratt string searching algorithm (or KMP
|
||
|
- algorithm) overlap for a fully populated 16B vector.
|
||
|
- Input parameter: 1st 16Byte loaded from the reference string of a
|
||
|
- strstr function.
|
||
|
- We don't use KMP algorithm if reference string is less than 16B. */
|
||
|
-static int
|
||
|
-__inline__ __attribute__ ((__always_inline__,))
|
||
|
-KMP16Bovrlap (__m128i s2)
|
||
|
-{
|
||
|
- __m128i b = _mm_unpacklo_epi8 (s2, s2);
|
||
|
- __m128i a = _mm_unpacklo_epi8 (b, b);
|
||
|
- a = _mm_shuffle_epi32 (a, 0);
|
||
|
- b = _mm_srli_si128 (s2, sizeof (char));
|
||
|
- int bmsk = _mm_movemask_epi8 (_mm_cmpeq_epi8 (b, a));
|
||
|
-
|
||
|
- /* _BitScanForward(&k1, bmsk); */
|
||
|
- int k1;
|
||
|
- __asm ("bsfl %[bmsk], %[k1]" : [k1] "=r" (k1) : [bmsk] "r" (bmsk));
|
||
|
- if (!bmsk)
|
||
|
- return 16;
|
||
|
- else if (bmsk == 0x7fff)
|
||
|
- return 1;
|
||
|
- else if (!k1)
|
||
|
- {
|
||
|
- /* There are al least two distinct chars in s2. If byte 0 and 1 are
|
||
|
- idential and the distinct value lies farther down, we can deduce
|
||
|
- the next byte offset to restart full compare is least no earlier
|
||
|
- than byte 3. */
|
||
|
- return 3;
|
||
|
- }
|
||
|
- else
|
||
|
- {
|
||
|
- /* Byte 1 is not degenerated to byte 0. */
|
||
|
- return k1 + 1;
|
||
|
- }
|
||
|
-}
|
||
|
-
|
||
|
-char *
|
||
|
-__attribute__ ((section (".text.sse4.2")))
|
||
|
-STRSTR_SSE42 (const unsigned char *s1, const unsigned char *s2)
|
||
|
-{
|
||
|
-#define p1 s1
|
||
|
- const unsigned char *p2 = s2;
|
||
|
-
|
||
|
-#ifndef STRCASESTR_NONASCII
|
||
|
- if (__builtin_expect (p2[0] == '\0', 0))
|
||
|
- return (char *) p1;
|
||
|
-
|
||
|
- if (__builtin_expect (p1[0] == '\0', 0))
|
||
|
- return NULL;
|
||
|
-
|
||
|
- /* Check if p1 length is 1 byte long. */
|
||
|
- if (__builtin_expect (p1[1] == '\0', 0))
|
||
|
- return p2[1] == '\0' && CMPBYTE (p1[0], p2[0]) ? (char *) p1 : NULL;
|
||
|
-#endif
|
||
|
+#include "init-arch.h"
|
||
|
|
||
|
-#ifdef USE_AS_STRCASESTR
|
||
|
-# ifndef STRCASESTR_NONASCII
|
||
|
- if (__builtin_expect (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_NONASCII_CASE)
|
||
|
- != 0, 0))
|
||
|
- return __strcasestr_sse42_nonascii (s1, s2);
|
||
|
-
|
||
|
- const __m128i uclow = _mm_set1_epi8 (0x40);
|
||
|
- const __m128i uchigh = _mm_set1_epi8 (0x5b);
|
||
|
- const __m128i lcqword = _mm_set1_epi8 (0x20);
|
||
|
- const __m128i zero = _mm_setzero_si128 ();
|
||
|
-# define strloadu(p) __m128i_strloadu_tolower (p, zero, uclow, uchigh, lcqword)
|
||
|
-# else
|
||
|
-# define strloadu __m128i_strloadu_tolower
|
||
|
-# define zero _mm_setzero_si128 ()
|
||
|
-# endif
|
||
|
-#else
|
||
|
-# define strloadu(p) __m128i_strloadu (p, zero)
|
||
|
- const __m128i zero = _mm_setzero_si128 ();
|
||
|
-#endif
|
||
|
+/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle
|
||
|
+ ifunc symbol properly. */
|
||
|
+extern __typeof (__redirect_strstr) __libc_strstr;
|
||
|
+libc_ifunc (__libc_strstr, HAS_FAST_UNALIGNED_LOAD ? __strstr_sse2_unaligned : __strstr_sse2)
|
||
|
|
||
|
- /* p1 > 1 byte long. Load up to 16 bytes of fragment. */
|
||
|
- __m128i frag1 = strloadu (p1);
|
||
|
|
||
|
- __m128i frag2;
|
||
|
- if (p2[1] != '\0')
|
||
|
- /* p2 is > 1 byte long. */
|
||
|
- frag2 = strloadu (p2);
|
||
|
- else
|
||
|
- frag2 = _mm_insert_epi8 (zero, LOADBYTE (p2[0]), 0);
|
||
|
-
|
||
|
- /* Unsigned bytes, equal order, does frag2 has null? */
|
||
|
- int cmp_c = _mm_cmpistrc (frag2, frag1, 0x0c);
|
||
|
- int cmp_z = _mm_cmpistrz (frag2, frag1, 0x0c);
|
||
|
- int cmp = _mm_cmpistri (frag2, frag1, 0x0c);
|
||
|
- int cmp_s = _mm_cmpistrs (frag2, frag1, 0x0c);
|
||
|
- if (cmp_s & cmp_c)
|
||
|
- {
|
||
|
- int bmsk = _mm_movemask_epi8 (_mm_cmpeq_epi8 (frag2, zero));
|
||
|
- int len;
|
||
|
- __asm ("bsfl %[bmsk], %[len]"
|
||
|
- : [len] "=r" (len) : [bmsk] "r" (bmsk));
|
||
|
- p1 += cmp;
|
||
|
- if ((len + cmp) <= 16)
|
||
|
- return (char *) p1;
|
||
|
-
|
||
|
- /* Load up to 16 bytes of fragment. */
|
||
|
- frag1 = strloadu (p1);
|
||
|
- cmp_c = _mm_cmpistrc (frag2, frag1, 0x0c);
|
||
|
- cmp_s = _mm_cmpistrs (frag2, frag1, 0x0c);
|
||
|
- cmp_z = _mm_cmpistrz (frag2, frag1, 0x0c);
|
||
|
- cmp = _mm_cmpistri (frag2, frag1, 0x0c);
|
||
|
- if ((len + cmp) <= 16)
|
||
|
- return (char *) p1 + cmp;
|
||
|
- }
|
||
|
-
|
||
|
- if (cmp_s)
|
||
|
- {
|
||
|
- /* Adjust addr for 16B alginment in ensuing loop. */
|
||
|
- while (!cmp_z)
|
||
|
- {
|
||
|
- p1 += cmp;
|
||
|
- /* Load up to 16 bytes of fragment. */
|
||
|
- frag1 = strloadu (p1);
|
||
|
- cmp = _mm_cmpistri (frag2, frag1, 0x0c);
|
||
|
- cmp_c = _mm_cmpistrc (frag2, frag1, 0x0c);
|
||
|
- cmp_z = _mm_cmpistrz (frag2, frag1, 0x0c);
|
||
|
- /* Because s2 < 16 bytes and we adjusted p1 by non-zero cmp
|
||
|
- once already, this time cmp will be zero and we can exit. */
|
||
|
- if ((!cmp) & cmp_c)
|
||
|
- break;
|
||
|
- }
|
||
|
-
|
||
|
- if (!cmp_c)
|
||
|
- return NULL;
|
||
|
-
|
||
|
- /* Since s2 is less than 16 bytes, com_c is definitive
|
||
|
- determination of full match. */
|
||
|
- return (char *) p1 + cmp;
|
||
|
- }
|
||
|
-
|
||
|
- /* General case, s2 is at least 16 bytes or more.
|
||
|
- First, the common case of false-match at first byte of p2. */
|
||
|
- const unsigned char *pt = NULL;
|
||
|
- int kmp_fwd = 0;
|
||
|
-re_trace:
|
||
|
- while (!cmp_c)
|
||
|
- {
|
||
|
- /* frag1 has null. */
|
||
|
- if (cmp_z)
|
||
|
- return NULL;
|
||
|
-
|
||
|
- /* frag 1 has no null, advance 16 bytes. */
|
||
|
- p1 += 16;
|
||
|
- /* Load up to 16 bytes of fragment. */
|
||
|
- frag1 = strloadu (p1);
|
||
|
- /* Unsigned bytes, equal order, is there a partial match? */
|
||
|
- cmp_c = _mm_cmpistrc (frag2, frag1, 0x0c);
|
||
|
- cmp = _mm_cmpistri (frag2, frag1, 0x0c);
|
||
|
- cmp_z = _mm_cmpistrz (frag2, frag1, 0x0c);
|
||
|
- }
|
||
|
-
|
||
|
- /* Next, handle initial positive match as first byte of p2. We have
|
||
|
- a partial fragment match, make full determination until we reached
|
||
|
- end of s2. */
|
||
|
- if (!cmp)
|
||
|
- {
|
||
|
- if (cmp_z)
|
||
|
- return (char *) p1;
|
||
|
-
|
||
|
- pt = p1;
|
||
|
- p1 += 16;
|
||
|
- p2 += 16;
|
||
|
- /* Load up to 16 bytes of fragment. */
|
||
|
- frag2 = strloadu (p2);
|
||
|
- }
|
||
|
- else
|
||
|
- {
|
||
|
- /* Adjust 16B alignment. */
|
||
|
- p1 += cmp;
|
||
|
- pt = p1;
|
||
|
- }
|
||
|
-
|
||
|
- /* Load up to 16 bytes of fragment. */
|
||
|
- frag1 = strloadu (p1);
|
||
|
-
|
||
|
- /* Unsigned bytes, equal order, does frag2 has null? */
|
||
|
- cmp_c = _mm_cmpistrc (frag2, frag1, 0x0c);
|
||
|
- cmp_z = _mm_cmpistrz (frag2, frag1, 0x0c);
|
||
|
- cmp = _mm_cmpistri (frag2, frag1, 0x0c);
|
||
|
- cmp_s = _mm_cmpistrs (frag2, frag1, 0x0c);
|
||
|
- while (!(cmp | cmp_z | cmp_s))
|
||
|
- {
|
||
|
- p1 += 16;
|
||
|
- p2 += 16;
|
||
|
- /* Load up to 16 bytes of fragment. */
|
||
|
- frag2 = strloadu (p2);
|
||
|
- /* Load up to 16 bytes of fragment. */
|
||
|
- frag1 = strloadu (p1);
|
||
|
- /* Unsigned bytes, equal order, does frag2 has null? */
|
||
|
- cmp_c = _mm_cmpistrc (frag2, frag1, 0x0c);
|
||
|
- cmp_z = _mm_cmpistrz (frag2, frag1, 0x0c);
|
||
|
- cmp = _mm_cmpistri (frag2, frag1, 0x0c);
|
||
|
- cmp_s = _mm_cmpistrs (frag2, frag1, 0x0c);
|
||
|
- }
|
||
|
-
|
||
|
- /* Full determination yielded a false result, retrace s1 to next
|
||
|
- starting position.
|
||
|
- Zflg 1 0 1 0/1
|
||
|
- Sflg 0 1 1 0/1
|
||
|
- cmp na 0 0 >0
|
||
|
- action done done continue continue if s2 < s1
|
||
|
- false match retrace s1 else false
|
||
|
- */
|
||
|
-
|
||
|
- if (cmp_s & !cmp)
|
||
|
- return (char *) pt;
|
||
|
- if (cmp_z)
|
||
|
- {
|
||
|
- if (!cmp_s)
|
||
|
- return NULL;
|
||
|
-
|
||
|
- /* Handle both zero and sign flag set and s1 is shorter in
|
||
|
- length. */
|
||
|
- int bmsk = _mm_movemask_epi8 (_mm_cmpeq_epi8 (zero, frag2));
|
||
|
- int bmsk1 = _mm_movemask_epi8 (_mm_cmpeq_epi8 (zero, frag1));
|
||
|
- int len;
|
||
|
- int len1;
|
||
|
- __asm ("bsfl %[bmsk], %[len]"
|
||
|
- : [len] "=r" (len) : [bmsk] "r" (bmsk));
|
||
|
- __asm ("bsfl %[bmsk1], %[len1]"
|
||
|
- : [len1] "=r" (len1) : [bmsk1] "r" (bmsk1));
|
||
|
- if (len >= len1)
|
||
|
- return NULL;
|
||
|
- }
|
||
|
- else if (!cmp)
|
||
|
- return (char *) pt;
|
||
|
-
|
||
|
- /* Otherwise, we have to retrace and continue. Default of multiple
|
||
|
- paths that need to retrace from next byte in s1. */
|
||
|
- p2 = s2;
|
||
|
- frag2 = strloadu (p2);
|
||
|
-
|
||
|
- if (!kmp_fwd)
|
||
|
- kmp_fwd = KMP16Bovrlap (frag2);
|
||
|
-
|
||
|
- /* KMP algorithm predicted overlap needs to be corrected for
|
||
|
- partial fragment compare. */
|
||
|
- p1 = pt + (kmp_fwd > cmp ? cmp : kmp_fwd);
|
||
|
-
|
||
|
- /* Since s2 is at least 16 bytes long, we're certain there is no
|
||
|
- match. */
|
||
|
- if (p1[0] == '\0')
|
||
|
- return NULL;
|
||
|
-
|
||
|
- /* Load up to 16 bytes of fragment. */
|
||
|
- frag1 = strloadu (p1);
|
||
|
-
|
||
|
- /* Unsigned bytes, equal order, is there a partial match? */
|
||
|
- cmp_c = _mm_cmpistrc (frag2, frag1, 0x0c);
|
||
|
- cmp = _mm_cmpistri (frag2, frag1, 0x0c);
|
||
|
- cmp_z = _mm_cmpistrz (frag2, frag1, 0x0c);
|
||
|
- goto re_trace;
|
||
|
-}
|
||
|
+#undef strstr
|
||
|
+strong_alias (__libc_strstr, strstr)
|
||
|
diff -N -u -r glibc-2.17-c758a686/sysdeps/x86_64/multiarch/strstr-c.c glibc-2.17-c758a686-hacked/sysdeps/x86_64/multiarch/strstr-c.c
|
||
|
--- glibc-2.17-c758a686/sysdeps/x86_64/multiarch/strstr-c.c 2012-12-24 22:02:13.000000000 -0500
|
||
|
+++ glibc-2.17-c758a686-hacked/sysdeps/x86_64/multiarch/strstr-c.c 1969-12-31 19:00:00.000000000 -0500
|
||
|
@@ -1,50 +0,0 @@
|
||
|
-/* Multiple versions of strstr.
|
||
|
- All versions must be listed in ifunc-impl-list.c.
|
||
|
- Copyright (C) 2012 Free Software Foundation, Inc.
|
||
|
- This file is part of the GNU C Library.
|
||
|
-
|
||
|
- The GNU C Library is free software; you can redistribute it and/or
|
||
|
- modify it under the terms of the GNU Lesser General Public
|
||
|
- License as published by the Free Software Foundation; either
|
||
|
- version 2.1 of the License, or (at your option) any later version.
|
||
|
-
|
||
|
- The GNU C Library is distributed in the hope that it will be useful,
|
||
|
- but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
|
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||
|
- Lesser General Public License for more details.
|
||
|
-
|
||
|
- You should have received a copy of the GNU Lesser General Public
|
||
|
- License along with the GNU C Library; if not, see
|
||
|
- <http://www.gnu.org/licenses/>. */
|
||
|
-
|
||
|
-/* Redefine strstr so that the compiler won't complain about the type
|
||
|
- mismatch with the IFUNC selector in strong_alias, below. */
|
||
|
-#undef strstr
|
||
|
-#define strstr __redirect_strstr
|
||
|
-#include <string.h>
|
||
|
-#undef strstr
|
||
|
-
|
||
|
-#define STRSTR __strstr_sse2
|
||
|
-#ifdef SHARED
|
||
|
-# undef libc_hidden_builtin_def
|
||
|
-# define libc_hidden_builtin_def(name) \
|
||
|
- __hidden_ver1 (__strstr_sse2, __GI_strstr, __strstr_sse2);
|
||
|
-#endif
|
||
|
-
|
||
|
-#include "string/strstr.c"
|
||
|
-
|
||
|
-extern __typeof (__redirect_strstr) __strstr_sse42 attribute_hidden;
|
||
|
-extern __typeof (__redirect_strstr) __strstr_sse2_unaligned attribute_hidden;
|
||
|
-extern __typeof (__redirect_strstr) __strstr_sse2 attribute_hidden;
|
||
|
-
|
||
|
-#include "init-arch.h"
|
||
|
-
|
||
|
-/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle
|
||
|
- ifunc symbol properly. */
|
||
|
-extern __typeof (__redirect_strstr) __libc_strstr;
|
||
|
-libc_ifunc (__libc_strstr, HAS_SSE4_2 ? (use_unaligned_strstr () ?
|
||
|
- __strstr_sse2_unaligned :
|
||
|
- __strstr_sse42) : __strstr_sse2)
|
||
|
-
|
||
|
-#undef strstr
|
||
|
-strong_alias (__libc_strstr, strstr)
|