This patch file contains a backport of the following upstream commits, which prevent segmentation faults due to unaligned accesses by sse4.2 instructions. commit 584b18eb4df61ccd447db2dfe8c8a7901f8c8598 Author: Ondřej Bílka Date: Sat Dec 14 19:33:56 2013 Add strstr with unaligned loads. Fixes bug 12100. A sse42 version of strstr used pcmpistr instruction which is quite ineffective. A faster way is look for pairs of characters which is uses sse2, is faster than pcmpistr and for real strings a pairs we look for are relatively rare. For linear time complexity we use buy or rent technique which switches to two-way algorithm when superlinear behaviour is detected. commit 1818483b15d22016b0eae41d37ee91cc87b37510 Author: Andreas Schwab Date: Wed Dec 18 11:53:27 2013 Remove use of SSE4.2 functions for strstr on i686 The SSE4.2 have been removed from x86_64 by commit 584b18eb. This patch fixes the build on i686, which attempts to use the removed files. diff -N -u -r glibc-2.17-c758a686/sysdeps/i386/i686/multiarch/ifunc-impl-list.c glibc-2.17-c758a686-hacked/sysdeps/i386/i686/multiarch/ifunc-impl-list.c --- glibc-2.17-c758a686/sysdeps/i386/i686/multiarch/ifunc-impl-list.c 2012-12-24 22:02:13.000000000 -0500 +++ glibc-2.17-c758a686-hacked/sysdeps/i386/i686/multiarch/ifunc-impl-list.c 2014-07-01 05:38:25.138749032 -0400 @@ -141,12 +141,6 @@ IFUNC_IMPL_ADD (array, i, strcasecmp_l, 1, __strcasecmp_l_ia32)) - /* Support sysdeps/i386/i686/multiarch/strcasestr.c. */ - IFUNC_IMPL (i, name, strcasestr, - IFUNC_IMPL_ADD (array, i, strcasestr, HAS_SSE4_2, - __strcasestr_sse42) - IFUNC_IMPL_ADD (array, i, strcasestr, 1, __strcasestr_ia32)) - /* Support sysdeps/i386/i686/multiarch/strcat.S. */ IFUNC_IMPL (i, name, strcat, IFUNC_IMPL_ADD (array, i, strcat, HAS_SSSE3, __strcat_ssse3) @@ -234,11 +228,6 @@ IFUNC_IMPL_ADD (array, i, strspn, HAS_SSE4_2, __strspn_sse42) IFUNC_IMPL_ADD (array, i, strspn, 1, __strspn_ia32)) - /* Support sysdeps/i386/i686/multiarch/strstr-c.c. 
*/ - IFUNC_IMPL (i, name, strstr, - IFUNC_IMPL_ADD (array, i, strstr, HAS_SSE4_2, __strstr_sse42) - IFUNC_IMPL_ADD (array, i, strstr, 1, __strstr_ia32)) - /* Support sysdeps/i386/i686/multiarch/wcschr.S. */ IFUNC_IMPL (i, name, wcschr, IFUNC_IMPL_ADD (array, i, wcschr, HAS_SSE2, __wcschr_sse2) diff -N -u -r glibc-2.17-c758a686/sysdeps/i386/i686/multiarch/Makefile glibc-2.17-c758a686-hacked/sysdeps/i386/i686/multiarch/Makefile --- glibc-2.17-c758a686/sysdeps/i386/i686/multiarch/Makefile 2012-12-24 22:02:13.000000000 -0500 +++ glibc-2.17-c758a686-hacked/sysdeps/i386/i686/multiarch/Makefile 2014-07-01 05:37:15.985617738 -0400 @@ -11,7 +11,7 @@ memmove-ssse3-rep bcopy-ssse3 bcopy-ssse3-rep \ memset-sse2-rep bzero-sse2-rep strcmp-ssse3 \ strcmp-sse4 strncmp-c strncmp-ssse3 strncmp-sse4 \ - memcmp-ssse3 memcmp-sse4 strcasestr-nonascii varshift \ + memcmp-ssse3 memcmp-sse4 varshift \ strlen-sse2 strlen-sse2-bsf strncpy-c strcpy-ssse3 \ strncpy-ssse3 stpcpy-ssse3 stpncpy-ssse3 strcpy-sse2 \ strncpy-sse2 stpcpy-sse2 stpncpy-sse2 strcat-ssse3 \ @@ -25,14 +25,11 @@ strncase_l-c strncase-c strncase_l-ssse3 \ strcasecmp_l-sse4 strncase_l-sse4 ifeq (yes,$(config-cflags-sse4)) -sysdep_routines += strcspn-c strpbrk-c strspn-c strstr-c strcasestr-c +sysdep_routines += strcspn-c strpbrk-c strspn-c CFLAGS-varshift.c += -msse4 CFLAGS-strcspn-c.c += -msse4 CFLAGS-strpbrk-c.c += -msse4 CFLAGS-strspn-c.c += -msse4 -CFLAGS-strstr.c += -msse4 -CFLAGS-strcasestr.c += -msse4 -CFLAGS-strcasestr-nonascii.c += -msse4 endif endif diff -N -u -r glibc-2.17-c758a686/sysdeps/i386/i686/multiarch/strcasestr.c glibc-2.17-c758a686-hacked/sysdeps/i386/i686/multiarch/strcasestr.c --- glibc-2.17-c758a686/sysdeps/i386/i686/multiarch/strcasestr.c 2012-12-24 22:02:13.000000000 -0500 +++ glibc-2.17-c758a686-hacked/sysdeps/i386/i686/multiarch/strcasestr.c 1969-12-31 19:00:00.000000000 -0500 @@ -1 +0,0 @@ -#include diff -N -u -r glibc-2.17-c758a686/sysdeps/i386/i686/multiarch/strcasestr-nonascii.c 
glibc-2.17-c758a686-hacked/sysdeps/i386/i686/multiarch/strcasestr-nonascii.c --- glibc-2.17-c758a686/sysdeps/i386/i686/multiarch/strcasestr-nonascii.c 2012-12-24 22:02:13.000000000 -0500 +++ glibc-2.17-c758a686-hacked/sysdeps/i386/i686/multiarch/strcasestr-nonascii.c 1969-12-31 19:00:00.000000000 -0500 @@ -1,2 +0,0 @@ -#include -#include diff -N -u -r glibc-2.17-c758a686/sysdeps/i386/i686/multiarch/strstr.c glibc-2.17-c758a686-hacked/sysdeps/i386/i686/multiarch/strstr.c --- glibc-2.17-c758a686/sysdeps/i386/i686/multiarch/strstr.c 2012-12-24 22:02:13.000000000 -0500 +++ glibc-2.17-c758a686-hacked/sysdeps/i386/i686/multiarch/strstr.c 1969-12-31 19:00:00.000000000 -0500 @@ -1 +0,0 @@ -#include diff -N -u -r glibc-2.17-c758a686/sysdeps/i386/i686/multiarch/strstr-c.c glibc-2.17-c758a686-hacked/sysdeps/i386/i686/multiarch/strstr-c.c --- glibc-2.17-c758a686/sysdeps/i386/i686/multiarch/strstr-c.c 2012-12-24 22:02:13.000000000 -0500 +++ glibc-2.17-c758a686-hacked/sysdeps/i386/i686/multiarch/strstr-c.c 1969-12-31 19:00:00.000000000 -0500 @@ -1,31 +0,0 @@ -/* Multiple versions of strstr - All versions must be listed in ifunc-impl-list.c. */ - -#define STRSTR __strstr_ia32 -#if defined SHARED && defined DO_VERSIONING && !defined NO_HIDDEN -#undef libc_hidden_builtin_def -#define libc_hidden_builtin_def(name) \ - __hidden_ver1 (__strstr_ia32, __GI_strstr, __strstr_ia32); -#endif - -/* Redefine strstr so that the compiler won't complain about the type - mismatch with the IFUNC selector in strong_alias, below. */ -#undef strstr -#define strstr __redirect_strstr - -#include "string/strstr.c" - -#include "init-arch.h" - -extern __typeof (__redirect_strstr) __strstr_sse42 attribute_hidden; -extern __typeof (__redirect_strstr) __strstr_ia32 attribute_hidden; - -/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle - ifunc symbol properly. */ -extern __typeof (__redirect_strstr) __libc_strstr; -libc_ifunc (__libc_strstr, - HAS_SSE4_2 && !use_unaligned_strstr () ? 
- __strstr_sse42 : __strstr_ia32) - -#undef strstr -strong_alias (__libc_strstr, strstr) diff -N -u -r glibc-2.17-c758a686/sysdeps/x86_64/multiarch/ifunc-impl-list.c glibc-2.17-c758a686-hacked/sysdeps/x86_64/multiarch/ifunc-impl-list.c --- glibc-2.17-c758a686/sysdeps/x86_64/multiarch/ifunc-impl-list.c 2012-12-24 22:02:13.000000000 -0500 +++ glibc-2.17-c758a686-hacked/sysdeps/x86_64/multiarch/ifunc-impl-list.c 2014-06-30 10:58:16.637073138 -0400 @@ -115,8 +115,6 @@ /* Support sysdeps/x86_64/multiarch/strcasestr.c. */ IFUNC_IMPL (i, name, strcasestr, - IFUNC_IMPL_ADD (array, i, strcasestr, HAS_SSE4_2, - __strcasestr_sse42) IFUNC_IMPL_ADD (array, i, strcasestr, 1, __strcasestr_sse2)) /* Support sysdeps/x86_64/multiarch/strcat.S. */ @@ -210,9 +210,7 @@ /* Support sysdeps/x86_64/multiarch/strstr-c.c. */ IFUNC_IMPL (i, name, strstr, - IFUNC_IMPL_ADD (array, i, strstr, use_unaligned_strstr (), - __strstr_sse2_unaligned) - IFUNC_IMPL_ADD (array, i, strstr, HAS_SSE4_2, __strstr_sse42) + IFUNC_IMPL_ADD (array, i, strstr, 1, __strstr_sse2_unaligned) IFUNC_IMPL_ADD (array, i, strstr, 1, __strstr_sse2)) /* Support sysdeps/x86_64/multiarch/wcscpy.S. 
*/ diff -N -u -r glibc-2.17-c758a686/sysdeps/x86_64/multiarch/Makefile glibc-2.17-c758a686-hacked/sysdeps/x86_64/multiarch/Makefile --- glibc-2.17-c758a686/sysdeps/x86_64/multiarch/Makefile 2012-12-24 22:02:13.000000000 -0500 +++ glibc-2.17-c758a686-hacked/sysdeps/x86_64/multiarch/Makefile 2014-06-30 10:55:08.343714449 -0400 @@ -9,7 +9,7 @@ sysdep_routines += strncat-c stpncpy-c strncpy-c strcmp-ssse3 strncmp-ssse3 \ strend-sse4 memcmp-sse4 memcpy-ssse3 mempcpy-ssse3 \ memmove-ssse3 memcpy-ssse3-back mempcpy-ssse3-back \ - memmove-ssse3-back strcasestr-nonascii strcasecmp_l-ssse3 \ + memmove-ssse3-back strcasecmp_l-ssse3 \ strncase_l-ssse3 strlen-sse4 strlen-sse2-no-bsf memset-x86-64 \ strcpy-ssse3 strncpy-ssse3 stpcpy-ssse3 stpncpy-ssse3 \ strcpy-sse2-unaligned strncpy-sse2-unaligned \ @@ -17,14 +17,11 @@ strnlen-sse2-no-bsf strrchr-sse2-no-bsf strchr-sse2-no-bsf \ memcmp-ssse3 strstr-sse2-unaligned ifeq (yes,$(config-cflags-sse4)) -sysdep_routines += strcspn-c strpbrk-c strspn-c strstr-c strcasestr-c varshift +sysdep_routines += strcspn-c strpbrk-c strspn-c varshift CFLAGS-varshift.c += -msse4 CFLAGS-strcspn-c.c += -msse4 CFLAGS-strpbrk-c.c += -msse4 CFLAGS-strspn-c.c += -msse4 -CFLAGS-strstr.c += -msse4 -CFLAGS-strcasestr.c += -msse4 -CFLAGS-strcasestr-nonascii.c += -msse4 endif endif diff -N -u -r glibc-2.17-c758a686/sysdeps/x86_64/multiarch/strcasestr.c glibc-2.17-c758a686-hacked/sysdeps/x86_64/multiarch/strcasestr.c --- glibc-2.17-c758a686/sysdeps/x86_64/multiarch/strcasestr.c 2012-12-24 22:02:13.000000000 -0500 +++ glibc-2.17-c758a686-hacked/sysdeps/x86_64/multiarch/strcasestr.c 2014-06-30 11:01:34.394453519 -0400 @@ -1,7 +1,13 @@ -extern char *__strcasestr_sse42_nonascii (const unsigned char *s1, - const unsigned char *s2) - attribute_hidden; +/* Multiple versions of strcasestr + All versions must be listed in ifunc-impl-list.c. 
*/ -#define USE_AS_STRCASESTR -#define STRSTR_SSE42 __strcasestr_sse42 -#include "strstr.c" +#include "init-arch.h" + +#define STRCASESTR __strcasestr_sse2 + +#include "string/strcasestr.c" + +extern __typeof (__strcasestr_sse2) __strcasestr_sse2 attribute_hidden; + +libc_ifunc (__strcasestr, + __strcasestr_sse2); diff -N -u -r glibc-2.17-c758a686/sysdeps/x86_64/multiarch/strcasestr-c.c glibc-2.17-c758a686-hacked/sysdeps/x86_64/multiarch/strcasestr-c.c --- glibc-2.17-c758a686/sysdeps/x86_64/multiarch/strcasestr-c.c 2012-12-24 22:02:13.000000000 -0500 +++ glibc-2.17-c758a686-hacked/sysdeps/x86_64/multiarch/strcasestr-c.c 1969-12-31 19:00:00.000000000 -0500 @@ -1,20 +0,0 @@ -/* Multiple versions of strcasestr - All versions must be listed in ifunc-impl-list.c. */ - -#include "init-arch.h" - -#define STRCASESTR __strcasestr_sse2 - -#include "string/strcasestr.c" - -extern char *__strcasestr_sse42 (const char *, const char *) attribute_hidden; -extern __typeof (__strcasestr_sse2) __strcasestr_sse2 attribute_hidden; - -#if 1 -libc_ifunc (__strcasestr, - HAS_SSE4_2 && !use_unaligned_strstr () ? __strcasestr_sse42 : - __strcasestr_sse2); -#else -libc_ifunc (__strcasestr, - 0 ? __strcasestr_sse42 : __strcasestr_sse2); -#endif diff -N -u -r glibc-2.17-c758a686/sysdeps/x86_64/multiarch/strcasestr-nonascii.c glibc-2.17-c758a686-hacked/sysdeps/x86_64/multiarch/strcasestr-nonascii.c --- glibc-2.17-c758a686/sysdeps/x86_64/multiarch/strcasestr-nonascii.c 2012-12-24 22:02:13.000000000 -0500 +++ glibc-2.17-c758a686-hacked/sysdeps/x86_64/multiarch/strcasestr-nonascii.c 1969-12-31 19:00:00.000000000 -0500 @@ -1,50 +0,0 @@ -/* strstr with SSE4.2 intrinsics - Copyright (C) 2010 Free Software Foundation, Inc. - This file is part of the GNU C Library. 
- - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - . */ - -#include -#include - - -/* Similar to __m128i_strloadu. Convert to lower case for none-POSIX/C - locale. */ -static inline __m128i -__m128i_strloadu_tolower (const unsigned char *p) -{ - union - { - char b[16]; - __m128i x; - } u; - - for (int i = 0; i < 16; ++i) - if (p[i] == 0) - { - u.b[i] = 0; - break; - } - else - u.b[i] = tolower (p[i]); - - return u.x; -} - - -#define STRCASESTR_NONASCII -#define USE_AS_STRCASESTR -#define STRSTR_SSE42 __strcasestr_sse42_nonascii -#include "strstr.c" diff -N -u -r glibc-2.17-c758a686/sysdeps/x86_64/multiarch/strstr.c glibc-2.17-c758a686-hacked/sysdeps/x86_64/multiarch/strstr.c --- glibc-2.17-c758a686/sysdeps/x86_64/multiarch/strstr.c 2012-12-24 22:02:13.000000000 -0500 +++ glibc-2.17-c758a686-hacked/sysdeps/x86_64/multiarch/strstr.c 2014-06-30 11:26:55.025294484 -0400 @@ -1,6 +1,6 @@ -/* strstr with SSE4.2 intrinsics - Copyright (C) 2009, 2010, 2011 Free Software Foundation, Inc. - Contributed by Intel Corporation. +/* Multiple versions of strstr. + All versions must be listed in ifunc-impl-list.c. + Copyright (C) 2012 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -17,368 +17,32 @@ License along with the GNU C Library; if not, see . 
*/ -#include -#include "varshift.h" - -#ifndef STRSTR_SSE42 -# define STRSTR_SSE42 __strstr_sse42 -#endif - -#ifdef USE_AS_STRCASESTR -# include -# include - -# define LOADBYTE(C) tolower (C) -# define CMPBYTE(C1, C2) (tolower (C1) == tolower (C2)) -#else -# define LOADBYTE(C) (C) -# define CMPBYTE(C1, C2) ((C1) == (C2)) +/* Redefine strstr so that the compiler won't complain about the type + mismatch with the IFUNC selector in strong_alias, below. */ +#undef strstr +#define strstr __redirect_strstr +#include +#undef strstr + +#define STRSTR __strstr_sse2 +#ifdef SHARED +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) \ + __hidden_ver1 (__strstr_sse2, __GI_strstr, __strstr_sse2); #endif -/* We use 0xe ordered-compare: - _SIDD_SBYTE_OPS - | _SIDD_CMP_EQUAL_ORDER - | _SIDD_LEAST_SIGNIFICANT - on pcmpistri to do the scanning and string comparsion requirements of - sub-string match. In the scanning phase, we process Cflag and ECX - index to locate the first fragment match; once the first fragment - match position has been identified, we do comparison of subsequent - string fragments until we can conclude false or true match; whe - n concluding a false match, we may need to repeat scanning process - from next relevant offset in the target string. - - In the scanning phase we have 4 cases: - case ECX CFlag ZFlag SFlag - 1 16 0 0 0 - 2a 16 0 0 1 - 2b 16 0 1 0 - 2c 16 0 1 1 - - 1. No ordered-comparison match, both 16B fragments are valid, so - continue to next fragment. - 2. No ordered-comparison match, there is EOS in either fragment, - 2a. Zflg = 0, Sflg = 1, we continue - 2b. Zflg = 1, Sflg = 0, we conclude no match and return. - 2c. Zflg = 1, sflg = 1, lenth determine match or no match - - In the string comparison phase, the 1st fragment match is fixed up - to produce ECX = 0. Subsequent fragment compare of nonzero index - and no match conclude a false match. 
- - case ECX CFlag ZFlag SFlag - 3 X 1 0 0/1 - 4a 0 1 0 0 - 4b 0 1 0 1 - 4c 0 < X 1 0 0/1 - 5 16 0 1 0 - - 3. An initial ordered-comparison fragment match, we fix up to do - subsequent string comparison - 4a. Continuation of fragment comparison of a string compare. - 4b. EOS reached in the reference string, we conclude true match and - return - 4c. String compare failed if index is nonzero, we need to go back to - scanning - 5. failed string compare, go back to scanning - */ - -/* Simple replacement of movdqu to address 4KB boundary cross issue. - If EOS occurs within less than 16B before 4KB boundary, we don't - cross to next page. */ - -static inline __m128i -__m128i_strloadu (const unsigned char * p, __m128i zero) -{ - if (__builtin_expect ((int) ((size_t) p & 0xfff) > 0xff0, 0)) - { - size_t offset = ((size_t) p & (16 - 1)); - __m128i a = _mm_load_si128 ((__m128i *) (p - offset)); - int bmsk = _mm_movemask_epi8 (_mm_cmpeq_epi8 (a, zero)); - if ((bmsk >> offset) != 0) - return __m128i_shift_right (a, offset); - } - return _mm_loadu_si128 ((__m128i *) p); -} - -#if defined USE_AS_STRCASESTR && !defined STRCASESTR_NONASCII - -/* Similar to __m128i_strloadu. Convert to lower case for POSIX/C - locale and other which have single-byte letters only in the ASCII - range. */ -static inline __m128i -__m128i_strloadu_tolower (const unsigned char *p, __m128i zero, __m128i uclow, - __m128i uchigh, __m128i lcqword) -{ - __m128i frag = __m128i_strloadu (p, zero); - - /* Compare if 'Z' > bytes. Inverted way to get a mask for byte <= 'Z'. */ - __m128i r2 = _mm_cmpgt_epi8 (uchigh, frag); - /* Compare if bytes are > 'A' - 1. */ - __m128i r1 = _mm_cmpgt_epi8 (frag, uclow); - /* Mask byte == ff if byte(r2) <= 'Z' and byte(r1) > 'A' - 1. */ - __m128i mask = _mm_and_si128 (r2, r1); - /* Apply lowercase bit 6 mask for above mask bytes == ff. 
*/ - return _mm_or_si128 (frag, _mm_and_si128 (mask, lcqword)); -} +#include "string/strstr.c" -#endif +extern __typeof (__redirect_strstr) __strstr_sse2_unaligned attribute_hidden; +extern __typeof (__redirect_strstr) __strstr_sse2 attribute_hidden; -/* Calculate Knuth-Morris-Pratt string searching algorithm (or KMP - algorithm) overlap for a fully populated 16B vector. - Input parameter: 1st 16Byte loaded from the reference string of a - strstr function. - We don't use KMP algorithm if reference string is less than 16B. */ -static int -__inline__ __attribute__ ((__always_inline__,)) -KMP16Bovrlap (__m128i s2) -{ - __m128i b = _mm_unpacklo_epi8 (s2, s2); - __m128i a = _mm_unpacklo_epi8 (b, b); - a = _mm_shuffle_epi32 (a, 0); - b = _mm_srli_si128 (s2, sizeof (char)); - int bmsk = _mm_movemask_epi8 (_mm_cmpeq_epi8 (b, a)); - - /* _BitScanForward(&k1, bmsk); */ - int k1; - __asm ("bsfl %[bmsk], %[k1]" : [k1] "=r" (k1) : [bmsk] "r" (bmsk)); - if (!bmsk) - return 16; - else if (bmsk == 0x7fff) - return 1; - else if (!k1) - { - /* There are al least two distinct chars in s2. If byte 0 and 1 are - idential and the distinct value lies farther down, we can deduce - the next byte offset to restart full compare is least no earlier - than byte 3. */ - return 3; - } - else - { - /* Byte 1 is not degenerated to byte 0. */ - return k1 + 1; - } -} - -char * -__attribute__ ((section (".text.sse4.2"))) -STRSTR_SSE42 (const unsigned char *s1, const unsigned char *s2) -{ -#define p1 s1 - const unsigned char *p2 = s2; - -#ifndef STRCASESTR_NONASCII - if (__builtin_expect (p2[0] == '\0', 0)) - return (char *) p1; - - if (__builtin_expect (p1[0] == '\0', 0)) - return NULL; - - /* Check if p1 length is 1 byte long. */ - if (__builtin_expect (p1[1] == '\0', 0)) - return p2[1] == '\0' && CMPBYTE (p1[0], p2[0]) ? 
(char *) p1 : NULL; -#endif +#include "init-arch.h" -#ifdef USE_AS_STRCASESTR -# ifndef STRCASESTR_NONASCII - if (__builtin_expect (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_NONASCII_CASE) - != 0, 0)) - return __strcasestr_sse42_nonascii (s1, s2); - - const __m128i uclow = _mm_set1_epi8 (0x40); - const __m128i uchigh = _mm_set1_epi8 (0x5b); - const __m128i lcqword = _mm_set1_epi8 (0x20); - const __m128i zero = _mm_setzero_si128 (); -# define strloadu(p) __m128i_strloadu_tolower (p, zero, uclow, uchigh, lcqword) -# else -# define strloadu __m128i_strloadu_tolower -# define zero _mm_setzero_si128 () -# endif -#else -# define strloadu(p) __m128i_strloadu (p, zero) - const __m128i zero = _mm_setzero_si128 (); -#endif +/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle + ifunc symbol properly. */ +extern __typeof (__redirect_strstr) __libc_strstr; +libc_ifunc (__libc_strstr, HAS_FAST_UNALIGNED_LOAD ? __strstr_sse2_unaligned : __strstr_sse2) - /* p1 > 1 byte long. Load up to 16 bytes of fragment. */ - __m128i frag1 = strloadu (p1); - __m128i frag2; - if (p2[1] != '\0') - /* p2 is > 1 byte long. */ - frag2 = strloadu (p2); - else - frag2 = _mm_insert_epi8 (zero, LOADBYTE (p2[0]), 0); - - /* Unsigned bytes, equal order, does frag2 has null? */ - int cmp_c = _mm_cmpistrc (frag2, frag1, 0x0c); - int cmp_z = _mm_cmpistrz (frag2, frag1, 0x0c); - int cmp = _mm_cmpistri (frag2, frag1, 0x0c); - int cmp_s = _mm_cmpistrs (frag2, frag1, 0x0c); - if (cmp_s & cmp_c) - { - int bmsk = _mm_movemask_epi8 (_mm_cmpeq_epi8 (frag2, zero)); - int len; - __asm ("bsfl %[bmsk], %[len]" - : [len] "=r" (len) : [bmsk] "r" (bmsk)); - p1 += cmp; - if ((len + cmp) <= 16) - return (char *) p1; - - /* Load up to 16 bytes of fragment. 
*/ - frag1 = strloadu (p1); - cmp_c = _mm_cmpistrc (frag2, frag1, 0x0c); - cmp_s = _mm_cmpistrs (frag2, frag1, 0x0c); - cmp_z = _mm_cmpistrz (frag2, frag1, 0x0c); - cmp = _mm_cmpistri (frag2, frag1, 0x0c); - if ((len + cmp) <= 16) - return (char *) p1 + cmp; - } - - if (cmp_s) - { - /* Adjust addr for 16B alginment in ensuing loop. */ - while (!cmp_z) - { - p1 += cmp; - /* Load up to 16 bytes of fragment. */ - frag1 = strloadu (p1); - cmp = _mm_cmpistri (frag2, frag1, 0x0c); - cmp_c = _mm_cmpistrc (frag2, frag1, 0x0c); - cmp_z = _mm_cmpistrz (frag2, frag1, 0x0c); - /* Because s2 < 16 bytes and we adjusted p1 by non-zero cmp - once already, this time cmp will be zero and we can exit. */ - if ((!cmp) & cmp_c) - break; - } - - if (!cmp_c) - return NULL; - - /* Since s2 is less than 16 bytes, com_c is definitive - determination of full match. */ - return (char *) p1 + cmp; - } - - /* General case, s2 is at least 16 bytes or more. - First, the common case of false-match at first byte of p2. */ - const unsigned char *pt = NULL; - int kmp_fwd = 0; -re_trace: - while (!cmp_c) - { - /* frag1 has null. */ - if (cmp_z) - return NULL; - - /* frag 1 has no null, advance 16 bytes. */ - p1 += 16; - /* Load up to 16 bytes of fragment. */ - frag1 = strloadu (p1); - /* Unsigned bytes, equal order, is there a partial match? */ - cmp_c = _mm_cmpistrc (frag2, frag1, 0x0c); - cmp = _mm_cmpistri (frag2, frag1, 0x0c); - cmp_z = _mm_cmpistrz (frag2, frag1, 0x0c); - } - - /* Next, handle initial positive match as first byte of p2. We have - a partial fragment match, make full determination until we reached - end of s2. */ - if (!cmp) - { - if (cmp_z) - return (char *) p1; - - pt = p1; - p1 += 16; - p2 += 16; - /* Load up to 16 bytes of fragment. */ - frag2 = strloadu (p2); - } - else - { - /* Adjust 16B alignment. */ - p1 += cmp; - pt = p1; - } - - /* Load up to 16 bytes of fragment. */ - frag1 = strloadu (p1); - - /* Unsigned bytes, equal order, does frag2 has null? 
*/ - cmp_c = _mm_cmpistrc (frag2, frag1, 0x0c); - cmp_z = _mm_cmpistrz (frag2, frag1, 0x0c); - cmp = _mm_cmpistri (frag2, frag1, 0x0c); - cmp_s = _mm_cmpistrs (frag2, frag1, 0x0c); - while (!(cmp | cmp_z | cmp_s)) - { - p1 += 16; - p2 += 16; - /* Load up to 16 bytes of fragment. */ - frag2 = strloadu (p2); - /* Load up to 16 bytes of fragment. */ - frag1 = strloadu (p1); - /* Unsigned bytes, equal order, does frag2 has null? */ - cmp_c = _mm_cmpistrc (frag2, frag1, 0x0c); - cmp_z = _mm_cmpistrz (frag2, frag1, 0x0c); - cmp = _mm_cmpistri (frag2, frag1, 0x0c); - cmp_s = _mm_cmpistrs (frag2, frag1, 0x0c); - } - - /* Full determination yielded a false result, retrace s1 to next - starting position. - Zflg 1 0 1 0/1 - Sflg 0 1 1 0/1 - cmp na 0 0 >0 - action done done continue continue if s2 < s1 - false match retrace s1 else false - */ - - if (cmp_s & !cmp) - return (char *) pt; - if (cmp_z) - { - if (!cmp_s) - return NULL; - - /* Handle both zero and sign flag set and s1 is shorter in - length. */ - int bmsk = _mm_movemask_epi8 (_mm_cmpeq_epi8 (zero, frag2)); - int bmsk1 = _mm_movemask_epi8 (_mm_cmpeq_epi8 (zero, frag1)); - int len; - int len1; - __asm ("bsfl %[bmsk], %[len]" - : [len] "=r" (len) : [bmsk] "r" (bmsk)); - __asm ("bsfl %[bmsk1], %[len1]" - : [len1] "=r" (len1) : [bmsk1] "r" (bmsk1)); - if (len >= len1) - return NULL; - } - else if (!cmp) - return (char *) pt; - - /* Otherwise, we have to retrace and continue. Default of multiple - paths that need to retrace from next byte in s1. */ - p2 = s2; - frag2 = strloadu (p2); - - if (!kmp_fwd) - kmp_fwd = KMP16Bovrlap (frag2); - - /* KMP algorithm predicted overlap needs to be corrected for - partial fragment compare. */ - p1 = pt + (kmp_fwd > cmp ? cmp : kmp_fwd); - - /* Since s2 is at least 16 bytes long, we're certain there is no - match. */ - if (p1[0] == '\0') - return NULL; - - /* Load up to 16 bytes of fragment. */ - frag1 = strloadu (p1); - - /* Unsigned bytes, equal order, is there a partial match? 
*/ - cmp_c = _mm_cmpistrc (frag2, frag1, 0x0c); - cmp = _mm_cmpistri (frag2, frag1, 0x0c); - cmp_z = _mm_cmpistrz (frag2, frag1, 0x0c); - goto re_trace; -} +#undef strstr +strong_alias (__libc_strstr, strstr) diff -N -u -r glibc-2.17-c758a686/sysdeps/x86_64/multiarch/strstr-c.c glibc-2.17-c758a686-hacked/sysdeps/x86_64/multiarch/strstr-c.c --- glibc-2.17-c758a686/sysdeps/x86_64/multiarch/strstr-c.c 2012-12-24 22:02:13.000000000 -0500 +++ glibc-2.17-c758a686-hacked/sysdeps/x86_64/multiarch/strstr-c.c 1969-12-31 19:00:00.000000000 -0500 @@ -1,50 +0,0 @@ -/* Multiple versions of strstr. - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2012 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - . */ - -/* Redefine strstr so that the compiler won't complain about the type - mismatch with the IFUNC selector in strong_alias, below. 
*/ -#undef strstr -#define strstr __redirect_strstr -#include -#undef strstr - -#define STRSTR __strstr_sse2 -#ifdef SHARED -# undef libc_hidden_builtin_def -# define libc_hidden_builtin_def(name) \ - __hidden_ver1 (__strstr_sse2, __GI_strstr, __strstr_sse2); -#endif - -#include "string/strstr.c" - -extern __typeof (__redirect_strstr) __strstr_sse42 attribute_hidden; -extern __typeof (__redirect_strstr) __strstr_sse2_unaligned attribute_hidden; -extern __typeof (__redirect_strstr) __strstr_sse2 attribute_hidden; - -#include "init-arch.h" - -/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle - ifunc symbol properly. */ -extern __typeof (__redirect_strstr) __libc_strstr; -libc_ifunc (__libc_strstr, HAS_SSE4_2 ? (use_unaligned_strstr () ? - __strstr_sse2_unaligned : - __strstr_sse42) : __strstr_sse2) - -#undef strstr -strong_alias (__libc_strstr, strstr)