You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
157 lines
5.5 KiB
157 lines
5.5 KiB
commit a4b1cae068d4d6e3117dd49e7d0599e4c62ac39f |
|
Author: Noah Goldstein <goldstein.w.n@gmail.com> |
|
Date: Wed Mar 23 16:57:29 2022 -0500 |
|
|
|
x86: Remove strspn-sse2.S and use the generic implementation |
|
|
|
The generic implementation is faster. |
|
|
|
geometric_mean(N=20) of all benchmarks New / Original: .710 |
|
|
|
All string/memory tests pass. |
|
Reviewed-by: H.J. Lu <hjl.tools@gmail.com> |
|
|
|
(cherry picked from commit 9c8a6ad620b49a27120ecdd7049c26bf05900397) |
|
|
|
diff --git a/sysdeps/x86_64/multiarch/strspn-sse2.S b/sysdeps/x86_64/multiarch/strspn-sse2.c |
|
similarity index 89% |
|
rename from sysdeps/x86_64/multiarch/strspn-sse2.S |
|
rename to sysdeps/x86_64/multiarch/strspn-sse2.c |
|
index e919fe492cc15151..f5e5686db1037740 100644 |
|
--- a/sysdeps/x86_64/multiarch/strspn-sse2.S |
|
+++ b/sysdeps/x86_64/multiarch/strspn-sse2.c |
|
@@ -19,10 +19,10 @@ |
|
#if IS_IN (libc) |
|
|
|
# include <sysdep.h> |
|
-# define strspn __strspn_sse2 |
|
+# define STRSPN __strspn_sse2 |
|
|
|
# undef libc_hidden_builtin_def |
|
-# define libc_hidden_builtin_def(strspn) |
|
+# define libc_hidden_builtin_def(STRSPN) |
|
#endif |
|
|
|
-#include <sysdeps/x86_64/strspn.S> |
|
+#include <string/strspn.c> |
|
diff --git a/sysdeps/x86_64/strspn.S b/sysdeps/x86_64/strspn.S |
|
deleted file mode 100644 |
|
index e878f328852792db..0000000000000000 |
|
--- a/sysdeps/x86_64/strspn.S |
|
+++ /dev/null |
|
@@ -1,115 +0,0 @@ |
|
-/* strspn (str, ss) -- Return the length of the initial segment of STR |
|
- which contains only characters from SS. |
|
- For AMD x86-64. |
|
- Copyright (C) 1994-2021 Free Software Foundation, Inc. |
|
- This file is part of the GNU C Library. |
|
- Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>. |
|
- Bug fixes by Alan Modra <Alan@SPRI.Levels.UniSA.Edu.Au>. |
|
- Adopted for x86-64 by Andreas Jaeger <aj@suse.de>. |
|
- |
|
- The GNU C Library is free software; you can redistribute it and/or |
|
- modify it under the terms of the GNU Lesser General Public |
|
- License as published by the Free Software Foundation; either |
|
- version 2.1 of the License, or (at your option) any later version. |
|
- |
|
- The GNU C Library is distributed in the hope that it will be useful, |
|
- but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
- Lesser General Public License for more details. |
|
- |
|
- You should have received a copy of the GNU Lesser General Public |
|
- License along with the GNU C Library; if not, see |
|
- <https://www.gnu.org/licenses/>. */ |
|
- |
|
-#include <sysdep.h> |
|
- |
|
- .text |
|
-ENTRY (strspn) |
|
- |
|
- movq %rdi, %rdx /* Save SRC. */ |
|
- |
|
- /* First we create a table with flags for all possible characters. |
|
- For the ASCII (7bit/8bit) or ISO-8859-X character sets which are |
|
- supported by the C string functions we have 256 characters. |
|
- Before inserting marks for the stop characters we clear the whole |
|
- table. */ |
|
- movq %rdi, %r8 /* Save value. */ |
|
- subq $256, %rsp /* Make space for 256 bytes. */ |
|
- cfi_adjust_cfa_offset(256) |
|
- movl $32, %ecx /* 32*8 bytes = 256 bytes. */ |
|
- movq %rsp, %rdi |
|
- xorl %eax, %eax /* We store 0s. */ |
|
- cld |
|
- rep |
|
- stosq |
|
- |
|
- movq %rsi, %rax /* Setup stopset. */ |
|
- |
|
-/* For understanding the following code remember that %rcx == 0 now. |
|
- Although all the following instruction only modify %cl we always |
|
- have a correct zero-extended 64-bit value in %rcx. */ |
|
- |
|
- .p2align 4 |
|
-L(2): movb (%rax), %cl /* get byte from stopset */ |
|
- testb %cl, %cl /* is NUL char? */ |
|
- jz L(1) /* yes => start compare loop */ |
|
- movb %cl, (%rsp,%rcx) /* set corresponding byte in stopset table */ |
|
- |
|
- movb 1(%rax), %cl /* get byte from stopset */ |
|
- testb $0xff, %cl /* is NUL char? */ |
|
- jz L(1) /* yes => start compare loop */ |
|
- movb %cl, (%rsp,%rcx) /* set corresponding byte in stopset table */ |
|
- |
|
- movb 2(%rax), %cl /* get byte from stopset */ |
|
- testb $0xff, %cl /* is NUL char? */ |
|
- jz L(1) /* yes => start compare loop */ |
|
- movb %cl, (%rsp,%rcx) /* set corresponding byte in stopset table */ |
|
- |
|
- movb 3(%rax), %cl /* get byte from stopset */ |
|
- addq $4, %rax /* increment stopset pointer */ |
|
- movb %cl, (%rsp,%rcx) /* set corresponding byte in stopset table */ |
|
- testb $0xff, %cl /* is NUL char? */ |
|
- jnz L(2) /* no => process next dword from stopset */ |
|
- |
|
-L(1): leaq -4(%rdx), %rax /* prepare loop */ |
|
- |
|
- /* We use a neat trick for the following loop. Normally we would |
|
- have to test for two termination conditions |
|
- 1. a character in the stopset was found |
|
- and |
|
- 2. the end of the string was found |
|
- But as a sign that the character is in the stopset we store its |
|
- value in the table. But the value of NUL is NUL so the loop |
|
- terminates for NUL in every case. */ |
|
- |
|
- .p2align 4 |
|
-L(3): addq $4, %rax /* adjust pointer for full loop round */ |
|
- |
|
- movb (%rax), %cl /* get byte from string */ |
|
- testb %cl, (%rsp,%rcx) /* is it contained in skipset? */ |
|
- jz L(4) /* no => return */ |
|
- |
|
- movb 1(%rax), %cl /* get byte from string */ |
|
- testb %cl, (%rsp,%rcx) /* is it contained in skipset? */ |
|
- jz L(5) /* no => return */ |
|
- |
|
- movb 2(%rax), %cl /* get byte from string */ |
|
- testb %cl, (%rsp,%rcx) /* is it contained in skipset? */ |
|
- jz L(6) /* no => return */ |
|
- |
|
- movb 3(%rax), %cl /* get byte from string */ |
|
- testb %cl, (%rsp,%rcx) /* is it contained in skipset? */ |
|
- jnz L(3) /* yes => start loop again */ |
|
- |
|
- incq %rax /* adjust pointer */ |
|
-L(6): incq %rax |
|
-L(5): incq %rax |
|
- |
|
-L(4): addq $256, %rsp /* remove stopset */ |
|
- cfi_adjust_cfa_offset(-256) |
|
- subq %rdx, %rax /* we have to return the number of valid |
|
- characters, so compute distance to first |
|
- non-valid character */ |
|
- ret |
|
-END (strspn) |
|
-libc_hidden_builtin_def (strspn)
|
|
|