You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
741 lines
19 KiB
741 lines
19 KiB
Backport of the following patch as a prerequisite for |
|
96d6fd6c4060d739abb1822e7ad633af749532b2: |
|
commit 69f13dbf06c6195de0ada8632271d58ca3cf55da |
|
Author: Adhemerval Zanella <azanella@linux.vnet.ibm.com> |
|
Date: Thu Sep 26 09:29:19 2013 -0500 |
|
|
|
PowerPC: strcpy/stpcpy optimization for PPC64/POWER7 |
|
|
|
This patch intends to unify both strcpy and stpcpy implementations |
|
for PPC64 and PPC64/POWER7. The idea of the default powerpc64 implementation |
|
is to provide both doubleword and word aligned memory access. |
|
|
|
For PPC64/POWER7 it also provides doubleword and word memory access, |
|
removes the branch hints, uses the cmpb instruction to compare |
|
doubleword/words, and adds an optimization for inputs of same alignment. |
|
|
|
ChangeLog: |
|
|
|
2013-10-04 Adhemerval Zanella <azanella@linux.vnet.ibm.com> |
|
|
|
* sysdeps/powerpc/powerpc64/strcpy.S (strcpy): Add word load/store |
|
to provide a boost for large inputs with word alignment. |
|
* sysdeps/powerpc/powerpc64/stpcpy.S (__stpcpy): Rewrite |
|
implementation based on optimized PPC64 strcpy. |
|
* sysdeps/powerpc/powerpc64/power7/strcpy.S: New file: optimized |
|
strcpy for PPC64/POWER7 based on both doubleword and word load/store. |
|
* sysdeps/powerpc/powerpc64/power7/stpcpy.S: New file: optimized |
|
stpcpy for PPC64/POWER7 based on PPC64/POWER7 strcpy. |
|
|
|
diff --git a/sysdeps/powerpc/powerpc64/power7/stpcpy.S b/sysdeps/powerpc/powerpc64/power7/stpcpy.S |
|
new file mode 100644 |
|
index 0000000..727dd06 |
|
--- /dev/null |
|
+++ b/sysdeps/powerpc/powerpc64/power7/stpcpy.S |
|
@@ -0,0 +1,24 @@ |
|
+/* Optimized stpcpy implementation for PowerPC64/POWER7. |
|
+ Copyright (C) 2013 Free Software Foundation, Inc. |
|
+ This file is part of the GNU C Library. |
|
+ |
|
+ The GNU C Library is free software; you can redistribute it and/or |
|
+ modify it under the terms of the GNU Lesser General Public |
|
+ License as published by the Free Software Foundation; either |
|
+ version 2.1 of the License, or (at your option) any later version. |
|
+ |
|
+ The GNU C Library is distributed in the hope that it will be useful, |
|
+ but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
+ Lesser General Public License for more details. |
|
+ |
|
+ You should have received a copy of the GNU Lesser General Public |
|
+ License along with the GNU C Library; if not, see |
|
+ <http://www.gnu.org/licenses/>. */ |
|
+ |
|
+#define USE_AS_STPCPY |
|
+#include <sysdeps/powerpc/powerpc64/power7/strcpy.S> |
|
+ |
|
+weak_alias (__stpcpy, stpcpy) |
|
+libc_hidden_def (__stpcpy) |
|
+libc_hidden_builtin_def (stpcpy) |
|
diff --git a/sysdeps/powerpc/powerpc64/power7/strcpy.S b/sysdeps/powerpc/powerpc64/power7/strcpy.S |
|
new file mode 100644 |
|
index 0000000..5c341a1 |
|
--- /dev/null |
|
+++ b/sysdeps/powerpc/powerpc64/power7/strcpy.S |
|
@@ -0,0 +1,274 @@ |
|
+/* Optimized strcpy/stpcpy implementation for PowerPC64/POWER7. |
|
+ Copyright (C) 2013 Free Software Foundation, Inc. |
|
+ This file is part of the GNU C Library. |
|
+ |
|
+ The GNU C Library is free software; you can redistribute it and/or |
|
+ modify it under the terms of the GNU Lesser General Public |
|
+ License as published by the Free Software Foundation; either |
|
+ version 2.1 of the License, or (at your option) any later version. |
|
+ |
|
+ The GNU C Library is distributed in the hope that it will be useful, |
|
+ but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
+ Lesser General Public License for more details. |
|
+ |
|
+ You should have received a copy of the GNU Lesser General Public |
|
+ License along with the GNU C Library; if not, see |
|
+ <http://www.gnu.org/licenses/>. */ |
|
+ |
|
+#include <sysdep.h> |
|
+ |
|
+/* Implements the function |
|
+ |
|
+ char * [r3] strcpy (char *dest [r3], const char *src [r4]) |
|
+ |
|
+ or |
|
+ |
|
+ char * [r3] stpcpy (char *dest [r3], const char *src [r4]) |
|
+ |
|
+ if USE_AS_STPCPY is defined. It tries to use aligned memory accesses |
|
+ when possible using the following algorithm: |
|
+ |
|
+ if (((((uintptr_t)dst & 0x7UL) == 0) && ((uintptr_t)src & 0x7UL) == 0)) |
|
+ goto aligned_doubleword_copy; |
|
+ if (((((uintptr_t)dst & 0x3UL) == 0) && ((uintptr_t)src & 0x3UL) == 0)) |
|
+ goto aligned_word_copy; |
|
+ if (((uintptr_t)dst & 0x7UL) == ((uintptr_t)src & 0x7UL)) |
|
+ goto same_alignment; |
|
+ goto unaligned; |
|
+ |
|
+ The aligned comparisons are made using cmpb instructions. */ |
|
+ |
|
+#ifdef USE_AS_STPCPY |
|
+# define FUNC_NAME __stpcpy |
|
+#else |
|
+# define FUNC_NAME strcpy |
|
+#endif |
|
+ |
|
+ .machine power7 |
|
+EALIGN (FUNC_NAME, 4, 0) |
|
+ CALL_MCOUNT 2 |
|
+ |
|
+#define rTMP r0 |
|
+#ifdef USE_AS_STPCPY |
|
+#define rRTN r3 /* pointer to previous word/doubleword in dest */ |
|
+#else |
|
+#define rRTN r12 /* pointer to previous word/doubleword in dest */ |
|
+#endif |
|
+#define rSRC r4 /* pointer to previous word/doubleword in src */ |
|
+#define rMASK r5 /* mask 0xffffffff | 0xffffffffffffffff */ |
|
+#define rWORD r6 /* current word from src */ |
|
+#define rALT r7 /* alternate word from src */ |
|
+#define rRTNAL r8 /* alignment of return pointer */ |
|
+#define rSRCAL r9 /* alignment of source pointer */ |
|
+#define rALCNT r10 /* bytes to read to reach 8 bytes alignment */ |
|
+#define rSUBAL r11 /* doubleword minus unaligned displacement */ |
|
+ |
|
+#ifndef USE_AS_STPCPY |
|
+/* Save the dst pointer to use as return value. */ |
|
+ mr rRTN, r3 |
|
+#endif |
|
+ or rTMP, rSRC, rRTN |
|
+ clrldi. rTMP, rTMP, 61 |
|
+ bne L(check_word_alignment) |
|
+ b L(aligned_doubleword_copy) |
|
+ |
|
+L(same_alignment): |
|
+/* Src and dst with same alignment: align both to doubleword. */ |
|
+ mr rALCNT, rRTN |
|
+ lbz rWORD, 0(rSRC) |
|
+ subfic rSUBAL, rRTNAL, 8 |
|
+ addi rRTN, rRTN, 1 |
|
+ addi rSRC, rSRC, 1 |
|
+ cmpdi cr7, rWORD, 0 |
|
+ stb rWORD, 0(rALCNT) |
|
+ beq cr7, L(s2) |
|
+ |
|
+ add rALCNT, rALCNT, rSUBAL |
|
+ subf rALCNT, rRTN, rALCNT |
|
+ addi rALCNT, rALCNT, 1 |
|
+ mtctr rALCNT |
|
+ b L(s1) |
|
+ |
|
+ .align 4 |
|
+L(s0): |
|
+ addi rSRC, rSRC, 1 |
|
+ lbz rWORD, -1(rSRC) |
|
+ cmpdi cr7, rWORD, 0 |
|
+ stb rWORD, -1(rALCNT) |
|
+ beqlr cr7 |
|
+ mr rRTN, rALCNT |
|
+L(s1): |
|
+ addi rALCNT, rRTN,1 |
|
+ bdnz L(s0) |
|
+ b L(aligned_doubleword_copy) |
|
+ .align 4 |
|
+L(s2): |
|
+ mr rRTN, rALCNT |
|
+ blr |
|
+ |
|
+/* For doubleword aligned memory, operate using doubleword load and stores. */ |
|
+ .align 4 |
|
+L(aligned_doubleword_copy): |
|
+ li rMASK, 0 |
|
+ addi rRTN, rRTN, -8 |
|
+ ld rWORD, 0(rSRC) |
|
+ b L(g2) |
|
+ |
|
+ .align 4 |
|
+L(g0): ldu rALT, 8(rSRC) |
|
+ stdu rWORD, 8(rRTN) |
|
+ cmpb rTMP, rALT, rMASK |
|
+ cmpdi rTMP, 0 |
|
+ bne L(g1) |
|
+ ldu rWORD, 8(rSRC) |
|
+ stdu rALT, 8(rRTN) |
|
+L(g2): cmpb rTMP, rWORD, rMASK |
|
+ cmpdi rTMP, 0 /* If rTMP is 0, no null's have been found. */ |
|
+ beq L(g0) |
|
+ |
|
+ mr rALT, rWORD |
|
+/* We've hit the end of the string. Do the rest byte-by-byte. */ |
|
+L(g1): |
|
+#ifdef __LITTLE_ENDIAN__ |
|
+ extrdi. rTMP, rALT, 8, 56 |
|
+ stbu rALT, 8(rRTN) |
|
+ beqlr- |
|
+ extrdi. rTMP, rALT, 8, 48 |
|
+ stbu rTMP, 1(rRTN) |
|
+ beqlr- |
|
+ extrdi. rTMP, rALT, 8, 40 |
|
+ stbu rTMP, 1(rRTN) |
|
+ beqlr- |
|
+ extrdi. rTMP, rALT, 8, 32 |
|
+ stbu rTMP, 1(rRTN) |
|
+ beqlr- |
|
+ extrdi. rTMP, rALT, 8, 24 |
|
+ stbu rTMP, 1(rRTN) |
|
+ beqlr- |
|
+ extrdi. rTMP, rALT, 8, 16 |
|
+ stbu rTMP, 1(rRTN) |
|
+ beqlr- |
|
+ extrdi. rTMP, rALT, 8, 8 |
|
+ stbu rTMP, 1(rRTN) |
|
+ beqlr- |
|
+ extrdi rTMP, rALT, 8, 0 |
|
+ stbu rTMP, 1(rRTN) |
|
+#else |
|
+ extrdi. rTMP, rALT, 8, 0 |
|
+ stbu rTMP, 8(rRTN) |
|
+ beqlr |
|
+ extrdi. rTMP, rALT, 8, 8 |
|
+ stbu rTMP, 1(rRTN) |
|
+ beqlr |
|
+ extrdi. rTMP, rALT, 8, 16 |
|
+ stbu rTMP, 1(rRTN) |
|
+ beqlr |
|
+ extrdi. rTMP, rALT, 8, 24 |
|
+ stbu rTMP, 1(rRTN) |
|
+ beqlr |
|
+ extrdi. rTMP, rALT, 8, 32 |
|
+ stbu rTMP, 1(rRTN) |
|
+ beqlr |
|
+ extrdi. rTMP, rALT, 8, 40 |
|
+ stbu rTMP, 1(rRTN) |
|
+ beqlr |
|
+ extrdi. rTMP, rALT, 8, 48 |
|
+ stbu rTMP, 1(rRTN) |
|
+ beqlr |
|
+ stbu rALT, 1(rRTN) |
|
+#endif |
|
+ blr |
|
+ |
|
+L(check_word_alignment): |
|
+ clrldi. rTMP, rTMP, 62 |
|
+ beq L(aligned_word_copy) |
|
+ rldicl rRTNAL, rRTN, 0, 61 |
|
+ rldicl rSRCAL, rSRC, 0, 61 |
|
+ cmpld cr7, rSRCAL, rRTNAL |
|
+ beq cr7, L(same_alignment) |
|
+ b L(unaligned) |
|
+ |
|
+/* For word aligned memory, operate using word load and stores. */ |
|
+ .align 4 |
|
+L(aligned_word_copy): |
|
+ li rMASK, 0 |
|
+ addi rRTN, rRTN, -4 |
|
+ lwz rWORD, 0(rSRC) |
|
+ b L(g5) |
|
+ |
|
+ .align 4 |
|
+L(g3): lwzu rALT, 4(rSRC) |
|
+ stwu rWORD, 4(rRTN) |
|
+ cmpb rTMP, rALT, rMASK |
|
+ cmpwi rTMP, 0 |
|
+ bne L(g4) |
|
+ lwzu rWORD, 4(rSRC) |
|
+ stwu rALT, 4(rRTN) |
|
+L(g5): cmpb rTMP, rWORD, rMASK |
|
+ cmpwi rTMP, 0 /* If rTMP is 0, no null in word. */ |
|
+ beq L(g3) |
|
+ |
|
+ mr rALT, rWORD |
|
+/* We've hit the end of the string. Do the rest byte-by-byte. */ |
|
+L(g4): |
|
+#ifdef __LITTLE_ENDIAN__ |
|
+ rlwinm. rTMP, rALT, 0, 24, 31 |
|
+ stbu rALT, 4(rRTN) |
|
+ beqlr- |
|
+ rlwinm. rTMP, rALT, 24, 24, 31 |
|
+ stbu rTMP, 1(rRTN) |
|
+ beqlr- |
|
+ rlwinm. rTMP, rALT, 16, 24, 31 |
|
+ stbu rTMP, 1(rRTN) |
|
+ beqlr- |
|
+ rlwinm rTMP, rALT, 8, 24, 31 |
|
+ stbu rTMP, 1(rRTN) |
|
+#else |
|
+ rlwinm. rTMP, rALT, 8, 24, 31 |
|
+ stbu rTMP, 4(rRTN) |
|
+ beqlr |
|
+ rlwinm. rTMP, rALT, 16, 24, 31 |
|
+ stbu rTMP, 1(rRTN) |
|
+ beqlr |
|
+ rlwinm. rTMP, rALT, 24, 24, 31 |
|
+ stbu rTMP, 1(rRTN) |
|
+ beqlr |
|
+ stbu rALT, 1(rRTN) |
|
+#endif |
|
+ blr |
|
+ |
|
+/* Oh well. In this case, we just do a byte-by-byte copy. */ |
|
+ .align 4 |
|
+L(unaligned): |
|
+ lbz rWORD, 0(rSRC) |
|
+ addi rRTN, rRTN, -1 |
|
+ cmpdi rWORD, 0 |
|
+ beq L(u2) |
|
+ |
|
+ .align 5 |
|
+L(u0): lbzu rALT, 1(rSRC) |
|
+ stbu rWORD, 1(rRTN) |
|
+ cmpdi rALT, 0 |
|
+ beq L(u1) |
|
+ lbzu rWORD, 1(rSRC) |
|
+ stbu rALT, 1(rRTN) |
|
+ cmpdi rWORD, 0 |
|
+ beq L(u2) |
|
+ lbzu rALT, 1(rSRC) |
|
+ stbu rWORD, 1(rRTN) |
|
+ cmpdi rALT, 0 |
|
+ beq L(u1) |
|
+ lbzu rWORD, 1(rSRC) |
|
+ stbu rALT, 1(rRTN) |
|
+ cmpdi rWORD, 0 |
|
+ bne L(u0) |
|
+L(u2): stbu rWORD, 1(rRTN) |
|
+ blr |
|
+L(u1): stbu rALT, 1(rRTN) |
|
+ blr |
|
+END (FUNC_NAME) |
|
+ |
|
+#ifndef USE_AS_STPCPY |
|
+libc_hidden_builtin_def (strcpy) |
|
+#endif |
|
diff --git a/sysdeps/powerpc/powerpc64/stpcpy.S b/sysdeps/powerpc/powerpc64/stpcpy.S |
|
index d795b61..09aa3be 100644 |
|
--- a/sysdeps/powerpc/powerpc64/stpcpy.S |
|
+++ b/sysdeps/powerpc/powerpc64/stpcpy.S |
|
@@ -1,5 +1,5 @@ |
|
/* Optimized stpcpy implementation for PowerPC64. |
|
- Copyright (C) 1997, 1999, 2000, 2002, 2004 Free Software Foundation, Inc. |
|
+ Copyright (C) 1997-2013 Free Software Foundation, Inc. |
|
This file is part of the GNU C Library. |
|
|
|
The GNU C Library is free software; you can redistribute it and/or |
|
@@ -16,123 +16,9 @@ |
|
License along with the GNU C Library; if not, see |
|
<http://www.gnu.org/licenses/>. */ |
|
|
|
-#include <sysdep.h> |
|
-#include <bp-sym.h> |
|
-#include <bp-asm.h> |
|
+#define USE_AS_STPCPY |
|
+#include <sysdeps/powerpc/powerpc64/strcpy.S> |
|
|
|
-/* See strlen.s for comments on how the end-of-string testing works. */ |
|
- |
|
-/* char * [r3] stpcpy (char *dest [r3], const char *src [r4]) */ |
|
- |
|
-EALIGN (BP_SYM (__stpcpy), 4, 0) |
|
- CALL_MCOUNT 2 |
|
- |
|
-#define rTMP r0 |
|
-#define rRTN r3 |
|
-#if __BOUNDED_POINTERS__ |
|
-# define rDEST r4 /* pointer to previous word in dest */ |
|
-# define rSRC r5 /* pointer to previous word in src */ |
|
-# define rLOW r11 |
|
-# define rHIGH r12 |
|
-#else |
|
-# define rDEST r3 /* pointer to previous word in dest */ |
|
-# define rSRC r4 /* pointer to previous word in src */ |
|
-#endif |
|
-#define rWORD r6 /* current word from src */ |
|
-#define rFEFE r7 /* 0xfefefeff */ |
|
-#define r7F7F r8 /* 0x7f7f7f7f */ |
|
-#define rNEG r9 /* ~(word in src | 0x7f7f7f7f) */ |
|
-#define rALT r10 /* alternate word from src */ |
|
- |
|
- CHECK_BOUNDS_LOW (rSRC, rLOW, rHIGH) |
|
- CHECK_BOUNDS_LOW (rDEST, rLOW, rHIGH) |
|
- STORE_RETURN_BOUNDS (rLOW, rHIGH) |
|
- |
|
- or rTMP, rSRC, rDEST |
|
- clrldi. rTMP, rTMP, 62 |
|
- addi rDEST, rDEST, -4 |
|
- bne L(unaligned) |
|
- |
|
- lis rFEFE, -0x101 |
|
- lis r7F7F, 0x7f7f |
|
- lwz rWORD, 0(rSRC) |
|
- addi rFEFE, rFEFE, -0x101 |
|
- addi r7F7F, r7F7F, 0x7f7f |
|
- b L(g2) |
|
- |
|
-L(g0): lwzu rALT, 4(rSRC) |
|
- stwu rWORD, 4(rDEST) |
|
- add rTMP, rFEFE, rALT |
|
- nor rNEG, r7F7F, rALT |
|
- and. rTMP, rTMP, rNEG |
|
- bne- L(g1) |
|
- lwzu rWORD, 4(rSRC) |
|
- stwu rALT, 4(rDEST) |
|
-L(g2): add rTMP, rFEFE, rWORD |
|
- nor rNEG, r7F7F, rWORD |
|
- and. rTMP, rTMP, rNEG |
|
- beq+ L(g0) |
|
- |
|
- mr rALT, rWORD |
|
-/* We've hit the end of the string. Do the rest byte-by-byte. */ |
|
-L(g1): |
|
-#ifdef __LITTLE_ENDIAN__ |
|
- rlwinm. rTMP, rALT, 0, 24, 31 |
|
- stbu rALT, 4(rDEST) |
|
- beqlr- |
|
- rlwinm. rTMP, rALT, 24, 24, 31 |
|
- stbu rTMP, 1(rDEST) |
|
- beqlr- |
|
- rlwinm. rTMP, rALT, 16, 24, 31 |
|
- stbu rTMP, 1(rDEST) |
|
- beqlr- |
|
- rlwinm rTMP, rALT, 8, 24, 31 |
|
- stbu rTMP, 1(rDEST) |
|
- blr |
|
-#else |
|
- rlwinm. rTMP, rALT, 8, 24, 31 |
|
- stbu rTMP, 4(rDEST) |
|
- beqlr- |
|
- rlwinm. rTMP, rALT, 16, 24, 31 |
|
- stbu rTMP, 1(rDEST) |
|
- beqlr- |
|
- rlwinm. rTMP, rALT, 24, 24, 31 |
|
- stbu rTMP, 1(rDEST) |
|
- beqlr- |
|
- stbu rALT, 1(rDEST) |
|
- CHECK_BOUNDS_HIGH (rDEST, rHIGH, twlgt) |
|
- STORE_RETURN_VALUE (rDEST) |
|
- blr |
|
-#endif |
|
- |
|
-/* Oh well. In this case, we just do a byte-by-byte copy. */ |
|
- .align 4 |
|
- nop |
|
-L(unaligned): |
|
- lbz rWORD, 0(rSRC) |
|
- addi rDEST, rDEST, 3 |
|
- cmpwi rWORD, 0 |
|
- beq- L(u2) |
|
- |
|
-L(u0): lbzu rALT, 1(rSRC) |
|
- stbu rWORD, 1(rDEST) |
|
- cmpwi rALT, 0 |
|
- beq- L(u1) |
|
- nop /* Let 601 load start of loop. */ |
|
- lbzu rWORD, 1(rSRC) |
|
- stbu rALT, 1(rDEST) |
|
- cmpwi rWORD, 0 |
|
- bne+ L(u0) |
|
-L(u2): stbu rWORD, 1(rDEST) |
|
- CHECK_BOUNDS_HIGH (rDEST, rHIGH, twlgt) |
|
- STORE_RETURN_VALUE (rDEST) |
|
- blr |
|
-L(u1): stbu rALT, 1(rDEST) |
|
- CHECK_BOUNDS_HIGH (rDEST, rHIGH, twlgt) |
|
- STORE_RETURN_VALUE (rDEST) |
|
- blr |
|
-END (BP_SYM (__stpcpy)) |
|
- |
|
-weak_alias (BP_SYM (__stpcpy), BP_SYM (stpcpy)) |
|
+weak_alias (__stpcpy, stpcpy) |
|
libc_hidden_def (__stpcpy) |
|
libc_hidden_builtin_def (stpcpy) |
|
diff --git a/sysdeps/powerpc/powerpc64/strcpy.S b/sysdeps/powerpc/powerpc64/strcpy.S |
|
index 9434c27..793325d 100644 |
|
--- a/sysdeps/powerpc/powerpc64/strcpy.S |
|
+++ b/sysdeps/powerpc/powerpc64/strcpy.S |
|
@@ -1,5 +1,5 @@ |
|
/* Optimized strcpy implementation for PowerPC64. |
|
- Copyright (C) 1997, 1999, 2000, 2002, 2003, 2011 Free Software Foundation, Inc. |
|
+ Copyright (C) 1997-2013 Free Software Foundation, Inc. |
|
This file is part of the GNU C Library. |
|
|
|
The GNU C Library is free software; you can redistribute it and/or |
|
@@ -17,52 +17,43 @@ |
|
<http://www.gnu.org/licenses/>. */ |
|
|
|
#include <sysdep.h> |
|
-#include <bp-sym.h> |
|
-#include <bp-asm.h> |
|
|
|
/* See strlen.s for comments on how the end-of-string testing works. */ |
|
|
|
/* char * [r3] strcpy (char *dest [r3], const char *src [r4]) */ |
|
|
|
-EALIGN (BP_SYM (strcpy), 4, 0) |
|
+#ifdef USE_AS_STPCPY |
|
+# define FUNC_NAME __stpcpy |
|
+#else |
|
+# define FUNC_NAME strcpy |
|
+#endif |
|
+ |
|
+EALIGN (FUNC_NAME, 4, 0) |
|
CALL_MCOUNT 2 |
|
|
|
#define rTMP r0 |
|
-#define rRTN r3 /* incoming DEST arg preserved as result */ |
|
-/* Note. The Bounded pointer support in this code is broken. This code |
|
- was inherited from PPC32 and that support was never completed. |
|
- Current PPC gcc does not support -fbounds-check or -fbounded-pointers. |
|
- These artifacts are left in the code as a reminder in case we need |
|
- bounded pointer support in the future. */ |
|
-#if __BOUNDED_POINTERS__ |
|
-# define rDEST r4 /* pointer to previous word in dest */ |
|
-# define rSRC r5 /* pointer to previous word in src */ |
|
-# define rLOW r11 |
|
-# define rHIGH r12 |
|
+#ifdef USE_AS_STPCPY |
|
+#define rRTN r3 /* pointer to previous word/doubleword in dest */ |
|
#else |
|
-# define rSRC r4 /* pointer to previous word in src */ |
|
-# define rDEST r5 /* pointer to previous word in dest */ |
|
+#define rRTN r12 /* pointer to previous word/doubleword in dest */ |
|
#endif |
|
+#define rSRC r4 /* pointer to previous word/doubleword in src */ |
|
#define rWORD r6 /* current word from src */ |
|
-#define rFEFE r7 /* constant 0xfefefefefefefeff (-0x0101010101010101) */ |
|
-#define r7F7F r8 /* constant 0x7f7f7f7f7f7f7f7f */ |
|
-#define rNEG r9 /* ~(word in s1 | 0x7f7f7f7f7f7f7f7f) */ |
|
+#define rFEFE r7 /* constant 0xfefefeff | 0xfefefefefefefeff */ |
|
+#define r7F7F r8 /* constant 0x7f7f7f7f | 0x7f7f7f7f7f7f7f7f */ |
|
+#define rNEG r9 /* ~(word in s1 | r7F7F) */ |
|
#define rALT r10 /* alternate word from src */ |
|
|
|
- CHECK_BOUNDS_LOW (rSRC, rLOW, rHIGH) |
|
- CHECK_BOUNDS_LOW (rDEST, rLOW, rHIGH) |
|
- STORE_RETURN_BOUNDS (rLOW, rHIGH) |
|
- |
|
- dcbt 0,rSRC |
|
+#ifndef USE_AS_STPCPY |
|
+/* Save the dst pointer to use as return value. */ |
|
+ mr rRTN, r3 |
|
+#endif |
|
or rTMP, rSRC, rRTN |
|
clrldi. rTMP, rTMP, 61 |
|
-#if __BOUNDED_POINTERS__ |
|
- addi rDEST, rDEST, -8 |
|
-#else |
|
- addi rDEST, rRTN, -8 |
|
-#endif |
|
- dcbtst 0,rRTN |
|
- bne L(unaligned) |
|
+ bne L(check_word_alignment) |
|
+ |
|
+/* For doubleword aligned memory, operate using doubleword load and stores. */ |
|
+ addi rRTN, rRTN, -8 |
|
|
|
lis rFEFE, -0x101 |
|
lis r7F7F, 0x7f7f |
|
@@ -75,13 +66,13 @@ EALIGN (BP_SYM (strcpy), 4, 0) |
|
b L(g2) |
|
|
|
L(g0): ldu rALT, 8(rSRC) |
|
- stdu rWORD, 8(rDEST) |
|
+ stdu rWORD, 8(rRTN) |
|
add rTMP, rFEFE, rALT |
|
nor rNEG, r7F7F, rALT |
|
and. rTMP, rTMP, rNEG |
|
bne- L(g1) |
|
ldu rWORD, 8(rSRC) |
|
- stdu rALT, 8(rDEST) |
|
+ stdu rALT, 8(rRTN) |
|
L(g2): add rTMP, rFEFE, rWORD |
|
nor rNEG, r7F7F, rWORD |
|
and. rTMP, rTMP, rNEG |
|
@@ -92,80 +83,134 @@ L(g2): add rTMP, rFEFE, rWORD |
|
L(g1): |
|
#ifdef __LITTLE_ENDIAN__ |
|
extrdi. rTMP, rALT, 8, 56 |
|
- stb rALT, 8(rDEST) |
|
+ stbu rALT, 8(rRTN) |
|
beqlr- |
|
extrdi. rTMP, rALT, 8, 48 |
|
- stb rTMP, 9(rDEST) |
|
+ stbu rTMP, 1(rRTN) |
|
beqlr- |
|
extrdi. rTMP, rALT, 8, 40 |
|
- stb rTMP, 10(rDEST) |
|
+ stbu rTMP, 1(rRTN) |
|
beqlr- |
|
extrdi. rTMP, rALT, 8, 32 |
|
- stb rTMP, 11(rDEST) |
|
+ stbu rTMP, 1(rRTN) |
|
beqlr- |
|
extrdi. rTMP, rALT, 8, 24 |
|
- stb rTMP, 12(rDEST) |
|
+ stbu rTMP, 1(rRTN) |
|
beqlr- |
|
extrdi. rTMP, rALT, 8, 16 |
|
- stb rTMP, 13(rDEST) |
|
+ stbu rTMP, 1(rRTN) |
|
beqlr- |
|
extrdi. rTMP, rALT, 8, 8 |
|
- stb rTMP, 14(rDEST) |
|
+ stbu rTMP, 1(rRTN) |
|
beqlr- |
|
extrdi rTMP, rALT, 8, 0 |
|
- stb rTMP, 15(rDEST) |
|
- blr |
|
+ stbu rTMP, 1(rRTN) |
|
#else |
|
extrdi. rTMP, rALT, 8, 0 |
|
- stb rTMP, 8(rDEST) |
|
+ stbu rTMP, 8(rRTN) |
|
beqlr- |
|
extrdi. rTMP, rALT, 8, 8 |
|
- stb rTMP, 9(rDEST) |
|
+ stbu rTMP, 1(rRTN) |
|
beqlr- |
|
extrdi. rTMP, rALT, 8, 16 |
|
- stb rTMP, 10(rDEST) |
|
+ stbu rTMP, 1(rRTN) |
|
beqlr- |
|
extrdi. rTMP, rALT, 8, 24 |
|
- stb rTMP, 11(rDEST) |
|
+ stbu rTMP, 1(rRTN) |
|
beqlr- |
|
extrdi. rTMP, rALT, 8, 32 |
|
- stb rTMP, 12(rDEST) |
|
- beqlr- |
|
+ stbu rTMP, 1(rRTN) |
|
+ beqlr |
|
extrdi. rTMP, rALT, 8, 40 |
|
- stb rTMP, 13(rDEST) |
|
+ stbu rTMP, 1(rRTN) |
|
beqlr- |
|
extrdi. rTMP, rALT, 8, 48 |
|
- stb rTMP, 14(rDEST) |
|
+ stbu rTMP, 1(rRTN) |
|
beqlr- |
|
- stb rALT, 15(rDEST) |
|
- /* GKM FIXME: check high bound. */ |
|
+ stbu rALT, 1(rRTN) |
|
+#endif |
|
blr |
|
+ |
|
+L(check_word_alignment): |
|
+ clrldi. rTMP, rTMP, 62 |
|
+ bne L(unaligned) |
|
+ |
|
+/* For word aligned memory, operate using word load and stores. */ |
|
+ addi rRTN, rRTN, -4 |
|
+ |
|
+ lis rFEFE, -0x101 |
|
+ lis r7F7F, 0x7f7f |
|
+ lwz rWORD, 0(rSRC) |
|
+ addi rFEFE, rFEFE, -0x101 |
|
+ addi r7F7F, r7F7F, 0x7f7f |
|
+ b L(g5) |
|
+ |
|
+L(g3): lwzu rALT, 4(rSRC) |
|
+ stwu rWORD, 4(rRTN) |
|
+ add rTMP, rFEFE, rALT |
|
+ nor rNEG, r7F7F, rALT |
|
+ and. rTMP, rTMP, rNEG |
|
+ bne- L(g4) |
|
+ lwzu rWORD, 4(rSRC) |
|
+ stwu rALT, 4(rRTN) |
|
+L(g5): add rTMP, rFEFE, rWORD |
|
+ nor rNEG, r7F7F, rWORD |
|
+ and. rTMP, rTMP, rNEG |
|
+ beq+ L(g3) |
|
+ |
|
+ mr rALT, rWORD |
|
+/* We've hit the end of the string. Do the rest byte-by-byte. */ |
|
+L(g4): |
|
+#ifdef __LITTLE_ENDIAN__ |
|
+ rlwinm. rTMP, rALT, 0, 24, 31 |
|
+ stbu rALT, 4(rRTN) |
|
+ beqlr- |
|
+ rlwinm. rTMP, rALT, 24, 24, 31 |
|
+ stbu rTMP, 1(rRTN) |
|
+ beqlr- |
|
+ rlwinm. rTMP, rALT, 16, 24, 31 |
|
+ stbu rTMP, 1(rRTN) |
|
+ beqlr- |
|
+ rlwinm rTMP, rALT, 8, 24, 31 |
|
+ stbu rTMP, 1(rRTN) |
|
+#else |
|
+ rlwinm. rTMP, rALT, 8, 24, 31 |
|
+ stbu rTMP, 4(rRTN) |
|
+ beqlr- |
|
+ rlwinm. rTMP, rALT, 16, 24, 31 |
|
+ stbu rTMP, 1(rRTN) |
|
+ beqlr- |
|
+ rlwinm. rTMP, rALT, 24, 24, 31 |
|
+ stbu rTMP, 1(rRTN) |
|
+ beqlr- |
|
+ stbu rALT, 1(rRTN) |
|
#endif |
|
+ blr |
|
|
|
/* Oh well. In this case, we just do a byte-by-byte copy. */ |
|
.align 4 |
|
nop |
|
L(unaligned): |
|
lbz rWORD, 0(rSRC) |
|
- addi rDEST, rRTN, -1 |
|
+ addi rRTN, rRTN, -1 |
|
cmpwi rWORD, 0 |
|
beq- L(u2) |
|
|
|
L(u0): lbzu rALT, 1(rSRC) |
|
- stbu rWORD, 1(rDEST) |
|
+ stbu rWORD, 1(rRTN) |
|
cmpwi rALT, 0 |
|
beq- L(u1) |
|
nop /* Let 601 load start of loop. */ |
|
lbzu rWORD, 1(rSRC) |
|
- stbu rALT, 1(rDEST) |
|
+ stbu rALT, 1(rRTN) |
|
cmpwi rWORD, 0 |
|
bne+ L(u0) |
|
-L(u2): stb rWORD, 1(rDEST) |
|
- /* GKM FIXME: check high bound. */ |
|
+L(u2): stbu rWORD, 1(rRTN) |
|
blr |
|
-L(u1): stb rALT, 1(rDEST) |
|
- /* GKM FIXME: check high bound. */ |
|
+L(u1): stbu rALT, 1(rRTN) |
|
blr |
|
+END (FUNC_NAME) |
|
|
|
-END (BP_SYM (strcpy)) |
|
+#ifndef USE_AS_STPCPY |
|
libc_hidden_builtin_def (strcpy) |
|
+#endif
|
|
|