The memmove-related fix is dropped in this patch because rhel-7.5
does not have optimized memmove for POWER7.

commit 63da5cd4a097d089033d980c42254c3356fa723f
Author: Rajalakshmi Srinivasaraghavan <raji@linux.vnet.ibm.com>
Date:   Wed Oct 25 13:13:53 2017 -0200

    powerpc: Replace lxvd2x/stxvd2x with lvx/stvx in P7's memcpy/memmove

    POWER9 DD2.1 and earlier have an issue where some cache-inhibited
    vector loads trap to the kernel, causing a performance degradation.
    To handle this in memcpy and memmove, lvx/stvx are used for aligned
    addresses instead of lxvd2x/stxvd2x.

    Reference: https://patchwork.ozlabs.org/patch/814059/

    	* sysdeps/powerpc/powerpc64/power7/memcpy.S: Replace
    	lxvd2x/stxvd2x with lvx/stvx.
    	* sysdeps/powerpc/powerpc64/power7/memmove.S: Likewise.

    Reviewed-by: Tulio Magno Quites Machado Filho <tuliom@linux.vnet.ibm.com>
    Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>

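Sketch for the reader (an editor's illustration, not part of the applied
patch): for 16-byte-aligned addresses the VMX lvx/stvx pair moves the
same 16 bytes as the VSX lxvd2x/stxvd2x pair, while avoiding the VSX
loads that can trap to the kernel on cache-inhibited mappings on POWER9
DD2.1 and earlier. lvx/stvx ignore the low four bits of the effective
address, so the substitution is only safe on the aligned paths patched
below.

	# Before: VSX copy of 16 bytes (register 6 here is vs6).
	lxvd2x	6,0,src		# load 16 bytes from src
	stxvd2x	6,0,dst		# store 16 bytes to dst
	# After: equivalent VMX copy (register 6 here is v6, i.e. vs38);
	# valid only because src and dst are 16-byte aligned on this path.
	lvx	6,0,src		# load 16 aligned bytes from src
	stvx	6,0,dst		# store 16 aligned bytes to dst
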
diff --git a/sysdeps/powerpc/powerpc64/power7/memcpy.S b/sysdeps/powerpc/powerpc64/power7/memcpy.S
index 1ccbc2e..a7cdf8b 100644
--- a/sysdeps/powerpc/powerpc64/power7/memcpy.S
+++ b/sysdeps/powerpc/powerpc64/power7/memcpy.S
@@ -91,63 +91,63 @@ L(aligned_copy):
 	srdi	12,cnt,7
 	cmpdi	12,0
 	beq	L(aligned_tail)
-	lxvd2x	6,0,src
-	lxvd2x	7,src,6
+	lvx	6,0,src
+	lvx	7,src,6
 	mtctr	12
 	b	L(aligned_128loop)
 
 	.align	4
 L(aligned_128head):
 	/* for the 2nd + iteration of this loop. */
-	lxvd2x	6,0,src
-	lxvd2x	7,src,6
+	lvx	6,0,src
+	lvx	7,src,6
 L(aligned_128loop):
-	lxvd2x	8,src,7
-	lxvd2x	9,src,8
-	stxvd2x	6,0,dst
+	lvx	8,src,7
+	lvx	9,src,8
+	stvx	6,0,dst
 	addi	src,src,64
-	stxvd2x	7,dst,6
-	stxvd2x	8,dst,7
-	stxvd2x	9,dst,8
-	lxvd2x	6,0,src
-	lxvd2x	7,src,6
+	stvx	7,dst,6
+	stvx	8,dst,7
+	stvx	9,dst,8
+	lvx	6,0,src
+	lvx	7,src,6
 	addi	dst,dst,64
-	lxvd2x	8,src,7
-	lxvd2x	9,src,8
+	lvx	8,src,7
+	lvx	9,src,8
 	addi	src,src,64
-	stxvd2x	6,0,dst
-	stxvd2x	7,dst,6
-	stxvd2x	8,dst,7
-	stxvd2x	9,dst,8
+	stvx	6,0,dst
+	stvx	7,dst,6
+	stvx	8,dst,7
+	stvx	9,dst,8
 	addi	dst,dst,64
 	bdnz	L(aligned_128head)
 
 L(aligned_tail):
 	mtocrf	0x01,cnt
 	bf	25,32f
-	lxvd2x	6,0,src
-	lxvd2x	7,src,6
-	lxvd2x	8,src,7
-	lxvd2x	9,src,8
+	lvx	6,0,src
+	lvx	7,src,6
+	lvx	8,src,7
+	lvx	9,src,8
 	addi	src,src,64
-	stxvd2x	6,0,dst
-	stxvd2x	7,dst,6
-	stxvd2x	8,dst,7
-	stxvd2x	9,dst,8
+	stvx	6,0,dst
+	stvx	7,dst,6
+	stvx	8,dst,7
+	stvx	9,dst,8
 	addi	dst,dst,64
 32:
 	bf	26,16f
-	lxvd2x	6,0,src
-	lxvd2x	7,src,6
+	lvx	6,0,src
+	lvx	7,src,6
 	addi	src,src,32
-	stxvd2x	6,0,dst
-	stxvd2x	7,dst,6
+	stvx	6,0,dst
+	stvx	7,dst,6
 	addi	dst,dst,32
16:
 	bf	27,8f
-	lxvd2x	6,0,src
+	lvx	6,0,src
 	addi	src,src,16
-	stxvd2x	6,0,dst
+	stvx	6,0,dst
 	addi	dst,dst,16
8:
 	bf	28,4f