The memmove-related fix is dropped from this patch because rhel-7.5 does not
have an optimized memmove for POWER7.

commit 63da5cd4a097d089033d980c42254c3356fa723f
Author: Rajalakshmi Srinivasaraghavan
Date:   Wed Oct 25 13:13:53 2017 -0200

    powerpc: Replace lxvd2x/stxvd2x with lvx/stvx in P7's memcpy/memmove

    POWER9 DD2.1 and earlier has an issue where some cache inhibited
    vector load traps to the kernel, causing a performance degradation.
    To handle this in memcpy and memmove, lvx/stvx is used for aligned
    addresses instead of lxvd2x/stxvd2x.

    Reference: https://patchwork.ozlabs.org/patch/814059/

	* sysdeps/powerpc/powerpc64/power7/memcpy.S: Replace
	lxvd2x/stxvd2x with lvx/stvx.
	* sysdeps/powerpc/powerpc64/power7/memmove.S: Likewise.

    Reviewed-by: Tulio Magno Quites Machado Filho
    Reviewed-by: Adhemerval Zanella

diff --git a/sysdeps/powerpc/powerpc64/power7/memcpy.S b/sysdeps/powerpc/powerpc64/power7/memcpy.S
index 1ccbc2e..a7cdf8b 100644
--- a/sysdeps/powerpc/powerpc64/power7/memcpy.S
+++ b/sysdeps/powerpc/powerpc64/power7/memcpy.S
@@ -91,63 +91,63 @@ L(aligned_copy):
 	srdi	12,cnt,7
 	cmpdi	12,0
 	beq	L(aligned_tail)
-	lxvd2x	6,0,src
-	lxvd2x	7,src,6
+	lvx	6,0,src
+	lvx	7,src,6
 	mtctr	12
 	b	L(aligned_128loop)

 	.align	4
 L(aligned_128head):
 	/* for the 2nd + iteration of this loop. */
-	lxvd2x	6,0,src
-	lxvd2x	7,src,6
+	lvx	6,0,src
+	lvx	7,src,6
 L(aligned_128loop):
-	lxvd2x	8,src,7
-	lxvd2x	9,src,8
-	stxvd2x	6,0,dst
+	lvx	8,src,7
+	lvx	9,src,8
+	stvx	6,0,dst
 	addi	src,src,64
-	stxvd2x	7,dst,6
-	stxvd2x	8,dst,7
-	stxvd2x	9,dst,8
-	lxvd2x	6,0,src
-	lxvd2x	7,src,6
+	stvx	7,dst,6
+	stvx	8,dst,7
+	stvx	9,dst,8
+	lvx	6,0,src
+	lvx	7,src,6
 	addi	dst,dst,64
-	lxvd2x	8,src,7
-	lxvd2x	9,src,8
+	lvx	8,src,7
+	lvx	9,src,8
 	addi	src,src,64
-	stxvd2x	6,0,dst
-	stxvd2x	7,dst,6
-	stxvd2x	8,dst,7
-	stxvd2x	9,dst,8
+	stvx	6,0,dst
+	stvx	7,dst,6
+	stvx	8,dst,7
+	stvx	9,dst,8
 	addi	dst,dst,64
 	bdnz	L(aligned_128head)

 L(aligned_tail):
 	mtocrf	0x01,cnt
 	bf	25,32f
-	lxvd2x	6,0,src
-	lxvd2x	7,src,6
-	lxvd2x	8,src,7
-	lxvd2x	9,src,8
+	lvx	6,0,src
+	lvx	7,src,6
+	lvx	8,src,7
+	lvx	9,src,8
 	addi	src,src,64
-	stxvd2x	6,0,dst
-	stxvd2x	7,dst,6
-	stxvd2x	8,dst,7
-	stxvd2x	9,dst,8
+	stvx	6,0,dst
+	stvx	7,dst,6
+	stvx	8,dst,7
+	stvx	9,dst,8
 	addi	dst,dst,64
 32:
 	bf	26,16f
-	lxvd2x	6,0,src
-	lxvd2x	7,src,6
+	lvx	6,0,src
+	lvx	7,src,6
 	addi	src,src,32
-	stxvd2x	6,0,dst
-	stxvd2x	7,dst,6
+	stvx	6,0,dst
+	stvx	7,dst,6
 	addi	dst,dst,32
 16:
 	bf	27,8f
-	lxvd2x	6,0,src
+	lvx	6,0,src
 	addi	src,src,16
-	stxvd2x	6,0,dst
+	stvx	6,0,dst
 	addi	dst,dst,16
 8:
 	bf	28,4f
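
Background sketch (illustrative only, not part of the patch): the substitution
is safe because the paths touched above (L(aligned_copy) and its tail) already
guarantee 16-byte-aligned addresses. lvx/stvx ignore the low 4 bits of the
effective address, so they require that alignment, while the VSX forms
lxvd2x/stxvd2x accept any alignment but are the accesses that can trap to the
kernel for cache-inhibited mappings on POWER9 DD2.1 and earlier. The contrast
can be seen with the standard <altivec.h> intrinsics; the function names below
are hypothetical, and the snippet assumes gcc -maltivec -mvsx on powerpc64:

    #include <altivec.h>

    /* vec_ld normally compiles to lvx: the low 4 bits of the address
       are dropped, so this is only correct for 16-byte-aligned
       pointers -- the same precondition the aligned_copy path above
       relies on.  */
    vector unsigned char
    load_aligned (const unsigned char *p)
    {
      return vec_ld (0, p);
    }

    /* vec_vsx_ld normally compiles to a VSX load such as lxvd2x,
       which handles any alignment but belongs to the instruction
       family the patch avoids on the aligned path.  */
    vector unsigned char
    load_any (const unsigned char *p)
    {
      return vec_vsx_ld (0, p);
    }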