You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
151 lines
5.7 KiB
151 lines
5.7 KiB
From 832572eac8a661d25efe0f2bcc6a861e2c29c3b8 Mon Sep 17 00:00:00 2001 |
|
From: Stefan Liebler <stli@linux.vnet.ibm.com> |
|
Date: Mon, 7 Nov 2016 15:50:46 +0100 |
|
Subject: [PATCH 05/17] S390: Optimize iso-8859-1 to ibm037 iconv-module. |
|
|
|
Upstream commit 81c6380887c6d62c56e5f0f85a241f759f58b2fd |
|
|
|
This patch reworks the s390-specific module, which used the z900 |

translate-one-to-one (troo) instruction. Now the g5 translate (tr) instruction is used, |

because it outperforms the troo instruction. |
|
|
|
ChangeLog: |
|
|
|
* sysdeps/s390/s390-64/iso-8859-1_cp037_z900.c (TROO_LOOP): |
|
	Rename to TR_LOOP and use the tr instruction instead of troo. |
|
--- |
|
sysdeps/s390/s390-64/iso-8859-1_cp037_z900.c | 95 +++++++++++++++++----------- |
|
1 file changed, 57 insertions(+), 38 deletions(-) |
|
|
|
diff --git a/sysdeps/s390/s390-64/iso-8859-1_cp037_z900.c b/sysdeps/s390/s390-64/iso-8859-1_cp037_z900.c |
|
index 58641f5..3b63e6a 100644 |
|
--- a/sysdeps/s390/s390-64/iso-8859-1_cp037_z900.c |
|
+++ b/sysdeps/s390/s390-64/iso-8859-1_cp037_z900.c |
|
@@ -1,8 +1,7 @@ |
|
/* Conversion between ISO 8859-1 and IBM037. |
|
|
|
- This module uses the Z900 variant of the Translate One To One |
|
- instruction. |
|
- Copyright (C) 1997-2009 Free Software Foundation, Inc. |
|
+ This module uses the translate instruction. |
|
+ Copyright (C) 1997-2016 Free Software Foundation, Inc. |
|
|
|
Author: Andreas Krebbel <Andreas.Krebbel@de.ibm.com> |
|
Based on the work by Ulrich Drepper <drepper@cygnus.com>, 1997. |
|
@@ -176,50 +175,70 @@ __attribute__ ((aligned (8))) = |
|
#define MIN_NEEDED_FROM 1 |
|
#define MIN_NEEDED_TO 1 |
|
|
|
-/* The Z900 variant of troo forces us to always specify a test |
|
- character which ends the translation. So if we run into the |
|
- situation where the translation has been interrupted due to the |
|
- test character we translate the character by hand and jump back |
|
- into the instruction. */ |
|
- |
|
-#define TROO_LOOP(TABLE) \ |
|
+#define TR_LOOP(TABLE) \ |
|
{ \ |
|
- register const unsigned char test asm ("0") = 0; \ |
|
- register const unsigned char *pTable asm ("1") = TABLE; \ |
|
- register unsigned char *pOutput asm ("2") = outptr; \ |
|
- register uint64_t length asm ("3"); \ |
|
- const unsigned char* pInput = inptr; \ |
|
- uint64_t tmp; \ |
|
- \ |
|
- length = (inend - inptr < outend - outptr \ |
|
- ? inend - inptr : outend - outptr); \ |
|
+ size_t length = (inend - inptr < outend - outptr \ |
|
+ ? inend - inptr : outend - outptr); \ |
|
\ |
|
- asm volatile ("0: \n\t" \ |
|
- " troo %0,%1 \n\t" \ |
|
- " jz 1f \n\t" \ |
|
- " jo 0b \n\t" \ |
|
- " llgc %3,0(%1) \n\t" \ |
|
- " la %3,0(%3,%4) \n\t" \ |
|
- " mvc 0(1,%0),0(%3) \n\t" \ |
|
- " aghi %1,1 \n\t" \ |
|
- " aghi %0,1 \n\t" \ |
|
- " aghi %2,-1 \n\t" \ |
|
- " j 0b \n\t" \ |
|
- "1: \n" \ |
|
+ /* Process in 256 byte blocks. */ \ |
|
+ if (__builtin_expect (length >= 256, 0)) \ |
|
+ { \ |
|
+ size_t blocks = length / 256; \ |
|
+ __asm__ __volatile__("0: mvc 0(256,%[R_OUT]),0(%[R_IN])\n\t" \ |
|
+ " tr 0(256,%[R_OUT]),0(%[R_TBL])\n\t" \ |
|
+ " la %[R_IN],256(%[R_IN])\n\t" \ |
|
+ " la %[R_OUT],256(%[R_OUT])\n\t" \ |
|
+ " brctg %[R_LI],0b\n\t" \ |
|
+ : /* outputs */ [R_IN] "+a" (inptr) \ |
|
+ , [R_OUT] "+a" (outptr), [R_LI] "+d" (blocks) \ |
|
+ : /* inputs */ [R_TBL] "a" (TABLE) \ |
|
+ : /* clobber list */ "memory" \ |
|
+ ); \ |
|
+ length = length % 256; \ |
|
+ } \ |
|
\ |
|
- : "+a" (pOutput), "+a" (pInput), "+d" (length), "=&a" (tmp) \ |
|
- : "a" (pTable), "d" (test) \ |
|
- : "cc"); \ |
|
+ /* Process remaining 0...248 bytes in 8byte blocks. */ \ |
|
+ if (length >= 8) \ |
|
+ { \ |
|
+ size_t blocks = length / 8; \ |
|
+ for (int i = 0; i < blocks; i++) \ |
|
+ { \ |
|
+ outptr[0] = TABLE[inptr[0]]; \ |
|
+ outptr[1] = TABLE[inptr[1]]; \ |
|
+ outptr[2] = TABLE[inptr[2]]; \ |
|
+ outptr[3] = TABLE[inptr[3]]; \ |
|
+ outptr[4] = TABLE[inptr[4]]; \ |
|
+ outptr[5] = TABLE[inptr[5]]; \ |
|
+ outptr[6] = TABLE[inptr[6]]; \ |
|
+ outptr[7] = TABLE[inptr[7]]; \ |
|
+ inptr += 8; \ |
|
+ outptr += 8; \ |
|
+ } \ |
|
+ length = length % 8; \ |
|
+ } \ |
|
\ |
|
- inptr = pInput; \ |
|
- outptr = pOutput; \ |
|
+ /* Process remaining 0...7 bytes. */ \ |
|
+ switch (length) \ |
|
+ { \ |
|
+ case 7: outptr[6] = TABLE[inptr[6]]; \ |
|
+ case 6: outptr[5] = TABLE[inptr[5]]; \ |
|
+ case 5: outptr[4] = TABLE[inptr[4]]; \ |
|
+ case 4: outptr[3] = TABLE[inptr[3]]; \ |
|
+ case 3: outptr[2] = TABLE[inptr[2]]; \ |
|
+ case 2: outptr[1] = TABLE[inptr[1]]; \ |
|
+ case 1: outptr[0] = TABLE[inptr[0]]; \ |
|
+ case 0: break; \ |
|
+ } \ |
|
+ inptr += length; \ |
|
+ outptr += length; \ |
|
} |
|
|
|
+ |
|
/* First define the conversion function from ISO 8859-1 to CP037. */ |
|
#define MIN_NEEDED_INPUT MIN_NEEDED_FROM |
|
#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO |
|
#define LOOPFCT FROM_LOOP |
|
-#define BODY TROO_LOOP (table_iso8859_1_to_cp037) |
|
+#define BODY TR_LOOP (table_iso8859_1_to_cp037) |
|
|
|
#include <iconv/loop.c> |
|
|
|
@@ -228,7 +247,7 @@ __attribute__ ((aligned (8))) = |
|
#define MIN_NEEDED_INPUT MIN_NEEDED_TO |
|
#define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM |
|
#define LOOPFCT TO_LOOP |
|
-#define BODY TROO_LOOP (table_cp037_iso8859_1); |
|
+#define BODY TR_LOOP (table_cp037_iso8859_1); |
|
|
|
#include <iconv/loop.c> |
|
|
|
-- |
|
1.8.3.1 |
|
|
|
|