You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
381 lines
13 KiB
381 lines
13 KiB
From ca0212ba19b64488b9e8459a762c11ecd6e7d0bd Mon Sep 17 00:00:00 2001 |
|
From: Petr Stodulka <pstodulk@redhat.com> |
|
Date: Tue, 24 Nov 2015 17:56:11 +0100 |
|
Subject: [PATCH] print correctly non-ascii filenames |
|
|
|
--- |
|
extract.c | 289 ++++++++++++++++++++++++++++++++++++++++++++++++-------------- |
|
unzpriv.h | 7 ++ |
|
2 files changed, 233 insertions(+), 63 deletions(-) |
|
|
|
diff --git a/extract.c b/extract.c |
|
index 0ee4e93..741b7e0 100644 |
|
--- a/extract.c |
|
+++ b/extract.c |
|
@@ -2648,8 +2648,21 @@ static void set_deferred_symlink(__G__ slnk_entry) |
|
} /* end function set_deferred_symlink() */ |
|
#endif /* SYMLINKS */ |
|
|
|
+/* |
|
+ * If Unicode is supported, assume we have what we need to do the isprint() |
|
+ * check using wide characters, avoiding MBCS issues. |
|
+ */ |
|
|
|
- |
|
+#ifndef UZ_FNFILTER_REPLACECHAR |
|
+ /* A convenient choice for the replacement of unprintable char codes is |
|
+ * the "single char wildcard", as this character is quite unlikely to |
|
+ * appear in filenames by itself. The following default definition |
|
+ * sets the replacement char to a question mark as the most common |
|
+ * "single char wildcard"; this setting should be overridden in the |
|
+ * appropriate system-specific configuration header when needed. |
|
+ */ |
|
+# define UZ_FNFILTER_REPLACECHAR '?' |
|
+#endif |
|
|
|
/*************************/ |
|
/* Function fnfilter() */ /* here instead of in list.c for SFX */ |
|
@@ -2661,48 +2674,168 @@ char *fnfilter(raw, space, size) /* convert name to safely printable form */ |
|
extent size; |
|
{ |
|
#ifndef NATIVE /* ASCII: filter ANSI escape codes, etc. */ |
|
- ZCONST uch *r=(ZCONST uch *)raw; |
|
+ ZCONST uch *r; // =(ZCONST uch *)raw; |
|
uch *s=space; |
|
uch *slim=NULL; |
|
uch *se=NULL; |
|
int have_overflow = FALSE; |
|
|
|
- if (size > 0) { |
|
- slim = space + size |
|
-#ifdef _MBCS |
|
- - (MB_CUR_MAX - 1) |
|
-#endif |
|
- - 4; |
|
+# if defined( UNICODE_SUPPORT) && defined( _MBCS) |
|
+/* If Unicode support is enabled, and we have multi-byte characters, |
|
+ * then do the isprint() checks by first converting to wide characters |
|
+ * and checking those. This avoids our having to parse multi-byte |
|
+ * characters for ourselves. After the wide-char replacements have been |
|
+ * made, the wide string is converted back to the local character set. |
|
+ */ |
|
+ wchar_t *wstring; /* wchar_t version of raw */ |
|
+ size_t wslen; /* length of wstring */ |
|
+ wchar_t *wostring; /* wchar_t version of output string */ |
|
+ size_t woslen; /* length of wostring */ |
|
+ char *newraw; /* new raw */ |
|
+ |
|
+ /* 2012-11-06 SMS. |
|
+ * Changed to check the value returned by mbstowcs(), and bypass the |
|
+ * Unicode processing if it fails. This seems to fix a problem |
|
+ * reported in the SourceForge forum, but it's not clear that we |
|
+ * should be doing any Unicode processing without some evidence that |
|
+ * the name actually is Unicode. (Check bit 11 in the flags before |
|
+ * coming here?) |
|
+ * http://sourceforge.net/p/infozip/bugs/40/ |
|
+ */ |
|
+ |
|
+ if (MB_CUR_MAX <= 1) |
|
+ { |
|
+ /* There's no point in converting multi-byte chars if there are |
|
+ * no multi-byte chars. |
|
+ */ |
|
+ wslen = (size_t)-1; |
|
} |
|
- while (*r) { |
|
- if (size > 0 && s >= slim && se == NULL) { |
|
- se = s; |
|
+ else |
|
+ { |
|
+ /* Get Unicode wide character count (for storage allocation). */ |
|
+ wslen = mbstowcs( NULL, raw, 0); |
|
+ } |
|
+ |
|
+ if (wslen != (size_t)-1) |
|
+ { |
|
+ /* Apparently valid Unicode. Allocate wide-char storage. */ |
|
+ wstring = (wchar_t *)malloc((wslen + 1) * sizeof(wchar_t)); |
|
+ if (wstring == NULL) { |
|
+ strcpy( (char *)space, raw); |
|
+ return (char *)space; |
|
} |
|
-#ifdef QDOS |
|
- if (qlflag & 2) { |
|
- if (*r == '/' || *r == '.') { |
|
+ wostring = (wchar_t *)malloc(2 * (wslen + 1) * sizeof(wchar_t)); |
|
+ if (wostring == NULL) { |
|
+ free(wstring); |
|
+ strcpy( (char *)space, raw); |
|
+ return (char *)space; |
|
+ } |
|
+ |
|
+ /* Convert the multi-byte Unicode to wide chars. */ |
|
+ wslen = mbstowcs(wstring, raw, wslen + 1); |
|
+ |
|
+ /* Filter the wide-character string. */ |
|
+ fnfilterw( wstring, wostring, (2 * (wslen + 1) * sizeof(wchar_t))); |
|
+ |
|
+ /* Convert filtered wide chars back to multi-byte. */ |
|
+ woslen = wcstombs( NULL, wostring, 0); |
|
+ if ((newraw = malloc(woslen + 1)) == NULL) { |
|
+ free(wstring); |
|
+ free(wostring); |
|
+ strcpy( (char *)space, raw); |
|
+ return (char *)space; |
|
+ } |
|
+ woslen = wcstombs( newraw, wostring, (woslen * MB_CUR_MAX) + 1); |
|
+ |
|
+ if (size > 0) { |
|
+ slim = space + size - 4; |
|
+ } |
|
+ r = (ZCONST uch *)newraw; |
|
+ while (*r) { |
|
+ if (size > 0 && s >= slim && se == NULL) { |
|
+ se = s; |
|
+ } |
|
+# ifdef QDOS |
|
+ if (qlflag & 2) { |
|
+ if (*r == '/' || *r == '.') { |
|
+ if (se != NULL && (s > (space + (size-3)))) { |
|
+ have_overflow = TRUE; |
|
+ break; |
|
+ } |
|
+ ++r; |
|
+ *s++ = '_'; |
|
+ continue; |
|
+ } |
|
+ } else |
|
+# endif |
|
+ { |
|
if (se != NULL && (s > (space + (size-3)))) { |
|
have_overflow = TRUE; |
|
break; |
|
} |
|
- ++r; |
|
- *s++ = '_'; |
|
- continue; |
|
+ *s++ = *r++; |
|
} |
|
- } else |
|
+ } |
|
+ if (have_overflow) { |
|
+ strcpy((char *)se, "..."); |
|
+ } else { |
|
+ *s = '\0'; |
|
+ } |
|
+ |
|
+ free(wstring); |
|
+ free(wostring); |
|
+ free(newraw); |
|
+ } |
|
+ else |
|
+# endif /* defined( UNICODE_SUPPORT) && defined( _MBCS) */ |
|
+ { |
|
+ /* No Unicode support, or apparently invalid Unicode. */ |
|
+ r = (ZCONST uch *)raw; |
|
+ |
|
+ if (size > 0) { |
|
+ slim = space + size |
|
+#ifdef _MBCS |
|
+ - (MB_CUR_MAX - 1) |
|
+#endif |
|
+ - 4; |
|
+ } |
|
+ while (*r) { |
|
+ if (size > 0 && s >= slim && se == NULL) { |
|
+ se = s; |
|
+ } |
|
+#ifdef QDOS |
|
+ if (qlflag & 2) { |
|
+ if (*r == '/' || *r == '.') { |
|
+ if (se != NULL && (s > (space + (size-3)))) { |
|
+ have_overflow = TRUE; |
|
+ break; |
|
+ } |
|
+ ++r; |
|
+ *s++ = '_'; |
|
+ continue; |
|
+ } |
|
+ } else |
|
#endif |
|
#ifdef HAVE_WORKING_ISPRINT |
|
-# ifndef UZ_FNFILTER_REPLACECHAR |
|
- /* A convenient choice for the replacement of unprintable char codes is |
|
- * the "single char wildcard", as this character is quite unlikely to |
|
- * appear in filenames by itself. The following default definition |
|
- * sets the replacement char to a question mark as the most common |
|
- * "single char wildcard"; this setting should be overridden in the |
|
- * appropiate system-specific configuration header when needed. |
|
- */ |
|
-# define UZ_FNFILTER_REPLACECHAR '?' |
|
-# endif |
|
- if (!isprint(*r)) { |
|
+ if (!isprint(*r)) { |
|
+ if (*r < 32) { |
|
+ /* ASCII control codes are escaped as "^{letter}". */ |
|
+ if (se != NULL && (s > (space + (size-4)))) { |
|
+ have_overflow = TRUE; |
|
+ break; |
|
+ } |
|
+ *s++ = '^', *s++ = (uch)(64 + *r++); |
|
+ } else { |
|
+ /* Other unprintable codes are replaced by the |
|
+ * placeholder character. */ |
|
+ if (se != NULL && (s > (space + (size-3)))) { |
|
+ have_overflow = TRUE; |
|
+ break; |
|
+ } |
|
+ *s++ = UZ_FNFILTER_REPLACECHAR; |
|
+ INCSTR(r); |
|
+ } |
|
+#else /* !HAVE_WORKING_ISPRINT */ |
|
if (*r < 32) { |
|
/* ASCII control codes are escaped as "^{letter}". */ |
|
if (se != NULL && (s > (space + (size-4)))) { |
|
@@ -2710,47 +2843,30 @@ char *fnfilter(raw, space, size) /* convert name to safely printable form */ |
|
break; |
|
} |
|
*s++ = '^', *s++ = (uch)(64 + *r++); |
|
+#endif /* ?HAVE_WORKING_ISPRINT */ |
|
} else { |
|
- /* Other unprintable codes are replaced by the |
|
- * placeholder character. */ |
|
+#ifdef _MBCS |
|
+ unsigned i = CLEN(r); |
|
+ if (se != NULL && (s > (space + (size-i-2)))) { |
|
+ have_overflow = TRUE; |
|
+ break; |
|
+ } |
|
+ for (; i > 0; i--) |
|
+ *s++ = *r++; |
|
+#else |
|
if (se != NULL && (s > (space + (size-3)))) { |
|
have_overflow = TRUE; |
|
break; |
|
} |
|
- *s++ = UZ_FNFILTER_REPLACECHAR; |
|
- INCSTR(r); |
|
- } |
|
-#else /* !HAVE_WORKING_ISPRINT */ |
|
- if (*r < 32) { |
|
- /* ASCII control codes are escaped as "^{letter}". */ |
|
- if (se != NULL && (s > (space + (size-4)))) { |
|
- have_overflow = TRUE; |
|
- break; |
|
- } |
|
- *s++ = '^', *s++ = (uch)(64 + *r++); |
|
-#endif /* ?HAVE_WORKING_ISPRINT */ |
|
- } else { |
|
-#ifdef _MBCS |
|
- unsigned i = CLEN(r); |
|
- if (se != NULL && (s > (space + (size-i-2)))) { |
|
- have_overflow = TRUE; |
|
- break; |
|
- } |
|
- for (; i > 0; i--) |
|
*s++ = *r++; |
|
-#else |
|
- if (se != NULL && (s > (space + (size-3)))) { |
|
- have_overflow = TRUE; |
|
- break; |
|
- } |
|
- *s++ = *r++; |
|
#endif |
|
- } |
|
- } |
|
- if (have_overflow) { |
|
- strcpy((char *)se, "..."); |
|
- } else { |
|
- *s = '\0'; |
|
+ } |
|
+ } |
|
+ if (have_overflow) { |
|
+ strcpy((char *)se, "..."); |
|
+ } else { |
|
+ *s = '\0'; |
|
+ } |
|
} |
|
|
|
#ifdef WINDLL |
|
@@ -2772,6 +2888,53 @@ char *fnfilter(raw, space, size) /* convert name to safely printable form */ |
|
} /* end function fnfilter() */ |
|
|
|
|
|
+#if defined( UNICODE_SUPPORT) && defined( _MBCS) |
|
+ |
|
+/****************************/ |
|
+/* Function fnfilter[w]() */ /* (Here instead of in list.c for SFX.) */ |
|
+/****************************/ |
|
+ |
|
+/* fnfilterw() - Convert wide name to safely printable form. */ |
|
+ |
|
+/* fnfilterw() - Convert wide-character name to safely printable form. */ |
|
+ |
|
+wchar_t *fnfilterw( src, dst, siz) |
|
+ ZCONST wchar_t *src; /* Pointer to source char (string). */ |
|
+ wchar_t *dst; /* Pointer to destination char (string). */ |
|
+ extent siz; /* Size of dst in bytes; currently not used (!). */ |
|
+{ |
|
+ wchar_t *dsx = dst; |
|
+ |
|
+ /* Filter the wide chars. */ |
|
+ while (*src) |
|
+ { |
|
+ if (iswprint( *src)) |
|
+ { |
|
+ /* Printable code. Copy it. */ |
|
+ *dst++ = *src; |
|
+ } |
|
+ else |
|
+ { |
|
+ /* Unprintable code. Substitute something printable for it. */ |
|
+ if (*src < 32) |
|
+ { |
|
+ /* Replace ASCII control code with "^{letter}". */ |
|
+ *dst++ = (wchar_t)'^'; |
|
+ *dst++ = (wchar_t)(64 + *src); |
|
+ } |
|
+ else |
|
+ { |
|
+ /* Replace other unprintable code with the placeholder. */ |
|
+ *dst++ = (wchar_t)UZ_FNFILTER_REPLACECHAR; |
|
+ } |
|
+ } |
|
+ src++; |
|
+ } |
|
+ *dst = (wchar_t)0; /* NUL-terminate the destination string. */ |
|
+ return dsx; |
|
+} /* fnfilterw(). */ |
|
+ |
|
+#endif /* defined( UNICODE_SUPPORT) && defined( _MBCS) */ |
|
|
|
|
|
#ifdef SET_DIR_ATTRIB |
|
diff --git a/unzpriv.h b/unzpriv.h |
|
index 22d3923..e48a652 100644 |
|
--- a/unzpriv.h |
|
+++ b/unzpriv.h |
|
@@ -1212,6 +1212,7 @@ |
|
# ifdef UNICODE_WCHAR |
|
# if !(defined(_WIN32_WCE) || defined(POCKET_UNZIP)) |
|
# include <wchar.h> |
|
+# include <wctype.h> |
|
# endif |
|
# endif |
|
# ifndef _MBCS /* no need to include <locale.h> twice, see below */ |
|
@@ -2410,6 +2411,12 @@ int memflush OF((__GPRO__ ZCONST uch *rawbuf, ulg size)); |
|
char *fnfilter OF((ZCONST char *raw, uch *space, |
|
extent size)); |
|
|
|
+# if defined( UNICODE_SUPPORT) && defined( _MBCS) |
|
+wchar_t *fnfilterw OF((ZCONST wchar_t *src, wchar_t *dst, |
|
+ extent siz)); |
|
+#endif |
|
+ |
|
+ |
|
/*--------------------------------------------------------------------------- |
|
Decompression functions: |
|
---------------------------------------------------------------------------*/ |
|
-- |
|
2.4.3 |
|
|
|
|