You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
400 lines
14 KiB
400 lines
14 KiB
From: Giovanni Scafora <giovanni.archlinux.org> |
|
Subject: unzip files encoded with non-latin, non-unicode file names |
|
Last-Update: 2015-02-11 |
|
|
|
Updated 2015-02-11 by Marc Deslauriers <marc.deslauriers@canonical.com> |
|
to fix buffer overflow in charset_to_intern() |
|
|
|
Index: unzip-6.0/unix/unix.c |
|
=================================================================== |
|
--- unzip-6.0.orig/unix/unix.c 2015-02-11 08:46:43.675324290 -0500 |
|
+++ unzip-6.0/unix/unix.c 2015-02-11 09:18:04.902081319 -0500 |
|
@@ -30,6 +30,9 @@ |
|
#define UNZIP_INTERNAL |
|
#include "unzip.h" |
|
|
|
+#include <iconv.h> |
|
+#include <langinfo.h> |
|
+ |
|
#ifdef SCO_XENIX |
|
# define SYSNDIR |
|
#else /* SCO Unix, AIX, DNIX, TI SysV, Coherent 4.x, ... */ |
|
@@ -1874,3 +1877,102 @@ |
|
} |
|
} |
|
#endif /* QLZIP */ |
|
+ |
|
+ |
|
+typedef struct { |
|
+ const char *local_charset; /* locale codeset name, compared against nl_langinfo(CODESET) */ |
|
+ const char *archive_charset; /* charset copied into OEM_CP when local_charset matches */ |
|
+} CHARSET_MAP; |
|
+ |
|
+/* A mapping of local <-> archive charsets used by default to convert filenames |
|
+ * of DOS/Windows Zip archives. Currently very basic. The table is read-only. */ |
|
+static const CHARSET_MAP dos_charset_map[] = { |
|
+ { "ANSI_X3.4-1968", "CP850" }, |
|
+ { "ISO-8859-1", "CP850" }, |
|
+ { "CP1252", "CP850" }, |
|
+ { "UTF-8", "CP866" }, |
|
+ { "KOI8-R", "CP866" }, |
|
+ { "KOI8-U", "CP866" }, |
|
+ { "ISO-8859-5", "CP866" } |
|
+}; |
|
+ |
|
+char OEM_CP[MAX_CP_NAME] = ""; /* source charset for oem_intern(); set by -O or guessed from the locale */ |
|
+char ISO_CP[MAX_CP_NAME] = ""; /* source charset for iso_intern(); set by -I, empty means no conversion */ |
|
+ |
|
+/* Guess a default for OEM_CP from the locale codeset via dos_charset_map; an |
|
+ * already-set OEM_CP is kept, and so is ISO_CP. No match leaves OEM_CP empty. */ |
|
+void init_conversion_charsets(void) |
|
+{ |
|
+ const char *local_charset; |
|
+ size_t i; |
|
+ |
|
+ /* Make a guess only if OEM_CP not already set. */ |
|
+ if(*OEM_CP == '\0') { |
|
+ local_charset = nl_langinfo(CODESET); |
|
+ for(i = 0; i < sizeof(dos_charset_map)/sizeof(dos_charset_map[0]); i++) |
|
+ if(!strcasecmp(local_charset, dos_charset_map[i].local_charset)) { |
|
+ strncpy(OEM_CP, dos_charset_map[i].archive_charset, |
|
+ sizeof(OEM_CP) - 1); /* last byte is never written, so it stays NUL */ |
|
+ break; |
|
+ } |
|
+ } |
|
+} |
|
+ |
|
+/* Convert a string in place from from_charset to the current locale charset |
|
+ * using iconv(). Be as non-intrusive as possible: if an error is encountered |
|
+ * during conversion (or from_charset is empty), leave the string intact. */ |
|
+static void charset_to_intern(char *string, char *from_charset) |
|
+{ |
|
+ iconv_t cd; |
|
+ char *s,*d, *buf; |
|
+ size_t slen, dlen, buflen; |
|
+ const char *local_charset; |
|
+ |
|
+ if(*from_charset == '\0') |
|
+ return; |
|
+ |
|
+ buf = NULL; |
|
+ local_charset = nl_langinfo(CODESET); |
|
+ |
|
+ if((cd = iconv_open(local_charset, from_charset)) == (iconv_t)-1) |
|
+ return; |
|
+ |
|
+ slen = strlen(string); |
|
+ s = string; |
|
+ |
|
+ /* Cap the scratch buffer at the smaller of FILNAMSIZ and OUTBUFSIZ + 1, |
|
+ * as this function also gets called with G.outbuf in fileio.c |
|
+ */ |
|
+ buflen = FILNAMSIZ; |
|
+ if (OUTBUFSIZ + 1 < FILNAMSIZ) |
|
+ { |
|
+ buflen = OUTBUFSIZ + 1; |
|
+ } |
|
+ |
|
+ d = buf = malloc(buflen); |
|
+ if(!d) |
|
+ goto cleanup; |
|
+ |
|
+ memset(buf, 0, buflen); /* zero-fill so the converted text is always NUL-terminated */ |
|
+ dlen = buflen - 1; /* reserve the last byte for the terminator */ |
|
+ |
|
+ if(iconv(cd, &s, &slen, &d, &dlen) == (size_t)-1) |
|
+ goto cleanup; |
|
+ strncpy(string, buf, buflen); |
|
+ |
|
+ cleanup: |
|
+ free(buf); |
|
+ iconv_close(cd); |
|
+} |
|
+ |
|
+/* Convert string in place from the OEM_CP charset (-O option) to the current locale charset. */ |
|
+inline void oem_intern(char *string) |
|
+{ |
|
+ charset_to_intern(string, OEM_CP); |
|
+} |
|
+ |
|
+/* Convert string in place from the ISO_CP charset (-I option) to the current locale charset. */ |
|
+inline void iso_intern(char *string) |
|
+{ |
|
+ charset_to_intern(string, ISO_CP); |
|
+} |
|
Index: unzip-6.0/unix/unxcfg.h |
|
=================================================================== |
|
--- unzip-6.0.orig/unix/unxcfg.h 2015-02-11 08:46:43.675324290 -0500 |
|
+++ unzip-6.0/unix/unxcfg.h 2015-02-11 08:46:43.671324260 -0500 |
|
@@ -228,4 +228,30 @@ |
|
/* wild_dir, dirname, wildname, matchname[], dirnamelen, have_dirname, */ |
|
/* and notfirstcall are used by do_wild(). */ |
|
|
|
+ |
|
+#define MAX_CP_NAME 25 |
|
+ |
|
+#ifdef SETLOCALE |
|
+# undef SETLOCALE |
|
+#endif |
|
+#define SETLOCALE(category, locale) setlocale(category, locale) |
|
+#include <locale.h> |
|
+ |
|
+#ifdef _ISO_INTERN |
|
+# undef _ISO_INTERN |
|
+#endif |
|
+#define _ISO_INTERN(str1) iso_intern(str1) |
|
+ |
|
+#ifdef _OEM_INTERN |
|
+# undef _OEM_INTERN |
|
+#endif |
|
+#ifndef IZ_OEM2ISO_ARRAY |
|
+# define IZ_OEM2ISO_ARRAY |
|
+#endif |
|
+#define _OEM_INTERN(str1) oem_intern(str1) |
|
+ |
|
+void iso_intern(char *); |
|
+void oem_intern(char *); |
|
+void init_conversion_charsets(void); |
|
+ |
|
#endif /* !__unxcfg_h */ |
|
Index: unzip-6.0/unzip.c |
|
=================================================================== |
|
--- unzip-6.0.orig/unzip.c 2015-02-11 08:46:43.675324290 -0500 |
|
+++ unzip-6.0/unzip.c 2015-02-11 08:46:43.675324290 -0500 |
|
@@ -327,12 +327,23 @@ |
|
-2 just filenames but allow -h/-t/-z -l long Unix \"ls -l\" format\n\ |
|
-v verbose, multi-page format\n"; |
|
|
|
+#ifndef UNIX |
|
static ZCONST char Far ZipInfoUsageLine3[] = "miscellaneous options:\n\ |
|
-h print header line -t print totals for listed files or for all\n\ |
|
-z print zipfile comment -T print file times in sortable decimal format\ |
|
\n -C be case-insensitive %s\ |
|
-U use escapes for all non-ASCII Unicode\n\ |
|
-x exclude filenames that follow from listing\n"; |
|
+#else /* UNIX: must take the same format arguments (a single %s) as the non-UNIX text */ |
|
+static ZCONST char Far ZipInfoUsageLine3[] = "miscellaneous options:\n\ |
|
+ -h print header line -t print totals for listed files or for all\n\ |
|
+ -z print zipfile comment -T print file times in sortable decimal format\ |
|
+\n -C be case-insensitive %s\ |
|
+ -U use escapes for all non-ASCII Unicode\n\ |
|
+ -x exclude filenames that follow from listing\n\ |
|
+ -O CHARSET specify a character encoding for DOS, Windows and OS/2 archives\n\ |
|
+ -I CHARSET specify a character encoding for UNIX and other archives\n"; |
|
+#endif /* !UNIX */ |
|
#ifdef MORE |
|
static ZCONST char Far ZipInfoUsageLine4[] = |
|
" -M page output through built-in \"more\"\n"; |
|
@@ -664,6 +674,17 @@ |
|
-U use escapes for all non-ASCII Unicode -UU ignore any Unicode fields\n\ |
|
-C match filenames case-insensitively -L make (some) names \ |
|
lowercase\n %-42s -V retain VMS version numbers\n%s"; |
|
+#elif (defined UNIX) |
|
+static ZCONST char Far UnzipUsageLine4[] = "\ |
|
+modifiers:\n\ |
|
+ -n never overwrite existing files -q quiet mode (-qq => quieter)\n\ |
|
+ -o overwrite files WITHOUT prompting -a auto-convert any text files\n\ |
|
+ -j junk paths (do not make directories) -aa treat ALL files as text\n\ |
|
+ -U use escapes for all non-ASCII Unicode -UU ignore any Unicode fields\n\ |
|
+ -C match filenames case-insensitively -L make (some) names \ |
|
+lowercase\n %-42s -V retain VMS version numbers\n%s\ |
|
+ -O CHARSET specify a character encoding for DOS, Windows and OS/2 archives\n\ |
|
+ -I CHARSET specify a character encoding for UNIX and other archives\n\n"; |
|
#else /* !VMS */ |
|
static ZCONST char Far UnzipUsageLine4[] = "\ |
|
modifiers:\n\ |
|
@@ -802,6 +823,10 @@ |
|
#endif /* UNICODE_SUPPORT */ |
|
|
|
|
|
+#ifdef UNIX |
|
+ init_conversion_charsets(); |
|
+#endif |
|
+ |
|
#if (defined(__IBMC__) && defined(__DEBUG_ALLOC__)) |
|
extern void DebugMalloc(void); |
|
|
|
@@ -1335,6 +1360,11 @@ |
|
argc = *pargc; |
|
argv = *pargv; |
|
|
|
+#ifdef UNIX |
|
+ extern char OEM_CP[MAX_CP_NAME]; |
|
+ extern char ISO_CP[MAX_CP_NAME]; |
|
+#endif |
|
+ |
|
while (++argv, (--argc > 0 && *argv != NULL && **argv == '-')) { |
|
s = *argv + 1; |
|
while ((c = *s++) != 0) { /* "!= 0": prevent Turbo C warning */ |
|
@@ -1516,6 +1546,35 @@ |
|
} |
|
break; |
|
#endif /* MACOS */ |
|
+#ifdef UNIX |
|
+ case ('I'): /* -I CHARSET: filename encoding for UNIX and other archives */ |
|
+ if (negative) { |
|
+ Info(slide, 0x401, ((char *)slide, |
|
+ "error: encodings can't be negated")); |
|
+ return(PK_PARAM); |
|
+ } else { |
|
+ if(*s) { /* Handle the -Icharset case */ |
|
+ /* Assume that charsets can't start with a dash to spot arguments misuse */ |
|
+ if(*s == '-') { |
|
+ Info(slide, 0x401, ((char *)slide, |
|
+ "error: a valid character encoding should follow the -I argument")); |
|
+ return(PK_PARAM); |
|
+ } |
|
+ strncpy(ISO_CP, s, sizeof(ISO_CP) - 1); /* final byte stays NUL */ |
|
+ } else { /* -I charset */ |
|
+ ++argv; |
|
+ if(!(--argc > 0 && *argv != NULL && **argv != '-')) { |
|
+ Info(slide, 0x401, ((char *)slide, |
|
+ "error: a valid character encoding should follow the -I argument")); |
|
+ return(PK_PARAM); |
|
+ } |
|
+ s = *argv; |
|
+ strncpy(ISO_CP, s, sizeof(ISO_CP) - 1); /* final byte stays NUL */ |
|
+ } |
|
+ while(*s) ++s; /* No params straight after charset name; safe for "" */ |
|
+ } |
|
+ break; |
|
+#endif /* ?UNIX */ |
|
case ('j'): /* junk pathnames/directory structure */ |
|
if (negative) |
|
uO.jflag = FALSE, negative = 0; |
|
@@ -1591,6 +1650,35 @@ |
|
} else |
|
++uO.overwrite_all; |
|
break; |
|
+#ifdef UNIX |
|
+ case ('O'): /* -O CHARSET: filename encoding for DOS, Windows and OS/2 archives */ |
|
+ if (negative) { |
|
+ Info(slide, 0x401, ((char *)slide, |
|
+ "error: encodings can't be negated")); |
|
+ return(PK_PARAM); |
|
+ } else { |
|
+ if(*s) { /* Handle the -Ocharset case */ |
|
+ /* Assume that charsets can't start with a dash to spot arguments misuse */ |
|
+ if(*s == '-') { |
|
+ Info(slide, 0x401, ((char *)slide, |
|
+ "error: a valid character encoding should follow the -O argument")); |
|
+ return(PK_PARAM); |
|
+ } |
|
+ strncpy(OEM_CP, s, sizeof(OEM_CP) - 1); /* final byte stays NUL */ |
|
+ } else { /* -O charset */ |
|
+ ++argv; |
|
+ if(!(--argc > 0 && *argv != NULL && **argv != '-')) { |
|
+ Info(slide, 0x401, ((char *)slide, |
|
+ "error: a valid character encoding should follow the -O argument")); |
|
+ return(PK_PARAM); |
|
+ } |
|
+ s = *argv; |
|
+ strncpy(OEM_CP, s, sizeof(OEM_CP) - 1); /* final byte stays NUL */ |
|
+ } |
|
+ while(*s) ++s; /* No params straight after charset name; safe for "" */ |
|
+ } |
|
+ break; |
|
+#endif /* ?UNIX */ |
|
case ('p'): /* pipes: extract to stdout, no messages */ |
|
if (negative) { |
|
uO.cflag = FALSE; |
|
Index: unzip-6.0/unzpriv.h |
|
=================================================================== |
|
--- unzip-6.0.orig/unzpriv.h 2015-02-11 08:46:43.675324290 -0500 |
|
+++ unzip-6.0/unzpriv.h 2015-02-11 08:46:43.675324290 -0500 |
|
@@ -3008,7 +3008,7 @@ |
|
!(((islochdr) || (isuxatt)) && \ |
|
((hostver) == 25 || (hostver) == 26 || (hostver) == 40))) || \ |
|
(hostnum) == FS_HPFS_ || \ |
|
- ((hostnum) == FS_NTFS_ && (hostver) == 50)) { \ |
|
+ ((hostnum) == FS_NTFS_ /* && (hostver) == 50 */ )) { \ |
|
_OEM_INTERN((string)); \ |
|
} else { \ |
|
_ISO_INTERN((string)); \ |
|
Index: unzip-6.0/zipinfo.c |
|
=================================================================== |
|
--- unzip-6.0.orig/zipinfo.c 2015-02-11 08:46:43.675324290 -0500 |
|
+++ unzip-6.0/zipinfo.c 2015-02-11 08:46:43.675324290 -0500 |
|
@@ -457,6 +457,10 @@ |
|
int tflag_slm=TRUE, tflag_2v=FALSE; |
|
int explicit_h=FALSE, explicit_t=FALSE; |
|
|
|
+#ifdef UNIX |
|
+ extern char OEM_CP[MAX_CP_NAME]; |
|
+ extern char ISO_CP[MAX_CP_NAME]; |
|
+#endif |
|
|
|
#ifdef MACOS |
|
uO.lflag = LFLAG; /* reset default on each call */ |
|
@@ -501,6 +505,35 @@ |
|
uO.lflag = 0; |
|
} |
|
break; |
|
+#ifdef UNIX |
|
+ case ('I'): /* -I CHARSET: filename encoding for UNIX and other archives */ |
|
+ if (negative) { |
|
+ Info(slide, 0x401, ((char *)slide, |
|
+ "error: encodings can't be negated")); |
|
+ return(PK_PARAM); |
|
+ } else { |
|
+ if(*s) { /* Handle the -Icharset case */ |
|
+ /* Assume that charsets can't start with a dash to spot arguments misuse */ |
|
+ if(*s == '-') { |
|
+ Info(slide, 0x401, ((char *)slide, |
|
+ "error: a valid character encoding should follow the -I argument")); |
|
+ return(PK_PARAM); |
|
+ } |
|
+ strncpy(ISO_CP, s, sizeof(ISO_CP) - 1); /* final byte stays NUL */ |
|
+ } else { /* -I charset */ |
|
+ ++argv; |
|
+ if(!(--argc > 0 && *argv != NULL && **argv != '-')) { |
|
+ Info(slide, 0x401, ((char *)slide, |
|
+ "error: a valid character encoding should follow the -I argument")); |
|
+ return(PK_PARAM); |
|
+ } |
|
+ s = *argv; |
|
+ strncpy(ISO_CP, s, sizeof(ISO_CP) - 1); /* final byte stays NUL */ |
|
+ } |
|
+ while(*s) ++s; /* No params straight after charset name; safe for "" */ |
|
+ } |
|
+ break; |
|
+#endif /* ?UNIX */ |
|
case 'l': /* longer form of "ls -l" type listing */ |
|
if (negative) |
|
uO.lflag = -2, negative = 0; |
|
@@ -521,6 +554,35 @@ |
|
G.M_flag = TRUE; |
|
break; |
|
#endif |
|
+#ifdef UNIX |
|
+ case ('O'): /* -O CHARSET: filename encoding for DOS, Windows and OS/2 archives */ |
|
+ if (negative) { |
|
+ Info(slide, 0x401, ((char *)slide, |
|
+ "error: encodings can't be negated")); |
|
+ return(PK_PARAM); |
|
+ } else { |
|
+ if(*s) { /* Handle the -Ocharset case */ |
|
+ /* Assume that charsets can't start with a dash to spot arguments misuse */ |
|
+ if(*s == '-') { |
|
+ Info(slide, 0x401, ((char *)slide, |
|
+ "error: a valid character encoding should follow the -O argument")); |
|
+ return(PK_PARAM); |
|
+ } |
|
+ strncpy(OEM_CP, s, sizeof(OEM_CP) - 1); /* final byte stays NUL */ |
|
+ } else { /* -O charset */ |
|
+ ++argv; |
|
+ if(!(--argc > 0 && *argv != NULL && **argv != '-')) { |
|
+ Info(slide, 0x401, ((char *)slide, |
|
+ "error: a valid character encoding should follow the -O argument")); |
|
+ return(PK_PARAM); |
|
+ } |
|
+ s = *argv; |
|
+ strncpy(OEM_CP, s, sizeof(OEM_CP) - 1); /* final byte stays NUL */ |
|
+ } |
|
+ while(*s) ++s; /* No params straight after charset name; safe for "" */ |
|
+ } |
|
+ break; |
|
+#endif /* ?UNIX */ |
|
case 's': /* default: shorter "ls -l" type listing */ |
|
if (negative) |
|
uO.lflag = -2, negative = 0;
|
|
|