archive-zip: mark text files in archives
Set the text flag for ZIP archive entries that look like text files so that unzip -a can be used to perform end-of-line conversions. Info-ZIP zip does the same.

Detect binary files the same way as git diff and git grep do, namely by checking for the attribute "diff" and its negation "-diff", and, if neither is set, by falling back to checking for the presence of NUL bytes in the first few bytes of the file contents.

7-Zip, Windows' built-in ZIP functionality and Info-ZIP unzip without the switch -a are not affected by the change and still extract text files without doing any end-of-line conversions.

NB: The actual end-of-line style used in the archive entries doesn't matter to unzip -a, as it converts any CR, CRLF and LF to the line end characters appropriate for the platform it is running on.

Suggested-by: Ulrike Fischer <luatex@nililand.de>
Signed-off-by: Rene Scharfe <l.s.r@web.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

maint
parent 282616c72d
commit 4aff646d17
					
				|  | @ -5,6 +5,8 @@ | ||||||
| #include "archive.h" | #include "archive.h" | ||||||
| #include "streaming.h" | #include "streaming.h" | ||||||
| #include "utf8.h" | #include "utf8.h" | ||||||
|  | #include "userdiff.h" | ||||||
|  | #include "xdiff-interface.h" | ||||||
|  |  | ||||||
| static int zip_date; | static int zip_date; | ||||||
| static int zip_time; | static int zip_time; | ||||||
|  | @ -189,6 +191,16 @@ static int has_only_ascii(const char *s) | ||||||
| 	} | 	} | ||||||
| } | } | ||||||
|  |  | ||||||
|  | static int entry_is_binary(const char *path, const void *buffer, size_t size) | ||||||
|  | { | ||||||
|  | 	struct userdiff_driver *driver = userdiff_find_by_path(path); | ||||||
|  | 	if (!driver) | ||||||
|  | 		driver = userdiff_find_by_name("default"); | ||||||
|  | 	if (driver->binary != -1) | ||||||
|  | 		return driver->binary; | ||||||
|  | 	return buffer_is_binary(buffer, size); | ||||||
|  | } | ||||||
|  |  | ||||||
| #define STREAM_BUFFER_SIZE (1024 * 16) | #define STREAM_BUFFER_SIZE (1024 * 16) | ||||||
|  |  | ||||||
| static int write_zip_entry(struct archiver_args *args, | static int write_zip_entry(struct archiver_args *args, | ||||||
|  | @ -210,6 +222,8 @@ static int write_zip_entry(struct archiver_args *args, | ||||||
| 	struct git_istream *stream = NULL; | 	struct git_istream *stream = NULL; | ||||||
| 	unsigned long flags = 0; | 	unsigned long flags = 0; | ||||||
| 	unsigned long size; | 	unsigned long size; | ||||||
|  | 	int is_binary = -1; | ||||||
|  | 	const char *path_without_prefix = path + args->baselen; | ||||||
|  |  | ||||||
| 	crc = crc32(0, NULL, 0); | 	crc = crc32(0, NULL, 0); | ||||||
|  |  | ||||||
|  | @ -256,6 +270,8 @@ static int write_zip_entry(struct archiver_args *args, | ||||||
| 				return error("cannot read %s", | 				return error("cannot read %s", | ||||||
| 					     sha1_to_hex(sha1)); | 					     sha1_to_hex(sha1)); | ||||||
| 			crc = crc32(crc, buffer, size); | 			crc = crc32(crc, buffer, size); | ||||||
|  | 			is_binary = entry_is_binary(path_without_prefix, | ||||||
|  | 						    buffer, size); | ||||||
| 			out = buffer; | 			out = buffer; | ||||||
| 		} | 		} | ||||||
| 		compressed_size = (method == 0) ? size : 0; | 		compressed_size = (method == 0) ? size : 0; | ||||||
|  | @ -300,7 +316,6 @@ static int write_zip_entry(struct archiver_args *args, | ||||||
| 	copy_le16(dirent.extra_length, ZIP_EXTRA_MTIME_SIZE); | 	copy_le16(dirent.extra_length, ZIP_EXTRA_MTIME_SIZE); | ||||||
| 	copy_le16(dirent.comment_length, 0); | 	copy_le16(dirent.comment_length, 0); | ||||||
| 	copy_le16(dirent.disk, 0); | 	copy_le16(dirent.disk, 0); | ||||||
| 	copy_le16(dirent.attr1, 0); |  | ||||||
| 	copy_le32(dirent.attr2, attr2); | 	copy_le32(dirent.attr2, attr2); | ||||||
| 	copy_le32(dirent.offset, zip_offset); | 	copy_le32(dirent.offset, zip_offset); | ||||||
|  |  | ||||||
|  | @ -328,6 +343,9 @@ static int write_zip_entry(struct archiver_args *args, | ||||||
| 			if (readlen <= 0) | 			if (readlen <= 0) | ||||||
| 				break; | 				break; | ||||||
| 			crc = crc32(crc, buf, readlen); | 			crc = crc32(crc, buf, readlen); | ||||||
|  | 			if (is_binary == -1) | ||||||
|  | 				is_binary = entry_is_binary(path_without_prefix, | ||||||
|  | 							    buf, readlen); | ||||||
| 			write_or_die(1, buf, readlen); | 			write_or_die(1, buf, readlen); | ||||||
| 		} | 		} | ||||||
| 		close_istream(stream); | 		close_istream(stream); | ||||||
|  | @ -361,6 +379,9 @@ static int write_zip_entry(struct archiver_args *args, | ||||||
| 			if (readlen <= 0) | 			if (readlen <= 0) | ||||||
| 				break; | 				break; | ||||||
| 			crc = crc32(crc, buf, readlen); | 			crc = crc32(crc, buf, readlen); | ||||||
|  | 			if (is_binary == -1) | ||||||
|  | 				is_binary = entry_is_binary(path_without_prefix, | ||||||
|  | 							    buf, readlen); | ||||||
|  |  | ||||||
| 			zstream.next_in = buf; | 			zstream.next_in = buf; | ||||||
| 			zstream.avail_in = readlen; | 			zstream.avail_in = readlen; | ||||||
|  | @ -405,6 +426,8 @@ static int write_zip_entry(struct archiver_args *args, | ||||||
| 	free(deflated); | 	free(deflated); | ||||||
| 	free(buffer); | 	free(buffer); | ||||||
|  |  | ||||||
|  | 	copy_le16(dirent.attr1, !is_binary); | ||||||
|  |  | ||||||
| 	memcpy(zip_dir + zip_dir_offset, &dirent, ZIP_DIR_HEADER_SIZE); | 	memcpy(zip_dir + zip_dir_offset, &dirent, ZIP_DIR_HEADER_SIZE); | ||||||
| 	zip_dir_offset += ZIP_DIR_HEADER_SIZE; | 	zip_dir_offset += ZIP_DIR_HEADER_SIZE; | ||||||
| 	memcpy(zip_dir + zip_dir_offset, path, pathlen); | 	memcpy(zip_dir + zip_dir_offset, path, pathlen); | ||||||
|  |  | ||||||
|  | @ -33,6 +33,37 @@ check_zip() { | ||||||
| 	test_expect_success UNZIP " validate file contents" " | 	test_expect_success UNZIP " validate file contents" " | ||||||
| 		diff -r a ${dir_with_prefix}a | 		diff -r a ${dir_with_prefix}a | ||||||
| 	" | 	" | ||||||
|  |  | ||||||
|  | 	dir=eol_$1 | ||||||
|  | 	dir_with_prefix=$dir/$2 | ||||||
|  | 	extracted=${dir_with_prefix}a | ||||||
|  | 	original=a | ||||||
|  |  | ||||||
|  | 	test_expect_success UNZIP " extract ZIP archive with EOL conversion" ' | ||||||
|  | 		(mkdir $dir && cd $dir && "$GIT_UNZIP" -a ../$zipfile) | ||||||
|  | 	' | ||||||
|  |  | ||||||
|  | 	test_expect_success UNZIP " validate that text files are converted" " | ||||||
|  | 		test_cmp_bin $extracted/text.cr $extracted/text.crlf && | ||||||
|  | 		test_cmp_bin $extracted/text.cr $extracted/text.lf | ||||||
|  | 	" | ||||||
|  |  | ||||||
|  | 	test_expect_success UNZIP " validate that binary files are unchanged" " | ||||||
|  | 		test_cmp_bin $original/binary.cr   $extracted/binary.cr && | ||||||
|  | 		test_cmp_bin $original/binary.crlf $extracted/binary.crlf && | ||||||
|  | 		test_cmp_bin $original/binary.lf   $extracted/binary.lf | ||||||
|  | 	" | ||||||
|  |  | ||||||
|  | 	test_expect_success UNZIP " validate that diff files are converted" " | ||||||
|  | 		test_cmp_bin $extracted/diff.cr $extracted/diff.crlf && | ||||||
|  | 		test_cmp_bin $extracted/diff.cr $extracted/diff.lf | ||||||
|  | 	" | ||||||
|  |  | ||||||
|  | 	test_expect_success UNZIP " validate that -diff files are unchanged" " | ||||||
|  | 		test_cmp_bin $original/nodiff.cr   $extracted/nodiff.cr && | ||||||
|  | 		test_cmp_bin $original/nodiff.crlf $extracted/nodiff.crlf && | ||||||
|  | 		test_cmp_bin $original/nodiff.lf   $extracted/nodiff.lf | ||||||
|  | 	" | ||||||
| } | } | ||||||
|  |  | ||||||
| test_expect_success \ | test_expect_success \ | ||||||
|  | @ -41,6 +72,18 @@ test_expect_success \ | ||||||
|      echo simple textfile >a/a && |      echo simple textfile >a/a && | ||||||
|      mkdir a/bin && |      mkdir a/bin && | ||||||
|      cp /bin/sh a/bin && |      cp /bin/sh a/bin && | ||||||
|  |      printf "text\r"	>a/text.cr && | ||||||
|  |      printf "text\r\n"	>a/text.crlf && | ||||||
|  |      printf "text\n"	>a/text.lf && | ||||||
|  |      printf "text\r"	>a/nodiff.cr && | ||||||
|  |      printf "text\r\n"	>a/nodiff.crlf && | ||||||
|  |      printf "text\n"	>a/nodiff.lf && | ||||||
|  |      printf "\0\r"	>a/binary.cr && | ||||||
|  |      printf "\0\r\n"	>a/binary.crlf && | ||||||
|  |      printf "\0\n"	>a/binary.lf && | ||||||
|  |      printf "\0\r"	>a/diff.cr && | ||||||
|  |      printf "\0\r\n"	>a/diff.crlf && | ||||||
|  |      printf "\0\n"	>a/diff.lf && | ||||||
|      printf "A\$Format:%s\$O" "$SUBSTFORMAT" >a/substfile1 && |      printf "A\$Format:%s\$O" "$SUBSTFORMAT" >a/substfile1 && | ||||||
|      printf "A not substituted O" >a/substfile2 && |      printf "A not substituted O" >a/substfile2 && | ||||||
|      (p=long_path_to_a_file && cd a && |      (p=long_path_to_a_file && cd a && | ||||||
|  | @ -70,7 +113,9 @@ test_expect_success \ | ||||||
|      git update-ref HEAD $(TZ=GMT GIT_COMMITTER_DATE="2005-05-27 22:00:00" \ |      git update-ref HEAD $(TZ=GMT GIT_COMMITTER_DATE="2005-05-27 22:00:00" \ | ||||||
|      git commit-tree $treeid </dev/null)' |      git commit-tree $treeid </dev/null)' | ||||||
|  |  | ||||||
| test_expect_success 'setup export-subst' ' | test_expect_success 'setup export-subst and diff attributes' ' | ||||||
|  | 	echo "a/nodiff.* -diff" >>.git/info/attributes && | ||||||
|  | 	echo "a/diff.* diff" >>.git/info/attributes && | ||||||
| 	echo "substfile?" export-subst >>.git/info/attributes && | 	echo "substfile?" export-subst >>.git/info/attributes && | ||||||
| 	git log --max-count=1 "--pretty=format:A${SUBSTFORMAT}O" HEAD \ | 	git log --max-count=1 "--pretty=format:A${SUBSTFORMAT}O" HEAD \ | ||||||
| 		>a/substfile1 | 		>a/substfile1 | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue
	
	 René Scharfe
						René Scharfe