Merge branch 'jk/zlib-inflate-fixes'

Fix our use of zlib corner cases. * jk/zlib-inflate-fixes: unpack_loose_rest(): rewrite return handling for clarity unpack_loose_rest(): simplify error handling unpack_loose_rest(): never clean up zstream unpack_loose_rest(): avoid numeric comparison of zlib status unpack_loose_header(): avoid numeric comparison of zlib status git_inflate(): skip zlib_post_call() sanity check on Z_NEED_DICT unpack_loose_header(): fix infinite loop on broken zlib input unpack_loose_header(): report headers without NUL as "bad" unpack_loose_header(): simplify next_out assignment loose_object_info(): BUG() on inflating content with unknown type
2025-04-15 13:50:13 -07:00 · 2025-04-15 13:50:13 -07:00 · c39e5cbaa5
parent 139d703511 1cb2f293f5
commit c39e5cbaa5
3 changed files with 92 additions and 36 deletions
--- a/git-zlib.c
+++ b/git-zlib.c
@ -45,7 +45,7 @@ static void zlib_pre_call(git_zstream *s)
 	s->z.avail_out = zlib_buf_cap(s->avail_out);
 }

-static void zlib_post_call(git_zstream *s)
+static void zlib_post_call(git_zstream *s, int status)
 {
 	unsigned long bytes_consumed;
 	unsigned long bytes_produced;
@ -54,7 +54,12 @@ static void zlib_post_call(git_zstream *s)
 	bytes_produced = s->z.next_out - s->next_out;
 	if (s->z.total_out != s->total_out + bytes_produced)
 		BUG("total_out mismatch");
-	if (s->z.total_in != s->total_in + bytes_consumed)
+	/*
+	 * zlib does not update total_in when it returns Z_NEED_DICT,
+	 * causing a mismatch here. Skip the sanity check in that case.
+	 */
+	if (status != Z_NEED_DICT &&
+	    s->z.total_in != s->total_in + bytes_consumed)
 		BUG("total_in mismatch");

 	s->total_out = s->z.total_out;
@ -72,7 +77,7 @@ void git_inflate_init(git_zstream *strm)

 	zlib_pre_call(strm);
 	status = inflateInit(&strm->z);
-	zlib_post_call(strm);
+	zlib_post_call(strm, status);
 	if (status == Z_OK)
 		return;
 	die("inflateInit: %s (%s)", zerr_to_string(status),
@ -90,7 +95,7 @@ void git_inflate_init_gzip_only(git_zstream *strm)

 	zlib_pre_call(strm);
 	status = inflateInit2(&strm->z, windowBits);
-	zlib_post_call(strm);
+	zlib_post_call(strm, status);
 	if (status == Z_OK)
 		return;
 	die("inflateInit2: %s (%s)", zerr_to_string(status),
@ -103,7 +108,7 @@ void git_inflate_end(git_zstream *strm)

 	zlib_pre_call(strm);
 	status = inflateEnd(&strm->z);
-	zlib_post_call(strm);
+	zlib_post_call(strm, status);
 	if (status == Z_OK)
 		return;
 	error("inflateEnd: %s (%s)", zerr_to_string(status),
@ -122,7 +127,7 @@ int git_inflate(git_zstream *strm, int flush)
 				 ? 0 : flush);
 		if (status == Z_MEM_ERROR)
 			die("inflate: out of memory");
-		zlib_post_call(strm);
+		zlib_post_call(strm, status);

 		/*
 		 * Let zlib work another round, while we can still
@ -160,7 +165,7 @@ void git_deflate_init(git_zstream *strm, int level)
 	memset(strm, 0, sizeof(*strm));
 	zlib_pre_call(strm);
 	status = deflateInit(&strm->z, level);
-	zlib_post_call(strm);
+	zlib_post_call(strm, status);
 	if (status == Z_OK)
 		return;
 	die("deflateInit: %s (%s)", zerr_to_string(status),
@ -176,7 +181,7 @@ static void do_git_deflate_init(git_zstream *strm, int level, int windowBits)
 	status = deflateInit2(&strm->z, level,
 				  Z_DEFLATED, windowBits,
 				  8, Z_DEFAULT_STRATEGY);
-	zlib_post_call(strm);
+	zlib_post_call(strm, status);
 	if (status == Z_OK)
 		return;
 	die("deflateInit2: %s (%s)", zerr_to_string(status),
@ -207,7 +212,7 @@ int git_deflate_abort(git_zstream *strm)

 	zlib_pre_call(strm);
 	status = deflateEnd(&strm->z);
-	zlib_post_call(strm);
+	zlib_post_call(strm, status);
 	return status;
 }

@ -227,7 +232,7 @@ int git_deflate_end_gently(git_zstream *strm)

 	zlib_pre_call(strm);
 	status = deflateEnd(&strm->z);
-	zlib_post_call(strm);
+	zlib_post_call(strm, status);
 	return status;
 }

@ -244,7 +249,7 @@ int git_deflate(git_zstream *strm, int flush)
 				 ? 0 : flush);
 		if (status == Z_MEM_ERROR)
 			die("deflate: out of memory");
-		zlib_post_call(strm);
+		zlib_post_call(strm, status);

 		/*
 		 * Let zlib work another round, while we can still
--- a/object-file.c
+++ b/object-file.c
@ -1362,7 +1362,7 @@ enum unpack_loose_header_result unpack_loose_header(git_zstream *stream,
 	obj_read_unlock();
 	status = git_inflate(stream, 0);
 	obj_read_lock();
-	if (status < Z_OK)
+	if (status != Z_OK && status != Z_STREAM_END)
 		return ULHR_BAD;

 	/*
@ -1385,20 +1385,19 @@ enum unpack_loose_header_result unpack_loose_header(git_zstream *stream,
 	 * reading the stream.
 	 */
 	strbuf_add(header, buffer, stream->next_out - (unsigned char *)buffer);
-	stream->next_out = buffer;
-	stream->avail_out = bufsiz;

 	do {
+		stream->next_out = buffer;
+		stream->avail_out = bufsiz;
+
 		obj_read_unlock();
 		status = git_inflate(stream, 0);
 		obj_read_lock();
 		strbuf_add(header, buffer, stream->next_out - (unsigned char *)buffer);
 		if (memchr(buffer, '\0', stream->next_out - (unsigned char *)buffer))
 			return 0;
-		stream->next_out = buffer;
-		stream->avail_out = bufsiz;
-	} while (status != Z_STREAM_END);
-	return ULHR_TOO_LONG;
+	} while (status == Z_OK);
+	return ULHR_BAD;
 }

 static void *unpack_loose_rest(git_zstream *stream,
@ -1437,18 +1436,17 @@ static void *unpack_loose_rest(git_zstream *stream,
 			obj_read_lock();
 		}
 	}
-	if (status == Z_STREAM_END && !stream->avail_in) {
-		git_inflate_end(stream);
-		return buf;
-	}

-	if (status < 0)
+	if (status != Z_STREAM_END) {
 		error(_("corrupt loose object '%s'"), oid_to_hex(oid));
-	else if (stream->avail_in)
+		FREE_AND_NULL(buf);
+	} else if (stream->avail_in) {
 		error(_("garbage at end of loose object '%s'"),
 		      oid_to_hex(oid));
-	free(buf);
-	return NULL;
+		FREE_AND_NULL(buf);
+	}
+
+	return buf;
 }

 /*
@ -1580,6 +1578,8 @@ static int loose_object_info(struct repository *r,

 		if (!oi->contentp)
 			break;
+		if (hdrbuf.len)
+			BUG("unpacking content with unknown types not yet supported");
 		*oi->contentp = unpack_loose_rest(&stream, hdr, *oi->sizep, oid);
 		if (*oi->contentp)
 			goto cleanup;
@ -1600,8 +1600,8 @@ static int loose_object_info(struct repository *r,
 		die(_("loose object %s (stored in %s) is corrupt"),
 		    oid_to_hex(oid), path);

-	git_inflate_end(&stream);
 cleanup:
+	git_inflate_end(&stream);
 	munmap(map, mapsize);
 	if (oi->sizep == &size_scratch)
 		oi->sizep = NULL;
@ -3080,7 +3080,6 @@ static int check_stream_oid(git_zstream *stream,
 		git_hash_update(&c, buf, stream->next_out - buf);
 		total_read += stream->next_out - buf;
 	}
-	git_inflate_end(stream);

 	if (status != Z_STREAM_END) {
 		error(_("corrupt loose object '%s'"), oid_to_hex(expected_oid));
@ -3127,35 +3126,34 @@ int read_loose_object(const char *path,
 	if (unpack_loose_header(&stream, map, mapsize, hdr, sizeof(hdr),
 				NULL) != ULHR_OK) {
 		error(_("unable to unpack header of %s"), path);
-		git_inflate_end(&stream);
-		goto out;
+		goto out_inflate;
 	}

 	if (parse_loose_header(hdr, oi) < 0) {
 		error(_("unable to parse header of %s"), path);
-		git_inflate_end(&stream);
-		goto out;
+		goto out_inflate;
 	}

 	if (*oi->typep == OBJ_BLOB && *size > big_file_threshold) {
 		if (check_stream_oid(&stream, hdr, *size, path, expected_oid) < 0)
-			goto out;
+			goto out_inflate;
 	} else {
 		*contents = unpack_loose_rest(&stream, hdr, *size, expected_oid);
 		if (!*contents) {
 			error(_("unable to unpack contents of %s"), path);
-			git_inflate_end(&stream);
-			goto out;
+			goto out_inflate;
 		}
 		hash_object_file_literally(the_repository->hash_algo,
 					   *contents, *size,
 					   oi->type_name->buf, real_oid);
 		if (!oideq(expected_oid, real_oid))
-			goto out;
+			goto out_inflate;
 	}

 	ret = 0; /* everything checks out */

+out_inflate:
+	git_inflate_end(&stream);
 out:
 	if (map)
 		munmap(map, mapsize);
--- a/t/t1006-cat-file.sh
+++ b/t/t1006-cat-file.sh
@ -903,6 +903,59 @@ test_expect_success 'cat-file -t and -s on corrupt loose object' '
 	)
 '

+test_expect_success 'truncated object with --allow-unknown-type' - <<\EOT
+	objtype='a really long type name that exceeds the 32-byte limit' &&
+	blob=$(git hash-object -w --literally -t "$objtype" /dev/null) &&
+	objpath=.git/objects/$(test_oid_to_path "$blob") &&
+
+	# We want to truncate the object far enough in that we don't hit the
+	# end while inflating the first 32 bytes (since we want to have to dig
+	# for the trailing NUL of the header). But we don't want to go too far,
+	# since our header isn't very big. And of course we are counting
+	# deflated zlib bytes in the on-disk file, so it's a bit of a guess.
+	# Empirically 50 seems to work.
+	mv "$objpath" obj.bak &&
+	test_when_finished 'mv obj.bak "$objpath"' &&
+	test_copy_bytes 50 <obj.bak >"$objpath" &&
+
+	test_must_fail git cat-file --allow-unknown-type -t $blob 2>err &&
+	test_grep "unable to unpack $blob header" err
+EOT
+
+test_expect_success 'object reading handles zlib dictionary' - <<\EOT
+	echo 'content that will be recompressed' >file &&
+	blob=$(git hash-object -w file) &&
+	objpath=.git/objects/$(test_oid_to_path "$blob") &&
+
+	# Recompress a loose object using a precomputed zlib dictionary.
+	# This was originally done with:
+	#
+	#  perl -MCompress::Raw::Zlib -e '
+	#    binmode STDIN;
+	#    binmode STDOUT;
+	#    my $data = do { local $/; <STDIN> };
+	#    my $in = new Compress::Raw::Zlib::Inflate;
+	#    my $de = new Compress::Raw::Zlib::Deflate(
+	#      -Dictionary => "anything"
+	#    );
+	#    $in->inflate($data, $raw);
+	#    $de->deflate($raw, $out);
+	#    print $out;
+	#  ' <obj.bak >$objpath
+	#
+	# but we do not want to require the perl module for all test runs (nor
+	# carry a custom t/helper program that uses zlib features we don't
+	# otherwise care about).
+	mv "$objpath" obj.bak &&
+	test_when_finished 'mv obj.bak "$objpath"' &&
+	printf '\170\273\017\112\003\143' >$objpath &&
+
+	test_must_fail git cat-file blob $blob 2>err &&
+	test_grep ! 'too long' err &&
+	test_grep 'error: unable to unpack' err &&
+	test_grep 'error: inflate: needs dictionary' err
+EOT
+
 # Tests for git cat-file --follow-symlinks
 test_expect_success 'prep for symlink tests' '
 	echo_without_newline "$hello_content" >morx &&