Merge branch 'jk/zlib-inflate-fixes'

Fix our use of zlib corner cases.

* jk/zlib-inflate-fixes:
  unpack_loose_rest(): rewrite return handling for clarity
  unpack_loose_rest(): simplify error handling
  unpack_loose_rest(): never clean up zstream
  unpack_loose_rest(): avoid numeric comparison of zlib status
  unpack_loose_header(): avoid numeric comparison of zlib status
  git_inflate(): skip zlib_post_call() sanity check on Z_NEED_DICT
  unpack_loose_header(): fix infinite loop on broken zlib input
  unpack_loose_header(): report headers without NUL as "bad"
  unpack_loose_header(): simplify next_out assignment
  loose_object_info(): BUG() on inflating content with unknown type
maint
Junio C Hamano 2025-04-15 13:50:13 -07:00
commit c39e5cbaa5
3 changed files with 92 additions and 36 deletions

View File

@ -45,7 +45,7 @@ static void zlib_pre_call(git_zstream *s)
s->z.avail_out = zlib_buf_cap(s->avail_out); s->z.avail_out = zlib_buf_cap(s->avail_out);
} }


static void zlib_post_call(git_zstream *s) static void zlib_post_call(git_zstream *s, int status)
{ {
unsigned long bytes_consumed; unsigned long bytes_consumed;
unsigned long bytes_produced; unsigned long bytes_produced;
@ -54,7 +54,12 @@ static void zlib_post_call(git_zstream *s)
bytes_produced = s->z.next_out - s->next_out; bytes_produced = s->z.next_out - s->next_out;
if (s->z.total_out != s->total_out + bytes_produced) if (s->z.total_out != s->total_out + bytes_produced)
BUG("total_out mismatch"); BUG("total_out mismatch");
if (s->z.total_in != s->total_in + bytes_consumed) /*
* zlib does not update total_in when it returns Z_NEED_DICT,
* causing a mismatch here. Skip the sanity check in that case.
*/
if (status != Z_NEED_DICT &&
s->z.total_in != s->total_in + bytes_consumed)
BUG("total_in mismatch"); BUG("total_in mismatch");


s->total_out = s->z.total_out; s->total_out = s->z.total_out;
@ -72,7 +77,7 @@ void git_inflate_init(git_zstream *strm)


zlib_pre_call(strm); zlib_pre_call(strm);
status = inflateInit(&strm->z); status = inflateInit(&strm->z);
zlib_post_call(strm); zlib_post_call(strm, status);
if (status == Z_OK) if (status == Z_OK)
return; return;
die("inflateInit: %s (%s)", zerr_to_string(status), die("inflateInit: %s (%s)", zerr_to_string(status),
@ -90,7 +95,7 @@ void git_inflate_init_gzip_only(git_zstream *strm)


zlib_pre_call(strm); zlib_pre_call(strm);
status = inflateInit2(&strm->z, windowBits); status = inflateInit2(&strm->z, windowBits);
zlib_post_call(strm); zlib_post_call(strm, status);
if (status == Z_OK) if (status == Z_OK)
return; return;
die("inflateInit2: %s (%s)", zerr_to_string(status), die("inflateInit2: %s (%s)", zerr_to_string(status),
@ -103,7 +108,7 @@ void git_inflate_end(git_zstream *strm)


zlib_pre_call(strm); zlib_pre_call(strm);
status = inflateEnd(&strm->z); status = inflateEnd(&strm->z);
zlib_post_call(strm); zlib_post_call(strm, status);
if (status == Z_OK) if (status == Z_OK)
return; return;
error("inflateEnd: %s (%s)", zerr_to_string(status), error("inflateEnd: %s (%s)", zerr_to_string(status),
@ -122,7 +127,7 @@ int git_inflate(git_zstream *strm, int flush)
? 0 : flush); ? 0 : flush);
if (status == Z_MEM_ERROR) if (status == Z_MEM_ERROR)
die("inflate: out of memory"); die("inflate: out of memory");
zlib_post_call(strm); zlib_post_call(strm, status);


/* /*
* Let zlib work another round, while we can still * Let zlib work another round, while we can still
@ -160,7 +165,7 @@ void git_deflate_init(git_zstream *strm, int level)
memset(strm, 0, sizeof(*strm)); memset(strm, 0, sizeof(*strm));
zlib_pre_call(strm); zlib_pre_call(strm);
status = deflateInit(&strm->z, level); status = deflateInit(&strm->z, level);
zlib_post_call(strm); zlib_post_call(strm, status);
if (status == Z_OK) if (status == Z_OK)
return; return;
die("deflateInit: %s (%s)", zerr_to_string(status), die("deflateInit: %s (%s)", zerr_to_string(status),
@ -176,7 +181,7 @@ static void do_git_deflate_init(git_zstream *strm, int level, int windowBits)
status = deflateInit2(&strm->z, level, status = deflateInit2(&strm->z, level,
Z_DEFLATED, windowBits, Z_DEFLATED, windowBits,
8, Z_DEFAULT_STRATEGY); 8, Z_DEFAULT_STRATEGY);
zlib_post_call(strm); zlib_post_call(strm, status);
if (status == Z_OK) if (status == Z_OK)
return; return;
die("deflateInit2: %s (%s)", zerr_to_string(status), die("deflateInit2: %s (%s)", zerr_to_string(status),
@ -207,7 +212,7 @@ int git_deflate_abort(git_zstream *strm)


zlib_pre_call(strm); zlib_pre_call(strm);
status = deflateEnd(&strm->z); status = deflateEnd(&strm->z);
zlib_post_call(strm); zlib_post_call(strm, status);
return status; return status;
} }


@ -227,7 +232,7 @@ int git_deflate_end_gently(git_zstream *strm)


zlib_pre_call(strm); zlib_pre_call(strm);
status = deflateEnd(&strm->z); status = deflateEnd(&strm->z);
zlib_post_call(strm); zlib_post_call(strm, status);
return status; return status;
} }


@ -244,7 +249,7 @@ int git_deflate(git_zstream *strm, int flush)
? 0 : flush); ? 0 : flush);
if (status == Z_MEM_ERROR) if (status == Z_MEM_ERROR)
die("deflate: out of memory"); die("deflate: out of memory");
zlib_post_call(strm); zlib_post_call(strm, status);


/* /*
* Let zlib work another round, while we can still * Let zlib work another round, while we can still

View File

@ -1362,7 +1362,7 @@ enum unpack_loose_header_result unpack_loose_header(git_zstream *stream,
obj_read_unlock(); obj_read_unlock();
status = git_inflate(stream, 0); status = git_inflate(stream, 0);
obj_read_lock(); obj_read_lock();
if (status < Z_OK) if (status != Z_OK && status != Z_STREAM_END)
return ULHR_BAD; return ULHR_BAD;


/* /*
@ -1385,20 +1385,19 @@ enum unpack_loose_header_result unpack_loose_header(git_zstream *stream,
* reading the stream. * reading the stream.
*/ */
strbuf_add(header, buffer, stream->next_out - (unsigned char *)buffer); strbuf_add(header, buffer, stream->next_out - (unsigned char *)buffer);
stream->next_out = buffer;
stream->avail_out = bufsiz;


do { do {
stream->next_out = buffer;
stream->avail_out = bufsiz;

obj_read_unlock(); obj_read_unlock();
status = git_inflate(stream, 0); status = git_inflate(stream, 0);
obj_read_lock(); obj_read_lock();
strbuf_add(header, buffer, stream->next_out - (unsigned char *)buffer); strbuf_add(header, buffer, stream->next_out - (unsigned char *)buffer);
if (memchr(buffer, '\0', stream->next_out - (unsigned char *)buffer)) if (memchr(buffer, '\0', stream->next_out - (unsigned char *)buffer))
return 0; return 0;
stream->next_out = buffer; } while (status == Z_OK);
stream->avail_out = bufsiz; return ULHR_BAD;
} while (status != Z_STREAM_END);
return ULHR_TOO_LONG;
} }


static void *unpack_loose_rest(git_zstream *stream, static void *unpack_loose_rest(git_zstream *stream,
@ -1437,18 +1436,17 @@ static void *unpack_loose_rest(git_zstream *stream,
obj_read_lock(); obj_read_lock();
} }
} }
if (status == Z_STREAM_END && !stream->avail_in) {
git_inflate_end(stream);
return buf;
}


if (status < 0) if (status != Z_STREAM_END) {
error(_("corrupt loose object '%s'"), oid_to_hex(oid)); error(_("corrupt loose object '%s'"), oid_to_hex(oid));
else if (stream->avail_in) FREE_AND_NULL(buf);
} else if (stream->avail_in) {
error(_("garbage at end of loose object '%s'"), error(_("garbage at end of loose object '%s'"),
oid_to_hex(oid)); oid_to_hex(oid));
free(buf); FREE_AND_NULL(buf);
return NULL; }

return buf;
} }


/* /*
@ -1580,6 +1578,8 @@ static int loose_object_info(struct repository *r,


if (!oi->contentp) if (!oi->contentp)
break; break;
if (hdrbuf.len)
BUG("unpacking content with unknown types not yet supported");
*oi->contentp = unpack_loose_rest(&stream, hdr, *oi->sizep, oid); *oi->contentp = unpack_loose_rest(&stream, hdr, *oi->sizep, oid);
if (*oi->contentp) if (*oi->contentp)
goto cleanup; goto cleanup;
@ -1600,8 +1600,8 @@ static int loose_object_info(struct repository *r,
die(_("loose object %s (stored in %s) is corrupt"), die(_("loose object %s (stored in %s) is corrupt"),
oid_to_hex(oid), path); oid_to_hex(oid), path);


git_inflate_end(&stream);
cleanup: cleanup:
git_inflate_end(&stream);
munmap(map, mapsize); munmap(map, mapsize);
if (oi->sizep == &size_scratch) if (oi->sizep == &size_scratch)
oi->sizep = NULL; oi->sizep = NULL;
@ -3080,7 +3080,6 @@ static int check_stream_oid(git_zstream *stream,
git_hash_update(&c, buf, stream->next_out - buf); git_hash_update(&c, buf, stream->next_out - buf);
total_read += stream->next_out - buf; total_read += stream->next_out - buf;
} }
git_inflate_end(stream);


if (status != Z_STREAM_END) { if (status != Z_STREAM_END) {
error(_("corrupt loose object '%s'"), oid_to_hex(expected_oid)); error(_("corrupt loose object '%s'"), oid_to_hex(expected_oid));
@ -3127,35 +3126,34 @@ int read_loose_object(const char *path,
if (unpack_loose_header(&stream, map, mapsize, hdr, sizeof(hdr), if (unpack_loose_header(&stream, map, mapsize, hdr, sizeof(hdr),
NULL) != ULHR_OK) { NULL) != ULHR_OK) {
error(_("unable to unpack header of %s"), path); error(_("unable to unpack header of %s"), path);
git_inflate_end(&stream); goto out_inflate;
goto out;
} }


if (parse_loose_header(hdr, oi) < 0) { if (parse_loose_header(hdr, oi) < 0) {
error(_("unable to parse header of %s"), path); error(_("unable to parse header of %s"), path);
git_inflate_end(&stream); goto out_inflate;
goto out;
} }


if (*oi->typep == OBJ_BLOB && *size > big_file_threshold) { if (*oi->typep == OBJ_BLOB && *size > big_file_threshold) {
if (check_stream_oid(&stream, hdr, *size, path, expected_oid) < 0) if (check_stream_oid(&stream, hdr, *size, path, expected_oid) < 0)
goto out; goto out_inflate;
} else { } else {
*contents = unpack_loose_rest(&stream, hdr, *size, expected_oid); *contents = unpack_loose_rest(&stream, hdr, *size, expected_oid);
if (!*contents) { if (!*contents) {
error(_("unable to unpack contents of %s"), path); error(_("unable to unpack contents of %s"), path);
git_inflate_end(&stream); goto out_inflate;
goto out;
} }
hash_object_file_literally(the_repository->hash_algo, hash_object_file_literally(the_repository->hash_algo,
*contents, *size, *contents, *size,
oi->type_name->buf, real_oid); oi->type_name->buf, real_oid);
if (!oideq(expected_oid, real_oid)) if (!oideq(expected_oid, real_oid))
goto out; goto out_inflate;
} }


ret = 0; /* everything checks out */ ret = 0; /* everything checks out */


out_inflate:
git_inflate_end(&stream);
out: out:
if (map) if (map)
munmap(map, mapsize); munmap(map, mapsize);

View File

@ -903,6 +903,59 @@ test_expect_success 'cat-file -t and -s on corrupt loose object' '
) )
' '


test_expect_success 'truncated object with --allow-unknown-type' - <<\EOT
objtype='a really long type name that exceeds the 32-byte limit' &&
blob=$(git hash-object -w --literally -t "$objtype" /dev/null) &&
objpath=.git/objects/$(test_oid_to_path "$blob") &&

# We want to truncate the object far enough in that we don't hit the
# end while inflating the first 32 bytes (since we want to have to dig
# for the trailing NUL of the header). But we don't want to go too far,
# since our header isn't very big. And of course we are counting
# deflated zlib bytes in the on-disk file, so it's a bit of a guess.
# Empirically 50 seems to work.
mv "$objpath" obj.bak &&
test_when_finished 'mv obj.bak "$objpath"' &&
test_copy_bytes 50 <obj.bak >"$objpath" &&

test_must_fail git cat-file --allow-unknown-type -t $blob 2>err &&
test_grep "unable to unpack $blob header" err
EOT

test_expect_success 'object reading handles zlib dictionary' - <<\EOT
echo 'content that will be recompressed' >file &&
blob=$(git hash-object -w file) &&
objpath=.git/objects/$(test_oid_to_path "$blob") &&

# Recompress a loose object using a precomputed zlib dictionary.
# This was originally done with:
#
# perl -MCompress::Raw::Zlib -e '
# binmode STDIN;
# binmode STDOUT;
# my $data = do { local $/; <STDIN> };
# my $in = new Compress::Raw::Zlib::Inflate;
# my $de = new Compress::Raw::Zlib::Deflate(
# -Dictionary => "anything"
# );
# $in->inflate($data, $raw);
# $de->deflate($raw, $out);
# print $out;
# ' <obj.bak >$objpath
#
# but we do not want to require the perl module for all test runs (nor
# carry a custom t/helper program that uses zlib features we don't
# otherwise care about).
mv "$objpath" obj.bak &&
test_when_finished 'mv obj.bak "$objpath"' &&
printf '\170\273\017\112\003\143' >$objpath &&

test_must_fail git cat-file blob $blob 2>err &&
test_grep ! 'too long' err &&
test_grep 'error: unable to unpack' err &&
test_grep 'error: inflate: needs dictionary' err
EOT

# Tests for git cat-file --follow-symlinks # Tests for git cat-file --follow-symlinks
test_expect_success 'prep for symlink tests' ' test_expect_success 'prep for symlink tests' '
echo_without_newline "$hello_content" >morx && echo_without_newline "$hello_content" >morx &&