Merge branch 'ps/fsck-stream-from-the-right-object-instance'

"fsck" iterates over packfiles and its access to pack data caused
the packfile list to be permuted, which caused it to loop forever; the code
"fsck" uses to access pack data has been updated to avoid this.

* ps/fsck-stream-from-the-right-object-instance:
  pack-check: fix verification of large objects
  packfile: expose function to read object stream for an offset
  object-file: adapt `stream_object_signature()` to take a stream
  t/helper: improve "genrandom" test helper
maint
Junio C Hamano 2026-03-05 10:04:49 -08:00
commit d93be9cbca
14 changed files with 114 additions and 43 deletions

View File

@ -129,18 +129,15 @@ int check_object_signature(struct repository *r, const struct object_id *oid,
return !oideq(oid, &real_oid) ? -1 : 0; return !oideq(oid, &real_oid) ? -1 : 0;
} }


int stream_object_signature(struct repository *r, const struct object_id *oid) int stream_object_signature(struct repository *r,
struct odb_read_stream *st,
const struct object_id *oid)
{ {
struct object_id real_oid; struct object_id real_oid;
struct odb_read_stream *st;
struct git_hash_ctx c; struct git_hash_ctx c;
char hdr[MAX_HEADER_LEN]; char hdr[MAX_HEADER_LEN];
int hdrlen; int hdrlen;


st = odb_read_stream_open(r->objects, oid, NULL);
if (!st)
return -1;

/* Generate the header */ /* Generate the header */
hdrlen = format_object_header(hdr, sizeof(hdr), st->type, st->size); hdrlen = format_object_header(hdr, sizeof(hdr), st->type, st->size);


@ -160,7 +157,6 @@ int stream_object_signature(struct repository *r, const struct object_id *oid)
git_hash_update(&c, buf, readlen); git_hash_update(&c, buf, readlen);
} }
git_hash_final_oid(&real_oid, &c); git_hash_final_oid(&real_oid, &c);
odb_read_stream_close(st);
return !oideq(oid, &real_oid) ? -1 : 0; return !oideq(oid, &real_oid) ? -1 : 0;
} }



View File

@ -166,7 +166,9 @@ int check_object_signature(struct repository *r, const struct object_id *oid,
* Try reading the object named with "oid" using * Try reading the object named with "oid" using
* the streaming interface and rehash it to do the same. * the streaming interface and rehash it to do the same.
*/ */
int stream_object_signature(struct repository *r, const struct object_id *oid); int stream_object_signature(struct repository *r,
struct odb_read_stream *stream,
const struct object_id *oid);


enum finalize_object_file_flags { enum finalize_object_file_flags {
FOF_SKIP_COLLISION_CHECK = 1, FOF_SKIP_COLLISION_CHECK = 1,

View File

@ -6,6 +6,7 @@
#include "object.h" #include "object.h"
#include "replace-object.h" #include "replace-object.h"
#include "object-file.h" #include "object-file.h"
#include "odb/streaming.h"
#include "blob.h" #include "blob.h"
#include "statinfo.h" #include "statinfo.h"
#include "tree.h" #include "tree.h"
@ -343,9 +344,21 @@ struct object *parse_object_with_flags(struct repository *r,


if ((!obj || obj->type == OBJ_NONE || obj->type == OBJ_BLOB) && if ((!obj || obj->type == OBJ_NONE || obj->type == OBJ_BLOB) &&
odb_read_object_info(r->objects, oid, NULL) == OBJ_BLOB) { odb_read_object_info(r->objects, oid, NULL) == OBJ_BLOB) {
if (!skip_hash && stream_object_signature(r, repl) < 0) { if (!skip_hash) {
error(_("hash mismatch %s"), oid_to_hex(oid)); struct odb_read_stream *stream = odb_read_stream_open(r->objects, oid, NULL);
return NULL;
if (!stream) {
error(_("unable to open object stream for %s"), oid_to_hex(oid));
return NULL;
}

if (stream_object_signature(r, stream, repl) < 0) {
error(_("hash mismatch %s"), oid_to_hex(oid));
odb_read_stream_close(stream);
return NULL;
}

odb_read_stream_close(stream);
} }
parse_blob_buffer(lookup_blob(r, oid)); parse_blob_buffer(lookup_blob(r, oid));
return lookup_object(r, oid); return lookup_object(r, oid);

View File

@ -9,6 +9,7 @@
#include "packfile.h" #include "packfile.h"
#include "object-file.h" #include "object-file.h"
#include "odb.h" #include "odb.h"
#include "odb/streaming.h"


struct idx_entry { struct idx_entry {
off_t offset; off_t offset;
@ -104,6 +105,7 @@ static int verify_packfile(struct repository *r,
QSORT(entries, nr_objects, compare_entries); QSORT(entries, nr_objects, compare_entries);


for (i = 0; i < nr_objects; i++) { for (i = 0; i < nr_objects; i++) {
struct odb_read_stream *stream = NULL;
void *data; void *data;
struct object_id oid; struct object_id oid;
enum object_type type; enum object_type type;
@ -152,7 +154,9 @@ static int verify_packfile(struct repository *r,
type) < 0) type) < 0)
err = error("packed %s from %s is corrupt", err = error("packed %s from %s is corrupt",
oid_to_hex(&oid), p->pack_name); oid_to_hex(&oid), p->pack_name);
else if (!data && stream_object_signature(r, &oid) < 0) else if (!data &&
(packfile_read_object_stream(&stream, &oid, p, entries[i].offset) < 0 ||
stream_object_signature(r, stream, &oid) < 0))
err = error("packed %s from %s is corrupt", err = error("packed %s from %s is corrupt",
oid_to_hex(&oid), p->pack_name); oid_to_hex(&oid), p->pack_name);
else if (fn) { else if (fn) {
@ -163,12 +167,14 @@ static int verify_packfile(struct repository *r,
} }
if (((base_count + i) & 1023) == 0) if (((base_count + i) & 1023) == 0)
display_progress(progress, base_count + i); display_progress(progress, base_count + i);
free(data);


if (stream)
odb_read_stream_close(stream);
free(data);
} }

display_progress(progress, base_count + i); display_progress(progress, base_count + i);
free(entries); free(entries);

return err; return err;
} }



View File

@ -2621,32 +2621,28 @@ static int close_istream_pack_non_delta(struct odb_read_stream *_st)
return 0; return 0;
} }


int packfile_store_read_object_stream(struct odb_read_stream **out, int packfile_read_object_stream(struct odb_read_stream **out,
struct packfile_store *store, const struct object_id *oid,
const struct object_id *oid) struct packed_git *pack,
off_t offset)
{ {
struct odb_packed_read_stream *stream; struct odb_packed_read_stream *stream;
struct pack_window *window = NULL; struct pack_window *window = NULL;
struct object_info oi = OBJECT_INFO_INIT;
enum object_type in_pack_type; enum object_type in_pack_type;
unsigned long size; unsigned long size;


oi.sizep = &size; in_pack_type = unpack_object_header(pack, &window, &offset, &size);
unuse_pack(&window);


if (packfile_store_read_object_info(store, oid, &oi, 0) || if (repo_settings_get_big_file_threshold(pack->repo) >= size)
oi.u.packed.type == PACKED_OBJECT_TYPE_REF_DELTA ||
oi.u.packed.type == PACKED_OBJECT_TYPE_OFS_DELTA ||
repo_settings_get_big_file_threshold(store->source->odb->repo) >= size)
return -1; return -1;


in_pack_type = unpack_object_header(oi.u.packed.pack,
&window,
&oi.u.packed.offset,
&size);
unuse_pack(&window);
switch (in_pack_type) { switch (in_pack_type) {
default: default:
return -1; /* we do not do deltas for now */ return -1; /* we do not do deltas for now */
case OBJ_BAD:
mark_bad_packed_object(pack, oid);
return -1;
case OBJ_COMMIT: case OBJ_COMMIT:
case OBJ_TREE: case OBJ_TREE:
case OBJ_BLOB: case OBJ_BLOB:
@ -2660,10 +2656,22 @@ int packfile_store_read_object_stream(struct odb_read_stream **out,
stream->base.type = in_pack_type; stream->base.type = in_pack_type;
stream->base.size = size; stream->base.size = size;
stream->z_state = ODB_PACKED_READ_STREAM_UNINITIALIZED; stream->z_state = ODB_PACKED_READ_STREAM_UNINITIALIZED;
stream->pack = oi.u.packed.pack; stream->pack = pack;
stream->pos = oi.u.packed.offset; stream->pos = offset;


*out = &stream->base; *out = &stream->base;


return 0; return 0;
} }

int packfile_store_read_object_stream(struct odb_read_stream **out,
struct packfile_store *store,
const struct object_id *oid)
{
struct pack_entry e;

if (!find_pack_entry(store, oid, &e))
return -1;

return packfile_read_object_stream(out, oid, e.p, e.offset);
}

View File

@ -449,6 +449,11 @@ off_t get_delta_base(struct packed_git *p, struct pack_window **w_curs,
off_t *curpos, enum object_type type, off_t *curpos, enum object_type type,
off_t delta_obj_offset); off_t delta_obj_offset);


/*
 * Open a read stream for the object stored at `offset` inside `pack`.
 *
 * On success the new stream is stored in `*out` and 0 is returned. Returns
 * -1 when the object is not streamed from here: its size does not exceed
 * the repository's big file threshold, it is stored as a delta, or its
 * header cannot be parsed (in which case `oid` is recorded as a bad object
 * for `pack`).
 *
 * NOTE(review): callers in this change release the stream with
 * odb_read_stream_close(); confirm that this is the required cleanup.
 */
int packfile_read_object_stream(struct odb_read_stream **out,
const struct object_id *oid,
struct packed_git *pack,
off_t offset);

void release_pack_memory(size_t); void release_pack_memory(size_t);


/* global flag to enable extra checks when accessing packed objects */ /* global flag to enable extra checks when accessing packed objects */

View File

@ -6,6 +6,7 @@


#include "test-tool.h" #include "test-tool.h"
#include "git-compat-util.h" #include "git-compat-util.h"
#include "parse.h"


int cmd__genrandom(int argc, const char **argv) int cmd__genrandom(int argc, const char **argv)
{ {
@ -22,7 +23,9 @@ int cmd__genrandom(int argc, const char **argv)
next = next * 11 + *c; next = next * 11 + *c;
} while (*c++); } while (*c++);


count = (argc == 3) ? strtoul(argv[2], NULL, 0) : ULONG_MAX; count = ULONG_MAX;
if (argc == 3 && !git_parse_ulong(argv[2], &count))
return error_errno("cannot parse argument '%s'", argv[2]);


while (count--) { while (count--) {
next = next * 1103515245 + 12345; next = next * 1103515245 + 12345;

View File

@ -643,7 +643,7 @@ test_expect_success 'object reference via commit text search' '
' '


test_expect_success 'setup blobs which are likely to delta' ' test_expect_success 'setup blobs which are likely to delta' '
test-tool genrandom foo 10240 >foo && test-tool genrandom foo 10k >foo &&
{ cat foo && echo plus; } >foo-plus && { cat foo && echo plus; } >foo-plus &&
git add foo foo-plus && git add foo foo-plus &&
git commit -m foo && git commit -m foo &&

View File

@ -104,9 +104,9 @@ test_expect_success 'packsize limit' '
# mid1 and mid2 will fit within 256k limit but # mid1 and mid2 will fit within 256k limit but
# appending mid3 will bust the limit and will # appending mid3 will bust the limit and will
# result in a separate packfile. # result in a separate packfile.
test-tool genrandom "a" $(( 66 * 1024 )) >mid1 && test-tool genrandom "a" 66k >mid1 &&
test-tool genrandom "b" $(( 80 * 1024 )) >mid2 && test-tool genrandom "b" 80k >mid2 &&
test-tool genrandom "c" $(( 128 * 1024 )) >mid3 && test-tool genrandom "c" 128k >mid3 &&
git add mid1 mid2 mid3 && git add mid1 mid2 mid3 &&


count=0 && count=0 &&

View File

@ -852,6 +852,44 @@ test_expect_success 'fsck errors in packed objects' '
! grep corrupt out ! grep corrupt out
' '


test_expect_success 'fsck handles multiple packfiles with big blobs' '
test_when_finished "rm -rf repo" &&
git init repo &&
(
cd repo &&

# We construct two packfiles with two objects in common and one
# object not in common. The objects in common can then be
# corrupted in one of the packfiles, respectively. The other
# objects that are unique to the packs are merely used to not
# have both packs contain the same data.
blob_one=$(test-tool genrandom one 200k | git hash-object -t blob -w --stdin) &&
blob_two=$(test-tool genrandom two 200k | git hash-object -t blob -w --stdin) &&
blob_three=$(test-tool genrandom three 200k | git hash-object -t blob -w --stdin) &&
blob_four=$(test-tool genrandom four 200k | git hash-object -t blob -w --stdin) &&
pack_one=$(printf "%s\n" "$blob_one" "$blob_two" "$blob_three" | git pack-objects .git/objects/pack/pack) &&
pack_two=$(printf "%s\n" "$blob_two" "$blob_three" "$blob_four" | git pack-objects .git/objects/pack/pack) &&
chmod a+w .git/objects/pack/pack-*.pack &&

# Corrupt blob two in the first pack by overwriting the single
# byte at its pack offset (the last field of the verify-pack
# output line) with NUL.
git verify-pack -v .git/objects/pack/pack-$pack_one >objects &&
offset_one=$(sed <objects -n "s/^$blob_two .* \(.*\)$/\1/p") &&
printf "\0" | dd of=.git/objects/pack/pack-$pack_one.pack bs=1 conv=notrunc seek=$offset_one &&

# Corrupt blob three in the second pack in the same way.
git verify-pack -v .git/objects/pack/pack-$pack_two >objects &&
offset_two=$(sed <objects -n "s/^$blob_three .* \(.*\)$/\1/p") &&
printf "\0" | dd of=.git/objects/pack/pack-$pack_two.pack bs=1 conv=notrunc seek=$offset_two &&

# We now expect to see two failures for the corrupted objects,
# even though they exist in a non-corrupted form in the
# respective other pack.
#
# core.bigFileThreshold is set to 100k so that the 200k blobs
# are above the threshold; presumably this is what routes them
# through the streaming verification path (TODO confirm).
test_must_fail git -c core.bigFileThreshold=100k fsck 2>err &&
test_grep "unknown object type 0 at offset $offset_one in .git/objects/pack/pack-$pack_one.pack" err &&
test_grep "unknown object type 0 at offset $offset_two in .git/objects/pack/pack-$pack_two.pack" err
)
'

test_expect_success 'fsck fails on corrupt packfile' ' test_expect_success 'fsck fails on corrupt packfile' '
hsh=$(git commit-tree -m mycommit HEAD^{tree}) && hsh=$(git commit-tree -m mycommit HEAD^{tree}) &&
pack=$(echo $hsh | git pack-objects .git/objects/pack/pack) && pack=$(echo $hsh | git pack-objects .git/objects/pack/pack) &&
@ -918,7 +956,7 @@ test_expect_success 'fsck detects trailing loose garbage (large blob)' '
test_expect_success 'fsck detects truncated loose object' ' test_expect_success 'fsck detects truncated loose object' '
# make it big enough that we know we will truncate in the data # make it big enough that we know we will truncate in the data
# portion, not the header # portion, not the header
test-tool genrandom truncate 4096 >file && test-tool genrandom truncate 4k >file &&
blob=$(git hash-object -w file) && blob=$(git hash-object -w file) &&
file=$(sha1_file $blob) && file=$(sha1_file $blob) &&
test_when_finished "remove_object $blob" && test_when_finished "remove_object $blob" &&

View File

@ -12,7 +12,7 @@ test_expect_success 'setup' '
for i in a b c for i in a b c
do do
echo $i >$i && echo $i >$i &&
test-tool genrandom "$i" 32768 >>$i && test-tool genrandom "$i" 32k >>$i &&
git update-index --add $i || return 1 git update-index --add $i || return 1
done && done &&
echo d >d && cat c >>d && git update-index --add d && echo d >d && cat c >>d && git update-index --add d &&

View File

@ -242,7 +242,7 @@ test_bitmap_cases () {
' '


test_expect_success 'splitting packs does not generate bogus bitmaps' ' test_expect_success 'splitting packs does not generate bogus bitmaps' '
test-tool genrandom foo $((1024 * 1024)) >rand && test-tool genrandom foo 1m >rand &&
git add rand && git add rand &&
git commit -m "commit with big file" && git commit -m "commit with big file" &&
git -c pack.packSizeLimit=500k repack -adb && git -c pack.packSizeLimit=500k repack -adb &&

View File

@ -20,7 +20,7 @@ test_expect_success 'setup: create "template" repository' '
test_commit -C template 1 && test_commit -C template 1 &&
test_commit -C template 2 && test_commit -C template 2 &&
test_commit -C template 3 && test_commit -C template 3 &&
test-tool genrandom foo 10240 >template/foo && test-tool genrandom foo 10k >template/foo &&
git -C template add foo && git -C template add foo &&
git -C template commit -m foo git -C template commit -m foo
' '
@ -499,7 +499,7 @@ test_expect_success "clone with promisor.advertise set to 'true' but don't delet


test_expect_success "setup for subsequent fetches" ' test_expect_success "setup for subsequent fetches" '
# Generate new commit with large blob # Generate new commit with large blob
test-tool genrandom bar 10240 >template/bar && test-tool genrandom bar 10k >template/bar &&
git -C template add bar && git -C template add bar &&
git -C template commit -m bar && git -C template commit -m bar &&



View File

@ -321,7 +321,7 @@ test_expect_success 'no bitmaps created if .keep files present' '


test_expect_success 'auto-bitmaps do not complain if unavailable' ' test_expect_success 'auto-bitmaps do not complain if unavailable' '
test_config -C bare.git pack.packSizeLimit 1M && test_config -C bare.git pack.packSizeLimit 1M &&
blob=$(test-tool genrandom big $((1024*1024)) | blob=$(test-tool genrandom big 1m |
git -C bare.git hash-object -w --stdin) && git -C bare.git hash-object -w --stdin) &&
git -C bare.git update-ref refs/tags/big $blob && git -C bare.git update-ref refs/tags/big $blob &&


@ -497,9 +497,9 @@ test_expect_success '--filter works with --max-pack-size' '
cd max-pack-size && cd max-pack-size &&
test_commit base && test_commit base &&
# two blobs which exceed the maximum pack size # two blobs which exceed the maximum pack size
test-tool genrandom foo 1048576 >foo && test-tool genrandom foo 1m >foo &&
git hash-object -w foo && git hash-object -w foo &&
test-tool genrandom bar 1048576 >bar && test-tool genrandom bar 1m >bar &&
git hash-object -w bar && git hash-object -w bar &&
git add foo bar && git add foo bar &&
git commit -m "adding foo and bar" git commit -m "adding foo and bar"