Merge branch 'ps/fsck-stream-from-the-right-object-instance'

"fsck" iterates over packfiles and its access to pack data caused the list to be permuted, which caused it to loop forever; the code to access pack data by "fsck" has been updated to avoid this. * ps/fsck-stream-from-the-right-object-instance: pack-check: fix verification of large objects packfile: expose function to read object stream for an offset object-file: adapt `stream_object_signature()` to take a stream t/helper: improve "genrandom" test helper
2026-03-05 10:04:49 -08:00 · 2026-03-05 10:04:49 -08:00 · d93be9cbca
parent db227bce22 13eb65d366
commit d93be9cbca
14 changed files with 114 additions and 43 deletions
--- a/object-file.c
+++ b/object-file.c
@ -129,18 +129,15 @@ int check_object_signature(struct repository *r, const struct object_id *oid,
 	return !oideq(oid, &real_oid) ? -1 : 0;
 }
-int stream_object_signature(struct repository *r, const struct object_id *oid)
+int stream_object_signature(struct repository *r,
 			    struct odb_read_stream *st,
 			    const struct object_id *oid)
 {
 	struct object_id real_oid;
 	struct odb_read_stream *st;
 	struct git_hash_ctx c;
 	char hdr[MAX_HEADER_LEN];
 	int hdrlen;
 	st = odb_read_stream_open(r->objects, oid, NULL);
 	if (!st)
 		return -1;
 	/* Generate the header */
 	hdrlen = format_object_header(hdr, sizeof(hdr), st->type, st->size);
@ -160,7 +157,6 @@ int stream_object_signature(struct repository *r, const struct object_id *oid)
 		git_hash_update(&c, buf, readlen);
 	}
 	git_hash_final_oid(&real_oid, &c);
 	odb_read_stream_close(st);
 	return !oideq(oid, &real_oid) ? -1 : 0;
 }
--- a/object-file.h
+++ b/object-file.h
@ -166,7 +166,9 @@ int check_object_signature(struct repository *r, const struct object_id *oid,
 * Try reading the object named with "oid" using
 * the streaming interface and rehash it to do the same.
 */
-int stream_object_signature(struct repository *r, const struct object_id *oid);
+int stream_object_signature(struct repository *r,
 			    struct odb_read_stream *stream,
 			    const struct object_id *oid);
 enum finalize_object_file_flags {
 	FOF_SKIP_COLLISION_CHECK = 1,
--- a/object.c
+++ b/object.c
@ -6,6 +6,7 @@
 #include "object.h"
 #include "replace-object.h"
 #include "object-file.h"
 #include "odb/streaming.h"
 #include "blob.h"
 #include "statinfo.h"
 #include "tree.h"
@ -343,9 +344,21 @@ struct object *parse_object_with_flags(struct repository *r,
 	if ((!obj || obj->type == OBJ_NONE || obj->type == OBJ_BLOB) &&
 	    odb_read_object_info(r->objects, oid, NULL) == OBJ_BLOB) {
-		if (!skip_hash && stream_object_signature(r, repl) < 0) {
+		if (!skip_hash) {
-			error(_("hash mismatch %s"), oid_to_hex(oid));
+			struct odb_read_stream *stream = odb_read_stream_open(r->objects, oid, NULL);
-			return NULL;
+
 			if (!stream) {
 				error(_("unable to open object stream for %s"), oid_to_hex(oid));
 				return NULL;
 			}
 			if (stream_object_signature(r, stream, repl) < 0) {
 				error(_("hash mismatch %s"), oid_to_hex(oid));
 				odb_read_stream_close(stream);
 				return NULL;
 			}
 			odb_read_stream_close(stream);
 		}
 		parse_blob_buffer(lookup_blob(r, oid));
 		return lookup_object(r, oid);
--- a/pack-check.c
+++ b/pack-check.c
@ -9,6 +9,7 @@
 #include "packfile.h"
 #include "object-file.h"
 #include "odb.h"
 #include "odb/streaming.h"
 struct idx_entry {
 	off_t                offset;
@ -104,6 +105,7 @@ static int verify_packfile(struct repository *r,
 	QSORT(entries, nr_objects, compare_entries);
 	for (i = 0; i < nr_objects; i++) {
 		struct odb_read_stream *stream = NULL;
 		void *data;
 		struct object_id oid;
 		enum object_type type;
@ -152,7 +154,9 @@ static int verify_packfile(struct repository *r,
 							type) < 0)
 			err = error("packed %s from %s is corrupt",
 				    oid_to_hex(&oid), p->pack_name);
-		else if (!data && stream_object_signature(r, &oid) < 0)
+		else if (!data &&
 			 (packfile_read_object_stream(&stream, &oid, p, entries[i].offset) < 0 ||
 			  stream_object_signature(r, stream, &oid) < 0))
 			err = error("packed %s from %s is corrupt",
 				    oid_to_hex(&oid), p->pack_name);
 		else if (fn) {
@ -163,12 +167,14 @@ static int verify_packfile(struct repository *r,
 		}
 		if (((base_count + i) & 1023) == 0)
 			display_progress(progress, base_count + i);
 		free(data);
 		if (stream)
 			odb_read_stream_close(stream);
 		free(data);
 	}
 	display_progress(progress, base_count + i);
 	free(entries);
 	return err;
 }
--- a/packfile.c
+++ b/packfile.c
@ -2621,32 +2621,28 @@ static int close_istream_pack_non_delta(struct odb_read_stream *_st)
 	return 0;
 }
-int packfile_store_read_object_stream(struct odb_read_stream **out,
+int packfile_read_object_stream(struct odb_read_stream **out,
-				      struct packfile_store *store,
+				const struct object_id *oid,
-				      const struct object_id *oid)
+				struct packed_git *pack,
 				off_t offset)
 {
 	struct odb_packed_read_stream *stream;
 	struct pack_window *window = NULL;
 	struct object_info oi = OBJECT_INFO_INIT;
 	enum object_type in_pack_type;
 	unsigned long size;
-	oi.sizep = &size;
+	in_pack_type = unpack_object_header(pack, &window, &offset, &size);
 	unuse_pack(&window);
-	if (packfile_store_read_object_info(store, oid, &oi, 0) ||
+	if (repo_settings_get_big_file_threshold(pack->repo) >= size)
 	    oi.u.packed.type == PACKED_OBJECT_TYPE_REF_DELTA ||
 	    oi.u.packed.type == PACKED_OBJECT_TYPE_OFS_DELTA ||
 	    repo_settings_get_big_file_threshold(store->source->odb->repo) >= size)
 		return -1;
 	in_pack_type = unpack_object_header(oi.u.packed.pack,
 					    &window,
 					    &oi.u.packed.offset,
 					    &size);
 	unuse_pack(&window);
 	switch (in_pack_type) {
 	default:
 		return -1; /* we do not do deltas for now */
 	case OBJ_BAD:
 		mark_bad_packed_object(pack, oid);
 		return -1;
 	case OBJ_COMMIT:
 	case OBJ_TREE:
 	case OBJ_BLOB:
@ -2660,10 +2656,22 @@ int packfile_store_read_object_stream(struct odb_read_stream **out,
 	stream->base.type = in_pack_type;
 	stream->base.size = size;
 	stream->z_state = ODB_PACKED_READ_STREAM_UNINITIALIZED;
-	stream->pack = oi.u.packed.pack;
+	stream->pack = pack;
-	stream->pos = oi.u.packed.offset;
+	stream->pos = offset;
 	*out = &stream->base;
 	return 0;
 }
 int packfile_store_read_object_stream(struct odb_read_stream **out,
 				      struct packfile_store *store,
 				      const struct object_id *oid)
 {
 	struct pack_entry e;
 	if (!find_pack_entry(store, oid, &e))
 		return -1;
 	return packfile_read_object_stream(out, oid, e.p, e.offset);
 }
--- a/packfile.h
+++ b/packfile.h
@ -449,6 +449,11 @@ off_t get_delta_base(struct packed_git *p, struct pack_window **w_curs,
 		     off_t *curpos, enum object_type type,
 		     off_t delta_obj_offset);
 int packfile_read_object_stream(struct odb_read_stream **out,
 				const struct object_id *oid,
 				struct packed_git *pack,
 				off_t offset);
 void release_pack_memory(size_t);
 /* global flag to enable extra checks when accessing packed objects */
--- a/t/helper/test-genrandom.c
+++ b/t/helper/test-genrandom.c
@ -6,6 +6,7 @@
 #include "test-tool.h"
 #include "git-compat-util.h"
 #include "parse.h"
 int cmd__genrandom(int argc, const char **argv)
 {
@ -22,7 +23,9 @@ int cmd__genrandom(int argc, const char **argv)
 		next = next * 11 + *c;
 	} while (*c++);
-	count = (argc == 3) ? strtoul(argv[2], NULL, 0) : ULONG_MAX;
+	count = ULONG_MAX;
 	if (argc == 3 && !git_parse_ulong(argv[2], &count))
 		return error_errno("cannot parse argument '%s'", argv[2]);
 	while (count--) {
 		next = next * 1103515245 + 12345;
--- a/t/t1006-cat-file.sh
+++ b/t/t1006-cat-file.sh
@ -643,7 +643,7 @@ test_expect_success 'object reference via commit text search' '
 '
 test_expect_success 'setup blobs which are likely to delta' '
-	test-tool genrandom foo 10240 >foo &&
+	test-tool genrandom foo 10k >foo &&
 	{ cat foo && echo plus; } >foo-plus &&
 	git add foo foo-plus &&
 	git commit -m foo &&
--- a/t/t1050-large.sh
+++ b/t/t1050-large.sh
@ -104,9 +104,9 @@ test_expect_success 'packsize limit' '
 		# mid1 and mid2 will fit within 256k limit but
 		# appending mid3 will bust the limit and will
 		# result in a separate packfile.
-		test-tool genrandom "a" $(( 66 * 1024 )) >mid1 &&
+		test-tool genrandom "a" 66k >mid1 &&
-		test-tool genrandom "b" $(( 80 * 1024 )) >mid2 &&
+		test-tool genrandom "b" 80k >mid2 &&
-		test-tool genrandom "c" $(( 128 * 1024 )) >mid3 &&
+		test-tool genrandom "c" 128k >mid3 &&
 		git add mid1 mid2 mid3 &&
 		count=0 &&
--- a/t/t1450-fsck.sh
+++ b/t/t1450-fsck.sh
@ -852,6 +852,44 @@ test_expect_success 'fsck errors in packed objects' '
 	! grep corrupt out
 '
 test_expect_success 'fsck handles multiple packfiles with big blobs' '
 	test_when_finished "rm -rf repo" &&
 	git init repo &&
 	(
 		cd repo &&
 		# We construct two packfiles with two objects in common and one
 		# object not in common. The objects in common can then be
 		# corrupted in one of the packfiles, respectively. The other
 		# objects that are unique to the packs are merely used to not
 		# have both packs contain the same data.
 		blob_one=$(test-tool genrandom one 200k | git hash-object -t blob -w --stdin) &&
 		blob_two=$(test-tool genrandom two 200k | git hash-object -t blob -w --stdin) &&
 		blob_three=$(test-tool genrandom three 200k | git hash-object -t blob -w --stdin) &&
 		blob_four=$(test-tool genrandom four 200k | git hash-object -t blob -w --stdin) &&
 		pack_one=$(printf "%s\n" "$blob_one" "$blob_two" "$blob_three" | git pack-objects .git/objects/pack/pack) &&
 		pack_two=$(printf "%s\n" "$blob_two" "$blob_three" "$blob_four" | git pack-objects .git/objects/pack/pack) &&
 		chmod a+w .git/objects/pack/pack-*.pack &&
 		# Corrupt blob two in the first pack.
 		git verify-pack -v .git/objects/pack/pack-$pack_one >objects &&
 		offset_one=$(sed <objects -n "s/^$blob_two .* \(.*\)$/\1/p") &&
 		printf "\0" | dd of=.git/objects/pack/pack-$pack_one.pack bs=1 conv=notrunc seek=$offset_one &&
 		# Corrupt blob three in the second pack.
 		git verify-pack -v .git/objects/pack/pack-$pack_two >objects &&
 		offset_two=$(sed <objects -n "s/^$blob_three .* \(.*\)$/\1/p") &&
 		printf "\0" | dd of=.git/objects/pack/pack-$pack_two.pack bs=1 conv=notrunc seek=$offset_two &&
 		# We now expect to see two failures for the corrupted objects,
 		# even though they exist in a non-corrupted form in the
 		# respective other pack.
 		test_must_fail git -c core.bigFileThreshold=100k fsck 2>err &&
 		test_grep "unknown object type 0 at offset $offset_one in .git/objects/pack/pack-$pack_one.pack" err &&
 		test_grep "unknown object type 0 at offset $offset_two in .git/objects/pack/pack-$pack_two.pack" err
 	)
 '
 test_expect_success 'fsck fails on corrupt packfile' '
 	hsh=$(git commit-tree -m mycommit HEAD^{tree}) &&
 	pack=$(echo $hsh | git pack-objects .git/objects/pack/pack) &&
@ -918,7 +956,7 @@ test_expect_success 'fsck detects trailing loose garbage (large blob)' '
 test_expect_success 'fsck detects truncated loose object' '
 	# make it big enough that we know we will truncate in the data
 	# portion, not the header
-	test-tool genrandom truncate 4096 >file &&
+	test-tool genrandom truncate 4k >file &&
 	blob=$(git hash-object -w file) &&
 	file=$(sha1_file $blob) &&
 	test_when_finished "remove_object $blob" &&
--- a/t/t5301-sliding-window.sh
+++ b/t/t5301-sliding-window.sh
@ -12,7 +12,7 @@ test_expect_success 'setup' '
 	for i in a b c
 	do
 	echo $i >$i &&
-	test-tool genrandom "$i" 32768 >>$i &&
+	test-tool genrandom "$i" 32k >>$i &&
 	git update-index --add $i || return 1
 	done &&
 	echo d >d && cat c >>d && git update-index --add d &&
--- a/t/t5310-pack-bitmaps.sh
+++ b/t/t5310-pack-bitmaps.sh
@ -242,7 +242,7 @@ test_bitmap_cases () {
 	'
 	test_expect_success 'splitting packs does not generate bogus bitmaps' '
-		test-tool genrandom foo $((1024 * 1024)) >rand &&
+		test-tool genrandom foo 1m >rand &&
 		git add rand &&
 		git commit -m "commit with big file" &&
 		git -c pack.packSizeLimit=500k repack -adb &&
--- a/t/t5710-promisor-remote-capability.sh
+++ b/t/t5710-promisor-remote-capability.sh
@ -20,7 +20,7 @@ test_expect_success 'setup: create "template" repository' '
 	test_commit -C template 1 &&
 	test_commit -C template 2 &&
 	test_commit -C template 3 &&
-	test-tool genrandom foo 10240 >template/foo &&
+	test-tool genrandom foo 10k >template/foo &&
 	git -C template add foo &&
 	git -C template commit -m foo
 '
@ -499,7 +499,7 @@ test_expect_success "clone with promisor.advertise set to 'true' but don't delet
 test_expect_success "setup for subsequent fetches" '
 	# Generate new commit with large blob
-	test-tool genrandom bar 10240 >template/bar &&
+	test-tool genrandom bar 10k >template/bar &&
 	git -C template add bar &&
 	git -C template commit -m bar &&
--- a/t/t7700-repack.sh
+++ b/t/t7700-repack.sh
@ -321,7 +321,7 @@ test_expect_success 'no bitmaps created if .keep files present' '
 test_expect_success 'auto-bitmaps do not complain if unavailable' '
 	test_config -C bare.git pack.packSizeLimit 1M &&
-	blob=$(test-tool genrandom big $((1024*1024)) |
+	blob=$(test-tool genrandom big 1m |
 	       git -C bare.git hash-object -w --stdin) &&
 	git -C bare.git update-ref refs/tags/big $blob &&
@ -497,9 +497,9 @@ test_expect_success '--filter works with --max-pack-size' '
 		cd max-pack-size &&
 		test_commit base &&
 		# two blobs which exceed the maximum pack size
-		test-tool genrandom foo 1048576 >foo &&
+		test-tool genrandom foo 1m >foo &&
 		git hash-object -w foo &&
-		test-tool genrandom bar 1048576 >bar &&
+		test-tool genrandom bar 1m >bar &&
 		git hash-object -w bar &&
 		git add foo bar &&
 		git commit -m "adding foo and bar"