From 325006f2dbeb482f422b2c0e62b485a41cb1c64b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Sat, 11 Sep 2021 22:36:40 +0200 Subject: [PATCH 1/5] oidset: make oidset_size() an inline function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit oidset_size() just reads a single word from memory and returns it. Avoid the function call overhead for this trivial operation by turning it into an inline function. While we're at it, declare its parameter const to allow it to be used on read-only oidsets. Suggested-by: Jeff King Signed-off-by: René Scharfe Signed-off-by: Junio C Hamano --- oidset.c | 5 ----- oidset.h | 5 ++++- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/oidset.c b/oidset.c index 5aac633c1f..b36a2bae86 100644 --- a/oidset.c +++ b/oidset.c @@ -36,11 +36,6 @@ void oidset_clear(struct oidset *set) oidset_init(set, 0); } -int oidset_size(struct oidset *set) -{ - return kh_size(&set->set); -} - void oidset_parse_file(struct oidset *set, const char *path) { oidset_parse_file_carefully(set, path, NULL, NULL); diff --git a/oidset.h b/oidset.h index 01f6560283..ba4a5a2cd3 100644 --- a/oidset.h +++ b/oidset.h @@ -57,7 +57,10 @@ int oidset_remove(struct oidset *set, const struct object_id *oid); /** * Returns the number of oids in the set. */ -int oidset_size(struct oidset *set); +static inline int oidset_size(const struct oidset *set) +{ + return kh_size(&set->set); +} /** * Remove all entries from the oidset, freeing any resources associated with From 893b5635059a7e47b042073bc69ca0c5d9d15dc2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Sat, 11 Sep 2021 22:39:31 +0200 Subject: [PATCH 2/5] midx: inline nth_midxed_pack_entry() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fill_midx_entry() finds the position of an object ID and passes it to nth_midxed_pack_entry(), which uses the position to look up the object ID for its own purposes. Inline the latter into the former to avoid that lookup. Signed-off-by: René Scharfe Signed-off-by: Junio C Hamano --- midx.c | 29 +++++++++-------------------- 1 file changed, 9 insertions(+), 20 deletions(-) diff --git a/midx.c b/midx.c index 321c6fdd2f..8cb063023c 100644 --- a/midx.c +++ b/midx.c @@ -276,14 +276,18 @@ uint32_t nth_midxed_pack_int_id(struct multi_pack_index *m, uint32_t pos) (off_t)pos * MIDX_CHUNK_OFFSET_WIDTH); } -static int nth_midxed_pack_entry(struct repository *r, - struct multi_pack_index *m, - struct pack_entry *e, - uint32_t pos) +int fill_midx_entry(struct repository * r, + const struct object_id *oid, + struct pack_entry *e, + struct multi_pack_index *m) { + uint32_t pos; uint32_t pack_int_id; struct packed_git *p; + if (!bsearch_midx(oid, m, &pos)) + return 0; + if (pos >= m->num_objects) return 0; @@ -305,10 +309,8 @@ static int nth_midxed_pack_entry(struct repository *r, if (p->num_bad_objects) { uint32_t i; - struct object_id oid; - nth_midxed_object_oid(&oid, m, pos); for (i = 0; i < p->num_bad_objects; i++) - if (hasheq(oid.hash, + if (hasheq(oid->hash, p->bad_object_sha1 + the_hash_algo->rawsz * i)) return 0; } @@ -319,19 +321,6 @@ static int nth_midxed_pack_entry(struct repository *r, return 1; } -int fill_midx_entry(struct repository * r, - const struct object_id *oid, - struct pack_entry *e, - struct multi_pack_index *m) -{ - uint32_t pos; - - if (!bsearch_midx(oid, m, &pos)) - return 0; - - return nth_midxed_pack_entry(r, m, e, pos); -} - /* Match "foo.idx" against either "foo.pack" _or_ "foo.idx". */ static int cmp_idx_or_pack_name(const char *idx_or_pack_name, const char *idx_name) From 751530de5db77196a08897e3c47526c0f0147ef1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Sat, 11 Sep 2021 22:40:33 +0200 Subject: [PATCH 3/5] packfile: convert mark_bad_packed_object() to object_id MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All callers have full object IDs, so pass them on instead of just their hash member. Signed-off-by: René Scharfe Signed-off-by: Junio C Hamano --- object-file.c | 2 +- packfile.c | 12 ++++++------ packfile.h | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/object-file.c b/object-file.c index a8be899481..fb5a385a06 100644 --- a/object-file.c +++ b/object-file.c @@ -1616,7 +1616,7 @@ static int do_oid_object_info_extended(struct repository *r, return 0; rtype = packed_object_info(r, e.p, e.offset, oi); if (rtype < 0) { - mark_bad_packed_object(e.p, real->hash); + mark_bad_packed_object(e.p, real); return do_oid_object_info_extended(r, real, oi, 0); } else if (oi->whence == OI_PACKED) { oi->u.packed.offset = e.offset; diff --git a/packfile.c b/packfile.c index 4d0d625238..fb15fc5b49 100644 --- a/packfile.c +++ b/packfile.c @@ -1161,17 +1161,17 @@ int unpack_object_header(struct packed_git *p, return type; } -void mark_bad_packed_object(struct packed_git *p, const unsigned char *sha1) +void mark_bad_packed_object(struct packed_git *p, const struct object_id *oid) { unsigned i; const unsigned hashsz = the_hash_algo->rawsz; for (i = 0; i < p->num_bad_objects; i++) - if (hasheq(sha1, p->bad_object_sha1 + hashsz * i)) + if (hasheq(oid->hash, p->bad_object_sha1 + hashsz * i)) return; p->bad_object_sha1 = xrealloc(p->bad_object_sha1, st_mult(GIT_MAX_RAWSZ, st_add(p->num_bad_objects, 1))); - hashcpy(p->bad_object_sha1 + hashsz * p->num_bad_objects, sha1); + hashcpy(p->bad_object_sha1 + hashsz * p->num_bad_objects, oid->hash); p->num_bad_objects++; } @@ -1272,7 +1272,7 @@ static int retry_bad_packed_offset(struct repository *r, if (offset_to_pack_pos(p, obj_offset, &pos) < 0) return OBJ_BAD; nth_packed_object_id(&oid, p, pack_pos_to_index(p, pos)); - mark_bad_packed_object(p, oid.hash); + mark_bad_packed_object(p, &oid); type = oid_object_info(r, &oid, NULL); if (type <= OBJ_NONE) return OBJ_BAD; @@ -1722,7 +1722,7 @@ void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset, nth_packed_object_id(&oid, p, index_pos); error("bad packed object CRC for %s", oid_to_hex(&oid)); - mark_bad_packed_object(p, oid.hash); + mark_bad_packed_object(p, &oid); data = NULL; goto out; } @@ -1811,7 +1811,7 @@ void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset, " at offset %"PRIuMAX" from %s", oid_to_hex(&base_oid), (uintmax_t)obj_offset, p->pack_name); - mark_bad_packed_object(p, base_oid.hash); + mark_bad_packed_object(p, &base_oid); base = read_object(r, &base_oid, &type, &base_size); external_base = base; } diff --git a/packfile.h b/packfile.h index 3ae117a8ae..a982ed9994 100644 --- a/packfile.h +++ b/packfile.h @@ -159,7 +159,7 @@ int packed_object_info(struct repository *r, struct packed_git *pack, off_t offset, struct object_info *); -void mark_bad_packed_object(struct packed_git *p, const unsigned char *sha1); +void mark_bad_packed_object(struct packed_git *, const struct object_id *); const struct packed_git *has_packed_and_bad(struct repository *r, const unsigned char *sha1); #define ON_DISK_KEEP_PACKS 1 From 7407d733a4a99cf946cab0c82e03c41dbd305b7c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Sat, 11 Sep 2021 22:42:20 +0200 Subject: [PATCH 4/5] packfile: convert has_packed_and_bad() to object_id MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The single caller has a full object ID, so pass it on instead of just its hash member. Signed-off-by: René Scharfe Signed-off-by: Junio C Hamano --- object-file.c | 2 +- packfile.c | 4 ++-- packfile.h | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/object-file.c b/object-file.c index fb5a385a06..01e7058b4e 100644 --- a/object-file.c +++ b/object-file.c @@ -1725,7 +1725,7 @@ void *read_object_file_extended(struct repository *r, die(_("loose object %s (stored in %s) is corrupt"), oid_to_hex(repl), path); - if ((p = has_packed_and_bad(r, repl->hash)) != NULL) + if ((p = has_packed_and_bad(r, repl)) != NULL) die(_("packed object %s (stored in %s) is corrupt"), oid_to_hex(repl), p->pack_name); obj_read_unlock(); diff --git a/packfile.c b/packfile.c index fb15fc5b49..04080a558b 100644 --- a/packfile.c +++ b/packfile.c @@ -1176,14 +1176,14 @@ void mark_bad_packed_object(struct packed_git *p, const struct object_id *oid) } const struct packed_git *has_packed_and_bad(struct repository *r, - const unsigned char *sha1) + const struct object_id *oid) { struct packed_git *p; unsigned i; for (p = r->objects->packed_git; p; p = p->next) for (i = 0; i < p->num_bad_objects; i++) - if (hasheq(sha1, + if (hasheq(oid->hash, p->bad_object_sha1 + the_hash_algo->rawsz * i)) return p; return NULL; diff --git a/packfile.h b/packfile.h index a982ed9994..186146779d 100644 --- a/packfile.h +++ b/packfile.h @@ -160,7 +160,7 @@ int packed_object_info(struct repository *r, off_t offset, struct object_info *); void mark_bad_packed_object(struct packed_git *, const struct object_id *); -const struct packed_git *has_packed_and_bad(struct repository *r, const unsigned char *sha1); +const struct packed_git *has_packed_and_bad(struct repository *, const struct object_id *); #define ON_DISK_KEEP_PACKS 1 #define IN_CORE_KEEP_PACKS 2 From 09ef66179b943d03cbe0bea0603e5f40574695a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Sat, 11 Sep 2021 22:43:26 +0200 Subject: [PATCH 5/5] packfile: use oidset for bad objects MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Store the object ID of broken pack entries in an oidset instead of keeping only their hashes in an unsorted array. The resulting code is shorter and easier to read. It also handles the (hopefully) very rare case of having a high number of bad objects better. Helped-by: Jeff King Signed-off-by: René Scharfe Signed-off-by: Junio C Hamano --- midx.c | 10 +++------- object-store.h | 4 ++-- packfile.c | 28 ++++++---------------------- 3 files changed, 11 insertions(+), 31 deletions(-) diff --git a/midx.c b/midx.c index 8cb063023c..76322b713c 100644 --- a/midx.c +++ b/midx.c @@ -307,13 +307,9 @@ int fill_midx_entry(struct repository * r, if (!is_pack_valid(p)) return 0; - if (p->num_bad_objects) { - uint32_t i; - for (i = 0; i < p->num_bad_objects; i++) - if (hasheq(oid->hash, - p->bad_object_sha1 + the_hash_algo->rawsz * i)) - return 0; - } + if (oidset_size(&p->bad_objects) && + oidset_contains(&p->bad_objects, oid)) + return 0; e->offset = nth_midxed_offset(m, pos); e->p = p; diff --git a/object-store.h b/object-store.h index b4dc6668aa..c7bead66f6 100644 --- a/object-store.h +++ b/object-store.h @@ -10,6 +10,7 @@ #include "khash.h" #include "dir.h" #include "oidtree.h" +#include "oidset.h" struct object_directory { struct object_directory *next; @@ -75,9 +76,8 @@ struct packed_git { const void *index_data; size_t index_size; uint32_t num_objects; - uint32_t num_bad_objects; uint32_t crc_offset; - unsigned char *bad_object_sha1; + struct oidset bad_objects; int index_version; time_t mtime; int pack_fd; diff --git a/packfile.c b/packfile.c index 04080a558b..caba29c624 100644 --- a/packfile.c +++ b/packfile.c @@ -1163,29 +1163,17 @@ int unpack_object_header(struct packed_git *p, void mark_bad_packed_object(struct packed_git *p, const struct object_id *oid) { - unsigned i; - const unsigned hashsz = the_hash_algo->rawsz; - for (i = 0; i < p->num_bad_objects; i++) - if (hasheq(oid->hash, p->bad_object_sha1 + hashsz * i)) - return; - p->bad_object_sha1 = xrealloc(p->bad_object_sha1, - st_mult(GIT_MAX_RAWSZ, - st_add(p->num_bad_objects, 1))); - hashcpy(p->bad_object_sha1 + hashsz * p->num_bad_objects, oid->hash); - p->num_bad_objects++; + oidset_insert(&p->bad_objects, oid); } const struct packed_git *has_packed_and_bad(struct repository *r, const struct object_id *oid) { struct packed_git *p; - unsigned i; for (p = r->objects->packed_git; p; p = p->next) - for (i = 0; i < p->num_bad_objects; i++) - if (hasheq(oid->hash, - p->bad_object_sha1 + the_hash_algo->rawsz * i)) - return p; + if (oidset_contains(&p->bad_objects, oid)) + return p; return NULL; } @@ -2016,13 +2004,9 @@ static int fill_pack_entry(const struct object_id *oid, { off_t offset; - if (p->num_bad_objects) { - unsigned i; - for (i = 0; i < p->num_bad_objects; i++) - if (hasheq(oid->hash, - p->bad_object_sha1 + the_hash_algo->rawsz * i)) - return 0; - } + if (oidset_size(&p->bad_objects) && + oidset_contains(&p->bad_objects, oid)) + return 0; offset = find_pack_entry_one(oid->hash, p); if (!offset)