From e78ab370545d81a950fa3b2701dd7c72015ee802 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 30 Oct 2025 11:38:38 +0100 Subject: [PATCH 1/8] packfile: use a `strmap` to store packs by name To allow fast lookups of a packfile by name we use a hashmap that has the packfile name as key and the pack itself as value. But while this is the perfect use case for a `strmap`, we instead use `struct hashmap` and store the hashmap entry in the packfile itself. Simplify the code by using a `strmap` instead. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- packfile.c | 24 ++++-------------------- packfile.h | 4 ++-- 2 files changed, 6 insertions(+), 22 deletions(-) diff --git a/packfile.c b/packfile.c index 1ae2b2fe1e..04649e5292 100644 --- a/packfile.c +++ b/packfile.c @@ -788,8 +788,7 @@ void packfile_store_add_pack(struct packfile_store *store, pack->next = store->packs; store->packs = pack; - hashmap_entry_init(&pack->packmap_ent, strhash(pack->pack_name)); - hashmap_add(&store->map, &pack->packmap_ent); + strmap_put(&store->packs_by_path, pack->pack_name, pack); } struct packed_git *packfile_store_load_pack(struct packfile_store *store, @@ -806,8 +805,7 @@ struct packed_git *packfile_store_load_pack(struct packfile_store *store, strbuf_strip_suffix(&key, ".idx"); strbuf_addstr(&key, ".pack"); - p = hashmap_get_entry_from_hash(&store->map, strhash(key.buf), key.buf, - struct packed_git, packmap_ent); + p = strmap_get(&store->packs_by_path, key.buf); if (!p) { p = add_packed_git(store->odb->repo, idx_path, strlen(idx_path), local); @@ -2311,27 +2309,13 @@ int parse_pack_header_option(const char *in, unsigned char *out, unsigned int *l return 0; } -static int pack_map_entry_cmp(const void *cmp_data UNUSED, - const struct hashmap_entry *entry, - const struct hashmap_entry *entry2, - const void *keydata) -{ - const char *key = keydata; - const struct packed_git *pg1, *pg2; - - pg1 = container_of(entry, const struct packed_git, packmap_ent); - pg2 = container_of(entry2, const struct packed_git, packmap_ent); - - return strcmp(pg1->pack_name, key ? key : pg2->pack_name); -} - struct packfile_store *packfile_store_new(struct object_database *odb) { struct packfile_store *store; CALLOC_ARRAY(store, 1); store->odb = odb; INIT_LIST_HEAD(&store->mru); - hashmap_init(&store->map, pack_map_entry_cmp, NULL, 0); + strmap_init(&store->packs_by_path); return store; } @@ -2341,7 +2325,7 @@ void packfile_store_free(struct packfile_store *store) next = p->next; free(p); } - hashmap_clear(&store->map); + strmap_clear(&store->packs_by_path, 0); free(store); } diff --git a/packfile.h b/packfile.h index c9d0b93446..9da7f14317 100644 --- a/packfile.h +++ b/packfile.h @@ -5,12 +5,12 @@ #include "object.h" #include "odb.h" #include "oidset.h" +#include "strmap.h" /* in odb.h */ struct object_info; struct packed_git { - struct hashmap_entry packmap_ent; struct packed_git *next; struct list_head mru; struct pack_window *windows; @@ -85,7 +85,7 @@ struct packfile_store { * A map of packfile names to packed_git structs for tracking which * packs have been loaded already. */ - struct hashmap map; + struct strmap packs_by_path; /* * Whether packfiles have already been populated with this store's From f905a855b1d1e172ba1e51e03fc5ec531445575e Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 30 Oct 2025 11:38:39 +0100 Subject: [PATCH 2/8] packfile: move the MRU list into the packfile store Packfiles have two lists associated to them: - A list that keeps track of packfiles in the order that they were added to a packfile store. - A list that keeps track of packfiles in most-recently-used order so that packfiles that are more likely to contain a specific object are ordered towards the front. Both of these lists are hosted by `struct packed_git` itself, So to identify all packfiles in a repository you simply need to grab the first packfile and then iterate the `->next` pointers or the MRU list. This pattern has the problem that all packfiles are part of the same list, regardless of whether or not they belong to the same object source. With the upcoming pluggable object database effort this needs to change: packfiles should be contained by a single object source, and reading an object from any such packfile should use that source to look up the object. Consequently, we need to break up the global lists of packfiles into per-object-source lists. A first step towards this goal is to move those lists out of `struct packed_git` and into the packfile store. While the packfile store is currently sitting on the `struct object_database` level, the intent is to push it down one level into the `struct odb_source` in a subsequent patch series. Introduce a new `struct packfile_list` that is used to manage lists of packfiles and use it to store the list of most-recently-used packfiles in `struct packfile_store`. For now, the new list type is only used in a single spot, but we'll expand its usage in subsequent patches. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 9 ++--- midx.c | 2 +- packfile.c | 92 +++++++++++++++++++++++++++++++++++++----- packfile.h | 19 +++++++-- 4 files changed, 104 insertions(+), 18 deletions(-) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index b5454e5df1..5348aebbe9 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -1706,8 +1706,8 @@ static int want_object_in_pack_mtime(const struct object_id *oid, uint32_t found_mtime) { int want; + struct packfile_list_entry *e; struct odb_source *source; - struct list_head *pos; if (!exclude && local) { /* @@ -1748,12 +1748,11 @@ static int want_object_in_pack_mtime(const struct object_id *oid, } } - list_for_each(pos, packfile_store_get_packs_mru(the_repository->objects->packfiles)) { - struct packed_git *p = list_entry(pos, struct packed_git, mru); + for (e = the_repository->objects->packfiles->mru.head; e; e = e->next) { + struct packed_git *p = e->pack; want = want_object_in_pack_one(p, oid, exclude, found_pack, found_offset, found_mtime); if (!exclude && want > 0) - list_move(&p->mru, - packfile_store_get_packs_mru(the_repository->objects->packfiles)); + packfile_list_prepend(&the_repository->objects->packfiles->mru, p); if (want != -1) return want; } diff --git a/midx.c b/midx.c index 1d6269f957..8022be9a45 100644 --- a/midx.c +++ b/midx.c @@ -463,7 +463,7 @@ int prepare_midx_pack(struct multi_pack_index *m, p = packfile_store_load_pack(r->objects->packfiles, pack_name.buf, m->source->local); if (p) - list_add_tail(&p->mru, &r->objects->packfiles->mru); + packfile_list_append(&m->source->odb->packfiles->mru, p); strbuf_release(&pack_name); if (!p) { diff --git a/packfile.c b/packfile.c index 04649e5292..4d2d3b674f 100644 --- a/packfile.c +++ b/packfile.c @@ -47,6 +47,80 @@ static size_t pack_mapped; #define SZ_FMT PRIuMAX static inline uintmax_t sz_fmt(size_t s) { return s; } +void packfile_list_clear(struct packfile_list *list) +{ + struct packfile_list_entry *e, *next; + + for (e = list->head; e; e = next) { + next = e->next; + free(e); + } + + list->head = list->tail = NULL; +} + +static struct packfile_list_entry *packfile_list_remove_internal(struct packfile_list *list, + struct packed_git *pack) +{ + struct packfile_list_entry *e, *prev; + + for (e = list->head, prev = NULL; e; prev = e, e = e->next) { + if (e->pack != pack) + continue; + + if (prev) + prev->next = e->next; + if (list->head == e) + list->head = e->next; + if (list->tail == e) + list->tail = prev; + + return e; + } + + return NULL; +} + +void packfile_list_remove(struct packfile_list *list, struct packed_git *pack) +{ + free(packfile_list_remove_internal(list, pack)); +} + +void packfile_list_prepend(struct packfile_list *list, struct packed_git *pack) +{ + struct packfile_list_entry *entry; + + entry = packfile_list_remove_internal(list, pack); + if (!entry) { + entry = xmalloc(sizeof(*entry)); + entry->pack = pack; + } + entry->next = list->head; + + list->head = entry; + if (!list->tail) + list->tail = entry; +} + +void packfile_list_append(struct packfile_list *list, struct packed_git *pack) +{ + struct packfile_list_entry *entry; + + entry = packfile_list_remove_internal(list, pack); + if (!entry) { + entry = xmalloc(sizeof(*entry)); + entry->pack = pack; + } + entry->next = NULL; + + if (list->tail) { + list->tail->next = entry; + list->tail = entry; + } else { + list->head = list->tail = entry; + } +} + void pack_report(struct repository *repo) { fprintf(stderr, @@ -995,10 +1069,10 @@ static void packfile_store_prepare_mru(struct packfile_store *store) { struct packed_git *p; - INIT_LIST_HEAD(&store->mru); + packfile_list_clear(&store->mru); for (p = store->packs; p; p = p->next) - list_add_tail(&p->mru, &store->mru); + packfile_list_append(&store->mru, p); } void packfile_store_prepare(struct packfile_store *store) @@ -1040,10 +1114,10 @@ struct packed_git *packfile_store_get_packs(struct packfile_store *store) return store->packs; } -struct list_head *packfile_store_get_packs_mru(struct packfile_store *store) +struct packfile_list_entry *packfile_store_get_packs_mru(struct packfile_store *store) { packfile_store_prepare(store); - return &store->mru; + return store->mru.head; } /* @@ -2048,7 +2122,7 @@ static int fill_pack_entry(const struct object_id *oid, int find_pack_entry(struct repository *r, const struct object_id *oid, struct pack_entry *e) { - struct list_head *pos; + struct packfile_list_entry *l; packfile_store_prepare(r->objects->packfiles); @@ -2059,10 +2133,11 @@ int find_pack_entry(struct repository *r, const struct object_id *oid, struct pa if (!r->objects->packfiles->packs) return 0; - list_for_each(pos, &r->objects->packfiles->mru) { - struct packed_git *p = list_entry(pos, struct packed_git, mru); + for (l = r->objects->packfiles->mru.head; l; l = l->next) { + struct packed_git *p = l->pack; + if (!p->multi_pack_index && fill_pack_entry(oid, e, p)) { - list_move(&p->mru, &r->objects->packfiles->mru); + packfile_list_prepend(&r->objects->packfiles->mru, p); return 1; } } @@ -2314,7 +2389,6 @@ struct packfile_store *packfile_store_new(struct object_database *odb) struct packfile_store *store; CALLOC_ARRAY(store, 1); store->odb = odb; - INIT_LIST_HEAD(&store->mru); strmap_init(&store->packs_by_path); return store; } diff --git a/packfile.h b/packfile.h index 9da7f14317..39ed1073e4 100644 --- a/packfile.h +++ b/packfile.h @@ -12,7 +12,6 @@ struct object_info; struct packed_git { struct packed_git *next; - struct list_head mru; struct pack_window *windows; off_t pack_size; const void *index_data; @@ -52,6 +51,20 @@ struct packed_git { char pack_name[FLEX_ARRAY]; /* more */ }; +struct packfile_list { + struct packfile_list_entry *head, *tail; +}; + +struct packfile_list_entry { + struct packfile_list_entry *next; + struct packed_git *pack; +}; + +void packfile_list_clear(struct packfile_list *list); +void packfile_list_remove(struct packfile_list *list, struct packed_git *pack); +void packfile_list_prepend(struct packfile_list *list, struct packed_git *pack); +void packfile_list_append(struct packfile_list *list, struct packed_git *pack); + /* * A store that manages packfiles for a given object database. */ @@ -79,7 +92,7 @@ struct packfile_store { } kept_cache; /* A most-recently-used ordered version of the packs list. */ - struct list_head mru; + struct packfile_list mru; /* * A map of packfile names to packed_git structs for tracking which @@ -153,7 +166,7 @@ struct packed_git *packfile_store_get_packs(struct packfile_store *store); /* * Get all packs in most-recently-used order. */ -struct list_head *packfile_store_get_packs_mru(struct packfile_store *store); +struct packfile_list_entry *packfile_store_get_packs_mru(struct packfile_store *store); /* * Open the packfile and add it to the store if it isn't yet known. Returns From 89219bc0cd09ada8a204e0ace0bd15decaea7d31 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 30 Oct 2025 11:38:40 +0100 Subject: [PATCH 3/8] http: refactor subsystem to use `packfile_list`s The dumb HTTP protocol directly fetches packfiles from the remote server and temporarily stores them in a list of packfiles. Those packfiles are not yet added to the repository's packfile store until we finalize the whole fetch. Refactor the code to instead use a `struct packfile_list` to store those packs. This prepares us for a subsequent change where the `->next` pointer of `struct packed_git` will go away. Note that this refactoring creates some temporary duplication of code, as we now have both `packfile_list_find_oid()` and `find_oid_pack()`. The latter function will be removed in a subsequent commit though. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- http-push.c | 6 +++--- http-walker.c | 26 +++++++++----------------- http.c | 21 ++++++++------------- http.h | 5 +++-- packfile.c | 9 +++++++++ packfile.h | 8 ++++++++ 6 files changed, 40 insertions(+), 35 deletions(-) diff --git a/http-push.c b/http-push.c index a1c01e3b9b..d86ce77119 100644 --- a/http-push.c +++ b/http-push.c @@ -104,7 +104,7 @@ struct repo { int has_info_refs; int can_update_info_refs; int has_info_packs; - struct packed_git *packs; + struct packfile_list packs; struct remote_lock *locks; }; @@ -311,7 +311,7 @@ static void start_fetch_packed(struct transfer_request *request) struct transfer_request *check_request = request_queue_head; struct http_pack_request *preq; - target = find_oid_pack(&request->obj->oid, repo->packs); + target = packfile_list_find_oid(repo->packs.head, &request->obj->oid); if (!target) { fprintf(stderr, "Unable to fetch %s, will not be able to update server info refs\n", oid_to_hex(&request->obj->oid)); repo->can_update_info_refs = 0; @@ -683,7 +683,7 @@ static int add_send_request(struct object *obj, struct remote_lock *lock) get_remote_object_list(obj->oid.hash[0]); if (obj->flags & (REMOTE | PUSHING)) return 0; - target = find_oid_pack(&obj->oid, repo->packs); + target = packfile_list_find_oid(repo->packs.head, &obj->oid); if (target) { obj->flags |= REMOTE; return 0; diff --git a/http-walker.c b/http-walker.c index 0f7ae46d7f..e886e64866 100644 --- a/http-walker.c +++ b/http-walker.c @@ -15,7 +15,7 @@ struct alt_base { char *base; int got_indices; - struct packed_git *packs; + struct packfile_list packs; struct alt_base *next; }; @@ -324,11 +324,8 @@ static void process_alternates_response(void *callback_data) } else if (is_alternate_allowed(target.buf)) { warning("adding alternate object store: %s", target.buf); - newalt = xmalloc(sizeof(*newalt)); - newalt->next = NULL; + CALLOC_ARRAY(newalt, 1); newalt->base = strbuf_detach(&target, NULL); - newalt->got_indices = 0; - newalt->packs = NULL; while (tail->next != NULL) tail = tail->next; @@ -435,7 +432,7 @@ static int http_fetch_pack(struct walker *walker, struct alt_base *repo, if (fetch_indices(walker, repo)) return -1; - target = find_oid_pack(oid, repo->packs); + target = packfile_list_find_oid(repo->packs.head, oid); if (!target) return -1; close_pack_index(target); @@ -584,17 +581,15 @@ static void cleanup(struct walker *walker) if (data) { alt = data->alt; while (alt) { - struct packed_git *pack; + struct packfile_list_entry *e; alt_next = alt->next; - pack = alt->packs; - while (pack) { - struct packed_git *pack_next = pack->next; - close_pack(pack); - free(pack); - pack = pack_next; + for (e = alt->packs.head; e; e = e->next) { + close_pack(e->pack); + free(e->pack); } + packfile_list_clear(&alt->packs); free(alt->base); free(alt); @@ -612,14 +607,11 @@ struct walker *get_http_walker(const char *url) struct walker_data *data = xmalloc(sizeof(struct walker_data)); struct walker *walker = xmalloc(sizeof(struct walker)); - data->alt = xmalloc(sizeof(*data->alt)); + CALLOC_ARRAY(data->alt, 1); data->alt->base = xstrdup(url); for (s = data->alt->base + strlen(data->alt->base) - 1; *s == '/'; --s) *s = 0; - data->alt->got_indices = 0; - data->alt->packs = NULL; - data->alt->next = NULL; data->got_alternates = -1; walker->corrupt_object_found = 0; diff --git a/http.c b/http.c index 17130823f0..41f850db16 100644 --- a/http.c +++ b/http.c @@ -2413,8 +2413,9 @@ static char *fetch_pack_index(unsigned char *hash, const char *base_url) return tmp; } -static int fetch_and_setup_pack_index(struct packed_git **packs_head, - unsigned char *sha1, const char *base_url) +static int fetch_and_setup_pack_index(struct packfile_list *packs, + unsigned char *sha1, + const char *base_url) { struct packed_git *new_pack, *p; char *tmp_idx = NULL; @@ -2448,12 +2449,11 @@ static int fetch_and_setup_pack_index(struct packed_git **packs_head, if (ret) return -1; - new_pack->next = *packs_head; - *packs_head = new_pack; + packfile_list_prepend(packs, new_pack); return 0; } -int http_get_info_packs(const char *base_url, struct packed_git **packs_head) +int http_get_info_packs(const char *base_url, struct packfile_list *packs) { struct http_get_options options = {0}; int ret = 0; @@ -2477,7 +2477,7 @@ int http_get_info_packs(const char *base_url, struct packed_git **packs_head) !parse_oid_hex(data, &oid, &data) && skip_prefix(data, ".pack", &data) && (*data == '\n' || *data == '\0')) { - fetch_and_setup_pack_index(packs_head, oid.hash, base_url); + fetch_and_setup_pack_index(packs, oid.hash, base_url); } else { data = strchrnul(data, '\n'); } @@ -2541,14 +2541,9 @@ cleanup: } void http_install_packfile(struct packed_git *p, - struct packed_git **list_to_remove_from) + struct packfile_list *list_to_remove_from) { - struct packed_git **lst = list_to_remove_from; - - while (*lst != p) - lst = &((*lst)->next); - *lst = (*lst)->next; - + packfile_list_remove(list_to_remove_from, p); packfile_store_add_pack(the_repository->objects->packfiles, p); } diff --git a/http.h b/http.h index 553e16205c..f9d4593404 100644 --- a/http.h +++ b/http.h @@ -2,6 +2,7 @@ #define HTTP_H struct packed_git; +struct packfile_list; #include "git-zlib.h" @@ -190,7 +191,7 @@ struct curl_slist *http_append_auth_header(const struct credential *c, /* Helpers for fetching packs */ int http_get_info_packs(const char *base_url, - struct packed_git **packs_head); + struct packfile_list *packs); /* Helper for getting Accept-Language header */ const char *http_get_accept_language_header(void); @@ -226,7 +227,7 @@ void release_http_pack_request(struct http_pack_request *preq); * from http_get_info_packs() and have chosen a specific pack to fetch. */ void http_install_packfile(struct packed_git *p, - struct packed_git **list_to_remove_from); + struct packfile_list *list_to_remove_from); /* Helpers for fetching object */ struct http_object_request { diff --git a/packfile.c b/packfile.c index 4d2d3b674f..6aa2ca8ac9 100644 --- a/packfile.c +++ b/packfile.c @@ -121,6 +121,15 @@ void packfile_list_append(struct packfile_list *list, struct packed_git *pack) } } +struct packed_git *packfile_list_find_oid(struct packfile_list_entry *packs, + const struct object_id *oid) +{ + for (; packs; packs = packs->next) + if (find_pack_entry_one(oid, packs->pack)) + return packs->pack; + return NULL; +} + void pack_report(struct repository *repo) { fprintf(stderr, diff --git a/packfile.h b/packfile.h index 39ed1073e4..a53336d722 100644 --- a/packfile.h +++ b/packfile.h @@ -65,6 +65,14 @@ void packfile_list_remove(struct packfile_list *list, struct packed_git *pack); void packfile_list_prepend(struct packfile_list *list, struct packed_git *pack); void packfile_list_append(struct packfile_list *list, struct packed_git *pack); +/* + * Find the pack within the "packs" list whose index contains the object + * "oid". For general object lookups, you probably don't want this; use + * find_pack_entry() instead. + */ +struct packed_git *packfile_list_find_oid(struct packfile_list_entry *packs, + const struct object_id *oid); + /* * A store that manages packfiles for a given object database. */ From 02a7f6ffab9ec7641f88032f30998976bca07820 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 30 Oct 2025 11:38:41 +0100 Subject: [PATCH 4/8] packfile: fix approximation of object counts When approximating the number of objects in a repository we only take into account two data sources, the multi-pack index and the packfile indices, as both of these data structures allow us to easily figure out how many objects they contain. But the way we currently approximate the number of objects is broken in presence of a multi-pack index. This is due to two separate reasons: - We have recently introduced initial infrastructure for incremental multi-pack indices. Starting with that series, `num_objects` only counts the number of objects of a specific layer of the MIDX chain, so we do not take into account objects from parent layers. This issue is fixed by adding `num_objects_in_base`, which contains the sum of all objects in previous layers. - When using the multi-pack index we may count objects contained in packfiles twice: once via the multi-pack index, but then we again count them via the packfile itself. This issue is fixed by skipping any packfiles that have an MIDX. Overall, given that we _always_ count the packs, we can only end up overestimating the number of objects, and the overestimation is limited to a factor of two at most. The consequences of those issues are very limited though, as we only approximate object counts in a small number of cases: - When writing a commit-graph we use the approximate object count to display the upper limit of a progress display. - In `repo_find_unique_abbrev_r()` we use it to specify a lower limit of how many hex digits we want to abbreviate to. Given that we use power-of-two here to derive the lower limit we may end up with an abbreviated hash that is one digit longer than required. - In `estimate_repack_memory()` we may end up overestimating how much memory a repack needs to pack objects. Conseuqently, we may end up dropping some packfiles from a repack. None of these are really game-changing. But it's nice to fix those issues regardless. While at it, convert the code to use `repo_for_each_pack()`. Furthermore, use `odb_prepare_alternates()` instead of explicitly preparing the packfile store. We really only want to prepare the object database sources, and `get_multi_pack_index()` already knows to prepare the packfile store for us. Helped-by: Taylor Blau Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- packfile.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/packfile.c b/packfile.c index 6aa2ca8ac9..b07509b69b 100644 --- a/packfile.c +++ b/packfile.c @@ -1143,16 +1143,16 @@ unsigned long repo_approximate_object_count(struct repository *r) unsigned long count = 0; struct packed_git *p; - packfile_store_prepare(r->objects->packfiles); + odb_prepare_alternates(r->objects); for (source = r->objects->sources; source; source = source->next) { struct multi_pack_index *m = get_multi_pack_index(source); if (m) - count += m->num_objects; + count += m->num_objects + m->num_objects_in_base; } - for (p = r->objects->packfiles->packs; p; p = p->next) { - if (open_pack_index(p)) + repo_for_each_pack(r, p) { + if (p->multi_pack_index || open_pack_index(p)) continue; count += p->num_objects; } From 0d0e4b5954e97fcdfd0a4120e17e2c570497981a Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 30 Oct 2025 11:38:42 +0100 Subject: [PATCH 5/8] builtin/pack-objects: simplify logic to find kept or nonlocal objects The function `has_sha1_pack_kept_or_nonlocal()` takes an object ID and then searches through packed objects to figure out whether the object exists in a kept or non-local pack. As a performance optimization we remember the packfile that contains a given object ID so that the next call to the function first checks that same packfile again. The way this is written is rather hard to follow though, as the caching mechanism is intertwined with the loop that iterates through the packs. Consequently, we need to do some gymnastics to re-start the iteration if the cached pack does not contain the objects. Refactor this so that we check the cached packfile at the beginning. We don't have to re-verify whether the packfile meets the properties as we have already verified those when storing the pack in `last_found` in the first place. So all we need to do is to use `find_pack_entry_one()` to check whether the pack contains the object ID, and to skip the cached pack in the loop so that we don't search it twice. Furthermore, stop using the `(void *)1` sentinel value and instead use a simple `NULL` pointer to indicate that we don't have a last-found pack yet. This refactoring significantly simplifies the logic and makes it much easier to follow. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 5348aebbe9..b83eb8ead1 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -4388,27 +4388,27 @@ static void add_unreachable_loose_objects(struct rev_info *revs) static int has_sha1_pack_kept_or_nonlocal(const struct object_id *oid) { - struct packfile_store *packs = the_repository->objects->packfiles; - static struct packed_git *last_found = (void *)1; + static struct packed_git *last_found = NULL; struct packed_git *p; - p = (last_found != (void *)1) ? last_found : - packfile_store_get_packs(packs); + if (last_found && find_pack_entry_one(oid, last_found)) + return 1; - while (p) { - if ((!p->pack_local || p->pack_keep || - p->pack_keep_in_core) && - find_pack_entry_one(oid, p)) { + repo_for_each_pack(the_repository, p) { + /* + * We have already checked `last_found`, so there is no need to + * re-check here. + */ + if (p == last_found) + continue; + + if ((!p->pack_local || p->pack_keep || p->pack_keep_in_core) && + find_pack_entry_one(oid, p)) { last_found = p; return 1; } - if (p == last_found) - p = packfile_store_get_packs(packs); - else - p = p->next; - if (p == last_found) - p = p->next; } + return 0; } From 589127caa73090040200989ff4d24c3d54f473f2 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 30 Oct 2025 11:38:43 +0100 Subject: [PATCH 6/8] packfile: move list of packs into the packfile store Move the list of packs into the packfile store. This follows the same logic as in a previous commit, where we moved the most-recently-used list of packs, as well. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/fast-import.c | 4 +-- packfile.c | 83 +++++++++++++++++++------------------------ packfile.h | 16 +++------ 3 files changed, 43 insertions(+), 60 deletions(-) diff --git a/builtin/fast-import.c b/builtin/fast-import.c index 215295c156..6fe6e9bc61 100644 --- a/builtin/fast-import.c +++ b/builtin/fast-import.c @@ -978,7 +978,7 @@ static int store_object( if (e->idx.offset) { duplicate_count_by_type[type]++; return 1; - } else if (find_oid_pack(&oid, packfile_store_get_packs(packs))) { + } else if (packfile_list_find_oid(packfile_store_get_packs(packs), &oid)) { e->type = type; e->pack_id = MAX_PACK_ID; e->idx.offset = 1; /* just not zero! */ @@ -1179,7 +1179,7 @@ static void stream_blob(uintmax_t len, struct object_id *oidout, uintmax_t mark) duplicate_count_by_type[OBJ_BLOB]++; truncate_pack(&checkpoint); - } else if (find_oid_pack(&oid, packfile_store_get_packs(packs))) { + } else if (packfile_list_find_oid(packfile_store_get_packs(packs), &oid)) { e->type = OBJ_BLOB; e->pack_id = MAX_PACK_ID; e->idx.offset = 1; /* just not zero! */ diff --git a/packfile.c b/packfile.c index b07509b69b..71e95ae11c 100644 --- a/packfile.c +++ b/packfile.c @@ -356,13 +356,14 @@ static void scan_windows(struct packed_git *p, static int unuse_one_window(struct packed_git *current) { - struct packed_git *p, *lru_p = NULL; + struct packfile_list_entry *e; + struct packed_git *lru_p = NULL; struct pack_window *lru_w = NULL, *lru_l = NULL; if (current) scan_windows(current, &lru_p, &lru_w, &lru_l); - for (p = current->repo->objects->packfiles->packs; p; p = p->next) - scan_windows(p, &lru_p, &lru_w, &lru_l); + for (e = current->repo->objects->packfiles->packs.head; e; e = e->next) + scan_windows(e->pack, &lru_p, &lru_w, &lru_l); if (lru_p) { munmap(lru_w->base, lru_w->len); pack_mapped -= lru_w->len; @@ -542,14 +543,15 @@ static void find_lru_pack(struct packed_git *p, struct packed_git **lru_p, struc static int close_one_pack(struct repository *r) { - struct packed_git *p, *lru_p = NULL; + struct packfile_list_entry *e; + struct packed_git *lru_p = NULL; struct pack_window *mru_w = NULL; int accept_windows_inuse = 1; - for (p = r->objects->packfiles->packs; p; p = p->next) { - if (p->pack_fd == -1) + for (e = r->objects->packfiles->packs.head; e; e = e->next) { + if (e->pack->pack_fd == -1) continue; - find_lru_pack(p, &lru_p, &mru_w, &accept_windows_inuse); + find_lru_pack(e->pack, &lru_p, &mru_w, &accept_windows_inuse); } if (lru_p) @@ -868,8 +870,7 @@ void packfile_store_add_pack(struct packfile_store *store, if (pack->pack_fd != -1) pack_open_fds++; - pack->next = store->packs; - store->packs = pack; + packfile_list_prepend(&store->packs, pack); strmap_put(&store->packs_by_path, pack->pack_name, pack); } @@ -1046,9 +1047,10 @@ static void prepare_packed_git_one(struct odb_source *source) string_list_clear(data.garbage, 0); } -DEFINE_LIST_SORT(static, sort_packs, struct packed_git, next); +DEFINE_LIST_SORT(static, sort_packs, struct packfile_list_entry, next); -static int sort_pack(const struct packed_git *a, const struct packed_git *b) +static int sort_pack(const struct packfile_list_entry *a, + const struct packfile_list_entry *b) { int st; @@ -1058,7 +1060,7 @@ static int sort_pack(const struct packed_git *a, const struct packed_git *b) * remote ones could be on a network mounted filesystem. * Favor local ones for these reasons. */ - st = a->pack_local - b->pack_local; + st = a->pack->pack_local - b->pack->pack_local; if (st) return -st; @@ -1067,21 +1069,19 @@ static int sort_pack(const struct packed_git *a, const struct packed_git *b) * and more recent objects tend to get accessed more * often. */ - if (a->mtime < b->mtime) + if (a->pack->mtime < b->pack->mtime) return 1; - else if (a->mtime == b->mtime) + else if (a->pack->mtime == b->pack->mtime) return 0; return -1; } static void packfile_store_prepare_mru(struct packfile_store *store) { - struct packed_git *p; - packfile_list_clear(&store->mru); - for (p = store->packs; p; p = p->next) - packfile_list_append(&store->mru, p); + for (struct packfile_list_entry *e = store->packs.head; e; e = e->next) + packfile_list_append(&store->mru, e->pack); } void packfile_store_prepare(struct packfile_store *store) @@ -1096,7 +1096,11 @@ void packfile_store_prepare(struct packfile_store *store) prepare_multi_pack_index_one(source); prepare_packed_git_one(source); } - sort_packs(&store->packs, sort_pack); + + sort_packs(&store->packs.head, sort_pack); + for (struct packfile_list_entry *e = store->packs.head; e; e = e->next) + if (!e->next) + store->packs.tail = e; packfile_store_prepare_mru(store); store->initialized = true; @@ -1108,7 +1112,7 @@ void packfile_store_reprepare(struct packfile_store *store) packfile_store_prepare(store); } -struct packed_git *packfile_store_get_packs(struct packfile_store *store) +struct packfile_list_entry *packfile_store_get_packs(struct packfile_store *store) { packfile_store_prepare(store); @@ -1120,7 +1124,7 @@ struct packed_git *packfile_store_get_packs(struct packfile_store *store) prepare_midx_pack(m, i); } - return store->packs; + return store->packs.head; } struct packfile_list_entry *packfile_store_get_packs_mru(struct packfile_store *store) @@ -1276,11 +1280,11 @@ void mark_bad_packed_object(struct packed_git *p, const struct object_id *oid) const struct packed_git *has_packed_and_bad(struct repository *r, const struct object_id *oid) { - struct packed_git *p; + struct packfile_list_entry *e; - for (p = r->objects->packfiles->packs; p; p = p->next) - if (oidset_contains(&p->bad_objects, oid)) - return p; + for (e = r->objects->packfiles->packs.head; e; e = e->next) + if (oidset_contains(&e->pack->bad_objects, oid)) + return e->pack; return NULL; } @@ -2088,19 +2092,6 @@ int is_pack_valid(struct packed_git *p) return !open_packed_git(p); } -struct packed_git *find_oid_pack(const struct object_id *oid, - struct packed_git *packs) -{ - struct packed_git *p; - - for (p = packs; p; p = p->next) { - if (find_pack_entry_one(oid, p)) - return p; - } - return NULL; - -} - static int fill_pack_entry(const struct object_id *oid, struct pack_entry *e, struct packed_git *p) @@ -2139,7 +2130,7 @@ int find_pack_entry(struct repository *r, const struct object_id *oid, struct pa if (source->midx && fill_midx_entry(source->midx, oid, e)) return 1; - if (!r->objects->packfiles->packs) + if (!r->objects->packfiles->packs.head) return 0; for (l = r->objects->packfiles->mru.head; l; l = l->next) { @@ -2404,19 +2395,19 @@ struct packfile_store *packfile_store_new(struct object_database *odb) void packfile_store_free(struct packfile_store *store) { - for (struct packed_git *p = store->packs, *next; p; p = next) { - next = p->next; - free(p); - } + for (struct packfile_list_entry *e = store->packs.head; e; e = e->next) + free(e->pack); + packfile_list_clear(&store->packs); + strmap_clear(&store->packs_by_path, 0); free(store); } void packfile_store_close(struct packfile_store *store) { - for (struct packed_git *p = store->packs; p; p = p->next) { - if (p->do_not_close) + for (struct packfile_list_entry *e = store->packs.head; e; e = e->next) { + if (e->pack->do_not_close) BUG("want to close pack marked 'do-not-close'"); - close_pack(p); + close_pack(e->pack); } } diff --git a/packfile.h b/packfile.h index a53336d722..d95275e666 100644 --- a/packfile.h +++ b/packfile.h @@ -11,7 +11,6 @@ struct object_info; struct packed_git { - struct packed_git *next; struct pack_window *windows; off_t pack_size; const void *index_data; @@ -83,7 +82,7 @@ struct packfile_store { * The list of packfiles in the order in which they are being added to * the store. */ - struct packed_git *packs; + struct packfile_list packs; /* * Cache of packfiles which are marked as "kept", either because there @@ -163,13 +162,14 @@ void packfile_store_add_pack(struct packfile_store *store, * repository. */ #define repo_for_each_pack(repo, p) \ - for (p = packfile_store_get_packs(repo->objects->packfiles); p; p = p->next) + for (struct packfile_list_entry *e = packfile_store_get_packs(repo->objects->packfiles); \ + ((p) = (e ? e->pack : NULL)); e = e->next) /* * Get all packs managed by the given store, including packfiles that are * referenced by multi-pack indices. */ -struct packed_git *packfile_store_get_packs(struct packfile_store *store); +struct packfile_list_entry *packfile_store_get_packs(struct packfile_store *store); /* * Get all packs in most-recently-used order. @@ -266,14 +266,6 @@ extern void (*report_garbage)(unsigned seen_bits, const char *path); */ unsigned long repo_approximate_object_count(struct repository *r); -/* - * Find the pack within the "packs" list whose index contains the object "oid". - * For general object lookups, you probably don't want this; use - * find_pack_entry() instead. - */ -struct packed_git *find_oid_pack(const struct object_id *oid, - struct packed_git *packs); - void pack_report(struct repository *repo); /* From 6aff1f25a046f3dcd8a78b0c61414fa2d1c9a93c Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 30 Oct 2025 11:38:44 +0100 Subject: [PATCH 7/8] packfile: always add packfiles to MRU when adding a pack When preparing the packfile store we know to also prepare the MRU list of packfiles with all packs that are currently loaded in the store via `packfile_store_prepare_mru()`. So we know that the list of packs in the MRU list should match the list of packs in the non-MRU list. But there are some direct or indirect callsites that add a packfile to the store via `packfile_store_add_pack()` without adding the pack to the MRU. And while functions that access the MRU (e.g. `find_pack_entry()`) know to call `packfile_store_prepare()`, which knows to prepare the MRU via `packfile_store_prepare_mru()`, that operation will be turned into a no-op because the packfile store is already prepared. So this will not cause us to add the packfile to the MRU, and consequently we won't be able to find the packfile in our MRU list. There are only a handful of callers outside of "packfile.c" that add a packfile to the store: - "builtin/fast-import.c" adds multiple packs of imported objects, but it knows to look up objects via `packfile_store_get_packs()`. This function does not use the MRU, so we're good. - "builtin/index-pack.c" adds the indexed pack to the store in case it needs to perform consistency checks on its objects. - "http.c" adds the fetched pack to the store so that we can access its objects. In all of these cases we actually want to access the contained objects. And luckily, reading these objects works as expected: 1. We eventually end up in `do_oid_object_info_extended()`. 2. Calling `find_pack_entry()` fails because the MRU list doesn't contain the newly added packfile. 3. The callers don't pass `OBJECT_INFO_QUICK`, so we end up repreparing the object database. This will also cause us to reprepare the MRU list. 4. We now retry reading the object via `find_pack_entry()`, and now we succeed because the MRU list got populated. This logic feels quite fragile: we intentionally add the packfile to the store, but we then ultimately rely on repreparing the entire store only to make the packfile accessible. While we do the correct thing in `do_oid_object_info_extended()`, other sites that access the MRU may not know to reprepare. But besides being fragile it's also a waste of resources: repreparing the object database requires us to re-read the alternates file and discard any caches. Refactor the code so that we unconditionally add packfiles to the MRU when adding them to a packfile store. This makes the logic less fragile and ensures that we don't have to reprepare the store to make the pack accessible. Note that this does not allow us to drop `packfile_store_prepare_mru()` just yet: while the MRU list is already populated with all packs now, the order in which we add these packs is indeterministic for most of the part. So by first calling `sort_pack()` on the other packfile list and then re-preparing the MRU list we inherit its sorting. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- midx.c | 2 -- packfile.c | 1 + 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/midx.c b/midx.c index 8022be9a45..24e1e72175 100644 --- a/midx.c +++ b/midx.c @@ -462,8 +462,6 @@ int prepare_midx_pack(struct multi_pack_index *m, m->pack_names[pack_int_id]); p = packfile_store_load_pack(r->objects->packfiles, pack_name.buf, m->source->local); - if (p) - packfile_list_append(&m->source->odb->packfiles->mru, p); strbuf_release(&pack_name); if (!p) { diff --git a/packfile.c b/packfile.c index 71e95ae11c..60f2e42876 100644 --- a/packfile.c +++ b/packfile.c @@ -871,6 +871,7 @@ void packfile_store_add_pack(struct packfile_store *store, pack_open_fds++; packfile_list_prepend(&store->packs, pack); + packfile_list_append(&store->mru, pack); strmap_put(&store->packs_by_path, pack->pack_name, pack); } From c31bad4f7dcf3e04ae22e7d4a1059fd628acf1a2 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 30 Oct 2025 11:38:45 +0100 Subject: [PATCH 8/8] packfile: track packs via the MRU list exclusively We track packfiles via two different lists: - `struct packfile_store::packs` is a list that sorts local packs first. In addition, these packs are sorted so that younger packs are sorted towards the front. - `struct packfile_store::mru` is a list that sorts packs so that most-recently used packs are at the front. The reasoning behind the ordering in the `packs` list is that younger objects stored in the local object store tend to be accessed more frequently, and that is certainly true for some cases. But there are going to be lots of cases where that isn't true. Especially when traversing history it is likely that one needs to access many older objects, and due to our housekeeping it is very likely that almost all of those older objects will be contained in one large pack that is oldest. So whether or not the ordering makes sense really depends on the use case at hand. A flexible approach like our MRU list addresses that need, as it will sort packs towards the front that are accessed all the time. Intuitively, this approach is thus able to satisfy more use cases more efficiently. This reasoning casts some doubt on whether or not it really makes sense to track packs via two different lists. It causes confusion, and it is not clear whether there are use cases where the `packs` list really is such an obvious choice. Merge these two lists into one most-recently-used list. Note that there is one important edge case: `for_each_packed_object()` uses the MRU list to iterate through packs, and then it lists each object in those packs. This would have the effect that we now sort the current pack towards the front, thus modifying the list of packfiles we are iterating over, with the consequence that we'll see an infinite loop. This edge case is worked around by introducing a new field that allows us to skip updating the MRU. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 4 ++-- packfile.c | 27 +++++++-------------------- packfile.h | 27 +++++++++++++++++---------- 3 files changed, 26 insertions(+), 32 deletions(-) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index b83eb8ead1..0e4e9f8068 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -1748,11 +1748,11 @@ static int want_object_in_pack_mtime(const struct object_id *oid, } } - for (e = the_repository->objects->packfiles->mru.head; e; e = e->next) { + for (e = the_repository->objects->packfiles->packs.head; e; e = e->next) { struct packed_git *p = e->pack; want = want_object_in_pack_one(p, oid, exclude, found_pack, found_offset, found_mtime); if (!exclude && want > 0) - packfile_list_prepend(&the_repository->objects->packfiles->mru, p); + packfile_list_prepend(&the_repository->objects->packfiles->packs, p); if (want != -1) return want; } diff --git a/packfile.c b/packfile.c index 60f2e42876..378b0b1920 100644 --- a/packfile.c +++ b/packfile.c @@ -870,9 +870,7 @@ void packfile_store_add_pack(struct packfile_store *store, if (pack->pack_fd != -1) pack_open_fds++; - packfile_list_prepend(&store->packs, pack); - packfile_list_append(&store->mru, pack); - + packfile_list_append(&store->packs, pack); strmap_put(&store->packs_by_path, pack->pack_name, pack); } @@ -1077,14 +1075,6 @@ static int sort_pack(const struct packfile_list_entry *a, return -1; } -static void packfile_store_prepare_mru(struct packfile_store *store) -{ - packfile_list_clear(&store->mru); - - for (struct packfile_list_entry *e = store->packs.head; e; e = e->next) - packfile_list_append(&store->mru, e->pack); -} - void packfile_store_prepare(struct packfile_store *store) { struct odb_source *source; @@ -1103,7 +1093,6 @@ void packfile_store_prepare(struct packfile_store *store) if (!e->next) store->packs.tail = e; - packfile_store_prepare_mru(store); store->initialized = true; } @@ -1128,12 +1117,6 @@ struct packfile_list_entry *packfile_store_get_packs(struct packfile_store *stor return store->packs.head; } -struct packfile_list_entry *packfile_store_get_packs_mru(struct packfile_store *store) -{ - packfile_store_prepare(store); - return store->mru.head; -} - /* * Give a fast, rough count of the number of objects in the repository. This * ignores loose objects completely. If you have a lot of them, then either @@ -2134,11 +2117,12 @@ int find_pack_entry(struct repository *r, const struct object_id *oid, struct pa if (!r->objects->packfiles->packs.head) return 0; - for (l = r->objects->packfiles->mru.head; l; l = l->next) { + for (l = r->objects->packfiles->packs.head; l; l = l->next) { struct packed_git *p = l->pack; if (!p->multi_pack_index && fill_pack_entry(oid, e, p)) { - packfile_list_prepend(&r->objects->packfiles->mru, p); + if (!r->objects->packfiles->skip_mru_updates) + packfile_list_prepend(&r->objects->packfiles->packs, p); return 1; } } @@ -2270,6 +2254,7 @@ int for_each_packed_object(struct repository *repo, each_packed_object_fn cb, int r = 0; int pack_errors = 0; + repo->objects->packfiles->skip_mru_updates = true; repo_for_each_pack(repo, p) { if ((flags & FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local) continue; @@ -2290,6 +2275,8 @@ int for_each_packed_object(struct repository *repo, each_packed_object_fn cb, if (r) break; } + repo->objects->packfiles->skip_mru_updates = false; + return r ? r : pack_errors; } diff --git a/packfile.h b/packfile.h index d95275e666..27ba607e7c 100644 --- a/packfile.h +++ b/packfile.h @@ -79,8 +79,8 @@ struct packfile_store { struct object_database *odb; /* - * The list of packfiles in the order in which they are being added to - * the store. + * The list of packfiles in the order in which they have been most + * recently used. */ struct packfile_list packs; @@ -98,9 +98,6 @@ struct packfile_store { unsigned flags; } kept_cache; - /* A most-recently-used ordered version of the packs list. */ - struct packfile_list mru; - /* * A map of packfile names to packed_git structs for tracking which * packs have been loaded already. @@ -112,6 +109,21 @@ struct packfile_store { * packs. */ bool initialized; + + /* + * Usually, packfiles will be reordered to the front of the `packs` + * list whenever an object is looked up via them. This has the effect + * that packs that contain a lot of accessed objects will be located + * towards the front. + * + * This is usually desireable, but there are exceptions. One exception + * is when the looking up multiple objects in a loop for each packfile. + * In that case, we may easily end up with an infinite loop as the + * packfiles get reordered to the front repeatedly. + * + * Setting this field to `true` thus disables these reorderings. + */ + bool skip_mru_updates; }; /* @@ -171,11 +183,6 @@ void packfile_store_add_pack(struct packfile_store *store, */ struct packfile_list_entry *packfile_store_get_packs(struct packfile_store *store); -/* - * Get all packs in most-recently-used order. - */ -struct packfile_list_entry *packfile_store_get_packs_mru(struct packfile_store *store); - /* * Open the packfile and add it to the store if it isn't yet known. Returns * either the newly opened packfile or the preexisting packfile. Returns a