From 7b40d3963835699e138d21f6040a60c07e797853 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Thu, 20 Jun 2013 10:37:43 +0200 Subject: [PATCH 01/11] repack_without_ref(): split list curation and entry writing The repack_without_ref() function first removes the deleted ref from the internal packed-refs list, then writes the packed-refs list to disk, omitting any broken or stale entries. This patch splits that second step into multiple passes: * collect the list of refnames that should be deleted from packed_refs * delete those refnames from the cache * write the remainder to the packed-refs file The purpose of this change is to make the "write the remainder" part reusable. Signed-off-by: Michael Haggerty Signed-off-by: Junio C Hamano --- refs.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 50 insertions(+), 12 deletions(-) diff --git a/refs.c b/refs.c index 42a7e17f6b..1184b9995b 100644 --- a/refs.c +++ b/refs.c @@ -1998,6 +1998,23 @@ static void write_packed_entry(int fd, char *refname, unsigned char *sha1, } } +/* + * An each_ref_entry_fn that writes the entry to a packed-refs file. + */ +static int write_packed_entry_fn(struct ref_entry *entry, void *cb_data) +{ + int *fd = cb_data; + enum peel_status peel_status = peel_entry(entry, 0); + + if (peel_status != PEEL_PEELED && peel_status != PEEL_NON_TAG) + error("internal error: %s is not a valid packed reference!", + entry->name); + write_packed_entry(*fd, entry->name, entry->u.value.sha1, + peel_status == PEEL_PEELED ? + entry->u.value.peeled : NULL); + return 0; +} + struct ref_to_prune { struct ref_to_prune *next; unsigned char sha1[20]; @@ -2117,14 +2134,25 @@ int pack_refs(unsigned int flags) return 0; } -static int repack_ref_fn(struct ref_entry *entry, void *cb_data) +/* + * If entry is no longer needed in packed-refs, add it to the string + * list pointed to by cb_data. Reasons for deleting entries: + * + * - Entry is broken. + * - Entry is overridden by a loose ref. + * - Entry does not point at a valid object. + * + * In the first and third cases, also emit an error message because these + * are indications of repository corruption. + */ +static int curate_packed_ref_fn(struct ref_entry *entry, void *cb_data) { - int *fd = cb_data; - enum peel_status peel_status; + struct string_list *refs_to_delete = cb_data; if (entry->flag & REF_ISBROKEN) { /* This shouldn't happen to packed refs. */ error("%s is broken!", entry->name); + string_list_append(refs_to_delete, entry->name); return 0; } if (!has_sha1_file(entry->u.value.sha1)) { @@ -2134,7 +2162,7 @@ static int repack_ref_fn(struct ref_entry *entry, void *cb_data) if (read_ref_full(entry->name, sha1, 0, &flags)) /* We should at least have found the packed ref. */ die("Internal error"); - if ((flags & REF_ISSYMREF) || !(flags & REF_ISPACKED)) + if ((flags & REF_ISSYMREF) || !(flags & REF_ISPACKED)) { /* * This packed reference is overridden by a * loose reference, so it is OK that its value @@ -2143,9 +2171,11 @@ static int repack_ref_fn(struct ref_entry *entry, void *cb_data) * collected. For this purpose we don't even * care whether the loose reference itself is * invalid, broken, symbolic, etc. Silently - * omit the packed reference from the output. + * remove the packed reference. 
*/ + string_list_append(refs_to_delete, entry->name); return 0; + } /* * There is no overriding loose reference, so the fact * that this reference doesn't refer to a valid object @@ -2154,14 +2184,10 @@ static int repack_ref_fn(struct ref_entry *entry, void *cb_data) * the output. */ error("%s does not point to a valid object!", entry->name); + string_list_append(refs_to_delete, entry->name); return 0; } - peel_status = peel_entry(entry, 0); - write_packed_entry(*fd, entry->name, entry->u.value.sha1, - peel_status == PEEL_PEELED ? - entry->u.value.peeled : NULL); - return 0; } @@ -2169,6 +2195,8 @@ static int repack_without_ref(const char *refname) { int fd; struct ref_dir *packed; + struct string_list refs_to_delete = STRING_LIST_INIT_DUP; + struct string_list_item *ref_to_delete; if (!get_packed_ref(refname)) return 0; /* refname does not exist in packed refs */ @@ -2180,7 +2208,8 @@ static int repack_without_ref(const char *refname) } clear_packed_ref_cache(&ref_cache); packed = get_packed_refs(&ref_cache); - /* Remove refname from the cache. */ + + /* Remove refname from the cache: */ if (remove_entry(packed, refname) == -1) { /* * The packed entry disappeared while we were @@ -2189,8 +2218,17 @@ static int repack_without_ref(const char *refname) rollback_lock_file(&packlock); return 0; } + + /* Remove any other accumulated cruft: */ + do_for_each_entry_in_dir(packed, 0, curate_packed_ref_fn, &refs_to_delete); + for_each_string_list_item(ref_to_delete, &refs_to_delete) { + if (remove_entry(packed, ref_to_delete->string) == -1) + die("internal error"); + } + + /* Write what remains: */ write_or_die(fd, PACKED_REFS_HEADER, strlen(PACKED_REFS_HEADER)); - do_for_each_entry_in_dir(packed, 0, repack_ref_fn, &fd); + do_for_each_entry_in_dir(packed, 0, write_packed_entry_fn, &fd); return commit_lock_file(&packlock); } From 267f9a8cc8192e120a6476fc55590e288e08b459 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Thu, 20 Jun 2013 10:37:44 +0200 Subject: [PATCH 02/11] pack_refs(): split creation of packed refs and entry writing Split pack_refs() into multiple passes: * Iterate over loose refs. For each one that can be turned into a packed ref, create a corresponding entry in the packed refs cache. * Write the packed refs to the packed-refs file. This change isolates the mutation of the packed-refs file to a single place. Signed-off-by: Michael Haggerty Signed-off-by: Junio C Hamano --- refs.c | 48 ++++++++++++++++++++++++++++++++++-------------- 1 file changed, 34 insertions(+), 14 deletions(-) diff --git a/refs.c b/refs.c index 1184b9995b..9f1a007e3b 100644 --- a/refs.c +++ b/refs.c @@ -2023,35 +2023,50 @@ struct ref_to_prune { struct pack_refs_cb_data { unsigned int flags; + struct ref_dir *packed_refs; struct ref_to_prune *ref_to_prune; - int fd; }; -static int pack_one_ref(struct ref_entry *entry, void *cb_data) +/* + * An each_ref_entry_fn that is run over loose references only. If + * the loose reference can be packed, add an entry in the packed ref + * cache. If the reference should be pruned, also add it to + * ref_to_prune in the pack_refs_cb_data. 
+ */ +static int pack_if_possible_fn(struct ref_entry *entry, void *cb_data) { struct pack_refs_cb_data *cb = cb_data; enum peel_status peel_status; + struct ref_entry *packed_entry; int is_tag_ref = !prefixcmp(entry->name, "refs/tags/"); - /* ALWAYS pack refs that were already packed or are tags */ - if (!(cb->flags & PACK_REFS_ALL) && !is_tag_ref && - !(entry->flag & REF_ISPACKED)) + /* ALWAYS pack tags */ + if (!(cb->flags & PACK_REFS_ALL) && !is_tag_ref) return 0; /* Do not pack symbolic or broken refs: */ if ((entry->flag & REF_ISSYMREF) || !ref_resolves_to_object(entry)) return 0; + /* Add a packed ref cache entry equivalent to the loose entry. */ peel_status = peel_entry(entry, 1); if (peel_status != PEEL_PEELED && peel_status != PEEL_NON_TAG) die("internal error peeling reference %s (%s)", entry->name, sha1_to_hex(entry->u.value.sha1)); - write_packed_entry(cb->fd, entry->name, entry->u.value.sha1, - peel_status == PEEL_PEELED ? - entry->u.value.peeled : NULL); + packed_entry = find_ref(cb->packed_refs, entry->name); + if (packed_entry) { + /* Overwrite existing packed entry with info from loose entry */ + packed_entry->flag = REF_ISPACKED | REF_KNOWS_PEELED; + hashcpy(packed_entry->u.value.sha1, entry->u.value.sha1); + } else { + packed_entry = create_ref_entry(entry->name, entry->u.value.sha1, + REF_ISPACKED | REF_KNOWS_PEELED, 0); + add_ref(cb->packed_refs, packed_entry); + } + hashcpy(packed_entry->u.value.peeled, entry->u.value.peeled); - /* If the ref was already packed, there is no need to prune it. */ - if ((cb->flags & PACK_REFS_PRUNE) && !(entry->flag & REF_ISPACKED)) { + /* Schedule the loose reference for pruning if requested. */ + if ((cb->flags & PACK_REFS_PRUNE)) { int namelen = strlen(entry->name) + 1; struct ref_to_prune *n = xcalloc(1, sizeof(*n) + namelen); hashcpy(n->sha1, entry->u.value.sha1); @@ -2118,16 +2133,21 @@ static struct lock_file packlock; int pack_refs(unsigned int flags) { struct pack_refs_cb_data cbdata; + int fd; memset(&cbdata, 0, sizeof(cbdata)); cbdata.flags = flags; - cbdata.fd = hold_lock_file_for_update(&packlock, git_path("packed-refs"), - LOCK_DIE_ON_ERROR); + fd = hold_lock_file_for_update(&packlock, git_path("packed-refs"), + LOCK_DIE_ON_ERROR); + cbdata.packed_refs = get_packed_refs(&ref_cache); - write_or_die(cbdata.fd, PACKED_REFS_HEADER, strlen(PACKED_REFS_HEADER)); + do_for_each_entry_in_dir(get_loose_refs(&ref_cache), 0, + pack_if_possible_fn, &cbdata); + + write_or_die(fd, PACKED_REFS_HEADER, strlen(PACKED_REFS_HEADER)); + do_for_each_entry_in_dir(cbdata.packed_refs, 0, write_packed_entry_fn, &fd); - do_for_each_entry(&ref_cache, "", pack_one_ref, &cbdata); if (commit_lock_file(&packlock) < 0) die_errno("unable to overwrite old ref-pack file"); prune_refs(cbdata.ref_to_prune); From 2fff7812902abd0afe05ae1e9ef334fcd26f0389 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Thu, 20 Jun 2013 10:37:45 +0200 Subject: [PATCH 03/11] refs: wrap the packed refs cache in a level of indirection As we know, we can solve any problem in this manner. In this case, the problem is to avoid freeing a packed refs cache while somebody is using it. So add a level of indirection as a prelude to reference-counting the packed refs cache. 
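To make the purpose of the wrapper concrete, this is roughly where the series is headed (a simplified sketch, not code from this patch: the referrers count and the release helper are only introduced by later patches in the series, and the real versions also deal with the packed-refs lock):

struct packed_ref_cache {
	struct ref_entry *root;
	unsigned int referrers;	/* added later in the series */
};

/*
 * Because the packed-ref data now lives behind its own heap object,
 * "clearing" the cache can be decoupled from freeing it: the ref_cache
 * merely drops its pointer, and the entries are freed only when the
 * last user lets go.
 */
static void release_packed_ref_cache(struct packed_ref_cache *packed_refs)
{
	if (!--packed_refs->referrers) {
		free_ref_entry(packed_refs->root);
		free(packed_refs);
	}
}

static void clear_packed_ref_cache(struct ref_cache *refs)
{
	if (refs->packed) {
		struct packed_ref_cache *packed_refs = refs->packed;

		refs->packed = NULL;
		release_packed_ref_cache(packed_refs);
	}
}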
Signed-off-by: Michael Haggerty Signed-off-by: Junio C Hamano --- refs.c | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/refs.c b/refs.c index 9f1a007e3b..373d95bb79 100644 --- a/refs.c +++ b/refs.c @@ -806,6 +806,10 @@ static int is_refname_available(const char *refname, const char *oldrefname, return 1; } +struct packed_ref_cache { + struct ref_entry *root; +}; + /* * Future: need to be in "struct repository" * when doing a full libification. @@ -813,7 +817,7 @@ static int is_refname_available(const char *refname, const char *oldrefname, static struct ref_cache { struct ref_cache *next; struct ref_entry *loose; - struct ref_entry *packed; + struct packed_ref_cache *packed; /* * The submodule name, or "" for the main repo. We allocate * length 1 rather than FLEX_ARRAY so that the main ref_cache @@ -825,7 +829,8 @@ static struct ref_cache { static void clear_packed_ref_cache(struct ref_cache *refs) { if (refs->packed) { - free_ref_entry(refs->packed); + free_ref_entry(refs->packed->root); + free(refs->packed); refs->packed = NULL; } } @@ -996,24 +1001,39 @@ static void read_packed_refs(FILE *f, struct ref_dir *dir) } } -static struct ref_dir *get_packed_refs(struct ref_cache *refs) +/* + * Get the packed_ref_cache for the specified ref_cache, creating it + * if necessary. + */ +static struct packed_ref_cache *get_packed_ref_cache(struct ref_cache *refs) { if (!refs->packed) { const char *packed_refs_file; FILE *f; - refs->packed = create_dir_entry(refs, "", 0, 0); + refs->packed = xcalloc(1, sizeof(*refs->packed)); + refs->packed->root = create_dir_entry(refs, "", 0, 0); if (*refs->name) packed_refs_file = git_path_submodule(refs->name, "packed-refs"); else packed_refs_file = git_path("packed-refs"); f = fopen(packed_refs_file, "r"); if (f) { - read_packed_refs(f, get_ref_dir(refs->packed)); + read_packed_refs(f, get_ref_dir(refs->packed->root)); fclose(f); } } - return get_ref_dir(refs->packed); + return refs->packed; +} + +static struct ref_dir *get_packed_ref_dir(struct packed_ref_cache *packed_ref_cache) +{ + return get_ref_dir(packed_ref_cache->root); +} + +static struct ref_dir *get_packed_refs(struct ref_cache *refs) +{ + return get_packed_ref_dir(get_packed_ref_cache(refs)); } void add_packed_ref(const char *refname, const unsigned char *sha1) From 9f69d297703bff37c5506276c2565c721347e03f Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Thu, 20 Jun 2013 10:37:46 +0200 Subject: [PATCH 04/11] refs: implement simple transactions for the packed-refs file Handle simple transactions for the packed-refs file at the packed_ref_cache level via new functions lock_packed_refs(), commit_packed_refs(), and rollback_packed_refs(). Only allow the packed ref cache to be modified (via add_packed_ref()) while the packed refs file is locked. Change clone to add the new references within a transaction. 
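For orientation, the calling pattern this API is meant to support looks like the following. This is a minimal sketch, not code from the patch: the refname/sha1 arrays and the check_refname_format() validation step are illustrative, and it assumes git's internal headers since it uses only the functions introduced here.

#include "cache.h"
#include "refs.h"

/* Add a batch of refs to the packed-refs file as one transaction. */
static int write_refs_packed(const char **refnames,
			     const unsigned char (*sha1s)[20], int nr)
{
	int i;

	/* The cache may only be modified while the lock is held. */
	if (lock_packed_refs(0) < 0)
		return error("could not lock packed-refs");

	for (i = 0; i < nr; i++) {
		if (check_refname_format(refnames[i], 0)) {
			/* Forget everything added so far and drop the lock. */
			rollback_packed_refs();
			return error("bad ref name: %s", refnames[i]);
		}
		add_packed_ref(refnames[i], sha1s[i]);
	}

	/* Write the header plus all cached entries, then commit the lock. */
	return commit_packed_refs();
}

The write_remote_refs() change in builtin/clone.c below is exactly this shape, minus the rollback path.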
Signed-off-by: Michael Haggerty Signed-off-by: Junio C Hamano --- builtin/clone.c | 5 ++- refs.c | 88 +++++++++++++++++++++++++++++++++++++++---------- refs.h | 26 +++++++++++++-- 3 files changed, 98 insertions(+), 21 deletions(-) diff --git a/builtin/clone.c b/builtin/clone.c index 66bff5700f..14b1323568 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -493,13 +493,16 @@ static void write_remote_refs(const struct ref *local_refs) { const struct ref *r; + lock_packed_refs(LOCK_DIE_ON_ERROR); + for (r = local_refs; r; r = r->next) { if (!r->peer_ref) continue; add_packed_ref(r->peer_ref->name, r->old_sha1); } - pack_refs(PACK_REFS_ALL); + if (commit_packed_refs()) + die_errno("unable to overwrite old ref-pack file"); } static void write_followtags(const struct ref *refs, const char *msg) diff --git a/refs.c b/refs.c index 373d95bb79..b3457996f5 100644 --- a/refs.c +++ b/refs.c @@ -808,6 +808,13 @@ static int is_refname_available(const char *refname, const char *oldrefname, struct packed_ref_cache { struct ref_entry *root; + + /* + * Iff the packed-refs file associated with this instance is + * currently locked for writing, this points at the associated + * lock (which is owned by somebody else). + */ + struct lock_file *lock; }; /* @@ -826,9 +833,14 @@ static struct ref_cache { char name[1]; } ref_cache, *submodule_ref_caches; +/* Lock used for the main packed-refs file: */ +static struct lock_file packlock; + static void clear_packed_ref_cache(struct ref_cache *refs) { if (refs->packed) { + if (refs->packed->lock) + die("internal error: packed-ref cache cleared while locked"); free_ref_entry(refs->packed->root); free(refs->packed); refs->packed = NULL; @@ -1038,7 +1050,12 @@ static struct ref_dir *get_packed_refs(struct ref_cache *refs) void add_packed_ref(const char *refname, const unsigned char *sha1) { - add_ref(get_packed_refs(&ref_cache), + struct packed_ref_cache *packed_ref_cache = + get_packed_ref_cache(&ref_cache); + + if (!packed_ref_cache->lock) + die("internal error: packed refs not locked"); + add_ref(get_packed_ref_dir(packed_ref_cache), create_ref_entry(refname, sha1, REF_ISPACKED, 1)); } @@ -2035,6 +2052,52 @@ static int write_packed_entry_fn(struct ref_entry *entry, void *cb_data) return 0; } +int lock_packed_refs(int flags) +{ + struct packed_ref_cache *packed_ref_cache; + + /* Discard the old cache because it might be invalid: */ + clear_packed_ref_cache(&ref_cache); + if (hold_lock_file_for_update(&packlock, git_path("packed-refs"), flags) < 0) + return -1; + /* Read the current packed-refs while holding the lock: */ + packed_ref_cache = get_packed_ref_cache(&ref_cache); + packed_ref_cache->lock = &packlock; + return 0; +} + +int commit_packed_refs(void) +{ + struct packed_ref_cache *packed_ref_cache = + get_packed_ref_cache(&ref_cache); + int error = 0; + + if (!packed_ref_cache->lock) + die("internal error: packed-refs not locked"); + write_or_die(packed_ref_cache->lock->fd, + PACKED_REFS_HEADER, strlen(PACKED_REFS_HEADER)); + + do_for_each_entry_in_dir(get_packed_ref_dir(packed_ref_cache), + 0, write_packed_entry_fn, + &packed_ref_cache->lock->fd); + if (commit_lock_file(packed_ref_cache->lock)) + error = -1; + packed_ref_cache->lock = NULL; + return error; +} + +void rollback_packed_refs(void) +{ + struct packed_ref_cache *packed_ref_cache = + get_packed_ref_cache(&ref_cache); + + if (!packed_ref_cache->lock) + die("internal error: packed-refs not locked"); + rollback_lock_file(packed_ref_cache->lock); + packed_ref_cache->lock = NULL; + 
clear_packed_ref_cache(&ref_cache); +} + struct ref_to_prune { struct ref_to_prune *next; unsigned char sha1[20]; @@ -2148,28 +2211,22 @@ static void prune_refs(struct ref_to_prune *r) } } -static struct lock_file packlock; - int pack_refs(unsigned int flags) { struct pack_refs_cb_data cbdata; - int fd; memset(&cbdata, 0, sizeof(cbdata)); cbdata.flags = flags; - fd = hold_lock_file_for_update(&packlock, git_path("packed-refs"), - LOCK_DIE_ON_ERROR); + lock_packed_refs(LOCK_DIE_ON_ERROR); cbdata.packed_refs = get_packed_refs(&ref_cache); do_for_each_entry_in_dir(get_loose_refs(&ref_cache), 0, pack_if_possible_fn, &cbdata); - write_or_die(fd, PACKED_REFS_HEADER, strlen(PACKED_REFS_HEADER)); - do_for_each_entry_in_dir(cbdata.packed_refs, 0, write_packed_entry_fn, &fd); - - if (commit_lock_file(&packlock) < 0) + if (commit_packed_refs()) die_errno("unable to overwrite old ref-pack file"); + prune_refs(cbdata.ref_to_prune); return 0; } @@ -2233,7 +2290,6 @@ static int curate_packed_ref_fn(struct ref_entry *entry, void *cb_data) static int repack_without_ref(const char *refname) { - int fd; struct ref_dir *packed; struct string_list refs_to_delete = STRING_LIST_INIT_DUP; struct string_list_item *ref_to_delete; @@ -2241,12 +2297,10 @@ static int repack_without_ref(const char *refname) if (!get_packed_ref(refname)) return 0; /* refname does not exist in packed refs */ - fd = hold_lock_file_for_update(&packlock, git_path("packed-refs"), 0); - if (fd < 0) { + if (lock_packed_refs(0)) { unable_to_lock_error(git_path("packed-refs"), errno); return error("cannot delete '%s' from packed refs", refname); } - clear_packed_ref_cache(&ref_cache); packed = get_packed_refs(&ref_cache); /* Remove refname from the cache: */ @@ -2255,7 +2309,7 @@ static int repack_without_ref(const char *refname) * The packed entry disappeared while we were * acquiring the lock. */ - rollback_lock_file(&packlock); + rollback_packed_refs(); return 0; } @@ -2267,9 +2321,7 @@ static int repack_without_ref(const char *refname) } /* Write what remains: */ - write_or_die(fd, PACKED_REFS_HEADER, strlen(PACKED_REFS_HEADER)); - do_for_each_entry_in_dir(packed, 0, write_packed_entry_fn, &fd); - return commit_lock_file(&packlock); + return commit_packed_refs(); } int delete_ref(const char *refname, const unsigned char *sha1, int delopt) diff --git a/refs.h b/refs.h index 246bf6096d..9e5db3ae26 100644 --- a/refs.h +++ b/refs.h @@ -77,11 +77,33 @@ extern int for_each_rawref(each_ref_fn, void *); extern void warn_dangling_symref(FILE *fp, const char *msg_fmt, const char *refname); /* - * Add a reference to the in-memory packed reference cache. To actually - * write the reference to the packed-refs file, call pack_refs(). + * Lock the packed-refs file for writing. Flags is passed to + * hold_lock_file_for_update(). Return 0 on success. + */ +extern int lock_packed_refs(int flags); + +/* + * Add a reference to the in-memory packed reference cache. This may + * only be called while the packed-refs file is locked (see + * lock_packed_refs()). To actually write the packed-refs file, call + * commit_packed_refs(). */ extern void add_packed_ref(const char *refname, const unsigned char *sha1); +/* + * Write the current version of the packed refs cache from memory to + * disk. The packed-refs file must already be locked for writing (see + * lock_packed_refs()). Return zero on success. + */ +extern int commit_packed_refs(void); + +/* + * Rollback the lockfile for the packed-refs file, and discard the + * in-memory packed reference cache. 
(The packed-refs file will be + * read anew if it is needed again after this function is called.) + */ +extern void rollback_packed_refs(void); + /* * Flags for controlling behaviour of pack_refs() * PACK_REFS_PRUNE: Prune loose refs after packing From 5f5e2a8868e2b5792bf467799339e3288282e91e Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Thu, 20 Jun 2013 10:37:47 +0200 Subject: [PATCH 05/11] refs: manage lifetime of packed refs cache via reference counting In struct packed_ref_cache, keep a count of the number of users of the data structure. Only free the packed ref cache when the reference count goes to zero rather than when the packed ref cache is cleared. This mechanism will be used to prevent the cache data structure from being freed while it is being iterated over. So far, only the reference in struct ref_cache::packed is counted; other users will be adjusted in separate commits. Signed-off-by: Michael Haggerty Signed-off-by: Junio C Hamano --- refs.c | 39 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 36 insertions(+), 3 deletions(-) diff --git a/refs.c b/refs.c index b3457996f5..80c172f469 100644 --- a/refs.c +++ b/refs.c @@ -809,6 +809,14 @@ static int is_refname_available(const char *refname, const char *oldrefname, struct packed_ref_cache { struct ref_entry *root; + /* + * Count of references to the data structure in this instance, + * including the pointer from ref_cache::packed if any. The + * data will not be freed as long as the reference count is + * nonzero. + */ + unsigned int referrers; + /* * Iff the packed-refs file associated with this instance is * currently locked for writing, this points at the associated @@ -836,14 +844,38 @@ static struct ref_cache { /* Lock used for the main packed-refs file: */ static struct lock_file packlock; +/* + * Increment the reference count of *packed_refs. + */ +static void acquire_packed_ref_cache(struct packed_ref_cache *packed_refs) +{ + packed_refs->referrers++; +} + +/* + * Decrease the reference count of *packed_refs. If it goes to zero, + * free *packed_refs and return true; otherwise return false. + */ +static int release_packed_ref_cache(struct packed_ref_cache *packed_refs) +{ + if (!--packed_refs->referrers) { + free_ref_entry(packed_refs->root); + free(packed_refs); + return 1; + } else { + return 0; + } +} + static void clear_packed_ref_cache(struct ref_cache *refs) { if (refs->packed) { - if (refs->packed->lock) + struct packed_ref_cache *packed_refs = refs->packed; + + if (packed_refs->lock) die("internal error: packed-ref cache cleared while locked"); - free_ref_entry(refs->packed->root); - free(refs->packed); refs->packed = NULL; + release_packed_ref_cache(packed_refs); } } @@ -1024,6 +1056,7 @@ static struct packed_ref_cache *get_packed_ref_cache(struct ref_cache *refs) FILE *f; refs->packed = xcalloc(1, sizeof(*refs->packed)); + acquire_packed_ref_cache(refs->packed); refs->packed->root = create_dir_entry(refs, "", 0, 0); if (*refs->name) packed_refs_file = git_path_submodule(refs->name, "packed-refs"); From 8baf2bb99a22f8265a26d97f706a27e39911f69e Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Thu, 20 Jun 2013 10:37:48 +0200 Subject: [PATCH 06/11] do_for_each_entry(): increment the packed refs cache refcount This function calls a user-supplied callback function which could do something that causes the packed refs cache to be invalidated. So acquire a reference count on the data structure to prevent our copy from being freed while we are iterating over it. 
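To see why the reference count is needed, consider a callback that itself modifies refs (an illustration, not code from the patch: delete_stale_fn and iterate_safely are hypothetical refs.c-internal functions, and the helpers are the ones introduced earlier in this series). Deleting a ref eventually rewrites the packed-refs file and clears the packed-ref cache, which without the extra reference would free the very entries the iteration is still walking.

/* Hypothetical callback: deleting a ref ends up invalidating the cache. */
static int delete_stale_fn(struct ref_entry *entry, void *cb_data)
{
	if (!has_sha1_file(entry->u.value.sha1))
		delete_ref(entry->name, NULL, 0);
	return 0;
}

static void iterate_safely(struct ref_cache *refs)
{
	struct packed_ref_cache *packed = get_packed_ref_cache(refs);

	acquire_packed_ref_cache(packed);
	do_for_each_entry_in_dir(get_packed_ref_dir(packed), 0,
				 delete_stale_fn, NULL);
	/* "packed" stayed valid above, even if the callback cleared the cache. */
	release_packed_ref_cache(packed);
}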
Signed-off-by: Michael Haggerty Signed-off-by: Junio C Hamano --- refs.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/refs.c b/refs.c index 80c172f469..f33d22403e 100644 --- a/refs.c +++ b/refs.c @@ -1590,10 +1590,12 @@ void warn_dangling_symref(FILE *fp, const char *msg_fmt, const char *refname) static int do_for_each_entry(struct ref_cache *refs, const char *base, each_ref_entry_fn fn, void *cb_data) { - struct ref_dir *packed_dir = get_packed_refs(refs); + struct packed_ref_cache *packed_ref_cache = get_packed_ref_cache(refs); + struct ref_dir *packed_dir = get_packed_ref_dir(packed_ref_cache); struct ref_dir *loose_dir = get_loose_refs(refs); int retval = 0; + acquire_packed_ref_cache(packed_ref_cache); if (base && *base) { packed_dir = find_containing_dir(packed_dir, base, 0); loose_dir = find_containing_dir(loose_dir, base, 0); @@ -1614,6 +1616,7 @@ static int do_for_each_entry(struct ref_cache *refs, const char *base, loose_dir, 0, fn, cb_data); } + release_packed_ref_cache(packed_ref_cache); return retval; } From 4f6b83e3708943c3b3f4ac911da7d0c30019c20a Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Thu, 20 Jun 2013 10:37:49 +0200 Subject: [PATCH 07/11] packed_ref_cache: increment refcount when locked Increment the packed_ref_cache reference count while it is locked to prevent its being freed. Signed-off-by: Michael Haggerty Signed-off-by: Junio C Hamano --- refs.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/refs.c b/refs.c index f33d22403e..a5a5b5d3bd 100644 --- a/refs.c +++ b/refs.c @@ -820,7 +820,9 @@ struct packed_ref_cache { /* * Iff the packed-refs file associated with this instance is * currently locked for writing, this points at the associated - * lock (which is owned by somebody else). + * lock (which is owned by somebody else). The referrer count + * is also incremented when the file is locked and decremented + * when it is unlocked. */ struct lock_file *lock; }; @@ -2099,6 +2101,8 @@ int lock_packed_refs(int flags) /* Read the current packed-refs while holding the lock: */ packed_ref_cache = get_packed_ref_cache(&ref_cache); packed_ref_cache->lock = &packlock; + /* Increment the reference count to prevent it from being freed: */ + acquire_packed_ref_cache(packed_ref_cache); return 0; } @@ -2119,6 +2123,7 @@ int commit_packed_refs(void) if (commit_lock_file(packed_ref_cache->lock)) error = -1; packed_ref_cache->lock = NULL; + release_packed_ref_cache(packed_ref_cache); return error; } @@ -2131,6 +2136,7 @@ void rollback_packed_refs(void) die("internal error: packed-refs not locked"); rollback_lock_file(packed_ref_cache->lock); packed_ref_cache->lock = NULL; + release_packed_ref_cache(packed_ref_cache); clear_packed_ref_cache(&ref_cache); } From c21d39d7c7eb07eed0be301a72856bff65201256 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Thu, 20 Jun 2013 10:37:50 +0200 Subject: [PATCH 08/11] Extract a struct stat_data from cache_entry Add public functions fill_stat_data() and match_stat_data() to work with it. This infrastructure will later be used to check the validity of other types of file. 
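As a usage sketch (not from the patch; the helpers and their path argument are illustrative), the pair of new functions lets any caller snapshot a file's stat information and later ask whether the file still looks the same, with the platform-specific details (USE_NSEC, USE_STDEV, check_stat, trust_ctime) handled in one place:

#include "cache.h"

/* Record the stat data we will later compare against. */
static int remember_file(const char *path, struct stat_data *cached)
{
	struct stat st;

	if (lstat(path, &st) < 0)
		return -1;
	fill_stat_data(cached, &st);
	return 0;
}

/* Return 1 if the file still matches the recorded stat data. */
static int file_looks_unchanged(const char *path,
				const struct stat_data *cached)
{
	struct stat st;

	if (lstat(path, &st) < 0)
		return 0;			/* gone or unreadable: treat as changed */
	return !match_stat_data(cached, &st);	/* 0 means no change detected */
}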
Signed-off-by: Michael Haggerty Signed-off-by: Junio C Hamano --- builtin/ls-files.c | 12 ++-- cache.h | 33 +++++++--- read-cache.c | 151 +++++++++++++++++++++++++-------------------- 3 files changed, 116 insertions(+), 80 deletions(-) diff --git a/builtin/ls-files.c b/builtin/ls-files.c index 22020729cb..6a0730f720 100644 --- a/builtin/ls-files.c +++ b/builtin/ls-files.c @@ -165,11 +165,13 @@ static void show_ce_entry(const char *tag, struct cache_entry *ce) } write_name(ce->name, ce_namelen(ce)); if (debug_mode) { - printf(" ctime: %d:%d\n", ce->ce_ctime.sec, ce->ce_ctime.nsec); - printf(" mtime: %d:%d\n", ce->ce_mtime.sec, ce->ce_mtime.nsec); - printf(" dev: %d\tino: %d\n", ce->ce_dev, ce->ce_ino); - printf(" uid: %d\tgid: %d\n", ce->ce_uid, ce->ce_gid); - printf(" size: %d\tflags: %x\n", ce->ce_size, ce->ce_flags); + struct stat_data *sd = &ce->ce_stat_data; + + printf(" ctime: %d:%d\n", sd->sd_ctime.sec, sd->sd_ctime.nsec); + printf(" mtime: %d:%d\n", sd->sd_mtime.sec, sd->sd_mtime.nsec); + printf(" dev: %d\tino: %d\n", sd->sd_dev, sd->sd_ino); + printf(" uid: %d\tgid: %d\n", sd->sd_uid, sd->sd_gid); + printf(" size: %d\tflags: %x\n", sd->sd_size, ce->ce_flags); } } diff --git a/cache.h b/cache.h index 820aa05c7a..f158fed1d8 100644 --- a/cache.h +++ b/cache.h @@ -119,15 +119,19 @@ struct cache_time { unsigned int nsec; }; +struct stat_data { + struct cache_time sd_ctime; + struct cache_time sd_mtime; + unsigned int sd_dev; + unsigned int sd_ino; + unsigned int sd_uid; + unsigned int sd_gid; + unsigned int sd_size; +}; + struct cache_entry { - struct cache_time ce_ctime; - struct cache_time ce_mtime; - unsigned int ce_dev; - unsigned int ce_ino; + struct stat_data ce_stat_data; unsigned int ce_mode; - unsigned int ce_uid; - unsigned int ce_gid; - unsigned int ce_size; unsigned int ce_flags; unsigned int ce_namelen; unsigned char sha1[20]; @@ -511,6 +515,21 @@ extern int limit_pathspec_to_literal(void); #define HASH_FORMAT_CHECK 2 extern int index_fd(unsigned char *sha1, int fd, struct stat *st, enum object_type type, const char *path, unsigned flags); extern int index_path(unsigned char *sha1, const char *path, struct stat *st, unsigned flags); + +/* + * Record to sd the data from st that we use to check whether a file + * might have changed. + */ +extern void fill_stat_data(struct stat_data *sd, struct stat *st); + +/* + * Return 0 if st is consistent with a file not having been changed + * since sd was filled. If there are differences, return a + * combination of MTIME_CHANGED, CTIME_CHANGED, OWNER_CHANGED, + * INODE_CHANGED, and DATA_CHANGED. 
+ */ +extern int match_stat_data(const struct stat_data *sd, struct stat *st); + extern void fill_stat_cache_info(struct cache_entry *ce, struct stat *st); #define REFRESH_REALLY 0x0001 /* ignore_valid */ diff --git a/read-cache.c b/read-cache.c index 5e30746886..5660b37bb2 100644 --- a/read-cache.c +++ b/read-cache.c @@ -67,6 +67,61 @@ void rename_index_entry_at(struct index_state *istate, int nr, const char *new_n add_index_entry(istate, new, ADD_CACHE_OK_TO_ADD|ADD_CACHE_OK_TO_REPLACE); } +void fill_stat_data(struct stat_data *sd, struct stat *st) +{ + sd->sd_ctime.sec = (unsigned int)st->st_ctime; + sd->sd_mtime.sec = (unsigned int)st->st_mtime; + sd->sd_ctime.nsec = ST_CTIME_NSEC(*st); + sd->sd_mtime.nsec = ST_MTIME_NSEC(*st); + sd->sd_dev = st->st_dev; + sd->sd_ino = st->st_ino; + sd->sd_uid = st->st_uid; + sd->sd_gid = st->st_gid; + sd->sd_size = st->st_size; +} + +int match_stat_data(const struct stat_data *sd, struct stat *st) +{ + int changed = 0; + + if (sd->sd_mtime.sec != (unsigned int)st->st_mtime) + changed |= MTIME_CHANGED; + if (trust_ctime && check_stat && + sd->sd_ctime.sec != (unsigned int)st->st_ctime) + changed |= CTIME_CHANGED; + +#ifdef USE_NSEC + if (check_stat && sd->sd_mtime.nsec != ST_MTIME_NSEC(*st)) + changed |= MTIME_CHANGED; + if (trust_ctime && check_stat && + sd->sd_ctime.nsec != ST_CTIME_NSEC(*st)) + changed |= CTIME_CHANGED; +#endif + + if (check_stat) { + if (sd->sd_uid != (unsigned int) st->st_uid || + sd->sd_gid != (unsigned int) st->st_gid) + changed |= OWNER_CHANGED; + if (sd->sd_ino != (unsigned int) st->st_ino) + changed |= INODE_CHANGED; + } + +#ifdef USE_STDEV + /* + * st_dev breaks on network filesystems where different + * clients will have different views of what "device" + * the filesystem is on + */ + if (check_stat && sd->sd_dev != (unsigned int) st->st_dev) + changed |= INODE_CHANGED; +#endif + + if (sd->sd_size != (unsigned int) st->st_size) + changed |= DATA_CHANGED; + + return changed; +} + /* * This only updates the "non-critical" parts of the directory * cache, ie the parts that aren't tracked by GIT, and only used @@ -74,15 +129,7 @@ void rename_index_entry_at(struct index_state *istate, int nr, const char *new_n */ void fill_stat_cache_info(struct cache_entry *ce, struct stat *st) { - ce->ce_ctime.sec = (unsigned int)st->st_ctime; - ce->ce_mtime.sec = (unsigned int)st->st_mtime; - ce->ce_ctime.nsec = ST_CTIME_NSEC(*st); - ce->ce_mtime.nsec = ST_MTIME_NSEC(*st); - ce->ce_dev = st->st_dev; - ce->ce_ino = st->st_ino; - ce->ce_uid = st->st_uid; - ce->ce_gid = st->st_gid; - ce->ce_size = st->st_size; + fill_stat_data(&ce->ce_stat_data, st); if (assume_unchanged) ce->ce_flags |= CE_VALID; @@ -195,43 +242,11 @@ static int ce_match_stat_basic(const struct cache_entry *ce, struct stat *st) default: die("internal error: ce_mode is %o", ce->ce_mode); } - if (ce->ce_mtime.sec != (unsigned int)st->st_mtime) - changed |= MTIME_CHANGED; - if (trust_ctime && check_stat && - ce->ce_ctime.sec != (unsigned int)st->st_ctime) - changed |= CTIME_CHANGED; -#ifdef USE_NSEC - if (check_stat && ce->ce_mtime.nsec != ST_MTIME_NSEC(*st)) - changed |= MTIME_CHANGED; - if (trust_ctime && check_stat && - ce->ce_ctime.nsec != ST_CTIME_NSEC(*st)) - changed |= CTIME_CHANGED; -#endif - - if (check_stat) { - if (ce->ce_uid != (unsigned int) st->st_uid || - ce->ce_gid != (unsigned int) st->st_gid) - changed |= OWNER_CHANGED; - if (ce->ce_ino != (unsigned int) st->st_ino) - changed |= INODE_CHANGED; - } - -#ifdef USE_STDEV - /* - * st_dev breaks on network filesystems 
where different - * clients will have different views of what "device" - * the filesystem is on - */ - if (check_stat && ce->ce_dev != (unsigned int) st->st_dev) - changed |= INODE_CHANGED; -#endif - - if (ce->ce_size != (unsigned int) st->st_size) - changed |= DATA_CHANGED; + changed |= match_stat_data(&ce->ce_stat_data, st); /* Racily smudged entry? */ - if (!ce->ce_size) { + if (!ce->ce_stat_data.sd_size) { if (!is_empty_blob_sha1(ce->sha1)) changed |= DATA_CHANGED; } @@ -246,11 +261,11 @@ static int is_racy_timestamp(const struct index_state *istate, istate->timestamp.sec && #ifdef USE_NSEC /* nanosecond timestamped files can also be racy! */ - (istate->timestamp.sec < ce->ce_mtime.sec || - (istate->timestamp.sec == ce->ce_mtime.sec && - istate->timestamp.nsec <= ce->ce_mtime.nsec)) + (istate->timestamp.sec < ce->ce_stat_data.sd_mtime.sec || + (istate->timestamp.sec == ce->ce_stat_data.sd_mtime.sec && + istate->timestamp.nsec <= ce->ce_stat_data.sd_mtime.nsec)) #else - istate->timestamp.sec <= ce->ce_mtime.sec + istate->timestamp.sec <= ce->ce_stat_data.sd_mtime.sec #endif ); } @@ -342,7 +357,7 @@ int ie_modified(const struct index_state *istate, * then we know it is. */ if ((changed & DATA_CHANGED) && - (S_ISGITLINK(ce->ce_mode) || ce->ce_size != 0)) + (S_ISGITLINK(ce->ce_mode) || ce->ce_stat_data.sd_size != 0)) return changed; changed_fs = ce_modified_check_fs(ce, st); @@ -1324,16 +1339,16 @@ static struct cache_entry *cache_entry_from_ondisk(struct ondisk_cache_entry *on { struct cache_entry *ce = xmalloc(cache_entry_size(len)); - ce->ce_ctime.sec = ntoh_l(ondisk->ctime.sec); - ce->ce_mtime.sec = ntoh_l(ondisk->mtime.sec); - ce->ce_ctime.nsec = ntoh_l(ondisk->ctime.nsec); - ce->ce_mtime.nsec = ntoh_l(ondisk->mtime.nsec); - ce->ce_dev = ntoh_l(ondisk->dev); - ce->ce_ino = ntoh_l(ondisk->ino); + ce->ce_stat_data.sd_ctime.sec = ntoh_l(ondisk->ctime.sec); + ce->ce_stat_data.sd_mtime.sec = ntoh_l(ondisk->mtime.sec); + ce->ce_stat_data.sd_ctime.nsec = ntoh_l(ondisk->ctime.nsec); + ce->ce_stat_data.sd_mtime.nsec = ntoh_l(ondisk->mtime.nsec); + ce->ce_stat_data.sd_dev = ntoh_l(ondisk->dev); + ce->ce_stat_data.sd_ino = ntoh_l(ondisk->ino); ce->ce_mode = ntoh_l(ondisk->mode); - ce->ce_uid = ntoh_l(ondisk->uid); - ce->ce_gid = ntoh_l(ondisk->gid); - ce->ce_size = ntoh_l(ondisk->size); + ce->ce_stat_data.sd_uid = ntoh_l(ondisk->uid); + ce->ce_stat_data.sd_gid = ntoh_l(ondisk->gid); + ce->ce_stat_data.sd_size = ntoh_l(ondisk->size); ce->ce_flags = flags & ~CE_NAMEMASK; ce->ce_namelen = len; hashcpy(ce->sha1, ondisk->sha1); @@ -1610,7 +1625,7 @@ static void ce_smudge_racily_clean_entry(struct cache_entry *ce) * The only thing we care about in this function is to smudge the * falsely clean entry due to touch-update-touch race, so we leave * everything else as they are. We are called for entries whose - * ce_mtime match the index file mtime. + * ce_stat_data.sd_mtime match the index file mtime. * * Note that this actually does not do much for gitlinks, for * which ce_match_stat_basic() always goes to the actual @@ -1649,7 +1664,7 @@ static void ce_smudge_racily_clean_entry(struct cache_entry *ce) * file, and never calls us, so the cached size information * for "frotz" stays 6 which does not match the filesystem. 
*/ - ce->ce_size = 0; + ce->ce_stat_data.sd_size = 0; } } @@ -1659,16 +1674,16 @@ static char *copy_cache_entry_to_ondisk(struct ondisk_cache_entry *ondisk, { short flags; - ondisk->ctime.sec = htonl(ce->ce_ctime.sec); - ondisk->mtime.sec = htonl(ce->ce_mtime.sec); - ondisk->ctime.nsec = htonl(ce->ce_ctime.nsec); - ondisk->mtime.nsec = htonl(ce->ce_mtime.nsec); - ondisk->dev = htonl(ce->ce_dev); - ondisk->ino = htonl(ce->ce_ino); + ondisk->ctime.sec = htonl(ce->ce_stat_data.sd_ctime.sec); + ondisk->mtime.sec = htonl(ce->ce_stat_data.sd_mtime.sec); + ondisk->ctime.nsec = htonl(ce->ce_stat_data.sd_ctime.nsec); + ondisk->mtime.nsec = htonl(ce->ce_stat_data.sd_mtime.nsec); + ondisk->dev = htonl(ce->ce_stat_data.sd_dev); + ondisk->ino = htonl(ce->ce_stat_data.sd_ino); ondisk->mode = htonl(ce->ce_mode); - ondisk->uid = htonl(ce->ce_uid); - ondisk->gid = htonl(ce->ce_gid); - ondisk->size = htonl(ce->ce_size); + ondisk->uid = htonl(ce->ce_stat_data.sd_uid); + ondisk->gid = htonl(ce->ce_stat_data.sd_gid); + ondisk->size = htonl(ce->ce_stat_data.sd_size); hashcpy(ondisk->sha1, ce->sha1); flags = ce->ce_flags; From 38612532240ecbe1b12e54ca859fed8410ae6de1 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Thu, 20 Jun 2013 10:37:51 +0200 Subject: [PATCH 09/11] add a stat_validity struct It can sometimes be useful to know whether a path in the filesystem has been updated without going to the work of opening and re-reading its content. We trust the stat() information on disk already to handle index updates, and we can use the same trick here. This patch introduces a "stat_validity" struct which encapsulates the concept of checking the stat-freshness of a file. It is implemented on top of "struct stat_data" to reuse the logic about which stat entries to trust for a particular platform, but hides the complexity behind two simple functions: check and update. Signed-off-by: Michael Haggerty Signed-off-by: Junio C Hamano --- cache.h | 27 +++++++++++++++++++++++++++ read-cache.c | 30 ++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+) diff --git a/cache.h b/cache.h index f158fed1d8..50f33db2cc 100644 --- a/cache.h +++ b/cache.h @@ -1360,4 +1360,31 @@ int checkout_fast_forward(const unsigned char *from, int sane_execvp(const char *file, char *const argv[]); +/* + * A struct to encapsulate the concept of whether a file has changed + * since we last checked it. This uses criteria similar to those used + * for the index. + */ +struct stat_validity { + struct stat_data *sd; +}; + +void stat_validity_clear(struct stat_validity *sv); + +/* + * Returns 1 if the path is a regular file (or a symlink to a regular + * file) and matches the saved stat_validity, 0 otherwise. A missing + * or inaccessible file is considered a match if the struct was just + * initialized, or if the previous update found an inaccessible file. + */ +int stat_validity_check(struct stat_validity *sv, const char *path); + +/* + * Update the stat_validity from a file opened at descriptor fd. If + * the file is missing, inaccessible, or not a regular file, then + * future calls to stat_validity_check will match iff one of those + * conditions continues to be true. 
+ */ +void stat_validity_update(struct stat_validity *sv, int fd); + #endif /* CACHE_H */ diff --git a/read-cache.c b/read-cache.c index 5660b37bb2..b15bc096ea 100644 --- a/read-cache.c +++ b/read-cache.c @@ -1950,3 +1950,33 @@ void *read_blob_data_from_index(struct index_state *istate, const char *path, un *size = sz; return data; } + +void stat_validity_clear(struct stat_validity *sv) +{ + free(sv->sd); + sv->sd = NULL; +} + +int stat_validity_check(struct stat_validity *sv, const char *path) +{ + struct stat st; + + if (stat(path, &st) < 0) + return sv->sd == NULL; + if (!sv->sd) + return 0; + return S_ISREG(st.st_mode) && !match_stat_data(sv->sd, &st); +} + +void stat_validity_update(struct stat_validity *sv, int fd) +{ + struct stat st; + + if (fstat(fd, &st) < 0 || !S_ISREG(st.st_mode)) + stat_validity_clear(sv); + else { + if (!sv->sd) + sv->sd = xcalloc(1, sizeof(struct stat_data)); + fill_stat_data(sv->sd, &st); + } +} From ca9199300eb0d485f5e03961bbbd9aff3484a1fa Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 20 Jun 2013 10:37:52 +0200 Subject: [PATCH 10/11] get_packed_ref_cache: reload packed-refs file when it changes Once we read the packed-refs file into memory, we cache it to save work on future ref lookups. However, our cache may be out of date with respect to what is on disk if another process is simultaneously packing the refs. Normally it is acceptable for us to be a little out of date, since there is no guarantee whether we read the file before or after the simultaneous update. However, there is an important special case: our packed-refs file must be up to date with respect to any loose refs we read. Otherwise, we risk the following race condition: 0. There exists a loose ref refs/heads/master. 1. Process A starts and looks up the ref "master". It first checks $GIT_DIR/master, which does not exist. It then loads (and caches) the packed-refs file to see if "master" exists in it, which it does not. 2. Meanwhile, process B runs "pack-refs --all --prune". It creates a new packed-refs file which contains refs/heads/master, and removes the loose copy at $GIT_DIR/refs/heads/master. 3. Process A continues its lookup, and eventually tries $GIT_DIR/refs/heads/master. It sees that the loose ref is missing, and falls back to the packed-refs file. But it examines its cached version, which does not have refs/heads/master. After trying a few other prefixes, it reports master as a non-existent ref. There are many variants (e.g., step 1 may involve process A looking up another ref entirely, so even a fully qualified refname can fail). One of the most interesting ones is if "refs/heads/master" is already packed. In that case process A will not see it as missing, but rather will report whatever value happened to be in the packed-refs file before process B repacked (which might be an arbitrarily old value). We can fix this by making sure we reload the packed-refs file from disk after looking at any loose refs. That's unacceptably slow, so we can check its stat()-validity as a proxy, and read it only when it appears to have changed. Reading the packed-refs file after performing any loose-ref system calls is sufficient because we know the ordering of the pack-refs process: it always makes sure the newly written packed-refs file is installed into place before pruning any loose refs. As long as those operations by B appear in their executed order to process A, by the time A sees the missing loose ref, the new packed-refs file must be in place. 
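The stat_validity API from the previous patch is what makes the cheap freshness test possible. As a usage sketch (not code from this patch; the parse callback and the static cache are placeholders), the general pattern is: check before reusing a cached copy, update from the open file descriptor whenever the file is re-read, and clear when the file turns out to be missing:

#include "cache.h"

static struct stat_validity cached_validity;
static void *cached_data;

static void *get_cached_file(const char *path, void *(*parse)(FILE *))
{
	FILE *f;

	if (cached_data && stat_validity_check(&cached_validity, path))
		return cached_data;	/* file unchanged; reuse the cache */

	free(cached_data);
	cached_data = NULL;

	f = fopen(path, "r");
	if (!f) {
		/* Remember that the file was missing or unreadable. */
		stat_validity_clear(&cached_validity);
		return NULL;
	}
	stat_validity_update(&cached_validity, fileno(f));
	cached_data = parse(f);
	fclose(f);
	return cached_data;
}

get_packed_ref_cache() below applies the same pattern to the packed-refs file.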
Signed-off-by: Michael Haggerty Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- refs.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/refs.c b/refs.c index a5a5b5d3bd..60f250769e 100644 --- a/refs.c +++ b/refs.c @@ -825,6 +825,9 @@ struct packed_ref_cache { * when it is unlocked. */ struct lock_file *lock; + + /* The metadata from when this packed-refs cache was read */ + struct stat_validity validity; }; /* @@ -862,6 +865,7 @@ static int release_packed_ref_cache(struct packed_ref_cache *packed_refs) { if (!--packed_refs->referrers) { free_ref_entry(packed_refs->root); + stat_validity_clear(&packed_refs->validity); free(packed_refs); return 1; } else { @@ -1053,19 +1057,26 @@ static void read_packed_refs(FILE *f, struct ref_dir *dir) */ static struct packed_ref_cache *get_packed_ref_cache(struct ref_cache *refs) { + const char *packed_refs_file; + + if (*refs->name) + packed_refs_file = git_path_submodule(refs->name, "packed-refs"); + else + packed_refs_file = git_path("packed-refs"); + + if (refs->packed && + !stat_validity_check(&refs->packed->validity, packed_refs_file)) + clear_packed_ref_cache(refs); + if (!refs->packed) { - const char *packed_refs_file; FILE *f; refs->packed = xcalloc(1, sizeof(*refs->packed)); acquire_packed_ref_cache(refs->packed); refs->packed->root = create_dir_entry(refs, "", 0, 0); - if (*refs->name) - packed_refs_file = git_path_submodule(refs->name, "packed-refs"); - else - packed_refs_file = git_path("packed-refs"); f = fopen(packed_refs_file, "r"); if (f) { + stat_validity_update(&refs->packed->validity, fileno(f)); read_packed_refs(f, get_ref_dir(refs->packed->root)); fclose(f); } From 98eeb09e8acb6cbe0b0da3b1772b6676fe6d167f Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 20 Jun 2013 10:37:53 +0200 Subject: [PATCH 11/11] for_each_ref: load all loose refs before packed refs If we are iterating through the refs using for_each_ref (or any of its sister functions), we can get into a race condition with a simultaneous "pack-refs --prune" that looks like this: 0. We have a large number of loose refs, and a few packed refs. refs/heads/z/foo is loose, with no matching entry in the packed-refs file. 1. Process A starts iterating through the refs. It loads the packed-refs file from disk, then starts lazily traversing through the loose ref directories. 2. Process B, running "pack-refs --prune", writes out the new packed-refs file. It then deletes the newly packed refs, including refs/heads/z/foo. 3. Meanwhile, process A has finally gotten to refs/heads/z (it traverses alphabetically). It descends, but finds nothing there. It checks its cached view of the packed-refs file, but it does not mention anything in "refs/heads/z/" at all (it predates the new file written by B in step 2). The traversal completes successfully without mentioning refs/heads/z/foo at all (the name, of course, isn't important; but the more refs you have and the farther down the alphabetical list a ref is, the more likely it is to hit the race). If refs/heads/z/foo did exist in the packed refs file at state 0, we would see an entry for it, but it would show whatever sha1 the ref had the last time it was packed (which could be an arbitrarily long time ago). 
This can be especially dangerous when process A is "git prune", as it means our set of reachable tips will be incomplete, and we may erroneously prune objects reachable from that tip (the same thing can happen if "repack -ad" is used, as it simply drops unreachable objects that are packed). This patch solves it by loading all of the loose refs for our traversal into our in-memory cache, and then refreshing the packed-refs cache. Because a pack-refs writer will always put the new packed-refs file into place before starting the prune, we know that any loose refs we fail to see will either truly be missing, or will have already been put in the packed-refs file by the time we refresh. Signed-off-by: Michael Haggerty Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- refs.c | 39 +++++++++++++++++++++++++++++++++++---- 1 file changed, 35 insertions(+), 4 deletions(-) diff --git a/refs.c b/refs.c index 60f250769e..787cc1c9bc 100644 --- a/refs.c +++ b/refs.c @@ -749,6 +749,21 @@ static int do_for_each_entry_in_dirs(struct ref_dir *dir1, } } +/* + * Load all of the refs from the dir into our in-memory cache. The hard work + * of loading loose refs is done by get_ref_dir(), so we just need to recurse + * through all of the sub-directories. We do not even need to care about + * sorting, as traversal order does not matter to us. + */ +static void prime_ref_dir(struct ref_dir *dir) +{ + int i; + for (i = 0; i < dir->nr; i++) { + struct ref_entry *entry = dir->entries[i]; + if (entry->flag & REF_DIR) + prime_ref_dir(get_ref_dir(entry)); + } +} /* * Return true iff refname1 and refname2 conflict with each other. * Two reference names conflict if one of them exactly matches the @@ -1603,15 +1618,31 @@ void warn_dangling_symref(FILE *fp, const char *msg_fmt, const char *refname) static int do_for_each_entry(struct ref_cache *refs, const char *base, each_ref_entry_fn fn, void *cb_data) { - struct packed_ref_cache *packed_ref_cache = get_packed_ref_cache(refs); - struct ref_dir *packed_dir = get_packed_ref_dir(packed_ref_cache); - struct ref_dir *loose_dir = get_loose_refs(refs); + struct packed_ref_cache *packed_ref_cache; + struct ref_dir *loose_dir; + struct ref_dir *packed_dir; int retval = 0; + /* + * We must make sure that all loose refs are read before accessing the + * packed-refs file; this avoids a race condition in which loose refs + * are migrated to the packed-refs file by a simultaneous process, but + * our in-memory view is from before the migration. get_packed_ref_cache() + * takes care of making sure our view is up to date with what is on + * disk. + */ + loose_dir = get_loose_refs(refs); + if (base && *base) { + loose_dir = find_containing_dir(loose_dir, base, 0); + } + if (loose_dir) + prime_ref_dir(loose_dir); + + packed_ref_cache = get_packed_ref_cache(refs); acquire_packed_ref_cache(packed_ref_cache); + packed_dir = get_packed_ref_dir(packed_ref_cache); if (base && *base) { packed_dir = find_containing_dir(packed_dir, base, 0); - loose_dir = find_containing_dir(loose_dir, base, 0); } if (packed_dir && loose_dir) {