From 8738a8a4df9ee50112b5f5a757c58988166974d3 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Wed, 13 Sep 2017 19:15:55 +0200 Subject: [PATCH 01/21] ref_iterator: keep track of whether the iterator output is ordered References are iterated over in order by refname, but reflogs are not. Some consumers of reference iteration care about the difference. Teach each `ref_iterator` to keep track of whether its output is ordered. `overlay_ref_iterator` is one of the picky consumers. Add a sanity check in `overlay_ref_iterator_begin()` to verify that its inputs are ordered. Signed-off-by: Michael Haggerty Signed-off-by: Junio C Hamano --- refs.c | 4 ++++ refs/files-backend.c | 16 +++++++++------- refs/iterator.c | 15 ++++++++++----- refs/packed-backend.c | 2 +- refs/ref-cache.c | 2 +- refs/ref-cache.h | 3 ++- refs/refs-internal.h | 23 +++++++++++++++++++---- 7 files changed, 46 insertions(+), 19 deletions(-) diff --git a/refs.c b/refs.c index b0106b8162..101c107ee8 100644 --- a/refs.c +++ b/refs.c @@ -1309,6 +1309,10 @@ struct ref_iterator *refs_ref_iterator_begin( if (trim) iter = prefix_ref_iterator_begin(iter, "", trim); + /* Sanity check for subclasses: */ + if (!iter->ordered) + BUG("reference iterator is not ordered"); + return iter; } diff --git a/refs/files-backend.c b/refs/files-backend.c index 961424a4ea..35648c89fc 100644 --- a/refs/files-backend.c +++ b/refs/files-backend.c @@ -762,7 +762,7 @@ static struct ref_iterator *files_ref_iterator_begin( const char *prefix, unsigned int flags) { struct files_ref_store *refs; - struct ref_iterator *loose_iter, *packed_iter; + struct ref_iterator *loose_iter, *packed_iter, *overlay_iter; struct files_ref_iterator *iter; struct ref_iterator *ref_iterator; unsigned int required_flags = REF_STORE_READ; @@ -772,10 +772,6 @@ static struct ref_iterator *files_ref_iterator_begin( refs = files_downcast(ref_store, required_flags, "ref_iterator_begin"); - iter = xcalloc(1, sizeof(*iter)); - ref_iterator = &iter->base; - 
base_ref_iterator_init(ref_iterator, &files_ref_iterator_vtable); - /* * We must make sure that all loose refs are read before * accessing the packed-refs file; this avoids a race @@ -811,7 +807,13 @@ static struct ref_iterator *files_ref_iterator_begin( refs->packed_ref_store, prefix, 0, DO_FOR_EACH_INCLUDE_BROKEN); - iter->iter0 = overlay_ref_iterator_begin(loose_iter, packed_iter); + overlay_iter = overlay_ref_iterator_begin(loose_iter, packed_iter); + + iter = xcalloc(1, sizeof(*iter)); + ref_iterator = &iter->base; + base_ref_iterator_init(ref_iterator, &files_ref_iterator_vtable, + overlay_iter->ordered); + iter->iter0 = overlay_iter; iter->flags = flags; return ref_iterator; @@ -2084,7 +2086,7 @@ static struct ref_iterator *files_reflog_iterator_begin(struct ref_store *ref_st struct ref_iterator *ref_iterator = &iter->base; struct strbuf sb = STRBUF_INIT; - base_ref_iterator_init(ref_iterator, &files_reflog_iterator_vtable); + base_ref_iterator_init(ref_iterator, &files_reflog_iterator_vtable, 0); files_reflog_path(refs, &sb, NULL); iter->dir_iterator = dir_iterator_begin(sb.buf); iter->ref_store = ref_store; diff --git a/refs/iterator.c b/refs/iterator.c index 4cf449ef66..c475360f0a 100644 --- a/refs/iterator.c +++ b/refs/iterator.c @@ -25,9 +25,11 @@ int ref_iterator_abort(struct ref_iterator *ref_iterator) } void base_ref_iterator_init(struct ref_iterator *iter, - struct ref_iterator_vtable *vtable) + struct ref_iterator_vtable *vtable, + int ordered) { iter->vtable = vtable; + iter->ordered = !!ordered; iter->refname = NULL; iter->oid = NULL; iter->flags = 0; @@ -72,7 +74,7 @@ struct ref_iterator *empty_ref_iterator_begin(void) struct empty_ref_iterator *iter = xcalloc(1, sizeof(*iter)); struct ref_iterator *ref_iterator = &iter->base; - base_ref_iterator_init(ref_iterator, &empty_ref_iterator_vtable); + base_ref_iterator_init(ref_iterator, &empty_ref_iterator_vtable, 1); return ref_iterator; } @@ -205,6 +207,7 @@ static struct ref_iterator_vtable 
merge_ref_iterator_vtable = { }; struct ref_iterator *merge_ref_iterator_begin( + int ordered, struct ref_iterator *iter0, struct ref_iterator *iter1, ref_iterator_select_fn *select, void *cb_data) { @@ -219,7 +222,7 @@ struct ref_iterator *merge_ref_iterator_begin( * references through only if they exist in both iterators. */ - base_ref_iterator_init(ref_iterator, &merge_ref_iterator_vtable); + base_ref_iterator_init(ref_iterator, &merge_ref_iterator_vtable, ordered); iter->iter0 = iter0; iter->iter1 = iter1; iter->select = select; @@ -268,9 +271,11 @@ struct ref_iterator *overlay_ref_iterator_begin( } else if (is_empty_ref_iterator(back)) { ref_iterator_abort(back); return front; + } else if (!front->ordered || !back->ordered) { + BUG("overlay_ref_iterator requires ordered inputs"); } - return merge_ref_iterator_begin(front, back, + return merge_ref_iterator_begin(1, front, back, overlay_iterator_select, NULL); } @@ -361,7 +366,7 @@ struct ref_iterator *prefix_ref_iterator_begin(struct ref_iterator *iter0, iter = xcalloc(1, sizeof(*iter)); ref_iterator = &iter->base; - base_ref_iterator_init(ref_iterator, &prefix_ref_iterator_vtable); + base_ref_iterator_init(ref_iterator, &prefix_ref_iterator_vtable, iter0->ordered); iter->iter0 = iter0; iter->prefix = xstrdup(prefix); diff --git a/refs/packed-backend.c b/refs/packed-backend.c index 0279aeceea..e411501871 100644 --- a/refs/packed-backend.c +++ b/refs/packed-backend.c @@ -437,7 +437,7 @@ static struct ref_iterator *packed_ref_iterator_begin( iter = xcalloc(1, sizeof(*iter)); ref_iterator = &iter->base; - base_ref_iterator_init(ref_iterator, &packed_ref_iterator_vtable); + base_ref_iterator_init(ref_iterator, &packed_ref_iterator_vtable, 1); /* * Note that get_packed_ref_cache() internally checks whether diff --git a/refs/ref-cache.c b/refs/ref-cache.c index 76bb723c86..54dfb5218c 100644 --- a/refs/ref-cache.c +++ b/refs/ref-cache.c @@ -574,7 +574,7 @@ struct ref_iterator *cache_ref_iterator_begin(struct ref_cache 
*cache, iter = xcalloc(1, sizeof(*iter)); ref_iterator = &iter->base; - base_ref_iterator_init(ref_iterator, &cache_ref_iterator_vtable); + base_ref_iterator_init(ref_iterator, &cache_ref_iterator_vtable, 1); ALLOC_GROW(iter->levels, 10, iter->levels_alloc); iter->levels_nr = 1; diff --git a/refs/ref-cache.h b/refs/ref-cache.h index 794f000fd3..a082bfb06c 100644 --- a/refs/ref-cache.h +++ b/refs/ref-cache.h @@ -245,7 +245,8 @@ struct ref_entry *find_ref_entry(struct ref_dir *dir, const char *refname); * Start iterating over references in `cache`. If `prefix` is * specified, only include references whose names start with that * prefix. If `prime_dir` is true, then fill any incomplete - * directories before beginning the iteration. + * directories before beginning the iteration. The output is ordered + * by refname. */ struct ref_iterator *cache_ref_iterator_begin(struct ref_cache *cache, const char *prefix, diff --git a/refs/refs-internal.h b/refs/refs-internal.h index d7d344de73..d7f233beba 100644 --- a/refs/refs-internal.h +++ b/refs/refs-internal.h @@ -329,6 +329,13 @@ int refs_rename_ref_available(struct ref_store *refs, */ struct ref_iterator { struct ref_iterator_vtable *vtable; + + /* + * Does this `ref_iterator` iterate over references in order + * by refname? + */ + unsigned int ordered : 1; + const char *refname; const struct object_id *oid; unsigned int flags; @@ -374,7 +381,7 @@ int is_empty_ref_iterator(struct ref_iterator *ref_iterator); * which the refname begins with prefix. If trim is non-zero, then * trim that many characters off the beginning of each refname. flags * can be DO_FOR_EACH_INCLUDE_BROKEN to include broken references in - * the iteration. + * the iteration. The output is ordered by refname. 
*/ struct ref_iterator *refs_ref_iterator_begin( struct ref_store *refs, @@ -400,9 +407,11 @@ typedef enum iterator_selection ref_iterator_select_fn( * Iterate over the entries from iter0 and iter1, with the values * interleaved as directed by the select function. The iterator takes * ownership of iter0 and iter1 and frees them when the iteration is - * over. + * over. A derived class should set `ordered` to 1 or 0 based on + * whether it generates its output in order by reference name. */ struct ref_iterator *merge_ref_iterator_begin( + int ordered, struct ref_iterator *iter0, struct ref_iterator *iter1, ref_iterator_select_fn *select, void *cb_data); @@ -431,6 +440,8 @@ struct ref_iterator *overlay_ref_iterator_begin( * As an convenience to callers, if prefix is the empty string and * trim is zero, this function returns iter0 directly, without * wrapping it. + * + * The resulting ref_iterator is ordered if iter0 is. */ struct ref_iterator *prefix_ref_iterator_begin(struct ref_iterator *iter0, const char *prefix, @@ -441,11 +452,14 @@ struct ref_iterator *prefix_ref_iterator_begin(struct ref_iterator *iter0, /* * Base class constructor for ref_iterators. Initialize the * ref_iterator part of iter, setting its vtable pointer as specified. + * `ordered` should be set to 1 if the iterator will iterate over + * references in order by refname; otherwise it should be set to 0. * This is meant to be called only by the initializers of derived * classes. */ void base_ref_iterator_init(struct ref_iterator *iter, - struct ref_iterator_vtable *vtable); + struct ref_iterator_vtable *vtable, + int ordered); /* * Base class destructor for ref_iterators. Destroy the ref_iterator @@ -564,7 +578,8 @@ typedef int rename_ref_fn(struct ref_store *ref_store, * Iterate over the references in `ref_store` whose names start with * `prefix`. `prefix` is matched as a literal string, without regard * for path separators. 
If prefix is NULL or the empty string, iterate - * over all references in `ref_store`. + * over all references in `ref_store`. The output is ordered by + * refname. */ typedef struct ref_iterator *ref_iterator_begin_fn( struct ref_store *ref_store, From 157113c614034dc247e1aa6fb65448125f71f7ca Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 13 Sep 2017 19:15:56 +0200 Subject: [PATCH 02/21] prefix_ref_iterator: break when we leave the prefix If the underlying iterator is ordered, then `prefix_ref_iterator` can stop as soon as it sees a refname that comes after the prefix. This will rarely make a big difference now, because `ref_cache_iterator` only iterates over the directory containing the prefix (and usually the prefix will span a whole directory anyway). But if *hint, hint* a future reference backend doesn't itself know where to stop the iteration, then this optimization will be a big win. Note that there is no guarantee that the underlying iterator doesn't include output preceding the prefix, so we have to skip over any unwanted references before we get to the ones that we want. Signed-off-by: Jeff King Signed-off-by: Michael Haggerty Signed-off-by: Junio C Hamano --- refs/iterator.c | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/refs/iterator.c b/refs/iterator.c index c475360f0a..bd35da4e62 100644 --- a/refs/iterator.c +++ b/refs/iterator.c @@ -287,6 +287,20 @@ struct prefix_ref_iterator { int trim; }; +/* Return -1, 0, 1 if refname is before, inside, or after the prefix. */ +static int compare_prefix(const char *refname, const char *prefix) +{ + while (*prefix) { + if (*refname != *prefix) + return ((unsigned char)*refname < (unsigned char)*prefix) ? 
-1 : +1; + + refname++; + prefix++; + } + + return 0; +} + static int prefix_ref_iterator_advance(struct ref_iterator *ref_iterator) { struct prefix_ref_iterator *iter = @@ -294,9 +308,25 @@ static int prefix_ref_iterator_advance(struct ref_iterator *ref_iterator) int ok; while ((ok = ref_iterator_advance(iter->iter0)) == ITER_OK) { - if (!starts_with(iter->iter0->refname, iter->prefix)) + int cmp = compare_prefix(iter->iter0->refname, iter->prefix); + + if (cmp < 0) continue; + if (cmp > 0) { + /* + * If the source iterator is ordered, then we + * can stop the iteration as soon as we see a + * refname that comes after the prefix: + */ + if (iter->iter0->ordered) { + ok = ref_iterator_abort(iter->iter0); + break; + } else { + continue; + } + } + if (iter->trim) { /* * It is nonsense to trim off characters that From f0a7dc86d2919a2b91e46503f1df173e58977ef2 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Wed, 13 Sep 2017 19:15:57 +0200 Subject: [PATCH 03/21] packed_ref_cache: add a backlink to the associated `packed_ref_store` It will prove convenient in upcoming patches. Signed-off-by: Michael Haggerty Signed-off-by: Junio C Hamano --- refs/packed-backend.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/refs/packed-backend.c b/refs/packed-backend.c index e411501871..a3d9210cb0 100644 --- a/refs/packed-backend.c +++ b/refs/packed-backend.c @@ -7,7 +7,15 @@ #include "../iterator.h" #include "../lockfile.h" +struct packed_ref_store; + struct packed_ref_cache { + /* + * A back-pointer to the packed_ref_store with which this + * cache is associated: + */ + struct packed_ref_store *refs; + struct ref_cache *cache; /* @@ -154,7 +162,7 @@ static const char *parse_ref_line(struct strbuf *line, struct object_id *oid) } /* - * Read from `packed_refs_file` into a newly-allocated + * Read from the `packed-refs` file into a newly-allocated * `packed_ref_cache` and return it. 
The return value will already * have its reference count incremented. * @@ -182,7 +190,7 @@ static const char *parse_ref_line(struct strbuf *line, struct object_id *oid) * compatibility with older clients, but we do not require it * (i.e., "peeled" is a no-op if "fully-peeled" is set). */ -static struct packed_ref_cache *read_packed_refs(const char *packed_refs_file) +static struct packed_ref_cache *read_packed_refs(struct packed_ref_store *refs) { FILE *f; struct packed_ref_cache *packed_refs = xcalloc(1, sizeof(*packed_refs)); @@ -191,11 +199,12 @@ static struct packed_ref_cache *read_packed_refs(const char *packed_refs_file) enum { PEELED_NONE, PEELED_TAGS, PEELED_FULLY } peeled = PEELED_NONE; struct ref_dir *dir; + packed_refs->refs = refs; acquire_packed_ref_cache(packed_refs); packed_refs->cache = create_ref_cache(NULL, NULL); packed_refs->cache->root->flag &= ~REF_INCOMPLETE; - f = fopen(packed_refs_file, "r"); + f = fopen(refs->path, "r"); if (!f) { if (errno == ENOENT) { /* @@ -205,7 +214,7 @@ static struct packed_ref_cache *read_packed_refs(const char *packed_refs_file) */ return packed_refs; } else { - die_errno("couldn't read %s", packed_refs_file); + die_errno("couldn't read %s", refs->path); } } @@ -218,7 +227,7 @@ static struct packed_ref_cache *read_packed_refs(const char *packed_refs_file) const char *traits; if (!line.len || line.buf[line.len - 1] != '\n') - die("unterminated line in %s: %s", packed_refs_file, line.buf); + die("unterminated line in %s: %s", refs->path, line.buf); if (skip_prefix(line.buf, "# pack-refs with:", &traits)) { if (strstr(traits, " fully-peeled ")) @@ -258,7 +267,7 @@ static struct packed_ref_cache *read_packed_refs(const char *packed_refs_file) last->flag |= REF_KNOWS_PEELED; } else { strbuf_setlen(&line, line.len - 1); - die("unexpected line in %s: %s", packed_refs_file, line.buf); + die("unexpected line in %s: %s", refs->path, line.buf); } } @@ -293,7 +302,7 @@ static struct packed_ref_cache 
*get_packed_ref_cache(struct packed_ref_store *re validate_packed_ref_cache(refs); if (!refs->cache) - refs->cache = read_packed_refs(refs->path); + refs->cache = read_packed_refs(refs); return refs->cache; } From 735267aa100e5e75e5b8c74d47f412ad50851ec9 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Wed, 13 Sep 2017 19:15:58 +0200 Subject: [PATCH 04/21] die_unterminated_line(), die_invalid_line(): new functions Extract some helper functions for reporting errors. While we're at it, prevent them from spewing unlimited output to the terminal. These functions will soon have more callers. These functions accept the problematic line as a `(ptr, len)` pair rather than a NUL-terminated string, and `die_invalid_line()` checks for an EOL itself, because these calling conventions will be convenient for future callers. (Efficiency is not a concern here because these functions are only ever called if the `packed-refs` file is corrupt.) Signed-off-by: Michael Haggerty Signed-off-by: Junio C Hamano --- refs/packed-backend.c | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/refs/packed-backend.c b/refs/packed-backend.c index a3d9210cb0..5c50c223ef 100644 --- a/refs/packed-backend.c +++ b/refs/packed-backend.c @@ -161,6 +161,29 @@ static const char *parse_ref_line(struct strbuf *line, struct object_id *oid) return ref; } +static NORETURN void die_unterminated_line(const char *path, + const char *p, size_t len) +{ + if (len < 80) + die("unterminated line in %s: %.*s", path, (int)len, p); + else + die("unterminated line in %s: %.75s...", path, p); +} + +static NORETURN void die_invalid_line(const char *path, + const char *p, size_t len) +{ + const char *eol = memchr(p, '\n', len); + + if (!eol) + die_unterminated_line(path, p, len); + else if (eol - p < 80) + die("unexpected line in %s: %.*s", path, (int)(eol - p), p); + else + die("unexpected line in %s: %.75s...", path, p); + +} + /* * Read from the `packed-refs` file into a 
newly-allocated * `packed_ref_cache` and return it. The return value will already @@ -227,7 +250,7 @@ static struct packed_ref_cache *read_packed_refs(struct packed_ref_store *refs) const char *traits; if (!line.len || line.buf[line.len - 1] != '\n') - die("unterminated line in %s: %s", refs->path, line.buf); + die_unterminated_line(refs->path, line.buf, line.len); if (skip_prefix(line.buf, "# pack-refs with:", &traits)) { if (strstr(traits, " fully-peeled ")) @@ -266,8 +289,7 @@ static struct packed_ref_cache *read_packed_refs(struct packed_ref_store *refs) */ last->flag |= REF_KNOWS_PEELED; } else { - strbuf_setlen(&line, line.len - 1); - die("unexpected line in %s: %s", refs->path, line.buf); + die_invalid_line(refs->path, line.buf, line.len); } } From 49a03ef46667ad5074f1e602e392b7763c686205 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Wed, 13 Sep 2017 19:15:59 +0200 Subject: [PATCH 05/21] read_packed_refs(): use mmap to read the `packed-refs` file It's still done in a pretty stupid way, involving more data copying than necessary. That will improve in future commits. 
Signed-off-by: Michael Haggerty Signed-off-by: Junio C Hamano --- refs/packed-backend.c | 42 ++++++++++++++++++++++++++++++++---------- 1 file changed, 32 insertions(+), 10 deletions(-) diff --git a/refs/packed-backend.c b/refs/packed-backend.c index 5c50c223ef..154abbd83a 100644 --- a/refs/packed-backend.c +++ b/refs/packed-backend.c @@ -215,8 +215,12 @@ static NORETURN void die_invalid_line(const char *path, */ static struct packed_ref_cache *read_packed_refs(struct packed_ref_store *refs) { - FILE *f; struct packed_ref_cache *packed_refs = xcalloc(1, sizeof(*packed_refs)); + int fd; + struct stat st; + size_t size; + char *buf; + const char *pos, *eol, *eof; struct ref_entry *last = NULL; struct strbuf line = STRBUF_INIT; enum { PEELED_NONE, PEELED_TAGS, PEELED_FULLY } peeled = PEELED_NONE; @@ -227,8 +231,8 @@ static struct packed_ref_cache *read_packed_refs(struct packed_ref_store *refs) packed_refs->cache = create_ref_cache(NULL, NULL); packed_refs->cache->root->flag &= ~REF_INCOMPLETE; - f = fopen(refs->path, "r"); - if (!f) { + fd = open(refs->path, O_RDONLY); + if (fd < 0) { if (errno == ENOENT) { /* * This is OK; it just means that no @@ -241,16 +245,27 @@ static struct packed_ref_cache *read_packed_refs(struct packed_ref_store *refs) } } - stat_validity_update(&packed_refs->validity, fileno(f)); + stat_validity_update(&packed_refs->validity, fd); + + if (fstat(fd, &st) < 0) + die_errno("couldn't stat %s", refs->path); + + size = xsize_t(st.st_size); + buf = xmmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0); + pos = buf; + eof = buf + size; dir = get_ref_dir(packed_refs->cache->root); - while (strbuf_getwholeline(&line, f, '\n') != EOF) { + while (pos < eof) { struct object_id oid; const char *refname; const char *traits; - if (!line.len || line.buf[line.len - 1] != '\n') - die_unterminated_line(refs->path, line.buf, line.len); + eol = memchr(pos, '\n', eof - pos); + if (!eol) + die_unterminated_line(refs->path, pos, eof - pos); + + strbuf_add(&line, pos, 
eol + 1 - pos); if (skip_prefix(line.buf, "# pack-refs with:", &traits)) { if (strstr(traits, " fully-peeled ")) @@ -258,7 +273,7 @@ static struct packed_ref_cache *read_packed_refs(struct packed_ref_store *refs) else if (strstr(traits, " peeled ")) peeled = PEELED_TAGS; /* perhaps other traits later as well */ - continue; + goto next_line; } refname = parse_ref_line(&line, &oid); @@ -291,11 +306,18 @@ static struct packed_ref_cache *read_packed_refs(struct packed_ref_store *refs) } else { die_invalid_line(refs->path, line.buf, line.len); } + + next_line: + /* The "+ 1" is for the LF character. */ + pos = eol + 1; + strbuf_reset(&line); } - fclose(f); - strbuf_release(&line); + if (munmap(buf, size)) + die_errno("error ummapping packed-refs file"); + close(fd); + strbuf_release(&line); return packed_refs; } From 36f23534aef58f747048ffea5addb0367406896d Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Wed, 13 Sep 2017 19:16:00 +0200 Subject: [PATCH 06/21] read_packed_refs(): only check for a header at the top of the file This tightens up the parsing a bit; previously, stray header-looking lines would have been processed. 
Signed-off-by: Michael Haggerty Signed-off-by: Junio C Hamano --- refs/packed-backend.c | 39 ++++++++++++++++++++++++++------------- 1 file changed, 26 insertions(+), 13 deletions(-) diff --git a/refs/packed-backend.c b/refs/packed-backend.c index 154abbd83a..141f02b9c8 100644 --- a/refs/packed-backend.c +++ b/refs/packed-backend.c @@ -255,10 +255,8 @@ static struct packed_ref_cache *read_packed_refs(struct packed_ref_store *refs) pos = buf; eof = buf + size; - dir = get_ref_dir(packed_refs->cache->root); - while (pos < eof) { - struct object_id oid; - const char *refname; + /* If the file has a header line, process it: */ + if (pos < eof && *pos == '#') { const char *traits; eol = memchr(pos, '\n', eof - pos); @@ -267,14 +265,30 @@ static struct packed_ref_cache *read_packed_refs(struct packed_ref_store *refs) strbuf_add(&line, pos, eol + 1 - pos); - if (skip_prefix(line.buf, "# pack-refs with:", &traits)) { - if (strstr(traits, " fully-peeled ")) - peeled = PEELED_FULLY; - else if (strstr(traits, " peeled ")) - peeled = PEELED_TAGS; - /* perhaps other traits later as well */ - goto next_line; - } + if (!skip_prefix(line.buf, "# pack-refs with:", &traits)) + die_invalid_line(refs->path, pos, eof - pos); + + if (strstr(traits, " fully-peeled ")) + peeled = PEELED_FULLY; + else if (strstr(traits, " peeled ")) + peeled = PEELED_TAGS; + /* perhaps other traits later as well */ + + /* The "+ 1" is for the LF character. 
*/ + pos = eol + 1; + strbuf_reset(&line); + } + + dir = get_ref_dir(packed_refs->cache->root); + while (pos < eof) { + struct object_id oid; + const char *refname; + + eol = memchr(pos, '\n', eof - pos); + if (!eol) + die_unterminated_line(refs->path, pos, eof - pos); + + strbuf_add(&line, pos, eol + 1 - pos); refname = parse_ref_line(&line, &oid); if (refname) { @@ -307,7 +321,6 @@ static struct packed_ref_cache *read_packed_refs(struct packed_ref_store *refs) die_invalid_line(refs->path, line.buf, line.len); } - next_line: /* The "+ 1" is for the LF character. */ pos = eol + 1; strbuf_reset(&line); From a8811695e30e48bec0aeefe6a01ed291fd68640b Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Wed, 13 Sep 2017 19:16:01 +0200 Subject: [PATCH 07/21] read_packed_refs(): make parsing of the header line more robust The old code parsed the traits in the `packed-refs` header by looking for the string " trait " (i.e., the name of the trait with a space on either side) in the header line. This is fragile, because if any other implementation of Git forgets to write the trailing space, the last trait would silently be ignored (and the error might never be noticed). So instead, use `string_list_split_in_place()` to split the traits into tokens then use `unsorted_string_list_has_string()` to look for the tokens we are interested in. This means that we can read the traits correctly even if the header line is missing a trailing space (or indeed, if it is missing the space after the colon, or if it has multiple spaces somewhere). However, older Git clients (and perhaps other Git implementations) still require the surrounding spaces, so we still have to output the header with a trailing space. 
Signed-off-by: Michael Haggerty Signed-off-by: Junio C Hamano --- refs/packed-backend.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/refs/packed-backend.c b/refs/packed-backend.c index 141f02b9c8..a45e3ff92f 100644 --- a/refs/packed-backend.c +++ b/refs/packed-backend.c @@ -257,25 +257,30 @@ static struct packed_ref_cache *read_packed_refs(struct packed_ref_store *refs) /* If the file has a header line, process it: */ if (pos < eof && *pos == '#') { - const char *traits; + char *p; + struct string_list traits = STRING_LIST_INIT_NODUP; eol = memchr(pos, '\n', eof - pos); if (!eol) die_unterminated_line(refs->path, pos, eof - pos); - strbuf_add(&line, pos, eol + 1 - pos); + strbuf_add(&line, pos, eol - pos); - if (!skip_prefix(line.buf, "# pack-refs with:", &traits)) + if (!skip_prefix(line.buf, "# pack-refs with:", (const char **)&p)) die_invalid_line(refs->path, pos, eof - pos); - if (strstr(traits, " fully-peeled ")) + string_list_split_in_place(&traits, p, ' ', -1); + + if (unsorted_string_list_has_string(&traits, "fully-peeled")) peeled = PEELED_FULLY; - else if (strstr(traits, " peeled ")) + else if (unsorted_string_list_has_string(&traits, "peeled")) peeled = PEELED_TAGS; /* perhaps other traits later as well */ /* The "+ 1" is for the LF character. */ pos = eol + 1; + + string_list_clear(&traits, 0); strbuf_reset(&line); } @@ -610,7 +615,11 @@ int packed_refs_is_locked(struct ref_store *ref_store) /* * The packed-refs header line that we write out. Perhaps other - * traits will be added later. The trailing space is required. + * traits will be added later. + * + * Note that earlier versions of Git used to parse these traits by + * looking for " trait " in the line. For this reason, the space after + * the colon and the trailing space are required. 
*/ static const char PACKED_REFS_HEADER[] = "# pack-refs with: peeled fully-peeled \n"; From 6a9bc4034ab59a2d241151f9025c07e62178ad95 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Mon, 25 Sep 2017 10:00:05 +0200 Subject: [PATCH 08/21] read_packed_refs(): read references with minimal copying Instead of copying data from the `packed-refs` file one line at time and then processing it, process the data in place as much as possible. Also, instead of processing one line per iteration of the main loop, process a reference line plus its corresponding peeled line (if present) together. Note that this change slightly tightens up the parsing of the `packed-refs` file. Previously, the parser would have accepted multiple "peeled" lines for a single reference (ignoring all but the last one). Now it would reject that. Signed-off-by: Michael Haggerty Signed-off-by: Junio C Hamano --- refs/packed-backend.c | 101 +++++++++++++++++------------------------- 1 file changed, 40 insertions(+), 61 deletions(-) diff --git a/refs/packed-backend.c b/refs/packed-backend.c index a45e3ff92f..2b80f244c8 100644 --- a/refs/packed-backend.c +++ b/refs/packed-backend.c @@ -134,33 +134,6 @@ static void clear_packed_ref_cache(struct packed_ref_store *refs) } } -/* The length of a peeled reference line in packed-refs, including EOL: */ -#define PEELED_LINE_LENGTH 42 - -/* - * Parse one line from a packed-refs file. Write the SHA1 to sha1. - * Return a pointer to the refname within the line (null-terminated), - * or NULL if there was a problem. 
- */ -static const char *parse_ref_line(struct strbuf *line, struct object_id *oid) -{ - const char *ref; - - if (parse_oid_hex(line->buf, oid, &ref) < 0) - return NULL; - if (!isspace(*ref++)) - return NULL; - - if (isspace(*ref)) - return NULL; - - if (line->buf[line->len - 1] != '\n') - return NULL; - line->buf[--line->len] = 0; - - return ref; -} - static NORETURN void die_unterminated_line(const char *path, const char *p, size_t len) { @@ -221,8 +194,7 @@ static struct packed_ref_cache *read_packed_refs(struct packed_ref_store *refs) size_t size; char *buf; const char *pos, *eol, *eof; - struct ref_entry *last = NULL; - struct strbuf line = STRBUF_INIT; + struct strbuf tmp = STRBUF_INIT; enum { PEELED_NONE, PEELED_TAGS, PEELED_FULLY } peeled = PEELED_NONE; struct ref_dir *dir; @@ -264,9 +236,9 @@ static struct packed_ref_cache *read_packed_refs(struct packed_ref_store *refs) if (!eol) die_unterminated_line(refs->path, pos, eof - pos); - strbuf_add(&line, pos, eol - pos); + strbuf_add(&tmp, pos, eol - pos); - if (!skip_prefix(line.buf, "# pack-refs with:", (const char **)&p)) + if (!skip_prefix(tmp.buf, "# pack-refs with:", (const char **)&p)) die_invalid_line(refs->path, pos, eof - pos); string_list_split_in_place(&traits, p, ' ', -1); @@ -281,61 +253,68 @@ static struct packed_ref_cache *read_packed_refs(struct packed_ref_store *refs) pos = eol + 1; string_list_clear(&traits, 0); - strbuf_reset(&line); + strbuf_reset(&tmp); } dir = get_ref_dir(packed_refs->cache->root); while (pos < eof) { + const char *p = pos; struct object_id oid; const char *refname; + int flag = REF_ISPACKED; + struct ref_entry *entry = NULL; - eol = memchr(pos, '\n', eof - pos); + if (eof - pos < GIT_SHA1_HEXSZ + 2 || + parse_oid_hex(p, &oid, &p) || + !isspace(*p++)) + die_invalid_line(refs->path, pos, eof - pos); + + eol = memchr(p, '\n', eof - p); if (!eol) die_unterminated_line(refs->path, pos, eof - pos); - strbuf_add(&line, pos, eol + 1 - pos); + strbuf_add(&tmp, p, eol - p); + 
refname = tmp.buf; - refname = parse_ref_line(&line, &oid); - if (refname) { - int flag = REF_ISPACKED; + if (check_refname_format(refname, REFNAME_ALLOW_ONELEVEL)) { + if (!refname_is_safe(refname)) + die("packed refname is dangerous: %s", refname); + oidclr(&oid); + flag |= REF_BAD_NAME | REF_ISBROKEN; + } + if (peeled == PEELED_FULLY || + (peeled == PEELED_TAGS && starts_with(refname, "refs/tags/"))) + flag |= REF_KNOWS_PEELED; + entry = create_ref_entry(refname, &oid, flag); + add_ref_entry(dir, entry); + + pos = eol + 1; + + if (pos < eof && *pos == '^') { + p = pos + 1; + if (eof - p < GIT_SHA1_HEXSZ + 1 || + parse_oid_hex(p, &entry->u.value.peeled, &p) || + *p++ != '\n') + die_invalid_line(refs->path, pos, eof - pos); - if (check_refname_format(refname, REFNAME_ALLOW_ONELEVEL)) { - if (!refname_is_safe(refname)) - die("packed refname is dangerous: %s", refname); - oidclr(&oid); - flag |= REF_BAD_NAME | REF_ISBROKEN; - } - last = create_ref_entry(refname, &oid, flag); - if (peeled == PEELED_FULLY || - (peeled == PEELED_TAGS && starts_with(refname, "refs/tags/"))) - last->flag |= REF_KNOWS_PEELED; - add_ref_entry(dir, last); - } else if (last && - line.buf[0] == '^' && - line.len == PEELED_LINE_LENGTH && - line.buf[PEELED_LINE_LENGTH - 1] == '\n' && - !get_oid_hex(line.buf + 1, &oid)) { - oidcpy(&last->u.value.peeled, &oid); /* * Regardless of what the file header said, * we definitely know the value of *this* * reference: */ - last->flag |= REF_KNOWS_PEELED; - } else { - die_invalid_line(refs->path, line.buf, line.len); + entry->flag |= REF_KNOWS_PEELED; + + pos = p; } - /* The "+ 1" is for the LF character. 
*/ - pos = eol + 1; - strbuf_reset(&line); + strbuf_reset(&tmp); } if (munmap(buf, size)) die_errno("error ummapping packed-refs file"); close(fd); - strbuf_release(&line); + strbuf_release(&tmp); return packed_refs; } From daa45408c12053d608d86f0c1daa23b79815624f Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Mon, 25 Sep 2017 10:00:06 +0200 Subject: [PATCH 09/21] packed_ref_cache: remember the file-wide peeling state Rather than store the peeling state (i.e., the one defined by traits in the `packed-refs` file header line) in a local variable in `read_packed_refs()`, store it permanently in `packed_ref_cache`. This will be needed when we stop reading all packed refs at once. Signed-off-by: Michael Haggerty Signed-off-by: Junio C Hamano --- refs/packed-backend.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/refs/packed-backend.c b/refs/packed-backend.c index 2b80f244c8..ae276f3445 100644 --- a/refs/packed-backend.c +++ b/refs/packed-backend.c @@ -18,6 +18,12 @@ struct packed_ref_cache { struct ref_cache *cache; + /* + * What is the peeled state of this cache? (This is usually + * determined from the header of the "packed-refs" file.) + */ + enum { PEELED_NONE, PEELED_TAGS, PEELED_FULLY } peeled; + /* * Count of references to the data structure in this instance, * including the pointer from files_ref_store::packed if any. 
@@ -195,13 +201,13 @@ static struct packed_ref_cache *read_packed_refs(struct packed_ref_store *refs) char *buf; const char *pos, *eol, *eof; struct strbuf tmp = STRBUF_INIT; - enum { PEELED_NONE, PEELED_TAGS, PEELED_FULLY } peeled = PEELED_NONE; struct ref_dir *dir; packed_refs->refs = refs; acquire_packed_ref_cache(packed_refs); packed_refs->cache = create_ref_cache(NULL, NULL); packed_refs->cache->root->flag &= ~REF_INCOMPLETE; + packed_refs->peeled = PEELED_NONE; fd = open(refs->path, O_RDONLY); if (fd < 0) { @@ -244,9 +250,9 @@ static struct packed_ref_cache *read_packed_refs(struct packed_ref_store *refs) string_list_split_in_place(&traits, p, ' ', -1); if (unsorted_string_list_has_string(&traits, "fully-peeled")) - peeled = PEELED_FULLY; + packed_refs->peeled = PEELED_FULLY; else if (unsorted_string_list_has_string(&traits, "peeled")) - peeled = PEELED_TAGS; + packed_refs->peeled = PEELED_TAGS; /* perhaps other traits later as well */ /* The "+ 1" is for the LF character. */ @@ -282,8 +288,9 @@ static struct packed_ref_cache *read_packed_refs(struct packed_ref_store *refs) oidclr(&oid); flag |= REF_BAD_NAME | REF_ISBROKEN; } - if (peeled == PEELED_FULLY || - (peeled == PEELED_TAGS && starts_with(refname, "refs/tags/"))) + if (packed_refs->peeled == PEELED_FULLY || + (packed_refs->peeled == PEELED_TAGS && + starts_with(refname, "refs/tags/"))) flag |= REF_KNOWS_PEELED; entry = create_ref_entry(refname, &oid, flag); add_ref_entry(dir, entry); From 9cfb3dc0d12a1db12c43edaa5e2545a6818ecc95 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Mon, 25 Sep 2017 10:00:07 +0200 Subject: [PATCH 10/21] mmapped_ref_iterator: add iterator over a packed-refs file Add a new `mmapped_ref_iterator`, which can iterate over the references in an mmapped `packed-refs` file directly. Use this iterator from `read_packed_refs()` to fill the packed refs cache. Note that we are not yet willing to promise that the new iterator generates its output in order. 
That doesn't matter for now, because the packed refs cache doesn't care what order it is filled. This change adds a lot of boilerplate without providing any obvious benefits. The benefits will come soon, when we get rid of the `ref_cache` for packed references altogether. Signed-off-by: Michael Haggerty Signed-off-by: Junio C Hamano --- refs/packed-backend.c | 207 +++++++++++++++++++++++++++++++----------- 1 file changed, 152 insertions(+), 55 deletions(-) diff --git a/refs/packed-backend.c b/refs/packed-backend.c index ae276f3445..312116a99d 100644 --- a/refs/packed-backend.c +++ b/refs/packed-backend.c @@ -163,6 +163,141 @@ static NORETURN void die_invalid_line(const char *path, } +/* + * An iterator over a packed-refs file that is currently mmapped. + */ +struct mmapped_ref_iterator { + struct ref_iterator base; + + struct packed_ref_cache *packed_refs; + + /* The current position in the mmapped file: */ + const char *pos; + + /* The end of the mmapped file: */ + const char *eof; + + struct object_id oid, peeled; + + struct strbuf refname_buf; +}; + +static int mmapped_ref_iterator_advance(struct ref_iterator *ref_iterator) +{ + struct mmapped_ref_iterator *iter = + (struct mmapped_ref_iterator *)ref_iterator; + const char *p = iter->pos, *eol; + + strbuf_reset(&iter->refname_buf); + + if (iter->pos == iter->eof) + return ref_iterator_abort(ref_iterator); + + iter->base.flags = REF_ISPACKED; + + if (iter->eof - p < GIT_SHA1_HEXSZ + 2 || + parse_oid_hex(p, &iter->oid, &p) || + !isspace(*p++)) + die_invalid_line(iter->packed_refs->refs->path, + iter->pos, iter->eof - iter->pos); + + eol = memchr(p, '\n', iter->eof - p); + if (!eol) + die_unterminated_line(iter->packed_refs->refs->path, + iter->pos, iter->eof - iter->pos); + + strbuf_add(&iter->refname_buf, p, eol - p); + iter->base.refname = iter->refname_buf.buf; + + if (check_refname_format(iter->base.refname, REFNAME_ALLOW_ONELEVEL)) { + if (!refname_is_safe(iter->base.refname)) + die("packed refname is 
dangerous: %s", + iter->base.refname); + oidclr(&iter->oid); + iter->base.flags |= REF_BAD_NAME | REF_ISBROKEN; + } + if (iter->packed_refs->peeled == PEELED_FULLY || + (iter->packed_refs->peeled == PEELED_TAGS && + starts_with(iter->base.refname, "refs/tags/"))) + iter->base.flags |= REF_KNOWS_PEELED; + + iter->pos = eol + 1; + + if (iter->pos < iter->eof && *iter->pos == '^') { + p = iter->pos + 1; + if (iter->eof - p < GIT_SHA1_HEXSZ + 1 || + parse_oid_hex(p, &iter->peeled, &p) || + *p++ != '\n') + die_invalid_line(iter->packed_refs->refs->path, + iter->pos, iter->eof - iter->pos); + iter->pos = p; + + /* + * Regardless of what the file header said, we + * definitely know the value of *this* reference: + */ + iter->base.flags |= REF_KNOWS_PEELED; + } else { + oidclr(&iter->peeled); + } + + return ITER_OK; +} + +static int mmapped_ref_iterator_peel(struct ref_iterator *ref_iterator, + struct object_id *peeled) +{ + struct mmapped_ref_iterator *iter = + (struct mmapped_ref_iterator *)ref_iterator; + + if ((iter->base.flags & REF_KNOWS_PEELED)) { + oidcpy(peeled, &iter->peeled); + return is_null_oid(&iter->peeled) ? 
-1 : 0; + } else if ((iter->base.flags & (REF_ISBROKEN | REF_ISSYMREF))) { + return -1; + } else { + return !!peel_object(iter->oid.hash, peeled->hash); + } +} + +static int mmapped_ref_iterator_abort(struct ref_iterator *ref_iterator) +{ + struct mmapped_ref_iterator *iter = + (struct mmapped_ref_iterator *)ref_iterator; + + release_packed_ref_cache(iter->packed_refs); + strbuf_release(&iter->refname_buf); + base_ref_iterator_free(ref_iterator); + return ITER_DONE; +} + +static struct ref_iterator_vtable mmapped_ref_iterator_vtable = { + mmapped_ref_iterator_advance, + mmapped_ref_iterator_peel, + mmapped_ref_iterator_abort +}; + +struct ref_iterator *mmapped_ref_iterator_begin( + const char *packed_refs_file, + struct packed_ref_cache *packed_refs, + const char *pos, const char *eof) +{ + struct mmapped_ref_iterator *iter = xcalloc(1, sizeof(*iter)); + struct ref_iterator *ref_iterator = &iter->base; + + base_ref_iterator_init(ref_iterator, &mmapped_ref_iterator_vtable, 0); + + iter->packed_refs = packed_refs; + acquire_packed_ref_cache(iter->packed_refs); + iter->pos = pos; + iter->eof = eof; + strbuf_init(&iter->refname_buf, 0); + + iter->base.oid = &iter->oid; + + return ref_iterator; +} + /* * Read from the `packed-refs` file into a newly-allocated * `packed_ref_cache` and return it. 
The return value will already @@ -199,9 +334,10 @@ static struct packed_ref_cache *read_packed_refs(struct packed_ref_store *refs) struct stat st; size_t size; char *buf; - const char *pos, *eol, *eof; - struct strbuf tmp = STRBUF_INIT; + const char *pos, *eof; struct ref_dir *dir; + struct ref_iterator *iter; + int ok; packed_refs->refs = refs; acquire_packed_ref_cache(packed_refs); @@ -235,7 +371,9 @@ static struct packed_ref_cache *read_packed_refs(struct packed_ref_store *refs) /* If the file has a header line, process it: */ if (pos < eof && *pos == '#') { + struct strbuf tmp = STRBUF_INIT; char *p; + const char *eol; struct string_list traits = STRING_LIST_INIT_NODUP; eol = memchr(pos, '\n', eof - pos); @@ -259,69 +397,28 @@ static struct packed_ref_cache *read_packed_refs(struct packed_ref_store *refs) pos = eol + 1; string_list_clear(&traits, 0); - strbuf_reset(&tmp); + strbuf_release(&tmp); } dir = get_ref_dir(packed_refs->cache->root); - while (pos < eof) { - const char *p = pos; - struct object_id oid; - const char *refname; - int flag = REF_ISPACKED; - struct ref_entry *entry = NULL; + iter = mmapped_ref_iterator_begin(refs->path, packed_refs, pos, eof); + while ((ok = ref_iterator_advance(iter)) == ITER_OK) { + struct ref_entry *entry = + create_ref_entry(iter->refname, iter->oid, iter->flags); - if (eof - pos < GIT_SHA1_HEXSZ + 2 || - parse_oid_hex(p, &oid, &p) || - !isspace(*p++)) - die_invalid_line(refs->path, pos, eof - pos); - - eol = memchr(p, '\n', eof - p); - if (!eol) - die_unterminated_line(refs->path, pos, eof - pos); - - strbuf_add(&tmp, p, eol - p); - refname = tmp.buf; - - if (check_refname_format(refname, REFNAME_ALLOW_ONELEVEL)) { - if (!refname_is_safe(refname)) - die("packed refname is dangerous: %s", refname); - oidclr(&oid); - flag |= REF_BAD_NAME | REF_ISBROKEN; - } - if (packed_refs->peeled == PEELED_FULLY || - (packed_refs->peeled == PEELED_TAGS && - starts_with(refname, "refs/tags/"))) - flag |= REF_KNOWS_PEELED; - entry = 
create_ref_entry(refname, &oid, flag); + if ((iter->flags & REF_KNOWS_PEELED)) + ref_iterator_peel(iter, &entry->u.value.peeled); add_ref_entry(dir, entry); - - pos = eol + 1; - - if (pos < eof && *pos == '^') { - p = pos + 1; - if (eof - p < GIT_SHA1_HEXSZ + 1 || - parse_oid_hex(p, &entry->u.value.peeled, &p) || - *p++ != '\n') - die_invalid_line(refs->path, pos, eof - pos); - - /* - * Regardless of what the file header said, - * we definitely know the value of *this* - * reference: - */ - entry->flag |= REF_KNOWS_PEELED; - - pos = p; - } - - strbuf_reset(&tmp); } + if (ok != ITER_DONE) + die("error reading packed-refs file %s", refs->path); + if (munmap(buf, size)) - die_errno("error ummapping packed-refs file"); + die_errno("error ummapping packed-refs file %s", refs->path); + close(fd); - strbuf_release(&tmp); return packed_refs; } From 81b9b5aea7094cdef110c4ae91d9cc601510fe8b Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Mon, 25 Sep 2017 10:00:08 +0200 Subject: [PATCH 11/21] mmapped_ref_iterator_advance(): no peeled value for broken refs If a reference is broken, suppress its peeled value. Signed-off-by: Michael Haggerty Signed-off-by: Junio C Hamano --- refs/packed-backend.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/refs/packed-backend.c b/refs/packed-backend.c index 312116a99d..724c88631d 100644 --- a/refs/packed-backend.c +++ b/refs/packed-backend.c @@ -234,9 +234,15 @@ static int mmapped_ref_iterator_advance(struct ref_iterator *ref_iterator) /* * Regardless of what the file header said, we - * definitely know the value of *this* reference: + * definitely know the value of *this* reference. 
But + * we suppress it if the reference is broken: */ - iter->base.flags |= REF_KNOWS_PEELED; + if ((iter->base.flags & REF_ISBROKEN)) { + oidclr(&iter->peeled); + iter->base.flags &= ~REF_KNOWS_PEELED; + } else { + iter->base.flags |= REF_KNOWS_PEELED; + } } else { oidclr(&iter->peeled); } From 14b3c344ead04deb7ff9680cd8cf927f9f9ac12a Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Mon, 25 Sep 2017 10:00:09 +0200 Subject: [PATCH 12/21] packed-backend.c: reorder some definitions No code has been changed. This will make subsequent patches more self-contained. Signed-off-by: Michael Haggerty Signed-off-by: Junio C Hamano --- refs/packed-backend.c | 48 +++++++++++++++++++++---------------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/refs/packed-backend.c b/refs/packed-backend.c index 724c88631d..0fe41a7203 100644 --- a/refs/packed-backend.c +++ b/refs/packed-backend.c @@ -36,30 +36,6 @@ struct packed_ref_cache { struct stat_validity validity; }; -/* - * Increment the reference count of *packed_refs. - */ -static void acquire_packed_ref_cache(struct packed_ref_cache *packed_refs) -{ - packed_refs->referrers++; -} - -/* - * Decrease the reference count of *packed_refs. If it goes to zero, - * free *packed_refs and return true; otherwise return false. - */ -static int release_packed_ref_cache(struct packed_ref_cache *packed_refs) -{ - if (!--packed_refs->referrers) { - free_ref_cache(packed_refs->cache); - stat_validity_clear(&packed_refs->validity); - free(packed_refs); - return 1; - } else { - return 0; - } -} - /* * A container for `packed-refs`-related data. It is not (yet) a * `ref_store`. @@ -92,6 +68,30 @@ struct packed_ref_store { struct tempfile tempfile; }; +/* + * Increment the reference count of *packed_refs. + */ +static void acquire_packed_ref_cache(struct packed_ref_cache *packed_refs) +{ + packed_refs->referrers++; +} + +/* + * Decrease the reference count of *packed_refs. 
If it goes to zero, + * free *packed_refs and return true; otherwise return false. + */ +static int release_packed_ref_cache(struct packed_ref_cache *packed_refs) +{ + if (!--packed_refs->referrers) { + free_ref_cache(packed_refs->cache); + stat_validity_clear(&packed_refs->validity); + free(packed_refs); + return 1; + } else { + return 0; + } +} + struct ref_store *packed_ref_store_create(const char *path, unsigned int store_flags) { From 5b633610ec7a6c10f55e4758b047892652da8e05 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Mon, 25 Sep 2017 10:00:10 +0200 Subject: [PATCH 13/21] packed_ref_cache: keep the `packed-refs` file mmapped if possible Keep a copy of the `packed-refs` file contents in memory for as long as a `packed_ref_cache` object is in use: * If the system allows it, keep the `packed-refs` file mmapped. * If not (either because the system doesn't support `mmap()` at all, or because a file that is currently mmapped cannot be replaced via `rename()`), then make a copy of the file's contents in heap-allocated space, and keep that around instead. We base the choice of behavior on a new build-time switch, `MMAP_PREVENTS_DELETE`. By default, this switch is set for Windows variants. After this commit, `MMAP_NONE` and `MMAP_TEMPORARY` are still handled identically. But the next commit will introduce a difference. This whole change is still pointless, because we only read the `packed-refs` file contents immediately after instantiating the `packed_ref_cache`. But that will soon change. Signed-off-by: Michael Haggerty Signed-off-by: Junio C Hamano --- Makefile | 6 ++ config.mak.uname | 3 + refs/packed-backend.c | 185 ++++++++++++++++++++++++++++++++---------- 3 files changed, 152 insertions(+), 42 deletions(-) diff --git a/Makefile b/Makefile index f2bb7f2f63..d34c3dfe69 100644 --- a/Makefile +++ b/Makefile @@ -200,6 +200,9 @@ all:: # # Define NO_MMAP if you want to avoid mmap. 
# +# Define MMAP_PREVENTS_DELETE if a file that is currently mmapped cannot be +# deleted or cannot be replaced using rename(). +# # Define NO_SYS_POLL_H if you don't have sys/poll.h. # # Define NO_POLL if you do not have or don't want to use poll(). @@ -1383,6 +1386,9 @@ else COMPAT_OBJS += compat/win32mmap.o endif endif +ifdef MMAP_PREVENTS_DELETE + BASIC_CFLAGS += -DMMAP_PREVENTS_DELETE +endif ifdef OBJECT_CREATION_USES_RENAMES COMPAT_CFLAGS += -DOBJECT_CREATION_MODE=1 endif diff --git a/config.mak.uname b/config.mak.uname index 6604b130f8..685a80d138 100644 --- a/config.mak.uname +++ b/config.mak.uname @@ -184,6 +184,7 @@ ifeq ($(uname_O),Cygwin) UNRELIABLE_FSTAT = UnfortunatelyYes SPARSE_FLAGS = -isystem /usr/include/w32api -Wno-one-bit-signed-bitfield OBJECT_CREATION_USES_RENAMES = UnfortunatelyNeedsTo + MMAP_PREVENTS_DELETE = UnfortunatelyYes COMPAT_OBJS += compat/cygwin.o FREAD_READS_DIRECTORIES = UnfortunatelyYes endif @@ -353,6 +354,7 @@ ifeq ($(uname_S),Windows) NO_ST_BLOCKS_IN_STRUCT_STAT = YesPlease NO_NSEC = YesPlease USE_WIN32_MMAP = YesPlease + MMAP_PREVENTS_DELETE = UnfortunatelyYes # USE_NED_ALLOCATOR = YesPlease UNRELIABLE_FSTAT = UnfortunatelyYes OBJECT_CREATION_USES_RENAMES = UnfortunatelyNeedsTo @@ -501,6 +503,7 @@ ifneq (,$(findstring MINGW,$(uname_S))) NO_ST_BLOCKS_IN_STRUCT_STAT = YesPlease NO_NSEC = YesPlease USE_WIN32_MMAP = YesPlease + MMAP_PREVENTS_DELETE = UnfortunatelyYes USE_NED_ALLOCATOR = YesPlease UNRELIABLE_FSTAT = UnfortunatelyYes OBJECT_CREATION_USES_RENAMES = UnfortunatelyNeedsTo diff --git a/refs/packed-backend.c b/refs/packed-backend.c index 0fe41a7203..75d44cf061 100644 --- a/refs/packed-backend.c +++ b/refs/packed-backend.c @@ -7,6 +7,35 @@ #include "../iterator.h" #include "../lockfile.h" +enum mmap_strategy { + /* + * Don't use mmap() at all for reading `packed-refs`. + */ + MMAP_NONE, + + /* + * Can use mmap() for reading `packed-refs`, but the file must + * not remain mmapped. 
This is the usual option on Windows, + * where you cannot rename a new version of a file onto a file + * that is currently mmapped. + */ + MMAP_TEMPORARY, + + /* + * It is OK to leave the `packed-refs` file mmapped while + * arbitrary other code is running. + */ + MMAP_OK +}; + +#if defined(NO_MMAP) +static enum mmap_strategy mmap_strategy = MMAP_NONE; +#elif defined(MMAP_PREVENTS_DELETE) +static enum mmap_strategy mmap_strategy = MMAP_TEMPORARY; +#else +static enum mmap_strategy mmap_strategy = MMAP_OK; +#endif + struct packed_ref_store; struct packed_ref_cache { @@ -18,6 +47,21 @@ struct packed_ref_cache { struct ref_cache *cache; + /* Is the `packed-refs` file currently mmapped? */ + int mmapped; + + /* + * The contents of the `packed-refs` file. If the file is + * mmapped, this points at the mmapped contents of the file. + * If not, this points at heap-allocated memory containing the + * contents. If there were no contents (e.g., because the file + * didn't exist), `buf` and `eof` are both NULL. + */ + char *buf, *eof; + + /* The size of the header line, if any; otherwise, 0: */ + size_t header_len; + /* * What is the peeled state of this cache? (This is usually * determined from the header of the "packed-refs" file.) @@ -76,6 +120,26 @@ static void acquire_packed_ref_cache(struct packed_ref_cache *packed_refs) packed_refs->referrers++; } +/* + * If the buffer in `packed_refs` is active, then either munmap the + * memory and close the file, or free the memory. Then set the buffer + * pointers to NULL. + */ +static void release_packed_ref_buffer(struct packed_ref_cache *packed_refs) +{ + if (packed_refs->mmapped) { + if (munmap(packed_refs->buf, + packed_refs->eof - packed_refs->buf)) + die_errno("error ummapping packed-refs file %s", + packed_refs->refs->path); + packed_refs->mmapped = 0; + } else { + free(packed_refs->buf); + } + packed_refs->buf = packed_refs->eof = NULL; + packed_refs->header_len = 0; +} + /* * Decrease the reference count of *packed_refs. 
If it goes to zero, * free *packed_refs and return true; otherwise return false. @@ -85,6 +149,7 @@ static int release_packed_ref_cache(struct packed_ref_cache *packed_refs) if (!--packed_refs->referrers) { free_ref_cache(packed_refs->cache); stat_validity_clear(&packed_refs->validity); + release_packed_ref_buffer(packed_refs); free(packed_refs); return 1; } else { @@ -284,13 +349,15 @@ static struct ref_iterator_vtable mmapped_ref_iterator_vtable = { }; struct ref_iterator *mmapped_ref_iterator_begin( - const char *packed_refs_file, struct packed_ref_cache *packed_refs, const char *pos, const char *eof) { struct mmapped_ref_iterator *iter = xcalloc(1, sizeof(*iter)); struct ref_iterator *ref_iterator = &iter->base; + if (!packed_refs->buf) + return empty_ref_iterator_begin(); + base_ref_iterator_init(ref_iterator, &mmapped_ref_iterator_vtable, 0); iter->packed_refs = packed_refs; @@ -304,6 +371,62 @@ struct ref_iterator *mmapped_ref_iterator_begin( return ref_iterator; } +/* + * Depending on `mmap_strategy`, either mmap or read the contents of + * the `packed-refs` file into the `packed_refs` instance. Return 1 if + * the file existed and was read, or 0 if the file was absent. Die on + * errors. + */ +static int load_contents(struct packed_ref_cache *packed_refs) +{ + int fd; + struct stat st; + size_t size; + ssize_t bytes_read; + + fd = open(packed_refs->refs->path, O_RDONLY); + if (fd < 0) { + if (errno == ENOENT) { + /* + * This is OK; it just means that no + * "packed-refs" file has been written yet, + * which is equivalent to it being empty, + * which is its state when initialized with + * zeros. 
+ */ + return 0; + } else { + die_errno("couldn't read %s", packed_refs->refs->path); + } + } + + stat_validity_update(&packed_refs->validity, fd); + + if (fstat(fd, &st) < 0) + die_errno("couldn't stat %s", packed_refs->refs->path); + size = xsize_t(st.st_size); + + switch (mmap_strategy) { + case MMAP_NONE: + case MMAP_TEMPORARY: + packed_refs->buf = xmalloc(size); + bytes_read = read_in_full(fd, packed_refs->buf, size); + if (bytes_read < 0 || bytes_read != size) + die_errno("couldn't read %s", packed_refs->refs->path); + packed_refs->eof = packed_refs->buf + size; + packed_refs->mmapped = 0; + break; + case MMAP_OK: + packed_refs->buf = xmmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0); + packed_refs->eof = packed_refs->buf + size; + packed_refs->mmapped = 1; + break; + } + close(fd); + + return 1; +} + /* * Read from the `packed-refs` file into a newly-allocated * `packed_ref_cache` and return it. The return value will already @@ -336,11 +459,6 @@ struct ref_iterator *mmapped_ref_iterator_begin( static struct packed_ref_cache *read_packed_refs(struct packed_ref_store *refs) { struct packed_ref_cache *packed_refs = xcalloc(1, sizeof(*packed_refs)); - int fd; - struct stat st; - size_t size; - char *buf; - const char *pos, *eof; struct ref_dir *dir; struct ref_iterator *iter; int ok; @@ -351,45 +469,29 @@ static struct packed_ref_cache *read_packed_refs(struct packed_ref_store *refs) packed_refs->cache->root->flag &= ~REF_INCOMPLETE; packed_refs->peeled = PEELED_NONE; - fd = open(refs->path, O_RDONLY); - if (fd < 0) { - if (errno == ENOENT) { - /* - * This is OK; it just means that no - * "packed-refs" file has been written yet, - * which is equivalent to it being empty. 
- */ - return packed_refs; - } else { - die_errno("couldn't read %s", refs->path); - } - } - - stat_validity_update(&packed_refs->validity, fd); - - if (fstat(fd, &st) < 0) - die_errno("couldn't stat %s", refs->path); - - size = xsize_t(st.st_size); - buf = xmmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0); - pos = buf; - eof = buf + size; + if (!load_contents(packed_refs)) + return packed_refs; /* If the file has a header line, process it: */ - if (pos < eof && *pos == '#') { + if (packed_refs->buf < packed_refs->eof && *packed_refs->buf == '#') { struct strbuf tmp = STRBUF_INIT; char *p; const char *eol; struct string_list traits = STRING_LIST_INIT_NODUP; - eol = memchr(pos, '\n', eof - pos); + eol = memchr(packed_refs->buf, '\n', + packed_refs->eof - packed_refs->buf); if (!eol) - die_unterminated_line(refs->path, pos, eof - pos); + die_unterminated_line(refs->path, + packed_refs->buf, + packed_refs->eof - packed_refs->buf); - strbuf_add(&tmp, pos, eol - pos); + strbuf_add(&tmp, packed_refs->buf, eol - packed_refs->buf); if (!skip_prefix(tmp.buf, "# pack-refs with:", (const char **)&p)) - die_invalid_line(refs->path, pos, eof - pos); + die_invalid_line(refs->path, + packed_refs->buf, + packed_refs->eof - packed_refs->buf); string_list_split_in_place(&traits, p, ' ', -1); @@ -400,14 +502,17 @@ static struct packed_ref_cache *read_packed_refs(struct packed_ref_store *refs) /* perhaps other traits later as well */ /* The "+ 1" is for the LF character. 
*/ - pos = eol + 1; + packed_refs->header_len = eol + 1 - packed_refs->buf; string_list_clear(&traits, 0); strbuf_release(&tmp); } dir = get_ref_dir(packed_refs->cache->root); - iter = mmapped_ref_iterator_begin(refs->path, packed_refs, pos, eof); + iter = mmapped_ref_iterator_begin( + packed_refs, + packed_refs->buf + packed_refs->header_len, + packed_refs->eof); while ((ok = ref_iterator_advance(iter)) == ITER_OK) { struct ref_entry *entry = create_ref_entry(iter->refname, iter->oid, iter->flags); @@ -420,11 +525,6 @@ static struct packed_ref_cache *read_packed_refs(struct packed_ref_store *refs) if (ok != ITER_DONE) die("error reading packed-refs file %s", refs->path); - if (munmap(buf, size)) - die_errno("error ummapping packed-refs file %s", refs->path); - - close(fd); - return packed_refs; } @@ -1031,6 +1131,8 @@ static int packed_transaction_finish(struct ref_store *ref_store, int ret = TRANSACTION_GENERIC_ERROR; char *packed_refs_path; + clear_packed_ref_cache(refs); + packed_refs_path = get_locked_file_path(&refs->lock); if (rename_tempfile(&refs->tempfile, packed_refs_path)) { strbuf_addf(err, "error replacing %s: %s", @@ -1038,7 +1140,6 @@ static int packed_transaction_finish(struct ref_store *ref_store, goto cleanup; } - clear_packed_ref_cache(refs); ret = 0; cleanup: From 02b920f3f79422582793c9b4801baf91f2f64344 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Mon, 25 Sep 2017 10:00:11 +0200 Subject: [PATCH 14/21] read_packed_refs(): ensure that references are ordered when read It doesn't actually matter now, because the references are only iterated over to fill the associated `ref_cache`, which itself puts them in the correct order. But we want to get rid of the `ref_cache`, so we want to be able to iterate directly over the `packed-refs` buffer, and then the iteration will need to be ordered correctly. In fact, we already write the `packed-refs` file sorted, but it is possible that other Git clients don't get it right. 
So let's not assume that a `packed-refs` file is sorted unless it is explicitly declared to be so via a `sorted` trait in its header line. If it is *not* declared to be sorted, then scan quickly through the file to check. If it is found to be out of order, then sort the records into a new memory-only copy. This checking and sorting is done quickly, without parsing the full file contents. However, it needs a little bit of care to avoid reading past the end of the buffer even if the `packed-refs` file is corrupt. Since *we* always write the file correctly sorted, include that trait when we write or rewrite a `packed-refs` file. This means that the scan described in the previous paragraph should only have to be done for `packed-refs` files that were written by older versions of the Git command-line client, or by other clients that haven't yet learned to write the `sorted` trait. If `packed-refs` was already sorted, then (if the system allows it) we can use the mmapped file contents directly. But if the system doesn't allow a file that is currently mmapped to be replaced using `rename()`, then it would be bad for us to keep the file mmapped for any longer than necessary. So, on such systems, always make a copy of the file contents, either as part of the sorting process, or afterwards. Signed-off-by: Michael Haggerty Signed-off-by: Junio C Hamano --- refs/packed-backend.c | 223 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 212 insertions(+), 11 deletions(-) diff --git a/refs/packed-backend.c b/refs/packed-backend.c index 75d44cf061..a7fc613c06 100644 --- a/refs/packed-backend.c +++ b/refs/packed-backend.c @@ -51,11 +51,12 @@ struct packed_ref_cache { int mmapped; /* - * The contents of the `packed-refs` file. If the file is - * mmapped, this points at the mmapped contents of the file. - * If not, this points at heap-allocated memory containing the - * contents. 
If there were no contents (e.g., because the file - * didn't exist), `buf` and `eof` are both NULL. + * The contents of the `packed-refs` file. If the file was + * already sorted, this points at the mmapped contents of the + * file. If not, this points at heap-allocated memory + * containing the contents, sorted. If there were no contents + * (e.g., because the file didn't exist), `buf` and `eof` are + * both NULL. */ char *buf, *eof; @@ -358,7 +359,7 @@ struct ref_iterator *mmapped_ref_iterator_begin( if (!packed_refs->buf) return empty_ref_iterator_begin(); - base_ref_iterator_init(ref_iterator, &mmapped_ref_iterator_vtable, 0); + base_ref_iterator_init(ref_iterator, &mmapped_ref_iterator_vtable, 1); iter->packed_refs = packed_refs; acquire_packed_ref_cache(iter->packed_refs); @@ -371,6 +372,170 @@ struct ref_iterator *mmapped_ref_iterator_begin( return ref_iterator; } +struct packed_ref_entry { + const char *start; + size_t len; +}; + +static int cmp_packed_ref_entries(const void *v1, const void *v2) +{ + const struct packed_ref_entry *e1 = v1, *e2 = v2; + const char *r1 = e1->start + GIT_SHA1_HEXSZ + 1; + const char *r2 = e2->start + GIT_SHA1_HEXSZ + 1; + + while (1) { + if (*r1 == '\n') + return *r2 == '\n' ? 0 : -1; + if (*r1 != *r2) { + if (*r2 == '\n') + return 1; + else + return (unsigned char)*r1 < (unsigned char)*r2 ? -1 : +1; + } + r1++; + r2++; + } +} + +/* + * `packed_refs->buf` is not known to be sorted. Check whether it is, + * and if not, sort it into new memory and munmap/free the old + * storage. 
+ */ +static void sort_packed_refs(struct packed_ref_cache *packed_refs) +{ + struct packed_ref_entry *entries = NULL; + size_t alloc = 0, nr = 0; + int sorted = 1; + const char *pos, *eof, *eol; + size_t len, i; + char *new_buffer, *dst; + + pos = packed_refs->buf + packed_refs->header_len; + eof = packed_refs->eof; + len = eof - pos; + + if (!len) + return; + + /* + * Initialize entries based on a crude estimate of the number + * of references in the file (we'll grow it below if needed): + */ + ALLOC_GROW(entries, len / 80 + 20, alloc); + + while (pos < eof) { + eol = memchr(pos, '\n', eof - pos); + if (!eol) + /* The safety check should prevent this. */ + BUG("unterminated line found in packed-refs"); + if (eol - pos < GIT_SHA1_HEXSZ + 2) + die_invalid_line(packed_refs->refs->path, + pos, eof - pos); + eol++; + if (eol < eof && *eol == '^') { + /* + * Keep any peeled line together with its + * reference: + */ + const char *peeled_start = eol; + + eol = memchr(peeled_start, '\n', eof - peeled_start); + if (!eol) + /* The safety check should prevent this. */ + BUG("unterminated peeled line found in packed-refs"); + eol++; + } + + ALLOC_GROW(entries, nr + 1, alloc); + entries[nr].start = pos; + entries[nr].len = eol - pos; + nr++; + + if (sorted && + nr > 1 && + cmp_packed_ref_entries(&entries[nr - 2], + &entries[nr - 1]) >= 0) + sorted = 0; + + pos = eol; + } + + if (sorted) + goto cleanup; + + /* We need to sort the memory. 
First we sort the entries array: */ + QSORT(entries, nr, cmp_packed_ref_entries); + + /* + * Allocate a new chunk of memory, and copy the old memory to + * the new in the order indicated by `entries` (not bothering + * with the header line): + */ + new_buffer = xmalloc(len); + for (dst = new_buffer, i = 0; i < nr; i++) { + memcpy(dst, entries[i].start, entries[i].len); + dst += entries[i].len; + } + + /* + * Now munmap the old buffer and use the sorted buffer in its + * place: + */ + release_packed_ref_buffer(packed_refs); + packed_refs->buf = new_buffer; + packed_refs->eof = new_buffer + len; + packed_refs->header_len = 0; + +cleanup: + free(entries); +} + +/* + * Return a pointer to the start of the record that contains the + * character `*p` (which must be within the buffer). If no other + * record start is found, return `buf`. + */ +static const char *find_start_of_record(const char *buf, const char *p) +{ + while (p > buf && (p[-1] != '\n' || p[0] == '^')) + p--; + return p; +} + +/* + * We want to be able to compare mmapped reference records quickly, + * without totally parsing them. We can do so because the records are + * LF-terminated, and the refname should start exactly (GIT_SHA1_HEXSZ + * + 1) bytes past the beginning of the record. + * + * But what if the `packed-refs` file contains garbage? We're willing + * to tolerate not detecting the problem, as long as we don't produce + * totally garbled output (we can't afford to check the integrity of + * the whole file during every Git invocation). But we do want to be + * sure that we never read past the end of the buffer in memory and + * perform an illegal memory access. + * + * Guarantee that minimum level of safety by verifying that the last + * record in the file is LF-terminated, and that it has at least + * (GIT_SHA1_HEXSZ + 1) characters before the LF. Die if either of + * these checks fails. 
+ */ +static void verify_buffer_safe(struct packed_ref_cache *packed_refs) +{ + const char *buf = packed_refs->buf + packed_refs->header_len; + const char *eof = packed_refs->eof; + const char *last_line; + + if (buf == eof) + return; + + last_line = find_start_of_record(buf, eof - 1); + if (*(eof - 1) != '\n' || eof - last_line < GIT_SHA1_HEXSZ + 2) + die_invalid_line(packed_refs->refs->path, + last_line, eof - last_line); +} + /* * Depending on `mmap_strategy`, either mmap or read the contents of * the `packed-refs` file into the `packed_refs` instance. Return 1 if @@ -408,7 +573,6 @@ static int load_contents(struct packed_ref_cache *packed_refs) switch (mmap_strategy) { case MMAP_NONE: - case MMAP_TEMPORARY: packed_refs->buf = xmalloc(size); bytes_read = read_in_full(fd, packed_refs->buf, size); if (bytes_read < 0 || bytes_read != size) @@ -416,6 +580,7 @@ static int load_contents(struct packed_ref_cache *packed_refs) packed_refs->eof = packed_refs->buf + size; packed_refs->mmapped = 0; break; + case MMAP_TEMPORARY: case MMAP_OK: packed_refs->buf = xmmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0); packed_refs->eof = packed_refs->buf + size; @@ -435,19 +600,19 @@ static int load_contents(struct packed_ref_cache *packed_refs) * A comment line of the form "# pack-refs with: " may contain zero or * more traits. We interpret the traits as follows: * - * No traits: + * Neither `peeled` nor `fully-peeled`: * * Probably no references are peeled. But if the file contains a * peeled value for a reference, we will use it. * - * peeled: + * `peeled`: * * References under "refs/tags/", if they *can* be peeled, *are* * peeled in this file. References outside of "refs/tags/" are * probably not peeled even if they could have been, but if we find * a peeled value for such a reference we will use it. * - * fully-peeled: + * `fully-peeled`: * * All references in the file that can be peeled are peeled. 
* Inversely (and this is more important), any references in the @@ -455,12 +620,17 @@ static int load_contents(struct packed_ref_cache *packed_refs) * trait should typically be written alongside "peeled" for * compatibility with older clients, but we do not require it * (i.e., "peeled" is a no-op if "fully-peeled" is set). + * + * `sorted`: + * + * The references in this file are known to be sorted by refname. */ static struct packed_ref_cache *read_packed_refs(struct packed_ref_store *refs) { struct packed_ref_cache *packed_refs = xcalloc(1, sizeof(*packed_refs)); struct ref_dir *dir; struct ref_iterator *iter; + int sorted = 0; int ok; packed_refs->refs = refs; @@ -499,6 +669,9 @@ static struct packed_ref_cache *read_packed_refs(struct packed_ref_store *refs) packed_refs->peeled = PEELED_FULLY; else if (unsorted_string_list_has_string(&traits, "peeled")) packed_refs->peeled = PEELED_TAGS; + + sorted = unsorted_string_list_has_string(&traits, "sorted"); + /* perhaps other traits later as well */ /* The "+ 1" is for the LF character. 
*/ @@ -508,6 +681,34 @@ static struct packed_ref_cache *read_packed_refs(struct packed_ref_store *refs) strbuf_release(&tmp); } + verify_buffer_safe(packed_refs); + + if (!sorted) { + sort_packed_refs(packed_refs); + + /* + * Reordering the records might have moved a short one + * to the end of the buffer, so verify the buffer's + * safety again: + */ + verify_buffer_safe(packed_refs); + } + + if (mmap_strategy != MMAP_OK && packed_refs->mmapped) { + /* + * We don't want to leave the file mmapped, so we are + * forced to make a copy now: + */ + size_t size = packed_refs->eof - + (packed_refs->buf + packed_refs->header_len); + char *buf_copy = xmalloc(size); + + memcpy(buf_copy, packed_refs->buf + packed_refs->header_len, size); + release_packed_ref_buffer(packed_refs); + packed_refs->buf = buf_copy; + packed_refs->eof = buf_copy + size; + } + dir = get_ref_dir(packed_refs->cache->root); iter = mmapped_ref_iterator_begin( packed_refs, @@ -811,7 +1012,7 @@ int packed_refs_is_locked(struct ref_store *ref_store) * the colon and the trailing space are required. */ static const char PACKED_REFS_HEADER[] = - "# pack-refs with: peeled fully-peeled \n"; + "# pack-refs with: peeled fully-peeled sorted \n"; static int packed_init_db(struct ref_store *ref_store, struct strbuf *err) { From d1cf15516feb024f34ae5fbbdad8f538b4e62fce Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Mon, 25 Sep 2017 10:00:12 +0200 Subject: [PATCH 15/21] packed_ref_iterator_begin(): iterate using `mmapped_ref_iterator` Now that we have an efficient way to iterate, in order, over the mmapped contents of the `packed-refs` file, we can use that directly to implement reference iteration for the `packed_ref_store`, rather than iterating over the `ref_cache`. This is the next step towards getting rid of the `ref_cache` entirely. 
Signed-off-by: Michael Haggerty Signed-off-by: Junio C Hamano --- refs/packed-backend.c | 109 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 106 insertions(+), 3 deletions(-) diff --git a/refs/packed-backend.c b/refs/packed-backend.c index a7fc613c06..abf14a1405 100644 --- a/refs/packed-backend.c +++ b/refs/packed-backend.c @@ -397,6 +397,27 @@ static int cmp_packed_ref_entries(const void *v1, const void *v2) } } +/* + * Compare a packed-refs record pointed to by `rec` to the specified + * NUL-terminated refname. + */ +static int cmp_entry_to_refname(const char *rec, const char *refname) +{ + const char *r1 = rec + GIT_SHA1_HEXSZ + 1; + const char *r2 = refname; + + while (1) { + if (*r1 == '\n') + return *r2 ? -1 : 0; + if (!*r2) + return 1; + if (*r1 != *r2) + return (unsigned char)*r1 < (unsigned char)*r2 ? -1 : +1; + r1++; + r2++; + } +} + /* * `packed_refs->buf` is not known to be sorted. Check whether it is, * and if not, sort it into new memory and munmap/free the old @@ -503,6 +524,17 @@ static const char *find_start_of_record(const char *buf, const char *p) return p; } +/* + * Return a pointer to the start of the record following the record + * that contains `*p`. If none is found before `end`, return `end`. + */ +static const char *find_end_of_record(const char *p, const char *end) +{ + while (++p < end && (p[-1] != '\n' || p[0] == '^')) + ; + return p; +} + /* * We want to be able to compare mmapped reference records quickly, * without totally parsing them. We can do so because the records are @@ -592,6 +624,65 @@ static int load_contents(struct packed_ref_cache *packed_refs) return 1; } +/* + * Find the place in `cache->buf` where the start of the record for + * `refname` starts. If `mustexist` is true and the reference doesn't + * exist, then return NULL. If `mustexist` is false and the reference + * doesn't exist, then return the point where that reference would be + * inserted. 
In the latter mode, `refname` doesn't have to be a proper + * reference name; for example, one could search for "refs/replace/" + * to find the start of any replace references. + * + * The record is sought using a binary search, so `cache->buf` must be + * sorted. + */ +static const char *find_reference_location(struct packed_ref_cache *cache, + const char *refname, int mustexist) +{ + /* + * This is not *quite* a garden-variety binary search, because + * the data we're searching is made up of records, and we + * always need to find the beginning of a record to do a + * comparison. A "record" here is one line for the reference + * itself and zero or one peel lines that start with '^'. Our + * loop invariant is described in the next two comments. + */ + + /* + * A pointer to the character at the start of a record whose + * preceding records all have reference names that come + * *before* `refname`. + */ + const char *lo = cache->buf + cache->header_len; + + /* + * A pointer to the first character of a record whose + * reference name comes *after* `refname`. + */ + const char *hi = cache->eof; + + while (lo < hi) { + const char *mid, *rec; + int cmp; + + mid = lo + (hi - lo) / 2; + rec = find_start_of_record(lo, mid); + cmp = cmp_entry_to_refname(rec, refname); + if (cmp < 0) { + lo = find_end_of_record(mid, hi); + } else if (cmp > 0) { + hi = rec; + } else { + return rec; + } + } + + if (mustexist) + return NULL; + else + return lo; +} + /* * Read from the `packed-refs` file into a newly-allocated * `packed_ref_cache` and return it. 
The return value will already @@ -888,6 +979,8 @@ static struct ref_iterator *packed_ref_iterator_begin( const char *prefix, unsigned int flags) { struct packed_ref_store *refs; + struct packed_ref_cache *packed_refs; + const char *start; struct packed_ref_iterator *iter; struct ref_iterator *ref_iterator; unsigned int required_flags = REF_STORE_READ; @@ -905,13 +998,23 @@ static struct ref_iterator *packed_ref_iterator_begin( * the packed-ref cache is up to date with what is on disk, * and re-reads it if not. */ + iter->cache = packed_refs = get_packed_ref_cache(refs); + acquire_packed_ref_cache(packed_refs); - iter->cache = get_packed_ref_cache(refs); - acquire_packed_ref_cache(iter->cache); - iter->iter0 = cache_ref_iterator_begin(iter->cache->cache, prefix, 0); + if (prefix && *prefix) + start = find_reference_location(packed_refs, prefix, 0); + else + start = packed_refs->buf + packed_refs->header_len; + + iter->iter0 = mmapped_ref_iterator_begin( + packed_refs, start, packed_refs->eof); iter->flags = flags; + if (prefix && *prefix) + /* Stop iteration after we've gone *past* prefix: */ + ref_iterator = prefix_ref_iterator_begin(ref_iterator, prefix, 0); + return ref_iterator; } From f3987ab36d74382852c0db03ac25e58f83178225 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Mon, 25 Sep 2017 10:00:13 +0200 Subject: [PATCH 16/21] packed_read_raw_ref(): read the reference from the mmapped buffer Instead of reading the reference from the `ref_cache`, read it directly from the mmapped buffer. 
Signed-off-by: Michael Haggerty Signed-off-by: Junio C Hamano --- refs/packed-backend.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/refs/packed-backend.c b/refs/packed-backend.c index abf14a1405..be614e79f5 100644 --- a/refs/packed-backend.c +++ b/refs/packed-backend.c @@ -876,18 +876,22 @@ static int packed_read_raw_ref(struct ref_store *ref_store, { struct packed_ref_store *refs = packed_downcast(ref_store, REF_STORE_READ, "read_raw_ref"); - - struct ref_entry *entry; + struct packed_ref_cache *packed_refs = get_packed_ref_cache(refs); + const char *rec; *type = 0; - entry = get_packed_ref(refs, refname); - if (!entry) { + rec = find_reference_location(packed_refs, refname, 1); + + if (!rec) { + /* refname is not a packed reference. */ errno = ENOENT; return -1; } - hashcpy(sha1, entry->u.value.oid.hash); + if (get_sha1_hex(rec, sha1)) + die_invalid_line(refs->path, rec, packed_refs->eof - rec); + *type = REF_ISPACKED; return 0; } From ba1c052fa616eb93a654375e8b9d59daa47c28a8 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Mon, 25 Sep 2017 10:00:14 +0200 Subject: [PATCH 17/21] ref_store: implement `refs_peel_ref()` generically We're about to stop storing packed refs in a `ref_cache`. That means that the only way we have left to optimize `peel_ref()` is by checking whether the reference being peeled is the one currently being iterated over (in `current_ref_iter`), and if so, using `ref_iterator_peel()`. But this can be done generically; it doesn't have to be implemented per-backend. So implement `refs_peel_ref()` in `refs.c` and remove the `peel_ref()` method from the refs API. This removes the last callers of a couple of functions, so delete them. More cleanup to come... 
Signed-off-by: Michael Haggerty Signed-off-by: Junio C Hamano --- refs.c | 18 +++++++++++++++++- refs/files-backend.c | 38 -------------------------------------- refs/packed-backend.c | 36 ------------------------------------ refs/refs-internal.h | 3 --- 4 files changed, 17 insertions(+), 78 deletions(-) diff --git a/refs.c b/refs.c index 101c107ee8..c5e6f79c77 100644 --- a/refs.c +++ b/refs.c @@ -1735,7 +1735,23 @@ int refs_pack_refs(struct ref_store *refs, unsigned int flags) int refs_peel_ref(struct ref_store *refs, const char *refname, unsigned char *sha1) { - return refs->be->peel_ref(refs, refname, sha1); + int flag; + unsigned char base[20]; + + if (current_ref_iter && current_ref_iter->refname == refname) { + struct object_id peeled; + + if (ref_iterator_peel(current_ref_iter, &peeled)) + return -1; + hashcpy(sha1, peeled.hash); + return 0; + } + + if (refs_read_ref_full(refs, refname, + RESOLVE_REF_READING, base, &flag)) + return -1; + + return peel_object(base, sha1); } int peel_ref(const char *refname, unsigned char *sha1) diff --git a/refs/files-backend.c b/refs/files-backend.c index 35648c89fc..7d12de88d0 100644 --- a/refs/files-backend.c +++ b/refs/files-backend.c @@ -655,43 +655,6 @@ out: return ret; } -static int files_peel_ref(struct ref_store *ref_store, - const char *refname, unsigned char *sha1) -{ - struct files_ref_store *refs = - files_downcast(ref_store, REF_STORE_READ | REF_STORE_ODB, - "peel_ref"); - int flag; - unsigned char base[20]; - - if (current_ref_iter && current_ref_iter->refname == refname) { - struct object_id peeled; - - if (ref_iterator_peel(current_ref_iter, &peeled)) - return -1; - hashcpy(sha1, peeled.hash); - return 0; - } - - if (refs_read_ref_full(ref_store, refname, - RESOLVE_REF_READING, base, &flag)) - return -1; - - /* - * If the reference is packed, read its ref_entry from the - * cache in the hope that we already know its peeled value. 
- * We only try this optimization on packed references because - * (a) forcing the filling of the loose reference cache could - * be expensive and (b) loose references anyway usually do not - * have REF_KNOWS_PEELED. - */ - if (flag & REF_ISPACKED && - !refs_peel_ref(refs->packed_ref_store, refname, sha1)) - return 0; - - return peel_object(base, sha1); -} - struct files_ref_iterator { struct ref_iterator base; @@ -3012,7 +2975,6 @@ struct ref_storage_be refs_be_files = { files_initial_transaction_commit, files_pack_refs, - files_peel_ref, files_create_symref, files_delete_refs, files_rename_ref, diff --git a/refs/packed-backend.c b/refs/packed-backend.c index be614e79f5..dbbba45502 100644 --- a/refs/packed-backend.c +++ b/refs/packed-backend.c @@ -850,26 +850,6 @@ static struct packed_ref_cache *get_packed_ref_cache(struct packed_ref_store *re return refs->cache; } -static struct ref_dir *get_packed_ref_dir(struct packed_ref_cache *packed_ref_cache) -{ - return get_ref_dir(packed_ref_cache->cache->root); -} - -static struct ref_dir *get_packed_refs(struct packed_ref_store *refs) -{ - return get_packed_ref_dir(get_packed_ref_cache(refs)); -} - -/* - * Return the ref_entry for the given refname from the packed - * references. If it does not exist, return NULL. 
- */ -static struct ref_entry *get_packed_ref(struct packed_ref_store *refs, - const char *refname) -{ - return find_ref_entry(get_packed_refs(refs), refname); -} - static int packed_read_raw_ref(struct ref_store *ref_store, const char *refname, unsigned char *sha1, struct strbuf *referent, unsigned int *type) @@ -896,21 +876,6 @@ static int packed_read_raw_ref(struct ref_store *ref_store, return 0; } -static int packed_peel_ref(struct ref_store *ref_store, - const char *refname, unsigned char *sha1) -{ - struct packed_ref_store *refs = - packed_downcast(ref_store, REF_STORE_READ | REF_STORE_ODB, - "peel_ref"); - struct ref_entry *r = get_packed_ref(refs, refname); - - if (!r || peel_entry(r, 0)) - return -1; - - hashcpy(sha1, r->u.value.peeled.hash); - return 0; -} - struct packed_ref_iterator { struct ref_iterator base; @@ -1597,7 +1562,6 @@ struct ref_storage_be refs_be_packed = { packed_initial_transaction_commit, packed_pack_refs, - packed_peel_ref, packed_create_symref, packed_delete_refs, packed_rename_ref, diff --git a/refs/refs-internal.h b/refs/refs-internal.h index d7f233beba..cc6c373f59 100644 --- a/refs/refs-internal.h +++ b/refs/refs-internal.h @@ -562,8 +562,6 @@ typedef int ref_transaction_commit_fn(struct ref_store *refs, struct strbuf *err); typedef int pack_refs_fn(struct ref_store *ref_store, unsigned int flags); -typedef int peel_ref_fn(struct ref_store *ref_store, - const char *refname, unsigned char *sha1); typedef int create_symref_fn(struct ref_store *ref_store, const char *ref_target, const char *refs_heads_master, @@ -668,7 +666,6 @@ struct ref_storage_be { ref_transaction_commit_fn *initial_transaction_commit; pack_refs_fn *pack_refs; - peel_ref_fn *peel_ref; create_symref_fn *create_symref; delete_refs_fn *delete_refs; rename_ref_fn *rename_ref; From 9dd389f3d8d220d764beaca7237ac33de5de81c7 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Mon, 25 Sep 2017 10:00:15 +0200 Subject: [PATCH 18/21] packed_ref_store: get rid of the 
`ref_cache` entirely Now that everything has been changed to read what it needs directly out of the `packed-refs` file, `packed_ref_store` doesn't need to maintain a `ref_cache` at all. So get rid of it. First of all, this will save a lot of memory and lots of little allocations. Instead of needing to store complicated parsed data structures in memory, we just mmap the file (potentially sharing memory with other processes) and parse only what we need. Moreover, since the mmapped access to the file reads only the parts of the file that it needs, this might save reading all of the data from disk at all (at least if the file starts out sorted). Signed-off-by: Michael Haggerty Signed-off-by: Junio C Hamano --- refs/packed-backend.c | 29 ++--------------------------- 1 file changed, 2 insertions(+), 27 deletions(-) diff --git a/refs/packed-backend.c b/refs/packed-backend.c index dbbba45502..3829e9c294 100644 --- a/refs/packed-backend.c +++ b/refs/packed-backend.c @@ -45,8 +45,6 @@ struct packed_ref_cache { */ struct packed_ref_store *refs; - struct ref_cache *cache; - /* Is the `packed-refs` file currently mmapped? 
*/ int mmapped; @@ -148,7 +146,6 @@ static void release_packed_ref_buffer(struct packed_ref_cache *packed_refs) static int release_packed_ref_cache(struct packed_ref_cache *packed_refs) { if (!--packed_refs->referrers) { - free_ref_cache(packed_refs->cache); stat_validity_clear(&packed_refs->validity); release_packed_ref_buffer(packed_refs); free(packed_refs); @@ -719,15 +716,10 @@ static const char *find_reference_location(struct packed_ref_cache *cache, static struct packed_ref_cache *read_packed_refs(struct packed_ref_store *refs) { struct packed_ref_cache *packed_refs = xcalloc(1, sizeof(*packed_refs)); - struct ref_dir *dir; - struct ref_iterator *iter; int sorted = 0; - int ok; packed_refs->refs = refs; acquire_packed_ref_cache(packed_refs); - packed_refs->cache = create_ref_cache(NULL, NULL); - packed_refs->cache->root->flag &= ~REF_INCOMPLETE; packed_refs->peeled = PEELED_NONE; if (!load_contents(packed_refs)) @@ -800,23 +792,6 @@ static struct packed_ref_cache *read_packed_refs(struct packed_ref_store *refs) packed_refs->eof = buf_copy + size; } - dir = get_ref_dir(packed_refs->cache->root); - iter = mmapped_ref_iterator_begin( - packed_refs, - packed_refs->buf + packed_refs->header_len, - packed_refs->eof); - while ((ok = ref_iterator_advance(iter)) == ITER_OK) { - struct ref_entry *entry = - create_ref_entry(iter->refname, iter->oid, iter->flags); - - if ((iter->flags & REF_KNOWS_PEELED)) - ref_iterator_peel(iter, &entry->u.value.peeled); - add_ref_entry(dir, entry); - } - - if (ok != ITER_DONE) - die("error reading packed-refs file %s", refs->path); - return packed_refs; } @@ -975,8 +950,8 @@ static struct ref_iterator *packed_ref_iterator_begin( else start = packed_refs->buf + packed_refs->header_len; - iter->iter0 = mmapped_ref_iterator_begin( - packed_refs, start, packed_refs->eof); + iter->iter0 = mmapped_ref_iterator_begin(packed_refs, + start, packed_refs->eof); iter->flags = flags; From a6e19bcdadf782079cf88609b928519c2dee3378 Mon Sep 17 00:00:00 
2001 From: Michael Haggerty Date: Mon, 25 Sep 2017 10:00:16 +0200 Subject: [PATCH 19/21] ref_cache: remove support for storing peeled values Now that the `packed-refs` backend doesn't use `ref_cache`, there is nobody left who might want to store peeled values of references in `ref_cache`. So remove that feature. Signed-off-by: Michael Haggerty Signed-off-by: Junio C Hamano --- refs/packed-backend.c | 9 ++++++++- refs/ref-cache.c | 42 +----------------------------------------- refs/ref-cache.h | 32 ++------------------------------ 3 files changed, 11 insertions(+), 72 deletions(-) diff --git a/refs/packed-backend.c b/refs/packed-backend.c index 3829e9c294..66e5525174 100644 --- a/refs/packed-backend.c +++ b/refs/packed-backend.c @@ -2,7 +2,6 @@ #include "../config.h" #include "../refs.h" #include "refs-internal.h" -#include "ref-cache.h" #include "packed-backend.h" #include "../iterator.h" #include "../lockfile.h" @@ -226,6 +225,14 @@ static NORETURN void die_invalid_line(const char *path, } +/* + * This value is set in `base.flags` if the peeled value of the + * current reference is known. In that case, `peeled` contains the + * correct peeled value for the reference, which might be `null_sha1` + * if the reference is not a tag or if it is broken. + */ +#define REF_KNOWS_PEELED 0x40 + /* * An iterator over a packed-refs file that is currently mmapped. 
*/ diff --git a/refs/ref-cache.c b/refs/ref-cache.c index 54dfb5218c..4f850e1b5c 100644 --- a/refs/ref-cache.c +++ b/refs/ref-cache.c @@ -38,7 +38,6 @@ struct ref_entry *create_ref_entry(const char *refname, FLEX_ALLOC_STR(ref, name, refname); oidcpy(&ref->u.value.oid, oid); - oidclr(&ref->u.value.peeled); ref->flag = flag; return ref; } @@ -491,49 +490,10 @@ static int cache_ref_iterator_advance(struct ref_iterator *ref_iterator) } } -enum peel_status peel_entry(struct ref_entry *entry, int repeel) -{ - enum peel_status status; - - if (entry->flag & REF_KNOWS_PEELED) { - if (repeel) { - entry->flag &= ~REF_KNOWS_PEELED; - oidclr(&entry->u.value.peeled); - } else { - return is_null_oid(&entry->u.value.peeled) ? - PEEL_NON_TAG : PEEL_PEELED; - } - } - if (entry->flag & REF_ISBROKEN) - return PEEL_BROKEN; - if (entry->flag & REF_ISSYMREF) - return PEEL_IS_SYMREF; - - status = peel_object(entry->u.value.oid.hash, entry->u.value.peeled.hash); - if (status == PEEL_PEELED || status == PEEL_NON_TAG) - entry->flag |= REF_KNOWS_PEELED; - return status; -} - static int cache_ref_iterator_peel(struct ref_iterator *ref_iterator, struct object_id *peeled) { - struct cache_ref_iterator *iter = - (struct cache_ref_iterator *)ref_iterator; - struct cache_ref_iterator_level *level; - struct ref_entry *entry; - - level = &iter->levels[iter->levels_nr - 1]; - - if (level->index == -1) - die("BUG: peel called before advance for cache iterator"); - - entry = level->dir->entries[level->index]; - - if (peel_entry(entry, 0)) - return -1; - oidcpy(peeled, &entry->u.value.peeled); - return 0; + return peel_object(ref_iterator->oid->hash, peeled->hash); } static int cache_ref_iterator_abort(struct ref_iterator *ref_iterator) diff --git a/refs/ref-cache.h b/refs/ref-cache.h index a082bfb06c..eda65e73ed 100644 --- a/refs/ref-cache.h +++ b/refs/ref-cache.h @@ -38,14 +38,6 @@ struct ref_value { * referred to by the last reference in the symlink chain. 
*/ struct object_id oid; - - /* - * If REF_KNOWS_PEELED, then this field holds the peeled value - * of this reference, or null if the reference is known not to - * be peelable. See the documentation for peel_ref() for an - * exact definition of "peelable". - */ - struct object_id peeled; }; /* @@ -97,21 +89,14 @@ struct ref_dir { * public values; see refs.h. */ -/* - * The field ref_entry->u.value.peeled of this value entry contains - * the correct peeled value for the reference, which might be - * null_sha1 if the reference is not a tag or if it is broken. - */ -#define REF_KNOWS_PEELED 0x10 - /* ref_entry represents a directory of references */ -#define REF_DIR 0x20 +#define REF_DIR 0x10 /* * Entry has not yet been read from disk (used only for REF_DIR * entries representing loose references) */ -#define REF_INCOMPLETE 0x40 +#define REF_INCOMPLETE 0x20 /* * A ref_entry represents either a reference or a "subdirectory" of @@ -252,17 +237,4 @@ struct ref_iterator *cache_ref_iterator_begin(struct ref_cache *cache, const char *prefix, int prime_dir); -/* - * Peel the entry (if possible) and return its new peel_status. If - * repeel is true, re-peel the entry even if there is an old peeled - * value that is already stored in it. - * - * It is OK to call this function with a packed reference entry that - * might be stale and might even refer to an object that has since - * been garbage-collected. In such a case, if the entry has - * REF_KNOWS_PEELED then leave the status unchanged and return - * PEEL_PEELED or PEEL_NON_TAG; otherwise, return PEEL_INVALID. 
- */ -enum peel_status peel_entry(struct ref_entry *entry, int repeel); - #endif /* REFS_REF_CACHE_H */ From 523ee2d7858a6b1c09ba550fd1710ebfeff27335 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Mon, 25 Sep 2017 10:00:17 +0200 Subject: [PATCH 20/21] mmapped_ref_iterator: inline into `packed_ref_iterator` Since `packed_ref_iterator` is now delegating to `mmapped_ref_iterator` rather than `cache_ref_iterator` to do the heavy lifting, there is no need to keep the two iterators separate. So "inline" `mmapped_ref_iterator` into `packed_ref_iterator`. This removes a bunch of boilerplate. Signed-off-by: Michael Haggerty Signed-off-by: Junio C Hamano --- refs/packed-backend.c | 284 +++++++++++++++++------------------------- 1 file changed, 114 insertions(+), 170 deletions(-) diff --git a/refs/packed-backend.c b/refs/packed-backend.c index 66e5525174..1ed52d7eca 100644 --- a/refs/packed-backend.c +++ b/refs/packed-backend.c @@ -225,157 +225,6 @@ static NORETURN void die_invalid_line(const char *path, } -/* - * This value is set in `base.flags` if the peeled value of the - * current reference is known. In that case, `peeled` contains the - * correct peeled value for the reference, which might be `null_sha1` - * if the reference is not a tag or if it is broken. - */ -#define REF_KNOWS_PEELED 0x40 - -/* - * An iterator over a packed-refs file that is currently mmapped. 
- */ -struct mmapped_ref_iterator { - struct ref_iterator base; - - struct packed_ref_cache *packed_refs; - - /* The current position in the mmapped file: */ - const char *pos; - - /* The end of the mmapped file: */ - const char *eof; - - struct object_id oid, peeled; - - struct strbuf refname_buf; -}; - -static int mmapped_ref_iterator_advance(struct ref_iterator *ref_iterator) -{ - struct mmapped_ref_iterator *iter = - (struct mmapped_ref_iterator *)ref_iterator; - const char *p = iter->pos, *eol; - - strbuf_reset(&iter->refname_buf); - - if (iter->pos == iter->eof) - return ref_iterator_abort(ref_iterator); - - iter->base.flags = REF_ISPACKED; - - if (iter->eof - p < GIT_SHA1_HEXSZ + 2 || - parse_oid_hex(p, &iter->oid, &p) || - !isspace(*p++)) - die_invalid_line(iter->packed_refs->refs->path, - iter->pos, iter->eof - iter->pos); - - eol = memchr(p, '\n', iter->eof - p); - if (!eol) - die_unterminated_line(iter->packed_refs->refs->path, - iter->pos, iter->eof - iter->pos); - - strbuf_add(&iter->refname_buf, p, eol - p); - iter->base.refname = iter->refname_buf.buf; - - if (check_refname_format(iter->base.refname, REFNAME_ALLOW_ONELEVEL)) { - if (!refname_is_safe(iter->base.refname)) - die("packed refname is dangerous: %s", - iter->base.refname); - oidclr(&iter->oid); - iter->base.flags |= REF_BAD_NAME | REF_ISBROKEN; - } - if (iter->packed_refs->peeled == PEELED_FULLY || - (iter->packed_refs->peeled == PEELED_TAGS && - starts_with(iter->base.refname, "refs/tags/"))) - iter->base.flags |= REF_KNOWS_PEELED; - - iter->pos = eol + 1; - - if (iter->pos < iter->eof && *iter->pos == '^') { - p = iter->pos + 1; - if (iter->eof - p < GIT_SHA1_HEXSZ + 1 || - parse_oid_hex(p, &iter->peeled, &p) || - *p++ != '\n') - die_invalid_line(iter->packed_refs->refs->path, - iter->pos, iter->eof - iter->pos); - iter->pos = p; - - /* - * Regardless of what the file header said, we - * definitely know the value of *this* reference. 
But - * we suppress it if the reference is broken: - */ - if ((iter->base.flags & REF_ISBROKEN)) { - oidclr(&iter->peeled); - iter->base.flags &= ~REF_KNOWS_PEELED; - } else { - iter->base.flags |= REF_KNOWS_PEELED; - } - } else { - oidclr(&iter->peeled); - } - - return ITER_OK; -} - -static int mmapped_ref_iterator_peel(struct ref_iterator *ref_iterator, - struct object_id *peeled) -{ - struct mmapped_ref_iterator *iter = - (struct mmapped_ref_iterator *)ref_iterator; - - if ((iter->base.flags & REF_KNOWS_PEELED)) { - oidcpy(peeled, &iter->peeled); - return is_null_oid(&iter->peeled) ? -1 : 0; - } else if ((iter->base.flags & (REF_ISBROKEN | REF_ISSYMREF))) { - return -1; - } else { - return !!peel_object(iter->oid.hash, peeled->hash); - } -} - -static int mmapped_ref_iterator_abort(struct ref_iterator *ref_iterator) -{ - struct mmapped_ref_iterator *iter = - (struct mmapped_ref_iterator *)ref_iterator; - - release_packed_ref_cache(iter->packed_refs); - strbuf_release(&iter->refname_buf); - base_ref_iterator_free(ref_iterator); - return ITER_DONE; -} - -static struct ref_iterator_vtable mmapped_ref_iterator_vtable = { - mmapped_ref_iterator_advance, - mmapped_ref_iterator_peel, - mmapped_ref_iterator_abort -}; - -struct ref_iterator *mmapped_ref_iterator_begin( - struct packed_ref_cache *packed_refs, - const char *pos, const char *eof) -{ - struct mmapped_ref_iterator *iter = xcalloc(1, sizeof(*iter)); - struct ref_iterator *ref_iterator = &iter->base; - - if (!packed_refs->buf) - return empty_ref_iterator_begin(); - - base_ref_iterator_init(ref_iterator, &mmapped_ref_iterator_vtable, 1); - - iter->packed_refs = packed_refs; - acquire_packed_ref_cache(iter->packed_refs); - iter->pos = pos; - iter->eof = eof; - strbuf_init(&iter->refname_buf, 0); - - iter->base.oid = &iter->oid; - - return ref_iterator; -} - struct packed_ref_entry { const char *start; size_t len; @@ -858,38 +707,120 @@ static int packed_read_raw_ref(struct ref_store *ref_store, return 0; } +/* + * 
This value is set in `base.flags` if the peeled value of the + * current reference is known. In that case, `peeled` contains the + * correct peeled value for the reference, which might be `null_sha1` + * if the reference is not a tag or if it is broken. + */ +#define REF_KNOWS_PEELED 0x40 + +/* + * An iterator over a packed-refs file that is currently mmapped. + */ struct packed_ref_iterator { struct ref_iterator base; - struct packed_ref_cache *cache; - struct ref_iterator *iter0; + struct packed_ref_cache *packed_refs; + + /* The current position in the mmapped file: */ + const char *pos; + + /* The end of the mmapped file: */ + const char *eof; + + struct object_id oid, peeled; + + struct strbuf refname_buf; + unsigned int flags; }; +static int next_record(struct packed_ref_iterator *iter) +{ + const char *p = iter->pos, *eol; + + strbuf_reset(&iter->refname_buf); + + if (iter->pos == iter->eof) + return ITER_DONE; + + iter->base.flags = REF_ISPACKED; + + if (iter->eof - p < GIT_SHA1_HEXSZ + 2 || + parse_oid_hex(p, &iter->oid, &p) || + !isspace(*p++)) + die_invalid_line(iter->packed_refs->refs->path, + iter->pos, iter->eof - iter->pos); + + eol = memchr(p, '\n', iter->eof - p); + if (!eol) + die_unterminated_line(iter->packed_refs->refs->path, + iter->pos, iter->eof - iter->pos); + + strbuf_add(&iter->refname_buf, p, eol - p); + iter->base.refname = iter->refname_buf.buf; + + if (check_refname_format(iter->base.refname, REFNAME_ALLOW_ONELEVEL)) { + if (!refname_is_safe(iter->base.refname)) + die("packed refname is dangerous: %s", + iter->base.refname); + oidclr(&iter->oid); + iter->base.flags |= REF_BAD_NAME | REF_ISBROKEN; + } + if (iter->packed_refs->peeled == PEELED_FULLY || + (iter->packed_refs->peeled == PEELED_TAGS && + starts_with(iter->base.refname, "refs/tags/"))) + iter->base.flags |= REF_KNOWS_PEELED; + + iter->pos = eol + 1; + + if (iter->pos < iter->eof && *iter->pos == '^') { + p = iter->pos + 1; + if (iter->eof - p < GIT_SHA1_HEXSZ + 1 || + 
parse_oid_hex(p, &iter->peeled, &p) || + *p++ != '\n') + die_invalid_line(iter->packed_refs->refs->path, + iter->pos, iter->eof - iter->pos); + iter->pos = p; + + /* + * Regardless of what the file header said, we + * definitely know the value of *this* reference. But + * we suppress it if the reference is broken: + */ + if ((iter->base.flags & REF_ISBROKEN)) { + oidclr(&iter->peeled); + iter->base.flags &= ~REF_KNOWS_PEELED; + } else { + iter->base.flags |= REF_KNOWS_PEELED; + } + } else { + oidclr(&iter->peeled); + } + + return ITER_OK; +} + static int packed_ref_iterator_advance(struct ref_iterator *ref_iterator) { struct packed_ref_iterator *iter = (struct packed_ref_iterator *)ref_iterator; int ok; - while ((ok = ref_iterator_advance(iter->iter0)) == ITER_OK) { + while ((ok = next_record(iter)) == ITER_OK) { if (iter->flags & DO_FOR_EACH_PER_WORKTREE_ONLY && - ref_type(iter->iter0->refname) != REF_TYPE_PER_WORKTREE) + ref_type(iter->base.refname) != REF_TYPE_PER_WORKTREE) continue; if (!(iter->flags & DO_FOR_EACH_INCLUDE_BROKEN) && - !ref_resolves_to_object(iter->iter0->refname, - iter->iter0->oid, - iter->iter0->flags)) + !ref_resolves_to_object(iter->base.refname, &iter->oid, + iter->flags)) continue; - iter->base.refname = iter->iter0->refname; - iter->base.oid = iter->iter0->oid; - iter->base.flags = iter->iter0->flags; return ITER_OK; } - iter->iter0 = NULL; if (ref_iterator_abort(ref_iterator) != ITER_DONE) ok = ITER_ERROR; @@ -902,7 +833,14 @@ static int packed_ref_iterator_peel(struct ref_iterator *ref_iterator, struct packed_ref_iterator *iter = (struct packed_ref_iterator *)ref_iterator; - return ref_iterator_peel(iter->iter0, peeled); + if ((iter->base.flags & REF_KNOWS_PEELED)) { + oidcpy(peeled, &iter->peeled); + return is_null_oid(&iter->peeled) ? 
-1 : 0; + } else if ((iter->base.flags & (REF_ISBROKEN | REF_ISSYMREF))) { + return -1; + } else { + return !!peel_object(iter->oid.hash, peeled->hash); + } } static int packed_ref_iterator_abort(struct ref_iterator *ref_iterator) @@ -911,10 +849,8 @@ static int packed_ref_iterator_abort(struct ref_iterator *ref_iterator) (struct packed_ref_iterator *)ref_iterator; int ok = ITER_DONE; - if (iter->iter0) - ok = ref_iterator_abort(iter->iter0); - - release_packed_ref_cache(iter->cache); + strbuf_release(&iter->refname_buf); + release_packed_ref_cache(iter->packed_refs); base_ref_iterator_free(ref_iterator); return ok; } @@ -940,6 +876,11 @@ static struct ref_iterator *packed_ref_iterator_begin( required_flags |= REF_STORE_ODB; refs = packed_downcast(ref_store, required_flags, "ref_iterator_begin"); + packed_refs = get_packed_ref_cache(refs); + + if (!packed_refs->buf) + return empty_ref_iterator_begin(); + iter = xcalloc(1, sizeof(*iter)); ref_iterator = &iter->base; base_ref_iterator_init(ref_iterator, &packed_ref_iterator_vtable, 1); @@ -949,7 +890,7 @@ static struct ref_iterator *packed_ref_iterator_begin( * the packed-ref cache is up to date with what is on disk, * and re-reads it if not. 
*/ - iter->cache = packed_refs = get_packed_ref_cache(refs); + iter->packed_refs = packed_refs; acquire_packed_ref_cache(packed_refs); if (prefix && *prefix) @@ -957,8 +898,11 @@ static struct ref_iterator *packed_ref_iterator_begin( else start = packed_refs->buf + packed_refs->header_len; - iter->iter0 = mmapped_ref_iterator_begin(packed_refs, - start, packed_refs->eof); + iter->pos = start; + iter->eof = packed_refs->eof; + strbuf_init(&iter->refname_buf, 0); + + iter->base.oid = &iter->oid; iter->flags = flags; From cff28ca94cca0fd4712aa37f6ca93430bb3351d8 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Mon, 25 Sep 2017 10:00:18 +0200 Subject: [PATCH 21/21] packed-backend.c: rename a bunch of things and update comments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We've made huge changes to this file, and some of the old names and comments are no longer very fitting. So rename a bunch of things: * `struct packed_ref_cache` → `struct snapshot` * `acquire_packed_ref_cache()` → `acquire_snapshot()` * `release_packed_ref_buffer()` → `clear_snapshot_buffer()` * `release_packed_ref_cache()` → `release_snapshot()` * `clear_packed_ref_cache()` → `clear_snapshot()` * `struct packed_ref_entry` → `struct snapshot_record` * `cmp_packed_ref_entries()` → `cmp_packed_ref_records()` * `cmp_entry_to_refname()` → `cmp_record_to_refname()` * `sort_packed_refs()` → `sort_snapshot()` * `read_packed_refs()` → `create_snapshot()` * `validate_packed_ref_cache()` → `validate_snapshot()` * `get_packed_ref_cache()` → `get_snapshot()` * Renamed local variables and struct members accordingly. Also update a bunch of comments to reflect the renaming and the accumulated changes that the code has undergone. 
Signed-off-by: Michael Haggerty Signed-off-by: Junio C Hamano --- refs/packed-backend.c | 422 +++++++++++++++++++++++------------------- 1 file changed, 232 insertions(+), 190 deletions(-) diff --git a/refs/packed-backend.c b/refs/packed-backend.c index 1ed52d7eca..d500ebfaa5 100644 --- a/refs/packed-backend.c +++ b/refs/packed-backend.c @@ -37,10 +37,30 @@ static enum mmap_strategy mmap_strategy = MMAP_OK; struct packed_ref_store; -struct packed_ref_cache { +/* + * A `snapshot` represents one snapshot of a `packed-refs` file. + * + * Normally, this will be a mmapped view of the contents of the + * `packed-refs` file at the time the snapshot was created. However, + * if the `packed-refs` file was not sorted, this might point at heap + * memory holding the contents of the `packed-refs` file with its + * records sorted by refname. + * + * `snapshot` instances are reference counted (via + * `acquire_snapshot()` and `release_snapshot()`). This is to prevent + * an instance from disappearing while an iterator is still iterating + * over it. Instances are garbage collected when their `referrers` + * count goes to zero. + * + * The most recent `snapshot`, if available, is referenced by the + * `packed_ref_store`. Its freshness is checked whenever + * `get_snapshot()` is called; if the existing snapshot is obsolete, a + * new snapshot is taken. + */ +struct snapshot { /* * A back-pointer to the packed_ref_store with which this - * cache is associated: + * snapshot is associated: */ struct packed_ref_store *refs; @@ -61,26 +81,42 @@ struct packed_ref_cache { size_t header_len; /* - * What is the peeled state of this cache? (This is usually - * determined from the header of the "packed-refs" file.) + * What is the peeled state of the `packed-refs` file that + * this snapshot represents? (This is usually determined from + * the file's header.) 
*/ enum { PEELED_NONE, PEELED_TAGS, PEELED_FULLY } peeled; /* - * Count of references to the data structure in this instance, - * including the pointer from files_ref_store::packed if any. - * The data will not be freed as long as the reference count - * is nonzero. + * Count of references to this instance, including the pointer + * from `packed_ref_store::snapshot`, if any. The instance + * will not be freed as long as the reference count is + * nonzero. */ unsigned int referrers; - /* The metadata from when this packed-refs cache was read */ + /* + * The metadata of the `packed-refs` file from which this + * snapshot was created, used to tell if the file has been + * replaced since we read it. + */ struct stat_validity validity; }; /* - * A container for `packed-refs`-related data. It is not (yet) a - * `ref_store`. + * A `ref_store` representing references stored in a `packed-refs` + * file. It implements the `ref_store` interface, though it has some + * limitations: + * + * - It cannot store symbolic references. + * + * - It cannot store reflogs. + * + * - It does not support reference renaming (though it could). + * + * On the other hand, it can be locked outside of a reference + * transaction. In that case, it remains locked even after the + * transaction is done and the new `packed-refs` file is activated. */ struct packed_ref_store { struct ref_store base; @@ -91,10 +127,10 @@ struct packed_ref_store { char *path; /* - * A cache of the values read from the `packed-refs` file, if - * it might still be current; otherwise, NULL. + * A snapshot of the values read from the `packed-refs` file, + * if it might still be current; otherwise, NULL. */ - struct packed_ref_cache *cache; + struct snapshot *snapshot; /* * Lock used for the "packed-refs" file. Note that this (and @@ -111,43 +147,42 @@ struct packed_ref_store { }; /* - * Increment the reference count of *packed_refs. + * Increment the reference count of `*snapshot`. 
*/ -static void acquire_packed_ref_cache(struct packed_ref_cache *packed_refs) +static void acquire_snapshot(struct snapshot *snapshot) { - packed_refs->referrers++; + snapshot->referrers++; } /* - * If the buffer in `packed_refs` is active, then either munmap the + * If the buffer in `snapshot` is active, then either munmap the * memory and close the file, or free the memory. Then set the buffer * pointers to NULL. */ -static void release_packed_ref_buffer(struct packed_ref_cache *packed_refs) +static void clear_snapshot_buffer(struct snapshot *snapshot) { - if (packed_refs->mmapped) { - if (munmap(packed_refs->buf, - packed_refs->eof - packed_refs->buf)) + if (snapshot->mmapped) { + if (munmap(snapshot->buf, snapshot->eof - snapshot->buf)) die_errno("error ummapping packed-refs file %s", - packed_refs->refs->path); - packed_refs->mmapped = 0; + snapshot->refs->path); + snapshot->mmapped = 0; } else { - free(packed_refs->buf); + free(snapshot->buf); } - packed_refs->buf = packed_refs->eof = NULL; - packed_refs->header_len = 0; + snapshot->buf = snapshot->eof = NULL; + snapshot->header_len = 0; } /* - * Decrease the reference count of *packed_refs. If it goes to zero, - * free *packed_refs and return true; otherwise return false. + * Decrease the reference count of `*snapshot`. If it goes to zero, + * free `*snapshot` and return true; otherwise return false. 
*/ -static int release_packed_ref_cache(struct packed_ref_cache *packed_refs) +static int release_snapshot(struct snapshot *snapshot) { - if (!--packed_refs->referrers) { - stat_validity_clear(&packed_refs->validity); - release_packed_ref_buffer(packed_refs); - free(packed_refs); + if (!--snapshot->referrers) { + stat_validity_clear(&snapshot->validity); + clear_snapshot_buffer(snapshot); + free(snapshot); return 1; } else { return 0; @@ -192,13 +227,13 @@ static struct packed_ref_store *packed_downcast(struct ref_store *ref_store, return refs; } -static void clear_packed_ref_cache(struct packed_ref_store *refs) +static void clear_snapshot(struct packed_ref_store *refs) { - if (refs->cache) { - struct packed_ref_cache *cache = refs->cache; + if (refs->snapshot) { + struct snapshot *snapshot = refs->snapshot; - refs->cache = NULL; - release_packed_ref_cache(cache); + refs->snapshot = NULL; + release_snapshot(snapshot); } } @@ -225,14 +260,14 @@ static NORETURN void die_invalid_line(const char *path, } -struct packed_ref_entry { +struct snapshot_record { const char *start; size_t len; }; -static int cmp_packed_ref_entries(const void *v1, const void *v2) +static int cmp_packed_ref_records(const void *v1, const void *v2) { - const struct packed_ref_entry *e1 = v1, *e2 = v2; + const struct snapshot_record *e1 = v1, *e2 = v2; const char *r1 = e1->start + GIT_SHA1_HEXSZ + 1; const char *r2 = e2->start + GIT_SHA1_HEXSZ + 1; @@ -251,10 +286,10 @@ static int cmp_packed_ref_entries(const void *v1, const void *v2) } /* - * Compare a packed-refs record pointed to by `rec` to the specified - * NUL-terminated refname. + * Compare a snapshot record at `rec` to the specified NUL-terminated + * refname. 
*/ -static int cmp_entry_to_refname(const char *rec, const char *refname) +static int cmp_record_to_refname(const char *rec, const char *refname) { const char *r1 = rec + GIT_SHA1_HEXSZ + 1; const char *r2 = refname; @@ -272,31 +307,30 @@ static int cmp_entry_to_refname(const char *rec, const char *refname) } /* - * `packed_refs->buf` is not known to be sorted. Check whether it is, - * and if not, sort it into new memory and munmap/free the old - * storage. + * `snapshot->buf` is not known to be sorted. Check whether it is, and + * if not, sort it into new memory and munmap/free the old storage. */ -static void sort_packed_refs(struct packed_ref_cache *packed_refs) +static void sort_snapshot(struct snapshot *snapshot) { - struct packed_ref_entry *entries = NULL; + struct snapshot_record *records = NULL; size_t alloc = 0, nr = 0; int sorted = 1; const char *pos, *eof, *eol; size_t len, i; char *new_buffer, *dst; - pos = packed_refs->buf + packed_refs->header_len; - eof = packed_refs->eof; + pos = snapshot->buf + snapshot->header_len; + eof = snapshot->eof; len = eof - pos; if (!len) return; /* - * Initialize entries based on a crude estimate of the number + * Initialize records based on a crude estimate of the number * of references in the file (we'll grow it below if needed): */ - ALLOC_GROW(entries, len / 80 + 20, alloc); + ALLOC_GROW(records, len / 80 + 20, alloc); while (pos < eof) { eol = memchr(pos, '\n', eof - pos); @@ -304,7 +338,7 @@ static void sort_packed_refs(struct packed_ref_cache *packed_refs) /* The safety check should prevent this. 
*/ BUG("unterminated line found in packed-refs"); if (eol - pos < GIT_SHA1_HEXSZ + 2) - die_invalid_line(packed_refs->refs->path, + die_invalid_line(snapshot->refs->path, pos, eof - pos); eol++; if (eol < eof && *eol == '^') { @@ -321,15 +355,15 @@ static void sort_packed_refs(struct packed_ref_cache *packed_refs) eol++; } - ALLOC_GROW(entries, nr + 1, alloc); - entries[nr].start = pos; - entries[nr].len = eol - pos; + ALLOC_GROW(records, nr + 1, alloc); + records[nr].start = pos; + records[nr].len = eol - pos; nr++; if (sorted && nr > 1 && - cmp_packed_ref_entries(&entries[nr - 2], - &entries[nr - 1]) >= 0) + cmp_packed_ref_records(&records[nr - 2], + &records[nr - 1]) >= 0) sorted = 0; pos = eol; @@ -338,31 +372,31 @@ static void sort_packed_refs(struct packed_ref_cache *packed_refs) if (sorted) goto cleanup; - /* We need to sort the memory. First we sort the entries array: */ - QSORT(entries, nr, cmp_packed_ref_entries); + /* We need to sort the memory. First we sort the records array: */ + QSORT(records, nr, cmp_packed_ref_records); /* * Allocate a new chunk of memory, and copy the old memory to - * the new in the order indicated by `entries` (not bothering + * the new in the order indicated by `records` (not bothering * with the header line): */ new_buffer = xmalloc(len); for (dst = new_buffer, i = 0; i < nr; i++) { - memcpy(dst, entries[i].start, entries[i].len); - dst += entries[i].len; + memcpy(dst, records[i].start, records[i].len); + dst += records[i].len; } /* * Now munmap the old buffer and use the sorted buffer in its * place: */ - release_packed_ref_buffer(packed_refs); - packed_refs->buf = new_buffer; - packed_refs->eof = new_buffer + len; - packed_refs->header_len = 0; + clear_snapshot_buffer(snapshot); + snapshot->buf = new_buffer; + snapshot->eof = new_buffer + len; + snapshot->header_len = 0; cleanup: - free(entries); + free(records); } /* @@ -406,10 +440,10 @@ static const char *find_end_of_record(const char *p, const char *end) * 
(GIT_SHA1_HEXSZ + 1) characters before the LF. Die if either of * these checks fails. */ -static void verify_buffer_safe(struct packed_ref_cache *packed_refs) +static void verify_buffer_safe(struct snapshot *snapshot) { - const char *buf = packed_refs->buf + packed_refs->header_len; - const char *eof = packed_refs->eof; + const char *buf = snapshot->buf + snapshot->header_len; + const char *eof = snapshot->eof; const char *last_line; if (buf == eof) @@ -417,24 +451,23 @@ static void verify_buffer_safe(struct packed_ref_cache *packed_refs) last_line = find_start_of_record(buf, eof - 1); if (*(eof - 1) != '\n' || eof - last_line < GIT_SHA1_HEXSZ + 2) - die_invalid_line(packed_refs->refs->path, + die_invalid_line(snapshot->refs->path, last_line, eof - last_line); } /* * Depending on `mmap_strategy`, either mmap or read the contents of - * the `packed-refs` file into the `packed_refs` instance. Return 1 if - * the file existed and was read, or 0 if the file was absent. Die on - * errors. + * the `packed-refs` file into the snapshot. Return 1 if the file + * existed and was read, or 0 if the file was absent. Die on errors. 
*/ -static int load_contents(struct packed_ref_cache *packed_refs) +static int load_contents(struct snapshot *snapshot) { int fd; struct stat st; size_t size; ssize_t bytes_read; - fd = open(packed_refs->refs->path, O_RDONLY); + fd = open(snapshot->refs->path, O_RDONLY); if (fd < 0) { if (errno == ENOENT) { /* @@ -446,30 +479,30 @@ static int load_contents(struct packed_ref_cache *packed_refs) */ return 0; } else { - die_errno("couldn't read %s", packed_refs->refs->path); + die_errno("couldn't read %s", snapshot->refs->path); } } - stat_validity_update(&packed_refs->validity, fd); + stat_validity_update(&snapshot->validity, fd); if (fstat(fd, &st) < 0) - die_errno("couldn't stat %s", packed_refs->refs->path); + die_errno("couldn't stat %s", snapshot->refs->path); size = xsize_t(st.st_size); switch (mmap_strategy) { case MMAP_NONE: - packed_refs->buf = xmalloc(size); - bytes_read = read_in_full(fd, packed_refs->buf, size); + snapshot->buf = xmalloc(size); + bytes_read = read_in_full(fd, snapshot->buf, size); if (bytes_read < 0 || bytes_read != size) - die_errno("couldn't read %s", packed_refs->refs->path); - packed_refs->eof = packed_refs->buf + size; - packed_refs->mmapped = 0; + die_errno("couldn't read %s", snapshot->refs->path); + snapshot->eof = snapshot->buf + size; + snapshot->mmapped = 0; break; case MMAP_TEMPORARY: case MMAP_OK: - packed_refs->buf = xmmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0); - packed_refs->eof = packed_refs->buf + size; - packed_refs->mmapped = 1; + snapshot->buf = xmmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0); + snapshot->eof = snapshot->buf + size; + snapshot->mmapped = 1; break; } close(fd); @@ -478,7 +511,7 @@ static int load_contents(struct packed_ref_cache *packed_refs) } /* - * Find the place in `cache->buf` where the start of the record for + * Find the place in `snapshot->buf` where the start of the record for * `refname` starts. If `mustexist` is true and the reference doesn't * exist, then return NULL. 
If `mustexist` is false and the reference * doesn't exist, then return the point where that reference would be @@ -486,10 +519,10 @@ static int load_contents(struct packed_ref_cache *packed_refs) * reference name; for example, one could search for "refs/replace/" * to find the start of any replace references. * - * The record is sought using a binary search, so `cache->buf` must be - * sorted. + * The record is sought using a binary search, so `snapshot->buf` must + * be sorted. */ -static const char *find_reference_location(struct packed_ref_cache *cache, +static const char *find_reference_location(struct snapshot *snapshot, const char *refname, int mustexist) { /* @@ -506,13 +539,13 @@ static const char *find_reference_location(struct packed_ref_cache *cache, * preceding records all have reference names that come * *before* `refname`. */ - const char *lo = cache->buf + cache->header_len; + const char *lo = snapshot->buf + snapshot->header_len; /* * A pointer to a the first character of a record whose * reference name comes *after* `refname`. */ - const char *hi = cache->eof; + const char *hi = snapshot->eof; while (lo < hi) { const char *mid, *rec; @@ -520,7 +553,7 @@ static const char *find_reference_location(struct packed_ref_cache *cache, mid = lo + (hi - lo) / 2; rec = find_start_of_record(lo, mid); - cmp = cmp_entry_to_refname(rec, refname); + cmp = cmp_record_to_refname(rec, refname); if (cmp < 0) { lo = find_end_of_record(mid, hi); } else if (cmp > 0) { @@ -537,9 +570,9 @@ static const char *find_reference_location(struct packed_ref_cache *cache, } /* - * Read from the `packed-refs` file into a newly-allocated - * `packed_ref_cache` and return it. The return value will already - * have its reference count incremented. + * Create a newly-allocated `snapshot` of the `packed-refs` file in + * its current state and return it. The return value will already have + * its reference count incremented. 
* * A comment line of the form "# pack-refs with: " may contain zero or * more traits. We interpret the traits as follows: @@ -569,116 +602,117 @@ static const char *find_reference_location(struct packed_ref_cache *cache, * * The references in this file are known to be sorted by refname. */ -static struct packed_ref_cache *read_packed_refs(struct packed_ref_store *refs) +static struct snapshot *create_snapshot(struct packed_ref_store *refs) { - struct packed_ref_cache *packed_refs = xcalloc(1, sizeof(*packed_refs)); + struct snapshot *snapshot = xcalloc(1, sizeof(*snapshot)); int sorted = 0; - packed_refs->refs = refs; - acquire_packed_ref_cache(packed_refs); - packed_refs->peeled = PEELED_NONE; + snapshot->refs = refs; + acquire_snapshot(snapshot); + snapshot->peeled = PEELED_NONE; - if (!load_contents(packed_refs)) - return packed_refs; + if (!load_contents(snapshot)) + return snapshot; /* If the file has a header line, process it: */ - if (packed_refs->buf < packed_refs->eof && *packed_refs->buf == '#') { + if (snapshot->buf < snapshot->eof && *snapshot->buf == '#') { struct strbuf tmp = STRBUF_INIT; char *p; const char *eol; struct string_list traits = STRING_LIST_INIT_NODUP; - eol = memchr(packed_refs->buf, '\n', - packed_refs->eof - packed_refs->buf); + eol = memchr(snapshot->buf, '\n', + snapshot->eof - snapshot->buf); if (!eol) die_unterminated_line(refs->path, - packed_refs->buf, - packed_refs->eof - packed_refs->buf); + snapshot->buf, + snapshot->eof - snapshot->buf); - strbuf_add(&tmp, packed_refs->buf, eol - packed_refs->buf); + strbuf_add(&tmp, snapshot->buf, eol - snapshot->buf); if (!skip_prefix(tmp.buf, "# pack-refs with:", (const char **)&p)) die_invalid_line(refs->path, - packed_refs->buf, - packed_refs->eof - packed_refs->buf); + snapshot->buf, + snapshot->eof - snapshot->buf); string_list_split_in_place(&traits, p, ' ', -1); if (unsorted_string_list_has_string(&traits, "fully-peeled")) - packed_refs->peeled = PEELED_FULLY; + snapshot->peeled = 
PEELED_FULLY; else if (unsorted_string_list_has_string(&traits, "peeled")) - packed_refs->peeled = PEELED_TAGS; + snapshot->peeled = PEELED_TAGS; sorted = unsorted_string_list_has_string(&traits, "sorted"); /* perhaps other traits later as well */ /* The "+ 1" is for the LF character. */ - packed_refs->header_len = eol + 1 - packed_refs->buf; + snapshot->header_len = eol + 1 - snapshot->buf; string_list_clear(&traits, 0); strbuf_release(&tmp); } - verify_buffer_safe(packed_refs); + verify_buffer_safe(snapshot); if (!sorted) { - sort_packed_refs(packed_refs); + sort_snapshot(snapshot); /* * Reordering the records might have moved a short one * to the end of the buffer, so verify the buffer's * safety again: */ - verify_buffer_safe(packed_refs); + verify_buffer_safe(snapshot); } - if (mmap_strategy != MMAP_OK && packed_refs->mmapped) { + if (mmap_strategy != MMAP_OK && snapshot->mmapped) { /* * We don't want to leave the file mmapped, so we are * forced to make a copy now: */ - size_t size = packed_refs->eof - - (packed_refs->buf + packed_refs->header_len); + size_t size = snapshot->eof - + (snapshot->buf + snapshot->header_len); char *buf_copy = xmalloc(size); - memcpy(buf_copy, packed_refs->buf + packed_refs->header_len, size); - release_packed_ref_buffer(packed_refs); - packed_refs->buf = buf_copy; - packed_refs->eof = buf_copy + size; + memcpy(buf_copy, snapshot->buf + snapshot->header_len, size); + clear_snapshot_buffer(snapshot); + snapshot->buf = buf_copy; + snapshot->eof = buf_copy + size; } - return packed_refs; + return snapshot; } /* - * Check that the packed refs cache (if any) still reflects the - * contents of the file. If not, clear the cache. + * Check that `refs->snapshot` (if present) still reflects the + * contents of the `packed-refs` file. If not, clear the snapshot. 
*/ -static void validate_packed_ref_cache(struct packed_ref_store *refs) +static void validate_snapshot(struct packed_ref_store *refs) { - if (refs->cache && - !stat_validity_check(&refs->cache->validity, refs->path)) - clear_packed_ref_cache(refs); + if (refs->snapshot && + !stat_validity_check(&refs->snapshot->validity, refs->path)) + clear_snapshot(refs); } /* - * Get the packed_ref_cache for the specified packed_ref_store, - * creating and populating it if it hasn't been read before or if the - * file has been changed (according to its `validity` field) since it - * was last read. On the other hand, if we hold the lock, then assume - * that the file hasn't been changed out from under us, so skip the - * extra `stat()` call in `stat_validity_check()`. + * Get the `snapshot` for the specified packed_ref_store, creating and + * populating it if it hasn't been read before or if the file has been + * changed (according to its `validity` field) since it was last read. + * On the other hand, if we hold the lock, then assume that the file + * hasn't been changed out from under us, so skip the extra `stat()` + * call in `stat_validity_check()`. This function does *not* increase + * the snapshot's reference count on behalf of the caller. 
*/ -static struct packed_ref_cache *get_packed_ref_cache(struct packed_ref_store *refs) +static struct snapshot *get_snapshot(struct packed_ref_store *refs) { if (!is_lock_file_locked(&refs->lock)) - validate_packed_ref_cache(refs); + validate_snapshot(refs); - if (!refs->cache) - refs->cache = read_packed_refs(refs); + if (!refs->snapshot) + refs->snapshot = create_snapshot(refs); - return refs->cache; + return refs->snapshot; } static int packed_read_raw_ref(struct ref_store *ref_store, @@ -687,12 +721,12 @@ static int packed_read_raw_ref(struct ref_store *ref_store, { struct packed_ref_store *refs = packed_downcast(ref_store, REF_STORE_READ, "read_raw_ref"); - struct packed_ref_cache *packed_refs = get_packed_ref_cache(refs); + struct snapshot *snapshot = get_snapshot(refs); const char *rec; *type = 0; - rec = find_reference_location(packed_refs, refname, 1); + rec = find_reference_location(snapshot, refname, 1); if (!rec) { /* refname is not a packed reference. */ @@ -701,7 +735,7 @@ static int packed_read_raw_ref(struct ref_store *ref_store, } if (get_sha1_hex(rec, sha1)) - die_invalid_line(refs->path, rec, packed_refs->eof - rec); + die_invalid_line(refs->path, rec, snapshot->eof - rec); *type = REF_ISPACKED; return 0; @@ -716,26 +750,33 @@ static int packed_read_raw_ref(struct ref_store *ref_store, #define REF_KNOWS_PEELED 0x40 /* - * An iterator over a packed-refs file that is currently mmapped. + * An iterator over a snapshot of a `packed-refs` file. 
*/ struct packed_ref_iterator { struct ref_iterator base; - struct packed_ref_cache *packed_refs; + struct snapshot *snapshot; - /* The current position in the mmapped file: */ + /* The current position in the snapshot's buffer: */ const char *pos; - /* The end of the mmapped file: */ + /* The end of the part of the buffer that will be iterated over: */ const char *eof; + /* Scratch space for current values: */ struct object_id oid, peeled; - struct strbuf refname_buf; unsigned int flags; }; +/* + * Move the iterator to the next record in the snapshot, without + * respect for whether the record is actually required by the current + * iteration. Adjust the fields in `iter` and return `ITER_OK` or + * `ITER_DONE`. This function does not free the iterator in the case + * of `ITER_DONE`. + */ static int next_record(struct packed_ref_iterator *iter) { const char *p = iter->pos, *eol; @@ -750,12 +791,12 @@ static int next_record(struct packed_ref_iterator *iter) if (iter->eof - p < GIT_SHA1_HEXSZ + 2 || parse_oid_hex(p, &iter->oid, &p) || !isspace(*p++)) - die_invalid_line(iter->packed_refs->refs->path, + die_invalid_line(iter->snapshot->refs->path, iter->pos, iter->eof - iter->pos); eol = memchr(p, '\n', iter->eof - p); if (!eol) - die_unterminated_line(iter->packed_refs->refs->path, + die_unterminated_line(iter->snapshot->refs->path, iter->pos, iter->eof - iter->pos); strbuf_add(&iter->refname_buf, p, eol - p); @@ -768,8 +809,8 @@ static int next_record(struct packed_ref_iterator *iter) oidclr(&iter->oid); iter->base.flags |= REF_BAD_NAME | REF_ISBROKEN; } - if (iter->packed_refs->peeled == PEELED_FULLY || - (iter->packed_refs->peeled == PEELED_TAGS && + if (iter->snapshot->peeled == PEELED_FULLY || + (iter->snapshot->peeled == PEELED_TAGS && starts_with(iter->base.refname, "refs/tags/"))) iter->base.flags |= REF_KNOWS_PEELED; @@ -780,7 +821,7 @@ static int next_record(struct packed_ref_iterator *iter) if (iter->eof - p < GIT_SHA1_HEXSZ + 1 || parse_oid_hex(p, 
&iter->peeled, &p) || *p++ != '\n') - die_invalid_line(iter->packed_refs->refs->path, + die_invalid_line(iter->snapshot->refs->path, iter->pos, iter->eof - iter->pos); iter->pos = p; @@ -850,7 +891,7 @@ static int packed_ref_iterator_abort(struct ref_iterator *ref_iterator) int ok = ITER_DONE; strbuf_release(&iter->refname_buf); - release_packed_ref_cache(iter->packed_refs); + release_snapshot(iter->snapshot); base_ref_iterator_free(ref_iterator); return ok; } @@ -866,7 +907,7 @@ static struct ref_iterator *packed_ref_iterator_begin( const char *prefix, unsigned int flags) { struct packed_ref_store *refs; - struct packed_ref_cache *packed_refs; + struct snapshot *snapshot; const char *start; struct packed_ref_iterator *iter; struct ref_iterator *ref_iterator; @@ -876,30 +917,30 @@ static struct ref_iterator *packed_ref_iterator_begin( required_flags |= REF_STORE_ODB; refs = packed_downcast(ref_store, required_flags, "ref_iterator_begin"); - packed_refs = get_packed_ref_cache(refs); + /* + * Note that `get_snapshot()` internally checks whether the + * snapshot is up to date with what is on disk, and re-reads + * it if not. + */ + snapshot = get_snapshot(refs); - if (!packed_refs->buf) + if (!snapshot->buf) return empty_ref_iterator_begin(); iter = xcalloc(1, sizeof(*iter)); ref_iterator = &iter->base; base_ref_iterator_init(ref_iterator, &packed_ref_iterator_vtable, 1); - /* - * Note that get_packed_ref_cache() internally checks whether - * the packed-ref cache is up to date with what is on disk, - * and re-reads it if not. 
- */ - iter->packed_refs = packed_refs; - acquire_packed_ref_cache(packed_refs); + iter->snapshot = snapshot; + acquire_snapshot(snapshot); if (prefix && *prefix) - start = find_reference_location(packed_refs, prefix, 0); + start = find_reference_location(snapshot, prefix, 0); else - start = packed_refs->buf + packed_refs->header_len; + start = snapshot->buf + snapshot->header_len; iter->pos = start; - iter->eof = packed_refs->eof; + iter->eof = snapshot->eof; strbuf_init(&iter->refname_buf, 0); iter->base.oid = &iter->oid; @@ -963,19 +1004,19 @@ int packed_refs_lock(struct ref_store *ref_store, int flags, struct strbuf *err) /* * Now that we hold the `packed-refs` lock, make sure that our - * cache matches the current version of the file. Normally - * `get_packed_ref_cache()` does that for us, but that - * function assumes that when the file is locked, any existing - * cache is still valid. We've just locked the file, but it - * might have changed the moment *before* we locked it. + * snapshot matches the current version of the file. Normally + * `get_snapshot()` does that for us, but that function + * assumes that when the file is locked, any existing snapshot + * is still valid. We've just locked the file, but it might + * have changed the moment *before* we locked it. */ - validate_packed_ref_cache(refs); + validate_snapshot(refs); /* * Now make sure that the packed-refs file as it exists in the - * locked state is loaded into the cache: + * locked state is loaded into the snapshot: */ - get_packed_ref_cache(refs); + get_snapshot(refs); return 0; } @@ -1002,8 +1043,8 @@ int packed_refs_is_locked(struct ref_store *ref_store) } /* - * The packed-refs header line that we write out. Perhaps other - * traits will be added later. + * The packed-refs header line that we write out. Perhaps other traits + * will be added later. * * Note that earlier versions of Git used to parse these traits by * looking for " trait " in the line. 
For this reason, the space after @@ -1019,9 +1060,9 @@ static int packed_init_db(struct ref_store *ref_store, struct strbuf *err) } /* - * Write the packed-refs from the cache to the packed-refs tempfile, - * incorporating any changes from `updates`. `updates` must be a - * sorted string list whose keys are the refnames and whose util + * Write the packed refs from the current snapshot to the packed-refs + * tempfile, incorporating any changes from `updates`. `updates` must + * be a sorted string list whose keys are the refnames and whose util * values are `struct ref_update *`. On error, rollback the tempfile, * write an error message to `err`, and return a nonzero value. * @@ -1262,9 +1303,10 @@ static int packed_transaction_prepare(struct ref_store *ref_store, /* * Note that we *don't* skip transactions with zero updates, * because such a transaction might be executed for the side - * effect of ensuring that all of the references are peeled. - * If the caller wants to optimize away empty transactions, it - * should do so itself. + * effect of ensuring that all of the references are peeled or + * ensuring that the `packed-refs` file is sorted. If the + * caller wants to optimize away empty transactions, it should + * do so itself. */ data = xcalloc(1, sizeof(*data)); @@ -1330,7 +1372,7 @@ static int packed_transaction_finish(struct ref_store *ref_store, int ret = TRANSACTION_GENERIC_ERROR; char *packed_refs_path; - clear_packed_ref_cache(refs); + clear_snapshot(refs); packed_refs_path = get_locked_file_path(&refs->lock); if (rename_tempfile(&refs->tempfile, packed_refs_path)) {