diff --git a/Documentation/technical/commit-graph.txt b/Documentation/technical/commit-graph.txt index 0550c6d0dc..e1a883eb46 100644 --- a/Documentation/technical/commit-graph.txt +++ b/Documentation/technical/commit-graph.txt @@ -77,6 +77,29 @@ in the commit graph. We can treat these commits as having "infinite" generation number and walk until reaching commits with known generation number. +We use the macro GENERATION_NUMBER_INFINITY = 0xFFFFFFFF to mark commits not +in the commit-graph file. If a commit-graph file was written by a version +of Git that did not compute generation numbers, then those commits will +have generation number represented by the macro GENERATION_NUMBER_ZERO = 0. + +Since the commit-graph file is closed under reachability, we can guarantee +the following weaker condition on all commits: + + If A and B are commits with generation numbers N amd M, respectively, + and N < M, then A cannot reach B. + +Note how the strict inequality differs from the inequality when we have +fully-computed generation numbers. Using strict inequality may result in +walking a few extra commits, but the simplicity in dealing with commits +with generation number *_INFINITY or *_ZERO is valuable. + +We use the macro GENERATION_NUMBER_MAX = 0x3FFFFFFF to for commits whose +generation numbers are computed to be at least this value. We limit at +this value since it is the largest value that can be stored in the +commit-graph file using the 30 bits available to generation numbers. This +presents another case where a commit can have generation number equal to +that of a parent. + Design Details -------------- @@ -98,18 +121,14 @@ Future Work - The 'commit-graph' subcommand does not have a "verify" mode that is necessary for integration with fsck. -- The file format includes room for precomputed generation numbers. These - are not currently computed, so all generation numbers will be marked as - 0 (or "uncomputed"). A later patch will include this calculation. - - After computing and storing generation numbers, we must make graph walks aware of generation numbers to gain the performance benefits they enable. This will mostly be accomplished by swapping a commit-date-ordered priority queue with one ordered by generation number. The following operations are important candidates: - - paint_down_to_common() - 'log --topo-order' + - 'tag --merged' - Currently, parse_commit_gently() requires filling in the root tree object for a commit. This passes through lookup_tree() and consequently diff --git a/alloc.c b/alloc.c index cf4f8b61e1..e8ab14f4a1 100644 --- a/alloc.c +++ b/alloc.c @@ -94,6 +94,7 @@ void *alloc_commit_node(void) c->object.type = OBJ_COMMIT; c->index = alloc_commit_index(); c->graph_pos = COMMIT_NOT_FROM_GRAPH; + c->generation = GENERATION_NUMBER_INFINITY; return c; } diff --git a/builtin/merge.c b/builtin/merge.c index fb4faca250..4a4c09496c 100644 --- a/builtin/merge.c +++ b/builtin/merge.c @@ -1186,14 +1186,15 @@ int cmd_merge(int argc, const char **argv, const char *prefix) branch = branch_to_free = resolve_refdup("HEAD", 0, &head_oid, NULL); if (branch) skip_prefix(branch, "refs/heads/", &branch); + + init_diff_ui_defaults(); + git_config(git_merge_config, NULL); + if (!branch || is_null_oid(&head_oid)) head_commit = NULL; else head_commit = lookup_commit_or_die(&head_oid, "HEAD"); - init_diff_ui_defaults(); - git_config(git_merge_config, NULL); - if (branch_mergeoptions) parse_branch_merge_options(branch_mergeoptions); argc = parse_options(argc, argv, prefix, builtin_merge_options, diff --git a/commit-graph.c b/commit-graph.c index 4c6127088f..b63a1fc85e 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -1,5 +1,6 @@ #include "cache.h" #include "config.h" +#include "dir.h" #include "git-compat-util.h" #include "lockfile.h" #include "pack.h" @@ -248,6 +249,13 @@ static struct commit_list **insert_parent_or_die(struct commit_graph *g, return &commit_list_insert(c, pptr)->next; } +static void fill_commit_graph_info(struct commit *item, struct commit_graph *g, uint32_t pos) +{ + const unsigned char *commit_data = g->chunk_commit_data + GRAPH_DATA_WIDTH * pos; + item->graph_pos = pos; + item->generation = get_be32(commit_data + g->hash_len + 8) >> 2; +} + static int fill_commit_in_graph(struct commit *item, struct commit_graph *g, uint32_t pos) { uint32_t edge_value; @@ -265,6 +273,8 @@ static int fill_commit_in_graph(struct commit *item, struct commit_graph *g, uin date_low = get_be32(commit_data + g->hash_len + 12); item->date = (timestamp_t)((date_high << 32) | date_low); + item->generation = get_be32(commit_data + g->hash_len + 8) >> 2; + pptr = &item->parents; edge_value = get_be32(commit_data + g->hash_len); @@ -293,31 +303,40 @@ static int fill_commit_in_graph(struct commit *item, struct commit_graph *g, uin return 1; } +static int find_commit_in_graph(struct commit *item, struct commit_graph *g, uint32_t *pos) +{ + if (item->graph_pos != COMMIT_NOT_FROM_GRAPH) { + *pos = item->graph_pos; + return 1; + } else { + return bsearch_graph(g, &(item->object.oid), pos); + } +} + int parse_commit_in_graph(struct commit *item) { + uint32_t pos; + if (!core_commit_graph) return 0; if (item->object.parsed) return 1; - prepare_commit_graph(); - if (commit_graph) { - uint32_t pos; - int found; - if (item->graph_pos != COMMIT_NOT_FROM_GRAPH) { - pos = item->graph_pos; - found = 1; - } else { - found = bsearch_graph(commit_graph, &(item->object.oid), &pos); - } - - if (found) - return fill_commit_in_graph(item, commit_graph, pos); - } - + if (commit_graph && find_commit_in_graph(item, commit_graph, &pos)) + return fill_commit_in_graph(item, commit_graph, pos); return 0; } +void load_commit_graph_info(struct commit *item) +{ + uint32_t pos; + if (!core_commit_graph) + return; + prepare_commit_graph(); + if (commit_graph && find_commit_in_graph(item, commit_graph, &pos)) + fill_commit_graph_info(item, commit_graph, pos); +} + static struct tree *load_tree_for_commit(struct commit_graph *g, struct commit *c) { struct object_id oid; @@ -440,6 +459,8 @@ static void write_graph_chunk_data(struct hashfile *f, int hash_len, else packedDate[0] = 0; + packedDate[0] |= htonl((*list)->generation << 2); + packedDate[1] = htonl((*list)->date); hashwrite(f, packedDate, 8); @@ -572,6 +593,45 @@ static void close_reachable(struct packed_oid_list *oids) } } +static void compute_generation_numbers(struct packed_commit_list* commits) +{ + int i; + struct commit_list *list = NULL; + + for (i = 0; i < commits->nr; i++) { + if (commits->list[i]->generation != GENERATION_NUMBER_INFINITY && + commits->list[i]->generation != GENERATION_NUMBER_ZERO) + continue; + + commit_list_insert(commits->list[i], &list); + while (list) { + struct commit *current = list->item; + struct commit_list *parent; + int all_parents_computed = 1; + uint32_t max_generation = 0; + + for (parent = current->parents; parent; parent = parent->next) { + if (parent->item->generation == GENERATION_NUMBER_INFINITY || + parent->item->generation == GENERATION_NUMBER_ZERO) { + all_parents_computed = 0; + commit_list_insert(parent->item, &list); + break; + } else if (parent->item->generation > max_generation) { + max_generation = parent->item->generation; + } + } + + if (all_parents_computed) { + current->generation = max_generation + 1; + pop_commit(&list); + + if (current->generation > GENERATION_NUMBER_MAX) + current->generation = GENERATION_NUMBER_MAX; + } + } + } +} + void write_commit_graph(const char *obj_dir, const char **pack_indexes, int nr_packs, @@ -584,7 +644,6 @@ void write_commit_graph(const char *obj_dir, struct hashfile *f; uint32_t i, count_distinct = 0; char *graph_name; - int fd; struct lock_file lk = LOCK_INIT; uint32_t chunk_ids[5]; uint64_t chunk_offsets[5]; @@ -695,24 +754,14 @@ void write_commit_graph(const char *obj_dir, if (commits.nr >= GRAPH_PARENT_MISSING) die(_("too many commits to write graph")); + compute_generation_numbers(&commits); + graph_name = get_commit_graph_filename(obj_dir); - fd = hold_lock_file_for_update(&lk, graph_name, 0); - - if (fd < 0) { - struct strbuf folder = STRBUF_INIT; - strbuf_addstr(&folder, graph_name); - strbuf_setlen(&folder, strrchr(folder.buf, '/') - folder.buf); - - if (mkdir(folder.buf, 0777) < 0) - die_errno(_("cannot mkdir %s"), folder.buf); - strbuf_release(&folder); - - fd = hold_lock_file_for_update(&lk, graph_name, LOCK_DIE_ON_ERROR); - - if (fd < 0) - die_errno("unable to create '%s'", graph_name); - } + if (safe_create_leading_directories(graph_name)) + die_errno(_("unable to create leading directories of %s"), + graph_name); + hold_lock_file_for_update(&lk, graph_name, LOCK_DIE_ON_ERROR); f = hashfd(lk.tempfile->fd, lk.tempfile->filename.buf); hashwrite_be32(f, GRAPH_SIGNATURE); diff --git a/commit-graph.h b/commit-graph.h index 260a468e73..96cccb10f3 100644 --- a/commit-graph.h +++ b/commit-graph.h @@ -17,6 +17,14 @@ char *get_commit_graph_filename(const char *obj_dir); */ int parse_commit_in_graph(struct commit *item); +/* + * It is possible that we loaded commit contents from the commit buffer, + * but we also want to ensure the commit-graph content is correctly + * checked and filled. Fill the graph_pos and generation members of + * the given commit. + */ +void load_commit_graph_info(struct commit *item); + struct tree *get_commit_tree_in_graph(const struct commit *c); struct commit_graph { diff --git a/commit.c b/commit.c index 2d4431a21f..298ad747c6 100644 --- a/commit.c +++ b/commit.c @@ -344,7 +344,7 @@ const void *detach_commit_buffer(struct commit *commit, unsigned long *sizep) return ret; } -int parse_commit_buffer(struct commit *item, const void *buffer, unsigned long size) +int parse_commit_buffer(struct commit *item, const void *buffer, unsigned long size, int check_graph) { const char *tail = buffer; const char *bufptr = buffer; @@ -399,6 +399,9 @@ int parse_commit_buffer(struct commit *item, const void *buffer, unsigned long s } item->date = parse_commit_date(bufptr, tail); + if (check_graph) + load_commit_graph_info(item); + return 0; } @@ -425,7 +428,7 @@ int parse_commit_gently(struct commit *item, int quiet_on_missing) return error("Object %s not a commit", oid_to_hex(&item->object.oid)); } - ret = parse_commit_buffer(item, buffer, size); + ret = parse_commit_buffer(item, buffer, size, 0); if (save_commit_buffer && !ret) { set_commit_buffer(item, buffer, size); return 0; @@ -653,6 +656,24 @@ static int compare_commits_by_author_date(const void *a_, const void *b_, return 0; } +int compare_commits_by_gen_then_commit_date(const void *a_, const void *b_, void *unused) +{ + const struct commit *a = a_, *b = b_; + + /* newer commits first */ + if (a->generation < b->generation) + return 1; + else if (a->generation > b->generation) + return -1; + + /* use date as a heuristic when generations are equal */ + if (a->date < b->date) + return 1; + else if (a->date > b->date) + return -1; + return 0; +} + int compare_commits_by_commit_date(const void *a_, const void *b_, void *unused) { const struct commit *a = a_, *b = b_; @@ -800,11 +821,14 @@ static int queue_has_nonstale(struct prio_queue *queue) } /* all input commits in one and twos[] must have been parsed! */ -static struct commit_list *paint_down_to_common(struct commit *one, int n, struct commit **twos) +static struct commit_list *paint_down_to_common(struct commit *one, int n, + struct commit **twos, + int min_generation) { - struct prio_queue queue = { compare_commits_by_commit_date }; + struct prio_queue queue = { compare_commits_by_gen_then_commit_date }; struct commit_list *result = NULL; int i; + uint32_t last_gen = GENERATION_NUMBER_INFINITY; one->object.flags |= PARENT1; if (!n) { @@ -823,6 +847,15 @@ static struct commit_list *paint_down_to_common(struct commit *one, int n, struc struct commit_list *parents; int flags; + if (commit->generation > last_gen) + BUG("bad generation skip %8x > %8x at %s", + commit->generation, last_gen, + oid_to_hex(&commit->object.oid)); + last_gen = commit->generation; + + if (commit->generation < min_generation) + break; + flags = commit->object.flags & (PARENT1 | PARENT2 | STALE); if (flags == (PARENT1 | PARENT2)) { if (!(commit->object.flags & RESULT)) { @@ -871,7 +904,7 @@ static struct commit_list *merge_bases_many(struct commit *one, int n, struct co return NULL; } - list = paint_down_to_common(one, n, twos); + list = paint_down_to_common(one, n, twos, 0); while (list) { struct commit *commit = pop_commit(&list); @@ -929,6 +962,7 @@ static int remove_redundant(struct commit **array, int cnt) parse_commit(array[i]); for (i = 0; i < cnt; i++) { struct commit_list *common; + uint32_t min_generation = array[i]->generation; if (redundant[i]) continue; @@ -937,8 +971,12 @@ static int remove_redundant(struct commit **array, int cnt) continue; filled_index[filled] = j; work[filled++] = array[j]; + + if (array[j]->generation < min_generation) + min_generation = array[j]->generation; } - common = paint_down_to_common(array[i], filled, work); + common = paint_down_to_common(array[i], filled, work, + min_generation); if (array[i]->object.flags & PARENT2) redundant[i] = 1; for (j = 0; j < filled; j++) @@ -1048,14 +1086,21 @@ int in_merge_bases_many(struct commit *commit, int nr_reference, struct commit * { struct commit_list *bases; int ret = 0, i; + uint32_t min_generation = GENERATION_NUMBER_INFINITY; if (parse_commit(commit)) return ret; - for (i = 0; i < nr_reference; i++) + for (i = 0; i < nr_reference; i++) { if (parse_commit(reference[i])) return ret; + if (reference[i]->generation < min_generation) + min_generation = reference[i]->generation; + } - bases = paint_down_to_common(commit, nr_reference, reference); + if (commit->generation > min_generation) + return ret; + + bases = paint_down_to_common(commit, nr_reference, reference, commit->generation); if (commit->object.flags & PARENT2) ret = 1; clear_commit_marks(commit, all_flags); diff --git a/commit.h b/commit.h index e48f05dffc..cb943013d0 100644 --- a/commit.h +++ b/commit.h @@ -10,6 +10,9 @@ #include "pretty.h" #define COMMIT_NOT_FROM_GRAPH 0xFFFFFFFF +#define GENERATION_NUMBER_INFINITY 0xFFFFFFFF +#define GENERATION_NUMBER_MAX 0x3FFFFFFF +#define GENERATION_NUMBER_ZERO 0 struct commit_list { struct commit *item; @@ -33,6 +36,7 @@ struct commit { */ struct tree *maybe_tree; uint32_t graph_pos; + uint32_t generation; unsigned int index; }; @@ -72,7 +76,7 @@ struct commit *lookup_commit_reference_by_name(const char *name); */ struct commit *lookup_commit_or_die(const struct object_id *oid, const char *ref_name); -int parse_commit_buffer(struct commit *item, const void *buffer, unsigned long size); +int parse_commit_buffer(struct commit *item, const void *buffer, unsigned long size, int check_graph); int parse_commit_gently(struct commit *item, int quiet_on_missing); static inline int parse_commit(struct commit *item) { @@ -341,6 +345,7 @@ extern int remove_signature(struct strbuf *buf); extern int check_commit_signature(const struct commit *commit, struct signature_check *sigc); int compare_commits_by_commit_date(const void *a_, const void *b_, void *unused); +int compare_commits_by_gen_then_commit_date(const void *a_, const void *b_, void *unused); LAST_ARG_MUST_BE_NULL extern int run_commit_hook(int editor_is_used, const char *index_file, const char *name, ...); diff --git a/object.c b/object.c index f7f4de3aaf..d33e2a5cc8 100644 --- a/object.c +++ b/object.c @@ -210,7 +210,7 @@ struct object *parse_object_buffer(const struct object_id *oid, enum object_type } else if (type == OBJ_COMMIT) { struct commit *commit = lookup_commit(oid); if (commit) { - if (parse_commit_buffer(commit, buffer, size)) + if (parse_commit_buffer(commit, buffer, size, 1)) return NULL; if (!get_cached_commit_buffer(commit, NULL)) { set_commit_buffer(commit, buffer, size); diff --git a/ref-filter.c b/ref-filter.c index 01c1a82075..fa3685d91f 100644 --- a/ref-filter.c +++ b/ref-filter.c @@ -16,6 +16,7 @@ #include "trailer.h" #include "wt-status.h" #include "commit-slab.h" +#include "commit-graph.h" static struct ref_msg { const char *gone; @@ -1662,12 +1663,13 @@ static int in_commit_list(const struct commit_list *want, struct commit *c) } /* - * Test whether the candidate or one of its parents is contained in the list. + * Test whether the candidate is contained in the list. * Do not recurse to find out, though, but return -1 if inconclusive. */ static enum contains_result contains_test(struct commit *candidate, const struct commit_list *want, - struct contains_cache *cache) + struct contains_cache *cache, + uint32_t cutoff) { enum contains_result *cached = contains_cache_at(cache, candidate); @@ -1683,6 +1685,10 @@ static enum contains_result contains_test(struct commit *candidate, /* Otherwise, we don't know; prepare to recurse */ parse_commit_or_die(candidate); + + if (candidate->generation < cutoff) + return CONTAINS_NO; + return CONTAINS_UNKNOWN; } @@ -1698,8 +1704,18 @@ static enum contains_result contains_tag_algo(struct commit *candidate, struct contains_cache *cache) { struct contains_stack contains_stack = { 0, 0, NULL }; - enum contains_result result = contains_test(candidate, want, cache); + enum contains_result result; + uint32_t cutoff = GENERATION_NUMBER_INFINITY; + const struct commit_list *p; + for (p = want; p; p = p->next) { + struct commit *c = p->item; + load_commit_graph_info(c); + if (c->generation < cutoff) + cutoff = c->generation; + } + + result = contains_test(candidate, want, cache, cutoff); if (result != CONTAINS_UNKNOWN) return result; @@ -1717,7 +1733,7 @@ static enum contains_result contains_tag_algo(struct commit *candidate, * If we just popped the stack, parents->item has been marked, * therefore contains_test will return a meaningful yes/no. */ - else switch (contains_test(parents->item, want, cache)) { + else switch (contains_test(parents->item, want, cache, cutoff)) { case CONTAINS_YES: *contains_cache_at(cache, commit) = CONTAINS_YES; contains_stack.nr--; @@ -1731,7 +1747,7 @@ static enum contains_result contains_tag_algo(struct commit *candidate, } } free(contains_stack.contains_stack); - return contains_test(candidate, want, cache); + return contains_test(candidate, want, cache, cutoff); } static int commit_contains(struct ref_filter *filter, struct commit *commit, diff --git a/sha1-file.c b/sha1-file.c index 695e5c6276..de4839e634 100644 --- a/sha1-file.c +++ b/sha1-file.c @@ -1801,7 +1801,7 @@ static void check_commit(const void *buf, size_t size) { struct commit c; memset(&c, 0, sizeof(c)); - if (parse_commit_buffer(&c, buf, size)) + if (parse_commit_buffer(&c, buf, size, 0)) die("corrupt commit"); } diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh index a380419b65..77d85aefe7 100755 --- a/t/t5318-commit-graph.sh +++ b/t/t5318-commit-graph.sh @@ -221,4 +221,13 @@ test_expect_success 'write graph in bare repo' ' graph_git_behavior 'bare repo with graph, commit 8 vs merge 1' bare commits/8 merge/1 graph_git_behavior 'bare repo with graph, commit 8 vs merge 2' bare commits/8 merge/2 +test_expect_success 'perform fast-forward merge in full repo' ' + cd "$TRASH_DIRECTORY/full" && + git checkout -b merge-5-to-8 commits/5 && + git merge commits/8 && + git show-ref -s merge-5-to-8 >output && + git show-ref -s commits/8 >expect && + test_cmp expect output +' + test_done