From 2deda00707f8278382d64c696d75f33cc16e1233 Mon Sep 17 00:00:00 2001 From: Stefan Beller Date: Thu, 2 Nov 2017 12:41:42 -0700 Subject: [PATCH 1/7] t6120: fix typo in test name Signed-off-by: Stefan Beller Signed-off-by: Junio C Hamano --- t/t6120-describe.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/t/t6120-describe.sh b/t/t6120-describe.sh index 1c0e8659d9..c8b7ed82d9 100755 --- a/t/t6120-describe.sh +++ b/t/t6120-describe.sh @@ -304,7 +304,7 @@ test_expect_success 'describe chokes on severely broken submodules' ' mv .git/modules/sub1/ .git/modules/sub_moved && test_must_fail git describe --dirty ' -test_expect_success 'describe ignoring a borken submodule' ' +test_expect_success 'describe ignoring a broken submodule' ' git describe --broken >out && test_when_finished "mv .git/modules/sub_moved .git/modules/sub1" && grep broken out From 91904f5645196ceef92c6fca21cc9454928613f0 Mon Sep 17 00:00:00 2001 From: Stefan Beller Date: Thu, 2 Nov 2017 12:41:43 -0700 Subject: [PATCH 2/7] list-objects.c: factor out traverse_trees_and_blobs With traverse_trees_and_blobs factored out of the main traverse function, the next patch can introduce an in-order revision walking with ease. In the next patch we'll call `traverse_trees_and_blobs` from within the loop walking the commits, such that we'll have one invocation of that function per commit. That is why we do not want to have memory allocations in that function, such as we'd have if we were to use a strbuf locally. Pass a strbuf from traverse_commit_list into the blob and tree traversing function as a scratch pad that only needs to be allocated once. Signed-off-by: Stefan Beller Signed-off-by: Junio C Hamano --- list-objects.c | 50 +++++++++++++++++++++++++++++++------------------- 1 file changed, 31 insertions(+), 19 deletions(-) diff --git a/list-objects.c b/list-objects.c index b3931fa434..7c2ce9c4bd 100644 --- a/list-objects.c +++ b/list-objects.c @@ -183,25 +183,15 @@ static void add_pending_tree(struct rev_info *revs, struct tree *tree) add_pending_object(revs, &tree->object, ""); } -void traverse_commit_list(struct rev_info *revs, - show_commit_fn show_commit, - show_object_fn show_object, - void *data) +static void traverse_trees_and_blobs(struct rev_info *revs, + struct strbuf *base, + show_object_fn show_object, + void *data) { int i; - struct commit *commit; - struct strbuf base; - strbuf_init(&base, PATH_MAX); - while ((commit = get_revision(revs)) != NULL) { - /* - * an uninteresting boundary commit may not have its tree - * parsed yet, but we are not going to show them anyway - */ - if (commit->tree) - add_pending_tree(revs, commit->tree); - show_commit(commit, data); - } + assert(base->len == 0); + for (i = 0; i < revs->pending.nr; i++) { struct object_array_entry *pending = revs->pending.objects + i; struct object *obj = pending->item; @@ -218,17 +208,39 @@ void traverse_commit_list(struct rev_info *revs, path = ""; if (obj->type == OBJ_TREE) { process_tree(revs, (struct tree *)obj, show_object, - &base, path, data); + base, path, data); continue; } if (obj->type == OBJ_BLOB) { process_blob(revs, (struct blob *)obj, show_object, - &base, path, data); + base, path, data); continue; } die("unknown pending object %s (%s)", oid_to_hex(&obj->oid), name); } object_array_clear(&revs->pending); - strbuf_release(&base); +} + +void traverse_commit_list(struct rev_info *revs, + show_commit_fn show_commit, + show_object_fn show_object, + void *data) +{ + struct commit *commit; + struct strbuf csp; /* callee's scratch pad */ + strbuf_init(&csp, PATH_MAX); + + while ((commit = get_revision(revs)) != NULL) { + /* + * an uninteresting boundary commit may not have its tree + * parsed yet, but we are not going to show them anyway + */ + if (commit->tree) + add_pending_tree(revs, commit->tree); + show_commit(commit, data); + } + traverse_trees_and_blobs(revs, &csp, show_object, data); + + strbuf_release(&csp); } From ce5b6f9be84690ba38eba10c42b3f7c7e2511abb Mon Sep 17 00:00:00 2001 From: Stefan Beller Date: Wed, 15 Nov 2017 18:00:35 -0800 Subject: [PATCH 3/7] revision.h: introduce blob/tree walking in order of the commits The functionality to list tree objects in the order they were seen while traversing the commits will be used in one of the next commits, where we teach `git describe` to describe not only commits, but blobs, too. The change in list-objects.c is rather minimal as we'll be re-using the infrastructure put in place of the revision walking machinery. For example one could expect that add_pending_tree is not called, but rather commit->tree is directly passed to the tree traversal function. This however requires a lot more code than just emptying the queue containing trees after each commit. Signed-off-by: Stefan Beller Signed-off-by: Junio C Hamano --- Documentation/rev-list-options.txt | 5 ++ list-objects.c | 8 ++++ revision.c | 2 + revision.h | 3 +- t/t6100-rev-list-in-order.sh | 77 ++++++++++++++++++++++++++++++ 5 files changed, 94 insertions(+), 1 deletion(-) create mode 100755 t/t6100-rev-list-in-order.sh diff --git a/Documentation/rev-list-options.txt b/Documentation/rev-list-options.txt index 13501e1556..9066e0c777 100644 --- a/Documentation/rev-list-options.txt +++ b/Documentation/rev-list-options.txt @@ -686,6 +686,11 @@ ifdef::git-rev-list[] all object IDs which I need to download if I have the commit object _bar_ but not _foo_''. +--in-commit-order:: + Print tree and blob ids in order of the commits. The tree + and blob ids are printed after they are first referenced + by a commit. + --objects-edge:: Similar to `--objects`, but also print the IDs of excluded commits prefixed with a ``-'' character. This is used by diff --git a/list-objects.c b/list-objects.c index 7c2ce9c4bd..4caa6fcb77 100644 --- a/list-objects.c +++ b/list-objects.c @@ -239,6 +239,14 @@ void traverse_commit_list(struct rev_info *revs, if (commit->tree) add_pending_tree(revs, commit->tree); show_commit(commit, data); + + if (revs->tree_blobs_in_commit_order) + /* + * NEEDSWORK: Adding the tree and then flushing it here + * needs a reallocation for each commit. Can we pass the + * tree directory without allocation churn? + */ + traverse_trees_and_blobs(revs, &csp, show_object, data); } traverse_trees_and_blobs(revs, &csp, show_object, data); diff --git a/revision.c b/revision.c index d167223e69..9329d4ebbf 100644 --- a/revision.c +++ b/revision.c @@ -1845,6 +1845,8 @@ static int handle_revision_opt(struct rev_info *revs, int argc, const char **arg revs->dense = 0; } else if (!strcmp(arg, "--show-all")) { revs->show_all = 1; + } else if (!strcmp(arg, "--in-commit-order")) { + revs->tree_blobs_in_commit_order = 1; } else if (!strcmp(arg, "--remove-empty")) { revs->remove_empty_trees = 1; } else if (!strcmp(arg, "--merges")) { diff --git a/revision.h b/revision.h index 54761200ad..86985d68aa 100644 --- a/revision.h +++ b/revision.h @@ -121,7 +121,8 @@ struct rev_info { bisect:1, ancestry_path:1, first_parent_only:1, - line_level_traverse:1; + line_level_traverse:1, + tree_blobs_in_commit_order:1; /* Diff flags */ unsigned int diff:1, diff --git a/t/t6100-rev-list-in-order.sh b/t/t6100-rev-list-in-order.sh new file mode 100755 index 0000000000..b2bb0a7f61 --- /dev/null +++ b/t/t6100-rev-list-in-order.sh @@ -0,0 +1,77 @@ +#!/bin/sh + +test_description='rev-list testing in-commit-order' + +. ./test-lib.sh + +test_expect_success 'setup a commit history with trees, blobs' ' + for x in one two three four + do + echo $x >$x && + git add $x && + git commit -m "add file $x" || + return 1 + done && + for x in four three + do + git rm $x && + git commit -m "remove $x" || + return 1 + done +' + +test_expect_success 'rev-list --in-commit-order' ' + git rev-list --in-commit-order --objects HEAD >actual.raw && + cut -c 1-40 >actual expect.raw <<-\EOF && + HEAD^{commit} + HEAD^{tree} + HEAD^{tree}:one + HEAD^{tree}:two + HEAD~1^{commit} + HEAD~1^{tree} + HEAD~1^{tree}:three + HEAD~2^{commit} + HEAD~2^{tree} + HEAD~2^{tree}:four + HEAD~3^{commit} + # HEAD~3^{tree} skipped, same as HEAD~1^{tree} + HEAD~4^{commit} + # HEAD~4^{tree} skipped, same as HEAD^{tree} + HEAD~5^{commit} + HEAD~5^{tree} + EOF + grep -v "#" >expect actual.raw && + cut -c 1-40 >actual expect.raw <<-\EOF && + HEAD^{commit} + HEAD~1^{commit} + HEAD~2^{commit} + HEAD~3^{commit} + HEAD~4^{commit} + HEAD~5^{commit} + HEAD^{tree} + HEAD^{tree}:one + HEAD^{tree}:two + HEAD~1^{tree} + HEAD~1^{tree}:three + HEAD~2^{tree} + HEAD~2^{tree}:four + # HEAD~3^{tree} skipped, same as HEAD~1^{tree} + # HEAD~4^{tree} skipped, same as HEAD^{tree} + HEAD~5^{tree} + EOF + grep -v "#" >expect Date: Wed, 15 Nov 2017 18:00:36 -0800 Subject: [PATCH 4/7] builtin/describe.c: rename `oid` to avoid variable shadowing The function `describe` has already a variable named `oid` declared at the beginning of the function for an object id. Do not shadow that variable with a pointer to an object id. Signed-off-by: Stefan Beller Signed-off-by: Junio C Hamano --- builtin/describe.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/builtin/describe.c b/builtin/describe.c index 29075dbd0f..fd61f463cf 100644 --- a/builtin/describe.c +++ b/builtin/describe.c @@ -381,9 +381,9 @@ static void describe(const char *arg, int last_one) } if (!match_cnt) { - struct object_id *oid = &cmit->object.oid; + struct object_id *cmit_oid = &cmit->object.oid; if (always) { - printf("%s", find_unique_abbrev(oid->hash, abbrev)); + printf("%s", find_unique_abbrev(cmit_oid->hash, abbrev)); if (suffix) printf("%s", suffix); printf("\n"); @@ -392,11 +392,11 @@ static void describe(const char *arg, int last_one) if (unannotated_cnt) die(_("No annotated tags can describe '%s'.\n" "However, there were unannotated tags: try --tags."), - oid_to_hex(oid)); + oid_to_hex(cmit_oid)); else die(_("No tags can describe '%s'.\n" "Try --always, or create some tags."), - oid_to_hex(oid)); + oid_to_hex(cmit_oid)); } QSORT(all_matches, match_cnt, compare_pt); From cdaed0cf023a47cae327671fae11c10d88100ee7 Mon Sep 17 00:00:00 2001 From: Stefan Beller Date: Wed, 15 Nov 2017 18:00:37 -0800 Subject: [PATCH 5/7] builtin/describe.c: print debug statements earlier When debugging, print the received argument at the start of the function instead of in the middle. This ensures that the received argument is printed in all code paths, and also allows a subsequent refactoring to not need to move the "arg" parameter. Signed-off-by: Stefan Beller Signed-off-by: Junio C Hamano --- builtin/describe.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/builtin/describe.c b/builtin/describe.c index fd61f463cf..3136efde31 100644 --- a/builtin/describe.c +++ b/builtin/describe.c @@ -293,6 +293,9 @@ static void describe(const char *arg, int last_one) unsigned long seen_commits = 0; unsigned int unannotated_cnt = 0; + if (debug) + fprintf(stderr, _("describe %s\n"), arg); + if (get_oid(arg, &oid)) die(_("Not a valid object name %s"), arg); cmit = lookup_commit_reference(&oid); @@ -316,7 +319,7 @@ static void describe(const char *arg, int last_one) if (!max_candidates) die(_("no tag exactly matches '%s'"), oid_to_hex(&cmit->object.oid)); if (debug) - fprintf(stderr, _("searching to describe %s\n"), arg); + fprintf(stderr, _("No exact match on refs or tags, searching to describe\n")); if (!have_util) { struct hashmap_iter iter; From 4dbc59a4cce418ff8428a9d2ecd67c34ca50db56 Mon Sep 17 00:00:00 2001 From: Stefan Beller Date: Wed, 15 Nov 2017 18:00:38 -0800 Subject: [PATCH 6/7] builtin/describe.c: factor out describe_commit Factor out describing commits into its own function `describe_commit`, which will put any output to stdout into a strbuf, to be printed afterwards. As the next patch will teach Git to describe blobs using a commit and path, this refactor will make it easy to reuse the code describing commits. Signed-off-by: Stefan Beller Signed-off-by: Junio C Hamano --- builtin/describe.c | 63 +++++++++++++++++++++++++++------------------- 1 file changed, 37 insertions(+), 26 deletions(-) diff --git a/builtin/describe.c b/builtin/describe.c index 3136efde31..9e9a5ed5d4 100644 --- a/builtin/describe.c +++ b/builtin/describe.c @@ -256,7 +256,7 @@ static unsigned long finish_depth_computation( return seen_commits; } -static void display_name(struct commit_name *n) +static void append_name(struct commit_name *n, struct strbuf *dst) { if (n->prio == 2 && !n->tag) { n->tag = lookup_tag(&n->oid); @@ -272,19 +272,18 @@ static void display_name(struct commit_name *n) } if (n->tag) - printf("%s", n->tag->tag); + strbuf_addstr(dst, n->tag->tag); else - printf("%s", n->path); + strbuf_addstr(dst, n->path); } -static void show_suffix(int depth, const struct object_id *oid) +static void append_suffix(int depth, const struct object_id *oid, struct strbuf *dst) { - printf("-%d-g%s", depth, find_unique_abbrev(oid->hash, abbrev)); + strbuf_addf(dst, "-%d-g%s", depth, find_unique_abbrev(oid->hash, abbrev)); } -static void describe(const char *arg, int last_one) +static void describe_commit(struct object_id *oid, struct strbuf *dst) { - struct object_id oid; struct commit *cmit, *gave_up_on = NULL; struct commit_list *list; struct commit_name *n; @@ -293,26 +292,18 @@ static void describe(const char *arg, int last_one) unsigned long seen_commits = 0; unsigned int unannotated_cnt = 0; - if (debug) - fprintf(stderr, _("describe %s\n"), arg); - - if (get_oid(arg, &oid)) - die(_("Not a valid object name %s"), arg); - cmit = lookup_commit_reference(&oid); - if (!cmit) - die(_("%s is not a valid '%s' object"), arg, commit_type); + cmit = lookup_commit_reference(oid); n = find_commit_name(&cmit->object.oid); if (n && (tags || all || n->prio == 2)) { /* * Exact match to an existing ref. */ - display_name(n); + append_name(n, dst); if (longformat) - show_suffix(0, n->tag ? &n->tag->tagged->oid : &oid); + append_suffix(0, n->tag ? &n->tag->tagged->oid : oid, dst); if (suffix) - printf("%s", suffix); - printf("\n"); + strbuf_addstr(dst, suffix); return; } @@ -386,10 +377,9 @@ static void describe(const char *arg, int last_one) if (!match_cnt) { struct object_id *cmit_oid = &cmit->object.oid; if (always) { - printf("%s", find_unique_abbrev(cmit_oid->hash, abbrev)); + strbuf_addstr(dst, find_unique_abbrev(cmit_oid->hash, abbrev)); if (suffix) - printf("%s", suffix); - printf("\n"); + strbuf_addstr(dst, suffix); return; } if (unannotated_cnt) @@ -437,15 +427,36 @@ static void describe(const char *arg, int last_one) } } - display_name(all_matches[0].name); + append_name(all_matches[0].name, dst); if (abbrev) - show_suffix(all_matches[0].depth, &cmit->object.oid); + append_suffix(all_matches[0].depth, &cmit->object.oid, dst); if (suffix) - printf("%s", suffix); - printf("\n"); + strbuf_addstr(dst, suffix); +} + +static void describe(const char *arg, int last_one) +{ + struct object_id oid; + struct commit *cmit; + struct strbuf sb = STRBUF_INIT; + + if (debug) + fprintf(stderr, _("describe %s\n"), arg); + + if (get_oid(arg, &oid)) + die(_("Not a valid object name %s"), arg); + cmit = lookup_commit_reference(&oid); + if (!cmit) + die(_("%s is not a valid '%s' object"), arg, commit_type); + + describe_commit(&oid, &sb); + + puts(sb.buf); if (!last_one) clear_commit_marks(cmit, -1); + + strbuf_release(&sb); } int cmd_describe(int argc, const char **argv, const char *prefix) From 644eb60bd01a15f665b63774fd80e3b6316073a1 Mon Sep 17 00:00:00 2001 From: Stefan Beller Date: Wed, 15 Nov 2017 18:00:39 -0800 Subject: [PATCH 7/7] builtin/describe.c: describe a blob Sometimes users are given a hash of an object and they want to identify it further (ex.: Use verify-pack to find the largest blobs, but what are these? or [1]) When describing commits, we try to anchor them to tags or refs, as these are conceptually on a higher level than the commit. And if there is no ref or tag that matches exactly, we're out of luck. So we employ a heuristic to make up a name for the commit. These names are ambiguous, there might be different tags or refs to anchor to, and there might be different path in the DAG to travel to arrive at the commit precisely. When describing a blob, we want to describe the blob from a higher layer as well, which is a tuple of (commit, deep/path) as the tree objects involved are rather uninteresting. The same blob can be referenced by multiple commits, so how we decide which commit to use? This patch implements a rather naive approach on this: As there are no back pointers from blobs to commits in which the blob occurs, we'll start walking from any tips available, listing the blobs in-order of the commit and once we found the blob, we'll take the first commit that listed the blob. For example git describe --tags v0.99:Makefile conversion-901-g7672db20c2:Makefile tells us the Makefile as it was in v0.99 was introduced in commit 7672db20. The walking is performed in reverse order to show the introduction of a blob rather than its last occurrence. [1] https://stackoverflow.com/questions/223678/which-commit-has-this-blob Signed-off-by: Stefan Beller Signed-off-by: Junio C Hamano --- Documentation/git-describe.txt | 18 ++++++++-- builtin/describe.c | 62 +++++++++++++++++++++++++++++++--- t/t6120-describe.sh | 34 +++++++++++++++++++ 3 files changed, 107 insertions(+), 7 deletions(-) diff --git a/Documentation/git-describe.txt b/Documentation/git-describe.txt index c924c945ba..e027fb8c4b 100644 --- a/Documentation/git-describe.txt +++ b/Documentation/git-describe.txt @@ -3,14 +3,14 @@ git-describe(1) NAME ---- -git-describe - Describe a commit using the most recent tag reachable from it - +git-describe - Give an object a human readable name based on an available ref SYNOPSIS -------- [verse] 'git describe' [--all] [--tags] [--contains] [--abbrev=] [...] 'git describe' [--all] [--tags] [--contains] [--abbrev=] --dirty[=] +'git describe' DESCRIPTION ----------- @@ -24,6 +24,12 @@ By default (without --all or --tags) `git describe` only shows annotated tags. For more information about creating annotated tags see the -a and -s options to linkgit:git-tag[1]. +If the given object refers to a blob, it will be described +as `:`, such that the blob can be found +at `` in the ``, which itself describes the +first commit in which this blob occurs in a reverse revision walk +from HEAD. + OPTIONS ------- ...:: @@ -186,6 +192,14 @@ selected and output. Here fewest commits different is defined as the number of commits which would be shown by `git log tag..input` will be the smallest number of commits possible. +BUGS +---- + +Tree objects as well as tag objects not pointing at commits, cannot be described. +When describing blobs, the lightweight tags pointing at blobs are ignored, +but the blob is still described as : despite the lightweight +tag being favorable. + GIT --- Part of the linkgit:git[1] suite diff --git a/builtin/describe.c b/builtin/describe.c index 9e9a5ed5d4..5b4bfaba3f 100644 --- a/builtin/describe.c +++ b/builtin/describe.c @@ -3,6 +3,7 @@ #include "lockfile.h" #include "commit.h" #include "tag.h" +#include "blob.h" #include "refs.h" #include "builtin.h" #include "exec_cmd.h" @@ -11,8 +12,9 @@ #include "hashmap.h" #include "argv-array.h" #include "run-command.h" +#include "revision.h" +#include "list-objects.h" -#define SEEN (1u << 0) #define MAX_TAGS (FLAG_BITS - 1) static const char * const describe_usage[] = { @@ -434,6 +436,53 @@ static void describe_commit(struct object_id *oid, struct strbuf *dst) strbuf_addstr(dst, suffix); } +struct process_commit_data { + struct object_id current_commit; + struct object_id looking_for; + struct strbuf *dst; + struct rev_info *revs; +}; + +static void process_commit(struct commit *commit, void *data) +{ + struct process_commit_data *pcd = data; + pcd->current_commit = commit->object.oid; +} + +static void process_object(struct object *obj, const char *path, void *data) +{ + struct process_commit_data *pcd = data; + + if (!oidcmp(&pcd->looking_for, &obj->oid) && !pcd->dst->len) { + reset_revision_walk(); + describe_commit(&pcd->current_commit, pcd->dst); + strbuf_addf(pcd->dst, ":%s", path); + free_commit_list(pcd->revs->commits); + pcd->revs->commits = NULL; + } +} + +static void describe_blob(struct object_id oid, struct strbuf *dst) +{ + struct rev_info revs; + struct argv_array args = ARGV_ARRAY_INIT; + struct process_commit_data pcd = { null_oid, oid, dst, &revs}; + + argv_array_pushl(&args, "internal: The first arg is not parsed", + "--objects", "--in-commit-order", "--reverse", "HEAD", + NULL); + + init_revisions(&revs, NULL); + if (setup_revisions(args.argc, args.argv, &revs, NULL) > 1) + BUG("setup_revisions could not handle all args?"); + + if (prepare_revision_walk(&revs)) + die("revision walk setup failed"); + + traverse_commit_list(&revs, process_commit, process_object, &pcd); + reset_revision_walk(); +} + static void describe(const char *arg, int last_one) { struct object_id oid; @@ -445,11 +494,14 @@ static void describe(const char *arg, int last_one) if (get_oid(arg, &oid)) die(_("Not a valid object name %s"), arg); - cmit = lookup_commit_reference(&oid); - if (!cmit) - die(_("%s is not a valid '%s' object"), arg, commit_type); + cmit = lookup_commit_reference_gently(&oid, 1); - describe_commit(&oid, &sb); + if (cmit) + describe_commit(&oid, &sb); + else if (lookup_blob(&oid)) + describe_blob(oid, &sb); + else + die(_("%s is neither a commit nor blob"), arg); puts(sb.buf); diff --git a/t/t6120-describe.sh b/t/t6120-describe.sh index c8b7ed82d9..3e3fb462a0 100755 --- a/t/t6120-describe.sh +++ b/t/t6120-describe.sh @@ -310,6 +310,40 @@ test_expect_success 'describe ignoring a broken submodule' ' grep broken out ' +test_expect_success 'describe a blob at a directly tagged commit' ' + echo "make it a unique blob" >file && + git add file && git commit -m "content in file" && + git tag -a -m "latest annotated tag" unique-file && + git describe HEAD:file >actual && + echo "unique-file:file" >expect && + test_cmp expect actual +' + +test_expect_success 'describe a blob with its first introduction' ' + git commit --allow-empty -m "empty commit" && + git rm file && + git commit -m "delete blob" && + git revert HEAD && + git commit --allow-empty -m "empty commit" && + git describe HEAD:file >actual && + echo "unique-file:file" >expect && + test_cmp expect actual +' + +test_expect_success 'describe directly tagged blob' ' + git tag test-blob unique-file:file && + git describe test-blob >actual && + echo "unique-file:file" >expect && + # suboptimal: we rather want to see "test-blob" + test_cmp expect actual +' + +test_expect_success 'describe tag object' ' + git tag test-blob-1 -a -m msg unique-file:file && + test_must_fail git describe test-blob-1 2>actual && + test_i18ngrep "fatal: test-blob-1 is neither a commit nor blob" actual +' + test_expect_failure ULIMIT_STACK_SIZE 'name-rev works in a deep repo' ' i=1 && while test $i -lt 8000