Browse Source

Merge branch 'tr/filter-branch'

* tr/filter-branch:
  revision --simplify-merges: make it a no-op without pathspec
  revision --simplify-merges: do not leave commits unprocessed
  revision --simplify-merges: use decoration instead of commit->util field
  Documentation: rev-list-options: move --simplify-merges documentation
  filter-branch: use --simplify-merges
  filter-branch: fix ref rewriting with --subdirectory-filter
  filter-branch: Extend test to show rewriting bug
  Topo-sort before --simplify-merges
  revision traversal: show full history with merge simplification
  revision.c: whitespace fix
maint
Junio C Hamano 17 years ago
parent
commit
01914577ed
  1. 42
      Documentation/rev-list-options.txt
  2. 32
      git-filter-branch.sh
  3. 202
      revision.c
  4. 2
      revision.h
  5. 93
      t/t6012-rev-list-simplify.sh
  6. 8
      t/t7003-filter-branch.sh

42
Documentation/rev-list-options.txt

@ -409,6 +409,48 @@ Note that without '\--full-history', this still simplifies merges: if @@ -409,6 +409,48 @@ Note that without '\--full-history', this still simplifies merges: if
one of the parents is TREESAME, we follow only that one, so the other
sides of the merge are never walked.

Finally, there is a fourth simplification mode available:

--simplify-merges::

First, build a history graph in the same way that
'\--full-history' with parent rewriting does (see above).
+
Then simplify each commit `C` to its replacement `C'` in the final
history according to the following rules:
+
--
* Set `C'` to `C`.
+
* Replace each parent `P` of `C'` with its simplification `P'`. In
the process, drop parents that are ancestors of other parents, and
remove duplicates.
+
* If, after this parent rewriting, `C'` is a root or merge commit (has
zero or more than one parent), a boundary commit, or !TREESAME, it
remains. Otherwise, it is replaced with its only parent.
--
+
The effect of this is best shown by way of comparing to
'\--full-history' with parent rewriting. The example turns into:
+
-----------------------------------------------------------------------
  .-A---M---N---O
 /     /       /
I     B       D
 \   /       /
  `---------'
-----------------------------------------------------------------------
+
Note the major differences in `N` and `P` over '\--full-history':
+
--
* `N`'s parent list had `I` removed, because it is an ancestor of the
other parent `M`. Still, `N` remained because it is !TREESAME.
+
* `P`'s parent list similarly had `I` removed. `P` was then
removed completely, because it had one parent and is TREESAME.
--

ifdef::git-rev-list[]
Bisection Helpers

32
git-filter-branch.sh

@ -232,11 +232,11 @@ mkdir ../map || die "Could not create map/ directory" @@ -232,11 +232,11 @@ mkdir ../map || die "Could not create map/ directory"
case "$filter_subdir" in
"")
git rev-list --reverse --topo-order --default HEAD \
--parents "$@"
--parents --simplify-merges "$@"
;;
*)
git rev-list --reverse --topo-order --default HEAD \
--parents "$@" -- "$filter_subdir"
--parents --simplify-merges "$@" -- "$filter_subdir"
esac > ../revs || die "Could not get the commits"
commits=$(wc -l <../revs | tr -d " ")

@ -317,24 +317,20 @@ done <../revs @@ -317,24 +317,20 @@ done <../revs

# In case of a subdirectory filter, it is possible that a specified head
# is not in the set of rewritten commits, because it was pruned by the
# revision walker. Fix it by mapping these heads to the next rewritten
# ancestor(s), i.e. the boundaries in the set of rewritten commits.
# revision walker. Fix it by mapping these heads to the unique nearest
# ancestor that survived the pruning.

# NEEDSWORK: we should sort the unmapped refs topologically first
while read ref
do
sha1=$(git rev-parse "$ref"^0)
test -f "$workdir"/../map/$sha1 && continue
# Assign the boundarie(s) in the set of rewritten commits
# as the replacement commit(s).
# (This would look a bit nicer if --not --stdin worked.)
for p in $( (cd "$workdir"/../map; ls | sed "s/^/^/") |
git rev-list $ref --boundary --stdin |
sed -n "s/^-//p")
if test "$filter_subdir"
then
while read ref
do
map $p >> "$workdir"/../map/$sha1
done
done < "$tempdir"/heads
sha1=$(git rev-parse "$ref"^0)
test -f "$workdir"/../map/$sha1 && continue
ancestor=$(git rev-list --simplify-merges -1 \
$ref -- "$filter_subdir")
test "$ancestor" && echo $(map $ancestor) >> "$workdir"/../map/$sha1
done < "$tempdir"/heads
fi

# Finally update the refs


202
revision.c

@ -489,7 +489,7 @@ static int add_parents_to_list(struct rev_info *revs, struct commit *commit, @@ -489,7 +489,7 @@ static int add_parents_to_list(struct rev_info *revs, struct commit *commit,
p->object.flags |= SEEN;
insert_by_date_cached(p, list, cached_base, cache_ptr);
}
if(revs->first_parent_only)
if (revs->first_parent_only)
break;
}
return 0;
@ -1041,6 +1041,11 @@ static int handle_revision_opt(struct rev_info *revs, int argc, const char **arg @@ -1041,6 +1041,11 @@ static int handle_revision_opt(struct rev_info *revs, int argc, const char **arg
} else if (!strcmp(arg, "--topo-order")) {
revs->lifo = 1;
revs->topo_order = 1;
} else if (!strcmp(arg, "--simplify-merges")) {
revs->simplify_merges = 1;
revs->rewrite_parents = 1;
revs->simplify_history = 0;
revs->limited = 1;
} else if (!strcmp(arg, "--date-order")) {
revs->lifo = 0;
revs->topo_order = 1;
@ -1368,6 +1373,179 @@ static void add_child(struct rev_info *revs, struct commit *parent, struct commi @@ -1368,6 +1373,179 @@ static void add_child(struct rev_info *revs, struct commit *parent, struct commi
l->next = add_decoration(&revs->children, &parent->object, l);
}

/*
 * Drop repeated entries from commit->parents, keeping the first
 * occurrence of each parent, and return the number of parents left.
 *
 * TMP_MARK is used as a scratch "already seen" bit on the parent
 * objects; it is cleared again on every surviving parent before
 * returning.
 */
static int remove_duplicate_parents(struct commit *commit)
{
	struct commit_list **pp, *p;
	int surviving_parents;

	/* Examine existing parents while marking ones we have seen... */
	pp = &commit->parents;
	while ((p = *pp) != NULL) {
		struct commit *parent = p->item;
		if (parent->object.flags & TMP_MARK) {
			/*
			 * Duplicate: unlink the node and release it, so
			 * that it is not leaked once it becomes
			 * unreachable from the parent list.
			 */
			*pp = p->next;
			free(p);
			continue;
		}
		parent->object.flags |= TMP_MARK;
		pp = &p->next;
	}
	/* count them while clearing the temporary mark */
	surviving_parents = 0;
	for (p = commit->parents; p; p = p->next) {
		p->item->object.flags &= ~TMP_MARK;
		surviving_parents++;
	}
	return surviving_parents;
}

/*
 * Per-commit bookkeeping for --simplify-merges.  One instance is
 * attached to each commit through the revs->merge_simplification
 * decoration (see locate_simplify_state()).
 */
struct merge_simplify_state {
struct commit *simplified; /* what this commit simplifies to; NULL until decided */
};

/*
 * Return the merge-simplification state attached to "commit" in
 * revs->merge_simplification.  On first use a zero-filled state is
 * allocated and attached, so the result is never NULL.
 */
static struct merge_simplify_state *locate_simplify_state(struct rev_info *revs, struct commit *commit)
{
	struct merge_simplify_state *state;

	state = lookup_decoration(&revs->merge_simplification, &commit->object);
	if (state)
		return state;

	/* not seen before: start out with "simplified" unset */
	state = xcalloc(1, sizeof(*state));
	add_decoration(&revs->merge_simplification, &commit->object, state);
	return state;
}

/*
 * Try to decide what "commit" simplifies to, and record the answer in
 * its merge_simplify_state.
 *
 * "tail" is the append position of the caller's work queue.  When some
 * parent has not been simplified yet, each such parent and then the
 * commit itself are appended to that queue so that they get another
 * turn later; nothing else is changed in that case.  Otherwise the
 * parent list of "commit" is rewritten in place and its simplification
 * is recorded.
 *
 * Returns the (possibly advanced) append position.
 */
static struct commit_list **simplify_one(struct rev_info *revs, struct commit *commit, struct commit_list **tail)
{
struct commit_list *p;
struct merge_simplify_state *st, *pst;
int cnt;

st = locate_simplify_state(revs, commit);

/*
 * Have we handled this one?
 */
if (st->simplified)
return tail;

/*
 * An UNINTERESTING commit simplifies to itself, and so does a
 * root commit.  We do not rewrite the parents of such a commit
 * anyway.
 */
if ((commit->object.flags & UNINTERESTING) || !commit->parents) {
st->simplified = commit;
return tail;
}

/*
 * Do we know what commit all of our parents should be rewritten to?
 * Otherwise we are not ready to rewrite this one yet: queue the
 * undecided parents first, then ourselves after them.
 */
for (cnt = 0, p = commit->parents; p; p = p->next) {
pst = locate_simplify_state(revs, p->item);
if (!pst->simplified) {
tail = &commit_list_insert(p->item, tail)->next;
cnt++;
}
}
if (cnt) {
tail = &commit_list_insert(commit, tail)->next;
return tail;
}

/*
 * Rewrite our list of parents: every parent is replaced with what
 * it simplifies to, and duplicates created that way are dropped.
 */
for (p = commit->parents; p; p = p->next) {
pst = locate_simplify_state(revs, p->item);
p->item = pst->simplified;
}
cnt = remove_duplicate_parents(commit);

/*
 * It is possible that we are a merge and one side branch
 * does not have any commit that touches the given paths;
 * in such a case, the immediate parents will be rewritten
 * to different commits.
 *
 *      o----X		X: the commit we are looking at;
 *     /    /		o: a commit that touches the paths;
 * ---o----'
 *
 * Further reduce the parents by removing redundant parents.
 */
if (1 < cnt) {
struct commit_list *h = reduce_heads(commit->parents);
cnt = commit_list_count(h);
/* the reduced list replaces the old parent list, which is released */
free_commit_list(commit->parents);
commit->parents = h;
}

/*
 * A commit simplifies to itself if it is a root, if it is
 * UNINTERESTING, if it touches the given paths, or if it is a
 * merge and its parents simplify to more than one commit
 * (the first two cases are already handled at the beginning of
 * this function).
 *
 * Otherwise, it simplifies to what its sole parent simplifies to.
 */
if (!cnt ||
(commit->object.flags & UNINTERESTING) ||
!(commit->object.flags & TREESAME) ||
(1 < cnt))
st->simplified = commit;
else {
pst = locate_simplify_state(revs, commit->parents->item);
st->simplified = pst->simplified;
}
return tail;
}

/*
 * Post-process revs->commits for --simplify-merges.
 *
 * The list is sorted topologically unless that was already asked for.
 * Without pruning (revs->prune unset) nothing else is done.  Otherwise
 * every commit is run through simplify_one() until none is left
 * pending, and revs->commits is then rebuilt so that it keeps, in the
 * original order, only the commits that simplify to themselves.
 */
static void simplify_merges(struct rev_info *revs)
{
	struct commit_list *cur, *following;
	struct commit_list *pending, **append_at;

	if (!revs->topo_order)
		sort_in_topological_order(&revs->commits, revs->lifo);
	if (!revs->prune)
		return;

	/* prepending each entry yields a reversed copy of the list */
	pending = NULL;
	for (cur = revs->commits; cur; cur = cur->next)
		commit_list_insert(cur->item, &pending);

	/*
	 * Drain the queue; simplify_one() re-queues whatever it could
	 * not decide yet, so repeat until a round leaves nothing behind.
	 */
	while (pending) {
		cur = pending;
		pending = NULL;
		append_at = &pending;
		for (; cur; cur = following) {
			struct commit *c = cur->item;

			following = cur->next;
			free(cur);
			append_at = simplify_one(revs, c, append_at);
		}
	}

	/* rebuild revs->commits without the commits that were simplified away */
	cur = revs->commits;
	revs->commits = NULL;
	append_at = &revs->commits;
	for (; cur; cur = following) {
		struct commit *c = cur->item;
		struct merge_simplify_state *st;

		following = cur->next;
		free(cur);
		st = locate_simplify_state(revs, c);
		if (st->simplified != c)
			continue;
		append_at = &commit_list_insert(c, append_at)->next;
	}
}

static void set_children(struct rev_info *revs)
{
struct commit_list *l;
@ -1408,6 +1586,8 @@ int prepare_revision_walk(struct rev_info *revs) @@ -1408,6 +1586,8 @@ int prepare_revision_walk(struct rev_info *revs)
return -1;
if (revs->topo_order)
sort_in_topological_order(&revs->commits, revs->lifo);
if (revs->simplify_merges)
simplify_merges(revs);
if (revs->children.name)
set_children(revs);
return 0;
@ -1440,26 +1620,6 @@ static enum rewrite_result rewrite_one(struct rev_info *revs, struct commit **pp @@ -1440,26 +1620,6 @@ static enum rewrite_result rewrite_one(struct rev_info *revs, struct commit **pp
}
}

/*
 * Drop repeated entries from commit->parents, keeping the first
 * occurrence of each parent.
 *
 * TMP_MARK is used as a scratch "already seen" bit on the parent
 * objects; it is cleared again on every surviving parent before
 * returning.
 */
static void remove_duplicate_parents(struct commit *commit)
{
	struct commit_list **pp, *p;

	/* Examine existing parents while marking ones we have seen... */
	pp = &commit->parents;
	while ((p = *pp) != NULL) {
		struct commit *parent = p->item;
		if (parent->object.flags & TMP_MARK) {
			/*
			 * Duplicate: unlink the node and release it, so
			 * that it is not leaked once it becomes
			 * unreachable from the parent list.
			 */
			*pp = p->next;
			free(p);
			continue;
		}
		parent->object.flags |= TMP_MARK;
		pp = &p->next;
	}
	/* ... and clear the temporary mark */
	for (p = commit->parents; p; p = p->next)
		p->item->object.flags &= ~TMP_MARK;
}

static int rewrite_parents(struct rev_info *revs, struct commit *commit)
{
struct commit_list **pp = &commit->parents;

2
revision.h

@ -42,6 +42,7 @@ struct rev_info { @@ -42,6 +42,7 @@ struct rev_info {
simplify_history:1,
lifo:1,
topo_order:1,
simplify_merges:1,
tag_objects:1,
tree_objects:1,
blob_objects:1,
@ -110,6 +111,7 @@ struct rev_info { @@ -110,6 +111,7 @@ struct rev_info {

struct reflog_walk_info *reflog_info;
struct decoration children;
struct decoration merge_simplification;
};

#define REV_TREE_SAME 0

93
t/t6012-rev-list-simplify.sh

@ -0,0 +1,93 @@ @@ -0,0 +1,93 @@
#!/bin/sh

test_description='merge simplification'

. ./test-lib.sh

# note <name>: create a tag called <name>; no commit is given, so
# "git tag" puts it on the current HEAD.
note () {
git tag "$1"
}

# Regular expression for 40 hexadecimal digits: five digits, repeated
# eight times.
_x40='[0-9a-f][0-9a-f][0-9a-f][0-9a-f][0-9a-f]'
_x40="$_x40$_x40$_x40$_x40$_x40$_x40$_x40$_x40"

# Filter stdin through "git name-rev --tags --stdin" and then rewrite
# every "<40 hex digits> (tags/<name>) " in the result to "<name> ".
unnote () {
git name-rev --tags --stdin | sed -e "s|$_x40 (tags/\([^)]*\)) |\1 |g"
}

# Build the history examined by the checks in this script.  Every
# commit is tagged with a single letter by note:
#
#   A  initial "file" (on master)
#   B  master modifies "file"
#   C  other-branch (forked at A) modifies "file" identically
#   D  other-branch adds "another-file"
#   E  other-branch merges master (B)
#   F  other-branch adds "elif"
#   G  master adds "elif"
#   H  master merges other-branch (F)
#   I  master modifies "file" again
test_expect_success setup '
echo "Hi there" >file &&
git add file &&
test_tick && git commit -m "Initial file" &&
note A &&

git branch other-branch &&

echo "Hello" >file &&
git add file &&
test_tick && git commit -m "Modified file" &&
note B &&

git checkout other-branch &&

echo "Hello" >file &&
git add file &&
test_tick && git commit -m "Modified the file identically" &&
note C &&

echo "This is a stupid example" >another-file &&
git add another-file &&
test_tick && git commit -m "Add another file" &&
note D &&

test_tick && git merge -m "merge" master &&
note E &&

echo "Yet another" >elif &&
git add elif &&
test_tick && git commit -m "Irrelevant change" &&
note F &&

git checkout master &&
echo "Yet another" >elif &&
git add elif &&
test_tick && git commit -m "Another irrelevant change" &&
note G &&

test_tick && git merge -m "merge" other-branch &&
note H &&

echo "Final change" >file &&
test_tick && git commit -a -m "Final change" &&
note I
'

FMT='tformat:%P %H | %s'

# check_result <expected> <log-args>...
#
# Write each word of <expected> on its own line to "expect", then
# register a test named "log <log-args>" that runs "git log" with $FMT,
# --parents and <log-args>, maps object names back to tag names with
# unnote, and compares the commit shown on each line against "expect".
# On mismatch the unprocessed "actual" output is shown to help debugging.
check_result () {
	for c in $1
	do
		echo "$c"
	done >expect &&
	shift &&
	param="$*" &&
	test_expect_success "log $param" '
		git log --pretty="$FMT" --parents $param |
		unnote >actual &&
		# Each line is "<parents> <commit> | <subject>".  Cut the
		# line at the first " | ", then keep the last word of what
		# is left.  This does not depend on how many words the
		# subject has, or on whether there are any parents.
		sed -e "s/ | .*//" -e "s/^.*[[:space:]]//" >check <actual &&
		test_cmp expect check || {
			cat actual
			false
		}
	'
}

# First argument: the tag names of the commits expected in the output,
# in output order.  Remaining arguments: the "git log" options (and
# pathspec) under test.
check_result 'I H G F E D C B A' --full-history
check_result 'I H E C B A' --full-history -- file
check_result 'I H E C B A' --full-history --topo-order -- file
check_result 'I H E C B A' --full-history --date-order -- file
check_result 'I E C B A' --simplify-merges -- file
check_result 'I B A' -- file
check_result 'I B A' --topo-order -- file

test_done

8
t/t7003-filter-branch.sh

@ -96,13 +96,17 @@ test_expect_success 'filter subdirectory only' ' @@ -96,13 +96,17 @@ test_expect_success 'filter subdirectory only' '
test_tick &&
git commit -m "again not subdir" &&
git branch sub &&
git-filter-branch -f --subdirectory-filter subdir refs/heads/sub
git branch sub-earlier HEAD~2 &&
git-filter-branch -f --subdirectory-filter subdir \
refs/heads/sub refs/heads/sub-earlier
'

test_expect_success 'subdirectory filter result looks okay' '
test 2 = $(git rev-list sub | wc -l) &&
git show sub:new &&
test_must_fail git show sub:subdir
test_must_fail git show sub:subdir &&
git show sub-earlier:new &&
test_must_fail git show sub-earlier:subdir
'

test_expect_success 'more setup' '

Loading…
Cancel
Save