|
|
|
#include "cache.h"
|
|
|
|
#include "commit.h"
|
|
|
|
#include "diff.h"
|
|
|
|
#include "revision.h"
|
|
|
|
#include "list-objects.h"
|
rev-list: add bitmap mode to speed up object lists
The bitmap reachability index used to speed up the counting objects
phase during `pack-objects` can also be used to optimize a normal
rev-list if the only thing required are the SHA1s of the objects during
the list (i.e., not the path names at which trees and blobs were found).
Calling `git rev-list --objects --use-bitmap-index [committish]` will
perform an object iteration based on a bitmap result instead of actually
walking the object graph.
These are some example timings for `torvalds/linux` (warm cache,
best-of-five):
$ time git rev-list --objects master > /dev/null
real 0m34.191s
user 0m33.904s
sys 0m0.268s
$ time git rev-list --objects --use-bitmap-index master > /dev/null
real 0m1.041s
user 0m0.976s
sys 0m0.064s
Likewise, using `git rev-list --count --use-bitmap-index` will speed up
the counting operation by building the resulting bitmap and performing a
fast popcount (number of bits set on the bitmap) on the result.
Here are some sample timings of different ways to count commits in
`torvalds/linux`:
$ time git rev-list master | wc -l
399882
real 0m6.524s
user 0m6.060s
sys 0m3.284s
$ time git rev-list --count master
399882
real 0m4.318s
user 0m4.236s
sys 0m0.076s
$ time git rev-list --use-bitmap-index --count master
399882
real 0m0.217s
user 0m0.176s
sys 0m0.040s
This also respects negative refs, so you can use it to count
a slice of history:
$ time git rev-list --count v3.0..master
144843
real 0m1.971s
user 0m1.932s
sys 0m0.036s
$ time git rev-list --use-bitmap-index --count v3.0..master
real 0m0.280s
user 0m0.220s
sys 0m0.056s
Though note that the closer the endpoints, the less it helps. In the
traversal case, we have fewer commits to cross, so we take less time.
But the bitmap time is dominated by generating the pack revindex, which
is constant with respect to the refs given.
Note that you cannot yet get a fast --left-right count of a symmetric
difference (e.g., "--count --left-right master...topic"). The slow part
of that walk actually happens during the merge-base determination when
we parse "master...topic". Even though a count does not actually need to
know the real merge base (it only needs to take the symmetric difference
of the bitmaps), the revision code would require some refactoring to
handle this case.
Additionally, a `--test-bitmap` flag has been added that will perform
the same rev-list manually (i.e. using a normal revwalk) and using
bitmaps, and verify that the results are the same. This can be used to
exercise the bitmap code, and also to verify that the contents of the
.bitmap file are sane.
Signed-off-by: Vicent Marti <tanoku@gmail.com>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
11 years ago
|
|
|
#include "pack.h"
|
|
|
|
#include "pack-bitmap.h"
|
|
|
|
#include "builtin.h"
|
|
|
|
#include "log-tree.h"
|
|
|
|
#include "graph.h"
|
|
|
|
#include "bisect.h"
|
|
|
|
#include "progress.h"
|
|
|
|
|
|
|
|
static const char rev_list_usage[] =
|
|
|
|
"git rev-list [OPTION] <commit-id>... [ -- paths... ]\n"
|
|
|
|
" limiting output:\n"
|
|
|
|
" --max-count=<n>\n"
|
|
|
|
" --max-age=<epoch>\n"
|
|
|
|
" --min-age=<epoch>\n"
|
|
|
|
" --sparse\n"
|
|
|
|
" --no-merges\n"
|
|
|
|
" --min-parents=<n>\n"
|
|
|
|
" --no-min-parents\n"
|
|
|
|
" --max-parents=<n>\n"
|
|
|
|
" --no-max-parents\n"
|
|
|
|
" --remove-empty\n"
|
|
|
|
" --all\n"
|
|
|
|
" --branches\n"
|
|
|
|
" --tags\n"
|
|
|
|
" --remotes\n"
|
|
|
|
" --stdin\n"
|
|
|
|
" --quiet\n"
|
|
|
|
" ordering output:\n"
|
|
|
|
" --topo-order\n"
|
|
|
|
" --date-order\n"
|
|
|
|
" --reverse\n"
|
|
|
|
" formatting output:\n"
|
|
|
|
" --parents\n"
|
|
|
|
" --children\n"
|
|
|
|
" --objects | --objects-edge\n"
|
|
|
|
" --unpacked\n"
|
|
|
|
" --header | --pretty\n"
|
|
|
|
" --abbrev=<n> | --no-abbrev\n"
|
|
|
|
" --abbrev-commit\n"
|
|
|
|
" --left-right\n"
|
|
|
|
" --count\n"
|
|
|
|
" special purpose:\n"
|
|
|
|
" --bisect\n"
|
|
|
|
" --bisect-vars\n"
|
|
|
|
" --bisect-all"
|
|
|
|
;
|
|
|
|
|
|
|
|
static struct progress *progress;
|
|
|
|
static unsigned progress_counter;
|
|
|
|
|
|
|
|
static void finish_commit(struct commit *commit, void *data);
|
|
|
|
static void show_commit(struct commit *commit, void *data)
|
|
|
|
{
|
|
|
|
struct rev_list_info *info = data;
|
|
|
|
struct rev_info *revs = info->revs;
|
|
|
|
|
|
|
|
display_progress(progress, ++progress_counter);
|
|
|
|
|
|
|
|
if (info->flags & REV_LIST_QUIET) {
|
|
|
|
finish_commit(commit, data);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
graph_show_commit(revs->graph);
|
|
|
|
|
|
|
|
if (revs->count) {
|
|
|
|
if (commit->object.flags & PATCHSAME)
|
|
|
|
revs->count_same++;
|
|
|
|
else if (commit->object.flags & SYMMETRIC_LEFT)
|
|
|
|
revs->count_left++;
|
|
|
|
else
|
|
|
|
revs->count_right++;
|
|
|
|
finish_commit(commit, data);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (info->show_timestamp)
|
|
|
|
printf("%"PRItime" ", commit->date);
|
|
|
|
if (info->header_prefix)
|
|
|
|
fputs(info->header_prefix, stdout);
|
|
|
|
|
|
|
|
if (!revs->graph)
|
|
|
|
fputs(get_revision_mark(revs, commit), stdout);
|
|
|
|
if (revs->abbrev_commit && revs->abbrev)
|
|
|
|
fputs(find_unique_abbrev(commit->object.oid.hash, revs->abbrev),
|
|
|
|
stdout);
|
|
|
|
else
|
|
|
|
fputs(oid_to_hex(&commit->object.oid), stdout);
|
|
|
|
if (revs->print_parents) {
|
|
|
|
struct commit_list *parents = commit->parents;
|
|
|
|
while (parents) {
|
|
|
|
printf(" %s", oid_to_hex(&parents->item->object.oid));
|
|
|
|
parents = parents->next;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (revs->children.name) {
|
|
|
|
struct commit_list *children;
|
|
|
|
|
|
|
|
children = lookup_decoration(&revs->children, &commit->object);
|
|
|
|
while (children) {
|
|
|
|
printf(" %s", oid_to_hex(&children->item->object.oid));
|
|
|
|
children = children->next;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
show_decorations(revs, commit);
|
|
|
|
if (revs->commit_format == CMIT_FMT_ONELINE)
|
|
|
|
putchar(' ');
|
|
|
|
else
|
|
|
|
putchar('\n');
|
|
|
|
|
|
|
|
if (revs->verbose_header && get_cached_commit_buffer(commit, NULL)) {
|
|
|
|
struct strbuf buf = STRBUF_INIT;
|
|
|
|
struct pretty_print_context ctx = {0};
|
|
|
|
ctx.abbrev = revs->abbrev;
|
|
|
|
ctx.date_mode = revs->date_mode;
|
log: respect date_mode_explicit with --format:%gd
When we show a reflog selector (e.g., via "git log -g"), we
perform some DWIM magic: while we normally show the entry's
index (e.g., HEAD@{1}), if the user has given us a date
with "--date", then we show a date-based select (e.g.,
HEAD@{yesterday}).
However, we don't want to trigger this magic if the
alternate date format we got was from the "log.date"
configuration; that is not sufficiently strong context for
us to invoke this particular magic. To fix this, commit
f4ea32f (improve reflog date/number heuristic, 2009-09-24)
introduced a "date_mode_explicit" flag in rev_info. This
flag is set only when we see a "--date" option on the
command line, and we a vanilla date to the reflog code if
the date was not explicit.
Later, commit 8f8f547 (Introduce new pretty formats %g[sdD]
for reflog information, 2009-10-19) added another way to
show selectors, and it did not respect the date_mode_explicit
flag from f4ea32f.
This patch propagates the date_mode_explicit flag to the
pretty-print code, which can then use it to pass the
appropriate date field to the reflog code. This brings the
behavior of "%gd" in line with the other formats, and means
that its output is independent of any user configuration.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
13 years ago
|
|
|
ctx.date_mode_explicit = revs->date_mode_explicit;
|
|
|
|
ctx.fmt = revs->commit_format;
|
|
|
|
ctx.output_encoding = get_log_output_encoding();
|
|
|
|
pretty_print_commit(&ctx, commit, &buf);
|
|
|
|
if (buf.len) {
|
|
|
|
if (revs->commit_format != CMIT_FMT_ONELINE)
|
|
|
|
graph_show_oneline(revs->graph);
|
|
|
|
|
|
|
|
graph_show_commit_msg(revs->graph, stdout, &buf);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Add a newline after the commit message.
|
|
|
|
*
|
|
|
|
* Usually, this newline produces a blank
|
|
|
|
* padding line between entries, in which case
|
|
|
|
* we need to add graph padding on this line.
|
|
|
|
*
|
|
|
|
* However, the commit message may not end in a
|
|
|
|
* newline. In this case the newline simply
|
|
|
|
* ends the last line of the commit message,
|
|
|
|
* and we don't need any graph output. (This
|
|
|
|
* always happens with CMIT_FMT_ONELINE, and it
|
|
|
|
* happens with CMIT_FMT_USERFORMAT when the
|
|
|
|
* format doesn't explicitly end in a newline.)
|
|
|
|
*/
|
|
|
|
if (buf.len && buf.buf[buf.len - 1] == '\n')
|
|
|
|
graph_show_padding(revs->graph);
|
|
|
|
putchar(info->hdr_termination);
|
|
|
|
} else {
|
|
|
|
/*
|
|
|
|
* If the message buffer is empty, just show
|
|
|
|
* the rest of the graph output for this
|
|
|
|
* commit.
|
|
|
|
*/
|
|
|
|
if (graph_show_remainder(revs->graph))
|
|
|
|
putchar('\n');
|
|
|
|
if (revs->commit_format == CMIT_FMT_ONELINE)
|
|
|
|
putchar('\n');
|
|
|
|
}
|
|
|
|
strbuf_release(&buf);
|
|
|
|
} else {
|
|
|
|
if (graph_show_remainder(revs->graph))
|
|
|
|
putchar('\n');
|
|
|
|
}
|
|
|
|
maybe_flush_or_die(stdout, "stdout");
|
|
|
|
finish_commit(commit, data);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void finish_commit(struct commit *commit, void *data)
|
|
|
|
{
|
|
|
|
if (commit->parents) {
|
|
|
|
free_commit_list(commit->parents);
|
|
|
|
commit->parents = NULL;
|
|
|
|
}
|
provide a helper to free commit buffer
This converts two lines into one at each caller. But more
importantly, it abstracts the concept of freeing the buffer,
which will make it easier to change later.
Note that we also need to provide a "detach" mechanism for a
tricky case in index-pack. We are passed a buffer for the
object generated by processing the incoming pack. If we are
not using --strict, we just calculate the sha1 on that
buffer and return, leaving the caller to free it. But if we
are using --strict, we actually attach that buffer to an
object, pass the object to the fsck functions, and then
detach the buffer from the object again (so that the caller
can free it as usual). In this case, we don't want to free
the buffer ourselves, but just make sure it is no longer
associated with the commit.
Note that we are making the assumption here that the
attach/detach process does not impact the buffer at all
(e.g., it is never reallocated or modified). That holds true
now, and we have no plans to change that. However, as we
abstract the commit_buffer code, this dependency becomes
less obvious. So when we detach, let's also make sure that
we get back the same buffer that we gave to the
commit_buffer code.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
11 years ago
|
|
|
free_commit_buffer(commit);
|
|
|
|
}
|
|
|
|
|
list-objects: pass full pathname to callbacks
When we find a blob at "a/b/c", we currently pass this to
our show_object_fn callbacks as two components: "a/b/" and
"c". Callbacks which want the full value then call
path_name(), which concatenates the two. But this is an
inefficient interface; the path is a strbuf, and we could
simply append "c" to it temporarily, then roll back the
length, without creating a new copy.
So we could improve this by teaching the callsites of
path_name() this trick (and there are only 3). But we can
also notice that no callback actually cares about the
broken-down representation, and simply pass each callback
the full path "a/b/c" as a string. The callback code becomes
even simpler, then, as we do not have to worry about freeing
an allocated buffer, nor rolling back our modification to
the strbuf.
This is theoretically less efficient, as some callbacks
would not bother to format the final path component. But in
practice this is not measurable. Since we use the same
strbuf over and over, our work to grow it is amortized, and
we really only pay to memcpy a few bytes.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
9 years ago
|
|
|
static void finish_object(struct object *obj, const char *name, void *cb_data)
|
|
|
|
{
|
|
|
|
struct rev_list_info *info = cb_data;
|
|
|
|
if (obj->type == OBJ_BLOB && !has_object_file(&obj->oid))
|
|
|
|
die("missing blob object '%s'", oid_to_hex(&obj->oid));
|
|
|
|
if (info->revs->verify_objects && !obj->parsed && obj->type != OBJ_COMMIT)
|
object: convert parse_object* to take struct object_id
Make parse_object, parse_object_or_die, and parse_object_buffer take a
pointer to struct object_id. Remove the temporary variables inserted
earlier, since they are no longer necessary. Transform all of the
callers using the following semantic patch:
@@
expression E1;
@@
- parse_object(E1.hash)
+ parse_object(&E1)
@@
expression E1;
@@
- parse_object(E1->hash)
+ parse_object(E1)
@@
expression E1, E2;
@@
- parse_object_or_die(E1.hash, E2)
+ parse_object_or_die(&E1, E2)
@@
expression E1, E2;
@@
- parse_object_or_die(E1->hash, E2)
+ parse_object_or_die(E1, E2)
@@
expression E1, E2, E3, E4, E5;
@@
- parse_object_buffer(E1.hash, E2, E3, E4, E5)
+ parse_object_buffer(&E1, E2, E3, E4, E5)
@@
expression E1, E2, E3, E4, E5;
@@
- parse_object_buffer(E1->hash, E2, E3, E4, E5)
+ parse_object_buffer(E1, E2, E3, E4, E5)
Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
8 years ago
|
|
|
parse_object(&obj->oid);
|
|
|
|
}
|
|
|
|
|
list-objects: pass full pathname to callbacks
When we find a blob at "a/b/c", we currently pass this to
our show_object_fn callbacks as two components: "a/b/" and
"c". Callbacks which want the full value then call
path_name(), which concatenates the two. But this is an
inefficient interface; the path is a strbuf, and we could
simply append "c" to it temporarily, then roll back the
length, without creating a new copy.
So we could improve this by teaching the callsites of
path_name() this trick (and there are only 3). But we can
also notice that no callback actually cares about the
broken-down representation, and simply pass each callback
the full path "a/b/c" as a string. The callback code becomes
even simpler, then, as we do not have to worry about freeing
an allocated buffer, nor rolling back our modification to
the strbuf.
This is theoretically less efficient, as some callbacks
would not bother to format the final path component. But in
practice this is not measurable. Since we use the same
strbuf over and over, our work to grow it is amortized, and
we really only pay to memcpy a few bytes.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
9 years ago
|
|
|
static void show_object(struct object *obj, const char *name, void *cb_data)
|
|
|
|
{
|
|
|
|
struct rev_list_info *info = cb_data;
|
list-objects: pass full pathname to callbacks
When we find a blob at "a/b/c", we currently pass this to
our show_object_fn callbacks as two components: "a/b/" and
"c". Callbacks which want the full value then call
path_name(), which concatenates the two. But this is an
inefficient interface; the path is a strbuf, and we could
simply append "c" to it temporarily, then roll back the
length, without creating a new copy.
So we could improve this by teaching the callsites of
path_name() this trick (and there are only 3). But we can
also notice that no callback actually cares about the
broken-down representation, and simply pass each callback
the full path "a/b/c" as a string. The callback code becomes
even simpler, then, as we do not have to worry about freeing
an allocated buffer, nor rolling back our modification to
the strbuf.
This is theoretically less efficient, as some callbacks
would not bother to format the final path component. But in
practice this is not measurable. Since we use the same
strbuf over and over, our work to grow it is amortized, and
we really only pay to memcpy a few bytes.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
9 years ago
|
|
|
finish_object(obj, name, cb_data);
|
|
|
|
display_progress(progress, ++progress_counter);
|
|
|
|
if (info->flags & REV_LIST_QUIET)
|
|
|
|
return;
|
list-objects: pass full pathname to callbacks
When we find a blob at "a/b/c", we currently pass this to
our show_object_fn callbacks as two components: "a/b/" and
"c". Callbacks which want the full value then call
path_name(), which concatenates the two. But this is an
inefficient interface; the path is a strbuf, and we could
simply append "c" to it temporarily, then roll back the
length, without creating a new copy.
So we could improve this by teaching the callsites of
path_name() this trick (and there are only 3). But we can
also notice that no callback actually cares about the
broken-down representation, and simply pass each callback
the full path "a/b/c" as a string. The callback code becomes
even simpler, then, as we do not have to worry about freeing
an allocated buffer, nor rolling back our modification to
the strbuf.
This is theoretically less efficient, as some callbacks
would not bother to format the final path component. But in
practice this is not measurable. Since we use the same
strbuf over and over, our work to grow it is amortized, and
we really only pay to memcpy a few bytes.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
9 years ago
|
|
|
show_object_with_name(stdout, obj, name);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void show_edge(struct commit *commit)
|
|
|
|
{
|
|
|
|
printf("-%s\n", oid_to_hex(&commit->object.oid));
|
|
|
|
}
|
|
|
|
|
|
|
|
static void print_var_str(const char *var, const char *val)
|
|
|
|
{
|
|
|
|
printf("%s='%s'\n", var, val);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void print_var_int(const char *var, int val)
|
|
|
|
{
|
|
|
|
printf("%s=%d\n", var, val);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int show_bisect_vars(struct rev_list_info *info, int reaches, int all)
|
|
|
|
{
|
|
|
|
int cnt, flags = info->flags;
|
|
|
|
char hex[GIT_MAX_HEXSZ + 1] = "";
|
|
|
|
struct commit_list *tried;
|
|
|
|
struct rev_info *revs = info->revs;
|
|
|
|
|
|
|
|
if (!revs->commits)
|
|
|
|
return 1;
|
|
|
|
|
|
|
|
revs->commits = filter_skipped(revs->commits, &tried,
|
|
|
|
flags & BISECT_SHOW_ALL,
|
|
|
|
NULL, NULL);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* revs->commits can reach "reaches" commits among
|
|
|
|
* "all" commits. If it is good, then there are
|
|
|
|
* (all-reaches) commits left to be bisected.
|
|
|
|
* On the other hand, if it is bad, then the set
|
|
|
|
* to bisect is "reaches".
|
|
|
|
* A bisect set of size N has (N-1) commits further
|
|
|
|
* to test, as we already know one bad one.
|
|
|
|
*/
|
|
|
|
cnt = all - reaches;
|
|
|
|
if (cnt < reaches)
|
|
|
|
cnt = reaches;
|
|
|
|
|
|
|
|
if (revs->commits)
|
|
|
|
oid_to_hex_r(hex, &revs->commits->item->object.oid);
|
|
|
|
|
|
|
|
if (flags & BISECT_SHOW_ALL) {
|
|
|
|
traverse_commit_list(revs, show_commit, show_object, info);
|
|
|
|
printf("------\n");
|
|
|
|
}
|
|
|
|
|
|
|
|
print_var_str("bisect_rev", hex);
|
|
|
|
print_var_int("bisect_nr", cnt - 1);
|
|
|
|
print_var_int("bisect_good", all - reaches - 1);
|
|
|
|
print_var_int("bisect_bad", reaches - 1);
|
|
|
|
print_var_int("bisect_all", all);
|
|
|
|
print_var_int("bisect_steps", estimate_bisect_steps(all));
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
rev-list: add bitmap mode to speed up object lists
The bitmap reachability index used to speed up the counting objects
phase during `pack-objects` can also be used to optimize a normal
rev-list if the only thing required are the SHA1s of the objects during
the list (i.e., not the path names at which trees and blobs were found).
Calling `git rev-list --objects --use-bitmap-index [committish]` will
perform an object iteration based on a bitmap result instead of actually
walking the object graph.
These are some example timings for `torvalds/linux` (warm cache,
best-of-five):
$ time git rev-list --objects master > /dev/null
real 0m34.191s
user 0m33.904s
sys 0m0.268s
$ time git rev-list --objects --use-bitmap-index master > /dev/null
real 0m1.041s
user 0m0.976s
sys 0m0.064s
Likewise, using `git rev-list --count --use-bitmap-index` will speed up
the counting operation by building the resulting bitmap and performing a
fast popcount (number of bits set on the bitmap) on the result.
Here are some sample timings of different ways to count commits in
`torvalds/linux`:
$ time git rev-list master | wc -l
399882
real 0m6.524s
user 0m6.060s
sys 0m3.284s
$ time git rev-list --count master
399882
real 0m4.318s
user 0m4.236s
sys 0m0.076s
$ time git rev-list --use-bitmap-index --count master
399882
real 0m0.217s
user 0m0.176s
sys 0m0.040s
This also respects negative refs, so you can use it to count
a slice of history:
$ time git rev-list --count v3.0..master
144843
real 0m1.971s
user 0m1.932s
sys 0m0.036s
$ time git rev-list --use-bitmap-index --count v3.0..master
real 0m0.280s
user 0m0.220s
sys 0m0.056s
Though note that the closer the endpoints, the less it helps. In the
traversal case, we have fewer commits to cross, so we take less time.
But the bitmap time is dominated by generating the pack revindex, which
is constant with respect to the refs given.
Note that you cannot yet get a fast --left-right count of a symmetric
difference (e.g., "--count --left-right master...topic"). The slow part
of that walk actually happens during the merge-base determination when
we parse "master...topic". Even though a count does not actually need to
know the real merge base (it only needs to take the symmetric difference
of the bitmaps), the revision code would require some refactoring to
handle this case.
Additionally, a `--test-bitmap` flag has been added that will perform
the same rev-list manually (i.e. using a normal revwalk) and using
bitmaps, and verify that the results are the same. This can be used to
exercise the bitmap code, and also to verify that the contents of the
.bitmap file are sane.
Signed-off-by: Vicent Marti <tanoku@gmail.com>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
11 years ago
|
|
|
static int show_object_fast(
|
|
|
|
const unsigned char *sha1,
|
|
|
|
enum object_type type,
|
|
|
|
int exclude,
|
|
|
|
uint32_t name_hash,
|
|
|
|
struct packed_git *found_pack,
|
|
|
|
off_t found_offset)
|
|
|
|
{
|
|
|
|
fprintf(stdout, "%s\n", sha1_to_hex(sha1));
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
int cmd_rev_list(int argc, const char **argv, const char *prefix)
|
|
|
|
{
|
|
|
|
struct rev_info revs;
|
|
|
|
struct rev_list_info info;
|
|
|
|
int i;
|
|
|
|
int bisect_list = 0;
|
|
|
|
int bisect_show_vars = 0;
|
|
|
|
int bisect_find_all = 0;
|
rev-list: add bitmap mode to speed up object lists
The bitmap reachability index used to speed up the counting objects
phase during `pack-objects` can also be used to optimize a normal
rev-list if the only thing required are the SHA1s of the objects during
the list (i.e., not the path names at which trees and blobs were found).
Calling `git rev-list --objects --use-bitmap-index [committish]` will
perform an object iteration based on a bitmap result instead of actually
walking the object graph.
These are some example timings for `torvalds/linux` (warm cache,
best-of-five):
$ time git rev-list --objects master > /dev/null
real 0m34.191s
user 0m33.904s
sys 0m0.268s
$ time git rev-list --objects --use-bitmap-index master > /dev/null
real 0m1.041s
user 0m0.976s
sys 0m0.064s
Likewise, using `git rev-list --count --use-bitmap-index` will speed up
the counting operation by building the resulting bitmap and performing a
fast popcount (number of bits set on the bitmap) on the result.
Here are some sample timings of different ways to count commits in
`torvalds/linux`:
$ time git rev-list master | wc -l
399882
real 0m6.524s
user 0m6.060s
sys 0m3.284s
$ time git rev-list --count master
399882
real 0m4.318s
user 0m4.236s
sys 0m0.076s
$ time git rev-list --use-bitmap-index --count master
399882
real 0m0.217s
user 0m0.176s
sys 0m0.040s
This also respects negative refs, so you can use it to count
a slice of history:
$ time git rev-list --count v3.0..master
144843
real 0m1.971s
user 0m1.932s
sys 0m0.036s
$ time git rev-list --use-bitmap-index --count v3.0..master
real 0m0.280s
user 0m0.220s
sys 0m0.056s
Though note that the closer the endpoints, the less it helps. In the
traversal case, we have fewer commits to cross, so we take less time.
But the bitmap time is dominated by generating the pack revindex, which
is constant with respect to the refs given.
Note that you cannot yet get a fast --left-right count of a symmetric
difference (e.g., "--count --left-right master...topic"). The slow part
of that walk actually happens during the merge-base determination when
we parse "master...topic". Even though a count does not actually need to
know the real merge base (it only needs to take the symmetric difference
of the bitmaps), the revision code would require some refactoring to
handle this case.
Additionally, a `--test-bitmap` flag has been added that will perform
the same rev-list manually (i.e. using a normal revwalk) and using
bitmaps, and verify that the results are the same. This can be used to
exercise the bitmap code, and also to verify that the contents of the
.bitmap file are sane.
Signed-off-by: Vicent Marti <tanoku@gmail.com>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
11 years ago
|
|
|
int use_bitmap_index = 0;
|
|
|
|
const char *show_progress = NULL;
|
|
|
|
|
|
|
|
git_config(git_default_config, NULL);
|
|
|
|
init_revisions(&revs, prefix);
|
|
|
|
revs.abbrev = DEFAULT_ABBREV;
|
|
|
|
revs.commit_format = CMIT_FMT_UNSPECIFIED;
|
|
|
|
argc = setup_revisions(argc, argv, &revs, NULL);
|
|
|
|
|
|
|
|
memset(&info, 0, sizeof(info));
|
|
|
|
info.revs = &revs;
|
|
|
|
if (revs.bisect)
|
|
|
|
bisect_list = 1;
|
|
|
|
|
|
|
|
if (DIFF_OPT_TST(&revs.diffopt, QUICK))
|
|
|
|
info.flags |= REV_LIST_QUIET;
|
|
|
|
for (i = 1 ; i < argc; i++) {
|
|
|
|
const char *arg = argv[i];
|
|
|
|
|
|
|
|
if (!strcmp(arg, "--header")) {
|
|
|
|
revs.verbose_header = 1;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (!strcmp(arg, "--timestamp")) {
|
|
|
|
info.show_timestamp = 1;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (!strcmp(arg, "--bisect")) {
|
|
|
|
bisect_list = 1;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (!strcmp(arg, "--bisect-all")) {
|
|
|
|
bisect_list = 1;
|
|
|
|
bisect_find_all = 1;
|
|
|
|
info.flags |= BISECT_SHOW_ALL;
|
|
|
|
revs.show_decorations = 1;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (!strcmp(arg, "--bisect-vars")) {
|
|
|
|
bisect_list = 1;
|
|
|
|
bisect_show_vars = 1;
|
|
|
|
continue;
|
|
|
|
}
|
rev-list: add bitmap mode to speed up object lists
The bitmap reachability index used to speed up the counting objects
phase during `pack-objects` can also be used to optimize a normal
rev-list if the only thing required are the SHA1s of the objects during
the list (i.e., not the path names at which trees and blobs were found).
Calling `git rev-list --objects --use-bitmap-index [committish]` will
perform an object iteration based on a bitmap result instead of actually
walking the object graph.
These are some example timings for `torvalds/linux` (warm cache,
best-of-five):
$ time git rev-list --objects master > /dev/null
real 0m34.191s
user 0m33.904s
sys 0m0.268s
$ time git rev-list --objects --use-bitmap-index master > /dev/null
real 0m1.041s
user 0m0.976s
sys 0m0.064s
Likewise, using `git rev-list --count --use-bitmap-index` will speed up
the counting operation by building the resulting bitmap and performing a
fast popcount (number of bits set on the bitmap) on the result.
Here are some sample timings of different ways to count commits in
`torvalds/linux`:
$ time git rev-list master | wc -l
399882
real 0m6.524s
user 0m6.060s
sys 0m3.284s
$ time git rev-list --count master
399882
real 0m4.318s
user 0m4.236s
sys 0m0.076s
$ time git rev-list --use-bitmap-index --count master
399882
real 0m0.217s
user 0m0.176s
sys 0m0.040s
This also respects negative refs, so you can use it to count
a slice of history:
$ time git rev-list --count v3.0..master
144843
real 0m1.971s
user 0m1.932s
sys 0m0.036s
$ time git rev-list --use-bitmap-index --count v3.0..master
real 0m0.280s
user 0m0.220s
sys 0m0.056s
Though note that the closer the endpoints, the less it helps. In the
traversal case, we have fewer commits to cross, so we take less time.
But the bitmap time is dominated by generating the pack revindex, which
is constant with respect to the refs given.
Note that you cannot yet get a fast --left-right count of a symmetric
difference (e.g., "--count --left-right master...topic"). The slow part
of that walk actually happens during the merge-base determination when
we parse "master...topic". Even though a count does not actually need to
know the real merge base (it only needs to take the symmetric difference
of the bitmaps), the revision code would require some refactoring to
handle this case.
Additionally, a `--test-bitmap` flag has been added that will perform
the same rev-list manually (i.e. using a normal revwalk) and using
bitmaps, and verify that the results are the same. This can be used to
exercise the bitmap code, and also to verify that the contents of the
.bitmap file are sane.
Signed-off-by: Vicent Marti <tanoku@gmail.com>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
11 years ago
|
|
|
if (!strcmp(arg, "--use-bitmap-index")) {
|
|
|
|
use_bitmap_index = 1;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (!strcmp(arg, "--test-bitmap")) {
|
|
|
|
test_bitmap_walk(&revs);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
if (skip_prefix(arg, "--progress=", &arg)) {
|
|
|
|
show_progress = arg;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
usage(rev_list_usage);
|
|
|
|
|
|
|
|
}
|
|
|
|
if (revs.commit_format != CMIT_FMT_UNSPECIFIED) {
|
|
|
|
/* The command line has a --pretty */
|
|
|
|
info.hdr_termination = '\n';
|
|
|
|
if (revs.commit_format == CMIT_FMT_ONELINE)
|
|
|
|
info.header_prefix = "";
|
|
|
|
else
|
|
|
|
info.header_prefix = "commit ";
|
|
|
|
}
|
|
|
|
else if (revs.verbose_header)
|
|
|
|
/* Only --header was specified */
|
|
|
|
revs.commit_format = CMIT_FMT_RAW;
|
|
|
|
|
|
|
|
if ((!revs.commits &&
|
|
|
|
(!(revs.tag_objects || revs.tree_objects || revs.blob_objects) &&
|
Add "named object array" concept
We've had this notion of a "object_list" for a long time, which eventually
grew a "name" member because some users (notably git-rev-list) wanted to
name each object as it is generated.
That object_list is great for some things, but it isn't all that wonderful
for others, and the "name" member is generally not used by everybody.
This patch splits the users of the object_list array up into two: the
traditional list users, who want the list-like format, and who don't
actually use or want the name. And another class of users that really used
the list as an extensible array, and generally wanted to name the objects.
The patch is fairly straightforward, but it's also biggish. Most of it
really just cleans things up: switching the revision parsing and listing
over to the array makes things like the builtin-diff usage much simpler
(we now see exactly how many members the array has, and we don't get the
objects reversed from the order they were on the command line).
One of the main reasons for doing this at all is that the malloc overhead
of the simple object list was actually pretty high, and the array is just
a lot denser. So this patch brings down memory usage by git-rev-list by
just under 3% (on top of all the other memory use optimizations) on the
mozilla archive.
It does add more lines than it removes, and more importantly, it adds a
whole new infrastructure for maintaining lists of objects, but on the
other hand, the new dynamic array code is pretty obvious. The change to
builtin-diff-tree.c shows a fairly good example of why an array interface
is sometimes more natural, and just much simpler for everybody.
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
19 years ago
|
|
|
!revs.pending.nr)) ||
|
|
|
|
revs.diff)
|
|
|
|
usage(rev_list_usage);
|
|
|
|
|
|
|
|
if (revs.show_notes)
|
|
|
|
die(_("rev-list does not support display of notes"));
|
|
|
|
|
"log --author=me --grep=it" should find intersection, not union
Historically, any grep filter in "git log" family of commands were taken
as restricting to commits with any of the words in the commit log message.
However, the user almost always want to find commits "done by this person
on that topic". With "--all-match" option, a series of grep patterns can
be turned into a requirement that all of them must produce a match, but
that makes it impossible to ask for "done by me, on either this or that"
with:
log --author=me --committer=him --grep=this --grep=that
because it will require both "this" and "that" to appear.
Change the "header" parser of grep library to treat the headers specially,
and parse it as:
(all-match-OR (HEADER-AUTHOR me)
(HEADER-COMMITTER him)
(OR
(PATTERN this)
(PATTERN that) ) )
Even though the "log" command line parser doesn't give direct access to
the extended grep syntax to group terms with parentheses, this change will
cover the majority of the case the users would want.
This incidentally revealed that one test in t7002 was bogus. It ran:
log --author=Thor --grep=Thu --format='%s'
and expected (wrongly) "Thu" to match "Thursday" in the author/committer
date, but that would never match, as the timestamp in raw commit buffer
does not have the name of the day-of-the-week.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
15 years ago
|
|
|
save_commit_buffer = (revs.verbose_header ||
|
|
|
|
revs.grep_filter.pattern_list ||
|
|
|
|
revs.grep_filter.header_list);
|
rev-list --bisect: limit list before bisecting.
I noticed bisect does not work well without both good and bad.
Running this script in git.git repository would give you quite
different results:
#!/bin/sh
initial=e83c5163316f89bfbde7d9ab23ca2e25604af290
mid0=`git rev-list --bisect ^$initial --all`
git rev-list $mid0 | wc -l
git rev-list ^$mid0 --all | wc -l
mid1=`git rev-list --bisect --all`
git rev-list $mid1 | wc -l
git rev-list ^$mid1 --all | wc -l
The $initial commit is the very first commit you made. The
first midpoint bisects things evenly as designed, but the latter
does not.
The reason I got interested in this was because I was wondering
if something like the following would help people converting a
huge repository from foreign SCM, or preparing a repository to
be fetched over plain dumb HTTP only:
#!/bin/sh
N=4
P=.git/objects/pack
bottom=
while test 0 \< $N
do
N=$((N-1))
if test -z "$bottom"
then
newbottom=`git rev-list --bisect --all`
else
newbottom=`git rev-list --bisect ^$bottom --all`
fi
if test -z "$bottom"
then
rev_list="$newbottom"
elif test 0 = $N
then
rev_list="^$bottom --all"
else
rev_list="^$bottom $newbottom"
fi
p=$(git rev-list --unpacked --objects $rev_list |
git pack-objects $P/pack)
git show-index <$P/pack-$p.idx | wc -l
bottom=$newbottom
done
The idea is to pack older half of the history to one pack, then
older half of the remaining history to another, to continue a
few times, using finer granularity as we get closer to the tip.
This may not matter, since for a truly huge history, running
bisect number of times could be quite time consuming, and we
might be better off running "git rev-list --all" once into a
temporary file, and manually pick cut-off points from the
resulting list of commits. After all we are talking about
"approximately half" for such an usage, and older history does
not matter much.
Signed-off-by: Junio C Hamano <junkio@cox.net>
19 years ago
|
|
|
if (bisect_list)
|
|
|
|
revs.limited = 1;
|
|
|
|
|
|
|
|
if (show_progress)
|
|
|
|
progress = start_progress_delay(show_progress, 0, 0, 2);
|
|
|
|
|
|
|
|
if (use_bitmap_index && !revs.prune) {
|
rev-list: add bitmap mode to speed up object lists
The bitmap reachability index used to speed up the counting objects
phase during `pack-objects` can also be used to optimize a normal
rev-list if the only thing required are the SHA1s of the objects during
the list (i.e., not the path names at which trees and blobs were found).
Calling `git rev-list --objects --use-bitmap-index [committish]` will
perform an object iteration based on a bitmap result instead of actually
walking the object graph.
These are some example timings for `torvalds/linux` (warm cache,
best-of-five):
$ time git rev-list --objects master > /dev/null
real 0m34.191s
user 0m33.904s
sys 0m0.268s
$ time git rev-list --objects --use-bitmap-index master > /dev/null
real 0m1.041s
user 0m0.976s
sys 0m0.064s
Likewise, using `git rev-list --count --use-bitmap-index` will speed up
the counting operation by building the resulting bitmap and performing a
fast popcount (number of bits set on the bitmap) on the result.
Here are some sample timings of different ways to count commits in
`torvalds/linux`:
$ time git rev-list master | wc -l
399882
real 0m6.524s
user 0m6.060s
sys 0m3.284s
$ time git rev-list --count master
399882
real 0m4.318s
user 0m4.236s
sys 0m0.076s
$ time git rev-list --use-bitmap-index --count master
399882
real 0m0.217s
user 0m0.176s
sys 0m0.040s
This also respects negative refs, so you can use it to count
a slice of history:
$ time git rev-list --count v3.0..master
144843
real 0m1.971s
user 0m1.932s
sys 0m0.036s
$ time git rev-list --use-bitmap-index --count v3.0..master
real 0m0.280s
user 0m0.220s
sys 0m0.056s
Though note that the closer the endpoints, the less it helps. In the
traversal case, we have fewer commits to cross, so we take less time.
But the bitmap time is dominated by generating the pack revindex, which
is constant with respect to the refs given.
Note that you cannot yet get a fast --left-right count of a symmetric
difference (e.g., "--count --left-right master...topic"). The slow part
of that walk actually happens during the merge-base determination when
we parse "master...topic". Even though a count does not actually need to
know the real merge base (it only needs to take the symmetric difference
of the bitmaps), the revision code would require some refactoring to
handle this case.
Additionally, a `--test-bitmap` flag has been added that will perform
the same rev-list manually (i.e. using a normal revwalk) and using
bitmaps, and verify that the results are the same. This can be used to
exercise the bitmap code, and also to verify that the contents of the
.bitmap file are sane.
Signed-off-by: Vicent Marti <tanoku@gmail.com>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
11 years ago
|
|
|
if (revs.count && !revs.left_right && !revs.cherry_mark) {
|
|
|
|
uint32_t commit_count;
|
rev-list: "adjust" results of "--count --use-bitmap-index -n"
If you ask rev-list for:
git rev-list --count --use-bitmap-index HEAD
we optimize out the actual traversal and just give you the
number of bits set in the commit bitmap. This is faster,
which is good.
But if you ask to limit the size of the traversal, like:
git rev-list --count --use-bitmap-index -n 100 HEAD
we'll still output the full bitmapped number we found. On
the surface, that might even seem OK. You explicitly asked
to use the bitmap index, and it was cheap to compute the
real answer, so we gave it to you.
But there's something much more complicated going on under
the hood. If we don't have a bitmap directly for HEAD, then
we have to actually traverse backwards, looking for a
bitmapped commit. And _that_ traversal is bounded by our
`-n` count.
This is a good thing, because it bounds the work we have to
do, which is probably what the user wanted by asking for
`-n`. But now it makes the output quite confusing. You might
get many values:
- your `-n` value, if we walked back and never found a
bitmap (or fewer if there weren't that many commits)
- the actual full count, if we found a bitmap root for
every path of our traversal with in the `-n` limit
- any number in between! We might have walked back and
found _some_ bitmaps, but then cut off the traversal
early with some commits not accounted for in the result.
So you cannot even see a value higher than your `-n` and say
"OK, bitmaps kicked in, this must be the real full count".
The only sane thing is for git to just clamp the value to a
maximum of the `-n` value, which means we should output the
exact same results whether bitmaps are in use or not.
The test in t5310 demonstrates this by using `-n 1`.
Without this patch we fail in the full-bitmap case (where we
do not have to traverse at all) but _not_ in the
partial-bitmap case (where we have to walk down to find an
actual bitmap). With this patch, both cases just work.
I didn't implement the crazy in-between case, just because
it's complicated to set up, and is really a subset of the
full-count case, which we do cover.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
9 years ago
|
|
|
int max_count = revs.max_count;
|
rev-list: add bitmap mode to speed up object lists
The bitmap reachability index used to speed up the counting objects
phase during `pack-objects` can also be used to optimize a normal
rev-list if the only thing required are the SHA1s of the objects during
the list (i.e., not the path names at which trees and blobs were found).
Calling `git rev-list --objects --use-bitmap-index [committish]` will
perform an object iteration based on a bitmap result instead of actually
walking the object graph.
These are some example timings for `torvalds/linux` (warm cache,
best-of-five):
$ time git rev-list --objects master > /dev/null
real 0m34.191s
user 0m33.904s
sys 0m0.268s
$ time git rev-list --objects --use-bitmap-index master > /dev/null
real 0m1.041s
user 0m0.976s
sys 0m0.064s
Likewise, using `git rev-list --count --use-bitmap-index` will speed up
the counting operation by building the resulting bitmap and performing a
fast popcount (number of bits set on the bitmap) on the result.
Here are some sample timings of different ways to count commits in
`torvalds/linux`:
$ time git rev-list master | wc -l
399882
real 0m6.524s
user 0m6.060s
sys 0m3.284s
$ time git rev-list --count master
399882
real 0m4.318s
user 0m4.236s
sys 0m0.076s
$ time git rev-list --use-bitmap-index --count master
399882
real 0m0.217s
user 0m0.176s
sys 0m0.040s
This also respects negative refs, so you can use it to count
a slice of history:
$ time git rev-list --count v3.0..master
144843
real 0m1.971s
user 0m1.932s
sys 0m0.036s
$ time git rev-list --use-bitmap-index --count v3.0..master
real 0m0.280s
user 0m0.220s
sys 0m0.056s
Though note that the closer the endpoints, the less it helps. In the
traversal case, we have fewer commits to cross, so we take less time.
But the bitmap time is dominated by generating the pack revindex, which
is constant with respect to the refs given.
Note that you cannot yet get a fast --left-right count of a symmetric
difference (e.g., "--count --left-right master...topic"). The slow part
of that walk actually happens during the merge-base determination when
we parse "master...topic". Even though a count does not actually need to
know the real merge base (it only needs to take the symmetric difference
of the bitmaps), the revision code would require some refactoring to
handle this case.
Additionally, a `--test-bitmap` flag has been added that will perform
the same rev-list manually (i.e. using a normal revwalk) and using
bitmaps, and verify that the results are the same. This can be used to
exercise the bitmap code, and also to verify that the contents of the
.bitmap file are sane.
Signed-off-by: Vicent Marti <tanoku@gmail.com>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
11 years ago
|
|
|
if (!prepare_bitmap_walk(&revs)) {
|
|
|
|
count_bitmap_commit_list(&commit_count, NULL, NULL, NULL);
|
rev-list: "adjust" results of "--count --use-bitmap-index -n"
If you ask rev-list for:
git rev-list --count --use-bitmap-index HEAD
we optimize out the actual traversal and just give you the
number of bits set in the commit bitmap. This is faster,
which is good.
But if you ask to limit the size of the traversal, like:
git rev-list --count --use-bitmap-index -n 100 HEAD
we'll still output the full bitmapped number we found. On
the surface, that might even seem OK. You explicitly asked
to use the bitmap index, and it was cheap to compute the
real answer, so we gave it to you.
But there's something much more complicated going on under
the hood. If we don't have a bitmap directly for HEAD, then
we have to actually traverse backwards, looking for a
bitmapped commit. And _that_ traversal is bounded by our
`-n` count.
This is a good thing, because it bounds the work we have to
do, which is probably what the user wanted by asking for
`-n`. But now it makes the output quite confusing. You might
get many values:
- your `-n` value, if we walked back and never found a
bitmap (or fewer if there weren't that many commits)
- the actual full count, if we found a bitmap root for
every path of our traversal with in the `-n` limit
- any number in between! We might have walked back and
found _some_ bitmaps, but then cut off the traversal
early with some commits not accounted for in the result.
So you cannot even see a value higher than your `-n` and say
"OK, bitmaps kicked in, this must be the real full count".
The only sane thing is for git to just clamp the value to a
maximum of the `-n` value, which means we should output the
exact same results whether bitmaps are in use or not.
The test in t5310 demonstrates this by using `-n 1`.
Without this patch we fail in the full-bitmap case (where we
do not have to traverse at all) but _not_ in the
partial-bitmap case (where we have to walk down to find an
actual bitmap). With this patch, both cases just work.
I didn't implement the crazy in-between case, just because
it's complicated to set up, and is really a subset of the
full-count case, which we do cover.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
9 years ago
|
|
|
if (max_count >= 0 && max_count < commit_count)
|
|
|
|
commit_count = max_count;
|
rev-list: add bitmap mode to speed up object lists
The bitmap reachability index used to speed up the counting objects
phase during `pack-objects` can also be used to optimize a normal
rev-list if the only thing required are the SHA1s of the objects during
the list (i.e., not the path names at which trees and blobs were found).
Calling `git rev-list --objects --use-bitmap-index [committish]` will
perform an object iteration based on a bitmap result instead of actually
walking the object graph.
These are some example timings for `torvalds/linux` (warm cache,
best-of-five):
$ time git rev-list --objects master > /dev/null
real 0m34.191s
user 0m33.904s
sys 0m0.268s
$ time git rev-list --objects --use-bitmap-index master > /dev/null
real 0m1.041s
user 0m0.976s
sys 0m0.064s
Likewise, using `git rev-list --count --use-bitmap-index` will speed up
the counting operation by building the resulting bitmap and performing a
fast popcount (number of bits set on the bitmap) on the result.
Here are some sample timings of different ways to count commits in
`torvalds/linux`:
$ time git rev-list master | wc -l
399882
real 0m6.524s
user 0m6.060s
sys 0m3.284s
$ time git rev-list --count master
399882
real 0m4.318s
user 0m4.236s
sys 0m0.076s
$ time git rev-list --use-bitmap-index --count master
399882
real 0m0.217s
user 0m0.176s
sys 0m0.040s
This also respects negative refs, so you can use it to count
a slice of history:
$ time git rev-list --count v3.0..master
144843
real 0m1.971s
user 0m1.932s
sys 0m0.036s
$ time git rev-list --use-bitmap-index --count v3.0..master
real 0m0.280s
user 0m0.220s
sys 0m0.056s
Though note that the closer the endpoints, the less it helps. In the
traversal case, we have fewer commits to cross, so we take less time.
But the bitmap time is dominated by generating the pack revindex, which
is constant with respect to the refs given.
Note that you cannot yet get a fast --left-right count of a symmetric
difference (e.g., "--count --left-right master...topic"). The slow part
of that walk actually happens during the merge-base determination when
we parse "master...topic". Even though a count does not actually need to
know the real merge base (it only needs to take the symmetric difference
of the bitmaps), the revision code would require some refactoring to
handle this case.
Additionally, a `--test-bitmap` flag has been added that will perform
the same rev-list manually (i.e. using a normal revwalk) and using
bitmaps, and verify that the results are the same. This can be used to
exercise the bitmap code, and also to verify that the contents of the
.bitmap file are sane.
Signed-off-by: Vicent Marti <tanoku@gmail.com>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
11 years ago
|
|
|
printf("%d\n", commit_count);
|
|
|
|
return 0;
|
|
|
|
}
|
rev-list: disable bitmaps when "-n" is used with listing objects
You can ask rev-list to use bitmaps to speed up an --objects
traversal, which should generally give you your answers much
faster.
Likewise, you can ask rev-list to limit such a traversal
with `-n`, in which case we'll show only a limited set of
commits (and only the tree and commit objects directly
reachable from those commits).
But if you do both together, the results are nonsensical. We
end up limiting any fallback traversal we do to _find_ the
bitmaps, but the actual set of objects we output will be
picked arbitrarily from the union of any bitmaps we do find,
and will involve the objects of many more commits.
It's possible that somebody might want this as a "show me
what you can, but limit the amount of work you do" flag.
But as with the prior commit clamping "--count", the results
are basically non-deterministic; you'll get the values from
some commits between `n` and the total number, and you can't
tell which.
And unlike the `--count` case, we can't easily generate the
"real" value from the bitmap values (you can't just walk
back `-n` commits and subtract out the reachable objects
from the boundary commits; the bitmaps for `X` record its
total reachability, so you don't know which objects are
directly from `X` itself, which from `X^`, and so on).
So let's just fallback to the non-bitmap code path in this
case, so we always give a sane answer.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
9 years ago
|
|
|
} else if (revs.max_count < 0 &&
|
|
|
|
revs.tag_objects && revs.tree_objects && revs.blob_objects) {
|
rev-list: add bitmap mode to speed up object lists
The bitmap reachability index used to speed up the counting objects
phase during `pack-objects` can also be used to optimize a normal
rev-list if the only thing required are the SHA1s of the objects during
the list (i.e., not the path names at which trees and blobs were found).
Calling `git rev-list --objects --use-bitmap-index [committish]` will
perform an object iteration based on a bitmap result instead of actually
walking the object graph.
These are some example timings for `torvalds/linux` (warm cache,
best-of-five):
$ time git rev-list --objects master > /dev/null
real 0m34.191s
user 0m33.904s
sys 0m0.268s
$ time git rev-list --objects --use-bitmap-index master > /dev/null
real 0m1.041s
user 0m0.976s
sys 0m0.064s
Likewise, using `git rev-list --count --use-bitmap-index` will speed up
the counting operation by building the resulting bitmap and performing a
fast popcount (number of bits set on the bitmap) on the result.
Here are some sample timings of different ways to count commits in
`torvalds/linux`:
$ time git rev-list master | wc -l
399882
real 0m6.524s
user 0m6.060s
sys 0m3.284s
$ time git rev-list --count master
399882
real 0m4.318s
user 0m4.236s
sys 0m0.076s
$ time git rev-list --use-bitmap-index --count master
399882
real 0m0.217s
user 0m0.176s
sys 0m0.040s
This also respects negative refs, so you can use it to count
a slice of history:
$ time git rev-list --count v3.0..master
144843
real 0m1.971s
user 0m1.932s
sys 0m0.036s
$ time git rev-list --use-bitmap-index --count v3.0..master
real 0m0.280s
user 0m0.220s
sys 0m0.056s
Though note that the closer the endpoints, the less it helps. In the
traversal case, we have fewer commits to cross, so we take less time.
But the bitmap time is dominated by generating the pack revindex, which
is constant with respect to the refs given.
Note that you cannot yet get a fast --left-right count of a symmetric
difference (e.g., "--count --left-right master...topic"). The slow part
of that walk actually happens during the merge-base determination when
we parse "master...topic". Even though a count does not actually need to
know the real merge base (it only needs to take the symmetric difference
of the bitmaps), the revision code would require some refactoring to
handle this case.
Additionally, a `--test-bitmap` flag has been added that will perform
the same rev-list manually (i.e. using a normal revwalk) and using
bitmaps, and verify that the results are the same. This can be used to
exercise the bitmap code, and also to verify that the contents of the
.bitmap file are sane.
Signed-off-by: Vicent Marti <tanoku@gmail.com>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
11 years ago
|
|
|
if (!prepare_bitmap_walk(&revs)) {
|
|
|
|
traverse_bitmap_commit_list(&show_object_fast);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (prepare_revision_walk(&revs))
|
|
|
|
die("revision walk setup failed");
|
|
|
|
if (revs.tree_objects)
|
|
|
|
mark_edges_uninteresting(&revs, show_edge);
|
|
|
|
|
|
|
|
if (bisect_list) {
|
|
|
|
int reaches = reaches, all = all;
|
|
|
|
|
|
|
|
revs.commits = find_bisection(revs.commits, &reaches, &all,
|
|
|
|
bisect_find_all);
|
|
|
|
|
|
|
|
if (bisect_show_vars)
|
|
|
|
return show_bisect_vars(&info, reaches, all);
|
|
|
|
}
|
|
|
|
|
|
|
|
traverse_commit_list(&revs, show_commit, show_object, &info);
|
|
|
|
|
|
|
|
stop_progress(&progress);
|
|
|
|
|
|
|
|
if (revs.count) {
|
|
|
|
if (revs.left_right && revs.cherry_mark)
|
|
|
|
printf("%d\t%d\t%d\n", revs.count_left, revs.count_right, revs.count_same);
|
|
|
|
else if (revs.left_right)
|
|
|
|
printf("%d\t%d\n", revs.count_left, revs.count_right);
|
|
|
|
else if (revs.cherry_mark)
|
|
|
|
printf("%d\t%d\n", revs.count_left + revs.count_right, revs.count_same);
|
|
|
|
else
|
|
|
|
printf("%d\n", revs.count_left + revs.count_right);
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|