diff --git a/Documentation/git-cat-file.txt b/Documentation/git-cat-file.txt index f90f09b03f..74013335a1 100644 --- a/Documentation/git-cat-file.txt +++ b/Documentation/git-cat-file.txt @@ -104,6 +104,16 @@ OPTIONS buffering; this is much more efficient when invoking `--batch-check` on a large number of objects. +--unordered:: + When `--batch-all-objects` is in use, visit objects in an + order which may be more efficient for accessing the object + contents than hash order. The exact details of the order are + unspecified, but if you do not require a specific order, this + should generally result in faster output, especially with + `--batch`. Note that `cat-file` will still show each object + only once, even if it is stored multiple times in the + repository. + --allow-unknown-type:: Allow -s or -t to query broken/corrupt objects of unknown type. diff --git a/builtin/cat-file.c b/builtin/cat-file.c index 4a44b2404f..08dced2614 100644 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -21,6 +21,7 @@ struct batch_options { int print_contents; int buffer_output; int all_objects; + int unordered; int cmdmode; /* may be 'w' or 'c' for --filters or --textconv */ const char *format; }; @@ -337,11 +338,11 @@ static void print_object_or_die(struct batch_options *opt, struct expand_data *d } } -static void batch_object_write(const char *obj_name, struct batch_options *opt, +static void batch_object_write(const char *obj_name, + struct strbuf *scratch, + struct batch_options *opt, struct expand_data *data) { - struct strbuf buf = STRBUF_INIT; - if (!data->skip_object_info && oid_object_info_extended(the_repository, &data->oid, &data->info, OBJECT_INFO_LOOKUP_REPLACE) < 0) { @@ -351,10 +352,10 @@ static void batch_object_write(const char *obj_name, struct batch_options *opt, return; } - strbuf_expand(&buf, opt->format, expand_format, data); - strbuf_addch(&buf, '\n'); - batch_write(opt, buf.buf, buf.len); - strbuf_release(&buf); + strbuf_reset(scratch); + strbuf_expand(scratch, opt->format, expand_format, data); + strbuf_addch(scratch, '\n'); + batch_write(opt, scratch->buf, scratch->len); if (opt->print_contents) { print_object_or_die(opt, data); @@ -362,7 +363,9 @@ static void batch_object_write(const char *obj_name, struct batch_options *opt, } } -static void batch_one_object(const char *obj_name, struct batch_options *opt, +static void batch_one_object(const char *obj_name, + struct strbuf *scratch, + struct batch_options *opt, struct expand_data *data) { struct object_context ctx; @@ -404,42 +407,70 @@ static void batch_one_object(const char *obj_name, struct batch_options *opt, return; } - batch_object_write(obj_name, opt, data); + batch_object_write(obj_name, scratch, opt, data); } struct object_cb_data { struct batch_options *opt; struct expand_data *expand; + struct oidset *seen; + struct strbuf *scratch; }; static int batch_object_cb(const struct object_id *oid, void *vdata) { struct object_cb_data *data = vdata; oidcpy(&data->expand->oid, oid); - batch_object_write(NULL, data->opt, data->expand); + batch_object_write(NULL, data->scratch, data->opt, data->expand); return 0; } -static int batch_loose_object(const struct object_id *oid, - const char *path, - void *data) +static int collect_loose_object(const struct object_id *oid, + const char *path, + void *data) { oid_array_append(data, oid); return 0; } -static int batch_packed_object(const struct object_id *oid, - struct packed_git *pack, - uint32_t pos, - void *data) +static int collect_packed_object(const struct object_id *oid, + struct packed_git *pack, + uint32_t pos, + void *data) { oid_array_append(data, oid); return 0; } +static int batch_unordered_object(const struct object_id *oid, void *vdata) +{ + struct object_cb_data *data = vdata; + + if (oidset_insert(data->seen, oid)) + return 0; + + return batch_object_cb(oid, data); +} + +static int batch_unordered_loose(const struct object_id *oid, + const char *path, + void *data) +{ + return batch_unordered_object(oid, data); +} + +static int batch_unordered_packed(const struct object_id *oid, + struct packed_git *pack, + uint32_t pos, + void *data) +{ + return batch_unordered_object(oid, data); +} + static int batch_objects(struct batch_options *opt) { - struct strbuf buf = STRBUF_INIT; + struct strbuf input = STRBUF_INIT; + struct strbuf output = STRBUF_INIT; struct expand_data data; int save_warning; int retval = 0; @@ -454,8 +485,9 @@ static int batch_objects(struct batch_options *opt) */ memset(&data, 0, sizeof(data)); data.mark_query = 1; - strbuf_expand(&buf, opt->format, expand_format, &data); + strbuf_expand(&output, opt->format, expand_format, &data); data.mark_query = 0; + strbuf_release(&output); if (opt->cmdmode) data.split_on_whitespace = 1; @@ -473,19 +505,37 @@ static int batch_objects(struct batch_options *opt) data.info.typep = &data.type; if (opt->all_objects) { - struct oid_array sa = OID_ARRAY_INIT; struct object_cb_data cb; - for_each_loose_object(batch_loose_object, &sa, 0); - for_each_packed_object(batch_packed_object, &sa, 0); if (repository_format_partial_clone) warning("This repository has extensions.partialClone set. Some objects may not be loaded."); cb.opt = opt; cb.expand = &data; - oid_array_for_each_unique(&sa, batch_object_cb, &cb); + cb.scratch = &output; - oid_array_clear(&sa); + if (opt->unordered) { + struct oidset seen = OIDSET_INIT; + + cb.seen = &seen; + + for_each_loose_object(batch_unordered_loose, &cb, 0); + for_each_packed_object(batch_unordered_packed, &cb, + FOR_EACH_OBJECT_PACK_ORDER); + + oidset_clear(&seen); + } else { + struct oid_array sa = OID_ARRAY_INIT; + + for_each_loose_object(collect_loose_object, &sa, 0); + for_each_packed_object(collect_packed_object, &sa, 0); + + oid_array_for_each_unique(&sa, batch_object_cb, &cb); + + oid_array_clear(&sa); + } + + strbuf_release(&output); return 0; } @@ -499,14 +549,14 @@ static int batch_objects(struct batch_options *opt) save_warning = warn_on_object_refname_ambiguity; warn_on_object_refname_ambiguity = 0; - while (strbuf_getline(&buf, stdin) != EOF) { + while (strbuf_getline(&input, stdin) != EOF) { if (data.split_on_whitespace) { /* * Split at first whitespace, tying off the beginning * of the string and saving the remainder (or NULL) in * data.rest. */ - char *p = strpbrk(buf.buf, " \t"); + char *p = strpbrk(input.buf, " \t"); if (p) { while (*p && strchr(" \t", *p)) *p++ = '\0'; @@ -514,10 +564,11 @@ static int batch_objects(struct batch_options *opt) data.rest = p; } - batch_one_object(buf.buf, opt, &data); + batch_one_object(input.buf, &output, opt, &data); } - strbuf_release(&buf); + strbuf_release(&input); + strbuf_release(&output); warn_on_object_refname_ambiguity = save_warning; return retval; } @@ -586,6 +637,8 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix) N_("follow in-tree symlinks (used with --batch or --batch-check)")), OPT_BOOL(0, "batch-all-objects", &batch.all_objects, N_("show all objects with --batch or --batch-check")), + OPT_BOOL(0, "unordered", &batch.unordered, + N_("do not order --batch-all-objects output")), OPT_END() }; diff --git a/builtin/prune-packed.c b/builtin/prune-packed.c index 4ff525e50f..a9e7b552b9 100644 --- a/builtin/prune-packed.c +++ b/builtin/prune-packed.c @@ -3,6 +3,7 @@ #include "progress.h" #include "parse-options.h" #include "packfile.h" +#include "object-store.h" static const char * const prune_packed_usage[] = { N_("git prune-packed [-n | --dry-run] [-q | --quiet]"), diff --git a/cache.h b/cache.h index 1398b2a4e4..66d3b91cdb 100644 --- a/cache.h +++ b/cache.h @@ -1575,62 +1575,6 @@ extern int odb_mkstemp(struct strbuf *temp_filename, const char *pattern); */ extern int odb_pack_keep(const char *name); -/* - * Iterate over the files in the loose-object parts of the object - * directory "path", triggering the following callbacks: - * - * - loose_object is called for each loose object we find. - * - * - loose_cruft is called for any files that do not appear to be - * loose objects. Note that we only look in the loose object - * directories "objects/[0-9a-f]{2}/", so we will not report - * "objects/foobar" as cruft. - * - * - loose_subdir is called for each top-level hashed subdirectory - * of the object directory (e.g., "$OBJDIR/f0"). It is called - * after the objects in the directory are processed. - * - * Any callback that is NULL will be ignored. Callbacks returning non-zero - * will end the iteration. - * - * In the "buf" variant, "path" is a strbuf which will also be used as a - * scratch buffer, but restored to its original contents before - * the function returns. - */ -typedef int each_loose_object_fn(const struct object_id *oid, - const char *path, - void *data); -typedef int each_loose_cruft_fn(const char *basename, - const char *path, - void *data); -typedef int each_loose_subdir_fn(unsigned int nr, - const char *path, - void *data); -int for_each_file_in_obj_subdir(unsigned int subdir_nr, - struct strbuf *path, - each_loose_object_fn obj_cb, - each_loose_cruft_fn cruft_cb, - each_loose_subdir_fn subdir_cb, - void *data); -int for_each_loose_file_in_objdir(const char *path, - each_loose_object_fn obj_cb, - each_loose_cruft_fn cruft_cb, - each_loose_subdir_fn subdir_cb, - void *data); -int for_each_loose_file_in_objdir_buf(struct strbuf *path, - each_loose_object_fn obj_cb, - each_loose_cruft_fn cruft_cb, - each_loose_subdir_fn subdir_cb, - void *data); - -/* - * Iterate over loose objects in both the local - * repository and any alternates repositories (unless the - * LOCAL_ONLY flag is set). - */ -#define FOR_EACH_OBJECT_LOCAL_ONLY 0x1 -extern int for_each_loose_object(each_loose_object_fn, void *, unsigned flags); - /* * Set this to 0 to prevent sha1_object_info_extended() from fetching missing * blobs. This has a difference only if extensions.partialClone is set. diff --git a/commit-graph.c b/commit-graph.c index 0034740c26..8a1bec7b8a 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -730,7 +730,7 @@ void write_commit_graph(const char *obj_dir, die(_("error adding pack %s"), packname.buf); if (open_pack_index(p)) die(_("error opening index for %s"), packname.buf); - for_each_object_in_pack(p, add_packed_commits, &oids); + for_each_object_in_pack(p, add_packed_commits, &oids, 0); close_pack(p); } strbuf_release(&packname); diff --git a/object-store.h b/object-store.h index e481f7ad41..162156f5dc 100644 --- a/object-store.h +++ b/object-store.h @@ -262,4 +262,94 @@ int oid_object_info_extended(struct repository *r, const struct object_id *, struct object_info *, unsigned flags); +/* + * Iterate over the files in the loose-object parts of the object + * directory "path", triggering the following callbacks: + * + * - loose_object is called for each loose object we find. + * + * - loose_cruft is called for any files that do not appear to be + * loose objects. Note that we only look in the loose object + * directories "objects/[0-9a-f]{2}/", so we will not report + * "objects/foobar" as cruft. + * + * - loose_subdir is called for each top-level hashed subdirectory + * of the object directory (e.g., "$OBJDIR/f0"). It is called + * after the objects in the directory are processed. + * + * Any callback that is NULL will be ignored. Callbacks returning non-zero + * will end the iteration. + * + * In the "buf" variant, "path" is a strbuf which will also be used as a + * scratch buffer, but restored to its original contents before + * the function returns. + */ +typedef int each_loose_object_fn(const struct object_id *oid, + const char *path, + void *data); +typedef int each_loose_cruft_fn(const char *basename, + const char *path, + void *data); +typedef int each_loose_subdir_fn(unsigned int nr, + const char *path, + void *data); +int for_each_file_in_obj_subdir(unsigned int subdir_nr, + struct strbuf *path, + each_loose_object_fn obj_cb, + each_loose_cruft_fn cruft_cb, + each_loose_subdir_fn subdir_cb, + void *data); +int for_each_loose_file_in_objdir(const char *path, + each_loose_object_fn obj_cb, + each_loose_cruft_fn cruft_cb, + each_loose_subdir_fn subdir_cb, + void *data); +int for_each_loose_file_in_objdir_buf(struct strbuf *path, + each_loose_object_fn obj_cb, + each_loose_cruft_fn cruft_cb, + each_loose_subdir_fn subdir_cb, + void *data); + +/* Flags for for_each_*_object() below. */ +enum for_each_object_flags { + /* Iterate only over local objects, not alternates. */ + FOR_EACH_OBJECT_LOCAL_ONLY = (1<<0), + + /* Only iterate over packs obtained from the promisor remote. */ + FOR_EACH_OBJECT_PROMISOR_ONLY = (1<<1), + + /* + * Visit objects within a pack in packfile order rather than .idx order + */ + FOR_EACH_OBJECT_PACK_ORDER = (1<<2), +}; + +/* + * Iterate over all accessible loose objects without respect to + * reachability. By default, this includes both local and alternate objects. + * The order in which objects are visited is unspecified. + * + * Any flags specific to packs are ignored. + */ +int for_each_loose_object(each_loose_object_fn, void *, + enum for_each_object_flags flags); + +/* + * Iterate over all accessible packed objects without respect to reachability. + * By default, this includes both local and alternate packs. + * + * Note that some objects may appear twice if they are found in multiple packs. + * Each pack is visited in an unspecified order. By default, objects within a + * pack are visited in pack-idx order (i.e., sorted by oid). + */ +typedef int each_packed_object_fn(const struct object_id *oid, + struct packed_git *pack, + uint32_t pos, + void *data); +int for_each_object_in_pack(struct packed_git *p, + each_packed_object_fn, void *data, + enum for_each_object_flags flags); +int for_each_packed_object(each_packed_object_fn, void *, + enum for_each_object_flags flags); + #endif /* OBJECT_STORE_H */ diff --git a/packfile.c b/packfile.c index 6974903e58..ebcb5742ec 100644 --- a/packfile.c +++ b/packfile.c @@ -1885,26 +1885,38 @@ int has_pack_index(const unsigned char *sha1) return 1; } -int for_each_object_in_pack(struct packed_git *p, each_packed_object_fn cb, void *data) +int for_each_object_in_pack(struct packed_git *p, + each_packed_object_fn cb, void *data, + enum for_each_object_flags flags) { uint32_t i; int r = 0; + if (flags & FOR_EACH_OBJECT_PACK_ORDER) + load_pack_revindex(p); + for (i = 0; i < p->num_objects; i++) { + uint32_t pos; struct object_id oid; - if (!nth_packed_object_oid(&oid, p, i)) - return error("unable to get sha1 of object %u in %s", - i, p->pack_name); + if (flags & FOR_EACH_OBJECT_PACK_ORDER) + pos = p->revindex[i].nr; + else + pos = i; - r = cb(&oid, p, i, data); + if (!nth_packed_object_oid(&oid, p, pos)) + return error("unable to get sha1 of object %u in %s", + pos, p->pack_name); + + r = cb(&oid, p, pos, data); if (r) break; } return r; } -int for_each_packed_object(each_packed_object_fn cb, void *data, unsigned flags) +int for_each_packed_object(each_packed_object_fn cb, void *data, + enum for_each_object_flags flags) { struct packed_git *p; int r = 0; @@ -1921,7 +1933,7 @@ int for_each_packed_object(each_packed_object_fn cb, void *data, unsigned flags) pack_errors = 1; continue; } - r = for_each_object_in_pack(p, cb, data); + r = for_each_object_in_pack(p, cb, data, flags); if (r) break; } diff --git a/packfile.h b/packfile.h index fa36c473ad..630f35cf31 100644 --- a/packfile.h +++ b/packfile.h @@ -148,23 +148,6 @@ extern int has_object_pack(const struct object_id *oid); extern int has_pack_index(const unsigned char *sha1); -/* - * Only iterate over packs obtained from the promisor remote. - */ -#define FOR_EACH_OBJECT_PROMISOR_ONLY 2 - -/* - * Iterate over packed objects in both the local - * repository and any alternates repositories (unless the - * FOR_EACH_OBJECT_LOCAL_ONLY flag, defined in cache.h, is set). - */ -typedef int each_packed_object_fn(const struct object_id *oid, - struct packed_git *pack, - uint32_t pos, - void *data); -extern int for_each_object_in_pack(struct packed_git *p, each_packed_object_fn, void *data); -extern int for_each_packed_object(each_packed_object_fn, void *, unsigned flags); - /* * Return 1 if an object in a promisor packfile is or refers to the given * object, 0 otherwise. diff --git a/sha1-file.c b/sha1-file.c index c6ca960eb2..56e5329caf 100644 --- a/sha1-file.c +++ b/sha1-file.c @@ -2146,7 +2146,8 @@ static int loose_from_alt_odb(struct alternate_object_database *alt, return r; } -int for_each_loose_object(each_loose_object_fn cb, void *data, unsigned flags) +int for_each_loose_object(each_loose_object_fn cb, void *data, + enum for_each_object_flags flags) { struct loose_alt_odb_data alt; int r; diff --git a/t/t1006-cat-file.sh b/t/t1006-cat-file.sh index 13dd510b2e..7f19d591f2 100755 --- a/t/t1006-cat-file.sh +++ b/t/t1006-cat-file.sh @@ -550,8 +550,8 @@ test_expect_success 'git cat-file --batch --follow-symlink returns correct sha a test_expect_success 'cat-file --batch-all-objects shows all objects' ' # make new repos so we know the full set of objects; we will # also make sure that there are some packed and some loose - # objects, some referenced and some not, and that there are - # some available only via alternates. + # objects, some referenced and some not, some duplicates, and that + # there are some available only via alternates. git init all-one && ( cd all-one && @@ -567,10 +567,23 @@ test_expect_success 'cat-file --batch-all-objects shows all objects' ' cd all-two && echo local-unref | git hash-object -w --stdin ) >>expect.unsorted && + git -C all-two rev-parse HEAD:file | + git -C all-two pack-objects .git/objects/pack/pack && sort expect && git -C all-two cat-file --batch-all-objects \ --batch-check="%(objectname)" >actual && test_cmp expect actual ' +# The only user-visible difference is that the objects are no longer sorted, +# and the resulting sort order is undefined. So we can only check that it +# produces the same objects as the ordered case, but that at least exercises +# the code. +test_expect_success 'cat-file --unordered works' ' + git -C all-two cat-file --batch-all-objects --unordered \ + --batch-check="%(objectname)" >actual.unsorted && + sort actual && + test_cmp expect actual +' + test_done