Merge branch 'jk/cat-file-batch-all'

"cat-file" learned "--batch-all-objects" option to enumerate all available objects in the repository more quickly than "rev-list --all --objects" (the output includes unreachable objects, though).

* jk/cat-file-batch-all:
  cat-file: sort and de-dup output of --batch-all-objects
  cat-file: add --batch-all-objects option
  cat-file: split batch_one_object into two stages
  cat-file: stop returning value from batch_one_object
  cat-file: add --buffer option
  cat-file: move batch_options definition to top of file
  cat-file: minor style fix in options list

maint
commit
d2c3464fef
|
@ -69,6 +69,20 @@ OPTIONS
|
||||||
not be combined with any other options or arguments. See the
|
not be combined with any other options or arguments. See the
|
||||||
section `BATCH OUTPUT` below for details.
|
section `BATCH OUTPUT` below for details.
|
||||||
|
|
||||||
|
--batch-all-objects::
|
||||||
|
Instead of reading a list of objects on stdin, perform the
|
||||||
|
requested batch operation on all objects in the repository and
|
||||||
|
any alternate object stores (not just reachable objects).
|
||||||
|
Requires `--batch` or `--batch-check` be specified. Note that
|
||||||
|
the objects are visited in order sorted by their hashes.
|
||||||
|
|
||||||
|
--buffer::
|
||||||
|
Normally batch output is flushed after each object is output, so
|
||||||
|
that a process can interactively read and write from
|
||||||
|
`cat-file`. With this option, the output uses normal stdio
|
||||||
|
buffering; this is much more efficient when invoking
|
||||||
|
`--batch-check` on a large number of objects.
|
||||||
|
|
||||||
--allow-unknown-type::
|
--allow-unknown-type::
|
||||||
Allow -s or -t to query broken/corrupt objects of unknown type.
|
Allow -s or -t to query broken/corrupt objects of unknown type.
|
||||||
|
|
||||||
|
|
|
@ -9,6 +9,16 @@
|
||||||
#include "userdiff.h"
|
#include "userdiff.h"
|
||||||
#include "streaming.h"
|
#include "streaming.h"
|
||||||
#include "tree-walk.h"
|
#include "tree-walk.h"
|
||||||
|
#include "sha1-array.h"
|
||||||
|
|
||||||
|
struct batch_options {
|
||||||
|
int enabled;
|
||||||
|
int follow_symlinks;
|
||||||
|
int print_contents;
|
||||||
|
int buffer_output;
|
||||||
|
int all_objects;
|
||||||
|
const char *format;
|
||||||
|
};
|
||||||
|
|
||||||
static int cat_one_file(int opt, const char *exp_type, const char *obj_name,
|
static int cat_one_file(int opt, const char *exp_type, const char *obj_name,
|
||||||
int unknown_type)
|
int unknown_type)
|
||||||
|
@ -204,14 +214,25 @@ static size_t expand_format(struct strbuf *sb, const char *start, void *data)
|
||||||
return end - start + 1;
|
return end - start + 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void print_object_or_die(int fd, struct expand_data *data)
|
static void batch_write(struct batch_options *opt, const void *data, int len)
|
||||||
|
{
|
||||||
|
if (opt->buffer_output) {
|
||||||
|
if (fwrite(data, 1, len, stdout) != len)
|
||||||
|
die_errno("unable to write to stdout");
|
||||||
|
} else
|
||||||
|
write_or_die(1, data, len);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void print_object_or_die(struct batch_options *opt, struct expand_data *data)
|
||||||
{
|
{
|
||||||
const unsigned char *sha1 = data->sha1;
|
const unsigned char *sha1 = data->sha1;
|
||||||
|
|
||||||
assert(data->info.typep);
|
assert(data->info.typep);
|
||||||
|
|
||||||
if (data->type == OBJ_BLOB) {
|
if (data->type == OBJ_BLOB) {
|
||||||
if (stream_blob_to_fd(fd, sha1, NULL, 0) < 0)
|
if (opt->buffer_output)
|
||||||
|
fflush(stdout);
|
||||||
|
if (stream_blob_to_fd(1, sha1, NULL, 0) < 0)
|
||||||
die("unable to stream %s to stdout", sha1_to_hex(sha1));
|
die("unable to stream %s to stdout", sha1_to_hex(sha1));
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
@ -227,29 +248,40 @@ static void print_object_or_die(int fd, struct expand_data *data)
|
||||||
if (data->info.sizep && size != data->size)
|
if (data->info.sizep && size != data->size)
|
||||||
die("object %s changed size!?", sha1_to_hex(sha1));
|
die("object %s changed size!?", sha1_to_hex(sha1));
|
||||||
|
|
||||||
write_or_die(fd, contents, size);
|
batch_write(opt, contents, size);
|
||||||
free(contents);
|
free(contents);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
struct batch_options {
|
static void batch_object_write(const char *obj_name, struct batch_options *opt,
|
||||||
int enabled;
|
|
||||||
int follow_symlinks;
|
|
||||||
int print_contents;
|
|
||||||
const char *format;
|
|
||||||
};
|
|
||||||
|
|
||||||
static int batch_one_object(const char *obj_name, struct batch_options *opt,
|
|
||||||
struct expand_data *data)
|
struct expand_data *data)
|
||||||
{
|
{
|
||||||
struct strbuf buf = STRBUF_INIT;
|
struct strbuf buf = STRBUF_INIT;
|
||||||
|
|
||||||
|
if (sha1_object_info_extended(data->sha1, &data->info, LOOKUP_REPLACE_OBJECT) < 0) {
|
||||||
|
printf("%s missing\n", obj_name ? obj_name : sha1_to_hex(data->sha1));
|
||||||
|
fflush(stdout);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
strbuf_expand(&buf, opt->format, expand_format, data);
|
||||||
|
strbuf_addch(&buf, '\n');
|
||||||
|
batch_write(opt, buf.buf, buf.len);
|
||||||
|
strbuf_release(&buf);
|
||||||
|
|
||||||
|
if (opt->print_contents) {
|
||||||
|
print_object_or_die(opt, data);
|
||||||
|
batch_write(opt, "\n", 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void batch_one_object(const char *obj_name, struct batch_options *opt,
|
||||||
|
struct expand_data *data)
|
||||||
|
{
|
||||||
struct object_context ctx;
|
struct object_context ctx;
|
||||||
int flags = opt->follow_symlinks ? GET_SHA1_FOLLOW_SYMLINKS : 0;
|
int flags = opt->follow_symlinks ? GET_SHA1_FOLLOW_SYMLINKS : 0;
|
||||||
enum follow_symlinks_result result;
|
enum follow_symlinks_result result;
|
||||||
|
|
||||||
if (!obj_name)
|
|
||||||
return 1;
|
|
||||||
|
|
||||||
result = get_sha1_with_context(obj_name, flags, data->sha1, &ctx);
|
result = get_sha1_with_context(obj_name, flags, data->sha1, &ctx);
|
||||||
if (result != FOUND) {
|
if (result != FOUND) {
|
||||||
switch (result) {
|
switch (result) {
|
||||||
|
@ -274,7 +306,7 @@ static int batch_one_object(const char *obj_name, struct batch_options *opt,
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
fflush(stdout);
|
fflush(stdout);
|
||||||
return 0;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ctx.mode == 0) {
|
if (ctx.mode == 0) {
|
||||||
|
@ -282,24 +314,38 @@ static int batch_one_object(const char *obj_name, struct batch_options *opt,
|
||||||
(uintmax_t)ctx.symlink_path.len,
|
(uintmax_t)ctx.symlink_path.len,
|
||||||
ctx.symlink_path.buf);
|
ctx.symlink_path.buf);
|
||||||
fflush(stdout);
|
fflush(stdout);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
batch_object_write(obj_name, opt, data);
|
||||||
|
}
|
||||||
|
|
||||||
|
struct object_cb_data {
|
||||||
|
struct batch_options *opt;
|
||||||
|
struct expand_data *expand;
|
||||||
|
};
|
||||||
|
|
||||||
|
static void batch_object_cb(const unsigned char sha1[20], void *vdata)
|
||||||
|
{
|
||||||
|
struct object_cb_data *data = vdata;
|
||||||
|
hashcpy(data->expand->sha1, sha1);
|
||||||
|
batch_object_write(NULL, data->opt, data->expand);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int batch_loose_object(const unsigned char *sha1,
|
||||||
|
const char *path,
|
||||||
|
void *data)
|
||||||
|
{
|
||||||
|
sha1_array_append(data, sha1);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (sha1_object_info_extended(data->sha1, &data->info, LOOKUP_REPLACE_OBJECT) < 0) {
|
static int batch_packed_object(const unsigned char *sha1,
|
||||||
printf("%s missing\n", obj_name);
|
struct packed_git *pack,
|
||||||
fflush(stdout);
|
uint32_t pos,
|
||||||
return 0;
|
void *data)
|
||||||
}
|
{
|
||||||
|
sha1_array_append(data, sha1);
|
||||||
strbuf_expand(&buf, opt->format, expand_format, data);
|
|
||||||
strbuf_addch(&buf, '\n');
|
|
||||||
write_or_die(1, buf.buf, buf.len);
|
|
||||||
strbuf_release(&buf);
|
|
||||||
|
|
||||||
if (opt->print_contents) {
|
|
||||||
print_object_or_die(1, data);
|
|
||||||
write_or_die(1, "\n", 1);
|
|
||||||
}
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -330,6 +376,21 @@ static int batch_objects(struct batch_options *opt)
|
||||||
if (opt->print_contents)
|
if (opt->print_contents)
|
||||||
data.info.typep = &data.type;
|
data.info.typep = &data.type;
|
||||||
|
|
||||||
|
if (opt->all_objects) {
|
||||||
|
struct sha1_array sa = SHA1_ARRAY_INIT;
|
||||||
|
struct object_cb_data cb;
|
||||||
|
|
||||||
|
for_each_loose_object(batch_loose_object, &sa, 0);
|
||||||
|
for_each_packed_object(batch_packed_object, &sa, 0);
|
||||||
|
|
||||||
|
cb.opt = opt;
|
||||||
|
cb.expand = &data;
|
||||||
|
sha1_array_for_each_unique(&sa, batch_object_cb, &cb);
|
||||||
|
|
||||||
|
sha1_array_clear(&sa);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We are going to call get_sha1 on a potentially very large number of
|
* We are going to call get_sha1 on a potentially very large number of
|
||||||
* objects. In most large cases, these will be actual object sha1s. The
|
* objects. In most large cases, these will be actual object sha1s. The
|
||||||
|
@ -355,9 +416,7 @@ static int batch_objects(struct batch_options *opt)
|
||||||
data.rest = p;
|
data.rest = p;
|
||||||
}
|
}
|
||||||
|
|
||||||
retval = batch_one_object(buf.buf, opt, &data);
|
batch_one_object(buf.buf, opt, &data);
|
||||||
if (retval)
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
strbuf_release(&buf);
|
strbuf_release(&buf);
|
||||||
|
@ -414,6 +473,7 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix)
|
||||||
N_("for blob objects, run textconv on object's content"), 'c'),
|
N_("for blob objects, run textconv on object's content"), 'c'),
|
||||||
OPT_BOOL(0, "allow-unknown-type", &unknown_type,
|
OPT_BOOL(0, "allow-unknown-type", &unknown_type,
|
||||||
N_("allow -s and -t to work with broken/corrupt objects")),
|
N_("allow -s and -t to work with broken/corrupt objects")),
|
||||||
|
OPT_BOOL(0, "buffer", &batch.buffer_output, N_("buffer --batch output")),
|
||||||
{ OPTION_CALLBACK, 0, "batch", &batch, "format",
|
{ OPTION_CALLBACK, 0, "batch", &batch, "format",
|
||||||
N_("show info and content of objects fed from the standard input"),
|
N_("show info and content of objects fed from the standard input"),
|
||||||
PARSE_OPT_OPTARG, batch_option_callback },
|
PARSE_OPT_OPTARG, batch_option_callback },
|
||||||
|
@ -422,6 +482,8 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix)
|
||||||
PARSE_OPT_OPTARG, batch_option_callback },
|
PARSE_OPT_OPTARG, batch_option_callback },
|
||||||
OPT_BOOL(0, "follow-symlinks", &batch.follow_symlinks,
|
OPT_BOOL(0, "follow-symlinks", &batch.follow_symlinks,
|
||||||
N_("follow in-tree symlinks (used with --batch or --batch-check)")),
|
N_("follow in-tree symlinks (used with --batch or --batch-check)")),
|
||||||
|
OPT_BOOL(0, "batch-all-objects", &batch.all_objects,
|
||||||
|
N_("show all objects with --batch or --batch-check")),
|
||||||
OPT_END()
|
OPT_END()
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -446,7 +508,7 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix)
|
||||||
usage_with_options(cat_file_usage, options);
|
usage_with_options(cat_file_usage, options);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (batch.follow_symlinks && !batch.enabled) {
|
if ((batch.follow_symlinks || batch.all_objects) && !batch.enabled) {
|
||||||
usage_with_options(cat_file_usage, options);
|
usage_with_options(cat_file_usage, options);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -547,4 +547,30 @@ test_expect_success 'git cat-file --batch --follow-symlink returns correct sha a
|
||||||
test_cmp expect actual
|
test_cmp expect actual
|
||||||
'
|
'
|
||||||
|
|
||||||
|
test_expect_success 'cat-file --batch-all-objects shows all objects' '
|
||||||
|
# make new repos so we know the full set of objects; we will
|
||||||
|
# also make sure that there are some packed and some loose
|
||||||
|
# objects, some referenced and some not, and that there are
|
||||||
|
# some available only via alternates.
|
||||||
|
git init all-one &&
|
||||||
|
(
|
||||||
|
cd all-one &&
|
||||||
|
echo content >file &&
|
||||||
|
git add file &&
|
||||||
|
git commit -qm base &&
|
||||||
|
git rev-parse HEAD HEAD^{tree} HEAD:file &&
|
||||||
|
git repack -ad &&
|
||||||
|
echo not-cloned | git hash-object -w --stdin
|
||||||
|
) >expect.unsorted &&
|
||||||
|
git clone -s all-one all-two &&
|
||||||
|
(
|
||||||
|
cd all-two &&
|
||||||
|
echo local-unref | git hash-object -w --stdin
|
||||||
|
) >>expect.unsorted &&
|
||||||
|
sort <expect.unsorted >expect &&
|
||||||
|
git -C all-two cat-file --batch-all-objects \
|
||||||
|
--batch-check="%(objectname)" >actual &&
|
||||||
|
test_cmp expect actual
|
||||||
|
'
|
||||||
|
|
||||||
test_done
|
test_done
|
||||||
|
|
Loading…
Reference in New Issue