From a9ff277e583782346181f431784e48046b0dfaa9 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Sun, 28 Nov 2010 13:42:46 -0600 Subject: [PATCH 1/6] fast-import: stricter parsing of integer options Check the result from strtoul to avoid accepting arguments like --depth=-1 and --active-branches=foo,bar,baz. Requested-by: Ramkumar Ramachandra Signed-off-by: Jonathan Nieder Signed-off-by: Junio C Hamano --- fast-import.c | 13 +++++++++++-- t/t9300-fast-import.sh | 8 ++++++++ 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/fast-import.c b/fast-import.c index 77549ebd6f..4bd9bf7d08 100644 --- a/fast-import.c +++ b/fast-import.c @@ -2746,16 +2746,25 @@ static void option_date_format(const char *fmt) die("unknown --date-format argument %s", fmt); } +static unsigned long ulong_arg(const char *option, const char *arg) +{ + char *endptr; + unsigned long rv = strtoul(arg, &endptr, 0); + if (strchr(arg, '-') || endptr == arg || *endptr) + die("%s: argument must be a non-negative integer", option); + return rv; +} + static void option_depth(const char *depth) { - max_depth = strtoul(depth, NULL, 0); + max_depth = ulong_arg("--depth", depth); if (max_depth > MAX_DEPTH) die("--depth cannot exceed %u", MAX_DEPTH); } static void option_active_branches(const char *branches) { - max_active_branches = strtoul(branches, NULL, 0); + max_active_branches = ulong_arg("--active-branches", branches); } static void option_export_marks(const char *marks) diff --git a/t/t9300-fast-import.sh b/t/t9300-fast-import.sh index 14d17691b1..c80bb0cf10 100755 --- a/t/t9300-fast-import.sh +++ b/t/t9300-fast-import.sh @@ -1659,6 +1659,14 @@ test_expect_success 'R: unknown commandline options are rejected' '\ test_must_fail git fast-import --non-existing-option < /dev/null ' +test_expect_success 'R: die on invalid option argument' ' + echo "option git active-branches=-5" | + test_must_fail git fast-import && + echo "option git depth=" | + test_must_fail git fast-import && + test_must_fail git fast-import --depth="5 elephants" input < Date: Sun, 28 Nov 2010 13:43:57 -0600 Subject: [PATCH 2/6] fast-import: clarify documentation of "feature" command The "feature" command allows streams to specify options for the import that must not be ignored. Logically, they are part of the stream, even though technically most supported features are synonyms to command-line options. Make this more obvious by being more explicit about how the analogy between most "feature" commands and command-line options works. Treat the feature (import-marks) that does not fit this analogy separately. Signed-off-by: Jonathan Nieder Acked-by: Sverre Rabbelier Signed-off-by: Junio C Hamano --- Documentation/git-fast-import.txt | 37 ++++++++++++++----------------- 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/Documentation/git-fast-import.txt b/Documentation/git-fast-import.txt index 5d0c245e38..ef86763948 100644 --- a/Documentation/git-fast-import.txt +++ b/Documentation/git-fast-import.txt @@ -885,28 +885,25 @@ Require that fast-import supports the specified feature, or abort if it does not. .... - 'feature' SP LF + 'feature' SP ('=' )? LF .... -The part of the command may be any string matching -^[a-zA-Z][a-zA-Z-]*$ and should be understood by fast-import. - -Feature work identical as their option counterparts with the -exception of the import-marks feature, see below. - -The following features are currently supported: - -* date-format -* import-marks -* export-marks -* relative-marks -* no-relative-marks -* force - -The import-marks behaves differently from when it is specified as -commandline option in that only one "feature import-marks" is allowed -per stream. Also, any --import-marks= specified on the commandline -will override those from the stream (if any). +The part of the command may be any one of the following: + +date-format:: +export-marks:: +relative-marks:: +no-relative-marks:: +force:: + Act as though the corresponding command-line option with + a leading '--' was passed on the command line + (see OPTIONS, above). + +import-marks:: + Like --import-marks except in two respects: first, only one + "feature import-marks" command is allowed per stream; + second, an --import-marks= command-line option overrides + any "feature import-marks" command in the stream. `option` ~~~~~~~~ From 85c62395b152f99e8867aaf84cea93dddc03243c Mon Sep 17 00:00:00 2001 From: David Barr Date: Sun, 28 Nov 2010 13:45:01 -0600 Subject: [PATCH 3/6] fast-import: let importers retrieve blobs New objects written by fast-import are not available immediately. Until a checkpoint has been started and finishes writing the pack index, any new blobs will not be accessible using standard git tools. So introduce a new way to access them: a "cat-blob" command in the command stream requests for fast-import to print a blob to stdout or a file descriptor specified by the argument to --cat-blob-fd. The value for cat-blob-fd cannot be specified in the stream because that would be a layering violation: the decision of where to direct a stream has to be made when fast-import is started anyway, so we might as well make the stream format is independent of that detail. Output uses the same format as "git cat-file --batch". Thanks to Sverre Rabbelier and Sam Vilain for guidance in designing the protocol. Based-on-patch-by: Jonathan Nieder Signed-off-by: David Barr Acked-by: Ramkumar Ramachandra Signed-off-by: Jonathan Nieder Signed-off-by: Junio C Hamano --- Documentation/git-fast-import.txt | 41 +++++++ fast-import.c | 97 +++++++++++++++ t/t9300-fast-import.sh | 193 +++++++++++++++++++++++++++++- 3 files changed, 329 insertions(+), 2 deletions(-) diff --git a/Documentation/git-fast-import.txt b/Documentation/git-fast-import.txt index ef86763948..5d8f60c790 100644 --- a/Documentation/git-fast-import.txt +++ b/Documentation/git-fast-import.txt @@ -92,6 +92,11 @@ OPTIONS --(no-)-relative-marks= with the --(import|export)-marks= options. +--cat-blob-fd=:: + Specify the file descriptor that will be written to + when the `cat-blob` command is encountered in the stream. + The default behaviour is to write to `stdout`. + --export-pack-edges=:: After creating a packfile, print a line of data to listing the filename of the packfile and the last @@ -320,6 +325,11 @@ and control the current import process. More detailed discussion standard output. This command is optional and is not needed to perform an import. +`cat-blob`:: + Causes fast-import to print a blob in 'cat-file --batch' + format to the file descriptor set with `--cat-blob-fd` or + `stdout` if unspecified. + `feature`:: Require that fast-import supports the specified feature, or abort if it does not. @@ -879,6 +889,29 @@ Placing a `progress` command immediately after a `checkpoint` will inform the reader when the `checkpoint` has been completed and it can safely access the refs that fast-import updated. +`cat-blob` +~~~~~~~~~~ +Causes fast-import to print a blob to a file descriptor previously +arranged with the `--cat-blob-fd` argument. The command otherwise +has no impact on the current import; its main purpose is to +retrieve blobs that may be in fast-import's memory but not +accessible from the target repository. + +.... + 'cat-blob' SP LF +.... + +The `` can be either a mark reference (`:`) +set previously or a full 40-byte SHA-1 of a Git blob, preexisting or +ready to be written. + +output uses the same format as `git cat-file --batch`: + +==== + SP 'blob' SP LF + LF +==== + `feature` ~~~~~~~~~ Require that fast-import supports the specified feature, or abort if @@ -905,6 +938,13 @@ import-marks:: second, an --import-marks= command-line option overrides any "feature import-marks" command in the stream. +cat-blob:: + Ignored. Versions of fast-import not supporting the + "cat-blob" command will exit with a message indicating so. + This lets the import error out early with a clear message, + rather than wasting time on the early part of an import + before the unsupported command is detected. + `option` ~~~~~~~~ Processes the specified option so that git fast-import behaves in a @@ -930,6 +970,7 @@ not be passed as option: * date-format * import-marks * export-marks +* cat-blob-fd * force Crash Reports diff --git a/fast-import.c b/fast-import.c index 4bd9bf7d08..dd58f517b9 100644 --- a/fast-import.c +++ b/fast-import.c @@ -55,6 +55,8 @@ Format of STDIN stream: ('from' sp committish lf)? lf?; + cat_blob ::= 'cat-blob' sp (hexsha1 | idnum) lf; + checkpoint ::= 'checkpoint' lf lf?; @@ -361,6 +363,9 @@ static uintmax_t next_mark; static struct strbuf new_data = STRBUF_INIT; static int seen_data_command; +/* Where to write output of cat-blob commands */ +static int cat_blob_fd = STDOUT_FILENO; + static void parse_argv(void); static void write_branch_report(FILE *rpt, struct branch *b) @@ -2689,6 +2694,81 @@ static void parse_reset_branch(void) unread_command_buf = 1; } +static void cat_blob_write(const char *buf, unsigned long size) +{ + if (write_in_full(cat_blob_fd, buf, size) != size) + die_errno("Write to frontend failed"); +} + +static void cat_blob(struct object_entry *oe, unsigned char sha1[20]) +{ + struct strbuf line = STRBUF_INIT; + unsigned long size; + enum object_type type = 0; + char *buf; + + if (!oe || oe->pack_id == MAX_PACK_ID) { + buf = read_sha1_file(sha1, &type, &size); + } else { + type = oe->type; + buf = gfi_unpack_entry(oe, &size); + } + + /* + * Output based on batch_one_object() from cat-file.c. + */ + if (type <= 0) { + strbuf_reset(&line); + strbuf_addf(&line, "%s missing\n", sha1_to_hex(sha1)); + cat_blob_write(line.buf, line.len); + strbuf_release(&line); + free(buf); + return; + } + if (!buf) + die("Can't read object %s", sha1_to_hex(sha1)); + if (type != OBJ_BLOB) + die("Object %s is a %s but a blob was expected.", + sha1_to_hex(sha1), typename(type)); + strbuf_reset(&line); + strbuf_addf(&line, "%s %s %lu\n", sha1_to_hex(sha1), + typename(type), size); + cat_blob_write(line.buf, line.len); + strbuf_release(&line); + cat_blob_write(buf, size); + cat_blob_write("\n", 1); + free(buf); +} + +static void parse_cat_blob(void) +{ + const char *p; + struct object_entry *oe = oe; + unsigned char sha1[20]; + + /* cat-blob SP LF */ + p = command_buf.buf + strlen("cat-blob "); + if (*p == ':') { + char *x; + oe = find_mark(strtoumax(p + 1, &x, 10)); + if (x == p + 1) + die("Invalid mark: %s", command_buf.buf); + if (!oe) + die("Unknown mark: %s", command_buf.buf); + if (*x) + die("Garbage after mark: %s", command_buf.buf); + hashcpy(sha1, oe->idx.sha1); + } else { + if (get_sha1_hex(p, sha1)) + die("Invalid SHA1: %s", command_buf.buf); + if (p[40]) + die("Garbage after SHA1: %s", command_buf.buf); + oe = find_object(sha1); + } + + cat_blob(oe, sha1); +} + static void parse_checkpoint(void) { if (object_count) { @@ -2773,6 +2853,14 @@ static void option_export_marks(const char *marks) safe_create_leading_directories_const(export_marks_file); } +static void option_cat_blob_fd(const char *fd) +{ + unsigned long n = ulong_arg("--cat-blob-fd", fd); + if (n > (unsigned long) INT_MAX) + die("--cat-blob-fd cannot exceed %d", INT_MAX); + cat_blob_fd = (int) n; +} + static void option_export_pack_edges(const char *edges) { if (pack_edges) @@ -2826,6 +2914,8 @@ static int parse_one_feature(const char *feature, int from_stream) option_import_marks(feature + 13, from_stream); } else if (!prefixcmp(feature, "export-marks=")) { option_export_marks(feature + 13); + } else if (!strcmp(feature, "cat-blob")) { + ; /* Don't die - this feature is supported */ } else if (!prefixcmp(feature, "relative-marks")) { relative_marks_paths = 1; } else if (!prefixcmp(feature, "no-relative-marks")) { @@ -2920,6 +3010,11 @@ static void parse_argv(void) if (parse_one_feature(a + 2, 0)) continue; + if (!prefixcmp(a + 2, "cat-blob-fd=")) { + option_cat_blob_fd(a + 2 + strlen("cat-blob-fd=")); + continue; + } + die("unknown option %s", a); } if (i != global_argc) @@ -2971,6 +3066,8 @@ int main(int argc, const char **argv) parse_new_tag(); else if (!prefixcmp(command_buf.buf, "reset ")) parse_reset_branch(); + else if (!prefixcmp(command_buf.buf, "cat-blob ")) + parse_cat_blob(); else if (!strcmp("checkpoint", command_buf.buf)) parse_checkpoint(); else if (!prefixcmp(command_buf.buf, "progress ")) diff --git a/t/t9300-fast-import.sh b/t/t9300-fast-import.sh index c80bb0cf10..a0162b73d9 100755 --- a/t/t9300-fast-import.sh +++ b/t/t9300-fast-import.sh @@ -23,11 +23,18 @@ file5_data='an inline file. file6_data='#!/bin/sh echo "$@"' +>empty + ### ### series A ### test_tick + +test_expect_success 'empty stream succeeds' ' + git fast-import input <expect <<-EOF && + ${blob} blob 11 + yes it can + + EOF + echo "cat-blob $blob" | + git fast-import --cat-blob-fd=6 6>actual && + test_cmp expect actual +' + +test_expect_success 'R: in-stream cat-blob-fd not respected' ' + echo hello >greeting && + blob=$(git hash-object -w greeting) && + cat >expect <<-EOF && + ${blob} blob 6 + hello + + EOF + git fast-import --cat-blob-fd=3 3>actual.3 >actual.1 <<-EOF && + cat-blob $blob + EOF + test_cmp expect actual.3 && + test_cmp empty actual.1 && + git fast-import 3>actual.3 >actual.1 <<-EOF && + option cat-blob-fd=3 + cat-blob $blob + EOF + test_cmp empty actual.3 && + test_cmp expect actual.1 +' + +test_expect_success 'R: print new blob' ' + blob=$(echo "yep yep yep" | git hash-object --stdin) && + cat >expect <<-EOF && + ${blob} blob 12 + yep yep yep + + EOF + git fast-import --cat-blob-fd=6 6>actual <<-\EOF && + blob + mark :1 + data <expect <<-EOF && + ${blob} blob 25 + a new blob named by sha1 + + EOF + git fast-import --cat-blob-fd=6 6>actual <<-EOF && + blob + data <big && + for i in 1 2 3 + do + cat big big big big >bigger && + cat bigger bigger bigger bigger >big || + exit + done + ) +' + +test_expect_success 'R: print two blobs to stdout' ' + blob1=$(git hash-object big) && + blob1_len=$(wc -c expect && + { + cat <<-\END_PART1 && + blob + mark :1 + data <actual && + test_cmp expect actual +' + +test_expect_success 'setup: have pipes?' ' + rm -f frob && + if mkfifo frob + then + test_set_prereq PIPE + fi +' + +test_expect_success PIPE 'R: copy using cat-file' ' + expect_id=$(git hash-object big) && + expect_len=$(wc -c expect.response && + + rm -f blobs && + cat >frontend <<-\FRONTEND_END && + #!/bin/sh + cat <response && + dd if=/dev/stdin of=blob bs=$size count=1 <&3 && + read newline <&3 && + + cat < $GIT_COMMITTER_DATE + data <blobs + ) && + git show copied:file3 >actual && + test_cmp expect.response response && + test_cmp big actual +' + cat >input << EOF option git quiet blob @@ -1640,8 +1831,6 @@ hi EOF -touch empty - test_expect_success 'R: quiet option results in no stats being output' ' cat input | git fast-import 2> output && test_cmp empty output From 777f80d7429b0f2687cb9ff40f82b196b78384ff Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Sun, 28 Nov 2010 13:45:58 -0600 Subject: [PATCH 4/6] fast-import: Allow cat-blob requests at arbitrary points in stream The new rule: a "cat-blob" can be inserted wherever a comment is allowed, which means at the start of any line except in the middle of a "data" command. This saves frontends from having to loop over everything they want to commit in the next commit and cat-ing the necessary objects in advance. Signed-off-by: Jonathan Nieder Signed-off-by: David Barr Signed-off-by: Jonathan Nieder Signed-off-by: Junio C Hamano --- Documentation/git-fast-import.txt | 4 ++ fast-import.c | 28 +++++++------ t/t9300-fast-import.sh | 66 +++++++++++++++++++++++++++++++ 3 files changed, 86 insertions(+), 12 deletions(-) diff --git a/Documentation/git-fast-import.txt b/Documentation/git-fast-import.txt index 5d8f60c790..534b2519b3 100644 --- a/Documentation/git-fast-import.txt +++ b/Documentation/git-fast-import.txt @@ -912,6 +912,10 @@ output uses the same format as `git cat-file --batch`: LF ==== +This command can be used anywhere in the stream that comments are +accepted. In particular, the `cat-blob` command can be used in the +middle of a commit but not in the middle of a `data` command. + `feature` ~~~~~~~~~ Require that fast-import supports the specified feature, or abort if diff --git a/fast-import.c b/fast-import.c index dd58f517b9..f71ea3e7bc 100644 --- a/fast-import.c +++ b/fast-import.c @@ -55,8 +55,6 @@ Format of STDIN stream: ('from' sp committish lf)? lf?; - cat_blob ::= 'cat-blob' sp (hexsha1 | idnum) lf; - checkpoint ::= 'checkpoint' lf lf?; @@ -134,14 +132,17 @@ Format of STDIN stream: ts ::= # time since the epoch in seconds, ascii base10 notation; tz ::= # GIT style timezone; - # note: comments may appear anywhere in the input, except - # within a data command. Any form of the data command - # always escapes the related input from comment processing. + # note: comments and cat requests may appear anywhere + # in the input, except within a data command. Any form + # of the data command always escapes the related input + # from comment processing. # # In case it is not clear, the '#' that starts the comment # must be the first character on that line (an lf # preceded it). # + cat_blob ::= 'cat-blob' sp (hexsha1 | idnum) lf; + comment ::= '#' not_lf* lf; not_lf ::= # Any byte that is not ASCII newline (LF); */ @@ -367,6 +368,7 @@ static int seen_data_command; static int cat_blob_fd = STDOUT_FILENO; static void parse_argv(void); +static void parse_cat_blob(void); static void write_branch_report(FILE *rpt, struct branch *b) { @@ -1785,7 +1787,6 @@ static void read_marks(void) fclose(f); } - static int read_next_command(void) { static int stdin_eof = 0; @@ -1795,7 +1796,7 @@ static int read_next_command(void) return EOF; } - do { + for (;;) { if (unread_command_buf) { unread_command_buf = 0; } else { @@ -1828,9 +1829,14 @@ static int read_next_command(void) rc->prev->next = rc; cmd_tail = rc; } - } while (command_buf.buf[0] == '#'); - - return 0; + if (!prefixcmp(command_buf.buf, "cat-blob ")) { + parse_cat_blob(); + continue; + } + if (command_buf.buf[0] == '#') + continue; + return 0; + } } static void skip_optional_lf(void) @@ -3066,8 +3072,6 @@ int main(int argc, const char **argv) parse_new_tag(); else if (!prefixcmp(command_buf.buf, "reset ")) parse_reset_branch(); - else if (!prefixcmp(command_buf.buf, "cat-blob ")) - parse_cat_blob(); else if (!strcmp("checkpoint", command_buf.buf)) parse_checkpoint(); else if (!prefixcmp(command_buf.buf, "progress ")) diff --git a/t/t9300-fast-import.sh b/t/t9300-fast-import.sh index a0162b73d9..d615d04a32 100755 --- a/t/t9300-fast-import.sh +++ b/t/t9300-fast-import.sh @@ -1823,6 +1823,72 @@ test_expect_success PIPE 'R: copy using cat-file' ' test_cmp big actual ' +test_expect_success PIPE 'R: print blob mid-commit' ' + rm -f blobs && + echo "A blob from _before_ the commit." >expect && + mkfifo blobs && + ( + exec 3 $GIT_COMMITTER_DATE + data <blobs && + test_cmp expect actual +' + +test_expect_success PIPE 'R: print staged blob within commit' ' + rm -f blobs && + echo "A blob from _within_ the commit." >expect && + mkfifo blobs && + ( + exec 3 $GIT_COMMITTER_DATE + data <blobs && + test_cmp expect actual +' + cat >input << EOF option git quiet blob From 491e359c949ef7b7ed3fd24ba59f1b7e4cc17e87 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Fri, 3 Dec 2010 12:28:00 -0800 Subject: [PATCH 5/6] t9300: remove unnecessary use of /dev/stdin We really shouldn't be using these funny /dev/* files that did not exist in V7 UNIX in our tests when we do not have to. Output from $ git grep -n -e /dev/ --and --not -e /dev/null t/ tells us that, aside from use of /dev/urandom in apache.conf used in http tests, "dd if=/dev/stdin" added recently to t/t9300-fast-import.sh are the only offenders, and "dd" reads from the standard input by default, so removing them should be straightforward. Reported-by: Thomas Rast Signed-off-by: Junio C Hamano --- t/t9300-fast-import.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/t/t9300-fast-import.sh b/t/t9300-fast-import.sh index d615d04a32..055ddc6ddc 100755 --- a/t/t9300-fast-import.sh +++ b/t/t9300-fast-import.sh @@ -1794,7 +1794,7 @@ test_expect_success PIPE 'R: copy using cat-file' ' read blob_id type size <&3 && echo "$blob_id $type $size" >response && - dd if=/dev/stdin of=blob bs=$size count=1 <&3 && + dd of=blob bs=$size count=1 <&3 && read newline <&3 && cat < Date: Mon, 13 Dec 2010 00:31:51 -0600 Subject: [PATCH 6/6] t9300: avoid short reads from dd dd is a thin wrapper around read(2). As open group Issue 7 explains: It shall read the input one block at a time, using the specified input block size; it shall then process the block of data actually returned, which could be smaller than the requested block size. Any short read --- for example from a pipe whose capacity cannot fill a block --- results in that block being truncated. As a result, the first cat-blob test (9300.114) fails on Mac OS X, where the pipe capacity is around 8 KiB. Fix the test by using a block size of 1. Each read will block until the next byte of input is available. It would be even nicer to use head -c which expresses the intention more clearly. Alas, IRIX "head" does not support the -c option. Reported-by: Brian Gernhardt Signed-off-by: Jonathan Nieder Signed-off-by: Junio C Hamano --- t/t9300-fast-import.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/t/t9300-fast-import.sh b/t/t9300-fast-import.sh index 055ddc6ddc..ed28d3cc83 100755 --- a/t/t9300-fast-import.sh +++ b/t/t9300-fast-import.sh @@ -1794,7 +1794,7 @@ test_expect_success PIPE 'R: copy using cat-file' ' read blob_id type size <&3 && echo "$blob_id $type $size" >response && - dd of=blob bs=$size count=1 <&3 && + dd of=blob bs=1 count=$size <&3 && read newline <&3 && cat <