From 62edbec7dee0bc9788d60823ca074d7dfbb5a486 Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Wed, 20 Feb 2019 14:58:42 -0800 Subject: [PATCH 1/5] t9300: demonstrate bug with get-mark and empty orphan commits Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- Documentation/git-fast-import.txt | 7 +++++- t/t9300-fast-import.sh | 37 +++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 1 deletion(-) diff --git a/Documentation/git-fast-import.txt b/Documentation/git-fast-import.txt index 43ab3b1637..339b6e7e98 100644 --- a/Documentation/git-fast-import.txt +++ b/Documentation/git-fast-import.txt @@ -422,7 +422,12 @@ However it is recommended that a `filedeleteall` command precede all `filemodify`, `filecopy`, `filerename` and `notemodify` commands in the same commit, as `filedeleteall` wipes the branch clean (see below). -The `LF` after the command is optional (it used to be required). +The `LF` after the command is optional (it used to be required). Note +that for reasons of backward compatibility, if the commit ends with a +`data` command (i.e. it has no `from`, `merge`, `filemodify`, +`filedelete`, `filecopy`, `filerename`, `filedeleteall` or +`notemodify` commands) then two `LF` commands may appear at the end of +the command instead of just one. 
`author` ^^^^^^^^ diff --git a/t/t9300-fast-import.sh b/t/t9300-fast-import.sh index 59a13b6a77..c304c8c47c 100755 --- a/t/t9300-fast-import.sh +++ b/t/t9300-fast-import.sh @@ -3262,4 +3262,41 @@ test_expect_success PIPE 'V: checkpoint updates tags after tag' ' background_import_still_running ' +### +### series W (get-mark and empty orphan commits) +### + +cat >>W-input <<-W_INPUT_END + commit refs/heads/W-branch + mark :1 + author Full Name 1000000000 +0100 + committer Full Name 1000000000 +0100 + data 27 + Intentionally empty commit + LFsget-mark :1 + W_INPUT_END + +test_expect_failure !MINGW 'W: get-mark & empty orphan commit with no newlines' ' + sed -e s/LFs// W-input | tr L "\n" | git fast-import +' + +test_expect_failure !MINGW 'W: get-mark & empty orphan commit with one newline' ' + sed -e s/LFs/L/ W-input | tr L "\n" | git fast-import +' + +test_expect_success !MINGW 'W: get-mark & empty orphan commit with ugly second newline' ' + # Technically, this should fail as it has too many linefeeds + # according to the grammar in fast-import.txt. But, for whatever + # reason, it works. Since using the correct number of newlines + # does not work with older (pre-2.22) versions of git, allow apps + # that used this second-newline workaround to keep working by + # checking it with this test... + sed -e s/LFs/LL/ W-input | tr L "\n" | git fast-import +' + +test_expect_success !MINGW 'W: get-mark & empty orphan commit with erroneous third newline' ' + # ...but do NOT allow more empty lines than that (see previous test). + sed -e s/LFs/LLL/ W-input | tr L "\n" | test_must_fail git fast-import +' + test_done From a63c54a019138c7aaa4624f48284722664428b15 Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Wed, 20 Feb 2019 14:58:43 -0800 Subject: [PATCH 2/5] git-fast-import.txt: fix wording about where ls command can appear The docs claimed `ls` commands could appear almost anywhere, but the code told a different story. Modify the docs to match the code. 
Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- Documentation/git-fast-import.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/git-fast-import.txt b/Documentation/git-fast-import.txt index 339b6e7e98..f7e2d330b1 100644 --- a/Documentation/git-fast-import.txt +++ b/Documentation/git-fast-import.txt @@ -1016,8 +1016,8 @@ printing a blob from the active commit (with `cat-blob`) or copying a blob or tree from a previous commit for use in the current one (with `filemodify`). -The `ls` command can be used anywhere in the stream that comments are -accepted, including the middle of a commit. +The `ls` command can also be used where a `filemodify` directive can +appear, allowing it to be used in the middle of a commit. Reading from the active commit:: This form can only be used in the middle of a `commit`. From 5056bb7646cdd12d2985784f0ce4ed79550ebe63 Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Wed, 20 Feb 2019 14:58:44 -0800 Subject: [PATCH 3/5] fast-import: check most prominent commands first This is not a very important change, and one that I expect to have no performance impact whatsoever, but reading the code bothered me. The parsing of command types in cmd_main() mostly runs in order of most common to least common commands; sure, it's hard to say for sure what the most common are without some type of study, but it seems fairly clear to mark the original four ("blob", "commit", "tag", "reset") as the most prominent. Indeed, the parsing for most other commands was added later in the list. However, when "ls" was added, it was stuck near the top of the list, with no rationale for that particular location. Move it down to later to appease my Tourette's-like internal twitching that its former location was causing. 
Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- fast-import.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fast-import.c b/fast-import.c index b7ba755c2b..3114ce17f1 100644 --- a/fast-import.c +++ b/fast-import.c @@ -3303,14 +3303,14 @@ int cmd_main(int argc, const char **argv) const char *v; if (!strcmp("blob", command_buf.buf)) parse_new_blob(); - else if (skip_prefix(command_buf.buf, "ls ", &v)) - parse_ls(v, NULL); else if (skip_prefix(command_buf.buf, "commit ", &v)) parse_new_commit(v); else if (skip_prefix(command_buf.buf, "tag ", &v)) parse_new_tag(v); else if (skip_prefix(command_buf.buf, "reset ", &v)) parse_reset_branch(v); + else if (skip_prefix(command_buf.buf, "ls ", &v)) + parse_ls(v, NULL); else if (!strcmp("checkpoint", command_buf.buf)) parse_checkpoint(); else if (!strcmp("done", command_buf.buf)) From 7ffde293f2e7f0ae455800b138416da601254436 Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Wed, 20 Feb 2019 14:58:45 -0800 Subject: [PATCH 4/5] fast-import: only allow cat-blob requests where it makes sense In commit 777f80d7429b ("fast-import: Allow cat-blob requests at arbitrary points in stream", 2010-11-28), fast-import started allowing cat-blob commands to appear on the start of any line except in the middle of a "data" command. It could be in the middle of various directives that were part of a tag command, or in the middle of checkpoints or progresses (each of which allow an optional second empty newline), or even immediately after the mark command of a blob before the data directive appeared (raising the question of what if it used the mark for the blob that just barely appeared in the stream that we do not yet have the data for). None of these locations make any sense as places to put cat-blob requests. 
The purpose of this change as stated in that commit message was to [save] frontends from having to loop over everything they want to commit in the next commit and cat-ing the necessary objects in advance. However, that can be achieved by simply allowing cat-blob requests to appear whenever a filemodify directive is allowed. Further, it avoids setting a bad precedent for other commands to follow (e.g. get-mark); a precedent which caused parsing problems in corner cases. Technically, inline filemodify directives add a slight wrinkle in that frontends might want to have cat-blob directives appear after the start of the filemodify and before the data directive contained within it. I think it would have been better to disallow such a case (it would be trivial to use cat-blob before the filemodify instead), but since there is evidence this was used, for backwards compatibility let's support that case too. Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- Documentation/git-fast-import.txt | 7 ++++--- fast-import.c | 19 +++++++++++++------ 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/Documentation/git-fast-import.txt b/Documentation/git-fast-import.txt index f7e2d330b1..982f82b0b3 100644 --- a/Documentation/git-fast-import.txt +++ b/Documentation/git-fast-import.txt @@ -1001,9 +1001,10 @@ Output uses the same format as `git cat-file --batch`: LF ==== -This command can be used anywhere in the stream that comments are -accepted. In particular, the `cat-blob` command can be used in the -middle of a commit but not in the middle of a `data` command. +This command can be used where a `filemodify` directive can appear, +allowing it to be used in the middle of a commit. For a `filemodify` +using an inline directive, it can also appear right before the `data` +directive. See ``Responses To Commands'' below for details about how to read this output safely. 
diff --git a/fast-import.c b/fast-import.c index 3114ce17f1..338db61e6e 100644 --- a/fast-import.c +++ b/fast-import.c @@ -1786,10 +1786,6 @@ static int read_next_command(void) parse_get_mark(p); continue; } - if (skip_prefix(command_buf.buf, "cat-blob ", &p)) { - parse_cat_blob(p); - continue; - } if (command_buf.buf[0] == '#') continue; return 0; @@ -2254,8 +2250,15 @@ static void file_change_m(const char *p, struct branch *b) strbuf_addstr(&uq, p); p = uq.buf; } - read_next_command(); - parse_and_store_blob(&last_blob, &oid, 0); + while (read_next_command() != EOF) { + const char *v; + if (skip_prefix(command_buf.buf, "cat-blob ", &v)) + parse_cat_blob(v); + else { + parse_and_store_blob(&last_blob, &oid, 0); + break; + } + } } else { enum object_type expected = S_ISDIR(mode) ? OBJ_TREE: OBJ_BLOB; @@ -2627,6 +2630,8 @@ static void parse_new_commit(const char *arg) file_change_deleteall(b); else if (skip_prefix(command_buf.buf, "ls ", &v)) parse_ls(v, b); + else if (skip_prefix(command_buf.buf, "cat-blob ", &v)) + parse_cat_blob(v); else { unread_command_buf = 1; break; @@ -3311,6 +3316,8 @@ int cmd_main(int argc, const char **argv) parse_reset_branch(v); else if (skip_prefix(command_buf.buf, "ls ", &v)) parse_ls(v, NULL); + else if (skip_prefix(command_buf.buf, "cat-blob ", &v)) + parse_cat_blob(v); else if (!strcmp("checkpoint", command_buf.buf)) parse_checkpoint(); else if (!strcmp("done", command_buf.buf)) From cf7b857a77bda6a9a93e2cde2f6ae1764e7a2517 Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Wed, 20 Feb 2019 14:58:46 -0800 Subject: [PATCH 5/5] fast-import: fix erroneous handling of get-mark with empty orphan commits When get-mark was introduced in commit 28c7b1f7b7b7 ("fast-import: add a get-mark command", 2015-07-01), it followed the precedent of the cat-blob command to be allowed on any line other than in the middle of a data directive; see commit 777f80d7429b ("fast-import: Allow cat-blob requests at arbitrary points in stream", 2010-11-28). 
It was useful to allow cat-blob directives in the middle of a commit to get more data that would be used in writing the current commit object. get-mark is not similarly useful since fast-import can already use either object id or mark. Further, trying to allow this command anywhere caused parsing bugs. Fix the parsing problems by only allowing get-mark commands to appear when other commands have completed. Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- Documentation/git-fast-import.txt | 4 ---- fast-import.c | 8 ++------ t/t9300-fast-import.sh | 4 ++-- 3 files changed, 4 insertions(+), 12 deletions(-) diff --git a/Documentation/git-fast-import.txt b/Documentation/git-fast-import.txt index 982f82b0b3..33cce1e150 100644 --- a/Documentation/git-fast-import.txt +++ b/Documentation/git-fast-import.txt @@ -971,10 +971,6 @@ might want to refer to in their commit messages. 'get-mark' SP ':' LF .... -This command can be used anywhere in the stream that comments are -accepted. In particular, the `get-mark` command can be used in the -middle of a commit but not in the middle of a `data` command. - See ``Responses To Commands'' below for details about how to read this output safely. 
diff --git a/fast-import.c b/fast-import.c index 338db61e6e..064c55e8be 100644 --- a/fast-import.c +++ b/fast-import.c @@ -1748,8 +1748,6 @@ static int read_next_command(void) } for (;;) { - const char *p; - if (unread_command_buf) { unread_command_buf = 0; } else { @@ -1782,10 +1780,6 @@ static int read_next_command(void) rc->prev->next = rc; cmd_tail = rc; } - if (skip_prefix(command_buf.buf, "get-mark ", &p)) { - parse_get_mark(p); - continue; - } if (command_buf.buf[0] == '#') continue; return 0; @@ -3318,6 +3312,8 @@ int cmd_main(int argc, const char **argv) parse_ls(v, NULL); else if (skip_prefix(command_buf.buf, "cat-blob ", &v)) parse_cat_blob(v); + else if (skip_prefix(command_buf.buf, "get-mark ", &v)) + parse_get_mark(v); else if (!strcmp("checkpoint", command_buf.buf)) parse_checkpoint(); else if (!strcmp("done", command_buf.buf)) diff --git a/t/t9300-fast-import.sh b/t/t9300-fast-import.sh index c304c8c47c..3668263c40 100755 --- a/t/t9300-fast-import.sh +++ b/t/t9300-fast-import.sh @@ -3276,11 +3276,11 @@ cat >>W-input <<-W_INPUT_END LFsget-mark :1 W_INPUT_END -test_expect_failure !MINGW 'W: get-mark & empty orphan commit with no newlines' ' +test_expect_success !MINGW 'W: get-mark & empty orphan commit with no newlines' ' sed -e s/LFs// W-input | tr L "\n" | git fast-import ' -test_expect_failure !MINGW 'W: get-mark & empty orphan commit with one newline' ' +test_expect_success !MINGW 'W: get-mark & empty orphan commit with one newline' ' sed -e s/LFs/L/ W-input | tr L "\n" | git fast-import '