From ecd512582cc40b732be342c49b9cb311a6dabc77 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 19 Oct 2017 16:24:03 -0400 Subject: [PATCH 1/5] t4015: refactor --color-moved whitespace test In preparation for testing several different whitespace options, let's split out the setup and cleanup steps of the whitespace test. While we're here, let's also switch to using "<<-" to indent our here-documents properly, and use q_to_tab to more explicitly mark where we expect whitespace to appear. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- t/t4015-diff-whitespace.sh | 49 ++++++++++++++++++++++---------------- 1 file changed, 29 insertions(+), 20 deletions(-) diff --git a/t/t4015-diff-whitespace.sh b/t/t4015-diff-whitespace.sh index 87083f728f..164b502405 100755 --- a/t/t4015-diff-whitespace.sh +++ b/t/t4015-diff-whitespace.sh @@ -1318,30 +1318,34 @@ test_expect_success 'no effect from --color-moved with --word-diff' ' test_cmp expect actual ' -test_expect_success 'move detection ignoring whitespace ' ' +test_expect_success 'set up whitespace tests' ' git reset --hard && - cat <<\EOF >lines.txt && -line 1 -line 2 -line 3 -line 4 -long line 5 -long line 6 -long line 7 -EOF - git add lines.txt && - git commit -m "add poetry" && - cat <<\EOF >lines.txt && + # Note that these lines have no leading or trailing whitespace. + cat <<-\EOF >lines.txt && + line 1 + line 2 + line 3 + line 4 long line 5 long line 6 long line 7 -line 1 -line 2 -line 3 -line 4 -EOF - test_config color.diff.oldMoved "magenta" && - test_config color.diff.newMoved "cyan" && + EOF + git add lines.txt && + git commit -m "add poetry" && + git config color.diff.oldMoved "magenta" && + git config color.diff.newMoved "cyan" +' + +test_expect_success 'move detection ignoring whitespace ' ' + q_to_tab <<-\EOF >lines.txt && + Qlong line 5 + Qlong line 6 + Qlong line 7 + line 1 + line 2 + line 3 + line 4 + EOF git diff HEAD --no-renames --color-moved --color | grep -v "index" | test_decode_color >actual && @@ -1385,6 +1389,11 @@ EOF test_cmp expected actual ' +test_expect_success 'clean up whitespace-test colors' ' + git config --unset color.diff.oldMoved && + git config --unset color.diff.newMoved +' + test_expect_success '--color-moved block at end of diff output respects MIN_ALNUM_COUNT' ' git reset --hard && >bar && From 83de23cfea3730293c52072376b28d87b37d756b Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 19 Oct 2017 16:25:57 -0400 Subject: [PATCH 2/5] t4015: check "negative" case for "-w --color-moved" We test that lines with whitespace changes are not found by "--color-moved" by default, but are found if "-w" is added. Let's add one more twist: a line that has non-whitespace changes should not be marked as a pure move. This is perhaps an obvious case for us to get right (and we do), but as we add more whitespace tests, they will form a pattern of "make sure this case is a move and this other case is not". Note that we have to add a line to our moved block, since having a too-small block doesn't trigger the "moved" heuristics. And we also add a line of context to ensure that there's more context lines than moved lines (so the diff shows us moving the lines up, rather than moving the context down). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- t/t4015-diff-whitespace.sh | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/t/t4015-diff-whitespace.sh b/t/t4015-diff-whitespace.sh index 164b502405..503c9bc7f3 100755 --- a/t/t4015-diff-whitespace.sh +++ b/t/t4015-diff-whitespace.sh @@ -1326,9 +1326,11 @@ test_expect_success 'set up whitespace tests' ' line 2 line 3 line 4 - long line 5 + line 5 long line 6 long line 7 + long line 8 + long line 9 EOF git add lines.txt && git commit -m "add poetry" && @@ -1338,13 +1340,15 @@ test_expect_success 'set up whitespace tests' ' test_expect_success 'move detection ignoring whitespace ' ' q_to_tab <<-\EOF >lines.txt && - Qlong line 5 Qlong line 6 Qlong line 7 + Qlong line 8 + Qchanged long line 9 line 1 line 2 line 3 line 4 + line 5 EOF git diff HEAD --no-renames --color-moved --color | grep -v "index" | @@ -1353,17 +1357,20 @@ test_expect_success 'move detection ignoring whitespace ' ' diff --git a/lines.txt b/lines.txt --- a/lines.txt +++ b/lines.txt - @@ -1,7 +1,7 @@ - + long line 5 + @@ -1,9 +1,9 @@ + long line 6 + long line 7 + + long line 8 + + changed long line 9 line 1 line 2 line 3 line 4 - -long line 5 + line 5 -long line 6 -long line 7 + -long line 8 + -long line 9 EOF test_cmp expected actual && @@ -1374,17 +1381,20 @@ test_expect_success 'move detection ignoring whitespace ' ' diff --git a/lines.txt b/lines.txt --- a/lines.txt +++ b/lines.txt - @@ -1,7 +1,7 @@ - + long line 5 + @@ -1,9 +1,9 @@ + long line 6 + long line 7 + + long line 8 + + changed long line 9 line 1 line 2 line 3 line 4 - -long line 5 + line 5 -long line 6 -long line 7 + -long line 8 + -long line 9 EOF test_cmp expected actual ' From d5aae1f7cdcb6211d7884838f30e3cd1266b605f Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 19 Oct 2017 16:26:31 -0400 Subject: [PATCH 3/5] t4015: test the output of "diff --color-moved -b" Commit fa5ba2c1dd (diff: fix infinite loop with --color-moved --ignore-space-change, 2017-10-12) added a test to make sure that "--color-moved -b" doesn't run forever, but the test in question doesn't actually have any moved lines in it. Let's scrap that test and add a variant of the existing "--color-moved -w" test, but this time we'll check that we find the move with whitespace changes, but not arbitrary whitespace additions. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- t/t4015-diff-whitespace.sh | 73 +++++++++++++++++++++++++++++++++----- 1 file changed, 64 insertions(+), 9 deletions(-) diff --git a/t/t4015-diff-whitespace.sh b/t/t4015-diff-whitespace.sh index 503c9bc7f3..1f54816c6b 100755 --- a/t/t4015-diff-whitespace.sh +++ b/t/t4015-diff-whitespace.sh @@ -1399,6 +1399,70 @@ test_expect_success 'move detection ignoring whitespace ' ' test_cmp expected actual ' +test_expect_success 'move detection ignoring whitespace changes' ' + git reset --hard && + # Lines 6-8 have a space change, but 9 is new whitespace + q_to_tab <<-\EOF >lines.txt && + longQline 6 + longQline 7 + longQline 8 + long liQne 9 + line 1 + line 2 + line 3 + line 4 + line 5 + EOF + + git diff HEAD --no-renames --color-moved --color | + grep -v "index" | + test_decode_color >actual && + cat <<-\EOF >expected && + diff --git a/lines.txt b/lines.txt + --- a/lines.txt + +++ b/lines.txt + @@ -1,9 +1,9 @@ + +long line 6 + +long line 7 + +long line 8 + +long li ne 9 + line 1 + line 2 + line 3 + line 4 + line 5 + -long line 6 + -long line 7 + -long line 8 + -long line 9 + EOF + test_cmp expected actual && + + git diff HEAD --no-renames -b --color-moved --color | + grep -v "index" | + test_decode_color >actual && + cat <<-\EOF >expected && + diff --git a/lines.txt b/lines.txt + --- a/lines.txt + +++ b/lines.txt + @@ -1,9 +1,9 @@ + +long line 6 + +long line 7 + +long line 8 + +long li ne 9 + line 1 + line 2 + line 3 + line 4 + line 5 + -long line 6 + -long line 7 + -long line 8 + -long line 9 + EOF + test_cmp expected actual +' + test_expect_success 'clean up whitespace-test colors' ' git config --unset color.diff.oldMoved && git config --unset color.diff.newMoved @@ -1549,13 +1613,4 @@ test_expect_success 'move detection with submodules' ' test_cmp expect decoded_actual ' -test_expect_success 'move detection with whitespace changes' ' - test_when_finished "git reset --hard" && - test_seq 10 >test && - git add test && - sed s/3/42/ test.tmp && - mv test.tmp test && - git -c diff.colormoved diff --ignore-space-change -- test -' - test_done From da58318e76b37c345e4d0da4c42987ad45b4f155 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 19 Oct 2017 16:29:26 -0400 Subject: [PATCH 4/5] diff: fix whitespace-skipping with --color-moved The code for handling whitespace with --color-moved represents partial strings as a pair of pointers. There are two possible conventions for the end pointer: 1. It points to the byte right after the end of the string. 2. It points to the final byte of the string. But we seem to use both conventions in the code: a. we assign the initial pointers from the NUL-terminated string using (1) b. we eat trailing whitespace by checking the second pointer for isspace(), which needs (2) c. the next_byte() function checks for end-of-string with "if (cp > endp)", which is (2) d. in next_byte() we skip past internal whitespace with "while (cp < end)", which is (1) This creates fewer bugs than you might think, because there are some subtle interactions. Because of (a) and (c), we always return the NUL-terminator from next_byte(). But all of the callers of next_byte() happen to handle that gracefully. Because of the mismatch between (d) and (c), next_byte() could accidentally return a whitespace character right at endp. But because of the interaction of (a) and (b), we fail to actually chomp trailing whitespace, meaning our endp _always_ points to a NUL, canceling out the problem. But that does leave (b) as a real bug: when ignoring whitespace only at the end-of-line, we don't correctly trim it, and fail to match up lines. We can fix the whole thing by moving consistently to one convention. Since convention (1) is idiomatic in our code base, we'll pick that one. The existing "-w" and "-b" tests continue to pass, and a new "--ignore-space-at-eol" shows off the breakage we're fixing. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- diff.c | 15 ++++++--- t/t4015-diff-whitespace.sh | 67 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+), 5 deletions(-) diff --git a/diff.c b/diff.c index 6fd288420b..09081a207c 100644 --- a/diff.c +++ b/diff.c @@ -712,7 +712,7 @@ static int next_byte(const char **cp, const char **endp, { int retval; - if (*cp > *endp) + if (*cp >= *endp) return -1; if (isspace(**cp)) { @@ -729,7 +729,12 @@ static int next_byte(const char **cp, const char **endp, if (DIFF_XDL_TST(diffopt, IGNORE_WHITESPACE)) { while (*cp < *endp && isspace(**cp)) (*cp)++; - /* return the first non-ws character via the usual below */ + /* + * return the first non-ws character via the usual + * below, unless we ate all of the bytes + */ + if (*cp >= *endp) + return -1; } } @@ -750,9 +755,9 @@ static int moved_entry_cmp(const struct diff_options *diffopt, return a->es->len != b->es->len || memcmp(ap, bp, a->es->len); if (DIFF_XDL_TST(diffopt, IGNORE_WHITESPACE_AT_EOL)) { - while (ae > ap && isspace(*ae)) + while (ae > ap && isspace(ae[-1])) ae--; - while (be > bp && isspace(*be)) + while (be > bp && isspace(be[-1])) be--; } @@ -775,7 +780,7 @@ static unsigned get_string_hash(struct emitted_diff_symbol *es, struct diff_opti int c; strbuf_reset(&sb); - while (ae > ap && isspace(*ae)) + while (ae > ap && isspace(ae[-1])) ae--; while ((c = next_byte(&ap, &ae, o)) > 0) strbuf_addch(&sb, c); diff --git a/t/t4015-diff-whitespace.sh b/t/t4015-diff-whitespace.sh index 1f54816c6b..6c9a93b734 100755 --- a/t/t4015-diff-whitespace.sh +++ b/t/t4015-diff-whitespace.sh @@ -1463,6 +1463,73 @@ test_expect_success 'move detection ignoring whitespace changes' ' test_cmp expected actual ' +test_expect_success 'move detection ignoring whitespace at eol' ' + git reset --hard && + # Lines 6-9 have new eol whitespace, but 9 also has it in the middle + q_to_tab <<-\EOF >lines.txt && + long line 6Q + long line 7Q + long line 8Q + longQline 9Q + line 1 + line 2 + line 3 + line 4 + line 5 + EOF + + # avoid cluttering the output with complaints about our eol whitespace + test_config core.whitespace -blank-at-eol && + + git diff HEAD --no-renames --color-moved --color | + grep -v "index" | + test_decode_color >actual && + cat <<-\EOF >expected && + diff --git a/lines.txt b/lines.txt + --- a/lines.txt + +++ b/lines.txt + @@ -1,9 +1,9 @@ + +long line 6 + +long line 7 + +long line 8 + +long line 9 + line 1 + line 2 + line 3 + line 4 + line 5 + -long line 6 + -long line 7 + -long line 8 + -long line 9 + EOF + test_cmp expected actual && + + git diff HEAD --no-renames --ignore-space-at-eol --color-moved --color | + grep -v "index" | + test_decode_color >actual && + cat <<-\EOF >expected && + diff --git a/lines.txt b/lines.txt + --- a/lines.txt + +++ b/lines.txt + @@ -1,9 +1,9 @@ + +long line 6 + +long line 7 + +long line 8 + +long line 9 + line 1 + line 2 + line 3 + line 4 + line 5 + -long line 6 + -long line 7 + -long line 8 + -long line 9 + EOF + test_cmp expected actual +' + test_expect_success 'clean up whitespace-test colors' ' git config --unset color.diff.oldMoved && git config --unset color.diff.newMoved From b66b5072921fb706f1e9352f471098c988b0ca39 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 19 Oct 2017 16:31:20 -0400 Subject: [PATCH 5/5] diff: handle NULs in get_string_hash() For computing moved lines, we feed the characters of each line into a hash. When we've been asked to ignore whitespace, then we pick each character using next_byte(), which returns -1 on end-of-string, which it determines using the start/end pointers we feed it. However our check of its return value treats "0" the same as "-1", meaning we'd quit if the string has an embedded NUL. This is unlikely to ever come up in practice since our line boundaries generally come from calling strlen() in the first place. But it was a bit surprising to me as a reader of the next_byte() code. And it's possible that we may one day feed this function with more exotic input, which otherwise works with arbitrary ptr/len pairs. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- diff.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/diff.c b/diff.c index 09081a207c..c4a669ffa8 100644 --- a/diff.c +++ b/diff.c @@ -782,7 +782,7 @@ static unsigned get_string_hash(struct emitted_diff_symbol *es, struct diff_opti strbuf_reset(&sb); while (ae > ap && isspace(ae[-1])) ae--; - while ((c = next_byte(&ap, &ae, o)) > 0) + while ((c = next_byte(&ap, &ae, o)) >= 0) strbuf_addch(&sb, c); return memhash(sb.buf, sb.len);