diff --git a/Documentation/config.txt b/Documentation/config.txt index 4e222f15a5..7b8cae1541 100644 --- a/Documentation/config.txt +++ b/Documentation/config.txt @@ -139,6 +139,51 @@ core.autocrlf:: "text" (i.e. be subjected to the autocrlf mechanism) is decided purely based on the contents. +core.safecrlf:: + If true, makes git check if converting `CRLF` as controlled by + `core.autocrlf` is reversible. Git will verify if a command + modifies a file in the work tree either directly or indirectly. + For example, committing a file followed by checking out the + same file should yield the original file in the work tree. If + this is not the case for the current setting of + `core.autocrlf`, git will reject the file. The variable can + be set to "warn", in which case git will only warn about an + irreversible conversion but continue the operation. ++ +CRLF conversion bears a slight chance of corrupting data. +autocrlf=true will convert CRLF to LF during commit and LF to +CRLF during checkout. A file that contains a mixture of LF and +CRLF before the commit cannot be recreated by git. For text +files this is the right thing to do: it corrects line endings +such that we have only LF line endings in the repository. +But for binary files that are accidentally classified as text the +conversion can corrupt data. ++ +If you recognize such corruption early you can easily fix it by +setting the conversion type explicitly in .gitattributes. Right +after committing you still have the original file in your work +tree and this file is not yet corrupted. You can explicitly tell +git that this file is binary and git will handle the file +appropriately. ++ +Unfortunately, the desired effect of cleaning up text files with +mixed line endings and the undesired effect of corrupting binary +files cannot be distinguished. In both cases CRLFs are removed +in an irreversible way. For text files this is the right thing +to do because CRLFs are line endings, while for binary files +converting CRLFs corrupts data. ++ +Note, this safety check does not mean that a checkout will generate a +file identical to the original file for a different setting of +`core.autocrlf`, but only for the current one. For example, a text +file with `LF` would be accepted with `core.autocrlf=input` and could +later be checked out with `core.autocrlf=true`, in which case the +resulting file would contain `CRLF`, although the original file +contained `LF`. However, in both work trees the line endings would be +consistent, that is either all `LF` or all `CRLF`, but never mixed. A +file with mixed line endings would be reported by the `core.safecrlf` +mechanism. + core.symlinks:: If false, symbolic links are checked out as small plain files that contain the link text. linkgit:git-update-index[1] and diff --git a/Documentation/gitattributes.txt b/Documentation/gitattributes.txt index 35a29fd60c..84ec9623a2 100644 --- a/Documentation/gitattributes.txt +++ b/Documentation/gitattributes.txt @@ -133,6 +133,26 @@ When `core.autocrlf` is set to "input", line endings are converted to LF upon checkin, but there is no conversion done upon checkout. +If `core.safecrlf` is set to "true" or "warn", git verifies if +the conversion is reversible for the current setting of +`core.autocrlf`. For "true", git rejects irreversible +conversions; for "warn", git only prints a warning but accepts +an irreversible conversion. The safety triggers to prevent such +a conversion done to the files in the work tree, but there are a +few exceptions. Even though... + +- "git add" itself does not touch the files in the work tree, the + next checkout would, so the safety triggers; + +- "git apply" to update a text file with a patch does touch the files + in the work tree, but the operation is about text files and CRLF + conversion is about fixing the line ending inconsistencies, so the + safety does not trigger; + +- "git diff" itself does not touch the files in the work tree, it is + often run to inspect the changes you intend to next "git add". To + catch potential problems early, safety triggers. + `ident` ^^^^^^^ diff --git a/builtin-apply.c b/builtin-apply.c index 15432b6782..3b5618d434 100644 --- a/builtin-apply.c +++ b/builtin-apply.c @@ -1430,7 +1430,7 @@ static int read_old_data(struct stat *st, const char *path, struct strbuf *buf) case S_IFREG: if (strbuf_read_file(buf, path, st->st_size) != st->st_size) return error("unable to open or read %s", path); - convert_to_git(path, buf->buf, buf->len, buf); + convert_to_git(path, buf->buf, buf->len, buf, 0); return 0; default: return -1; diff --git a/builtin-blame.c b/builtin-blame.c index 9b4c02e87f..c361ee194e 100644 --- a/builtin-blame.c +++ b/builtin-blame.c @@ -2073,7 +2073,7 @@ static struct commit *fake_working_tree_commit(const char *path, const char *con if (strbuf_read(&buf, 0, 0) < 0) die("read error %s from stdin", strerror(errno)); } - convert_to_git(path, buf.buf, buf.len, &buf); + convert_to_git(path, buf.buf, buf.len, &buf, 0); origin->file.ptr = buf.buf; origin->file.size = buf.len; pretend_sha1_file(buf.buf, buf.len, OBJ_BLOB, origin->blob_sha1); diff --git a/cache.h b/cache.h index 549f4bbac7..6003c83b2d 100644 --- a/cache.h +++ b/cache.h @@ -330,6 +330,14 @@ extern size_t packed_git_limit; extern size_t delta_base_cache_limit; extern int auto_crlf; +enum safe_crlf { + SAFE_CRLF_FALSE = 0, + SAFE_CRLF_FAIL = 1, + SAFE_CRLF_WARN = 2, +}; + +extern enum safe_crlf safe_crlf; + #define GIT_REPO_VERSION 0 extern int repository_format_version; extern int check_repository_format(void); @@ -633,7 +641,8 @@ extern void trace_argv_printf(const char **argv, const char *format, ...); /* convert.c */ /* returns 1 if *dst was used */ -extern int convert_to_git(const char *path, const char *src, size_t len, struct strbuf *dst); +extern int convert_to_git(const char *path, const char *src, size_t len, + struct strbuf *dst, enum safe_crlf checksafe); extern int convert_to_working_tree(const char *path, const char *src, size_t len, struct strbuf *dst); /* add */ diff --git a/config.c b/config.c index 498259ebef..3256c99553 100644 --- a/config.c +++ b/config.c @@ -407,6 +407,15 @@ int git_default_config(const char *var, const char *value) return 0; } + if (!strcmp(var, "core.safecrlf")) { + if (value && !strcasecmp(value, "warn")) { + safe_crlf = SAFE_CRLF_WARN; + return 0; + } + safe_crlf = git_config_bool(var, value); + return 0; + } + if (!strcmp(var, "user.name")) { strlcpy(git_default_name, value, sizeof(git_default_name)); return 0; diff --git a/convert.c b/convert.c index 80f114b2e2..3bbf0c51c2 100644 --- a/convert.c +++ b/convert.c @@ -85,8 +85,39 @@ static int is_binary(unsigned long size, struct text_stat *stats) return 0; } +static void check_safe_crlf(const char *path, int action, + struct text_stat *stats, enum safe_crlf checksafe) +{ + if (!checksafe) + return; + + if (action == CRLF_INPUT || auto_crlf <= 0) { + /* + * CRLFs would not be restored by checkout: + * check if we'd remove CRLFs + */ + if (stats->crlf) { + if (checksafe == SAFE_CRLF_WARN) + warning("CRLF will be replaced by LF in %s.", path); + else /* i.e. SAFE_CRLF_FAIL */ + die("CRLF would be replaced by LF in %s.", path); + } + } else if (auto_crlf > 0) { + /* + * CRLFs would be added by checkout: + * check if we have "naked" LFs + */ + if (stats->lf != stats->crlf) { + if (checksafe == SAFE_CRLF_WARN) + warning("LF will be replaced by CRLF in %s", path); + else /* i.e. SAFE_CRLF_FAIL */ + die("LF would be replaced by CRLF in %s", path); + } + } +} + static int crlf_to_git(const char *path, const char *src, size_t len, - struct strbuf *buf, int action) + struct strbuf *buf, int action, enum safe_crlf checksafe) { struct text_stat stats; char *dst; @@ -95,9 +126,6 @@ static int crlf_to_git(const char *path, const char *src, size_t len, return 0; gather_stats(src, len, &stats); - /* No CR? Nothing to convert, regardless. */ - if (!stats.cr) - return 0; if (action == CRLF_GUESS) { /* @@ -115,6 +143,12 @@ static int crlf_to_git(const char *path, const char *src, size_t len, return 0; } + check_safe_crlf(path, action, &stats, checksafe); + + /* Optimization: No CR? Nothing to convert, regardless. */ + if (!stats.cr) + return 0; + /* only grow if not in place */ if (strbuf_avail(buf) + buf->len < len) strbuf_grow(buf, len - buf->len); @@ -536,7 +570,8 @@ static int git_path_check_ident(const char *path, struct git_attr_check *check) return !!ATTR_TRUE(value); } -int convert_to_git(const char *path, const char *src, size_t len, struct strbuf *dst) +int convert_to_git(const char *path, const char *src, size_t len, + struct strbuf *dst, enum safe_crlf checksafe) { struct git_attr_check check[3]; int crlf = CRLF_GUESS; @@ -558,7 +593,7 @@ int convert_to_git(const char *path, const char *src, size_t len, struct strbuf src = dst->buf; len = dst->len; } - ret |= crlf_to_git(path, src, len, dst, crlf); + ret |= crlf_to_git(path, src, len, dst, crlf, checksafe); if (ret) { src = dst->buf; len = dst->len; diff --git a/diff.c b/diff.c index 5b8afdcb05..562c20ed40 100644 --- a/diff.c +++ b/diff.c @@ -1624,7 +1624,7 @@ int diff_populate_filespec(struct diff_filespec *s, int size_only) * Convert from working tree format to canonical git format */ strbuf_init(&buf, 0); - if (convert_to_git(s->path, s->data, s->size, &buf)) { + if (convert_to_git(s->path, s->data, s->size, &buf, safe_crlf)) { size_t size = 0; munmap(s->data, s->size); s->should_munmap = 0; diff --git a/environment.c b/environment.c index 18a1c4eec4..e351e99ed7 100644 --- a/environment.c +++ b/environment.c @@ -35,6 +35,7 @@ int pager_use_color = 1; char *editor_program; char *excludes_file; int auto_crlf = 0; /* 1: both ways, -1: only when adding git objects */ +enum safe_crlf safe_crlf = SAFE_CRLF_WARN; unsigned whitespace_rule_cfg = WS_DEFAULT_RULE; /* This is set by setup_git_dir_gently() and/or git_default_config() */ diff --git a/sha1_file.c b/sha1_file.c index 66a4e00fa8..41799492f9 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -2358,7 +2358,8 @@ int index_fd(unsigned char *sha1, int fd, struct stat *st, int write_object, if ((type == OBJ_BLOB) && S_ISREG(st->st_mode)) { struct strbuf nbuf; strbuf_init(&nbuf, 0); - if (convert_to_git(path, buf, size, &nbuf)) { + if (convert_to_git(path, buf, size, &nbuf, + write_object ? safe_crlf : 0)) { munmap(buf, size); buf = strbuf_detach(&nbuf, &size); re_allocated = 1; diff --git a/t/t0020-crlf.sh b/t/t0020-crlf.sh index 8b27aa892b..90ea081db6 100755 --- a/t/t0020-crlf.sh +++ b/t/t0020-crlf.sh @@ -8,6 +8,10 @@ q_to_nul () { tr Q '\000' } +q_to_cr () { + tr Q '\015' +} + append_cr () { sed -e 's/$/Q/' | tr Q '\015' } @@ -42,6 +46,60 @@ test_expect_success setup ' echo happy. ' +test_expect_success 'safecrlf: autocrlf=input, all CRLF' ' + + git config core.autocrlf input && + git config core.safecrlf true && + + for w in I am all CRLF; do echo $w; done | append_cr >allcrlf && + ! git add allcrlf +' + +test_expect_success 'safecrlf: autocrlf=input, mixed LF/CRLF' ' + + git config core.autocrlf input && + git config core.safecrlf true && + + for w in Oh here is CRLFQ in text; do echo $w; done | q_to_cr >mixed && + ! git add mixed +' + +test_expect_success 'safecrlf: autocrlf=true, all LF' ' + + git config core.autocrlf true && + git config core.safecrlf true && + + for w in I am all LF; do echo $w; done >alllf && + ! git add alllf +' + +test_expect_success 'safecrlf: autocrlf=true mixed LF/CRLF' ' + + git config core.autocrlf true && + git config core.safecrlf true && + + for w in Oh here is CRLFQ in text; do echo $w; done | q_to_cr >mixed && + ! git add mixed +' + +test_expect_success 'safecrlf: print warning only once' ' + + git config core.autocrlf input && + git config core.safecrlf warn && + + for w in I am all LF; do echo $w; done >doublewarn && + git add doublewarn && + git commit -m "nowarn" && + for w in Oh here is CRLFQ in text; do echo $w; done | q_to_cr >doublewarn && + test $(git add doublewarn 2>&1 | grep "CRLF will be replaced by LF" | wc -l) = 1 +' + +test_expect_success 'switch off autocrlf, safecrlf, reset HEAD' ' + git config core.autocrlf false && + git config core.safecrlf false && + git reset --hard HEAD^ +' + test_expect_success 'update with autocrlf=input' ' rm -f tmp one dir/two three &&