diff --git a/fsck.c b/fsck.c index b1579c7e28..d80a96f4be 100644 --- a/fsck.c +++ b/fsck.c @@ -551,7 +551,7 @@ static int fsck_tree(struct tree *item, struct fsck_options *options) while (desc.size) { unsigned mode; - const char *name; + const char *name, *backslash; const struct object_id *oid; oid = tree_entry_extract(&desc, &name, &mode); @@ -565,6 +565,15 @@ static int fsck_tree(struct tree *item, struct fsck_options *options) is_hfs_dotgit(name) || is_ntfs_dotgit(name)); has_zero_pad |= *(char *)desc.buffer == '0'; + + if ((backslash = strchr(name, '\\'))) { + while (backslash) { + backslash++; + has_dotgit |= is_ntfs_dotgit(backslash); + backslash = strchr(backslash, '\\'); + } + } + if (update_tree_entry_gently(&desc)) { retval += report(options, &item->object, FSCK_MSG_BAD_TREE, "cannot be parsed as a tree"); break; diff --git a/path.c b/path.c index 9ac0531a29..2037e2d8c1 100644 --- a/path.c +++ b/path.c @@ -1302,22 +1302,57 @@ static int only_spaces_and_periods(const char *path, size_t len, size_t skip) return 1; } +/* + * On NTFS, we need to be careful to disallow certain synonyms of the `.git/` + * directory: + * + * - For historical reasons, file names that end in spaces or periods are + * automatically trimmed. Therefore, `.git . . ./` is a valid way to refer + * to `.git/`. + * + * - For other historical reasons, file names that do not conform to the 8.3 + * format (up to eight characters for the basename, three for the file + * extension, certain characters not allowed such as `+`, etc) are associated + * with a so-called "short name", at least on the `C:` drive by default. + * Which means that `git~1/` is a valid way to refer to `.git/`. + * + * Note: Technically, `.git/` could receive the short name `git~2` if the + * short name `git~1` were already used. In Git, however, we guarantee that + * `.git` is the first item in a directory, therefore it will be associated + * with the short name `git~1` (unless short names are disabled). + * + * - For yet other historical reasons, NTFS supports so-called "Alternate Data + * Streams", i.e. metadata associated with a given file, referred to via + * `::`. There exists a default stream + * type for directories, allowing `.git/` to be accessed via + * `.git::$INDEX_ALLOCATION/`. + * + * When this function returns 1, it indicates that the specified file/directory + * name refers to a `.git` file or directory, or to any of these synonyms, and + * Git should therefore not track it. + * + * For performance reasons, _all_ Alternate Data Streams of `.git/` are + * forbidden, not just `::$INDEX_ALLOCATION`. + * + * This function is intended to be used by `git fsck` even on platforms where + * the backslash is a regular filename character, therefore it needs to handle + * backlash characters in the provided `name` specially: they are interpreted + * as directory separators. + */ int is_ntfs_dotgit(const char *name) { size_t len; for (len = 0; ; len++) - if (!name[len] || name[len] == '\\' || is_dir_sep(name[len])) { + if (!name[len] || name[len] == '\\' || is_dir_sep(name[len]) || + name[len] == ':') { if (only_spaces_and_periods(name, len, 4) && !strncasecmp(name, ".git", 4)) return 1; if (only_spaces_and_periods(name, len, 5) && !strncasecmp(name, "git~1", 5)) return 1; - if (name[len] != '\\') - return 0; - name += len + 1; - len = -1; + return 0; } } @@ -1334,7 +1369,7 @@ static int is_ntfs_dot_generic(const char *name, only_spaces_and_periods: for (;;) { char c = name[i++]; - if (!c) + if (!c || c == ':') return 1; if (c != ' ' && c != '.') return 0; diff --git a/read-cache.c b/read-cache.c index 5b57b369e8..bde1e70c51 100644 --- a/read-cache.c +++ b/read-cache.c @@ -874,7 +874,15 @@ inside: if ((c == '.' && !verify_dotfile(path, mode)) || is_dir_sep(c) || c == '\0') return 0; + } else if (c == '\\' && protect_ntfs) { + if (is_ntfs_dotgit(path)) + return 0; + if (S_ISLNK(mode)) { + if (is_ntfs_dotgitmodules(path)) + return 0; + } } + c = *path++; } } diff --git a/t/helper/test-path-utils.c b/t/helper/test-path-utils.c index 94846550f7..16d8e689c8 100644 --- a/t/helper/test-path-utils.c +++ b/t/helper/test-path-utils.c @@ -176,6 +176,99 @@ static int is_dotgitmodules(const char *path) return is_hfs_dotgitmodules(path) || is_ntfs_dotgitmodules(path); } +/* + * A very simple, reproducible pseudo-random generator. Copied from + * `test-genrandom.c`. + */ +static uint64_t my_random_value = 1234; + +static uint64_t my_random(void) +{ + my_random_value = my_random_value * 1103515245 + 12345; + return my_random_value; +} + +/* + * A fast approximation of the square root, without requiring math.h. + * + * It uses Newton's method to approximate the solution of 0 = x^2 - value. + */ +static double my_sqrt(double value) +{ + const double epsilon = 1e-6; + double x = value; + + if (value == 0) + return 0; + + for (;;) { + double delta = (value / x - x) / 2; + if (delta < epsilon && delta > -epsilon) + return x + delta; + x += delta; + } +} + +static int protect_ntfs_hfs_benchmark(int argc, const char **argv) +{ + size_t i, j, nr, min_len = 3, max_len = 20; + char **names; + int repetitions = 15, file_mode = 0100644; + uint64_t begin, end; + double m[3][2], v[3][2]; + uint64_t cumul; + double cumul2; + + if (argc > 1 && !strcmp(argv[1], "--with-symlink-mode")) { + file_mode = 0120000; + argc--; + argv++; + } + + nr = argc > 1 ? strtoul(argv[1], NULL, 0) : 1000000; + ALLOC_ARRAY(names, nr); + + if (argc > 2) { + min_len = strtoul(argv[2], NULL, 0); + if (argc > 3) + max_len = strtoul(argv[3], NULL, 0); + if (min_len > max_len) + die("min_len > max_len"); + } + + for (i = 0; i < nr; i++) { + size_t len = min_len + (my_random() % (max_len + 1 - min_len)); + + names[i] = xmallocz(len); + while (len > 0) + names[i][--len] = (char)(' ' + (my_random() % ('\x7f' - ' '))); + } + + for (protect_ntfs = 0; protect_ntfs < 2; protect_ntfs++) + for (protect_hfs = 0; protect_hfs < 2; protect_hfs++) { + cumul = 0; + cumul2 = 0; + for (i = 0; i < repetitions; i++) { + begin = getnanotime(); + for (j = 0; j < nr; j++) + verify_path(names[j], file_mode); + end = getnanotime(); + printf("protect_ntfs = %d, protect_hfs = %d: %lfms\n", protect_ntfs, protect_hfs, (end-begin) / (double)1e6); + cumul += end - begin; + cumul2 += (end - begin) * (end - begin); + } + m[protect_ntfs][protect_hfs] = cumul / (double)repetitions; + v[protect_ntfs][protect_hfs] = my_sqrt(cumul2 / (double)repetitions - m[protect_ntfs][protect_hfs] * m[protect_ntfs][protect_hfs]); + printf("mean: %lfms, stddev: %lfms\n", m[protect_ntfs][protect_hfs] / (double)1e6, v[protect_ntfs][protect_hfs] / (double)1e6); + } + + for (protect_ntfs = 0; protect_ntfs < 2; protect_ntfs++) + for (protect_hfs = 0; protect_hfs < 2; protect_hfs++) + printf("ntfs=%d/hfs=%d: %lf%% slower\n", protect_ntfs, protect_hfs, (m[protect_ntfs][protect_hfs] - m[0][0]) * 100 / m[0][0]); + + return 0; +} + int cmd_main(int argc, const char **argv) { if (argc == 3 && !strcmp(argv[1], "normalize_path_copy")) { @@ -290,6 +383,9 @@ int cmd_main(int argc, const char **argv) return !!res; } + if (argc > 1 && !strcmp(argv[1], "protect_ntfs_hfs")) + return !!protect_ntfs_hfs_benchmark(argc - 1, argv + 1); + fprintf(stderr, "%s: unknown function name: %s\n", argv[0], argv[1] ? argv[1] : "(there was none)"); return 1; diff --git a/t/t0060-path-utils.sh b/t/t0060-path-utils.sh index 3f3357ed9f..2b8589e921 100755 --- a/t/t0060-path-utils.sh +++ b/t/t0060-path-utils.sh @@ -408,6 +408,9 @@ test_expect_success 'match .gitmodules' ' ~1000000 \ ~9999999 \ \ + .gitmodules:\$DATA \ + "gitmod~4 . :\$DATA" \ + \ --not \ ".gitmodules x" \ ".gitmodules .x" \ @@ -432,7 +435,9 @@ test_expect_success 'match .gitmodules' ' \ GI7EB~1 \ GI7EB~01 \ - GI7EB~1X + GI7EB~1X \ + \ + .gitmodules,:\$DATA ' test_done diff --git a/t/t1014-read-tree-confusing.sh b/t/t1014-read-tree-confusing.sh index 2f5a25d503..da3376b3bb 100755 --- a/t/t1014-read-tree-confusing.sh +++ b/t/t1014-read-tree-confusing.sh @@ -49,6 +49,7 @@ git~1 .git.SPACE .git.{space} .\\\\.GIT\\\\foobar backslashes .git\\\\foobar backslashes2 +.git...:alternate-stream EOF test_expect_success 'utf-8 paths allowed with core.protectHFS off' '