From 1d8842d921cc2695f155f4a10904eeffad085c77 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 14 May 2009 13:22:36 -0700 Subject: [PATCH 1/7] Add 'fill_directory()' helper function for directory traversal Most of the users of "read_directory()" actually want a much simpler interface than the whole complex (but rather powerful) one. In fact 'git add' had already largely abstracted out the core interface issues into a private "fill_directory()" function that was largely applicable almost as-is to a number of callers. Yes, 'git add' wants to do some extra work of its own, specific to the add semantics, but we can easily split that out, and use the core as a generic function. This function does exactly that, and now that much simplified 'fill_directory()' function can be shared with a number of callers, while also ensuring that the rather more complex calling conventions of read_directory() are used by fewer call-sites. This also makes the 'common_prefix()' helper function private to dir.c, since all callers are now in that file. Signed-off-by: Linus Torvalds Signed-off-by: Junio C Hamano --- builtin-add.c | 45 ++++++++++++++------------------------------- builtin-clean.c | 12 +----------- builtin-ls-files.c | 7 +------ dir.c | 23 ++++++++++++++++++++++- dir.h | 3 +-- wt-status.c | 2 +- 6 files changed, 40 insertions(+), 52 deletions(-) diff --git a/builtin-add.c b/builtin-add.c index 78989dad8c..581a2a1748 100644 --- a/builtin-add.c +++ b/builtin-add.c @@ -97,35 +97,6 @@ static void treat_gitlinks(const char **pathspec) } } -static void fill_directory(struct dir_struct *dir, const char **pathspec, - int ignored_too) -{ - const char *path, *base; - int baselen; - - /* Set up the default git porcelain excludes */ - memset(dir, 0, sizeof(*dir)); - if (!ignored_too) { - dir->flags |= DIR_COLLECT_IGNORED; - setup_standard_excludes(dir); - } - - /* - * Calculate common prefix for the pathspec, and - * use that to optimize the directory walk - */ - baselen = common_prefix(pathspec); - path = "."; - base = ""; - if (baselen) - path = base = xmemdupz(*pathspec, baselen); - - /* Read the directory and prune it */ - read_directory(dir, path, base, baselen, pathspec); - if (pathspec) - prune_directory(dir, pathspec, baselen); -} - static void refresh(int verbose, const char **pathspec) { char *seen; @@ -343,9 +314,21 @@ int cmd_add(int argc, const char **argv, const char *prefix) die("index file corrupt"); treat_gitlinks(pathspec); - if (add_new_files) + if (add_new_files) { + int baselen; + + /* Set up the default git porcelain excludes */ + memset(&dir, 0, sizeof(dir)); + if (!ignored_too) { + dir.flags |= DIR_COLLECT_IGNORED; + setup_standard_excludes(&dir); + } + /* This picks up the paths that are not tracked */ - fill_directory(&dir, pathspec, ignored_too); + baselen = fill_directory(&dir, pathspec); + if (pathspec) + prune_directory(&dir, pathspec, baselen); + } if (refresh_only) { refresh(verbose, pathspec); diff --git a/builtin-clean.c b/builtin-clean.c index 1c1b6d26e9..2d8c735d48 100644 --- a/builtin-clean.c +++ b/builtin-clean.c @@ -33,7 +33,6 @@ int cmd_clean(int argc, const char **argv, const char *prefix) int ignored_only = 0, baselen = 0, config_set = 0, errors = 0; struct strbuf directory = STRBUF_INIT; struct dir_struct dir; - const char *path, *base; static const char **pathspec; struct strbuf buf = STRBUF_INIT; const char *qname; @@ -78,16 +77,7 @@ int cmd_clean(int argc, const char **argv, const char *prefix) pathspec = get_pathspec(prefix, argv); read_cache(); - /* - * Calculate common prefix for the pathspec, and - * use that to optimize the directory walk - */ - baselen = common_prefix(pathspec); - path = "."; - base = ""; - if (baselen) - path = base = xmemdupz(*pathspec, baselen); - read_directory(&dir, path, base, baselen, pathspec); + fill_directory(&dir, pathspec); if (pathspec) seen = xmalloc(argc > 0 ? argc : 1); diff --git a/builtin-ls-files.c b/builtin-ls-files.c index 2312866605..f473220502 100644 --- a/builtin-ls-files.c +++ b/builtin-ls-files.c @@ -161,12 +161,7 @@ static void show_files(struct dir_struct *dir, const char *prefix) /* For cached/deleted files we don't need to even do the readdir */ if (show_others || show_killed) { - const char *path = ".", *base = ""; - int baselen = prefix_len; - - if (baselen) - path = base = prefix; - read_directory(dir, path, base, baselen, pathspec); + fill_directory(dir, pathspec); if (show_others) show_other_files(dir); if (show_killed) diff --git a/dir.c b/dir.c index 74b3bbf6fd..0c8553b27c 100644 --- a/dir.c +++ b/dir.c @@ -19,7 +19,7 @@ static int read_directory_recursive(struct dir_struct *dir, int check_only, const struct path_simplify *simplify); static int get_dtype(struct dirent *de, const char *path); -int common_prefix(const char **pathspec) +static int common_prefix(const char **pathspec) { const char *path, *slash, *next; int prefix; @@ -52,6 +52,27 @@ int common_prefix(const char **pathspec) return prefix; } +int fill_directory(struct dir_struct *dir, const char **pathspec) +{ + const char *path, *base; + int baselen; + + /* + * Calculate common prefix for the pathspec, and + * use that to optimize the directory walk + */ + baselen = common_prefix(pathspec); + path = ""; + base = ""; + + if (baselen) + path = base = xmemdupz(*pathspec, baselen); + + /* Read the directory and prune it */ + read_directory(dir, path, base, baselen, pathspec); + return baselen; +} + /* * Does 'match' match the given name? * A match is found if diff --git a/dir.h b/dir.h index 541286ad1d..f9d69dd15f 100644 --- a/dir.h +++ b/dir.h @@ -61,13 +61,12 @@ struct dir_struct { char basebuf[PATH_MAX]; }; -extern int common_prefix(const char **pathspec); - #define MATCHED_RECURSIVELY 1 #define MATCHED_FNMATCH 2 #define MATCHED_EXACTLY 3 extern int match_pathspec(const char **pathspec, const char *name, int namelen, int prefix, char *seen); +extern int fill_directory(struct dir_struct *dir, const char **pathspec); extern int read_directory(struct dir_struct *, const char *path, const char *base, int baselen, const char **pathspec); extern int excluded(struct dir_struct *, const char *, int *); diff --git a/wt-status.c b/wt-status.c index 0ca4b13c29..47735d8129 100644 --- a/wt-status.c +++ b/wt-status.c @@ -255,7 +255,7 @@ static void wt_status_print_untracked(struct wt_status *s) DIR_SHOW_OTHER_DIRECTORIES | DIR_HIDE_EMPTY_DIRECTORIES; setup_standard_excludes(&dir); - read_directory(&dir, ".", "", 0, NULL); + fill_directory(&dir, NULL); for(i = 0; i < dir.nr; i++) { struct dir_entry *ent = dir.entries[i]; if (!cache_name_is_other(ent->name, ent->len)) From dba2e2037f40685bffc87d3e7114a02c5bda1eff Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 8 Jul 2009 19:24:39 -0700 Subject: [PATCH 2/7] Simplify read_directory[_recursive]() arguments Stop the insanity with separate 'path' and 'base' arguments that must match. We don't need that crazy interface any more, since we cleaned up handling of 'path' in commit da4b3e8c28b1dc2b856d2555ac7bb47ab712598c. Signed-off-by: Linus Torvalds Signed-off-by: Junio C Hamano --- dir.c | 57 +++++++++++++++++++++++++------------------------- dir.h | 2 +- unpack-trees.c | 2 +- 3 files changed, 30 insertions(+), 31 deletions(-) diff --git a/dir.c b/dir.c index 0c8553b27c..b0671f59c8 100644 --- a/dir.c +++ b/dir.c @@ -14,8 +14,7 @@ struct path_simplify { const char *path; }; -static int read_directory_recursive(struct dir_struct *dir, - const char *path, const char *base, int baselen, +static int read_directory_recursive(struct dir_struct *dir, const char *path, int len, int check_only, const struct path_simplify *simplify); static int get_dtype(struct dirent *de, const char *path); @@ -54,23 +53,22 @@ static int common_prefix(const char **pathspec) int fill_directory(struct dir_struct *dir, const char **pathspec) { - const char *path, *base; - int baselen; + const char *path; + int len; /* * Calculate common prefix for the pathspec, and * use that to optimize the directory walk */ - baselen = common_prefix(pathspec); + len = common_prefix(pathspec); path = ""; - base = ""; - if (baselen) - path = base = xmemdupz(*pathspec, baselen); + if (len) + path = xmemdupz(*pathspec, len); /* Read the directory and prune it */ - read_directory(dir, path, base, baselen, pathspec); - return baselen; + read_directory(dir, path, len, pathspec); + return len; } /* @@ -526,7 +524,7 @@ static enum directory_treatment treat_directory(struct dir_struct *dir, /* This is the "show_other_directories" case */ if (!(dir->flags & DIR_HIDE_EMPTY_DIRECTORIES)) return show_directory; - if (!read_directory_recursive(dir, dirname, dirname, len, 1, simplify)) + if (!read_directory_recursive(dir, dirname, len, 1, simplify)) return ignore_directory; return show_directory; } @@ -595,15 +593,15 @@ static int get_dtype(struct dirent *de, const char *path) * Also, we ignore the name ".git" (even if it is not a directory). * That likely will not change. */ -static int read_directory_recursive(struct dir_struct *dir, const char *path, const char *base, int baselen, int check_only, const struct path_simplify *simplify) +static int read_directory_recursive(struct dir_struct *dir, const char *base, int baselen, int check_only, const struct path_simplify *simplify) { - DIR *fdir = opendir(*path ? path : "."); + DIR *fdir = opendir(*base ? base : "."); int contents = 0; if (fdir) { struct dirent *de; - char fullname[PATH_MAX + 1]; - memcpy(fullname, base, baselen); + char path[PATH_MAX + 1]; + memcpy(path, base, baselen); while ((de = readdir(fdir)) != NULL) { int len, dtype; @@ -614,17 +612,18 @@ static int read_directory_recursive(struct dir_struct *dir, const char *path, co continue; len = strlen(de->d_name); /* Ignore overly long pathnames! */ - if (len + baselen + 8 > sizeof(fullname)) + if (len + baselen + 8 > sizeof(path)) continue; - memcpy(fullname + baselen, de->d_name, len+1); - if (simplify_away(fullname, baselen + len, simplify)) + memcpy(path + baselen, de->d_name, len+1); + len = baselen + len; + if (simplify_away(path, len, simplify)) continue; dtype = DTYPE(de); - exclude = excluded(dir, fullname, &dtype); + exclude = excluded(dir, path, &dtype); if (exclude && (dir->flags & DIR_COLLECT_IGNORED) - && in_pathspec(fullname, baselen + len, simplify)) - dir_add_ignored(dir, fullname, baselen + len); + && in_pathspec(path, len, simplify)) + dir_add_ignored(dir, path,len); /* * Excluded? If we don't explicitly want to show @@ -634,7 +633,7 @@ static int read_directory_recursive(struct dir_struct *dir, const char *path, co continue; if (dtype == DT_UNKNOWN) - dtype = get_dtype(de, fullname); + dtype = get_dtype(de, path); /* * Do we want to see just the ignored files? @@ -651,9 +650,9 @@ static int read_directory_recursive(struct dir_struct *dir, const char *path, co default: continue; case DT_DIR: - memcpy(fullname + baselen + len, "/", 2); + memcpy(path + len, "/", 2); len++; - switch (treat_directory(dir, fullname, baselen + len, simplify)) { + switch (treat_directory(dir, path, len, simplify)) { case show_directory: if (exclude != !!(dir->flags & DIR_SHOW_IGNORED)) @@ -661,7 +660,7 @@ static int read_directory_recursive(struct dir_struct *dir, const char *path, co break; case recurse_into_directory: contents += read_directory_recursive(dir, - fullname, fullname, baselen + len, 0, simplify); + path, len, 0, simplify); continue; case ignore_directory: continue; @@ -675,7 +674,7 @@ static int read_directory_recursive(struct dir_struct *dir, const char *path, co if (check_only) goto exit_early; else - dir_add_name(dir, fullname, baselen + len); + dir_add_name(dir, path, len); } exit_early: closedir(fdir); @@ -738,15 +737,15 @@ static void free_simplify(struct path_simplify *simplify) free(simplify); } -int read_directory(struct dir_struct *dir, const char *path, const char *base, int baselen, const char **pathspec) +int read_directory(struct dir_struct *dir, const char *path, int len, const char **pathspec) { struct path_simplify *simplify; - if (has_symlink_leading_path(path, strlen(path))) + if (has_symlink_leading_path(path, len)) return dir->nr; simplify = create_simplify(pathspec); - read_directory_recursive(dir, path, base, baselen, 0, simplify); + read_directory_recursive(dir, path, len, 0, simplify); free_simplify(simplify); qsort(dir->entries, dir->nr, sizeof(struct dir_entry *), cmp_name); qsort(dir->ignored, dir->ignored_nr, sizeof(struct dir_entry *), cmp_name); diff --git a/dir.h b/dir.h index f9d69dd15f..a6314464f9 100644 --- a/dir.h +++ b/dir.h @@ -67,7 +67,7 @@ struct dir_struct { extern int match_pathspec(const char **pathspec, const char *name, int namelen, int prefix, char *seen); extern int fill_directory(struct dir_struct *dir, const char **pathspec); -extern int read_directory(struct dir_struct *, const char *path, const char *base, int baselen, const char **pathspec); +extern int read_directory(struct dir_struct *, const char *path, int len, const char **pathspec); extern int excluded(struct dir_struct *, const char *, int *); extern void add_excludes_from_file(struct dir_struct *, const char *fname); diff --git a/unpack-trees.c b/unpack-trees.c index 05d0bb1f85..42c7d7d563 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -551,7 +551,7 @@ static int verify_clean_subdirectory(struct cache_entry *ce, const char *action, memset(&d, 0, sizeof(d)); if (o->dir) d.exclude_per_dir = o->dir->exclude_per_dir; - i = read_directory(&d, ce->name, pathbuf, namelen+1, NULL); + i = read_directory(&d, pathbuf, namelen+1, NULL); if (i) return o->gently ? -1 : error(ERRORMSG(o, not_uptodate_dir), ce->name); From caa6b7825aaddb8a97a5b793ca61df0c1ec9b76b Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 8 Jul 2009 19:31:49 -0700 Subject: [PATCH 3/7] Avoid doing extra 'lstat()'s for d_type if we have an up-to-date cache entry On filesystems without d_type, we can look at the cache entry first. Doing an lstat() can be expensive. Reported by Dmitry Potapov for Cygwin. Signed-off-by: Linus Torvalds Signed-off-by: Junio C Hamano --- dir.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/dir.c b/dir.c index b0671f59c8..8a9e7d8131 100644 --- a/dir.c +++ b/dir.c @@ -16,7 +16,7 @@ struct path_simplify { static int read_directory_recursive(struct dir_struct *dir, const char *path, int len, int check_only, const struct path_simplify *simplify); -static int get_dtype(struct dirent *de, const char *path); +static int get_dtype(struct dirent *de, const char *path, int len); static int common_prefix(const char **pathspec) { @@ -326,7 +326,7 @@ static int excluded_1(const char *pathname, if (x->flags & EXC_FLAG_MUSTBEDIR) { if (*dtype == DT_UNKNOWN) - *dtype = get_dtype(NULL, pathname); + *dtype = get_dtype(NULL, pathname, pathlen); if (*dtype != DT_DIR) continue; } @@ -566,14 +566,18 @@ static int in_pathspec(const char *path, int len, const struct path_simplify *si return 0; } -static int get_dtype(struct dirent *de, const char *path) +static int get_dtype(struct dirent *de, const char *path, int len) { int dtype = de ? DTYPE(de) : DT_UNKNOWN; + struct cache_entry *ce; struct stat st; if (dtype != DT_UNKNOWN) return dtype; - if (lstat(path, &st)) + ce = cache_name_exists(path, len, 0); + if (ce && ce_uptodate(ce)) + st.st_mode = ce->ce_mode; + else if (lstat(path, &st)) return dtype; if (S_ISREG(st.st_mode)) return DT_REG; @@ -633,7 +637,7 @@ static int read_directory_recursive(struct dir_struct *dir, const char *base, in continue; if (dtype == DT_UNKNOWN) - dtype = get_dtype(de, path); + dtype = get_dtype(de, path, len); /* * Do we want to see just the ignored files? From 443e061a41bee30de34793648793ed70477ac575 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 9 Jul 2009 13:14:28 -0700 Subject: [PATCH 4/7] Avoid using 'lstat()' to figure out directories If we have an up-to-date index entry for a file in that directory, we can know that the directories leading up to that file must be directories. No need to do an lstat() on the directory. Signed-off-by: Linus Torvalds Signed-off-by: Junio C Hamano --- dir.c | 47 ++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 42 insertions(+), 5 deletions(-) diff --git a/dir.c b/dir.c index 8a9e7d8131..e05b850acf 100644 --- a/dir.c +++ b/dir.c @@ -566,18 +566,55 @@ static int in_pathspec(const char *path, int len, const struct path_simplify *si return 0; } +static int get_index_dtype(const char *path, int len) +{ + int pos; + struct cache_entry *ce; + + ce = cache_name_exists(path, len, 0); + if (ce) { + if (!ce_uptodate(ce)) + return DT_UNKNOWN; + if (S_ISGITLINK(ce->ce_mode)) + return DT_DIR; + /* + * Nobody actually cares about the + * difference between DT_LNK and DT_REG + */ + return DT_REG; + } + + /* Try to look it up as a directory */ + pos = cache_name_pos(path, len); + if (pos >= 0) + return DT_UNKNOWN; + pos = -pos-1; + while (pos < active_nr) { + ce = active_cache[pos++]; + if (strncmp(ce->name, path, len)) + break; + if (ce->name[len] > '/') + break; + if (ce->name[len] < '/') + continue; + if (!ce_uptodate(ce)) + break; /* continue? */ + return DT_DIR; + } + return DT_UNKNOWN; +} + static int get_dtype(struct dirent *de, const char *path, int len) { int dtype = de ? DTYPE(de) : DT_UNKNOWN; - struct cache_entry *ce; struct stat st; if (dtype != DT_UNKNOWN) return dtype; - ce = cache_name_exists(path, len, 0); - if (ce && ce_uptodate(ce)) - st.st_mode = ce->ce_mode; - else if (lstat(path, &st)) + dtype = get_index_dtype(path, len); + if (dtype != DT_UNKNOWN) + return dtype; + if (lstat(path, &st)) return dtype; if (S_ISREG(st.st_mode)) return DT_REG; From 867f72bf434a05b9eadf851a81564be5173fbba5 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 9 Jul 2009 13:23:59 -0700 Subject: [PATCH 5/7] Prepare symlink caching for thread-safety This doesn't actually change the external interfaces, so they are still thread-unsafe, but it makes the code internally pass a pointer to a local 'struct cache_def' around, so that the core code can be made thread-safe. The threaded index preloading will want to verify that the paths leading up to a pathname are all real directories. Signed-off-by: Linus Torvalds Signed-off-by: Junio C Hamano --- symlinks.c | 75 +++++++++++++++++++++++++++++------------------------- 1 file changed, 40 insertions(+), 35 deletions(-) diff --git a/symlinks.c b/symlinks.c index 8dcd63261f..08ad35330f 100644 --- a/symlinks.c +++ b/symlinks.c @@ -38,13 +38,13 @@ static struct cache_def { int flags; int track_flags; int prefix_len_stat_func; -} cache; +} default_cache; -static inline void reset_lstat_cache(void) +static inline void reset_lstat_cache(struct cache_def *cache) { - cache.path[0] = '\0'; - cache.len = 0; - cache.flags = 0; + cache->path[0] = '\0'; + cache->len = 0; + cache->flags = 0; /* * The track_flags and prefix_len_stat_func members is only * set by the safeguard rule inside lstat_cache() @@ -70,23 +70,23 @@ static inline void reset_lstat_cache(void) * of the prefix, where the cache should use the stat() function * instead of the lstat() function to test each path component. */ -static int lstat_cache(const char *name, int len, +static int lstat_cache(struct cache_def *cache, const char *name, int len, int track_flags, int prefix_len_stat_func) { int match_len, last_slash, last_slash_dir, previous_slash; int match_flags, ret_flags, save_flags, max_len, ret; struct stat st; - if (cache.track_flags != track_flags || - cache.prefix_len_stat_func != prefix_len_stat_func) { + if (cache->track_flags != track_flags || + cache->prefix_len_stat_func != prefix_len_stat_func) { /* * As a safeguard rule we clear the cache if the * values of track_flags and/or prefix_len_stat_func * does not match with the last supplied values. */ - reset_lstat_cache(); - cache.track_flags = track_flags; - cache.prefix_len_stat_func = prefix_len_stat_func; + reset_lstat_cache(cache); + cache->track_flags = track_flags; + cache->prefix_len_stat_func = prefix_len_stat_func; match_len = last_slash = 0; } else { /* @@ -94,10 +94,10 @@ static int lstat_cache(const char *name, int len, * the 2 "excluding" path types. */ match_len = last_slash = - longest_path_match(name, len, cache.path, cache.len, + longest_path_match(name, len, cache->path, cache->len, &previous_slash); - match_flags = cache.flags & track_flags & (FL_NOENT|FL_SYMLINK); - if (match_flags && match_len == cache.len) + match_flags = cache->flags & track_flags & (FL_NOENT|FL_SYMLINK); + if (match_flags && match_len == cache->len) return match_flags; /* * If we now have match_len > 0, we would know that @@ -121,18 +121,18 @@ static int lstat_cache(const char *name, int len, max_len = len < PATH_MAX ? len : PATH_MAX; while (match_len < max_len) { do { - cache.path[match_len] = name[match_len]; + cache->path[match_len] = name[match_len]; match_len++; } while (match_len < max_len && name[match_len] != '/'); if (match_len >= max_len && !(track_flags & FL_FULLPATH)) break; last_slash = match_len; - cache.path[last_slash] = '\0'; + cache->path[last_slash] = '\0'; if (last_slash <= prefix_len_stat_func) - ret = stat(cache.path, &st); + ret = stat(cache->path, &st); else - ret = lstat(cache.path, &st); + ret = lstat(cache->path, &st); if (ret) { ret_flags = FL_LSTATERR; @@ -156,9 +156,9 @@ static int lstat_cache(const char *name, int len, */ save_flags = ret_flags & track_flags & (FL_NOENT|FL_SYMLINK); if (save_flags && last_slash > 0 && last_slash <= PATH_MAX) { - cache.path[last_slash] = '\0'; - cache.len = last_slash; - cache.flags = save_flags; + cache->path[last_slash] = '\0'; + cache->len = last_slash; + cache->flags = save_flags; } else if ((track_flags & FL_DIR) && last_slash_dir > 0 && last_slash_dir <= PATH_MAX) { /* @@ -172,11 +172,11 @@ static int lstat_cache(const char *name, int len, * can still cache the path components before the last * one (the found symlink or non-existing component). */ - cache.path[last_slash_dir] = '\0'; - cache.len = last_slash_dir; - cache.flags = FL_DIR; + cache->path[last_slash_dir] = '\0'; + cache->len = last_slash_dir; + cache->flags = FL_DIR; } else { - reset_lstat_cache(); + reset_lstat_cache(cache); } return ret_flags; } @@ -188,16 +188,17 @@ static int lstat_cache(const char *name, int len, void invalidate_lstat_cache(const char *name, int len) { int match_len, previous_slash; + struct cache_def *cache = &default_cache; /* FIXME */ - match_len = longest_path_match(name, len, cache.path, cache.len, + match_len = longest_path_match(name, len, cache->path, cache->len, &previous_slash); if (len == match_len) { - if ((cache.track_flags & FL_DIR) && previous_slash > 0) { - cache.path[previous_slash] = '\0'; - cache.len = previous_slash; - cache.flags = FL_DIR; + if ((cache->track_flags & FL_DIR) && previous_slash > 0) { + cache->path[previous_slash] = '\0'; + cache->len = previous_slash; + cache->flags = FL_DIR; } else { - reset_lstat_cache(); + reset_lstat_cache(cache); } } } @@ -207,7 +208,8 @@ void invalidate_lstat_cache(const char *name, int len) */ void clear_lstat_cache(void) { - reset_lstat_cache(); + struct cache_def *cache = &default_cache; /* FIXME */ + reset_lstat_cache(cache); } #define USE_ONLY_LSTAT 0 @@ -217,7 +219,8 @@ void clear_lstat_cache(void) */ int has_symlink_leading_path(const char *name, int len) { - return lstat_cache(name, len, + struct cache_def *cache = &default_cache; /* FIXME */ + return lstat_cache(cache, name, len, FL_SYMLINK|FL_DIR, USE_ONLY_LSTAT) & FL_SYMLINK; } @@ -228,7 +231,8 @@ int has_symlink_leading_path(const char *name, int len) */ int has_symlink_or_noent_leading_path(const char *name, int len) { - return lstat_cache(name, len, + struct cache_def *cache = &default_cache; /* FIXME */ + return lstat_cache(cache, name, len, FL_SYMLINK|FL_NOENT|FL_DIR, USE_ONLY_LSTAT) & (FL_SYMLINK|FL_NOENT); } @@ -242,7 +246,8 @@ int has_symlink_or_noent_leading_path(const char *name, int len) */ int has_dirs_only_path(const char *name, int len, int prefix_len) { - return lstat_cache(name, len, + struct cache_def *cache = &default_cache; /* FIXME */ + return lstat_cache(cache, name, len, FL_DIR|FL_FULLPATH, prefix_len) & FL_DIR; } From b9fd284657de3ec30922fb17c0baf243ae947fdd Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 9 Jul 2009 13:35:31 -0700 Subject: [PATCH 6/7] Export thread-safe version of 'has_symlink_leading_path()' The threaded index preloading will want it, so that it can avoid locking by simply using a per-thread symlink/directory cache. Signed-off-by: Linus Torvalds Signed-off-by: Junio C Hamano --- cache.h | 10 ++++++++++ symlinks.c | 21 ++++++++++----------- 2 files changed, 20 insertions(+), 11 deletions(-) diff --git a/cache.h b/cache.h index 871c9844e8..f1e5ede021 100644 --- a/cache.h +++ b/cache.h @@ -744,7 +744,17 @@ struct checkout { }; extern int checkout_entry(struct cache_entry *ce, const struct checkout *state, char *topath); + +struct cache_def { + char path[PATH_MAX + 1]; + int len; + int flags; + int track_flags; + int prefix_len_stat_func; +}; + extern int has_symlink_leading_path(const char *name, int len); +extern int threaded_has_symlink_leading_path(struct cache_def *, const char *, int); extern int has_symlink_or_noent_leading_path(const char *name, int len); extern int has_dirs_only_path(const char *name, int len, int prefix_len); extern void invalidate_lstat_cache(const char *name, int len); diff --git a/symlinks.c b/symlinks.c index 08ad35330f..4bdded39c5 100644 --- a/symlinks.c +++ b/symlinks.c @@ -32,13 +32,7 @@ static int longest_path_match(const char *name_a, int len_a, return match_len; } -static struct cache_def { - char path[PATH_MAX + 1]; - int len; - int flags; - int track_flags; - int prefix_len_stat_func; -} default_cache; +static struct cache_def default_cache; static inline void reset_lstat_cache(struct cache_def *cache) { @@ -214,15 +208,20 @@ void clear_lstat_cache(void) #define USE_ONLY_LSTAT 0 +/* + * Return non-zero if path 'name' has a leading symlink component + */ +int threaded_has_symlink_leading_path(struct cache_def *cache, const char *name, int len) +{ + return lstat_cache(cache, name, len, FL_SYMLINK|FL_DIR, USE_ONLY_LSTAT) & FL_SYMLINK; +} + /* * Return non-zero if path 'name' has a leading symlink component */ int has_symlink_leading_path(const char *name, int len) { - struct cache_def *cache = &default_cache; /* FIXME */ - return lstat_cache(cache, name, len, - FL_SYMLINK|FL_DIR, USE_ONLY_LSTAT) & - FL_SYMLINK; + return threaded_has_symlink_leading_path(&default_cache, name, len); } /* From f62ce3de9dd4803f50f65e17f5fc03c7bdb49c40 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 9 Jul 2009 13:37:02 -0700 Subject: [PATCH 7/7] Make index preloading check the whole path to the file This uses the new thread-safe 'threaded_has_symlink_leading_path()' function to efficiently verify that the whole path leading up to the filename is a proper path, and does not contain symlinks. This makes 'ce_uptodate()' a much stronger guarantee: it no longer just guarantees that the 'lstat()' of the path would match, it also means that we know that people haven't played games with moving directories around and covered it up with symlinks. Signed-off-by: Linus Torvalds Signed-off-by: Junio C Hamano --- preload-index.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/preload-index.c b/preload-index.c index 88edc5f8a9..14d5281183 100644 --- a/preload-index.c +++ b/preload-index.c @@ -34,7 +34,9 @@ static void *preload_thread(void *_data) struct thread_data *p = _data; struct index_state *index = p->index; struct cache_entry **cep = index->cache + p->offset; + struct cache_def cache; + memset(&cache, 0, sizeof(cache)); nr = p->nr; if (nr + p->offset > index->cache_nr) nr = index->cache_nr - p->offset; @@ -49,6 +51,8 @@ static void *preload_thread(void *_data) continue; if (!ce_path_match(ce, p->pathspec)) continue; + if (threaded_has_symlink_leading_path(&cache, ce->name, ce_namelen(ce))) + continue; if (lstat(ce->name, &st)) continue; if (ie_match_stat(index, ce, &st, CE_MATCH_RACY_IS_DIRTY))