From db5360f3f4966c4586d072bd1440880773185b81 Mon Sep 17 00:00:00 2001 From: Eric Sunshine Date: Tue, 17 Sep 2013 03:06:14 -0400 Subject: [PATCH 1/4] name-hash: refactor polymorphic index_name_exists() Depending upon the absence or presence of a trailing '/' on the incoming pathname, index_name_exists() checks either if a file is present in the index or if a directory is represented within the index. Each caller explicitly chooses the mode of operation by adding or removing a trailing '/' before invoking index_name_exists(). Since these two modes of operations are disjoint and have no code in common (one searches index_state.name_hash; the other dir_hash), they can be represented more naturally as distinct functions: one to search for a file, and one for a directory. Splitting index searching into two functions relieves callers of the artificial burden of having to add or remove a slash to select the mode of operation; instead they just call the desired function. A subsequent patch will take advantage of this benefit in order to eliminate the requirement that the incoming pathname for a directory search must have a trailing slash. (In order to avoid disturbing in-flight topics, index_name_exists() is retained as a thin wrapper dispatching either to index_dir_exists() or index_file_exists().) Signed-off-by: Eric Sunshine Signed-off-by: Junio C Hamano --- cache.h | 4 ++++ name-hash.c | 54 +++++++++++++++++++++++++++++------------------------ 2 files changed, 34 insertions(+), 24 deletions(-) diff --git a/cache.h b/cache.h index a47b9c0303..038afe128e 100644 --- a/cache.h +++ b/cache.h @@ -314,6 +314,8 @@ extern void free_name_hash(struct index_state *istate); #define refresh_cache(flags) refresh_index(&the_index, (flags), NULL, NULL, NULL) #define ce_match_stat(ce, st, options) ie_match_stat(&the_index, (ce), (st), (options)) #define ce_modified(ce, st, options) ie_modified(&the_index, (ce), (st), (options)) +#define cache_dir_exists(name, namelen) index_dir_exists(&the_index, (name), (namelen)) +#define cache_file_exists(name, namelen, igncase) index_file_exists(&the_index, (name), (namelen), (igncase)) #define cache_name_exists(name, namelen, igncase) index_name_exists(&the_index, (name), (namelen), (igncase)) #define cache_name_is_other(name, namelen) index_name_is_other(&the_index, (name), (namelen)) #define resolve_undo_clear() resolve_undo_clear_index(&the_index) @@ -463,6 +465,8 @@ extern int write_index(struct index_state *, int newfd); extern int discard_index(struct index_state *); extern int unmerged_index(const struct index_state *); extern int verify_path(const char *path); +extern struct cache_entry *index_dir_exists(struct index_state *istate, const char *name, int namelen); +extern struct cache_entry *index_file_exists(struct index_state *istate, const char *name, int namelen, int igncase); extern struct cache_entry *index_name_exists(struct index_state *istate, const char *name, int namelen, int igncase); extern int index_name_pos(const struct index_state *, const char *name, int namelen); #define ADD_CACHE_OK_TO_ADD 1 /* Ok to add */ diff --git a/name-hash.c b/name-hash.c index 617c86c537..f06b049bb5 100644 --- a/name-hash.c +++ b/name-hash.c @@ -222,7 +222,29 @@ static int same_name(const struct cache_entry *ce, const char *name, int namelen return slow_same_name(name, namelen, ce->name, len); } -struct cache_entry *index_name_exists(struct index_state *istate, const char *name, int namelen, int icase) +struct cache_entry *index_dir_exists(struct index_state *istate, const char *name, int namelen) +{ + struct cache_entry *ce; + struct dir_entry *dir; + + lazy_init_name_hash(istate); + dir = find_dir_entry(istate, name, namelen); + if (dir && dir->nr) + return dir->ce; + + /* + * It might be a submodule. Unlike plain directories, which are stored + * in the dir-hash, submodules are stored in the name-hash, so check + * there, as well. + */ + ce = index_file_exists(istate, name, namelen - 1, 1); + if (ce && S_ISGITLINK(ce->ce_mode)) + return ce; + + return NULL; +} + +struct cache_entry *index_file_exists(struct index_state *istate, const char *name, int namelen, int icase) { unsigned int hash = hash_name(name, namelen); struct cache_entry *ce; @@ -237,32 +259,16 @@ struct cache_entry *index_name_exists(struct index_state *istate, const char *na } ce = ce->next; } - - /* - * When looking for a directory (trailing '/'), it might be a - * submodule or a directory. Despite submodules being directories, - * they are stored in the name hash without a closing slash. - * When ignore_case is 1, directories are stored in a separate hash - * table *with* their closing slash. - * - * The side effect of this storage technique is we have need to - * lookup the directory in a separate hash table, and if not found - * remove the slash from name and perform the lookup again without - * the slash. If a match is made, S_ISGITLINK(ce->mode) will be - * true. - */ - if (icase && name[namelen - 1] == '/') { - struct dir_entry *dir = find_dir_entry(istate, name, namelen); - if (dir && dir->nr) - return dir->ce; - - ce = index_name_exists(istate, name, namelen - 1, icase); - if (ce && S_ISGITLINK(ce->ce_mode)) - return ce; - } return NULL; } +struct cache_entry *index_name_exists(struct index_state *istate, const char *name, int namelen, int icase) +{ + if (namelen > 0 && name[namelen - 1] == '/') + return index_dir_exists(istate, name, namelen); + return index_file_exists(istate, name, namelen, icase); +} + static int free_dir_entry(void *entry, void *unused) { struct dir_entry *dir = entry; From ebbd7439b1b7cce0e8c68fe3e220d71cc58dd980 Mon Sep 17 00:00:00 2001 From: Eric Sunshine Date: Tue, 17 Sep 2013 03:06:15 -0400 Subject: [PATCH 2/4] employ new explicit "exists in index?" API Each caller of index_name_exists() knows whether it is looking for a directory or a file, and can avoid the unnecessary indirection of index_name_exists() by instead calling index_dir_exists() or index_file_exists() directly. Invoking the appropriate search function explicitly will allow a subsequent patch to relieve callers of the artificial burden of having to add a trailing '/' to the pathname given to index_dir_exists(). Signed-off-by: Eric Sunshine Signed-off-by: Junio C Hamano --- dir.c | 10 +++++----- read-cache.c | 4 ++-- unpack-trees.c | 4 ++-- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/dir.c b/dir.c index b439ff0e63..a8401b97ce 100644 --- a/dir.c +++ b/dir.c @@ -860,7 +860,7 @@ static struct dir_entry *dir_entry_new(const char *pathname, int len) static struct dir_entry *dir_add_name(struct dir_struct *dir, const char *pathname, int len) { - if (cache_name_exists(pathname, len, ignore_case)) + if (cache_file_exists(pathname, len, ignore_case)) return NULL; ALLOC_GROW(dir->entries, dir->nr+1, dir->alloc); @@ -885,11 +885,11 @@ enum exist_status { /* * Do not use the alphabetically sorted index to look up * the directory name; instead, use the case insensitive - * name hash. + * directory hash. */ static enum exist_status directory_exists_in_index_icase(const char *dirname, int len) { - const struct cache_entry *ce = cache_name_exists(dirname, len + 1, ignore_case); + const struct cache_entry *ce = cache_dir_exists(dirname, len + 1); unsigned char endchar; if (!ce) @@ -1071,7 +1071,7 @@ static int get_index_dtype(const char *path, int len) int pos; const struct cache_entry *ce; - ce = cache_name_exists(path, len, 0); + ce = cache_file_exists(path, len, 0); if (ce) { if (!ce_uptodate(ce)) return DT_UNKNOWN; @@ -1131,7 +1131,7 @@ static enum path_treatment treat_one_path(struct dir_struct *dir, int dtype, struct dirent *de) { int exclude; - int has_path_in_index = !!cache_name_exists(path->buf, path->len, ignore_case); + int has_path_in_index = !!cache_file_exists(path->buf, path->len, ignore_case); if (dtype == DT_UNKNOWN) dtype = get_dtype(de, path->buf, path->len); diff --git a/read-cache.c b/read-cache.c index 885943a6df..b8d3759b53 100644 --- a/read-cache.c +++ b/read-cache.c @@ -643,7 +643,7 @@ int add_to_index(struct index_state *istate, const char *path, struct stat *st, if (*ptr == '/') { struct cache_entry *foundce; ++ptr; - foundce = index_name_exists(istate, ce->name, ptr - ce->name, ignore_case); + foundce = index_dir_exists(istate, ce->name, ptr - ce->name); if (foundce) { memcpy((void *)startPtr, foundce->name + (startPtr - ce->name), ptr - startPtr); startPtr = ptr; @@ -652,7 +652,7 @@ int add_to_index(struct index_state *istate, const char *path, struct stat *st, } } - alias = index_name_exists(istate, ce->name, ce_namelen(ce), ignore_case); + alias = index_file_exists(istate, ce->name, ce_namelen(ce), ignore_case); if (alias && !ce_stage(alias) && !ie_match_stat(istate, alias, st, ce_option)) { /* Nothing changed, really */ free(ce); diff --git a/unpack-trees.c b/unpack-trees.c index 1a61e6f363..35cb05e92b 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -1357,7 +1357,7 @@ static int icase_exists(struct unpack_trees_options *o, const char *name, int le { const struct cache_entry *src; - src = index_name_exists(o->src_index, name, len, 1); + src = index_file_exists(o->src_index, name, len, 1); return src && !ie_match_stat(o->src_index, src, st, CE_MATCH_IGNORE_VALID|CE_MATCH_IGNORE_SKIP_WORKTREE); } @@ -1403,7 +1403,7 @@ static int check_ok_to_remove(const char *name, int len, int dtype, * delete this path, which is in a subdirectory that * is being replaced with a blob. */ - result = index_name_exists(&o->result, name, len, 0); + result = index_file_exists(&o->result, name, len, 0); if (result) { if (result->ce_flags & CE_REMOVE) return 0; From d28eec267374c476de490bff736a627da94575c5 Mon Sep 17 00:00:00 2001 From: Eric Sunshine Date: Tue, 17 Sep 2013 03:06:16 -0400 Subject: [PATCH 3/4] name-hash: stop storing trailing '/' on paths in index_state.dir_hash When 5102c617 (Add case insensitivity support for directories when using git status, 2010-10-03) added directories to the name-hash there was only a single hash table in which both real cache entries and leading directory prefixes were registered. To distinguish between the two types of entries, directories were stored with a trailing '/'. 2092678c (name-hash.c: fix endless loop with core.ignorecase=true, 2013-02-28), however, moved directories to a separate hash table (index_state.dir_hash) but retained the (now) redundant trailing '/', thus callers continue to bear the burden of ensuring the slash's presence before searching the index for a directory. Eliminate this redundancy by storing paths in the dir-hash without the trailing '/'. An important benefit of this change is that it eliminates undocumented and dangerous behavior of dir.c:directory_exists_in_index_icase() in which it assumes not only that it can validly access one character beyond the end of its incoming directory argument, but also that that character will unconditionally be a '/'. This perilous behavior was "tolerated" because the string passed in by its lone caller always had a '/' in that position, however, things broke [1] when 2eac2a4c (ls-files -k: a directory only can be killed if the index has a non-directory, 2013-08-15) added a new caller which failed to respect the undocumented assumption. [1]: http://thread.gmane.org/gmane.comp.version-control.git/232727 Signed-off-by: Eric Sunshine Signed-off-by: Junio C Hamano --- dir.c | 2 +- name-hash.c | 11 ++++++----- read-cache.c | 2 +- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/dir.c b/dir.c index a8401b97ce..fccd47955b 100644 --- a/dir.c +++ b/dir.c @@ -889,7 +889,7 @@ enum exist_status { */ static enum exist_status directory_exists_in_index_icase(const char *dirname, int len) { - const struct cache_entry *ce = cache_dir_exists(dirname, len + 1); + const struct cache_entry *ce = cache_dir_exists(dirname, len); unsigned char endchar; if (!ce) diff --git a/name-hash.c b/name-hash.c index f06b049bb5..e5b6e1ad23 100644 --- a/name-hash.c +++ b/name-hash.c @@ -58,9 +58,9 @@ static struct dir_entry *hash_dir_entry(struct index_state *istate, { /* * Throw each directory component in the hash for quick lookup - * during a git status. Directory components are stored with their + * during a git status. Directory components are stored without their * closing slash. Despite submodules being a directory, they never - * reach this point, because they are stored without a closing slash + * reach this point, because they are stored * in index_state.name_hash (as ordinary cache_entries). * * Note that the cache_entry stored with the dir_entry merely @@ -78,6 +78,7 @@ static struct dir_entry *hash_dir_entry(struct index_state *istate, namelen--; if (namelen <= 0) return NULL; + namelen--; /* lookup existing entry for that directory */ dir = find_dir_entry(istate, ce->name, namelen); @@ -97,7 +98,7 @@ static struct dir_entry *hash_dir_entry(struct index_state *istate, } /* recursively add missing parent directories */ - dir->parent = hash_dir_entry(istate, ce, namelen - 1); + dir->parent = hash_dir_entry(istate, ce, namelen); } return dir; } @@ -237,7 +238,7 @@ struct cache_entry *index_dir_exists(struct index_state *istate, const char *nam * in the dir-hash, submodules are stored in the name-hash, so check * there, as well. */ - ce = index_file_exists(istate, name, namelen - 1, 1); + ce = index_file_exists(istate, name, namelen, 1); if (ce && S_ISGITLINK(ce->ce_mode)) return ce; @@ -265,7 +266,7 @@ struct cache_entry *index_file_exists(struct index_state *istate, const char *na struct cache_entry *index_name_exists(struct index_state *istate, const char *name, int namelen, int icase) { if (namelen > 0 && name[namelen - 1] == '/') - return index_dir_exists(istate, name, namelen); + return index_dir_exists(istate, name, namelen - 1); return index_file_exists(istate, name, namelen, icase); } diff --git a/read-cache.c b/read-cache.c index b8d3759b53..e25de322d2 100644 --- a/read-cache.c +++ b/read-cache.c @@ -643,7 +643,7 @@ int add_to_index(struct index_state *istate, const char *path, struct stat *st, if (*ptr == '/') { struct cache_entry *foundce; ++ptr; - foundce = index_dir_exists(istate, ce->name, ptr - ce->name); + foundce = index_dir_exists(istate, ce->name, ptr - ce->name - 1); if (foundce) { memcpy((void *)startPtr, foundce->name + (startPtr - ce->name), ptr - startPtr); startPtr = ptr; From de372b1b46d91ffdcd6eb6e324cb8b586c99c539 Mon Sep 17 00:00:00 2001 From: Eric Sunshine Date: Tue, 17 Sep 2013 03:06:17 -0400 Subject: [PATCH 4/4] dir: revert work-around for retired dangerous behavior directory_exists_in_index_icase() dangerously assumed that it could access one character beyond the end of its directory argument, and that that character would unconditionally be '/'. 2eac2a4c (ls-files -k: a directory only can be killed if the index has a non-directory, 2013-08-15) added a caller which did not respect this undocumented assumption, and 680be044 (dir.c::test_one_path(): work around directory_exists_in_index_icase() breakage, 2013-08-23) added a work-around which temporarily appends a '/' before invoking directory_exists_in_index_icase(). Since the dangerous behavior of directory_exists_in_index_icase() has been eliminated, the work-around is now redundant, so retire it (but not the tests added by the same commit). Signed-off-by: Eric Sunshine Signed-off-by: Junio C Hamano --- dir.c | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) diff --git a/dir.c b/dir.c index fccd47955b..23b6de4703 100644 --- a/dir.c +++ b/dir.c @@ -1160,21 +1160,9 @@ static enum path_treatment treat_one_path(struct dir_struct *dir, */ if ((dir->flags & DIR_COLLECT_KILLED_ONLY) && (dtype == DT_DIR) && - !has_path_in_index) { - /* - * NEEDSWORK: directory_exists_in_index_icase() - * assumes that one byte past the given path is - * readable and has '/', which needs to be fixed, but - * until then, work it around in the caller. - */ - strbuf_addch(path, '/'); - if (directory_exists_in_index(path->buf, path->len - 1) == - index_nonexistent) { - strbuf_setlen(path, path->len - 1); - return path_none; - } - strbuf_setlen(path, path->len - 1); - } + !has_path_in_index && + (directory_exists_in_index(path->buf, path->len) == index_nonexistent)) + return path_none; exclude = is_excluded(dir, path->buf, &dtype);