From 5f73076c1a9b4b8dc94f77eac98eb558d25e33c0 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 8 Feb 2006 21:15:24 -0800 Subject: [PATCH 1/5] "Assume unchanged" git This adds "assume unchanged" logic, started by this message in the list discussion recently: This is a workaround for filesystems that do not have lstat() that is quick enough for the index mechanism to take advantage of. On the paths marked as "assumed to be unchanged", the user needs to explicitly use update-index to register the object name to be in the next commit. You can use two new options to update-index to set and reset the CE_VALID bit: git-update-index --assume-unchanged path... git-update-index --no-assume-unchanged path... These forms manipulate only the CE_VALID bit; it does not change the object name recorded in the index file. Nor they add a new entry to the index. When the configuration variable "core.ignorestat = true" is set, the index entries are marked with CE_VALID bit automatically after: - update-index to explicitly register the current object name to the index file. - when update-index --refresh finds the path to be up-to-date. - when tools like read-tree -u and apply --index update the working tree file and register the current object name to the index file. The flag is dropped upon read-tree that does not check out the index entry. This happens regardless of the core.ignorestat settings. Index entries marked with CE_VALID bit are assumed to be unchanged most of the time. However, there are cases that CE_VALID bit is ignored for the sake of safety and usability: - while "git-read-tree -m" or git-apply need to make sure that the paths involved in the merge do not have local modifications. This sacrifices performance for safety. - when git-checkout-index -f -q -u -a tries to see if it needs to checkout the paths. Otherwise you can never check anything out ;-). - when git-update-index --really-refresh (a new flag) tries to see if the index entry is up to date. You can start with everything marked as CE_VALID and run this once to drop CE_VALID bit for paths that are modified. Most notably, "update-index --refresh" honours CE_VALID and does not actively stat, so after you modified a file in the working tree, update-index --refresh would not notice until you tell the index about it with "git-update-index path" or "git-update-index --no-assume-unchanged path". This version is not expected to be perfect. I think diff between index and/or tree and working files may need some adjustment, and there probably needs other cases we should automatically unmark paths that are marked to be CE_VALID. But the basics seem to work, and ready to be tested by people who asked for this feature. Signed-off-by: Junio C Hamano --- apply.c | 2 +- cache.h | 6 +++-- checkout-index.c | 1 + config.c | 5 ++++ diff-files.c | 2 +- diff-index.c | 2 +- diff.c | 2 +- entry.c | 2 +- environment.c | 1 + read-cache.c | 28 +++++++++++++++++---- read-tree.c | 2 +- update-index.c | 65 ++++++++++++++++++++++++++++++++++++++++++------ write-tree.c | 2 +- 13 files changed, 99 insertions(+), 21 deletions(-) diff --git a/apply.c b/apply.c index 2ad47fbbb3..35ae48eeb5 100644 --- a/apply.c +++ b/apply.c @@ -1309,7 +1309,7 @@ static int check_patch(struct patch *patch) return -1; } - changed = ce_match_stat(active_cache[pos], &st); + changed = ce_match_stat(active_cache[pos], &st, 1); if (changed) return error("%s: does not match index", old_name); diff --git a/cache.h b/cache.h index bdbe2d683e..cd58fada15 100644 --- a/cache.h +++ b/cache.h @@ -91,6 +91,7 @@ struct cache_entry { #define CE_NAMEMASK (0x0fff) #define CE_STAGEMASK (0x3000) #define CE_UPDATE (0x4000) +#define CE_VALID (0x8000) #define CE_STAGESHIFT 12 #define create_ce_flags(len, stage) htons((len) | ((stage) << CE_STAGESHIFT)) @@ -144,8 +145,8 @@ extern int add_cache_entry(struct cache_entry *ce, int option); extern int remove_cache_entry_at(int pos); extern int remove_file_from_cache(const char *path); extern int ce_same_name(struct cache_entry *a, struct cache_entry *b); -extern int ce_match_stat(struct cache_entry *ce, struct stat *st); -extern int ce_modified(struct cache_entry *ce, struct stat *st); +extern int ce_match_stat(struct cache_entry *ce, struct stat *st, int); +extern int ce_modified(struct cache_entry *ce, struct stat *st, int); extern int ce_path_match(const struct cache_entry *ce, const char **pathspec); extern int index_fd(unsigned char *sha1, int fd, struct stat *st, int write_object, const char *type); extern int index_pipe(unsigned char *sha1, int fd, const char *type, int write_object); @@ -161,6 +162,7 @@ extern int commit_index_file(struct cache_file *); extern void rollback_index_file(struct cache_file *); extern int trust_executable_bit; +extern int assume_unchanged; extern int only_use_symrefs; extern int diff_rename_limit_default; extern int shared_repository; diff --git a/checkout-index.c b/checkout-index.c index 53dd8cba6f..957b4a86b0 100644 --- a/checkout-index.c +++ b/checkout-index.c @@ -116,6 +116,7 @@ int main(int argc, char **argv) int all = 0; prefix = setup_git_directory(); + git_config(git_default_config); prefix_length = prefix ? strlen(prefix) : 0; if (read_cache() < 0) { diff --git a/config.c b/config.c index 8355224bb1..7dbdce1966 100644 --- a/config.c +++ b/config.c @@ -222,6 +222,11 @@ int git_default_config(const char *var, const char *value) return 0; } + if (!strcmp(var, "core.ignorestat")) { + assume_unchanged = git_config_bool(var, value); + return 0; + } + if (!strcmp(var, "core.symrefsonly")) { only_use_symrefs = git_config_bool(var, value); return 0; diff --git a/diff-files.c b/diff-files.c index d24d11c28d..c96ad35fb6 100644 --- a/diff-files.c +++ b/diff-files.c @@ -191,7 +191,7 @@ int main(int argc, const char **argv) show_file('-', ce); continue; } - changed = ce_match_stat(ce, &st); + changed = ce_match_stat(ce, &st, 0); if (!changed && !diff_options.find_copies_harder) continue; oldmode = ntohl(ce->ce_mode); diff --git a/diff-index.c b/diff-index.c index f8a102ec16..12a9418d6b 100644 --- a/diff-index.c +++ b/diff-index.c @@ -33,7 +33,7 @@ static int get_stat_data(struct cache_entry *ce, } return -1; } - changed = ce_match_stat(ce, &st); + changed = ce_match_stat(ce, &st, 0); if (changed) { mode = create_ce_mode(st.st_mode); if (!trust_executable_bit && diff --git a/diff.c b/diff.c index ec51e7dd18..c72064eeb2 100644 --- a/diff.c +++ b/diff.c @@ -311,7 +311,7 @@ static int work_tree_matches(const char *name, const unsigned char *sha1) ce = active_cache[pos]; if ((lstat(name, &st) < 0) || !S_ISREG(st.st_mode) || /* careful! */ - ce_match_stat(ce, &st) || + ce_match_stat(ce, &st, 0) || memcmp(sha1, ce->sha1, 20)) return 0; /* we return 1 only when we can stat, it is a regular file, diff --git a/entry.c b/entry.c index 6c47c3a3e1..8fb99bc83f 100644 --- a/entry.c +++ b/entry.c @@ -123,7 +123,7 @@ int checkout_entry(struct cache_entry *ce, struct checkout *state) strcpy(path + len, ce->name); if (!lstat(path, &st)) { - unsigned changed = ce_match_stat(ce, &st); + unsigned changed = ce_match_stat(ce, &st, 1); if (!changed) return 0; if (!state->force) { diff --git a/environment.c b/environment.c index 0596fc647b..251e53ca09 100644 --- a/environment.c +++ b/environment.c @@ -12,6 +12,7 @@ char git_default_email[MAX_GITNAME]; char git_default_name[MAX_GITNAME]; int trust_executable_bit = 1; +int assume_unchanged = 0; int only_use_symrefs = 0; int repository_format_version = 0; char git_commit_encoding[MAX_ENCODING_LENGTH] = "utf-8"; diff --git a/read-cache.c b/read-cache.c index c5474d4975..efbb1be874 100644 --- a/read-cache.c +++ b/read-cache.c @@ -27,6 +27,9 @@ void fill_stat_cache_info(struct cache_entry *ce, struct stat *st) ce->ce_uid = htonl(st->st_uid); ce->ce_gid = htonl(st->st_gid); ce->ce_size = htonl(st->st_size); + + if (assume_unchanged) + ce->ce_flags |= htons(CE_VALID); } static int ce_compare_data(struct cache_entry *ce, struct stat *st) @@ -146,9 +149,18 @@ static int ce_match_stat_basic(struct cache_entry *ce, struct stat *st) return changed; } -int ce_match_stat(struct cache_entry *ce, struct stat *st) +int ce_match_stat(struct cache_entry *ce, struct stat *st, int ignore_valid) { - unsigned int changed = ce_match_stat_basic(ce, st); + unsigned int changed; + + /* + * If it's marked as always valid in the index, it's + * valid whatever the checked-out copy says. + */ + if (!ignore_valid && (ce->ce_flags & htons(CE_VALID))) + return 0; + + changed = ce_match_stat_basic(ce, st); /* * Within 1 second of this sequence: @@ -164,7 +176,7 @@ int ce_match_stat(struct cache_entry *ce, struct stat *st) * effectively mean we can make at most one commit per second, * which is not acceptable. Instead, we check cache entries * whose mtime are the same as the index file timestamp more - * careful than others. + * carefully than others. */ if (!changed && index_file_timestamp && @@ -174,10 +186,10 @@ int ce_match_stat(struct cache_entry *ce, struct stat *st) return changed; } -int ce_modified(struct cache_entry *ce, struct stat *st) +int ce_modified(struct cache_entry *ce, struct stat *st, int really) { int changed, changed_fs; - changed = ce_match_stat(ce, st); + changed = ce_match_stat(ce, st, really); if (!changed) return 0; /* @@ -233,6 +245,11 @@ int cache_name_compare(const char *name1, int flags1, const char *name2, int fla return -1; if (len1 > len2) return 1; + + /* Differences between "assume up-to-date" should not matter. */ + flags1 &= ~CE_VALID; + flags2 &= ~CE_VALID; + if (flags1 < flags2) return -1; if (flags1 > flags2) @@ -430,6 +447,7 @@ int add_cache_entry(struct cache_entry *ce, int option) int ok_to_add = option & ADD_CACHE_OK_TO_ADD; int ok_to_replace = option & ADD_CACHE_OK_TO_REPLACE; int skip_df_check = option & ADD_CACHE_SKIP_DFCHECK; + pos = cache_name_pos(ce->name, ntohs(ce->ce_flags)); /* existing match? Just replace it. */ diff --git a/read-tree.c b/read-tree.c index 5580f15ba2..52f06e312a 100644 --- a/read-tree.c +++ b/read-tree.c @@ -349,7 +349,7 @@ static void verify_uptodate(struct cache_entry *ce) return; if (!lstat(ce->name, &st)) { - unsigned changed = ce_match_stat(ce, &st); + unsigned changed = ce_match_stat(ce, &st, 1); if (!changed) return; errno = 0; diff --git a/update-index.c b/update-index.c index afec98dd48..767fd49721 100644 --- a/update-index.c +++ b/update-index.c @@ -23,6 +23,10 @@ static int quiet; /* --refresh needing update is not error */ static int info_only; static int force_remove; static int verbose; +static int mark_valid_only = 0; +#define MARK_VALID 1 +#define UNMARK_VALID 2 + /* Three functions to allow overloaded pointer return; see linux/err.h */ static inline void *ERR_PTR(long error) @@ -53,6 +57,25 @@ static void report(const char *fmt, ...) va_end(vp); } +static int mark_valid(const char *path) +{ + int namelen = strlen(path); + int pos = cache_name_pos(path, namelen); + if (0 <= pos) { + switch (mark_valid_only) { + case MARK_VALID: + active_cache[pos]->ce_flags |= htons(CE_VALID); + break; + case UNMARK_VALID: + active_cache[pos]->ce_flags &= ~htons(CE_VALID); + break; + } + active_cache_changed = 1; + return 0; + } + return -1; +} + static int add_file_to_cache(const char *path) { int size, namelen, option, status; @@ -94,6 +117,7 @@ static int add_file_to_cache(const char *path) ce = xmalloc(size); memset(ce, 0, size); memcpy(ce->name, path, namelen); + ce->ce_flags = htons(namelen); fill_stat_cache_info(ce, &st); ce->ce_mode = create_ce_mode(st.st_mode); @@ -105,7 +129,6 @@ static int add_file_to_cache(const char *path) if (0 <= pos) ce->ce_mode = active_cache[pos]->ce_mode; } - ce->ce_flags = htons(namelen); if (index_path(ce->sha1, path, &st, !info_only)) return -1; @@ -128,7 +151,7 @@ static int add_file_to_cache(const char *path) * For example, you'd want to do this after doing a "git-read-tree", * to link up the stat cache details with the proper files. */ -static struct cache_entry *refresh_entry(struct cache_entry *ce) +static struct cache_entry *refresh_entry(struct cache_entry *ce, int really) { struct stat st; struct cache_entry *updated; @@ -137,21 +160,22 @@ static struct cache_entry *refresh_entry(struct cache_entry *ce) if (lstat(ce->name, &st) < 0) return ERR_PTR(-errno); - changed = ce_match_stat(ce, &st); + changed = ce_match_stat(ce, &st, really); if (!changed) return NULL; - if (ce_modified(ce, &st)) + if (ce_modified(ce, &st, really)) return ERR_PTR(-EINVAL); size = ce_size(ce); updated = xmalloc(size); memcpy(updated, ce, size); fill_stat_cache_info(updated, &st); + return updated; } -static int refresh_cache(void) +static int refresh_cache(int really) { int i; int has_errors = 0; @@ -171,12 +195,19 @@ static int refresh_cache(void) continue; } - new = refresh_entry(ce); + new = refresh_entry(ce, really); if (!new) continue; if (IS_ERR(new)) { if (not_new && PTR_ERR(new) == -ENOENT) continue; + if (really && PTR_ERR(new) == -EINVAL) { + /* If we are doing --really-refresh that + * means the index is not valid anymore. + */ + ce->ce_flags &= ~htons(CE_VALID); + active_cache_changed = 1; + } if (quiet) continue; printf("%s: needs update\n", ce->name); @@ -274,6 +305,8 @@ static int add_cacheinfo(unsigned int mode, const unsigned char *sha1, memcpy(ce->name, path, len); ce->ce_flags = create_ce_flags(len, stage); ce->ce_mode = create_ce_mode(mode); + if (assume_unchanged) + ce->ce_flags |= htons(CE_VALID); option = allow_add ? ADD_CACHE_OK_TO_ADD : 0; option |= allow_replace ? ADD_CACHE_OK_TO_REPLACE : 0; if (add_cache_entry(ce, option)) @@ -317,6 +350,12 @@ static void update_one(const char *path, const char *prefix, int prefix_length) fprintf(stderr, "Ignoring path %s\n", path); return; } + if (mark_valid_only) { + if (mark_valid(p)) + die("Unable to mark file %s", path); + return; + } + if (force_remove) { if (remove_file_from_cache(p)) die("git-update-index: unable to remove %s", path); @@ -467,7 +506,11 @@ int main(int argc, const char **argv) continue; } if (!strcmp(path, "--refresh")) { - has_errors |= refresh_cache(); + has_errors |= refresh_cache(0); + continue; + } + if (!strcmp(path, "--really-refresh")) { + has_errors |= refresh_cache(1); continue; } if (!strcmp(path, "--cacheinfo")) { @@ -493,6 +536,14 @@ int main(int argc, const char **argv) die("git-update-index: %s cannot chmod %s", path, argv[i]); continue; } + if (!strcmp(path, "--assume-unchanged")) { + mark_valid_only = MARK_VALID; + continue; + } + if (!strcmp(path, "--no-assume-unchanged")) { + mark_valid_only = UNMARK_VALID; + continue; + } if (!strcmp(path, "--info-only")) { info_only = 1; continue; diff --git a/write-tree.c b/write-tree.c index f866059f24..addb5de3b6 100644 --- a/write-tree.c +++ b/write-tree.c @@ -111,7 +111,7 @@ int main(int argc, char **argv) funny = 0; for (i = 0; i < entries; i++) { struct cache_entry *ce = active_cache[i]; - if (ntohs(ce->ce_flags) & ~CE_NAMEMASK) { + if (ce_stage(ce)) { if (10 < ++funny) { fprintf(stderr, "...\n"); break; From 8b9b0f3af73233595b6b1103ffb30242508a5e47 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 8 Feb 2006 21:49:47 -0800 Subject: [PATCH 2/5] "Assume unchanged" git: do not set CE_VALID with --refresh When working with automatic assume-unchanged mode using core.ignorestat, setting CE_VALID after --refresh makes things more cumbersome to use. Consider this scenario: (1) the working tree is on a filesystem with slow lstat(2). The user sets core.ignorestat = true. (2) "git checkout" to switch to a different branch (or initial checkout) updates all paths and the index starts out with "all clean". (3) The user knows she wants to edit certain paths. She uses update-index --no-assume-unchanged (we could call it --edit; the name is inmaterial) to mark these paths and starts editing. (4) After editing half of the paths marked to be edited, she runs "git status". This runs "update-index --refresh" to reduce the false hits from diff-files. (5) Now the other half of the paths, since she has not changed them, are found to match the index, and CE_VALID is set on them again. For this reason, this commit makes update-index --refresh not to set CE_VALID even after the path without CE_VALID are verified to be up to date. The user still can run --really-refresh to force lstat() to match the index entries to the reality. Signed-off-by: Junio C Hamano --- update-index.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/update-index.c b/update-index.c index 767fd49721..bb730509b8 100644 --- a/update-index.c +++ b/update-index.c @@ -172,6 +172,15 @@ static struct cache_entry *refresh_entry(struct cache_entry *ce, int really) memcpy(updated, ce, size); fill_stat_cache_info(updated, &st); + /* In this case, if really is not set, we should leave + * CE_VALID bit alone. Otherwise, paths marked with + * --no-assume-unchanged (i.e. things to be edited) will + * reacquire CE_VALID bit automatically, which is not + * really what we want. + */ + if (!really && assume_unchanged && !(ce->ce_flags & htons(CE_VALID))) + updated->ce_flags &= ~htons(CE_VALID); + return updated; } From 2bcab24080dc97fc628e1b601a195a9a96773fac Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 8 Feb 2006 21:50:18 -0800 Subject: [PATCH 3/5] ls-files: debugging aid for CE_VALID changes. This is not really part of the proposed updates for CE_VALID, but with this change, ls-files -t shows CE_VALID paths with lowercase tag letters instead of the usual uppercase. Useful for checking out what is going on. Signed-off-by: Junio C Hamano --- ls-files.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/ls-files.c b/ls-files.c index 6af3b091b0..3f06ece84d 100644 --- a/ls-files.c +++ b/ls-files.c @@ -447,6 +447,22 @@ static void show_ce_entry(const char *tag, struct cache_entry *ce) if (pathspec && !match(pathspec, ce->name, len)) return; + if (tag && *tag && (ce->ce_flags & htons(CE_VALID))) { + static char alttag[4]; + memcpy(alttag, tag, 3); + if (isalpha(tag[0])) + alttag[0] = tolower(tag[0]); + else if (tag[0] == '?') + alttag[0] = '!'; + else { + alttag[0] = 'v'; + alttag[1] = tag[0]; + alttag[2] = ' '; + alttag[3] = 0; + } + tag = alttag; + } + if (!show_stage) { fputs(tag, stdout); write_name_quoted("", 0, ce->name + offset, @@ -503,7 +519,7 @@ static void show_files(void) err = lstat(ce->name, &st); if (show_deleted && err) show_ce_entry(tag_removed, ce); - if (show_modified && ce_modified(ce, &st)) + if (show_modified && ce_modified(ce, &st, 0)) show_ce_entry(tag_modified, ce); } } From dfdd309e574df2da93f48d9cf4e83d53233dbe14 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Tue, 7 Feb 2006 13:19:10 -0800 Subject: [PATCH 4/5] Do not allow empty name or email. Instead of silently allowing to create a bogus commit that lacks information by mistake, complain loudly and die. Signed-off-by: Junio C Hamano --- ident.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/ident.c b/ident.c index 0461b8b2f8..23b8cfc600 100644 --- a/ident.c +++ b/ident.c @@ -167,6 +167,11 @@ static const char *get_ident(const char *name, const char *email, name = git_default_name; if (!email) email = git_default_email; + + if (!*name || !*email) + die("empty ident %s <%s> not allowed", + name, email); + strcpy(date, git_default_date); if (date_str) parse_date(date_str, date, sizeof(date)); From 47e013f9207a235a87390ee8ad03c8a7406f4147 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Tue, 7 Feb 2006 15:35:46 -0800 Subject: [PATCH 5/5] t6000: fix a careless test library add-on. It tried to "restore" GIT_AUTHOR_EMAIL environment variable but the variable started out as unset, so ended up setting it to an empty string. This is now caught as an error. Signed-off-by: Junio C Hamano --- t/t6000lib.sh | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/t/t6000lib.sh b/t/t6000lib.sh index 01f796e9c8..c6752af48e 100755 --- a/t/t6000lib.sh +++ b/t/t6000lib.sh @@ -51,7 +51,12 @@ as_author() export GIT_AUTHOR_EMAIL="$_author" "$@" - export GIT_AUTHOR_EMAIL="$_save" + if test -z "$_save" + then + unset GIT_AUTHOR_EMAIL + else + export GIT_AUTHOR_EMAIL="$_save" + fi } commit_date()