From 17448209f5441718c69642871c85a80f00d12b43 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sun, 23 Apr 2006 20:20:25 -0700 Subject: [PATCH 1/3] Add test-dump-cache-tree This was useful in diagnosing the corrupt index.aux format problem. But do not bother building or installing it by default. Signed-off-by: Junio C Hamano --- .gitignore | 1 + Makefile | 3 +++ dump-cache-tree.c | 32 ++++++++++++++++++++++++++++++++ 3 files changed, 36 insertions(+) create mode 100644 dump-cache-tree.c diff --git a/.gitignore b/.gitignore index b5959d6311..7906909b30 100644 --- a/.gitignore +++ b/.gitignore @@ -123,6 +123,7 @@ git-write-tree git-core-*/?* test-date test-delta +test-dump-cache-tree common-cmds.h *.tar.gz *.dsc diff --git a/Makefile b/Makefile index 518c3c176b..1aa96f4f2e 100644 --- a/Makefile +++ b/Makefile @@ -611,6 +611,9 @@ test-date$X: test-date.c date.o ctype.o test-delta$X: test-delta.c diff-delta.o patch-delta.o $(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $^ -lz +test-dump-cache-tree$X: dump-cache-tree.o $(GITLIBS) + $(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(LIBS) + check: for i in *.c; do sparse $(ALL_CFLAGS) $(SPARSE_FLAGS) $$i || exit; done diff --git a/dump-cache-tree.c b/dump-cache-tree.c new file mode 100644 index 0000000000..80f8683f39 --- /dev/null +++ b/dump-cache-tree.c @@ -0,0 +1,32 @@ +#include "cache.h" +#include "tree.h" +#include "cache-tree.h" + +static unsigned char active_cache_sha1[20]; +static struct cache_tree *active_cache_tree; + +static void dump_cache_tree(struct cache_tree *it, const char *pfx) +{ + int i; + if (it->entry_count < 0) + printf("%-40s %s\n", "invalid", pfx); + else + printf("%s %s (%d entries)\n", + sha1_to_hex(it->sha1), + pfx, it->entry_count); + for (i = 0; i < it->subtree_nr; i++) { + char path[PATH_MAX]; + struct cache_tree_sub *down = it->down[i]; + sprintf(path, "%s%.*s/", pfx, down->namelen, down->name); + dump_cache_tree(down->cache_tree, path); + } +} + +int main(int ac, char **av) +{ + if 
(read_cache_1(active_cache_sha1) < 0) + die("unable to read index file"); + active_cache_tree = read_cache_tree(active_cache_sha1); + dump_cache_tree(active_cache_tree, ""); + return 0; +} From dd0c34c46bdda0c20fd92d00516e711a4c9f7837 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Mon, 24 Apr 2006 15:12:42 -0700 Subject: [PATCH 2/3] cache-tree: protect against "git prune". We reused the cache-tree data without verifying the tree object still exists. Recompute in cache_tree_update() an otherwise valid cache-tree entry when the tree object disappeared. This is not usually a problem, but theoretically without this fix things can break when the user does something like this: - read-index from a side branch - write-tree the result - remove the side branch with "git branch -D" - remove the unreachable objects with "git prune" - write-tree what is in the index. Signed-off-by: Junio C Hamano --- cache-tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cache-tree.c b/cache-tree.c index f6d1dd1d7f..b34b0bc317 100644 --- a/cache-tree.c +++ b/cache-tree.c @@ -177,7 +177,7 @@ static int update_one(struct cache_tree *it, char *buffer; int i; - if (0 <= it->entry_count) + if (0 <= it->entry_count && has_sha1_file(it->sha1)) return it->entry_count; /* From bad68ec92410cf47dd001aa9b95d0f24c5f4bf77 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Mon, 24 Apr 2006 21:18:58 -0700 Subject: [PATCH 3/3] index: make the index file format extensible. ... and move the cache-tree data into it. 
Signed-off-by: Junio C Hamano --- apply.c | 11 +---- cache-tree.c | 108 +++++++++++----------------------------------- cache-tree.h | 9 ++-- cache.h | 6 +-- checkout-index.c | 1 + dump-cache-tree.c | 8 ++-- read-cache.c | 105 +++++++++++++++++++++++++++++++++----------- read-tree.c | 2 + update-index.c | 11 +---- write-tree.c | 36 +++++++++++----- 10 files changed, 147 insertions(+), 150 deletions(-) diff --git a/apply.c b/apply.c index e283df38aa..acecf8de54 100644 --- a/apply.c +++ b/apply.c @@ -12,10 +12,6 @@ #include "quote.h" #include "blob.h" -static unsigned char active_cache_sha1[20]; -static struct cache_tree *active_cache_tree; - - // --check turns on checking that the working tree matches the // files that are being modified, but doesn't apply the patch // --stat does just a diffstat, and doesn't actually apply @@ -1919,9 +1915,8 @@ static int apply_patch(int fd, const char *filename) if (write_index) newfd = hold_index_file_for_update(&cache_file, get_index_file()); if (check_index) { - if (read_cache_1(active_cache_sha1) < 0) + if (read_cache() < 0) die("unable to read index file"); - active_cache_tree = read_cache_tree(active_cache_sha1); } if ((check || apply) && check_patch_list(list) < 0) @@ -1931,11 +1926,9 @@ static int apply_patch(int fd, const char *filename) write_out_results(list, skipped_patch); if (write_index) { - if (write_cache_1(newfd, active_cache, active_nr, - active_cache_sha1) || + if (write_cache(newfd, active_cache, active_nr) || commit_index_file(&cache_file)) die("Unable to write new cachefile"); - write_cache_tree(active_cache_sha1, active_cache_tree); } if (show_index_info) diff --git a/cache-tree.c b/cache-tree.c index b34b0bc317..2146723e90 100644 --- a/cache-tree.c +++ b/cache-tree.c @@ -11,16 +11,18 @@ struct cache_tree *cache_tree(void) return it; } -void cache_tree_free(struct cache_tree *it) +void cache_tree_free(struct cache_tree **it_p) { int i; + struct cache_tree *it = *it_p; if (!it) return; for (i = 0; i < 
it->subtree_nr; i++) - cache_tree_free(it->down[i]->cache_tree); + cache_tree_free(&it->down[i]->cache_tree); free(it->down); free(it); + *it_p = NULL; } static struct cache_tree_sub *find_subtree(struct cache_tree *it, @@ -78,7 +80,7 @@ void cache_tree_invalidate_path(struct cache_tree *it, const char *path) break; } if (i < it->subtree_nr) { - cache_tree_free(it->down[i]->cache_tree); + cache_tree_free(&it->down[i]->cache_tree); free(it->down[i]); /* 0 1 2 3 4 5 * ^ ^subtree_nr = 6 @@ -159,13 +161,27 @@ static void discard_unused_subtrees(struct cache_tree *it) if (s->used) down[dst++] = s; else { - cache_tree_free(s->cache_tree); + cache_tree_free(&s->cache_tree); free(s); it->subtree_nr--; } } } +int cache_tree_fully_valid(struct cache_tree *it) +{ + int i; + if (!it) + return 0; + if (it->entry_count < 0 || !has_sha1_file(it->sha1)) + return 0; + for (i = 0; i < it->subtree_nr; i++) { + if (!cache_tree_fully_valid(it->down[i]->cache_tree)) + return 0; + } + return 1; +} + static int update_one(struct cache_tree *it, struct cache_entry **cache, int entries, @@ -354,19 +370,15 @@ static void *write_one(struct cache_tree *it, return buffer; } -static void *cache_tree_write(const unsigned char *cache_sha1, - struct cache_tree *root, - unsigned long *offset_p) +void *cache_tree_write(struct cache_tree *root, unsigned long *size_p) { char path[PATH_MAX]; unsigned long size = 8192; char *buffer = xmalloc(size); - /* the cache checksum of the corresponding index file. 
*/ - memcpy(buffer, cache_sha1, 20); - *offset_p = 20; + *size_p = 0; path[0] = 0; - return write_one(root, path, 0, buffer, &size, offset_p); + return write_one(root, path, 0, buffer, &size, size_p); } static struct cache_tree *read_one(const char **buffer, unsigned long *size_p) @@ -439,81 +451,13 @@ static struct cache_tree *read_one(const char **buffer, unsigned long *size_p) return it; free_return: - cache_tree_free(it); + cache_tree_free(&it); return NULL; } -static struct cache_tree *cache_tree_read(unsigned char *sha1, - const char *buffer, - unsigned long size) +struct cache_tree *cache_tree_read(const char *buffer, unsigned long size) { - /* check the cache-tree matches the index */ - if (memcmp(buffer, sha1, 20)) - return NULL; /* checksum mismatch */ - if (buffer[20]) + if (buffer[0]) return NULL; /* not the whole tree */ - buffer += 20; - size -= 20; return read_one(&buffer, &size); } - -struct cache_tree *read_cache_tree(unsigned char *sha1) -{ - int fd; - struct stat st; - char path[PATH_MAX]; - unsigned long size = 0; - void *map; - struct cache_tree *it; - - sprintf(path, "%s.aux", get_index_file()); - fd = open(path, O_RDONLY); - if (fd < 0) - return cache_tree(); - - if (fstat(fd, &st)) - return cache_tree(); - size = st.st_size; - map = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0); - close(fd); - if (map == MAP_FAILED) - return cache_tree(); - it = cache_tree_read(sha1, map, size); - munmap(map, size); - if (!it) - return cache_tree(); - return it; -} - -int write_cache_tree(const unsigned char *sha1, struct cache_tree *root) -{ - char path[PATH_MAX]; - unsigned long size = 0; - void *buf, *buffer; - int fd, ret = -1; - - sprintf(path, "%s.aux", get_index_file()); - if (!root) { - unlink(path); - return -1; - } - fd = open(path, O_WRONLY|O_CREAT, 0666); - if (fd < 0) - return -1; - buffer = buf = cache_tree_write(sha1, root, &size); - while (size) { - int written = xwrite(fd, buf, size); - if (written <= 0) - goto fail; - buf += written; - 
size -= written; - } - ret = 0; - - fail: - close(fd); - free(buffer); - if (ret) - unlink(path); - return ret; -} diff --git a/cache-tree.h b/cache-tree.h index 7b149afdc5..c70a7699a9 100644 --- a/cache-tree.h +++ b/cache-tree.h @@ -18,12 +18,13 @@ struct cache_tree { }; struct cache_tree *cache_tree(void); -void cache_tree_free(struct cache_tree *); +void cache_tree_free(struct cache_tree **); void cache_tree_invalidate_path(struct cache_tree *, const char *); -int write_cache_tree(const unsigned char *, struct cache_tree *); -struct cache_tree *read_cache_tree(unsigned char *); -int cache_tree_update(struct cache_tree *, struct cache_entry **, int, int); +void *cache_tree_write(struct cache_tree *root, unsigned long *size_p); +struct cache_tree *cache_tree_read(const char *buffer, unsigned long size); +int cache_tree_fully_valid(struct cache_tree *); +int cache_tree_update(struct cache_tree *, struct cache_entry **, int, int); #endif diff --git a/cache.h b/cache.h index 8c9947ef49..a080727b00 100644 --- a/cache.h +++ b/cache.h @@ -114,6 +114,7 @@ static inline unsigned int create_ce_mode(unsigned int mode) extern struct cache_entry **active_cache; extern unsigned int active_nr, active_alloc, active_cache_changed; +extern struct cache_tree *active_cache_tree; #define GIT_DIR_ENVIRONMENT "GIT_DIR" #define DEFAULT_GIT_DIR_ENVIRONMENT ".git" @@ -138,11 +139,8 @@ extern const char *prefix_filename(const char *prefix, int len, const char *path #define alloc_nr(x) (((x)+16)*3/2) /* Initialize and use the cache information */ -extern int read_cache_1(unsigned char *); -extern int write_cache_1(int, struct cache_entry **, int, unsigned char *); extern int read_cache(void); -extern int write_cache(int, struct cache_entry **, int); - +extern int write_cache(int newfd, struct cache_entry **cache, int entries); extern int cache_name_pos(const char *name, int namelen); #define ADD_CACHE_OK_TO_ADD 1 /* Ok to add */ #define ADD_CACHE_OK_TO_REPLACE 2 /* Ok to replace 
file/directory */ diff --git a/checkout-index.c b/checkout-index.c index dd6a2d86fe..e56c354f8c 100644 --- a/checkout-index.c +++ b/checkout-index.c @@ -39,6 +39,7 @@ #include "cache.h" #include "strbuf.h" #include "quote.h" +#include "cache-tree.h" #define CHECKOUT_ALL 4 static const char *prefix; diff --git a/dump-cache-tree.c b/dump-cache-tree.c index 80f8683f39..01e8bff0ee 100644 --- a/dump-cache-tree.c +++ b/dump-cache-tree.c @@ -2,12 +2,11 @@ #include "tree.h" #include "cache-tree.h" -static unsigned char active_cache_sha1[20]; -static struct cache_tree *active_cache_tree; - static void dump_cache_tree(struct cache_tree *it, const char *pfx) { int i; + if (!it) + return; if (it->entry_count < 0) printf("%-40s %s\n", "invalid", pfx); else @@ -24,9 +23,8 @@ static void dump_cache_tree(struct cache_tree *it, const char *pfx) int main(int ac, char **av) { - if (read_cache_1(active_cache_sha1) < 0) + if (read_cache() < 0) die("unable to read index file"); - active_cache_tree = read_cache_tree(active_cache_sha1); dump_cache_tree(active_cache_tree, ""); return 0; } diff --git a/read-cache.c b/read-cache.c index 50e094e053..1f71d12578 100644 --- a/read-cache.c +++ b/read-cache.c @@ -4,11 +4,26 @@ * Copyright (C) Linus Torvalds, 2005 */ #include "cache.h" +#include "cache-tree.h" + +/* Index extensions. + * + * The first letter should be 'A'..'Z' for extensions that are not + * necessary for correct operation (i.e. optimization data). + * When new extensions are added that _need_ to be understood in + * order to correctly interpret the index file, pick a character that + * is outside the range, to cause the reader to abort. 
+ */ + +#define CACHE_EXT(s) ( (s[0]<<24)|(s[1]<<16)|(s[2]<<8)|(s[3]) ) +#define CACHE_EXT_TREE 0x54524545 /* "TREE" */ struct cache_entry **active_cache = NULL; static time_t index_file_timestamp; unsigned int active_nr = 0, active_alloc = 0, active_cache_changed = 0; +struct cache_tree *active_cache_tree = NULL; + /* * This only updates the "non-critical" parts of the directory * cache, ie the parts that aren't tracked by GIT, and only used @@ -496,12 +511,10 @@ int add_cache_entry(struct cache_entry *ce, int option) return 0; } -static int verify_hdr(struct cache_header *hdr, unsigned long size, unsigned char *sha1) +static int verify_hdr(struct cache_header *hdr, unsigned long size) { SHA_CTX c; - unsigned char sha1_buf[20]; - if (!sha1) - sha1 = sha1_buf; + unsigned char sha1[20]; if (hdr->hdr_signature != htonl(CACHE_SIGNATURE)) return error("bad signature"); @@ -515,7 +528,23 @@ static int verify_hdr(struct cache_header *hdr, unsigned long size, unsigned cha return 0; } -int read_cache_1(unsigned char *cache_sha1) +static int read_index_extension(const char *ext, void *data, unsigned long sz) +{ + switch (CACHE_EXT(ext)) { + case CACHE_EXT_TREE: + active_cache_tree = cache_tree_read(data, sz); + break; + default: + if (*ext < 'A' || 'Z' < *ext) + return error("index uses %.4s extension, which we do not understand", + ext); + fprintf(stderr, "ignoring %.4s extension\n", ext); + break; + } + return 0; +} + +int read_cache(void) { int fd, i; struct stat st; @@ -549,7 +578,7 @@ int read_cache_1(unsigned char *cache_sha1) die("index file mmap failed (%s)", strerror(errno)); hdr = map; - if (verify_hdr(hdr, size, cache_sha1) < 0) + if (verify_hdr(hdr, size) < 0) goto unmap; active_nr = ntohl(hdr->hdr_entries); @@ -563,6 +592,22 @@ int read_cache_1(unsigned char *cache_sha1) active_cache[i] = ce; } index_file_timestamp = st.st_mtime; + while (offset <= size - 20 - 8) { + /* After an array of active_nr index entries, + * there can be arbitrary number of extended + * 
sections, each of which is prefixed with + * extension name (4-byte) and section length + * in 4-byte network byte order. + */ + unsigned long extsize; + memcpy(&extsize, map + offset + 4, 4); + extsize = ntohl(extsize); + if (read_index_extension(map + offset, + map + offset + 8, extsize) < 0) + goto unmap; + offset += 8; + offset += extsize; + } return active_nr; unmap: @@ -597,7 +642,18 @@ static int ce_write(SHA_CTX *context, int fd, void *data, unsigned int len) return 0; } -static int ce_flush(SHA_CTX *context, int fd, unsigned char *sha1) +static int write_index_ext_header(SHA_CTX *context, int fd, + unsigned long ext, unsigned long sz) +{ + ext = htonl(ext); + sz = htonl(sz); + if ((ce_write(context, fd, &ext, 4) < 0) || + (ce_write(context, fd, &sz, 4) < 0)) + return -1; + return 0; +} + +static int ce_flush(SHA_CTX *context, int fd) { unsigned int left = write_buffer_len; @@ -614,8 +670,7 @@ static int ce_flush(SHA_CTX *context, int fd, unsigned char *sha1) } /* Append the SHA1 signature at the end */ - SHA1_Final(sha1, context); - memcpy(write_buffer + left, sha1, 20); + SHA1_Final(write_buffer + left, context); left += 20; if (write(fd, write_buffer, left) != left) return -1; @@ -666,14 +721,11 @@ static void ce_smudge_racily_clean_entry(struct cache_entry *ce) } } -int write_cache_1(int newfd, struct cache_entry **cache, int entries, - unsigned char *cache_sha1) +int write_cache(int newfd, struct cache_entry **cache, int entries) { SHA_CTX c; struct cache_header hdr; int i, removed; - int status; - unsigned char sha1[20]; for (i = removed = 0; i < entries; i++) if (!cache[i]->ce_mode) @@ -697,18 +749,19 @@ int write_cache_1(int newfd, struct cache_entry **cache, int entries, if (ce_write(&c, newfd, ce, ce_size(ce)) < 0) return -1; } - status = ce_flush(&c, newfd, sha1); - if (cache_sha1) - memcpy(cache_sha1, sha1, 20); - return status; -} -int read_cache(void) -{ - return read_cache_1(NULL); -} - -int write_cache(int newfd, struct cache_entry **cache, 
int entries) -{ - return write_cache_1(newfd, cache, entries, NULL); + /* Write extension data here */ + if (active_cache_tree) { + unsigned long sz; + void *data = cache_tree_write(active_cache_tree, &sz); + if (data && + !write_index_ext_header(&c, newfd, CACHE_EXT_TREE, sz) && + !ce_write(&c, newfd, data, sz)) + ; + else { + free(data); + return -1; + } + } + return ce_flush(&c, newfd); } diff --git a/read-tree.c b/read-tree.c index 26f4f7e323..1c65101291 100644 --- a/read-tree.c +++ b/read-tree.c @@ -9,6 +9,7 @@ #include "object.h" #include "tree.h" +#include "cache-tree.h" #include #include @@ -828,6 +829,7 @@ int main(int argc, char **argv) } unpack_trees(fn); + cache_tree_free(&active_cache_tree); if (write_cache(newfd, active_cache, active_nr) || commit_index_file(&cache_file)) die("unable to write new index file"); diff --git a/update-index.c b/update-index.c index 86f53948fc..d6d3295e32 100644 --- a/update-index.c +++ b/update-index.c @@ -6,12 +6,8 @@ #include "cache.h" #include "strbuf.h" #include "quote.h" -#include "tree.h" #include "cache-tree.h" -static unsigned char active_cache_sha1[20]; -static struct cache_tree *active_cache_tree; - /* * Default to not allowing changes to the list of files. 
The * tool doesn't actually care, but this makes it harder to add @@ -501,10 +497,9 @@ int main(int argc, const char **argv) if (newfd < 0) die("unable to create new cachefile"); - entries = read_cache_1(active_cache_sha1); + entries = read_cache(); if (entries < 0) die("cache corrupted"); - active_cache_tree = read_cache_tree(active_cache_sha1); for (i = 1 ; i < argc; i++) { const char *path = argv[i]; @@ -630,11 +625,9 @@ int main(int argc, const char **argv) } } if (active_cache_changed) { - if (write_cache_1(newfd, active_cache, active_nr, - active_cache_sha1) || + if (write_cache(newfd, active_cache, active_nr) || commit_index_file(&cache_file)) die("Unable to write new cachefile"); - write_cache_tree(active_cache_sha1, active_cache_tree); } return has_errors ? 1 : 0; diff --git a/write-tree.c b/write-tree.c index cef0c5bb42..a5069921a0 100644 --- a/write-tree.c +++ b/write-tree.c @@ -7,21 +7,20 @@ #include "tree.h" #include "cache-tree.h" -static unsigned char active_cache_sha1[20]; -static struct cache_tree *active_cache_tree; - static int missing_ok = 0; static const char write_tree_usage[] = "git-write-tree [--missing-ok]"; +static struct cache_file cache_file; + int main(int argc, char **argv) { - int entries; + int entries, was_valid, newfd; setup_git_directory(); - entries = read_cache_1(active_cache_sha1); - active_cache_tree = read_cache_tree(active_cache_sha1); + newfd = hold_index_file_for_update(&cache_file, get_index_file()); + entries = read_cache(); if (argc == 2) { if (!strcmp(argv[1], "--missing-ok")) missing_ok = 1; @@ -35,11 +34,26 @@ int main(int argc, char **argv) if (entries < 0) die("git-write-tree: error reading cache"); - if (cache_tree_update(active_cache_tree, active_cache, active_nr, - missing_ok)) - die("git-write-tree: error building trees"); - write_cache_tree(active_cache_sha1, active_cache_tree); - + if (!active_cache_tree) + active_cache_tree = cache_tree(); + + was_valid = cache_tree_fully_valid(active_cache_tree); + if 
(!was_valid) { + if (cache_tree_update(active_cache_tree, + active_cache, active_nr, + missing_ok) < 0) + die("git-write-tree: error building trees"); + if (0 <= newfd) { + if (!write_cache(newfd, active_cache, active_nr)) + commit_index_file(&cache_file); + } + /* Not being able to write is fine -- we are only interested + * in updating the cache-tree part, and if the next caller + * ends up using the old index with an unupdated cache-tree part + * it misses the work we did here, but that is just a + * performance penalty and not a big deal. + */ + } printf("%s\n", sha1_to_hex(active_cache_tree->sha1)); return 0; }