From e54501004abbd20fa8d813c1e5b82c3b50bb9361 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Sat, 28 Jul 2012 11:03:01 -0400 Subject: [PATCH 01/11] diff: do not use null sha1 as a sentinel value The diff code represents paths using the diff_filespec struct. This struct has a sha1 to represent the sha1 of the content at that path, as well as a sha1_valid member which indicates whether its sha1 field is actually useful. If sha1_valid is not true, then the filespec represents a working tree file (e.g., for the no-index case, or for when the index is not up-to-date). The diff_filespec is only used internally, though. At the interfaces to the diff subsystem, callers feed the sha1 directly, and we create a diff_filespec from it. It's at that point that we look at the sha1 and decide whether it is valid or not; callers may pass the null sha1 as a sentinel value to indicate that it is not. We should not typically see the null sha1 coming from any other source (e.g., in the index itself, or from a tree). However, a corrupt tree might have a null sha1, which would cause "diff --patch" to accidentally diff the working tree version of a file instead of treating it as a blob. This patch extends the edges of the diff interface to accept a "sha1_valid" flag whenever we accept a sha1, and to use that flag when creating a filespec. In some cases, this means passing the flag through several layers, making the code change larger than would be desirable. One alternative would be to simply die() upon seeing corrupted trees with null sha1s. However, this fix more directly addresses the problem (while bogus sha1s in a tree are probably a bad thing, it is really the sentinel confusion sending us down the wrong code path that is what makes it devastating). And it means that git is more capable of examining and debugging these corrupted trees. For example, you can still "diff --raw" such a tree to find out when the bogus entry was introduced; you just cannot do a "--patch" diff (just as you could not with any other corrupted tree, as we do not have any content to diff). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin.h | 2 +- builtin/blame.c | 9 +++-- builtin/cat-file.c | 2 +- builtin/diff.c | 8 +++- combine-diff.c | 4 +- diff-lib.c | 20 +++++---- diff-no-index.c | 4 +- diff.c | 16 +++++--- diff.h | 5 +++ diffcore-rename.c | 2 +- diffcore.h | 2 +- revision.c | 2 + t/t4054-diff-bogus-tree.sh | 83 ++++++++++++++++++++++++++++++++++++++ tree-diff.c | 8 ++-- 14 files changed, 135 insertions(+), 32 deletions(-) create mode 100755 t/t4054-diff-bogus-tree.sh diff --git a/builtin.h b/builtin.h index 857b9c8aa8..47f540f37f 100644 --- a/builtin.h +++ b/builtin.h @@ -42,7 +42,7 @@ void finish_copy_notes_for_rewrite(struct notes_rewrite_cfg *c); extern int check_pager_config(const char *cmd); -extern int textconv_object(const char *path, unsigned mode, const unsigned char *sha1, char **buf, unsigned long *buf_size); +extern int textconv_object(const char *path, unsigned mode, const unsigned char *sha1, int sha1_valid, char **buf, unsigned long *buf_size); extern int cmd_add(int argc, const char **argv, const char *prefix); extern int cmd_annotate(int argc, const char **argv, const char *prefix); diff --git a/builtin/blame.c b/builtin/blame.c index 5a67c202f0..fac0e93e67 100644 --- a/builtin/blame.c +++ b/builtin/blame.c @@ -96,6 +96,7 @@ struct origin { int textconv_object(const char *path, unsigned mode, const unsigned char *sha1, + int sha1_valid, char **buf, unsigned long *buf_size) { @@ -103,7 +104,7 @@ int textconv_object(const char *path, struct userdiff_driver *textconv; df = alloc_filespec(path); - fill_filespec(df, sha1, mode); + fill_filespec(df, sha1, sha1_valid, mode); textconv = get_textconv(df); if (!textconv) { free_filespec(df); @@ -128,7 +129,7 @@ static void fill_origin_blob(struct diff_options *opt, num_read_blob++; if (DIFF_OPT_TST(opt, ALLOW_TEXTCONV) && - textconv_object(o->path, o->mode, o->blob_sha1, &file->ptr, &file_size)) + textconv_object(o->path, o->mode, o->blob_sha1, 1, &file->ptr, &file_size)) ; else file->ptr = read_sha1_file(o->blob_sha1, &type, &file_size); @@ -2114,7 +2115,7 @@ static struct commit *fake_working_tree_commit(struct diff_options *opt, switch (st.st_mode & S_IFMT) { case S_IFREG: if (DIFF_OPT_TST(opt, ALLOW_TEXTCONV) && - textconv_object(read_from, mode, null_sha1, &buf_ptr, &buf_len)) + textconv_object(read_from, mode, null_sha1, 0, &buf_ptr, &buf_len)) strbuf_attach(&buf, buf_ptr, buf_len, buf_len + 1); else if (strbuf_read_file(&buf, read_from, st.st_size) != st.st_size) die_errno("cannot open or read '%s'", read_from); @@ -2506,7 +2507,7 @@ parse_done: die("no such path %s in %s", path, final_commit_name); if (DIFF_OPT_TST(&sb.revs->diffopt, ALLOW_TEXTCONV) && - textconv_object(path, o->mode, o->blob_sha1, (char **) &sb.final_buf, + textconv_object(path, o->mode, o->blob_sha1, 1, (char **) &sb.final_buf, &sb.final_buf_size)) ; else diff --git a/builtin/cat-file.c b/builtin/cat-file.c index 07bd984084..72205faeaf 100644 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -143,7 +143,7 @@ static int cat_one_file(int opt, const char *exp_type, const char *obj_name) die("git cat-file --textconv %s: must be ", obj_name); - if (!textconv_object(obj_context.path, obj_context.mode, sha1, &buf, &size)) + if (!textconv_object(obj_context.path, obj_context.mode, sha1, 1, &buf, &size)) die("git cat-file --textconv: unable to run textconv on %s", obj_name); break; diff --git a/builtin/diff.c b/builtin/diff.c index 387afa7568..ac2b1cc63f 100644 --- a/builtin/diff.c +++ b/builtin/diff.c @@ -29,6 +29,8 @@ static void stuff_change(struct diff_options *opt, unsigned old_mode, unsigned new_mode, const unsigned char *old_sha1, const unsigned char *new_sha1, + int old_sha1_valid, + int new_sha1_valid, const char *old_name, const char *new_name) { @@ -54,8 +56,8 @@ static void stuff_change(struct diff_options *opt, one = alloc_filespec(old_name); two = alloc_filespec(new_name); - fill_filespec(one, old_sha1, old_mode); - fill_filespec(two, new_sha1, new_mode); + fill_filespec(one, old_sha1, old_sha1_valid, old_mode); + fill_filespec(two, new_sha1, new_sha1_valid, new_mode); diff_queue(&diff_queued_diff, one, two); } @@ -84,6 +86,7 @@ static int builtin_diff_b_f(struct rev_info *revs, stuff_change(&revs->diffopt, blob[0].mode, canon_mode(st.st_mode), blob[0].sha1, null_sha1, + 1, 0, path, path); diffcore_std(&revs->diffopt); diff_flush(&revs->diffopt); @@ -108,6 +111,7 @@ static int builtin_diff_blobs(struct rev_info *revs, stuff_change(&revs->diffopt, blob[0].mode, blob[1].mode, blob[0].sha1, blob[1].sha1, + 1, 1, blob[0].name, blob[1].name); diffcore_std(&revs->diffopt); diff_flush(&revs->diffopt); diff --git a/combine-diff.c b/combine-diff.c index a2e8dcf855..e9abdbd0b9 100644 --- a/combine-diff.c +++ b/combine-diff.c @@ -111,7 +111,7 @@ static char *grab_blob(const unsigned char *sha1, unsigned int mode, return xcalloc(1, 1); } else if (textconv) { struct diff_filespec *df = alloc_filespec(path); - fill_filespec(df, sha1, mode); + fill_filespec(df, sha1, 1, mode); *size = fill_textconv(textconv, df, &blob); free_filespec(df); } else { @@ -823,7 +823,7 @@ static void show_patch_diff(struct combine_diff_path *elem, int num_parent, &result_size, NULL, NULL); } else if (textconv) { struct diff_filespec *df = alloc_filespec(elem->path); - fill_filespec(df, null_sha1, st.st_mode); + fill_filespec(df, null_sha1, 0, st.st_mode); result_size = fill_textconv(textconv, df, &result); free_filespec(df); } else if (0 <= (fd = open(elem->path, O_RDONLY))) { diff --git a/diff-lib.c b/diff-lib.c index fc0dff31b5..f35de0ffa0 100644 --- a/diff-lib.c +++ b/diff-lib.c @@ -206,7 +206,8 @@ int run_diff_files(struct rev_info *revs, unsigned int option) if (silent_on_removed) continue; diff_addremove(&revs->diffopt, '-', ce->ce_mode, - ce->sha1, ce->name, 0); + ce->sha1, !is_null_sha1(ce->sha1), + ce->name, 0); continue; } changed = match_stat_with_submodule(&revs->diffopt, ce, &st, @@ -220,6 +221,7 @@ int run_diff_files(struct rev_info *revs, unsigned int option) newmode = ce_mode_from_stat(ce, st.st_mode); diff_change(&revs->diffopt, oldmode, newmode, ce->sha1, (changed ? null_sha1 : ce->sha1), + !is_null_sha1(ce->sha1), (changed ? 0 : !is_null_sha1(ce->sha1)), ce->name, 0, dirty_submodule); } @@ -236,11 +238,12 @@ int run_diff_files(struct rev_info *revs, unsigned int option) static void diff_index_show_file(struct rev_info *revs, const char *prefix, struct cache_entry *ce, - const unsigned char *sha1, unsigned int mode, + const unsigned char *sha1, int sha1_valid, + unsigned int mode, unsigned dirty_submodule) { diff_addremove(&revs->diffopt, prefix[0], mode, - sha1, ce->name, dirty_submodule); + sha1, sha1_valid, ce->name, dirty_submodule); } static int get_stat_data(struct cache_entry *ce, @@ -295,7 +298,7 @@ static void show_new_file(struct rev_info *revs, &dirty_submodule, &revs->diffopt) < 0) return; - diff_index_show_file(revs, "+", new, sha1, mode, dirty_submodule); + diff_index_show_file(revs, "+", new, sha1, !is_null_sha1(sha1), mode, dirty_submodule); } static int show_modified(struct rev_info *revs, @@ -312,7 +315,7 @@ static int show_modified(struct rev_info *revs, &dirty_submodule, &revs->diffopt) < 0) { if (report_missing) diff_index_show_file(revs, "-", old, - old->sha1, old->ce_mode, 0); + old->sha1, 1, old->ce_mode, 0); return -1; } @@ -347,7 +350,8 @@ static int show_modified(struct rev_info *revs, return 0; diff_change(&revs->diffopt, oldmode, mode, - old->sha1, sha1, old->name, 0, dirty_submodule); + old->sha1, sha1, 1, !is_null_sha1(sha1), + old->name, 0, dirty_submodule); return 0; } @@ -380,7 +384,7 @@ static void do_oneway_diff(struct unpack_trees_options *o, struct diff_filepair *pair; pair = diff_unmerge(&revs->diffopt, idx->name); if (tree) - fill_filespec(pair->one, tree->sha1, tree->ce_mode); + fill_filespec(pair->one, tree->sha1, 1, tree->ce_mode); return; } @@ -396,7 +400,7 @@ static void do_oneway_diff(struct unpack_trees_options *o, * Something removed from the tree? */ if (!idx) { - diff_index_show_file(revs, "-", tree, tree->sha1, tree->ce_mode, 0); + diff_index_show_file(revs, "-", tree, tree->sha1, 1, tree->ce_mode, 0); return; } diff --git a/diff-no-index.c b/diff-no-index.c index 3a36144687..6568eea6f4 100644 --- a/diff-no-index.c +++ b/diff-no-index.c @@ -141,8 +141,8 @@ static int queue_diff(struct diff_options *o, name2 = "/dev/null"; d1 = alloc_filespec(name1); d2 = alloc_filespec(name2); - fill_filespec(d1, null_sha1, mode1); - fill_filespec(d2, null_sha1, mode2); + fill_filespec(d1, null_sha1, 0, mode1); + fill_filespec(d2, null_sha1, 0, mode2); diff_queue(&diff_queued_diff, d1, d2); return 0; diff --git a/diff.c b/diff.c index 5388ded214..8933dd1996 100644 --- a/diff.c +++ b/diff.c @@ -2443,12 +2443,12 @@ void free_filespec(struct diff_filespec *spec) } void fill_filespec(struct diff_filespec *spec, const unsigned char *sha1, - unsigned short mode) + int sha1_valid, unsigned short mode) { if (mode) { spec->mode = canon_mode(mode); hashcpy(spec->sha1, sha1); - spec->sha1_valid = !is_null_sha1(sha1); + spec->sha1_valid = sha1_valid; } } @@ -4589,6 +4589,7 @@ static int is_submodule_ignored(const char *path, struct diff_options *options) void diff_addremove(struct diff_options *options, int addremove, unsigned mode, const unsigned char *sha1, + int sha1_valid, const char *concatpath, unsigned dirty_submodule) { struct diff_filespec *one, *two; @@ -4620,9 +4621,9 @@ void diff_addremove(struct diff_options *options, two = alloc_filespec(concatpath); if (addremove != '+') - fill_filespec(one, sha1, mode); + fill_filespec(one, sha1, sha1_valid, mode); if (addremove != '-') { - fill_filespec(two, sha1, mode); + fill_filespec(two, sha1, sha1_valid, mode); two->dirty_submodule = dirty_submodule; } @@ -4635,6 +4636,7 @@ void diff_change(struct diff_options *options, unsigned old_mode, unsigned new_mode, const unsigned char *old_sha1, const unsigned char *new_sha1, + int old_sha1_valid, int new_sha1_valid, const char *concatpath, unsigned old_dirty_submodule, unsigned new_dirty_submodule) { @@ -4649,6 +4651,8 @@ void diff_change(struct diff_options *options, const unsigned char *tmp_c; tmp = old_mode; old_mode = new_mode; new_mode = tmp; tmp_c = old_sha1; old_sha1 = new_sha1; new_sha1 = tmp_c; + tmp = old_sha1_valid; old_sha1_valid = new_sha1_valid; + new_sha1_valid = tmp; tmp = old_dirty_submodule; old_dirty_submodule = new_dirty_submodule; new_dirty_submodule = tmp; } @@ -4659,8 +4663,8 @@ void diff_change(struct diff_options *options, one = alloc_filespec(concatpath); two = alloc_filespec(concatpath); - fill_filespec(one, old_sha1, old_mode); - fill_filespec(two, new_sha1, new_mode); + fill_filespec(one, old_sha1, old_sha1_valid, old_mode); + fill_filespec(two, new_sha1, new_sha1_valid, new_mode); one->dirty_submodule = old_dirty_submodule; two->dirty_submodule = new_dirty_submodule; diff --git a/diff.h b/diff.h index 7af5f1e2a7..b5ba1402aa 100644 --- a/diff.h +++ b/diff.h @@ -19,12 +19,14 @@ typedef void (*change_fn_t)(struct diff_options *options, unsigned old_mode, unsigned new_mode, const unsigned char *old_sha1, const unsigned char *new_sha1, + int old_sha1_valid, int new_sha1_valid, const char *fullpath, unsigned old_dirty_submodule, unsigned new_dirty_submodule); typedef void (*add_remove_fn_t)(struct diff_options *options, int addremove, unsigned mode, const unsigned char *sha1, + int sha1_valid, const char *fullpath, unsigned dirty_submodule); typedef void (*diff_format_fn_t)(struct diff_queue_struct *q, @@ -209,12 +211,15 @@ extern void diff_addremove(struct diff_options *, int addremove, unsigned mode, const unsigned char *sha1, + int sha1_valid, const char *fullpath, unsigned dirty_submodule); extern void diff_change(struct diff_options *, unsigned mode1, unsigned mode2, const unsigned char *sha1, const unsigned char *sha2, + int sha1_valid, + int sha2_valid, const char *fullpath, unsigned dirty_submodule1, unsigned dirty_submodule2); diff --git a/diffcore-rename.c b/diffcore-rename.c index f639601c76..e6f9be64cf 100644 --- a/diffcore-rename.c +++ b/diffcore-rename.c @@ -48,7 +48,7 @@ static struct diff_rename_dst *locate_rename_dst(struct diff_filespec *two, memmove(rename_dst + first + 1, rename_dst + first, (rename_dst_nr - first - 1) * sizeof(*rename_dst)); rename_dst[first].two = alloc_filespec(two->path); - fill_filespec(rename_dst[first].two, two->sha1, two->mode); + fill_filespec(rename_dst[first].two, two->sha1, two->sha1_valid, two->mode); rename_dst[first].pair = NULL; return &(rename_dst[first]); } diff --git a/diffcore.h b/diffcore.h index 8f32b824cd..c964ec114a 100644 --- a/diffcore.h +++ b/diffcore.h @@ -54,7 +54,7 @@ struct diff_filespec { extern struct diff_filespec *alloc_filespec(const char *); extern void free_filespec(struct diff_filespec *); extern void fill_filespec(struct diff_filespec *, const unsigned char *, - unsigned short); + int, unsigned short); extern int diff_populate_filespec(struct diff_filespec *, int); extern void diff_free_filespec_data(struct diff_filespec *); diff --git a/revision.c b/revision.c index 18be62b316..21ef729cfa 100644 --- a/revision.c +++ b/revision.c @@ -332,6 +332,7 @@ static int tree_difference = REV_TREE_SAME; static void file_add_remove(struct diff_options *options, int addremove, unsigned mode, const unsigned char *sha1, + int sha1_valid, const char *fullpath, unsigned dirty_submodule) { int diff = addremove == '+' ? REV_TREE_NEW : REV_TREE_OLD; @@ -345,6 +346,7 @@ static void file_change(struct diff_options *options, unsigned old_mode, unsigned new_mode, const unsigned char *old_sha1, const unsigned char *new_sha1, + int old_sha1_valid, int new_sha1_valid, const char *fullpath, unsigned old_dirty_submodule, unsigned new_dirty_submodule) { diff --git a/t/t4054-diff-bogus-tree.sh b/t/t4054-diff-bogus-tree.sh new file mode 100755 index 0000000000..0843c87890 --- /dev/null +++ b/t/t4054-diff-bogus-tree.sh @@ -0,0 +1,83 @@ +#!/bin/sh + +test_description='test diff with a bogus tree containing the null sha1' +. ./test-lib.sh + +empty_tree=4b825dc642cb6eb9a060e54bf8d69288fbee4904 + +test_expect_success 'create bogus tree' ' + bogus_tree=$( + printf "100644 fooQQQQQQQQQQQQQQQQQQQQQ" | + q_to_nul | + git hash-object -w --stdin -t tree + ) +' + +test_expect_success 'create tree with matching file' ' + echo bar >foo && + git add foo && + good_tree=$(git write-tree) + blob=$(git rev-parse :foo) +' + +test_expect_success 'raw diff shows null sha1 (addition)' ' + echo ":000000 100644 $_z40 $_z40 A foo" >expect && + git diff-tree $empty_tree $bogus_tree >actual && + test_cmp expect actual +' + +test_expect_success 'raw diff shows null sha1 (removal)' ' + echo ":100644 000000 $_z40 $_z40 D foo" >expect && + git diff-tree $bogus_tree $empty_tree >actual && + test_cmp expect actual +' + +test_expect_success 'raw diff shows null sha1 (modification)' ' + echo ":100644 100644 $blob $_z40 M foo" >expect && + git diff-tree $good_tree $bogus_tree >actual && + test_cmp expect actual +' + +test_expect_success 'raw diff shows null sha1 (other direction)' ' + echo ":100644 100644 $_z40 $blob M foo" >expect && + git diff-tree $bogus_tree $good_tree >actual && + test_cmp expect actual +' + +test_expect_success 'raw diff shows null sha1 (reverse)' ' + echo ":100644 100644 $_z40 $blob M foo" >expect && + git diff-tree -R $good_tree $bogus_tree >actual && + test_cmp expect actual +' + +test_expect_success 'raw diff shows null sha1 (index)' ' + echo ":100644 100644 $_z40 $blob M foo" >expect && + git diff-index $bogus_tree >actual && + test_cmp expect actual +' + +test_expect_success 'patch fails due to bogus sha1 (addition)' ' + test_must_fail git diff-tree -p $empty_tree $bogus_tree +' + +test_expect_success 'patch fails due to bogus sha1 (removal)' ' + test_must_fail git diff-tree -p $bogus_tree $empty_tree +' + +test_expect_success 'patch fails due to bogus sha1 (modification)' ' + test_must_fail git diff-tree -p $good_tree $bogus_tree +' + +test_expect_success 'patch fails due to bogus sha1 (other direction)' ' + test_must_fail git diff-tree -p $bogus_tree $good_tree +' + +test_expect_success 'patch fails due to bogus sha1 (reverse)' ' + test_must_fail git diff-tree -R -p $good_tree $bogus_tree +' + +test_expect_success 'patch fails due to bogus sha1 (index)' ' + test_must_fail git diff-index -p $bogus_tree +' + +test_done diff --git a/tree-diff.c b/tree-diff.c index 28ad6db9ff..7e5483366e 100644 --- a/tree-diff.c +++ b/tree-diff.c @@ -49,12 +49,12 @@ static int compare_tree_entry(struct tree_desc *t1, struct tree_desc *t2, if (DIFF_OPT_TST(opt, RECURSIVE) && S_ISDIR(mode1)) { if (DIFF_OPT_TST(opt, TREE_IN_RECURSIVE)) { opt->change(opt, mode1, mode2, - sha1, sha2, base->buf, 0, 0); + sha1, sha2, 1, 1, base->buf, 0, 0); } strbuf_addch(base, '/'); diff_tree_sha1(sha1, sha2, base->buf, opt); } else { - opt->change(opt, mode1, mode2, sha1, sha2, base->buf, 0, 0); + opt->change(opt, mode1, mode2, sha1, sha2, 1, 1, base->buf, 0, 0); } strbuf_setlen(base, old_baselen); return 0; @@ -100,7 +100,7 @@ static void show_entry(struct diff_options *opt, const char *prefix, die("corrupt tree sha %s", sha1_to_hex(sha1)); if (DIFF_OPT_TST(opt, TREE_IN_RECURSIVE)) - opt->add_remove(opt, *prefix, mode, sha1, base->buf, 0); + opt->add_remove(opt, *prefix, mode, sha1, 1, base->buf, 0); strbuf_addch(base, '/'); @@ -108,7 +108,7 @@ static void show_entry(struct diff_options *opt, const char *prefix, show_tree(opt, prefix, &inner, base); free(tree); } else - opt->add_remove(opt, prefix[0], mode, sha1, base->buf, 0); + opt->add_remove(opt, prefix[0], mode, sha1, 1, base->buf, 0); strbuf_setlen(base, old_baselen); } From 4337b5856f88f18da47c176e3cbc95a35627044c Mon Sep 17 00:00:00 2001 From: Jeff King Date: Sat, 28 Jul 2012 11:05:24 -0400 Subject: [PATCH 02/11] do not write null sha1s to on-disk index We should never need to write the null sha1 into an index entry (short of the 1 in 2^160 chance that somebody actually has content that hashes to it). If we attempt to do so, it is much more likely that it is a bug, since we use the null sha1 as a sentinel value to mean "not valid". The presence of null sha1s in the index (which can come from, among other things, "update-index --cacheinfo", or by reading a corrupted tree) can cause problems for later readers, because they cannot distinguish the literal null sha1 from its use a sentinel value. For example, "git diff-files" on such an entry would make it appear as if it is stat-dirty, and until recently, the diff code assumed such an entry meant that we should be diffing a working tree file rather than a blob. Ideally, we would stop such entries from entering even our in-core index. However, we do sometimes legitimately add entries with null sha1s in order to represent these sentinel situations; simply forbidding them in add_index_entry breaks a lot of the existing code. However, we can at least make sure that our in-core sentinel representation never makes it to disk. To be thorough, we will test an attempt to add both a blob and a submodule entry. In the former case, we might run into problems anyway because we will be missing the blob object. But in the latter case, we do not enforce connectivity across gitlink entries, making this our only point of enforcement. The current implementation does not care which type of entry we are seeing, but testing both cases helps future-proof the test suite in case that changes. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- read-cache.c | 2 ++ t/t2107-update-index-basic.sh | 19 +++++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/read-cache.c b/read-cache.c index 274e54b4f3..5ae7f2b680 100644 --- a/read-cache.c +++ b/read-cache.c @@ -1615,6 +1615,8 @@ int write_index(struct index_state *istate, int newfd) continue; if (!ce_uptodate(ce) && is_racy_timestamp(istate, ce)) ce_smudge_racily_clean_entry(ce); + if (is_null_sha1(ce->sha1)) + return error("cache entry has null sha1: %s", ce->name); if (ce_write_entry(&c, newfd, ce) < 0) return -1; } diff --git a/t/t2107-update-index-basic.sh b/t/t2107-update-index-basic.sh index 809fafe208..0dbbb00d74 100755 --- a/t/t2107-update-index-basic.sh +++ b/t/t2107-update-index-basic.sh @@ -29,4 +29,23 @@ test_expect_success 'update-index -h with corrupt index' ' grep "[Uu]sage: git update-index" broken/usage ' +test_expect_success '--cacheinfo does not accept blob null sha1' ' + echo content >file && + git add file && + git rev-parse :file >expect && + test_must_fail git update-index --cacheinfo 100644 $_z40 file && + git rev-parse :file >actual && + test_cmp expect actual +' + +test_expect_success '--cacheinfo does not accept gitlink null sha1' ' + git init submodule && + (cd submodule && test_commit foo) && + git add submodule && + git rev-parse :submodule >expect && + test_must_fail git update-index --cacheinfo 160000 $_z40 submodule && + git rev-parse :submodule >actual && + test_cmp expect actual +' + test_done From c479d14a80743b1cb86d77695607f4c81f7d8797 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Sat, 28 Jul 2012 11:06:29 -0400 Subject: [PATCH 03/11] fsck: detect null sha1 in tree entries Short of somebody happening to beat the 1 in 2^160 odds of actually generating content that hashes to the null sha1, we should never see this value in a tree entry. So let's have fsck warn if it it seen. As in the previous commit, we test both blob and submodule entries to future-proof the test suite against the implementation depending on connectivity to notice the error. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- fsck.c | 8 +++++++- t/t1450-fsck.sh | 26 ++++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/fsck.c b/fsck.c index 6c855f84f0..da53cf41f9 100644 --- a/fsck.c +++ b/fsck.c @@ -139,6 +139,7 @@ static int verify_ordered(unsigned mode1, const char *name1, unsigned mode2, con static int fsck_tree(struct tree *item, int strict, fsck_error error_func) { int retval; + int has_null_sha1 = 0; int has_full_path = 0; int has_empty_name = 0; int has_zero_pad = 0; @@ -157,9 +158,12 @@ static int fsck_tree(struct tree *item, int strict, fsck_error error_func) while (desc.size) { unsigned mode; const char *name; + const unsigned char *sha1; - tree_entry_extract(&desc, &name, &mode); + sha1 = tree_entry_extract(&desc, &name, &mode); + if (is_null_sha1(sha1)) + has_null_sha1 = 1; if (strchr(name, '/')) has_full_path = 1; if (!*name) @@ -207,6 +211,8 @@ static int fsck_tree(struct tree *item, int strict, fsck_error error_func) } retval = 0; + if (has_null_sha1) + retval += error_func(&item->object, FSCK_WARN, "contains entries pointing to null sha1"); if (has_full_path) retval += error_func(&item->object, FSCK_WARN, "contains full pathnames"); if (has_empty_name) diff --git a/t/t1450-fsck.sh b/t/t1450-fsck.sh index 5b8ebd8053..5e36cc71b4 100755 --- a/t/t1450-fsck.sh +++ b/t/t1450-fsck.sh @@ -217,4 +217,30 @@ test_expect_success 'rev-list --verify-objects with bad sha1' ' grep -q "error: sha1 mismatch 63ffffffffffffffffffffffffffffffffffffff" out ' +_bz='\0' +_bz5="$_bz$_bz$_bz$_bz$_bz" +_bz20="$_bz5$_bz5$_bz5$_bz5" + +test_expect_success 'fsck notices blob entry pointing to null sha1' ' + (git init null-blob && + cd null-blob && + sha=$(printf "100644 file$_bz$_bz20" | + git hash-object -w --stdin -t tree) && + git fsck 2>out && + cat out && + grep "warning.*null sha1" out + ) +' + +test_expect_success 'fsck notices submodule entry pointing to null sha1' ' + (git init null-commit && + cd null-commit && + sha=$(printf "160000 submodule$_bz$_bz20" | + git hash-object -w --stdin -t tree) && + git fsck 2>out && + cat out && + grep "warning.*null sha1" out + ) +' + test_done From b622d4d11d27fd290f7732c6a65f40c054796c1f Mon Sep 17 00:00:00 2001 From: Thomas Rast Date: Mon, 30 Jul 2012 21:25:40 +0200 Subject: [PATCH 04/11] send-email: improve RFC2047 quote parsing The RFC2047 unquoting, used to parse email addresses in From and Cc headers, is broken in several ways: * It erroneously substitutes ' ' for '_' in *the whole* header, even outside the quoted field. [Noticed by Christoph.] * It is too liberal in its matching, and happily matches the start of one quoted chunk against the end of another, or even just something that looks like such an end. [Noticed by Junio.] * It fundamentally cannot cope with encodings that are not a superset of ASCII, nor several (incompatible) encodings in the same header. This patch fixes the first two by doing a more careful decoding of the outer quoting (e.g. "=AB" to represent an octet whose value is 0xAB). Fixing the fundamental issues is left for a future, more intrusive, patch. Noticed-by: Christoph Miebach Helped-by: Junio C Hamano Signed-off-by: Thomas Rast Signed-off-by: Junio C Hamano --- git-send-email.perl | 10 ++++++---- t/t9001-send-email.sh | 13 +++++++++++++ 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/git-send-email.perl b/git-send-email.perl index ef30c557c7..664713709c 100755 --- a/git-send-email.perl +++ b/git-send-email.perl @@ -862,11 +862,13 @@ $time = time - scalar $#files; sub unquote_rfc2047 { local ($_) = @_; my $encoding; - if (s/=\?([^?]+)\?q\?(.*)\?=/$2/g) { + s{=\?([^?]+)\?q\?(.*?)\?=}{ $encoding = $1; - s/_/ /g; - s/=([0-9A-F]{2})/chr(hex($1))/eg; - } + my $e = $2; + $e =~ s/_/ /g; + $e =~ s/=([0-9A-F]{2})/chr(hex($1))/eg; + $e; + }eg; return wantarray ? ($_, $encoding) : $_; } diff --git a/t/t9001-send-email.sh b/t/t9001-send-email.sh index 8c12c65c72..035122808b 100755 --- a/t/t9001-send-email.sh +++ b/t/t9001-send-email.sh @@ -841,6 +841,19 @@ test_expect_success $PREREQ '--compose adds MIME for utf8 subject' ' grep "^Subject: =?UTF-8?q?utf8-s=C3=BCbj=C3=ABct?=" msgtxt1 ' +test_expect_success $PREREQ 'utf8 author is correctly passed on' ' + clean_fake_sendmail && + test_commit weird_author && + test_when_finished "git reset --hard HEAD^" && + git commit --amend --author "Füñný Nâmé " && + git format-patch --stdout -1 >funny_name.patch && + git send-email --from="Example " \ + --to=nobody@example.com \ + --smtp-server="$(pwd)/fake.sendmail" \ + funny_name.patch && + grep "^From: Füñný Nâmé " msgtxt1 +' + test_expect_success $PREREQ 'detects ambiguous reference/file conflict' ' echo master > master && git add master && From 4d4b573977833f887635be386b84681e73834b72 Mon Sep 17 00:00:00 2001 From: Matthieu Moy Date: Fri, 3 Aug 2012 10:21:20 +0200 Subject: [PATCH 05/11] setup: clarify error messages for file/revisions ambiguity The previous "Use '--' to separate filenames from revisions" may sound obvious for an old-time Unix user, but does not make it clear how to use this '--'. In addition to mentionning this '--', give an idea of what the new command should look like. Ideally, we could provide cut-and-paste ready commands based on the command that just failed, but we have no easy access to argv[] in this place of the code. Signed-off-by: Matthieu Moy Signed-off-by: Junio C Hamano --- setup.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/setup.c b/setup.c index e11497720e..7663a4cbb2 100644 --- a/setup.c +++ b/setup.c @@ -82,7 +82,7 @@ static void NORETURN die_verify_filename(const char *prefix, if (!diagnose_misspelt_rev) die("%s: no such path in the working tree.\n" - "Use '-- ...' to specify paths that do not exist locally.", + "Use 'git -- ...' to specify paths that do not exist locally.", arg); /* * Saying "'(icase)foo' does not exist in the index" when the @@ -96,7 +96,8 @@ static void NORETURN die_verify_filename(const char *prefix, /* ... or fall back the most general message. */ die("ambiguous argument '%s': unknown revision or path not in the working tree.\n" - "Use '--' to separate paths from revisions", arg); + "Use '--' to separate paths from revisions, like this:\n" + "'git [...] -- [...]'", arg); } @@ -145,7 +146,8 @@ void verify_non_filename(const char *prefix, const char *arg) if (!check_filename(prefix, arg)) return; die("ambiguous argument '%s': both revision and filename\n" - "Use '--' to separate filenames from revisions", arg); + "Use '--' to separate paths from revisions, like this:\n" + "'git [...] -- [...]'", arg); } /* From 2c3fd4bbb42b586b016319d2009a05fe5b878533 Mon Sep 17 00:00:00 2001 From: Brandon Casey Date: Mon, 6 Aug 2012 22:01:48 -0700 Subject: [PATCH 06/11] t/t5400: demonstrate breakage caused by informational message from prune When receive-pack triggers 'git gc --auto' and 'git prune' is called to remove a stale temporary object, 'git prune' prints an informational message to stdout about the file that it will remove. Since this message is written to stdout, it is sent back over the transport channel to the git client which tries to interpret it as part of the pack protocol and then promptly terminates with a complaint about a protocol error. Introduce a test which exercises the auto-gc functionality of receive-pack and demonstrates this breakage. Signed-off-by: Brandon Casey Signed-off-by: Junio C Hamano --- t/t5400-send-pack.sh | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/t/t5400-send-pack.sh b/t/t5400-send-pack.sh index 0eace37a03..04a87913ed 100755 --- a/t/t5400-send-pack.sh +++ b/t/t5400-send-pack.sh @@ -145,6 +145,41 @@ test_expect_success 'push --all excludes remote-tracking hierarchy' ' ) ' +test_expect_failure 'receive-pack runs auto-gc in remote repo' ' + rm -rf parent child && + git init parent && + ( + # Setup a repo with 2 packs + cd parent && + echo "Some text" >file.txt && + git add . && + git commit -m "Initial commit" && + git repack -adl && + echo "Some more text" >>file.txt && + git commit -a -m "Second commit" && + git repack + ) && + cp -a parent child && + ( + # Set the child to auto-pack if more than one pack exists + cd child && + git config gc.autopacklimit 1 && + git branch test_auto_gc && + # And create a file that follows the temporary object naming + # convention for the auto-gc to remove + : >.git/objects/tmp_test_object && + test-chmtime =-1209601 .git/objects/tmp_test_object + ) && + ( + cd parent && + echo "Even more text" >>file.txt && + git commit -a -m "Third commit" && + git send-pack ../child HEAD:refs/heads/test_auto_gc >output 2>&1 && + grep "Auto packing the repository for optimum performance." output + ) && + test ! -e child/.git/objects/tmp_test_object +' + rewound_push_setup() { rm -rf parent child && mkdir parent && From 4b7f2fa4c6c2c4675ab00474d419fa356afdfa71 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Mon, 6 Aug 2012 22:31:10 -0700 Subject: [PATCH 07/11] receive-pack: do not leak output from auto-gc to standard output The standard output channel of receive-pack is a structured protocol channel, and subprocesses must never be allowed to leak anything into it by writing to their standard output. Use RUN_COMMAND_STDOUT_TO_STDERR option to run_command_v_opt() just like we do when running hooks to prevent output from "gc" leaking to the standard output. Signed-off-by: Junio C Hamano --- builtin/receive-pack.c | 3 ++- t/t5400-send-pack.sh | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c index 0afb8b2896..3f05d971ec 100644 --- a/builtin/receive-pack.c +++ b/builtin/receive-pack.c @@ -977,7 +977,8 @@ int cmd_receive_pack(int argc, const char **argv, const char *prefix) const char *argv_gc_auto[] = { "gc", "--auto", "--quiet", NULL, }; - run_command_v_opt(argv_gc_auto, RUN_GIT_CMD); + int opt = RUN_GIT_CMD | RUN_COMMAND_STDOUT_TO_STDERR; + run_command_v_opt(argv_gc_auto, opt); } if (auto_update_server_info) update_server_info(0); diff --git a/t/t5400-send-pack.sh b/t/t5400-send-pack.sh index 04a87913ed..250c720c14 100755 --- a/t/t5400-send-pack.sh +++ b/t/t5400-send-pack.sh @@ -145,7 +145,7 @@ test_expect_success 'push --all excludes remote-tracking hierarchy' ' ) ' -test_expect_failure 'receive-pack runs auto-gc in remote repo' ' +test_expect_success 'receive-pack runs auto-gc in remote repo' ' rm -rf parent child && git init parent && ( From 785063e02bb249ef3a39db88575fe626b310d4a7 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 8 Aug 2012 12:08:17 -0700 Subject: [PATCH 08/11] sh-setup: protect from exported IFS Many scripted Porcelains rely on being able to split words at the default $IFS characters, i.e. SP, HT and LF. If the user exports a non-default IFS to the environment, what they read from plumbing commands such as ls-files that use HT to delimit fields may not be split in the way we expect. Protect outselves by resetting it, just like we do so against CDPATH exported to the environment. Noticed by Andrew Dranse . Signed-off-by: Junio C Hamano --- git-sh-setup.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/git-sh-setup.sh b/git-sh-setup.sh index 5d8e4e6c89..69472e4125 100644 --- a/git-sh-setup.sh +++ b/git-sh-setup.sh @@ -9,8 +9,12 @@ # you would cause "cd" to be taken to unexpected places. If you # like CDPATH, define it for your interactive shell sessions without # exporting it. +# But we protect ourselves from such a user mistake nevertheless. unset CDPATH +# Similarly for IFS +unset IFS + git_broken_path_fix () { case ":$PATH:" in *:$1:*) : ok ;; From 1af221ef5cb84c792303a0d16ca91197e2b70279 Mon Sep 17 00:00:00 2001 From: Michael J Gruber Date: Fri, 10 Aug 2012 08:51:19 +0200 Subject: [PATCH 09/11] rebase -i: use full onto sha1 in reflog 'git rebase' uses the full onto sha1 for the reflog message whereas 'git rebase -i' uses the short sha1. This is not only inconsistent, but can lead to problems when the reflog is inspected at a later time at which that abbreviation may have become ambiguous. Make 'rebase -i' use the full onto sha1, as well. Signed-off-by: Michael J Gruber Signed-off-by: Junio C Hamano --- git-rebase--interactive.sh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/git-rebase--interactive.sh b/git-rebase--interactive.sh index 2e1325824c..905346084c 100644 --- a/git-rebase--interactive.sh +++ b/git-rebase--interactive.sh @@ -533,11 +533,10 @@ do_next () { test -s "$todo" && return comment_for_reflog finish && - shortonto=$(git rev-parse --short $onto) && newhead=$(git rev-parse HEAD) && case $head_name in refs/*) - message="$GIT_REFLOG_ACTION: $head_name onto $shortonto" && + message="$GIT_REFLOG_ACTION: $head_name onto $onto" && git update-ref -m "$message" $head_name $newhead $orig_head && git symbolic-ref \ -m "$GIT_REFLOG_ACTION: returning to $head_name" \ From cacfc09ba82bfc6b0e1c047247785d56a6054b2f Mon Sep 17 00:00:00 2001 From: Jay Soffian Date: Wed, 8 Aug 2012 22:29:26 -0400 Subject: [PATCH 10/11] gitweb: URL-decode $my_url/$my_uri when stripping PATH_INFO When gitweb is used as a DirectoryIndex, it attempts to strip PATH_INFO on its own, as $cgi->url() fails to do so. However, it fails to account for the fact that PATH_INFO has already been URL-decoded by the web server, but the value returned by $cgi->url() has not been. This causes the stripping to fail whenever the URL contains encoded characters. To see this in action, setup gitweb as a DirectoryIndex and then use it on a repository with a directory containing a space in the name. Navigate to tree view, examine the gitweb generated html and you'll see a link such as: directory with spaces When clicked on, the browser will URL-encode this link, giving a $cgi->url() of the form: /test.git/tree/HEAD:/directory%20with%20spaces While PATH_INFO is: /test.git/tree/HEAD:/directory with spaces Fix this by calling unescape() on both $my_url and $my_uri before stripping PATH_INFO from them. Signed-off-by: Jay Soffian Acked-by: Jakub Narebski Signed-off-by: Junio C Hamano --- gitweb/gitweb.perl | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/gitweb/gitweb.perl b/gitweb/gitweb.perl index a8b5fad266..126d3ef4bd 100755 --- a/gitweb/gitweb.perl +++ b/gitweb/gitweb.perl @@ -54,6 +54,11 @@ sub evaluate_uri { # to build the base URL ourselves: our $path_info = decode_utf8($ENV{"PATH_INFO"}); if ($path_info) { + # $path_info has already been URL-decoded by the web server, but + # $my_url and $my_uri have not. URL-decode them so we can properly + # strip $path_info. + $my_url = unescape($my_url); + $my_uri = unescape($my_uri); if ($my_url =~ s,\Q$path_info\E$,, && $my_uri =~ s,\Q$path_info\E$,, && defined $ENV{'SCRIPT_NAME'}) { From 9a8eea9604ade731c3d0ff10136f2ae81b2a13c8 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Mon, 10 Sep 2012 15:30:46 -0700 Subject: [PATCH 11/11] Almost 1.7.11.6 Signed-off-by: Junio C Hamano --- Documentation/RelNotes/1.7.11.6.txt | 30 ++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/Documentation/RelNotes/1.7.11.6.txt b/Documentation/RelNotes/1.7.11.6.txt index e548a59824..84ba827b1b 100644 --- a/Documentation/RelNotes/1.7.11.6.txt +++ b/Documentation/RelNotes/1.7.11.6.txt @@ -4,7 +4,8 @@ Git v1.7.11.6 Release Notes Fixes since v1.7.11.5 --------------------- -This is primarily documentation and low-impact code clarification. +This consists primarily of documentation updates and low-impact code +clarification and bugfixes. - "ciabot" script (in contrib/) has been updated with extensive documentation. @@ -32,3 +33,30 @@ This is primarily documentation and low-impact code clarification. - "git commit --amend" let the user edit the log message and then died when the human-readable committer name was given insufficiently by getpwent(3). + + - The reflog entries left by "git rebase" and "git rebase -i" were + inconsistent (the interactive one gave an abbreviated object name). + + - When the user exports a non-default IFS without HT, scripts that + rely on being able to parse "ls-files -s | while read a b c..." + started to fail. Protect them from such a misconfiguration. + + - When "git push" triggered the automatic gc on the receiving end, a + message from "git prune" that said it was removing cruft leaked to + the standard output, breaking the communication protocol. + + - "git diff" had a confusion between taking data from a path in the + working tree and taking data from an object that happens to have + name 0{40} recorded in a tree. + + - "git send-email" did not unquote encoded words that appear on the + header correctly, and lost "_" from strings. + + - When the user gives an argument that can be taken as both a + revision name and a pathname without disambiguating with "--", we + used to give a help message "Use '--' to separate". The message + has been clarified to show where that '--' goes on the command + line. + + - "gitweb" when used with PATH_INFO failed to notice directories with + SP (and other characters that need URL-style quoting) in them.