From b78281f7215c0236da6bd5c04ec5e500c83fd101 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Fri, 14 Sep 2007 12:12:32 -0700 Subject: [PATCH 1/6] diff --no-index: do not forget to run diff_setup_done() Code inspection by Linus found this. Signed-off-by: Junio C Hamano --- diff-lib.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/diff-lib.c b/diff-lib.c index f5568c3b36..da5571302d 100644 --- a/diff-lib.c +++ b/diff-lib.c @@ -298,6 +298,8 @@ int setup_diff_no_index(struct rev_info *revs, revs->diffopt.nr_paths = 2; revs->diffopt.no_index = 1; revs->max_count = -2; + if (diff_setup_done(&revs->diffopt) < 0) + die("diff_setup_done failed"); return 0; } From 0024a54923a12f8c05ce4290f5ebefab0cce4336 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 14 Sep 2007 10:39:48 -0700 Subject: [PATCH 2/6] Fix the rename detection limit checking This adds more proper rename detection limits. Instead of just checking the limit against the number of potential rename destinations, we verify that the rename matrix (which is what really matters) doesn't grow ridiculously large, and we also make sure that we don't overflow when doing the matrix size calculation. This also changes the default limits from unlimited, to a rename matrix that is limited to 100 entries on a side. You can raise it with the config entry, or by using the "-l" command line flag, but at least the default is now a sane number that avoids spending lots of time (and memory) in situations that likely don't merit it. The choice of default value is of course very debatable. Limiting the rename matrix to a 100x100 size will mean that even if you have just one obvious rename, but you also create (or delete) 10,000 files, the rename matrix will be so big that we disable the heuristics. Sounds reasonable to me, but let's see if people hit this (and, perhaps more importantly, actually *care*) in real life. Signed-off-by: Linus Torvalds Signed-off-by: Junio C Hamano --- diff.c | 2 +- diffcore-rename.c | 19 +++++++++++++++++-- wt-status.c | 1 + 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/diff.c b/diff.c index 1aca5df522..0ee9ea1c1b 100644 --- a/diff.c +++ b/diff.c @@ -17,7 +17,7 @@ #endif static int diff_detect_rename_default; -static int diff_rename_limit_default = -1; +static int diff_rename_limit_default = 100; static int diff_use_color_default; int diff_auto_refresh_index = 1; diff --git a/diffcore-rename.c b/diffcore-rename.c index 6bde4396f2..41b35c3a9e 100644 --- a/diffcore-rename.c +++ b/diffcore-rename.c @@ -298,10 +298,25 @@ void diffcore_rename(struct diff_options *options) else if (detect_rename == DIFF_DETECT_COPY) register_rename_src(p->one, 1, p->score); } - if (rename_dst_nr == 0 || rename_src_nr == 0 || - (0 < rename_limit && rename_limit < rename_dst_nr)) + if (rename_dst_nr == 0 || rename_src_nr == 0) goto cleanup; /* nothing to do */ + /* + * This basically does a test for the rename matrix not + * growing larger than a "rename_limit" square matrix, ie: + * + * rename_dst_nr * rename_src_nr > rename_limit * rename_limit + * + * but handles the potential overflow case specially (and we + * assume at least 32-bit integers) + */ + if (rename_limit <= 0 || rename_limit > 32767) + rename_limit = 32767; + if (rename_dst_nr > rename_limit && rename_src_nr > rename_limit) + goto cleanup; + if (rename_dst_nr * rename_src_nr > rename_limit * rename_limit) + goto cleanup; + /* We really want to cull the candidates list early * with cheap tests in order to avoid doing deltas. * The first round matches up the up-to-date entries, diff --git a/wt-status.c b/wt-status.c index 52054201c2..10ce6eedc7 100644 --- a/wt-status.c +++ b/wt-status.c @@ -227,6 +227,7 @@ static void wt_status_print_updated(struct wt_status *s) rev.diffopt.format_callback = wt_status_print_updated_cb; rev.diffopt.format_callback_data = s; rev.diffopt.detect_rename = 1; + rev.diffopt.rename_limit = 100; wt_read_cache(s); run_diff_index(&rev, 1); } From 42b5f8693eddbbd486579fb2304db1bc7282ac26 Mon Sep 17 00:00:00 2001 From: Jari Aalto Date: Fri, 14 Sep 2007 21:38:02 +0300 Subject: [PATCH 3/6] Documentation/git-archive.txt: a couple of clarifications. The description of the option gave impression that there were several formats available by using three dots. There are no other formats than tar and gzip currently supported. Clarify that the archive goes to the standard output. Signed-off-by: Jari Aalto Signed-off-by: Junio C Hamano --- Documentation/git-archive.txt | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Documentation/git-archive.txt b/Documentation/git-archive.txt index f2080eb6ad..e1e2d60fef 100644 --- a/Documentation/git-archive.txt +++ b/Documentation/git-archive.txt @@ -15,7 +15,8 @@ SYNOPSIS DESCRIPTION ----------- Creates an archive of the specified format containing the tree -structure for the named tree. If is specified it is +structure for the named tree, and writes it out to the standard +output. If is specified it is prepended to the filenames in the archive. 'git-archive' behaves differently when given a tree ID versus when @@ -31,7 +32,7 @@ OPTIONS ------- --format=:: - Format of the resulting archive: 'tar', 'zip'... The default + Format of the resulting archive: 'tar' or 'zip'. The default is 'tar'. --list, -l:: From 43b98acc23306fd7fff888477937063361a09593 Mon Sep 17 00:00:00 2001 From: Benoit Sigoure Date: Fri, 14 Sep 2007 10:29:04 +0200 Subject: [PATCH 4/6] Add test to check recent fix to "git add -u" An earlier commit fixed type-change case in "git add -u". This adds a test to make sure we do not introduce regression. At the same time, it fixes a stupid typo in the error message. Signed-off-by: Benoit Sigoure Signed-off-by: Junio C Hamano --- builtin-add.c | 2 +- t/t2200-add-update.sh | 11 ++++++++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/builtin-add.c b/builtin-add.c index 9847b7e019..3d8b8b4f89 100644 --- a/builtin-add.c +++ b/builtin-add.c @@ -95,7 +95,7 @@ static void update_callback(struct diff_queue_struct *q, const char *path = p->one->path; switch (p->status) { default: - die("unexpacted diff status %c", p->status); + die("unexpected diff status %c", p->status); case DIFF_STATUS_UNMERGED: case DIFF_STATUS_MODIFIED: case DIFF_STATUS_TYPE_CHANGED: diff --git a/t/t2200-add-update.sh b/t/t2200-add-update.sh index 61d08bb431..eb1ced3c37 100755 --- a/t/t2200-add-update.sh +++ b/t/t2200-add-update.sh @@ -16,11 +16,12 @@ only the updates to dir/sub.' test_expect_success setup ' echo initial >check && echo initial >top && + echo initial >foo && mkdir dir1 dir2 && echo initial >dir1/sub1 && echo initial >dir1/sub2 && echo initial >dir2/sub3 && - git add check dir1 dir2 top && + git add check dir1 dir2 top foo && test_tick git-commit -m initial && @@ -76,4 +77,12 @@ test_expect_success 'change gets noticed' ' ' +test_expect_success 'replace a file with a symlink' ' + + rm foo && + ln -s top foo && + git add -u -- foo + +' + test_done From 7c8b5eaf225db0b6b502126fc16f8f6814c38d24 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Fri, 14 Sep 2007 14:51:08 -0700 Subject: [PATCH 5/6] Documentation/git-config.txt: AsciiDoc tweak to avoid leading dot Bram Schoenmakers noticed that git-config document was formatted incorrectly. Depending on the version of AsciiDoc and docbook toolchain, it is sometimes taken as a numbered example by AsciiDoc, some other times passed intact to roff format to confuse "man". Since we refer to the repository metadata directory as $GIT_DIR elsewhere, work it around by using that symbolic name. Signed-off-by: Junio C Hamano --- Documentation/git-config.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/git-config.txt b/Documentation/git-config.txt index 5b794f4399..a592b61e2f 100644 --- a/Documentation/git-config.txt +++ b/Documentation/git-config.txt @@ -142,7 +142,7 @@ FILES If not set explicitly with '--file', there are three files where git-config will search for configuration options: -.git/config:: +$GIT_DIR/config:: Repository specific configuration file. (The filename is of course relative to the repository root, not the working directory.) From d99ebf081797dbb43ff618ff59f4c607b0acf045 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Fri, 14 Sep 2007 00:31:00 -0700 Subject: [PATCH 6/6] Split grep arguments in a way that does not requires to add /dev/null. In order to (almost) always show the name of the file without relying on "-H" option of GNU grep, we used to add /dev/null to the argument list unless we are doing -l or -L. This caused "/dev/null:0" to show up when -c is given in the output. It is not enough to add -c to the set of options we do not pass /dev/null for. When we have too many files, we invoke grep multiple times and we need to avoid giving a widow filename to the last invocation -- otherwise we will not see the name. This keeps two filenames when the argv[] buffer is about to overflow and we have not finished iterating over the index, so that the last round will always have at least two paths to work with (and not require /dev/null). An obvious and the only exception is when there is only 1 file that is given to the underlying grep, and in that case we avoid passing /dev/null and let the external "grep -c" report only the number of matches. Signed-off-by: Junio C Hamano --- builtin-grep.c | 90 +++++++++++++++++++++++++++++++++++++++++-------- t/t7002-grep.sh | 4 +++ 2 files changed, 80 insertions(+), 14 deletions(-) diff --git a/builtin-grep.c b/builtin-grep.c index e13cb31f2b..c7b45c4d58 100644 --- a/builtin-grep.c +++ b/builtin-grep.c @@ -187,6 +187,78 @@ static int exec_grep(int argc, const char **argv) else die("maximum number of args exceeded"); \ } while (0) +/* + * If you send a singleton filename to grep, it does not give + * the name of the file. GNU grep has "-H" but we would want + * that behaviour in a portable way. + * + * So we keep two pathnames in argv buffer unsent to grep in + * the main loop if we need to do more than one grep. + */ +static int flush_grep(struct grep_opt *opt, + int argc, int arg0, const char **argv, int *kept) +{ + int status; + int count = argc - arg0; + const char *kept_0 = NULL; + + if (count <= 2) { + /* + * Because we keep at least 2 paths in the call from + * the main loop (i.e. kept != NULL), and MAXARGS is + * far greater than 2, this usually is a call to + * conclude the grep. However, the user could attempt + * to overflow the argv buffer by giving too many + * options to leave very small number of real + * arguments even for the call in the main loop. + */ + if (kept) + die("insanely many options to grep"); + + /* + * If we have two or more paths, we do not have to do + * anything special, but we need to push /dev/null to + * get "-H" behaviour of GNU grep portably but when we + * are not doing "-l" nor "-L" nor "-c". + */ + if (count == 1 && + !opt->name_only && + !opt->unmatch_name_only && + !opt->count) { + argv[argc++] = "/dev/null"; + argv[argc] = NULL; + } + } + + else if (kept) { + /* + * Called because we found many paths and haven't finished + * iterating over the cache yet. We keep two paths + * for the concluding call. argv[argc-2] and argv[argc-1] + * has the last two paths, so save the first one away, + * replace it with NULL while sending the list to grep, + * and recover them after we are done. + */ + *kept = 2; + kept_0 = argv[argc-2]; + argv[argc-2] = NULL; + argc -= 2; + } + + status = exec_grep(argc, argv); + + if (kept_0) { + /* + * Then recover them. Now the last arg is beyond the + * terminating NULL which is at argc, and the second + * from the last is what we saved away in kept_0 + */ + argv[arg0++] = kept_0; + argv[arg0] = argv[argc+1]; + } + return status; +} + static int external_grep(struct grep_opt *opt, const char **paths, int cached) { int i, nr, argc, hit, len, status; @@ -253,22 +325,12 @@ static int external_grep(struct grep_opt *opt, const char **paths, int cached) push_arg(p->pattern); } - /* - * To make sure we get the header printed out when we want it, - * add /dev/null to the paths to grep. This is unnecessary - * (and wrong) with "-l" or "-L", which always print out the - * name anyway. - * - * GNU grep has "-H", but this is portable. - */ - if (!opt->name_only && !opt->unmatch_name_only) - push_arg("/dev/null"); - hit = 0; argc = nr; for (i = 0; i < active_nr; i++) { struct cache_entry *ce = active_cache[i]; char *name; + int kept; if (!S_ISREG(ntohl(ce->ce_mode))) continue; if (!pathspec_matches(paths, ce->name)) @@ -283,10 +345,10 @@ static int external_grep(struct grep_opt *opt, const char **paths, int cached) argv[argc++] = name; if (argc < MAXARGS && !ce_stage(ce)) continue; - status = exec_grep(argc, argv); + status = flush_grep(opt, argc, nr, argv, &kept); if (0 < status) hit = 1; - argc = nr; + argc = nr + kept; if (ce_stage(ce)) { do { i++; @@ -296,7 +358,7 @@ static int external_grep(struct grep_opt *opt, const char **paths, int cached) } } if (argc > nr) { - status = exec_grep(argc, argv); + status = flush_grep(opt, argc, nr, argv, NULL); if (0 < status) hit = 1; } diff --git a/t/t7002-grep.sh b/t/t7002-grep.sh index 6bfb899ed1..68b2b92879 100755 --- a/t/t7002-grep.sh +++ b/t/t7002-grep.sh @@ -107,6 +107,10 @@ do diff expected actual ' + test_expect_failure "grep -c $L (no /dev/null)" ' + git grep -c test $H | grep -q "/dev/null" + ' + done test_done