From b8ffedca6f9e1043956ba611ae52bea449779456 Mon Sep 17 00:00:00 2001 From: Thomas Rast Date: Mon, 12 Dec 2011 22:16:06 +0100 Subject: [PATCH 1/3] grep: load funcname patterns for -W git-grep avoids loading the funcname patterns unless they are needed. ba8ea74 (grep: add option to show whole function as context, 2011-08-01) forgot to extend this test also to the new funcbody feature. Do so. The catch is that we also have to disable threading when using userdiff, as explained in grep_threads_ok(). So we must be careful to introduce the same test there. Signed-off-by: Thomas Rast Signed-off-by: Junio C Hamano --- grep.c | 7 ++++--- t/t7810-grep.sh | 14 ++++++++++++++ 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/grep.c b/grep.c index b29d09c7f6..7a070e97c4 100644 --- a/grep.c +++ b/grep.c @@ -948,8 +948,8 @@ int grep_threads_ok(const struct grep_opt *opt) * machinery in grep_buffer_1. The attribute code is not * thread safe, so we disable the use of threads. */ - if (opt->funcname && !opt->unmatch_name_only && !opt->status_only && - !opt->name_only) + if ((opt->funcname || opt->funcbody) + && !opt->unmatch_name_only && !opt->status_only && !opt->name_only) return 0; return 1; @@ -1008,7 +1008,8 @@ static int grep_buffer_1(struct grep_opt *opt, const char *name, } memset(&xecfg, 0, sizeof(xecfg)); - if (opt->funcname && !opt->unmatch_name_only && !opt->status_only && + if ((opt->funcname || opt->funcbody) + && !opt->unmatch_name_only && !opt->status_only && !opt->name_only && !binary_match_only && !collect_hits) { struct userdiff_driver *drv = userdiff_find_by_path(name); if (drv && drv->funcname.pattern) { diff --git a/t/t7810-grep.sh b/t/t7810-grep.sh index 81263b7851..7ba5b16f99 100755 --- a/t/t7810-grep.sh +++ b/t/t7810-grep.sh @@ -523,6 +523,20 @@ test_expect_success 'grep -W' ' test_cmp expected actual ' +cat >expected <.gitattributes && + git grep -W return >actual && + test_cmp expected actual +' + test_expect_success 'grep from a subdirectory to search wider area (1)' ' mkdir -p s && ( From 0579f91dd74a0902e52d1e6e839cc31b99f12cfc Mon Sep 17 00:00:00 2001 From: Thomas Rast Date: Mon, 12 Dec 2011 22:16:07 +0100 Subject: [PATCH 2/3] grep: enable threading with -p and -W using lazy attribute lookup Lazily load the userdiff attributes in match_funcname(). Use a separate mutex around this loading to protect the (not thread-safe) attributes machinery. This lets us re-enable threading with -p and -W while reducing the overhead caused by looking up attributes. Signed-off-by: Thomas Rast Signed-off-by: Junio C Hamano --- builtin/grep.c | 13 ++++++--- grep.c | 73 ++++++++++++++++++++++++++++++-------------------- grep.h | 10 +++++++ 3 files changed, 63 insertions(+), 33 deletions(-) diff --git a/builtin/grep.c b/builtin/grep.c index 988ea1d332..6474eed19e 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -17,7 +17,6 @@ #include "grep.h" #include "quote.h" #include "dir.h" -#include "thread-utils.h" static char const * const grep_usage[] = { "git grep [options] [-e] [...] [[--] ...]", @@ -256,6 +255,7 @@ static void start_threads(struct grep_opt *opt) pthread_mutex_init(&grep_mutex, NULL); pthread_mutex_init(&read_sha1_mutex, NULL); + pthread_mutex_init(&grep_attr_mutex, NULL); pthread_cond_init(&cond_add, NULL); pthread_cond_init(&cond_write, NULL); pthread_cond_init(&cond_result, NULL); @@ -303,6 +303,7 @@ static int wait_all(void) pthread_mutex_destroy(&grep_mutex); pthread_mutex_destroy(&read_sha1_mutex); + pthread_mutex_destroy(&grep_attr_mutex); pthread_cond_destroy(&cond_add); pthread_cond_destroy(&cond_write); pthread_cond_destroy(&cond_result); @@ -1002,17 +1003,21 @@ int cmd_grep(int argc, const char **argv, const char *prefix) opt.regflags |= REG_ICASE; #ifndef NO_PTHREADS - if (online_cpus() == 1 || !grep_threads_ok(&opt)) + if (online_cpus() == 1) use_threads = 0; +#else + use_threads = 0; +#endif + + opt.use_threads = use_threads; +#ifndef NO_PTHREADS if (use_threads) { if (opt.pre_context || opt.post_context || opt.file_break || opt.funcbody) skip_first_line = 1; start_threads(&opt); } -#else - use_threads = 0; #endif compile_grep_patterns(&opt); diff --git a/grep.c b/grep.c index 7a070e97c4..486230b511 100644 --- a/grep.c +++ b/grep.c @@ -806,10 +806,46 @@ static void show_line(struct grep_opt *opt, char *bol, char *eol, opt->output(opt, "\n", 1); } -static int match_funcname(struct grep_opt *opt, char *bol, char *eol) +#ifndef NO_PTHREADS +/* + * This lock protects access to the gitattributes machinery, which is + * not thread-safe. + */ +pthread_mutex_t grep_attr_mutex; + +static inline void grep_attr_lock(struct grep_opt *opt) +{ + if (opt->use_threads) + pthread_mutex_lock(&grep_attr_mutex); +} + +static inline void grep_attr_unlock(struct grep_opt *opt) +{ + if (opt->use_threads) + pthread_mutex_unlock(&grep_attr_mutex); +} +#else +#define grep_attr_lock(opt) +#define grep_attr_unlock(opt) +#endif + +static int match_funcname(struct grep_opt *opt, const char *name, char *bol, char *eol) { xdemitconf_t *xecfg = opt->priv; - if (xecfg && xecfg->find_func) { + if (xecfg && !xecfg->find_func) { + struct userdiff_driver *drv; + grep_attr_lock(opt); + drv = userdiff_find_by_path(name); + grep_attr_unlock(opt); + if (drv && drv->funcname.pattern) { + const struct userdiff_funcname *pe = &drv->funcname; + xdiff_set_find_func(xecfg, pe->pattern, pe->cflags); + } else { + xecfg = opt->priv = NULL; + } + } + + if (xecfg) { char buf[1]; return xecfg->find_func(bol, eol - bol, buf, 1, xecfg->find_func_priv) >= 0; @@ -835,7 +871,7 @@ static void show_funcname_line(struct grep_opt *opt, const char *name, if (lno <= opt->last_shown) break; - if (match_funcname(opt, bol, eol)) { + if (match_funcname(opt, name, bol, eol)) { show_line(opt, bol, eol, name, lno, '='); break; } @@ -848,7 +884,7 @@ static void show_pre_context(struct grep_opt *opt, const char *name, char *buf, unsigned cur = lno, from = 1, funcname_lno = 0; int funcname_needed = !!opt->funcname; - if (opt->funcbody && !match_funcname(opt, bol, end)) + if (opt->funcbody && !match_funcname(opt, name, bol, end)) funcname_needed = 2; if (opt->pre_context < lno) @@ -864,7 +900,7 @@ static void show_pre_context(struct grep_opt *opt, const char *name, char *buf, while (bol > buf && bol[-1] != '\n') bol--; cur--; - if (funcname_needed && match_funcname(opt, bol, eol)) { + if (funcname_needed && match_funcname(opt, name, bol, eol)) { funcname_lno = cur; funcname_needed = 0; } @@ -942,19 +978,6 @@ static int look_ahead(struct grep_opt *opt, return 0; } -int grep_threads_ok(const struct grep_opt *opt) -{ - /* If this condition is true, then we may use the attribute - * machinery in grep_buffer_1. The attribute code is not - * thread safe, so we disable the use of threads. - */ - if ((opt->funcname || opt->funcbody) - && !opt->unmatch_name_only && !opt->status_only && !opt->name_only) - return 0; - - return 1; -} - static void std_output(struct grep_opt *opt, const void *buf, size_t size) { fwrite(buf, size, 1, stdout); @@ -1008,16 +1031,8 @@ static int grep_buffer_1(struct grep_opt *opt, const char *name, } memset(&xecfg, 0, sizeof(xecfg)); - if ((opt->funcname || opt->funcbody) - && !opt->unmatch_name_only && !opt->status_only && - !opt->name_only && !binary_match_only && !collect_hits) { - struct userdiff_driver *drv = userdiff_find_by_path(name); - if (drv && drv->funcname.pattern) { - const struct userdiff_funcname *pe = &drv->funcname; - xdiff_set_find_func(&xecfg, pe->pattern, pe->cflags); - opt->priv = &xecfg; - } - } + opt->priv = &xecfg; + try_lookahead = should_lookahead(opt); while (left) { @@ -1093,7 +1108,7 @@ static int grep_buffer_1(struct grep_opt *opt, const char *name, show_function = 1; goto next_line; } - if (show_function && match_funcname(opt, bol, eol)) + if (show_function && match_funcname(opt, name, bol, eol)) show_function = 0; if (show_function || (last_hit && lno <= last_hit + opt->post_context)) { diff --git a/grep.h b/grep.h index a65280026d..fb205f3542 100644 --- a/grep.h +++ b/grep.h @@ -8,6 +8,7 @@ typedef int pcre; typedef int pcre_extra; #endif #include "kwset.h" +#include "thread-utils.h" enum grep_pat_token { GREP_PATTERN, @@ -115,6 +116,7 @@ struct grep_opt { int show_hunk_mark; int file_break; int heading; + int use_threads; void *priv; void (*output)(struct grep_opt *opt, const void *data, size_t size); @@ -131,4 +133,12 @@ extern int grep_buffer(struct grep_opt *opt, const char *name, char *buf, unsign extern struct grep_opt *grep_opt_dup(const struct grep_opt *opt); extern int grep_threads_ok(const struct grep_opt *opt); +#ifndef NO_PTHREADS +/* + * Mutex used around access to the attributes machinery if + * opt->use_threads. Must be initialized/destroyed by callers! + */ +extern pthread_mutex_t grep_attr_mutex; +#endif + #endif From 53b8d931b649a0d82e16358a6bf1dd980dc24a6b Mon Sep 17 00:00:00 2001 From: Thomas Rast Date: Mon, 12 Dec 2011 22:16:08 +0100 Subject: [PATCH 3/3] grep: disable threading in non-worktree case MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Measurements by various people have shown that grepping in parallel is not beneficial when the object store is involved. For example, with a simple regex: Threads | --cached case | worktree case ---------------------------------------------------------------- 8 (default) | 2.88u 0.21s 0:02.94real | 0.19u 0.32s 0:00.16real 4 | 2.89u 0.29s 0:02.99real | 0.16u 0.34s 0:00.17real 2 | 2.83u 0.36s 0:02.87real | 0.18u 0.32s 0:00.26real NO_PTHREADS | 2.16u 0.08s 0:02.25real | 0.12u 0.17s 0:00.31real This happens because all the threads contend on read_sha1_mutex almost all of the time. A more complex regex allows the threads to do more work in parallel, but as Jeff King found out, the "super boost" (much higher clock when only one core is active) feature of recent CPUs still causes the unthreaded case to win by a large margin. So until the pack machinery allows unthreaded access, we disable grep's threading in all but the worktree case. Helped-by: René Scharfe Helped-by: Jeff King Signed-off-by: Thomas Rast Signed-off-by: Junio C Hamano --- builtin/grep.c | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/builtin/grep.c b/builtin/grep.c index 6474eed19e..9ce064ac11 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -1002,24 +1002,6 @@ int cmd_grep(int argc, const char **argv, const char *prefix) if (!opt.fixed && opt.ignore_case) opt.regflags |= REG_ICASE; -#ifndef NO_PTHREADS - if (online_cpus() == 1) - use_threads = 0; -#else - use_threads = 0; -#endif - - opt.use_threads = use_threads; - -#ifndef NO_PTHREADS - if (use_threads) { - if (opt.pre_context || opt.post_context || opt.file_break || - opt.funcbody) - skip_first_line = 1; - start_threads(&opt); - } -#endif - compile_grep_patterns(&opt); /* Check revs and then paths */ @@ -1041,6 +1023,24 @@ int cmd_grep(int argc, const char **argv, const char *prefix) break; } +#ifndef NO_PTHREADS + if (list.nr || cached || online_cpus() == 1) + use_threads = 0; +#else + use_threads = 0; +#endif + + opt.use_threads = use_threads; + +#ifndef NO_PTHREADS + if (use_threads) { + if (opt.pre_context || opt.post_context || opt.file_break || + opt.funcbody) + skip_first_line = 1; + start_threads(&opt); + } +#endif + /* The rest are paths */ if (!seen_dashdash) { int j;