diff: cache textconv output
Running a textconv filter can take a long time. It's particularly bad for a large file which needs to be spooled to disk, but even for small files, the fork+exec overhead can add up for something like "git log -p". This patch uses the notes-cache mechanism to keep a fast cache of textconv output. Caches are stored in refs/notes/textconv/$x, where $x is the userdiff driver defined in gitattributes. Caching is enabled only if diff.$x.cachetextconv is true. In my test repo, on a commit with 45 jpg and avi files changed and a textconv to show their exif tags: [before] $ time git show >/dev/null real 0m13.724s user 0m12.057s sys 0m1.624s [after, first run] $ git config diff.mfo.cachetextconv true $ time git show >/dev/null real 0m14.252s user 0m12.197s sys 0m1.800s [after, subsequent runs] $ time git show >/dev/null real 0m0.352s user 0m0.148s sys 0m0.200s So for a slight (3.8%) cost on the first run, we achieve an almost 40x speed up on subsequent runs. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>maint
parent
840383b2c2
commit
d9bae1a178
|
@ -414,6 +414,26 @@ because it quickly conveys the changes you have made), you
|
||||||
should generate it separately and send it as a comment _in
|
should generate it separately and send it as a comment _in
|
||||||
addition to_ the usual binary diff that you might send.
|
addition to_ the usual binary diff that you might send.
|
||||||
|
|
||||||
|
Because text conversion can be slow, especially when doing a
|
||||||
|
large number of them with `git log -p`, git provides a mechanism
|
||||||
|
to cache the output and use it in future diffs. To enable
|
||||||
|
caching, set the "cachetextconv" variable in your diff driver's
|
||||||
|
config. For example:
|
||||||
|
|
||||||
|
------------------------
|
||||||
|
[diff "jpg"]
|
||||||
|
textconv = exif
|
||||||
|
cachetextconv = true
|
||||||
|
------------------------
|
||||||
|
|
||||||
|
This will cache the result of running "exif" on each blob
|
||||||
|
indefinitely. If you change the textconv config variable for a
|
||||||
|
diff driver, git will automatically invalidate the cache entries
|
||||||
|
and re-run the textconv filter. If you want to invalidate the
|
||||||
|
cache manually (e.g., because your version of "exif" was updated
|
||||||
|
and now produces better output), you can remove the cache
|
||||||
|
manually with `git update-ref -d refs/notes/textconv/jpg` (where
|
||||||
|
"jpg" is the name of the diff driver, as in the example above).
|
||||||
|
|
||||||
Performing a three-way merge
|
Performing a three-way merge
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
52
diff.c
52
diff.c
|
@ -43,7 +43,7 @@ static char diff_colors[][COLOR_MAXLEN] = {
|
||||||
};
|
};
|
||||||
|
|
||||||
static void diff_filespec_load_driver(struct diff_filespec *one);
|
static void diff_filespec_load_driver(struct diff_filespec *one);
|
||||||
static size_t fill_textconv(const char *cmd,
|
static size_t fill_textconv(struct userdiff_driver *driver,
|
||||||
struct diff_filespec *df, char **outbuf);
|
struct diff_filespec *df, char **outbuf);
|
||||||
|
|
||||||
static int parse_diff_color_slot(const char *var, int ofs)
|
static int parse_diff_color_slot(const char *var, int ofs)
|
||||||
|
@ -466,8 +466,8 @@ static void emit_rewrite_diff(const char *name_a,
|
||||||
const char *name_b,
|
const char *name_b,
|
||||||
struct diff_filespec *one,
|
struct diff_filespec *one,
|
||||||
struct diff_filespec *two,
|
struct diff_filespec *two,
|
||||||
const char *textconv_one,
|
struct userdiff_driver *textconv_one,
|
||||||
const char *textconv_two,
|
struct userdiff_driver *textconv_two,
|
||||||
struct diff_options *o)
|
struct diff_options *o)
|
||||||
{
|
{
|
||||||
int lc_a, lc_b;
|
int lc_a, lc_b;
|
||||||
|
@ -1569,14 +1569,26 @@ void diff_set_mnemonic_prefix(struct diff_options *options, const char *a, const
|
||||||
options->b_prefix = b;
|
options->b_prefix = b;
|
||||||
}
|
}
|
||||||
|
|
||||||
static const char *get_textconv(struct diff_filespec *one)
|
static struct userdiff_driver *get_textconv(struct diff_filespec *one)
|
||||||
{
|
{
|
||||||
if (!DIFF_FILE_VALID(one))
|
if (!DIFF_FILE_VALID(one))
|
||||||
return NULL;
|
return NULL;
|
||||||
if (!S_ISREG(one->mode))
|
if (!S_ISREG(one->mode))
|
||||||
return NULL;
|
return NULL;
|
||||||
diff_filespec_load_driver(one);
|
diff_filespec_load_driver(one);
|
||||||
return one->driver->textconv;
|
if (!one->driver->textconv)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
if (one->driver->textconv_want_cache && !one->driver->textconv_cache) {
|
||||||
|
struct notes_cache *c = xmalloc(sizeof(*c));
|
||||||
|
struct strbuf name = STRBUF_INIT;
|
||||||
|
|
||||||
|
strbuf_addf(&name, "textconv/%s", one->driver->name);
|
||||||
|
notes_cache_init(c, name.buf, one->driver->textconv);
|
||||||
|
one->driver->textconv_cache = c;
|
||||||
|
}
|
||||||
|
|
||||||
|
return one->driver;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void builtin_diff(const char *name_a,
|
static void builtin_diff(const char *name_a,
|
||||||
|
@ -1593,7 +1605,8 @@ static void builtin_diff(const char *name_a,
|
||||||
const char *set = diff_get_color_opt(o, DIFF_METAINFO);
|
const char *set = diff_get_color_opt(o, DIFF_METAINFO);
|
||||||
const char *reset = diff_get_color_opt(o, DIFF_RESET);
|
const char *reset = diff_get_color_opt(o, DIFF_RESET);
|
||||||
const char *a_prefix, *b_prefix;
|
const char *a_prefix, *b_prefix;
|
||||||
const char *textconv_one = NULL, *textconv_two = NULL;
|
struct userdiff_driver *textconv_one = NULL;
|
||||||
|
struct userdiff_driver *textconv_two = NULL;
|
||||||
struct strbuf header = STRBUF_INIT;
|
struct strbuf header = STRBUF_INIT;
|
||||||
|
|
||||||
if (DIFF_OPT_TST(o, SUBMODULE_LOG) &&
|
if (DIFF_OPT_TST(o, SUBMODULE_LOG) &&
|
||||||
|
@ -3888,13 +3901,13 @@ static char *run_textconv(const char *pgm, struct diff_filespec *spec,
|
||||||
return strbuf_detach(&buf, outsize);
|
return strbuf_detach(&buf, outsize);
|
||||||
}
|
}
|
||||||
|
|
||||||
static size_t fill_textconv(const char *cmd,
|
static size_t fill_textconv(struct userdiff_driver *driver,
|
||||||
struct diff_filespec *df,
|
struct diff_filespec *df,
|
||||||
char **outbuf)
|
char **outbuf)
|
||||||
{
|
{
|
||||||
size_t size;
|
size_t size;
|
||||||
|
|
||||||
if (!cmd) {
|
if (!driver || !driver->textconv) {
|
||||||
if (!DIFF_FILE_VALID(df)) {
|
if (!DIFF_FILE_VALID(df)) {
|
||||||
*outbuf = "";
|
*outbuf = "";
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -3905,8 +3918,29 @@ static size_t fill_textconv(const char *cmd,
|
||||||
return df->size;
|
return df->size;
|
||||||
}
|
}
|
||||||
|
|
||||||
*outbuf = run_textconv(cmd, df, &size);
|
if (driver->textconv_cache) {
|
||||||
|
*outbuf = notes_cache_get(driver->textconv_cache, df->sha1,
|
||||||
|
&size);
|
||||||
|
if (*outbuf)
|
||||||
|
return size;
|
||||||
|
}
|
||||||
|
|
||||||
|
*outbuf = run_textconv(driver->textconv, df, &size);
|
||||||
if (!*outbuf)
|
if (!*outbuf)
|
||||||
die("unable to read files to diff");
|
die("unable to read files to diff");
|
||||||
|
|
||||||
|
if (driver->textconv_cache) {
|
||||||
|
/* ignore errors, as we might be in a readonly repository */
|
||||||
|
notes_cache_put(driver->textconv_cache, df->sha1, *outbuf,
|
||||||
|
size);
|
||||||
|
/*
|
||||||
|
* we could save up changes and flush them all at the end,
|
||||||
|
* but we would need an extra call after all diffing is done.
|
||||||
|
* Since generating a cache entry is the slow path anyway,
|
||||||
|
* this extra overhead probably isn't a big deal.
|
||||||
|
*/
|
||||||
|
notes_cache_write(driver->textconv_cache);
|
||||||
|
}
|
||||||
|
|
||||||
return size;
|
return size;
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,109 @@
|
||||||
|
#!/bin/sh
|
||||||
|
|
||||||
|
test_description='test textconv caching'
|
||||||
|
. ./test-lib.sh
|
||||||
|
|
||||||
|
cat >helper <<'EOF'
|
||||||
|
#!/bin/sh
|
||||||
|
sed 's/^/converted: /' "$@" >helper.out
|
||||||
|
cat helper.out
|
||||||
|
EOF
|
||||||
|
chmod +x helper
|
||||||
|
|
||||||
|
test_expect_success 'setup' '
|
||||||
|
echo foo content 1 >foo.bin &&
|
||||||
|
echo bar content 1 >bar.bin &&
|
||||||
|
git add . &&
|
||||||
|
git commit -m one &&
|
||||||
|
echo foo content 2 >foo.bin &&
|
||||||
|
echo bar content 2 >bar.bin &&
|
||||||
|
git commit -a -m two &&
|
||||||
|
echo "*.bin diff=magic" >.gitattributes &&
|
||||||
|
git config diff.magic.textconv ./helper &&
|
||||||
|
git config diff.magic.cachetextconv true
|
||||||
|
'
|
||||||
|
|
||||||
|
cat >expect <<EOF
|
||||||
|
diff --git a/bar.bin b/bar.bin
|
||||||
|
index fcf9166..28283d5 100644
|
||||||
|
--- a/bar.bin
|
||||||
|
+++ b/bar.bin
|
||||||
|
@@ -1 +1 @@
|
||||||
|
-converted: bar content 1
|
||||||
|
+converted: bar content 2
|
||||||
|
diff --git a/foo.bin b/foo.bin
|
||||||
|
index d5b9fe3..1345db2 100644
|
||||||
|
--- a/foo.bin
|
||||||
|
+++ b/foo.bin
|
||||||
|
@@ -1 +1 @@
|
||||||
|
-converted: foo content 1
|
||||||
|
+converted: foo content 2
|
||||||
|
EOF
|
||||||
|
|
||||||
|
test_expect_success 'first textconv works' '
|
||||||
|
git diff HEAD^ HEAD >actual &&
|
||||||
|
test_cmp expect actual
|
||||||
|
'
|
||||||
|
|
||||||
|
test_expect_success 'cached textconv produces same output' '
|
||||||
|
git diff HEAD^ HEAD >actual &&
|
||||||
|
test_cmp expect actual
|
||||||
|
'
|
||||||
|
|
||||||
|
test_expect_success 'cached textconv does not run helper' '
|
||||||
|
rm -f helper.out &&
|
||||||
|
git diff HEAD^ HEAD >actual &&
|
||||||
|
test_cmp expect actual &&
|
||||||
|
! test -r helper.out
|
||||||
|
'
|
||||||
|
|
||||||
|
cat >expect <<EOF
|
||||||
|
diff --git a/bar.bin b/bar.bin
|
||||||
|
index fcf9166..28283d5 100644
|
||||||
|
--- a/bar.bin
|
||||||
|
+++ b/bar.bin
|
||||||
|
@@ -1,2 +1,2 @@
|
||||||
|
converted: other
|
||||||
|
-converted: bar content 1
|
||||||
|
+converted: bar content 2
|
||||||
|
diff --git a/foo.bin b/foo.bin
|
||||||
|
index d5b9fe3..1345db2 100644
|
||||||
|
--- a/foo.bin
|
||||||
|
+++ b/foo.bin
|
||||||
|
@@ -1,2 +1,2 @@
|
||||||
|
converted: other
|
||||||
|
-converted: foo content 1
|
||||||
|
+converted: foo content 2
|
||||||
|
EOF
|
||||||
|
test_expect_success 'changing textconv invalidates cache' '
|
||||||
|
echo other >other &&
|
||||||
|
git config diff.magic.textconv "./helper other" &&
|
||||||
|
git diff HEAD^ HEAD >actual &&
|
||||||
|
test_cmp expect actual
|
||||||
|
'
|
||||||
|
|
||||||
|
cat >expect <<EOF
|
||||||
|
diff --git a/bar.bin b/bar.bin
|
||||||
|
index fcf9166..28283d5 100644
|
||||||
|
--- a/bar.bin
|
||||||
|
+++ b/bar.bin
|
||||||
|
@@ -1,2 +1,2 @@
|
||||||
|
converted: other
|
||||||
|
-converted: bar content 1
|
||||||
|
+converted: bar content 2
|
||||||
|
diff --git a/foo.bin b/foo.bin
|
||||||
|
index d5b9fe3..1345db2 100644
|
||||||
|
--- a/foo.bin
|
||||||
|
+++ b/foo.bin
|
||||||
|
@@ -1 +1 @@
|
||||||
|
-converted: foo content 1
|
||||||
|
+converted: foo content 2
|
||||||
|
EOF
|
||||||
|
test_expect_success 'switching diff driver produces correct results' '
|
||||||
|
git config diff.moremagic.textconv ./helper &&
|
||||||
|
echo foo.bin diff=moremagic >>.gitattributes &&
|
||||||
|
git diff HEAD^ HEAD >actual &&
|
||||||
|
test_cmp expect actual
|
||||||
|
'
|
||||||
|
|
||||||
|
test_done
|
|
@ -1,3 +1,4 @@
|
||||||
|
#include "cache.h"
|
||||||
#include "userdiff.h"
|
#include "userdiff.h"
|
||||||
#include "cache.h"
|
#include "cache.h"
|
||||||
#include "attr.h"
|
#include "attr.h"
|
||||||
|
@ -167,6 +168,12 @@ static int parse_tristate(int *b, const char *k, const char *v)
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int parse_bool(int *b, const char *k, const char *v)
|
||||||
|
{
|
||||||
|
*b = git_config_bool(k, v);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
int userdiff_config(const char *k, const char *v)
|
int userdiff_config(const char *k, const char *v)
|
||||||
{
|
{
|
||||||
struct userdiff_driver *drv;
|
struct userdiff_driver *drv;
|
||||||
|
@ -181,6 +188,8 @@ int userdiff_config(const char *k, const char *v)
|
||||||
return parse_string(&drv->external, k, v);
|
return parse_string(&drv->external, k, v);
|
||||||
if ((drv = parse_driver(k, v, "textconv")))
|
if ((drv = parse_driver(k, v, "textconv")))
|
||||||
return parse_string(&drv->textconv, k, v);
|
return parse_string(&drv->textconv, k, v);
|
||||||
|
if ((drv = parse_driver(k, v, "cachetextconv")))
|
||||||
|
return parse_bool(&drv->textconv_want_cache, k, v);
|
||||||
if ((drv = parse_driver(k, v, "wordregex")))
|
if ((drv = parse_driver(k, v, "wordregex")))
|
||||||
return parse_string(&drv->word_regex, k, v);
|
return parse_string(&drv->word_regex, k, v);
|
||||||
|
|
||||||
|
|
|
@ -1,6 +1,8 @@
|
||||||
#ifndef USERDIFF_H
|
#ifndef USERDIFF_H
|
||||||
#define USERDIFF_H
|
#define USERDIFF_H
|
||||||
|
|
||||||
|
#include "notes-cache.h"
|
||||||
|
|
||||||
struct userdiff_funcname {
|
struct userdiff_funcname {
|
||||||
const char *pattern;
|
const char *pattern;
|
||||||
int cflags;
|
int cflags;
|
||||||
|
@ -13,6 +15,8 @@ struct userdiff_driver {
|
||||||
struct userdiff_funcname funcname;
|
struct userdiff_funcname funcname;
|
||||||
const char *word_regex;
|
const char *word_regex;
|
||||||
const char *textconv;
|
const char *textconv;
|
||||||
|
struct notes_cache *textconv_cache;
|
||||||
|
int textconv_want_cache;
|
||||||
};
|
};
|
||||||
|
|
||||||
int userdiff_config(const char *k, const char *v);
|
int userdiff_config(const char *k, const char *v);
|
||||||
|
|
Loading…
Reference in New Issue