From 56c6910028a0468761b0cd9ee5c0946ce637b586 Mon Sep 17 00:00:00 2001 From: Kevin Willford Date: Tue, 7 Jan 2020 19:04:28 +0000 Subject: [PATCH 1/4] fsmonitor: change last update timestamp on the index_state to opaque token Some file system monitors might not use or take a timestamp for processing and in the case of watchman could have race conditions with using a timestamp. Watchman uses something called a clockid that is used for race free queries to it. The clockid for watchman is simply a string. Change the fsmonitor_last_update from being a uint64_t to a char pointer so that any arbitrary data can be stored in it and passed back to the fsmonitor. Signed-off-by: Kevin Willford Signed-off-by: Junio C Hamano --- cache.h | 2 +- fsmonitor.c | 49 ++++++++++++++++++++++------------ t/helper/test-dump-fsmonitor.c | 2 +- 3 files changed, 34 insertions(+), 19 deletions(-) diff --git a/cache.h b/cache.h index cbfaead23a..1a2e1cce3d 100644 --- a/cache.h +++ b/cache.h @@ -324,7 +324,7 @@ struct index_state { struct hashmap dir_hash; struct object_id oid; struct untracked_cache *untracked; - uint64_t fsmonitor_last_update; + char *fsmonitor_last_update; struct ewah_bitmap *fsmonitor_dirty; struct mem_pool *ce_mem_pool; struct progress *progress; diff --git a/fsmonitor.c b/fsmonitor.c index 868cca01e2..9860587225 100644 --- a/fsmonitor.c +++ b/fsmonitor.c @@ -6,8 +6,9 @@ #include "run-command.h" #include "strbuf.h" -#define INDEX_EXTENSION_VERSION (1) -#define HOOK_INTERFACE_VERSION (1) +#define INDEX_EXTENSION_VERSION1 (1) +#define INDEX_EXTENSION_VERSION2 (2) +#define HOOK_INTERFACE_VERSION (1) struct trace_key trace_fsmonitor = TRACE_KEY_INIT(FSMONITOR); @@ -32,17 +33,26 @@ int read_fsmonitor_extension(struct index_state *istate, const void *data, uint32_t ewah_size; struct ewah_bitmap *fsmonitor_dirty; int ret; + uint64_t timestamp; + struct strbuf last_update = STRBUF_INIT; - if (sz < sizeof(uint32_t) + sizeof(uint64_t) + sizeof(uint32_t)) + if (sz < sizeof(uint32_t) + 1 + sizeof(uint32_t)) return error("corrupt fsmonitor extension (too short)"); hdr_version = get_be32(index); index += sizeof(uint32_t); - if (hdr_version != INDEX_EXTENSION_VERSION) + if (hdr_version == INDEX_EXTENSION_VERSION1) { + timestamp = get_be64(index); + strbuf_addf(&last_update, "%"PRIu64"", timestamp); + index += sizeof(uint64_t); + } else if (hdr_version == INDEX_EXTENSION_VERSION2) { + strbuf_addstr(&last_update, index); + index += last_update.len + 1; + } else { return error("bad fsmonitor version %d", hdr_version); + } - istate->fsmonitor_last_update = get_be64(index); - index += sizeof(uint64_t); + istate->fsmonitor_last_update = strbuf_detach(&last_update, NULL); ewah_size = get_be32(index); index += sizeof(uint32_t); @@ -79,7 +89,6 @@ void fill_fsmonitor_bitmap(struct index_state *istate) void write_fsmonitor_extension(struct strbuf *sb, struct index_state *istate) { uint32_t hdr_version; - uint64_t tm; uint32_t ewah_start; uint32_t ewah_size = 0; int fixup = 0; @@ -89,11 +98,12 @@ void write_fsmonitor_extension(struct strbuf *sb, struct index_state *istate) BUG("fsmonitor_dirty has more entries than the index (%"PRIuMAX" > %u)", (uintmax_t)istate->fsmonitor_dirty->bit_size, istate->cache_nr); - put_be32(&hdr_version, INDEX_EXTENSION_VERSION); + put_be32(&hdr_version, INDEX_EXTENSION_VERSION2); strbuf_add(sb, &hdr_version, sizeof(uint32_t)); - put_be64(&tm, istate->fsmonitor_last_update); - strbuf_add(sb, &tm, sizeof(uint64_t)); + strbuf_addstr(sb, istate->fsmonitor_last_update); + strbuf_addch(sb, 0); /* Want to keep a NUL */ + fixup = sb->len; strbuf_add(sb, &ewah_size, sizeof(uint32_t)); /* we'll fix this up later */ @@ -110,9 +120,9 @@ void write_fsmonitor_extension(struct strbuf *sb, struct index_state *istate) } /* - * Call the query-fsmonitor hook passing the time of the last saved results. + * Call the query-fsmonitor hook passing the last update token of the saved results. */ -static int query_fsmonitor(int version, uint64_t last_update, struct strbuf *query_result) +static int query_fsmonitor(int version, const char *last_update, struct strbuf *query_result) { struct child_process cp = CHILD_PROCESS_INIT; @@ -121,7 +131,7 @@ static int query_fsmonitor(int version, uint64_t last_update, struct strbuf *que argv_array_push(&cp.args, core_fsmonitor); argv_array_pushf(&cp.args, "%d", version); - argv_array_pushf(&cp.args, "%" PRIuMAX, (uintmax_t)last_update); + argv_array_pushf(&cp.args, "%s", last_update); cp.use_shell = 1; cp.dir = get_git_work_tree(); @@ -151,6 +161,7 @@ void refresh_fsmonitor(struct index_state *istate) int query_success = 0; size_t bol; /* beginning of line */ uint64_t last_update; + struct strbuf last_update_token = STRBUF_INIT; char *buf; unsigned int i; @@ -164,6 +175,7 @@ void refresh_fsmonitor(struct index_state *istate) * should be inclusive to ensure we don't miss potential changes. */ last_update = getnanotime(); + strbuf_addf(&last_update_token, "%"PRIu64"", last_update); /* * If we have a last update time, call query_fsmonitor for the set of @@ -217,18 +229,21 @@ void refresh_fsmonitor(struct index_state *istate) } strbuf_release(&query_result); - /* Now that we've updated istate, save the last_update time */ - istate->fsmonitor_last_update = last_update; + /* Now that we've updated istate, save the last_update_token */ + FREE_AND_NULL(istate->fsmonitor_last_update); + istate->fsmonitor_last_update = strbuf_detach(&last_update_token, NULL); } void add_fsmonitor(struct index_state *istate) { unsigned int i; + struct strbuf last_update = STRBUF_INIT; if (!istate->fsmonitor_last_update) { trace_printf_key(&trace_fsmonitor, "add fsmonitor"); istate->cache_changed |= FSMONITOR_CHANGED; - istate->fsmonitor_last_update = getnanotime(); + strbuf_addf(&last_update, "%"PRIu64"", getnanotime()); + istate->fsmonitor_last_update = strbuf_detach(&last_update, NULL); /* reset the fsmonitor state */ for (i = 0; i < istate->cache_nr; i++) @@ -250,7 +265,7 @@ void remove_fsmonitor(struct index_state *istate) if (istate->fsmonitor_last_update) { trace_printf_key(&trace_fsmonitor, "remove fsmonitor"); istate->cache_changed |= FSMONITOR_CHANGED; - istate->fsmonitor_last_update = 0; + FREE_AND_NULL(istate->fsmonitor_last_update); } } diff --git a/t/helper/test-dump-fsmonitor.c b/t/helper/test-dump-fsmonitor.c index 2786f47088..975f0ac890 100644 --- a/t/helper/test-dump-fsmonitor.c +++ b/t/helper/test-dump-fsmonitor.c @@ -13,7 +13,7 @@ int cmd__dump_fsmonitor(int ac, const char **av) printf("no fsmonitor\n"); return 0; } - printf("fsmonitor last update %"PRIuMAX"\n", (uintmax_t)istate->fsmonitor_last_update); + printf("fsmonitor last update %s\n", istate->fsmonitor_last_update); for (i = 0; i < istate->cache_nr; i++) printf((istate->cache[i]->ce_flags & CE_FSMONITOR_VALID) ? "+" : "-"); From 8da2c57629a16e148b0f94282ac787c1503e4779 Mon Sep 17 00:00:00 2001 From: Kevin Willford Date: Tue, 7 Jan 2020 19:04:29 +0000 Subject: [PATCH 2/4] fsmonitor: handle version 2 of the hooks that will use opaque token Some file monitors like watchman will use something other than a timestamp to keep better track of what changes happen in between calls to query the fsmonitor. The clockid in watchman is a string. Now that the index is storing an opaque token for the last update the code needs to be updated to pass that opaque token to a verion 2 of the fsmonitor hook. Because there are repos that already have version 1 of the hook and we want them to continue to work when git is updated, we need to handle both version 1 and version 2 of the hook. In order to do that a config value is being added core.fsmonitorHookVersion to force what version of the hook should be used. When this is not set it will default to -1 and then the code will attempt to call version 2 of the hook first. If that fails it will fallback to trying version 1. Signed-off-by: Kevin Willford Signed-off-by: Junio C Hamano --- fsmonitor.c | 75 +++++++++++++++++++++++++++++++------ t/t7519-status-fsmonitor.sh | 7 +++- t/t7519/fsmonitor-all | 1 - t/t7519/fsmonitor-watchman | 3 +- 4 files changed, 71 insertions(+), 15 deletions(-) diff --git a/fsmonitor.c b/fsmonitor.c index 9860587225..932bd9012d 100644 --- a/fsmonitor.c +++ b/fsmonitor.c @@ -8,7 +8,8 @@ #define INDEX_EXTENSION_VERSION1 (1) #define INDEX_EXTENSION_VERSION2 (2) -#define HOOK_INTERFACE_VERSION (1) +#define HOOK_INTERFACE_VERSION1 (1) +#define HOOK_INTERFACE_VERSION2 (2) struct trace_key trace_fsmonitor = TRACE_KEY_INIT(FSMONITOR); @@ -25,6 +26,22 @@ static void fsmonitor_ewah_callback(size_t pos, void *is) ce->ce_flags &= ~CE_FSMONITOR_VALID; } +static int fsmonitor_hook_version(void) +{ + int hook_version; + + if (git_config_get_int("core.fsmonitorhookversion", &hook_version)) + return -1; + + if (hook_version == HOOK_INTERFACE_VERSION1 || + hook_version == HOOK_INTERFACE_VERSION2) + return hook_version; + + warning("Invalid hook version '%i' in core.fsmonitorhookversion. " + "Must be 1 or 2.", hook_version); + return -1; +} + int read_fsmonitor_extension(struct index_state *istate, const void *data, unsigned long sz) { @@ -158,8 +175,8 @@ static void fsmonitor_refresh_callback(struct index_state *istate, const char *n void refresh_fsmonitor(struct index_state *istate) { struct strbuf query_result = STRBUF_INIT; - int query_success = 0; - size_t bol; /* beginning of line */ + int query_success = 0, hook_version = -1; + size_t bol = 0; /* beginning of line */ uint64_t last_update; struct strbuf last_update_token = STRBUF_INIT; char *buf; @@ -167,6 +184,9 @@ void refresh_fsmonitor(struct index_state *istate) if (!core_fsmonitor || istate->fsmonitor_has_run_once) return; + + hook_version = fsmonitor_hook_version(); + istate->fsmonitor_has_run_once = 1; trace_printf_key(&trace_fsmonitor, "refresh fsmonitor"); @@ -175,27 +195,60 @@ void refresh_fsmonitor(struct index_state *istate) * should be inclusive to ensure we don't miss potential changes. */ last_update = getnanotime(); - strbuf_addf(&last_update_token, "%"PRIu64"", last_update); + if (hook_version == HOOK_INTERFACE_VERSION1) + strbuf_addf(&last_update_token, "%"PRIu64"", last_update); /* - * If we have a last update time, call query_fsmonitor for the set of - * changes since that time, else assume everything is possibly dirty + * If we have a last update token, call query_fsmonitor for the set of + * changes since that token, else assume everything is possibly dirty * and check it all. */ if (istate->fsmonitor_last_update) { - query_success = !query_fsmonitor(HOOK_INTERFACE_VERSION, - istate->fsmonitor_last_update, &query_result); + if (hook_version == -1 || hook_version == HOOK_INTERFACE_VERSION2) { + query_success = !query_fsmonitor(HOOK_INTERFACE_VERSION2, + istate->fsmonitor_last_update, &query_result); + + if (query_success) { + if (hook_version < 0) + hook_version = HOOK_INTERFACE_VERSION2; + + /* + * First entry will be the last update token + * Need to use a char * variable because static + * analysis was suggesting to use strbuf_addbuf + * but we don't want to copy the entire strbuf + * only the the chars up to the first NUL + */ + buf = query_result.buf; + strbuf_addstr(&last_update_token, buf); + if (!last_update_token.len) { + warning("Empty last update token."); + query_success = 0; + } else { + bol = last_update_token.len + 1; + } + } else if (hook_version < 0) { + hook_version = HOOK_INTERFACE_VERSION1; + if (!last_update_token.len) + strbuf_addf(&last_update_token, "%"PRIu64"", last_update); + } + } + + if (hook_version == HOOK_INTERFACE_VERSION1) { + query_success = !query_fsmonitor(HOOK_INTERFACE_VERSION1, + istate->fsmonitor_last_update, &query_result); + } + trace_performance_since(last_update, "fsmonitor process '%s'", core_fsmonitor); trace_printf_key(&trace_fsmonitor, "fsmonitor process '%s' returned %s", core_fsmonitor, query_success ? "success" : "failure"); } /* a fsmonitor process can return '/' to indicate all entries are invalid */ - if (query_success && query_result.buf[0] != '/') { + if (query_success && query_result.buf[bol] != '/') { /* Mark all entries returned by the monitor as dirty */ buf = query_result.buf; - bol = 0; - for (i = 0; i < query_result.len; i++) { + for (i = bol; i < query_result.len; i++) { if (buf[i] != '\0') continue; fsmonitor_refresh_callback(istate, buf + bol); diff --git a/t/t7519-status-fsmonitor.sh b/t/t7519-status-fsmonitor.sh index cf0fda2d5a..fbfdcca000 100755 --- a/t/t7519-status-fsmonitor.sh +++ b/t/t7519-status-fsmonitor.sh @@ -32,11 +32,12 @@ write_integration_script () { echo "$0: exactly 2 arguments expected" exit 2 fi - if test "$1" != 1 + if test "$1" != 2 then echo "Unsupported core.fsmonitor hook version." >&2 exit 1 fi + printf "last_update_token\0" printf "untracked\0" printf "dir1/untracked\0" printf "dir2/untracked\0" @@ -107,6 +108,7 @@ EOF # test that "update-index --fsmonitor-valid" sets the fsmonitor valid bit test_expect_success 'update-index --fsmonitor-valid" sets the fsmonitor valid bit' ' write_script .git/hooks/fsmonitor-test<<-\EOF && + printf "last_update_token\0" EOF git update-index --fsmonitor && git update-index --fsmonitor-valid dir1/modified && @@ -167,6 +169,7 @@ EOF # test that newly added files are marked valid test_expect_success 'newly added files are marked valid' ' write_script .git/hooks/fsmonitor-test<<-\EOF && + printf "last_update_token\0" EOF git add new && git add dir1/new && @@ -207,6 +210,7 @@ EOF # test that *only* files returned by the integration script get flagged as invalid test_expect_success '*only* files returned by the integration script get flagged as invalid' ' write_script .git/hooks/fsmonitor-test<<-\EOF && + printf "last_update_token\0" printf "dir1/modified\0" EOF clean_repo && @@ -276,6 +280,7 @@ do # (if enabled) files unless it is told about them. test_expect_success "status doesn't detect unreported modifications" ' write_script .git/hooks/fsmonitor-test<<-\EOF && + printf "last_update_token\0" :>marker EOF clean_repo && diff --git a/t/t7519/fsmonitor-all b/t/t7519/fsmonitor-all index 691bc94dc2..94ab66bd3d 100755 --- a/t/t7519/fsmonitor-all +++ b/t/t7519/fsmonitor-all @@ -17,7 +17,6 @@ fi if test "$1" != 1 then - echo "Unsupported core.fsmonitor hook version." >&2 exit 1 fi diff --git a/t/t7519/fsmonitor-watchman b/t/t7519/fsmonitor-watchman index d8e7a1e5ba..264b9daf83 100755 --- a/t/t7519/fsmonitor-watchman +++ b/t/t7519/fsmonitor-watchman @@ -26,8 +26,7 @@ if ($version == 1) { # subtract one second to make sure watchman will return all changes $time = int ($time / 1000000000) - 1; } else { - die "Unsupported query-fsmonitor hook version '$version'.\n" . - "Falling back to scanning...\n"; + exit 1; } my $git_work_tree; From e4e1e8342aa03c4e4585908736a103130cc8906d Mon Sep 17 00:00:00 2001 From: Kevin Willford Date: Thu, 23 Jan 2020 15:26:46 +0000 Subject: [PATCH 3/4] fsmonitor: add fsmonitor hook scripts for version 2 Version 2 of the fsmonitor hooks is passed the version and an update token and must pass back a last update token to use for subsequent calls to the hook. Signed-off-by: Kevin Willford Signed-off-by: Junio C Hamano --- t/t7519/fsmonitor-all-v2 | 21 +++ t/t7519/fsmonitor-watchman-v2 | 173 +++++++++++++++++++++ templates/hooks--fsmonitor-watchman.sample | 164 +++++++++++++------ 3 files changed, 308 insertions(+), 50 deletions(-) create mode 100755 t/t7519/fsmonitor-all-v2 create mode 100755 t/t7519/fsmonitor-watchman-v2 diff --git a/t/t7519/fsmonitor-all-v2 b/t/t7519/fsmonitor-all-v2 new file mode 100755 index 0000000000..061907e88b --- /dev/null +++ b/t/t7519/fsmonitor-all-v2 @@ -0,0 +1,21 @@ +#!/usr/bin/perl + +use strict; +use warnings; +# +# An test hook script to integrate with git to test fsmonitor. +# +# The hook is passed a version (currently 2) and since token +# formatted as a string and outputs to stdout all files that have been +# modified since the given time. Paths must be relative to the root of +# the working tree and separated by a single NUL. +# +#echo "$0 $*" >&2 +my ($version, $last_update_token) = @ARGV; + +if ($version ne 2) { + print "Unsupported query-fsmonitor hook version '$version'.\n"; + exit 1; +} + +print "last_update_token\0/\0" diff --git a/t/t7519/fsmonitor-watchman-v2 b/t/t7519/fsmonitor-watchman-v2 new file mode 100755 index 0000000000..14ed0aa42d --- /dev/null +++ b/t/t7519/fsmonitor-watchman-v2 @@ -0,0 +1,173 @@ +#!/usr/bin/perl + +use strict; +use warnings; +use IPC::Open2; + +# An example hook script to integrate Watchman +# (https://facebook.github.io/watchman/) with git to speed up detecting +# new and modified files. +# +# The hook is passed a version (currently 2) and last update token +# formatted as a string and outputs to stdout a new update token and +# all files that have been modified since the update token. Paths must +# be relative to the root of the working tree and separated by a single NUL. +# +# To enable this hook, rename this file to "query-watchman" and set +# 'git config core.fsmonitor .git/hooks/query-watchman' +# +my ($version, $last_update_token) = @ARGV; + +# Uncomment for debugging +# print STDERR "$0 $version $last_update_token\n"; + +# Check the hook interface version +if ($version ne 2) { + die "Unsupported query-fsmonitor hook version '$version'.\n" . + "Falling back to scanning...\n"; +} + +my $git_work_tree = get_working_dir(); + +my $retry = 1; + +my $json_pkg; +eval { + require JSON::XS; + $json_pkg = "JSON::XS"; + 1; +} or do { + require JSON::PP; + $json_pkg = "JSON::PP"; +}; + +launch_watchman(); + +sub launch_watchman { + my $o = watchman_query(); + if (is_work_tree_watched($o)) { + output_result($o->{clock}, @{$o->{files}}); + } +} + +sub output_result { + my ($clockid, @files) = @_; + + # Uncomment for debugging watchman output + # open (my $fh, ">", ".git/watchman-output.out"); + # binmode $fh, ":utf8"; + # print $fh "$clockid\n@files\n"; + # close $fh; + + binmode STDOUT, ":utf8"; + print $clockid; + print "\0"; + local $, = "\0"; + print @files; +} + +sub watchman_clock { + my $response = qx/watchman clock "$git_work_tree"/; + die "Failed to get clock id on '$git_work_tree'.\n" . + "Falling back to scanning...\n" if $? != 0; + + return $json_pkg->new->utf8->decode($response); +} + +sub watchman_query { + my $pid = open2(\*CHLD_OUT, \*CHLD_IN, 'watchman -j --no-pretty') + or die "open2() failed: $!\n" . + "Falling back to scanning...\n"; + + # In the query expression below we're asking for names of files that + # changed since $last_update_token but not from the .git folder. + # + # To accomplish this, we're using the "since" generator to use the + # recency index to select candidate nodes and "fields" to limit the + # output to file names only. Then we're using the "expression" term to + # further constrain the results. + if (substr($last_update_token, 0, 1) eq "c") { + $last_update_token = "\"$last_update_token\""; + } + my $query = <<" END"; + ["query", "$git_work_tree", { + "since": $last_update_token, + "fields": ["name"], + "expression": ["not", ["dirname", ".git"]] + }] + END + + # Uncomment for debugging the watchman query + # open (my $fh, ">", ".git/watchman-query.json"); + # print $fh $query; + # close $fh; + + print CHLD_IN $query; + close CHLD_IN; + my $response = do {local $/; }; + + # Uncomment for debugging the watch response + # open ($fh, ">", ".git/watchman-response.json"); + # print $fh $response; + # close $fh; + + die "Watchman: command returned no output.\n" . + "Falling back to scanning...\n" if $response eq ""; + die "Watchman: command returned invalid output: $response\n" . + "Falling back to scanning...\n" unless $response =~ /^\{/; + + return $json_pkg->new->utf8->decode($response); +} + +sub is_work_tree_watched { + my ($output) = @_; + my $error = $output->{error}; + if ($retry > 0 and $error and $error =~ m/unable to resolve root .* directory (.*) is not watched/) { + $retry--; + my $response = qx/watchman watch "$git_work_tree"/; + die "Failed to make watchman watch '$git_work_tree'.\n" . + "Falling back to scanning...\n" if $? != 0; + $output = $json_pkg->new->utf8->decode($response); + $error = $output->{error}; + die "Watchman: $error.\n" . + "Falling back to scanning...\n" if $error; + + # Uncomment for debugging watchman output + # open (my $fh, ">", ".git/watchman-output.out"); + # close $fh; + + # Watchman will always return all files on the first query so + # return the fast "everything is dirty" flag to git and do the + # Watchman query just to get it over with now so we won't pay + # the cost in git to look up each individual file. + my $o = watchman_clock(); + $error = $output->{error}; + + die "Watchman: $error.\n" . + "Falling back to scanning...\n" if $error; + + output_result($o->{clock}, ("/")); + $last_update_token = $o->{clock}; + + eval { launch_watchman() }; + return 0; + } + + die "Watchman: $error.\n" . + "Falling back to scanning...\n" if $error; + + return 1; +} + +sub get_working_dir { + my $working_dir; + if ($^O =~ 'msys' || $^O =~ 'cygwin') { + $working_dir = Win32::GetCwd(); + $working_dir =~ tr/\\/\//; + } else { + require Cwd; + $working_dir = Cwd::cwd(); + } + + return $working_dir; +} diff --git a/templates/hooks--fsmonitor-watchman.sample b/templates/hooks--fsmonitor-watchman.sample index ef94fa2938..14ed0aa42d 100755 --- a/templates/hooks--fsmonitor-watchman.sample +++ b/templates/hooks--fsmonitor-watchman.sample @@ -8,102 +8,166 @@ use IPC::Open2; # (https://facebook.github.io/watchman/) with git to speed up detecting # new and modified files. # -# The hook is passed a version (currently 1) and a time in nanoseconds -# formatted as a string and outputs to stdout all files that have been -# modified since the given time. Paths must be relative to the root of -# the working tree and separated by a single NUL. +# The hook is passed a version (currently 2) and last update token +# formatted as a string and outputs to stdout a new update token and +# all files that have been modified since the update token. Paths must +# be relative to the root of the working tree and separated by a single NUL. # # To enable this hook, rename this file to "query-watchman" and set # 'git config core.fsmonitor .git/hooks/query-watchman' # -my ($version, $time) = @ARGV; +my ($version, $last_update_token) = @ARGV; + +# Uncomment for debugging +# print STDERR "$0 $version $last_update_token\n"; # Check the hook interface version - -if ($version == 1) { - # convert nanoseconds to seconds - # subtract one second to make sure watchman will return all changes - $time = int ($time / 1000000000) - 1; -} else { +if ($version ne 2) { die "Unsupported query-fsmonitor hook version '$version'.\n" . "Falling back to scanning...\n"; } -my $git_work_tree; -if ($^O =~ 'msys' || $^O =~ 'cygwin') { - $git_work_tree = Win32::GetCwd(); - $git_work_tree =~ tr/\\/\//; -} else { - require Cwd; - $git_work_tree = Cwd::cwd(); -} +my $git_work_tree = get_working_dir(); my $retry = 1; +my $json_pkg; +eval { + require JSON::XS; + $json_pkg = "JSON::XS"; + 1; +} or do { + require JSON::PP; + $json_pkg = "JSON::PP"; +}; + launch_watchman(); sub launch_watchman { + my $o = watchman_query(); + if (is_work_tree_watched($o)) { + output_result($o->{clock}, @{$o->{files}}); + } +} +sub output_result { + my ($clockid, @files) = @_; + + # Uncomment for debugging watchman output + # open (my $fh, ">", ".git/watchman-output.out"); + # binmode $fh, ":utf8"; + # print $fh "$clockid\n@files\n"; + # close $fh; + + binmode STDOUT, ":utf8"; + print $clockid; + print "\0"; + local $, = "\0"; + print @files; +} + +sub watchman_clock { + my $response = qx/watchman clock "$git_work_tree"/; + die "Failed to get clock id on '$git_work_tree'.\n" . + "Falling back to scanning...\n" if $? != 0; + + return $json_pkg->new->utf8->decode($response); +} + +sub watchman_query { my $pid = open2(\*CHLD_OUT, \*CHLD_IN, 'watchman -j --no-pretty') - or die "open2() failed: $!\n" . - "Falling back to scanning...\n"; + or die "open2() failed: $!\n" . + "Falling back to scanning...\n"; # In the query expression below we're asking for names of files that - # changed since $time but were not transient (ie created after - # $time but no longer exist). + # changed since $last_update_token but not from the .git folder. # # To accomplish this, we're using the "since" generator to use the # recency index to select candidate nodes and "fields" to limit the - # output to file names only. - + # output to file names only. Then we're using the "expression" term to + # further constrain the results. + if (substr($last_update_token, 0, 1) eq "c") { + $last_update_token = "\"$last_update_token\""; + } my $query = <<" END"; ["query", "$git_work_tree", { - "since": $time, - "fields": ["name"] + "since": $last_update_token, + "fields": ["name"], + "expression": ["not", ["dirname", ".git"]] }] END + # Uncomment for debugging the watchman query + # open (my $fh, ">", ".git/watchman-query.json"); + # print $fh $query; + # close $fh; + print CHLD_IN $query; close CHLD_IN; my $response = do {local $/; }; + # Uncomment for debugging the watch response + # open ($fh, ">", ".git/watchman-response.json"); + # print $fh $response; + # close $fh; + die "Watchman: command returned no output.\n" . - "Falling back to scanning...\n" if $response eq ""; + "Falling back to scanning...\n" if $response eq ""; die "Watchman: command returned invalid output: $response\n" . - "Falling back to scanning...\n" unless $response =~ /^\{/; + "Falling back to scanning...\n" unless $response =~ /^\{/; - my $json_pkg; - eval { - require JSON::XS; - $json_pkg = "JSON::XS"; - 1; - } or do { - require JSON::PP; - $json_pkg = "JSON::PP"; - }; + return $json_pkg->new->utf8->decode($response); +} - my $o = $json_pkg->new->utf8->decode($response); - - if ($retry > 0 and $o->{error} and $o->{error} =~ m/unable to resolve root .* directory (.*) is not watched/) { - print STDERR "Adding '$git_work_tree' to watchman's watch list.\n"; +sub is_work_tree_watched { + my ($output) = @_; + my $error = $output->{error}; + if ($retry > 0 and $error and $error =~ m/unable to resolve root .* directory (.*) is not watched/) { $retry--; - qx/watchman watch "$git_work_tree"/; + my $response = qx/watchman watch "$git_work_tree"/; die "Failed to make watchman watch '$git_work_tree'.\n" . "Falling back to scanning...\n" if $? != 0; + $output = $json_pkg->new->utf8->decode($response); + $error = $output->{error}; + die "Watchman: $error.\n" . + "Falling back to scanning...\n" if $error; + + # Uncomment for debugging watchman output + # open (my $fh, ">", ".git/watchman-output.out"); + # close $fh; # Watchman will always return all files on the first query so # return the fast "everything is dirty" flag to git and do the # Watchman query just to get it over with now so we won't pay # the cost in git to look up each individual file. - print "/\0"; + my $o = watchman_clock(); + $error = $output->{error}; + + die "Watchman: $error.\n" . + "Falling back to scanning...\n" if $error; + + output_result($o->{clock}, ("/")); + $last_update_token = $o->{clock}; + eval { launch_watchman() }; - exit 0; + return 0; } - die "Watchman: $o->{error}.\n" . - "Falling back to scanning...\n" if $o->{error}; + die "Watchman: $error.\n" . + "Falling back to scanning...\n" if $error; - binmode STDOUT, ":utf8"; - local $, = "\0"; - print @{$o->{files}}; + return 1; +} + +sub get_working_dir { + my $working_dir; + if ($^O =~ 'msys' || $^O =~ 'cygwin') { + $working_dir = Win32::GetCwd(); + $working_dir =~ tr/\\/\//; + } else { + require Cwd; + $working_dir = Cwd::cwd(); + } + + return $working_dir; } From dfaed028620c2dca08d24583c7fc8b0aef9b6d0f Mon Sep 17 00:00:00 2001 From: Kevin Willford Date: Thu, 23 Jan 2020 15:26:47 +0000 Subject: [PATCH 4/4] fsmonitor: update documentation for hook version and watchman hooks A new config value for core.fsmonitorHookVersion was added to be able to force the version of the fsmonitor hook. Possible values are 1 or 2. When this is not set the code will use a value of -1 and attempt to use version 2 of the hook first and if that fails will attempt version 1. Signed-off-by: Kevin Willford Signed-off-by: Junio C Hamano --- Documentation/config/core.txt | 11 +++++++++++ Documentation/githooks.txt | 13 ++++++++++--- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/Documentation/config/core.txt b/Documentation/config/core.txt index 9e440b160d..74619a9c03 100644 --- a/Documentation/config/core.txt +++ b/Documentation/config/core.txt @@ -68,6 +68,17 @@ core.fsmonitor:: avoiding unnecessary processing of files that have not changed. See the "fsmonitor-watchman" section of linkgit:githooks[5]. +core.fsmonitorHookVersion:: + Sets the version of hook that is to be used when calling fsmonitor. + There are currently versions 1 and 2. When this is not set, + version 2 will be tried first and if it fails then version 1 + will be tried. Version 1 uses a timestamp as input to determine + which files have changes since that time but some monitors + like watchman have race conditions when used with a timestamp. + Version 2 uses an opaque string so that the monitor can return + something that can be used to determine what files have changed + without race conditions. + core.trustctime:: If false, the ctime differences between the index and the working tree are ignored; useful when the inode change time diff --git a/Documentation/githooks.txt b/Documentation/githooks.txt index 50365f2914..3dccab5375 100644 --- a/Documentation/githooks.txt +++ b/Documentation/githooks.txt @@ -490,9 +490,16 @@ fsmonitor-watchman ~~~~~~~~~~~~~~~~~~ This hook is invoked when the configuration option `core.fsmonitor` is -set to `.git/hooks/fsmonitor-watchman`. It takes two arguments, a version -(currently 1) and the time in elapsed nanoseconds since midnight, -January 1, 1970. +set to `.git/hooks/fsmonitor-watchman` or `.git/hooks/fsmonitor-watchmanv2` +depending on the version of the hook to use. + +Version 1 takes two arguments, a version (1) and the time in elapsed +nanoseconds since midnight, January 1, 1970. + +Version 2 takes two arguments, a version (2) and a token that is used +for identifying changes since the token. For watchman this would be +a clock id. This version must output to stdout the new token followed +by a NUL before the list of files. The hook should output to stdout the list of all files in the working directory that may have changed since the requested time. The logic