You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
614 lines
18 KiB
614 lines
18 KiB
#include "cache.h" |
|
#include "config.h" |
|
#include "dir.h" |
|
#include "environment.h" |
|
#include "ewah/ewok.h" |
|
#include "fsmonitor.h" |
|
#include "fsmonitor-ipc.h" |
|
#include "run-command.h" |
|
#include "strbuf.h" |
|
#include "trace2.h" |
|
|
|
#define INDEX_EXTENSION_VERSION1 (1) |
|
#define INDEX_EXTENSION_VERSION2 (2) |
|
#define HOOK_INTERFACE_VERSION1 (1) |
|
#define HOOK_INTERFACE_VERSION2 (2) |
|
|
|
struct trace_key trace_fsmonitor = TRACE_KEY_INIT(FSMONITOR); |
|
|
|
static void assert_index_minimum(struct index_state *istate, size_t pos) |
|
{ |
|
if (pos > istate->cache_nr) |
|
BUG("fsmonitor_dirty has more entries than the index (%"PRIuMAX" > %u)", |
|
(uintmax_t)pos, istate->cache_nr); |
|
} |
|
|
|
static void fsmonitor_ewah_callback(size_t pos, void *is) |
|
{ |
|
struct index_state *istate = (struct index_state *)is; |
|
struct cache_entry *ce; |
|
|
|
assert_index_minimum(istate, pos + 1); |
|
|
|
ce = istate->cache[pos]; |
|
ce->ce_flags &= ~CE_FSMONITOR_VALID; |
|
} |
|
|
|
static int fsmonitor_hook_version(void) |
|
{ |
|
int hook_version; |
|
|
|
if (git_config_get_int("core.fsmonitorhookversion", &hook_version)) |
|
return -1; |
|
|
|
if (hook_version == HOOK_INTERFACE_VERSION1 || |
|
hook_version == HOOK_INTERFACE_VERSION2) |
|
return hook_version; |
|
|
|
warning("Invalid hook version '%i' in core.fsmonitorhookversion. " |
|
"Must be 1 or 2.", hook_version); |
|
return -1; |
|
} |
|
|
|
int read_fsmonitor_extension(struct index_state *istate, const void *data, |
|
unsigned long sz) |
|
{ |
|
const char *index = data; |
|
uint32_t hdr_version; |
|
uint32_t ewah_size; |
|
struct ewah_bitmap *fsmonitor_dirty; |
|
int ret; |
|
uint64_t timestamp; |
|
struct strbuf last_update = STRBUF_INIT; |
|
|
|
if (sz < sizeof(uint32_t) + 1 + sizeof(uint32_t)) |
|
return error("corrupt fsmonitor extension (too short)"); |
|
|
|
hdr_version = get_be32(index); |
|
index += sizeof(uint32_t); |
|
if (hdr_version == INDEX_EXTENSION_VERSION1) { |
|
timestamp = get_be64(index); |
|
strbuf_addf(&last_update, "%"PRIu64"", timestamp); |
|
index += sizeof(uint64_t); |
|
} else if (hdr_version == INDEX_EXTENSION_VERSION2) { |
|
strbuf_addstr(&last_update, index); |
|
index += last_update.len + 1; |
|
} else { |
|
return error("bad fsmonitor version %d", hdr_version); |
|
} |
|
|
|
istate->fsmonitor_last_update = strbuf_detach(&last_update, NULL); |
|
|
|
ewah_size = get_be32(index); |
|
index += sizeof(uint32_t); |
|
|
|
fsmonitor_dirty = ewah_new(); |
|
ret = ewah_read_mmap(fsmonitor_dirty, index, ewah_size); |
|
if (ret != ewah_size) { |
|
ewah_free(fsmonitor_dirty); |
|
return error("failed to parse ewah bitmap reading fsmonitor index extension"); |
|
} |
|
istate->fsmonitor_dirty = fsmonitor_dirty; |
|
|
|
if (!istate->split_index) |
|
assert_index_minimum(istate, istate->fsmonitor_dirty->bit_size); |
|
|
|
trace2_data_string("index", NULL, "extension/fsmn/read/token", |
|
istate->fsmonitor_last_update); |
|
trace_printf_key(&trace_fsmonitor, |
|
"read fsmonitor extension successful '%s'", |
|
istate->fsmonitor_last_update); |
|
return 0; |
|
} |
|
|
|
void fill_fsmonitor_bitmap(struct index_state *istate) |
|
{ |
|
unsigned int i, skipped = 0; |
|
istate->fsmonitor_dirty = ewah_new(); |
|
for (i = 0; i < istate->cache_nr; i++) { |
|
if (istate->cache[i]->ce_flags & CE_REMOVE) |
|
skipped++; |
|
else if (!(istate->cache[i]->ce_flags & CE_FSMONITOR_VALID)) |
|
ewah_set(istate->fsmonitor_dirty, i - skipped); |
|
} |
|
} |
|
|
|
void write_fsmonitor_extension(struct strbuf *sb, struct index_state *istate) |
|
{ |
|
uint32_t hdr_version; |
|
uint32_t ewah_start; |
|
uint32_t ewah_size = 0; |
|
int fixup = 0; |
|
|
|
if (!istate->split_index) |
|
assert_index_minimum(istate, istate->fsmonitor_dirty->bit_size); |
|
|
|
put_be32(&hdr_version, INDEX_EXTENSION_VERSION2); |
|
strbuf_add(sb, &hdr_version, sizeof(uint32_t)); |
|
|
|
strbuf_addstr(sb, istate->fsmonitor_last_update); |
|
strbuf_addch(sb, 0); /* Want to keep a NUL */ |
|
|
|
fixup = sb->len; |
|
strbuf_add(sb, &ewah_size, sizeof(uint32_t)); /* we'll fix this up later */ |
|
|
|
ewah_start = sb->len; |
|
ewah_serialize_strbuf(istate->fsmonitor_dirty, sb); |
|
ewah_free(istate->fsmonitor_dirty); |
|
istate->fsmonitor_dirty = NULL; |
|
|
|
/* fix up size field */ |
|
put_be32(&ewah_size, sb->len - ewah_start); |
|
memcpy(sb->buf + fixup, &ewah_size, sizeof(uint32_t)); |
|
|
|
trace2_data_string("index", NULL, "extension/fsmn/write/token", |
|
istate->fsmonitor_last_update); |
|
trace_printf_key(&trace_fsmonitor, |
|
"write fsmonitor extension successful '%s'", |
|
istate->fsmonitor_last_update); |
|
} |
|
|
|
/* |
|
* Call the query-fsmonitor hook passing the last update token of the saved results. |
|
*/ |
|
static int query_fsmonitor_hook(struct repository *r, |
|
int version, |
|
const char *last_update, |
|
struct strbuf *query_result) |
|
{ |
|
struct child_process cp = CHILD_PROCESS_INIT; |
|
int result; |
|
|
|
if (fsm_settings__get_mode(r) != FSMONITOR_MODE_HOOK) |
|
return -1; |
|
|
|
strvec_push(&cp.args, fsm_settings__get_hook_path(r)); |
|
strvec_pushf(&cp.args, "%d", version); |
|
strvec_pushf(&cp.args, "%s", last_update); |
|
cp.use_shell = 1; |
|
cp.dir = get_git_work_tree(); |
|
|
|
trace2_region_enter("fsm_hook", "query", NULL); |
|
|
|
result = capture_command(&cp, query_result, 1024); |
|
|
|
if (result) |
|
trace2_data_intmax("fsm_hook", NULL, "query/failed", result); |
|
else |
|
trace2_data_intmax("fsm_hook", NULL, "query/response-length", |
|
query_result->len); |
|
|
|
trace2_region_leave("fsm_hook", "query", NULL); |
|
|
|
return result; |
|
} |
|
|
|
static void fsmonitor_refresh_callback(struct index_state *istate, char *name) |
|
{ |
|
int i, len = strlen(name); |
|
int pos = index_name_pos(istate, name, len); |
|
|
|
trace_printf_key(&trace_fsmonitor, |
|
"fsmonitor_refresh_callback '%s' (pos %d)", |
|
name, pos); |
|
|
|
if (name[len - 1] == '/') { |
|
/* |
|
* The daemon can decorate directory events, such as |
|
* moves or renames, with a trailing slash if the OS |
|
* FS Event contains sufficient information, such as |
|
* MacOS. |
|
* |
|
* Use this to invalidate the entire cone under that |
|
* directory. |
|
* |
|
* We do not expect an exact match because the index |
|
* does not normally contain directory entries, so we |
|
* start at the insertion point and scan. |
|
*/ |
|
if (pos < 0) |
|
pos = -pos - 1; |
|
|
|
/* Mark all entries for the folder invalid */ |
|
for (i = pos; i < istate->cache_nr; i++) { |
|
if (!starts_with(istate->cache[i]->name, name)) |
|
break; |
|
istate->cache[i]->ce_flags &= ~CE_FSMONITOR_VALID; |
|
} |
|
|
|
/* |
|
* We need to remove the traling "/" from the path |
|
* for the untracked cache. |
|
*/ |
|
name[len - 1] = '\0'; |
|
} else if (pos >= 0) { |
|
/* |
|
* We have an exact match for this path and can just |
|
* invalidate it. |
|
*/ |
|
istate->cache[pos]->ce_flags &= ~CE_FSMONITOR_VALID; |
|
} else { |
|
/* |
|
* The path is not a tracked file -or- it is a |
|
* directory event on a platform that cannot |
|
* distinguish between file and directory events in |
|
* the event handler, such as Windows. |
|
* |
|
* Scan as if it is a directory and invalidate the |
|
* cone under it. (But remember to ignore items |
|
* between "name" and "name/", such as "name-" and |
|
* "name.". |
|
*/ |
|
pos = -pos - 1; |
|
|
|
for (i = pos; i < istate->cache_nr; i++) { |
|
if (!starts_with(istate->cache[i]->name, name)) |
|
break; |
|
if ((unsigned char)istate->cache[i]->name[len] > '/') |
|
break; |
|
if (istate->cache[i]->name[len] == '/') |
|
istate->cache[i]->ce_flags &= ~CE_FSMONITOR_VALID; |
|
} |
|
} |
|
|
|
/* |
|
* Mark the untracked cache dirty even if it wasn't found in the index |
|
* as it could be a new untracked file. |
|
*/ |
|
untracked_cache_invalidate_path(istate, name, 0); |
|
} |
|
|
|
/*
 * Number of pathnames a single FSMonitor response must exceed before
 * we force the index to be written back to disk.
 *
 * Any pathname in a response MAY flip a cache-entry or istate flag bit
 * and thereby cause the index to be rewritten.  A response can, however,
 * consist largely of paths (ignored files, for instance) that change no
 * flag bits at all.  In that case the index is not rewritten, which is
 * fine and normal, but it also means the token stored in the FSMonitor
 * index extension is not advanced.  The next Git command then finds the
 * same token, issues the same token-relative query, and gets the same
 * response back, plus whatever changed in the meantime.  If that
 * response is large and keeps growing, performance can suffer.
 *
 * Example: a build writes 100K object files without touching any source
 * file, so the index needs no update.  The FSMonitor response relative
 * to the pre-build token might be 5MB.  Every following Git command
 * receives that same 100K/5MB response, and `refresh_fsmonitor()` walks
 * all 100K paths each time, until something else causes the index to be
 * written.
 *
 * Forcing an index update after a very large response stores a fresh
 * token in the extension, so later commands receive smaller and more
 * current responses.
 *
 * The exact value is not critical.  The index is rewritten anyway the
 * first time the user touches a tracked file and a command such as
 * `git status` refreshes an mtime and/or sets a flag bit.
 */
static int fsmonitor_force_update_threshold = 100;
|
|
|
void refresh_fsmonitor(struct index_state *istate) |
|
{ |
|
static int warn_once = 0; |
|
struct strbuf query_result = STRBUF_INIT; |
|
int query_success = 0, hook_version = -1; |
|
size_t bol = 0; /* beginning of line */ |
|
uint64_t last_update; |
|
struct strbuf last_update_token = STRBUF_INIT; |
|
char *buf; |
|
unsigned int i; |
|
int is_trivial = 0; |
|
struct repository *r = istate->repo; |
|
enum fsmonitor_mode fsm_mode = fsm_settings__get_mode(r); |
|
enum fsmonitor_reason reason = fsm_settings__get_reason(r); |
|
|
|
if (!warn_once && reason > FSMONITOR_REASON_OK) { |
|
char *msg = fsm_settings__get_incompatible_msg(r, reason); |
|
warn_once = 1; |
|
warning("%s", msg); |
|
free(msg); |
|
} |
|
|
|
if (fsm_mode <= FSMONITOR_MODE_DISABLED || |
|
istate->fsmonitor_has_run_once) |
|
return; |
|
|
|
istate->fsmonitor_has_run_once = 1; |
|
|
|
trace_printf_key(&trace_fsmonitor, "refresh fsmonitor"); |
|
|
|
if (fsm_mode == FSMONITOR_MODE_IPC) { |
|
query_success = !fsmonitor_ipc__send_query( |
|
istate->fsmonitor_last_update ? |
|
istate->fsmonitor_last_update : "builtin:fake", |
|
&query_result); |
|
if (query_success) { |
|
/* |
|
* The response contains a series of nul terminated |
|
* strings. The first is the new token. |
|
* |
|
* Use `char *buf` as an interlude to trick the CI |
|
* static analysis to let us use `strbuf_addstr()` |
|
* here (and only copy the token) rather than |
|
* `strbuf_addbuf()`. |
|
*/ |
|
buf = query_result.buf; |
|
strbuf_addstr(&last_update_token, buf); |
|
bol = last_update_token.len + 1; |
|
is_trivial = query_result.buf[bol] == '/'; |
|
if (is_trivial) |
|
trace2_data_intmax("fsm_client", NULL, |
|
"query/trivial-response", 1); |
|
} else { |
|
/* |
|
* The builtin daemon is not available on this |
|
* platform -OR- we failed to get a response. |
|
* |
|
* Generate a fake token (rather than a V1 |
|
* timestamp) for the index extension. (If |
|
* they switch back to the hook API, we don't |
|
* want ambiguous state.) |
|
*/ |
|
strbuf_addstr(&last_update_token, "builtin:fake"); |
|
} |
|
|
|
goto apply_results; |
|
} |
|
|
|
assert(fsm_mode == FSMONITOR_MODE_HOOK); |
|
|
|
hook_version = fsmonitor_hook_version(); |
|
|
|
/* |
|
* This could be racy so save the date/time now and query_fsmonitor_hook |
|
* should be inclusive to ensure we don't miss potential changes. |
|
*/ |
|
last_update = getnanotime(); |
|
if (hook_version == HOOK_INTERFACE_VERSION1) |
|
strbuf_addf(&last_update_token, "%"PRIu64"", last_update); |
|
|
|
/* |
|
* If we have a last update token, call query_fsmonitor_hook for the set of |
|
* changes since that token, else assume everything is possibly dirty |
|
* and check it all. |
|
*/ |
|
if (istate->fsmonitor_last_update) { |
|
if (hook_version == -1 || hook_version == HOOK_INTERFACE_VERSION2) { |
|
query_success = !query_fsmonitor_hook( |
|
r, HOOK_INTERFACE_VERSION2, |
|
istate->fsmonitor_last_update, &query_result); |
|
|
|
if (query_success) { |
|
if (hook_version < 0) |
|
hook_version = HOOK_INTERFACE_VERSION2; |
|
|
|
/* |
|
* First entry will be the last update token |
|
* Need to use a char * variable because static |
|
* analysis was suggesting to use strbuf_addbuf |
|
* but we don't want to copy the entire strbuf |
|
* only the chars up to the first NUL |
|
*/ |
|
buf = query_result.buf; |
|
strbuf_addstr(&last_update_token, buf); |
|
if (!last_update_token.len) { |
|
warning("Empty last update token."); |
|
query_success = 0; |
|
} else { |
|
bol = last_update_token.len + 1; |
|
is_trivial = query_result.buf[bol] == '/'; |
|
} |
|
} else if (hook_version < 0) { |
|
hook_version = HOOK_INTERFACE_VERSION1; |
|
if (!last_update_token.len) |
|
strbuf_addf(&last_update_token, "%"PRIu64"", last_update); |
|
} |
|
} |
|
|
|
if (hook_version == HOOK_INTERFACE_VERSION1) { |
|
query_success = !query_fsmonitor_hook( |
|
r, HOOK_INTERFACE_VERSION1, |
|
istate->fsmonitor_last_update, &query_result); |
|
if (query_success) |
|
is_trivial = query_result.buf[0] == '/'; |
|
} |
|
|
|
if (is_trivial) |
|
trace2_data_intmax("fsm_hook", NULL, |
|
"query/trivial-response", 1); |
|
|
|
trace_performance_since(last_update, "fsmonitor process '%s'", |
|
fsm_settings__get_hook_path(r)); |
|
trace_printf_key(&trace_fsmonitor, |
|
"fsmonitor process '%s' returned %s", |
|
fsm_settings__get_hook_path(r), |
|
query_success ? "success" : "failure"); |
|
} |
|
|
|
apply_results: |
|
/* |
|
* The response from FSMonitor (excluding the header token) is |
|
* either: |
|
* |
|
* [a] a (possibly empty) list of NUL delimited relative |
|
* pathnames of changed paths. This list can contain |
|
* files and directories. Directories have a trailing |
|
* slash. |
|
* |
|
* [b] a single '/' to indicate the provider had no |
|
* information and that we should consider everything |
|
* invalid. We call this a trivial response. |
|
*/ |
|
trace2_region_enter("fsmonitor", "apply_results", istate->repo); |
|
|
|
if (query_success && !is_trivial) { |
|
/* |
|
* Mark all pathnames returned by the monitor as dirty. |
|
* |
|
* This updates both the cache-entries and the untracked-cache. |
|
*/ |
|
int count = 0; |
|
|
|
buf = query_result.buf; |
|
for (i = bol; i < query_result.len; i++) { |
|
if (buf[i] != '\0') |
|
continue; |
|
fsmonitor_refresh_callback(istate, buf + bol); |
|
bol = i + 1; |
|
count++; |
|
} |
|
if (bol < query_result.len) { |
|
fsmonitor_refresh_callback(istate, buf + bol); |
|
count++; |
|
} |
|
|
|
/* Now mark the untracked cache for fsmonitor usage */ |
|
if (istate->untracked) |
|
istate->untracked->use_fsmonitor = 1; |
|
|
|
if (count > fsmonitor_force_update_threshold) |
|
istate->cache_changed |= FSMONITOR_CHANGED; |
|
|
|
trace2_data_intmax("fsmonitor", istate->repo, "apply_count", |
|
count); |
|
|
|
} else { |
|
/* |
|
* We failed to get a response or received a trivial response, |
|
* so invalidate everything. |
|
* |
|
* We only want to run the post index changed hook if |
|
* we've actually changed entries, so keep track if we |
|
* actually changed entries or not. |
|
*/ |
|
int is_cache_changed = 0; |
|
|
|
for (i = 0; i < istate->cache_nr; i++) { |
|
if (istate->cache[i]->ce_flags & CE_FSMONITOR_VALID) { |
|
is_cache_changed = 1; |
|
istate->cache[i]->ce_flags &= ~CE_FSMONITOR_VALID; |
|
} |
|
} |
|
|
|
/* |
|
* If we're going to check every file, ensure we save |
|
* the results. |
|
*/ |
|
if (is_cache_changed) |
|
istate->cache_changed |= FSMONITOR_CHANGED; |
|
|
|
if (istate->untracked) |
|
istate->untracked->use_fsmonitor = 0; |
|
} |
|
trace2_region_leave("fsmonitor", "apply_results", istate->repo); |
|
|
|
strbuf_release(&query_result); |
|
|
|
/* Now that we've updated istate, save the last_update_token */ |
|
FREE_AND_NULL(istate->fsmonitor_last_update); |
|
istate->fsmonitor_last_update = strbuf_detach(&last_update_token, NULL); |
|
} |
|
|
|
/* |
|
* The caller wants to turn on FSMonitor. And when the caller writes |
|
* the index to disk, a FSMonitor extension should be included. This |
|
* requires that `istate->fsmonitor_last_update` not be NULL. But we |
|
* have not actually talked to a FSMonitor process yet, so we don't |
|
* have an initial value for this field. |
|
* |
|
* For a protocol V1 FSMonitor process, this field is a formatted |
|
* "nanoseconds since epoch" field. However, for a protocol V2 |
|
* FSMonitor process, this field is an opaque token. |
|
* |
|
* Historically, `add_fsmonitor()` has initialized this field to the |
|
* current time for protocol V1 processes. There are lots of race |
|
* conditions here, but that code has shipped... |
|
* |
|
* The only true solution is to use a V2 FSMonitor and get a current |
|
* or default token value (that it understands), but we cannot do that |
|
* until we have actually talked to an instance of the FSMonitor process |
|
* (but the protocol requires that we send a token first...). |
|
* |
|
* For simplicity, just initialize like we have a V1 process and require |
|
* that V2 processes adapt. |
|
*/ |
|
static void initialize_fsmonitor_last_update(struct index_state *istate) |
|
{ |
|
struct strbuf last_update = STRBUF_INIT; |
|
|
|
strbuf_addf(&last_update, "%"PRIu64"", getnanotime()); |
|
istate->fsmonitor_last_update = strbuf_detach(&last_update, NULL); |
|
} |
|
|
|
void add_fsmonitor(struct index_state *istate) |
|
{ |
|
unsigned int i; |
|
|
|
if (!istate->fsmonitor_last_update) { |
|
trace_printf_key(&trace_fsmonitor, "add fsmonitor"); |
|
istate->cache_changed |= FSMONITOR_CHANGED; |
|
initialize_fsmonitor_last_update(istate); |
|
|
|
/* reset the fsmonitor state */ |
|
for (i = 0; i < istate->cache_nr; i++) |
|
istate->cache[i]->ce_flags &= ~CE_FSMONITOR_VALID; |
|
|
|
/* reset the untracked cache */ |
|
if (istate->untracked) { |
|
add_untracked_cache(istate); |
|
istate->untracked->use_fsmonitor = 1; |
|
} |
|
|
|
/* Update the fsmonitor state */ |
|
refresh_fsmonitor(istate); |
|
} |
|
} |
|
|
|
void remove_fsmonitor(struct index_state *istate) |
|
{ |
|
if (istate->fsmonitor_last_update) { |
|
trace_printf_key(&trace_fsmonitor, "remove fsmonitor"); |
|
istate->cache_changed |= FSMONITOR_CHANGED; |
|
FREE_AND_NULL(istate->fsmonitor_last_update); |
|
} |
|
} |
|
|
|
void tweak_fsmonitor(struct index_state *istate) |
|
{ |
|
unsigned int i; |
|
int fsmonitor_enabled = (fsm_settings__get_mode(istate->repo) |
|
> FSMONITOR_MODE_DISABLED); |
|
|
|
if (istate->fsmonitor_dirty) { |
|
if (fsmonitor_enabled) { |
|
/* Mark all entries valid */ |
|
for (i = 0; i < istate->cache_nr; i++) { |
|
if (S_ISGITLINK(istate->cache[i]->ce_mode)) |
|
continue; |
|
istate->cache[i]->ce_flags |= CE_FSMONITOR_VALID; |
|
} |
|
|
|
/* Mark all previously saved entries as dirty */ |
|
assert_index_minimum(istate, istate->fsmonitor_dirty->bit_size); |
|
ewah_each_bit(istate->fsmonitor_dirty, fsmonitor_ewah_callback, istate); |
|
|
|
refresh_fsmonitor(istate); |
|
} |
|
|
|
ewah_free(istate->fsmonitor_dirty); |
|
istate->fsmonitor_dirty = NULL; |
|
} |
|
|
|
if (fsmonitor_enabled) |
|
add_fsmonitor(istate); |
|
else |
|
remove_fsmonitor(istate); |
|
}
|
|
|