Merge branch 'sj/ref-consistency-checks-more'

"git fsck" becomes more careful when checking the refs.

* sj/ref-consistency-checks-more:
  builtin/fsck: add `git refs verify` child process
  packed-backend: check whether the "packed-refs" is sorted
  packed-backend: add "packed-refs" entry consistency check
  packed-backend: check whether the refname contains NUL characters
  packed-backend: add "packed-refs" header consistency check
  packed-backend: check if header starts with "# pack-refs with: "
  packed-backend: check whether the "packed-refs" is regular file
  builtin/refs: get worktrees without reading head information
  t0602: use subshell to ensure working directory unchanged
maint
Junio C Hamano 2025-03-26 16:26:09 +09:00
commit de35b7b3ff
9 changed files with 1139 additions and 460 deletions

View File

@ -16,6 +16,13 @@
`badObjectSha1`:: `badObjectSha1`::
(ERROR) An object has a bad sha1. (ERROR) An object has a bad sha1.


`badPackedRefEntry`::
(ERROR) The "packed-refs" file contains an invalid entry.

`badPackedRefHeader`::
(ERROR) The "packed-refs" file contains an invalid
header.

`badParentSha1`:: `badParentSha1`::
(ERROR) A commit object has a bad parent sha1. (ERROR) A commit object has a bad parent sha1.


@ -176,6 +183,13 @@
`nullSha1`:: `nullSha1`::
(WARN) Tree contains entries pointing to a null sha1. (WARN) Tree contains entries pointing to a null sha1.


`packedRefEntryNotTerminated`::
(ERROR) The "packed-refs" file contains an entry that is
not terminated by a newline.

`packedRefUnsorted`::
(ERROR) The "packed-refs" file is not sorted.

`refMissingNewline`:: `refMissingNewline`::
(INFO) A loose ref that does not end with newline(LF). As (INFO) A loose ref that does not end with newline(LF). As
valid implementations of Git never created such a loose ref valid implementations of Git never created such a loose ref

View File

@ -12,7 +12,7 @@ SYNOPSIS
'git fsck' [--tags] [--root] [--unreachable] [--cache] [--no-reflogs] 'git fsck' [--tags] [--root] [--unreachable] [--cache] [--no-reflogs]
[--[no-]full] [--strict] [--verbose] [--lost-found] [--[no-]full] [--strict] [--verbose] [--lost-found]
[--[no-]dangling] [--[no-]progress] [--connectivity-only] [--[no-]dangling] [--[no-]progress] [--connectivity-only]
[--[no-]name-objects] [<object>...] [--[no-]name-objects] [--[no-]references] [<object>...]


DESCRIPTION DESCRIPTION
----------- -----------
@ -104,6 +104,11 @@ care about this output and want to speed it up further.
progress status even if the standard error stream is not progress status even if the standard error stream is not
directed to a terminal. directed to a terminal.


--[no-]references::
Control whether to check the references database consistency
via 'git refs verify'. See linkgit:git-refs[1] for details.
The default is to check the references database.

CONFIGURATION CONFIGURATION
------------- -------------



View File

@ -50,6 +50,7 @@ static int verbose;
static int show_progress = -1; static int show_progress = -1;
static int show_dangling = 1; static int show_dangling = 1;
static int name_objects; static int name_objects;
static int check_references = 1;
#define ERROR_OBJECT 01 #define ERROR_OBJECT 01
#define ERROR_REACHABLE 02 #define ERROR_REACHABLE 02
#define ERROR_PACK 04 #define ERROR_PACK 04
@ -905,11 +906,37 @@ static int check_pack_rev_indexes(struct repository *r, int show_progress)
return res; return res;
} }


static void fsck_refs(struct repository *r)
{
struct child_process refs_verify = CHILD_PROCESS_INIT;
struct progress *progress = NULL;

if (show_progress)
progress = start_progress(r, _("Checking ref database"), 1);

if (verbose)
fprintf_ln(stderr, _("Checking ref database"));

child_process_init(&refs_verify);
refs_verify.git_cmd = 1;
strvec_pushl(&refs_verify.args, "refs", "verify", NULL);
if (verbose)
strvec_push(&refs_verify.args, "--verbose");
if (check_strict)
strvec_push(&refs_verify.args, "--strict");

if (run_command(&refs_verify))
errors_found |= ERROR_REFS;

display_progress(progress, 1);
stop_progress(&progress);
}

static char const * const fsck_usage[] = { static char const * const fsck_usage[] = {
N_("git fsck [--tags] [--root] [--unreachable] [--cache] [--no-reflogs]\n" N_("git fsck [--tags] [--root] [--unreachable] [--cache] [--no-reflogs]\n"
" [--[no-]full] [--strict] [--verbose] [--lost-found]\n" " [--[no-]full] [--strict] [--verbose] [--lost-found]\n"
" [--[no-]dangling] [--[no-]progress] [--connectivity-only]\n" " [--[no-]dangling] [--[no-]progress] [--connectivity-only]\n"
" [--[no-]name-objects] [<object>...]"), " [--[no-]name-objects] [--[no-]references] [<object>...]"),
NULL NULL
}; };


@ -928,6 +955,7 @@ static struct option fsck_opts[] = {
N_("write dangling objects in .git/lost-found")), N_("write dangling objects in .git/lost-found")),
OPT_BOOL(0, "progress", &show_progress, N_("show progress")), OPT_BOOL(0, "progress", &show_progress, N_("show progress")),
OPT_BOOL(0, "name-objects", &name_objects, N_("show verbose names for reachable objects")), OPT_BOOL(0, "name-objects", &name_objects, N_("show verbose names for reachable objects")),
OPT_BOOL(0, "references", &check_references, N_("check reference database consistency")),
OPT_END(), OPT_END(),
}; };


@ -970,6 +998,9 @@ int cmd_fsck(int argc,
git_config(git_fsck_config, &fsck_obj_options); git_config(git_fsck_config, &fsck_obj_options);
prepare_repo_settings(the_repository); prepare_repo_settings(the_repository);


if (check_references)
fsck_refs(the_repository);

if (connectivity_only) { if (connectivity_only) {
for_each_loose_object(mark_loose_for_connectivity, NULL, 0); for_each_loose_object(mark_loose_for_connectivity, NULL, 0);
for_each_packed_object(the_repository, for_each_packed_object(the_repository,

View File

@ -91,7 +91,7 @@ static int cmd_refs_verify(int argc, const char **argv, const char *prefix,
git_config(git_fsck_config, &fsck_refs_options); git_config(git_fsck_config, &fsck_refs_options);
prepare_repo_settings(the_repository); prepare_repo_settings(the_repository);


worktrees = get_worktrees(); worktrees = get_worktrees_without_reading_head();
for (size_t i = 0; worktrees[i]; i++) for (size_t i = 0; worktrees[i]; i++)
ret |= refs_fsck(get_worktree_ref_store(worktrees[i]), ret |= refs_fsck(get_worktree_ref_store(worktrees[i]),
&fsck_refs_options, worktrees[i]); &fsck_refs_options, worktrees[i]);

4
fsck.h
View File

@ -30,6 +30,8 @@ enum fsck_msg_type {
FUNC(BAD_EMAIL, ERROR) \ FUNC(BAD_EMAIL, ERROR) \
FUNC(BAD_NAME, ERROR) \ FUNC(BAD_NAME, ERROR) \
FUNC(BAD_OBJECT_SHA1, ERROR) \ FUNC(BAD_OBJECT_SHA1, ERROR) \
FUNC(BAD_PACKED_REF_ENTRY, ERROR) \
FUNC(BAD_PACKED_REF_HEADER, ERROR) \
FUNC(BAD_PARENT_SHA1, ERROR) \ FUNC(BAD_PARENT_SHA1, ERROR) \
FUNC(BAD_REF_CONTENT, ERROR) \ FUNC(BAD_REF_CONTENT, ERROR) \
FUNC(BAD_REF_FILETYPE, ERROR) \ FUNC(BAD_REF_FILETYPE, ERROR) \
@ -53,6 +55,8 @@ enum fsck_msg_type {
FUNC(MISSING_TYPE, ERROR) \ FUNC(MISSING_TYPE, ERROR) \
FUNC(MISSING_TYPE_ENTRY, ERROR) \ FUNC(MISSING_TYPE_ENTRY, ERROR) \
FUNC(MULTIPLE_AUTHORS, ERROR) \ FUNC(MULTIPLE_AUTHORS, ERROR) \
FUNC(PACKED_REF_ENTRY_NOT_TERMINATED, ERROR) \
FUNC(PACKED_REF_UNSORTED, ERROR) \
FUNC(TREE_NOT_SORTED, ERROR) \ FUNC(TREE_NOT_SORTED, ERROR) \
FUNC(UNKNOWN_TYPE, ERROR) \ FUNC(UNKNOWN_TYPE, ERROR) \
FUNC(ZERO_PADDED_DATE, ERROR) \ FUNC(ZERO_PADDED_DATE, ERROR) \

View File

@ -4,6 +4,7 @@
#include "../git-compat-util.h" #include "../git-compat-util.h"
#include "../config.h" #include "../config.h"
#include "../dir.h" #include "../dir.h"
#include "../fsck.h"
#include "../gettext.h" #include "../gettext.h"
#include "../hash.h" #include "../hash.h"
#include "../hex.h" #include "../hex.h"
@ -299,14 +300,9 @@ struct snapshot_record {
size_t len; size_t len;
}; };


static int cmp_packed_ref_records(const void *v1, const void *v2,
void *cb_data)
{
const struct snapshot *snapshot = cb_data;
const struct snapshot_record *e1 = v1, *e2 = v2;
const char *r1 = e1->start + snapshot_hexsz(snapshot) + 1;
const char *r2 = e2->start + snapshot_hexsz(snapshot) + 1;


static int cmp_packed_refname(const char *r1, const char *r2)
{
while (1) { while (1) {
if (*r1 == '\n') if (*r1 == '\n')
return *r2 == '\n' ? 0 : -1; return *r2 == '\n' ? 0 : -1;
@ -321,6 +317,17 @@ static int cmp_packed_ref_records(const void *v1, const void *v2,
} }
} }


static int cmp_packed_ref_records(const void *v1, const void *v2,
void *cb_data)
{
const struct snapshot *snapshot = cb_data;
const struct snapshot_record *e1 = v1, *e2 = v2;
const char *r1 = e1->start + snapshot_hexsz(snapshot) + 1;
const char *r2 = e2->start + snapshot_hexsz(snapshot) + 1;

return cmp_packed_refname(r1, r2);
}

/* /*
* Compare a snapshot record at `rec` to the specified NUL-terminated * Compare a snapshot record at `rec` to the specified NUL-terminated
* refname. * refname.
@ -493,6 +500,21 @@ static void verify_buffer_safe(struct snapshot *snapshot)
last_line, eof - last_line); last_line, eof - last_line);
} }


/*
* When parsing the "packed-refs" file, we will parse it line by line.
* Because we know the start pointer of the refname and the next
* newline pointer, we could calculate the length of the refname by
* subtracting the two pointers. However, there is a corner case where
* the refname contains corrupted embedded NUL characters. And
* `check_refname_format()` will not catch this when the truncated
* refname is still a valid refname. To prevent this, we need to check
* whether the refname contains the NUL characters.
*/
static int refname_contains_nul(struct strbuf *refname)
{
return !!memchr(refname->buf, '\0', refname->len);
}

#define SMALL_FILE_SIZE (32*1024) #define SMALL_FILE_SIZE (32*1024)


/* /*
@ -693,7 +715,7 @@ static struct snapshot *create_snapshot(struct packed_ref_store *refs)


tmp = xmemdupz(snapshot->buf, eol - snapshot->buf); tmp = xmemdupz(snapshot->buf, eol - snapshot->buf);


if (!skip_prefix(tmp, "# pack-refs with:", (const char **)&p)) if (!skip_prefix(tmp, "# pack-refs with: ", (const char **)&p))
die_invalid_line(refs->path, die_invalid_line(refs->path,
snapshot->buf, snapshot->buf,
snapshot->eof - snapshot->buf); snapshot->eof - snapshot->buf);
@ -894,6 +916,9 @@ static int next_record(struct packed_ref_iterator *iter)
strbuf_add(&iter->refname_buf, p, eol - p); strbuf_add(&iter->refname_buf, p, eol - p);
iter->base.refname = iter->refname_buf.buf; iter->base.refname = iter->refname_buf.buf;


if (refname_contains_nul(&iter->refname_buf))
die("packed refname contains embedded NULL: %s", iter->base.refname);

if (check_refname_format(iter->base.refname, REFNAME_ALLOW_ONELEVEL)) { if (check_refname_format(iter->base.refname, REFNAME_ALLOW_ONELEVEL)) {
if (!refname_is_safe(iter->base.refname)) if (!refname_is_safe(iter->base.refname))
die("packed refname is dangerous: %s", die("packed refname is dangerous: %s",
@ -1748,15 +1773,329 @@ static struct ref_iterator *packed_reflog_iterator_begin(struct ref_store *ref_s
return empty_ref_iterator_begin(); return empty_ref_iterator_begin();
} }


static int packed_fsck(struct ref_store *ref_store UNUSED, static int packed_fsck_ref_next_line(struct fsck_options *o,
struct fsck_options *o UNUSED, unsigned long line_number, const char *start,
const char *eof, const char **eol)
{
int ret = 0;

*eol = memchr(start, '\n', eof - start);
if (!*eol) {
struct strbuf packed_entry = STRBUF_INIT;
struct fsck_ref_report report = { 0 };

strbuf_addf(&packed_entry, "packed-refs line %lu", line_number);
report.path = packed_entry.buf;
ret = fsck_report_ref(o, &report,
FSCK_MSG_PACKED_REF_ENTRY_NOT_TERMINATED,
"'%.*s' is not terminated with a newline",
(int)(eof - start), start);

/*
* There is no newline but we still want to parse it to the end of
* the buffer.
*/
*eol = eof;
strbuf_release(&packed_entry);
}

return ret;
}

static int packed_fsck_ref_header(struct fsck_options *o,
const char *start, const char *eol,
unsigned int *sorted)
{
struct string_list traits = STRING_LIST_INIT_NODUP;
char *tmp_line;
int ret = 0;
char *p;

tmp_line = xmemdupz(start, eol - start);
if (!skip_prefix(tmp_line, "# pack-refs with: ", (const char **)&p)) {
struct fsck_ref_report report = { 0 };
report.path = "packed-refs.header";

ret = fsck_report_ref(o, &report,
FSCK_MSG_BAD_PACKED_REF_HEADER,
"'%.*s' does not start with '# pack-refs with: '",
(int)(eol - start), start);
goto cleanup;
}

string_list_split_in_place(&traits, p, " ", -1);
*sorted = unsorted_string_list_has_string(&traits, "sorted");

cleanup:
free(tmp_line);
string_list_clear(&traits, 0);
return ret;
}

static int packed_fsck_ref_peeled_line(struct fsck_options *o,
struct ref_store *ref_store,
unsigned long line_number,
const char *start, const char *eol)
{
struct strbuf packed_entry = STRBUF_INIT;
struct fsck_ref_report report = { 0 };
struct object_id peeled;
const char *p;
int ret = 0;

/*
* Skip the '^' and parse the peeled oid.
*/
start++;
if (parse_oid_hex_algop(start, &peeled, &p, ref_store->repo->hash_algo)) {
strbuf_addf(&packed_entry, "packed-refs line %lu", line_number);
report.path = packed_entry.buf;

ret = fsck_report_ref(o, &report,
FSCK_MSG_BAD_PACKED_REF_ENTRY,
"'%.*s' has invalid peeled oid",
(int)(eol - start), start);
goto cleanup;
}

if (p != eol) {
strbuf_addf(&packed_entry, "packed-refs line %lu", line_number);
report.path = packed_entry.buf;

ret = fsck_report_ref(o, &report,
FSCK_MSG_BAD_PACKED_REF_ENTRY,
"has trailing garbage after peeled oid '%.*s'",
(int)(eol - p), p);
goto cleanup;
}

cleanup:
strbuf_release(&packed_entry);
return ret;
}

static int packed_fsck_ref_main_line(struct fsck_options *o,
struct ref_store *ref_store,
unsigned long line_number,
struct strbuf *refname,
const char *start, const char *eol)
{
struct strbuf packed_entry = STRBUF_INIT;
struct fsck_ref_report report = { 0 };
struct object_id oid;
const char *p;
int ret = 0;

if (parse_oid_hex_algop(start, &oid, &p, ref_store->repo->hash_algo)) {
strbuf_addf(&packed_entry, "packed-refs line %lu", line_number);
report.path = packed_entry.buf;

ret = fsck_report_ref(o, &report,
FSCK_MSG_BAD_PACKED_REF_ENTRY,
"'%.*s' has invalid oid",
(int)(eol - start), start);
goto cleanup;
}

if (p == eol || !isspace(*p)) {
strbuf_addf(&packed_entry, "packed-refs line %lu", line_number);
report.path = packed_entry.buf;

ret = fsck_report_ref(o, &report,
FSCK_MSG_BAD_PACKED_REF_ENTRY,
"has no space after oid '%s' but with '%.*s'",
oid_to_hex(&oid), (int)(eol - p), p);
goto cleanup;
}

p++;
strbuf_reset(refname);
strbuf_add(refname, p, eol - p);
if (refname_contains_nul(refname)) {
strbuf_addf(&packed_entry, "packed-refs line %lu", line_number);
report.path = packed_entry.buf;

ret = fsck_report_ref(o, &report,
FSCK_MSG_BAD_PACKED_REF_ENTRY,
"refname '%s' contains NULL binaries",
refname->buf);
}

if (check_refname_format(refname->buf, 0)) {
strbuf_addf(&packed_entry, "packed-refs line %lu", line_number);
report.path = packed_entry.buf;

ret = fsck_report_ref(o, &report,
FSCK_MSG_BAD_REF_NAME,
"has bad refname '%s'", refname->buf);
}

cleanup:
strbuf_release(&packed_entry);
return ret;
}

static int packed_fsck_ref_sorted(struct fsck_options *o,
struct ref_store *ref_store,
const char *start, const char *eof)
{
size_t hexsz = ref_store->repo->hash_algo->hexsz;
struct strbuf packed_entry = STRBUF_INIT;
struct fsck_ref_report report = { 0 };
struct strbuf refname1 = STRBUF_INIT;
struct strbuf refname2 = STRBUF_INIT;
unsigned long line_number = 1;
const char *former = NULL;
const char *current;
const char *eol;
int ret = 0;

if (*start == '#') {
eol = memchr(start, '\n', eof - start);
start = eol + 1;
line_number++;
}

for (; start < eof; line_number++, start = eol + 1) {
eol = memchr(start, '\n', eof - start);

if (*start == '^')
continue;

if (!former) {
former = start + hexsz + 1;
continue;
}

current = start + hexsz + 1;
if (cmp_packed_refname(former, current) >= 0) {
const char *err_fmt =
"refname '%s' is less than previous refname '%s'";

eol = memchr(former, '\n', eof - former);
strbuf_add(&refname1, former, eol - former);
eol = memchr(current, '\n', eof - current);
strbuf_add(&refname2, current, eol - current);

strbuf_addf(&packed_entry, "packed-refs line %lu", line_number);
report.path = packed_entry.buf;
ret = fsck_report_ref(o, &report,
FSCK_MSG_PACKED_REF_UNSORTED,
err_fmt, refname2.buf, refname1.buf);
goto cleanup;
}
former = current;
}

cleanup:
strbuf_release(&packed_entry);
strbuf_release(&refname1);
strbuf_release(&refname2);
return ret;
}

static int packed_fsck_ref_content(struct fsck_options *o,
struct ref_store *ref_store,
unsigned int *sorted,
const char *start, const char *eof)
{
struct strbuf refname = STRBUF_INIT;
unsigned long line_number = 1;
const char *eol;
int ret = 0;

ret |= packed_fsck_ref_next_line(o, line_number, start, eof, &eol);
if (*start == '#') {
ret |= packed_fsck_ref_header(o, start, eol, sorted);

start = eol + 1;
line_number++;
}

while (start < eof) {
ret |= packed_fsck_ref_next_line(o, line_number, start, eof, &eol);
ret |= packed_fsck_ref_main_line(o, ref_store, line_number, &refname, start, eol);
start = eol + 1;
line_number++;
if (start < eof && *start == '^') {
ret |= packed_fsck_ref_next_line(o, line_number, start, eof, &eol);
ret |= packed_fsck_ref_peeled_line(o, ref_store, line_number,
start, eol);
start = eol + 1;
line_number++;
}
}

strbuf_release(&refname);
return ret;
}

static int packed_fsck(struct ref_store *ref_store,
struct fsck_options *o,
struct worktree *wt) struct worktree *wt)
{ {
struct packed_ref_store *refs = packed_downcast(ref_store,
REF_STORE_READ, "fsck");
struct strbuf packed_ref_content = STRBUF_INIT;
unsigned int sorted = 0;
struct stat st;
int ret = 0;
int fd = -1;


if (!is_main_worktree(wt)) if (!is_main_worktree(wt))
return 0; goto cleanup;


return 0; if (o->verbose)
fprintf_ln(stderr, "Checking packed-refs file %s", refs->path);

fd = open_nofollow(refs->path, O_RDONLY);
if (fd < 0) {
/*
* If the packed-refs file doesn't exist, there's nothing
* to check.
*/
if (errno == ENOENT)
goto cleanup;

if (errno == ELOOP) {
struct fsck_ref_report report = { 0 };
report.path = "packed-refs";
ret = fsck_report_ref(o, &report,
FSCK_MSG_BAD_REF_FILETYPE,
"not a regular file but a symlink");
goto cleanup;
}

ret = error_errno(_("unable to open '%s'"), refs->path);
goto cleanup;
} else if (fstat(fd, &st) < 0) {
ret = error_errno(_("unable to stat '%s'"), refs->path);
goto cleanup;
} else if (!S_ISREG(st.st_mode)) {
struct fsck_ref_report report = { 0 };
report.path = "packed-refs";
ret = fsck_report_ref(o, &report,
FSCK_MSG_BAD_REF_FILETYPE,
"not a regular file");
goto cleanup;
}

if (strbuf_read(&packed_ref_content, fd, 0) < 0) {
ret = error_errno(_("unable to read '%s'"), refs->path);
goto cleanup;
}

ret = packed_fsck_ref_content(o, ref_store, &sorted, packed_ref_content.buf,
packed_ref_content.buf + packed_ref_content.len);
if (!ret && sorted)
ret = packed_fsck_ref_sorted(o, ref_store, packed_ref_content.buf,
packed_ref_content.buf + packed_ref_content.len);

cleanup:
if (fd >= 0)
close(fd);
strbuf_release(&packed_ref_content);
return ret;
} }


struct ref_storage_be refs_be_packed = { struct ref_storage_be refs_be_packed = {

File diff suppressed because it is too large Load Diff

View File

@ -199,6 +199,11 @@ struct worktree **get_worktrees(void)
return get_worktrees_internal(0); return get_worktrees_internal(0);
} }


struct worktree **get_worktrees_without_reading_head(void)
{
return get_worktrees_internal(1);
}

char *get_worktree_git_dir(const struct worktree *wt) char *get_worktree_git_dir(const struct worktree *wt)
{ {
if (!wt) if (!wt)

View File

@ -30,6 +30,14 @@ struct worktree {
*/ */
struct worktree **get_worktrees(void); struct worktree **get_worktrees(void);


/*
* Like `get_worktrees`, but does not read HEAD. Skip reading HEAD allows to
* get the worktree without worrying about failures pertaining to parsing
* the HEAD ref. This is useful in contexts where it is assumed that the
* refdb may not be in a consistent state.
*/
struct worktree **get_worktrees_without_reading_head(void);

/* /*
* Returns 1 if linked worktrees exist, 0 otherwise. * Returns 1 if linked worktrees exist, 0 otherwise.
*/ */