Merge branch 'sj/ref-consistency-checks-more'

"git fsck" becomes more careful when checking the refs.

* sj/ref-consistency-checks-more:
  builtin/fsck: add `git refs verify` child process
  packed-backend: check whether the "packed-refs" is sorted
  packed-backend: add "packed-refs" entry consistency check
  packed-backend: check whether the refname contains NUL characters
  packed-backend: add "packed-refs" header consistency check
  packed-backend: check if header starts with "# pack-refs with: "
  packed-backend: check whether the "packed-refs" is regular file
  builtin/refs: get worktrees without reading head information
  t0602: use subshell to ensure working directory unchanged
maint
Junio C Hamano 2025-03-26 16:26:09 +09:00
commit de35b7b3ff
9 changed files with 1139 additions and 460 deletions

View File

@ -16,6 +16,13 @@
`badObjectSha1`::
(ERROR) An object has a bad sha1.

`badPackedRefEntry`::
(ERROR) The "packed-refs" file contains an invalid entry.

`badPackedRefHeader`::
(ERROR) The "packed-refs" file contains an invalid
header.

`badParentSha1`::
(ERROR) A commit object has a bad parent sha1.

@ -176,6 +183,13 @@
`nullSha1`::
(WARN) Tree contains entries pointing to a null sha1.

`packedRefEntryNotTerminated`::
(ERROR) The "packed-refs" file contains an entry that is
not terminated by a newline.

`packedRefUnsorted`::
(ERROR) The "packed-refs" file is not sorted.

`refMissingNewline`::
(INFO) A loose ref that does not end with newline(LF). As
valid implementations of Git never created such a loose ref

View File

@ -12,7 +12,7 @@ SYNOPSIS
'git fsck' [--tags] [--root] [--unreachable] [--cache] [--no-reflogs]
[--[no-]full] [--strict] [--verbose] [--lost-found]
[--[no-]dangling] [--[no-]progress] [--connectivity-only]
[--[no-]name-objects] [<object>...]
[--[no-]name-objects] [--[no-]references] [<object>...]

DESCRIPTION
-----------
@ -104,6 +104,11 @@ care about this output and want to speed it up further.
progress status even if the standard error stream is not
directed to a terminal.

--[no-]references::
Control whether to check the references database consistency
via 'git refs verify'. See linkgit:git-refs[1] for details.
The default is to check the references database.

CONFIGURATION
-------------


View File

@ -50,6 +50,7 @@ static int verbose;
static int show_progress = -1;
static int show_dangling = 1;
static int name_objects;
static int check_references = 1;
#define ERROR_OBJECT 01
#define ERROR_REACHABLE 02
#define ERROR_PACK 04
@ -905,11 +906,37 @@ static int check_pack_rev_indexes(struct repository *r, int show_progress)
return res;
}

static void fsck_refs(struct repository *r)
{
struct child_process refs_verify = CHILD_PROCESS_INIT;
struct progress *progress = NULL;

if (show_progress)
progress = start_progress(r, _("Checking ref database"), 1);

if (verbose)
fprintf_ln(stderr, _("Checking ref database"));

child_process_init(&refs_verify);
refs_verify.git_cmd = 1;
strvec_pushl(&refs_verify.args, "refs", "verify", NULL);
if (verbose)
strvec_push(&refs_verify.args, "--verbose");
if (check_strict)
strvec_push(&refs_verify.args, "--strict");

if (run_command(&refs_verify))
errors_found |= ERROR_REFS;

display_progress(progress, 1);
stop_progress(&progress);
}

static char const * const fsck_usage[] = {
N_("git fsck [--tags] [--root] [--unreachable] [--cache] [--no-reflogs]\n"
" [--[no-]full] [--strict] [--verbose] [--lost-found]\n"
" [--[no-]dangling] [--[no-]progress] [--connectivity-only]\n"
" [--[no-]name-objects] [<object>...]"),
" [--[no-]name-objects] [--[no-]references] [<object>...]"),
NULL
};

@ -928,6 +955,7 @@ static struct option fsck_opts[] = {
N_("write dangling objects in .git/lost-found")),
OPT_BOOL(0, "progress", &show_progress, N_("show progress")),
OPT_BOOL(0, "name-objects", &name_objects, N_("show verbose names for reachable objects")),
OPT_BOOL(0, "references", &check_references, N_("check reference database consistency")),
OPT_END(),
};

@ -970,6 +998,9 @@ int cmd_fsck(int argc,
git_config(git_fsck_config, &fsck_obj_options);
prepare_repo_settings(the_repository);

if (check_references)
fsck_refs(the_repository);

if (connectivity_only) {
for_each_loose_object(mark_loose_for_connectivity, NULL, 0);
for_each_packed_object(the_repository,

View File

@ -91,7 +91,7 @@ static int cmd_refs_verify(int argc, const char **argv, const char *prefix,
git_config(git_fsck_config, &fsck_refs_options);
prepare_repo_settings(the_repository);

worktrees = get_worktrees();
worktrees = get_worktrees_without_reading_head();
for (size_t i = 0; worktrees[i]; i++)
ret |= refs_fsck(get_worktree_ref_store(worktrees[i]),
&fsck_refs_options, worktrees[i]);

4
fsck.h
View File

@ -30,6 +30,8 @@ enum fsck_msg_type {
FUNC(BAD_EMAIL, ERROR) \
FUNC(BAD_NAME, ERROR) \
FUNC(BAD_OBJECT_SHA1, ERROR) \
FUNC(BAD_PACKED_REF_ENTRY, ERROR) \
FUNC(BAD_PACKED_REF_HEADER, ERROR) \
FUNC(BAD_PARENT_SHA1, ERROR) \
FUNC(BAD_REF_CONTENT, ERROR) \
FUNC(BAD_REF_FILETYPE, ERROR) \
@ -53,6 +55,8 @@ enum fsck_msg_type {
FUNC(MISSING_TYPE, ERROR) \
FUNC(MISSING_TYPE_ENTRY, ERROR) \
FUNC(MULTIPLE_AUTHORS, ERROR) \
FUNC(PACKED_REF_ENTRY_NOT_TERMINATED, ERROR) \
FUNC(PACKED_REF_UNSORTED, ERROR) \
FUNC(TREE_NOT_SORTED, ERROR) \
FUNC(UNKNOWN_TYPE, ERROR) \
FUNC(ZERO_PADDED_DATE, ERROR) \

View File

@ -4,6 +4,7 @@
#include "../git-compat-util.h"
#include "../config.h"
#include "../dir.h"
#include "../fsck.h"
#include "../gettext.h"
#include "../hash.h"
#include "../hex.h"
@ -299,14 +300,9 @@ struct snapshot_record {
size_t len;
};

static int cmp_packed_ref_records(const void *v1, const void *v2,
void *cb_data)
{
const struct snapshot *snapshot = cb_data;
const struct snapshot_record *e1 = v1, *e2 = v2;
const char *r1 = e1->start + snapshot_hexsz(snapshot) + 1;
const char *r2 = e2->start + snapshot_hexsz(snapshot) + 1;

static int cmp_packed_refname(const char *r1, const char *r2)
{
while (1) {
if (*r1 == '\n')
return *r2 == '\n' ? 0 : -1;
@ -321,6 +317,17 @@ static int cmp_packed_ref_records(const void *v1, const void *v2,
}
}

static int cmp_packed_ref_records(const void *v1, const void *v2,
void *cb_data)
{
const struct snapshot *snapshot = cb_data;
const struct snapshot_record *e1 = v1, *e2 = v2;
const char *r1 = e1->start + snapshot_hexsz(snapshot) + 1;
const char *r2 = e2->start + snapshot_hexsz(snapshot) + 1;

return cmp_packed_refname(r1, r2);
}

/*
* Compare a snapshot record at `rec` to the specified NUL-terminated
* refname.
@ -493,6 +500,21 @@ static void verify_buffer_safe(struct snapshot *snapshot)
last_line, eof - last_line);
}

/*
* When parsing the "packed-refs" file, we will parse it line by line.
* Because we know the start pointer of the refname and the next
* newline pointer, we could calculate the length of the refname by
* subtracting the two pointers. However, there is a corner case where
* the refname contains corrupted embedded NUL characters. And
* `check_refname_format()` will not catch this when the truncated
* refname is still a valid refname. To prevent this, we need to check
* whether the refname contains the NUL characters.
*/
static int refname_contains_nul(struct strbuf *refname)
{
return !!memchr(refname->buf, '\0', refname->len);
}

#define SMALL_FILE_SIZE (32*1024)

/*
@ -693,7 +715,7 @@ static struct snapshot *create_snapshot(struct packed_ref_store *refs)

tmp = xmemdupz(snapshot->buf, eol - snapshot->buf);

if (!skip_prefix(tmp, "# pack-refs with:", (const char **)&p))
if (!skip_prefix(tmp, "# pack-refs with: ", (const char **)&p))
die_invalid_line(refs->path,
snapshot->buf,
snapshot->eof - snapshot->buf);
@ -894,6 +916,9 @@ static int next_record(struct packed_ref_iterator *iter)
strbuf_add(&iter->refname_buf, p, eol - p);
iter->base.refname = iter->refname_buf.buf;

if (refname_contains_nul(&iter->refname_buf))
die("packed refname contains embedded NULL: %s", iter->base.refname);

if (check_refname_format(iter->base.refname, REFNAME_ALLOW_ONELEVEL)) {
if (!refname_is_safe(iter->base.refname))
die("packed refname is dangerous: %s",
@ -1748,15 +1773,329 @@ static struct ref_iterator *packed_reflog_iterator_begin(struct ref_store *ref_s
return empty_ref_iterator_begin();
}

static int packed_fsck(struct ref_store *ref_store UNUSED,
struct fsck_options *o UNUSED,
static int packed_fsck_ref_next_line(struct fsck_options *o,
unsigned long line_number, const char *start,
const char *eof, const char **eol)
{
int ret = 0;

*eol = memchr(start, '\n', eof - start);
if (!*eol) {
struct strbuf packed_entry = STRBUF_INIT;
struct fsck_ref_report report = { 0 };

strbuf_addf(&packed_entry, "packed-refs line %lu", line_number);
report.path = packed_entry.buf;
ret = fsck_report_ref(o, &report,
FSCK_MSG_PACKED_REF_ENTRY_NOT_TERMINATED,
"'%.*s' is not terminated with a newline",
(int)(eof - start), start);

/*
* There is no newline but we still want to parse it to the end of
* the buffer.
*/
*eol = eof;
strbuf_release(&packed_entry);
}

return ret;
}

static int packed_fsck_ref_header(struct fsck_options *o,
const char *start, const char *eol,
unsigned int *sorted)
{
struct string_list traits = STRING_LIST_INIT_NODUP;
char *tmp_line;
int ret = 0;
char *p;

tmp_line = xmemdupz(start, eol - start);
if (!skip_prefix(tmp_line, "# pack-refs with: ", (const char **)&p)) {
struct fsck_ref_report report = { 0 };
report.path = "packed-refs.header";

ret = fsck_report_ref(o, &report,
FSCK_MSG_BAD_PACKED_REF_HEADER,
"'%.*s' does not start with '# pack-refs with: '",
(int)(eol - start), start);
goto cleanup;
}

string_list_split_in_place(&traits, p, " ", -1);
*sorted = unsorted_string_list_has_string(&traits, "sorted");

cleanup:
free(tmp_line);
string_list_clear(&traits, 0);
return ret;
}

static int packed_fsck_ref_peeled_line(struct fsck_options *o,
struct ref_store *ref_store,
unsigned long line_number,
const char *start, const char *eol)
{
struct strbuf packed_entry = STRBUF_INIT;
struct fsck_ref_report report = { 0 };
struct object_id peeled;
const char *p;
int ret = 0;

/*
* Skip the '^' and parse the peeled oid.
*/
start++;
if (parse_oid_hex_algop(start, &peeled, &p, ref_store->repo->hash_algo)) {
strbuf_addf(&packed_entry, "packed-refs line %lu", line_number);
report.path = packed_entry.buf;

ret = fsck_report_ref(o, &report,
FSCK_MSG_BAD_PACKED_REF_ENTRY,
"'%.*s' has invalid peeled oid",
(int)(eol - start), start);
goto cleanup;
}

if (p != eol) {
strbuf_addf(&packed_entry, "packed-refs line %lu", line_number);
report.path = packed_entry.buf;

ret = fsck_report_ref(o, &report,
FSCK_MSG_BAD_PACKED_REF_ENTRY,
"has trailing garbage after peeled oid '%.*s'",
(int)(eol - p), p);
goto cleanup;
}

cleanup:
strbuf_release(&packed_entry);
return ret;
}

static int packed_fsck_ref_main_line(struct fsck_options *o,
struct ref_store *ref_store,
unsigned long line_number,
struct strbuf *refname,
const char *start, const char *eol)
{
struct strbuf packed_entry = STRBUF_INIT;
struct fsck_ref_report report = { 0 };
struct object_id oid;
const char *p;
int ret = 0;

if (parse_oid_hex_algop(start, &oid, &p, ref_store->repo->hash_algo)) {
strbuf_addf(&packed_entry, "packed-refs line %lu", line_number);
report.path = packed_entry.buf;

ret = fsck_report_ref(o, &report,
FSCK_MSG_BAD_PACKED_REF_ENTRY,
"'%.*s' has invalid oid",
(int)(eol - start), start);
goto cleanup;
}

if (p == eol || !isspace(*p)) {
strbuf_addf(&packed_entry, "packed-refs line %lu", line_number);
report.path = packed_entry.buf;

ret = fsck_report_ref(o, &report,
FSCK_MSG_BAD_PACKED_REF_ENTRY,
"has no space after oid '%s' but with '%.*s'",
oid_to_hex(&oid), (int)(eol - p), p);
goto cleanup;
}

p++;
strbuf_reset(refname);
strbuf_add(refname, p, eol - p);
if (refname_contains_nul(refname)) {
strbuf_addf(&packed_entry, "packed-refs line %lu", line_number);
report.path = packed_entry.buf;

ret = fsck_report_ref(o, &report,
FSCK_MSG_BAD_PACKED_REF_ENTRY,
"refname '%s' contains NULL binaries",
refname->buf);
}

if (check_refname_format(refname->buf, 0)) {
strbuf_addf(&packed_entry, "packed-refs line %lu", line_number);
report.path = packed_entry.buf;

ret = fsck_report_ref(o, &report,
FSCK_MSG_BAD_REF_NAME,
"has bad refname '%s'", refname->buf);
}

cleanup:
strbuf_release(&packed_entry);
return ret;
}

static int packed_fsck_ref_sorted(struct fsck_options *o,
struct ref_store *ref_store,
const char *start, const char *eof)
{
size_t hexsz = ref_store->repo->hash_algo->hexsz;
struct strbuf packed_entry = STRBUF_INIT;
struct fsck_ref_report report = { 0 };
struct strbuf refname1 = STRBUF_INIT;
struct strbuf refname2 = STRBUF_INIT;
unsigned long line_number = 1;
const char *former = NULL;
const char *current;
const char *eol;
int ret = 0;

if (*start == '#') {
eol = memchr(start, '\n', eof - start);
start = eol + 1;
line_number++;
}

for (; start < eof; line_number++, start = eol + 1) {
eol = memchr(start, '\n', eof - start);

if (*start == '^')
continue;

if (!former) {
former = start + hexsz + 1;
continue;
}

current = start + hexsz + 1;
if (cmp_packed_refname(former, current) >= 0) {
const char *err_fmt =
"refname '%s' is less than previous refname '%s'";

eol = memchr(former, '\n', eof - former);
strbuf_add(&refname1, former, eol - former);
eol = memchr(current, '\n', eof - current);
strbuf_add(&refname2, current, eol - current);

strbuf_addf(&packed_entry, "packed-refs line %lu", line_number);
report.path = packed_entry.buf;
ret = fsck_report_ref(o, &report,
FSCK_MSG_PACKED_REF_UNSORTED,
err_fmt, refname2.buf, refname1.buf);
goto cleanup;
}
former = current;
}

cleanup:
strbuf_release(&packed_entry);
strbuf_release(&refname1);
strbuf_release(&refname2);
return ret;
}

static int packed_fsck_ref_content(struct fsck_options *o,
struct ref_store *ref_store,
unsigned int *sorted,
const char *start, const char *eof)
{
struct strbuf refname = STRBUF_INIT;
unsigned long line_number = 1;
const char *eol;
int ret = 0;

ret |= packed_fsck_ref_next_line(o, line_number, start, eof, &eol);
if (*start == '#') {
ret |= packed_fsck_ref_header(o, start, eol, sorted);

start = eol + 1;
line_number++;
}

while (start < eof) {
ret |= packed_fsck_ref_next_line(o, line_number, start, eof, &eol);
ret |= packed_fsck_ref_main_line(o, ref_store, line_number, &refname, start, eol);
start = eol + 1;
line_number++;
if (start < eof && *start == '^') {
ret |= packed_fsck_ref_next_line(o, line_number, start, eof, &eol);
ret |= packed_fsck_ref_peeled_line(o, ref_store, line_number,
start, eol);
start = eol + 1;
line_number++;
}
}

strbuf_release(&refname);
return ret;
}

static int packed_fsck(struct ref_store *ref_store,
struct fsck_options *o,
struct worktree *wt)
{
struct packed_ref_store *refs = packed_downcast(ref_store,
REF_STORE_READ, "fsck");
struct strbuf packed_ref_content = STRBUF_INIT;
unsigned int sorted = 0;
struct stat st;
int ret = 0;
int fd = -1;

if (!is_main_worktree(wt))
return 0;
goto cleanup;

return 0;
if (o->verbose)
fprintf_ln(stderr, "Checking packed-refs file %s", refs->path);

fd = open_nofollow(refs->path, O_RDONLY);
if (fd < 0) {
/*
* If the packed-refs file doesn't exist, there's nothing
* to check.
*/
if (errno == ENOENT)
goto cleanup;

if (errno == ELOOP) {
struct fsck_ref_report report = { 0 };
report.path = "packed-refs";
ret = fsck_report_ref(o, &report,
FSCK_MSG_BAD_REF_FILETYPE,
"not a regular file but a symlink");
goto cleanup;
}

ret = error_errno(_("unable to open '%s'"), refs->path);
goto cleanup;
} else if (fstat(fd, &st) < 0) {
ret = error_errno(_("unable to stat '%s'"), refs->path);
goto cleanup;
} else if (!S_ISREG(st.st_mode)) {
struct fsck_ref_report report = { 0 };
report.path = "packed-refs";
ret = fsck_report_ref(o, &report,
FSCK_MSG_BAD_REF_FILETYPE,
"not a regular file");
goto cleanup;
}

if (strbuf_read(&packed_ref_content, fd, 0) < 0) {
ret = error_errno(_("unable to read '%s'"), refs->path);
goto cleanup;
}

ret = packed_fsck_ref_content(o, ref_store, &sorted, packed_ref_content.buf,
packed_ref_content.buf + packed_ref_content.len);
if (!ret && sorted)
ret = packed_fsck_ref_sorted(o, ref_store, packed_ref_content.buf,
packed_ref_content.buf + packed_ref_content.len);

cleanup:
if (fd >= 0)
close(fd);
strbuf_release(&packed_ref_content);
return ret;
}

struct ref_storage_be refs_be_packed = {

File diff suppressed because it is too large Load Diff

View File

@ -199,6 +199,11 @@ struct worktree **get_worktrees(void)
return get_worktrees_internal(0);
}

struct worktree **get_worktrees_without_reading_head(void)
{
return get_worktrees_internal(1);
}

char *get_worktree_git_dir(const struct worktree *wt)
{
if (!wt)

View File

@ -30,6 +30,14 @@ struct worktree {
*/
struct worktree **get_worktrees(void);

/*
* Like `get_worktrees`, but does not read HEAD. Skip reading HEAD allows to
* get the worktree without worrying about failures pertaining to parsing
* the HEAD ref. This is useful in contexts where it is assumed that the
* refdb may not be in a consistent state.
*/
struct worktree **get_worktrees_without_reading_head(void);

/*
* Returns 1 if linked worktrees exist, 0 otherwise.
*/