Merge branch 'kn/reftable-consistency-checks'

The reftable backend learned to sanity check its on-disk data more
carefully.

* kn/reftable-consistency-checks:
  refs/reftable: add fsck check for checking the table name
  reftable: add code to facilitate consistency checks
  fsck: order 'fsck_msg_type' alphabetically
  Documentation/fsck-msgids: remove duplicate msg id
  reftable: check for trailing newline in 'tables.list'
  refs: move consistency check msg to generic layer
  refs: remove unused headers
maint
Junio C Hamano 2025-10-13 22:00:35 -07:00
commit f50f046794
16 changed files with 333 additions and 62 deletions

View File

@ -38,6 +38,9 @@
`badReferentName`::
(ERROR) The referent name of a symref is invalid.

`badReftableTableName`::
(WARN) A reftable table has an invalid name.

`badTagName`::
(INFO) A tag has an invalid format.

@ -104,9 +107,6 @@
`gitmodulesParse`::
(INFO) Could not parse `.gitmodules` blob.

`gitmodulesLarge`;
(ERROR) `.gitmodules` blob is too large to parse.

`gitmodulesPath`::
(ERROR) `.gitmodules` path is invalid.


View File

@ -2767,9 +2767,10 @@ XDIFF_OBJS += xdiff/xutils.o
xdiff-objs: $(XDIFF_OBJS)

REFTABLE_OBJS += reftable/basics.o
REFTABLE_OBJS += reftable/error.o
REFTABLE_OBJS += reftable/block.o
REFTABLE_OBJS += reftable/blocksource.o
REFTABLE_OBJS += reftable/error.o
REFTABLE_OBJS += reftable/fsck.o
REFTABLE_OBJS += reftable/iter.o
REFTABLE_OBJS += reftable/merged.o
REFTABLE_OBJS += reftable/pq.o

45
fsck.h
View File

@ -33,15 +33,27 @@ enum fsck_msg_type {
FUNC(BAD_PACKED_REF_ENTRY, ERROR) \
FUNC(BAD_PACKED_REF_HEADER, ERROR) \
FUNC(BAD_PARENT_SHA1, ERROR) \
FUNC(BAD_REFERENT_NAME, ERROR) \
FUNC(BAD_REF_CONTENT, ERROR) \
FUNC(BAD_REF_FILETYPE, ERROR) \
FUNC(BAD_REF_NAME, ERROR) \
FUNC(BAD_REFERENT_NAME, ERROR) \
FUNC(BAD_TIMEZONE, ERROR) \
FUNC(BAD_TREE, ERROR) \
FUNC(BAD_TREE_SHA1, ERROR) \
FUNC(BAD_TYPE, ERROR) \
FUNC(DUPLICATE_ENTRIES, ERROR) \
FUNC(GITATTRIBUTES_BLOB, ERROR) \
FUNC(GITATTRIBUTES_LARGE, ERROR) \
FUNC(GITATTRIBUTES_LINE_LENGTH, ERROR) \
FUNC(GITATTRIBUTES_MISSING, ERROR) \
FUNC(GITMODULES_BLOB, ERROR) \
FUNC(GITMODULES_LARGE, ERROR) \
FUNC(GITMODULES_MISSING, ERROR) \
FUNC(GITMODULES_NAME, ERROR) \
FUNC(GITMODULES_PATH, ERROR) \
FUNC(GITMODULES_SYMLINK, ERROR) \
FUNC(GITMODULES_UPDATE, ERROR) \
FUNC(GITMODULES_URL, ERROR) \
FUNC(MISSING_AUTHOR, ERROR) \
FUNC(MISSING_COMMITTER, ERROR) \
FUNC(MISSING_EMAIL, ERROR) \
@ -60,39 +72,28 @@ enum fsck_msg_type {
FUNC(TREE_NOT_SORTED, ERROR) \
FUNC(UNKNOWN_TYPE, ERROR) \
FUNC(ZERO_PADDED_DATE, ERROR) \
FUNC(GITMODULES_MISSING, ERROR) \
FUNC(GITMODULES_BLOB, ERROR) \
FUNC(GITMODULES_LARGE, ERROR) \
FUNC(GITMODULES_NAME, ERROR) \
FUNC(GITMODULES_SYMLINK, ERROR) \
FUNC(GITMODULES_URL, ERROR) \
FUNC(GITMODULES_PATH, ERROR) \
FUNC(GITMODULES_UPDATE, ERROR) \
FUNC(GITATTRIBUTES_MISSING, ERROR) \
FUNC(GITATTRIBUTES_LARGE, ERROR) \
FUNC(GITATTRIBUTES_LINE_LENGTH, ERROR) \
FUNC(GITATTRIBUTES_BLOB, ERROR) \
/* warnings */ \
FUNC(BAD_REFTABLE_TABLE_NAME, WARN) \
FUNC(EMPTY_NAME, WARN) \
FUNC(FULL_PATHNAME, WARN) \
FUNC(HAS_DOT, WARN) \
FUNC(HAS_DOTDOT, WARN) \
FUNC(HAS_DOTGIT, WARN) \
FUNC(NULL_SHA1, WARN) \
FUNC(ZERO_PADDED_FILEMODE, WARN) \
FUNC(NUL_IN_COMMIT, WARN) \
FUNC(LARGE_PATHNAME, WARN) \
FUNC(NULL_SHA1, WARN) \
FUNC(NUL_IN_COMMIT, WARN) \
FUNC(ZERO_PADDED_FILEMODE, WARN) \
/* infos (reported as warnings, but ignored by default) */ \
FUNC(BAD_FILEMODE, INFO) \
FUNC(EMPTY_PACKED_REFS_FILE, INFO) \
FUNC(GITMODULES_PARSE, INFO) \
FUNC(GITIGNORE_SYMLINK, INFO) \
FUNC(GITATTRIBUTES_SYMLINK, INFO) \
FUNC(MAILMAP_SYMLINK, INFO) \
FUNC(BAD_TAG_NAME, INFO) \
FUNC(EMPTY_PACKED_REFS_FILE, INFO) \
FUNC(GITATTRIBUTES_SYMLINK, INFO) \
FUNC(GITIGNORE_SYMLINK, INFO) \
FUNC(GITMODULES_PARSE, INFO) \
FUNC(MAILMAP_SYMLINK, INFO) \
FUNC(MISSING_TAGGER_ENTRY, INFO) \
FUNC(SYMLINK_REF, INFO) \
FUNC(REF_MISSING_NEWLINE, INFO) \
FUNC(SYMLINK_REF, INFO) \
FUNC(SYMREF_TARGET_IS_NOT_A_REF, INFO) \
FUNC(TRAILING_REF_CONTENT, INFO) \
/* ignored (elevated when requested) */ \

View File

@ -452,6 +452,7 @@ libgit_sources = [
'reftable/error.c',
'reftable/block.c',
'reftable/blocksource.c',
'reftable/fsck.c',
'reftable/iter.c',
'reftable/merged.c',
'reftable/pq.c',

4
refs.c
View File

@ -32,6 +32,7 @@
#include "commit.h"
#include "wildmatch.h"
#include "ident.h"
#include "fsck.h"

/*
* List of all available backends
@ -323,6 +324,9 @@ int check_refname_format(const char *refname, int flags)
int refs_fsck(struct ref_store *refs, struct fsck_options *o,
struct worktree *wt)
{
if (o->verbose)
fprintf_ln(stderr, _("Checking references consistency"));

return refs->be->fsck(refs, o, wt);
}


View File

@ -1,7 +1,6 @@
#include "git-compat-util.h"
#include "hex.h"
#include "refs-internal.h"
#include "string-list.h"
#include "trace.h"

static struct trace_key trace_refs = TRACE_KEY_INIT(REFS);

View File

@ -20,7 +20,6 @@
#include "../dir-iterator.h"
#include "../lockfile.h"
#include "../object.h"
#include "../object-file.h"
#include "../path.h"
#include "../dir.h"
#include "../chdir-notify.h"
@ -3970,8 +3969,6 @@ static int files_fsck_refs(struct ref_store *ref_store,
NULL,
};

if (o->verbose)
fprintf_ln(stderr, _("Checking references consistency"));
return files_fsck_refs_dir(ref_store, o, "refs", wt, fsck_refs_fn);
}


View File

@ -6,20 +6,21 @@
#include "../config.h"
#include "../dir.h"
#include "../environment.h"
#include "../fsck.h"
#include "../gettext.h"
#include "../hash.h"
#include "../hex.h"
#include "../iterator.h"
#include "../ident.h"
#include "../lockfile.h"
#include "../object.h"
#include "../path.h"
#include "../refs.h"
#include "../reftable/reftable-basics.h"
#include "../reftable/reftable-stack.h"
#include "../reftable/reftable-record.h"
#include "../reftable/reftable-error.h"
#include "../reftable/reftable-fsck.h"
#include "../reftable/reftable-iterator.h"
#include "../reftable/reftable-record.h"
#include "../reftable/reftable-stack.h"
#include "../repo-settings.h"
#include "../setup.h"
#include "../strmap.h"
@ -2714,11 +2715,56 @@ done:
return ret;
}

static int reftable_be_fsck(struct ref_store *ref_store UNUSED,
struct fsck_options *o UNUSED,
static void reftable_fsck_verbose_handler(const char *msg, void *cb_data)
{
struct fsck_options *o = cb_data;

if (o->verbose)
fprintf_ln(stderr, "%s", msg);
}

static const enum fsck_msg_id fsck_msg_id_map[] = {
[REFTABLE_FSCK_ERROR_TABLE_NAME] = FSCK_MSG_BAD_REFTABLE_TABLE_NAME,
};

static int reftable_fsck_error_handler(struct reftable_fsck_info *info,
void *cb_data)
{
struct fsck_ref_report report = { .path = info->path };
struct fsck_options *o = cb_data;
enum fsck_msg_id msg_id;

if (info->error < 0 || info->error >= REFTABLE_FSCK_MAX_VALUE)
BUG("unknown fsck error: %d", (int)info->error);

msg_id = fsck_msg_id_map[info->error];

if (!msg_id)
BUG("fsck_msg_id value missing for reftable error: %d", (int)info->error);

return fsck_report_ref(o, &report, msg_id, "%s", info->msg);
}

static int reftable_be_fsck(struct ref_store *ref_store, struct fsck_options *o,
struct worktree *wt UNUSED)
{
return 0;
struct reftable_ref_store *refs;
struct strmap_entry *entry;
struct hashmap_iter iter;
int ret = 0;

refs = reftable_be_downcast(ref_store, REF_STORE_READ, "fsck");

ret |= reftable_fsck_check(refs->main_backend.stack, reftable_fsck_error_handler,
reftable_fsck_verbose_handler, o);

strmap_for_each_entry(&refs->worktree_backends, &iter, entry) {
struct reftable_backend *b = (struct reftable_backend *)entry->value;
ret |= reftable_fsck_check(b->stack, reftable_fsck_error_handler,
reftable_fsck_verbose_handler, o);
}

return ret;
}

struct ref_storage_be refs_be_reftable = {

View File

@ -195,44 +195,55 @@ size_t names_length(const char **names)
return p - names;
}

char **parse_names(char *buf, int size)
int parse_names(char *buf, int size, char ***out)
{
char **names = NULL;
size_t names_cap = 0;
size_t names_len = 0;
char *p = buf;
char *end = buf + size;
int err = 0;

while (p < end) {
char *next = strchr(p, '\n');
if (next && next < end) {
*next = 0;
if (!next) {
err = REFTABLE_FORMAT_ERROR;
goto done;
} else if (next < end) {
*next = '\0';
} else {
next = end;
}

if (p < next) {
if (REFTABLE_ALLOC_GROW(names, names_len + 1,
names_cap))
goto err;
names_cap)) {
err = REFTABLE_OUT_OF_MEMORY_ERROR;
goto done;
}

names[names_len] = reftable_strdup(p);
if (!names[names_len++])
goto err;
if (!names[names_len++]) {
err = REFTABLE_OUT_OF_MEMORY_ERROR;
goto done;
}
}
p = next + 1;
}

if (REFTABLE_ALLOC_GROW(names, names_len + 1, names_cap))
goto err;
if (REFTABLE_ALLOC_GROW(names, names_len + 1, names_cap)) {
err = REFTABLE_OUT_OF_MEMORY_ERROR;
goto done;
}
names[names_len] = NULL;

return names;

err:
*out = names;
return 0;
done:
for (size_t i = 0; i < names_len; i++)
reftable_free(names[i]);
reftable_free(names);
return NULL;
return err;
}

int names_equal(const char **a, const char **b)

View File

@ -167,10 +167,11 @@ void free_names(char **a);

/*
* Parse a newline separated list of names. `size` is the length of the buffer,
* without terminating '\0'. Empty names are discarded. Returns a `NULL`
* pointer when allocations fail.
* without terminating '\0'. Empty names are discarded.
*
* Returns 0 on success, a reftable error code on error.
*/
char **parse_names(char *buf, int size);
int parse_names(char *buf, int size, char ***out);

/* compares two NULL-terminated arrays of strings. */
int names_equal(const char **a, const char **b);

100
reftable/fsck.c Normal file
View File

@ -0,0 +1,100 @@
#include "basics.h"
#include "reftable-fsck.h"
#include "reftable-table.h"
#include "stack.h"

static bool table_has_valid_name(const char *name)
{
const char *ptr = name;
char *endptr;

/* strtoull doesn't set errno on success */
errno = 0;

strtoull(ptr, &endptr, 16);
if (errno)
return false;
ptr = endptr;

if (*ptr != '-')
return false;
ptr++;

strtoull(ptr, &endptr, 16);
if (errno)
return false;
ptr = endptr;

if (*ptr != '-')
return false;
ptr++;

strtoul(ptr, &endptr, 16);
if (errno)
return false;
ptr = endptr;

if (strcmp(ptr, ".ref") && strcmp(ptr, ".log"))
return false;

return true;
}

typedef int (*table_check_fn)(struct reftable_table *table,
reftable_fsck_report_fn report_fn,
void *cb_data);

static int table_check_name(struct reftable_table *table,
reftable_fsck_report_fn report_fn,
void *cb_data)
{
if (!table_has_valid_name(table->name)) {
struct reftable_fsck_info info;

info.error = REFTABLE_FSCK_ERROR_TABLE_NAME;
info.msg = "invalid reftable table name";
info.path = table->name;

return report_fn(&info, cb_data);
}

return 0;
}

static int table_checks(struct reftable_table *table,
reftable_fsck_report_fn report_fn,
reftable_fsck_verbose_fn verbose_fn UNUSED,
void *cb_data)
{
table_check_fn table_check_fns[] = {
table_check_name,
NULL,
};
int err = 0;

for (size_t i = 0; table_check_fns[i]; i++)
err |= table_check_fns[i](table, report_fn, cb_data);

return err;
}

int reftable_fsck_check(struct reftable_stack *stack,
reftable_fsck_report_fn report_fn,
reftable_fsck_verbose_fn verbose_fn,
void *cb_data)
{
struct reftable_buf msg = REFTABLE_BUF_INIT;
int err = 0;

for (size_t i = 0; i < stack->tables_len; i++) {
reftable_buf_reset(&msg);
reftable_buf_addstr(&msg, "Checking table: ");
reftable_buf_addstr(&msg, stack->tables[i]->name);
verbose_fn(msg.buf, cb_data);

err |= table_checks(stack->tables[i], report_fn, verbose_fn, cb_data);
}

reftable_buf_release(&msg);
return err;
}

40
reftable/reftable-fsck.h Normal file
View File

@ -0,0 +1,40 @@
#ifndef REFTABLE_FSCK_H
#define REFTABLE_FSCK_H

#include "reftable-stack.h"

enum reftable_fsck_error {
/* Invalid table name */
REFTABLE_FSCK_ERROR_TABLE_NAME = 0,
/* Used for bounds checking, must be last */
REFTABLE_FSCK_MAX_VALUE,
};

/* Represents an individual error encountered during the FSCK checks. */
struct reftable_fsck_info {
enum reftable_fsck_error error;
const char *msg;
const char *path;
};

typedef int reftable_fsck_report_fn(struct reftable_fsck_info *info,
void *cb_data);
typedef void reftable_fsck_verbose_fn(const char *msg, void *cb_data);

/*
* Given a reftable stack, perform consistency checks on the stack.
*
* If an issue is encountered, the issue is reported to the callee via the
* provided 'report_fn'. If the issue is non-recoverable the flow will not
* continue. If it is recoverable, the flow will continue and further issues
* will be reported as identified.
*
* The 'verbose_fn' will be invoked to provide verbose information about
* the progress and state of the consistency checks.
*/
int reftable_fsck_check(struct reftable_stack *stack,
reftable_fsck_report_fn report_fn,
reftable_fsck_verbose_fn verbose_fn,
void *cb_data);

#endif /* REFTABLE_FSCK_H */

View File

@ -109,12 +109,7 @@ static int fd_read_lines(int fd, char ***namesp)
}
buf[size] = 0;

*namesp = parse_names(buf, size);
if (!*namesp) {
err = REFTABLE_OUT_OF_MEMORY_ERROR;
goto done;
}

err = parse_names(buf, size, namesp);
done:
reftable_free(buf);
return err;

View File

@ -146,6 +146,7 @@ integration_tests = [
't0611-reftable-httpd.sh',
't0612-reftable-jgit-compatibility.sh',
't0613-reftable-write-options.sh',
't0614-reftable-fsck.sh',
't1000-read-tree-m-3way.sh',
't1001-read-tree-m-2way.sh',
't1002-read-tree-m-u-2way.sh',

58
t/t0614-reftable-fsck.sh Executable file
View File

@ -0,0 +1,58 @@
#!/bin/sh

test_description='Test reftable backend consistency check'

GIT_TEST_DEFAULT_REF_FORMAT=reftable
export GIT_TEST_DEFAULT_REF_FORMAT

. ./test-lib.sh

test_expect_success "no errors reported on a well formed repository" '
test_when_finished "rm -rf repo" &&
git init repo &&
(
cd repo &&
git commit --allow-empty -m initial &&

for i in $(test_seq 20)
do
git update-ref refs/heads/branch-$i HEAD || return 1
done &&

# The repository should end up with multiple tables.
test_line_count ">" 1 .git/reftable/tables.list &&

git refs verify 2>err &&
test_must_be_empty err
)
'

for TABLE_NAME in "foo-bar-e4d12d59.ref" \
"0x00000000zzzz-0x00000000zzzz-e4d12d59.ref" \
"0x000000000001-0x000000000002-e4d12d59.abc" \
"0x000000000001-0x000000000002-e4d12d59.refabc"; do
test_expect_success "table name $TABLE_NAME should be checked" '
test_when_finished "rm -rf repo" &&
git init repo &&
(
cd repo &&
git commit --allow-empty -m initial &&

git refs verify 2>err &&
test_must_be_empty err &&

EXISTING_TABLE=$(head -n1 .git/reftable/tables.list) &&
mv ".git/reftable/$EXISTING_TABLE" ".git/reftable/$TABLE_NAME" &&
sed "s/${EXISTING_TABLE}/${TABLE_NAME}/g" .git/reftable/tables.list > tables.list &&
mv tables.list .git/reftable/tables.list &&

git refs verify 2>err &&
cat >expect <<-EOF &&
warning: ${TABLE_NAME}: badReftableTableName: invalid reftable table name
EOF
test_cmp expect err
)
'
done

test_done

View File

@ -9,6 +9,7 @@ https://developers.google.com/open-source/licenses/bsd
#include "unit-test.h"
#include "lib-reftable.h"
#include "reftable/basics.h"
#include "reftable/reftable-error.h"

struct integer_needle_lesseq_args {
int needle;
@ -79,14 +80,18 @@ void test_reftable_basics__names_equal(void)
void test_reftable_basics__parse_names(void)
{
char in1[] = "line\n";
char in2[] = "a\nb\nc";
char **out = parse_names(in1, strlen(in1));
char in2[] = "a\nb\nc\n";
char **out = NULL;
int err = parse_names(in1, strlen(in1), &out);
cl_assert(err == 0);
cl_assert(out != NULL);
cl_assert_equal_s(out[0], "line");
cl_assert(!out[1]);
free_names(out);

out = parse_names(in2, strlen(in2));
out = NULL;
err = parse_names(in2, strlen(in2), &out);
cl_assert(err == 0);
cl_assert(out != NULL);
cl_assert_equal_s(out[0], "a");
cl_assert_equal_s(out[1], "b");
@ -95,10 +100,21 @@ void test_reftable_basics__parse_names(void)
free_names(out);
}

void test_reftable_basics__parse_names_missing_newline(void)
{
char in1[] = "line\nline2";
char **out = NULL;
int err = parse_names(in1, strlen(in1), &out);
cl_assert(err == REFTABLE_FORMAT_ERROR);
cl_assert(out == NULL);
}

void test_reftable_basics__parse_names_drop_empty_string(void)
{
char in[] = "a\n\nb\n";
char **out = parse_names(in, strlen(in));
char **out = NULL;
int err = parse_names(in, strlen(in), &out);
cl_assert(err == 0);
cl_assert(out != NULL);
cl_assert_equal_s(out[0], "a");
/* simply '\n' should be dropped as empty string */