Merge branch 'ps/reftable-hardening' into jch

The reftable code has been hardened against corrupted tables by
fixing out-of-bounds writes, out-of-bounds reads, and abort calls
during parsing.

* ps/reftable-hardening:
  reftable/table: fix OOB read on truncated table
  reftable/table: fix NULL pointer access when seeking to bogus offsets
  reftable/block: fix OOB read with bogus restart offset
  reftable/block: fix use of uninitialized memory when binsearch fails
  reftable/block: fix OOB read with bogus restart count
  reftable/block: fix OOB read with bogus block size
  reftable/block: fix OOB write with bogus inflated log size
  t/unit-tests: introduce test helper to write reftable blocks
  reftable/record: don't abort when decoding invalid ref value type
  reftable/basics: fix OOB read on binary search of empty range
  oss-fuzz: add fuzzer for parsing reftables
  meson: support building fuzzers with libFuzzer
jch
Junio C Hamano 2026-07-01 10:48:38 -07:00
commit f3ffec5adb
15 changed files with 436 additions and 26 deletions

View File

@ -2601,6 +2601,7 @@ FUZZ_OBJS += oss-fuzz/fuzz-date.o
FUZZ_OBJS += oss-fuzz/fuzz-pack-headers.o
FUZZ_OBJS += oss-fuzz/fuzz-pack-idx.o
FUZZ_OBJS += oss-fuzz/fuzz-parse-attr-line.o
FUZZ_OBJS += oss-fuzz/fuzz-reftable.o
FUZZ_OBJS += oss-fuzz/fuzz-url-decode-mem.o
.PHONY: fuzz-objs
fuzz-objs: $(FUZZ_OBJS)

View File

@ -21,6 +21,7 @@ date
pack-headers
pack-idx
parse-attr-line
reftable
url-decode-mem
"


View File

@ -161,6 +161,21 @@
# These machine files can be passed to `meson setup` via the `--native-file`
# option.
#
# Fuzzing
# =======
#
# Meson supports building the fuzzing targets by setting `-Dfuzzers=true`. By
# default, the targets will be built without libFuzzer and thus won't be usable
# for fuzzing. You have to configure a couple of options to properly wire up
# libFuzzer:
#
# $ meson setup build-fuzzers \
# -Db_sanitize=address,fuzzer-no-link \
# -Dfuzzers=true \
# -Dfuzzers_link_args=-fsanitize=fuzzer
# $ meson compile -C build-fuzzers
# $ ./build-fuzzers/oss-fuzz/fuzz-config <args>
#
# Cross compilation
# =================
#

View File

@ -133,3 +133,5 @@ option('test_utf8_locale', type: 'string',
description: 'Name of a UTF-8 locale used for testing.')
option('fuzzers', type: 'boolean', value: false,
description: 'Enable building fuzzers.')
option('fuzzers_link_args', type: 'array', value: [],
description: 'Linker arguments used to link fuzzers. Use -fsanitize=fuzzer for fuzzing.')

1
oss-fuzz/.gitignore vendored
View File

@ -5,4 +5,5 @@ fuzz-date
fuzz-pack-headers
fuzz-pack-idx
fuzz-parse-attr-line
fuzz-reftable
fuzz-url-decode-mem

74
oss-fuzz/fuzz-reftable.c Normal file
View File

@ -0,0 +1,74 @@
#include "git-compat-util.h"
#include "reftable/basics.h"
#include "reftable/blocksource.h"
#include "reftable/reftable-blocksource.h"
#include "reftable/reftable-error.h"
#include "reftable/reftable-iterator.h"
#include "reftable/reftable-record.h"
#include "reftable/reftable-table.h"
#include "reftable/reftable-writer.h"

int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size);

int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size)
{
struct reftable_block_source source = { 0 };
struct reftable_buf buf = REFTABLE_BUF_INIT;
struct reftable_table *table = NULL;
int err;

if (reftable_buf_add(&buf, (const char *)data, size) < 0)
goto out;
block_source_from_buf(&source, &buf);

err = reftable_table_new(&table, &source, "fuzz-input");
if (err < 0)
goto out;

/*
* Exercise the ref, log and raw block iterators so that we cover as
* much of the parsing code as possible.
*/
{
struct reftable_ref_record ref = { 0 };
struct reftable_iterator it = { 0 };

reftable_table_init_ref_iterator(table, &it);
if (!reftable_iterator_seek_ref(&it, ""))
while (!reftable_iterator_next_ref(&it, &ref))
;

reftable_ref_record_release(&ref);
reftable_iterator_destroy(&it);
}

{
struct reftable_log_record log = { 0 };
struct reftable_iterator it = { 0 };

reftable_table_init_log_iterator(table, &it);
if (!reftable_iterator_seek_log(&it, ""))
while (!reftable_iterator_next_log(&it, &log))
;

reftable_log_record_release(&log);
reftable_iterator_destroy(&it);
}

{
struct reftable_table_iterator it = { 0 };
const struct reftable_block *block;

if (!reftable_table_iterator_init(&it, table))
while (!reftable_table_iterator_next(&it, &block))
;

reftable_table_iterator_release(&it);
}

out:
if (table)
reftable_table_decref(table);
reftable_buf_release(&buf);
return 0;
}

View File

@ -6,6 +6,7 @@ fuzz_programs = [
'fuzz-pack-headers.c',
'fuzz-pack-idx.c',
'fuzz-parse-attr-line.c',
'fuzz-reftable.c',
'fuzz-url-decode-mem.c',
]

@ -16,5 +17,6 @@ foreach fuzz_program : fuzz_programs
fuzz_program,
],
dependencies: [libgit_commonmain],
link_args: get_option('fuzzers_link_args'),
)
endforeach

View File

@ -152,6 +152,9 @@ size_t binsearch(size_t sz, int (*f)(size_t k, void *args), void *args)
size_t lo = 0;
size_t hi = sz;

if (!sz)
return 0;

/* Invariants:
*
* (hi == sz) || f(hi) == true

View File

@ -260,6 +260,15 @@ int reftable_block_init(struct reftable_block *block,
goto done;
}

/*
* Verify that the block size covers at least the table header, block
* header and the 2 byte restart counter.
*/
if (block_size < header_size + 4 + 2) {
err = REFTABLE_FORMAT_ERROR;
goto done;
}

if (block_type == REFTABLE_BLOCK_TYPE_LOG) {
uint32_t block_header_skip = 4 + header_size;
uLong dst_len = block_size - block_header_skip;
@ -331,8 +340,21 @@ int reftable_block_init(struct reftable_block *block,
full_block_size = block_size;
}

/*
* Ensure that we have sufficient data available now to satisfy the
* claimed block size.
*/
if (block_size > block->block_data.len) {
err = REFTABLE_FORMAT_ERROR;
goto done;
}

restart_count = reftable_get_be16(block->block_data.data + block_size - 2);
restart_off = block_size - 2 - 3 * restart_count;
if (restart_off < header_size + 4 || restart_off > block_size - 2) {
err = REFTABLE_FORMAT_ERROR;
goto done;
}

block->block_type = block_type;
block->hash_size = hash_size;
@ -418,6 +440,15 @@ static int restart_needle_less(size_t idx, void *_args)
uint8_t extra;
int n;

/*
* The restart offset must point to a record, which is stored before
* the restart table. Verify that this is the case.
*/
if (off >= args->block->restart_off) {
args->error = 1;
return -1;
}

/*
* Records at restart points are stored without prefix compression, so
* there is no need to fully decode the record key here. This removes
@ -495,6 +526,10 @@ int block_iter_seek_key(struct block_iter *it, struct reftable_buf *want)
int err = 0;
size_t i;

err = reftable_record_init(&rec, reftable_block_type(it->block));
if (err < 0)
goto done;

/*
* Perform a binary search over the block's restart points, which
* avoids doing a linear scan over the whole block. Like this, we
@ -536,10 +571,6 @@ int block_iter_seek_key(struct block_iter *it, struct reftable_buf *want)
else
it->next_off = it->block->header_off + 4;

err = reftable_record_init(&rec, reftable_block_type(it->block));
if (err < 0)
goto done;

/*
* We're looking for the last entry less than the wanted key so that
* the next call to `block_reader_next()` would yield the wanted

View File

@ -388,7 +388,6 @@ static int reftable_ref_record_decode(void *rec, struct reftable_buf key,
r->refname[key.len] = 0;

r->update_index = update_index;
r->value_type = val_type;
switch (val_type) {
case REFTABLE_REF_VAL1:
if (in.len < hash_size) {
@ -426,9 +425,10 @@ static int reftable_ref_record_decode(void *rec, struct reftable_buf key,
case REFTABLE_REF_DELETION:
break;
default:
abort();
break;
err = REFTABLE_FORMAT_ERROR;
goto done;
}
r->value_type = val_type;

return start.len - in.len;


View File

@ -242,6 +242,8 @@ static int table_iter_seek_to(struct table_iter *ti, uint64_t off, uint8_t typ)
int err;

err = table_init_block(ti->table, &ti->block, off, typ);
if (err > 0)
return REFTABLE_FORMAT_ERROR;
if (err != 0)
return err;

@ -560,6 +562,11 @@ int reftable_table_new(struct reftable_table **out,
goto done;
}

if (file_size < header_size(t->version) + footer_size(t->version)) {
err = REFTABLE_FORMAT_ERROR;
goto done;
}

t->size = file_size - footer_size(t->version);
t->source = *source;
t->name = reftable_strdup(name);

View File

@ -60,6 +60,17 @@ void test_reftable_basics__binsearch(void)
}
}

static int unreachable_lesseq(size_t i UNUSED, void *args UNUSED)
{
cl_fail("comparison function called for empty range");
return 0;
}

void test_reftable_basics__binsearch_empty(void)
{
cl_assert_equal_i(binsearch(0, &unreachable_lesseq, NULL), 0);
}

void test_reftable_basics__names_length(void)
{
const char *a[] = { "a", "b", NULL };

View File

@ -14,6 +14,31 @@ https://developers.google.com/open-source/licenses/bsd
#include "reftable/reftable-error.h"
#include "strbuf.h"

static int cl_reftable_write_block(struct reftable_buf *buf,
uint8_t block_type,
struct reftable_record *recs,
size_t nrecs)
{
struct block_writer writer = {
.last_key = REFTABLE_BUF_INIT,
};
uint8_t block[1024];
int block_end;

cl_must_pass(block_writer_init(&writer, block_type, block, 1024,
0, hash_size(REFTABLE_HASH_SHA1)));
for (size_t i = 0; i < nrecs; i++)
cl_must_pass(block_writer_add(&writer, &recs[i]));

block_end = block_writer_finish(&writer);
cl_assert(block_end > 0);

cl_must_pass(reftable_buf_add(buf, block, block_end));

block_writer_release(&writer);
return block_end;
}

void test_reftable_block__read_write(void)
{
const int header_off = 21; /* random */
@ -381,25 +406,13 @@ void test_reftable_block__ref_read_write(void)
void test_reftable_block__iterator(void)
{
struct reftable_block_source source = { 0 };
struct block_writer writer = {
.last_key = REFTABLE_BUF_INIT,
};
struct reftable_record expected_refs[20];
struct reftable_ref_record ref = { 0 };
struct reftable_iterator it = { 0 };
struct reftable_block block = { 0 };
struct reftable_buf data;
struct reftable_buf data = REFTABLE_BUF_INIT;
int err;

data.len = 1024;
REFTABLE_CALLOC_ARRAY(data.buf, data.len);
cl_assert(data.buf != NULL);

err = block_writer_init(&writer, REFTABLE_BLOCK_TYPE_REF,
(uint8_t *) data.buf, data.len,
0, hash_size(REFTABLE_HASH_SHA1));
cl_assert(!err);

for (size_t i = 0; i < ARRAY_SIZE(expected_refs); i++) {
expected_refs[i] = (struct reftable_record) {
.type = REFTABLE_BLOCK_TYPE_REF,
@ -409,13 +422,10 @@ void test_reftable_block__iterator(void)
},
};
memset(expected_refs[i].u.ref.value.val1, i, REFTABLE_HASH_SIZE_SHA1);

err = block_writer_add(&writer, &expected_refs[i]);
cl_assert_equal_i(err, 0);
}

err = block_writer_finish(&writer);
cl_assert(err > 0);
cl_reftable_write_block(&data, REFTABLE_BLOCK_TYPE_REF,
expected_refs, ARRAY_SIZE(expected_refs));

block_source_from_buf(&source, &data);
reftable_block_init(&block, &source, 0, 0, data.len,
@ -453,6 +463,142 @@ void test_reftable_block__iterator(void)
reftable_ref_record_release(&ref);
reftable_iterator_destroy(&it);
reftable_block_release(&block);
block_writer_release(&writer);
reftable_buf_release(&data);
}

void test_reftable_block__corrupt_log_block_size(void)
{
struct reftable_block_source source = { 0 };
struct reftable_record rec = {
.type = REFTABLE_BLOCK_TYPE_LOG,
.u.log = {
.refname = (char *) "refs/heads/main",
.update_index = 1,
.value_type = REFTABLE_LOG_UPDATE,
},
};
struct reftable_block block = { 0 };
struct reftable_buf data = REFTABLE_BUF_INIT;

cl_reftable_write_block(&data, REFTABLE_BLOCK_TYPE_LOG, &rec, 1);

/*
* Log blocks store their inflated size as a big-endian 24-bit integer
* right after the one-byte block type. Rewrite it to claim a size that
* is smaller than the block header.
*/
reftable_put_be24((uint8_t *) data.buf + 1, 1);

block_source_from_buf(&source, &data);
cl_assert_equal_i(reftable_block_init(&block, &source, 0, 0, data.len,
REFTABLE_HASH_SIZE_SHA1, REFTABLE_BLOCK_TYPE_LOG),
REFTABLE_FORMAT_ERROR);

reftable_block_release(&block);
reftable_buf_release(&data);
}

void test_reftable_block__corrupt_block_size(void)
{
struct reftable_block_source source = { 0 };
struct reftable_record rec = {
.type = REFTABLE_BLOCK_TYPE_REF,
.u.ref = {
.value_type = REFTABLE_REF_VAL1,
.refname = (char *) "refs/heads/main",
},
};
struct reftable_block block = { 0 };
struct reftable_buf data = REFTABLE_BUF_INIT;

cl_reftable_write_block(&data, REFTABLE_BLOCK_TYPE_REF, &rec, 1);

/*
* The block size is stored as a big-endian 24-bit integer right after
* the one-byte block type at the start of the block. Corrupt it to
* claim a size that is larger than the data we actually have. Reading
* the restart count and restart table relative to such a bogus block
* size must not access out-of-bounds memory.
*/
reftable_put_be24((uint8_t *) data.buf + 1, 0xffffff);

block_source_from_buf(&source, &data);
cl_assert_equal_i(reftable_block_init(&block, &source, 0, 0, data.len,
REFTABLE_HASH_SIZE_SHA1, REFTABLE_BLOCK_TYPE_REF),
REFTABLE_FORMAT_ERROR);

reftable_block_release(&block);
reftable_buf_release(&data);
}

void test_reftable_block__corrupt_restart_count(void)
{
struct reftable_block_source source = { 0 };
struct reftable_record rec = {
.type = REFTABLE_BLOCK_TYPE_REF,
.u.ref = {
.value_type = REFTABLE_REF_VAL1,
.refname = (char *) "refs/heads/main",
},
};
struct reftable_block block = { 0 };
struct reftable_buf data = REFTABLE_BUF_INIT;
int block_size;

block_size = cl_reftable_write_block(&data, REFTABLE_BLOCK_TYPE_REF, &rec, 1);

/*
* Corrupt the restart count to claim a bogus number of restart points.
* Note that this would only cause us to perform an out-of-bounds
* access when seeking into the block, but we want to refuse such a
* block outright.
*/
reftable_put_be16((uint8_t *) data.buf + block_size - 2, 0xffff);

block_source_from_buf(&source, &data);
cl_assert_equal_i(reftable_block_init(&block, &source, 0, 0, data.len,
REFTABLE_HASH_SIZE_SHA1, REFTABLE_BLOCK_TYPE_REF),
REFTABLE_FORMAT_ERROR);

reftable_block_release(&block);
reftable_buf_release(&data);
}

void test_reftable_block__corrupt_restart_offset(void)
{
struct reftable_block_source source = { 0 };
struct reftable_record rec = {
.type = REFTABLE_BLOCK_TYPE_REF,
.u.ref = {
.value_type = REFTABLE_REF_VAL1,
.refname = (char *) "refs/heads/main",
},
};
struct reftable_block block = { 0 };
struct block_iter it = BLOCK_ITER_INIT;
struct reftable_buf want = REFTABLE_BUF_INIT;
struct reftable_buf data = REFTABLE_BUF_INIT;

cl_reftable_write_block(&data, REFTABLE_BLOCK_TYPE_REF, &rec, 1);

block_source_from_buf(&source, &data);
cl_must_pass(reftable_block_init(&block, &source, 0, 0, data.len,
REFTABLE_HASH_SIZE_SHA1, REFTABLE_BLOCK_TYPE_REF));

/*
* Corrupt the first restart offset, stored as a big-endian 24-bit
* integer at the start of the restart table, to point past the end of
* the records section. Seeking such a block must fail gracefully.
*/
reftable_put_be24((uint8_t *) block.block_data.data + block.restart_off,
0xffffff);

block_iter_init(&it, &block);
cl_must_pass(reftable_buf_addstr(&want, "refs/heads/main"));
cl_assert_equal_i(block_iter_seek_key(&it, &want), REFTABLE_FORMAT_ERROR);

reftable_buf_release(&want);
block_iter_close(&it);
reftable_block_release(&block);
reftable_buf_release(&data);
}

View File

@ -11,6 +11,7 @@
#include "reftable/basics.h"
#include "reftable/constants.h"
#include "reftable/record.h"
#include "reftable/reftable-error.h"

static void t_copy(struct reftable_record *rec)
{
@ -202,6 +203,29 @@ void test_reftable_record__ref_record_roundtrip(void)
reftable_buf_release(&scratch);
}

void test_reftable_record__ref_record_decode_invalid_value_type(void)
{
struct reftable_buf scratch = REFTABLE_BUF_INIT;
struct reftable_record out = {
.type = REFTABLE_BLOCK_TYPE_REF,
};
struct reftable_buf key = REFTABLE_BUF_INIT;
uint8_t buffer[1024] = { 0 };
struct string_view dest = {
.buf = buffer,
.len = sizeof(buffer),
};

cl_must_pass(reftable_buf_addstr(&key, "refs/heads/master"));
cl_assert_equal_i(reftable_record_decode(&out, key, REFTABLE_NR_REF_VALUETYPES,
dest, REFTABLE_HASH_SIZE_SHA1, &scratch),
REFTABLE_FORMAT_ERROR);

reftable_record_release(&out);
reftable_buf_release(&key);
reftable_buf_release(&scratch);
}

void test_reftable_record__log_record_comparison(void)
{
struct reftable_record in[3] = {

View File

@ -1,8 +1,11 @@
#include "unit-test.h"
#include "lib-reftable.h"
#include "reftable/basics.h"
#include "reftable/block.h"
#include "reftable/blocksource.h"
#include "reftable/constants.h"
#include "reftable/iter.h"
#include "reftable/reftable-error.h"
#include "reftable/table.h"
#include "strbuf.h"

@ -201,3 +204,92 @@ void test_reftable_table__block_iterator(void)
reftable_buf_release(&buf);
reftable_free(records);
}

void test_reftable_table__seek_invalid_log_offset(void)
{
struct reftable_ref_record refs[] = {
{
.refname = (char *) "refs/heads/main",
.value_type = REFTABLE_REF_VAL1,
.value.val1 = { 42 },
},
};
struct reftable_log_record logs[] = {
{
.refname = (char *) "refs/heads/main",
.update_index = 1,
.value_type = REFTABLE_LOG_UPDATE,
.value.update = {
.name = (char *) "user",
.email = (char *) "user@example.com",
.message = (char *) "message\n",
},
},
};
struct reftable_block_source source = { 0 };
struct reftable_log_record log = { 0 };
struct reftable_iterator it = { 0 };
struct reftable_table *table;
struct reftable_buf buf = REFTABLE_BUF_INIT;
size_t fsize = footer_size(1);
uint8_t *footer;

cl_reftable_write_to_buf(&buf, refs, ARRAY_SIZE(refs),
logs, ARRAY_SIZE(logs), REFTABLE_HASH_SHA1, NULL);

/*
* Corrupt the log section offset stored in the footer so that it points
* past the end of the table. The footer is checksummed, so we also have
* to recompute and rewrite the CRC.
*/
footer = (uint8_t *) buf.buf + buf.len - fsize;
reftable_put_be64(footer + header_size(1) + 24, UINT64_MAX);
reftable_put_be32(footer + fsize - 4, crc32(0, footer, fsize - 4));

block_source_from_buf(&source, &buf);
cl_must_pass(reftable_table_new(&table, &source, "name"));

/*
* Seeking the log iterator must not crash even though the log section
* offset is bogus. As the offset points past the end of the table we
* know that the table is corrupt, so the seek must report a format
* error instead of pretending that the section is empty.
*/
reftable_table_init_log_iterator(table, &it);
cl_assert_equal_i(reftable_iterator_seek_log(&it, ""),
REFTABLE_FORMAT_ERROR);

reftable_log_record_release(&log);
reftable_iterator_destroy(&it);
reftable_table_decref(table);
reftable_buf_release(&buf);
}

void test_reftable_table__new_with_truncated_table(void)
{
struct reftable_ref_record refs[] = {
{
.refname = (char *) "refs/heads/main",
.value_type = REFTABLE_REF_VAL1,
.value.val1 = { 42 },
},
};
struct reftable_block_source source = { 0 };
struct reftable_table *table;
struct reftable_buf buf = REFTABLE_BUF_INIT;

cl_reftable_write_to_buf(&buf, refs, ARRAY_SIZE(refs), NULL, 0,
REFTABLE_HASH_SHA1, NULL);

/*
* Truncate the table so that it is large enough to read the header, but
* too small to also contain the footer.
*/
buf.len = footer_size(1) - 1;
block_source_from_buf(&source, &buf);

cl_assert_equal_i(reftable_table_new(&table, &source, "name"),
REFTABLE_FORMAT_ERROR);

reftable_buf_release(&buf);
}