diff --git a/Makefile b/Makefile index 1f3f099f5c..d794fcf87e 100644 --- a/Makefile +++ b/Makefile @@ -2601,6 +2601,7 @@ FUZZ_OBJS += oss-fuzz/fuzz-date.o FUZZ_OBJS += oss-fuzz/fuzz-pack-headers.o FUZZ_OBJS += oss-fuzz/fuzz-pack-idx.o FUZZ_OBJS += oss-fuzz/fuzz-parse-attr-line.o +FUZZ_OBJS += oss-fuzz/fuzz-reftable.o FUZZ_OBJS += oss-fuzz/fuzz-url-decode-mem.o .PHONY: fuzz-objs fuzz-objs: $(FUZZ_OBJS) diff --git a/ci/run-build-and-minimal-fuzzers.sh b/ci/run-build-and-minimal-fuzzers.sh index e7b97952e7..37b24b092d 100755 --- a/ci/run-build-and-minimal-fuzzers.sh +++ b/ci/run-build-and-minimal-fuzzers.sh @@ -21,6 +21,7 @@ date pack-headers pack-idx parse-attr-line +reftable url-decode-mem " diff --git a/meson.build b/meson.build index b1319c4a40..8848bcc7ca 100644 --- a/meson.build +++ b/meson.build @@ -161,6 +161,21 @@ # These machine files can be passed to `meson setup` via the `--native-file` # option. # +# Fuzzing +# ======= +# +# Meson supports building the fuzzing targets by setting `-Dfuzzers=true`. By +# default, the targets will be built without libFuzzer and thus won't be usable +# for fuzzing. You have to configure a couple of options to properly wire up +# libFuzzer: +# +# $ meson setup build-fuzzers \ +# -Db_sanitize=address,fuzzer-no-link \ +# -Dfuzzers=true \ +# -Dfuzzers_link_args=-fsanitize=fuzzer +# $ meson compile -C build-fuzzers +# $ ./build-fuzzers/oss-fuzz/fuzz-config +# # Cross compilation # ================= # diff --git a/meson_options.txt b/meson_options.txt index 1bc75278a8..eadc86e0e4 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -133,3 +133,5 @@ option('test_utf8_locale', type: 'string', description: 'Name of a UTF-8 locale used for testing.') option('fuzzers', type: 'boolean', value: false, description: 'Enable building fuzzers.') +option('fuzzers_link_args', type: 'array', value: [], + description: 'Linker arguments used to link fuzzers. Use -fsanitize=fuzzer for fuzzing.') diff --git a/oss-fuzz/.gitignore b/oss-fuzz/.gitignore index f2d74de457..dc7a127a62 100644 --- a/oss-fuzz/.gitignore +++ b/oss-fuzz/.gitignore @@ -5,4 +5,5 @@ fuzz-date fuzz-pack-headers fuzz-pack-idx fuzz-parse-attr-line +fuzz-reftable fuzz-url-decode-mem diff --git a/oss-fuzz/fuzz-reftable.c b/oss-fuzz/fuzz-reftable.c new file mode 100644 index 0000000000..c46eac2c6b --- /dev/null +++ b/oss-fuzz/fuzz-reftable.c @@ -0,0 +1,74 @@ +#include "git-compat-util.h" +#include "reftable/basics.h" +#include "reftable/blocksource.h" +#include "reftable/reftable-blocksource.h" +#include "reftable/reftable-error.h" +#include "reftable/reftable-iterator.h" +#include "reftable/reftable-record.h" +#include "reftable/reftable-table.h" +#include "reftable/reftable-writer.h" + +int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size); + +int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) +{ + struct reftable_block_source source = { 0 }; + struct reftable_buf buf = REFTABLE_BUF_INIT; + struct reftable_table *table = NULL; + int err; + + if (reftable_buf_add(&buf, (const char *)data, size) < 0) + goto out; + block_source_from_buf(&source, &buf); + + err = reftable_table_new(&table, &source, "fuzz-input"); + if (err < 0) + goto out; + + /* + * Exercise the ref, log and raw block iterators so that we cover as + * much of the parsing code as possible. + */ + { + struct reftable_ref_record ref = { 0 }; + struct reftable_iterator it = { 0 }; + + reftable_table_init_ref_iterator(table, &it); + if (!reftable_iterator_seek_ref(&it, "")) + while (!reftable_iterator_next_ref(&it, &ref)) + ; + + reftable_ref_record_release(&ref); + reftable_iterator_destroy(&it); + } + + { + struct reftable_log_record log = { 0 }; + struct reftable_iterator it = { 0 }; + + reftable_table_init_log_iterator(table, &it); + if (!reftable_iterator_seek_log(&it, "")) + while (!reftable_iterator_next_log(&it, &log)) + ; + + reftable_log_record_release(&log); + reftable_iterator_destroy(&it); + } + + { + struct reftable_table_iterator it = { 0 }; + const struct reftable_block *block; + + if (!reftable_table_iterator_init(&it, table)) + while (!reftable_table_iterator_next(&it, &block)) + ; + + reftable_table_iterator_release(&it); + } + +out: + if (table) + reftable_table_decref(table); + reftable_buf_release(&buf); + return 0; +} diff --git a/oss-fuzz/meson.build b/oss-fuzz/meson.build index 878afd8426..5a3854256b 100644 --- a/oss-fuzz/meson.build +++ b/oss-fuzz/meson.build @@ -6,6 +6,7 @@ fuzz_programs = [ 'fuzz-pack-headers.c', 'fuzz-pack-idx.c', 'fuzz-parse-attr-line.c', + 'fuzz-reftable.c', 'fuzz-url-decode-mem.c', ] @@ -16,5 +17,6 @@ foreach fuzz_program : fuzz_programs fuzz_program, ], dependencies: [libgit_commonmain], + link_args: get_option('fuzzers_link_args'), ) endforeach diff --git a/reftable/basics.c b/reftable/basics.c index e969927b61..f0442a46cf 100644 --- a/reftable/basics.c +++ b/reftable/basics.c @@ -152,6 +152,9 @@ size_t binsearch(size_t sz, int (*f)(size_t k, void *args), void *args) size_t lo = 0; size_t hi = sz; + if (!sz) + return 0; + /* Invariants: * * (hi == sz) || f(hi) == true diff --git a/reftable/block.c b/reftable/block.c index 920b3f4486..1fa81405d2 100644 --- a/reftable/block.c +++ b/reftable/block.c @@ -260,6 +260,15 @@ int reftable_block_init(struct reftable_block *block, goto done; } + /* + * Verify that the block size covers at least the table header, block + * header and the 2 byte restart counter. + */ + if (block_size < header_size + 4 + 2) { + err = REFTABLE_FORMAT_ERROR; + goto done; + } + if (block_type == REFTABLE_BLOCK_TYPE_LOG) { uint32_t block_header_skip = 4 + header_size; uLong dst_len = block_size - block_header_skip; @@ -331,8 +340,21 @@ int reftable_block_init(struct reftable_block *block, full_block_size = block_size; } + /* + * Ensure that we have sufficient data available now to satisfy the + * claimed block size. + */ + if (block_size > block->block_data.len) { + err = REFTABLE_FORMAT_ERROR; + goto done; + } + restart_count = reftable_get_be16(block->block_data.data + block_size - 2); restart_off = block_size - 2 - 3 * restart_count; + if (restart_off < header_size + 4 || restart_off > block_size - 2) { + err = REFTABLE_FORMAT_ERROR; + goto done; + } block->block_type = block_type; block->hash_size = hash_size; @@ -418,6 +440,15 @@ static int restart_needle_less(size_t idx, void *_args) uint8_t extra; int n; + /* + * The restart offset must point to a record, which is stored before + * the restart table. Verify that this is the case. + */ + if (off >= args->block->restart_off) { + args->error = 1; + return -1; + } + /* * Records at restart points are stored without prefix compression, so * there is no need to fully decode the record key here. This removes @@ -495,6 +526,10 @@ int block_iter_seek_key(struct block_iter *it, struct reftable_buf *want) int err = 0; size_t i; + err = reftable_record_init(&rec, reftable_block_type(it->block)); + if (err < 0) + goto done; + /* * Perform a binary search over the block's restart points, which * avoids doing a linear scan over the whole block. Like this, we @@ -536,10 +571,6 @@ int block_iter_seek_key(struct block_iter *it, struct reftable_buf *want) else it->next_off = it->block->header_off + 4; - err = reftable_record_init(&rec, reftable_block_type(it->block)); - if (err < 0) - goto done; - /* * We're looking for the last entry less than the wanted key so that * the next call to `block_reader_next()` would yield the wanted diff --git a/reftable/record.c b/reftable/record.c index fcd387ba5d..1fce441930 100644 --- a/reftable/record.c +++ b/reftable/record.c @@ -388,7 +388,6 @@ static int reftable_ref_record_decode(void *rec, struct reftable_buf key, r->refname[key.len] = 0; r->update_index = update_index; - r->value_type = val_type; switch (val_type) { case REFTABLE_REF_VAL1: if (in.len < hash_size) { @@ -426,9 +425,10 @@ static int reftable_ref_record_decode(void *rec, struct reftable_buf key, case REFTABLE_REF_DELETION: break; default: - abort(); - break; + err = REFTABLE_FORMAT_ERROR; + goto done; } + r->value_type = val_type; return start.len - in.len; diff --git a/reftable/table.c b/reftable/table.c index 56362df0ed..b4d3f9e211 100644 --- a/reftable/table.c +++ b/reftable/table.c @@ -242,6 +242,8 @@ static int table_iter_seek_to(struct table_iter *ti, uint64_t off, uint8_t typ) int err; err = table_init_block(ti->table, &ti->block, off, typ); + if (err > 0) + return REFTABLE_FORMAT_ERROR; if (err != 0) return err; @@ -560,6 +562,11 @@ int reftable_table_new(struct reftable_table **out, goto done; } + if (file_size < header_size(t->version) + footer_size(t->version)) { + err = REFTABLE_FORMAT_ERROR; + goto done; + } + t->size = file_size - footer_size(t->version); t->source = *source; t->name = reftable_strdup(name); diff --git a/t/unit-tests/u-reftable-basics.c b/t/unit-tests/u-reftable-basics.c index 73566ed0eb..c5d83b6714 100644 --- a/t/unit-tests/u-reftable-basics.c +++ b/t/unit-tests/u-reftable-basics.c @@ -60,6 +60,17 @@ void test_reftable_basics__binsearch(void) } } +static int unreachable_lesseq(size_t i UNUSED, void *args UNUSED) +{ + cl_fail("comparison function called for empty range"); + return 0; +} + +void test_reftable_basics__binsearch_empty(void) +{ + cl_assert_equal_i(binsearch(0, &unreachable_lesseq, NULL), 0); +} + void test_reftable_basics__names_length(void) { const char *a[] = { "a", "b", NULL }; diff --git a/t/unit-tests/u-reftable-block.c b/t/unit-tests/u-reftable-block.c index f4bded7d26..b8bb9a23e4 100644 --- a/t/unit-tests/u-reftable-block.c +++ b/t/unit-tests/u-reftable-block.c @@ -14,6 +14,31 @@ https://developers.google.com/open-source/licenses/bsd #include "reftable/reftable-error.h" #include "strbuf.h" +static int cl_reftable_write_block(struct reftable_buf *buf, + uint8_t block_type, + struct reftable_record *recs, + size_t nrecs) +{ + struct block_writer writer = { + .last_key = REFTABLE_BUF_INIT, + }; + uint8_t block[1024]; + int block_end; + + cl_must_pass(block_writer_init(&writer, block_type, block, 1024, + 0, hash_size(REFTABLE_HASH_SHA1))); + for (size_t i = 0; i < nrecs; i++) + cl_must_pass(block_writer_add(&writer, &recs[i])); + + block_end = block_writer_finish(&writer); + cl_assert(block_end > 0); + + cl_must_pass(reftable_buf_add(buf, block, block_end)); + + block_writer_release(&writer); + return block_end; +} + void test_reftable_block__read_write(void) { const int header_off = 21; /* random */ @@ -381,25 +406,13 @@ void test_reftable_block__ref_read_write(void) void test_reftable_block__iterator(void) { struct reftable_block_source source = { 0 }; - struct block_writer writer = { - .last_key = REFTABLE_BUF_INIT, - }; struct reftable_record expected_refs[20]; struct reftable_ref_record ref = { 0 }; struct reftable_iterator it = { 0 }; struct reftable_block block = { 0 }; - struct reftable_buf data; + struct reftable_buf data = REFTABLE_BUF_INIT; int err; - data.len = 1024; - REFTABLE_CALLOC_ARRAY(data.buf, data.len); - cl_assert(data.buf != NULL); - - err = block_writer_init(&writer, REFTABLE_BLOCK_TYPE_REF, - (uint8_t *) data.buf, data.len, - 0, hash_size(REFTABLE_HASH_SHA1)); - cl_assert(!err); - for (size_t i = 0; i < ARRAY_SIZE(expected_refs); i++) { expected_refs[i] = (struct reftable_record) { .type = REFTABLE_BLOCK_TYPE_REF, @@ -409,13 +422,10 @@ void test_reftable_block__iterator(void) }, }; memset(expected_refs[i].u.ref.value.val1, i, REFTABLE_HASH_SIZE_SHA1); - - err = block_writer_add(&writer, &expected_refs[i]); - cl_assert_equal_i(err, 0); } - err = block_writer_finish(&writer); - cl_assert(err > 0); + cl_reftable_write_block(&data, REFTABLE_BLOCK_TYPE_REF, + expected_refs, ARRAY_SIZE(expected_refs)); block_source_from_buf(&source, &data); reftable_block_init(&block, &source, 0, 0, data.len, @@ -453,6 +463,142 @@ void test_reftable_block__iterator(void) reftable_ref_record_release(&ref); reftable_iterator_destroy(&it); reftable_block_release(&block); - block_writer_release(&writer); + reftable_buf_release(&data); +} + +void test_reftable_block__corrupt_log_block_size(void) +{ + struct reftable_block_source source = { 0 }; + struct reftable_record rec = { + .type = REFTABLE_BLOCK_TYPE_LOG, + .u.log = { + .refname = (char *) "refs/heads/main", + .update_index = 1, + .value_type = REFTABLE_LOG_UPDATE, + }, + }; + struct reftable_block block = { 0 }; + struct reftable_buf data = REFTABLE_BUF_INIT; + + cl_reftable_write_block(&data, REFTABLE_BLOCK_TYPE_LOG, &rec, 1); + + /* + * Log blocks store their inflated size as a big-endian 24-bit integer + * right after the one-byte block type. Rewrite it to claim a size that + * is smaller than the block header. + */ + reftable_put_be24((uint8_t *) data.buf + 1, 1); + + block_source_from_buf(&source, &data); + cl_assert_equal_i(reftable_block_init(&block, &source, 0, 0, data.len, + REFTABLE_HASH_SIZE_SHA1, REFTABLE_BLOCK_TYPE_LOG), + REFTABLE_FORMAT_ERROR); + + reftable_block_release(&block); + reftable_buf_release(&data); +} + +void test_reftable_block__corrupt_block_size(void) +{ + struct reftable_block_source source = { 0 }; + struct reftable_record rec = { + .type = REFTABLE_BLOCK_TYPE_REF, + .u.ref = { + .value_type = REFTABLE_REF_VAL1, + .refname = (char *) "refs/heads/main", + }, + }; + struct reftable_block block = { 0 }; + struct reftable_buf data = REFTABLE_BUF_INIT; + + cl_reftable_write_block(&data, REFTABLE_BLOCK_TYPE_REF, &rec, 1); + + /* + * The block size is stored as a big-endian 24-bit integer right after + * the one-byte block type at the start of the block. Corrupt it to + * claim a size that is larger than the data we actually have. Reading + * the restart count and restart table relative to such a bogus block + * size must not access out-of-bounds memory. + */ + reftable_put_be24((uint8_t *) data.buf + 1, 0xffffff); + + block_source_from_buf(&source, &data); + cl_assert_equal_i(reftable_block_init(&block, &source, 0, 0, data.len, + REFTABLE_HASH_SIZE_SHA1, REFTABLE_BLOCK_TYPE_REF), + REFTABLE_FORMAT_ERROR); + + reftable_block_release(&block); + reftable_buf_release(&data); +} + +void test_reftable_block__corrupt_restart_count(void) +{ + struct reftable_block_source source = { 0 }; + struct reftable_record rec = { + .type = REFTABLE_BLOCK_TYPE_REF, + .u.ref = { + .value_type = REFTABLE_REF_VAL1, + .refname = (char *) "refs/heads/main", + }, + }; + struct reftable_block block = { 0 }; + struct reftable_buf data = REFTABLE_BUF_INIT; + int block_size; + + block_size = cl_reftable_write_block(&data, REFTABLE_BLOCK_TYPE_REF, &rec, 1); + + /* + * Corrupt the restart count to claim a bogus number of restart points. + * Note that this would only cause us to perform an out-of-bounds + * access when seeking into the block, but we want to refuse such a + * block outright. + */ + reftable_put_be16((uint8_t *) data.buf + block_size - 2, 0xffff); + + block_source_from_buf(&source, &data); + cl_assert_equal_i(reftable_block_init(&block, &source, 0, 0, data.len, + REFTABLE_HASH_SIZE_SHA1, REFTABLE_BLOCK_TYPE_REF), + REFTABLE_FORMAT_ERROR); + + reftable_block_release(&block); + reftable_buf_release(&data); +} + +void test_reftable_block__corrupt_restart_offset(void) +{ + struct reftable_block_source source = { 0 }; + struct reftable_record rec = { + .type = REFTABLE_BLOCK_TYPE_REF, + .u.ref = { + .value_type = REFTABLE_REF_VAL1, + .refname = (char *) "refs/heads/main", + }, + }; + struct reftable_block block = { 0 }; + struct block_iter it = BLOCK_ITER_INIT; + struct reftable_buf want = REFTABLE_BUF_INIT; + struct reftable_buf data = REFTABLE_BUF_INIT; + + cl_reftable_write_block(&data, REFTABLE_BLOCK_TYPE_REF, &rec, 1); + + block_source_from_buf(&source, &data); + cl_must_pass(reftable_block_init(&block, &source, 0, 0, data.len, + REFTABLE_HASH_SIZE_SHA1, REFTABLE_BLOCK_TYPE_REF)); + + /* + * Corrupt the first restart offset, stored as a big-endian 24-bit + * integer at the start of the restart table, to point past the end of + * the records section. Seeking such a block must fail gracefully. + */ + reftable_put_be24((uint8_t *) block.block_data.data + block.restart_off, + 0xffffff); + + block_iter_init(&it, &block); + cl_must_pass(reftable_buf_addstr(&want, "refs/heads/main")); + cl_assert_equal_i(block_iter_seek_key(&it, &want), REFTABLE_FORMAT_ERROR); + + reftable_buf_release(&want); + block_iter_close(&it); + reftable_block_release(&block); reftable_buf_release(&data); } diff --git a/t/unit-tests/u-reftable-record.c b/t/unit-tests/u-reftable-record.c index 1bf2e170dc..9c95083ef4 100644 --- a/t/unit-tests/u-reftable-record.c +++ b/t/unit-tests/u-reftable-record.c @@ -11,6 +11,7 @@ #include "reftable/basics.h" #include "reftable/constants.h" #include "reftable/record.h" +#include "reftable/reftable-error.h" static void t_copy(struct reftable_record *rec) { @@ -202,6 +203,29 @@ void test_reftable_record__ref_record_roundtrip(void) reftable_buf_release(&scratch); } +void test_reftable_record__ref_record_decode_invalid_value_type(void) +{ + struct reftable_buf scratch = REFTABLE_BUF_INIT; + struct reftable_record out = { + .type = REFTABLE_BLOCK_TYPE_REF, + }; + struct reftable_buf key = REFTABLE_BUF_INIT; + uint8_t buffer[1024] = { 0 }; + struct string_view dest = { + .buf = buffer, + .len = sizeof(buffer), + }; + + cl_must_pass(reftable_buf_addstr(&key, "refs/heads/master")); + cl_assert_equal_i(reftable_record_decode(&out, key, REFTABLE_NR_REF_VALUETYPES, + dest, REFTABLE_HASH_SIZE_SHA1, &scratch), + REFTABLE_FORMAT_ERROR); + + reftable_record_release(&out); + reftable_buf_release(&key); + reftable_buf_release(&scratch); +} + void test_reftable_record__log_record_comparison(void) { struct reftable_record in[3] = { diff --git a/t/unit-tests/u-reftable-table.c b/t/unit-tests/u-reftable-table.c index fae478ee04..d4251fae6f 100644 --- a/t/unit-tests/u-reftable-table.c +++ b/t/unit-tests/u-reftable-table.c @@ -1,8 +1,11 @@ #include "unit-test.h" #include "lib-reftable.h" +#include "reftable/basics.h" +#include "reftable/block.h" #include "reftable/blocksource.h" #include "reftable/constants.h" #include "reftable/iter.h" +#include "reftable/reftable-error.h" #include "reftable/table.h" #include "strbuf.h" @@ -201,3 +204,92 @@ void test_reftable_table__block_iterator(void) reftable_buf_release(&buf); reftable_free(records); } + +void test_reftable_table__seek_invalid_log_offset(void) +{ + struct reftable_ref_record refs[] = { + { + .refname = (char *) "refs/heads/main", + .value_type = REFTABLE_REF_VAL1, + .value.val1 = { 42 }, + }, + }; + struct reftable_log_record logs[] = { + { + .refname = (char *) "refs/heads/main", + .update_index = 1, + .value_type = REFTABLE_LOG_UPDATE, + .value.update = { + .name = (char *) "user", + .email = (char *) "user@example.com", + .message = (char *) "message\n", + }, + }, + }; + struct reftable_block_source source = { 0 }; + struct reftable_log_record log = { 0 }; + struct reftable_iterator it = { 0 }; + struct reftable_table *table; + struct reftable_buf buf = REFTABLE_BUF_INIT; + size_t fsize = footer_size(1); + uint8_t *footer; + + cl_reftable_write_to_buf(&buf, refs, ARRAY_SIZE(refs), + logs, ARRAY_SIZE(logs), REFTABLE_HASH_SHA1, NULL); + + /* + * Corrupt the log section offset stored in the footer so that it points + * past the end of the table. The footer is checksummed, so we also have + * to recompute and rewrite the CRC. + */ + footer = (uint8_t *) buf.buf + buf.len - fsize; + reftable_put_be64(footer + header_size(1) + 24, UINT64_MAX); + reftable_put_be32(footer + fsize - 4, crc32(0, footer, fsize - 4)); + + block_source_from_buf(&source, &buf); + cl_must_pass(reftable_table_new(&table, &source, "name")); + + /* + * Seeking the log iterator must not crash even though the log section + * offset is bogus. As the offset points past the end of the table we + * know that the table is corrupt, so the seek must report a format + * error instead of pretending that the section is empty. + */ + reftable_table_init_log_iterator(table, &it); + cl_assert_equal_i(reftable_iterator_seek_log(&it, ""), + REFTABLE_FORMAT_ERROR); + + reftable_log_record_release(&log); + reftable_iterator_destroy(&it); + reftable_table_decref(table); + reftable_buf_release(&buf); +} + +void test_reftable_table__new_with_truncated_table(void) +{ + struct reftable_ref_record refs[] = { + { + .refname = (char *) "refs/heads/main", + .value_type = REFTABLE_REF_VAL1, + .value.val1 = { 42 }, + }, + }; + struct reftable_block_source source = { 0 }; + struct reftable_table *table; + struct reftable_buf buf = REFTABLE_BUF_INIT; + + cl_reftable_write_to_buf(&buf, refs, ARRAY_SIZE(refs), NULL, 0, + REFTABLE_HASH_SHA1, NULL); + + /* + * Truncate the table so that it is large enough to read the header, but + * too small to also contain the footer. + */ + buf.len = footer_size(1) - 1; + block_source_from_buf(&source, &buf); + + cl_assert_equal_i(reftable_table_new(&table, &source, "name"), + REFTABLE_FORMAT_ERROR); + + reftable_buf_release(&buf); +}