Merge branch 'jk/chunk-bounds'

The codepaths that read "chunk" formatted files have been corrected
to pay attention to the chunk size and notice broken files.

* jk/chunk-bounds: (21 commits)
  t5319: make corrupted large-offset test more robust
  chunk-format: drop pair_chunk_unsafe()
  commit-graph: detect out-of-order BIDX offsets
  commit-graph: check bounds when accessing BIDX chunk
  commit-graph: check bounds when accessing BDAT chunk
  commit-graph: bounds-check generation overflow chunk
  commit-graph: check size of generations chunk
  commit-graph: bounds-check base graphs chunk
  commit-graph: detect out-of-bounds extra-edges pointers
  commit-graph: check size of commit data chunk
  midx: check size of revindex chunk
  midx: bounds-check large offset chunk
  midx: check size of object offset chunk
  midx: enforce chunk alignment on reading
  midx: check size of pack names chunk
  commit-graph: check consistency of fanout table
  midx: check size of oid lookup chunk
  commit-graph: check size of oid fanout chunk
  midx: stop ignoring malformed oid fanout chunk
  t: add library for munging chunk-format files
  ...
maint
Junio C Hamano 2023-10-23 13:56:36 -07:00
commit f32af12cee
15 changed files with 570 additions and 47 deletions

34
bloom.c
View File

@ -29,6 +29,26 @@ static inline unsigned char get_bitmask(uint32_t pos)
return ((unsigned char)1) << (pos & (BITS_PER_WORD - 1)); return ((unsigned char)1) << (pos & (BITS_PER_WORD - 1));
} }


static int check_bloom_offset(struct commit_graph *g, uint32_t pos,
uint32_t offset)
{
/*
* Note that we allow offsets equal to the data size, which would set
* our pointers at one past the end of the chunk memory. This is
* necessary because the on-disk index points to the end of the
* entries (so we can compute size by comparing adjacent ones). And
* naturally the final entry's end is one-past-the-end of the chunk.
*/
if (offset <= g->chunk_bloom_data_size - BLOOMDATA_CHUNK_HEADER_SIZE)
return 0;

warning("ignoring out-of-range offset (%"PRIuMAX") for changed-path"
" filter at pos %"PRIuMAX" of %s (chunk size: %"PRIuMAX")",
(uintmax_t)offset, (uintmax_t)pos,
g->filename, (uintmax_t)g->chunk_bloom_data_size);
return -1;
}

static int load_bloom_filter_from_graph(struct commit_graph *g, static int load_bloom_filter_from_graph(struct commit_graph *g,
struct bloom_filter *filter, struct bloom_filter *filter,
uint32_t graph_pos) uint32_t graph_pos)
@ -51,6 +71,20 @@ static int load_bloom_filter_from_graph(struct commit_graph *g,
else else
start_index = 0; start_index = 0;


if (check_bloom_offset(g, lex_pos, end_index) < 0 ||
check_bloom_offset(g, lex_pos - 1, start_index) < 0)
return 0;

if (end_index < start_index) {
warning("ignoring decreasing changed-path index offsets"
" (%"PRIuMAX" > %"PRIuMAX") for positions"
" %"PRIuMAX" and %"PRIuMAX" of %s",
(uintmax_t)start_index, (uintmax_t)end_index,
(uintmax_t)(lex_pos-1), (uintmax_t)lex_pos,
g->filename);
return 0;
}

filter->len = end_index - start_index; filter->len = end_index - start_index;
filter->data = (unsigned char *)(g->chunk_bloom_data + filter->data = (unsigned char *)(g->chunk_bloom_data +
sizeof(unsigned char) * start_index + sizeof(unsigned char) * start_index +

View File

@ -102,7 +102,8 @@ int read_table_of_contents(struct chunkfile *cf,
const unsigned char *mfile, const unsigned char *mfile,
size_t mfile_size, size_t mfile_size,
uint64_t toc_offset, uint64_t toc_offset,
int toc_length) int toc_length,
unsigned expected_alignment)
{ {
int i; int i;
uint32_t chunk_id; uint32_t chunk_id;
@ -120,6 +121,11 @@ int read_table_of_contents(struct chunkfile *cf,
error(_("terminating chunk id appears earlier than expected")); error(_("terminating chunk id appears earlier than expected"));
return 1; return 1;
} }
if (chunk_offset % expected_alignment != 0) {
error(_("chunk id %"PRIx32" not %d-byte aligned"),
chunk_id, expected_alignment);
return 1;
}


table_of_contents += CHUNK_TOC_ENTRY_SIZE; table_of_contents += CHUNK_TOC_ENTRY_SIZE;
next_chunk_offset = get_be64(table_of_contents + 4); next_chunk_offset = get_be64(table_of_contents + 4);
@ -154,20 +160,28 @@ int read_table_of_contents(struct chunkfile *cf,
return 0; return 0;
} }


struct pair_chunk_data {
const unsigned char **p;
size_t *size;
};

static int pair_chunk_fn(const unsigned char *chunk_start, static int pair_chunk_fn(const unsigned char *chunk_start,
size_t chunk_size, size_t chunk_size,
void *data) void *data)
{ {
const unsigned char **p = data; struct pair_chunk_data *pcd = data;
*p = chunk_start; *pcd->p = chunk_start;
*pcd->size = chunk_size;
return 0; return 0;
} }


int pair_chunk(struct chunkfile *cf, int pair_chunk(struct chunkfile *cf,
uint32_t chunk_id, uint32_t chunk_id,
const unsigned char **p) const unsigned char **p,
size_t *size)
{ {
return read_chunk(cf, chunk_id, pair_chunk_fn, p); struct pair_chunk_data pcd = { .p = p, .size = size };
return read_chunk(cf, chunk_id, pair_chunk_fn, &pcd);
} }


int read_chunk(struct chunkfile *cf, int read_chunk(struct chunkfile *cf,

View File

@ -36,20 +36,23 @@ int read_table_of_contents(struct chunkfile *cf,
const unsigned char *mfile, const unsigned char *mfile,
size_t mfile_size, size_t mfile_size,
uint64_t toc_offset, uint64_t toc_offset,
int toc_length); int toc_length,
unsigned expected_alignment);


#define CHUNK_NOT_FOUND (-2) #define CHUNK_NOT_FOUND (-2)


/* /*
* Find 'chunk_id' in the given chunkfile and assign the * Find 'chunk_id' in the given chunkfile and assign the
* given pointer to the position in the mmap'd file where * given pointer to the position in the mmap'd file where
* that chunk begins. * that chunk begins. Likewise the "size" parameter is filled
* with the size of the chunk.
* *
* Returns CHUNK_NOT_FOUND if the chunk does not exist. * Returns CHUNK_NOT_FOUND if the chunk does not exist.
*/ */
int pair_chunk(struct chunkfile *cf, int pair_chunk(struct chunkfile *cf,
uint32_t chunk_id, uint32_t chunk_id,
const unsigned char **p); const unsigned char **p,
size_t *size);


typedef int (*chunk_read_fn)(const unsigned char *chunk_start, typedef int (*chunk_read_fn)(const unsigned char *chunk_start,
size_t chunk_size, void *data); size_t chunk_size, void *data);

View File

@ -277,6 +277,8 @@ struct commit_graph *load_commit_graph_one_fd_st(struct repository *r,


static int verify_commit_graph_lite(struct commit_graph *g) static int verify_commit_graph_lite(struct commit_graph *g)
{ {
int i;

/* /*
* Basic validation shared between parse_commit_graph() * Basic validation shared between parse_commit_graph()
* which'll be called every time the graph is used, and the * which'll be called every time the graph is used, and the
@ -302,6 +304,30 @@ static int verify_commit_graph_lite(struct commit_graph *g)
return 1; return 1;
} }


for (i = 0; i < 255; i++) {
uint32_t oid_fanout1 = ntohl(g->chunk_oid_fanout[i]);
uint32_t oid_fanout2 = ntohl(g->chunk_oid_fanout[i + 1]);

if (oid_fanout1 > oid_fanout2) {
error("commit-graph fanout values out of order");
return 1;
}
}
if (ntohl(g->chunk_oid_fanout[255]) != g->num_commits) {
error("commit-graph oid table and fanout disagree on size");
return 1;
}

return 0;
}

static int graph_read_oid_fanout(const unsigned char *chunk_start,
size_t chunk_size, void *data)
{
struct commit_graph *g = data;
if (chunk_size != 256 * sizeof(uint32_t))
return error("commit-graph oid fanout chunk is wrong size");
g->chunk_oid_fanout = (const uint32_t *)chunk_start;
return 0; return 0;
} }


@ -314,12 +340,54 @@ static int graph_read_oid_lookup(const unsigned char *chunk_start,
return 0; return 0;
} }


static int graph_read_commit_data(const unsigned char *chunk_start,
size_t chunk_size, void *data)
{
struct commit_graph *g = data;
if (chunk_size != g->num_commits * GRAPH_DATA_WIDTH)
return error("commit-graph commit data chunk is wrong size");
g->chunk_commit_data = chunk_start;
return 0;
}

static int graph_read_generation_data(const unsigned char *chunk_start,
size_t chunk_size, void *data)
{
struct commit_graph *g = data;
if (chunk_size != g->num_commits * sizeof(uint32_t))
return error("commit-graph generations chunk is wrong size");
g->chunk_generation_data = chunk_start;
return 0;
}

static int graph_read_bloom_index(const unsigned char *chunk_start,
size_t chunk_size, void *data)
{
struct commit_graph *g = data;
if (chunk_size != g->num_commits * 4) {
warning("commit-graph changed-path index chunk is too small");
return -1;
}
g->chunk_bloom_indexes = chunk_start;
return 0;
}

static int graph_read_bloom_data(const unsigned char *chunk_start, static int graph_read_bloom_data(const unsigned char *chunk_start,
size_t chunk_size, void *data) size_t chunk_size, void *data)
{ {
struct commit_graph *g = data; struct commit_graph *g = data;
uint32_t hash_version; uint32_t hash_version;

if (chunk_size < BLOOMDATA_CHUNK_HEADER_SIZE) {
warning("ignoring too-small changed-path chunk"
" (%"PRIuMAX" < %"PRIuMAX") in commit-graph file",
(uintmax_t)chunk_size,
(uintmax_t)BLOOMDATA_CHUNK_HEADER_SIZE);
return -1;
}

g->chunk_bloom_data = chunk_start; g->chunk_bloom_data = chunk_start;
g->chunk_bloom_data_size = chunk_size;
hash_version = get_be32(chunk_start); hash_version = get_be32(chunk_start);


if (hash_version != 1) if (hash_version != 1)
@ -391,29 +459,31 @@ struct commit_graph *parse_commit_graph(struct repo_settings *s,
cf = init_chunkfile(NULL); cf = init_chunkfile(NULL);


if (read_table_of_contents(cf, graph->data, graph_size, if (read_table_of_contents(cf, graph->data, graph_size,
GRAPH_HEADER_SIZE, graph->num_chunks)) GRAPH_HEADER_SIZE, graph->num_chunks, 1))
goto free_and_return; goto free_and_return;


pair_chunk(cf, GRAPH_CHUNKID_OIDFANOUT, read_chunk(cf, GRAPH_CHUNKID_OIDFANOUT, graph_read_oid_fanout, graph);
(const unsigned char **)&graph->chunk_oid_fanout);
read_chunk(cf, GRAPH_CHUNKID_OIDLOOKUP, graph_read_oid_lookup, graph); read_chunk(cf, GRAPH_CHUNKID_OIDLOOKUP, graph_read_oid_lookup, graph);
pair_chunk(cf, GRAPH_CHUNKID_DATA, &graph->chunk_commit_data); read_chunk(cf, GRAPH_CHUNKID_DATA, graph_read_commit_data, graph);
pair_chunk(cf, GRAPH_CHUNKID_EXTRAEDGES, &graph->chunk_extra_edges); pair_chunk(cf, GRAPH_CHUNKID_EXTRAEDGES, &graph->chunk_extra_edges,
pair_chunk(cf, GRAPH_CHUNKID_BASE, &graph->chunk_base_graphs); &graph->chunk_extra_edges_size);
pair_chunk(cf, GRAPH_CHUNKID_BASE, &graph->chunk_base_graphs,
&graph->chunk_base_graphs_size);


if (s->commit_graph_generation_version >= 2) { if (s->commit_graph_generation_version >= 2) {
pair_chunk(cf, GRAPH_CHUNKID_GENERATION_DATA, read_chunk(cf, GRAPH_CHUNKID_GENERATION_DATA,
&graph->chunk_generation_data); graph_read_generation_data, graph);
pair_chunk(cf, GRAPH_CHUNKID_GENERATION_DATA_OVERFLOW, pair_chunk(cf, GRAPH_CHUNKID_GENERATION_DATA_OVERFLOW,
&graph->chunk_generation_data_overflow); &graph->chunk_generation_data_overflow,
&graph->chunk_generation_data_overflow_size);


if (graph->chunk_generation_data) if (graph->chunk_generation_data)
graph->read_generation_data = 1; graph->read_generation_data = 1;
} }


if (s->commit_graph_read_changed_paths) { if (s->commit_graph_read_changed_paths) {
pair_chunk(cf, GRAPH_CHUNKID_BLOOMINDEXES, read_chunk(cf, GRAPH_CHUNKID_BLOOMINDEXES,
&graph->chunk_bloom_indexes); graph_read_bloom_index, graph);
read_chunk(cf, GRAPH_CHUNKID_BLOOMDATA, read_chunk(cf, GRAPH_CHUNKID_BLOOMDATA,
graph_read_bloom_data, graph); graph_read_bloom_data, graph);
} }
@ -510,6 +580,11 @@ static int add_graph_to_chain(struct commit_graph *g,
return 0; return 0;
} }


if (g->chunk_base_graphs_size / g->hash_len < n) {
warning(_("commit-graph base graphs chunk is too small"));
return 0;
}

while (n) { while (n) {
n--; n--;


@ -837,7 +912,10 @@ static void fill_commit_graph_info(struct commit *item, struct commit_graph *g,
die(_("commit-graph requires overflow generation data but has none")); die(_("commit-graph requires overflow generation data but has none"));


offset_pos = offset ^ CORRECTED_COMMIT_DATE_OFFSET_OVERFLOW; offset_pos = offset ^ CORRECTED_COMMIT_DATE_OFFSET_OVERFLOW;
graph_data->generation = item->date + get_be64(g->chunk_generation_data_overflow + st_mult(8, offset_pos)); if (g->chunk_generation_data_overflow_size / sizeof(uint64_t) <= offset_pos)
die(_("commit-graph overflow generation data is too small"));
graph_data->generation = item->date +
get_be64(g->chunk_generation_data_overflow + sizeof(uint64_t) * offset_pos);
} else } else
graph_data->generation = item->date + offset; graph_data->generation = item->date + offset;
} else } else
@ -857,7 +935,7 @@ static int fill_commit_in_graph(struct repository *r,
struct commit_graph *g, uint32_t pos) struct commit_graph *g, uint32_t pos)
{ {
uint32_t edge_value; uint32_t edge_value;
uint32_t *parent_data_ptr; uint32_t parent_data_pos;
struct commit_list **pptr; struct commit_list **pptr;
const unsigned char *commit_data; const unsigned char *commit_data;
uint32_t lex_index; uint32_t lex_index;
@ -889,14 +967,21 @@ static int fill_commit_in_graph(struct repository *r,
return 1; return 1;
} }


parent_data_ptr = (uint32_t*)(g->chunk_extra_edges + parent_data_pos = edge_value & GRAPH_EDGE_LAST_MASK;
st_mult(4, edge_value & GRAPH_EDGE_LAST_MASK));
do { do {
edge_value = get_be32(parent_data_ptr); if (g->chunk_extra_edges_size / sizeof(uint32_t) <= parent_data_pos) {
error("commit-graph extra-edges pointer out of bounds");
free_commit_list(item->parents);
item->parents = NULL;
item->object.parsed = 0;
return 0;
}
edge_value = get_be32(g->chunk_extra_edges +
sizeof(uint32_t) * parent_data_pos);
pptr = insert_parent_or_die(r, g, pptr = insert_parent_or_die(r, g,
edge_value & GRAPH_EDGE_LAST_MASK, edge_value & GRAPH_EDGE_LAST_MASK,
pptr); pptr);
parent_data_ptr++; parent_data_pos++;
} while (!(edge_value & GRAPH_LAST_EDGE)); } while (!(edge_value & GRAPH_LAST_EDGE));


return 1; return 1;

View File

@ -94,10 +94,14 @@ struct commit_graph {
const unsigned char *chunk_commit_data; const unsigned char *chunk_commit_data;
const unsigned char *chunk_generation_data; const unsigned char *chunk_generation_data;
const unsigned char *chunk_generation_data_overflow; const unsigned char *chunk_generation_data_overflow;
size_t chunk_generation_data_overflow_size;
const unsigned char *chunk_extra_edges; const unsigned char *chunk_extra_edges;
size_t chunk_extra_edges_size;
const unsigned char *chunk_base_graphs; const unsigned char *chunk_base_graphs;
size_t chunk_base_graphs_size;
const unsigned char *chunk_bloom_indexes; const unsigned char *chunk_bloom_indexes;
const unsigned char *chunk_bloom_data; const unsigned char *chunk_bloom_data;
size_t chunk_bloom_data_size;


struct topo_level_slab *topo_levels; struct topo_level_slab *topo_levels;
struct bloom_filter_settings *bloom_filter_settings; struct bloom_filter_settings *bloom_filter_settings;

68
midx.c
View File

@ -71,6 +71,33 @@ static int midx_read_oid_fanout(const unsigned char *chunk_start,
error(_("multi-pack-index OID fanout is of the wrong size")); error(_("multi-pack-index OID fanout is of the wrong size"));
return 1; return 1;
} }
m->num_objects = ntohl(m->chunk_oid_fanout[255]);
return 0;
}

static int midx_read_oid_lookup(const unsigned char *chunk_start,
size_t chunk_size, void *data)
{
struct multi_pack_index *m = data;
m->chunk_oid_lookup = chunk_start;

if (chunk_size != st_mult(m->hash_len, m->num_objects)) {
error(_("multi-pack-index OID lookup chunk is the wrong size"));
return 1;
}
return 0;
}

static int midx_read_object_offsets(const unsigned char *chunk_start,
size_t chunk_size, void *data)
{
struct multi_pack_index *m = data;
m->chunk_object_offsets = chunk_start;

if (chunk_size != st_mult(m->num_objects, MIDX_CHUNK_OFFSET_WIDTH)) {
error(_("multi-pack-index object offset chunk is the wrong size"));
return 1;
}
return 0; return 0;
} }


@ -140,33 +167,41 @@ struct multi_pack_index *load_multi_pack_index(const char *object_dir, int local
cf = init_chunkfile(NULL); cf = init_chunkfile(NULL);


if (read_table_of_contents(cf, m->data, midx_size, if (read_table_of_contents(cf, m->data, midx_size,
MIDX_HEADER_SIZE, m->num_chunks)) MIDX_HEADER_SIZE, m->num_chunks,
MIDX_CHUNK_ALIGNMENT))
goto cleanup_fail; goto cleanup_fail;


if (pair_chunk(cf, MIDX_CHUNKID_PACKNAMES, &m->chunk_pack_names) == CHUNK_NOT_FOUND) if (pair_chunk(cf, MIDX_CHUNKID_PACKNAMES, &m->chunk_pack_names, &m->chunk_pack_names_len))
die(_("multi-pack-index missing required pack-name chunk")); die(_("multi-pack-index required pack-name chunk missing or corrupted"));
if (read_chunk(cf, MIDX_CHUNKID_OIDFANOUT, midx_read_oid_fanout, m) == CHUNK_NOT_FOUND) if (read_chunk(cf, MIDX_CHUNKID_OIDFANOUT, midx_read_oid_fanout, m))
die(_("multi-pack-index missing required OID fanout chunk")); die(_("multi-pack-index required OID fanout chunk missing or corrupted"));
if (pair_chunk(cf, MIDX_CHUNKID_OIDLOOKUP, &m->chunk_oid_lookup) == CHUNK_NOT_FOUND) if (read_chunk(cf, MIDX_CHUNKID_OIDLOOKUP, midx_read_oid_lookup, m))
die(_("multi-pack-index missing required OID lookup chunk")); die(_("multi-pack-index required OID lookup chunk missing or corrupted"));
if (pair_chunk(cf, MIDX_CHUNKID_OBJECTOFFSETS, &m->chunk_object_offsets) == CHUNK_NOT_FOUND) if (read_chunk(cf, MIDX_CHUNKID_OBJECTOFFSETS, midx_read_object_offsets, m))
die(_("multi-pack-index missing required object offsets chunk")); die(_("multi-pack-index required object offsets chunk missing or corrupted"));


pair_chunk(cf, MIDX_CHUNKID_LARGEOFFSETS, &m->chunk_large_offsets); pair_chunk(cf, MIDX_CHUNKID_LARGEOFFSETS, &m->chunk_large_offsets,
&m->chunk_large_offsets_len);


if (git_env_bool("GIT_TEST_MIDX_READ_RIDX", 1)) if (git_env_bool("GIT_TEST_MIDX_READ_RIDX", 1))
pair_chunk(cf, MIDX_CHUNKID_REVINDEX, &m->chunk_revindex); pair_chunk(cf, MIDX_CHUNKID_REVINDEX, &m->chunk_revindex,

&m->chunk_revindex_len);
m->num_objects = ntohl(m->chunk_oid_fanout[255]);


CALLOC_ARRAY(m->pack_names, m->num_packs); CALLOC_ARRAY(m->pack_names, m->num_packs);
CALLOC_ARRAY(m->packs, m->num_packs); CALLOC_ARRAY(m->packs, m->num_packs);


cur_pack_name = (const char *)m->chunk_pack_names; cur_pack_name = (const char *)m->chunk_pack_names;
for (i = 0; i < m->num_packs; i++) { for (i = 0; i < m->num_packs; i++) {
const char *end;
size_t avail = m->chunk_pack_names_len -
(cur_pack_name - (const char *)m->chunk_pack_names);

m->pack_names[i] = cur_pack_name; m->pack_names[i] = cur_pack_name;


cur_pack_name += strlen(cur_pack_name) + 1; end = memchr(cur_pack_name, '\0', avail);
if (!end)
die(_("multi-pack-index pack-name chunk is too short"));
cur_pack_name = end + 1;


if (i && strcmp(m->pack_names[i], m->pack_names[i - 1]) <= 0) if (i && strcmp(m->pack_names[i], m->pack_names[i - 1]) <= 0)
die(_("multi-pack-index pack names out of order: '%s' before '%s'"), die(_("multi-pack-index pack names out of order: '%s' before '%s'"),
@ -270,8 +305,9 @@ off_t nth_midxed_offset(struct multi_pack_index *m, uint32_t pos)
die(_("multi-pack-index stores a 64-bit offset, but off_t is too small")); die(_("multi-pack-index stores a 64-bit offset, but off_t is too small"));


offset32 ^= MIDX_LARGE_OFFSET_NEEDED; offset32 ^= MIDX_LARGE_OFFSET_NEEDED;
return get_be64(m->chunk_large_offsets + if (offset32 >= m->chunk_large_offsets_len / sizeof(uint64_t))
st_mult(sizeof(uint64_t), offset32)); die(_("multi-pack-index large offset out of bounds"));
return get_be64(m->chunk_large_offsets + sizeof(uint64_t) * offset32);
} }


return offset32; return offset32;

3
midx.h
View File

@ -32,11 +32,14 @@ struct multi_pack_index {
int local; int local;


const unsigned char *chunk_pack_names; const unsigned char *chunk_pack_names;
size_t chunk_pack_names_len;
const uint32_t *chunk_oid_fanout; const uint32_t *chunk_oid_fanout;
const unsigned char *chunk_oid_lookup; const unsigned char *chunk_oid_lookup;
const unsigned char *chunk_object_offsets; const unsigned char *chunk_object_offsets;
const unsigned char *chunk_large_offsets; const unsigned char *chunk_large_offsets;
size_t chunk_large_offsets_len;
const unsigned char *chunk_revindex; const unsigned char *chunk_revindex;
size_t chunk_revindex_len;


const char **pack_names; const char **pack_names;
struct packed_git **packs; struct packed_git **packs;

View File

@ -343,6 +343,17 @@ int verify_pack_revindex(struct packed_git *p)
return res; return res;
} }


static int can_use_midx_ridx_chunk(struct multi_pack_index *m)
{
if (!m->chunk_revindex)
return 0;
if (m->chunk_revindex_len != st_mult(sizeof(uint32_t), m->num_objects)) {
error(_("multi-pack-index reverse-index chunk is the wrong size"));
return 0;
}
return 1;
}

int load_midx_revindex(struct multi_pack_index *m) int load_midx_revindex(struct multi_pack_index *m)
{ {
struct strbuf revindex_name = STRBUF_INIT; struct strbuf revindex_name = STRBUF_INIT;
@ -351,7 +362,7 @@ int load_midx_revindex(struct multi_pack_index *m)
if (m->revindex_data) if (m->revindex_data)
return 0; return 0;


if (m->chunk_revindex) { if (can_use_midx_ridx_chunk(m)) {
/* /*
* If the MIDX `m` has a `RIDX` chunk, then use its contents for * If the MIDX `m` has a `RIDX` chunk, then use its contents for
* the reverse index instead of trying to load a separate `.rev` * the reverse index instead of trying to load a separate `.rev`

17
t/lib-chunk.sh Normal file
View File

@ -0,0 +1,17 @@
# Shell library for working with "chunk" files (commit-graph, midx, etc).

# corrupt_chunk_file <fn> <chunk> <offset> <bytes>
#
# Corrupt a chunk-based file (like a commit-graph) by overwriting the bytes
# found in the chunk specified by the 4-byte <chunk> identifier. If <offset> is
# "clear", replace the chunk entirely. Otherwise, overwrite data <offset> bytes
# into the chunk.
#
# The <bytes> are interpreted as pairs of hex digits (so "000000FE" would be
# big-endian 254).
corrupt_chunk_file () {
fn=$1; shift
perl "$TEST_DIRECTORY"/lib-chunk/corrupt-chunk-file.pl \
"$@" <"$fn" >"$fn.tmp" &&
mv "$fn.tmp" "$fn"
}

View File

@ -0,0 +1,66 @@
#!/usr/bin/perl

my ($chunk, $seek, $bytes) = @ARGV;
$bytes =~ s/../chr(hex($&))/ge;

binmode STDIN;
binmode STDOUT;

# A few helpers to read bytes, or read and copy them to the
# output.
sub get {
my $n = shift;
return unless $n;
read(STDIN, my $buf, $n)
or die "read error or eof: $!\n";
return $buf;
}
sub copy {
my $buf = get(@_);
print $buf;
return $buf;
}

# read until we find table-of-contents entry for chunk;
# note that we cheat a bit by assuming 4-byte alignment and
# that no ToC entry will accidentally look like a header.
#
# If we don't find the entry, copy() will hit EOF and exit
# (which should cause the caller to fail the test).
while (copy(4) ne $chunk) { }
my $offset = unpack("Q>", copy(8));

# In clear mode, our length will change. So figure out
# the length by comparing to the offset of the next chunk, and
# then adjust that offset (and all subsequent) ones.
my $len;
if ($seek eq "clear") {
my $id;
do {
$id = copy(4);
my $next = unpack("Q>", get(8));
if (!defined $len) {
$len = $next - $offset;
}
print pack("Q>", $next - $len + length($bytes));
} while (unpack("N", $id));
}

# and now copy up to our existing chunk data
copy($offset - tell(STDIN));
if ($seek eq "clear") {
# if clearing, skip past existing data
get($len);
} else {
# otherwise, copy up to the requested offset,
# and skip past the overwritten bytes
copy($seek);
get(length($bytes));
}

# now write out the requested bytes, along
# with any other remaining data
print $bytes;
while (read(STDIN, my $buf, 4096)) {
print $buf;
}

View File

@ -5,6 +5,7 @@ GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main
export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME


. ./test-lib.sh . ./test-lib.sh
. "$TEST_DIRECTORY"/lib-chunk.sh


GIT_TEST_COMMIT_GRAPH=0 GIT_TEST_COMMIT_GRAPH=0
GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS=0 GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS=0
@ -404,4 +405,53 @@ test_expect_success 'Bloom generation backfills empty commits' '
) )
' '


corrupt_graph () {
graph=.git/objects/info/commit-graph &&
test_when_finished "rm -rf $graph" &&
git commit-graph write --reachable --changed-paths &&
corrupt_chunk_file $graph "$@"
}

check_corrupt_graph () {
corrupt_graph "$@" &&
git -c core.commitGraph=false log -- A/B/file2 >expect.out &&
git -c core.commitGraph=true log -- A/B/file2 >out 2>err &&
test_cmp expect.out out
}

test_expect_success 'Bloom reader notices too-small data chunk' '
check_corrupt_graph BDAT clear 00000000 &&
echo "warning: ignoring too-small changed-path chunk" \
"(4 < 12) in commit-graph file" >expect.err &&
test_cmp expect.err err
'

test_expect_success 'Bloom reader notices out-of-bounds filter offsets' '
check_corrupt_graph BIDX 12 FFFFFFFF &&
# use grep to avoid depending on exact chunk size
grep "warning: ignoring out-of-range offset (4294967295) for changed-path filter at pos 3 of .git/objects/info/commit-graph" err
'

test_expect_success 'Bloom reader notices too-small index chunk' '
# replace the index with a single entry, making most
# lookups out-of-bounds
check_corrupt_graph BIDX clear 00000000 &&
echo "warning: commit-graph changed-path index chunk" \
"is too small" >expect.err &&
test_cmp expect.err err
'

test_expect_success 'Bloom reader notices out-of-order index offsets' '
# we do not know any real offsets, but we can pick
# something plausible; we should not get to the point of
# actually reading from the bogus offsets anyway.
corrupt_graph BIDX 4 0000000c00000005 &&
echo "warning: ignoring decreasing changed-path index offsets" \
"(12 > 5) for positions 1 and 2 of .git/objects/info/commit-graph" >expect.err &&
git -c core.commitGraph=false log -- A/B/file2 >expect.out &&
git -c core.commitGraph=true log -- A/B/file2 >out 2>err &&
test_cmp expect.out out &&
test_cmp expect.err err
'

test_done test_done

View File

@ -2,6 +2,7 @@


test_description='commit graph' test_description='commit graph'
. ./test-lib.sh . ./test-lib.sh
. "$TEST_DIRECTORY"/lib-chunk.sh


GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS=0 GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS=0


@ -559,7 +560,7 @@ test_expect_success 'detect incorrect fanout' '


test_expect_success 'detect incorrect fanout final value' ' test_expect_success 'detect incorrect fanout final value' '
corrupt_graph_and_verify $GRAPH_BYTE_FANOUT2 "\01" \ corrupt_graph_and_verify $GRAPH_BYTE_FANOUT2 "\01" \
"fanout value" "oid table and fanout disagree on size"
' '


test_expect_success 'detect incorrect OID order' ' test_expect_success 'detect incorrect OID order' '
@ -821,4 +822,77 @@ test_expect_success 'overflow during generation version upgrade' '
) )
' '


corrupt_chunk () {
graph=full/.git/objects/info/commit-graph &&
test_when_finished "rm -rf $graph" &&
git -C full commit-graph write --reachable &&
corrupt_chunk_file $graph "$@"
}

check_corrupt_chunk () {
corrupt_chunk "$@" &&
git -C full -c core.commitGraph=false log >expect.out &&
git -C full -c core.commitGraph=true log >out 2>err &&
test_cmp expect.out out
}

test_expect_success 'reader notices too-small oid fanout chunk' '
# make it big enough that the graph file is plausible,
# otherwise we hit an earlier check
check_corrupt_chunk OIDF clear $(printf "000000%02x" $(test_seq 250)) &&
cat >expect.err <<-\EOF &&
error: commit-graph oid fanout chunk is wrong size
error: commit-graph is missing the OID Fanout chunk
EOF
test_cmp expect.err err
'

test_expect_success 'reader notices fanout/lookup table mismatch' '
check_corrupt_chunk OIDF 1020 "FFFFFFFF" &&
cat >expect.err <<-\EOF &&
error: commit-graph oid table and fanout disagree on size
EOF
test_cmp expect.err err
'

test_expect_success 'reader notices out-of-bounds fanout' '
# Rather than try to corrupt a specific hash, we will just
# wreck them all. But we cannot just set them all to 0xFFFFFFFF or
# similar, as they are used for hi/lo starts in a binary search (so if
# they are identical, that indicates that the search should abort
# immediately). Instead, we will give them high values that differ by
# 2^24, ensuring that any that are used would cause an out-of-bounds
# read.
check_corrupt_chunk OIDF 0 $(printf "%02x000000" $(test_seq 0 254)) &&
cat >expect.err <<-\EOF &&
error: commit-graph fanout values out of order
EOF
test_cmp expect.err err
'

test_expect_success 'reader notices too-small commit data chunk' '
check_corrupt_chunk CDAT clear 00000000 &&
cat >expect.err <<-\EOF &&
error: commit-graph commit data chunk is wrong size
error: commit-graph is missing the Commit Data chunk
EOF
test_cmp expect.err err
'

test_expect_success 'reader notices out-of-bounds extra edge' '
check_corrupt_chunk EDGE clear &&
cat >expect.err <<-\EOF &&
error: commit-graph extra-edges pointer out of bounds
EOF
test_cmp expect.err err
'

test_expect_success 'reader notices too-small generations chunk' '
check_corrupt_chunk GDA2 clear 00000000 &&
cat >expect.err <<-\EOF &&
error: commit-graph generations chunk is wrong size
EOF
test_cmp expect.err err
'

test_done test_done

View File

@ -2,6 +2,7 @@


test_description='multi-pack-indexes' test_description='multi-pack-indexes'
. ./test-lib.sh . ./test-lib.sh
. "$TEST_DIRECTORY"/lib-chunk.sh


GIT_TEST_MULTI_PACK_INDEX=0 GIT_TEST_MULTI_PACK_INDEX=0
objdir=.git/objects objdir=.git/objects
@ -438,7 +439,7 @@ test_expect_success 'verify extended chunk count' '


test_expect_success 'verify missing required chunk' ' test_expect_success 'verify missing required chunk' '
corrupt_midx_and_verify $MIDX_BYTE_CHUNK_ID "\01" $objdir \ corrupt_midx_and_verify $MIDX_BYTE_CHUNK_ID "\01" $objdir \
"missing required" "required pack-name chunk missing"
' '


test_expect_success 'verify invalid chunk offset' ' test_expect_success 'verify invalid chunk offset' '
@ -1055,4 +1056,105 @@ test_expect_success 'repack with delta islands' '
) )
' '


corrupt_chunk () {
midx=.git/objects/pack/multi-pack-index &&
test_when_finished "rm -rf $midx" &&
git repack -ad --write-midx &&
corrupt_chunk_file $midx "$@"
}

test_expect_success 'reader notices too-small oid fanout chunk' '
corrupt_chunk OIDF clear 00000000 &&
test_must_fail git log 2>err &&
cat >expect <<-\EOF &&
error: multi-pack-index OID fanout is of the wrong size
fatal: multi-pack-index required OID fanout chunk missing or corrupted
EOF
test_cmp expect err
'

test_expect_success 'reader notices too-small oid lookup chunk' '
corrupt_chunk OIDL clear 00000000 &&
test_must_fail git log 2>err &&
cat >expect <<-\EOF &&
error: multi-pack-index OID lookup chunk is the wrong size
fatal: multi-pack-index required OID lookup chunk missing or corrupted
EOF
test_cmp expect err
'

test_expect_success 'reader notices too-small pack names chunk' '
# There is no NUL to terminate the name here, so the
# chunk is too short.
corrupt_chunk PNAM clear 70656666 &&
test_must_fail git log 2>err &&
cat >expect <<-\EOF &&
fatal: multi-pack-index pack-name chunk is too short
EOF
test_cmp expect err
'

test_expect_success 'reader handles unaligned chunks' '
# A 9-byte PNAM means all of the subsequent chunks
# will no longer be 4-byte aligned, but it is still
# a valid one-pack chunk on its own (it is "foo.pack\0").
corrupt_chunk PNAM clear 666f6f2e7061636b00 &&
git -c core.multipackindex=false log >expect.out &&
git -c core.multipackindex=true log >out 2>err &&
test_cmp expect.out out &&
cat >expect.err <<-\EOF &&
error: chunk id 4f494446 not 4-byte aligned
EOF
test_cmp expect.err err
'

test_expect_success 'reader notices too-small object offset chunk' '
corrupt_chunk OOFF clear 00000000 &&
test_must_fail git log 2>err &&
cat >expect <<-\EOF &&
error: multi-pack-index object offset chunk is the wrong size
fatal: multi-pack-index required object offsets chunk missing or corrupted
EOF
test_cmp expect err
'

test_expect_success 'reader bounds-checks large offset table' '
# re-use the objects64 dir here to cheaply get access to a midx
# with large offsets.
git init repo &&
test_when_finished "rm -rf repo" &&
(
cd repo &&
(cd ../objects64 && pwd) >.git/objects/info/alternates &&
git multi-pack-index --object-dir=../objects64 write &&
midx=../objects64/pack/multi-pack-index &&
corrupt_chunk_file $midx LOFF clear &&
# using only %(objectsize) is important here; see the commit
# message for more details
test_must_fail git cat-file --batch-all-objects \
--batch-check="%(objectsize)" 2>err &&
cat >expect <<-\EOF &&
fatal: multi-pack-index large offset out of bounds
EOF
test_cmp expect err
)
'

test_expect_success 'reader notices too-small revindex chunk' '
# We only get a revindex with bitmaps (and likewise only
# load it when they are asked for).
test_config repack.writeBitmaps true &&
corrupt_chunk RIDX clear 00000000 &&
git -c core.multipackIndex=false rev-list \
--all --use-bitmap-index >expect.out &&
git -c core.multipackIndex=true rev-list \
--all --use-bitmap-index >out 2>err &&
test_cmp expect.out out &&
cat >expect.err <<-\EOF &&
error: multi-pack-index reverse-index chunk is the wrong size
warning: multi-pack bitmap is missing required reverse index
EOF
test_cmp expect.err err
'

test_done test_done

View File

@ -4,6 +4,7 @@ test_description='split commit graph'


TEST_PASSES_SANITIZE_LEAK=true TEST_PASSES_SANITIZE_LEAK=true
. ./test-lib.sh . ./test-lib.sh
. "$TEST_DIRECTORY"/lib-chunk.sh


GIT_TEST_COMMIT_GRAPH=0 GIT_TEST_COMMIT_GRAPH=0
GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS=0 GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS=0
@ -319,7 +320,7 @@ test_expect_success 'verify --shallow does not check base contents' '
cd verify-shallow && cd verify-shallow &&
git commit-graph verify && git commit-graph verify &&
base_file=$graphdir/graph-$(head -n 1 $graphdir/commit-graph-chain).graph && base_file=$graphdir/graph-$(head -n 1 $graphdir/commit-graph-chain).graph &&
corrupt_file "$base_file" 1000 "\01" && corrupt_file "$base_file" 1500 "\01" &&
git commit-graph verify --shallow && git commit-graph verify --shallow &&
test_must_fail git commit-graph verify 2>test_err && test_must_fail git commit-graph verify 2>test_err &&
grep -v "^+" test_err >err && grep -v "^+" test_err >err &&
@ -393,10 +394,23 @@ test_expect_success 'verify across alternates' '
test_commit extra && test_commit extra &&
git commit-graph write --reachable --split && git commit-graph write --reachable --split &&
tip_file=$graphdir/graph-$(tail -n 1 $graphdir/commit-graph-chain).graph && tip_file=$graphdir/graph-$(tail -n 1 $graphdir/commit-graph-chain).graph &&
corrupt_file "$tip_file" 100 "\01" && corrupt_file "$tip_file" 1500 "\01" &&
test_must_fail git commit-graph verify --shallow 2>test_err && test_must_fail git commit-graph verify --shallow 2>test_err &&
grep -v "^+" test_err >err && grep -v "^+" test_err >err &&
test_i18ngrep "commit-graph has incorrect fanout value" err test_i18ngrep "incorrect checksum" err
)
'

test_expect_success 'reader bounds-checks base-graph chunk' '
git clone --no-hardlinks . corrupt-base-chunk &&
(
cd corrupt-base-chunk &&
tip_file=$graphdir/graph-$(tail -n 1 $graphdir/commit-graph-chain).graph &&
corrupt_chunk_file "$tip_file" BASE clear 01020304 &&
git -c core.commitGraph=false log >expect.out &&
git -c core.commitGraph=true log >out 2>err &&
test_cmp expect.out out &&
grep "commit-graph base graphs chunk is too small" err
) )
' '



View File

@ -12,6 +12,7 @@ then
fi fi


. "$TEST_DIRECTORY"/lib-commit-graph.sh . "$TEST_DIRECTORY"/lib-commit-graph.sh
. "$TEST_DIRECTORY/lib-chunk.sh"


UNIX_EPOCH_ZERO="@0 +0000" UNIX_EPOCH_ZERO="@0 +0000"
FUTURE_DATE="@4147483646 +0000" FUTURE_DATE="@4147483646 +0000"
@ -74,4 +75,13 @@ test_expect_success 'single commit with generation data exceeding UINT32_MAX' '
git -C repo-uint32-max commit-graph verify git -C repo-uint32-max commit-graph verify
' '


test_expect_success 'reader notices out-of-bounds generation overflow' '
graph=.git/objects/info/commit-graph &&
test_when_finished "rm -rf $graph" &&
git commit-graph write --reachable &&
corrupt_chunk_file $graph GDO2 clear &&
test_must_fail git log 2>err &&
grep "commit-graph overflow generation data is too small" err
'

test_done test_done