commit-graph: use chunk-format read API

Instead of parsing the table of contents directly, use the chunk-format
API methods read_table_of_contents(), pair_chunk(), and read_chunk().
This change drops the existing duplicate-chunk detection; that check
will be restored in a future change.

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
maint
Derrick Stolee 2021-02-18 14:07:35 +00:00 committed by Junio C Hamano
parent 5f0879f54b
commit 2692c2f6fd
2 changed files with 55 additions and 106 deletions

View File

@ -59,8 +59,7 @@ void git_test_write_commit_graph_or_die(void)


#define GRAPH_HEADER_SIZE 8 #define GRAPH_HEADER_SIZE 8
#define GRAPH_FANOUT_SIZE (4 * 256) #define GRAPH_FANOUT_SIZE (4 * 256)
#define GRAPH_CHUNKLOOKUP_WIDTH 12 #define GRAPH_MIN_SIZE (GRAPH_HEADER_SIZE + 4 * CHUNK_TOC_ENTRY_SIZE \
#define GRAPH_MIN_SIZE (GRAPH_HEADER_SIZE + 4 * GRAPH_CHUNKLOOKUP_WIDTH \
+ GRAPH_FANOUT_SIZE + the_hash_algo->rawsz) + GRAPH_FANOUT_SIZE + the_hash_algo->rawsz)


#define CORRECTED_COMMIT_DATE_OFFSET_OVERFLOW (1ULL << 31) #define CORRECTED_COMMIT_DATE_OFFSET_OVERFLOW (1ULL << 31)
@ -298,15 +297,43 @@ static int verify_commit_graph_lite(struct commit_graph *g)
return 0; return 0;
} }


/*
 * Chunk-read callback for the OID lookup chunk.
 *
 * 'data' is the commit_graph being populated. The commit count is derived
 * from the chunk length divided by the graph's hash length, and the chunk
 * start is remembered as the OID lookup table. Always returns 0.
 */
static int graph_read_oid_lookup(const unsigned char *chunk_start,
				 size_t chunk_size, void *data)
{
	struct commit_graph *graph = data;

	graph->num_commits = chunk_size / graph->hash_len;
	graph->chunk_oid_lookup = chunk_start;

	return 0;
}

/*
 * Chunk-read callback for the Bloom filter data chunk.
 *
 * 'data' is the commit_graph being populated. The chunk begins with three
 * big-endian 32-bit words: hash version, number of hashes, and bits per
 * entry. When the hash version is 1, a bloom_filter_settings struct is
 * allocated and filled from those words.
 *
 * Returns 0 when the chunk was accepted. That includes an unsupported hash
 * version, in which case the chunk start is recorded but no settings are
 * allocated. Returns -1 when the chunk is too short to contain the header;
 * the commit_graph is then left untouched.
 */
static int graph_read_bloom_data(const unsigned char *chunk_start,
				  size_t chunk_size, void *data)
{
	/* Words are read below at offsets 0, 4 and 8. */
	const size_t header_size = 3 * sizeof(uint32_t);
	struct commit_graph *g = data;
	uint32_t hash_version;

	/*
	 * The file contents are untrusted: a truncated chunk must be
	 * rejected before any header word is read, otherwise get_be32()
	 * would read past the end of the chunk.
	 */
	if (chunk_size < header_size)
		return -1;

	g->chunk_bloom_data = chunk_start;
	hash_version = get_be32(chunk_start);

	/* Only hash version 1 is understood; leave settings unset otherwise. */
	if (hash_version != 1)
		return 0;

	g->bloom_filter_settings = xmalloc(sizeof(struct bloom_filter_settings));
	g->bloom_filter_settings->hash_version = hash_version;
	g->bloom_filter_settings->num_hashes = get_be32(chunk_start + 4);
	g->bloom_filter_settings->bits_per_entry = get_be32(chunk_start + 8);
	g->bloom_filter_settings->max_changed_paths = DEFAULT_BLOOM_MAX_CHANGES;

	return 0;
}

struct commit_graph *parse_commit_graph(struct repository *r, struct commit_graph *parse_commit_graph(struct repository *r,
void *graph_map, size_t graph_size) void *graph_map, size_t graph_size)
{ {
const unsigned char *data, *chunk_lookup; const unsigned char *data;
uint32_t i;
struct commit_graph *graph; struct commit_graph *graph;
uint64_t next_chunk_offset;
uint32_t graph_signature; uint32_t graph_signature;
unsigned char graph_version, hash_version; unsigned char graph_version, hash_version;
struct chunkfile *cf = NULL;


if (!graph_map) if (!graph_map)
return NULL; return NULL;
@ -347,7 +374,7 @@ struct commit_graph *parse_commit_graph(struct repository *r,
graph->data_len = graph_size; graph->data_len = graph_size;


if (graph_size < GRAPH_HEADER_SIZE + if (graph_size < GRAPH_HEADER_SIZE +
(graph->num_chunks + 1) * GRAPH_CHUNKLOOKUP_WIDTH + (graph->num_chunks + 1) * CHUNK_TOC_ENTRY_SIZE +
GRAPH_FANOUT_SIZE + the_hash_algo->rawsz) { GRAPH_FANOUT_SIZE + the_hash_algo->rawsz) {
error(_("commit-graph file is too small to hold %u chunks"), error(_("commit-graph file is too small to hold %u chunks"),
graph->num_chunks); graph->num_chunks);
@ -355,108 +382,28 @@ struct commit_graph *parse_commit_graph(struct repository *r,
return NULL; return NULL;
} }


chunk_lookup = data + 8; cf = init_chunkfile(NULL);
next_chunk_offset = get_be64(chunk_lookup + 4);
for (i = 0; i < graph->num_chunks; i++) {
uint32_t chunk_id;
uint64_t chunk_offset = next_chunk_offset;
int chunk_repeated = 0;


chunk_id = get_be32(chunk_lookup + 0); if (read_table_of_contents(cf, graph->data, graph_size,
GRAPH_HEADER_SIZE, graph->num_chunks))
goto free_and_return;


chunk_lookup += GRAPH_CHUNKLOOKUP_WIDTH; pair_chunk(cf, GRAPH_CHUNKID_OIDFANOUT,
next_chunk_offset = get_be64(chunk_lookup + 4); (const unsigned char **)&graph->chunk_oid_fanout);
read_chunk(cf, GRAPH_CHUNKID_OIDLOOKUP, graph_read_oid_lookup, graph);
pair_chunk(cf, GRAPH_CHUNKID_DATA, &graph->chunk_commit_data);
pair_chunk(cf, GRAPH_CHUNKID_EXTRAEDGES, &graph->chunk_extra_edges);
pair_chunk(cf, GRAPH_CHUNKID_BASE, &graph->chunk_base_graphs);
pair_chunk(cf, GRAPH_CHUNKID_GENERATION_DATA,
&graph->chunk_generation_data);
pair_chunk(cf, GRAPH_CHUNKID_GENERATION_DATA_OVERFLOW,
&graph->chunk_generation_data_overflow);


if (chunk_offset > graph_size - the_hash_algo->rawsz) { if (r->settings.commit_graph_read_changed_paths) {
error(_("commit-graph improper chunk offset %08x%08x"), (uint32_t)(chunk_offset >> 32), pair_chunk(cf, GRAPH_CHUNKID_BLOOMINDEXES,
(uint32_t)chunk_offset); &graph->chunk_bloom_indexes);
goto free_and_return; read_chunk(cf, GRAPH_CHUNKID_BLOOMDATA,
} graph_read_bloom_data, graph);

switch (chunk_id) {
case GRAPH_CHUNKID_OIDFANOUT:
if (graph->chunk_oid_fanout)
chunk_repeated = 1;
else
graph->chunk_oid_fanout = (uint32_t*)(data + chunk_offset);
break;

case GRAPH_CHUNKID_OIDLOOKUP:
if (graph->chunk_oid_lookup)
chunk_repeated = 1;
else {
graph->chunk_oid_lookup = data + chunk_offset;
graph->num_commits = (next_chunk_offset - chunk_offset)
/ graph->hash_len;
}
break;

case GRAPH_CHUNKID_DATA:
if (graph->chunk_commit_data)
chunk_repeated = 1;
else
graph->chunk_commit_data = data + chunk_offset;
break;

case GRAPH_CHUNKID_GENERATION_DATA:
if (graph->chunk_generation_data)
chunk_repeated = 1;
else
graph->chunk_generation_data = data + chunk_offset;
break;

case GRAPH_CHUNKID_GENERATION_DATA_OVERFLOW:
if (graph->chunk_generation_data_overflow)
chunk_repeated = 1;
else
graph->chunk_generation_data_overflow = data + chunk_offset;
break;

case GRAPH_CHUNKID_EXTRAEDGES:
if (graph->chunk_extra_edges)
chunk_repeated = 1;
else
graph->chunk_extra_edges = data + chunk_offset;
break;

case GRAPH_CHUNKID_BASE:
if (graph->chunk_base_graphs)
chunk_repeated = 1;
else
graph->chunk_base_graphs = data + chunk_offset;
break;

case GRAPH_CHUNKID_BLOOMINDEXES:
if (graph->chunk_bloom_indexes)
chunk_repeated = 1;
else if (r->settings.commit_graph_read_changed_paths)
graph->chunk_bloom_indexes = data + chunk_offset;
break;

case GRAPH_CHUNKID_BLOOMDATA:
if (graph->chunk_bloom_data)
chunk_repeated = 1;
else if (r->settings.commit_graph_read_changed_paths) {
uint32_t hash_version;
graph->chunk_bloom_data = data + chunk_offset;
hash_version = get_be32(data + chunk_offset);

if (hash_version != 1)
break;

graph->bloom_filter_settings = xmalloc(sizeof(struct bloom_filter_settings));
graph->bloom_filter_settings->hash_version = hash_version;
graph->bloom_filter_settings->num_hashes = get_be32(data + chunk_offset + 4);
graph->bloom_filter_settings->bits_per_entry = get_be32(data + chunk_offset + 8);
graph->bloom_filter_settings->max_changed_paths = DEFAULT_BLOOM_MAX_CHANGES;
}
break;
}

if (chunk_repeated) {
error(_("commit-graph chunk id %08x appears multiple times"), chunk_id);
goto free_and_return;
}
} }


if (graph->chunk_bloom_indexes && graph->chunk_bloom_data) { if (graph->chunk_bloom_indexes && graph->chunk_bloom_data) {
@ -473,9 +420,11 @@ struct commit_graph *parse_commit_graph(struct repository *r,
if (verify_commit_graph_lite(graph)) if (verify_commit_graph_lite(graph))
goto free_and_return; goto free_and_return;


free_chunkfile(cf);
return graph; return graph;


free_and_return: free_and_return:
free_chunkfile(cf);
free(graph->bloom_filter_settings); free(graph->bloom_filter_settings);
free(graph); free(graph);
return NULL; return NULL;

View File

@ -564,7 +564,7 @@ test_expect_success 'detect bad hash version' '


test_expect_success 'detect low chunk count' ' test_expect_success 'detect low chunk count' '
corrupt_graph_and_verify $GRAPH_BYTE_CHUNK_COUNT "\01" \ corrupt_graph_and_verify $GRAPH_BYTE_CHUNK_COUNT "\01" \
"missing the .* chunk" "final chunk has non-zero id"
' '


test_expect_success 'detect missing OID fanout chunk' ' test_expect_success 'detect missing OID fanout chunk' '