reftable/table: move reading block into block reader

The logic to read blocks from a reftable is scattered across both the
table and the block subsystems. Besides causing somewhat fuzzy
responsibilities, it also means that we have to awkwardly pass around
the ownership of blocks between the subsystems.

Refactor the code so that we stop passing the block when initializing a
reader and instead pass in the block source plus the offset at which
we're supposed to read a block. This way, the ownership of the block
itself doesn't need to get handed over, as the block reader is the one
owning the block right from the start.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
maint
Patrick Steinhardt 2025-04-07 15:16:17 +02:00 committed by Junio C Hamano
parent ba620d296a
commit fd888311fb
4 changed files with 107 additions and 129 deletions

View File

@ -209,31 +209,57 @@ int block_writer_finish(struct block_writer *w)
return w->next; return w->next;
} }


int block_reader_init(struct block_reader *br, struct reftable_block *block, static int read_block(struct reftable_block_source *source,
uint32_t header_off, uint32_t table_block_size, struct reftable_block *dest, uint64_t off,
uint32_t hash_size) uint32_t sz)
{ {
size_t size = block_source_size(source);
block_source_return_block(dest);
if (off >= size)
return 0;
if (off + sz > size)
sz = size - off;
return block_source_read_block(source, dest, off, sz);
}

int block_reader_init(struct block_reader *br,
struct reftable_block_source *source,
uint32_t offset, uint32_t header_size,
uint32_t table_block_size, uint32_t hash_size)
{
uint32_t guess_block_size = table_block_size ?
table_block_size : DEFAULT_BLOCK_SIZE;
uint32_t full_block_size = table_block_size; uint32_t full_block_size = table_block_size;
uint8_t typ = block->data[header_off];
uint32_t sz = reftable_get_be24(block->data + header_off + 1);
uint16_t restart_count; uint16_t restart_count;
uint32_t restart_off; uint32_t restart_off;
uint32_t block_size;
uint8_t block_type;
int err; int err;


block_source_return_block(&br->block); err = read_block(source, &br->block, offset, guess_block_size);
if (err < 0)
goto done;


if (!reftable_is_block_type(typ)) { block_type = br->block.data[header_size];
if (!reftable_is_block_type(block_type)) {
err = REFTABLE_FORMAT_ERROR; err = REFTABLE_FORMAT_ERROR;
goto done; goto done;
} }


if (typ == BLOCK_TYPE_LOG) { block_size = reftable_get_be24(br->block.data + header_size + 1);
uint32_t block_header_skip = 4 + header_off; if (block_size > guess_block_size) {
uLong dst_len = sz - block_header_skip; err = read_block(source, &br->block, offset, block_size);
uLong src_len = block->len - block_header_skip; if (err < 0)
goto done;
}

if (block_type == BLOCK_TYPE_LOG) {
uint32_t block_header_skip = 4 + header_size;
uLong dst_len = block_size - block_header_skip;
uLong src_len = br->block.len - block_header_skip;


/* Log blocks specify the *uncompressed* size in their header. */ /* Log blocks specify the *uncompressed* size in their header. */
REFTABLE_ALLOC_GROW_OR_NULL(br->uncompressed_data, sz, REFTABLE_ALLOC_GROW_OR_NULL(br->uncompressed_data, block_size,
br->uncompressed_cap); br->uncompressed_cap);
if (!br->uncompressed_data) { if (!br->uncompressed_data) {
err = REFTABLE_OUT_OF_MEMORY_ERROR; err = REFTABLE_OUT_OF_MEMORY_ERROR;
@ -241,7 +267,7 @@ int block_reader_init(struct block_reader *br, struct reftable_block *block,
} }


/* Copy over the block header verbatim. It's not compressed. */ /* Copy over the block header verbatim. It's not compressed. */
memcpy(br->uncompressed_data, block->data, block_header_skip); memcpy(br->uncompressed_data, br->block.data, block_header_skip);


if (!br->zstream) { if (!br->zstream) {
REFTABLE_CALLOC_ARRAY(br->zstream, 1); REFTABLE_CALLOC_ARRAY(br->zstream, 1);
@ -259,7 +285,7 @@ int block_reader_init(struct block_reader *br, struct reftable_block *block,
goto done; goto done;
} }


br->zstream->next_in = block->data + block_header_skip; br->zstream->next_in = br->block.data + block_header_skip;
br->zstream->avail_in = src_len; br->zstream->avail_in = src_len;
br->zstream->next_out = br->uncompressed_data + block_header_skip; br->zstream->next_out = br->uncompressed_data + block_header_skip;
br->zstream->avail_out = dst_len; br->zstream->avail_out = dst_len;
@ -278,43 +304,41 @@ int block_reader_init(struct block_reader *br, struct reftable_block *block,
} }
err = 0; err = 0;


if (br->zstream->total_out + block_header_skip != sz) { if (br->zstream->total_out + block_header_skip != block_size) {
err = REFTABLE_FORMAT_ERROR; err = REFTABLE_FORMAT_ERROR;
goto done; goto done;
} }


/* We're done with the input data. */ /* We're done with the input data. */
block_source_return_block(block); block_source_return_block(&br->block);
block->data = br->uncompressed_data; br->block.data = br->uncompressed_data;
block->len = sz; br->block.len = block_size;
full_block_size = src_len + block_header_skip - br->zstream->avail_in; full_block_size = src_len + block_header_skip - br->zstream->avail_in;
} else if (full_block_size == 0) { } else if (full_block_size == 0) {
full_block_size = sz; full_block_size = block_size;
} else if (sz < full_block_size && sz < block->len && } else if (block_size < full_block_size && block_size < br->block.len &&
block->data[sz] != 0) { br->block.data[block_size] != 0) {
/* If the block is smaller than the full block size, it is /* If the block is smaller than the full block size, it is
padded (data followed by '\0') or the next block is padded (data followed by '\0') or the next block is
unaligned. */ unaligned. */
full_block_size = sz; full_block_size = block_size;
} }


restart_count = reftable_get_be16(block->data + sz - 2); restart_count = reftable_get_be16(br->block.data + block_size - 2);
restart_off = sz - 2 - 3 * restart_count; restart_off = block_size - 2 - 3 * restart_count;

/* transfer ownership. */
br->block = *block;
block->data = NULL;
block->len = 0;


br->block_type = block_type;
br->hash_size = hash_size; br->hash_size = hash_size;
br->restart_off = restart_off; br->restart_off = restart_off;
br->full_block_size = full_block_size; br->full_block_size = full_block_size;
br->header_off = header_off; br->header_off = header_size;
br->restart_count = restart_count; br->restart_count = restart_count;


err = 0; err = 0;


done: done:
if (err < 0)
block_reader_release(br);
return err; return err;
} }


@ -324,6 +348,7 @@ void block_reader_release(struct block_reader *br)
reftable_free(br->zstream); reftable_free(br->zstream);
reftable_free(br->uncompressed_data); reftable_free(br->uncompressed_data);
block_source_return_block(&br->block); block_source_return_block(&br->block);
memset(br, 0, sizeof(*br));
} }


uint8_t block_reader_type(const struct block_reader *r) uint8_t block_reader_type(const struct block_reader *r)

View File

@ -89,12 +89,14 @@ struct block_reader {
/* size of the data in the file. For log blocks, this is the compressed /* size of the data in the file. For log blocks, this is the compressed
* size. */ * size. */
uint32_t full_block_size; uint32_t full_block_size;
uint8_t block_type;
}; };


/* initializes a block reader. */ /* initializes a block reader. */
int block_reader_init(struct block_reader *br, struct reftable_block *bl, int block_reader_init(struct block_reader *br,
uint32_t header_off, uint32_t table_block_size, struct reftable_block_source *source,
uint32_t hash_size); uint32_t offset, uint32_t header_size,
uint32_t table_block_size, uint32_t hash_size);


void block_reader_release(struct block_reader *br); void block_reader_release(struct block_reader *br);



View File

@ -30,23 +30,6 @@ table_offsets_for(struct reftable_table *t, uint8_t typ)
abort(); abort();
} }


static int table_get_block(struct reftable_table *t,
struct reftable_block *dest, uint64_t off,
uint32_t sz)
{
ssize_t bytes_read;
if (off >= t->size)
return 0;
if (off + sz > t->size)
sz = t->size - off;

bytes_read = block_source_read_block(&t->source, dest, off, sz);
if (bytes_read < 0)
return (int)bytes_read;

return 0;
}

enum reftable_hash reftable_table_hash_id(struct reftable_table *t) enum reftable_hash reftable_table_hash_id(struct reftable_table *t)
{ {
return t->hash_id; return t->hash_id;
@ -180,64 +163,28 @@ static void table_iter_block_done(struct table_iter *ti)
block_iter_reset(&ti->bi); block_iter_reset(&ti->bi);
} }


static int32_t extract_block_size(uint8_t *data, uint8_t *typ, uint64_t off,
int version)
{
int32_t result = 0;

if (off == 0) {
data += header_size(version);
}

*typ = data[0];
if (reftable_is_block_type(*typ)) {
result = reftable_get_be24(data + 1);
}
return result;
}

int table_init_block_reader(struct reftable_table *t, struct block_reader *br, int table_init_block_reader(struct reftable_table *t, struct block_reader *br,
uint64_t next_off, uint8_t want_typ) uint64_t next_off, uint8_t want_typ)
{ {
int32_t guess_block_size = t->block_size ? t->block_size :
DEFAULT_BLOCK_SIZE;
struct reftable_block block = { NULL };
uint8_t block_typ = 0;
int err = 0;
uint32_t header_off = next_off ? 0 : header_size(t->version); uint32_t header_off = next_off ? 0 : header_size(t->version);
int32_t block_size = 0; int err;


if (next_off >= t->size) if (next_off >= t->size)
return 1; return 1;


err = table_get_block(t, &block, next_off, guess_block_size); err = block_reader_init(br, &t->source, next_off, header_off,
t->block_size, hash_size(t->hash_id));
if (err < 0) if (err < 0)
goto done; goto done;


block_size = extract_block_size(block.data, &block_typ, next_off, if (want_typ != BLOCK_TYPE_ANY && br->block_type != want_typ) {
t->version);
if (block_size < 0) {
err = block_size;
goto done;
}
if (want_typ != BLOCK_TYPE_ANY && block_typ != want_typ) {
err = 1; err = 1;
goto done; goto done;
} }


if (block_size > guess_block_size) {
block_source_return_block(&block);
err = table_get_block(t, &block, next_off, block_size);
if (err < 0) {
goto done;
}
}

err = block_reader_init(br, &block, header_off, t->block_size,
hash_size(t->hash_id));
done: done:
block_source_return_block(&block); if (err)

block_reader_release(br);
return err; return err;
} }



View File

@ -19,7 +19,7 @@ static void t_ref_block_read_write(void)
struct reftable_record recs[30]; struct reftable_record recs[30];
const size_t N = ARRAY_SIZE(recs); const size_t N = ARRAY_SIZE(recs);
const size_t block_size = 1024; const size_t block_size = 1024;
struct reftable_block block = { 0 }; struct reftable_block_source source = { 0 };
struct block_writer bw = { struct block_writer bw = {
.last_key = REFTABLE_BUF_INIT, .last_key = REFTABLE_BUF_INIT,
}; };
@ -30,13 +30,14 @@ static void t_ref_block_read_write(void)
int ret; int ret;
struct block_reader br = { 0 }; struct block_reader br = { 0 };
struct block_iter it = BLOCK_ITER_INIT; struct block_iter it = BLOCK_ITER_INIT;
struct reftable_buf want = REFTABLE_BUF_INIT, buf = REFTABLE_BUF_INIT; struct reftable_buf want = REFTABLE_BUF_INIT;
struct reftable_buf block = REFTABLE_BUF_INIT;


REFTABLE_CALLOC_ARRAY(block.data, block_size); REFTABLE_CALLOC_ARRAY(block.buf, block_size);
check(block.data != NULL); check(block.buf != NULL);
block.len = block_size; block.len = block_size;
block_source_from_buf(&block.source ,&buf);
ret = block_writer_init(&bw, BLOCK_TYPE_REF, block.data, block_size, ret = block_writer_init(&bw, BLOCK_TYPE_REF, (uint8_t *) block.buf, block_size,
header_off, hash_size(REFTABLE_HASH_SHA1)); header_off, hash_size(REFTABLE_HASH_SHA1));
check(!ret); check(!ret);


@ -62,7 +63,8 @@ static void t_ref_block_read_write(void)


block_writer_release(&bw); block_writer_release(&bw);


block_reader_init(&br, &block, header_off, block_size, REFTABLE_HASH_SIZE_SHA1); block_source_from_buf(&source ,&block);
block_reader_init(&br, &source, 0, header_off, block_size, REFTABLE_HASH_SIZE_SHA1);


block_iter_seek_start(&it, &br); block_iter_seek_start(&it, &br);


@ -100,9 +102,8 @@ static void t_ref_block_read_write(void)
block_reader_release(&br); block_reader_release(&br);
block_iter_close(&it); block_iter_close(&it);
reftable_record_release(&rec); reftable_record_release(&rec);
block_source_return_block(&br.block);
reftable_buf_release(&want); reftable_buf_release(&want);
reftable_buf_release(&buf); reftable_buf_release(&block);
for (i = 0; i < N; i++) for (i = 0; i < N; i++)
reftable_record_release(&recs[i]); reftable_record_release(&recs[i]);
} }
@ -113,7 +114,7 @@ static void t_log_block_read_write(void)
struct reftable_record recs[30]; struct reftable_record recs[30];
const size_t N = ARRAY_SIZE(recs); const size_t N = ARRAY_SIZE(recs);
const size_t block_size = 2048; const size_t block_size = 2048;
struct reftable_block block = { 0 }; struct reftable_block_source source = { 0 };
struct block_writer bw = { struct block_writer bw = {
.last_key = REFTABLE_BUF_INIT, .last_key = REFTABLE_BUF_INIT,
}; };
@ -124,13 +125,14 @@ static void t_log_block_read_write(void)
int ret; int ret;
struct block_reader br = { 0 }; struct block_reader br = { 0 };
struct block_iter it = BLOCK_ITER_INIT; struct block_iter it = BLOCK_ITER_INIT;
struct reftable_buf want = REFTABLE_BUF_INIT, buf = REFTABLE_BUF_INIT; struct reftable_buf want = REFTABLE_BUF_INIT;
struct reftable_buf block = REFTABLE_BUF_INIT;


REFTABLE_CALLOC_ARRAY(block.data, block_size); REFTABLE_CALLOC_ARRAY(block.buf, block_size);
check(block.data != NULL); check(block.buf != NULL);
block.len = block_size; block.len = block_size;
block_source_from_buf(&block.source ,&buf);
ret = block_writer_init(&bw, BLOCK_TYPE_LOG, block.data, block_size, ret = block_writer_init(&bw, BLOCK_TYPE_LOG, (uint8_t *) block.buf, block_size,
header_off, hash_size(REFTABLE_HASH_SHA1)); header_off, hash_size(REFTABLE_HASH_SHA1));
check(!ret); check(!ret);


@ -151,7 +153,8 @@ static void t_log_block_read_write(void)


block_writer_release(&bw); block_writer_release(&bw);


block_reader_init(&br, &block, header_off, block_size, REFTABLE_HASH_SIZE_SHA1); block_source_from_buf(&source, &block);
block_reader_init(&br, &source, 0, header_off, block_size, REFTABLE_HASH_SIZE_SHA1);


block_iter_seek_start(&it, &br); block_iter_seek_start(&it, &br);


@ -190,9 +193,8 @@ static void t_log_block_read_write(void)
block_reader_release(&br); block_reader_release(&br);
block_iter_close(&it); block_iter_close(&it);
reftable_record_release(&rec); reftable_record_release(&rec);
block_source_return_block(&br.block);
reftable_buf_release(&want); reftable_buf_release(&want);
reftable_buf_release(&buf); reftable_buf_release(&block);
for (i = 0; i < N; i++) for (i = 0; i < N; i++)
reftable_record_release(&recs[i]); reftable_record_release(&recs[i]);
} }
@ -203,7 +205,7 @@ static void t_obj_block_read_write(void)
struct reftable_record recs[30]; struct reftable_record recs[30];
const size_t N = ARRAY_SIZE(recs); const size_t N = ARRAY_SIZE(recs);
const size_t block_size = 1024; const size_t block_size = 1024;
struct reftable_block block = { 0 }; struct reftable_block_source source = { 0 };
struct block_writer bw = { struct block_writer bw = {
.last_key = REFTABLE_BUF_INIT, .last_key = REFTABLE_BUF_INIT,
}; };
@ -214,13 +216,14 @@ static void t_obj_block_read_write(void)
int ret; int ret;
struct block_reader br = { 0 }; struct block_reader br = { 0 };
struct block_iter it = BLOCK_ITER_INIT; struct block_iter it = BLOCK_ITER_INIT;
struct reftable_buf want = REFTABLE_BUF_INIT, buf = REFTABLE_BUF_INIT; struct reftable_buf want = REFTABLE_BUF_INIT;
struct reftable_buf block = REFTABLE_BUF_INIT;


REFTABLE_CALLOC_ARRAY(block.data, block_size); REFTABLE_CALLOC_ARRAY(block.buf, block_size);
check(block.data != NULL); check(block.buf != NULL);
block.len = block_size; block.len = block_size;
block_source_from_buf(&block.source, &buf);
ret = block_writer_init(&bw, BLOCK_TYPE_OBJ, block.data, block_size, ret = block_writer_init(&bw, BLOCK_TYPE_OBJ, (uint8_t *) block.buf, block_size,
header_off, hash_size(REFTABLE_HASH_SHA1)); header_off, hash_size(REFTABLE_HASH_SHA1));
check(!ret); check(!ret);


@ -243,7 +246,8 @@ static void t_obj_block_read_write(void)


block_writer_release(&bw); block_writer_release(&bw);


block_reader_init(&br, &block, header_off, block_size, REFTABLE_HASH_SIZE_SHA1); block_source_from_buf(&source, &block);
block_reader_init(&br, &source, 0, header_off, block_size, REFTABLE_HASH_SIZE_SHA1);


block_iter_seek_start(&it, &br); block_iter_seek_start(&it, &br);


@ -273,9 +277,8 @@ static void t_obj_block_read_write(void)
block_reader_release(&br); block_reader_release(&br);
block_iter_close(&it); block_iter_close(&it);
reftable_record_release(&rec); reftable_record_release(&rec);
block_source_return_block(&br.block);
reftable_buf_release(&want); reftable_buf_release(&want);
reftable_buf_release(&buf); reftable_buf_release(&block);
for (i = 0; i < N; i++) for (i = 0; i < N; i++)
reftable_record_release(&recs[i]); reftable_record_release(&recs[i]);
} }
@ -286,7 +289,7 @@ static void t_index_block_read_write(void)
struct reftable_record recs[30]; struct reftable_record recs[30];
const size_t N = ARRAY_SIZE(recs); const size_t N = ARRAY_SIZE(recs);
const size_t block_size = 1024; const size_t block_size = 1024;
struct reftable_block block = { 0 }; struct reftable_block_source source = { 0 };
struct block_writer bw = { struct block_writer bw = {
.last_key = REFTABLE_BUF_INIT, .last_key = REFTABLE_BUF_INIT,
}; };
@ -298,13 +301,14 @@ static void t_index_block_read_write(void)
int ret; int ret;
struct block_reader br = { 0 }; struct block_reader br = { 0 };
struct block_iter it = BLOCK_ITER_INIT; struct block_iter it = BLOCK_ITER_INIT;
struct reftable_buf want = REFTABLE_BUF_INIT, buf = REFTABLE_BUF_INIT; struct reftable_buf want = REFTABLE_BUF_INIT;
struct reftable_buf block = REFTABLE_BUF_INIT;


REFTABLE_CALLOC_ARRAY(block.data, block_size); REFTABLE_CALLOC_ARRAY(block.buf, block_size);
check(block.data != NULL); check(block.buf != NULL);
block.len = block_size; block.len = block_size;
block_source_from_buf(&block.source, &buf);
ret = block_writer_init(&bw, BLOCK_TYPE_INDEX, block.data, block_size, ret = block_writer_init(&bw, BLOCK_TYPE_INDEX, (uint8_t *) block.buf, block_size,
header_off, hash_size(REFTABLE_HASH_SHA1)); header_off, hash_size(REFTABLE_HASH_SHA1));
check(!ret); check(!ret);


@ -327,7 +331,8 @@ static void t_index_block_read_write(void)


block_writer_release(&bw); block_writer_release(&bw);


block_reader_init(&br, &block, header_off, block_size, REFTABLE_HASH_SIZE_SHA1); block_source_from_buf(&source, &block);
block_reader_init(&br, &source, 0, header_off, block_size, REFTABLE_HASH_SIZE_SHA1);


block_iter_seek_start(&it, &br); block_iter_seek_start(&it, &br);


@ -365,9 +370,8 @@ static void t_index_block_read_write(void)
block_reader_release(&br); block_reader_release(&br);
block_iter_close(&it); block_iter_close(&it);
reftable_record_release(&rec); reftable_record_release(&rec);
block_source_return_block(&br.block);
reftable_buf_release(&want); reftable_buf_release(&want);
reftable_buf_release(&buf); reftable_buf_release(&block);
for (i = 0; i < N; i++) for (i = 0; i < N; i++)
reftable_record_release(&recs[i]); reftable_record_release(&recs[i]);
} }