/*
 * Copyright 2020 Google LLC
 *
 * Use of this source code is governed by a BSD-style
 * license that can be found in the LICENSE file or at
 * https://developers.google.com/open-source/licenses/bsd
 */

#include "block.h"

#include "blocksource.h"
#include "constants.h"
#include "iter.h"
#include "record.h"
#include "reftable-error.h"
#include "system.h"

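/*
 * Version 2 of the reftable format extends the header with a 4-byte hash ID,
 * so both the header and the footer (which embeds a copy of the header) grow
 * by four bytes relative to version 1.
 */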
size_t header_size(int version)
{
	switch (version) {
	case 1:
		return 24;
	case 2:
		return 28;
	}
	abort();
}

size_t footer_size(int version)
{
	switch (version) {
	case 1:
		return 68;
	case 2:
		return 72;
	}
	abort();
}

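/*
 * Records are prefix-compressed relative to their predecessor. Every
 * `restart_interval`-th record is written as a "restart": its key is stored
 * in full and its offset is recorded in the restart table at the end of the
 * block. The space check below reserves room for that trailer: a 3-byte
 * offset per restart point plus the 2-byte restart count.
 */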
static int block_writer_register_restart(struct block_writer *w, int n,
					 int is_restart, struct reftable_buf *key)
{
	uint32_t rlen;
	int err;

	rlen = w->restart_len;
	if (rlen >= MAX_RESTARTS)
		is_restart = 0;

	if (is_restart)
		rlen++;
	if (2 + 3 * rlen + n > w->block_size - w->next)
		return REFTABLE_ENTRY_TOO_BIG_ERROR;
	if (is_restart) {
		REFTABLE_ALLOC_GROW_OR_NULL(w->restarts, w->restart_len + 1,
					    w->restart_cap);
		if (!w->restarts)
			return REFTABLE_OUT_OF_MEMORY_ERROR;
		w->restarts[w->restart_len++] = w->next;
	}

	w->next += n;

	reftable_buf_reset(&w->last_key);
	err = reftable_buf_add(&w->last_key, key->buf, key->len);
	if (err < 0)
		return err;

	w->entries++;
	return 0;
}

int block_writer_init(struct block_writer *bw, uint8_t typ, uint8_t *block,
		      uint32_t block_size, uint32_t header_off, uint32_t hash_size)
{
	bw->block = block;
	bw->hash_size = hash_size;
	bw->block_size = block_size;
	bw->header_off = header_off;
	bw->block[header_off] = typ;
	bw->next = header_off + 4;
	bw->restart_interval = 16;
	bw->entries = 0;
	bw->restart_len = 0;
	bw->last_key.len = 0;
	if (!bw->zstream) {
		REFTABLE_CALLOC_ARRAY(bw->zstream, 1);
		if (!bw->zstream)
			return REFTABLE_OUT_OF_MEMORY_ERROR;
		deflateInit(bw->zstream, 9);
	}

	return 0;
}

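/*
 * A minimal sketch of how a caller is expected to drive the writer. The
 * caller-side names (`rec`, `next_record()`) are illustrative and error
 * handling is omitted:
 *
 *	struct block_writer bw = { 0 };
 *	block_writer_init(&bw, REFTABLE_BLOCK_TYPE_REF, block, block_size,
 *			  header_off, hash_size);
 *	while (block_writer_add(&bw, rec) == 0)
 *		rec = next_record();
 *	n = block_writer_finish(&bw);  (bytes used, or a negative error)
 *	block_writer_release(&bw);
 */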
uint8_t block_writer_type(struct block_writer *bw)
{
	return bw->block[bw->header_off];
}

/*
 * Adds the reftable_record to the block. Returns 0 on success and
 * appropriate error codes on failure.
 */
int block_writer_add(struct block_writer *w, struct reftable_record *rec)
{
	struct reftable_buf empty = REFTABLE_BUF_INIT;
	struct reftable_buf last =
		w->entries % w->restart_interval == 0 ? empty : w->last_key;
	struct string_view out = {
		.buf = w->block + w->next,
		.len = w->block_size - w->next,
	};
	struct string_view start = out;
	int is_restart = 0;
	int n = 0;
	int err;

	err = reftable_record_key(rec, &w->scratch);
	if (err < 0)
		goto done;

	if (!w->scratch.len) {
		err = REFTABLE_API_ERROR;
		goto done;
	}

	n = reftable_encode_key(&is_restart, out, last, w->scratch,
				reftable_record_val_type(rec));
	if (n < 0) {
		err = n;
		goto done;
	}
	string_view_consume(&out, n);

	n = reftable_record_encode(rec, out, w->hash_size);
	if (n < 0) {
		err = n;
		goto done;
	}
	string_view_consume(&out, n);

	err = block_writer_register_restart(w, start.len - out.len, is_restart,
					    &w->scratch);
done:
	return err;
}

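/*
 * Finishes the block by appending the restart trailer and, for log blocks,
 * compressing the payload. The resulting layout, as produced below, is:
 *
 *	header_off bytes    reserved for the file header (first block only)
 *	1 byte              block type
 *	3 bytes             big-endian block length
 *	...                 encoded records
 *	3 bytes each        big-endian restart offsets
 *	2 bytes             big-endian restart count
 */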
int block_writer_finish(struct block_writer *w)
{
	for (uint32_t i = 0; i < w->restart_len; i++) {
		reftable_put_be24(w->block + w->next, w->restarts[i]);
		w->next += 3;
	}

	reftable_put_be16(w->block + w->next, w->restart_len);
	w->next += 2;
	reftable_put_be24(w->block + 1 + w->header_off, w->next);

	/*
	 * Log records are stored zlib-compressed. Note that the compression
	 * also spans over the restart points we have just written.
	 */
	if (block_writer_type(w) == REFTABLE_BLOCK_TYPE_LOG) {
		int block_header_skip = 4 + w->header_off;
		uLongf src_len = w->next - block_header_skip, compressed_len;
		int ret;

		ret = deflateReset(w->zstream);
		if (ret != Z_OK)
			return REFTABLE_ZLIB_ERROR;

		/*
		 * Precompute the upper bound of how many bytes the compressed
		 * data may end up with. Combined with `Z_FINISH`, `deflate()`
		 * is guaranteed to return `Z_STREAM_END`.
		 */
		compressed_len = deflateBound(w->zstream, src_len);
		REFTABLE_ALLOC_GROW_OR_NULL(w->compressed, compressed_len,
					    w->compressed_cap);
		if (!w->compressed) {
			ret = REFTABLE_OUT_OF_MEMORY_ERROR;
			return ret;
		}

		w->zstream->next_out = w->compressed;
		w->zstream->avail_out = compressed_len;
		w->zstream->next_in = w->block + block_header_skip;
		w->zstream->avail_in = src_len;

		/*
		 * We want to perform the compression in a single step, which
		 * is why we can pass Z_FINISH here. As we have precomputed the
		 * deflated buffer's size via `deflateBound()` this function is
		 * guaranteed to succeed according to the zlib documentation.
		 */
		ret = deflate(w->zstream, Z_FINISH);
		if (ret != Z_STREAM_END)
			return REFTABLE_ZLIB_ERROR;

		/*
		 * Overwrite the uncompressed data we have already written and
		 * adjust the `next` pointer to point right after the
		 * compressed data.
		 */
		memcpy(w->block + block_header_skip, w->compressed,
		       w->zstream->total_out);
		w->next = w->zstream->total_out + block_header_skip;
	}

	return w->next;
}

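/*
 * Reads `sz` bytes at `off` from the block source, clamping the read to the
 * source's size so that a short block at the end of the table does not fail.
 */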
static int read_block(struct reftable_block_source *source,
		      struct reftable_block_data *dest, uint64_t off,
		      uint32_t sz)
{
	size_t size = block_source_size(source);
	block_source_release_data(dest);
	if (off >= size)
		return 0;
	if (off + sz > size)
		sz = size - off;
	return block_source_read_data(source, dest, off, sz);
}

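/*
 * The block is read in two steps: we first read `table_block_size` bytes (or
 * a default-sized chunk when the table does not declare a block size), then
 * re-read with the actual size encoded in the block header in case it is
 * larger. Log blocks are additionally inflated into a scratch buffer, as
 * they are stored zlib-compressed.
 */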
int reftable_block_init(struct reftable_block *block,
			struct reftable_block_source *source,
			uint32_t offset, uint32_t header_size,
			uint32_t table_block_size, uint32_t hash_size,
			uint8_t want_type)
{
	uint32_t guess_block_size = table_block_size ?
		table_block_size : DEFAULT_BLOCK_SIZE;
	uint32_t full_block_size = table_block_size;
	uint16_t restart_count;
	uint32_t restart_off;
	uint32_t block_size;
	uint8_t block_type;
	int err;

	err = read_block(source, &block->block_data, offset, guess_block_size);
	if (err < 0)
		goto done;

	block_type = block->block_data.data[header_size];
	if (!reftable_is_block_type(block_type)) {
		err = REFTABLE_FORMAT_ERROR;
		goto done;
	}
	if (want_type != REFTABLE_BLOCK_TYPE_ANY && block_type != want_type) {
		err = 1;
		goto done;
	}

	block_size = reftable_get_be24(block->block_data.data + header_size + 1);
	if (block_size > guess_block_size) {
		err = read_block(source, &block->block_data, offset, block_size);
		if (err < 0)
			goto done;
	}

	if (block_type == REFTABLE_BLOCK_TYPE_LOG) {
		uint32_t block_header_skip = 4 + header_size;
		uLong dst_len = block_size - block_header_skip;
		uLong src_len = block->block_data.len - block_header_skip;

		/* Log blocks specify the *uncompressed* size in their header. */
		REFTABLE_ALLOC_GROW_OR_NULL(block->uncompressed_data, block_size,
					    block->uncompressed_cap);
		if (!block->uncompressed_data) {
			err = REFTABLE_OUT_OF_MEMORY_ERROR;
			goto done;
		}

		/* Copy over the block header verbatim. It's not compressed. */
		memcpy(block->uncompressed_data, block->block_data.data, block_header_skip);

		if (!block->zstream) {
			REFTABLE_CALLOC_ARRAY(block->zstream, 1);
			if (!block->zstream) {
				err = REFTABLE_OUT_OF_MEMORY_ERROR;
				goto done;
			}

			err = inflateInit(block->zstream);
		} else {
			err = inflateReset(block->zstream);
		}
		if (err != Z_OK) {
			err = REFTABLE_ZLIB_ERROR;
			goto done;
		}

		block->zstream->next_in = block->block_data.data + block_header_skip;
		block->zstream->avail_in = src_len;
		block->zstream->next_out = block->uncompressed_data + block_header_skip;
		block->zstream->avail_out = dst_len;

		/*
		 * We know both the input and the output size, and we know that
		 * the sizes should never be bigger than `UINT_MAX` because
		 * blocks can at most be 16MB large. We can thus use `Z_FINISH`
		 * here to instruct zlib to inflate the data in one go, which
		 * is more efficient than using `Z_NO_FLUSH`.
		 */
		err = inflate(block->zstream, Z_FINISH);
		if (err != Z_STREAM_END) {
			err = REFTABLE_ZLIB_ERROR;
			goto done;
		}
		err = 0;

		if (block->zstream->total_out + block_header_skip != block_size) {
			err = REFTABLE_FORMAT_ERROR;
			goto done;
		}

		/* We're done with the input data. */
		block_source_release_data(&block->block_data);
		block->block_data.data = block->uncompressed_data;
		block->block_data.len = block_size;
		full_block_size = src_len + block_header_skip - block->zstream->avail_in;
	} else if (full_block_size == 0) {
		full_block_size = block_size;
	} else if (block_size < full_block_size && block_size < block->block_data.len &&
		   block->block_data.data[block_size] != 0) {
		/*
		 * If the block is smaller than the full block size, it is
		 * padded (data followed by '\0') or the next block is
		 * unaligned.
		 */
		full_block_size = block_size;
	}

	restart_count = reftable_get_be16(block->block_data.data + block_size - 2);
	restart_off = block_size - 2 - 3 * restart_count;

	block->block_type = block_type;
	block->hash_size = hash_size;
	block->restart_off = restart_off;
	block->full_block_size = full_block_size;
	block->header_off = header_size;
	block->restart_count = restart_count;

	err = 0;

done:
	if (err < 0)
		reftable_block_release(block);
	return err;
}

void reftable_block_release(struct reftable_block *block)
{
	inflateEnd(block->zstream);
	reftable_free(block->zstream);
	reftable_free(block->uncompressed_data);
	block_source_release_data(&block->block_data);
	memset(block, 0, sizeof(*block));
}

uint8_t reftable_block_type(const struct reftable_block *b)
{
	return b->block_data.data[b->header_off];
}

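/*
 * The first record starts right after the 4-byte block header and is always
 * a restart point, so its key is stored in full and can be decoded without
 * any preceding context.
 */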
int reftable_block_first_key(const struct reftable_block *block, struct reftable_buf *key)
{
	int off = block->header_off + 4, n;
	struct string_view in = {
		.buf = block->block_data.data + off,
		.len = block->restart_off - off,
	};
	uint8_t extra = 0;

	reftable_buf_reset(key);

	n = reftable_decode_key(key, &extra, in);
	if (n < 0)
		return n;
	if (!key->len)
		return REFTABLE_FORMAT_ERROR;

	return 0;
}

static uint32_t block_restart_offset(const struct reftable_block *b, size_t idx)
{
	return reftable_get_be24(b->block_data.data + b->restart_off + 3 * idx);
}

void block_iter_init(struct block_iter *it, const struct reftable_block *block)
{
	it->block = block;
	block_iter_seek_start(it);
}

void block_iter_seek_start(struct block_iter *it)
{
	reftable_buf_reset(&it->last_key);
	it->next_off = it->block->header_off + 4;
}

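/*
 * `restart_needle_less()` is the predicate for `binsearch()`: it returns
 * whether the needle is strictly less than the key stored at the given
 * restart point, so the search yields the first restart point whose key is
 * greater than the needle.
 */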
struct restart_needle_less_args {
	int error;
	struct reftable_buf needle;
	const struct reftable_block *block;
};

static int restart_needle_less(size_t idx, void *_args)
{
	struct restart_needle_less_args *args = _args;
	uint32_t off = block_restart_offset(args->block, idx);
	struct string_view in = {
		.buf = args->block->block_data.data + off,
		.len = args->block->restart_off - off,
	};
	uint64_t prefix_len, suffix_len;
	uint8_t extra;
	int n;

	/*
	 * Records at restart points are stored without prefix compression, so
	 * there is no need to fully decode the record key here. This removes
	 * the need for allocating memory.
	 */
	n = reftable_decode_keylen(in, &prefix_len, &suffix_len, &extra);
	if (n < 0 || prefix_len) {
		args->error = 1;
		return -1;
	}

	string_view_consume(&in, n);
	if (suffix_len > in.len) {
		args->error = 1;
		return -1;
	}

	n = memcmp(args->needle.buf, in.buf,
		   args->needle.len < suffix_len ? args->needle.len : suffix_len);
	if (n)
		return n < 0;
	return args->needle.len < suffix_len;
}

int block_iter_next(struct block_iter *it, struct reftable_record *rec)
{
	struct string_view in = {
		.buf = (unsigned char *) it->block->block_data.data + it->next_off,
		.len = it->block->restart_off - it->next_off,
	};
	struct string_view start = in;
	uint8_t extra = 0;
	int n = 0;

	if (it->next_off >= it->block->restart_off)
		return 1;

	n = reftable_decode_key(&it->last_key, &extra, in);
	if (n < 0)
		return -1;
	if (!it->last_key.len)
		return REFTABLE_FORMAT_ERROR;

	string_view_consume(&in, n);
	n = reftable_record_decode(rec, it->last_key, extra, in, it->block->hash_size,
				   &it->scratch);
	if (n < 0)
		return -1;
	string_view_consume(&in, n);

	it->next_off += start.len - in.len;
	return 0;
}

void block_iter_reset(struct block_iter *it)
{
	reftable_buf_reset(&it->last_key);
	it->next_off = 0;
	it->block = NULL;
}

void block_iter_close(struct block_iter *it)
{
	reftable_buf_release(&it->last_key);
	reftable_buf_release(&it->scratch);
}

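/*
 * Positions the iterator such that the next call to `block_iter_next()`
 * yields the first record whose key is greater or equal to `want`. A lookup
 * might thus look as follows (illustrative sketch, error handling omitted):
 *
 *	struct block_iter it = { 0 };
 *	block_iter_init(&it, block);
 *	block_iter_seek_key(&it, &want);
 *	while (!block_iter_next(&it, &rec))
 *		... records with keys >= want ...
 *	block_iter_close(&it);
 */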
int block_iter_seek_key(struct block_iter *it, struct reftable_buf *want)
{
	struct restart_needle_less_args args = {
		.needle = *want,
		.block = it->block,
	};
	struct reftable_record rec;
	int err = 0;
	size_t i;

	/*
	 * Perform a binary search over the block's restart points, which
	 * avoids doing a linear scan over the whole block. Like this, we
	 * identify the section of the block that should contain our key.
	 *
	 * Note that we explicitly search for the first restart point _greater_
	 * than the sought-after record, not _greater or equal_ to it. In case
	 * the sought-after record is located directly at the restart point we
	 * would otherwise start doing the linear search at the preceding
	 * restart point. While that works alright, we would end up scanning
	 * too many records.
	 */
	i = binsearch(it->block->restart_count, &restart_needle_less, &args);
	if (args.error)
		return REFTABLE_FORMAT_ERROR;

	/*
	 * Now there are multiple cases:
	 *
	 * - `i == 0`: The wanted record is smaller than the record found at
	 *   the first restart point. As the first restart point is the first
	 *   record in the block, our wanted record cannot be located in this
	 *   block at all. We still need to position the iterator so that the
	 *   next call to `block_iter_next()` will yield an end-of-iterator
	 *   signal.
	 *
	 * - `i == restart_count`: The wanted record was not found at any of
	 *   the restart points. As there is no restart point at the end of
	 *   the section, the record may thus be contained in the last
	 *   section.
	 *
	 * - `i > 0`: The wanted record must be contained in the section
	 *   before the found restart point. We thus do a linear search
	 *   starting from the preceding restart point.
	 */
	if (i > 0)
		it->next_off = block_restart_offset(it->block, i - 1);
	else
		it->next_off = it->block->header_off + 4;

	err = reftable_record_init(&rec, reftable_block_type(it->block));
	if (err < 0)
		goto done;

	/*
	 * We're looking for the last entry less than the wanted key so that
	 * the next call to `block_iter_next()` would yield the wanted record.
	 * We thus don't want to position our iterator at the sought-after
	 * record, but one before. To do so, we have to go one entry too far
	 * and then back up.
	 */
	while (1) {
		size_t prev_off = it->next_off;

		err = block_iter_next(it, &rec);
		if (err < 0)
			goto done;
		if (err > 0) {
			it->next_off = prev_off;
			err = 0;
			goto done;
		}

		err = reftable_record_key(&rec, &it->last_key);
		if (err < 0)
			goto done;

		/*
		 * Check whether the current key is greater or equal to the
		 * sought-after key. In case it is greater we know that the
		 * record does not exist in the block and can thus abort early.
		 * In case it is equal to the sought-after key we have found
		 * the desired record.
		 *
		 * Note that we store the next record's key directly in
		 * `last_key` without restoring the key of the preceding record
		 * in case we need to go one record back. This is safe to do as
		 * `block_iter_next()` would return the record whose key is
		 * equal to `last_key` now, and naturally all keys share a
		 * prefix with themselves.
		 */
		if (reftable_buf_cmp(&it->last_key, want) >= 0) {
			it->next_off = prev_off;
			goto done;
		}
	}

done:
	reftable_record_release(&rec);
	return err;
}

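/*
 * Thin adapters with `void *` signatures so that block iterators can back
 * the generic `reftable_iterator` vtable defined below.
 */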
static int block_iter_seek_void(void *it, struct reftable_record *want)
{
	struct reftable_buf buf = REFTABLE_BUF_INIT;
	struct block_iter *bi = it;
	int err;

	if (bi->block->block_type != want->type)
		return REFTABLE_API_ERROR;

	err = reftable_record_key(want, &buf);
	if (err < 0)
		goto out;

	err = block_iter_seek_key(it, &buf);
	if (err < 0)
		goto out;

	err = 0;

out:
	reftable_buf_release(&buf);
	return err;
}

static int block_iter_next_void(void *it, struct reftable_record *rec)
{
	return block_iter_next(it, rec);
}

static void block_iter_close_void(void *it)
{
	block_iter_close(it);
}

static struct reftable_iterator_vtable block_iter_vtable = {
	.seek = &block_iter_seek_void,
	.next = &block_iter_next_void,
	.close = &block_iter_close_void,
};

int reftable_block_init_iterator(const struct reftable_block *b,
				 struct reftable_iterator *it)
{
	struct block_iter *bi;

	REFTABLE_CALLOC_ARRAY(bi, 1);
	if (!bi)
		return REFTABLE_OUT_OF_MEMORY_ERROR;
	block_iter_init(bi, b);

	assert(!it->ops);
	it->iter_arg = bi;
	it->ops = &block_iter_vtable;

	return 0;
}

void block_writer_release(struct block_writer *bw)
{
	deflateEnd(bw->zstream);
	REFTABLE_FREE_AND_NULL(bw->zstream);
	REFTABLE_FREE_AND_NULL(bw->restarts);
	REFTABLE_FREE_AND_NULL(bw->compressed);
	reftable_buf_release(&bw->scratch);
	reftable_buf_release(&bw->last_key);
	/* the block is not owned. */
}