Merge branch 'ps/reftable-fixes-and-optims'

More fixes and optimizations to the reftable backend.

* ps/reftable-fixes-and-optims:
  reftable/merged: transfer ownership of records when iterating
  reftable/merged: really reuse buffers to compute record keys
  reftable/record: store "val2" hashes as static arrays
  reftable/record: store "val1" hashes as static arrays
  reftable/record: constify some parts of the interface
  reftable/writer: fix index corruption when writing multiple indices
  reftable/stack: do not auto-compact twice in `reftable_stack_add()`
  reftable/stack: do not overwrite errors when compacting
maint
Junio C Hamano 2024-01-16 10:11:56 -08:00
commit 481d69dd63
10 changed files with 117 additions and 75 deletions

View File

@ -49,13 +49,11 @@ static void test_block_read_write(void)

for (i = 0; i < N; i++) {
char name[100];
uint8_t hash[GIT_SHA1_RAWSZ];
snprintf(name, sizeof(name), "branch%02d", i);
memset(hash, i, sizeof(hash));

rec.u.ref.refname = name;
rec.u.ref.value_type = REFTABLE_REF_VAL1;
rec.u.ref.value.val1 = hash;
memset(rec.u.ref.value.val1, i, GIT_SHA1_RAWSZ);

names[i] = xstrdup(name);
n = block_writer_add(&bw, &rec);

View File

@ -123,12 +123,12 @@ static int merged_iter_next_entry(struct merged_iter *mi,
reftable_record_release(&top.rec);
}

reftable_record_copy_from(rec, &entry.rec, hash_size(mi->hash_id));
reftable_record_release(rec);
*rec = entry.rec;

done:
reftable_record_release(&entry.rec);
strbuf_release(&mi->entry_key);
strbuf_release(&mi->key);
if (err)
reftable_record_release(&entry.rec);
return err;
}


View File

@ -122,13 +122,11 @@ static void readers_destroy(struct reftable_reader **readers, size_t n)

static void test_merged_between(void)
{
uint8_t hash1[GIT_SHA1_RAWSZ] = { 1, 2, 3, 0 };

struct reftable_ref_record r1[] = { {
.refname = "b",
.update_index = 1,
.value_type = REFTABLE_REF_VAL1,
.value.val1 = hash1,
.value.val1 = { 1, 2, 3, 0 },
} };
struct reftable_ref_record r2[] = { {
.refname = "a",
@ -164,26 +162,24 @@ static void test_merged_between(void)

static void test_merged(void)
{
uint8_t hash1[GIT_SHA1_RAWSZ] = { 1 };
uint8_t hash2[GIT_SHA1_RAWSZ] = { 2 };
struct reftable_ref_record r1[] = {
{
.refname = "a",
.update_index = 1,
.value_type = REFTABLE_REF_VAL1,
.value.val1 = hash1,
.value.val1 = { 1 },
},
{
.refname = "b",
.update_index = 1,
.value_type = REFTABLE_REF_VAL1,
.value.val1 = hash1,
.value.val1 = { 1 },
},
{
.refname = "c",
.update_index = 1,
.value_type = REFTABLE_REF_VAL1,
.value.val1 = hash1,
.value.val1 = { 1 },
}
};
struct reftable_ref_record r2[] = { {
@ -196,13 +192,13 @@ static void test_merged(void)
.refname = "c",
.update_index = 3,
.value_type = REFTABLE_REF_VAL1,
.value.val1 = hash2,
.value.val1 = { 2 },
},
{
.refname = "d",
.update_index = 3,
.value_type = REFTABLE_REF_VAL1,
.value.val1 = hash1,
.value.val1 = { 1 },
},
};


View File

@ -59,18 +59,15 @@ static void write_table(char ***names, struct strbuf *buf, int N,
*names = reftable_calloc(sizeof(char *) * (N + 1));
reftable_writer_set_limits(w, update_index, update_index);
for (i = 0; i < N; i++) {
uint8_t hash[GIT_SHA256_RAWSZ] = { 0 };
char name[100];
int n;

set_test_hash(hash, i);

snprintf(name, sizeof(name), "refs/heads/branch%02d", i);

ref.refname = name;
ref.update_index = update_index;
ref.value_type = REFTABLE_REF_VAL1;
ref.value.val1 = hash;
set_test_hash(ref.value.val1, i);
(*names)[i] = xstrdup(name);

n = reftable_writer_add_ref(w, &ref);
@ -549,8 +546,6 @@ static void test_table_refs_for(int indexed)
uint8_t hash[GIT_SHA1_RAWSZ];
char fill[51] = { 0 };
char name[100];
uint8_t hash1[GIT_SHA1_RAWSZ];
uint8_t hash2[GIT_SHA1_RAWSZ];
struct reftable_ref_record ref = { NULL };

memset(hash, i, sizeof(hash));
@ -560,11 +555,9 @@ static void test_table_refs_for(int indexed)
name[40] = 0;
ref.refname = name;

set_test_hash(hash1, i / 4);
set_test_hash(hash2, 3 + i / 4);
ref.value_type = REFTABLE_REF_VAL2;
ref.value.val2.value = hash1;
ref.value.val2.target_value = hash2;
set_test_hash(ref.value.val2.value, i / 4);
set_test_hash(ref.value.val2.target_value, 3 + i / 4);

/* 80 bytes / entry, so 3 entries per block. Yields 17
*/
@ -572,8 +565,8 @@ static void test_table_refs_for(int indexed)
n = reftable_writer_add_ref(w, &ref);
EXPECT(n == 0);

if (!memcmp(hash1, want_hash, GIT_SHA1_RAWSZ) ||
!memcmp(hash2, want_hash, GIT_SHA1_RAWSZ)) {
if (!memcmp(ref.value.val2.value, want_hash, GIT_SHA1_RAWSZ) ||
!memcmp(ref.value.val2.target_value, want_hash, GIT_SHA1_RAWSZ)) {
want_names[want_names_len++] = xstrdup(name);
}
}
@ -674,11 +667,10 @@ static void test_write_object_id_min_length(void)
struct strbuf buf = STRBUF_INIT;
struct reftable_writer *w =
reftable_new_writer(&strbuf_add_void, &buf, &opts);
uint8_t hash[GIT_SHA1_RAWSZ] = {42};
struct reftable_ref_record ref = {
.update_index = 1,
.value_type = REFTABLE_REF_VAL1,
.value.val1 = hash,
.value.val1 = {42},
};
int err;
int i;
@ -710,11 +702,10 @@ static void test_write_object_id_length(void)
struct strbuf buf = STRBUF_INIT;
struct reftable_writer *w =
reftable_new_writer(&strbuf_add_void, &buf, &opts);
uint8_t hash[GIT_SHA1_RAWSZ] = {42};
struct reftable_ref_record ref = {
.update_index = 1,
.value_type = REFTABLE_REF_VAL1,
.value.val1 = hash,
.value.val1 = {42},
};
int err;
int i;
@ -797,6 +788,84 @@ static void test_write_key_order(void)
strbuf_release(&buf);
}

static void test_write_multiple_indices(void)
{
struct reftable_write_options opts = {
.block_size = 100,
};
struct strbuf writer_buf = STRBUF_INIT, buf = STRBUF_INIT;
struct reftable_block_source source = { 0 };
struct reftable_iterator it = { 0 };
const struct reftable_stats *stats;
struct reftable_writer *writer;
struct reftable_reader *reader;
int err, i;

writer = reftable_new_writer(&strbuf_add_void, &writer_buf, &opts);
reftable_writer_set_limits(writer, 1, 1);
for (i = 0; i < 100; i++) {
struct reftable_ref_record ref = {
.update_index = 1,
.value_type = REFTABLE_REF_VAL1,
.value.val1 = {i},
};

strbuf_reset(&buf);
strbuf_addf(&buf, "refs/heads/%04d", i);
ref.refname = buf.buf,

err = reftable_writer_add_ref(writer, &ref);
EXPECT_ERR(err);
}

for (i = 0; i < 100; i++) {
unsigned char hash[GIT_SHA1_RAWSZ] = {i};
struct reftable_log_record log = {
.update_index = 1,
.value_type = REFTABLE_LOG_UPDATE,
.value.update = {
.old_hash = hash,
.new_hash = hash,
},
};

strbuf_reset(&buf);
strbuf_addf(&buf, "refs/heads/%04d", i);
log.refname = buf.buf,

err = reftable_writer_add_log(writer, &log);
EXPECT_ERR(err);
}

reftable_writer_close(writer);

/*
* The written data should be sufficiently large to result in indices
* for each of the block types.
*/
stats = reftable_writer_stats(writer);
EXPECT(stats->ref_stats.index_offset > 0);
EXPECT(stats->obj_stats.index_offset > 0);
EXPECT(stats->log_stats.index_offset > 0);

block_source_from_strbuf(&source, &writer_buf);
err = reftable_new_reader(&reader, &source, "filename");
EXPECT_ERR(err);

/*
* Seeking the log uses the log index now. In case there is any
* confusion regarding indices we would notice here.
*/
err = reftable_reader_seek_log(reader, &it, "");
EXPECT_ERR(err);

reftable_iterator_destroy(&it);
reftable_writer_free(writer);
reftable_reader_free(reader);
strbuf_release(&writer_buf);
strbuf_release(&buf);
}

static void test_corrupt_table_empty(void)
{
struct strbuf buf = STRBUF_INIT;
@ -846,5 +915,6 @@ int readwrite_test_main(int argc, const char *argv[])
RUN_TEST(test_log_overflow);
RUN_TEST(test_write_object_id_length);
RUN_TEST(test_write_object_id_min_length);
RUN_TEST(test_write_multiple_indices);
return 0;
}

View File

@ -76,7 +76,7 @@ int reftable_is_block_type(uint8_t typ)
return 0;
}

uint8_t *reftable_ref_record_val1(const struct reftable_ref_record *rec)
const unsigned char *reftable_ref_record_val1(const struct reftable_ref_record *rec)
{
switch (rec->value_type) {
case REFTABLE_REF_VAL1:
@ -88,7 +88,7 @@ uint8_t *reftable_ref_record_val1(const struct reftable_ref_record *rec)
}
}

uint8_t *reftable_ref_record_val2(const struct reftable_ref_record *rec)
const unsigned char *reftable_ref_record_val2(const struct reftable_ref_record *rec)
{
switch (rec->value_type) {
case REFTABLE_REF_VAL2:
@ -219,13 +219,10 @@ static void reftable_ref_record_copy_from(void *rec, const void *src_rec,
case REFTABLE_REF_DELETION:
break;
case REFTABLE_REF_VAL1:
ref->value.val1 = reftable_malloc(hash_size);
memcpy(ref->value.val1, src->value.val1, hash_size);
break;
case REFTABLE_REF_VAL2:
ref->value.val2.value = reftable_malloc(hash_size);
memcpy(ref->value.val2.value, src->value.val2.value, hash_size);
ref->value.val2.target_value = reftable_malloc(hash_size);
memcpy(ref->value.val2.target_value,
src->value.val2.target_value, hash_size);
break;
@ -242,7 +239,7 @@ static char hexdigit(int c)
return 'a' + (c - 10);
}

static void hex_format(char *dest, uint8_t *src, int hash_size)
static void hex_format(char *dest, const unsigned char *src, int hash_size)
{
assert(hash_size > 0);
if (src) {
@ -299,11 +296,8 @@ void reftable_ref_record_release(struct reftable_ref_record *ref)
reftable_free(ref->value.symref);
break;
case REFTABLE_REF_VAL2:
reftable_free(ref->value.val2.target_value);
reftable_free(ref->value.val2.value);
break;
case REFTABLE_REF_VAL1:
reftable_free(ref->value.val1);
break;
case REFTABLE_REF_DELETION:
break;
@ -394,7 +388,6 @@ static int reftable_ref_record_decode(void *rec, struct strbuf key,
return -1;
}

r->value.val1 = reftable_malloc(hash_size);
memcpy(r->value.val1, in.buf, hash_size);
string_view_consume(&in, hash_size);
break;
@ -404,11 +397,9 @@ static int reftable_ref_record_decode(void *rec, struct strbuf key,
return -1;
}

r->value.val2.value = reftable_malloc(hash_size);
memcpy(r->value.val2.value, in.buf, hash_size);
string_view_consume(&in, hash_size);

r->value.val2.target_value = reftable_malloc(hash_size);
memcpy(r->value.val2.target_value, in.buf, hash_size);
string_view_consume(&in, hash_size);
break;
@ -1164,7 +1155,7 @@ int reftable_record_equal(struct reftable_record *a, struct reftable_record *b,
reftable_record_data(a), reftable_record_data(b), hash_size);
}

static int hash_equal(uint8_t *a, uint8_t *b, int hash_size)
static int hash_equal(const unsigned char *a, const unsigned char *b, int hash_size)
{
if (a && b)
return !memcmp(a, b, hash_size);

View File

@ -119,15 +119,10 @@ static void test_reftable_ref_record_roundtrip(void)
case REFTABLE_REF_DELETION:
break;
case REFTABLE_REF_VAL1:
in.u.ref.value.val1 = reftable_malloc(GIT_SHA1_RAWSZ);
set_hash(in.u.ref.value.val1, 1);
break;
case REFTABLE_REF_VAL2:
in.u.ref.value.val2.value =
reftable_malloc(GIT_SHA1_RAWSZ);
set_hash(in.u.ref.value.val2.value, 1);
in.u.ref.value.val2.target_value =
reftable_malloc(GIT_SHA1_RAWSZ);
set_hash(in.u.ref.value.val2.target_value, 2);
break;
case REFTABLE_REF_SYMREF:

View File

@ -9,6 +9,7 @@ https://developers.google.com/open-source/licenses/bsd
#ifndef REFTABLE_RECORD_H
#define REFTABLE_RECORD_H

#include "hash-ll.h"
#include <stdint.h>

/*
@ -38,10 +39,10 @@ struct reftable_ref_record {
#define REFTABLE_NR_REF_VALUETYPES 4
} value_type;
union {
uint8_t *val1; /* malloced hash. */
unsigned char val1[GIT_MAX_RAWSZ];
struct {
uint8_t *value; /* first value, malloced hash */
uint8_t *target_value; /* second value, malloced hash */
unsigned char value[GIT_MAX_RAWSZ]; /* first hash */
unsigned char target_value[GIT_MAX_RAWSZ]; /* second hash */
} val2;
char *symref; /* referent, malloced 0-terminated string */
} value;
@ -49,11 +50,11 @@ struct reftable_ref_record {

/* Returns the first hash, or NULL if `rec` is not of type
* REFTABLE_REF_VAL1 or REFTABLE_REF_VAL2. */
uint8_t *reftable_ref_record_val1(const struct reftable_ref_record *rec);
const unsigned char *reftable_ref_record_val1(const struct reftable_ref_record *rec);

/* Returns the second hash, or NULL if `rec` is not of type
* REFTABLE_REF_VAL2. */
uint8_t *reftable_ref_record_val2(const struct reftable_ref_record *rec);
const unsigned char *reftable_ref_record_val2(const struct reftable_ref_record *rec);

/* returns whether 'ref' represents a deletion */
int reftable_ref_record_is_deletion(const struct reftable_ref_record *ref);

View File

@ -425,9 +425,6 @@ int reftable_stack_add(struct reftable_stack *st,
return err;
}

if (!st->disable_auto_compact)
return reftable_stack_auto_compact(st);

return 0;
}

@ -801,18 +798,16 @@ static int stack_write_compact(struct reftable_stack *st,
err = 0;
break;
}
if (err < 0) {
break;
}
if (err < 0)
goto done;

if (first == 0 && reftable_ref_record_is_deletion(&ref)) {
continue;
}

err = reftable_writer_add_ref(wr, &ref);
if (err < 0) {
break;
}
if (err < 0)
goto done;
entries++;
}
reftable_iterator_destroy(&it);
@ -827,9 +822,8 @@ static int stack_write_compact(struct reftable_stack *st,
err = 0;
break;
}
if (err < 0) {
break;
}
if (err < 0)
goto done;
if (first == 0 && reftable_log_record_is_deletion(&log)) {
continue;
}
@ -845,9 +839,8 @@ static int stack_write_compact(struct reftable_stack *st,
}

err = reftable_writer_add_log(wr, &log);
if (err < 0) {
break;
}
if (err < 0)
goto done;
entries++;
}


View File

@ -462,7 +462,6 @@ static void test_reftable_stack_add(void)
refs[i].refname = xstrdup(buf);
refs[i].update_index = i + 1;
refs[i].value_type = REFTABLE_REF_VAL1;
refs[i].value.val1 = reftable_malloc(GIT_SHA1_RAWSZ);
set_test_hash(refs[i].value.val1, i);

logs[i].refname = xstrdup(buf);
@ -599,7 +598,6 @@ static void test_reftable_stack_tombstone(void)
refs[i].update_index = i + 1;
if (i % 2 == 0) {
refs[i].value_type = REFTABLE_REF_VAL1;
refs[i].value.val1 = reftable_malloc(GIT_SHA1_RAWSZ);
set_test_hash(refs[i].value.val1, i);
}


View File

@ -432,12 +432,12 @@ static int writer_finish_section(struct reftable_writer *w)
reftable_free(idx);
}

writer_clear_index(w);

err = writer_flush_block(w);
if (err < 0)
return err;

writer_clear_index(w);

bstats = writer_reftable_block_stats(w, typ);
bstats->index_blocks = w->stats.idx_stats.blocks - before_blocks;
bstats->index_offset = index_start;