Browse Source

midx: write object id fanout chunk

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
maint
Derrick Stolee 7 years ago committed by Junio C Hamano
parent
commit
d7cacf29cc
  1. 5
      Documentation/technical/pack-format.txt
  2. 53
      midx.c
  3. 1
      midx.h
  4. 4
      t/helper/test-read-midx.c
  5. 16
      t/t5319-multi-pack-index.sh

5
Documentation/technical/pack-format.txt

@ -302,6 +302,11 @@ CHUNK DATA:
name. This is the only chunk not guaranteed to be a multiple of four name. This is the only chunk not guaranteed to be a multiple of four
bytes in length, so should be the last chunk for alignment reasons. bytes in length, so should be the last chunk for alignment reasons.


OID Fanout (ID: {'O', 'I', 'D', 'F'})
The ith entry, F[i], stores the number of OIDs with first
byte at most i. Thus F[255] stores the total
number of objects.

OID Lookup (ID: {'O', 'I', 'D', 'L'}) OID Lookup (ID: {'O', 'I', 'D', 'L'})
The OIDs for all objects in the MIDX are stored in lexicographic The OIDs for all objects in the MIDX are stored in lexicographic
order in this chunk. order in this chunk.

53
midx.c

@ -18,11 +18,13 @@
#define MIDX_HASH_LEN 20 #define MIDX_HASH_LEN 20
#define MIDX_MIN_SIZE (MIDX_HEADER_SIZE + MIDX_HASH_LEN) #define MIDX_MIN_SIZE (MIDX_HEADER_SIZE + MIDX_HASH_LEN)


#define MIDX_MAX_CHUNKS 2 #define MIDX_MAX_CHUNKS 3
#define MIDX_CHUNK_ALIGNMENT 4 #define MIDX_CHUNK_ALIGNMENT 4
#define MIDX_CHUNKID_PACKNAMES 0x504e414d /* "PNAM" */ #define MIDX_CHUNKID_PACKNAMES 0x504e414d /* "PNAM" */
#define MIDX_CHUNKID_OIDFANOUT 0x4f494446 /* "OIDF" */
#define MIDX_CHUNKID_OIDLOOKUP 0x4f49444c /* "OIDL" */ #define MIDX_CHUNKID_OIDLOOKUP 0x4f49444c /* "OIDL" */
#define MIDX_CHUNKLOOKUP_WIDTH (sizeof(uint32_t) + sizeof(uint64_t)) #define MIDX_CHUNKLOOKUP_WIDTH (sizeof(uint32_t) + sizeof(uint64_t))
#define MIDX_CHUNK_FANOUT_SIZE (sizeof(uint32_t) * 256)


static char *get_midx_filename(const char *object_dir) static char *get_midx_filename(const char *object_dir)
{ {
@ -102,6 +104,10 @@ struct multi_pack_index *load_multi_pack_index(const char *object_dir)
m->chunk_pack_names = m->data + chunk_offset; m->chunk_pack_names = m->data + chunk_offset;
break; break;


case MIDX_CHUNKID_OIDFANOUT:
m->chunk_oid_fanout = (uint32_t *)(m->data + chunk_offset);
break;

case MIDX_CHUNKID_OIDLOOKUP: case MIDX_CHUNKID_OIDLOOKUP:
m->chunk_oid_lookup = m->data + chunk_offset; m->chunk_oid_lookup = m->data + chunk_offset;
break; break;
@ -121,9 +127,13 @@ struct multi_pack_index *load_multi_pack_index(const char *object_dir)


if (!m->chunk_pack_names) if (!m->chunk_pack_names)
die(_("multi-pack-index missing required pack-name chunk")); die(_("multi-pack-index missing required pack-name chunk"));
if (!m->chunk_oid_fanout)
die(_("multi-pack-index missing required OID fanout chunk"));
if (!m->chunk_oid_lookup) if (!m->chunk_oid_lookup)
die(_("multi-pack-index missing required OID lookup chunk")); die(_("multi-pack-index missing required OID lookup chunk"));


m->num_objects = ntohl(m->chunk_oid_fanout[255]);

m->pack_names = xcalloc(m->num_packs, sizeof(*m->pack_names)); m->pack_names = xcalloc(m->num_packs, sizeof(*m->pack_names));


cur_pack_name = (const char *)m->chunk_pack_names; cur_pack_name = (const char *)m->chunk_pack_names;
@ -389,6 +399,35 @@ static size_t write_midx_pack_names(struct hashfile *f,
return written; return written;
} }


/*
 * Write the 256-entry OID fanout table to 'f'.
 *
 * For each possible first hash byte (0..255, in increasing order), one
 * 32-bit value is emitted through hashwrite_be32(): the number of
 * entries consumed from 'objects' so far, i.e. a cumulative count.
 *
 * The scan only ever moves forward, so 'objects' is expected to
 * already be ordered by OID; an entry whose first byte is smaller than
 * the bucket currently being visited stops the scan for good.
 *
 * Returns MIDX_CHUNK_FANOUT_SIZE, regardless of 'nr_objects'.
 */
static size_t write_midx_oid_fanout(struct hashfile *f,
				    struct pack_midx_entry *objects,
				    uint32_t nr_objects)
{
	struct pack_midx_entry *cursor = objects;
	struct pack_midx_entry *end = objects + nr_objects;
	uint32_t running_total = 0;
	uint32_t bucket;

	for (bucket = 0; bucket < 256; bucket++) {
		/* Consume every entry whose leading byte matches this bucket. */
		for (; cursor < end && cursor->oid.hash[0] == bucket; cursor++)
			running_total++;

		/* Buckets accumulate: each slot includes all earlier ones. */
		hashwrite_be32(f, running_total);
	}

	return MIDX_CHUNK_FANOUT_SIZE;
}

static size_t write_midx_oid_lookup(struct hashfile *f, unsigned char hash_len, static size_t write_midx_oid_lookup(struct hashfile *f, unsigned char hash_len,
struct pack_midx_entry *objects, struct pack_midx_entry *objects,
uint32_t nr_objects) uint32_t nr_objects)
@ -461,7 +500,7 @@ int write_midx_file(const char *object_dir)
FREE_AND_NULL(midx_name); FREE_AND_NULL(midx_name);


cur_chunk = 0; cur_chunk = 0;
num_chunks = 2; num_chunks = 3;


written = write_midx_header(f, num_chunks, packs.nr); written = write_midx_header(f, num_chunks, packs.nr);


@ -469,9 +508,13 @@ int write_midx_file(const char *object_dir)
chunk_offsets[cur_chunk] = written + (num_chunks + 1) * MIDX_CHUNKLOOKUP_WIDTH; chunk_offsets[cur_chunk] = written + (num_chunks + 1) * MIDX_CHUNKLOOKUP_WIDTH;


cur_chunk++; cur_chunk++;
chunk_ids[cur_chunk] = MIDX_CHUNKID_OIDLOOKUP; chunk_ids[cur_chunk] = MIDX_CHUNKID_OIDFANOUT;
chunk_offsets[cur_chunk] = chunk_offsets[cur_chunk - 1] + packs.pack_name_concat_len; chunk_offsets[cur_chunk] = chunk_offsets[cur_chunk - 1] + packs.pack_name_concat_len;


cur_chunk++;
chunk_ids[cur_chunk] = MIDX_CHUNKID_OIDLOOKUP;
chunk_offsets[cur_chunk] = chunk_offsets[cur_chunk - 1] + MIDX_CHUNK_FANOUT_SIZE;

cur_chunk++; cur_chunk++;
chunk_ids[cur_chunk] = 0; chunk_ids[cur_chunk] = 0;
chunk_offsets[cur_chunk] = chunk_offsets[cur_chunk - 1] + nr_entries * MIDX_HASH_LEN; chunk_offsets[cur_chunk] = chunk_offsets[cur_chunk - 1] + nr_entries * MIDX_HASH_LEN;
@ -505,6 +548,10 @@ int write_midx_file(const char *object_dir)
written += write_midx_pack_names(f, packs.names, packs.nr); written += write_midx_pack_names(f, packs.names, packs.nr);
break; break;


case MIDX_CHUNKID_OIDFANOUT:
written += write_midx_oid_fanout(f, entries, nr_entries);
break;

case MIDX_CHUNKID_OIDLOOKUP: case MIDX_CHUNKID_OIDLOOKUP:
written += write_midx_oid_lookup(f, MIDX_HASH_LEN, entries, nr_entries); written += write_midx_oid_lookup(f, MIDX_HASH_LEN, entries, nr_entries);
break; break;

1
midx.h

@ -15,6 +15,7 @@ struct multi_pack_index {
uint32_t num_objects; uint32_t num_objects;


const unsigned char *chunk_pack_names; const unsigned char *chunk_pack_names;
const uint32_t *chunk_oid_fanout;
const unsigned char *chunk_oid_lookup; const unsigned char *chunk_oid_lookup;


const char **pack_names; const char **pack_names;

4
t/helper/test-read-midx.c

@ -22,10 +22,12 @@ static int read_midx_file(const char *object_dir)


if (m->chunk_pack_names) if (m->chunk_pack_names)
printf(" pack-names"); printf(" pack-names");
if (m->chunk_oid_fanout)
printf(" oid-fanout");
if (m->chunk_oid_lookup) if (m->chunk_oid_lookup)
printf(" oid-lookup"); printf(" oid-lookup");


printf("\n"); printf("\nnum_objects: %d\n", m->num_objects);


printf("packs:\n"); printf("packs:\n");
for (i = 0; i < m->num_packs; i++) for (i = 0; i < m->num_packs; i++)

16
t/t5319-multi-pack-index.sh

@ -5,10 +5,12 @@ test_description='multi-pack-indexes'


midx_read_expect () { midx_read_expect () {
NUM_PACKS=$1 NUM_PACKS=$1
NUM_OBJECTS=$2
{ {
cat <<-EOF && cat <<-EOF &&
header: 4d494458 1 2 $NUM_PACKS header: 4d494458 1 3 $NUM_PACKS
chunks: pack-names oid-lookup chunks: pack-names oid-fanout oid-lookup
num_objects: $NUM_OBJECTS
packs: packs:
EOF EOF
if test $NUM_PACKS -ge 1 if test $NUM_PACKS -ge 1
@ -24,7 +26,7 @@ midx_read_expect () {
test_expect_success 'write midx with no packs' ' test_expect_success 'write midx with no packs' '
test_when_finished rm -f pack/multi-pack-index && test_when_finished rm -f pack/multi-pack-index &&
git multi-pack-index --object-dir=. write && git multi-pack-index --object-dir=. write &&
midx_read_expect 0 midx_read_expect 0 0
' '


generate_objects () { generate_objects () {
@ -74,13 +76,13 @@ test_expect_success 'write midx with one v1 pack' '
pack=$(git pack-objects --index-version=1 pack/test <obj-list) && pack=$(git pack-objects --index-version=1 pack/test <obj-list) &&
test_when_finished rm pack/test-$pack.pack pack/test-$pack.idx pack/multi-pack-index && test_when_finished rm pack/test-$pack.pack pack/test-$pack.idx pack/multi-pack-index &&
git multi-pack-index --object-dir=. write && git multi-pack-index --object-dir=. write &&
midx_read_expect 1 midx_read_expect 1 18
' '


test_expect_success 'write midx with one v2 pack' ' test_expect_success 'write midx with one v2 pack' '
git pack-objects --index-version=2,0x40 pack/test <obj-list && git pack-objects --index-version=2,0x40 pack/test <obj-list &&
git multi-pack-index --object-dir=. write && git multi-pack-index --object-dir=. write &&
midx_read_expect 1 midx_read_expect 1 18
' '


test_expect_success 'add more objects' ' test_expect_success 'add more objects' '
@ -94,7 +96,7 @@ test_expect_success 'add more objects' '
test_expect_success 'write midx with two packs' ' test_expect_success 'write midx with two packs' '
git pack-objects --index-version=1 pack/test-2 <obj-list && git pack-objects --index-version=1 pack/test-2 <obj-list &&
git multi-pack-index --object-dir=. write && git multi-pack-index --object-dir=. write &&
midx_read_expect 2 midx_read_expect 2 34
' '


test_expect_success 'add more packs' ' test_expect_success 'add more packs' '
@ -108,7 +110,7 @@ test_expect_success 'add more packs' '


test_expect_success 'write midx with twelve packs' ' test_expect_success 'write midx with twelve packs' '
git multi-pack-index --object-dir=. write && git multi-pack-index --object-dir=. write &&
midx_read_expect 12 midx_read_expect 12 74
' '


test_done test_done

Loading…
Cancel
Save