|
|
|
#ifndef MIDX_H
|
|
|
|
#define MIDX_H
|
|
|
|
|
|
|
|
#include "repository.h"
|
|
|
|
|
|
|
|
struct object_id;
|
|
|
|
struct pack_entry;
|
|
|
|
struct repository;
|
|
|
|
|
|
|
|
#define GIT_TEST_MULTI_PACK_INDEX "GIT_TEST_MULTI_PACK_INDEX"
|
|
|
|
|
|
|
|
struct multi_pack_index {
|
|
|
|
struct multi_pack_index *next;
|
|
|
|
|
|
|
|
const unsigned char *data;
|
|
|
|
size_t data_len;
|
|
|
|
|
pack-revindex: read multi-pack reverse indexes
Implement reading for multi-pack reverse indexes, as described in the
previous patch.
Note that these functions don't yet have any callers, and won't until
multi-pack reachability bitmaps are introduced in a later patch series.
In the meantime, this patch implements some of the infrastructure
necessary to support multi-pack bitmaps.
There are three new functions exposed by the revindex API:
- load_midx_revindex(): loads the reverse index corresponding to the
given multi-pack index.
- midx_to_pack_pos() and pack_pos_to_midx(): these convert between the
multi-pack index and pseudo-pack order.
load_midx_revindex() and pack_pos_to_midx() are both relatively
straightforward.
load_midx_revindex() needs a few functions to be exposed from the midx
API. One to get the checksum of a midx, and another to get the .rev's
filename. Similar to recent changes in the packed_git struct, three new
fields are added to the multi_pack_index struct: one to keep track of
the size, one to keep track of the mmap'd pointer, and another to point
past the header and at the reverse index's data.
pack_pos_to_midx() simply reads the corresponding entry out of the
table.
midx_to_pack_pos() is the trickiest, since it needs to find an object's
position in the psuedo-pack order, but that order can only be recovered
in the .rev file itself. This mapping can be implemented with a binary
search, but note that the thing we're binary searching over isn't an
array of values, but rather a permuted order of those values.
So, when comparing two items, it's helpful to keep in mind the
difference. Instead of a traditional binary search, where you are
comparing two things directly, here we're comparing a (pack, offset)
tuple with an index into the multi-pack index. That index describes
another (pack, offset) tuple, and it is _those_ two tuples that are
compared.
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
4 years ago
|
|
|
const uint32_t *revindex_data;
|
|
|
|
const uint32_t *revindex_map;
|
|
|
|
size_t revindex_len;
|
|
|
|
|
|
|
|
uint32_t signature;
|
|
|
|
unsigned char version;
|
|
|
|
unsigned char hash_len;
|
|
|
|
unsigned char num_chunks;
|
|
|
|
uint32_t num_packs;
|
|
|
|
uint32_t num_objects;
|
|
|
|
|
|
|
|
int local;
|
|
|
|
|
|
|
|
const unsigned char *chunk_pack_names;
|
|
|
|
const uint32_t *chunk_oid_fanout;
|
|
|
|
const unsigned char *chunk_oid_lookup;
|
|
|
|
const unsigned char *chunk_object_offsets;
|
|
|
|
const unsigned char *chunk_large_offsets;
|
|
|
|
|
|
|
|
const char **pack_names;
|
|
|
|
struct packed_git **packs;
|
|
|
|
char object_dir[FLEX_ARRAY];
|
|
|
|
};
|
|
|
|
|
|
|
|
#define MIDX_PROGRESS (1 << 0)
|
|
|
|
|
pack-revindex: read multi-pack reverse indexes
Implement reading for multi-pack reverse indexes, as described in the
previous patch.
Note that these functions don't yet have any callers, and won't until
multi-pack reachability bitmaps are introduced in a later patch series.
In the meantime, this patch implements some of the infrastructure
necessary to support multi-pack bitmaps.
There are three new functions exposed by the revindex API:
- load_midx_revindex(): loads the reverse index corresponding to the
given multi-pack index.
- midx_to_pack_pos() and pack_pos_to_midx(): these convert between the
multi-pack index and pseudo-pack order.
load_midx_revindex() and pack_pos_to_midx() are both relatively
straightforward.
load_midx_revindex() needs a few functions to be exposed from the midx
API. One to get the checksum of a midx, and another to get the .rev's
filename. Similar to recent changes in the packed_git struct, three new
fields are added to the multi_pack_index struct: one to keep track of
the size, one to keep track of the mmap'd pointer, and another to point
past the header and at the reverse index's data.
pack_pos_to_midx() simply reads the corresponding entry out of the
table.
midx_to_pack_pos() is the trickiest, since it needs to find an object's
position in the psuedo-pack order, but that order can only be recovered
in the .rev file itself. This mapping can be implemented with a binary
search, but note that the thing we're binary searching over isn't an
array of values, but rather a permuted order of those values.
So, when comparing two items, it's helpful to keep in mind the
difference. Instead of a traditional binary search, where you are
comparing two things directly, here we're comparing a (pack, offset)
tuple with an index into the multi-pack index. That index describes
another (pack, offset) tuple, and it is _those_ two tuples that are
compared.
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
4 years ago
|
|
|
char *get_midx_rev_filename(struct multi_pack_index *m);
|
|
|
|
|
|
|
|
struct multi_pack_index *load_multi_pack_index(const char *object_dir, int local);
|
|
|
|
int prepare_midx_pack(struct repository *r, struct multi_pack_index *m, uint32_t pack_int_id);
|
|
|
|
int bsearch_midx(const struct object_id *oid, struct multi_pack_index *m, uint32_t *result);
|
|
|
|
off_t nth_midxed_offset(struct multi_pack_index *m, uint32_t pos);
|
|
|
|
uint32_t nth_midxed_pack_int_id(struct multi_pack_index *m, uint32_t pos);
|
|
|
|
struct object_id *nth_midxed_object_oid(struct object_id *oid,
|
|
|
|
struct multi_pack_index *m,
|
|
|
|
uint32_t n);
|
|
|
|
int fill_midx_entry(struct repository *r, const struct object_id *oid, struct pack_entry *e, struct multi_pack_index *m);
|
midx: check both pack and index names for containment
A midx file (and the struct we parse from it) contains a list of all of
the covered packfiles, mentioned by their ".idx" names (e.g.,
"pack-1234.idx", etc). And thus calls to midx_contains_pack() expect
callers to provide the idx name.
This works for most of the calls, but the one in open_packed_git_1()
tries to feed a packed_git->pack_name, which is the ".pack" name,
meaning we'll never find a match (even if the pack is covered by the
midx).
We can fix this by converting the ".pack" to ".idx" in the caller.
However, that requires allocating a new string. Instead, let's make
midx_contains_pack() a bit friendlier, and allow it take _either_ the
.pack or .idx variant.
All cleverness in the matching code is credited to René. Bugs are mine.
There's no test here, because while this does fix _a_ bug, it's masked
by another bug in that same caller. That will be covered (with a test)
in the next patch.
Helped-by: René Scharfe <l.s.r@web.de>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
6 years ago
|
|
|
int midx_contains_pack(struct multi_pack_index *m, const char *idx_or_pack_name);
|
|
|
|
int prepare_multi_pack_index_one(struct repository *r, const char *object_dir, int local);
|
|
|
|
|
|
|
|
int write_midx_file(const char *object_dir, const char *preferred_pack_name, unsigned flags);
|
|
|
|
void clear_midx_file(struct repository *r);
|
|
|
|
int verify_midx_file(struct repository *r, const char *object_dir, unsigned flags);
|
|
|
|
int expire_midx_packs(struct repository *r, const char *object_dir, unsigned flags);
|
|
|
|
int midx_repack(struct repository *r, const char *object_dir, size_t batch_size, unsigned flags);
|
|
|
|
|
|
|
|
void close_midx(struct multi_pack_index *m);
|
|
|
|
|
|
|
|
#endif
|