Merge branch 'tb/midx-bitmap-corruption-fix'

A bug that made multi-pack bitmap and the object order out-of-sync,
making the .midx data corrupt, has been fixed.

* tb/midx-bitmap-corruption-fix:
  pack-bitmap.c: gracefully fallback after opening pack/MIDX
  midx: read `RIDX` chunk when present
  t/lib-bitmap.sh: parameterize tests over reverse index source
  t5326: move tests to t/lib-bitmap.sh
  t5326: extract `test_rev_exists`
  t5326: drop unnecessary setup
  pack-revindex.c: instrument loading on-disk reverse index
  midx.c: make changing the preferred pack safe
  t5326: demonstrate bitmap corruption after permutation
maint
Junio C Hamano 2022-02-16 15:14:29 -08:00
commit f2cb46a6b3
11 changed files with 321 additions and 151 deletions

View File

@ -24,6 +24,7 @@ and their offsets into multiple packfiles. It contains:
** An offset within the jth packfile for the object.
* If large offsets are required, we use another list of large
offsets similar to version 2 pack-indexes.
- An optional list of objects in pseudo-pack order (used with MIDX bitmaps).

Thus, we can provide O(log N) lookup time for any number
of packfiles.

View File

@ -376,6 +376,11 @@ CHUNK DATA:
[Optional] Object Large Offsets (ID: {'L', 'O', 'F', 'F'})
8-byte offsets into large packfiles.

[Optional] Bitmap pack order (ID: {'R', 'I', 'D', 'X'})
A list of MIDX positions (one per object in the MIDX, num_objects in
total, each a 4-byte unsigned integer in network byte order), sorted
according to their relative bitmap/pseudo-pack positions.

TRAILER:

Index checksum of the above contents.
@ -456,9 +461,5 @@ In short, a MIDX's pseudo-pack is the de-duplicated concatenation of
objects in packs stored by the MIDX, laid out in pack order, and the
packs arranged in MIDX order (with the preferred pack coming first).

Finally, note that the MIDX's reverse index is not stored as a chunk in
the multi-pack-index itself. This is done because the reverse index
includes the checksum of the pack or MIDX to which it belongs, which
makes it impossible to write in the MIDX. To avoid races when rewriting
the MIDX, a MIDX reverse index includes the MIDX's checksum in its
filename (e.g., `multi-pack-index-xyz.rev`).
The MIDX's reverse index is stored in the optional 'RIDX' chunk within
the MIDX itself.

29
midx.c
View File

@ -33,6 +33,7 @@
#define MIDX_CHUNKID_OIDLOOKUP 0x4f49444c /* "OIDL" */
#define MIDX_CHUNKID_OBJECTOFFSETS 0x4f4f4646 /* "OOFF" */
#define MIDX_CHUNKID_LARGEOFFSETS 0x4c4f4646 /* "LOFF" */
#define MIDX_CHUNKID_REVINDEX 0x52494458 /* "RIDX" */
#define MIDX_CHUNK_FANOUT_SIZE (sizeof(uint32_t) * 256)
#define MIDX_CHUNK_OFFSET_WIDTH (2 * sizeof(uint32_t))
#define MIDX_CHUNK_LARGE_OFFSET_WIDTH (sizeof(uint64_t))
@ -161,6 +162,9 @@ struct multi_pack_index *load_multi_pack_index(const char *object_dir, int local

pair_chunk(cf, MIDX_CHUNKID_LARGEOFFSETS, &m->chunk_large_offsets);

if (git_env_bool("GIT_TEST_MIDX_READ_RIDX", 1))
pair_chunk(cf, MIDX_CHUNKID_REVINDEX, &m->chunk_revindex);

m->num_objects = ntohl(m->chunk_oid_fanout[255]);

CALLOC_ARRAY(m->pack_names, m->num_packs);
@ -833,6 +837,18 @@ static int write_midx_large_offsets(struct hashfile *f,
return 0;
}

static int write_midx_revindex(struct hashfile *f,
void *data)
{
struct write_midx_context *ctx = data;
uint32_t i;

for (i = 0; i < ctx->entries_nr; i++)
hashwrite_be32(f, ctx->pack_order[i]);

return 0;
}

struct midx_pack_order_data {
uint32_t nr;
uint32_t pack;
@ -1403,16 +1419,21 @@ static int write_midx_internal(const char *object_dir,
(size_t)ctx.num_large_offsets * MIDX_CHUNK_LARGE_OFFSET_WIDTH,
write_midx_large_offsets);

if (flags & (MIDX_WRITE_REV_INDEX | MIDX_WRITE_BITMAP)) {
ctx.pack_order = midx_pack_order(&ctx);
add_chunk(cf, MIDX_CHUNKID_REVINDEX,
ctx.entries_nr * sizeof(uint32_t),
write_midx_revindex);
}

write_midx_header(f, get_num_chunks(cf), ctx.nr - dropped_packs);
write_chunkfile(cf, &ctx);

finalize_hashfile(f, midx_hash, CSUM_FSYNC | CSUM_HASH_IN_STREAM);
free_chunkfile(cf);

if (flags & (MIDX_WRITE_REV_INDEX | MIDX_WRITE_BITMAP))
ctx.pack_order = midx_pack_order(&ctx);

if (flags & MIDX_WRITE_REV_INDEX)
if (flags & MIDX_WRITE_REV_INDEX &&
git_env_bool("GIT_TEST_MIDX_WRITE_REV", 0))
write_midx_reverse_index(midx_name.buf, midx_hash, &ctx);
if (flags & MIDX_WRITE_BITMAP) {
if (write_midx_bitmap(midx_name.buf, midx_hash, &ctx,

1
midx.h
View File

@ -36,6 +36,7 @@ struct multi_pack_index {
const unsigned char *chunk_oid_lookup;
const unsigned char *chunk_object_offsets;
const unsigned char *chunk_large_offsets;
const unsigned char *chunk_revindex;

const char **pack_names;
struct packed_git **packs;

View File

@ -358,7 +358,9 @@ static int open_midx_bitmap_1(struct bitmap_index *bitmap_git,
cleanup:
munmap(bitmap_git->map, bitmap_git->map_size);
bitmap_git->map_size = 0;
bitmap_git->map_pos = 0;
bitmap_git->map = NULL;
bitmap_git->midx = NULL;
return -1;
}

@ -405,6 +407,8 @@ static int open_pack_bitmap_1(struct bitmap_index *bitmap_git, struct packed_git
munmap(bitmap_git->map, bitmap_git->map_size);
bitmap_git->map = NULL;
bitmap_git->map_size = 0;
bitmap_git->map_pos = 0;
bitmap_git->pack = NULL;
return -1;
}


View File

@ -298,9 +298,29 @@ int load_midx_revindex(struct multi_pack_index *m)
{
struct strbuf revindex_name = STRBUF_INIT;
int ret;

if (m->revindex_data)
return 0;

if (m->chunk_revindex) {
/*
* If the MIDX `m` has a `RIDX` chunk, then use its contents for
* the reverse index instead of trying to load a separate `.rev`
* file.
*
* Note that we do *not* set `m->revindex_map` here, since we do
* not want to accidentally call munmap() in the middle of the
* MIDX.
*/
trace2_data_string("load_midx_revindex", the_repository,
"source", "midx");
m->revindex_data = (const uint32_t *)m->chunk_revindex;
return 0;
}

trace2_data_string("load_midx_revindex", the_repository,
"source", "rev");

get_midx_rev_filename(&revindex_name, m);

ret = load_revindex_from_disk(revindex_name.buf,

View File

@ -1,6 +1,9 @@
# Helpers for scripts testing bitmap functionality; see t5310 for
# example usage.

objdir=.git/objects
midx=$objdir/pack/multi-pack-index

# Compare a file containing rev-list bitmap traversal output to its non-bitmap
# counterpart. You can't just use test_cmp for this, because the two produce
# subtly different output:
@ -264,3 +267,185 @@ have_delta () {
midx_checksum () {
test-tool read-midx --checksum "$1"
}

# midx_pack_source <obj>
midx_pack_source () {
test-tool read-midx --show-objects .git/objects | grep "^$1 " | cut -f2
}

test_rev_exists () {
commit="$1"
kind="$2"

test_expect_success "reverse index exists ($kind)" '
GIT_TRACE2_EVENT=$(pwd)/event.trace \
git rev-list --test-bitmap "$commit" &&

if test "rev" = "$kind"
then
test_path_is_file $midx-$(midx_checksum $objdir).rev
fi &&
grep "\"category\":\"load_midx_revindex\",\"key\":\"source\",\"value\":\"$kind\"" event.trace
'
}

midx_bitmap_core () {
rev_kind="${1:-midx}"

setup_bitmap_history

test_expect_success 'create single-pack midx with bitmaps' '
git repack -ad &&
git multi-pack-index write --bitmap &&
test_path_is_file $midx &&
test_path_is_file $midx-$(midx_checksum $objdir).bitmap
'

test_rev_exists HEAD "$rev_kind"

basic_bitmap_tests

test_expect_success 'create new additional packs' '
for i in $(test_seq 1 16)
do
test_commit "$i" &&
git repack -d || return 1
done &&

git checkout -b other2 HEAD~8 &&
for i in $(test_seq 1 8)
do
test_commit "side-$i" &&
git repack -d || return 1
done &&
git checkout second
'

test_expect_success 'create multi-pack midx with bitmaps' '
git multi-pack-index write --bitmap &&

ls $objdir/pack/pack-*.pack >packs &&
test_line_count = 25 packs &&

test_path_is_file $midx &&
test_path_is_file $midx-$(midx_checksum $objdir).bitmap
'

test_rev_exists HEAD "$rev_kind"

basic_bitmap_tests

test_expect_success '--no-bitmap is respected when bitmaps exist' '
git multi-pack-index write --bitmap &&

test_commit respect--no-bitmap &&
git repack -d &&

test_path_is_file $midx &&
test_path_is_file $midx-$(midx_checksum $objdir).bitmap &&

git multi-pack-index write --no-bitmap &&

test_path_is_file $midx &&
test_path_is_missing $midx-$(midx_checksum $objdir).bitmap &&
test_path_is_missing $midx-$(midx_checksum $objdir).rev
'

test_expect_success 'setup midx with base from later pack' '
# Write a and b so that "a" is a delta on top of base "b", since Git
# prefers to delete contents out of a base rather than add to a shorter
# object.
test_seq 1 128 >a &&
test_seq 1 130 >b &&

git add a b &&
git commit -m "initial commit" &&

a=$(git rev-parse HEAD:a) &&
b=$(git rev-parse HEAD:b) &&

# In the first pack, "a" is stored as a delta to "b".
p1=$(git pack-objects .git/objects/pack/pack <<-EOF
$a
$b
EOF
) &&

# In the second pack, "a" is missing, and "b" is not a delta nor base to
# any other object.
p2=$(git pack-objects .git/objects/pack/pack <<-EOF
$b
$(git rev-parse HEAD)
$(git rev-parse HEAD^{tree})
EOF
) &&

git prune-packed &&
# Use the second pack as the preferred source, so that "b" occurs
# earlier in the MIDX object order, rendering "a" unusable for pack
# reuse.
git multi-pack-index write --bitmap --preferred-pack=pack-$p2.idx &&

have_delta $a $b &&
test $(midx_pack_source $a) != $(midx_pack_source $b)
'

rev_list_tests 'full bitmap with backwards delta'

test_expect_success 'clone with bitmaps enabled' '
git clone --no-local --bare . clone-reverse-delta.git &&
test_when_finished "rm -fr clone-reverse-delta.git" &&

git rev-parse HEAD >expect &&
git --git-dir=clone-reverse-delta.git rev-parse HEAD >actual &&
test_cmp expect actual
'

test_expect_success 'changing the preferred pack does not corrupt bitmaps' '
rm -fr repo &&
git init repo &&
test_when_finished "rm -fr repo" &&
(
cd repo &&

test_commit A &&
test_commit B &&

git rev-list --objects --no-object-names HEAD^ >A.objects &&
git rev-list --objects --no-object-names HEAD^.. >B.objects &&

A=$(git pack-objects $objdir/pack/pack <A.objects) &&
B=$(git pack-objects $objdir/pack/pack <B.objects) &&

cat >indexes <<-EOF &&
pack-$A.idx
pack-$B.idx
EOF

git multi-pack-index write --bitmap --stdin-packs \
--preferred-pack=pack-$A.pack <indexes &&
git rev-list --test-bitmap A &&

git multi-pack-index write --bitmap --stdin-packs \
--preferred-pack=pack-$B.pack <indexes &&
git rev-list --test-bitmap A
)
'
}

midx_bitmap_partial_tests () {
rev_kind="${1:-midx}"

test_expect_success 'setup partial bitmaps' '
test_commit packed &&
git repack &&
test_commit loose &&
git multi-pack-index write --bitmap 2>err &&
test_path_is_file $midx &&
test_path_is_file $midx-$(midx_checksum $objdir).bitmap
'

test_rev_exists HEAD~ "$rev_kind"

basic_bitmap_tests HEAD~
}

View File

@ -397,4 +397,32 @@ test_expect_success 'pack.preferBitmapTips' '
)
'

test_expect_success 'complains about multiple pack bitmaps' '
rm -fr repo &&
git init repo &&
test_when_finished "rm -fr repo" &&
(
cd repo &&

test_commit base &&

git repack -adb &&
bitmap="$(ls .git/objects/pack/pack-*.bitmap)" &&
mv "$bitmap" "$bitmap.bak" &&

test_commit other &&
git repack -ab &&

mv "$bitmap.bak" "$bitmap" &&

find .git/objects/pack -type f -name "*.pack" >packs &&
find .git/objects/pack -type f -name "*.bitmap" >bitmaps &&
test_line_count = 2 packs &&
test_line_count = 2 bitmaps &&

git rev-list --use-bitmap-index HEAD 2>err &&
grep "ignoring extra bitmap file" err
)
'

test_done

View File

@ -9,125 +9,13 @@ test_description='exercise basic multi-pack bitmap functionality'
GIT_TEST_MULTI_PACK_INDEX=0
GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP=0

objdir=.git/objects
midx=$objdir/pack/multi-pack-index
# This test exercise multi-pack bitmap functionality where the object order is
# stored and read from a special chunk within the MIDX, so use the default
# behavior here.
sane_unset GIT_TEST_MIDX_WRITE_REV
sane_unset GIT_TEST_MIDX_READ_RIDX

# midx_pack_source <obj>
midx_pack_source () {
test-tool read-midx --show-objects .git/objects | grep "^$1 " | cut -f2
}

setup_bitmap_history

test_expect_success 'enable core.multiPackIndex' '
git config core.multiPackIndex true
'

test_expect_success 'create single-pack midx with bitmaps' '
git repack -ad &&
git multi-pack-index write --bitmap &&
test_path_is_file $midx &&
test_path_is_file $midx-$(midx_checksum $objdir).bitmap &&
test_path_is_file $midx-$(midx_checksum $objdir).rev
'

basic_bitmap_tests

test_expect_success 'create new additional packs' '
for i in $(test_seq 1 16)
do
test_commit "$i" &&
git repack -d || return 1
done &&

git checkout -b other2 HEAD~8 &&
for i in $(test_seq 1 8)
do
test_commit "side-$i" &&
git repack -d || return 1
done &&
git checkout second
'

test_expect_success 'create multi-pack midx with bitmaps' '
git multi-pack-index write --bitmap &&

ls $objdir/pack/pack-*.pack >packs &&
test_line_count = 25 packs &&

test_path_is_file $midx &&
test_path_is_file $midx-$(midx_checksum $objdir).bitmap &&
test_path_is_file $midx-$(midx_checksum $objdir).rev
'

basic_bitmap_tests

test_expect_success '--no-bitmap is respected when bitmaps exist' '
git multi-pack-index write --bitmap &&

test_commit respect--no-bitmap &&
git repack -d &&

test_path_is_file $midx &&
test_path_is_file $midx-$(midx_checksum $objdir).bitmap &&
test_path_is_file $midx-$(midx_checksum $objdir).rev &&

git multi-pack-index write --no-bitmap &&

test_path_is_file $midx &&
test_path_is_missing $midx-$(midx_checksum $objdir).bitmap &&
test_path_is_missing $midx-$(midx_checksum $objdir).rev
'

test_expect_success 'setup midx with base from later pack' '
# Write a and b so that "a" is a delta on top of base "b", since Git
# prefers to delete contents out of a base rather than add to a shorter
# object.
test_seq 1 128 >a &&
test_seq 1 130 >b &&

git add a b &&
git commit -m "initial commit" &&

a=$(git rev-parse HEAD:a) &&
b=$(git rev-parse HEAD:b) &&

# In the first pack, "a" is stored as a delta to "b".
p1=$(git pack-objects .git/objects/pack/pack <<-EOF
$a
$b
EOF
) &&

# In the second pack, "a" is missing, and "b" is not a delta nor base to
# any other object.
p2=$(git pack-objects .git/objects/pack/pack <<-EOF
$b
$(git rev-parse HEAD)
$(git rev-parse HEAD^{tree})
EOF
) &&

git prune-packed &&
# Use the second pack as the preferred source, so that "b" occurs
# earlier in the MIDX object order, rendering "a" unusable for pack
# reuse.
git multi-pack-index write --bitmap --preferred-pack=pack-$p2.idx &&

have_delta $a $b &&
test $(midx_pack_source $a) != $(midx_pack_source $b)
'

rev_list_tests 'full bitmap with backwards delta'

test_expect_success 'clone with bitmaps enabled' '
git clone --no-local --bare . clone-reverse-delta.git &&
test_when_finished "rm -fr clone-reverse-delta.git" &&

git rev-parse HEAD >expect &&
git --git-dir=clone-reverse-delta.git rev-parse HEAD >actual &&
test_cmp expect actual
'
midx_bitmap_core

bitmap_reuse_tests() {
from=$1
@ -204,17 +92,7 @@ test_expect_success 'missing object closure fails gracefully' '
)
'

test_expect_success 'setup partial bitmaps' '
test_commit packed &&
git repack &&
test_commit loose &&
git multi-pack-index write --bitmap 2>err &&
test_path_is_file $midx &&
test_path_is_file $midx-$(midx_checksum $objdir).bitmap &&
test_path_is_file $midx-$(midx_checksum $objdir).rev
'

basic_bitmap_tests HEAD~
midx_bitmap_partial_tests

test_expect_success 'removing a MIDX clears stale bitmaps' '
rm -fr repo &&
@ -228,7 +106,6 @@ test_expect_success 'removing a MIDX clears stale bitmaps' '

# Write a MIDX and bitmap; remove the MIDX but leave the bitmap.
stale_bitmap=$midx-$(midx_checksum $objdir).bitmap &&
stale_rev=$midx-$(midx_checksum $objdir).rev &&
rm $midx &&

# Then write a new MIDX.
@ -238,9 +115,7 @@ test_expect_success 'removing a MIDX clears stale bitmaps' '

test_path_is_file $midx &&
test_path_is_file $midx-$(midx_checksum $objdir).bitmap &&
test_path_is_file $midx-$(midx_checksum $objdir).rev &&
test_path_is_missing $stale_bitmap &&
test_path_is_missing $stale_rev
test_path_is_missing $stale_bitmap
)
'

@ -261,7 +136,6 @@ test_expect_success 'pack.preferBitmapTips' '
git multi-pack-index write --bitmap &&
test_path_is_file $midx &&
test_path_is_file $midx-$(midx_checksum $objdir).bitmap &&
test_path_is_file $midx-$(midx_checksum $objdir).rev &&

test-tool bitmap list-commits | sort >bitmaps &&
comm -13 bitmaps commits >before &&
@ -271,7 +145,6 @@ test_expect_success 'pack.preferBitmapTips' '
<before | git update-ref --stdin &&

rm -fr $midx-$(midx_checksum $objdir).bitmap &&
rm -fr $midx-$(midx_checksum $objdir).rev &&
rm -fr $midx &&

git -c pack.preferBitmapTips=refs/tags/include \
@ -309,7 +182,6 @@ test_expect_success 'writing a bitmap with --refs-snapshot' '
grep "$(git rev-parse two)" bitmaps &&

rm -fr $midx-$(midx_checksum $objdir).bitmap &&
rm -fr $midx-$(midx_checksum $objdir).rev &&
rm -fr $midx &&

# Then again, but with a refs snapshot which only sees
@ -354,7 +226,6 @@ test_expect_success 'write a bitmap with --refs-snapshot (preferred tips)' '
) >snapshot &&

rm -fr $midx-$(midx_checksum $objdir).bitmap &&
rm -fr $midx-$(midx_checksum $objdir).rev &&
rm -fr $midx &&

git multi-pack-index write --bitmap --refs-snapshot=snapshot &&
@ -395,4 +266,23 @@ test_expect_success 'hash-cache values are propagated from pack bitmaps' '
)
'

test_expect_success 'graceful fallback when missing reverse index' '
rm -fr repo &&
git init repo &&
test_when_finished "rm -fr repo" &&
(
cd repo &&

test_commit base &&

# write a pack and MIDX bitmap containing base
git repack -adb &&
git multi-pack-index write --bitmap &&

GIT_TEST_MIDX_READ_RIDX=0 \
git rev-list --use-bitmap-index HEAD 2>err &&
! grep "ignoring extra bitmap file" err
)
'

test_done

View File

@ -0,0 +1,23 @@
#!/bin/sh

test_description='exercise basic multi-pack bitmap functionality (.rev files)'

. ./test-lib.sh
. "${TEST_DIRECTORY}/lib-bitmap.sh"

# We'll be writing our own midx and bitmaps, so avoid getting confused by the
# automatic ones.
GIT_TEST_MULTI_PACK_INDEX=0
GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP=0

# Unlike t5326, this test exercise multi-pack bitmap functionality where the
# object order is stored in a separate .rev file.
GIT_TEST_MIDX_WRITE_REV=1
GIT_TEST_MIDX_READ_RIDX=0
export GIT_TEST_MIDX_WRITE_REV
export GIT_TEST_MIDX_READ_RIDX

midx_bitmap_core rev
midx_bitmap_partial_tests rev

test_done

View File

@ -312,16 +312,13 @@ test_expect_success 'cleans up MIDX when appropriate' '
checksum=$(midx_checksum $objdir) &&
test_path_is_file $midx &&
test_path_is_file $midx-$checksum.bitmap &&
test_path_is_file $midx-$checksum.rev &&

test_commit repack-3 &&
GIT_TEST_MULTI_PACK_INDEX=0 git repack -Adb --write-midx &&

test_path_is_file $midx &&
test_path_is_missing $midx-$checksum.bitmap &&
test_path_is_missing $midx-$checksum.rev &&
test_path_is_file $midx-$(midx_checksum $objdir).bitmap &&
test_path_is_file $midx-$(midx_checksum $objdir).rev &&

test_commit repack-4 &&
GIT_TEST_MULTI_PACK_INDEX=0 git repack -Adb &&
@ -354,7 +351,6 @@ test_expect_success '--write-midx with preferred bitmap tips' '
test_line_count = 1 before &&

rm -fr $midx-$(midx_checksum $objdir).bitmap &&
rm -fr $midx-$(midx_checksum $objdir).rev &&
rm -fr $midx &&

# instead of constructing the snapshot ourselves (c.f., the test