pack-bitmap: enable reuse from all bitmapped packs
Now that both the pack-bitmap and pack-objects code are prepared to handle marking and using objects from multiple bitmapped packs for verbatim reuse, allow marking objects from all bitmapped packs as eligible for reuse. Within the `reuse_partial_packfile_from_bitmap()` function, we no longer only mark the pack whose first object is at bit position zero for reuse, and instead mark any pack contained in the MIDX as a reuse candidate. Provide a handful of test cases in a new script (t5332) exercising interesting behavior for multi-pack reuse to ensure that we performed all of the previous steps correctly. Signed-off-by: Taylor Blau <me@ttaylorr.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>maint
parent
941074134c
commit
af626ac0e0
|
@ -28,11 +28,17 @@ all existing objects. You can force recompression by passing the -F option
|
||||||
to linkgit:git-repack[1].
|
to linkgit:git-repack[1].
|
||||||
|
|
||||||
pack.allowPackReuse::
|
pack.allowPackReuse::
|
||||||
When true or "single", and when reachability bitmaps are enabled,
|
When true or "single", and when reachability bitmaps are
|
||||||
pack-objects will try to send parts of the bitmapped packfile
|
enabled, pack-objects will try to send parts of the bitmapped
|
||||||
verbatim. This can reduce memory and CPU usage to serve fetches,
|
packfile verbatim. When "multi", and when a multi-pack
|
||||||
but might result in sending a slightly larger pack. Defaults to
|
reachability bitmap is available, pack-objects will try to send
|
||||||
true.
|
parts of all packs in the MIDX.
|
||||||
|
+
|
||||||
|
If only a single pack bitmap is available, and
|
||||||
|
`pack.allowPackReuse` is set to "multi", reuse parts of just the
|
||||||
|
bitmapped packfile. This can reduce memory and CPU usage to
|
||||||
|
serve fetches, but might result in sending a slightly larger
|
||||||
|
pack. Defaults to true.
|
||||||
|
|
||||||
pack.island::
|
pack.island::
|
||||||
An extended regular expression configuring a set of delta
|
An extended regular expression configuring a set of delta
|
||||||
|
|
|
@ -232,6 +232,7 @@ static int use_bitmap_index = -1;
|
||||||
static enum {
|
static enum {
|
||||||
NO_PACK_REUSE = 0,
|
NO_PACK_REUSE = 0,
|
||||||
SINGLE_PACK_REUSE,
|
SINGLE_PACK_REUSE,
|
||||||
|
MULTI_PACK_REUSE,
|
||||||
} allow_pack_reuse = SINGLE_PACK_REUSE;
|
} allow_pack_reuse = SINGLE_PACK_REUSE;
|
||||||
static enum {
|
static enum {
|
||||||
WRITE_BITMAP_FALSE = 0,
|
WRITE_BITMAP_FALSE = 0,
|
||||||
|
@ -3251,6 +3252,8 @@ static int git_pack_config(const char *k, const char *v,
|
||||||
if (res < 0) {
|
if (res < 0) {
|
||||||
if (!strcasecmp(v, "single"))
|
if (!strcasecmp(v, "single"))
|
||||||
allow_pack_reuse = SINGLE_PACK_REUSE;
|
allow_pack_reuse = SINGLE_PACK_REUSE;
|
||||||
|
else if (!strcasecmp(v, "multi"))
|
||||||
|
allow_pack_reuse = MULTI_PACK_REUSE;
|
||||||
else
|
else
|
||||||
die(_("invalid pack.allowPackReuse value: '%s'"), v);
|
die(_("invalid pack.allowPackReuse value: '%s'"), v);
|
||||||
} else if (res) {
|
} else if (res) {
|
||||||
|
@ -4029,7 +4032,8 @@ static int get_object_list_from_bitmap(struct rev_info *revs)
|
||||||
reuse_partial_packfile_from_bitmap(bitmap_git,
|
reuse_partial_packfile_from_bitmap(bitmap_git,
|
||||||
&reuse_packfiles,
|
&reuse_packfiles,
|
||||||
&reuse_packfiles_nr,
|
&reuse_packfiles_nr,
|
||||||
&reuse_packfile_bitmap);
|
&reuse_packfile_bitmap,
|
||||||
|
allow_pack_reuse == MULTI_PACK_REUSE);
|
||||||
|
|
||||||
if (reuse_packfiles) {
|
if (reuse_packfiles) {
|
||||||
reuse_packfile_objects = bitmap_popcount(reuse_packfile_bitmap);
|
reuse_packfile_objects = bitmap_popcount(reuse_packfile_bitmap);
|
||||||
|
|
|
@ -2040,7 +2040,8 @@ static int bitmapped_pack_cmp(const void *va, const void *vb)
|
||||||
void reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git,
|
void reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git,
|
||||||
struct bitmapped_pack **packs_out,
|
struct bitmapped_pack **packs_out,
|
||||||
size_t *packs_nr_out,
|
size_t *packs_nr_out,
|
||||||
struct bitmap **reuse_out)
|
struct bitmap **reuse_out,
|
||||||
|
int multi_pack_reuse)
|
||||||
{
|
{
|
||||||
struct repository *r = the_repository;
|
struct repository *r = the_repository;
|
||||||
struct bitmapped_pack *packs = NULL;
|
struct bitmapped_pack *packs = NULL;
|
||||||
|
@ -2064,15 +2065,30 @@ void reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git,
|
||||||
free(packs);
|
free(packs);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!pack.bitmap_nr)
|
if (!pack.bitmap_nr)
|
||||||
continue; /* no objects from this pack */
|
continue;
|
||||||
if (pack.bitmap_pos)
|
|
||||||
continue; /* not preferred pack */
|
if (!multi_pack_reuse && pack.bitmap_pos) {
|
||||||
|
/*
|
||||||
|
* If we're only reusing a single pack, skip
|
||||||
|
* over any packs which are not positioned at
|
||||||
|
* the beginning of the MIDX bitmap.
|
||||||
|
*
|
||||||
|
* This is consistent with the existing
|
||||||
|
* single-pack reuse behavior, which only reuses
|
||||||
|
* parts of the MIDX's preferred pack.
|
||||||
|
*/
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
ALLOC_GROW(packs, packs_nr + 1, packs_alloc);
|
ALLOC_GROW(packs, packs_nr + 1, packs_alloc);
|
||||||
memcpy(&packs[packs_nr++], &pack, sizeof(pack));
|
memcpy(&packs[packs_nr++], &pack, sizeof(pack));
|
||||||
|
|
||||||
objects_nr += pack.p->num_objects;
|
objects_nr += pack.p->num_objects;
|
||||||
|
|
||||||
|
if (!multi_pack_reuse)
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
QSORT(packs, packs_nr, bitmapped_pack_cmp);
|
QSORT(packs, packs_nr, bitmapped_pack_cmp);
|
||||||
|
@ -2080,10 +2096,10 @@ void reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git,
|
||||||
ALLOC_GROW(packs, packs_nr + 1, packs_alloc);
|
ALLOC_GROW(packs, packs_nr + 1, packs_alloc);
|
||||||
|
|
||||||
packs[packs_nr].p = bitmap_git->pack;
|
packs[packs_nr].p = bitmap_git->pack;
|
||||||
packs[packs_nr].bitmap_pos = 0;
|
|
||||||
packs[packs_nr].bitmap_nr = bitmap_git->pack->num_objects;
|
packs[packs_nr].bitmap_nr = bitmap_git->pack->num_objects;
|
||||||
|
packs[packs_nr].bitmap_pos = 0;
|
||||||
|
|
||||||
objects_nr = packs[packs_nr++].p->num_objects;
|
objects_nr = packs[packs_nr++].bitmap_nr;
|
||||||
}
|
}
|
||||||
|
|
||||||
word_alloc = objects_nr / BITS_IN_EWORD;
|
word_alloc = objects_nr / BITS_IN_EWORD;
|
||||||
|
@ -2091,10 +2107,8 @@ void reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git,
|
||||||
word_alloc++;
|
word_alloc++;
|
||||||
reuse = bitmap_word_alloc(word_alloc);
|
reuse = bitmap_word_alloc(word_alloc);
|
||||||
|
|
||||||
if (packs_nr != 1)
|
for (i = 0; i < packs_nr; i++)
|
||||||
BUG("pack reuse not yet implemented for multiple packs");
|
reuse_partial_packfile_from_bitmap_1(bitmap_git, &packs[i], reuse);
|
||||||
|
|
||||||
reuse_partial_packfile_from_bitmap_1(bitmap_git, packs, reuse);
|
|
||||||
|
|
||||||
if (bitmap_is_empty(reuse)) {
|
if (bitmap_is_empty(reuse)) {
|
||||||
free(packs);
|
free(packs);
|
||||||
|
|
|
@ -80,7 +80,8 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs,
|
||||||
void reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git,
|
void reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git,
|
||||||
struct bitmapped_pack **packs_out,
|
struct bitmapped_pack **packs_out,
|
||||||
size_t *packs_nr_out,
|
size_t *packs_nr_out,
|
||||||
struct bitmap **reuse_out);
|
struct bitmap **reuse_out,
|
||||||
|
int multi_pack_reuse);
|
||||||
int rebuild_existing_bitmaps(struct bitmap_index *, struct packing_data *mapping,
|
int rebuild_existing_bitmaps(struct bitmap_index *, struct packing_data *mapping,
|
||||||
kh_oid_map_t *reused_bitmaps, int show_progress);
|
kh_oid_map_t *reused_bitmaps, int show_progress);
|
||||||
void free_bitmap_index(struct bitmap_index *);
|
void free_bitmap_index(struct bitmap_index *);
|
||||||
|
|
|
@ -0,0 +1,203 @@
|
||||||
|
#!/bin/sh
|
||||||
|
|
||||||
|
test_description='pack-objects multi-pack reuse'
|
||||||
|
|
||||||
|
. ./test-lib.sh
|
||||||
|
. "$TEST_DIRECTORY"/lib-bitmap.sh
|
||||||
|
|
||||||
|
objdir=.git/objects
|
||||||
|
packdir=$objdir/pack
|
||||||
|
|
||||||
|
test_pack_reused () {
|
||||||
|
test_trace2_data pack-objects pack-reused "$1"
|
||||||
|
}
|
||||||
|
|
||||||
|
test_packs_reused () {
|
||||||
|
test_trace2_data pack-objects packs-reused "$1"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# pack_position <object> </path/to/pack.idx
|
||||||
|
pack_position () {
|
||||||
|
git show-index >objects &&
|
||||||
|
grep "$1" objects | cut -d" " -f1
|
||||||
|
}
|
||||||
|
|
||||||
|
test_expect_success 'preferred pack is reused for single-pack reuse' '
|
||||||
|
test_config pack.allowPackReuse single &&
|
||||||
|
|
||||||
|
for i in A B
|
||||||
|
do
|
||||||
|
test_commit "$i" &&
|
||||||
|
git repack -d || return 1
|
||||||
|
done &&
|
||||||
|
|
||||||
|
git multi-pack-index write --bitmap &&
|
||||||
|
|
||||||
|
: >trace2.txt &&
|
||||||
|
GIT_TRACE2_EVENT="$PWD/trace2.txt" \
|
||||||
|
git pack-objects --stdout --revs --all >/dev/null &&
|
||||||
|
|
||||||
|
test_pack_reused 3 <trace2.txt &&
|
||||||
|
test_packs_reused 1 <trace2.txt
|
||||||
|
'
|
||||||
|
|
||||||
|
test_expect_success 'enable multi-pack reuse' '
|
||||||
|
git config pack.allowPackReuse multi
|
||||||
|
'
|
||||||
|
|
||||||
|
test_expect_success 'reuse all objects from subset of bitmapped packs' '
|
||||||
|
test_commit C &&
|
||||||
|
git repack -d &&
|
||||||
|
|
||||||
|
git multi-pack-index write --bitmap &&
|
||||||
|
|
||||||
|
cat >in <<-EOF &&
|
||||||
|
$(git rev-parse C)
|
||||||
|
^$(git rev-parse A)
|
||||||
|
EOF
|
||||||
|
|
||||||
|
: >trace2.txt &&
|
||||||
|
GIT_TRACE2_EVENT="$PWD/trace2.txt" \
|
||||||
|
git pack-objects --stdout --revs <in >/dev/null &&
|
||||||
|
|
||||||
|
test_pack_reused 6 <trace2.txt &&
|
||||||
|
test_packs_reused 2 <trace2.txt
|
||||||
|
'
|
||||||
|
|
||||||
|
test_expect_success 'reuse all objects from all packs' '
|
||||||
|
: >trace2.txt &&
|
||||||
|
GIT_TRACE2_EVENT="$PWD/trace2.txt" \
|
||||||
|
git pack-objects --stdout --revs --all >/dev/null &&
|
||||||
|
|
||||||
|
test_pack_reused 9 <trace2.txt &&
|
||||||
|
test_packs_reused 3 <trace2.txt
|
||||||
|
'
|
||||||
|
|
||||||
|
test_expect_success 'reuse objects from first pack with middle gap' '
|
||||||
|
for i in D E F
|
||||||
|
do
|
||||||
|
test_commit "$i" || return 1
|
||||||
|
done &&
|
||||||
|
|
||||||
|
# Set "pack.window" to zero to ensure that we do not create any
|
||||||
|
# deltas, which could alter the amount of pack reuse we perform
|
||||||
|
# (if, for e.g., we are not sending one or more bases).
|
||||||
|
D="$(git -c pack.window=0 pack-objects --all --unpacked $packdir/pack)" &&
|
||||||
|
|
||||||
|
d_pos="$(pack_position $(git rev-parse D) <$packdir/pack-$D.idx)" &&
|
||||||
|
e_pos="$(pack_position $(git rev-parse E) <$packdir/pack-$D.idx)" &&
|
||||||
|
f_pos="$(pack_position $(git rev-parse F) <$packdir/pack-$D.idx)" &&
|
||||||
|
|
||||||
|
# commits F, E, and D, should appear in that order at the
|
||||||
|
# beginning of the pack
|
||||||
|
test $f_pos -lt $e_pos &&
|
||||||
|
test $e_pos -lt $d_pos &&
|
||||||
|
|
||||||
|
# Ensure that the pack we are constructing sorts ahead of any
|
||||||
|
# other packs in lexical/bitmap order by choosing it as the
|
||||||
|
# preferred pack.
|
||||||
|
git multi-pack-index write --bitmap --preferred-pack="pack-$D.idx" &&
|
||||||
|
|
||||||
|
cat >in <<-EOF &&
|
||||||
|
$(git rev-parse E)
|
||||||
|
^$(git rev-parse D)
|
||||||
|
EOF
|
||||||
|
|
||||||
|
: >trace2.txt &&
|
||||||
|
GIT_TRACE2_EVENT="$PWD/trace2.txt" \
|
||||||
|
git pack-objects --stdout --delta-base-offset --revs <in >/dev/null &&
|
||||||
|
|
||||||
|
test_pack_reused 3 <trace2.txt &&
|
||||||
|
test_packs_reused 1 <trace2.txt
|
||||||
|
'
|
||||||
|
|
||||||
|
test_expect_success 'reuse objects from middle pack with middle gap' '
|
||||||
|
rm -fr $packdir/multi-pack-index* &&
|
||||||
|
|
||||||
|
# Ensure that the pack we are constructing sort into any
|
||||||
|
# position *but* the first one, by choosing a different pack as
|
||||||
|
# the preferred one.
|
||||||
|
git multi-pack-index write --bitmap --preferred-pack="pack-$A.idx" &&
|
||||||
|
|
||||||
|
cat >in <<-EOF &&
|
||||||
|
$(git rev-parse E)
|
||||||
|
^$(git rev-parse D)
|
||||||
|
EOF
|
||||||
|
|
||||||
|
: >trace2.txt &&
|
||||||
|
GIT_TRACE2_EVENT="$PWD/trace2.txt" \
|
||||||
|
git pack-objects --stdout --delta-base-offset --revs <in >/dev/null &&
|
||||||
|
|
||||||
|
test_pack_reused 3 <trace2.txt &&
|
||||||
|
test_packs_reused 1 <trace2.txt
|
||||||
|
'
|
||||||
|
|
||||||
|
test_expect_success 'omit delta with uninteresting base (same pack)' '
|
||||||
|
git repack -adk &&
|
||||||
|
|
||||||
|
test_seq 32 >f &&
|
||||||
|
git add f &&
|
||||||
|
test_tick &&
|
||||||
|
git commit -m "delta" &&
|
||||||
|
delta="$(git rev-parse HEAD)" &&
|
||||||
|
|
||||||
|
test_seq 64 >f &&
|
||||||
|
test_tick &&
|
||||||
|
git commit -a -m "base" &&
|
||||||
|
base="$(git rev-parse HEAD)" &&
|
||||||
|
|
||||||
|
test_commit other &&
|
||||||
|
|
||||||
|
git repack -d &&
|
||||||
|
|
||||||
|
have_delta "$(git rev-parse $delta:f)" "$(git rev-parse $base:f)" &&
|
||||||
|
|
||||||
|
git multi-pack-index write --bitmap &&
|
||||||
|
|
||||||
|
cat >in <<-EOF &&
|
||||||
|
$(git rev-parse other)
|
||||||
|
^$base
|
||||||
|
EOF
|
||||||
|
|
||||||
|
: >trace2.txt &&
|
||||||
|
GIT_TRACE2_EVENT="$PWD/trace2.txt" \
|
||||||
|
git pack-objects --stdout --delta-base-offset --revs <in >/dev/null &&
|
||||||
|
|
||||||
|
# We can only reuse the 3 objects corresponding to "other" from
|
||||||
|
# the latest pack.
|
||||||
|
#
|
||||||
|
# This is because even though we want "delta", we do not want
|
||||||
|
# "base", meaning that we have to inflate the delta/base-pair
|
||||||
|
# corresponding to the blob in commit "delta", which bypasses
|
||||||
|
# the pack-reuse mechanism.
|
||||||
|
#
|
||||||
|
# The remaining objects from the other pack are similarly not
|
||||||
|
# reused because their objects are on the uninteresting side of
|
||||||
|
# the query.
|
||||||
|
test_pack_reused 3 <trace2.txt &&
|
||||||
|
test_packs_reused 1 <trace2.txt
|
||||||
|
'
|
||||||
|
|
||||||
|
test_expect_success 'omit delta from uninteresting base (cross pack)' '
|
||||||
|
cat >in <<-EOF &&
|
||||||
|
$(git rev-parse $base)
|
||||||
|
^$(git rev-parse $delta)
|
||||||
|
EOF
|
||||||
|
|
||||||
|
P="$(git pack-objects --revs $packdir/pack <in)" &&
|
||||||
|
|
||||||
|
git multi-pack-index write --bitmap --preferred-pack="pack-$P.idx" &&
|
||||||
|
|
||||||
|
: >trace2.txt &&
|
||||||
|
GIT_TRACE2_EVENT="$PWD/trace2.txt" \
|
||||||
|
git pack-objects --stdout --delta-base-offset --all >/dev/null &&
|
||||||
|
|
||||||
|
packs_nr="$(find $packdir -type f -name "pack-*.pack" | wc -l)" &&
|
||||||
|
objects_nr="$(git rev-list --count --all --objects)" &&
|
||||||
|
|
||||||
|
test_pack_reused $(($objects_nr - 1)) <trace2.txt &&
|
||||||
|
test_packs_reused $packs_nr <trace2.txt
|
||||||
|
'
|
||||||
|
|
||||||
|
test_done
|
Loading…
Reference in New Issue