repack: implement `--filter-to` for storing filtered out objects

A previous commit implemented `git repack --filter=<filter-spec>` to
allow users to filter out some objects from the main pack and move them
into a new, separate pack.

It would be nice if this new pack could be created in a directory
other than the one holding the regular pack. This would make it possible
to move large blobs into a pack on a different kind of storage, for
example cheaper storage.

Even in a different directory, this pack can be accessible if, for
example, the Git alternates mechanism is used to point to it. In fact
not using the Git alternates mechanism can corrupt a repo as the
generated pack containing the filtered objects might not be accessible
from the repo any more. So setting up the Git alternates mechanism
should be done before using this feature if the user wants the repo to
be fully usable while this feature is used.

In some cases, like when a repo has just been cloned or when there is no
other activity in the repo, it's OK to set up the Git alternates
mechanism afterwards though. It's also OK to just inspect the generated
packfile containing the filtered objects and then just move it into the
'.git/objects/pack/' directory manually. That's why it's not necessary
for this command to check that the Git alternates mechanism has
already been set up.

While at it, as an example to show that `--filter` and `--filter-to`
work well with other options, let's also add a test to check that these
options work well with `--max-pack-size`.

Signed-off-by: Christian Couder <chriscool@tuxfamily.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
maint
Christian Couder 2023-10-02 18:55:03 +02:00 committed by Junio C Hamano
parent 1cd43a9ed9
commit 71c5aec1f5
3 changed files with 82 additions and 1 deletions

View File

@ -155,6 +155,17 @@ depth is 4095.
a single packfile containing all the objects. See
linkgit:git-rev-list[1] for valid `<filter-spec>` forms.

--filter-to=<dir>::
Write the pack containing filtered out objects to the
directory `<dir>`. Only useful with `--filter`. This can be
used for putting the pack on a separate object directory that
is accessed through the Git alternates mechanism. **WARNING:**
If the packfile containing the filtered out objects is not
accessible, the repo can become corrupt as it might not be
possible to access the objects in that packfile. See the
`objects` and `objects/info/alternates` sections of
linkgit:gitrepository-layout[5].

-b::
--write-bitmap-index::
Write a reachability bitmap index as part of the repack. This

View File

@ -977,6 +977,7 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
int write_midx = 0;
const char *cruft_expiration = NULL;
const char *expire_to = NULL;
const char *filter_to = NULL;

struct option builtin_repack_options[] = {
OPT_BIT('a', NULL, &pack_everything,
@ -1029,6 +1030,8 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
N_("write a multi-pack index of the resulting packs")),
OPT_STRING(0, "expire-to", &expire_to, N_("dir"),
N_("pack prefix to store a pack containing pruned objects")),
OPT_STRING(0, "filter-to", &filter_to, N_("dir"),
N_("pack prefix to store a pack containing filtered out objects")),
OPT_END()
};

@ -1177,6 +1180,8 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
if (po_args.filter_options.choice)
strvec_pushf(&cmd.args, "--filter=%s",
expand_list_objects_filter_spec(&po_args.filter_options));
else if (filter_to)
die(_("option '%s' can only be used along with '%s'"), "--filter-to", "--filter");

if (geometry.split_factor)
cmd.in = -1;
@ -1265,8 +1270,11 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
}

if (po_args.filter_options.choice) {
if (!filter_to)
filter_to = packtmp;

ret = write_filtered_pack(&po_args,
packtmp,
filter_to,
find_pack_prefix(packdir, packtmp),
&existing,
&names);

View File

@ -462,6 +462,68 @@ test_expect_success '--filter works with --pack-kept-objects and .keep packs' '
)
'

# Check that repacking with --filter and --filter-to writes the filtered-out
# objects to a pack under the given prefix in a second repository, and that
# those objects can be read from the original repository once an alternates
# entry points at the second repository.
# NOTE(review): bare.git is not created here; presumably an earlier test in
# this script sets it up with a "file1" blob containing "content1" -- confirm.
test_expect_success '--filter-to stores filtered out objects' '
# Start from a state where bare.git holds exactly one pack.
git -C bare.git repack -a -d &&
test_stdout_line_count = 1 ls bare.git/objects/pack/*.pack &&

# Repack bare.git with blobs filtered out, using a pack prefix inside
# filtered.git as the destination for the filtered-out objects.
git init --bare filtered.git &&
git -C bare.git -c repack.writebitmaps=false repack -a -d \
--filter=blob:none \
--filter-to=../filtered.git/objects/pack/pack &&
# Each repository is expected to end up with exactly one pack.
test_stdout_line_count = 1 ls bare.git/objects/pack/pack-*.pack &&
test_stdout_line_count = 1 ls filtered.git/objects/pack/pack-*.pack &&

# NOTE(review): "find-pack -c N" presumably fails unless the object is found
# in exactly N packs -- confirm against the test-tool helper.
# The commit is looked up in bare.git with count 1, the blob with count 0.
commit_pack=$(test-tool -C bare.git find-pack -c 1 HEAD) &&
blob_pack=$(test-tool -C bare.git find-pack -c 0 HEAD:file1) &&
# Resolve the blob id via bare.git, then look it up in filtered.git by hash.
blob_hash=$(git -C bare.git rev-parse HEAD:file1) &&
test -n "$blob_hash" &&
blob_pack=$(test-tool -C filtered.git find-pack -c 1 $blob_hash) &&

# Point bare.git at the object directory of filtered.git through the
# alternates file, then look the blob up again through bare.git (count 1)
# and compare its content.
echo $(pwd)/filtered.git/objects >bare.git/objects/info/alternates &&
blob_pack=$(test-tool -C bare.git find-pack -c 1 HEAD:file1) &&
blob_content=$(git -C bare.git show $blob_hash) &&
test "$blob_content" = "content1"
'

# Check that --filter and --filter-to cooperate with --max-pack-size: the
# filtered-out blobs are written under the --filter-to prefix and are split
# into several packs, while the repacked repository keeps a single pack.
test_expect_success '--filter works with --max-pack-size' '
rm -rf filtered.git &&
git init --bare filtered.git &&
git init max-pack-size &&
(
cd max-pack-size &&
test_commit base &&
# two blobs which exceed the maximum pack size
test-tool genrandom foo 1048576 >foo &&
git hash-object -w foo &&
test-tool genrandom bar 1048576 >bar &&
git hash-object -w bar &&
git add foo bar &&
git commit -m "adding foo and bar"
) &&
git clone --no-local --bare max-pack-size max-pack-size.git &&
(
cd max-pack-size.git &&
git -c repack.writebitmaps=false repack -a -d --filter=blob:none \
--max-pack-size=1M \
--filter-to=../filtered.git/objects/pack/pack &&
# Quote the path so it is written verbatim, without word splitting
# or pathname expansion.
echo "$(cd .. && pwd)/filtered.git/objects" >objects/info/alternates &&

# Check that the 3 blobs are in different packfiles in filtered.git
test_stdout_line_count = 3 ls ../filtered.git/objects/pack/pack-*.pack &&
test_stdout_line_count = 1 ls objects/pack/pack-*.pack &&
foo_pack=$(test-tool find-pack -c 1 HEAD:foo) &&
bar_pack=$(test-tool find-pack -c 1 HEAD:bar) &&
base_pack=$(test-tool find-pack -c 1 HEAD:base.t) &&
test "$foo_pack" != "$bar_pack" &&
test "$foo_pack" != "$base_pack" &&
test "$bar_pack" != "$base_pack" &&
# Every one of the three packs must live under filtered.git, so test
# the loop variable rather than a single fixed pack.
for pack in "$foo_pack" "$bar_pack" "$base_pack"
do
case "$pack" in */filtered.git/objects/pack/*) true ;; *) return 1 ;; esac
done
)
'

objdir=.git/objects
midx=$objdir/pack/multi-pack-index