From ceb96a160b05c3ec45c416851bac00b711418839 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 14 Apr 2023 08:01:31 +0200 Subject: [PATCH 01/10] midx: fix segfault with no packs and invalid preferred pack When asked to write a multi-pack-index the user can specify a preferred pack that is used as a tie breaker when multiple packs contain the same objects. When this packfile cannot be found, we just pick the first pack that is getting tracked by the newly written multi-pack-index as a fallback. Picking the fallback can fail in the case where we're asked to write a multi-pack-index with no packfiles at all: picking the fallback value will cause a segfault as we blindly index into the array of packfiles, which would be empty. Fix this bug by resetting the preferred packfile index to `-1` before searching for the preferred pack. This fixes the segfault as we already check for whether the index is `> - 1`. If it is not, we simply don't pick a preferred packfile at all. Helped-by: Taylor Blau Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- midx.c | 6 +++--- t/t5319-multi-pack-index.sh | 12 ++++++++++++ 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/midx.c b/midx.c index 7cfad04a24..8455df4f35 100644 --- a/midx.c +++ b/midx.c @@ -1326,17 +1326,17 @@ static int write_midx_internal(const char *object_dir, } if (preferred_pack_name) { - int found = 0; + ctx.preferred_pack_idx = -1; + for (i = 0; i < ctx.nr; i++) { if (!cmp_idx_or_pack_name(preferred_pack_name, ctx.info[i].pack_name)) { ctx.preferred_pack_idx = i; - found = 1; break; } } - if (!found) + if (ctx.preferred_pack_idx == -1) warning(_("unknown preferred pack: '%s'"), preferred_pack_name); } else if (ctx.nr && diff --git a/t/t5319-multi-pack-index.sh b/t/t5319-multi-pack-index.sh index 499d5d4c78..0883c7c6bd 100755 --- a/t/t5319-multi-pack-index.sh +++ b/t/t5319-multi-pack-index.sh @@ -183,6 +183,18 @@ test_expect_success 'write midx with --stdin-packs' ' compare_results_with_midx "mixed mode (one pack + extra)" +test_expect_success 'write with no objects and preferred pack' ' + test_when_finished "rm -rf empty" && + git init empty && + test_must_fail git -C empty multi-pack-index write \ + --stdin-packs --preferred-pack=does-not-exist err && + cat >expect <<-EOF && + warning: unknown preferred pack: ${SQ}does-not-exist${SQ} + error: no pack files to index. + EOF + test_cmp expect err +' + test_expect_success 'write progress off for redirected stderr' ' git multi-pack-index --object-dir=$objdir write 2>err && test_line_count = 0 err From 3d74a2337c679839265efa16b2bca2a9b7795a00 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 14 Apr 2023 08:01:36 +0200 Subject: [PATCH 02/10] repack: fix trying to use preferred pack in alternates When doing a geometric repack with multi-pack-indices, then we ask git-multi-pack-index(1) to use the largest packfile as the preferred pack. It can happen though that the largest packfile is not part of the main object database, but instead part of an alternate object database. The result is that git-multi-pack-index(1) will not be able to find the preferred pack and print a warning. It then falls back to use the first packfile that the multi-pack-index shall reference. Fix this bug by only considering packfiles as preferred pack that are local. This is the right thing to do given that a multi-pack-index should never reference packfiles borrowed from an alternate. While at it, rename the function `get_largest_active_packfile()` to `get_preferred_pack()` to better document its intent. Helped-by: Taylor Blau Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/repack.c | 26 +++++++++++++++++++++----- t/t7703-repack-geometric.sh | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+), 5 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index f649379531..63585ad046 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -444,8 +444,10 @@ static void split_pack_geometry(struct pack_geometry *geometry, int factor) geometry->split = split; } -static struct packed_git *get_largest_active_pack(struct pack_geometry *geometry) +static struct packed_git *get_preferred_pack(struct pack_geometry *geometry) { + uint32_t i; + if (!geometry) { /* * No geometry means either an all-into-one repack (in which @@ -460,7 +462,21 @@ static struct packed_git *get_largest_active_pack(struct pack_geometry *geometry } if (geometry->split == geometry->pack_nr) return NULL; - return geometry->pack[geometry->pack_nr - 1]; + + /* + * The preferred pack is the largest pack above the split line. In + * other words, it is the largest pack that does not get rolled up in + * the geometric repack. + */ + for (i = geometry->pack_nr; i > geometry->split; i--) + /* + * A pack that is not local would never be included in a + * multi-pack index. We thus skip over any non-local packs. + */ + if (geometry->pack[i - 1]->pack_local) + return geometry->pack[i - 1]; + + return NULL; } static void clear_pack_geometry(struct pack_geometry *geometry) @@ -587,7 +603,7 @@ static int write_midx_included_packs(struct string_list *include, { struct child_process cmd = CHILD_PROCESS_INIT; struct string_list_item *item; - struct packed_git *largest = get_largest_active_pack(geometry); + struct packed_git *preferred = get_preferred_pack(geometry); FILE *in; int ret; @@ -608,9 +624,9 @@ static int write_midx_included_packs(struct string_list *include, if (write_bitmaps) strvec_push(&cmd.args, "--bitmap"); - if (largest) + if (preferred) strvec_pushf(&cmd.args, "--preferred-pack=%s", - pack_basename(largest)); + pack_basename(preferred)); if (refs_snapshot) strvec_pushf(&cmd.args, "--refs-snapshot=%s", refs_snapshot); diff --git a/t/t7703-repack-geometric.sh b/t/t7703-repack-geometric.sh index 8821fbd2dd..4abc7d4c55 100755 --- a/t/t7703-repack-geometric.sh +++ b/t/t7703-repack-geometric.sh @@ -281,4 +281,36 @@ test_expect_success '--geometric with pack.packSizeLimit' ' ) ' +test_expect_success '--geometric --write-midx with packfiles in main and alternate ODB' ' + test_when_finished "rm -fr shared member" && + + # Create a shared repository that will serve as the alternate object + # database for the member linked to it. It has got some objects on its + # own that are packed into a single packfile. + git init shared && + test_commit -C shared common-object && + git -C shared repack -ad && + + # We create member so that its alternates file points to the shared + # repository. We then create a commit in it so that git-repack(1) has + # something to repack. + # of the shared object database. + git clone --shared shared member && + test_commit -C member unique-object && + git -C member repack --geometric=2 --write-midx 2>err && + test_must_be_empty err && + + # We should see that a new packfile was generated. + find shared/.git/objects/pack -type f -name "*.pack" >packs && + test_line_count = 1 packs && + + # We should also see a multi-pack-index. This multi-pack-index should + # never refer to any packfiles in the alternate object database. + test_path_is_file member/.git/objects/pack/multi-pack-index && + test-tool read-midx member/.git/objects >packs.midx && + grep "^pack-.*\.idx$" packs.midx | sort >actual && + basename member/.git/objects/pack/pack-*.idx >expect && + test_cmp expect actual +' + test_done From 51861340f8d7f76a99e0d7265f4417b0a9a6871c Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 14 Apr 2023 08:01:40 +0200 Subject: [PATCH 03/10] repack: fix generating multi-pack-index with only non-local packs When writing the multi-pack-index with geometric repacking we will add all packfiles to the index that are part of the geometric sequence. This can potentially also include packfiles borrowed from an alternate object directory. But given that a multi-pack-index can only ever include packs that are part of the main object database this does not make much sense whatsoever. In the edge case where all packfiles are contained in the alternate object database and the local repository has none itself this bug can cause us to invoke git-multi-pack-index(1) with only non-local packfiles that it ultimately cannot find. This causes it to return an error and thus causes the geometric repack to fail. Fix the code to skip non-local packfiles. Co-authored-by: Taylor Blau Signed-off-by: Taylor Blau Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/repack.c | 11 +++++++++++ t/t7703-repack-geometric.sh | 23 +++++++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/builtin/repack.c b/builtin/repack.c index 63585ad046..a591a4ddd6 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -570,6 +570,17 @@ static void midx_included_packs(struct string_list *include, for (i = geometry->split; i < geometry->pack_nr; i++) { struct packed_git *p = geometry->pack[i]; + /* + * The multi-pack index never refers to packfiles part + * of an alternate object database, so we skip these. + * While git-multi-pack-index(1) would silently ignore + * them anyway, this allows us to skip executing the + * command completely when we have only non-local + * packfiles. + */ + if (!p->pack_local) + continue; + strbuf_addstr(&buf, pack_basename(p)); strbuf_strip_suffix(&buf, ".pack"); strbuf_addstr(&buf, ".idx"); diff --git a/t/t7703-repack-geometric.sh b/t/t7703-repack-geometric.sh index 4abc7d4c55..9dd002437f 100755 --- a/t/t7703-repack-geometric.sh +++ b/t/t7703-repack-geometric.sh @@ -313,4 +313,27 @@ test_expect_success '--geometric --write-midx with packfiles in main and alterna test_cmp expect actual ' +test_expect_success '--geometric --with-midx with no local objects' ' + test_when_finished "rm -fr shared member" && + + # Create a repository with a single packfile that acts as alternate + # object database. + git init shared && + test_commit -C shared "shared-objects" && + git -C shared repack -ad && + + # Create a second repository linked to the first one and perform a + # geometric repack on it. + git clone --shared shared member && + git -C member repack --geometric 2 --write-midx 2>err && + test_must_be_empty err && + + # Assert that we wrote neither a new packfile nor a multi-pack-index. + # We should not have a packfile because the single packfile in the + # alternate object database does not invalidate the geometric sequence. + # And we should not have a multi-pack-index because these only index + # local packfiles, and there are none. + test_dir_is_empty member/$packdir +' + test_done From b7b8f048f567f1dde24874e82227fbf2c64a1aed Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 14 Apr 2023 08:01:45 +0200 Subject: [PATCH 04/10] pack-objects: split out `--stdin-packs` tests into separate file The test suite for git-pack-objects(1) is quite huge, and we're about to add more tests that relate to the `--stdin-packs` option. Split out all tests related to this option into a standalone file so that it becomes easier to test the feature in isolation. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- t/t5300-pack-object.sh | 135 ------------------------------- t/t5331-pack-objects-stdin.sh | 145 ++++++++++++++++++++++++++++++++++ 2 files changed, 145 insertions(+), 135 deletions(-) create mode 100755 t/t5331-pack-objects-stdin.sh diff --git a/t/t5300-pack-object.sh b/t/t5300-pack-object.sh index f8a0f309e2..d2ce236d61 100755 --- a/t/t5300-pack-object.sh +++ b/t/t5300-pack-object.sh @@ -589,141 +589,6 @@ test_expect_success 'prefetch objects' ' test_line_count = 1 donelines ' -test_expect_success 'setup for --stdin-packs tests' ' - git init stdin-packs && - ( - cd stdin-packs && - - test_commit A && - test_commit B && - test_commit C && - - for id in A B C - do - git pack-objects .git/objects/pack/pack-$id \ - --incremental --revs <<-EOF || exit 1 - refs/tags/$id - EOF - done && - - ls -la .git/objects/pack - ) -' - -test_expect_success '--stdin-packs with excluded packs' ' - ( - cd stdin-packs && - - PACK_A="$(basename .git/objects/pack/pack-A-*.pack)" && - PACK_B="$(basename .git/objects/pack/pack-B-*.pack)" && - PACK_C="$(basename .git/objects/pack/pack-C-*.pack)" && - - git pack-objects test --stdin-packs <<-EOF && - $PACK_A - ^$PACK_B - $PACK_C - EOF - - ( - git show-index <$(ls .git/objects/pack/pack-A-*.idx) && - git show-index <$(ls .git/objects/pack/pack-C-*.idx) - ) >expect.raw && - git show-index <$(ls test-*.idx) >actual.raw && - - cut -d" " -f2 expect && - cut -d" " -f2 actual && - test_cmp expect actual - ) -' - -test_expect_success '--stdin-packs is incompatible with --filter' ' - ( - cd stdin-packs && - test_must_fail git pack-objects --stdin-packs --stdout \ - --filter=blob:none err && - test_i18ngrep "cannot use --filter with --stdin-packs" err - ) -' - -test_expect_success '--stdin-packs is incompatible with --revs' ' - ( - cd stdin-packs && - test_must_fail git pack-objects --stdin-packs --revs out \ - err && - test_i18ngrep "cannot use internal rev list with --stdin-packs" err - ) -' - -test_expect_success '--stdin-packs with loose objects' ' - ( - cd stdin-packs && - - PACK_A="$(basename .git/objects/pack/pack-A-*.pack)" && - PACK_B="$(basename .git/objects/pack/pack-B-*.pack)" && - PACK_C="$(basename .git/objects/pack/pack-C-*.pack)" && - - test_commit D && # loose - - git pack-objects test2 --stdin-packs --unpacked <<-EOF && - $PACK_A - ^$PACK_B - $PACK_C - EOF - - ( - git show-index <$(ls .git/objects/pack/pack-A-*.idx) && - git show-index <$(ls .git/objects/pack/pack-C-*.idx) && - git rev-list --objects --no-object-names \ - refs/tags/C..refs/tags/D - - ) >expect.raw && - ls -la . && - git show-index <$(ls test2-*.idx) >actual.raw && - - cut -d" " -f2 expect && - cut -d" " -f2 actual && - test_cmp expect actual - ) -' - -test_expect_success '--stdin-packs with broken links' ' - ( - cd stdin-packs && - - # make an unreachable object with a bogus parent - git cat-file -p HEAD >commit && - sed "s/$(git rev-parse HEAD^)/$(test_oid zero)/" in && - - git pack-objects .git/objects/pack/pack-D expect.raw && - git show-index <$(ls test3-*.idx) >actual.raw && - - cut -d" " -f2 expect && - cut -d" " -f2 actual && - test_cmp expect actual - ) -' - test_expect_success 'negative window clamps to 0' ' git pack-objects --progress --window=-1 neg-window stderr && check_deltas stderr = 0 diff --git a/t/t5331-pack-objects-stdin.sh b/t/t5331-pack-objects-stdin.sh new file mode 100755 index 0000000000..d5eece5899 --- /dev/null +++ b/t/t5331-pack-objects-stdin.sh @@ -0,0 +1,145 @@ +#!/bin/sh + +test_description='pack-objects --stdin' +GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main +export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME + +TEST_PASSES_SANITIZE_LEAK=true +. ./test-lib.sh + +test_expect_success 'setup for --stdin-packs tests' ' + git init stdin-packs && + ( + cd stdin-packs && + + test_commit A && + test_commit B && + test_commit C && + + for id in A B C + do + git pack-objects .git/objects/pack/pack-$id \ + --incremental --revs <<-EOF || exit 1 + refs/tags/$id + EOF + done && + + ls -la .git/objects/pack + ) +' + +test_expect_success '--stdin-packs with excluded packs' ' + ( + cd stdin-packs && + + PACK_A="$(basename .git/objects/pack/pack-A-*.pack)" && + PACK_B="$(basename .git/objects/pack/pack-B-*.pack)" && + PACK_C="$(basename .git/objects/pack/pack-C-*.pack)" && + + git pack-objects test --stdin-packs <<-EOF && + $PACK_A + ^$PACK_B + $PACK_C + EOF + + ( + git show-index <$(ls .git/objects/pack/pack-A-*.idx) && + git show-index <$(ls .git/objects/pack/pack-C-*.idx) + ) >expect.raw && + git show-index <$(ls test-*.idx) >actual.raw && + + cut -d" " -f2 expect && + cut -d" " -f2 actual && + test_cmp expect actual + ) +' + +test_expect_success '--stdin-packs is incompatible with --filter' ' + ( + cd stdin-packs && + test_must_fail git pack-objects --stdin-packs --stdout \ + --filter=blob:none err && + test_i18ngrep "cannot use --filter with --stdin-packs" err + ) +' + +test_expect_success '--stdin-packs is incompatible with --revs' ' + ( + cd stdin-packs && + test_must_fail git pack-objects --stdin-packs --revs out \ + err && + test_i18ngrep "cannot use internal rev list with --stdin-packs" err + ) +' + +test_expect_success '--stdin-packs with loose objects' ' + ( + cd stdin-packs && + + PACK_A="$(basename .git/objects/pack/pack-A-*.pack)" && + PACK_B="$(basename .git/objects/pack/pack-B-*.pack)" && + PACK_C="$(basename .git/objects/pack/pack-C-*.pack)" && + + test_commit D && # loose + + git pack-objects test2 --stdin-packs --unpacked <<-EOF && + $PACK_A + ^$PACK_B + $PACK_C + EOF + + ( + git show-index <$(ls .git/objects/pack/pack-A-*.idx) && + git show-index <$(ls .git/objects/pack/pack-C-*.idx) && + git rev-list --objects --no-object-names \ + refs/tags/C..refs/tags/D + + ) >expect.raw && + ls -la . && + git show-index <$(ls test2-*.idx) >actual.raw && + + cut -d" " -f2 expect && + cut -d" " -f2 actual && + test_cmp expect actual + ) +' + +test_expect_success '--stdin-packs with broken links' ' + ( + cd stdin-packs && + + # make an unreachable object with a bogus parent + git cat-file -p HEAD >commit && + sed "s/$(git rev-parse HEAD^)/$(test_oid zero)/" in && + + git pack-objects .git/objects/pack/pack-D expect.raw && + git show-index <$(ls test3-*.idx) >actual.raw && + + cut -d" " -f2 expect && + cut -d" " -f2 actual && + test_cmp expect actual + ) +' + +test_done From 732194b5f2ff50fc536b28c3d5a3e43e0110419b Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 14 Apr 2023 08:01:49 +0200 Subject: [PATCH 05/10] pack-objects: fix error when packing same pack twice When passed the same packfile twice via `--stdin-packs` we return an error that the packfile supposedly was not found. This is because when reading packs into the list of included or excluded packfiles, we will happily re-add packfiles even if they are part of the lists already. And while the list can now contain duplicates, we will only set the `util` pointer of the first list entry to the `packed_git` structure. We notice that at a later point when checking that all list entries have their `util` pointer set and die with an error. While this is kind of a nonsensical request, this scenario can be hit when doing geometric repacks. When a repository is connected to an alternate object directory and both have the exact same packfile then both would get added to the geometric sequence. And when we then decide to perform the repack, we will invoke git-pack-objects(1) with the same packfile twice. Fix this bug by removing any duplicates from both the included and excluded packs. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 2 ++ t/t5331-pack-objects-stdin.sh | 27 +++++++++++++++++++++++++++ t/t7703-repack-geometric.sh | 25 +++++++++++++++++++++++++ 3 files changed, 54 insertions(+) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 74a167a180..c97ae1b6d0 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -3344,7 +3344,9 @@ static void read_packs_list_from_stdin(void) } string_list_sort(&include_packs); + string_list_remove_duplicates(&include_packs, 0); string_list_sort(&exclude_packs); + string_list_remove_duplicates(&exclude_packs, 0); for (p = get_all_packs(the_repository); p; p = p->next) { const char *pack_name = pack_basename(p); diff --git a/t/t5331-pack-objects-stdin.sh b/t/t5331-pack-objects-stdin.sh index d5eece5899..71c8a4a635 100755 --- a/t/t5331-pack-objects-stdin.sh +++ b/t/t5331-pack-objects-stdin.sh @@ -7,6 +7,12 @@ export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME TEST_PASSES_SANITIZE_LEAK=true . ./test-lib.sh +packed_objects () { + git show-index <"$1" >tmp-object-list && + cut -d' ' -f2 tmp-object-list | sort && + rm tmp-object-list + } + test_expect_success 'setup for --stdin-packs tests' ' git init stdin-packs && ( @@ -142,4 +148,25 @@ test_expect_success '--stdin-packs with broken links' ' ) ' +test_expect_success 'pack-objects --stdin with duplicate packfile' ' + test_when_finished "rm -fr repo" && + + git init repo && + ( + cd repo && + test_commit "commit" && + git repack -ad && + + { + basename .git/objects/pack/pack-*.pack && + basename .git/objects/pack/pack-*.pack + } >packfiles && + + git pack-objects --stdin-packs generated-pack expect && + packed_objects generated-pack-*.idx >actual && + test_cmp expect actual + ) +' + test_done diff --git a/t/t7703-repack-geometric.sh b/t/t7703-repack-geometric.sh index 9dd002437f..a358dfb7bd 100755 --- a/t/t7703-repack-geometric.sh +++ b/t/t7703-repack-geometric.sh @@ -336,4 +336,29 @@ test_expect_success '--geometric --with-midx with no local objects' ' test_dir_is_empty member/$packdir ' +test_expect_success '--geometric with same pack in main and alternate ODB' ' + test_when_finished "rm -fr shared member" && + + # Create a repository with a single packfile that acts as alternate + # object database. + git init shared && + test_commit -C shared "shared-objects" && + git -C shared repack -ad && + + # We create the member repository as an exact copy so that it has the + # same packfile. + cp -r shared member && + test-tool path-utils real_path shared/.git/objects >member/.git/objects/info/alternates && + find shared/.git/objects -type f >expected-files && + + # Verify that we can repack objects as expected without observing any + # error. Having the same packfile in both ODBs used to cause an error + # in git-pack-objects(1). + git -C member repack --geometric 2 2>err && + test_must_be_empty err && + # Nothing should have changed. + find shared/.git/objects -type f >actual-files && + test_cmp expected-files actual-files +' + test_done From 752b465c3c0fd7f503b50c326017b8b13af83c3b Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 14 Apr 2023 08:01:54 +0200 Subject: [PATCH 06/10] pack-objects: fix error when same packfile is included and excluded When passing the same packfile both as included and excluded via the `--stdin-packs` option, then we will return an error because the excluded packfile cannot be found. This is because we will only set the `util` pointer for the included packfile list if it was found, so that we later die when we notice that it's in fact not set for the excluded packfile list. Fix this bug by always setting the `util` pointer for both the included and excluded list entries. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 8 +++----- t/t5331-pack-objects-stdin.sh | 20 ++++++++++++++++++++ 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index c97ae1b6d0..884b84a2fd 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -3351,11 +3351,9 @@ static void read_packs_list_from_stdin(void) for (p = get_all_packs(the_repository); p; p = p->next) { const char *pack_name = pack_basename(p); - item = string_list_lookup(&include_packs, pack_name); - if (!item) - item = string_list_lookup(&exclude_packs, pack_name); - - if (item) + if ((item = string_list_lookup(&include_packs, pack_name))) + item->util = p; + if ((item = string_list_lookup(&exclude_packs, pack_name))) item->util = p; } diff --git a/t/t5331-pack-objects-stdin.sh b/t/t5331-pack-objects-stdin.sh index 71c8a4a635..3ef736ec05 100755 --- a/t/t5331-pack-objects-stdin.sh +++ b/t/t5331-pack-objects-stdin.sh @@ -169,4 +169,24 @@ test_expect_success 'pack-objects --stdin with duplicate packfile' ' ) ' +test_expect_success 'pack-objects --stdin with same packfile excluded and included' ' + test_when_finished "rm -fr repo" && + + git init repo && + ( + cd repo && + test_commit "commit" && + git repack -ad && + + { + basename .git/objects/pack/pack-*.pack && + printf "^%s\n" "$(basename .git/objects/pack/pack-*.pack)" + } >packfiles && + + git pack-objects --stdin-packs generated-pack packed-objects && + test_must_be_empty packed-objects + ) +' + test_done From f3028418c3b68350aa810064c95283062c9157d6 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 14 Apr 2023 08:01:59 +0200 Subject: [PATCH 07/10] pack-objects: extend test coverage of `--stdin-packs` with alternates We don't have any tests that verify that git-pack-objects(1) works with `--stdin-packs` when combined with alternate object directories. Add some to make sure that the basic functionality works as expected. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- t/t5331-pack-objects-stdin.sh | 48 +++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/t/t5331-pack-objects-stdin.sh b/t/t5331-pack-objects-stdin.sh index 3ef736ec05..acab31667a 100755 --- a/t/t5331-pack-objects-stdin.sh +++ b/t/t5331-pack-objects-stdin.sh @@ -189,4 +189,52 @@ test_expect_success 'pack-objects --stdin with same packfile excluded and includ ) ' +test_expect_success 'pack-objects --stdin with packfiles from alternate object database' ' + test_when_finished "rm -fr shared member" && + + # Set up a shared repository with a single packfile. + git init shared && + test_commit -C shared "shared-objects" && + git -C shared repack -ad && + basename shared/.git/objects/pack/pack-*.pack >packfile && + + # Set up a repository that is connected to the shared repository. This + # repository has no objects on its own, but we still expect to be able + # to pack objects from its alternate. + git clone --shared shared member && + git -C member pack-objects --stdin-packs generated-pack packfiles && + + { + packed_objects shared/.git/objects/pack/pack-*.idx && + packed_objects member/.git/objects/pack/pack-*.idx + } | sort >expected-objects && + + git -C member pack-objects --stdin-packs generated-pack actual-objects && + test_cmp expected-objects actual-objects +' + test_done From 19a3a7bde9164c86353c944e939a1dcb537993f9 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 14 Apr 2023 08:02:04 +0200 Subject: [PATCH 08/10] t/helper: allow chmtime to print verbosely without modifying mtime The `test-tool chmtime` helper allows us to both read and modify the modification time of files. But while it is possible to only read the mtimes of a file via `--get`, it is not possible to read the mtimes and report them together with their respective file paths via the `--verbose` flag without also modifying the mtime at the same time. Fix this so that it is possible to call `test-tool chmtime --verbose ...` without modifying any mtimes. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- t/helper/test-chmtime.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/t/helper/test-chmtime.c b/t/helper/test-chmtime.c index dc28890a18..0e5538833a 100644 --- a/t/helper/test-chmtime.c +++ b/t/helper/test-chmtime.c @@ -94,7 +94,7 @@ int cmd__chmtime(int argc, const char **argv) if (timespec_arg(argv[i], &set_time, &set_eq)) { ++i; } else { - if (get == 0) { + if (get == 0 && verbose == 0) { fprintf(stderr, "Not a base-10 integer: %s\n", argv[i] + 1); goto usage; } From 932c16c04b5e41ee1c76d5640ec3ae67e1900c07 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 14 Apr 2023 08:02:08 +0200 Subject: [PATCH 09/10] repack: honor `-l` when calculating pack geometry When the user passes `-l` to git-repack(1), then they essentially ask us to only repack objects part of the local object database while ignoring any packfiles part of an alternate object database. And we in fact honor this bit when doing a geometric repack as the resulting packfile will only ever contain local objects. What we're missing though is that we don't take locality of packfiles into account when computing whether the geometric sequence is intact or not. So even though we would only ever roll up local packfiles anyway, we could end up trying to repack because of non-local packfiles. This does not make much sense, and in the worst case it can cause us to try and do the geometric repack over and over again because we're never able to restore the geometric sequence. Fix this bug by honoring whether the user has passed `-l`. If so, we skip adding any non-local packfiles to the pack geometry. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/repack.c | 13 +++++++-- t/t7703-repack-geometric.sh | 57 +++++++++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+), 2 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index a591a4ddd6..165fe1b628 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -321,7 +321,8 @@ static int geometry_cmp(const void *va, const void *vb) } static void init_pack_geometry(struct pack_geometry **geometry_p, - struct string_list *existing_kept_packs) + struct string_list *existing_kept_packs, + const struct pack_objects_args *args) { struct packed_git *p; struct pack_geometry *geometry; @@ -331,6 +332,14 @@ static void init_pack_geometry(struct pack_geometry **geometry_p, geometry = *geometry_p; for (p = get_all_packs(the_repository); p; p = p->next) { + if (args->local && !p->pack_local) + /* + * When asked to only repack local packfiles we skip + * over any packfiles that are borrowed from alternate + * object directories. + */ + continue; + if (!pack_kept_objects) { /* * Any pack that has its pack_keep bit set will appear @@ -898,7 +907,7 @@ int cmd_repack(int argc, const char **argv, const char *prefix) if (geometric_factor) { if (pack_everything) die(_("options '%s' and '%s' cannot be used together"), "--geometric", "-A/-a"); - init_pack_geometry(&geometry, &existing_kept_packs); + init_pack_geometry(&geometry, &existing_kept_packs, &po_args); split_pack_geometry(geometry, geometric_factor); } diff --git a/t/t7703-repack-geometric.sh b/t/t7703-repack-geometric.sh index a358dfb7bd..be8fe78448 100755 --- a/t/t7703-repack-geometric.sh +++ b/t/t7703-repack-geometric.sh @@ -10,6 +10,12 @@ objdir=.git/objects packdir=$objdir/pack midx=$objdir/pack/multi-pack-index +packed_objects () { + git show-index <"$1" >tmp-object-list && + cut -d' ' -f2 tmp-object-list | sort && + rm tmp-object-list + } + test_expect_success '--geometric with no packs' ' git init geometric && test_when_finished "rm -fr geometric" && @@ -361,4 +367,55 @@ test_expect_success '--geometric with same pack in main and alternate ODB' ' test_cmp expected-files actual-files ' +test_expect_success '--geometric -l with non-intact geometric sequence across ODBs' ' + test_when_finished "rm -fr shared member" && + + git init shared && + test_commit_bulk -C shared --start=1 1 && + + git clone --shared shared member && + test_commit_bulk -C member --start=2 1 && + + # Verify that our assumptions actually hold: both generated packfiles + # should have three objects and should be non-equal. + packed_objects shared/.git/objects/pack/pack-*.idx >shared-objects && + packed_objects member/.git/objects/pack/pack-*.idx >member-objects && + test_line_count = 3 shared-objects && + test_line_count = 3 member-objects && + ! test_cmp shared-objects member-objects && + + # Perform the geometric repack. With `-l`, we should only see the local + # packfile and thus arrive at the conclusion that the geometric + # sequence is intact. We thus expect no changes. + # + # Note that we are tweaking mtimes of the packfiles so that we can + # verify they did not change. This is done in order to detect the case + # where we do repack objects, but the resulting packfile is the same. + test-tool chmtime --verbose =0 member/.git/objects/pack/* >expected-member-packs && + git -C member repack --geometric=2 -l -d && + test-tool chmtime --verbose member/.git/objects/pack/* >actual-member-packs && + test_cmp expected-member-packs actual-member-packs && + + { + packed_objects shared/.git/objects/pack/pack-*.idx && + packed_objects member/.git/objects/pack/pack-*.idx + } | sort >expected-objects && + + # On the other hand, when doing a non-local geometric repack we should + # see both packfiles and thus repack them. We expect that the shared + # object database was not changed. + test-tool chmtime --verbose =0 shared/.git/objects/pack/* >expected-shared-packs && + git -C member repack --geometric=2 -d && + test-tool chmtime --verbose shared/.git/objects/pack/* >actual-shared-packs && + test_cmp expected-shared-packs actual-shared-packs && + + # Furthermore, we expect that the member repository now has a single + # packfile that contains the combined shared and non-shared objects. + ls member/.git/objects/pack/pack-*.idx >actual && + test_line_count = 1 actual && + packed_objects member/.git/objects/pack/pack-*.idx >actual-objects && + test_line_count = 6 actual-objects && + test_cmp expected-objects actual-objects +' + test_done From d85cd1877777aa92c73868b9e86516d4be04b4a0 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 14 Apr 2023 08:02:12 +0200 Subject: [PATCH 10/10] repack: disable writing bitmaps when doing a local repack In order to write a bitmap, we need to have full coverage of all objects that are about to be packed. In the traditional non-multi-pack-index world this meant we need to do a full repack of all objects into a single packfile. But in the new multi-pack-index world we can get away with writing bitmaps when we have multiple packfiles as long as the multi-pack-index covers all objects. This is not always the case though. When asked to perform a repack of local objects, only, then we cannot guarantee to have full coverage of all objects regardless of whether we do a full repack or a repack with a multi-pack-index. The end result is that writing the bitmap will fail in both worlds: $ git multi-pack-index write --stdin-packs --bitmap Signed-off-by: Junio C Hamano --- builtin/repack.c | 12 ++++++++++++ object-file.c | 6 ++++++ object-store.h | 1 + t/t7700-repack.sh | 17 +++++++++++++++++ t/t7703-repack-geometric.sh | 27 +++++++++++++++++++++++++++ 5 files changed, 63 insertions(+) diff --git a/builtin/repack.c b/builtin/repack.c index 165fe1b628..c26f06769b 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -885,6 +885,18 @@ int cmd_repack(int argc, const char **argv, const char *prefix) if (write_bitmaps && !(pack_everything & ALL_INTO_ONE) && !write_midx) die(_(incremental_bitmap_conflict_error)); + if (write_bitmaps && po_args.local && has_alt_odb(the_repository)) { + /* + * When asked to do a local repack, but we have + * packfiles that are inherited from an alternate, then + * we cannot guarantee that the multi-pack-index would + * have full coverage of all objects. We thus disable + * writing bitmaps in that case. + */ + warning(_("disabling bitmap writing, as some objects are not being packed")); + write_bitmaps = 0; + } + if (write_midx && write_bitmaps) { struct strbuf path = STRBUF_INIT; diff --git a/object-file.c b/object-file.c index 939865c1ae..a6574b265d 100644 --- a/object-file.c +++ b/object-file.c @@ -944,6 +944,12 @@ void prepare_alt_odb(struct repository *r) r->objects->loaded_alternates = 1; } +int has_alt_odb(struct repository *r) +{ + prepare_alt_odb(r); + return !!r->objects->odb->next; +} + /* Returns 1 if we have successfully freshened the file, 0 otherwise. */ static int freshen_file(const char *fn) { diff --git a/object-store.h b/object-store.h index 1a713d89d7..8ba010a9d6 100644 --- a/object-store.h +++ b/object-store.h @@ -56,6 +56,7 @@ KHASH_INIT(odb_path_map, const char * /* key: odb_path */, struct object_directory *, 1, fspathhash, fspatheq) void prepare_alt_odb(struct repository *r); +int has_alt_odb(struct repository *r); char *compute_alternate_path(const char *path, struct strbuf *err); struct object_directory *find_odb(struct repository *r, const char *obj_dir); typedef int alt_odb_fn(struct object_directory *, void *); diff --git a/t/t7700-repack.sh b/t/t7700-repack.sh index 4aabe98139..faa739eeb9 100755 --- a/t/t7700-repack.sh +++ b/t/t7700-repack.sh @@ -106,6 +106,23 @@ test_expect_success SYMLINKS '--local keeps packs when alternate is objectdir ' test_cmp expect actual ' +test_expect_success '--local disables writing bitmaps when connected to alternate ODB' ' + test_when_finished "rm -rf shared member" && + + git init shared && + git clone --shared shared member && + ( + cd member && + test_commit "object" && + GIT_TEST_MULTI_PACK_INDEX=0 git repack -Adl --write-bitmap-index 2>err && + cat >expect <<-EOF && + warning: disabling bitmap writing, as some objects are not being packed + EOF + test_cmp expect err && + test_path_is_missing .git/objects/pack-*.bitmap + ) +' + test_expect_success 'packed obs in alt ODB are repacked even when local repo is packless' ' mkdir alt_objects/pack && mv .git/objects/pack/* alt_objects/pack && diff --git a/t/t7703-repack-geometric.sh b/t/t7703-repack-geometric.sh index be8fe78448..00f28fb558 100755 --- a/t/t7703-repack-geometric.sh +++ b/t/t7703-repack-geometric.sh @@ -418,4 +418,31 @@ test_expect_success '--geometric -l with non-intact geometric sequence across OD test_cmp expected-objects actual-objects ' +test_expect_success '--geometric -l disables writing bitmaps with non-local packfiles' ' + test_when_finished "rm -fr shared member" && + + git init shared && + test_commit_bulk -C shared --start=1 1 && + + git clone --shared shared member && + test_commit_bulk -C member --start=2 1 && + + # When performing a geometric repack with `-l` while connected to an + # alternate object database that has a packfile we do not have full + # coverage of objects. As a result, we expect that writing the bitmap + # will be disabled. + git -C member repack -l --geometric=2 --write-midx --write-bitmap-index 2>err && + cat >expect <<-EOF && + warning: disabling bitmap writing, as some objects are not being packed + EOF + test_cmp expect err && + test_path_is_missing member/.git/objects/pack/multi-pack-index-*.bitmap && + + # On the other hand, when we repack without `-l`, we should see that + # the bitmap gets created. + git -C member repack --geometric=2 --write-midx --write-bitmap-index 2>err && + test_must_be_empty err && + test_path_is_file member/.git/objects/pack/multi-pack-index-*.bitmap +' + test_done