diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 58a9b16126..79e1e6fb52 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -206,6 +206,7 @@ static int have_non_local_packs; static int incremental; static int ignore_packed_keep_on_disk; static int ignore_packed_keep_in_core; +static int ignore_packed_keep_in_core_has_cruft; static int allow_ofs_delta; static struct pack_idx_option pack_idx_opts; static const char *base_name; @@ -1502,8 +1503,60 @@ static int have_duplicate_entry(const struct object_id *oid, return 1; } +static int want_cruft_object_mtime(struct repository *r, + const struct object_id *oid, + unsigned flags, uint32_t mtime) +{ + struct packed_git **cache; + + for (cache = kept_pack_cache(r, flags); *cache; cache++) { + struct packed_git *p = *cache; + off_t ofs; + uint32_t candidate_mtime; + + ofs = find_pack_entry_one(oid, p); + if (!ofs) + continue; + + /* + * We have a copy of the object 'oid' in a non-cruft + * pack. We can avoid packing an additional copy + * regardless of what the existing copy's mtime is since + * it is outside of a cruft pack. + */ + if (!p->is_cruft) + return 0; + + /* + * If we have a copy of the object 'oid' in a cruft + * pack, then either read the cruft pack's mtime for + * that object, or, if that can't be loaded, assume the + * pack's mtime itself. + */ + if (!load_pack_mtimes(p)) { + uint32_t pos; + if (offset_to_pack_pos(p, ofs, &pos) < 0) + continue; + candidate_mtime = nth_packed_mtime(p, pos); + } else { + candidate_mtime = p->mtime; + } + + /* + * We have a surviving copy of the object in a cruft + * pack whose mtime is greater than or equal to the one + * we are considering. We can thus avoid packing an + * additional copy of that object. + */ + if (mtime <= candidate_mtime) + return 0; + } + + return -1; +} + static int want_found_object(const struct object_id *oid, int exclude, - struct packed_git *p) + struct packed_git *p, uint32_t mtime) { if (exclude) return 1; @@ -1553,12 +1606,29 @@ static int want_found_object(const struct object_id *oid, int exclude, if (ignore_packed_keep_in_core) flags |= IN_CORE_KEEP_PACKS; - if (ignore_packed_keep_on_disk && p->pack_keep) - return 0; - if (ignore_packed_keep_in_core && p->pack_keep_in_core) - return 0; - if (has_object_kept_pack(p->repo, oid, flags)) - return 0; + /* + * If the object is in a pack that we want to ignore, *and* we + * don't have any cruft packs that are being retained, we can + * abort quickly. + */ + if (!ignore_packed_keep_in_core_has_cruft) { + if (ignore_packed_keep_on_disk && p->pack_keep) + return 0; + if (ignore_packed_keep_in_core && p->pack_keep_in_core) + return 0; + if (has_object_kept_pack(p->repo, oid, flags)) + return 0; + } else { + /* + * But if there is at least one cruft pack which + * is being kept, we only want to include the + * provided object if it has a strictly greater + * mtime than any existing cruft copy. + */ + if (!want_cruft_object_mtime(p->repo, oid, flags, + mtime)) + return 0; + } } /* @@ -1577,7 +1647,8 @@ static int want_object_in_pack_one(struct packed_git *p, const struct object_id *oid, int exclude, struct packed_git **found_pack, - off_t *found_offset) + off_t *found_offset, + uint32_t found_mtime) { off_t offset; @@ -1593,7 +1664,7 @@ static int want_object_in_pack_one(struct packed_git *p, *found_offset = offset; *found_pack = p; } - return want_found_object(oid, exclude, p); + return want_found_object(oid, exclude, p, found_mtime); } return -1; } @@ -1607,10 +1678,11 @@ static int want_object_in_pack_one(struct packed_git *p, * function finds if there is any pack that has the object and returns the pack * and its offset in these variables. */ -static int want_object_in_pack(const struct object_id *oid, - int exclude, - struct packed_git **found_pack, - off_t *found_offset) +static int want_object_in_pack_mtime(const struct object_id *oid, + int exclude, + struct packed_git **found_pack, + off_t *found_offset, + uint32_t found_mtime) { int want; struct list_head *pos; @@ -1625,7 +1697,8 @@ static int want_object_in_pack(const struct object_id *oid, * are present we will determine the answer right now. */ if (*found_pack) { - want = want_found_object(oid, exclude, *found_pack); + want = want_found_object(oid, exclude, *found_pack, + found_mtime); if (want != -1) return want; @@ -1636,7 +1709,7 @@ static int want_object_in_pack(const struct object_id *oid, for (m = get_multi_pack_index(the_repository); m; m = m->next) { struct pack_entry e; if (fill_midx_entry(the_repository, oid, &e, m)) { - want = want_object_in_pack_one(e.p, oid, exclude, found_pack, found_offset); + want = want_object_in_pack_one(e.p, oid, exclude, found_pack, found_offset, found_mtime); if (want != -1) return want; } @@ -1644,7 +1717,7 @@ static int want_object_in_pack(const struct object_id *oid, list_for_each(pos, get_packed_git_mru(the_repository)) { struct packed_git *p = list_entry(pos, struct packed_git, mru); - want = want_object_in_pack_one(p, oid, exclude, found_pack, found_offset); + want = want_object_in_pack_one(p, oid, exclude, found_pack, found_offset, found_mtime); if (!exclude && want > 0) list_move(&p->mru, get_packed_git_mru(the_repository)); @@ -1674,6 +1747,15 @@ static int want_object_in_pack(const struct object_id *oid, return 1; } +static inline int want_object_in_pack(const struct object_id *oid, + int exclude, + struct packed_git **found_pack, + off_t *found_offset) +{ + return want_object_in_pack_mtime(oid, exclude, found_pack, found_offset, + 0); +} + static struct object_entry *create_object_entry(const struct object_id *oid, enum object_type type, uint32_t hash, @@ -3606,7 +3688,7 @@ static void add_cruft_object_entry(const struct object_id *oid, enum object_type entry->no_try_delta = no_try_delta(name); } } else { - if (!want_object_in_pack(oid, 0, &pack, &offset)) + if (!want_object_in_pack_mtime(oid, 0, &pack, &offset, mtime)) return; if (!pack && type == OBJ_BLOB && !has_loose_object(oid)) { /* @@ -3680,6 +3762,8 @@ static void mark_pack_kept_in_core(struct string_list *packs, unsigned keep) struct packed_git *p = item->util; if (!p) die(_("could not find pack '%s'"), item->string); + if (p->is_cruft && keep) + ignore_packed_keep_in_core_has_cruft = 1; p->pack_keep_in_core = keep; } } diff --git a/packfile.c b/packfile.c index 2d80d80cb3..9d09f8bc72 100644 --- a/packfile.c +++ b/packfile.c @@ -24,6 +24,7 @@ #include "commit-graph.h" #include "pack-revindex.h" #include "promisor-remote.h" +#include "pack-mtimes.h" char *odb_pack_name(struct repository *r, struct strbuf *buf, const unsigned char *hash, const char *ext) @@ -2107,7 +2108,7 @@ static void maybe_invalidate_kept_pack_cache(struct repository *r, r->objects->kept_pack_cache.flags = 0; } -static struct packed_git **kept_pack_cache(struct repository *r, unsigned flags) +struct packed_git **kept_pack_cache(struct repository *r, unsigned flags) { maybe_invalidate_kept_pack_cache(r, flags); diff --git a/packfile.h b/packfile.h index 00ada7a938..25097213d0 100644 --- a/packfile.h +++ b/packfile.h @@ -197,6 +197,8 @@ int has_object_pack(struct repository *r, const struct object_id *oid); int has_object_kept_pack(struct repository *r, const struct object_id *oid, unsigned flags); +struct packed_git **kept_pack_cache(struct repository *r, unsigned flags); + /* * Return 1 if an object in a promisor packfile is or refers to the given * object, 0 otherwise. diff --git a/t/t7704-repack-cruft.sh b/t/t7704-repack-cruft.sh index 959e6e2648..43d2947d28 100755 --- a/t/t7704-repack-cruft.sh +++ b/t/t7704-repack-cruft.sh @@ -304,6 +304,72 @@ test_expect_success '--max-cruft-size with freshened objects (packed)' ' ) ' +test_expect_success '--max-cruft-size with freshened objects (previously cruft)' ' + repo="max-cruft-size-threshold" && + + test_when_finished "rm -fr $repo" && + git init "$repo" && + ( + cd "$repo" && + + test_commit base && + foo="$(generate_random_blob foo $((2*1024*1024)))" && + bar="$(generate_random_blob bar $((2*1024*1024)))" && + baz="$(generate_random_blob baz $((2*1024*1024)))" && + + test-tool chmtime --get -100000 \ + "$objdir/$(test_oid_to_path "$foo")" >foo.old && + test-tool chmtime --get -100000 \ + "$objdir/$(test_oid_to_path "$bar")" >bar.old && + test-tool chmtime --get -100000 \ + "$objdir/$(test_oid_to_path "$baz")" >baz.old && + + git repack --cruft -d && + + # Make an identical copy of foo stored in a pack with a more + # recent mtime. + foo="$(generate_random_blob foo $((2*1024*1024)))" && + foo_pack="$(echo "$foo" | git pack-objects $packdir/pack)" && + test-tool chmtime --get -100 \ + "$packdir/pack-$foo_pack.pack" >foo.new && + git prune-packed && + + # Make a loose copy of bar, also with a more recent mtime. + bar="$(generate_random_blob bar $((2*1024*1024)))" && + test-tool chmtime --get -100 \ + "$objdir/$(test_oid_to_path "$bar")" >bar.new && + + # Make a new cruft object $quux to ensure we do not + # generate an identical pack to the existing cruft + # pack. + quux="$(generate_random_blob quux $((1024)))" && + test-tool chmtime --get -100 \ + "$objdir/$(test_oid_to_path "$quux")" >quux.new && + + git repack --cruft --max-cruft-size=3M -d && + + for p in $packdir/pack-*.mtimes + do + test-tool pack-mtimes "$(basename "$p")" || return 1 + done >actual.raw && + sort actual.raw >actual && + + # Among the set of all cruft packs, we should see both + # mtimes for object $foo and $bar, as well as the + # single new copy of $baz. + sort >expect <<-EOF && + $foo $(cat foo.old) + $foo $(cat foo.new) + $bar $(cat bar.old) + $bar $(cat bar.new) + $baz $(cat baz.old) + $quux $(cat quux.new) + EOF + + test_cmp expect actual + ) +' + test_expect_success '--max-cruft-size with pruning' ' git init max-cruft-size-prune && (