Merge branch 'ds/path-walk-2'
"git pack-objects" learns to find delta bases from blobs at the same path, using the --path-walk API. * ds/path-walk-2: pack-objects: allow --shallow and --path-walk path-walk: add new 'edge_aggressive' option pack-objects: thread the path-based compression pack-objects: refactor path-walk delta phase scalar: enable path-walk during push via config pack-objects: enable --path-walk via config repack: add --path-walk option t5538: add tests to confirm deltas in shallow pushes pack-objects: introduce GIT_TEST_PACK_PATH_WALK p5313: add performance tests for --path-walk pack-objects: update usage to match docs pack-objects: add --path-walk option pack-objects: extract should_attempt_deltas()maint
commit
88134a8417
|
|
@ -20,6 +20,10 @@ walking fewer objects.
|
|||
+
|
||||
* `pack.allowPackReuse=multi` may improve the time it takes to create a pack by
|
||||
reusing objects from multiple packs instead of just one.
|
||||
+
|
||||
* `pack.usePathWalk` may speed up packfile creation and make the packfiles be
|
||||
significantly smaller in the presence of certain filename collisions with Git's
|
||||
default name-hash.
|
||||
|
||||
feature.manyFiles::
|
||||
Enable config options that optimize for repos with many files in the
|
||||
|
|
|
|||
|
|
@ -155,6 +155,10 @@ pack.useSparse::
|
|||
commits contain certain types of direct renames. Default is
|
||||
`true`.
|
||||
|
||||
pack.usePathWalk::
|
||||
Enable the `--path-walk` option by default for `git pack-objects`
|
||||
processes. See linkgit:git-pack-objects[1] for full details.
|
||||
|
||||
pack.preferBitmapTips::
|
||||
When selecting which commits will receive bitmaps, prefer a
|
||||
commit at the tip of any reference that is a suffix of any value
|
||||
|
|
|
|||
|
|
@ -16,7 +16,7 @@ SYNOPSIS
|
|||
[--cruft] [--cruft-expiration=<time>]
|
||||
[--stdout [--filter=<filter-spec>] | <base-name>]
|
||||
[--shallow] [--keep-true-parents] [--[no-]sparse]
|
||||
[--name-hash-version=<n>] < <object-list>
|
||||
[--name-hash-version=<n>] [--path-walk] < <object-list>
|
||||
|
||||
|
||||
DESCRIPTION
|
||||
|
|
@ -375,6 +375,17 @@ many different directories. At the moment, this version is not allowed
|
|||
when writing reachability bitmap files with `--write-bitmap-index` and it
|
||||
will be automatically changed to version `1`.
|
||||
|
||||
--path-walk::
|
||||
Perform compression by first organizing objects by path, then a
|
||||
second pass that compresses across paths as normal. This has the
|
||||
potential to improve delta compression especially in the presence
|
||||
of filenames that cause collisions in Git's default name-hash
|
||||
algorithm.
|
||||
+
|
||||
Incompatible with `--delta-islands` or `--filter`. The
|
||||
`--use-bitmap-index` option will be ignored in the presence of
|
||||
`--path-walk`.
|
||||
|
||||
|
||||
DELTA ISLANDS
|
||||
-------------
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ SYNOPSIS
|
|||
[verse]
|
||||
'git repack' [-a] [-A] [-d] [-f] [-F] [-l] [-n] [-q] [-b] [-m]
|
||||
[--window=<n>] [--depth=<n>] [--threads=<n>] [--keep-pack=<pack-name>]
|
||||
[--write-midx] [--name-hash-version=<n>]
|
||||
[--write-midx] [--name-hash-version=<n>] [--path-walk]
|
||||
|
||||
DESCRIPTION
|
||||
-----------
|
||||
|
|
@ -258,6 +258,9 @@ linkgit:git-multi-pack-index[1]).
|
|||
Provide this argument to the underlying `git pack-objects` process.
|
||||
See linkgit:git-pack-objects[1] for full details.
|
||||
|
||||
--path-walk::
|
||||
Pass the `--path-walk` option to the underlying `git pack-objects`
|
||||
process. See linkgit:git-pack-objects[1] for full details.
|
||||
|
||||
CONFIGURATION
|
||||
-------------
|
||||
|
|
|
|||
|
|
@ -56,6 +56,14 @@ better off using the revision walk API instead.
|
|||
the revision walk so that the walk emits commits marked with the
|
||||
`UNINTERESTING` flag.
|
||||
|
||||
`edge_aggressive`::
|
||||
For performance reasons, usually only the boundary commits are
|
||||
explored to find UNINTERESTING objects. However, in the case of
|
||||
shallow clones it can be helpful to mark all trees and blobs
|
||||
reachable from UNINTERESTING tip commits as UNINTERESTING. This
|
||||
matches the behavior of `--objects-edge-aggressive` in the
|
||||
revision API.
|
||||
|
||||
`pl`::
|
||||
This pattern list pointer allows focusing the path-walk search to
|
||||
a set of patterns, only emitting paths that match the given
|
||||
|
|
@ -69,4 +77,5 @@ Examples
|
|||
|
||||
See example usages in:
|
||||
`t/helper/test-path-walk.c`,
|
||||
`builtin/pack-objects.c`,
|
||||
`builtin/backfill.c`
|
||||
|
|
|
|||
|
|
@ -41,6 +41,10 @@
|
|||
#include "promisor-remote.h"
|
||||
#include "pack-mtimes.h"
|
||||
#include "parse-options.h"
|
||||
#include "blob.h"
|
||||
#include "tree.h"
|
||||
#include "path-walk.h"
|
||||
#include "trace2.h"
|
||||
|
||||
/*
|
||||
* Objects we are going to pack are collected in the `to_pack` structure.
|
||||
|
|
@ -184,8 +188,14 @@ static inline void oe_set_delta_size(struct packing_data *pack,
|
|||
#define SET_DELTA_SIBLING(obj, val) oe_set_delta_sibling(&to_pack, obj, val)
|
||||
|
||||
static const char *const pack_usage[] = {
|
||||
N_("git pack-objects --stdout [<options>] [< <ref-list> | < <object-list>]"),
|
||||
N_("git pack-objects [<options>] <base-name> [< <ref-list> | < <object-list>]"),
|
||||
N_("git pack-objects [-q | --progress | --all-progress] [--all-progress-implied]\n"
|
||||
" [--no-reuse-delta] [--delta-base-offset] [--non-empty]\n"
|
||||
" [--local] [--incremental] [--window=<n>] [--depth=<n>]\n"
|
||||
" [--revs [--unpacked | --all]] [--keep-pack=<pack-name>]\n"
|
||||
" [--cruft] [--cruft-expiration=<time>]\n"
|
||||
" [--stdout [--filter=<filter-spec>] | <base-name>]\n"
|
||||
" [--shallow] [--keep-true-parents] [--[no-]sparse]\n"
|
||||
" [--name-hash-version=<n>] [--path-walk] < <object-list>"),
|
||||
NULL
|
||||
};
|
||||
|
||||
|
|
@ -200,6 +210,7 @@ static int keep_unreachable, unpack_unreachable, include_tag;
|
|||
static timestamp_t unpack_unreachable_expiration;
|
||||
static int pack_loose_unreachable;
|
||||
static int cruft;
|
||||
static int shallow = 0;
|
||||
static timestamp_t cruft_expiration;
|
||||
static int local;
|
||||
static int have_non_local_packs;
|
||||
|
|
@ -218,6 +229,7 @@ static int delta_search_threads;
|
|||
static int pack_to_stdout;
|
||||
static int sparse;
|
||||
static int thin;
|
||||
static int path_walk = -1;
|
||||
static int num_preferred_base;
|
||||
static struct progress *progress_state;
|
||||
|
||||
|
|
@ -3041,6 +3053,7 @@ static void find_deltas(struct object_entry **list, unsigned *list_size,
|
|||
struct thread_params {
|
||||
pthread_t thread;
|
||||
struct object_entry **list;
|
||||
struct packing_region *regions;
|
||||
unsigned list_size;
|
||||
unsigned remaining;
|
||||
int window;
|
||||
|
|
@ -3283,6 +3296,242 @@ static int add_ref_tag(const char *tag UNUSED, const char *referent UNUSED, cons
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int should_attempt_deltas(struct object_entry *entry)
|
||||
{
|
||||
if (DELTA(entry))
|
||||
/* This happens if we decided to reuse existing
|
||||
* delta from a pack. "reuse_delta &&" is implied.
|
||||
*/
|
||||
return 0;
|
||||
|
||||
if (!entry->type_valid ||
|
||||
oe_size_less_than(&to_pack, entry, 50))
|
||||
return 0;
|
||||
|
||||
if (entry->no_try_delta)
|
||||
return 0;
|
||||
|
||||
if (!entry->preferred_base) {
|
||||
if (oe_type(entry) < 0)
|
||||
die(_("unable to get type of object %s"),
|
||||
oid_to_hex(&entry->idx.oid));
|
||||
} else if (oe_type(entry) < 0) {
|
||||
/*
|
||||
* This object is not found, but we
|
||||
* don't have to include it anyway.
|
||||
*/
|
||||
return 0;
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void find_deltas_for_region(struct object_entry *list,
|
||||
struct packing_region *region,
|
||||
unsigned int *processed)
|
||||
{
|
||||
struct object_entry **delta_list;
|
||||
unsigned int delta_list_nr = 0;
|
||||
|
||||
ALLOC_ARRAY(delta_list, region->nr);
|
||||
for (size_t i = 0; i < region->nr; i++) {
|
||||
struct object_entry *entry = list + region->start + i;
|
||||
if (should_attempt_deltas(entry))
|
||||
delta_list[delta_list_nr++] = entry;
|
||||
}
|
||||
|
||||
QSORT(delta_list, delta_list_nr, type_size_sort);
|
||||
find_deltas(delta_list, &delta_list_nr, window, depth, processed);
|
||||
free(delta_list);
|
||||
}
|
||||
|
||||
static void find_deltas_by_region(struct object_entry *list,
|
||||
struct packing_region *regions,
|
||||
size_t start, size_t nr)
|
||||
{
|
||||
unsigned int processed = 0;
|
||||
size_t progress_nr;
|
||||
|
||||
if (!nr)
|
||||
return;
|
||||
|
||||
progress_nr = regions[nr - 1].start + regions[nr - 1].nr;
|
||||
|
||||
if (progress)
|
||||
progress_state = start_progress(the_repository,
|
||||
_("Compressing objects by path"),
|
||||
progress_nr);
|
||||
|
||||
while (nr--)
|
||||
find_deltas_for_region(list,
|
||||
&regions[start++],
|
||||
&processed);
|
||||
|
||||
display_progress(progress_state, progress_nr);
|
||||
stop_progress(&progress_state);
|
||||
}
|
||||
|
||||
static void *threaded_find_deltas_by_path(void *arg)
|
||||
{
|
||||
struct thread_params *me = arg;
|
||||
|
||||
progress_lock();
|
||||
while (me->remaining) {
|
||||
while (me->remaining) {
|
||||
progress_unlock();
|
||||
find_deltas_for_region(to_pack.objects,
|
||||
me->regions,
|
||||
me->processed);
|
||||
progress_lock();
|
||||
me->remaining--;
|
||||
me->regions++;
|
||||
}
|
||||
|
||||
me->working = 0;
|
||||
pthread_cond_signal(&progress_cond);
|
||||
progress_unlock();
|
||||
|
||||
/*
|
||||
* We must not set ->data_ready before we wait on the
|
||||
* condition because the main thread may have set it to 1
|
||||
* before we get here. In order to be sure that new
|
||||
* work is available if we see 1 in ->data_ready, it
|
||||
* was initialized to 0 before this thread was spawned
|
||||
* and we reset it to 0 right away.
|
||||
*/
|
||||
pthread_mutex_lock(&me->mutex);
|
||||
while (!me->data_ready)
|
||||
pthread_cond_wait(&me->cond, &me->mutex);
|
||||
me->data_ready = 0;
|
||||
pthread_mutex_unlock(&me->mutex);
|
||||
|
||||
progress_lock();
|
||||
}
|
||||
progress_unlock();
|
||||
/* leave ->working 1 so that this doesn't get more work assigned */
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void ll_find_deltas_by_region(struct object_entry *list,
|
||||
struct packing_region *regions,
|
||||
uint32_t start, uint32_t nr)
|
||||
{
|
||||
struct thread_params *p;
|
||||
int i, ret, active_threads = 0;
|
||||
unsigned int processed = 0;
|
||||
uint32_t progress_nr;
|
||||
init_threaded_search();
|
||||
|
||||
if (!nr)
|
||||
return;
|
||||
|
||||
progress_nr = regions[nr - 1].start + regions[nr - 1].nr;
|
||||
if (delta_search_threads <= 1) {
|
||||
find_deltas_by_region(list, regions, start, nr);
|
||||
cleanup_threaded_search();
|
||||
return;
|
||||
}
|
||||
|
||||
if (progress > pack_to_stdout)
|
||||
fprintf_ln(stderr,
|
||||
Q_("Path-based delta compression using up to %d thread",
|
||||
"Path-based delta compression using up to %d threads",
|
||||
delta_search_threads),
|
||||
delta_search_threads);
|
||||
CALLOC_ARRAY(p, delta_search_threads);
|
||||
|
||||
if (progress)
|
||||
progress_state = start_progress(the_repository,
|
||||
_("Compressing objects by path"),
|
||||
progress_nr);
|
||||
/* Partition the work amongst work threads. */
|
||||
for (i = 0; i < delta_search_threads; i++) {
|
||||
unsigned sub_size = nr / (delta_search_threads - i);
|
||||
|
||||
p[i].window = window;
|
||||
p[i].depth = depth;
|
||||
p[i].processed = &processed;
|
||||
p[i].working = 1;
|
||||
p[i].data_ready = 0;
|
||||
|
||||
p[i].regions = regions;
|
||||
p[i].list_size = sub_size;
|
||||
p[i].remaining = sub_size;
|
||||
|
||||
regions += sub_size;
|
||||
nr -= sub_size;
|
||||
}
|
||||
|
||||
/* Start work threads. */
|
||||
for (i = 0; i < delta_search_threads; i++) {
|
||||
if (!p[i].list_size)
|
||||
continue;
|
||||
pthread_mutex_init(&p[i].mutex, NULL);
|
||||
pthread_cond_init(&p[i].cond, NULL);
|
||||
ret = pthread_create(&p[i].thread, NULL,
|
||||
threaded_find_deltas_by_path, &p[i]);
|
||||
if (ret)
|
||||
die(_("unable to create thread: %s"), strerror(ret));
|
||||
active_threads++;
|
||||
}
|
||||
|
||||
/*
|
||||
* Now let's wait for work completion. Each time a thread is done
|
||||
* with its work, we steal half of the remaining work from the
|
||||
* thread with the largest number of unprocessed objects and give
|
||||
* it to that newly idle thread. This ensures good load balancing
|
||||
* until the remaining object list segments are simply too short
|
||||
* to be worth splitting anymore.
|
||||
*/
|
||||
while (active_threads) {
|
||||
struct thread_params *target = NULL;
|
||||
struct thread_params *victim = NULL;
|
||||
unsigned sub_size = 0;
|
||||
|
||||
progress_lock();
|
||||
for (;;) {
|
||||
for (i = 0; !target && i < delta_search_threads; i++)
|
||||
if (!p[i].working)
|
||||
target = &p[i];
|
||||
if (target)
|
||||
break;
|
||||
pthread_cond_wait(&progress_cond, &progress_mutex);
|
||||
}
|
||||
|
||||
for (i = 0; i < delta_search_threads; i++)
|
||||
if (p[i].remaining > 2*window &&
|
||||
(!victim || victim->remaining < p[i].remaining))
|
||||
victim = &p[i];
|
||||
if (victim) {
|
||||
sub_size = victim->remaining / 2;
|
||||
target->regions = victim->regions + victim->remaining - sub_size;
|
||||
victim->list_size -= sub_size;
|
||||
victim->remaining -= sub_size;
|
||||
}
|
||||
target->list_size = sub_size;
|
||||
target->remaining = sub_size;
|
||||
target->working = 1;
|
||||
progress_unlock();
|
||||
|
||||
pthread_mutex_lock(&target->mutex);
|
||||
target->data_ready = 1;
|
||||
pthread_cond_signal(&target->cond);
|
||||
pthread_mutex_unlock(&target->mutex);
|
||||
|
||||
if (!sub_size) {
|
||||
pthread_join(target->thread, NULL);
|
||||
pthread_cond_destroy(&target->cond);
|
||||
pthread_mutex_destroy(&target->mutex);
|
||||
active_threads--;
|
||||
}
|
||||
}
|
||||
cleanup_threaded_search();
|
||||
free(p);
|
||||
|
||||
display_progress(progress_state, progress_nr);
|
||||
stop_progress(&progress_state);
|
||||
}
|
||||
|
||||
static void prepare_pack(int window, int depth)
|
||||
{
|
||||
struct object_entry **delta_list;
|
||||
|
|
@ -3307,39 +3556,21 @@ static void prepare_pack(int window, int depth)
|
|||
if (!to_pack.nr_objects || !window || !depth)
|
||||
return;
|
||||
|
||||
if (path_walk)
|
||||
ll_find_deltas_by_region(to_pack.objects, to_pack.regions,
|
||||
0, to_pack.nr_regions);
|
||||
|
||||
ALLOC_ARRAY(delta_list, to_pack.nr_objects);
|
||||
nr_deltas = n = 0;
|
||||
|
||||
for (i = 0; i < to_pack.nr_objects; i++) {
|
||||
struct object_entry *entry = to_pack.objects + i;
|
||||
|
||||
if (DELTA(entry))
|
||||
/* This happens if we decided to reuse existing
|
||||
* delta from a pack. "reuse_delta &&" is implied.
|
||||
*/
|
||||
if (!should_attempt_deltas(entry))
|
||||
continue;
|
||||
|
||||
if (!entry->type_valid ||
|
||||
oe_size_less_than(&to_pack, entry, 50))
|
||||
continue;
|
||||
|
||||
if (entry->no_try_delta)
|
||||
continue;
|
||||
|
||||
if (!entry->preferred_base) {
|
||||
if (!entry->preferred_base)
|
||||
nr_deltas++;
|
||||
if (oe_type(entry) < 0)
|
||||
die(_("unable to get type of object %s"),
|
||||
oid_to_hex(&entry->idx.oid));
|
||||
} else {
|
||||
if (oe_type(entry) < 0) {
|
||||
/*
|
||||
* This object is not found, but we
|
||||
* don't have to include it anyway.
|
||||
*/
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
delta_list[n++] = entry;
|
||||
}
|
||||
|
|
@ -4272,6 +4503,93 @@ static void mark_bitmap_preferred_tips(void)
|
|||
}
|
||||
}
|
||||
|
||||
static inline int is_oid_uninteresting(struct repository *repo,
|
||||
struct object_id *oid)
|
||||
{
|
||||
struct object *o = lookup_object(repo, oid);
|
||||
return !o || (o->flags & UNINTERESTING);
|
||||
}
|
||||
|
||||
static int add_objects_by_path(const char *path,
|
||||
struct oid_array *oids,
|
||||
enum object_type type,
|
||||
void *data)
|
||||
{
|
||||
size_t oe_start = to_pack.nr_objects;
|
||||
size_t oe_end;
|
||||
unsigned int *processed = data;
|
||||
|
||||
/*
|
||||
* First, add all objects to the packing data, including the ones
|
||||
* marked UNINTERESTING (translated to 'exclude') as they can be
|
||||
* used as delta bases.
|
||||
*/
|
||||
for (size_t i = 0; i < oids->nr; i++) {
|
||||
int exclude;
|
||||
struct object_info oi = OBJECT_INFO_INIT;
|
||||
struct object_id *oid = &oids->oid[i];
|
||||
|
||||
/* Skip objects that do not exist locally. */
|
||||
if ((exclude_promisor_objects || arg_missing_action != MA_ERROR) &&
|
||||
oid_object_info_extended(the_repository, oid, &oi,
|
||||
OBJECT_INFO_FOR_PREFETCH) < 0)
|
||||
continue;
|
||||
|
||||
exclude = is_oid_uninteresting(the_repository, oid);
|
||||
|
||||
if (exclude && !thin)
|
||||
continue;
|
||||
|
||||
add_object_entry(oid, type, path, exclude);
|
||||
}
|
||||
|
||||
oe_end = to_pack.nr_objects;
|
||||
|
||||
/* We can skip delta calculations if it is a no-op. */
|
||||
if (oe_end == oe_start || !window)
|
||||
return 0;
|
||||
|
||||
ALLOC_GROW(to_pack.regions,
|
||||
to_pack.nr_regions + 1,
|
||||
to_pack.nr_regions_alloc);
|
||||
|
||||
to_pack.regions[to_pack.nr_regions].start = oe_start;
|
||||
to_pack.regions[to_pack.nr_regions].nr = oe_end - oe_start;
|
||||
to_pack.nr_regions++;
|
||||
|
||||
*processed += oids->nr;
|
||||
display_progress(progress_state, *processed);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void get_object_list_path_walk(struct rev_info *revs)
|
||||
{
|
||||
struct path_walk_info info = PATH_WALK_INFO_INIT;
|
||||
unsigned int processed = 0;
|
||||
int result;
|
||||
|
||||
info.revs = revs;
|
||||
info.path_fn = add_objects_by_path;
|
||||
info.path_fn_data = &processed;
|
||||
|
||||
/*
|
||||
* Allow the --[no-]sparse option to be interesting here, if only
|
||||
* for testing purposes. Paths with no interesting objects will not
|
||||
* contribute to the resulting pack, but only create noisy preferred
|
||||
* base objects.
|
||||
*/
|
||||
info.prune_all_uninteresting = sparse;
|
||||
info.edge_aggressive = shallow;
|
||||
|
||||
trace2_region_enter("pack-objects", "path-walk", revs->repo);
|
||||
result = walk_objects_by_path(&info);
|
||||
trace2_region_leave("pack-objects", "path-walk", revs->repo);
|
||||
|
||||
if (result)
|
||||
die(_("failed to pack objects via path-walk"));
|
||||
}
|
||||
|
||||
static void get_object_list(struct rev_info *revs, int ac, const char **av)
|
||||
{
|
||||
struct setup_revision_opt s_r_opt = {
|
||||
|
|
@ -4327,15 +4645,19 @@ static void get_object_list(struct rev_info *revs, int ac, const char **av)
|
|||
if (write_bitmap_index)
|
||||
mark_bitmap_preferred_tips();
|
||||
|
||||
if (!fn_show_object)
|
||||
fn_show_object = show_object;
|
||||
|
||||
if (path_walk) {
|
||||
get_object_list_path_walk(revs);
|
||||
} else {
|
||||
if (prepare_revision_walk(revs))
|
||||
die(_("revision walk setup failed"));
|
||||
mark_edges_uninteresting(revs, show_edge, sparse);
|
||||
|
||||
if (!fn_show_object)
|
||||
fn_show_object = show_object;
|
||||
traverse_commit_list(revs,
|
||||
show_commit, fn_show_object,
|
||||
NULL);
|
||||
}
|
||||
|
||||
if (unpack_unreachable_expiration) {
|
||||
revs->ignore_missing_links = 1;
|
||||
|
|
@ -4464,7 +4786,6 @@ int cmd_pack_objects(int argc,
|
|||
struct repository *repo UNUSED)
|
||||
{
|
||||
int use_internal_rev_list = 0;
|
||||
int shallow = 0;
|
||||
int all_progress_implied = 0;
|
||||
struct strvec rp = STRVEC_INIT;
|
||||
int rev_list_unpacked = 0, rev_list_all = 0, rev_list_reflog = 0;
|
||||
|
|
@ -4545,6 +4866,8 @@ int cmd_pack_objects(int argc,
|
|||
N_("use the sparse reachability algorithm")),
|
||||
OPT_BOOL(0, "thin", &thin,
|
||||
N_("create thin packs")),
|
||||
OPT_BOOL(0, "path-walk", &path_walk,
|
||||
N_("use the path-walk API to walk objects when possible")),
|
||||
OPT_BOOL(0, "shallow", &shallow,
|
||||
N_("create packs suitable for shallow fetches")),
|
||||
OPT_BOOL(0, "honor-pack-keep", &ignore_packed_keep_on_disk,
|
||||
|
|
@ -4614,6 +4937,17 @@ int cmd_pack_objects(int argc,
|
|||
if (pack_to_stdout != !base_name || argc)
|
||||
usage_with_options(pack_usage, pack_objects_options);
|
||||
|
||||
if (path_walk < 0) {
|
||||
if (use_bitmap_index > 0 ||
|
||||
!use_internal_rev_list)
|
||||
path_walk = 0;
|
||||
else if (the_repository->gitdir &&
|
||||
the_repository->settings.pack_use_path_walk)
|
||||
path_walk = 1;
|
||||
else
|
||||
path_walk = git_env_bool("GIT_TEST_PACK_PATH_WALK", 0);
|
||||
}
|
||||
|
||||
if (depth < 0)
|
||||
depth = 0;
|
||||
if (depth >= (1 << OE_DEPTH_BITS)) {
|
||||
|
|
@ -4630,7 +4964,28 @@ int cmd_pack_objects(int argc,
|
|||
window = 0;
|
||||
|
||||
strvec_push(&rp, "pack-objects");
|
||||
if (thin) {
|
||||
|
||||
if (path_walk) {
|
||||
const char *option = NULL;
|
||||
if (filter_options.choice)
|
||||
option = "--filter";
|
||||
else if (use_delta_islands)
|
||||
option = "--delta-islands";
|
||||
|
||||
if (option) {
|
||||
warning(_("cannot use %s with %s"),
|
||||
option, "--path-walk");
|
||||
path_walk = 0;
|
||||
}
|
||||
}
|
||||
if (path_walk) {
|
||||
strvec_push(&rp, "--boundary");
|
||||
/*
|
||||
* We must disable the bitmaps because we are removing
|
||||
* the --objects / --objects-edge[-aggressive] options.
|
||||
*/
|
||||
use_bitmap_index = 0;
|
||||
} else if (thin) {
|
||||
use_internal_rev_list = 1;
|
||||
strvec_push(&rp, shallow
|
||||
? "--objects-edge-aggressive"
|
||||
|
|
|
|||
|
|
@ -43,7 +43,7 @@ static char *packdir, *packtmp_name, *packtmp;
|
|||
static const char *const git_repack_usage[] = {
|
||||
N_("git repack [-a] [-A] [-d] [-f] [-F] [-l] [-n] [-q] [-b] [-m]\n"
|
||||
"[--window=<n>] [--depth=<n>] [--threads=<n>] [--keep-pack=<pack-name>]\n"
|
||||
"[--write-midx] [--name-hash-version=<n>]"),
|
||||
"[--write-midx] [--name-hash-version=<n>] [--path-walk]"),
|
||||
NULL
|
||||
};
|
||||
|
||||
|
|
@ -63,6 +63,7 @@ struct pack_objects_args {
|
|||
int quiet;
|
||||
int local;
|
||||
int name_hash_version;
|
||||
int path_walk;
|
||||
struct list_objects_filter_options filter_options;
|
||||
};
|
||||
|
||||
|
|
@ -313,6 +314,8 @@ static void prepare_pack_objects(struct child_process *cmd,
|
|||
strvec_pushf(&cmd->args, "--no-reuse-object");
|
||||
if (args->name_hash_version)
|
||||
strvec_pushf(&cmd->args, "--name-hash-version=%d", args->name_hash_version);
|
||||
if (args->path_walk)
|
||||
strvec_pushf(&cmd->args, "--path-walk");
|
||||
if (args->local)
|
||||
strvec_push(&cmd->args, "--local");
|
||||
if (args->quiet)
|
||||
|
|
@ -1184,6 +1187,8 @@ int cmd_repack(int argc,
|
|||
N_("pass --no-reuse-object to git-pack-objects")),
|
||||
OPT_INTEGER(0, "name-hash-version", &po_args.name_hash_version,
|
||||
N_("specify the name hash version to use for grouping similar objects by path")),
|
||||
OPT_BOOL(0, "path-walk", &po_args.path_walk,
|
||||
N_("pass --path-walk to git-pack-objects")),
|
||||
OPT_NEGBIT('n', NULL, &run_update_server_info,
|
||||
N_("do not run git-update-server-info"), 1),
|
||||
OPT__QUIET(&po_args.quiet, N_("be quiet")),
|
||||
|
|
|
|||
|
|
@ -120,11 +120,23 @@ struct object_entry {
|
|||
unsigned ext_base:1; /* delta_idx points outside packlist */
|
||||
};
|
||||
|
||||
/**
|
||||
* A packing region is a section of the packing_data.objects array
|
||||
* as given by a starting index and a number of elements.
|
||||
*/
|
||||
struct packing_region {
|
||||
size_t start;
|
||||
size_t nr;
|
||||
};
|
||||
|
||||
struct packing_data {
|
||||
struct repository *repo;
|
||||
struct object_entry *objects;
|
||||
uint32_t nr_objects, nr_alloc;
|
||||
|
||||
struct packing_region *regions;
|
||||
size_t nr_regions, nr_regions_alloc;
|
||||
|
||||
int32_t *index;
|
||||
uint32_t index_size;
|
||||
|
||||
|
|
|
|||
|
|
@ -503,7 +503,11 @@ int walk_objects_by_path(struct path_walk_info *info)
|
|||
if (prepare_revision_walk(info->revs))
|
||||
die(_("failed to setup revision walk"));
|
||||
|
||||
/* Walk trees to mark them as UNINTERESTING. */
|
||||
/*
|
||||
* Walk trees to mark them as UNINTERESTING.
|
||||
* This is particularly important when 'edge_aggressive' is set.
|
||||
*/
|
||||
info->revs->edge_hint_aggressive = info->edge_aggressive;
|
||||
edge_repo = info->revs->repo;
|
||||
edge_tree_list = root_tree_list;
|
||||
mark_edges_uninteresting(info->revs, show_edge,
|
||||
|
|
|
|||
|
|
@ -50,6 +50,13 @@ struct path_walk_info {
|
|||
*/
|
||||
int prune_all_uninteresting;
|
||||
|
||||
/**
|
||||
* When 'edge_aggressive' is set, then the revision walk will use
|
||||
* the '--objects-edge-aggressive' option to mark even more objects
|
||||
* as uninteresting.
|
||||
*/
|
||||
int edge_aggressive;
|
||||
|
||||
/**
|
||||
* Specify a sparse-checkout definition to match our paths to. Do not
|
||||
* walk outside of this sparse definition. If the patterns are in
|
||||
|
|
|
|||
|
|
@ -54,11 +54,13 @@ void prepare_repo_settings(struct repository *r)
|
|||
r->settings.fetch_negotiation_algorithm = FETCH_NEGOTIATION_SKIPPING;
|
||||
r->settings.pack_use_bitmap_boundary_traversal = 1;
|
||||
r->settings.pack_use_multi_pack_reuse = 1;
|
||||
r->settings.pack_use_path_walk = 1;
|
||||
}
|
||||
if (manyfiles) {
|
||||
r->settings.index_version = 4;
|
||||
r->settings.index_skip_hash = 1;
|
||||
r->settings.core_untracked_cache = UNTRACKED_CACHE_WRITE;
|
||||
r->settings.pack_use_path_walk = 1;
|
||||
}
|
||||
|
||||
/* Commit graph config or default, does not cascade (simple) */
|
||||
|
|
@ -73,6 +75,7 @@ void prepare_repo_settings(struct repository *r)
|
|||
|
||||
/* Boolean config or default, does not cascade (simple) */
|
||||
repo_cfg_bool(r, "pack.usesparse", &r->settings.pack_use_sparse, 1);
|
||||
repo_cfg_bool(r, "pack.usepathwalk", &r->settings.pack_use_path_walk, 0);
|
||||
repo_cfg_bool(r, "core.multipackindex", &r->settings.core_multi_pack_index, 1);
|
||||
repo_cfg_bool(r, "index.sparse", &r->settings.sparse_index, 0);
|
||||
repo_cfg_bool(r, "index.skiphash", &r->settings.index_skip_hash, r->settings.index_skip_hash);
|
||||
|
|
|
|||
|
|
@ -56,6 +56,7 @@ struct repo_settings {
|
|||
enum untracked_cache_setting core_untracked_cache;
|
||||
|
||||
int pack_use_sparse;
|
||||
int pack_use_path_walk;
|
||||
enum fetch_negotiation_setting fetch_negotiation_algorithm;
|
||||
|
||||
int core_multi_pack_index;
|
||||
|
|
|
|||
1
scalar.c
1
scalar.c
|
|
@ -170,6 +170,7 @@ static int set_recommended_config(int reconfigure)
|
|||
{ "core.autoCRLF", "false" },
|
||||
{ "core.safeCRLF", "false" },
|
||||
{ "fetch.showForcedUpdates", "false" },
|
||||
{ "pack.usePathWalk", "true" },
|
||||
{ NULL, NULL },
|
||||
};
|
||||
int i;
|
||||
|
|
|
|||
4
t/README
4
t/README
|
|
@ -415,6 +415,10 @@ GIT_TEST_PACK_SPARSE=<boolean> if disabled will default the pack-objects
|
|||
builtin to use the non-sparse object walk. This can still be overridden by
|
||||
the --sparse command-line argument.
|
||||
|
||||
GIT_TEST_PACK_PATH_WALK=<boolean> if enabled will default the pack-objects
|
||||
builtin to use the path-walk API for the object walk. This can still be
|
||||
overridden by the --no-path-walk command-line argument.
|
||||
|
||||
GIT_TEST_PRELOAD_INDEX=<boolean> exercises the preload-index code path
|
||||
by overriding the minimum number of cache entries required per thread.
|
||||
|
||||
|
|
|
|||
|
|
@ -82,6 +82,8 @@ int cmd__path_walk(int argc, const char **argv)
|
|||
N_("toggle inclusion of tree objects")),
|
||||
OPT_BOOL(0, "prune", &info.prune_all_uninteresting,
|
||||
N_("toggle pruning of uninteresting paths")),
|
||||
OPT_BOOL(0, "edge-aggressive", &info.edge_aggressive,
|
||||
N_("toggle aggressive edge walk")),
|
||||
OPT_BOOL(0, "stdin-pl", &stdin_pl,
|
||||
N_("read a pattern list over stdin")),
|
||||
OPT_END(),
|
||||
|
|
|
|||
|
|
@ -22,46 +22,53 @@ test_expect_success 'create rev input' '
|
|||
EOF
|
||||
'
|
||||
|
||||
for version in 1 2
|
||||
do
|
||||
export version
|
||||
test_all_with_args () {
|
||||
parameter=$1
|
||||
export parameter
|
||||
|
||||
test_perf "thin pack with version $version" '
|
||||
test_perf "thin pack with $parameter" '
|
||||
git pack-objects --thin --stdout --revs --sparse \
|
||||
--name-hash-version=$version <in-thin >out
|
||||
$parameter <in-thin >out
|
||||
'
|
||||
|
||||
test_size "thin pack size with version $version" '
|
||||
test_size "thin pack size with $parameter" '
|
||||
test_file_size out
|
||||
'
|
||||
|
||||
test_perf "big pack with version $version" '
|
||||
test_perf "big pack with $parameter" '
|
||||
git pack-objects --stdout --revs --sparse \
|
||||
--name-hash-version=$version <in-big >out
|
||||
$parameter <in-big >out
|
||||
'
|
||||
|
||||
test_size "big pack size with version $version" '
|
||||
test_size "big pack size with $parameter" '
|
||||
test_file_size out
|
||||
'
|
||||
|
||||
test_perf "shallow fetch pack with version $version" '
|
||||
test_perf "shallow fetch pack with $parameter" '
|
||||
git pack-objects --stdout --revs --sparse --shallow \
|
||||
--name-hash-version=$version <in-shallow >out
|
||||
$parameter <in-shallow >out
|
||||
'
|
||||
|
||||
test_size "shallow pack size with version $version" '
|
||||
test_size "shallow pack size with $parameter" '
|
||||
test_file_size out
|
||||
'
|
||||
|
||||
test_perf "repack with version $version" '
|
||||
git repack -adf --name-hash-version=$version
|
||||
test_perf "repack with $parameter" '
|
||||
git repack -adf $parameter
|
||||
'
|
||||
|
||||
test_size "repack size with version $version" '
|
||||
test_size "repack size with $parameter" '
|
||||
gitdir=$(git rev-parse --git-dir) &&
|
||||
pack=$(ls $gitdir/objects/pack/pack-*.pack) &&
|
||||
test_file_size "$pack"
|
||||
'
|
||||
}
|
||||
|
||||
for version in 1 2
|
||||
do
|
||||
test_all_with_args --name-hash-version=$version
|
||||
done
|
||||
|
||||
test_all_with_args --path-walk
|
||||
|
||||
test_done
|
||||
|
|
|
|||
|
|
@ -59,6 +59,12 @@ test_expect_success 'pack-objects should fetch from promisor remote and execute
|
|||
|
||||
test_expect_success 'clone from promisor remote does not lazy-fetch by default' '
|
||||
rm -f script-executed &&
|
||||
|
||||
# The --path-walk feature of "git pack-objects" is not
|
||||
# compatible with this kind of fetch from an incomplete repo.
|
||||
GIT_TEST_PACK_PATH_WALK=0 &&
|
||||
export GIT_TEST_PACK_PATH_WALK &&
|
||||
|
||||
test_must_fail git clone evil no-lazy 2>err &&
|
||||
test_grep "lazy fetching disabled" err &&
|
||||
test_path_is_missing script-executed
|
||||
|
|
|
|||
|
|
@ -38,7 +38,6 @@ merge-one-file
|
|||
multi-pack-index
|
||||
name-rev
|
||||
notes
|
||||
pack-objects
|
||||
push
|
||||
range-diff
|
||||
rebase
|
||||
|
|
|
|||
|
|
@ -723,4 +723,23 @@ test_expect_success '--name-hash-version=2 and --write-bitmap-index are incompat
|
|||
! test_grep "currently, --write-bitmap-index requires --name-hash-version=1" err
|
||||
'
|
||||
|
||||
test_expect_success '--path-walk pack everything' '
|
||||
git -C server rev-parse HEAD >in &&
|
||||
GIT_PROGRESS_DELAY=0 git -C server pack-objects \
|
||||
--stdout --revs --path-walk --progress <in >out.pack 2>err &&
|
||||
grep "Compressing objects by path" err &&
|
||||
git -C server index-pack --stdin <out.pack
|
||||
'
|
||||
|
||||
test_expect_success '--path-walk thin pack' '
|
||||
cat >in <<-EOF &&
|
||||
$(git -C server rev-parse HEAD)
|
||||
^$(git -C server rev-parse HEAD~2)
|
||||
EOF
|
||||
GIT_PROGRESS_DELAY=0 git -C server pack-objects \
|
||||
--thin --stdout --revs --path-walk --progress <in >out.pack 2>err &&
|
||||
grep "Compressing objects by path" err &&
|
||||
git -C server index-pack --fix-thin --stdin <out.pack
|
||||
'
|
||||
|
||||
test_done
|
||||
|
|
|
|||
|
|
@ -59,6 +59,11 @@ test_expect_success 'indirectly clone patch_clone' '
|
|||
git pull ../.git &&
|
||||
test $(git rev-parse HEAD) = $B &&
|
||||
|
||||
# The --path-walk feature of "git pack-objects" is not
|
||||
# compatible with this kind of fetch from an incomplete repo.
|
||||
GIT_TEST_PACK_PATH_WALK=0 &&
|
||||
export GIT_TEST_PACK_PATH_WALK &&
|
||||
|
||||
git pull ../patch_clone/.git &&
|
||||
test $(git rev-parse HEAD) = $C
|
||||
)
|
||||
|
|
|
|||
|
|
@@ -158,8 +158,9 @@ test_bitmap_cases () {
|
|||
ls .git/objects/pack/ | grep bitmap >output &&
|
||||
test_line_count = 1 output &&
|
||||
# verify equivalent packs are generated with/without using bitmap index
|
||||
packasha1=$(git pack-objects --no-use-bitmap-index --all packa </dev/null) &&
|
||||
packbsha1=$(git pack-objects --use-bitmap-index --all packb </dev/null) &&
|
||||
# Be careful to not use the path-walk option in either case.
|
||||
packasha1=$(git pack-objects --no-use-bitmap-index --no-path-walk --all packa </dev/null) &&
|
||||
packbsha1=$(git pack-objects --use-bitmap-index --no-path-walk --all packb </dev/null) &&
|
||||
list_packed_objects packa-$packasha1.idx >packa.objects &&
|
||||
list_packed_objects packb-$packbsha1.idx >packb.objects &&
|
||||
test_cmp packa.objects packb.objects
|
||||
|
|
@@ -388,6 +389,14 @@ test_bitmap_cases () {
|
|||
git init --bare client.git &&
|
||||
(
|
||||
cd client.git &&
|
||||
|
||||
# This test relies on reusing a delta, but if the
|
||||
# path-walk machinery is engaged, the base object
|
||||
# is considered too small to use during the
|
||||
# dynamic computation, so is not used.
|
||||
GIT_TEST_PACK_PATH_WALK=0 &&
|
||||
export GIT_TEST_PACK_PATH_WALK &&
|
||||
|
||||
git config transfer.unpackLimit 1 &&
|
||||
git fetch .. delta-reuse-old:delta-reuse-old &&
|
||||
git fetch .. delta-reuse-new:delta-reuse-new &&
|
||||
|
|
|
|||
|
|
@@ -89,15 +89,18 @@ max_chain() {
|
|||
# adjusted (or scrapped if the heuristics have become too unreliable)
|
||||
test_expect_success 'packing produces a long delta' '
|
||||
# Use --window=0 to make sure we are seeing reused deltas,
|
||||
# not computing a new long chain.
|
||||
pack=$(git pack-objects --all --window=0 </dev/null pack) &&
|
||||
# not computing a new long chain. (Also avoid the --path-walk
|
||||
# option as it may break delta chains.)
|
||||
pack=$(git pack-objects --all --window=0 --no-path-walk </dev/null pack) &&
|
||||
echo 9 >expect &&
|
||||
max_chain pack-$pack.pack >actual &&
|
||||
test_cmp expect actual
|
||||
'
|
||||
|
||||
test_expect_success '--depth limits depth' '
|
||||
pack=$(git pack-objects --all --depth=5 </dev/null pack) &&
|
||||
# Avoid --path-walk to avoid breaking delta chains across path
|
||||
# boundaries.
|
||||
pack=$(git pack-objects --all --depth=5 --no-path-walk </dev/null pack) &&
|
||||
echo 5 >expect &&
|
||||
max_chain pack-$pack.pack >actual &&
|
||||
test_cmp expect actual
|
||||
|
|
|
|||
|
|
@@ -7,6 +7,13 @@ test_description='pack-objects multi-pack reuse'
|
|||
|
||||
GIT_TEST_MULTI_PACK_INDEX=0
|
||||
GIT_TEST_MULTI_PACK_INDEX_WRITE_INCREMENTAL=0
|
||||
|
||||
# The --path-walk option does not consider the preferred pack
|
||||
# at all for reusing deltas, so this variable changes the
|
||||
# behavior of this test, if enabled.
|
||||
GIT_TEST_PACK_PATH_WALK=0
|
||||
export GIT_TEST_PACK_PATH_WALK
|
||||
|
||||
objdir=.git/objects
|
||||
packdir=$objdir/pack
|
||||
|
||||
|
|
|
|||
|
|
@@ -1909,4 +1909,14 @@ test_expect_success 'push with config push.useBitmaps' '
|
|||
--thin --delta-base-offset -q --no-use-bitmap-index <false
|
||||
'
|
||||
|
||||
test_expect_success 'push with config pack.usePathWalk=true' '
|
||||
mk_test testrepo heads/main &&
|
||||
git checkout main &&
|
||||
test_config pack.usePathWalk true &&
|
||||
GIT_TRACE2_EVENT="$(pwd)/path-walk.txt" \
|
||||
git push --quiet testrepo main:test &&
|
||||
|
||||
test_region pack-objects path-walk path-walk.txt
|
||||
'
|
||||
|
||||
test_done
|
||||
|
|
|
|||
|
|
@@ -123,4 +123,45 @@ EOF
|
|||
git cat-file blob $(echo 1|git hash-object --stdin) >/dev/null
|
||||
)
|
||||
'
|
||||
|
||||
test_expect_success 'push new commit from shallow clone has correct object count' '
|
||||
git init origin &&
|
||||
test_commit -C origin a &&
|
||||
test_commit -C origin b &&
|
||||
|
||||
git clone --depth=1 "file://$(pwd)/origin" client &&
|
||||
git -C client checkout -b topic &&
|
||||
git -C client commit --allow-empty -m "empty" &&
|
||||
GIT_PROGRESS_DELAY=0 git -C client push --progress origin topic 2>err &&
|
||||
test_grep "Enumerating objects: 1, done." err
|
||||
'
|
||||
|
||||
test_expect_success 'push new commit from shallow clone has good deltas' '
|
||||
git init base &&
|
||||
test_seq 1 999 >base/a &&
|
||||
test_commit -C base initial &&
|
||||
git -C base add a &&
|
||||
git -C base commit -m "big a" &&
|
||||
|
||||
git clone --depth=1 "file://$(pwd)/base" deltas &&
|
||||
git -C deltas checkout -b deltas &&
|
||||
test_seq 1 1000 >deltas/a &&
|
||||
git -C deltas commit -a -m "bigger a" &&
|
||||
GIT_PROGRESS_DELAY=0 git -C deltas push --progress origin deltas 2>err &&
|
||||
|
||||
test_grep "Enumerating objects: 5, done" err &&
|
||||
|
||||
# If the delta base is found, then this message uses "bytes".
|
||||
# If the delta base is not found, then this message uses "KiB".
|
||||
test_grep "Writing objects: .* bytes" err &&
|
||||
|
||||
git -C deltas commit --amend -m "changed message" &&
|
||||
GIT_TRACE2_EVENT="$(pwd)/config-push.txt" \
|
||||
GIT_PROGRESS_DELAY=0 git -C deltas -c pack.usePathWalk=true \
|
||||
push --progress -f origin deltas 2>err &&
|
||||
|
||||
test_grep "Enumerating objects: 1, done" err &&
|
||||
test_region pack-objects path-walk config-push.txt
|
||||
'
|
||||
|
||||
test_done
|
||||
|
|
|
|||
|
|
@@ -376,6 +376,26 @@ test_expect_success 'topic, not base, boundary with pruning' '
|
|||
test_cmp_sorted expect out
|
||||
'
|
||||
|
||||
test_expect_success 'topic, not base, --edge-aggressive with pruning' '
|
||||
test-tool path-walk --prune --edge-aggressive -- topic --not base >out &&
|
||||
|
||||
cat >expect <<-EOF &&
|
||||
0:commit::$(git rev-parse topic)
|
||||
1:tree::$(git rev-parse topic^{tree})
|
||||
1:tree::$(git rev-parse base^{tree}):UNINTERESTING
|
||||
2:tree:right/:$(git rev-parse topic:right)
|
||||
2:tree:right/:$(git rev-parse base:right):UNINTERESTING
|
||||
3:blob:right/c:$(git rev-parse base:right/c):UNINTERESTING
|
||||
3:blob:right/c:$(git rev-parse topic:right/c)
|
||||
blobs:2
|
||||
commits:1
|
||||
tags:0
|
||||
trees:4
|
||||
EOF
|
||||
|
||||
test_cmp_sorted expect out
|
||||
'
|
||||
|
||||
test_expect_success 'trees are reported exactly once' '
|
||||
test_when_finished "rm -rf unique-trees" &&
|
||||
test_create_repo unique-trees &&
|
||||
|
|
|
|||
|
|
@@ -1095,12 +1095,15 @@ test_expect_success 'submodule update --quiet passes quietness to fetch with a s
|
|||
(cd super5 &&
|
||||
# This test var can mess with the stderr output checked in this test.
|
||||
GIT_TEST_NAME_HASH_VERSION=1 \
|
||||
GIT_TEST_PACK_PATH_WALK=0 \
|
||||
git submodule update --quiet --init --depth=1 submodule3 >out 2>err &&
|
||||
test_must_be_empty out &&
|
||||
test_must_be_empty err
|
||||
) &&
|
||||
git clone super4 super6 &&
|
||||
(cd super6 &&
|
||||
# This test variable will create a "warning" message to stderr
|
||||
GIT_TEST_PACK_PATH_WALK=0 \
|
||||
git submodule update --init --depth=1 submodule3 >out 2>err &&
|
||||
test_file_not_empty out &&
|
||||
test_file_not_empty err
|
||||
|
|
|
|||
Loading…
Reference in New Issue