Merge branch 'ds/name-hash-tweaks'
"git pack-objects" and its wrapper "git repack" learned an option to use an alternative path-hash function to improve delta-base selection to produce a packfile with deeper history than window size. * ds/name-hash-tweaks: pack-objects: prevent name hash version change test-tool: add helper for name-hash values p5313: add size comparison test pack-objects: add GIT_TEST_NAME_HASH_VERSION repack: add --name-hash-version option pack-objects: add --name-hash-version option pack-objects: create new name-hash function versionmaint
commit
aae91a86fb
|
@ -15,7 +15,8 @@ SYNOPSIS
|
||||||
[--revs [--unpacked | --all]] [--keep-pack=<pack-name>]
|
[--revs [--unpacked | --all]] [--keep-pack=<pack-name>]
|
||||||
[--cruft] [--cruft-expiration=<time>]
|
[--cruft] [--cruft-expiration=<time>]
|
||||||
[--stdout [--filter=<filter-spec>] | <base-name>]
|
[--stdout [--filter=<filter-spec>] | <base-name>]
|
||||||
[--shallow] [--keep-true-parents] [--[no-]sparse] < <object-list>
|
[--shallow] [--keep-true-parents] [--[no-]sparse]
|
||||||
|
[--name-hash-version=<n>] < <object-list>
|
||||||
|
|
||||||
|
|
||||||
DESCRIPTION
|
DESCRIPTION
|
||||||
|
@ -345,6 +346,35 @@ raise an error.
|
||||||
Restrict delta matches based on "islands". See DELTA ISLANDS
|
Restrict delta matches based on "islands". See DELTA ISLANDS
|
||||||
below.
|
below.
|
||||||
|
|
||||||
|
--name-hash-version=<n>::
|
||||||
|
While performing delta compression, Git groups objects that may be
|
||||||
|
similar based on heuristics using the path to that object. While
|
||||||
|
grouping objects by an exact path match is good for paths with
|
||||||
|
many versions, there are benefits for finding delta pairs across
|
||||||
|
different full paths. Git collects objects by type and then by a
|
||||||
|
"name hash" of the path and then by size, hoping to group objects
|
||||||
|
that will compress well together.
|
||||||
|
+
|
||||||
|
The default name hash version is `1`, which prioritizes hash locality by
|
||||||
|
considering the final bytes of the path as providing the maximum magnitude
|
||||||
|
to the hash function. This version excels at distinguishing short paths
|
||||||
|
and finding renames across directories. However, the hash function depends
|
||||||
|
primarily on the final 16 bytes of the path. If there are many paths in
|
||||||
|
the repo that have the same final 16 bytes and differ only by parent
|
||||||
|
directory, then this name-hash may lead to too many collisions and cause
|
||||||
|
poor results. At the moment, this version is required when writing
|
||||||
|
reachability bitmap files with `--write-bitmap-index`.
|
||||||
|
+
|
||||||
|
The name hash version `2` has similar locality features as version `1`,
|
||||||
|
except it considers each path component separately and overlays the hashes
|
||||||
|
with a shift. This still prioritizes the final bytes of the path, but also
|
||||||
|
"salts" the lower bits of the hash using the parent directory names. This
|
||||||
|
method allows for some of the locality benefits of version `1` while
|
||||||
|
breaking most of the collisions from a similarly-named file appearing in
|
||||||
|
many different directories. At the moment, this version is not allowed
|
||||||
|
when writing reachability bitmap files with `--write-bitmap-index` and it
|
||||||
|
will be automatically changed to version `1`.
|
||||||
|
|
||||||
|
|
||||||
DELTA ISLANDS
|
DELTA ISLANDS
|
||||||
-------------
|
-------------
|
||||||
|
|
|
@ -9,7 +9,9 @@ git-repack - Pack unpacked objects in a repository
|
||||||
SYNOPSIS
|
SYNOPSIS
|
||||||
--------
|
--------
|
||||||
[verse]
|
[verse]
|
||||||
'git repack' [-a] [-A] [-d] [-f] [-F] [-l] [-n] [-q] [-b] [-m] [--window=<n>] [--depth=<n>] [--threads=<n>] [--keep-pack=<pack-name>] [--write-midx]
|
'git repack' [-a] [-A] [-d] [-f] [-F] [-l] [-n] [-q] [-b] [-m]
|
||||||
|
[--window=<n>] [--depth=<n>] [--threads=<n>] [--keep-pack=<pack-name>]
|
||||||
|
[--write-midx] [--name-hash-version=<n>]
|
||||||
|
|
||||||
DESCRIPTION
|
DESCRIPTION
|
||||||
-----------
|
-----------
|
||||||
|
@ -249,6 +251,11 @@ linkgit:git-multi-pack-index[1]).
|
||||||
Write a multi-pack index (see linkgit:git-multi-pack-index[1])
|
Write a multi-pack index (see linkgit:git-multi-pack-index[1])
|
||||||
containing the non-redundant packs.
|
containing the non-redundant packs.
|
||||||
|
|
||||||
|
--name-hash-version=<n>::
|
||||||
|
Provide this argument to the underlying `git pack-objects` process.
|
||||||
|
See linkgit:git-pack-objects[1] for full details.
|
||||||
|
|
||||||
|
|
||||||
CONFIGURATION
|
CONFIGURATION
|
||||||
-------------
|
-------------
|
||||||
|
|
||||||
|
|
1
Makefile
1
Makefile
|
@ -813,6 +813,7 @@ TEST_BUILTINS_OBJS += test-lazy-init-name-hash.o
|
||||||
TEST_BUILTINS_OBJS += test-match-trees.o
|
TEST_BUILTINS_OBJS += test-match-trees.o
|
||||||
TEST_BUILTINS_OBJS += test-mergesort.o
|
TEST_BUILTINS_OBJS += test-mergesort.o
|
||||||
TEST_BUILTINS_OBJS += test-mktemp.o
|
TEST_BUILTINS_OBJS += test-mktemp.o
|
||||||
|
TEST_BUILTINS_OBJS += test-name-hash.o
|
||||||
TEST_BUILTINS_OBJS += test-online-cpus.o
|
TEST_BUILTINS_OBJS += test-online-cpus.o
|
||||||
TEST_BUILTINS_OBJS += test-pack-mtimes.o
|
TEST_BUILTINS_OBJS += test-pack-mtimes.o
|
||||||
TEST_BUILTINS_OBJS += test-parse-options.o
|
TEST_BUILTINS_OBJS += test-parse-options.o
|
||||||
|
|
|
@ -269,6 +269,43 @@ struct configured_exclusion {
|
||||||
static struct oidmap configured_exclusions;
|
static struct oidmap configured_exclusions;
|
||||||
|
|
||||||
static struct oidset excluded_by_config;
|
static struct oidset excluded_by_config;
|
||||||
|
static int name_hash_version = -1;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Check whether the name_hash_version chosen by user input is appropriate,
|
||||||
|
* and also validate whether it is compatible with other features.
|
||||||
|
*/
|
||||||
|
static void validate_name_hash_version(void)
|
||||||
|
{
|
||||||
|
if (name_hash_version < 1 || name_hash_version > 2)
|
||||||
|
die(_("invalid --name-hash-version option: %d"), name_hash_version);
|
||||||
|
if (write_bitmap_index && name_hash_version != 1) {
|
||||||
|
warning(_("currently, --write-bitmap-index requires --name-hash-version=1"));
|
||||||
|
name_hash_version = 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline uint32_t pack_name_hash_fn(const char *name)
|
||||||
|
{
|
||||||
|
static int seen_version = -1;
|
||||||
|
|
||||||
|
if (seen_version < 0)
|
||||||
|
seen_version = name_hash_version;
|
||||||
|
else if (seen_version != name_hash_version)
|
||||||
|
BUG("name hash version changed from %d to %d mid-process",
|
||||||
|
seen_version, name_hash_version);
|
||||||
|
|
||||||
|
switch (name_hash_version) {
|
||||||
|
case 1:
|
||||||
|
return pack_name_hash(name);
|
||||||
|
|
||||||
|
case 2:
|
||||||
|
return pack_name_hash_v2((const unsigned char *)name);
|
||||||
|
|
||||||
|
default:
|
||||||
|
BUG("invalid name-hash version: %d", name_hash_version);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* stats
|
* stats
|
||||||
|
@ -1689,7 +1726,7 @@ static int add_object_entry(const struct object_id *oid, enum object_type type,
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
create_object_entry(oid, type, pack_name_hash(name),
|
create_object_entry(oid, type, pack_name_hash_fn(name),
|
||||||
exclude, name && no_try_delta(name),
|
exclude, name && no_try_delta(name),
|
||||||
found_pack, found_offset);
|
found_pack, found_offset);
|
||||||
return 1;
|
return 1;
|
||||||
|
@ -1903,7 +1940,7 @@ static void add_preferred_base_object(const char *name)
|
||||||
{
|
{
|
||||||
struct pbase_tree *it;
|
struct pbase_tree *it;
|
||||||
size_t cmplen;
|
size_t cmplen;
|
||||||
unsigned hash = pack_name_hash(name);
|
unsigned hash = pack_name_hash_fn(name);
|
||||||
|
|
||||||
if (!num_preferred_base || check_pbase_path(hash))
|
if (!num_preferred_base || check_pbase_path(hash))
|
||||||
return;
|
return;
|
||||||
|
@ -3415,7 +3452,7 @@ static void show_object_pack_hint(struct object *object, const char *name,
|
||||||
* here using a now in order to perhaps improve the delta selection
|
* here using a now in order to perhaps improve the delta selection
|
||||||
* process.
|
* process.
|
||||||
*/
|
*/
|
||||||
oe->hash = pack_name_hash(name);
|
oe->hash = pack_name_hash_fn(name);
|
||||||
oe->no_try_delta = name && no_try_delta(name);
|
oe->no_try_delta = name && no_try_delta(name);
|
||||||
|
|
||||||
stdin_packs_hints_nr++;
|
stdin_packs_hints_nr++;
|
||||||
|
@ -3565,7 +3602,7 @@ static void add_cruft_object_entry(const struct object_id *oid, enum object_type
|
||||||
entry = packlist_find(&to_pack, oid);
|
entry = packlist_find(&to_pack, oid);
|
||||||
if (entry) {
|
if (entry) {
|
||||||
if (name) {
|
if (name) {
|
||||||
entry->hash = pack_name_hash(name);
|
entry->hash = pack_name_hash_fn(name);
|
||||||
entry->no_try_delta = no_try_delta(name);
|
entry->no_try_delta = no_try_delta(name);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
@ -3588,7 +3625,7 @@ static void add_cruft_object_entry(const struct object_id *oid, enum object_type
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
entry = create_object_entry(oid, type, pack_name_hash(name),
|
entry = create_object_entry(oid, type, pack_name_hash_fn(name),
|
||||||
0, name && no_try_delta(name),
|
0, name && no_try_delta(name),
|
||||||
pack, offset);
|
pack, offset);
|
||||||
}
|
}
|
||||||
|
@ -4068,6 +4105,15 @@ static int get_object_list_from_bitmap(struct rev_info *revs)
|
||||||
if (!(bitmap_git = prepare_bitmap_walk(revs, 0)))
|
if (!(bitmap_git = prepare_bitmap_walk(revs, 0)))
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* For now, force the name-hash version to be 1 since that
|
||||||
|
* is the version implied by the bitmap format. Later, the
|
||||||
|
* format can include this version explicitly in its format,
|
||||||
|
* allowing readers to know the version that was used during
|
||||||
|
* the bitmap write.
|
||||||
|
*/
|
||||||
|
name_hash_version = 1;
|
||||||
|
|
||||||
if (pack_options_allow_reuse())
|
if (pack_options_allow_reuse())
|
||||||
reuse_partial_packfile_from_bitmap(bitmap_git,
|
reuse_partial_packfile_from_bitmap(bitmap_git,
|
||||||
&reuse_packfiles,
|
&reuse_packfiles,
|
||||||
|
@ -4443,6 +4489,8 @@ int cmd_pack_objects(int argc,
|
||||||
OPT_STRING_LIST(0, "uri-protocol", &uri_protocols,
|
OPT_STRING_LIST(0, "uri-protocol", &uri_protocols,
|
||||||
N_("protocol"),
|
N_("protocol"),
|
||||||
N_("exclude any configured uploadpack.blobpackfileuri with this protocol")),
|
N_("exclude any configured uploadpack.blobpackfileuri with this protocol")),
|
||||||
|
OPT_INTEGER(0, "name-hash-version", &name_hash_version,
|
||||||
|
N_("use the specified name-hash function to group similar objects")),
|
||||||
OPT_END(),
|
OPT_END(),
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -4598,6 +4646,11 @@ int cmd_pack_objects(int argc,
|
||||||
if (pack_to_stdout || !rev_list_all)
|
if (pack_to_stdout || !rev_list_all)
|
||||||
write_bitmap_index = 0;
|
write_bitmap_index = 0;
|
||||||
|
|
||||||
|
if (name_hash_version < 0)
|
||||||
|
name_hash_version = (int)git_env_ulong("GIT_TEST_NAME_HASH_VERSION", 1);
|
||||||
|
|
||||||
|
validate_name_hash_version();
|
||||||
|
|
||||||
if (use_delta_islands)
|
if (use_delta_islands)
|
||||||
strvec_push(&rp, "--topo-order");
|
strvec_push(&rp, "--topo-order");
|
||||||
|
|
||||||
|
|
|
@ -41,7 +41,9 @@ static int run_update_server_info = 1;
|
||||||
static char *packdir, *packtmp_name, *packtmp;
|
static char *packdir, *packtmp_name, *packtmp;
|
||||||
|
|
||||||
static const char *const git_repack_usage[] = {
|
static const char *const git_repack_usage[] = {
|
||||||
N_("git repack [<options>]"),
|
N_("git repack [-a] [-A] [-d] [-f] [-F] [-l] [-n] [-q] [-b] [-m]\n"
|
||||||
|
"[--window=<n>] [--depth=<n>] [--threads=<n>] [--keep-pack=<pack-name>]\n"
|
||||||
|
"[--write-midx] [--name-hash-version=<n>]"),
|
||||||
NULL
|
NULL
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -60,6 +62,7 @@ struct pack_objects_args {
|
||||||
int no_reuse_object;
|
int no_reuse_object;
|
||||||
int quiet;
|
int quiet;
|
||||||
int local;
|
int local;
|
||||||
|
int name_hash_version;
|
||||||
struct list_objects_filter_options filter_options;
|
struct list_objects_filter_options filter_options;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -308,6 +311,8 @@ static void prepare_pack_objects(struct child_process *cmd,
|
||||||
strvec_pushf(&cmd->args, "--no-reuse-delta");
|
strvec_pushf(&cmd->args, "--no-reuse-delta");
|
||||||
if (args->no_reuse_object)
|
if (args->no_reuse_object)
|
||||||
strvec_pushf(&cmd->args, "--no-reuse-object");
|
strvec_pushf(&cmd->args, "--no-reuse-object");
|
||||||
|
if (args->name_hash_version)
|
||||||
|
strvec_pushf(&cmd->args, "--name-hash-version=%d", args->name_hash_version);
|
||||||
if (args->local)
|
if (args->local)
|
||||||
strvec_push(&cmd->args, "--local");
|
strvec_push(&cmd->args, "--local");
|
||||||
if (args->quiet)
|
if (args->quiet)
|
||||||
|
@ -1205,6 +1210,8 @@ int cmd_repack(int argc,
|
||||||
N_("pass --no-reuse-delta to git-pack-objects")),
|
N_("pass --no-reuse-delta to git-pack-objects")),
|
||||||
OPT_BOOL('F', NULL, &po_args.no_reuse_object,
|
OPT_BOOL('F', NULL, &po_args.no_reuse_object,
|
||||||
N_("pass --no-reuse-object to git-pack-objects")),
|
N_("pass --no-reuse-object to git-pack-objects")),
|
||||||
|
OPT_INTEGER(0, "name-hash-version", &po_args.name_hash_version,
|
||||||
|
N_("specify the name hash version to use for grouping similar objects by path")),
|
||||||
OPT_NEGBIT('n', NULL, &run_update_server_info,
|
OPT_NEGBIT('n', NULL, &run_update_server_info,
|
||||||
N_("do not run git-update-server-info"), 1),
|
N_("do not run git-update-server-info"), 1),
|
||||||
OPT__QUIET(&po_args.quiet, N_("be quiet")),
|
OPT__QUIET(&po_args.quiet, N_("be quiet")),
|
||||||
|
|
|
@ -208,6 +208,34 @@ static inline uint32_t pack_name_hash(const char *name)
|
||||||
return hash;
|
return hash;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline uint32_t pack_name_hash_v2(const unsigned char *name)
|
||||||
|
{
|
||||||
|
uint32_t hash = 0, base = 0, c;
|
||||||
|
|
||||||
|
if (!name)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
while ((c = *name++)) {
|
||||||
|
if (isspace(c))
|
||||||
|
continue;
|
||||||
|
if (c == '/') {
|
||||||
|
base = (base >> 6) ^ hash;
|
||||||
|
hash = 0;
|
||||||
|
} else {
|
||||||
|
/*
|
||||||
|
* 'c' is only a single byte. Reverse it and move
|
||||||
|
* it to the top of the hash, moving the rest to
|
||||||
|
* less-significant bits.
|
||||||
|
*/
|
||||||
|
c = (c & 0xF0) >> 4 | (c & 0x0F) << 4;
|
||||||
|
c = (c & 0xCC) >> 2 | (c & 0x33) << 2;
|
||||||
|
c = (c & 0xAA) >> 1 | (c & 0x55) << 1;
|
||||||
|
hash = (hash >> 2) + (c << 24);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return (base >> 6) ^ hash;
|
||||||
|
}
|
||||||
|
|
||||||
static inline enum object_type oe_type(const struct object_entry *e)
|
static inline enum object_type oe_type(const struct object_entry *e)
|
||||||
{
|
{
|
||||||
return e->type_valid ? e->type_ : OBJ_BAD;
|
return e->type_valid ? e->type_ : OBJ_BAD;
|
||||||
|
|
4
t/README
4
t/README
|
@ -471,6 +471,10 @@ a test and then fails then the whole test run will abort. This can help to make
|
||||||
sure the expected tests are executed and not silently skipped when their
|
sure the expected tests are executed and not silently skipped when their
|
||||||
dependency breaks or is simply not present in a new environment.
|
dependency breaks or is simply not present in a new environment.
|
||||||
|
|
||||||
|
GIT_TEST_NAME_HASH_VERSION=<int>, when set, causes 'git pack-objects' to
|
||||||
|
assume '--name-hash-version=<n>'.
|
||||||
|
|
||||||
|
|
||||||
Naming Tests
|
Naming Tests
|
||||||
------------
|
------------
|
||||||
|
|
||||||
|
|
|
@ -34,6 +34,7 @@ test_tool_sources = [
|
||||||
'test-match-trees.c',
|
'test-match-trees.c',
|
||||||
'test-mergesort.c',
|
'test-mergesort.c',
|
||||||
'test-mktemp.c',
|
'test-mktemp.c',
|
||||||
|
'test-name-hash.c',
|
||||||
'test-online-cpus.c',
|
'test-online-cpus.c',
|
||||||
'test-pack-mtimes.c',
|
'test-pack-mtimes.c',
|
||||||
'test-parse-options.c',
|
'test-parse-options.c',
|
||||||
|
|
|
@ -0,0 +1,23 @@
|
||||||
|
/*
|
||||||
|
* test-name-hash.c: Read a list of paths over stdin and report on their
|
||||||
|
* name-hash and full name-hash.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "test-tool.h"
|
||||||
|
#include "git-compat-util.h"
|
||||||
|
#include "pack-objects.h"
|
||||||
|
#include "strbuf.h"
|
||||||
|
|
||||||
|
int cmd__name_hash(int argc UNUSED, const char **argv UNUSED)
|
||||||
|
{
|
||||||
|
struct strbuf line = STRBUF_INIT;
|
||||||
|
|
||||||
|
while (!strbuf_getline(&line, stdin)) {
|
||||||
|
printf("%10u ", pack_name_hash(line.buf));
|
||||||
|
printf("%10u ", pack_name_hash_v2((unsigned const char *)line.buf));
|
||||||
|
printf("%s\n", line.buf);
|
||||||
|
}
|
||||||
|
|
||||||
|
strbuf_release(&line);
|
||||||
|
return 0;
|
||||||
|
}
|
|
@ -44,6 +44,7 @@ static struct test_cmd cmds[] = {
|
||||||
{ "match-trees", cmd__match_trees },
|
{ "match-trees", cmd__match_trees },
|
||||||
{ "mergesort", cmd__mergesort },
|
{ "mergesort", cmd__mergesort },
|
||||||
{ "mktemp", cmd__mktemp },
|
{ "mktemp", cmd__mktemp },
|
||||||
|
{ "name-hash", cmd__name_hash },
|
||||||
{ "online-cpus", cmd__online_cpus },
|
{ "online-cpus", cmd__online_cpus },
|
||||||
{ "pack-mtimes", cmd__pack_mtimes },
|
{ "pack-mtimes", cmd__pack_mtimes },
|
||||||
{ "parse-options", cmd__parse_options },
|
{ "parse-options", cmd__parse_options },
|
||||||
|
|
|
@ -37,6 +37,7 @@ int cmd__lazy_init_name_hash(int argc, const char **argv);
|
||||||
int cmd__match_trees(int argc, const char **argv);
|
int cmd__match_trees(int argc, const char **argv);
|
||||||
int cmd__mergesort(int argc, const char **argv);
|
int cmd__mergesort(int argc, const char **argv);
|
||||||
int cmd__mktemp(int argc, const char **argv);
|
int cmd__mktemp(int argc, const char **argv);
|
||||||
|
int cmd__name_hash(int argc, const char **argv);
|
||||||
int cmd__online_cpus(int argc, const char **argv);
|
int cmd__online_cpus(int argc, const char **argv);
|
||||||
int cmd__pack_mtimes(int argc, const char **argv);
|
int cmd__pack_mtimes(int argc, const char **argv);
|
||||||
int cmd__parse_options(int argc, const char **argv);
|
int cmd__parse_options(int argc, const char **argv);
|
||||||
|
|
|
@ -0,0 +1,70 @@
|
||||||
|
#!/bin/sh
|
||||||
|
|
||||||
|
test_description='Tests pack performance using bitmaps'
|
||||||
|
. ./perf-lib.sh
|
||||||
|
|
||||||
|
GIT_TEST_PASSING_SANITIZE_LEAK=0
|
||||||
|
export GIT_TEST_PASSING_SANITIZE_LEAK
|
||||||
|
|
||||||
|
test_perf_large_repo
|
||||||
|
|
||||||
|
test_expect_success 'create rev input' '
|
||||||
|
cat >in-thin <<-EOF &&
|
||||||
|
$(git rev-parse HEAD)
|
||||||
|
^$(git rev-parse HEAD~1)
|
||||||
|
EOF
|
||||||
|
|
||||||
|
cat >in-big <<-EOF &&
|
||||||
|
$(git rev-parse HEAD)
|
||||||
|
^$(git rev-parse HEAD~1000)
|
||||||
|
EOF
|
||||||
|
|
||||||
|
cat >in-shallow <<-EOF
|
||||||
|
$(git rev-parse HEAD)
|
||||||
|
--shallow $(git rev-parse HEAD)
|
||||||
|
EOF
|
||||||
|
'
|
||||||
|
|
||||||
|
for version in 1 2
|
||||||
|
do
|
||||||
|
export version
|
||||||
|
|
||||||
|
test_perf "thin pack with version $version" '
|
||||||
|
git pack-objects --thin --stdout --revs --sparse \
|
||||||
|
--name-hash-version=$version <in-thin >out
|
||||||
|
'
|
||||||
|
|
||||||
|
test_size "thin pack size with version $version" '
|
||||||
|
test_file_size out
|
||||||
|
'
|
||||||
|
|
||||||
|
test_perf "big pack with version $version" '
|
||||||
|
git pack-objects --stdout --revs --sparse \
|
||||||
|
--name-hash-version=$version <in-big >out
|
||||||
|
'
|
||||||
|
|
||||||
|
test_size "big pack size with version $version" '
|
||||||
|
test_file_size out
|
||||||
|
'
|
||||||
|
|
||||||
|
test_perf "shallow fetch pack with version $version" '
|
||||||
|
git pack-objects --stdout --revs --sparse --shallow \
|
||||||
|
--name-hash-version=$version <in-shallow >out
|
||||||
|
'
|
||||||
|
|
||||||
|
test_size "shallow pack size with version $version" '
|
||||||
|
test_file_size out
|
||||||
|
'
|
||||||
|
|
||||||
|
test_perf "repack with version $version" '
|
||||||
|
git repack -adf --name-hash-version=$version
|
||||||
|
'
|
||||||
|
|
||||||
|
test_size "repack size with version $version" '
|
||||||
|
gitdir=$(git rev-parse --git-dir) &&
|
||||||
|
pack=$(ls $gitdir/objects/pack/pack-*.pack) &&
|
||||||
|
test_file_size "$pack"
|
||||||
|
'
|
||||||
|
done
|
||||||
|
|
||||||
|
test_done
|
|
@ -0,0 +1,31 @@
|
||||||
|
#!/bin/sh
|
||||||
|
|
||||||
|
test_description='Tests pack performance using bitmaps'
|
||||||
|
. ./perf-lib.sh
|
||||||
|
|
||||||
|
GIT_TEST_PASSING_SANITIZE_LEAK=0
|
||||||
|
export GIT_TEST_PASSING_SANITIZE_LEAK
|
||||||
|
|
||||||
|
test_perf_large_repo
|
||||||
|
|
||||||
|
test_size 'paths at head' '
|
||||||
|
git ls-tree -r --name-only HEAD >path-list &&
|
||||||
|
wc -l <path-list &&
|
||||||
|
test-tool name-hash <path-list >name-hashes
|
||||||
|
'
|
||||||
|
|
||||||
|
for version in 1 2
|
||||||
|
do
|
||||||
|
test_size "distinct hash value: v$version" '
|
||||||
|
awk "{ print \$$version; }" <name-hashes | sort | \
|
||||||
|
uniq -c >name-hash-count &&
|
||||||
|
wc -l <name-hash-count
|
||||||
|
'
|
||||||
|
|
||||||
|
test_size "maximum multiplicity: v$version" '
|
||||||
|
sort -nr <name-hash-count | head -n 1 | \
|
||||||
|
awk "{ print \$1; }"
|
||||||
|
'
|
||||||
|
done
|
||||||
|
|
||||||
|
test_done
|
|
@ -45,7 +45,6 @@ rebase
|
||||||
remote
|
remote
|
||||||
remote-ext
|
remote-ext
|
||||||
remote-fd
|
remote-fd
|
||||||
repack
|
|
||||||
reset
|
reset
|
||||||
restore
|
restore
|
||||||
rev-parse
|
rev-parse
|
||||||
|
|
|
@ -689,4 +689,38 @@ do
|
||||||
'
|
'
|
||||||
done
|
done
|
||||||
|
|
||||||
|
test_expect_success 'valid and invalid --name-hash-versions' '
|
||||||
|
sane_unset GIT_TEST_NAME_HASH_VERSION &&
|
||||||
|
|
||||||
|
# Valid values are hard to verify other than "do not fail".
|
||||||
|
# Performance tests will be more valuable to validate these versions.
|
||||||
|
# Negative values are converted to version 1.
|
||||||
|
for value in -1 1 2
|
||||||
|
do
|
||||||
|
git pack-objects base --all --name-hash-version=$value || return 1
|
||||||
|
done &&
|
||||||
|
|
||||||
|
# Invalid values have clear post-conditions.
|
||||||
|
for value in 0 3
|
||||||
|
do
|
||||||
|
test_must_fail git pack-objects base --all --name-hash-version=$value 2>err &&
|
||||||
|
test_grep "invalid --name-hash-version option" err || return 1
|
||||||
|
done
|
||||||
|
'
|
||||||
|
|
||||||
|
# The following test is not necessarily a permanent choice, but since we do not
|
||||||
|
# have a "name hash version" bit in the .bitmap file format, we cannot write the
|
||||||
|
# hash values into the .bitmap file without risking breakage later.
|
||||||
|
#
|
||||||
|
# TODO: Make these compatible in the future and replace this test with the
|
||||||
|
# expected behavior when both are specified.
|
||||||
|
test_expect_success '--name-hash-version=2 and --write-bitmap-index are incompatible' '
|
||||||
|
git pack-objects base --all --name-hash-version=2 --write-bitmap-index 2>err &&
|
||||||
|
test_grep "currently, --write-bitmap-index requires --name-hash-version=1" err &&
|
||||||
|
|
||||||
|
# --stdout option silently removes --write-bitmap-index
|
||||||
|
git pack-objects --stdout --all --name-hash-version=2 --write-bitmap-index >out 2>err &&
|
||||||
|
! test_grep "currently, --write-bitmap-index requires --name-hash-version=1" err
|
||||||
|
'
|
||||||
|
|
||||||
test_done
|
test_done
|
||||||
|
|
|
@ -26,6 +26,36 @@ has_any () {
|
||||||
grep -Ff "$1" "$2"
|
grep -Ff "$1" "$2"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Since name-hash values are stored in the .bitmap files, add a test
|
||||||
|
# that checks that the name-hash calculations are stable across versions.
|
||||||
|
# Not exhaustive, but these hashing algorithms would be hard to change
|
||||||
|
# without causing deviations here.
|
||||||
|
test_expect_success 'name-hash value stability' '
|
||||||
|
cat >names <<-\EOF &&
|
||||||
|
first
|
||||||
|
second
|
||||||
|
third
|
||||||
|
a/one-long-enough-for-collisions
|
||||||
|
b/two-long-enough-for-collisions
|
||||||
|
many/parts/to/this/path/enough/to/collide/in/v2
|
||||||
|
enough/parts/to/this/path/enough/to/collide/in/v2
|
||||||
|
EOF
|
||||||
|
|
||||||
|
test-tool name-hash <names >out &&
|
||||||
|
|
||||||
|
cat >expect <<-\EOF &&
|
||||||
|
2582249472 1763573760 first
|
||||||
|
2289942528 1188134912 second
|
||||||
|
2300837888 1130758144 third
|
||||||
|
2544516325 3963087891 a/one-long-enough-for-collisions
|
||||||
|
2544516325 4013419539 b/two-long-enough-for-collisions
|
||||||
|
1420111091 1709547268 many/parts/to/this/path/enough/to/collide/in/v2
|
||||||
|
1420111091 1709547268 enough/parts/to/this/path/enough/to/collide/in/v2
|
||||||
|
EOF
|
||||||
|
|
||||||
|
test_cmp expect out
|
||||||
|
'
|
||||||
|
|
||||||
test_bitmap_cases () {
|
test_bitmap_cases () {
|
||||||
writeLookupTable=false
|
writeLookupTable=false
|
||||||
for i in "$@"
|
for i in "$@"
|
||||||
|
@ -419,7 +449,10 @@ test_bitmap_cases () {
|
||||||
cat >expect <<-\EOF &&
|
cat >expect <<-\EOF &&
|
||||||
error: missing value for '\''pack.preferbitmaptips'\''
|
error: missing value for '\''pack.preferbitmaptips'\''
|
||||||
EOF
|
EOF
|
||||||
git repack -adb 2>actual &&
|
|
||||||
|
# Disable name hash version adjustment due to stderr comparison.
|
||||||
|
GIT_TEST_NAME_HASH_VERSION=1 \
|
||||||
|
git repack -adb 2>actual &&
|
||||||
test_cmp expect actual
|
test_cmp expect actual
|
||||||
)
|
)
|
||||||
'
|
'
|
||||||
|
|
|
@ -208,7 +208,8 @@ test_expect_success 'bitmapPseudoMerge.stableThreshold creates stable groups' '
|
||||||
'
|
'
|
||||||
|
|
||||||
test_expect_success 'out of order thresholds are rejected' '
|
test_expect_success 'out of order thresholds are rejected' '
|
||||||
test_must_fail git \
|
# Disable the test var to remove a stderr message.
|
||||||
|
test_must_fail env GIT_TEST_NAME_HASH_VERSION=1 git \
|
||||||
-c bitmapPseudoMerge.test.pattern="refs/*" \
|
-c bitmapPseudoMerge.test.pattern="refs/*" \
|
||||||
-c bitmapPseudoMerge.test.threshold=1.month.ago \
|
-c bitmapPseudoMerge.test.threshold=1.month.ago \
|
||||||
-c bitmapPseudoMerge.test.stableThreshold=1.week.ago \
|
-c bitmapPseudoMerge.test.stableThreshold=1.week.ago \
|
||||||
|
|
|
@ -1237,7 +1237,12 @@ test_expect_success 'all boundary commits are excluded' '
|
||||||
test_tick &&
|
test_tick &&
|
||||||
git merge otherside &&
|
git merge otherside &&
|
||||||
ad=$(git log --no-walk --format=%ad HEAD) &&
|
ad=$(git log --no-walk --format=%ad HEAD) &&
|
||||||
git bundle create twoside-boundary.bdl main --since="$ad" &&
|
|
||||||
|
# If the a different name hash function is used here, then no delta
|
||||||
|
# pair is found and the bundle does not expand to three objects
|
||||||
|
# when fixing the thin object.
|
||||||
|
GIT_TEST_NAME_HASH_VERSION=1 \
|
||||||
|
git bundle create twoside-boundary.bdl main --since="$ad" &&
|
||||||
test_bundle_object_count --thin twoside-boundary.bdl 3
|
test_bundle_object_count --thin twoside-boundary.bdl 3
|
||||||
'
|
'
|
||||||
|
|
||||||
|
|
|
@ -246,7 +246,11 @@ test_expect_success 'create bundle with --since option' '
|
||||||
EOF
|
EOF
|
||||||
test_cmp expect actual &&
|
test_cmp expect actual &&
|
||||||
|
|
||||||
git bundle create since.bdl \
|
# If a different name hash function is used, then one fewer
|
||||||
|
# delta base is found and this counts a different number
|
||||||
|
# of objects after performing --fix-thin.
|
||||||
|
GIT_TEST_NAME_HASH_VERSION=1 \
|
||||||
|
git bundle create since.bdl \
|
||||||
--since "Thu Apr 7 15:27:00 2005 -0700" \
|
--since "Thu Apr 7 15:27:00 2005 -0700" \
|
||||||
--all &&
|
--all &&
|
||||||
|
|
||||||
|
|
|
@ -1093,7 +1093,9 @@ test_expect_success 'submodule update --quiet passes quietness to fetch with a s
|
||||||
) &&
|
) &&
|
||||||
git clone super4 super5 &&
|
git clone super4 super5 &&
|
||||||
(cd super5 &&
|
(cd super5 &&
|
||||||
git submodule update --quiet --init --depth=1 submodule3 >out 2>err &&
|
# This test var can mess with the stderr output checked in this test.
|
||||||
|
GIT_TEST_NAME_HASH_VERSION=1 \
|
||||||
|
git submodule update --quiet --init --depth=1 submodule3 >out 2>err &&
|
||||||
test_must_be_empty out &&
|
test_must_be_empty out &&
|
||||||
test_must_be_empty err
|
test_must_be_empty err
|
||||||
) &&
|
) &&
|
||||||
|
|
|
@ -308,7 +308,10 @@ test_expect_success 'no bitmaps created if .keep files present' '
|
||||||
keep=${pack%.pack}.keep &&
|
keep=${pack%.pack}.keep &&
|
||||||
test_when_finished "rm -f \"\$keep\"" &&
|
test_when_finished "rm -f \"\$keep\"" &&
|
||||||
>"$keep" &&
|
>"$keep" &&
|
||||||
git -C bare.git repack -ad 2>stderr &&
|
|
||||||
|
# Disable --name-hash-version test due to stderr comparison.
|
||||||
|
GIT_TEST_NAME_HASH_VERSION=1 \
|
||||||
|
git -C bare.git repack -ad 2>stderr &&
|
||||||
test_must_be_empty stderr &&
|
test_must_be_empty stderr &&
|
||||||
find bare.git/objects/pack/ -type f -name "*.bitmap" >actual &&
|
find bare.git/objects/pack/ -type f -name "*.bitmap" >actual &&
|
||||||
test_must_be_empty actual
|
test_must_be_empty actual
|
||||||
|
@ -319,7 +322,10 @@ test_expect_success 'auto-bitmaps do not complain if unavailable' '
|
||||||
blob=$(test-tool genrandom big $((1024*1024)) |
|
blob=$(test-tool genrandom big $((1024*1024)) |
|
||||||
git -C bare.git hash-object -w --stdin) &&
|
git -C bare.git hash-object -w --stdin) &&
|
||||||
git -C bare.git update-ref refs/tags/big $blob &&
|
git -C bare.git update-ref refs/tags/big $blob &&
|
||||||
git -C bare.git repack -ad 2>stderr &&
|
|
||||||
|
# Disable --name-hash-version test due to stderr comparison.
|
||||||
|
GIT_TEST_NAME_HASH_VERSION=1 \
|
||||||
|
git -C bare.git repack -ad 2>stderr &&
|
||||||
test_must_be_empty stderr &&
|
test_must_be_empty stderr &&
|
||||||
find bare.git/objects/pack -type f -name "*.bitmap" >actual &&
|
find bare.git/objects/pack -type f -name "*.bitmap" >actual &&
|
||||||
test_must_be_empty actual
|
test_must_be_empty actual
|
||||||
|
@ -776,6 +782,12 @@ test_expect_success 'repack -ad cleans up old .tmp-* packs' '
|
||||||
test_must_be_empty tmpfiles
|
test_must_be_empty tmpfiles
|
||||||
'
|
'
|
||||||
|
|
||||||
|
test_expect_success '--name-hash-version option passes through to pack-objects' '
|
||||||
|
GIT_TRACE2_EVENT="$(pwd)/hash-trace.txt" \
|
||||||
|
git repack -a --name-hash-version=2 &&
|
||||||
|
test_subcommand_flex git pack-objects --name-hash-version=2 <hash-trace.txt
|
||||||
|
'
|
||||||
|
|
||||||
test_expect_success 'setup for update-server-info' '
|
test_expect_success 'setup for update-server-info' '
|
||||||
git init update-server-info &&
|
git init update-server-info &&
|
||||||
test_commit -C update-server-info message
|
test_commit -C update-server-info message
|
||||||
|
|
|
@ -1896,6 +1896,32 @@ test_subcommand () {
|
||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Check that the given subcommand was run with the given set of
|
||||||
|
# arguments in order (but with possible extra arguments).
|
||||||
|
#
|
||||||
|
# test_subcommand_flex [!] <command> <args>... < <trace>
|
||||||
|
#
|
||||||
|
# If the first parameter passed is !, this instead checks that
|
||||||
|
# the given command was not called.
|
||||||
|
#
|
||||||
|
test_subcommand_flex () {
|
||||||
|
local negate=
|
||||||
|
if test "$1" = "!"
|
||||||
|
then
|
||||||
|
negate=t
|
||||||
|
shift
|
||||||
|
fi
|
||||||
|
|
||||||
|
local expr="$(printf '"%s".*' "$@")"
|
||||||
|
|
||||||
|
if test -n "$negate"
|
||||||
|
then
|
||||||
|
! grep "\[$expr\]"
|
||||||
|
else
|
||||||
|
grep "\[$expr\]"
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
|
||||||
# Check that the given command was invoked as part of the
|
# Check that the given command was invoked as part of the
|
||||||
# trace2-format trace on stdin.
|
# trace2-format trace on stdin.
|
||||||
#
|
#
|
||||||
|
|
Loading…
Reference in New Issue