diffcore-rename: avoid doing basename comparisons for irrelevant sources
The basename comparison optimization implemented in
find_basename_matches() is very beneficial since it allows a source to
sometimes only be compared with one other file instead of N other files.
When a match is found, both a source and destination can be removed from
the matrix of inexact rename comparisons. In contrast, the irrelevant
source optimization only allows us to remove a source from the matrix of
inexact rename comparisons, but it has the advantage that the source
file need not even be loaded into memory and is compared to 0 other
files. Generally, not comparing at all is a bigger performance win, so
when both optimizations could apply, prefer to use the
irrelevant-source optimization.
For the testcases mentioned in commit 557ac0350d ("merge-ort: begin
performance work; instrument with trace2_region_* calls", 2020-10-28),
this change improves the performance as follows:
                        Before                  After
    no-renames:       5.708 s ± 0.111 s     5.680 s ± 0.096 s
    mega-renames:   102.171 s ± 0.440 s    13.812 s ± 0.162 s
    just-one-mega:    3.471 s ± 0.015 s   506.0  ms ± 3.9  ms
Signed-off-by: Elijah Newren <newren@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
maint
parent
f89b4f2bee
commit
e4fd06e7e2
|
@ -527,6 +527,7 @@ static void update_dir_rename_counts(struct dir_rename_info *info,
|
||||||
}
|
}
|
||||||
|
|
||||||
static void initialize_dir_rename_info(struct dir_rename_info *info,
|
static void initialize_dir_rename_info(struct dir_rename_info *info,
|
||||||
|
struct strset *relevant_sources,
|
||||||
struct strset *dirs_removed,
|
struct strset *dirs_removed,
|
||||||
struct strmap *dir_rename_count)
|
struct strmap *dir_rename_count)
|
||||||
{
|
{
|
||||||
|
@ -534,7 +535,7 @@ static void initialize_dir_rename_info(struct dir_rename_info *info,
|
||||||
struct strmap_entry *entry;
|
struct strmap_entry *entry;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
if (!dirs_removed) {
|
if (!dirs_removed && !relevant_sources) {
|
||||||
info->setup = 0;
|
info->setup = 0;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -549,7 +550,20 @@ static void initialize_dir_rename_info(struct dir_rename_info *info,
|
||||||
strmap_init_with_options(&info->dir_rename_guess, NULL, 0);
|
strmap_init_with_options(&info->dir_rename_guess, NULL, 0);
|
||||||
|
|
||||||
/* Setup info->relevant_source_dirs */
|
/* Setup info->relevant_source_dirs */
|
||||||
info->relevant_source_dirs = dirs_removed;
|
info->relevant_source_dirs = NULL;
|
||||||
|
if (dirs_removed || !relevant_sources) {
|
||||||
|
info->relevant_source_dirs = dirs_removed; /* might be NULL */
|
||||||
|
} else {
|
||||||
|
info->relevant_source_dirs = xmalloc(sizeof(struct strintmap));
|
||||||
|
strset_init(info->relevant_source_dirs);
|
||||||
|
strset_for_each_entry(relevant_sources, &iter, entry) {
|
||||||
|
char *dirname = get_dirname(entry->key);
|
||||||
|
if (!dirs_removed ||
|
||||||
|
strset_contains(dirs_removed, dirname))
|
||||||
|
strset_add(info->relevant_source_dirs, dirname);
|
||||||
|
free(dirname);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Loop setting up both info->idx_map, and doing setup of
|
* Loop setting up both info->idx_map, and doing setup of
|
||||||
|
@ -627,6 +641,13 @@ static void cleanup_dir_rename_info(struct dir_rename_info *info,
|
||||||
/* dir_rename_guess */
|
/* dir_rename_guess */
|
||||||
strmap_clear(&info->dir_rename_guess, 1);
|
strmap_clear(&info->dir_rename_guess, 1);
|
||||||
|
|
||||||
|
/* relevant_source_dirs */
|
||||||
|
if (info->relevant_source_dirs &&
|
||||||
|
info->relevant_source_dirs != dirs_removed) {
|
||||||
|
strset_clear(info->relevant_source_dirs);
|
||||||
|
FREE_AND_NULL(info->relevant_source_dirs);
|
||||||
|
}
|
||||||
|
|
||||||
/* dir_rename_count */
|
/* dir_rename_count */
|
||||||
if (!keep_dir_rename_count) {
|
if (!keep_dir_rename_count) {
|
||||||
partial_clear_dir_rename_count(info->dir_rename_count);
|
partial_clear_dir_rename_count(info->dir_rename_count);
|
||||||
|
@ -749,6 +770,7 @@ static int idx_possible_rename(char *filename, struct dir_rename_info *info)
|
||||||
static int find_basename_matches(struct diff_options *options,
|
static int find_basename_matches(struct diff_options *options,
|
||||||
int minimum_score,
|
int minimum_score,
|
||||||
struct dir_rename_info *info,
|
struct dir_rename_info *info,
|
||||||
|
struct strset *relevant_sources,
|
||||||
struct strset *dirs_removed)
|
struct strset *dirs_removed)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
|
@ -839,6 +861,11 @@ static int find_basename_matches(struct diff_options *options,
|
||||||
intptr_t src_index;
|
intptr_t src_index;
|
||||||
intptr_t dst_index;
|
intptr_t dst_index;
|
||||||
|
|
||||||
|
/* Skip irrelevant sources */
|
||||||
|
if (relevant_sources &&
|
||||||
|
!strset_contains(relevant_sources, filename))
|
||||||
|
continue;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If the basename is unique among remaining sources, then
|
* If the basename is unique among remaining sources, then
|
||||||
* src_index will equal 'i' and we can attempt to match it
|
* src_index will equal 'i' and we can attempt to match it
|
||||||
|
@ -1164,7 +1191,7 @@ void diffcore_rename_extended(struct diff_options *options,
|
||||||
|
|
||||||
/* Preparation for basename-driven matching. */
|
/* Preparation for basename-driven matching. */
|
||||||
trace2_region_enter("diff", "dir rename setup", options->repo);
|
trace2_region_enter("diff", "dir rename setup", options->repo);
|
||||||
initialize_dir_rename_info(&info,
|
initialize_dir_rename_info(&info, relevant_sources,
|
||||||
dirs_removed, dir_rename_count);
|
dirs_removed, dir_rename_count);
|
||||||
trace2_region_leave("diff", "dir rename setup", options->repo);
|
trace2_region_leave("diff", "dir rename setup", options->repo);
|
||||||
|
|
||||||
|
@ -1172,7 +1199,9 @@ void diffcore_rename_extended(struct diff_options *options,
|
||||||
trace2_region_enter("diff", "basename matches", options->repo);
|
trace2_region_enter("diff", "basename matches", options->repo);
|
||||||
rename_count += find_basename_matches(options,
|
rename_count += find_basename_matches(options,
|
||||||
min_basename_score,
|
min_basename_score,
|
||||||
&info, dirs_removed);
|
&info,
|
||||||
|
relevant_sources,
|
||||||
|
dirs_removed);
|
||||||
trace2_region_leave("diff", "basename matches", options->repo);
|
trace2_region_leave("diff", "basename matches", options->repo);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
Loading…
Reference in New Issue