range-diff: add configurable memory limit for cost matrix
When comparing large commit ranges (e.g., 250,000+ commits), range-diff
attempts to allocate an n×n cost matrix that can exhaust available memory.
For example, with 256,784 commits (n = 513,568), the matrix would require
approximately 256GB of memory (513,568² × 4 bytes), causing either immediate
segmentation faults due to integer overflow or system hangs.

Add a memory limit check in get_correspondences() before allocating the cost
matrix. This check uses the total size in bytes (n² × sizeof(int)) and
compares it against a configurable maximum, preventing both excessive memory
usage and integer overflow issues.

The limit is configurable via a new --max-memory option that accepts
human-readable sizes (e.g., "1G", "500M"). The default is 4GB for 64-bit
systems and 2GB for 32-bit systems. This allows comparing ranges of
approximately 32,000 (16,000) commits - generous for real-world use cases
while preventing impractical operations.

When the limit is exceeded, range-diff now displays a clear error message
showing both the requested memory size and the maximum allowed, formatted in
human-readable units for better user experience.

Example usage:

    git range-diff --max-memory=1G branch1...branch2
    git range-diff --max-memory=500M base..topic1 base..topic2

This approach was chosen over alternatives:

- Pre-counting commits: Would require spawning additional git processes and
  reading all commits twice
- Limiting by commit count: Less precise than actual memory usage
- Streaming approach: Would require significant refactoring of the current
  algorithm

This issue was previously discussed in:
https://lore.kernel.org/git/RFC-cover-v2-0.5-00000000000-20211210T122901Z-avarab@gmail.com/

Acked-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Paulo Casaretto <pcasaretto@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

main
parent
f814da676a
commit
00727249ec
|
@ -1404,6 +1404,7 @@ static void make_cover_letter(struct rev_info *rev, int use_separate_file,
|
||||||
struct range_diff_options range_diff_opts = {
|
struct range_diff_options range_diff_opts = {
|
||||||
.creation_factor = rev->creation_factor,
|
.creation_factor = rev->creation_factor,
|
||||||
.dual_color = 1,
|
.dual_color = 1,
|
||||||
|
.max_memory = RANGE_DIFF_MAX_MEMORY_DEFAULT,
|
||||||
.diffopt = &opts,
|
.diffopt = &opts,
|
||||||
.other_arg = &other_arg
|
.other_arg = &other_arg
|
||||||
};
|
};
|
||||||
|
|
|
@ -6,6 +6,7 @@
|
||||||
#include "parse-options.h"
|
#include "parse-options.h"
|
||||||
#include "range-diff.h"
|
#include "range-diff.h"
|
||||||
#include "config.h"
|
#include "config.h"
|
||||||
|
#include "parse.h"
|
||||||
|
|
||||||
|
|
||||||
static const char * const builtin_range_diff_usage[] = {
|
static const char * const builtin_range_diff_usage[] = {
|
||||||
|
@ -15,6 +16,21 @@ N_("git range-diff [<options>] <base> <old-tip> <new-tip>"),
|
||||||
NULL
|
NULL
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static int parse_max_memory(const struct option *opt, const char *arg, int unset)
|
||||||
|
{
|
||||||
|
size_t *max_memory = opt->value;
|
||||||
|
uintmax_t val;
|
||||||
|
|
||||||
|
if (unset)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
if (!git_parse_unsigned(arg, &val, SIZE_MAX))
|
||||||
|
return error(_("invalid max-memory value: %s"), arg);
|
||||||
|
|
||||||
|
*max_memory = (size_t)val;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
int cmd_range_diff(int argc,
|
int cmd_range_diff(int argc,
|
||||||
const char **argv,
|
const char **argv,
|
||||||
const char *prefix,
|
const char *prefix,
|
||||||
|
@ -25,6 +41,7 @@ int cmd_range_diff(int argc,
|
||||||
struct strvec diff_merges_arg = STRVEC_INIT;
|
struct strvec diff_merges_arg = STRVEC_INIT;
|
||||||
struct range_diff_options range_diff_opts = {
|
struct range_diff_options range_diff_opts = {
|
||||||
.creation_factor = RANGE_DIFF_CREATION_FACTOR_DEFAULT,
|
.creation_factor = RANGE_DIFF_CREATION_FACTOR_DEFAULT,
|
||||||
|
.max_memory = RANGE_DIFF_MAX_MEMORY_DEFAULT,
|
||||||
.diffopt = &diffopt,
|
.diffopt = &diffopt,
|
||||||
.other_arg = &other_arg
|
.other_arg = &other_arg
|
||||||
};
|
};
|
||||||
|
@ -40,6 +57,10 @@ int cmd_range_diff(int argc,
|
||||||
PARSE_OPT_OPTARG),
|
PARSE_OPT_OPTARG),
|
||||||
OPT_PASSTHRU_ARGV(0, "diff-merges", &diff_merges_arg,
|
OPT_PASSTHRU_ARGV(0, "diff-merges", &diff_merges_arg,
|
||||||
N_("style"), N_("passed to 'git log'"), 0),
|
N_("style"), N_("passed to 'git log'"), 0),
|
||||||
|
OPT_CALLBACK(0, "max-memory", &range_diff_opts.max_memory,
|
||||||
|
N_("size"),
|
||||||
|
N_("maximum memory for cost matrix (default 4G)"),
|
||||||
|
parse_max_memory),
|
||||||
OPT_PASSTHRU_ARGV(0, "remerge-diff", &diff_merges_arg, NULL,
|
OPT_PASSTHRU_ARGV(0, "remerge-diff", &diff_merges_arg, NULL,
|
||||||
N_("passed to 'git log'"), PARSE_OPT_NOARG),
|
N_("passed to 'git log'"), PARSE_OPT_NOARG),
|
||||||
OPT_BOOL(0, "left-only", &left_only,
|
OPT_BOOL(0, "left-only", &left_only,
|
||||||
|
|
|
@ -717,6 +717,7 @@ static void show_diff_of_diff(struct rev_info *opt)
|
||||||
struct range_diff_options range_diff_opts = {
|
struct range_diff_options range_diff_opts = {
|
||||||
.creation_factor = opt->creation_factor,
|
.creation_factor = opt->creation_factor,
|
||||||
.dual_color = 1,
|
.dual_color = 1,
|
||||||
|
.max_memory = RANGE_DIFF_MAX_MEMORY_DEFAULT,
|
||||||
.diffopt = &opts
|
.diffopt = &opts
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
20
range-diff.c
20
range-diff.c
|
@ -325,13 +325,24 @@ static int diffsize(const char *a, const char *b)
|
||||||
}
|
}
|
||||||
|
|
||||||
static void get_correspondences(struct string_list *a, struct string_list *b,
|
static void get_correspondences(struct string_list *a, struct string_list *b,
|
||||||
int creation_factor)
|
int creation_factor, size_t max_memory)
|
||||||
{
|
{
|
||||||
int n = a->nr + b->nr;
|
int n = a->nr + b->nr;
|
||||||
int *cost, c, *a2b, *b2a;
|
int *cost, c, *a2b, *b2a;
|
||||||
int i, j;
|
int i, j;
|
||||||
|
size_t cost_size = st_mult(n, n);
|
||||||
ALLOC_ARRAY(cost, st_mult(n, n));
|
size_t cost_bytes = st_mult(sizeof(int), cost_size);
|
||||||
|
if (cost_bytes >= max_memory) {
|
||||||
|
struct strbuf cost_str = STRBUF_INIT;
|
||||||
|
struct strbuf max_str = STRBUF_INIT;
|
||||||
|
strbuf_humanise_bytes(&cost_str, cost_bytes);
|
||||||
|
strbuf_humanise_bytes(&max_str, max_memory);
|
||||||
|
die(_("range-diff: unable to compute the range-diff, since it "
|
||||||
|
"exceeds the maximum memory for the cost matrix: %s "
|
||||||
|
"(%"PRIuMAX" bytes) needed, limited to %s (%"PRIuMAX" bytes)"),
|
||||||
|
cost_str.buf, (uintmax_t)cost_bytes, max_str.buf, (uintmax_t)max_memory);
|
||||||
|
}
|
||||||
|
ALLOC_ARRAY(cost, cost_size);
|
||||||
ALLOC_ARRAY(a2b, n);
|
ALLOC_ARRAY(a2b, n);
|
||||||
ALLOC_ARRAY(b2a, n);
|
ALLOC_ARRAY(b2a, n);
|
||||||
|
|
||||||
|
@ -591,7 +602,8 @@ int show_range_diff(const char *range1, const char *range2,
|
||||||
if (!res) {
|
if (!res) {
|
||||||
find_exact_matches(&branch1, &branch2);
|
find_exact_matches(&branch1, &branch2);
|
||||||
get_correspondences(&branch1, &branch2,
|
get_correspondences(&branch1, &branch2,
|
||||||
range_diff_opts->creation_factor);
|
range_diff_opts->creation_factor,
|
||||||
|
range_diff_opts->max_memory);
|
||||||
output(&branch1, &branch2, range_diff_opts);
|
output(&branch1, &branch2, range_diff_opts);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -5,6 +5,10 @@
|
||||||
#include "strvec.h"
|
#include "strvec.h"
|
||||||
|
|
||||||
#define RANGE_DIFF_CREATION_FACTOR_DEFAULT 60
|
#define RANGE_DIFF_CREATION_FACTOR_DEFAULT 60
|
||||||
|
/*
 * Default value for range_diff_options.max_memory, in bytes: 4GB when
 * pointers are at least 8 bytes wide, 2GB otherwise. Written as a single
 * size_t product so no intermediate exceeds the selected total.
 */
#define RANGE_DIFF_MAX_MEMORY_DEFAULT \
	((size_t)1024 * 1024 * 1024 * (sizeof(void *) >= 8 ? 4 : 2))
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* A much higher value than the default, when we KNOW we are comparing
|
* A much higher value than the default, when we KNOW we are comparing
|
||||||
|
@ -17,6 +21,7 @@ struct range_diff_options {
|
||||||
unsigned dual_color:1;
|
unsigned dual_color:1;
|
||||||
unsigned left_only:1, right_only:1;
|
unsigned left_only:1, right_only:1;
|
||||||
unsigned include_merges:1;
|
unsigned include_merges:1;
|
||||||
|
size_t max_memory;
|
||||||
const struct diff_options *diffopt; /* may be NULL */
|
const struct diff_options *diffopt; /* may be NULL */
|
||||||
const struct strvec *other_arg; /* may be NULL */
|
const struct strvec *other_arg; /* may be NULL */
|
||||||
};
|
};
|
||||||
|
|
Loading…
Reference in New Issue