You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

434 lines
11 KiB

#include "cache.h"
#include "commit.h"
#include "config.h"
#include "revision.h"
#include "strvec.h"
#include "list-objects.h"
#include "list-objects-filter.h"
#include "list-objects-filter-options.h"
#include "promisor-remote.h"
#include "trace.h"
#include "url.h"
static int parse_combine_filter(
struct list_objects_filter_options *filter_options,
const char *arg,
struct strbuf *errbuf);
const char *list_object_filter_config_name(enum list_objects_filter_choice c)
{
switch (c) {
case LOFC_DISABLED:
/* we have no name for "no filter at all" */
break;
case LOFC_BLOB_NONE:
return "blob:none";
case LOFC_BLOB_LIMIT:
return "blob:limit";
case LOFC_TREE_DEPTH:
return "tree";
case LOFC_SPARSE_OID:
return "sparse:oid";
case LOFC_OBJECT_TYPE:
return "object:type";
case LOFC_COMBINE:
return "combine";
case LOFC__COUNT:
/* not a real filter type; just the count of all filters */
break;
}
BUG("list_object_filter_config_name: invalid argument '%d'", c);
}
int gently_parse_list_objects_filter(
struct list_objects_filter_options *filter_options,
const char *arg,
struct strbuf *errbuf)
{
const char *v0;
if (!arg)
return 0;
if (filter_options->choice)
BUG("filter_options already populated");
if (!strcmp(arg, "blob:none")) {
filter_options->choice = LOFC_BLOB_NONE;
return 0;
} else if (skip_prefix(arg, "blob:limit=", &v0)) {
if (git_parse_ulong(v0, &filter_options->blob_limit_value)) {
filter_options->choice = LOFC_BLOB_LIMIT;
return 0;
}
} else if (skip_prefix(arg, "tree:", &v0)) {
if (!git_parse_ulong(v0, &filter_options->tree_exclude_depth)) {
strbuf_addstr(errbuf, _("expected 'tree:<depth>'"));
return 1;
}
filter_options->choice = LOFC_TREE_DEPTH;
return 0;
} else if (skip_prefix(arg, "sparse:oid=", &v0)) {
list-objects-filter: delay parsing of sparse oid The list-objects-filter code has two steps to its initialization: 1. parse_list_objects_filter() makes sure the spec is a filter we know about and is syntactically correct. This step is done by "rev-list" or "upload-pack" that is going to apply a filter, but also by "git clone" or "git fetch" before they send the spec across the wire. 2. list_objects_filter__init() runs the type-specific initialization (using function pointers established in step 1). This happens at the start of traverse_commit_list_filtered(), when we're about to actually use the filter. It's a good idea to parse as much as we can in step 1, in order to catch problems early (e.g., a blob size limit that isn't a number). But one thing we _shouldn't_ do is resolve any oids at that step (e.g., for sparse-file contents specified by oid). In the case of a fetch, the oid has to be resolved on the remote side. The current code does resolve the oid during the parse phase, but ignores any error (which we must do, because we might just be sending the spec across the wire). This leads to two bugs: - if we're not in a repository (e.g., because it's git-clone parsing the spec), then we trigger a BUG() trying to resolve the name - if we did hit the error case, we still have to notice that later and bail. The code path in rev-list handles this, but the one in upload-pack does not, leading to a segfault. We can fix both by moving the oid resolution into the sparse-oid init function. At that point we know we have a repository (because we're about to traverse), and handling the error there fixes the segfault. As a bonus, we can drop the NULL sparse_oid_value check in rev-list, since this is now handled in the sparse-oid-filter init function. Signed-off-by: Jeff King <peff@peff.net> Acked-by: Jeff Hostetler <jeffhost@microsoft.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
6 years ago
filter_options->sparse_oid_name = xstrdup(v0);
filter_options->choice = LOFC_SPARSE_OID;
return 0;
} else if (skip_prefix(arg, "sparse:path=", &v0)) {
if (errbuf) {
strbuf_addstr(
errbuf,
_("sparse:path filters support has been dropped"));
}
return 1;
} else if (skip_prefix(arg, "object:type=", &v0)) {
int type = type_from_string_gently(v0, strlen(v0), 1);
if (type < 0) {
strbuf_addf(errbuf, _("'%s' for 'object:type=<type>' is "
"not a valid object type"), v0);
return 1;
}
filter_options->object_type = type;
filter_options->choice = LOFC_OBJECT_TYPE;
return 0;
} else if (skip_prefix(arg, "combine:", &v0)) {
return parse_combine_filter(filter_options, v0, errbuf);
}
/*
* Please update _git_fetch() in git-completion.bash when you
* add new filters
*/
strbuf_addf(errbuf, _("invalid filter-spec '%s'"), arg);
memset(filter_options, 0, sizeof(*filter_options));
return 1;
}
static const char *RESERVED_NON_WS = "~`!@#$^&*()[]{}\\;'\",<>?";
static int has_reserved_character(
struct strbuf *sub_spec, struct strbuf *errbuf)
{
const char *c = sub_spec->buf;
while (*c) {
if (*c <= ' ' || strchr(RESERVED_NON_WS, *c)) {
strbuf_addf(
errbuf,
_("must escape char in sub-filter-spec: '%c'"),
*c);
return 1;
}
c++;
}
return 0;
}
static int parse_combine_subfilter(
struct list_objects_filter_options *filter_options,
struct strbuf *subspec,
struct strbuf *errbuf)
{
size_t new_index = filter_options->sub_nr;
char *decoded;
int result;
ALLOC_GROW_BY(filter_options->sub, filter_options->sub_nr, 1,
filter_options->sub_alloc);
decoded = url_percent_decode(subspec->buf);
result = has_reserved_character(subspec, errbuf) ||
gently_parse_list_objects_filter(
&filter_options->sub[new_index], decoded, errbuf);
free(decoded);
return result;
}
static int parse_combine_filter(
struct list_objects_filter_options *filter_options,
const char *arg,
struct strbuf *errbuf)
{
struct strbuf **subspecs = strbuf_split_str(arg, '+', 0);
size_t sub;
int result = 0;
if (!subspecs[0]) {
strbuf_addstr(errbuf, _("expected something after combine:"));
result = 1;
goto cleanup;
}
for (sub = 0; subspecs[sub] && !result; sub++) {
if (subspecs[sub + 1]) {
/*
* This is not the last subspec. Remove trailing "+" so
* we can parse it.
*/
size_t last = subspecs[sub]->len - 1;
assert(subspecs[sub]->buf[last] == '+');
strbuf_remove(subspecs[sub], last, 1);
}
result = parse_combine_subfilter(
filter_options, subspecs[sub], errbuf);
}
filter_options->choice = LOFC_COMBINE;
cleanup:
strbuf_list_free(subspecs);
if (result) {
list_objects_filter_release(filter_options);
memset(filter_options, 0, sizeof(*filter_options));
}
return result;
}
static int allow_unencoded(char ch)
{
if (ch <= ' ' || ch == '%' || ch == '+')
return 0;
return !strchr(RESERVED_NON_WS, ch);
}
static void filter_spec_append_urlencode(
struct list_objects_filter_options *filter, const char *raw)
{
struct strbuf buf = STRBUF_INIT;
strbuf_addstr_urlencode(&buf, raw, allow_unencoded);
trace_printf("Add to combine filter-spec: %s\n", buf.buf);
list_objects_filter_options: plug leak of filter_spec strings The list_objects_filter_options struct contains a string_list to store the filter_spec. Because we allow the options struct to be zero-initialized by callers, the string_list's strdup_strings field is generally not set. Because we don't want to depend on the memory lifetimes of any strings passed in to the list-objects API, everything we add to the string_list is duplicated (either via xstrdup(), or via strbuf_detach()). So far so good, but now we have a problem at cleanup time: when we clear the list, the string_list API doesn't realize that it needs to free all of those strings, and we leak them. One option would be to set strdup_strings right before clearing the list. But this is tricky for two reasons: 1. There's one spot, in partial_clone_get_default_filter_spec(), that fails to duplicate its argument. We could fix that, but... 2. We clear the list in a surprising number of places. As you might expect, we do so in list_objects_filter_release(). But we also clear and rewrite it in expand_list_objects_filter_spec(), list_objects_filter_spec(), and transform_to_combine_type(). We'd have to put the same hack in all of those spots. Instead, let's just set strdup_strings before adding anything. That lets us drop the extra manual xstrdup() calls, fixes the spot mentioned in (1) above that _should_ be duplicating, and future-proofs further calls. We do have to switch the strbuf_detach() calls to use the nodup form, but that's an easy change, and the resulting code more clearly shows the expected ownership transfer. This also resolves a weird inconsistency: when we make a deep copy with list_objects_filter_copy(), it initializes the copy's filter_spec with string_list_init_dup(). So the copy frees its string_list memory correctly, but accidentally leaks the extra manual-xstrdup()'d strings! There is one hiccup, though. In an ideal world, everyone would allocate the list_objects_filter_options struct with an initializer which used STRING_LIST_INIT_DUP under the hood. But there are a bunch of existing callers which think that zero-initializing is good enough. We can leave them as-is by noting that the list is always initially populated via parse_list_objects_filter(). So we can just initialize the strdup_strings flag there. This is arguably a band-aid, but it works reliably. And it doesn't make anything harder if we want to switch all the callers later to a new LIST_OBJECTS_FILTER_INIT. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2 years ago
string_list_append_nodup(&filter->filter_spec, strbuf_detach(&buf, NULL));
}
/*
* Changes filter_options into an equivalent LOFC_COMBINE filter options
* instance. Does not do anything if filter_options is already LOFC_COMBINE.
*/
static void transform_to_combine_type(
struct list_objects_filter_options *filter_options)
{
assert(filter_options->choice);
if (filter_options->choice == LOFC_COMBINE)
return;
{
const int initial_sub_alloc = 2;
struct list_objects_filter_options *sub_array =
xcalloc(initial_sub_alloc, sizeof(*sub_array));
sub_array[0] = *filter_options;
memset(filter_options, 0, sizeof(*filter_options));
list_objects_filter_options: plug leak of filter_spec strings The list_objects_filter_options struct contains a string_list to store the filter_spec. Because we allow the options struct to be zero-initialized by callers, the string_list's strdup_strings field is generally not set. Because we don't want to depend on the memory lifetimes of any strings passed in to the list-objects API, everything we add to the string_list is duplicated (either via xstrdup(), or via strbuf_detach()). So far so good, but now we have a problem at cleanup time: when we clear the list, the string_list API doesn't realize that it needs to free all of those strings, and we leak them. One option would be to set strdup_strings right before clearing the list. But this is tricky for two reasons: 1. There's one spot, in partial_clone_get_default_filter_spec(), that fails to duplicate its argument. We could fix that, but... 2. We clear the list in a surprising number of places. As you might expect, we do so in list_objects_filter_release(). But we also clear and rewrite it in expand_list_objects_filter_spec(), list_objects_filter_spec(), and transform_to_combine_type(). We'd have to put the same hack in all of those spots. Instead, let's just set strdup_strings before adding anything. That lets us drop the extra manual xstrdup() calls, fixes the spot mentioned in (1) above that _should_ be duplicating, and future-proofs further calls. We do have to switch the strbuf_detach() calls to use the nodup form, but that's an easy change, and the resulting code more clearly shows the expected ownership transfer. This also resolves a weird inconsistency: when we make a deep copy with list_objects_filter_copy(), it initializes the copy's filter_spec with string_list_init_dup(). So the copy frees its string_list memory correctly, but accidentally leaks the extra manual-xstrdup()'d strings! There is one hiccup, though. In an ideal world, everyone would allocate the list_objects_filter_options struct with an initializer which used STRING_LIST_INIT_DUP under the hood. But there are a bunch of existing callers which think that zero-initializing is good enough. We can leave them as-is by noting that the list is always initially populated via parse_list_objects_filter(). So we can just initialize the strdup_strings flag there. This is arguably a band-aid, but it works reliably. And it doesn't make anything harder if we want to switch all the callers later to a new LIST_OBJECTS_FILTER_INIT. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2 years ago
string_list_init_dup(&filter_options->filter_spec);
filter_options->sub = sub_array;
filter_options->sub_alloc = initial_sub_alloc;
}
filter_options->sub_nr = 1;
filter_options->choice = LOFC_COMBINE;
list_objects_filter_options: plug leak of filter_spec strings The list_objects_filter_options struct contains a string_list to store the filter_spec. Because we allow the options struct to be zero-initialized by callers, the string_list's strdup_strings field is generally not set. Because we don't want to depend on the memory lifetimes of any strings passed in to the list-objects API, everything we add to the string_list is duplicated (either via xstrdup(), or via strbuf_detach()). So far so good, but now we have a problem at cleanup time: when we clear the list, the string_list API doesn't realize that it needs to free all of those strings, and we leak them. One option would be to set strdup_strings right before clearing the list. But this is tricky for two reasons: 1. There's one spot, in partial_clone_get_default_filter_spec(), that fails to duplicate its argument. We could fix that, but... 2. We clear the list in a surprising number of places. As you might expect, we do so in list_objects_filter_release(). But we also clear and rewrite it in expand_list_objects_filter_spec(), list_objects_filter_spec(), and transform_to_combine_type(). We'd have to put the same hack in all of those spots. Instead, let's just set strdup_strings before adding anything. That lets us drop the extra manual xstrdup() calls, fixes the spot mentioned in (1) above that _should_ be duplicating, and future-proofs further calls. We do have to switch the strbuf_detach() calls to use the nodup form, but that's an easy change, and the resulting code more clearly shows the expected ownership transfer. This also resolves a weird inconsistency: when we make a deep copy with list_objects_filter_copy(), it initializes the copy's filter_spec with string_list_init_dup(). So the copy frees its string_list memory correctly, but accidentally leaks the extra manual-xstrdup()'d strings! There is one hiccup, though. In an ideal world, everyone would allocate the list_objects_filter_options struct with an initializer which used STRING_LIST_INIT_DUP under the hood. But there are a bunch of existing callers which think that zero-initializing is good enough. We can leave them as-is by noting that the list is always initially populated via parse_list_objects_filter(). So we can just initialize the strdup_strings flag there. This is arguably a band-aid, but it works reliably. And it doesn't make anything harder if we want to switch all the callers later to a new LIST_OBJECTS_FILTER_INIT. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2 years ago
string_list_append(&filter_options->filter_spec, "combine:");
filter_spec_append_urlencode(
filter_options,
list_objects_filter_spec(&filter_options->sub[0]));
/*
* We don't need the filter_spec strings for subfilter specs, only the
* top level.
*/
string_list_clear(&filter_options->sub[0].filter_spec, /*free_util=*/0);
}
void list_objects_filter_die_if_populated(
struct list_objects_filter_options *filter_options)
{
if (filter_options->choice)
die(_("multiple filter-specs cannot be combined"));
}
void parse_list_objects_filter(
struct list_objects_filter_options *filter_options,
const char *arg)
{
struct strbuf errbuf = STRBUF_INIT;
int parse_error;
list_objects_filter_options: plug leak of filter_spec strings The list_objects_filter_options struct contains a string_list to store the filter_spec. Because we allow the options struct to be zero-initialized by callers, the string_list's strdup_strings field is generally not set. Because we don't want to depend on the memory lifetimes of any strings passed in to the list-objects API, everything we add to the string_list is duplicated (either via xstrdup(), or via strbuf_detach()). So far so good, but now we have a problem at cleanup time: when we clear the list, the string_list API doesn't realize that it needs to free all of those strings, and we leak them. One option would be to set strdup_strings right before clearing the list. But this is tricky for two reasons: 1. There's one spot, in partial_clone_get_default_filter_spec(), that fails to duplicate its argument. We could fix that, but... 2. We clear the list in a surprising number of places. As you might expect, we do so in list_objects_filter_release(). But we also clear and rewrite it in expand_list_objects_filter_spec(), list_objects_filter_spec(), and transform_to_combine_type(). We'd have to put the same hack in all of those spots. Instead, let's just set strdup_strings before adding anything. That lets us drop the extra manual xstrdup() calls, fixes the spot mentioned in (1) above that _should_ be duplicating, and future-proofs further calls. We do have to switch the strbuf_detach() calls to use the nodup form, but that's an easy change, and the resulting code more clearly shows the expected ownership transfer. This also resolves a weird inconsistency: when we make a deep copy with list_objects_filter_copy(), it initializes the copy's filter_spec with string_list_init_dup(). So the copy frees its string_list memory correctly, but accidentally leaks the extra manual-xstrdup()'d strings! There is one hiccup, though. In an ideal world, everyone would allocate the list_objects_filter_options struct with an initializer which used STRING_LIST_INIT_DUP under the hood. But there are a bunch of existing callers which think that zero-initializing is good enough. We can leave them as-is by noting that the list is always initially populated via parse_list_objects_filter(). So we can just initialize the strdup_strings flag there. This is arguably a band-aid, but it works reliably. And it doesn't make anything harder if we want to switch all the callers later to a new LIST_OBJECTS_FILTER_INIT. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2 years ago
if (!filter_options->filter_spec.strdup_strings) {
if (filter_options->filter_spec.nr)
BUG("unexpected non-allocated string in filter_spec");
filter_options->filter_spec.strdup_strings = 1;
}
if (!filter_options->choice) {
list_objects_filter_options: plug leak of filter_spec strings The list_objects_filter_options struct contains a string_list to store the filter_spec. Because we allow the options struct to be zero-initialized by callers, the string_list's strdup_strings field is generally not set. Because we don't want to depend on the memory lifetimes of any strings passed in to the list-objects API, everything we add to the string_list is duplicated (either via xstrdup(), or via strbuf_detach()). So far so good, but now we have a problem at cleanup time: when we clear the list, the string_list API doesn't realize that it needs to free all of those strings, and we leak them. One option would be to set strdup_strings right before clearing the list. But this is tricky for two reasons: 1. There's one spot, in partial_clone_get_default_filter_spec(), that fails to duplicate its argument. We could fix that, but... 2. We clear the list in a surprising number of places. As you might expect, we do so in list_objects_filter_release(). But we also clear and rewrite it in expand_list_objects_filter_spec(), list_objects_filter_spec(), and transform_to_combine_type(). We'd have to put the same hack in all of those spots. Instead, let's just set strdup_strings before adding anything. That lets us drop the extra manual xstrdup() calls, fixes the spot mentioned in (1) above that _should_ be duplicating, and future-proofs further calls. We do have to switch the strbuf_detach() calls to use the nodup form, but that's an easy change, and the resulting code more clearly shows the expected ownership transfer. This also resolves a weird inconsistency: when we make a deep copy with list_objects_filter_copy(), it initializes the copy's filter_spec with string_list_init_dup(). So the copy frees its string_list memory correctly, but accidentally leaks the extra manual-xstrdup()'d strings! There is one hiccup, though. In an ideal world, everyone would allocate the list_objects_filter_options struct with an initializer which used STRING_LIST_INIT_DUP under the hood. But there are a bunch of existing callers which think that zero-initializing is good enough. We can leave them as-is by noting that the list is always initially populated via parse_list_objects_filter(). So we can just initialize the strdup_strings flag there. This is arguably a band-aid, but it works reliably. And it doesn't make anything harder if we want to switch all the callers later to a new LIST_OBJECTS_FILTER_INIT. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2 years ago
string_list_append(&filter_options->filter_spec, arg);
parse_error = gently_parse_list_objects_filter(
filter_options, arg, &errbuf);
} else {
/*
* Make filter_options an LOFC_COMBINE spec so we can trivially
* add subspecs to it.
*/
transform_to_combine_type(filter_options);
list_objects_filter_options: plug leak of filter_spec strings The list_objects_filter_options struct contains a string_list to store the filter_spec. Because we allow the options struct to be zero-initialized by callers, the string_list's strdup_strings field is generally not set. Because we don't want to depend on the memory lifetimes of any strings passed in to the list-objects API, everything we add to the string_list is duplicated (either via xstrdup(), or via strbuf_detach()). So far so good, but now we have a problem at cleanup time: when we clear the list, the string_list API doesn't realize that it needs to free all of those strings, and we leak them. One option would be to set strdup_strings right before clearing the list. But this is tricky for two reasons: 1. There's one spot, in partial_clone_get_default_filter_spec(), that fails to duplicate its argument. We could fix that, but... 2. We clear the list in a surprising number of places. As you might expect, we do so in list_objects_filter_release(). But we also clear and rewrite it in expand_list_objects_filter_spec(), list_objects_filter_spec(), and transform_to_combine_type(). We'd have to put the same hack in all of those spots. Instead, let's just set strdup_strings before adding anything. That lets us drop the extra manual xstrdup() calls, fixes the spot mentioned in (1) above that _should_ be duplicating, and future-proofs further calls. We do have to switch the strbuf_detach() calls to use the nodup form, but that's an easy change, and the resulting code more clearly shows the expected ownership transfer. This also resolves a weird inconsistency: when we make a deep copy with list_objects_filter_copy(), it initializes the copy's filter_spec with string_list_init_dup(). So the copy frees its string_list memory correctly, but accidentally leaks the extra manual-xstrdup()'d strings! There is one hiccup, though. In an ideal world, everyone would allocate the list_objects_filter_options struct with an initializer which used STRING_LIST_INIT_DUP under the hood. But there are a bunch of existing callers which think that zero-initializing is good enough. We can leave them as-is by noting that the list is always initially populated via parse_list_objects_filter(). So we can just initialize the strdup_strings flag there. This is arguably a band-aid, but it works reliably. And it doesn't make anything harder if we want to switch all the callers later to a new LIST_OBJECTS_FILTER_INIT. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2 years ago
string_list_append(&filter_options->filter_spec, "+");
filter_spec_append_urlencode(filter_options, arg);
ALLOC_GROW_BY(filter_options->sub, filter_options->sub_nr, 1,
filter_options->sub_alloc);
parse_error = gently_parse_list_objects_filter(
&filter_options->sub[filter_options->sub_nr - 1], arg,
&errbuf);
}
if (parse_error)
die("%s", errbuf.buf);
}
int opt_parse_list_objects_filter(const struct option *opt,
const char *arg, int unset)
{
struct list_objects_filter_options *filter_options = opt->value;
pack-objects: lazily set up "struct rev_info", don't leak In the preceding [1] (pack-objects: move revs out of get_object_list(), 2022-03-22) the "repo_init_revisions()" was moved to cmd_pack_objects() so that it unconditionally took place for all invocations of "git pack-objects". We'd thus start leaking memory, which is easily reproduced in e.g. git.git by feeding e83c5163316 (Initial revision of "git", the information manager from hell, 2005-04-07) to "git pack-objects"; $ echo e83c5163316f89bfbde7d9ab23ca2e25604af290 | ./git pack-objects initial [...] ==19130==ERROR: LeakSanitizer: detected memory leaks Direct leak of 7120 byte(s) in 1 object(s) allocated from: #0 0x455308 in __interceptor_malloc (/home/avar/g/git/git+0x455308) #1 0x75b399 in do_xmalloc /home/avar/g/git/wrapper.c:41:8 #2 0x75b356 in xmalloc /home/avar/g/git/wrapper.c:62:9 #3 0x5d7609 in prep_parse_options /home/avar/g/git/diff.c:5647:2 #4 0x5d415a in repo_diff_setup /home/avar/g/git/diff.c:4621:2 #5 0x6dffbb in repo_init_revisions /home/avar/g/git/revision.c:1853:2 #6 0x4f599d in cmd_pack_objects /home/avar/g/git/builtin/pack-objects.c:3980:2 #7 0x4592ca in run_builtin /home/avar/g/git/git.c:465:11 #8 0x457d81 in handle_builtin /home/avar/g/git/git.c:718:3 #9 0x458ca5 in run_argv /home/avar/g/git/git.c:785:4 #10 0x457b40 in cmd_main /home/avar/g/git/git.c:916:19 #11 0x562259 in main /home/avar/g/git/common-main.c:56:11 #12 0x7fce792ac7ec in __libc_start_main csu/../csu/libc-start.c:332:16 #13 0x4300f9 in _start (/home/avar/g/git/git+0x4300f9) SUMMARY: LeakSanitizer: 7120 byte(s) leaked in 1 allocation(s). Aborted Narrowly fixing that commit would have been easy, just add call repo_init_revisions() right before get_object_list(), which is effectively what was done before that commit. But an unstated constraint when setting it up early is that it was needed for the subsequent [2] (pack-objects: parse --filter directly into revs.filter, 2022-03-22), i.e. we might have a --filter command-line option, and need to either have the "struct rev_info" setup when we encounter that option, or later. Let's just change the control flow so that we'll instead set up the "struct rev_info" only when we need it. Doing so leads to a bit more verbosity, but it's a lot clearer what we're doing and why. An earlier version of this commit[3] went behind opt_parse_list_objects_filter()'s back by faking up a "struct option" before calling it. Let's avoid that and instead create a blessed API for this pattern. We could furthermore combine the two get_object_list() invocations here by having repo_init_revisions() invoked on &pfd.revs, but I think clearly separating the two makes the flow clearer. Likewise redundantly but explicitly (i.e. redundant v.s. a "{ 0 }") "0" to "have_revs" early in cmd_pack_objects(). While we're at it add parentheses around the arguments to the OPT_* macros in in list-objects-filter-options.h, as we need to change those lines anyway. It doesn't matter in this case, but is good general practice. 1. https://lore.kernel.org/git/619b757d98465dbc4995bdc11a5282fbfcbd3daa.1647970119.git.gitgitgadget@gmail.com 2. https://lore.kernel.org/git/97de926904988b89b5663bd4c59c011a1723a8f5.1647970119.git.gitgitgadget@gmail.com 3. https://lore.kernel.org/git/patch-1.1-193534b0f07-20220325T121715Z-avarab@gmail.com/ Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
3 years ago
opt_lof_init init = (opt_lof_init)opt->defval;
if (init)
filter_options = init(opt->value);
if (unset || !arg)
list_objects_filter_set_no_filter(filter_options);
else
parse_list_objects_filter(filter_options, arg);
return 0;
}
const char *list_objects_filter_spec(struct list_objects_filter_options *filter)
{
if (!filter->filter_spec.nr)
BUG("no filter_spec available for this filter");
if (filter->filter_spec.nr != 1) {
struct strbuf concatted = STRBUF_INIT;
strbuf_add_separated_string_list(
&concatted, "", &filter->filter_spec);
string_list_clear(&filter->filter_spec, /*free_util=*/0);
list_objects_filter_options: plug leak of filter_spec strings The list_objects_filter_options struct contains a string_list to store the filter_spec. Because we allow the options struct to be zero-initialized by callers, the string_list's strdup_strings field is generally not set. Because we don't want to depend on the memory lifetimes of any strings passed in to the list-objects API, everything we add to the string_list is duplicated (either via xstrdup(), or via strbuf_detach()). So far so good, but now we have a problem at cleanup time: when we clear the list, the string_list API doesn't realize that it needs to free all of those strings, and we leak them. One option would be to set strdup_strings right before clearing the list. But this is tricky for two reasons: 1. There's one spot, in partial_clone_get_default_filter_spec(), that fails to duplicate its argument. We could fix that, but... 2. We clear the list in a surprising number of places. As you might expect, we do so in list_objects_filter_release(). But we also clear and rewrite it in expand_list_objects_filter_spec(), list_objects_filter_spec(), and transform_to_combine_type(). We'd have to put the same hack in all of those spots. Instead, let's just set strdup_strings before adding anything. That lets us drop the extra manual xstrdup() calls, fixes the spot mentioned in (1) above that _should_ be duplicating, and future-proofs further calls. We do have to switch the strbuf_detach() calls to use the nodup form, but that's an easy change, and the resulting code more clearly shows the expected ownership transfer. This also resolves a weird inconsistency: when we make a deep copy with list_objects_filter_copy(), it initializes the copy's filter_spec with string_list_init_dup(). So the copy frees its string_list memory correctly, but accidentally leaks the extra manual-xstrdup()'d strings! There is one hiccup, though. In an ideal world, everyone would allocate the list_objects_filter_options struct with an initializer which used STRING_LIST_INIT_DUP under the hood. But there are a bunch of existing callers which think that zero-initializing is good enough. We can leave them as-is by noting that the list is always initially populated via parse_list_objects_filter(). So we can just initialize the strdup_strings flag there. This is arguably a band-aid, but it works reliably. And it doesn't make anything harder if we want to switch all the callers later to a new LIST_OBJECTS_FILTER_INIT. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2 years ago
string_list_append_nodup(
&filter->filter_spec, strbuf_detach(&concatted, NULL));
}
return filter->filter_spec.items[0].string;
}
const char *expand_list_objects_filter_spec(
struct list_objects_filter_options *filter)
{
if (filter->choice == LOFC_BLOB_LIMIT) {
struct strbuf expanded_spec = STRBUF_INIT;
strbuf_addf(&expanded_spec, "blob:limit=%lu",
filter->blob_limit_value);
string_list_clear(&filter->filter_spec, /*free_util=*/0);
list_objects_filter_options: plug leak of filter_spec strings The list_objects_filter_options struct contains a string_list to store the filter_spec. Because we allow the options struct to be zero-initialized by callers, the string_list's strdup_strings field is generally not set. Because we don't want to depend on the memory lifetimes of any strings passed in to the list-objects API, everything we add to the string_list is duplicated (either via xstrdup(), or via strbuf_detach()). So far so good, but now we have a problem at cleanup time: when we clear the list, the string_list API doesn't realize that it needs to free all of those strings, and we leak them. One option would be to set strdup_strings right before clearing the list. But this is tricky for two reasons: 1. There's one spot, in partial_clone_get_default_filter_spec(), that fails to duplicate its argument. We could fix that, but... 2. We clear the list in a surprising number of places. As you might expect, we do so in list_objects_filter_release(). But we also clear and rewrite it in expand_list_objects_filter_spec(), list_objects_filter_spec(), and transform_to_combine_type(). We'd have to put the same hack in all of those spots. Instead, let's just set strdup_strings before adding anything. That lets us drop the extra manual xstrdup() calls, fixes the spot mentioned in (1) above that _should_ be duplicating, and future-proofs further calls. We do have to switch the strbuf_detach() calls to use the nodup form, but that's an easy change, and the resulting code more clearly shows the expected ownership transfer. This also resolves a weird inconsistency: when we make a deep copy with list_objects_filter_copy(), it initializes the copy's filter_spec with string_list_init_dup(). So the copy frees its string_list memory correctly, but accidentally leaks the extra manual-xstrdup()'d strings! There is one hiccup, though. In an ideal world, everyone would allocate the list_objects_filter_options struct with an initializer which used STRING_LIST_INIT_DUP under the hood. But there are a bunch of existing callers which think that zero-initializing is good enough. We can leave them as-is by noting that the list is always initially populated via parse_list_objects_filter(). So we can just initialize the strdup_strings flag there. This is arguably a band-aid, but it works reliably. And it doesn't make anything harder if we want to switch all the callers later to a new LIST_OBJECTS_FILTER_INIT. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2 years ago
string_list_append_nodup(
&filter->filter_spec,
strbuf_detach(&expanded_spec, NULL));
}
return list_objects_filter_spec(filter);
}
void list_objects_filter_release(
struct list_objects_filter_options *filter_options)
{
size_t sub;
if (!filter_options)
return;
string_list_clear(&filter_options->filter_spec, /*free_util=*/0);
list-objects-filter: delay parsing of sparse oid The list-objects-filter code has two steps to its initialization: 1. parse_list_objects_filter() makes sure the spec is a filter we know about and is syntactically correct. This step is done by "rev-list" or "upload-pack" that is going to apply a filter, but also by "git clone" or "git fetch" before they send the spec across the wire. 2. list_objects_filter__init() runs the type-specific initialization (using function pointers established in step 1). This happens at the start of traverse_commit_list_filtered(), when we're about to actually use the filter. It's a good idea to parse as much as we can in step 1, in order to catch problems early (e.g., a blob size limit that isn't a number). But one thing we _shouldn't_ do is resolve any oids at that step (e.g., for sparse-file contents specified by oid). In the case of a fetch, the oid has to be resolved on the remote side. The current code does resolve the oid during the parse phase, but ignores any error (which we must do, because we might just be sending the spec across the wire). This leads to two bugs: - if we're not in a repository (e.g., because it's git-clone parsing the spec), then we trigger a BUG() trying to resolve the name - if we did hit the error case, we still have to notice that later and bail. The code path in rev-list handles this, but the one in upload-pack does not, leading to a segfault. We can fix both by moving the oid resolution into the sparse-oid init function. At that point we know we have a repository (because we're about to traverse), and handling the error there fixes the segfault. As a bonus, we can drop the NULL sparse_oid_value check in rev-list, since this is now handled in the sparse-oid-filter init function. Signed-off-by: Jeff King <peff@peff.net> Acked-by: Jeff Hostetler <jeffhost@microsoft.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
6 years ago
free(filter_options->sparse_oid_name);
for (sub = 0; sub < filter_options->sub_nr; sub++)
list_objects_filter_release(&filter_options->sub[sub]);
free(filter_options->sub);
memset(filter_options, 0, sizeof(*filter_options));
}
void partial_clone_register(
const char *remote,
struct list_objects_filter_options *filter_options)
{
struct promisor_remote *promisor_remote;
char *cfg_name;
char *filter_name;
/* Check if it is already registered */
if ((promisor_remote = promisor_remote_find(remote))) {
if (promisor_remote->partial_clone_filter)
/*
* Remote is already registered and a filter is already
* set, so we don't need to do anything here.
*/
return;
} else {
if (upgrade_repository_format(1) < 0)
die(_("unable to upgrade repository format to support partial clone"));
/* Add promisor config for the remote */
cfg_name = xstrfmt("remote.%s.promisor", remote);
git_config_set(cfg_name, "true");
free(cfg_name);
}
/*
* Record the initial filter-spec in the config as
* the default for subsequent fetches from this remote.
*/
filter_name = xstrfmt("remote.%s.partialclonefilter", remote);
/* NEEDSWORK: 'expand' result leaking??? */
git_config_set(filter_name,
expand_list_objects_filter_spec(filter_options));
free(filter_name);
/* Make sure the config info are reset */
promisor_remote_reinit();
}
void partial_clone_get_default_filter_spec(
struct list_objects_filter_options *filter_options,
const char *remote)
{
struct promisor_remote *promisor = promisor_remote_find(remote);
struct strbuf errbuf = STRBUF_INIT;
/*
* Parse default value, but silently ignore it if it is invalid.
*/
if (!promisor)
return;
string_list_append(&filter_options->filter_spec,
promisor->partial_clone_filter);
gently_parse_list_objects_filter(filter_options,
promisor->partial_clone_filter,
&errbuf);
strbuf_release(&errbuf);
}
void list_objects_filter_copy(
struct list_objects_filter_options *dest,
const struct list_objects_filter_options *src)
{
int i;
struct string_list_item *item;
/* Copy everything. We will overwrite the pointers shortly. */
memcpy(dest, src, sizeof(struct list_objects_filter_options));
string_list_init_dup(&dest->filter_spec);
for_each_string_list_item(item, &src->filter_spec)
string_list_append(&dest->filter_spec, item->string);
dest->sparse_oid_name = xstrdup_or_null(src->sparse_oid_name);
ALLOC_ARRAY(dest->sub, dest->sub_alloc);
for (i = 0; i < src->sub_nr; i++)
list_objects_filter_copy(&dest->sub[i], &src->sub[i]);
}