From 692305ec677b6ff67b67db9f1d55e7ca77e9dd9a Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 20 Oct 2021 23:39:47 -0400 Subject: [PATCH 1/9] midx.c: clean up chunkfile after reading the MIDX MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In order to read the contents of a MIDX, we initialize a chunkfile structure which can read the table of contents and assign pointers into different sections of the file for us. We do call free(), since the chunkfile struct is heap allocated, but not the more appropriate free_chunkfile(), which also frees memory that the structure itself owns. Call that instead to avoid leaking memory in this function. Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- midx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/midx.c b/midx.c index 7e06e85975..7a8b027be0 100644 --- a/midx.c +++ b/midx.c @@ -179,12 +179,13 @@ struct multi_pack_index *load_multi_pack_index(const char *object_dir, int local trace2_data_intmax("midx", the_repository, "load/num_packs", m->num_packs); trace2_data_intmax("midx", the_repository, "load/num_objects", m->num_objects); + free_chunkfile(cf); return m; cleanup_fail: free(m); free(midx_name); - free(cf); + free_chunkfile(cf); if (midx_map) munmap(midx_map, midx_size); if (0 <= fd) From 492cb394fb2bcfa9a2f0f3f6cab466909927523f Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Tue, 26 Oct 2021 17:01:08 -0400 Subject: [PATCH 2/9] midx.c: don't leak MIDX from verify_midx_file The function midx.c:verify_midx_file() allocates a MIDX struct by calling load_multi_pack_index(). But when cleaning up, it calls free() without freeing any resources associated with the MIDX. Call the more appropriate close_midx() which does free those resources, which causes t5319.3 to pass when Git is compiled with SANITIZE=leak. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- midx.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/midx.c b/midx.c index 7a8b027be0..e529ca76c1 100644 --- a/midx.c +++ b/midx.c @@ -1603,7 +1603,7 @@ int verify_midx_file(struct repository *r, const char *object_dir, unsigned flag * Remaining tests assume that we have objects, so we can * return here. */ - return verify_midx_error; + goto cleanup; } if (flags & MIDX_PROGRESS) @@ -1681,7 +1681,9 @@ int verify_midx_file(struct repository *r, const char *object_dir, unsigned flag } stop_progress(&progress); +cleanup: free(pairs); + close_midx(m); return verify_midx_error; } From 7f4c3508c06e154485ea9ce82be00e1e3df57f54 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Tue, 26 Oct 2021 17:01:11 -0400 Subject: [PATCH 3/9] t/helper/test-read-midx.c: free MIDX within read_midx_file() When calling `read_midx_file()` to show information about a MIDX or list the objects contained within it we fail to call `close_midx()`, leaking the memory allocated to store that MIDX. Fix this by calling `close_midx()` before exiting the function. We can drop the "early" return when `show_objects` is non-zero, since the next instruction is also a return. (We could just as easily put a `cleanup` label here as with previous patches. But the only other time we terminate the function early is when we fail to load a MIDX in the first place. `close_midx()` does handle a NULL argument, but the extra complexity is likely not warranted). Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- t/helper/test-read-midx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/t/helper/test-read-midx.c b/t/helper/test-read-midx.c index 9d6fa7a377..27072ba94d 100644 --- a/t/helper/test-read-midx.c +++ b/t/helper/test-read-midx.c @@ -55,9 +55,10 @@ static int read_midx_file(const char *object_dir, int show_objects) printf("%s %"PRIu64"\t%s\n", oid_to_hex(&oid), e.offset, e.p->pack_name); } - return 0; } + close_midx(m); + return 0; } From 9e39acc94ad42362243811a359c2972049e8c880 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Tue, 26 Oct 2021 17:01:13 -0400 Subject: [PATCH 4/9] builtin/pack-objects.c: don't leak memory via arguments When constructing arguments to pass to setup_revision(), pack-objects only frees the memory used by that array after calling get_object_list(). Ensure that we call strvec_clear() whether or not we use the arguments array by cleaning up whenever we exit the function (and rewriting one early return to jump to a label which frees the memory and then returns). We could avoid setting this array up altogether unless we are in the if-else block that calls get_object_list(), but setting up the argument array is intermingled with lots of other side-effects, e.g.: if (exclude_promisor_objects) { use_internal_rev_list = 1; fetch_if_missing = 0; strvec_push(&rp, "--exclude-promisor-objects"); } So it would be awkward to check exclude_promisor_objects twice: first to set use_internal_rev_list and fetch_if_missing, and then again above get_object_list() to push the relevant argument onto the array. Instead, leave the array's construction alone and make sure to free it unconditionally. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 1a3dd445f8..857be7826f 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -4148,11 +4148,10 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) read_packs_list_from_stdin(); if (rev_list_unpacked) add_unreachable_loose_objects(); - } else if (!use_internal_rev_list) + } else if (!use_internal_rev_list) { read_object_list_from_stdin(); - else { + } else { get_object_list(rp.nr, rp.v); - strvec_clear(&rp); } cleanup_preferred_base(); if (include_tag && nr_result) @@ -4162,7 +4161,7 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) the_repository); if (non_empty && !nr_result) - return 0; + goto cleanup; if (nr_result) { trace2_region_enter("pack-objects", "prepare-pack", the_repository); @@ -4183,5 +4182,9 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) " pack-reused %"PRIu32), written, written_delta, reused, reused_delta, reuse_packfile_objects); + +cleanup: + strvec_clear(&rp); + return 0; } From e6432e0f1f183595b265b76ca765c612e705c65a Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Thu, 28 Oct 2021 16:25:48 -0400 Subject: [PATCH 5/9] builtin/repack.c: avoid leaking child arguments `git repack` invokes a handful of child processes: one to write the actual pack, and optionally ones to repack promisor objects and update the MIDX. Most of these are freed automatically by calling `start_command()` (which invokes it on error) and `finish_command()` which calls it automatically. But repack_promisor_objects() can initialize a child_process, populate its array of arguments, and then return from the function before even calling start_command(). Make sure that the prepared list of arguments is freed by calling child_process_clear() ourselves to avoid leaking memory along this path. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/builtin/repack.c b/builtin/repack.c index 0b2d1e5d82..9b74e0d468 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -258,9 +258,11 @@ static void repack_promisor_objects(const struct pack_objects_args *args, for_each_packed_object(write_oid, &cmd, FOR_EACH_OBJECT_PROMISOR_ONLY); - if (cmd.in == -1) + if (cmd.in == -1) { /* No packed objects; cmd was never started */ + child_process_clear(&cmd); return; + } close(cmd.in); From ee4a1d63d7e9bdbea6bbeeb3f82ef33030de9ffb Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Tue, 26 Oct 2021 17:01:18 -0400 Subject: [PATCH 6/9] builtin/multi-pack-index.c: don't leak concatenated options The `multi-pack-index` builtin dynamically allocates an array of command-line option for each of its separate modes by calling add_common_options() to concatante the common options with sub-command specific ones. Because this operation allocates a new array, we have to be careful to remember to free it. We already do this in the repack and write sub-commands, but verify and expire don't. Rectify this by calling FREE_AND_NULL as the other modes do. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/multi-pack-index.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/builtin/multi-pack-index.c b/builtin/multi-pack-index.c index 075d15d706..4480ba3982 100644 --- a/builtin/multi-pack-index.c +++ b/builtin/multi-pack-index.c @@ -167,6 +167,8 @@ static int cmd_multi_pack_index_verify(int argc, const char **argv) usage_with_options(builtin_multi_pack_index_verify_usage, options); + FREE_AND_NULL(options); + return verify_midx_file(the_repository, opts.object_dir, opts.flags); } @@ -191,6 +193,8 @@ static int cmd_multi_pack_index_expire(int argc, const char **argv) usage_with_options(builtin_multi_pack_index_expire_usage, options); + FREE_AND_NULL(options); + return expire_midx_packs(the_repository, opts.object_dir, opts.flags); } From 60980aed786487e9113f0cb2907dfc75a77d363c Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Tue, 26 Oct 2021 17:01:21 -0400 Subject: [PATCH 7/9] midx.c: write MIDX filenames to strbuf To ask for the name of a MIDX and its corresponding .rev file, callers invoke get_midx_filename() and get_midx_rev_filename(), respectively. These both invoke xstrfmt(), allocating a chunk of memory which must be freed later on. This makes callers in pack-bitmap.c somewhat awkward. Specifically, midx_bitmap_filename(), which is implemented like: return xstrfmt("%s-%s.bitmap", get_midx_filename(midx->object_dir), hash_to_hex(get_midx_checksum(midx))); this leaks the second argument to xstrfmt(), which itself was allocated with xstrfmt(). This caller could assign both the result of get_midx_filename() and the outer xstrfmt() to a temporary variable, remembering to free() the former before returning. But that involves a wasteful copy. Instead, get_midx_filename() and get_midx_rev_filename() take a strbuf as an output parameter. This way midx_bitmap_filename() can manipulate and pass around a temporary buffer which it detaches back to its caller. That allows us to implement the function without copying or open-coding get_midx_filename() in a way that doesn't leak. Update the other callers of get_midx_filename() and get_midx_rev_filename() accordingly. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- midx.c | 59 +++++++++++++++++++++++++++---------------------- midx.h | 4 ++-- pack-bitmap.c | 15 ++++++++----- pack-revindex.c | 8 +++---- 4 files changed, 49 insertions(+), 37 deletions(-) diff --git a/midx.c b/midx.c index e529ca76c1..696a938e72 100644 --- a/midx.c +++ b/midx.c @@ -57,15 +57,15 @@ const unsigned char *get_midx_checksum(struct multi_pack_index *m) return m->data + m->data_len - the_hash_algo->rawsz; } -char *get_midx_filename(const char *object_dir) +void get_midx_filename(struct strbuf *out, const char *object_dir) { - return xstrfmt("%s/pack/multi-pack-index", object_dir); + strbuf_addf(out, "%s/pack/multi-pack-index", object_dir); } -char *get_midx_rev_filename(struct multi_pack_index *m) +void get_midx_rev_filename(struct strbuf *out, struct multi_pack_index *m) { - return xstrfmt("%s/pack/multi-pack-index-%s.rev", - m->object_dir, hash_to_hex(get_midx_checksum(m))); + get_midx_filename(out, m->object_dir); + strbuf_addf(out, "-%s.rev", hash_to_hex(get_midx_checksum(m))); } static int midx_read_oid_fanout(const unsigned char *chunk_start, @@ -89,28 +89,30 @@ struct multi_pack_index *load_multi_pack_index(const char *object_dir, int local size_t midx_size; void *midx_map = NULL; uint32_t hash_version; - char *midx_name = get_midx_filename(object_dir); + struct strbuf midx_name = STRBUF_INIT; uint32_t i; const char *cur_pack_name; struct chunkfile *cf = NULL; - fd = git_open(midx_name); + get_midx_filename(&midx_name, object_dir); + + fd = git_open(midx_name.buf); if (fd < 0) goto cleanup_fail; if (fstat(fd, &st)) { - error_errno(_("failed to read %s"), midx_name); + error_errno(_("failed to read %s"), midx_name.buf); goto cleanup_fail; } midx_size = xsize_t(st.st_size); if (midx_size < MIDX_MIN_SIZE) { - error(_("multi-pack-index file %s is too small"), midx_name); + error(_("multi-pack-index file %s is too small"), midx_name.buf); goto cleanup_fail; } - FREE_AND_NULL(midx_name); + strbuf_release(&midx_name); midx_map = xmmap(NULL, midx_size, PROT_READ, MAP_PRIVATE, fd, 0); close(fd); @@ -184,7 +186,7 @@ struct multi_pack_index *load_multi_pack_index(const char *object_dir, int local cleanup_fail: free(m); - free(midx_name); + strbuf_release(&midx_name); free_chunkfile(cf); if (midx_map) munmap(midx_map, midx_size); @@ -1115,7 +1117,7 @@ static int write_midx_internal(const char *object_dir, const char *refs_snapshot, unsigned flags) { - char *midx_name; + struct strbuf midx_name = STRBUF_INIT; unsigned char midx_hash[GIT_MAX_RAWSZ]; uint32_t i; struct hashfile *f = NULL; @@ -1130,10 +1132,10 @@ static int write_midx_internal(const char *object_dir, /* Ensure the given object_dir is local, or a known alternate. */ find_odb(the_repository, object_dir); - midx_name = get_midx_filename(object_dir); - if (safe_create_leading_directories(midx_name)) + get_midx_filename(&midx_name, object_dir); + if (safe_create_leading_directories(midx_name.buf)) die_errno(_("unable to create leading directories of %s"), - midx_name); + midx_name.buf); if (!packs_to_include) { /* @@ -1367,7 +1369,7 @@ static int write_midx_internal(const char *object_dir, pack_name_concat_len += MIDX_CHUNK_ALIGNMENT - (pack_name_concat_len % MIDX_CHUNK_ALIGNMENT); - hold_lock_file_for_update(&lk, midx_name, LOCK_DIE_ON_ERROR); + hold_lock_file_for_update(&lk, midx_name.buf, LOCK_DIE_ON_ERROR); f = hashfd(get_lock_file_fd(&lk), get_lock_file_path(&lk)); if (ctx.nr - dropped_packs == 0) { @@ -1404,9 +1406,9 @@ static int write_midx_internal(const char *object_dir, ctx.pack_order = midx_pack_order(&ctx); if (flags & MIDX_WRITE_REV_INDEX) - write_midx_reverse_index(midx_name, midx_hash, &ctx); + write_midx_reverse_index(midx_name.buf, midx_hash, &ctx); if (flags & MIDX_WRITE_BITMAP) { - if (write_midx_bitmap(midx_name, midx_hash, &ctx, + if (write_midx_bitmap(midx_name.buf, midx_hash, &ctx, refs_snapshot, flags) < 0) { error(_("could not write multi-pack bitmap")); result = 1; @@ -1435,7 +1437,7 @@ cleanup: free(ctx.entries); free(ctx.pack_perm); free(ctx.pack_order); - free(midx_name); + strbuf_release(&midx_name); return result; } @@ -1499,20 +1501,22 @@ static void clear_midx_files_ext(const char *object_dir, const char *ext, void clear_midx_file(struct repository *r) { - char *midx = get_midx_filename(r->objects->odb->path); + struct strbuf midx = STRBUF_INIT; + + get_midx_filename(&midx, r->objects->odb->path); if (r->objects && r->objects->multi_pack_index) { close_midx(r->objects->multi_pack_index); r->objects->multi_pack_index = NULL; } - if (remove_path(midx)) - die(_("failed to clear multi-pack-index at %s"), midx); + if (remove_path(midx.buf)) + die(_("failed to clear multi-pack-index at %s"), midx.buf); clear_midx_files_ext(r->objects->odb->path, ".bitmap", NULL); clear_midx_files_ext(r->objects->odb->path, ".rev", NULL); - free(midx); + strbuf_release(&midx); } static int verify_midx_error; @@ -1565,12 +1569,15 @@ int verify_midx_file(struct repository *r, const char *object_dir, unsigned flag if (!m) { int result = 0; struct stat sb; - char *filename = get_midx_filename(object_dir); - if (!stat(filename, &sb)) { + struct strbuf filename = STRBUF_INIT; + + get_midx_filename(&filename, object_dir); + + if (!stat(filename.buf, &sb)) { error(_("multi-pack-index file exists, but failed to parse")); result = 1; } - free(filename); + strbuf_release(&filename); return result; } diff --git a/midx.h b/midx.h index 6e32297fa3..b7d79a515c 100644 --- a/midx.h +++ b/midx.h @@ -48,8 +48,8 @@ struct multi_pack_index { #define MIDX_WRITE_BITMAP_HASH_CACHE (1 << 3) const unsigned char *get_midx_checksum(struct multi_pack_index *m); -char *get_midx_filename(const char *object_dir); -char *get_midx_rev_filename(struct multi_pack_index *m); +void get_midx_filename(struct strbuf *out, const char *object_dir); +void get_midx_rev_filename(struct strbuf *out, struct multi_pack_index *m); struct multi_pack_index *load_multi_pack_index(const char *object_dir, int local); int prepare_midx_pack(struct repository *r, struct multi_pack_index *m, uint32_t pack_int_id); diff --git a/pack-bitmap.c b/pack-bitmap.c index f47a0a7db4..3f603425c9 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -292,9 +292,12 @@ static int load_bitmap_entries_v1(struct bitmap_index *index) char *midx_bitmap_filename(struct multi_pack_index *midx) { - return xstrfmt("%s-%s.bitmap", - get_midx_filename(midx->object_dir), - hash_to_hex(get_midx_checksum(midx))); + struct strbuf buf = STRBUF_INIT; + + get_midx_filename(&buf, midx->object_dir); + strbuf_addf(&buf, "-%s.bitmap", hash_to_hex(get_midx_checksum(midx))); + + return strbuf_detach(&buf, NULL); } char *pack_bitmap_filename(struct packed_git *p) @@ -324,10 +327,12 @@ static int open_midx_bitmap_1(struct bitmap_index *bitmap_git, } if (bitmap_git->pack || bitmap_git->midx) { + struct strbuf buf = STRBUF_INIT; + get_midx_filename(&buf, midx->object_dir); /* ignore extra bitmap file; we can only handle one */ - warning("ignoring extra bitmap file: %s", - get_midx_filename(midx->object_dir)); + warning("ignoring extra bitmap file: %s", buf.buf); close(fd); + strbuf_release(&buf); return -1; } diff --git a/pack-revindex.c b/pack-revindex.c index 0e4a31d9db..70d0fbafcb 100644 --- a/pack-revindex.c +++ b/pack-revindex.c @@ -296,14 +296,14 @@ int load_pack_revindex(struct packed_git *p) int load_midx_revindex(struct multi_pack_index *m) { - char *revindex_name; + struct strbuf revindex_name = STRBUF_INIT; int ret; if (m->revindex_data) return 0; - revindex_name = get_midx_rev_filename(m); + get_midx_rev_filename(&revindex_name, m); - ret = load_revindex_from_disk(revindex_name, + ret = load_revindex_from_disk(revindex_name.buf, m->num_objects, &m->revindex_map, &m->revindex_len); @@ -313,7 +313,7 @@ int load_midx_revindex(struct multi_pack_index *m) m->revindex_data = (const uint32_t *)((const char *)m->revindex_map + RIDX_HEADER_SIZE); cleanup: - free(revindex_name); + strbuf_release(&revindex_name); return ret; } From 022815114a8a57188bc0e8fd622e10d5e22604dc Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Tue, 26 Oct 2021 17:01:23 -0400 Subject: [PATCH 8/9] pack-bitmap.c: don't leak type-level bitmaps test_bitmap_walk() is used to implement `git rev-list --test-bitmap`, which compares the result of the on-disk bitmaps with ones generated on-the-fly during a revision walk. In fa95666a40 (pack-bitmap.c: harden 'test_bitmap_walk()' to check type bitmaps, 2021-08-24), we hardened those tests to also check the four special type-level bitmaps, but never freed those bitmaps. We should have, since each required an allocation when we EWAH-decompressed them. Free those, plugging that leak, and also free the base (the scratch-pad bitmap), too. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- pack-bitmap.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pack-bitmap.c b/pack-bitmap.c index 3f603425c9..3d81425c29 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -1726,6 +1726,12 @@ void test_bitmap_walk(struct rev_info *revs) else die("mismatch in bitmap results"); + bitmap_free(result); + bitmap_free(tdata.base); + bitmap_free(tdata.commits); + bitmap_free(tdata.trees); + bitmap_free(tdata.blobs); + bitmap_free(tdata.tags); free_bitmap_index(bitmap_git); } From 655b8561d6b10f22f0e7350df9388110667001af Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Tue, 26 Oct 2021 17:01:26 -0400 Subject: [PATCH 9/9] pack-bitmap.c: more aggressively free in free_bitmap_index() The function free_bitmap_index() is somewhat lax in what it frees. There are two notable examples: - While it does call kh_destroy_oid_map on the "bitmaps" map, which maps commit OIDs to their corresponding bitmaps, the bitmaps themselves are not freed. Note here that we recycle already-freed ewah_bitmaps into a pool, but these are handled correctly by ewah_pool_free(). - We never bother to free the extended index's "positions" map, which we always allocate in load_bitmap(). Fix both of these. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- pack-bitmap.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pack-bitmap.c b/pack-bitmap.c index 3d81425c29..a56ceb9441 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -1859,9 +1859,17 @@ void free_bitmap_index(struct bitmap_index *b) ewah_pool_free(b->trees); ewah_pool_free(b->blobs); ewah_pool_free(b->tags); + if (b->bitmaps) { + struct stored_bitmap *sb; + kh_foreach_value(b->bitmaps, sb, { + ewah_pool_free(sb->root); + free(sb); + }); + } kh_destroy_oid_map(b->bitmaps); free(b->ext_index.objects); free(b->ext_index.hashes); + kh_destroy_oid_pos(b->ext_index.positions); bitmap_free(b->result); bitmap_free(b->haves); if (bitmap_is_midx(b)) {