From 9d81ecb52b5e6611f66c968884dde42928350b18 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Thu, 21 Mar 2019 12:36:11 -0700 Subject: [PATCH 1/4] progress: add sparse mode to force 100% complete message Add new start_sparse_progress() and start_delayed_sparse_progress() constructors and "sparse" flag to struct progress. Teach stop_progress() to force a 100% complete progress message before printing the final "done" message when "sparse" is set. Calling display_progress() for every item in a large set can be expensive. If callers try to filter this for performance reasons, such as emitting every k-th item, progress would not reach 100% unless they made a final call to display_progress() with the item count before calling stop_progress(). Now this is automatic when "sparse" is set. Signed-off-by: Jeff Hostetler Signed-off-by: Junio C Hamano --- progress.c | 38 +++++++++++++++++++++++++++++++++++--- progress.h | 3 +++ 2 files changed, 38 insertions(+), 3 deletions(-) diff --git a/progress.c b/progress.c index 5a99c9fbf0..212d00e524 100644 --- a/progress.c +++ b/progress.c @@ -34,6 +34,7 @@ struct progress { uint64_t total; unsigned last_percent; unsigned delay; + unsigned sparse; struct throughput *throughput; uint64_t start_ns; }; @@ -194,7 +195,7 @@ int display_progress(struct progress *progress, uint64_t n) } static struct progress *start_progress_delay(const char *title, uint64_t total, - unsigned delay) + unsigned delay, unsigned sparse) { struct progress *progress = malloc(sizeof(*progress)); if (!progress) { @@ -208,6 +209,7 @@ static struct progress *start_progress_delay(const char *title, uint64_t total, progress->last_value = -1; progress->last_percent = -1; progress->delay = delay; + progress->sparse = sparse; progress->throughput = NULL; progress->start_ns = getnanotime(); set_progress_signal(); @@ -216,16 +218,46 @@ static struct progress *start_progress_delay(const char *title, uint64_t total, struct progress *start_delayed_progress(const char *title, uint64_t total) { - return start_progress_delay(title, total, 2); + return start_progress_delay(title, total, 2, 0); } struct progress *start_progress(const char *title, uint64_t total) { - return start_progress_delay(title, total, 0); + return start_progress_delay(title, total, 0, 0); +} + +/* + * Here "sparse" means that the caller might use some sampling criteria to + * decide when to call display_progress() rather than calling it for every + * integer value in[0 .. total). In particular, the caller might not call + * display_progress() for the last value in the range. + * + * When "sparse" is set, stop_progress() will automatically force the done + * message to show 100%. + */ +struct progress *start_sparse_progress(const char *title, uint64_t total) +{ + return start_progress_delay(title, total, 0, 1); +} + +struct progress *start_delayed_sparse_progress(const char *title, + uint64_t total) +{ + return start_progress_delay(title, total, 2, 1); +} + +static void finish_if_sparse(struct progress *progress) +{ + if (progress && + progress->sparse && + progress->last_value != progress->total) + display_progress(progress, progress->total); } void stop_progress(struct progress **p_progress) { + finish_if_sparse(*p_progress); + stop_progress_msg(p_progress, _("done")); } diff --git a/progress.h b/progress.h index 70a4d4a0d6..7b725acc8d 100644 --- a/progress.h +++ b/progress.h @@ -6,7 +6,10 @@ struct progress; void display_throughput(struct progress *progress, uint64_t total); int display_progress(struct progress *progress, uint64_t n); struct progress *start_progress(const char *title, uint64_t total); +struct progress *start_sparse_progress(const char *title, uint64_t total); struct progress *start_delayed_progress(const char *title, uint64_t total); +struct progress *start_delayed_sparse_progress(const char *title, + uint64_t total); void stop_progress(struct progress **progress); void stop_progress_msg(struct progress **progress, const char *msg); From d829223a4256d2a6f0e07425ba3828d28bda7596 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Thu, 21 Mar 2019 12:36:13 -0700 Subject: [PATCH 2/4] trace2:data: add trace2 data to midx Log multi-pack-index command mode. Log number of objects and packfiles in the midx. Signed-off-by: Jeff Hostetler Signed-off-by: Junio C Hamano --- builtin/multi-pack-index.c | 3 +++ midx.c | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/builtin/multi-pack-index.c b/builtin/multi-pack-index.c index fca70f8e4f..ae6e476ad5 100644 --- a/builtin/multi-pack-index.c +++ b/builtin/multi-pack-index.c @@ -3,6 +3,7 @@ #include "config.h" #include "parse-options.h" #include "midx.h" +#include "trace2.h" static char const * const builtin_multi_pack_index_usage[] = { N_("git multi-pack-index [--object-dir=] (write|verify)"), @@ -40,6 +41,8 @@ int cmd_multi_pack_index(int argc, const char **argv, return 1; } + trace2_cmd_mode(argv[0]); + if (!strcmp(argv[0], "write")) return write_midx_file(opts.object_dir); if (!strcmp(argv[0], "verify")) diff --git a/midx.c b/midx.c index 8a505fd423..24141a7c62 100644 --- a/midx.c +++ b/midx.c @@ -8,6 +8,7 @@ #include "sha1-lookup.h" #include "midx.h" #include "progress.h" +#include "trace2.h" #define MIDX_SIGNATURE 0x4d494458 /* "MIDX" */ #define MIDX_VERSION 1 @@ -164,6 +165,9 @@ struct multi_pack_index *load_multi_pack_index(const char *object_dir, int local m->pack_names[i]); } + trace2_data_intmax("midx", the_repository, "load/num_packs", m->num_packs); + trace2_data_intmax("midx", the_repository, "load/num_objects", m->num_objects); + return m; cleanup_fail: From 430efb8a74bd9ce9b6019b37cda21063914b0038 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Thu, 21 Mar 2019 12:36:14 -0700 Subject: [PATCH 3/4] midx: add progress indicators in multi-pack-index verify Add progress indicators to more parts of midx verify. Use sparse progress indicator for object iteration. Signed-off-by: Jeff Hostetler Signed-off-by: Junio C Hamano --- midx.c | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/midx.c b/midx.c index 24141a7c62..e3919387d9 100644 --- a/midx.c +++ b/midx.c @@ -962,6 +962,18 @@ static void midx_report(const char *fmt, ...) va_end(ap); } +/* + * Limit calls to display_progress() for performance reasons. + * The interval here was arbitrarily chosen. + */ +#define SPARSE_PROGRESS_INTERVAL (1 << 12) +#define midx_display_sparse_progress(progress, n) \ + do { \ + uint64_t _n = (n); \ + if ((_n & (SPARSE_PROGRESS_INTERVAL - 1)) == 0) \ + display_progress(progress, _n); \ + } while (0) + int verify_midx_file(const char *object_dir) { uint32_t i; @@ -972,10 +984,15 @@ int verify_midx_file(const char *object_dir) if (!m) return 0; + progress = start_progress(_("Looking for referenced packfiles"), + m->num_packs); for (i = 0; i < m->num_packs; i++) { if (prepare_midx_pack(m, i)) midx_report("failed to load pack in position %d", i); + + display_progress(progress, i + 1); } + stop_progress(&progress); for (i = 0; i < 255; i++) { uint32_t oid_fanout1 = ntohl(m->chunk_oid_fanout[i]); @@ -986,6 +1003,8 @@ int verify_midx_file(const char *object_dir) i, oid_fanout1, oid_fanout2, i + 1); } + progress = start_sparse_progress(_("Verifying OID order in MIDX"), + m->num_objects - 1); for (i = 0; i < m->num_objects - 1; i++) { struct object_id oid1, oid2; @@ -995,9 +1014,12 @@ int verify_midx_file(const char *object_dir) if (oidcmp(&oid1, &oid2) >= 0) midx_report(_("oid lookup out of order: oid[%d] = %s >= %s = oid[%d]"), i, oid_to_hex(&oid1), oid_to_hex(&oid2), i + 1); - } - progress = start_progress(_("Verifying object offsets"), m->num_objects); + midx_display_sparse_progress(progress, i + 1); + } + stop_progress(&progress); + + progress = start_sparse_progress(_("Verifying object offsets"), m->num_objects); for (i = 0; i < m->num_objects; i++) { struct object_id oid; struct pack_entry e; @@ -1023,7 +1045,7 @@ int verify_midx_file(const char *object_dir) midx_report(_("incorrect object offset for oid[%d] = %s: %"PRIx64" != %"PRIx64), i, oid_to_hex(&oid), m_offset, p_offset); - display_progress(progress, i + 1); + midx_display_sparse_progress(progress, i + 1); } stop_progress(&progress); From 5ae18df9d8ed27625f81974c3fb2db179a23ca2d Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Thu, 21 Mar 2019 12:36:15 -0700 Subject: [PATCH 4/4] midx: during verify group objects by packfile to speed verification Teach `multi-pack-index verify` to sort the set of object by packfile so that only one packfile needs to be open at a time. This is a performance improvement. Previously, objects were verified in OID order. This essentially requires all packfiles to be open at the same time. If the number of packfiles exceeds the open file limit, packfiles would be LRU-closed and re-opened many times. Signed-off-by: Jeff Hostetler Signed-off-by: Junio C Hamano --- midx.c | 49 ++++++++++++++++++++++++++++++++++++++++++++++--- packfile.c | 2 +- packfile.h | 2 ++ 3 files changed, 49 insertions(+), 4 deletions(-) diff --git a/midx.c b/midx.c index e3919387d9..f1cd868f8c 100644 --- a/midx.c +++ b/midx.c @@ -962,6 +962,20 @@ static void midx_report(const char *fmt, ...) va_end(ap); } +struct pair_pos_vs_id +{ + uint32_t pos; + uint32_t pack_int_id; +}; + +static int compare_pair_pos_vs_id(const void *_a, const void *_b) +{ + struct pair_pos_vs_id *a = (struct pair_pos_vs_id *)_a; + struct pair_pos_vs_id *b = (struct pair_pos_vs_id *)_b; + + return b->pack_int_id - a->pack_int_id; +} + /* * Limit calls to display_progress() for performance reasons. * The interval here was arbitrarily chosen. @@ -976,6 +990,7 @@ static void midx_report(const char *fmt, ...) int verify_midx_file(const char *object_dir) { + struct pair_pos_vs_id *pairs = NULL; uint32_t i; struct progress *progress; struct multi_pack_index *m = load_multi_pack_index(object_dir, 1); @@ -1019,16 +1034,42 @@ int verify_midx_file(const char *object_dir) } stop_progress(&progress); + /* + * Create an array mapping each object to its packfile id. Sort it + * to group the objects by packfile. Use this permutation to visit + * each of the objects and only require 1 packfile to be open at a + * time. + */ + ALLOC_ARRAY(pairs, m->num_objects); + for (i = 0; i < m->num_objects; i++) { + pairs[i].pos = i; + pairs[i].pack_int_id = nth_midxed_pack_int_id(m, i); + } + + progress = start_sparse_progress(_("Sorting objects by packfile"), + m->num_objects); + display_progress(progress, 0); /* TODO: Measure QSORT() progress */ + QSORT(pairs, m->num_objects, compare_pair_pos_vs_id); + stop_progress(&progress); + progress = start_sparse_progress(_("Verifying object offsets"), m->num_objects); for (i = 0; i < m->num_objects; i++) { struct object_id oid; struct pack_entry e; off_t m_offset, p_offset; - nth_midxed_object_oid(&oid, m, i); + if (i > 0 && pairs[i-1].pack_int_id != pairs[i].pack_int_id && + m->packs[pairs[i-1].pack_int_id]) + { + close_pack_fd(m->packs[pairs[i-1].pack_int_id]); + close_pack_index(m->packs[pairs[i-1].pack_int_id]); + } + + nth_midxed_object_oid(&oid, m, pairs[i].pos); + if (!fill_midx_entry(&oid, &e, m)) { midx_report(_("failed to load pack entry for oid[%d] = %s"), - i, oid_to_hex(&oid)); + pairs[i].pos, oid_to_hex(&oid)); continue; } @@ -1043,11 +1084,13 @@ int verify_midx_file(const char *object_dir) if (m_offset != p_offset) midx_report(_("incorrect object offset for oid[%d] = %s: %"PRIx64" != %"PRIx64), - i, oid_to_hex(&oid), m_offset, p_offset); + pairs[i].pos, oid_to_hex(&oid), m_offset, p_offset); midx_display_sparse_progress(progress, i + 1); } stop_progress(&progress); + free(pairs); + return verify_midx_error; } diff --git a/packfile.c b/packfile.c index 16bcb75262..d2bcb2f860 100644 --- a/packfile.c +++ b/packfile.c @@ -309,7 +309,7 @@ void close_pack_windows(struct packed_git *p) } } -static int close_pack_fd(struct packed_git *p) +int close_pack_fd(struct packed_git *p) { if (p->pack_fd < 0) return 0; diff --git a/packfile.h b/packfile.h index d70c6d9afb..b1c18504eb 100644 --- a/packfile.h +++ b/packfile.h @@ -76,6 +76,8 @@ extern int open_pack_index(struct packed_git *); */ extern void close_pack_index(struct packed_git *); +int close_pack_fd(struct packed_git *p); + extern uint32_t get_pack_fanout(struct packed_git *p, uint32_t value); extern unsigned char *use_pack(struct packed_git *, struct pack_window **, off_t, unsigned long *);