From 026ad6016070748a66ed9a977ad90efc08df2225 Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Tue, 21 Oct 2025 13:25:55 -0500 Subject: [PATCH 1/7] builtin/repo: rename repo_info() to cmd_repo_info() Subcommand functions are often prefixed with `cmd_` to denote that they are an entrypoint. Rename repo_info() to cmd_repo_info() accordingly. Signed-off-by: Justin Tobler Signed-off-by: Junio C Hamano --- builtin/repo.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/builtin/repo.c b/builtin/repo.c index bbb0966f2d..eeeab8fbd2 100644 --- a/builtin/repo.c +++ b/builtin/repo.c @@ -136,8 +136,8 @@ static int parse_format_cb(const struct option *opt, return 0; } -static int repo_info(int argc, const char **argv, const char *prefix, - struct repository *repo) +static int cmd_repo_info(int argc, const char **argv, const char *prefix, + struct repository *repo) { enum output_format format = FORMAT_KEYVALUE; struct option options[] = { @@ -161,7 +161,7 @@ int cmd_repo(int argc, const char **argv, const char *prefix, { parse_opt_subcommand_fn *fn = NULL; struct option options[] = { - OPT_SUBCOMMAND("info", &fn, repo_info), + OPT_SUBCOMMAND("info", &fn, cmd_repo_info), OPT_END() }; From eafc03dbe316478acff5eef3b70c037de4758f08 Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Tue, 21 Oct 2025 13:25:56 -0500 Subject: [PATCH 2/7] ref-filter: allow NULL filter pattern When setting up `struct ref_filter` for filter_refs(), the `name_patterns` field must point to an array of pattern strings even if no patterns are required. To improve this interface, treat a NULL `name_patterns` field the same as when it points to an empty array. 
Signed-off-by: Justin Tobler Signed-off-by: Junio C Hamano --- ref-filter.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ref-filter.c b/ref-filter.c index 520d2539c9..2cb5a166d6 100644 --- a/ref-filter.c +++ b/ref-filter.c @@ -2664,7 +2664,7 @@ static int match_name_as_path(const char **pattern, const char *refname, /* Return 1 if the refname matches one of the patterns, otherwise 0. */ static int filter_pattern_match(struct ref_filter *filter, const char *refname) { - if (!*filter->name_patterns) + if (!filter->name_patterns || !*filter->name_patterns) return 1; /* No pattern always matches */ if (filter->match_as_path) return match_name_as_path(filter->name_patterns, refname, @@ -2751,7 +2751,7 @@ static int for_each_fullref_in_pattern(struct ref_filter *filter, return for_each_fullref_with_seek(filter, cb, cb_data, 0); } - if (!filter->name_patterns[0]) { + if (!filter->name_patterns || !filter->name_patterns[0]) { /* no patterns; we have to look at everything */ return for_each_fullref_with_seek(filter, cb, cb_data, 0); } From 6d1997f6cbc10ac03bc450630de4410762f77b6f Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Tue, 21 Oct 2025 13:25:57 -0500 Subject: [PATCH 3/7] ref-filter: export ref_kind_from_refname() When filtering refs, `ref_kind_from_refname()` is used to determine the ref type. In a subsequent commit, this same logic is reused when counting refs by type. Export the function to prepare for this change. 
Signed-off-by: Justin Tobler Signed-off-by: Junio C Hamano --- ref-filter.c | 2 +- ref-filter.h | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/ref-filter.c b/ref-filter.c index 2cb5a166d6..30cc488d8a 100644 --- a/ref-filter.c +++ b/ref-filter.c @@ -2833,7 +2833,7 @@ struct ref_array_item *ref_array_push(struct ref_array *array, return ref; } -static int ref_kind_from_refname(const char *refname) +int ref_kind_from_refname(const char *refname) { unsigned int i; diff --git a/ref-filter.h b/ref-filter.h index f22ca94b49..4ed1edf09a 100644 --- a/ref-filter.h +++ b/ref-filter.h @@ -135,6 +135,8 @@ struct ref_format { OPT_STRVEC(0, "exclude", &(var)->exclude, \ N_("pattern"), N_("exclude refs which match pattern")) +/* Get the reference kind from the provided reference name. */ +int ref_kind_from_refname(const char *refname); /* * API for filtering a set of refs. Based on the type of refs the user * has requested, we iterate through those refs and apply filters From bbb2b9334856ae0a2b18e65e5924a42c31a83c6b Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Tue, 21 Oct 2025 13:25:58 -0500 Subject: [PATCH 4/7] builtin/repo: introduce structure subcommand The structure of a repository's history can have huge impacts on the performance and health of the repository itself. Currently, Git lacks a means to surface repository metrics regarding its structure/shape via a single command. Acquiring this information requires users to be familiar with the relevant data points and the various Git commands required to surface them. To fill this gap, supplemental tools such as git-sizer(1) have been developed. To allow users to more readily identify repository structure related information, introduce the "structure" subcommand in git-repo(1). The goal of this subcommand is to eventually provide similar functionality to git-sizer(1), but natively in Git. 
The initial version of this command only iterates through all references in the repository and tracks the count of branches, tags, remote refs, and other reference types. The corresponding information is displayed in a human-friendly table formatted in a very similar manner to git-sizer(1). The width of each table column is adjusted automatically to satisfy the requirements of the widest row contained. Subsequent commits will surface additional relevant data points to output and also provide other more machine-friendly output formats. Based-on-patch-by: Derrick Stolee Signed-off-by: Justin Tobler Signed-off-by: Junio C Hamano --- Documentation/git-repo.adoc | 10 ++ builtin/repo.c | 200 ++++++++++++++++++++++++++++++++++++ t/meson.build | 1 + t/t1901-repo-structure.sh | 61 +++++++++++ 4 files changed, 272 insertions(+) create mode 100755 t/t1901-repo-structure.sh diff --git a/Documentation/git-repo.adoc b/Documentation/git-repo.adoc index 209afd1b61..8193298dd5 100644 --- a/Documentation/git-repo.adoc +++ b/Documentation/git-repo.adoc @@ -9,6 +9,7 @@ SYNOPSIS -------- [synopsis] git repo info [--format=(keyvalue|nul)] [-z] [...] +git repo structure DESCRIPTION ----------- @@ -43,6 +44,15 @@ supported: + `-z` is an alias for `--format=nul`. +`structure`:: + Retrieve statistics about the current repository structure. The + following kinds of information are reported: ++ +* Reference counts categorized by type + ++ +The table output format may change and is not intended for machine parsing. 
+ INFO KEYS --------- In order to obtain a set of values from `git repo info`, you should provide diff --git a/builtin/repo.c b/builtin/repo.c index eeeab8fbd2..e77e8db563 100644 --- a/builtin/repo.c +++ b/builtin/repo.c @@ -4,12 +4,16 @@ #include "environment.h" #include "parse-options.h" #include "quote.h" +#include "ref-filter.h" #include "refs.h" #include "strbuf.h" +#include "string-list.h" #include "shallow.h" +#include "utf8.h" static const char *const repo_usage[] = { "git repo info [--format=(keyvalue|nul)] [-z] [...]", + "git repo structure", NULL }; @@ -156,12 +160,208 @@ static int cmd_repo_info(int argc, const char **argv, const char *prefix, return print_fields(argc, argv, repo, format); } +struct ref_stats { + size_t branches; + size_t remotes; + size_t tags; + size_t others; +}; + +struct stats_table { + struct string_list rows; + + int name_col_width; + int value_col_width; +}; + +/* + * Holds column data that gets stored for each row. + */ +struct stats_table_entry { + char *value; +}; + +static void stats_table_vaddf(struct stats_table *table, + struct stats_table_entry *entry, + const char *format, va_list ap) +{ + struct strbuf buf = STRBUF_INIT; + struct string_list_item *item; + char *formatted_name; + int name_width; + + strbuf_vaddf(&buf, format, ap); + formatted_name = strbuf_detach(&buf, NULL); + name_width = utf8_strwidth(formatted_name); + + item = string_list_append_nodup(&table->rows, formatted_name); + item->util = entry; + + if (name_width > table->name_col_width) + table->name_col_width = name_width; + if (entry) { + int value_width = utf8_strwidth(entry->value); + if (value_width > table->value_col_width) + table->value_col_width = value_width; + } +} + +static void stats_table_addf(struct stats_table *table, const char *format, ...) 
+{ + va_list ap; + + va_start(ap, format); + stats_table_vaddf(table, NULL, format, ap); + va_end(ap); +} + +static void stats_table_count_addf(struct stats_table *table, size_t value, + const char *format, ...) +{ + struct stats_table_entry *entry; + va_list ap; + + CALLOC_ARRAY(entry, 1); + entry->value = xstrfmt("%" PRIuMAX, (uintmax_t)value); + + va_start(ap, format); + stats_table_vaddf(table, entry, format, ap); + va_end(ap); +} + +static inline size_t get_total_reference_count(struct ref_stats *stats) +{ + return stats->branches + stats->remotes + stats->tags + stats->others; +} + +static void stats_table_setup_structure(struct stats_table *table, + struct ref_stats *refs) +{ + size_t ref_total; + + ref_total = get_total_reference_count(refs); + stats_table_addf(table, "* %s", _("References")); + stats_table_count_addf(table, ref_total, " * %s", _("Count")); + stats_table_count_addf(table, refs->branches, " * %s", _("Branches")); + stats_table_count_addf(table, refs->tags, " * %s", _("Tags")); + stats_table_count_addf(table, refs->remotes, " * %s", _("Remotes")); + stats_table_count_addf(table, refs->others, " * %s", _("Others")); +} + +static void stats_table_print_structure(const struct stats_table *table) +{ + const char *name_col_title = _("Repository structure"); + const char *value_col_title = _("Value"); + int name_col_width = utf8_strwidth(name_col_title); + int value_col_width = utf8_strwidth(value_col_title); + struct string_list_item *item; + + if (table->name_col_width > name_col_width) + name_col_width = table->name_col_width; + if (table->value_col_width > value_col_width) + value_col_width = table->value_col_width; + + printf("| %-*s | %-*s |\n", name_col_width, name_col_title, + value_col_width, value_col_title); + printf("| "); + for (int i = 0; i < name_col_width; i++) + putchar('-'); + printf(" | "); + for (int i = 0; i < value_col_width; i++) + putchar('-'); + printf(" |\n"); + + for_each_string_list_item(item, &table->rows) { + struct 
stats_table_entry *entry = item->util; + const char *value = ""; + + if (entry) { + struct stats_table_entry *entry = item->util; + value = entry->value; + } + + printf("| %-*s | %*s |\n", name_col_width, item->string, + value_col_width, value); + } +} + +static void stats_table_clear(struct stats_table *table) +{ + struct stats_table_entry *entry; + struct string_list_item *item; + + for_each_string_list_item(item, &table->rows) { + entry = item->util; + if (entry) + free(entry->value); + } + + string_list_clear(&table->rows, 1); +} + +static int count_references(const char *refname, + const char *referent UNUSED, + const struct object_id *oid UNUSED, + int flags UNUSED, void *cb_data) +{ + struct ref_stats *stats = cb_data; + + switch (ref_kind_from_refname(refname)) { + case FILTER_REFS_BRANCHES: + stats->branches++; + break; + case FILTER_REFS_REMOTES: + stats->remotes++; + break; + case FILTER_REFS_TAGS: + stats->tags++; + break; + case FILTER_REFS_OTHERS: + stats->others++; + break; + default: + BUG("unexpected reference type"); + } + + return 0; +} + +static void structure_count_references(struct ref_stats *stats, + struct repository *repo) +{ + refs_for_each_ref(get_main_ref_store(repo), count_references, &stats); +} + +static int cmd_repo_structure(int argc, const char **argv, const char *prefix, + struct repository *repo) +{ + struct stats_table table = { + .rows = STRING_LIST_INIT_DUP, + }; + struct ref_stats stats = { 0 }; + struct option options[] = { 0 }; + + argc = parse_options(argc, argv, prefix, options, repo_usage, 0); + if (argc) + usage(_("too many arguments")); + + structure_count_references(&stats, repo); + + stats_table_setup_structure(&table, &stats); + stats_table_print_structure(&table); + + stats_table_clear(&table); + + return 0; +} + int cmd_repo(int argc, const char **argv, const char *prefix, struct repository *repo) { parse_opt_subcommand_fn *fn = NULL; struct option options[] = { OPT_SUBCOMMAND("info", &fn, cmd_repo_info), + 
OPT_SUBCOMMAND("structure", &fn, cmd_repo_structure), OPT_END() }; diff --git a/t/meson.build b/t/meson.build index 7974795fe4..9e426f8edc 100644 --- a/t/meson.build +++ b/t/meson.build @@ -236,6 +236,7 @@ integration_tests = [ 't1701-racy-split-index.sh', 't1800-hook.sh', 't1900-repo.sh', + 't1901-repo-structure.sh', 't2000-conflict-when-checking-files-out.sh', 't2002-checkout-cache-u.sh', 't2003-checkout-cache-mkdir.sh', diff --git a/t/t1901-repo-structure.sh b/t/t1901-repo-structure.sh new file mode 100755 index 0000000000..e592eea0eb --- /dev/null +++ b/t/t1901-repo-structure.sh @@ -0,0 +1,61 @@ +#!/bin/sh + +test_description='test git repo structure' + +. ./test-lib.sh + +test_expect_success 'empty repository' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + cat >expect <<-\EOF && + | Repository structure | Value | + | -------------------- | ----- | + | * References | | + | * Count | 0 | + | * Branches | 0 | + | * Tags | 0 | + | * Remotes | 0 | + | * Others | 0 | + EOF + + git repo structure >out 2>err && + + test_cmp expect out && + test_line_count = 0 err + ) +' + +test_expect_success 'repository with references' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + git commit --allow-empty -m init && + git tag -a foo -m bar && + + oid="$(git rev-parse HEAD)" && + git update-ref refs/remotes/origin/foo "$oid" && + + git notes add -m foo && + + cat >expect <<-\EOF && + | Repository structure | Value | + | -------------------- | ----- | + | * References | | + | * Count | 4 | + | * Branches | 1 | + | * Tags | 1 | + | * Remotes | 1 | + | * Others | 1 | + EOF + + git repo structure >out 2>err && + + test_cmp expect out && + test_line_count = 0 err + ) +' + +test_done From eb5cf58ffcd4bb117c870d448b0df0193df52c82 Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Tue, 21 Oct 2025 13:25:59 -0500 Subject: [PATCH 5/7] builtin/repo: add object counts in structure output The amount of objects in a repository can 
provide insight regarding its shape. To surface this information, use the path-walk API to count the number of reachable objects in the repository by object type. All regular references are used to determine the reachable set of objects. The object counts are appended to the same table containing the reference information. Signed-off-by: Justin Tobler Signed-off-by: Junio C Hamano --- Documentation/git-repo.adoc | 1 + builtin/repo.c | 107 +++++++++++++++++++++++++++++++++--- t/t1901-repo-structure.sh | 19 ++++++- 3 files changed, 118 insertions(+), 9 deletions(-) diff --git a/Documentation/git-repo.adoc b/Documentation/git-repo.adoc index 8193298dd5..ae62d2415f 100644 --- a/Documentation/git-repo.adoc +++ b/Documentation/git-repo.adoc @@ -49,6 +49,7 @@ supported: following kinds of information are reported: + * Reference counts categorized by type +* Reachable object counts categorized by type + The table output format may change and is not intended for machine parsing. diff --git a/builtin/repo.c b/builtin/repo.c index e77e8db563..f39f06ee8c 100644 --- a/builtin/repo.c +++ b/builtin/repo.c @@ -3,9 +3,11 @@ #include "builtin.h" #include "environment.h" #include "parse-options.h" +#include "path-walk.h" #include "quote.h" #include "ref-filter.h" #include "refs.h" +#include "revision.h" #include "strbuf.h" #include "string-list.h" #include "shallow.h" @@ -167,6 +169,18 @@ struct ref_stats { size_t others; }; +struct object_stats { + size_t tags; + size_t commits; + size_t trees; + size_t blobs; +}; + +struct repo_structure { + struct ref_stats refs; + struct object_stats objects; +}; + struct stats_table { struct string_list rows; @@ -234,9 +248,17 @@ static inline size_t get_total_reference_count(struct ref_stats *stats) return stats->branches + stats->remotes + stats->tags + stats->others; } -static void stats_table_setup_structure(struct stats_table *table, - struct ref_stats *refs) +static inline size_t get_total_object_count(struct object_stats *stats) { + 
return stats->tags + stats->commits + stats->trees + stats->blobs; +} + +static void stats_table_setup_structure(struct stats_table *table, + struct repo_structure *stats) +{ + struct object_stats *objects = &stats->objects; + struct ref_stats *refs = &stats->refs; + size_t object_total; size_t ref_total; ref_total = get_total_reference_count(refs); @@ -246,6 +268,15 @@ static void stats_table_setup_structure(struct stats_table *table, stats_table_count_addf(table, refs->tags, " * %s", _("Tags")); stats_table_count_addf(table, refs->remotes, " * %s", _("Remotes")); stats_table_count_addf(table, refs->others, " * %s", _("Others")); + + object_total = get_total_object_count(objects); + stats_table_addf(table, ""); + stats_table_addf(table, "* %s", _("Reachable objects")); + stats_table_count_addf(table, object_total, " * %s", _("Count")); + stats_table_count_addf(table, objects->commits, " * %s", _("Commits")); + stats_table_count_addf(table, objects->trees, " * %s", _("Trees")); + stats_table_count_addf(table, objects->blobs, " * %s", _("Blobs")); + stats_table_count_addf(table, objects->tags, " * %s", _("Tags")); } static void stats_table_print_structure(const struct stats_table *table) @@ -299,12 +330,18 @@ static void stats_table_clear(struct stats_table *table) string_list_clear(&table->rows, 1); } +struct count_references_data { + struct ref_stats *stats; + struct rev_info *revs; +}; + static int count_references(const char *refname, const char *referent UNUSED, - const struct object_id *oid UNUSED, + const struct object_id *oid, int flags UNUSED, void *cb_data) { - struct ref_stats *stats = cb_data; + struct count_references_data *data = cb_data; + struct ref_stats *stats = data->stats; switch (ref_kind_from_refname(refname)) { case FILTER_REFS_BRANCHES: @@ -323,13 +360,64 @@ static int count_references(const char *refname, BUG("unexpected reference type"); } + /* + * While iterating through references for counting, also add OIDs in + * preparation for the 
path walk. + */ + add_pending_oid(data->revs, NULL, oid, 0); + return 0; } static void structure_count_references(struct ref_stats *stats, + struct rev_info *revs, struct repository *repo) { - refs_for_each_ref(get_main_ref_store(repo), count_references, &stats); + struct count_references_data data = { + .stats = stats, + .revs = revs, + }; + + refs_for_each_ref(get_main_ref_store(repo), count_references, &data); +} + + +static int count_objects(const char *path UNUSED, struct oid_array *oids, + enum object_type type, void *cb_data) +{ + struct object_stats *stats = cb_data; + + switch (type) { + case OBJ_TAG: + stats->tags += oids->nr; + break; + case OBJ_COMMIT: + stats->commits += oids->nr; + break; + case OBJ_TREE: + stats->trees += oids->nr; + break; + case OBJ_BLOB: + stats->blobs += oids->nr; + break; + default: + BUG("invalid object type"); + } + + return 0; +} + +static void structure_count_objects(struct object_stats *stats, + struct rev_info *revs) +{ + struct path_walk_info info = PATH_WALK_INFO_INIT; + + info.revs = revs; + info.path_fn = count_objects; + info.path_fn_data = stats; + + walk_objects_by_path(&info); + path_walk_info_clear(&info); } static int cmd_repo_structure(int argc, const char **argv, const char *prefix, @@ -338,19 +426,24 @@ static int cmd_repo_structure(int argc, const char **argv, const char *prefix, struct stats_table table = { .rows = STRING_LIST_INIT_DUP, }; - struct ref_stats stats = { 0 }; + struct repo_structure stats = { 0 }; + struct rev_info revs; struct option options[] = { 0 }; argc = parse_options(argc, argv, prefix, options, repo_usage, 0); if (argc) usage(_("too many arguments")); - structure_count_references(&stats, repo); + repo_init_revisions(repo, &revs, prefix); + + structure_count_references(&stats.refs, &revs, repo); + structure_count_objects(&stats.objects, &revs); stats_table_setup_structure(&table, &stats); stats_table_print_structure(&table); stats_table_clear(&table); + release_revisions(&revs); return 
0; } diff --git a/t/t1901-repo-structure.sh b/t/t1901-repo-structure.sh index e592eea0eb..c32cf4e239 100755 --- a/t/t1901-repo-structure.sh +++ b/t/t1901-repo-structure.sh @@ -18,6 +18,13 @@ test_expect_success 'empty repository' ' | * Tags | 0 | | * Remotes | 0 | | * Others | 0 | + | | | + | * Reachable objects | | + | * Count | 0 | + | * Commits | 0 | + | * Trees | 0 | + | * Blobs | 0 | + | * Tags | 0 | EOF git repo structure >out 2>err && @@ -27,17 +34,18 @@ test_expect_success 'empty repository' ' ) ' -test_expect_success 'repository with references' ' +test_expect_success 'repository with references and objects' ' test_when_finished "rm -rf repo" && git init repo && ( cd repo && - git commit --allow-empty -m init && + test_commit_bulk 42 && git tag -a foo -m bar && oid="$(git rev-parse HEAD)" && git update-ref refs/remotes/origin/foo "$oid" && + # Also creates a commit, tree, and blob. git notes add -m foo && cat >expect <<-\EOF && @@ -49,6 +57,13 @@ test_expect_success 'repository with references' ' | * Tags | 1 | | * Remotes | 1 | | * Others | 1 | + | | | + | * Reachable objects | | + | * Count | 130 | + | * Commits | 43 | + | * Trees | 43 | + | * Blobs | 43 | + | * Tags | 1 | EOF git repo structure >out 2>err && From 17215675b5a2c2eab54b295a7e92d953af2e8779 Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Tue, 21 Oct 2025 13:26:00 -0500 Subject: [PATCH 6/7] builtin/repo: add keyvalue and nul format for structure stats All repository structure stats are outputted in a human-friendly table form. This format is not suitable for machine parsing. Add a --format option that supports three output modes: `table`, `keyvalue`, and `nul`. The `table` mode is the default format and prints the same table output as before. With the `keyvalue` mode, each line of output contains a key-value pair of a repository stat. The '=' character is used to delimit between keys and values. 
The `nul` mode is similar to `keyvalue`, but key-values are delimited by a NUL character instead of a newline. Also, instead of a '=' character to delimit between keys and values, a newline character is used. This allows stat values to support special characters without having to cquote them. These two new modes provide output that is more machine-friendly. Signed-off-by: Justin Tobler Signed-off-by: Junio C Hamano --- Documentation/git-repo.adoc | 25 +++++++++++++++-- builtin/repo.c | 55 ++++++++++++++++++++++++++++++++++--- t/t1901-repo-structure.sh | 33 ++++++++++++++++++++++ 3 files changed, 106 insertions(+), 7 deletions(-) diff --git a/Documentation/git-repo.adoc b/Documentation/git-repo.adoc index ae62d2415f..ce43cb19c8 100644 --- a/Documentation/git-repo.adoc +++ b/Documentation/git-repo.adoc @@ -9,7 +9,7 @@ SYNOPSIS -------- [synopsis] git repo info [--format=(keyvalue|nul)] [-z] [...] -git repo structure +git repo structure [--format=(table|keyvalue|nul)] DESCRIPTION ----------- @@ -44,7 +44,7 @@ supported: + `-z` is an alias for `--format=nul`. -`structure`:: +`structure [--format=(table|keyvalue|nul)]`:: Retrieve statistics about the current repository structure. The following kinds of information are reported: + @@ -52,7 +52,26 @@ supported: * Reachable object counts categorized by type + -The table output format may change and is not intended for machine parsing. +The output format can be chosen through the flag `--format`. Three formats are +supported: ++ +`table`::: + Outputs repository stats in a human-friendly table. This format may + change and is not intended for machine parsing. This is the default + format. + +`keyvalue`::: + Each line of output contains a key-value pair for a repository stat. + The '=' character is used to delimit between the key and the value. + Values containing "unusual" characters are quoted as explained for the + configuration variable `core.quotePath` (see linkgit:git-config[1]).
+ +`nul`::: + Similar to `keyvalue`, but uses a NUL character to delimit between + key-value pairs instead of a newline. Also uses a newline character as + the delimiter between the key and value instead of '='. Unlike the + `keyvalue` format, values containing "unusual" characters are never + quoted. INFO KEYS --------- diff --git a/builtin/repo.c b/builtin/repo.c index f39f06ee8c..1754cc7e5d 100644 --- a/builtin/repo.c +++ b/builtin/repo.c @@ -15,13 +15,14 @@ static const char *const repo_usage[] = { "git repo info [--format=(keyvalue|nul)] [-z] [...]", - "git repo structure", + "git repo structure [--format=(table|keyvalue|nul)]", NULL }; typedef int get_value_fn(struct repository *repo, struct strbuf *buf); enum output_format { + FORMAT_TABLE, FORMAT_KEYVALUE, FORMAT_NUL_TERMINATED, }; @@ -136,6 +137,8 @@ static int parse_format_cb(const struct option *opt, *format = FORMAT_NUL_TERMINATED; else if (!strcmp(arg, "keyvalue")) *format = FORMAT_KEYVALUE; + else if (!strcmp(arg, "table")) + *format = FORMAT_TABLE; else die(_("invalid format '%s'"), arg); @@ -158,6 +161,8 @@ static int cmd_repo_info(int argc, const char **argv, const char *prefix, }; argc = parse_options(argc, argv, prefix, options, repo_usage, 0); + if (format != FORMAT_KEYVALUE && format != FORMAT_NUL_TERMINATED) + die(_("unsupported output format")); return print_fields(argc, argv, repo, format); } @@ -330,6 +335,30 @@ static void stats_table_clear(struct stats_table *table) string_list_clear(&table->rows, 1); } +static void structure_keyvalue_print(struct repo_structure *stats, + char key_delim, char value_delim) +{ + printf("references.branches.count%c%" PRIuMAX "%c", key_delim, + (uintmax_t)stats->refs.branches, value_delim); + printf("references.tags.count%c%" PRIuMAX "%c", key_delim, + (uintmax_t)stats->refs.tags, value_delim); + printf("references.remotes.count%c%" PRIuMAX "%c", key_delim, + (uintmax_t)stats->refs.remotes, value_delim); + printf("references.others.count%c%" PRIuMAX "%c", 
key_delim, + (uintmax_t)stats->refs.others, value_delim); + + printf("objects.commits.count%c%" PRIuMAX "%c", key_delim, + (uintmax_t)stats->objects.commits, value_delim); + printf("objects.trees.count%c%" PRIuMAX "%c", key_delim, + (uintmax_t)stats->objects.trees, value_delim); + printf("objects.blobs.count%c%" PRIuMAX "%c", key_delim, + (uintmax_t)stats->objects.blobs, value_delim); + printf("objects.tags.count%c%" PRIuMAX "%c", key_delim, + (uintmax_t)stats->objects.tags, value_delim); + + fflush(stdout); +} + struct count_references_data { struct ref_stats *stats; struct rev_info *revs; @@ -426,9 +455,15 @@ static int cmd_repo_structure(int argc, const char **argv, const char *prefix, struct stats_table table = { .rows = STRING_LIST_INIT_DUP, }; + enum output_format format = FORMAT_TABLE; struct repo_structure stats = { 0 }; struct rev_info revs; - struct option options[] = { 0 }; + struct option options[] = { + OPT_CALLBACK_F(0, "format", &format, N_("format"), + N_("output format"), + PARSE_OPT_NONEG, parse_format_cb), + OPT_END() + }; argc = parse_options(argc, argv, prefix, options, repo_usage, 0); if (argc) @@ -439,8 +474,20 @@ static int cmd_repo_structure(int argc, const char **argv, const char *prefix, structure_count_references(&stats.refs, &revs, repo); structure_count_objects(&stats.objects, &revs); - stats_table_setup_structure(&table, &stats); - stats_table_print_structure(&table); + switch (format) { + case FORMAT_TABLE: + stats_table_setup_structure(&table, &stats); + stats_table_print_structure(&table); + break; + case FORMAT_KEYVALUE: + structure_keyvalue_print(&stats, '=', '\n'); + break; + case FORMAT_NUL_TERMINATED: + structure_keyvalue_print(&stats, '\n', '\0'); + break; + default: + BUG("invalid output format"); + } stats_table_clear(&table); release_revisions(&revs); diff --git a/t/t1901-repo-structure.sh b/t/t1901-repo-structure.sh index c32cf4e239..14bd8aede5 100755 --- a/t/t1901-repo-structure.sh +++ b/t/t1901-repo-structure.sh @@ 
-73,4 +73,37 @@ test_expect_success 'repository with references and objects' ' ) ' +test_expect_success 'keyvalue and nul format' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + test_commit_bulk 42 && + git tag -a foo -m bar && + + cat >expect <<-\EOF && + references.branches.count=1 + references.tags.count=1 + references.remotes.count=0 + references.others.count=0 + objects.commits.count=42 + objects.trees.count=42 + objects.blobs.count=42 + objects.tags.count=1 + EOF + + git repo structure --format=keyvalue >out 2>err && + + test_cmp expect out && + test_line_count = 0 err && + + # Replace key and value delimiters for nul format. + tr "\n=" "\0\n" <expect >expect_nul && + git repo structure --format=nul >out 2>err && + + test_cmp expect_nul out && + test_line_count = 0 err + ) +' + test_done From 16a93c03c7824a40b034a6ee1cb1c68c8ef48682 Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Tue, 21 Oct 2025 13:26:01 -0500 Subject: [PATCH 7/7] builtin/repo: add progress meter for structure stats When using the structure subcommand for git-repo(1), evaluating a repository may take some time depending on its shape. Add a progress meter to provide feedback to the user about what is happening. The progress meter is enabled by default when the command is executed from a tty. It can also be explicitly enabled/disabled via the --[no-]progress option.
Signed-off-by: Justin Tobler Signed-off-by: Junio C Hamano --- builtin/repo.c | 46 ++++++++++++++++++++++++++++++++++----- t/t1901-repo-structure.sh | 20 +++++++++++++++++ 2 files changed, 60 insertions(+), 6 deletions(-) diff --git a/builtin/repo.c b/builtin/repo.c index 1754cc7e5d..9d4749f79b 100644 --- a/builtin/repo.c +++ b/builtin/repo.c @@ -4,6 +4,7 @@ #include "environment.h" #include "parse-options.h" #include "path-walk.h" +#include "progress.h" #include "quote.h" #include "ref-filter.h" #include "refs.h" @@ -362,6 +363,7 @@ static void structure_keyvalue_print(struct repo_structure *stats, struct count_references_data { struct ref_stats *stats; struct rev_info *revs; + struct progress *progress; }; static int count_references(const char *refname, @@ -371,6 +373,7 @@ static int count_references(const char *refname, { struct count_references_data *data = cb_data; struct ref_stats *stats = data->stats; + size_t ref_count; switch (ref_kind_from_refname(refname)) { case FILTER_REFS_BRANCHES: @@ -395,26 +398,41 @@ static int count_references(const char *refname, */ add_pending_oid(data->revs, NULL, oid, 0); + ref_count = get_total_reference_count(stats); + display_progress(data->progress, ref_count); + return 0; } static void structure_count_references(struct ref_stats *stats, struct rev_info *revs, - struct repository *repo) + struct repository *repo, + int show_progress) { struct count_references_data data = { .stats = stats, .revs = revs, }; + if (show_progress) + data.progress = start_delayed_progress(repo, + _("Counting references"), 0); + refs_for_each_ref(get_main_ref_store(repo), count_references, &data); + stop_progress(&data.progress); } +struct count_objects_data { + struct object_stats *stats; + struct progress *progress; +}; static int count_objects(const char *path UNUSED, struct oid_array *oids, enum object_type type, void *cb_data) { - struct object_stats *stats = cb_data; + struct count_objects_data *data = cb_data; + struct object_stats *stats 
= data->stats; + size_t object_count; switch (type) { case OBJ_TAG: @@ -433,20 +451,31 @@ static int count_objects(const char *path UNUSED, struct oid_array *oids, BUG("invalid object type"); } + object_count = get_total_object_count(stats); + display_progress(data->progress, object_count); + return 0; } static void structure_count_objects(struct object_stats *stats, - struct rev_info *revs) + struct rev_info *revs, + struct repository *repo, int show_progress) { struct path_walk_info info = PATH_WALK_INFO_INIT; + struct count_objects_data data = { + .stats = stats, + }; info.revs = revs; info.path_fn = count_objects; - info.path_fn_data = stats; + info.path_fn_data = &data; + + if (show_progress) + data.progress = start_delayed_progress(repo, _("Counting objects"), 0); walk_objects_by_path(&info); path_walk_info_clear(&info); + stop_progress(&data.progress); } static int cmd_repo_structure(int argc, const char **argv, const char *prefix, @@ -458,10 +487,12 @@ static int cmd_repo_structure(int argc, const char **argv, const char *prefix, enum output_format format = FORMAT_TABLE; struct repo_structure stats = { 0 }; struct rev_info revs; + int show_progress = -1; struct option options[] = { OPT_CALLBACK_F(0, "format", &format, N_("format"), N_("output format"), PARSE_OPT_NONEG, parse_format_cb), + OPT_BOOL(0, "progress", &show_progress, N_("show progress")), OPT_END() }; @@ -471,8 +502,11 @@ static int cmd_repo_structure(int argc, const char **argv, const char *prefix, repo_init_revisions(repo, &revs, prefix); - structure_count_references(&stats.refs, &revs, repo); - structure_count_objects(&stats.objects, &revs); + if (show_progress < 0) + show_progress = isatty(2); + + structure_count_references(&stats.refs, &revs, repo, show_progress); + structure_count_objects(&stats.objects, &revs, repo, show_progress); switch (format) { case FORMAT_TABLE: diff --git a/t/t1901-repo-structure.sh b/t/t1901-repo-structure.sh index 14bd8aede5..36a71a144e 100755 --- 
a/t/t1901-repo-structure.sh +++ b/t/t1901-repo-structure.sh @@ -106,4 +106,24 @@ test_expect_success 'keyvalue and nul format' ' ) ' +test_expect_success 'progress meter option' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + test_commit foo && + + GIT_PROGRESS_DELAY=0 git repo structure --progress >out 2>err && + + test_file_not_empty out && + test_grep "Counting references: 2, done." err && + test_grep "Counting objects: 3, done." err && + + GIT_PROGRESS_DELAY=0 git repo structure --no-progress >out 2>err && + + test_file_not_empty out && + test_line_count = 0 err + ) +' + test_done