Merge branch 'jt/repo-stats' into jch

"git repo stats", a new command.

Comments?

* jt/repo-stats:
  builtin/repo: add progress meter for stats
  builtin/repo: add keyvalue and nul format for stats
  builtin/repo: add object counts in stats output
  builtin/repo: introduce stats subcommand
  ref-filter: allow NULL filter pattern
  builtin/repo: rename repo_info() to cmd_repo_info()
seen
Junio C Hamano 2025-10-06 14:31:22 -07:00
commit cad6d204d7
5 changed files with 533 additions and 5 deletions

View File

@ -9,6 +9,7 @@ SYNOPSIS
--------
[synopsis]
git repo info [--format=(keyvalue|nul)] [-z] [<key>...]
git repo stats [--format=(table|keyvalue|nul)]

DESCRIPTION
-----------
@ -43,6 +44,35 @@ supported:
+
`-z` is an alias for `--format=nul`.

`stats [--format=(table|keyvalue|nul)]`::
Retrieve statistics about the current repository. The following kinds
of information are reported:
+
* Reference counts categorized by type
* Reachable object counts categorized by type

+
The output format can be chosen through the flag `--format`. Three formats are
supported:
+
`table`:::
Outputs repository stats in a human-friendly table. This format may
change and is not intended for machine parsing. This is the default
format.

`keyvalue`:::
Each line of output contains a key-value pair for a repository stat.
The '=' character separates the key from the value.
Values containing "unusual" characters are quoted as explained for the
configuration variable `core.quotePath` (see linkgit:git-config[1]).

`nul`:::
Similar to `keyvalue`, but each key-value pair is terminated by a NUL
character instead of a newline, and the key is separated from the value
by a newline character instead of '='. Unlike the `keyvalue` format,
values containing "unusual" characters are never quoted.

INFO KEYS
---------
In order to obtain a set of values from `git repo info`, you should provide

View File

@ -3,19 +3,27 @@
#include "builtin.h"
#include "environment.h"
#include "parse-options.h"
#include "path-walk.h"
#include "progress.h"
#include "quote.h"
#include "ref-filter.h"
#include "refs.h"
#include "revision.h"
#include "strbuf.h"
#include "string-list.h"
#include "shallow.h"
#include "utf8.h"

/*
 * Usage strings for the "git repo" subcommands. The array is
 * NULL-terminated and is handed to parse_options() by the subcommand
 * handlers in this file.
 */
static const char *const repo_usage[] = {
"git repo info [--format=(keyvalue|nul)] [-z] [<key>...]",
"git repo stats [--format=(table|keyvalue|nul)]",
NULL
};

/*
 * Callback signature: takes a repository and a strbuf and returns an int.
 * NOTE(review): its users are outside the visible hunk; presumably the
 * callback writes the value of an info key into `buf` -- confirm.
 */
typedef int get_value_fn(struct repository *repo, struct strbuf *buf);

enum output_format {
FORMAT_TABLE,
FORMAT_KEYVALUE,
FORMAT_NUL_TERMINATED,
};
@ -130,14 +138,16 @@ static int parse_format_cb(const struct option *opt,
*format = FORMAT_NUL_TERMINATED;
else if (!strcmp(arg, "keyvalue"))
*format = FORMAT_KEYVALUE;
else if (!strcmp(arg, "table"))
*format = FORMAT_TABLE;
else
die(_("invalid format '%s'"), arg);

return 0;
}

static int repo_info(int argc, const char **argv, const char *prefix,
struct repository *repo)
static int cmd_repo_info(int argc, const char **argv, const char *prefix,
struct repository *repo)
{
enum output_format format = FORMAT_KEYVALUE;
struct option options[] = {
@ -152,16 +162,374 @@ static int repo_info(int argc, const char **argv, const char *prefix,
};

argc = parse_options(argc, argv, prefix, options, repo_usage, 0);
if (format != FORMAT_KEYVALUE && format != FORMAT_NUL_TERMINATED)
die(_("unsupported output format"));

return print_fields(argc, argv, repo, format);
}

/*
 * Number of references of each kind, as tallied by
 * stats_count_references().
 */
struct ref_stats {
/* refs whose kind is FILTER_REFS_BRANCHES */
size_t branches;
/* refs whose kind is FILTER_REFS_REMOTES */
size_t remotes;
/* refs whose kind is FILTER_REFS_TAGS */
size_t tags;
/* refs whose kind is FILTER_REFS_OTHERS */
size_t others;
};

/*
 * Number of objects of each type, accumulated by the count_objects()
 * callback during the object walk.
 */
struct object_stats {
/* OBJ_TAG objects */
size_t tags;
/* OBJ_COMMIT objects */
size_t commits;
/* OBJ_TREE objects */
size_t trees;
/* OBJ_BLOB objects */
size_t blobs;
};

/*
 * All statistics gathered by "git repo stats": reference counts and
 * object counts.
 */
struct repo_stats {
struct ref_stats refs;
struct object_stats objects;
};

/*
 * In-memory representation of the table printed for the "table" format.
 */
struct stats_table {
/*
 * One item per table row: the item string is the row name and the item
 * util is either NULL or a struct stats_table_entry holding the value.
 */
struct string_list rows;

/* Largest utf8_strwidth() of any row name added so far. */
size_t name_col_width;
/* Largest utf8_strwidth() of any entry value added so far. */
size_t value_col_width;
};

/*
 * Holds column data that gets stored for each row. An entry is attached
 * to a row through the string_list item's util pointer; rows without a
 * value have no entry.
 */
struct stats_table_entry {
/* Heap-allocated text of the value column; freed by stats_table_clear(). */
char *value;
};

static void stats_table_vaddf(struct stats_table *table,
struct stats_table_entry *entry,
const char *format, va_list ap)
{
struct strbuf buf = STRBUF_INIT;
struct string_list_item *item;
char *formatted_name;
size_t name_width;

strbuf_vaddf(&buf, format, ap);
formatted_name = strbuf_detach(&buf, NULL);
name_width = utf8_strwidth(formatted_name);

item = string_list_append_nodup(&table->rows, formatted_name);
item->util = entry;

if (name_width > table->name_col_width)
table->name_col_width = name_width;
if (entry) {
size_t value_width = utf8_strwidth(entry->value);
if (value_width > table->value_col_width)
table->value_col_width = value_width;
}
}

/*
 * Append a row whose name is given printf-style and which has no value
 * (no entry is attached to the row).
 */
static void stats_table_addf(struct stats_table *table, const char *format, ...)
{
	va_list args;

	va_start(args, format);
	stats_table_vaddf(table, NULL, format, args);
	va_end(args);
}

/*
 * Append a row whose name is given printf-style and whose value column
 * shows `value` rendered as an unsigned decimal number.
 */
static void stats_table_count_addf(struct stats_table *table, size_t value,
				   const char *format, ...)
{
	struct stats_table_entry *cell;
	va_list args;

	CALLOC_ARRAY(cell, 1);
	cell->value = xstrfmt("%" PRIuMAX, (uintmax_t)value);

	va_start(args, format);
	stats_table_vaddf(table, cell, format, args);
	va_end(args);
}

/*
 * Return the sum of the tag, commit, tree and blob counts in `stats`.
 */
static inline size_t get_total_object_count(struct object_stats *stats)
{
	size_t total = stats->tags;

	total += stats->commits;
	total += stats->trees;
	total += stats->blobs;

	return total;
}

static void stats_table_setup(struct stats_table *table, struct repo_stats *stats)
{
struct object_stats *objects = &stats->objects;
struct ref_stats *refs = &stats->refs;
size_t object_total;
size_t ref_total;

ref_total = refs->branches + refs->remotes + refs->tags + refs->others;
stats_table_addf(table, "* %s", _("References"));
stats_table_count_addf(table, ref_total, " * %s", _("Count"));
stats_table_count_addf(table, refs->branches, " * %s", _("Branches"));
stats_table_count_addf(table, refs->tags, " * %s", _("Tags"));
stats_table_count_addf(table, refs->remotes, " * %s", _("Remotes"));
stats_table_count_addf(table, refs->others, " * %s", _("Others"));

object_total = get_total_object_count(objects);
stats_table_addf(table, "");
stats_table_addf(table, "* %s", _("Reachable objects"));
stats_table_count_addf(table, object_total, " * %s", _("Count"));
stats_table_count_addf(table, objects->commits, " * %s", _("Commits"));
stats_table_count_addf(table, objects->trees, " * %s", _("Trees"));
stats_table_count_addf(table, objects->blobs, " * %s", _("Blobs"));
stats_table_count_addf(table, objects->tags, " * %s", _("Tags"));
}

/*
 * Return the larger of `a` and `b`.
 */
static inline size_t max_size_t(size_t a, size_t b)
{
	if (a > b)
		return a;
	return b;
}

/*
 * Print `table` to stdout as a two-column, pipe-delimited grid: a header
 * row, a dashed separator row, then one line per row in `table->rows`.
 *
 * Each column is as wide as the wider of its title and the width recorded
 * in `table`. Row names are left-aligned and values are right-aligned;
 * rows without an entry print an empty value cell.
 *
 * NOTE(review): the widths come from utf8_strwidth(), while the printf()
 * field width pads by bytes; presumably the two only agree for single-byte
 * text, so translated titles may end up misaligned -- confirm.
 */
static void stats_table_print(const struct stats_table *table)
{
	const char *name_col_title = _("Repository stats");
	const char *value_col_title = _("Value");
	size_t name_title_len = utf8_strwidth(name_col_title);
	size_t value_title_len = utf8_strwidth(value_col_title);
	struct string_list_item *item;
	int name_col_width;
	int value_col_width;

	/* printf() takes the field width as an int. */
	name_col_width = cast_size_t_to_int(
		max_size_t(table->name_col_width, name_title_len));
	value_col_width = cast_size_t_to_int(
		max_size_t(table->value_col_width, value_title_len));

	printf("| %-*s | %-*s |\n", name_col_width, name_col_title,
	       value_col_width, value_col_title);
	printf("| ");
	for (int i = 0; i < name_col_width; i++)
		putchar('-');
	printf(" | ");
	for (int i = 0; i < value_col_width; i++)
		putchar('-');
	printf(" |\n");

	for_each_string_list_item(item, &table->rows) {
		/* A NULL util marks a row that has no value. */
		const struct stats_table_entry *entry = item->util;
		const char *value = entry ? entry->value : "";

		printf("| %-*s | %*s |\n", name_col_width, item->string,
		       value_col_width, value);
	}
}

/*
 * Release everything owned by `table`: the value string of every entry,
 * then the row list itself.
 */
static void stats_table_clear(struct stats_table *table)
{
	struct string_list_item *row;

	for_each_string_list_item(row, &table->rows) {
		struct stats_table_entry *cell = row->util;

		if (!cell)
			continue;
		free(cell->value);
	}

	/*
	 * Passing 1 asks string_list_clear() to free the util pointers
	 * (the entries) as well; see string-list.h.
	 */
	string_list_clear(&table->rows, 1);
}

static void stats_keyvalue_print(struct repo_stats *stats, char key_delim,
char value_delim)
{
printf("references.branches.count%c%" PRIuMAX "%c", key_delim,
(uintmax_t)stats->refs.branches, value_delim);
printf("references.tags.count%c%" PRIuMAX "%c", key_delim,
(uintmax_t)stats->refs.tags, value_delim);
printf("references.remotes.count%c%" PRIuMAX "%c", key_delim,
(uintmax_t)stats->refs.remotes, value_delim);
printf("references.others.count%c%" PRIuMAX "%c", key_delim,
(uintmax_t)stats->refs.others, value_delim);

printf("objects.commits.count%c%" PRIuMAX "%c", key_delim,
(uintmax_t)stats->objects.commits, value_delim);
printf("objects.trees.count%c%" PRIuMAX "%c", key_delim,
(uintmax_t)stats->objects.trees, value_delim);
printf("objects.blobs.count%c%" PRIuMAX "%c", key_delim,
(uintmax_t)stats->objects.blobs, value_delim);
printf("objects.tags.count%c%" PRIuMAX "%c", key_delim,
(uintmax_t)stats->objects.tags, value_delim);

fflush(stdout);
}

/*
 * Tally the references in `refs` by kind into `stats`. When
 * `show_progress` is non-zero, a delayed "Counting references" progress
 * meter sized to the number of refs is updated after each reference.
 */
static void stats_count_references(struct ref_stats *stats, struct ref_array *refs,
				   struct repository *repo, int show_progress)
{
	struct progress *meter = NULL;

	if (show_progress)
		meter = start_delayed_progress(repo, _("Counting references"),
					       refs->nr);

	for (int i = 0; i < refs->nr; i++) {
		size_t *counter = NULL;

		/* Pick the counter that matches this reference's kind. */
		switch (refs->items[i]->kind) {
		case FILTER_REFS_BRANCHES:
			counter = &stats->branches;
			break;
		case FILTER_REFS_REMOTES:
			counter = &stats->remotes;
			break;
		case FILTER_REFS_TAGS:
			counter = &stats->tags;
			break;
		case FILTER_REFS_OTHERS:
			counter = &stats->others;
			break;
		default:
			BUG("unexpected reference type");
		}

		(*counter)++;
		display_progress(meter, i + 1);
	}

	stop_progress(&meter);
}

/*
 * Callback data handed to count_objects() through the path walk.
 */
struct count_objects_data {
/* Counters that count_objects() adds to. */
struct object_stats *stats;
/* Progress meter updated after each callback; left unset when progress is off. */
struct progress *progress;
};

/*
 * Path-walk callback: add the number of object IDs in `oids` to the
 * counter matching `type`, then report the running total of all objects
 * to the progress meter. `cb_data` is a struct count_objects_data.
 * Always returns 0.
 */
static int count_objects(const char *path UNUSED, struct oid_array *oids,
			 enum object_type type, void *cb_data)
{
	struct count_objects_data *ctx = cb_data;
	struct object_stats *totals = ctx->stats;
	size_t *counter = NULL;

	switch (type) {
	case OBJ_TAG:
		counter = &totals->tags;
		break;
	case OBJ_COMMIT:
		counter = &totals->commits;
		break;
	case OBJ_TREE:
		counter = &totals->trees;
		break;
	case OBJ_BLOB:
		counter = &totals->blobs;
		break;
	default:
		BUG("invalid object type");
	}

	*counter += oids->nr;
	display_progress(ctx->progress, get_total_object_count(totals));

	return 0;
}

static void stats_count_objects(struct object_stats *stats,
struct ref_array *refs, struct rev_info *revs,
struct repository *repo, int show_progress)
{
struct path_walk_info info = PATH_WALK_INFO_INIT;
struct count_objects_data data = {
.stats = stats,
};

info.revs = revs;
info.path_fn = count_objects;
info.path_fn_data = &data;

for (int i = 0; i < refs->nr; i++) {
struct ref_array_item *ref = refs->items[i];

switch (ref->kind) {
case FILTER_REFS_BRANCHES:
case FILTER_REFS_TAGS:
case FILTER_REFS_REMOTES:
case FILTER_REFS_OTHERS:
add_pending_oid(revs, NULL, &ref->objectname, 0);
break;
default:
BUG("unexpected reference type");
}
}

if (show_progress)
data.progress = start_delayed_progress(repo, _("Counting objects"), 0);

walk_objects_by_path(&info);
path_walk_info_clear(&info);
stop_progress(&data.progress);
}

/*
 * Entry point of "git repo stats".
 *
 * Parses --format and --[no-]progress, rejects any remaining arguments,
 * counts the references returned by filter_refs() and the objects found
 * by walking from them, and prints the result in the requested format.
 * When --progress is not given either way, progress is shown only if
 * file descriptor 2 is a terminal. Returns 0.
 */
static int cmd_repo_stats(int argc, const char **argv, const char *prefix,
			  struct repository *repo)
{
	enum output_format format = FORMAT_TABLE;
	int show_progress = -1; /* -1: not set on the command line */
	struct option options[] = {
		OPT_CALLBACK_F(0, "format", &format, N_("format"),
			       N_("output format"),
			       PARSE_OPT_NONEG, parse_format_cb),
		OPT_BOOL(0, "progress", &show_progress, N_("show progress")),
		OPT_END()
	};
	struct ref_filter filter = REF_FILTER_INIT;
	struct ref_array refs = { 0 };
	struct repo_stats stats = { 0 };
	struct stats_table table = {
		.rows = STRING_LIST_INIT_DUP,
	};
	struct rev_info revs;

	argc = parse_options(argc, argv, prefix, options, repo_usage, 0);
	if (argc)
		usage(_("too many arguments"));

	repo_init_revisions(repo, &revs, prefix);
	if (filter_refs(&refs, &filter, FILTER_REFS_REGULAR))
		die(_("unable to filter refs"));

	if (show_progress < 0)
		show_progress = isatty(2);

	stats_count_references(&stats.refs, &refs, repo, show_progress);
	stats_count_objects(&stats.objects, &refs, &revs, repo, show_progress);

	if (format == FORMAT_TABLE) {
		stats_table_setup(&table, &stats);
		stats_table_print(&table);
	} else if (format == FORMAT_KEYVALUE) {
		stats_keyvalue_print(&stats, '=', '\n');
	} else if (format == FORMAT_NUL_TERMINATED) {
		stats_keyvalue_print(&stats, '\n', '\0');
	} else {
		BUG("invalid output format");
	}

	/* The table is cleared even when it was never filled in. */
	stats_table_clear(&table);
	release_revisions(&revs);
	ref_array_clear(&refs);

	return 0;
}

int cmd_repo(int argc, const char **argv, const char *prefix,
struct repository *repo)
{
parse_opt_subcommand_fn *fn = NULL;
struct option options[] = {
OPT_SUBCOMMAND("info", &fn, repo_info),
OPT_SUBCOMMAND("info", &fn, cmd_repo_info),
OPT_SUBCOMMAND("stats", &fn, cmd_repo_stats),
OPT_END()
};


View File

@ -2664,7 +2664,7 @@ static int match_name_as_path(const char **pattern, const char *refname,
/* Return 1 if the refname matches one of the patterns, otherwise 0. */
static int filter_pattern_match(struct ref_filter *filter, const char *refname)
{
if (!*filter->name_patterns)
if (!filter->name_patterns || !*filter->name_patterns)
return 1; /* No pattern always matches */
if (filter->match_as_path)
return match_name_as_path(filter->name_patterns, refname,
@ -2751,7 +2751,7 @@ static int for_each_fullref_in_pattern(struct ref_filter *filter,
return for_each_fullref_with_seek(filter, cb, cb_data, 0);
}

if (!filter->name_patterns[0]) {
if (!filter->name_patterns || !filter->name_patterns[0]) {
/* no patterns; we have to look at everything */
return for_each_fullref_with_seek(filter, cb, cb_data, 0);
}

View File

@ -238,6 +238,7 @@ integration_tests = [
't1701-racy-split-index.sh',
't1800-hook.sh',
't1900-repo.sh',
't1901-repo-stats.sh',
't2000-conflict-when-checking-files-out.sh',
't2002-checkout-cache-u.sh',
't2003-checkout-cache-mkdir.sh',

129
t/t1901-repo-stats.sh Executable file
View File

@ -0,0 +1,129 @@
#!/bin/sh

test_description='test git repo stats'

. ./test-lib.sh

# In a freshly initialized repository the default (table) output must
# report zero for every count, and nothing may be written to stderr.
test_expect_success 'empty repository' '
test_when_finished "rm -rf repo" &&
git init repo &&
(
cd repo &&
cat >expect <<-\EOF &&
| Repository stats | Value |
| ------------------- | ----- |
| * References | |
| * Count | 0 |
| * Branches | 0 |
| * Tags | 0 |
| * Remotes | 0 |
| * Others | 0 |
| | |
| * Reachable objects | |
| * Count | 0 |
| * Commits | 0 |
| * Trees | 0 |
| * Blobs | 0 |
| * Tags | 0 |
EOF

git repo stats >out 2>err &&

test_cmp expect out &&
test_line_count = 0 err
)
'

# Expects one reference of each kind: a branch, an annotated tag, a
# remote-tracking ref and one other ref (presumably the notes ref created
# by "git notes add"). Objects: the 42 bulk commits plus the notes commit
# give 43 commits, trees and blobs, and the annotated tag adds one tag
# object, for 130 in total.
test_expect_success 'repository with references and objects' '
test_when_finished "rm -rf repo" &&
git init repo &&
(
cd repo &&
test_commit_bulk 42 &&
git tag -a foo -m bar &&

oid="$(git rev-parse HEAD)" &&
git update-ref refs/remotes/origin/foo "$oid" &&

# Also creates a commit, tree, and blob.
git notes add -m foo &&

cat >expect <<-\EOF &&
| Repository stats | Value |
| ------------------- | ----- |
| * References | |
| * Count | 4 |
| * Branches | 1 |
| * Tags | 1 |
| * Remotes | 1 |
| * Others | 1 |
| | |
| * Reachable objects | |
| * Count | 130 |
| * Commits | 43 |
| * Trees | 43 |
| * Blobs | 43 |
| * Tags | 1 |
EOF

git repo stats >out 2>err &&

test_cmp expect out &&
test_line_count = 0 err
)
'

# The keyvalue format prints one key=value pair per line. The nul format
# must carry the same data with a newline between key and value and a NUL
# after each pair, so its expectation is derived from the keyvalue one
# with tr.
test_expect_success 'keyvalue and nul format' '
test_when_finished "rm -rf repo" &&
git init repo &&
(
cd repo &&
test_commit_bulk 42 &&
git tag -a foo -m bar &&

cat >expect <<-\EOF &&
references.branches.count=1
references.tags.count=1
references.remotes.count=0
references.others.count=0
objects.commits.count=42
objects.trees.count=42
objects.blobs.count=42
objects.tags.count=1
EOF

git repo stats --format=keyvalue >out 2>err &&

test_cmp expect out &&
test_line_count = 0 err &&

# Replace key and value delimiters for nul format.
tr "\n=" "\0\n" <expect >expect_nul &&
git repo stats --format=nul >out 2>err &&

test_cmp expect_nul out &&
test_line_count = 0 err
)
'

# --progress must write both progress meters to stderr even though stderr
# is redirected to a file; --no-progress must leave stderr empty.
# GIT_PROGRESS_DELAY=0 presumably makes the delayed meters appear
# immediately, and the two references are presumably the branch and the
# tag created by test_commit -- confirm.
test_expect_success 'progress meter option' '
test_when_finished "rm -rf repo" &&
git init repo &&
(
cd repo &&
test_commit foo &&

GIT_PROGRESS_DELAY=0 git repo stats --progress >out 2>err &&

test_file_not_empty out &&
test_grep "Counting references: 100% (2/2), done." err &&
test_grep "Counting objects: 3, done." err &&

GIT_PROGRESS_DELAY=0 git repo stats --no-progress >out 2>err &&

test_file_not_empty out &&
test_line_count = 0 err
)
'

test_done