Merge branch 'jt/repo-structure'

"git repo structure", a new command.

* jt/repo-structure:
  builtin/repo: add progress meter for structure stats
  builtin/repo: add keyvalue and nul format for structure stats
  builtin/repo: add object counts in structure output
  builtin/repo: introduce structure subcommand
  ref-filter: export ref_kind_from_refname()
  ref-filter: allow NULL filter pattern
  builtin/repo: rename repo_info() to cmd_repo_info()
main
Junio C Hamano 2025-11-04 07:48:06 -08:00
commit a9db6c66f5
6 changed files with 542 additions and 6 deletions

View File

@ -9,6 +9,7 @@ SYNOPSIS
--------
[synopsis]
git repo info [--format=(keyvalue|nul)] [-z] [<key>...]
git repo structure [--format=(table|keyvalue|nul)]

DESCRIPTION
-----------
@ -43,6 +44,35 @@ supported:
+
`-z` is an alias for `--format=nul`.

`structure [--format=(table|keyvalue|nul)]`::
Retrieve statistics about the current repository structure. The
following kinds of information are reported:
+
* Reference counts categorized by type
* Reachable object counts categorized by type

+
The output format can be chosen through the flag `--format`. Three formats are
supported:
+
`table`:::
Outputs repository stats in a human-friendly table. This format may
change and is not intended for machine parsing. This is the default
format.

`keyvalue`:::
Each line of output contains one key-value pair for a repository stat.
The '=' character separates the key from the value.
Values containing "unusual" characters are quoted as explained for the
configuration variable `core.quotePath` (see linkgit:git-config[1]).

`nul`:::
Similar to `keyvalue`, but each key-value pair is terminated by a NUL
character instead of a newline, and a newline character separates the
key from the value instead of '='. Unlike the `keyvalue` format, values
containing "unusual" characters are never quoted.

INFO KEYS
---------
In order to obtain a set of values from `git repo info`, you should provide

View File

@ -3,19 +3,27 @@
#include "builtin.h"
#include "environment.h"
#include "parse-options.h"
#include "path-walk.h"
#include "progress.h"
#include "quote.h"
#include "ref-filter.h"
#include "refs.h"
#include "revision.h"
#include "strbuf.h"
#include "string-list.h"
#include "shallow.h"
#include "utf8.h"

/*
 * Usage strings for the "git repo" subcommands. The subcommand entry
 * points hand this NULL-terminated array to parse_options().
 */
static const char *const repo_usage[] = {
"git repo info [--format=(keyvalue|nul)] [-z] [<key>...]",
"git repo structure [--format=(table|keyvalue|nul)]",
NULL
};

/*
 * Signature of a callback that is given the repository and a strbuf.
 * NOTE(review): presumably it writes one value into `buf` and returns 0 on
 * success; the users of this typedef are outside this view, so confirm.
 */
typedef int get_value_fn(struct repository *repo, struct strbuf *buf);

/* Output formats selectable through `--format`. */
enum output_format {
	FORMAT_TABLE,          /* --format=table */
	FORMAT_KEYVALUE,       /* --format=keyvalue */
	FORMAT_NUL_TERMINATED, /* --format=nul */
};
@ -130,13 +138,15 @@ static int parse_format_cb(const struct option *opt,
*format = FORMAT_NUL_TERMINATED;
else if (!strcmp(arg, "keyvalue"))
*format = FORMAT_KEYVALUE;
else if (!strcmp(arg, "table"))
*format = FORMAT_TABLE;
else
die(_("invalid format '%s'"), arg);

return 0;
}

static int repo_info(int argc, const char **argv, const char *prefix,
static int cmd_repo_info(int argc, const char **argv, const char *prefix,
struct repository *repo)
{
enum output_format format = FORMAT_KEYVALUE;
@ -152,16 +162,380 @@ static int repo_info(int argc, const char **argv, const char *prefix,
};

argc = parse_options(argc, argv, prefix, options, repo_usage, 0);
if (format != FORMAT_KEYVALUE && format != FORMAT_NUL_TERMINATED)
die(_("unsupported output format"));

return print_fields(argc, argv, repo, format);
}

/* Number of references seen, split by reference kind. */
struct ref_stats {
	size_t branches; /* counted for FILTER_REFS_BRANCHES */
	size_t remotes;  /* counted for FILTER_REFS_REMOTES */
	size_t tags;     /* counted for FILTER_REFS_TAGS */
	size_t others;   /* counted for FILTER_REFS_OTHERS */
};

/* Number of objects reported by the object walk, split by object type. */
struct object_stats {
	size_t tags;    /* OBJ_TAG */
	size_t commits; /* OBJ_COMMIT */
	size_t trees;   /* OBJ_TREE */
	size_t blobs;   /* OBJ_BLOB */
};

/* Everything "git repo structure" reports: reference and object counts. */
struct repo_structure {
	struct ref_stats refs;       /* reference counts by kind */
	struct object_stats objects; /* object counts by type */
};

/*
 * Rows of the human-readable table plus the widest name and value seen so
 * far, measured with utf8_strwidth().
 */
struct stats_table {
	/* One item per row: `string` is the row name, `util` the row's entry. */
	struct string_list rows;

	int name_col_width;  /* widest row name added so far */
	int value_col_width; /* widest row value added so far */
};

/*
 * Per-row column data, attached to a row through the `util` pointer of
 * its string_list item.
 */
struct stats_table_entry {
	char *value; /* heap-allocated text for the value column */
};

static void stats_table_vaddf(struct stats_table *table,
struct stats_table_entry *entry,
const char *format, va_list ap)
{
struct strbuf buf = STRBUF_INIT;
struct string_list_item *item;
char *formatted_name;
int name_width;

strbuf_vaddf(&buf, format, ap);
formatted_name = strbuf_detach(&buf, NULL);
name_width = utf8_strwidth(formatted_name);

item = string_list_append_nodup(&table->rows, formatted_name);
item->util = entry;

if (name_width > table->name_col_width)
table->name_col_width = name_width;
if (entry) {
int value_width = utf8_strwidth(entry->value);
if (value_width > table->value_col_width)
table->value_col_width = value_width;
}
}

/*
 * Append a row that has a name but no value (a heading or a blank
 * separator). The name is built from the printf-style `format`.
 */
static void stats_table_addf(struct stats_table *table, const char *format, ...)
{
	va_list args;

	va_start(args, format);
	/* A NULL entry marks the row as having an empty value column. */
	stats_table_vaddf(table, NULL, format, args);
	va_end(args);
}

/*
 * Append a row whose value column shows the count `value` in decimal.
 * The row name is built from the printf-style `format`.
 */
static void stats_table_count_addf(struct stats_table *table, size_t value,
				   const char *format, ...)
{
	struct stats_table_entry *cell;
	va_list args;

	/* The entry and its text are released later by stats_table_clear(). */
	CALLOC_ARRAY(cell, 1);
	cell->value = xstrfmt("%" PRIuMAX, (uintmax_t)value);

	va_start(args, format);
	stats_table_vaddf(table, cell, format, args);
	va_end(args);
}

/* Return the sum of all per-kind reference counters in `stats`. */
static inline size_t get_total_reference_count(struct ref_stats *stats)
{
	size_t total = stats->branches;

	total += stats->remotes;
	total += stats->tags;
	total += stats->others;

	return total;
}

/* Return the sum of all per-type object counters in `stats`. */
static inline size_t get_total_object_count(struct object_stats *stats)
{
	size_t total = stats->tags;

	total += stats->commits;
	total += stats->trees;
	total += stats->blobs;

	return total;
}

/*
 * Fill `table` with the rows of the structure report: a "References"
 * section followed, after a blank row, by a "Reachable objects" section.
 * Each section lists its total first and then the per-category counts.
 */
static void stats_table_setup_structure(struct stats_table *table,
					struct repo_structure *stats)
{
	struct ref_stats *ref_counts = &stats->refs;
	struct object_stats *obj_counts = &stats->objects;

	/* References section. */
	stats_table_addf(table, "* %s", _("References"));
	stats_table_count_addf(table, get_total_reference_count(ref_counts),
			       " * %s", _("Count"));
	stats_table_count_addf(table, ref_counts->branches, " * %s", _("Branches"));
	stats_table_count_addf(table, ref_counts->tags, " * %s", _("Tags"));
	stats_table_count_addf(table, ref_counts->remotes, " * %s", _("Remotes"));
	stats_table_count_addf(table, ref_counts->others, " * %s", _("Others"));

	/* An empty row separates the two sections. */
	stats_table_addf(table, "");

	/* Reachable objects section. */
	stats_table_addf(table, "* %s", _("Reachable objects"));
	stats_table_count_addf(table, get_total_object_count(obj_counts),
			       " * %s", _("Count"));
	stats_table_count_addf(table, obj_counts->commits, " * %s", _("Commits"));
	stats_table_count_addf(table, obj_counts->trees, " * %s", _("Trees"));
	stats_table_count_addf(table, obj_counts->blobs, " * %s", _("Blobs"));
	stats_table_count_addf(table, obj_counts->tags, " * %s", _("Tags"));
}

/*
 * Print `table` to stdout as a two-column table: a title row, a dashed
 * separator row, then one line per row of the table. Names are
 * left-aligned and values are right-aligned.
 *
 * Column widths are display widths as computed by utf8_strwidth(), while
 * printf's "%*s" field width pads by bytes. Padding is therefore computed
 * here from the display width and emitted separately, so that titles and
 * row names containing multi-byte characters (e.g. translations) still
 * line up. For plain ASCII text the output is unchanged.
 */
static void stats_table_print_structure(const struct stats_table *table)
{
	const char *name_col_title = _("Repository structure");
	const char *value_col_title = _("Value");
	int name_title_width = utf8_strwidth(name_col_title);
	int value_title_width = utf8_strwidth(value_col_title);
	int name_col_width = name_title_width;
	int value_col_width = value_title_width;
	struct string_list_item *item;

	/* Each column is as wide as its title or its widest row. */
	if (table->name_col_width > name_col_width)
		name_col_width = table->name_col_width;
	if (table->value_col_width > value_col_width)
		value_col_width = table->value_col_width;

	/* "%*s" with an empty string emits exactly that many spaces. */
	printf("| %s%*s | %s%*s |\n",
	       name_col_title, name_col_width - name_title_width, "",
	       value_col_title, value_col_width - value_title_width, "");
	printf("| ");
	for (int i = 0; i < name_col_width; i++)
		putchar('-');
	printf(" | ");
	for (int i = 0; i < value_col_width; i++)
		putchar('-');
	printf(" |\n");

	for_each_string_list_item(item, &table->rows) {
		const struct stats_table_entry *entry = item->util;
		/* Rows without an entry get an empty value column. */
		const char *value = entry ? entry->value : "";
		int name_pad = name_col_width - utf8_strwidth(item->string);
		int value_pad = value_col_width - utf8_strwidth(value);

		printf("| %s%*s | %*s%s |\n",
		       item->string, name_pad, "",
		       value_pad, "", value);
	}
}

/*
 * Release everything owned by `table`: the value text of every entry,
 * then the rows themselves.
 */
static void stats_table_clear(struct stats_table *table)
{
	struct string_list_item *row;

	for_each_string_list_item(row, &table->rows) {
		struct stats_table_entry *cell = row->util;

		/* Rows added without a value carry no entry. */
		if (cell)
			free(cell->value);
	}

	/* The non-zero second argument also frees each row's util pointer. */
	string_list_clear(&table->rows, 1);
}

/*
 * Print every statistic in `stats` to stdout as
 * "<key><key_delim><count><value_delim>", then flush stdout.
 * Reference counts are printed first, followed by object counts.
 */
static void structure_keyvalue_print(struct repo_structure *stats,
				     char key_delim, char value_delim)
{
	/* Keys in output order, each paired with the counter it reports. */
	const struct {
		const char *key;
		size_t count;
	} fields[] = {
		{ "references.branches.count", stats->refs.branches },
		{ "references.tags.count", stats->refs.tags },
		{ "references.remotes.count", stats->refs.remotes },
		{ "references.others.count", stats->refs.others },
		{ "objects.commits.count", stats->objects.commits },
		{ "objects.trees.count", stats->objects.trees },
		{ "objects.blobs.count", stats->objects.blobs },
		{ "objects.tags.count", stats->objects.tags },
	};

	for (size_t i = 0; i < sizeof(fields) / sizeof(fields[0]); i++)
		printf("%s%c%" PRIuMAX "%c", fields[i].key, key_delim,
		       (uintmax_t)fields[i].count, value_delim);

	fflush(stdout);
}

/* State handed to count_references() through its callback data pointer. */
struct count_references_data {
	struct ref_stats *stats;   /* counters updated for every reference */
	struct rev_info *revs;     /* receives each reference's OID as pending */
	struct progress *progress; /* progress meter; left NULL when disabled */
};

/*
 * Reference iteration callback: bump the counter matching the kind of
 * `refname`, queue `oid` on the revision walk, and update the progress
 * meter with the running total. Always returns 0.
 */
static int count_references(const char *refname,
			    const char *referent UNUSED,
			    const struct object_id *oid,
			    int flags UNUSED, void *cb_data)
{
	struct count_references_data *ctx = cb_data;
	struct ref_stats *counts = ctx->stats;
	size_t *counter = NULL;

	switch (ref_kind_from_refname(refname)) {
	case FILTER_REFS_BRANCHES:
		counter = &counts->branches;
		break;
	case FILTER_REFS_REMOTES:
		counter = &counts->remotes;
		break;
	case FILTER_REFS_TAGS:
		counter = &counts->tags;
		break;
	case FILTER_REFS_OTHERS:
		counter = &counts->others;
		break;
	default:
		BUG("unexpected reference type");
	}
	(*counter)++;

	/*
	 * The object walk that follows starts from these OIDs, so collect
	 * them during this single pass over the references.
	 */
	add_pending_oid(ctx->revs, NULL, oid, 0);

	display_progress(ctx->progress, get_total_reference_count(counts));

	return 0;
}

static void structure_count_references(struct ref_stats *stats,
struct rev_info *revs,
struct repository *repo,
int show_progress)
{
struct count_references_data data = {
.stats = stats,
.revs = revs,
};

if (show_progress)
data.progress = start_delayed_progress(repo,
_("Counting references"), 0);

refs_for_each_ref(get_main_ref_store(repo), count_references, &data);
stop_progress(&data.progress);
}

/* State handed to count_objects() through its callback data pointer. */
struct count_objects_data {
	struct object_stats *stats; /* counters updated for every batch */
	struct progress *progress;  /* progress meter; left NULL when disabled */
};

/*
 * Path-walk callback: add the number of OIDs in `oids` to the counter for
 * object type `type`, then update the progress meter with the running
 * total. Always returns 0.
 */
static int count_objects(const char *path UNUSED, struct oid_array *oids,
			 enum object_type type, void *cb_data)
{
	struct count_objects_data *ctx = cb_data;
	struct object_stats *counts = ctx->stats;
	size_t *counter = NULL;

	switch (type) {
	case OBJ_TAG:
		counter = &counts->tags;
		break;
	case OBJ_COMMIT:
		counter = &counts->commits;
		break;
	case OBJ_TREE:
		counter = &counts->trees;
		break;
	case OBJ_BLOB:
		counter = &counts->blobs;
		break;
	default:
		BUG("invalid object type");
	}
	*counter += oids->nr;

	display_progress(ctx->progress, get_total_object_count(counts));

	return 0;
}

static void structure_count_objects(struct object_stats *stats,
struct rev_info *revs,
struct repository *repo, int show_progress)
{
struct path_walk_info info = PATH_WALK_INFO_INIT;
struct count_objects_data data = {
.stats = stats,
};

info.revs = revs;
info.path_fn = count_objects;
info.path_fn_data = &data;

if (show_progress)
data.progress = start_delayed_progress(repo, _("Counting objects"), 0);

walk_objects_by_path(&info);
path_walk_info_clear(&info);
stop_progress(&data.progress);
}

/*
 * Entry point for "git repo structure".
 *
 * Parses `--format` and `--[no-]progress`, rejects any leftover
 * arguments, counts the references and the objects reached from them,
 * and prints the result in the chosen format (table by default).
 * Returns 0.
 */
static int cmd_repo_structure(int argc, const char **argv, const char *prefix,
			      struct repository *repo)
{
	enum output_format format = FORMAT_TABLE;
	int show_progress = -1; /* -1: not given on the command line */
	struct option options[] = {
		OPT_CALLBACK_F(0, "format", &format, N_("format"),
			       N_("output format"),
			       PARSE_OPT_NONEG, parse_format_cb),
		OPT_BOOL(0, "progress", &show_progress, N_("show progress")),
		OPT_END()
	};
	struct stats_table table = {
		.rows = STRING_LIST_INIT_DUP,
	};
	struct repo_structure stats = { 0 };
	struct rev_info revs;

	argc = parse_options(argc, argv, prefix, options, repo_usage, 0);
	if (argc)
		usage(_("too many arguments"));

	repo_init_revisions(repo, &revs, prefix);

	/* Without an explicit choice, show progress only if stderr is a tty. */
	if (show_progress < 0)
		show_progress = isatty(2);

	/* Reference counting also seeds `revs` for the object walk. */
	structure_count_references(&stats.refs, &revs, repo, show_progress);
	structure_count_objects(&stats.objects, &revs, repo, show_progress);

	if (format == FORMAT_TABLE) {
		stats_table_setup_structure(&table, &stats);
		stats_table_print_structure(&table);
	} else if (format == FORMAT_KEYVALUE) {
		structure_keyvalue_print(&stats, '=', '\n');
	} else if (format == FORMAT_NUL_TERMINATED) {
		structure_keyvalue_print(&stats, '\n', '\0');
	} else {
		BUG("invalid output format");
	}

	stats_table_clear(&table);
	release_revisions(&revs);

	return 0;
}

int cmd_repo(int argc, const char **argv, const char *prefix,
struct repository *repo)
{
parse_opt_subcommand_fn *fn = NULL;
struct option options[] = {
OPT_SUBCOMMAND("info", &fn, repo_info),
OPT_SUBCOMMAND("info", &fn, cmd_repo_info),
OPT_SUBCOMMAND("structure", &fn, cmd_repo_structure),
OPT_END()
};


View File

@ -2664,7 +2664,7 @@ static int match_name_as_path(const char **pattern, const char *refname,
/* Return 1 if the refname matches one of the patterns, otherwise 0. */
static int filter_pattern_match(struct ref_filter *filter, const char *refname)
{
if (!*filter->name_patterns)
if (!filter->name_patterns || !*filter->name_patterns)
return 1; /* No pattern always matches */
if (filter->match_as_path)
return match_name_as_path(filter->name_patterns, refname,
@ -2751,7 +2751,7 @@ static int for_each_fullref_in_pattern(struct ref_filter *filter,
return for_each_fullref_with_seek(filter, cb, cb_data, 0);
}

if (!filter->name_patterns[0]) {
if (!filter->name_patterns || !filter->name_patterns[0]) {
/* no patterns; we have to look at everything */
return for_each_fullref_with_seek(filter, cb, cb_data, 0);
}
@ -2833,7 +2833,7 @@ struct ref_array_item *ref_array_push(struct ref_array *array,
return ref;
}

static int ref_kind_from_refname(const char *refname)
int ref_kind_from_refname(const char *refname)
{
unsigned int i;


View File

@ -135,6 +135,8 @@ struct ref_format {
OPT_STRVEC(0, "exclude", &(var)->exclude, \
N_("pattern"), N_("exclude refs which match pattern"))

/* Get the reference kind from the provided reference name. */
int ref_kind_from_refname(const char *refname);
/*
* API for filtering a set of refs. Based on the type of refs the user
* has requested, we iterate through those refs and apply filters

View File

@ -238,6 +238,7 @@ integration_tests = [
't1701-racy-split-index.sh',
't1800-hook.sh',
't1900-repo.sh',
't1901-repo-structure.sh',
't2000-conflict-when-checking-files-out.sh',
't2002-checkout-cache-u.sh',
't2003-checkout-cache-mkdir.sh',

129
t/t1901-repo-structure.sh Executable file
View File

@ -0,0 +1,129 @@
#!/bin/sh

test_description='test git repo structure'

. ./test-lib.sh

# A freshly initialized repository must report zero for every reference and
# object count in the default table format, and print nothing on stderr.
test_expect_success 'empty repository' '
test_when_finished "rm -rf repo" &&
git init repo &&
(
cd repo &&
cat >expect <<-\EOF &&
| Repository structure | Value |
| -------------------- | ----- |
| * References | |
| * Count | 0 |
| * Branches | 0 |
| * Tags | 0 |
| * Remotes | 0 |
| * Others | 0 |
| | |
| * Reachable objects | |
| * Count | 0 |
| * Commits | 0 |
| * Trees | 0 |
| * Blobs | 0 |
| * Tags | 0 |
EOF

git repo structure >out 2>err &&

test_cmp expect out &&
test_line_count = 0 err
)
'

# Table output for a populated repository: 42 bulk commits, an annotated
# tag, a remote-tracking ref and a note. Expects 4 references (one of each
# kind; the "Others" ref is presumably the notes ref) and 130 objects.
test_expect_success 'repository with references and objects' '
test_when_finished "rm -rf repo" &&
git init repo &&
(
cd repo &&
test_commit_bulk 42 &&
git tag -a foo -m bar &&

oid="$(git rev-parse HEAD)" &&
git update-ref refs/remotes/origin/foo "$oid" &&

# Also creates a commit, tree, and blob.
git notes add -m foo &&

cat >expect <<-\EOF &&
| Repository structure | Value |
| -------------------- | ----- |
| * References | |
| * Count | 4 |
| * Branches | 1 |
| * Tags | 1 |
| * Remotes | 1 |
| * Others | 1 |
| | |
| * Reachable objects | |
| * Count | 130 |
| * Commits | 43 |
| * Trees | 43 |
| * Blobs | 43 |
| * Tags | 1 |
EOF

git repo structure >out 2>err &&

test_cmp expect out &&
test_line_count = 0 err
)
'

# Checks --format=keyvalue against a fixed expectation, then derives the
# --format=nul expectation from it by swapping the delimiters with tr:
# newline becomes NUL and "=" becomes newline.
test_expect_success 'keyvalue and nul format' '
test_when_finished "rm -rf repo" &&
git init repo &&
(
cd repo &&
test_commit_bulk 42 &&
git tag -a foo -m bar &&

cat >expect <<-\EOF &&
references.branches.count=1
references.tags.count=1
references.remotes.count=0
references.others.count=0
objects.commits.count=42
objects.trees.count=42
objects.blobs.count=42
objects.tags.count=1
EOF

git repo structure --format=keyvalue >out 2>err &&

test_cmp expect out &&
test_line_count = 0 err &&

# Replace key and value delimiters for nul format.
tr "\n=" "\0\n" <expect >expect_nul &&
git repo structure --format=nul >out 2>err &&

test_cmp expect_nul out &&
test_line_count = 0 err
)
'

# With --progress both progress meters must appear on stderr even though
# stderr is redirected to a file; with --no-progress stderr must stay empty.
# GIT_PROGRESS_DELAY=0 is set so the delayed meters are shown immediately.
test_expect_success 'progress meter option' '
test_when_finished "rm -rf repo" &&
git init repo &&
(
cd repo &&
test_commit foo &&

GIT_PROGRESS_DELAY=0 git repo structure --progress >out 2>err &&

test_file_not_empty out &&
test_grep "Counting references: 2, done." err &&
test_grep "Counting objects: 3, done." err &&

GIT_PROGRESS_DELAY=0 git repo structure --no-progress >out 2>err &&

test_file_not_empty out &&
test_line_count = 0 err
)
'

test_done