for-each-ref: introduce a '--start-after' option

The `git-for-each-ref(1)` command is used to iterate over references
present in a repository. In large repositories with millions of
references, it would be optimal to paginate this output such that we
can start iteration from a given reference. This would avoid having to
iterate over all references from the beginning each time when paginating
through results.

The previous commit added 'seek' functionality to the reference
backends. Utilize this and expose a '--start-after' option in
'git-for-each-ref(1)'. When used, the reference iteration seeks to the
lexicographically next reference and iterates from there onward.

This enables efficient pagination workflows, where the calling script
can remember the last provided reference and use that as the starting
point for the next set of references:
    git for-each-ref --count=100
    git for-each-ref --count=100 --start-after=refs/heads/branch-100
    git for-each-ref --count=100 --start-after=refs/heads/branch-200

Since the reference iterators only allow seeking to a specified marker
via `ref_iterator_seek()`, we introduce a helper function
`start_ref_iterator_after()`, which seeks to the next reference by simply
appending (char) 1 to the marker.

We must note that pagination always continues from the provided marker;
as such, any concurrent reference updates that sort lexicographically
before the marker will not be output. Document the same.

Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
maint
Karthik Nayak 2025-07-15 13:28:30 +02:00 committed by Junio C Hamano
parent 526530a16a
commit dabecb9db2
5 changed files with 272 additions and 19 deletions

View File

@ -14,7 +14,7 @@ SYNOPSIS
[--points-at=<object>] [--points-at=<object>]
[--merged[=<object>]] [--no-merged[=<object>]] [--merged[=<object>]] [--no-merged[=<object>]]
[--contains[=<object>]] [--no-contains[=<object>]] [--contains[=<object>]] [--no-contains[=<object>]]
[--exclude=<pattern> ...] [--exclude=<pattern> ...] [--start-after=<marker>]


DESCRIPTION DESCRIPTION
----------- -----------
@ -108,6 +108,14 @@ TAB %(refname)`.
--include-root-refs:: --include-root-refs::
List root refs (HEAD and pseudorefs) apart from regular refs. List root refs (HEAD and pseudorefs) apart from regular refs.


--start-after=<marker>::
Allows paginating the output by skipping references up to and including the
specified marker. When paging, it should be noted that references may be
deleted, modified or added between invocations. Output will only yield those
references which follow the marker lexicographically. Output begins from the
first reference that would come after the marker alphabetically. Cannot be
used with general pattern matching or custom sort options.

FIELD NAMES FIELD NAMES
----------- -----------



View File

@ -13,6 +13,7 @@ static char const * const for_each_ref_usage[] = {
N_("git for-each-ref [--points-at <object>]"), N_("git for-each-ref [--points-at <object>]"),
N_("git for-each-ref [--merged [<commit>]] [--no-merged [<commit>]]"), N_("git for-each-ref [--merged [<commit>]] [--no-merged [<commit>]]"),
N_("git for-each-ref [--contains [<commit>]] [--no-contains [<commit>]]"), N_("git for-each-ref [--contains [<commit>]] [--no-contains [<commit>]]"),
N_("git for-each-ref [--start-after <marker>]"),
NULL NULL
}; };


@ -44,6 +45,7 @@ int cmd_for_each_ref(int argc,
OPT_GROUP(""), OPT_GROUP(""),
OPT_INTEGER( 0 , "count", &format.array_opts.max_count, N_("show only <n> matched refs")), OPT_INTEGER( 0 , "count", &format.array_opts.max_count, N_("show only <n> matched refs")),
OPT_STRING( 0 , "format", &format.format, N_("format"), N_("format to use for the output")), OPT_STRING( 0 , "format", &format.format, N_("format"), N_("format to use for the output")),
/* Argument placeholder is <marker>, matching the usage string and the documentation. */
OPT_STRING( 0 , "start-after", &filter.start_after, N_("marker"), N_("start iteration after the provided marker")),
OPT__COLOR(&format.use_color, N_("respect format colors")), OPT__COLOR(&format.use_color, N_("respect format colors")),
OPT_REF_FILTER_EXCLUDE(&filter), OPT_REF_FILTER_EXCLUDE(&filter),
OPT_REF_SORT(&sorting_options), OPT_REF_SORT(&sorting_options),
@ -79,6 +81,9 @@ int cmd_for_each_ref(int argc,
if (verify_ref_format(&format)) if (verify_ref_format(&format))
usage_with_options(for_each_ref_usage, opts); usage_with_options(for_each_ref_usage, opts);


if (filter.start_after && sorting_options.nr > 1)
die(_("cannot use --start-after with custom sort options"));

sorting = ref_sorting_options(&sorting_options); sorting = ref_sorting_options(&sorting_options);
ref_sorting_set_sort_flags_all(sorting, REF_SORTING_ICASE, icase); ref_sorting_set_sort_flags_all(sorting, REF_SORTING_ICASE, icase);
filter.ignore_case = icase; filter.ignore_case = icase;
@ -100,6 +105,9 @@ int cmd_for_each_ref(int argc,
filter.name_patterns = argv; filter.name_patterns = argv;
} }


if (filter.start_after && filter.name_patterns && filter.name_patterns[0])
die(_("cannot use --start-after with patterns"));

if (include_root_refs) if (include_root_refs)
flags |= FILTER_REFS_ROOT_REFS | FILTER_REFS_DETACHED_HEAD; flags |= FILTER_REFS_ROOT_REFS | FILTER_REFS_DETACHED_HEAD;



View File

@ -2683,6 +2683,41 @@ static int filter_exclude_match(struct ref_filter *filter, const char *refname)
return match_pattern(filter->exclude.v, refname, filter->ignore_case); return match_pattern(filter->exclude.v, refname, filter->ignore_case);
} }


/*
* We need to seek to the reference right after a given marker but excluding any
* matching references. So we seek to the lexicographically next reference.
*/
static int start_ref_iterator_after(struct ref_iterator *iter, const char *marker)
{
struct strbuf sb = STRBUF_INIT;
int ret;

strbuf_addstr(&sb, marker);
strbuf_addch(&sb, 1);

ret = ref_iterator_seek(iter, sb.buf, 0);

strbuf_release(&sb);
return ret;
}

/*
 * Run `cb` (with `cb_data`) over the references of the main ref store,
 * creating the iterator with the caller-supplied `flags`. When
 * `filter->start_after` is set, the iterator is first moved past that marker
 * via start_ref_iterator_after().
 *
 * Returns the non-zero seek result if seeking fails, otherwise the result of
 * do_for_each_ref_iterator().
 */
static int for_each_fullref_with_seek(struct ref_filter *filter, each_ref_fn cb,
				       void *cb_data, unsigned int flags)
{
	struct ref_iterator *iter =
		refs_ref_iterator_begin(get_main_ref_store(the_repository), "",
					NULL, 0, flags);

	if (filter->start_after) {
		int seek_status = start_ref_iterator_after(iter, filter->start_after);

		/*
		 * NOTE(review): the iterator is not released on this path;
		 * confirm whether that leaks it.
		 */
		if (seek_status)
			return seek_status;
	}

	return do_for_each_ref_iterator(iter, cb, cb_data);
}

/* /*
* This is the same as for_each_fullref_in(), but it tries to iterate * This is the same as for_each_fullref_in(), but it tries to iterate
* only over the patterns we'll care about. Note that it _doesn't_ do a full * only over the patterns we'll care about. Note that it _doesn't_ do a full
@ -2694,8 +2729,8 @@ static int for_each_fullref_in_pattern(struct ref_filter *filter,
{ {
if (filter->kind & FILTER_REFS_ROOT_REFS) { if (filter->kind & FILTER_REFS_ROOT_REFS) {
/* In this case, we want to print all refs including root refs. */ /* In this case, we want to print all refs including root refs. */
return refs_for_each_include_root_refs(get_main_ref_store(the_repository), return for_each_fullref_with_seek(filter, cb, cb_data,
cb, cb_data); DO_FOR_EACH_INCLUDE_ROOT_REFS);
} }


if (!filter->match_as_path) { if (!filter->match_as_path) {
@ -2704,8 +2739,7 @@ static int for_each_fullref_in_pattern(struct ref_filter *filter,
* prefixes like "refs/heads/" etc. are stripped off, * prefixes like "refs/heads/" etc. are stripped off,
* so we have to look at everything: * so we have to look at everything:
*/ */
return refs_for_each_fullref_in(get_main_ref_store(the_repository), return for_each_fullref_with_seek(filter, cb, cb_data, 0);
"", NULL, cb, cb_data);
} }


if (filter->ignore_case) { if (filter->ignore_case) {
@ -2714,14 +2748,12 @@ static int for_each_fullref_in_pattern(struct ref_filter *filter,
* so just return everything and let the caller * so just return everything and let the caller
* sort it out. * sort it out.
*/ */
return refs_for_each_fullref_in(get_main_ref_store(the_repository), return for_each_fullref_with_seek(filter, cb, cb_data, 0);
"", NULL, cb, cb_data);
} }


if (!filter->name_patterns[0]) { if (!filter->name_patterns[0]) {
/* no patterns; we have to look at everything */ /* no patterns; we have to look at everything */
return refs_for_each_fullref_in(get_main_ref_store(the_repository), return for_each_fullref_with_seek(filter, cb, cb_data, 0);
"", filter->exclude.v, cb, cb_data);
} }


return refs_for_each_fullref_in_prefixes(get_main_ref_store(the_repository), return refs_for_each_fullref_in_prefixes(get_main_ref_store(the_repository),
@ -3189,6 +3221,7 @@ void filter_is_base(struct repository *r,


static int do_filter_refs(struct ref_filter *filter, unsigned int type, each_ref_fn fn, void *cb_data) static int do_filter_refs(struct ref_filter *filter, unsigned int type, each_ref_fn fn, void *cb_data)
{ {
const char *prefix = NULL;
int ret = 0; int ret = 0;


filter->kind = type & FILTER_REFS_KIND_MASK; filter->kind = type & FILTER_REFS_KIND_MASK;
@ -3207,19 +3240,28 @@ static int do_filter_refs(struct ref_filter *filter, unsigned int type, each_ref
* of filter_ref_kind(). * of filter_ref_kind().
*/ */
if (filter->kind == FILTER_REFS_BRANCHES) if (filter->kind == FILTER_REFS_BRANCHES)
ret = refs_for_each_fullref_in(get_main_ref_store(the_repository), prefix = "refs/heads/";
"refs/heads/", NULL,
fn, cb_data);
else if (filter->kind == FILTER_REFS_REMOTES) else if (filter->kind == FILTER_REFS_REMOTES)
ret = refs_for_each_fullref_in(get_main_ref_store(the_repository), prefix = "refs/remotes/";
"refs/remotes/", NULL,
fn, cb_data);
else if (filter->kind == FILTER_REFS_TAGS) else if (filter->kind == FILTER_REFS_TAGS)
ret = refs_for_each_fullref_in(get_main_ref_store(the_repository), prefix = "refs/tags/";
"refs/tags/", NULL, fn,
cb_data); if (prefix) {
else if (filter->kind & FILTER_REFS_REGULAR) struct ref_iterator *iter;

iter = refs_ref_iterator_begin(get_main_ref_store(the_repository),
"", NULL, 0, 0);

if (filter->start_after)
ret = start_ref_iterator_after(iter, filter->start_after);
else if (prefix)
ret = ref_iterator_seek(iter, prefix, 1);

if (!ret)
ret = do_for_each_ref_iterator(iter, fn, cb_data);
} else if (filter->kind & FILTER_REFS_REGULAR) {
ret = for_each_fullref_in_pattern(filter, fn, cb_data); ret = for_each_fullref_in_pattern(filter, fn, cb_data);
}


/* /*
* When printing all ref types, HEAD is already included, * When printing all ref types, HEAD is already included,

View File

@ -64,6 +64,7 @@ struct ref_array {


struct ref_filter { struct ref_filter {
const char **name_patterns; const char **name_patterns;
const char *start_after;
struct strvec exclude; struct strvec exclude;
struct oid_array points_at; struct oid_array points_at;
struct commit_list *with_commit; struct commit_list *with_commit;

View File

@ -541,4 +541,198 @@ test_expect_success 'validate worktree atom' '
test_cmp expect actual test_cmp expect actual
' '


# An empty marker must not skip anything: the listing is expected to start at
# refs/heads/main (presumably the first regular ref in this test repository).
test_expect_success 'start after with empty value' '
cat >expect <<-\EOF &&
refs/heads/main
refs/heads/main_worktree
refs/heads/side
refs/odd/spot
refs/tags/annotated-tag
refs/tags/doubly-annotated-tag
refs/tags/doubly-signed-tag
refs/tags/foo1.10
refs/tags/foo1.3
refs/tags/foo1.6
refs/tags/four
refs/tags/one
refs/tags/signed-tag
refs/tags/three
refs/tags/two
EOF
git for-each-ref --format="%(refname)" --start-after="" >actual &&
test_cmp expect actual
'

# The marker names an existing ref (refs/odd/spot): the marker itself is
# excluded and output starts at the ref that follows it.
test_expect_success 'start after a specific reference' '
cat >expect <<-\EOF &&
refs/tags/annotated-tag
refs/tags/doubly-annotated-tag
refs/tags/doubly-signed-tag
refs/tags/foo1.10
refs/tags/foo1.3
refs/tags/foo1.6
refs/tags/four
refs/tags/one
refs/tags/signed-tag
refs/tags/three
refs/tags/two
EOF
git for-each-ref --format="%(refname)" --start-after=refs/odd/spot >actual &&
test_cmp expect actual
'

# The marker (refs/odd/sp) is only a prefix of refs/odd/spot, so that ref sorts
# after the marker and must still be listed.
test_expect_success 'start after a specific reference with partial match' '
cat >expect <<-\EOF &&
refs/odd/spot
refs/tags/annotated-tag
refs/tags/doubly-annotated-tag
refs/tags/doubly-signed-tag
refs/tags/foo1.10
refs/tags/foo1.3
refs/tags/foo1.6
refs/tags/four
refs/tags/one
refs/tags/signed-tag
refs/tags/three
refs/tags/two
EOF
git for-each-ref --format="%(refname)" --start-after=refs/odd/sp >actual &&
test_cmp expect actual
'

# The marker (refs/odd/parrot) does not appear in any expected listing but sorts
# just before refs/odd/spot; output must begin with refs/odd/spot.
test_expect_success 'start after, just behind a specific reference' '
cat >expect <<-\EOF &&
refs/odd/spot
refs/tags/annotated-tag
refs/tags/doubly-annotated-tag
refs/tags/doubly-signed-tag
refs/tags/foo1.10
refs/tags/foo1.3
refs/tags/foo1.6
refs/tags/four
refs/tags/one
refs/tags/signed-tag
refs/tags/three
refs/tags/two
EOF
git for-each-ref --format="%(refname)" --start-after=refs/odd/parrot >actual &&
test_cmp expect actual
'

# The marker is a directory name without a trailing slash (refs/odd); refs
# inside that directory sort after it and must be listed.
test_expect_success 'start after with specific directory match' '
cat >expect <<-\EOF &&
refs/odd/spot
refs/tags/annotated-tag
refs/tags/doubly-annotated-tag
refs/tags/doubly-signed-tag
refs/tags/foo1.10
refs/tags/foo1.3
refs/tags/foo1.6
refs/tags/four
refs/tags/one
refs/tags/signed-tag
refs/tags/three
refs/tags/two
EOF
git for-each-ref --format="%(refname)" --start-after=refs/odd >actual &&
test_cmp expect actual
'

# Same as the directory case, but with a trailing slash on the marker
# (refs/odd/); the refs below the directory must still be listed.
test_expect_success 'start after with specific directory and trailing slash' '
cat >expect <<-\EOF &&
refs/odd/spot
refs/tags/annotated-tag
refs/tags/doubly-annotated-tag
refs/tags/doubly-signed-tag
refs/tags/foo1.10
refs/tags/foo1.3
refs/tags/foo1.6
refs/tags/four
refs/tags/one
refs/tags/signed-tag
refs/tags/three
refs/tags/two
EOF
git for-each-ref --format="%(refname)" --start-after=refs/odd/ >actual &&
test_cmp expect actual
'

# The marker (refs/lost) names a directory that does not appear in any expected
# listing and sorts before refs/odd/; output must begin with refs/odd/spot.
test_expect_success 'start after, just behind a specific directory' '
cat >expect <<-\EOF &&
refs/odd/spot
refs/tags/annotated-tag
refs/tags/doubly-annotated-tag
refs/tags/doubly-signed-tag
refs/tags/foo1.10
refs/tags/foo1.3
refs/tags/foo1.6
refs/tags/four
refs/tags/one
refs/tags/signed-tag
refs/tags/three
refs/tags/two
EOF
git for-each-ref --format="%(refname)" --start-after=refs/lost >actual &&
test_cmp expect actual
'

# The marker (refs/odd/spotnew) extends an existing ref name with extra
# characters, so it sorts after refs/odd/spot and that ref is skipped.
test_expect_success 'start after, overflow specific reference length' '
cat >expect <<-\EOF &&
refs/tags/annotated-tag
refs/tags/doubly-annotated-tag
refs/tags/doubly-signed-tag
refs/tags/foo1.10
refs/tags/foo1.3
refs/tags/foo1.6
refs/tags/four
refs/tags/one
refs/tags/signed-tag
refs/tags/three
refs/tags/two
EOF
git for-each-ref --format="%(refname)" --start-after=refs/odd/spotnew >actual &&
test_cmp expect actual
'

# The marker (refs/odd/spot/new) treats an existing ref name as a directory;
# it sorts after refs/odd/spot, so that ref is skipped.
test_expect_success 'start after, overflow specific reference path' '
cat >expect <<-\EOF &&
refs/tags/annotated-tag
refs/tags/doubly-annotated-tag
refs/tags/doubly-signed-tag
refs/tags/foo1.10
refs/tags/foo1.3
refs/tags/foo1.6
refs/tags/four
refs/tags/one
refs/tags/signed-tag
refs/tags/three
refs/tags/two
EOF
git for-each-ref --format="%(refname)" --start-after=refs/odd/spot/new >actual &&
test_cmp expect actual
'

# The marker is the last ref in the listing (refs/tags/two): nothing follows
# it, so the output must be empty.
test_expect_success 'start after, last reference' '
cat >expect <<-\EOF &&
EOF
git for-each-ref --format="%(refname)" --start-after=refs/tags/two >actual &&
test_cmp expect actual
'

# Combining --start-after with a pattern argument must fail with the exact
# error message below on stderr.
test_expect_success 'start after used with a pattern' '
cat >expect <<-\EOF &&
fatal: cannot use --start-after with patterns
EOF
test_must_fail git for-each-ref --format="%(refname)" --start-after=refs/odd/spot refs/tags 2>actual &&
test_cmp expect actual
'

# Combining --start-after with an explicit --sort key must fail with the exact
# error message below on stderr.
test_expect_success 'start after used with custom sort order' '
cat >expect <<-\EOF &&
fatal: cannot use --start-after with custom sort options
EOF
test_must_fail git for-each-ref --format="%(refname)" --start-after=refs/odd/spot --sort=author 2>actual &&
test_cmp expect actual
'

test_done test_done