Browse Source

Merge branch 'jk/shortlog-group-by-trailer'

"git shortlog" has been taught to group commits by the contents of
the trailer lines, like "Reviewed-by:", "Coauthored-by:", etc.

* jk/shortlog-group-by-trailer:
  shortlog: allow multiple groups to be specified
  shortlog: parse trailer idents
  shortlog: rename parse_stdin_ident()
  shortlog: de-duplicate trailer values
  shortlog: match commit trailers with --group
  trailer: add interface for iterating over commit trailers
  shortlog: add grouping option
  shortlog: change "author" variables to "ident"
maint
Junio C Hamano 4 years ago
parent
commit
2fa8aacc72
  1. 31
      Documentation/git-shortlog.txt
  2. 1
      builtin/log.c
  3. 213
      builtin/shortlog.c
  4. 8
      shortlog.h
  5. 141
      t/t4201-shortlog.sh
  6. 36
      trailer.c
  7. 45
      trailer.h

31
Documentation/git-shortlog.txt

@ -47,9 +47,38 @@ OPTIONS @@ -47,9 +47,38 @@ OPTIONS

Each pretty-printed commit will be rewrapped before it is shown.

--group=<type>::
Group commits based on `<type>`. If no `--group` option is
specified, the default is `author`. `<type>` is one of:
+
--
- `author`, commits are grouped by author
- `committer`, commits are grouped by committer (the same as `-c`)
- `trailer:<field>`, the `<field>` is interpreted as a case-insensitive
commit message trailer (see linkgit:git-interpret-trailers[1]). For
example, if your project uses `Reviewed-by` trailers, you might want
to see who has been reviewing with
`git shortlog -ns --group=trailer:reviewed-by`.
+
Note that commits that do not include the trailer will not be counted.
Likewise, commits with multiple trailers (e.g., multiple signoffs) may
be counted more than once (but only once per unique trailer value in
that commit).
+
Shortlog will attempt to parse each trailer value as a `name <email>`
identity. If successful, the mailmap is applied and the email is omitted
unless the `--email` option is specified. If the value cannot be parsed
as an identity, it will be taken literally and completely.
--
+
If `--group` is specified multiple times, commits are counted under each
value (but again, only once per unique value in that commit). For
example, `git shortlog --group=author --group=trailer:co-authored-by`
counts both authors and co-authors.

-c::
--committer::
Collect and show committer identities instead of authors.
This is an alias for `--group=committer`.

-w[<width>[,<indent1>[,<indent2>]]]::
Linewrap the output by wrapping each line at `width`. The first

1
builtin/log.c

@ -1195,6 +1195,7 @@ static void make_cover_letter(struct rev_info *rev, int use_stdout, @@ -1195,6 +1195,7 @@ static void make_cover_letter(struct rev_info *rev, int use_stdout,
log.in1 = 2;
log.in2 = 4;
log.file = rev->diffopt.file;
log.groups = SHORTLOG_GROUP_AUTHOR;
for (i = 0; i < nr; i++)
shortlog_add_commit(&log, list[i]);


213
builtin/shortlog.c

@ -9,6 +9,7 @@ @@ -9,6 +9,7 @@
#include "mailmap.h"
#include "shortlog.h"
#include "parse-options.h"
#include "trailer.h"

static char const * const shortlog_usage[] = {
N_("git shortlog [<options>] [<revision-range>] [[--] <path>...]"),
@ -49,12 +50,12 @@ static int compare_by_list(const void *a1, const void *a2) @@ -49,12 +50,12 @@ static int compare_by_list(const void *a1, const void *a2)
}

static void insert_one_record(struct shortlog *log,
const char *author,
const char *ident,
const char *oneline)
{
struct string_list_item *item;

item = string_list_insert(&log->list, author);
item = string_list_insert(&log->list, ident);

if (log->summary)
item->util = (void *)(UTIL_TO_INT(item) + 1);
@ -97,8 +98,8 @@ static void insert_one_record(struct shortlog *log, @@ -97,8 +98,8 @@ static void insert_one_record(struct shortlog *log,
}
}

static int parse_stdin_author(struct shortlog *log,
struct strbuf *out, const char *in)
static int parse_ident(struct shortlog *log,
struct strbuf *out, const char *in)
{
const char *mailbuf, *namebuf;
size_t namelen, maillen;
@ -122,18 +123,33 @@ static int parse_stdin_author(struct shortlog *log, @@ -122,18 +123,33 @@ static int parse_stdin_author(struct shortlog *log,

static void read_from_stdin(struct shortlog *log)
{
struct strbuf author = STRBUF_INIT;
struct strbuf mapped_author = STRBUF_INIT;
struct strbuf ident = STRBUF_INIT;
struct strbuf mapped_ident = STRBUF_INIT;
struct strbuf oneline = STRBUF_INIT;
static const char *author_match[2] = { "Author: ", "author " };
static const char *committer_match[2] = { "Commit: ", "committer " };
const char **match;

match = log->committer ? committer_match : author_match;
while (strbuf_getline_lf(&author, stdin) != EOF) {
if (HAS_MULTI_BITS(log->groups))
die(_("using multiple --group options with stdin is not supported"));

switch (log->groups) {
case SHORTLOG_GROUP_AUTHOR:
match = author_match;
break;
case SHORTLOG_GROUP_COMMITTER:
match = committer_match;
break;
case SHORTLOG_GROUP_TRAILER:
die(_("using --group=trailer with stdin is not supported"));
default:
BUG("unhandled shortlog group");
}

while (strbuf_getline_lf(&ident, stdin) != EOF) {
const char *v;
if (!skip_prefix(author.buf, match[0], &v) &&
!skip_prefix(author.buf, match[1], &v))
if (!skip_prefix(ident.buf, match[0], &v) &&
!skip_prefix(ident.buf, match[1], &v))
continue;
while (strbuf_getline_lf(&oneline, stdin) != EOF &&
oneline.len)
@ -142,23 +158,118 @@ static void read_from_stdin(struct shortlog *log) @@ -142,23 +158,118 @@ static void read_from_stdin(struct shortlog *log)
!oneline.len)
; /* discard blanks */

strbuf_reset(&mapped_author);
if (parse_stdin_author(log, &mapped_author, v) < 0)
strbuf_reset(&mapped_ident);
if (parse_ident(log, &mapped_ident, v) < 0)
continue;

insert_one_record(log, mapped_author.buf, oneline.buf);
insert_one_record(log, mapped_ident.buf, oneline.buf);
}
strbuf_release(&author);
strbuf_release(&mapped_author);
strbuf_release(&ident);
strbuf_release(&mapped_ident);
strbuf_release(&oneline);
}

struct strset_item {
struct hashmap_entry ent;
char value[FLEX_ARRAY];
};

struct strset {
struct hashmap map;
};

#define STRSET_INIT { { NULL } }

static int strset_item_hashcmp(const void *hash_data,
const struct hashmap_entry *entry,
const struct hashmap_entry *entry_or_key,
const void *keydata)
{
const struct strset_item *a, *b;

a = container_of(entry, const struct strset_item, ent);
if (keydata)
return strcmp(a->value, keydata);

b = container_of(entry_or_key, const struct strset_item, ent);
return strcmp(a->value, b->value);
}

/*
* Adds "str" to the set if it was not already present; returns true if it was
* already there.
*/
static int strset_check_and_add(struct strset *ss, const char *str)
{
unsigned int hash = strhash(str);
struct strset_item *item;

if (!ss->map.table)
hashmap_init(&ss->map, strset_item_hashcmp, NULL, 0);

if (hashmap_get_from_hash(&ss->map, hash, str))
return 1;

FLEX_ALLOC_STR(item, value, str);
hashmap_entry_init(&item->ent, hash);
hashmap_add(&ss->map, &item->ent);
return 0;
}

static void strset_clear(struct strset *ss)
{
if (!ss->map.table)
return;
hashmap_free_entries(&ss->map, struct strset_item, ent);
}

static void insert_records_from_trailers(struct shortlog *log,
struct strset *dups,
struct commit *commit,
struct pretty_print_context *ctx,
const char *oneline)
{
struct trailer_iterator iter;
const char *commit_buffer, *body;
struct strbuf ident = STRBUF_INIT;

/*
* Using format_commit_message("%B") would be simpler here, but
* this saves us copying the message.
*/
commit_buffer = logmsg_reencode(commit, NULL, ctx->output_encoding);
body = strstr(commit_buffer, "\n\n");
if (!body)
return;

trailer_iterator_init(&iter, body);
while (trailer_iterator_advance(&iter)) {
const char *value = iter.val.buf;

if (!string_list_has_string(&log->trailers, iter.key.buf))
continue;

strbuf_reset(&ident);
if (!parse_ident(log, &ident, value))
value = ident.buf;

if (strset_check_and_add(dups, value))
continue;
insert_one_record(log, value, oneline);
}
trailer_iterator_release(&iter);

strbuf_release(&ident);
unuse_commit_buffer(commit, commit_buffer);
}

void shortlog_add_commit(struct shortlog *log, struct commit *commit)
{
struct strbuf author = STRBUF_INIT;
struct strbuf ident = STRBUF_INIT;
struct strbuf oneline = STRBUF_INIT;
struct strset dups = STRSET_INIT;
struct pretty_print_context ctx = {0};
const char *fmt;
const char *oneline_str;

ctx.fmt = CMIT_FMT_USERFORMAT;
ctx.abbrev = log->abbrev;
@ -166,21 +277,38 @@ void shortlog_add_commit(struct shortlog *log, struct commit *commit) @@ -166,21 +277,38 @@ void shortlog_add_commit(struct shortlog *log, struct commit *commit)
ctx.date_mode.type = DATE_NORMAL;
ctx.output_encoding = get_log_output_encoding();

fmt = log->committer ?
(log->email ? "%cN <%cE>" : "%cN") :
(log->email ? "%aN <%aE>" : "%aN");

format_commit_message(commit, fmt, &author, &ctx);
if (!log->summary) {
if (log->user_format)
pretty_print_commit(&ctx, commit, &oneline);
else
format_commit_message(commit, "%s", &oneline, &ctx);
}
oneline_str = oneline.len ? oneline.buf : "<none>";

if (log->groups & SHORTLOG_GROUP_AUTHOR) {
strbuf_reset(&ident);
format_commit_message(commit,
log->email ? "%aN <%aE>" : "%aN",
&ident, &ctx);
if (!HAS_MULTI_BITS(log->groups) ||
!strset_check_and_add(&dups, ident.buf))
insert_one_record(log, ident.buf, oneline_str);
}
if (log->groups & SHORTLOG_GROUP_COMMITTER) {
strbuf_reset(&ident);
format_commit_message(commit,
log->email ? "%cN <%cE>" : "%cN",
&ident, &ctx);
if (!HAS_MULTI_BITS(log->groups) ||
!strset_check_and_add(&dups, ident.buf))
insert_one_record(log, ident.buf, oneline_str);
}
if (log->groups & SHORTLOG_GROUP_TRAILER) {
insert_records_from_trailers(log, &dups, commit, &ctx, oneline_str);
}

insert_one_record(log, author.buf, oneline.len ? oneline.buf : "<none>");

strbuf_release(&author);
strset_clear(&dups);
strbuf_release(&ident);
strbuf_release(&oneline);
}

@ -241,6 +369,28 @@ static int parse_wrap_args(const struct option *opt, const char *arg, int unset) @@ -241,6 +369,28 @@ static int parse_wrap_args(const struct option *opt, const char *arg, int unset)
return 0;
}

static int parse_group_option(const struct option *opt, const char *arg, int unset)
{
struct shortlog *log = opt->value;
const char *field;

if (unset) {
log->groups = 0;
string_list_clear(&log->trailers, 0);
} else if (!strcasecmp(arg, "author"))
log->groups |= SHORTLOG_GROUP_AUTHOR;
else if (!strcasecmp(arg, "committer"))
log->groups |= SHORTLOG_GROUP_COMMITTER;
else if (skip_prefix(arg, "trailer:", &field)) {
log->groups |= SHORTLOG_GROUP_TRAILER;
string_list_append(&log->trailers, field);
} else
return error(_("unknown group type: %s"), arg);

return 0;
}


void shortlog_init(struct shortlog *log)
{
memset(log, 0, sizeof(*log));
@ -251,6 +401,8 @@ void shortlog_init(struct shortlog *log) @@ -251,6 +401,8 @@ void shortlog_init(struct shortlog *log)
log->wrap = DEFAULT_WRAPLEN;
log->in1 = DEFAULT_INDENT1;
log->in2 = DEFAULT_INDENT2;
log->trailers.strdup_strings = 1;
log->trailers.cmp = strcasecmp;
}

int cmd_shortlog(int argc, const char **argv, const char *prefix)
@ -260,8 +412,9 @@ int cmd_shortlog(int argc, const char **argv, const char *prefix) @@ -260,8 +412,9 @@ int cmd_shortlog(int argc, const char **argv, const char *prefix)
int nongit = !startup_info->have_repository;

const struct option options[] = {
OPT_BOOL('c', "committer", &log.committer,
N_("Group by committer rather than author")),
OPT_BIT('c', "committer", &log.groups,
N_("Group by committer rather than author"),
SHORTLOG_GROUP_COMMITTER),
OPT_BOOL('n', "numbered", &log.sort_by_number,
N_("sort output according to the number of commits per author")),
OPT_BOOL('s', "summary", &log.summary,
@ -271,6 +424,8 @@ int cmd_shortlog(int argc, const char **argv, const char *prefix) @@ -271,6 +424,8 @@ int cmd_shortlog(int argc, const char **argv, const char *prefix)
OPT_CALLBACK_F('w', NULL, &log, N_("<w>[,<i1>[,<i2>]]"),
N_("Linewrap output"), PARSE_OPT_OPTARG,
&parse_wrap_args),
OPT_CALLBACK(0, "group", &log, N_("field"),
N_("Group by field"), parse_group_option),
OPT_END(),
};

@ -311,6 +466,10 @@ parse_done: @@ -311,6 +466,10 @@ parse_done:
log.abbrev = rev.abbrev;
log.file = rev.diffopt.file;

if (!log.groups)
log.groups = SHORTLOG_GROUP_AUTHOR;
string_list_sort(&log.trailers);

/* assume HEAD if from a tty */
if (!nongit && !rev.pending.nr && isatty(0))
add_head_to_pending(&rev);

8
shortlog.h

@ -15,7 +15,13 @@ struct shortlog { @@ -15,7 +15,13 @@ struct shortlog {
int in2;
int user_format;
int abbrev;
int committer;

enum {
SHORTLOG_GROUP_AUTHOR = (1 << 0),
SHORTLOG_GROUP_COMMITTER = (1 << 1),
SHORTLOG_GROUP_TRAILER = (1 << 2),
} groups;
struct string_list trailers;

char *common_repo_prefix;
int email;

141
t/t4201-shortlog.sh

@ -215,4 +215,145 @@ test_expect_success 'shortlog --committer (external)' ' @@ -215,4 +215,145 @@ test_expect_success 'shortlog --committer (external)' '
test_cmp expect actual
'

test_expect_success '--group=committer is the same as --committer' '
git shortlog -ns --group=committer HEAD >actual &&
test_cmp expect actual
'

test_expect_success 'shortlog --group=trailer:signed-off-by' '
git commit --allow-empty -m foo -s &&
GIT_COMMITTER_NAME="SOB One" \
GIT_COMMITTER_EMAIL=sob@example.com \
git commit --allow-empty -m foo -s &&
git commit --allow-empty --amend --no-edit -s &&
cat >expect <<-\EOF &&
2 C O Mitter <committer@example.com>
1 SOB One <sob@example.com>
EOF
git shortlog -nse --group=trailer:signed-off-by HEAD >actual &&
test_cmp expect actual
'

test_expect_success 'trailer idents are split' '
cat >expect <<-\EOF &&
2 C O Mitter
1 SOB One
EOF
git shortlog -ns --group=trailer:signed-off-by HEAD >actual &&
test_cmp expect actual
'

test_expect_success 'trailer idents are mailmapped' '
cat >expect <<-\EOF &&
2 C O Mitter
1 Another Name
EOF
echo "Another Name <sob@example.com>" >mail.map &&
git -c mailmap.file=mail.map shortlog -ns \
--group=trailer:signed-off-by HEAD >actual &&
test_cmp expect actual
'

test_expect_success 'shortlog de-duplicates trailers in a single commit' '
git commit --allow-empty -F - <<-\EOF &&
subject one

this message has two distinct values, plus a repeat

Repeated-trailer: Foo
Repeated-trailer: Bar
Repeated-trailer: Foo
EOF

git commit --allow-empty -F - <<-\EOF &&
subject two

similar to the previous, but without the second distinct value

Repeated-trailer: Foo
Repeated-trailer: Foo
EOF

cat >expect <<-\EOF &&
2 Foo
1 Bar
EOF
git shortlog -ns --group=trailer:repeated-trailer -2 HEAD >actual &&
test_cmp expect actual
'

test_expect_success 'shortlog can match multiple groups' '
git commit --allow-empty -F - <<-\EOF &&
subject one

this has two trailers that are distinct from the author; it will count
3 times in the output

Some-trailer: User A <a@example.com>
Another-trailer: User B <b@example.com>
EOF

git commit --allow-empty -F - <<-\EOF &&
subject two

this one has two trailers, one of which is a duplicate with the author;
it will only be counted once for them

Another-trailer: A U Thor <author@example.com>
Some-trailer: User B <b@example.com>
EOF

cat >expect <<-\EOF &&
2 A U Thor
2 User B
1 User A
EOF
git shortlog -ns \
--group=author \
--group=trailer:some-trailer \
--group=trailer:another-trailer \
-2 HEAD >actual &&
test_cmp expect actual
'

test_expect_success 'set up option selection tests' '
git commit --allow-empty -F - <<-\EOF
subject

body

Trailer-one: value-one
Trailer-two: value-two
EOF
'

test_expect_success '--no-group resets group list to author' '
cat >expect <<-\EOF &&
1 A U Thor
EOF
git shortlog -ns \
--group=committer \
--group=trailer:trailer-one \
--no-group \
-1 HEAD >actual &&
test_cmp expect actual
'

test_expect_success '--no-group resets trailer list' '
cat >expect <<-\EOF &&
1 value-two
EOF
git shortlog -ns \
--group=trailer:trailer-one \
--no-group \
--group=trailer:trailer-two \
-1 HEAD >actual &&
test_cmp expect actual
'

test_expect_success 'stdin with multiple groups reports error' '
git log >log &&
test_must_fail git shortlog --group=author --group=committer <log
'

test_done

36
trailer.c

@ -1183,3 +1183,39 @@ void format_trailers_from_commit(struct strbuf *out, const char *msg, @@ -1183,3 +1183,39 @@ void format_trailers_from_commit(struct strbuf *out, const char *msg,
format_trailer_info(out, &info, opts);
trailer_info_release(&info);
}

void trailer_iterator_init(struct trailer_iterator *iter, const char *msg)
{
struct process_trailer_options opts = PROCESS_TRAILER_OPTIONS_INIT;
strbuf_init(&iter->key, 0);
strbuf_init(&iter->val, 0);
opts.no_divider = 1;
trailer_info_get(&iter->info, msg, &opts);
iter->cur = 0;
}

int trailer_iterator_advance(struct trailer_iterator *iter)
{
while (iter->cur < iter->info.trailer_nr) {
char *trailer = iter->info.trailers[iter->cur++];
int separator_pos = find_separator(trailer, separators);

if (separator_pos < 1)
continue; /* not a real trailer */

strbuf_reset(&iter->key);
strbuf_reset(&iter->val);
parse_trailer(&iter->key, &iter->val, NULL,
trailer, separator_pos);
unfold_value(&iter->val);
return 1;
}
return 0;
}

void trailer_iterator_release(struct trailer_iterator *iter)
{
trailer_info_release(&iter->info);
strbuf_release(&iter->val);
strbuf_release(&iter->key);
}

45
trailer.h

@ -2,8 +2,7 @@ @@ -2,8 +2,7 @@
#define TRAILER_H

#include "list.h"

struct strbuf;
#include "strbuf.h"

enum trailer_where {
WHERE_DEFAULT,
@ -103,4 +102,46 @@ void trailer_info_release(struct trailer_info *info); @@ -103,4 +102,46 @@ void trailer_info_release(struct trailer_info *info);
void format_trailers_from_commit(struct strbuf *out, const char *msg,
const struct process_trailer_options *opts);

/*
* An interface for iterating over the trailers found in a particular commit
* message. Use like:
*
* struct trailer_iterator iter;
* trailer_iterator_init(&iter, msg);
* while (trailer_iterator_advance(&iter))
* ... do something with iter.key and iter.val ...
* trailer_iterator_release(&iter);
*/
struct trailer_iterator {
struct strbuf key;
struct strbuf val;

/* private */
struct trailer_info info;
size_t cur;
};

/*
* Initialize "iter" in preparation for walking over the trailers in the commit
* message "msg". The "msg" pointer must remain valid until the iterator is
* released.
*
* After initializing, note that key/val will not yet point to any trailer.
* Call advance() to parse the first one (if any).
*/
void trailer_iterator_init(struct trailer_iterator *iter, const char *msg);

/*
* Advance to the next trailer of the iterator. Returns 0 if there is no such
* trailer, and 1 otherwise. The key and value of the trailer can be
* fetched from the iter->key and iter->value fields (which are valid
* only until the next advance).
*/
int trailer_iterator_advance(struct trailer_iterator *iter);

/*
* Release all resources associated with the trailer iteration.
*/
void trailer_iterator_release(struct trailer_iterator *iter);

#endif /* TRAILER_H */

Loading…
Cancel
Save