From 4d06402b1b29259bd475e5473c4478f58e9376a1 Mon Sep 17 00:00:00 2001 From: Eric Sunshine Date: Sun, 21 Jul 2013 06:52:38 -0400 Subject: [PATCH 1/5] contrib: add git-contacts helper This script lists people that might be interested in a patch by going back through the history for each patch hunk, and finding people that reviewed, acknowledged, signed, authored, or were Cc:'d on the code the patch is modifying. It does this by running git-blame incrementally on each hunk and then parsing the commit message. After gathering all participants, it determines each person's relevance by considering how many commits mentioned that person compared with the total number of commits under consideration. The final output consists only of participants who pass a minimum threshold of participation. Several conditions controlling a person's significance are currently hard-coded, such as minimum participation level, blame date-limiting, and -C level for detecting moved and copied lines. In the future, these conditions may become configurable. For example: % git contacts 0001-remote-hg-trivial-cleanups.patch Felipe Contreras Jeff King Max Horn Junio C Hamano Thus, it can be invoked as git-send-email's --cc-cmd option, among other possible uses. This is a Perl rewrite of Felipe Contreras' git-related patch series[1] written in Ruby. [1]: http://thread.gmane.org/gmane.comp.version-control.git/226065/ Signed-off-by: Eric Sunshine Signed-off-by: Junio C Hamano --- contrib/contacts/git-contacts | 127 ++++++++++++++++++++++++++++++++++ 1 file changed, 127 insertions(+) create mode 100755 contrib/contacts/git-contacts diff --git a/contrib/contacts/git-contacts b/contrib/contacts/git-contacts new file mode 100755 index 0000000000..3e6cce8106 --- /dev/null +++ b/contrib/contacts/git-contacts @@ -0,0 +1,127 @@ +#!/usr/bin/perl + +# List people who might be interested in a patch. Useful as the argument to +# git-send-email --cc-cmd option, and in other situations. 
+# +# Usage: git contacts ... + +use strict; +use warnings; +use IPC::Open2; + +my $since = '5-years-ago'; +my $min_percent = 10; +my $labels_rx = qr/Signed-off-by|Reviewed-by|Acked-by|Cc/i; +my %seen; + +sub format_contact { + my ($name, $email) = @_; + return "$name <$email>"; +} + +sub parse_commit { + my ($commit, $data) = @_; + my $contacts = $commit->{contacts}; + my $inbody = 0; + for (split(/^/m, $data)) { + if (not $inbody) { + if (/^author ([^<>]+) <(\S+)> .+$/) { + $contacts->{format_contact($1, $2)} = 1; + } elsif (/^$/) { + $inbody = 1; + } + } elsif (/^$labels_rx:\s+([^<>]+)\s+<(\S+?)>$/o) { + $contacts->{format_contact($1, $2)} = 1; + } + } +} + +sub import_commits { + my ($commits) = @_; + return unless %$commits; + my $pid = open2 my $reader, my $writer, qw(git cat-file --batch); + for my $id (keys(%$commits)) { + print $writer "$id\n"; + my $line = <$reader>; + if ($line =~ /^([0-9a-f]{40}) commit (\d+)/) { + my ($cid, $len) = ($1, $2); + die "expected $id but got $cid\n" unless $id eq $cid; + my $data; + # cat-file emits newline after data, so read len+1 + read $reader, $data, $len + 1; + parse_commit($commits->{$id}, $data); + } + } + close $reader; + close $writer; + waitpid($pid, 0); + die "git-cat-file error: $?\n" if $?; +} + +sub get_blame { + my ($commits, $source, $start, $len, $from) = @_; + $len = 1 unless defined($len); + return if $len == 0; + open my $f, '-|', + qw(git blame --porcelain -C), '-L', "$start,+$len", + '--since', $since, "$from^", '--', $source or die; + while (<$f>) { + if (/^([0-9a-f]{40}) \d+ \d+ \d+$/) { + my $id = $1; + $commits->{$id} = { id => $id, contacts => {} } + unless $seen{$id}; + $seen{$id} = 1; + } + } + close $f; +} + +sub scan_patches { + my ($commits, $f) = @_; + my ($id, $source); + while (<$f>) { + if (/^From ([0-9a-f]{40}) Mon Sep 17 00:00:00 2001$/) { + $id = $1; + $seen{$id} = 1; + } + next unless $id; + if (m{^--- (?:a/(.+)|/dev/null)$}) { + $source = $1; + } elsif (/^--- /) { + die "Cannot parse 
hunk source: $_\n"; + } elsif (/^@@ -(\d+)(?:,(\d+))?/ && $source) { + get_blame($commits, $source, $1, $2, $id); + } + } +} + +sub scan_patch_file { + my ($commits, $file) = @_; + open my $f, '<', $file or die "read failure: $file: $!\n"; + scan_patches($commits, $f); + close $f; +} + +if (!@ARGV) { + die "No input patch files\n"; +} + +my %commits; +for (@ARGV) { + scan_patch_file(\%commits, $_); +} +import_commits(\%commits); + +my $contacts = {}; +for my $commit (values %commits) { + for my $contact (keys %{$commit->{contacts}}) { + $contacts->{$contact}++; + } +} + +my $ncommits = scalar(keys %commits); +for my $contact (keys %$contacts) { + my $percent = $contacts->{$contact} * 100 / $ncommits; + next if $percent < $min_percent; + print "$contact\n"; +} From 8e7c4a82ec25ee92bcb81de8bb8c4a27876d6edc Mon Sep 17 00:00:00 2001 From: Eric Sunshine Date: Sun, 21 Jul 2013 06:52:39 -0400 Subject: [PATCH 2/5] contrib: contacts: add ability to parse from committish For example: % git contacts R1..R2 Committishes and patch files can be mentioned in the same invocation: % git contacts R1..R2 extra/*.patch Signed-off-by: Eric Sunshine Signed-off-by: Junio C Hamano --- contrib/contacts/git-contacts | 38 +++++++++++++++++++++++++++++------ 1 file changed, 32 insertions(+), 6 deletions(-) diff --git a/contrib/contacts/git-contacts b/contrib/contacts/git-contacts index 3e6cce8106..1686ff340a 100755 --- a/contrib/contacts/git-contacts +++ b/contrib/contacts/git-contacts @@ -3,7 +3,7 @@ # List people who might be interested in a patch. Useful as the argument to # git-send-email --cc-cmd option, and in other situations. # -# Usage: git contacts ... +# Usage: git contacts ... 
use strict; use warnings; @@ -77,8 +77,8 @@ sub get_blame { } sub scan_patches { - my ($commits, $f) = @_; - my ($id, $source); + my ($commits, $id, $f) = @_; + my $source; while (<$f>) { if (/^From ([0-9a-f]{40}) Mon Sep 17 00:00:00 2001$/) { $id = $1; @@ -98,18 +98,44 @@ sub scan_patches { sub scan_patch_file { my ($commits, $file) = @_; open my $f, '<', $file or die "read failure: $file: $!\n"; - scan_patches($commits, $f); + scan_patches($commits, undef, $f); + close $f; +} + +sub scan_rev_args { + my ($commits, $args) = @_; + open my $f, '-|', qw(git rev-list --reverse), @$args or die; + while (<$f>) { + chomp; + my $id = $_; + $seen{$id} = 1; + open my $g, '-|', qw(git show -C --oneline), $id or die; + scan_patches($commits, $id, $g); + close $g; + } close $f; } if (!@ARGV) { - die "No input patch files\n"; + die "No input revisions or patch files\n"; +} + +my (@files, @rev_args); +for (@ARGV) { + if (-e) { + push @files, $_; + } else { + push @rev_args, $_; + } } my %commits; -for (@ARGV) { +for (@files) { scan_patch_file(\%commits, $_); } +if (@rev_args) { + scan_rev_args(\%commits, \@rev_args) +} import_commits(\%commits); my $contacts = {}; From ccf6b45aff3f89016cf1a3e39a37e73e72c3e0f6 Mon Sep 17 00:00:00 2001 From: Eric Sunshine Date: Sun, 21 Jul 2013 06:52:40 -0400 Subject: [PATCH 3/5] contrib: contacts: interpret committish akin to format-patch As a convenience, accept the same style committish as accepted by git-format-patch. For example: % git contacts origin will consider commits in the current branch built atop 'origin', just as "git format-patch origin" will format commits built atop 'origin'. 
Signed-off-by: Eric Sunshine Signed-off-by: Junio C Hamano --- contrib/contacts/git-contacts | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/contrib/contacts/git-contacts b/contrib/contacts/git-contacts index 1686ff340a..4553add0a6 100755 --- a/contrib/contacts/git-contacts +++ b/contrib/contacts/git-contacts @@ -102,9 +102,26 @@ sub scan_patch_file { close $f; } +sub parse_rev_args { + my @args = @_; + open my $f, '-|', + qw(git rev-parse --revs-only --default HEAD --symbolic), @args + or die; + my @revs; + while (<$f>) { + chomp; + push @revs, $_; + } + close $f; + return @revs if scalar(@revs) != 1; + return "^$revs[0]", 'HEAD' unless $revs[0] =~ /^-/; + return $revs[0], 'HEAD'; +} + sub scan_rev_args { my ($commits, $args) = @_; - open my $f, '-|', qw(git rev-list --reverse), @$args or die; + my @revs = parse_rev_args(@$args); + open my $f, '-|', qw(git rev-list --reverse), @revs or die; while (<$f>) { chomp; my $id = $_; From 7c6d6ff8f122b10d6214c4f53e3179996dee2f9a Mon Sep 17 00:00:00 2001 From: Eric Sunshine Date: Sun, 21 Jul 2013 06:52:41 -0400 Subject: [PATCH 4/5] contrib: contacts: add mailmap support The purpose of git-contacts is to determine a list of people who might have some interest in a patch or set of changes. It can be used as git-send-email's --cc-cmd argument or the computed list might be used to ask for comments on a proposed change. As such, it is important to report up-to-date email addresses in the computed list rather than potentially outdated ones recorded with commits. Apply git's mailmap functionality to the retrieved contacts in order to achieve this goal. 
Signed-off-by: Eric Sunshine Signed-off-by: Junio C Hamano --- contrib/contacts/git-contacts | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/contrib/contacts/git-contacts b/contrib/contacts/git-contacts index 4553add0a6..d80f7d1b6e 100755 --- a/contrib/contacts/git-contacts +++ b/contrib/contacts/git-contacts @@ -133,6 +133,23 @@ sub scan_rev_args { close $f; } +sub mailmap_contacts { + my ($contacts) = @_; + my %mapped; + my $pid = open2 my $reader, my $writer, qw(git check-mailmap --stdin); + for my $contact (keys(%$contacts)) { + print $writer "$contact\n"; + my $canonical = <$reader>; + chomp $canonical; + $mapped{$canonical} += $contacts->{$contact}; + } + close $reader; + close $writer; + waitpid($pid, 0); + die "git-check-mailmap error: $?\n" if $?; + return \%mapped; +} + if (!@ARGV) { die "No input revisions or patch files\n"; } @@ -161,6 +178,7 @@ for my $commit (values %commits) { $contacts->{$contact}++; } } +$contacts = mailmap_contacts($contacts); my $ncommits = scalar(keys %commits); for my $contact (keys %$contacts) { From acb01a359bc2fe3f1ddfb1eb9daa60b8c83e5153 Mon Sep 17 00:00:00 2001 From: Eric Sunshine Date: Sun, 21 Jul 2013 06:52:42 -0400 Subject: [PATCH 5/5] contrib: contacts: add documentation Assuming that git-contacts may some day be promoted to a core git command, the documentation is written and formatted as if it already belongs in Documentation/ even though it presently resides in contrib/contacts. 
Signed-off-by: Eric Sunshine Signed-off-by: Junio C Hamano --- contrib/contacts/git-contacts.txt | 94 +++++++++++++++++++++++++++++++ 1 file changed, 94 insertions(+) create mode 100644 contrib/contacts/git-contacts.txt diff --git a/contrib/contacts/git-contacts.txt b/contrib/contacts/git-contacts.txt new file mode 100644 index 0000000000..dd914d1261 --- /dev/null +++ b/contrib/contacts/git-contacts.txt @@ -0,0 +1,94 @@ +git-contacts(1) +=============== + +NAME +---- +git-contacts - List people who might be interested in a set of changes + + +SYNOPSIS +-------- +[verse] +'git contacts' (<patch>|<range>|<rev>)... + + +DESCRIPTION +----------- + +Given a set of changes, specified as patch files or revisions, determine people +who might be interested in those changes. This is done by consulting the +history of each patch or revision hunk to find people mentioned by commits +which touched the lines of files under consideration. + +Input consists of one or more patch files or revision arguments. A revision +argument can be a range or a single `<rev>` which is interpreted as +`<rev>..HEAD`, thus the same revision arguments are accepted as for +linkgit:git-format-patch[1]. Patch files and revision arguments can be combined +in the same invocation. + +This command can be useful for determining the list of people with whom to +discuss proposed changes, or for finding the list of recipients to Cc: when +submitting a patch series via `git send-email`. For the latter case, `git +contacts` can be used as the argument to `git send-email`'s `--cc-cmd` option. + + +DISCUSSION +---------- + +`git blame` is invoked for each hunk in a patch file or revision. For each +commit mentioned by `git blame`, the commit message is consulted for people who +authored, reviewed, signed, acknowledged, or were Cc:'d. Once the list of +participants is known, each person's relevance is computed by considering how +many commits mentioned that person compared with the total number of commits +under consideration. 
The final output consists only of participants who exceed +a minimum threshold of participation. + + +OUTPUT +------ + +For each person of interest, a single line is output, terminated by a newline. +If the person's name is known, ``Name $$<user@host>$$'' is printed; otherwise +only ``$$<user@host>$$'' is printed. + + +EXAMPLES +-------- + +* Consult patch files: ++ +------------ +$ git contacts feature/*.patch +------------ + +* Revision range: ++ +------------ +$ git contacts R1..R2 +------------ + +* From a single revision to `HEAD`: ++ +------------ +$ git contacts origin +------------ + +* Helper for `git send-email`: ++ +------------ +$ git send-email --cc-cmd='git contacts' feature/*.patch +------------ + + +LIMITATIONS +----------- + +Several conditions controlling a person's significance are currently +hard-coded, such as minimum participation level (10%), blame date-limiting (5 +years), and `-C` level for detecting moved and copied lines (a single `-C`). In +the future, these conditions may become configurable. + + +GIT +--- +Part of the linkgit:git[1] suite