From bd869f67b93b36e27405db5e288ee82f3a49051a Mon Sep 17 00:00:00 2001 From: Matthieu Moy Date: Fri, 5 Jan 2018 19:36:51 +0100 Subject: [PATCH 1/3] send-email: add and use a local copy of Mail::Address We used to have two versions of the email parsing code. Our parse_mailboxes (in Git.pm), and Mail::Address which we used if installed. Unfortunately, both versions have different sets of bugs, and changing the behavior of git depending on whether Mail::Address is installed was a bad idea. A first attempt to solve this was cc90750 (send-email: don't use Mail::Address, even if available, 2017-08-23), but it turns out our parse_mailboxes is too buggy for some uses. For example the lack of nested comments support breaks get_maintainer.pl in the Linux kernel tree: https://public-inbox.org/git/20171116154814.23785-1-alex.bennee@linaro.org/ This patch goes the other way: use Mail::Address anyway, but have a local copy from CPAN as a fallback, when the system one is not available. The duplicated script is small (276 lines of code) and stable in time. Maintaining the local copy should not be an issue, and will certainly be less burden than maintaining our own parse_mailboxes. Another option would be to consider Mail::Address as a hard dependency, but it's easy enough to save the trouble of extra-dependency to the end user or packager. Signed-off-by: Matthieu Moy Signed-off-by: Junio C Hamano --- git-send-email.perl | 3 +- perl/Git/FromCPAN/Mail/Address.pm | 276 ++++++++++++++++++++++++++++++ perl/Git/Mail/Address.pm | 24 +++ 3 files changed, 302 insertions(+), 1 deletion(-) create mode 100644 perl/Git/FromCPAN/Mail/Address.pm create mode 100755 perl/Git/Mail/Address.pm diff --git a/git-send-email.perl b/git-send-email.perl index edcc6d3469..340b5c8482 100755 --- a/git-send-email.perl +++ b/git-send-email.perl @@ -30,6 +30,7 @@ use Error qw(:try); use Cwd qw(abs_path cwd); use Git; use Git::I18N; +use Git::Mail::Address; Getopt::Long::Configure qw/ pass_through /; @@ -489,7 +490,7 @@ my ($repoauthor, $repocommitter); ($repocommitter) = Git::ident_person(@repo, 'committer'); sub parse_address_line { - return Git::parse_mailboxes($_[0]); + return map { $_->format } Mail::Address->parse($_[0]); } sub split_addrs { diff --git a/perl/Git/FromCPAN/Mail/Address.pm b/perl/Git/FromCPAN/Mail/Address.pm new file mode 100644 index 0000000000..13b2ff7d05 --- /dev/null +++ b/perl/Git/FromCPAN/Mail/Address.pm @@ -0,0 +1,276 @@ +# Copyrights 1995-2017 by [Mark Overmeer ]. +# For other contributors see ChangeLog. +# See the manual pages for details on the licensing terms. +# Pod stripped from pm file by OODoc 2.02. +package Mail::Address; +use vars '$VERSION'; +$VERSION = '2.19'; + +use strict; + +use Carp; + +# use locale; removed in version 1.78, because it causes taint problems + +sub Version { our $VERSION } + + + +# given a comment, attempt to extract a person's name +sub _extract_name +{ # This function can be called as method as well + my $self = @_ && ref $_[0] ? shift : undef; + + local $_ = shift + or return ''; + + # Using encodings, too hard. See Mail::Message::Field::Full. + return '' if m/\=\?.*?\?\=/; + + # trim whitespace + s/^\s+//; + s/\s+$//; + s/\s+/ /; + + # Disregard numeric names (e.g. 123456.1234@compuserve.com) + return "" if /^[\d ]+$/; + + s/^\((.*)\)$/$1/; # remove outermost parenthesis + s/^"(.*)"$/$1/; # remove outer quotation marks + s/\(.*?\)//g; # remove minimal embedded comments + s/\\//g; # remove all escapes + s/^"(.*)"$/$1/; # remove internal quotation marks + s/^([^\s]+) ?, ?(.*)$/$2 $1/; # reverse "Last, First M." if applicable + s/,.*//; + + # Change casing only when the name contains only upper or only + # lower cased characters. + unless( m/[A-Z]/ && m/[a-z]/ ) + { # Set the case of the name to first char upper rest lower + s/\b(\w+)/\L\u$1/igo; # Upcase first letter on name + s/\bMc(\w)/Mc\u$1/igo; # Scottish names such as 'McLeod' + s/\bo'(\w)/O'\u$1/igo; # Irish names such as 'O'Malley, O'Reilly' + s/\b(x*(ix)?v*(iv)?i*)\b/\U$1/igo; # Roman numerals, eg 'Level III Support' + } + + # some cleanup + s/\[[^\]]*\]//g; + s/(^[\s'"]+|[\s'"]+$)//g; + s/\s{2,}/ /g; + + $_; +} + +sub _tokenise +{ local $_ = join ',', @_; + my (@words,$snippet,$field); + + s/\A\s+//; + s/[\r\n]+/ /g; + + while ($_ ne '') + { $field = ''; + if(s/^\s*\(/(/ ) # (...) + { my $depth = 0; + + PAREN: while(s/^(\(([^\(\)\\]|\\.)*)//) + { $field .= $1; + $depth++; + while(s/^(([^\(\)\\]|\\.)*\)\s*)//) + { $field .= $1; + last PAREN unless --$depth; + $field .= $1 if s/^(([^\(\)\\]|\\.)+)//; + } + } + + carp "Unmatched () '$field' '$_'" + if $depth; + + $field =~ s/\s+\Z//; + push @words, $field; + + next; + } + + if( s/^("(?:[^"\\]+|\\.)*")\s*// # "..." + || s/^(\[(?:[^\]\\]+|\\.)*\])\s*// # [...] + || s/^([^\s()<>\@,;:\\".[\]]+)\s*// + || s/^([()<>\@,;:\\".[\]])\s*// + ) + { push @words, $1; + next; + } + + croak "Unrecognised line: $_"; + } + + push @words, ","; + \@words; +} + +sub _find_next +{ my ($idx, $tokens, $len) = @_; + + while($idx < $len) + { my $c = $tokens->[$idx]; + return $c if $c eq ',' || $c eq ';' || $c eq '<'; + $idx++; + } + + ""; +} + +sub _complete +{ my ($class, $phrase, $address, $comment) = @_; + + @$phrase || @$comment || @$address + or return undef; + + my $o = $class->new(join(" ",@$phrase), join("",@$address), join(" ",@$comment)); + @$phrase = @$address = @$comment = (); + $o; +} + +#------------ + +sub new(@) +{ my $class = shift; + bless [@_], $class; +} + + +sub parse(@) +{ my $class = shift; + my @line = grep {defined} @_; + my $line = join '', @line; + + my (@phrase, @comment, @address, @objs); + my ($depth, $idx) = (0, 0); + + my $tokens = _tokenise @line; + my $len = @$tokens; + my $next = _find_next $idx, $tokens, $len; + + local $_; + for(my $idx = 0; $idx < $len; $idx++) + { $_ = $tokens->[$idx]; + + if(substr($_,0,1) eq '(') { push @comment, $_ } + elsif($_ eq '<') { $depth++ } + elsif($_ eq '>') { $depth-- if $depth } + elsif($_ eq ',' || $_ eq ';') + { warn "Unmatched '<>' in $line" if $depth; + my $o = $class->_complete(\@phrase, \@address, \@comment); + push @objs, $o if defined $o; + $depth = 0; + $next = _find_next $idx+1, $tokens, $len; + } + elsif($depth) { push @address, $_ } + elsif($next eq '<') { push @phrase, $_ } + elsif( /^[.\@:;]$/ || !@address || $address[-1] =~ /^[.\@:;]$/ ) + { push @address, $_ } + else + { warn "Unmatched '<>' in $line" if $depth; + my $o = $class->_complete(\@phrase, \@address, \@comment); + push @objs, $o if defined $o; + $depth = 0; + push @address, $_; + } + } + @objs; +} + +#------------ + +sub phrase { shift->set_or_get(0, @_) } +sub address { shift->set_or_get(1, @_) } +sub comment { shift->set_or_get(2, @_) } + +sub set_or_get($) +{ my ($self, $i) = (shift, shift); + @_ or return $self->[$i]; + + my $val = $self->[$i]; + $self->[$i] = shift if @_; + $val; +} + + +my $atext = '[\-\w !#$%&\'*+/=?^`{|}~]'; +sub format +{ my @addrs; + + foreach (@_) + { my ($phrase, $email, $comment) = @$_; + my @addr; + + if(defined $phrase && length $phrase) + { push @addr + , $phrase =~ /^(?:\s*$atext\s*)+$/o ? $phrase + : $phrase =~ /(?" + if defined $email && length $email; + } + elsif(defined $email && length $email) + { push @addr, $email; + } + + if(defined $comment && $comment =~ /\S/) + { $comment =~ s/^\s*\(?/(/; + $comment =~ s/\)?\s*$/)/; + } + + push @addr, $comment + if defined $comment && length $comment; + + push @addrs, join(" ", @addr) + if @addr; + } + + join ", ", @addrs; +} + +#------------ + +sub name +{ my $self = shift; + my $phrase = $self->phrase; + my $addr = $self->address; + + $phrase = $self->comment + unless defined $phrase && length $phrase; + + my $name = $self->_extract_name($phrase); + + # first.last@domain address + if($name eq '' && $addr =~ /([^\%\.\@_]+([\._][^\%\.\@_]+)+)[\@\%]/) + { ($name = $1) =~ s/[\._]+/ /g; + $name = _extract_name $name; + } + + if($name eq '' && $addr =~ m#/g=#i) # X400 style address + { my ($f) = $addr =~ m#g=([^/]*)#i; + my ($l) = $addr =~ m#s=([^/]*)#i; + $name = _extract_name "$f $l"; + } + + length $name ? $name : undef; +} + + +sub host +{ my $addr = shift->address || ''; + my $i = rindex $addr, '@'; + $i >= 0 ? substr($addr, $i+1) : undef; +} + + +sub user +{ my $addr = shift->address || ''; + my $i = rindex $addr, '@'; + $i >= 0 ? substr($addr,0,$i) : $addr; +} + +1; diff --git a/perl/Git/Mail/Address.pm b/perl/Git/Mail/Address.pm new file mode 100755 index 0000000000..2ce3e84670 --- /dev/null +++ b/perl/Git/Mail/Address.pm @@ -0,0 +1,24 @@ +package Git::Mail::Address; +use 5.008; +use strict; +use warnings; + +=head1 NAME + +Git::Mail::Address - Wrapper for the L module, in case it's not installed + +=head1 DESCRIPTION + +This module is only intended to be used for code shipping in the +C repository. Use it for anything else at your peril! + +=cut + +eval { + require Mail::Address; + 1; +} or do { + require Git::FromCPAN::Mail::Address; +}; + +1; From c8f9d13dc6ddcb6c9e22584324c18b7d74e772ea Mon Sep 17 00:00:00 2001 From: Matthieu Moy Date: Mon, 8 Jan 2018 11:34:33 +0100 Subject: [PATCH 2/3] perl/Git: remove now useless email-address parsing code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We now use Mail::Address unconditionaly, hence parse_mailboxes is now dead code. Remove it and its tests. Signed-off-by: Matthieu Moy Reviewed-by: Alex Bennée Signed-off-by: Junio C Hamano --- perl/Git.pm | 71 -------------------------------------------- t/t9000-addresses.sh | 27 ----------------- t/t9000/test.pl | 67 ----------------------------------------- 3 files changed, 165 deletions(-) delete mode 100755 t/t9000-addresses.sh delete mode 100755 t/t9000/test.pl diff --git a/perl/Git.pm b/perl/Git.pm index ffa09ace92..65e6b32a0f 100644 --- a/perl/Git.pm +++ b/perl/Git.pm @@ -880,77 +880,6 @@ sub ident_person { return "$ident[0] <$ident[1]>"; } -=item parse_mailboxes - -Return an array of mailboxes extracted from a string. - -=cut - -# Very close to Mail::Address's parser, but we still have minor -# differences in some cases (see t9000 for examples). -sub parse_mailboxes { - my $re_comment = qr/\((?:[^)]*)\)/; - my $re_quote = qr/"(?:[^\"\\]|\\.)*"/; - my $re_word = qr/(?:[^]["\s()<>:;@\\,.]|\\.)+/; - - # divide the string in tokens of the above form - my $re_token = qr/(?:$re_quote|$re_word|$re_comment|\S)/; - my @tokens = map { $_ =~ /\s*($re_token)\s*/g } @_; - my $end_of_addr_seen = 0; - - # add a delimiter to simplify treatment for the last mailbox - push @tokens, ","; - - my (@addr_list, @phrase, @address, @comment, @buffer) = (); - foreach my $token (@tokens) { - if ($token =~ /^[,;]$/) { - # if buffer still contains undeterminated strings - # append it at the end of @address or @phrase - if ($end_of_addr_seen) { - push @phrase, @buffer; - } else { - push @address, @buffer; - } - - my $str_phrase = join ' ', @phrase; - my $str_address = join '', @address; - my $str_comment = join ' ', @comment; - - # quote are necessary if phrase contains - # special characters - if ($str_phrase =~ /[][()<>:;@\\,.\000-\037\177]/) { - $str_phrase =~ s/(^|[^\\])"/$1/g; - $str_phrase = qq["$str_phrase"]; - } - - # add "<>" around the address if necessary - if ($str_address ne "" && $str_phrase ne "") { - $str_address = qq[<$str_address>]; - } - - my $str_mailbox = "$str_phrase $str_address $str_comment"; - $str_mailbox =~ s/^\s*|\s*$//g; - push @addr_list, $str_mailbox if ($str_mailbox); - - @phrase = @address = @comment = @buffer = (); - $end_of_addr_seen = 0; - } elsif ($token =~ /^\(/) { - push @comment, $token; - } elsif ($token eq "<") { - push @phrase, (splice @address), (splice @buffer); - } elsif ($token eq ">") { - $end_of_addr_seen = 1; - push @address, (splice @buffer); - } elsif ($token eq "@" && !$end_of_addr_seen) { - push @address, (splice @buffer), "@"; - } else { - push @buffer, $token; - } - } - - return @addr_list; -} - =item hash_object ( TYPE, FILENAME ) Compute the SHA1 object id of the given C considering it is diff --git a/t/t9000-addresses.sh b/t/t9000-addresses.sh deleted file mode 100755 index a1ebef6de2..0000000000 --- a/t/t9000-addresses.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/sh - -test_description='compare address parsing with and without Mail::Address' -. ./test-lib.sh - -if ! test_have_prereq PERL; then - skip_all='skipping perl interface tests, perl not available' - test_done -fi - -perl -MTest::More -e 0 2>/dev/null || { - skip_all="Perl Test::More unavailable, skipping test" - test_done -} - -perl -MMail::Address -e 0 2>/dev/null || { - skip_all="Perl Mail::Address unavailable, skipping test" - test_done -} - -test_external_has_tap=1 - -test_external_without_stderr \ - 'Perl address parsing function' \ - perl "$TEST_DIRECTORY"/t9000/test.pl - -test_done diff --git a/t/t9000/test.pl b/t/t9000/test.pl deleted file mode 100755 index dfeaa9c655..0000000000 --- a/t/t9000/test.pl +++ /dev/null @@ -1,67 +0,0 @@ -#!/usr/bin/perl -use lib (split(/:/, $ENV{GITPERLLIB})); - -use 5.008; -use warnings; -use strict; - -use Test::More qw(no_plan); -use Mail::Address; - -BEGIN { use_ok('Git') } - -my @success_list = (q[Jane], - q[jdoe@example.com], - q[], - q[Jane ], - q[Jane Doe ], - q["Jane" ], - q["Doe, Jane" ], - q["Jane@:;\>.,()], - q[Jane!#$%&'*+-/=?^_{|}~Doe' ], - q[""], - q["Jane jdoe@example.com"], - q[Jane Doe ], - q[Jane Doe < jdoe@example.com >], - q[Jane @ Doe @ Jane @ Doe], - q["Jane, 'Doe'" ], - q['Doe, "Jane' ], - q["Jane" "Do"e ], - q["Jane' Doe" ], - q["Jane Doe " ], - q["Jane\" Doe" ], - q[Doe, jane ], - q["Jane Doe ], - q['Jane 'Doe' ], - q[Jane@:;\.,()<>Doe ], - q[Jane Doe], - q[ Jane Doe]); - -my @known_failure_list = (q[Jane\ Doe ], - q["Doe, Ja"ne ], - q["Doe, Katarina" Jane ], - q[Jane jdoe@example.com], - q["Jane "Kat"a" ri"na" ",Doe" ], - q[Jane Doe], - q[Jane "Doe "], - q[\"Jane Doe ], - q[Jane\"\" Doe ], - q['Jane "Katarina\" \' Doe' ]); - -foreach my $str (@success_list) { - my @expected = map { $_->format } Mail::Address->parse("$str"); - my @actual = Git::parse_mailboxes("$str"); - is_deeply(\@expected, \@actual, qq[same output : $str]); -} - -TODO: { - local $TODO = "known breakage"; - foreach my $str (@known_failure_list) { - my @expected = map { $_->format } Mail::Address->parse("$str"); - my @actual = Git::parse_mailboxes("$str"); - is_deeply(\@expected, \@actual, qq[same output : $str]); - } -} - -my $is_passing = eval { Test::More->is_passing }; -exit($is_passing ? 0 : 1) unless $@ =~ /Can't locate object method/; From d60be8acab06c34886338cbf98f7628284705979 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20Benn=C3=A9e?= Date: Mon, 8 Jan 2018 11:34:34 +0100 Subject: [PATCH 3/3] send-email: add test for Linux's get_maintainer.pl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We had a regression that broke Linux's get_maintainer.pl. Using Mail::Address to parse email addresses fixed it, but let's protect against future regressions. Note that we need --cc-cmd to be relative because this option doesn't accept spaces in script names (probably to allow --cc-cmd="executable --option"), while --smtp-server needs to be absolute. Patch-edited-by: Matthieu Moy Signed-off-by: Alex Bennée Signed-off-by: Matthieu Moy Signed-off-by: Junio C Hamano --- t/t9001-send-email.sh | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/t/t9001-send-email.sh b/t/t9001-send-email.sh index 4d261c2a9c..a06e5d7ba5 100755 --- a/t/t9001-send-email.sh +++ b/t/t9001-send-email.sh @@ -172,6 +172,25 @@ test_expect_success $PREREQ 'cc trailer with various syntax' ' test_cmp expected-cc commandline1 ' +test_expect_success $PREREQ 'setup fake get_maintainer.pl script for cc trailer' " + write_script expected-cc-script.sh <<-EOF + echo 'One Person (supporter:THIS (FOO/bar))' + echo 'Two Person (maintainer:THIS THING)' + echo 'Third List (moderated list:THIS THING (FOO/bar))' + echo ' (moderated list:FOR THING)' + echo 'five@example.com (open list:FOR THING (FOO/bar))' + echo 'six@example.com (open list)' + EOF +" + +test_expect_success $PREREQ 'cc trailer with get_maintainer.pl output' ' + clean_fake_sendmail && + git send-email -1 --to=recipient@example.com \ + --cc-cmd=./expected-cc-script.sh \ + --smtp-server="$(pwd)/fake.sendmail" && + test_cmp expected-cc commandline1 +' + test_expect_success $PREREQ 'setup expect' " cat >expected-show-all-headers <<\EOF 0001-Second.patch