From 62e0069056ed11294c29bae25df69b6518f6339e Mon Sep 17 00:00:00 2001 From: Krzysztof Mazur Date: Wed, 10 Oct 2012 01:02:56 +0200 Subject: [PATCH 1/5] git-send-email: introduce compose-encoding The introduction email (--compose option) have encoding hardcoded to UTF-8, but invoked editor may not use UTF-8 encoding. The encoding used by patches can be changed by the "8bit-encoding" option, but this option does not have effect on introduction email and equivalent for introduction email is missing. Added compose-encoding command line option and sendemail.composeencoding configuration option specify encoding of introduction email. Signed-off-by: Krzysztof Mazur Signed-off-by: Junio C Hamano --- Documentation/git-send-email.txt | 4 +++ git-send-email.perl | 9 +++++- t/t9001-send-email.sh | 55 ++++++++++++++++++++++++++++++++ 3 files changed, 67 insertions(+), 1 deletion(-) diff --git a/Documentation/git-send-email.txt b/Documentation/git-send-email.txt index 324117072d..eeb561cf14 100644 --- a/Documentation/git-send-email.txt +++ b/Documentation/git-send-email.txt @@ -126,6 +126,10 @@ The --to option must be repeated for each user you want on the to list. + Note that no attempts whatsoever are made to validate the encoding. +--compose-encoding=:: + Specify encoding of compose message. Default is the value of the + 'sendemail.composeencoding'; if that is unspecified, UTF-8 is assumed. + Sending ~~~~~~~ diff --git a/git-send-email.perl b/git-send-email.perl index aea66a0d47..107e814b67 100755 --- a/git-send-email.perl +++ b/git-send-email.perl @@ -56,6 +56,7 @@ git send-email [options] --in-reply-to * Email "In-Reply-To:" --annotate * Review each patch that will be sent in an editor. --compose * Open an editor for introduction. + --compose-encoding * Encoding to assume for introduction. --8bit-encoding * Encoding to assume 8bit mails if undeclared Sending: @@ -198,6 +199,7 @@ my ($identity, $aliasfiletype, @alias_files, $smtp_domain); my ($validate, $confirm); my (@suppress_cc); my ($auto_8bit_encoding); +my ($compose_encoding); my ($debug_net_smtp) = 0; # Net::SMTP, see send_message() @@ -231,6 +233,7 @@ my %config_settings = ( "confirm" => \$confirm, "from" => \$sender, "assume8bitencoding" => \$auto_8bit_encoding, + "composeencoding" => \$compose_encoding, ); my %config_path_settings = ( @@ -315,6 +318,7 @@ my $rc = GetOptions("h" => \$help, "validate!" => \$validate, "format-patch!" => \$format_patch, "8bit-encoding=s" => \$auto_8bit_encoding, + "compose-encoding=s" => \$compose_encoding, "force" => \$force, ); @@ -638,10 +642,13 @@ EOT $summary_empty = 0 unless (/^\n$/); } elsif (/^\n$/) { $in_body = 1; + if (!defined $compose_encoding) { + $compose_encoding = "UTF-8"; + } if ($need_8bit_cte) { print $c2 "MIME-Version: 1.0\n", "Content-Type: text/plain; ", - "charset=UTF-8\n", + "charset=$compose_encoding\n", "Content-Transfer-Encoding: 8bit\n"; } } elsif (/^MIME-Version:/i) { diff --git a/t/t9001-send-email.sh b/t/t9001-send-email.sh index 035122808b..265ae0463f 100755 --- a/t/t9001-send-email.sh +++ b/t/t9001-send-email.sh @@ -854,6 +854,61 @@ test_expect_success $PREREQ 'utf8 author is correctly passed on' ' grep "^From: Füñný Nâmé " msgtxt1 ' +test_expect_success $PREREQ 'sendemail.composeencoding works' ' + clean_fake_sendmail && + git config sendemail.composeencoding iso-8859-1 && + (echo "#!$SHELL_PATH" && + echo "echo utf8 body: àéìöú >>\"\$1\"" + ) >fake-editor-utf8 && + chmod +x fake-editor-utf8 && + GIT_EDITOR="\"$(pwd)/fake-editor-utf8\"" \ + git send-email \ + --compose --subject foo \ + --from="Example " \ + --to=nobody@example.com \ + --smtp-server="$(pwd)/fake.sendmail" \ + $patches && + grep "^utf8 body" msgtxt1 && + grep "^Content-Type: text/plain; charset=iso-8859-1" msgtxt1 +' + +test_expect_success $PREREQ '--compose-encoding works' ' + clean_fake_sendmail && + (echo "#!$SHELL_PATH" && + echo "echo utf8 body: àéìöú >>\"\$1\"" + ) >fake-editor-utf8 && + chmod +x fake-editor-utf8 && + GIT_EDITOR="\"$(pwd)/fake-editor-utf8\"" \ + git send-email \ + --compose-encoding iso-8859-1 \ + --compose --subject foo \ + --from="Example " \ + --to=nobody@example.com \ + --smtp-server="$(pwd)/fake.sendmail" \ + $patches && + grep "^utf8 body" msgtxt1 && + grep "^Content-Type: text/plain; charset=iso-8859-1" msgtxt1 +' + +test_expect_success $PREREQ '--compose-encoding overrides sendemail.composeencoding' ' + clean_fake_sendmail && + git config sendemail.composeencoding iso-8859-1 && + (echo "#!$SHELL_PATH" && + echo "echo utf8 body: àéìöú >>\"\$1\"" + ) >fake-editor-utf8 && + chmod +x fake-editor-utf8 && + GIT_EDITOR="\"$(pwd)/fake-editor-utf8\"" \ + git send-email \ + --compose-encoding iso-8859-2 \ + --compose --subject foo \ + --from="Example " \ + --to=nobody@example.com \ + --smtp-server="$(pwd)/fake.sendmail" \ + $patches && + grep "^utf8 body" msgtxt1 && + grep "^Content-Type: text/plain; charset=iso-8859-2" msgtxt1 +' + test_expect_success $PREREQ 'detects ambiguous reference/file conflict' ' echo master > master && git add master && From 4a47a4ddec0d4d37e81ad63385ca287abfe90f9b Mon Sep 17 00:00:00 2001 From: Krzysztof Mazur Date: Mon, 22 Oct 2012 14:41:48 +0200 Subject: [PATCH 2/5] git-send-email: use compose-encoding for Subject The commit "git-send-email: introduce compose-encoding" introduced the compose-encoding option to specify the introduction email encoding (--compose option), but the email Subject encoding was still hardcoded to UTF-8. Signed-off-by: Krzysztof Mazur Signed-off-by: Jeff King --- git-send-email.perl | 8 ++++---- t/t9001-send-email.sh | 14 ++++++++++++++ 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/git-send-email.perl b/git-send-email.perl index 107e814b67..adcb4e397b 100755 --- a/git-send-email.perl +++ b/git-send-email.perl @@ -636,15 +636,15 @@ EOT my $need_8bit_cte = file_has_nonascii($compose_filename); my $in_body = 0; my $summary_empty = 1; + if (!defined $compose_encoding) { + $compose_encoding = "UTF-8"; + } while(<$c>) { next if m/^GIT:/; if ($in_body) { $summary_empty = 0 unless (/^\n$/); } elsif (/^\n$/) { $in_body = 1; - if (!defined $compose_encoding) { - $compose_encoding = "UTF-8"; - } if ($need_8bit_cte) { print $c2 "MIME-Version: 1.0\n", "Content-Type: text/plain; ", @@ -658,7 +658,7 @@ EOT my $subject = $initial_subject; $_ = "Subject: " . ($subject =~ /[^[:ascii:]]/ ? - quote_rfc2047($subject) : + quote_rfc2047($subject, $compose_encoding) : $subject) . "\n"; } elsif (/^In-Reply-To:\s*(.+)\s*$/i) { diff --git a/t/t9001-send-email.sh b/t/t9001-send-email.sh index 265ae0463f..89fcedaa33 100755 --- a/t/t9001-send-email.sh +++ b/t/t9001-send-email.sh @@ -909,6 +909,20 @@ test_expect_success $PREREQ '--compose-encoding overrides sendemail.composeencod grep "^Content-Type: text/plain; charset=iso-8859-2" msgtxt1 ' +test_expect_success $PREREQ '--compose-encoding adds correct MIME for subject' ' + clean_fake_sendmail && + GIT_EDITOR="\"$(pwd)/fake-editor\"" \ + git send-email \ + --compose-encoding iso-8859-2 \ + --compose --subject utf8-sübjëct \ + --from="Example " \ + --to=nobody@example.com \ + --smtp-server="$(pwd)/fake.sendmail" \ + $patches && + grep "^fake edit" msgtxt1 && + grep "^Subject: =?iso-8859-2?q?utf8-s=C3=BCbj=C3=ABct?=" msgtxt1 +' + test_expect_success $PREREQ 'detects ambiguous reference/file conflict' ' echo master > master && git add master && From 5637d8573206e8c3d99abacb6b6ca3cf11816202 Mon Sep 17 00:00:00 2001 From: Krzysztof Mazur Date: Wed, 24 Oct 2012 10:03:35 +0200 Subject: [PATCH 3/5] git-send-email: skip RFC2047 quoting for ASCII subjects The git-send-email always use RFC2047 subject quoting for files with "broken" encoding - non-ASCII files without Content-Transfer-Encoding, even for ASCII subjects. This is harmless but unnecessarily ugly for people reading the raw headers. This patch skips rfc2047 quoting when the subject does not need it. Signed-off-by: Krzysztof Mazur Signed-off-by: Jeff King --- git-send-email.perl | 3 ++- t/t9001-send-email.sh | 17 +++++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/git-send-email.perl b/git-send-email.perl index adcb4e397b..efeae4c47a 100755 --- a/git-send-email.perl +++ b/git-send-email.perl @@ -1327,7 +1327,8 @@ foreach my $t (@files) { $body_encoding = $auto_8bit_encoding; } - if ($broken_encoding{$t} && !is_rfc2047_quoted($subject)) { + if ($broken_encoding{$t} && !is_rfc2047_quoted($subject) && + ($subject =~ /[^[:ascii:]]/)) { $subject = quote_rfc2047($subject, $auto_8bit_encoding); } diff --git a/t/t9001-send-email.sh b/t/t9001-send-email.sh index 89fcedaa33..6c6af7d13f 100755 --- a/t/t9001-send-email.sh +++ b/t/t9001-send-email.sh @@ -1142,6 +1142,23 @@ Dieser deutsche Text enthält einen Umlaut! EOF ' +test_expect_success $PREREQ 'setup expect' ' +cat >expected <stdout && + grep "Subject" msgtxt1 >actual && + test_cmp expected actual +' + test_expect_success $PREREQ 'setup expect' ' cat >content-type-decl < Date: Wed, 24 Oct 2012 23:08:26 +0200 Subject: [PATCH 4/5] git-send-email: introduce quote_subject() The quote_rfc2047() always adds RFC2047 quoting. To avoid quoting ASCII subjects, before calling quote_rfc2047() subject must be tested for non-ASCII characters. This patch introduces a new quote_subject() function, which performs the test and calls quote_rfc2047 only if necessary. Signed-off-by: Krzysztof Mazur Signed-off-by: Jeff King --- git-send-email.perl | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/git-send-email.perl b/git-send-email.perl index efeae4c47a..1574675d1d 100755 --- a/git-send-email.perl +++ b/git-send-email.perl @@ -657,9 +657,7 @@ EOT $initial_subject = $1; my $subject = $initial_subject; $_ = "Subject: " . - ($subject =~ /[^[:ascii:]]/ ? - quote_rfc2047($subject, $compose_encoding) : - $subject) . + quote_subject($subject, $compose_encoding) . "\n"; } elsif (/^In-Reply-To:\s*(.+)\s*$/i) { $initial_reply_to = $1; @@ -907,6 +905,22 @@ sub is_rfc2047_quoted { $s =~ m/^(?:"[[:ascii:]]*"|=\?$token\?$token\?$encoded_text\?=)$/o; } +sub subject_needs_rfc2047_quoting { + my $s = shift; + + return ($s =~ /[^[:ascii:]]/); +} + +sub quote_subject { + local $subject = shift; + my $encoding = shift || 'UTF-8'; + + if (subject_needs_rfc2047_quoting($subject)) { + return quote_rfc2047($subject, $encoding); + } + return $subject; +} + # use the simplest quoting being able to handle the recipient sub sanitize_address { my ($recipient) = @_; @@ -1327,9 +1341,8 @@ foreach my $t (@files) { $body_encoding = $auto_8bit_encoding; } - if ($broken_encoding{$t} && !is_rfc2047_quoted($subject) && - ($subject =~ /[^[:ascii:]]/)) { - $subject = quote_rfc2047($subject, $auto_8bit_encoding); + if ($broken_encoding{$t} && !is_rfc2047_quoted($subject)) { + $subject = quote_subject($subject, $auto_8bit_encoding); } if (defined $author and $author ne $sender) { From ce1459f740a591c63697ff6ff3c106123630539c Mon Sep 17 00:00:00 2001 From: Krzysztof Mazur Date: Wed, 24 Oct 2012 23:28:29 +0200 Subject: [PATCH 5/5] =?UTF-8?q?git-send-email:=20add=20rfc2047=20quoting?= =?UTF-8?q?=20for=20"=3D=3F"?= For raw subjects rfc2047 quoting is needed not only for non-ASCII characters, but also for any possible rfc2047 in it. Signed-off-by: Krzysztof Mazur Signed-off-by: Jeff King --- git-send-email.perl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/git-send-email.perl b/git-send-email.perl index 1574675d1d..5a7c29db93 100755 --- a/git-send-email.perl +++ b/git-send-email.perl @@ -908,7 +908,7 @@ sub is_rfc2047_quoted { sub subject_needs_rfc2047_quoting { my $s = shift; - return ($s =~ /[^[:ascii:]]/); + return ($s =~ /[^[:ascii:]]/) || ($s =~ /=\?/); } sub quote_subject {