From fdb0c36e903d13c184f9a465035c75565c5c072a Mon Sep 17 00:00:00 2001 From: Mark Rada Date: Sat, 26 Sep 2009 13:46:08 -0400 Subject: [PATCH 1/5] gitweb: check given hash before trying to create snapshot Makes things nicer in cases when you hand craft the snapshot URL but make a typo in defining the hash variable (e.g. netx instead of next); you will now get an error message instead of a broken tarball. Tests for t9501 are included to demonstrate added functionality. Signed-off-by: Mark Rada Signed-off-by: Shawn O. Pearce --- gitweb/gitweb.perl | 7 +++-- t/t9501-gitweb-standalone-http-status.sh | 39 ++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 2 deletions(-) diff --git a/gitweb/gitweb.perl b/gitweb/gitweb.perl index 24b219310a..8d4a2ae600 100755 --- a/gitweb/gitweb.perl +++ b/gitweb/gitweb.perl @@ -5196,8 +5196,11 @@ sub git_snapshot { die_error(403, "Unsupported snapshot format"); } - if (!defined $hash) { - $hash = git_get_head_hash($project); + my $type = git_get_type("$hash^{}"); + if (!$type) { + die_error(404, 'Object does not exist'); + } elsif ($type eq 'blob') { + die_error(400, 'Object is not a tree-ish'); } my $name = $project; diff --git a/t/t9501-gitweb-standalone-http-status.sh b/t/t9501-gitweb-standalone-http-status.sh index d0ff21d426..0688a57e1d 100644 --- a/t/t9501-gitweb-standalone-http-status.sh +++ b/t/t9501-gitweb-standalone-http-status.sh @@ -75,4 +75,43 @@ test_expect_success \ test_debug 'cat gitweb.output' +# ---------------------------------------------------------------------- +# snapshot hash ids + +test_expect_success 'snapshots: good tree-ish id' ' + gitweb_run "p=.git;a=snapshot;h=master;sf=tgz" && + grep "Status: 200 OK" gitweb.output +' +test_debug 'cat gitweb.output' + +test_expect_success 'snapshots: bad tree-ish id' ' + gitweb_run "p=.git;a=snapshot;h=frizzumFrazzum;sf=tgz" && + grep "404 - Object does not exist" gitweb.output +' +test_debug 'cat gitweb.output' + +test_expect_success 'snapshots: bad tree-ish id (tagged object)' ' + echo object > tag-object && + git add tag-object && + git commit -m "Object to be tagged" && + git tag tagged-object `git hash-object tag-object` && + gitweb_run "p=.git;a=snapshot;h=tagged-object;sf=tgz" && + grep "400 - Object is not a tree-ish" gitweb.output +' +test_debug 'cat gitweb.output' + +test_expect_success 'snapshots: good object id' ' + ID=`git rev-parse --verify HEAD` && + gitweb_run "p=.git;a=snapshot;h=$ID;sf=tgz" && + grep "Status: 200 OK" gitweb.output +' +test_debug 'cat gitweb.output' + +test_expect_success 'snapshots: bad object id' ' + gitweb_run "p=.git;a=snapshot;h=abcdef01234;sf=tgz" && + grep "404 - Object does not exist" gitweb.output +' +test_debug 'cat gitweb.output' + + test_done From 46e09f310567b680c03151e048bf2b7e847611e2 Mon Sep 17 00:00:00 2001 From: Jakub Narebski Date: Thu, 29 Oct 2009 23:07:41 +0100 Subject: [PATCH 2/5] t/gitweb-lib.sh: Split gitweb output into headers and body Save HTTP headers into gitweb.headers, and the body of message into gitweb.body in gitweb_run() Signed-off-by: Jakub Narebski Signed-off-by: Junio C Hamano --- t/gitweb-lib.sh | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/t/gitweb-lib.sh b/t/gitweb-lib.sh index 845253274b..32b841dd2e 100644 --- a/t/gitweb-lib.sh +++ b/t/gitweb-lib.sh @@ -52,10 +52,14 @@ gitweb_run () { rm -f gitweb.log && perl -- "$SCRIPT_NAME" \ >gitweb.output 2>gitweb.log && + sed -e '/^\r$/q' gitweb.headers && + sed -e '1,/^\r$/d' gitweb.body && if grep '^[[]' gitweb.log >/dev/null 2>&1; then false; else true; fi # gitweb.log is left for debugging - # gitweb.output is used to parse http output + # gitweb.output is used to parse HTTP output + # gitweb.headers contains only HTTP headers + # gitweb.body contains body of message, without headers } . ./test-lib.sh From 3ce9450a810243cbd9d0250d9ae3ea6834f50b9c Mon Sep 17 00:00:00 2001 From: Jakub Narebski Date: Sat, 7 Nov 2009 16:13:28 +0100 Subject: [PATCH 3/5] gitweb: Document current snapshot rules via new tests Add t9502-gitweb-standalone-parse-output test script, which runs gitweb as a CGI script from the commandline and checks that it produces the correct output. Currently this test script contains only tests of snapshot naming (proposed name of snapshot file) and snapshot prefix (prefix of files in the archive / snapshot). It defines and uses 'tar' snapshot format, without compression, for easy checking of snapshot prefix. Testing is done using check_snapshot function. Gitweb uses the following format for snapshot filenames: - where is project name with '.git' or '/.git' suffix stripped, unless '.git' is the whole project name. For snapshot prefix it uses simply: / Disadvantages of current snapshot rules: * There exists convention that . archive unpacks to / directory (/ is prefix of archive). Gitweb does not respect it * Snapshot links generated by gitweb use full SHA-1 id as a value of 'h' / $hash parameter. With current rules it leads to long file names like e.g. repo-1005c80cc11c531d327b12195027cbbb4ff9e3cb.tgz * For handcrafted URLs, where 'h' / $hash parameter is a symbolic 'volatile' revision name such as "HEAD" or "next" snapshot name doesn't tell us what exact version it was created from * Proposed filename in Content-Disposition header should not contain any directory path information, which means that it should not contain '/' (see RFC2183)... which means that snapshot naming is broken for $hash being e.g. hirearchical branch name such as 'xx/test' This would be improved in next commit. Signed-off-by: Jakub Narebski Signed-off-by: Junio C Hamano --- t/t9502-gitweb-standalone-parse-output.sh | 87 +++++++++++++++++++++++ 1 file changed, 87 insertions(+) create mode 100755 t/t9502-gitweb-standalone-parse-output.sh diff --git a/t/t9502-gitweb-standalone-parse-output.sh b/t/t9502-gitweb-standalone-parse-output.sh new file mode 100755 index 0000000000..741187b9e4 --- /dev/null +++ b/t/t9502-gitweb-standalone-parse-output.sh @@ -0,0 +1,87 @@ +#!/bin/sh +# +# Copyright (c) 2009 Mark Rada +# + +test_description='gitweb as standalone script (parsing script output). + +This test runs gitweb (git web interface) as a CGI script from the +commandline, and checks that it produces the correct output, either +in the HTTP header or the actual script output.' + + +. ./gitweb-lib.sh + +# ---------------------------------------------------------------------- +# snapshot file name and prefix + +cat >>gitweb_config.perl <<\EOF + +$known_snapshot_formats{'tar'} = { + 'display' => 'tar', + 'type' => 'application/x-tar', + 'suffix' => '.tar', + 'format' => 'tar', +}; + +$feature{'snapshot'}{'default'} = ['tar']; +EOF + +# Call check_snapshot with the arguments " []" +# +# This will check that gitweb HTTP header contains proposed filename +# as with '.tar' suffix added, and that generated tarfile +# (gitweb message body) has as prefix for al files in tarfile +# +# default to +check_snapshot () { + basename=$1 + prefix=${2:-"$1"} + echo "basename=$basename" + grep "filename=.*$basename.tar" gitweb.headers >/dev/null 2>&1 && + "$TAR" tf gitweb.body >file_list && + ! grep -v "^$prefix/" file_list +} + +test_expect_success setup ' + test_commit first foo && + git branch xx/test && + FULL_ID=$(git rev-parse --verify HEAD) && + SHORT_ID=$(git rev-parse --verify --short=7 HEAD) +' +test_debug ' + echo "FULL_ID = $FULL_ID" + echo "SHORT_ID = $SHORT_ID" +' + +test_expect_success 'snapshot: full sha1' ' + gitweb_run "p=.git;a=snapshot;h=$FULL_ID;sf=tar" && + check_snapshot ".git-$FULL_ID" ".git" +' +test_debug 'cat gitweb.headers && cat file_list' + +test_expect_success 'snapshot: shortened sha1' ' + gitweb_run "p=.git;a=snapshot;h=$SHORT_ID;sf=tar" && + check_snapshot ".git-$SHORT_ID" ".git" +' +test_debug 'cat gitweb.headers && cat file_list' + +test_expect_success 'snapshot: HEAD' ' + gitweb_run "p=.git;a=snapshot;h=HEAD;sf=tar" && + check_snapshot ".git-HEAD" ".git" +' +test_debug 'cat gitweb.headers && cat file_list' + +test_expect_success 'snapshot: short branch name (master)' ' + gitweb_run "p=.git;a=snapshot;h=master;sf=tar" && + check_snapshot ".git-master" ".git" +' +test_debug 'cat gitweb.headers && cat file_list' + +test_expect_failure 'snapshot: hierarchical branch name (xx/test)' ' + gitweb_run "p=.git;a=snapshot;h=xx/test;sf=tar" && + ! grep "filename=.*/" gitweb.headers +' +test_debug 'cat gitweb.headers' + +test_done From b629275fd02aa07c2630d1a8c8a14011ff164043 Mon Sep 17 00:00:00 2001 From: Mark Rada Date: Sat, 7 Nov 2009 16:13:29 +0100 Subject: [PATCH 4/5] gitweb: Smarter snapshot names Teach gitweb how to produce nicer snapshot names by only using the short hash id. If clients make requests using a tree-ish that is not a partial or full SHA-1 hash, then the short hash will also be appended to whatever they asked for. If clients request snapshot of a tag (which means that $hash ('h') parameter has 'refs/tags/' prefix), use only tag name. Update tests cases in t9502-gitweb-standalone-parse-output. Gitweb uses the following format for snapshot filenames: -. where is project name with '.git' or '/.git' suffix stripped, unless '.git' is the whole project name. For snapshot prefix it uses: -/ as compared to / before (without version info). Current rules for : * if 'h' / $hash parameter is SHA-1 or shortened SHA-1, use SHA-1 shortened to to 7 characters * otherwise if 'h' / $hash parameter is tag name (it begins with 'refs/tags/' prefix, use tag name (with 'refs/tags/' stripped * otherwise if 'h' / $hash parameter starts with 'refs/heads/' prefix, strip this prefix, convert '/' into '.', and append shortened SHA-1 after '-', i.e. use - Signed-off-by: Mark Rada Signed-off-by: Shawn O. Pearce Signed-off-by: Jakub Narebski Signed-off-by: Junio C Hamano --- gitweb/gitweb.perl | 76 ++++++++++++++++++----- t/t9502-gitweb-standalone-parse-output.sh | 38 ++++++++++-- 2 files changed, 93 insertions(+), 21 deletions(-) diff --git a/gitweb/gitweb.perl b/gitweb/gitweb.perl index 8d4a2ae600..d8dfd950a4 100755 --- a/gitweb/gitweb.perl +++ b/gitweb/gitweb.perl @@ -1983,16 +1983,27 @@ sub quote_command { # get HEAD ref of given project as hash sub git_get_head_hash { - my $project = shift; + return git_get_full_hash(shift, 'HEAD'); +} + +sub git_get_full_hash { + return git_get_hash(@_); +} + +sub git_get_short_hash { + return git_get_hash(@_, '--short=7'); +} + +sub git_get_hash { + my ($project, $hash, @options) = @_; my $o_git_dir = $git_dir; my $retval = undef; $git_dir = "$projectroot/$project"; - if (open my $fd, "-|", git_cmd(), "rev-parse", "--verify", "HEAD") { - my $head = <$fd>; + if (open my $fd, '-|', git_cmd(), 'rev-parse', + '--verify', '-q', @options, $hash) { + $retval = <$fd>; + chomp $retval if defined $retval; close $fd; - if (defined $head && $head =~ /^([0-9a-fA-F]{40})$/) { - $retval = $1; - } } if (defined $o_git_dir) { $git_dir = $o_git_dir; @@ -5179,6 +5190,43 @@ sub git_tree { git_footer_html(); } +sub snapshot_name { + my ($project, $hash) = @_; + + # path/to/project.git -> project + # path/to/project/.git -> project + my $name = to_utf8($project); + $name =~ s,([^/])/*\.git$,$1,; + $name = basename($name); + # sanitize name + $name =~ s/[[:cntrl:]]/?/g; + + my $ver = $hash; + if ($hash =~ /^[0-9a-fA-F]+$/) { + # shorten SHA-1 hash + my $full_hash = git_get_full_hash($project, $hash); + if ($full_hash =~ /^$hash/ && length($hash) > 7) { + $ver = git_get_short_hash($project, $hash); + } + } elsif ($hash =~ m!^refs/tags/(.*)$!) { + # tags don't need shortened SHA-1 hash + $ver = $1; + } else { + # branches and other need shortened SHA-1 hash + if ($hash =~ m!^refs/(?:heads|remotes)/(.*)$!) { + $ver = $1; + } + $ver .= '-' . git_get_short_hash($project, $hash); + } + # in case of hierarchical branch names + $ver =~ s!/!.!g; + + # name = project-version_string + $name = "$name-$ver"; + + return wantarray ? ($name, $name) : $name; +} + sub git_snapshot { my $format = $input_params{'snapshot_format'}; if (!@snapshot_fmts) { @@ -5203,24 +5251,20 @@ sub git_snapshot { die_error(400, 'Object is not a tree-ish'); } - my $name = $project; - $name =~ s,([^/])/*\.git$,$1,; - $name = basename($name); - my $filename = to_utf8($name); - $name =~ s/\047/\047\\\047\047/g; - my $cmd; - $filename .= "-$hash$known_snapshot_formats{$format}{'suffix'}"; - $cmd = quote_command( + my ($name, $prefix) = snapshot_name($project, $hash); + my $filename = "$name$known_snapshot_formats{$format}{'suffix'}"; + my $cmd = quote_command( git_cmd(), 'archive', "--format=$known_snapshot_formats{$format}{'format'}", - "--prefix=$name/", $hash); + "--prefix=$prefix/", $hash); if (exists $known_snapshot_formats{$format}{'compressor'}) { $cmd .= ' | ' . quote_command(@{$known_snapshot_formats{$format}{'compressor'}}); } + $filename =~ s/(["\\])/\\$1/g; print $cgi->header( -type => $known_snapshot_formats{$format}{'type'}, - -content_disposition => 'inline; filename="' . "$filename" . '"', + -content_disposition => 'inline; filename="' . $filename . '"', -status => '200 OK'); open my $fd, "-|", $cmd diff --git a/t/t9502-gitweb-standalone-parse-output.sh b/t/t9502-gitweb-standalone-parse-output.sh index 741187b9e4..dd83890001 100755 --- a/t/t9502-gitweb-standalone-parse-output.sh +++ b/t/t9502-gitweb-standalone-parse-output.sh @@ -56,29 +56,57 @@ test_debug ' test_expect_success 'snapshot: full sha1' ' gitweb_run "p=.git;a=snapshot;h=$FULL_ID;sf=tar" && - check_snapshot ".git-$FULL_ID" ".git" + check_snapshot ".git-$SHORT_ID" ' test_debug 'cat gitweb.headers && cat file_list' test_expect_success 'snapshot: shortened sha1' ' gitweb_run "p=.git;a=snapshot;h=$SHORT_ID;sf=tar" && - check_snapshot ".git-$SHORT_ID" ".git" + check_snapshot ".git-$SHORT_ID" +' +test_debug 'cat gitweb.headers && cat file_list' + +test_expect_success 'snapshot: almost full sha1' ' + ID=$(git rev-parse --short=30 HEAD) && + gitweb_run "p=.git;a=snapshot;h=$ID;sf=tar" && + check_snapshot ".git-$SHORT_ID" ' test_debug 'cat gitweb.headers && cat file_list' test_expect_success 'snapshot: HEAD' ' gitweb_run "p=.git;a=snapshot;h=HEAD;sf=tar" && - check_snapshot ".git-HEAD" ".git" + check_snapshot ".git-HEAD-$SHORT_ID" ' test_debug 'cat gitweb.headers && cat file_list' test_expect_success 'snapshot: short branch name (master)' ' gitweb_run "p=.git;a=snapshot;h=master;sf=tar" && - check_snapshot ".git-master" ".git" + ID=$(git rev-parse --verify --short=7 master) && + check_snapshot ".git-master-$ID" +' +test_debug 'cat gitweb.headers && cat file_list' + +test_expect_success 'snapshot: short tag name (first)' ' + gitweb_run "p=.git;a=snapshot;h=first;sf=tar" && + ID=$(git rev-parse --verify --short=7 first) && + check_snapshot ".git-first-$ID" +' +test_debug 'cat gitweb.headers && cat file_list' + +test_expect_success 'snapshot: full branch name (refs/heads/master)' ' + gitweb_run "p=.git;a=snapshot;h=refs/heads/master;sf=tar" && + ID=$(git rev-parse --verify --short=7 master) && + check_snapshot ".git-master-$ID" +' +test_debug 'cat gitweb.headers && cat file_list' + +test_expect_success 'snapshot: full tag name (refs/tags/first)' ' + gitweb_run "p=.git;a=snapshot;h=refs/tags/first;sf=tar" && + check_snapshot ".git-first" ' test_debug 'cat gitweb.headers && cat file_list' -test_expect_failure 'snapshot: hierarchical branch name (xx/test)' ' +test_expect_success 'snapshot: hierarchical branch name (xx/test)' ' gitweb_run "p=.git;a=snapshot;h=xx/test;sf=tar" && ! grep "filename=.*/" gitweb.headers ' From f74a83fcf04c50e8358c8fb493539af13f9b9aa5 Mon Sep 17 00:00:00 2001 From: Brian Gernhardt Date: Mon, 23 Nov 2009 12:33:42 -0500 Subject: [PATCH 5/5] t/gitweb-lib: Split HTTP response with non-GNU sed Recognizing \r in a regex is something GNU sed will do, but other sed implementation's won't (e.g. BSD sed on OS X). Instead of two sed invocations, use a single Perl script to split output into headers and body. Signed-off-by: Brian Gernhardt Signed-off-by: Junio C Hamano --- t/gitweb-lib.sh | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/t/gitweb-lib.sh b/t/gitweb-lib.sh index 32b841dd2e..76d8b7b803 100644 --- a/t/gitweb-lib.sh +++ b/t/gitweb-lib.sh @@ -52,8 +52,18 @@ gitweb_run () { rm -f gitweb.log && perl -- "$SCRIPT_NAME" \ >gitweb.output 2>gitweb.log && - sed -e '/^\r$/q' gitweb.headers && - sed -e '1,/^\r$/d' gitweb.body && + perl -w -e ' + open O, ">gitweb.headers"; + while (<>) { + print O; + last if (/^\r$/ || /^$/); + } + open O, ">gitweb.body"; + while (<>) { + print O; + } + close O; + ' gitweb.output && if grep '^[[]' gitweb.log >/dev/null 2>&1; then false; else true; fi # gitweb.log is left for debugging