@ -35,8 +35,8 @@ my $sha1_short = qr/[a-f\d]{4,40}/;
my ($_revision,$_stdin,$_no_ignore_ext,$_no_stop_copy,$_help,$_rmdir,$_edit,
my ($_revision,$_stdin,$_no_ignore_ext,$_no_stop_copy,$_help,$_rmdir,$_edit,
$_find_copies_harder, $_l, $_cp_similarity,
$_find_copies_harder, $_l, $_cp_similarity,
$_repack, $_repack_nr, $_repack_flags,
$_repack, $_repack_nr, $_repack_flags,
$_template, $_shared,
$_template, $_shared, $_no_default_regex, $_no_graft_copy,
$_version, $_upgrade, $_authors, $_branch_all_refs);
$_version, $_upgrade, $_authors, $_branch_all_refs, @_opt_m);
my (@_branch_from, %tree_map, %users);
my (@_branch_from, %tree_map, %users);
my ($_svn_co_url_revs, $_svn_pg_peg_revs);
my ($_svn_co_url_revs, $_svn_pg_peg_revs);
my @repo_path_split_cache;
my @repo_path_split_cache;
@ -48,6 +48,12 @@ my %fc_opts = ( 'no-ignore-externals' => \$_no_ignore_ext,
'repack:i' => \$_repack,
'repack:i' => \$_repack,
'repack-flags|repack-args|repack-opts=s' => \$_repack_flags);
'repack-flags|repack-args|repack-opts=s' => \$_repack_flags);
my ($_trunk, $_tags, $_branches);
my %multi_opts = ( 'trunk|T=s' => \$_trunk,
'tags|t=s' => \$_tags,
'branches|b=s' => \$_branches );
my %init_opts = ( 'template=s' => \$_template, 'shared' => \$_shared );
# yes, 'native' sets "\n". Patches to fix this for non-*nix systems welcome:
# yes, 'native' sets "\n". Patches to fix this for non-*nix systems welcome:
my %EOL = ( CR => "\015", LF => "\012", CRLF => "\015\012", native => "\012" );
my %EOL = ( CR => "\015", LF => "\012", CRLF => "\015\012", native => "\012" );
@ -56,8 +62,7 @@ my %cmd = (
{ 'revision|r=s' => \$_revision, %fc_opts } ],
{ 'revision|r=s' => \$_revision, %fc_opts } ],
init => [ \&init, "Initialize a repo for tracking" .
init => [ \&init, "Initialize a repo for tracking" .
" (requires URL argument)",
" (requires URL argument)",
{ 'template=s' => \$_template,
\%init_opts ],
'shared' => \$_shared } ],
commit => [ \&commit, "Commit git revisions to SVN",
commit => [ \&commit, "Commit git revisions to SVN",
{ 'stdin|' => \$_stdin,
{ 'stdin|' => \$_stdin,
'edit|e' => \$_edit,
'edit|e' => \$_edit,
@ -71,7 +76,19 @@ my %cmd = (
rebuild => [ \&rebuild, "Rebuild git-svn metadata (after git clone)",
rebuild => [ \&rebuild, "Rebuild git-svn metadata (after git clone)",
{ 'no-ignore-externals' => \$_no_ignore_ext,
{ 'no-ignore-externals' => \$_no_ignore_ext,
'upgrade' => \$_upgrade } ],
'upgrade' => \$_upgrade } ],
'graft-branches' => [ \&graft_branches,
'Detect merges/branches from already imported history',
{ 'merge-rx|m' => \@_opt_m,
'no-default-regex' => \$_no_default_regex,
'no-graft-copy' => \$_no_graft_copy } ],
'multi-init' => [ \&multi_init,
'Initialize multiple trees (like git-svnimport)',
{ %multi_opts, %fc_opts } ],
'multi-fetch' => [ \&multi_fetch,
'Fetch multiple trees (like git-svnimport)',
\%fc_opts ],
);
);
my $cmd;
my $cmd;
for (my $i = 0; $i < @ARGV; $i++) {
for (my $i = 0; $i < @ARGV; $i++) {
if (defined $cmd{$ARGV[$i]}) {
if (defined $cmd{$ARGV[$i]}) {
@ -96,7 +113,7 @@ init_vars();
load_authors() if $_authors;
load_authors() if $_authors;
load_all_refs() if $_branch_all_refs;
load_all_refs() if $_branch_all_refs;
svn_compat_check();
svn_compat_check();
migration_check() unless $cmd eq 'init';
migration_check() unless $cmd =~ /^(?:init|multi-init)$/;
$cmd{$cmd}->[0]->(@ARGV);
$cmd{$cmd}->[0]->(@ARGV);
exit 0;
exit 0;
@ -219,6 +236,7 @@ when you have upgraded your tools and habits to use refs/remotes/$GIT_SVN
sub init {
sub init {
$SVN_URL = shift or die "SVN repository location required " .
$SVN_URL = shift or die "SVN repository location required " .
"as a command-line argument\n";
"as a command-line argument\n";
$SVN_URL =~ s!/+$!!; # strip trailing slash
unless (-d $GIT_DIR) {
unless (-d $GIT_DIR) {
my @init_db = ('git-init-db');
my @init_db = ('git-init-db');
push @init_db, "--template=$_template" if defined $_template;
push @init_db, "--template=$_template" if defined $_template;
@ -358,8 +376,283 @@ sub show_ignore {
}
}
}
}
sub graft_branches {
my $gr_file = "$GIT_DIR/info/grafts";
my ($grafts, $comments) = read_grafts($gr_file);
my $gr_sha1;
if (%$grafts) {
# temporarily disable our grafts file to make this idempotent
chomp($gr_sha1 = safe_qx(qw/git-hash-object -w/,$gr_file));
rename $gr_file, "$gr_file~$gr_sha1" or croak $!;
}
my $l_map = read_url_paths();
my @re = map { qr/$_/is } @_opt_m if @_opt_m;
unless ($_no_default_regex) {
push @re, ( qr/\b(?:merge|merging|merged)\s+(\S.+)/is,
qr/\b(?:from|of)\s+(\S.+)/is );
}
foreach my $u (keys %$l_map) {
if (@re) {
foreach my $p (keys %{$l_map->{$u}}) {
graft_merge_msg($grafts,$l_map,$u,$p);
}
}
graft_file_copy($grafts,$l_map,$u) unless $_no_graft_copy;
}
write_grafts($grafts, $comments, $gr_file);
unlink "$gr_file~$gr_sha1" if $gr_sha1;
}
sub multi_init {
my $url = shift;
$_trunk ||= 'trunk';
$_trunk =~ s#/+$##;
$url =~ s#/+$## if $url;
if ($_trunk !~ m#^[a-z\+]+://#) {
$_trunk = '/' . $_trunk if ($_trunk !~ m#^/#);
unless ($url) {
print STDERR "E: '$_trunk' is not a complete URL ",
"and a separate URL is not specified\n";
exit 1;
}
$_trunk = $url . $_trunk;
}
if ($GIT_SVN eq 'git-svn') {
print "GIT_SVN_ID set to 'trunk' for $_trunk\n";
$GIT_SVN = $ENV{GIT_SVN_ID} = 'trunk';
}
init_vars();
init($_trunk);
complete_url_ls_init($url, $_branches, '--branches/-b', '');
complete_url_ls_init($url, $_tags, '--tags/-t', 'tags/');
}
sub multi_fetch {
# try to do trunk first, since branches/tags
# may be descended from it.
if (-d "$GIT_DIR/svn/trunk") {
print "Fetching trunk\n";
defined(my $pid = fork) or croak $!;
if (!$pid) {
$GIT_SVN = $ENV{GIT_SVN_ID} = 'trunk';
init_vars();
fetch(@_);
exit 0;
}
waitpid $pid, 0;
croak $? if $?;
}
rec_fetch('', "$GIT_DIR/svn", @_);
}
########################### utility functions #########################
########################### utility functions #########################
sub rec_fetch {
my ($pfx, $p, @args) = @_;
my @dir;
foreach (sort <$p/*>) {
if (-r "$_/info/url") {
$pfx .= '/' if $pfx && $pfx !~ m!/$!;
my $id = $pfx . basename $_;
next if $id eq 'trunk';
print "Fetching $id\n";
defined(my $pid = fork) or croak $!;
if (!$pid) {
$GIT_SVN = $ENV{GIT_SVN_ID} = $id;
init_vars();
fetch(@args);
exit 0;
}
waitpid $pid, 0;
croak $? if $?;
} elsif (-d $_) {
push @dir, $_;
}
}
foreach (@dir) {
my $x = $_;
$x =~ s!^\Q$GIT_DIR\E/svn/!!;
rec_fetch($x, $_);
}
}
sub complete_url_ls_init {
my ($url, $var, $switch, $pfx) = @_;
unless ($var) {
print STDERR "W: $switch not specified\n";
return;
}
$var =~ s#/+$##;
if ($var !~ m#^[a-z\+]+://#) {
$var = '/' . $var if ($var !~ m#^/#);
unless ($url) {
print STDERR "E: '$var' is not a complete URL ",
"and a separate URL is not specified\n";
exit 1;
}
$var = $url . $var;
}
chomp(my @ls = safe_qx(qw/svn ls --non-interactive/, $var));
my $old = $GIT_SVN;
defined(my $pid = fork) or croak $!;
if (!$pid) {
foreach my $u (map { "$var/$_" } (grep m!/$!, @ls)) {
$u =~ s#/+$##;
if ($u !~ m!\Q$var\E/(.+)$!) {
print STDERR "W: Unrecognized URL: $u\n";
die "This should never happen\n";
}
my $id = $pfx.$1;
print "init $u => $id\n";
$GIT_SVN = $ENV{GIT_SVN_ID} = $id;
init_vars();
init($u);
}
exit 0;
}
waitpid $pid, 0;
croak $? if $?;
}
sub common_prefix {
my $paths = shift;
my %common;
foreach (@$paths) {
my @tmp = split m#/#, $_;
my $p = '';
while (my $x = shift @tmp) {
$p .= "/$x";
$common{$p} ||= 0;
$common{$p}++;
}
}
foreach (sort {length $b <=> length $a} keys %common) {
if ($common{$_} == @$paths) {
return $_;
}
}
return '';
}
# this isn't funky-filename safe, but good enough for now...
sub graft_file_copy {
my ($grafts, $l_map, $u) = @_;
my $paths = $l_map->{$u};
my $pfx = common_prefix([keys %$paths]);
my $pid = open my $fh, '-|';
defined $pid or croak $!;
unless ($pid) {
exec(qw/svn log -v/, $u.$pfx) or croak $!;
}
my ($r, $mp) = (undef, undef);
while (<$fh>) {
chomp;
if (/^\-{72}$/) {
$mp = $r = undef;
} elsif (/^r(\d+) \| /) {
$r = $1 unless defined $r;
} elsif (/^Changed paths:/) {
$mp = 1;
} elsif ($mp && m#^ [AR] /(\S.*?) \(from /(\S+?):(\d+)\)$#) {
my $dbg = "r$r | $_";
my ($p1, $p0, $r0) = ($1, $2, $3);
my $c;
foreach my $x (keys %$paths) {
next unless ($p1 =~ /^\Q$x\E/);
my $i = $paths->{$x};
my $f = "$GIT_DIR/svn/$i/revs/$r";
unless (-r $f) {
print STDERR "r$r of $i not imported,",
" $dbg\n";
next;
}
$c = file_to_s($f);
}
next unless $c;
foreach my $x (keys %$paths) {
next unless ($p0 =~ /^\Q$x\E/);
my $i = $paths->{$x};
my $f = "$GIT_DIR/svn/$i/revs/$r0";
while ($r0 && !-r $f) {
# could be an older revision, too...
$r0--;
$f = "$GIT_DIR/svn/$i/revs/$r0";
}
unless (-r $f) {
print STDERR "r$r0 of $i not imported,",
" $dbg\n";
next;
}
my $r1 = file_to_s($f);
$grafts->{$c}->{$r1} = 1;
}
}
}
}
sub process_merge_msg_matches {
my ($grafts, $l_map, $u, $p, $c, @matches) = @_;
my (@strong, @weak);
foreach (@matches) {
# merging with ourselves is not interesting
next if $_ eq $p;
if ($l_map->{$u}->{$_}) {
push @strong, $_;
} else {
push @weak, $_;
}
}
foreach my $w (@weak) {
last if @strong;
# no exact match, use branch name as regexp.
my $re = qr/\Q$w\E/i;
foreach (keys %{$l_map->{$u}}) {
if (/$re/) {
push @strong, $_;
last;
}
}
last if @strong;
$w = basename($w);
$re = qr/\Q$w\E/i;
foreach (keys %{$l_map->{$u}}) {
if (/$re/) {
push @strong, $_;
last;
}
}
}
my ($rev) = ($c->{m} =~ /^git-svn-id:\s(?:\S+?)\@(\d+)
\s(?:[a-f\d\-]+)$/xsm);
unless (defined $rev) {
($rev) = ($c->{m} =~/^git-svn-id:\s(\d+)
\@(?:[a-f\d\-]+)/xsm);
return unless defined $rev;
}
foreach my $m (@strong) {
my ($r0, $s0) = find_rev_before($rev, $m);
$grafts->{$c->{c}}->{$s0} = 1 if defined $s0;
}
}
sub graft_merge_msg {
my ($grafts, $l_map, $u, $p, @re) = @_;
my $x = $l_map->{$u}->{$p};
my $rl = rev_list_raw($x);
while (my $c = next_rev_list_entry($rl)) {
foreach my $re (@re) {
my (@br) = ($c->{m} =~ /$re/g);
next unless @br;
process_merge_msg_matches($grafts,$l_map,$u,$p,$c,@br);
}
}
}
sub read_uuid {
sub read_uuid {
return if $SVN_UUID;
return if $SVN_UUID;
my $info = shift || svn_info('.');
my $info = shift || svn_info('.');
@ -402,6 +695,7 @@ sub repo_path_split {
$url .= "/$n";
$url .= "/$n";
}
}
push @repo_path_split_cache, qr/^(\Q$url\E)/;
push @repo_path_split_cache, qr/^(\Q$url\E)/;
$path = join('/',@paths);
return ($url, $path);
return ($url, $path);
}
}
@ -806,6 +1100,38 @@ sub svn_commit_tree {
return fetch("$committed=$commit")->{revision};
return fetch("$committed=$commit")->{revision};
}
}
sub rev_list_raw {
my (@args) = @_;
my $pid = open my $fh, '-|';
defined $pid or croak $!;
if (!$pid) {
exec(qw/git-rev-list --pretty=raw/, @args) or croak $!;
}
return { fh => $fh, t => { } };
}
sub next_rev_list_entry {
my $rl = shift;
my $fh = $rl->{fh};
my $x = $rl->{t};
while (<$fh>) {
if (/^commit ($sha1)$/o) {
if ($x->{c}) {
$rl->{t} = { c => $1 };
return $x;
} else {
$x->{c} = $1;
}
} elsif (/^parent ($sha1)$/o) {
$x->{p}->{$1} = 1;
} elsif (s/^ //) {
$x->{m} ||= '';
$x->{m} .= $_;
}
}
return ($x != $rl->{t}) ? $x : undef;
}
# read the entire log into a temporary file (which is removed ASAP)
# read the entire log into a temporary file (which is removed ASAP)
# and store the file handle + parser state
# and store the file handle + parser state
sub svn_log_raw {
sub svn_log_raw {
@ -1318,6 +1644,16 @@ sub svn_propget_base {
return safe_qx(qw/svn propget/, $p, $f);
return safe_qx(qw/svn propget/, $p, $f);
}
}
sub git_svn_each {
my $sub = shift;
foreach (`git-rev-parse --symbolic --all`) {
next unless s#^refs/remotes/##;
chomp $_;
next unless -f "$GIT_DIR/svn/$_/info/url";
&$sub($_);
}
}
sub migration_check {
sub migration_check {
return if (-d "$GIT_DIR/svn" || !-d $GIT_DIR);
return if (-d "$GIT_DIR/svn" || !-d $GIT_DIR);
print "Upgrading repository...\n";
print "Upgrading repository...\n";
@ -1344,6 +1680,16 @@ sub migration_check {
print "Done upgrading.\n";
print "Done upgrading.\n";
}
}
sub find_rev_before {
my ($r, $git_svn_id) = @_;
my @revs = map { basename $_ } <$GIT_DIR/svn/$git_svn_id/revs/*>;
foreach my $r0 (sort { $b <=> $a } @revs) {
next if $r0 >= $r;
return ($r0, file_to_s("$GIT_DIR/svn/$git_svn_id/revs/$r0"));
}
return (undef, undef);
}
sub init_vars {
sub init_vars {
$GIT_SVN ||= $ENV{GIT_SVN_ID} || 'git-svn';
$GIT_SVN ||= $ENV{GIT_SVN_ID} || 'git-svn';
$GIT_SVN_DIR = "$GIT_DIR/svn/$GIT_SVN";
$GIT_SVN_DIR = "$GIT_DIR/svn/$GIT_SVN";
@ -1384,6 +1730,79 @@ sub set_default_vals {
}
}
}
}
sub read_grafts {
my $gr_file = shift;
my ($grafts, $comments) = ({}, {});
if (open my $fh, '<', $gr_file) {
my @tmp;
while (<$fh>) {
if (/^($sha1)\s+/) {
my $c = $1;
if (@tmp) {
@{$comments->{$c}} = @tmp;
@tmp = ();
}
foreach my $p (split /\s+/, $_) {
$grafts->{$c}->{$p} = 1;
}
} else {
push @tmp, $_;
}
}
close $fh or croak $!;
@{$comments->{'END'}} = @tmp if @tmp;
}
return ($grafts, $comments);
}
sub write_grafts {
my ($grafts, $comments, $gr_file) = @_;
open my $fh, '>', $gr_file or croak $!;
foreach my $c (sort keys %$grafts) {
if ($comments->{$c}) {
print $fh $_ foreach @{$comments->{$c}};
}
my $p = $grafts->{$c};
delete $p->{$c}; # commits are not self-reproducing...
my $pid = open my $ch, '-|';
defined $pid or croak $!;
if (!$pid) {
exec(qw/git-cat-file commit/, $c) or croak $!;
}
while (<$ch>) {
if (/^parent ([a-f\d]{40})/) {
$p->{$1} = 1;
} else {
last unless /^\S/i;
}
}
close $ch; # breaking the pipe
print $fh $c, ' ', join(' ', sort keys %$p),"\n";
}
if ($comments->{'END'}) {
print $fh $_ foreach @{$comments->{'END'}};
}
close $fh or croak $!;
}
sub read_url_paths {
my $l_map = {};
git_svn_each(sub { my $x = shift;
my $u = file_to_s("$GIT_DIR/svn/$x/info/repo_url");
my $p = file_to_s("$GIT_DIR/svn/$x/info/repo_path");
# we hate trailing slashes
if ($u =~ s#(?:^\/+|\/+$)##g) {
s_to_file($u,"$GIT_DIR/svn/$x/info/repo_url");
}
if ($p =~ s#(?:^\/+|\/+$)##g) {
s_to_file($p,"$GIT_DIR/svn/$x/info/repo_path");
}
$l_map->{$u}->{$p} = $x;
});
return $l_map;
}
__END__
__END__
Data structures:
Data structures: