[PATCH] Add git-relink-script to fix up missing hardlinks

This will scan 2 or more object repositories and look for common objects, check
if they are hardlinked, and replace one with a hardlink to the other if not.

This version warns when skipping files because of size differences, and
handles more than 2 repositories automatically.

Signed-off-by: Ryan Anderson <ryan@michonline.com>
Cheered-on-by: Jeff Garzik <jgarzik@pobox.com>
Acked-by: Junio C Hamano <junkio@cox.net>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
maint
Ryan Anderson 2005-06-26 14:15:16 -04:00 committed by Linus Torvalds
parent 042a4ed7c5
commit 102fc37f3b
2 changed files with 174 additions and 1 deletions

View File

@ -25,7 +25,7 @@ SCRIPTS=git git-apply-patch-script git-merge-one-file-script git-prune-script \
git-deltafy-script git-fetch-script git-status-script git-commit-script \
git-log-script git-shortlog git-cvsimport-script git-diff-script \
git-reset-script git-add-script git-checkout-script git-clone-script \
gitk git-cherry git-rebase-script
gitk git-cherry git-rebase-script git-relink-script

PROG= git-update-cache git-diff-files git-init-db git-write-tree \
git-read-tree git-commit-tree git-cat-file git-fsck-cache \

173
git-relink-script Normal file
View File

@ -0,0 +1,173 @@
#!/usr/bin/env perl
# Copyright 2005, Ryan Anderson <ryan@michonline.com>
# Distribution permitted under the GPL v2, as distributed
# by the Free Software Foundation.
# Later versions of the GPL at the discretion of Linus Torvalds
#
# Scan two or more git object-trees, and hardlink any common objects between them.

use 5.006;
use strict;
use warnings;
use Getopt::Long;

# Forward declarations, so the prototyped helpers can be called from the
# driver code below before their definitions appear.
sub get_canonical_form($);
sub do_scan_directory($$$);
sub compare_two_files($$);
sub usage();
sub link_two_files($$);

# stats
# Grand totals across every repository pair, plus per-repository counters
# that link_two_files() and compare_two_files() increment.
my $total_linked = 0;
my $total_already = 0;
my ($linked,$already);

# --safe turns a size mismatch between same-named objects into a fatal error
# instead of a warning.
my $fail_on_different_sizes = 0;
my $help = 0;
GetOptions("safe" => \$fail_on_different_sizes,
           "help" => \$help);

usage() if $help;

my (@dirs) = @ARGV;

# At least one repository to relink plus the master repository are required.
usage() if (!defined $dirs[0] || !defined $dirs[1]);

# Turn every argument into the path of its .git/ directory (with a trailing
# slash); this dies on anything that is not a repository.
$_ = get_canonical_form($_) foreach (@dirs);

# The last directory on the command line is the master: its objects are the
# link sources for every other repository.
my $master_dir = pop @dirs;
my $master_objects = $master_dir . "objects/";

# Read the list of object subdirectories once and release the handle straight
# away; the list is reused for every repository below.
opendir(my $master_dh, $master_objects)
    or die "Failed to open $master_objects : $!";
my @hashdirs = grep !/^\.{1,2}$/, readdir($master_dh);
closedir($master_dh);

foreach my $repo (@dirs) {
    $linked = 0;
    $already = 0;
    printf("Searching '%s' and '%s' for common objects and hardlinking them...\n",
           $master_dir,$repo);

    foreach my $hashdir (@hashdirs) {
        do_scan_directory($master_dir, $hashdir, $repo);
    }

    printf("Linked %d files, %d were already linked.\n",$linked, $already);

    $total_linked += $linked;
    $total_already += $already;
}

printf("Totals: Linked %d files, %d were already linked.\n",
       $total_linked, $total_already);


# Compare every entry of one object subdirectory of the source repository
# with the same-named entry in the destination repository.
#
#   $srcdir - source .git/ directory, with trailing slash
#   $subdir - name of the subdirectory below objects/ to scan
#   $dstdir - destination .git/ directory, with trailing slash
#
# Dies if the source subdirectory cannot be opened.  Each entry is handed to
# compare_two_files(), which decides whether to link it.
sub do_scan_directory($$$) {
    my ($srcdir, $subdir, $dstdir) = @_;

    my $sfulldir = sprintf("%sobjects/%s/",$srcdir,$subdir);
    my $dfulldir = sprintf("%sobjects/%s/",$dstdir,$subdir);

    opendir(my $dh, $sfulldir)
        or die "Failed to opendir $sfulldir: $!";

    # Drop only the "." and ".." entries.  The pattern is anchored at both
    # ends so that a real file whose name merely ends in a dot is still
    # processed.
    my @files = grep { !/^\.{1,2}$/ } readdir($dh);

    # The listing is complete, so release the handle before doing any work
    # that might die.
    closedir($dh);

    foreach my $file (@files) {
        compare_two_files($sfulldir . $file, $dfulldir . $file);
    }
}

# Decide what to do with one object that may exist in both repositories.
#
#   $sfilename - path of the object in the source (master) repository
#   $dfilename - path of the same-named object in the destination repository
#
# Outcomes:
#   - destination missing: nothing to do, return
#   - source cannot be stat'ed: die
#   - files on different devices: return, nothing is linked or counted
#   - same device and inode: count it in $already
#   - same device, different inode, same size: link_two_files()
#   - same device, different inode, different size: warn, or die when
#     $fail_on_different_sizes is set
sub compare_two_files($$) {
    my ($sfilename, $dfilename) = @_;

    # Perl's stat returns relevant information as follows:
    # 0 = dev number
    # 1 = inode number
    # 7 = size
    my @sstatinfo = stat($sfilename);
    # Remember the source error now; the second stat below may overwrite $!.
    my $serror = "$!";
    my @dstatinfo = stat($dfilename);

    if (@sstatinfo == 0 && @dstatinfo == 0) {
        die sprintf("Stat of both %s and %s failed: %s\n",$sfilename, $dfilename, $!);

    } elsif (@sstatinfo == 0) {
        # Without source stat data the comparisons below would run on
        # undefined values, so report the failure instead.
        die sprintf("Stat of %s failed: %s\n", $sfilename, $serror);

    } elsif (@dstatinfo == 0) {
        return;
    }

    # Neither branch of the original logic acts on files on different devices.
    return if $sstatinfo[0] != $dstatinfo[0];

    if ($sstatinfo[1] == $dstatinfo[1]) {
        $already++;
        return;
    }

    if ($sstatinfo[7] == $dstatinfo[7]) {
        link_two_files($sfilename, $dfilename);
        return;
    }

    my $err = sprintf("ERROR: File sizes are not the same, cannot relink %s to %s.\n",
                      $sfilename, $dfilename);
    if ($fail_on_different_sizes) {
        die $err;
    } else {
        warn $err;
    }
}

# Map a repository path given on the command line to the path of its .git/
# directory, always ending in a slash.
#
#   $original - a working-tree directory or a .git directory, with or
#               without a trailing slash
#
# Returns the normalised path.  Dies if the argument is not a directory, or
# if the resulting .git/ path is not a directory.
sub get_canonical_form($) {
    my ($original) = @_;
    my $dir = $original;

    if (! -d $dir) {
        die "$dir is not a directory.";
    }

    # Make sure there is a trailing slash before looking at the last
    # path component.
    if ($dir !~ m#/$#) {
        $dir .= "/";
    }

    # Paths that do not already end in ".git/" are taken to be working trees.
    if ($dir !~ m#\.git/$#) {
        $dir .= ".git/";
    }

    -d $dir
        or die "$original does not have a .git/ subdirectory.\n";

    return $dir;
}

# Replace $dfilename with a hardlink to $sfilename.
#
#   $sfilename - existing file to link from
#   $dfilename - existing file to be replaced by the hardlink
#
# The destination is first moved aside to "<dfilename>.old" so that it can be
# put back if link() fails.  On success the moved-aside copy is removed and
# $linked is incremented.
#
# Dies if the initial rename, the link, or the final unlink fails.  When the
# link fails and the original file cannot be restored either, the error text
# also says which file to copy back by hand.
sub link_two_files($$) {
    my ($sfilename, $dfilename) = @_;
    my $tmpdname = sprintf("%s.old",$dfilename);

    rename($dfilename,$tmpdname)
        or die sprintf("Failure renaming %s to %s: %s",
                       $dfilename, $tmpdname, $!);

    if (! link($sfilename,$dfilename)) {
        # Capture the reason now: the rollback rename below may overwrite $!.
        my $link_error = "$!";

        my $failtxt = "";
        unless (rename($tmpdname,$dfilename)) {
            # Rollback failed too, so the object only exists under its
            # temporary name.
            $failtxt = sprintf(
                "Git Repository containing %s is probably corrupted, " .
                "please copy '%s' to '%s' to fix.\n",
                $dfilename, $tmpdname, $dfilename);
        }

        die sprintf("Failed to link %s to %s: %s\n%s",
                    $sfilename, $dfilename,
                    $link_error, $failtxt);
    }

    unlink($tmpdname)
        or die sprintf("Unlink of %s failed: %s\n",
                       $tmpdname, $!);

    $linked++;
}


# Print the command-line synopsis and the option summary to standard output,
# then terminate the program with exit status 1.  Never returns.
sub usage() {
    my @help_text = (
        "Usage: $0 [--safe] <dir> [<dir> ...] <master_dir> \n",
        "All directories should contain a .git/objects/ subdirectory.\n",
        "Options\n",
        "\t--safe\t" .
            "Stops if two objects with the same hash exist but " .
            "have different sizes. Default is to warn and continue.\n",
    );

    foreach my $chunk (@help_text) {
        print($chunk);
    }

    exit(1);
}