You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
793 lines
20 KiB
793 lines
20 KiB
#!/usr/bin/env python |
|
# |
|
# Copyright (c) 2012 Felipe Contreras |
|
# |
|
|
|
# |
|
# Just copy to your ~/bin, or anywhere in your $PATH. |
|
# Then you can clone with: |
|
# % git clone bzr::/path/to/bzr/repo/or/url |
|
# |
|
# For example: |
|
# % git clone bzr::$HOME/myrepo |
|
# or |
|
# % git clone bzr::lp:myrepo |
|
# |
|
|
|
import sys |
|
|
|
import bzrlib |
|
if hasattr(bzrlib, "initialize"): |
|
bzrlib.initialize() |
|
|
|
import bzrlib.plugin |
|
bzrlib.plugin.load_plugins() |
|
|
|
import bzrlib.generate_ids |
|
import bzrlib.transport |
|
import bzrlib.errors |
|
import bzrlib.ui |
|
|
|
import sys |
|
import os |
|
import json |
|
import re |
|
import StringIO |
|
import atexit, shutil, hashlib, urlparse, subprocess |
|
|
|
NAME_RE = re.compile('^([^<>]+)') |
|
AUTHOR_RE = re.compile('^([^<>]+?)? ?<([^<>]*)>$') |
|
RAW_AUTHOR_RE = re.compile('^(\w+) (.+)? <(.*)> (\d+) ([+-]\d+)') |
|
|
|
def die(msg, *args): |
|
sys.stderr.write('ERROR: %s\n' % (msg % args)) |
|
sys.exit(1) |
|
|
|
def warn(msg, *args): |
|
sys.stderr.write('WARNING: %s\n' % (msg % args)) |
|
|
|
def gittz(tz): |
|
return '%+03d%02d' % (tz / 3600, tz % 3600 / 60) |
|
|
|
class Marks: |
|
|
|
def __init__(self, path): |
|
self.path = path |
|
self.tips = {} |
|
self.marks = {} |
|
self.rev_marks = {} |
|
self.last_mark = 0 |
|
self.load() |
|
|
|
def load(self): |
|
if not os.path.exists(self.path): |
|
return |
|
|
|
tmp = json.load(open(self.path)) |
|
self.tips = tmp['tips'] |
|
self.marks = tmp['marks'] |
|
self.last_mark = tmp['last-mark'] |
|
|
|
for rev, mark in self.marks.iteritems(): |
|
self.rev_marks[mark] = rev |
|
|
|
def dict(self): |
|
return { 'tips': self.tips, 'marks': self.marks, 'last-mark' : self.last_mark } |
|
|
|
def store(self): |
|
json.dump(self.dict(), open(self.path, 'w')) |
|
|
|
def __str__(self): |
|
return str(self.dict()) |
|
|
|
def from_rev(self, rev): |
|
return self.marks[rev] |
|
|
|
def to_rev(self, mark): |
|
return self.rev_marks[mark] |
|
|
|
def next_mark(self): |
|
self.last_mark += 1 |
|
return self.last_mark |
|
|
|
def get_mark(self, rev): |
|
self.last_mark += 1 |
|
self.marks[rev] = self.last_mark |
|
return self.last_mark |
|
|
|
def is_marked(self, rev): |
|
return rev in self.marks |
|
|
|
def new_mark(self, rev, mark): |
|
self.marks[rev] = mark |
|
self.rev_marks[mark] = rev |
|
self.last_mark = mark |
|
|
|
def get_tip(self, branch): |
|
return self.tips.get(branch, None) |
|
|
|
def set_tip(self, branch, tip): |
|
self.tips[branch] = tip |
|
|
|
class Parser: |
|
|
|
def __init__(self, repo): |
|
self.repo = repo |
|
self.line = self.get_line() |
|
|
|
def get_line(self): |
|
return sys.stdin.readline().strip() |
|
|
|
def __getitem__(self, i): |
|
return self.line.split()[i] |
|
|
|
def check(self, word): |
|
return self.line.startswith(word) |
|
|
|
def each_block(self, separator): |
|
while self.line != separator: |
|
yield self.line |
|
self.line = self.get_line() |
|
|
|
def __iter__(self): |
|
return self.each_block('') |
|
|
|
def next(self): |
|
self.line = self.get_line() |
|
if self.line == 'done': |
|
self.line = None |
|
|
|
def get_mark(self): |
|
i = self.line.index(':') + 1 |
|
return int(self.line[i:]) |
|
|
|
def get_data(self): |
|
if not self.check('data'): |
|
return None |
|
i = self.line.index(' ') + 1 |
|
size = int(self.line[i:]) |
|
return sys.stdin.read(size) |
|
|
|
def get_author(self): |
|
m = RAW_AUTHOR_RE.match(self.line) |
|
if not m: |
|
return None |
|
_, name, email, date, tz = m.groups() |
|
committer = '%s <%s>' % (name, email) |
|
tz = int(tz) |
|
tz = ((tz / 100) * 3600) + ((tz % 100) * 60) |
|
return (committer, int(date), tz) |
|
|
|
def rev_to_mark(rev): |
|
global marks |
|
return marks.from_rev(rev) |
|
|
|
def mark_to_rev(mark): |
|
global marks |
|
return marks.to_rev(mark) |
|
|
|
def fixup_user(user): |
|
name = mail = None |
|
user = user.replace('"', '') |
|
m = AUTHOR_RE.match(user) |
|
if m: |
|
name = m.group(1) |
|
mail = m.group(2).strip() |
|
else: |
|
m = NAME_RE.match(user) |
|
if m: |
|
name = m.group(1).strip() |
|
|
|
return '%s <%s>' % (name, mail) |
|
|
|
def get_filechanges(cur, prev): |
|
modified = {} |
|
removed = {} |
|
|
|
changes = cur.changes_from(prev) |
|
|
|
def u(s): |
|
return s.encode('utf-8') |
|
|
|
for path, fid, kind in changes.added: |
|
modified[u(path)] = fid |
|
for path, fid, kind in changes.removed: |
|
removed[u(path)] = None |
|
for path, fid, kind, mod, _ in changes.modified: |
|
modified[u(path)] = fid |
|
for oldpath, newpath, fid, kind, mod, _ in changes.renamed: |
|
removed[u(oldpath)] = None |
|
if kind == 'directory': |
|
lst = cur.list_files(from_dir=newpath, recursive=True) |
|
for path, file_class, kind, fid, entry in lst: |
|
if kind != 'directory': |
|
modified[u(newpath + '/' + path)] = fid |
|
else: |
|
modified[u(newpath)] = fid |
|
|
|
return modified, removed |
|
|
|
def export_files(tree, files): |
|
global marks, filenodes |
|
|
|
final = [] |
|
for path, fid in files.iteritems(): |
|
kind = tree.kind(fid) |
|
|
|
h = tree.get_file_sha1(fid) |
|
|
|
if kind == 'symlink': |
|
d = tree.get_symlink_target(fid) |
|
mode = '120000' |
|
elif kind == 'file': |
|
|
|
if tree.is_executable(fid): |
|
mode = '100755' |
|
else: |
|
mode = '100644' |
|
|
|
# is the blob already exported? |
|
if h in filenodes: |
|
mark = filenodes[h] |
|
final.append((mode, mark, path)) |
|
continue |
|
|
|
d = tree.get_file_text(fid) |
|
elif kind == 'directory': |
|
continue |
|
else: |
|
die("Unhandled kind '%s' for path '%s'" % (kind, path)) |
|
|
|
mark = marks.next_mark() |
|
filenodes[h] = mark |
|
|
|
print "blob" |
|
print "mark :%u" % mark |
|
print "data %d" % len(d) |
|
print d |
|
|
|
final.append((mode, mark, path)) |
|
|
|
return final |
|
|
|
def export_branch(branch, name): |
|
global prefix |
|
|
|
ref = '%s/heads/%s' % (prefix, name) |
|
tip = marks.get_tip(name) |
|
|
|
repo = branch.repository |
|
repo.lock_read() |
|
revs = branch.iter_merge_sorted_revisions(None, tip, 'exclude', 'forward') |
|
count = 0 |
|
|
|
revs = [revid for revid, _, _, _ in revs if not marks.is_marked(revid)] |
|
|
|
for revid in revs: |
|
|
|
rev = repo.get_revision(revid) |
|
|
|
parents = rev.parent_ids |
|
time = rev.timestamp |
|
tz = rev.timezone |
|
committer = rev.committer.encode('utf-8') |
|
committer = "%s %u %s" % (fixup_user(committer), time, gittz(tz)) |
|
authors = rev.get_apparent_authors() |
|
if authors: |
|
author = authors[0].encode('utf-8') |
|
author = "%s %u %s" % (fixup_user(author), time, gittz(tz)) |
|
else: |
|
author = committer |
|
msg = rev.message.encode('utf-8') |
|
|
|
msg += '\n' |
|
|
|
if len(parents) == 0: |
|
parent = bzrlib.revision.NULL_REVISION |
|
else: |
|
parent = parents[0] |
|
|
|
cur_tree = repo.revision_tree(revid) |
|
prev = repo.revision_tree(parent) |
|
modified, removed = get_filechanges(cur_tree, prev) |
|
|
|
modified_final = export_files(cur_tree, modified) |
|
|
|
if len(parents) == 0: |
|
print 'reset %s' % ref |
|
|
|
print "commit %s" % ref |
|
print "mark :%d" % (marks.get_mark(revid)) |
|
print "author %s" % (author) |
|
print "committer %s" % (committer) |
|
print "data %d" % (len(msg)) |
|
print msg |
|
|
|
for i, p in enumerate(parents): |
|
try: |
|
m = rev_to_mark(p) |
|
except KeyError: |
|
# ghost? |
|
continue |
|
if i == 0: |
|
print "from :%s" % m |
|
else: |
|
print "merge :%s" % m |
|
|
|
for f in removed: |
|
print "D %s" % (f,) |
|
for f in modified_final: |
|
print "M %s :%u %s" % f |
|
print |
|
|
|
count += 1 |
|
if (count % 100 == 0): |
|
print "progress revision %s (%d/%d)" % (revid, count, len(revs)) |
|
print "#############################################################" |
|
|
|
repo.unlock() |
|
|
|
revid = branch.last_revision() |
|
|
|
# make sure the ref is updated |
|
print "reset %s" % ref |
|
print "from :%u" % rev_to_mark(revid) |
|
print |
|
|
|
marks.set_tip(name, revid) |
|
|
|
def export_tag(repo, name): |
|
global tags, prefix |
|
|
|
ref = '%s/tags/%s' % (prefix, name) |
|
print "reset %s" % ref |
|
print "from :%u" % rev_to_mark(tags[name]) |
|
print |
|
|
|
def do_import(parser): |
|
global dirname |
|
|
|
branch = parser.repo |
|
path = os.path.join(dirname, 'marks-git') |
|
|
|
print "feature done" |
|
if os.path.exists(path): |
|
print "feature import-marks=%s" % path |
|
print "feature export-marks=%s" % path |
|
sys.stdout.flush() |
|
|
|
while parser.check('import'): |
|
ref = parser[1] |
|
if ref.startswith('refs/heads/'): |
|
name = ref[len('refs/heads/'):] |
|
export_branch(branch, name) |
|
if ref.startswith('refs/tags/'): |
|
name = ref[len('refs/tags/'):] |
|
export_tag(branch, name) |
|
parser.next() |
|
|
|
print 'done' |
|
|
|
sys.stdout.flush() |
|
|
|
def parse_blob(parser): |
|
global blob_marks |
|
|
|
parser.next() |
|
mark = parser.get_mark() |
|
parser.next() |
|
data = parser.get_data() |
|
blob_marks[mark] = data |
|
parser.next() |
|
|
|
class CustomTree(): |
|
|
|
def __init__(self, repo, revid, parents, files): |
|
global files_cache |
|
|
|
self.updates = {} |
|
|
|
def copy_tree(revid): |
|
files = files_cache[revid] = {} |
|
repo.lock_read() |
|
tree = repo.repository.revision_tree(revid) |
|
try: |
|
for path, entry in tree.iter_entries_by_dir(): |
|
files[path] = entry.file_id |
|
finally: |
|
repo.unlock() |
|
return files |
|
|
|
if len(parents) == 0: |
|
self.base_id = bzrlib.revision.NULL_REVISION |
|
self.base_files = {} |
|
else: |
|
self.base_id = parents[0] |
|
self.base_files = files_cache.get(self.base_id, None) |
|
if not self.base_files: |
|
self.base_files = copy_tree(self.base_id) |
|
|
|
self.files = files_cache[revid] = self.base_files.copy() |
|
|
|
for path, f in files.iteritems(): |
|
fid = self.files.get(path, None) |
|
if not fid: |
|
fid = bzrlib.generate_ids.gen_file_id(path) |
|
f['path'] = path |
|
self.updates[fid] = f |
|
|
|
def last_revision(self): |
|
return self.base_id |
|
|
|
def iter_changes(self): |
|
changes = [] |
|
|
|
def get_parent(dirname, basename): |
|
parent_fid = self.base_files.get(dirname, None) |
|
if parent_fid: |
|
return parent_fid |
|
parent_fid = self.files.get(dirname, None) |
|
if parent_fid: |
|
return parent_fid |
|
if basename == '': |
|
return None |
|
fid = bzrlib.generate_ids.gen_file_id(path) |
|
add_entry(fid, dirname, 'directory') |
|
return fid |
|
|
|
def add_entry(fid, path, kind, mode = None): |
|
dirname, basename = os.path.split(path) |
|
parent_fid = get_parent(dirname, basename) |
|
|
|
executable = False |
|
if mode == '100755': |
|
executable = True |
|
elif mode == '120000': |
|
kind = 'symlink' |
|
|
|
change = (fid, |
|
(None, path), |
|
True, |
|
(False, True), |
|
(None, parent_fid), |
|
(None, basename), |
|
(None, kind), |
|
(None, executable)) |
|
self.files[path] = change[0] |
|
changes.append(change) |
|
|
|
def update_entry(fid, path, kind, mode = None): |
|
dirname, basename = os.path.split(path) |
|
parent_fid = get_parent(dirname, basename) |
|
|
|
executable = False |
|
if mode == '100755': |
|
executable = True |
|
elif mode == '120000': |
|
kind = 'symlink' |
|
|
|
change = (fid, |
|
(path, path), |
|
True, |
|
(True, True), |
|
(None, parent_fid), |
|
(None, basename), |
|
(None, kind), |
|
(None, executable)) |
|
self.files[path] = change[0] |
|
changes.append(change) |
|
|
|
def remove_entry(fid, path, kind): |
|
dirname, basename = os.path.split(path) |
|
parent_fid = get_parent(dirname, basename) |
|
change = (fid, |
|
(path, None), |
|
True, |
|
(True, False), |
|
(parent_fid, None), |
|
(None, None), |
|
(None, None), |
|
(None, None)) |
|
del self.files[path] |
|
changes.append(change) |
|
|
|
for fid, f in self.updates.iteritems(): |
|
path = f['path'] |
|
|
|
if 'deleted' in f: |
|
remove_entry(fid, path, 'file') |
|
continue |
|
|
|
if path in self.base_files: |
|
update_entry(fid, path, 'file', f['mode']) |
|
else: |
|
add_entry(fid, path, 'file', f['mode']) |
|
|
|
return changes |
|
|
|
def get_file_with_stat(self, file_id, path=None): |
|
mark = self.updates[file_id]['mark'] |
|
return (StringIO.StringIO(blob_marks[mark]), None) |
|
|
|
def get_symlink_target(self, file_id): |
|
mark = self.updates[file_id]['mark'] |
|
return blob_marks[mark] |
|
|
|
def c_style_unescape(string): |
|
if string[0] == string[-1] == '"': |
|
return string.decode('string-escape')[1:-1] |
|
return string |
|
|
|
def parse_commit(parser): |
|
global marks, blob_marks, parsed_refs |
|
global mode |
|
|
|
parents = [] |
|
|
|
ref = parser[1] |
|
parser.next() |
|
|
|
if ref != 'refs/heads/master': |
|
die("bzr doesn't support multiple branches; use 'master'") |
|
|
|
commit_mark = parser.get_mark() |
|
parser.next() |
|
author = parser.get_author() |
|
parser.next() |
|
committer = parser.get_author() |
|
parser.next() |
|
data = parser.get_data() |
|
parser.next() |
|
if parser.check('from'): |
|
parents.append(parser.get_mark()) |
|
parser.next() |
|
while parser.check('merge'): |
|
parents.append(parser.get_mark()) |
|
parser.next() |
|
|
|
# fast-export adds an extra newline |
|
if data[-1] == '\n': |
|
data = data[:-1] |
|
|
|
files = {} |
|
|
|
for line in parser: |
|
if parser.check('M'): |
|
t, m, mark_ref, path = line.split(' ', 3) |
|
mark = int(mark_ref[1:]) |
|
f = { 'mode' : m, 'mark' : mark } |
|
elif parser.check('D'): |
|
t, path = line.split(' ') |
|
f = { 'deleted' : True } |
|
else: |
|
die('Unknown file command: %s' % line) |
|
path = c_style_unescape(path).decode('utf-8') |
|
files[path] = f |
|
|
|
repo = parser.repo |
|
|
|
committer, date, tz = committer |
|
parents = [str(mark_to_rev(p)) for p in parents] |
|
revid = bzrlib.generate_ids.gen_revision_id(committer, date) |
|
props = {} |
|
props['branch-nick'] = repo.nick |
|
|
|
mtree = CustomTree(repo, revid, parents, files) |
|
changes = mtree.iter_changes() |
|
|
|
repo.lock_write() |
|
try: |
|
builder = repo.get_commit_builder(parents, None, date, tz, committer, props, revid) |
|
try: |
|
list(builder.record_iter_changes(mtree, mtree.last_revision(), changes)) |
|
builder.finish_inventory() |
|
builder.commit(data.decode('utf-8', 'replace')) |
|
except Exception, e: |
|
builder.abort() |
|
raise |
|
finally: |
|
repo.unlock() |
|
|
|
parsed_refs[ref] = revid |
|
marks.new_mark(revid, commit_mark) |
|
|
|
def parse_reset(parser): |
|
global parsed_refs |
|
|
|
ref = parser[1] |
|
parser.next() |
|
|
|
if ref != 'refs/heads/master': |
|
die("bzr doesn't support multiple branches; use 'master'") |
|
|
|
# ugh |
|
if parser.check('commit'): |
|
parse_commit(parser) |
|
return |
|
if not parser.check('from'): |
|
return |
|
from_mark = parser.get_mark() |
|
parser.next() |
|
|
|
parsed_refs[ref] = mark_to_rev(from_mark) |
|
|
|
def do_export(parser): |
|
global parsed_refs, dirname, peer |
|
|
|
parser.next() |
|
|
|
for line in parser.each_block('done'): |
|
if parser.check('blob'): |
|
parse_blob(parser) |
|
elif parser.check('commit'): |
|
parse_commit(parser) |
|
elif parser.check('reset'): |
|
parse_reset(parser) |
|
elif parser.check('tag'): |
|
pass |
|
elif parser.check('feature'): |
|
pass |
|
else: |
|
die('unhandled export command: %s' % line) |
|
|
|
repo = parser.repo |
|
|
|
for ref, revid in parsed_refs.iteritems(): |
|
if ref == 'refs/heads/master': |
|
repo.generate_revision_history(revid, marks.get_tip('master')) |
|
if peer: |
|
try: |
|
repo.push(peer, stop_revision=revid) |
|
except bzrlib.errors.DivergedBranches: |
|
print "error %s non-fast forward" % ref |
|
continue |
|
|
|
try: |
|
wt = repo.bzrdir.open_workingtree() |
|
wt.update() |
|
except bzrlib.errors.NoWorkingTree: |
|
pass |
|
|
|
print "ok %s" % ref |
|
|
|
print |
|
|
|
def do_capabilities(parser): |
|
global dirname |
|
|
|
print "import" |
|
print "export" |
|
print "refspec refs/heads/*:%s/heads/*" % prefix |
|
print "refspec refs/tags/*:%s/tags/*" % prefix |
|
|
|
path = os.path.join(dirname, 'marks-git') |
|
|
|
if os.path.exists(path): |
|
print "*import-marks %s" % path |
|
print "*export-marks %s" % path |
|
|
|
print |
|
|
|
def ref_is_valid(name): |
|
return not True in [c in name for c in '~^: \\'] |
|
|
|
def do_list(parser): |
|
global tags |
|
print "? refs/heads/%s" % 'master' |
|
|
|
branch = parser.repo |
|
branch.lock_read() |
|
for tag, revid in branch.tags.get_tag_dict().items(): |
|
try: |
|
branch.revision_id_to_dotted_revno(revid) |
|
except bzrlib.errors.NoSuchRevision: |
|
continue |
|
if not ref_is_valid(tag): |
|
continue |
|
print "? refs/tags/%s" % tag |
|
tags[tag] = revid |
|
branch.unlock() |
|
print "@refs/heads/%s HEAD" % 'master' |
|
print |
|
|
|
def get_repo(url, alias): |
|
global dirname, peer |
|
|
|
origin = bzrlib.bzrdir.BzrDir.open(url) |
|
branch = origin.open_branch() |
|
|
|
if not isinstance(origin.transport, bzrlib.transport.local.LocalTransport): |
|
clone_path = os.path.join(dirname, 'clone') |
|
remote_branch = branch |
|
if os.path.exists(clone_path): |
|
# pull |
|
d = bzrlib.bzrdir.BzrDir.open(clone_path) |
|
branch = d.open_branch() |
|
result = branch.pull(remote_branch, [], None, False) |
|
else: |
|
# clone |
|
d = origin.sprout(clone_path, None, |
|
hardlink=True, create_tree_if_local=False, |
|
source_branch=remote_branch) |
|
branch = d.open_branch() |
|
branch.bind(remote_branch) |
|
|
|
peer = remote_branch |
|
else: |
|
peer = None |
|
|
|
return branch |
|
|
|
def fix_path(alias, orig_url): |
|
url = urlparse.urlparse(orig_url, 'file') |
|
if url.scheme != 'file' or os.path.isabs(url.path): |
|
return |
|
abs_url = urlparse.urljoin("%s/" % os.getcwd(), orig_url) |
|
cmd = ['git', 'config', 'remote.%s.url' % alias, "bzr::%s" % abs_url] |
|
subprocess.call(cmd) |
|
|
|
def main(args): |
|
global marks, prefix, dirname |
|
global tags, filenodes |
|
global blob_marks |
|
global parsed_refs |
|
global files_cache |
|
global is_tmp |
|
|
|
alias = args[1] |
|
url = args[2] |
|
|
|
tags = {} |
|
filenodes = {} |
|
blob_marks = {} |
|
parsed_refs = {} |
|
files_cache = {} |
|
marks = None |
|
|
|
if alias[5:] == url: |
|
is_tmp = True |
|
alias = hashlib.sha1(alias).hexdigest() |
|
else: |
|
is_tmp = False |
|
|
|
prefix = 'refs/bzr/%s' % alias |
|
gitdir = os.environ['GIT_DIR'] |
|
dirname = os.path.join(gitdir, 'bzr', alias) |
|
|
|
if not is_tmp: |
|
fix_path(alias, url) |
|
|
|
if not os.path.exists(dirname): |
|
os.makedirs(dirname) |
|
|
|
bzrlib.ui.ui_factory.be_quiet(True) |
|
|
|
repo = get_repo(url, alias) |
|
|
|
marks_path = os.path.join(dirname, 'marks-int') |
|
marks = Marks(marks_path) |
|
|
|
parser = Parser(repo) |
|
for line in parser: |
|
if parser.check('capabilities'): |
|
do_capabilities(parser) |
|
elif parser.check('list'): |
|
do_list(parser) |
|
elif parser.check('import'): |
|
do_import(parser) |
|
elif parser.check('export'): |
|
do_export(parser) |
|
else: |
|
die('unhandled command: %s' % line) |
|
sys.stdout.flush() |
|
|
|
def bye(): |
|
if not marks: |
|
return |
|
if not is_tmp: |
|
marks.store() |
|
else: |
|
shutil.rmtree(dirname) |
|
|
|
atexit.register(bye) |
|
sys.exit(main(sys.argv))
|
|
|