You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
372 lines
11 KiB
Python
372 lines
11 KiB
Python
import os
|
|
import re
|
|
import sys
|
|
from subprocess import Popen, PIPE, call
|
|
from email.Utils import unquote
|
|
|
|
# Names exported by ``from <module> import *``.  Every entry must match a
# definition in this file exactly, otherwise the star-import raises
# AttributeError (the list previously misspelled FastExportOutput).
__all__ = [
    "Blob",
    "Reset",
    "FileChanges",
    "Commit",
    "get_total_commits",
    "FastExportFilter",
    "FastExportOutput",
    "FastImportInput",
]
|
|
|
|
class IDs(object):
    """Hand out sequential integer ids and remember renames between them.

    Attributes:
        count: the highest id allocated so far (0 before the first ``new()``).
        translation: maps an old id to the id (or None) that replaces it.
    """

    def __init__(self):
        self.count = 0
        self.translation = {}

    def _check_created(self, idnum):
        """Exit with an error if *idnum* exceeds every id allocated so far."""
        if idnum > self.count:
            raise SystemExit("Specified ID, %d, has not been created yet." % idnum)

    def new(self):
        """Allocate and return the next id; the first one returned is 1."""
        self.count += 1
        return self.count

    def record_rename(self, old_id, new_id):
        """Record that *old_id* must be translated to *new_id*.

        *new_id* may be None (Blob.skip passes None); ``translate`` then
        returns None for *old_id*.

        Raises:
            SystemExit: if either id is larger than any id allocated so far.
        """
        self._check_created(old_id)
        # Only range-check real ids: ordering None against an int raises
        # TypeError on Python 3 (Python 2 silently treated it as "not
        # greater"), so None has to bypass the comparison.
        if new_id is not None:
            self._check_created(new_id)
        if old_id != new_id:
            self.translation[old_id] = new_id

    def translate(self, old_id):
        """Return the recorded replacement for *old_id*, or *old_id* itself.

        Raises:
            SystemExit: if *old_id* is larger than any id allocated so far.
        """
        self._check_created(old_id)
        return self.translation.get(old_id, old_id)
|
|
# Module-level allocator instance: Blob and Commit take their ids from it and
# FastExportFilter uses it to record and translate renumbered marks.
ids = IDs()
|
|
|
|
class GitElement(object):
    """Common base class for the stream elements defined in this file.

    Attributes:
        type: element kind; None here, each subclass in this file sets it.
        dumped: 0 initially; the subclasses in this file set it to 1 in
            ``dump()`` and to 2 in ``skip()``.
        old_id: id assigned through ``set_old_id``, or None.
    """

    def __init__(self):
        self.type = None
        self.dumped = 0
        self.old_id = None

    def dump(self, file):
        """Write the element to *file*; subclasses are expected to override.

        Raises:
            SystemExit: always, naming the class that lacks an override.
        """
        # The class name must be %-interpolated into the message; handing it
        # to SystemExit as a second argument leaves the "%s" unfilled and
        # turns the exit value into a tuple.
        raise SystemExit("Unimplemented function: %s.dump()" % type(self).__name__)

    def set_old_id(self, value):
        """Store *value* as this element's ``old_id``."""
        self.old_id = value
|
|
|
|
class Blob(GitElement):
    """File content written to the stream as a ``blob`` command.

    Each instance takes a fresh id from the module-level ``ids`` allocator.
    """

    def __init__(self, data):
        GitElement.__init__(self)
        self.type = 'blob'
        self.data = data
        self.id = ids.new()

    def dump(self, file):
        """Write the blob to *file* unless it was already dumped or skipped."""
        if self.dumped:
            return
        self.dumped = 1

        payload = self.data
        pieces = (
            'blob\n',
            'mark :%d\n' % self.id,
            'data %d\n%s' % (len(payload), payload),
            '\n',
        )
        for piece in pieces:
            file.write(piece)

    def skip(self):
        """Mark the blob as skipped and map its id to None in ``ids``."""
        self.dumped = 2
        # Prefer the id set through set_old_id; fall back to our own id.
        renamed_from = self.old_id or self.id
        ids.record_rename(renamed_from, None)
|
|
|
|
class Reset(GitElement):
    """A ``reset`` command for *ref*, optionally based on the mark *from_ref*."""

    def __init__(self, ref, from_ref = None):
        GitElement.__init__(self)
        self.type = 'reset'
        self.ref = ref
        self.from_ref = from_ref

    def dump(self, file):
        """Write the reset to *file* unless it was already dumped or skipped."""
        if self.dumped:
            return
        self.dumped = 1

        file.write('reset %s\n' % self.ref)
        base = self.from_ref
        if not base:
            return
        file.write('from :%d\n' % base)
        # NOTE(review): the source this was restored from had lost its
        # indentation; the blank line is assumed to belong to the 'from'
        # branch only -- confirm against the original file.
        file.write('\n')

    def skip(self):
        """Mark the reset as skipped so that ``dump`` writes nothing."""
        self.dumped = 2
|
|
|
|
class FileChanges(GitElement):
    """One file modification ('M') or deletion ('D') inside a commit.

    ``mode`` and ``id`` are kept only for 'M' changes; for every other type
    they are None.
    """

    def __init__(self, type, filename, id = None, mode = None):
        GitElement.__init__(self)
        self.type = type
        self.filename = filename

        is_modify = (type == 'M')
        if is_modify and mode is None:
            raise SystemExit("file mode and idnum needed for %s" % filename)
        self.mode = mode if is_modify else None
        self.id = id if is_modify else None

    def dump(self, file):
        """Write the change to *file*.

        Nothing is written if the change was already dumped or skipped, or if
        it is an 'M' change whose id is None.

        Raises:
            SystemExit: if the type is neither 'M' nor 'D'.
        """
        if self.dumped:
            return
        if self.type == 'M' and self.id is None:
            return
        self.dumped = 1

        kind = self.type
        if kind == 'D':
            file.write('D %s\n' % self.filename)
        elif kind == 'M':
            file.write('M %s :%d %s\n' % (self.mode, self.id, self.filename))
        else:
            raise SystemExit("Unhandled filechange type: %s" % kind)

    def skip(self):
        """Mark the change as skipped so that ``dump`` writes nothing."""
        self.dumped = 2
|
|
|
|
class Commit(GitElement):
    """A commit written to the stream as a ``commit`` command.

    Each instance takes a fresh id from the module-level ``ids`` allocator.

    Args:
        branch: ref written after the ``commit`` keyword.
        author_name, author_email, author_date: fields of the author line.
        committer_name, committer_email, committer_date: fields of the
            committer line.
        message: commit message; its ``len()`` is written as the data size.
        file_changes: objects with a ``dump(file)`` method, written in order.
        from_commit: mark for the ``from`` line, or None to omit it.
        merge_commits: marks for the ``merge`` lines; defaults to a new
            empty list.
    """

    def __init__(self, branch,
                 author_name, author_email, author_date,
                 committer_name, committer_email, committer_date,
                 message,
                 file_changes,
                 from_commit = None,
                 merge_commits = None):
        GitElement.__init__(self)
        self.type = 'commit'
        self.branch = branch
        self.author_name = author_name
        self.author_email = author_email
        self.author_date = author_date
        self.committer_name = committer_name
        self.committer_email = committer_email
        self.committer_date = committer_date
        self.message = message
        self.file_changes = file_changes
        self.id = ids.new()
        self.from_commit = from_commit
        # Build a fresh list per instance.  A literal ``[]`` default is
        # created once and would be shared (and mutated in common) by every
        # commit constructed without explicit merge parents.
        if merge_commits is None:
            merge_commits = []
        self.merge_commits = merge_commits

    def dump(self, file):
        """Write the commit to *file* unless it was already dumped or skipped."""
        if self.dumped:
            return
        self.dumped = 1

        file.write('commit %s\n' % self.branch)
        file.write('mark :%d\n' % self.id)
        file.write('author %s <%s> %s\n' %
                   (self.author_name, self.author_email, self.author_date))
        file.write('committer %s <%s> %s\n' %
                   (self.committer_name, self.committer_email,
                    self.committer_date))
        file.write('data %d\n%s' % (len(self.message), self.message))
        if self.from_commit:
            file.write('from :%s\n' % self.from_commit)
        for ref in self.merge_commits:
            file.write('merge :%s\n' % ref)
        for change in self.file_changes:
            change.dump(file)
        file.write('\n')

    def skip(self, new_id):
        """Mark the commit as skipped and map its id to *new_id* in ``ids``."""
        self.dumped = 2
        ids.record_rename(self.old_id or self.id, new_id)
|
|
|
|
class FastExportFilter(object):
    """Parse a fast-export style stream, let callbacks edit it, re-emit it.

    ``run`` understands the ``blob``, ``reset`` and ``commit`` commands; any
    other line aborts with SystemExit.  Each parsed element is handed to its
    type-specific callback (if set) and then to ``everything_callback`` (if
    set) before being dumped to the output.  The tag, progress and checkpoint
    callbacks are stored but never invoked by this class.

    Malformed input lines are reported through SystemExit.
    """

    def __init__(self,
                 tag_callback = None, commit_callback = None,
                 blob_callback = None, progress_callback = None,
                 reset_callback = None, checkpoint_callback = None,
                 everything_callback = None):
        self.tag_callback = tag_callback
        self.blob_callback = blob_callback
        self.reset_callback = reset_callback
        self.commit_callback = commit_callback
        self.progress_callback = progress_callback
        self.checkpoint_callback = checkpoint_callback
        self.everything_callback = everything_callback

        self.input = None
        self.output = sys.stdout
        # One-line lookahead buffer: always holds the next unparsed line.
        self.nextline = ''

    def _advance_nextline(self):
        """Read the next input line into the lookahead buffer."""
        self.nextline = self.input.readline()

    def _require_match(self, pattern, description):
        """Match *pattern* against the current line or exit with an error."""
        matches = re.match(pattern, self.nextline)
        if not matches:
            raise SystemExit("Malformed %s line: '%s'"
                             % (description, self.nextline))
        return matches

    def _parse_optional_mark(self):
        """Consume a ``mark :<n>`` line if present; return n or None."""
        mark = None
        matches = re.match(r'mark :(\d+)\n$', self.nextline)
        if matches:
            mark = int(matches.group(1))
            self._advance_nextline()
        return mark

    def _parse_optional_baseref(self, refname):
        """Consume a ``<refname> :<n>`` line if present.

        Returns the id that ``ids.translate`` gives for n, or None when the
        current line is not such a line.
        """
        baseref = None
        matches = re.match(r'%s :(\d+)\n' % refname, self.nextline)
        if matches:
            baseref = ids.translate(int(matches.group(1)))
            self._advance_nextline()
        return baseref

    def _parse_optional_filechange(self):
        """Consume an 'M' or 'D' line if present; return a FileChanges or None.

        Raises:
            SystemExit: if an 'M' line is not of the form
                ``M <mode> :<mark> <path>``.
        """
        filechange = None
        if self.nextline.startswith('M '):
            matches = self._require_match(r'M (\d+) :(\d+) (.*)\n$',
                                          'filemodify')
            (mode, idnum, path) = matches.groups()
            idnum = ids.translate(int(idnum))
            if path.startswith('"'):
                path = unquote(path)
            filechange = FileChanges('M', path, idnum, mode)
            self._advance_nextline()
        elif self.nextline.startswith('D '):
            # Strip the leading "D " and the trailing newline.
            path = self.nextline[2:-1]
            if path.startswith('"'):
                path = unquote(path)
            filechange = FileChanges('D', path)
            self._advance_nextline()
        return filechange

    def _parse_ref_line(self, refname):
        """Consume a ``<refname> <ref>`` line and return <ref>.

        Raises:
            SystemExit: if the current line does not have that form.
        """
        matches = self._require_match(r'%s (.*)\n$' % refname, refname)
        ref = matches.group(1)
        self._advance_nextline()
        return ref

    def _parse_user(self, usertype):
        """Consume a ``<usertype> <name> <<email>> <when>`` line.

        Returns:
            The tuple (name, email, when) of strings.

        Raises:
            SystemExit: if the current line does not have that form.
        """
        matches = self._require_match(r'%s (.*?) <(.*?)> (.*)\n$' % usertype,
                                      usertype)
        (name, email, when) = matches.groups()
        self._advance_nextline()
        return (name, email, when)

    def _parse_data(self):
        """Consume a ``data <size>`` line plus <size> units of payload.

        Returns:
            The payload read from the input.

        Raises:
            SystemExit: if the current line is not a ``data <size>`` line.
        """
        matches = self._require_match(r'data (\d+)\n$', 'data')
        size = int(matches.group(1))
        data = self.input.read(size)
        self._advance_nextline()
        return data

    def _parse_blob(self):
        """Parse one blob command, run the callbacks, and dump the blob."""
        # Parse the Blob
        self._advance_nextline()
        id = self._parse_optional_mark()
        data = self._parse_data()
        if self.nextline == '\n':
            self._advance_nextline()

        # Create the blob
        blob = Blob(data)
        if id:
            blob.set_old_id(id)
            ids.record_rename(id, blob.id)

        # Call any user callback to allow them to modify the blob
        if self.blob_callback:
            self.blob_callback(blob)
        if self.everything_callback:
            self.everything_callback('blob', blob)

        # Now print the resulting blob
        blob.dump(self.output)

    def _parse_reset(self):
        """Parse one reset command, run the callbacks, and dump the reset."""
        # Parse the Reset
        ref = self._parse_ref_line('reset')
        from_ref = self._parse_optional_baseref('from')
        if self.nextline == '\n':
            self._advance_nextline()

        # Create the reset
        reset = Reset(ref, from_ref)

        # Call any user callback to allow them to modify the reset
        if self.reset_callback:
            self.reset_callback(reset)
        if self.everything_callback:
            self.everything_callback('reset', reset)

        # Now print the resulting reset
        reset.dump(self.output)

    def _parse_commit(self):
        """Parse one commit command, run the callbacks, and dump the commit."""
        # Parse the Commit
        branch = self._parse_ref_line('commit')
        id = self._parse_optional_mark()

        author_name = None
        if self.nextline.startswith('author'):
            (author_name, author_email, author_date) = self._parse_user('author')

        (committer_name, committer_email, committer_date) = \
            self._parse_user('committer')

        # Fall back to the committer only when there was no author line at
        # all; an author line with an empty name must keep its own
        # email and date.
        if author_name is None:
            (author_name, author_email, author_date) = \
                (committer_name, committer_email, committer_date)

        commit_msg = self._parse_data()

        from_commit = self._parse_optional_baseref('from')
        merge_commits = []
        merge_ref = self._parse_optional_baseref('merge')
        # NOTE(review): a merge parent whose id translates to None also ends
        # this loop, leaving any later merge lines unparsed -- confirm
        # whether skipped commits can be referenced as merge parents.
        while merge_ref:
            merge_commits.append(merge_ref)
            merge_ref = self._parse_optional_baseref('merge')

        file_changes = []
        file_change = self._parse_optional_filechange()
        while file_change:
            file_changes.append(file_change)
            file_change = self._parse_optional_filechange()
        if self.nextline == '\n':
            self._advance_nextline()

        # Okay, now we can finally create the Commit object
        commit = Commit(branch,
                        author_name, author_email, author_date,
                        committer_name, committer_email, committer_date,
                        commit_msg,
                        file_changes,
                        from_commit,
                        merge_commits)
        if id:
            commit.set_old_id(id)
            ids.record_rename(id, commit.id)

        # Call any user callback to allow them to modify the commit
        if self.commit_callback:
            self.commit_callback(commit)
        if self.everything_callback:
            self.everything_callback('commit', commit)

        # Now print the resulting commit to the output
        commit.dump(self.output)

    def run(self, input_file, output_file):
        """Filter the stream read from *input_file* into *output_file*.

        A false *output_file* keeps the current output, which defaults to
        ``sys.stdout``.

        Raises:
            SystemExit: on a line that starts none of the handled commands,
                or on malformed input inside a command.
        """
        self.input = input_file
        if output_file:
            self.output = output_file
        self.nextline = input_file.readline()
        while self.nextline:
            if self.nextline.startswith('blob'):
                self._parse_blob()
            elif self.nextline.startswith('reset'):
                self._parse_reset()
            elif self.nextline.startswith('commit'):
                self._parse_commit()
            else:
                raise SystemExit("Could not parse line: '%s'" % self.nextline)
|
|
|
|
def FastExportOutput(source_repo, extra_args = []):
    """Start ``git fast-export --all --topo-order`` inside *source_repo*.

    *extra_args* is a list of further command-line arguments appended to the
    command.  Returns the stdout pipe of the spawned process.
    """
    command = ["git", "fast-export", "--all", "--topo-order"]
    command = command + extra_args
    exporter = Popen(command, stdout = PIPE, cwd = source_repo)
    return exporter.stdout
|
|
|
|
def FastImportInput(target_repo, extra_args = []):
    """Start ``git fast-import`` inside *target_repo* and return its stdin.

    If *target_repo* is not an existing directory it is created and
    initialised with ``git init`` first.  *extra_args* is a list of further
    command-line arguments appended to the fast-import command.

    Raises:
        SystemExit: if ``git init`` exits with a non-zero status.
    """
    if not os.path.isdir(target_repo):
        os.makedirs(target_repo)
        if call(["git", "init"], cwd = target_repo) != 0:
            raise SystemExit("git init in %s failed!" % target_repo)

    command = ["git", "fast-import"] + extra_args
    # We don't want the statistics fast-import prints on stderr.  Nothing
    # here ever reads that stream, so send it to the null device: an unread
    # PIPE would block the child once the pipe buffer fills up.
    devnull = open(os.devnull, 'w')
    try:
        importer = Popen(command,
                         stdin = PIPE,
                         stderr = devnull,
                         cwd = target_repo)
    finally:
        # The child holds its own copy of the descriptor after the spawn.
        devnull.close()
    return importer.stdin
|
|
|
|
def get_total_commits(repo):
    """Return the number of lines ``git rev-list --all`` prints in *repo*.

    The lines are counted in Python, so no external ``wc`` is needed, and
    ``communicate()`` waits for git so the child process is reaped.
    """
    lister = Popen(["git", "rev-list", "--all"], stdout = PIPE, cwd = repo)
    listing = lister.communicate()[0]
    return len(listing.splitlines())
|
|
|
|
def get_total_objects(repo):
    """Return unpacked objects + packed objects for the repository *repo*.

    The numbers come from the ``count`` and ``in-pack`` fields of
    ``git count-objects -v``.  They are looked up by name rather than by
    line position, so extra or reordered output lines do not matter.

    Raises:
        SystemExit: if either field is missing or is not an integer.
    """
    counter = Popen(["git", "count-objects", "-v"], stdout = PIPE, cwd = repo)
    # communicate() drains the pipe and waits for git to exit.
    report = counter.communicate()[0]

    fields = {}
    for line in report.decode('ascii', 'replace').splitlines():
        key, separator, value = line.partition(':')
        if separator:
            fields[key.strip()] = value.strip()

    try:
        return int(fields['count']) + int(fields['in-pack'])
    except (KeyError, ValueError):
        raise SystemExit("Could not parse 'git count-objects -v' output in %s"
                         % repo)
|