You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
git-filter-repo/git-filter-repo

153 lines
4.2 KiB
Plaintext

#!/usr/bin/env python
import commands
import re
import sha # bleh...when can I assume python >= 2.5?
import sys
from pyparsing import ParserElement, Literal, Optional, Combine, Word, nums, \
ZeroOrMore, ParseException
from pyparsing import Token, ParseResults
class ExactData(Token):
    """Specialized pyparsing subclass for handling data dumps in
    git-fast-import exact data format.

    Matches a header of the form ``data <count>`` followed by a newline,
    then consumes the next <count> characters of the input verbatim as the
    payload.  The parse result is the three tokens
    ``['data', <count-as-string>, <payload>]``.
    """

    def __init__(self):
        super(ExactData, self).__init__()
        # Header regex; group 1 captures the declared payload length.
        self.pattern = r"data (\d+)\n"
        self.re = re.compile(self.pattern)
        self.reString = self.pattern
        self.name = "ExactData"
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Parse one exact-data record starting at ``loc``.

        Returns a ``(new_loc, ParseResults)`` pair where ``new_loc`` is the
        header end plus the declared length.  Raises the element's stored
        parse exception when the header does not match at ``loc``.
        """
        result = self.re.match(instring, loc)
        if not result:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
        num = result.group(1)
        loc = result.end() + int(num)
        # Slicing clamps at the end of the input, so a truncated stream
        # yields a payload shorter than the declared length rather than an
        # error at this point.
        data = instring[result.end():loc]
        ret = ParseResults(['data', num, data])
        return loc, ret

    def __str__(self):
        """Return the inherited string form, falling back to ``"Data:"``."""
        try:
            # The original named a non-existent class (ExactMath) here; the
            # NameError was swallowed, so the inherited __str__ never ran.
            return super(ExactData, self).__str__()
        except Exception:
            # Best-effort: fall through to the cached representation below.
            pass
        if self.strRepr is None:
            self.strRepr = "Data:"
        return self.strRepr
# Highest mark number handed out so far.
newmark = 0
# Maps marks seen in the input to the renumbered marks we emit.
mark_dict = {}


def translate_mark(old_mark=None):
    """Return the renumbered mark corresponding to ``old_mark``.

    A truthy ``old_mark`` that has been seen before maps to the number it
    was given the first time.  Anything else (a new mark, or a falsy value
    such as ``None``) is assigned the next sequential number, which is also
    recorded in ``mark_dict`` under ``old_mark``.
    """
    global newmark
    already_known = bool(old_mark) and old_mark in mark_dict
    if already_known:
        return mark_dict[old_mark]
    newmark = newmark + 1
    mark_dict[old_mark] = newmark
    return newmark
class GitElement(object):
    """Base class for the elements of a fast-export stream.

    Subclasses set ``self.type`` to a string naming the element kind and
    override ``dump()`` to write themselves out.
    """

    def __init__(self):
        # Element kind; None until a subclass fills it in.
        self.type = None

    def dump(self):
        """Write this element out; must be overridden by subclasses.

        Raises:
            SystemExit: always, naming the class that failed to override.
        """
        # Interpolate the class name into the message.  The original passed
        # the format string and the type as two separate SystemExit
        # arguments, so the "%s" was never filled in.
        raise SystemExit("Unimplemented function: %s.dump()"
                         % type(self).__name__)
class Blob(GitElement):
    """A blob element: raw file contents plus its (renumbered) mark."""

    def __init__(self, data, mark = None):
        """Store ``data`` and translate ``mark`` via ``translate_mark``."""
        super(Blob, self).__init__()
        self.mark = translate_mark(mark)
        self.data = data
        self.type = 'blob'

    def dump(self):
        """Write the blob to stdout as a blob / mark / data record."""
        emit = sys.stdout.write
        payload = self.data
        emit('blob\n')
        emit('mark :%d\n' % self.mark)
        emit('data %d\n%s' % (len(payload), payload))
class FastExportParser(object):
    """Parser for a ``git fast-export`` stream built on pyparsing.

    The grammar currently covers only ``blob`` commands.  Each parsed blob
    is turned into a ``Blob``, handed to ``blob_callback`` (if given) and
    then dumped to stdout.  The remaining callbacks are stored on the
    instance but are not invoked anywhere in this class.
    """

    def __init__(self,
                 tag_callback = None, commit_callback = None,
                 blob_callback = None, progress_callback = None,
                 reset_callback = None, checkpoint_callback = None,
                 everything_callback = None):
        self._setup_parser()
        self.tag_callback = tag_callback
        self.blob_callback = blob_callback
        self.reset_callback = reset_callback
        self.commit_callback = commit_callback
        self.progress_callback = progress_callback
        self.checkpoint_callback = checkpoint_callback
        self.everything_callback = everything_callback

    def _make_blob(self, t):
        """Parse action for a blob command.

        ``t`` is the token list for one blob: ``t[1]`` is the mark name
        (":<n>"), ``t[3]`` the declared data length and ``t[4]`` the data.

        Returns a compact ``['blob', mark, length, sha1]`` list that
        replaces the original tokens in the parse results.

        Raises:
            SystemExit: if the declared length disagrees with the length of
                the data actually captured.
        """
        # Create the Blob object from the parser tokens
        mark = int(t[1][1:])
        datalen = int(t[3])
        data = t[4]
        if datalen != len(data):
            # Both values must be supplied as one tuple; without the
            # parentheses "%" bound to datalen alone and raised TypeError.
            raise SystemExit('%d != len(%s)' % (datalen, data))
        blob = Blob(data, mark)
        # Call any user callback to allow them to modify the blob
        if self.blob_callback:
            self.blob_callback(blob)
        # Now print the resulting blob to stdout
        blob.dump()
        # Replace data with its sha1sum to cut down on memory usage
        # (python parser stores whole resulting parse tree in memory)
        sha1sum = sha.new(blob.data).hexdigest()
        return ['blob', blob.mark, len(blob.data), sha1sum]

    def _setup_parser(self):
        """Build the pyparsing grammar and store it in ``self.stream``."""
        # Basic setup: whitespace is significant in the stream, so turn off
        # pyparsing's default whitespace skipping before creating elements.
        ParserElement.setDefaultWhitespaceChars('')
        number = Word(nums)
        lf = Literal('\n').suppress()
        sp = Literal(' ').suppress()
        # Parsing marks
        mark_name = Combine(Literal(':') + number)
        mark = Literal('mark').suppress() - sp + mark_name + lf
        # Parsing blobs
        exact_data = ExactData() + Optional(lf)
        file_content = exact_data
        blob = Literal('blob') + lf + mark + file_content
        blob.setParseAction(lambda t: self._make_blob(t))
        # Tying it all together
        cmd = blob
        self.stream = ZeroOrMore(cmd)

    def parse(self, string):
        """Parse a complete fast-export stream held in ``string``.

        Returns the pyparsing results for the whole stream.  On a parse
        error, prints the offending line, a caret under the error column
        and the error itself, then raises SystemExit.
        """
        try:
            results = self.stream.parseString(string, parseAll = True)
        except ParseException:
            # sys.exc_info() fetches the active exception without relying
            # on the version-specific "except X, err" / "except X as err"
            # syntax.
            err = sys.exc_info()[1]
            print(err.line)
            print(" " * (err.column - 1) + "^")
            print(err)
            raise SystemExit
        return results
# Script driver (runs at module level, so it also runs on import).
# Builds a parser with no callbacks, captures the full fast-export stream of
# the repository at the hard-coded path foo/.git as one string, and parses it.
# Each blob is written to stdout while parsing; the final print shows the
# parse results, in which each blob's data has been replaced by its sha1.
parser = FastExportParser()
string = commands.getoutput("GIT_DIR=foo/.git git fast-export --all")
results = parser.parse(string)
print results