filter-repo: add basic path filtering

Signed-off-by: Elijah Newren <newren@gmail.com>
pull/13/head
Elijah Newren 6 years ago
parent a427a80322
commit 636a3cf575

@ -12,6 +12,7 @@ in order to make it into a well-rounded filtering tool.
from __future__ import print_function
import argparse
import fnmatch
import os
import re
import subprocess
@ -1055,6 +1056,13 @@ _CURRENT_STREAM_NUMBER = 0
######################################################################
class AppendFilter(argparse.Action):
    """argparse action that accumulates path options as typed filters.

    Each occurrence of an option using this action appends a
    ``(filter_type, value)`` pair to the list stored on the namespace under
    ``self.dest``.  ``filter_type`` is whatever follows ``--path-`` in the
    option string that was used (e.g. ``'match'``, ``'glob'``, ``'regex'``);
    an option string with nothing after that prefix, such as the ``--path``
    alias, is recorded as ``'match'``.
    """

    def __call__(self, parser, namespace, values, option_string=None):
        # Slicing '--path' (or a missing option string) past the prefix
        # yields '', so fall back to the default 'match' filter type.
        filter_type = (option_string or '')[len('--path-'):] or 'match'
        # The destination may be absent or None before the first use.
        items = getattr(namespace, self.dest, []) or []
        # Store 2-tuples: that is the shape the path-filtering code unpacks.
        items.append((filter_type, values))
        setattr(namespace, self.dest, items)
def get_args():
parser = argparse.ArgumentParser(description='Rewrite repository history')
# FIXME: Need to special case all --* args that rev-list takes, or call
@ -1062,6 +1070,32 @@ def get_args():
parser.add_argument('--force', '-f', action='store_true',
help='''Rewrite history even if the current repo does not
look like a fresh clone.''')
path_group = parser.add_argument_group(title='Filtering based on paths')
path_group.add_argument('--invert-paths', action='store_false',
dest='inclusive',
help='''Invert the selection of files from the
specified --path-{match,glob,regex} options
below, i.e. only select files matching none
of those options.''')
path_group.add_argument('--path-match', '--path', metavar='DIR_OR_FILE',
action=AppendFilter, dest='path_filter',
help='''Exact paths (files or directories) to include
in filtered history. Multiple --path
options can be specified to get a union of
paths.''')
path_group.add_argument('--path-glob', metavar='GLOB',
action=AppendFilter, dest='path_filter',
help='''Glob of paths to include in filtered
history. Multiple --path-glob options can
be specified to get a union of paths.''')
path_group.add_argument('--path-regex', metavar='REGEX',
action=AppendFilter, dest='path_filter',
help='''Regex of paths to include in filtered
history. Multiple --path-regex options can
be specified to get a union of paths''')
parser.add_argument('revisions', nargs='*',
help='''Branches/tags/refs to rewrite. Special rev-list
options, such as --branches, --tags, --all,
@ -1073,6 +1107,9 @@ def get_args():
args = parser.parse_args()
if not args.revisions:
args.revisions = ['--all']
if args.path_filter == None:
args.path_filter = []
args.inclusive = False
return args
def is_repository_bare():
@ -1144,6 +1181,28 @@ def get_refs():
output = subprocess.check_output('git show-ref'.split())
return dict(reversed(x.split()) for x in output.splitlines())
def tweak_commit(args, commit):
    """Filter ``commit.file_changes`` according to the path filters in args.

    ``args.path_filter`` is an iterable of ``(filter_type, path_expression)``
    pairs where ``filter_type`` is ``'match'``, ``'glob'`` or ``'regex'``.
    A file change is kept when "its filename matches at least one filter"
    equals ``args.inclusive``.  ``commit.file_changes`` is replaced with the
    filtered list; nothing is returned.
    """

    def include_file(path_filter, pathname):
        """Return True if pathname is selected by any entry of path_filter."""
        for (filter_type, path_expression) in path_filter:
            if filter_type == 'match':
                # Exact file, or anything below a directory: the expression
                # must be a prefix that ends on a path-component boundary.
                n = len(path_expression)
                # endswith() rather than path_expression[n-1] so that an
                # empty expression cannot raise IndexError.  pathname[n] is
                # safe: startswith() held and len(pathname) != n, so
                # len(pathname) > n.
                if (pathname.startswith(path_expression) and
                        (path_expression.endswith('/') or
                         len(pathname) == n or
                         pathname[n] == '/')):
                    return True
            elif filter_type == 'glob':
                if fnmatch.fnmatch(pathname, path_expression):
                    return True
            elif filter_type == 'regex':
                # re.search: the pattern may match anywhere in the path.
                if re.search(path_expression, pathname):
                    return True
        return False

    commit.file_changes = [
        change for change in commit.file_changes
        if include_file(args.path_filter, change.filename) == args.inclusive
    ]
def run_fast_filter():
args = get_args()
orig_refs = get_refs()
@ -1156,7 +1215,9 @@ def run_fast_filter():
stdout=subprocess.PIPE)
fip = subprocess.Popen('git fast-import --force --quiet'.split(),
stdin=subprocess.PIPE)
filter = FastExportFilter()
filter = FastExportFilter(
commit_callback = lambda c : tweak_commit(args, c),
)
filter.run(fep.stdout, fip.stdin)
fip.stdin.close()
if fep.wait():

Loading…
Cancel
Save