filter-repo: add a flag for restricting where --replace-text applies

Add a new callback option, --replace-text-filename-callback, whose
function body receives a filename and returns a simple boolean.  When
this command line flag is passed, the --replace-text rules are applied
only to files for which the callback returns true.

I could also potentially simplify bfg-ish quite a bit by making use of
this new callback, but it serves as a pretty interesting demonstration
of how to do more complex operations so I have left it as-is for now.

NOTE: This change should still be modified to check for a --source
parameter and, when one is given, to run the cat-file --batch process
with the --source directory as its current working directory.

Signed-off-by: Elijah Newren <newren@gmail.com>
replace-text-limited-to-certain-files
Elijah Newren 4 years ago
parent ebfdb43380
commit 3f8f8c2b6a

@ -1828,6 +1828,12 @@ EXAMPLES
"end the line with '==>' and some replacement text to "
"choose a replacement choice other than the default of '{}'."
.format(decode(FilteringOptions.default_replace_text))))
contents.add_argument('--replace-text-filename-callback',
metavar="FUNCTION_BODY",
help=_("Python code body for choosing whether a given file should "
"be affected by --replace-text; see CALLBACKS sections below, "
"and note that the function argument is 'filename' and the "
"return type expected is a boolean."))
contents.add_argument('--strip-blobs-bigger-than', metavar='SIZE',
dest='max_blob_size', default=0,
help=_("Strip blobs (files) bigger than specified size (e.g. '5M', "
@ -2083,6 +2089,9 @@ EXAMPLES
args.max_blob_size = int(args.max_blob_size[0:-1]) * mult[suffix]
else:
args.max_blob_size = int(args.max_blob_size)
if args.replace_text_filename_callback and not args.replace_text:
raise SystemExit(_("Error: --replace-text-filename-callback makes no "
"sense without --replace-text"))
@staticmethod
def get_replace_text(filename):
@ -2702,7 +2711,8 @@ class RepoFilter(object):
commit_callback = None,
tag_callback = None,
reset_callback = None,
done_callback = None):
done_callback = None,
replace_text_filename_callback = None):
self._args = args
@ -2722,6 +2732,10 @@ class RepoFilter(object):
self._name_callback = name_callback # author, committer, tagger
self._email_callback = email_callback # author, committer, tagger
self._refname_callback = refname_callback # from commit/tag/reset
# Store other miscellaneous callbacks
self._replace_text_filename_callback = replace_text_filename_callback
self._handle_arg_callbacks()
# Defaults for input
@ -2800,12 +2814,21 @@ class RepoFilter(object):
# Compile some regexes and cache those
self._hash_re = re.compile(br'(\b[0-9a-f]{7,40}\b)')
# If we are doing replace-text on only individual filenames, then we
# need a cat-file batch process
self.cat_file_proc = None
if self._args.replace_text_filename_callback and self._args.replace_text:
self.cat_file_proc = subproc.Popen(['git', 'cat-file', '--batch'],
stdin = subprocess.PIPE,
stdout = subprocess.PIPE)
def _handle_arg_callbacks(self):
def make_callback(argname, str):
exec('def callback({}, _do_not_use_this_var = None):\n'.format(argname)+
' '+'\n '.join(str.splitlines()), globals())
return callback #namespace['callback']
def handle(type):
def handle(type, argname = None):
argname = argname or type
callback_field = '_{}_callback'.format(type)
code_string = getattr(self._args, type+'_callback')
if code_string:
@ -2817,7 +2840,8 @@ class RepoFilter(object):
type not in ('blob', 'commit', 'tag', 'reset'):
raise SystemExit(_("Error: --%s-callback should have a return statement")
% type)
setattr(self, callback_field, make_callback(type, code_string))
setattr(self, callback_field, make_callback(argname, code_string))
handle('replace_text_filename', 'filename')
handle('filename')
handle('message')
handle('name')
@ -3242,6 +3266,7 @@ class RepoFilter(object):
blob.skip()
if self._args.replace_text and \
not self._args.replace_text_filename_callback and \
not any(x == b"0" for x in blob.data[0:8192]):
for literal, replacement in self._args.replace_text['literals']:
blob.data = blob.data.replace(literal, replacement)
@ -3251,6 +3276,34 @@ class RepoFilter(object):
if self._blob_callback:
self._blob_callback(blob, self.callback_metadata())
def _filter_file_content(self, change):
    """Apply the --replace-text rules to one file change, if it is selected.

    Nothing happens unless both --replace-text and
    --replace-text-filename-callback are in effect, the change still has
    filterable content (it is not a deletion, symlink, or submodule), and
    the filename callback returns a true value for change.filename.

    Otherwise the blob named by change.blob_id is fetched from the
    `git cat-file --batch` helper process, every literal and regex rule is
    applied to it, the result is inserted as a new Blob, and change.blob_id
    is pointed at that new blob.

    Raises SystemExit if the helper does not answer with a usable
    "<hash> <type> <size>" header (for example when it reports the object
    as missing, or has exited).
    """
    if change.type == b'D':
        return # deleted files have no remaining content to filter
    if change.mode in (b'120000', b'160000'):
        return # symlinks and submodules aren't text files we can filter
    if not self._args.replace_text_filename_callback or \
       not self._args.replace_text:
        return # rest of this function deals with text replacement
    if not self._replace_text_filename_callback(change.filename):
        return # this isn't one of the files we need to do text replacement on

    # Ask the batch process for the blob.  A successful reply is a header
    # line "<hash> <type> <size>", then <size> bytes of content, then a
    # single newline.
    self.cat_file_proc.stdin.write(change.blob_id + b'\n')
    self.cat_file_proc.stdin.flush()
    header = self.cat_file_proc.stdout.readline().split()
    if len(header) != 3:
        # "<object> missing" has two fields; EOF from a dead helper has none.
        # Either way there is no size to read, so fail with a clear message
        # instead of an unpacking ValueError.
        raise SystemExit(_("Error: git cat-file could not provide blob %r "
                           "for file %r")
                         % (change.blob_id, change.filename))
    contents = self.cat_file_proc.stdout.read(int(header[2]))
    # Consume the trailing newline right away so the stream stays in sync
    # with the protocol even if a replacement below raises.
    self.cat_file_proc.stdout.read(1)

    for literal, replacement in self._args.replace_text['literals']:
        contents = contents.replace(literal, replacement)
    for regex, replacement in self._args.replace_text['regexes']:
        contents = regex.sub(replacement, contents)

    blob = Blob(contents)
    self.insert(blob) # Note: we rely on this to call self._tweak_blob()
    change.blob_id = blob.id
def _filter_file_name(self, change, new_file_changes, commit_id):
def filename_matches(path_expression, pathname):
''' Returns whether path_expression matches pathname or a leading
@ -3357,6 +3410,9 @@ class RepoFilter(object):
change.blob_id in self._args.strip_blobs_with_ids:
continue
# Modify file content as needed
self._filter_file_content(change)
# Modify filename as needed
if self._filter_file_name(change, new_file_changes, commit.original_id) is None:
continue # Exclude this file
@ -3598,7 +3654,8 @@ class RepoFilter(object):
self._fe_orig = None
else:
skip_blobs = (self._blob_callback is None and
self._args.replace_text is None and
(self._args.replace_text is None or
self._args.replace_text_filename_callback is not None) and
self._args.source == self._args.target)
extra_flags = []
if skip_blobs:
@ -3895,6 +3952,11 @@ class RepoFilter(object):
if not self._args.stdin and self._fep.wait():
raise SystemExit(_("Error: fast-export failed; see above.")) # pragma: no cover
# Shut down self.cat_file_proc if we started it up
if self.cat_file_proc:
self.cat_file_proc.stdin.close()
self.cat_file_proc.wait()
# If we're not the manager of self._output, we should avoid post-run cleanup
if not self._managed_output:
return

@ -824,6 +824,34 @@ test_expect_success '--replace-text all options' '
)
'
# Check that --replace-text-filename-callback limits --replace-text to the
# files the callback selects.  The callback used here only returns true for
# the path "whatever", so:
#   - "whatever" must show the glob rule applied ("haphazard other change");
#   - "sequence/to" must still contain "spam" (literal rule not applied);
#   - "mercurial" must still contain "rename a lot" (first rule not applied).
# The commented-out echo lines record what each file would contain if the
# rules had been applied without the filename restriction.
test_expect_success '--replace-text-filename-callback' '
setup_analyze_me &&
(
git clone file://"$(pwd)"/analyze_me replace_text_callback &&
cd replace_text_callback &&
cat >../replace-rules <<-\EOF &&
rename==>relabel
literal:spam==>foodstuff
glob:ran*m==>haphazard
EOF
git filter-repo --replace-text ../replace-rules \
--replace-text-filename-callback \
"return filename == b\"whatever\"" &&
echo "spam" >expect && # Due to filename restriction
#echo "foodstuff" >expect && # Expected otherwise
test_cmp expect sequence/to &&
echo "haphazard other change" >expect &&
test_cmp expect whatever &&
echo "rename a lot" >expect && # Due to filename restriction
#echo "relabel a lot" >expect && # Expected otherwise
test_cmp expect mercurial
)
'
test_expect_success '--strip-blobs-bigger-than' '
setup_analyze_me &&
(
@ -1232,7 +1260,10 @@ test_expect_success 'other startup error cases and requests for help' '
test_i18ngrep "Pathnames cannot begin with a ./" err &&
test_must_fail git filter-repo --subdirectory-filter /foo 2>err &&
test_i18ngrep "Pathnames cannot begin with a ./" err
test_i18ngrep "Pathnames cannot begin with a ./" err &&
test_must_fail git filter-repo --replace-text-filename-callback "return True" 2>err &&
test_i18ngrep "makes no sense without --replace-text" err
)
'

Loading…
Cancel
Save