diff --git a/Documentation/converting-from-filter-branch.md b/Documentation/converting-from-filter-branch.md index 96361f9..f7f1de2 100644 --- a/Documentation/converting-from-filter-branch.md +++ b/Documentation/converting-from-filter-branch.md @@ -9,6 +9,7 @@ to learn how to convert over to using filter-repo. * [Intention of "equivalent" commands](#intention-of-equivalent-commands) * [Basic Differences](#basic-differences) * [Cheat Sheet: Conversion of Examples from the filter-branch manpage](#cheat-sheet-conversion-of-examples-from-the-filter-branch-manpage) + * [Cheat Sheet: Additional conversion examples](#cheat-sheet-additional-conversion-examples) ## Half-hearted conversions @@ -309,3 +310,37 @@ Note that filter-branch accepts `--not` among the revision specifiers, but that appears to python to be a flag name which breaks parsing. So, instead of e.g. `--not C` as we might use with filter-branch, we can specify `^C` to filter-repo. + +## Cheat Sheet: Additional conversion examples + +### Running a code formatter or linter on each file with some extension + +Running some program on a subset of files is relatively natural in +filter-branch: + +```shell + git filter-branch --tree-filter ' + git ls-files -z "*.c" \ + | xargs -0 -n 1 clang-format -style=file -i + ' +``` + +filter-repo decided not to provide a way to run an external program to +do filtering, because most filter-branch uses of this ability are +riddled with [safety +problems](https://git-scm.com/docs/git-filter-branch#SAFETY) and +[performance +issues](https://git-scm.com/docs/git-filter-branch#PERFORMANCE). +However, in special cases like this it's fairly safe. One can write a +script that uses filter-repo as a library to achieve this, while also +gaining filter-repo's automatic handling of other concerns like +rewriting commit IDs in commit messages or pruning commits that become +empty. 
In fact, one of the [contrib +demos](../contrib/filter-repo-demos), +[lint-history](../contrib/filter-repo-demos/lint-history), handles +this exact type of situation already: + +```shell + lint-history --relevant 'return filename.endswith(b".c")' \ + clang-format -style=file -i +``` diff --git a/contrib/filter-repo-demos/lint-history b/contrib/filter-repo-demos/lint-history index eb016b6..4ec34ed 100755 --- a/contrib/filter-repo-demos/lint-history +++ b/contrib/filter-repo-demos/lint-history @@ -13,7 +13,12 @@ NOTE: Several people have taken and modified this script for a variety of special cases (linting python files, linting jupyter notebooks, just linting java files, etc.) and posted their modifications at https://github.com/newren/git-filter-repo/issues/45 -Feel free to take a look and adopt some of their ideas. +Feel free to take a look and adopt some of their ideas. Most of these +modifications are probably strictly unnecessary since you could just make +a lint-script that takes the filename, checks that it matches what you +want, and then calls the real linter. But I guess folks don't like making +an intermediate script. So I eventually added the --relevant flag for +picking out certain files, providing yet another way to handle it. """ """ @@ -22,7 +27,8 @@ Please see the near the top of git-filter-repo. """ -# Technically, this program could be replaced by a "one-liner"; e.g. +# Technically, if you are only running on all non-binary files and don't care +# about filenames, then this program could be replaced by a "one-liner"; e.g. # git filter-repo --force --blob-callback ' # if not any(x == b"0" for x in blob.data[0:8192]): # filename = '.git/info/tmpfile' @@ -34,7 +40,7 @@ near the top of git-filter-repo. # os.remove(filename) # ' # but let's do it as a full-fledged program that imports git_filter_repo -# anyway... +# and show how to also do it with filename handling...
import argparse import os @@ -45,8 +51,34 @@ try: except ImportError: raise SystemExit("Error: Couldn't find git_filter_repo.py. Did you forget to make a symlink to git-filter-repo named git_filter_repo.py or did you forget to put the latter in your PYTHONPATH?") -parser = argparse.ArgumentParser( - description='Lint all files in history') +example_text = '''CALLBACK + + When you pass --relevant 'BODY', the following style of function + will be compiled and called: + + def is_relevant(filename): + BODY + + Thus, to only run on files with a ".txt" extension you would run + lint-history --relevant 'return filename.endswith(b".txt")' ... + +EXAMPLES + + To run dos2unix on all non-binary files in history: + lint-history dos2unix + + To run eslint --fix on all .js files in history: + lint-history --relevant 'return filename.endswith(b".js")' eslint --fix + ''' + +parser = argparse.ArgumentParser(description='Run a program (e.g. code formatter or linter) on files in history', + epilog = example_text, + formatter_class=argparse.RawDescriptionHelpFormatter) + +parser.add_argument('--relevant', metavar="FUNCTION_BODY", + help=("Python code for determining whether to apply linter to a " + "given filename. Implies --filenames-important. See CALLBACK " + "below.")) parser.add_argument('--filenames-important', action='store_true', help=("By default, contents are written to a temporary file with a " "random name. 
If the linting program needs to know the file " @@ -67,6 +99,8 @@ def lint_with_real_filenames(commit, metadata): change.blob_id = blobs_handled[change.blob_id] elif change.type == b'D': continue + elif not is_relevant(change.filename): + continue else: # Get the old blob contents cat_file_process.stdin.write(change.blob_id + b'\n') @@ -104,6 +138,13 @@ def lint_non_binary_blobs(blob, metadata): blob.data = f.read() os.remove(filename) +if lint_args.filenames_important and not lint_args.relevant: + lint_args.relevant = 'return True' +if lint_args.relevant: + body = lint_args.relevant + exec('def is_relevant(filename):\n '+'\n '.join(body.splitlines()), + globals()) + lint_args.filenames_important = True args = fr.FilteringOptions.default_options() args.force = True if lint_args.filenames_important: