contrib: simple examples of tools based on filter-repo

Signed-off-by: Elijah Newren <newren@gmail.com>
pull/13/head
Elijah Newren 5 years ago
parent 2094221721
commit 6d231c0a94

@ -0,0 +1,34 @@
## Background
filter-repo is not merely a history rewriting tool, it also contains a
library that can be used to write new history rewriting tools. This
directory contains several examples showing the breadth of different things
that could be done.
## Quick overview
Command&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; |Description
-------|-----------
insert-beginning |Add a new file (e.g. LICENSE/COPYING) to the beginning of history.
signed-off-by |Add a Signed-off-by tag to a range of commits.
lint-history |Run some lint command on all non-binary files in history.
clean-ignore |Delete files from history which match current gitignore rules.
## Purpose
Please note that the point of these examples is not to provide new complete
tools, but simply to demonstrate that extremely varied history rewriting
tools can be created which automatically inherit lots of useful base
functionality: rewriting hashes in commit messages, pruning commits that
become empty, handling filenames with funny characters, non-standard
encodings, handling of replace refs, etc. (Additional examples of using
filter-repo as a library can also be found in [the
testsuite](../../t/t9391/).) My sincerest hope is that these examples
provide lots of useful functionality, but that each is missing at least one
critical piece for your use case. Go forth and extend and improve.
## Usage
All the examples require a symlink to git-filter-repo in your PYTHONPATH
named git_filter_repo.py in order to run; also, all have a --help flag to
get a description of their usage and flags.

@ -0,0 +1,71 @@
#!/usr/bin/env python3
"""
This is a simple program that will delete files from history which match
current gitignore rules, while also:
1) pruning commits which become empty
2) pruning merge commits which become degenerate and have no changes
relative to their remaining relevant parent
3) rewriting commit hashes in commit messages to reference new commit IDs.
"""
"""
Please see the
***** API BACKWARD COMPATIBILITY CAVEAT *****
near the top of git-filter-repo.
"""
import argparse
import os
import subprocess
try:
import git_filter_repo as fr
except ImportError:
raise SystemExit("Error: Couldn't find git_filter_repo.py. Did you forget to make a symlink to git-filter-repo named git_filter_repo.py or did you forget to put the latter in your PYTHONPATH?")
class CheckIgnores:
    """Decide which paths are ignored by the current gitignore rules.

    One long-running `git check-ignore --stdin` process is queried, one
    path per request, and every answer is cached so each distinct path is
    only sent to git once.
    """

    def __init__(self):
        # Caches of pathnames (bytes) that have already been classified.
        self.ignored = set()
        self.okay = set()
        # --verbose together with --non-matching makes git print exactly one
        # line per path we send, whether or not any pattern matched, which
        # keeps our requests and git's replies in lockstep.
        cmd = 'git check-ignore --stdin --verbose --non-matching --no-index'
        self.check_ignore_process = subprocess.Popen(cmd.split(),
                                                     stdin=subprocess.PIPE,
                                                     stdout=subprocess.PIPE)

    def __del__(self):
        # getattr: __init__ may have failed before the attribute was set.
        process = getattr(self, 'check_ignore_process', None)
        if process:
            # Closing stdin signals end-of-input; wait() then reaps the child.
            process.stdin.close()
            process.wait()

    @staticmethod
    def _parse_result(result):
        """Parse one line of `git check-ignore --verbose` output.

        The line has the form  <source>:<linenum>:<pattern>\\t<pathname>
        where all three leading fields are empty when no pattern matched.

        Returns a (pathname, is_ignored) tuple.  Raises ValueError if the
        line contains no tab.
        """
        # Split on the LAST tab so a tab inside the pattern does not break
        # the unpacking.
        rest, pathname = result.rsplit(b"\t", 1)
        if rest == b'::':
            return pathname, False
        # NOTE(review): assumes <source> contains no ':' -- confirm for
        # exclude files with unusual paths.
        pattern = rest.split(b':', 2)[-1]
        # A matching pattern that starts with '!' is a negated pattern:
        # matching it means the path is explicitly NOT ignored.
        return pathname, not pattern.startswith(b'!')

    def get_ignored(self, filenames):
        """Return the subset of filenames (bytes) which are ignored.

        Raises SystemExit if git answers about a different path than the
        one that was asked about.
        """
        ignored = set()
        for name in filenames:
            if name in self.ignored:
                ignored.add(name)
            elif name in self.okay:
                continue
            else:
                self.check_ignore_process.stdin.write(name + b'\n')
                self.check_ignore_process.stdin.flush()
                result = self.check_ignore_process.stdout.readline().rstrip(b'\n')
                pathname, is_ignored = self._parse_result(result)
                if name != pathname:
                    raise SystemExit("Error: Passed {} but got {}".format(name, pathname))
                if is_ignored:
                    self.ignored.add(name)
                    ignored.add(name)
                else:
                    self.okay.add(name)
        return ignored

    def skip_ignores(self, commit, metadata):
        """Commit callback: drop file changes whose filename is ignored."""
        filenames = [x.filename for x in commit.file_changes]
        bad = self.get_ignored(filenames)
        commit.file_changes = [x for x in commit.file_changes
                               if x.filename not in bad]
# Rewrite history with filter-repo's default options, handing every commit
# to the checker so file changes on ignored paths are dropped.
ignore_checker = CheckIgnores()
default_options = fr.FilteringOptions.default_options()
repo_filter = fr.RepoFilter(default_options,
                            commit_callback=ignore_checker.skip_ignores)
repo_filter.run()

@ -0,0 +1,58 @@
#!/usr/bin/env python3
"""
This is a simple program that will insert some regular file into the root
commit(s) of history, e.g. adding a file named LICENSE or COPYING to the
first commit. It also rewrites commit hashes in commit messages to update
them based on these changes.
"""
"""
Please see the
***** API BACKWARD COMPATIBILITY CAVEAT *****
near the top of git-filter-repo.
"""
# Technically, this program could be replaced by a one-liner:
# git filter-repo --force --commit-callback "if not commit.parents: commit.file_changes.append(FileChange(b'M', $PATHNAME, $(git hash-object -w $FILENAME), 100644))"
# but let's do it as a full-fledged program that imports git_filter_repo
# anyway...
import argparse
import os
import subprocess
try:
import git_filter_repo as fr
except ImportError:
raise SystemExit("Error: Couldn't find git_filter_repo.py. Did you forget to make a symlink to git-filter-repo named git_filter_repo.py or did you forget to put the latter in your PYTHONPATH?")
# Command line handling: the only option is the (mandatory) file to insert.
parser = argparse.ArgumentParser(
    description='Add a file to the root commit(s) of history')
parser.add_argument(
    '--file',
    type=os.fsencode,
    help=("Path to file whose contents should be added to root commit(s)"))
args = parser.parse_args()
if not args.file:
    raise SystemExit("Error: Need to specify the --file option")

# Write the file's contents into the object database and remember the
# resulting object id, plus the mode the new entry should get.
hash_object_cmd = ['git', 'hash-object', '-w', args.file]
fhash = subprocess.check_output(hash_object_cmd).strip()
if os.access(args.file, os.X_OK):
    fmode = b'100755'
else:
    fmode = b'100644'
# FIXME: I've assumed the file wasn't a directory or symlink...
def fixup_commits(commit, metadata):
    """Commit callback: add the requested file to every parentless commit.

    Commits that have at least one parent are left untouched.
    """
    is_root = len(commit.parents) == 0
    if not is_root:
        # FIXME: What if the history already had a file matching the given
        # name, but which didn't exist until later in history?  Is the intent
        # for the user to keep the other version that existed when it
        # existed, or to overwrite the version for all of history with the
        # specified file?  I don't know, but if it's the latter, non-root
        # commits would need something like the following here:
        #   commit.file_changes = [x for x in commit.file_changes
        #                          if x.filename != args.file]
        return
    new_entry = fr.FileChange(b'M', args.file, fhash, fmode)
    commit.file_changes.append(new_entry)
# Options handed to filter-repo for the rewrite itself.
filter_repo_argv = [
    '--preserve-commit-encoding',
    '--force',
    '--replace-refs', 'update-no-add',
]
fr_args = fr.FilteringOptions.parse_args(filter_repo_argv)
repo_filter = fr.RepoFilter(fr_args, commit_callback=fixup_commits)
repo_filter.run()

@ -0,0 +1,112 @@
#!/usr/bin/env python3
"""
This is a simple program that will run a linting program on all non-binary
files in history. It also rewrites commit hashes in commit messages to
refer to the new commits with the rewritten files. You call it like this:
lint-history my-lint-command --arg whatever --another-arg
and it will repeatedly call
my-lint-command --arg whatever --another-arg $TEMPORARY_FILE
with $TEMPORARY_FILE having contents of some file from history.
"""
"""
Please see the
***** API BACKWARD COMPATIBILITY CAVEAT *****
near the top of git-filter-repo.
"""
# Technically, this program could be replaced by a "one-liner"; e.g.
# git filter-repo --force --blob-callback '
# if b"\0" not in blob.data[0:8192]:
# filename = '.git/info/tmpfile'
# with open(filename, "wb") as f:
# f.write(blob.data)
# subprocess.check_call(["lint_program", "--some", "arg", filename])
# with open(filename, "rb") as f:
# blob.data = f.read()
# os.remove(filename)
# '
# but let's do it as a full-fledged program that imports git_filter_repo
# anyway...
import argparse
import os
import subprocess
import tempfile
try:
import git_filter_repo as fr
except ImportError:
raise SystemExit("Error: Couldn't find git_filter_repo.py. Did you forget to make a symlink to git-filter-repo named git_filter_repo.py or did you forget to put the latter in your PYTHONPATH?")
# State shared between the module level driver code and the callbacks; the
# first and last are only given real values when --filenames-important is
# used.
tmpdir = None
blobs_handled = {}
cat_file_process = None

# Command line handling: everything after the optional flag is the lint
# command to run.
parser = argparse.ArgumentParser(description='Lint all files in history')
parser.add_argument(
    '--filenames-important',
    action='store_true',
    help=("By default, contents are written to a temporary file with a "
          "random name. If the linting program needs to know the file "
          "basename to operate correctly (e.g. because it needs to know "
          "the file's extension), then pass this argument"))
parser.add_argument(
    'command',
    nargs=argparse.REMAINDER,
    help=("Lint command to run, other than the filename at the end"))
lint_args = parser.parse_args()
if not lint_args.command:
    raise SystemExit("Error: Need to specify a lint command")
def lint_with_real_filenames(commit, metadata):
    """Commit callback: lint each changed file under its real basename.

    For every file change in the commit, the blob's contents are fetched
    from the running `git cat-file --batch` process, written to a file in
    tmpdir named after the change's basename, linted, and read back as a
    new blob which replaces the original in the change.  Results are cached
    in blobs_handled (old blob id -> new blob id) so each distinct blob is
    only linted once.

    Raises subprocess.CalledProcessError if the lint command fails.
    """
    for change in commit.file_changes:
        # A deletion has no contents to lint; asking cat-file for its blob
        # would not return an object and would break the reply parsing below.
        if change.type == b'D':
            continue
        if change.blob_id in blobs_handled:
            change.blob_id = blobs_handled[change.blob_id]
        else:
            # Get the old blob contents
            cat_file_process.stdin.write(change.blob_id + b'\n')
            cat_file_process.stdin.flush()
            objhash, objtype, objsize = cat_file_process.stdout.readline().split()
            # cat-file terminates the contents with a newline; read it too so
            # the stream is positioned at the next reply.
            contents_plus_newline = cat_file_process.stdout.read(int(objsize)+1)
            # Write it out to a file with the same basename
            filename = os.path.join(tmpdir, os.path.basename(change.filename))
            with open(filename, "wb") as f:
                f.write(contents_plus_newline[:-1])
            # Lint the file
            subprocess.check_call(lint_args.command + [filename])
            # Get the new contents
            with open(filename, "rb") as f:
                blob = fr.Blob(f.read())
            # Insert the new file into the filter's stream, and remove the
            # tempfile
            filter.insert(blob)
            os.remove(filename)
            # Record our handling of the blob and use it for this change
            blobs_handled[change.blob_id] = blob.id
            change.blob_id = blob.id
def lint_non_binary_blobs(blob, metadata):
    """Blob callback: run the lint command on every blob that looks like text.

    The blob's data is written to a scratch file, the lint command is run
    with that file as its final argument, and whatever the file contains
    afterwards becomes the blob's new data.  Binary blobs are left alone.

    Raises subprocess.CalledProcessError if the lint command fails.
    """
    # A NUL byte within the first 8192 bytes marks the blob as binary.  This
    # is a substring test: iterating a bytes object yields ints, so comparing
    # each element against a bytes literal can never match.
    if b"\0" in blob.data[0:8192]:
        return
    filename = '.git/info/tmpfile'
    with open(filename, "wb") as f:
        f.write(blob.data)
    subprocess.check_call(lint_args.command + [filename])
    with open(filename, "rb") as f:
        blob.data = f.read()
    os.remove(filename)
# Run the rewrite with filter-repo's default options, forced.
args = fr.FilteringOptions.default_options()
args.force = True
if lint_args.filenames_important:
    # The commit callback writes each blob into tmpdir under its real
    # basename.  The context manager removes the directory (and anything the
    # lint command left behind in it) even if the rewrite fails.
    with tempfile.TemporaryDirectory() as tmpdir_name:
        tmpdir = tmpdir_name.encode()
        cat_file_process = subprocess.Popen(['git', 'cat-file', '--batch'],
                                            stdin=subprocess.PIPE,
                                            stdout=subprocess.PIPE)
        try:
            filter = fr.RepoFilter(args,
                                   commit_callback=lint_with_real_filenames)
            filter.run()
        finally:
            # Always shut the helper process down, success or not.
            cat_file_process.stdin.close()
            cat_file_process.wait()
else:
    filter = fr.RepoFilter(args, blob_callback=lint_non_binary_blobs)
    filter.run()

@ -0,0 +1,68 @@
#!/usr/bin/env python3
"""
This is a simple program that will add Signed-off-by: tags to a range of
commits. Example usage, to add a signed-off-by trailer to every commit that
is not in next but is in any of master, develop, or maint:
signed-off-by master develop maint ^next
More likely called as:
signed-off-by master~4..master
There's no real reason to use this script since `rebase --signoff` exists;
it's mostly just a demonstration of what could be done.
"""
"""
Please see the
***** API BACKWARD COMPATIBILITY CAVEAT *****
near the top of git-filter-repo.
"""
import argparse
import re
import subprocess
try:
import git_filter_repo as fr
except ImportError:
raise SystemExit("Error: Couldn't find git_filter_repo.py. Did you forget to make a symlink to git-filter-repo named git_filter_repo.py or did you forget to put the latter in your PYTHONPATH?")
# Command line handling: every argument is passed through as a rev-list
# argument selecting the commits to work on.
parser = argparse.ArgumentParser(
    description="Add 'Signed-off-by:' tags to a range of commits")
parser.add_argument(
    'rev_list_args',
    metavar='rev-list args',
    nargs=argparse.REMAINDER,
    help=("Range of commits (need to include ref tips) to work on"))
myargs = parser.parse_args()

# Build the trailer (as bytes) from the configured git identity.
user_name = subprocess.check_output(['git', 'config', 'user.name']).rstrip()
user_email = subprocess.check_output(['git', 'config', 'user.email']).rstrip()
trailer = b'Signed-off-by: %s <%s>' % (user_name, user_email)
def add_signed_off_by_trailer(commit, metadata):
    """Commit callback: append the Signed-off-by trailer to commit.message.

    Messages which already contain the trailer are left untouched.  The
    trailer is separated from any existing paragraph by a blank line, unless
    the message already ends with a trailer, in which case it is placed on
    the adjacent line.
    """
    if trailer in commit.message:
        return
    if not commit.message.endswith(b'\n'):
        commit.message += b'\n'
    lines = commit.message.splitlines()
    lastline = lines[-1]
    # A "Key: value" last line is only a trailer when a blank line appears
    # somewhere before it.  Without one it belongs to the first paragraph
    # (e.g. a subject such as "contrib: add examples") and the new trailer
    # must start a paragraph of its own.
    in_later_paragraph = any(not line.strip() for line in lines[:-1])
    ends_with_trailer = (in_later_paragraph and
                         re.match(br'[A-Za-z0-9_-]+: ', lastline))
    if not ends_with_trailer:
        commit.message += b'\n'
    commit.message += trailer
# Giving filter-repo any source and target at all prevents:
#   * remapping origin remote tracking branches to regular branches
#   * deletion of the origin remote
#   * nuking unused refs
#   * nuking reflogs
#   * repacking
# so we cheat and point both of them at '.'
rewrite_argv = [
    '--source', '.',
    '--target', '.',
    '--force',
    '--replace-refs', 'update-no-add',
    '--refs',
]
rewrite_argv.extend(myargs.rev_list_args)
args = fr.FilteringOptions.parse_args(rewrite_argv)
args.refs = myargs.rev_list_args
repo_filter = fr.RepoFilter(args, commit_callback=add_signed_off_by_trailer)
repo_filter.run()
Loading…
Cancel
Save