contrib: simple examples of tools based on filter-repo
Signed-off-by: Elijah Newren <newren@gmail.com>pull/13/head
parent
2094221721
commit
6d231c0a94
@ -0,0 +1,34 @@
|
||||
## Background
|
||||
|
||||
filter-repo is not merely a history rewriting tool, it also contains a
|
||||
library that can be used to write new history rewriting tools. This
|
||||
directory contains several examples showing the breadth of different things
|
||||
that could be done.
|
||||
|
||||
## Quick overview
|
||||
|
||||
Command |Description
|
||||
-------|-----------
|
||||
insert-beginning |Add a new file (e.g. LICENSE/COPYING) to the beginning of history.
|
||||
signed-off-by |Add a Signed-off-by tag to a range of commits.
|
||||
lint-history |Run some lint command on all non-binary files in history.
|
||||
clean-ignore |Delete files from history which match current gitignore rules.
|
||||
|
||||
## Purpose
|
||||
|
||||
Please note that the point of these examples is not to provide new complete
|
||||
tools, but simply to demonstrate that extremely varied history rewriting
|
||||
tools can be created which automatically inherit lots of useful base
|
||||
functionality: rewriting hashes in commit messages, pruning commits that
|
||||
become empty, handling filenames with funny characters, non-standard
|
||||
encodings, handling of replace refs, etc. (Additional examples of using
|
||||
filter-repo as a library can also be found in [the
|
||||
testsuite](../../t/t9391/).) My sincerest hope is that these examples
|
||||
provide lots of useful functionality, but that each is missing at least one
|
||||
critical piece for your use case. Go forth and extend and improve.
|
||||
|
||||
## Usage
|
||||
|
||||
All the examples require a symlink to git-filter-repo in your PYTHONPATH
|
||||
named git_filter_repo.py in order to run; also, all have a --help flag to
|
||||
get a description of their usage and flags.
|
@ -0,0 +1,71 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
"""
|
||||
This is a simple program that will delete files from history which match
|
||||
current gitignore rules, while also:
|
||||
1) pruning commits which become empty
|
||||
2) pruning merge commits which become degenerate and have no changes
|
||||
relative to its remaining relevant parent
|
||||
3) rewriting commit hashes in commit messages to reference new commit IDs.
|
||||
"""
|
||||
|
||||
"""
|
||||
Please see the
|
||||
***** API BACKWARD COMPATIBILITY CAVEAT *****
|
||||
near the top of git-filter-repo.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import subprocess
|
||||
try:
|
||||
import git_filter_repo as fr
|
||||
except ImportError:
|
||||
raise SystemExit("Error: Couldn't find git_filter_repo.py. Did you forget to make a symlink to git-filter-repo named git_filter_repo.py or did you forget to put the latter in your PYTHONPATH?")
|
||||
|
||||
class CheckIgnores:
    """Strip files matching the current gitignore rules from commits.

    One long-running ``git check-ignore --stdin`` process is queried for
    every path that has not been seen before.  Verdicts are cached in
    ``ignored`` and ``okay`` so each distinct path is looked up only once.
    """

    def __init__(self):
        # Paths (bytes) that have already been classified.
        self.ignored = set()
        self.okay = set()

        # -z makes both the input and the verbose output NUL-delimited, so
        # paths containing tabs, newlines, quotes or non-ASCII bytes
        # round-trip verbatim instead of being C-quoted by git.
        cmd = ['git', 'check-ignore', '--stdin', '--verbose',
               '--non-matching', '--no-index', '-z']
        self.check_ignore_process = subprocess.Popen(cmd,
                                                     stdin=subprocess.PIPE,
                                                     stdout=subprocess.PIPE)

    def __del__(self):
        # getattr: __init__ may have raised before the attribute existed.
        proc = getattr(self, 'check_ignore_process', None)
        if proc:
            # Closing stdin signals EOF to git; wait() then reaps the child.
            proc.stdin.close()
            proc.wait()

    def _read_field(self):
        """Return the next NUL-terminated field from check-ignore's stdout.

        Raises SystemExit if the stream ends before a terminator is seen.
        """
        stdout = self.check_ignore_process.stdout
        field = bytearray()
        while True:
            byte = stdout.read(1)
            if not byte:
                raise SystemExit(
                    "Error: git check-ignore exited unexpectedly")
            if byte == b'\0':
                return bytes(field)
            field += byte

    def get_ignored(self, filenames):
        """Return the subset of ``filenames`` (bytes paths) that is ignored.

        Raises SystemExit if git answers about a different path than the
        one that was asked about.
        """
        ignored = set()
        for name in filenames:
            if name in self.ignored:
                ignored.add(name)
                continue
            if name in self.okay:
                continue

            stdin = self.check_ignore_process.stdin
            stdin.write(name + b'\0')
            stdin.flush()
            # Verbose -z record: <source> NUL <linenum> NUL <pattern> NUL
            # <pathname> NUL; the first three are empty when nothing matched.
            source, linenum, pattern, pathname = [
                self._read_field() for _ in range(4)]
            if name != pathname:
                raise SystemExit(
                    "Error: Passed {} but got {}".format(name, pathname))

            matched = bool(source or linenum or pattern)
            # A match on a negated pattern ("!foo") means the path is
            # explicitly NOT ignored.
            if matched and not pattern.startswith(b'!'):
                self.ignored.add(name)
                ignored.add(name)
            else:
                self.okay.add(name)

        return ignored

    def skip_ignores(self, commit, metadata):
        """Commit callback: drop file changes whose path is ignored."""
        filenames = [change.filename for change in commit.file_changes]
        bad = self.get_ignored(filenames)
        commit.file_changes = [change for change in commit.file_changes
                               if change.filename not in bad]
|
||||
|
||||
# Run filter-repo with its default options, routing every commit through the
# ignore checker so that ignored paths are dropped from history.
checker = CheckIgnores()
args = fr.FilteringOptions.default_options()
filter = fr.RepoFilter(
    args,
    commit_callback=checker.skip_ignores,
)
filter.run()
|
@ -0,0 +1,58 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
"""
|
||||
This is a simple program that will insert some regular file into the root
|
||||
commit(s) of history, e.g. adding a file named LICENSE or COPYING to the
|
||||
first commit. It also rewrites commit hashes in commit messages to update
|
||||
them based on these changes.
|
||||
"""
|
||||
|
||||
"""
|
||||
Please see the
|
||||
***** API BACKWARD COMPATIBILITY CAVEAT *****
|
||||
near the top of git-filter-repo.
|
||||
"""
|
||||
|
||||
# Technically, this program could be replaced by a one-liner:
|
||||
# git filter-repo --force --commit-callback "if not commit.parents: commit.file_changes.append(FileChange(b'M', $PATHNAME, $(git hash-object -w $FILENAME), 100644))"
|
||||
# but let's do it as a full-fledged program that imports git_filter_repo
|
||||
# anyway...
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import subprocess
|
||||
try:
|
||||
import git_filter_repo as fr
|
||||
except ImportError:
|
||||
raise SystemExit("Error: Couldn't find git_filter_repo.py. Did you forget to make a symlink to git-filter-repo named git_filter_repo.py or did you forget to put the latter in your PYTHONPATH?")
|
||||
|
||||
parser = argparse.ArgumentParser(
    description='Add a file to the root commit(s) of history')
parser.add_argument('--file', type=os.fsencode,
                    help=("Path to file whose contents should be added to root commit(s)"))
args = parser.parse_args()
if not args.file:
    raise SystemExit("Error: Need to specify the --file option")

# args.file is used both to read the contents from disk and, verbatim, as the
# path recorded in history.  It therefore has to be a clean relative path:
# no leading or trailing '/', no empty components, and no '.' or '..'.
if any(part in (b'', b'.', b'..') for part in args.file.split(b'/')):
    raise SystemExit("Error: Invalid path components in {}"
                     .format(os.fsdecode(args.file)))
if not os.path.isfile(args.file):
    raise SystemExit("Error: {} is not an existing regular file"
                     .format(os.fsdecode(args.file)))

# Write the blob into the object database and remember its id and file mode.
fhash = subprocess.check_output(['git', 'hash-object', '-w', args.file]).strip()
fmode = b'100755' if os.access(args.file, os.X_OK) else b'100644'
# FIXME: Directories are rejected above, but a symlink that points at a
# regular file still passes the isfile() check and is not recorded with a
# symlink mode.
|
||||
|
||||
def fixup_commits(commit, metadata):
    """Commit callback: add the requested file to every root commit."""
    if commit.parents:
        # Not a root commit; leave it untouched.
        return

    new_file = fr.FileChange(b'M', args.file, fhash, fmode)
    commit.file_changes.append(new_file)
    # FIXME: What if the history already had a file matching the given name,
    # but which didn't exist until later in history?  Is the intent for the
    # user to keep the other version that existed when it existed, or to
    # overwrite the version for all of history with the specified file?  I
    # don't know, but if it's the latter, non-root commits would need
    # handling like the following:
    #   commit.file_changes = [x for x in commit.file_changes
    #                          if x.filename != args.file]
|
||||
|
||||
# Options handed to filter-repo for this rewrite.
fr_args = fr.FilteringOptions.parse_args([
    '--preserve-commit-encoding',
    '--force',
    '--replace-refs', 'update-no-add',
])
filter = fr.RepoFilter(fr_args, commit_callback=fixup_commits)
filter.run()
|
@ -0,0 +1,112 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
"""
|
||||
This is a simple program that will run a linting program on all non-binary
|
||||
files in history. It also rewrites commit hashes in commit messages to
|
||||
refer to the new commits with the rewritten files. You call it like this:
|
||||
lint-history my-lint-command --arg whatever --another-arg
|
||||
and it will repeatedly call
|
||||
my-lint-command --arg whatever --another-arg $TEMPORARY_FILE
|
||||
with $TEMPORARY_FILE having contents of some file from history.
|
||||
"""
|
||||
|
||||
"""
|
||||
Please see the
|
||||
***** API BACKWARD COMPATIBILITY CAVEAT *****
|
||||
near the top of git-filter-repo.
|
||||
"""
|
||||
|
||||
# Technically, this program could be replaced by a "one-liner"; e.g.
|
||||
# git filter-repo --force --blob-callback '
|
||||
# if b"\0" not in blob.data[0:8192]:
|
||||
# filename = '.git/info/tmpfile'
|
||||
# with open(filename, "wb") as f:
|
||||
# f.write(blob.data)
|
||||
# subprocess.check_call(["lint_program", "--some", "arg", filename])
|
||||
# with open(filename, "rb") as f:
|
||||
# blob.data = f.read()
|
||||
# os.remove(filename)
|
||||
# '
|
||||
# but let's do it as a full-fledged program that imports git_filter_repo
|
||||
# anyway...
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import subprocess
|
||||
import tempfile
|
||||
try:
|
||||
import git_filter_repo as fr
|
||||
except ImportError:
|
||||
raise SystemExit("Error: Couldn't find git_filter_repo.py. Did you forget to make a symlink to git-filter-repo named git_filter_repo.py or did you forget to put the latter in your PYTHONPATH?")
|
||||
|
||||
parser = argparse.ArgumentParser(description='Lint all files in history')
parser.add_argument(
    '--filenames-important',
    action='store_true',
    help=("By default, contents are written to a temporary file with a "
          "random name. If the linting program needs to know the file "
          "basename to operate correctly (e.g. because it needs to know "
          "the file's extension), then pass this argument"),
)
parser.add_argument(
    'command',
    nargs=argparse.REMAINDER,
    help=("Lint command to run, other than the filename at the end"),
)
lint_args = parser.parse_args()
if not lint_args.command:
    raise SystemExit("Error: Need to specify a lint command")

# State shared with the commit callback; the two None values are only
# replaced when --filenames-important is given.
tmpdir = None             # directory (bytes path) for per-file temp copies
blobs_handled = {}        # original blob id -> id of the linted blob
cat_file_process = None   # 'git cat-file --batch' subprocess
|
||||
def lint_with_real_filenames(commit, metadata):
    """Commit callback: lint each changed file under its real basename.

    For every file change whose blob has not been handled yet, the blob is
    fetched through the shared ``git cat-file --batch`` process, written to
    ``tmpdir`` under the file's basename, passed to the lint command, and
    the possibly modified contents are inserted into the stream as a new
    blob.  ``blobs_handled`` maps original blob ids to linted blob ids so
    each blob is only linted once.

    Raises SystemExit if cat-file cannot provide a blob, and
    subprocess.CalledProcessError if the lint command fails.
    """
    for change in commit.file_changes:
        # Deletions carry no blob, and submodule entries (mode 160000) name
        # a commit in another repository; neither can be read or linted.
        if change.type == b'D' or change.mode == b'160000':
            continue

        if change.blob_id in blobs_handled:
            change.blob_id = blobs_handled[change.blob_id]
            continue

        # Get the old blob contents
        cat_file_process.stdin.write(change.blob_id + b'\n')
        cat_file_process.stdin.flush()
        header = cat_file_process.stdout.readline().split()
        if len(header) != 3:
            # e.g. b"<id> missing"
            raise SystemExit("Error: Could not read blob {} for {}"
                             .format(change.blob_id, change.filename))
        objsize = int(header[2])
        # cat-file --batch emits a newline after the contents; read past it.
        contents_plus_newline = cat_file_process.stdout.read(objsize + 1)

        # Write it out to a file with the same basename
        filename = os.path.join(tmpdir, os.path.basename(change.filename))
        try:
            with open(filename, "wb") as f:
                f.write(contents_plus_newline[:-1])

            # Lint the file
            subprocess.check_call(lint_args.command + [filename])

            # Get the new contents
            with open(filename, "rb") as f:
                blob = fr.Blob(f.read())
        finally:
            # Remove the tempfile even when the linter fails.
            if os.path.exists(filename):
                os.remove(filename)

        # Insert the new file into the filter's stream
        filter.insert(blob)

        # Record our handling of the blob and use it for this change
        blobs_handled[change.blob_id] = blob.id
        change.blob_id = blob.id
|
||||
|
||||
def lint_non_binary_blobs(blob, metadata):
    """Blob callback: run the lint command on every non-binary blob.

    A blob is treated as binary, and left untouched, when a NUL byte occurs
    in its first 8192 bytes.  Otherwise its contents are written to a
    scratch file, the lint command is run on that file, and whatever the
    file contains afterwards becomes the blob's new data.

    Raises subprocess.CalledProcessError if the lint command fails.
    """
    # Iterating bytes yields ints, so a per-element comparison against a
    # bytes literal never matches; use a substring test for the NUL byte.
    if b"\0" in blob.data[0:8192]:
        return

    filename = '.git/info/tmpfile'
    try:
        with open(filename, "wb") as f:
            f.write(blob.data)
        subprocess.check_call(lint_args.command + [filename])
        with open(filename, "rb") as f:
            blob.data = f.read()
    finally:
        # Do not leave the scratch file behind when the linter fails.
        if os.path.exists(filename):
            os.remove(filename)
|
||||
|
||||
args = fr.FilteringOptions.default_options()
args.force = True
if lint_args.filenames_important:
    import shutil  # only needed here, to clean up the temporary directory

    # Files are linted under their real basenames inside a private directory.
    tmpdir = os.fsencode(tempfile.mkdtemp())
    cat_file_process = subprocess.Popen(['git', 'cat-file', '--batch'],
                                        stdin=subprocess.PIPE,
                                        stdout=subprocess.PIPE)
    try:
        filter = fr.RepoFilter(args, commit_callback=lint_with_real_filenames)
        filter.run()
    finally:
        # Shut down cat-file and remove the temporary directory even if the
        # run (or the lint command) failed part-way through.
        cat_file_process.stdin.close()
        cat_file_process.wait()
        shutil.rmtree(os.fsdecode(tmpdir), ignore_errors=True)
else:
    filter = fr.RepoFilter(args, blob_callback=lint_non_binary_blobs)
    filter.run()
|
@ -0,0 +1,68 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
"""
|
||||
This is a simple program that will add Signed-off-by: tags to a range of
|
||||
commits. Example usage, to add a signed-off-by trailer to every commit that
|
||||
is not in next but is in any of master, develop, or maint:
|
||||
signed-off-by master develop maint ^next
|
||||
More likely called as:
|
||||
signed-off-by master~4..master
|
||||
There's no real reason to use this script since `rebase --signoff` exists;
|
||||
it's mostly just a demonstration of what could be done.
|
||||
"""
|
||||
|
||||
"""
|
||||
Please see the
|
||||
***** API BACKWARD COMPATIBILITY CAVEAT *****
|
||||
near the top of git-filter-repo.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import re
|
||||
import subprocess
|
||||
try:
|
||||
import git_filter_repo as fr
|
||||
except ImportError:
|
||||
raise SystemExit("Error: Couldn't find git_filter_repo.py. Did you forget to make a symlink to git-filter-repo named git_filter_repo.py or did you forget to put the latter in your PYTHONPATH?")
|
||||
|
||||
parser = argparse.ArgumentParser(
    description="Add 'Signed-off-by:' tags to a range of commits")
parser.add_argument(
    'rev_list_args',
    metavar='rev-list args',
    nargs=argparse.REMAINDER,
    help=("Range of commits (need to include ref tips) to work on"),
)
myargs = parser.parse_args()

# Build the trailer (as bytes) from the configured git identity.
user_name = subprocess.check_output(['git', 'config', 'user.name']).rstrip()
user_email = subprocess.check_output(['git', 'config', 'user.email']).rstrip()
trailer = b'Signed-off-by: ' + user_name + b' <' + user_email + b'>'
|
||||
|
||||
def add_signed_off_by_trailer(commit, metadata):
    """Commit callback: append the Signed-off-by trailer if it is missing.

    The trailer is separated from ordinary text by a blank line.  If the
    message already ends with a trailer line ("Key: value"), the new
    trailer is placed directly below it so all trailers stay adjacent.
    The resulting message always ends with a newline.
    """
    if trailer in commit.message:
        return

    message = commit.message
    if not message.endswith(b'\n'):
        message += b'\n'

    lastline = message.splitlines()[-1]
    # The hyphen sits last in the class so it is a literal, not a range, and
    # '+' requires a non-empty key.  Lines such as b'[1]: http://...' are
    # therefore not mistaken for trailers.
    if not re.match(rb'[A-Za-z0-9_-]+: ', lastline):
        message += b'\n'

    commit.message = message + trailer + b'\n'
|
||||
|
||||
# Giving filter-repo any explicit source and target stops it from:
#   * remapping origin remote tracking branches to regular branches
#   * deleting the origin remote
#   * nuking unused refs
#   * nuking reflogs
#   * repacking
# so we cheat and point both source and target at '.'
filter_argv = [
    '--source', '.',
    '--target', '.',
    '--force',
    '--replace-refs', 'update-no-add',
    '--refs',
]
args = fr.FilteringOptions.parse_args(filter_argv + myargs.rev_list_args)
args.refs = myargs.rev_list_args
filter = fr.RepoFilter(args, commit_callback=add_signed_off_by_trailer)
filter.run()
|
Loading…
Reference in New Issue