filter-repo: fix issue with pruning of empty commits

In order to build the correct tree for a commit, git-fast-import always
takes a list of file changes for a merge commit relative to the first
parent.

When the entire first-parent history of a merge commit is pruned away
and the merge had paths with no difference relative to the first parent
but which differed relative to later parents, then we really need to
generate a new list of file changes in order to have one of those other
parents become the new first parent.  An example might help clarify...

Let's say that there is a merge commit, and:

  * it resolved differences in pathA between its two parents by taking
    the version of pathA from the first parent.

  * pathB was added in the history of the second parent (it is not
    present in the first parent) and is NOT included in the merge commit
    (either because the merge deleted it, or because the merge renamed
    it, which is treated as a deletion plus an addition under another
    name)

For this merge commit, neither pathA nor pathB differ from the first
parent, and thus wouldn't appear in the list of file changes shown by
fast-export.  However, when our filtering rules determine that the first
parent (and all its parents) should be pruned away, then the second
parent has to become the new first parent of the merge commit.  But to
end up with the right files in the merge commit despite using a
different parent, we need a list of file changes that specifies the
changes for both pathA and pathB.

Signed-off-by: Elijah Newren <newren@gmail.com>
This commit is contained in:
Elijah Newren 2019-10-03 18:10:47 -07:00
parent cdec483573
commit 509a624b6a
2 changed files with 71 additions and 1 deletions

View File

@ -438,7 +438,7 @@ class _GitElement(object):
def __bytes__(self):
"""
Convert GitElement to string; used for debugging
Convert GitElement to bytestring; used for debugging
"""
old_dumped = self.dumped
writeme = io.BytesIO()
@ -503,6 +503,8 @@ class Blob(_GitElementWithId):
Write this blob element to a file.
"""
self.dumped = 1
HASH_TO_ID[self.original_id] = self.id
ID_TO_HASH[self.id] = self.original_id
file_.write(b'blob\n')
file_.write(b'mark :%d\n' % self.id)
@ -665,6 +667,8 @@ class Commit(_GitElementWithId):
Write this commit element to a file.
"""
self.dumped = 1
HASH_TO_ID[self.original_id] = self.id
ID_TO_HASH[self.id] = self.original_id
# Make output to fast-import slightly easier for humans to read if the
# message has no trailing newline of its own; cosmetic, but a nice touch...
@ -754,6 +758,8 @@ class Tag(_GitElementWithId):
"""
self.dumped = 1
HASH_TO_ID[self.original_id] = self.id
ID_TO_HASH[self.id] = self.original_id
file_.write(b'tag %s\n' % self.ref)
if (write_marks and self.id):
@ -1426,6 +1432,8 @@ def record_id_rename(old_id, new_id):
# Internal globals
_IDS = _IDs()
_SKIPPED_COMMITS = set()
# Maps an element's original_id to its id; an entry is recorded each time a
# Blob, Commit, or Tag is dumped.
HASH_TO_ID = {}
# Reverse of HASH_TO_ID (id -> original_id); used to look up the hash that is
# handed to `git diff-tree` when file changes must be recomputed.
ID_TO_HASH = {}
class GitUtils(object):
@staticmethod
@ -1513,6 +1521,31 @@ class GitUtils(object):
blob_size_progress.finish()
return unpacked_size, packed_size
@staticmethod
def get_file_changes(repo, parent_hash, commit_hash):
    """
    Return a FileChanges list with the differences between parent_hash
    and commit_hash

    repo is the directory in which `git diff-tree` is run.  Each raw output
    line has the form
      :<oldmode> <newmode> <oldhash> <newhash> <status>\\t<path>
    and is translated into a FileChange:
      * status D       -> FileChange(b'D', path)
      * status A, M, T -> FileChange(b'M', path, identifier, newmode)
    where identifier is the id recorded in HASH_TO_ID for <newhash> if
    there is one, and <newhash> itself otherwise.

    Raises SystemExit on any other status, and
    subprocess.CalledProcessError if git diff-tree fails.
    """
    file_changes = []
    cmd = ["git", "diff-tree", "-r", parent_hash, commit_hash]
    output = subprocess.check_output(cmd, cwd=repo)
    for line in output.splitlines():
        fileinfo, path = line.split(b'\t', 1)
        # Paths with special characters are emitted C-style quoted
        if path.startswith(b'"'):
            path = PathQuoting.dequote(path)
        oldmode, mode, oldhash, newhash, changetype = fileinfo.split()
        if changetype == b'D':
            file_changes.append(FileChange(b'D', path))
        elif changetype in (b'A', b'M', b'T'):
            # A type change (T, e.g. regular file <-> symlink) is, like an
            # add or a modify, fully described by the new hash and new mode.
            identifier = HASH_TO_ID.get(newhash, newhash)
            file_changes.append(FileChange(b'M', path, identifier, mode))
        else: # pragma: no cover
            raise SystemExit("Unknown change type for line {}".format(line))
    return file_changes
class FilteringOptions(object):
class AppendFilter(argparse.Action):
def __call__(self, parser, namespace, values, option_string=None):
@ -3203,9 +3236,17 @@ class RepoFilter(object):
self._orig_graph.add_commit_and_parents(commit.old_id, orig_parents)
# Prune parents (due to pruning of empty commits) if relevant
old_1st_parent = parents[0] if parents else None
parents, new_1st_parent = self._trim_extra_parents(orig_parents, parents)
commit.parents = parents
# If parents were pruned, then we need our file changes to be relative
# to the new first parent
if parents and old_1st_parent != parents[0]:
commit.file_changes = GitUtils.get_file_changes(self._repo_working_dir,
ID_TO_HASH[parents[0]],
commit.original_id)
# Call the user-defined callback, if any
if self._commit_callback:
self._commit_callback(commit, self.callback_metadata(aux_info))

View File

@ -1309,4 +1309,33 @@ test_expect_success '--state-branch with expanding paths and refs' '
)
'
# Build a merge of two unrelated histories: branch A only holds a file with
# a quoted name at the top level, branch B holds pkg/list/whatever.t.  The
# merge (first parent A) adds pkg/list/wanted and removes whatever.t.
# Filtering with --path pkg/list prunes the first parent, so the merge has
# to be rewritten relative to B and must still lack pkg/list/whatever.t.
test_expect_success 'degenerate merge with non-matching filenames' '
	test_create_repo degenerate_merge_differing_filenames &&
	(
		cd degenerate_merge_differing_filenames &&

		touch "foo \"quote\" bar" &&
		git add "foo \"quote\" bar" &&
		git commit -m "Add foo \"quote\" bar" &&
		git branch A &&

		git checkout --orphan B &&
		git reset --hard &&
		mkdir -p pkg/list &&
		test_commit pkg/list/whatever &&

		git checkout A &&
		git merge --allow-unrelated-histories --no-commit B &&
		>pkg/list/wanted &&
		git add pkg/list/wanted &&
		git rm -f pkg/list/whatever.t &&
		git commit &&

		git filter-repo --force --path pkg/list &&
		! test_path_is_file pkg/list/whatever.t &&
		git ls-files >files &&
		! grep pkg/list/whatever.t files
	)
'
test_done