diff --git a/Documentation/git-filter-repo.txt b/Documentation/git-filter-repo.txt index 654ef33..a43cceb 100644 --- a/Documentation/git-filter-repo.txt +++ b/Documentation/git-filter-repo.txt @@ -256,9 +256,10 @@ Generic callback code snippets Location to filter from/to ~~~~~~~~~~~~~~~~~~~~~~~~~~ -NOTE: Specifying alternate source or target locations will disable some -auxiliary steps such as disconnecting the origin remote, and avoiding -mixing new and old history. +NOTE: Specifying alternate source or target locations implies --partial, +except that the normal default for --replace-refs is used. However, unlike +normal uses of --partial, this doesn't risk mixing old and new history +since the old and new histories are in different repositories. --source :: Git repository to read from @@ -278,6 +279,25 @@ Miscellaneous options Rewrite history even if the current repo does not look like a fresh clone. +--partial:: + Do a partial history rewrite, resulting in the mixture of old and + new history. This implies a default of update-no-add for + --replace-refs, disables rewriting refs/remotes/origin/* to + refs/heads/*, disables removing of the 'origin' remote, disables + removing unexported refs, disables expiring the reflog, and + disables the automatic post-filter gc. Also, this modifies + --tag-rename and --refname-callback options such that instead of + replacing old refs with new refnames, it will instead create new + refs and keep the old ones around. Use with caution. + +--refs :: + Limit history rewriting to the specified refs. Implies --partial. + In addition to the normal caveats of --partial (mixing old and new + history, no automatic remapping of refs/remotes/origin/* to + refs/heads/*, etc.), this also may cause problems for pruning of + degenerate empty merge commits when negative revisions are + specified. + --dry-run:: Do not change the repository. 
Run `git fast-export` and filter its output, and save both the original and the filtered version for @@ -699,6 +719,23 @@ The reason to specify --force is two-fold: filter-repo will error out if no arguments are specified, and the new graft commit would otherwise trigger the not-a-fresh-clone check. +Partial history rewrites +~~~~~~~~~~~~~~~~~~~~~~~~ + +To rewrite the history on just one branch (which may cause it to no longer +share any common history with other branches), use `--refs`. For example, +to remove a file named 'extraneous.txt' from the 'master' branch: + +-------------------------------------------------- +git filter-repo --invert-paths --path extraneous.txt --refs master +-------------------------------------------------- + +To rewrite just some recent commits: + +-------------------------------------------------- +git filter-repo --invert-paths --path extraneous.txt --refs master~3..master +-------------------------------------------------- + [[CALLBACKS]] CALLBACKS --------- @@ -946,8 +983,11 @@ Some notes or exceptions on each of the above: are that they've only rewritten trees and commits and maybe a few blobs, so `--aggressive` isn't needed and would be too slow.) -Information about these steps is printed out when `--debug` is passed to -filter-repo. +Information about these steps is printed out when `--debug` is passed +to filter-repo. When doing a `--partial` history rewrite, steps 2, 3, +7, and 8 are unconditionally skipped, step 5 is skipped if +`--replace-refs` is `update-no-add`, and just the nuke-unused-refs +portion of step 5 is skipped if `--replace-refs` is something else. Limitations ~~~~~~~~~~~ @@ -1041,18 +1081,16 @@ Issues specific to filter-repo such as `-M` or `-C` would break assumptions used in other places of filter-repo. - * Partial-repo filtering does not mesh well with filter-repo's "avoid - mixing old and new history" design. 
filter-repo has some capability - in this area but it is intentionally underdocumented and mostly left - for use by external scripts which import filter-repo as a module - (some examples in contrib/filter-repo-demos/ do use this). The only - real usecases I've seen for partial repo filtering, though, are - sidestepping filter-branch's insanely slow execution on commits that - would not be changed by the filters in question anyway (which is - largely irrelevant since filter-repo is multiple orders of magnitude - faster), or to do operations better suited to linkgit:git-rebase[1] - and which rebase grew special options for years ago (e.g. the - `--signoff` option). + * Partial-repo filtering, while supported, runs counter to filter-repo's + "avoid mixing old and new history" design. This support has required + improvements to core git as well (e.g. it depends upon the + `--reference-excluded-parents` option to fast-export that was added + specifically for this usage within filter-repo). The `--partial` and + `--refs` options will continue to be supported since there are people + with usecases for them; however, I am concerned that this inconsistency + about mixing old and new history seems likely to lead to user mistakes. + For now, I just hope that long explanations of caveats in the + documentation of these options suffice to curtail any such problems. 
Comments on reversibility ^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/contrib/filter-repo-demos/bfg-ish b/contrib/filter-repo-demos/bfg-ish index cfa9621..85bc51d 100755 --- a/contrib/filter-repo-demos/bfg-ish +++ b/contrib/filter-repo-demos/bfg-ish @@ -400,7 +400,7 @@ class BFG_ish: stdin = subprocess.PIPE, stdout = subprocess.PIPE) self.args = bfg_args - # Setting source and target to anything prevents: + # Setting partial prevents: # * remapping origin remote tracking branches to regular branches # * deletion of the origin remote # * nuking unused refs @@ -411,9 +411,8 @@ class BFG_ish: # The third is irrelevant since BFG has no mechanism for renaming refs, # and we'll manually add the fourth and fifth back in below by calling # RepoFilter.cleanup(). - fr_args = fr.FilteringOptions.parse_args(['--source', '.', - '--target', '.', - '--force'] + extra_args) + fr_args = fr.FilteringOptions.parse_args(['--partial', '--force'] + + extra_args) self.filter = fr.RepoFilter(fr_args, commit_callback=self.commit_update) self.filter.run() if new_replace_file: diff --git a/contrib/filter-repo-demos/filter-lamely b/contrib/filter-repo-demos/filter-lamely index 7460754..d463ae6 100755 --- a/contrib/filter-repo-demos/filter-lamely +++ b/contrib/filter-repo-demos/filter-lamely @@ -585,9 +585,7 @@ class UserInterfaceNightmare: self.args.prune_empty = True fr_args = fr.FilteringOptions.parse_args(['--preserve-commit-hashes', '--preserve-commit-encoding', - '--replace-refs', 'update-no-add', - '--source', '.', - '--target', '.', + '--partial', '--force'] + extra_args) fr_args.prune_empty = 'always' if self.args.prune_empty else 'never' fr_args.refs = self.get_extended_refs() diff --git a/contrib/filter-repo-demos/signed-off-by b/contrib/filter-repo-demos/signed-off-by index 1c07792..ddb9e0f 100755 --- a/contrib/filter-repo-demos/signed-off-by +++ b/contrib/filter-repo-demos/signed-off-by @@ -58,10 +58,7 @@ def add_signed_off_by_trailer(commit, metadata): # * nuking reflogs # * repacking 
-# so we cheat and set source and target both to '.' +# so we cheat and pass --refs, which implies --partial -args = fr.FilteringOptions.parse_args(['--source', '.', - '--target', '.', - '--force', - '--replace-refs', 'update-no-add', +args = fr.FilteringOptions.parse_args(['--force', '--refs'] + myargs.rev_list_args) args.refs = myargs.rev_list_args filter = fr.RepoFilter(args, commit_callback=add_signed_off_by_trailer) diff --git a/git-filter-repo b/git-filter-repo index 3d0c446..a55f53f 100755 --- a/git-filter-repo +++ b/git-filter-repo @@ -1673,10 +1673,6 @@ EXAMPLES "useful in determining what to filter in a subsequent run. " "Will not modify your repo.")) - refs = parser.add_argument_group(title=_("Git References")) - refs.add_argument('--refs', nargs='*', default=['--all'], - help=argparse.SUPPRESS) - path = parser.add_argument_group(title=_("Filtering based on paths " "(see also --filename-callback)")) path.add_argument('--invert-paths', action='store_false', dest='inclusive', @@ -1846,9 +1842,10 @@ EXAMPLES "CALLBACKS section below.")) desc = _( - "Specifying alternate source or target locations will disable some \n" - "auxiliary steps such as disconnecting the origin remote, and avoiding\n" - "mixing new and old history.") + "Specifying alternate source or target locations implies --partial,\n" + "except that the normal default for --replace-refs is used. However,\n" + "unlike normal uses of --partial, this doesn't risk mixing old and new\n" + "history since the old and new histories are in different repositories.") location = parser.add_argument_group(title=_("Location to filter from/to"), description=desc) location.add_argument('--source', type=os.fsencode, @@ -1862,6 +1859,29 @@ EXAMPLES misc.add_argument('--force', '-f', action='store_true', help=_("Rewrite history even if the current repo does not look " "like a fresh clone.")) + misc.add_argument('--partial', action='store_true', + help=_("Do a partial history rewrite, resulting in the mixture of " + "old and new history. 
This implies a default of " + "update-no-add for --replace-refs, disables rewriting " + "refs/remotes/origin/* to refs/heads/*, disables removing " + "of the 'origin' remote, disables removing unexported refs, " + "disables expiring the reflog, and disables the automatic " + "post-filter gc. Also, this modifies --tag-rename and " + "--refname-callback options such that instead of replacing " + "old refs with new refnames, it will instead create new " + "refs and keep the old ones around. Use with caution.")) + # WARNING: --refs presents a problem with become-degenerate pruning: + # * Excluding a commit also excludes its ancestors so when some other + # commit has an excluded ancestor as a parent we have no way of + # knowing what it is an ancestor of without doing a special + # full-graph walk. + misc.add_argument('--refs', nargs='+', + help=_("Limit history rewriting to the specified refs. Implies " + "--partial. In addition to the normal caveats of --partial " + "(mixing old and new history, no automatic remapping of " + "refs/remotes/origin/* to refs/heads/*, etc.), this also may " + "cause problems for pruning of degenerate empty merge " + "commits when negative revisions are specified.")) misc.add_argument('--dry-run', action='store_true', help=_("Do not change the repository. 
Run `git fast-export` and " @@ -2065,6 +2085,12 @@ EXAMPLES args.strip_blobs_with_ids = set(f.read().split()) else: args.strip_blobs_with_ids = set() + if (args.partial or args.refs) and not args.replace_refs: + args.replace_refs = 'update-no-add' + if args.refs or args.source or args.target: + args.partial = True + if not args.refs: + args.refs = ['--all'] return args class RepoAnalyze(object): @@ -3475,8 +3501,6 @@ class RepoFilter(object): .format(decode(self._fe_filt))) def _migrate_origin_to_heads(self): - if self._args.dry_run or self._args.source or self._args.target: - return refs_to_migrate = set(x for x in self._orig_refs if x.startswith(b'refs/remotes/origin/')) if not refs_to_migrate: @@ -3532,7 +3556,7 @@ class RepoFilter(object): # Remove unused refs exported_refs, imported_refs = self.get_exported_and_imported_refs() refs_to_nuke = exported_refs - imported_refs - if self._args.source or self._args.target: + if self._args.partial: refs_to_nuke = set() if refs_to_nuke and self._args.debug: print("[DEBUG] Deleting the following refs:\n "+ @@ -3690,7 +3714,8 @@ class RepoFilter(object): start = time.time() if not self._input and not self._output: self._run_sanity_checks() - self._migrate_origin_to_heads() + if not self._args.dry_run and not self._args.partial: + self._migrate_origin_to_heads() self._setup_input(use_done_feature = True) self._setup_output() assert self._sanity_checks_handled @@ -3725,7 +3750,7 @@ class RepoFilter(object): self._save_marks_files() # Notify user how long it took, before doing a gc and such - repack = (not self._args.source and not self._args.target) + repack = (not self._args.partial) msg = "New history written in {:.2f} seconds..." if repack: msg = "New history written in {:.2f} seconds; now repacking/cleaning..." 
@@ -3749,7 +3774,7 @@ class RepoFilter(object): # Write out data about run self._record_metadata(self.results_tmp_dir(), self._orig_refs) - # Nuke the reflogs and repack + # If repack, then nuke the reflogs and repack. If reset, do a reset --hard reset = not GitUtils.is_repository_bare(target_working_dir) RepoFilter.cleanup(target_working_dir, repack, reset, run_quietly=self._args.quiet,