You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
2717 lines
88 KiB
HTML
2717 lines
88 KiB
HTML
<?xml version="1.0" encoding="UTF-8"?>
|
|
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
|
|
"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
|
|
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
|
|
<head>
|
|
<meta http-equiv="Content-Type" content="application/xhtml+xml; charset=UTF-8" />
|
|
<meta name="generator" content="AsciiDoc 9.1.0" />
|
|
<title>git-filter-repo(1)</title>
|
|
<style type="text/css">
|
|
/* Shared CSS for AsciiDoc xhtml11 and html5 backends */
|
|
|
|
/* Default font. */
|
|
body {
|
|
font-family: Georgia,serif;
|
|
}
|
|
|
|
/* Title font. */
|
|
h1, h2, h3, h4, h5, h6,
|
|
div.title, caption.title,
|
|
thead, p.table.header,
|
|
#toctitle,
|
|
#author, #revnumber, #revdate, #revremark,
|
|
#footer {
|
|
font-family: Arial,Helvetica,sans-serif;
|
|
}
|
|
|
|
body {
|
|
margin: 1em 5% 1em 5%;
|
|
}
|
|
|
|
a {
|
|
color: blue;
|
|
text-decoration: underline;
|
|
}
|
|
a:visited {
|
|
color: fuchsia;
|
|
}
|
|
|
|
em {
|
|
font-style: italic;
|
|
color: navy;
|
|
}
|
|
|
|
strong {
|
|
font-weight: bold;
|
|
color: #083194;
|
|
}
|
|
|
|
h1, h2, h3, h4, h5, h6 {
|
|
color: #527bbd;
|
|
margin-top: 1.2em;
|
|
margin-bottom: 0.5em;
|
|
line-height: 1.3;
|
|
}
|
|
|
|
h1, h2, h3 {
|
|
border-bottom: 2px solid silver;
|
|
}
|
|
h2 {
|
|
padding-top: 0.5em;
|
|
}
|
|
h3 {
|
|
float: left;
|
|
}
|
|
h3 + * {
|
|
clear: left;
|
|
}
|
|
h5 {
|
|
font-size: 1.0em;
|
|
}
|
|
|
|
div.sectionbody {
|
|
margin-left: 0;
|
|
}
|
|
|
|
hr {
|
|
border: 1px solid silver;
|
|
}
|
|
|
|
p {
|
|
margin-top: 0.5em;
|
|
margin-bottom: 0.5em;
|
|
}
|
|
|
|
ul, ol, li > p {
|
|
margin-top: 0;
|
|
}
|
|
ul > li { color: #aaa; }
|
|
ul > li > * { color: black; }
|
|
|
|
.monospaced, code, pre {
|
|
font-family: "Courier New", Courier, monospace;
|
|
font-size: inherit;
|
|
color: navy;
|
|
padding: 0;
|
|
margin: 0;
|
|
}
|
|
pre {
|
|
white-space: pre-wrap;
|
|
}
|
|
|
|
#author {
|
|
color: #527bbd;
|
|
font-weight: bold;
|
|
font-size: 1.1em;
|
|
}
|
|
#email {
|
|
}
|
|
#revnumber, #revdate, #revremark {
|
|
}
|
|
|
|
#footer {
|
|
font-size: small;
|
|
border-top: 2px solid silver;
|
|
padding-top: 0.5em;
|
|
margin-top: 4.0em;
|
|
}
|
|
#footer-text {
|
|
float: left;
|
|
padding-bottom: 0.5em;
|
|
}
|
|
#footer-badges {
|
|
float: right;
|
|
padding-bottom: 0.5em;
|
|
}
|
|
|
|
#preamble {
|
|
margin-top: 1.5em;
|
|
margin-bottom: 1.5em;
|
|
}
|
|
div.imageblock, div.exampleblock, div.verseblock,
|
|
div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock,
|
|
div.admonitionblock {
|
|
margin-top: 1.0em;
|
|
margin-bottom: 1.5em;
|
|
}
|
|
div.admonitionblock {
|
|
margin-top: 2.0em;
|
|
margin-bottom: 2.0em;
|
|
margin-right: 10%;
|
|
color: #606060;
|
|
}
|
|
|
|
div.content { /* Block element content. */
|
|
padding: 0;
|
|
}
|
|
|
|
/* Block element titles. */
|
|
div.title, caption.title {
|
|
color: #527bbd;
|
|
font-weight: bold;
|
|
text-align: left;
|
|
margin-top: 1.0em;
|
|
margin-bottom: 0.5em;
|
|
}
|
|
div.title + * {
|
|
margin-top: 0;
|
|
}
|
|
|
|
td div.title:first-child {
|
|
margin-top: 0.0em;
|
|
}
|
|
div.content div.title:first-child {
|
|
margin-top: 0.0em;
|
|
}
|
|
div.content + div.title {
|
|
margin-top: 0.0em;
|
|
}
|
|
|
|
div.sidebarblock > div.content {
|
|
background: #ffffee;
|
|
border: 1px solid #dddddd;
|
|
border-left: 4px solid #f0f0f0;
|
|
padding: 0.5em;
|
|
}
|
|
|
|
div.listingblock > div.content {
|
|
border: 1px solid #dddddd;
|
|
border-left: 5px solid #f0f0f0;
|
|
background: #f8f8f8;
|
|
padding: 0.5em;
|
|
}
|
|
|
|
div.quoteblock, div.verseblock {
|
|
padding-left: 1.0em;
|
|
margin-left: 1.0em;
|
|
margin-right: 10%;
|
|
border-left: 5px solid #f0f0f0;
|
|
color: #888;
|
|
}
|
|
|
|
div.quoteblock > div.attribution {
|
|
padding-top: 0.5em;
|
|
text-align: right;
|
|
}
|
|
|
|
div.verseblock > pre.content {
|
|
font-family: inherit;
|
|
font-size: inherit;
|
|
}
|
|
div.verseblock > div.attribution {
|
|
padding-top: 0.75em;
|
|
text-align: left;
|
|
}
|
|
/* DEPRECATED: Pre version 8.2.7 verse style literal block. */
|
|
div.verseblock + div.attribution {
|
|
text-align: left;
|
|
}
|
|
|
|
div.admonitionblock .icon {
|
|
vertical-align: top;
|
|
font-size: 1.1em;
|
|
font-weight: bold;
|
|
text-decoration: underline;
|
|
color: #527bbd;
|
|
padding-right: 0.5em;
|
|
}
|
|
div.admonitionblock td.content {
|
|
padding-left: 0.5em;
|
|
border-left: 3px solid #dddddd;
|
|
}
|
|
|
|
div.exampleblock > div.content {
|
|
border-left: 3px solid #dddddd;
|
|
padding-left: 0.5em;
|
|
}
|
|
|
|
div.imageblock div.content { padding-left: 0; }
|
|
span.image img { border-style: none; vertical-align: text-bottom; }
|
|
a.image:visited { color: white; }
|
|
|
|
dl {
|
|
margin-top: 0.8em;
|
|
margin-bottom: 0.8em;
|
|
}
|
|
dt {
|
|
margin-top: 0.5em;
|
|
margin-bottom: 0;
|
|
font-style: normal;
|
|
color: navy;
|
|
}
|
|
dd > *:first-child {
|
|
margin-top: 0.1em;
|
|
}
|
|
|
|
ul, ol {
|
|
list-style-position: outside;
|
|
}
|
|
ol.arabic {
|
|
list-style-type: decimal;
|
|
}
|
|
ol.loweralpha {
|
|
list-style-type: lower-alpha;
|
|
}
|
|
ol.upperalpha {
|
|
list-style-type: upper-alpha;
|
|
}
|
|
ol.lowerroman {
|
|
list-style-type: lower-roman;
|
|
}
|
|
ol.upperroman {
|
|
list-style-type: upper-roman;
|
|
}
|
|
|
|
/* Tighter vertical spacing for lists, paragraphs and nested blocks that sit
   inside a div.compact container. */
div.compact ul, div.compact ol,
div.compact p,
div.compact div {
  margin-top: 0.1em;
  margin-bottom: 0.1em;
}
|
|
|
|
tfoot {
|
|
font-weight: bold;
|
|
}
|
|
td > div.verse {
|
|
white-space: pre;
|
|
}
|
|
|
|
div.hdlist {
|
|
margin-top: 0.8em;
|
|
margin-bottom: 0.8em;
|
|
}
|
|
div.hdlist tr {
|
|
padding-bottom: 15px;
|
|
}
|
|
dt.hdlist1.strong, td.hdlist1.strong {
|
|
font-weight: bold;
|
|
}
|
|
td.hdlist1 {
|
|
vertical-align: top;
|
|
font-style: normal;
|
|
padding-right: 0.8em;
|
|
color: navy;
|
|
}
|
|
td.hdlist2 {
|
|
vertical-align: top;
|
|
}
|
|
div.hdlist.compact tr {
|
|
margin: 0;
|
|
padding-bottom: 0;
|
|
}
|
|
|
|
.comment {
|
|
background: yellow;
|
|
}
|
|
|
|
.footnote, .footnoteref {
|
|
font-size: 0.8em;
|
|
}
|
|
|
|
span.footnote, span.footnoteref {
|
|
vertical-align: super;
|
|
}
|
|
|
|
#footnotes {
|
|
margin: 20px 0 20px 0;
|
|
padding: 7px 0 0 0;
|
|
}
|
|
|
|
#footnotes div.footnote {
|
|
margin: 0 0 5px 0;
|
|
}
|
|
|
|
#footnotes hr {
|
|
border: none;
|
|
border-top: 1px solid silver;
|
|
height: 1px;
|
|
text-align: left;
|
|
margin-left: 0;
|
|
width: 20%;
|
|
min-width: 100px;
|
|
}
|
|
|
|
div.colist td {
|
|
padding-right: 0.5em;
|
|
padding-bottom: 0.3em;
|
|
vertical-align: top;
|
|
}
|
|
div.colist td img {
|
|
margin-top: 0.3em;
|
|
}
|
|
|
|
@media print {
|
|
#footer-badges { display: none; }
|
|
}
|
|
|
|
#toc {
|
|
margin-bottom: 2.5em;
|
|
}
|
|
|
|
#toctitle {
|
|
color: #527bbd;
|
|
font-size: 1.1em;
|
|
font-weight: bold;
|
|
margin-top: 1.0em;
|
|
margin-bottom: 0.1em;
|
|
}
|
|
|
|
div.toclevel0, div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
|
|
margin-top: 0;
|
|
margin-bottom: 0;
|
|
}
|
|
div.toclevel2 {
|
|
margin-left: 2em;
|
|
font-size: 0.9em;
|
|
}
|
|
div.toclevel3 {
|
|
margin-left: 4em;
|
|
font-size: 0.9em;
|
|
}
|
|
div.toclevel4 {
|
|
margin-left: 6em;
|
|
font-size: 0.9em;
|
|
}
|
|
|
|
span.aqua { color: aqua; }
|
|
span.black { color: black; }
|
|
span.blue { color: blue; }
|
|
span.fuchsia { color: fuchsia; }
|
|
span.gray { color: gray; }
|
|
span.green { color: green; }
|
|
span.lime { color: lime; }
|
|
span.maroon { color: maroon; }
|
|
span.navy { color: navy; }
|
|
span.olive { color: olive; }
|
|
span.purple { color: purple; }
|
|
span.red { color: red; }
|
|
span.silver { color: silver; }
|
|
span.teal { color: teal; }
|
|
span.white { color: white; }
|
|
span.yellow { color: yellow; }
|
|
|
|
span.aqua-background { background: aqua; }
|
|
span.black-background { background: black; }
|
|
span.blue-background { background: blue; }
|
|
span.fuchsia-background { background: fuchsia; }
|
|
span.gray-background { background: gray; }
|
|
span.green-background { background: green; }
|
|
span.lime-background { background: lime; }
|
|
span.maroon-background { background: maroon; }
|
|
span.navy-background { background: navy; }
|
|
span.olive-background { background: olive; }
|
|
span.purple-background { background: purple; }
|
|
span.red-background { background: red; }
|
|
span.silver-background { background: silver; }
|
|
span.teal-background { background: teal; }
|
|
span.white-background { background: white; }
|
|
span.yellow-background { background: yellow; }
|
|
|
|
span.big { font-size: 2em; }
|
|
span.small { font-size: 0.6em; }
|
|
|
|
span.underline { text-decoration: underline; }
|
|
span.overline { text-decoration: overline; }
|
|
span.line-through { text-decoration: line-through; }
|
|
|
|
div.unbreakable { page-break-inside: avoid; }
|
|
|
|
|
|
/*
|
|
* xhtml11 specific
|
|
*
|
|
* */
|
|
|
|
div.tableblock {
|
|
margin-top: 1.0em;
|
|
margin-bottom: 1.5em;
|
|
}
|
|
div.tableblock > table {
|
|
border: 3px solid #527bbd;
|
|
}
|
|
thead, p.table.header {
|
|
font-weight: bold;
|
|
color: #527bbd;
|
|
}
|
|
p.table {
|
|
margin-top: 0;
|
|
}
|
|
/* Because the table frame attribute is overridden by CSS in most browsers. */
|
|
div.tableblock > table[frame="void"] {
|
|
border-style: none;
|
|
}
|
|
div.tableblock > table[frame="hsides"] {
|
|
border-left-style: none;
|
|
border-right-style: none;
|
|
}
|
|
div.tableblock > table[frame="vsides"] {
|
|
border-top-style: none;
|
|
border-bottom-style: none;
|
|
}
|
|
|
|
|
|
/*
|
|
* html5 specific
|
|
*
|
|
* */
|
|
|
|
table.tableblock {
|
|
margin-top: 1.0em;
|
|
margin-bottom: 1.5em;
|
|
}
|
|
thead, p.tableblock.header {
|
|
font-weight: bold;
|
|
color: #527bbd;
|
|
}
|
|
p.tableblock {
|
|
margin-top: 0;
|
|
}
|
|
table.tableblock {
|
|
border-width: 3px;
|
|
border-spacing: 0px;
|
|
border-style: solid;
|
|
border-color: #527bbd;
|
|
border-collapse: collapse;
|
|
}
|
|
th.tableblock, td.tableblock {
|
|
border-width: 1px;
|
|
padding: 4px;
|
|
border-style: solid;
|
|
border-color: #527bbd;
|
|
}
|
|
|
|
table.tableblock.frame-topbot {
|
|
border-left-style: hidden;
|
|
border-right-style: hidden;
|
|
}
|
|
table.tableblock.frame-sides {
|
|
border-top-style: hidden;
|
|
border-bottom-style: hidden;
|
|
}
|
|
table.tableblock.frame-none {
|
|
border-style: hidden;
|
|
}
|
|
|
|
th.tableblock.halign-left, td.tableblock.halign-left {
|
|
text-align: left;
|
|
}
|
|
th.tableblock.halign-center, td.tableblock.halign-center {
|
|
text-align: center;
|
|
}
|
|
th.tableblock.halign-right, td.tableblock.halign-right {
|
|
text-align: right;
|
|
}
|
|
|
|
th.tableblock.valign-top, td.tableblock.valign-top {
|
|
vertical-align: top;
|
|
}
|
|
th.tableblock.valign-middle, td.tableblock.valign-middle {
|
|
vertical-align: middle;
|
|
}
|
|
th.tableblock.valign-bottom, td.tableblock.valign-bottom {
|
|
vertical-align: bottom;
|
|
}
|
|
|
|
|
|
/*
|
|
* manpage specific
|
|
*
|
|
* */
|
|
|
|
body.manpage h1 {
|
|
padding-top: 0.5em;
|
|
padding-bottom: 0.5em;
|
|
border-top: 2px solid silver;
|
|
border-bottom: 2px solid silver;
|
|
}
|
|
body.manpage h2 {
|
|
border-style: none;
|
|
}
|
|
body.manpage div.sectionbody {
|
|
margin-left: 3em;
|
|
}
|
|
|
|
@media print {
|
|
body.manpage div#toc { display: none; }
|
|
}
|
|
|
|
|
|
</style>
|
|
<script type="text/javascript">
|
|
/*<![CDATA[*/
|
|
var asciidoc = { // Namespace.
|
|
|
|
/////////////////////////////////////////////////////////////////////
|
|
// Table Of Contents generator
|
|
/////////////////////////////////////////////////////////////////////
|
|
|
|
/* Author: Mihai Bazon, September 2002
|
|
* http://students.infoiasi.ro/~mishoo
|
|
*
|
|
* Table Of Content generator
|
|
* Version: 0.4
|
|
*
|
|
* Feel free to use this script under the terms of the GNU General Public
|
|
* License, as long as you do not remove or alter this notice.
|
|
*/
|
|
|
|
/* modified by Troy D. Hanson, September 2006. License: GPL */
|
|
/* modified by Stuart Rackham, 2006, 2009. License: GPL */
|
|
|
|
// toclevels = 1..4.
|
|
toc: function (toclevels) {
|
|
|
|
function getText(el) {
|
|
var text = "";
|
|
for (var i = el.firstChild; i != null; i = i.nextSibling) {
|
|
if (i.nodeType == 3 /* Node.TEXT_NODE */) // IE doesn't speak constants.
|
|
text += i.data;
|
|
else if (i.firstChild != null)
|
|
text += getText(i);
|
|
}
|
|
return text;
|
|
}
|
|
|
|
function TocEntry(el, text, toclevel) {
|
|
this.element = el;
|
|
this.text = text;
|
|
this.toclevel = toclevel;
|
|
}
|
|
|
|
function tocEntries(el, toclevels) {
|
|
var result = new Array;
|
|
var re = new RegExp('[hH]([1-'+(toclevels+1)+'])');
|
|
// Function that scans the DOM tree for header elements (the DOM2
|
|
// nodeIterator API would be a better technique but not supported by all
|
|
// browsers).
|
|
var iterate = function (el) {
|
|
for (var i = el.firstChild; i != null; i = i.nextSibling) {
|
|
if (i.nodeType == 1 /* Node.ELEMENT_NODE */) {
|
|
var mo = re.exec(i.tagName);
|
|
if (mo && (i.getAttribute("class") || i.getAttribute("className")) != "float") {
|
|
result[result.length] = new TocEntry(i, getText(i), mo[1]-1);
|
|
}
|
|
iterate(i);
|
|
}
|
|
}
|
|
}
|
|
iterate(el);
|
|
return result;
|
|
}
|
|
|
|
var toc = document.getElementById("toc");
|
|
if (!toc) {
|
|
return;
|
|
}
|
|
|
|
// Delete existing TOC entries in case we're reloading the TOC.
|
|
var tocEntriesToRemove = [];
|
|
var i;
|
|
for (i = 0; i < toc.childNodes.length; i++) {
|
|
var entry = toc.childNodes[i];
|
|
if (entry.nodeName.toLowerCase() == 'div'
|
|
&& entry.getAttribute("class")
|
|
&& entry.getAttribute("class").match(/^toclevel/))
|
|
tocEntriesToRemove.push(entry);
|
|
}
|
|
for (i = 0; i < tocEntriesToRemove.length; i++) {
|
|
toc.removeChild(tocEntriesToRemove[i]);
|
|
}
|
|
|
|
// Rebuild TOC entries.
|
|
var entries = tocEntries(document.getElementById("content"), toclevels);
|
|
for (var i = 0; i < entries.length; ++i) {
|
|
var entry = entries[i];
|
|
if (entry.element.id == "")
|
|
entry.element.id = "_toc_" + i;
|
|
var a = document.createElement("a");
|
|
a.href = "#" + entry.element.id;
|
|
a.appendChild(document.createTextNode(entry.text));
|
|
var div = document.createElement("div");
|
|
div.appendChild(a);
|
|
div.className = "toclevel" + entry.toclevel;
|
|
toc.appendChild(div);
|
|
}
|
|
if (entries.length == 0)
|
|
toc.parentNode.removeChild(toc);
|
|
},
|
|
|
|
|
|
/////////////////////////////////////////////////////////////////////
|
|
// Footnotes generator
|
|
/////////////////////////////////////////////////////////////////////
|
|
|
|
/* Based on footnote generation code from:
|
|
* http://www.brandspankingnew.net/archive/2005/07/format_footnote.html
|
|
*/
|
|
|
|
footnotes: function () {
|
|
// Delete existing footnote entries in case we're reloading the footnodes.
|
|
var i;
|
|
var noteholder = document.getElementById("footnotes");
|
|
if (!noteholder) {
|
|
return;
|
|
}
|
|
var entriesToRemove = [];
|
|
for (i = 0; i < noteholder.childNodes.length; i++) {
|
|
var entry = noteholder.childNodes[i];
|
|
if (entry.nodeName.toLowerCase() == 'div' && entry.getAttribute("class") == "footnote")
|
|
entriesToRemove.push(entry);
|
|
}
|
|
for (i = 0; i < entriesToRemove.length; i++) {
|
|
noteholder.removeChild(entriesToRemove[i]);
|
|
}
|
|
|
|
// Rebuild footnote entries.
|
|
var cont = document.getElementById("content");
|
|
var spans = cont.getElementsByTagName("span");
|
|
var refs = {};
|
|
var n = 0;
|
|
for (i=0; i<spans.length; i++) {
|
|
if (spans[i].className == "footnote") {
|
|
n++;
|
|
var note = spans[i].getAttribute("data-note");
|
|
if (!note) {
|
|
// Use [\s\S] in place of . so multi-line matches work.
|
|
// Because JavaScript has no s (dotall) regex flag.
|
|
note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
|
|
spans[i].innerHTML =
|
|
"[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
|
|
"' title='View footnote' class='footnote'>" + n + "</a>]";
|
|
spans[i].setAttribute("data-note", note);
|
|
}
|
|
noteholder.innerHTML +=
|
|
"<div class='footnote' id='_footnote_" + n + "'>" +
|
|
"<a href='#_footnoteref_" + n + "' title='Return to text'>" +
|
|
n + "</a>. " + note + "</div>";
|
|
var id =spans[i].getAttribute("id");
|
|
if (id != null) refs["#"+id] = n;
|
|
}
|
|
}
|
|
if (n == 0)
|
|
noteholder.parentNode.removeChild(noteholder);
|
|
else {
|
|
// Process footnoterefs.
|
|
for (i=0; i<spans.length; i++) {
|
|
if (spans[i].className == "footnoteref") {
|
|
var href = spans[i].getElementsByTagName("a")[0].getAttribute("href");
|
|
href = href.match(/#.*/)[0]; // Because IE return full URL.
|
|
n = refs[href];
|
|
spans[i].innerHTML =
|
|
"[<a href='#_footnote_" + n +
|
|
"' title='View footnote' class='footnote'>" + n + "</a>]";
|
|
}
|
|
}
|
|
}
|
|
},
|
|
|
|
install: function(toclevels) {
|
|
var timerId;
|
|
|
|
function reinstall() {
|
|
asciidoc.footnotes();
|
|
if (toclevels) {
|
|
asciidoc.toc(toclevels);
|
|
}
|
|
}
|
|
|
|
function reinstallAndRemoveTimer() {
|
|
clearInterval(timerId);
|
|
reinstall();
|
|
}
|
|
|
|
timerId = setInterval(reinstall, 500);
|
|
if (document.addEventListener)
|
|
document.addEventListener("DOMContentLoaded", reinstallAndRemoveTimer, false);
|
|
else
|
|
window.onload = reinstallAndRemoveTimer;
|
|
}
|
|
|
|
}
|
|
asciidoc.install();
|
|
/*]]>*/
|
|
</script>
|
|
</head>
|
|
<body class="manpage">
|
|
<div id="header">
|
|
<h1>
|
|
git-filter-repo(1) Manual Page
|
|
</h1>
|
|
<h2>NAME</h2>
|
|
<div class="sectionbody">
|
|
<p>git-filter-repo -
|
|
Rewrite repository history
|
|
</p>
|
|
</div>
|
|
</div>
|
|
<div id="content">
|
|
<div class="sect1">
|
|
<h2 id="_synopsis">SYNOPSIS</h2>
|
|
<div class="sectionbody">
|
|
<div class="verseblock">
|
|
<pre class="content"><em>git filter-repo</em> --analyze
|
|
<em>git filter-repo</em> [&lt;path_filtering_options&gt;] [&lt;content_filtering_options&gt;]
|
|
[&lt;ref_renaming_options&gt;] [&lt;commit_message_filtering_options&gt;]
|
|
[&lt;name_or_email_filtering_options&gt;] [&lt;parent_rewriting_options&gt;]
|
|
[&lt;generic_callback_options&gt;] [&lt;miscellaneous_options&gt;]</pre>
|
|
<div class="attribution">
|
|
</div></div>
|
|
</div>
|
|
</div>
|
|
<div class="sect1">
|
|
<h2 id="_description">DESCRIPTION</h2>
|
|
<div class="sectionbody">
|
|
<div class="paragraph"><p>Rapidly rewrite entire repository history using user-specified filters.
|
|
This is a destructive operation which should not be used lightly; it
|
|
writes new commits, trees, tags, and blobs corresponding to (but
|
|
filtered from) the original objects in the repository, then deletes the
|
|
original history and leaves only the new. See <a href="#DISCUSSION">[DISCUSSION]</a> for more
|
|
details on the ramifications of using this tool. Several different
|
|
types of history rewrites are possible; examples include (but are not
|
|
limited to):</p></div>
|
|
<div class="ulist"><ul>
|
|
<li>
|
|
<p>
|
|
stripping large files (or large directories or large extensions)
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
stripping unwanted files by path
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
extracting wanted paths and their history (stripping everything else)
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
restructuring the file layout (such as moving all files into a
|
|
subdirectory in preparation for merging with another repo, making a
|
|
subdirectory become the new toplevel directory, or merging two
|
|
directories with independent filenames into one directory)
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
renaming tags (also often in preparation for merging with another repo)
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
replacing or removing sensitive text such as passwords
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
making mailmap rewriting of user names or emails permanent
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
making grafts or replacement refs permanent
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
rewriting commit messages
|
|
</p>
|
|
</li>
|
|
</ul></div>
|
|
<div class="paragraph"><p>Additionally, several concerns are handled automatically (many of these
|
|
can be overridden, but they are all on by default):</p></div>
|
|
<div class="ulist"><ul>
|
|
<li>
|
|
<p>
|
|
rewriting (possibly abbreviated) hashes in commit messages to
|
|
refer to the new post-rewrite commit hashes
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
pruning commits which become empty due to the above filters (also
|
|
handles edge cases like pruning of merge commits which become
|
|
degenerate and empty)
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
creating replace-refs (see <a href="git-replace.html">git-replace(1)</a>) for old commit
|
|
hashes, which if manually pushed and fetched will allow users to
|
|
continue to refer to new commits using (unabbreviated) old commit
|
|
IDs
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
stripping of original history to avoid mixing old and new history
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
repacking the repository post-rewrite to shrink the repo for the
|
|
user
|
|
</p>
|
|
</li>
|
|
</ul></div>
|
|
<div class="paragraph"><p>Also, it’s worth noting that there is an important safety mechanism:</p></div>
|
|
<div class="ulist"><ul>
|
|
<li>
|
|
<p>
|
|
abort if run from a repo that is not a fresh clone (to prevent
|
|
accidental data loss from rewriting local history that doesn’t
|
|
exist anywhere else). See <a href="#FRESHCLONE">[FRESHCLONE]</a>.
|
|
</p>
|
|
</li>
|
|
</ul></div>
|
|
<div class="paragraph"><p>For those who know that there is large unwanted stuff in their history
|
|
and want help finding it, this command also</p></div>
|
|
<div class="ulist"><ul>
|
|
<li>
|
|
<p>
|
|
provides an option to analyze a repository and generate reports that
|
|
can be useful in determining what to filter (or in determining
|
|
whether a separate filtering command was successful).
|
|
</p>
|
|
</li>
|
|
</ul></div>
|
|
<div class="paragraph"><p>See also <a href="#VERSATILITY">[VERSATILITY]</a>, <a href="#DISCUSSION">[DISCUSSION]</a>, <a href="#EXAMPLES">[EXAMPLES]</a>, and
|
|
<a href="#INTERNALS">[INTERNALS]</a>.</p></div>
|
|
</div>
|
|
</div>
|
|
<div class="sect1">
|
|
<h2 id="_options">OPTIONS</h2>
|
|
<div class="sectionbody">
|
|
<div class="sect2">
|
|
<h3 id="_analysis_options">Analysis Options</h3>
|
|
<div class="dlist"><dl>
|
|
<dt class="hdlist1">
|
|
--analyze
|
|
</dt>
|
|
<dd>
|
|
<p>
|
|
Analyze repository history and create a report that may be
|
|
useful in determining what to filter in a subsequent run (or
|
|
in determining if a previous filtering command did what you
|
|
wanted). Will not modify your repo.
|
|
</p>
|
|
</dd>
|
|
</dl></div>
|
|
</div>
|
|
<div class="sect2">
|
|
<h3 id="_filtering_based_on_paths_see_also_filename_callback">Filtering based on paths (see also --filename-callback)</h3>
|
|
<div class="paragraph"><p>These options specify the paths to select. Note that much like git
|
|
itself, renames are NOT followed so you may need to specify multiple
|
|
paths, e.g. <code>--path olddir/ --path newdir/</code></p></div>
|
|
<div class="dlist"><dl>
|
|
<dt class="hdlist1">
|
|
--invert-paths
|
|
</dt>
|
|
<dd>
|
|
<p>
|
|
Invert the selection of files from the specified
|
|
--path-{match,glob,regex} options below, i.e. only select
|
|
files matching none of those options.
|
|
</p>
|
|
</dd>
|
|
<dt class="hdlist1">
|
|
--path-match &lt;dir_or_file&gt;
|
|
</dt>
|
|
<dt class="hdlist1">
|
|
--path &lt;dir_or_file&gt;
|
|
</dt>
|
|
<dd>
|
|
<p>
|
|
Exact paths (files or directories) to include in filtered
|
|
history. Multiple --path options can be specified to get a
|
|
union of paths.
|
|
</p>
|
|
</dd>
|
|
<dt class="hdlist1">
|
|
--path-glob &lt;glob&gt;
|
|
</dt>
|
|
<dd>
|
|
<p>
|
|
Glob of paths to include in filtered history. Multiple
|
|
--path-glob options can be specified to get a union of paths.
|
|
</p>
|
|
</dd>
|
|
<dt class="hdlist1">
|
|
--path-regex &lt;regex&gt;
|
|
</dt>
|
|
<dd>
|
|
<p>
|
|
Regex of paths to include in filtered history. Multiple
|
|
--path-regex options can be specified to get a union of paths.
|
|
</p>
|
|
</dd>
|
|
<dt class="hdlist1">
|
|
--use-base-name
|
|
</dt>
|
|
<dd>
|
|
<p>
|
|
Match on file base name instead of full path from the top of
|
|
the repo. Incompatible with --path-rename, and incompatible
|
|
with matching against directory names.
|
|
</p>
|
|
</dd>
|
|
</dl></div>
|
|
</div>
|
|
<div class="sect2">
|
|
<h3 id="_renaming_based_on_paths_see_also_filename_callback">Renaming based on paths (see also --filename-callback)</h3>
|
|
<div class="paragraph"><p>Note: if you combine path filtering with path renaming, be aware that
|
|
a rename directive does not select paths, it only says how to
|
|
rename paths that are selected with the filters.</p></div>
|
|
<div class="dlist"><dl>
|
|
<dt class="hdlist1">
|
|
--path-rename &lt;old_name:new_name&gt;
|
|
</dt>
|
|
<dt class="hdlist1">
|
|
--path-rename-match &lt;old_name:new_name&gt;
|
|
</dt>
|
|
<dd>
|
|
<p>
|
|
Path to rename; if filename or directory matches &lt;old_name&gt;
|
|
rename to &lt;new_name&gt;. Multiple --path-rename options can be
|
|
specified.
|
|
</p>
|
|
</dd>
|
|
</dl></div>
|
|
</div>
|
|
<div class="sect2">
|
|
<h3 id="_path_shortcuts">Path shortcuts</h3>
|
|
<div class="dlist"><dl>
|
|
<dt class="hdlist1">
|
|
--paths-from-file &lt;filename&gt;
|
|
</dt>
|
|
<dd>
|
|
<p>
|
|
Specify several path filtering and renaming directives, one
|
|
per line. Lines with <code>==></code> in them specify path renames, and
|
|
lines can begin with <code>literal:</code> (the default), <code>glob:</code>, or
|
|
<code>regex:</code> to specify different matching styles. Blank lines
|
|
and lines starting with a <code>#</code> are ignored (if you have a
|
|
filename that you want to filter on that starts with
|
|
<code>literal:</code>, <code>#</code>, <code>glob:</code>, or <code>regex:</code>, then prefix the line
|
|
with <em>literal:</em>).
|
|
</p>
|
|
</dd>
|
|
<dt class="hdlist1">
|
|
--subdirectory-filter &lt;directory&gt;
|
|
</dt>
|
|
<dd>
|
|
<p>
|
|
Only look at history that touches the given subdirectory and
|
|
treat that directory as the project root. Equivalent to using
|
|
<code>--path &lt;directory&gt;/ --path-rename &lt;directory&gt;/:</code>
|
|
</p>
|
|
</dd>
|
|
<dt class="hdlist1">
|
|
--to-subdirectory-filter &lt;directory&gt;
|
|
</dt>
|
|
<dd>
|
|
<p>
|
|
Treat the project root as instead being under
|
|
&lt;directory&gt;. Equivalent to using <code>--path-rename :&lt;directory&gt;/</code>
|
|
</p>
|
|
</dd>
|
|
</dl></div>
|
|
</div>
|
|
<div class="sect2">
|
|
<h3 id="_content_editing_filters_see_also_blob_callback">Content editing filters (see also --blob-callback)</h3>
|
|
<div class="dlist"><dl>
|
|
<dt class="hdlist1">
|
|
--replace-text &lt;expressions_file&gt;
|
|
</dt>
|
|
<dd>
|
|
<p>
|
|
A file with expressions that, if found, will be replaced. By
|
|
default, each expression is treated as literal text, but
|
|
<code>regex:</code> and <code>glob:</code> prefixes are supported. You can end the
|
|
line with <code>==></code> and some replacement text to choose a
|
|
replacement choice other than the default of <code>***REMOVED***</code>.
|
|
</p>
|
|
</dd>
|
|
<dt class="hdlist1">
|
|
--strip-blobs-bigger-than &lt;size&gt;
|
|
</dt>
|
|
<dd>
|
|
<p>
|
|
Strip blobs (files) bigger than specified size (e.g. <code>5M</code>,
|
|
<code>2G</code>, etc)
|
|
</p>
|
|
</dd>
|
|
<dt class="hdlist1">
|
|
--strip-blobs-with-ids &lt;blob_id_filename&gt;
|
|
</dt>
|
|
<dd>
|
|
<p>
|
|
Read git object ids from each line of the given file, and
|
|
strip all of them from history
|
|
</p>
|
|
</dd>
|
|
</dl></div>
|
|
</div>
|
|
<div class="sect2">
|
|
<h3 id="_renaming_of_refs_see_also_refname_callback">Renaming of refs (see also --refname-callback)</h3>
|
|
<div class="dlist"><dl>
|
|
<dt class="hdlist1">
|
|
--tag-rename &lt;old:new&gt;
|
|
</dt>
|
|
<dd>
|
|
<p>
|
|
Rename tags starting with &lt;old&gt; to start with &lt;new&gt;. For example,
|
|
--tag-rename foo:bar will rename tag foo-1.2.3 to bar-1.2.3;
|
|
either &lt;old&gt; or &lt;new&gt; can be empty.
|
|
</p>
|
|
</dd>
|
|
</dl></div>
|
|
</div>
|
|
<div class="sect2">
|
|
<h3 id="_filtering_of_commit_messages_see_also_message_callback">Filtering of commit messages (see also --message-callback)</h3>
|
|
<div class="dlist"><dl>
|
|
<dt class="hdlist1">
|
|
--replace-message &lt;expressions_file&gt;
|
|
</dt>
|
|
<dd>
|
|
<p>
|
|
A file with expressions that, if found in commit or tag
|
|
messages, will be replaced. This file uses the same syntax as
|
|
--replace-text.
|
|
</p>
|
|
</dd>
|
|
<dt class="hdlist1">
|
|
--preserve-commit-hashes
|
|
</dt>
|
|
<dd>
|
|
<p>
|
|
By default, since commits are rewritten and thus gain new
|
|
hashes, references to old commit hashes in commit messages are
|
|
replaced with new commit hashes (abbreviated to the same
|
|
length as the old reference). Use this flag to turn off
|
|
updating commit hashes in commit messages.
|
|
</p>
|
|
</dd>
|
|
<dt class="hdlist1">
|
|
--preserve-commit-encoding
|
|
</dt>
|
|
<dd>
|
|
<p>
|
|
Do not reencode commit messages into UTF-8. By default, if the
|
|
commit object specifies an encoding for the commit message,
|
|
the message is re-encoded into UTF-8.
|
|
</p>
|
|
</dd>
|
|
</dl></div>
|
|
</div>
|
|
<div class="sect2">
|
|
<h3 id="_filtering_of_names_amp_emails_see_also_name_callback_and_email_callback">Filtering of names &amp; emails (see also --name-callback and --email-callback)</h3>
|
|
<div class="dlist"><dl>
|
|
<dt class="hdlist1">
|
|
--mailmap &lt;filename&gt;
|
|
</dt>
|
|
<dd>
|
|
<p>
|
|
Use specified mailmap file (see <a href="git-shortlog.html">git-shortlog(1)</a> for details
|
|
on the format) when rewriting author, committer, and tagger names
|
|
and emails. If the specified file is part of git history,
|
|
historical versions of the file will be ignored; only the current
|
|
contents are consulted.
|
|
</p>
|
|
</dd>
|
|
<dt class="hdlist1">
|
|
--use-mailmap
|
|
</dt>
|
|
<dd>
|
|
<p>
|
|
Same as: <em>--mailmap .mailmap</em>
|
|
</p>
|
|
</dd>
|
|
</dl></div>
|
|
</div>
|
|
<div class="sect2">
|
|
<h3 id="_parent_rewriting">Parent rewriting</h3>
|
|
<div class="dlist"><dl>
|
|
<dt class="hdlist1">
|
|
--replace-refs {delete-no-add, delete-and-add, update-no-add, update-or-add, update-and-add}
|
|
</dt>
|
|
<dd>
|
|
<p>
|
|
Replace refs (see <a href="git-replace.html">git-replace(1)</a>) are used to rewrite
|
|
parents (unless turned off by the usual git mechanism); this
|
|
flag specifies what to do with those refs afterward. Replace
|
|
refs can either be deleted or updated to point at new commit
|
|
hashes. Also, new replace refs can be added for each commit
|
|
rewrite. With <em>update-or-add</em>, new replace refs are only
|
|
added for commit rewrites that aren’t used to update an
|
|
existing replace ref. Default is <em>update-and-add</em> if
|
|
$GIT_DIR/filter-repo/already_ran does not exist;
|
|
<em>update-or-add</em> otherwise.
|
|
</p>
|
|
</dd>
|
|
<dt class="hdlist1">
|
|
--prune-empty {always, auto, never}
|
|
</dt>
|
|
<dd>
|
|
<p>
|
|
Whether to prune empty commits. <em>auto</em> (the default) means
|
|
only prune commits which become empty (not commits which were
|
|
empty in the original repo, unless their parent was
|
|
pruned). When the parent of a commit is pruned, the first
|
|
non-pruned ancestor becomes the new parent.
|
|
</p>
|
|
</dd>
|
|
<dt class="hdlist1">
|
|
--prune-degenerate {always, auto, never}
|
|
</dt>
|
|
<dd>
|
|
<p>
|
|
Since merge commits are needed for history topology, they are
|
|
typically exempt from pruning. However, they can become
|
|
degenerate with the pruning of other commits (having fewer
|
|
than two parents, having one commit serve as both parents, or
|
|
having one parent as the ancestor of the other.) If such merge
|
|
commits have no file changes, they can be pruned. The default
|
|
(<em>auto</em>) is to only prune empty merge commits which become
|
|
degenerate (not which started as such).
|
|
</p>
|
|
</dd>
|
|
<dt class="hdlist1">
|
|
--no-ff
|
|
</dt>
|
|
<dd>
|
|
<p>
|
|
Even if the first parent is or becomes an ancestor of another
|
|
parent, do not prune it. This modifies how --prune-degenerate
|
|
behaves, and may be useful in projects that always use merge
|
|
--no-ff.
|
|
</p>
|
|
</dd>
|
|
</dl></div>
|
|
</div>
|
|
<div class="sect2">
|
|
<h3 id="_generic_callback_code_snippets">Generic callback code snippets</h3>
|
|
<div class="dlist"><dl>
|
|
<dt class="hdlist1">
|
|
--filename-callback &lt;function_body&gt;
|
|
</dt>
|
|
<dd>
|
|
<p>
|
|
Python code body for processing filenames; see <a href="#CALLBACKS">[CALLBACKS]</a>.
|
|
</p>
|
|
</dd>
|
|
<dt class="hdlist1">
|
|
--message-callback &lt;function_body&gt;
|
|
</dt>
|
|
<dd>
|
|
<p>
|
|
Python code body for processing messages (both commit messages and
|
|
tag messages); see <a href="#CALLBACKS">[CALLBACKS]</a>.
|
|
</p>
|
|
</dd>
|
|
<dt class="hdlist1">
|
|
--name-callback &lt;function_body&gt;
|
|
</dt>
|
|
<dd>
|
|
<p>
|
|
Python code body for processing names of people; see <a href="#CALLBACKS">[CALLBACKS]</a>.
|
|
</p>
|
|
</dd>
|
|
<dt class="hdlist1">
|
|
--email-callback &lt;function_body&gt;
|
|
</dt>
|
|
<dd>
|
|
<p>
|
|
Python code body for processing email addresses; see
|
|
<a href="#CALLBACKS">[CALLBACKS]</a>.
|
|
</p>
|
|
</dd>
|
|
<dt class="hdlist1">
|
|
--refname-callback &lt;function_body&gt;
|
|
</dt>
|
|
<dd>
|
|
<p>
|
|
Python code body for processing refnames; see <a href="#CALLBACKS">[CALLBACKS]</a>.
|
|
</p>
|
|
</dd>
|
|
<dt class="hdlist1">
|
|
--blob-callback &lt;function_body&gt;
|
|
</dt>
|
|
<dd>
|
|
<p>
|
|
Python code body for processing blob objects; see <a href="#CALLBACKS">[CALLBACKS]</a>.
|
|
</p>
|
|
</dd>
|
|
<dt class="hdlist1">
|
|
--commit-callback &lt;function_body&gt;
|
|
</dt>
|
|
<dd>
|
|
<p>
|
|
Python code body for processing commit objects; see <a href="#CALLBACKS">[CALLBACKS]</a>.
|
|
</p>
|
|
</dd>
|
|
<dt class="hdlist1">
|
|
--tag-callback &lt;function_body&gt;
|
|
</dt>
|
|
<dd>
|
|
<p>
|
|
Python code body for processing tag objects; see <a href="#CALLBACKS">[CALLBACKS]</a>.
|
|
</p>
|
|
</dd>
|
|
<dt class="hdlist1">
|
|
--reset-callback &lt;function_body&gt;
|
|
</dt>
|
|
<dd>
|
|
<p>
|
|
Python code body for processing reset objects; see <a href="#CALLBACKS">[CALLBACKS]</a>.
|
|
</p>
|
|
</dd>
|
|
</dl></div>
|
|
</div>
|
|
<div class="sect2">
|
|
<h3 id="_location_to_filter_from_to">Location to filter from/to</h3>
|
|
<div class="admonitionblock">
|
|
<table><tr>
|
|
<td class="icon">
|
|
<div class="title">Note</div>
|
|
</td>
|
|
<td class="content">Specifying alternate source or target locations implies --partial
|
|
except that the normal default for --replace-refs is used. However, unlike
|
|
normal uses of --partial, this doesn’t risk mixing old and new history
|
|
since the old and new histories are in different repositories.</td>
|
|
</tr></table>
|
|
</div>
|
|
<div class="dlist"><dl>
|
|
<dt class="hdlist1">
|
|
--source &lt;source&gt;
|
|
</dt>
|
|
<dd>
|
|
<p>
|
|
Git repository to read from
|
|
</p>
|
|
</dd>
|
|
<dt class="hdlist1">
|
|
--target &lt;target&gt;
|
|
</dt>
|
|
<dd>
|
|
<p>
|
|
Git repository to overwrite with filtered history
|
|
</p>
|
|
</dd>
|
|
</dl></div>
|
|
</div>
|
|
<div class="sect2">
|
|
<h3 id="_miscellaneous_options">Miscellaneous options</h3>
|
|
<div class="dlist"><dl>
|
|
<dt class="hdlist1">
|
|
--help
|
|
</dt>
|
|
<dt class="hdlist1">
|
|
-h
|
|
</dt>
|
|
<dd>
|
|
<p>
|
|
Show a help message and exit.
|
|
</p>
|
|
</dd>
|
|
<dt class="hdlist1">
|
|
--force
|
|
</dt>
|
|
<dt class="hdlist1">
|
|
-f
|
|
</dt>
|
|
<dd>
|
|
<p>
|
|
Ignore fresh clone checks and rewrite history (an irreversible
|
|
operation, especially since it by default ends with an
|
|
immediate pruning of reflogs and old objects). See
|
|
<a href="#FRESHCLONE">[FRESHCLONE]</a>. Note that when cloning repos on a local
|
|
filesystem, it is better to pass <code>--no-local</code> to git clone
|
|
than passing <code>--force</code> to git-filter-repo.
|
|
</p>
|
|
</dd>
|
|
<dt class="hdlist1">
|
|
--partial
|
|
</dt>
|
|
<dd>
|
|
<p>
|
|
Do a partial history rewrite, resulting in a mixture of old and
|
|
new history. This implies a default of update-no-add for
|
|
--replace-refs, disables rewriting refs/remotes/origin/* to
|
|
refs/heads/*, disables removing of the <em>origin</em> remote, disables
|
|
removing unexported refs, disables expiring the reflog, and
|
|
disables the automatic post-filter gc. Also, this modifies
|
|
--tag-rename and --refname-callback options such that instead of
|
|
replacing old refs with new refnames, it will instead create new
|
|
refs and keep the old ones around. Use with caution.
|
|
</p>
|
|
</dd>
|
|
<dt class="hdlist1">
|
|
--refs &lt;refs+&gt;
|
|
</dt>
|
|
<dd>
|
|
<p>
|
|
Limit history rewriting to the specified refs. Implies --partial.
|
|
In addition to the normal caveats of --partial (mixing old and new
|
|
history, no automatic remapping of refs/remotes/origin/* to
|
|
refs/heads/*, etc.), this also may cause problems for pruning of
|
|
degenerate empty merge commits when negative revisions are
|
|
specified.
|
|
</p>
|
|
</dd>
|
|
<dt class="hdlist1">
|
|
--dry-run
|
|
</dt>
|
|
<dd>
|
|
<p>
|
|
Do not change the repository. Run <code>git fast-export</code> and filter its
|
|
output, and save both the original and the filtered version for
|
|
comparison. This also disables rewriting commit messages due to
|
|
not knowing new commit IDs and disables filtering of some empty
|
|
commits due to inability to query the fast-import backend.
|
|
</p>
|
|
</dd>
|
|
<dt class="hdlist1">
|
|
--debug
|
|
</dt>
|
|
<dd>
|
|
<p>
|
|
Print additional information about operations being performed and
|
|
commands being run. (If used together with --dry-run, shows
|
|
extra information about what would be run).
|
|
</p>
|
|
</dd>
|
|
<dt class="hdlist1">
|
|
--stdin
|
|
</dt>
|
|
<dd>
|
|
<p>
|
|
Instead of running <code>git fast-export</code> and filtering its output,
|
|
filter the fast-export stream from stdin. The stdin must be in
|
|
the expected input format (e.g. it needs to include original-oid
|
|
directives).
|
|
</p>
|
|
</dd>
|
|
<dt class="hdlist1">
|
|
--quiet
|
|
</dt>
|
|
<dd>
|
|
<p>
|
|
Pass --quiet to other git commands called.
|
|
</p>
|
|
</dd>
|
|
</dl></div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
<div class="sect1">
|
|
<h2 id="_output">OUTPUT</h2>
|
|
<div class="sectionbody">
|
|
<div class="paragraph"><p>Every time filter-repo is run, files are created in the <code>.git/filter-repo/</code>
|
|
directory. These files are overwritten unconditionally on every run.</p></div>
|
|
<div class="sect2">
|
|
<h3 id="_commit_map">Commit map</h3>
|
|
<div class="paragraph"><p>The <code>.git/filter-repo/commit-map</code> file contains a mapping of how all
|
|
commits were (or were not) changed.</p></div>
|
|
<div class="ulist"><ul>
|
|
<li>
|
|
<p>
|
|
A header is the first line with the text "old" and "new"
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
Commit mappings are in no particular order
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
All commits in range of the rewrite will be listed, even commits
|
|
that are unchanged (e.g. because the commit pre-dated when the
|
|
large file(s) were introduced to the repo).
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
An all-zeros hash, or null SHA, represents a non-existent object.
|
|
When in the "new" column, this means the commit was removed
|
|
entirely.
|
|
</p>
|
|
</li>
|
|
</ul></div>
|
|
</div>
|
|
<div class="sect2">
|
|
<h3 id="_reference_map">Reference map</h3>
|
|
<div class="paragraph"><p>The <code>.git/filter-repo/ref-map</code> file contains a mapping of which local
|
|
references were changed.</p></div>
|
|
<div class="ulist"><ul>
|
|
<li>
|
|
<p>
|
|
A header is the first line with the text "old", "new" and "ref"
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
Reference mappings are in no particular order
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
An all-zeros hash, or null SHA, represents a non-existent object.
|
|
When in the "new" column, this means the ref was removed entirely.
|
|
</p>
|
|
</li>
|
|
</ul></div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
<div class="sect1">
|
|
<h2 id="FRESHCLONE">FRESH CLONE SAFETY CHECK AND --FORCE</h2>
|
|
<div class="sectionbody">
|
|
<div class="paragraph"><p>Since filter-repo does irreversible rewriting of history, it is
|
|
important to avoid making changes to a repo for which the user doesn’t
|
|
have a good backup. The primary defense mechanism is to simply
|
|
educate users and rely on them to be good stewards of their data; thus
|
|
there are several warnings in the documentation about how filter-repo
|
|
rewrites history.</p></div>
|
|
<div class="paragraph"><p>However, as a service to users, we would like to provide an additional
|
|
safety check beyond the documentation. There isn’t a good way to
|
|
check if the user has a good backup, but we can ask a related question
|
|
that is an imperfect but quite reasonable proxy: "Is this repository a
|
|
fresh clone?" Unfortunately, that is also a question we can’t get a
|
|
perfect answer to; git provides no way to answer that question.
|
|
However, there are approximately a dozen things that I found that seem
|
|
to always be true of brand new clones (assuming they are either clones
|
|
of remote repositories or are made with the <code>--no-local</code> flag), and I
|
|
check for all of those.</p></div>
|
|
<div class="paragraph"><p>These checks can have both false positives and false negatives.
|
|
Someone might have a perfectly good backup of their repo without it
|
|
actually being a fresh clone — but there’s no way for filter-repo to
|
|
know that. Conversely, someone could look at all things that
|
|
filter-repo checks for in its safety checks and then just tweak their
|
|
non-backed-up repository to satisfy those conditions (though it would
|
|
take a fair amount of effort, and it’s astronomically unlikely that a
|
|
repo that isn’t a fresh clone randomly happens to match all the
|
|
criteria). In practice, the safety checks filter-repo uses seem to be
|
|
really good at avoiding people accidentally running filter-repo on a
|
|
repository that they shouldn’t be running it on. It even caught me
|
|
once when I did mean to run filter-repo but was in a different
|
|
directory than I thought I was.</p></div>
|
|
<div class="paragraph"><p>In short, it’s perfectly fine to use <code>--force</code> to override the safety
|
|
checks as long as you’re okay with filter-repo irreversibly rewriting
|
|
the contents of the current repository. It is a really bad idea to
|
|
get in the habit of always specifying <code>--force</code>; if you do, one day
|
|
you will run one of your commands in the wrong directory like I did,
|
|
and you won’t have the safety check anymore to bail you out. Also, it
|
|
is definitely NOT okay to recommend <code>--force</code> on forums, Q&amp;A sites, or
|
|
in emails to other users without first carefully explaining that
|
|
<code>--force</code> means putting your repositories’ data at risk. I am
|
|
especially bothered by people who suggest the flag when it clearly is
|
|
NOT needed; they are needlessly putting other people’s data at risk.</p></div>
|
|
</div>
|
|
</div>
|
|
<div class="sect1">
|
|
<h2 id="VERSATILITY">VERSATILITY</h2>
|
|
<div class="sectionbody">
|
|
<div class="paragraph"><p>filter-repo has a hierarchy of capabilities on the spectrum from easy to
|
|
use convenience flags that perform pre-defined types of filtering, to
|
|
choices that provide lots of flexibility in controlling how filtering
|
|
occurs. This spectrum includes the following:</p></div>
|
|
<div class="ulist"><ul>
|
|
<li>
|
|
<p>
|
|
Convenience flags making common types of history rewriting simple (e.g.
|
|
--path, --strip-blobs-bigger-than, --replace-text, --mailmap)
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
Options which are shorthand for others or which provide greater control
|
|
than others (e.g. --subdirectory-filter could just be written using
|
|
both a path selection (--path) and a path rename (--path-rename)
|
|
filter; --paths-from-file can handle all other --path* options and more
|
|
such as regex renaming of paths)
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
Generic python callbacks for handling a certain type of data (the
|
|
filename, message, name, email, and refname callbacks)
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
Generic python callbacks for handling fundamental git objects, allowing
|
|
greater control over the combination of data types the object holds
|
|
(the commit, tag, blob, and reset callbacks)
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
The ability to import filter-repo as a module in a python program and
|
|
use its classes and functions for even greater control and flexibility
|
|
while still leveraging lots of basic capabilities. One can even use
|
|
this to write new tools with a completely different interface.
|
|
</p>
|
|
</li>
|
|
</ul></div>
|
|
<div class="paragraph"><p>For more information about callbacks, see <a href="#CALLBACKS">[CALLBACKS]</a>. For examples on
|
|
writing python programs that import filter-repo as a module to create new
|
|
history rewriting tools, look at the contrib/filter-repo-demos/ directory.
|
|
That directory includes, among other examples, a reimplementation of
|
|
git-filter-branch which is faster than git-filter-branch, and a
|
|
reimplementation of BFG Repo Cleaner with several bug fixes and new
|
|
features.</p></div>
|
|
</div>
|
|
</div>
|
|
<div class="sect1">
|
|
<h2 id="DISCUSSION">DISCUSSION</h2>
|
|
<div class="sectionbody">
|
|
<div class="paragraph"><p>Using filter-repo is relatively simple, but rewriting history is part of
|
|
a larger discussion in terms of collaboration. When you rewrite
|
|
history, the old and new histories are no longer compatible; if you push
|
|
this history somewhere for others to view, it will look as though you’ve
|
|
done a rebase of all branches and tags. Make sure you are familiar with
|
|
the "RECOVERING FROM UPSTREAM REBASE" section of <a href="git-rebase.html">git-rebase(1)</a>
|
|
(and in particular, "The hard case") before proceeding, in addition to
|
|
this section.</p></div>
|
|
<div class="paragraph"><p>Steps to use git-filter-repo as part of the bigger picture of doing a
|
|
history rewrite are roughly as follows:</p></div>
|
|
<div class="olist arabic"><ol class="arabic">
|
|
<li>
|
|
<p>
|
|
Create a clone of your repository (if you created special refs outside
|
|
of refs/heads/ or refs/tags/, make sure to fetch those too). You may
|
|
pass <code>--bare</code> or <code>--mirror</code> to <code>git clone</code>, if you prefer. You should
|
|
pass <code>--no-local</code> if the repository you are cloning from is on the local
|
|
filesystem. Avoid other flags; some might confuse the fresh clone
|
|
check, and others could cause parts of the data to be missing that are
|
|
needed for the rewrite.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
(Optional) Run <code>git filter-repo --analyze</code>. This will create a
|
|
directory of reports mentioning renames that have occurred in your
|
|
repo and also listing sizes of objects aggregated by
|
|
path/directory/extension/blob-id; this information may be useful in
|
|
choosing how to filter your repo. It can also be useful to re-run
|
|
--analyze after filtering to verify the changes look correct.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
Run filter-repo with your desired filtering options. Many examples
|
|
are given below. For more complex cases, note that doing the
|
|
filtering in multiple steps (by running multiple filter-repo
|
|
invocations in a sequence) is supported. If anything goes wrong here,
|
|
simply delete your clone and restart.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
Push your new repository to its new home (note that
|
|
refs/remotes/origin/* will have been moved to refs/heads/* as the
|
|
first part of filter-repo, so you can just deal with normal branches
|
|
instead of remote tracking branches). While you can force push this
|
|
to the same URL you cloned from, there are good reasons to consider
|
|
pushing to a different location instead:
|
|
</p>
|
|
<div class="ulist"><ul>
|
|
<li>
|
|
<p>
|
|
People who cloned from the original repo will have old history.
|
|
When they fetch the new history you force pushed up, unless they
|
|
do a <code>git reset --hard @{u}</code> on their branches or rebase their
|
|
local work, git will think they have hundreds or thousands of
|
|
commits with very similar commit messages as what exist upstream
|
|
(but which include files you wanted excised from history), and
|
|
allow the user to merge the two histories, resulting in what
|
|
looks like two copies of each commit. If they then push this
|
|
history back up, then everyone now has history with two copies of
|
|
each commit and the bad files have returned. You’re more likely
|
|
to succeed in forcing people to get rid of the old history if
|
|
they have to clone a new URL.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
Rewriting history will rewrite tags; those who have already
|
|
downloaded tags will not get the updated tags by default (see the
|
|
"On Re-tagging" section of <a href="git-tag.html">git-tag(1)</a>). Every user
|
|
trying to use an existing clone will have to forcibly delete all
|
|
tags and re-fetch them; it may be easier for them to just
|
|
re-clone, which they are more likely to do with a new clone URL.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
Rewriting history may delete some refs (e.g. branches that only
|
|
had files that you wanted excised from history); unless you run
|
|
git push with the <code>--mirror</code> or <code>--prune</code> options, those refs
|
|
will continue to exist on the server. If folks then merge these
|
|
branches into others, then people have started mixing old and new
|
|
history. If users had already cloned these branches, removing
|
|
them from the server isn’t enough; you need all users to delete
|
|
any local branches based on these refs and run fetch with the
|
|
<code>--prune</code> option as well. Simply re-cloning from a new URL is
|
|
easier.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
The server may not allow you to force push over some refs.
|
|
For example, code review systems may have special ref
|
|
namespaces (e.g. refs/changes/, refs/pull/,
|
|
refs/merge-requests/) that they have locked down.
|
|
</p>
|
|
</li>
|
|
</ul></div>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
If you still want to push your rewritten history back to the
|
|
original url despite my warnings above, you’ll have to manage it
|
|
very carefully:
|
|
</p>
|
|
<div class="ulist"><ul>
|
|
<li>
|
|
<p>
|
|
git-filter-repo deletes the "origin" remote to help avoid people
|
|
accidentally repushing to the same repository, so you’ll need to
|
|
remind git what origin’s url was. You’ll have to look up the
|
|
command for that.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
You’ll need to carefully synchronize with <strong>everyone</strong> who has
|
|
cloned the repository, and will also need to carefully
|
|
synchronize with <strong>everything</strong> (e.g. CI systems) that has cloned
|
|
it. Every single clone will either need to be thrown away and
|
|
re-cloned, or need to take all the steps outlined in item 4 as
|
|
well as follow the necessary steps from "RECOVERING FROM UPSTREAM
|
|
REBASE" section of <a href="git-rebase.html">git-rebase(1)</a>. If you miss fixing any
|
|
clones, you’ll risk mixing old and new history and end up with an
|
|
even worse mess to clean up.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
Finally, you’ll need to consult any documentation from your
|
|
hosting provider about how to remove any server-side references
|
|
to the old commits (example:
|
|
<a href="https://docs.gitlab.com/ee/user/project/repository/reducing_the_repo_size_using_git.html">GitLab’s
|
|
excellent docs on reducing repository size</a>, or just the warning
|
|
box that references "GitHub support" from
|
|
<a href="https://docs.github.com/en/github/authenticating-to-github/removing-sensitive-data-from-a-repository">GitHub’s
|
|
otherwise dangerously out-of-date docs on removing sensitive
|
|
data</a>).
|
|
</p>
|
|
</li>
|
|
</ul></div>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
(Optional) Some additional considerations
|
|
</p>
|
|
<div class="ulist"><ul>
|
|
<li>
|
|
<p>
|
|
filter-repo by default creates replace refs (see
|
|
<a href="git-replace.html">git-replace(1)</a>) for each rewritten commit ID, allowing
|
|
you to use old (unabbreviated) commit hashes in the git command
|
|
line to refer to the newly rewritten commits. If you want to use
|
|
these replace refs, manually push them to the relevant clone URL
|
|
and tell users to manually fetch them (e.g. by adjusting their
|
|
fetch refspec, <code>git config --add remote.origin.fetch
|
|
+refs/replace/*:refs/replace/*</code>). Sadly, replace refs are not
|
|
yet widely understood; projects like jgit and libgit2 do not
|
|
support them and existing repository managers (e.g. Gerrit,
|
|
GitHub, GitLab) do not yet understand replace refs. Thus one
|
|
can’t use old commit hashes within the UI of these other systems.
|
|
This may change in the future, but replace refs at least help
|
|
users locally within the git command line interface. Also, be
|
|
aware that commit-graphs are excessively cautious around replace
|
|
refs and just turn off entirely if any are present, so after
|
|
enough time has passed that old commit IDs become less relevant,
|
|
users may want to locally delete the replace refs to regain the
|
|
speedups from commit-graphs.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
If you have a central repo, you may want to prevent people
|
|
from pushing old commit IDs, in order to avoid mixing old
|
|
and new history. Every repository manager does this
|
|
differently; some provide specialized commands
|
|
(e.g. <a href="https://gerrit-review.googlesource.com/Documentation/cmd-ban-commit.html">https://gerrit-review.googlesource.com/Documentation/cmd-ban-commit.html</a>),
|
|
others require you to write hooks.
|
|
</p>
|
|
</li>
|
|
</ul></div>
|
|
</li>
|
|
</ol></div>
|
|
</div>
|
|
</div>
|
|
<div class="sect1">
|
|
<h2 id="EXAMPLES">EXAMPLES</h2>
|
|
<div class="sectionbody">
|
|
<div class="sect2">
|
|
<h3 id="_path_based_filtering">Path based filtering</h3>
|
|
<div class="paragraph"><p>To only keep the <em>README.md</em> file plus the directories <em>guides</em> and
|
|
<em>tools/releases/</em>:</p></div>
|
|
<div class="listingblock">
|
|
<div class="content">
|
|
<pre><code>git filter-repo --path README.md --path guides/ --path tools/releases</code></pre>
|
|
</div></div>
|
|
<div class="paragraph"><p>Directory names can be given with or without a trailing slash, and all
|
|
filenames are relative to the toplevel of the repo. To keep all files
|
|
except these paths, just add <code>--invert-paths</code>:</p></div>
|
|
<div class="listingblock">
|
|
<div class="content">
|
|
<pre><code>git filter-repo --path README.md --path guides/ --path tools/releases --invert-paths</code></pre>
|
|
</div></div>
|
|
<div class="paragraph"><p>If you want to have both an inclusion filter and an exclusion filter, just
|
|
run filter-repo multiple times. For example, to keep the src/main
|
|
subdirectory but exclude files under src/main named <em>data</em>, run:</p></div>
|
|
<div class="listingblock">
|
|
<div class="content">
|
|
<pre><code>git filter-repo --path src/main/
|
|
git filter-repo --path-glob 'src/*/data' --invert-paths</code></pre>
|
|
</div></div>
|
|
<div class="paragraph"><p>Note that the asterisk (<code>*</code>) will match across multiple directories, so the
|
|
second command would remove e.g. src/main/org/whatever/data. Also, the
|
|
second command by itself would also remove e.g. src/not-main/foo/data, but
|
|
since src/not-main/ was removed by the first command, that’s not an issue.
|
|
Also, the use of quotes around the asterisk is sometimes important to avoid
|
|
glob expansion by the shell.</p></div>
|
|
<div class="paragraph"><p>You can also select paths by regular expression (see
|
|
<a href="https://docs.python.org/3/library/re.html#regular-expression-syntax">https://docs.python.org/3/library/re.html#regular-expression-syntax</a>).
|
|
For example, to only include files from the repo whose name is in the
|
|
format YYYY-MM-DD.txt and is found at least two subdirectories deep:</p></div>
|
|
<div class="listingblock">
|
|
<div class="content">
|
|
<pre><code>git filter-repo --path-regex '^.*/.*/[0-9]{4}-[0-9]{2}-[0-9]{2}.txt$'</code></pre>
|
|
</div></div>
|
|
<div class="paragraph"><p>If you want two directories to be renamed (and maybe merged if both are
|
|
renamed to the same location), use --path-rename; for example, to rename
|
|
both <em>cmds/</em> and <em>src/scripts/</em> to <em>tools/</em>:</p></div>
|
|
<div class="listingblock">
|
|
<div class="content">
|
|
<pre><code>git filter-repo --path-rename cmds:tools --path-rename src/scripts/:tools/</code></pre>
|
|
</div></div>
|
|
<div class="paragraph"><p>As with <code>--path</code>, directories can be specified with or without a
|
|
trailing slash for <code>--path-rename</code>.</p></div>
|
|
<div class="paragraph"><p>If you do a <code>--path-rename</code> to something that was already in use, it will
|
|
be silently overwritten. However, if you try to rename multiple files to
|
|
the same location (e.g. src/scripts/run_release.sh and cmds/run_release.sh
|
|
both existed and had different content with the renames above), then you
|
|
will be given an error. If you have such a case, you may want to add
|
|
another rename command to move one of the paths somewhere else where it
|
|
won’t collide:</p></div>
|
|
<div class="listingblock">
|
|
<div class="content">
|
|
<pre><code>git filter-repo --path-rename cmds/run_release.sh:tools/do_release.sh \
|
|
--path-rename cmds/:tools/ \
|
|
--path-rename src/scripts/:tools/</code></pre>
|
|
</div></div>
|
|
<div class="paragraph"><p>Also, <code>--path-rename</code> brings up ordering issues; all path arguments are
|
|
applied in order. Thus, a command like</p></div>
|
|
<div class="listingblock">
|
|
<div class="content">
|
|
<pre><code>git filter-repo --path-rename sources/:src/main/ --path src/main/</code></pre>
|
|
</div></div>
|
|
<div class="paragraph"><p>would make sense but reversing the two arguments would not (src/main/ is
|
|
created by the rename so reversing the two would give you an empty repo).
|
|
Also, note that the rename of cmds/run_release.sh a couple examples ago was
|
|
done before the other renames.</p></div>
|
|
<div class="paragraph"><p>Note that path renaming does not do path filtering, thus the following
|
|
command</p></div>
|
|
<div class="listingblock">
|
|
<div class="content">
|
|
<pre><code>git filter-repo --path src/main/ --path-rename tools/:scripts/</code></pre>
|
|
</div></div>
|
|
<div class="paragraph"><p>would not result in the tools or scripts directories being present, because
|
|
the single filter selected only src/main/. It’s likely that you would
|
|
instead want to run:</p></div>
|
|
<div class="listingblock">
|
|
<div class="content">
|
|
<pre><code>git filter-repo --path src/main/ --path tools/ --path-rename tools/:scripts/</code></pre>
|
|
</div></div>
|
|
<div class="paragraph"><p>If you prefer to filter based solely on basename, use the <code>--use-base-name</code>
|
|
flag (though this is incompatible with <code>--path-rename</code>). For example, to
|
|
only include README.md and Makefile files from any directory:</p></div>
|
|
<div class="listingblock">
|
|
<div class="content">
|
|
<pre><code>git filter-repo --use-base-name --path README.md --path Makefile</code></pre>
|
|
</div></div>
|
|
<div class="paragraph"><p>If you wanted to delete all .DS_Store files in any directory, you could
|
|
either use:</p></div>
|
|
<div class="listingblock">
|
|
<div class="content">
|
|
<pre><code>git filter-repo --invert-paths --path '.DS_Store' --use-base-name</code></pre>
|
|
</div></div>
|
|
<div class="paragraph"><p>or</p></div>
|
|
<div class="listingblock">
|
|
<div class="content">
|
|
<pre><code>git filter-repo --invert-paths --path-glob '*/.DS_Store' --path '.DS_Store'</code></pre>
|
|
</div></div>
|
|
<div class="paragraph"><p>(the <code>--path-glob</code> isn’t sufficient by itself as it might miss a toplevel
|
|
.DS_Store file; further while something like <code>--path-glob '*.DS_Store'</code>
|
|
would work around that problem, it would also grab files named <code>foo.DS_Store</code>
|
|
or <code>bar/baz.DS_Store</code>)</p></div>
|
|
<div class="paragraph"><p>Finally, see also the <code>--filename-callback</code> from <a href="#CALLBACKS">[CALLBACKS]</a>.</p></div>
|
|
</div>
|
|
<div class="sect2">
|
|
<h3 id="_filtering_based_on_many_paths">Filtering based on many paths</h3>
|
|
<div class="paragraph"><p>If you have a long list of files, directories, globs, or regular
|
|
expressions to filter on, you can stick them in a file and use
|
|
<code>--paths-from-file</code>; for example, with a file named stuff-i-want.txt with
|
|
contents of</p></div>
|
|
<div class="listingblock">
|
|
<div class="content">
|
|
<pre><code># Blank lines and comment lines are ignored.
|
|
# Examples similar to --path:
|
|
README.md
|
|
guides/
|
|
tools/releases
|
|
|
|
# An example that is like --path-glob:
|
|
glob:*.py
|
|
|
|
# An example that is like --path-regex:
|
|
regex:^.*/.*/[0-9]{4}-[0-9]{2}-[0-9]{2}.txt$
|
|
|
|
# An example of renaming a path
|
|
tools/==>scripts/
|
|
|
|
# An example of using a regex to rename a path
|
|
regex:(.*)/([^/]*)/([^/]*)\.text$==>\2/\1/\3.txt</code></pre>
|
|
</div></div>
|
|
<div class="paragraph"><p>then you could run</p></div>
|
|
<div class="listingblock">
|
|
<div class="content">
|
|
<pre><code>git filter-repo --paths-from-file stuff-i-want.txt</code></pre>
|
|
</div></div>
|
|
<div class="paragraph"><p>to get a repo containing only the toplevel README.md file, the guides/
|
|
and tools/releases/ directories, all python files, files whose name
|
|
was of the form YYYY-MM-DD.txt at least two subdirectories deep, and
|
|
would rename tools/ to scripts/ and rename files like foo/bar/baz.text
|
|
to bar/foo/baz.txt. Note the special line prefixes of <code>glob:</code> and
|
|
<code>regex:</code> and the special string <code>==></code> denoting renames.</p></div>
|
|
<div class="paragraph"><p>Sometimes you have a way of easily generating all the files you want.
|
|
For example, if you know that none of the currently tracked files have
|
|
any newlines or special characters in them (see core.quotePath from
|
|
<code>git config --help</code>) so that <code>git ls-files</code> would print all files
|
|
literally one per line, and you knew that you wanted to keep only the
|
|
files that are currently tracked (thus deleting from all commits in
|
|
history any files that only appear on other branches or that only
|
|
appear in older commits), then you could use a pair of commands such
|
|
as</p></div>
|
|
<div class="listingblock">
|
|
<div class="content">
|
|
<pre><code>git ls-files >../paths-i-want.txt
|
|
git filter-repo --paths-from-file ../paths-i-want.txt</code></pre>
|
|
</div></div>
|
|
<div class="paragraph"><p>Similarly, you could use --paths-from-file to delete many files. For
|
|
example, you could run <code>git filter-repo --analyze</code> to get reports,
|
|
look in one such as .git/filter-repo/analysis/path-deleted-sizes.txt
|
|
and copy all the filenames into a file such as
|
|
/tmp/files-i-dont-want-anymore.txt and then run</p></div>
|
|
<div class="listingblock">
|
|
<div class="content">
|
|
<pre><code>git filter-repo --invert-paths --paths-from-file /tmp/files-i-dont-want-anymore.txt</code></pre>
|
|
</div></div>
|
|
<div class="paragraph"><p>to delete them all.</p></div>
|
|
</div>
|
|
<div class="sect2">
|
|
<h3 id="_directory_based_shortcuts">Directory based shortcuts</h3>
|
|
<div class="paragraph"><p>Let’s say you had a directory structure like the following:</p></div>
|
|
<div class="literalblock">
|
|
<div class="content">
|
|
<pre><code>module/
|
|
   foo.c
|
|
   bar.c
|
|
otherDir/
|
|
   blah.config
|
|
   stuff.txt
|
|
zebra.jpg</code></pre>
|
|
</div></div>
|
|
<div class="paragraph"><p>If you wanted just the module/ directory and you wanted it to become the
|
|
new root so that your new directory structure looked like</p></div>
|
|
<div class="literalblock">
|
|
<div class="content">
|
|
<pre><code>foo.c
|
|
bar.c</code></pre>
|
|
</div></div>
|
|
<div class="paragraph"><p>then you could run:</p></div>
|
|
<div class="listingblock">
|
|
<div class="content">
|
|
<pre><code>git filter-repo --subdirectory-filter module/</code></pre>
|
|
</div></div>
|
|
<div class="paragraph"><p>If you wanted all the files from the original repo, but wanted to move
|
|
everything under a subdirectory named my-module/, so that your new
|
|
directory structure looked like</p></div>
|
|
<div class="literalblock">
|
|
<div class="content">
|
|
<pre><code>my-module/
|
|
   module/
|
|
      foo.c
|
|
      bar.c
|
|
   otherDir/
|
|
      blah.config
|
|
      stuff.txt
|
|
   zebra.jpg</code></pre>
|
|
</div></div>
|
|
<div class="paragraph"><p>then you would instead run</p></div>
|
|
<div class="listingblock">
|
|
<div class="content">
|
|
<pre><code>git filter-repo --to-subdirectory-filter my-module/</code></pre>
|
|
</div></div>
|
|
</div>
|
|
<div class="sect2">
|
|
<h3 id="_content_based_filtering">Content based filtering</h3>
|
|
<div class="paragraph"><p>If you want to filter out all files bigger than a certain size, you can use
|
|
<code>--strip-blobs-bigger-than</code> with some size (K, M, and G suffixes are
|
|
recognized), e.g.:</p></div>
|
|
<div class="listingblock">
|
|
<div class="content">
|
|
<pre><code>git filter-repo --strip-blobs-bigger-than 10M</code></pre>
|
|
</div></div>
|
|
<div class="paragraph"><p>If you want to strip out all files with specified git object ids (hashes),
|
|
list the hashes in a file and run</p></div>
|
|
<div class="listingblock">
|
|
<div class="content">
|
|
<pre><code>git filter-repo --strip-blobs-with-ids FILE_WITH_GIT_BLOB_IDS</code></pre>
|
|
</div></div>
|
|
<div class="paragraph"><p>If you want to modify file contents, you can do so based on a list of
|
|
expressions in a file, one per line. For example, with a file named
|
|
expressions.txt containing</p></div>
|
|
<div class="listingblock">
|
|
<div class="content">
|
|
<pre><code>p455w0rd
|
|
foo==>bar
|
|
glob:*666*==>
|
|
regex:\bdriver\b==>pilot
|
|
literal:MM/DD/YYYY==>YYYY-MM-DD
|
|
regex:([0-9]{2})/([0-9]{2})/([0-9]{4})==>\3-\1-\2</code></pre>
|
|
</div></div>
|
|
<div class="paragraph"><p>then running</p></div>
|
|
<div class="listingblock">
|
|
<div class="content">
|
|
<pre><code>git filter-repo --replace-text expressions.txt</code></pre>
|
|
</div></div>
|
|
<div class="paragraph"><p>will go through and replace <code>p455w0rd</code> with <code>***REMOVED***</code>, <code>foo</code> with
|
|
<code>bar</code>, any line containing <code>666</code> with a blank line, the word <code>driver</code> with
|
|
<code>pilot</code> (but not if it has letters before or after; e.g. <code>drivers</code> will be
|
|
unmodified), replace the exact text <code>MM/DD/YYYY</code> with <code>YYYY-MM-DD</code> and
|
|
replace date strings of the form MM/DD/YYYY with ones of the form
|
|
YYYY-MM-DD. In the expressions file, there are a few things to note:</p></div>
|
|
<div class="ulist"><ul>
|
|
<li>
|
|
<p>
|
|
Every line has a replacement, given by whatever is on the right of
|
|
<code>==></code>. If <code>==></code> does not appear on the line, the default replacement
|
|
is <code>***REMOVED***</code>.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
Lines can start with <code>literal:</code>, <code>glob:</code>, or <code>regex:</code> to specify
|
|
whether to do literal string matches,
|
|
globs (see <a href="https://docs.python.org/3/library/fnmatch.html">https://docs.python.org/3/library/fnmatch.html</a>), or regular
|
|
expressions (see <a href="https://docs.python.org/3/library/re.html#regular-expression-syntax">https://docs.python.org/3/library/re.html#regular-expression-syntax</a>).
|
|
If none of these are specified, <code>literal:</code> is assumed.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
If multiple matches are found, all are replaced.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
globs and regexes are applied to the entire file, but without any
|
|
special flags turned on. Some folks may be interested in adding <code>(?m)</code>
|
|
to the regex to turn on MULTILINE mode, so that <code>^</code> and <code>$</code> match the
|
|
beginnings and ends of lines rather than the beginning and end of the file.
|
|
See <a href="https://docs.python.org/3/library/re.html">https://docs.python.org/3/library/re.html</a> for details.
|
|
</p>
|
|
</li>
|
|
</ul></div>
|
|
<div class="paragraph"><p>See also the <code>--blob-callback</code> from <a href="#CALLBACKS">[CALLBACKS]</a>.</p></div>
|
|
</div>
|
|
<div class="sect2">
|
|
<h3 id="_updating_commit_tag_messages">Updating commit/tag messages</h3>
|
|
<div class="paragraph"><p>If you want to modify commit or tag messages, you can do so with the
|
|
same syntax as <code>--replace-text</code>, explained above. For example, with a
|
|
file named expressions.txt containing</p></div>
|
|
<div class="listingblock">
|
|
<div class="content">
|
|
<pre><code>foo==>bar</code></pre>
|
|
</div></div>
|
|
<div class="paragraph"><p>then running</p></div>
|
|
<div class="listingblock">
|
|
<div class="content">
|
|
<pre><code>git filter-repo --replace-message expressions.txt</code></pre>
|
|
</div></div>
|
|
<div class="paragraph"><p>will replace <code>foo</code> in commit or tag messages with <code>bar</code>.</p></div>
|
|
<div class="paragraph"><p>See also the <code>--message-callback</code> from <a href="#CALLBACKS">[CALLBACKS]</a>.</p></div>
|
|
</div>
|
|
<div class="sect2">
|
|
<h3 id="_refname_based_filtering">Refname based filtering</h3>
|
|
<div class="paragraph"><p>To rename tags, use <code>--tag-rename</code>, e.g.:</p></div>
|
|
<div class="listingblock">
|
|
<div class="content">
|
|
<pre><code>git filter-repo --tag-rename foo:bar</code></pre>
|
|
</div></div>
|
|
<div class="paragraph"><p>This will rename any tags starting with <code>foo</code> to now start with <code>bar</code>.
|
|
Either side of the colon could be blank, e.g.</p></div>
|
|
<div class="listingblock">
|
|
<div class="content">
|
|
<pre><code>git filter-repo --tag-rename '':'my-module-'</code></pre>
|
|
</div></div>
|
|
<div class="paragraph"><p>For more general refname modification, see <code>--refname-callback</code> from
|
|
<a href="#CALLBACKS">[CALLBACKS]</a>.</p></div>
|
|
</div>
|
|
<div class="sect2">
|
|
<h3 id="_user_and_email_based_filtering">User and email based filtering</h3>
|
|
<div class="paragraph"><p>To modify usernames and emails of commits, you can create a mailmap
|
|
file in the format accepted by <a href="git-shortlog.html">git-shortlog(1)</a>. For example,
|
|
if you have a file named my-mailmap you can run</p></div>
|
|
<div class="listingblock">
|
|
<div class="content">
|
|
<pre><code>git filter-repo --mailmap my-mailmap</code></pre>
|
|
</div></div>
|
|
<div class="paragraph"><p>and if the current contents of that file are as follows (if the
|
|
specified mailmap file is version controlled, historical versions of
|
|
the file are ignored):</p></div>
|
|
<div class="listingblock">
|
|
<div class="content">
|
|
<pre><code>Name For User &lt;email@addre.ss&gt;
|
|
&lt;new@ema.il&gt; &lt;old1@ema.il&gt;
|
|
New Name And &lt;new@ema.il&gt; &lt;old2@ema.il&gt;
|
|
New Name And &lt;new@ema.il&gt; Old Name And &lt;old3@ema.il&gt;</code></pre>
|
|
</div></div>
|
|
<div class="paragraph"><p>then we can update usernames and/or emails based on the specified
|
|
mapping.</p></div>
|
|
<div class="paragraph"><p>See also the <code>--name-callback</code> and <code>--email-callback</code> from
|
|
<a href="#CALLBACKS">[CALLBACKS]</a>.</p></div>
|
|
</div>
|
|
<div class="sect2">
|
|
<h3 id="_parent_rewriting_2">Parent rewriting</h3>
|
|
<div class="paragraph"><p>To replace $commit_A with $commit_B (e.g. make all commits which had
|
|
$commit_A as a parent instead have $commit_B for that parent), and
|
|
rewrite history to make it permanent:</p></div>
|
|
<div class="listingblock">
|
|
<div class="content">
|
|
<pre><code>git replace $commit_A $commit_B
|
|
git filter-repo --force</code></pre>
|
|
</div></div>
|
|
<div class="paragraph"><p>To create a new commit with the same contents as $commit_A except with
|
|
different parent(s) and then replace $commit_A with the new commit,
|
|
and rewrite history to make it permanent:</p></div>
|
|
<div class="listingblock">
|
|
<div class="content">
|
|
<pre><code>git replace --graft $commit_A $new_parent_or_parents
|
|
git filter-repo --force</code></pre>
|
|
</div></div>
|
|
<div class="paragraph"><p>The reason to specify --force is two-fold: filter-repo will error out
|
|
if no arguments are specified, and the new graft commit would
|
|
otherwise trigger the not-a-fresh-clone check.</p></div>
|
|
</div>
|
|
<div class="sect2">
|
|
<h3 id="_partial_history_rewrites">Partial history rewrites</h3>
|
|
<div class="paragraph"><p>To rewrite the history on just one branch (which may cause it to no longer
|
|
share any common history with other branches), use <code>--refs</code>. For example,
|
|
to remove a file named <em>extraneous.txt</em> from the <em>master</em> branch:</p></div>
|
|
<div class="listingblock">
|
|
<div class="content">
|
|
<pre><code>git filter-repo --invert-paths --path extraneous.txt --refs master</code></pre>
|
|
</div></div>
|
|
<div class="paragraph"><p>To rewrite just some recent commits:</p></div>
|
|
<div class="listingblock">
|
|
<div class="content">
|
|
<pre><code>git filter-repo --invert-paths --path extraneous.txt --refs master~3..master</code></pre>
|
|
</div></div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
<div class="sect1">
|
|
<h2 id="CALLBACKS">CALLBACKS</h2>
|
|
<div class="sectionbody">
|
|
<div class="paragraph"><p>For flexibility, filter-repo allows you to specify functions on the
|
|
command line to further filter all changes. Please note that there
|
|
are some API compatibility caveats associated with these callbacks
|
|
that you should be aware of before using them; see the "API BACKWARD
|
|
COMPATIBILITY CAVEAT" comment near the top of git-filter-repo source
|
|
code.</p></div>
|
|
<div class="paragraph"><p>All callback functions are of the same general format. For a command line
|
|
argument like</p></div>
|
|
<div class="listingblock">
|
|
<div class="content">
|
|
<pre><code>--foo-callback 'BODY'</code></pre>
|
|
</div></div>
|
|
<div class="paragraph"><p>the following code will be compiled and called:</p></div>
|
|
<div class="listingblock">
|
|
<div class="content">
|
|
<pre><code>def foo_callback(foo):
|
|
  BODY</code></pre>
|
|
</div></div>
|
|
<div class="paragraph"><p>Thus, you just need to make sure your <em>BODY</em> modifies and returns
|
|
<em>foo</em> appropriately. One important thing to note for all callbacks is
|
|
that filter-repo uses bytestrings (see
|
|
<a href="https://docs.python.org/3/library/stdtypes.html#bytes">https://docs.python.org/3/library/stdtypes.html#bytes</a>) everywhere
|
|
instead of strings.</p></div>
|
|
<div class="paragraph"><p>There are four callbacks that allow you to operate directly on raw
|
|
objects that contain data that’s easy to write in
|
|
<a href="git-fast-import.html">git-fast-import(1)</a> format:</p></div>
|
|
<div class="listingblock">
|
|
<div class="content">
|
|
<pre><code>--blob-callback
|
|
--commit-callback
|
|
--tag-callback
|
|
--reset-callback</code></pre>
|
|
</div></div>
|
|
<div class="paragraph"><p>We’ll come back to these later because it is often the case that the
|
|
other callbacks are more convenient. The other callbacks operate on a
|
|
small piece of the raw objects or operate on pieces across multiple
|
|
types of raw object (e.g. author names and committer names and tagger
|
|
names across commits and tags, or refnames across commits, tags, and
|
|
resets, or messages across commits and tags). The convenience
|
|
callbacks are:</p></div>
|
|
<div class="listingblock">
|
|
<div class="content">
|
|
<pre><code>--filename-callback
|
|
--message-callback
|
|
--name-callback
|
|
--email-callback
|
|
--refname-callback</code></pre>
|
|
</div></div>
|
|
<div class="paragraph"><p>In each, you are expected to simply return a new value based on the one
|
|
passed in. For example,</p></div>
|
|
<div class="listingblock">
|
|
<div class="content">
|
|
<pre><code>git-filter-repo --name-callback 'return name.replace(b"Wiliam", b"William")'</code></pre>
|
|
</div></div>
|
|
<div class="paragraph"><p>would result in the following function being called:</p></div>
|
|
<div class="listingblock">
|
|
<div class="content">
|
|
<pre><code>def name_callback(name):
|
|
  return name.replace(b"Wiliam", b"William")</code></pre>
|
|
</div></div>
|
|
<div class="paragraph"><p>The email callback is quite similar:</p></div>
|
|
<div class="listingblock">
|
|
<div class="content">
|
|
<pre><code>git-filter-repo --email-callback 'return email.replace(b".cm", b".com")'</code></pre>
|
|
</div></div>
|
|
<div class="paragraph"><p>The refname callback is also similar, but note that the refname passed in
|
|
and returned are expected to be fully qualified (e.g. b"refs/heads/master"
|
|
instead of just b"master" and b"refs/tags/v1.0.7" instead of b"1.0.7"):</p></div>
|
|
<div class="listingblock">
|
|
<div class="content">
|
|
<pre><code>git-filter-repo --refname-callback '
|
|
# Change e.g. refs/heads/master to refs/heads/prefix-master
|
|
rdir,rpath = os.path.split(refname)
|
|
return rdir + b"/prefix-" + rpath'</code></pre>
|
|
</div></div>
|
|
<div class="paragraph"><p>The message callback is quite similar to the previous three callbacks,
|
|
though it operates on a bytestring that is likely more than one line:</p></div>
|
|
<div class="listingblock">
|
|
<div class="content">
|
|
<pre><code>git-filter-repo --message-callback '
|
|
if b"Signed-off-by:" not in message:
|
|
  message += b"\nSigned-off-by: Me My &lt;self@and.eye&gt;"
|
|
return re.sub(b"[Ee]-?[Mm][Aa][Ii][Ll]", b"email", message)'</code></pre>
|
|
</div></div>
|
|
<div class="paragraph"><p>The filename callback is slightly more interesting. Returning None means
|
|
the file should be removed from all commits, returning the filename
|
|
unmodified marks the file to be kept, and returning a different name means
|
|
the file should be renamed. An example:</p></div>
|
|
<div class="listingblock">
|
|
<div class="content">
|
|
<pre><code>git-filter-repo --filename-callback '
|
|
if b"/src/" in filename:
|
|
  # Remove all files with a directory named "src" in their path
|
|
  # (except when "src" appears at the toplevel).
|
|
  return None
|
|
elif filename.startswith(b"tools/"):
|
|
  # Rename tools/ -> scripts/misc/
|
|
  return b"scripts/misc/" + filename[6:]
|
|
else:
|
|
  # Keep the filename and do not rename it
|
|
  return filename
|
|
'</code></pre>
|
|
</div></div>
|
|
<div class="paragraph"><p>In contrast, the blob, reset, tag, and commit callbacks are not
|
|
expected to return a value, but are instead expected to modify the
|
|
object passed in. Major fields for these objects are (subject to API
|
|
backward compatibility caveats mentioned previously):</p></div>
|
|
<div class="ulist"><ul>
|
|
<li>
|
|
<p>
|
|
Blob: <code>original_id</code> (original hash) and <code>data</code>
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
Reset: <code>ref</code> (name of reference) and <code>from_ref</code> (hash or integer mark)
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
Tag: <code>ref</code>, <code>from_ref</code>, <code>original_id</code>, <code>tagger_name</code>, <code>tagger_email</code>,
|
|
<code>tagger_date</code>, <code>message</code>
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
Commit: <code>branch</code>, <code>original_id</code>, <code>author_name</code>, <code>author_email</code>,
|
|
<code>author_date</code>, <code>committer_name</code>, <code>committer_email</code>,
|
|
<code>committer_date</code>, <code>message</code>, <code>file_changes</code> (list of
|
|
FileChange objects, each containing a <code>type</code>, <code>filename</code>,
|
|
<code>mode</code>, and <code>blob_id</code>), <code>parents</code> (list of hashes or integer
|
|
marks)
|
|
</p>
|
|
</li>
|
|
</ul></div>
|
|
<div class="paragraph"><p>An example of each:</p></div>
|
|
<div class="listingblock">
|
|
<div class="content">
|
|
<pre><code>git filter-repo --blob-callback '
|
|
if len(blob.data) > 25:
|
|
  # Mark this blob for removal from all commits
|
|
  blob.skip()
|
|
else:
|
|
  blob.data = blob.data.replace(b"Hello", b"Goodbye")
|
|
'</code></pre>
|
|
</div></div>
|
|
<div class="listingblock">
|
|
<div class="content">
|
|
<pre><code>git filter-repo --reset-callback 'reset.ref = reset.ref.replace(b"master", b"dev")'</code></pre>
|
|
</div></div>
|
|
<div class="listingblock">
|
|
<div class="content">
|
|
<pre><code>git filter-repo --tag-callback '
|
|
if tag.tagger_name == b"Jim Williams":
|
|
  # Omit this tag
|
|
  tag.skip()
|
|
else:
|
|
  tag.message = tag.message + b"\n\nTag of %s by %s on %s" % (tag.ref, tag.tagger_email, tag.tagger_date)'</code></pre>
|
|
</div></div>
|
|
<div class="listingblock">
|
|
<div class="content">
|
|
<pre><code>git filter-repo --commit-callback '
|
|
# Remove executable files with three 6s in their name (including
|
|
# from leading directories).
|
|
# Also, undo deletion of sources/foo/bar.txt (change types are
|
|
# either b"D" (deletion) or b"M" (add or modify); renames are
|
|
# handled by deleting the old file and adding a new one)
|
|
commit.file_changes = [
|
|
change for change in commit.file_changes
|
|
if not (change.mode == b"100755" and
|
|
change.filename.count(b"6") == 3) and
|
|
not (change.type == b"D" and
|
|
change.filename == b"sources/foo/bar.txt")]
|
|
# Mark all .sh files as executable; modes in git are always one of
|
|
# 100644 (normal file), 100755 (executable), 120000 (symlink), or
|
|
# 160000 (submodule)
|
|
for change in commit.file_changes:
|
|
  if change.filename.endswith(b".sh"):
|
|
    change.mode = b"100755"
|
|
'</code></pre>
|
|
</div></div>
|
|
</div>
|
|
</div>
|
|
<div class="sect1">
|
|
<h2 id="INTERNALS">INTERNALS</h2>
|
|
<div class="sectionbody">
|
|
<div class="paragraph"><p>You probably don’t need to read this section unless you are just very
|
|
curious or you are trying to do a very complex history rewrite.</p></div>
|
|
<div class="sect2">
|
|
<h3 id="_how_filter_repo_works">How filter-repo works</h3>
|
|
<div class="paragraph"><p>Roughly, filter-repo works by running</p></div>
|
|
<div class="listingblock">
|
|
<div class="content">
|
|
<pre><code>git fast-export &lt;options&gt; | filter | git fast-import &lt;options&gt;</code></pre>
|
|
</div></div>
|
|
<div class="paragraph"><p>where filter-repo not only launches the whole pipeline but also serves as
|
|
the <em>filter</em> in the middle. However, filter-repo does a few additional
|
|
things on top in order to make it into a well-rounded filtering tool. A
|
|
sequence that more accurately reflects what filter-repo runs is:</p></div>
|
|
<div class="olist arabic"><ol class="arabic">
|
|
<li>
|
|
<p>
|
|
Verify we’re in a fresh clone
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
<code>git fetch -u . refs/remotes/origin/*:refs/heads/*</code>
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
<code>git remote rm origin</code>
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
<code>git fast-export --show-original-ids --reference-excluded-parents --fake-missing-tagger --signed-tags=strip --tag-of-filtered-object=rewrite --use-done-feature --no-data --reencode=yes --mark-tags --all | filter | git -c core.ignorecase=false fast-import --date-format=raw-permissive --force --quiet</code>
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
<code>git update-ref --no-deref --stdin</code>, fed with a list of refs to nuke, and a list of replace refs to delete, create, or update.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
<code>git reset --hard</code>
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
<code>git reflog expire --expire=now --all</code>
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
<code>git gc --prune=now</code>
|
|
</p>
|
|
</li>
|
|
</ol></div>
|
|
<div class="paragraph"><p>Some notes or exceptions on each of the above:</p></div>
|
|
<div class="olist arabic"><ol class="arabic">
|
|
<li>
|
|
<p>
|
|
If we’re not in a fresh clone, users will not be able to recover if
|
|
they used the wrong command or ran in the wrong repo. (Though
|
|
<code>--force</code> overrides this check, and it’s also off if you’ve already
|
|
run filter-repo once in this repo.)
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
Technically, we actually use a <code>git update-ref</code> command fed with a lot
|
|
of input due to the fact that users can use <code>--force</code> when local
|
|
branches might not match remote branches. But this fetch command
|
|
catches the intent rather succinctly.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
We don’t want users accidentally pushing back to the original repo, as
|
|
discussed in <a href="#DISCUSSION">[DISCUSSION]</a>. It also reminds users that since history
|
|
has been rewritten, this repo is no longer compatible with the
|
|
original. Finally, another minor benefit is this allows users to push
|
|
with the <code>--mirror</code> option to their new home without accidentally
|
|
sending remote tracking branches.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
Some of these flags are always used but others are actually
|
|
conditional. For example, filter-repo’s <code>--replace-text</code> and
|
|
<code>--blob-callback</code> options need to work on blobs so <code>--no-data</code> cannot
|
|
be passed to fast-export. But when we don’t need to work on blobs,
|
|
passing <code>--no-data</code> speeds things up. Also, other flags may change
|
|
the structure of the pipeline as well (e.g. <code>--dry-run</code> and <code>--debug</code>).
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
We use this step to write replace refs for accessing the newly written
|
|
commit hashes using their previous names. Also, if refs were renamed
|
|
by various steps, we need to delete the old refnames in order to avoid
|
|
mixing old and new history.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
Users also have old versions of files in their working tree and index;
|
|
we want those cleaned up to match the rewritten history as well. Note
|
|
that this step is skipped in bare repos.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
Reflogs will hold on to old history, so we need to expire them.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
We need to gc to avoid mixing new and old history. Also, it shrinks
|
|
the repository for users, so they don’t have to do extra work. (Odds
|
|
are that they’ve only rewritten trees and commits and maybe a few
|
|
blobs, so <code>--aggressive</code> isn’t needed and would be too slow.)
|
|
</p>
|
|
</li>
|
|
</ol></div>
|
|
<div class="paragraph"><p>Information about these steps is printed out when <code>--debug</code> is passed
|
|
to filter-repo. When doing a <code>--partial</code> history rewrite, steps 2, 3,
|
|
7, and 8 are unconditionally skipped, step 5 is skipped if
|
|
<code>--replace-refs</code> is <code>update-no-add</code>, and just the nuke-unused-refs
|
|
portion of step 5 is skipped if <code>--replace-refs</code> is something else.</p></div>
|
|
</div>
|
|
<div class="sect2">
|
|
<h3 id="_limitations">Limitations</h3>
|
|
<div class="sect3">
|
|
<h4 id="_inherited_limitations">Inherited limitations</h4>
|
|
<div class="paragraph"><p>Since git filter-repo calls fast-export and fast-import to do a lot of the
|
|
heavy lifting, it inherits limitations from those systems:</p></div>
|
|
<div class="ulist"><ul>
|
|
<li>
|
|
<p>
|
|
extended commit headers, if any, are stripped
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
commits get rewritten, meaning they will have new hashes; therefore,
|
|
signatures on commits and tags cannot continue to work and instead are
|
|
just removed (thus signed tags become annotated tags)
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
tags of commits are supported. Prior to git-2.24.0, tags of blobs and
|
|
tags of tags are not supported (fast-export would die on such tags).
|
|
tags of trees are not supported in any git version (since fast-export
|
|
ignores tags of trees with a warning and fast-import provides no way to
|
|
import them).
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
annotated and signed tags outside of the refs/tags/ namespace are not
|
|
supported (their location will be mangled in weird ways)
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
fast-import will die on various forms of invalid input, such as a
|
|
timezone with more than four digits
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
fast-export cannot reencode commit messages into UTF-8 if the commit
|
|
message is not valid in its specified encoding (in such cases, it’ll
|
|
leave the commit message and the encoding header alone).
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
commits without an author will be given one matching the committer
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
tags without a tagger will be given a fake tagger
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
references that include commit cycles in their history (which can be
|
|
created with <a href="git-replace.html">git-replace(1)</a>) will not be flagged to the user as
|
|
an error but will be silently deleted by fast-export as though the
|
|
branch or tag contained no interesting files
|
|
</p>
|
|
</li>
|
|
</ul></div>
|
|
<div class="paragraph"><p>There are also some limitations due to the design of these systems:</p></div>
|
|
<div class="ulist"><ul>
|
|
<li>
|
|
<p>
|
|
Trying to insert additional files into the stream can be tricky; since
|
|
fast-export only lists file changes in a merge relative to its first
|
|
parent, if you insert additional files into a commit that is in the
|
|
second (or third or fourth) parent history of a merge, then you also
|
|
need to add it to the merge manually. (Similarly, if you change which
|
|
parent is the first parent in a merge commit, you need to manually
|
|
update the list of file changes to be relative to the new first
|
|
parent.)
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
fast-export and fast-import work with exact file contents, not patches.
|
|
(e.g. "Whatever the current contents of this file, update them to now
|
|
have these contents") Because of this, removing the changes made in a
|
|
single commit or inserting additional changes to a file in some commit
|
|
and expecting them to propagate forward is not something that can be
|
|
done with these tools. Use <a href="git-rebase.html">git-rebase(1)</a> for that.
|
|
</p>
|
|
</li>
|
|
</ul></div>
|
|
</div>
|
|
<div class="sect3">
|
|
<h4 id="_intrinsic_limitations">Intrinsic limitations</h4>
|
|
<div class="paragraph"><p>Some types of filtering have limitations that would affect any tool
|
|
attempting to perform them; the most any tool can do is attempt to notify
|
|
the user when it detects an issue:</p></div>
|
|
<div class="ulist"><ul>
|
|
<li>
|
|
<p>
|
|
When rewriting commit hashes in commit messages, there are a variety
|
|
of cases when the hash will not be updated (whenever this happens, a
|
|
note is written to <code>.git/filter-repo/suboptimal-issues</code>):
|
|
</p>
|
|
<div class="ulist"><ul>
|
|
<li>
|
|
<p>
|
|
if a commit hash does not correspond to a commit in the old repo
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
if a commit hash corresponds to a commit that gets pruned
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
if an abbreviated hash is not unique
|
|
</p>
|
|
</li>
|
|
</ul></div>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
Pruning of empty commits can cause a merge commit to lose an entire
|
|
ancestry line and become a non-merge. If the merge commit had no
|
|
changes then it can be pruned too, but if it still has changes it needs
|
|
to be kept. This might cause minor confusion since the commit will
|
|
likely have a commit message that makes it sound like a merge commit
|
|
even though it’s not. (Whenever a merge commit becomes a non-merge
|
|
commit, a note is written to <code>.git/filter-repo/suboptimal-issues</code>)
|
|
</p>
|
|
</li>
|
|
</ul></div>
|
|
</div>
|
|
<div class="sect3">
|
|
<h4 id="_issues_specific_to_filter_repo">Issues specific to filter-repo</h4>
|
|
<div class="ulist"><ul>
|
|
<li>
|
|
<p>
|
|
Multiple repositories in the wild have been observed which use a bogus
|
|
timezone (<code>+051800</code>); google will find you some reports. The intended
|
|
timezone wasn’t clear or wasn’t always the same. filter-repo replaces it with a
|
|
different bogus timezone that fast-import will accept (<code>+0261</code>).
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
<code>--path-rename</code> can result in pathname collisions; to avoid excessive
|
|
memory requirements of tracking which files are in all commits or
|
|
looking up what files exist with either every commit or every usage of
|
|
--path-rename, we just tell the user that they might clobber other
|
|
changes if they aren’t careful. We can check if the clobbering comes
|
|
from another <code>--path-rename</code> without much overhead. (Perhaps in the
|
|
future it’s worth adding a slow mode to <code>--path-rename</code> that will do the
|
|
more exhaustive checks?)
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
There is no mechanism for directly controlling which flags are passed
|
|
to fast-export (or fast-import); only pre-defined flags can be turned
|
|
on or off as a side-effect of other options. Direct control would make
|
|
little sense because some options like <code>--full-tree</code> would require
|
|
additional code in filter-repo (to parse new directives), and others
|
|
such as <code>-M</code> or <code>-C</code> would break assumptions used in other places of
|
|
filter-repo.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
Partial-repo filtering, while supported, runs counter to filter-repo’s
|
|
"avoid mixing old and new history" design. This support has required
|
|
improvements to core git as well (e.g. it depends upon the
|
|
<code>--reference-excluded-parents</code> option to fast-export that was added
|
|
specifically for this usage within filter-repo). The <code>--partial</code> and
|
|
<code>--refs</code> options will continue to be supported since there are people
|
|
with use cases for them; however, I am concerned that this inconsistency
|
|
about mixing old and new history seems likely to lead to user mistakes.
|
|
For now, I just hope that long explanations of caveats in the
|
|
documentation of these options suffice to curtail any such problems.
|
|
</p>
|
|
</li>
|
|
</ul></div>
|
|
</div>
|
|
<div class="sect3">
|
|
<h4 id="_comments_on_reversibility">Comments on reversibility</h4>
|
|
<div class="paragraph"><p>Some people are interested in reversibility of a rewrite; e.g. rewrite
|
|
history, possibly add some commits, then unrewrite and get the original
|
|
history back plus a few new "unrewritten" commits. Obviously this is
|
|
impossible if your rewrite involves throwing away information
|
|
(e.g. filtering out files or replacing several different strings with
|
|
<code>***REMOVED***</code>), but may be possible with some rewrites. filter-repo is
|
|
likely to be a poor fit for this type of workflow for a few reasons:</p></div>
|
|
<div class="ulist"><ul>
|
|
<li>
|
|
<p>
|
|
most of the limitations inherited from fast-export and fast-import
|
|
are of a type that cause reversibility issues
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
grafts and replace refs, if present, are used in the rewrite and made
|
|
permanent
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
rewriting of commit hashes will probably be reversible, but it is
|
|
possible for rewritten abbreviated hashes to not be unique even if the
|
|
original abbreviated hashes were.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
filter-repo defaults to several forms of irreversible rewriting that
|
|
you may need to turn off (e.g. the last two bullet points above or
|
|
reencoding commit messages into UTF-8); it’s possible that additional
|
|
forms of irreversible rewrites will be added in the future.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
I assume that people use filter-repo for one-shot conversions, not
|
|
ongoing data transfers. I explicitly reserve the right to change any
|
|
API in filter-repo based on this presumption (and a comment to this
|
|
effect is found in multiple places in the code and examples). You
|
|
have been warned.
|
|
</p>
|
|
</li>
|
|
</ul></div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
<div class="sect1">
|
|
<h2 id="_see_also">SEE ALSO</h2>
|
|
<div class="sectionbody">
|
|
<div class="paragraph"><p><a href="git-rebase.html">git-rebase(1)</a>, <a href="git-filter-branch.html">git-filter-branch(1)</a></p></div>
|
|
</div>
|
|
</div>
|
|
<div class="sect1">
|
|
<h2 id="_git">GIT</h2>
|
|
<div class="sectionbody">
|
|
<div class="paragraph"><p>Part of the <a href="git.html">git(1)</a> suite</p></div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
<div id="footnotes"><hr /></div>
|
|
<div id="footer">
|
|
<div id="footer-text">
|
|
Last updated
|
|
2022-10-04 21:51:55 PDT
|
|
</div>
|
|
</div>
|
|
</body>
|
|
</html>
|