From 9cfe2b4090c723df0bd577e2f7ec4070b4e2dcd4 Mon Sep 17 00:00:00 2001 From: rndbit Date: Thu, 26 Aug 2021 01:59:29 +0000 Subject: [PATCH 1/2] filter-repo: fix detection of binary blobs for --replace-text Detection if blob is binary for the purpose of --replace-text always fails and text replacement is applied to all blobs. This has changed going to python3. With python2 the same code would still be wrong but would manifest differently. In the construct 'for x in b"..."' the x is - of type int in python3 - of type str in python2 thus in python3 condition 'x == b"\0"' can not be true for any x due to type difference. Further, the search was supposed to look for NUL byte and not 0 character, thus change to b"\0" instead of b"0". Signed-off-by: rndbit --- git-filter-repo | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/git-filter-repo b/git-filter-repo index 63c47da..c539eb8 100755 --- a/git-filter-repo +++ b/git-filter-repo @@ -3261,8 +3261,10 @@ class RepoFilter(object): if blob.original_id in self._args.strip_blobs_with_ids: blob.skip() - if self._args.replace_text and \ - not any(x == b"0" for x in blob.data[0:8192]): + if ( self._args.replace_text + # not (if blob contains zero byte in the first 8Kb, that is, if blob is binary data) + and not b"\0" in blob.data[0:8192] + ): for literal, replacement in self._args.replace_text['literals']: blob.data = blob.data.replace(literal, replacement) for regex, replacement in self._args.replace_text['regexes']: From 993216739e7cf143e433affee3e91204a914f313 Mon Sep 17 00:00:00 2001 From: rndbit Date: Thu, 26 Aug 2021 05:38:14 +0000 Subject: [PATCH 2/2] filter-repo: add tests for --replace-text in binary blobs The --replace-text failed to detect blobs as binary and incorrectly applied to all blobs.
Prior to the switch from python2 to python3 it incorrectly designated blobs containing a 0 character instead of a NUL byte as binary and would have been causing text replacements to apply to binary files and not apply to text files containing a 0 character. Add regression tests with blobs containing: a 0 character, a NUL byte, and both a 0 character and a NUL byte. Signed-off-by: rndbit --- t/t9390-filter-repo.sh | 81 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 81 insertions(+) diff --git a/t/t9390-filter-repo.sh b/t/t9390-filter-repo.sh index 6d2d985..0133c4b 100755 --- a/t/t9390-filter-repo.sh +++ b/t/t9390-filter-repo.sh @@ -850,6 +850,87 @@ test_expect_success '--replace-text all options' ' ) ' +test_expect_success '--replace-text binary zero_byte-0_char' ' + ( + set -e + set -u + REPO=replace-text-detect-binary + FILE=mangle.bin + OLD_STR=replace-from + NEW_STR=replace-with + # used with printf, contains a zero byte and a "0" character, binary + OLD_CONTENT_FORMAT="${OLD_STR}\\0${OLD_STR}\\n0\\n" + # expect content unchanged due to binary + NEW_CONTENT_FORMAT="${OLD_CONTENT_FORMAT}" + + rm -rf "${REPO}" + git init "${REPO}" + cd "${REPO}" + echo "${OLD_STR}==>${NEW_STR}" >../replace-rules + printf "${NEW_CONTENT_FORMAT}" > ../expect + printf "${OLD_CONTENT_FORMAT}" > "${FILE}" + git add "${FILE}" + git commit -m 'test' + git filter-repo --force --replace-text ../replace-rules + + test_cmp ../expect "${FILE}" + ) +' + +test_expect_success '--replace-text binary zero_byte-no_0_char' ' + ( + set -e + set -u + REPO=replace-text-detect-binary + FILE=mangle.bin + OLD_STR=replace-from + NEW_STR=replace-with + # used with printf, contains a zero byte but no "0" character, binary + OLD_CONTENT_FORMAT="${OLD_STR}\\0${OLD_STR}\\n" + # expect content unchanged due to binary + NEW_CONTENT_FORMAT="${OLD_CONTENT_FORMAT}" + + rm -rf "${REPO}" + git init "${REPO}" + cd "${REPO}" + echo "${OLD_STR}==>${NEW_STR}" >../replace-rules + printf "${NEW_CONTENT_FORMAT}" > ../expect +
printf "${OLD_CONTENT_FORMAT}" > "${FILE}" + git add "${FILE}" + git commit -m 'test' + git filter-repo --force --replace-text ../replace-rules + + test_cmp ../expect "${FILE}" + ) +' + +test_expect_success '--replace-text text-file no_zero_byte-zero_char' ' + ( + set -e + set -u + REPO=replace-text-detect-binary + FILE=mangle.bin + OLD_STR=replace-from + NEW_STR=replace-with + # used with printf, contains no zero byte but contains a "0" character, text + OLD_CONTENT_FORMAT="${OLD_STR}0\\n0${OLD_STR}\\n0\\n" + # expect content changed due to text + NEW_CONTENT_FORMAT="${NEW_STR}0\\n0${NEW_STR}\\n0\\n" + + rm -rf "${REPO}" + git init "${REPO}" + cd "${REPO}" + echo "${OLD_STR}==>${NEW_STR}" >../replace-rules + printf "${NEW_CONTENT_FORMAT}" > ../expect + printf "${OLD_CONTENT_FORMAT}" > "${FILE}" + git add "${FILE}" + git commit -m 'test' + git filter-repo --force --replace-text ../replace-rules + + test_cmp ../expect "${FILE}" + ) +' + test_expect_success '--strip-blobs-bigger-than' ' setup_analyze_me && (