mirror of
https://github.com/miguelmota/cointop
synced 2024-11-06 21:20:34 +00:00
424 lines
11 KiB
Go
424 lines
11 KiB
Go
/*
|
|
Copyright 2015 The Perkeep Authors
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
See the License for the specific language governing permissions and
|
|
limitations under the License.
|
|
*/
|
|
|
|
package bytereplacer
|
|
|
|
import (
|
|
"bytes"
|
|
"strings"
|
|
"testing"
|
|
)
|
|
|
|
var htmlEscaper = New(
|
|
"&", "&",
|
|
"<", "<",
|
|
">", ">",
|
|
`"`, """,
|
|
"'", "'",
|
|
)
|
|
|
|
var htmlUnescaper = New(
|
|
"&", "&",
|
|
"<", "<",
|
|
">", ">",
|
|
""", `"`,
|
|
"'", "'",
|
|
)
|
|
|
|
// capitalLetters maps the bytes 'a' and 'b' to 'A' and 'B'; both sides of
// every pair are exactly one byte long.
var capitalLetters = New(
	"a", "A",
	"b", "B",
)
|
|
|
|
func TestReplacer(t *testing.T) {
|
|
type testCase struct {
|
|
r *Replacer
|
|
in, out string
|
|
}
|
|
var testCases []testCase
|
|
|
|
// str converts 0xff to "\xff". This isn't just string(b) since that converts to UTF-8.
|
|
str := func(b byte) string {
|
|
return string([]byte{b})
|
|
}
|
|
var s []string
|
|
|
|
// inc maps "\x00"->"\x01", ..., "a"->"b", "b"->"c", ..., "\xff"->"\x00".
|
|
s = nil
|
|
for i := 0; i < 256; i++ {
|
|
s = append(s, str(byte(i)), str(byte(i+1)))
|
|
}
|
|
inc := New(s...)
|
|
|
|
// Test cases with 1-byte old strings, 1-byte new strings.
|
|
testCases = append(testCases,
|
|
testCase{capitalLetters, "brad", "BrAd"},
|
|
testCase{capitalLetters, strings.Repeat("a", (32<<10)+123), strings.Repeat("A", (32<<10)+123)},
|
|
testCase{capitalLetters, "", ""},
|
|
|
|
testCase{inc, "brad", "csbe"},
|
|
testCase{inc, "\x00\xff", "\x01\x00"},
|
|
testCase{inc, "", ""},
|
|
|
|
testCase{New("a", "1", "a", "2"), "brad", "br1d"},
|
|
)
|
|
|
|
// repeat maps "a"->"a", "b"->"bb", "c"->"ccc", ...
|
|
s = nil
|
|
for i := 0; i < 256; i++ {
|
|
n := i + 1 - 'a'
|
|
if n < 1 {
|
|
n = 1
|
|
}
|
|
s = append(s, str(byte(i)), strings.Repeat(str(byte(i)), n))
|
|
}
|
|
repeat := New(s...)
|
|
|
|
// Test cases with 1-byte old strings, variable length new strings.
|
|
testCases = append(testCases,
|
|
testCase{htmlEscaper, "No changes", "No changes"},
|
|
testCase{htmlEscaper, "I <3 escaping & stuff", "I <3 escaping & stuff"},
|
|
testCase{htmlEscaper, "&&&", "&&&"},
|
|
testCase{htmlEscaper, "", ""},
|
|
|
|
testCase{repeat, "brad", "bbrrrrrrrrrrrrrrrrrradddd"},
|
|
testCase{repeat, "abba", "abbbba"},
|
|
testCase{repeat, "", ""},
|
|
|
|
testCase{New("a", "11", "a", "22"), "brad", "br11d"},
|
|
)
|
|
|
|
// The remaining test cases have variable length old strings.
|
|
|
|
testCases = append(testCases,
|
|
testCase{htmlUnescaper, "&amp;", "&"},
|
|
testCase{htmlUnescaper, "<b>HTML's neat</b>", "<b>HTML's neat</b>"},
|
|
testCase{htmlUnescaper, "", ""},
|
|
|
|
testCase{New("a", "1", "a", "2", "xxx", "xxx"), "brad", "br1d"},
|
|
|
|
testCase{New("a", "1", "aa", "2", "aaa", "3"), "aaaa", "1111"},
|
|
|
|
testCase{New("aaa", "3", "aa", "2", "a", "1"), "aaaa", "31"},
|
|
)
|
|
|
|
// gen1 has multiple old strings of variable length. There is no
|
|
// overall non-empty common prefix, but some pairwise common prefixes.
|
|
gen1 := New(
|
|
"aaa", "3[aaa]",
|
|
"aa", "2[aa]",
|
|
"a", "1[a]",
|
|
"i", "i",
|
|
"longerst", "most long",
|
|
"longer", "medium",
|
|
"long", "short",
|
|
"xx", "xx",
|
|
"x", "X",
|
|
"X", "Y",
|
|
"Y", "Z",
|
|
)
|
|
testCases = append(testCases,
|
|
testCase{gen1, "fooaaabar", "foo3[aaa]b1[a]r"},
|
|
testCase{gen1, "long, longerst, longer", "short, most long, medium"},
|
|
testCase{gen1, "xxxxx", "xxxxX"},
|
|
testCase{gen1, "XiX", "YiY"},
|
|
testCase{gen1, "", ""},
|
|
)
|
|
|
|
// gen2 has multiple old strings with no pairwise common prefix.
|
|
gen2 := New(
|
|
"roses", "red",
|
|
"violets", "blue",
|
|
"sugar", "sweet",
|
|
)
|
|
testCases = append(testCases,
|
|
testCase{gen2, "roses are red, violets are blue...", "red are red, blue are blue..."},
|
|
testCase{gen2, "", ""},
|
|
)
|
|
|
|
// gen3 has multiple old strings with an overall common prefix.
|
|
gen3 := New(
|
|
"abracadabra", "poof",
|
|
"abracadabrakazam", "splat",
|
|
"abraham", "lincoln",
|
|
"abrasion", "scrape",
|
|
"abraham", "isaac",
|
|
)
|
|
testCases = append(testCases,
|
|
testCase{gen3, "abracadabrakazam abraham", "poofkazam lincoln"},
|
|
testCase{gen3, "abrasion abracad", "scrape abracad"},
|
|
testCase{gen3, "abba abram abrasive", "abba abram abrasive"},
|
|
testCase{gen3, "", ""},
|
|
)
|
|
|
|
// foo{1,2,3,4} have multiple old strings with an overall common prefix
|
|
// and 1- or 2- byte extensions from the common prefix.
|
|
foo1 := New(
|
|
"foo1", "A",
|
|
"foo2", "B",
|
|
"foo3", "C",
|
|
)
|
|
foo2 := New(
|
|
"foo1", "A",
|
|
"foo2", "B",
|
|
"foo31", "C",
|
|
"foo32", "D",
|
|
)
|
|
foo3 := New(
|
|
"foo11", "A",
|
|
"foo12", "B",
|
|
"foo31", "C",
|
|
"foo32", "D",
|
|
)
|
|
foo4 := New(
|
|
"foo12", "B",
|
|
"foo32", "D",
|
|
)
|
|
testCases = append(testCases,
|
|
testCase{foo1, "fofoofoo12foo32oo", "fofooA2C2oo"},
|
|
testCase{foo1, "", ""},
|
|
|
|
testCase{foo2, "fofoofoo12foo32oo", "fofooA2Doo"},
|
|
testCase{foo2, "", ""},
|
|
|
|
testCase{foo3, "fofoofoo12foo32oo", "fofooBDoo"},
|
|
testCase{foo3, "", ""},
|
|
|
|
testCase{foo4, "fofoofoo12foo32oo", "fofooBDoo"},
|
|
testCase{foo4, "", ""},
|
|
)
|
|
|
|
// genAll maps "\x00\x01\x02...\xfe\xff" to "[all]", amongst other things.
|
|
allBytes := make([]byte, 256)
|
|
for i := range allBytes {
|
|
allBytes[i] = byte(i)
|
|
}
|
|
allString := string(allBytes)
|
|
genAll := New(
|
|
allString, "[all]",
|
|
"\xff", "[ff]",
|
|
"\x00", "[00]",
|
|
)
|
|
testCases = append(testCases,
|
|
testCase{genAll, allString, "[all]"},
|
|
testCase{genAll, "a\xff" + allString + "\x00", "a[ff][all][00]"},
|
|
testCase{genAll, "", ""},
|
|
)
|
|
|
|
// Test cases with empty old strings.
|
|
|
|
blankToX1 := New("", "X")
|
|
blankToX2 := New("", "X", "", "")
|
|
blankHighPriority := New("", "X", "o", "O")
|
|
blankLowPriority := New("o", "O", "", "X")
|
|
blankNoOp1 := New("", "")
|
|
blankNoOp2 := New("", "", "", "A")
|
|
blankFoo := New("", "X", "foobar", "R", "foobaz", "Z")
|
|
testCases = append(testCases,
|
|
testCase{blankToX1, "foo", "XfXoXoX"},
|
|
testCase{blankToX1, "", "X"},
|
|
|
|
testCase{blankToX2, "foo", "XfXoXoX"},
|
|
testCase{blankToX2, "", "X"},
|
|
|
|
testCase{blankHighPriority, "oo", "XOXOX"},
|
|
testCase{blankHighPriority, "ii", "XiXiX"},
|
|
testCase{blankHighPriority, "oiio", "XOXiXiXOX"},
|
|
testCase{blankHighPriority, "iooi", "XiXOXOXiX"},
|
|
testCase{blankHighPriority, "", "X"},
|
|
|
|
testCase{blankLowPriority, "oo", "OOX"},
|
|
testCase{blankLowPriority, "ii", "XiXiX"},
|
|
testCase{blankLowPriority, "oiio", "OXiXiOX"},
|
|
testCase{blankLowPriority, "iooi", "XiOOXiX"},
|
|
testCase{blankLowPriority, "", "X"},
|
|
|
|
testCase{blankNoOp1, "foo", "foo"},
|
|
testCase{blankNoOp1, "", ""},
|
|
|
|
testCase{blankNoOp2, "foo", "foo"},
|
|
testCase{blankNoOp2, "", ""},
|
|
|
|
testCase{blankFoo, "foobarfoobaz", "XRXZX"},
|
|
testCase{blankFoo, "foobar-foobaz", "XRX-XZX"},
|
|
testCase{blankFoo, "", "X"},
|
|
)
|
|
|
|
// single string replacer
|
|
|
|
abcMatcher := New("abc", "[match]")
|
|
|
|
testCases = append(testCases,
|
|
testCase{abcMatcher, "", ""},
|
|
testCase{abcMatcher, "ab", "ab"},
|
|
testCase{abcMatcher, "abc", "[match]"},
|
|
testCase{abcMatcher, "abcd", "[match]d"},
|
|
testCase{abcMatcher, "cabcabcdabca", "c[match][match]d[match]a"},
|
|
)
|
|
|
|
// Issue 6659 cases (more single string replacer)
|
|
|
|
noHello := New("Hello", "")
|
|
testCases = append(testCases,
|
|
testCase{noHello, "Hello", ""},
|
|
testCase{noHello, "Hellox", "x"},
|
|
testCase{noHello, "xHello", "x"},
|
|
testCase{noHello, "xHellox", "xx"},
|
|
)
|
|
|
|
// No-arg test cases.
|
|
|
|
nop := New()
|
|
testCases = append(testCases,
|
|
testCase{nop, "abc", "abc"},
|
|
testCase{nop, "", ""},
|
|
)
|
|
|
|
// Run the test cases.
|
|
|
|
for i, tc := range testCases {
|
|
{
|
|
// Replace with len(in) == cap(in)
|
|
in := make([]byte, len(tc.in))
|
|
copy(in, tc.in)
|
|
if s := string(tc.r.Replace(in)); s != tc.out {
|
|
t.Errorf("%d. Replace(%q /* len == cap */) = %q, want %q", i, tc.in, s, tc.out)
|
|
}
|
|
}
|
|
|
|
{
|
|
// Replace with len(in) < cap(in)
|
|
in := make([]byte, len(tc.in), len(tc.in)*2)
|
|
copy(in, tc.in)
|
|
if s := string(tc.r.Replace(in)); s != tc.out {
|
|
t.Errorf("%d. Replace(%q /* len < cap */) = %q, want %q", i, tc.in, s, tc.out)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
func BenchmarkGenericNoMatch(b *testing.B) {
|
|
str := []byte(strings.Repeat("A", 100) + strings.Repeat("B", 100))
|
|
generic := New("a", "A", "b", "B", "12", "123") // varying lengths forces generic
|
|
for i := 0; i < b.N; i++ {
|
|
generic.Replace(str)
|
|
}
|
|
}
|
|
|
|
func BenchmarkGenericMatch1(b *testing.B) {
|
|
str := []byte(strings.Repeat("a", 100) + strings.Repeat("b", 100))
|
|
generic := New("a", "A", "b", "B", "12", "123")
|
|
for i := 0; i < b.N; i++ {
|
|
generic.Replace(str)
|
|
}
|
|
}
|
|
|
|
func BenchmarkGenericMatch2(b *testing.B) {
|
|
str := bytes.Repeat([]byte("It's <b>HTML</b>!"), 100)
|
|
for i := 0; i < b.N; i++ {
|
|
htmlUnescaper.Replace(str)
|
|
}
|
|
}
|
|
|
|
func benchmarkSingleString(b *testing.B, pattern, text string) {
|
|
r := New(pattern, "[match]")
|
|
buf := make([]byte, len(text), len(text)*7)
|
|
b.SetBytes(int64(len(text)))
|
|
b.ResetTimer()
|
|
for i := 0; i < b.N; i++ {
|
|
copy(buf, text)
|
|
r.Replace(buf)
|
|
}
|
|
}
|
|
|
|
func BenchmarkSingleMaxSkipping(b *testing.B) {
|
|
benchmarkSingleString(b, strings.Repeat("b", 25), strings.Repeat("a", 10000))
|
|
}
|
|
|
|
func BenchmarkSingleLongSuffixFail(b *testing.B) {
|
|
benchmarkSingleString(b, "b"+strings.Repeat("a", 500), strings.Repeat("a", 1002))
|
|
}
|
|
|
|
func BenchmarkSingleMatch(b *testing.B) {
|
|
benchmarkSingleString(b, "abcdef", strings.Repeat("abcdefghijklmno", 1000))
|
|
}
|
|
|
|
func benchmarkReplacer(b *testing.B, r *Replacer, str string) {
|
|
buf := make([]byte, len(str))
|
|
b.ResetTimer()
|
|
for i := 0; i < b.N; i++ {
|
|
copy(buf, str)
|
|
r.Replace(buf)
|
|
}
|
|
}
|
|
|
|
func BenchmarkByteByteNoMatch(b *testing.B) {
|
|
benchmarkReplacer(b, capitalLetters, strings.Repeat("A", 100)+strings.Repeat("B", 100))
|
|
}
|
|
|
|
func BenchmarkByteByteMatch(b *testing.B) {
|
|
benchmarkReplacer(b, capitalLetters, strings.Repeat("a", 100)+strings.Repeat("b", 100))
|
|
}
|
|
|
|
func BenchmarkByteStringMatch(b *testing.B) {
|
|
benchmarkReplacer(b, htmlEscaper, "<"+strings.Repeat("a", 99)+strings.Repeat("b", 99)+">")
|
|
}
|
|
|
|
func BenchmarkHTMLEscapeNew(b *testing.B) {
|
|
benchmarkReplacer(b, htmlEscaper, "I <3 to escape HTML & other text too.")
|
|
}
|
|
|
|
func BenchmarkHTMLEscapeOld(b *testing.B) {
|
|
str := "I <3 to escape HTML & other text too."
|
|
buf := make([]byte, len(str))
|
|
for i := 0; i < b.N; i++ {
|
|
copy(buf, str)
|
|
oldHTMLEscape(buf)
|
|
}
|
|
}
|
|
|
|
// oldHTMLEscape is the http package's old HTML escaping function in bytes
// form. It returns s with '&', '<', '>', '"' and '\'' replaced by their
// entity references, using one bytes.Replace pass per byte.
func oldHTMLEscape(s []byte) []byte {
	// '&' must be handled first so that the ampersands introduced by the
	// later replacements are not escaped a second time.
	s = bytes.Replace(s, []byte("&"), []byte("&amp;"), -1)
	s = bytes.Replace(s, []byte("<"), []byte("&lt;"), -1)
	s = bytes.Replace(s, []byte(">"), []byte("&gt;"), -1)
	s = bytes.Replace(s, []byte(`"`), []byte("&quot;"), -1)
	s = bytes.Replace(s, []byte("'"), []byte("&apos;"), -1)
	return s
}
|
|
|
|
// BenchmarkByteByteReplaces compares byteByteImpl against multiple Replaces.
// It upper-cases 100 'a' bytes followed by 100 'b' bytes with two chained
// bytes.Replace calls.
func BenchmarkByteByteReplaces(b *testing.B) {
	// bytes.Replace returns a copy and leaves its input untouched, so the
	// source slice is built once instead of being converted from a string on
	// every iteration inside the timed loop.
	src := []byte(strings.Repeat("a", 100) + strings.Repeat("b", 100))
	for i := 0; i < b.N; i++ {
		bytes.Replace(bytes.Replace(src, []byte{'a'}, []byte{'A'}, -1), []byte{'b'}, []byte{'B'}, -1)
	}
}
|
|
|
|
// BenchmarkByteByteMap compares byteByteImpl against Map.
// It upper-cases 100 'a' bytes followed by 100 'b' bytes with bytes.Map.
func BenchmarkByteByteMap(b *testing.B) {
	// bytes.Map returns a copy and leaves its input untouched, so the source
	// slice is built once instead of being converted from a string on every
	// iteration inside the timed loop.
	src := []byte(strings.Repeat("a", 100) + strings.Repeat("b", 100))
	fn := func(r rune) rune {
		switch r {
		case 'a':
			return 'A'
		case 'b':
			return 'B'
		}
		return r
	}
	for i := 0; i < b.N; i++ {
		bytes.Map(fn, src)
	}
}
|