Update vendored dependencies.
parent
f1a9a1f462
commit
04232fb4e5
Binary file not shown.
@ -1,6 +1,15 @@
|
||||
github.com/gabriel-vasile/mimetype v1.2.0 h1:A6z5J8OhjiWFV91sQ3dMI8apYu/tvP9keDaMM3Xu6p4=
|
||||
github.com/gabriel-vasile/mimetype v1.2.0/go.mod h1:6CDPel/o/3/s4+bp6kIbsWATq8pmgOisOPG40CJa6To=
|
||||
github.com/gabriel-vasile/mimetype v1.3.0 h1:4YOHITFLyYwF+iqG0ybSLGArRItynpfwdlWRmJnd75E=
|
||||
github.com/gabriel-vasile/mimetype v1.3.0/go.mod h1:fA8fi6KUiG7MgQQ+mEWotXoEOvmxRtOJlERCzSmRvr8=
|
||||
github.com/mattn/go-xmpp v0.0.0-20210121082723-b40e1294994d h1:LrXbX6iVhQ3Z50hnhTdyP4K60jevMzk/x2TpMYtOJqg=
|
||||
github.com/mattn/go-xmpp v0.0.0-20210121082723-b40e1294994d/go.mod h1:Cs5mF0OsrRRmhkyOod//ldNPOwJsrBvJ+1WRspv0xoc=
|
||||
github.com/pborman/getopt/v2 v2.1.0 h1:eNfR+r+dWLdWmV8g5OlpyrTYHkhVNxHBdN2cCrJmOEA=
|
||||
github.com/pborman/getopt/v2 v2.1.0/go.mod h1:4NtW75ny4eBw9fO1bhtNdYTlZKYX5/tBLtsOpwKIKd0=
|
||||
golang.org/x/net v0.0.0-20210505024714-0287a6fb4125 h1:Ugb8sMTWuWRC3+sz5WeN/4kejDx9BvIwnPUiJBjJE+8=
|
||||
golang.org/x/net v0.0.0-20210505024714-0287a6fb4125/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
|
||||
golang.org/x/net v0.0.0-20210521195947-fe42d452be8f h1:Si4U+UcgJzya9kpiEUJKQvjr512OLli+gL4poHrz93U=
|
||||
golang.org/x/net v0.0.0-20210521195947-fe42d452be8f/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
|
||||
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
|
||||
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
||||
|
@ -1,14 +0,0 @@
|
||||
language: go
|
||||
go:
|
||||
- "1.12"
|
||||
- "master"
|
||||
before_install:
|
||||
- go get github.com/mattn/goveralls
|
||||
- go get github.com/client9/misspell/cmd/misspell
|
||||
before_script:
|
||||
- go vet .
|
||||
script:
|
||||
- diff -u <(echo -n) <(gofmt -d ./)
|
||||
- go test -v -race
|
||||
- $GOPATH/bin/goveralls -service=travis-ci
|
||||
- misspell -locale US -error *.md *.go
|
@ -1,3 +1,5 @@
|
||||
module github.com/gabriel-vasile/mimetype
|
||||
|
||||
go 1.12
|
||||
|
||||
require golang.org/x/net v0.0.0-20210505024714-0287a6fb4125
|
||||
|
@ -0,0 +1,7 @@
|
||||
golang.org/x/net v0.0.0-20210505024714-0287a6fb4125 h1:Ugb8sMTWuWRC3+sz5WeN/4kejDx9BvIwnPUiJBjJE+8=
|
||||
golang.org/x/net v0.0.0-20210505024714-0287a6fb4125/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
|
||||
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
|
||||
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
@ -0,0 +1,296 @@
|
||||
package charset
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/xml"
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
|
||||
"golang.org/x/net/html"
|
||||
)
|
||||
|
||||
const (
|
||||
F = 0 /* character never appears in text */
|
||||
T = 1 /* character appears in plain ASCII text */
|
||||
I = 2 /* character appears in ISO-8859 text */
|
||||
X = 3 /* character appears in non-ISO extended ASCII (Mac, IBM PC) */
|
||||
)
|
||||
|
||||
var (
|
||||
boms = []struct {
|
||||
bom []byte
|
||||
enc string
|
||||
}{
|
||||
{[]byte{0xEF, 0xBB, 0xBF}, "utf-8"},
|
||||
{[]byte{0x00, 0x00, 0xFE, 0xFF}, "utf-32be"},
|
||||
{[]byte{0xFF, 0xFE, 0x00, 0x00}, "utf-32le"},
|
||||
{[]byte{0xFE, 0xFF}, "utf-16be"},
|
||||
{[]byte{0xFF, 0xFE}, "utf-16le"},
|
||||
}
|
||||
|
||||
// https://github.com/file/file/blob/fa93fb9f7d21935f1c7644c47d2975d31f12b812/src/encoding.c#L241
|
||||
textChars [256]byte = [256]byte{
|
||||
/* BEL BS HT LF VT FF CR */
|
||||
F, F, F, F, F, F, F, T, T, T, T, T, T, T, F, F, /* 0x0X */
|
||||
/* ESC */
|
||||
F, F, F, F, F, F, F, F, F, F, F, T, F, F, F, F, /* 0x1X */
|
||||
T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x2X */
|
||||
T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x3X */
|
||||
T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x4X */
|
||||
T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x5X */
|
||||
T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x6X */
|
||||
T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, F, /* 0x7X */
|
||||
/* NEL */
|
||||
X, X, X, X, X, T, X, X, X, X, X, X, X, X, X, X, /* 0x8X */
|
||||
X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, /* 0x9X */
|
||||
I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xaX */
|
||||
I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xbX */
|
||||
I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xcX */
|
||||
I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xdX */
|
||||
I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xeX */
|
||||
I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xfX */
|
||||
}
|
||||
)
|
||||
|
||||
func FromBOM(content []byte) string {
|
||||
for _, b := range boms {
|
||||
if bytes.HasPrefix(content, b.bom) {
|
||||
return b.enc
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func FromPlain(content []byte) string {
|
||||
if len(content) == 0 {
|
||||
return ""
|
||||
}
|
||||
if cset := FromBOM(content); cset != "" {
|
||||
return cset
|
||||
}
|
||||
origContent := content
|
||||
// Try to detect UTF-8.
|
||||
// First eliminate any partial rune at the end.
|
||||
for i := len(content) - 1; i >= 0 && i > len(content)-4; i-- {
|
||||
b := content[i]
|
||||
if b < 0x80 {
|
||||
break
|
||||
}
|
||||
if utf8.RuneStart(b) {
|
||||
content = content[:i]
|
||||
break
|
||||
}
|
||||
}
|
||||
hasHighBit := false
|
||||
for _, c := range content {
|
||||
if c >= 0x80 {
|
||||
hasHighBit = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if hasHighBit && utf8.Valid(content) {
|
||||
return "utf-8"
|
||||
}
|
||||
|
||||
// ASCII is a subset of UTF8. Follow W3C recommendation and replace with UTF8.
|
||||
if ascii(origContent) {
|
||||
return "utf-8"
|
||||
}
|
||||
|
||||
return latin(origContent)
|
||||
}
|
||||
|
||||
func latin(content []byte) string {
|
||||
hasControlBytes := false
|
||||
for _, b := range content {
|
||||
t := textChars[b]
|
||||
if t != T && t != I {
|
||||
return ""
|
||||
}
|
||||
if b >= 0x80 && b <= 0x9F {
|
||||
hasControlBytes = true
|
||||
}
|
||||
}
|
||||
// Code range 0x80 to 0x9F is reserved for control characters in ISO-8859-1
|
||||
// (so-called C1 Controls). Windows 1252, however, has printable punctuation
|
||||
// characters in this range.
|
||||
if hasControlBytes {
|
||||
return "windows-1252"
|
||||
}
|
||||
return "iso-8859-1"
|
||||
}
|
||||
|
||||
func ascii(content []byte) bool {
|
||||
for _, b := range content {
|
||||
if textChars[b] != T {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func FromXML(content []byte) string {
|
||||
if cset := fromXML(content); cset != "" {
|
||||
return cset
|
||||
}
|
||||
return FromPlain(content)
|
||||
}
|
||||
func fromXML(content []byte) string {
|
||||
content = trimLWS(content)
|
||||
dec := xml.NewDecoder(bytes.NewReader(content))
|
||||
rawT, err := dec.RawToken()
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
|
||||
t, ok := rawT.(xml.ProcInst)
|
||||
if !ok {
|
||||
return ""
|
||||
}
|
||||
|
||||
return strings.ToLower(xmlEncoding(string(t.Inst)))
|
||||
}
|
||||
|
||||
func FromHTML(content []byte) string {
|
||||
if cset := fromHTML(content); cset != "" {
|
||||
return cset
|
||||
}
|
||||
return FromPlain(content)
|
||||
}
|
||||
|
||||
func fromHTML(content []byte) string {
|
||||
z := html.NewTokenizer(bytes.NewReader(content))
|
||||
for {
|
||||
switch z.Next() {
|
||||
case html.ErrorToken:
|
||||
return ""
|
||||
|
||||
case html.StartTagToken, html.SelfClosingTagToken:
|
||||
tagName, hasAttr := z.TagName()
|
||||
if !bytes.Equal(tagName, []byte("meta")) {
|
||||
continue
|
||||
}
|
||||
attrList := make(map[string]bool)
|
||||
gotPragma := false
|
||||
|
||||
const (
|
||||
dontKnow = iota
|
||||
doNeedPragma
|
||||
doNotNeedPragma
|
||||
)
|
||||
needPragma := dontKnow
|
||||
|
||||
name := ""
|
||||
for hasAttr {
|
||||
var key, val []byte
|
||||
key, val, hasAttr = z.TagAttr()
|
||||
ks := string(key)
|
||||
if attrList[ks] {
|
||||
continue
|
||||
}
|
||||
attrList[ks] = true
|
||||
for i, c := range val {
|
||||
if 'A' <= c && c <= 'Z' {
|
||||
val[i] = c + 0x20
|
||||
}
|
||||
}
|
||||
|
||||
switch ks {
|
||||
case "http-equiv":
|
||||
if bytes.Equal(val, []byte("content-type")) {
|
||||
gotPragma = true
|
||||
}
|
||||
|
||||
case "content":
|
||||
name = fromMetaElement(string(val))
|
||||
if name != "" {
|
||||
needPragma = doNeedPragma
|
||||
}
|
||||
|
||||
case "charset":
|
||||
name = string(val)
|
||||
needPragma = doNotNeedPragma
|
||||
}
|
||||
}
|
||||
|
||||
if needPragma == dontKnow || needPragma == doNeedPragma && !gotPragma {
|
||||
continue
|
||||
}
|
||||
|
||||
if strings.HasPrefix(name, "utf-16") {
|
||||
name = "utf-8"
|
||||
}
|
||||
|
||||
return name
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// fromMetaElement extracts the value that follows "charset" and "=" in s.
// Occurrences of "charset" that are not followed (after optional whitespace)
// by "=" are skipped. A value starting with a single or double quote runs up
// to the matching quote ("" is returned when the quote is never closed); an
// unquoted value runs up to the first ';' or whitespace character, or to the
// end of s. It returns "" when no value is found.
func fromMetaElement(s string) string {
	const (
		keyword = "charset"
		space   = " \t\n\f\r"
	)
	for len(s) > 0 {
		at := strings.Index(s, keyword)
		if at < 0 {
			return ""
		}
		s = strings.TrimLeft(s[at+len(keyword):], space)
		if !strings.HasPrefix(s, "=") {
			continue
		}
		s = strings.TrimLeft(s[1:], space)
		if s == "" {
			return ""
		}

		switch quote := s[0]; quote {
		case '"', '\'':
			quoted := s[1:]
			closing := strings.IndexByte(quoted, quote)
			if closing < 0 {
				return ""
			}
			return quoted[:closing]
		}

		if stop := strings.IndexAny(s, ";"+space); stop >= 0 {
			return s[:stop]
		}
		return s
	}
	return ""
}
|
||||
|
||||
// xmlEncoding returns the quoted value that directly follows the first
// `encoding=` in s. It returns "" when the parameter is absent, when the value
// does not start with a single or double quote, or when that quote is never
// closed.
func xmlEncoding(s string) string {
	const param = "encoding="
	start := strings.Index(s, param)
	if start < 0 {
		return ""
	}
	rest := s[start+len(param):]
	if len(rest) == 0 {
		return ""
	}
	quote := rest[0]
	if quote != '\'' && quote != '"' {
		return ""
	}
	rest = rest[1:]
	if closing := strings.IndexByte(rest, quote); closing >= 0 {
		return rest[:closing]
	}
	return ""
}
|
||||
|
||||
// trimLWS trims whitespace from beginning of the input.
|
||||
// TODO: find a way to call trimLWS once per detection instead of once in each
|
||||
// detector which needs the trimmed input.
|
||||
func trimLWS(in []byte) []byte {
|
||||
firstNonWS := 0
|
||||
for ; firstNonWS < len(in) && isWS(in[firstNonWS]); firstNonWS++ {
|
||||
}
|
||||
|
||||
return in[firstNonWS:]
|
||||
}
|
||||
|
||||
// isWS reports whether b is a tab, line feed, form feed, carriage return or
// space.
func isWS(b byte) bool {
	switch b {
	case '\t', '\n', '\x0c', '\r', ' ':
		return true
	}
	return false
}
|
@ -0,0 +1,81 @@
|
||||
package magic
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
)
|
||||
|
||||
var (
|
||||
// SevenZ matches a 7z archive.
|
||||
SevenZ = prefix([]byte{0x37, 0x7A, 0xBC, 0xAF, 0x27, 0x1C})
|
||||
// Gzip matches gzip files based on http://www.zlib.org/rfc-gzip.html#header-trailer.
|
||||
Gzip = prefix([]byte{0x1f, 0x8b})
|
||||
// Tar matches a (t)ape (ar)chive file.
|
||||
Tar = offset([]byte("ustar"), 257)
|
||||
// Fits matches a Flexible Image Transport System file.
|
||||
Fits = prefix([]byte{
|
||||
0x53, 0x49, 0x4D, 0x50, 0x4C, 0x45, 0x20, 0x20, 0x3D, 0x20,
|
||||
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
|
||||
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x54,
|
||||
})
|
||||
// Xar matches an eXtensible ARchive format file.
|
||||
Xar = prefix([]byte{0x78, 0x61, 0x72, 0x21})
|
||||
// Bz2 matches a bzip2 file.
|
||||
Bz2 = prefix([]byte{0x42, 0x5A, 0x68})
|
||||
// Ar matches an ar (Unix) archive file.
|
||||
Ar = prefix([]byte{0x21, 0x3C, 0x61, 0x72, 0x63, 0x68, 0x3E})
|
||||
// Deb matches a Debian package file.
|
||||
Deb = offset([]byte{
|
||||
0x64, 0x65, 0x62, 0x69, 0x61, 0x6E, 0x2D,
|
||||
0x62, 0x69, 0x6E, 0x61, 0x72, 0x79,
|
||||
}, 8)
|
||||
// Warc matches a Web ARChive file.
|
||||
Warc = prefix([]byte("WARC/"))
|
||||
// Cab matches a Cabinet archive file.
|
||||
Cab = prefix([]byte("MSCF"))
|
||||
// Xz matches an xz compressed stream based on https://tukaani.org/xz/xz-file-format.txt.
|
||||
Xz = prefix([]byte{0xFD, 0x37, 0x7A, 0x58, 0x5A, 0x00})
|
||||
// Lzip matches an Lzip compressed file.
|
||||
Lzip = prefix([]byte{0x4c, 0x5a, 0x49, 0x50})
|
||||
)
|
||||
|
||||
// Zstd matches a Zstandard archive file: the first byte is 0x1E or lies in
// 0x22-0x28 (different Zstandard versions) and is followed by 0xB5 0x2F 0xFD.
// The limit argument is not used.
func Zstd(raw []byte, limit uint32) bool {
	if len(raw) < 4 {
		return false
	}
	if v := raw[0]; v != 0x1E && (v < 0x22 || v > 0x28) {
		return false
	}
	return bytes.Equal(raw[1:4], []byte{0xB5, 0x2F, 0xFD})
}
|
||||
|
||||
// Rpm matches an RPM or Delta RPM package file: input starting with the bytes
// 0xED 0xAB 0xEE 0xDB or with "drpm". The limit argument is not used.
func Rpm(raw []byte, limit uint32) bool {
	for _, magic := range [...]string{"\xed\xab\xee\xdb", "drpm"} {
		if bytes.HasPrefix(raw, []byte(magic)) {
			return true
		}
	}
	return false
}
|
||||
|
||||
// Cpio matches a cpio archive file: input starting with one of the ASCII
// magics "070707", "070701" or "070702". The limit argument is not used.
func Cpio(raw []byte, limit uint32) bool {
	magics := [...]string{"070707", "070701", "070702"}
	for i := range magics {
		if bytes.HasPrefix(raw, []byte(magics[i])) {
			return true
		}
	}
	return false
}
|
||||
|
||||
// Rar matches a RAR archive file: input starting with "Rar!\x1A\x07" followed
// by either 0x00 or 0x01 0x00. The limit argument is not used.
func Rar(raw []byte, limit uint32) bool {
	const common = "Rar!\x1A\x07"
	if !bytes.HasPrefix(raw, []byte(common)) {
		return false
	}
	tail := raw[len(common):]
	return bytes.HasPrefix(tail, []byte{0x00}) ||
		bytes.HasPrefix(tail, []byte{0x01, 0x00})
}
|
||||
|
||||
// Crx matches a Chrome extension file: a zip archive prepended by a package header.
|
||||
func Crx(raw []byte, limit uint32) bool {
|
||||
const minHeaderLen = 16
|
||||
if len(raw) < minHeaderLen || !bytes.HasPrefix(raw, []byte("Cr24")) {
|
||||
return false
|
||||
}
|
||||
pubkeyLen := binary.LittleEndian.Uint32(raw[8:12])
|
||||
sigLen := binary.LittleEndian.Uint32(raw[12:16])
|
||||
zipOffset := minHeaderLen + pubkeyLen + sigLen
|
||||
if uint32(len(raw)) < zipOffset {
|
||||
return false
|
||||
}
|
||||
return Zip(raw[zipOffset:], limit)
|
||||
}
|
@ -0,0 +1,80 @@
|
||||
package magic
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
)
|
||||
|
||||
var (
|
||||
// Flac matches a Free Lossless Audio Codec file.
|
||||
Flac = prefix([]byte("\x66\x4C\x61\x43\x00\x00\x00\x22"))
|
||||
// Midi matches a Musical Instrument Digital Interface file.
|
||||
Midi = prefix([]byte("\x4D\x54\x68\x64"))
|
||||
// Ape matches a Monkey's Audio file.
|
||||
Ape = prefix([]byte("\x4D\x41\x43\x20\x96\x0F\x00\x00\x34\x00\x00\x00\x18\x00\x00\x00\x90\xE3"))
|
||||
// MusePack matches a Musepack file.
|
||||
MusePack = prefix([]byte("MPCK"))
|
||||
// Au matches a Sun Microsystems au file.
|
||||
Au = prefix([]byte("\x2E\x73\x6E\x64"))
|
||||
// Amr matches an Adaptive Multi-Rate file.
|
||||
Amr = prefix([]byte("\x23\x21\x41\x4D\x52"))
|
||||
// Voc matches a Creative Voice file.
|
||||
Voc = prefix([]byte("Creative Voice File"))
|
||||
// M3u matches a Playlist file.
|
||||
M3u = prefix([]byte("#EXTM3U"))
|
||||
)
|
||||
|
||||
// Mp3 matches an mp3 file. At least three bytes are required. Input starting
// with "ID3" matches (MP3s with an ID3v2 tag start with "ID3"; ID3v1 tags,
// however, appear at the end of the file). Untagged input matches when its
// first two bytes, read big-endian with the lowest bit cleared, equal 0xFFFA
// (MPEG ADTS, layer III, v1), 0xFFF2 (v2) or 0xFFE2 (v2.5). The limit argument
// is not used.
func Mp3(raw []byte, limit uint32) bool {
	if len(raw) < 3 {
		return false
	}
	if bytes.HasPrefix(raw, []byte("ID3")) {
		return true
	}

	// Match MP3 files without tags.
	frameHeader := binary.BigEndian.Uint16(raw[:2]) & 0xFFFE
	return frameHeader == 0xFFFA || frameHeader == 0xFFF2 || frameHeader == 0xFFE2
}
|
||||
|
||||
// Aac matches an Advanced Audio Coding file: input starting with 0xFF 0xF1 or
// 0xFF 0xF9. The limit argument is not used.
func Aac(raw []byte, limit uint32) bool {
	if len(raw) < 2 || !bytes.HasPrefix(raw, []byte{0xFF}) {
		return false
	}
	return raw[1] == 0xF1 || raw[1] == 0xF9
}
|
||||
|
||||
// Wav matches a Waveform Audio File Format file: "RIFF" in bytes 0-3 and
// "WAVE" in bytes 8-11. Bytes 4-7 are not inspected and the limit argument is
// not used.
func Wav(raw []byte, limit uint32) bool {
	// Twelve bytes are all the check reads, so a buffer of exactly that size
	// must be accepted.
	return len(raw) >= 12 &&
		bytes.Equal(raw[:4], []byte("RIFF")) &&
		bytes.Equal(raw[8:12], []byte("WAVE"))
}
|
||||
|
||||
// Aiff matches an Audio Interchange File Format file: "FORM" in bytes 0-3 and
// "AIFF" in bytes 8-11. Bytes 4-7 are not inspected and the limit argument is
// not used.
func Aiff(raw []byte, limit uint32) bool {
	// Twelve bytes are all the check reads, so a buffer of exactly that size
	// must be accepted.
	return len(raw) >= 12 &&
		bytes.Equal(raw[:4], []byte("FORM")) &&
		bytes.Equal(raw[8:12], []byte("AIFF"))
}
|
||||
|
||||
// Qcp matches a Qualcomm Pure Voice file: "RIFF" in bytes 0-3 and "QLCM" in
// bytes 8-11. Bytes 4-7 are not inspected and the limit argument is not used.
func Qcp(raw []byte, limit uint32) bool {
	// Twelve bytes are all the check reads, so a buffer of exactly that size
	// must be accepted.
	return len(raw) >= 12 &&
		bytes.Equal(raw[:4], []byte("RIFF")) &&
		bytes.Equal(raw[8:12], []byte("QLCM"))
}
|
@ -0,0 +1,144 @@
|
||||
package magic
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"debug/macho"
|
||||
"encoding/binary"
|
||||
)
|
||||
|
||||
var (
|
||||
// Lnk matches Microsoft lnk binary format.
|
||||
Lnk = prefix([]byte{0x4C, 0x00, 0x00, 0x00, 0x01, 0x14, 0x02, 0x00})
|
||||
// Wasm matches a web assembly File Format file.
|
||||
Wasm = prefix([]byte{0x00, 0x61, 0x73, 0x6D})
|
||||
// Exe matches a Windows/DOS executable file.
|
||||
Exe = prefix([]byte{0x4D, 0x5A})
|
||||
// Elf matches an Executable and Linkable Format file.
|
||||
Elf = prefix([]byte{0x7F, 0x45, 0x4C, 0x46})
|
||||
// Nes matches a Nintendo Entertainment system ROM file.
|
||||
Nes = prefix([]byte{0x4E, 0x45, 0x53, 0x1A})
|
||||
// TzIf matches a Time Zone Information Format (TZif) file.
|
||||
TzIf = prefix([]byte("TZif"))
|
||||
)
|
||||
|
||||
// classOrMachOFat reports whether in is at least 8 bytes long and starts with
// 0xCA 0xFE 0xBA 0xBE.
//
// Java bytecode and Mach-O binaries share the same magic number.
// More info here https://github.com/threatstack/libmagic/blob/master/magic/Magdir/cafebabe
func classOrMachOFat(in []byte) bool {
	// There should be at least 8 bytes for both of them because the only way
	// to quickly distinguish them is by comparing byte at position 7.
	const minLen = 8
	return len(in) >= minLen &&
		bytes.HasPrefix(in, []byte{0xCA, 0xFE, 0xBA, 0xBE})
}
|
||||
|
||||
// Class matches a java class file.
|
||||
func Class(raw []byte, limit uint32) bool {
|
||||
return classOrMachOFat(raw) && raw[7] > 30
|
||||
}
|
||||
|
||||
// MachO matches Mach-O binaries format.
|
||||
func MachO(raw []byte, limit uint32) bool {
|
||||
if classOrMachOFat(raw) && raw[7] < 20 {
|
||||
return true
|
||||
}
|
||||
|
||||
if len(raw) < 4 {
|
||||
return false
|
||||
}
|
||||
|
||||
be := binary.BigEndian.Uint32(raw)
|
||||
le := binary.LittleEndian.Uint32(raw)
|
||||
|
||||
return be == macho.Magic32 ||
|
||||
le == macho.Magic32 ||
|
||||
be == macho.Magic64 ||
|
||||
le == macho.Magic64
|
||||
}
|
||||
|
||||
// Swf matches an Adobe Flash swf file: input starting with "CWS", "FWS" or
// "ZWS". The limit argument is not used.
func Swf(raw []byte, limit uint32) bool {
	if len(raw) < 3 || !bytes.Equal(raw[1:3], []byte("WS")) {
		return false
	}
	switch raw[0] {
	case 'C', 'F', 'Z':
		return true
	}
	return false
}
|
||||
|
||||
// Dbf matches a dBase file.
// https://www.dbase.com/Knowledgebase/INT/db7_file_fmt.htm
//
// At least four bytes are required. Byte 2 must be in 1-12 and byte 3 in 1-31
// (last update month and day of month), and byte 0 must be one of the known
// dbf type codes. The limit argument is not used.
func Dbf(raw []byte, limit uint32) bool {
	if len(raw) < 4 {
		return false
	}

	// 3rd and 4th bytes contain the last update month and day of month.
	month, day := raw[2], raw[3]
	if month < 1 || month > 12 || day < 1 || day > 31 {
		return false
	}

	// dbf type is dictated by the first byte.
	switch raw[0] {
	case 0x02, 0x03, 0x04, 0x05, 0x30, 0x31, 0x32, 0x42, 0x62, 0x7B, 0x82,
		0x83, 0x87, 0x8A, 0x8B, 0x8E, 0xB3, 0xCB, 0xE5, 0xF5, 0xF4, 0xFB:
		return true
	}
	return false
}
|
||||
|
||||
// ElfObj matches an object file: bytes 16 and 17 are 0x01 0x00 or 0x00 0x01.
// The limit argument is not used.
func ElfObj(raw []byte, limit uint32) bool {
	if len(raw) <= 17 {
		return false
	}
	first, second := raw[16], raw[17]
	return (first == 0x01 && second == 0x00) || (first == 0x00 && second == 0x01)
}
|
||||
|
||||
// ElfExe matches an executable file: bytes 16 and 17 are 0x02 0x00 or
// 0x00 0x02. The limit argument is not used.
func ElfExe(raw []byte, limit uint32) bool {
	if len(raw) <= 17 {
		return false
	}
	first, second := raw[16], raw[17]
	return (first == 0x02 && second == 0x00) || (first == 0x00 && second == 0x02)
}
|
||||
|
||||
// ElfLib matches a shared library file: bytes 16 and 17 are 0x03 0x00 or
// 0x00 0x03. The limit argument is not used.
func ElfLib(raw []byte, limit uint32) bool {
	if len(raw) <= 17 {
		return false
	}
	first, second := raw[16], raw[17]
	return (first == 0x03 && second == 0x00) || (first == 0x00 && second == 0x03)
}
|
||||
|
||||
// ElfDump matches a core dump file: bytes 16 and 17 are 0x04 0x00 or
// 0x00 0x04. The limit argument is not used.
func ElfDump(raw []byte, limit uint32) bool {
	if len(raw) <= 17 {
		return false
	}
	first, second := raw[16], raw[17]
	return (first == 0x04 && second == 0x00) || (first == 0x00 && second == 0x04)
}
|
||||
|
||||
// Dcm matches a DICOM medical format file: the four bytes at offset 128 are
// "DICM". The limit argument is not used.
func Dcm(raw []byte, limit uint32) bool {
	const (
		magicOffset = 128
		magic       = "DICM"
	)
	if len(raw) < magicOffset+len(magic) {
		return false
	}
	return bytes.Equal(raw[magicOffset:magicOffset+len(magic)], []byte(magic))
}
|
||||
|
||||
// Marc matches a MARC21 (MAchine-Readable Cataloging) file. The input must be
// at least 24 bytes long (the "leader" field size), hold "4500" at offset 20,
// start with five ASCII digits and contain a 0x1E field terminator somewhere.
// The limit argument is not used.
func Marc(raw []byte, limit uint32) bool {
	// File is at least 24 bytes ("leader" field size).
	if len(raw) < 24 {
		return false
	}

	// Fixed bytes at offset 20.
	if !bytes.Equal(raw[20:24], []byte("4500")) {
		return false
	}

	// First 5 bytes are ASCII digits.
	for _, c := range raw[:5] {
		if c < '0' || c > '9' {
			return false
		}
	}

	// Field terminator is present.
	return bytes.IndexByte(raw, 0x1E) >= 0
}
|
@ -0,0 +1,13 @@
|
||||
package magic
|
||||
|
||||
var (
|
||||
// Sqlite matches an SQLite database file.
|
||||
Sqlite = prefix([]byte{
|
||||
0x53, 0x51, 0x4c, 0x69, 0x74, 0x65, 0x20, 0x66,
|
||||
0x6f, 0x72, 0x6d, 0x61, 0x74, 0x20, 0x33, 0x00,
|
||||
})
|
||||
// MsAccessAce matches a Microsoft Access database file.
|
||||
MsAccessAce = offset([]byte("Standard ACE DB"), 4)
|
||||
// MsAccessMdb matches legacy Microsoft Access database file (JET, 2003 and earlier).
|
||||
MsAccessMdb = offset([]byte("Standard Jet DB"), 4)
|
||||
)
|
@ -0,0 +1,54 @@
|
||||
package magic
|
||||
|
||||
import "bytes"
|
||||
|
||||
var (
|
||||
// Pdf matches a Portable Document Format file.
|
||||
Pdf = prefix([]byte{0x25, 0x50, 0x44, 0x46})
|
||||
// Fdf matches a Forms Data Format file.
|
||||
Fdf = prefix([]byte("%FDF"))
|
||||
// Mobi matches a Mobi file.
|
||||
Mobi = offset([]byte("BOOKMOBI"), 60)
|
||||
// Lit matches a Microsoft Lit file.
|
||||
Lit = prefix([]byte("ITOLITLS"))
|
||||
)
|
||||
|
||||
// DjVu matches a DjVu file: input of at least 12 bytes starting with
// "AT&TFORM" and carrying "DJVM", "DJVU", "DJVI" or "THUM" at offset 12.
// The limit argument is not used.
func DjVu(raw []byte, limit uint32) bool {
	if len(raw) < 12 || !bytes.HasPrefix(raw, []byte("AT&TFORM")) {
		return false
	}
	payload := raw[12:]
	for _, id := range [...]string{"DJVM", "DJVU", "DJVI", "THUM"} {
		if bytes.HasPrefix(payload, []byte(id)) {
			return true
		}
	}
	return false
}
|
||||
|
||||
// P7s matches a .p7s signature file (PEM, Base64).
//
// PEM input is recognised by its "-----BEGIN PKCS7" prefix. Otherwise the
// input must be at least 20 bytes long and start with 0x30 followed by a byte
// in 0x80-0x84; the signedData magic bytes are then expected at offset
// 2 + (second byte - 0x80). The limit argument is not used.
func P7s(raw []byte, limit uint32) bool {
	// Check for PEM Encoding.
	if bytes.HasPrefix(raw, []byte("-----BEGIN PKCS7")) {
		return true
	}
	// Check if DER Encoding is long enough.
	if len(raw) < 20 {
		return false
	}
	// There are multiple valid headers: 0x30 followed by 0x80 through 0x84.
	if raw[0] != 0x30 || raw[1] < 0x80 || raw[1] > 0x84 {
		return false
	}
	// Magic Bytes for the signedData ASN.1 encoding. Their position shifts by
	// one byte for each step of the second header byte above 0x80.
	signedData := []byte{0x06, 0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x07}
	oidAt := 2 + int(raw[1]-0x80)
	return bytes.HasPrefix(raw[oidAt:], signedData)
}
|
@ -0,0 +1,31 @@
|
||||
package magic
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
)
|
||||
|
||||
var (
|
||||
// Woff matches a Web Open Font Format file.
|
||||
Woff = prefix([]byte("wOFF"))
|
||||
// Woff2 matches a Web Open Font Format version 2 file.
|
||||
Woff2 = prefix([]byte("wOF2"))
|
||||
// Otf matches an OpenType font file.
|
||||
Otf = prefix([]byte{0x4F, 0x54, 0x54, 0x4F, 0x00})
|
||||
)
|
||||
|
||||
// Ttf matches a TrueType font file.
|
||||
func Ttf(raw []byte, limit uint32) bool {
|
||||
if !bytes.HasPrefix(raw, []byte{0x00, 0x01, 0x00, 0x00}) {
|
||||
return false
|
||||
}
|
||||
return !MsAccessAce(raw, limit) && !MsAccessMdb(raw, limit)
|
||||
}
|
||||
|
||||
// Eot matches an Embedded OpenType font file: bytes 34-35 are 0x4C 0x50 and
// bytes 8-10 are one of 0x02 0x00 0x01, 0x01 0x00 0x00 or 0x02 0x00 0x02.
// The limit argument is not used.
func Eot(raw []byte, limit uint32) bool {
	if len(raw) <= 35 || !bytes.Equal(raw[34:36], []byte{0x4C, 0x50}) {
		return false
	}
	accepted := [...][]byte{
		{0x02, 0x00, 0x01},
		{0x01, 0x00, 0x00},
		{0x02, 0x00, 0x02},
	}
	for _, want := range accepted {
		if bytes.Equal(raw[8:11], want) {
			return true
		}
	}
	return false
}
|
@ -0,0 +1,53 @@
|
||||
package magic
|
||||
|
||||
var (
|
||||
// Mp4 matches an MP4 file.
|
||||
Mp4 = ftyp(
|
||||
[]byte("avc1"), []byte("dash"), []byte("iso2"), []byte("iso3"),
|
||||
[]byte("iso4"), []byte("iso5"), []byte("iso6"), []byte("isom"),
|
||||
[]byte("mmp4"), []byte("mp41"), []byte("mp42"), []byte("mp4v"),
|
||||
[]byte("mp71"), []byte("MSNV"), []byte("NDAS"), []byte("NDSC"),
|
||||
[]byte("NSDC"), []byte("NSDH"), []byte("NDSM"), []byte("NDSP"),
|
||||
[]byte("NDSS"), []byte("NDXC"), []byte("NDXH"), []byte("NDXM"),
|
||||
[]byte("NDXP"), []byte("NDXS"), []byte("F4V "), []byte("F4P "),
|
||||
)
|
||||
// ThreeGP matches a 3GPP file.
|
||||
ThreeGP = ftyp(
|
||||
[]byte("3gp1"), []byte("3gp2"), []byte("3gp3"), []byte("3gp4"),
|
||||
[]byte("3gp5"), []byte("3gp6"), []byte("3gp7"), []byte("3gs7"),
|
||||
[]byte("3ge6"), []byte("3ge7"), []byte("3gg6"),
|
||||
)
|
||||
// ThreeG2 matches a 3GPP2 file.
|
||||
ThreeG2 = ftyp(
|
||||
[]byte("3g24"), []byte("3g25"), []byte("3g26"), []byte("3g2a"),
|
||||
[]byte("3g2b"), []byte("3g2c"), []byte("KDDI"),
|
||||
)
|
||||
// AMp4 matches an audio MP4 file.
|
||||
AMp4 = ftyp(
|
||||
// audio for Adobe Flash Player 9+
|
||||
[]byte("F4A "), []byte("F4B "),
|
||||
// Apple iTunes AAC-LC (.M4A) Audio
|
||||
[]byte("M4B "), []byte("M4P "),
|
||||
// MPEG-4 (.MP4) for SonyPSP
|
||||
[]byte("MSNV"),
|
||||
// Nero Digital AAC Audio
|
||||
[]byte("NDAS"),
|
||||
)
|
||||
// QuickTime matches a QuickTime File Format file.
|
||||
QuickTime = ftyp([]byte("qt "), []byte("moov"))
|
||||
// Mqv matches a Sony / Mobile QuickTime file.
|
||||
Mqv = ftyp([]byte("mqt "))
|
||||
// M4a matches an audio M4A file.
|
||||
M4a = ftyp([]byte("M4A "))
|
||||
// M4v matches an Apple M4V video file.
|
||||
M4v = ftyp([]byte("M4V "), []byte("M4VH"), []byte("M4VP"))
|
||||
// Heic matches a High Efficiency Image Coding (HEIC) file.
|
||||
Heic = ftyp([]byte("heic"), []byte("heix"))
|
||||
// HeicSequence matches a High Efficiency Image Coding (HEIC) file sequence.
|
||||
HeicSequence = ftyp([]byte("hevc"), []byte("hevx"))
|
||||
// Heif matches a High Efficiency Image File Format (HEIF) file.
|
||||
Heif = ftyp([]byte("mif1"), []byte("heim"), []byte("heis"), []byte("avic"))
|
||||
// HeifSequence matches a High Efficiency Image File Format (HEIF) file sequence.
|
||||
HeifSequence = ftyp([]byte("msf1"), []byte("hevm"), []byte("hevs"), []byte("avcs"))
|
||||
// TODO: add support for remaining video formats at ftyps.com.
|
||||
)
|
@ -0,0 +1,96 @@
|
||||
package magic
|
||||
|
||||
import "bytes"
|
||||
|
||||
var (
|
||||
// Png matches a Portable Network Graphics file.
|
||||
Png = prefix([]byte{0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A})
|
||||
// Jpg matches a Joint Photographic Experts Group file.
|
||||
Jpg = prefix([]byte{0xFF, 0xD8, 0xFF})
|
||||
// Jp2 matches a JPEG 2000 Image file (ISO 15444-1).
|
||||
Jp2 = jpeg2k([]byte{0x6a, 0x70, 0x32, 0x20})
|
||||
// Jpx matches a JPEG 2000 Image file (ISO 15444-2).
|
||||
Jpx = jpeg2k([]byte{0x6a, 0x70, 0x78, 0x20})
|
||||
// Jpm matches a JPEG 2000 Image file (ISO 15444-6).
|
||||
Jpm = jpeg2k([]byte{0x6a, 0x70, 0x6D, 0x20})
|
||||
// Gif matches a Graphics Interchange Format file.
|
||||
Gif = prefix([]byte("GIF87a"), []byte("GIF89a"))
|
||||
// Bmp matches a bitmap image file.
|
||||
Bmp = prefix([]byte{0x42, 0x4D})
|
||||
// Ps matches a PostScript file.
|
||||
Ps = prefix([]byte("%!PS-Adobe-"))
|
||||
// Psd matches a Photoshop Document file.
|
||||
Psd = prefix([]byte("8BPS"))
|
||||
// Ico matches an ICO file.
|
||||
Ico = prefix([]byte{0x00, 0x00, 0x01, 0x00}, []byte{0x00, 0x00, 0x02, 0x00})
|
||||
// Icns matches an ICNS (Apple Icon Image format) file.
|
||||
Icns = prefix([]byte("icns"))
|
||||
// Tiff matches a Tagged Image File Format file.
|
||||
Tiff = prefix([]byte{0x49, 0x49, 0x2A, 0x00}, []byte{0x4D, 0x4D, 0x00, 0x2A})
|
||||
// Bpg matches a Better Portable Graphics file.
|
||||
Bpg = prefix([]byte{0x42, 0x50, 0x47, 0xFB})
|
||||
// Xcf matches GIMP image data.
|
||||
Xcf = prefix([]byte("gimp xcf"))
|
||||
// Pat matches GIMP pattern data.
|
||||
Pat = offset([]byte("GPAT"), 20)
|
||||
// Gbr matches GIMP brush data.
|
||||
Gbr = offset([]byte("GIMP"), 20)
|
||||
// Hdr matches Radiance HDR image.
|
||||
// https://web.archive.org/web/20060913152809/http://local.wasp.uwa.edu.au/~pbourke/dataformats/pic/
|
||||
Hdr = prefix([]byte("#?RADIANCE\n"))
|
||||
// Xpm matches X PixMap image data.
|
||||
Xpm = prefix([]byte{0x2F, 0x2A, 0x20, 0x58, 0x50, 0x4D, 0x20, 0x2A, 0x2F})
|
||||
)
|
||||
|
||||
func jpeg2k(sig []byte) Detector {
|
||||
return func(raw []byte, limit uint32) bool {
|
||||
if len(raw) < 24 {
|
||||
return false
|
||||
}
|
||||
|
||||
if !bytes.Equal(raw[4:8], []byte{0x6A, 0x50, 0x20, 0x20}) &&
|
||||
!bytes.Equal(raw[4:8], []byte{0x6A, 0x50, 0x32, 0x20}) {
|
||||
return false
|
||||
}
|
||||
return bytes.Equal(raw[20:24], sig)
|
||||
}
|
||||
}
|
||||
|
||||
// Webp matches a WebP file: "RIFF" in bytes 0-3 and "WEBP" in bytes 8-11.
// Bytes 4-7 are not inspected and the limit argument is not used.
func Webp(raw []byte, limit uint32) bool {
	// Twelve bytes are all the check reads, so a buffer of exactly that size
	// must be accepted.
	return len(raw) >= 12 &&
		bytes.Equal(raw[0:4], []byte("RIFF")) &&
		bytes.Equal(raw[8:12], []byte("WEBP"))
}
|
||||
|
||||
// Dwg matches a CAD drawing file: input of at least six bytes that starts
// with "AC" followed by one of the four-character codes listed below.
// The limit argument is not used.
func Dwg(raw []byte, limit uint32) bool {
	if len(raw) < 6 || !bytes.HasPrefix(raw, []byte("AC")) {
		return false
	}

	switch string(raw[2:6]) {
	case "1.40", "1.50", "2.10",
		"1002", "1003", "1004", "1006", "1009",
		"1012", "1014", "1015", "1018",
		"1021", "1024", "1032":
		return true
	}
	return false
}
|
@ -0,0 +1,17 @@
|
||||
package magic
|
||||
|
||||
// Glb matches a glTF model format file.
|
||||
// GLB is the binary file format representation of 3D models saved in
|
||||
// the GL transmission Format (glTF).
|
||||
// see more: https://docs.fileformat.com/3d/glb/
|
||||
// https://www.iana.org/assignments/media-types/model/gltf-binary
|
||||
// GLB file format is based on little endian and its header structure
|
||||
// show below:
|
||||
//
|
||||
// <-- 12-byte header -->
|
||||
// | magic | version | length |
|
||||
// | (uint32) | (uint32) | (uint32) |
|
||||
// | \x67\x6C\x54\x46 | \x01\x00\x00\x00 | ... |
|
||||
// | g l T F | 1 | ... |
|
||||
var Glb = prefix([]byte("\x67\x6C\x54\x46\x02\x00\x00\x00"),
|
||||
[]byte("\x67\x6C\x54\x46\x01\x00\x00\x00"))
|
@ -0,0 +1,42 @@
|
||||
package magic
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
)
|
||||
|
||||
/*
|
||||
NOTE:
|
||||
|
||||
In May 2003, two Internet RFCs were published relating to the format.
|
||||
The Ogg bitstream was defined in RFC 3533 (which is classified as
|
||||
'informative') and its Internet content type (application/ogg) in RFC
|
||||
3534 (which is, as of 2006, a proposed standard protocol). In
|
||||
September 2008, RFC 3534 was obsoleted by RFC 5334, which added
|
||||
content types video/ogg, audio/ogg and filename extensions .ogx, .ogv,
|
||||
.oga, .spx.
|
||||
|
||||
See:
|
||||
https://tools.ietf.org/html/rfc3533
|
||||
https://developer.mozilla.org/en-US/docs/Web/HTTP/Configuring_servers_for_Ogg_media#Serve_media_with_the_correct_MIME_type
|
||||
https://github.com/file/file/blob/master/magic/Magdir/vorbis
|
||||
*/
|
||||
|
||||
// Ogg reports whether raw starts with the Ogg capture pattern "OggS"
// followed by a zero byte.
func Ogg(raw []byte, limit uint32) bool {
	magic := []byte("OggS\x00")
	return bytes.HasPrefix(raw, magic)
}
|
||||
|
||||
// OggAudio reports whether raw, which must be at least 37 bytes long, carries
// one of the known audio codec identifiers (FLAC, Vorbis, Opus, Speex) at
// offset 28.
func OggAudio(raw []byte, limit uint32) bool {
	if len(raw) < 37 {
		return false
	}

	codec := raw[28:]
	for _, id := range [...]string{
		"\x7fFLAC",
		"\x01vorbis",
		"OpusHead",
		"Speex\x20\x20\x20",
	} {
		if bytes.HasPrefix(codec, []byte(id)) {
			return true
		}
	}
	return false
}
|
||||
|
||||
// OggVideo reports whether raw, which must be at least 37 bytes long, carries
// one of the known video stream identifiers (Theora, Skeleton "fishead",
// OGM video) at offset 28.
func OggVideo(raw []byte, limit uint32) bool {
	if len(raw) < 37 {
		return false
	}

	codec := raw[28:]
	for _, id := range [...]string{
		"\x80theora",
		"fishead\x00",
		"\x01video\x00\x00\x00", // OGM video
	} {
		if bytes.HasPrefix(codec, []byte(id)) {
			return true
		}
	}
	return false
}
|
@ -0,0 +1,226 @@
|
||||
// Package magic holds the matching functions used to find MIME types.
|
||||
package magic
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
)
|
||||
|
||||
// Detector is the signature shared by all matching functions in this package.
// raw holds the leading bytes of the input under inspection; limit is the
// read limit that produced raw and is only consulted by the detectors that
// need to know whether raw may have been truncated (for example Json).
// It returns true when raw matches the format.
type Detector func(raw []byte, limit uint32) bool
|
||||
// xmlSig is a signature for an XML based format. Either field may be empty,
// in which case xmlCheck only looks for the other one.
type xmlSig struct {
|
||||
// the local name of the root tag; newXmlSig stores it with a leading "<"
|
||||
localName []byte
|
||||
// the namespace of the XML document, stored as the full attribute text
|
||||
xmlns []byte
|
||||
}
|
||||
|
||||
// prefix creates a Detector which returns true if any of the provided signatures
|
||||
// is the prefix of the raw input.
|
||||
func prefix(sigs ...[]byte) Detector {
|
||||
return func(raw []byte, limit uint32) bool {
|
||||
for _, s := range sigs {
|
||||
if bytes.HasPrefix(raw, s) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// offset creates a Detector which returns true if the provided signature can be
|
||||
// found at offset in the raw input.
|
||||
func offset(sig []byte, offset int) Detector {
|
||||
return func(raw []byte, limit uint32) bool {
|
||||
return len(raw) > offset && bytes.HasPrefix(raw[offset:], sig)
|
||||
}
|
||||
}
|
||||
|
||||
// ciPrefix is like prefix but the check is case insensitive.
|
||||
func ciPrefix(sigs ...[]byte) Detector {
|
||||
return func(raw []byte, limit uint32) bool {
|
||||
for _, s := range sigs {
|
||||
if ciCheck(s, raw) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
}
|
||||
// ciCheck reports whether raw starts with sig, ignoring case for every
// position where sig holds an upper-case ASCII letter. Signatures are
// therefore expected to be written in upper case; lower-case letters and all
// other bytes in sig must match exactly.
//
// An input that is exactly as long as the signature is a valid match, the
// same way it is for prefix.
func ciCheck(sig, raw []byte) bool {
	if len(raw) < len(sig) {
		return false
	}
	for i, b := range sig {
		db := raw[i]
		if 'A' <= b && b <= 'Z' {
			// Clearing bit 0x20 folds an ASCII lower-case letter to upper case.
			db &= 0xDF
		}
		if b != db {
			return false
		}
	}

	return true
}
|
||||
|
||||
// xml creates a Detector which returns true if any of the provided XML signatures
|
||||
// matches the raw input.
|
||||
func xml(sigs ...xmlSig) Detector {
|
||||
return func(raw []byte, limit uint32) bool {
|
||||
raw = trimLWS(raw)
|
||||
if len(raw) == 0 {
|
||||
return false
|
||||
}
|
||||
for _, s := range sigs {
|
||||
if xmlCheck(s, raw) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
}
|
||||
func xmlCheck(sig xmlSig, raw []byte) bool {
|
||||
raw = raw[:min(len(raw), 512)]
|
||||
|
||||
if len(sig.localName) == 0 {
|
||||
return bytes.Index(raw, sig.xmlns) > 0
|
||||
}
|
||||
if len(sig.xmlns) == 0 {
|
||||
return bytes.Index(raw, sig.localName) > 0
|
||||
}
|
||||
|
||||
localNameIndex := bytes.Index(raw, sig.localName)
|
||||
return localNameIndex != -1 && localNameIndex < bytes.Index(raw, sig.xmlns)
|
||||
}
|
||||
|
||||
// markup creates a Detector which returns true is any of the HTML signatures
|
||||
// matches the raw input.
|
||||
func markup(sigs ...[]byte) Detector {
|
||||
return func(raw []byte, limit uint32) bool {
|
||||
raw = trimLWS(raw)
|
||||
if len(raw) == 0 {
|
||||
return false
|
||||
}
|
||||
for _, s := range sigs {
|
||||
if markupCheck(s, raw) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
}
|
||||
// markupCheck reports whether raw starts with the tag signature sig and the
// byte right after it terminates the tag name (a space or '>'). Upper-case
// ASCII letters in sig are matched case-insensitively; raw must therefore be
// at least one byte longer than sig.
func markupCheck(sig, raw []byte) bool {
	n := len(sig)
	if len(raw) <= n {
		return false
	}

	for i := 0; i < n; i++ {
		want, got := sig[i], raw[i]
		if want >= 'A' && want <= 'Z' {
			// Fold an ASCII lower-case letter to upper case.
			got &= 0xDF
		}
		if got != want {
			return false
		}
	}

	// The byte following the signature must be a space or right angle bracket.
	switch raw[n] {
	case ' ', '>':
		return true
	}
	return false
}
|
||||
|
||||
// ftyp creates a Detector which returns true if any of the FTYP signatures
|
||||
// matches the raw input.
|
||||
func ftyp(sigs ...[]byte) Detector {
|
||||
return func(raw []byte, limit uint32) bool {
|
||||
if len(raw) < 12 {
|
||||
return false
|
||||
}
|
||||
for _, s := range sigs {
|
||||
if bytes.Equal(raw[4:12], append([]byte("ftyp"), s...)) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
func newXmlSig(localName, xmlns string) xmlSig {
|
||||
ret := xmlSig{xmlns: []byte(xmlns)}
|
||||
if localName != "" {
|
||||
ret.localName = []byte(fmt.Sprintf("<%s", localName))
|
||||
}
|
||||
|
||||
return ret
|
||||
}
|
||||
|
||||
// A valid shebang starts with the "#!" characters,
|
||||
// followed by any number of spaces,
|
||||
// followed by the path to the interpreter,
|
||||
// and, optionally, followed by the arguments for the interpreter.
|
||||
//
|
||||
// Ex:
|
||||
// #! /usr/bin/env php
|
||||
// /usr/bin/env is the interpreter, php is the first and only argument.
|
||||
func shebang(sigs ...[]byte) Detector {
|
||||
return func(raw []byte, limit uint32) bool {
|
||||
for _, s := range sigs {
|
||||
if shebangCheck(s, firstLine(raw)) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
func shebangCheck(sig, raw []byte) bool {
|
||||
if len(raw) < len(sig)+2 {
|
||||
return false
|
||||
}
|
||||
if raw[0] != '#' || raw[1] != '!' {
|
||||
return false
|
||||
}
|
||||
|
||||
return bytes.Equal(trimLWS(trimRWS(raw[2:])), sig)
|
||||
}
|
||||
|
||||
// trimLWS trims whitespace from beginning of the input.
|
||||
func trimLWS(in []byte) []byte {
|
||||
firstNonWS := 0
|
||||
for ; firstNonWS < len(in) && isWS(in[firstNonWS]); firstNonWS++ {
|
||||
}
|
||||
|
||||
return in[firstNonWS:]
|
||||
}
|
||||
|
||||
// trimRWS trims whitespace from the end of the input.
|
||||
func trimRWS(in []byte) []byte {
|
||||
lastNonWS := len(in) - 1
|
||||
for ; lastNonWS > 0 && isWS(in[lastNonWS]); lastNonWS-- {
|
||||
}
|
||||
|
||||
return in[:lastNonWS+1]
|
||||
}
|
||||
|
||||
// firstLine returns the part of in that precedes the first '\n', or all of
// in when it contains no newline. The newline itself is not included.
func firstLine(in []byte) []byte {
	if nl := bytes.IndexByte(in, '\n'); nl >= 0 {
		return in[:nl]
	}

	return in
}
|
||||
|
||||
// isWS reports whether b is one of the whitespace bytes recognised by this
// package: tab, line feed, form feed, carriage return or space.
func isWS(b byte) bool {
	switch b {
	case '\t', '\n', '\x0c', '\r', ' ':
		return true
	}
	return false
}
|
||||
|
||||
// min returns the smaller of a and b.
func min(a, b int) int {
	if b < a {
		return b
	}
	return a
}
|
@ -0,0 +1,268 @@
|
||||
package magic
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
|
||||
"github.com/gabriel-vasile/mimetype/internal/charset"
|
||||
"github.com/gabriel-vasile/mimetype/internal/json"
|
||||
)
|
||||
|
||||
var (
|
||||
// Html matches a Hypertext Markup Language file.
|
||||
Html = markup(
|
||||
[]byte("<!DOCTYPE HTML"),
|
||||
[]byte("<HTML"),
|
||||
[]byte("<HEAD"),
|
||||
[]byte("<SCRIPT"),
|
||||
[]byte("<IFRAME"),
|
||||
[]byte("<H1"),
|
||||
[]byte("<DIV"),
|
||||
[]byte("<FONT"),
|
||||
[]byte("<TABLE"),
|
||||
[]byte("<A"),
|
||||
[]byte("<STYLE"),
|
||||
[]byte("<TITLE"),
|
||||
[]byte("<B"),
|
||||
[]byte("<BODY"),
|
||||
[]byte("<BR"),
|
||||
[]byte("<P"),
|
||||
[]byte("<!--"),
|
||||
)
|
||||
// Xml matches an Extensible Markup Language file.
|
||||
Xml = markup([]byte("<?XML"))
|
||||
// Owl2 matches an Owl ontology file.
|
||||
Owl2 = xml(newXmlSig("Ontology", `xmlns="http://www.w3.org/2002/07/owl#"`))
|
||||
// Rss matches a Rich Site Summary file.
|
||||
Rss = xml(newXmlSig("rss", ""))
|
||||
// Atom matches an Atom Syndication Format file.
|
||||
Atom = xml(newXmlSig("feed", `xmlns="http://www.w3.org/2005/Atom"`))
|
||||
// Kml matches a Keyhole Markup Language file.
|
||||
Kml = xml(
|
||||
newXmlSig("kml", `xmlns="http://www.opengis.net/kml/2.2"`),
|
||||
newXmlSig("kml", `xmlns="http://earth.google.com/kml/2.0"`),
|
||||
newXmlSig("kml", `xmlns="http://earth.google.com/kml/2.1"`),
|
||||
newXmlSig("kml", `xmlns="http://earth.google.com/kml/2.2"`),
|
||||
)
|
||||
// Xliff matches a XML Localization Interchange File Format file.
|
||||
Xliff = xml(newXmlSig("xliff", `xmlns="urn:oasis:names:tc:xliff:document:1.2"`))
|
||||
// Collada matches a COLLAborative Design Activity file.
|
||||
Collada = xml(newXmlSig("COLLADA", `xmlns="http://www.collada.org/2005/11/COLLADASchema"`))
|
||||
// Gml matches a Geography Markup Language file.
|
||||
Gml = xml(
|
||||
newXmlSig("", `xmlns:gml="http://www.opengis.net/gml"`),
|
||||
newXmlSig("", `xmlns:gml="http://www.opengis.net/gml/3.2"`),
|
||||
newXmlSig("", `xmlns:gml="http://www.opengis.net/gml/3.3/exr"`),
|
||||
)
|
||||
// Gpx matches a GPS Exchange Format file.
|
||||
Gpx = xml(newXmlSig("gpx", `xmlns="http://www.topografix.com/GPX/1/1"`))
|
||||
// Tcx matches a Training Center XML file.
|
||||
Tcx = xml(newXmlSig("TrainingCenterDatabase", `xmlns="http://www.garmin.com/xmlschemas/TrainingCenterDatabase/v2"`))
|
||||
// X3d matches an Extensible 3D Graphics file.
|
||||
X3d = xml(newXmlSig("X3D", `xmlns:xsd="http://www.w3.org/2001/XMLSchema-instance"`))
|
||||
// Amf matches an Additive Manufacturing XML file.
|
||||
Amf = xml(newXmlSig("amf", ""))
|
||||
// Threemf matches a 3D Manufacturing Format file.
|
||||
Threemf = xml(newXmlSig("model", `xmlns="http://schemas.microsoft.com/3dmanufacturing/core/2015/02"`))
|
||||
// Xfdf matches a XML Forms Data Format file.
|
||||
Xfdf = xml(newXmlSig("xfdf", `xmlns="http://ns.adobe.com/xfdf/"`))
|
||||
// VCard matches a Virtual Contact File.
|
||||
VCard = ciPrefix([]byte("BEGIN:VCARD\n"), []byte("BEGIN:VCARD\r\n"))
|
||||
// ICalendar matches a iCalendar file.
|
||||
ICalendar = ciPrefix([]byte("BEGIN:VCALENDAR\n"), []byte("BEGIN:VCALENDAR\r\n"))
|
||||
phpPageF = ciPrefix(
|
||||
[]byte("<?PHP"),
|
||||
[]byte("<?\n"),
|
||||
[]byte("<?\r"),
|
||||
[]byte("<? "),
|
||||
)
|
||||
phpScriptF = shebang(
|
||||
[]byte("/usr/local/bin/php"),
|
||||
[]byte("/usr/bin/php"),
|
||||
[]byte("/usr/bin/env php"),
|
||||
)
|
||||
// Js matches a Javascript file.
|
||||
Js = shebang(
|
||||
[]byte("/bin/node"),
|
||||
[]byte("/usr/bin/node"),
|
||||
[]byte("/bin/nodejs"),
|
||||
[]byte("/usr/bin/nodejs"),
|
||||
[]byte("/usr/bin/env node"),
|
||||
[]byte("/usr/bin/env nodejs"),
|
||||
)
|
||||
// Lua matches a Lua programming language file.
|
||||
Lua = shebang(
|
||||
[]byte("/usr/bin/lua"),
|
||||
[]byte("/usr/local/bin/lua"),
|
||||
[]byte("/usr/bin/env lua"),
|
||||
)
|
||||
// Perl matches a Perl programming language file.
|
||||
Perl = shebang(
|
||||
[]byte("/usr/bin/perl"),
|
||||
[]byte("/usr/bin/env perl"),
|
||||
)
|
||||
// Python matches a Python programming language file.
|
||||
Python = shebang(
|
||||
[]byte("/usr/bin/python"),
|
||||
[]byte("/usr/local/bin/python"),
|
||||
[]byte("/usr/bin/env python"),
|
||||
)
|
||||
// Tcl matches a Tcl programming language file.
|
||||
Tcl = shebang(
|
||||
[]byte("/usr/bin/tcl"),
|
||||
[]byte("/usr/local/bin/tcl"),
|
||||
[]byte("/usr/bin/env tcl"),
|
||||
[]byte("/usr/bin/tclsh"),
|
||||
[]byte("/usr/local/bin/tclsh"),
|
||||
[]byte("/usr/bin/env tclsh"),
|
||||
[]byte("/usr/bin/wish"),
|
||||
[]byte("/usr/local/bin/wish"),
|
||||
[]byte("/usr/bin/env wish"),
|
||||
)
|
||||
// Rtf matches a Rich Text Format file.
|
||||
Rtf = prefix([]byte("{\\rtf1"))
|
||||
)
|
||||
|
||||
// Text matches a plain text file.
|
||||
//
|
||||
// TODO: This function does not parse BOM-less UTF16 and UTF32 files. Not really
|
||||
// sure it should. Linux file utility also requires a BOM for UTF16 and UTF32.
|
||||
func Text(raw []byte, limit uint32) bool {
|
||||
// First look for BOM.
|
||||
if cset := charset.FromBOM(raw); cset != "" {
|
||||
return true
|
||||
}
|
||||
return isText(raw)
|
||||
}
|
||||
|
||||
// Php matches a PHP: Hypertext Preprocessor file.
|
||||
func Php(raw []byte, limit uint32) bool {
|
||||
if res := phpPageF(raw, limit); res {
|
||||
return res
|
||||
}
|
||||
return phpScriptF(raw, limit)
|
||||
}
|
||||
|
||||
// Json matches a JavaScript Object Notation file.
|
||||
func Json(raw []byte, limit uint32) bool {
|
||||
parsed, err := json.Scan(raw)
|
||||
if len(raw) < int(limit) {
|
||||
return err == nil
|
||||
}
|
||||
|
||||
return parsed == len(raw)
|
||||
}
|
||||
|
||||
// GeoJson matches a RFC 7946 GeoJSON file.
|
||||
//
|
||||
// GeoJson detection implies searching for key:value pairs like: `"type": "Feature"`
|
||||
// in the input.
|
||||
// BUG(gabriel-vasile): The "type" key should be searched for in the root object.
|
||||
func GeoJson(raw []byte, limit uint32) bool {
|
||||
raw = trimLWS(raw)
|
||||
if len(raw) == 0 {
|
||||
return false
|
||||
}
|
||||
// GeoJSON is always a JSON object, not a JSON array.
|
||||
if raw[0] != '{' {
|
||||
return false
|
||||
}
|
||||
|
||||
s := []byte(`"type"`)
|
||||
si, sl := bytes.Index(raw, s), len(s)
|
||||
|
||||
if si == -1 {
|
||||
return false
|
||||
}
|
||||
|
||||
// If the "type" string is the suffix of the input,
|
||||
// there is no need to search for the value of the key.
|
||||
if si+sl == len(raw) {
|
||||
return false
|
||||
}
|
||||
// Skip the "type" part.
|
||||
raw = raw[si+sl:]
|
||||
// Skip any whitespace before the colon.
|
||||
raw = trimLWS(raw)
|
||||
// Check for colon.
|
||||
if len(raw) == 0 || raw[0] != ':' {
|
||||
return false
|
||||
}
|
||||
// Skip any whitespace after the colon.
|
||||
raw = trimLWS(raw[1:])
|
||||
|
||||
geoJsonTypes := [][]byte{
|
||||
[]byte(`"Feature"`),
|
||||
[]byte(`"FeatureCollection"`),
|
||||
[]byte(`"Point"`),
|
||||
[]byte(`"LineString"`),
|
||||
[]byte(`"Polygon"`),
|
||||
[]byte(`"MultiPoint"`),
|
||||
[]byte(`"MultiLineString"`),
|
||||
[]byte(`"MultiPolygon"`),
|
||||
[]byte(`"GeometryCollection"`),
|
||||
}
|
||||
for _, t := range geoJsonTypes {
|
||||
if bytes.HasPrefix(raw, t) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
// NdJson matches a Newline delimited JSON file.
|
||||
func NdJson(raw []byte, limit uint32) bool {
|
||||
// Separator with carriage return and new line `\r\n`.
|
||||
srn := []byte{0x0D, 0x0A}
|
||||
|
||||
// Separator with only new line `\n`.
|
||||
sn := []byte{0x0A}
|
||||
|
||||
// Total bytes scanned.
|
||||
parsed := 0
|
||||
|
||||
// Split by `srn`.
|
||||
for rni, insrn := range bytes.Split(raw, srn) {
|
||||
// Separator byte count should be added only after the first split.
|
||||
if rni != 0 {
|
||||
// Add two as `\r\n` is used for split.
|
||||
parsed += 2
|
||||
}
|
||||
// Split again by `sn`.
|
||||
for ni, insn := range bytes.Split(insrn, sn) {
|
||||
// Separator byte count should be added only after the first split.
|
||||
if ni != 0 {
|
||||
// Add one as `\n` is used for split.
|
||||
parsed++
|
||||
}
|
||||
// Empty line is valid.
|
||||
if len(insn) == 0 {
|
||||
continue
|
||||
}
|
||||
p, err := json.Scan(insn)
|
||||
parsed += p
|
||||
if parsed < int(limit) && err != nil {
|
||||
return false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Empty inputs should not pass as valid NDJSON with 0 lines.
|
||||
return parsed > 2 && parsed == len(raw)
|
||||
}
|
||||
|
||||
// Svg matches a SVG file: any input containing the byte sequence "<svg"
// (case-sensitive) anywhere.
func Svg(raw []byte, limit uint32) bool {
	return bytes.Index(raw, []byte("<svg")) != -1
}
|
||||
|
||||
// isText considers any file containing null bytes as a binary file; only the
// first 8000 bytes are examined.
// There is plenty room for disagreement regarding what should be considered a
// text file. This approach is used by diff, cat, and other linux utilities.
func isText(raw []byte) bool {
	const sniffLen = 8000

	head := raw
	if len(head) > sniffLen {
		head = head[:sniffLen]
	}
	return !bytes.Contains(head, []byte{0})
}
|
@ -0,0 +1,4 @@
|
||||
package magic
|
||||
|
||||
// Torrent matches a torrent file. Such files have bencoded text in the
// beginning; the input must start with "d8:announce".
|
||||
var Torrent = prefix([]byte("d8:announce"))
|
@ -0,0 +1,48 @@
|
||||
package magic
|
||||
|
||||
var (
|
||||
// Odt matches an OpenDocument Text file.
|
||||
Odt = offset([]byte("mimetypeapplication/vnd.oasis.opendocument.text"), 30)
|
||||
// Ott matches an OpenDocument Text Template file.
|
||||
Ott = offset([]byte("mimetypeapplication/vnd.oasis.opendocument.text-template"), 30)
|
||||
// Ods matches an OpenDocument Spreadsheet file.
|
||||
Ods = offset([]byte("mimetypeapplication/vnd.oasis.opendocument.spreadsheet"), 30)
|
||||
// Ots matches an OpenDocument Spreadsheet Template file.
|
||||
Ots = offset([]byte("mimetypeapplication/vnd.oasis.opendocument.spreadsheet-template"), 30)
|
||||
// Odp matches an OpenDocument Presentation file.
|
||||
Odp = offset([]byte("mimetypeapplication/vnd.oasis.opendocument.presentation"), 30)
|
||||
// Otp matches an OpenDocument Presentation Template file.
|
||||
Otp = offset([]byte("mimetypeapplication/vnd.oasis.opendocument.presentation-template"), 30)
|
||||
// Odg matches an OpenDocument Drawing file.
|
||||
Odg = offset([]byte("mimetypeapplication/vnd.oasis.opendocument.graphics"), 30)
|
||||
// Otg matches an OpenDocument Drawing Template file.
|
||||
Otg = offset([]byte("mimetypeapplication/vnd.oasis.opendocument.graphics-template"), 30)
|
||||
// Odf matches an OpenDocument Formula file.
|
||||
Odf = offset([]byte("mimetypeapplication/vnd.oasis.opendocument.formula"), 30)
|
||||
// Odc matches an OpenDocument Chart file.
|
||||
Odc = offset([]byte("mimetypeapplication/vnd.oasis.opendocument.chart"), 30)
|
||||
// Epub matches an EPUB file.
|
||||
Epub = offset([]byte("mimetypeapplication/epub+zip"), 30)
|
||||
// Sxc matches an OpenOffice Spreadsheet file.
|
||||
Sxc = offset([]byte("mimetypeapplication/vnd.sun.xml.calc"), 30)
|
||||
)
|
||||
|
||||
// Zip matches a zip archive: "PK" followed by a byte that is 3, 5 or 7 and a
// byte that is 4, 6 or 8. The two bytes are checked independently.
func Zip(raw []byte, limit uint32) bool {
	if len(raw) <= 3 || raw[0] != 'P' || raw[1] != 'K' {
		return false
	}

	switch raw[2] {
	case 0x3, 0x5, 0x7:
	default:
		return false
	}

	switch raw[3] {
	case 0x4, 0x6, 0x8:
		return true
	}
	return false
}
|
||||
|
||||
// Jar matches a Java archive file.
|
||||
func Jar(raw []byte, limit uint32) bool {
|
||||
t := zipTokenizer{in: raw}
|
||||
for i, tok := 0, t.next(); i < 10 && tok != ""; i, tok = i+1, t.next() {
|
||||
if tok == "META-INF/MANIFEST.MF" {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
@ -1,102 +0,0 @@
|
||||
package matchers
|
||||
|
||||
import "bytes"
|
||||
|
||||
// SevenZ matches a 7z archive by its six-byte signature.
func SevenZ(in []byte, _ uint32) bool {
	magic := []byte("7z\xBC\xAF\x27\x1C")
	return bytes.HasPrefix(in, magic)
}
|
||||
|
||||
// Gzip matches gzip files by the two identification bytes 0x1f 0x8b, based on
// http://www.zlib.org/rfc-gzip.html#header-trailer.
func Gzip(in []byte, _ uint32) bool {
	return len(in) >= 2 && bytes.Equal(in[:2], []byte{0x1f, 0x8b})
}
|
||||
|
||||
// Crx matches a Chrome extension file: a zip archive prepended by "Cr24".
// Only the "Cr24" prefix is checked.
func Crx(in []byte, _ uint32) bool {
	return len(in) >= 4 && bytes.Equal(in[:4], []byte("Cr24"))
}
|
||||
|
||||
// Tar matches a (t)ape (ar)chive file: an input longer than 262 bytes with
// "ustar" at offset 257.
func Tar(in []byte, _ uint32) bool {
	if len(in) <= 262 {
		return false
	}
	return bytes.Equal(in[257:262], []byte("ustar"))
}
|
||||
|
||||
// Fits matches a Flexible Image Transport System file by its fixed 30-byte
// opening: "SIMPLE", two spaces, "=", twenty spaces and "T".
func Fits(in []byte, _ uint32) bool {
	header := []byte{
		0x53, 0x49, 0x4D, 0x50, 0x4C, 0x45, // "SIMPLE"
		0x20, 0x20, 0x3D, // "  ="
		0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
		0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
		0x54, // "T"
	}
	return bytes.HasPrefix(in, header)
}
|
||||
|
||||
// Xar matches an eXtensible ARchive format file by the prefix "xar!".
func Xar(in []byte, _ uint32) bool {
	return bytes.HasPrefix(in, []byte("xar!"))
}
|
||||
|
||||
// Bz2 matches a bzip2 file by the prefix "BZh".
func Bz2(in []byte, _ uint32) bool {
	return bytes.HasPrefix(in, []byte("BZh"))
}
|
||||
|
||||
// Ar matches an ar (Unix) archive file by the prefix "!<arch>".
func Ar(in []byte, _ uint32) bool {
	return bytes.HasPrefix(in, []byte("!<arch>"))
}
|
||||
|
||||
// Deb matches a Debian package file: "debian-binary" at offset 8 of the
// input.
func Deb(in []byte, _ uint32) bool {
	if len(in) <= 8 {
		return false
	}
	return bytes.HasPrefix(in[8:], []byte("debian-binary"))
}
|
||||
|
||||
// Rar matches a RAR archive file by either of its two known signatures.
func Rar(in []byte, _ uint32) bool {
	for _, magic := range [...]string{
		"Rar!\x1A\x07\x00",
		"Rar!\x1A\x07\x01\x00",
	} {
		if bytes.HasPrefix(in, []byte(magic)) {
			return true
		}
	}
	return false
}
|
||||
|
||||
// Warc matches a Web ARChive file by the prefix "WARC/".
func Warc(in []byte, _ uint32) bool {
	return len(in) >= 5 && bytes.Equal(in[:5], []byte("WARC/"))
}
|
||||
|
||||
// Zstd matches a Zstandard archive file: a first byte of 0x1E or in the
// range 0x22-0x28, followed by 0xB5 0x2F 0xFD.
func Zstd(in []byte, _ uint32) bool {
	if len(in) < 4 {
		return false
	}
	// The first byte differs between Zstandard versions.
	if v := in[0]; v != 0x1E && (v < 0x22 || v > 0x28) {
		return false
	}
	return bytes.HasPrefix(in[1:], []byte{0xB5, 0x2F, 0xFD})
}
|
||||
|
||||
// Cab matches a Cabinet archive file by the prefix "MSCF".
func Cab(in []byte, _ uint32) bool {
	return len(in) >= 4 && bytes.Equal(in[:4], []byte("MSCF"))
}
|
||||
|
||||
// Rpm matches an RPM or Delta RPM package file: an input longer than four
// bytes that starts with 0xED 0xAB 0xEE 0xDB or with "drpm".
func Rpm(in []byte, _ uint32) bool {
	if len(in) <= 4 {
		return false
	}
	if bytes.HasPrefix(in, []byte("\xed\xab\xee\xdb")) {
		return true
	}
	return bytes.HasPrefix(in, []byte("drpm"))
}
|
||||
|
||||
// Xz matches an xz compressed stream by its six-byte header magic, based on
// https://tukaani.org/xz/xz-file-format.txt.
func Xz(in []byte, _ uint32) bool {
	magic := []byte("\xFD7zXZ\x00")
	return bytes.HasPrefix(in, magic)
}
|
||||
|
||||
// Lzip matches an Lzip compressed file by the prefix "LZIP".
func Lzip(in []byte, _ uint32) bool {
	return bytes.HasPrefix(in, []byte("LZIP"))
}
|
||||
|
||||
// Cpio matches a cpio archive file by one of the ASCII magics "070707",
// "070701" or "070702".
func Cpio(in []byte, _ uint32) bool {
	for _, magic := range [...]string{"070707", "070701", "070702"} {
		if bytes.HasPrefix(in, []byte(magic)) {
			return true
		}
	}
	return false
}
|
@ -1,101 +0,0 @@
|
||||
package matchers
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
)
|
||||
|
||||
// Mp3 matches an mp3 file: either the input starts with an ID3v2 tag, or its
// first 16 bits (lowest bit ignored) form an MPEG ADTS layer III sync word.
// Inputs shorter than three bytes never match.
func Mp3(in []byte, _ uint32) bool {
	if len(in) < 3 {
		return false
	}

	// MP3s with an ID3v2 tag will start with "ID3".
	// ID3v1 tags, however, appear at the end of the file.
	if bytes.HasPrefix(in, []byte("ID3")) {
		return true
	}

	// Match MP3 files without tags.
	switch binary.BigEndian.Uint16(in[:2]) & 0xFFFE {
	case 0xFFFA, // MPEG ADTS, layer III, v1
		0xFFF2, // MPEG ADTS, layer III, v2
		0xFFE2: // MPEG ADTS, layer III, v2.5
		return true
	}

	return false
}
|
||||
|
||||
// Flac matches a Free Lossless Audio Codec file: "fLaC" followed by the
// bytes 0x00 0x00 0x00 0x22.
func Flac(in []byte, _ uint32) bool {
	magic := []byte("fLaC\x00\x00\x00\x22")
	return bytes.HasPrefix(in, magic)
}
|
||||
|
||||
// Midi matches a Musical Instrument Digital Interface file by the prefix
// "MThd".
func Midi(in []byte, _ uint32) bool {
	return bytes.HasPrefix(in, []byte("MThd"))
}
|
||||
|
||||
// Ape matches a Monkey's Audio file by a fixed 18-byte header that begins
// with "MAC ".
func Ape(in []byte, _ uint32) bool {
	header := []byte("MAC \x96\x0F\x00\x00\x34\x00\x00\x00\x18\x00\x00\x00\x90\xE3")
	return bytes.HasPrefix(in, header)
}
|
||||
|
||||
// MusePack matches a Musepack file by the prefix "MPCK".
func MusePack(in []byte, _ uint32) bool {
	return len(in) >= 4 && bytes.Equal(in[:4], []byte("MPCK"))
}
|
||||
|
||||
// Wav matches a Waveform Audio File Format file: a RIFF container whose form
// type at offset 8 is "WAVE". Inputs of 12 bytes or fewer never match.
func Wav(in []byte, _ uint32) bool {
	if len(in) <= 12 {
		return false
	}
	return bytes.Equal(in[:4], []byte("RIFF")) &&
		bytes.Equal(in[8:12], []byte("WAVE"))
}
|
||||
|
||||
// Aiff matches an Audio Interchange File Format file: "FORM" at the start
// and "AIFF" at offset 8. Inputs of 12 bytes or fewer never match.
func Aiff(in []byte, _ uint32) bool {
	if len(in) <= 12 {
		return false
	}
	return bytes.Equal(in[:4], []byte("FORM")) &&
		bytes.Equal(in[8:12], []byte("AIFF"))
}
|
||||
|
||||
// Au matches a Sun Microsystems au file by the prefix ".snd".
func Au(in []byte, _ uint32) bool {
	return bytes.HasPrefix(in, []byte(".snd"))
}
|
||||
|
||||
// Amr matches an Adaptive Multi-Rate file by the prefix "#!AMR".
func Amr(in []byte, _ uint32) bool {
	return bytes.HasPrefix(in, []byte("#!AMR"))
}
|
||||
|
||||
// Aac matches an Advanced Audio Coding file by the two-byte prefix 0xFF 0xF1
// or 0xFF 0xF9.
func Aac(in []byte, _ uint32) bool {
	for _, sync := range [...][]byte{{0xFF, 0xF1}, {0xFF, 0xF9}} {
		if bytes.HasPrefix(in, sync) {
			return true
		}
	}
	return false
}
|
||||
|
||||
// Voc matches a Creative Voice file by the prefix "Creative Voice File".
func Voc(in []byte, _ uint32) bool {
	const magic = "Creative Voice File"
	return len(in) >= len(magic) && bytes.Equal(in[:len(magic)], []byte(magic))
}
|
||||
|
||||
// Qcp matches a Qualcomm Pure Voice file: a RIFF container whose form type
// at offset 8 is "QLCM". Inputs of 12 bytes or fewer never match.
func Qcp(in []byte, _ uint32) bool {
	if len(in) <= 12 {
		return false
	}
	return bytes.Equal(in[:4], []byte("RIFF")) &&
		bytes.Equal(in[8:12], []byte("QLCM"))
}
|
||||
|
||||
// M3u matches a Playlist file: an input longer than seven bytes that starts
// with "#EXTM3U".
func M3u(in []byte, _ uint32) bool {
	if len(in) <= 7 {
		return false
	}
	return bytes.Equal(in[:7], []byte("#EXTM3U"))
}
|
@ -1,151 +0,0 @@
|
||||
package matchers
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"debug/macho"
|
||||
"encoding/binary"
|
||||
)
|
||||
|
||||
// classOrMachOFat reports whether in starts with the magic number 0xCAFEBABE,
// which Java bytecode and Mach-O binaries share.
// More info here https://github.com/threatstack/libmagic/blob/master/magic/Magdir/cafebabe
//
// At least 8 bytes are required because callers tell the two formats apart
// by looking at the byte at position 7.
func classOrMachOFat(in []byte) bool {
	return len(in) >= 8 && bytes.HasPrefix(in, []byte("\xCA\xFE\xBA\xBE"))
}
|
||||
|
||||
// Class matches a java class file.
|
||||
func Class(in []byte, _ uint32) bool {
|
||||
return classOrMachOFat(in) && in[7] > 30
|
||||
}
|
||||
|
||||
// MachO matches Mach-O binaries format.
|
||||
func MachO(in []byte, _ uint32) bool {
|
||||
if classOrMachOFat(in) && in[7] < 20 {
|
||||
return true
|
||||
}
|
||||
|
||||
if len(in) < 4 {
|
||||
return false
|
||||
}
|
||||
|
||||
be := binary.BigEndian.Uint32(in)
|
||||
le := binary.LittleEndian.Uint32(in)
|
||||
|
||||
return be == macho.Magic32 || le == macho.Magic32 || be == macho.Magic64 || le == macho.Magic64
|
||||
}
|
||||
|
||||
// Swf matches an Adobe Flash swf file by one of the prefixes "CWS", "FWS" or
// "ZWS".
func Swf(in []byte, _ uint32) bool {
	for _, magic := range [...]string{"CWS", "FWS", "ZWS"} {
		if bytes.HasPrefix(in, []byte(magic)) {
			return true
		}
	}
	return false
}
|
||||
|
||||
// Wasm matches a web assembly File Format file by the prefix "\x00asm".
func Wasm(in []byte, _ uint32) bool {
	return bytes.HasPrefix(in, []byte("\x00asm"))
}
|
||||
|
||||
// Dbf matches a dBase file.
// https://www.dbase.com/Knowledgebase/INT/db7_file_fmt.htm
//
// The input needs at least four bytes, a plausible month (1-12) and day of
// month (1-31) in bytes 2 and 3, and a known dbf type in byte 0.
func Dbf(in []byte, _ uint32) bool {
	if len(in) < 4 {
		return false
	}

	// 3rd and 4th bytes contain the last update month and day of month.
	month, day := in[2], in[3]
	if month < 1 || month > 12 || day < 1 || day > 31 {
		return false
	}

	// The dbf type is dictated by the first byte.
	switch in[0] {
	case 0x02, 0x03, 0x04, 0x05, 0x30, 0x31, 0x32, 0x42, 0x62, 0x7B, 0x82,
		0x83, 0x87, 0x8A, 0x8B, 0x8E, 0xB3, 0xCB, 0xE5, 0xF5, 0xF4, 0xFB:
		return true
	}

	return false
}
|
||||
|
||||
// Exe matches a Windows/DOS executable file by the prefix "MZ".
func Exe(in []byte, _ uint32) bool {
	return bytes.HasPrefix(in, []byte("MZ"))
}
|
||||
|
||||
// Elf matches an Executable and Linkable Format file by the prefix
// "\x7FELF".
func Elf(in []byte, _ uint32) bool {
	return bytes.HasPrefix(in, []byte("\x7FELF"))
}
|
||||
|
||||
// ElfObj matches an object file: the two bytes at offsets 16 and 17 hold the
// value 1 in either byte order. The ELF magic itself is not checked here.
func ElfObj(in []byte, _ uint32) bool {
	if len(in) <= 17 {
		return false
	}
	a, b := in[16], in[17]
	return (a == 0x01 && b == 0x00) || (a == 0x00 && b == 0x01)
}
|
||||
|
||||
// ElfExe matches an executable file: the two bytes at offsets 16 and 17 hold
// the value 2 in either byte order. The ELF magic itself is not checked here.
func ElfExe(in []byte, _ uint32) bool {
	if len(in) <= 17 {
		return false
	}
	a, b := in[16], in[17]
	return (a == 0x02 && b == 0x00) || (a == 0x00 && b == 0x02)
}
|
||||
|
||||
// ElfLib matches a shared library file: the two bytes at offsets 16 and 17
// hold the value 3 in either byte order. The ELF magic itself is not checked
// here.
func ElfLib(in []byte, _ uint32) bool {
	if len(in) <= 17 {
		return false
	}
	a, b := in[16], in[17]
	return (a == 0x03 && b == 0x00) || (a == 0x00 && b == 0x03)
}
|
||||
|
||||
// ElfDump matches a core dump file: the two bytes at offsets 16 and 17 hold
// the value 4 in either byte order. The ELF magic itself is not checked here.
func ElfDump(in []byte, _ uint32) bool {
	if len(in) <= 17 {
		return false
	}
	a, b := in[16], in[17]
	return (a == 0x04 && b == 0x00) || (a == 0x00 && b == 0x04)
}
|
||||
|
||||
// Dcm matches a DICOM medical format file: "DICM" at offset 128.
func Dcm(in []byte, _ uint32) bool {
	if len(in) <= 131 {
		return false
	}
	return bytes.Equal(in[128:132], []byte("DICM"))
}
|
||||
|
||||
// Nes matches a Nintendo Entertainment system ROM file by the prefix
// "NES\x1A".
func Nes(in []byte, _ uint32) bool {
	return bytes.HasPrefix(in, []byte("NES\x1A"))
}
|
||||
|
||||
// Marc matches a MARC21 (MAchine-Readable Cataloging) file: at least 24
// bytes (the "leader" field size), "4500" at offset 20, five leading ASCII
// digits and a field terminator (0x1E) somewhere in the input.
func Marc(in []byte, _ uint32) bool {
	// The leader must be complete and end with the fixed bytes "4500".
	if len(in) < 24 || !bytes.Equal(in[20:24], []byte("4500")) {
		return false
	}

	// The first 5 bytes are ASCII digits.
	for _, c := range in[:5] {
		if c < '0' || c > '9' {
			return false
		}
	}

	// A field terminator must be present.
	return bytes.IndexByte(in, 0x1E) != -1
}
|
||||
|
||||
// TzIf matches a Time Zone Information Format (TZif) file: an input longer
// than four bytes that starts with "TZif".
func TzIf(in []byte, _ uint32) bool {
	if len(in) <= 4 {
		return false
	}
	return bytes.Equal(in[:4], []byte("TZif"))
}
|
@ -1,25 +0,0 @@
|
||||
package matchers
|
||||
|
||||
import "bytes"
|
||||
|
||||
// Sqlite matches an SQLite database file by the 16-byte header string
// "SQLite format 3" followed by a zero byte.
func Sqlite(in []byte, _ uint32) bool {
	header := []byte("SQLite format 3\x00")
	return bytes.HasPrefix(in, header)
}
|
||||
|
||||
// MsAccessAce matches a Microsoft Access database file.
func MsAccessAce(in []byte, _ uint32) bool {
	aceMagic := []byte("Standard ACE DB")
	return msAccess(in, aceMagic)
}

// MsAccessMdb matches a legacy Microsoft Access database file (JET, 2003 and
// earlier).
func MsAccessMdb(in []byte, _ uint32) bool {
	jetMagic := []byte("Standard Jet DB")
	return msAccess(in, jetMagic)
}

// msAccess reports whether in is longer than 19 bytes and bytes 4 through 18
// are equal to magic.
func msAccess(in []byte, magic []byte) bool {
	if len(in) < 20 {
		return false
	}
	return bytes.Equal(in[4:19], magic)
}
|
@ -1,63 +0,0 @@
|
||||
package matchers
|
||||
|
||||
import "bytes"
|
||||
|
||||
// Pdf matches a Portable Document Format file, i.e. an input starting with
// "%PDF". The second argument is ignored.
func Pdf(in []byte, _ uint32) bool {
	magic := []byte("%PDF")
	return bytes.HasPrefix(in, magic)
}
|
||||
|
||||
// Fdf matches a Forms Data Format file.
|
||||
func Fdf(in []byte, _ uint32) bool {
|
||||
return bytes.HasPrefix(in, []byte("%FDF"))
|
||||
}
|
||||
|
||||
// DjVu matches a DjVu file: the input must be at least 12 bytes long, start
// with "AT&TFORM" and carry one of the identifiers DJVM, DJVU, DJVI or THUM
// at offset 12. The second argument is ignored.
func DjVu(in []byte, _ uint32) bool {
	if len(in) < 12 || !bytes.HasPrefix(in, []byte("AT&TFORM")) {
		return false
	}
	tail := in[12:]
	for _, id := range []string{"DJVM", "DJVU", "DJVI", "THUM"} {
		if bytes.HasPrefix(tail, []byte(id)) {
			return true
		}
	}
	return false
}
|
||||
|
||||
// Mobi matches a Mobi file by looking for "BOOKMOBI" at byte offset 60. The
// second argument is ignored.
func Mobi(in []byte, _ uint32) bool {
	if len(in) < 68 {
		return false
	}
	return bytes.Equal(in[60:68], []byte("BOOKMOBI"))
}
|
||||
|
||||
// Lit matches a Microsoft Lit file, i.e. an input starting with "ITOLITLS".
// The second argument is ignored.
func Lit(in []byte, _ uint32) bool {
	magic := []byte("ITOLITLS")
	return bytes.HasPrefix(in, magic)
}
|
||||
|
||||
// P7s matches a .p7s signature file (PEM, Base64, or DER).
//
// PEM input is recognised by the "-----BEGIN PKCS7" prefix. Otherwise the
// input must be at least 20 bytes long and start with 0x30 followed by one
// of 0x80..0x84; the 10-byte signedData ASN.1 object identifier prefix must
// then follow the two header bytes plus N further bytes, where N is the
// second byte minus 0x80. The second argument is ignored.
func P7s(in []byte, _ uint32) bool {
	// PEM encoding.
	if bytes.HasPrefix(in, []byte("-----BEGIN PKCS7")) {
		return true
	}
	// Too short to be a DER encoding.
	if len(in) < 20 {
		return false
	}
	// There are multiple valid headers: 0x30 0x80 through 0x30 0x84.
	if in[0] != 0x30 || in[1] < 0x80 || in[1] > 0x84 {
		return false
	}
	// Magic bytes for the signedData ASN.1 object type.
	signedData := []byte{0x06, 0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x07}
	skip := 2 + int(in[1]-0x80)
	return bytes.HasPrefix(in[skip:], signedData)
}
|
@ -1,35 +0,0 @@
|
||||
package matchers
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
)
|
||||
|
||||
// Ttf matches a TrueType font file.
|
||||
func Ttf(in []byte, _ uint32) bool {
|
||||
return bytes.HasPrefix(in, []byte{0x00, 0x01, 0x00, 0x00}) &&
|
||||
!MsAccessAce(in, 0) && !MsAccessMdb(in, 0)
|
||||
}
|
||||
|
||||
// Woff matches a Web Open Font Format file, i.e. an input starting with
// "wOFF". The second argument is ignored.
func Woff(in []byte, _ uint32) bool {
	magic := []byte("wOFF")
	return bytes.HasPrefix(in, magic)
}
|
||||
|
||||
// Woff2 matches a Web Open Font Format version 2 file, i.e. an input
// starting with "wOF2". The second argument is ignored.
func Woff2(in []byte, _ uint32) bool {
	magic := []byte("wOF2")
	return bytes.HasPrefix(in, magic)
}
|
||||
|
||||
// Otf matches an OpenType font file, i.e. an input starting with "OTTO"
// followed by a NUL byte. The second argument is ignored.
func Otf(in []byte, _ uint32) bool {
	magic := []byte("OTTO\x00")
	return bytes.HasPrefix(in, magic)
}
|
||||
|
||||
// Eot matches an Embedded OpenType font file: the input must be longer than
// 35 bytes, hold 0x4C 0x50 at offsets 34-35, and hold one of the triples
// 02 00 01, 01 00 00 or 02 00 02 at offsets 8-10. The second argument is
// ignored.
func Eot(in []byte, _ uint32) bool {
	if len(in) <= 35 {
		return false
	}
	if in[34] != 0x4C || in[35] != 0x50 {
		return false
	}
	switch string(in[8:11]) {
	case "\x02\x00\x01", "\x01\x00\x00", "\x02\x00\x02":
		return true
	}
	return false
}
|
@ -1,105 +0,0 @@
|
||||
package matchers
|
||||
|
||||
var (
|
||||
mp4Sigs = []sig{
|
||||
ftypSig("avc1"), ftypSig("dash"), ftypSig("iso2"), ftypSig("iso3"),
|
||||
ftypSig("iso4"), ftypSig("iso5"), ftypSig("iso6"), ftypSig("isom"),
|
||||
ftypSig("mmp4"), ftypSig("mp41"), ftypSig("mp42"), ftypSig("mp4v"),
|
||||
ftypSig("mp71"), ftypSig("MSNV"), ftypSig("NDAS"), ftypSig("NDSC"),
|
||||
ftypSig("NSDC"), ftypSig("NSDH"), ftypSig("NDSM"), ftypSig("NDSP"),
|
||||
ftypSig("NDSS"), ftypSig("NDXC"), ftypSig("NDXH"), ftypSig("NDXM"),
|
||||
ftypSig("NDXP"), ftypSig("NDXS"), ftypSig("F4V "), ftypSig("F4P "),
|
||||
}
|
||||
threeGPSigs = []sig{
|
||||
ftypSig("3gp1"), ftypSig("3gp2"), ftypSig("3gp3"), ftypSig("3gp4"),
|
||||
ftypSig("3gp5"), ftypSig("3gp6"), ftypSig("3gp7"), ftypSig("3gs7"),
|
||||
ftypSig("3ge6"), ftypSig("3ge7"), ftypSig("3gg6"),
|
||||
}
|
||||
threeG2Sigs = []sig{
|
||||
ftypSig("3g24"), ftypSig("3g25"), ftypSig("3g26"), ftypSig("3g2a"),
|
||||
ftypSig("3g2b"), ftypSig("3g2c"), ftypSig("KDDI"),
|
||||
}
|
||||
amp4Sigs = []sig{
|
||||
// audio for Adobe Flash Player 9+
|
||||
ftypSig("F4A "), ftypSig("F4B "),
|
||||
// Apple iTunes AAC-LC (.M4A) Audio
|
||||
ftypSig("M4B "), ftypSig("M4P "),
|
||||
// MPEG-4 (.MP4) for SonyPSP
|
||||
ftypSig("MSNV"),
|
||||
// Nero Digital AAC Audio
|
||||
ftypSig("NDAS"),
|
||||
}
|
||||
qtSigs = []sig{ftypSig("qt "), ftypSig("moov")}
|
||||
mqvSigs = []sig{ftypSig("mqt ")}
|
||||
m4aSigs = []sig{ftypSig("M4A ")}
|
||||
m4vSigs = []sig{ftypSig("M4V "), ftypSig("M4VH"), ftypSig("M4VP")}
|
||||
heicSigs = []sig{ftypSig("heic"), ftypSig("heix")}
|
||||
heicSeqSigs = []sig{ftypSig("hevc"), ftypSig("hevx")}
|
||||
heifSigs = []sig{
|
||||
ftypSig("mif1"), ftypSig("heim"), ftypSig("heis"), ftypSig("avic"),
|
||||
}
|
||||
heifSeqSigs = []sig{
|
||||
ftypSig("msf1"), ftypSig("hevm"), ftypSig("hevs"), ftypSig("avcs"),
|
||||
}
|
||||
// TODO: add support for remaining video formats at ftyps.com.
|
||||
)
|
||||
|
||||
// Mp4 matches an MP4 file.
|
||||
func Mp4(in []byte, _ uint32) bool {
|
||||
return detect(in, mp4Sigs)
|
||||
}
|
||||
|
||||
// ThreeGP matches a 3GPP file.
|
||||
func ThreeGP(in []byte, _ uint32) bool {
|
||||
return detect(in, threeGPSigs)
|
||||
}
|
||||
|
||||
// ThreeG2 matches a 3GPP2 file.
|
||||
func ThreeG2(in []byte, _ uint32) bool {
|
||||
return detect(in, threeG2Sigs)
|
||||
}
|
||||
|
||||
// AMp4 matches an audio MP4 file.
|
||||
func AMp4(in []byte, _ uint32) bool {
|
||||
return detect(in, amp4Sigs)
|
||||
}
|
||||
|
||||
// QuickTime matches a QuickTime File Format file.
|
||||
func QuickTime(in []byte, _ uint32) bool {
|
||||
return detect(in, qtSigs)
|
||||
}
|
||||
|
||||
// Mqv matches a Sony / Mobile QuickTime file.
|
||||
func Mqv(in []byte, _ uint32) bool {
|
||||
return detect(in, mqvSigs)
|
||||
}
|
||||
|
||||
// M4a matches an audio M4A file.
|
||||
func M4a(in []byte, _ uint32) bool {
|
||||
return detect(in, m4aSigs)
|
||||
}
|
||||
|
||||
// Heic matches a High Efficiency Image Coding (HEIC) file.
|
||||
func Heic(in []byte, _ uint32) bool {
|
||||
return detect(in, heicSigs)
|
||||
}
|
||||
|
||||
// HeicSequence matches a High Efficiency Image Coding (HEIC) file sequence.
|
||||
func HeicSequence(in []byte, _ uint32) bool {
|
||||
return detect(in, heicSeqSigs)
|
||||
}
|
||||
|
||||
// Heif matches a High Efficiency Image File Format (HEIF) file.
|
||||
func Heif(in []byte, _ uint32) bool {
|
||||
return detect(in, heifSigs)
|
||||
}
|
||||
|
||||
// HeifSequence matches a High Efficiency Image File Format (HEIF) file sequence.
|
||||
func HeifSequence(in []byte, _ uint32) bool {
|
||||
return detect(in, heifSeqSigs)
|
||||
}
|
||||
|
||||
// M4v matches an Appl4 M4V video file.
|
||||
func M4v(in []byte, _ uint32) bool {
|
||||
return detect(in, m4vSigs)
|
||||
}
|
@ -1,142 +0,0 @@
|
||||
package matchers
|
||||
|
||||
import "bytes"
|
||||
|
||||
// Png matches a Portable Network Graphics file, recognised by the 8-byte
// signature 0x89 "PNG" CR LF 0x1A LF. The second argument is ignored.
func Png(in []byte, _ uint32) bool {
	signature := []byte("\x89PNG\r\n\x1a\n")
	return bytes.HasPrefix(in, signature)
}
|
||||
|
||||
// Jpg matches a Joint Photographic Experts Group file, i.e. an input
// starting with the bytes FF D8 FF. The second argument is ignored.
func Jpg(in []byte, _ uint32) bool {
	marker := []byte("\xFF\xD8\xFF")
	return bytes.HasPrefix(in, marker)
}
|
||||
|
||||
// isJpeg2k matches a generic JPEG2000 file: the input must be at least 24
// bytes long and hold either "jP" plus two spaces or "jP2 " at offsets 4-7.
func isJpeg2k(in []byte) bool {
	if len(in) < 24 {
		return false
	}
	switch string(in[4:8]) {
	case "\x6A\x50\x20\x20", "\x6A\x50\x32\x20":
		return true
	}
	return false
}

// Jp2 matches a JPEG 2000 Image file (ISO 15444-1): a generic JPEG2000 input
// holding "jp2 " at offsets 20-23.
func Jp2(in []byte, _ uint32) bool {
	if !isJpeg2k(in) {
		return false
	}
	return bytes.Equal(in[20:24], []byte("jp2 "))
}

// Jpx matches a JPEG 2000 Image file (ISO 15444-2): a generic JPEG2000 input
// holding "jpx " at offsets 20-23.
func Jpx(in []byte, _ uint32) bool {
	if !isJpeg2k(in) {
		return false
	}
	return bytes.Equal(in[20:24], []byte("jpx "))
}

// Jpm matches a JPEG 2000 Image file (ISO 15444-6): a generic JPEG2000 input
// holding "jpm " at offsets 20-23.
func Jpm(in []byte, _ uint32) bool {
	if !isJpeg2k(in) {
		return false
	}
	return bytes.Equal(in[20:24], []byte("jpm "))
}
|
||||
|
||||
// Gif matches a Graphics Interchange Format file, i.e. an input starting
// with "GIF87a" or "GIF89a". The second argument is ignored.
func Gif(in []byte, _ uint32) bool {
	for _, version := range []string{"GIF87a", "GIF89a"} {
		if bytes.HasPrefix(in, []byte(version)) {
			return true
		}
	}
	return false
}
|
||||
|
||||
// Webp matches a WebP file: an input longer than 12 bytes that starts with
// "RIFF" and holds "WEBP" at offsets 8-11. The second argument is ignored.
func Webp(in []byte, _ uint32) bool {
	if len(in) <= 12 {
		return false
	}
	return bytes.HasPrefix(in, []byte("RIFF")) &&
		bytes.HasPrefix(in[8:], []byte("WEBP"))
}
|
||||
|
||||
// Bmp matches a bitmap image file, i.e. an input starting with "BM". The
// second argument is ignored.
func Bmp(in []byte, _ uint32) bool {
	if len(in) < 2 {
		return false
	}
	return in[0] == 'B' && in[1] == 'M'
}
|
||||
|
||||
// Ps matches a PostScript file, i.e. an input starting with "%!PS-Adobe-".
// The second argument is ignored.
func Ps(in []byte, _ uint32) bool {
	header := []byte("%!PS-Adobe-")
	return bytes.HasPrefix(in, header)
}
|
||||
|
||||
// Psd matches a Photoshop Document file, i.e. an input starting with "8BPS".
// The second argument is ignored.
func Psd(in []byte, _ uint32) bool {
	magic := []byte("8BPS")
	return bytes.HasPrefix(in, magic)
}
|
||||
|
||||
// Ico matches an ICO file, i.e. an input starting with the bytes 00 00 01 00
// or 00 00 02 00. The second argument is ignored.
func Ico(in []byte, _ uint32) bool {
	if len(in) < 4 {
		return false
	}
	if in[0] != 0x00 || in[1] != 0x00 || in[3] != 0x00 {
		return false
	}
	return in[2] == 0x01 || in[2] == 0x02
}
|
||||
|
||||
// Icns matches an ICNS (Apple Icon Image format) file, i.e. an input
// starting with "icns". The second argument is ignored.
func Icns(in []byte, _ uint32) bool {
	magic := []byte("icns")
	return bytes.HasPrefix(in, magic)
}
|
||||
|
||||
// Tiff matches a Tagged Image File Format file, i.e. an input starting with
// "II" 2A 00 or "MM" 00 2A. The second argument is ignored.
func Tiff(in []byte, _ uint32) bool {
	for _, header := range []string{"II\x2A\x00", "MM\x00\x2A"} {
		if bytes.HasPrefix(in, []byte(header)) {
			return true
		}
	}
	return false
}
|
||||
|
||||
// Bpg matches a Better Portable Graphics file, i.e. an input starting with
// "BPG" followed by 0xFB. The second argument is ignored.
func Bpg(in []byte, _ uint32) bool {
	magic := []byte("BPG\xFB")
	return bytes.HasPrefix(in, magic)
}
|
||||
|
||||
// Dwg matches a CAD drawing file: the input must be at least 6 bytes long,
// start with "AC" and carry one of the known four-character version strings
// at offsets 2-5. The second argument is ignored.
func Dwg(in []byte, _ uint32) bool {
	if len(in) < 6 {
		return false
	}
	if in[0] != 'A' || in[1] != 'C' {
		return false
	}
	switch string(in[2:6]) {
	case "1.40", "1.50", "2.10",
		"1002", "1003", "1004", "1006", "1009",
		"1012", "1014", "1015", "1018",
		"1021", "1024", "1032":
		return true
	}
	return false
}
|
||||
|
||||
// Xcf matches GIMP image data, i.e. an input starting with "gimp xcf". The
// second argument is ignored.
func Xcf(in []byte, _ uint32) bool {
	magic := []byte("gimp xcf")
	return bytes.HasPrefix(in, magic)
}
|
||||
|
||||
// Pat matches GIMP pattern data by looking for "GPAT" at byte offset 20. The
// second argument is ignored.
func Pat(in []byte, _ uint32) bool {
	if len(in) < 24 {
		return false
	}
	return bytes.Equal(in[20:24], []byte("GPAT"))
}
|
||||
|
||||
// Gbr matches GIMP brush data by looking for "GIMP" at byte offset 20. The
// second argument is ignored.
func Gbr(in []byte, _ uint32) bool {
	if len(in) < 24 {
		return false
	}
	return bytes.Equal(in[20:24], []byte("GIMP"))
}
|
||||
|
||||
// Hdr matches a Radiance HDR image, i.e. an input starting with the line
// "#?RADIANCE" terminated by a newline. The second argument is ignored.
// https://web.archive.org/web/20060913152809/http://local.wasp.uwa.edu.au/~pbourke/dataformats/pic/
func Hdr(in []byte, _ uint32) bool {
	firstLine := []byte("#?RADIANCE\n")
	return bytes.HasPrefix(in, firstLine)
}
|
@ -1,39 +0,0 @@
|
||||
// Package matchers holds the matching functions used to find MIME types.
|
||||
package matchers
|
||||
|
||||
// trimLWS trims whitespace from the beginning of the input. The returned
// slice shares storage with in.
func trimLWS(in []byte) []byte {
	firstNonWS := 0
	for firstNonWS < len(in) && isWS(in[firstNonWS]) {
		firstNonWS++
	}

	return in[firstNonWS:]
}

// trimRWS trims whitespace from the end of the input. The returned slice
// shares storage with in; an input made only of whitespace yields an empty
// slice.
func trimRWS(in []byte) []byte {
	end := len(in)
	// Walk back down to and including index 0, so that a leading whitespace
	// byte is removed too when nothing else is left.
	for end > 0 && isWS(in[end-1]) {
		end--
	}

	return in[:end]
}

// firstLine returns the part of in that precedes the first '\n' byte, or all
// of in when it holds no '\n'. A trailing '\r' is not removed.
func firstLine(in []byte) []byte {
	lineEnd := 0
	for lineEnd < len(in) && in[lineEnd] != '\n' {
		lineEnd++
	}

	return in[:lineEnd]
}

// isWS reports whether b is a tab, line feed, form feed, carriage return or
// space.
func isWS(b byte) bool {
	switch b {
	case '\t', '\n', '\x0c', '\r', ' ':
		return true
	}
	return false
}
|
||||
|
||||
// min returns the smaller of a and b.
func min(a, b int) int {
	if b <= a {
		return b
	}
	return a
}
|
@ -1,25 +0,0 @@
|
||||
package matchers
|
||||
|
||||
import "bytes"
|
||||
|
||||
// Glb matches a glTF model format file.
//
// GLB is the binary file format representation of 3D models saved in the GL
// Transmission Format (glTF).
// See more: https://docs.fileformat.com/3d/glb/
// https://www.iana.org/assignments/media-types/model/gltf-binary
//
// The GLB file format is little endian and its header structure is shown
// below:
//
//	<-- 12-byte header                             -->
//	| magic            | version          | length   |
//	| (uint32)         | (uint32)         | (uint32) |
//	| \x67\x6C\x54\x46 | \x01\x00\x00\x00 | ...      |
//	| g   l   T   F    | 1                | ...      |
//
// Versions 1 and 2 are accepted. The second argument is ignored.
func Glb(in []byte, _ uint32) bool {
	if len(in) < 8 {
		return false
	}
	if !bytes.HasPrefix(in, []byte("glTF")) {
		return false
	}
	version := in[4:8]
	return bytes.Equal(version, []byte{0x02, 0x00, 0x00, 0x00}) ||
		bytes.Equal(version, []byte{0x01, 0x00, 0x00, 0x00})
}
|
@ -1,42 +0,0 @@
|
||||
package matchers
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
)
|
||||
|
||||
/*
|
||||
NOTE:
|
||||
|
||||
In May 2003, two Internet RFCs were published relating to the format.
|
||||
The Ogg bitstream was defined in RFC 3533 (which is classified as
|
||||
'informative') and its Internet content type (application/ogg) in RFC
|
||||
3534 (which is, as of 2006, a proposed standard protocol). In
|
||||
September 2008, RFC 3534 was obsoleted by RFC 5334, which added
|
||||
content types video/ogg, audio/ogg and filename extensions .ogx, .ogv,
|
||||
.oga, .spx.
|
||||
|
||||
See:
|
||||
https://tools.ietf.org/html/rfc3533
|
||||
https://developer.mozilla.org/en-US/docs/Web/HTTP/Configuring_servers_for_Ogg_media#Serve_media_with_the_correct_MIME_type
|
||||
https://github.com/file/file/blob/master/magic/Magdir/vorbis
|
||||
*/
|
||||
|
||||
// Ogg matches an Ogg file, i.e. an input starting with "OggS" followed by a
// NUL byte. The second argument is ignored.
func Ogg(in []byte, _ uint32) bool {
	capture := []byte("OggS\x00")
	return bytes.HasPrefix(in, capture)
}
|
||||
|
||||
// OggAudio matches an audio ogg file: the input must be at least 37 bytes
// long and carry one of the FLAC, Vorbis, Opus or Speex markers at byte
// offset 28. The second argument is ignored.
func OggAudio(in []byte, _ uint32) bool {
	if len(in) < 37 {
		return false
	}
	payload := in[28:]
	markers := []string{
		"\x7fFLAC",
		"\x01vorbis",
		"OpusHead",
		"Speex\x20\x20\x20",
	}
	for _, m := range markers {
		if bytes.HasPrefix(payload, []byte(m)) {
			return true
		}
	}
	return false
}
|
||||
|
||||
// OggVideo matches a video ogg file: the input must be at least 37 bytes
// long and carry one of the Theora, fishead or OGM video markers at byte
// offset 28. The second argument is ignored.
func OggVideo(in []byte, _ uint32) bool {
	if len(in) < 37 {
		return false
	}
	payload := in[28:]
	markers := []string{
		"\x80theora",
		"fishead\x00",
		"\x01video\x00\x00\x00", // OGM video
	}
	for _, m := range markers {
		if bytes.HasPrefix(payload, []byte(m)) {
			return true
		}
	}
	return false
}
|
@ -1,131 +0,0 @@
|
||||
package matchers
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
)
|
||||
|
||||
// markupSig is a markup tag signature; see its detect method for the
// matching rules.
type markupSig []byte

// ciSig is a case insensitive signature.
type ciSig []byte

// shebangSig matches "#!" followed by the signature.
type shebangSig []byte

// ftypSig matches audio/video files. www.ftyps.com
type ftypSig []byte

// xmlSig describes an XML document by its root tag and/or its namespace.
type xmlSig struct {
	// the local name of the root tag
	localName []byte
	// the namespace of the XML document
	xmlns []byte
}

// sig is implemented by every signature type of this package.
type sig interface {
	detect([]byte) bool
}
|
||||
|
||||
func newXmlSig(localName, xmlns string) xmlSig {
|
||||
ret := xmlSig{xmlns: []byte(xmlns)}
|
||||
if localName != "" {
|
||||
ret.localName = []byte(fmt.Sprintf("<%s", localName))
|
||||
}
|
||||
|
||||
return ret
|
||||
}
|
||||
|
||||
// Implement sig interface.
|
||||
func (hSig markupSig) detect(in []byte) bool {
|
||||
if len(in) < len(hSig)+1 {
|
||||
return false
|
||||
}
|
||||
|
||||
// perform case insensitive check
|
||||
for i, b := range hSig {
|
||||
db := in[i]
|
||||
if 'A' <= b && b <= 'Z' {
|
||||
db &= 0xDF
|
||||
}
|
||||
if b != db {
|
||||
return false
|
||||
}
|
||||
}
|
||||
// Next byte must be space or right angle bracket.
|
||||
if db := in[len(hSig)]; db != ' ' && db != '>' {
|
||||
return false
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
// Implement sig interface.
|
||||
func (tSig ciSig) detect(in []byte) bool {
|
||||
if len(in) < len(tSig)+1 {
|
||||
return false
|
||||
}
|
||||
|
||||
// perform case insensitive check
|
||||
for i, b := range tSig {
|
||||
db := in[i]
|
||||
if 'A' <= b && b <= 'Z' {
|
||||
db &= 0xDF
|
||||
}
|
||||
if b != db {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
// A valid shebang starts with the "#!" characters,
|
||||
// followed by any number of spaces,
|
||||
// followed by the path to the interpreter,
|
||||
// and, optionally, followed by the arguments for the interpreter.
|
||||
//
|
||||
// Ex:
|
||||
// #! /usr/bin/env php
|
||||
// /usr/bin/env is the interpreter, php is the first and only argument.
|
||||
func (sSig shebangSig) detect(in []byte) bool {
|
||||
in = firstLine(in)
|
||||
|
||||
if len(in) < len(sSig)+2 {
|
||||
return false
|
||||
}
|
||||
if in[0] != '#' || in[1] != '!' {
|
||||
return false
|
||||
}
|
||||
|
||||
in = trimLWS(trimRWS(in[2:]))
|
||||
|
||||
return bytes.Equal(in, sSig)
|
||||
}
|
||||
|
||||
// Implement sig interface.
|
||||
func (fSig ftypSig) detect(in []byte) bool {
|
||||
return len(in) > 12 &&
|
||||
bytes.Equal(in[4:8], []byte("ftyp")) &&
|
||||
bytes.Equal(in[8:12], fSig)
|
||||
}
|
||||
|
||||
// Implement sig interface.
|
||||
func (xSig xmlSig) detect(in []byte) bool {
|
||||
in = in[:min(len(in), 512)]
|
||||
|
||||
if len(xSig.localName) == 0 {
|
||||
return bytes.Index(in, xSig.xmlns) > 0
|
||||
}
|
||||
if len(xSig.xmlns) == 0 {
|
||||
return bytes.Index(in, xSig.localName) > 0
|
||||
}
|
||||
|
||||
localNameIndex := bytes.Index(in, xSig.localName)
|
||||
return localNameIndex != -1 && localNameIndex < bytes.Index(in, xSig.xmlns)
|
||||
}
|
||||
|
||||
// detect returns true if any of the provided signatures pass for in input.
|
||||
func detect(in []byte, sigs []sig) bool {
|
||||
for _, sig := range sigs {
|
||||
if sig.detect(in) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
@ -1,407 +0,0 @@
|
||||
package matchers
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
|
||||
"github.com/gabriel-vasile/mimetype/internal/json"
|
||||
)
|
||||
|
||||
var (
|
||||
htmlSigs = []sig{
|
||||
markupSig("<!DOCTYPE HTML"),
|
||||
markupSig("<HTML"),
|
||||
markupSig("<HEAD"),
|
||||
markupSig("<SCRIPT"),
|
||||
markupSig("<IFRAME"),
|
||||
markupSig("<H1"),
|
||||
markupSig("<DIV"),
|
||||
markupSig("<FONT"),
|
||||
markupSig("<TABLE"),
|
||||
markupSig("<A"),
|
||||
markupSig("<STYLE"),
|
||||
markupSig("<TITLE"),
|
||||
markupSig("<B"),
|
||||
markupSig("<BODY"),
|
||||
markupSig("<BR"),
|
||||
markupSig("<P"),
|
||||
markupSig("<!--"),
|
||||
}
|
||||
xmlSigs = []sig{
|
||||
markupSig("<?XML"),
|
||||
}
|
||||
owlSigs = []sig{
|
||||
newXmlSig("Ontology", `xmlns="http://www.w3.org/2002/07/owl#"`),
|
||||
}
|
||||
rssSigs = []sig{
|
||||
newXmlSig("rss", ""),
|
||||
}
|
||||
atomSigs = []sig{
|
||||
newXmlSig("feed", `xmlns="http://www.w3.org/2005/Atom"`),
|
||||
}
|
||||
kmlSigs = []sig{
|
||||
newXmlSig("kml", `xmlns="http://www.opengis.net/kml/2.2"`),
|
||||
newXmlSig("kml", `xmlns="http://earth.google.com/kml/2.0"`),
|
||||
newXmlSig("kml", `xmlns="http://earth.google.com/kml/2.1"`),
|
||||
newXmlSig("kml", `xmlns="http://earth.google.com/kml/2.2"`),
|
||||
}
|
||||
xliffSigs = []sig{
|
||||
newXmlSig("xliff", `xmlns="urn:oasis:names:tc:xliff:document:1.2"`),
|
||||
}
|
||||
colladaSigs = []sig{
|
||||
newXmlSig("COLLADA", `xmlns="http://www.collada.org/2005/11/COLLADASchema"`),
|
||||
}
|
||||
gmlSigs = []sig{
|
||||
newXmlSig("", `xmlns:gml="http://www.opengis.net/gml"`),
|
||||
newXmlSig("", `xmlns:gml="http://www.opengis.net/gml/3.2"`),
|
||||
newXmlSig("", `xmlns:gml="http://www.opengis.net/gml/3.3/exr"`),
|
||||
}
|
||||
gpxSigs = []sig{
|
||||
newXmlSig("gpx", `xmlns="http://www.topografix.com/GPX/1/1"`),
|
||||
}
|
||||
tcxSigs = []sig{
|
||||
newXmlSig("TrainingCenterDatabase", `xmlns="http://www.garmin.com/xmlschemas/TrainingCenterDatabase/v2"`),
|
||||
}
|
||||
x3dSigs = []sig{
|
||||
newXmlSig("X3D", `xmlns:xsd="http://www.w3.org/2001/XMLSchema-instance"`),
|
||||
}
|
||||
amfSigs = []sig{
|
||||
newXmlSig("amf", ""),
|
||||
}
|
||||
threemfSigs = []sig{
|
||||
newXmlSig("model", `xmlns="http://schemas.microsoft.com/3dmanufacturing/core/2015/02"`),
|
||||
}
|
||||
xfdfSigs = []sig{
|
||||
newXmlSig("xfdf", `xmlns="http://ns.adobe.com/xfdf/"`),
|
||||
}
|
||||
vCardSigs = []sig{
|
||||
ciSig("BEGIN:VCARD\n"),
|
||||
ciSig("BEGIN:VCARD\r\n"),
|
||||
}
|
||||
iCalSigs = []sig{
|
||||
ciSig("BEGIN:VCALENDAR\n"),
|
||||
ciSig("BEGIN:VCALENDAR\r\n"),
|
||||
}
|
||||
phpSigs = []sig{
|
||||
ciSig("<?PHP"),
|
||||
ciSig("<?\n"),
|
||||
ciSig("<?\r"),
|
||||
ciSig("<? "),
|
||||
shebangSig("/usr/local/bin/php"),
|
||||
shebangSig("/usr/bin/php"),
|
||||
shebangSig("/usr/bin/env php"),
|
||||
}
|
||||
jsSigs = []sig{
|
||||
shebangSig("/bin/node"),
|
||||
shebangSig("/usr/bin/node"),
|
||||
shebangSig("/bin/nodejs"),
|
||||
shebangSig("/usr/bin/nodejs"),
|
||||
shebangSig("/usr/bin/env node"),
|
||||
shebangSig("/usr/bin/env nodejs"),
|
||||
}
|
||||
luaSigs = []sig{
|
||||
shebangSig("/usr/bin/lua"),
|
||||
shebangSig("/usr/local/bin/lua"),
|
||||
shebangSig("/usr/bin/env lua"),
|
||||
}
|
||||
perlSigs = []sig{
|
||||
shebangSig("/usr/bin/perl"),
|
||||
shebangSig("/usr/bin/env perl"),
|
||||
}
|
||||
pythonSigs = []sig{
|
||||
shebangSig("/usr/bin/python"),
|
||||
shebangSig("/usr/local/bin/python"),
|
||||
shebangSig("/usr/bin/env python"),
|
||||
}
|
||||
tclSigs = []sig{
|
||||
shebangSig("/usr/bin/tcl"),
|
||||
shebangSig("/usr/local/bin/tcl"),
|
||||
shebangSig("/usr/bin/env tcl"),
|
||||
shebangSig("/usr/bin/tclsh"),
|
||||
shebangSig("/usr/local/bin/tclsh"),
|
||||
shebangSig("/usr/bin/env tclsh"),
|
||||
shebangSig("/usr/bin/wish"),
|
||||
shebangSig("/usr/local/bin/wish"),
|
||||
shebangSig("/usr/bin/env wish"),
|
||||
}
|
||||
)
|
||||
|
||||
// Utf32be matches a text file encoded with UTF-32 and with the characters
// represented in big endian, i.e. an input starting with the bytes
// 00 00 FE FF. The second argument is ignored.
func Utf32be(in []byte, _ uint32) bool {
	bom := []byte("\x00\x00\xFE\xFF")
	return bytes.HasPrefix(in, bom)
}
|
||||
|
||||
// Utf32le matches a text file encoded with UTF-32 and with the characters
// represented in little endian, i.e. an input starting with the bytes
// FF FE 00 00. The second argument is ignored.
func Utf32le(in []byte, _ uint32) bool {
	bom := []byte("\xFF\xFE\x00\x00")
	return bytes.HasPrefix(in, bom)
}
|
||||
|
||||
// Utf16be matches a text file encoded with UTF-16 and with the characters
// represented in big endian, i.e. an input starting with the bytes FE FF.
// The second argument is ignored.
func Utf16be(in []byte, _ uint32) bool {
	bom := []byte("\xFE\xFF")
	return bytes.HasPrefix(in, bom)
}
|
||||
|
||||
// Utf16le matches a text file encoded with UTF-16 and with the characters
// represented in little endian, i.e. an input starting with the bytes FF FE.
// The second argument is ignored.
func Utf16le(in []byte, _ uint32) bool {
	bom := []byte("\xFF\xFE")
	return bytes.HasPrefix(in, bom)
}
|
||||
|
||||
// Utf8 matches an UTF-8 text file.
|
||||
func Utf8(in []byte, _ uint32) bool {
|
||||
in = trimLWS(in)
|
||||
for _, b := range in {
|
||||
if b <= 0x08 ||
|
||||
b == 0x0B ||
|
||||
0x0E <= b && b <= 0x1A ||
|
||||
0x1C <= b && b <= 0x1F {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
// Owl2 matches an Owl ontology file.
|
||||
func Owl2(in []byte, _ uint32) bool {
|
||||
return detect(in, owlSigs)
|
||||
}
|
||||
|
||||
// Html matches a Hypertext Markup Language file.
|
||||
func Html(in []byte, _ uint32) bool {
|
||||
in = trimLWS(in)
|
||||
if len(in) == 0 {
|
||||
return false
|
||||
}
|
||||
return detect(in, htmlSigs)
|
||||
}
|
||||
|
||||
// Xml matches an Extensible Markup Language file.
|
||||
func Xml(in []byte, _ uint32) bool {
|
||||
in = trimLWS(in)
|
||||
if len(in) == 0 {
|
||||
return false
|
||||
}
|
||||
return detect(in, xmlSigs)
|
||||
}
|
||||
|
||||
// Php matches a PHP: Hypertext Preprocessor file.
|
||||
func Php(in []byte, _ uint32) bool {
|
||||
return detect(in, phpSigs)
|
||||
}
|
||||
|
||||
// Json matches a JavaScript Object Notation file.
|
||||
func Json(in []byte, readLimit uint32) bool {
|
||||
parsed, err := json.Scan(in)
|
||||
if len(in) < int(readLimit) {
|
||||
return err == nil
|
||||
}
|
||||
|
||||
return parsed == len(in)
|
||||
}
|
||||
|
||||
// GeoJson matches a RFC 7946 GeoJSON file.
|
||||
//
|
||||
// GeoJson detection implies searching for key:value pairs like: `"type": "Feature"`
|
||||
// in the input.
|
||||
// BUG(gabriel-vasile): The "type" key should be searched for in the root object.
|
||||
func GeoJson(in []byte, _ uint32) bool {
|
||||
in = trimLWS(in)
|
||||
if len(in) == 0 {
|
||||
return false
|
||||
}
|
||||
// GeoJSON is always a JSON object, not a JSON array.
|
||||
if in[0] != '{' {
|
||||
return false
|
||||
}
|
||||
|
||||
s := []byte(`"type"`)
|
||||
si, sl := bytes.Index(in, s), len(s)
|
||||
|
||||
if si == -1 {
|
||||
return false
|
||||
}
|
||||
|
||||
// If the "type" string is the suffix of the input,
|
||||
// there is no need to search for the value of the key.
|
||||
if si+sl == len(in) {
|
||||
return false
|
||||
}
|
||||
// Skip the "type" part.
|
||||
in = in[si+sl:]
|
||||
// Skip any whitespace before the colon.
|
||||
in = trimLWS(in)
|
||||
// Check for colon.
|
||||
if len(in) == 0 || in[0] != ':' {
|
||||
return false
|
||||
}
|
||||
// Skip any whitespace after the colon.
|
||||
in = trimLWS(in[1:])
|
||||
|
||||
geoJsonTypes := [][]byte{
|
||||
[]byte(`"Feature"`),
|
||||
[]byte(`"FeatureCollection"`),
|
||||
[]byte(`"Point"`),
|
||||
[]byte(`"LineString"`),
|
||||
[]byte(`"Polygon"`),
|
||||
[]byte(`"MultiPoint"`),
|
||||
[]byte(`"MultiLineString"`),
|
||||
[]byte(`"MultiPolygon"`),
|
||||
[]byte(`"GeometryCollection"`),
|
||||
}
|
||||
for _, t := range geoJsonTypes {
|
||||
if bytes.HasPrefix(in, t) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
// NdJson matches a Newline delimited JSON file.
|
||||
func NdJson(in []byte, readLimit uint32) bool {
|
||||
// Separator with carriage return and new line `\r\n`.
|
||||
srn := []byte{0x0D, 0x0A}
|
||||
|
||||
// Separator with only new line `\n`.
|
||||
sn := []byte{0x0A}
|
||||
|
||||
// Total bytes scanned.
|
||||
parsed := 0
|
||||
|
||||
// Split by `srn`.
|
||||
for rni, insrn := range bytes.Split(in, srn) {
|
||||
// Separator byte count should be added only after the first split.
|
||||
if rni != 0 {
|
||||
// Add two as `\r\n` is used for split.
|
||||
parsed += 2
|
||||
}
|
||||
// Split again by `sn`.
|
||||
for ni, insn := range bytes.Split(insrn, sn) {
|
||||
// Separator byte count should be added only after the first split.
|
||||
if ni != 0 {
|
||||
// Add one as `\n` is used for split.
|
||||
parsed++
|
||||
}
|
||||
// Empty line is valid.
|
||||
if len(insn) == 0 {
|
||||
continue
|
||||
}
|
||||
p, err := json.Scan(insn)
|
||||
parsed += p
|
||||
if parsed < int(readLimit) && err != nil {
|
||||
return false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Empty inputs should not pass as valid NDJSON with 0 lines.
|
||||
return parsed > 2 && parsed == len(in)
|
||||
}
|
||||
|
||||
// Js matches a Javascript file.
|
||||
func Js(in []byte, _ uint32) bool {
|
||||
return detect(in, jsSigs)
|
||||
}
|
||||
|
||||
// Lua matches a Lua programming language file.
|
||||
func Lua(in []byte, _ uint32) bool {
|
||||
return detect(in, luaSigs)
|
||||
}
|
||||
|
||||
// Perl matches a Perl programming language file.
|
||||
func Perl(in []byte, _ uint32) bool {
|
||||
return detect(in, perlSigs)
|
||||
}
|
||||
|
||||
// Python matches a Python programming language file.
|
||||
func Python(in []byte, _ uint32) bool {
|
||||
return detect(in, pythonSigs)
|
||||
}
|
||||
|
||||
// Tcl matches a Tcl programming language file.
|
||||
func Tcl(in []byte, _ uint32) bool {
|
||||
return detect(in, tclSigs)
|
||||
}
|
||||
|
||||
// Rtf matches a Rich Text Format file, i.e. an input starting with `{\rtf1`.
// The second argument is ignored.
func Rtf(in []byte, _ uint32) bool {
	header := []byte(`{\rtf1`)
	return bytes.HasPrefix(in, header)
}
|
||||
|
||||
// Svg matches an SVG file: it reports whether "<svg" occurs anywhere in the
// input. The second argument is ignored.
func Svg(in []byte, _ uint32) bool {
	return bytes.Index(in, []byte("<svg")) >= 0
}
|
||||
|
||||
// Rss matches a Rich Site Summary file.
|
||||
func Rss(in []byte, _ uint32) bool {
|
||||
return detect(in, rssSigs)
|
||||
}
|
||||
|
||||
// Atom matches an Atom Syndication Format file.
|
||||
func Atom(in []byte, _ uint32) bool {
|
||||
return detect(in, atomSigs)
|
||||
}
|
||||
|
||||
// Kml matches a Keyhole Markup Language file.
|
||||
func Kml(in []byte, _ uint32) bool {
|
||||
return detect(in, kmlSigs)
|
||||
}
|
||||
|
||||
// Xliff matches a XML Localization Interchange File Format file.
|
||||
func Xliff(in []byte, _ uint32) bool {
|
||||
return detect(in, xliffSigs)
|
||||
}
|
||||
|
||||
// Collada matches a COLLAborative Design Activity file.
|
||||
func Collada(in []byte, _ uint32) bool {
|
||||
return detect(in, colladaSigs)
|
||||
}
|
||||
|
||||
// Gml matches a Geography Markup Language file.
|
||||
func Gml(in []byte, _ uint32) bool {
|
||||
return detect(in, gmlSigs)
|
||||
}
|
||||
|
||||
// Gpx matches a GPS Exchange Format file.
|
||||
func Gpx(in []byte, _ uint32) bool {
|
||||
return detect(in, gpxSigs)
|
||||
}
|
||||
|
||||
// Tcx matches a Training Center XML file.
|
||||
func Tcx(in []byte, _ uint32) bool {
|
||||
return detect(in, tcxSigs)
|
||||
}
|
||||
|
||||
// Amf matches an Additive Manufacturing XML file.
|
||||
func Amf(in []byte, _ uint32) bool {
|
||||
return detect(in, amfSigs)
|
||||
}
|
||||
|
||||
// Threemf matches a 3D Manufacturing Format file.
|
||||
func Threemf(in []byte, _ uint32) bool {
|
||||
return detect(in, threemfSigs)
|
||||
}
|
||||
|
||||
// X3d matches an Extensible 3D Graphics file.
|
||||
func X3d(in []byte, _ uint32) bool {
|
||||
return detect(in, x3dSigs)
|
||||
}
|
||||
|
||||
// VCard matches a Virtual Contact File.
|
||||
func VCard(in []byte, _ uint32) bool {
|
||||
return detect(in, vCardSigs)
|
||||
}
|
||||
|
||||
// ICalendar matches a iCalendar file.
|
||||
func ICalendar(in []byte, _ uint32) bool {
|
||||
return detect(in, iCalSigs)
|
||||
}
|
||||
|
||||
// Xfdf matches a XML Forms Data Format file.
|
||||
func Xfdf(in []byte, _ uint32) bool {
|
||||
return detect(in, xfdfSigs)
|
||||
}
|
@ -1,11 +0,0 @@
|
||||
package matchers
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
)
|
||||
|
||||
// Torrent reports whether in begins with the bencoded text "d8:announce".
//
// The second argument is ignored. Any input that starts with the 11-byte
// signature matches, including input that consists of exactly those 11
// bytes; the previous length guard (len(in) > 11) wrongly rejected that
// boundary case. bytes.HasPrefix performs the length check itself, so nil
// and short inputs simply return false.
func Torrent(in []byte, _ uint32) bool {
	return bytes.HasPrefix(in, []byte("d8:announce"))
}
|
@ -1,83 +0,0 @@
|
||||
package matchers
|
||||
|
||||
import "bytes"
|
||||
|
||||
// Zip reports whether in starts with a zip archive signature: the bytes
// 'P' 'K', then a third byte of 0x3, 0x5 or 0x7, then a fourth byte of 0x4,
// 0x6 or 0x8. Inputs shorter than four bytes never match. The second
// argument is ignored.
func Zip(in []byte, _ uint32) bool {
	if len(in) <= 3 {
		return false
	}
	if in[0] != 'P' || in[1] != 'K' {
		return false
	}
	switch in[2] {
	case 0x3, 0x5, 0x7:
		// acceptable third byte; go on to check the fourth
	default:
		return false
	}
	switch in[3] {
	case 0x4, 0x6, 0x8:
		return true
	}
	return false
}
|
||||
|
||||
// Odt reports whether in looks like an OpenDocument Text file: the bytes
// starting at offset 30 must begin with
// "mimetypeapplication/vnd.oasis.opendocument.text". Inputs of 30 bytes or
// fewer never match. The second argument is ignored.
func Odt(in []byte, _ uint32) bool {
	const offset = 30
	if len(in) <= offset {
		return false
	}
	signature := []byte("mimetypeapplication/vnd.oasis.opendocument.text")
	return bytes.HasPrefix(in[offset:], signature)
}
|
||||
|
||||
// Ott reports whether in looks like an OpenDocument Text Template file: the
// bytes starting at offset 30 must begin with
// "mimetypeapplication/vnd.oasis.opendocument.text-template". Inputs of 30
// bytes or fewer never match. The second argument is ignored.
func Ott(in []byte, _ uint32) bool {
	const offset = 30
	if len(in) <= offset {
		return false
	}
	signature := []byte("mimetypeapplication/vnd.oasis.opendocument.text-template")
	return bytes.HasPrefix(in[offset:], signature)
}
|
||||
|
||||
// Ods reports whether in looks like an OpenDocument Spreadsheet file: the
// bytes starting at offset 30 must begin with
// "mimetypeapplication/vnd.oasis.opendocument.spreadsheet". Inputs of 30
// bytes or fewer never match. The second argument is ignored.
func Ods(in []byte, _ uint32) bool {
	const offset = 30
	if len(in) <= offset {
		return false
	}
	signature := []byte("mimetypeapplication/vnd.oasis.opendocument.spreadsheet")
	return bytes.HasPrefix(in[offset:], signature)
}
|
||||
|
||||
// Ots reports whether in looks like an OpenDocument Spreadsheet Template
// file: the bytes starting at offset 30 must begin with
// "mimetypeapplication/vnd.oasis.opendocument.spreadsheet-template". Inputs
// of 30 bytes or fewer never match. The second argument is ignored.
func Ots(in []byte, _ uint32) bool {
	const offset = 30
	if len(in) <= offset {
		return false
	}
	signature := []byte("mimetypeapplication/vnd.oasis.opendocument.spreadsheet-template")
	return bytes.HasPrefix(in[offset:], signature)
}
|
||||
|
||||
// Odp reports whether in looks like an OpenDocument Presentation file: the
// bytes starting at offset 30 must begin with
// "mimetypeapplication/vnd.oasis.opendocument.presentation". Inputs of 30
// bytes or fewer never match. The second argument is ignored.
func Odp(in []byte, _ uint32) bool {
	const offset = 30
	if len(in) <= offset {
		return false
	}
	signature := []byte("mimetypeapplication/vnd.oasis.opendocument.presentation")
	return bytes.HasPrefix(in[offset:], signature)
}
|
||||
|
||||
// Otp reports whether in looks like an OpenDocument Presentation Template
// file: the bytes starting at offset 30 must begin with
// "mimetypeapplication/vnd.oasis.opendocument.presentation-template". Inputs
// of 30 bytes or fewer never match. The second argument is ignored.
func Otp(in []byte, _ uint32) bool {
	const offset = 30
	if len(in) <= offset {
		return false
	}
	signature := []byte("mimetypeapplication/vnd.oasis.opendocument.presentation-template")
	return bytes.HasPrefix(in[offset:], signature)
}
|
||||
|
||||
// Odg reports whether in looks like an OpenDocument Drawing file: the bytes
// starting at offset 30 must begin with
// "mimetypeapplication/vnd.oasis.opendocument.graphics". Inputs of 30 bytes
// or fewer never match. The second argument is ignored.
func Odg(in []byte, _ uint32) bool {
	const offset = 30
	if len(in) <= offset {
		return false
	}
	signature := []byte("mimetypeapplication/vnd.oasis.opendocument.graphics")
	return bytes.HasPrefix(in[offset:], signature)
}
|
||||
|
||||
// Otg reports whether in looks like an OpenDocument Drawing Template file:
// the bytes starting at offset 30 must begin with
// "mimetypeapplication/vnd.oasis.opendocument.graphics-template". Inputs of
// 30 bytes or fewer never match. The second argument is ignored.
func Otg(in []byte, _ uint32) bool {
	const offset = 30
	if len(in) <= offset {
		return false
	}
	signature := []byte("mimetypeapplication/vnd.oasis.opendocument.graphics-template")
	return bytes.HasPrefix(in[offset:], signature)
}
|
||||
|
||||
// Odf reports whether in looks like an OpenDocument Formula file: the bytes
// starting at offset 30 must begin with
// "mimetypeapplication/vnd.oasis.opendocument.formula". Inputs of 30 bytes
// or fewer never match. The second argument is ignored.
func Odf(in []byte, _ uint32) bool {
	const offset = 30
	if len(in) <= offset {
		return false
	}
	signature := []byte("mimetypeapplication/vnd.oasis.opendocument.formula")
	return bytes.HasPrefix(in[offset:], signature)
}
|
||||
|
||||
// Odc reports whether in looks like an OpenDocument Chart file: the bytes
// starting at offset 30 must begin with
// "mimetypeapplication/vnd.oasis.opendocument.chart". Inputs of 30 bytes or
// fewer never match. The second argument is ignored.
func Odc(in []byte, _ uint32) bool {
	const offset = 30
	if len(in) <= offset {
		return false
	}
	signature := []byte("mimetypeapplication/vnd.oasis.opendocument.chart")
	return bytes.HasPrefix(in[offset:], signature)
}
|
||||
|
||||
// Epub reports whether in looks like an EPUB file: the bytes starting at
// offset 30 must begin with "mimetypeapplication/epub+zip". Inputs of 30
// bytes or fewer never match. The second argument is ignored.
func Epub(in []byte, _ uint32) bool {
	const offset = 30
	if len(in) <= offset {
		return false
	}
	signature := []byte("mimetypeapplication/epub+zip")
	return bytes.HasPrefix(in[offset:], signature)
}
|
||||
|
||||
// Sxc reports whether in looks like an OpenOffice Spreadsheet file: the
// bytes starting at offset 30 must begin with
// "mimetypeapplication/vnd.sun.xml.calc". Inputs of 30 bytes or fewer never
// match. The second argument is ignored.
func Sxc(in []byte, _ uint32) bool {
	const offset = 30
	if len(in) <= offset {
		return false
	}
	signature := []byte("mimetypeapplication/vnd.sun.xml.calc")
	return bytes.HasPrefix(in[offset:], signature)
}
|
||||
|
||||
// Jar matches a Java archive file.
|
||||
func Jar(in []byte, _ uint32) bool {
|
||||
t := zipTokenizer{in: in}
|
||||
for i, tok := 0, t.next(); i < 10 && tok != ""; i, tok = i+1, t.next() {
|
||||
if tok == "META-INF/MANIFEST.MF" {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
Binary file not shown.
Before Width: | Height: | Size: 744 KiB After Width: | Height: | Size: 1.3 MiB |
@ -0,0 +1,3 @@
|
||||
# This source code refers to The Go Authors for copyright purposes.
|
||||
# The master list of authors is in the main Go distribution,
|
||||
# visible at http://tip.golang.org/AUTHORS.
|
@ -0,0 +1,3 @@
|
||||
# This source code was written by the Go contributors.
|
||||
# The master list of contributors is in the main Go distribution,
|
||||
# visible at http://tip.golang.org/CONTRIBUTORS.
|
@ -0,0 +1,27 @@
|
||||
Copyright (c) 2009 The Go Authors. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Google Inc. nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
@ -0,0 +1,22 @@
|
||||
Additional IP Rights Grant (Patents)
|
||||
|
||||
"This implementation" means the copyrightable works distributed by
|
||||
Google as part of the Go project.
|
||||
|
||||
Google hereby grants to You a perpetual, worldwide, non-exclusive,
|
||||
no-charge, royalty-free, irrevocable (except as stated in this section)
|
||||
patent license to make, have made, use, offer to sell, sell, import,
|
||||
transfer and otherwise run, modify and propagate the contents of this
|
||||
implementation of Go, where such license applies only to those patent
|
||||
claims, both currently owned or controlled by Google and acquired in
|
||||
the future, licensable by Google that are necessarily infringed by this
|
||||
implementation of Go. This grant does not include claims that would be
|
||||
infringed only as a consequence of further modification of this
|
||||
implementation. If you or your agent or exclusive licensee institute or
|
||||
order or agree to the institution of patent litigation against any
|
||||
entity (including a cross-claim or counterclaim in a lawsuit) alleging
|
||||
that this implementation of Go or any code incorporated within this
|
||||
implementation of Go constitutes direct or contributory patent
|
||||
infringement, or inducement of patent infringement, then any patent
|
||||
rights granted to you under this License for this implementation of Go
|
||||
shall terminate as of the date such litigation is filed.
|
@ -0,0 +1,78 @@
|
||||
// Copyright 2012 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package atom provides integer codes (also known as atoms) for a fixed set of
|
||||
// frequently occurring HTML strings: tag names and attribute keys such as "p"
|
||||
// and "id".
|
||||
//
|
||||
// Sharing an atom's name between all elements with the same tag can result in
|
||||
// fewer string allocations when tokenizing and parsing HTML. Integer
|
||||
// comparisons are also generally faster than string comparisons.
|
||||
//
|
||||
// The value of an atom's particular code is not guaranteed to stay the same
|
||||
// between versions of this package. Neither is any ordering guaranteed:
|
||||
// whether atom.H1 < atom.H2 may also change. The codes are not guaranteed to
|
||||
// be dense. The only guarantees are that e.g. looking up "div" will yield
|
||||
// atom.Div, calling atom.Div.String will return "div", and atom.Div != 0.
|
||||
package atom // import "golang.org/x/net/html/atom"
|
||||
|
||||
// Atom is an integer code for a string. The zero value maps to "".
//
// As decoded by the String method, the low 8 bits of the code hold the
// length of the name and the remaining high bits hold the name's starting
// offset within atomText.
|
||||
type Atom uint32
|
||||
|
||||
// String returns the atom's name.
|
||||
func (a Atom) String() string {
|
||||
start := uint32(a >> 8)
|
||||
n := uint32(a & 0xff)
|
||||
if start+n > uint32(len(atomText)) {
|
||||
return ""
|
||||
}
|
||||
return atomText[start : start+n]
|
||||
}
|
||||
|
||||
func (a Atom) string() string {
|
||||
return atomText[a>>8 : a>>8+a&0xff]
|
||||
}
|
||||
|
||||
// fnv computes the FNV hash of s, starting from the arbitrary value h.
// Each byte of s is XORed into the running hash, which is then multiplied
// by 16777619 (with uint32 wrap-around). An empty s returns h unchanged.
func fnv(h uint32, s []byte) uint32 {
	const prime = 16777619
	for _, b := range s {
		h = (h ^ uint32(b)) * prime
	}
	return h
}
|
||||
|
||||
// match reports whether the first len(t) bytes of s equal t. The caller
// must ensure len(s) >= len(t): when t is longer than s and every byte of s
// matches, indexing s runs out of range and panics.
func match(s string, t []byte) bool {
	for i := 0; i < len(t); i++ {
		if t[i] != s[i] {
			return false
		}
	}
	return true
}
|
||||
|
||||
// Lookup returns the atom whose name is s. It returns zero if there is no
|
||||
// such atom. The lookup is case sensitive.
|
||||
func Lookup(s []byte) Atom {
|
||||
if len(s) == 0 || len(s) > maxAtomLen {
|
||||
return 0
|
||||
}
|
||||
h := fnv(hash0, s)
|
||||
if a := table[h&uint32(len(table)-1)]; int(a&0xff) == len(s) && match(a.string(), s) {
|
||||
return a
|
||||
}
|
||||
if a := table[(h>>16)&uint32(len(table)-1)]; int(a&0xff) == len(s) && match(a.string(), s) {
|
||||
return a
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
// String returns a string whose contents are equal to s. In that sense, it is
|
||||
// equivalent to string(s) but may be more efficient.
|
||||
func String(s []byte) string {
|
||||
if a := Lookup(s); a != 0 {
|
||||
return a.String()
|
||||
}
|
||||
return string(s)
|
||||
}
|
@ -0,0 +1,783 @@
|
||||
// Code generated by go generate gen.go; DO NOT EDIT.
|
||||
|
||||
//go:generate go run gen.go
|
||||
|
||||
package atom
|
||||
|
||||
const (
|
||||
A Atom = 0x1
|
||||
Abbr Atom = 0x4
|
||||
Accept Atom = 0x1a06
|
||||
AcceptCharset Atom = 0x1a0e
|
||||
Accesskey Atom = 0x2c09
|
||||
Acronym Atom = 0xaa07
|
||||
Action Atom = 0x27206
|
||||
Address Atom = 0x6f307
|
||||
Align Atom = 0xb105
|
||||
Allowfullscreen Atom = 0x2080f
|
||||
Allowpaymentrequest Atom = 0xc113
|
||||
Allowusermedia Atom = 0xdd0e
|
||||
Alt Atom = 0xf303
|
||||
Annotation Atom = 0x1c90a
|
||||
AnnotationXml Atom = 0x1c90e
|
||||
Applet Atom = 0x31906
|
||||
Area Atom = 0x35604
|
||||
Article Atom = 0x3fc07
|
||||
As Atom = 0x3c02
|
||||
Aside Atom = 0x10705
|
||||
Async Atom = 0xff05
|
||||
Audio Atom = 0x11505
|
||||
Autocomplete Atom = 0x2780c
|
||||
Autofocus Atom = 0x12109
|
||||
Autoplay Atom = 0x13c08
|
||||
B Atom = 0x101
|
||||
Base Atom = 0x3b04
|
||||
Basefont Atom = 0x3b08
|
||||
Bdi Atom = 0xba03
|
||||
Bdo Atom = 0x14b03
|
||||
Bgsound Atom = 0x15e07
|
||||
Big Atom = 0x17003
|
||||
Blink Atom = 0x17305
|
||||
Blockquote Atom = 0x1870a
|
||||
Body Atom = 0x2804
|
||||
Br Atom = 0x202
|
||||
Button Atom = 0x19106
|
||||
Canvas Atom = 0x10306
|
||||
Caption Atom = 0x23107
|
||||
Center Atom = 0x22006
|
||||
Challenge Atom = 0x29b09
|
||||
Charset Atom = 0x2107
|
||||
Checked Atom = 0x47907
|
||||
Cite Atom = 0x19c04
|
||||
Class Atom = 0x56405
|
||||
Code Atom = 0x5c504
|
||||
Col Atom = 0x1ab03
|
||||
Colgroup Atom = 0x1ab08
|
||||
Color Atom = 0x1bf05
|
||||
Cols Atom = 0x1c404
|
||||
Colspan Atom = 0x1c407
|
||||
Command Atom = 0x1d707
|
||||
Content Atom = 0x58b07
|
||||
Contenteditable Atom = 0x58b0f
|
||||
Contextmenu Atom = 0x3800b
|
||||
Controls Atom = 0x1de08
|
||||
Coords Atom = 0x1ea06
|
||||
Crossorigin Atom = 0x1fb0b
|
||||
Data Atom = 0x4a504
|
||||
Datalist Atom = 0x4a508
|
||||
Datetime Atom = 0x2b808
|
||||
Dd Atom = 0x2d702
|
||||
Default Atom = 0x10a07
|
||||
Defer Atom = 0x5c705
|
||||
Del Atom = 0x45203
|
||||
Desc Atom = 0x56104
|
||||
Details Atom = 0x7207
|
||||
Dfn Atom = 0x8703
|
||||
Dialog Atom = 0xbb06
|
||||
Dir Atom = 0x9303
|
||||
Dirname Atom = 0x9307
|
||||
Disabled Atom = 0x16408
|
||||
Div Atom = 0x16b03
|
||||
Dl Atom = 0x5e602
|
||||
Download Atom = 0x46308
|
||||
Draggable Atom = 0x17a09
|
||||
Dropzone Atom = 0x40508
|
||||
Dt Atom = 0x64b02
|
||||
Em Atom = 0x6e02
|
||||
Embed Atom = 0x6e05
|
||||
Enctype Atom = 0x28d07
|
||||
Face Atom = 0x21e04
|
||||
Fieldset Atom = 0x22608
|
||||
Figcaption Atom = 0x22e0a
|
||||
Figure Atom = 0x24806
|
||||
Font Atom = 0x3f04
|
||||
Footer Atom = 0xf606
|
||||
For Atom = 0x25403
|
||||
ForeignObject Atom = 0x2540d
|
||||
Foreignobject Atom = 0x2610d
|
||||
Form Atom = 0x26e04
|
||||
Formaction Atom = 0x26e0a
|
||||
Formenctype Atom = 0x2890b
|
||||
Formmethod Atom = 0x2a40a
|
||||
Formnovalidate Atom = 0x2ae0e
|
||||
Formtarget Atom = 0x2c00a
|
||||
Frame Atom = 0x8b05
|
||||
Frameset Atom = 0x8b08
|
||||
H1 Atom = 0x15c02
|
||||
H2 Atom = 0x2de02
|
||||
H3 Atom = 0x30d02
|
||||
H4 Atom = 0x34502
|
||||
H5 Atom = 0x34f02
|
||||
H6 Atom = 0x64d02
|
||||
Head Atom = 0x33104
|
||||
Header Atom = 0x33106
|
||||
Headers Atom = 0x33107
|
||||
Height Atom = 0x5206
|
||||
Hgroup Atom = 0x2ca06
|
||||
Hidden Atom = 0x2d506
|
||||
High Atom = 0x2db04
|
||||
Hr Atom = 0x15702
|
||||
Href Atom = 0x2e004
|
||||
Hreflang Atom = 0x2e008
|
||||
Html Atom = 0x5604
|
||||
HttpEquiv Atom = 0x2e80a
|
||||
I Atom = 0x601
|
||||
Icon Atom = 0x58a04
|
||||
Id Atom = 0x10902
|
||||
Iframe Atom = 0x2fc06
|
||||
Image Atom = 0x30205
|
||||
Img Atom = 0x30703
|
||||
Input Atom = 0x44b05
|
||||
Inputmode Atom = 0x44b09
|
||||
Ins Atom = 0x20403
|
||||
Integrity Atom = 0x23f09
|
||||
Is Atom = 0x16502
|
||||
Isindex Atom = 0x30f07
|
||||
Ismap Atom = 0x31605
|
||||
Itemid Atom = 0x38b06
|
||||
Itemprop Atom = 0x19d08
|
||||
Itemref Atom = 0x3cd07
|
||||
Itemscope Atom = 0x67109
|
||||
Itemtype Atom = 0x31f08
|
||||
Kbd Atom = 0xb903
|
||||
Keygen Atom = 0x3206
|
||||
Keytype Atom = 0xd607
|
||||
Kind Atom = 0x17704
|
||||
Label Atom = 0x5905
|
||||
Lang Atom = 0x2e404
|
||||
Legend Atom = 0x18106
|
||||
Li Atom = 0xb202
|
||||
Link Atom = 0x17404
|
||||
List Atom = 0x4a904
|
||||
Listing Atom = 0x4a907
|
||||
Loop Atom = 0x5d04
|
||||
Low Atom = 0xc303
|
||||
Main Atom = 0x1004
|
||||
Malignmark Atom = 0xb00a
|
||||
Manifest Atom = 0x6d708
|
||||
Map Atom = 0x31803
|
||||
Mark Atom = 0xb604
|
||||
Marquee Atom = 0x32707
|
||||
Math Atom = 0x32e04
|
||||
Max Atom = 0x33d03
|
||||
Maxlength Atom = 0x33d09
|
||||
Media Atom = 0xe605
|
||||
Mediagroup Atom = 0xe60a
|
||||
Menu Atom = 0x38704
|
||||
Menuitem Atom = 0x38708
|
||||
Meta Atom = 0x4b804
|
||||
Meter Atom = 0x9805
|
||||
Method Atom = 0x2a806
|
||||
Mglyph Atom = 0x30806
|
||||
Mi Atom = 0x34702
|
||||
Min Atom = 0x34703
|
||||
Minlength Atom = 0x34709
|
||||
Mn Atom = 0x2b102
|
||||
Mo Atom = 0xa402
|
||||
Ms Atom = 0x67402
|
||||
Mtext Atom = 0x35105
|
||||
Multiple Atom = 0x35f08
|
||||
Muted Atom = 0x36705
|
||||
Name Atom = 0x9604
|
||||
Nav Atom = 0x1303
|
||||
Nobr Atom = 0x3704
|
||||
Noembed Atom = 0x6c07
|
||||
Noframes Atom = 0x8908
|
||||
Nomodule Atom = 0xa208
|
||||
Nonce Atom = 0x1a605
|
||||
Noscript Atom = 0x21608
|
||||
Novalidate Atom = 0x2b20a
|
||||
Object Atom = 0x26806
|
||||
Ol Atom = 0x13702
|
||||
Onabort Atom = 0x19507
|
||||
Onafterprint Atom = 0x2360c
|
||||
Onautocomplete Atom = 0x2760e
|
||||
Onautocompleteerror Atom = 0x27613
|
||||
Onauxclick Atom = 0x61f0a
|
||||
Onbeforeprint Atom = 0x69e0d
|
||||
Onbeforeunload Atom = 0x6e70e
|
||||
Onblur Atom = 0x56d06
|
||||
Oncancel Atom = 0x11908
|
||||
Oncanplay Atom = 0x14d09
|
||||
Oncanplaythrough Atom = 0x14d10
|
||||
Onchange Atom = 0x41b08
|
||||
Onclick Atom = 0x2f507
|
||||
Onclose Atom = 0x36c07
|
||||
Oncontextmenu Atom = 0x37e0d
|
||||
Oncopy Atom = 0x39106
|
||||
Oncuechange Atom = 0x3970b
|
||||
Oncut Atom = 0x3a205
|
||||
Ondblclick Atom = 0x3a70a
|
||||
Ondrag Atom = 0x3b106
|
||||
Ondragend Atom = 0x3b109
|
||||
Ondragenter Atom = 0x3ba0b
|
||||
Ondragexit Atom = 0x3c50a
|
||||
Ondragleave Atom = 0x3df0b
|
||||
Ondragover Atom = 0x3ea0a
|
||||
Ondragstart Atom = 0x3f40b
|
||||
Ondrop Atom = 0x40306
|
||||
Ondurationchange Atom = 0x41310
|
||||
Onemptied Atom = 0x40a09
|
||||
Onended Atom = 0x42307
|
||||
Onerror Atom = 0x42a07
|
||||
Onfocus Atom = 0x43107
|
||||
Onhashchange Atom = 0x43d0c
|
||||
Oninput Atom = 0x44907
|
||||
Oninvalid Atom = 0x45509
|
||||
Onkeydown Atom = 0x45e09
|
||||
Onkeypress Atom = 0x46b0a
|
||||
Onkeyup Atom = 0x48007
|
||||
Onlanguagechange Atom = 0x48d10
|
||||
Onload Atom = 0x49d06
|
||||
Onloadeddata Atom = 0x49d0c
|
||||
Onloadedmetadata Atom = 0x4b010
|
||||
Onloadend Atom = 0x4c609
|
||||
Onloadstart Atom = 0x4cf0b
|
||||
Onmessage Atom = 0x4da09
|
||||
Onmessageerror Atom = 0x4da0e
|
||||
Onmousedown Atom = 0x4e80b
|
||||
Onmouseenter Atom = 0x4f30c
|
||||
Onmouseleave Atom = 0x4ff0c
|
||||
Onmousemove Atom = 0x50b0b
|
||||
Onmouseout Atom = 0x5160a
|
||||
Onmouseover Atom = 0x5230b
|
||||
Onmouseup Atom = 0x52e09
|
||||
Onmousewheel Atom = 0x53c0c
|
||||
Onoffline Atom = 0x54809
|
||||
Ononline Atom = 0x55108
|
||||
Onpagehide Atom = 0x5590a
|
||||
Onpageshow Atom = 0x5730a
|
||||
Onpaste Atom = 0x57f07
|
||||
Onpause Atom = 0x59a07
|
||||
Onplay Atom = 0x5a406
|
||||
Onplaying Atom = 0x5a409
|
||||
Onpopstate Atom = 0x5ad0a
|
||||
Onprogress Atom = 0x5b70a
|
||||
Onratechange Atom = 0x5cc0c
|
||||
Onrejectionhandled Atom = 0x5d812
|
||||
Onreset Atom = 0x5ea07
|
||||
Onresize Atom = 0x5f108
|
||||
Onscroll Atom = 0x60008
|
||||
Onsecuritypolicyviolation Atom = 0x60819
|
||||
Onseeked Atom = 0x62908
|
||||
Onseeking Atom = 0x63109
|
||||
Onselect Atom = 0x63a08
|
||||
Onshow Atom = 0x64406
|
||||
Onsort Atom = 0x64f06
|
||||
Onstalled Atom = 0x65909
|
||||
Onstorage Atom = 0x66209
|
||||
Onsubmit Atom = 0x66b08
|
||||
Onsuspend Atom = 0x67b09
|
||||
Ontimeupdate Atom = 0x400c
|
||||
Ontoggle Atom = 0x68408
|
||||
Onunhandledrejection Atom = 0x68c14
|
||||
Onunload Atom = 0x6ab08
|
||||
Onvolumechange Atom = 0x6b30e
|
||||
Onwaiting Atom = 0x6c109
|
||||
Onwheel Atom = 0x6ca07
|
||||
Open Atom = 0x1a304
|
||||
Optgroup Atom = 0x5f08
|
||||
Optimum Atom = 0x6d107
|
||||
Option Atom = 0x6e306
|
||||
Output Atom = 0x51d06
|
||||
P Atom = 0xc01
|
||||
Param Atom = 0xc05
|
||||
Pattern Atom = 0x6607
|
||||
Picture Atom = 0x7b07
|
||||
Ping Atom = 0xef04
|
||||
Placeholder Atom = 0x1310b
|
||||
Plaintext Atom = 0x1b209
|
||||
Playsinline Atom = 0x1400b
|
||||
Poster Atom = 0x2cf06
|
||||
Pre Atom = 0x47003
|
||||
Preload Atom = 0x48607
|
||||
Progress Atom = 0x5b908
|
||||
Prompt Atom = 0x53606
|
||||
Public Atom = 0x58606
|
||||
Q Atom = 0xcf01
|
||||
Radiogroup Atom = 0x30a
|
||||
Rb Atom = 0x3a02
|
||||
Readonly Atom = 0x35708
|
||||
Referrerpolicy Atom = 0x3d10e
|
||||
Rel Atom = 0x48703
|
||||
Required Atom = 0x24c08
|
||||
Reversed Atom = 0x8008
|
||||
Rows Atom = 0x9c04
|
||||
Rowspan Atom = 0x9c07
|
||||
Rp Atom = 0x23c02
|
||||
Rt Atom = 0x19a02
|
||||
Rtc Atom = 0x19a03
|
||||
Ruby Atom = 0xfb04
|
||||
S Atom = 0x2501
|
||||
Samp Atom = 0x7804
|
||||
Sandbox Atom = 0x12907
|
||||
Scope Atom = 0x67505
|
||||
Scoped Atom = 0x67506
|
||||
Script Atom = 0x21806
|
||||
Seamless Atom = 0x37108
|
||||
Section Atom = 0x56807
|
||||
Select Atom = 0x63c06
|
||||
Selected Atom = 0x63c08
|
||||
Shape Atom = 0x1e505
|
||||
Size Atom = 0x5f504
|
||||
Sizes Atom = 0x5f505
|
||||
Slot Atom = 0x1ef04
|
||||
Small Atom = 0x20605
|
||||
Sortable Atom = 0x65108
|
||||
Sorted Atom = 0x33706
|
||||
Source Atom = 0x37806
|
||||
Spacer Atom = 0x43706
|
||||
Span Atom = 0x9f04
|
||||
Spellcheck Atom = 0x4740a
|
||||
Src Atom = 0x5c003
|
||||
Srcdoc Atom = 0x5c006
|
||||
Srclang Atom = 0x5f907
|
||||
Srcset Atom = 0x6f906
|
||||
Start Atom = 0x3fa05
|
||||
Step Atom = 0x58304
|
||||
Strike Atom = 0xd206
|
||||
Strong Atom = 0x6dd06
|
||||
Style Atom = 0x6ff05
|
||||
Sub Atom = 0x66d03
|
||||
Summary Atom = 0x70407
|
||||
Sup Atom = 0x70b03
|
||||
Svg Atom = 0x70e03
|
||||
System Atom = 0x71106
|
||||
Tabindex Atom = 0x4be08
|
||||
Table Atom = 0x59505
|
||||
Target Atom = 0x2c406
|
||||
Tbody Atom = 0x2705
|
||||
Td Atom = 0x9202
|
||||
Template Atom = 0x71408
|
||||
Textarea Atom = 0x35208
|
||||
Tfoot Atom = 0xf505
|
||||
Th Atom = 0x15602
|
||||
Thead Atom = 0x33005
|
||||
Time Atom = 0x4204
|
||||
Title Atom = 0x11005
|
||||
Tr Atom = 0xcc02
|
||||
Track Atom = 0x1ba05
|
||||
Translate Atom = 0x1f209
|
||||
Tt Atom = 0x6802
|
||||
Type Atom = 0xd904
|
||||
Typemustmatch Atom = 0x2900d
|
||||
U Atom = 0xb01
|
||||
Ul Atom = 0xa702
|
||||
Updateviacache Atom = 0x460e
|
||||
Usemap Atom = 0x59e06
|
||||
Value Atom = 0x1505
|
||||
Var Atom = 0x16d03
|
||||
Video Atom = 0x2f105
|
||||
Wbr Atom = 0x57c03
|
||||
Width Atom = 0x64905
|
||||
Workertype Atom = 0x71c0a
|
||||
Wrap Atom = 0x72604
|
||||
Xmp Atom = 0x12f03
|
||||
)
|
||||
|
||||
// hash0 is the starting value that Lookup passes to fnv when hashing a
// candidate name to find its slots in table.
const hash0 = 0x81cdf10e
|
||||
|
||||
// maxAtomLen is the longest input, in bytes, that Lookup will consider;
// anything longer returns the zero Atom without being hashed.
// NOTE(review): presumably the length of the longest atom name — confirm
// against the generator.
const maxAtomLen = 25
|
||||
|
||||
var table = [1 << 9]Atom{
|
||||
0x1: 0xe60a, // mediagroup
|
||||
0x2: 0x2e404, // lang
|
||||
0x4: 0x2c09, // accesskey
|
||||
0x5: 0x8b08, // frameset
|
||||
0x7: 0x63a08, // onselect
|
||||
0x8: 0x71106, // system
|
||||
0xa: 0x64905, // width
|
||||
0xc: 0x2890b, // formenctype
|
||||
0xd: 0x13702, // ol
|
||||
0xe: 0x3970b, // oncuechange
|
||||
0x10: 0x14b03, // bdo
|
||||
0x11: 0x11505, // audio
|
||||
0x12: 0x17a09, // draggable
|
||||
0x14: 0x2f105, // video
|
||||
0x15: 0x2b102, // mn
|
||||
0x16: 0x38704, // menu
|
||||
0x17: 0x2cf06, // poster
|
||||
0x19: 0xf606, // footer
|
||||
0x1a: 0x2a806, // method
|
||||
0x1b: 0x2b808, // datetime
|
||||
0x1c: 0x19507, // onabort
|
||||
0x1d: 0x460e, // updateviacache
|
||||
0x1e: 0xff05, // async
|
||||
0x1f: 0x49d06, // onload
|
||||
0x21: 0x11908, // oncancel
|
||||
0x22: 0x62908, // onseeked
|
||||
0x23: 0x30205, // image
|
||||
0x24: 0x5d812, // onrejectionhandled
|
||||
0x26: 0x17404, // link
|
||||
0x27: 0x51d06, // output
|
||||
0x28: 0x33104, // head
|
||||
0x29: 0x4ff0c, // onmouseleave
|
||||
0x2a: 0x57f07, // onpaste
|
||||
0x2b: 0x5a409, // onplaying
|
||||
0x2c: 0x1c407, // colspan
|
||||
0x2f: 0x1bf05, // color
|
||||
0x30: 0x5f504, // size
|
||||
0x31: 0x2e80a, // http-equiv
|
||||
0x33: 0x601, // i
|
||||
0x34: 0x5590a, // onpagehide
|
||||
0x35: 0x68c14, // onunhandledrejection
|
||||
0x37: 0x42a07, // onerror
|
||||
0x3a: 0x3b08, // basefont
|
||||
0x3f: 0x1303, // nav
|
||||
0x40: 0x17704, // kind
|
||||
0x41: 0x35708, // readonly
|
||||
0x42: 0x30806, // mglyph
|
||||
0x44: 0xb202, // li
|
||||
0x46: 0x2d506, // hidden
|
||||
0x47: 0x70e03, // svg
|
||||
0x48: 0x58304, // step
|
||||
0x49: 0x23f09, // integrity
|
||||
0x4a: 0x58606, // public
|
||||
0x4c: 0x1ab03, // col
|
||||
0x4d: 0x1870a, // blockquote
|
||||
0x4e: 0x34f02, // h5
|
||||
0x50: 0x5b908, // progress
|
||||
0x51: 0x5f505, // sizes
|
||||
0x52: 0x34502, // h4
|
||||
0x56: 0x33005, // thead
|
||||
0x57: 0xd607, // keytype
|
||||
0x58: 0x5b70a, // onprogress
|
||||
0x59: 0x44b09, // inputmode
|
||||
0x5a: 0x3b109, // ondragend
|
||||
0x5d: 0x3a205, // oncut
|
||||
0x5e: 0x43706, // spacer
|
||||
0x5f: 0x1ab08, // colgroup
|
||||
0x62: 0x16502, // is
|
||||
0x65: 0x3c02, // as
|
||||
0x66: 0x54809, // onoffline
|
||||
0x67: 0x33706, // sorted
|
||||
0x69: 0x48d10, // onlanguagechange
|
||||
0x6c: 0x43d0c, // onhashchange
|
||||
0x6d: 0x9604, // name
|
||||
0x6e: 0xf505, // tfoot
|
||||
0x6f: 0x56104, // desc
|
||||
0x70: 0x33d03, // max
|
||||
0x72: 0x1ea06, // coords
|
||||
0x73: 0x30d02, // h3
|
||||
0x74: 0x6e70e, // onbeforeunload
|
||||
0x75: 0x9c04, // rows
|
||||
0x76: 0x63c06, // select
|
||||
0x77: 0x9805, // meter
|
||||
0x78: 0x38b06, // itemid
|
||||
0x79: 0x53c0c, // onmousewheel
|
||||
0x7a: 0x5c006, // srcdoc
|
||||
0x7d: 0x1ba05, // track
|
||||
0x7f: 0x31f08, // itemtype
|
||||
0x82: 0xa402, // mo
|
||||
0x83: 0x41b08, // onchange
|
||||
0x84: 0x33107, // headers
|
||||
0x85: 0x5cc0c, // onratechange
|
||||
0x86: 0x60819, // onsecuritypolicyviolation
|
||||
0x88: 0x4a508, // datalist
|
||||
0x89: 0x4e80b, // onmousedown
|
||||
0x8a: 0x1ef04, // slot
|
||||
0x8b: 0x4b010, // onloadedmetadata
|
||||
0x8c: 0x1a06, // accept
|
||||
0x8d: 0x26806, // object
|
||||
0x91: 0x6b30e, // onvolumechange
|
||||
0x92: 0x2107, // charset
|
||||
0x93: 0x27613, // onautocompleteerror
|
||||
0x94: 0xc113, // allowpaymentrequest
|
||||
0x95: 0x2804, // body
|
||||
0x96: 0x10a07, // default
|
||||
0x97: 0x63c08, // selected
|
||||
0x98: 0x21e04, // face
|
||||
0x99: 0x1e505, // shape
|
||||
0x9b: 0x68408, // ontoggle
|
||||
0x9e: 0x64b02, // dt
|
||||
0x9f: 0xb604, // mark
|
||||
0xa1: 0xb01, // u
|
||||
0xa4: 0x6ab08, // onunload
|
||||
0xa5: 0x5d04, // loop
|
||||
0xa6: 0x16408, // disabled
|
||||
0xaa: 0x42307, // onended
|
||||
0xab: 0xb00a, // malignmark
|
||||
0xad: 0x67b09, // onsuspend
|
||||
0xae: 0x35105, // mtext
|
||||
0xaf: 0x64f06, // onsort
|
||||
0xb0: 0x19d08, // itemprop
|
||||
0xb3: 0x67109, // itemscope
|
||||
0xb4: 0x17305, // blink
|
||||
0xb6: 0x3b106, // ondrag
|
||||
0xb7: 0xa702, // ul
|
||||
0xb8: 0x26e04, // form
|
||||
0xb9: 0x12907, // sandbox
|
||||
0xba: 0x8b05, // frame
|
||||
0xbb: 0x1505, // value
|
||||
0xbc: 0x66209, // onstorage
|
||||
0xbf: 0xaa07, // acronym
|
||||
0xc0: 0x19a02, // rt
|
||||
0xc2: 0x202, // br
|
||||
0xc3: 0x22608, // fieldset
|
||||
0xc4: 0x2900d, // typemustmatch
|
||||
0xc5: 0xa208, // nomodule
|
||||
0xc6: 0x6c07, // noembed
|
||||
0xc7: 0x69e0d, // onbeforeprint
|
||||
0xc8: 0x19106, // button
|
||||
0xc9: 0x2f507, // onclick
|
||||
0xca: 0x70407, // summary
|
||||
0xcd: 0xfb04, // ruby
|
||||
0xce: 0x56405, // class
|
||||
0xcf: 0x3f40b, // ondragstart
|
||||
0xd0: 0x23107, // caption
|
||||
0xd4: 0xdd0e, // allowusermedia
|
||||
0xd5: 0x4cf0b, // onloadstart
|
||||
0xd9: 0x16b03, // div
|
||||
0xda: 0x4a904, // list
|
||||
0xdb: 0x32e04, // math
|
||||
0xdc: 0x44b05, // input
|
||||
0xdf: 0x3ea0a, // ondragover
|
||||
0xe0: 0x2de02, // h2
|
||||
0xe2: 0x1b209, // plaintext
|
||||
0xe4: 0x4f30c, // onmouseenter
|
||||
0xe7: 0x47907, // checked
|
||||
0xe8: 0x47003, // pre
|
||||
0xea: 0x35f08, // multiple
|
||||
0xeb: 0xba03, // bdi
|
||||
0xec: 0x33d09, // maxlength
|
||||
0xed: 0xcf01, // q
|
||||
0xee: 0x61f0a, // onauxclick
|
||||
0xf0: 0x57c03, // wbr
|
||||
0xf2: 0x3b04, // base
|
||||
0xf3: 0x6e306, // option
|
||||
0xf5: 0x41310, // ondurationchange
|
||||
0xf7: 0x8908, // noframes
|
||||
0xf9: 0x40508, // dropzone
|
||||
0xfb: 0x67505, // scope
|
||||
0xfc: 0x8008, // reversed
|
||||
0xfd: 0x3ba0b, // ondragenter
|
||||
0xfe: 0x3fa05, // start
|
||||
0xff: 0x12f03, // xmp
|
||||
0x100: 0x5f907, // srclang
|
||||
0x101: 0x30703, // img
|
||||
0x104: 0x101, // b
|
||||
0x105: 0x25403, // for
|
||||
0x106: 0x10705, // aside
|
||||
0x107: 0x44907, // oninput
|
||||
0x108: 0x35604, // area
|
||||
0x109: 0x2a40a, // formmethod
|
||||
0x10a: 0x72604, // wrap
|
||||
0x10c: 0x23c02, // rp
|
||||
0x10d: 0x46b0a, // onkeypress
|
||||
0x10e: 0x6802, // tt
|
||||
0x110: 0x34702, // mi
|
||||
0x111: 0x36705, // muted
|
||||
0x112: 0xf303, // alt
|
||||
0x113: 0x5c504, // code
|
||||
0x114: 0x6e02, // em
|
||||
0x115: 0x3c50a, // ondragexit
|
||||
0x117: 0x9f04, // span
|
||||
0x119: 0x6d708, // manifest
|
||||
0x11a: 0x38708, // menuitem
|
||||
0x11b: 0x58b07, // content
|
||||
0x11d: 0x6c109, // onwaiting
|
||||
0x11f: 0x4c609, // onloadend
|
||||
0x121: 0x37e0d, // oncontextmenu
|
||||
0x123: 0x56d06, // onblur
|
||||
0x124: 0x3fc07, // article
|
||||
0x125: 0x9303, // dir
|
||||
0x126: 0xef04, // ping
|
||||
0x127: 0x24c08, // required
|
||||
0x128: 0x45509, // oninvalid
|
||||
0x129: 0xb105, // align
|
||||
0x12b: 0x58a04, // icon
|
||||
0x12c: 0x64d02, // h6
|
||||
0x12d: 0x1c404, // cols
|
||||
0x12e: 0x22e0a, // figcaption
|
||||
0x12f: 0x45e09, // onkeydown
|
||||
0x130: 0x66b08, // onsubmit
|
||||
0x131: 0x14d09, // oncanplay
|
||||
0x132: 0x70b03, // sup
|
||||
0x133: 0xc01, // p
|
||||
0x135: 0x40a09, // onemptied
|
||||
0x136: 0x39106, // oncopy
|
||||
0x137: 0x19c04, // cite
|
||||
0x138: 0x3a70a, // ondblclick
|
||||
0x13a: 0x50b0b, // onmousemove
|
||||
0x13c: 0x66d03, // sub
|
||||
0x13d: 0x48703, // rel
|
||||
0x13e: 0x5f08, // optgroup
|
||||
0x142: 0x9c07, // rowspan
|
||||
0x143: 0x37806, // source
|
||||
0x144: 0x21608, // noscript
|
||||
0x145: 0x1a304, // open
|
||||
0x146: 0x20403, // ins
|
||||
0x147: 0x2540d, // foreignObject
|
||||
0x148: 0x5ad0a, // onpopstate
|
||||
0x14a: 0x28d07, // enctype
|
||||
0x14b: 0x2760e, // onautocomplete
|
||||
0x14c: 0x35208, // textarea
|
||||
0x14e: 0x2780c, // autocomplete
|
||||
0x14f: 0x15702, // hr
|
||||
0x150: 0x1de08, // controls
|
||||
0x151: 0x10902, // id
|
||||
0x153: 0x2360c, // onafterprint
|
||||
0x155: 0x2610d, // foreignobject
|
||||
0x156: 0x32707, // marquee
|
||||
0x157: 0x59a07, // onpause
|
||||
0x158: 0x5e602, // dl
|
||||
0x159: 0x5206, // height
|
||||
0x15a: 0x34703, // min
|
||||
0x15b: 0x9307, // dirname
|
||||
0x15c: 0x1f209, // translate
|
||||
0x15d: 0x5604, // html
|
||||
0x15e: 0x34709, // minlength
|
||||
0x15f: 0x48607, // preload
|
||||
0x160: 0x71408, // template
|
||||
0x161: 0x3df0b, // ondragleave
|
||||
0x162: 0x3a02, // rb
|
||||
0x164: 0x5c003, // src
|
||||
0x165: 0x6dd06, // strong
|
||||
0x167: 0x7804, // samp
|
||||
0x168: 0x6f307, // address
|
||||
0x169: 0x55108, // ononline
|
||||
0x16b: 0x1310b, // placeholder
|
||||
0x16c: 0x2c406, // target
|
||||
0x16d: 0x20605, // small
|
||||
0x16e: 0x6ca07, // onwheel
|
||||
0x16f: 0x1c90a, // annotation
|
||||
0x170: 0x4740a, // spellcheck
|
||||
0x171: 0x7207, // details
|
||||
0x172: 0x10306, // canvas
|
||||
0x173: 0x12109, // autofocus
|
||||
0x174: 0xc05, // param
|
||||
0x176: 0x46308, // download
|
||||
0x177: 0x45203, // del
|
||||
0x178: 0x36c07, // onclose
|
||||
0x179: 0xb903, // kbd
|
||||
0x17a: 0x31906, // applet
|
||||
0x17b: 0x2e004, // href
|
||||
0x17c: 0x5f108, // onresize
|
||||
0x17e: 0x49d0c, // onloadeddata
|
||||
0x180: 0xcc02, // tr
|
||||
0x181: 0x2c00a, // formtarget
|
||||
0x182: 0x11005, // title
|
||||
0x183: 0x6ff05, // style
|
||||
0x184: 0xd206, // strike
|
||||
0x185: 0x59e06, // usemap
|
||||
0x186: 0x2fc06, // iframe
|
||||
0x187: 0x1004, // main
|
||||
0x189: 0x7b07, // picture
|
||||
0x18c: 0x31605, // ismap
|
||||
0x18e: 0x4a504, // data
|
||||
0x18f: 0x5905, // label
|
||||
0x191: 0x3d10e, // referrerpolicy
|
||||
0x192: 0x15602, // th
|
||||
0x194: 0x53606, // prompt
|
||||
0x195: 0x56807, // section
|
||||
0x197: 0x6d107, // optimum
|
||||
0x198: 0x2db04, // high
|
||||
0x199: 0x15c02, // h1
|
||||
0x19a: 0x65909, // onstalled
|
||||
0x19b: 0x16d03, // var
|
||||
0x19c: 0x4204, // time
|
||||
0x19e: 0x67402, // ms
|
||||
0x19f: 0x33106, // header
|
||||
0x1a0: 0x4da09, // onmessage
|
||||
0x1a1: 0x1a605, // nonce
|
||||
0x1a2: 0x26e0a, // formaction
|
||||
0x1a3: 0x22006, // center
|
||||
0x1a4: 0x3704, // nobr
|
||||
0x1a5: 0x59505, // table
|
||||
0x1a6: 0x4a907, // listing
|
||||
0x1a7: 0x18106, // legend
|
||||
0x1a9: 0x29b09, // challenge
|
||||
0x1aa: 0x24806, // figure
|
||||
0x1ab: 0xe605, // media
|
||||
0x1ae: 0xd904, // type
|
||||
0x1af: 0x3f04, // font
|
||||
0x1b0: 0x4da0e, // onmessageerror
|
||||
0x1b1: 0x37108, // seamless
|
||||
0x1b2: 0x8703, // dfn
|
||||
0x1b3: 0x5c705, // defer
|
||||
0x1b4: 0xc303, // low
|
||||
0x1b5: 0x19a03, // rtc
|
||||
0x1b6: 0x5230b, // onmouseover
|
||||
0x1b7: 0x2b20a, // novalidate
|
||||
0x1b8: 0x71c0a, // workertype
|
||||
0x1ba: 0x3cd07, // itemref
|
||||
0x1bd: 0x1, // a
|
||||
0x1be: 0x31803, // map
|
||||
0x1bf: 0x400c, // ontimeupdate
|
||||
0x1c0: 0x15e07, // bgsound
|
||||
0x1c1: 0x3206, // keygen
|
||||
0x1c2: 0x2705, // tbody
|
||||
0x1c5: 0x64406, // onshow
|
||||
0x1c7: 0x2501, // s
|
||||
0x1c8: 0x6607, // pattern
|
||||
0x1cc: 0x14d10, // oncanplaythrough
|
||||
0x1ce: 0x2d702, // dd
|
||||
0x1cf: 0x6f906, // srcset
|
||||
0x1d0: 0x17003, // big
|
||||
0x1d2: 0x65108, // sortable
|
||||
0x1d3: 0x48007, // onkeyup
|
||||
0x1d5: 0x5a406, // onplay
|
||||
0x1d7: 0x4b804, // meta
|
||||
0x1d8: 0x40306, // ondrop
|
||||
0x1da: 0x60008, // onscroll
|
||||
0x1db: 0x1fb0b, // crossorigin
|
||||
0x1dc: 0x5730a, // onpageshow
|
||||
0x1dd: 0x4, // abbr
|
||||
0x1de: 0x9202, // td
|
||||
0x1df: 0x58b0f, // contenteditable
|
||||
0x1e0: 0x27206, // action
|
||||
0x1e1: 0x1400b, // playsinline
|
||||
0x1e2: 0x43107, // onfocus
|
||||
0x1e3: 0x2e008, // hreflang
|
||||
0x1e5: 0x5160a, // onmouseout
|
||||
0x1e6: 0x5ea07, // onreset
|
||||
0x1e7: 0x13c08, // autoplay
|
||||
0x1e8: 0x63109, // onseeking
|
||||
0x1ea: 0x67506, // scoped
|
||||
0x1ec: 0x30a, // radiogroup
|
||||
0x1ee: 0x3800b, // contextmenu
|
||||
0x1ef: 0x52e09, // onmouseup
|
||||
0x1f1: 0x2ca06, // hgroup
|
||||
0x1f2: 0x2080f, // allowfullscreen
|
||||
0x1f3: 0x4be08, // tabindex
|
||||
0x1f6: 0x30f07, // isindex
|
||||
0x1f7: 0x1a0e, // accept-charset
|
||||
0x1f8: 0x2ae0e, // formnovalidate
|
||||
0x1fb: 0x1c90e, // annotation-xml
|
||||
0x1fc: 0x6e05, // embed
|
||||
0x1fd: 0x21806, // script
|
||||
0x1fe: 0xbb06, // dialog
|
||||
0x1ff: 0x1d707, // command
|
||||
}
|
||||
|
||||
// atomText is the single backing string that holds every atom name. Names
// overlap wherever possible ("abbr" and "radiogroup" share the "r" at offset
// 3), and each table entry refers to its name by position: as the visible
// entries show, the low 8 bits are the name's length and the remaining high
// bits are its byte offset into this string (0x3f04 is "font": offset 0x3f,
// length 4).
const atomText = "abbradiogrouparamainavalueaccept-charsetbodyaccesskeygenobrb" +
	"asefontimeupdateviacacheightmlabelooptgroupatternoembedetail" +
	"sampictureversedfnoframesetdirnameterowspanomoduleacronymali" +
	"gnmarkbdialogallowpaymentrequestrikeytypeallowusermediagroup" +
	"ingaltfooterubyasyncanvasidefaultitleaudioncancelautofocusan" +
	"dboxmplaceholderautoplaysinlinebdoncanplaythrough1bgsoundisa" +
	"bledivarbigblinkindraggablegendblockquotebuttonabortcitempro" +
	"penoncecolgrouplaintextrackcolorcolspannotation-xmlcommandco" +
	"ntrolshapecoordslotranslatecrossoriginsmallowfullscreenoscri" +
	"ptfacenterfieldsetfigcaptionafterprintegrityfigurequiredfore" +
	"ignObjectforeignobjectformactionautocompleteerrorformenctype" +
	"mustmatchallengeformmethodformnovalidatetimeformtargethgroup" +
	"osterhiddenhigh2hreflanghttp-equivideonclickiframeimageimgly" +
	"ph3isindexismappletitemtypemarqueematheadersortedmaxlength4m" +
	"inlength5mtextareadonlymultiplemutedoncloseamlessourceoncont" +
	"extmenuitemidoncopyoncuechangeoncutondblclickondragendondrag" +
	"enterondragexitemreferrerpolicyondragleaveondragoverondragst" +
	"articleondropzonemptiedondurationchangeonendedonerroronfocus" +
	"paceronhashchangeoninputmodeloninvalidonkeydownloadonkeypres" +
	"spellcheckedonkeyupreloadonlanguagechangeonloadeddatalisting" +
	"onloadedmetadatabindexonloadendonloadstartonmessageerroronmo" +
	"usedownonmouseenteronmouseleaveonmousemoveonmouseoutputonmou" +
	"seoveronmouseupromptonmousewheelonofflineononlineonpagehides" +
	"classectionbluronpageshowbronpastepublicontenteditableonpaus" +
	"emaponplayingonpopstateonprogressrcdocodeferonratechangeonre" +
	"jectionhandledonresetonresizesrclangonscrollonsecuritypolicy" +
	"violationauxclickonseekedonseekingonselectedonshowidth6onsor" +
	"tableonstalledonstorageonsubmitemscopedonsuspendontoggleonun" +
	"handledrejectionbeforeprintonunloadonvolumechangeonwaitingon" +
	"wheeloptimumanifestrongoptionbeforeunloaddressrcsetstylesumm" +
	"arysupsvgsystemplateworkertypewrap"
|
@ -0,0 +1,111 @@
|
||||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package html
|
||||
|
||||
// isSpecialElementMap is the set of HTML-namespace tag names that belong to
// the "special" category. Section 12.2.4.2 of the HTML5 specification says
// "The following elements have varying levels of special parsing rules".
// https://html.spec.whatwg.org/multipage/syntax.html#the-stack-of-open-elements
//
// Only present keys map to true; a lookup of any other name yields false.
var isSpecialElementMap = map[string]bool{
	"address":    true,
	"applet":     true,
	"area":       true,
	"article":    true,
	"aside":      true,
	"base":       true,
	"basefont":   true,
	"bgsound":    true,
	"blockquote": true,
	"body":       true,
	"br":         true,
	"button":     true,
	"caption":    true,
	"center":     true,
	"col":        true,
	"colgroup":   true,
	"dd":         true,
	"details":    true,
	"dir":        true,
	"div":        true,
	"dl":         true,
	"dt":         true,
	"embed":      true,
	"fieldset":   true,
	"figcaption": true,
	"figure":     true,
	"footer":     true,
	"form":       true,
	"frame":      true,
	"frameset":   true,
	"h1":         true,
	"h2":         true,
	"h3":         true,
	"h4":         true,
	"h5":         true,
	"h6":         true,
	"head":       true,
	"header":     true,
	"hgroup":     true,
	"hr":         true,
	"html":       true,
	"iframe":     true,
	"img":        true,
	"input":      true,
	"keygen":     true, // "keygen" has been removed from the spec, but is kept here for backwards compatibility.
	"li":         true,
	"link":       true,
	"listing":    true,
	"main":       true,
	"marquee":    true,
	"menu":       true,
	"meta":       true,
	"nav":        true,
	"noembed":    true,
	"noframes":   true,
	"noscript":   true,
	"object":     true,
	"ol":         true,
	"p":          true,
	"param":      true,
	"plaintext":  true,
	"pre":        true,
	"script":     true,
	"section":    true,
	"select":     true,
	"source":     true,
	"style":      true,
	"summary":    true,
	"table":      true,
	"tbody":      true,
	"td":         true,
	"template":   true,
	"textarea":   true,
	"tfoot":      true,
	"th":         true,
	"thead":      true,
	"title":      true,
	"tr":         true,
	"track":      true,
	"ul":         true,
	"wbr":        true,
	"xmp":        true,
}
|
||||
|
||||
func isSpecialElement(element *Node) bool {
|
||||
switch element.Namespace {
|
||||
case "", "html":
|
||||
return isSpecialElementMap[element.Data]
|
||||
case "math":
|
||||
switch element.Data {
|
||||
case "mi", "mo", "mn", "ms", "mtext", "annotation-xml":
|
||||
return true
|
||||
}
|
||||
case "svg":
|
||||
switch element.Data {
|
||||
case "foreignObject", "desc", "title":
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
@ -0,0 +1,106 @@
|
||||
// Copyright 2010 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
/*
|
||||
Package html implements an HTML5-compliant tokenizer and parser.
|
||||
|
||||
Tokenization is done by creating a Tokenizer for an io.Reader r. It is the
|
||||
caller's responsibility to ensure that r provides UTF-8 encoded HTML.
|
||||
|
||||
z := html.NewTokenizer(r)
|
||||
|
||||
Given a Tokenizer z, the HTML is tokenized by repeatedly calling z.Next(),
|
||||
which parses the next token and returns its type, or an error:
|
||||
|
||||
for {
|
||||
tt := z.Next()
|
||||
if tt == html.ErrorToken {
|
||||
// ...
|
||||
return ...
|
||||
}
|
||||
// Process the current token.
|
||||
}
|
||||
|
||||
There are two APIs for retrieving the current token. The high-level API is to
|
||||
call Token; the low-level API is to call Text or TagName / TagAttr. Both APIs
|
||||
allow optionally calling Raw after Next but before Token, Text, TagName, or
|
||||
TagAttr. In EBNF notation, the valid call sequence per token is:
|
||||
|
||||
Next {Raw} [ Token | Text | TagName {TagAttr} ]
|
||||
|
||||
Token returns an independent data structure that completely describes a token.
|
||||
Entities (such as "&lt;") are unescaped, tag names and attribute keys are
|
||||
lower-cased, and attributes are collected into a []Attribute. For example:
|
||||
|
||||
for {
|
||||
if z.Next() == html.ErrorToken {
|
||||
// Returning io.EOF indicates success.
|
||||
return z.Err()
|
||||
}
|
||||
emitToken(z.Token())
|
||||
}
|
||||
|
||||
The low-level API performs fewer allocations and copies, but the contents of
|
||||
the []byte values returned by Text, TagName and TagAttr may change on the next
|
||||
call to Next. For example, to extract an HTML page's anchor text:
|
||||
|
||||
depth := 0
|
||||
for {
|
||||
tt := z.Next()
|
||||
switch tt {
|
||||
case html.ErrorToken:
|
||||
return z.Err()
|
||||
case html.TextToken:
|
||||
if depth > 0 {
|
||||
// emitBytes should copy the []byte it receives,
|
||||
// if it doesn't process it immediately.
|
||||
emitBytes(z.Text())
|
||||
}
|
||||
case html.StartTagToken, html.EndTagToken:
|
||||
tn, _ := z.TagName()
|
||||
if len(tn) == 1 && tn[0] == 'a' {
|
||||
if tt == html.StartTagToken {
|
||||
depth++
|
||||
} else {
|
||||
depth--
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Parsing is done by calling Parse with an io.Reader, which returns the root of
|
||||
the parse tree (the document element) as a *Node. It is the caller's
|
||||
responsibility to ensure that the Reader provides UTF-8 encoded HTML. For
|
||||
example, to process each anchor node in depth-first order:
|
||||
|
||||
doc, err := html.Parse(r)
|
||||
if err != nil {
|
||||
// ...
|
||||
}
|
||||
var f func(*html.Node)
|
||||
f = func(n *html.Node) {
|
||||
if n.Type == html.ElementNode && n.Data == "a" {
|
||||
// Do something with n...
|
||||
}
|
||||
for c := n.FirstChild; c != nil; c = c.NextSibling {
|
||||
f(c)
|
||||
}
|
||||
}
|
||||
f(doc)
|
||||
|
||||
The relevant specifications include:
|
||||
https://html.spec.whatwg.org/multipage/syntax.html and
|
||||
https://html.spec.whatwg.org/multipage/syntax.html#tokenization
|
||||
*/
|
||||
package html // import "golang.org/x/net/html"
|
||||
|
||||
// The tokenization algorithm implemented by this package is not a line-by-line
|
||||
// transliteration of the relatively verbose state-machine in the WHATWG
|
||||
// specification. A more direct approach is used instead, where the program
|
||||
// counter implies the state, such as whether it is tokenizing a tag or a text
|
||||
// node. Specification compliance is verified by checking expected and actual
|
||||
// outputs over a test suite rather than aiming for algorithmic fidelity.
|
||||
|
||||
// TODO(nigeltao): Does a DOM API belong in this package or a separate one?
|
||||
// TODO(nigeltao): How does parsing interact with a JavaScript engine?
|
@ -0,0 +1,156 @@
|
||||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package html
|
||||
|
||||
import (
|
||||
"strings"
|
||||
)
|
||||
|
||||
// parseDoctype parses the data from a DoctypeToken into a name,
|
||||
// public identifier, and system identifier. It returns a Node whose Type
|
||||
// is DoctypeNode, whose Data is the name, and which has attributes
|
||||
// named "system" and "public" for the two identifiers if they were present.
|
||||
// quirks is whether the document should be parsed in "quirks mode".
|
||||
func parseDoctype(s string) (n *Node, quirks bool) {
|
||||
n = &Node{Type: DoctypeNode}
|
||||
|
||||
// Find the name.
|
||||
space := strings.IndexAny(s, whitespace)
|
||||
if space == -1 {
|
||||
space = len(s)
|
||||
}
|
||||
n.Data = s[:space]
|
||||
// The comparison to "html" is case-sensitive.
|
||||
if n.Data != "html" {
|
||||
quirks = true
|
||||
}
|
||||
n.Data = strings.ToLower(n.Data)
|
||||
s = strings.TrimLeft(s[space:], whitespace)
|
||||
|
||||
if len(s) < 6 {
|
||||
// It can't start with "PUBLIC" or "SYSTEM".
|
||||
// Ignore the rest of the string.
|
||||
return n, quirks || s != ""
|
||||
}
|
||||
|
||||
key := strings.ToLower(s[:6])
|
||||
s = s[6:]
|
||||
for key == "public" || key == "system" {
|
||||
s = strings.TrimLeft(s, whitespace)
|
||||
if s == "" {
|
||||
break
|
||||
}
|
||||
quote := s[0]
|
||||
if quote != '"' && quote != '\'' {
|
||||
break
|
||||
}
|
||||
s = s[1:]
|
||||
q := strings.IndexRune(s, rune(quote))
|
||||
var id string
|
||||
if q == -1 {
|
||||
id = s
|
||||
s = ""
|
||||
} else {
|
||||
id = s[:q]
|
||||
s = s[q+1:]
|
||||
}
|
||||
n.Attr = append(n.Attr, Attribute{Key: key, Val: id})
|
||||
if key == "public" {
|
||||
key = "system"
|
||||
} else {
|
||||
key = ""
|
||||
}
|
||||
}
|
||||
|
||||
if key != "" || s != "" {
|
||||
quirks = true
|
||||
} else if len(n.Attr) > 0 {
|
||||
if n.Attr[0].Key == "public" {
|
||||
public := strings.ToLower(n.Attr[0].Val)
|
||||
switch public {
|
||||
case "-//w3o//dtd w3 html strict 3.0//en//", "-/w3d/dtd html 4.0 transitional/en", "html":
|
||||
quirks = true
|
||||
default:
|
||||
for _, q := range quirkyIDs {
|
||||
if strings.HasPrefix(public, q) {
|
||||
quirks = true
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
// The following two public IDs only cause quirks mode if there is no system ID.
|
||||
if len(n.Attr) == 1 && (strings.HasPrefix(public, "-//w3c//dtd html 4.01 frameset//") ||
|
||||
strings.HasPrefix(public, "-//w3c//dtd html 4.01 transitional//")) {
|
||||
quirks = true
|
||||
}
|
||||
}
|
||||
if lastAttr := n.Attr[len(n.Attr)-1]; lastAttr.Key == "system" &&
|
||||
strings.ToLower(lastAttr.Val) == "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd" {
|
||||
quirks = true
|
||||
}
|
||||
}
|
||||
|
||||
return n, quirks
|
||||
}
|
||||
|
||||
// quirkyIDs lists the public doctype identifier prefixes that put a document
// into quirks mode. Entries must be lower case, because they are matched with
// strings.HasPrefix against a lower-cased identifier.
var quirkyIDs = []string{
	"+//silmaril//dtd html pro v0r11 19970101//",
	"-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
	"-//as//dtd html 3.0 aswedit + extensions//",
	"-//ietf//dtd html 2.0 level 1//",
	"-//ietf//dtd html 2.0 level 2//",
	"-//ietf//dtd html 2.0 strict level 1//",
	"-//ietf//dtd html 2.0 strict level 2//",
	"-//ietf//dtd html 2.0 strict//",
	"-//ietf//dtd html 2.0//",
	"-//ietf//dtd html 2.1e//",
	"-//ietf//dtd html 3.0//",
	"-//ietf//dtd html 3.2 final//",
	"-//ietf//dtd html 3.2//",
	"-//ietf//dtd html 3//",
	"-//ietf//dtd html level 0//",
	"-//ietf//dtd html level 1//",
	"-//ietf//dtd html level 2//",
	"-//ietf//dtd html level 3//",
	"-//ietf//dtd html strict level 0//",
	"-//ietf//dtd html strict level 1//",
	"-//ietf//dtd html strict level 2//",
	"-//ietf//dtd html strict level 3//",
	"-//ietf//dtd html strict//",
	"-//ietf//dtd html//",
	"-//metrius//dtd metrius presentational//",
	"-//microsoft//dtd internet explorer 2.0 html strict//",
	"-//microsoft//dtd internet explorer 2.0 html//",
	"-//microsoft//dtd internet explorer 2.0 tables//",
	"-//microsoft//dtd internet explorer 3.0 html strict//",
	"-//microsoft//dtd internet explorer 3.0 html//",
	"-//microsoft//dtd internet explorer 3.0 tables//",
	"-//netscape comm. corp.//dtd html//",
	"-//netscape comm. corp.//dtd strict html//",
	"-//o'reilly and associates//dtd html 2.0//",
	"-//o'reilly and associates//dtd html extended 1.0//",
	"-//o'reilly and associates//dtd html extended relaxed 1.0//",
	"-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//",
	"-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//",
	"-//spyglass//dtd html 2.0 extended//",
	"-//sq//dtd html 2.0 hotmetal + extensions//",
	"-//sun microsystems corp.//dtd hotjava html//",
	"-//sun microsystems corp.//dtd hotjava strict html//",
	"-//w3c//dtd html 3 1995-03-24//",
	"-//w3c//dtd html 3.2 draft//",
	"-//w3c//dtd html 3.2 final//",
	"-//w3c//dtd html 3.2//",
	"-//w3c//dtd html 3.2s draft//",
	"-//w3c//dtd html 4.0 frameset//",
	"-//w3c//dtd html 4.0 transitional//",
	"-//w3c//dtd html experimental 19960712//",
	"-//w3c//dtd html experimental 970421//",
	"-//w3c//dtd w3 html//",
	"-//w3o//dtd w3 html 3.0//",
	"-//webtechs//dtd mozilla html 2.0//",
	"-//webtechs//dtd mozilla html//",
}
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,258 @@
|
||||
// Copyright 2010 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package html
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// replacementTable maps the numeric character references 0x80 through 0x9F
// to the code points they stand for, for compatibility with old numeric
// entities that assumed Windows-1252 encoding. Index 0 is the replacement
// for 0x80 and index 31 the replacement for 0x9F.
// https://html.spec.whatwg.org/multipage/syntax.html#consume-a-character-reference
//
// Two further replacements are not part of the table:
// 0x00->'\uFFFD' is handled programmatically, and 0x0D->'\u000D' is a no-op.
var replacementTable = [...]rune{
	// 0x80 - 0x87
	'\u20AC', '\u0081', '\u201A', '\u0192', '\u201E', '\u2026', '\u2020', '\u2021',
	// 0x88 - 0x8F
	'\u02C6', '\u2030', '\u0160', '\u2039', '\u0152', '\u008D', '\u017D', '\u008F',
	// 0x90 - 0x97
	'\u0090', '\u2018', '\u2019', '\u201C', '\u201D', '\u2022', '\u2013', '\u2014',
	// 0x98 - 0x9F
	'\u02DC', '\u2122', '\u0161', '\u203A', '\u0153', '\u009D', '\u017E', '\u0178',
}
|
||||
|
||||
// unescapeEntity reads an entity like "<" from b[src:] and writes the
|
||||
// corresponding "<" to b[dst:], returning the incremented dst and src cursors.
|
||||
// Precondition: b[src] == '&' && dst <= src.
|
||||
// attribute should be true if parsing an attribute value.
|
||||
func unescapeEntity(b []byte, dst, src int, attribute bool) (dst1, src1 int) {
|
||||
// https://html.spec.whatwg.org/multipage/syntax.html#consume-a-character-reference
|
||||
|
||||
// i starts at 1 because we already know that s[0] == '&'.
|
||||
i, s := 1, b[src:]
|
||||
|
||||
if len(s) <= 1 {
|
||||
b[dst] = b[src]
|
||||
return dst + 1, src + 1
|
||||
}
|
||||
|
||||
if s[i] == '#' {
|
||||
if len(s) <= 3 { // We need to have at least "&#.".
|
||||
b[dst] = b[src]
|
||||
return dst + 1, src + 1
|
||||
}
|
||||
i++
|
||||
c := s[i]
|
||||
hex := false
|
||||
if c == 'x' || c == 'X' {
|
||||
hex = true
|
||||
i++
|
||||
}
|
||||
|
||||
x := '\x00'
|
||||
for i < len(s) {
|
||||
c = s[i]
|
||||
i++
|
||||
if hex {
|
||||
if '0' <= c && c <= '9' {
|
||||
x = 16*x + rune(c) - '0'
|
||||
continue
|
||||
} else if 'a' <= c && c <= 'f' {
|
||||
x = 16*x + rune(c) - 'a' + 10
|
||||
continue
|
||||
} else if 'A' <= c && c <= 'F' {
|
||||
x = 16*x + rune(c) - 'A' + 10
|
||||
continue
|
||||
}
|
||||
} else if '0' <= c && c <= '9' {
|
||||
x = 10*x + rune(c) - '0'
|
||||
continue
|
||||
}
|
||||
if c != ';' {
|
||||
i--
|
||||
}
|
||||
break
|
||||
}
|
||||
|
||||
if i <= 3 { // No characters matched.
|
||||
b[dst] = b[src]
|
||||
return dst + 1, src + 1
|
||||
}
|
||||
|
||||
if 0x80 <= x && x <= 0x9F {
|
||||
// Replace characters from Windows-1252 with UTF-8 equivalents.
|
||||
x = replacementTable[x-0x80]
|
||||
} else if x == 0 || (0xD800 <= x && x <= 0xDFFF) || x > 0x10FFFF {
|
||||
// Replace invalid characters with the replacement character.
|
||||
x = '\uFFFD'
|
||||
}
|
||||
|
||||
return dst + utf8.EncodeRune(b[dst:], x), src + i
|
||||
}
|
||||
|
||||
// Consume the maximum number of characters possible, with the
|
||||
// consumed characters matching one of the named references.
|
||||
|
||||
for i < len(s) {
|
||||
c := s[i]
|
||||
i++
|
||||
// Lower-cased characters are more common in entities, so we check for them first.
|
||||
if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9' {
|
||||
continue
|
||||
}
|
||||
if c != ';' {
|
||||
i--
|
||||
}
|
||||
break
|
||||
}
|
||||
|
||||
entityName := string(s[1:i])
|
||||
if entityName == "" {
|
||||
// No-op.
|
||||
} else if attribute && entityName[len(entityName)-1] != ';' && len(s) > i && s[i] == '=' {
|
||||
// No-op.
|
||||
} else if x := entity[entityName]; x != 0 {
|
||||
return dst + utf8.EncodeRune(b[dst:], x), src + i
|
||||
} else if x := entity2[entityName]; x[0] != 0 {
|
||||
dst1 := dst + utf8.EncodeRune(b[dst:], x[0])
|
||||
return dst1 + utf8.EncodeRune(b[dst1:], x[1]), src + i
|
||||
} else if !attribute {
|
||||
maxLen := len(entityName) - 1
|
||||
if maxLen > longestEntityWithoutSemicolon {
|
||||
maxLen = longestEntityWithoutSemicolon
|
||||
}
|
||||
for j := maxLen; j > 1; j-- {
|
||||
if x := entity[entityName[:j]]; x != 0 {
|
||||
return dst + utf8.EncodeRune(b[dst:], x), src + j + 1
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
dst1, src1 = dst+i, src+i
|
||||
copy(b[dst:dst1], b[src:src1])
|
||||
return dst1, src1
|
||||
}
|
||||
|
||||
// unescape unescapes b's entities in-place, so that "a<b" becomes "a<b".
|
||||
// attribute should be true if parsing an attribute value.
|
||||
func unescape(b []byte, attribute bool) []byte {
|
||||
for i, c := range b {
|
||||
if c == '&' {
|
||||
dst, src := unescapeEntity(b, i, i, attribute)
|
||||
for src < len(b) {
|
||||
c := b[src]
|
||||
if c == '&' {
|
||||
dst, src = unescapeEntity(b, dst, src, attribute)
|
||||
} else {
|
||||
b[dst] = c
|
||||
dst, src = dst+1, src+1
|
||||
}
|
||||
}
|
||||
return b[0:dst]
|
||||
}
|
||||
}
|
||||
return b
|
||||
}
|
||||
|
||||
// lower converts the ASCII letters A-Z in b to a-z in place, so that "aBc"
// becomes "abc", and returns b. All other bytes are left untouched.
func lower(b []byte) []byte {
	for i := range b {
		if ch := b[i]; ch >= 'A' && ch <= 'Z' {
			b[i] += 'a' - 'A'
		}
	}
	return b
}
|
||||
|
||||
const escapedChars = "&'<>\"\r"
|
||||
|
||||
func escape(w writer, s string) error {
|
||||
i := strings.IndexAny(s, escapedChars)
|
||||
for i != -1 {
|
||||
if _, err := w.WriteString(s[:i]); err != nil {
|
||||
return err
|
||||
}
|
||||
var esc string
|
||||
switch s[i] {
|
||||
case '&':
|
||||
esc = "&"
|
||||
case '\'':
|
||||
// "'" is shorter than "'" and apos was not in HTML until HTML5.
|
||||
esc = "'"
|
||||
case '<':
|
||||
esc = "<"
|
||||
case '>':
|
||||
esc = ">"
|
||||
case '"':
|
||||
// """ is shorter than """.
|
||||
esc = """
|
||||
case '\r':
|
||||
esc = " "
|
||||
default:
|
||||
panic("unrecognized escape character")
|
||||
}
|
||||
s = s[i+1:]
|
||||
if _, err := w.WriteString(esc); err != nil {
|
||||
return err
|
||||
}
|
||||
i = strings.IndexAny(s, escapedChars)
|
||||
}
|
||||
_, err := w.WriteString(s)
|
||||
return err
|
||||
}
|
||||
|
||||
// EscapeString escapes special characters like "<" to become "<". It
|
||||
// escapes only five such characters: <, >, &, ' and ".
|
||||
// UnescapeString(EscapeString(s)) == s always holds, but the converse isn't
|
||||
// always true.
|
||||
func EscapeString(s string) string {
|
||||
if strings.IndexAny(s, escapedChars) == -1 {
|
||||
return s
|
||||
}
|
||||
var buf bytes.Buffer
|
||||
escape(&buf, s)
|
||||
return buf.String()
|
||||
}
|
||||
|
||||
// UnescapeString unescapes entities like "<" to become "<". It unescapes a
|
||||
// larger range of entities than EscapeString escapes. For example, "á"
|
||||
// unescapes to "á", as does "á" and "&xE1;".
|
||||
// UnescapeString(EscapeString(s)) == s always holds, but the converse isn't
|
||||
// always true.
|
||||
func UnescapeString(s string) string {
|
||||
for _, c := range s {
|
||||
if c == '&' {
|
||||
return string(unescape([]byte(s), false))
|
||||
}
|
||||
}
|
||||
return s
|
||||
}
|
@ -0,0 +1,222 @@
|
||||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package html
|
||||
|
||||
import (
|
||||
"strings"
|
||||
)
|
||||
|
||||
func adjustAttributeNames(aa []Attribute, nameMap map[string]string) {
|
||||
for i := range aa {
|
||||
if newName, ok := nameMap[aa[i].Key]; ok {
|
||||
aa[i].Key = newName
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func adjustForeignAttributes(aa []Attribute) {
|
||||
for i, a := range aa {
|
||||
if a.Key == "" || a.Key[0] != 'x' {
|
||||
continue
|
||||
}
|
||||
switch a.Key {
|
||||
case "xlink:actuate", "xlink:arcrole", "xlink:href", "xlink:role", "xlink:show",
|
||||
"xlink:title", "xlink:type", "xml:base", "xml:lang", "xml:space", "xmlns:xlink":
|
||||
j := strings.Index(a.Key, ":")
|
||||
aa[i].Namespace = a.Key[:j]
|
||||
aa[i].Key = a.Key[j+1:]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func htmlIntegrationPoint(n *Node) bool {
|
||||
if n.Type != ElementNode {
|
||||
return false
|
||||
}
|
||||
switch n.Namespace {
|
||||
case "math":
|
||||
if n.Data == "annotation-xml" {
|
||||
for _, a := range n.Attr {
|
||||
if a.Key == "encoding" {
|
||||
val := strings.ToLower(a.Val)
|
||||
if val == "text/html" || val == "application/xhtml+xml" {
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
case "svg":
|
||||
switch n.Data {
|
||||
case "desc", "foreignObject", "title":
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func mathMLTextIntegrationPoint(n *Node) bool {
|
||||
if n.Namespace != "math" {
|
||||
return false
|
||||
}
|
||||
switch n.Data {
|
||||
case "mi", "mo", "mn", "ms", "mtext":
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// breakout is the set of tag names listed in Section 12.2.6.5.
// NOTE(review): these are presumably the start tags that break out of
// foreign (SVG / MathML) content - confirm against the parser.
// Only present keys map to true.
var breakout = map[string]bool{
	"b":          true,
	"big":        true,
	"blockquote": true,
	"body":       true,
	"br":         true,
	"center":     true,
	"code":       true,
	"dd":         true,
	"div":        true,
	"dl":         true,
	"dt":         true,
	"em":         true,
	"embed":      true,
	"h1":         true,
	"h2":         true,
	"h3":         true,
	"h4":         true,
	"h5":         true,
	"h6":         true,
	"head":       true,
	"hr":         true,
	"i":          true,
	"img":        true,
	"li":         true,
	"listing":    true,
	"menu":       true,
	"meta":       true,
	"nobr":       true,
	"ol":         true,
	"p":          true,
	"pre":        true,
	"ruby":       true,
	"s":          true,
	"small":      true,
	"span":       true,
	"strong":     true,
	"strike":     true,
	"sub":        true,
	"sup":        true,
	"table":      true,
	"tt":         true,
	"u":          true,
	"ul":         true,
	"var":        true,
}
|
||||
|
||||
// svgTagNameAdjustments maps lower-cased SVG tag names to their canonical
// mixed-case spelling (Section 12.2.6.5).
var svgTagNameAdjustments = map[string]string{
	"altglyph":            "altGlyph",
	"altglyphdef":         "altGlyphDef",
	"altglyphitem":        "altGlyphItem",
	"animatecolor":        "animateColor",
	"animatemotion":       "animateMotion",
	"animatetransform":    "animateTransform",
	"clippath":            "clipPath",
	"feblend":             "feBlend",
	"fecolormatrix":       "feColorMatrix",
	"fecomponenttransfer": "feComponentTransfer",
	"fecomposite":         "feComposite",
	"feconvolvematrix":    "feConvolveMatrix",
	"fediffuselighting":   "feDiffuseLighting",
	"fedisplacementmap":   "feDisplacementMap",
	"fedistantlight":      "feDistantLight",
	"feflood":             "feFlood",
	"fefunca":             "feFuncA",
	"fefuncb":             "feFuncB",
	"fefuncg":             "feFuncG",
	"fefuncr":             "feFuncR",
	"fegaussianblur":      "feGaussianBlur",
	"feimage":             "feImage",
	"femerge":             "feMerge",
	"femergenode":         "feMergeNode",
	"femorphology":        "feMorphology",
	"feoffset":            "feOffset",
	"fepointlight":        "fePointLight",
	"fespecularlighting":  "feSpecularLighting",
	"fespotlight":         "feSpotLight",
	"fetile":              "feTile",
	"feturbulence":        "feTurbulence",
	"foreignobject":       "foreignObject",
	"glyphref":            "glyphRef",
	"lineargradient":      "linearGradient",
	"radialgradient":      "radialGradient",
	"textpath":            "textPath",
}
|
||||
|
||||
// mathMLAttributeAdjustments maps lower-cased MathML attribute names to
// their canonical mixed-case spelling (Section 12.2.6.1).
var mathMLAttributeAdjustments = map[string]string{"definitionurl": "definitionURL"}
|
||||
|
||||
// svgAttributeAdjustments maps lower-cased SVG attribute names to their
// canonical mixed-case spelling.
var svgAttributeAdjustments = map[string]string{
	"attributename":       "attributeName",
	"attributetype":       "attributeType",
	"basefrequency":       "baseFrequency",
	"baseprofile":         "baseProfile",
	"calcmode":            "calcMode",
	"clippathunits":       "clipPathUnits",
	"diffuseconstant":     "diffuseConstant",
	"edgemode":            "edgeMode",
	"filterunits":         "filterUnits",
	"glyphref":            "glyphRef",
	"gradienttransform":   "gradientTransform",
	"gradientunits":       "gradientUnits",
	"kernelmatrix":        "kernelMatrix",
	"kernelunitlength":    "kernelUnitLength",
	"keypoints":           "keyPoints",
	"keysplines":          "keySplines",
	"keytimes":            "keyTimes",
	"lengthadjust":        "lengthAdjust",
	"limitingconeangle":   "limitingConeAngle",
	"markerheight":        "markerHeight",
	"markerunits":         "markerUnits",
	"markerwidth":         "markerWidth",
	"maskcontentunits":    "maskContentUnits",
	"maskunits":           "maskUnits",
	"numoctaves":          "numOctaves",
	"pathlength":          "pathLength",
	"patterncontentunits": "patternContentUnits",
	"patterntransform":    "patternTransform",
	"patternunits":        "patternUnits",
	"pointsatx":           "pointsAtX",
	"pointsaty":           "pointsAtY",
	"pointsatz":           "pointsAtZ",
	"preservealpha":       "preserveAlpha",
	"preserveaspectratio": "preserveAspectRatio",
	"primitiveunits":      "primitiveUnits",
	"refx":                "refX",
	"refy":                "refY",
	"repeatcount":         "repeatCount",
	"repeatdur":           "repeatDur",
	"requiredextensions":  "requiredExtensions",
	"requiredfeatures":    "requiredFeatures",
	"specularconstant":    "specularConstant",
	"specularexponent":    "specularExponent",
	"spreadmethod":        "spreadMethod",
	"startoffset":         "startOffset",
	"stddeviation":        "stdDeviation",
	"stitchtiles":         "stitchTiles",
	"surfacescale":        "surfaceScale",
	"systemlanguage":      "systemLanguage",
	"tablevalues":         "tableValues",
	"targetx":             "targetX",
	"targety":             "targetY",
	"textlength":          "textLength",
	"viewbox":             "viewBox",
	"viewtarget":          "viewTarget",
	"xchannelselector":    "xChannelSelector",
	"ychannelselector":    "yChannelSelector",
	"zoomandpan":          "zoomAndPan",
}
|
@ -0,0 +1,225 @@
|
||||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package html
|
||||
|
||||
import (
|
||||
"golang.org/x/net/html/atom"
|
||||
)
|
||||
|
||||
// A NodeType is the type of a Node.
type NodeType uint32

// The node types, numbered consecutively from zero. scopeMarkerNode is
// unexported and is only used for the package's internal scopeMarker node.
const (
	ErrorNode NodeType = iota
	TextNode
	DocumentNode
	ElementNode
	CommentNode
	DoctypeNode

	// RawNode nodes are not returned by the parser, but can be part of the
	// Node tree passed to func Render to insert raw HTML (without escaping).
	// If so, this package makes no guarantee that the rendered HTML is secure
	// (from e.g. Cross Site Scripting attacks) or well-formed.
	RawNode

	scopeMarkerNode
)
|
||||
|
||||
// Section 12.2.4.3 says "The markers are inserted when entering applet,
|
||||
// object, marquee, template, td, th, and caption elements, and are used
|
||||
// to prevent formatting from "leaking" into applet, object, marquee,
|
||||
// template, td, th, and caption elements".
|
||||
var scopeMarker = Node{Type: scopeMarkerNode}
|
||||
|
||||
// A Node consists of a NodeType and some Data (tag name for element nodes,
|
||||
// content for text) and are part of a tree of Nodes. Element nodes may also
|
||||
// have a Namespace and contain a slice of Attributes. Data is unescaped, so
|
||||
// that it looks like "a<b" rather than "a<b". For element nodes, DataAtom
|
||||
// is the atom for Data, or zero if Data is not a known tag name.
|
||||
//
|
||||
// An empty Namespace implies a "http://www.w3.org/1999/xhtml" namespace.
|
||||
// Similarly, "math" is short for "http://www.w3.org/1998/Math/MathML", and
|
||||
// "svg" is short for "http://www.w3.org/2000/svg".
|
||||
type Node struct {
|
||||
Parent, FirstChild, LastChild, PrevSibling, NextSibling *Node
|
||||
|
||||
Type NodeType
|
||||
DataAtom atom.Atom
|
||||
Data string
|
||||
Namespace string
|
||||
Attr []Attribute
|
||||
}
|
||||
|
||||
// InsertBefore inserts newChild as a child of n, immediately before oldChild
|
||||
// in the sequence of n's children. oldChild may be nil, in which case newChild
|
||||
// is appended to the end of n's children.
|
||||
//
|
||||
// It will panic if newChild already has a parent or siblings.
|
||||
func (n *Node) InsertBefore(newChild, oldChild *Node) {
|
||||
if newChild.Parent != nil || newChild.PrevSibling != nil || newChild.NextSibling != nil {
|
||||
panic("html: InsertBefore called for an attached child Node")
|
||||
}
|
||||
var prev, next *Node
|
||||
if oldChild != nil {
|
||||
prev, next = oldChild.PrevSibling, oldChild
|
||||
} else {
|
||||
prev = n.LastChild
|
||||
}
|
||||
if prev != nil {
|
||||
prev.NextSibling = newChild
|
||||
} else {
|
||||
n.FirstChild = newChild
|
||||
}
|
||||
if next != nil {
|
||||
next.PrevSibling = newChild
|
||||
} else {
|
||||
n.LastChild = newChild
|
||||
}
|
||||
newChild.Parent = n
|
||||
newChild.PrevSibling = prev
|
||||
newChild.NextSibling = next
|
||||
}
|
||||
|
||||
// AppendChild adds a node c as a child of n.
|
||||
//
|
||||
// It will panic if c already has a parent or siblings.
|
||||
func (n *Node) AppendChild(c *Node) {
|
||||
if c.Parent != nil || c.PrevSibling != nil || c.NextSibling != nil {
|
||||
panic("html: AppendChild called for an attached child Node")
|
||||
}
|
||||
last := n.LastChild
|
||||
if last != nil {
|
||||
last.NextSibling = c
|
||||
} else {
|
||||
n.FirstChild = c
|
||||
}
|
||||
n.LastChild = c
|
||||
c.Parent = n
|
||||
c.PrevSibling = last
|
||||
}
|
||||
|
||||
// RemoveChild removes a node c that is a child of n. Afterwards, c will have
|
||||
// no parent and no siblings.
|
||||
//
|
||||
// It will panic if c's parent is not n.
|
||||
func (n *Node) RemoveChild(c *Node) {
|
||||
if c.Parent != n {
|
||||
panic("html: RemoveChild called for a non-child Node")
|
||||
}
|
||||
if n.FirstChild == c {
|
||||
n.FirstChild = c.NextSibling
|
||||
}
|
||||
if c.NextSibling != nil {
|
||||
c.NextSibling.PrevSibling = c.PrevSibling
|
||||
}
|
||||
if n.LastChild == c {
|
||||
n.LastChild = c.PrevSibling
|
||||
}
|
||||
if c.PrevSibling != nil {
|
||||
c.PrevSibling.NextSibling = c.NextSibling
|
||||
}
|
||||
c.Parent = nil
|
||||
c.PrevSibling = nil
|
||||
c.NextSibling = nil
|
||||
}
|
||||
|
||||
// reparentChildren reparents all of src's child nodes to dst.
|
||||
func reparentChildren(dst, src *Node) {
|
||||
for {
|
||||
child := src.FirstChild
|
||||
if child == nil {
|
||||
break
|
||||
}
|
||||
src.RemoveChild(child)
|
||||
dst.AppendChild(child)
|
||||
}
|
||||
}
|
||||
|
||||
// clone returns a new node with the same type, data and attributes.
|
||||
// The clone has no parent, no siblings and no children.
|
||||
func (n *Node) clone() *Node {
|
||||
m := &Node{
|
||||
Type: n.Type,
|
||||
DataAtom: n.DataAtom,
|
||||
Data: n.Data,
|
||||
Attr: make([]Attribute, len(n.Attr)),
|
||||
}
|
||||
copy(m.Attr, n.Attr)
|
||||
return m
|
||||
}
|
||||
|
||||
// nodeStack is a stack of nodes.
|
||||
type nodeStack []*Node
|
||||
|
||||
// pop pops the stack. It will panic if s is empty.
|
||||
func (s *nodeStack) pop() *Node {
|
||||
i := len(*s)
|
||||
n := (*s)[i-1]
|
||||
*s = (*s)[:i-1]
|
||||
return n
|
||||
}
|
||||
|
||||
// top returns the most recently pushed node, or nil if s is empty.
|
||||
func (s *nodeStack) top() *Node {
|
||||
if i := len(*s); i > 0 {
|
||||
return (*s)[i-1]
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// index returns the index of the top-most occurrence of n in the stack, or -1
|
||||
// if n is not present.
|
||||
func (s *nodeStack) index(n *Node) int {
|
||||
for i := len(*s) - 1; i >= 0; i-- {
|
||||
if (*s)[i] == n {
|
||||
return i
|
||||
}
|
||||
}
|
||||
return -1
|
||||
}
|
||||
|
||||
// contains returns whether a is within s.
|
||||
func (s *nodeStack) contains(a atom.Atom) bool {
|
||||
for _, n := range *s {
|
||||
if n.DataAtom == a && n.Namespace == "" {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// insert inserts a node at the given index.
|
||||
func (s *nodeStack) insert(i int, n *Node) {
|
||||
(*s) = append(*s, nil)
|
||||
copy((*s)[i+1:], (*s)[i:])
|
||||
(*s)[i] = n
|
||||
}
|
||||
|
||||
// remove removes a node from the stack. It is a no-op if n is not present.
|
||||
func (s *nodeStack) remove(n *Node) {
|
||||
i := s.index(n)
|
||||
if i == -1 {
|
||||
return
|
||||
}
|
||||
copy((*s)[i:], (*s)[i+1:])
|
||||
j := len(*s) - 1
|
||||
(*s)[j] = nil
|
||||
*s = (*s)[:j]
|
||||
}
|
||||
|
||||
type insertionModeStack []insertionMode
|
||||
|
||||
func (s *insertionModeStack) pop() (im insertionMode) {
|
||||
i := len(*s)
|
||||
im = (*s)[i-1]
|
||||
*s = (*s)[:i-1]
|
||||
return im
|
||||
}
|
||||
|
||||
func (s *insertionModeStack) top() insertionMode {
|
||||
if i := len(*s); i > 0 {
|
||||
return (*s)[i-1]
|
||||
}
|
||||
return nil
|
||||
}
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,273 @@
|
||||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package html
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// writer is the set of write methods the renderer relies on: plain Write
// plus the single-byte and string variants.
type writer interface {
	io.Writer
	io.ByteWriter
	io.StringWriter
}
|
||||
|
||||
// Render renders the parse tree n to the given writer.
|
||||
//
|
||||
// Rendering is done on a 'best effort' basis: calling Parse on the output of
|
||||
// Render will always result in something similar to the original tree, but it
|
||||
// is not necessarily an exact clone unless the original tree was 'well-formed'.
|
||||
// 'Well-formed' is not easily specified; the HTML5 specification is
|
||||
// complicated.
|
||||
//
|
||||
// Calling Parse on arbitrary input typically results in a 'well-formed' parse
|
||||
// tree. However, it is possible for Parse to yield a 'badly-formed' parse tree.
|
||||
// For example, in a 'well-formed' parse tree, no <a> element is a child of
|
||||
// another <a> element: parsing "<a><a>" results in two sibling elements.
|
||||
// Similarly, in a 'well-formed' parse tree, no <a> element is a child of a
|
||||
// <table> element: parsing "<p><table><a>" results in a <p> with two sibling
|
||||
// children; the <a> is reparented to the <table>'s parent. However, calling
|
||||
// Parse on "<a><table><a>" does not return an error, but the result has an <a>
|
||||
// element with an <a> child, and is therefore not 'well-formed'.
|
||||
//
|
||||
// Programmatically constructed trees are typically also 'well-formed', but it
|
||||
// is possible to construct a tree that looks innocuous but, when rendered and
|
||||
// re-parsed, results in a different tree. A simple example is that a solitary
|
||||
// text node would become a tree containing <html>, <head> and <body> elements.
|
||||
// Another example is that the programmatic equivalent of "a<head>b</head>c"
|
||||
// becomes "<html><head><head/><body>abc</body></html>".
|
||||
func Render(w io.Writer, n *Node) error {
|
||||
if x, ok := w.(writer); ok {
|
||||
return render(x, n)
|
||||
}
|
||||
buf := bufio.NewWriter(w)
|
||||
if err := render(buf, n); err != nil {
|
||||
return err
|
||||
}
|
||||
return buf.Flush()
|
||||
}
|
||||
|
||||
// plaintextAbort is the sentinel error render1 returns once it has rendered a
// <plaintext> element; from that point on no further end tags should be
// written. render turns it back into a nil error.
var plaintextAbort = errors.New("html: internal error (plaintext abort)")
|
||||
|
||||
func render(w writer, n *Node) error {
|
||||
err := render1(w, n)
|
||||
if err == plaintextAbort {
|
||||
err = nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
func render1(w writer, n *Node) error {
|
||||
// Render non-element nodes; these are the easy cases.
|
||||
switch n.Type {
|
||||
case ErrorNode:
|
||||
return errors.New("html: cannot render an ErrorNode node")
|
||||
case TextNode:
|
||||
return escape(w, n.Data)
|
||||
case DocumentNode:
|
||||
for c := n.FirstChild; c != nil; c = c.NextSibling {
|
||||
if err := render1(w, c); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
case ElementNode:
|
||||
// No-op.
|
||||
case CommentNode:
|
||||
if _, err := w.WriteString("<!--"); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := w.WriteString(n.Data); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := w.WriteString("-->"); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
case DoctypeNode:
|
||||
if _, err := w.WriteString("<!DOCTYPE "); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := w.WriteString(n.Data); err != nil {
|
||||
return err
|
||||
}
|
||||
if n.Attr != nil {
|
||||
var p, s string
|
||||
for _, a := range n.Attr {
|
||||
switch a.Key {
|
||||
case "public":
|
||||
p = a.Val
|
||||
case "system":
|
||||
s = a.Val
|
||||
}
|
||||
}
|
||||
if p != "" {
|
||||
if _, err := w.WriteString(" PUBLIC "); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := writeQuoted(w, p); err != nil {
|
||||
return err
|
||||
}
|
||||
if s != "" {
|
||||
if err := w.WriteByte(' '); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := writeQuoted(w, s); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
} else if s != "" {
|
||||
if _, err := w.WriteString(" SYSTEM "); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := writeQuoted(w, s); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
return w.WriteByte('>')
|
||||
case RawNode:
|
||||
_, err := w.WriteString(n.Data)
|
||||
return err
|
||||
default:
|
||||
return errors.New("html: unknown node type")
|
||||
}
|
||||
|
||||
// Render the <xxx> opening tag.
|
||||
if err := w.WriteByte('<'); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := w.WriteString(n.Data); err != nil {
|
||||
return err
|
||||
}
|
||||
for _, a := range n.Attr {
|
||||
if err := w.WriteByte(' '); err != nil {
|
||||
return err
|
||||
}
|
||||
if a.Namespace != "" {
|
||||
if _, err := w.WriteString(a.Namespace); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := w.WriteByte(':'); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if _, err := w.WriteString(a.Key); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := w.WriteString(`="`); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := escape(w, a.Val); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := w.WriteByte('"'); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if voidElements[n.Data] {
|
||||
if n.FirstChild != nil {
|
||||
return fmt.Errorf("html: void element <%s> has child nodes", n.Data)
|
||||
}
|
||||
_, err := w.WriteString("/>")
|
||||
return err
|
||||
}
|
||||
if err := w.WriteByte('>'); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Add initial newline where there is danger of a newline beging ignored.
|
||||
if c := n.FirstChild; c != nil && c.Type == TextNode && strings.HasPrefix(c.Data, "\n") {
|
||||
switch n.Data {
|
||||
case "pre", "listing", "textarea":
|
||||
if err := w.WriteByte('\n'); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Render any child nodes.
|
||||
switch n.Data {
|
||||
case "iframe", "noembed", "noframes", "noscript", "plaintext", "script", "style", "xmp":
|
||||
for c := n.FirstChild; c != nil; c = c.NextSibling {
|
||||
if c.Type == TextNode {
|
||||
if _, err := w.WriteString(c.Data); err != nil {
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
if err := render1(w, c); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
if n.Data == "plaintext" {
|
||||
// Don't render anything else. <plaintext> must be the
|
||||
// last element in the file, with no closing tag.
|
||||
return plaintextAbort
|
||||
}
|
||||
default:
|
||||
for c := n.FirstChild; c != nil; c = c.NextSibling {
|
||||
if err := render1(w, c); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Render the </xxx> closing tag.
|
||||
if _, err := w.WriteString("</"); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := w.WriteString(n.Data); err != nil {
|
||||
return err
|
||||
}
|
||||
return w.WriteByte('>')
|
||||
}
|
||||
|
||||
// writeQuoted writes s to w surrounded by quotes. Normally it will use double
|
||||
// quotes, but if s contains a double quote, it will use single quotes.
|
||||
// It is used for writing the identifiers in a doctype declaration.
|
||||
// In valid HTML, they can't contain both types of quotes.
|
||||
func writeQuoted(w writer, s string) error {
|
||||
var q byte = '"'
|
||||
if strings.Contains(s, `"`) {
|
||||
q = '\''
|
||||
}
|
||||
if err := w.WriteByte(q); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := w.WriteString(s); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := w.WriteByte(q); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// voidElements is the set of void elements, i.e. elements that can't have
// any contents. The list is taken from Section 12.1.2, "Elements".
var voidElements = map[string]bool{
	"area":  true,
	"base":  true,
	"br":    true,
	"col":   true,
	"embed": true,
	"hr":    true,
	"img":   true,
	"input": true,
	// "keygen" has been removed from the spec, but is kept here for
	// backwards compatibility.
	"keygen": true,
	"link":   true,
	"meta":   true,
	"param":  true,
	"source": true,
	"track":  true,
	"wbr":    true,
}
|
File diff suppressed because it is too large
Load Diff
@ -1,8 +1,12 @@
|
||||
# github.com/gabriel-vasile/mimetype v1.2.0
|
||||
# github.com/gabriel-vasile/mimetype v1.3.0
|
||||
github.com/gabriel-vasile/mimetype
|
||||
github.com/gabriel-vasile/mimetype/internal/charset
|
||||
github.com/gabriel-vasile/mimetype/internal/json
|
||||
github.com/gabriel-vasile/mimetype/internal/matchers
|
||||
github.com/gabriel-vasile/mimetype/internal/magic
|
||||
# github.com/mattn/go-xmpp v0.0.0-20210121082723-b40e1294994d
|
||||
github.com/mattn/go-xmpp
|
||||
# github.com/pborman/getopt/v2 v2.1.0
|
||||
github.com/pborman/getopt/v2
|
||||
# golang.org/x/net v0.0.0-20210521195947-fe42d452be8f
|
||||
golang.org/x/net/html
|
||||
golang.org/x/net/html/atom
|
||||
|
Loading…
Reference in New Issue