Update vendored dependencies.

check-errors
Martin Dosch 3 years ago
parent f1a9a1f462
commit 04232fb4e5

Binary file not shown.

@ -3,7 +3,8 @@ module salsa.debian.org/mdosch/go-sendxmpp
go 1.13
require (
github.com/gabriel-vasile/mimetype v1.2.0
github.com/gabriel-vasile/mimetype v1.3.0
github.com/mattn/go-xmpp v0.0.0-20210121082723-b40e1294994d
github.com/pborman/getopt/v2 v2.1.0
golang.org/x/net v0.0.0-20210521195947-fe42d452be8f // indirect
)

@ -1,6 +1,15 @@
github.com/gabriel-vasile/mimetype v1.2.0 h1:A6z5J8OhjiWFV91sQ3dMI8apYu/tvP9keDaMM3Xu6p4=
github.com/gabriel-vasile/mimetype v1.2.0/go.mod h1:6CDPel/o/3/s4+bp6kIbsWATq8pmgOisOPG40CJa6To=
github.com/gabriel-vasile/mimetype v1.3.0 h1:4YOHITFLyYwF+iqG0ybSLGArRItynpfwdlWRmJnd75E=
github.com/gabriel-vasile/mimetype v1.3.0/go.mod h1:fA8fi6KUiG7MgQQ+mEWotXoEOvmxRtOJlERCzSmRvr8=
github.com/mattn/go-xmpp v0.0.0-20210121082723-b40e1294994d h1:LrXbX6iVhQ3Z50hnhTdyP4K60jevMzk/x2TpMYtOJqg=
github.com/mattn/go-xmpp v0.0.0-20210121082723-b40e1294994d/go.mod h1:Cs5mF0OsrRRmhkyOod//ldNPOwJsrBvJ+1WRspv0xoc=
github.com/pborman/getopt/v2 v2.1.0 h1:eNfR+r+dWLdWmV8g5OlpyrTYHkhVNxHBdN2cCrJmOEA=
github.com/pborman/getopt/v2 v2.1.0/go.mod h1:4NtW75ny4eBw9fO1bhtNdYTlZKYX5/tBLtsOpwKIKd0=
golang.org/x/net v0.0.0-20210505024714-0287a6fb4125 h1:Ugb8sMTWuWRC3+sz5WeN/4kejDx9BvIwnPUiJBjJE+8=
golang.org/x/net v0.0.0-20210505024714-0287a6fb4125/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/net v0.0.0-20210521195947-fe42d452be8f h1:Si4U+UcgJzya9kpiEUJKQvjr512OLli+gL4poHrz93U=
golang.org/x/net v0.0.0-20210521195947-fe42d452be8f/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=

@ -1,14 +0,0 @@
language: go
go:
- "1.12"
- "master"
before_install:
- go get github.com/mattn/goveralls
- go get github.com/client9/misspell/cmd/misspell
before_script:
- go vet .
script:
- diff -u <(echo -n) <(gofmt -d ./)
- go test -v -race
- $GOPATH/bin/goveralls -service=travis-ci
- misspell -locale US -error *.md *.go

@ -6,7 +6,7 @@
A package for detecting MIME types and extensions based on magic numbers
</h4>
<h6 align="center">
No C bindings, zero dependencies and thread safe
Goroutine safe, extensible, no C bindings
</h6>
<p align="center">
@ -30,11 +30,9 @@
## Features
- fast and precise MIME type and file extension detection
- long list of [supported MIME types](supported_mimes.md)
- posibility to [extend](https://pkg.go.dev/github.com/gabriel-vasile/mimetype#example-package-Extend) with other file formats
- common file formats are prioritized
- small and simple API
- handles MIME type aliases
- thread safe
- low memory usage, besides the file header
- safe for concurrent usage
## Install
```bash
@ -43,22 +41,27 @@ go get github.com/gabriel-vasile/mimetype
## Usage
```go
mime := mimetype.Detect([]byte)
mtype := mimetype.Detect([]byte)
// OR
mime, err := mime.DetectReader(io.Reader)
mtype, err := mimetype.DetectReader(io.Reader)
// OR
mime, err := mime.DetectFile("/path/to/file")
fmt.Println(mime.String(), mime.Extension())
mtype, err := mimetype.DetectFile("/path/to/file")
fmt.Println(mtype.String(), mtype.Extension())
```
See the [runnable Go Playground examples](https://pkg.go.dev/github.com/gabriel-vasile/mimetype#pkg-overview).
## Usage'
Only use libraries like **mimetype** as a last resort. Content type detection
using magic numbers is slow, inaccurate, and non-standard. Most of the times
protocols have methods for specifying such metadata; e.g., `Content-Type` header
in HTTP and SMTP.
## Structure
**mimetype** uses an hierarchical structure to keep the MIME type detection logic.
**mimetype** uses a hierarchical structure to keep the MIME type detection logic.
This reduces the number of calls needed for detecting the file type. The reason
behind this choice is that there are file formats used as containers for other
file formats. For example, Microsoft Office files are just zip archives,
containing specific metadata files. Once a file a file has been identified as a
containing specific metadata files. Once a file has been identified as a
zip, there is no need to check if it is a text file, but it is worth checking if
it is an Microsoft Office file.
@ -67,7 +70,7 @@ To prevent loading entire files into memory, when detecting from a
or from a [file](https://pkg.go.dev/github.com/gabriel-vasile/mimetype#DetectFile)
**mimetype** limits itself to reading only the header of the input.
<div align="center">
<img alt="structure" src="https://github.com/gabriel-vasile/mimetype/blob/33abbe6cb78fe1a8486c92f95008a9e0fcef10a1/mimetype.gif?raw=true" width="88%">
<img alt="structure" src="https://github.com/gabriel-vasile/mimetype/blob/420a05228c6a6efbb6e6f080168a25663414ff36/mimetype.gif?raw=true" width="88%">
</div>
## Performance
@ -75,8 +78,6 @@ Thanks to the hierarchical structure, searching for common formats first,
and limiting itself to file headers, **mimetype** matches the performance of
stdlib `http.DetectContentType` while outperforming the alternative package.
[Benchmarks](https://github.com/gabriel-vasile/mimetype/blob/d8628c314b5e59259afc7b0f4f84e6b31931b316/mimetype_test.go#L267)
were run on an Intel Xeon Gold 6136 24 core CPU @ 3.00GHz. Lower is better.
```bash
mimetype http.DetectContentType filetype
BenchmarkMatchTar-24 250 ns/op 400 ns/op 3778 ns/op

@ -1,3 +1,5 @@
module github.com/gabriel-vasile/mimetype
go 1.12
require golang.org/x/net v0.0.0-20210505024714-0287a6fb4125

@ -0,0 +1,7 @@
golang.org/x/net v0.0.0-20210505024714-0287a6fb4125 h1:Ugb8sMTWuWRC3+sz5WeN/4kejDx9BvIwnPUiJBjJE+8=
golang.org/x/net v0.0.0-20210505024714-0287a6fb4125/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=

@ -0,0 +1,296 @@
package charset
import (
"bytes"
"encoding/xml"
"strings"
"unicode/utf8"
"golang.org/x/net/html"
)
const (
F = 0 /* character never appears in text */
T = 1 /* character appears in plain ASCII text */
I = 2 /* character appears in ISO-8859 text */
X = 3 /* character appears in non-ISO extended ASCII (Mac, IBM PC) */
)
var (
boms = []struct {
bom []byte
enc string
}{
{[]byte{0xEF, 0xBB, 0xBF}, "utf-8"},
{[]byte{0x00, 0x00, 0xFE, 0xFF}, "utf-32be"},
{[]byte{0xFF, 0xFE, 0x00, 0x00}, "utf-32le"},
{[]byte{0xFE, 0xFF}, "utf-16be"},
{[]byte{0xFF, 0xFE}, "utf-16le"},
}
// https://github.com/file/file/blob/fa93fb9f7d21935f1c7644c47d2975d31f12b812/src/encoding.c#L241
textChars [256]byte = [256]byte{
/* BEL BS HT LF VT FF CR */
F, F, F, F, F, F, F, T, T, T, T, T, T, T, F, F, /* 0x0X */
/* ESC */
F, F, F, F, F, F, F, F, F, F, F, T, F, F, F, F, /* 0x1X */
T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x2X */
T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x3X */
T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x4X */
T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x5X */
T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x6X */
T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, F, /* 0x7X */
/* NEL */
X, X, X, X, X, T, X, X, X, X, X, X, X, X, X, X, /* 0x8X */
X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, /* 0x9X */
I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xaX */
I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xbX */
I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xcX */
I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xdX */
I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xeX */
I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xfX */
}
)
func FromBOM(content []byte) string {
for _, b := range boms {
if bytes.HasPrefix(content, b.bom) {
return b.enc
}
}
return ""
}
func FromPlain(content []byte) string {
if len(content) == 0 {
return ""
}
if cset := FromBOM(content); cset != "" {
return cset
}
origContent := content
// Try to detect UTF-8.
// First eliminate any partial rune at the end.
for i := len(content) - 1; i >= 0 && i > len(content)-4; i-- {
b := content[i]
if b < 0x80 {
break
}
if utf8.RuneStart(b) {
content = content[:i]
break
}
}
hasHighBit := false
for _, c := range content {
if c >= 0x80 {
hasHighBit = true
break
}
}
if hasHighBit && utf8.Valid(content) {
return "utf-8"
}
// ASCII is a subset of UTF8. Follow W3C recommendation and replace with UTF8.
if ascii(origContent) {
return "utf-8"
}
return latin(origContent)
}
func latin(content []byte) string {
hasControlBytes := false
for _, b := range content {
t := textChars[b]
if t != T && t != I {
return ""
}
if b >= 0x80 && b <= 0x9F {
hasControlBytes = true
}
}
// Code range 0x80 to 0x9F is reserved for control characters in ISO-8859-1
// (so-called C1 Controls). Windows 1252, however, has printable punctuation
// characters in this range.
if hasControlBytes {
return "windows-1252"
}
return "iso-8859-1"
}
func ascii(content []byte) bool {
for _, b := range content {
if textChars[b] != T {
return false
}
}
return true
}
func FromXML(content []byte) string {
if cset := fromXML(content); cset != "" {
return cset
}
return FromPlain(content)
}
func fromXML(content []byte) string {
content = trimLWS(content)
dec := xml.NewDecoder(bytes.NewReader(content))
rawT, err := dec.RawToken()
if err != nil {
return ""
}
t, ok := rawT.(xml.ProcInst)
if !ok {
return ""
}
return strings.ToLower(xmlEncoding(string(t.Inst)))
}
func FromHTML(content []byte) string {
if cset := fromHTML(content); cset != "" {
return cset
}
return FromPlain(content)
}
func fromHTML(content []byte) string {
z := html.NewTokenizer(bytes.NewReader(content))
for {
switch z.Next() {
case html.ErrorToken:
return ""
case html.StartTagToken, html.SelfClosingTagToken:
tagName, hasAttr := z.TagName()
if !bytes.Equal(tagName, []byte("meta")) {
continue
}
attrList := make(map[string]bool)
gotPragma := false
const (
dontKnow = iota
doNeedPragma
doNotNeedPragma
)
needPragma := dontKnow
name := ""
for hasAttr {
var key, val []byte
key, val, hasAttr = z.TagAttr()
ks := string(key)
if attrList[ks] {
continue
}
attrList[ks] = true
for i, c := range val {
if 'A' <= c && c <= 'Z' {
val[i] = c + 0x20
}
}
switch ks {
case "http-equiv":
if bytes.Equal(val, []byte("content-type")) {
gotPragma = true
}
case "content":
name = fromMetaElement(string(val))
if name != "" {
needPragma = doNeedPragma
}
case "charset":
name = string(val)
needPragma = doNotNeedPragma
}
}
if needPragma == dontKnow || needPragma == doNeedPragma && !gotPragma {
continue
}
if strings.HasPrefix(name, "utf-16") {
name = "utf-8"
}
return name
}
}
}
func fromMetaElement(s string) string {
for s != "" {
csLoc := strings.Index(s, "charset")
if csLoc == -1 {
return ""
}
s = s[csLoc+len("charset"):]
s = strings.TrimLeft(s, " \t\n\f\r")
if !strings.HasPrefix(s, "=") {
continue
}
s = s[1:]
s = strings.TrimLeft(s, " \t\n\f\r")
if s == "" {
return ""
}
if q := s[0]; q == '"' || q == '\'' {
s = s[1:]
closeQuote := strings.IndexRune(s, rune(q))
if closeQuote == -1 {
return ""
}
return s[:closeQuote]
}
end := strings.IndexAny(s, "; \t\n\f\r")
if end == -1 {
end = len(s)
}
return s[:end]
}
return ""
}
func xmlEncoding(s string) string {
param := "encoding="
idx := strings.Index(s, param)
if idx == -1 {
return ""
}
v := s[idx+len(param):]
if v == "" {
return ""
}
if v[0] != '\'' && v[0] != '"' {
return ""
}
idx = strings.IndexRune(v[1:], rune(v[0]))
if idx == -1 {
return ""
}
return v[1 : idx+1]
}
// trimLWS trims whitespace from beginning of the input.
// TODO: find a way to call trimLWS once per detection instead of once in each
// detector which needs the trimmed input.
func trimLWS(in []byte) []byte {
firstNonWS := 0
for ; firstNonWS < len(in) && isWS(in[firstNonWS]); firstNonWS++ {
}
return in[firstNonWS:]
}
func isWS(b byte) bool {
return b == '\t' || b == '\n' || b == '\x0c' || b == '\r' || b == ' '
}

@ -0,0 +1,81 @@
package magic
import (
"bytes"
"encoding/binary"
)
var (
// SevenZ matches a 7z archive.
SevenZ = prefix([]byte{0x37, 0x7A, 0xBC, 0xAF, 0x27, 0x1C})
// Gzip matches gzip files based on http://www.zlib.org/rfc-gzip.html#header-trailer.
Gzip = prefix([]byte{0x1f, 0x8b})
// Tar matches a (t)ape (ar)chive file.
Tar = offset([]byte("ustar"), 257)
// Fits matches an Flexible Image Transport System file.
Fits = prefix([]byte{
0x53, 0x49, 0x4D, 0x50, 0x4C, 0x45, 0x20, 0x20, 0x3D, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x54,
})
// Xar matches an eXtensible ARchive format file.
Xar = prefix([]byte{0x78, 0x61, 0x72, 0x21})
// Bz2 matches a bzip2 file.
Bz2 = prefix([]byte{0x42, 0x5A, 0x68})
// Ar matches an ar (Unix) archive file.
Ar = prefix([]byte{0x21, 0x3C, 0x61, 0x72, 0x63, 0x68, 0x3E})
// Deb matches a Debian package file.
Deb = offset([]byte{
0x64, 0x65, 0x62, 0x69, 0x61, 0x6E, 0x2D,
0x62, 0x69, 0x6E, 0x61, 0x72, 0x79,
}, 8)
// Warc matches a Web ARChive file.
Warc = prefix([]byte("WARC/"))
// Cab matches a Cabinet archive file.
Cab = prefix([]byte("MSCF"))
// Xz matches an xz compressed stream based on https://tukaani.org/xz/xz-file-format.txt.
Xz = prefix([]byte{0xFD, 0x37, 0x7A, 0x58, 0x5A, 0x00})
// Lzip matches an Lzip compressed file.
Lzip = prefix([]byte{0x4c, 0x5a, 0x49, 0x50})
)
// Zstd matches a Zstandard archive file.
func Zstd(raw []byte, limit uint32) bool {
return len(raw) >= 4 &&
(0x22 <= raw[0] && raw[0] <= 0x28 || raw[0] == 0x1E) && // Different Zstandard versions.
bytes.HasPrefix(raw[1:], []byte{0xB5, 0x2F, 0xFD})
}
// Rpm matches an RPM or Delta RPM package file.
func Rpm(raw []byte, limit uint32) bool {
return bytes.HasPrefix(raw, []byte{0xed, 0xab, 0xee, 0xdb}) ||
bytes.HasPrefix(raw, []byte("drpm"))
}
// Cpio matches a cpio archive file.
func Cpio(raw []byte, limit uint32) bool {
return bytes.HasPrefix(raw, []byte("070707")) ||
bytes.HasPrefix(raw, []byte("070701")) ||
bytes.HasPrefix(raw, []byte("070702"))
}
// Rar matches a RAR archive file.
func Rar(raw []byte, limit uint32) bool {
return bytes.HasPrefix(raw, []byte("Rar!\x1A\x07\x00")) ||
bytes.HasPrefix(raw, []byte("Rar!\x1A\x07\x01\x00"))
}
// Crx matches a Chrome extension file: a zip archive prepended by a package header.
func Crx(raw []byte, limit uint32) bool {
const minHeaderLen = 16
if len(raw) < minHeaderLen || !bytes.HasPrefix(raw, []byte("Cr24")) {
return false
}
pubkeyLen := binary.LittleEndian.Uint32(raw[8:12])
sigLen := binary.LittleEndian.Uint32(raw[12:16])
zipOffset := minHeaderLen + pubkeyLen + sigLen
if uint32(len(raw)) < zipOffset {
return false
}
return Zip(raw[zipOffset:], limit)
}

@ -0,0 +1,80 @@
package magic
import (
"bytes"
"encoding/binary"
)
var (
// Flac matches a Free Lossless Audio Codec file.
Flac = prefix([]byte("\x66\x4C\x61\x43\x00\x00\x00\x22"))
// Midi matches a Musical Instrument Digital Interface file.
Midi = prefix([]byte("\x4D\x54\x68\x64"))
// Ape matches a Monkey's Audio file.
Ape = prefix([]byte("\x4D\x41\x43\x20\x96\x0F\x00\x00\x34\x00\x00\x00\x18\x00\x00\x00\x90\xE3"))
// MusePack matches a Musepack file.
MusePack = prefix([]byte("MPCK"))
// Au matches a Sun Microsystems au file.
Au = prefix([]byte("\x2E\x73\x6E\x64"))
// Amr matches an Adaptive Multi-Rate file.
Amr = prefix([]byte("\x23\x21\x41\x4D\x52"))
// Voc matches a Creative Voice file.
Voc = prefix([]byte("Creative Voice File"))
// M3u matches a Playlist file.
M3u = prefix([]byte("#EXTM3U"))
)
// Mp3 matches an mp3 file.
func Mp3(raw []byte, limit uint32) bool {
if len(raw) < 3 {
return false
}
if bytes.HasPrefix(raw, []byte("ID3")) {
// MP3s with an ID3v2 tag will start with "ID3"
// ID3v1 tags, however appear at the end of the file.
return true
}
// Match MP3 files without tags
switch binary.BigEndian.Uint16(raw[:2]) & 0xFFFE {
case 0xFFFA:
// MPEG ADTS, layer III, v1
return true
case 0xFFF2:
// MPEG ADTS, layer III, v2
return true
case 0xFFE2:
// MPEG ADTS, layer III, v2.5
return true
}
return false
}
// Aac matches an Advanced Audio Coding file.
func Aac(raw []byte, limit uint32) bool {
return bytes.HasPrefix(raw, []byte{0xFF, 0xF1}) ||
bytes.HasPrefix(raw, []byte{0xFF, 0xF9})
}
// Wav matches a Waveform Audio File Format file.
func Wav(raw []byte, limit uint32) bool {
return len(raw) > 12 &&
bytes.Equal(raw[:4], []byte("RIFF")) &&
bytes.Equal(raw[8:12], []byte("\x57\x41\x56\x45"))
}
// Aiff matches Audio Interchange File Format file.
func Aiff(raw []byte, limit uint32) bool {
return len(raw) > 12 &&
bytes.Equal(raw[:4], []byte("\x46\x4F\x52\x4D")) &&
bytes.Equal(raw[8:12], []byte("\x41\x49\x46\x46"))
}
// Qcp matches a Qualcomm Pure Voice file.
func Qcp(raw []byte, limit uint32) bool {
return len(raw) > 12 &&
bytes.Equal(raw[:4], []byte("RIFF")) &&
bytes.Equal(raw[8:12], []byte("QLCM"))
}

@ -0,0 +1,144 @@
package magic
import (
"bytes"
"debug/macho"
"encoding/binary"
)
var (
// Lnk matches Microsoft lnk binary format.
Lnk = prefix([]byte{0x4C, 0x00, 0x00, 0x00, 0x01, 0x14, 0x02, 0x00})
// Wasm matches a web assembly File Format file.
Wasm = prefix([]byte{0x00, 0x61, 0x73, 0x6D})
// Exe matches a Windows/DOS executable file.
Exe = prefix([]byte{0x4D, 0x5A})
// Elf matches an Executable and Linkable Format file.
Elf = prefix([]byte{0x7F, 0x45, 0x4C, 0x46})
// Nes matches a Nintendo Entertainment system ROM file.
Nes = prefix([]byte{0x4E, 0x45, 0x53, 0x1A})
// TzIf matches a Time Zone Information Format (TZif) file.
TzIf = prefix([]byte("TZif"))
)
// Java bytecode and Mach-O binaries share the same magic number.
// More info here https://github.com/threatstack/libmagic/blob/master/magic/Magdir/cafebabe
func classOrMachOFat(in []byte) bool {
// There should be at least 8 bytes for both of them because the only way to
// quickly distinguish them is by comparing byte at position 7
if len(in) < 8 {
return false
}
return bytes.HasPrefix(in, []byte{0xCA, 0xFE, 0xBA, 0xBE})
}
// Class matches a java class file.
func Class(raw []byte, limit uint32) bool {
return classOrMachOFat(raw) && raw[7] > 30
}
// MachO matches Mach-O binaries format.
func MachO(raw []byte, limit uint32) bool {
if classOrMachOFat(raw) && raw[7] < 20 {
return true
}
if len(raw) < 4 {
return false
}
be := binary.BigEndian.Uint32(raw)
le := binary.LittleEndian.Uint32(raw)
return be == macho.Magic32 ||
le == macho.Magic32 ||
be == macho.Magic64 ||
le == macho.Magic64
}
// Swf matches an Adobe Flash swf file.
func Swf(raw []byte, limit uint32) bool {
return bytes.HasPrefix(raw, []byte("CWS")) ||
bytes.HasPrefix(raw, []byte("FWS")) ||
bytes.HasPrefix(raw, []byte("ZWS"))
}
// Dbf matches a dBase file.
// https://www.dbase.com/Knowledgebase/INT/db7_file_fmt.htm
func Dbf(raw []byte, limit uint32) bool {
if len(raw) < 4 {
return false
}
// 3rd and 4th bytes contain the last update month and day of month
if !(0 < raw[2] && raw[2] < 13 && 0 < raw[3] && raw[3] < 32) {
return false
}
// dbf type is dictated by the first byte
dbfTypes := []byte{
0x02, 0x03, 0x04, 0x05, 0x30, 0x31, 0x32, 0x42, 0x62, 0x7B, 0x82,
0x83, 0x87, 0x8A, 0x8B, 0x8E, 0xB3, 0xCB, 0xE5, 0xF5, 0xF4, 0xFB,
}
for _, b := range dbfTypes {
if raw[0] == b {
return true
}
}
return false
}
// ElfObj matches an object file.
func ElfObj(raw []byte, limit uint32) bool {
return len(raw) > 17 && ((raw[16] == 0x01 && raw[17] == 0x00) ||
(raw[16] == 0x00 && raw[17] == 0x01))
}
// ElfExe matches an executable file.
func ElfExe(raw []byte, limit uint32) bool {
return len(raw) > 17 && ((raw[16] == 0x02 && raw[17] == 0x00) ||
(raw[16] == 0x00 && raw[17] == 0x02))
}
// ElfLib matches a shared library file.
func ElfLib(raw []byte, limit uint32) bool {
return len(raw) > 17 && ((raw[16] == 0x03 && raw[17] == 0x00) ||
(raw[16] == 0x00 && raw[17] == 0x03))
}
// ElfDump matches a core dump file.
func ElfDump(raw []byte, limit uint32) bool {
return len(raw) > 17 && ((raw[16] == 0x04 && raw[17] == 0x00) ||
(raw[16] == 0x00 && raw[17] == 0x04))
}
// Dcm matches a DICOM medical format file.
func Dcm(raw []byte, limit uint32) bool {
return len(raw) > 131 &&
bytes.Equal(raw[128:132], []byte{0x44, 0x49, 0x43, 0x4D})
}
// Marc matches a MARC21 (MAchine-Readable Cataloging) file.
func Marc(raw []byte, limit uint32) bool {
// File is at least 24 bytes ("leader" field size).
if len(raw) < 24 {
return false
}
// Fixed bytes at offset 20.
if !bytes.Equal(raw[20:24], []byte("4500")) {
return false
}
// First 5 bytes are ASCII digits.
for i := 0; i < 5; i++ {
if raw[i] < '0' || raw[i] > '9' {
return false
}
}
// Field terminator is present.
return bytes.Contains(raw, []byte{0x1E})
}

@ -0,0 +1,13 @@
package magic
var (
// Sqlite matches an SQLite database file.
Sqlite = prefix([]byte{
0x53, 0x51, 0x4c, 0x69, 0x74, 0x65, 0x20, 0x66,
0x6f, 0x72, 0x6d, 0x61, 0x74, 0x20, 0x33, 0x00,
})
// MsAccessAce matches Microsoft Access dababase file.
MsAccessAce = offset([]byte("Standard ACE DB"), 4)
// MsAccessMdb matches legacy Microsoft Access database file (JET, 2003 and earlier).
MsAccessMdb = offset([]byte("Standard Jet DB"), 4)
)

@ -0,0 +1,54 @@
package magic
import "bytes"
var (
// Pdf matches a Portable Document Format file.
Pdf = prefix([]byte{0x25, 0x50, 0x44, 0x46})
// Fdf matches a Forms Data Format file.
Fdf = prefix([]byte("%FDF"))
// Mobi matches a Mobi file.
Mobi = offset([]byte("BOOKMOBI"), 60)
// Lit matches a Microsoft Lit file.
Lit = prefix([]byte("ITOLITLS"))
)
// DjVu matches a DjVu file.
func DjVu(raw []byte, limit uint32) bool {
if len(raw) < 12 {
return false
}
if !bytes.HasPrefix(raw, []byte{0x41, 0x54, 0x26, 0x54, 0x46, 0x4F, 0x52, 0x4D}) {
return false
}
return bytes.HasPrefix(raw[12:], []byte("DJVM")) ||
bytes.HasPrefix(raw[12:], []byte("DJVU")) ||
bytes.HasPrefix(raw[12:], []byte("DJVI")) ||
bytes.HasPrefix(raw[12:], []byte("THUM"))
}
// P7s matches an .p7s signature File (PEM, Base64).
func P7s(raw []byte, limit uint32) bool {
// Check for PEM Encoding.
if bytes.HasPrefix(raw, []byte("-----BEGIN PKCS7")) {
return true
}
// Check if DER Encoding is long enough.
if len(raw) < 20 {
return false
}
// Magic Bytes for the signedData ASN.1 encoding.
startHeader := [][]byte{{0x30, 0x80}, {0x30, 0x81}, {0x30, 0x82}, {0x30, 0x83}, {0x30, 0x84}}
signedDataMatch := []byte{0x06, 0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x07}
// Check if Header is correct. There are multiple valid headers.
for i, match := range startHeader {
// If first bytes match, then check for ASN.1 Object Type.
if bytes.HasPrefix(raw, match) {
if bytes.HasPrefix(raw[i+2:], signedDataMatch) {
return true
}
}
}
return false
}

@ -0,0 +1,31 @@
package magic
import (
"bytes"
)
var (
// Woff matches a Web Open Font Format file.
Woff = prefix([]byte("wOFF"))
// Woff2 matches a Web Open Font Format version 2 file.
Woff2 = prefix([]byte("wOF2"))
// Otf matches an OpenType font file.
Otf = prefix([]byte{0x4F, 0x54, 0x54, 0x4F, 0x00})
)
// Ttf matches a TrueType font file.
func Ttf(raw []byte, limit uint32) bool {
if !bytes.HasPrefix(raw, []byte{0x00, 0x01, 0x00, 0x00}) {
return false
}
return !MsAccessAce(raw, limit) && !MsAccessMdb(raw, limit)
}
// Eot matches an Embedded OpenType font file.
func Eot(raw []byte, limit uint32) bool {
return len(raw) > 35 &&
bytes.Equal(raw[34:36], []byte{0x4C, 0x50}) &&
(bytes.Equal(raw[8:11], []byte{0x02, 0x00, 0x01}) ||
bytes.Equal(raw[8:11], []byte{0x01, 0x00, 0x00}) ||
bytes.Equal(raw[8:11], []byte{0x02, 0x00, 0x02}))
}

@ -0,0 +1,53 @@
package magic
var (
// Mp4 matches an MP4 file.
Mp4 = ftyp(
[]byte("avc1"), []byte("dash"), []byte("iso2"), []byte("iso3"),
[]byte("iso4"), []byte("iso5"), []byte("iso6"), []byte("isom"),
[]byte("mmp4"), []byte("mp41"), []byte("mp42"), []byte("mp4v"),
[]byte("mp71"), []byte("MSNV"), []byte("NDAS"), []byte("NDSC"),
[]byte("NSDC"), []byte("NSDH"), []byte("NDSM"), []byte("NDSP"),
[]byte("NDSS"), []byte("NDXC"), []byte("NDXH"), []byte("NDXM"),
[]byte("NDXP"), []byte("NDXS"), []byte("F4V "), []byte("F4P "),
)
// ThreeGP matches a 3GPP file.
ThreeGP = ftyp(
[]byte("3gp1"), []byte("3gp2"), []byte("3gp3"), []byte("3gp4"),
[]byte("3gp5"), []byte("3gp6"), []byte("3gp7"), []byte("3gs7"),
[]byte("3ge6"), []byte("3ge7"), []byte("3gg6"),
)
// ThreeG2 matches a 3GPP2 file.
ThreeG2 = ftyp(
[]byte("3g24"), []byte("3g25"), []byte("3g26"), []byte("3g2a"),
[]byte("3g2b"), []byte("3g2c"), []byte("KDDI"),
)
// AMp4 matches an audio MP4 file.
AMp4 = ftyp(
// audio for Adobe Flash Player 9+
[]byte("F4A "), []byte("F4B "),
// Apple iTunes AAC-LC (.M4A) Audio
[]byte("M4B "), []byte("M4P "),
// MPEG-4 (.MP4) for SonyPSP
[]byte("MSNV"),
// Nero Digital AAC Audio
[]byte("NDAS"),
)
// QuickTime matches a QuickTime File Format file.
QuickTime = ftyp([]byte("qt "), []byte("moov"))
// Mqv matches a Sony / Mobile QuickTime file.
Mqv = ftyp([]byte("mqt "))
// M4a matches an audio M4A file.
M4a = ftyp([]byte("M4A "))
// M4v matches an Appl4 M4V video file.
M4v = ftyp([]byte("M4V "), []byte("M4VH"), []byte("M4VP"))
// Heic matches a High Efficiency Image Coding (HEIC) file.
Heic = ftyp([]byte("heic"), []byte("heix"))
// HeicSequence matches a High Efficiency Image Coding (HEIC) file sequence.
HeicSequence = ftyp([]byte("hevc"), []byte("hevx"))
// Heif matches a High Efficiency Image File Format (HEIF) file.
Heif = ftyp([]byte("mif1"), []byte("heim"), []byte("heis"), []byte("avic"))
// HeifSequence matches a High Efficiency Image File Format (HEIF) file sequence.
HeifSequence = ftyp([]byte("msf1"), []byte("hevm"), []byte("hevs"), []byte("avcs"))
// TODO: add support for remaining video formats at ftyps.com.
)

@ -1,4 +1,4 @@
package matchers
package magic
import (
"bytes"
@ -7,8 +7,8 @@ import (
// Shp matches a shape format file.
// https://www.esri.com/library/whitepapers/pdfs/shapefile.pdf
func Shp(in []byte, _ uint32) bool {
if len(in) < 112 {
func Shp(raw []byte, limit uint32) bool {
if len(raw) < 112 {
return false
}
shapeTypes := []int{
@ -29,7 +29,7 @@ func Shp(in []byte, _ uint32) bool {
}
for _, st := range shapeTypes {
if st == int(binary.LittleEndian.Uint32(in[108:112])) {
if st == int(binary.LittleEndian.Uint32(raw[108:112])) {
return true
}
}
@ -39,6 +39,6 @@ func Shp(in []byte, _ uint32) bool {
// Shx matches a shape index format file.
// https://www.esri.com/library/whitepapers/pdfs/shapefile.pdf
func Shx(in []byte, _ uint32) bool {
return bytes.HasPrefix(in, []byte{0x00, 0x00, 0x27, 0x0A})
func Shx(raw []byte, limit uint32) bool {
return bytes.HasPrefix(raw, []byte{0x00, 0x00, 0x27, 0x0A})
}

@ -0,0 +1,96 @@
package magic
import "bytes"
var (
// Png matches a Portable Network Graphics file.
Png = prefix([]byte{0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A})
// Jpg matches a Joint Photographic Experts Group file.
Jpg = prefix([]byte{0xFF, 0xD8, 0xFF})
// Jp2 matches a JPEG 2000 Image file (ISO 15444-1).
Jp2 = jpeg2k([]byte{0x6a, 0x70, 0x32, 0x20})
// Jpx matches a JPEG 2000 Image file (ISO 15444-2).
Jpx = jpeg2k([]byte{0x6a, 0x70, 0x78, 0x20})
// Jpm matches a JPEG 2000 Image file (ISO 15444-6).
Jpm = jpeg2k([]byte{0x6a, 0x70, 0x6D, 0x20})
// Gif matches a Graphics Interchange Format file.
Gif = prefix([]byte("GIF87a"), []byte("GIF89a"))
// Bmp matches a bitmap image file.
Bmp = prefix([]byte{0x42, 0x4D})
// Ps matches a PostScript file.
Ps = prefix([]byte("%!PS-Adobe-"))
// Psd matches a Photoshop Document file.
Psd = prefix([]byte("8BPS"))
// Ico matches an ICO file.
Ico = prefix([]byte{0x00, 0x00, 0x01, 0x00}, []byte{0x00, 0x00, 0x02, 0x00})
// Icns matches an ICNS (Apple Icon Image format) file.
Icns = prefix([]byte("icns"))
// Tiff matches a Tagged Image File Format file.
Tiff = prefix([]byte{0x49, 0x49, 0x2A, 0x00}, []byte{0x4D, 0x4D, 0x00, 0x2A})
// Bpg matches a Better Portable Graphics file.
Bpg = prefix([]byte{0x42, 0x50, 0x47, 0xFB})
// Xcf matches GIMP image data.
Xcf = prefix([]byte("gimp xcf"))
// Pat matches GIMP pattern data.
Pat = offset([]byte("GPAT"), 20)
// Gbr matches GIMP brush data.
Gbr = offset([]byte("GIMP"), 20)
// Hdr matches Radiance HDR image.
// https://web.archive.org/web/20060913152809/http://local.wasp.uwa.edu.au/~pbourke/dataformats/pic/
Hdr = prefix([]byte("#?RADIANCE\n"))
// Xpm matches X PixMap image data.
Xpm = prefix([]byte{0x2F, 0x2A, 0x20, 0x58, 0x50, 0x4D, 0x20, 0x2A, 0x2F})
)
func jpeg2k(sig []byte) Detector {
return func(raw []byte, limit uint32) bool {
if len(raw) < 24 {
return false
}
if !bytes.Equal(raw[4:8], []byte{0x6A, 0x50, 0x20, 0x20}) &&
!bytes.Equal(raw[4:8], []byte{0x6A, 0x50, 0x32, 0x20}) {
return false
}
return bytes.Equal(raw[20:24], sig)
}
}
// Webp matches a WebP file.
func Webp(raw []byte, limit uint32) bool {
return len(raw) > 12 &&
bytes.Equal(raw[0:4], []byte("RIFF")) &&
bytes.Equal(raw[8:12], []byte{0x57, 0x45, 0x42, 0x50})
}
// Dwg matches a CAD drawing file.
func Dwg(raw []byte, limit uint32) bool {
if len(raw) < 6 || raw[0] != 0x41 || raw[1] != 0x43 {
return false
}
dwgVersions := [][]byte{
{0x31, 0x2E, 0x34, 0x30},
{0x31, 0x2E, 0x35, 0x30},
{0x32, 0x2E, 0x31, 0x30},
{0x31, 0x30, 0x30, 0x32},
{0x31, 0x30, 0x30, 0x33},
{0x31, 0x30, 0x30, 0x34},
{0x31, 0x30, 0x30, 0x36},
{0x31, 0x30, 0x30, 0x39},
{0x31, 0x30, 0x31, 0x32},
{0x31, 0x30, 0x31, 0x34},
{0x31, 0x30, 0x31, 0x35},
{0x31, 0x30, 0x31, 0x38},
{0x31, 0x30, 0x32, 0x31},
{0x31, 0x30, 0x32, 0x34},
{0x31, 0x30, 0x33, 0x32},
}
for _, d := range dwgVersions {
if bytes.Equal(raw[2:6], d) {
return true
}
}
return false
}

@ -0,0 +1,17 @@
package magic
// Glb matches a glTF model format file.
// GLB is the binary file format representation of 3D models save in
// the GL transmission Format (glTF).
// see more: https://docs.fileformat.com/3d/glb/
// https://www.iana.org/assignments/media-types/model/gltf-binary
// GLB file format is based on little endian and its header structure
// show below:
//
// <-- 12-byte header -->
// | magic | version | length |
// | (uint32) | (uint32) | (uint32) |
// | \x67\x6C\x54\x46 | \x01\x00\x00\x00 | ... |
// | g l T F | 1 | ... |
var Glb = prefix([]byte("\x67\x6C\x54\x46\x02\x00\x00\x00"),
[]byte("\x67\x6C\x54\x46\x01\x00\x00\x00"))

@ -1,4 +1,4 @@
package matchers
package magic
import (
"bytes"
@ -92,36 +92,36 @@ func msoXML(in []byte, prefixes ...string) bool {
}
// Xlsx matches a Microsoft Excel 2007 file.
func Xlsx(in []byte, _ uint32) bool {
return msoXML(in, xlsxSigFiles...)
func Xlsx(raw []byte, limit uint32) bool {
return msoXML(raw, xlsxSigFiles...)
}
// Docx matches a Microsoft Word 2007 file.
func Docx(in []byte, _ uint32) bool {
return msoXML(in, docxSigFiles...)
func Docx(raw []byte, limit uint32) bool {
return msoXML(raw, docxSigFiles...)
}
// Pptx matches a Microsoft PowerPoint 2007 file.
func Pptx(in []byte, _ uint32) bool {
return msoXML(in, pptxSigFiles...)
func Pptx(raw []byte, limit uint32) bool {
return msoXML(raw, pptxSigFiles...)
}
// Ole matches an Open Linking and Embedding file.
//
// https://en.wikipedia.org/wiki/Object_Linking_and_Embedding
func Ole(in []byte, _ uint32) bool {
return bytes.HasPrefix(in, []byte{0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1})
func Ole(raw []byte, limit uint32) bool {
return bytes.HasPrefix(raw, []byte{0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1})
}
// Aaf matches an Advanced Authoring Format file.
// See: https://pyaaf.readthedocs.io/en/latest/about.html
// See: https://en.wikipedia.org/wiki/Advanced_Authoring_Format
func Aaf(in []byte, _ uint32) bool {
if len(in) < 31 {
func Aaf(raw []byte, limit uint32) bool {
if len(raw) < 31 {
return false
}
return bytes.HasPrefix(in[8:], []byte{0x41, 0x41, 0x46, 0x42, 0x0D, 0x00, 0x4F, 0x4D}) &&
(in[30] == 0x09 || in[30] == 0x0C)
return bytes.HasPrefix(raw[8:], []byte{0x41, 0x41, 0x46, 0x42, 0x0D, 0x00, 0x4F, 0x4D}) &&
(raw[30] == 0x09 || raw[30] == 0x0C)
}
// Doc matches a Microsoft Word 97-2003 file.
@ -131,25 +131,25 @@ func Aaf(in []byte, _ uint32) bool {
// Ole is a container for Doc, Ppt, Pub and Xls.
// Right now, when an Ole file is detected, it is considered to be a Doc file
// if the checks for Ppt, Pub and Xls failed.
func Doc(in []byte, _ uint32) bool {
func Doc(raw []byte, limit uint32) bool {
return true
}
// Ppt matches a Microsoft PowerPoint 97-2003 file or a PowerPoint 95 presentation.
func Ppt(in []byte, _ uint32) bool {
func Ppt(raw []byte, limit uint32) bool {
// Root CLSID test is the safest way to detect identify OLE, however, the format
// often places the root CLSID at the end of the file.
if matchOleClsid(in, []byte{
if matchOleClsid(raw, []byte{
0x10, 0x8d, 0x81, 0x64, 0x9b, 0x4f, 0xcf, 0x11,
0x86, 0xea, 0x00, 0xaa, 0x00, 0xb9, 0x29, 0xe8,
}) || matchOleClsid(in, []byte{
}) || matchOleClsid(raw, []byte{
0x70, 0xae, 0x7b, 0xea, 0x3b, 0xfb, 0xcd, 0x11,
0xa9, 0x03, 0x00, 0xaa, 0x00, 0x51, 0x0e, 0xa3,
}) {
return true
}
lin := len(in)
lin := len(raw)
if lin < 520 {
return false
}
@ -159,33 +159,33 @@ func Ppt(in []byte, _ uint32) bool {
{0x0F, 0x00, 0xE8, 0x03},
}
for _, h := range pptSubHeaders {
if bytes.HasPrefix(in[512:], h) {
if bytes.HasPrefix(raw[512:], h) {
return true
}
}
if bytes.HasPrefix(in[512:], []byte{0xFD, 0xFF, 0xFF, 0xFF}) &&
in[518] == 0x00 && in[519] == 0x00 {
if bytes.HasPrefix(raw[512:], []byte{0xFD, 0xFF, 0xFF, 0xFF}) &&
raw[518] == 0x00 && raw[519] == 0x00 {
return true
}
return lin > 1152 && bytes.Contains(in[1152:min(4096, lin)],
return lin > 1152 && bytes.Contains(raw[1152:min(4096, lin)],
[]byte("P\x00o\x00w\x00e\x00r\x00P\x00o\x00i\x00n\x00t\x00 D\x00o\x00c\x00u\x00m\x00e\x00n\x00t"))
}
// Xls matches a Microsoft Excel 97-2003 file.
func Xls(in []byte, _ uint32) bool {
func Xls(raw []byte, limit uint32) bool {
// Root CLSID test is the safest way to detect identify OLE, however, the format
// often places the root CLSID at the end of the file.
if matchOleClsid(in, []byte{
if matchOleClsid(raw, []byte{
0x10, 0x08, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00,
}) || matchOleClsid(in, []byte{
}) || matchOleClsid(raw, []byte{
0x20, 0x08, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00,
}) {
return true
}
lin := len(in)
lin := len(raw)
if lin < 520 {
return false
}
@ -199,26 +199,26 @@ func Xls(in []byte, _ uint32) bool {
{0xFD, 0xFF, 0xFF, 0xFF, 0x29},
}
for _, h := range xlsSubHeaders {
if bytes.HasPrefix(in[512:], h) {
if bytes.HasPrefix(raw[512:], h) {
return true
}
}
return lin > 1152 && bytes.Contains(in[1152:min(4096, lin)],
return lin > 1152 && bytes.Contains(raw[1152:min(4096, lin)],
[]byte("W\x00k\x00s\x00S\x00S\x00W\x00o\x00r\x00k\x00B\x00o\x00o\x00k"))
}
// Pub matches a Microsoft Publisher file.
func Pub(in []byte, _ uint32) bool {
return matchOleClsid(in, []byte{
func Pub(raw []byte, limit uint32) bool {
return matchOleClsid(raw, []byte{
0x01, 0x12, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0xC0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46,
})
}
// Msg matches a Microsoft Outlook email file.
func Msg(in []byte, _ uint32) bool {
return matchOleClsid(in, []byte{
func Msg(raw []byte, limit uint32) bool {
return matchOleClsid(raw, []byte{
0x0B, 0x0D, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00,
0xC0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46,
})

@ -0,0 +1,42 @@
package magic
import (
"bytes"
)
/*
NOTE:
In May 2003, two Internet RFCs were published relating to the format.
The Ogg bitstream was defined in RFC 3533 (which is classified as
'informative') and its Internet content type (application/ogg) in RFC
3534 (which is, as of 2006, a proposed standard protocol). In
September 2008, RFC 3534 was obsoleted by RFC 5334, which added
content types video/ogg, audio/ogg and filename extensions .ogx, .ogv,
.oga, .spx.
See:
https://tools.ietf.org/html/rfc3533
https://developer.mozilla.org/en-US/docs/Web/HTTP/Configuring_servers_for_Ogg_media#Serve_media_with_the_correct_MIME_type
https://github.com/file/file/blob/master/magic/Magdir/vorbis
*/
// Ogg matches an Ogg file.
func Ogg(raw []byte, limit uint32) bool {
return bytes.HasPrefix(raw, []byte("\x4F\x67\x67\x53\x00"))
}
// OggAudio matches an audio ogg file.
func OggAudio(raw []byte, limit uint32) bool {
return len(raw) >= 37 && (bytes.HasPrefix(raw[28:], []byte("\x7fFLAC")) ||
bytes.HasPrefix(raw[28:], []byte("\x01vorbis")) ||
bytes.HasPrefix(raw[28:], []byte("OpusHead")) ||
bytes.HasPrefix(raw[28:], []byte("Speex\x20\x20\x20")))
}
// OggVideo matches a video ogg file.
func OggVideo(raw []byte, limit uint32) bool {
return len(raw) >= 37 && (bytes.HasPrefix(raw[28:], []byte("\x80theora")) ||
bytes.HasPrefix(raw[28:], []byte("fishead\x00")) ||
bytes.HasPrefix(raw[28:], []byte("\x01video\x00\x00\x00"))) // OGM video
}

@ -0,0 +1,226 @@
// package magic holds the matching functions used to find MIME types.
package magic
import (
"bytes"
"fmt"
)
type Detector func(raw []byte, limit uint32) bool
type xmlSig struct {
// the local name of the root tag
localName []byte
// the namespace of the XML document
xmlns []byte
}
// prefix creates a Detector which returns true if any of the provided signatures
// is the prefix of the raw input.
func prefix(sigs ...[]byte) Detector {
return func(raw []byte, limit uint32) bool {
for _, s := range sigs {
if bytes.HasPrefix(raw, s) {
return true
}
}
return false
}
}
// offset creates a Detector which returns true if the provided signature can be
// found at offset in the raw input.
func offset(sig []byte, offset int) Detector {
return func(raw []byte, limit uint32) bool {
return len(raw) > offset && bytes.HasPrefix(raw[offset:], sig)
}
}
// ciPrefix is like prefix but the check is case insensitive.
func ciPrefix(sigs ...[]byte) Detector {
return func(raw []byte, limit uint32) bool {
for _, s := range sigs {
if ciCheck(s, raw) {
return true
}
}
return false
}
}
func ciCheck(sig, raw []byte) bool {
if len(raw) < len(sig)+1 {
return false
}
// perform case insensitive check
for i, b := range sig {
db := raw[i]
if 'A' <= b && b <= 'Z' {
db &= 0xDF
}
if b != db {
return false
}
}
return true
}
// xml creates a Detector which returns true if any of the provided XML signatures
// matches the raw input.
func xml(sigs ...xmlSig) Detector {
return func(raw []byte, limit uint32) bool {
raw = trimLWS(raw)
if len(raw) == 0 {
return false
}
for _, s := range sigs {
if xmlCheck(s, raw) {
return true
}
}
return false
}
}
func xmlCheck(sig xmlSig, raw []byte) bool {
raw = raw[:min(len(raw), 512)]
if len(sig.localName) == 0 {
return bytes.Index(raw, sig.xmlns) > 0
}
if len(sig.xmlns) == 0 {
return bytes.Index(raw, sig.localName) > 0
}
localNameIndex := bytes.Index(raw, sig.localName)
return localNameIndex != -1 && localNameIndex < bytes.Index(raw, sig.xmlns)
}
// markup creates a Detector which returns true is any of the HTML signatures
// matches the raw input.
func markup(sigs ...[]byte) Detector {
return func(raw []byte, limit uint32) bool {
raw = trimLWS(raw)
if len(raw) == 0 {
return false
}
for _, s := range sigs {
if markupCheck(s, raw) {
return true
}
}
return false
}
}
func markupCheck(sig, raw []byte) bool {
if len(raw) < len(sig)+1 {
return false
}
// perform case insensitive check
for i, b := range sig {
db := raw[i]
if 'A' <= b && b <= 'Z' {
db &= 0xDF
}
if b != db {
return false
}
}
// Next byte must be space or right angle bracket.
if db := raw[len(sig)]; db != ' ' && db != '>' {
return false
}
return true
}
// ftyp creates a Detector which returns true if any of the FTYP signatures
// matches the raw input.
func ftyp(sigs ...[]byte) Detector {
return func(raw []byte, limit uint32) bool {
if len(raw) < 12 {
return false
}
for _, s := range sigs {
if bytes.Equal(raw[4:12], append([]byte("ftyp"), s...)) {
return true
}
}
return false
}
}
func newXmlSig(localName, xmlns string) xmlSig {
ret := xmlSig{xmlns: []byte(xmlns)}
if localName != "" {
ret.localName = []byte(fmt.Sprintf("<%s", localName))
}
return ret
}
// A valid shebang starts with the "#!" characters,
// followed by any number of spaces,
// followed by the path to the interpreter,
// and, optionally, followed by the arguments for the interpreter.
//
// Ex:
// #! /usr/bin/env php
// /usr/bin/env is the interpreter, php is the first and only argument.
func shebang(sigs ...[]byte) Detector {
return func(raw []byte, limit uint32) bool {
for _, s := range sigs {
if shebangCheck(s, firstLine(raw)) {
return true
}
}
return false
}
}
func shebangCheck(sig, raw []byte) bool {
if len(raw) < len(sig)+2 {
return false
}
if raw[0] != '#' || raw[1] != '!' {
return false
}
return bytes.Equal(trimLWS(trimRWS(raw[2:])), sig)
}
// trimLWS trims whitespace from beginning of the input.
func trimLWS(in []byte) []byte {
firstNonWS := 0
for ; firstNonWS < len(in) && isWS(in[firstNonWS]); firstNonWS++ {
}
return in[firstNonWS:]
}
// trimRWS trims whitespace from the end of the input.
func trimRWS(in []byte) []byte {
lastNonWS := len(in) - 1
for ; lastNonWS > 0 && isWS(in[lastNonWS]); lastNonWS-- {
}
return in[:lastNonWS+1]
}
func firstLine(in []byte) []byte {
lineEnd := 0
for ; lineEnd < len(in) && in[lineEnd] != '\n'; lineEnd++ {
}
return in[:lineEnd]
}
func isWS(b byte) bool {
return b == '\t' || b == '\n' || b == '\x0c' || b == '\r' || b == ' '
}
func min(a, b int) int {
if a < b {
return a
}
return b
}

@ -0,0 +1,268 @@
package magic
import (
"bytes"
"github.com/gabriel-vasile/mimetype/internal/charset"
"github.com/gabriel-vasile/mimetype/internal/json"
)
var (
// Html matches a Hypertext Markup Language file.
Html = markup(
[]byte("<!DOCTYPE HTML"),
[]byte("<HTML"),
[]byte("<HEAD"),
[]byte("<SCRIPT"),
[]byte("<IFRAME"),
[]byte("<H1"),
[]byte("<DIV"),
[]byte("<FONT"),
[]byte("<TABLE"),
[]byte("<A"),
[]byte("<STYLE"),
[]byte("<TITLE"),
[]byte("<B"),
[]byte("<BODY"),
[]byte("<BR"),
[]byte("<P"),
[]byte("<!--"),
)
// Xml matches an Extensible Markup Language file.
Xml = markup([]byte("<?XML"))
// Owl2 matches an Owl ontology file.
Owl2 = xml(newXmlSig("Ontology", `xmlns="http://www.w3.org/2002/07/owl#"`))
// Rss matches a Rich Site Summary file.
Rss = xml(newXmlSig("rss", ""))
// Atom matches an Atom Syndication Format file.
Atom = xml(newXmlSig("feed", `xmlns="http://www.w3.org/2005/Atom"`))
// Kml matches a Keyhole Markup Language file.
Kml = xml(
newXmlSig("kml", `xmlns="http://www.opengis.net/kml/2.2"`),
newXmlSig("kml", `xmlns="http://earth.google.com/kml/2.0"`),
newXmlSig("kml", `xmlns="http://earth.google.com/kml/2.1"`),
newXmlSig("kml", `xmlns="http://earth.google.com/kml/2.2"`),
)
// Xliff matches a XML Localization Interchange File Format file.
Xliff = xml(newXmlSig("xliff", `xmlns="urn:oasis:names:tc:xliff:document:1.2"`))
// Collada matches a COLLAborative Design Activity file.
Collada = xml(newXmlSig("COLLADA", `xmlns="http://www.collada.org/2005/11/COLLADASchema"`))
// Gml matches a Geography Markup Language file.
Gml = xml(
newXmlSig("", `xmlns:gml="http://www.opengis.net/gml"`),
newXmlSig("", `xmlns:gml="http://www.opengis.net/gml/3.2"`),
newXmlSig("", `xmlns:gml="http://www.opengis.net/gml/3.3/exr"`),
)
// Gpx matches a GPS Exchange Format file.
Gpx = xml(newXmlSig("gpx", `xmlns="http://www.topografix.com/GPX/1/1"`))
// Tcx matches a Training Center XML file.
Tcx = xml(newXmlSig("TrainingCenterDatabase", `xmlns="http://www.garmin.com/xmlschemas/TrainingCenterDatabase/v2"`))
// X3d matches an Extensible 3D Graphics file.
X3d = xml(newXmlSig("X3D", `xmlns:xsd="http://www.w3.org/2001/XMLSchema-instance"`))
// Amf matches an Additive Manufacturing XML file.
Amf = xml(newXmlSig("amf", ""))
// Threemf matches a 3D Manufacturing Format file.
Threemf = xml(newXmlSig("model", `xmlns="http://schemas.microsoft.com/3dmanufacturing/core/2015/02"`))
// Xfdf matches a XML Forms Data Format file.
Xfdf = xml(newXmlSig("xfdf", `xmlns="http://ns.adobe.com/xfdf/"`))
// VCard matches a Virtual Contact File.
VCard = ciPrefix([]byte("BEGIN:VCARD\n"), []byte("BEGIN:VCARD\r\n"))
// ICalendar matches a iCalendar file.
ICalendar = ciPrefix([]byte("BEGIN:VCALENDAR\n"), []byte("BEGIN:VCALENDAR\r\n"))
phpPageF = ciPrefix(
[]byte("<?PHP"),
[]byte("<?\n"),
[]byte("<?\r"),
[]byte("<? "),
)
phpScriptF = shebang(
[]byte("/usr/local/bin/php"),
[]byte("/usr/bin/php"),
[]byte("/usr/bin/env php"),
)
// Js matches a Javascript file.
Js = shebang(
[]byte("/bin/node"),
[]byte("/usr/bin/node"),
[]byte("/bin/nodejs"),
[]byte("/usr/bin/nodejs"),
[]byte("/usr/bin/env node"),
[]byte("/usr/bin/env nodejs"),
)
// Lua matches a Lua programming language file.
Lua = shebang(
[]byte("/usr/bin/lua"),
[]byte("/usr/local/bin/lua"),
[]byte("/usr/bin/env lua"),
)
// Perl matches a Perl programming language file.
Perl = shebang(
[]byte("/usr/bin/perl"),
[]byte("/usr/bin/env perl"),
)
// Python matches a Python programming language file.
Python = shebang(
[]byte("/usr/bin/python"),
[]byte("/usr/local/bin/python"),
[]byte("/usr/bin/env python"),
)
// Tcl matches a Tcl programming language file.
Tcl = shebang(
[]byte("/usr/bin/tcl"),
[]byte("/usr/local/bin/tcl"),
[]byte("/usr/bin/env tcl"),
[]byte("/usr/bin/tclsh"),
[]byte("/usr/local/bin/tclsh"),
[]byte("/usr/bin/env tclsh"),
[]byte("/usr/bin/wish"),
[]byte("/usr/local/bin/wish"),
[]byte("/usr/bin/env wish"),
)
// Rtf matches a Rich Text Format file.
Rtf = prefix([]byte("{\\rtf1"))
)
// Text matches a plain text file.
//
// TODO: This function does not parse BOM-less UTF16 and UTF32 files. Not really
// sure it should. Linux file utility also requires a BOM for UTF16 and UTF32.
func Text(raw []byte, limit uint32) bool {
// First look for BOM.
if cset := charset.FromBOM(raw); cset != "" {
return true
}
return isText(raw)
}
// Php matches a PHP: Hypertext Preprocessor file.
func Php(raw []byte, limit uint32) bool {
if res := phpPageF(raw, limit); res {
return res
}
return phpScriptF(raw, limit)
}
// Json matches a JavaScript Object Notation file.
func Json(raw []byte, limit uint32) bool {
parsed, err := json.Scan(raw)
if len(raw) < int(limit) {
return err == nil
}
return parsed == len(raw)
}
// GeoJson matches a RFC 7946 GeoJSON file.
//
// GeoJson detection implies searching for key:value pairs like: `"type": "Feature"`
// in the input.
// BUG(gabriel-vasile): The "type" key should be searched for in the root object.
func GeoJson(raw []byte, limit uint32) bool {
raw = trimLWS(raw)
if len(raw) == 0 {
return false
}
// GeoJSON is always a JSON object, not a JSON array.
if raw[0] != '{' {
return false
}
s := []byte(`"type"`)
si, sl := bytes.Index(raw, s), len(s)
if si == -1 {
return false
}
// If the "type" string is the suffix of the input,
// there is no need to search for the value of the key.
if si+sl == len(raw) {
return false
}
// Skip the "type" part.
raw = raw[si+sl:]
// Skip any whitespace before the colon.
raw = trimLWS(raw)
// Check for colon.
if len(raw) == 0 || raw[0] != ':' {
return false
}
// Skip any whitespace after the colon.
raw = trimLWS(raw[1:])
geoJsonTypes := [][]byte{
[]byte(`"Feature"`),
[]byte(`"FeatureCollection"`),
[]byte(`"Point"`),
[]byte(`"LineString"`),
[]byte(`"Polygon"`),
[]byte(`"MultiPoint"`),
[]byte(`"MultiLineString"`),
[]byte(`"MultiPolygon"`),
[]byte(`"GeometryCollection"`),
}
for _, t := range geoJsonTypes {
if bytes.HasPrefix(raw, t) {
return true
}
}
return false
}
// NdJson matches a Newline delimited JSON file.
func NdJson(raw []byte, limit uint32) bool {
// Separator with carriage return and new line `\r\n`.
srn := []byte{0x0D, 0x0A}
// Separator with only new line `\n`.
sn := []byte{0x0A}
// Total bytes scanned.
parsed := 0
// Split by `srn`.
for rni, insrn := range bytes.Split(raw, srn) {
// Separator byte count should be added only after the first split.
if rni != 0 {
// Add two as `\r\n` is used for split.
parsed += 2
}
// Split again by `sn`.
for ni, insn := range bytes.Split(insrn, sn) {
// Separator byte count should be added only after the first split.
if ni != 0 {
// Add one as `\n` is used for split.
parsed++
}
// Empty line is valid.
if len(insn) == 0 {
continue
}
p, err := json.Scan(insn)
parsed += p
if parsed < int(limit) && err != nil {
return false
}
}
}
// Empty inputs should not pass as valid NDJSON with 0 lines.
return parsed > 2 && parsed == len(raw)
}
// Svg matches a SVG file.
func Svg(raw []byte, limit uint32) bool {
return bytes.Contains(raw, []byte("<svg"))
}
// isText considers any file containing null bytes as a binary file.
// There is plenty room for disagreement regarding what should be considered a
// text file. This approach is used by diff, cat, and other linux utilities.
func isText(raw []byte) bool {
l := 8000
if len(raw) > l {
raw = raw[:l]
}
return bytes.IndexByte(raw, 0) == -1
}

@ -1,4 +1,4 @@
package matchers
package magic
import (
"bytes"
@ -7,13 +7,13 @@ import (
)
// Csv matches a comma-separated values file.
func Csv(in []byte, _ uint32) bool {
return sv(in, ',')
func Csv(raw []byte, limit uint32) bool {
return sv(raw, ',')
}
// Tsv matches a tab-separated values file.
func Tsv(in []byte, _ uint32) bool {
return sv(in, '\t')
func Tsv(raw []byte, limit uint32) bool {
return sv(raw, '\t')
}
func sv(in []byte, comma rune) bool {

@ -0,0 +1,4 @@
package magic
// Torrent has bencoded text in the beginning.
var Torrent = prefix([]byte("d8:announce"))

@ -1,17 +1,29 @@
package matchers
package magic
import (
"bytes"
)
var (
// Flv matches a Flash video file.
Flv = prefix([]byte("\x46\x4C\x56\x01"))
// Asf matches an Advanced Systems Format file.
Asf = prefix([]byte{
0x30, 0x26, 0xB2, 0x75, 0x8E, 0x66, 0xCF, 0x11,
0xA6, 0xD9, 0x00, 0xAA, 0x00, 0x62, 0xCE, 0x6C,
})
// Rmvb matches a RealMedia Variable Bitrate file.
Rmvb = prefix([]byte{0x2E, 0x52, 0x4D, 0x46})
)
// WebM matches a WebM file.
func WebM(in []byte, _ uint32) bool {
return isMatroskaFileTypeMatched(in, "webm")
func WebM(raw []byte, limit uint32) bool {
return isMatroskaFileTypeMatched(raw, "webm")
}
// Mkv matches a mkv file.
func Mkv(in []byte, _ uint32) bool {
return isMatroskaFileTypeMatched(in, "matroska")
func Mkv(raw []byte, limit uint32) bool {
return isMatroskaFileTypeMatched(raw, "matroska")
}
// isMatroskaFileTypeMatched is used for webm and mkv file matching.
@ -44,33 +56,15 @@ func isFileTypeNamePresent(in []byte, flType string) bool {
return false
}
// Flv matches a Flash video file.
func Flv(in []byte, _ uint32) bool {
return bytes.HasPrefix(in, []byte("\x46\x4C\x56\x01"))
}
// Mpeg matches a Moving Picture Experts Group file.
func Mpeg(in []byte, _ uint32) bool {
return len(in) > 3 && bytes.HasPrefix(in, []byte{0x00, 0x00, 0x01}) &&
in[3] >= 0xB0 && in[3] <= 0xBF
func Mpeg(raw []byte, limit uint32) bool {
return len(raw) > 3 && bytes.HasPrefix(raw, []byte{0x00, 0x00, 0x01}) &&
raw[3] >= 0xB0 && raw[3] <= 0xBF
}
// Avi matches an Audio Video Interleaved file.
func Avi(in []byte, _ uint32) bool {
return len(in) > 16 &&
bytes.Equal(in[:4], []byte("RIFF")) &&
bytes.Equal(in[8:16], []byte("AVI LIST"))
}
// Asf matches an Advanced Systems Format file.
func Asf(in []byte, _ uint32) bool {
return bytes.HasPrefix(in, []byte{
0x30, 0x26, 0xB2, 0x75, 0x8E, 0x66, 0xCF, 0x11,
0xA6, 0xD9, 0x00, 0xAA, 0x00, 0x62, 0xCE, 0x6C,
})
}
// Rmvb matches a RealMedia Variable Bitrate file.
func Rmvb(in []byte, _ uint32) bool {
return bytes.HasPrefix(in, []byte{0x2E, 0x52, 0x4D, 0x46})
func Avi(raw []byte, limit uint32) bool {
return len(raw) > 16 &&
bytes.Equal(raw[:4], []byte("RIFF")) &&
bytes.Equal(raw[8:16], []byte("AVI LIST"))
}

@ -0,0 +1,48 @@
package magic
var (
// Odt matches an OpenDocument Text file.
Odt = offset([]byte("mimetypeapplication/vnd.oasis.opendocument.text"), 30)
// Ott matches an OpenDocument Text Template file.
Ott = offset([]byte("mimetypeapplication/vnd.oasis.opendocument.text-template"), 30)
// Ods matches an OpenDocument Spreadsheet file.
Ods = offset([]byte("mimetypeapplication/vnd.oasis.opendocument.spreadsheet"), 30)
// Ots matches an OpenDocument Spreadsheet Template file.
Ots = offset([]byte("mimetypeapplication/vnd.oasis.opendocument.spreadsheet-template"), 30)
// Odp matches an OpenDocument Presentation file.
Odp = offset([]byte("mimetypeapplication/vnd.oasis.opendocument.presentation"), 30)
// Otp matches an OpenDocument Presentation Template file.
Otp = offset([]byte("mimetypeapplication/vnd.oasis.opendocument.presentation-template"), 30)
// Odg matches an OpenDocument Drawing file.
Odg = offset([]byte("mimetypeapplication/vnd.oasis.opendocument.graphics"), 30)
// Otg matches an OpenDocument Drawing Template file.
Otg = offset([]byte("mimetypeapplication/vnd.oasis.opendocument.graphics-template"), 30)
// Odf matches an OpenDocument Formula file.
Odf = offset([]byte("mimetypeapplication/vnd.oasis.opendocument.formula"), 30)
// Odc matches an OpenDocument Chart file.
Odc = offset([]byte("mimetypeapplication/vnd.oasis.opendocument.chart"), 30)
// Epub matches an EPUB file.
Epub = offset([]byte("mimetypeapplication/epub+zip"), 30)
// Sxc matches an OpenOffice Spreadsheet file.
Sxc = offset([]byte("mimetypeapplication/vnd.sun.xml.calc"), 30)
)
// Zip matches a zip archive.
func Zip(raw []byte, limit uint32) bool {
return len(raw) > 3 &&
raw[0] == 0x50 && raw[1] == 0x4B &&
(raw[2] == 0x3 || raw[2] == 0x5 || raw[2] == 0x7) &&
(raw[3] == 0x4 || raw[3] == 0x6 || raw[3] == 0x8)
}
// Jar matches a Java archive file.
func Jar(raw []byte, limit uint32) bool {
t := zipTokenizer{in: raw}
for i, tok := 0, t.next(); i < 10 && tok != ""; i, tok = i+1, t.next() {
if tok == "META-INF/MANIFEST.MF" {
return true
}
}
return false
}

@ -1,102 +0,0 @@
package matchers
import "bytes"
// SevenZ matches a 7z archive.
func SevenZ(in []byte, _ uint32) bool {
return bytes.HasPrefix(in, []byte{0x37, 0x7A, 0xBC, 0xAF, 0x27, 0x1C})
}
// Gzip matched gzip files based on http://www.zlib.org/rfc-gzip.html#header-trailer.
func Gzip(in []byte, _ uint32) bool {
return bytes.HasPrefix(in, []byte{0x1f, 0x8b})
}
// Crx matches a Chrome extension file: a zip archive prepended by "Cr24".
func Crx(in []byte, _ uint32) bool {
return bytes.HasPrefix(in, []byte("Cr24"))
}
// Tar matches a (t)ape (ar)chive file.
func Tar(in []byte, _ uint32) bool {
return len(in) > 262 && bytes.Equal(in[257:262], []byte("ustar"))
}
// Fits matches an Flexible Image Transport System file.
func Fits(in []byte, _ uint32) bool {
return bytes.HasPrefix(in, []byte{
0x53, 0x49, 0x4D, 0x50, 0x4C, 0x45, 0x20, 0x20, 0x3D, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x54,
})
}
// Xar matches an eXtensible ARchive format file.
func Xar(in []byte, _ uint32) bool {
return bytes.HasPrefix(in, []byte{0x78, 0x61, 0x72, 0x21})
}
// Bz2 matches a bzip2 file.
func Bz2(in []byte, _ uint32) bool {
return bytes.HasPrefix(in, []byte{0x42, 0x5A, 0x68})
}
// Ar matches an ar (Unix) archive file.
func Ar(in []byte, _ uint32) bool {
return bytes.HasPrefix(in, []byte{0x21, 0x3C, 0x61, 0x72, 0x63, 0x68, 0x3E})
}
// Deb matches a Debian package file.
func Deb(in []byte, _ uint32) bool {
return len(in) > 8 && bytes.HasPrefix(in[8:], []byte{
0x64, 0x65, 0x62, 0x69, 0x61, 0x6E, 0x2D,
0x62, 0x69, 0x6E, 0x61, 0x72, 0x79,
})
}
// Rar matches a RAR archive file.
func Rar(in []byte, _ uint32) bool {
return bytes.HasPrefix(in, []byte("Rar!\x1A\x07\x00")) ||
bytes.HasPrefix(in, []byte("Rar!\x1A\x07\x01\x00"))
}
// Warc matches a Web ARChive file.
func Warc(in []byte, _ uint32) bool {
return bytes.HasPrefix(in, []byte("WARC/"))
}
// Zstd matches a Zstandard archive file.
func Zstd(in []byte, _ uint32) bool {
return len(in) >= 4 &&
(0x22 <= in[0] && in[0] <= 0x28 || in[0] == 0x1E) && // Different Zstandard versions.
bytes.HasPrefix(in[1:], []byte{0xB5, 0x2F, 0xFD})
}
// Cab matches a Cabinet archive file.
func Cab(in []byte, _ uint32) bool {
return bytes.HasPrefix(in, []byte("MSCF"))
}
// Rpm matches an RPM or Delta RPM package file.
func Rpm(in []byte, _ uint32) bool {
return len(in) > 4 &&
(bytes.HasPrefix(in, []byte{0xed, 0xab, 0xee, 0xdb}) ||
bytes.HasPrefix(in, []byte("drpm")))
}
// Xz matches an xz compressed stream based on https://tukaani.org/xz/xz-file-format.txt.
func Xz(in []byte, _ uint32) bool {
return bytes.HasPrefix(in, []byte{0xFD, 0x37, 0x7A, 0x58, 0x5A, 0x00})
}
// Lzip matches an Lzip compressed file.
func Lzip(in []byte, _ uint32) bool {
return bytes.HasPrefix(in, []byte{0x4c, 0x5a, 0x49, 0x50})
}
// Cpio matches a cpio archive file
func Cpio(in []byte, _ uint32) bool {
return bytes.HasPrefix(in, []byte("070707")) ||
bytes.HasPrefix(in, []byte("070701")) ||
bytes.HasPrefix(in, []byte("070702"))
}

@ -1,101 +0,0 @@
package matchers
import (
"bytes"
"encoding/binary"
)
// Mp3 matches an mp3 file.
func Mp3(in []byte, _ uint32) bool {
if len(in) < 3 {
return false
}
if bytes.HasPrefix(in, []byte("ID3")) {
// MP3s with an ID3v2 tag will start with "ID3"
// ID3v1 tags, however appear at the end of the file.
return true
}
// Match MP3 files without tags
switch binary.BigEndian.Uint16(in[:2]) & 0xFFFE {
case 0xFFFA:
// MPEG ADTS, layer III, v1
return true
case 0xFFF2:
// MPEG ADTS, layer III, v2
return true
case 0xFFE2:
// MPEG ADTS, layer III, v2.5
return true
}
return false
}
// Flac matches a Free Lossless Audio Codec file.
func Flac(in []byte, _ uint32) bool {
return bytes.HasPrefix(in, []byte("\x66\x4C\x61\x43\x00\x00\x00\x22"))
}
// Midi matches a Musical Instrument Digital Interface file.
func Midi(in []byte, _ uint32) bool {
return bytes.HasPrefix(in, []byte("\x4D\x54\x68\x64"))
}
// Ape matches a Monkey's Audio file.
func Ape(in []byte, _ uint32) bool {
return bytes.HasPrefix(in, []byte("\x4D\x41\x43\x20\x96\x0F\x00\x00\x34\x00\x00\x00\x18\x00\x00\x00\x90\xE3"))
}
// MusePack matches a Musepack file.
func MusePack(in []byte, _ uint32) bool {
return bytes.HasPrefix(in, []byte("MPCK"))
}
// Wav matches a Waveform Audio File Format file.
func Wav(in []byte, _ uint32) bool {
return len(in) > 12 &&
bytes.Equal(in[:4], []byte("RIFF")) &&
bytes.Equal(in[8:12], []byte("\x57\x41\x56\x45"))
}
// Aiff matches Audio Interchange File Format file.
func Aiff(in []byte, _ uint32) bool {
return len(in) > 12 &&
bytes.Equal(in[:4], []byte("\x46\x4F\x52\x4D")) &&
bytes.Equal(in[8:12], []byte("\x41\x49\x46\x46"))
}
// Au matches a Sun Microsystems au file.
func Au(in []byte, _ uint32) bool {
return bytes.HasPrefix(in, []byte("\x2E\x73\x6E\x64"))
}
// Amr matches an Adaptive Multi-Rate file.
func Amr(in []byte, _ uint32) bool {
return bytes.HasPrefix(in, []byte("\x23\x21\x41\x4D\x52"))
}
// Aac matches an Advanced Audio Coding file.
func Aac(in []byte, _ uint32) bool {
return bytes.HasPrefix(in, []byte{0xFF, 0xF1}) || bytes.HasPrefix(in, []byte{0xFF, 0xF9})
}
// Voc matches a Creative Voice file.
func Voc(in []byte, _ uint32) bool {
return bytes.HasPrefix(in, []byte("Creative Voice File"))
}
// Qcp matches a Qualcomm Pure Voice file.
func Qcp(in []byte, _ uint32) bool {
return len(in) > 12 &&
bytes.Equal(in[:4], []byte("RIFF")) &&
bytes.Equal(in[8:12], []byte("QLCM"))
}
// M3u matches a Playlist file.
func M3u(in []byte, _ uint32) bool {
return len(in) > 7 &&
bytes.Equal(in[:7], []byte("#EXTM3U"))
}

@ -1,151 +0,0 @@
package matchers
import (
"bytes"
"debug/macho"
"encoding/binary"
)
// Java bytecode and Mach-O binaries share the same magic number.
// More info here https://github.com/threatstack/libmagic/blob/master/magic/Magdir/cafebabe
func classOrMachOFat(in []byte) bool {
// There should be at least 8 bytes for both of them because the only way to
// quickly distinguish them is by comparing byte at position 7
if len(in) < 8 {
return false
}
return bytes.HasPrefix(in, []byte{0xCA, 0xFE, 0xBA, 0xBE})
}
// Class matches a java class file.
func Class(in []byte, _ uint32) bool {
return classOrMachOFat(in) && in[7] > 30
}
// MachO matches Mach-O binaries format.
func MachO(in []byte, _ uint32) bool {
if classOrMachOFat(in) && in[7] < 20 {
return true
}
if len(in) < 4 {
return false
}
be := binary.BigEndian.Uint32(in)
le := binary.LittleEndian.Uint32(in)
return be == macho.Magic32 || le == macho.Magic32 || be == macho.Magic64 || le == macho.Magic64
}
// Swf matches an Adobe Flash swf file.
func Swf(in []byte, _ uint32) bool {
return bytes.HasPrefix(in, []byte("CWS")) ||
bytes.HasPrefix(in, []byte("FWS")) ||
bytes.HasPrefix(in, []byte("ZWS"))
}
// Wasm matches a web assembly File Format file.
func Wasm(in []byte, _ uint32) bool {
return bytes.HasPrefix(in, []byte{0x00, 0x61, 0x73, 0x6D})
}
// Dbf matches a dBase file.
// https://www.dbase.com/Knowledgebase/INT/db7_file_fmt.htm
func Dbf(in []byte, _ uint32) bool {
if len(in) < 4 {
return false
}
// 3rd and 4th bytes contain the last update month and day of month
if !(0 < in[2] && in[2] < 13 && 0 < in[3] && in[3] < 32) {
return false
}
// dbf type is dictated by the first byte
dbfTypes := []byte{
0x02, 0x03, 0x04, 0x05, 0x30, 0x31, 0x32, 0x42, 0x62, 0x7B, 0x82,
0x83, 0x87, 0x8A, 0x8B, 0x8E, 0xB3, 0xCB, 0xE5, 0xF5, 0xF4, 0xFB,
}
for _, b := range dbfTypes {
if in[0] == b {
return true
}
}
return false
}
// Exe matches a Windows/DOS executable file.
func Exe(in []byte, _ uint32) bool {
return bytes.HasPrefix(in, []byte{0x4D, 0x5A})
}
// Elf matches an Executable and Linkable Format file.
func Elf(in []byte, _ uint32) bool {
return bytes.HasPrefix(in, []byte{0x7F, 0x45, 0x4C, 0x46})
}
// ElfObj matches an object file.
func ElfObj(in []byte, _ uint32) bool {
return len(in) > 17 && ((in[16] == 0x01 && in[17] == 0x00) ||
(in[16] == 0x00 && in[17] == 0x01))
}
// ElfExe matches an executable file.
func ElfExe(in []byte, _ uint32) bool {
return len(in) > 17 && ((in[16] == 0x02 && in[17] == 0x00) ||
(in[16] == 0x00 && in[17] == 0x02))
}
// ElfLib matches a shared library file.
func ElfLib(in []byte, _ uint32) bool {
return len(in) > 17 && ((in[16] == 0x03 && in[17] == 0x00) ||
(in[16] == 0x00 && in[17] == 0x03))
}
// ElfDump matches a core dump file.
func ElfDump(in []byte, _ uint32) bool {
return len(in) > 17 && ((in[16] == 0x04 && in[17] == 0x00) ||
(in[16] == 0x00 && in[17] == 0x04))
}
// Dcm matches a DICOM medical format file.
func Dcm(in []byte, _ uint32) bool {
return len(in) > 131 &&
bytes.Equal(in[128:132], []byte{0x44, 0x49, 0x43, 0x4D})
}
// Nes matches a Nintendo Entertainment system ROM file.
func Nes(in []byte, _ uint32) bool {
return bytes.HasPrefix(in, []byte{0x4E, 0x45, 0x53, 0x1A})
}
// Marc matches a MARC21 (MAchine-Readable Cataloging) file.
func Marc(in []byte, _ uint32) bool {
// File is at least 24 bytes ("leader" field size)
if len(in) < 24 {
return false
}
// Fixed bytes at offset 20
if !bytes.Equal(in[20:24], []byte("4500")) {
return false
}
// First 5 bytes are ASCII digits
for i := 0; i < 5; i++ {
if in[i] < '0' || in[i] > '9' {
return false
}
}
// Field terminator is present
return bytes.Contains(in, []byte{0x1E})
}
// TzIf matches a Time Zone Information Format (TZif) file.
func TzIf(in []byte, _ uint32) bool {
return len(in) > 4 && bytes.HasPrefix(in, []byte("TZif"))
}

@ -1,25 +0,0 @@
package matchers
import "bytes"
// Sqlite matches an SQLite database file.
func Sqlite(in []byte, _ uint32) bool {
return bytes.HasPrefix(in, []byte{
0x53, 0x51, 0x4c, 0x69, 0x74, 0x65, 0x20, 0x66,
0x6f, 0x72, 0x6d, 0x61, 0x74, 0x20, 0x33, 0x00,
})
}
// MsAccessAce matches Microsoft Access dababase file.
func MsAccessAce(in []byte, _ uint32) bool {
return msAccess(in, []byte("Standard ACE DB"))
}
// MsAccessMdb matches legacy Microsoft Access database file (JET, 2003 and earlier).
func MsAccessMdb(in []byte, _ uint32) bool {
return msAccess(in, []byte("Standard Jet DB"))
}
func msAccess(in []byte, magic []byte) bool {
return len(in) > 19 && bytes.Equal(in[4:19], magic)
}

@ -1,63 +0,0 @@
package matchers
import "bytes"
// Pdf matches a Portable Document Format file.
func Pdf(in []byte, _ uint32) bool {
return bytes.HasPrefix(in, []byte{0x25, 0x50, 0x44, 0x46})
}
// Fdf matches a Forms Data Format file.
func Fdf(in []byte, _ uint32) bool {
return bytes.HasPrefix(in, []byte("%FDF"))
}
// DjVu matches a DjVu file.
func DjVu(in []byte, _ uint32) bool {
if len(in) < 12 {
return false
}
if !bytes.HasPrefix(in, []byte{0x41, 0x54, 0x26, 0x54, 0x46, 0x4F, 0x52, 0x4D}) {
return false
}
return bytes.HasPrefix(in[12:], []byte("DJVM")) ||
bytes.HasPrefix(in[12:], []byte("DJVU")) ||
bytes.HasPrefix(in[12:], []byte("DJVI")) ||
bytes.HasPrefix(in[12:], []byte("THUM"))
}
// Mobi matches a Mobi file.
func Mobi(in []byte, _ uint32) bool {
return len(in) > 67 && bytes.Equal(in[60:68], []byte("BOOKMOBI"))
}
// Lit matches a Microsoft Lit file.
func Lit(in []byte, _ uint32) bool {
return bytes.HasPrefix(in, []byte("ITOLITLS"))
}
// P7s matches an .p7s signature File (PEM, Base64).
func P7s(in []byte, _ uint32) bool {
// Check for PEM Encoding.
if bytes.HasPrefix(in, []byte("-----BEGIN PKCS7")) {
return true
}
// Check if DER Encoding is long enough.
if len(in) < 20 {
return false
}
// Magic Bytes for the signedData ASN.1 encoding.
startHeader := [][]byte{{0x30, 0x80}, {0x30, 0x81}, {0x30, 0x82}, {0x30, 0x83}, {0x30, 0x84}}
signedDataMatch := []byte{0x06, 0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x07}
// Check if Header is correct. There are multiple valid headers.
for i, match := range startHeader {
// If first bytes match, then check for ASN.1 Object Type.
if bytes.HasPrefix(in, match) {
if bytes.HasPrefix(in[i+2:], signedDataMatch) {
return true
}
}
}
return false
}

@ -1,35 +0,0 @@
package matchers
import (
"bytes"
)
// Ttf matches a TrueType font file.
func Ttf(in []byte, _ uint32) bool {
return bytes.HasPrefix(in, []byte{0x00, 0x01, 0x00, 0x00}) &&
!MsAccessAce(in, 0) && !MsAccessMdb(in, 0)
}
// Woff matches a Web Open Font Format file.
func Woff(in []byte, _ uint32) bool {
return bytes.HasPrefix(in, []byte("wOFF"))
}
// Woff2 matches a Web Open Font Format version 2 file.
func Woff2(in []byte, _ uint32) bool {
return bytes.HasPrefix(in, []byte("wOF2"))
}
// Otf matches an OpenType font file.
func Otf(in []byte, _ uint32) bool {
return bytes.HasPrefix(in, []byte{0x4F, 0x54, 0x54, 0x4F, 0x00})
}
// Eot matches an Embedded OpenType font file.
func Eot(in []byte, _ uint32) bool {
return len(in) > 35 &&
bytes.Equal(in[34:36], []byte{0x4C, 0x50}) &&
(bytes.Equal(in[8:11], []byte{0x02, 0x00, 0x01}) ||
bytes.Equal(in[8:11], []byte{0x01, 0x00, 0x00}) ||
bytes.Equal(in[8:11], []byte{0x02, 0x00, 0x02}))
}

@ -1,105 +0,0 @@
package matchers
var (
mp4Sigs = []sig{
ftypSig("avc1"), ftypSig("dash"), ftypSig("iso2"), ftypSig("iso3"),
ftypSig("iso4"), ftypSig("iso5"), ftypSig("iso6"), ftypSig("isom"),
ftypSig("mmp4"), ftypSig("mp41"), ftypSig("mp42"), ftypSig("mp4v"),
ftypSig("mp71"), ftypSig("MSNV"), ftypSig("NDAS"), ftypSig("NDSC"),
ftypSig("NSDC"), ftypSig("NSDH"), ftypSig("NDSM"), ftypSig("NDSP"),
ftypSig("NDSS"), ftypSig("NDXC"), ftypSig("NDXH"), ftypSig("NDXM"),
ftypSig("NDXP"), ftypSig("NDXS"), ftypSig("F4V "), ftypSig("F4P "),
}
threeGPSigs = []sig{
ftypSig("3gp1"), ftypSig("3gp2"), ftypSig("3gp3"), ftypSig("3gp4"),
ftypSig("3gp5"), ftypSig("3gp6"), ftypSig("3gp7"), ftypSig("3gs7"),
ftypSig("3ge6"), ftypSig("3ge7"), ftypSig("3gg6"),
}
threeG2Sigs = []sig{
ftypSig("3g24"), ftypSig("3g25"), ftypSig("3g26"), ftypSig("3g2a"),
ftypSig("3g2b"), ftypSig("3g2c"), ftypSig("KDDI"),
}
amp4Sigs = []sig{
// audio for Adobe Flash Player 9+
ftypSig("F4A "), ftypSig("F4B "),
// Apple iTunes AAC-LC (.M4A) Audio
ftypSig("M4B "), ftypSig("M4P "),
// MPEG-4 (.MP4) for SonyPSP
ftypSig("MSNV"),
// Nero Digital AAC Audio
ftypSig("NDAS"),
}
qtSigs = []sig{ftypSig("qt "), ftypSig("moov")}
mqvSigs = []sig{ftypSig("mqt ")}
m4aSigs = []sig{ftypSig("M4A ")}
m4vSigs = []sig{ftypSig("M4V "), ftypSig("M4VH"), ftypSig("M4VP")}
heicSigs = []sig{ftypSig("heic"), ftypSig("heix")}
heicSeqSigs = []sig{ftypSig("hevc"), ftypSig("hevx")}
heifSigs = []sig{
ftypSig("mif1"), ftypSig("heim"), ftypSig("heis"), ftypSig("avic"),
}
heifSeqSigs = []sig{
ftypSig("msf1"), ftypSig("hevm"), ftypSig("hevs"), ftypSig("avcs"),
}
// TODO: add support for remaining video formats at ftyps.com.
)
// Mp4 matches an MP4 file.
func Mp4(in []byte, _ uint32) bool {
return detect(in, mp4Sigs)
}
// ThreeGP matches a 3GPP file.
func ThreeGP(in []byte, _ uint32) bool {
return detect(in, threeGPSigs)
}
// ThreeG2 matches a 3GPP2 file.
func ThreeG2(in []byte, _ uint32) bool {
return detect(in, threeG2Sigs)
}
// AMp4 matches an audio MP4 file.
func AMp4(in []byte, _ uint32) bool {
return detect(in, amp4Sigs)
}
// QuickTime matches a QuickTime File Format file.
func QuickTime(in []byte, _ uint32) bool {
return detect(in, qtSigs)
}
// Mqv matches a Sony / Mobile QuickTime file.
func Mqv(in []byte, _ uint32) bool {
return detect(in, mqvSigs)
}
// M4a matches an audio M4A file.
func M4a(in []byte, _ uint32) bool {
return detect(in, m4aSigs)
}
// Heic matches a High Efficiency Image Coding (HEIC) file.
func Heic(in []byte, _ uint32) bool {
return detect(in, heicSigs)
}
// HeicSequence matches a High Efficiency Image Coding (HEIC) file sequence.
func HeicSequence(in []byte, _ uint32) bool {
return detect(in, heicSeqSigs)
}
// Heif matches a High Efficiency Image File Format (HEIF) file.
func Heif(in []byte, _ uint32) bool {
return detect(in, heifSigs)
}
// HeifSequence matches a High Efficiency Image File Format (HEIF) file sequence.
func HeifSequence(in []byte, _ uint32) bool {
return detect(in, heifSeqSigs)
}
// M4v matches an Appl4 M4V video file.
func M4v(in []byte, _ uint32) bool {
return detect(in, m4vSigs)
}

@ -1,142 +0,0 @@
package matchers
import "bytes"
// Png matches a Portable Network Graphics file.
func Png(in []byte, _ uint32) bool {
return bytes.HasPrefix(in, []byte{0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A})
}
// Jpg matches a Joint Photographic Experts Group file.
func Jpg(in []byte, _ uint32) bool {
return bytes.HasPrefix(in, []byte{0xFF, 0xD8, 0xFF})
}
// isJpeg2k matches a generic JPEG2000 file.
func isJpeg2k(in []byte) bool {
if len(in) < 24 {
return false
}
signature := in[4:8]
return bytes.Equal(signature, []byte{0x6A, 0x50, 0x20, 0x20}) ||
bytes.Equal(signature, []byte{0x6A, 0x50, 0x32, 0x20})
}
// Jp2 matches a JPEG 2000 Image file (ISO 15444-1).
func Jp2(in []byte, _ uint32) bool {
return isJpeg2k(in) && bytes.Equal(in[20:24], []byte{0x6a, 0x70, 0x32, 0x20})
}
// Jpx matches a JPEG 2000 Image file (ISO 15444-2).
func Jpx(in []byte, _ uint32) bool {
return isJpeg2k(in) && bytes.Equal(in[20:24], []byte{0x6a, 0x70, 0x78, 0x20})
}
// Jpm matches a JPEG 2000 Image file (ISO 15444-6).
func Jpm(in []byte, _ uint32) bool {
return isJpeg2k(in) && bytes.Equal(in[20:24], []byte{0x6a, 0x70, 0x6D, 0x20})
}
// Gif matches a Graphics Interchange Format file.
func Gif(in []byte, _ uint32) bool {
return bytes.HasPrefix(in, []byte("GIF87a")) ||
bytes.HasPrefix(in, []byte("GIF89a"))
}
// Webp matches a WebP file.
func Webp(in []byte, _ uint32) bool {
return len(in) > 12 &&
bytes.Equal(in[0:4], []byte("RIFF")) &&
bytes.Equal(in[8:12], []byte{0x57, 0x45, 0x42, 0x50})
}
// Bmp matches a bitmap image file.
func Bmp(in []byte, _ uint32) bool {
return len(in) > 1 && in[0] == 0x42 && in[1] == 0x4D
}
// Ps matches a PostScript file.
func Ps(in []byte, _ uint32) bool {
return bytes.HasPrefix(in, []byte("%!PS-Adobe-"))
}
// Psd matches a Photoshop Document file.
func Psd(in []byte, _ uint32) bool {
return bytes.HasPrefix(in, []byte("8BPS"))
}
// Ico matches an ICO file.
func Ico(in []byte, _ uint32) bool {
return bytes.HasPrefix(in, []byte{0x00, 0x00, 0x01, 0x00}) ||
bytes.HasPrefix(in, []byte{0x00, 0x00, 0x02, 0x00})
}
// Icns matches an ICNS (Apple Icon Image format) file.
func Icns(in []byte, _ uint32) bool {
return bytes.HasPrefix(in, []byte("icns"))
}
// Tiff matches a Tagged Image File Format file.
func Tiff(in []byte, _ uint32) bool {
return bytes.HasPrefix(in, []byte{0x49, 0x49, 0x2A, 0x00}) ||
bytes.HasPrefix(in, []byte{0x4D, 0x4D, 0x00, 0x2A})
}
// Bpg matches a Better Portable Graphics file.
func Bpg(in []byte, _ uint32) bool {
return bytes.HasPrefix(in, []byte{0x42, 0x50, 0x47, 0xFB})
}
// Dwg matches a CAD drawing file.
func Dwg(in []byte, _ uint32) bool {
if len(in) < 6 || in[0] != 0x41 || in[1] != 0x43 {
return false
}
dwgVersions := [][]byte{
{0x31, 0x2E, 0x34, 0x30},
{0x31, 0x2E, 0x35, 0x30},
{0x32, 0x2E, 0x31, 0x30},
{0x31, 0x30, 0x30, 0x32},
{0x31, 0x30, 0x30, 0x33},
{0x31, 0x30, 0x30, 0x34},
{0x31, 0x30, 0x30, 0x36},
{0x31, 0x30, 0x30, 0x39},
{0x31, 0x30, 0x31, 0x32},
{0x31, 0x30, 0x31, 0x34},
{0x31, 0x30, 0x31, 0x35},
{0x31, 0x30, 0x31, 0x38},
{0x31, 0x30, 0x32, 0x31},
{0x31, 0x30, 0x32, 0x34},
{0x31, 0x30, 0x33, 0x32},
}
for _, d := range dwgVersions {
if bytes.Equal(in[2:6], d) {
return true
}
}
return false
}
// Xcf matches GIMP image data.
func Xcf(in []byte, _ uint32) bool {
return bytes.HasPrefix(in, []byte("gimp xcf"))
}
// Pat matches GIMP pattern data.
func Pat(in []byte, _ uint32) bool {
return len(in) >= 24 && bytes.Equal(in[20:24], []byte("GPAT"))
}
// Gbr matches GIMP brush data.
func Gbr(in []byte, _ uint32) bool {
return len(in) >= 24 && bytes.Equal(in[20:24], []byte("GIMP"))
}
// Hdr matches Radiance HDR image.
// https://web.archive.org/web/20060913152809/http://local.wasp.uwa.edu.au/~pbourke/dataformats/pic/
func Hdr(in []byte, _ uint32) bool {
return bytes.HasPrefix(in, []byte("#?RADIANCE\n"))
}

@ -1,39 +0,0 @@
// Package matchers holds the matching functions used to find MIME types.
package matchers
// trimLWS trims whitespace from beginning of the input.
func trimLWS(in []byte) []byte {
firstNonWS := 0
for ; firstNonWS < len(in) && isWS(in[firstNonWS]); firstNonWS++ {
}
return in[firstNonWS:]
}
// trimRWS trims whitespace from the end of the input.
func trimRWS(in []byte) []byte {
lastNonWS := len(in) - 1
for ; lastNonWS > 0 && isWS(in[lastNonWS]); lastNonWS-- {
}
return in[:lastNonWS+1]
}
func firstLine(in []byte) []byte {
lineEnd := 0
for ; lineEnd < len(in) && in[lineEnd] != '\n'; lineEnd++ {
}
return in[:lineEnd]
}
func isWS(b byte) bool {
return b == '\t' || b == '\n' || b == '\x0c' || b == '\r' || b == ' '
}
func min(a, b int) int {
if a < b {
return a
}
return b
}

@ -1,25 +0,0 @@
package matchers
import "bytes"
// Glb matches a glTF model format file.
func Glb(in []byte, _ uint32) bool {
// GLB is the binary file format representation of 3D models save in
// the GL transmission Format (glTF).
// see more: https://docs.fileformat.com/3d/glb/
// https://www.iana.org/assignments/media-types/model/gltf-binary
if len(in) < 8 {
return false
}
// GLB file format is based on little endian and its header structure
// show below:
//
// <-- 12-byte header -->
// | magic | version | length |
// | (uint32) | (uint32) | (uint32) |
// | \x67\x6C\x54\x46 | \x01\x00\x00\x00 | ... |
// | g l T F | 1 | ... |
return bytes.HasPrefix(in, []byte("\x67\x6C\x54\x46\x02\x00\x00\x00")) ||
bytes.HasPrefix(in, []byte("\x67\x6C\x54\x46\x01\x00\x00\x00"))
}

@ -1,42 +0,0 @@
package matchers
import (
"bytes"
)
/*
NOTE:
In May 2003, two Internet RFCs were published relating to the format.
The Ogg bitstream was defined in RFC 3533 (which is classified as
'informative') and its Internet content type (application/ogg) in RFC
3534 (which is, as of 2006, a proposed standard protocol). In
September 2008, RFC 3534 was obsoleted by RFC 5334, which added
content types video/ogg, audio/ogg and filename extensions .ogx, .ogv,
.oga, .spx.
See:
https://tools.ietf.org/html/rfc3533
https://developer.mozilla.org/en-US/docs/Web/HTTP/Configuring_servers_for_Ogg_media#Serve_media_with_the_correct_MIME_type
https://github.com/file/file/blob/master/magic/Magdir/vorbis
*/
// Ogg matches an Ogg file.
func Ogg(in []byte, _ uint32) bool {
return bytes.HasPrefix(in, []byte("\x4F\x67\x67\x53\x00"))
}
// OggAudio matches an audio ogg file.
func OggAudio(in []byte, _ uint32) bool {
return len(in) >= 37 && (bytes.HasPrefix(in[28:], []byte("\x7fFLAC")) ||
bytes.HasPrefix(in[28:], []byte("\x01vorbis")) ||
bytes.HasPrefix(in[28:], []byte("OpusHead")) ||
bytes.HasPrefix(in[28:], []byte("Speex\x20\x20\x20")))
}
// OggVideo matches a video ogg file.
func OggVideo(in []byte, _ uint32) bool {
return len(in) >= 37 && (bytes.HasPrefix(in[28:], []byte("\x80theora")) ||
bytes.HasPrefix(in[28:], []byte("fishead\x00")) ||
bytes.HasPrefix(in[28:], []byte("\x01video\x00\x00\x00"))) // OGM video
}

@ -1,131 +0,0 @@
package matchers
import (
"bytes"
"fmt"
)
type (
markupSig []byte
ciSig []byte // case insensitive signature
shebangSig []byte // matches !# followed by the signature
ftypSig []byte // matches audio/video files. www.ftyps.com
xmlSig struct {
// the local name of the root tag
localName []byte
// the namespace of the XML document
xmlns []byte
}
sig interface {
detect([]byte) bool
}
)
func newXmlSig(localName, xmlns string) xmlSig {
ret := xmlSig{xmlns: []byte(xmlns)}
if localName != "" {
ret.localName = []byte(fmt.Sprintf("<%s", localName))
}
return ret
}
// Implement sig interface.
func (hSig markupSig) detect(in []byte) bool {
if len(in) < len(hSig)+1 {
return false
}
// perform case insensitive check
for i, b := range hSig {
db := in[i]
if 'A' <= b && b <= 'Z' {
db &= 0xDF
}
if b != db {
return false
}
}
// Next byte must be space or right angle bracket.
if db := in[len(hSig)]; db != ' ' && db != '>' {
return false
}
return true
}
// Implement sig interface.
func (tSig ciSig) detect(in []byte) bool {
if len(in) < len(tSig)+1 {
return false
}
// perform case insensitive check
for i, b := range tSig {
db := in[i]
if 'A' <= b && b <= 'Z' {
db &= 0xDF
}
if b != db {
return false
}
}
return true
}
// A valid shebang starts with the "#!" characters,
// followed by any number of spaces,
// followed by the path to the interpreter,
// and, optionally, followed by the arguments for the interpreter.
//
// Ex:
// #! /usr/bin/env php
// /usr/bin/env is the interpreter, php is the first and only argument.
func (sSig shebangSig) detect(in []byte) bool {
in = firstLine(in)
if len(in) < len(sSig)+2 {
return false
}
if in[0] != '#' || in[1] != '!' {
return false
}
in = trimLWS(trimRWS(in[2:]))
return bytes.Equal(in, sSig)
}
// Implement sig interface.
func (fSig ftypSig) detect(in []byte) bool {
return len(in) > 12 &&
bytes.Equal(in[4:8], []byte("ftyp")) &&
bytes.Equal(in[8:12], fSig)
}
// Implement sig interface.
func (xSig xmlSig) detect(in []byte) bool {
in = in[:min(len(in), 512)]
if len(xSig.localName) == 0 {
return bytes.Index(in, xSig.xmlns) > 0
}
if len(xSig.xmlns) == 0 {
return bytes.Index(in, xSig.localName) > 0
}
localNameIndex := bytes.Index(in, xSig.localName)
return localNameIndex != -1 && localNameIndex < bytes.Index(in, xSig.xmlns)
}
// detect returns true if any of the provided signatures pass for in input.
func detect(in []byte, sigs []sig) bool {
for _, sig := range sigs {
if sig.detect(in) {
return true
}
}
return false
}

@ -1,407 +0,0 @@
package matchers
import (
"bytes"
"github.com/gabriel-vasile/mimetype/internal/json"
)
var (
htmlSigs = []sig{
markupSig("<!DOCTYPE HTML"),
markupSig("<HTML"),
markupSig("<HEAD"),
markupSig("<SCRIPT"),
markupSig("<IFRAME"),
markupSig("<H1"),
markupSig("<DIV"),
markupSig("<FONT"),
markupSig("<TABLE"),
markupSig("<A"),
markupSig("<STYLE"),
markupSig("<TITLE"),
markupSig("<B"),
markupSig("<BODY"),
markupSig("<BR"),
markupSig("<P"),
markupSig("<!--"),
}
xmlSigs = []sig{
markupSig("<?XML"),
}
owlSigs = []sig{
newXmlSig("Ontology", `xmlns="http://www.w3.org/2002/07/owl#"`),
}
rssSigs = []sig{
newXmlSig("rss", ""),
}
atomSigs = []sig{
newXmlSig("feed", `xmlns="http://www.w3.org/2005/Atom"`),
}
kmlSigs = []sig{
newXmlSig("kml", `xmlns="http://www.opengis.net/kml/2.2"`),
newXmlSig("kml", `xmlns="http://earth.google.com/kml/2.0"`),
newXmlSig("kml", `xmlns="http://earth.google.com/kml/2.1"`),
newXmlSig("kml", `xmlns="http://earth.google.com/kml/2.2"`),
}
xliffSigs = []sig{
newXmlSig("xliff", `xmlns="urn:oasis:names:tc:xliff:document:1.2"`),
}
colladaSigs = []sig{
newXmlSig("COLLADA", `xmlns="http://www.collada.org/2005/11/COLLADASchema"`),
}
gmlSigs = []sig{
newXmlSig("", `xmlns:gml="http://www.opengis.net/gml"`),
newXmlSig("", `xmlns:gml="http://www.opengis.net/gml/3.2"`),
newXmlSig("", `xmlns:gml="http://www.opengis.net/gml/3.3/exr"`),
}
gpxSigs = []sig{
newXmlSig("gpx", `xmlns="http://www.topografix.com/GPX/1/1"`),
}
tcxSigs = []sig{
newXmlSig("TrainingCenterDatabase", `xmlns="http://www.garmin.com/xmlschemas/TrainingCenterDatabase/v2"`),
}
x3dSigs = []sig{
newXmlSig("X3D", `xmlns:xsd="http://www.w3.org/2001/XMLSchema-instance"`),
}
amfSigs = []sig{
newXmlSig("amf", ""),
}
threemfSigs = []sig{
newXmlSig("model", `xmlns="http://schemas.microsoft.com/3dmanufacturing/core/2015/02"`),
}
xfdfSigs = []sig{
newXmlSig("xfdf", `xmlns="http://ns.adobe.com/xfdf/"`),
}
vCardSigs = []sig{
ciSig("BEGIN:VCARD\n"),
ciSig("BEGIN:VCARD\r\n"),
}
iCalSigs = []sig{
ciSig("BEGIN:VCALENDAR\n"),
ciSig("BEGIN:VCALENDAR\r\n"),
}
phpSigs = []sig{
ciSig("<?PHP"),
ciSig("<?\n"),
ciSig("<?\r"),
ciSig("<? "),
shebangSig("/usr/local/bin/php"),
shebangSig("/usr/bin/php"),
shebangSig("/usr/bin/env php"),
}
jsSigs = []sig{
shebangSig("/bin/node"),
shebangSig("/usr/bin/node"),
shebangSig("/bin/nodejs"),
shebangSig("/usr/bin/nodejs"),
shebangSig("/usr/bin/env node"),
shebangSig("/usr/bin/env nodejs"),
}
luaSigs = []sig{
shebangSig("/usr/bin/lua"),
shebangSig("/usr/local/bin/lua"),
shebangSig("/usr/bin/env lua"),
}
perlSigs = []sig{
shebangSig("/usr/bin/perl"),
shebangSig("/usr/bin/env perl"),
}
pythonSigs = []sig{
shebangSig("/usr/bin/python"),
shebangSig("/usr/local/bin/python"),
shebangSig("/usr/bin/env python"),
}
tclSigs = []sig{
shebangSig("/usr/bin/tcl"),
shebangSig("/usr/local/bin/tcl"),
shebangSig("/usr/bin/env tcl"),
shebangSig("/usr/bin/tclsh"),
shebangSig("/usr/local/bin/tclsh"),
shebangSig("/usr/bin/env tclsh"),
shebangSig("/usr/bin/wish"),
shebangSig("/usr/local/bin/wish"),
shebangSig("/usr/bin/env wish"),
}
)
// Utf32be matches a text file encoded with UTF-32 and with the characters
// represented in big endian.
func Utf32be(in []byte, _ uint32) bool {
return bytes.HasPrefix(in, []byte{0x00, 0x00, 0xFE, 0xFF})
}
// Utf32le matches a text file encoded with UTF-32 and with the characters
// represented in little endian.
func Utf32le(in []byte, _ uint32) bool {
return bytes.HasPrefix(in, []byte{0xFF, 0xFE, 0x00, 0x00})
}
// Utf16be matches a text file encoded with UTF-16 and with the characters
// represented in big endian.
func Utf16be(in []byte, _ uint32) bool {
return bytes.HasPrefix(in, []byte{0xFE, 0xFF})
}
// Utf16le matches a text file encoded with UTF-16 and with the characters
// represented in little endian.
func Utf16le(in []byte, _ uint32) bool {
return bytes.HasPrefix(in, []byte{0xFF, 0xFE})
}
// Utf8 matches an UTF-8 text file.
func Utf8(in []byte, _ uint32) bool {
in = trimLWS(in)
for _, b := range in {
if b <= 0x08 ||
b == 0x0B ||
0x0E <= b && b <= 0x1A ||
0x1C <= b && b <= 0x1F {
return false
}
}
return true
}
// Owl2 matches an Owl ontology file.
func Owl2(in []byte, _ uint32) bool {
return detect(in, owlSigs)
}
// Html matches a Hypertext Markup Language file.
func Html(in []byte, _ uint32) bool {
in = trimLWS(in)
if len(in) == 0 {
return false
}
return detect(in, htmlSigs)
}
// Xml matches an Extensible Markup Language file.
func Xml(in []byte, _ uint32) bool {
in = trimLWS(in)
if len(in) == 0 {
return false
}
return detect(in, xmlSigs)
}
// Php matches a PHP: Hypertext Preprocessor file.
func Php(in []byte, _ uint32) bool {
return detect(in, phpSigs)
}
// Json matches a JavaScript Object Notation file.
func Json(in []byte, readLimit uint32) bool {
parsed, err := json.Scan(in)
if len(in) < int(readLimit) {
return err == nil
}
return parsed == len(in)
}
// GeoJson matches a RFC 7946 GeoJSON file.
//
// GeoJson detection implies searching for key:value pairs like: `"type": "Feature"`
// in the input.
// BUG(gabriel-vasile): The "type" key should be searched for in the root object.
func GeoJson(in []byte, _ uint32) bool {
in = trimLWS(in)
if len(in) == 0 {
return false
}
// GeoJSON is always a JSON object, not a JSON array.
if in[0] != '{' {
return false
}
s := []byte(`"type"`)
si, sl := bytes.Index(in, s), len(s)
if si == -1 {
return false
}
// If the "type" string is the suffix of the input,
// there is no need to search for the value of the key.
if si+sl == len(in) {
return false
}
// Skip the "type" part.
in = in[si+sl:]
// Skip any whitespace before the colon.
in = trimLWS(in)
// Check for colon.
if len(in) == 0 || in[0] != ':' {
return false
}
// Skip any whitespace after the colon.
in = trimLWS(in[1:])
geoJsonTypes := [][]byte{
[]byte(`"Feature"`),
[]byte(`"FeatureCollection"`),
[]byte(`"Point"`),
[]byte(`"LineString"`),
[]byte(`"Polygon"`),
[]byte(`"MultiPoint"`),
[]byte(`"MultiLineString"`),
[]byte(`"MultiPolygon"`),
[]byte(`"GeometryCollection"`),
}
for _, t := range geoJsonTypes {
if bytes.HasPrefix(in, t) {
return true
}
}
return false
}
// NdJson matches a Newline delimited JSON file.
func NdJson(in []byte, readLimit uint32) bool {
// Separator with carriage return and new line `\r\n`.
srn := []byte{0x0D, 0x0A}
// Separator with only new line `\n`.
sn := []byte{0x0A}
// Total bytes scanned.
parsed := 0
// Split by `srn`.
for rni, insrn := range bytes.Split(in, srn) {
// Separator byte count should be added only after the first split.
if rni != 0 {
// Add two as `\r\n` is used for split.
parsed += 2
}
// Split again by `sn`.
for ni, insn := range bytes.Split(insrn, sn) {
// Separator byte count should be added only after the first split.
if ni != 0 {
// Add one as `\n` is used for split.
parsed++
}
// Empty line is valid.
if len(insn) == 0 {
continue
}
p, err := json.Scan(insn)
parsed += p
if parsed < int(readLimit) && err != nil {
return false
}
}
}
// Empty inputs should not pass as valid NDJSON with 0 lines.
return parsed > 2 && parsed == len(in)
}
// Js matches a Javascript file.
func Js(in []byte, _ uint32) bool {
return detect(in, jsSigs)
}
// Lua matches a Lua programming language file.
func Lua(in []byte, _ uint32) bool {
return detect(in, luaSigs)
}
// Perl matches a Perl programming language file.
func Perl(in []byte, _ uint32) bool {
return detect(in, perlSigs)
}
// Python matches a Python programming language file.
func Python(in []byte, _ uint32) bool {
return detect(in, pythonSigs)
}
// Tcl matches a Tcl programming language file.
func Tcl(in []byte, _ uint32) bool {
return detect(in, tclSigs)
}
// Rtf matches a Rich Text Format file.
func Rtf(in []byte, _ uint32) bool {
return bytes.HasPrefix(in, []byte("{\\rtf1"))
}
// Svg matches a SVG file.
func Svg(in []byte, _ uint32) bool {
return bytes.Contains(in, []byte("<svg"))
}
// Rss matches a Rich Site Summary file.
func Rss(in []byte, _ uint32) bool {
return detect(in, rssSigs)
}
// Atom matches an Atom Syndication Format file.
func Atom(in []byte, _ uint32) bool {
return detect(in, atomSigs)
}
// Kml matches a Keyhole Markup Language file.
func Kml(in []byte, _ uint32) bool {
return detect(in, kmlSigs)
}
// Xliff matches a XML Localization Interchange File Format file.
func Xliff(in []byte, _ uint32) bool {
return detect(in, xliffSigs)
}
// Collada matches a COLLAborative Design Activity file.
func Collada(in []byte, _ uint32) bool {
return detect(in, colladaSigs)
}
// Gml matches a Geography Markup Language file.
func Gml(in []byte, _ uint32) bool {
return detect(in, gmlSigs)
}
// Gpx matches a GPS Exchange Format file.
func Gpx(in []byte, _ uint32) bool {
return detect(in, gpxSigs)
}
// Tcx matches a Training Center XML file.
func Tcx(in []byte, _ uint32) bool {
return detect(in, tcxSigs)
}
// Amf matches an Additive Manufacturing XML file.
func Amf(in []byte, _ uint32) bool {
return detect(in, amfSigs)
}
// Threemf matches a 3D Manufacturing Format file.
func Threemf(in []byte, _ uint32) bool {
return detect(in, threemfSigs)
}
// X3d matches an Extensible 3D Graphics file.
func X3d(in []byte, _ uint32) bool {
return detect(in, x3dSigs)
}
// VCard matches a Virtual Contact File.
func VCard(in []byte, _ uint32) bool {
return detect(in, vCardSigs)
}
// ICalendar matches a iCalendar file.
func ICalendar(in []byte, _ uint32) bool {
return detect(in, iCalSigs)
}
// Xfdf matches a XML Forms Data Format file.
func Xfdf(in []byte, _ uint32) bool {
return detect(in, xfdfSigs)
}

@ -1,11 +0,0 @@
package matchers
import (
"bytes"
)
// Torrent has bencoded text in the beginning
func Torrent(in []byte, _ uint32) bool {
return len(in) > 11 &&
bytes.Equal(in[:11], []byte("d8:announce"))
}

@ -1,83 +0,0 @@
package matchers
import "bytes"
// Zip matches a zip archive.
func Zip(in []byte, _ uint32) bool {
return len(in) > 3 &&
in[0] == 0x50 && in[1] == 0x4B &&
(in[2] == 0x3 || in[2] == 0x5 || in[2] == 0x7) &&
(in[3] == 0x4 || in[3] == 0x6 || in[3] == 0x8)
}
// Odt matches an OpenDocument Text file.
func Odt(in []byte, _ uint32) bool {
return len(in) > 30 && bytes.HasPrefix(in[30:], []byte("mimetypeapplication/vnd.oasis.opendocument.text"))
}
// Ott matches an OpenDocument Text Template file.
func Ott(in []byte, _ uint32) bool {
return len(in) > 30 && bytes.HasPrefix(in[30:], []byte("mimetypeapplication/vnd.oasis.opendocument.text-template"))
}
// Ods matches an OpenDocument Spreadsheet file.
func Ods(in []byte, _ uint32) bool {
return len(in) > 30 && bytes.HasPrefix(in[30:], []byte("mimetypeapplication/vnd.oasis.opendocument.spreadsheet"))
}
// Ots matches an OpenDocument Spreadsheet Template file.
func Ots(in []byte, _ uint32) bool {
return len(in) > 30 && bytes.HasPrefix(in[30:], []byte("mimetypeapplication/vnd.oasis.opendocument.spreadsheet-template"))
}
// Odp matches an OpenDocument Presentation file.
func Odp(in []byte, _ uint32) bool {
return len(in) > 30 && bytes.HasPrefix(in[30:], []byte("mimetypeapplication/vnd.oasis.opendocument.presentation"))
}
// Otp matches an OpenDocument Presentation Template file.
func Otp(in []byte, _ uint32) bool {
return len(in) > 30 && bytes.HasPrefix(in[30:], []byte("mimetypeapplication/vnd.oasis.opendocument.presentation-template"))
}
// Odg matches an OpenDocument Drawing file.
func Odg(in []byte, _ uint32) bool {
return len(in) > 30 && bytes.HasPrefix(in[30:], []byte("mimetypeapplication/vnd.oasis.opendocument.graphics"))
}
// Otg matches an OpenDocument Drawing Template file.
func Otg(in []byte, _ uint32) bool {
return len(in) > 30 && bytes.HasPrefix(in[30:], []byte("mimetypeapplication/vnd.oasis.opendocument.graphics-template"))
}
// Odf matches an OpenDocument Formula file.
func Odf(in []byte, _ uint32) bool {
return len(in) > 30 && bytes.HasPrefix(in[30:], []byte("mimetypeapplication/vnd.oasis.opendocument.formula"))
}
// Odc matches an OpenDocument Chart file.
func Odc(in []byte, _ uint32) bool {
return len(in) > 30 && bytes.HasPrefix(in[30:], []byte("mimetypeapplication/vnd.oasis.opendocument.chart"))
}
// Epub matches an EPUB file.
func Epub(in []byte, _ uint32) bool {
return len(in) > 30 && bytes.HasPrefix(in[30:], []byte("mimetypeapplication/epub+zip"))
}
// Sxc matches an OpenOffice Spreadsheet file.
func Sxc(in []byte, _ uint32) bool {
return len(in) > 30 && bytes.HasPrefix(in[30:], []byte("mimetypeapplication/vnd.sun.xml.calc"))
}
// Jar matches a Java archive file.
func Jar(in []byte, _ uint32) bool {
t := zipTokenizer{in: in}
for i, tok := 0, t.next(); i < 10 && tok != ""; i, tok = i+1, t.next() {
if tok == "META-INF/MANIFEST.MF" {
return true
}
}
return false
}

@ -2,6 +2,9 @@ package mimetype
import (
"mime"
"github.com/gabriel-vasile/mimetype/internal/charset"
"github.com/gabriel-vasile/mimetype/internal/magic"
)
// MIME struct holds information about a file format: the string representation
@ -10,9 +13,11 @@ type MIME struct {
mime string
aliases []string
extension string
matchFunc func([]byte, uint32) bool
children []*MIME
parent *MIME
// detector receives the raw input and a limit for the number of bytes it is
// allowed to check. It returns whether the input matches a signature or not.
detector magic.Detector
children []*MIME
parent *MIME
}
// String returns the string representation of the MIME type, e.g., "application/zip".
@ -60,11 +65,15 @@ func (m *MIME) Is(expectedMIME string) bool {
return false
}
func newMIME(mime, extension string, matchFunc func([]byte, uint32) bool, children ...*MIME) *MIME {
func newMIME(
mime, extension string,
detector magic.Detector,
children ...*MIME) *MIME {
m := &MIME{
mime: mime,
extension: extension,
matchFunc: matchFunc,
detector: detector,
children: children,
}
@ -80,16 +89,29 @@ func (m *MIME) alias(aliases ...string) *MIME {
return m
}
// match does a depth-first search on the matchers tree. It returns the deepest
// successful node for which all the children matching functions fail.
// match does a depth-first search on the signature tree. It returns the deepest
// successful node for which all the children detection functions fail.
func (m *MIME) match(in []byte, readLimit uint32) *MIME {
for _, c := range m.children {
if c.matchFunc(in, readLimit) {
if c.detector(in, readLimit) {
return c.match(in, readLimit)
}
}
return m
needsCharset := map[string]func([]byte) string{
"text/plain": charset.FromPlain,
"text/html": charset.FromHTML,
"text/xml": charset.FromXML,
}
// ps holds optional MIME parameters.
ps := map[string]string{}
if f, ok := needsCharset[m.mime]; ok {
if cset := f(in); cset != "" {
ps["charset"] = cset
}
}
return m.cloneHierarchy(ps)
}
// flatten transforms an hierarchy of MIMEs into a slice of MIMEs.
@ -101,3 +123,31 @@ func (m *MIME) flatten() []*MIME {
return out
}
// clone creates a new MIME with the provided optional MIME parameters.
func (m *MIME) clone(ps map[string]string) *MIME {
clonedMIME := m.mime
if len(ps) > 0 {
clonedMIME = mime.FormatMediaType(m.mime, ps)
}
return &MIME{
mime: clonedMIME,
aliases: m.aliases,
extension: m.extension,
}
}
// cloneHierarchy creates a clone of m and all its ancestors. The optional MIME
// parametes are set on the last child of the hierarchy.
func (m *MIME) cloneHierarchy(ps map[string]string) *MIME {
ret := m.clone(ps)
lastChild := ret
for p := m.Parent(); p != nil; p = p.Parent() {
pClone := p.clone(nil)
lastChild.parent = pClone
lastChild = pClone
}
return ret
}

Binary file not shown.

Before

Width:  |  Height:  |  Size: 744 KiB

After

Width:  |  Height:  |  Size: 1.3 MiB

@ -6,7 +6,6 @@ import (
"io/ioutil"
"mime"
"os"
"sync/atomic"
)
// readLimit is the maximum number of bytes from the input used when detecting.
@ -17,13 +16,12 @@ var readLimit uint32 = 3072
// The result is always a valid MIME type, with application/octet-stream
// returned when identification failed.
func Detect(in []byte) *MIME {
// using atomic because readLimit can be concurrently
// altered by another goroutine calling SetLimit.
l := atomic.LoadUint32(&readLimit)
if l > 0 && len(in) > int(l) {
in = in[:l]
rootMu.RLock()
defer rootMu.RUnlock()
if readLimit > 0 && len(in) > int(readLimit) {
in = in[:readLimit]
}
return root.match(in, l)
return root.match(in, readLimit)
}
// DetectReader returns the MIME type of the provided reader.
@ -36,13 +34,12 @@ func Detect(in []byte) *MIME {
// io.ReadSeeker you previously read from, it should be rewinded before detection:
// reader.Seek(0, io.SeekStart)
func DetectReader(r io.Reader) (*MIME, error) {
// using atomic because readLimit can be concurrently
// altered by another goroutine calling SetLimit.
l := atomic.LoadUint32(&readLimit)
rootMu.RLock()
defer rootMu.RUnlock()
var in []byte
var err error
if l == 0 {
if readLimit == 0 {
in, err = ioutil.ReadAll(r)
if err != nil {
return root, err
@ -51,7 +48,7 @@ func DetectReader(r io.Reader) (*MIME, error) {
// io.UnexpectedEOF means len(r) < len(in). It is not an error in this case,
// it just means the input file is smaller than the allocated bytes slice.
n := 0
in = make([]byte, l)
in = make([]byte, readLimit)
n, err = io.ReadFull(r, in)
if err != nil && err != io.ErrUnexpectedEOF {
return root, err
@ -59,7 +56,7 @@ func DetectReader(r io.Reader) (*MIME, error) {
in = in[:n]
}
return root.match(in, l), nil
return root.match(in, readLimit), nil
}
// DetectFile returns the MIME type of the provided file.
@ -67,8 +64,8 @@ func DetectReader(r io.Reader) (*MIME, error) {
// The result is always a valid MIME type, with application/octet-stream
// returned when identification failed with or without an error.
// Any error returned is related to the opening and reading from the input file.
func DetectFile(file string) (*MIME, error) {
f, err := os.Open(file)
func DetectFile(path string) (*MIME, error) {
f, err := os.Open(path)
if err != nil {
return root, err
}
@ -98,5 +95,24 @@ func EqualsAny(s string, mimes ...string) bool {
// their magical numbers towards the end of the file: docx, pptx, xlsx, etc.
// A limit of 0 means the whole input file will be used.
func SetLimit(limit uint32) {
atomic.StoreUint32(&readLimit, limit)
rootMu.Lock()
readLimit = limit
rootMu.Unlock()
}
// Extend adds detection for other file formats. The detector is a function
// returning true when the raw input file satisfies a signature.
// The extension should include the leading dot, as in ".html".
func Extend(detector func(raw []byte, limit uint32) bool, mime, extension string, aliases ...string) {
m := &MIME{
mime: mime,
extension: extension,
detector: detector,
parent: root,
aliases: aliases,
}
rootMu.Lock()
root.children = append([]*MIME{m}, root.children...)
rootMu.Unlock()
}

@ -1,9 +1,10 @@
## 164 Supported MIME types
## 162 Supported MIME types
This file is automatically generated when running tests. Do not edit manually.
Extension | MIME type | Aliases
--------- | --------- | -------
**n/a** | application/octet-stream | -
**.xpm** | image/x-xpixmap | -
**.7z** | application/x-7z-compressed | -
**.zip** | application/zip | application/x-zip, application/x-zip-compressed
**.xlsx** | application/vnd.openxmlformats-officedocument.spreadsheetml.sheet | -
@ -86,12 +87,8 @@ Extension | MIME type | Aliases
**.m3u** | application/vnd.apple.mpegurl | audio/mpegurl
**.m4v** | video/x-m4v | -
**.rmvb** | application/vnd.rn-realmedia-vbr | -
**.txt** | text/plain; charset=utf-32le | -
**.txt** | text/plain; charset=utf-32be | -
**.txt** | text/plain; charset=utf-16le | -
**.txt** | text/plain; charset=utf-16be | -
**.gz** | application/gzip | application/x-gzip, application/x-gunzip, application/gzipped, application/gzip-compressed, application/x-gzip-compressed, gzip/document
**.class** | application/x-java-applet; charset=binary | -
**.class** | application/x-java-applet | -
**.swf** | application/x-shockwave-flash | -
**.crx** | application/x-chrome-extension | -
**.ttf** | font/ttf | font/sfnt, application/x-font-ttf, application/font-sfnt
@ -112,6 +109,7 @@ Extension | MIME type | Aliases
**.sqlite** | application/x-sqlite3 | -
**.dwg** | image/vnd.dwg | image/x-dwg, application/acad, application/x-acad, application/autocad_dwg, application/dwg, application/x-dwg, application/x-autocad, drawing/dwg
**.nes** | application/vnd.nintendo.snes.rom | -
**.lnk** | application/x-ms-shortcut | -
**.macho** | application/x-mach-binary | -
**.qcp** | audio/qcelp | -
**.icns** | image/x-icns | -
@ -125,10 +123,20 @@ Extension | MIME type | Aliases
**.accdb** | application/x-msaccess | -
**.zst** | application/zstd | -
**.cab** | application/vnd.ms-cab-compressed | -
**.txt** | text/plain; charset=utf-8 | -
**.html** | text/html; charset=utf-8 | -
**.rpm** | application/x-rpm | -
**.xz** | application/x-xz | -
**.lz** | application/lzip | -
**.torrent** | application/x-bittorrent | -
**.cpio** | application/x-cpio | -
**n/a** | application/tzif | -
**.xcf** | image/x-xcf | -
**.pat** | image/x-gimp-pat | -
**.gbr** | image/x-gimp-gbr | -
**.glb** | model/gltf-binary | -
**.txt** | text/plain | -
**.html** | text/html | -
**.svg** | image/svg+xml | -
**.xml** | text/xml; charset=utf-8 | -
**.xml** | text/xml | -
**.rss** | application/rss+xml | text/rss
**.atom** | application/atom+xml | -
**.x3d** | model/x3d+xml | -
@ -142,7 +150,7 @@ Extension | MIME type | Aliases
**.3mf** | application/vnd.ms-package.3dmanufacturing-3dmodel+xml | -
**.xfdf** | application/vnd.adobe.xfdf | -
**.owl** | application/owl+xml | -
**.php** | text/x-php; charset=utf-8 | -
**.php** | text/x-php | -
**.js** | application/javascript | application/x-javascript, text/javascript
**.lua** | text/x-lua | -
**.pl** | text/x-perl | -
@ -157,13 +165,3 @@ Extension | MIME type | Aliases
**.vcf** | text/vcard | -
**.ics** | text/calendar | -
**.warc** | application/warc | -
**.rpm** | application/x-rpm | -
**.xz** | application/x-xz | -
**.lz** | application/lzip | -
**.torrent** | application/x-bittorrent | -
**.cpio** | application/x-cpio | -
**n/a** | application/tzif | -
**.xcf** | image/x-xcf | -
**.pat** | image/x-gimp-pat | -
**.gbr** | image/x-gimp-gbr | -
**.glb** | model/gltf-binary | -

@ -1,6 +1,10 @@
package mimetype
import "github.com/gabriel-vasile/mimetype/internal/matchers"
import (
"sync"
"github.com/gabriel-vasile/mimetype/internal/magic"
)
// mimetype stores the list of MIME types in a tree structure with
// "application/octet-stream" at the root of the hierarchy. The hierarchy
@ -9,220 +13,228 @@ import "github.com/gabriel-vasile/mimetype/internal/matchers"
// identified.
//
// root is a matcher which passes for any slice of bytes.
// When a matcher passes the check, the children matchers
// When a matcher passes the check, the children magic
// are tried in order to find a more accurate MIME type.
var root = newMIME("application/octet-stream", "", func([]byte, uint32) bool { return true },
sevenZ, zip, pdf, fdf, ole, ps, psd, p7s, ogg, png, jpg, jp2, jpx, jpm, gif, webp,
var root = newMIME("application/octet-stream", "",
func([]byte, uint32) bool { return true },
xpm, sevenZ, zip, pdf, fdf, ole, ps, psd, p7s, ogg, png, jpg, jp2, jpx, jpm, gif, webp,
exe, elf, ar, tar, xar, bz2, fits, tiff, bmp, ico, mp3, flac, midi, ape,
musePack, amr, wav, aiff, au, mpeg, quickTime, mqv, mp4, webM, threeGP,
threeG2, avi, flv, mkv, asf, aac, voc, aMp4, m4a, m3u, m4v, rmvb, utf32le, utf32be,
utf16le, utf16be, gzip, class, swf, crx, ttf, woff, woff2, otf, eot, wasm,
shx, dbf, dcm, rar, djvu, mobi, lit, bpg, sqlite3, dwg, nes, macho, qcp,
icns, heic, heicSeq, heif, heifSeq, hdr, mrc, mdb, accdb, zstd, cab, utf8,
threeG2, avi, flv, mkv, asf, aac, voc, aMp4, m4a, m3u, m4v, rmvb,
gzip, class, swf, crx, ttf, woff, woff2, otf, eot, wasm,
shx, dbf, dcm, rar, djvu, mobi, lit, bpg, sqlite3, dwg, nes, lnk, macho, qcp,
icns, heic, heicSeq, heif, heifSeq, hdr, mrc, mdb, accdb, zstd, cab,
rpm, xz, lzip, torrent, cpio, tzif, xcf, pat, gbr, glb,
// Keep text last because it is the slowest check
text,
)
// rootMu guards the root tree and the readLimit used when creating the detection buffer.
var rootMu sync.RWMutex
// The list of nodes appended to the root node.
var (
xz = newMIME("application/x-xz", ".xz", matchers.Xz)
gzip = newMIME("application/gzip", ".gz", matchers.Gzip).
alias("application/x-gzip", "application/x-gunzip", "application/gzipped", "application/gzip-compressed", "application/x-gzip-compressed", "gzip/document")
sevenZ = newMIME("application/x-7z-compressed", ".7z", matchers.SevenZ)
zip = newMIME("application/zip", ".zip", matchers.Zip, xlsx, docx, pptx, epub, jar, odt, ods, odp, odg, odf, odc, sxc).
xz = newMIME("application/x-xz", ".xz", magic.Xz)
gzip = newMIME("application/gzip", ".gz", magic.Gzip).alias(
"application/x-gzip", "application/x-gunzip", "application/gzipped",
"application/gzip-compressed", "application/x-gzip-compressed",
"gzip/document")
sevenZ = newMIME("application/x-7z-compressed", ".7z", magic.SevenZ)
zip = newMIME("application/zip", ".zip", magic.Zip, xlsx, docx, pptx, epub, jar, odt, ods, odp, odg, odf, odc, sxc).
alias("application/x-zip", "application/x-zip-compressed")
tar = newMIME("application/x-tar", ".tar", matchers.Tar)
xar = newMIME("application/x-xar", ".xar", matchers.Xar)
bz2 = newMIME("application/x-bzip2", ".bz2", matchers.Bz2)
pdf = newMIME("application/pdf", ".pdf", matchers.Pdf).
tar = newMIME("application/x-tar", ".tar", magic.Tar)
xar = newMIME("application/x-xar", ".xar", magic.Xar)
bz2 = newMIME("application/x-bzip2", ".bz2", magic.Bz2)
pdf = newMIME("application/pdf", ".pdf", magic.Pdf).
alias("application/x-pdf")
fdf = newMIME("application/vnd.fdf", ".fdf", matchers.Fdf)
xlsx = newMIME("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", ".xlsx", matchers.Xlsx)
docx = newMIME("application/vnd.openxmlformats-officedocument.wordprocessingml.document", ".docx", matchers.Docx)
pptx = newMIME("application/vnd.openxmlformats-officedocument.presentationml.presentation", ".pptx", matchers.Pptx)
epub = newMIME("application/epub+zip", ".epub", matchers.Epub)
jar = newMIME("application/jar", ".jar", matchers.Jar)
ole = newMIME("application/x-ole-storage", "", matchers.Ole, aaf, msg, xls, pub, ppt, doc)
aaf = newMIME("application/octet-stream", ".aaf", matchers.Aaf)
doc = newMIME("application/msword", ".doc", matchers.Doc).
fdf = newMIME("application/vnd.fdf", ".fdf", magic.Fdf)
xlsx = newMIME("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", ".xlsx", magic.Xlsx)
docx = newMIME("application/vnd.openxmlformats-officedocument.wordprocessingml.document", ".docx", magic.Docx)
pptx = newMIME("application/vnd.openxmlformats-officedocument.presentationml.presentation", ".pptx", magic.Pptx)
epub = newMIME("application/epub+zip", ".epub", magic.Epub)
jar = newMIME("application/jar", ".jar", magic.Jar)
ole = newMIME("application/x-ole-storage", "", magic.Ole, aaf, msg, xls, pub, ppt, doc)
aaf = newMIME("application/octet-stream", ".aaf", magic.Aaf)
doc = newMIME("application/msword", ".doc", magic.Doc).
alias("application/vnd.ms-word")
ppt = newMIME("application/vnd.ms-powerpoint", ".ppt", matchers.Ppt).
ppt = newMIME("application/vnd.ms-powerpoint", ".ppt", magic.Ppt).
alias("application/mspowerpoint")
pub = newMIME("application/vnd.ms-publisher", ".pub", matchers.Pub)
xls = newMIME("application/vnd.ms-excel", ".xls", matchers.Xls).
pub = newMIME("application/vnd.ms-publisher", ".pub", magic.Pub)
xls = newMIME("application/vnd.ms-excel", ".xls", magic.Xls).
alias("application/msexcel")
msg = newMIME("application/vnd.ms-outlook", ".msg", matchers.Msg)
ps = newMIME("application/postscript", ".ps", matchers.Ps)
fits = newMIME("application/fits", ".fits", matchers.Fits)
ogg = newMIME("application/ogg", ".ogg", matchers.Ogg, oggAudio, oggVideo).
msg = newMIME("application/vnd.ms-outlook", ".msg", magic.Msg)
ps = newMIME("application/postscript", ".ps", magic.Ps)
fits = newMIME("application/fits", ".fits", magic.Fits)
ogg = newMIME("application/ogg", ".ogg", magic.Ogg, oggAudio, oggVideo).
alias("application/x-ogg")
oggAudio = newMIME("audio/ogg", ".oga", matchers.OggAudio)
oggVideo = newMIME("video/ogg", ".ogv", matchers.OggVideo)
utf32le = newMIME("text/plain; charset=utf-32le", ".txt", matchers.Utf32le)
utf32be = newMIME("text/plain; charset=utf-32be", ".txt", matchers.Utf32be)
utf16le = newMIME("text/plain; charset=utf-16le", ".txt", matchers.Utf16le)
utf16be = newMIME("text/plain; charset=utf-16be", ".txt", matchers.Utf16be)
utf8 = newMIME("text/plain; charset=utf-8", ".txt", matchers.Utf8, html, svg, xml, php, js, lua, perl, python, json, ndJson, rtf, tcl, csv, tsv, vCard, iCalendar, warc)
xml = newMIME("text/xml; charset=utf-8", ".xml", matchers.Xml, rss, atom, x3d, kml, xliff, collada, gml, gpx, tcx, amf, threemf, xfdf, owl2)
json = newMIME("application/json", ".json", matchers.Json, geoJson)
csv = newMIME("text/csv", ".csv", matchers.Csv)
tsv = newMIME("text/tab-separated-values", ".tsv", matchers.Tsv)
geoJson = newMIME("application/geo+json", ".geojson", matchers.GeoJson)
ndJson = newMIME("application/x-ndjson", ".ndjson", matchers.NdJson)
html = newMIME("text/html; charset=utf-8", ".html", matchers.Html)
php = newMIME("text/x-php; charset=utf-8", ".php", matchers.Php)
rtf = newMIME("text/rtf", ".rtf", matchers.Rtf)
js = newMIME("application/javascript", ".js", matchers.Js).
oggAudio = newMIME("audio/ogg", ".oga", magic.OggAudio)
oggVideo = newMIME("video/ogg", ".ogv", magic.OggVideo)
text = newMIME("text/plain", ".txt", magic.Text, html, svg, xml, php, js, lua, perl, python, json, ndJson, rtf, tcl, csv, tsv, vCard, iCalendar, warc)
xml = newMIME("text/xml", ".xml", magic.Xml, rss, atom, x3d, kml, xliff, collada, gml, gpx, tcx, amf, threemf, xfdf, owl2)
json = newMIME("application/json", ".json", magic.Json, geoJson)
csv = newMIME("text/csv", ".csv", magic.Csv)
tsv = newMIME("text/tab-separated-values", ".tsv", magic.Tsv)
geoJson = newMIME("application/geo+json", ".geojson", magic.GeoJson)
ndJson = newMIME("application/x-ndjson", ".ndjson", magic.NdJson)
html = newMIME("text/html", ".html", magic.Html)
php = newMIME("text/x-php", ".php", magic.Php)
rtf = newMIME("text/rtf", ".rtf", magic.Rtf)
js = newMIME("application/javascript", ".js", magic.Js).
alias("application/x-javascript", "text/javascript")
lua = newMIME("text/x-lua", ".lua", matchers.Lua)
perl = newMIME("text/x-perl", ".pl", matchers.Perl)
python = newMIME("application/x-python", ".py", matchers.Python)
tcl = newMIME("text/x-tcl", ".tcl", matchers.Tcl).
lua = newMIME("text/x-lua", ".lua", magic.Lua)
perl = newMIME("text/x-perl", ".pl", magic.Perl)
python = newMIME("application/x-python", ".py", magic.Python)
tcl = newMIME("text/x-tcl", ".tcl", magic.Tcl).
alias("application/x-tcl")
vCard = newMIME("text/vcard", ".vcf", matchers.VCard)
iCalendar = newMIME("text/calendar", ".ics", matchers.ICalendar)
svg = newMIME("image/svg+xml", ".svg", matchers.Svg)
rss = newMIME("application/rss+xml", ".rss", matchers.Rss).
vCard = newMIME("text/vcard", ".vcf", magic.VCard)
iCalendar = newMIME("text/calendar", ".ics", magic.ICalendar)
svg = newMIME("image/svg+xml", ".svg", magic.Svg)
rss = newMIME("application/rss+xml", ".rss", magic.Rss).
alias("text/rss")
owl2 = newMIME("application/owl+xml", ".owl", matchers.Owl2)
atom = newMIME("application/atom+xml", ".atom", matchers.Atom)
x3d = newMIME("model/x3d+xml", ".x3d", matchers.X3d)
kml = newMIME("application/vnd.google-earth.kml+xml", ".kml", matchers.Kml)
xliff = newMIME("application/x-xliff+xml", ".xlf", matchers.Xliff)
collada = newMIME("model/vnd.collada+xml", ".dae", matchers.Collada)
gml = newMIME("application/gml+xml", ".gml", matchers.Gml)
gpx = newMIME("application/gpx+xml", ".gpx", matchers.Gpx)
tcx = newMIME("application/vnd.garmin.tcx+xml", ".tcx", matchers.Tcx)
amf = newMIME("application/x-amf", ".amf", matchers.Amf)
threemf = newMIME("application/vnd.ms-package.3dmanufacturing-3dmodel+xml", ".3mf", matchers.Threemf)
png = newMIME("image/png", ".png", matchers.Png)
jpg = newMIME("image/jpeg", ".jpg", matchers.Jpg)
jp2 = newMIME("image/jp2", ".jp2", matchers.Jp2)
jpx = newMIME("image/jpx", ".jpf", matchers.Jpx)
jpm = newMIME("image/jpm", ".jpm", matchers.Jpm).
owl2 = newMIME("application/owl+xml", ".owl", magic.Owl2)
atom = newMIME("application/atom+xml", ".atom", magic.Atom)
x3d = newMIME("model/x3d+xml", ".x3d", magic.X3d)
kml = newMIME("application/vnd.google-earth.kml+xml", ".kml", magic.Kml)
xliff = newMIME("application/x-xliff+xml", ".xlf", magic.Xliff)
collada = newMIME("model/vnd.collada+xml", ".dae", magic.Collada)
gml = newMIME("application/gml+xml", ".gml", magic.Gml)
gpx = newMIME("application/gpx+xml", ".gpx", magic.Gpx)
tcx = newMIME("application/vnd.garmin.tcx+xml", ".tcx", magic.Tcx)
amf = newMIME("application/x-amf", ".amf", magic.Amf)
threemf = newMIME("application/vnd.ms-package.3dmanufacturing-3dmodel+xml", ".3mf", magic.Threemf)
png = newMIME("image/png", ".png", magic.Png)
jpg = newMIME("image/jpeg", ".jpg", magic.Jpg)
jp2 = newMIME("image/jp2", ".jp2", magic.Jp2)
jpx = newMIME("image/jpx", ".jpf", magic.Jpx)
jpm = newMIME("image/jpm", ".jpm", magic.Jpm).
alias("video/jpm")
bpg = newMIME("image/bpg", ".bpg", matchers.Bpg)
gif = newMIME("image/gif", ".gif", matchers.Gif)
webp = newMIME("image/webp", ".webp", matchers.Webp)
tiff = newMIME("image/tiff", ".tiff", matchers.Tiff)
bmp = newMIME("image/bmp", ".bmp", matchers.Bmp).
xpm = newMIME("image/x-xpixmap", ".xpm", magic.Xpm)
bpg = newMIME("image/bpg", ".bpg", magic.Bpg)
gif = newMIME("image/gif", ".gif", magic.Gif)
webp = newMIME("image/webp", ".webp", magic.Webp)
tiff = newMIME("image/tiff", ".tiff", magic.Tiff)
bmp = newMIME("image/bmp", ".bmp", magic.Bmp).
alias("image/x-bmp", "image/x-ms-bmp")
ico = newMIME("image/x-icon", ".ico", matchers.Ico)
icns = newMIME("image/x-icns", ".icns", matchers.Icns)
psd = newMIME("image/vnd.adobe.photoshop", ".psd", matchers.Psd).
ico = newMIME("image/x-icon", ".ico", magic.Ico)
icns = newMIME("image/x-icns", ".icns", magic.Icns)
psd = newMIME("image/vnd.adobe.photoshop", ".psd", magic.Psd).
alias("image/x-psd", "application/photoshop")
heic = newMIME("image/heic", ".heic", matchers.Heic)
heicSeq = newMIME("image/heic-sequence", ".heic", matchers.HeicSequence)
heif = newMIME("image/heif", ".heif", matchers.Heif)
heifSeq = newMIME("image/heif-sequence", ".heif", matchers.HeifSequence)
hdr = newMIME("image/vnd.radiance", ".hdr", matchers.Hdr)
mp3 = newMIME("audio/mpeg", ".mp3", matchers.Mp3).
heic = newMIME("image/heic", ".heic", magic.Heic)
heicSeq = newMIME("image/heic-sequence", ".heic", magic.HeicSequence)
heif = newMIME("image/heif", ".heif", magic.Heif)
heifSeq = newMIME("image/heif-sequence", ".heif", magic.HeifSequence)
hdr = newMIME("image/vnd.radiance", ".hdr", magic.Hdr)
mp3 = newMIME("audio/mpeg", ".mp3", magic.Mp3).
alias("audio/x-mpeg", "audio/mp3")
flac = newMIME("audio/flac", ".flac", matchers.Flac)
midi = newMIME("audio/midi", ".midi", matchers.Midi).
flac = newMIME("audio/flac", ".flac", magic.Flac)
midi = newMIME("audio/midi", ".midi", magic.Midi).
alias("audio/mid", "audio/sp-midi", "audio/x-mid", "audio/x-midi")
ape = newMIME("audio/ape", ".ape", matchers.Ape)
musePack = newMIME("audio/musepack", ".mpc", matchers.MusePack)
wav = newMIME("audio/wav", ".wav", matchers.Wav).
ape = newMIME("audio/ape", ".ape", magic.Ape)
musePack = newMIME("audio/musepack", ".mpc", magic.MusePack)
wav = newMIME("audio/wav", ".wav", magic.Wav).
alias("audio/x-wav", "audio/vnd.wave", "audio/wave")
aiff = newMIME("audio/aiff", ".aiff", matchers.Aiff)
au = newMIME("audio/basic", ".au", matchers.Au)
amr = newMIME("audio/amr", ".amr", matchers.Amr).
aiff = newMIME("audio/aiff", ".aiff", magic.Aiff)
au = newMIME("audio/basic", ".au", magic.Au)
amr = newMIME("audio/amr", ".amr", magic.Amr).
alias("audio/amr-nb")
aac = newMIME("audio/aac", ".aac", matchers.Aac)
voc = newMIME("audio/x-unknown", ".voc", matchers.Voc)
aMp4 = newMIME("audio/mp4", ".mp4", matchers.AMp4).
aac = newMIME("audio/aac", ".aac", magic.Aac)
voc = newMIME("audio/x-unknown", ".voc", magic.Voc)
aMp4 = newMIME("audio/mp4", ".mp4", magic.AMp4).
alias("audio/x-m4a", "audio/x-mp4a")
m4a = newMIME("audio/x-m4a", ".m4a", matchers.M4a)
m3u = newMIME("application/vnd.apple.mpegurl", ".m3u", matchers.M3u).
m4a = newMIME("audio/x-m4a", ".m4a", magic.M4a)
m3u = newMIME("application/vnd.apple.mpegurl", ".m3u", magic.M3u).
alias("audio/mpegurl")
m4v = newMIME("video/x-m4v", ".m4v", matchers.M4v)
mp4 = newMIME("video/mp4", ".mp4", matchers.Mp4)
webM = newMIME("video/webm", ".webm", matchers.WebM).
m4v = newMIME("video/x-m4v", ".m4v", magic.M4v)
mp4 = newMIME("video/mp4", ".mp4", magic.Mp4)
webM = newMIME("video/webm", ".webm", magic.WebM).
alias("audio/webm")
mpeg = newMIME("video/mpeg", ".mpeg", matchers.Mpeg)
quickTime = newMIME("video/quicktime", ".mov", matchers.QuickTime)
mqv = newMIME("video/quicktime", ".mqv", matchers.Mqv)
threeGP = newMIME("video/3gpp", ".3gp", matchers.ThreeGP).
mpeg = newMIME("video/mpeg", ".mpeg", magic.Mpeg)
quickTime = newMIME("video/quicktime", ".mov", magic.QuickTime)
mqv = newMIME("video/quicktime", ".mqv", magic.Mqv)
threeGP = newMIME("video/3gpp", ".3gp", magic.ThreeGP).
alias("video/3gp", "audio/3gpp")
threeG2 = newMIME("video/3gpp2", ".3g2", matchers.ThreeG2).
threeG2 = newMIME("video/3gpp2", ".3g2", magic.ThreeG2).
alias("video/3g2", "audio/3gpp2")
avi = newMIME("video/x-msvideo", ".avi", matchers.Avi).
avi = newMIME("video/x-msvideo", ".avi", magic.Avi).
alias("video/avi", "video/msvideo")
flv = newMIME("video/x-flv", ".flv", matchers.Flv)
mkv = newMIME("video/x-matroska", ".mkv", matchers.Mkv)
asf = newMIME("video/x-ms-asf", ".asf", matchers.Asf).
flv = newMIME("video/x-flv", ".flv", magic.Flv)
mkv = newMIME("video/x-matroska", ".mkv", magic.Mkv)
asf = newMIME("video/x-ms-asf", ".asf", magic.Asf).
alias("video/asf", "video/x-ms-wmv")
rmvb = newMIME("application/vnd.rn-realmedia-vbr", ".rmvb", matchers.Rmvb)
class = newMIME("application/x-java-applet; charset=binary", ".class", matchers.Class)
swf = newMIME("application/x-shockwave-flash", ".swf", matchers.Swf)
crx = newMIME("application/x-chrome-extension", ".crx", matchers.Crx)
ttf = newMIME("font/ttf", ".ttf", matchers.Ttf).
rmvb = newMIME("application/vnd.rn-realmedia-vbr", ".rmvb", magic.Rmvb)
class = newMIME("application/x-java-applet", ".class", magic.Class)
swf = newMIME("application/x-shockwave-flash", ".swf", magic.Swf)
crx = newMIME("application/x-chrome-extension", ".crx", magic.Crx)
ttf = newMIME("font/ttf", ".ttf", magic.Ttf).
alias("font/sfnt", "application/x-font-ttf", "application/font-sfnt")
woff = newMIME("font/woff", ".woff", matchers.Woff)
woff2 = newMIME("font/woff2", ".woff2", matchers.Woff2)
otf = newMIME("font/otf", ".otf", matchers.Otf)
eot = newMIME("application/vnd.ms-fontobject", ".eot", matchers.Eot)
wasm = newMIME("application/wasm", ".wasm", matchers.Wasm)
shp = newMIME("application/octet-stream", ".shp", matchers.Shp)
shx = newMIME("application/octet-stream", ".shx", matchers.Shx, shp)
dbf = newMIME("application/x-dbf", ".dbf", matchers.Dbf)
exe = newMIME("application/vnd.microsoft.portable-executable", ".exe", matchers.Exe)
elf = newMIME("application/x-elf", "", matchers.Elf, elfObj, elfExe, elfLib, elfDump)
elfObj = newMIME("application/x-object", "", matchers.ElfObj)
elfExe = newMIME("application/x-executable", "", matchers.ElfExe)
elfLib = newMIME("application/x-sharedlib", ".so", matchers.ElfLib)
elfDump = newMIME("application/x-coredump", "", matchers.ElfDump)
ar = newMIME("application/x-archive", ".a", matchers.Ar, deb).
woff = newMIME("font/woff", ".woff", magic.Woff)
woff2 = newMIME("font/woff2", ".woff2", magic.Woff2)
otf = newMIME("font/otf", ".otf", magic.Otf)
eot = newMIME("application/vnd.ms-fontobject", ".eot", magic.Eot)
wasm = newMIME("application/wasm", ".wasm", magic.Wasm)
shp = newMIME("application/octet-stream", ".shp", magic.Shp)
shx = newMIME("application/octet-stream", ".shx", magic.Shx, shp)
dbf = newMIME("application/x-dbf", ".dbf", magic.Dbf)
exe = newMIME("application/vnd.microsoft.portable-executable", ".exe", magic.Exe)
elf = newMIME("application/x-elf", "", magic.Elf, elfObj, elfExe, elfLib, elfDump)
elfObj = newMIME("application/x-object", "", magic.ElfObj)
elfExe = newMIME("application/x-executable", "", magic.ElfExe)
elfLib = newMIME("application/x-sharedlib", ".so", magic.ElfLib)
elfDump = newMIME("application/x-coredump", "", magic.ElfDump)
ar = newMIME("application/x-archive", ".a", magic.Ar, deb).
alias("application/x-unix-archive")
deb = newMIME("application/vnd.debian.binary-package", ".deb", matchers.Deb)
rpm = newMIME("application/x-rpm", ".rpm", matchers.Rpm)
dcm = newMIME("application/dicom", ".dcm", matchers.Dcm)
odt = newMIME("application/vnd.oasis.opendocument.text", ".odt", matchers.Odt, ott).
deb = newMIME("application/vnd.debian.binary-package", ".deb", magic.Deb)
rpm = newMIME("application/x-rpm", ".rpm", magic.Rpm)
dcm = newMIME("application/dicom", ".dcm", magic.Dcm)
odt = newMIME("application/vnd.oasis.opendocument.text", ".odt", magic.Odt, ott).
alias("application/x-vnd.oasis.opendocument.text")
ott = newMIME("application/vnd.oasis.opendocument.text-template", ".ott", matchers.Ott).
ott = newMIME("application/vnd.oasis.opendocument.text-template", ".ott", magic.Ott).
alias("application/x-vnd.oasis.opendocument.text-template")
ods = newMIME("application/vnd.oasis.opendocument.spreadsheet", ".ods", matchers.Ods, ots).
ods = newMIME("application/vnd.oasis.opendocument.spreadsheet", ".ods", magic.Ods, ots).
alias("application/x-vnd.oasis.opendocument.spreadsheet")
ots = newMIME("application/vnd.oasis.opendocument.spreadsheet-template", ".ots", matchers.Ots).
ots = newMIME("application/vnd.oasis.opendocument.spreadsheet-template", ".ots", magic.Ots).
alias("application/x-vnd.oasis.opendocument.spreadsheet-template")
odp = newMIME("application/vnd.oasis.opendocument.presentation", ".odp", matchers.Odp, otp).
odp = newMIME("application/vnd.oasis.opendocument.presentation", ".odp", magic.Odp, otp).
alias("application/x-vnd.oasis.opendocument.presentation")
otp = newMIME("application/vnd.oasis.opendocument.presentation-template", ".otp", matchers.Otp).
otp = newMIME("application/vnd.oasis.opendocument.presentation-template", ".otp", magic.Otp).
alias("application/x-vnd.oasis.opendocument.presentation-template")
odg = newMIME("application/vnd.oasis.opendocument.graphics", ".odg", matchers.Odg, otg).
odg = newMIME("application/vnd.oasis.opendocument.graphics", ".odg", magic.Odg, otg).
alias("application/x-vnd.oasis.opendocument.graphics")
otg = newMIME("application/vnd.oasis.opendocument.graphics-template", ".otg", matchers.Otg).
otg = newMIME("application/vnd.oasis.opendocument.graphics-template", ".otg", magic.Otg).
alias("application/x-vnd.oasis.opendocument.graphics-template")
odf = newMIME("application/vnd.oasis.opendocument.formula", ".odf", matchers.Odf).
odf = newMIME("application/vnd.oasis.opendocument.formula", ".odf", magic.Odf).
alias("application/x-vnd.oasis.opendocument.formula")
odc = newMIME("application/vnd.oasis.opendocument.chart", ".odc", matchers.Odc).
odc = newMIME("application/vnd.oasis.opendocument.chart", ".odc", magic.Odc).
alias("application/x-vnd.oasis.opendocument.chart")
sxc = newMIME("application/vnd.sun.xml.calc", ".sxc", matchers.Sxc)
rar = newMIME("application/x-rar-compressed", ".rar", matchers.Rar).
sxc = newMIME("application/vnd.sun.xml.calc", ".sxc", magic.Sxc)
rar = newMIME("application/x-rar-compressed", ".rar", magic.Rar).
alias("application/x-rar")
djvu = newMIME("image/vnd.djvu", ".djvu", matchers.DjVu)
mobi = newMIME("application/x-mobipocket-ebook", ".mobi", matchers.Mobi)
lit = newMIME("application/x-ms-reader", ".lit", matchers.Lit)
sqlite3 = newMIME("application/x-sqlite3", ".sqlite", matchers.Sqlite)
dwg = newMIME("image/vnd.dwg", ".dwg", matchers.Dwg).
alias("image/x-dwg", "application/acad", "application/x-acad", "application/autocad_dwg", "application/dwg", "application/x-dwg", "application/x-autocad", "drawing/dwg")
warc = newMIME("application/warc", ".warc", matchers.Warc)
nes = newMIME("application/vnd.nintendo.snes.rom", ".nes", matchers.Nes)
macho = newMIME("application/x-mach-binary", ".macho", matchers.MachO)
qcp = newMIME("audio/qcelp", ".qcp", matchers.Qcp)
mrc = newMIME("application/marc", ".mrc", matchers.Marc)
mdb = newMIME("application/x-msaccess", ".mdb", matchers.MsAccessMdb)
accdb = newMIME("application/x-msaccess", ".accdb", matchers.MsAccessAce)
zstd = newMIME("application/zstd", ".zst", matchers.Zstd)
cab = newMIME("application/vnd.ms-cab-compressed", ".cab", matchers.Cab)
lzip = newMIME("application/lzip", ".lz", matchers.Lzip)
torrent = newMIME("application/x-bittorrent", ".torrent", matchers.Torrent)
cpio = newMIME("application/x-cpio", ".cpio", matchers.Cpio)
tzif = newMIME("application/tzif", "", matchers.TzIf)
p7s = newMIME("application/pkcs7-signature", ".p7s", matchers.P7s)
xcf = newMIME("image/x-xcf", ".xcf", matchers.Xcf)
pat = newMIME("image/x-gimp-pat", ".pat", matchers.Pat)
gbr = newMIME("image/x-gimp-gbr", ".gbr", matchers.Gbr)
xfdf = newMIME("application/vnd.adobe.xfdf", ".xfdf", matchers.Xfdf)
glb = newMIME("model/gltf-binary", ".glb", matchers.Glb)
djvu = newMIME("image/vnd.djvu", ".djvu", magic.DjVu)
mobi = newMIME("application/x-mobipocket-ebook", ".mobi", magic.Mobi)
lit = newMIME("application/x-ms-reader", ".lit", magic.Lit)
sqlite3 = newMIME("application/x-sqlite3", ".sqlite", magic.Sqlite)
dwg = newMIME("image/vnd.dwg", ".dwg", magic.Dwg).
alias("image/x-dwg", "application/acad", "application/x-acad",
"application/autocad_dwg", "application/dwg", "application/x-dwg",
"application/x-autocad", "drawing/dwg")
warc = newMIME("application/warc", ".warc", magic.Warc)
nes = newMIME("application/vnd.nintendo.snes.rom", ".nes", magic.Nes)
lnk = newMIME("application/x-ms-shortcut", ".lnk", magic.Lnk)
macho = newMIME("application/x-mach-binary", ".macho", magic.MachO)
qcp = newMIME("audio/qcelp", ".qcp", magic.Qcp)
mrc = newMIME("application/marc", ".mrc", magic.Marc)
mdb = newMIME("application/x-msaccess", ".mdb", magic.MsAccessMdb)
accdb = newMIME("application/x-msaccess", ".accdb", magic.MsAccessAce)
zstd = newMIME("application/zstd", ".zst", magic.Zstd)
cab = newMIME("application/vnd.ms-cab-compressed", ".cab", magic.Cab)
lzip = newMIME("application/lzip", ".lz", magic.Lzip)
torrent = newMIME("application/x-bittorrent", ".torrent", magic.Torrent)
cpio = newMIME("application/x-cpio", ".cpio", magic.Cpio)
tzif = newMIME("application/tzif", "", magic.TzIf)
p7s = newMIME("application/pkcs7-signature", ".p7s", magic.P7s)
xcf = newMIME("image/x-xcf", ".xcf", magic.Xcf)
pat = newMIME("image/x-gimp-pat", ".pat", magic.Pat)
gbr = newMIME("image/x-gimp-gbr", ".gbr", magic.Gbr)
xfdf = newMIME("application/vnd.adobe.xfdf", ".xfdf", magic.Xfdf)
glb = newMIME("model/gltf-binary", ".glb", magic.Glb)
)

3
vendor/golang.org/x/net/AUTHORS generated vendored

@ -0,0 +1,3 @@
# This source code refers to The Go Authors for copyright purposes.
# The master list of authors is in the main Go distribution,
# visible at http://tip.golang.org/AUTHORS.

@ -0,0 +1,3 @@
# This source code was written by the Go contributors.
# The master list of contributors is in the main Go distribution,
# visible at http://tip.golang.org/CONTRIBUTORS.

27
vendor/golang.org/x/net/LICENSE generated vendored

@ -0,0 +1,27 @@
Copyright (c) 2009 The Go Authors. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

22
vendor/golang.org/x/net/PATENTS generated vendored

@ -0,0 +1,22 @@
Additional IP Rights Grant (Patents)
"This implementation" means the copyrightable works distributed by
Google as part of the Go project.
Google hereby grants to You a perpetual, worldwide, non-exclusive,
no-charge, royalty-free, irrevocable (except as stated in this section)
patent license to make, have made, use, offer to sell, sell, import,
transfer and otherwise run, modify and propagate the contents of this
implementation of Go, where such license applies only to those patent
claims, both currently owned or controlled by Google and acquired in
the future, licensable by Google that are necessarily infringed by this
implementation of Go. This grant does not include claims that would be
infringed only as a consequence of further modification of this
implementation. If you or your agent or exclusive licensee institute or
order or agree to the institution of patent litigation against any
entity (including a cross-claim or counterclaim in a lawsuit) alleging
that this implementation of Go or any code incorporated within this
implementation of Go constitutes direct or contributory patent
infringement, or inducement of patent infringement, then any patent
rights granted to you under this License for this implementation of Go
shall terminate as of the date such litigation is filed.

@ -0,0 +1,78 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package atom provides integer codes (also known as atoms) for a fixed set of
// frequently occurring HTML strings: tag names and attribute keys such as "p"
// and "id".
//
// Sharing an atom's name between all elements with the same tag can result in
// fewer string allocations when tokenizing and parsing HTML. Integer
// comparisons are also generally faster than string comparisons.
//
// The value of an atom's particular code is not guaranteed to stay the same
// between versions of this package. Neither is any ordering guaranteed:
// whether atom.H1 < atom.H2 may also change. The codes are not guaranteed to
// be dense. The only guarantees are that e.g. looking up "div" will yield
// atom.Div, calling atom.Div.String will return "div", and atom.Div != 0.
package atom // import "golang.org/x/net/html/atom"
// Atom is an integer code for a string. The zero value maps to "".
type Atom uint32
// String returns the atom's name.
func (a Atom) String() string {
start := uint32(a >> 8)
n := uint32(a & 0xff)
if start+n > uint32(len(atomText)) {
return ""
}
return atomText[start : start+n]
}
func (a Atom) string() string {
return atomText[a>>8 : a>>8+a&0xff]
}
// fnv computes the FNV hash with an arbitrary starting value h.
func fnv(h uint32, s []byte) uint32 {
for i := range s {
h ^= uint32(s[i])
h *= 16777619
}
return h
}
func match(s string, t []byte) bool {
for i, c := range t {
if s[i] != c {
return false
}
}
return true
}
// Lookup returns the atom whose name is s. It returns zero if there is no
// such atom. The lookup is case sensitive.
func Lookup(s []byte) Atom {
if len(s) == 0 || len(s) > maxAtomLen {
return 0
}
h := fnv(hash0, s)
if a := table[h&uint32(len(table)-1)]; int(a&0xff) == len(s) && match(a.string(), s) {
return a
}
if a := table[(h>>16)&uint32(len(table)-1)]; int(a&0xff) == len(s) && match(a.string(), s) {
return a
}
return 0
}
// String returns a string whose contents are equal to s. In that sense, it is
// equivalent to string(s) but may be more efficient.
func String(s []byte) string {
if a := Lookup(s); a != 0 {
return a.String()
}
return string(s)
}

@ -0,0 +1,783 @@
// Code generated by go generate gen.go; DO NOT EDIT.
//go:generate go run gen.go
package atom
const (
A Atom = 0x1
Abbr Atom = 0x4
Accept Atom = 0x1a06
AcceptCharset Atom = 0x1a0e
Accesskey Atom = 0x2c09
Acronym Atom = 0xaa07
Action Atom = 0x27206
Address Atom = 0x6f307
Align Atom = 0xb105
Allowfullscreen Atom = 0x2080f
Allowpaymentrequest Atom = 0xc113
Allowusermedia Atom = 0xdd0e
Alt Atom = 0xf303
Annotation Atom = 0x1c90a
AnnotationXml Atom = 0x1c90e
Applet Atom = 0x31906
Area Atom = 0x35604
Article Atom = 0x3fc07
As Atom = 0x3c02
Aside Atom = 0x10705
Async Atom = 0xff05
Audio Atom = 0x11505
Autocomplete Atom = 0x2780c
Autofocus Atom = 0x12109
Autoplay Atom = 0x13c08
B Atom = 0x101
Base Atom = 0x3b04
Basefont Atom = 0x3b08
Bdi Atom = 0xba03
Bdo Atom = 0x14b03
Bgsound Atom = 0x15e07
Big Atom = 0x17003
Blink Atom = 0x17305
Blockquote Atom = 0x1870a
Body Atom = 0x2804
Br Atom = 0x202
Button Atom = 0x19106
Canvas Atom = 0x10306
Caption Atom = 0x23107
Center Atom = 0x22006
Challenge Atom = 0x29b09
Charset Atom = 0x2107
Checked Atom = 0x47907
Cite Atom = 0x19c04
Class Atom = 0x56405
Code Atom = 0x5c504
Col Atom = 0x1ab03
Colgroup Atom = 0x1ab08
Color Atom = 0x1bf05
Cols Atom = 0x1c404
Colspan Atom = 0x1c407
Command Atom = 0x1d707
Content Atom = 0x58b07
Contenteditable Atom = 0x58b0f
Contextmenu Atom = 0x3800b
Controls Atom = 0x1de08
Coords Atom = 0x1ea06
Crossorigin Atom = 0x1fb0b
Data Atom = 0x4a504
Datalist Atom = 0x4a508
Datetime Atom = 0x2b808
Dd Atom = 0x2d702
Default Atom = 0x10a07
Defer Atom = 0x5c705
Del Atom = 0x45203
Desc Atom = 0x56104
Details Atom = 0x7207
Dfn Atom = 0x8703
Dialog Atom = 0xbb06
Dir Atom = 0x9303
Dirname Atom = 0x9307
Disabled Atom = 0x16408
Div Atom = 0x16b03
Dl Atom = 0x5e602
Download Atom = 0x46308
Draggable Atom = 0x17a09
Dropzone Atom = 0x40508
Dt Atom = 0x64b02
Em Atom = 0x6e02
Embed Atom = 0x6e05
Enctype Atom = 0x28d07
Face Atom = 0x21e04
Fieldset Atom = 0x22608
Figcaption Atom = 0x22e0a
Figure Atom = 0x24806
Font Atom = 0x3f04
Footer Atom = 0xf606
For Atom = 0x25403
ForeignObject Atom = 0x2540d
Foreignobject Atom = 0x2610d
Form Atom = 0x26e04
Formaction Atom = 0x26e0a
Formenctype Atom = 0x2890b
Formmethod Atom = 0x2a40a
Formnovalidate Atom = 0x2ae0e
Formtarget Atom = 0x2c00a
Frame Atom = 0x8b05
Frameset Atom = 0x8b08
H1 Atom = 0x15c02
H2 Atom = 0x2de02
H3 Atom = 0x30d02
H4 Atom = 0x34502
H5 Atom = 0x34f02
H6 Atom = 0x64d02
Head Atom = 0x33104
Header Atom = 0x33106
Headers Atom = 0x33107
Height Atom = 0x5206
Hgroup Atom = 0x2ca06
Hidden Atom = 0x2d506
High Atom = 0x2db04
Hr Atom = 0x15702
Href Atom = 0x2e004
Hreflang Atom = 0x2e008
Html Atom = 0x5604
HttpEquiv Atom = 0x2e80a
I Atom = 0x601
Icon Atom = 0x58a04
Id Atom = 0x10902
Iframe Atom = 0x2fc06
Image Atom = 0x30205
Img Atom = 0x30703
Input Atom = 0x44b05
Inputmode Atom = 0x44b09
Ins Atom = 0x20403
Integrity Atom = 0x23f09
Is Atom = 0x16502
Isindex Atom = 0x30f07
Ismap Atom = 0x31605
Itemid Atom = 0x38b06
Itemprop Atom = 0x19d08
Itemref Atom = 0x3cd07
Itemscope Atom = 0x67109
Itemtype Atom = 0x31f08
Kbd Atom = 0xb903
Keygen Atom = 0x3206
Keytype Atom = 0xd607
Kind Atom = 0x17704
Label Atom = 0x5905
Lang Atom = 0x2e404
Legend Atom = 0x18106
Li Atom = 0xb202
Link Atom = 0x17404
List Atom = 0x4a904
Listing Atom = 0x4a907
Loop Atom = 0x5d04
Low Atom = 0xc303
Main Atom = 0x1004
Malignmark Atom = 0xb00a
Manifest Atom = 0x6d708
Map Atom = 0x31803
Mark Atom = 0xb604
Marquee Atom = 0x32707
Math Atom = 0x32e04
Max Atom = 0x33d03
Maxlength Atom = 0x33d09
Media Atom = 0xe605
Mediagroup Atom = 0xe60a
Menu Atom = 0x38704
Menuitem Atom = 0x38708
Meta Atom = 0x4b804
Meter Atom = 0x9805
Method Atom = 0x2a806
Mglyph Atom = 0x30806
Mi Atom = 0x34702
Min Atom = 0x34703
Minlength Atom = 0x34709
Mn Atom = 0x2b102
Mo Atom = 0xa402
Ms Atom = 0x67402
Mtext Atom = 0x35105
Multiple Atom = 0x35f08
Muted Atom = 0x36705
Name Atom = 0x9604
Nav Atom = 0x1303
Nobr Atom = 0x3704
Noembed Atom = 0x6c07
Noframes Atom = 0x8908
Nomodule Atom = 0xa208
Nonce Atom = 0x1a605
Noscript Atom = 0x21608
Novalidate Atom = 0x2b20a
Object Atom = 0x26806
Ol Atom = 0x13702
Onabort Atom = 0x19507
Onafterprint Atom = 0x2360c
Onautocomplete Atom = 0x2760e
Onautocompleteerror Atom = 0x27613
Onauxclick Atom = 0x61f0a
Onbeforeprint Atom = 0x69e0d
Onbeforeunload Atom = 0x6e70e
Onblur Atom = 0x56d06
Oncancel Atom = 0x11908
Oncanplay Atom = 0x14d09
Oncanplaythrough Atom = 0x14d10
Onchange Atom = 0x41b08
Onclick Atom = 0x2f507
Onclose Atom = 0x36c07
Oncontextmenu Atom = 0x37e0d
Oncopy Atom = 0x39106
Oncuechange Atom = 0x3970b
Oncut Atom = 0x3a205
Ondblclick Atom = 0x3a70a
Ondrag Atom = 0x3b106
Ondragend Atom = 0x3b109
Ondragenter Atom = 0x3ba0b
Ondragexit Atom = 0x3c50a
Ondragleave Atom = 0x3df0b
Ondragover Atom = 0x3ea0a
Ondragstart Atom = 0x3f40b
Ondrop Atom = 0x40306
Ondurationchange Atom = 0x41310
Onemptied Atom = 0x40a09
Onended Atom = 0x42307
Onerror Atom = 0x42a07
Onfocus Atom = 0x43107
Onhashchange Atom = 0x43d0c
Oninput Atom = 0x44907
Oninvalid Atom = 0x45509
Onkeydown Atom = 0x45e09
Onkeypress Atom = 0x46b0a
Onkeyup Atom = 0x48007
Onlanguagechange Atom = 0x48d10
Onload Atom = 0x49d06
Onloadeddata Atom = 0x49d0c
Onloadedmetadata Atom = 0x4b010
Onloadend Atom = 0x4c609
Onloadstart Atom = 0x4cf0b
Onmessage Atom = 0x4da09
Onmessageerror Atom = 0x4da0e
Onmousedown Atom = 0x4e80b
Onmouseenter Atom = 0x4f30c
Onmouseleave Atom = 0x4ff0c
Onmousemove Atom = 0x50b0b
Onmouseout Atom = 0x5160a
Onmouseover Atom = 0x5230b
Onmouseup Atom = 0x52e09
Onmousewheel Atom = 0x53c0c
Onoffline Atom = 0x54809
Ononline Atom = 0x55108
Onpagehide Atom = 0x5590a
Onpageshow Atom = 0x5730a
Onpaste Atom = 0x57f07
Onpause Atom = 0x59a07
Onplay Atom = 0x5a406
Onplaying Atom = 0x5a409
Onpopstate Atom = 0x5ad0a
Onprogress Atom = 0x5b70a
Onratechange Atom = 0x5cc0c
Onrejectionhandled Atom = 0x5d812
Onreset Atom = 0x5ea07
Onresize Atom = 0x5f108
Onscroll Atom = 0x60008
Onsecuritypolicyviolation Atom = 0x60819
Onseeked Atom = 0x62908
Onseeking Atom = 0x63109
Onselect Atom = 0x63a08
Onshow Atom = 0x64406
Onsort Atom = 0x64f06
Onstalled Atom = 0x65909
Onstorage Atom = 0x66209
Onsubmit Atom = 0x66b08
Onsuspend Atom = 0x67b09
Ontimeupdate Atom = 0x400c
Ontoggle Atom = 0x68408
Onunhandledrejection Atom = 0x68c14
Onunload Atom = 0x6ab08
Onvolumechange Atom = 0x6b30e
Onwaiting Atom = 0x6c109
Onwheel Atom = 0x6ca07
Open Atom = 0x1a304
Optgroup Atom = 0x5f08
Optimum Atom = 0x6d107
Option Atom = 0x6e306
Output Atom = 0x51d06
P Atom = 0xc01
Param Atom = 0xc05
Pattern Atom = 0x6607
Picture Atom = 0x7b07
Ping Atom = 0xef04
Placeholder Atom = 0x1310b
Plaintext Atom = 0x1b209
Playsinline Atom = 0x1400b
Poster Atom = 0x2cf06
Pre Atom = 0x47003
Preload Atom = 0x48607
Progress Atom = 0x5b908
Prompt Atom = 0x53606
Public Atom = 0x58606
Q Atom = 0xcf01
Radiogroup Atom = 0x30a
Rb Atom = 0x3a02
Readonly Atom = 0x35708
Referrerpolicy Atom = 0x3d10e
Rel Atom = 0x48703
Required Atom = 0x24c08
Reversed Atom = 0x8008
Rows Atom = 0x9c04
Rowspan Atom = 0x9c07
Rp Atom = 0x23c02
Rt Atom = 0x19a02
Rtc Atom = 0x19a03
Ruby Atom = 0xfb04
S Atom = 0x2501
Samp Atom = 0x7804
Sandbox Atom = 0x12907
Scope Atom = 0x67505
Scoped Atom = 0x67506
Script Atom = 0x21806
Seamless Atom = 0x37108
Section Atom = 0x56807
Select Atom = 0x63c06
Selected Atom = 0x63c08
Shape Atom = 0x1e505
Size Atom = 0x5f504
Sizes Atom = 0x5f505
Slot Atom = 0x1ef04
Small Atom = 0x20605
Sortable Atom = 0x65108
Sorted Atom = 0x33706
Source Atom = 0x37806
Spacer Atom = 0x43706
Span Atom = 0x9f04
Spellcheck Atom = 0x4740a
Src Atom = 0x5c003
Srcdoc Atom = 0x5c006
Srclang Atom = 0x5f907
Srcset Atom = 0x6f906
Start Atom = 0x3fa05
Step Atom = 0x58304
Strike Atom = 0xd206
Strong Atom = 0x6dd06
Style Atom = 0x6ff05
Sub Atom = 0x66d03
Summary Atom = 0x70407
Sup Atom = 0x70b03
Svg Atom = 0x70e03
System Atom = 0x71106
Tabindex Atom = 0x4be08
Table Atom = 0x59505
Target Atom = 0x2c406
Tbody Atom = 0x2705
Td Atom = 0x9202
Template Atom = 0x71408
Textarea Atom = 0x35208
Tfoot Atom = 0xf505
Th Atom = 0x15602
Thead Atom = 0x33005
Time Atom = 0x4204
Title Atom = 0x11005
Tr Atom = 0xcc02
Track Atom = 0x1ba05
Translate Atom = 0x1f209
Tt Atom = 0x6802
Type Atom = 0xd904
Typemustmatch Atom = 0x2900d
U Atom = 0xb01
Ul Atom = 0xa702
Updateviacache Atom = 0x460e
Usemap Atom = 0x59e06
Value Atom = 0x1505
Var Atom = 0x16d03
Video Atom = 0x2f105
Wbr Atom = 0x57c03
Width Atom = 0x64905
Workertype Atom = 0x71c0a
Wrap Atom = 0x72604
Xmp Atom = 0x12f03
)
const hash0 = 0x81cdf10e
const maxAtomLen = 25
var table = [1 << 9]Atom{
0x1: 0xe60a, // mediagroup
0x2: 0x2e404, // lang
0x4: 0x2c09, // accesskey
0x5: 0x8b08, // frameset
0x7: 0x63a08, // onselect
0x8: 0x71106, // system
0xa: 0x64905, // width
0xc: 0x2890b, // formenctype
0xd: 0x13702, // ol
0xe: 0x3970b, // oncuechange
0x10: 0x14b03, // bdo
0x11: 0x11505, // audio
0x12: 0x17a09, // draggable
0x14: 0x2f105, // video
0x15: 0x2b102, // mn
0x16: 0x38704, // menu
0x17: 0x2cf06, // poster
0x19: 0xf606, // footer
0x1a: 0x2a806, // method
0x1b: 0x2b808, // datetime
0x1c: 0x19507, // onabort
0x1d: 0x460e, // updateviacache
0x1e: 0xff05, // async
0x1f: 0x49d06, // onload
0x21: 0x11908, // oncancel
0x22: 0x62908, // onseeked
0x23: 0x30205, // image
0x24: 0x5d812, // onrejectionhandled
0x26: 0x17404, // link
0x27: 0x51d06, // output
0x28: 0x33104, // head
0x29: 0x4ff0c, // onmouseleave
0x2a: 0x57f07, // onpaste
0x2b: 0x5a409, // onplaying
0x2c: 0x1c407, // colspan
0x2f: 0x1bf05, // color
0x30: 0x5f504, // size
0x31: 0x2e80a, // http-equiv
0x33: 0x601, // i
0x34: 0x5590a, // onpagehide
0x35: 0x68c14, // onunhandledrejection
0x37: 0x42a07, // onerror
0x3a: 0x3b08, // basefont
0x3f: 0x1303, // nav
0x40: 0x17704, // kind
0x41: 0x35708, // readonly
0x42: 0x30806, // mglyph
0x44: 0xb202, // li
0x46: 0x2d506, // hidden
0x47: 0x70e03, // svg
0x48: 0x58304, // step
0x49: 0x23f09, // integrity
0x4a: 0x58606, // public
0x4c: 0x1ab03, // col
0x4d: 0x1870a, // blockquote
0x4e: 0x34f02, // h5
0x50: 0x5b908, // progress
0x51: 0x5f505, // sizes
0x52: 0x34502, // h4
0x56: 0x33005, // thead
0x57: 0xd607, // keytype
0x58: 0x5b70a, // onprogress
0x59: 0x44b09, // inputmode
0x5a: 0x3b109, // ondragend
0x5d: 0x3a205, // oncut
0x5e: 0x43706, // spacer
0x5f: 0x1ab08, // colgroup
0x62: 0x16502, // is
0x65: 0x3c02, // as
0x66: 0x54809, // onoffline
0x67: 0x33706, // sorted
0x69: 0x48d10, // onlanguagechange
0x6c: 0x43d0c, // onhashchange
0x6d: 0x9604, // name
0x6e: 0xf505, // tfoot
0x6f: 0x56104, // desc
0x70: 0x33d03, // max
0x72: 0x1ea06, // coords
0x73: 0x30d02, // h3
0x74: 0x6e70e, // onbeforeunload
0x75: 0x9c04, // rows
0x76: 0x63c06, // select
0x77: 0x9805, // meter
0x78: 0x38b06, // itemid
0x79: 0x53c0c, // onmousewheel
0x7a: 0x5c006, // srcdoc
0x7d: 0x1ba05, // track
0x7f: 0x31f08, // itemtype
0x82: 0xa402, // mo
0x83: 0x41b08, // onchange
0x84: 0x33107, // headers
0x85: 0x5cc0c, // onratechange
0x86: 0x60819, // onsecuritypolicyviolation
0x88: 0x4a508, // datalist
0x89: 0x4e80b, // onmousedown
0x8a: 0x1ef04, // slot
0x8b: 0x4b010, // onloadedmetadata
0x8c: 0x1a06, // accept
0x8d: 0x26806, // object
0x91: 0x6b30e, // onvolumechange
0x92: 0x2107, // charset
0x93: 0x27613, // onautocompleteerror
0x94: 0xc113, // allowpaymentrequest
0x95: 0x2804, // body
0x96: 0x10a07, // default
0x97: 0x63c08, // selected
0x98: 0x21e04, // face
0x99: 0x1e505, // shape
0x9b: 0x68408, // ontoggle
0x9e: 0x64b02, // dt
0x9f: 0xb604, // mark
0xa1: 0xb01, // u
0xa4: 0x6ab08, // onunload
0xa5: 0x5d04, // loop
0xa6: 0x16408, // disabled
0xaa: 0x42307, // onended
0xab: 0xb00a, // malignmark
0xad: 0x67b09, // onsuspend
0xae: 0x35105, // mtext
0xaf: 0x64f06, // onsort
0xb0: 0x19d08, // itemprop
0xb3: 0x67109, // itemscope
0xb4: 0x17305, // blink
0xb6: 0x3b106, // ondrag
0xb7: 0xa702, // ul
0xb8: 0x26e04, // form
0xb9: 0x12907, // sandbox
0xba: 0x8b05, // frame
0xbb: 0x1505, // value
0xbc: 0x66209, // onstorage
0xbf: 0xaa07, // acronym
0xc0: 0x19a02, // rt
0xc2: 0x202, // br
0xc3: 0x22608, // fieldset
0xc4: 0x2900d, // typemustmatch
0xc5: 0xa208, // nomodule
0xc6: 0x6c07, // noembed
0xc7: 0x69e0d, // onbeforeprint
0xc8: 0x19106, // button
0xc9: 0x2f507, // onclick
0xca: 0x70407, // summary
0xcd: 0xfb04, // ruby
0xce: 0x56405, // class
0xcf: 0x3f40b, // ondragstart
0xd0: 0x23107, // caption
0xd4: 0xdd0e, // allowusermedia
0xd5: 0x4cf0b, // onloadstart
0xd9: 0x16b03, // div
0xda: 0x4a904, // list
0xdb: 0x32e04, // math
0xdc: 0x44b05, // input
0xdf: 0x3ea0a, // ondragover
0xe0: 0x2de02, // h2
0xe2: 0x1b209, // plaintext
0xe4: 0x4f30c, // onmouseenter
0xe7: 0x47907, // checked
0xe8: 0x47003, // pre
0xea: 0x35f08, // multiple
0xeb: 0xba03, // bdi
0xec: 0x33d09, // maxlength
0xed: 0xcf01, // q
0xee: 0x61f0a, // onauxclick
0xf0: 0x57c03, // wbr
0xf2: 0x3b04, // base
0xf3: 0x6e306, // option
0xf5: 0x41310, // ondurationchange
0xf7: 0x8908, // noframes
0xf9: 0x40508, // dropzone
0xfb: 0x67505, // scope
0xfc: 0x8008, // reversed
0xfd: 0x3ba0b, // ondragenter
0xfe: 0x3fa05, // start
0xff: 0x12f03, // xmp
0x100: 0x5f907, // srclang
0x101: 0x30703, // img
0x104: 0x101, // b
0x105: 0x25403, // for
0x106: 0x10705, // aside
0x107: 0x44907, // oninput
0x108: 0x35604, // area
0x109: 0x2a40a, // formmethod
0x10a: 0x72604, // wrap
0x10c: 0x23c02, // rp
0x10d: 0x46b0a, // onkeypress
0x10e: 0x6802, // tt
0x110: 0x34702, // mi
0x111: 0x36705, // muted
0x112: 0xf303, // alt
0x113: 0x5c504, // code
0x114: 0x6e02, // em
0x115: 0x3c50a, // ondragexit
0x117: 0x9f04, // span
0x119: 0x6d708, // manifest
0x11a: 0x38708, // menuitem
0x11b: 0x58b07, // content
0x11d: 0x6c109, // onwaiting
0x11f: 0x4c609, // onloadend
0x121: 0x37e0d, // oncontextmenu
0x123: 0x56d06, // onblur
0x124: 0x3fc07, // article
0x125: 0x9303, // dir
0x126: 0xef04, // ping
0x127: 0x24c08, // required
0x128: 0x45509, // oninvalid
0x129: 0xb105, // align
0x12b: 0x58a04, // icon
0x12c: 0x64d02, // h6
0x12d: 0x1c404, // cols
0x12e: 0x22e0a, // figcaption
0x12f: 0x45e09, // onkeydown
0x130: 0x66b08, // onsubmit
0x131: 0x14d09, // oncanplay
0x132: 0x70b03, // sup
0x133: 0xc01, // p
0x135: 0x40a09, // onemptied
0x136: 0x39106, // oncopy
0x137: 0x19c04, // cite
0x138: 0x3a70a, // ondblclick
0x13a: 0x50b0b, // onmousemove
0x13c: 0x66d03, // sub
0x13d: 0x48703, // rel
0x13e: 0x5f08, // optgroup
0x142: 0x9c07, // rowspan
0x143: 0x37806, // source
0x144: 0x21608, // noscript
0x145: 0x1a304, // open
0x146: 0x20403, // ins
0x147: 0x2540d, // foreignObject
0x148: 0x5ad0a, // onpopstate
0x14a: 0x28d07, // enctype
0x14b: 0x2760e, // onautocomplete
0x14c: 0x35208, // textarea
0x14e: 0x2780c, // autocomplete
0x14f: 0x15702, // hr
0x150: 0x1de08, // controls
0x151: 0x10902, // id
0x153: 0x2360c, // onafterprint
0x155: 0x2610d, // foreignobject
0x156: 0x32707, // marquee
0x157: 0x59a07, // onpause
0x158: 0x5e602, // dl
0x159: 0x5206, // height
0x15a: 0x34703, // min
0x15b: 0x9307, // dirname
0x15c: 0x1f209, // translate
0x15d: 0x5604, // html
0x15e: 0x34709, // minlength
0x15f: 0x48607, // preload
0x160: 0x71408, // template
0x161: 0x3df0b, // ondragleave
0x162: 0x3a02, // rb
0x164: 0x5c003, // src
0x165: 0x6dd06, // strong
0x167: 0x7804, // samp
0x168: 0x6f307, // address
0x169: 0x55108, // ononline
0x16b: 0x1310b, // placeholder
0x16c: 0x2c406, // target
0x16d: 0x20605, // small
0x16e: 0x6ca07, // onwheel
0x16f: 0x1c90a, // annotation
0x170: 0x4740a, // spellcheck
0x171: 0x7207, // details
0x172: 0x10306, // canvas
0x173: 0x12109, // autofocus
0x174: 0xc05, // param
0x176: 0x46308, // download
0x177: 0x45203, // del
0x178: 0x36c07, // onclose
0x179: 0xb903, // kbd
0x17a: 0x31906, // applet
0x17b: 0x2e004, // href
0x17c: 0x5f108, // onresize
0x17e: 0x49d0c, // onloadeddata
0x180: 0xcc02, // tr
0x181: 0x2c00a, // formtarget
0x182: 0x11005, // title
0x183: 0x6ff05, // style
0x184: 0xd206, // strike
0x185: 0x59e06, // usemap
0x186: 0x2fc06, // iframe
0x187: 0x1004, // main
0x189: 0x7b07, // picture
0x18c: 0x31605, // ismap
0x18e: 0x4a504, // data
0x18f: 0x5905, // label
0x191: 0x3d10e, // referrerpolicy
0x192: 0x15602, // th
0x194: 0x53606, // prompt
0x195: 0x56807, // section
0x197: 0x6d107, // optimum
0x198: 0x2db04, // high
0x199: 0x15c02, // h1
0x19a: 0x65909, // onstalled
0x19b: 0x16d03, // var
0x19c: 0x4204, // time
0x19e: 0x67402, // ms
0x19f: 0x33106, // header
0x1a0: 0x4da09, // onmessage
0x1a1: 0x1a605, // nonce
0x1a2: 0x26e0a, // formaction
0x1a3: 0x22006, // center
0x1a4: 0x3704, // nobr
0x1a5: 0x59505, // table
0x1a6: 0x4a907, // listing
0x1a7: 0x18106, // legend
0x1a9: 0x29b09, // challenge
0x1aa: 0x24806, // figure
0x1ab: 0xe605, // media
0x1ae: 0xd904, // type
0x1af: 0x3f04, // font
0x1b0: 0x4da0e, // onmessageerror
0x1b1: 0x37108, // seamless
0x1b2: 0x8703, // dfn
0x1b3: 0x5c705, // defer
0x1b4: 0xc303, // low
0x1b5: 0x19a03, // rtc
0x1b6: 0x5230b, // onmouseover
0x1b7: 0x2b20a, // novalidate
0x1b8: 0x71c0a, // workertype
0x1ba: 0x3cd07, // itemref
0x1bd: 0x1, // a
0x1be: 0x31803, // map
0x1bf: 0x400c, // ontimeupdate
0x1c0: 0x15e07, // bgsound
0x1c1: 0x3206, // keygen
0x1c2: 0x2705, // tbody
0x1c5: 0x64406, // onshow
0x1c7: 0x2501, // s
0x1c8: 0x6607, // pattern
0x1cc: 0x14d10, // oncanplaythrough
0x1ce: 0x2d702, // dd
0x1cf: 0x6f906, // srcset
0x1d0: 0x17003, // big
0x1d2: 0x65108, // sortable
0x1d3: 0x48007, // onkeyup
0x1d5: 0x5a406, // onplay
0x1d7: 0x4b804, // meta
0x1d8: 0x40306, // ondrop
0x1da: 0x60008, // onscroll
0x1db: 0x1fb0b, // crossorigin
0x1dc: 0x5730a, // onpageshow
0x1dd: 0x4, // abbr
0x1de: 0x9202, // td
0x1df: 0x58b0f, // contenteditable
0x1e0: 0x27206, // action
0x1e1: 0x1400b, // playsinline
0x1e2: 0x43107, // onfocus
0x1e3: 0x2e008, // hreflang
0x1e5: 0x5160a, // onmouseout
0x1e6: 0x5ea07, // onreset
0x1e7: 0x13c08, // autoplay
0x1e8: 0x63109, // onseeking
0x1ea: 0x67506, // scoped
0x1ec: 0x30a, // radiogroup
0x1ee: 0x3800b, // contextmenu
0x1ef: 0x52e09, // onmouseup
0x1f1: 0x2ca06, // hgroup
0x1f2: 0x2080f, // allowfullscreen
0x1f3: 0x4be08, // tabindex
0x1f6: 0x30f07, // isindex
0x1f7: 0x1a0e, // accept-charset
0x1f8: 0x2ae0e, // formnovalidate
0x1fb: 0x1c90e, // annotation-xml
0x1fc: 0x6e05, // embed
0x1fd: 0x21806, // script
0x1fe: 0xbb06, // dialog
0x1ff: 0x1d707, // command
}
const atomText = "abbradiogrouparamainavalueaccept-charsetbodyaccesskeygenobrb" +
"asefontimeupdateviacacheightmlabelooptgroupatternoembedetail" +
"sampictureversedfnoframesetdirnameterowspanomoduleacronymali" +
"gnmarkbdialogallowpaymentrequestrikeytypeallowusermediagroup" +
"ingaltfooterubyasyncanvasidefaultitleaudioncancelautofocusan" +
"dboxmplaceholderautoplaysinlinebdoncanplaythrough1bgsoundisa" +
"bledivarbigblinkindraggablegendblockquotebuttonabortcitempro" +
"penoncecolgrouplaintextrackcolorcolspannotation-xmlcommandco" +
"ntrolshapecoordslotranslatecrossoriginsmallowfullscreenoscri" +
"ptfacenterfieldsetfigcaptionafterprintegrityfigurequiredfore" +
"ignObjectforeignobjectformactionautocompleteerrorformenctype" +
"mustmatchallengeformmethodformnovalidatetimeformtargethgroup" +
"osterhiddenhigh2hreflanghttp-equivideonclickiframeimageimgly" +
"ph3isindexismappletitemtypemarqueematheadersortedmaxlength4m" +
"inlength5mtextareadonlymultiplemutedoncloseamlessourceoncont" +
"extmenuitemidoncopyoncuechangeoncutondblclickondragendondrag" +
"enterondragexitemreferrerpolicyondragleaveondragoverondragst" +
"articleondropzonemptiedondurationchangeonendedonerroronfocus" +
"paceronhashchangeoninputmodeloninvalidonkeydownloadonkeypres" +
"spellcheckedonkeyupreloadonlanguagechangeonloadeddatalisting" +
"onloadedmetadatabindexonloadendonloadstartonmessageerroronmo" +
"usedownonmouseenteronmouseleaveonmousemoveonmouseoutputonmou" +
"seoveronmouseupromptonmousewheelonofflineononlineonpagehides" +
"classectionbluronpageshowbronpastepublicontenteditableonpaus" +
"emaponplayingonpopstateonprogressrcdocodeferonratechangeonre" +
"jectionhandledonresetonresizesrclangonscrollonsecuritypolicy" +
"violationauxclickonseekedonseekingonselectedonshowidth6onsor" +
"tableonstalledonstorageonsubmitemscopedonsuspendontoggleonun" +
"handledrejectionbeforeprintonunloadonvolumechangeonwaitingon" +
"wheeloptimumanifestrongoptionbeforeunloaddressrcsetstylesumm" +
"arysupsvgsystemplateworkertypewrap"

@ -0,0 +1,111 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package html
// Section 12.2.4.2 of the HTML5 specification says "The following elements
// have varying levels of special parsing rules".
// https://html.spec.whatwg.org/multipage/syntax.html#the-stack-of-open-elements
var isSpecialElementMap = map[string]bool{
"address": true,
"applet": true,
"area": true,
"article": true,
"aside": true,
"base": true,
"basefont": true,
"bgsound": true,
"blockquote": true,
"body": true,
"br": true,
"button": true,
"caption": true,
"center": true,
"col": true,
"colgroup": true,
"dd": true,
"details": true,
"dir": true,
"div": true,
"dl": true,
"dt": true,
"embed": true,
"fieldset": true,
"figcaption": true,
"figure": true,
"footer": true,
"form": true,
"frame": true,
"frameset": true,
"h1": true,
"h2": true,
"h3": true,
"h4": true,
"h5": true,
"h6": true,
"head": true,
"header": true,
"hgroup": true,
"hr": true,
"html": true,
"iframe": true,
"img": true,
"input": true,
"keygen": true, // "keygen" has been removed from the spec, but are kept here for backwards compatibility.
"li": true,
"link": true,
"listing": true,
"main": true,
"marquee": true,
"menu": true,
"meta": true,
"nav": true,
"noembed": true,
"noframes": true,
"noscript": true,
"object": true,
"ol": true,
"p": true,
"param": true,
"plaintext": true,
"pre": true,
"script": true,
"section": true,
"select": true,
"source": true,
"style": true,
"summary": true,
"table": true,
"tbody": true,
"td": true,
"template": true,
"textarea": true,
"tfoot": true,
"th": true,
"thead": true,
"title": true,
"tr": true,
"track": true,
"ul": true,
"wbr": true,
"xmp": true,
}
func isSpecialElement(element *Node) bool {
switch element.Namespace {
case "", "html":
return isSpecialElementMap[element.Data]
case "math":
switch element.Data {
case "mi", "mo", "mn", "ms", "mtext", "annotation-xml":
return true
}
case "svg":
switch element.Data {
case "foreignObject", "desc", "title":
return true
}
}
return false
}

106
vendor/golang.org/x/net/html/doc.go generated vendored

@ -0,0 +1,106 @@
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
/*
Package html implements an HTML5-compliant tokenizer and parser.
Tokenization is done by creating a Tokenizer for an io.Reader r. It is the
caller's responsibility to ensure that r provides UTF-8 encoded HTML.
z := html.NewTokenizer(r)
Given a Tokenizer z, the HTML is tokenized by repeatedly calling z.Next(),
which parses the next token and returns its type, or an error:
for {
tt := z.Next()
if tt == html.ErrorToken {
// ...
return ...
}
// Process the current token.
}
There are two APIs for retrieving the current token. The high-level API is to
call Token; the low-level API is to call Text or TagName / TagAttr. Both APIs
allow optionally calling Raw after Next but before Token, Text, TagName, or
TagAttr. In EBNF notation, the valid call sequence per token is:
Next {Raw} [ Token | Text | TagName {TagAttr} ]
Token returns an independent data structure that completely describes a token.
Entities (such as "&lt;") are unescaped, tag names and attribute keys are
lower-cased, and attributes are collected into a []Attribute. For example:
for {
if z.Next() == html.ErrorToken {
// Returning io.EOF indicates success.
return z.Err()
}
emitToken(z.Token())
}
The low-level API performs fewer allocations and copies, but the contents of
the []byte values returned by Text, TagName and TagAttr may change on the next
call to Next. For example, to extract an HTML page's anchor text:
depth := 0
for {
tt := z.Next()
switch tt {
case html.ErrorToken:
return z.Err()
case html.TextToken:
if depth > 0 {
// emitBytes should copy the []byte it receives,
// if it doesn't process it immediately.
emitBytes(z.Text())
}
case html.StartTagToken, html.EndTagToken:
tn, _ := z.TagName()
if len(tn) == 1 && tn[0] == 'a' {
if tt == html.StartTagToken {
depth++
} else {
depth--
}
}
}
}
Parsing is done by calling Parse with an io.Reader, which returns the root of
the parse tree (the document element) as a *Node. It is the caller's
responsibility to ensure that the Reader provides UTF-8 encoded HTML. For
example, to process each anchor node in depth-first order:
doc, err := html.Parse(r)
if err != nil {
// ...
}
var f func(*html.Node)
f = func(n *html.Node) {
if n.Type == html.ElementNode && n.Data == "a" {
// Do something with n...
}
for c := n.FirstChild; c != nil; c = c.NextSibling {
f(c)
}
}
f(doc)
The relevant specifications include:
https://html.spec.whatwg.org/multipage/syntax.html and
https://html.spec.whatwg.org/multipage/syntax.html#tokenization
*/
package html // import "golang.org/x/net/html"
// The tokenization algorithm implemented by this package is not a line-by-line
// transliteration of the relatively verbose state-machine in the WHATWG
// specification. A more direct approach is used instead, where the program
// counter implies the state, such as whether it is tokenizing a tag or a text
// node. Specification compliance is verified by checking expected and actual
// outputs over a test suite rather than aiming for algorithmic fidelity.
// TODO(nigeltao): Does a DOM API belong in this package or a separate one?
// TODO(nigeltao): How does parsing interact with a JavaScript engine?

@ -0,0 +1,156 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package html
import (
"strings"
)
// parseDoctype parses the data from a DoctypeToken into a name,
// public identifier, and system identifier. It returns a Node whose Type
// is DoctypeNode, whose Data is the name, and which has attributes
// named "system" and "public" for the two identifiers if they were present.
// quirks is whether the document should be parsed in "quirks mode".
func parseDoctype(s string) (n *Node, quirks bool) {
n = &Node{Type: DoctypeNode}
// Find the name.
space := strings.IndexAny(s, whitespace)
if space == -1 {
space = len(s)
}
n.Data = s[:space]
// The comparison to "html" is case-sensitive.
if n.Data != "html" {
quirks = true
}
n.Data = strings.ToLower(n.Data)
s = strings.TrimLeft(s[space:], whitespace)
if len(s) < 6 {
// It can't start with "PUBLIC" or "SYSTEM".
// Ignore the rest of the string.
return n, quirks || s != ""
}
key := strings.ToLower(s[:6])
s = s[6:]
for key == "public" || key == "system" {
s = strings.TrimLeft(s, whitespace)
if s == "" {
break
}
quote := s[0]
if quote != '"' && quote != '\'' {
break
}
s = s[1:]
q := strings.IndexRune(s, rune(quote))
var id string
if q == -1 {
id = s
s = ""
} else {
id = s[:q]
s = s[q+1:]
}
n.Attr = append(n.Attr, Attribute{Key: key, Val: id})
if key == "public" {
key = "system"
} else {
key = ""
}
}
if key != "" || s != "" {
quirks = true
} else if len(n.Attr) > 0 {
if n.Attr[0].Key == "public" {
public := strings.ToLower(n.Attr[0].Val)
switch public {
case "-//w3o//dtd w3 html strict 3.0//en//", "-/w3d/dtd html 4.0 transitional/en", "html":
quirks = true
default:
for _, q := range quirkyIDs {
if strings.HasPrefix(public, q) {
quirks = true
break
}
}
}
// The following two public IDs only cause quirks mode if there is no system ID.
if len(n.Attr) == 1 && (strings.HasPrefix(public, "-//w3c//dtd html 4.01 frameset//") ||
strings.HasPrefix(public, "-//w3c//dtd html 4.01 transitional//")) {
quirks = true
}
}
if lastAttr := n.Attr[len(n.Attr)-1]; lastAttr.Key == "system" &&
strings.ToLower(lastAttr.Val) == "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd" {
quirks = true
}
}
return n, quirks
}
// quirkyIDs is a list of public doctype identifiers that cause a document
// to be interpreted in quirks mode. The identifiers should be in lower case.
var quirkyIDs = []string{
"+//silmaril//dtd html pro v0r11 19970101//",
"-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
"-//as//dtd html 3.0 aswedit + extensions//",
"-//ietf//dtd html 2.0 level 1//",
"-//ietf//dtd html 2.0 level 2//",
"-//ietf//dtd html 2.0 strict level 1//",
"-//ietf//dtd html 2.0 strict level 2//",
"-//ietf//dtd html 2.0 strict//",
"-//ietf//dtd html 2.0//",
"-//ietf//dtd html 2.1e//",
"-//ietf//dtd html 3.0//",
"-//ietf//dtd html 3.2 final//",
"-//ietf//dtd html 3.2//",
"-//ietf//dtd html 3//",
"-//ietf//dtd html level 0//",
"-//ietf//dtd html level 1//",
"-//ietf//dtd html level 2//",
"-//ietf//dtd html level 3//",
"-//ietf//dtd html strict level 0//",
"-//ietf//dtd html strict level 1//",
"-//ietf//dtd html strict level 2//",
"-//ietf//dtd html strict level 3//",
"-//ietf//dtd html strict//",
"-//ietf//dtd html//",
"-//metrius//dtd metrius presentational//",
"-//microsoft//dtd internet explorer 2.0 html strict//",
"-//microsoft//dtd internet explorer 2.0 html//",
"-//microsoft//dtd internet explorer 2.0 tables//",
"-//microsoft//dtd internet explorer 3.0 html strict//",
"-//microsoft//dtd internet explorer 3.0 html//",
"-//microsoft//dtd internet explorer 3.0 tables//",
"-//netscape comm. corp.//dtd html//",
"-//netscape comm. corp.//dtd strict html//",
"-//o'reilly and associates//dtd html 2.0//",
"-//o'reilly and associates//dtd html extended 1.0//",
"-//o'reilly and associates//dtd html extended relaxed 1.0//",
"-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//",
"-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//",
"-//spyglass//dtd html 2.0 extended//",
"-//sq//dtd html 2.0 hotmetal + extensions//",
"-//sun microsystems corp.//dtd hotjava html//",
"-//sun microsystems corp.//dtd hotjava strict html//",
"-//w3c//dtd html 3 1995-03-24//",
"-//w3c//dtd html 3.2 draft//",
"-//w3c//dtd html 3.2 final//",
"-//w3c//dtd html 3.2//",
"-//w3c//dtd html 3.2s draft//",
"-//w3c//dtd html 4.0 frameset//",
"-//w3c//dtd html 4.0 transitional//",
"-//w3c//dtd html experimental 19960712//",
"-//w3c//dtd html experimental 970421//",
"-//w3c//dtd w3 html//",
"-//w3o//dtd w3 html 3.0//",
"-//webtechs//dtd mozilla html 2.0//",
"-//webtechs//dtd mozilla html//",
}

File diff suppressed because it is too large Load Diff

@ -0,0 +1,258 @@
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package html
import (
"bytes"
"strings"
"unicode/utf8"
)
// These replacements permit compatibility with old numeric entities that
// assumed Windows-1252 encoding.
// https://html.spec.whatwg.org/multipage/syntax.html#consume-a-character-reference
var replacementTable = [...]rune{
'\u20AC', // First entry is what 0x80 should be replaced with.
'\u0081',
'\u201A',
'\u0192',
'\u201E',
'\u2026',
'\u2020',
'\u2021',
'\u02C6',
'\u2030',
'\u0160',
'\u2039',
'\u0152',
'\u008D',
'\u017D',
'\u008F',
'\u0090',
'\u2018',
'\u2019',
'\u201C',
'\u201D',
'\u2022',
'\u2013',
'\u2014',
'\u02DC',
'\u2122',
'\u0161',
'\u203A',
'\u0153',
'\u009D',
'\u017E',
'\u0178', // Last entry is 0x9F.
// 0x00->'\uFFFD' is handled programmatically.
// 0x0D->'\u000D' is a no-op.
}
// unescapeEntity reads an entity like "&lt;" from b[src:] and writes the
// corresponding "<" to b[dst:], returning the incremented dst and src cursors.
// Precondition: b[src] == '&' && dst <= src.
// attribute should be true if parsing an attribute value.
func unescapeEntity(b []byte, dst, src int, attribute bool) (dst1, src1 int) {
// https://html.spec.whatwg.org/multipage/syntax.html#consume-a-character-reference
// i starts at 1 because we already know that s[0] == '&'.
i, s := 1, b[src:]
if len(s) <= 1 {
b[dst] = b[src]
return dst + 1, src + 1
}
if s[i] == '#' {
if len(s) <= 3 { // We need to have at least "&#.".
b[dst] = b[src]
return dst + 1, src + 1
}
i++
c := s[i]
hex := false
if c == 'x' || c == 'X' {
hex = true
i++
}
x := '\x00'
for i < len(s) {
c = s[i]
i++
if hex {
if '0' <= c && c <= '9' {
x = 16*x + rune(c) - '0'
continue
} else if 'a' <= c && c <= 'f' {
x = 16*x + rune(c) - 'a' + 10
continue
} else if 'A' <= c && c <= 'F' {
x = 16*x + rune(c) - 'A' + 10
continue
}
} else if '0' <= c && c <= '9' {
x = 10*x + rune(c) - '0'
continue
}
if c != ';' {
i--
}
break
}
if i <= 3 { // No characters matched.
b[dst] = b[src]
return dst + 1, src + 1
}
if 0x80 <= x && x <= 0x9F {
// Replace characters from Windows-1252 with UTF-8 equivalents.
x = replacementTable[x-0x80]
} else if x == 0 || (0xD800 <= x && x <= 0xDFFF) || x > 0x10FFFF {
// Replace invalid characters with the replacement character.
x = '\uFFFD'
}
return dst + utf8.EncodeRune(b[dst:], x), src + i
}
// Consume the maximum number of characters possible, with the
// consumed characters matching one of the named references.
for i < len(s) {
c := s[i]
i++
// Lower-cased characters are more common in entities, so we check for them first.
if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9' {
continue
}
if c != ';' {
i--
}
break
}
entityName := string(s[1:i])
if entityName == "" {
// No-op.
} else if attribute && entityName[len(entityName)-1] != ';' && len(s) > i && s[i] == '=' {
// No-op.
} else if x := entity[entityName]; x != 0 {
return dst + utf8.EncodeRune(b[dst:], x), src + i
} else if x := entity2[entityName]; x[0] != 0 {
dst1 := dst + utf8.EncodeRune(b[dst:], x[0])
return dst1 + utf8.EncodeRune(b[dst1:], x[1]), src + i
} else if !attribute {
maxLen := len(entityName) - 1
if maxLen > longestEntityWithoutSemicolon {
maxLen = longestEntityWithoutSemicolon
}
for j := maxLen; j > 1; j-- {
if x := entity[entityName[:j]]; x != 0 {
return dst + utf8.EncodeRune(b[dst:], x), src + j + 1
}
}
}
dst1, src1 = dst+i, src+i
copy(b[dst:dst1], b[src:src1])
return dst1, src1
}
// unescape unescapes b's entities in-place, so that "a&lt;b" becomes "a<b".
// attribute should be true if parsing an attribute value.
func unescape(b []byte, attribute bool) []byte {
for i, c := range b {
if c == '&' {
dst, src := unescapeEntity(b, i, i, attribute)
for src < len(b) {
c := b[src]
if c == '&' {
dst, src = unescapeEntity(b, dst, src, attribute)
} else {
b[dst] = c
dst, src = dst+1, src+1
}
}
return b[0:dst]
}
}
return b
}
// lower lower-cases the A-Z bytes in b in-place, so that "aBc" becomes "abc".
func lower(b []byte) []byte {
for i, c := range b {
if 'A' <= c && c <= 'Z' {
b[i] = c + 'a' - 'A'
}
}
return b
}
const escapedChars = "&'<>\"\r"
func escape(w writer, s string) error {
i := strings.IndexAny(s, escapedChars)
for i != -1 {
if _, err := w.WriteString(s[:i]); err != nil {
return err
}
var esc string
switch s[i] {
case '&':
esc = "&amp;"
case '\'':
// "&#39;" is shorter than "&apos;" and apos was not in HTML until HTML5.
esc = "&#39;"
case '<':
esc = "&lt;"
case '>':
esc = "&gt;"
case '"':
// "&#34;" is shorter than "&quot;".
esc = "&#34;"
case '\r':
esc = "&#13;"
default:
panic("unrecognized escape character")
}
s = s[i+1:]
if _, err := w.WriteString(esc); err != nil {
return err
}
i = strings.IndexAny(s, escapedChars)
}
_, err := w.WriteString(s)
return err
}
// EscapeString escapes special characters like "<" to become "&lt;". It
// escapes only five such characters: <, >, &, ' and ".
// UnescapeString(EscapeString(s)) == s always holds, but the converse isn't
// always true.
func EscapeString(s string) string {
if strings.IndexAny(s, escapedChars) == -1 {
return s
}
var buf bytes.Buffer
escape(&buf, s)
return buf.String()
}
// UnescapeString unescapes entities like "&lt;" to become "<". It unescapes a
// larger range of entities than EscapeString escapes. For example, "&aacute;"
// unescapes to "á", as does "&#225;" and "&xE1;".
// UnescapeString(EscapeString(s)) == s always holds, but the converse isn't
// always true.
func UnescapeString(s string) string {
for _, c := range s {
if c == '&' {
return string(unescape([]byte(s), false))
}
}
return s
}

@ -0,0 +1,222 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package html
import (
"strings"
)
func adjustAttributeNames(aa []Attribute, nameMap map[string]string) {
for i := range aa {
if newName, ok := nameMap[aa[i].Key]; ok {
aa[i].Key = newName
}
}
}
func adjustForeignAttributes(aa []Attribute) {
for i, a := range aa {
if a.Key == "" || a.Key[0] != 'x' {
continue
}
switch a.Key {
case "xlink:actuate", "xlink:arcrole", "xlink:href", "xlink:role", "xlink:show",
"xlink:title", "xlink:type", "xml:base", "xml:lang", "xml:space", "xmlns:xlink":
j := strings.Index(a.Key, ":")
aa[i].Namespace = a.Key[:j]
aa[i].Key = a.Key[j+1:]
}
}
}
func htmlIntegrationPoint(n *Node) bool {
if n.Type != ElementNode {
return false
}
switch n.Namespace {
case "math":
if n.Data == "annotation-xml" {
for _, a := range n.Attr {
if a.Key == "encoding" {
val := strings.ToLower(a.Val)
if val == "text/html" || val == "application/xhtml+xml" {
return true
}
}
}
}
case "svg":
switch n.Data {
case "desc", "foreignObject", "title":
return true
}
}
return false
}
func mathMLTextIntegrationPoint(n *Node) bool {
if n.Namespace != "math" {
return false
}
switch n.Data {
case "mi", "mo", "mn", "ms", "mtext":
return true
}
return false
}
// Section 12.2.6.5.
var breakout = map[string]bool{
"b": true,
"big": true,
"blockquote": true,
"body": true,
"br": true,
"center": true,
"code": true,
"dd": true,
"div": true,
"dl": true,
"dt": true,
"em": true,
"embed": true,
"h1": true,
"h2": true,
"h3": true,
"h4": true,
"h5": true,
"h6": true,
"head": true,
"hr": true,
"i": true,
"img": true,
"li": true,
"listing": true,
"menu": true,
"meta": true,
"nobr": true,
"ol": true,
"p": true,
"pre": true,
"ruby": true,
"s": true,
"small": true,
"span": true,
"strong": true,
"strike": true,
"sub": true,
"sup": true,
"table": true,
"tt": true,
"u": true,
"ul": true,
"var": true,
}
// Section 12.2.6.5.
var svgTagNameAdjustments = map[string]string{
"altglyph": "altGlyph",
"altglyphdef": "altGlyphDef",
"altglyphitem": "altGlyphItem",
"animatecolor": "animateColor",
"animatemotion": "animateMotion",
"animatetransform": "animateTransform",
"clippath": "clipPath",
"feblend": "feBlend",
"fecolormatrix": "feColorMatrix",
"fecomponenttransfer": "feComponentTransfer",
"fecomposite": "feComposite",
"feconvolvematrix": "feConvolveMatrix",
"fediffuselighting": "feDiffuseLighting",
"fedisplacementmap": "feDisplacementMap",
"fedistantlight": "feDistantLight",
"feflood": "feFlood",
"fefunca": "feFuncA",
"fefuncb": "feFuncB",
"fefuncg": "feFuncG",
"fefuncr": "feFuncR",
"fegaussianblur": "feGaussianBlur",
"feimage": "feImage",
"femerge": "feMerge",
"femergenode": "feMergeNode",
"femorphology": "feMorphology",
"feoffset": "feOffset",
"fepointlight": "fePointLight",
"fespecularlighting": "feSpecularLighting",
"fespotlight": "feSpotLight",
"fetile": "feTile",
"feturbulence": "feTurbulence",
"foreignobject": "foreignObject",
"glyphref": "glyphRef",
"lineargradient": "linearGradient",
"radialgradient": "radialGradient",
"textpath": "textPath",
}
// Section 12.2.6.1
var mathMLAttributeAdjustments = map[string]string{
"definitionurl": "definitionURL",
}
var svgAttributeAdjustments = map[string]string{
"attributename": "attributeName",
"attributetype": "attributeType",
"basefrequency": "baseFrequency",
"baseprofile": "baseProfile",
"calcmode": "calcMode",
"clippathunits": "clipPathUnits",
"diffuseconstant": "diffuseConstant",
"edgemode": "edgeMode",
"filterunits": "filterUnits",
"glyphref": "glyphRef",
"gradienttransform": "gradientTransform",
"gradientunits": "gradientUnits",
"kernelmatrix": "kernelMatrix",
"kernelunitlength": "kernelUnitLength",
"keypoints": "keyPoints",
"keysplines": "keySplines",
"keytimes": "keyTimes",
"lengthadjust": "lengthAdjust",
"limitingconeangle": "limitingConeAngle",
"markerheight": "markerHeight",
"markerunits": "markerUnits",
"markerwidth": "markerWidth",
"maskcontentunits": "maskContentUnits",
"maskunits": "maskUnits",
"numoctaves": "numOctaves",
"pathlength": "pathLength",
"patterncontentunits": "patternContentUnits",
"patterntransform": "patternTransform",
"patternunits": "patternUnits",
"pointsatx": "pointsAtX",
"pointsaty": "pointsAtY",
"pointsatz": "pointsAtZ",
"preservealpha": "preserveAlpha",
"preserveaspectratio": "preserveAspectRatio",
"primitiveunits": "primitiveUnits",
"refx": "refX",
"refy": "refY",
"repeatcount": "repeatCount",
"repeatdur": "repeatDur",
"requiredextensions": "requiredExtensions",
"requiredfeatures": "requiredFeatures",
"specularconstant": "specularConstant",
"specularexponent": "specularExponent",
"spreadmethod": "spreadMethod",
"startoffset": "startOffset",
"stddeviation": "stdDeviation",
"stitchtiles": "stitchTiles",
"surfacescale": "surfaceScale",
"systemlanguage": "systemLanguage",
"tablevalues": "tableValues",
"targetx": "targetX",
"targety": "targetY",
"textlength": "textLength",
"viewbox": "viewBox",
"viewtarget": "viewTarget",
"xchannelselector": "xChannelSelector",
"ychannelselector": "yChannelSelector",
"zoomandpan": "zoomAndPan",
}

@ -0,0 +1,225 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package html
import (
"golang.org/x/net/html/atom"
)
// A NodeType is the type of a Node.
type NodeType uint32
const (
ErrorNode NodeType = iota
TextNode
DocumentNode
ElementNode
CommentNode
DoctypeNode
// RawNode nodes are not returned by the parser, but can be part of the
// Node tree passed to func Render to insert raw HTML (without escaping).
// If so, this package makes no guarantee that the rendered HTML is secure
// (from e.g. Cross Site Scripting attacks) or well-formed.
RawNode
scopeMarkerNode
)
// Section 12.2.4.3 says "The markers are inserted when entering applet,
// object, marquee, template, td, th, and caption elements, and are used
// to prevent formatting from "leaking" into applet, object, marquee,
// template, td, th, and caption elements".
var scopeMarker = Node{Type: scopeMarkerNode}
// A Node consists of a NodeType and some Data (tag name for element nodes,
// content for text) and are part of a tree of Nodes. Element nodes may also
// have a Namespace and contain a slice of Attributes. Data is unescaped, so
// that it looks like "a<b" rather than "a&lt;b". For element nodes, DataAtom
// is the atom for Data, or zero if Data is not a known tag name.
//
// An empty Namespace implies a "http://www.w3.org/1999/xhtml" namespace.
// Similarly, "math" is short for "http://www.w3.org/1998/Math/MathML", and
// "svg" is short for "http://www.w3.org/2000/svg".
type Node struct {
Parent, FirstChild, LastChild, PrevSibling, NextSibling *Node
Type NodeType
DataAtom atom.Atom
Data string
Namespace string
Attr []Attribute
}
// InsertBefore inserts newChild as a child of n, immediately before oldChild
// in the sequence of n's children. oldChild may be nil, in which case newChild
// is appended to the end of n's children.
//
// It will panic if newChild already has a parent or siblings.
func (n *Node) InsertBefore(newChild, oldChild *Node) {
if newChild.Parent != nil || newChild.PrevSibling != nil || newChild.NextSibling != nil {
panic("html: InsertBefore called for an attached child Node")
}
var prev, next *Node
if oldChild != nil {
prev, next = oldChild.PrevSibling, oldChild
} else {
prev = n.LastChild
}
if prev != nil {
prev.NextSibling = newChild
} else {
n.FirstChild = newChild
}
if next != nil {
next.PrevSibling = newChild
} else {
n.LastChild = newChild
}
newChild.Parent = n
newChild.PrevSibling = prev
newChild.NextSibling = next
}
// AppendChild adds a node c as a child of n.
//
// It will panic if c already has a parent or siblings.
func (n *Node) AppendChild(c *Node) {
if c.Parent != nil || c.PrevSibling != nil || c.NextSibling != nil {
panic("html: AppendChild called for an attached child Node")
}
last := n.LastChild
if last != nil {
last.NextSibling = c
} else {
n.FirstChild = c
}
n.LastChild = c
c.Parent = n
c.PrevSibling = last
}
// RemoveChild removes a node c that is a child of n. Afterwards, c will have
// no parent and no siblings.
//
// It will panic if c's parent is not n.
func (n *Node) RemoveChild(c *Node) {
if c.Parent != n {
panic("html: RemoveChild called for a non-child Node")
}
if n.FirstChild == c {
n.FirstChild = c.NextSibling
}
if c.NextSibling != nil {
c.NextSibling.PrevSibling = c.PrevSibling
}
if n.LastChild == c {
n.LastChild = c.PrevSibling
}
if c.PrevSibling != nil {
c.PrevSibling.NextSibling = c.NextSibling
}
c.Parent = nil
c.PrevSibling = nil
c.NextSibling = nil
}
// reparentChildren reparents all of src's child nodes to dst.
func reparentChildren(dst, src *Node) {
for {
child := src.FirstChild
if child == nil {
break
}
src.RemoveChild(child)
dst.AppendChild(child)
}
}
// clone returns a new node with the same type, data and attributes.
// The clone has no parent, no siblings and no children.
func (n *Node) clone() *Node {
m := &Node{
Type: n.Type,
DataAtom: n.DataAtom,
Data: n.Data,
Attr: make([]Attribute, len(n.Attr)),
}
copy(m.Attr, n.Attr)
return m
}
// nodeStack is a stack of nodes.
type nodeStack []*Node
// pop pops the stack. It will panic if s is empty.
func (s *nodeStack) pop() *Node {
i := len(*s)
n := (*s)[i-1]
*s = (*s)[:i-1]
return n
}
// top returns the most recently pushed node, or nil if s is empty.
func (s *nodeStack) top() *Node {
if i := len(*s); i > 0 {
return (*s)[i-1]
}
return nil
}
// index returns the index of the top-most occurrence of n in the stack, or -1
// if n is not present.
func (s *nodeStack) index(n *Node) int {
for i := len(*s) - 1; i >= 0; i-- {
if (*s)[i] == n {
return i
}
}
return -1
}
// contains returns whether a is within s.
func (s *nodeStack) contains(a atom.Atom) bool {
for _, n := range *s {
if n.DataAtom == a && n.Namespace == "" {
return true
}
}
return false
}
// insert inserts a node at the given index.
func (s *nodeStack) insert(i int, n *Node) {
(*s) = append(*s, nil)
copy((*s)[i+1:], (*s)[i:])
(*s)[i] = n
}
// remove removes a node from the stack. It is a no-op if n is not present.
func (s *nodeStack) remove(n *Node) {
i := s.index(n)
if i == -1 {
return
}
copy((*s)[i:], (*s)[i+1:])
j := len(*s) - 1
(*s)[j] = nil
*s = (*s)[:j]
}
type insertionModeStack []insertionMode
func (s *insertionModeStack) pop() (im insertionMode) {
i := len(*s)
im = (*s)[i-1]
*s = (*s)[:i-1]
return im
}
func (s *insertionModeStack) top() insertionMode {
if i := len(*s); i > 0 {
return (*s)[i-1]
}
return nil
}

File diff suppressed because it is too large Load Diff

@ -0,0 +1,273 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package html
import (
"bufio"
"errors"
"fmt"
"io"
"strings"
)
type writer interface {
io.Writer
io.ByteWriter
WriteString(string) (int, error)
}
// Render renders the parse tree n to the given writer.
//
// Rendering is done on a 'best effort' basis: calling Parse on the output of
// Render will always result in something similar to the original tree, but it
// is not necessarily an exact clone unless the original tree was 'well-formed'.
// 'Well-formed' is not easily specified; the HTML5 specification is
// complicated.
//
// Calling Parse on arbitrary input typically results in a 'well-formed' parse
// tree. However, it is possible for Parse to yield a 'badly-formed' parse tree.
// For example, in a 'well-formed' parse tree, no <a> element is a child of
// another <a> element: parsing "<a><a>" results in two sibling elements.
// Similarly, in a 'well-formed' parse tree, no <a> element is a child of a
// <table> element: parsing "<p><table><a>" results in a <p> with two sibling
// children; the <a> is reparented to the <table>'s parent. However, calling
// Parse on "<a><table><a>" does not return an error, but the result has an <a>
// element with an <a> child, and is therefore not 'well-formed'.
//
// Programmatically constructed trees are typically also 'well-formed', but it
// is possible to construct a tree that looks innocuous but, when rendered and
// re-parsed, results in a different tree. A simple example is that a solitary
// text node would become a tree containing <html>, <head> and <body> elements.
// Another example is that the programmatic equivalent of "a<head>b</head>c"
// becomes "<html><head><head/><body>abc</body></html>".
func Render(w io.Writer, n *Node) error {
if x, ok := w.(writer); ok {
return render(x, n)
}
buf := bufio.NewWriter(w)
if err := render(buf, n); err != nil {
return err
}
return buf.Flush()
}
// plaintextAbort is returned from render1 when a <plaintext> element
// has been rendered. No more end tags should be rendered after that.
var plaintextAbort = errors.New("html: internal error (plaintext abort)")
func render(w writer, n *Node) error {
err := render1(w, n)
if err == plaintextAbort {
err = nil
}
return err
}
func render1(w writer, n *Node) error {
// Render non-element nodes; these are the easy cases.
switch n.Type {
case ErrorNode:
return errors.New("html: cannot render an ErrorNode node")
case TextNode:
return escape(w, n.Data)
case DocumentNode:
for c := n.FirstChild; c != nil; c = c.NextSibling {
if err := render1(w, c); err != nil {
return err
}
}
return nil
case ElementNode:
// No-op.
case CommentNode:
if _, err := w.WriteString("<!--"); err != nil {
return err
}
if _, err := w.WriteString(n.Data); err != nil {
return err
}
if _, err := w.WriteString("-->"); err != nil {
return err
}
return nil
case DoctypeNode:
if _, err := w.WriteString("<!DOCTYPE "); err != nil {
return err
}
if _, err := w.WriteString(n.Data); err != nil {
return err
}
if n.Attr != nil {
var p, s string
for _, a := range n.Attr {
switch a.Key {
case "public":
p = a.Val
case "system":
s = a.Val
}
}
if p != "" {
if _, err := w.WriteString(" PUBLIC "); err != nil {
return err
}
if err := writeQuoted(w, p); err != nil {
return err
}
if s != "" {
if err := w.WriteByte(' '); err != nil {
return err
}
if err := writeQuoted(w, s); err != nil {
return err
}
}
} else if s != "" {
if _, err := w.WriteString(" SYSTEM "); err != nil {
return err
}
if err := writeQuoted(w, s); err != nil {
return err
}
}
}
return w.WriteByte('>')
case RawNode:
_, err := w.WriteString(n.Data)
return err
default:
return errors.New("html: unknown node type")
}
// Render the <xxx> opening tag.
if err := w.WriteByte('<'); err != nil {
return err
}
if _, err := w.WriteString(n.Data); err != nil {
return err
}
for _, a := range n.Attr {
if err := w.WriteByte(' '); err != nil {
return err
}
if a.Namespace != "" {
if _, err := w.WriteString(a.Namespace); err != nil {
return err
}
if err := w.WriteByte(':'); err != nil {
return err
}
}
if _, err := w.WriteString(a.Key); err != nil {
return err
}
if _, err := w.WriteString(`="`); err != nil {
return err
}
if err := escape(w, a.Val); err != nil {
return err
}
if err := w.WriteByte('"'); err != nil {
return err
}
}
if voidElements[n.Data] {
if n.FirstChild != nil {
return fmt.Errorf("html: void element <%s> has child nodes", n.Data)
}
_, err := w.WriteString("/>")
return err
}
if err := w.WriteByte('>'); err != nil {
return err
}
// Add initial newline where there is danger of a newline beging ignored.
if c := n.FirstChild; c != nil && c.Type == TextNode && strings.HasPrefix(c.Data, "\n") {
switch n.Data {
case "pre", "listing", "textarea":
if err := w.WriteByte('\n'); err != nil {
return err
}
}
}
// Render any child nodes.
switch n.Data {
case "iframe", "noembed", "noframes", "noscript", "plaintext", "script", "style", "xmp":
for c := n.FirstChild; c != nil; c = c.NextSibling {
if c.Type == TextNode {
if _, err := w.WriteString(c.Data); err != nil {
return err
}
} else {
if err := render1(w, c); err != nil {
return err
}
}
}
if n.Data == "plaintext" {
// Don't render anything else. <plaintext> must be the
// last element in the file, with no closing tag.
return plaintextAbort
}
default:
for c := n.FirstChild; c != nil; c = c.NextSibling {
if err := render1(w, c); err != nil {
return err
}
}
}
// Render the </xxx> closing tag.
if _, err := w.WriteString("</"); err != nil {
return err
}
if _, err := w.WriteString(n.Data); err != nil {
return err
}
return w.WriteByte('>')
}
// writeQuoted writes s to w surrounded by quotes. Normally it will use double
// quotes, but if s contains a double quote, it will use single quotes.
// It is used for writing the identifiers in a doctype declaration.
// In valid HTML, they can't contain both types of quotes.
func writeQuoted(w writer, s string) error {
var q byte = '"'
if strings.Contains(s, `"`) {
q = '\''
}
if err := w.WriteByte(q); err != nil {
return err
}
if _, err := w.WriteString(s); err != nil {
return err
}
if err := w.WriteByte(q); err != nil {
return err
}
return nil
}
// Section 12.1.2, "Elements", gives this list of void elements. Void elements
// are those that can't have any contents.
var voidElements = map[string]bool{
"area": true,
"base": true,
"br": true,
"col": true,
"embed": true,
"hr": true,
"img": true,
"input": true,
"keygen": true, // "keygen" has been removed from the spec, but are kept here for backwards compatibility.
"link": true,
"meta": true,
"param": true,
"source": true,
"track": true,
"wbr": true,
}

File diff suppressed because it is too large Load Diff

@ -1,8 +1,12 @@
# github.com/gabriel-vasile/mimetype v1.2.0
# github.com/gabriel-vasile/mimetype v1.3.0
github.com/gabriel-vasile/mimetype
github.com/gabriel-vasile/mimetype/internal/charset
github.com/gabriel-vasile/mimetype/internal/json
github.com/gabriel-vasile/mimetype/internal/matchers
github.com/gabriel-vasile/mimetype/internal/magic
# github.com/mattn/go-xmpp v0.0.0-20210121082723-b40e1294994d
github.com/mattn/go-xmpp
# github.com/pborman/getopt/v2 v2.1.0
github.com/pborman/getopt/v2
# golang.org/x/net v0.0.0-20210521195947-fe42d452be8f
golang.org/x/net/html
golang.org/x/net/html/atom

Loading…
Cancel
Save