package extensions

import (
	"strings"
	"unicode"

	"github.com/yuin/goldmark"
	"github.com/yuin/goldmark/ast"
	gast "github.com/yuin/goldmark/ast"
	"github.com/yuin/goldmark/parser"
	"github.com/yuin/goldmark/text"
	"github.com/yuin/goldmark/util"
)

// Tags represents a list of inline tags in a Markdown document.
type Tags struct {
	gast.BaseInline
	// Tags in this list.
	Tags []string
}

// Dump prints the node through gast.DumpHelper, exposing its tags as a single
// comma-separated "Tags" attribute.
func (n *Tags) Dump(source []byte, level int) {
	m := map[string]string{}
	m["Tags"] = strings.Join(n.Tags, ", ")
	gast.DumpHelper(n, source, level, m, nil)
}

// KindTags is a NodeKind of the Tags node.
var KindTags = gast.NewNodeKind("Tags")

// Kind returns KindTags.
func (n *Tags) Kind() gast.NodeKind {
	return KindTags
}

// TagExt is an extension parsing various flavors of tags.
//
// * #hashtags, including Bear's #multi words# tags
// * :colon:separated:tags:, e.g. vimwiki and Org mode
//
// Are authorized in a tag:
// * unicode categories [L]etter and [N]umber
// * / @ ' ~ - _ $ % & + = and when possible # :
// * any character escaped with \, including whitespace
type TagExt struct {
	// Indicates whether #hashtags are parsed.
	HashtagEnabled bool
	// Indicates whether Bear's multi-word tags are parsed. Hashtags must be enabled as well.
	MultiWordTagEnabled bool
	// Indicates whether :colon:tags: are parsed.
	ColontagEnabled bool
}

// Extend registers an inline parser for each enabled tag flavor on the given
// Markdown instance, all with priority 2000. Nothing is registered when no
// flavor is enabled.
func (t *TagExt) Extend(m goldmark.Markdown) {
	parsers := []util.PrioritizedValue{}

	if t.HashtagEnabled {
		parsers = append(parsers, util.Prioritized(&hashtagParser{
			multiWordTagEnabled: t.MultiWordTagEnabled,
		}, 2000))
	}

	if t.ColontagEnabled {
		parsers = append(parsers, util.Prioritized(&colontagParser{}, 2000))
	}

	if len(parsers) > 0 {
		m.Parser().AddOptions(parser.WithInlineParsers(parsers...))
	}
}

// hashtagParser parses #hashtags, including Bear's #multi words# tags
type hashtagParser struct {
	multiWordTagEnabled bool
}

// Trigger returns the character starting a hashtag.
func (p *hashtagParser) Trigger() []byte {
	return []byte{'#'}
}

// Parse attempts to read a hashtag at the current position of block.
//
// It returns a Tags node holding the single parsed tag and advances block
// past the bytes that belong to the tag, or returns nil without touching
// block when no valid hashtag starts here.
func (p *hashtagParser) Parse(parent ast.Node, block text.Reader, pc parser.Context) ast.Node {
	previousChar := block.PrecendingCharacter()
	line, _ := block.PeekLine()

	// A hashtag can't be directly preceded by a # or any other valid character.
	if isValidTagChar(previousChar, '\x00') {
		return nil
	}

	var (
		tag                   string // Accumulator for the hashtag
		multiWordTagCandidate string // Accumulator for a potential Bear multi-word tag
	)

	var (
		escaping            = false // Found a backslash, next character will be literal
		parsingMultiWordTag = false // Finished parsing a hashtag, now attempt parsing a Bear multi-word tag
		terminated          = false // The loop stopped on a terminating character before the end of the line
		endPos              = 0     // Number of bytes of the line consumed by the tag
		multiWordTagEndPos  = 0     // Byte offset of the character currently examined for the multi-word tag
	)

	appendChar := func(c rune) {
		if parsingMultiWordTag {
			multiWordTagCandidate += string(c)
		} else {
			tag += string(c)
		}
	}

	for i, char := range string(line) {
		if i == 0 {
			// Skip the first character, as it is #
			continue
		}

		// While the single-word tag is being read, endPos tracks the offset of
		// the current character, so that it excludes the character which
		// terminates the tag. Once in multi-word mode, endPos is frozen on the
		// first space and only multiWordTagEndPos moves.
		if parsingMultiWordTag {
			multiWordTagEndPos = i
		} else {
			endPos = i
		}

		if escaping {
			// Currently escaping? The character will be appended literally.
			appendChar(char)
			escaping = false

		} else if char == '\\' {
			// Found a backslash, next character will be escaped.
			escaping = true

		} else if parsingMultiWordTag {
			// Parsing a multi-word tag candidate. Characters which are neither
			// valid, whitespace nor # are skipped without being appended.
			if isValidTagChar(char, '#') || unicode.IsSpace(char) {
				appendChar(char)
			} else if char == '#' {
				// A valid multi-word tag must not have a space before the closing #.
				if !unicode.IsSpace(previousChar) {
					tag = multiWordTagCandidate
					// The closing # is part of the tag syntax: consume it too.
					endPos = multiWordTagEndPos + 1
				}
				terminated = true
				break
			}

			previousChar = char

		} else if !p.multiWordTagEnabled && char == '#' {
			// A tag terminated with a # is invalid when not in a multi-word tag.
			return nil

		} else if p.multiWordTagEnabled && unicode.IsSpace(char) {
			// Found a space, let's try to parse a multi-word tag.
			previousChar = char
			multiWordTagCandidate = tag
			parsingMultiWordTag = true
			appendChar(char)

		} else if !isValidTagChar(char, '#') {
			// Found an invalid character, the hashtag is complete.
			terminated = true
			break

		} else {
			appendChar(char)
		}
	}

	// The single-word tag ran up to the end of the line: endPos still points
	// at its last character, which must be consumed as well. A dangling
	// backslash is not part of the tag and is left in the text.
	if !terminated && !parsingMultiWordTag && !escaping {
		endPos = len(line)
	}

	tag = strings.TrimSpace(tag)
	if len(tag) == 0 || !isValidHashTag(tag) {
		return nil
	}

	block.Advance(endPos)

	return &Tags{
		BaseInline: gast.BaseInline{},
		Tags:       []string{tag},
	}
}

// isValidHashTag reports whether tag contains at least one character which is
// not a number. Purely numeric (and empty) hashtags are rejected.
func isValidHashTag(tag string) bool {
	for _, char := range tag {
		if !unicode.IsNumber(char) {
			return true
		}
	}
	return false
}

// colontagParser parses :colon:separated:tags:.
type colontagParser struct{}

// Trigger returns the character starting a list of colontags.
func (p *colontagParser) Trigger() []byte {
	return []byte{':'}
}

// Parse attempts to read a run of colontags at the current position of block.
//
// It returns a Tags node holding every tag closed by a colon and advances
// block up to the character which ended the run, or returns nil without
// touching block when not a single valid colontag was found.
func (p *colontagParser) Parse(parent ast.Node, block text.Reader, pc parser.Context) ast.Node {
	previousChar := block.PrecendingCharacter()
	line, _ := block.PeekLine()

	// A colontag can't be directly preceded by a : or any other valid character.
	if isValidTagChar(previousChar, '\x00') {
		return nil
	}

	var (
		tag  string        // Accumulator for the current colontag
		tags = []string{} // All colontags found
	)

	var (
		escaping   = false // Found a backslash, next character will be literal
		terminated = false // The loop stopped on a terminating character before the end of the line
		endPos     = 0     // Number of bytes of the line consumed by the colontags
	)

	appendChar := func(c rune) {
		tag += string(c)
	}

	for i, char := range string(line[1:]) {
		// i is relative to line[1:]. Shift it by one to account for the
		// leading colon, as block.Advance counts from the start of line.
		endPos = i + 1

		if escaping {
			// Currently escaping? The character will be appended literally.
			appendChar(char)
			escaping = false

		} else if char == '\\' {
			// Found a backslash, next character will be escaped.
			escaping = true

		} else if char == ':' {
			tag = strings.TrimSpace(tag)
			if !isValidTag(tag) {
				terminated = true
				break
			}
			tags = append(tags, tag)
			tag = ""

		} else if !isValidTagChar(char, ':') {
			// Found an invalid character, the colontag is complete.
			terminated = true
			break

		} else {
			appendChar(char)
		}
	}

	if len(tags) == 0 {
		return nil
	}

	// The colontags ran up to the end of the line: consume the last character
	// as well. A dangling backslash is left in the text.
	if !terminated && !escaping {
		endPos = len(line)
	}

	block.Advance(endPos)

	return &Tags{
		BaseInline: gast.BaseInline{},
		Tags:       tags,
	}
}

// isValidTagChar reports whether r is allowed unescaped in a tag: a unicode
// letter or number, or one of / @ ' ~ - _ $ % & + = : #. The excluded rune is
// always rejected, which lets callers ban their own delimiter.
func isValidTagChar(r rune, excluded rune) bool {
	return r != excluded && (unicode.IsLetter(r) || unicode.IsNumber(r) ||
		r == '/' || r == '@' || r == '\'' || r == '~' ||
		r == '-' || r == '_' || r == '$' || r == '%' ||
		r == '&' || r == '+' || r == '=' || r == ':' ||
		r == '#')
}

// isValidTag reports whether tag is an acceptable colontag: it must be
// non-empty and contain at least one character other than '-'.
func isValidTag(tag string) bool {
	if len(tag) == 0 {
		return false
	}

	// Prevent Markdown table syntax to be parsed as a colon tag, e.g. |:---:|
	// https://github.com/zk-org/zk/issues/185
	for _, c := range tag {
		if c != '-' {
			return true
		}
	}

	return false
}