2017-10-20 10:51:56 +00:00
|
|
|
package main
|
|
|
|
|
|
|
|
import (
|
2017-11-09 20:27:03 +00:00
|
|
|
"regexp"
|
2018-10-24 16:16:42 +00:00
|
|
|
"time"
|
2017-10-20 10:51:56 +00:00
|
|
|
)
|
|
|
|
|
2017-11-09 20:27:03 +00:00
|
|
|
// Constants used when extracting tags.
const (
	// ReTags is the regular expression source used to find #tags in text.
	// Its first capture group, named "tag", holds the tag text without the
	// leading '#'.
	//
	// TODO: ParseTags still reads the group by position (m[1]); read it by
	// name instead, see
	// [named groups](https://github.com/StefanSchroeder/Golang-Regex-Tutorial/blob/master/01-chapter2.markdown)
	//
	// Sample inputs for checking the pattern by hand:
	//
	//	#start test2 #test3 elol
	//	#start word with #end
	//	word in the #middle of sentence
	//	tags with a #dot.caracter
	//	this is a end of sentence #tag
	ReTags = "\\B#(?P<tag>\\w+\\.?\\w+)"

	// TagJoinSep is the separator string "|". Presumably it is placed between
	// tags when they are joined into one string; the code that uses it is not
	// visible here, so confirm against callers.
	TagJoinSep = "|"
)
|
2017-10-20 10:51:56 +00:00
|
|
|
|
2017-11-20 15:05:44 +00:00
|
|
|
// ParserStats groups one duration and four integer counters kept by the
// parser. The meaning given for each field below is inferred from its name
// only; the code that updates them is not visible here, so confirm against
// the call sites.
//
// NOTE(review): currentUrlCount does not follow the URL initialism used by
// lastURLCount. Renaming it means updating every use in the package.
type ParserStats struct {
	// Presumably how long the previous parse run took.
	lastParseTime time.Duration

	// Presumably the node and URL totals of the previous parse run.
	lastNodeCount int
	lastURLCount  int

	// Presumably the node and URL totals of the run in progress.
	currentNodeCount int
	currentUrlCount  int
}
|
2017-10-20 10:51:56 +00:00
|
|
|
|
2017-11-30 15:08:12 +00:00
|
|
|
// ParseHook is the function type of a callback that receives a *Node and
// returns nothing. ParseTags in this file has this signature.
type ParseHook func(node *Node)
|
2017-11-20 18:07:15 +00:00
|
|
|
|
2017-11-30 15:08:12 +00:00
|
|
|
func ParseTags(node *Node) {
|
2017-11-20 18:07:15 +00:00
|
|
|
|
2017-11-30 15:08:12 +00:00
|
|
|
var regex = regexp.MustCompile(ReTags)
|
2017-11-20 18:07:15 +00:00
|
|
|
|
2017-11-30 15:08:12 +00:00
|
|
|
matches := regex.FindAllStringSubmatch(node.Name, -1)
|
|
|
|
for _, m := range matches {
|
2018-05-27 15:36:03 +00:00
|
|
|
node.Tags = append(node.Tags, m[1])
|
2017-11-30 15:08:12 +00:00
|
|
|
}
|
|
|
|
//res := regex.FindAllStringSubmatch(bk.Metadata, -1)
|
2017-11-23 01:51:23 +00:00
|
|
|
|
2018-10-26 16:25:07 +00:00
|
|
|
if IsDebugging() {
|
|
|
|
if len(node.Tags) > 0 {
|
|
|
|
log.Debugf("[in title] found following tags: %s", node.Tags)
|
|
|
|
}
|
2017-11-20 18:07:15 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-11-09 20:27:03 +00:00
|
|
|
// _s converts value, which must hold a []byte, into a string.
// It panics when value holds any other dynamic type (including nil), because
// the type assertion is unchecked.
func _s(value interface{}) string {
	raw := value.([]byte)
	return string(raw)
}
|