wip recursive parse node tree
This commit is contained in:
parent
e1d281de99
commit
2ff96e1509
2
api.go
2
api.go
@ -20,7 +20,7 @@ func getBookmarks(c *gin.Context) {
|
||||
err = rows.Scan(&bookmark.URL, &bookmark.Metadata, &tags)
|
||||
logPanic(err)
|
||||
|
||||
bookmark.Tags = strings.Split(tags, " ")
|
||||
bookmark.Tags = strings.Split(tags, TagJoinSep)
|
||||
|
||||
//log.Debugf("GET %s", tags)
|
||||
//log.Debugf("%v", bookmark)
|
||||
|
@ -10,7 +10,7 @@ type Bookmark struct {
|
||||
Metadata string `json:"metadata"`
|
||||
Tags []string `json:"tags"`
|
||||
Desc string `json:"desc"`
|
||||
Node Node
|
||||
Node Node `json:"-"`
|
||||
//flags int
|
||||
}
|
||||
|
||||
@ -25,7 +25,7 @@ func (bk *Bookmark) add(db *DB) {
|
||||
logError(err)
|
||||
defer stmt.Close()
|
||||
|
||||
_, err = stmt.Exec(bk.URL, bk.Metadata, strings.Join(bk.Tags, " "), "", 0)
|
||||
_, err = stmt.Exec(bk.URL, bk.Metadata, strings.Join(bk.Tags, TagJoinSep), "", 0)
|
||||
sqlErrorMsg(err, bk.URL)
|
||||
|
||||
err = tx.Commit()
|
||||
|
13
browsers.go
13
browsers.go
@ -5,6 +5,7 @@ import (
|
||||
"path"
|
||||
|
||||
"github.com/fsnotify/fsnotify"
|
||||
"github.com/sp4ke/hashmap"
|
||||
)
|
||||
|
||||
type BrowserType uint8
|
||||
@ -27,6 +28,7 @@ var Chrome = struct {
|
||||
type IBrowser interface {
|
||||
IWatchable
|
||||
InitBuffer() // init buffer db, should be defered to close after call
|
||||
InitIndex() // Creates in memory Index
|
||||
RegisterHooks(...ParseHook)
|
||||
Load() // Loads bookmarks to db without watching
|
||||
//Parse(...ParseHook) // Main parsing method with different parsing hooks
|
||||
@ -42,6 +44,9 @@ type BaseBrowser struct {
|
||||
baseDir string
|
||||
bkFile string
|
||||
bufferDB *DB
|
||||
URLIndex *hashmap.RBTree
|
||||
nodeTree *Node
|
||||
cNode *Node //current node
|
||||
stats *ParserStats
|
||||
bType BrowserType
|
||||
name string
|
||||
@ -79,6 +84,10 @@ func (bw *BaseBrowser) Close() {
|
||||
logPanic(err)
|
||||
}
|
||||
|
||||
func (b *BaseBrowser) InitIndex() {
|
||||
b.URLIndex = NewIndex()
|
||||
}
|
||||
|
||||
func (b *BaseBrowser) InitBuffer() {
|
||||
|
||||
bufferName := fmt.Sprintf("buffer_%s", b.name)
|
||||
@ -98,8 +107,8 @@ func (b *BaseBrowser) RegisterHooks(hooks ...ParseHook) {
|
||||
}
|
||||
|
||||
// Runs browser-defined hooks on bookmark
|
||||
func (b *BaseBrowser) RunParseHooks(bk *Bookmark) {
|
||||
func (b *BaseBrowser) RunParseHooks(node *Node) {
|
||||
for _, hook := range b.parseHooks {
|
||||
hook(bk)
|
||||
hook(node)
|
||||
}
|
||||
}
|
||||
|
130
chrome.go
130
chrome.go
@ -4,6 +4,7 @@ import (
|
||||
"io/ioutil"
|
||||
"path"
|
||||
|
||||
"github.com/OneOfOne/xxhash"
|
||||
"github.com/buger/jsonparser"
|
||||
)
|
||||
|
||||
@ -29,6 +30,8 @@ func NewChromeBrowser() IBrowser {
|
||||
browser.baseDir = Chrome.BookmarkDir
|
||||
browser.bkFile = Chrome.BookmarkFile
|
||||
browser.stats = &ParserStats{}
|
||||
browser.nodeTree = &Node{Name: "root", Parent: nil}
|
||||
browser.cNode = browser.nodeTree
|
||||
|
||||
browser.SetupWatcher()
|
||||
|
||||
@ -47,6 +50,8 @@ func (bw *ChromeBrowser) Watch() bool {
|
||||
|
||||
func (bw *ChromeBrowser) Load() {
|
||||
|
||||
bw.InitIndex()
|
||||
|
||||
// Check if cache is initialized
|
||||
if cacheDB == nil || cacheDB.handle == nil {
|
||||
log.Critical("cache is not yet initialized !")
|
||||
@ -84,21 +89,22 @@ func (bw *ChromeBrowser) Run() {
|
||||
gJsonParseRecursive(nil, childVal, dataType, offset)
|
||||
}
|
||||
|
||||
rootsNode := new(Node)
|
||||
currentNode := rootsNode
|
||||
|
||||
gJsonParseRecursive = func(key []byte, node []byte, dataType jsonparser.ValueType, offset int) error {
|
||||
// Core of google chrome bookmark parsing
|
||||
// Any loading to local db is done here
|
||||
bw.stats.currentNodeCount++
|
||||
|
||||
parentNode := currentNode
|
||||
//log.Debugf("moving current node %v as parent", currentNode.Name)
|
||||
currentNode := new(Node)
|
||||
currentNode.Parent = parentNode
|
||||
|
||||
var nodeType, children []byte
|
||||
currentNode.Parent = bw.cNode
|
||||
bw.cNode.Children = append(bw.cNode.Children, currentNode)
|
||||
bw.cNode = currentNode
|
||||
|
||||
var nodeType, nodeName, nodeURL, children []byte
|
||||
var childrenType jsonparser.ValueType
|
||||
bookmark := &Bookmark{}
|
||||
|
||||
//log.Debugf("parent %v", parentNode)
|
||||
|
||||
// Paths to lookup in node payload
|
||||
paths := [][]string{
|
||||
@ -112,49 +118,101 @@ func (bw *ChromeBrowser) Run() {
|
||||
switch idx {
|
||||
case 0:
|
||||
nodeType = value
|
||||
currentNode.Type = _s(value)
|
||||
//currentNode.Type = _s(value)
|
||||
|
||||
case 1: // name or title
|
||||
currentNode.Name = _s(value)
|
||||
//currentNode.Name = _s(value)
|
||||
nodeName = value
|
||||
case 2:
|
||||
currentNode.URL = _s(value)
|
||||
//currentNode.URL = _s(value)
|
||||
nodeURL = value
|
||||
case 3:
|
||||
children, childrenType = value, vt
|
||||
}
|
||||
}, paths...)
|
||||
|
||||
bookmark.Metadata = currentNode.Name
|
||||
bookmark.URL = currentNode.URL
|
||||
log.Debugf("parsing node %s", nodeName)
|
||||
|
||||
// If node type is string ignore (needed for sync_transaction_version)
|
||||
if dataType == jsonparser.String {
|
||||
return nil
|
||||
}
|
||||
|
||||
// if node is url(leaf), handle the url
|
||||
if _s(nodeType) == jsonNodeTypes.URL {
|
||||
// Add bookmark to db here
|
||||
//debugPrint("%s", url)
|
||||
//debugPrint("%s", node)
|
||||
|
||||
// Find tags in title
|
||||
//findTagsInTitle(name)
|
||||
bw.stats.currentUrlCount++
|
||||
|
||||
// Run parse hooks before adding bookmark
|
||||
bw.RunParseHooks(bookmark)
|
||||
|
||||
// Add bookmark
|
||||
bookmark.add(bw.bufferDB)
|
||||
}
|
||||
|
||||
parentNode.Children = append(parentNode.Children, currentNode)
|
||||
|
||||
// if node is a folder with children
|
||||
if childrenType == jsonparser.Array && len(children) > 2 { // if len(children) > len("[]")
|
||||
jsonparser.ArrayEach(node, parseChildren, jsonNodePaths.Children)
|
||||
|
||||
// Finished parsing all children
|
||||
// Add them into current node ?
|
||||
}
|
||||
|
||||
currentNode.Type = _s(nodeType)
|
||||
currentNode.Name = _s(nodeName)
|
||||
|
||||
// if node is url(leaf), handle the url
|
||||
if _s(nodeType) == jsonNodeTypes.URL {
|
||||
|
||||
currentNode.URL = _s(nodeURL)
|
||||
|
||||
bw.stats.currentUrlCount++
|
||||
|
||||
// Check if url-node already in index
|
||||
var nodeVal *Node
|
||||
iVal, found := bw.URLIndex.Get(currentNode.URL)
|
||||
|
||||
nameHash := xxhash.ChecksumString64(currentNode.Name)
|
||||
// If node url not in index, add it to index
|
||||
if !found {
|
||||
//log.Debugf("Not found")
|
||||
|
||||
// store hash(name)
|
||||
currentNode.NameHash = nameHash
|
||||
|
||||
// The value in the index will be a
|
||||
// pointer to currentNode
|
||||
//log.Debugf("Inserting url %s to index", nodeURL)
|
||||
bw.URLIndex.Insert(currentNode.URL, currentNode)
|
||||
|
||||
// If we find the node already in index
|
||||
// we check if the hash(name) changed meaning
|
||||
// the data changed
|
||||
} else {
|
||||
//log.Debugf("Found")
|
||||
nodeVal = iVal.(*Node)
|
||||
|
||||
// hash(name) is different, we will update the
|
||||
// index and parse the bookmark
|
||||
if nodeVal.NameHash != nameHash {
|
||||
|
||||
// Update node in index
|
||||
currentNode.NameHash = nameHash
|
||||
|
||||
if currentNode.NameHash != nodeVal.NameHash {
|
||||
panic("currentNode.NameHash != indexValue.NameHash")
|
||||
}
|
||||
|
||||
// Run parse hooks on node
|
||||
bw.RunParseHooks(currentNode)
|
||||
|
||||
}
|
||||
|
||||
// Else we do nothing, the node will not
|
||||
// change
|
||||
}
|
||||
|
||||
// If parent is folder, add it as tag and add current node as child
|
||||
// And add this link as child
|
||||
if currentNode.Parent.Type == jsonNodeTypes.Folder {
|
||||
log.Debug("Parent is folder, parsing as tag ...")
|
||||
currentNode.Tags = append(currentNode.Tags, currentNode.Parent.Name)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
//log.Debugf("Adding current node %v to parent %v", currentNode.Name, parentNode)
|
||||
//parentNode.Children = append(parentNode.Children, currentNode)
|
||||
//currentNode.Parent = parentNode
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
@ -162,12 +220,12 @@ func (bw *ChromeBrowser) Run() {
|
||||
// Begin parsing
|
||||
rootsData, _, _, _ := jsonparser.Get(f, "roots")
|
||||
|
||||
log.Debug("loading bookmarks to bufferdb")
|
||||
// Load bookmarks to currentJobDB
|
||||
log.Debug("loading bookmarks to index")
|
||||
|
||||
jsonparser.ObjectEach(rootsData, gJsonParseRecursive)
|
||||
|
||||
// Debug walk tree
|
||||
//go WalkNode(rootsNode)
|
||||
go WalkNode(bw.nodeTree)
|
||||
|
||||
// Finished parsing
|
||||
log.Debugf("parsed %d bookmarks", bw.stats.currentUrlCount)
|
||||
@ -178,7 +236,9 @@ func (bw *ChromeBrowser) Run() {
|
||||
bw.stats.currentNodeCount = 0
|
||||
bw.stats.currentUrlCount = 0
|
||||
|
||||
// Compare currentDb with memCacheDb for new bookmarks
|
||||
// Compare currentDb with index for new bookmarks
|
||||
|
||||
log.Debug("TODO: Compare cacheDB with index")
|
||||
|
||||
// If cacheDB is empty just copy bufferDB to cacheDB
|
||||
// until local db is already populated and preloaded
|
||||
|
26
index.go
Normal file
26
index.go
Normal file
@ -0,0 +1,26 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"github.com/OneOfOne/xxhash"
|
||||
"github.com/sp4ke/hashmap"
|
||||
)
|
||||
|
||||
// In-memory index used for fast lookup of url-title(tags) pairs
|
||||
// to quickly detect bookmarks that changed when bookmarks are reloaded
|
||||
// from browser on a watch event
|
||||
// Input `in` must be of type string
|
||||
// The index is a map of [urlhash]*Node
|
||||
func xxHashFunc(in interface{}) uint64 {
|
||||
input, ok := in.(string)
|
||||
if !ok {
|
||||
log.Panicf("wrong data type to hash, exptected string given %T", in)
|
||||
}
|
||||
sum := xxhash.ChecksumString64(input)
|
||||
//log.Debugf("Calculating hash of %s as %d", input, sum)
|
||||
return sum
|
||||
}
|
||||
|
||||
// Returns *hashmap.RBTree
|
||||
func NewIndex() *hashmap.RBTree {
|
||||
return hashmap.New(xxHashFunc)
|
||||
}
|
41
parse.go
41
parse.go
@ -5,15 +5,22 @@ import (
|
||||
)
|
||||
|
||||
const (
|
||||
RE_TAGS = `\B#\w+`
|
||||
// First group is tag
|
||||
// TODO: use named groups
|
||||
// [named groups](https://github.com/StefanSchroeder/Golang-Regex-Tutorial/blob/master/01-chapter2.markdown)
|
||||
|
||||
ReTags = "\\B#(?P<tag>\\w+)"
|
||||
TagJoinSep = "|"
|
||||
)
|
||||
|
||||
type NodeType uint8
|
||||
|
||||
type Node struct {
|
||||
Type string
|
||||
Name string
|
||||
Type string
|
||||
URL string
|
||||
Tags []string
|
||||
NameHash uint64 // hash of the metadata
|
||||
Parent *Node
|
||||
Children []*Node
|
||||
}
|
||||
@ -25,10 +32,12 @@ type ParserStats struct {
|
||||
currentUrlCount int
|
||||
}
|
||||
|
||||
type ParseHook func(bk *Bookmark)
|
||||
type ParseHook func(node *Node)
|
||||
|
||||
// Debugging bookmark node tree
|
||||
// TODO: Better usage of node trees
|
||||
func WalkNode(node *Node) {
|
||||
log.Debugf("Node --> %s | %s", node.Name, node.Type)
|
||||
log.Debugf("Node --> %s | %s | children: %d | parent: %v", node.Name, node.Type, len(node.Children), node.Parent)
|
||||
|
||||
if len(node.Children) > 0 {
|
||||
for _, node := range node.Children {
|
||||
@ -37,28 +46,22 @@ func WalkNode(node *Node) {
|
||||
}
|
||||
}
|
||||
|
||||
func ParseTags(bk *Bookmark) {
|
||||
func ParseTags(node *Node) {
|
||||
|
||||
var regex = regexp.MustCompile(RE_TAGS)
|
||||
var regex = regexp.MustCompile(ReTags)
|
||||
|
||||
bk.Tags = regex.FindAllString(bk.Metadata, -1)
|
||||
matches := regex.FindAllStringSubmatch(node.Name, -1)
|
||||
for _, m := range matches {
|
||||
node.Tags = append(node.Tags, _s(m[1]))
|
||||
}
|
||||
//res := regex.FindAllStringSubmatch(bk.Metadata, -1)
|
||||
|
||||
if len(bk.Tags) > 0 {
|
||||
log.Debugf("[Title] found following tags: %s", bk.Tags)
|
||||
if len(node.Tags) > 0 {
|
||||
log.Debugf("[Title] found following tags: %s", node.Tags)
|
||||
}
|
||||
|
||||
//bk.tags = regex.FindAllString(bk.url, -1)
|
||||
//if len(tags) > 0 {
|
||||
//log.Debugf("[URL] found following tags: %s", tags)
|
||||
//}
|
||||
}
|
||||
|
||||
func _s(value interface{}) string {
|
||||
return string(value.([]byte))
|
||||
}
|
||||
|
||||
func findTagsInTitle(title []byte) {
|
||||
var regex = regexp.MustCompile(RE_TAGS)
|
||||
tags := regex.FindAll(title, -1)
|
||||
debugPrint("%s ---> found following tags: %s", title, tags)
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user