wip recursive parse node tree
This commit is contained in:
parent
e1d281de99
commit
2ff96e1509
2
api.go
2
api.go
@ -20,7 +20,7 @@ func getBookmarks(c *gin.Context) {
|
|||||||
err = rows.Scan(&bookmark.URL, &bookmark.Metadata, &tags)
|
err = rows.Scan(&bookmark.URL, &bookmark.Metadata, &tags)
|
||||||
logPanic(err)
|
logPanic(err)
|
||||||
|
|
||||||
bookmark.Tags = strings.Split(tags, " ")
|
bookmark.Tags = strings.Split(tags, TagJoinSep)
|
||||||
|
|
||||||
//log.Debugf("GET %s", tags)
|
//log.Debugf("GET %s", tags)
|
||||||
//log.Debugf("%v", bookmark)
|
//log.Debugf("%v", bookmark)
|
||||||
|
@ -10,7 +10,7 @@ type Bookmark struct {
|
|||||||
Metadata string `json:"metadata"`
|
Metadata string `json:"metadata"`
|
||||||
Tags []string `json:"tags"`
|
Tags []string `json:"tags"`
|
||||||
Desc string `json:"desc"`
|
Desc string `json:"desc"`
|
||||||
Node Node
|
Node Node `json:"-"`
|
||||||
//flags int
|
//flags int
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -25,7 +25,7 @@ func (bk *Bookmark) add(db *DB) {
|
|||||||
logError(err)
|
logError(err)
|
||||||
defer stmt.Close()
|
defer stmt.Close()
|
||||||
|
|
||||||
_, err = stmt.Exec(bk.URL, bk.Metadata, strings.Join(bk.Tags, " "), "", 0)
|
_, err = stmt.Exec(bk.URL, bk.Metadata, strings.Join(bk.Tags, TagJoinSep), "", 0)
|
||||||
sqlErrorMsg(err, bk.URL)
|
sqlErrorMsg(err, bk.URL)
|
||||||
|
|
||||||
err = tx.Commit()
|
err = tx.Commit()
|
||||||
|
13
browsers.go
13
browsers.go
@ -5,6 +5,7 @@ import (
|
|||||||
"path"
|
"path"
|
||||||
|
|
||||||
"github.com/fsnotify/fsnotify"
|
"github.com/fsnotify/fsnotify"
|
||||||
|
"github.com/sp4ke/hashmap"
|
||||||
)
|
)
|
||||||
|
|
||||||
type BrowserType uint8
|
type BrowserType uint8
|
||||||
@ -27,6 +28,7 @@ var Chrome = struct {
|
|||||||
type IBrowser interface {
|
type IBrowser interface {
|
||||||
IWatchable
|
IWatchable
|
||||||
InitBuffer() // init buffer db, should be defered to close after call
|
InitBuffer() // init buffer db, should be defered to close after call
|
||||||
|
InitIndex() // Creates in memory Index
|
||||||
RegisterHooks(...ParseHook)
|
RegisterHooks(...ParseHook)
|
||||||
Load() // Loads bookmarks to db without watching
|
Load() // Loads bookmarks to db without watching
|
||||||
//Parse(...ParseHook) // Main parsing method with different parsing hooks
|
//Parse(...ParseHook) // Main parsing method with different parsing hooks
|
||||||
@ -42,6 +44,9 @@ type BaseBrowser struct {
|
|||||||
baseDir string
|
baseDir string
|
||||||
bkFile string
|
bkFile string
|
||||||
bufferDB *DB
|
bufferDB *DB
|
||||||
|
URLIndex *hashmap.RBTree
|
||||||
|
nodeTree *Node
|
||||||
|
cNode *Node //current node
|
||||||
stats *ParserStats
|
stats *ParserStats
|
||||||
bType BrowserType
|
bType BrowserType
|
||||||
name string
|
name string
|
||||||
@ -79,6 +84,10 @@ func (bw *BaseBrowser) Close() {
|
|||||||
logPanic(err)
|
logPanic(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (b *BaseBrowser) InitIndex() {
|
||||||
|
b.URLIndex = NewIndex()
|
||||||
|
}
|
||||||
|
|
||||||
func (b *BaseBrowser) InitBuffer() {
|
func (b *BaseBrowser) InitBuffer() {
|
||||||
|
|
||||||
bufferName := fmt.Sprintf("buffer_%s", b.name)
|
bufferName := fmt.Sprintf("buffer_%s", b.name)
|
||||||
@ -98,8 +107,8 @@ func (b *BaseBrowser) RegisterHooks(hooks ...ParseHook) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Runs browser-defined hooks on bookmark
|
// Runs browser-defined hooks on bookmark
|
||||||
func (b *BaseBrowser) RunParseHooks(bk *Bookmark) {
|
func (b *BaseBrowser) RunParseHooks(node *Node) {
|
||||||
for _, hook := range b.parseHooks {
|
for _, hook := range b.parseHooks {
|
||||||
hook(bk)
|
hook(node)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
130
chrome.go
130
chrome.go
@ -4,6 +4,7 @@ import (
|
|||||||
"io/ioutil"
|
"io/ioutil"
|
||||||
"path"
|
"path"
|
||||||
|
|
||||||
|
"github.com/OneOfOne/xxhash"
|
||||||
"github.com/buger/jsonparser"
|
"github.com/buger/jsonparser"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -29,6 +30,8 @@ func NewChromeBrowser() IBrowser {
|
|||||||
browser.baseDir = Chrome.BookmarkDir
|
browser.baseDir = Chrome.BookmarkDir
|
||||||
browser.bkFile = Chrome.BookmarkFile
|
browser.bkFile = Chrome.BookmarkFile
|
||||||
browser.stats = &ParserStats{}
|
browser.stats = &ParserStats{}
|
||||||
|
browser.nodeTree = &Node{Name: "root", Parent: nil}
|
||||||
|
browser.cNode = browser.nodeTree
|
||||||
|
|
||||||
browser.SetupWatcher()
|
browser.SetupWatcher()
|
||||||
|
|
||||||
@ -47,6 +50,8 @@ func (bw *ChromeBrowser) Watch() bool {
|
|||||||
|
|
||||||
func (bw *ChromeBrowser) Load() {
|
func (bw *ChromeBrowser) Load() {
|
||||||
|
|
||||||
|
bw.InitIndex()
|
||||||
|
|
||||||
// Check if cache is initialized
|
// Check if cache is initialized
|
||||||
if cacheDB == nil || cacheDB.handle == nil {
|
if cacheDB == nil || cacheDB.handle == nil {
|
||||||
log.Critical("cache is not yet initialized !")
|
log.Critical("cache is not yet initialized !")
|
||||||
@ -84,21 +89,22 @@ func (bw *ChromeBrowser) Run() {
|
|||||||
gJsonParseRecursive(nil, childVal, dataType, offset)
|
gJsonParseRecursive(nil, childVal, dataType, offset)
|
||||||
}
|
}
|
||||||
|
|
||||||
rootsNode := new(Node)
|
|
||||||
currentNode := rootsNode
|
|
||||||
|
|
||||||
gJsonParseRecursive = func(key []byte, node []byte, dataType jsonparser.ValueType, offset int) error {
|
gJsonParseRecursive = func(key []byte, node []byte, dataType jsonparser.ValueType, offset int) error {
|
||||||
// Core of google chrome bookmark parsing
|
// Core of google chrome bookmark parsing
|
||||||
// Any loading to local db is done here
|
// Any loading to local db is done here
|
||||||
bw.stats.currentNodeCount++
|
bw.stats.currentNodeCount++
|
||||||
|
|
||||||
parentNode := currentNode
|
//log.Debugf("moving current node %v as parent", currentNode.Name)
|
||||||
currentNode := new(Node)
|
currentNode := new(Node)
|
||||||
currentNode.Parent = parentNode
|
|
||||||
|
|
||||||
var nodeType, children []byte
|
currentNode.Parent = bw.cNode
|
||||||
|
bw.cNode.Children = append(bw.cNode.Children, currentNode)
|
||||||
|
bw.cNode = currentNode
|
||||||
|
|
||||||
|
var nodeType, nodeName, nodeURL, children []byte
|
||||||
var childrenType jsonparser.ValueType
|
var childrenType jsonparser.ValueType
|
||||||
bookmark := &Bookmark{}
|
|
||||||
|
//log.Debugf("parent %v", parentNode)
|
||||||
|
|
||||||
// Paths to lookup in node payload
|
// Paths to lookup in node payload
|
||||||
paths := [][]string{
|
paths := [][]string{
|
||||||
@ -112,49 +118,101 @@ func (bw *ChromeBrowser) Run() {
|
|||||||
switch idx {
|
switch idx {
|
||||||
case 0:
|
case 0:
|
||||||
nodeType = value
|
nodeType = value
|
||||||
currentNode.Type = _s(value)
|
//currentNode.Type = _s(value)
|
||||||
|
|
||||||
case 1: // name or title
|
case 1: // name or title
|
||||||
currentNode.Name = _s(value)
|
//currentNode.Name = _s(value)
|
||||||
|
nodeName = value
|
||||||
case 2:
|
case 2:
|
||||||
currentNode.URL = _s(value)
|
//currentNode.URL = _s(value)
|
||||||
|
nodeURL = value
|
||||||
case 3:
|
case 3:
|
||||||
children, childrenType = value, vt
|
children, childrenType = value, vt
|
||||||
}
|
}
|
||||||
}, paths...)
|
}, paths...)
|
||||||
|
|
||||||
bookmark.Metadata = currentNode.Name
|
log.Debugf("parsing node %s", nodeName)
|
||||||
bookmark.URL = currentNode.URL
|
|
||||||
|
|
||||||
// If node type is string ignore (needed for sync_transaction_version)
|
// If node type is string ignore (needed for sync_transaction_version)
|
||||||
if dataType == jsonparser.String {
|
if dataType == jsonparser.String {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// if node is url(leaf), handle the url
|
|
||||||
if _s(nodeType) == jsonNodeTypes.URL {
|
|
||||||
// Add bookmark to db here
|
|
||||||
//debugPrint("%s", url)
|
|
||||||
//debugPrint("%s", node)
|
|
||||||
|
|
||||||
// Find tags in title
|
|
||||||
//findTagsInTitle(name)
|
|
||||||
bw.stats.currentUrlCount++
|
|
||||||
|
|
||||||
// Run parsehoos before adding bookmark
|
|
||||||
bw.RunParseHooks(bookmark)
|
|
||||||
|
|
||||||
// Add bookmark
|
|
||||||
bookmark.add(bw.bufferDB)
|
|
||||||
}
|
|
||||||
|
|
||||||
parentNode.Children = append(parentNode.Children, currentNode)
|
|
||||||
|
|
||||||
// if node is a folder with children
|
// if node is a folder with children
|
||||||
if childrenType == jsonparser.Array && len(children) > 2 { // if len(children) > len("[]")
|
if childrenType == jsonparser.Array && len(children) > 2 { // if len(children) > len("[]")
|
||||||
jsonparser.ArrayEach(node, parseChildren, jsonNodePaths.Children)
|
jsonparser.ArrayEach(node, parseChildren, jsonNodePaths.Children)
|
||||||
|
|
||||||
|
// Finished parsing all children
|
||||||
|
// Add them into current node ?
|
||||||
}
|
}
|
||||||
|
|
||||||
|
currentNode.Type = _s(nodeType)
|
||||||
|
currentNode.Name = _s(nodeName)
|
||||||
|
|
||||||
|
// if node is url(leaf), handle the url
|
||||||
|
if _s(nodeType) == jsonNodeTypes.URL {
|
||||||
|
|
||||||
|
currentNode.URL = _s(nodeURL)
|
||||||
|
|
||||||
|
bw.stats.currentUrlCount++
|
||||||
|
|
||||||
|
// Check if url-node already in index
|
||||||
|
var nodeVal *Node
|
||||||
|
iVal, found := bw.URLIndex.Get(currentNode.URL)
|
||||||
|
|
||||||
|
nameHash := xxhash.ChecksumString64(currentNode.Name)
|
||||||
|
// If node url not in index, add it to index
|
||||||
|
if !found {
|
||||||
|
//log.Debugf("Not found")
|
||||||
|
|
||||||
|
// store hash(name)
|
||||||
|
currentNode.NameHash = nameHash
|
||||||
|
|
||||||
|
// The value in the index will be a
|
||||||
|
// pointer to currentNode
|
||||||
|
//log.Debugf("Inserting url %s to index", nodeURL)
|
||||||
|
bw.URLIndex.Insert(currentNode.URL, currentNode)
|
||||||
|
|
||||||
|
// If we find the node already in index
|
||||||
|
// we check if the hash(name) changed meaning
|
||||||
|
// the data changed
|
||||||
|
} else {
|
||||||
|
//log.Debugf("Found")
|
||||||
|
nodeVal = iVal.(*Node)
|
||||||
|
|
||||||
|
// hash(name) is different, we will update the
|
||||||
|
// index and parse the bookmark
|
||||||
|
if nodeVal.NameHash != nameHash {
|
||||||
|
|
||||||
|
// Update node in index
|
||||||
|
currentNode.NameHash = nameHash
|
||||||
|
|
||||||
|
if currentNode.NameHash != nodeVal.NameHash {
|
||||||
|
panic("currentNode.NameHash != indexValue.NameHash")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Run parse hooks on node
|
||||||
|
bw.RunParseHooks(currentNode)
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// Else we do nothing, the node will not
|
||||||
|
// change
|
||||||
|
}
|
||||||
|
|
||||||
|
// If parent is folder, add it as tag and add current node as child
|
||||||
|
// And add this link as child
|
||||||
|
if currentNode.Parent.Type == jsonNodeTypes.Folder {
|
||||||
|
log.Debug("Parent is folder, parsing as tag ...")
|
||||||
|
currentNode.Tags = append(currentNode.Tags, currentNode.Parent.Name)
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
//log.Debugf("Adding current node %v to parent %v", currentNode.Name, parentNode)
|
||||||
|
//parentNode.Children = append(parentNode.Children, currentNode)
|
||||||
|
//currentNode.Parent = parentNode
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -162,12 +220,12 @@ func (bw *ChromeBrowser) Run() {
|
|||||||
// Begin parsing
|
// Begin parsing
|
||||||
rootsData, _, _, _ := jsonparser.Get(f, "roots")
|
rootsData, _, _, _ := jsonparser.Get(f, "roots")
|
||||||
|
|
||||||
log.Debug("loading bookmarks to bufferdb")
|
log.Debug("loading bookmarks to index")
|
||||||
// Load bookmarks to currentJobDB
|
|
||||||
jsonparser.ObjectEach(rootsData, gJsonParseRecursive)
|
jsonparser.ObjectEach(rootsData, gJsonParseRecursive)
|
||||||
|
|
||||||
// Debug walk tree
|
// Debug walk tree
|
||||||
//go WalkNode(rootsNode)
|
go WalkNode(bw.nodeTree)
|
||||||
|
|
||||||
// Finished parsing
|
// Finished parsing
|
||||||
log.Debugf("parsed %d bookmarks", bw.stats.currentUrlCount)
|
log.Debugf("parsed %d bookmarks", bw.stats.currentUrlCount)
|
||||||
@ -178,7 +236,9 @@ func (bw *ChromeBrowser) Run() {
|
|||||||
bw.stats.currentNodeCount = 0
|
bw.stats.currentNodeCount = 0
|
||||||
bw.stats.currentUrlCount = 0
|
bw.stats.currentUrlCount = 0
|
||||||
|
|
||||||
// Compare currentDb with memCacheDb for new bookmarks
|
// Compare currentDb with index for new bookmarks
|
||||||
|
|
||||||
|
log.Debug("TODO: Compare cacheDB with index")
|
||||||
|
|
||||||
// If cacheDB is empty just copy bufferDB to cacheDB
|
// If cacheDB is empty just copy bufferDB to cacheDB
|
||||||
// until local db is already populated and preloaded
|
// until local db is already populated and preloaded
|
||||||
|
26
index.go
Normal file
26
index.go
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"github.com/OneOfOne/xxhash"
|
||||||
|
"github.com/sp4ke/hashmap"
|
||||||
|
)
|
||||||
|
|
||||||
|
// In memory index used for fast lookup of url-title(tags) pairs
|
||||||
|
// to quickly detect bookmarks that changed when bookmarks are reloaded
|
||||||
|
// from browser on a watch event
|
||||||
|
// Input `in` must be of type string
|
||||||
|
// The index is a map of [urlhash]*Node
|
||||||
|
func xxHashFunc(in interface{}) uint64 {
|
||||||
|
input, ok := in.(string)
|
||||||
|
if !ok {
|
||||||
|
log.Panicf("wrong data type to hash, exptected string given %T", in)
|
||||||
|
}
|
||||||
|
sum := xxhash.ChecksumString64(input)
|
||||||
|
//log.Debugf("Calculating hash of %s as %d", input, sum)
|
||||||
|
return sum
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns *hashmap.RBTree
|
||||||
|
func NewIndex() *hashmap.RBTree {
|
||||||
|
return hashmap.New(xxHashFunc)
|
||||||
|
}
|
41
parse.go
41
parse.go
@ -5,15 +5,22 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
RE_TAGS = `\B#\w+`
|
// First group is tag
|
||||||
|
// TODO: use named groups
|
||||||
|
// [named groups](https://github.com/StefanSchroeder/Golang-Regex-Tutorial/blob/master/01-chapter2.markdown)
|
||||||
|
|
||||||
|
ReTags = "\\B#(?P<tag>\\w+)"
|
||||||
|
TagJoinSep = "|"
|
||||||
)
|
)
|
||||||
|
|
||||||
type NodeType uint8
|
type NodeType uint8
|
||||||
|
|
||||||
type Node struct {
|
type Node struct {
|
||||||
Type string
|
|
||||||
Name string
|
Name string
|
||||||
|
Type string
|
||||||
URL string
|
URL string
|
||||||
|
Tags []string
|
||||||
|
NameHash uint64 // hash of the metadata
|
||||||
Parent *Node
|
Parent *Node
|
||||||
Children []*Node
|
Children []*Node
|
||||||
}
|
}
|
||||||
@ -25,10 +32,12 @@ type ParserStats struct {
|
|||||||
currentUrlCount int
|
currentUrlCount int
|
||||||
}
|
}
|
||||||
|
|
||||||
type ParseHook func(bk *Bookmark)
|
type ParseHook func(node *Node)
|
||||||
|
|
||||||
|
// Debugging bookmark node tree
|
||||||
|
// TODO: Better usage of node trees
|
||||||
func WalkNode(node *Node) {
|
func WalkNode(node *Node) {
|
||||||
log.Debugf("Node --> %s | %s", node.Name, node.Type)
|
log.Debugf("Node --> %s | %s | children: %d | parent: %v", node.Name, node.Type, len(node.Children), node.Parent)
|
||||||
|
|
||||||
if len(node.Children) > 0 {
|
if len(node.Children) > 0 {
|
||||||
for _, node := range node.Children {
|
for _, node := range node.Children {
|
||||||
@ -37,28 +46,22 @@ func WalkNode(node *Node) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func ParseTags(bk *Bookmark) {
|
func ParseTags(node *Node) {
|
||||||
|
|
||||||
var regex = regexp.MustCompile(RE_TAGS)
|
var regex = regexp.MustCompile(ReTags)
|
||||||
|
|
||||||
bk.Tags = regex.FindAllString(bk.Metadata, -1)
|
matches := regex.FindAllStringSubmatch(node.Name, -1)
|
||||||
|
for _, m := range matches {
|
||||||
|
node.Tags = append(node.Tags, _s(m[1]))
|
||||||
|
}
|
||||||
|
//res := regex.FindAllStringSubmatch(bk.Metadata, -1)
|
||||||
|
|
||||||
if len(bk.Tags) > 0 {
|
if len(node.Tags) > 0 {
|
||||||
log.Debugf("[Title] found following tags: %s", bk.Tags)
|
log.Debugf("[Title] found following tags: %s", node.Tags)
|
||||||
}
|
}
|
||||||
|
|
||||||
//bk.tags = regex.FindAllString(bk.url, -1)
|
|
||||||
//if len(tags) > 0 {
|
|
||||||
//log.Debugf("[URL] found following tags: %s", tags)
|
|
||||||
//}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func _s(value interface{}) string {
|
func _s(value interface{}) string {
|
||||||
return string(value.([]byte))
|
return string(value.([]byte))
|
||||||
}
|
}
|
||||||
|
|
||||||
func findTagsInTitle(title []byte) {
|
|
||||||
var regex = regexp.MustCompile(RE_TAGS)
|
|
||||||
tags := regex.FindAll(title, -1)
|
|
||||||
debugPrint("%s ---> found following tags: %s", title, tags)
|
|
||||||
}
|
|
||||||
|
Loading…
Reference in New Issue
Block a user