// gosuki/chrome.go — Chrome/Chromium bookmark file parser.
package main
import (
"io/ioutil"
"path"
2017-12-02 12:30:59 +00:00
"time"
2017-11-20 15:05:44 +00:00
2017-11-30 15:08:12 +00:00
"github.com/OneOfOne/xxhash"
2017-11-20 15:05:44 +00:00
"github.com/buger/jsonparser"
)
// jsonNodeTypes enumerates the two "type" values a node can carry in
// Chrome's Bookmarks JSON: containers ("folder") and leaves ("url").
var jsonNodeTypes = struct {
	Folder, URL string
}{"folder", "url"}

// jsonNodePaths names the JSON keys looked up on each bookmark node.
var jsonNodePaths = struct {
	Type, Children, URL string
}{"type", "children", "url"}

// ParseChildFunc is the callback signature expected by
// jsonparser.ArrayEach for iterating a node's "children" array.
type ParseChildFunc func([]byte, jsonparser.ValueType, int, error)

// RecursiveParseFunc is the signature of the recursive node parser,
// matching the callback shape of jsonparser.ObjectEach.
type RecursiveParseFunc func([]byte, []byte, jsonparser.ValueType, int) error
2017-12-02 12:30:59 +00:00
// RawNode holds the raw byte slices extracted from a single bookmark
// node's JSON payload before conversion to a *Node. The slices point
// into the parsed buffer (see parseItems); they are not copies.
type RawNode struct {
	name         []byte // "name" key — page title
	nType        []byte // "type" key — "folder" or "url"
	url          []byte // "url" key — empty for folders
	children     []byte // raw "children" array payload, if any
	childrenType jsonparser.ValueType
}
// parseItems extracts the type, name, url and children fields from a
// raw bookmark node payload and stores them on rawNode. The stored
// slices alias nodeData; no copies are made.
func (rawNode *RawNode) parseItems(nodeData []byte) {
	// Keys looked up in the node payload; the callback receives the
	// index of the matched key in this slice.
	keys := [][]string{
		{"type"},
		{"name"}, // Title of page
		{"url"},
		{"children"},
	}

	handler := func(keyIdx int, data []byte, valueType jsonparser.ValueType, err error) {
		switch keyIdx {
		case 0: // type
			rawNode.nType = data
		case 1: // name or title
			rawNode.name = data
		case 2: // url
			rawNode.url = data
		case 3: // children
			rawNode.children = data
			rawNode.childrenType = valueType
		}
	}

	jsonparser.EachKey(nodeData, handler, keys...)
}
// getNode converts rawNode into a fresh *Node carrying its type and
// name. The URL field is left empty; callers fill it in for leaf
// (url) nodes.
func (rawNode *RawNode) getNode() *Node {
	return &Node{
		Type: _s(rawNode.nType),
		Name: _s(rawNode.name),
	}
}
2017-11-20 15:05:44 +00:00
// ChromeBrowser implements Chrome-specific bookmark handling on top
// of the shared BaseBrowser behavior.
type ChromeBrowser struct {
	BaseBrowser //embedding
}
func NewChromeBrowser() IBrowser {
browser := &ChromeBrowser{}
browser.name = "chrome"
browser.bType = TChrome
browser.baseDir = Chrome.BookmarkDir
browser.bkFile = Chrome.BookmarkFile
browser.stats = &ParserStats{}
2017-11-30 15:08:12 +00:00
browser.nodeTree = &Node{Name: "root", Parent: nil}
2017-11-20 15:05:44 +00:00
browser.SetupWatcher()
return browser
}
// Watch starts the watcher goroutine unless one is already running.
// It reports whether a new watcher was actually started.
func (bw *ChromeBrowser) Watch() bool {
	if bw.isWatching {
		return false
	}

	go WatcherThread(bw)
	bw.isWatching = true

	return true
}
// Load performs the browser-agnostic loading steps from BaseBrowser,
// then runs a full parse of the Chrome bookmark file.
func (bw *ChromeBrowser) Load() {
	// BaseBrowser load method
	bw.BaseBrowser.Load()

	bw.Run()
}
2017-11-20 18:07:15 +00:00
func (bw *ChromeBrowser) Run() {
2017-11-20 15:05:44 +00:00
// Create buffer db
//bufferDB := DB{"buffer", DB_BUFFER_PATH, nil, false}
bw.InitBuffer()
defer bw.bufferDB.Close()
// Load bookmark file
bookmarkPath := path.Join(bw.baseDir, bw.bkFile)
f, err := ioutil.ReadFile(bookmarkPath)
logPanic(err)
var parseChildren ParseChildFunc
2017-12-02 12:30:59 +00:00
var jsonParseRecursive RecursiveParseFunc
2017-11-20 15:05:44 +00:00
parseChildren = func(childVal []byte, dataType jsonparser.ValueType, offset int, err error) {
if err != nil {
log.Panic(err)
}
2017-12-02 12:30:59 +00:00
jsonParseRecursive(nil, childVal, dataType, offset)
2017-11-20 15:05:44 +00:00
}
2017-12-02 12:30:59 +00:00
// Needed to store the parent of each child node
var parentNodes []*Node
jsonParseRoots := func(key []byte, node []byte, dataType jsonparser.ValueType, offset int) error {
// If node type is string ignore (needed for sync_transaction_version)
if dataType == jsonparser.String {
return nil
}
2017-11-20 15:05:44 +00:00
bw.stats.currentNodeCount++
2017-12-02 12:30:59 +00:00
rawNode := new(RawNode)
rawNode.parseItems(node)
log.Debugf("Parsing root folder %s", rawNode.name)
2017-11-26 20:17:30 +00:00
2017-12-02 12:30:59 +00:00
currentNode := rawNode.getNode()
2017-11-30 15:08:12 +00:00
2017-12-02 12:30:59 +00:00
// Process this node as parent node later
parentNodes = append(parentNodes, currentNode)
2017-11-30 15:08:12 +00:00
2017-12-02 12:30:59 +00:00
// add the root node as parent to this node
currentNode.Parent = bw.nodeTree
2017-11-20 15:05:44 +00:00
2017-12-02 12:30:59 +00:00
// Add this root node as a child of the root node
bw.nodeTree.Children = append(bw.nodeTree.Children, currentNode)
2017-11-20 15:05:44 +00:00
2017-12-02 12:30:59 +00:00
// Call recursive parsing of this node which must
// a root folder node
jsonparser.ArrayEach(node, parseChildren, jsonNodePaths.Children)
// Finished parsing this root, it is not anymore a parent
_, parentNodes = parentNodes[len(parentNodes)-1], parentNodes[:len(parentNodes)-1]
log.Debugf("Parsed root %s folder", rawNode.name)
return nil
}
2017-11-20 15:05:44 +00:00
2017-12-02 12:30:59 +00:00
// Main recursive parsing function that parses underneath
// each root folder
jsonParseRecursive = func(key []byte, node []byte, dataType jsonparser.ValueType, offset int) error {
2017-11-26 20:17:30 +00:00
2017-11-20 15:05:44 +00:00
// If node type is string ignore (needed for sync_transaction_version)
if dataType == jsonparser.String {
return nil
}
2017-12-02 12:30:59 +00:00
bw.stats.currentNodeCount++
rawNode := new(RawNode)
rawNode.parseItems(node)
currentNode := rawNode.getNode()
log.Debugf("parsing node %s", currentNode.Name)
// if parents array is not empty
if len(parentNodes) != 0 {
parent := parentNodes[len(parentNodes)-1]
log.Debugf("Adding current node to parent %s", parent.Name)
// Add current node to closest parent
currentNode.Parent = parent
// add as parent children
currentNode.Parent.Children = append(currentNode.Parent.Children, currentNode)
}
2017-11-30 15:08:12 +00:00
// if node is a folder with children
2017-12-02 12:30:59 +00:00
if rawNode.childrenType == jsonparser.Array && len(rawNode.children) > 2 { // if len(children) > len("[]")
log.Debugf("Started folder %s", rawNode.name)
parentNodes = append(parentNodes, currentNode)
// Process recursively all child nodes of this folder node
2017-11-30 15:08:12 +00:00
jsonparser.ArrayEach(node, parseChildren, jsonNodePaths.Children)
2017-12-02 12:30:59 +00:00
log.Debugf("Finished folder %s", rawNode.name)
_, parentNodes = parentNodes[len(parentNodes)-1], parentNodes[:len(parentNodes)-1]
2017-11-30 15:08:12 +00:00
2017-12-02 12:30:59 +00:00
}
2017-11-30 15:08:12 +00:00
2017-11-20 15:05:44 +00:00
// if node is url(leaf), handle the url
2017-12-02 12:30:59 +00:00
if _s(rawNode.nType) == jsonNodeTypes.URL {
2017-11-20 15:05:44 +00:00
2017-12-02 12:30:59 +00:00
currentNode.URL = _s(rawNode.url)
2017-11-30 15:08:12 +00:00
2017-11-20 15:05:44 +00:00
bw.stats.currentUrlCount++
2017-11-30 15:08:12 +00:00
// Check if url-node already in index
var nodeVal *Node
iVal, found := bw.URLIndex.Get(currentNode.URL)
2017-11-30 15:08:12 +00:00
nameHash := xxhash.ChecksumString64(currentNode.Name)
// If node url not in index, add it to index
if !found {
//log.Debugf("Not found")
2017-11-20 15:05:44 +00:00
2017-11-30 15:08:12 +00:00
// store hash(name)
currentNode.NameHash = nameHash
// The value in the index will be a
// pointer to currentNode
//log.Debugf("Inserting url %s to index", nodeURL)
bw.URLIndex.Insert(currentNode.URL, currentNode)
// If we find the node already in index
// we check if the hash(name) changed meaning
// the data changed
} else {
2018-05-26 13:34:55 +00:00
log.Debugf("URL Found in index")
2017-11-30 15:08:12 +00:00
nodeVal = iVal.(*Node)
2018-05-26 13:34:55 +00:00
// hash(name) is different, we will:
// 1- update the index by updating the name and namehash
// 2- Run the hooks on the node in case of new commands
2017-11-30 15:08:12 +00:00
if nodeVal.NameHash != nameHash {
2018-05-26 13:34:55 +00:00
log.Debugf("URL name changed !")
2017-11-30 15:08:12 +00:00
// Update node in index
2018-05-26 13:34:55 +00:00
log.Debugf("Current node: name: %s | hash: %v", currentNode.Name, currentNode.NameHash)
log.Debugf("Index node: name: %s | hash: %v", nodeVal.Name, nodeVal.NameHash)
2017-11-30 15:08:12 +00:00
currentNode.NameHash = nameHash
2018-05-26 13:34:55 +00:00
nodeVal.Name = currentNode.Name
nodeVal.NameHash = nameHash
2017-11-30 15:08:12 +00:00
if currentNode.NameHash != nodeVal.NameHash {
2018-05-26 13:34:55 +00:00
panic("currentNode.NameHash != nodeVal.NameHash")
2017-11-30 15:08:12 +00:00
}
// Run parse hooks on node
bw.RunParseHooks(currentNode)
}
// Else we do nothing, the node will not
// change
}
2017-12-02 12:30:59 +00:00
//If parent is folder, add it as tag and add current node as child
//And add this link as child
2017-11-30 15:08:12 +00:00
if currentNode.Parent.Type == jsonNodeTypes.Folder {
log.Debug("Parent is folder, parsing as tag ...")
currentNode.Tags = append(currentNode.Tags, currentNode.Parent.Name)
}
2017-11-26 20:17:30 +00:00
2017-11-20 15:05:44 +00:00
}
return nil
}
rootsData, _, _, _ := jsonparser.Get(f, "roots")
2017-12-02 12:30:59 +00:00
start := time.Now()
jsonparser.ObjectEach(rootsData, jsonParseRoots)
elapsed := time.Since(start)
log.Debugf("Parsing tree in %s", elapsed)
2017-11-20 15:05:44 +00:00
2017-11-27 08:46:50 +00:00
// Debug walk tree
2018-05-26 13:34:55 +00:00
//go WalkNode(bw.nodeTree)
// Reset the index to represent the nodetree
bw.RebuildIndex()
2017-11-26 20:17:30 +00:00
2017-11-20 15:05:44 +00:00
// Finished parsing
2017-11-20 18:31:17 +00:00
log.Debugf("parsed %d bookmarks", bw.stats.currentUrlCount)
2017-11-20 15:05:44 +00:00
// Reset parser counter
bw.stats.lastURLCount = bw.stats.currentUrlCount
bw.stats.lastNodeCount = bw.stats.currentNodeCount
bw.stats.currentNodeCount = 0
bw.stats.currentUrlCount = 0
2018-03-07 14:05:30 +00:00
// Compare cacheDB with index for new bookmarks
// cacheDB is an sqlite and buffer is a RBTree hash map
// I need to implement the cache in a RBTree also to
// keep everything in a tree,
//
// or compare between tree and sqlite
2017-11-30 15:08:12 +00:00
log.Debug("TODO: Compare cacheDB with index")
2017-11-20 15:05:44 +00:00
// If cacheDB is empty just copy bufferDB to cacheDB
// until local db is already populated and preloaded
//debugPrint("%d", bufferDB.Count())
if empty, err := cacheDB.isEmpty(); empty {
logPanic(err)
2017-11-20 18:31:17 +00:00
log.Debug("cache empty: loading bufferdb to cachedb")
2017-11-20 15:05:44 +00:00
//start := time.Now()
bw.bufferDB.SyncTo(cacheDB)
//debugPrint("<%s> is now (%d)", cacheDB.name, cacheDB.Count())
//elapsed := time.Since(start)
//debugPrint("copy in %s", elapsed)
debugPrint("syncing <%s> to disk", cacheDB.name)
cacheDB.SyncToDisk(getDBFullPath())
}
// TODO: Check if new/modified bookmarks in buffer compared to cache
2017-11-20 18:31:17 +00:00
log.Debugf("TODO: check if new/modified bookmarks in %s compared to %s", bw.bufferDB.name, cacheDB.name)
2017-11-20 15:05:44 +00:00
}