// gosuki/chrome.go

package main

import (
	"io/ioutil"
	"path"
	"time"

	"github.com/OneOfOne/xxhash"
	"github.com/buger/jsonparser"
	"github.com/fsnotify/fsnotify"
)

// ChromeData holds the default bookmark file location for Chrome.
// NOTE: the profile directory is currently hardcoded to a single user.
var ChromeData = BrowserPaths{
	BookmarkFile: "Bookmarks",
	BookmarkDir:  "/home/spike/.config/google-chrome-unstable/Default/",
}
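
// The profile directory could instead be resolved at runtime; a minimal
// sketch using only the standard library (hypothetical, not wired in):
//
//	home, err := os.UserHomeDir()
//	if err == nil {
//		ChromeData.BookmarkDir = filepath.Join(home,
//			".config/google-chrome-unstable/Default")
//	}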

type ChromeBrowser struct {
	BaseBrowser // embedding
}

// Chrome node types as they appear in the JSON bookmark file
var jsonNodeTypes = struct {
	Folder, URL string
}{"folder", "url"}

// JSON keys that are looked up in each bookmark node
var jsonNodePaths = struct {
	Type, Children, URL string
}{"type", "children", "url"}

// ParseChildFunc matches the callback signature of jsonparser.ArrayEach
type ParseChildFunc func([]byte, jsonparser.ValueType, int, error)

// RecursiveParseFunc matches the callback signature of jsonparser.ObjectEach
type RecursiveParseFunc func([]byte, []byte, jsonparser.ValueType, int) error

// RawNode holds the raw byte slices of a bookmark node as extracted
// from the JSON payload, before conversion to a *Node.
type RawNode struct {
	name         []byte
	nType        []byte
	url          []byte
	children     []byte
	childrenType jsonparser.ValueType
}

func (rawNode *RawNode) parseItems(nodeData []byte) {
	// Paths to look up in the node payload
	paths := [][]string{
		{"type"},
		{"name"}, // Title of page
		{"url"},
		{"children"},
	}

	jsonparser.EachKey(nodeData, func(idx int, value []byte, vt jsonparser.ValueType, err error) {
		switch idx {
		case 0:
			rawNode.nType = value
		case 1: // name or title
			rawNode.name = value
		case 2:
			rawNode.url = value
		case 3:
			rawNode.children, rawNode.childrenType = value, vt
		}
	}, paths...)
}
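
// Usage sketch (hypothetical input): given a leaf node payload such as
//
//	data := []byte(`{"type": "url", "name": "Go", "url": "https://golang.org"}`)
//	rawNode := new(RawNode)
//	rawNode.parseItems(data)
//	// _s(rawNode.nType) == "url", _s(rawNode.url) == "https://golang.org"
//
// Folder nodes populate rawNode.children with the raw JSON array of
// child nodes instead of rawNode.url.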

// getNode converts a *RawNode to a *Node. Only Type and Name are
// copied here; URL and children are handled by the caller.
func (rawNode *RawNode) getNode() *Node {
	node := new(Node)
	node.Type = _s(rawNode.nType)
	node.Name = _s(rawNode.name)
	return node
}

func NewChromeBrowser() IBrowser {
	browser := new(ChromeBrowser)

	browser.name = "chrome"
	browser.bType = TChrome
	browser.baseDir = ChromeData.BookmarkDir
	browser.bkFile = ChromeData.BookmarkFile
	browser.Stats = new(ParserStats)
	browser.NodeTree = &Node{Name: "root", Parent: nil, Type: "root"}
	browser.useFileWatcher = true

	// Buffer shared across parsing jobs
	browser.InitBuffer()

	// Create watch objects; we watch the base directory for Create
	// events, since the browser replaces the bookmark file on save.
	watchedEvents := []fsnotify.Op{fsnotify.Create}
	w := &Watch{
		path:       browser.baseDir,
		eventTypes: watchedEvents,
		eventNames: []string{path.Join(browser.baseDir, browser.bkFile)},
		resetWatch: true,
	}
	browser.SetupFileWatcher(w)

	return browser
}
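
// Usage sketch (hypothetical caller), assuming IBrowser exposes the
// methods defined in this file:
//
//	browser := NewChromeBrowser()
//	browser.Load()  // initial parse of the bookmark file
//	browser.Watch() // start the watcher goroutine
//	// ... on exit:
//	browser.Shutdown()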

func (bw ChromeBrowser) Shutdown() {
	log.Debugf("<%s> shutting down ... ", bw.name)

	err := bw.BaseBrowser.Close()
	if err != nil {
		log.Error(err)
	}
}

// Watch starts the watcher goroutine if it is not already running and
// reports whether a new watcher was started.
func (bw *ChromeBrowser) Watch() bool {
	if !bw.isWatching {
		go WatcherThread(bw)
		bw.isWatching = true
		log.Infof("<%s> Watching %s", bw.name, bw.GetPath())

		return true
	}

	return false
}

func (bw *ChromeBrowser) Load() {
	// BaseBrowser load method
	bw.BaseBrowser.Load()

	bw.Run()
}

func (bw *ChromeBrowser) Run() {
	// Rebuild node tree
	bw.RebuildNodeTree()

	// Load the bookmark file
	bookmarkPath := path.Join(bw.baseDir, bw.bkFile)
	f, err := ioutil.ReadFile(bookmarkPath)
	if err != nil {
		log.Critical(err)
		return // nothing to parse if the file could not be read
	}

	var parseChildren ParseChildFunc
	var jsonParseRecursive RecursiveParseFunc

	// parseChildren forwards every element of a "children" array to the
	// recursive parser
	parseChildren = func(childVal []byte, dataType jsonparser.ValueType, offset int, err error) {
		if err != nil {
			log.Panic(err)
		}

		jsonParseRecursive(nil, childVal, dataType, offset)
	}

	// Stack of parent nodes, used to attach each child to its closest
	// enclosing folder while recursing
	var parentNodes []*Node

	jsonParseRoots := func(key []byte, node []byte, dataType jsonparser.ValueType, offset int) error {
		// Ignore string values (needed for sync_transaction_version)
		if dataType == jsonparser.String {
			return nil
		}

		bw.Stats.currentNodeCount++

		rawNode := new(RawNode)
		rawNode.parseItems(node)
		//log.Debugf("Parsing root folder %s", rawNode.name)

		currentNode := rawNode.getNode()

		// Process this node as a parent node later
		parentNodes = append(parentNodes, currentNode)

		// Add the tree root as parent of this node
		currentNode.Parent = bw.NodeTree

		// Add this root node as a child of the tree root
		bw.NodeTree.Children = append(bw.NodeTree.Children, currentNode)

		// Recursively parse this node, which must be a root folder node
		jsonparser.ArrayEach(node, parseChildren, jsonNodePaths.Children)

		// Finished parsing this root: pop it off the parent stack
		_, parentNodes = parentNodes[len(parentNodes)-1], parentNodes[:len(parentNodes)-1]
		//log.Debugf("Parsed root folder %s", rawNode.name)

		return nil
	}
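
	// For reference, the "roots" object of a Chrome Bookmarks file looks
	// roughly like this (sketch, fields abbreviated):
	//
	//	"roots": {
	//	    "bookmark_bar": {"type": "folder", "name": "...", "children": [...]},
	//	    "other":        {"type": "folder", "name": "...", "children": [...]},
	//	    "synced":       {"type": "folder", "name": "...", "children": [...]},
	//	    "sync_transaction_version": "1"
	//	}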

	// Main recursive parsing function that handles everything below
	// each root folder
	jsonParseRecursive = func(key []byte, node []byte, dataType jsonparser.ValueType, offset int) error {
		// Ignore string values (needed for sync_transaction_version)
		if dataType == jsonparser.String {
			return nil
		}

		bw.Stats.currentNodeCount++

		rawNode := new(RawNode)
		rawNode.parseItems(node)
		currentNode := rawNode.getNode()
		//log.Debugf("parsing node %s", currentNode.Name)

		// If the parent stack is not empty, attach the current node to
		// the closest enclosing parent
		if len(parentNodes) != 0 {
			parent := parentNodes[len(parentNodes)-1]
			//log.Debugf("Adding current node to parent %s", parent.Name)

			currentNode.Parent = parent
			currentNode.Parent.Children = append(currentNode.Parent.Children, currentNode)
		}

		// If node is a folder with children
		if rawNode.childrenType == jsonparser.Array && len(rawNode.children) > 2 { // len("[]") == 2
			//log.Debugf("Started folder %s", rawNode.name)
			parentNodes = append(parentNodes, currentNode)

			// Recursively process all child nodes of this folder node
			jsonparser.ArrayEach(node, parseChildren, jsonNodePaths.Children)

			//log.Debugf("Finished folder %s", rawNode.name)
			// Pop the folder off the parent stack
			_, parentNodes = parentNodes[len(parentNodes)-1], parentNodes[:len(parentNodes)-1]
		}

		// If node is a url (leaf), handle the url
		if _s(rawNode.nType) == jsonNodeTypes.URL {
			currentNode.URL = _s(rawNode.url)
			bw.Stats.currentUrlCount++

			// Check if the url-node is already in the index
			var nodeVal *Node
			iVal, found := bw.URLIndex.Get(currentNode.URL)

			nameHash := xxhash.ChecksumString64(currentNode.Name)

			if !found {
				// Not in the index: store hash(name) so that title
				// changes can be detected on the next parse
				currentNode.NameHash = nameHash

				// The value stored in the index is a pointer to currentNode
				bw.URLIndex.Insert(currentNode.URL, currentNode)

				// Run tag parsing hooks
				bw.RunParseHooks(currentNode)
			} else {
				// Already indexed: if hash(name) changed, new
				// commands/tags may have been added to the title, so
				// run the parse hooks again
				nodeVal = iVal.(*Node)
				if nodeVal.NameHash != nameHash {
					//log.Debugf("URL name changed !")
					bw.RunParseHooks(currentNode)
				}
				// Otherwise do nothing: the node did not change
			}

			// If the parent is a folder, use its name as a tag for this
			// bookmark
			if currentNode.Parent.Type == jsonNodeTypes.Folder {
				//log.Debug("Parent is folder, parsing as tag ...")
				currentNode.Tags = append(currentNode.Tags, currentNode.Parent.Name)
			}
		}

		return nil
	}
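
	// Change-detection sketch: a bookmark renamed from
	//	"Go homepage"  ->  "Go homepage #golang"
	// keeps the same URL but produces a different xxhash of the name,
	// so the parse hooks run again and can pick up anything new embedded
	// in the title (the "#golang" tag syntax here is illustrative only).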

	rootsData, _, _, _ := jsonparser.Get(f, "roots")

	// Start a new node tree building job
	start := time.Now()
	jsonparser.ObjectEach(rootsData, jsonParseRoots)
	bw.Stats.lastParseTime = time.Since(start)
	log.Debugf("<%s> parsed tree in %s", bw.name, bw.Stats.lastParseTime)
	// Finished node tree building job

	// Debug walk tree
	//go PrintTree(bw.NodeTree)

	// Rebuild the URL index from the fresh node tree
	bw.RebuildIndex()

	// Finished parsing
	log.Debugf("<%s> parsed %d bookmarks and %d nodes", bw.name, bw.Stats.currentUrlCount, bw.Stats.currentNodeCount)

	// Reset parser counters
	bw.ResetStats()

	// Sync the node tree to the buffer database
	//log.Debugf("<%s> buffer content", bw.name)
	//bw.BufferDB.Print()

	log.Debugf("<%s> syncing to buffer", bw.name)
	syncTreeToBuffer(bw.NodeTree, bw.BufferDB)
	log.Debugf("<%s> tree synced to buffer", bw.name)

	//bw.BufferDB.Print()

	// CacheDB represents bookmarks across all browsers.
	// From browsers it should support add/update only; deletion should
	// only be possible through the admin interface. We could have an
	// @ignore command to ignore a bookmark.

	// URLIndex is a hashmap of all URLs representing the current state
	// of the browser.
	// NodeTree is the current state of the browser as a tree.
	// Buffer is the current state of the browser, represented by
	// URLIndex and NodeTree.

	// If CacheDB is empty, just copy the buffer to CacheDB until the
	// local db is populated and preloaded.
	//debugPrint("%d", BufferDB.Count())
	empty, err := CacheDB.isEmpty()
	if err != nil {
		log.Error(err)
	}
	if empty {
		log.Info("cache empty: loading buffer to CacheDB")

		//start := time.Now()
		bw.BufferDB.CopyTo(CacheDB)
		//debugPrint("<%s> is now (%d)", CacheDB.name, CacheDB.Count())
		//elapsed := time.Since(start)
		//debugPrint("copy in %s", elapsed)

		log.Debugf("syncing <%s> to disk", CacheDB.name)
		CacheDB.SyncToDisk(getDBFullPath())
	}

	bw.BufferDB.SyncTo(CacheDB)
}