mirror of
https://github.com/creekorful/bathyscaphe
synced 2024-11-19 15:25:44 +00:00
Some cleanup
This commit is contained in:
parent
fa348dca5d
commit
e61dc42d3c
@ -32,7 +32,7 @@ Ensure you have at least 3GB of memory as the Elasticsearch stack docker will re
|
|||||||
|
|
||||||
# How to start the crawling process
|
# How to start the crawling process
|
||||||
|
|
||||||
Since the API is explosed on localhost:15005, one can use it to start the crawling process:
|
Since the API is exposed on localhost:15005, one can use it to start the crawling process:
|
||||||
|
|
||||||
using trandoshanctl executable:
|
using trandoshanctl executable:
|
||||||
|
|
||||||
|
@ -84,7 +84,7 @@ func handleMessage(apiClient api.Client) natsutil.MsgHandler {
|
|||||||
// Extract & process resource
|
// Extract & process resource
|
||||||
resDto, urls, err := extractResource(resMsg)
|
resDto, urls, err := extractResource(resMsg)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Err(err).Msg("Ersror while extracting resource")
|
log.Err(err).Msg("Error while extracting resource")
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -148,7 +148,6 @@ func extractTitle(body string) string {
|
|||||||
return ""
|
return ""
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO improve
|
|
||||||
startPos := strings.Index(cleanBody, "<title>") + len("<title>")
|
startPos := strings.Index(cleanBody, "<title>") + len("<title>")
|
||||||
endPos := strings.Index(cleanBody, "</title>")
|
endPos := strings.Index(cleanBody, "</title>")
|
||||||
|
|
||||||
|
@ -73,6 +73,10 @@ func search(c *cli.Context) error {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if len(res) == 0 {
|
||||||
|
fmt.Println("No resources crawled (yet).")
|
||||||
|
}
|
||||||
|
|
||||||
for _, r := range res {
|
for _, r := range res {
|
||||||
fmt.Printf("%s - %s\n", r.URL, r.Title)
|
fmt.Printf("%s - %s\n", r.URL, r.Title)
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user