|
|
@ -3,7 +3,6 @@ package scheduler
|
|
|
|
import (
|
|
|
|
import (
|
|
|
|
"encoding/base64"
|
|
|
|
"encoding/base64"
|
|
|
|
"fmt"
|
|
|
|
"fmt"
|
|
|
|
"github.com/PuerkitoBio/purell"
|
|
|
|
|
|
|
|
"github.com/creekorful/trandoshan/api"
|
|
|
|
"github.com/creekorful/trandoshan/api"
|
|
|
|
"github.com/creekorful/trandoshan/internal/messaging"
|
|
|
|
"github.com/creekorful/trandoshan/internal/messaging"
|
|
|
|
"github.com/creekorful/trandoshan/internal/util/logging"
|
|
|
|
"github.com/creekorful/trandoshan/internal/util/logging"
|
|
|
@ -74,19 +73,20 @@ func handleMessage(apiClient api.Client) natsutil.MsgHandler {
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
log.Debug().Str("url", urlMsg.URL).Msg("Processing URL: %s")
|
|
|
|
log.Debug().Str("url", urlMsg.URL).Msg("Processing URL: %s")
|
|
|
|
normalizedURL, err := normalizeURL(urlMsg.URL)
|
|
|
|
|
|
|
|
|
|
|
|
u, err := url.Parse(urlMsg.URL)
|
|
|
|
if err != nil {
|
|
|
|
if err != nil {
|
|
|
|
log.Err(err).Msg("Error while normalizing URL")
|
|
|
|
log.Err(err).Msg("Error while parsing URL")
|
|
|
|
return err
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Make sure URL is valid .onion
|
|
|
|
// Make sure URL is valid .onion
|
|
|
|
if !strings.Contains(normalizedURL.Host, ".onion") {
|
|
|
|
if !strings.Contains(u.Host, ".onion") {
|
|
|
|
log.Debug().Stringer("url", normalizedURL).Msg("URL is not a valid hidden service")
|
|
|
|
log.Debug().Stringer("url", u).Msg("URL is not a valid hidden service")
|
|
|
|
return err
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
b64URI := base64.URLEncoding.EncodeToString([]byte(normalizedURL.String()))
|
|
|
|
b64URI := base64.URLEncoding.EncodeToString([]byte(u.String()))
|
|
|
|
urls, err := apiClient.SearchResources(b64URI)
|
|
|
|
urls, err := apiClient.SearchResources(b64URI)
|
|
|
|
if err != nil {
|
|
|
|
if err != nil {
|
|
|
|
log.Err(err).Msg("Error while searching URL")
|
|
|
|
log.Err(err).Msg("Error while searching URL")
|
|
|
@ -95,29 +95,14 @@ func handleMessage(apiClient api.Client) natsutil.MsgHandler {
|
|
|
|
|
|
|
|
|
|
|
|
// No matches: schedule!
|
|
|
|
// No matches: schedule!
|
|
|
|
if len(urls) == 0 {
|
|
|
|
if len(urls) == 0 {
|
|
|
|
log.Debug().Stringer("url", normalizedURL).Msg("URL should be scheduled")
|
|
|
|
log.Debug().Stringer("url", u).Msg("URL should be scheduled")
|
|
|
|
if err := natsutil.PublishMsg(nc, &messaging.URLTodoMsg{URL: urlMsg.URL}); err != nil {
|
|
|
|
if err := natsutil.PublishMsg(nc, &messaging.URLTodoMsg{URL: urlMsg.URL}); err != nil {
|
|
|
|
return fmt.Errorf("error while publishing URL: %s", err)
|
|
|
|
return fmt.Errorf("error while publishing URL: %s", err)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
} else {
|
|
|
|
log.Trace().Stringer("url", normalizedURL).Msg("URL should not be scheduled")
|
|
|
|
log.Trace().Stringer("url", u).Msg("URL should not be scheduled")
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
return nil
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
func normalizeURL(u string) (*url.URL, error) {
|
|
|
|
|
|
|
|
normalizedURL, err := purell.NormalizeURLString(u, purell.FlagsUsuallySafeGreedy|
|
|
|
|
|
|
|
|
purell.FlagRemoveDirectoryIndex|purell.FlagRemoveFragment|purell.FlagRemoveDuplicateSlashes)
|
|
|
|
|
|
|
|
if err != nil {
|
|
|
|
|
|
|
|
return nil, fmt.Errorf("error while normalizing URL %s: %s", u, err)
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
nu, err := url.Parse(normalizedURL)
|
|
|
|
|
|
|
|
if err != nil {
|
|
|
|
|
|
|
|
return nil, fmt.Errorf("error while parsing URL: %s", err)
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
return nu, nil
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|