Set up crawler to use Tor proxy to reach hidden services

pull/3/head
Aloïs Micard 5 years ago
parent 33269f7ffa
commit 28f32042c6
No known key found for this signature in database
GPG Key ID: 1A0EB82F071F5EFE

@ -5,17 +5,20 @@ services:
image: nats:2.1.6-alpine3.11
logging:
driver: none
proxy:
torproxy:
image: dperson/torproxy:latest
logging:
driver: none
crawler:
image: trandoshan.io/crawler:latest
command: --log-level debug --nats-uri nats --tor-uri torproxy
command: --log-level debug --nats-uri nats --tor-uri torproxy:9050
restart: always
depends_on:
- nats
- proxy
- torproxy
#feeder:
# image: trandoshan.io/feeder:latest
# command: --log-level debug --nats-uri nats --url https://www.facebookcorewwwi.onion
scheduler:
image: trandoshan.io/scheduler:latest
command: --log-level debug --nats-uri nats

@ -2,13 +2,16 @@ package crawler
import (
"context"
"crypto/tls"
"github.com/creekorful/trandoshan/internal/natsutil"
"github.com/creekorful/trandoshan/pkg/proto"
"github.com/nats-io/nats.go"
"github.com/sirupsen/logrus"
"github.com/urfave/cli/v2"
"github.com/valyala/fasthttp"
"github.com/valyala/fasthttp/fasthttpproxy"
"mvdan.cc/xurls/v2"
"time"
)
// GetApp returns the crawler app.
@ -66,6 +69,16 @@ func execute(ctx *cli.Context) error {
return err
}
// Create the HTTP client
httpClient := &fasthttp.Client{
// Use the given Tor proxy to reach the hidden services
Dial: fasthttpproxy.FasthttpSocksDialer(ctx.String("tor-uri")),
// Disable TLS certificate verification since we do not really care about this
TLSConfig: &tls.Config{InsecureSkipVerify: true},
ReadTimeout: time.Second * 5,
WriteTimeout: time.Second * 5,
}
logrus.Info("Successfully initialized trandoshan-crawler. Waiting for URLs")
for {
@ -77,7 +90,7 @@ func execute(ctx *cli.Context) error {
}
// ... And process it
if err := handleMessage(nc, msg); err != nil {
if err := handleMessage(nc, httpClient, msg); err != nil {
logrus.Warnf("Skipping current message because of error: %s", err)
continue
}
@ -86,7 +99,7 @@ func execute(ctx *cli.Context) error {
return nil
}
func handleMessage(nc *nats.Conn, msg *nats.Msg) error {
func handleMessage(nc *nats.Conn, httpClient *fasthttp.Client, msg *nats.Msg) error {
var urlMsg proto.URLTodoMessage
if err := natsutil.ReadJSON(msg, &urlMsg); err != nil {
return err
@ -94,7 +107,7 @@ func handleMessage(nc *nats.Conn, msg *nats.Msg) error {
logrus.Debugf("Processing URL: %s", urlMsg.URL)
httpClient := fasthttp.Client{}
// Query the website
_, body, err := httpClient.Get(nil, urlMsg.URL)
if err != nil {
return err

Loading…
Cancel
Save