Merge remote-tracking branch 'origin/develop' into rabbitmq-refactoring
commit
db983c584b
@ -1 +1,118 @@
|
||||
package crawler
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"github.com/creekorful/trandoshan/internal/http_mock"
|
||||
"github.com/creekorful/trandoshan/internal/messaging"
|
||||
"github.com/creekorful/trandoshan/internal/messaging_mock"
|
||||
"github.com/golang/mock/gomock"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestCrawlURLForbiddenContentType(t *testing.T) {
|
||||
mockCtrl := gomock.NewController(t)
|
||||
defer mockCtrl.Finish()
|
||||
|
||||
httpClientMock := http_mock.NewMockClient(mockCtrl)
|
||||
url := "https://example.onion"
|
||||
allowedContentTypes := []string{"text/plain"}
|
||||
|
||||
httpResponseMock := http_mock.NewMockResponse(mockCtrl)
|
||||
httpResponseMock.EXPECT().Headers().Return(map[string]string{"Content-Type": "image/png"})
|
||||
|
||||
httpClientMock.EXPECT().Get(url).Return(httpResponseMock, nil)
|
||||
|
||||
body, headers, err := crawURL(httpClientMock, url, allowedContentTypes)
|
||||
if body != "" || headers != nil || err == nil {
|
||||
t.Fail()
|
||||
}
|
||||
}
|
||||
|
||||
func TestCrawlURLSameContentType(t *testing.T) {
|
||||
mockCtrl := gomock.NewController(t)
|
||||
defer mockCtrl.Finish()
|
||||
|
||||
httpClientMock := http_mock.NewMockClient(mockCtrl)
|
||||
url := "https://example.onion"
|
||||
allowedContentTypes := []string{"text/plain"}
|
||||
|
||||
httpResponseMock := http_mock.NewMockResponse(mockCtrl)
|
||||
httpResponseMock.EXPECT().Headers().Times(2).Return(map[string]string{"Content-Type": "text/plain"})
|
||||
httpResponseMock.EXPECT().Body().Return(strings.NewReader("Hello"))
|
||||
|
||||
httpClientMock.EXPECT().Get(url).Return(httpResponseMock, nil)
|
||||
|
||||
body, headers, err := crawURL(httpClientMock, url, allowedContentTypes)
|
||||
if err != nil {
|
||||
t.Fail()
|
||||
}
|
||||
if body != "Hello" {
|
||||
t.Fail()
|
||||
}
|
||||
if len(headers) != 1 {
|
||||
t.Fail()
|
||||
}
|
||||
if headers["Content-Type"] != "text/plain" {
|
||||
t.Fail()
|
||||
}
|
||||
}
|
||||
|
||||
func TestCrawlURLNoContentTypeFiltering(t *testing.T) {
|
||||
mockCtrl := gomock.NewController(t)
|
||||
defer mockCtrl.Finish()
|
||||
|
||||
httpClientMock := http_mock.NewMockClient(mockCtrl)
|
||||
url := "https://example.onion"
|
||||
allowedContentTypes := []string{""}
|
||||
|
||||
httpResponseMock := http_mock.NewMockResponse(mockCtrl)
|
||||
httpResponseMock.EXPECT().Headers().Times(2).Return(map[string]string{"Content-Type": "text/plain"})
|
||||
httpResponseMock.EXPECT().Body().Return(strings.NewReader("Hello"))
|
||||
|
||||
httpClientMock.EXPECT().Get(url).Return(httpResponseMock, nil)
|
||||
|
||||
body, headers, err := crawURL(httpClientMock, url, allowedContentTypes)
|
||||
if err != nil {
|
||||
t.Fail()
|
||||
}
|
||||
if body != "Hello" {
|
||||
t.Fail()
|
||||
}
|
||||
if len(headers) != 1 {
|
||||
t.Fail()
|
||||
}
|
||||
if headers["Content-Type"] != "text/plain" {
|
||||
t.Fail()
|
||||
}
|
||||
}
|
||||
|
||||
func TestHandleMessage(t *testing.T) {
|
||||
mockCtrl := gomock.NewController(t)
|
||||
defer mockCtrl.Finish()
|
||||
|
||||
subscriberMock := messaging_mock.NewMockSubscriber(mockCtrl)
|
||||
httpClientMock := http_mock.NewMockClient(mockCtrl)
|
||||
httpResponseMock := http_mock.NewMockResponse(mockCtrl)
|
||||
|
||||
msg := bytes.NewReader(nil)
|
||||
subscriberMock.EXPECT().
|
||||
ReadMsg(msg, &messaging.URLTodoMsg{}).
|
||||
SetArg(1, messaging.URLTodoMsg{URL: "https://example.onion/image.png?id=12&test=2"}).
|
||||
Return(nil)
|
||||
|
||||
httpResponseMock.EXPECT().Headers().Times(2).Return(map[string]string{"Content-Type": "text/plain", "Server": "Debian"})
|
||||
httpResponseMock.EXPECT().Body().Return(strings.NewReader("Hello"))
|
||||
|
||||
httpClientMock.EXPECT().Get("https://example.onion/image.png?id=12&test=2").Return(httpResponseMock, nil)
|
||||
|
||||
subscriberMock.EXPECT().PublishMsg(&messaging.NewResourceMsg{
|
||||
URL: "https://example.onion/image.png?id=12&test=2",
|
||||
Body: "Hello",
|
||||
Headers: map[string]string{"Content-Type": "text/plain", "Server": "Debian"},
|
||||
}).Return(nil)
|
||||
|
||||
if err := handleMessage(httpClientMock, []string{"text/plain", "text/css"})(subscriberMock, msg); err != nil {
|
||||
t.Fail()
|
||||
}
|
||||
}
|
||||
|
@ -0,0 +1,52 @@
|
||||
package http
|
||||
|
||||
//go:generate mockgen -destination=../http_mock/client_mock.go -package=http_mock . Client
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"github.com/valyala/fasthttp"
|
||||
)
|
||||
|
||||
// Client is an HTTP client
|
||||
type Client interface {
|
||||
// Get the corresponding URL
|
||||
// this methods follows redirections
|
||||
Get(URL string) (Response, error)
|
||||
}
|
||||
|
||||
type client struct {
|
||||
c *fasthttp.Client
|
||||
}
|
||||
|
||||
// NewFastHTTPClient create a new Client using fasthttp.Client as backend
|
||||
func NewFastHTTPClient(c *fasthttp.Client) Client {
|
||||
return &client{c: c}
|
||||
}
|
||||
|
||||
func (c *client) Get(URL string) (Response, error) {
|
||||
req := fasthttp.AcquireRequest()
|
||||
resp := fasthttp.AcquireResponse()
|
||||
defer fasthttp.ReleaseRequest(req)
|
||||
defer fasthttp.ReleaseResponse(resp)
|
||||
|
||||
req.SetRequestURI(URL)
|
||||
|
||||
if err := c.c.Do(req, resp); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
switch code := resp.StatusCode(); {
|
||||
case code > 302:
|
||||
return nil, fmt.Errorf("non-managed error code %d", code)
|
||||
// follow redirect
|
||||
case code == 301 || code == 302:
|
||||
if location := string(resp.Header.Peek("Location")); location != "" {
|
||||
return c.Get(location)
|
||||
}
|
||||
}
|
||||
|
||||
r := &response{}
|
||||
resp.CopyTo(&r.raw)
|
||||
|
||||
return r, nil
|
||||
}
|
@ -0,0 +1,33 @@
|
||||
package http
|
||||
|
||||
//go:generate mockgen -destination=../http_mock/response_mock.go -package=http_mock . Response
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"github.com/valyala/fasthttp"
|
||||
"io"
|
||||
)
|
||||
|
||||
// Response is a read-only view over an HTTP response.
type Response interface {
	// Headers returns the response headers.
	Headers() map[string]string
	// Body returns a reader over the response body.
	Body() io.Reader
}
|
||||
|
||||
type response struct {
|
||||
raw fasthttp.Response
|
||||
}
|
||||
|
||||
func (r *response) Headers() map[string]string {
|
||||
headers := map[string]string{}
|
||||
r.raw.Header.VisitAll(func(key, value []byte) {
|
||||
headers[string(key)] = string(value) // TODO manage multiple values?
|
||||
})
|
||||
return headers
|
||||
}
|
||||
|
||||
func (r *response) Body() io.Reader {
|
||||
return bytes.NewReader(r.raw.Body())
|
||||
}
|
Loading…
Reference in New Issue