Last cleanups

- API: implement pagination for search endpoints
- Crawler: do not save body when code > 302
- Scripts: add stop.sh
pull/24/head
Aloïs Micard 4 years ago
parent e0dfc648b6
commit fa348dca5d
No known key found for this signature in database
GPG Key ID: 1A0EB82F071F5EFE

@ -50,7 +50,13 @@ this will schedule given URL for crawling.
## How to view results
At the moment there is no Trandoshan dashboard. ## Using trandoshanctl
You can use the Kibana dashboard available at http://localhost:15004.
```sh
trandoshanctl search <term>
```
## Using kibana
You can use the Kibana dashboard available at http://localhost:15004.
You will need to create an index pattern named 'resources', and when it asks for the time field, choose 'time'.

@ -14,13 +14,28 @@ import (
"github.com/rs/zerolog/log" "github.com/rs/zerolog/log"
"github.com/urfave/cli/v2" "github.com/urfave/cli/v2"
"net/http" "net/http"
"strconv"
"time" "time"
) )
var ( var (
resourcesIndex = "resources" resourcesIndex = "resources"
paginationPageHeader = "X-Pagination-Page"
paginationSizeHeader = "X-Pagination-Size"
paginationCountHeader = "X-Pagination-Count"
paginationPageQueryParam = "pagination-page"
paginationSizeQueryParam = "pagination-size"
defaultPaginationSize = 50
maxPaginationSize = 100
) )
// pagination holds the paging parameters of a search request.
//
// page is the requested page number (readPagination defaults it to 1) and
// size is the number of results per page. The search handler derives the
// query offset from them as (page - 1) * size.
type pagination struct {
	page, size int
}
// Represent a resource in elasticsearch // Represent a resource in elasticsearch
type resourceIndex struct { type resourceIndex struct {
URL string `json:"url"` URL string `json:"url"`
@ -115,11 +130,26 @@ func searchResources(es *elastic.Client) echo.HandlerFunc {
return c.NoContent(http.StatusUnprocessableEntity) return c.NoContent(http.StatusUnprocessableEntity)
} }
// Perform the search request. // Acquire pagination
p := readPagination(c)
from := (p.page - 1) * p.size
// Build up search query
query := buildSearchQuery(string(b), c.QueryParam("keyword")) query := buildSearchQuery(string(b), c.QueryParam("keyword"))
// Get total count
totalCount, err := es.Count(resourcesIndex).Query(query).Do(context.Background())
if err != nil {
log.Err(err).Msg("Error while counting on ES")
return c.NoContent(http.StatusInternalServerError)
}
// Perform the search request.
res, err := es.Search(). res, err := es.Search().
Index(resourcesIndex). Index(resourcesIndex).
Query(query). Query(query).
From(from).
Size(p.size).
Do(context.Background()) Do(context.Background())
if err != nil { if err != nil {
log.Err(err).Msg("Error while searching on ES") log.Err(err).Msg("Error while searching on ES")
@ -142,6 +172,9 @@ func searchResources(es *elastic.Client) echo.HandlerFunc {
resources = append(resources, resource) resources = append(resources, resource)
} }
// Write pagination
writePagination(c, p, totalCount)
return c.JSON(http.StatusOK, resources) return c.JSON(http.StatusOK, resources)
} }
} }
@ -238,3 +271,29 @@ func setupElasticSearch(ctx context.Context, es *elastic.Client) error {
return nil return nil
} }
// readPagination extracts the pagination settings from the request query
// string.
//
// The page number is read from the paginationPageQueryParam parameter and the
// page size from the paginationSizeQueryParam parameter. A value that is
// missing, not a valid integer, or not strictly positive is replaced by its
// default (page 1, defaultPaginationSize). The size is additionally capped at
// maxPaginationSize.
func readPagination(c echo.Context) pagination {
	// The caller computes the search offset as (page - 1) * size, so a page
	// below 1 would yield a negative offset; fall back to the first page.
	page, err := strconv.Atoi(c.QueryParam(paginationPageQueryParam))
	if err != nil || page < 1 {
		page = 1
	}

	// A zero or negative size is never a meaningful page size; treat it like
	// a missing value.
	size, err := strconv.Atoi(c.QueryParam(paginationSizeQueryParam))
	if err != nil || size < 1 {
		size = defaultPaginationSize
	}

	// Prevent too many results from being returned.
	if size > maxPaginationSize {
		size = maxPaginationSize
	}

	return pagination{
		page: page,
		size: size,
	}
}
// writePagination exposes the pagination state to the client through response
// headers: the current page (paginationPageHeader), the page size
// (paginationSizeHeader) and the total number of matching results
// (paginationCountHeader).
func writePagination(c echo.Context, p pagination, totalCount int64) {
	headers := c.Response().Header()

	headers.Set(paginationPageHeader, strconv.Itoa(p.page))
	headers.Set(paginationSizeHeader, strconv.Itoa(p.size))
	headers.Set(paginationCountHeader, strconv.FormatInt(totalCount, 10))
}

@ -129,6 +129,8 @@ func crawURL(httpClient *fasthttp.Client, url string, allowedContentTypes []stri
} }
switch code := resp.StatusCode(); { switch code := resp.StatusCode(); {
case code > 302:
return "", fmt.Errorf("non-managed error code %d", code)
// follow redirect // follow redirect
case code == 301 || code == 302: case code == 301 || code == 302:
if location := string(resp.Header.Peek("Location")); location != "" { if location := string(resp.Header.Peek("Location")); location != "" {

@ -0,0 +1,3 @@
#!/bin/bash
# Thin wrapper: runs ./scripts/exec.sh with the "stop" action, forwarding every
# command-line argument unchanged. The path is relative, so it resolves against
# the current working directory.
./scripts/exec.sh stop "$@"
Loading…
Cancel
Save