Last cleanups

- API: implement pagination for search endpoints
- Crawler: do not save body when code > 302
- Scripts: add stop.sh
This commit is contained in:
Aloïs Micard 2020-09-22 16:07:31 +02:00
parent e0dfc648b6
commit fa348dca5d
No known key found for this signature in database
GPG Key ID: 1A0EB82F071F5EFE
4 changed files with 73 additions and 3 deletions

View File

@ -50,7 +50,13 @@ this will schedule given URL for crawling.
## How to view results
At the moment there is no Trandoshan dashboard.
You can use the Kibana dashboard available at http://localhost:15004.
## Using trandoshanctl
```sh
trandoshanctl search <term>
```
## Using Kibana
You can use the Kibana dashboard available at http://localhost:15004.
You will need to create an index pattern named 'resources', and when it asks for the time field, choose 'time'.

View File

@ -14,13 +14,28 @@ import (
"github.com/rs/zerolog/log"
"github.com/urfave/cli/v2"
"net/http"
"strconv"
"time"
)
// resourcesIndex is the Elasticsearch index name used by the count and search requests.
var resourcesIndex = "resources"

// Response headers carrying the pagination state back to the client.
var (
	paginationPageHeader  = "X-Pagination-Page"
	paginationSizeHeader  = "X-Pagination-Size"
	paginationCountHeader = "X-Pagination-Count"
)

// Query parameters the client uses to select a page and a page size.
var (
	paginationPageQueryParam = "pagination-page"
	paginationSizeQueryParam = "pagination-size"
)

// Page size applied when the client gives none (or an unparsable one), and
// the upper bound enforced on any requested page size.
var (
	defaultPaginationSize = 50
	maxPaginationSize     = 100
)
// pagination holds the paging settings of a single search request:
// the requested page number and the number of results per page.
type pagination struct {
	page, size int
}
// Represent a resource in elasticsearch
type resourceIndex struct {
URL string `json:"url"`
@ -115,11 +130,26 @@ func searchResources(es *elastic.Client) echo.HandlerFunc {
return c.NoContent(http.StatusUnprocessableEntity)
}
// Perform the search request.
// Acquire pagination
p := readPagination(c)
from := (p.page - 1) * p.size
// Build up search query
query := buildSearchQuery(string(b), c.QueryParam("keyword"))
// Get total count
totalCount, err := es.Count(resourcesIndex).Query(query).Do(context.Background())
if err != nil {
log.Err(err).Msg("Error while counting on ES")
return c.NoContent(http.StatusInternalServerError)
}
// Perform the search request.
res, err := es.Search().
Index(resourcesIndex).
Query(query).
From(from).
Size(p.size).
Do(context.Background())
if err != nil {
log.Err(err).Msg("Error while searching on ES")
@ -142,6 +172,9 @@ func searchResources(es *elastic.Client) echo.HandlerFunc {
resources = append(resources, resource)
}
// Write pagination
writePagination(c, p, totalCount)
return c.JSON(http.StatusOK, resources)
}
}
@ -238,3 +271,29 @@ func setupElasticSearch(ctx context.Context, es *elastic.Client) error {
return nil
}
// readPagination extracts the pagination settings from the request query string.
//
// The page number comes from the paginationPageQueryParam parameter and the
// page size from paginationSizeQueryParam. Values that are missing, not valid
// integers, or lower than 1 are replaced by safe defaults (page 1 and
// defaultPaginationSize respectively), and the page size is capped at
// maxPaginationSize.
//
// The returned pagination therefore always has page >= 1 and
// 1 <= size <= maxPaginationSize (given the configured defaults are positive).
func readPagination(c echo.Context) pagination {
	page, err := strconv.Atoi(c.QueryParam(paginationPageQueryParam))
	if err != nil || page < 1 {
		// Pages are 1-based: the search handler derives the offset as
		// (page - 1) * size, so page 0 or a negative page would produce a
		// negative offset.
		page = 1
	}

	size, err := strconv.Atoi(c.QueryParam(paginationSizeQueryParam))
	if err != nil || size < 1 {
		// A zero or negative size cannot describe a page of results.
		size = defaultPaginationSize
	}

	// Prevent too many results from being returned
	if size > maxPaginationSize {
		size = maxPaginationSize
	}

	return pagination{
		page: page,
		size: size,
	}
}
// writePagination publishes the pagination state on the response headers:
// the page number and page size taken from p, and the total number of
// matching results given by totalCount.
func writePagination(c echo.Context, p pagination, totalCount int64) {
	headers := c.Response().Header()

	headers.Set(paginationPageHeader, strconv.Itoa(p.page))
	headers.Set(paginationSizeHeader, strconv.Itoa(p.size))
	headers.Set(paginationCountHeader, strconv.FormatInt(totalCount, 10))
}

View File

@ -129,6 +129,8 @@ func crawURL(httpClient *fasthttp.Client, url string, allowedContentTypes []stri
}
switch code := resp.StatusCode(); {
case code > 302:
return "", fmt.Errorf("non-managed error code %d", code)
// follow redirect
case code == 301 || code == 302:
if location := string(resp.Header.Peek("Location")); location != "" {

3
scripts/stop.sh Executable file
View File

@ -0,0 +1,3 @@
#!/bin/bash
# Runs scripts/exec.sh with the "stop" sub-command, forwarding every argument
# given to this script unchanged.
# The path is relative, so run this from the directory that contains scripts/.
./scripts/exec.sh stop "$@"