From fa348dca5d3a1ae9c9f299c3e3494c84a51e8bbe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alo=C3=AFs=20Micard?= Date: Tue, 22 Sep 2020 16:07:31 +0200 Subject: [PATCH] Last cleanups - API: implement pagination for search endpoints - Crawler: do not save body when code > 302 - Scripts: add stop.sh --- README.md | 10 ++++-- internal/api/api.go | 61 ++++++++++++++++++++++++++++++++++++- internal/crawler/crawler.go | 2 ++ scripts/stop.sh | 3 ++ 4 files changed, 73 insertions(+), 3 deletions(-) create mode 100755 scripts/stop.sh diff --git a/README.md b/README.md index 544e36b..cec6b2e 100644 --- a/README.md +++ b/README.md @@ -50,7 +50,13 @@ this will schedule given URL for crawling. ## How to view results -At the moment there is no Trandoshan dashboard. -You can use the Kibana dashboard available at http://localhost:15004. +### Using trandoshanctl +```sh +trandoshanctl search +``` + +### Using Kibana + +You can use the Kibana dashboard available at http://localhost:15004. You will need to create an index pattern named 'resources', and when it asks for the time field, choose 'time'. diff --git a/internal/api/api.go b/internal/api/api.go index dab14da..4eb534a 100644 --- a/internal/api/api.go +++ b/internal/api/api.go @@ -14,13 +14,28 @@ import ( "github.com/rs/zerolog/log" "github.com/urfave/cli/v2" "net/http" + "strconv" "time" ) var ( resourcesIndex = "resources" + + paginationPageHeader = "X-Pagination-Page" + paginationSizeHeader = "X-Pagination-Size" + paginationCountHeader = "X-Pagination-Count" + paginationPageQueryParam = "pagination-page" + paginationSizeQueryParam = "pagination-size" + + defaultPaginationSize = 50 + maxPaginationSize = 100 ) +type pagination struct { + page int + size int +} + // Represent a resource in elasticsearch type resourceIndex struct { URL string `json:"url"` @@ -115,11 +130,26 @@ func searchResources(es *elastic.Client) echo.HandlerFunc { return c.NoContent(http.StatusUnprocessableEntity) } - // Perform the search request. 
+		// Acquire pagination
+		p := readPagination(c)
+		from := (p.page - 1) * p.size
+
+		// Build up search query
 		query := buildSearchQuery(string(b), c.QueryParam("keyword"))
+
+		// Get total count
+		totalCount, err := es.Count(resourcesIndex).Query(query).Do(context.Background())
+		if err != nil {
+			log.Err(err).Msg("Error while counting on ES")
+			return c.NoContent(http.StatusInternalServerError)
+		}
+
+		// Perform the search request.
 		res, err := es.Search().
 			Index(resourcesIndex).
 			Query(query).
+			From(from).
+			Size(p.size).
 			Do(context.Background())
 		if err != nil {
 			log.Err(err).Msg("Error while searching on ES")
@@ -142,6 +172,9 @@ func searchResources(es *elastic.Client) echo.HandlerFunc {
 			resources = append(resources, resource)
 		}
 
+		// Write pagination
+		writePagination(c, p, totalCount)
+
 		return c.JSON(http.StatusOK, resources)
 	}
 }
@@ -238,3 +271,36 @@ func setupElasticSearch(ctx context.Context, es *elastic.Client) error {
 
 	return nil
 }
+
+// readPagination extracts the requested page and page size from the query
+// parameters of the request. A missing, malformed or non-positive page falls
+// back to 1; a missing, malformed or non-positive size falls back to
+// defaultPaginationSize, and the size is capped at maxPaginationSize.
+func readPagination(c echo.Context) pagination {
+	paginationPage, err := strconv.Atoi(c.QueryParam(paginationPageQueryParam))
+	// Pages are 1-based: a page below 1 would produce a negative search offset
+	if err != nil || paginationPage < 1 {
+		paginationPage = 1
+	}
+	paginationSize, err := strconv.Atoi(c.QueryParam(paginationSizeQueryParam))
+	if err != nil || paginationSize < 1 {
+		paginationSize = defaultPaginationSize
+	}
+	// Prevent too many results from being returned
+	if paginationSize > maxPaginationSize {
+		paginationSize = maxPaginationSize
+	}
+
+	return pagination{
+		page: paginationPage,
+		size: paginationSize,
+	}
+}
+
+// writePagination publishes the current page, the page size and the total
+// number of matching resources as X-Pagination-* response headers.
+func writePagination(c echo.Context, p pagination, totalCount int64) {
+	c.Response().Header().Set(paginationPageHeader, strconv.Itoa(p.page))
+	c.Response().Header().Set(paginationSizeHeader, strconv.Itoa(p.size))
+	c.Response().Header().Set(paginationCountHeader, strconv.FormatInt(totalCount, 10))
+}
diff --git a/internal/crawler/crawler.go b/internal/crawler/crawler.go
index 7e4ce65..6c3df52 100644
--- a/internal/crawler/crawler.go
+++ b/internal/crawler/crawler.go
@@ -129,6 +129,8 @@ func crawURL(httpClient 
*fasthttp.Client, url string, allowedContentTypes []stri } switch code := resp.StatusCode(); { + case code > 302: + return "", fmt.Errorf("non-managed error code %d", code) // follow redirect case code == 301 || code == 302: if location := string(resp.Header.Peek("Location")); location != "" { diff --git a/scripts/stop.sh b/scripts/stop.sh new file mode 100755 index 0000000..4c7d11f --- /dev/null +++ b/scripts/stop.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +./scripts/exec.sh stop "$@"