mirror of
https://github.com/creekorful/bathyscaphe
synced 2024-11-16 00:12:56 +00:00
Last cleanups
- API: implement pagination for search endpoints
- Crawler: do not save body when code > 302
- Scripts: add stop.sh
This commit is contained in:
parent
e0dfc648b6
commit
fa348dca5d
10
README.md
10
README.md
@ -50,7 +50,13 @@ this will schedule given URL for crawling.
|
||||
|
||||
## How to view results
|
||||
|
||||
At the moment there is no Trandoshan dashboard.
|
||||
You can use the Kibana dashboard available at http://localhost:15004.
|
||||
## Using trandoshanctl
|
||||
|
||||
```sh
|
||||
trandoshanctl search <term>
|
||||
```
|
||||
|
||||
## Using Kibana
|
||||
|
||||
You can use the Kibana dashboard available at http://localhost:15004.
|
||||
You will need to create an index pattern named 'resources', and when it asks for the time field, choose 'time'.
|
||||
|
@ -14,13 +14,28 @@ import (
|
||||
"github.com/rs/zerolog/log"
|
||||
"github.com/urfave/cli/v2"
|
||||
"net/http"
|
||||
"strconv"
|
||||
"time"
|
||||
)
|
||||
|
||||
// Index name, pagination header/query-parameter names and page-size limits
// used by the search endpoints. They are declared as constants because no
// code is expected to reassign them.
const (
	// resourcesIndex is the Elasticsearch index queried for resources.
	resourcesIndex = "resources"

	// Response headers carrying the pagination state (see writePagination).
	paginationPageHeader  = "X-Pagination-Page"
	paginationSizeHeader  = "X-Pagination-Size"
	paginationCountHeader = "X-Pagination-Count"

	// Query parameters carrying the requested pagination (see readPagination).
	paginationPageQueryParam = "pagination-page"
	paginationSizeQueryParam = "pagination-size"

	// Fallback page size when the request does not supply a usable one, and
	// the upper bound applied to any requested page size.
	defaultPaginationSize = 50
	maxPaginationSize     = 100
)
|
||||
|
||||
// pagination describes which slice of a result set a request asks for.
//
// page is the requested page number; searchResources turns it into an offset
// with (page-1)*size. size is the number of results per page.
type pagination struct {
	page, size int
}
|
||||
|
||||
// resourceIndex represents a resource in Elasticsearch.
|
||||
type resourceIndex struct {
|
||||
URL string `json:"url"`
|
||||
@ -115,11 +130,26 @@ func searchResources(es *elastic.Client) echo.HandlerFunc {
|
||||
return c.NoContent(http.StatusUnprocessableEntity)
|
||||
}
|
||||
|
||||
// Perform the search request.
|
||||
// Acquire pagination
|
||||
p := readPagination(c)
|
||||
from := (p.page - 1) * p.size
|
||||
|
||||
// Build up search query
|
||||
query := buildSearchQuery(string(b), c.QueryParam("keyword"))
|
||||
|
||||
// Get total count
|
||||
totalCount, err := es.Count(resourcesIndex).Query(query).Do(context.Background())
|
||||
if err != nil {
|
||||
log.Err(err).Msg("Error while counting on ES")
|
||||
return c.NoContent(http.StatusInternalServerError)
|
||||
}
|
||||
|
||||
// Perform the search request.
|
||||
res, err := es.Search().
|
||||
Index(resourcesIndex).
|
||||
Query(query).
|
||||
From(from).
|
||||
Size(p.size).
|
||||
Do(context.Background())
|
||||
if err != nil {
|
||||
log.Err(err).Msg("Error while searching on ES")
|
||||
@ -142,6 +172,9 @@ func searchResources(es *elastic.Client) echo.HandlerFunc {
|
||||
resources = append(resources, resource)
|
||||
}
|
||||
|
||||
// Write pagination
|
||||
writePagination(c, p, totalCount)
|
||||
|
||||
return c.JSON(http.StatusOK, resources)
|
||||
}
|
||||
}
|
||||
@ -238,3 +271,29 @@ func setupElasticSearch(ctx context.Context, es *elastic.Client) error {
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func readPagination(c echo.Context) pagination {
|
||||
paginationPage, err := strconv.Atoi(c.QueryParam(paginationPageQueryParam))
|
||||
if err != nil {
|
||||
paginationPage = 1
|
||||
}
|
||||
paginationSize, err := strconv.Atoi(c.QueryParam(paginationSizeQueryParam))
|
||||
if err != nil {
|
||||
paginationSize = defaultPaginationSize
|
||||
}
|
||||
// Prevent too much results from being returned
|
||||
if paginationSize > maxPaginationSize {
|
||||
paginationSize = maxPaginationSize
|
||||
}
|
||||
|
||||
return pagination{
|
||||
page: paginationPage,
|
||||
size: paginationSize,
|
||||
}
|
||||
}
|
||||
|
||||
func writePagination(c echo.Context, p pagination, totalCount int64) {
|
||||
c.Response().Header().Set(paginationPageHeader, strconv.Itoa(p.page))
|
||||
c.Response().Header().Set(paginationSizeHeader, strconv.Itoa(p.size))
|
||||
c.Response().Header().Set(paginationCountHeader, strconv.FormatInt(totalCount, 10))
|
||||
}
|
||||
|
@ -129,6 +129,8 @@ func crawURL(httpClient *fasthttp.Client, url string, allowedContentTypes []stri
|
||||
}
|
||||
|
||||
switch code := resp.StatusCode(); {
|
||||
case code > 302:
|
||||
return "", fmt.Errorf("non-managed error code %d", code)
|
||||
// follow redirect
|
||||
case code == 301 || code == 302:
|
||||
if location := string(resp.Header.Peek("Location")); location != "" {
|
||||
|
3
scripts/stop.sh
Executable file
3
scripts/stop.sh
Executable file
@ -0,0 +1,3 @@
|
||||
#!/bin/bash

# Delegate to exec.sh with the "stop" action, forwarding any extra
# command-line arguments unchanged.
./scripts/exec.sh stop "$@"
|
Loading…
Reference in New Issue
Block a user