mirror of
https://github.com/creekorful/bathyscaphe
synced 2024-11-16 00:12:56 +00:00
Scheduler: normalized received URLs
This commit is contained in:
parent
cf8c2875cb
commit
29da7859b4
2
go.mod
2
go.mod
@ -3,6 +3,8 @@ module github.com/creekorful/trandoshan
|
||||
go 1.14
|
||||
|
||||
require (
|
||||
github.com/PuerkitoBio/purell v1.1.1
|
||||
github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 // indirect
|
||||
github.com/nats-io/nats.go v1.9.2
|
||||
github.com/sirupsen/logrus v1.5.0
|
||||
github.com/urfave/cli/v2 v2.2.0
|
||||
|
5
go.sum
5
go.sum
@ -1,4 +1,8 @@
|
||||
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
|
||||
github.com/PuerkitoBio/purell v1.1.1 h1:WEQqlqaGbrPkxLJWfBwQmfEAE1Z7ONdDLqrN38tNFfI=
|
||||
github.com/PuerkitoBio/purell v1.1.1/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0=
|
||||
github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 h1:d+Bc7a5rLufV/sSk/8dngufqelfh6jnri85riMAaF/M=
|
||||
github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE=
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d h1:U+s90UTSYgptZMwQh2aRr3LuazLJIa+Pg3Kc1ylSYVY=
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU=
|
||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
@ -44,6 +48,7 @@ golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5h
|
||||
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20190422165155-953cdadca894 h1:Cz4ceDQGXuKRnVBDTS23GTn/pU5OE2C0WrNTOYK1Uuc=
|
||||
golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg=
|
||||
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
||||
|
@ -3,6 +3,7 @@ package scheduler
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"github.com/PuerkitoBio/purell"
|
||||
"github.com/creekorful/trandoshan/internal/natsutil"
|
||||
"github.com/creekorful/trandoshan/pkg/proto"
|
||||
"github.com/nats-io/nats.go"
|
||||
@ -87,6 +88,16 @@ func handleMessage(nc *nats.Conn, msg *nats.Msg) error {
|
||||
|
||||
logrus.Debugf("Processing URL: %s", urlMsg.URL)
|
||||
|
||||
// Normalize the received URL
|
||||
normalizedURL, err := purell.NormalizeURLString(urlMsg.URL, purell.FlagsUsuallySafeGreedy|
|
||||
purell.FlagRemoveDirectoryIndex|purell.FlagRemoveFragment|purell.FlagRemoveDuplicateSlashes)
|
||||
|
||||
if err != nil {
|
||||
return fmt.Errorf("error while normalizing URL %s: %s", urlMsg.URL, err)
|
||||
}
|
||||
|
||||
logrus.Debugf("Normalizing URL: %s", normalizedURL)
|
||||
|
||||
// TODO implement scheduling logic
|
||||
|
||||
if err := natsutil.PublishJSON(nc, proto.URLTodoSubject, &proto.URLTodoMessage{URL: urlMsg.URL}); err != nil {
|
||||
|
Loading…
Reference in New Issue
Block a user