From 240d7d48fc387456691d67b4220fb27f1dff8366 Mon Sep 17 00:00:00 2001
From: Urban Guacamole
Date: Sun, 29 Mar 2020 13:38:48 +0200
Subject: [PATCH] Add RSS crawl service

To provide more fresh content, starting with YIFY/YTS.
---
Reviewer notes (this section is ignored when the patch is applied):
 * crawlrss.service: dependency names carry the ".service" suffix and the
   unit is ordered after PostgreSQL.
 * main.go: a missing marker, enclosure or URL part is reported as an error
   instead of indexing out of range; a failed feed fetch no longer terminates
   the daemon; an INSERT that fails without a PostgreSQL error is retried on
   the next poll instead of being marked as done.
 * The "index" blob ids below are carried over from the original submission
   and do not describe the revised file contents.

 crawl-rss/crawlrss.service |  14 +++
 crawl-rss/main.go          | 179 +++++++++++++++++++++++++++++++++++++
 crawl-rss/main_test.go     |  12 +++
 static/index.html          |   4 +
 4 files changed, 209 insertions(+)
 create mode 100644 crawl-rss/crawlrss.service
 create mode 100644 crawl-rss/main.go
 create mode 100644 crawl-rss/main_test.go

diff --git a/crawl-rss/crawlrss.service b/crawl-rss/crawlrss.service
new file mode 100644
index 0000000..6d719e7
--- /dev/null
+++ b/crawl-rss/crawlrss.service
@@ -0,0 +1,14 @@
+[Unit]
+Description=Automatic crawl of new torrent feeds
+Requires=postgresql.service
+After=postgresql.service
+
+[Service]
+User=nextgen
+WorkingDirectory=/home/nextgen
+ExecStart=/home/nextgen/crawl-rss
+Restart=always
+RestartSec=30
+
+[Install]
+WantedBy=multi-user.target
diff --git a/crawl-rss/main.go b/crawl-rss/main.go
new file mode 100644
index 0000000..008a0ba
--- /dev/null
+++ b/crawl-rss/main.go
@@ -0,0 +1,179 @@
+package main
+
+import (
+	"database/sql"
+	"errors"
+	"log"
+	"strconv"
+	"strings"
+	"time"
+
+	"github.com/lib/pq"
+	_ "github.com/lib/pq"
+	"github.com/mmcdole/gofeed"
+)
+
+const (
+	// ytsFeedURL is the RSS feed polled by CrawlYts.
+	ytsFeedURL = "https://yts.mx/rss/0/all/all/0"
+
+	// ytsSizePrefix and ytsSizeSuffix enclose the size field of a YTS item
+	// description.
+	// NOTE(review): the markup inside these two literals was lost in the copy
+	// of the patch under review; "<br />" is a reconstruction - confirm it
+	// against a live feed item.
+	ytsSizePrefix = "<br />Size: "
+	ytsSizeSuffix = "B<br />Runtime: "
+
+	// uniqueViolation is the PostgreSQL error code for a unique constraint
+	// violation, i.e. the infohash is already stored.
+	uniqueViolation = "23505"
+
+	// infohashLen is the width of the torrent.infohash column (char(40)).
+	infohashLen = 40
+)
+
+// Torrent is one feed entry reduced to the columns of the torrent table.
+type Torrent struct {
+	Infohash string
+	Name     string
+	Length   int // size in bytes
+}
+
+// main creates the table if needed, then polls the YTS feed every 30 minutes
+// and stores torrents it has not handed to the database before.
+func main() {
+	db := initDb()
+	// Set of infohashes already handed to the database, so that not every feed
+	// item is sent again on each poll just to learn it is already stored.
+	crawled := make(map[string]bool)
+	for {
+		for _, torrent := range CrawlYts() {
+			addTorrent(db, torrent, &crawled)
+		}
+		time.Sleep(30 * time.Minute)
+	}
+}
+
+// addTorrent inserts torr unless its infohash is already in crawled, and
+// records the infohash in crawled once the database holds the row. A unique
+// violation counts as success; any other PostgreSQL error is fatal.
+func addTorrent(db *sql.DB, torr Torrent, crawled *map[string]bool) {
+	if (*crawled)[torr.Infohash] {
+		return
+	}
+	_, err := db.Exec("INSERT INTO torrent (infohash, name, length) VALUES ($1, $2, $3)", torr.Infohash, torr.Name, torr.Length)
+	if err != nil {
+		pqErr, ok := err.(*pq.Error)
+		if !ok {
+			// Not an error reported by the server, so the row was not stored:
+			// leave the torrent unmarked and let the next poll retry it.
+			log.Print(err)
+			return
+		}
+		if pqErr.Code != uniqueViolation {
+			log.Fatal(pqErr)
+		}
+	}
+	(*crawled)[torr.Infohash] = true
+}
+
+// CrawlYts fetches the YTS feed and returns the torrents that could be parsed.
+// Items with an unparsable size, no enclosure or a malformed download URL are
+// logged and skipped. A failed fetch is logged and yields nil so that the
+// caller's polling loop keeps running.
+//
+// todo https://rarbg.to/rssdd.php?category=44
+func CrawlYts() []Torrent {
+	feed, err := gofeed.NewParser().ParseURL(ytsFeedURL)
+	if err != nil {
+		log.Print(err)
+		return nil
+	}
+	var torrents []Torrent
+	for _, item := range feed.Items {
+		size, err := parseSizeYts(item.Description)
+		if err != nil {
+			log.Print(err)
+			continue
+		}
+		if len(item.Enclosures) == 0 {
+			log.Printf("no enclosure in item %q", item.Title)
+			continue
+		}
+		ih, err := parseInfohashYts(item.Enclosures[0].URL)
+		if err != nil {
+			log.Print(err)
+			continue
+		}
+		torrents = append(torrents, Torrent{ih, item.Title, size})
+	}
+	return torrents
+}
+
+// parseSizeYts parses the torrent length in bytes from a YTS item description.
+// The text between the two markers is expected to be a decimal number, one
+// separator byte and a unit letter: G (10^9 bytes) or M (10^6 bytes).
+func parseSizeYts(description string) (int, error) {
+	// strings.Split never returns an empty slice for a non-empty separator; a
+	// missing marker shows up as a single element, so test for fewer than two.
+	s := strings.Split(description, ytsSizePrefix)
+	if len(s) < 2 {
+		return 0, errors.New("Couldn't find '" + ytsSizePrefix + "' in item description")
+	}
+	s = strings.Split(s[1], ytsSizeSuffix)
+	if len(s) < 2 {
+		return 0, errors.New("Couldn't find '" + ytsSizeSuffix + "' in item description")
+	}
+	field := s[0]
+	if len(field) < 3 {
+		return 0, errors.New("size field too short in item description")
+	}
+	length, err := strconv.ParseFloat(field[:len(field)-2], 64)
+	if err != nil {
+		return 0, err
+	}
+	switch field[len(field)-1] {
+	case 'G':
+		return int(length * 1000000000), nil
+	case 'M':
+		return int(length * 1000000), nil
+	default:
+		return 0, errors.New("Invalid char in place of length specifier")
+	}
+}
+
+// parseInfohashYts extracts the lowercased infohash that follows
+// "torrent/download/" in a YTS enclosure URL.
+func parseInfohashYts(url string) (string, error) {
+	s := strings.Split(url, "torrent/download/")
+	if len(s) < 2 {
+		return "", errors.New("invalid URL")
+	}
+	ih := strings.ToLower(s[1])
+	if len(ih) != infohashLen {
+		return "", errors.New("invalid infohash in URL")
+	}
+	return ih, nil
+}
+
+// initDb opens the PostgreSQL database named in connStr and creates the
+// torrent table if it does not exist yet. Any failure is fatal.
+func initDb() *sql.DB {
+	connStr := "user=nextgen dbname=nextgen host=/var/run/postgresql"
+	db, err := sql.Open("postgres", connStr)
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	_, err = db.Exec(`CREATE TABLE IF NOT EXISTS torrent (
+		infohash char(40) PRIMARY KEY NOT NULL,
+		name varchar NOT NULL,
+		length bigint,
+		added timestamp DEFAULT current_timestamp
+	)`)
+	if err != nil {
+		log.Fatal(err)
+	}
+	return db
+}
diff --git a/crawl-rss/main_test.go b/crawl-rss/main_test.go
new file mode 100644
index 0000000..0d2d9fa
--- /dev/null
+++ b/crawl-rss/main_test.go
@@ -0,0 +1,12 @@
+package main
+
+import (
+	"testing"
+)
+
+func TestCrawlYts(t *testing.T) {
+	torrents := CrawlYts()
+	if len(torrents) < 1 {
+		t.Error("no torrents crawled from yts")
+	}
+}
diff --git a/static/index.html b/static/index.html
index 97d3212..b7ff65a 100644
--- a/static/index.html
+++ b/static/index.html
@@ -38,6 +38,10 @@
+
+
+ Now with all new YIFY movies, straight from their RSS feed. Enjoy and #StayHome +