2020-03-29 11:38:48 +00:00
|
|
|
package main
|
|
|
|
|
|
|
|
import (
|
|
|
|
"database/sql"
|
|
|
|
"errors"
|
|
|
|
"log"
|
|
|
|
"strconv"
|
|
|
|
"strings"
|
|
|
|
"time"
|
|
|
|
|
|
|
|
"github.com/lib/pq"
|
|
|
|
"github.com/mmcdole/gofeed"
|
|
|
|
)
|
|
|
|
|
|
|
|
// Torrent is the record every crawler produces for a discovered torrent and
// that addTorrent writes to the torrent table.
type Torrent struct {
	// Infohash identifies the torrent; Name is the title taken from the feed.
	Infohash, Name string
	// Length is the size of the torrent's content as reported by the feed.
	Length int
}
|
|
|
|
|
|
|
|
func main() {
|
|
|
|
db := initDb()
|
|
|
|
crawled := make(map[string]bool) // set to not needlessly send all torrents to db to check if we found them already
|
2020-07-05 10:31:28 +00:00
|
|
|
var i int
|
2020-03-29 11:38:48 +00:00
|
|
|
for {
|
|
|
|
torrents := CrawlYts()
|
|
|
|
for _, torrent := range torrents {
|
2020-07-04 12:48:26 +00:00
|
|
|
addTorrent(db, torrent, crawled)
|
2020-03-29 11:38:48 +00:00
|
|
|
}
|
2020-07-04 12:48:26 +00:00
|
|
|
torrents = CrawlEztv()
|
|
|
|
for _, torrent := range torrents {
|
|
|
|
addTorrent(db, torrent, crawled)
|
|
|
|
}
|
2020-07-10 08:23:01 +00:00
|
|
|
torrents = CrawlTPBVideoRecent()
|
|
|
|
for _, torrent := range torrents {
|
|
|
|
addTorrent(db, torrent, crawled)
|
|
|
|
}
|
2020-07-05 10:31:28 +00:00
|
|
|
if i%10 == 0 {
|
|
|
|
torrents = CrawlTPB48hTop()
|
|
|
|
for _, torrent := range torrents {
|
|
|
|
addTorrent(db, torrent, crawled)
|
|
|
|
}
|
|
|
|
if len(torrents) == 0 {
|
|
|
|
log.Println("weird, no torrents crawled from TPB")
|
|
|
|
}
|
|
|
|
}
|
|
|
|
i++
|
2020-07-19 20:38:46 +00:00
|
|
|
refresh(db)
|
2020-12-30 15:05:04 +00:00
|
|
|
time.Sleep(time.Minute * 100)
|
2020-03-29 11:38:48 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-03-29 12:06:41 +00:00
|
|
|
// refresh re-computes the materialized views "fresh" and "search" so they
// reflect the torrents inserted since the previous refresh.
//
// A failed refresh is logged and otherwise ignored: the statements are
// independent, and the next crawl round will try again.
func refresh(db *sql.DB) {
	statements := []string{
		"REFRESH MATERIALIZED VIEW fresh",
		"REFRESH MATERIALIZED VIEW CONCURRENTLY search",
	}
	for _, stmt := range statements {
		if _, err := db.Exec(stmt); err != nil {
			log.Printf("%s: %v", stmt, err)
		}
	}
}
|
|
|
|
|
2020-07-04 12:48:26 +00:00
|
|
|
func addTorrent(db *sql.DB, torr Torrent, crawled map[string]bool) {
|
|
|
|
if !(crawled[string(torr.Infohash)]) {
|
2020-07-10 08:23:01 +00:00
|
|
|
_, err := db.Exec("INSERT INTO torrent (infohash, name, length) VALUES ($1, $2, $3)", strings.ToLower(torr.Infohash), torr.Name, torr.Length)
|
2020-03-29 11:38:48 +00:00
|
|
|
if err, ok := err.(*pq.Error); ok { //dark magic
|
|
|
|
if err.Code != "23505" {
|
|
|
|
log.Fatal(err)
|
|
|
|
}
|
|
|
|
}
|
2020-07-04 12:48:26 +00:00
|
|
|
crawled[torr.Infohash] = true
|
2020-03-29 11:38:48 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// TODO: add a crawler for the rarbg RSS feed (category 44): https://rarbg.to/rssdd.php?category=44
|
|
|
|
func CrawlYts() []Torrent {
|
|
|
|
fp := gofeed.NewParser()
|
|
|
|
feed, err := fp.ParseURL("https://yts.mx/rss/0/all/all/0")
|
|
|
|
if err != nil {
|
|
|
|
log.Fatal(err)
|
|
|
|
}
|
|
|
|
var torrents []Torrent
|
|
|
|
for _, item := range feed.Items {
|
|
|
|
size, err := parseSizeYts(item.Description)
|
|
|
|
if err != nil {
|
|
|
|
log.Print(err)
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
ih, err := parseInfohashYts(item.Enclosures[0].URL)
|
|
|
|
torrents = append(torrents, Torrent{ih, item.Title, size})
|
|
|
|
}
|
|
|
|
return torrents
|
|
|
|
}
|
|
|
|
|
2020-07-05 10:31:28 +00:00
|
|
|
// TODO: add a crawler for the rarbg RSS feed restricted to the categories below (all of rarbg except porn):
// https://rarbg.to/rssdd.php?category=2;14;15;16;17;21;22;42;18;19;41;27;28;29;30;31;32;40;23;24;25;26;33;34;43;44;45;46;47;48;49;50;51;52;54
|
|
|
|
|
2020-07-04 12:48:26 +00:00
|
|
|
func CrawlEztv() []Torrent { //maybe is there some kind of interface that this can share with CrawlYts? This function has the same signature and purpose.
|
|
|
|
fp := gofeed.NewParser()
|
2020-12-30 15:05:04 +00:00
|
|
|
feed, err := fp.ParseURL("https://eztv.re/ezrss.xml")
|
2020-07-04 12:48:26 +00:00
|
|
|
if err != nil {
|
|
|
|
log.Fatal(err)
|
|
|
|
}
|
|
|
|
var torrents []Torrent
|
|
|
|
for _, item := range feed.Items {
|
|
|
|
size, err := strconv.Atoi(item.Extensions["torrent"]["contentLength"][0].Value)
|
|
|
|
if err != nil {
|
|
|
|
log.Print(err)
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
torrents = append(torrents, Torrent{item.Extensions["torrent"]["infoHash"][0].Value, item.Extensions["torrent"]["fileName"][0].Value, size})
|
|
|
|
}
|
|
|
|
return torrents
|
|
|
|
}
|
|
|
|
|
2020-03-29 11:38:48 +00:00
|
|
|
// parseSizeYts parses the torrent length from a YTS feed item description.
//
// The size is expected between the markers "<br />Size: " and
// "B<br />Runtime: " as a decimal number followed by a unit letter, e.g.
// "1.5 G" or "700 M". "G" is scaled by 10^9 and "M" by 10^6; the result is
// truncated to an int.
//
// An error is returned when either marker is missing, when the size field is
// empty, when the number cannot be parsed, or when the unit is neither "G"
// nor "M".
func parseSizeYts(description string) (int, error) {
	// strings.Split returns a single element when the separator is absent,
	// so at least two parts are needed before indexing the second one.
	s := strings.Split(description, "<br />Size: ")
	if len(s) < 2 {
		return 0, errors.New("Couldn't find '<br />Size: ' in item description")
	}
	s = strings.Split(s[1], "B<br />Runtime: ")
	if len(s) < 2 {
		return 0, errors.New("Couldn't find 'B<br />Runtime: ' in item description")
	}

	field := s[0]
	if field == "" {
		return 0, errors.New("empty size in item description")
	}

	// The last byte is the unit; everything before it (minus the separating
	// whitespace) is the number.
	unit := field[len(field)-1]
	length, err := strconv.ParseFloat(strings.TrimSpace(field[:len(field)-1]), 64)
	if err != nil {
		return 0, err
	}

	switch unit {
	case 'G':
		return int(length * 1000000000), nil
	case 'M':
		return int(length * 1000000), nil
	default:
		return 0, errors.New("Invalid char in place of length specifier")
	}
}
|
|
|
|
|
|
|
|
// parseInfohashYts extracts the infohash from a YTS torrent download URL.
//
// The infohash is the part of url that follows "torrent/download/"; it is
// returned lower-cased. An error is returned when url does not contain that
// marker or when nothing follows it.
func parseInfohashYts(url string) (string, error) {
	// strings.Split returns a single element when the separator is absent,
	// so two parts are required before the second one may be used.
	parts := strings.Split(url, "torrent/download/")
	if len(parts) < 2 || parts[1] == "" {
		return "", errors.New("invalid URL")
	}
	return strings.ToLower(parts[1]), nil
}
|
|
|
|
|
|
|
|
// initDb opens the "postgres" database handle used by the crawler and makes
// sure the torrent table exists. It terminates the process via log.Fatal when
// the handle cannot be opened or the table cannot be created.
func initDb() *sql.DB {
	// Schema of the table the crawlers insert into; creating it is a no-op
	// when the table already exists.
	const createTorrentTable = `CREATE TABLE IF NOT EXISTS torrent (
		infohash char(40) PRIMARY KEY NOT NULL,
		name varchar NOT NULL,
		length bigint,
		added timestamp DEFAULT current_timestamp
	)`

	db, openErr := sql.Open("postgres", "user=nextgen dbname=nextgen host=/var/run/postgresql")
	if openErr != nil {
		log.Fatal(openErr)
	}

	if _, execErr := db.Exec(createTorrentTable); execErr != nil {
		log.Fatal(execErr)
	}
	return db
}
|