diff --git a/.gitignore b/.gitignore index e98f9c2..6a4f7d3 100644 --- a/.gitignore +++ b/.gitignore @@ -5,4 +5,5 @@ dump.sql ipfs.html about.html copyright.html -torrent_dump_full.csv.gz \ No newline at end of file +torrent_dump_full.csv.gz +index-generator/main.js \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..2092511 --- /dev/null +++ b/README.md @@ -0,0 +1,55 @@ +# About +## What is this? + +This is a repository of all the tools I use to build and run torrent-paradise.ml. Some people asked for a source, so I'm just putting this out here. I did make *some* effort to clean it up. The 'code name' of the project is nextgen (next gen torrent search), so don't be surprised if it comes up somewhere. + +## Can you help me? +Maybe, open an issue. Be sure to demonstrate an effort that you tried to solve the problem yourself. + +## This is a big mess. Fix it maybe? +WIP ❤️ + +# Setup + +Here's what the setup looks like rn: +- VPS, Debian Stretch, 2 GB RAM + - PostgreSQL 9.6. pg_hba.conf contains this: + + ``` + local all all peer + # IPv4 local connections: + host nextgen nextgen localhost md5 + ``` + - IPFS v0.4.18 + - user with username nextgen on the server +- my laptop w/ Linux + - Go toolchain installed + - node v10.9.0 & npm + +Schema for the database is sth like this (taken from index-generator/README, runs on sqlite, probably also on pg.) +```sql +CREATE TABLE peercount ( infohash char(40), tracker varchar, seeders int, leechers int, completed int, scraped timestamp); + +CREATE TABLE torrent( infohash char(40), name varchar, length bigint, added timestamp); +``` + + + +What I did first after getting the server up and running was importing the TPB dump. Download https://thepiratebay.org/static/dump/csv/torrent_dump_full.csv.gz to the import-tpb-dump directory and run `go run`. + +I probably forgot sth. Open an issue! 
+ +# Usage + +## Generate the index + +This is a half-broken process that is partially described in update-index.sh. Read the script to understand what it does. + +## Spider the DHT + +Run `go build` in spider/ to compile the spider, then scp the binary to the server. You can use the systemd service file in `spider/spider.service` to start the spider on boot. + + +# Contributing + +Before working on something, open an issue to ask if it would be okay. I would love to [KISS](https://en.wikipedia.org/wiki/KISS_principle). \ No newline at end of file diff --git a/index-generator/README b/index-generator/README index 10ced2a..a0226b0 100644 --- a/index-generator/README +++ b/index-generator/README @@ -5,13 +5,19 @@ nextgen@ipfsearch: ~$ pg_dump --data-only --inserts nextgen > dump.sql # remove header from dump (manually) -$ sed -i -e 's/public.peercount/peercount/g' dump.sql -$ sed -i -e 's/public.torrent/torrent/g' dump.sql +sed -i -e 's/public.peercount/peercount/g' dump.sql +sed -i -e 's/public.torrent/torrent/g' dump.sql +tail -n +2 dump.sql > newdump.sql +mv newdump.sql dump.sql gzip dump.sql # copy dump.sql.gz to index-generator directory, unzip -onda@localhost $ sqlite3 db.sqlite3 +user@localhost $ scp user@server:/home/nextgen/dump.sql.gz . 
+$ sqlite3 db.sqlite3 + sqlite> CREATE TABLE peercount ( infohash char(40), tracker varchar, seeders int, leechers int, completed int, scraped timestamp); sqlite> CREATE TABLE torrent( infohash char(40), name varchar, length bigint, added timestamp); -sqlite> .read dump.sql \ No newline at end of file +sqlite> BEGIN; +sqlite> .read dump.sql +sqlite> END; \ No newline at end of file diff --git a/index-generator/main.js b/index-generator/main.js index ee5d832..605d958 100644 --- a/index-generator/main.js +++ b/index-generator/main.js @@ -13,7 +13,7 @@ db.each("SELECT torrent.infohash, torrent.name, torrent.length, torrent.added, p }, function (err, num) { console.log("Read all " + i + " records."); console.log("Persisting " + num + " records."); - indexer.persist("generated/inv", "generated/inx", "@tensojka", "nextgen torrent search", "todo", 1000); + indexer.persist("../website/generated/inv", "../website/generated/inx", "Urban Guacamole", "Torrent Paradise index", "", 1000); }); class Torrent extends ipfsearch.Document { constructor(id, text, size, seeders, leechers, completed) { diff --git a/index-generator/main.ts b/index-generator/main.ts index 4a335c4..4a138c2 100644 --- a/index-generator/main.ts +++ b/index-generator/main.ts @@ -15,7 +15,7 @@ db.each("SELECT torrent.infohash, torrent.name, torrent.length, torrent.added, p },function(err,num){ console.log("Read all "+i+" records.") console.log("Persisting "+num+" records.") - indexer.persist("generated/inv", "generated/inx", "@tensojka", "nextgen torrent search","todo", 1000) + indexer.persist("generated/inv", "generated/inx", "", "Torrent Paradise torrent index","todo", 1000) }) class Torrent extends ipfsearch.Document { diff --git a/spider/spider.service b/spider/spider.service new file mode 100644 index 0000000..37f3bfe --- /dev/null +++ b/spider/spider.service @@ -0,0 +1,11 @@ +[Unit] +Description=nextgen DHT spider +Requires=postgresql + +[Service] +User=nextgen +WorkingDirectory=/home/nextgen 
+ExecStart=/home/nextgen/spider + +[Install] +WantedBy=multi-user.target diff --git a/spider/tempo-sql b/spider/tempo-sql new file mode 100644 index 0000000..0c46f14 --- /dev/null +++ b/spider/tempo-sql @@ -0,0 +1,5 @@ +script to see how fast the spiders are adding new torrents + +select added::date, count(infohash) +from torrent +group by added::date; \ No newline at end of file diff --git a/update-index.sh b/update-index.sh new file mode 100755 index 0000000..08c86ea --- /dev/null +++ b/update-index.sh @@ -0,0 +1,33 @@ +# This script updates the index and pushes it to IPFS. Should be run often. + +echo "Scraping trackers for seed/leech data" +mosh nextgen@dev.ipfsearch.xyz "~/tracker-scraper" +echo "Generating SQL dump" +ssh nextgen@dev.ipfsearch.xyz pg_dump --data-only --inserts nextgen > index-generator/dump.sql + +sed -i -e 's/public.peercount/peercount/g' index-generator/dump.sql +sed -i -e 's/public.torrent/torrent/g' index-generator/dump.sql +tail -n +21 index-generator/dump.sql > index-generator/newdump.sql # remove headers +mv index-generator/newdump.sql index-generator/dump.sql +rm index-generator/db.sqlite3 +echo """Do the following: +$ sqlite3 index-generator/db.sqlite3 + +sqlite> CREATE TABLE peercount ( infohash char(40), tracker varchar, seeders int, leechers int, completed int, scraped timestamp, ws boolean); +sqlite> CREATE TABLE torrent( infohash char(40), name varchar, length bigint, added timestamp); +sqlite> BEGIN; +sqlite> .read index-generator/dump.sql +sqlite> END;""" +bash +echo "Generating index now..." +cd index-generator +node --max-old-space-size=10000 main.js +cd .. +echo "Check meta.json, add resultPage='resultpage', fix invURLBase, inxURLBase" +nano website/generated/inx.meta.json +echo "Uploading website" +cd website +scp -r . user@server:/www/torrent-paradise.ml +echo "Finished uploading website to server. 
Adding to IPFS" +ssh user@server sudo -u ipfs ipfs add -r /www/torrent-paradise.ml/ +echo "Check if it works, maybe publish to IPNS." \ No newline at end of file diff --git a/website/bundle.js b/website/bundle.js index a4afc85..2138ef9 100644 --- a/website/bundle.js +++ b/website/bundle.js @@ -1,3 +1,7 @@ +/** + * This is the bundle.js used on ipfsearch.xyz, modified for the purposes of Torrent-Paradise. I edit the bundle directly to not have to fuck around w/ Typescript and Node.js + */ + class IndexFetcher { constructor() { this.combinedIndex = new Map(); diff --git a/website/ipfs.pug b/website/ipfs.pug index c68fc1b..1510245 100644 --- a/website/ipfs.pug +++ b/website/ipfs.pug @@ -2,5 +2,5 @@ include head.html .container.content h1 Remove ads and access the index even when the public website is down p With IPFS (see the #[a(href="https://ipfs.io") official website]), a copy of the site and the whole index can be distributed just like a torrent. When you open it via IPFS, your IPFS node fetches the parts of the index and website it needs from the network. The public website is simply a public IPFS node. - p To use it without the public server, use #[a(href="https://ipfs.io") IPFS]. Address in IPFS is #[a(href="https://cloudflare-ipfs.com/ipns/torrent-paradise.ml") /ipns/torrent-paradise.ml]. - p Just like all torrents need seeders, Torrent Paradise needs IPFS nodes pinning it. Please run #[code ipfs pin add /ipns/torrent-paradise.ml] on your IPFS node to help host the site. Rerun this command ideally every day to seed the newest version of the index. \ No newline at end of file + p To use it without the public server, use #[a(href="https://ipfs.io") IPFS]. Address in IPFS is #[a(href="https://cloudflare-ipfs.com/ipns/torrent-paradise.ml") /ipns/torrent-paradise.ml]. You can also use /ipns/12D3KooWB3GY1u6zMLqnf3MJ8zhX3SS1oBj7VXk3xp6sJJiFGZXp in case the domain doesn't work. + p Just like all torrents need seeders, Torrent Paradise needs IPFS nodes pinning it. 
Please run #[code ipfs pin add /ipns/12D3KooWB3GY1u6zMLqnf3MJ8zhX3SS1oBj7VXk3xp6sJJiFGZXp] on your IPFS node to help host the site. Rerun this command ideally every day to seed the newest version of the index. \ No newline at end of file