diff --git a/pipeline.py b/pipeline.py index 47e40ab..3ea946d 100644 --- a/pipeline.py +++ b/pipeline.py @@ -69,7 +69,7 @@ if not WGET_LUA: # # Update this each time you make a non-cosmetic change. # It will be added to the WARC files and reported to the tracker. -VERSION = '20190405.01' +VERSION = '20190729.01' USER_AGENT = 'ArchiveTeam' TRACKER_ID = 'reddit' TRACKER_HOST = 'tracker.archiveteam.org' diff --git a/reddit.lua b/reddit.lua index aae55ca..0fadac4 100644 --- a/reddit.lua +++ b/reddit.lua @@ -70,8 +70,7 @@ allowed = function(url, parenturl) return false end - if string.match(url, "^https?://i%.redd%.it/") - or string.match(url, "^https?://[^/]*redditmedia%.com/") + if string.match(url, "^https?://[^/]*redditmedia%.com/") or string.match(url, "^https?://old%.reddit%.com/api/morechildren$") or string.match(url, "^https?://v%.redd%.it/[^/]+/[^/]+$") then return true @@ -84,13 +83,11 @@ allowed = function(url, parenturl) end if parenturl - and (string.match(parenturl, "^https?://www%.reddit%.com/") or outlinks[parenturl]) + and string.match(parenturl, "^https?://www%.reddit%.com/") and not string.match(url, "^https?://[^/]*reddit%.com/") and not string.match(url, "^https?://[^/]*youtube%.com") - and not string.match(url, "^https?://[^/]*youtu%.be") then - if outlinks[parenturl] == nil then - outlinks[url] = true - end + and not string.match(url, "^https?://[^/]*youtu%.be") + and not string.match(url, "^https?://[^/]*redd%.it/") then return true end diff --git a/warrior-install.sh b/warrior-install.sh deleted file mode 100755 index 135477f..0000000 --- a/warrior-install.sh +++ /dev/null @@ -1,17 +0,0 @@ -#!/bin/bash - -PIP=pip - -if type pip3 > /dev/null 2>&1 -then - PIP=pip3 -fi - -echo "Installing warcio" -if ! sudo $PIP install warcio --upgrade -then - exit 1 -fi - -exit 0 -