From 147c6416ed84c4b17baeaec4ac7c7346d46f27bc Mon Sep 17 00:00:00 2001 From: arkiver Date: Sun, 26 Jul 2020 20:04:24 -0400 Subject: [PATCH] Version 20200727.01. Use trackerproxy for dictionaries. Ignore irc: URLs. --- pipeline.py | 6 +++--- reddit.lua | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pipeline.py b/pipeline.py index a5ce7ed..e5953da 100644 --- a/pipeline.py +++ b/pipeline.py @@ -54,9 +54,9 @@ if not WGET_AT: # # Update this each time you make a non-cosmetic change. # It will be added to the WARC files and reported to the tracker. -VERSION = '20200726.06' +VERSION = '20200727.01' USER_AGENT = 'Archive Team' -TRACKER_ID = 'reddittest' +TRACKER_ID = 'reddit' TRACKER_HOST = 'trackerproxy.meo.ws' @@ -161,7 +161,7 @@ class ZstdDict(object): if cls.data is not None and time.time() - cls.created < 1800: return cls.data response = requests.get( - 'http://tracker.archiveteam.org:25654/dictionary', + 'http://trackerproxy.meo.ws:25654/dictionary', params={ 'project': 'reddit' } diff --git a/reddit.lua b/reddit.lua index fcb983e..fdf18e0 100644 --- a/reddit.lua +++ b/reddit.lua @@ -217,6 +217,7 @@ wget.callbacks.get_urls = function(file, url, is_css, iri) or string.match(newurl, "^android%-app:") or string.match(newurl, "^ios%-app:") or string.match(newurl, "^data:") + or string.match(newurl, "^irc:") or string.match(newurl, "^%${")) then check(string.match(url, "^(https?://.+/)") .. newurl) end