diff --git a/pipeline.py b/pipeline.py index 750dde6..4243a1d 100644 --- a/pipeline.py +++ b/pipeline.py @@ -60,7 +60,7 @@ if not WGET_AT: # # Update this each time you make a non-cosmetic change. # It will be added to the WARC files and reported to the tracker. -VERSION = '20210306.01' +VERSION = '20210312.01' TRACKER_ID = 'reddit' TRACKER_HOST = 'legacy-api.arpa.li' MULTI_ITEM_SIZE = 20 diff --git a/reddit.lua b/reddit.lua index e7ed9b3..35a98e2 100644 --- a/reddit.lua +++ b/reddit.lua @@ -312,6 +312,9 @@ wget.callbacks.get_urls = function(file, url, is_css, iri) and not string.match(url, "%.mpd") ) then html = read_file(file) + if string.match(url, "^https?://www%.reddit%.com/[^/]+/[^/]+/comments/[0-9a-z]+/[^/]+/[0-9a-z]*/?$") then + check(url .. "?utm_source=reddit&utm_medium=web2x&context=3") + end if string.match(url, "^https?://old%.reddit%.com/api/morechildren$") then html = string.gsub(html, '\\"', '"') elseif string.match(url, "^https?://old%.reddit%.com/r/[^/]+/comments/")