Version 20210312.01. Get URLs with utm_* and context params.

pull/10/head
arkiver 3 years ago
parent a5c798945c
commit d3b6659419

@@ -60,7 +60,7 @@ if not WGET_AT:
#
# Update this each time you make a non-cosmetic change.
# It will be added to the WARC files and reported to the tracker.
-VERSION = '20210306.01'
+VERSION = '20210312.01'
TRACKER_ID = 'reddit'
TRACKER_HOST = 'legacy-api.arpa.li'
MULTI_ITEM_SIZE = 20

@@ -312,6 +312,9 @@ wget.callbacks.get_urls = function(file, url, is_css, iri)
and not string.match(url, "%.mpd")
) then
html = read_file(file)
+if string.match(url, "^https?://www%.reddit%.com/[^/]+/[^/]+/comments/[0-9a-z]+/[^/]+/[0-9a-z]*/?$") then
+check(url .. "?utm_source=reddit&utm_medium=web2x&context=3")
+end
if string.match(url, "^https?://old%.reddit%.com/api/morechildren$") then
html = string.gsub(html, '\\"', '"')
elseif string.match(url, "^https?://old%.reddit%.com/r/[^/]+/comments/")

Loading…
Cancel
Save