Version 20200726.03. Support galleries and comments.

This commit is contained in:
arkiver 2020-07-26 16:43:41 -04:00
parent 2f6a602313
commit 23fec56409
2 changed files with 15 additions and 5 deletions

View File

@@ -54,7 +54,7 @@ if not WGET_AT:
# #
# Update this each time you make a non-cosmetic change. # Update this each time you make a non-cosmetic change.
# It will be added to the WARC files and reported to the tracker. # It will be added to the WARC files and reported to the tracker.
VERSION = '20200726.01' VERSION = '20200726.03'
USER_AGENT = 'Archive Team' USER_AGENT = 'Archive Team'
TRACKER_ID = 'reddit' TRACKER_ID = 'reddit'
TRACKER_HOST = 'trackerproxy.meo.ws' TRACKER_HOST = 'trackerproxy.meo.ws'

View File

@@ -72,6 +72,8 @@ allowed = function(url, parenturl)
or string.match(url, "^https?://out%.reddit%.com/r/") or string.match(url, "^https?://out%.reddit%.com/r/")
or string.match(url, "^https?://emoji%.redditmedia%.com/") or string.match(url, "^https?://emoji%.redditmedia%.com/")
or string.match(url, "^https?://styles%.redditmedia%.com/") or string.match(url, "^https?://styles%.redditmedia%.com/")
or string.match(url, "^https?://old%.reddit%.com/gallery/")
or string.match(url, "^https?://old%.reddit%.com/gold%?")
or string.match(url, "^https?://[^%.]+%.redd%.it/award_images/") or string.match(url, "^https?://[^%.]+%.redd%.it/award_images/")
or ( or (
string.match(url, "^https?://gateway%.reddit%.com/") string.match(url, "^https?://gateway%.reddit%.com/")
@@ -109,8 +111,12 @@ allowed = function(url, parenturl)
return false return false
end end
if string.match(url, "^https?://gateway%.reddit%.com/desktopapi/v1/morecomments/")
or string.match(url, "^https?://old%.reddit%.com/api/morechildren$") then
return true
end
if (string.match(url, "^https?://[^/]*redditmedia%.com/") if (string.match(url, "^https?://[^/]*redditmedia%.com/")
or string.match(url, "^https?://old%.reddit%.com/api/morechildren$")
or string.match(url, "^https?://v%.redd%.it/") or string.match(url, "^https?://v%.redd%.it/")
or string.match(url, "^https?://i%.redd%.it/") or string.match(url, "^https?://i%.redd%.it/")
or string.match(url, "^https?://[^%.]*preview%.redd%.it/.") or string.match(url, "^https?://[^%.]*preview%.redd%.it/.")
@@ -216,10 +222,14 @@ wget.callbacks.get_urls = function(file, url, is_css, iri)
end end
end end
if string.match(url, "^https?://www%.reddit%.com/") then if string.match(url, "^https?://www%.reddit%.com/")
and not string.match(url, "/api/") then
check(string.gsub(url, "^https?://www%.reddit%.com/", "https://old.reddit.com/")) check(string.gsub(url, "^https?://www%.reddit%.com/", "https://old.reddit.com/"))
--elseif string.match(url, "^https?://old%.reddit%.com/") then end
-- check(string.gsub(url, "^https?://old%.reddit%.com/", "https://www.reddit.com/"))
local match = string.match(url, "^https?://preview%.redd%.it/([a-zA-Z0-9]+%.[a-zA-Z0-9]+)")
if match then
check("https://i.redd.it/" .. match)
end end
if allowed(url) if allowed(url)