mirror of
https://github.com/ArchiveTeam/reddit-grab
synced 2024-11-10 07:10:38 +00:00
Version 20200726.03. Support galleries and comments.
This commit is contained in:
parent
2f6a602313
commit
23fec56409
@ -54,7 +54,7 @@ if not WGET_AT:
|
|||||||
#
|
#
|
||||||
# Update this each time you make a non-cosmetic change.
|
# Update this each time you make a non-cosmetic change.
|
||||||
# It will be added to the WARC files and reported to the tracker.
|
# It will be added to the WARC files and reported to the tracker.
|
||||||
VERSION = '20200726.01'
|
VERSION = '20200726.03'
|
||||||
USER_AGENT = 'Archive Team'
|
USER_AGENT = 'Archive Team'
|
||||||
TRACKER_ID = 'reddit'
|
TRACKER_ID = 'reddit'
|
||||||
TRACKER_HOST = 'trackerproxy.meo.ws'
|
TRACKER_HOST = 'trackerproxy.meo.ws'
|
||||||
|
18
reddit.lua
18
reddit.lua
@ -72,6 +72,8 @@ allowed = function(url, parenturl)
|
|||||||
or string.match(url, "^https?://out%.reddit%.com/r/")
|
or string.match(url, "^https?://out%.reddit%.com/r/")
|
||||||
or string.match(url, "^https?://emoji%.redditmedia%.com/")
|
or string.match(url, "^https?://emoji%.redditmedia%.com/")
|
||||||
or string.match(url, "^https?://styles%.redditmedia%.com/")
|
or string.match(url, "^https?://styles%.redditmedia%.com/")
|
||||||
|
or string.match(url, "^https?://old%.reddit%.com/gallery/")
|
||||||
|
or string.match(url, "^https?://old%.reddit%.com/gold%?")
|
||||||
or string.match(url, "^https?://[^%.]+%.redd%.it/award_images/")
|
or string.match(url, "^https?://[^%.]+%.redd%.it/award_images/")
|
||||||
or (
|
or (
|
||||||
string.match(url, "^https?://gateway%.reddit%.com/")
|
string.match(url, "^https?://gateway%.reddit%.com/")
|
||||||
@ -109,8 +111,12 @@ allowed = function(url, parenturl)
|
|||||||
return false
|
return false
|
||||||
end
|
end
|
||||||
|
|
||||||
|
if string.match(url, "^https?://gateway%.reddit%.com/desktopapi/v1/morecomments/")
|
||||||
|
or string.match(url, "^https?://old%.reddit%.com/api/morechildren$") then
|
||||||
|
return true
|
||||||
|
end
|
||||||
|
|
||||||
if (string.match(url, "^https?://[^/]*redditmedia%.com/")
|
if (string.match(url, "^https?://[^/]*redditmedia%.com/")
|
||||||
or string.match(url, "^https?://old%.reddit%.com/api/morechildren$")
|
|
||||||
or string.match(url, "^https?://v%.redd%.it/")
|
or string.match(url, "^https?://v%.redd%.it/")
|
||||||
or string.match(url, "^https?://i%.redd%.it/")
|
or string.match(url, "^https?://i%.redd%.it/")
|
||||||
or string.match(url, "^https?://[^%.]*preview%.redd%.it/.")
|
or string.match(url, "^https?://[^%.]*preview%.redd%.it/.")
|
||||||
@ -216,10 +222,14 @@ wget.callbacks.get_urls = function(file, url, is_css, iri)
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
if string.match(url, "^https?://www%.reddit%.com/") then
|
if string.match(url, "^https?://www%.reddit%.com/")
|
||||||
|
and not string.match(url, "/api/") then
|
||||||
check(string.gsub(url, "^https?://www%.reddit%.com/", "https://old.reddit.com/"))
|
check(string.gsub(url, "^https?://www%.reddit%.com/", "https://old.reddit.com/"))
|
||||||
--elseif string.match(url, "^https?://old%.reddit%.com/") then
|
end
|
||||||
-- check(string.gsub(url, "^https?://old%.reddit%.com/", "https://www.reddit.com/"))
|
|
||||||
|
local match = string.match(url, "^https?://preview%.redd%.it/([a-zA-Z0-9]+%.[a-zA-Z0-9]+)")
|
||||||
|
if match then
|
||||||
|
check("https://i.redd.it/" .. match)
|
||||||
end
|
end
|
||||||
|
|
||||||
if allowed(url)
|
if allowed(url)
|
||||||
|
Loading…
Reference in New Issue
Block a user