Version 20210330.01. Handle 403 from v.redd.it on deleted posts.

pull/10/head
arkiver 3 years ago
parent 8849165130
commit 07ed16c44b

@@ -60,7 +60,7 @@ if not WGET_AT:
#
# Update this each time you make a non-cosmetic change.
# It will be added to the WARC files and reported to the tracker.
VERSION = '20210321.01'
VERSION = '20210330.01'
TRACKER_ID = 'reddit'
TRACKER_HOST = 'legacy-api.arpa.li'
MULTI_ITEM_SIZE = 20

@@ -11,6 +11,8 @@ local item_type = nil
local item_name = nil
local item_value = nil
local selftext = nil
local item_types = {}
for s in string.gmatch(item_names, "([^\n]+)") do
local t, n = string.match(s, "^([^:]+):(.+)$")
@@ -477,6 +479,12 @@ wget.callbacks.get_urls = function(file, url, is_css, iri)
io.stdout:flush()
abort_item()
end
if selftext then
io.stdout:write("sefltext already found.\n")
io.stdout:flush()
abort_item()
end
selftext = child["data"]["selftext"]
checknewurl(child["data"]["permalink"])
end
end
@@ -513,6 +521,7 @@ wget.callbacks.httploop_result = function(url, err, http_stat)
local match = string.match(url["url"], "^https?://www%.reddit.com/api/info%.json%?id=t[0-9]_([a-z0-9]+)$")
if match then
abortgrab = false
selftext = nil
posts[match] = true
if not item_types[match] then
io.stdout:write("Type for ID not found.\n")
@@ -552,6 +561,11 @@ wget.callbacks.httploop_result = function(url, err, http_stat)
abort_item()
return wget.actions.EXIT
end
if status_code == 403 and string.match(url["url"], "^https?://v%.redd%.it/")
and selftext == "[deleted]" then
return wget.actions.EXIT
end
if status_code >= 500
or (status_code >= 400 and status_code ~= 404)

Loading…
Cancel
Save