Version 20230607.04. Abort on video for now.

pull/17/head
arkiver 1 year ago
parent f63c8ab696
commit 7bb5c39419

@ -59,7 +59,7 @@ if not WGET_AT:
# #
# Update this each time you make a non-cosmetic change. # Update this each time you make a non-cosmetic change.
# It will be added to the WARC files and reported to the tracker. # It will be added to the WARC files and reported to the tracker.
VERSION = '20230607.03' VERSION = '20230607.04'
TRACKER_ID = 'reddit' TRACKER_ID = 'reddit'
TRACKER_HOST = 'legacy-api.arpa.li' TRACKER_HOST = 'legacy-api.arpa.li'
MULTI_ITEM_SIZE = 20 MULTI_ITEM_SIZE = 20

@ -271,6 +271,10 @@ wget.callbacks.get_urls = function(file, url, is_css, iri)
downloaded[url] = true downloaded[url] = true
if abortgrab then
return {}
end
local function check(urla) local function check(urla)
if no_more_svc if no_more_svc
and string.match(urla, "^https?://[^/]+/svc/") then and string.match(urla, "^https?://[^/]+/svc/") then
@ -556,6 +560,11 @@ wget.callbacks.get_urls = function(file, url, is_css, iri)
end end
selftext = child["data"]["selftext"] selftext = child["data"]["selftext"]
checknewurl(child["data"]["permalink"]) checknewurl(child["data"]["permalink"])
-- temp: raise an error on any video post (abort on video for now)
if child["data"]["is_video"] then
error()
end
-- end temp
if child["data"]["is_video"] and not child["data"]["secure_media"] then if child["data"]["is_video"] and not child["data"]["secure_media"] then
io.stdout:write("Video still being processed.\n") io.stdout:write("Video still being processed.\n")
io.stdout:flush() io.stdout:flush()
@ -658,6 +667,14 @@ wget.callbacks.write_to_warc = function(url, http_stat)
retry_url = true retry_url = true
return false return false
end end
if string.match(url["url"], "/api/info%.json") then
local html = read_file(http_stat["local_file"])
if string.match(html, "v%.redd%.it")
or string.match(html, "reddit_video") then
abort_item()
return false
end
end
if string.match(url["url"], "^https?://www%.reddit%.com/") then if string.match(url["url"], "^https?://www%.reddit%.com/") then
local html = read_file(http_stat["local_file"]) local html = read_file(http_stat["local_file"])
if ( if (

Loading…
Cancel
Save