Version 20230607.04. Abort on video for now.

pull/17/head
arkiver 12 months ago
parent f63c8ab696
commit 7bb5c39419

@ -59,7 +59,7 @@ if not WGET_AT:
#
# Update this each time you make a non-cosmetic change.
# It will be added to the WARC files and reported to the tracker.
VERSION = '20230607.03'
VERSION = '20230607.04'
TRACKER_ID = 'reddit'
TRACKER_HOST = 'legacy-api.arpa.li'
MULTI_ITEM_SIZE = 20

@ -271,6 +271,10 @@ wget.callbacks.get_urls = function(file, url, is_css, iri)
downloaded[url] = true
if abortgrab then
return {}
end
local function check(urla)
if no_more_svc
and string.match(urla, "^https?://[^/]+/svc/") then
@ -556,6 +560,11 @@ wget.callbacks.get_urls = function(file, url, is_css, iri)
end
selftext = child["data"]["selftext"]
checknewurl(child["data"]["permalink"])
-- temp: raise an error on any video post for now
if child["data"]["is_video"] then
error()
end
-- end temp
if child["data"]["is_video"] and not child["data"]["secure_media"] then
io.stdout:write("Video still being processed.\n")
io.stdout:flush()
@ -658,6 +667,14 @@ wget.callbacks.write_to_warc = function(url, http_stat)
retry_url = true
return false
end
if string.match(url["url"], "/api/info%.json") then
local html = read_file(http_stat["local_file"])
if string.match(html, "v%.redd%.it")
or string.match(html, "reddit_video") then
abort_item()
return false
end
end
if string.match(url["url"], "^https?://www%.reddit%.com/") then
local html = read_file(http_stat["local_file"])
if (

Loading…
Cancel
Save