Version 20210707.01. Do not get media for cross posts.

pull/10/head
arkiver 3 years ago
parent 4b976e2ea7
commit ed80cb5a9d

@ -59,7 +59,7 @@ if not WGET_AT:
# #
# Update this each time you make a non-cosmetic change. # Update this each time you make a non-cosmetic change.
# It will be added to the WARC files and reported to the tracker. # It will be added to the WARC files and reported to the tracker.
VERSION = '20210521.01' VERSION = '20210707.01'
TRACKER_ID = 'reddit' TRACKER_ID = 'reddit'
TRACKER_HOST = 'legacy-api.arpa.li' TRACKER_HOST = 'legacy-api.arpa.li'
MULTI_ITEM_SIZE = 20 MULTI_ITEM_SIZE = 20

@ -34,6 +34,7 @@ local abortgrab = false
local posts = {} local posts = {}
local requested_children = {} local requested_children = {}
local thumbs = {} local thumbs = {}
local is_crosspost = false
local outlinks = {} local outlinks = {}
@ -182,7 +183,8 @@ allowed = function(url, parenturl)
or string.match(url, "^https?://i%.redd%.it/") or string.match(url, "^https?://i%.redd%.it/")
or string.match(url, "^https?://[^%.]*preview%.redd%.it/.") or string.match(url, "^https?://[^%.]*preview%.redd%.it/.")
) )
and not string.match(item_type, "comment") then and not string.match(item_type, "comment")
and not is_crosspost then
if parenturl if parenturl
and string.match(parenturl, "^https?://www%.reddit.com/api/info%.json%?id=t") and string.match(parenturl, "^https?://www%.reddit.com/api/info%.json%?id=t")
and not string.match(url, "^https?://v%.redd%.it/") and not string.match(url, "^https?://v%.redd%.it/")
@ -517,6 +519,10 @@ wget.callbacks.get_urls = function(file, url, is_css, iri)
io.stdout:flush() io.stdout:flush()
abort_item() abort_item()
end end
local crosspost_parent = child["data"]["crosspost_parent"]
if crosspost_parent and crosspost_parent ~= string.match(url, "(t[0-9]_[a-z0-9]+)") then
is_crosspost = true
end
end end
end end
for newurl in string.gmatch(string.gsub(html, "&quot;", '"'), '([^"]+)') do for newurl in string.gmatch(string.gsub(html, "&quot;", '"'), '([^"]+)') do
@ -553,6 +559,7 @@ wget.callbacks.httploop_result = function(url, err, http_stat)
if match then if match then
abortgrab = false abortgrab = false
selftext = nil selftext = nil
is_crosspost = false
posts[match] = true posts[match] = true
if not item_types[match] then if not item_types[match] then
io.stdout:write("Type for ID not found.\n") io.stdout:write("Type for ID not found.\n")

Loading…
Cancel
Save