Version 20230614.01. Fix check for valid data.

pull/17/head
arkiver 11 months ago
parent 4936505b0f
commit e84e804fc5

@@ -59,7 +59,7 @@ if not WGET_AT:
#
# Update this each time you make a non-cosmetic change.
# It will be added to the WARC files and reported to the tracker.
-VERSION = '20230612.02'
+VERSION = '20230614.01'
TRACKER_ID = 'reddit'
TRACKER_HOST = 'legacy-api.arpa.li'
MULTI_ITEM_SIZE = 40

@@ -711,19 +711,27 @@ wget.callbacks.write_to_warc = function(url, http_stat)
return false
end
end
-local comments_comment = string.match(url["url"], "^https?://www%.reddit%.com/r/[^/]+/comments/[^/]+/comment/[^/]+/")
+local is_comments_comment = string.match(url["url"], "^https?://www%.reddit%.com/r/[^/]+/comments/[^/]+/comment/[^/]+/")
if (
string.match(url["url"], "^https?://[^/]+/svc/")
and string.match(html, 'level%s*=')
) or (
string.match(url["url"], "^https?://www%.reddit%.com/r/")
-and not comments_comment
-and not string.match(html, "<shreddit%-redirect")
-) or (
-comments_comment
-and not string.match(html, "<shreddit%-title")
+and (
+(
+not is_comments_comment
+and item_type == "comment"
+and not string.match(html, "<shreddit%-redirect")
+) or (
+(
+is_comments_comment
+or item_type ~= "comment"
+)
+and not string.match(html, "<shreddit%-title")
+)
+)
) then
-io.stdout:write("Reddit has problems. Pausing 120 seconds and aborting.\n")
+io.stdout:write("Reddit has a problem for URL " .. url["url"] .. ". Pausing 120 seconds and aborting.\n")
io.stdout:flush()
os.execute("sleep 120")
killgrab = true

Loading…
Cancel
Save