Version 20230614.01. Fix check for valid data.

pull/17/head
arkiver 12 months ago
parent 4936505b0f
commit e84e804fc5

@@ -59,7 +59,7 @@ if not WGET_AT:
# #
# Update this each time you make a non-cosmetic change. # Update this each time you make a non-cosmetic change.
# It will be added to the WARC files and reported to the tracker. # It will be added to the WARC files and reported to the tracker.
VERSION = '20230612.02' VERSION = '20230614.01'
TRACKER_ID = 'reddit' TRACKER_ID = 'reddit'
TRACKER_HOST = 'legacy-api.arpa.li' TRACKER_HOST = 'legacy-api.arpa.li'
MULTI_ITEM_SIZE = 40 MULTI_ITEM_SIZE = 40

@@ -711,19 +711,27 @@ wget.callbacks.write_to_warc = function(url, http_stat)
return false return false
end end
end end
local comments_comment = string.match(url["url"], "^https?://www%.reddit%.com/r/[^/]+/comments/[^/]+/comment/[^/]+/") local is_comments_comment = string.match(url["url"], "^https?://www%.reddit%.com/r/[^/]+/comments/[^/]+/comment/[^/]+/")
if ( if (
string.match(url["url"], "^https?://[^/]+/svc/") string.match(url["url"], "^https?://[^/]+/svc/")
and string.match(html, 'level%s*=') and string.match(html, 'level%s*=')
) or ( ) or (
string.match(url["url"], "^https?://www%.reddit%.com/r/") string.match(url["url"], "^https?://www%.reddit%.com/r/")
and not comments_comment and (
and not string.match(html, "<shreddit%-redirect") (
) or ( not is_comments_comment
comments_comment and item_type == "comment"
and not string.match(html, "<shreddit%-title") and not string.match(html, "<shreddit%-redirect")
) or (
(
is_comments_comment
or item_type ~= "comment"
)
and not string.match(html, "<shreddit%-title")
)
)
) then ) then
io.stdout:write("Reddit has problems. Pausing 120 seconds and aborting.\n") io.stdout:write("Reddit has a problem for URL " .. url["url"] .. ". Pausing 120 seconds and aborting.\n")
io.stdout:flush() io.stdout:flush()
os.execute("sleep 120") os.execute("sleep 120")
killgrab = true killgrab = true

Loading…
Cancel
Save