Version 20230612.02. Add Reddit problem check for /comments/.../comment/ URL.

This commit is contained in:
arkiver 2023-06-14 03:07:27 +02:00
parent 57adbb381c
commit 4936505b0f
2 changed files with 6 additions and 1 deletions

View File

@@ -59,7 +59,7 @@ if not WGET_AT:
# #
# Update this each time you make a non-cosmetic change. # Update this each time you make a non-cosmetic change.
# It will be added to the WARC files and reported to the tracker. # It will be added to the WARC files and reported to the tracker.
VERSION = '20230612.01' VERSION = '20230612.02'
TRACKER_ID = 'reddit' TRACKER_ID = 'reddit'
TRACKER_HOST = 'legacy-api.arpa.li' TRACKER_HOST = 'legacy-api.arpa.li'
MULTI_ITEM_SIZE = 40 MULTI_ITEM_SIZE = 40

View File

@@ -711,11 +711,16 @@ wget.callbacks.write_to_warc = function(url, http_stat)
return false return false
end end
end end
local comments_comment = string.match(url["url"], "^https?://www%.reddit%.com/r/[^/]+/comments/[^/]+/comment/[^/]+/")
if ( if (
string.match(url["url"], "^https?://[^/]+/svc/") string.match(url["url"], "^https?://[^/]+/svc/")
and string.match(html, 'level%s*=') and string.match(html, 'level%s*=')
) or ( ) or (
string.match(url["url"], "^https?://www%.reddit%.com/r/") string.match(url["url"], "^https?://www%.reddit%.com/r/")
and not comments_comment
and not string.match(html, "<shreddit%-redirect")
) or (
comments_comment
and not string.match(html, "<shreddit%-title") and not string.match(html, "<shreddit%-title")
) then ) then
io.stdout:write("Reddit has problems. Pausing 120 seconds and aborting.\n") io.stdout:write("Reddit has problems. Pausing 120 seconds and aborting.\n")