mirror of
https://github.com/ArchiveTeam/reddit-grab
synced 2024-11-12 01:10:50 +00:00
Version 20230612.02. Add Reddit problem check for /comments/.../comment/ URL.
This commit is contained in:
parent
57adbb381c
commit
4936505b0f
@ -59,7 +59,7 @@ if not WGET_AT:
|
|||||||
#
|
#
|
||||||
# Update this each time you make a non-cosmetic change.
|
# Update this each time you make a non-cosmetic change.
|
||||||
# It will be added to the WARC files and reported to the tracker.
|
# It will be added to the WARC files and reported to the tracker.
|
||||||
VERSION = '20230612.01'
|
VERSION = '20230612.02'
|
||||||
TRACKER_ID = 'reddit'
|
TRACKER_ID = 'reddit'
|
||||||
TRACKER_HOST = 'legacy-api.arpa.li'
|
TRACKER_HOST = 'legacy-api.arpa.li'
|
||||||
MULTI_ITEM_SIZE = 40
|
MULTI_ITEM_SIZE = 40
|
||||||
|
@ -711,11 +711,16 @@ wget.callbacks.write_to_warc = function(url, http_stat)
|
|||||||
return false
|
return false
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
local comments_comment = string.match(url["url"], "^https?://www%.reddit%.com/r/[^/]+/comments/[^/]+/comment/[^/]+/")
|
||||||
if (
|
if (
|
||||||
string.match(url["url"], "^https?://[^/]+/svc/")
|
string.match(url["url"], "^https?://[^/]+/svc/")
|
||||||
and string.match(html, 'level%s*=')
|
and string.match(html, 'level%s*=')
|
||||||
) or (
|
) or (
|
||||||
string.match(url["url"], "^https?://www%.reddit%.com/r/")
|
string.match(url["url"], "^https?://www%.reddit%.com/r/")
|
||||||
|
and not comments_comment
|
||||||
|
and not string.match(html, "<shreddit%-redirect")
|
||||||
|
) or (
|
||||||
|
comments_comment
|
||||||
and not string.match(html, "<shreddit%-title")
|
and not string.match(html, "<shreddit%-title")
|
||||||
) then
|
) then
|
||||||
io.stdout:write("Reddit has problems. Pausing 120 seconds and aborting.\n")
|
io.stdout:write("Reddit has problems. Pausing 120 seconds and aborting.\n")
|
||||||
|
Loading…
Reference in New Issue
Block a user