Version 20230619.02. Accept 404 on mediaembed URL.

pull/18/head
arkiver 11 months ago
parent d2571cde06
commit f1ef7d1697

@@ -59,7 +59,7 @@ if not WGET_AT:
 #
 # Update this each time you make a non-cosmetic change.
 # It will be added to the WARC files and reported to the tracker.
-VERSION = '20230619.01'
+VERSION = '20230619.02'
 TRACKER_ID = 'reddit'
 TRACKER_HOST = 'legacy-api.arpa.li'
 MULTI_ITEM_SIZE = 40

@@ -657,13 +657,18 @@ wget.callbacks.write_to_warc = function(url, http_stat)
 end
 end
 if (
-http_stat["len"] == 0
-and status_code == 200
-) or (
-status_code ~= 200
-and status_code ~= 301
-and status_code ~= 302
-and status_code ~= 308
+(
+http_stat["len"] == 0
+and status_code == 200
+) or (
+status_code ~= 200
+and status_code ~= 301
+and status_code ~= 302
+and status_code ~= 308
+)
+) and not (
+string.match(url["url"], "^https?://[^/]*redditmedia%.com/mediaembed/")
+and status_code == 404
 ) then
 print("Not writing to WARC.")
 retry_url = true

Loading…
Cancel
Save