From 4936505b0f7ebdf19d4300206d1e15caeb244343 Mon Sep 17 00:00:00 2001 From: arkiver Date: Wed, 14 Jun 2023 03:07:27 +0200 Subject: [PATCH] Version 20230612.02. Add Reddit problem check for /comments/.../comment/ URL. --- pipeline.py | 2 +- reddit.lua | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/pipeline.py b/pipeline.py index b2cc89a..3fbe033 100644 --- a/pipeline.py +++ b/pipeline.py @@ -59,7 +59,7 @@ if not WGET_AT: # # Update this each time you make a non-cosmetic change. # It will be added to the WARC files and reported to the tracker. -VERSION = '20230612.01' +VERSION = '20230612.02' TRACKER_ID = 'reddit' TRACKER_HOST = 'legacy-api.arpa.li' MULTI_ITEM_SIZE = 40 diff --git a/reddit.lua b/reddit.lua index 1f60972..47cc85a 100644 --- a/reddit.lua +++ b/reddit.lua @@ -711,11 +711,16 @@ wget.callbacks.write_to_warc = function(url, http_stat) return false end end + local comments_comment = string.match(url["url"], "^https?://www%.reddit%.com/r/[^/]+/comments/[^/]+/comment/[^/]+/") if ( string.match(url["url"], "^https?://[^/]+/svc/") and string.match(html, 'level%s*=') ) or ( string.match(url["url"], "^https?://www%.reddit%.com/r/") + and not comments_comment + and not string.match(html, "