diff --git a/pipeline.py b/pipeline.py index b2cc89a..3fbe033 100644 --- a/pipeline.py +++ b/pipeline.py @@ -59,7 +59,7 @@ if not WGET_AT: # # Update this each time you make a non-cosmetic change. # It will be added to the WARC files and reported to the tracker. -VERSION = '20230612.01' +VERSION = '20230612.02' TRACKER_ID = 'reddit' TRACKER_HOST = 'legacy-api.arpa.li' MULTI_ITEM_SIZE = 40 diff --git a/reddit.lua b/reddit.lua index 1f60972..47cc85a 100644 --- a/reddit.lua +++ b/reddit.lua @@ -711,11 +711,16 @@ wget.callbacks.write_to_warc = function(url, http_stat) return false end end + local comments_comment = string.match(url["url"], "^https?://www%.reddit%.com/r/[^/]+/comments/[^/]+/comment/[^/]+/") if ( string.match(url["url"], "^https?://[^/]+/svc/") and string.match(html, 'level%s*=') ) or ( string.match(url["url"], "^https?://www%.reddit%.com/r/") + and not comments_comment + and not string.match(html, "