From d4d5c9a93f844ff1789fe017c66febca2f2a45b0 Mon Sep 17 00:00:00 2001 From: Arkiver2 Date: Tue, 30 Jul 2019 00:18:23 +0200 Subject: [PATCH] Skip amp.reddit.com post pages. --- reddit.lua | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/reddit.lua b/reddit.lua index 0fadac4..58c5fd2 100644 --- a/reddit.lua +++ b/reddit.lua @@ -15,7 +15,6 @@ local abortgrab = false local posts = {} local requested_children = {} -local outlinks = {} for ignore in io.open("ignore-list", "r"):lines() do downloaded[ignore] = true @@ -51,7 +50,8 @@ allowed = function(url, parenturl) or string.match(url, "^https?://[^/]*reddit%.app%.link/") or string.match(url, "^https?://out%.reddit%.com/r/") or (string.match(url, "^https?://gateway%.reddit%.com/") and not string.match(url, "/morecomments/")) - or string.match(url, "/%.rss$") then + or string.match(url, "/%.rss$") + or (parenturl and string.match(url, "^https?://amp%.reddit%.com/")) then return false end @@ -72,7 +72,8 @@ allowed = function(url, parenturl) if string.match(url, "^https?://[^/]*redditmedia%.com/") or string.match(url, "^https?://old%.reddit%.com/api/morechildren$") - or string.match(url, "^https?://v%.redd%.it/[^/]+/[^/]+$") then + or string.match(url, "^https?://v%.redd%.it/[^/]+/[^/]+$") + or string.match(url, "^https?://preview%.redd%.it/[^/]+/[^/]+$") then return true end @@ -103,7 +104,7 @@ wget.callbacks.download_child_p = function(urlpos, parent, depth, start_url_pars end if (downloaded[url] ~= true and addedtolist[url] ~= true) - and (allowed(url, parent["url"]) or html == 0) then + and (allowed(url, parent["url"]) or (allowed(parent["url"]) and html == 0)) then addedtolist[url] = true return true end @@ -290,7 +291,7 @@ wget.callbacks.httploop_result = function(url, err, http_stat) io.stdout:flush() local maxtries = 8 if not allowed(url["url"], nil) then - maxtries = 2 + maxtries = 0 end if tries > maxtries then io.stdout:write("\nI give up...\n")