Version 20200730.01. Support /user/ posts better (like /r/).

This commit is contained in:
arkiver 2020-07-29 18:40:05 -04:00
parent 450d4e0413
commit 23bfe8b12c
2 changed files with 10 additions and 1 deletion

View File

@ -54,7 +54,7 @@ if not WGET_AT:
# #
# Update this each time you make a non-cosmetic change. # Update this each time you make a non-cosmetic change.
# It will be added to the WARC files and reported to the tracker. # It will be added to the WARC files and reported to the tracker.
VERSION = '20200728.01' VERSION = '20200730.01'
USER_AGENT = 'Archive Team' USER_AGENT = 'Archive Team'
TRACKER_ID = 'reddittest' TRACKER_ID = 'reddittest'
TRACKER_HOST = 'trackerproxy.meo.ws' TRACKER_HOST = 'trackerproxy.meo.ws'

View File

@ -93,6 +93,11 @@ allowed = function(url, parenturl)
and string.match(parenturl, "^https?://[^/]*reddit%.com/r/[^/]+/duplicates/") and string.match(parenturl, "^https?://[^/]*reddit%.com/r/[^/]+/duplicates/")
and string.match(url, "^https?://[^/]*reddit%.com/r/[^/]+/duplicates/") and string.match(url, "^https?://[^/]*reddit%.com/r/[^/]+/duplicates/")
) )
or (
parenturl
and string.match(parenturl, "^https?://[^/]*reddit%.com/user/[^/]+/duplicates/")
and string.match(url, "^https?://[^/]*reddit%.com/user/[^/]+/duplicates/")
)
or not ( or not (
string.match(url, "^https?://[^/]*redd%.it/") string.match(url, "^https?://[^/]*redd%.it/")
or string.match(url, "^https?://[^/]*reddit%.com/") or string.match(url, "^https?://[^/]*reddit%.com/")
@ -277,6 +282,7 @@ wget.callbacks.get_urls = function(file, url, is_css, iri)
end end
end end
elseif string.match(url, "^https?://www%.reddit%.com/r/[^/]+/comments/[^/]") elseif string.match(url, "^https?://www%.reddit%.com/r/[^/]+/comments/[^/]")
or string.match(url, "^https?://www%.reddit%.com/user/[^/]+/comments/[^/]")
or string.match(url, "^https?://www%.reddit%.com/comments/[^/]") or string.match(url, "^https?://www%.reddit%.com/comments/[^/]")
or string.match(url, "^https?://gateway%.reddit%.com/desktopapi/v1/morecomments/t3_[^%?]") then or string.match(url, "^https?://gateway%.reddit%.com/desktopapi/v1/morecomments/t3_[^%?]") then
local comments_data = nil local comments_data = nil
@ -295,6 +301,9 @@ wget.callbacks.get_urls = function(file, url, is_css, iri)
abortgrab = true abortgrab = true
end end
local comment_id = string.match(url, "^https?://www%.reddit%.com/r/[^/]+/comments/([^/]+)") local comment_id = string.match(url, "^https?://www%.reddit%.com/r/[^/]+/comments/([^/]+)")
if comment_id == nil then
comment_id = string.match(url, "^https?://www%.reddit%.com/user/[^/]+/comments/([^/]+)")
end
if comment_id == nil then if comment_id == nil then
comment_id = string.match(url, "^https?://www%.reddit%.com/comments/([^/]+)") comment_id = string.match(url, "^https?://www%.reddit%.com/comments/([^/]+)")
end end