mirror of
https://github.com/ArchiveTeam/reddit-grab
synced 2024-11-10 07:10:38 +00:00
Version 20190729.01; do not get page requisites from outlinks; do not pip install warcio.
This commit is contained in:
parent
8902255c76
commit
4cf7bd18f0
@@ -69,7 +69,7 @@ if not WGET_LUA:
|
|||||||
#
|
#
|
||||||
# Update this each time you make a non-cosmetic change.
|
# Update this each time you make a non-cosmetic change.
|
||||||
# It will be added to the WARC files and reported to the tracker.
|
# It will be added to the WARC files and reported to the tracker.
|
||||||
VERSION = '20190405.01'
|
VERSION = '20190729.01'
|
||||||
USER_AGENT = 'ArchiveTeam'
|
USER_AGENT = 'ArchiveTeam'
|
||||||
TRACKER_ID = 'reddit'
|
TRACKER_ID = 'reddit'
|
||||||
TRACKER_HOST = 'tracker.archiveteam.org'
|
TRACKER_HOST = 'tracker.archiveteam.org'
|
||||||
|
11
reddit.lua
11
reddit.lua
@@ -70,8 +70,7 @@ allowed = function(url, parenturl)
|
|||||||
return false
|
return false
|
||||||
end
|
end
|
||||||
|
|
||||||
if string.match(url, "^https?://i%.redd%.it/")
|
if string.match(url, "^https?://[^/]*redditmedia%.com/")
|
||||||
or string.match(url, "^https?://[^/]*redditmedia%.com/")
|
|
||||||
or string.match(url, "^https?://old%.reddit%.com/api/morechildren$")
|
or string.match(url, "^https?://old%.reddit%.com/api/morechildren$")
|
||||||
or string.match(url, "^https?://v%.redd%.it/[^/]+/[^/]+$") then
|
or string.match(url, "^https?://v%.redd%.it/[^/]+/[^/]+$") then
|
||||||
return true
|
return true
|
||||||
@@ -84,13 +83,11 @@ allowed = function(url, parenturl)
|
|||||||
end
|
end
|
||||||
|
|
||||||
if parenturl
|
if parenturl
|
||||||
and (string.match(parenturl, "^https?://www%.reddit%.com/") or outlinks[parenturl])
|
and string.match(parenturl, "^https?://www%.reddit%.com/")
|
||||||
and not string.match(url, "^https?://[^/]*reddit%.com/")
|
and not string.match(url, "^https?://[^/]*reddit%.com/")
|
||||||
and not string.match(url, "^https?://[^/]*youtube%.com")
|
and not string.match(url, "^https?://[^/]*youtube%.com")
|
||||||
and not string.match(url, "^https?://[^/]*youtu%.be") then
|
and not string.match(url, "^https?://[^/]*youtu%.be")
|
||||||
if outlinks[parenturl] == nil then
|
and not string.match(url, "^https?://[^/]*redd%.it/") then
|
||||||
outlinks[url] = true
|
|
||||||
end
|
|
||||||
return true
|
return true
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@@ -1,17 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
PIP=pip
|
|
||||||
|
|
||||||
if type pip3 > /dev/null 2>&1
|
|
||||||
then
|
|
||||||
PIP=pip3
|
|
||||||
fi
|
|
||||||
|
|
||||||
echo "Installing warcio"
|
|
||||||
if ! sudo $PIP install warcio --upgrade
|
|
||||||
then
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
exit 0
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user