Version 20220109.02. Cut off URL at space when found between brackets without href= in front.

pull/13/head
arkiver 2 years ago
parent df35317e0c
commit 383c101aef

@@ -59,7 +59,7 @@ if not WGET_AT:
#
# Update this each time you make a non-cosmetic change.
# It will be added to the WARC files and reported to the tracker.
VERSION = '20220109.01'
VERSION = '20220109.02'
TRACKER_ID = 'reddit'
TRACKER_HOST = 'legacy-api.arpa.li'
MULTI_ITEM_SIZE = 20

@@ -528,10 +528,10 @@ wget.callbacks.get_urls = function(file, url, is_css, iri)
end
end
end
for newurl in string.gmatch(string.gsub(html, "&quot;", '"'), '([^"]+)') do
for newurl in string.gmatch(string.gsub(html, "&quot;", '"'), '([^"%s]+)') do
checknewurl(newurl)
end
for newurl in string.gmatch(string.gsub(html, "&#039;", "'"), "([^']+)") do
for newurl in string.gmatch(string.gsub(html, "&#039;", "'"), "([^'%s]+)") do
checknewurl(newurl)
end
for newurl in string.gmatch(html, ">%s*([^<%s]+)") do

Loading…
Cancel
Save