You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

159 lines
4.4 KiB
Lua

9 years ago
-- Run the two helper scripts from the current working directory. What they
-- define is not visible in this file (presumably URL-encoding and
-- table-printing helpers, judging by the file names -- confirm).
dofile("urlcode.lua")
dofile("table_show.lua")
-- Number of requests reported so far; incremented and printed by
-- wget.callbacks.httploop_result.
local url_count = 0
-- Number of consecutive failed requests; httploop_result aborts once it
-- reaches 6 and resets it to 0 on success or after giving up.
local tries = 0
-- Item description taken from the environment. item_value is spliced into the
-- Lua patterns that decide which URLs belong to this item; item_type is not
-- read anywhere in the visible code.
local item_type = os.getenv('item_type')
local item_value = os.getenv('item_value')
-- Set of URLs known to be fetched (key = URL string, value = true).
local downloaded = {}
-- Set of URLs already handed back to wget by get_urls (key = URL, value = true).
local addedtolist = {}
--- Read a whole file into a string.
-- @param file path of the file to read; nil/false means "nothing to read"
-- @return the complete file contents, or "" when no path was supplied
-- Raises an error (through assert) when the file cannot be opened.
function read_file(file)
  -- Guard clause: no path, no content.
  if not file then
    return ""
  end

  local handle = assert(io.open(file))
  local contents = handle:read("*all")
  handle:close()
  return contents
end
--- Decide whether a link found on a page should be followed.
--
-- A link is rejected when it is already recorded in `downloaded` or
-- `addedtolist`. Otherwise it is accepted when either
--   * its `link_expect_html` flag is 0 (presumably a page requisite rather
--     than an HTML page -- confirm against the wget-lua documentation), or
--   * the URL contains item_value followed by exactly one more character
--     from [0-9a-z], with a non-[a-z0-9] character in front of it.
--
-- Only `urlpos` is used; the remaining parameters are accepted because the
-- hook is called with them.
--
-- @param urlpos table with the fields url.url (string) and link_expect_html
-- @return true to follow the link, false to skip it
wget.callbacks.download_child_p = function(urlpos, parent, depth, start_url_parsed, iri, verdict, reason)
  local url = urlpos["url"]["url"]
  local html = urlpos["link_expect_html"]

  -- Anything already fetched or queued is never requested again. After this
  -- guard no further bookkeeping check is needed (the original repeated one
  -- that was always true and left an implicit nil return behind it).
  if downloaded[url] == true or addedtolist[url] == true then
    return false
  end

  if html == 0 then
    return true
  end

  -- item_value comes from the environment, so escape every non-alphanumeric
  -- character before using it inside a Lua pattern. The extra parentheses
  -- drop gsub's second return value (the replacement count).
  local id = (string.gsub(item_value, "%W", "%%%0"))
  local item_pattern = "[^a-z0-9]" .. id .. "[0-9a-z]"

  -- Exactly one trailing id character: match the id, but not a longer one.
  if string.match(url, item_pattern)
    and not string.match(url, item_pattern .. "[0-9a-z]") then
    return true
  end
  return false
end
--- Collect additional URLs to fetch from a downloaded file.
--
-- The page URL is recorded in `downloaded`. The file is scanned only when the
-- page URL itself belongs to the current item (item_value followed by exactly
-- one more [0-9a-z] character). Quoted absolute URLs, protocol-relative URLs
-- ("//host/...") and host-relative paths ("/...") are extracted and filtered
-- through check().
--
-- Only `file` and `url` are used; `is_css` and `iri` are ignored.
--
-- @param file path of the downloaded file (passed to read_file)
-- @param url  URL the file was fetched from
-- @return array of tables of the form { url = <string> }
wget.callbacks.get_urls = function(file, url, is_css, iri)
  local urls = {}

  downloaded[url] = true

  -- item_value comes from the environment; escape non-alphanumerics so it is
  -- matched literally. Parentheses drop gsub's replacement count.
  local id = (string.gsub(item_value, "%W", "%%%0"))
  local item_pattern = "[^a-z0-9]" .. id .. "[0-9a-z]"
  local longer_id_pattern = item_pattern .. "[0-9a-z]"

  -- Queue `candidate` when it is new and either belongs to the item or points
  -- at redditmedia.com, and does not carry a longer id.
  local function check(candidate)
    if downloaded[candidate] == true or addedtolist[candidate] == true then
      return
    end
    local wanted = string.match(candidate, item_pattern)
      or string.match(candidate, "redditmedia%.com")
    if not wanted or string.match(candidate, longer_id_pattern) then
      return
    end

    -- URLs lifted from HTML attributes carry "&amp;" for "&"; decode it.
    -- (The previous code replaced "&" with "&", which changed nothing.)
    local target = (string.gsub(candidate, "&amp;", "&"))
    -- Drop a "#fragment" if present; fall back to the URL itself when there
    -- is none, so a nil can never be used as a table key.
    target = string.match(target, "^([^#]+)#") or target

    -- Remember both spellings, but queue the cleaned URL only once.
    local already_known = downloaded[target] == true or addedtolist[target] == true
    addedtolist[candidate] = true
    addedtolist[target] = true
    if not already_known then
      urls[#urls + 1] = { url = target }
    end
  end

  if string.match(url, item_pattern) and not string.match(url, longer_id_pattern) then
    local html = read_file(file)
    local quotes = { '"', "'" }
    -- Scheme and host of the current page, used to resolve "/path" links.
    -- No trailing slash is required, so "http://host" resolves as well.
    local origin = string.match(url, "^(https?://[^/]+)")

    -- Absolute URLs, double-quoted first and then single-quoted.
    for _, q in ipairs(quotes) do
      for link in string.gmatch(html, q .. "(https?://[^" .. q .. "]+)" .. q) do
        check(link)
      end
    end

    -- Quoted values starting with "/", in the same quote order.
    for _, q in ipairs(quotes) do
      for path in string.gmatch(html, q .. "(/[^" .. q .. "]+)" .. q) do
        if string.sub(path, 1, 2) == "//" then
          -- Protocol-relative link: assume plain http.
          check("http:" .. path)
        elseif origin then
          check(origin .. path)
        end
      end
    end
  end

  return urls
end
--- Report the result of a request and decide how wget should proceed.
--
-- Prints "<count>=<status> <url>" for every request. Responses with a status
-- of 200-399 are recorded in `downloaded`. A status of 0, any 5xx, and any 4xx
-- other than 403/404 make the hook sleep for 10 seconds and ask for a retry;
-- after 6 consecutive failures it aborts instead.
--
-- `err` is not used.
--
-- @param url       table whose `url` field holds the requested URL
-- @param http_stat table whose `statcode` field holds the numeric status
-- @return wget.actions.CONTINUE (retry), wget.actions.ABORT (give up) or
--         wget.actions.NOTHING (carry on normally)
wget.callbacks.httploop_result = function(url, err, http_stat)
  -- NEW for 2014: Slightly more verbose messages because people keep
  -- complaining that it's not moving or not working
  -- Kept local: it was previously assigned without `local` and leaked into
  -- the global environment.
  local status_code = http_stat["statcode"]

  url_count = url_count + 1
  io.stdout:write(url_count .. "=" .. status_code .. " " .. url["url"] .. ". \n")
  io.stdout:flush()

  if status_code >= 200 and status_code <= 399 then
    -- Record https URLs under their http:// spelling. The pattern is anchored
    -- so that an "https://" embedded later in the URL (for example inside a
    -- query string) is left alone; non-https URLs come back unchanged.
    -- Parentheses drop gsub's replacement count.
    local key = (string.gsub(url.url, "^https://", "http://"))
    downloaded[key] = true
  end

  -- Same set of statuses as before: 0, every 5xx, and 4xx except 403/404.
  local should_retry = status_code == 0
    or (status_code >= 400 and status_code ~= 403 and status_code ~= 404)

  if should_retry then
    io.stdout:write("\nServer returned "..http_stat.statcode..". Sleeping.\n")
    io.stdout:flush()
    os.execute("sleep 10")
    tries = tries + 1
    if tries >= 6 then
      io.stdout:write("\nI give up...\n")
      io.stdout:flush()
      tries = 0
      return wget.actions.ABORT
    end
    return wget.actions.CONTINUE
  end

  tries = 0

  -- Optional pause between successful requests; with the value 0 the sleep
  -- below never runs.
  local sleep_time = 0
  if sleep_time > 0.001 then
    os.execute("sleep " .. sleep_time)
  end

  return wget.actions.NOTHING
end