From 9d1ea0c6888125db219b9007f8434e1bdb472dec Mon Sep 17 00:00:00 2001 From: Arkiver2 Date: Fri, 22 Feb 2019 01:15:18 +0100 Subject: [PATCH] rewrite --- .gitignore | 4 + JSON.lua | 1053 ++++++++++++++++++++++++++++++++++++++++++++ README.md | 48 +- cookies | 1 + get-wget-lua.sh | 0 ignore-list | 0 pipeline.py | 269 ++++++----- reddit.lua | 296 ++++++++----- warrior-install.sh | 17 + wget-lua-warrior | Bin 10 files changed, 1474 insertions(+), 214 deletions(-) create mode 100644 .gitignore create mode 100644 JSON.lua mode change 100644 => 100755 get-wget-lua.sh create mode 100644 ignore-list create mode 100755 warrior-install.sh mode change 100644 => 100755 wget-lua-warrior diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..44ebf6a --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +*~ +*.pyc +data/ +wget-lua diff --git a/JSON.lua b/JSON.lua new file mode 100644 index 0000000..5f11425 --- /dev/null +++ b/JSON.lua @@ -0,0 +1,1053 @@ +-- -*- coding: utf-8 -*- +-- +-- Simple JSON encoding and decoding in pure Lua. +-- +-- Copyright 2010-2014 Jeffrey Friedl +-- http://regex.info/blog/ +-- +-- Latest version: http://regex.info/blog/lua/json +-- +-- This code is released under a Creative Commons CC-BY "Attribution" License: +-- http://creativecommons.org/licenses/by/3.0/deed.en_US +-- +-- It can be used for any purpose so long as the copyright notice above, +-- the web-page links above, and the 'AUTHOR_NOTE' string below are +-- maintained. Enjoy. +-- +local VERSION = 20141223.14 -- version history at end of file +local AUTHOR_NOTE = "-[ JSON.lua package by Jeffrey Friedl (http://regex.info/blog/lua/json) version 20141223.14 ]-" + +-- +-- The 'AUTHOR_NOTE' variable exists so that information about the source +-- of the package is maintained even in compiled versions. It's also +-- included in OBJDEF below mostly to quiet warnings about unused variables. +-- +local OBJDEF = { + VERSION = VERSION, + AUTHOR_NOTE = AUTHOR_NOTE, +} + + +-- +-- Simple JSON encoding and decoding in pure Lua. +-- http://www.json.org/ +-- +-- +-- JSON = assert(loadfile "JSON.lua")() -- one-time load of the routines +-- +-- local lua_value = JSON:decode(raw_json_text) +-- +-- local raw_json_text = JSON:encode(lua_table_or_value) +-- local pretty_json_text = JSON:encode_pretty(lua_table_or_value) -- "pretty printed" version for human readability +-- +-- +-- +-- DECODING (from a JSON string to a Lua table) +-- +-- +-- JSON = assert(loadfile "JSON.lua")() -- one-time load of the routines +-- +-- local lua_value = JSON:decode(raw_json_text) +-- +-- If the JSON text is for an object or an array, e.g. +-- { "what": "books", "count": 3 } +-- or +-- [ "Larry", "Curly", "Moe" ] +-- +-- the result is a Lua table, e.g. +-- { what = "books", count = 3 } +-- or +-- { "Larry", "Curly", "Moe" } +-- +-- +-- The encode and decode routines accept an optional second argument, +-- "etc", which is not used during encoding or decoding, but upon error +-- is passed along to error handlers. It can be of any type (including nil). +-- +-- +-- +-- ERROR HANDLING +-- +-- With most errors during decoding, this code calls +-- +-- JSON:onDecodeError(message, text, location, etc) +-- +-- with a message about the error, and if known, the JSON text being +-- parsed and the byte count where the problem was discovered. You can +-- replace the default JSON:onDecodeError() with your own function. 
+--
+-- The default onDecodeError() merely augments the message with data
+-- about the text and the location if known (and if a second 'etc'
+-- argument had been provided to decode(), its value is tacked onto the
+-- message as well), and then calls JSON.assert(), which itself defaults
+-- to Lua's built-in assert(), and can also be overridden.
+--
+-- For example, in an Adobe Lightroom plugin, you might use something like
+--
+--          function JSON:onDecodeError(message, text, location, etc)
+--             LrErrors.throwUserError("Internal Error: invalid JSON data")
+--          end
+--
+-- or even just
+--
+--          function JSON.assert(message)
+--             LrErrors.throwUserError("Internal Error: " .. message)
+--          end
+--
+-- If JSON:decode() is passed a nil, this is called instead:
+--
+--       JSON:onDecodeOfNilError(message, nil, nil, etc)
+--
+-- and if JSON:decode() is passed HTML instead of JSON, this is called:
+--
+--       JSON:onDecodeOfHTMLError(message, text, nil, etc)
+--
+-- The use of the fourth 'etc' argument allows stronger coordination
+-- between decoding and error reporting, especially when you provide your
+-- own error-handling routines. Continuing with the Adobe Lightroom
+-- plugin example:
+--
+--          function JSON:onDecodeError(message, text, location, etc)
+--             local note = "Internal Error: invalid JSON data"
+--             if type(etc) == 'table' and etc.photo then
+--                note = note .. " while processing for " .. etc.photo:getFormattedMetadata('fileName')
+--             end
+--             LrErrors.throwUserError(note)
+--          end
+--
+--          :
+--          :
+--
+--          for i, photo in ipairs(photosToProcess) do
+--               :
+--               :
+--               local data = JSON:decode(someJsonText, { photo = photo })
+--               :
+--               :
+--          end
+--
+--
+--
+-- DECODING AND STRICT TYPES
+--
+-- Because both JSON objects and JSON arrays are converted to Lua tables,
+-- it's not normally possible to tell which original JSON type a
+-- particular Lua table was derived from, or guarantee decode-encode
+-- round-trip equivalency.
+--
+-- However, if you enable strictTypes, e.g.
+--
+--       JSON = assert(loadfile "JSON.lua")() --load the routines
+--       JSON.strictTypes = true
+--
+-- then the Lua table resulting from the decoding of a JSON object or
+-- JSON array is marked via Lua metatable, so that when re-encoded with
+-- JSON:encode() it ends up as the appropriate JSON type.
+--
+-- (This is not the default because other routines may not work well with
+-- tables that have a metatable set, for example, Lightroom API calls.)
+--
+--
+-- ENCODING (from a lua table to a JSON string)
+--
+--    JSON = assert(loadfile "JSON.lua")() -- one-time load of the routines
+--
+--    local raw_json_text    = JSON:encode(lua_table_or_value)
+--    local pretty_json_text = JSON:encode_pretty(lua_table_or_value) -- "pretty printed" version for human readability
+--    local custom_pretty    = JSON:encode(lua_table_or_value, etc, { pretty = true, indent = "|  ", align_keys = false })
+--
+-- On error during encoding, this code calls:
+--
+--    JSON:onEncodeError(message, etc)
+--
+-- which you can override in your local JSON object.
+--
+-- The 'etc' in the error call is the second argument to encode()
+-- and encode_pretty(), or nil if it wasn't provided.
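+--
+-- As a concrete illustration of the strictTypes behavior described above
+-- (an added sketch, not from the original package docs): an empty JSON
+-- object only survives a decode-encode round trip when strictTypes is on.
+--
+--       JSON:encode(JSON:decode('{}'))   -- produces "[]": an empty table is guessed to be an array
+--
+--       JSON.strictTypes = true
+--       JSON:encode(JSON:decode('{}'))   -- produces "{}": the metatable set during decode is honored
+--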
+-- +-- +-- PRETTY-PRINTING +-- +-- An optional third argument, a table of options, allows a bit of +-- configuration about how the encoding takes place: +-- +-- pretty = JSON:encode(val, etc, { +-- pretty = true, -- if false, no other options matter +-- indent = " ", -- this provides for a three-space indent per nesting level +-- align_keys = false, -- see below +-- }) +-- +-- encode() and encode_pretty() are identical except that encode_pretty() +-- provides a default options table if none given in the call: +-- +-- { pretty = true, align_keys = false, indent = " " } +-- +-- For example, if +-- +-- JSON:encode(data) +-- +-- produces: +-- +-- {"city":"Kyoto","climate":{"avg_temp":16,"humidity":"high","snowfall":"minimal"},"country":"Japan","wards":11} +-- +-- then +-- +-- JSON:encode_pretty(data) +-- +-- produces: +-- +-- { +-- "city": "Kyoto", +-- "climate": { +-- "avg_temp": 16, +-- "humidity": "high", +-- "snowfall": "minimal" +-- }, +-- "country": "Japan", +-- "wards": 11 +-- } +-- +-- The following three lines return identical results: +-- JSON:encode_pretty(data) +-- JSON:encode_pretty(data, nil, { pretty = true, align_keys = false, indent = " " }) +-- JSON:encode (data, nil, { pretty = true, align_keys = false, indent = " " }) +-- +-- An example of setting your own indent string: +-- +-- JSON:encode_pretty(data, nil, { pretty = true, indent = "| " }) +-- +-- produces: +-- +-- { +-- | "city": "Kyoto", +-- | "climate": { +-- | | "avg_temp": 16, +-- | | "humidity": "high", +-- | | "snowfall": "minimal" +-- | }, +-- | "country": "Japan", +-- | "wards": 11 +-- } +-- +-- An example of setting align_keys to true: +-- +-- JSON:encode_pretty(data, nil, { pretty = true, indent = " ", align_keys = true }) +-- +-- produces: +-- +-- { +-- "city": "Kyoto", +-- "climate": { +-- "avg_temp": 16, +-- "humidity": "high", +-- "snowfall": "minimal" +-- }, +-- "country": "Japan", +-- "wards": 11 +-- } +-- +-- which I must admit is kinda ugly, sorry. This was the default for +-- encode_pretty() prior to version 20141223.14. +-- +-- +-- AMBIGUOUS SITUATIONS DURING THE ENCODING +-- +-- During the encode, if a Lua table being encoded contains both string +-- and numeric keys, it fits neither JSON's idea of an object, nor its +-- idea of an array. To get around this, when any string key exists (or +-- when non-positive numeric keys exist), numeric keys are converted to +-- strings. +-- +-- For example, +-- JSON:encode({ "one", "two", "three", SOMESTRING = "some string" })) +-- produces the JSON object +-- {"1":"one","2":"two","3":"three","SOMESTRING":"some string"} +-- +-- To prohibit this conversion and instead make it an error condition, set +-- JSON.noKeyConversion = true +-- + + + + +-- +-- SUMMARY OF METHODS YOU CAN OVERRIDE IN YOUR LOCAL LUA JSON OBJECT +-- +-- assert +-- onDecodeError +-- onDecodeOfNilError +-- onDecodeOfHTMLError +-- onEncodeError +-- +-- If you want to create a separate Lua JSON object with its own error handlers, +-- you can reload JSON.lua or use the :new() method. 
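+--
+-- For instance, a separate instance with its own behavior can be created
+-- like this (an illustrative sketch; 'StrictJSON' is just a local name):
+--
+--       local JSON = assert(loadfile "JSON.lua")()
+--       local StrictJSON = JSON:new()
+--       StrictJSON.strictTypes = true    -- affects only this instance
+--       function StrictJSON:onDecodeError(message, text, location, etc)
+--          error("bad JSON: " .. tostring(message))
+--       end
+--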
+-- +--------------------------------------------------------------------------- + +local default_pretty_indent = " " +local default_pretty_options = { pretty = true, align_keys = false, indent = default_pretty_indent } + +local isArray = { __tostring = function() return "JSON array" end } isArray.__index = isArray +local isObject = { __tostring = function() return "JSON object" end } isObject.__index = isObject + + +function OBJDEF:newArray(tbl) + return setmetatable(tbl or {}, isArray) +end + +function OBJDEF:newObject(tbl) + return setmetatable(tbl or {}, isObject) +end + +local function unicode_codepoint_as_utf8(codepoint) + -- + -- codepoint is a number + -- + if codepoint <= 127 then + return string.char(codepoint) + + elseif codepoint <= 2047 then + -- + -- 110yyyxx 10xxxxxx <-- useful notation from http://en.wikipedia.org/wiki/Utf8 + -- + local highpart = math.floor(codepoint / 0x40) + local lowpart = codepoint - (0x40 * highpart) + return string.char(0xC0 + highpart, + 0x80 + lowpart) + + elseif codepoint <= 65535 then + -- + -- 1110yyyy 10yyyyxx 10xxxxxx + -- + local highpart = math.floor(codepoint / 0x1000) + local remainder = codepoint - 0x1000 * highpart + local midpart = math.floor(remainder / 0x40) + local lowpart = remainder - 0x40 * midpart + + highpart = 0xE0 + highpart + midpart = 0x80 + midpart + lowpart = 0x80 + lowpart + + -- + -- Check for an invalid character (thanks Andy R. at Adobe). + -- See table 3.7, page 93, in http://www.unicode.org/versions/Unicode5.2.0/ch03.pdf#G28070 + -- + if ( highpart == 0xE0 and midpart < 0xA0 ) or + ( highpart == 0xED and midpart > 0x9F ) or + ( highpart == 0xF0 and midpart < 0x90 ) or + ( highpart == 0xF4 and midpart > 0x8F ) + then + return "?" + else + return string.char(highpart, + midpart, + lowpart) + end + + else + -- + -- 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx + -- + local highpart = math.floor(codepoint / 0x40000) + local remainder = codepoint - 0x40000 * highpart + local midA = math.floor(remainder / 0x1000) + remainder = remainder - 0x1000 * midA + local midB = math.floor(remainder / 0x40) + local lowpart = remainder - 0x40 * midB + + return string.char(0xF0 + highpart, + 0x80 + midA, + 0x80 + midB, + 0x80 + lowpart) + end +end + +function OBJDEF:onDecodeError(message, text, location, etc) + if text then + if location then + message = string.format("%s at char %d of: %s", message, location, text) + else + message = string.format("%s: %s", message, text) + end + end + + if etc ~= nil then + message = message .. " (" .. OBJDEF:encode(etc) .. ")" + end + + if self.assert then + self.assert(false, message) + else + assert(false, message) + end +end + +OBJDEF.onDecodeOfNilError = OBJDEF.onDecodeError +OBJDEF.onDecodeOfHTMLError = OBJDEF.onDecodeError + +function OBJDEF:onEncodeError(message, etc) + if etc ~= nil then + message = message .. " (" .. OBJDEF:encode(etc) .. 
")" + end + + if self.assert then + self.assert(false, message) + else + assert(false, message) + end +end + +local function grok_number(self, text, start, etc) + -- + -- Grab the integer part + -- + local integer_part = text:match('^-?[1-9]%d*', start) + or text:match("^-?0", start) + + if not integer_part then + self:onDecodeError("expected number", text, start, etc) + end + + local i = start + integer_part:len() + + -- + -- Grab an optional decimal part + -- + local decimal_part = text:match('^%.%d+', i) or "" + + i = i + decimal_part:len() + + -- + -- Grab an optional exponential part + -- + local exponent_part = text:match('^[eE][-+]?%d+', i) or "" + + i = i + exponent_part:len() + + local full_number_text = integer_part .. decimal_part .. exponent_part + local as_number = tonumber(full_number_text) + + if not as_number then + self:onDecodeError("bad number", text, start, etc) + end + + return as_number, i +end + + +local function grok_string(self, text, start, etc) + + if text:sub(start,start) ~= '"' then + self:onDecodeError("expected string's opening quote", text, start, etc) + end + + local i = start + 1 -- +1 to bypass the initial quote + local text_len = text:len() + local VALUE = "" + while i <= text_len do + local c = text:sub(i,i) + if c == '"' then + return VALUE, i + 1 + end + if c ~= '\\' then + VALUE = VALUE .. c + i = i + 1 + elseif text:match('^\\b', i) then + VALUE = VALUE .. "\b" + i = i + 2 + elseif text:match('^\\f', i) then + VALUE = VALUE .. "\f" + i = i + 2 + elseif text:match('^\\n', i) then + VALUE = VALUE .. "\n" + i = i + 2 + elseif text:match('^\\r', i) then + VALUE = VALUE .. "\r" + i = i + 2 + elseif text:match('^\\t', i) then + VALUE = VALUE .. "\t" + i = i + 2 + else + local hex = text:match('^\\u([0123456789aAbBcCdDeEfF][0123456789aAbBcCdDeEfF][0123456789aAbBcCdDeEfF][0123456789aAbBcCdDeEfF])', i) + if hex then + i = i + 6 -- bypass what we just read + + -- We have a Unicode codepoint. It could be standalone, or if in the proper range and + -- followed by another in a specific range, it'll be a two-code surrogate pair. + local codepoint = tonumber(hex, 16) + if codepoint >= 0xD800 and codepoint <= 0xDBFF then + -- it's a hi surrogate... see whether we have a following low + local lo_surrogate = text:match('^\\u([dD][cdefCDEF][0123456789aAbBcCdDeEfF][0123456789aAbBcCdDeEfF])', i) + if lo_surrogate then + i = i + 6 -- bypass the low surrogate we just read + codepoint = 0x2400 + (codepoint - 0xD800) * 0x400 + tonumber(lo_surrogate, 16) + else + -- not a proper low, so we'll just leave the first codepoint as is and spit it out. + end + end + VALUE = VALUE .. unicode_codepoint_as_utf8(codepoint) + + else + + -- just pass through what's escaped + VALUE = VALUE .. 
text:match('^\\(.)', i) + i = i + 2 + end + end + end + + self:onDecodeError("unclosed string", text, start, etc) +end + +local function skip_whitespace(text, start) + + local _, match_end = text:find("^[ \n\r\t]+", start) -- [http://www.ietf.org/rfc/rfc4627.txt] Section 2 + if match_end then + return match_end + 1 + else + return start + end +end + +local grok_one -- assigned later + +local function grok_object(self, text, start, etc) + if text:sub(start,start) ~= '{' then + self:onDecodeError("expected '{'", text, start, etc) + end + + local i = skip_whitespace(text, start + 1) -- +1 to skip the '{' + + local VALUE = self.strictTypes and self:newObject { } or { } + + if text:sub(i,i) == '}' then + return VALUE, i + 1 + end + local text_len = text:len() + while i <= text_len do + local key, new_i = grok_string(self, text, i, etc) + + i = skip_whitespace(text, new_i) + + if text:sub(i, i) ~= ':' then + self:onDecodeError("expected colon", text, i, etc) + end + + i = skip_whitespace(text, i + 1) + + local new_val, new_i = grok_one(self, text, i) + + VALUE[key] = new_val + + -- + -- Expect now either '}' to end things, or a ',' to allow us to continue. + -- + i = skip_whitespace(text, new_i) + + local c = text:sub(i,i) + + if c == '}' then + return VALUE, i + 1 + end + + if text:sub(i, i) ~= ',' then + self:onDecodeError("expected comma or '}'", text, i, etc) + end + + i = skip_whitespace(text, i + 1) + end + + self:onDecodeError("unclosed '{'", text, start, etc) +end + +local function grok_array(self, text, start, etc) + if text:sub(start,start) ~= '[' then + self:onDecodeError("expected '['", text, start, etc) + end + + local i = skip_whitespace(text, start + 1) -- +1 to skip the '[' + local VALUE = self.strictTypes and self:newArray { } or { } + if text:sub(i,i) == ']' then + return VALUE, i + 1 + end + + local VALUE_INDEX = 1 + + local text_len = text:len() + while i <= text_len do + local val, new_i = grok_one(self, text, i) + + -- can't table.insert(VALUE, val) here because it's a no-op if val is nil + VALUE[VALUE_INDEX] = val + VALUE_INDEX = VALUE_INDEX + 1 + + i = skip_whitespace(text, new_i) + + -- + -- Expect now either ']' to end things, or a ',' to allow us to continue. 
+      --
+      local c = text:sub(i,i)
+      if c == ']' then
+         return VALUE, i + 1
+      end
+      if text:sub(i, i) ~= ',' then
+         self:onDecodeError("expected comma or ']'", text, i, etc)
+      end
+      i = skip_whitespace(text, i + 1)
+   end
+   self:onDecodeError("unclosed '['", text, start, etc)
+end
+
+
+grok_one = function(self, text, start, etc)
+   -- Skip any whitespace
+   start = skip_whitespace(text, start)
+
+   if start > text:len() then
+      self:onDecodeError("unexpected end of string", text, nil, etc)
+   end
+
+   if text:find('^"', start) then
+      return grok_string(self, text, start, etc)
+
+   elseif text:find('^[-0123456789 ]', start) then
+      return grok_number(self, text, start, etc)
+
+   elseif text:find('^%{', start) then
+      return grok_object(self, text, start, etc)
+
+   elseif text:find('^%[', start) then
+      return grok_array(self, text, start, etc)
+
+   elseif text:find('^true', start) then
+      return true, start + 4
+
+   elseif text:find('^false', start) then
+      return false, start + 5
+
+   elseif text:find('^null', start) then
+      return nil, start + 4
+
+   else
+      self:onDecodeError("can't parse JSON", text, start, etc)
+   end
+end
+
+function OBJDEF:decode(text, etc)
+   if type(self) ~= 'table' or self.__index ~= OBJDEF then
+      OBJDEF:onDecodeError("JSON:decode must be called in method format", nil, nil, etc)
+   end
+
+   if text == nil then
+      self:onDecodeOfNilError(string.format("nil passed to JSON:decode()"), nil, nil, etc)
+   elseif type(text) ~= 'string' then
+      self:onDecodeError(string.format("expected string argument to JSON:decode(), got %s", type(text)), nil, nil, etc)
+   end
+
+   if text:match('^%s*$') then
+      return nil
+   end
+
+   if text:match('^%s*<') then
+      -- Can't be JSON... we'll assume it's HTML
+      self:onDecodeOfHTMLError(string.format("html passed to JSON:decode()"), text, nil, etc)
+   end
+
+   --
+   -- Ensure that it's not UTF-32 or UTF-16.
+   -- Those are perfectly valid encodings for JSON (as per RFC 4627 section 3),
+   -- but this package can't handle them.
+   --
+   if text:sub(1,1):byte() == 0 or (text:len() >= 2 and text:sub(2,2):byte() == 0) then
+      self:onDecodeError("JSON package groks only UTF-8, sorry", text, nil, etc)
+   end
+
+   local success, value = pcall(grok_one, self, text, 1, etc)
+
+   if success then
+      return value
+   else
+      -- if JSON:onDecodeError() didn't abort out of the pcall, we'll have received the error message here as "value", so pass it along as an assert.
+      if self.assert then
+         self.assert(false, value)
+      else
+         assert(false, value)
+      end
+      -- and if we're still here, return a nil and throw the error message on as a second arg
+      return nil, value
+   end
+end
+
+local function backslash_replacement_function(c)
+   if c == "\n" then
+      return "\\n"
+   elseif c == "\r" then
+      return "\\r"
+   elseif c == "\t" then
+      return "\\t"
+   elseif c == "\b" then
+      return "\\b"
+   elseif c == "\f" then
+      return "\\f"
+   elseif c == '"' then
+      return '\\"'
+   elseif c == '\\' then
+      return '\\\\'
+   else
+      return string.format("\\u%04x", c:byte())
+   end
+end
+
+local chars_to_be_escaped_in_JSON_string
+   = '['
+   ..    '"'    -- class sub-pattern to match a double quote
+   ..    '%\\'  -- class sub-pattern to match a backslash
+   ..    '%z'   -- class sub-pattern to match a null
+   ..    '\001' .. '-' .. '\031' -- class sub-pattern to match control characters
+   .. ']'
+
+local function json_string_literal(value)
+   local newval = value:gsub(chars_to_be_escaped_in_JSON_string, backslash_replacement_function)
+   return '"' .. newval .. '"'
+end
+
+local function object_or_array(self, T, etc)
+   --
+   -- We need to inspect all the keys...
if there are any strings, we'll convert to a JSON + -- object. If there are only numbers, it's a JSON array. + -- + -- If we'll be converting to a JSON object, we'll want to sort the keys so that the + -- end result is deterministic. + -- + local string_keys = { } + local number_keys = { } + local number_keys_must_be_strings = false + local maximum_number_key + + for key in pairs(T) do + if type(key) == 'string' then + table.insert(string_keys, key) + elseif type(key) == 'number' then + table.insert(number_keys, key) + if key <= 0 or key >= math.huge then + number_keys_must_be_strings = true + elseif not maximum_number_key or key > maximum_number_key then + maximum_number_key = key + end + else + self:onEncodeError("can't encode table with a key of type " .. type(key), etc) + end + end + + if #string_keys == 0 and not number_keys_must_be_strings then + -- + -- An empty table, or a numeric-only array + -- + if #number_keys > 0 then + return nil, maximum_number_key -- an array + elseif tostring(T) == "JSON array" then + return nil + elseif tostring(T) == "JSON object" then + return { } + else + -- have to guess, so we'll pick array, since empty arrays are likely more common than empty objects + return nil + end + end + + table.sort(string_keys) + + local map + if #number_keys > 0 then + -- + -- If we're here then we have either mixed string/number keys, or numbers inappropriate for a JSON array + -- It's not ideal, but we'll turn the numbers into strings so that we can at least create a JSON object. + -- + + if self.noKeyConversion then + self:onEncodeError("a table with both numeric and string keys could be an object or array; aborting", etc) + end + + -- + -- Have to make a shallow copy of the source table so we can remap the numeric keys to be strings + -- + map = { } + for key, val in pairs(T) do + map[key] = val + end + + table.sort(number_keys) + + -- + -- Throw numeric keys in there as strings + -- + for _, number_key in ipairs(number_keys) do + local string_key = tostring(number_key) + if map[string_key] == nil then + table.insert(string_keys , string_key) + map[string_key] = T[number_key] + else + self:onEncodeError("conflict converting table with mixed-type keys into a JSON object: key " .. number_key .. " exists both as a string and a number.", etc) + end + end + end + + return string_keys, nil, map +end + +-- +-- Encode +-- +-- 'options' is nil, or a table with possible keys: +-- pretty -- if true, return a pretty-printed version +-- indent -- a string (usually of spaces) used to indent each nested level +-- align_keys -- if true, align all the keys when formatting a table +-- +local encode_value -- must predeclare because it calls itself +function encode_value(self, value, parents, etc, options, indent) + + if value == nil then + return 'null' + + elseif type(value) == 'string' then + return json_string_literal(value) + + elseif type(value) == 'number' then + if value ~= value then + -- + -- NaN (Not a Number). + -- JSON has no NaN, so we have to fudge the best we can. This should really be a package option. + -- + return "null" + elseif value >= math.huge then + -- + -- Positive infinity. JSON has no INF, so we have to fudge the best we can. This should + -- really be a package option. Note: at least with some implementations, positive infinity + -- is both ">= math.huge" and "<= -math.huge", which makes no sense but that's how it is. + -- Negative infinity is properly "<= -math.huge". So, we must be sure to check the ">=" + -- case first. 
+ -- + return "1e+9999" + elseif value <= -math.huge then + -- + -- Negative infinity. + -- JSON has no INF, so we have to fudge the best we can. This should really be a package option. + -- + return "-1e+9999" + else + return tostring(value) + end + + elseif type(value) == 'boolean' then + return tostring(value) + + elseif type(value) ~= 'table' then + self:onEncodeError("can't convert " .. type(value) .. " to JSON", etc) + + else + -- + -- A table to be converted to either a JSON object or array. + -- + local T = value + + if type(options) ~= 'table' then + options = {} + end + if type(indent) ~= 'string' then + indent = "" + end + + if parents[T] then + self:onEncodeError("table " .. tostring(T) .. " is a child of itself", etc) + else + parents[T] = true + end + + local result_value + + local object_keys, maximum_number_key, map = object_or_array(self, T, etc) + if maximum_number_key then + -- + -- An array... + -- + local ITEMS = { } + for i = 1, maximum_number_key do + table.insert(ITEMS, encode_value(self, T[i], parents, etc, options, indent)) + end + + if options.pretty then + result_value = "[ " .. table.concat(ITEMS, ", ") .. " ]" + else + result_value = "[" .. table.concat(ITEMS, ",") .. "]" + end + + elseif object_keys then + -- + -- An object + -- + local TT = map or T + + if options.pretty then + + local KEYS = { } + local max_key_length = 0 + for _, key in ipairs(object_keys) do + local encoded = encode_value(self, tostring(key), parents, etc, options, indent) + if options.align_keys then + max_key_length = math.max(max_key_length, #encoded) + end + table.insert(KEYS, encoded) + end + local key_indent = indent .. tostring(options.indent or "") + local subtable_indent = key_indent .. string.rep(" ", max_key_length) .. (options.align_keys and " " or "") + local FORMAT = "%s%" .. string.format("%d", max_key_length) .. "s: %s" + + local COMBINED_PARTS = { } + for i, key in ipairs(object_keys) do + local encoded_val = encode_value(self, TT[key], parents, etc, options, subtable_indent) + table.insert(COMBINED_PARTS, string.format(FORMAT, key_indent, KEYS[i], encoded_val)) + end + result_value = "{\n" .. table.concat(COMBINED_PARTS, ",\n") .. "\n" .. indent .. "}" + + else + + local PARTS = { } + for _, key in ipairs(object_keys) do + local encoded_val = encode_value(self, TT[key], parents, etc, options, indent) + local encoded_key = encode_value(self, tostring(key), parents, etc, options, indent) + table.insert(PARTS, string.format("%s:%s", encoded_key, encoded_val)) + end + result_value = "{" .. table.concat(PARTS, ",") .. "}" + + end + else + -- + -- An empty array/object... 
we'll treat it as an array, though it should really be an option + -- + result_value = "[]" + end + + parents[T] = false + return result_value + end +end + + +function OBJDEF:encode(value, etc, options) + if type(self) ~= 'table' or self.__index ~= OBJDEF then + OBJDEF:onEncodeError("JSON:encode must be called in method format", etc) + end + return encode_value(self, value, {}, etc, options or nil) +end + +function OBJDEF:encode_pretty(value, etc, options) + if type(self) ~= 'table' or self.__index ~= OBJDEF then + OBJDEF:onEncodeError("JSON:encode_pretty must be called in method format", etc) + end + return encode_value(self, value, {}, etc, options or default_pretty_options) +end + +function OBJDEF.__tostring() + return "JSON encode/decode package" +end + +OBJDEF.__index = OBJDEF + +function OBJDEF:new(args) + local new = { } + + if args then + for key, val in pairs(args) do + new[key] = val + end + end + + return setmetatable(new, OBJDEF) +end + +return OBJDEF:new() + +-- +-- Version history: +-- +-- 20141223.14 The encode_pretty() routine produced fine results for small datasets, but isn't really +-- appropriate for anything large, so with help from Alex Aulbach I've made the encode routines +-- more flexible, and changed the default encode_pretty() to be more generally useful. +-- +-- Added a third 'options' argument to the encode() and encode_pretty() routines, to control +-- how the encoding takes place. +-- +-- Updated docs to add assert() call to the loadfile() line, just as good practice so that +-- if there is a problem loading JSON.lua, the appropriate error message will percolate up. +-- +-- 20140920.13 Put back (in a way that doesn't cause warnings about unused variables) the author string, +-- so that the source of the package, and its version number, are visible in compiled copies. +-- +-- 20140911.12 Minor lua cleanup. +-- Fixed internal reference to 'JSON.noKeyConversion' to reference 'self' instead of 'JSON'. +-- (Thanks to SmugMug's David Parry for these.) +-- +-- 20140418.11 JSON nulls embedded within an array were being ignored, such that +-- ["1",null,null,null,null,null,"seven"], +-- would return +-- {1,"seven"} +-- It's now fixed to properly return +-- {1, nil, nil, nil, nil, nil, "seven"} +-- Thanks to "haddock" for catching the error. +-- +-- 20140116.10 The user's JSON.assert() wasn't always being used. Thanks to "blue" for the heads up. +-- +-- 20131118.9 Update for Lua 5.3... it seems that tostring(2/1) produces "2.0" instead of "2", +-- and this caused some problems. +-- +-- 20131031.8 Unified the code for encode() and encode_pretty(); they had been stupidly separate, +-- and had of course diverged (encode_pretty didn't get the fixes that encode got, so +-- sometimes produced incorrect results; thanks to Mattie for the heads up). +-- +-- Handle encoding tables with non-positive numeric keys (unlikely, but possible). +-- +-- If a table has both numeric and string keys, or its numeric keys are inappropriate +-- (such as being non-positive or infinite), the numeric keys are turned into +-- string keys appropriate for a JSON object. 
So, as before, +-- JSON:encode({ "one", "two", "three" }) +-- produces the array +-- ["one","two","three"] +-- but now something with mixed key types like +-- JSON:encode({ "one", "two", "three", SOMESTRING = "some string" })) +-- instead of throwing an error produces an object: +-- {"1":"one","2":"two","3":"three","SOMESTRING":"some string"} +-- +-- To maintain the prior throw-an-error semantics, set +-- JSON.noKeyConversion = true +-- +-- 20131004.7 Release under a Creative Commons CC-BY license, which I should have done from day one, sorry. +-- +-- 20130120.6 Comment update: added a link to the specific page on my blog where this code can +-- be found, so that folks who come across the code outside of my blog can find updates +-- more easily. +-- +-- 20111207.5 Added support for the 'etc' arguments, for better error reporting. +-- +-- 20110731.4 More feedback from David Kolf on how to make the tests for Nan/Infinity system independent. +-- +-- 20110730.3 Incorporated feedback from David Kolf at http://lua-users.org/wiki/JsonModules: +-- +-- * When encoding lua for JSON, Sparse numeric arrays are now handled by +-- spitting out full arrays, such that +-- JSON:encode({"one", "two", [10] = "ten"}) +-- returns +-- ["one","two",null,null,null,null,null,null,null,"ten"] +-- +-- In 20100810.2 and earlier, only up to the first non-null value would have been retained. +-- +-- * When encoding lua for JSON, numeric value NaN gets spit out as null, and infinity as "1+e9999". +-- Version 20100810.2 and earlier created invalid JSON in both cases. +-- +-- * Unicode surrogate pairs are now detected when decoding JSON. +-- +-- 20100810.2 added some checking to ensure that an invalid Unicode character couldn't leak in to the UTF-8 encoding +-- +-- 20100731.1 initial public release +-- diff --git a/README.md b/README.md index 992c0a7..d3f2bf1 100644 --- a/README.md +++ b/README.md @@ -21,7 +21,7 @@ Running without a warrior ------------------------- To run this outside the warrior, clone this repository, cd into its directory and run: - pip install seesaw + pip install --upgrade seesaw ./get-wget-lua.sh then start downloading with: @@ -32,9 +32,9 @@ For more options, run: run-pipeline --help -If you don't have root access and/or your version of pip is very old, you can replace "pip install seesaw" with: +If you don't have root access and/or your version of pip is very old, you can replace "pip install --upgrade seesaw" with: - wget https://raw.github.com/pypa/pip/master/contrib/get-pip.py ; python get-pip.py --user ; ~/.local/bin/pip install --user seesaw + wget https://raw.github.com/pypa/pip/master/contrib/get-pip.py ; python get-pip.py --user ; ~/.local/bin/pip install --upgrade --user seesaw so that pip and seesaw are installed in your home, then run @@ -56,26 +56,32 @@ Distribution-specific setup ### For Debian/Ubuntu: adduser --system --group --shell /bin/bash archiveteam - apt-get install -y git-core libgnutls-dev lua5.1 liblua5.1-0 liblua5.1-0-dev screen python-dev python-pip bzip2 zlib1g-dev - pip install seesaw + apt-get update && apt-get install -y git-core libgnutls-dev lua5.1 liblua5.1-0 liblua5.1-0-dev screen python-dev python-pip bzip2 zlib1g-dev flex autoconf + pip install --upgrade seesaw su -c "cd /home/archiveteam; git clone https://github.com/ArchiveTeam/reddit-grab.git; cd reddit-grab; ./get-wget-lua.sh" archiveteam screen su -c "cd /home/archiveteam/reddit-grab/; run-pipeline pipeline.py --concurrent 2 --address '127.0.0.1' YOURNICKHERE" archiveteam [... ctrl+A D to detach ...] 
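+
+You can re-attach the detached screen session later by running `screen -r` as the archiveteam user.
+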
+In __Debian Jessie__, the `libgnutls-dev` package was renamed to `libgnutls28-dev`. So, you need to do the following instead:
+
+    adduser --system --group --shell /bin/bash archiveteam
+    apt-get update && apt-get install -y git-core libgnutls28-dev lua5.1 liblua5.1-0 liblua5.1-0-dev screen python-dev python-pip bzip2 zlib1g-dev flex autoconf
+    [... pretty much the same as above ...]
+
 Wget-lua is also available on [ArchiveTeam's PPA](https://launchpad.net/~archiveteam/+archive/wget-lua) for Ubuntu.
 
 ### For CentOS:
 
-Ensure that you have the CentOS equivalent of bzip2 installed as well. You might need the EPEL repository to be enabled.
+Ensure that you have the CentOS equivalent of bzip2 installed as well. You will need the EPEL repository to be enabled.
 
-    yum -y install gnutls-devel lua-devel python-pip zlib-devel
-    pip install seesaw
+    yum -y install autoconf automake flex gnutls-devel lua-devel python-pip zlib-devel
+    pip install --upgrade seesaw
     [... pretty much the same as above ...]
 
 ### For openSUSE:
 
     zypper install liblua5_1 lua51 lua51-devel screen python-pip libgnutls-devel bzip2 python-devel gcc make
-    pip install seesaw
+    pip install --upgrade seesaw
     [... pretty much the same as above ...]
 
 ### For OS X:
@@ -83,7 +89,7 @@ Ensure that you have the CentOS equivalent of bzip2 installed as well. You might
 
 You need Homebrew. Ensure that you have the OS X equivalent of bzip2 installed as well.
 
     brew install python lua gnutls
-    pip install seesaw
+    pip install --upgrade seesaw
     [... pretty much the same as above ...]
 
 **There is a known issue with some packaged versions of rsync. If you get errors during the upload stage, reddit-grab will not work with your rsync version.**
 
 This supposedly fixes it:
 
@@ -97,12 +103,21 @@ This supposedly fixes it:
 Ensure that you have the Arch equivalent of bzip2 installed as well.
 
 1. Make sure you have `python2-pip` installed.
-2. Install [https://aur.archlinux.org/packages/wget-lua/](the wget-lua package from the AUR).
-3. Run `pip2 install seesaw`.
+2. Install [the wget-lua package from the AUR](https://aur.archlinux.org/packages/wget-lua/).
+3. Run `pip2 install --upgrade seesaw`.
 4. Modify the run-pipeline script in seesaw to point at `#!/usr/bin/python2` instead of `#!/usr/bin/python`.
 5. `useradd --system --group users --shell /bin/bash --create-home archiveteam`
 6. `screen su -c "cd /home/archiveteam/reddit-grab/; run-pipeline pipeline.py --concurrent 2 --address '127.0.0.1' YOURNICKHERE" archiveteam`
 
+### For Alpine Linux:
+
+    apk add lua5.1 git python bzip2 bash rsync gcc libc-dev lua5.1-dev zlib-dev gnutls-dev autoconf flex make
+    python -m ensurepip
+    pip install -U seesaw
+    git clone https://github.com/ArchiveTeam/reddit-grab
+    cd reddit-grab; ./get-wget-lua.sh
+    run-pipeline pipeline.py --concurrent 2 --address '127.0.0.1' YOURNICKHERE
+
 ### For FreeBSD:
 
 Honestly, I have no idea. `./get-wget-lua.sh` supposedly doesn't work due to differences in the `tar` that ships with FreeBSD. Another problem is the apparent absence of Lua 5.1 development headers. If you figure this out, please do let us know on IRC (irc.efnet.org #archiveteam).
 
@@ -134,6 +149,12 @@ If you're sure that you followed the steps to install `seesaw`, permissions on y
 
     chmod o+rX -R /usr/local/lib/python2.7/dist-packages
 
+### run-pipeline: command not found
+
+Install `seesaw` using `pip2` instead of `pip`.
+
+    pip2 install seesaw
+
 ### Issues in the code
 
 If you notice a bug and want to file a bug report, please use the GitHub issues tracker.
 
@@ -142,4 +163,5 @@ Are you a developer? Help write code for us!
Look at our [developer documentatio
 
 ### Other problems
 
-Have an issue not listed here? Join us on IRC and ask! We can be found at irc.efnet.org #deaddit.
+Have an issue not listed here? Join us on IRC and ask! We can be found at irc.efnet.org #shreddit.
+
diff --git a/cookies b/cookies
index 8c506c1..3cabb29 100644
--- a/cookies
+++ b/cookies
@@ -1 +1,2 @@
 .reddit.com	TRUE	/	FALSE	0	over18	1
+
diff --git a/get-wget-lua.sh b/get-wget-lua.sh
old mode 100644
new mode 100755
diff --git a/ignore-list b/ignore-list
new file mode 100644
index 0000000..e69de29
diff --git a/pipeline.py b/pipeline.py
index d799ecb..55c8f3c 100644
--- a/pipeline.py
+++ b/pipeline.py
@@ -5,6 +5,7 @@ import hashlib
 import os.path
 import random
 from seesaw.config import realize, NumberConfigValue
+from seesaw.externalprocess import ExternalProcess
 from seesaw.item import ItemInterpolation, ItemValue
 from seesaw.task import SimpleTask, LimitConcurrent
 from seesaw.tracker import GetItemFromTracker, PrepareStatsForTracker, \
@@ -15,8 +16,15 @@ import subprocess
 import sys
 import time
 import string
-import requests
 import re
+
+try:
+    import warcio
+    from warcio.archiveiterator import ArchiveIterator
+    from warcio.warcwriter import WARCWriter
+except ImportError:
+    raise Exception("Please install warcio with 'sudo pip install warcio --upgrade'.")
 
 import seesaw
 from seesaw.externalprocess import WgetDownload
@@ -24,10 +32,12 @@ from seesaw.pipeline import Pipeline
 from seesaw.project import Project
 from seesaw.util import find_executable
 
+from tornado import httpclient
+
 # check the seesaw version
-if StrictVersion(seesaw.__version__) < StrictVersion("0.8.5"):
-    raise Exception("This pipeline needs seesaw version 0.8.5 or higher.")
+if StrictVersion(seesaw.__version__) < StrictVersion('0.8.5'):
+    raise Exception('This pipeline needs seesaw version 0.8.5 or higher.')
 
 
 ###########################################################################
@@ -37,21 +47,21 @@ if StrictVersion(seesaw.__version__) < StrictVersion("0.8.5"):
 # 1. does not crash with --version, and
 # 2. prints the required version string
 WGET_LUA = find_executable(
-    "Wget+Lua",
-    ["GNU Wget 1.14.lua.20130523-9a5c"],
+    'Wget+Lua',
+    ['GNU Wget 1.14.lua.20130523-9a5c', 'GNU Wget 1.14.lua.20160530-955376b'],
     [
-        "./wget-lua",
-        "./wget-lua-warrior",
-        "./wget-lua-local",
-        "../wget-lua",
-        "../../wget-lua",
-        "/home/warrior/wget-lua",
-        "/usr/bin/wget-lua"
+        './wget-lua',
+        './wget-lua-warrior',
+        './wget-lua-local',
+        '../wget-lua',
+        '../../wget-lua',
+        '/home/warrior/wget-lua',
+        '/usr/bin/wget-lua'
     ]
 )
 
 if not WGET_LUA:
-    raise Exception("No usable Wget+Lua found.")
+    raise Exception('No usable Wget+Lua found.')
 
 
 ###########################################################################
@@ -59,7 +69,7 @@ if not WGET_LUA:
 #
 # Update this each time you make a non-cosmetic change.
 # It will be added to the WARC files and reported to the tracker.
-VERSION = "20150620.02"
+VERSION = '20190222.01'
 USER_AGENT = 'ArchiveTeam'
 TRACKER_ID = 'reddit'
 TRACKER_HOST = 'tracker.archiveteam.org'
@@ -73,7 +83,7 @@ TRACKER_HOST = 'tracker.archiveteam.org'
 # each item.
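+# The tasks below follow seesaw's SimpleTask pattern: subclass SimpleTask,
+# pass a task name to its constructor, and override process(item), which
+# the pipeline calls once per item. A minimal sketch of the pattern
+# (illustrative only, not part of this pipeline):
+#
+#     class PrintItemName(SimpleTask):
+#         def __init__(self):
+#             SimpleTask.__init__(self, 'PrintItemName')
+#
+#         def process(self, item):
+#             print(item['item_name'])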
class CheckIP(SimpleTask):
     def __init__(self):
-        SimpleTask.__init__(self, "CheckIP")
+        SimpleTask.__init__(self, 'CheckIP')
         self._counter = 0
 
     def process(self, item):
@@ -106,39 +116,98 @@ class CheckIP(SimpleTask):
 
 class PrepareDirectories(SimpleTask):
     def __init__(self, warc_prefix):
-        SimpleTask.__init__(self, "PrepareDirectories")
+        SimpleTask.__init__(self, 'PrepareDirectories')
         self.warc_prefix = warc_prefix
 
     def process(self, item):
-        item_name = item["item_name"]
+        item_name = item['item_name']
         escaped_item_name = item_name.replace(':', '_').replace('/', '_').replace('~', '_')
-        dirname = "/".join((item["data_dir"], escaped_item_name))
+        item_hash = hashlib.sha1(item_name.encode('utf-8')).hexdigest()
+        dirname = '/'.join((item['data_dir'], item_hash))
 
         if os.path.isdir(dirname):
             shutil.rmtree(dirname)
 
         os.makedirs(dirname)
 
-        item["item_dir"] = dirname
-        item["warc_file_base"] = "%s-%s-%s" % (self.warc_prefix, escaped_item_name,
-            time.strftime("%Y%m%d-%H%M%S"))
+        item['item_dir'] = dirname
+        item['warc_file_base'] = '%s-%s-%s' % (self.warc_prefix, item_hash,
+            time.strftime('%Y%m%d-%H%M%S'))
 
-        open("%(item_dir)s/%(warc_file_base)s.warc.gz" % item, "w").close()
+        open('%(item_dir)s/%(warc_file_base)s.warc.gz' % item, 'w').close()
+        open('%(item_dir)s/%(warc_file_base)s_data.txt' % item, 'w').close()
+
+
+class Deduplicate(SimpleTask):
+    # Rewrites the WARC produced by wget: any 'response' record whose
+    # payload digest was already seen in this file is replaced by a much
+    # smaller 'revisit' record pointing back at the first copy.
+    def __init__(self):
+        SimpleTask.__init__(self, 'Deduplicate')
+
+    def process(self, item):
+        digests = {}
+        input_filename = '%(item_dir)s/%(warc_file_base)s.warc.gz' % item
+        output_filename = '%(item_dir)s/%(warc_file_base)s-deduplicated.warc.gz' % item
+        with open(input_filename, 'rb') as f_in, \
+                open(output_filename, 'wb') as f_out:
+            writer = WARCWriter(filebuf=f_out, gzip=True)
+            for record in ArchiveIterator(f_in):
+                url = record.rec_headers.get_header('WARC-Target-URI')
+                if url is not None and url.startswith('<'):
+                    # Strip the <...> that wget wraps around some URIs.
+                    url = re.search('^<(.+)>$', url).group(1)
+                    record.rec_headers.replace_header('WARC-Target-URI', url)
+                if record.rec_headers.get_header('WARC-Type') == 'response':
+                    digest = record.rec_headers.get_header('WARC-Payload-Digest')
+                    if digest in digests:
+                        writer.write_record(
+                            self._record_response_to_revisit(writer, record,
+                                digests[digest])
+                        )
+                    else:
+                        # First occurrence of this payload; remember where.
+                        digests[digest] = (
+                            record.rec_headers.get_header('WARC-Record-ID'),
+                            record.rec_headers.get_header('WARC-Date'),
+                            record.rec_headers.get_header('WARC-Target-URI')
+                        )
+                        writer.write_record(record)
+                elif record.rec_headers.get_header('WARC-Type') == 'warcinfo':
+                    record.rec_headers.replace_header('WARC-Filename', output_filename)
+                    writer.write_record(record)
+                else:
+                    writer.write_record(record)
+
+    def _record_response_to_revisit(self, writer, record, duplicate):
+        # Turn a duplicate 'response' record into a 'revisit' record that
+        # refers back to the (record id, date, URI) of the first occurrence.
+        warc_headers = record.rec_headers
+        warc_headers.replace_header('WARC-Refers-To', duplicate[0])
+        warc_headers.replace_header('WARC-Refers-To-Date', duplicate[1])
+        warc_headers.replace_header('WARC-Refers-To-Target-URI', duplicate[2])
+        warc_headers.replace_header('WARC-Type', 'revisit')
+        warc_headers.replace_header('WARC-Truncated', 'length')
+        warc_headers.replace_header('WARC-Profile',
+                                    'http://netpreserve.org/warc/1.0/' \
+                                    'revisit/identical-payload-digest')
+        warc_headers.remove_header('WARC-Block-Digest')
+        warc_headers.remove_header('Content-Length')
+        return writer.create_warc_record(
+            record.rec_headers.get_header('WARC-Target-URI'),
+            'revisit',
+            warc_headers=warc_headers,
+            http_headers=record.http_headers
+        )
 
 
 class MoveFiles(SimpleTask):
     def __init__(self):
-        SimpleTask.__init__(self,
"MoveFiles") + SimpleTask.__init__(self, 'MoveFiles') def process(self, item): - # NEW for 2014! Check if wget was compiled with zlib support - if os.path.exists("%(item_dir)s/%(warc_file_base)s.warc" % item): + if os.path.exists('%(item_dir)s/%(warc_file_base)s.warc' % item): raise Exception('Please compile wget with zlib support!') - os.rename("%(item_dir)s/%(warc_file_base)s.warc.gz" % item, - "%(data_dir)s/%(warc_file_base)s.warc.gz" % item) + os.rename('%(item_dir)s/%(warc_file_base)s-deduplicated.warc.gz' % item, + '%(data_dir)s/%(warc_file_base)s-deduplicated.warc.gz' % item) + os.rename('%(item_dir)s/%(warc_file_base)s_data.txt' % item, + '%(data_dir)s/%(warc_file_base)s_data.txt' % item) - shutil.rmtree("%(item_dir)s" % item) + shutil.rmtree('%(item_dir)s' % item) def get_hash(filename): @@ -163,62 +232,54 @@ def stats_id_function(item): class WgetArgs(object): + post_chars = string.digits + string.ascii_lowercase + + def int_to_str(self, i): + d, m = divmod(i, 36) + if d > 0: + return self.int_to_str(d) + self.post_chars[m] + return self.post_chars[m] + def realize(self, item): wget_args = [ WGET_LUA, - "-U", USER_AGENT, - "-nv", - "--lua-script", "reddit.lua", - "--load-cookies", "cookies", - "-o", ItemInterpolation("%(item_dir)s/wget.log"), - "--no-check-certificate", - "--output-document", ItemInterpolation("%(item_dir)s/wget.tmp"), - "--truncate-output", - "-e", "robots=off", - "--rotate-dns", - "--recursive", "--level=inf", - "--no-parent", - "--page-requisites", - "--timeout", "30", - "--tries", "inf", - "--domains", "reddit.com,redditmedia.com", - "--span-hosts", - "--waitretry", "30", - "--warc-file", ItemInterpolation("%(item_dir)s/%(warc_file_base)s"), - "--warc-header", "operator: Archive Team", - "--warc-header", "reddit-dld-script-version: " + VERSION, - "--warc-header", ItemInterpolation("reddit-user: %(item_name)s"), + '-U', USER_AGENT, + '-nv', + '--lua-script', 'reddit.lua', + '--load-cookies', 'cookies', + '-o', ItemInterpolation('%(item_dir)s/wget.log'), + '--no-check-certificate', + '--output-document', ItemInterpolation('%(item_dir)s/wget.tmp'), + '--truncate-output', + '-e', 'robots=off', + '--rotate-dns', + '--recursive', '--level=inf', + '--no-parent', + '--page-requisites', + '--timeout', '30', + '--tries', 'inf', + '--domains', 'reddit.com', + '--span-hosts', + '--waitretry', '30', + '--warc-file', ItemInterpolation('%(item_dir)s/%(warc_file_base)s'), + '--warc-header', 'operator: Archive Team', + '--warc-header', 'reddit-dld-script-version: ' + VERSION, + '--warc-header', ItemInterpolation('reddit-item: %(item_name)s') ] item_name = item['item_name'] - assert ':' in item_name item_type, item_value = item_name.split(':', 1) item['item_type'] = item_type item['item_value'] = item_value - - assert item_type in ('36comments') - - if item_type == '36comments': - suffixes = string.digits + string.ascii_lowercase - for url in ['http://redd.it/{0}{1}'.format(item_value, a) for a in suffixes]: - wget_args.append(url) -# for suffix in suffixes: -# commenturl = 'https://www.reddit.com/comments/{0}{1}/'.format(item_value, suffix) -# html = requests.get(commenturl, headers={'User-Agent': 'ArchiveTeam'}) -# print('Downloaded', html.status_code, getattr(html, 'reason')) -# sys.stdout.flush() -# if html.status_code == 200: -# if not html.text: -# raise Exception('Something went wrong during the download. 
({0})'.format(html.status_code))
-#            else:
-#                for origurl in re.findall(r'href="(https?:\/\/www\.reddit\.com\/r\/[^/]+\/comments\/{0}{1}\/[^"]+)"'.format(item_value, suffix), html.text):
-#                    if (re.search(r'https?:\/\/www\.reddit\.com\/r\/[^/]+\/comments\/[^/]+\/[^/]+\/', origurl) or re.search(r'https?:\/\/www\.reddit\.com\/r\/[^/]+\/comments\/[^/]+\/', origurl)) and not re.search(r'https?:\/\/www\.reddit\.com\/r\/[^/]+\/comments\/[^/]+\/[^/]+\/.', origurl):
-#                        wget_args.append(origurl)
-#            elif html.status_code == 404:
-#                print('This url is 404.')
-#            else:
-#                raise Exception('Something went wrong during the download. ({0})'.format(html.status_code))
+
+        if item_type == 'posts':
+            start, end = item_value.split('-')
+            for i in range(int(start), int(end)+1):
+                post_id = self.int_to_str(i)
+                wget_args.extend(['--warc-header', 'reddit-post: {}'.format(post_id)])
+                wget_args.append('https://www.reddit.com/comments/{}'.format(post_id))
+                wget_args.append('https://old.reddit.com/comments/{}'.format(post_id))
         else:
             raise Exception('Unknown item')
 
@@ -237,59 +298,67 @@ class WgetArgs(object):
 # This will be shown in the warrior management panel. The logo should not
 # be too big. The deadline is optional.
 project = Project(
-    title="reddit",
-    project_html="""
-        [... project logo; links: www.reddit.com Website · Leaderboard ...]
-        Grabbing reddit.
-    """
+    title='reddit',
+    project_html='''
+        [... project logo; links: reddit.com Website · Leaderboard ...]
+        Archiving everything from reddit.
+ ''' ) pipeline = Pipeline( CheckIP(), - GetItemFromTracker("http://%s/%s" % (TRACKER_HOST, TRACKER_ID), downloader, + GetItemFromTracker('http://%s/%s' % (TRACKER_HOST, TRACKER_ID), downloader, VERSION), - PrepareDirectories(warc_prefix="reddit"), + PrepareDirectories(warc_prefix='reddit'), WgetDownload( WgetArgs(), max_tries=2, - accept_on_exit_code=[0, 8], + accept_on_exit_code=[0, 4, 8], env={ - "item_dir": ItemValue("item_dir"), - "item_value": ItemValue("item_value"), - "item_type": ItemValue("item_type"), + 'item_dir': ItemValue('item_dir'), + 'item_value': ItemValue('item_value'), + 'item_type': ItemValue('item_type'), + 'warc_file_base': ItemValue('warc_file_base') } ), + Deduplicate(), PrepareStatsForTracker( - defaults={"downloader": downloader, "version": VERSION}, + defaults={'downloader': downloader, 'version': VERSION}, file_groups={ - "data": [ - ItemInterpolation("%(item_dir)s/%(warc_file_base)s.warc.gz") + 'data': [ + ItemInterpolation('%(item_dir)s/%(warc_file_base)s-deduplicated.warc.gz') ] }, id_function=stats_id_function, ), MoveFiles(), - LimitConcurrent(NumberConfigValue(min=1, max=4, default="1", - name="shared:rsync_threads", title="Rsync threads", - description="The maximum number of concurrent uploads."), + LimitConcurrent(NumberConfigValue(min=1, max=20, default='20', + name='shared:rsync_threads', title='Rsync threads', + description='The maximum number of concurrent uploads.'), UploadWithTracker( - "http://%s/%s" % (TRACKER_HOST, TRACKER_ID), + 'http://%s/%s' % (TRACKER_HOST, TRACKER_ID), downloader=downloader, version=VERSION, files=[ - ItemInterpolation("%(data_dir)s/%(warc_file_base)s.warc.gz") + ItemInterpolation('%(data_dir)s/%(warc_file_base)s-deduplicated.warc.gz'), + ItemInterpolation('%(data_dir)s/%(warc_file_base)s_data.txt') ], - rsync_target_source_path=ItemInterpolation("%(data_dir)s/"), + rsync_target_source_path=ItemInterpolation('%(data_dir)s/'), rsync_extra_args=[ - "--recursive", - "--partial", - "--partial-dir", ".rsync-tmp", + '--sockopts=SO_SNDBUF=8388608,SO_RCVBUF=8388608', + '--recursive', + '--partial', + '--partial-dir', '.rsync-tmp', + '--min-size', '1', + '--no-compress', + '--compress-level', '0' ] ), ), SendDoneToTracker( - tracker_url="http://%s/%s" % (TRACKER_HOST, TRACKER_ID), - stats=ItemValue("stats") + tracker_url='http://%s/%s' % (TRACKER_HOST, TRACKER_ID), + stats=ItemValue('stats') ) ) + diff --git a/reddit.lua b/reddit.lua index e170969..5f5adf3 100644 --- a/reddit.lua +++ b/reddit.lua @@ -1,22 +1,32 @@ -dofile("urlcode.lua") dofile("table_show.lua") +dofile("urlcode.lua") +JSON = (loadfile "JSON.lua")() -local url_count = 0 -local tries = 0 local item_type = os.getenv('item_type') local item_value = os.getenv('item_value') +local item_dir = os.getenv('item_dir') +local warc_file_base = os.getenv('warc_file_base') +local url_count = 0 +local tries = 0 local downloaded = {} local addedtolist = {} +local abortgrab = false --- Do not download these urls: -downloaded["http://pixel.redditmedia.com/pixel/of_destiny.png?v=q1Ga4BM4n71zceWwjRg4266wx1BqgGjx8isnnrLeBUv%2FXq%2Bk60QeBpQruPDKFQFv%2FDWVNxp63YPBIKv8pMk%2BhrkV3HA5b7GO"] = true -downloaded["http://pixel.redditmedia.com/pixel/of_doom.png"] = true -downloaded["http://pixel.redditmedia.com/pixel/of_delight.png"] = true -downloaded["http://pixel.redditmedia.com/pixel/of_discovery.png"] = true -downloaded["http://pixel.redditmedia.com/pixel/of_diversity.png"] = true -downloaded["http://pixel.redditmedia.com/click"] = true -downloaded["https://stats.redditmedia.com/"] = true 
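+-- 'posts' records the base-36 ids of the posts this item covers (for
+-- example, pipeline.py's int_to_str() turns 1234567 into 'qglj' and queues
+-- https://www.reddit.com/comments/qglj); it is filled in as those pages
+-- are fetched, and allowed() accepts any URL mentioning one of the ids.
+-- 'requested_children' tracks which "morechildren" API payloads were
+-- already queued, so each batch of collapsed comments is requested once.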
+local posts = {} +local requested_children = {} + +for ignore in io.open("ignore-list", "r"):lines() do + downloaded[ignore] = true +end + +load_json_file = function(file) + if file then + return JSON:decode(file) + else + return nil + end +end read_file = function(file) if file then @@ -29,141 +39,218 @@ read_file = function(file) end end -wget.callbacks.download_child_p = function(urlpos, parent, depth, start_url_parsed, iri, verdict, reason) - local url = urlpos["url"]["url"] - local html = urlpos["link_expect_html"] - - if downloaded[url] == true or addedtolist[url] == true then +allowed = function(url, parenturl) + if string.match(url, "'+") + or string.match(url, "[<>\\%*%$;%^%[%],%(%){}]") + or string.match(url, "^https?://[^/]*reddit%.com/login") + or string.match(url, "^https?://[^/]*reddit%.com/register") + or string.match(url, "%?sort=") + or string.match(url, "^https?://www%.reddit%.com/") --TEMP + or string.match(url, "/%.rss$") then return false end - - if (downloaded[url] ~= true or addedtolist[url] ~= true) then - if string.match(url, "[^a-z0-9]"..item_value.."[0-9a-z]") and not (string.match(url, "[^a-z0-9]"..item_value.."[0-9a-z][0-9a-z]") or string.match(url, "%?sort=") or string.match(url, "%?ref=") or string.match(url, "%?count=") or string.match(url, "%.rss") or string.match(url, "%?originalUrl=") or string.match(url, "m%.reddit%.com") or string.match(url, "thumbs%.redditmedia%.com")) then - addedtolist[url] = true - return true - else + + local tested = {} + for s in string.gmatch(url, "([^/]+)") do + if tested[s] == nil then + tested[s] = 0 + end + if tested[s] == 6 then return false end - else + tested[s] = tested[s] + 1 + end + + if url .. "/" == parenturl then return false end + + if string.match(url, "^https?://i%.redd%.it/") + or string.match(url, "^https?://[^/]*redditmedia%.com/") + or string.match(url, "^https://old.reddit.com/api/morechildren$") then + return true + end + + for s in string.gmatch(url, "([a-z0-9]+)") do + if posts[s] then + return true + end + end + + return false end +wget.callbacks.download_child_p = function(urlpos, parent, depth, start_url_parsed, iri, verdict, reason) + local url = urlpos["url"]["url"] + local html = urlpos["link_expect_html"] + + if string.match(url, "[<>\\%*%$;%^%[%],%(%){}]") then + return false + end + + if (downloaded[url] ~= true and addedtolist[url] ~= true) + and (allowed(url, parent["url"]) or html == 0) then + addedtolist[url] = true + return true + end + + return false +end wget.callbacks.get_urls = function(file, url, is_css, iri) local urls = {} local html = nil + + downloaded[url] = true - if downloaded[url] ~= true then - downloaded[url] = true - end - - local function check(url) - if (downloaded[url] ~= true and addedtolist[url] ~= true) and (string.match(url, "[^a-z0-9]"..item_value.."[0-9a-z]") or (string.match(url, "redditmedia%.com")) and not (string.match(url, "[^a-z0-9]"..item_value.."[0-9a-z][0-9a-z]") or string.match(url, "thumbs%.redditmedia%.com") or string.match(url, "%?sort=") or string.match(url, "%?ref=") or string.match(url, "%?count=") or string.match(url, "%.rss") or string.match(url, "%?originalUrl=") or string.match(url, "m%.reddit%.com")) then - if string.match(url, "&") then - table.insert(urls, { url=string.gsub(url, "&", "&") }) - addedtolist[url] = true - addedtolist[string.gsub(url, "&", "&")] = true - elseif string.match(url, "#") then - table.insert(urls, { url=string.match(url, "(https?//:[^#]+)#") }) - addedtolist[url] = true - addedtolist[string.match(url, 
"(https?//:[^#]+)#")] = true - else - table.insert(urls, { url=url }) - addedtolist[url] = true - end + local function check(urla) + local origurl = url + local url = string.match(urla, "^([^#]+)") + local url_ = string.gsub(url, "&", "&") + if (downloaded[url_] ~= true and addedtolist[url_] ~= true) + and allowed(url_, origurl) then + table.insert(urls, { url=url_ }) + addedtolist[url_] = true + addedtolist[url] = true end end - - if string.match(url, "[^a-z0-9]"..item_value.."[0-9a-z]") and not (string.match(url, "[^a-z0-9]"..item_value.."[0-9a-z][0-9a-z]") or string.match(url, "/related/"..item_value)) then + + local function checknewurl(newurl) + if string.match(newurl, "^https?:////") then + check(string.gsub(newurl, ":////", "://")) + elseif string.match(newurl, "^https?://") then + check(newurl) + elseif string.match(newurl, "^https?:\\/\\?/") then + check(string.gsub(newurl, "\\", "")) + elseif string.match(newurl, "^\\/\\/") then + check(string.match(url, "^(https?:)")..string.gsub(newurl, "\\", "")) + elseif string.match(newurl, "^//") then + check(string.match(url, "^(https?:)")..newurl) + elseif string.match(newurl, "^\\/") then + check(string.match(url, "^(https?://[^/]+)")..string.gsub(newurl, "\\", "")) + elseif string.match(newurl, "^/") then + check(string.match(url, "^(https?://[^/]+)")..newurl) + elseif string.match(newurl, "^%./") then + checknewurl(string.match(newurl, "^%.(.+)")) + end + end + + local function checknewshorturl(newurl) + if string.match(newurl, "^%?") then + check(string.match(url, "^(https?://[^%?]+)")..newurl) + elseif not (string.match(newurl, "^https?:\\?/\\?//?/?") + or string.match(newurl, "^[/\\]") + or string.match(newurl, "^%./") + or string.match(newurl, "^[jJ]ava[sS]cript:") + or string.match(newurl, "^[mM]ail[tT]o:") + or string.match(newurl, "^vine:") + or string.match(newurl, "^android%-app:") + or string.match(newurl, "^ios%-app:") + or string.match(newurl, "^%${")) then + check(string.match(url, "^(https?://.+/)")..newurl) + end + end + + if string.match(url, "^https?://www%.reddit%.com/comments/[a-z0-9]+$") + or string.match(url, "^https?://old%.reddit%.com/comments/[a-z0-9]+$") then + posts[string.match(url, "[a-z0-9]+$")] = true + end + + if allowed(url, nil) + and not string.match(url, "^https?://[^/]*redditmedia%.com/") + and not string.match(url, "^https?://[^/]*redditstatic%.com/") then html = read_file(file) - for newurl in string.gmatch(html, '"thumbnail[^"]+"[^"]+"[^"]+"[^"]+"(//[^"]+)"') do - if downloaded[string.gsub(newurl, "//", "http://")] ~= true and addedtolist[string.gsub(newurl, "//", "http://")] ~= true then - table.insert(urls, { url=string.gsub(newurl, "//", "http://") }) - addedtolist[string.gsub(newurl, "//", "http://")] = true + if string.match(url, "^https://old.reddit.com/api/morechildren$") then + html = string.gsub(html, '\\"', '"') + end + if string.match(url, "^https?://old%.reddit%.com/") then + for s in string.gmatch(html, "(return%s+morechildren%(this,%s*'[^']+',%s*'[^']+',%s*'[^']+',%s*[0-9]+,%s*'[^']+'%))") do + local link_id, sort, children, depth, limit_children = string.match(s, "%(this,%s*'([^']+)',%s*'([^']+)',%s*'([^']+)',%s*([0-9]+),%s*'([^']+)'%)$") + local id = string.match(children, "^([^,]+)") + local subreddit = string.match(html, 'data%-subreddit="([^"]+)"') + local post_data = "link_id=" .. link_id .. "&sort=" .. sort .. "&children=" .. string.gsub(children, ",", "%%2C") .. "&depth=" .. depth .. "&id=t1_" .. id .. "&limit_children=" .. limit_children .. "&r=" .. subreddit .. 
"&renderstyle=html" + if requested_children[post_data] == nil then + requested_children[post_data] = true + table.insert(urls, {url="https://old.reddit.com/api/morechildren", + post_data=post_data}) + end end end - for newurl in string.gmatch(html, '"(https?://[^"]+)"') do - check(newurl) + for newurl in string.gmatch(string.gsub(html, """, '"'), '([^"]+)') do + checknewurl(newurl) end - for newurl in string.gmatch(html, "'(https?://[^']+)'") do - check(newurl) + for newurl in string.gmatch(string.gsub(html, "'", "'"), "([^']+)") do + checknewurl(newurl) end - for newurl in string.gmatch(html, '("/[^"]+)"') do - if string.match(newurl, '"//') then - check(string.gsub(newurl, '"//', 'http://')) - elseif not string.match(newurl, '"//') then - check(string.match(url, "(https?://[^/]+)/")..string.match(newurl, '"(/.+)')) - end + for newurl in string.gmatch(html, ">%s*([^<%s]+)") do + checknewurl(newurl) end - for newurl in string.gmatch(html, "('/[^']+)'") do - if string.match(newurl, "'//") then - check(string.gsub(newurl, "'//", "http://")) - elseif not string.match(newurl, "'//") then - check(string.match(url, '(https?://[^/]+)/')..string.match(newurl, "'(/.+)")) - end + for newurl in string.gmatch(html, "[^%-]href='([^']+)'") do + checknewshorturl(newurl) + end + for newurl in string.gmatch(html, '[^%-]href="([^"]+)"') do + checknewshorturl(newurl) + end + for newurl in string.gmatch(html, ":%s*url%(([^%)]+)%)") do + checknewurl(newurl) end end - + return urls end - wget.callbacks.httploop_result = function(url, err, http_stat) - -- NEW for 2014: Slightly more verbose messages because people keep - -- complaining that it's not moving or not working status_code = http_stat["statcode"] url_count = url_count + 1 - io.stdout:write(url_count .. "=" .. status_code .. " " .. url["url"] .. ". \n") + io.stdout:write(url_count .. "=" .. status_code .. " " .. url["url"] .. " \n") io.stdout:flush() - if (status_code >= 200 and status_code <= 399) then - if string.match(url.url, "https://") then - local newurl = string.gsub(url.url, "https://", "http://") - downloaded[newurl] = true - else - downloaded[url.url] = true + if (status_code >= 300 and status_code <= 399) then + local newloc = string.match(http_stat["newloc"], "^([^#]+)") + if string.match(newloc, "^//") then + newloc = string.match(url["url"], "^(https?:)") .. string.match(newloc, "^//(.+)") + elseif string.match(newloc, "^/") then + newloc = string.match(url["url"], "^(https?://[^/]+)") .. newloc + elseif not string.match(newloc, "^https?://") then + newloc = string.match(url["url"], "^(https?://.+/)") .. newloc + end + if downloaded[newloc] == true or addedtolist[newloc] == true then + return wget.actions.EXIT end end - if status_code >= 500 or - (status_code >= 400 and status_code ~= 404 and status_code ~= 403) then + if (status_code >= 200 and status_code <= 399) then + downloaded[url["url"]] = true + downloaded[string.gsub(url["url"], "https?://", "http://")] = true + end - io.stdout:write("\nServer returned "..http_stat.statcode..". Sleeping.\n") + if abortgrab == true then + io.stdout:write("ABORTING...\n") + return wget.actions.ABORT + end + + if status_code >= 500 + or (status_code >= 400 and status_code ~= 403 and status_code ~= 404) + or status_code == 0 then + io.stdout:write("Server returned "..http_stat.statcode.." ("..err.."). 
+
+    if status_code >= 500
+    or (status_code >= 400 and status_code ~= 403 and status_code ~= 404)
+    or status_code == 0 then
+        io.stdout:write("Server returned "..http_stat.statcode.." ("..err.."). Sleeping.\n")
         io.stdout:flush()
-
-        os.execute("sleep 10")
-
-        tries = tries + 1
-
-        if tries >= 6 then
+        local maxtries = 8
+        if not allowed(url["url"], nil) then
+            maxtries = 2
+        end
+        if tries > maxtries then
             io.stdout:write("\nI give up...\n")
             io.stdout:flush()
             tries = 0
-            if string.match(url["url"], "[^a-z0-9]"..item_value.."[0-9a-z]") and not string.match(url["url"], "[^a-z0-9]"..item_value.."[0-9a-z][0-9a-z]") then
+            if allowed(url["url"], nil) then
                 return wget.actions.ABORT
             else
                 return wget.actions.EXIT
             end
         else
-            return wget.actions.CONTINUE
-        end
-    elseif status_code == 0 then
-
-        io.stdout:write("\nServer returned "..http_stat.statcode..". Sleeping.\n")
-        io.stdout:flush()
-
-        os.execute("sleep 10")
-
-        tries = tries + 1
-
-        if tries >= 6 then
-            io.stdout:write("\nI give up...\n")
-            io.stdout:flush()
-            tries = 0
-            return wget.actions.ABORT
-        else
+            os.execute("sleep " .. math.floor(math.pow(2, tries)))
+            tries = tries + 1
             return wget.actions.CONTINUE
         end
     end
@@ -178,3 +265,10 @@ wget.callbacks.httploop_result = function(url, err, http_stat)
 
     return wget.actions.NOTHING
 end
+
+wget.callbacks.before_exit = function(exit_status, exit_status_string)
+    if abortgrab == true then
+        return wget.exits.IO_FAIL
+    end
+    return exit_status
+end
diff --git a/warrior-install.sh b/warrior-install.sh
new file mode 100755
index 0000000..135477f
--- /dev/null
+++ b/warrior-install.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+
+PIP=pip
+
+if type pip3 > /dev/null 2>&1
+then
+    PIP=pip3
+fi
+
+echo "Installing warcio"
+if ! sudo $PIP install warcio --upgrade
+then
+    exit 1
+fi
+
+exit 0
+
diff --git a/wget-lua-warrior b/wget-lua-warrior
old mode 100644
new mode 100755
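The new `before_exit` callback in reddit.lua above is what surfaces an aborted grab: when the `abortgrab` flag has been raised, the exit status wget-lua would normally report is replaced with `wget.exits.IO_FAIL`, so the pipeline can treat the item as failed instead of finished. A standalone sketch of the pattern (the `wget` stub and the value 6 are placeholders, not wget-lua's real internals):

    -- Stub just enough of the wget-lua environment to show the flow.
    local wget = { exits = { IO_FAIL = 6 } }  -- placeholder value
    local abortgrab = false

    local function before_exit(exit_status, exit_status_string)
        if abortgrab == true then
            return wget.exits.IO_FAIL  -- force a failing exit status
        end
        return exit_status             -- otherwise keep wget's own status
    end

    abortgrab = true
    print(before_exit(0, ""))  --> 6: a clean exit is rewritten as a failure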